1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2017 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include <sstream>
23
24 #include "config.h"
25 #define INCLUDE_VECTOR
26 #include "system.h"
27 #include "coretypes.h"
28 #include "backend.h"
29 #include "target.h"
30 #include "rtl.h"
31 #include "tree.h"
32 #include "gimple.h"
33 #include "cfghooks.h"
34 #include "df.h"
35 #include "memmodel.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "attribs.h"
39 #include "optabs.h"
40 #include "emit-rtl.h"
41 #include "recog.h"
42 #include "diagnostic-core.h"
43 #include "alias.h"
44 #include "fold-const.h"
45 #include "stor-layout.h"
46 #include "calls.h"
47 #include "varasm.h"
48 #include "flags.h"
49 #include "explow.h"
50 #include "expr.h"
51 #include "reload.h"
52 #include "output.h"
53 #include "insn-attr.h"
54 #include "dwarf2.h"
55 #include "langhooks.h"
56 #include "cfgrtl.h"
57 #include "intl.h"
58 #include "sched-int.h"
59 #include "gimplify.h"
60 #include "tm-constrs.h"
61 #include "opts.h"
62 #include "tree-pass.h"
63 #include "context.h"
64 #include "builtins.h"
65 #include "rtl-iter.h"
66
67 /* This file should be included last. */
68 #include "target-def.h"
69
70 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
71
72 #define CONST_OK_FOR_ADD(size) CONST_OK_FOR_I08 (size)
73 #define GEN_MOV (*(gen_movsi))
74 #define GEN_ADD3 (*(gen_addsi3))
75 #define GEN_SUB3 (*(gen_subsi3))
76
77 /* Used to simplify the logic below. Find the attributes wherever
78 they may be. */
79 #define SH_ATTRIBUTES(decl) \
80 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
81 : DECL_ATTRIBUTES (decl) \
82 ? (DECL_ATTRIBUTES (decl)) \
83 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
84
85 /* Set to true by expand_prologue() when the function is an
86 interrupt handler. */
87 bool current_function_interrupt;
88
89 tree sh_deferred_function_attributes;
90 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
91
92 /* Global variables for machine-dependent things. */
93
94 /* Which cpu are we scheduling for. */
95 enum processor_type sh_cpu;
96
97 /* Definitions used in ready queue reordering for first scheduling pass. */
98
99 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
100 static short *regmode_weight[2];
101
102 /* Total SFmode and SImode weights of scheduled insns. */
103 static int curr_regmode_pressure[2];
104
105 /* Number of r0 life regions. */
106 static int r0_life_regions;
107
108 /* If true, skip cycles for Q -> R movement. */
109 static int skip_cycles = 0;
110
111 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
112 and returned from sh_reorder2. */
113 static short cached_can_issue_more;
114
115 /* Unique number for UNSPEC_BBR pattern. */
116 static unsigned int unspec_bbr_uid = 1;
117
118 /* Provides the class number of the smallest class containing
119 reg number. */
120 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
121 {
122 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
123 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
124 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
125 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
126 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
139 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
140 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
141 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
142 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
155 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
156 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
157 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
158 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
159 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
160 GENERAL_REGS, GENERAL_REGS,
161 };
162
163 char sh_register_names[FIRST_PSEUDO_REGISTER] \
164 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
165
166 char sh_additional_register_names[ADDREGNAMES_SIZE] \
167 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
168 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
169
170 int assembler_dialect;
171
172 static void split_branches (rtx_insn *);
173 static int branch_dest (rtx);
174 static void print_slot (rtx_sequence *);
175 static rtx_code_label *add_constant (rtx, machine_mode, rtx);
176 static void dump_table (rtx_insn *, rtx_insn *);
177 static bool broken_move (rtx_insn *);
178 static bool mova_p (rtx_insn *);
179 static rtx_insn *find_barrier (int, rtx_insn *, rtx_insn *);
180 static bool noncall_uses_reg (rtx, rtx_insn *, rtx *);
181 static rtx_insn *gen_block_redirect (rtx_insn *, int, int);
182 static void sh_reorg (void);
183 static void sh_option_override (void);
184 static void sh_override_options_after_change (void);
185 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
186 static rtx_insn* emit_frame_insn (rtx);
187 static rtx push (int);
188 static void pop (int);
189 static void push_regs (HARD_REG_SET* mask, bool interrupt_handler);
190 static int calc_live_regs (HARD_REG_SET *);
191 static HOST_WIDE_INT rounded_frame_size (int);
192 static bool sh_frame_pointer_required (void);
193 static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
194 static int sh_mode_needed (int, rtx_insn *);
195 static int sh_mode_after (int, int, rtx_insn *);
196 static int sh_mode_entry (int);
197 static int sh_mode_exit (int);
198 static int sh_mode_priority (int entity, int n);
199
200 static rtx mark_constant_pool_use (rtx);
201 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
202 int, bool *);
203 static tree sh_handle_resbank_handler_attribute (tree *, tree,
204 tree, int, bool *);
205 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
206 tree, int, bool *);
207 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
208 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
209 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
210 static void sh_print_operand (FILE *, rtx, int);
211 static void sh_print_operand_address (FILE *, machine_mode, rtx);
212 static bool sh_print_operand_punct_valid_p (unsigned char code);
213 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
214 static void sh_output_function_epilogue (FILE *);
215 static void sh_insert_attributes (tree, tree *);
216 static const char *sh_check_pch_target_flags (int);
217 static int sh_register_move_cost (machine_mode, reg_class_t, reg_class_t);
218 static int sh_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
219 static int sh_issue_rate (void);
220 static int sh_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *sort_p);
221 static short find_set_regmode_weight (rtx, machine_mode);
222 static short find_insn_regmode_weight (rtx, machine_mode);
223 static void find_regmode_weight (basic_block, machine_mode);
224 static int find_r0_life_regions (basic_block);
225 static void sh_md_init_global (FILE *, int, int);
226 static void sh_md_finish_global (FILE *, int);
227 static int rank_for_reorder (const void *, const void *);
228 static void swap_reorder (rtx_insn **, int);
229 static void ready_reorder (rtx_insn **, int);
230 static bool high_pressure (machine_mode);
231 static int sh_reorder (FILE *, int, rtx_insn **, int *, int);
232 static int sh_reorder2 (FILE *, int, rtx_insn **, int *, int);
233 static void sh_md_init (FILE *, int, int);
234 static int sh_variable_issue (FILE *, int, rtx_insn *, int);
235
236 static bool sh_function_ok_for_sibcall (tree, tree);
237
238 static bool sh_can_follow_jump (const rtx_insn *, const rtx_insn *);
239 static bool sh_ms_bitfield_layout_p (const_tree);
240
241 static void sh_init_builtins (void);
242 static tree sh_builtin_decl (unsigned, bool);
243 static rtx sh_expand_builtin (tree, rtx, rtx, machine_mode, int);
244 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
245 HOST_WIDE_INT, tree);
246 static void sh_file_start (void);
247 static bool sh_assemble_integer (rtx, unsigned int, int);
248 static bool flow_dependent_p (rtx, rtx);
249 static void flow_dependent_p_1 (rtx, const_rtx, void *);
250 static int shiftcosts (rtx);
251 static int and_xor_ior_costs (rtx, int);
252 static int addsubcosts (rtx);
253 static int multcosts (rtx);
254 static bool unspec_caller_rtx_p (rtx);
255 static bool sh_cannot_copy_insn_p (rtx_insn *);
256 static bool sh_cannot_force_const_mem_p (machine_mode, rtx);
257 static bool sh_rtx_costs (rtx, machine_mode, int, int, int *, bool);
258 static int sh_address_cost (rtx, machine_mode, addr_space_t, bool);
259 static int sh_pr_n_sets (void);
260 static rtx sh_allocate_initial_value (rtx);
261 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
262 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
263 machine_mode,
264 struct secondary_reload_info *);
265 static bool sh_legitimate_address_p (machine_mode, rtx, bool);
266 static rtx sh_legitimize_address (rtx, rtx, machine_mode);
267 static rtx sh_delegitimize_address (rtx);
268 static bool sh_cannot_substitute_mem_equiv_p (rtx);
269 static bool sh_legitimize_address_displacement (rtx *, rtx *, machine_mode);
270 static int scavenge_reg (HARD_REG_SET *s);
271
272 static rtx sh_struct_value_rtx (tree, int);
273 static rtx sh_function_value (const_tree, const_tree, bool);
274 static bool sh_function_value_regno_p (const unsigned int);
275 static rtx sh_libcall_value (machine_mode, const_rtx);
276 static bool sh_return_in_memory (const_tree, const_tree);
277 static rtx sh_builtin_saveregs (void);
278 static void sh_setup_incoming_varargs (cumulative_args_t, machine_mode,
279 tree, int *, int);
280 static bool sh_strict_argument_naming (cumulative_args_t);
281 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
282 static void sh_atomic_assign_expand_fenv (tree *, tree *, tree *);
283 static tree sh_build_builtin_va_list (void);
284 static void sh_va_start (tree, rtx);
285 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
286 static bool sh_promote_prototypes (const_tree);
287 static machine_mode sh_promote_function_mode (const_tree type,
288 machine_mode,
289 int *punsignedp,
290 const_tree funtype,
291 int for_return);
292 static bool sh_pass_by_reference (cumulative_args_t, machine_mode,
293 const_tree, bool);
294 static bool sh_callee_copies (cumulative_args_t, machine_mode,
295 const_tree, bool);
296 static int sh_arg_partial_bytes (cumulative_args_t, machine_mode,
297 tree, bool);
298 static void sh_function_arg_advance (cumulative_args_t, machine_mode,
299 const_tree, bool);
300 static rtx sh_function_arg (cumulative_args_t, machine_mode,
301 const_tree, bool);
302 static int sh_dwarf_calling_convention (const_tree);
303 static void sh_encode_section_info (tree, rtx, int);
304 static bool sh2a_function_vector_p (tree);
305 static void sh_trampoline_init (rtx, tree, rtx);
306 static rtx sh_trampoline_adjust_address (rtx);
307 static void sh_conditional_register_usage (void);
308 static bool sh_legitimate_constant_p (machine_mode, rtx);
309 static int mov_insn_size (machine_mode, bool);
310 static int mov_insn_alignment_mask (machine_mode, bool);
311 static bool sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
312 unsigned int,
313 enum by_pieces_operation,
314 bool);
315 static bool sequence_insn_p (rtx_insn *);
316 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
317 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
318 machine_mode, bool);
319 static bool sh_legitimate_combined_insn (rtx_insn* insn);
320
321 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
322
323 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
324 \f
325 static const struct attribute_spec sh_attribute_table[] =
326 {
327 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
328 affects_type_identity } */
329 { "interrupt_handler", 0, 0, true, false, false,
330 sh_handle_interrupt_handler_attribute, false },
331 { "sp_switch", 1, 1, true, false, false,
332 sh_handle_sp_switch_attribute, false },
333 { "trap_exit", 1, 1, true, false, false,
334 sh_handle_trap_exit_attribute, false },
335 { "renesas", 0, 0, false, true, false,
336 sh_handle_renesas_attribute, false },
337 { "trapa_handler", 0, 0, true, false, false,
338 sh_handle_interrupt_handler_attribute, false },
339 { "nosave_low_regs", 0, 0, true, false, false,
340 sh_handle_interrupt_handler_attribute, false },
341 { "resbank", 0, 0, true, false, false,
342 sh_handle_resbank_handler_attribute, false },
343 { "function_vector", 1, 1, true, false, false,
344 sh2a_handle_function_vector_handler_attribute, false },
345 { NULL, 0, 0, false, false, false, NULL, false }
346 };
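/* For reference, these attributes are applied to function declarations in
   user code.  For example, an SH interrupt handler that switches to an
   alternate stack and returns via trapa #12 might (hypothetically) be
   declared as

     void handler (void)
       __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                       trap_exit (12)));

   where sp_switch and trap_exit each take the single argument recorded in
   the table above; "alt_stack" here is just a placeholder name for the
   variable holding the alternate stack address.  */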
347 \f
348 /* Initialize the GCC target structure. */
349 #undef TARGET_ATTRIBUTE_TABLE
350 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
351
352 /* The next two are used for debug info when compiling with -gdwarf. */
353 #undef TARGET_ASM_UNALIGNED_HI_OP
354 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
355 #undef TARGET_ASM_UNALIGNED_SI_OP
356 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
357
358 #undef TARGET_OPTION_OVERRIDE
359 #define TARGET_OPTION_OVERRIDE sh_option_override
360
361 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
362 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
363 sh_override_options_after_change
364
365 #undef TARGET_PRINT_OPERAND
366 #define TARGET_PRINT_OPERAND sh_print_operand
367 #undef TARGET_PRINT_OPERAND_ADDRESS
368 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
369 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
370 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
371 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
372 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
373
374 #undef TARGET_ASM_FUNCTION_EPILOGUE
375 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
376
377 #undef TARGET_ASM_OUTPUT_MI_THUNK
378 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
379
380 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
381 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
382 hook_bool_const_tree_hwi_hwi_const_tree_true
383
384 #undef TARGET_ASM_FILE_START
385 #define TARGET_ASM_FILE_START sh_file_start
386 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
387 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
388
389 #undef TARGET_ASM_INTEGER
390 #define TARGET_ASM_INTEGER sh_assemble_integer
391
392 #undef TARGET_REGISTER_MOVE_COST
393 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
394
395 #undef TARGET_INSERT_ATTRIBUTES
396 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
397
398 #undef TARGET_SCHED_ADJUST_COST
399 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
400
401 #undef TARGET_SCHED_ISSUE_RATE
402 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
403
404 /* The next 5 hooks have been implemented for reenabling sched1. With the
405 help of these macros we limit the movement of insns in sched1 to
406 reduce register pressure. The overall idea is to keep count of SImode
407 and SFmode regs required by already scheduled insns. When these counts
408 cross certain threshold values, give priority to insns that free registers.
409 The insn that frees registers is most likely to be the insn with the lowest
410 LUID (original insn order), but such an insn might be sitting in the stalled
411 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
412 up to a max of 8 cycles so that such insns may move from Q -> R.
413 
414 The descriptions of the hooks are as below:
415 
416 TARGET_SCHED_INIT_GLOBAL: A new target hook in the generic
417 scheduler; it is called inside the sched_init function just after the
418 find_insn_reg_weights function call. It is used to calculate the SImode
419 and SFmode weights of insns in basic blocks, much like what
420 find_insn_reg_weights does.
421 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
422
423 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
424 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
425 (Q)->(R).
426
427 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
428 high; reorder the ready queue so that the insn with lowest LUID will be
429 issued next.
430
431 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
432 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
433
434 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
435 can be returned from TARGET_SCHED_REORDER2.
436
437 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
438
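/* A rough sketch of the intended interplay, in terms of the definitions
   below (assuming the hook implementations later in this file):

     sh_reorder / sh_reorder2:
       if CURR_REGMODE_PRESSURE (SImode) or CURR_REGMODE_PRESSURE (SFmode)
       exceeds its threshold, sort the ready queue by LUID and ask for
       cycle skipping;
     sh_dfa_new_cycle:
       while skip_cycles is set (bounded by 8 cycles), keep advancing the
       cycle so that stalled insns can migrate from Q to R.  */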
439 #undef TARGET_SCHED_DFA_NEW_CYCLE
440 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
441
442 #undef TARGET_SCHED_INIT_GLOBAL
443 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
444
445 #undef TARGET_SCHED_FINISH_GLOBAL
446 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
447
448 #undef TARGET_SCHED_VARIABLE_ISSUE
449 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
450
451 #undef TARGET_SCHED_REORDER
452 #define TARGET_SCHED_REORDER sh_reorder
453
454 #undef TARGET_SCHED_REORDER2
455 #define TARGET_SCHED_REORDER2 sh_reorder2
456
457 #undef TARGET_SCHED_INIT
458 #define TARGET_SCHED_INIT sh_md_init
459
460 #undef TARGET_DELEGITIMIZE_ADDRESS
461 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
462
463 #undef TARGET_LEGITIMIZE_ADDRESS
464 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
465
466 #undef TARGET_CAN_FOLLOW_JUMP
467 #define TARGET_CAN_FOLLOW_JUMP sh_can_follow_jump
468
469 #undef TARGET_MS_BITFIELD_LAYOUT_P
470 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
471
472 #undef TARGET_INIT_BUILTINS
473 #define TARGET_INIT_BUILTINS sh_init_builtins
474 #undef TARGET_BUILTIN_DECL
475 #define TARGET_BUILTIN_DECL sh_builtin_decl
476 #undef TARGET_EXPAND_BUILTIN
477 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
478
479 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
480 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
481
482 #undef TARGET_CANNOT_COPY_INSN_P
483 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
484 #undef TARGET_RTX_COSTS
485 #define TARGET_RTX_COSTS sh_rtx_costs
486 #undef TARGET_ADDRESS_COST
487 #define TARGET_ADDRESS_COST sh_address_cost
488 #undef TARGET_ALLOCATE_INITIAL_VALUE
489 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
490
491 #undef TARGET_MACHINE_DEPENDENT_REORG
492 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
493
494 #undef TARGET_DWARF_REGISTER_SPAN
495 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
496
497 #ifdef HAVE_AS_TLS
498 #undef TARGET_HAVE_TLS
499 #define TARGET_HAVE_TLS true
500 #endif
501
502 #undef TARGET_PROMOTE_PROTOTYPES
503 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
504 #undef TARGET_PROMOTE_FUNCTION_MODE
505 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
506
507 #undef TARGET_FUNCTION_VALUE
508 #define TARGET_FUNCTION_VALUE sh_function_value
509 #undef TARGET_FUNCTION_VALUE_REGNO_P
510 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
511 #undef TARGET_LIBCALL_VALUE
512 #define TARGET_LIBCALL_VALUE sh_libcall_value
513 #undef TARGET_STRUCT_VALUE_RTX
514 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
515 #undef TARGET_RETURN_IN_MEMORY
516 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
517
518 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
519 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
520 #undef TARGET_SETUP_INCOMING_VARARGS
521 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
522 #undef TARGET_STRICT_ARGUMENT_NAMING
523 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
524 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
525 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
526 #undef TARGET_MUST_PASS_IN_STACK
527 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
528 #undef TARGET_PASS_BY_REFERENCE
529 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
530 #undef TARGET_CALLEE_COPIES
531 #define TARGET_CALLEE_COPIES sh_callee_copies
532 #undef TARGET_ARG_PARTIAL_BYTES
533 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
534 #undef TARGET_FUNCTION_ARG
535 #define TARGET_FUNCTION_ARG sh_function_arg
536 #undef TARGET_FUNCTION_ARG_ADVANCE
537 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
538
539 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
540 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sh_atomic_assign_expand_fenv
541
542 #undef TARGET_BUILD_BUILTIN_VA_LIST
543 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
544 #undef TARGET_EXPAND_BUILTIN_VA_START
545 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
546 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
547 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
548
549 #undef TARGET_VECTOR_MODE_SUPPORTED_P
550 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
551
552 #undef TARGET_CHECK_PCH_TARGET_FLAGS
553 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
554
555 #undef TARGET_DWARF_CALLING_CONVENTION
556 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
557
558 #undef TARGET_FRAME_POINTER_REQUIRED
559 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
560
561 #undef TARGET_MODE_EMIT
562 #define TARGET_MODE_EMIT sh_emit_mode_set
563
564 #undef TARGET_MODE_NEEDED
565 #define TARGET_MODE_NEEDED sh_mode_needed
566
567 #undef TARGET_MODE_AFTER
568 #define TARGET_MODE_AFTER sh_mode_after
569
570 #undef TARGET_MODE_ENTRY
571 #define TARGET_MODE_ENTRY sh_mode_entry
572
573 #undef TARGET_MODE_EXIT
574 #define TARGET_MODE_EXIT sh_mode_exit
575
576 #undef TARGET_MODE_PRIORITY
577 #define TARGET_MODE_PRIORITY sh_mode_priority
578
579 /* Return regmode weight for insn. */
580 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
581 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
582
583 /* Return current register pressure for regmode. */
584 #define CURR_REGMODE_PRESSURE(MODE)\
585 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
586
587 #undef TARGET_ENCODE_SECTION_INFO
588 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
589
590 #undef TARGET_LRA_P
591 #define TARGET_LRA_P sh_lra_p
592
593 #undef TARGET_SECONDARY_RELOAD
594 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
595
596 #undef TARGET_PREFERRED_RELOAD_CLASS
597 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
598
599 #undef TARGET_CONDITIONAL_REGISTER_USAGE
600 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
601
602 #undef TARGET_LEGITIMATE_ADDRESS_P
603 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
604
605 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
606 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P sh_cannot_substitute_mem_equiv_p
607
608 #undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
609 #define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
610 sh_legitimize_address_displacement
611
612 #undef TARGET_TRAMPOLINE_INIT
613 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
614 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
615 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
616
617 #undef TARGET_LEGITIMATE_CONSTANT_P
618 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
619
620 #undef TARGET_CANONICALIZE_COMPARISON
621 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
622
623 #undef TARGET_LEGITIMATE_COMBINED_INSN
624 #define TARGET_LEGITIMATE_COMBINED_INSN sh_legitimate_combined_insn
625
626 #undef TARGET_FIXED_CONDITION_CODE_REGS
627 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
628
629 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
630 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
631 sh_use_by_pieces_infrastructure_p
632
633 /* Machine-specific symbol_ref flags. */
634 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
635
636 /* The tas.b instruction sets the 7th bit in the byte, i.e. 0x80. This value
637 is used by optabs.c atomic op expansion code as well as in sync.md. */
638 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
639 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
640
641 #undef TARGET_CANNOT_FORCE_CONST_MEM
642 #define TARGET_CANNOT_FORCE_CONST_MEM sh_cannot_force_const_mem_p
643
644 struct gcc_target targetm = TARGET_INITIALIZER;
645 \f
646
647 /* Information on the currently selected atomic model.
648 This is initialized in sh_option_override. */
649 static sh_atomic_model selected_atomic_model_;
650
651 const sh_atomic_model&
652 selected_atomic_model (void)
653 {
654 return selected_atomic_model_;
655 }
656
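/* Parse and validate the -matomic-model option string, which consists of
   the model name optionally followed by comma separated parameters.  For
   example, something like -matomic-model=soft-tcb,gbr-offset=64,strict
   would select the soft-tcb model with a gbr-offset of 64 and strict
   checking enabled; the offset must be a multiple of 4 in the range
   0-1020, as verified below.  */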
657 static sh_atomic_model
658 parse_validate_atomic_model_option (const char* str)
659 {
660 const char* model_names[sh_atomic_model::num_models];
661 model_names[sh_atomic_model::none] = "none";
662 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
663 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
664 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
665 model_names[sh_atomic_model::soft_imask] = "soft-imask";
666
667 const char* model_cdef_names[sh_atomic_model::num_models];
668 model_cdef_names[sh_atomic_model::none] = "NONE";
669 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
670 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
671 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
672 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
673
674 sh_atomic_model ret;
675 ret.type = sh_atomic_model::none;
676 ret.name = model_names[sh_atomic_model::none];
677 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
678 ret.strict = false;
679 ret.tcb_gbr_offset = -1;
680
681 /* Handle empty string as 'none'. */
682 if (str == NULL || *str == '\0')
683 return ret;
684
685 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
686
687 std::vector<std::string> tokens;
688 for (std::stringstream ss (str); ss.good (); )
689 {
690 tokens.push_back (std::string ());
691 std::getline (ss, tokens.back (), ',');
692 }
693
694 if (tokens.empty ())
695 err_ret ("invalid atomic model option");
696
697 /* The first token must be the atomic model name. */
698 {
699 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
700 if (tokens.front () == model_names[i])
701 {
702 ret.type = (sh_atomic_model::enum_type)i;
703 ret.name = model_names[i];
704 ret.cdef_name = model_cdef_names[i];
705 goto got_mode_name;
706 }
707
708 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
709 got_mode_name:;
710 }
711
712 /* Go through the remaining tokens. */
713 for (size_t i = 1; i < tokens.size (); ++i)
714 {
715 if (tokens[i] == "strict")
716 ret.strict = true;
717 else if (tokens[i].find ("gbr-offset=") == 0)
718 {
719 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
720 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
721 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
722 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
723 "option", offset_str.c_str ());
724 }
725 else
726 err_ret ("unknown parameter \"%s\" in atomic model option",
727 tokens[i].c_str ());
728 }
729
730 /* Check that the selection makes sense. */
731 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
732 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
733 ret.name);
734
735 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
736 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
737
738 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
739 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
740
741 if (ret.type == sh_atomic_model::soft_tcb
742 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
743 || (ret.tcb_gbr_offset & 3) != 0))
744 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
745 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
746 ret.name);
747
748 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
749 err_ret ("cannot use atomic model %s in user mode", ret.name);
750
751 return ret;
752
753 #undef err_ret
754 }
755
756 /* Register SH specific RTL passes. */
757 extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
758 const char* name);
759 extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx,
760 const char* name);
761 static void
762 register_sh_passes (void)
763 {
764 /* Running the sh_treg_combine pass after ce1 generates better code when
765 comparisons are combined and reg-reg moves are introduced, because
766 reg-reg moves will be eliminated afterwards. However, there are quite
767 a few cases where combine will be unable to fold comparison related insns,
768 thus for now don't do it.
769 register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
770 PASS_POS_INSERT_AFTER, "ce1", 1);
771 */
772
773 /* Run sh_treg_combine pass after combine but before register allocation. */
774 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
775 PASS_POS_INSERT_AFTER, "split1", 1);
776
777 /* Run sh_treg_combine pass after register allocation and basic block
778 reordering as this sometimes creates new opportunities. */
779 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
780 PASS_POS_INSERT_AFTER, "split4", 1);
781
782 /* Optimize sett and clrt insns, by e.g. removing them if the T bit value
783 is known after a conditional branch.
784 This must be done after basic blocks and branch conditions have
785 stabilized and won't be changed by further passes. */
786 register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"),
787 PASS_POS_INSERT_BEFORE, "sched2", 1);
788 }
789
790 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
791 various options, and do some machine dependent initialization. */
792 static void
793 sh_option_override (void)
794 {
795 int regno;
796
797 SUBTARGET_OVERRIDE_OPTIONS;
798
799 sh_cpu = PROCESSOR_SH1;
800 assembler_dialect = 0;
801 if (TARGET_SH2)
802 sh_cpu = PROCESSOR_SH2;
803 if (TARGET_SH2E)
804 sh_cpu = PROCESSOR_SH2E;
805 if (TARGET_SH2A)
806 sh_cpu = PROCESSOR_SH2A;
807 if (TARGET_SH3)
808 sh_cpu = PROCESSOR_SH3;
809 if (TARGET_SH3E)
810 sh_cpu = PROCESSOR_SH3E;
811 if (TARGET_SH4)
812 {
813 assembler_dialect = 1;
814 sh_cpu = PROCESSOR_SH4;
815 }
816 if (TARGET_SH4A)
817 {
818 assembler_dialect = 1;
819 sh_cpu = PROCESSOR_SH4A;
820 }
821
822 /* User/privileged mode is supported only on SH3* and SH4*.
823 Disable it for everything else. */
824 if (!TARGET_SH3 && TARGET_USERMODE)
825 TARGET_USERMODE = false;
826
827 if (! strcmp (sh_div_str, "call-div1"))
828 sh_div_strategy = SH_DIV_CALL_DIV1;
829 else if (! strcmp (sh_div_str, "call-fp") && TARGET_FPU_ANY)
830 sh_div_strategy = SH_DIV_CALL_FP;
831 else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
832 sh_div_strategy = SH_DIV_CALL_TABLE;
833 else
834 {
835 /* Pick one that makes most sense for the target in general.
836 It is not very useful to use different functions depending on -Os,
837 since then we'll end up with two different functions when some of
838 the code is compiled for size, and some for speed. */
839
840 /* SH4 tends to emphasize speed. */
841 if (TARGET_HARD_SH4)
842 sh_div_strategy = SH_DIV_CALL_TABLE;
843 /* These have their own way of doing things. */
844 else if (TARGET_SH2A)
845 sh_div_strategy = SH_DIV_INTRINSIC;
846 /* SH1 .. SH3 cores often go into small-footprint systems, so
847 default to the smallest implementation available. */
848 else
849 sh_div_strategy = SH_DIV_CALL_DIV1;
850 }
851
852 if (sh_divsi3_libfunc[0])
853 ; /* User supplied - leave it alone. */
854 else if (TARGET_DIVIDE_CALL_FP)
855 sh_divsi3_libfunc = "__sdivsi3_i4";
856 else if (TARGET_DIVIDE_CALL_TABLE)
857 sh_divsi3_libfunc = "__sdivsi3_i4i";
858 else
859 sh_divsi3_libfunc = "__sdivsi3";
860
861 if (sh_branch_cost == -1)
862 {
863 /* The SH1 does not have delay slots, hence we get a pipeline stall
864 at every branch. The SH4 is superscalar, so the single delay slot
865 is not sufficient to keep both pipelines filled.
866 In any case, set the default branch cost to '2', as it results in
867 slightly smaller code overall and also enables some if conversions
868 that are required for matching special T bit related insns. */
869 sh_branch_cost = 2;
870 }
871
872 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
873 if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
874 TARGET_ZDCBRANCH = 1;
875
876 /* FDPIC code is a special form of PIC, and the vast majority of code
877 generation constraints that apply to PIC also apply to FDPIC, so we
878 set flag_pic to avoid the need to check TARGET_FDPIC everywhere
879 flag_pic is checked. */
880 if (TARGET_FDPIC && !flag_pic)
881 flag_pic = 2;
882
883 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
884 if (! VALID_REGISTER_P (regno))
885 sh_register_names[regno][0] = '\0';
886
887 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
888 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
889 sh_additional_register_names[regno][0] = '\0';
890
891 if (flag_pic && ! TARGET_PREFERGOT)
892 flag_no_function_cse = 1;
893
894 if (targetm.small_register_classes_for_mode_p (VOIDmode))
895 {
896 /* Never run scheduling before reload, since that can
897 break global alloc, and generate slower code anyway due
898 to the pressure on R0. */
899 /* Enable sched1 for SH4 if the user explicitly requests it.
900 When sched1 is enabled, the ready queue will be reordered by
901 the target hooks if pressure is high. We cannot do this for
902 PIC, SH3 and lower as they give spill failures for R0. */
903 if (!TARGET_HARD_SH4 || flag_pic)
904 flag_schedule_insns = 0;
905 /* ??? Current exception handling places basic block boundaries
906 after call_insns. This causes high pressure on R0 and gives
907 spill failures for R0 in reload. See PR 22553 and the thread
908 on gcc-patches
909 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
910 else if (flag_exceptions)
911 {
912 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
913 warning (0, "ignoring -fschedule-insns because of exception "
914 "handling bug");
915 flag_schedule_insns = 0;
916 }
917 else if (flag_schedule_insns
918 && !global_options_set.x_flag_schedule_insns)
919 flag_schedule_insns = 0;
920 }
921
922 /* Unwind info is not correct around the CFG unless either a frame
923 pointer is present or M_A_O_A is set. Fixing this requires rewriting
924 unwind info generation to be aware of the CFG and propagating states
925 around edges. */
926 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
927 || flag_exceptions || flag_non_call_exceptions)
928 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
929 {
930 warning (0, "unwind tables currently require either a frame pointer "
931 "or -maccumulate-outgoing-args for correctness");
932 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
933 }
934
935 if (flag_unsafe_math_optimizations)
936 {
937 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
938 if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
939 TARGET_FSCA = 1;
940
941 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
942 if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
943 TARGET_FSRRA = 1;
944 }
945
946 /* Allow fsrra insn only if -funsafe-math-optimizations and
947 -ffinite-math-only are enabled. */
948 TARGET_FSRRA = TARGET_FSRRA
949 && flag_unsafe_math_optimizations
950 && flag_finite_math_only;
951
952 /* If the -mieee option was not explicitly set by the user, turn it on
953 unless -ffinite-math-only was specified. See also PR 33135. */
954 if (! global_options_set.x_TARGET_IEEE)
955 TARGET_IEEE = ! flag_finite_math_only;
956
957 if (sh_fixed_range_str)
958 sh_fix_range (sh_fixed_range_str);
959
960 /* This target defaults to strict volatile bitfields. */
961 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
962 flag_strict_volatile_bitfields = 1;
963
964 sh_override_options_after_change ();
965
966 /* Parse atomic model option and make sure it is valid for the current
967 target CPU. */
968 selected_atomic_model_
969 = parse_validate_atomic_model_option (sh_atomic_model_str);
970
971 register_sh_passes ();
972 }
973
974 /* Implement targetm.override_options_after_change. */
975
976 static void
977 sh_override_options_after_change (void)
978 {
979 /* Adjust loop, jump and function alignment values (in bytes), if those
980 were not specified by the user using -falign-loops, -falign-jumps
981 and -falign-functions options.
982 32 bit alignment is better for speed, because instructions can be
983 fetched as a pair from a longword boundary. For size use 16 bit
984 alignment to get more compact code.
985 Aligning all jumps increases the code size, even if it might
986 result in slightly faster code. Thus, it is set to the smallest
987 alignment possible if not specified by the user. */
988 if (align_loops == 0)
989 align_loops = optimize_size ? 2 : 4;
990
991 if (align_jumps == 0)
992 align_jumps = 2;
993 else if (align_jumps < 2)
994 align_jumps = 2;
995
996 if (align_functions == 0)
997 align_functions = optimize_size ? 2 : 4;
998
999 /* The linker relaxation code breaks when a function contains
1000 alignments that are larger than the alignment at the start of a
1001 compilation unit. */
1002 if (TARGET_RELAX)
1003 {
1004 int min_align = align_loops > align_jumps ? align_loops : align_jumps;
1005
1006 /* Also take possible .long constants / mova tables into account. */
1007 if (min_align < 4)
1008 min_align = 4;
1009 if (align_functions < min_align)
1010 align_functions = min_align;
1011 }
1012 }
1013 \f
1014 /* Print the operand address in x to the stream. */
1015 static void
1016 sh_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x)
1017 {
1018 switch (GET_CODE (x))
1019 {
1020 case REG:
1021 case SUBREG:
1022 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1023 break;
1024
1025 case PLUS:
1026 {
1027 rtx base = XEXP (x, 0);
1028 rtx index = XEXP (x, 1);
1029
1030 switch (GET_CODE (index))
1031 {
1032 case CONST_INT:
1033 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1034 reg_names[true_regnum (base)]);
1035 break;
1036
1037 case REG:
1038 case SUBREG:
1039 {
1040 int base_num = true_regnum (base);
1041 int index_num = true_regnum (index);
1042
1043 /* If base or index is R0, make sure that it comes first.
1044 Usually one of them will be R0, but the order might be wrong.
1045 If neither base nor index is R0 it's an error and we just
1046 pass it on to the assembler. This avoids silent wrong code
1047 bugs. */
1048 if (base_num == 0 && index_num != 0)
1049 std::swap (base_num, index_num);
1050
1051 fprintf (stream, "@(%s,%s)", reg_names[index_num],
1052 reg_names[base_num]);
1053 break;
1054 }
1055
1056 default:
1057 gcc_unreachable ();
1058 }
1059 }
1060 break;
1061
1062 case PRE_DEC:
1063 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1064 break;
1065
1066 case POST_INC:
1067 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1068 break;
1069
1070 default:
1071 x = mark_constant_pool_use (x);
1072 output_addr_const (stream, x);
1073 break;
1074 }
1075 }
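/* For instance, the cases above emit SH assembler address syntax such as
   "@r4" for a plain register, "@(8,r4)" for register plus displacement,
   "@(r0,r4)" for register plus register (r0 is forced to come first),
   "@-r15" for pre-decrement and "@r15+" for post-increment.  */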
1076
1077 /* Print operand x (an rtx) in assembler syntax to file stream
1078 according to modifier code.
1079
1080 '.' print a .s if insn needs delay slot
1081 ',' print LOCAL_LABEL_PREFIX
1082 '@' print trap, rte or rts depending upon pragma interruptness
1083 '#' output a nop if there is nothing to put in the delay slot
1084 ''' print likelihood suffix (/u for unlikely).
1085 '>' print branch target if -fverbose-asm
1086 'O' print a constant without the #
1087 'R' print the LSW of a dp value - changes if in little endian
1088 'S' print the MSW of a dp value - changes if in little endian
1089 'T' print the next word of a dp value - same as 'R' in big endian mode.
1090 'M' print .b / .w / .l / .s / .d suffix if operand is a MEM.
1091 'N' print 'r63' if the operand is (const_int 0).
1092 'd' print a V2SF reg as dN instead of fpN.
1093 'm' print a pair `base,offset' or `base,index', for LD and ST.
1094 'U' Likewise for {LD,ST}{HI,LO}.
1095 'V' print the position of a single bit set.
1096 'W' print the position of a single bit cleared.
1097 't' print a memory address which is a register.
1098 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1099 'o' output an operator. */
1100 static void
1101 sh_print_operand (FILE *stream, rtx x, int code)
1102 {
1103 int regno;
1104 machine_mode mode;
1105
1106 switch (code)
1107 {
1108 tree trapa_attr;
1109
1110 case '.':
1111 if (final_sequence
1112 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
1113 && get_attr_length (final_sequence->insn (1)))
1114 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1115 break;
1116 case ',':
1117 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1118 break;
1119 case '@':
1120 trapa_attr = lookup_attribute ("trap_exit",
1121 DECL_ATTRIBUTES (current_function_decl));
1122 if (trapa_attr)
1123 fprintf (stream, "trapa #%ld",
1124 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1125 else if (sh_cfun_interrupt_handler_p ())
1126 {
1127 if (sh_cfun_resbank_handler_p ())
1128 fprintf (stream, "resbank\n");
1129 fprintf (stream, "rte");
1130 }
1131 else
1132 fprintf (stream, "rts");
1133 break;
1134 case '#':
1135 /* Output a nop if there's nothing in the delay slot. */
1136 if (dbr_sequence_length () == 0)
1137 fprintf (stream, "\n\tnop");
1138 break;
1139 case '\'':
1140 {
1141 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1142
1143 if (note
1144 && profile_probability::from_reg_br_prob_note (XINT (note, 0))
1145 < profile_probability::even ())
1146 fputs ("/u", stream);
1147 break;
1148 }
1149 case '>':
1150 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1151 {
1152 fputs ("\t! target: ", stream);
1153 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1154 }
1155 break;
1156 case 'O':
1157 x = mark_constant_pool_use (x);
1158 output_addr_const (stream, x);
1159 break;
1160 /* N.B.: %R / %S / %T adjust memory addresses by four.
1161 While they can be used to access 64 bit parts of a larger value
1162 held in general purpose registers, that won't work with memory,
1163 nor for fp registers, since the frxx names are used. */
1164 case 'R':
1165 if (REG_P (x) || GET_CODE (x) == SUBREG)
1166 {
1167 regno = true_regnum (x);
1168 regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
1169 fputs (reg_names[regno], (stream));
1170 }
1171 else if (MEM_P (x))
1172 {
1173 x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
1174 sh_print_operand_address (stream, GET_MODE (x), XEXP (x, 0));
1175 }
1176 else
1177 {
1178 rtx sub = NULL_RTX;
1179
1180 mode = GET_MODE (x);
1181 if (mode == VOIDmode)
1182 mode = DImode;
1183 if (GET_MODE_SIZE (mode) >= 8)
1184 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
1185 if (sub)
1186 sh_print_operand (stream, sub, 0);
1187 else
1188 output_operand_lossage ("invalid operand to %%R");
1189 }
1190 break;
1191 case 'S':
1192 if (REG_P (x) || GET_CODE (x) == SUBREG)
1193 {
1194 regno = true_regnum (x);
1195 regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
1196 fputs (reg_names[regno], (stream));
1197 }
1198 else if (MEM_P (x))
1199 {
1200 x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
1201 sh_print_operand_address (stream, GET_MODE (x), XEXP (x, 0));
1202 }
1203 else
1204 {
1205 rtx sub = NULL_RTX;
1206
1207 mode = GET_MODE (x);
1208 if (mode == VOIDmode)
1209 mode = DImode;
1210 if (GET_MODE_SIZE (mode) >= 8)
1211 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
1212 if (sub)
1213 sh_print_operand (stream, sub, 0);
1214 else
1215 output_operand_lossage ("invalid operand to %%S");
1216 }
1217 break;
1218 case 'T':
1219 /* Next word of a double. */
1220 switch (GET_CODE (x))
1221 {
1222 case REG:
1223 fputs (reg_names[REGNO (x) + 1], (stream));
1224 break;
1225 case MEM:
1226 {
1227 machine_mode mode = GET_MODE (x);
1228 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1229 && GET_CODE (XEXP (x, 0)) != POST_INC)
1230 x = adjust_address (x, SImode, 4);
1231 sh_print_operand_address (stream, mode, XEXP (x, 0));
1232 }
1233 break;
1234 default:
1235 break;
1236 }
1237 break;
1238
1239 case 't':
1240 gcc_assert (MEM_P (x));
1241 x = XEXP (x, 0);
1242 switch (GET_CODE (x))
1243 {
1244 case REG:
1245 case SUBREG:
1246 sh_print_operand (stream, x, 0);
1247 break;
1248 default:
1249 break;
1250 }
1251 break;
1252
1253 case 'o':
1254 switch (GET_CODE (x))
1255 {
1256 case PLUS: fputs ("add", stream); break;
1257 case MINUS: fputs ("sub", stream); break;
1258 case MULT: fputs ("mul", stream); break;
1259 case DIV: fputs ("div", stream); break;
1260 case EQ: fputs ("eq", stream); break;
1261 case NE: fputs ("ne", stream); break;
1262 case GT: case LT: fputs ("gt", stream); break;
1263 case GE: case LE: fputs ("ge", stream); break;
1264 case GTU: case LTU: fputs ("gtu", stream); break;
1265 case GEU: case LEU: fputs ("geu", stream); break;
1266 default:
1267 break;
1268 }
1269 break;
1270 case 'M':
1271 if (MEM_P (x))
1272 {
1273 switch (GET_MODE (x))
1274 {
1275 case QImode: fputs (".b", stream); break;
1276 case HImode: fputs (".w", stream); break;
1277 case SImode: fputs (".l", stream); break;
1278 case SFmode: fputs (".s", stream); break;
1279 case DFmode: fputs (".d", stream); break;
1280 default: gcc_unreachable ();
1281 }
1282 }
1283 break;
1284
1285 case 'm':
1286 gcc_assert (MEM_P (x));
1287 x = XEXP (x, 0);
1288 /* Fall through. */
1289 case 'U':
1290 switch (GET_CODE (x))
1291 {
1292 case REG:
1293 case SUBREG:
1294 sh_print_operand (stream, x, 0);
1295 fputs (", 0", stream);
1296 break;
1297
1298 case PLUS:
1299 sh_print_operand (stream, XEXP (x, 0), 0);
1300 fputs (", ", stream);
1301 sh_print_operand (stream, XEXP (x, 1), 0);
1302 break;
1303
1304 default:
1305 gcc_unreachable ();
1306 }
1307 break;
1308
1309 case 'V':
1310 {
1311 int num = exact_log2 (INTVAL (x));
1312 gcc_assert (num >= 0);
1313 fprintf (stream, "#%d", num);
1314 }
1315 break;
1316
1317 case 'W':
1318 {
1319 int num = exact_log2 (~INTVAL (x));
1320 gcc_assert (num >= 0);
1321 fprintf (stream, "#%d", num);
1322 }
1323 break;
1324
1325 case 'd':
1326 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1327
1328 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1329 break;
1330
1331 case 'N':
1332 if (x == CONST0_RTX (GET_MODE (x)))
1333 {
1334 fprintf ((stream), "r63");
1335 break;
1336 }
1337 goto default_output;
1338 case 'u':
1339 if (CONST_INT_P (x))
1340 {
1341 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1342 break;
1343 }
1344 /* Fall through. */
1345
1346 default_output:
1347 default:
1348 regno = 0;
1349 mode = GET_MODE (x);
1350
1351 switch (GET_CODE (x))
1352 {
1353 case TRUNCATE:
1354 {
1355 rtx inner = XEXP (x, 0);
1356 int offset = 0;
1357 machine_mode inner_mode;
1358
1359 /* We might see SUBREGs with vector mode registers inside. */
1360 if (GET_CODE (inner) == SUBREG
1361 && (GET_MODE_SIZE (GET_MODE (inner))
1362 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1363 && subreg_lowpart_p (inner))
1364 inner = SUBREG_REG (inner);
1365 if (CONST_INT_P (inner))
1366 {
1367 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1368 goto default_output;
1369 }
1370 inner_mode = GET_MODE (inner);
1371 if (GET_CODE (inner) == SUBREG
1372 && (GET_MODE_SIZE (GET_MODE (inner))
1373 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1374 && REG_P (SUBREG_REG (inner)))
1375 {
1376 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1377 GET_MODE (SUBREG_REG (inner)),
1378 SUBREG_BYTE (inner),
1379 GET_MODE (inner));
1380 inner = SUBREG_REG (inner);
1381 }
1382 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1383 abort ();
1384 /* Floating point register pairs are always big endian;
1385 general purpose registers are 64 bit wide. */
1386 regno = REGNO (inner);
1387 regno = (HARD_REGNO_NREGS (regno, inner_mode)
1388 - HARD_REGNO_NREGS (regno, mode))
1389 + offset;
1390 x = inner;
1391 goto reg;
1392 }
1393 case SIGN_EXTEND:
1394 x = XEXP (x, 0);
1395 goto reg;
1396 case SUBREG:
1397 gcc_assert (SUBREG_BYTE (x) == 0
1398 && REG_P (SUBREG_REG (x)));
1399
1400 x = SUBREG_REG (x);
1401 /* Fall through. */
1402
1403 reg:
1404 case REG:
1405 regno += REGNO (x);
1406 if (FP_REGISTER_P (regno)
1407 && mode == V16SFmode)
1408 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1409 else if (FP_REGISTER_P (REGNO (x))
1410 && mode == V4SFmode)
1411 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1412 else if (REG_P (x)
1413 && mode == V2SFmode)
1414 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1415 else if (FP_REGISTER_P (REGNO (x))
1416 && GET_MODE_SIZE (mode) > 4)
1417 fprintf ((stream), "d%s", reg_names[regno] + 1);
1418 else
1419 fputs (reg_names[regno], (stream));
1420 break;
1421
1422 case MEM:
1423 output_address (GET_MODE (x), XEXP (x, 0));
1424 break;
1425
1426 default:
1427 fputc ('#', stream);
1428 output_addr_const (stream, x);
1429 break;
1430 }
1431 break;
1432 }
1433 }
1434
1435 static bool
1436 sh_print_operand_punct_valid_p (unsigned char code)
1437 {
1438 return (code == '.' || code == '#' || code == '@' || code == ','
1439 || code == '$' || code == '\'' || code == '>');
1440 }
1441
1442 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1443 static bool
1444 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1445 {
1446 if (GET_CODE (x) == UNSPEC)
1447 {
1448 switch (XINT (x, 1))
1449 {
1450 case UNSPEC_PIC:
1451 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1452 output_addr_const (file, XVECEXP (x, 0, 0));
1453 break;
1454 case UNSPEC_GOT:
1455 output_addr_const (file, XVECEXP (x, 0, 0));
1456 fputs ("@GOT", file);
1457 break;
1458 case UNSPEC_GOTOFF:
1459 output_addr_const (file, XVECEXP (x, 0, 0));
1460 fputs ("@GOTOFF", file);
1461 break;
1462 case UNSPEC_PLT:
1463 output_addr_const (file, XVECEXP (x, 0, 0));
1464 fputs ("@PLT", file);
1465 break;
1466 case UNSPEC_GOTPLT:
1467 output_addr_const (file, XVECEXP (x, 0, 0));
1468 fputs ("@GOTPLT", file);
1469 break;
1470 case UNSPEC_PCREL:
1471 output_addr_const (file, XVECEXP (x, 0, 0));
1472 fputs ("@PCREL", file);
1473 break;
1474 case UNSPEC_DTPOFF:
1475 output_addr_const (file, XVECEXP (x, 0, 0));
1476 fputs ("@DTPOFF", file);
1477 break;
1478 case UNSPEC_GOTTPOFF:
1479 output_addr_const (file, XVECEXP (x, 0, 0));
1480 fputs ("@GOTTPOFF", file);
1481 break;
1482 case UNSPEC_TPOFF:
1483 output_addr_const (file, XVECEXP (x, 0, 0));
1484 fputs ("@TPOFF", file);
1485 break;
1486 case UNSPEC_CALLER:
1487 {
1488 char name[32];
1489 /* LPCS stands for Label for PIC Call Site. */
1490 targetm.asm_out.generate_internal_label (name, "LPCS",
1491 INTVAL (XVECEXP (x, 0, 0)));
1492 assemble_name (file, name);
1493 }
1494 break;
1495 case UNSPEC_SYMOFF:
1496 output_addr_const (file, XVECEXP (x, 0, 0));
1497 fputc ('-', file);
1498 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1499 {
1500 fputc ('(', file);
1501 output_addr_const (file, XVECEXP (x, 0, 1));
1502 fputc (')', file);
1503 }
1504 else
1505 output_addr_const (file, XVECEXP (x, 0, 1));
1506 break;
1507 case UNSPEC_PCREL_SYMOFF:
1508 output_addr_const (file, XVECEXP (x, 0, 0));
1509 fputs ("-(", file);
1510 output_addr_const (file, XVECEXP (x, 0, 1));
1511 fputs ("-.)", file);
1512 break;
1513 case UNSPEC_GOTFUNCDESC:
1514 output_addr_const (file, XVECEXP (x, 0, 0));
1515 fputs ("@GOTFUNCDESC", file);
1516 break;
1517 case UNSPEC_GOTOFFFUNCDESC:
1518 output_addr_const (file, XVECEXP (x, 0, 0));
1519 fputs ("@GOTOFFFUNCDESC", file);
1520 break;
1521 default:
1522 return false;
1523 }
1524 return true;
1525 }
1526 else
1527 return false;
1528 }
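/* E.g. an UNSPEC_GOT wrapping the symbol foo is printed as "foo@GOT", and
   an UNSPEC_PCREL_SYMOFF with operands foo and bar is printed as
   "foo-(bar-.)".  */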
1529 \f
1530 /* Encode symbol attributes of a SYMBOL_REF into its
1531 SYMBOL_REF_FLAGS. */
1532 static void
1533 sh_encode_section_info (tree decl, rtx rtl, int first)
1534 {
1535 default_encode_section_info (decl, rtl, first);
1536
1537 if (TREE_CODE (decl) == FUNCTION_DECL
1538 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1539 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1540 }
1541
1542 /* Prepare operands for a move define_expand; specifically, one of the
1543 operands must be in a register. */
1544 void
1545 prepare_move_operands (rtx operands[], machine_mode mode)
1546 {
1547 if ((mode == SImode || mode == DImode)
1548 && flag_pic
1549 && ! ((mode == Pmode || mode == ptr_mode)
1550 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1551 {
1552 rtx temp;
1553 if (SYMBOLIC_CONST_P (operands[1]))
1554 {
1555 if (MEM_P (operands[0]))
1556 operands[1] = force_reg (Pmode, operands[1]);
1557 else
1558 {
1559 temp = (!can_create_pseudo_p ()
1560 ? operands[0]
1561 : gen_reg_rtx (Pmode));
1562 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1563 }
1564 }
1565 else if (GET_CODE (operands[1]) == CONST
1566 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1567 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1568 {
1569 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1570 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1571 mode, temp);
1572 operands[1] = expand_binop (mode, add_optab, temp,
1573 XEXP (XEXP (operands[1], 0), 1),
1574 (!can_create_pseudo_p ()
1575 ? temp
1576 : gen_reg_rtx (Pmode)),
1577 0, OPTAB_LIB_WIDEN);
1578 }
1579 }
1580
1581 if (! reload_in_progress && ! reload_completed)
1582 {
1583 /* Copy the source to a register if neither operand is a register. */
1584 if (! register_operand (operands[0], mode)
1585 && ! register_operand (operands[1], mode))
1586 operands[1] = copy_to_mode_reg (mode, operands[1]);
1587
1588 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1589 {
1590 /* This is like change_address_1 (operands[0], mode, 0, 1),
1591 except that we can't use that function because it is static. */
1592 rtx new_rtx = change_address (operands[0], mode, 0);
1593 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1594 operands[0] = new_rtx;
1595 }
1596
1597 /* This case can happen while generating code to move the result
1598 of a library call to the target. Reject `st r0,@(rX,rY)' because
1599 reload will fail to find a spill register for rX, since r0 is already
1600 being used for the source. */
1601 else if (refers_to_regno_p (R0_REG, operands[1])
1602 && MEM_P (operands[0])
1603 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1604 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1605 operands[1] = copy_to_mode_reg (mode, operands[1]);
1606
1607 /* When displacement addressing is used, RA will assign r0 to
1608 the pseudo register operand for the QI/HImode load/store.
1609 This tends to make a long live range for R0 and might cause
1610 anomalous register spills in some cases with LRA. See PR
1611 target/55212.
1612 We split a possible load/store into two move insns via r0 so as
1613 to shorten the R0 live range. It will make some code worse but
1614 wins on average for LRA.
1615 Also when base+index addressing is used and the index term is
1616 a subreg, LRA assumes that more hard registers can be available
1617 in some situations. That isn't the case for SH in the
1618 problematic situation. We can pre-allocate R0 for that index
1619 term to avoid the issue. See PR target/66591. */
1620 else if (sh_lra_p ()
1621 && ! TARGET_SH2A
1622 && ((REG_P (operands[0]) && MEM_P (operands[1]))
1623 || (REG_P (operands[1]) && MEM_P (operands[0]))))
1624 {
1625 bool load_p = REG_P (operands[0]);
1626 rtx reg = operands[load_p ? 0 : 1];
1627 rtx adr = XEXP (operands[load_p ? 1 : 0], 0);
1628
1629 if ((mode == QImode || mode == HImode)
1630 && REGNO (reg) >= FIRST_PSEUDO_REGISTER
1631 && GET_CODE (adr) == PLUS
1632 && REG_P (XEXP (adr, 0))
1633 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1634 && CONST_INT_P (XEXP (adr, 1))
1635 && INTVAL (XEXP (adr, 1)) != 0
1636 && sh_legitimate_index_p (mode, XEXP (adr, 1), false, true))
1637 {
1638 rtx r0_rtx = gen_rtx_REG (mode, R0_REG);
1639 emit_move_insn (r0_rtx, operands[1]);
1640 operands[1] = r0_rtx;
1641 }
1642 if (REGNO (reg) >= FIRST_PSEUDO_REGISTER
1643 && GET_CODE (adr) == PLUS
1644 && REG_P (XEXP (adr, 0))
1645 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1646 && SUBREG_P (XEXP (adr, 1))
1647 && REG_P (SUBREG_REG (XEXP (adr, 1))))
1648 {
1649 rtx r0_rtx = gen_rtx_REG (GET_MODE (XEXP (adr, 1)), R0_REG);
1650 emit_move_insn (r0_rtx, XEXP (adr, 1));
1651 XEXP (adr, 1) = r0_rtx;
1652 }
1653 }
1654 }
1655
1656 if (mode == Pmode || mode == ptr_mode)
1657 {
1658 rtx op0 = operands[0];
1659 rtx op1 = operands[1];
1660 rtx opc;
1661 if (GET_CODE (op1) == CONST
1662 && GET_CODE (XEXP (op1, 0)) == PLUS
1663 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1664 != TLS_MODEL_NONE))
1665 {
1666 opc = XEXP (XEXP (op1, 0), 1);
1667 op1 = XEXP (XEXP (op1, 0), 0);
1668 }
1669 else
1670 opc = NULL_RTX;
1671
1672 enum tls_model tls_kind;
1673
1674 if (! reload_in_progress && ! reload_completed
1675 && (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1676 {
1677 rtx tga_op1, tga_ret, tmp, tmp2;
1678
1679 if (! flag_pic
1680 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1681 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1682 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1683 {
1684 static int got_labelno;
1685 /* Don't schedule insns for getting GOT address when
1686 the first scheduling is enabled, to avoid spill
1687 failures for R0. */
1688 if (flag_schedule_insns)
1689 emit_insn (gen_blockage ());
1690 emit_insn (gen_GOTaddr2picreg (GEN_INT (++got_labelno)));
1691 emit_use (gen_rtx_REG (SImode, PIC_REG));
1692 if (flag_schedule_insns)
1693 emit_insn (gen_blockage ());
1694 }
1695
1696 switch (tls_kind)
1697 {
1698 case TLS_MODEL_GLOBAL_DYNAMIC:
1699 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1700 if (TARGET_FDPIC)
1701 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
1702 sh_get_fdpic_reg_initial_val ());
1703 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1704 tmp = gen_reg_rtx (Pmode);
1705 emit_move_insn (tmp, tga_ret);
1706 op1 = tmp;
1707 break;
1708
1709 case TLS_MODEL_LOCAL_DYNAMIC:
1710 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1711 if (TARGET_FDPIC)
1712 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
1713 sh_get_fdpic_reg_initial_val ());
1714 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1715
1716 tmp = gen_reg_rtx (Pmode);
1717 emit_move_insn (tmp, tga_ret);
1718
1719 if (register_operand (op0, Pmode))
1720 tmp2 = op0;
1721 else
1722 tmp2 = gen_reg_rtx (Pmode);
1723
1724 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1725 op1 = tmp2;
1726 break;
1727
1728 case TLS_MODEL_INITIAL_EXEC:
1729 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1730 tmp = gen_sym2GOTTPOFF (op1);
1731 if (TARGET_FDPIC)
1732 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
1733 sh_get_fdpic_reg_initial_val ());
1734 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1735 op1 = tga_op1;
1736 break;
1737
1738 case TLS_MODEL_LOCAL_EXEC:
1739 tmp2 = gen_reg_rtx (Pmode);
1740 emit_insn (gen_store_gbr (tmp2));
1741 tmp = gen_reg_rtx (Pmode);
1742 emit_insn (gen_symTPOFF2reg (tmp, op1));
1743
1744 if (register_operand (op0, Pmode))
1745 op1 = op0;
1746 else
1747 op1 = gen_reg_rtx (Pmode);
1748
1749 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1750 break;
1751
1752 default:
1753 gcc_unreachable ();
1754 }
1755 if (opc)
1756 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1757 operands[1] = op1;
1758 }
1759 }
1760
1761 if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
1762 {
1763 rtx base, offset;
1764 split_const (operands[1], &base, &offset);
1765
1766 if (GET_CODE (base) == SYMBOL_REF
1767 && !offset_within_block_p (base, INTVAL (offset)))
1768 {
1769 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx (mode) : operands[0];
1770 emit_move_insn (tmp, base);
1771 if (!arith_operand (offset, mode))
1772 offset = force_reg (mode, offset);
1773 emit_insn (gen_add3_insn (operands[0], tmp, offset));
1774 }
1775 }
1776 }
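
/* A rough sketch of the TLS local-exec case handled above, for
   illustration only: the address of a thread-local variable is formed as

     tmp2 = GBR contents (the thread pointer)   via gen_store_gbr
     tmp  = sym@TPOFF                           via gen_symTPOFF2reg
     op1  = tmp + tmp2                          via gen_addsi3

   and op1 then replaces the original symbolic source operand of the
   move.  The other TLS models similarly route the address through the
   tls_global_dynamic / tls_local_dynamic / tls_initial_exec expanders.  */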
1777
1778 /* Implement the canonicalize_comparison target hook for the combine
1779 pass. For the target hook this function is invoked via
1780 sh_canonicalize_comparison. This function is also re-used to
1781 canonicalize comparisons in cbranch pattern expanders. */
1782 static void
1783 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1784 machine_mode mode,
1785 bool op0_preserve_value)
1786 {
1787 /* When invoked from within the combine pass the mode is not specified,
1788 so try to get it from one of the operands. */
1789 if (mode == VOIDmode)
1790 mode = GET_MODE (op0);
1791 if (mode == VOIDmode)
1792 mode = GET_MODE (op1);
1793
1794 // We need to have a mode to do something useful here.
1795 if (mode == VOIDmode)
1796 return;
1797
1798 // Currently, we don't deal with floats here.
1799 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1800 return;
1801
1802 // Make sure that the constant operand is the second operand.
1803 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1804 {
1805 if (op0_preserve_value)
1806 return;
1807
1808 std::swap (op0, op1);
1809 cmp = swap_condition (cmp);
1810 }
1811
1812 if (CONST_INT_P (op1))
1813 {
1814 /* Try to adjust the constant operand in such a way that available
1815 comparison insns can be utilized better and the constant can be
1816 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1817 constant pool. */
1818 const HOST_WIDE_INT val = INTVAL (op1);
1819
1820 /* x > -1 --> x >= 0
1821 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1822 x <= -1 --> x < 0
1823 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1824 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1825 {
1826 cmp = cmp == GT ? GE : LT;
1827 op1 = gen_int_mode (val + 1, mode);
1828 }
1829
1830 /* x >= 1 --> x > 0
1831 x >= 0x80 --> x > 0x7F
1832 x < 1 --> x <= 0
1833 x < 0x80 --> x <= 0x7F */
1834 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1835 {
1836 cmp = cmp == GE ? GT : LE;
1837 op1 = gen_int_mode (val - 1, mode);
1838 }
1839
1840 /* unsigned x >= 1 --> x != 0
1841 unsigned x < 1 --> x == 0 */
1842 else if (val == 1 && (cmp == GEU || cmp == LTU))
1843 {
1844 cmp = cmp == GEU ? NE : EQ;
1845 op1 = CONST0_RTX (mode);
1846 }
1847
1848 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1849 unsigned x < 0x80 --> unsigned x < 0x7F */
1850 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1851 {
1852 cmp = cmp == GEU ? GTU : LEU;
1853 op1 = gen_int_mode (val - 1, mode);
1854 }
1855
1856 /* unsigned x > 0 --> x != 0
1857 unsigned x <= 0 --> x == 0 */
1858 else if (val == 0 && (cmp == GTU || cmp == LEU))
1859 cmp = cmp == GTU ? NE : EQ;
1860
1861 /* unsigned x > 0x7FFFFFFF --> signed x < 0
1862 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
1863 else if (mode == SImode && (cmp == GTU || cmp == LEU)
1864 && val == 0x7FFFFFFF)
1865 {
1866 cmp = cmp == GTU ? LT : GE;
1867 op1 = const0_rtx;
1868 }
1869
1870 /* unsigned x >= 0x80000000 --> signed x < 0
1871 unsigned x < 0x80000000 --> signed x >= 0 */
1872 else if (mode == SImode && (cmp == GEU || cmp == LTU)
1873 && (unsigned HOST_WIDE_INT)val
1874 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
1875 {
1876 cmp = cmp == GEU ? LT : GE;
1877 op1 = const0_rtx;
1878 }
1879 }
1880 }
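
/* For illustration, assuming the usual SH compare insns cmp/pl ("Rn > 0")
   and cmp/pz ("Rn >= 0"): the adjustments above turn e.g.

     x >= 1   into  x > 0      (a single cmp/pl Rn)
     x > -1   into  x >= 0     (a single cmp/pz Rn)
     unsigned x >= 1   into  x != 0

   so that no 'mov #imm,Rm' or constant pool load is needed for the
   right-hand operand.  */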
1881
1882 /* This function implements the canonicalize_comparison target hook.
1883 This wrapper around the internally used sh_canonicalize_comparison
1884 function is needed to do the enum rtx_code <-> int conversion.
1885 Target hooks cannot use enum rtx_code in its definition. */
1886 static void
1887 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1888 bool op0_preserve_value)
1889 {
1890 enum rtx_code tmp_code = (enum rtx_code)*code;
1891 sh_canonicalize_comparison (tmp_code, *op0, *op1,
1892 VOIDmode, op0_preserve_value);
1893 *code = (int)tmp_code;
1894 }
1895
1896 /* This function implements the legitimate_combined_insn target hook,
1897 which the combine pass uses to reject combined insns early, before
1898 it tries to recog the insn and determine its cost. */
1899 static bool
1900 sh_legitimate_combined_insn (rtx_insn* insn)
1901 {
1902 /* Reject combinations of memory loads and zero extensions, as these
1903 interfere with other combine patterns such as zero extracts and bit
1904 tests. The SH2A movu.{b|w} insns are formed later in the
1905 'sh_optimize_extu_exts' pass after combine/split1. */
1906 rtx p = PATTERN (insn);
1907 if (GET_CODE (p) == SET
1908 && REG_P (XEXP (p, 0)) && GET_MODE (XEXP (p, 0)) == SImode
1909 && GET_CODE (XEXP (p, 1)) == ZERO_EXTEND
1910 && MEM_P (XEXP (XEXP (p, 1), 0)))
1911 return false;
1912
1913 return true;
1914 }
1915
1916 bool
1917 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
1918 {
1919 *p1 = T_REG;
1920 *p2 = INVALID_REGNUM;
1921 return true;
1922 }
1923
1924 /* Try to calculate the branch distance of a conditional branch in bytes.
1925
1926 FIXME: Because of PR 59189 we can't use the CFG here. Instead just
1927 walk from this insn into the next (fall-through) basic block and see if
1928 we hit the label. */
1929 unsigned int
1930 sh_cbranch_distance (rtx_insn* _cbranch_insn, unsigned int max_dist)
1931 {
1932 rtx_jump_insn* cbranch_insn = safe_as_a<rtx_jump_insn*> (_cbranch_insn);
1933
1934 if (dump_file)
1935 {
1936 fprintf (dump_file, "sh_cbranch_distance insn = \n");
1937 print_rtl_single (dump_file, cbranch_insn);
1938 }
1939
1940 unsigned int dist = 0;
1941
1942 for (rtx_insn* i = next_nonnote_insn (cbranch_insn);
1943 i != NULL && dist < max_dist; i = next_nonnote_insn (i))
1944 {
1945 const unsigned int i_len = get_attr_length (i);
1946 dist += i_len;
1947
1948 if (dump_file)
1949 fprintf (dump_file, " insn %d length = %u dist = %u\n",
1950 INSN_UID (i), i_len, dist);
1951
1952 if (rtx_code_label* l = dyn_cast<rtx_code_label*> (i))
1953 {
1954 if (l == cbranch_insn->jump_target ())
1955 {
1956 if (dump_file)
1957 fprintf (dump_file, " cbranch dist = %u\n", dist);
1958 return dist;
1959 }
1960 break;
1961 }
1962 }
1963
1964 if (dump_file)
1965 fprintf (dump_file, " cbranch dist = unknown\n");
1966
1967 return unknown_cbranch_distance;
1968 }
1969
1970 enum rtx_code
1971 prepare_cbranch_operands (rtx *operands, machine_mode mode,
1972 enum rtx_code comparison)
1973 {
1974 gcc_assert (can_create_pseudo_p ());
1975
1976 if (comparison == LAST_AND_UNUSED_RTX_CODE)
1977 comparison = GET_CODE (operands[0]);
1978
1979 sh_canonicalize_comparison (comparison, operands[1], operands[2],
1980 mode, false);
1981
1982 rtx op1 = operands[1];
1983 operands[1] = force_reg (mode, op1);
1984
1985 /* When we are handling DImode comparisons, we want to keep constants so
1986 that we can optimize the component comparisons; however, memory loads
1987 are better issued as a whole so that they can be scheduled well.
1988 SImode equality comparisons allow I08 constants, but only when they
1989 compare r0. Hence, if operands[1] has to be loaded from somewhere else
1990 into a register, that register might as well be r0, and we allow the
1991 constant. If it is already in a register, this is likely to be
1992 allocated to a different hard register, thus we load the constant into
1993 a register unless it is zero. */
1994 if (!REG_P (operands[2])
1995 && (!CONST_INT_P (operands[2])
1996 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
1997 && ((comparison != EQ && comparison != NE)
1998 || (REG_P (op1) && REGNO (op1) != R0_REG)
1999 || !satisfies_constraint_I08 (operands[2])))))
2000 operands[2] = force_reg (mode, operands[2]);
2001
2002 return comparison;
2003 }
2004
2005 static void
2006 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison,
2007 profile_probability probability)
2008 {
2009 rtx (*branch_expander) (rtx) = gen_branch_true;
2010 comparison = prepare_cbranch_operands (operands, SImode, comparison);
2011 switch (comparison)
2012 {
2013 case NE: case LT: case LE: case LTU: case LEU:
2014 comparison = reverse_condition (comparison);
2015 branch_expander = gen_branch_false;
2016 default: ;
2017 }
2018 emit_insn (gen_rtx_SET (get_t_reg_rtx (),
2019 gen_rtx_fmt_ee (comparison, SImode,
2020 operands[1], operands[2])));
2021 rtx_insn *jump = emit_jump_insn (branch_expander (operands[3]));
2022 if (probability.initialized_p ())
2023 add_reg_br_prob_note (jump, probability);
2024 }
2025
2026 void
2027 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison)
2028 {
2029 expand_cbranchsi4 (operands, comparison,
2030 profile_probability::uninitialized ());
2031 }
2032
2033 /* ??? How should we distribute probabilities when more than one branch
2034 is generated? So far we only have some ad-hoc observations:
2035 - If the operands are random, they are likely to differ in both parts.
2036 - If comparing items in a hash chain, the operands are random or equal;
2037 operation should be EQ or NE.
2038 - If items are searched in an ordered tree from the root, we can expect
2039 the highpart to be unequal about half of the time; operation should be
2040 an inequality comparison, operands non-constant, and overall probability
2041 about 50%. Likewise for quicksort.
2042 - Range checks will often be made against constants. Even if we assume for
2043 simplicity an even distribution of the non-constant operand over a
2044 sub-range here, the same probability could be generated with differently
2045 wide sub-ranges - as long as the ratio of the part of the subrange that
2046 is before the threshold to the part that comes after the threshold stays
2047 the same. Thus, we can't really tell anything here;
2048 assuming random distribution is at least simple.
2049 */
2050 bool
2051 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2052 {
2053 enum rtx_code msw_taken, msw_skip, lsw_taken;
2054 rtx_code_label *skip_label = NULL;
2055 rtx op1h, op1l, op2h, op2l;
2056 int num_branches;
2057 profile_probability prob, rev_prob;
2058 profile_probability msw_taken_prob = profile_probability::uninitialized (),
2059 msw_skip_prob = profile_probability::uninitialized (),
2060 lsw_taken_prob = profile_probability::uninitialized ();
2061
2062 comparison = prepare_cbranch_operands (operands, DImode, comparison);
2063 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2064 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2065 op1l = gen_lowpart (SImode, operands[1]);
2066 op2l = gen_lowpart (SImode, operands[2]);
2067 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2068 prob = split_branch_probability;
2069 rev_prob = prob.invert ();
2070 switch (comparison)
2071 {
2072 case EQ:
2073 msw_skip = NE;
2074 lsw_taken = EQ;
2075 if (prob.initialized_p ())
2076 {
2077 /* FIXME: This is not optimal. We do not really know the probability
2078 that values differ by the MSW only, but we should probably distribute
2079 probabilities more evenly. */
2080 msw_skip_prob = rev_prob;
2081 lsw_taken_prob = prob > profile_probability::never ()
2082 ? profile_probability::guessed_always ()
2083 : profile_probability::guessed_never ();
2084 }
2085 break;
2086 case NE:
2087 msw_taken = NE;
2088 msw_taken_prob = prob;
2089 lsw_taken = NE;
2090 lsw_taken_prob = profile_probability::guessed_never ();
2091 break;
2092 case GTU: case GT:
2093 msw_taken = comparison;
2094 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2095 break;
2096 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2097 msw_skip = swap_condition (msw_taken);
2098 lsw_taken = GTU;
2099 break;
2100 case GEU: case GE:
2101 if (op2l == CONST0_RTX (SImode))
2102 msw_taken = comparison;
2103 else
2104 {
2105 msw_taken = comparison == GE ? GT : GTU;
2106 msw_skip = swap_condition (msw_taken);
2107 lsw_taken = GEU;
2108 }
2109 break;
2110 case LTU: case LT:
2111 msw_taken = comparison;
2112 if (op2l == CONST0_RTX (SImode))
2113 break;
2114 msw_skip = swap_condition (msw_taken);
2115 lsw_taken = LTU;
2116 break;
2117 case LEU: case LE:
2118 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2119 msw_taken = comparison;
2120 else
2121 {
2122 lsw_taken = LEU;
2123 if (comparison == LE)
2124 msw_taken = LT;
2125 else if (op2h != CONST0_RTX (SImode))
2126 msw_taken = LTU;
2127 else
2128 {
2129 msw_skip = swap_condition (LTU);
2130 break;
2131 }
2132 msw_skip = swap_condition (msw_taken);
2133 }
2134 break;
2135 default: return false;
2136 }
2137 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2138 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2139 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2140 if (comparison != EQ && comparison != NE && num_branches > 1)
2141 {
2142 if (!CONSTANT_P (operands[2])
2143 && prob.initialized_p ()
2144 && prob.to_reg_br_prob_base () >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2145 && prob.to_reg_br_prob_base () <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2146 {
2147 msw_taken_prob = prob.apply_scale (1, 2);
2148 msw_skip_prob = rev_prob.apply_scale (REG_BR_PROB_BASE,
2149 rev_prob.to_reg_br_prob_base ()
2150 + REG_BR_PROB_BASE);
2151 lsw_taken_prob = prob;
2152 }
2153 else
2154 {
2155 msw_taken_prob = prob;
2156 msw_skip_prob = profile_probability::guessed_always ();
2157 /* ??? If we have a constant op2h, should we use that when
2158 calculating lsw_taken_prob? */
2159 lsw_taken_prob = prob;
2160 }
2161 }
2162 operands[1] = op1h;
2163 operands[2] = op2h;
2164
2165 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2166 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2167 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2168 {
2169 rtx taken_label = operands[3];
2170
2171 /* Operands were possibly modified, but msw_skip doesn't expect this.
2172 Always use the original ones. */
2173 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2174 {
2175 operands[1] = op1h;
2176 operands[2] = op2h;
2177 }
2178
2179 operands[3] = skip_label = gen_label_rtx ();
2180 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2181 operands[3] = taken_label;
2182 }
2183 operands[1] = op1l;
2184 operands[2] = op2l;
2185 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2186 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2187 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2188 emit_label (skip_label);
2189 return true;
2190 }
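
/* Rough shape of the expansion above for a DImode equality branch, for
   illustration: with msw_skip = NE and lsw_taken = EQ the emitted code is

     compare the high words;  branch to skip_label if they differ
     compare the low words;   branch to the target label if they are equal
   skip_label:

   i.e. at most two SImode compare-and-branch sequences per DImode branch.  */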
2191
2192 /* Given an operand, return 1 if the evaluated operand plugged into an
2193 if_then_else will result in a branch_true, 0 if branch_false, or
2194 -1 if neither applies. The truth table goes like this:
2195
2196 op | cmpval | code | result
2197 ---------+--------+---------+--------------------
2198 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2199 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2200 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2201 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2202 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2203 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2204 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2205 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2206 int
2207 sh_eval_treg_value (rtx op)
2208 {
2209 if (t_reg_operand (op, GET_MODE (op)))
2210 return 1;
2211 if (negt_reg_operand (op, GET_MODE (op)))
2212 return 0;
2213
2214 rtx_code code = GET_CODE (op);
2215 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2216 return -1;
2217
2218 int cmpop = code == EQ ? 1 : 0;
2219 int cmpval = INTVAL (XEXP (op, 1));
2220 if (cmpval != 0 && cmpval != 1)
2221 return -1;
2222
2223 int t;
2224 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2225 t = 0;
2226 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2227 t = 1;
2228 else
2229 return -1;
2230
2231 return t ^ (cmpval == cmpop);
2232 }
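
/* Worked example of the truth table above, for illustration: an operand
   of the form (eq (T reg) (const_int 0)) corresponds to the row
   op = T (0), cmpval = 0, code = EQ (1), so the result is
   0 ^ (0 == 1) = 0, i.e. the expression acts like a branch_false.  */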
2233
2234 /* Emit INSN, possibly in a PARALLEL with a USE/CLOBBER of FPSCR bits in case
2235 of floating-point comparisons. */
2236 static void
2237 sh_emit_set_t_insn (rtx insn, machine_mode mode)
2238 {
2239 if (TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT
2240 && GET_CODE (insn) != PARALLEL)
2241 {
2242 insn = gen_rtx_PARALLEL (VOIDmode,
2243 gen_rtvec (3, insn,
2244 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, FPSCR_STAT_REG)),
2245 gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, FPSCR_MODES_REG))));
2246 }
2247 emit_insn (insn);
2248 }
2249
2250 /* Prepare the operands for an scc instruction; make sure that the
2251 compare has been done and the result is in T_REG. */
2252 void
2253 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2254 {
2255 rtx t_reg = get_t_reg_rtx ();
2256 enum rtx_code oldcode = code;
2257
2258 /* First need a compare insn. */
2259 switch (code)
2260 {
2261 case NE:
2262 /* It isn't possible to handle this case. */
2263 gcc_unreachable ();
2264 case LT:
2265 code = GT;
2266 break;
2267 case LE:
2268 code = GE;
2269 break;
2270 case LTU:
2271 code = GTU;
2272 break;
2273 case LEU:
2274 code = GEU;
2275 break;
2276 default:
2277 break;
2278 }
2279 if (code != oldcode)
2280 std::swap (op0, op1);
2281
2282 machine_mode mode = GET_MODE (op0);
2283 if (mode == VOIDmode)
2284 mode = GET_MODE (op1);
2285
2286 op0 = force_reg (mode, op0);
2287 if ((code != EQ && code != NE
2288 && (op1 != const0_rtx
2289 || code == GTU || code == GEU || code == LTU || code == LEU))
2290 || (mode == DImode && op1 != const0_rtx)
2291 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2292 op1 = force_reg (mode, op1);
2293
2294 sh_emit_set_t_insn (gen_rtx_SET (t_reg,
2295 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2296 mode);
2297 }
2298
2299 /* Called from the md file, set up the operands of a compare instruction. */
2300 void
2301 sh_emit_compare_and_branch (rtx *operands, machine_mode mode)
2302 {
2303 enum rtx_code code = GET_CODE (operands[0]);
2304 enum rtx_code branch_code;
2305 rtx op0 = operands[1];
2306 rtx op1 = operands[2];
2307 rtx insn;
2308 bool need_ccmpeq = false;
2309
2310 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2311 {
2312 op0 = force_reg (mode, op0);
2313 op1 = force_reg (mode, op1);
2314 }
2315 else
2316 {
2317 if (code != EQ || mode == DImode)
2318 {
2319 /* Force args into regs, since we can't use constants here. */
2320 op0 = force_reg (mode, op0);
2321 if (op1 != const0_rtx || code == GTU || code == GEU)
2322 op1 = force_reg (mode, op1);
2323 }
2324 }
2325
2326 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2327 {
2328 if (code == LT
2329 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2330 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2331 {
2332 std::swap (op0, op1);
2333 code = swap_condition (code);
2334 }
2335
2336 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2337 if (code == GE)
2338 {
2339 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2340 need_ccmpeq = true;
2341 code = GT;
2342 }
2343
2344 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2345 to EQ/GT respectively. */
2346 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2347 }
2348
2349 switch (code)
2350 {
2351 case EQ:
2352 case GT:
2353 case GE:
2354 case GTU:
2355 case GEU:
2356 branch_code = code;
2357 break;
2358 case NE:
2359 case LT:
2360 case LE:
2361 case LTU:
2362 case LEU:
2363 branch_code = reverse_condition (code);
2364 break;
2365 default:
2366 gcc_unreachable ();
2367 }
2368
2369 insn = gen_rtx_SET (get_t_reg_rtx (),
2370 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2371
2372 sh_emit_set_t_insn (insn, mode);
2373 if (need_ccmpeq)
2374 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2375
2376 if (branch_code == code)
2377 emit_jump_insn (gen_branch_true (operands[3]));
2378 else
2379 emit_jump_insn (gen_branch_false (operands[3]));
2380 }
2381
2382 void
2383 sh_emit_compare_and_set (rtx *operands, machine_mode mode)
2384 {
2385 enum rtx_code code = GET_CODE (operands[1]);
2386 rtx op0 = operands[2];
2387 rtx op1 = operands[3];
2388 rtx_code_label *lab = NULL;
2389 bool invert = false;
2390
2391 op0 = force_reg (mode, op0);
2392 if ((code != EQ && code != NE
2393 && (op1 != const0_rtx
2394 || code == GTU || code == GEU || code == LTU || code == LEU))
2395 || (mode == DImode && op1 != const0_rtx)
2396 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2397 op1 = force_reg (mode, op1);
2398
2399 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2400 {
2401 if (code == LT || code == LE)
2402 {
2403 std::swap (op0, op1);
2404 code = swap_condition (code);
2405 }
2406 if (code == GE)
2407 {
2408 if (TARGET_IEEE)
2409 {
2410 lab = gen_label_rtx ();
2411 sh_emit_scc_to_t (EQ, op0, op1);
2412 emit_jump_insn (gen_branch_true (lab));
2413 code = GT;
2414 }
2415 else
2416 {
2417 code = LT;
2418 invert = true;
2419 }
2420 }
2421 }
2422
2423 if (code == NE)
2424 {
2425 code = EQ;
2426 invert = true;
2427 }
2428
2429 sh_emit_scc_to_t (code, op0, op1);
2430 if (lab)
2431 emit_label (lab);
2432 if (invert)
2433 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2434 else
2435 emit_move_insn (operands[0], get_t_reg_rtx ());
2436 }
2437 \f
2438 /* Functions to output assembly code. */
2439
2440 /* Return a sequence of instructions to perform DI or DF move.
2441
2442 Since the SH cannot move a DI or DF in one instruction, we have
2443 to take care when we see overlapping source and dest registers. */
2444 const char *
2445 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2446 machine_mode mode)
2447 {
2448 rtx dst = operands[0];
2449 rtx src = operands[1];
2450
2451 if (MEM_P (dst)
2452 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2453 return "mov.l %T1,%0" "\n"
2454 " mov.l %1,%0";
2455
2456 if (register_operand (dst, mode)
2457 && register_operand (src, mode))
2458 {
2459 if (REGNO (src) == MACH_REG)
2460 return "sts mach,%S0" "\n"
2461 " sts macl,%R0";
2462
2463 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2464 when mov.d r1,r0 do r1->r0 then r2->r1. */
2465 if (REGNO (src) + 1 == REGNO (dst))
2466 return "mov %T1,%T0" "\n"
2467 " mov %1,%0";
2468 else
2469 return "mov %1,%0" "\n"
2470 " mov %T1,%T0";
2471 }
2472 else if (CONST_INT_P (src))
2473 {
2474 if (INTVAL (src) < 0)
2475 output_asm_insn ("mov #-1,%S0", operands);
2476 else
2477 output_asm_insn ("mov #0,%S0", operands);
2478
2479 return "mov %1,%R0";
2480 }
2481 else if (MEM_P (src))
2482 {
2483 int ptrreg = -1;
2484 int dreg = REGNO (dst);
2485 rtx inside = XEXP (src, 0);
2486
2487 switch (GET_CODE (inside))
2488 {
2489 case REG:
2490 ptrreg = REGNO (inside);
2491 break;
2492
2493 case SUBREG:
2494 ptrreg = subreg_regno (inside);
2495 break;
2496
2497 case PLUS:
2498 ptrreg = REGNO (XEXP (inside, 0));
2499 /* ??? An r0+REG address shouldn't be possible here, because it isn't
2500 an offsettable address. Unfortunately, offsettable addresses use
2501 QImode to check the offset, and a QImode offsettable address
2502 requires r0 for the other operand, which is not currently
2503 supported, so we can't use the 'o' constraint.
2504 Thus we must check for and handle r0+REG addresses here.
2505 We punt for now, since this is likely very rare. */
2506 gcc_assert (!REG_P (XEXP (inside, 1)));
2507 break;
2508
2509 case LABEL_REF:
2510 return "mov.l %1,%0" "\n"
2511 " mov.l %1+4,%T0";
2512 case POST_INC:
2513 return "mov.l %1,%0" "\n"
2514 " mov.l %1,%T0";
2515 default:
2516 gcc_unreachable ();
2517 }
2518
2519 /* Work out the safe way to copy. Copy into the second half first. */
2520 if (dreg == ptrreg)
2521 return "mov.l %T1,%T0" "\n"
2522 " mov.l %1,%0";
2523 }
2524
2525 return "mov.l %1,%0" "\n"
2526 " mov.l %T1,%T0";
2527 }
2528
2529 /* Print an instruction which would have gone into a delay slot after
2530 another instruction, but couldn't because the other instruction expanded
2531 into a sequence where putting the slot insn at the end wouldn't work. */
2532 static void
2533 print_slot (rtx_sequence *seq)
2534 {
2535 final_scan_insn (seq->insn (1), asm_out_file, optimize, 1, NULL);
2536
2537 seq->insn (1)->set_deleted ();
2538 }
2539
2540 const char *
2541 output_far_jump (rtx_insn *insn, rtx op)
2542 {
2543 struct { rtx lab, reg, op; } this_jmp;
2544 rtx_code_label *braf_base_lab = NULL;
2545 const char *jump;
2546 int far;
2547 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2548 rtx_insn *prev;
2549
2550 this_jmp.lab = gen_label_rtx ();
2551
2552 if (TARGET_SH2
2553 && offset >= -32764
2554 && offset - get_attr_length (insn) <= 32766
2555 && ! CROSSING_JUMP_P (insn))
2556 {
2557 far = 0;
2558 jump = "mov.w %O0,%1" "\n"
2559 " braf %1";
2560 }
2561 else
2562 {
2563 far = 1;
2564 if (flag_pic)
2565 {
2566 if (TARGET_SH2)
2567 jump = "mov.l %O0,%1" "\n"
2568 " braf %1";
2569 else
2570 jump = "mov.l r0,@-r15" "\n"
2571 " mova %O0,r0" "\n"
2572 " mov.l @r0,%1" "\n"
2573 " add r0,%1" "\n"
2574 " mov.l @r15+,r0" "\n"
2575 " jmp @%1";
2576 }
2577 else
2578 jump = "mov.l %O0,%1" "\n"
2579 " jmp @%1";
2580 }
2581 /* If we have a scratch register available, use it. */
2582 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2583 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2584 {
2585 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2586 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2587 jump = "mov.l r1,@-r15" "\n"
2588 " mova %O0,r0" "\n"
2589 " mov.l @r0,r1" "\n"
2590 " add r1,r0" "\n"
2591 " mov.l @r15+,r1" "\n"
2592 " jmp @%1";
2593 output_asm_insn (jump, &this_jmp.lab);
2594 if (dbr_sequence_length ())
2595 print_slot (final_sequence);
2596 else
2597 output_asm_insn ("nop", 0);
2598 }
2599 else
2600 {
2601 /* Output the delay slot insn first if any. */
2602 if (dbr_sequence_length ())
2603 print_slot (final_sequence);
2604
2605 this_jmp.reg = gen_rtx_REG (SImode, 13);
2606 output_asm_insn ("mov.l r13,@-r15", 0);
2607 output_asm_insn (jump, &this_jmp.lab);
2608 output_asm_insn ("mov.l @r15+,r13", 0);
2609 }
2610 if (far && flag_pic && TARGET_SH2)
2611 {
2612 braf_base_lab = gen_label_rtx ();
2613 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2614 CODE_LABEL_NUMBER (braf_base_lab));
2615 }
2616 if (far)
2617 output_asm_insn (".align 2", 0);
2618 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2619 this_jmp.op = op;
2620 if (far && flag_pic)
2621 {
2622 if (TARGET_SH2)
2623 this_jmp.lab = braf_base_lab;
2624 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2625 }
2626 else
2627 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2628 return "";
2629 }
2630
2631 /* Local label counter, used for constants in the pool and inside
2632 pattern branches. */
2633 static int lf = 100;
2634
2635 /* Output code for ordinary branches. */
2636 const char *
2637 output_branch (int logic, rtx_insn *insn, rtx *operands)
2638 {
2639 switch (get_attr_length (insn))
2640 {
2641 case 6:
2642 /* This can happen if filling the delay slot has caused a forward
2643 branch to exceed its range (we could reverse it, but only
2644 when we know we won't overextend other branches; this should
2645 best be handled by relaxation).
2646 It can also happen when other condbranches hoist delay slot insns
2647 from their destinations, thus leading to a code size increase.
2648 But the branch will still be in the range -4092..+4098 bytes. */
2649 if (! TARGET_RELAX)
2650 {
2651 int label = lf++;
2652 /* The call to print_slot will clobber the operands. */
2653 rtx op0 = operands[0];
2654
2655 /* If the instruction in the delay slot is annulled (true), then
2656 there is no delay slot where we can put it now. The only safe
2657 place for it is after the label. final will do that by default. */
2658
2659 if (final_sequence
2660 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
2661 && get_attr_length (final_sequence->insn (1)))
2662 {
2663 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2664 ASSEMBLER_DIALECT ? "/" : ".", label);
2665 print_slot (final_sequence);
2666 }
2667 else
2668 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2669
2670 output_asm_insn ("bra\t%l0", &op0);
2671 fprintf (asm_out_file, "\tnop\n");
2672 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2673
2674 return "";
2675 }
2676 /* FALLTHRU */
2677 /* When relaxing, handle this like a short branch. The linker
2678 will fix it up if it still doesn't fit after relaxation. */
2679 case 2:
2680 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2681
2682 /* These are for SH2e, in which we have to account for the
2683 extra nop because of the hardware bug in annulled branches. */
2684 case 8:
2685 if (! TARGET_RELAX)
2686 {
2687 int label = lf++;
2688
2689 gcc_assert (!final_sequence
2690 || !(INSN_ANNULLED_BRANCH_P
2691 (XVECEXP (final_sequence, 0, 0))));
2692 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2693 logic ? "f" : "t",
2694 ASSEMBLER_DIALECT ? "/" : ".", label);
2695 fprintf (asm_out_file, "\tnop\n");
2696 output_asm_insn ("bra\t%l0", operands);
2697 fprintf (asm_out_file, "\tnop\n");
2698 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2699
2700 return "";
2701 }
2702 /* FALLTHRU */
2703 case 4:
2704 {
2705 char buffer[10];
2706
2707 sprintf (buffer, "b%s%ss\t%%l0",
2708 logic ? "t" : "f",
2709 ASSEMBLER_DIALECT ? "/" : ".");
2710 output_asm_insn (buffer, &operands[0]);
2711 return "nop";
2712 }
2713
2714 default:
2715 /* There should be no longer branches now - that would
2716 indicate that something has destroyed the branches set
2717 up in machine_dependent_reorg. */
2718 gcc_unreachable ();
2719 }
2720 }
2721
2722 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2723 fill in operand 9 as a label to the successor insn.
2724 We try to use jump threading where possible.
2725 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2726 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2727 follow jmp and bt, if the address is in range. */
2728 const char *
2729 output_branchy_insn (enum rtx_code code, const char *templ,
2730 rtx_insn *insn, rtx *operands)
2731 {
2732 rtx_insn *next_insn = NEXT_INSN (insn);
2733
2734 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2735 {
2736 rtx src = SET_SRC (PATTERN (next_insn));
2737 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2738 {
2739 /* Following branch not taken */
2740 rtx_code_label *lab = gen_label_rtx ();
2741 emit_label_after (lab, next_insn);
2742 INSN_ADDRESSES_NEW (lab,
2743 INSN_ADDRESSES (INSN_UID (next_insn))
2744 + get_attr_length (next_insn));
2745 operands[9] = lab;
2746 return templ;
2747 }
2748 else
2749 {
2750 int offset = (branch_dest (next_insn)
2751 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2752 if (offset >= -252 && offset <= 258)
2753 {
2754 if (GET_CODE (src) == IF_THEN_ELSE)
2755 /* branch_true */
2756 src = XEXP (src, 1);
2757 operands[9] = src;
2758 return templ;
2759 }
2760 }
2761 }
2762 rtx_code_label *lab = gen_label_rtx ();
2763 emit_label_after (lab, insn);
2764 INSN_ADDRESSES_NEW (lab,
2765 INSN_ADDRESSES (INSN_UID (insn))
2766 + get_attr_length (insn));
2767 operands[9] = lab;
2768 return templ;
2769 }
2770
2771 const char *
2772 output_ieee_ccmpeq (rtx_insn *insn, rtx *operands)
2773 {
2774 return output_branchy_insn (NE, "bt %l9" "\n"
2775 " fcmp/eq %1,%0",
2776 insn, operands);
2777 }
2778 \f
2779 /* Output the start of the assembler file. */
2780 static void
2781 sh_file_start (void)
2782 {
2783 default_file_start ();
2784
2785 if (TARGET_ELF)
2786 /* We need to show the text section with the proper
2787 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2788 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2789 will complain. We can teach GAS specifically about the
2790 default attributes for our choice of text section, but
2791 then we would have to change GAS again if/when we change
2792 the text section name. */
2793 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2794 else
2795 /* Switch to the data section so that the coffsem symbol
2796 isn't in the text section. */
2797 switch_to_section (data_section);
2798
2799 if (TARGET_LITTLE_ENDIAN)
2800 fputs ("\t.little\n", asm_out_file);
2801 }
2802 \f
2803 /* Implementation of TARGET_ASM_INTEGER for SH. Pointers to functions
2804 need to be output as pointers to function descriptors for
2805 FDPIC. */
2806
2807 static bool
2808 sh_assemble_integer (rtx value, unsigned int size, int aligned_p)
2809 {
2810 if (TARGET_FDPIC && size == UNITS_PER_WORD
2811 && GET_CODE (value) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (value))
2812 {
2813 fputs ("\t.long\t", asm_out_file);
2814 output_addr_const (asm_out_file, value);
2815 fputs ("@FUNCDESC\n", asm_out_file);
2816 return true;
2817 }
2818 return default_assemble_integer (value, size, aligned_p);
2819 }
2820 \f
2821 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2822 static bool
2823 unspec_caller_rtx_p (rtx pat)
2824 {
2825 rtx base, offset;
2826 split_const (pat, &base, &offset);
2827
2828 if (GET_CODE (base) == UNSPEC)
2829 {
2830 if (XINT (base, 1) == UNSPEC_CALLER)
2831 return true;
2832 for (int i = 0; i < XVECLEN (base, 0); i++)
2833 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2834 return true;
2835 }
2836 return false;
2837 }
2838
2839 /* Indicate that INSN cannot be duplicated. This is true for insns
2840 that generate a unique label. */
2841 static bool
2842 sh_cannot_copy_insn_p (rtx_insn *insn)
2843 {
2844 if (!reload_completed || !flag_pic)
2845 return false;
2846
2847 if (!NONJUMP_INSN_P (insn))
2848 return false;
2849 if (asm_noperands (insn) >= 0)
2850 return false;
2851
2852 rtx pat = PATTERN (insn);
2853
2854 if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == USE)
2855 return false;
2856
2857 if (TARGET_FDPIC && GET_CODE (pat) == PARALLEL)
2858 {
2859 rtx t = XVECEXP (pat, 0, XVECLEN (pat, 0) - 1);
2860 if (GET_CODE (t) == USE && unspec_caller_rtx_p (XEXP (t, 0)))
2861 return true;
2862 }
2863
2864 if (GET_CODE (pat) != SET)
2865 return false;
2866 pat = SET_SRC (pat);
2867
2868 if (unspec_caller_rtx_p (pat))
2869 return true;
2870
2871 return false;
2872 }
2873 \f
2874 /* Number of instructions used to make an arithmetic right shift by N. */
2875 static const char ashiftrt_insns[] =
2876 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2877
2878 /* Description of a logical left or right shift, when expanded to a sequence
2879 of 1/2/8/16 shifts.
2880 Notice that one bit right shifts clobber the T bit. One bit left shifts
2881 are done with an 'add Rn,Rm' insn and thus do not clobber the T bit. */
2882 enum
2883 {
2884 ASHL_CLOBBERS_T = 1 << 0,
2885 LSHR_CLOBBERS_T = 1 << 1
2886 };
2887
2888 struct ashl_lshr_sequence
2889 {
2890 char insn_count;
2891 signed char amount[6];
2892 char clobbers_t;
2893 };
2894
2895 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
2896 {
2897 { 0, { 0 }, 0 }, // 0
2898 { 1, { 1 }, LSHR_CLOBBERS_T },
2899 { 1, { 2 }, 0 },
2900 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
2901 { 2, { 2, 2 }, 0 }, // 4
2902 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
2903 { 3, { 2, 2, 2 }, 0 },
2904 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
2905 { 1, { 8 }, 0 }, // 8
2906 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
2907 { 2, { 8, 2 }, 0 },
2908 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
2909 { 3, { 8, 2, 2 }, 0 }, // 12
2910 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
2911 { 3, { 8, -2, 8 }, 0 },
2912 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
2913 { 1, { 16 }, 0 }, // 16
2914 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
2915 { 2, { 16, 2 }, 0 },
2916 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
2917 { 3, { 16, 2, 2 }, 0 }, // 20
2918 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
2919 { 3, { 16, -2, 8 }, 0 },
2920 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
2921 { 2, { 16, 8 }, 0 }, // 24
2922 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
2923 { 3, { 16, 8, 2 }, 0 },
2924 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
2925 { 4, { 16, 8, 2, 2 }, 0 }, // 28
2926 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
2927 { 3, { 16, -2, 16 }, 0 },
2928
2929 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
2930 For a left shift by 31 a 2 insn and-rotl sequence can be used.
2931 However, the shift-and combiner code needs this entry here to be in
2932 terms of real shift insns. */
2933 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
2934 };
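
/* Reading the table above, for illustration: a shift by 6 uses the entry
   { 3, { 2, 2, 2 }, 0 }, i.e. three 2-bit shifts and no T bit clobber,
   while a shift by 7 uses { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T }, where
   only the logical right shift variant clobbers T because of the 1-bit
   right shift step.  */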
2935
2936 /* Individual shift amounts for shift amounts < 16; up to three of the
2937 highmost bits might be clobbered. This is typically used with some
2938 kind of sign or zero extension. */
2939 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
2940 {
2941 { 0, { 0 }, 0 }, // 0
2942 { 1, { 1 }, LSHR_CLOBBERS_T },
2943 { 1, { 2 }, 0 },
2944 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
2945 { 2, { 2, 2 }, 0 }, // 4
2946 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
2947 { 2, { 8, -2 }, 0 },
2948 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
2949 { 1, { 8 }, 0 }, // 8
2950 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
2951 { 2, { 8, 2 }, 0 },
2952 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
2953 { 3, { 8, 2, 2 }, 0 }, // 12
2954 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
2955 { 2, { 16, -2 }, 0 },
2956 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
2957 { 1, { 16 }, 0 }, // 16
2958 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
2959 { 2, { 16, 2 }, 0 },
2960 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
2961 { 3, { 16, 2, 2 }, 0 }, // 20
2962 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
2963 { 3, { 16, -2, 8 }, 0 },
2964 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
2965 { 2, { 16, 8 }, 0 }, // 24
2966 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
2967 { 3, { 16, 8, 2 }, 0 },
2968 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
2969 { 4, { 16, 8, 2, 2 }, 0 }, // 28
2970 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
2971 { 3, { 16, -2, 16 }, 0 },
2972 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
2973 };
2974
2975 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
2976 will clobber the T bit. */
2977 bool
2978 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
2979 {
2980 gcc_assert (CONST_INT_P (shift_amount));
2981
2982 const int shift_amount_i = INTVAL (shift_amount) & 31;
2983
2984 /* Special case for shift count of 31: use and-rotl sequence. */
2985 if (shift_amount_i == 31)
2986 return true;
2987
2988 return (ashl_lshr_seq[shift_amount_i].clobbers_t
2989 & ASHL_CLOBBERS_T) != 0;
2990 }
2991
2992 /* Return true if a logical right shift consisting of 1/2/8/16 shift
2993 instructions will clobber the T bit. */
2994 bool
2995 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
2996 {
2997 gcc_assert (CONST_INT_P (shift_amount));
2998
2999 /* For right shifts the constant might be negative. */
3000 const int shift_amount_i = std::abs (INTVAL (shift_amount)) & 31;
3001
3002 /* Special case for shift count of 31: use shll-movt sequence. */
3003 if (shift_amount_i == 31)
3004 return true;
3005
3006 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3007 & LSHR_CLOBBERS_T) != 0;
3008 }
3009
3010 /* Return true if it is potentially beneficial to use a dynamic shift
3011 instruction (shad / shar) instead of a combination of 1/2/8/16
3012 shift instructions for the specified shift count.
3013 If dynamic shifts are not available, always return false. */
3014 bool
3015 sh_dynamicalize_shift_p (rtx count)
3016 {
3017 gcc_assert (CONST_INT_P (count));
3018
3019 /* For right shifts the constant might be negative. */
3020 const int shift_amount_i = std::abs (INTVAL (count)) & 31;
3021 int insn_count;
3022
3023 /* For left and right shifts, there are shorter 2 insn sequences for
3024 shift amounts of 31. */
3025 if (shift_amount_i == 31)
3026 insn_count = 2;
3027 else
3028 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3029
3030 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
3031 }
3032
3033 /* Assuming we have a value that has been sign-extended by at least one bit,
3034 can we use the ext_shift_amounts with the last shift turned to an
3035 arithmetic shift to shift it by N without data loss, and quicker than by
3036 other means? */
3037 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
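/* Note that (((n) | 8) == 15) holds exactly for n == 7 and n == 15.  */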
3038
3039 /* Return the cost of a shift. */
3040 static inline int
3041 shiftcosts (rtx x)
3042 {
3043 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3044 {
3045 if (GET_MODE (x) == DImode
3046 && CONST_INT_P (XEXP (x, 1))
3047 && INTVAL (XEXP (x, 1)) == 1)
3048 return 2;
3049
3050 /* Everything else is invalid, because there is no pattern for it. */
3051 return -1;
3052 }
3053 /* If shift by a non constant, then this will be expensive. */
3054 if (!CONST_INT_P (XEXP (x, 1)))
3055 return SH_DYNAMIC_SHIFT_COST;
3056
3057 /* Otherwise, return the true cost in instructions. Cope with out of range
3058 shift counts more or less arbitrarily. */
3059 int value = INTVAL (XEXP (x, 1)) & 31;
3060
3061 if (GET_CODE (x) == ASHIFTRT)
3062 {
3063 int cost = ashiftrt_insns[value];
3064 /* If dynamic shifts are available and profitable in this case, then we
3065 put the constant in a reg and use shad. */
3066 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3067 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3068 return cost;
3069 }
3070 else
3071 return ashl_lshr_seq[value].insn_count;
3072 }
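
/* For illustration: from the ashiftrt_insns table an arithmetic right
   shift by 5 costs 5 insns while a shift by 6 would cost 8, so on targets
   with dynamic shifts the code above caps the latter at
   1 + SH_DYNAMIC_SHIFT_COST (load the count, then shad).  */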
3073
3074 /* Return the cost of an AND/XOR/IOR operation. */
3075 static inline int
3076 and_xor_ior_costs (rtx x, int code)
3077 {
3078 /* On SH1-4 we have only max. SImode operations.
3079 Double the cost for modes > SImode. */
3080 const int cost_scale = GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD ? 2 : 1;
3081
3082 /* A logical operation with two registers is a single cycle
3083 instruction. */
3084 if (!CONST_INT_P (XEXP (x, 1)))
3085 return 1 * cost_scale;
3086
3087 int i = INTVAL (XEXP (x, 1));
3088
3089 /* These constants are single cycle extu.[bw] instructions. */
3090 if ((i == 0xff || i == 0xffff) && code == AND)
3091 return 1 * cost_scale;
3092 /* Constants that can be used in an instruction as an immediate are
3093 a single cycle, but this requires r0, so make it a little more
3094 expensive. */
3095 if (CONST_OK_FOR_K08 (i))
3096 return 2 * cost_scale;
3097 /* Constants that can be loaded with a mov immediate need one more cycle.
3098 This case is probably unnecessary. */
3099 if (CONST_OK_FOR_I08 (i))
3100 return 2 * cost_scale;
3101 /* Any other constant requires an additional 2 cycle pc-relative load.
3102 This case is probably unnecessary. */
3103 return 3 * cost_scale;
3104 }
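
/* Example of the cost model above, for illustration: "x & 0xff" costs 1
   (a single extu.b), "x & 0x3f" costs 2 (an immediate that is usable but
   ties up r0), and "x & 0x12345" costs 3 (needs a pc-relative constant
   load first).  For modes wider than SImode these costs are doubled.  */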
3105
3106 /* Return the cost of an addition or a subtraction. */
3107 static inline int
3108 addsubcosts (rtx x)
3109 {
3110 if (GET_MODE (x) == SImode)
3111 {
3112 /* The addc or subc patterns will eventually become one or two
3113 instructions. Below are some costs for some of the patterns
3114 which combine would reject because the costs of the individual
3115 insns in the patterns are lower.
3116
3117 FIXME: It would be much easier if we had something like insn cost
3118 attributes and the cost calculation machinery used those attributes
3119 in the first place. This would eliminate redundant recog-like C
3120 code to calculate costs of complex patterns. */
3121 rtx op0 = XEXP (x, 0);
3122 rtx op1 = XEXP (x, 1);
3123
3124 if (GET_CODE (x) == PLUS)
3125 {
3126 if (GET_CODE (op0) == AND
3127 && XEXP (op0, 1) == const1_rtx
3128 && (GET_CODE (op1) == PLUS
3129 || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
3130 return 1;
3131
3132 if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
3133 && GET_CODE (op1) == LSHIFTRT
3134 && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
3135 return 1;
3136 }
3137 /* Let's assume that adding the result of an insn that stores into
3138 the T bit is cheap. */
3139 if (treg_set_expr (op1, SImode))
3140 return 1;
3141 if (treg_set_expr (op0, SImode))
3142 return 1;
3143 }
3144
3145 /* On SH1-4 we have only max. SImode operations.
3146 Double the cost for modes > SImode. */
3147 const int cost_scale = GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD ? 2 : 1;
3148
3149 /* Adding a register is a single cycle insn. */
3150 if (REG_P (XEXP (x, 1))
3151 || GET_CODE (XEXP (x, 1)) == SUBREG)
3152 return 1 * cost_scale;
3153
3154 /* Likewise for small constants. */
3155 if (CONST_INT_P (XEXP (x, 1))
3156 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3157 return 1 * cost_scale;
3158
3159 /* Any other constant requires a 2 cycle pc-relative load plus an
3160 addition. */
3161 return 3 * cost_scale;
3162 }
3163
3164 /* Return the cost of a multiply. */
3165 static inline int
3166 multcosts (rtx x ATTRIBUTE_UNUSED)
3167 {
3168 if (sh_multcost >= 0)
3169 return sh_multcost;
3170
3171 if (TARGET_SH2)
3172 {
3173 /* We have a mul insn, so we can never take more than the mul and the
3174 read of the mac reg, but count more because of the latency and extra
3175 reg usage. */
3176 if (optimize_size)
3177 return 2;
3178 return 3;
3179 }
3180
3181 /* If we're aiming at small code, then just count the number of
3182 insns in a multiply call sequence. */
3183 if (optimize_size)
3184 return 5;
3185
3186 /* Otherwise count all the insns in the routine we'd be calling too. */
3187 return 20;
3188 }
3189
3190 /* Compute a (partial) cost for rtx X. Return true if the complete
3191 cost has been computed, and false if subexpressions should be
3192 scanned. In either case, *TOTAL contains the cost result. */
3193 static bool
3194 sh_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
3195 int opno ATTRIBUTE_UNUSED,
3196 int *total, bool speed ATTRIBUTE_UNUSED)
3197 {
3198 int code = GET_CODE (x);
3199
3200 switch (code)
3201 {
3202 /* The lower-subreg pass decides whether to split multi-word regs
3203 into individual regs by looking at the cost for a SET of certain
3204 modes with the following patterns:
3205 (set (reg) (reg))
3206 (set (reg) (const_int 0))
3207 On machines that support vector-move operations a multi-word move
3208 is the same cost as individual reg move. On SH there is no
3209 vector-move, so we have to provide the correct cost in the number
3210 of move insns to load/store the reg of the mode in question. */
3211 case SET:
3212 if (sh_movt_set_dest (x) != NULL || sh_movrt_set_dest (x) != NULL)
3213 {
3214 *total = COSTS_N_INSNS (1);
3215 return true;
3216 }
3217
3218 if (register_operand (SET_DEST (x), VOIDmode)
3219 && (register_operand (SET_SRC (x), VOIDmode)
3220 || satisfies_constraint_Z (SET_SRC (x))))
3221 {
3222 const machine_mode mode = GET_MODE (SET_DEST (x));
3223 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3224 / mov_insn_size (mode, TARGET_SH2A));
3225 return true;
3226 }
3227 return false;
3228
3229 /* The cost of a mem access is mainly the cost of the address mode. */
3230 case MEM:
3231 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3232 true);
3233 return true;
3234
3235 case IF_THEN_ELSE:
3236 /* This case is required for the if_then_else negc pattern. */
3237 if (treg_set_expr (XEXP (x, 0), SImode))
3238 {
3239 *total = COSTS_N_INSNS (1);
3240 return true;
3241 }
3242 else
3243 return false;
3244
3245 /* Zero extracts of single bits are usually combine patterns for the
3246 tst insns. */
3247 case ZERO_EXTRACT:
3248 if (GET_CODE (XEXP (x, 0)) == XOR
3249 && arith_reg_operand (XEXP (XEXP (x, 0), 0), VOIDmode)
3250 && XEXP (x, 1) == const1_rtx
3251 && CONST_INT_P (XEXP (x, 2))
3252 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3253 /* Check that the xor constant overlaps with the extracted bit. */
3254 && (INTVAL (XEXP (XEXP (x, 0), 1)) & (1LL << INTVAL (XEXP (x, 2)))))
3255 {
3256 *total = 1; //COSTS_N_INSNS (1);
3257 return true;
3258 }
3259
3260 /* div0s variant. */
3261 if (GET_CODE (XEXP (x, 0)) == XOR
3262 && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR
3263 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3264 {
3265 *total = 1;
3266 return true;
3267 }
3268 return false;
3269
3270 /* The cost of a sign or zero extend depends on whether the source is a
3271 reg or a mem. In case of a mem take the address into account. */
3272 case SIGN_EXTEND:
3273 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3274 {
3275 *total = COSTS_N_INSNS (1);
3276 return true;
3277 }
3278 if (MEM_P (XEXP (x, 0)))
3279 {
3280 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3281 GET_MODE (XEXP (x, 0)),
3282 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3283 return true;
3284 }
3285 return false;
3286
3287 case ZERO_EXTEND:
3288 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3289 {
3290 *total = COSTS_N_INSNS (1);
3291 return true;
3292 }
3293 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3294 && (GET_MODE (XEXP (x, 0)) == QImode
3295 || GET_MODE (XEXP (x, 0)) == HImode))
3296 {
3297 /* Handle SH2A's movu.b and movu.w insn. */
3298 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3299 GET_MODE (XEXP (x, 0)),
3300 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3301 return true;
3302 }
3303 return false;
3304
3305 /* mems for SFmode and DFmode can be inside a parallel due to
3306 the way the fpscr is handled. */
3307 case PARALLEL:
3308 for (int i = 0; i < XVECLEN (x, 0); i++)
3309 {
3310 rtx xx = XVECEXP (x, 0, i);
3311 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3312 {
3313 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3314 GET_MODE (XEXP (xx, 0)),
3315 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3316 return true;
3317 }
3318 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3319 {
3320 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3321 GET_MODE (XEXP (xx, 1)),
3322 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3323 return true;
3324 }
3325 }
3326
3327 if (sh_1el_vec (x, VOIDmode))
3328 *total = outer_code != SET;
3329 else if (sh_rep_vec (x, VOIDmode))
3330 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3331 + (outer_code != SET));
3332 else
3333 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3334 return true;
3335
3336 case CONST_INT:
3337 if (CONST_OK_FOR_I08 (INTVAL (x)))
3338 *total = 0;
3339 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3340 && CONST_OK_FOR_K08 (INTVAL (x)))
3341 *total = 1;
3342 /* prepare_cmp_insn will force costly constants into registers before
3343 the cbranch[sd]i4 patterns can see them, so preserve potentially
3344 interesting ones not covered by I08 above. */
3345 else if (outer_code == COMPARE
3346 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3347 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3348 || INTVAL (x) == 0x7fffffff
3349 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3350 *total = 1;
3351 else
3352 *total = 8;
3353 return true;
3354
3355 case EQ:
3356 /* An and with a constant compared against zero is
3357 most likely going to be a TST #imm, R0 instruction. */
3358 if (XEXP (x, 1) == const0_rtx
3359 && ((GET_CODE (XEXP (x, 0)) == AND
3360 || (SUBREG_P (XEXP (x, 0))
3361 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == AND))
3362 || GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT))
3363 {
3364 *total = 1;
3365 return true;
3366 }
3367
3368 else if (XEXP (x, 1) == const0_rtx
3369 && GET_CODE (XEXP (x, 0)) == AND
3370 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3371 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ASHIFT
3372 && arith_reg_operand (XEXP (XEXP (XEXP (x, 0), 0), 0), SImode)
3373 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
3374 {
3375 *total = 1;
3376 return true;
3377 }
3378 else
3379 return false;
3380
3381 case SMIN:
3382 case SMAX:
3383 /* This is most likely a clips.b or clips.w insn that is being made up
3384 by combine. */
3385 if (TARGET_SH2A
3386 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3387 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3388 && REG_P (XEXP (XEXP (x, 0), 0))
3389 && CONST_INT_P (XEXP (x, 1)))
3390 {
3391 *total = COSTS_N_INSNS (1);
3392 return true;
3393 }
3394 else
3395 return false;
3396
3397 case CONST:
3398 case LABEL_REF:
3399 case SYMBOL_REF:
3400 *total = 5;
3401 return true;
3402
3403 case CONST_DOUBLE:
3404 /* prepare_cmp_insn will force costly constants into registers before
3405 the cbranchdi4 pattern can see them, so preserve potentially
3406 interesting ones. */
3407 if (outer_code == COMPARE && GET_MODE (x) == DImode)
3408 *total = 1;
3409 else
3410 *total = 10;
3411 return true;
3412
3413 case CONST_VECTOR:
3414 /* FIXME: This looks broken. Only the last statement has any effect.
3415 Probably this could be folded with the PARALLEL case? */
3416 if (x == CONST0_RTX (GET_MODE (x)))
3417 *total = 0;
3418 else if (sh_1el_vec (x, VOIDmode))
3419 *total = outer_code != SET;
3420 if (sh_rep_vec (x, VOIDmode))
3421 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3422 + (outer_code != SET));
3423 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3424 return true;
3425
3426 case PLUS:
3427 case MINUS:
3428 *total = COSTS_N_INSNS (addsubcosts (x));
3429 return true;
3430
3431 case AND:
3432 /* Check for (and (not (reg)) (const_int 1)) which is a tst insn. */
3433 if (GET_CODE (XEXP (x, 0)) == NOT && XEXP (x, 1) == const1_rtx)
3434 {
3435 *total = COSTS_N_INSNS (1);
3436 return true;
3437 }
3438 /* Fall through. */
3439
3440 case XOR:
3441 case IOR:
3442 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3443 return true;
3444
3445 case MULT:
3446 *total = COSTS_N_INSNS (multcosts (x));
3447 return true;
3448
3449 case LT:
3450 case GE:
3451 /* div0s sign comparison. */
3452 if (GET_CODE (XEXP (x, 0)) == XOR
3453 && REG_P ((XEXP (XEXP (x, 0), 0)))
3454 && REG_P ((XEXP (XEXP (x, 0), 1)))
3455 && satisfies_constraint_Z (XEXP (x, 1)))
3456 {
3457 *total = COSTS_N_INSNS (1);
3458 return true;
3459 }
3460 else
3461 return false;
3462
3463 case LSHIFTRT:
3464 /* div0s sign comparison. */
3465 if (GET_CODE (XEXP (x, 0)) == XOR
3466 && REG_P ((XEXP (XEXP (x, 0), 0)))
3467 && REG_P ((XEXP (XEXP (x, 0), 1)))
3468 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3469 {
3470 *total = COSTS_N_INSNS (1);
3471 return true;
3472 }
3473 /* FALLTHRU */
3474 case ASHIFT:
3475 case ASHIFTRT:
3476 {
3477 int cost = shiftcosts (x);
3478 if (cost < 0)
3479 return false;
3480 *total = COSTS_N_INSNS (cost);
3481 return true;
3482 }
3483
3484 case DIV:
3485 case UDIV:
3486 case MOD:
3487 case UMOD:
3488 *total = COSTS_N_INSNS (20);
3489 return true;
3490
3491 case FLOAT:
3492 case FIX:
3493 *total = 100;
3494 return true;
3495
3496 default:
3497 return false;
3498 }
3499 }
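/* For illustration, a few typical operands cost out as follows under the
   rules above (assuming the usual SH constraint ranges, where I08 is a
   signed 8 bit and K08 an unsigned 8 bit immediate):
     (const_int 100)                 -> *total = 0  (fits I08)
     (const_int 255) inside an AND   -> *total = 1  (fits K08)
     (const_int 1000)                -> *total = 8  (needs a constant pool load)
     (symbol_ref "x")                -> *total = 5
     (plus (reg) (reg))              -> COSTS_N_INSNS (addsubcosts (x))  */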
3500
3501 /* Determine the size of the fundamental move insn that will be used
3502 for the specified mode. */
3503 static inline int
3504 mov_insn_size (machine_mode mode, bool consider_sh2a)
3505 {
3506 const int mode_sz = GET_MODE_SIZE (mode);
3507
3508 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3509 || (TARGET_FMOVD && mode == DFmode))
3510 return mode_sz;
3511 else
3512 {
3513 /* The max. available mode for actual move insns is SImode.
3514 Larger accesses will be split into multiple loads/stores. */
3515 const int max_mov_sz = GET_MODE_SIZE (SImode);
3516 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
3517 }
3518 }
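/* A few example values of the above (a sketch; the DFmode result depends
   on the selected FPU options):
     mov_insn_size (QImode, false) == 1
     mov_insn_size (HImode, false) == 2
     mov_insn_size (SImode, false) == 4
     mov_insn_size (DImode, false) == 4    (split into two SImode moves)
     mov_insn_size (DFmode, false) == 8 if TARGET_FMOVD, otherwise 4  */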
3519
3520 /* Determine the maximum possible displacement for a move insn for the
3521 specified mode. */
3522 int
3523 sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a)
3524 {
3525 /* The 4 byte displacement move insns are the same as the 2 byte
3526 versions but take a 12 bit displacement. All we need to do is to
3527 scale the max. displacement value accordingly. */
3528 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3529
3530 /* SH2A supports FPU move insns with 12 bit displacements.
3531 Other variants do not support any kind of displacements for
3532 FPU move insns. */
3533 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3534 return 0;
3535 else
3536 {
3537 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3538 const int mode_sz = GET_MODE_SIZE (mode);
3539 int r = 15 * mov_insn_sz * disp_scale;
3540
3541 /* If the mov insn will be split into multiple loads/stores, the
3542 maximum possible displacement is a bit smaller. */
3543 if (mode_sz > mov_insn_sz)
3544 r -= mode_sz - mov_insn_sz;
3545 return r;
3546 }
3547 }
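/* Worked examples of the computation above (non-FPU modes, so the early
   return for FPU modes does not apply):
     SImode, !consider_sh2a:  15 * 4             = 60
     HImode, !consider_sh2a:  15 * 2             = 30
     QImode, !consider_sh2a:  15 * 1             = 15
     SImode,  consider_sh2a:  15 * 4 * (4095/15) = 16380
     DImode, !consider_sh2a:  15 * 4 - (8 - 4)   = 56  (split into two moves)  */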
3548
3549 /* Determine the alignment mask for a move insn of the
3550 specified mode. */
3551 static inline int
3552 mov_insn_alignment_mask (machine_mode mode, bool consider_sh2a)
3553 {
3554 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3555 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3556 }
3557
3558 /* Return the displacement value of a displacement address. */
3559 HOST_WIDE_INT
3560 sh_disp_addr_displacement (rtx x)
3561 {
3562 gcc_assert (satisfies_constraint_Sdd (x));
3563 return INTVAL (XEXP (XEXP (x, 0), 1));
3564 }
3565
3566 /* Compute the cost of an address. */
3567 static int
3568 sh_address_cost (rtx x, machine_mode mode,
3569 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3570 {
3571 /* 'GBR + 0'. Account one more because of R0 restriction. */
3572 if (REG_P (x) && REGNO (x) == GBR_REG)
3573 return 2;
3574
3575 /* Simple reg, post-inc, pre-dec addressing. */
3576 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3577 return 1;
3578
3579 /* 'reg + disp' addressing. */
3580 if (GET_CODE (x) == PLUS
3581 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3582 {
3583 /* 'GBR + disp'. Account one more because of R0 restriction. */
3584 if (REGNO (XEXP (x, 0)) == GBR_REG
3585 && gbr_displacement (XEXP (x, 1), mode))
3586 return 2;
3587
3588 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3589
3590 if (offset == 0)
3591 return 1;
3592
3593 /* The displacement would fit into a 2 byte move insn.
3594 HImode and QImode loads/stores with displacement put pressure on
3595 R0 which will most likely require another reg copy. Thus account
3596 a higher cost for that. */
3597 if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
3598 return (mode == HImode || mode == QImode) ? 2 : 1;
3599
3600 /* The displacement would fit into a 4 byte move insn (SH2A). */
3601 if (TARGET_SH2A
3602 && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
3603 return 2;
3604
3605 /* The displacement is probably out of range and will require extra
3606 calculations. */
3607 return 3;
3608 }
3609
3610 /* 'reg + reg' addressing. Account a slightly higher cost because of
3611 increased pressure on R0. */
3612 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1)))
3613 return 3;
3614
3615 /* Not sure what it is - probably expensive. */
3616 return 10;
3617 }
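/* Summary of the address cost scheme above (illustrative; the exact value
   for 'reg + disp' depends on the mode as handled in the code):
     @Rn, @Rn+, @-Rn                            -> 1
     @(0,Rn)                                    -> 1
     @(disp,Rn), disp fits the 2 byte insn      -> 1  (2 for QImode/HImode)
     @(disp,Rn), disp fits the SH2A 4 byte insn -> 2
     @GBR, @(disp,GBR)                          -> 2
     @(R0,Rn)                                   -> 3
     out-of-range or unknown forms              -> 3 or 10  */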
3618
3619 /* Code to expand a shift. */
3620 static void
3621 gen_ashift (int type, int n, rtx reg)
3622 {
3623 rtx n_rtx;
3624
3625 /* Negative values here come from the shift_amounts array. */
3626 if (n < 0)
3627 {
3628 if (type == ASHIFT)
3629 type = LSHIFTRT;
3630 else
3631 type = ASHIFT;
3632 n = -n;
3633 }
3634
3635 n_rtx = GEN_INT (n);
3636 gcc_assert (satisfies_constraint_P27 (n_rtx));
3637
3638 switch (type)
3639 {
3640 case ASHIFTRT:
3641 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3642 break;
3643 case LSHIFTRT:
3644 if (n == 1)
3645 emit_insn (gen_shlr (reg, reg));
3646 else
3647 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3648 break;
3649 case ASHIFT:
3650 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3651 break;
3652 default:
3653 gcc_unreachable ();
3654 }
3655 }
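/* Usage sketch for gen_ashift (illustrative; 'reg' stands for any SImode
   register rtx accepted by the shift patterns):
     gen_ashift (ASHIFT,   2, reg);   emits gen_ashlsi3_k with count 2
     gen_ashift (LSHIFTRT, 1, reg);   emits gen_shlr
     gen_ashift (ASHIFT,  -1, reg);   negative counts flip the direction,
                                      so this is a logical right shift by 1  */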
3656
3657 /* Code to expand a HImode shift. */
3658 static void
3659 gen_ashift_hi (int type, int n, rtx reg)
3660 {
3661 /* Negative values here come from the shift_amounts array. */
3662 if (n < 0)
3663 {
3664 if (type == ASHIFT)
3665 type = LSHIFTRT;
3666 else
3667 type = ASHIFT;
3668 n = -n;
3669 }
3670
3671 switch (type)
3672 {
3673 case ASHIFTRT:
3674 case LSHIFTRT:
3675 /* We don't have HImode right shift operations because using the
3676 ordinary 32 bit shift instructions for that doesn't generate proper
3677 zero/sign extension.
3678 gen_ashift_hi is only called in contexts where we know that the
3679 sign extension works out correctly. */
3680 {
3681 int offset = 0;
3682 if (GET_CODE (reg) == SUBREG)
3683 {
3684 offset = SUBREG_BYTE (reg);
3685 reg = SUBREG_REG (reg);
3686 }
3687 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3688 break;
3689 }
3690 case ASHIFT:
3691 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3692 break;
3693 }
3694 }
3695
3696 /* Output RTL to split a constant shift into its component SH constant
3697 shift instructions. */
3698 void
3699 gen_shifty_op (int code, rtx *operands)
3700 {
3701 int value = INTVAL (operands[2]);
3702 int max, i;
3703
3704 /* Truncate the shift count in case it is out of bounds. */
3705 value = value & 31;
3706
3707 if (value == 31)
3708 {
3709 if (code == LSHIFTRT)
3710 {
3711 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3712 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3713 return;
3714 }
3715 else if (code == ASHIFT)
3716 {
3717 /* There is a two instruction sequence for 31 bit left shifts,
3718 but it requires r0. */
3719 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3720 {
3721 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3722 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3723 return;
3724 }
3725 }
3726 }
3727 else if (value == 0)
3728 {
3729 /* This can happen even when optimizing, if there were subregs before
3730 reload. Don't output a nop here, as this is never optimized away;
3731 use a no-op move instead. */
3732 emit_insn (gen_rtx_SET (operands[0], operands[0]));
3733 return;
3734 }
3735
3736 max = ashl_lshr_seq[value].insn_count;
3737 for (i = 0; i < max; i++)
3738 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
3739 }
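/* A caller typically sets up the operands array like this (a sketch,
   mirroring the calls made later in this file, e.g. in gen_shl_and):

     rtx operands[3];
     operands[0] = dest;               the register shifted in place
     operands[2] = GEN_INT (count);    constant shift count
     gen_shifty_op (ASHIFT, operands);

   operands[1] is not read here; the shift is decomposed into the
   sub-shifts listed in ashl_lshr_seq and emitted on operands[0].  */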
3740
3741 /* Same as gen_shifty_op, but optimized for values where the topmost bits
3742 don't matter. */
3743 void
3744 gen_shifty_hi_op (int code, rtx *operands)
3745 {
3746 int value = INTVAL (operands[2]);
3747 int max, i;
3748 void (*gen_fun) (int, int, rtx);
3749
3750 /* This operation is used by and_shl for SImode values with a few
3751 high bits known to be cleared. */
3752 value &= 31;
3753 if (value == 0)
3754 {
3755 emit_insn (gen_nop ());
3756 return;
3757 }
3758
3759 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3760 if (code == ASHIFT)
3761 {
3762 max = ext_ashl_lshr_seq[value].insn_count;
3763 for (i = 0; i < max; i++)
3764 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3765 }
3766 else
3767 /* When shifting right, emit the shifts in reverse order, so that
3768 solitary negative values come first. */
3769 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
3770 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3771 }
3772
3773 /* Output RTL for an arithmetic right shift.
3774 ??? Rewrite to use super-optimizer sequences. */
3775 bool
3776 expand_ashiftrt (rtx *operands)
3777 {
3778 rtx wrk;
3779 char func[18];
3780 int value;
3781
3782 if (TARGET_DYNSHIFT)
3783 {
3784 if (!CONST_INT_P (operands[2]))
3785 {
3786 rtx count = copy_to_mode_reg (SImode, operands[2]);
3787 emit_insn (gen_negsi2 (count, count));
3788 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3789 return true;
3790 }
3791 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3792 > 1 + SH_DYNAMIC_SHIFT_COST)
3793 {
3794 rtx count
3795 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3796 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3797 return true;
3798 }
3799 }
3800 if (!CONST_INT_P (operands[2]))
3801 return false;
3802
3803 value = INTVAL (operands[2]) & 31;
3804
3805 if (value == 31)
3806 {
3807 /* If we are called from abs expansion, arrange things so that we
3808 can use a single MT instruction that doesn't clobber the source,
3809 if LICM can hoist out the load of the constant zero. */
3810 if (currently_expanding_to_rtl)
3811 {
3812 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3813 operands[1]));
3814 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
3815 return true;
3816 }
3817 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3818 return true;
3819 }
3820 else if (value >= 16 && value <= 19)
3821 {
3822 wrk = gen_reg_rtx (SImode);
3823 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3824 value -= 16;
3825 while (value--)
3826 gen_ashift (ASHIFTRT, 1, wrk);
3827 emit_move_insn (operands[0], wrk);
3828 return true;
3829 }
3830 /* Expand a short sequence inline; for longer shifts, call a library routine. */
3831 else if (value <= 5)
3832 {
3833 wrk = gen_reg_rtx (SImode);
3834 emit_move_insn (wrk, operands[1]);
3835 while (value--)
3836 gen_ashift (ASHIFTRT, 1, wrk);
3837 emit_move_insn (operands[0], wrk);
3838 return true;
3839 }
3840
3841 wrk = gen_reg_rtx (Pmode);
3842
3843 /* Load the value into an arg reg and call a helper. */
3844 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3845 sprintf (func, "__ashiftrt_r4_%d", value);
3846 rtx lab = function_symbol (wrk, func, SFUNC_STATIC).lab;
3847 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk, lab));
3848 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3849 return true;
3850 }
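/* Rough summary of the strategy above for a constant shift count N
   (derived from the code; exact instruction counts depend on the target):
     N == 31        -> cmp/gt against zero plus a negated T-bit store
                       (or ashrsi2_31 outside of RTL expansion)
     16 <= N <= 19  -> ashrsi2_16 followed by N - 16 single-bit shifts
     N <= 5         -> N single-bit arithmetic shifts
     otherwise      -> call the static helper __ashiftrt_r4_<N>
   With TARGET_DYNSHIFT, a dynamic shift (ashrsi3_d) with a negated count
   is used instead when the inline sequence would be longer than
   1 + SH_DYNAMIC_SHIFT_COST.  */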
3851
3852 /* Try to find a good way to implement the combiner pattern
3853 [(set (match_operand:SI 0 "register_operand" "r")
3854 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3855 (match_operand:SI 2 "const_int_operand" "n"))
3856 (match_operand:SI 3 "const_int_operand" "n"))) .
3857 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3858 return 0 for simple right / left or left/right shift combination.
3859 return 1 for a combination of shifts with zero_extend.
3860 return 2 for a combination of shifts with an AND that needs r0.
3861 return 3 for a combination of shifts with an AND that needs an extra
3862 scratch register, when the three highmost bits of the AND mask are clear.
3863 return 4 for a combination of shifts with an AND that needs an extra
3864 scratch register, when any of the three highmost bits of the AND mask
3865 is set.
3866 If ATTRP is set, store an initial right shift width in ATTRP[0],
3867 and the instruction length in ATTRP[1]. These values are not valid
3868 when returning 0.
3869 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3870 shift_amounts for the last shift value that is to be used before the
3871 sign extend. */
3872 int
3873 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3874 {
3875 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3876 int left = INTVAL (left_rtx), right;
3877 int best = 0;
3878 int cost, best_cost = 10000;
3879 int best_right = 0, best_len = 0;
3880 int i;
3881 int can_ext;
3882
3883 if (left < 0 || left > 31)
3884 return 0;
3885 if (CONST_INT_P (mask_rtx))
3886 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3887 else
3888 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3889 /* Can this be expressed as a right shift / left shift pair? */
3890 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3891 right = exact_log2 (lsb);
3892 mask2 = ~(mask + lsb - 1);
3893 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3894 /* mask has no zeroes other than trailing zeroes <==> ! mask2 */
3895 if (! mask2)
3896 best_cost = ashl_lshr_seq[right].insn_count
3897 + ashl_lshr_seq[right + left].insn_count;
3898 /* mask has no trailing zeroes <==> ! right */
3899 else if (! right && mask2 == ~(lsb2 - 1))
3900 {
3901 int late_right = exact_log2 (lsb2);
3902 best_cost = ashl_lshr_seq[left + late_right].insn_count
3903 + ashl_lshr_seq[late_right].insn_count;
3904 }
3905 /* Try to use zero extend. */
3906 if (mask2 == ~(lsb2 - 1))
3907 {
3908 int width, first;
3909
3910 for (width = 8; width <= 16; width += 8)
3911 {
3912 /* Can we zero-extend right away? */
3913 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3914 {
3915 cost = 1 + ext_ashl_lshr_seq[right].insn_count
3916 + ext_ashl_lshr_seq[left + right].insn_count;
3917 if (cost < best_cost)
3918 {
3919 best = 1;
3920 best_cost = cost;
3921 best_right = right;
3922 best_len = cost;
3923 if (attrp)
3924 attrp[2] = -1;
3925 }
3926 continue;
3927 }
3928 /* ??? Could try to put zero extend into initial right shift,
3929 or even shift a bit left before the right shift. */
3930 /* Determine value of first part of left shift, to get to the
3931 zero extend cut-off point. */
3932 first = width - exact_log2 (lsb2) + right;
3933 if (first >= 0 && right + left - first >= 0)
3934 {
3935 cost = ext_ashl_lshr_seq[right].insn_count
3936 + ext_ashl_lshr_seq[first].insn_count + 1
3937 + ext_ashl_lshr_seq[right + left - first].insn_count;
3938
3939 if (cost < best_cost)
3940 {
3941 best = 1;
3942 best_cost = cost;
3943 best_right = right;
3944 best_len = cost;
3945 if (attrp)
3946 attrp[2] = first;
3947 }
3948 }
3949 }
3950 }
3951 /* Try to use r0 AND pattern */
3952 for (i = 0; i <= 2; i++)
3953 {
3954 if (i > right)
3955 break;
3956 if (! CONST_OK_FOR_K08 (mask >> i))
3957 continue;
3958 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
3959 if (cost < best_cost)
3960 {
3961 best = 2;
3962 best_cost = cost;
3963 best_right = i;
3964 best_len = cost - 1;
3965 }
3966 }
3967 /* Try to use a scratch register to hold the AND operand. */
3968 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3969 for (i = 0; i <= 2; i++)
3970 {
3971 if (i > right)
3972 break;
3973 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
3974 + (can_ext
3975 ? ext_ashl_lshr_seq
3976 : ashl_lshr_seq)[left + i].insn_count;
3977 if (cost < best_cost)
3978 {
3979 best = 4 - can_ext;
3980 best_cost = cost;
3981 best_right = i;
3982 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
3983 }
3984 }
3985
3986 if (attrp)
3987 {
3988 attrp[0] = best_right;
3989 attrp[1] = best_len;
3990 }
3991 return best;
3992 }
3993
3994 /* This is used in length attributes of the unnamed instructions
3995 corresponding to shl_and_kind return values of 1 and 2. */
3996 int
3997 shl_and_length (rtx insn)
3998 {
3999 rtx set_src, left_rtx, mask_rtx;
4000 int attributes[3];
4001
4002 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4003 left_rtx = XEXP (XEXP (set_src, 0), 1);
4004 mask_rtx = XEXP (set_src, 1);
4005 shl_and_kind (left_rtx, mask_rtx, attributes);
4006 return attributes[1];
4007 }
4008
4009 /* This is used in length attribute of the and_shl_scratch instruction. */
4010 int
4011 shl_and_scr_length (rtx insn)
4012 {
4013 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4014 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4015 rtx op = XEXP (set_src, 0);
4016 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4017 op = XEXP (XEXP (op, 0), 0);
4018 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4019 }
4020
4021 /* Generate rtl for instructions for which shl_and_kind advised a particular
4022 method of generating them, i.e. returned nonzero. */
4023 bool
4024 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4025 {
4026 int attributes[3];
4027 unsigned HOST_WIDE_INT mask;
4028 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4029 int right, total_shift;
4030 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
4031
4032 right = attributes[0];
4033 total_shift = INTVAL (left_rtx) + right;
4034 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4035 switch (kind)
4036 {
4037 default:
4038 return true;
4039 case 1:
4040 {
4041 int first = attributes[2];
4042 rtx operands[3];
4043
4044 if (first < 0)
4045 {
4046 emit_insn ((mask << right) <= 0xff
4047 ? gen_zero_extendqisi2 (dest,
4048 gen_lowpart (QImode, source))
4049 : gen_zero_extendhisi2 (dest,
4050 gen_lowpart (HImode, source)));
4051 source = dest;
4052 }
4053 if (source != dest)
4054 emit_insn (gen_movsi (dest, source));
4055 operands[0] = dest;
4056 if (right)
4057 {
4058 operands[2] = GEN_INT (right);
4059 gen_shifty_hi_op (LSHIFTRT, operands);
4060 }
4061 if (first > 0)
4062 {
4063 operands[2] = GEN_INT (first);
4064 gen_shifty_hi_op (ASHIFT, operands);
4065 total_shift -= first;
4066 mask <<= first;
4067 }
4068 if (first >= 0)
4069 emit_insn (mask <= 0xff
4070 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4071 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4072 if (total_shift > 0)
4073 {
4074 operands[2] = GEN_INT (total_shift);
4075 gen_shifty_hi_op (ASHIFT, operands);
4076 }
4077 break;
4078 }
4079 case 4:
4080 shift_gen_fun = gen_shifty_op;
4081 /* FALLTHRU */
4082 case 3:
4083 /* If the topmost bit that matters is set, set the topmost bits
4084 that don't matter. This way, we might be able to get a shorter
4085 signed constant. */
4086 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4087 mask |= (HOST_WIDE_INT) ((HOST_WIDE_INT_M1U) << (31 - total_shift));
4088 /* FALLTHRU */
4089 case 2:
4090 /* Don't expand fine-grained when combining, because that will
4091 make the pattern fail. */
4092 if (currently_expanding_to_rtl
4093 || reload_in_progress || reload_completed)
4094 {
4095 rtx operands[3];
4096
4097 /* Cases 3 and 4 should be handled by this split
4098 only while combining */
4099 gcc_assert (kind <= 2);
4100 if (right)
4101 {
4102 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4103 source = dest;
4104 }
4105 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4106 if (total_shift)
4107 {
4108 operands[0] = dest;
4109 operands[1] = dest;
4110 operands[2] = GEN_INT (total_shift);
4111 shift_gen_fun (ASHIFT, operands);
4112 }
4113 break;
4114 }
4115 else
4116 {
4117 int neg = 0;
4118 if (kind != 4 && total_shift < 16)
4119 {
4120 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4121 if (neg > 0)
4122 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4123 else
4124 neg = 0;
4125 }
4126 emit_insn (gen_and_shl_scratch (dest, source,
4127 GEN_INT (right),
4128 GEN_INT (mask),
4129 GEN_INT (total_shift + neg),
4130 GEN_INT (neg)));
4131 emit_insn (gen_movsi (dest, dest));
4132 break;
4133 }
4134 }
4135 return false;
4136 }
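/* In short, the expansion above emits, per shl_and_kind value:
     1:      optional lshr, optional shl, a QImode/HImode zero-extend,
             and a final shl
     2:      when expanding or reloading: lshr (if needed), andsi3, shl
     2/3/4:  while combining: the and_shl_scratch pattern
   and returns true without emitting anything for kind 0, leaving the
   plain shift / shift case to the caller.  */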
4137
4138 /* Try to find a good way to implement the combiner pattern
4139 [(set (match_operand:SI 0 "register_operand" "=r")
4140 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4141 (match_operand:SI 2 "const_int_operand" "n")
4142 (match_operand:SI 3 "const_int_operand" "n")
4143 (const_int 0)))
4144 (clobber (reg:SI T_REG))]
4145 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4146 return 0 for simple left / right shift combination.
4147 return 1 for left shift / 8 bit sign extend / left shift.
4148 return 2 for left shift / 16 bit sign extend / left shift.
4149 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4150 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4151 return 5 for left shift / 16 bit sign extend / right shift
4152 return 6 for < 8 bit sign extend / left shift.
4153 return 7 for < 8 bit sign extend / left shift / single right shift.
4154 If COSTP is nonzero, assign the calculated cost to *COSTP. */
4155 int
4156 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
4157 {
4158 int left, size, insize, ext;
4159 int cost = 0, best_cost;
4160 int kind;
4161
4162 left = INTVAL (left_rtx);
4163 size = INTVAL (size_rtx);
4164 insize = size - left;
4165 gcc_assert (insize > 0);
4166 /* Default to left / right shift. */
4167 kind = 0;
4168 best_cost = ashl_lshr_seq[32 - insize].insn_count
4169 + ashl_lshr_seq[32 - size].insn_count;
4170 if (size <= 16)
4171 {
4172 /* 16 bit shift / sign extend / 16 bit shift */
4173 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4174 + ashl_lshr_seq[16 - size].insn_count;
4175 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4176 below, by alternative 3 or something even better. */
4177 if (cost < best_cost)
4178 {
4179 kind = 5;
4180 best_cost = cost;
4181 }
4182 }
4183 /* Try a plain sign extend between two shifts. */
4184 for (ext = 16; ext >= insize; ext -= 8)
4185 {
4186 if (ext <= size)
4187 {
4188 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4189 + ashl_lshr_seq[size - ext].insn_count;
4190 if (cost < best_cost)
4191 {
4192 kind = ext / (unsigned) 8;
4193 best_cost = cost;
4194 }
4195 }
4196 /* Check if we can do a sloppy shift with a final signed shift
4197 restoring the sign. */
4198 if (EXT_SHIFT_SIGNED (size - ext))
4199 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4200 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4201 /* If not, maybe it's still cheaper to do the second shift sloppy,
4202 and do a final sign extend? */
4203 else if (size <= 16)
4204 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4205 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4206 + 1;
4207 else
4208 continue;
4209 if (cost < best_cost)
4210 {
4211 kind = ext / (unsigned) 8 + 2;
4212 best_cost = cost;
4213 }
4214 }
4215 /* Check if we can sign extend in r0 */
4216 if (insize < 8)
4217 {
4218 cost = 3 + ashl_lshr_seq[left].insn_count;
4219 if (cost < best_cost)
4220 {
4221 kind = 6;
4222 best_cost = cost;
4223 }
4224 /* Try the same with a final signed shift. */
4225 if (left < 31)
4226 {
4227 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4228 if (cost < best_cost)
4229 {
4230 kind = 7;
4231 best_cost = cost;
4232 }
4233 }
4234 }
4235 if (TARGET_DYNSHIFT)
4236 {
4237 /* Try to use a dynamic shift. */
4238 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4239 if (cost < best_cost)
4240 {
4241 kind = 0;
4242 best_cost = cost;
4243 }
4244 }
4245 if (costp)
4246 *costp = cost;
4247 return kind;
4248 }
4249
4250 /* Function to be used in the length attribute of the instructions
4251 implementing this pattern. */
4252 int
4253 shl_sext_length (rtx insn)
4254 {
4255 rtx set_src, left_rtx, size_rtx;
4256 int cost;
4257
4258 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4259 left_rtx = XEXP (XEXP (set_src, 0), 1);
4260 size_rtx = XEXP (set_src, 1);
4261 shl_sext_kind (left_rtx, size_rtx, &cost);
4262 return cost;
4263 }
4264
4265 /* Generate rtl for this pattern */
4266 bool
4267 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
4268 {
4269 int kind;
4270 int left, size, insize, cost;
4271 rtx operands[3];
4272
4273 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4274 left = INTVAL (left_rtx);
4275 size = INTVAL (size_rtx);
4276 insize = size - left;
4277 switch (kind)
4278 {
4279 case 1:
4280 case 2:
4281 case 3:
4282 case 4:
4283 {
4284 int ext = kind & 1 ? 8 : 16;
4285 int shift2 = size - ext;
4286
4287 /* Don't expand fine-grained when combining, because that will
4288 make the pattern fail. */
4289 if (! currently_expanding_to_rtl
4290 && ! reload_in_progress && ! reload_completed)
4291 {
4292 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4293 emit_insn (gen_movsi (dest, source));
4294 break;
4295 }
4296 if (dest != source)
4297 emit_insn (gen_movsi (dest, source));
4298 operands[0] = dest;
4299 if (ext - insize)
4300 {
4301 operands[2] = GEN_INT (ext - insize);
4302 gen_shifty_hi_op (ASHIFT, operands);
4303 }
4304 emit_insn (kind & 1
4305 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4306 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4307 if (kind <= 2)
4308 {
4309 if (shift2)
4310 {
4311 operands[2] = GEN_INT (shift2);
4312 gen_shifty_op (ASHIFT, operands);
4313 }
4314 }
4315 else
4316 {
4317 if (shift2 > 0)
4318 {
4319 if (EXT_SHIFT_SIGNED (shift2))
4320 {
4321 operands[2] = GEN_INT (shift2 + 1);
4322 gen_shifty_op (ASHIFT, operands);
4323 operands[2] = const1_rtx;
4324 gen_shifty_op (ASHIFTRT, operands);
4325 break;
4326 }
4327 operands[2] = GEN_INT (shift2);
4328 gen_shifty_hi_op (ASHIFT, operands);
4329 }
4330 else if (shift2)
4331 {
4332 operands[2] = GEN_INT (-shift2);
4333 gen_shifty_hi_op (LSHIFTRT, operands);
4334 }
4335 emit_insn (size <= 8
4336 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4337 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4338 }
4339 break;
4340 }
4341 case 5:
4342 {
4343 int i = 16 - size;
4344 if (! currently_expanding_to_rtl
4345 && ! reload_in_progress && ! reload_completed)
4346 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4347 else
4348 {
4349 operands[0] = dest;
4350 operands[2] = GEN_INT (16 - insize);
4351 gen_shifty_hi_op (ASHIFT, operands);
4352 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4353 }
4354 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4355 while (--i >= 0)
4356 gen_ashift (ASHIFTRT, 1, dest);
4357 break;
4358 }
4359 case 6:
4360 case 7:
4361 /* Don't expand fine-grained when combining, because that will
4362 make the pattern fail. */
4363 if (! currently_expanding_to_rtl
4364 && ! reload_in_progress && ! reload_completed)
4365 {
4366 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4367 emit_insn (gen_movsi (dest, source));
4368 break;
4369 }
4370 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4371 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4372 emit_insn (gen_addsi3 (dest, dest, GEN_INT (HOST_WIDE_INT_M1U << (insize - 1))));
4373 operands[0] = dest;
4374 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4375 gen_shifty_op (ASHIFT, operands);
4376 if (kind == 7)
4377 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4378 break;
4379 default:
4380 return true;
4381 }
4382 return false;
4383 }
4384
4385 typedef struct label_ref_list_d
4386 {
4387 rtx_code_label *label;
4388 struct label_ref_list_d *next;
4389 } *label_ref_list_t;
4390
4391 static object_allocator<label_ref_list_d> label_ref_list_d_pool
4392 ("label references list");
4393
4394 /* The SH cannot load a large constant into a register, constants have to
4395 come from a pc relative load. The reference of a pc relative load
4396 instruction must be less than 1k in front of the instruction. This
4397 means that we often have to dump a constant inside a function, and
4398 generate code to branch around it.
4399
4400 It is important to minimize this, since the branches will slow things
4401 down and make things bigger.
4402
4403 Worst case code looks like:
4404
4405 mov.l L1,rn
4406 bra L2
4407 nop
4408 align
4409 L1: .long value
4410 L2:
4411 ..
4412
4413 mov.l L3,rn
4414 bra L4
4415 nop
4416 align
4417 L3: .long value
4418 L4:
4419 ..
4420
4421 We fix this by performing a scan before scheduling, which notices which
4422 instructions need to have their operands fetched from the constant table
4423 and builds the table.
4424
4425 The algorithm is:
4426
4427 Scan to find an instruction which needs a pcrel move. Look forward, find the
4428 last barrier which is within MAX_COUNT bytes of the requirement.
4429 If there isn't one, make one. Process all the instructions between
4430 the find and the barrier.
4431
4432 In the above example, we can tell that L3 is within 1k of L1, so
4433 the first move can be shrunk from the 3 insn+constant sequence into
4434 just 1 insn, and the constant moved to L3 to make:
4435
4436 mov.l L1,rn
4437 ..
4438 mov.l L3,rn
4439 bra L4
4440 nop
4441 align
4442 L3:.long value
4443 L4:.long value
4444
4445 Then the second move becomes the target for the shortening process. */
4446
4447 typedef struct
4448 {
4449 rtx value; /* Value in table. */
4450 rtx_code_label *label; /* Label of value. */
4451 label_ref_list_t wend; /* End of window. */
4452 machine_mode mode; /* Mode of value. */
4453
4454 /* True if this constant is accessed as part of a post-increment
4455 sequence. Note that HImode constants are never accessed in this way. */
4456 bool part_of_sequence_p;
4457 } pool_node;
4458
4459 /* The maximum number of constants that can fit into one pool, since
4460 constants in the range 0..510 are at least 2 bytes long, and in the
4461 range from there to 1018 at least 4 bytes. */
4462
4463 #define MAX_POOL_SIZE 372
4464 static pool_node pool_vector[MAX_POOL_SIZE];
4465 static int pool_size;
4466 static rtx_code_label *pool_window_label;
4467 static int pool_window_last;
4468
4469 static int max_labelno_before_reorg;
4470
4471 /* ??? If we need a constant in HImode which is the truncated value of a
4472 constant we need in SImode, we could combine the two entries thus saving
4473 two bytes. Is this common enough to be worth the effort of implementing
4474 it? */
4475
4476 /* ??? This stuff should be done at the same time that we shorten branches.
4477 As it is now, we must assume that all branches are the maximum size, and
4478 this causes us to almost always output constant pools sooner than
4479 necessary. */
4480
4481 /* Add a constant to the pool and return its label. */
4482 static rtx_code_label *
4483 add_constant (rtx x, machine_mode mode, rtx last_value)
4484 {
4485 rtx_code_label *lab, *new_rtx;
4486 label_ref_list_t ref, newref;
4487
4488 /* First see if we've already got it. */
4489 for (int i = 0; i < pool_size; i++)
4490 {
4491 if (x->code == pool_vector[i].value->code
4492 && mode == pool_vector[i].mode)
4493 {
4494 if (x->code == CODE_LABEL)
4495 {
4496 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4497 continue;
4498 }
4499 if (rtx_equal_p (x, pool_vector[i].value))
4500 {
4501 lab = new_rtx = 0;
4502 if (! last_value
4503 || ! i
4504 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4505 {
4506 new_rtx = gen_label_rtx ();
4507 LABEL_REFS (new_rtx) = pool_vector[i].label;
4508 pool_vector[i].label = lab = new_rtx;
4509 }
4510 if (lab && pool_window_label)
4511 {
4512 newref = label_ref_list_d_pool.allocate ();
4513 newref->label = pool_window_label;
4514 ref = pool_vector[pool_window_last].wend;
4515 newref->next = ref;
4516 pool_vector[pool_window_last].wend = newref;
4517 }
4518 if (new_rtx)
4519 pool_window_label = new_rtx;
4520 pool_window_last = i;
4521 return lab;
4522 }
4523 }
4524 }
4525
4526 /* Need a new one. */
4527 pool_vector[pool_size].value = x;
4528 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4529 {
4530 lab = 0;
4531 pool_vector[pool_size - 1].part_of_sequence_p = true;
4532 }
4533 else
4534 lab = gen_label_rtx ();
4535 pool_vector[pool_size].mode = mode;
4536 pool_vector[pool_size].label = lab;
4537 pool_vector[pool_size].wend = NULL;
4538 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4539 if (lab && pool_window_label)
4540 {
4541 newref = label_ref_list_d_pool.allocate ();
4542 newref->label = pool_window_label;
4543 ref = pool_vector[pool_window_last].wend;
4544 newref->next = ref;
4545 pool_vector[pool_window_last].wend = newref;
4546 }
4547 if (lab)
4548 pool_window_label = lab;
4549 pool_window_last = pool_size;
4550 pool_size++;
4551 return lab;
4552 }
4553
4554 /* Output the literal table. START, if nonzero, is the first instruction
4555 this table is needed for, and also indicates that there is at least one
4556 casesi_worker_2 instruction; we have to emit the operand3 labels from
4557 these insns at a 4-byte aligned position. BARRIER is the barrier
4558 after which we are to place the table. */
4559 static void
4560 dump_table (rtx_insn *start, rtx_insn *barrier)
4561 {
4562 rtx_insn *scan = barrier;
4563 bool need_align = true;
4564 rtx lab;
4565 label_ref_list_t ref;
4566 bool have_df = false;
4567
4568 /* Do two passes; the first time, dump out the HI sized constants. */
4569
4570 for (int i = 0; i < pool_size; i++)
4571 {
4572 pool_node *p = &pool_vector[i];
4573
4574 if (p->mode == HImode)
4575 {
4576 if (need_align)
4577 {
4578 scan = emit_insn_after (gen_align_2 (), scan);
4579 need_align = false;
4580 }
4581 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4582 scan = emit_label_after (lab, scan);
4583 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4584 scan);
4585 for (ref = p->wend; ref; ref = ref->next)
4586 {
4587 lab = ref->label;
4588 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4589 }
4590 }
4591 else if (p->mode == DFmode)
4592 have_df = true;
4593 }
4594
4595 need_align = true;
4596
4597 if (start)
4598 {
4599 scan = emit_insn_after (gen_align_4 (), scan);
4600 need_align = false;
4601 for (; start != barrier; start = NEXT_INSN (start))
4602 if (NONJUMP_INSN_P (start)
4603 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4604 {
4605 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4606 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4607
4608 scan = emit_label_after (lab, scan);
4609 }
4610 }
4611 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4612 {
4613 rtx_insn *align_insn = NULL;
4614
4615 scan = emit_label_after (gen_label_rtx (), scan);
4616 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4617 need_align = false;
4618
4619 for (int i = 0; i < pool_size; i++)
4620 {
4621 pool_node *p = &pool_vector[i];
4622
4623 switch (p->mode)
4624 {
4625 case HImode:
4626 break;
4627 case SImode:
4628 case SFmode:
4629 if (align_insn && !p->part_of_sequence_p)
4630 {
4631 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4632 emit_label_before (lab, align_insn);
4633 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4634 align_insn);
4635 for (ref = p->wend; ref; ref = ref->next)
4636 {
4637 lab = ref->label;
4638 emit_insn_before (gen_consttable_window_end (lab),
4639 align_insn);
4640 }
4641 delete_insn (align_insn);
4642 align_insn = NULL;
4643 continue;
4644 }
4645 else
4646 {
4647 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4648 scan = emit_label_after (lab, scan);
4649 scan = emit_insn_after (gen_consttable_4 (p->value,
4650 const0_rtx), scan);
4651 need_align = ! need_align;
4652 }
4653 break;
4654 case DFmode:
4655 if (need_align)
4656 {
4657 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4658 align_insn = scan;
4659 need_align = false;
4660 }
4661 /* FALLTHRU */
4662 case DImode:
4663 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4664 scan = emit_label_after (lab, scan);
4665 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4666 scan);
4667 break;
4668 default:
4669 gcc_unreachable ();
4670 }
4671
4672 if (p->mode != HImode)
4673 {
4674 for (ref = p->wend; ref; ref = ref->next)
4675 {
4676 lab = ref->label;
4677 scan = emit_insn_after (gen_consttable_window_end (lab),
4678 scan);
4679 }
4680 }
4681 }
4682
4683 pool_size = 0;
4684 }
4685
4686 for (int i = 0; i < pool_size; i++)
4687 {
4688 pool_node *p = &pool_vector[i];
4689
4690 switch (p->mode)
4691 {
4692 case HImode:
4693 break;
4694 case SImode:
4695 case SFmode:
4696 if (need_align)
4697 {
4698 need_align = false;
4699 scan = emit_label_after (gen_label_rtx (), scan);
4700 scan = emit_insn_after (gen_align_4 (), scan);
4701 }
4702 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4703 scan = emit_label_after (lab, scan);
4704 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4705 scan);
4706 break;
4707 case DFmode:
4708 case DImode:
4709 if (need_align)
4710 {
4711 need_align = false;
4712 scan = emit_label_after (gen_label_rtx (), scan);
4713 scan = emit_insn_after (gen_align_4 (), scan);
4714 }
4715 for (lab = p->label; lab; lab = LABEL_REFS (lab))
4716 scan = emit_label_after (lab, scan);
4717 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4718 scan);
4719 break;
4720 default:
4721 gcc_unreachable ();
4722 }
4723
4724 if (p->mode != HImode)
4725 {
4726 for (ref = p->wend; ref; ref = ref->next)
4727 {
4728 lab = ref->label;
4729 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4730 }
4731 }
4732 }
4733
4734 scan = emit_insn_after (gen_consttable_end (), scan);
4735 scan = emit_barrier_after (scan);
4736 pool_size = 0;
4737 pool_window_label = NULL;
4738 pool_window_last = 0;
4739 }
4740
4741 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4742
4743 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4744
4745 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4746 CONST_DOUBLE input value is CONST_OK_FOR_I08. For a SFmode move, we don't
4747 need to fix it if the input value is CONST_OK_FOR_I08. */
4748 static bool
4749 broken_move (rtx_insn *insn)
4750 {
4751 if (NONJUMP_INSN_P (insn))
4752 {
4753 rtx pat = PATTERN (insn);
4754 if (GET_CODE (pat) == PARALLEL)
4755 pat = XVECEXP (pat, 0, 0);
4756 if (GET_CODE (pat) == SET
4757 /* We can load any 8-bit value if we don't care what the high
4758 order bits end up as. */
4759 && GET_MODE (SET_DEST (pat)) != QImode
4760 && (CONSTANT_P (SET_SRC (pat))
4761 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
4762 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
4763 /* Match mova_const. */
4764 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4765 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4766 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4767 && ! (TARGET_SH2E
4768 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4769 && (fp_zero_operand (SET_SRC (pat))
4770 || fp_one_operand (SET_SRC (pat)))
4771 /* In general we don't know the current setting of fpscr, so
4772 disable fldi.
4773 There is an exception if this was a register-register move
4774 before reload - and hence it was ascertained that we have
4775 single precision setting - and in a post-reload optimization
4776 we changed this to do a constant load. In that case
4777 we don't have an r0 clobber, hence we must use fldi. */
4778 && (TARGET_FMOVD
4779 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4780 == SCRATCH))
4781 && REG_P (SET_DEST (pat))
4782 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4783 && ! (TARGET_SH2A
4784 && GET_MODE (SET_DEST (pat)) == SImode
4785 && (satisfies_constraint_I20 (SET_SRC (pat))
4786 || satisfies_constraint_I28 (SET_SRC (pat))))
4787 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4788 return true;
4789 }
4790
4791 return false;
4792 }
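/* Examples (assuming I08 is the usual signed 8 bit immediate range):
     (set (reg:SI 1) (const_int 1000))   broken; 1000 needs a pool load
     (set (reg:SI 1) (const_int 100))    not broken; fits an 8 bit mov #imm
     (set (reg:QI 1) (const_int 200))    not broken; QImode dests never need fixing  */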
4793
4794 /* Return true if the specified insn is a mova insn. */
4795 static bool
4796 mova_p (rtx_insn *insn)
4797 {
4798 return (NONJUMP_INSN_P (insn)
4799 && GET_CODE (PATTERN (insn)) == SET
4800 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4801 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4802 /* Don't match mova_const. */
4803 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4804 }
4805
4806 /* Fix up a mova from a switch that went out of range. */
4807 static void
4808 fixup_mova (rtx_insn *mova)
4809 {
4810 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4811 if (! flag_pic)
4812 {
4813 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4814 INSN_CODE (mova) = -1;
4815 }
4816 else
4817 {
4818 rtx_insn *worker = mova;
4819 rtx_code_label *lab = gen_label_rtx ();
4820 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4821
4822 do
4823 {
4824 worker = NEXT_INSN (worker);
4825 gcc_assert (worker
4826 && !LABEL_P (worker)
4827 && !JUMP_P (worker));
4828 } while (NOTE_P (worker)
4829 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4830 wpat = PATTERN (worker);
4831 wpat0 = XVECEXP (wpat, 0, 0);
4832 wpat1 = XVECEXP (wpat, 0, 1);
4833 wsrc = SET_SRC (wpat0);
4834 PATTERN (worker) = (gen_casesi_worker_2
4835 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4836 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4837 XEXP (wpat1, 0)));
4838 INSN_CODE (worker) = -1;
4839 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4840 base = gen_rtx_LABEL_REF (Pmode, lab);
4841 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4842 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4843 INSN_CODE (mova) = -1;
4844 }
4845 }
4846
4847 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4848 *num_mova, and check if the new mova is not nested within the first one.
4849 return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4850 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4851 static int
4852 untangle_mova (int *num_mova, rtx_insn **first_mova, rtx_insn *new_mova)
4853 {
4854 int n_addr = 0; /* Initialization to shut up spurious warning. */
4855 int f_target, n_target = 0; /* Likewise. */
4856
4857 if (optimize)
4858 {
4859 /* If NEW_MOVA has no address yet, it will be handled later. */
4860 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4861 return -1;
4862
4863 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4864 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4865 if (n_addr > n_target || n_addr + 1022 < n_target)
4866 {
4867 /* Change the mova into a load.
4868 broken_move will then return true for it. */
4869 fixup_mova (new_mova);
4870 return 1;
4871 }
4872 }
4873 if (!(*num_mova)++)
4874 {
4875 *first_mova = new_mova;
4876 return 2;
4877 }
4878 if (!optimize
4879 || ((f_target
4880 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4881 >= n_target))
4882 return -1;
4883
4884 (*num_mova)--;
4885 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4886 > n_target - n_addr)
4887 {
4888 fixup_mova (*first_mova);
4889 return 0;
4890 }
4891 else
4892 {
4893 fixup_mova (new_mova);
4894 return 1;
4895 }
4896 }
4897
4898 /* Find the last barrier from insn FROM which is close enough to hold the
4899 constant pool. If we can't find one, then create one near the end of
4900 the range. */
4901 static rtx_insn *
4902 find_barrier (int num_mova, rtx_insn *mova, rtx_insn *from)
4903 {
4904 int count_si = 0;
4905 int count_hi = 0;
4906 int found_hi = 0;
4907 int found_si = 0;
4908 int hi_align = 2;
4909 int si_align = 2;
4910 int leading_mova = num_mova;
4911 rtx_insn *barrier_before_mova = NULL;
4912 rtx_insn *found_barrier = NULL;
4913 rtx_insn *good_barrier = NULL;
4914 int si_limit;
4915 int hi_limit;
4916 rtx_insn *orig = from;
4917 rtx_insn *last_got = NULL;
4918 rtx_insn *last_symoff = NULL;
4919
4920 /* For HImode: range is 510, add 4 because pc counts from address of
4921 second instruction after this one, subtract 2 for the jump instruction
4922 that we may need to emit before the table, subtract 2 for the instruction
4923 that fills the jump delay slot (in very rare cases, reorg will take an
4924 instruction from after the constant pool or will leave the delay slot
4925 empty). This gives 510.
4926 For SImode: range is 1020, add 4 because pc counts from address of
4927 second instruction after this one, subtract 2 in case pc is 2 byte
4928 aligned, subtract 2 for the jump instruction that we may need to emit
4929 before the table, subtract 2 for the instruction that fills the jump
4930 delay slot. This gives 1018. */
4931
4932 /* The branch will always be shortened now that the reference address for
4933 forward branches is the successor address, thus we need no longer make
4934 adjustments to the [sh]i_limit for -O0. */
4935
4936 si_limit = 1018;
4937 hi_limit = 510;
4938
4939 while (from && count_si < si_limit && count_hi < hi_limit)
4940 {
4941 int inc = get_attr_length (from);
4942 int new_align = 1;
4943
4944 /* If this is a label that existed at the time of the compute_alignments
4945 call, determine the alignment. N.B. When find_barrier recurses for
4946 an out-of-reach mova, we might see labels at the start of previously
4947 inserted constant tables. */
4948 if (LABEL_P (from)
4949 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4950 {
4951 if (optimize)
4952 new_align = 1 << label_to_alignment (from);
4953 else if (BARRIER_P (prev_nonnote_insn (from)))
4954 new_align = 1 << barrier_align (from);
4955 else
4956 new_align = 1;
4957 inc = 0;
4958 }
4959 /* In case we are scanning a constant table because of recursion, check
4960 for explicit alignments. If the table is long, we might be forced
4961 to emit the new table in front of it; the length of the alignment
4962 might be the last straw. */
4963 else if (NONJUMP_INSN_P (from)
4964 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4965 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
4966 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
4967 /* When we find the end of a constant table, paste the new constant
4968 at the end. That is better than putting it in front because
4969 this way, we don't need extra alignment for adding a 4-byte-aligned
4970 mov(a) label to a 2/4 or 8/4 byte aligned table. */
4971 else if (NONJUMP_INSN_P (from)
4972 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
4973 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
4974 return from;
4975
4976 if (BARRIER_P (from))
4977 {
4978 rtx_insn *next;
4979
4980 found_barrier = from;
4981
4982 /* If we are at the end of the function, or in front of an alignment
4983 instruction, we need not insert an extra alignment. We prefer
4984 this kind of barrier. */
4985 if (barrier_align (from) > 2)
4986 good_barrier = from;
4987
4988 /* If we are at the end of a hot/cold block, dump the constants
4989 here. */
4990 next = NEXT_INSN (from);
4991 if (next
4992 && NOTE_P (next)
4993 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
4994 break;
4995 }
4996
4997 if (broken_move (from))
4998 {
4999 rtx pat, src, dst;
5000 machine_mode mode;
5001
5002 pat = PATTERN (from);
5003 if (GET_CODE (pat) == PARALLEL)
5004 pat = XVECEXP (pat, 0, 0);
5005 src = SET_SRC (pat);
5006 dst = SET_DEST (pat);
5007 mode = GET_MODE (dst);
5008
5009 /* A GOT pc-relative setting comes in a pair of
5010 mova .L8,r0
5011 mov.l .L8,r12
5012 instructions (plus an add r0,r12).
5013 Remember if we see one without the other. */
5014 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5015 last_got = last_got ? NULL : from;
5016 else if (PIC_ADDR_P (src))
5017 last_got = last_got ? NULL : from;
5018
5019 /* We must explicitly check the mode, because sometimes the
5020 front end will generate code to load unsigned constants into
5021 HImode targets without properly sign extending them. */
5022 if (mode == HImode
5023 || (mode == SImode && satisfies_constraint_I16 (src)
5024 && REGNO (dst) != FPUL_REG))
5025 {
5026 found_hi += 2;
5027 /* We put the short constants before the long constants, so
5028 we must count the length of short constants in the range
5029 for the long constants. */
5030 /* ??? This isn't optimal, but is easy to do. */
5031 si_limit -= 2;
5032 }
5033 else
5034 {
5035 /* We dump DF/DI constants before SF/SI ones, because
5036 the limit is the same, but the alignment requirements
5037 are higher. We may waste up to 4 additional bytes
5038 for alignment, and the DF/DI constant may have
5039 another SF/SI constant placed before it. */
5040 while (si_align > 2 && found_si + si_align - 2 > count_si)
5041 si_align >>= 1;
5042 if (found_si > count_si)
5043 count_si = found_si;
5044 found_si += GET_MODE_SIZE (mode);
5045 if (num_mova)
5046 si_limit -= GET_MODE_SIZE (mode);
5047 }
5048 }
5049
5050 if (mova_p (from))
5051 {
5052 switch (untangle_mova (&num_mova, &mova, from))
5053 {
5054 case 1:
5055 if (flag_pic)
5056 {
5057 rtx src = SET_SRC (PATTERN (from));
5058 if (GET_CODE (src) == CONST
5059 && GET_CODE (XEXP (src, 0)) == UNSPEC
5060 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5061 last_symoff = from;
5062 }
5063 break;
5064 case 0: return find_barrier (0, 0, mova);
5065 case 2:
5066 {
5067 leading_mova = 0;
5068 barrier_before_mova
5069 = good_barrier ? good_barrier : found_barrier;
5070 }
5071 default: break;
5072 }
5073 if (found_si > count_si)
5074 count_si = found_si;
5075 }
5076 else if (JUMP_TABLE_DATA_P (from)
5077 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
5078 {
5079 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5080 || (num_mova
5081 && (prev_nonnote_insn (from)
5082 == XEXP (MOVA_LABELREF (mova), 0))))
5083 num_mova--;
5084 if (barrier_align (next_real_insn (from)) == align_jumps_log)
5085 {
5086 /* We have just passed the barrier in front of the
5087 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5088 the ADDR_DIFF_VEC is accessed as data, just like our pool
5089 constants, this is a good opportunity to accommodate what
5090 we have gathered so far.
5091 If we waited any longer, we could end up at a barrier in
5092 front of code, which gives worse cache usage for separated
5093 instruction / data caches. */
5094 good_barrier = found_barrier;
5095 break;
5096 }
5097 else
5098 {
5099 rtx body = PATTERN (from);
5100 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5101 }
5102 }
5103 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5104 else if (JUMP_P (from)
5105 && ! TARGET_SH2
5106 && ! optimize_size)
5107 new_align = 4;
5108
5109 /* There is a possibility that a bf is transformed into a bf/s by the
5110 delay slot scheduler. */
5111 if (JUMP_P (from)
5112 && get_attr_type (from) == TYPE_CBRANCH
5113 && ! sequence_insn_p (from))
5114 inc += 2;
5115
5116 if (found_si)
5117 {
5118 count_si += inc;
5119 if (new_align > si_align)
5120 {
5121 si_limit -= (count_si - 1) & (new_align - si_align);
5122 si_align = new_align;
5123 }
5124 count_si = (count_si + new_align - 1) & -new_align;
5125 }
5126 if (found_hi)
5127 {
5128 count_hi += inc;
5129 if (new_align > hi_align)
5130 {
5131 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5132 hi_align = new_align;
5133 }
5134 count_hi = (count_hi + new_align - 1) & -new_align;
5135 }
5136 from = NEXT_INSN (from);
5137 }
5138
5139 if (num_mova)
5140 {
5141 if (leading_mova)
5142 {
5143 /* Try as we might, the leading mova is out of range. Change
5144 it into a load (which will become a pcload) and retry. */
5145 fixup_mova (mova);
5146 return find_barrier (0, 0, mova);
5147 }
5148 else
5149 {
5150 /* Insert the constant pool table before the mova instruction,
5151 to prevent the mova label reference from going out of range. */
5152 from = mova;
5153 good_barrier = found_barrier = barrier_before_mova;
5154 }
5155 }
5156
5157 if (found_barrier)
5158 {
5159 if (good_barrier && next_real_insn (found_barrier))
5160 found_barrier = good_barrier;
5161 }
5162 else
5163 {
5164 /* We didn't find a barrier in time to dump our stuff,
5165 so we'll make one. */
5166 rtx_code_label *label = gen_label_rtx ();
5167
5168 /* Don't emit a constant table in the middle of insns for
5169 casesi_worker_2. This is a bit overkill but is enough
5170 because casesi_worker_2 does not appear very frequently. */
5171 if (last_symoff)
5172 from = last_symoff;
5173
5174 /* If we exceeded the range, then we must back up over the last
5175 instruction we looked at. Otherwise, we just need to undo the
5176 NEXT_INSN at the end of the loop. */
5177 if (PREV_INSN (from) != orig
5178 && (count_hi > hi_limit || count_si > si_limit))
5179 from = PREV_INSN (PREV_INSN (from));
5180 else
5181 from = PREV_INSN (from);
5182
5183 /* Don't emit a constant table in the middle of global pointer setting,
5184 since that would move the addressing base GOT into another table.
5185 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
5186 in the pool anyway, so just move up the whole constant pool.
5187
5188 However, avoid doing so when the last single GOT mov is the starting
5189 insn itself. Going back past the start insn would create a negative
5190 offset, causing errors. */
5191 if (last_got && last_got != orig)
5192 from = PREV_INSN (last_got);
5193
5194 /* Don't insert the constant pool table at the position which
5195 may be the landing pad. */
5196 if (flag_exceptions
5197 && CALL_P (from)
5198 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5199 from = PREV_INSN (from);
5200
5201 /* Walk back to be just before any jump or label.
5202 Putting it before a label reduces the number of times the branch
5203 around the constant pool table will be hit. Putting it before
5204 a jump makes it more likely that the bra delay slot will be
5205 filled. */
5206 while (NOTE_P (from) || JUMP_P (from)
5207 || LABEL_P (from))
5208 from = PREV_INSN (from);
5209
5210 /* Make sure we do not split between a call and its corresponding
5211 CALL_ARG_LOCATION note. */
5212 if (CALL_P (from))
5213 {
5214 rtx_insn *next = NEXT_INSN (from);
5215 if (next && NOTE_P (next)
5216 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
5217 from = next;
5218 }
5219
5220 from = emit_jump_insn_after (gen_jump (label), from);
5221 JUMP_LABEL (from) = label;
5222 LABEL_NUSES (label) = 1;
5223 found_barrier = emit_barrier_after (from);
5224 emit_label_after (label, found_barrier);
5225 }
5226
5227 return found_barrier;
5228 }
5229
5230 /* If the instruction INSN is implemented by a special function, and we can
5231 positively find the register that is used to call the sfunc, and this
5232 register is not used anywhere else in this instruction - except as the
5233 destination of a set, return this register; else, return 0. */
5234 rtx
5235 sfunc_uses_reg (rtx_insn *insn)
5236 {
5237 int i;
5238 rtx pattern, part, reg_part, reg;
5239
5240 if (!NONJUMP_INSN_P (insn))
5241 return NULL_RTX;
5242 pattern = PATTERN (insn);
5243 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5244 return NULL_RTX;
5245
5246 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5247 {
5248 part = XVECEXP (pattern, 0, i);
5249 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5250 reg_part = part;
5251 }
5252 if (! reg_part)
5253 return NULL_RTX;
5254 reg = XEXP (reg_part, 0);
5255 for (int i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5256 {
5257 part = XVECEXP (pattern, 0, i);
5258 if (part == reg_part || GET_CODE (part) == CLOBBER)
5259 continue;
5260 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5261 && REG_P (SET_DEST (part)))
5262 ? SET_SRC (part) : part)))
5263 return NULL_RTX;
5264 }
5265 return reg;
5266 }
5267
5268 /* See if the only way in which INSN uses REG is by calling it, or by
5269 setting it while calling it. Set *SET to a SET rtx if the register
5270 is set by INSN. */
5271 static bool
5272 noncall_uses_reg (rtx reg, rtx_insn *insn, rtx *set)
5273 {
5274 *set = NULL_RTX;
5275
5276 rtx reg2 = sfunc_uses_reg (insn);
5277 if (reg2 && REGNO (reg2) == REGNO (reg))
5278 {
5279 rtx pattern = single_set (insn);
5280 if (pattern
5281 && REG_P (SET_DEST (pattern))
5282 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5283 *set = pattern;
5284 return false;
5285 }
5286 if (!CALL_P (insn))
5287 {
5288 /* We don't use rtx_equal_p because we don't care if the mode is
5289 different. */
5290 rtx pattern = single_set (insn);
5291 if (pattern
5292 && REG_P (SET_DEST (pattern))
5293 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5294 {
5295 rtx par, part;
5296 int i;
5297
5298 *set = pattern;
5299 par = PATTERN (insn);
5300 if (GET_CODE (par) == PARALLEL)
5301 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5302 {
5303 part = XVECEXP (par, 0, i);
5304 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5305 return true;
5306 }
5307 return reg_mentioned_p (reg, SET_SRC (pattern));
5308 }
5309
5310 return true;
5311 }
5312
5313 rtx pattern = PATTERN (insn);
5314
5315 if (GET_CODE (pattern) == PARALLEL)
5316 {
5317 for (int i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5318 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5319 return true;
5320 pattern = XVECEXP (pattern, 0, 0);
5321 }
5322
5323 if (GET_CODE (pattern) == SET)
5324 {
5325 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5326 {
5327 /* We don't use rtx_equal_p, because we don't care if the
5328 mode is different. */
5329 if (!REG_P (SET_DEST (pattern))
5330 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5331 return true;
5332
5333 *set = pattern;
5334 }
5335
5336 pattern = SET_SRC (pattern);
5337 }
5338
5339 if (GET_CODE (pattern) != CALL
5340 || !MEM_P (XEXP (pattern, 0))
5341 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5342 return true;
5343
5344 return false;
5345 }
5346
5347 /* Given X, a pattern of an insn or a part of it, return a mask of used
5348 general registers. Bits 0..15 mean that the respective registers
5349 are used as inputs in the instruction. Bits 16..31 mean that the
5350 registers 0..15, respectively, are used as outputs, or are clobbered.
5351 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
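/* For example, for the pattern (set (reg:SI 1) (plus:SI (reg:SI 2) (reg:SI 3)))
this returns 0x2000c: bit 17 marks r1 as an output and bits 2 and 3 mark
r2 and r3 as inputs (illustrative; SImode occupies one general register). */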
5352 int
5353 regs_used (rtx x, int is_dest)
5354 {
5355 enum rtx_code code;
5356 const char *fmt;
5357 int used = 0;
5358
5359 if (! x)
5360 return used;
5361 code = GET_CODE (x);
5362 switch (code)
5363 {
5364 case REG:
5365 if (REGNO (x) < 16)
5366 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5367 << (REGNO (x) + is_dest));
5368 return 0;
5369 case SUBREG:
5370 {
5371 rtx y = SUBREG_REG (x);
5372
5373 if (!REG_P (y))
5374 break;
5375 if (REGNO (y) < 16)
5376 return (((1 << HARD_REGNO_NREGS (0, GET_MODE (x))) - 1)
5377 << (REGNO (y) +
5378 subreg_regno_offset (REGNO (y),
5379 GET_MODE (y),
5380 SUBREG_BYTE (x),
5381 GET_MODE (x)) + is_dest));
5382 return 0;
5383 }
5384 case SET:
5385 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5386 case RETURN:
5387 /* If there was a return value, it must have been indicated with USE. */
5388 return 0x00ffff00;
5389 case CLOBBER:
5390 is_dest = 1;
5391 break;
5392 case MEM:
5393 is_dest = 0;
5394 break;
5395 case CALL:
5396 used |= 0x00ff00f0;
5397 break;
5398 default:
5399 break;
5400 }
5401
5402 fmt = GET_RTX_FORMAT (code);
5403
5404 for (int i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5405 {
5406 if (fmt[i] == 'E')
5407 {
5408 for (int j = XVECLEN (x, i) - 1; j >= 0; j--)
5409 used |= regs_used (XVECEXP (x, i, j), is_dest);
5410 }
5411 else if (fmt[i] == 'e')
5412 used |= regs_used (XEXP (x, i), is_dest);
5413 }
5414 return used;
5415 }
5416
5417 /* Create an instruction that prevents redirection of a conditional branch
5418 to the destination of the JUMP with address ADDR.
5419 If the branch needs to be implemented as an indirect jump, try to find
5420 a scratch register for it.
5421 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5422 Pass 1 if any preceding insn that doesn't fit into a delay slot is
5423 good enough; pass 2 if a definite blocking insn is needed.
5424 -1 is used internally to avoid deep recursion.
5425 If a blocking instruction is made or recognized, return it. */
5426 static rtx_insn *
5427 gen_block_redirect (rtx_insn *jump, int addr, int need_block)
5428 {
5429 int dead = 0;
5430 rtx_insn *prev = prev_nonnote_insn (jump);
5431
5432 /* First, check if we already have an instruction that satisfies our need. */
5433 if (prev && NONJUMP_INSN_P (prev) && ! prev->deleted ())
5434 {
5435 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5436 return prev;
5437 if (GET_CODE (PATTERN (prev)) == USE
5438 || GET_CODE (PATTERN (prev)) == CLOBBER
5439 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5440 prev = jump;
5441 else if ((need_block &= ~1) < 0)
5442 return prev;
5443 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5444 need_block = 0;
5445 }
5446 if (GET_CODE (PATTERN (jump)) == RETURN)
5447 {
5448 if (! need_block)
5449 return prev;
5450 /* Reorg even does nasty things with return insns that cause branches
5451 to go out of range - see find_end_label and callers. */
5452 return emit_insn_before (gen_block_branch_redirect (const0_rtx), jump);
5453 }
5454 /* We can't use JUMP_LABEL here because it might be undefined
5455 when not optimizing. */
5456 rtx dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5457 /* If the branch is out of range, try to find a scratch register for it. */
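/* The unsigned comparison below rejects targets outside the window
[ADDR - 4092, ADDR + 4098], which appears to match the reach of the
pc-relative bra instruction (12-bit displacement counted from pc + 4). */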
5458 if (optimize
5459 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5460 > 4092 + 4098))
5461 {
5462 rtx_insn *scan;
5463 /* Don't look for the stack pointer as a scratch register;
5464 it would cause trouble if an interrupt occurred. */
5465 unsigned attempt = 0x7fff, used;
5466 int jump_left = flag_expensive_optimizations + 1;
5467
5468 /* It is likely that the most recent eligible instruction is wanted for
5469 the delay slot. Therefore, find out which registers it uses, and
5470 try to avoid using them. */
5471
5472 for (scan = jump; (scan = PREV_INSN (scan)); )
5473 {
5474 if (scan->deleted ())
5475 continue;
5476 rtx_code code = GET_CODE (scan);
5477 if (code == CODE_LABEL || code == JUMP_INSN)
5478 break;
5479 if (code == INSN
5480 && GET_CODE (PATTERN (scan)) != USE
5481 && GET_CODE (PATTERN (scan)) != CLOBBER
5482 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5483 {
5484 attempt &= ~regs_used (PATTERN (scan), 0);
5485 break;
5486 }
5487 }
5488 for (used = dead = 0, scan = JUMP_LABEL_AS_INSN (jump);
5489 (scan = NEXT_INSN (scan)); )
5490 {
5491 if (scan->deleted ())
5492 continue;
5493 rtx_code code = GET_CODE (scan);
5494 if (INSN_P (scan))
5495 {
5496 used |= regs_used (PATTERN (scan), 0);
5497 if (code == CALL_INSN)
5498 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5499 dead |= (used >> 16) & ~used;
5500 if (dead & attempt)
5501 {
5502 dead &= attempt;
5503 break;
5504 }
5505 if (code == JUMP_INSN)
5506 {
5507 if (jump_left-- && simplejump_p (scan))
5508 scan = JUMP_LABEL_AS_INSN (scan);
5509 else
5510 break;
5511 }
5512 }
5513 }
5514 /* Mask out the stack pointer again, in case it was
5515 the only 'free' register we have found. */
5516 dead &= 0x7fff;
5517 }
5518 /* If the immediate destination is still in range, check for possible
5519 threading with a jump beyond the delay slot insn.
5520 Don't check if we are called recursively; in that case the jump has
5521 been or will be checked in a separate invocation. */
5522
5523 else if (optimize && need_block >= 0)
5524 {
5525 rtx_insn *next = next_active_insn (as_a<rtx_insn *> (dest));
5526 next = next_active_insn (next);
5527 if (next && JUMP_P (next)
5528 && GET_CODE (PATTERN (next)) == SET
5529 && recog_memoized (next) == CODE_FOR_jump_compact)
5530 {
5531 dest = JUMP_LABEL (next);
5532 if (dest
5533 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5534 > 4092 + 4098))
5535 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5536 }
5537 }
5538
5539 if (dead)
5540 {
5541 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5542
5543 /* It would be nice if we could convert the jump into an indirect
5544 jump / far branch right now, thus exposing all constituent
5545 instructions to further optimization. However, reorg uses
5546 simplejump_p to determine if there is an unconditional jump where
5547 it should try to schedule instructions from the target of the
5548 branch; simplejump_p fails for indirect jumps even if they have
5549 a JUMP_LABEL. */
5550 rtx_insn *insn = emit_insn_before (gen_indirect_jump_scratch
5551 (reg, GEN_INT (unspec_bbr_uid++)),
5552 jump);
5553 /* ??? We would like this to have the scope of the jump, but that
5554 scope will change when a delay slot insn of an inner scope is added.
5555 Hence, after delay slot scheduling, we'll have to expect
5556 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5557 the jump. */
5558
5559 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5560 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5561 return insn;
5562 }
5563 else if (need_block)
5564 /* We can't use JUMP_LABEL here because it might be undefined
5565 when not optimizing. */
5566 return emit_insn_before (gen_block_branch_redirect
5567 (GEN_INT (unspec_bbr_uid++)),
5568 jump);
5569 return prev;
5570 }
5571
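/* Byte-offset window within which a conditional branch is assumed to be
able to reach its target; the asymmetry accounts for the pc-relative
base of the branch. */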
5572 #define CONDJUMP_MIN -252
5573 #define CONDJUMP_MAX 262
5574 struct far_branch
5575 {
5576 /* A label (to be placed) in front of the jump
5577 that jumps to our ultimate destination. */
5578 rtx_insn *near_label;
5579 /* Where we are going to insert it if we cannot move the jump any farther,
5580 or the jump itself if we have picked up an existing jump. */
5581 rtx_insn *insert_place;
5582 /* The ultimate destination. */
5583 rtx_insn *far_label;
5584 struct far_branch *prev;
5585 /* If the branch has already been created, its address;
5586 else the address of its first prospective user. */
5587 int address;
5588 };
5589
5590 enum mdep_reorg_phase_e mdep_reorg_phase;
5591
5592 static void
5593 gen_far_branch (struct far_branch *bp)
5594 {
5595 rtx_insn *insn = bp->insert_place;
5596 rtx_jump_insn *jump;
5597 rtx_code_label *label = gen_label_rtx ();
5598
5599 emit_label_after (label, insn);
5600 if (bp->far_label)
5601 {
5602 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5603 LABEL_NUSES (bp->far_label)++;
5604 }
5605 else
5606 jump = emit_jump_insn_after (gen_return (), insn);
5607
5608 /* Emit a barrier so that reorg knows that any following instructions
5609 are not reachable via a fall-through path.
5610 But don't do this when not optimizing, since we wouldn't suppress the
5611 alignment for the barrier then, and could end up with out-of-range
5612 pc-relative loads. */
5613 if (optimize)
5614 emit_barrier_after (jump);
5615 emit_label_after (bp->near_label, insn);
5616
5617 if (bp->far_label)
5618 JUMP_LABEL (jump) = bp->far_label;
5619 else
5620 {
5621 rtx pat = PATTERN (jump);
5622 gcc_assert (ANY_RETURN_P (pat));
5623 JUMP_LABEL (jump) = pat;
5624 }
5625
5626 bool ok = invert_jump (as_a <rtx_jump_insn *> (insn), label, 1);
5627 gcc_assert (ok);
5628
5629 /* If we are branching around a jump (rather than a return), prevent
5630 reorg from using an insn from the jump target as the delay slot insn -
5631 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5632 and it could cause branches to go out of range. */
5633 if (bp->far_label)
5634 (emit_insn_after
5635 (gen_stuff_delay_slot
5636 (GEN_INT (unspec_bbr_uid++),
5637 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5638 insn));
5639 /* Prevent reorg from undoing our splits. */
5640 gen_block_redirect (jump, bp->address += 2, 2);
5641 }
5642
5643 /* Fix up ADDR_DIFF_VECs. */
5644 void
5645 fixup_addr_diff_vecs (rtx_insn *first)
5646 {
5647 rtx_insn *insn;
5648
5649 for (insn = first; insn; insn = NEXT_INSN (insn))
5650 {
5651 rtx vec_lab, pat, prevpat, x, braf_label;
5652 rtx_insn *prev;
5653
5654 if (! JUMP_TABLE_DATA_P (insn)
5655 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5656 continue;
5657 pat = PATTERN (insn);
5658 vec_lab = XEXP (XEXP (pat, 0), 0);
5659
5660 /* Search the matching casesi_jump_2. */
5661 for (prev = as_a <rtx_insn *> (vec_lab); ; prev = PREV_INSN (prev))
5662 {
5663 if (!JUMP_P (prev))
5664 continue;
5665 prevpat = PATTERN (prev);
5666 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5667 continue;
5668 x = XVECEXP (prevpat, 0, 1);
5669 if (GET_CODE (x) != USE)
5670 continue;
5671 x = XEXP (x, 0);
5672 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5673 break;
5674 }
5675 /* FIXME: This is a bug in the optimizer, but it seems harmless
5676 to just avoid panicking. */
5677 if (!prev)
5678 continue;
5679
5680 /* Emit the reference label of the braf where it belongs, right after
5681 the casesi_jump_2 (i.e. braf). */
5682 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5683 emit_label_after (braf_label, prev);
5684
5685 /* Fix up the ADDR_DIFF_VEC to be relative
5686 to the reference address of the braf. */
5687 XEXP (XEXP (pat, 0), 0) = braf_label;
5688 }
5689 }
5690
5691 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5692 a barrier. Return the base 2 logarithm of the desired alignment. */
5693 int
5694 barrier_align (rtx_insn *barrier_or_label)
5695 {
5696 if (! barrier_or_label)
5697 return 0;
5698
5699 if (LABEL_P (barrier_or_label)
5700 && NEXT_INSN (barrier_or_label)
5701 && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
5702 return 2;
5703
5704 if (BARRIER_P (barrier_or_label)
5705 && PREV_INSN (barrier_or_label)
5706 && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
5707 {
5708 rtx pat = PATTERN (PREV_INSN (barrier_or_label));
5709 /* If this is a very small table, we want to keep the alignment after
5710 the table to the minimum for proper code alignment. */
5711 return ((optimize_size
5712 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5713 <= (unsigned) 1 << (CACHE_LOG - 2)))
5714 ? 1 : align_jumps_log);
5715 }
5716
5717 rtx_insn *next = next_active_insn (barrier_or_label);
5718
5719 if (! next)
5720 return 0;
5721
5722 rtx pat = PATTERN (next);
5723
5724 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5725 /* This is a barrier in front of a constant table. */
5726 return 0;
5727
5728 if (optimize_size)
5729 return 0;
5730
5731 if (! TARGET_SH2 || ! optimize)
5732 return align_jumps_log;
5733
5734 /* When fixing up pcloads, a constant table might be inserted just before
5735 the basic block that ends with the barrier. Thus, we can't trust the
5736 instruction lengths before that. */
5737 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5738 {
5739 /* Check if there is an immediately preceding branch to the insn beyond
5740 the barrier. We must weigh the cost of discarding useful information
5741 from the current cache line when executing this branch and there is
5742 an alignment, against that of fetching unneeded insns in front of the
5743 branch target when there is no alignment. */
5744
5745 /* There are two delay_slot cases to consider. One is the simple case
5746 where the preceding branch is to the insn beyond the barrier (simple
5747 delay slot filling), and the other is where the preceding branch has
5748 a delay slot that is a duplicate of the insn after the barrier
5749 (fill_eager_delay_slots) and the branch is to the insn after the insn
5750 after the barrier. */
5751
5752 int slot, credit;
5753 bool jump_to_next = false;
5754
5755 /* Skip to the insn before the JUMP_INSN before the barrier under
5756 investigation. */
5757 rtx_insn *prev = prev_real_insn (prev_active_insn (barrier_or_label));
5758
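/* Walk backwards over the preceding non-jump insns, charging their
lengths against CREDIT; SLOT is cleared once an insn that could have
filled the delay slot has been seen. */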
5759 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5760 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5761 prev = prev_real_insn (prev))
5762 {
5763 jump_to_next = false;
5764 if (GET_CODE (PATTERN (prev)) == USE
5765 || GET_CODE (PATTERN (prev)) == CLOBBER)
5766 continue;
5767 if (rtx_sequence *prev_seq = dyn_cast <rtx_sequence *> (PATTERN (prev)))
5768 {
5769 prev = prev_seq->insn (1);
5770 if (INSN_UID (prev) == INSN_UID (next))
5771 {
5772 /* Delay slot was filled with insn at jump target. */
5773 jump_to_next = true;
5774 continue;
5775 }
5776 }
5777
5778 if (slot
5779 && get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5780 slot = 0;
5781 credit -= get_attr_length (prev);
5782 }
5783 if (prev && jump_to_label_p (prev))
5784 {
5785 rtx_insn *x;
5786 if (jump_to_next
5787 || next_real_insn (JUMP_LABEL (prev)) == next
5788 /* If relax_delay_slots() decides NEXT was redundant
5789 with some previous instruction, it will have
5790 redirected PREV's jump to the following insn. */
5791 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5792 /* There is no upper bound on redundant instructions
5793 that might have been skipped, but we must not put an
5794 alignment where none had been before. */
5795 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5796 (INSN_P (x)
5797 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5798 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5799 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5800 {
5801 rtx pat = PATTERN (prev);
5802 if (GET_CODE (pat) == PARALLEL)
5803 pat = XVECEXP (pat, 0, 0);
5804 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5805 return 0;
5806 }
5807 }
5808 }
5809
5810 return align_jumps_log;
5811 }
5812
5813 /* If we are inside a phony loop, almost any kind of label can turn up as the
5814 first one in the loop. Aligning a braf label causes incorrect switch
5815 destination addresses; we can detect braf labels because they are
5816 followed by a BARRIER.
5817 Applying loop alignment to small constant or switch tables is a waste
5818 of space, so we suppress this too. */
5819 int
5820 sh_loop_align (rtx_insn *label)
5821 {
5822 rtx_insn *next = label;
5823
5824 if (! optimize || optimize_size)
5825 return 0;
5826
5827 do
5828 next = next_nonnote_insn (next);
5829 while (next && LABEL_P (next));
5830
5831 if (! next
5832 || ! INSN_P (next)
5833 || recog_memoized (next) == CODE_FOR_consttable_2)
5834 return 0;
5835
5836 return align_loops_log;
5837 }
5838
5839 /* Do a final pass over the function, just before delayed branch
5840 scheduling. */
5841 static void
5842 sh_reorg (void)
5843 {
5844 rtx_insn *first, *insn, *mova = NULL;
5845 int num_mova;
5846 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5847 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5848
5849 first = get_insns ();
5850 max_labelno_before_reorg = max_label_num ();
5851
5852 /* We must split call insns before introducing `mova's. If we're
5853 optimizing, they'll have already been split. Otherwise, make
5854 sure we don't split them too late. */
5855 if (! optimize)
5856 split_all_insns_noflow ();
5857
5858 /* If relaxing, generate pseudo-ops to associate function calls with
5859 the symbols they call. It does no harm to not generate these
5860 pseudo-ops. However, when we can generate them, it enables the
5861 linker to potentially relax the jsr to a bsr, and eliminate the
5862 register load and, possibly, the constant pool entry. */
5863
5864 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5865 if (TARGET_RELAX)
5866 {
5867 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5868 own purposes. This works because none of the remaining passes
5869 need to look at them.
5870
5871 ??? But it may break in the future. We should use a machine
5872 dependent REG_NOTE, or some other approach entirely. */
5873 for (insn = first; insn; insn = NEXT_INSN (insn))
5874 {
5875 if (INSN_P (insn))
5876 {
5877 rtx note;
5878
5879 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5880 NULL_RTX)) != 0)
5881 remove_note (insn, note);
5882 }
5883 }
5884
5885 for (insn = first; insn; insn = NEXT_INSN (insn))
5886 {
5887 rtx pattern, reg, set, dies;
5888 rtx_code_label *label;
5889 rtx_insn *link, *scan;
5890 int rescan = 0, foundinsn = 0;
5891
5892 if (CALL_P (insn))
5893 {
5894 pattern = PATTERN (insn);
5895
5896 if (GET_CODE (pattern) == PARALLEL)
5897 pattern = XVECEXP (pattern, 0, 0);
5898 if (GET_CODE (pattern) == SET)
5899 pattern = SET_SRC (pattern);
5900
5901 if (GET_CODE (pattern) != CALL
5902 || !MEM_P (XEXP (pattern, 0)))
5903 continue;
5904
5905 reg = XEXP (XEXP (pattern, 0), 0);
5906 }
5907 else
5908 {
5909 reg = sfunc_uses_reg (insn);
5910 if (! reg)
5911 continue;
5912 }
5913
5914 if (!REG_P (reg))
5915 continue;
5916
5917 /* Try scanning backward to find where the register is set. */
5918 link = NULL;
5919 for (scan = PREV_INSN (insn);
5920 scan && !LABEL_P (scan);
5921 scan = PREV_INSN (scan))
5922 {
5923 if (! INSN_P (scan))
5924 continue;
5925
5926 if (! reg_mentioned_p (reg, scan))
5927 continue;
5928
5929 if (noncall_uses_reg (reg, scan, &set))
5930 break;
5931
5932 if (set)
5933 {
5934 link = scan;
5935 break;
5936 }
5937 }
5938
5939 if (! link)
5940 continue;
5941
5942 /* The register is set at LINK. */
5943
5944 /* We can only optimize the function call if the register is
5945 being set to a symbol. In theory, we could sometimes
5946 optimize calls to a constant location, but the assembler
5947 and linker do not support that at present. */
5948 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5949 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5950 continue;
5951
5952 /* Scan forward from LINK to the place where REG dies, and
5953 make sure that the only insns which use REG are
5954 themselves function calls. */
5955
5956 /* ??? This doesn't work for call targets that were allocated
5957 by reload, since there may not be a REG_DEAD note for the
5958 register. */
5959
5960 dies = NULL_RTX;
5961 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
5962 {
5963 rtx scanset;
5964
5965 /* Don't try to trace forward past a CODE_LABEL if we haven't
5966 seen INSN yet. Ordinarily, we will only find the setting insn
5967 if it is in the same basic block. However,
5968 cross-jumping can insert code labels in between the load and
5969 the call, and can result in situations where a single call
5970 insn may have two targets depending on where we came from. */
5971
5972 if (LABEL_P (scan) && ! foundinsn)
5973 break;
5974
5975 if (! INSN_P (scan))
5976 continue;
5977
5978 /* Don't try to trace forward past a JUMP. To optimize
5979 safely, we would have to check that all the
5980 instructions at the jump destination did not use REG. */
5981
5982 if (JUMP_P (scan))
5983 break;
5984
5985 if (! reg_mentioned_p (reg, scan))
5986 continue;
5987
5988 if (noncall_uses_reg (reg, scan, &scanset))
5989 break;
5990
5991 if (scan == insn)
5992 foundinsn = 1;
5993
5994 if (scan != insn
5995 && (CALL_P (scan) || sfunc_uses_reg (scan)))
5996 {
5997 /* There is a function call to this register other
5998 than the one we are checking. If we optimize
5999 this call, we need to rescan again below. */
6000 rescan = 1;
6001 }
6002
6003 /* ??? We shouldn't have to worry about SCANSET here.
6004 We should just be able to check for a REG_DEAD note
6005 on a function call. However, the REG_DEAD notes are
6006 apparently not dependable around libcalls; c-torture
6007 execute/920501-2 is a test case. If SCANSET is set,
6008 then this insn sets the register, so it must have
6009 died earlier. Unfortunately, this will only handle
6010 the cases in which the register is, in fact, set in a
6011 later insn. */
6012
6013 /* ??? We shouldn't have to use FOUNDINSN here.
6014 This dates back to when we used LOG_LINKS to find
6015 the most recent insn which sets the register. */
6016
6017 if (foundinsn
6018 && (scanset
6019 || find_reg_note (scan, REG_DEAD, reg)))
6020 {
6021 dies = scan;
6022 break;
6023 }
6024 }
6025
6026 if (! dies)
6027 {
6028 /* Either there was a branch, or some insn used REG
6029 other than as a function call address. */
6030 continue;
6031 }
6032
6033 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6034 on the insn which sets the register, and on each call insn
6035 which uses the register. In final_prescan_insn we look for
6036 the REG_LABEL_OPERAND notes, and output the appropriate label
6037 or pseudo-op. */
6038
6039 label = gen_label_rtx ();
6040 add_reg_note (link, REG_LABEL_OPERAND, label);
6041 add_reg_note (insn, REG_LABEL_OPERAND, label);
6042 if (rescan)
6043 {
6044 scan = link;
6045 do
6046 {
6047 rtx reg2;
6048
6049 scan = NEXT_INSN (scan);
6050 if (scan != insn
6051 && ((CALL_P (scan)
6052 && reg_mentioned_p (reg, scan))
6053 || ((reg2 = sfunc_uses_reg (scan))
6054 && REGNO (reg2) == REGNO (reg))))
6055 add_reg_note (scan, REG_LABEL_OPERAND, label);
6056 }
6057 while (scan != dies);
6058 }
6059 }
6060 }
6061
6062 if (TARGET_SH2)
6063 fixup_addr_diff_vecs (first);
6064
6065 if (optimize)
6066 {
6067 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6068 shorten_branches (first);
6069 }
6070
6071 /* Scan the function looking for move instructions which have to be
6072 changed to pc-relative loads and insert the literal tables. */
6073 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6074 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6075 {
6076 if (mova_p (insn))
6077 {
6078 /* ??? basic block reordering can move a switch table dispatch
6079 below the switch table. Check if that has happened.
6080 We only have the addresses available when optimizing; but then,
6081 this check shouldn't be needed when not optimizing. */
6082 if (!untangle_mova (&num_mova, &mova, insn))
6083 {
6084 insn = mova;
6085 num_mova = 0;
6086 }
6087 }
6088 else if (JUMP_TABLE_DATA_P (insn)
6089 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6090 && num_mova
6091 /* ??? loop invariant motion can also move a mova out of a
6092 loop. Since loop does this code motion anyway, maybe we
6093 should wrap UNSPEC_MOVA into a CONST, so that reload can
6094 move it back. */
6095 && ((num_mova > 1
6096 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6097 || (prev_nonnote_insn (insn)
6098 == XEXP (MOVA_LABELREF (mova), 0))))
6099 {
6100 rtx_insn *scan;
6101 int total;
6102
6103 num_mova--;
6104
6105 /* Some code might have been inserted between the mova and
6106 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6107 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6108 total += get_attr_length (scan);
6109
6110 /* The range of mova is 1020; add 4 because pc counts from the address
6111 of the second instruction after this one, and subtract 2 in case pc
6112 is 2-byte aligned. Possible alignment needed for the ADDR_DIFF_VEC
6113 cancels out with alignment effects of the mova itself. */
6114 if (total > 1022)
6115 {
6116 /* Change the mova into a load, and restart scanning
6117 there. broken_move will then return true for mova. */
6118 fixup_mova (mova);
6119 insn = mova;
6120 }
6121 }
6122 if (broken_move (insn)
6123 || (NONJUMP_INSN_P (insn)
6124 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
6125 {
6126 rtx_insn *scan;
6127 /* Scan ahead looking for a barrier to stick the constant table
6128 behind. */
6129 rtx_insn *barrier = find_barrier (num_mova, mova, insn);
6130 rtx_insn *last_float_move = NULL;
6131 rtx last_float = 0, *last_float_addr = NULL;
6132 int need_aligned_label = 0;
6133
6134 if (num_mova && ! mova_p (mova))
6135 {
6136 /* find_barrier had to change the first mova into a
6137 pcload; thus, we have to start with this new pcload. */
6138 insn = mova;
6139 num_mova = 0;
6140 }
6141 /* Now find all the moves between the points and modify them. */
6142 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6143 {
6144 if (LABEL_P (scan))
6145 last_float = 0;
6146 if (NONJUMP_INSN_P (scan)
6147 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6148 need_aligned_label = 1;
6149 if (broken_move (scan))
6150 {
6151 rtx *patp = &PATTERN (scan), pat = *patp;
6152 rtx src, dst;
6153 rtx lab;
6154 rtx newsrc;
6155 machine_mode mode;
6156
6157 if (GET_CODE (pat) == PARALLEL)
6158 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6159 src = SET_SRC (pat);
6160 dst = SET_DEST (pat);
6161 mode = GET_MODE (dst);
6162
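/* An SImode constant that fits in 16 bits can be loaded with a HImode
pc-relative load (mov.w sign-extends), halving the constant pool entry;
rewrite DST as the corresponding HImode register. */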
6163 if (mode == SImode && satisfies_constraint_I16 (src)
6164 && REGNO (dst) != FPUL_REG)
6165 {
6166 int offset = 0;
6167
6168 mode = HImode;
6169 while (GET_CODE (dst) == SUBREG)
6170 {
6171 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6172 GET_MODE (SUBREG_REG (dst)),
6173 SUBREG_BYTE (dst),
6174 GET_MODE (dst));
6175 dst = SUBREG_REG (dst);
6176 }
6177 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6178 }
6179 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6180 {
6181 /* This must be an insn that clobbers r0. */
6182 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6183 XVECLEN (PATTERN (scan), 0)
6184 - 1);
6185 rtx clobber = *clobberp;
6186
6187 gcc_assert (GET_CODE (clobber) == CLOBBER
6188 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6189
6190 if (last_float
6191 && reg_set_between_p (r0_rtx, last_float_move, scan))
6192 last_float = 0;
6193 lab = add_constant (src, mode, last_float);
6194 if (lab)
6195 emit_insn_before (gen_mova (lab), scan);
6196 else
6197 {
6198 /* There will be a REG_UNUSED note for r0 on
6199 LAST_FLOAT_MOVE; we have to change it to REG_INC,
6200 otherwise reorg's mark_target_live_regs will not
6201 consider r0 to be used, and we end up with a delay
6202 slot insn in front of SCAN that clobbers r0. */
6203 rtx note
6204 = find_regno_note (last_float_move, REG_UNUSED, 0);
6205
6206 /* If we are not optimizing, then there may not be
6207 a note. */
6208 if (note)
6209 PUT_REG_NOTE_KIND (note, REG_INC);
6210
6211 *last_float_addr = r0_inc_rtx;
6212 }
6213 last_float_move = scan;
6214 last_float = src;
6215 newsrc = gen_const_mem (mode,
6216 (((TARGET_SH4 && ! TARGET_FMOVD)
6217 || REGNO (dst) == FPUL_REG)
6218 ? r0_inc_rtx
6219 : r0_rtx));
6220 last_float_addr = &XEXP (newsrc, 0);
6221
6222 /* Remove the clobber of r0. */
6223 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6224 gen_rtx_SCRATCH (Pmode));
6225 }
6226 /* This is a mova needing a label. Create it. */
6227 else if (GET_CODE (src) == UNSPEC
6228 && XINT (src, 1) == UNSPEC_MOVA
6229 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6230 {
6231 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6232 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6233 newsrc = gen_rtx_UNSPEC (SImode,
6234 gen_rtvec (1, newsrc),
6235 UNSPEC_MOVA);
6236 }
6237 else if (GET_CODE (src) == UNSPEC_VOLATILE
6238 && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
6239 {
6240 newsrc = XVECEXP (src, 0, 0);
6241 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
6242 INSN_CODE (scan) = -1;
6243 continue;
6244 }
6245 else
6246 {
6247 lab = add_constant (src, mode, 0);
6248 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6249 newsrc = gen_const_mem (mode, newsrc);
6250 }
6251 *patp = gen_rtx_SET (dst, newsrc);
6252 INSN_CODE (scan) = -1;
6253 }
6254 }
6255 dump_table (need_aligned_label ? insn : 0, barrier);
6256 insn = barrier;
6257 }
6258 }
6259 label_ref_list_d_pool.release ();
6260 for (insn = first; insn; insn = NEXT_INSN (insn))
6261 PUT_MODE (insn, VOIDmode);
6262
6263 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6264 INSN_ADDRESSES_FREE ();
6265 split_branches (first);
6266
6267 /* The INSN_REFERENCES_ARE_DELAYED macro in sh.h is problematic because it
6268 also has an effect on the register that holds the address of the sfunc.
6269 Insert an extra dummy insn in front of each sfunc that pretends to
6270 use this register. */
6271 if (flag_delayed_branch)
6272 {
6273 for (insn = first; insn; insn = NEXT_INSN (insn))
6274 {
6275 rtx reg = sfunc_uses_reg (insn);
6276
6277 if (! reg)
6278 continue;
6279 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6280 }
6281 }
6282 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
6283 }
6284
6285 /* Return the UID of the insn that follows the specified label. */
6286 int
6287 get_dest_uid (rtx label, int max_uid)
6288 {
6289 rtx_insn *dest = next_real_insn (label);
6290
6291 if (! dest)
6292 /* This can happen for an undefined label. */
6293 return 0;
6294 int dest_uid = INSN_UID (dest);
6295 /* If this is a newly created branch redirection blocking instruction,
6296 we cannot index the branch_uid or insn_addresses arrays with its
6297 uid. But then, we won't need to, because the actual destination is
6298 the following branch. */
6299 while (dest_uid >= max_uid)
6300 {
6301 dest = NEXT_INSN (dest);
6302 dest_uid = INSN_UID (dest);
6303 }
6304 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6305 return 0;
6306 return dest_uid;
6307 }
6308
6309 /* Split condbranches that are out of range. Also add clobbers for
6310 scratch registers that are needed in far jumps.
6311 We do this before delay slot scheduling, so that it can take our
6312 newly created instructions into account. It also allows us to
6313 find branches with common targets more easily. */
6314 static void
6315 split_branches (rtx_insn *first)
6316 {
6317 rtx_insn *insn;
6318 struct far_branch **uid_branch, *far_branch_list = 0;
6319 int max_uid = get_max_uid ();
6320 int ok;
6321
6322 /* Find out which branches are out of range. */
6323 shorten_branches (first);
6324
6325 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6326 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6327
6328 for (insn = first; insn; insn = NEXT_INSN (insn))
6329 if (! INSN_P (insn))
6330 continue;
6331 else if (insn->deleted ())
6332 {
6333 /* Shorten_branches would split this instruction again,
6334 so transform it into a note. */
6335 SET_INSN_DELETED (insn);
6336 }
6337 else if (JUMP_P (insn))
6338 {
6339 enum attr_type type = get_attr_type (insn);
6340 if (type == TYPE_CBRANCH)
6341 {
6342 rtx_insn *next, *beyond;
6343
6344 if (get_attr_length (insn) > 4)
6345 {
6346 rtx src = SET_SRC (PATTERN (insn));
6347 rtx olabel = XEXP (XEXP (src, 1), 0);
6348 int addr = INSN_ADDRESSES (INSN_UID (insn));
6349 rtx_insn *label = 0;
6350 int dest_uid = get_dest_uid (olabel, max_uid);
6351 struct far_branch *bp = uid_branch[dest_uid];
6352
6353 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6354 the label if the LABEL_NUSES count drops to zero. There is
6355 always a jump_optimize pass that sets these values, but it
6356 proceeds to delete unreferenced code, and then if not
6357 optimizing, to un-delete the deleted instructions, thus
6358 leaving labels with too-low use counts. */
6359 if (! optimize)
6360 {
6361 JUMP_LABEL (insn) = olabel;
6362 LABEL_NUSES (olabel)++;
6363 }
6364 if (! bp)
6365 {
6366 bp = (struct far_branch *) alloca (sizeof *bp);
6367 uid_branch[dest_uid] = bp;
6368 bp->prev = far_branch_list;
6369 far_branch_list = bp;
6370 bp->far_label = as_a <rtx_insn *> (
6371 XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6372 0));
6373 LABEL_NUSES (bp->far_label)++;
6374 }
6375 else
6376 {
6377 label = bp->near_label;
6378 if (! label && bp->address - addr >= CONDJUMP_MIN)
6379 {
6380 rtx_insn *block = bp->insert_place;
6381
6382 if (GET_CODE (PATTERN (block)) == RETURN)
6383 block = PREV_INSN (block);
6384 else
6385 block = gen_block_redirect (block,
6386 bp->address, 2);
6387 label = emit_label_after (gen_label_rtx (),
6388 PREV_INSN (block));
6389 bp->near_label = label;
6390 }
6391 else if (label && ! NEXT_INSN (label))
6392 {
6393 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6394 bp->insert_place = insn;
6395 else
6396 gen_far_branch (bp);
6397 }
6398 }
6399 if (! label
6400 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6401 {
6402 bp->near_label = label = gen_label_rtx ();
6403 bp->insert_place = insn;
6404 bp->address = addr;
6405 }
6406 ok = redirect_jump (as_a <rtx_jump_insn *> (insn), label, 0);
6407 gcc_assert (ok);
6408 }
6409 else
6410 {
6411 /* get_attr_length (insn) == 2 */
6412 /* Check if we have a pattern where reorg wants to redirect
6413 the branch to a label from an unconditional branch that
6414 is too far away. */
6415 /* We can't use JUMP_LABEL here because it might be undefined
6416 when not optimizing. */
6417 /* A syntax error might cause beyond to be NULL_RTX. */
6418 rtx temp = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
6419 beyond = next_active_insn (as_a<rtx_insn *> (temp));
6420
6421 if (beyond
6422 && (JUMP_P (beyond)
6423 || ((beyond = next_active_insn (beyond))
6424 && JUMP_P (beyond)))
6425 && GET_CODE (PATTERN (beyond)) == SET
6426 && recog_memoized (beyond) == CODE_FOR_jump_compact
6427 && ((INSN_ADDRESSES
6428 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6429 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6430 > 252 + 258 + 2))
6431 gen_block_redirect (beyond,
6432 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6433 }
6434
6435 next = next_active_insn (insn);
6436
6437 if (next
6438 && (JUMP_P (next)
6439 || ((next = next_active_insn (next))
6440 && JUMP_P (next)))
6441 && GET_CODE (PATTERN (next)) == SET
6442 && recog_memoized (next) == CODE_FOR_jump_compact
6443 && ((INSN_ADDRESSES
6444 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6445 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6446 > 252 + 258 + 2))
6447 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6448 }
6449 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6450 {
6451 int addr = INSN_ADDRESSES (INSN_UID (insn));
6452 rtx_insn *far_label = 0;
6453 int dest_uid = 0;
6454 struct far_branch *bp;
6455
6456 if (type == TYPE_JUMP)
6457 {
6458 if (CROSSING_JUMP_P (insn))
6459 {
6460 emit_insn_before (gen_block_branch_redirect (const0_rtx),
6461 insn);
6462 continue;
6463 }
6464
6465 far_label = as_a <rtx_insn *> (
6466 XEXP (SET_SRC (PATTERN (insn)), 0));
6467 dest_uid = get_dest_uid (far_label, max_uid);
6468 if (! dest_uid)
6469 {
6470 /* Parse errors can lead to labels outside
6471 the insn stream. */
6472 if (! NEXT_INSN (far_label))
6473 continue;
6474
6475 if (! optimize)
6476 {
6477 JUMP_LABEL (insn) = far_label;
6478 LABEL_NUSES (far_label)++;
6479 }
6480 redirect_jump (as_a <rtx_jump_insn *> (insn), ret_rtx, 1);
6481 far_label = 0;
6482 }
6483 }
6484 bp = uid_branch[dest_uid];
6485 if (! bp)
6486 {
6487 bp = (struct far_branch *) alloca (sizeof *bp);
6488 uid_branch[dest_uid] = bp;
6489 bp->prev = far_branch_list;
6490 far_branch_list = bp;
6491 bp->near_label = 0;
6492 bp->far_label = far_label;
6493 if (far_label)
6494 LABEL_NUSES (far_label)++;
6495 }
6496 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6497 if (addr - bp->address <= CONDJUMP_MAX)
6498 emit_label_after (bp->near_label, PREV_INSN (insn));
6499 else
6500 {
6501 gen_far_branch (bp);
6502 bp->near_label = 0;
6503 }
6504 else
6505 bp->near_label = 0;
6506 bp->address = addr;
6507 bp->insert_place = insn;
6508 if (! far_label)
6509 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6510 else
6511 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6512 }
6513 }
6514 /* Generate all pending far branches,
6515 and free our references to the far labels. */
6516 while (far_branch_list)
6517 {
6518 if (far_branch_list->near_label
6519 && ! NEXT_INSN (far_branch_list->near_label))
6520 gen_far_branch (far_branch_list);
6521 if (optimize
6522 && far_branch_list->far_label
6523 && ! --LABEL_NUSES (far_branch_list->far_label))
6524 delete_insn (far_branch_list->far_label);
6525 far_branch_list = far_branch_list->prev;
6526 }
6527
6528 /* Instruction length information is no longer valid due to the new
6529 instructions that have been generated. */
6530 init_insn_lengths ();
6531 }
6532
6533 /* Dump out instruction addresses, which is useful for debugging the
6534 constant pool table stuff.
6535
6536 If relaxing, output the label and pseudo-ops used to link together
6537 calls and the instruction which set the registers.
6538
6539 ??? The addresses printed by this routine for insns are nonsense for
6540 insns which are inside of a sequence where none of the inner insns have
6541 variable length. This is because the second pass of shorten_branches
6542 does not bother to update them. */
6543 void
6544 final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
6545 int noperands ATTRIBUTE_UNUSED)
6546 {
6547 if (TARGET_DUMPISIZE)
6548 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6549
6550 if (TARGET_RELAX)
6551 {
6552 if (rtx note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX))
6553 {
6554 rtx pattern = PATTERN (insn);
6555 if (GET_CODE (pattern) == PARALLEL)
6556 pattern = XVECEXP (pattern, 0, 0);
6557 switch (GET_CODE (pattern))
6558 {
6559 case SET:
6560 if (GET_CODE (SET_SRC (pattern)) != CALL
6561 && get_attr_type (insn) != TYPE_SFUNC)
6562 {
6563 targetm.asm_out.internal_label
6564 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6565 break;
6566 }
6567 /* FALLTHROUGH */
6568 case CALL:
6569 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6570 CODE_LABEL_NUMBER (XEXP (note, 0)));
6571 break;
6572
6573 default:
6574 gcc_unreachable ();
6575 }
6576 }
6577 }
6578 }
6579
6580 /* Dump out any constants accumulated in the final pass. These will
6581 only be labels. */
6582 const char *
6583 output_jump_label_table (void)
6584 {
6585 if (pool_size)
6586 {
6587 fprintf (asm_out_file, "\t.align 2\n");
6588 for (int i = 0; i < pool_size; i++)
6589 {
6590 pool_node *p = &pool_vector[i];
6591
6592 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6593 CODE_LABEL_NUMBER (p->label));
6594 output_asm_insn (".long %O0", &p->value);
6595 }
6596 pool_size = 0;
6597 }
6598
6599 return "";
6600 }
6601 \f
6602 /* A full frame looks like:
6603
6604 arg-5
6605 arg-4
6606 [ if current_function_anonymous_args
6607 arg-3
6608 arg-2
6609 arg-1
6610 arg-0 ]
6611 saved-fp
6612 saved-r10
6613 saved-r11
6614 saved-r12
6615 saved-pr
6616 local-n
6617 ..
6618 local-1
6619 local-0 <- fp points here.
6620
6621 The number of bytes pushed for anonymous args is used to pass
6622 information between expand_prologue and expand_epilogue.
6623
6624 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6625 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6626 for an epilogue and a negative value means that it's for a sibcall
6627 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6628 all the registers that are about to be restored, and hence dead. */
6629 static void
6630 output_stack_adjust (int size, rtx reg, int epilogue_p,
6631 HARD_REG_SET *live_regs_mask, bool frame_p)
6632 {
6633 rtx_insn *(*emit_fn) (rtx) = frame_p ? &emit_frame_insn : &emit_insn;
6634 if (size)
6635 {
6636 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6637
6638 /* This test is bogus, as output_stack_adjust is used to re-align the
6639 stack. */
6640 #if 0
6641 gcc_assert (!(size % align));
6642 #endif
6643
6644 if (CONST_OK_FOR_ADD (size))
6645 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6646 /* Try to do it with two partial adjustments; however, we must make
6647 sure that the stack is properly aligned at all times, in case
6648 an interrupt occurs between the two partial adjustments. */
6649 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6650 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6651 {
6652 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6653 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6654 }
6655 else
6656 {
6657 rtx const_reg;
6658 rtx insn;
6659 int temp = epilogue_p ? 7 : 1;
6660 int i;
6661
6662 /* If TEMP is invalid, we could temporarily save a general
6663 register to MACL. However, there is currently no need
6664 to handle this case, so just die when we see it. */
6665 if (epilogue_p < 0
6666 || current_function_interrupt
6667 || ! call_really_used_regs[temp] || fixed_regs[temp])
6668 temp = -1;
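/* Otherwise try to scavenge a call-clobbered register that is not needed
for the return value, EH return data, incoming arguments or the static
chain, and use it to hold the adjustment constant. */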
6669 if (temp < 0 && ! current_function_interrupt && epilogue_p >= 0)
6670 {
6671 HARD_REG_SET temps;
6672 COPY_HARD_REG_SET (temps, call_used_reg_set);
6673 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6674 if (epilogue_p > 0)
6675 {
6676 int nreg = 0;
6677 if (crtl->return_rtx)
6678 {
6679 machine_mode mode;
6680 mode = GET_MODE (crtl->return_rtx);
6681 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6682 nreg = HARD_REGNO_NREGS (FIRST_RET_REG, mode);
6683 }
6684 for (i = 0; i < nreg; i++)
6685 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6686 if (crtl->calls_eh_return)
6687 {
6688 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6689 for (i = 0; i <= 3; i++)
6690 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6691 }
6692 }
6693 if (epilogue_p <= 0)
6694 {
6695 for (i = FIRST_PARM_REG;
6696 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6697 CLEAR_HARD_REG_BIT (temps, i);
6698 if (cfun->static_chain_decl != NULL)
6699 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6700 }
6701 temp = scavenge_reg (&temps);
6702 }
6703 if (temp < 0 && live_regs_mask)
6704 {
6705 HARD_REG_SET temps;
6706
6707 COPY_HARD_REG_SET (temps, *live_regs_mask);
6708 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6709 temp = scavenge_reg (&temps);
6710 }
6711 if (temp < 0)
6712 {
6713 rtx adj_reg, tmp_reg, mem;
6714
6715 /* If we reached here, the most likely case is the (sibcall)
6716 epilogue. Emit a special push/pop sequence for such a case as
6717 the last resort. This looks lengthy, but it should not be a problem
6718 because it seems to be very rare. */
6719 gcc_assert (epilogue_p);
6720
6721 /* ??? There is still the slight possibility that r4 or
6722 r5 have been reserved as fixed registers or assigned
6723 as global registers, and they change during an
6724 interrupt. There are possible ways to handle this:
6725
6726 - If we are adjusting the frame pointer (r14), we can do
6727 with a single temp register and an ordinary push / pop
6728 on the stack.
6729 - Grab any call-used or call-saved registers (i.e. not
6730 fixed or globals) for the temps we need. We might
6731 also grab r14 if we are adjusting the stack pointer.
6732 If we can't find enough available registers, issue
6733 a diagnostic and die - the user must have reserved
6734 way too many registers.
6735 But since all this is rather unlikely to happen and
6736 would require extra testing, we just die if r4 / r5
6737 are not available. */
6738 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6739 && !global_regs[4] && !global_regs[5]);
6740
6741 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6742 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6743 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6744 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6745 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6746 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6747 emit_move_insn (mem, tmp_reg);
6748 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6749 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6750 emit_move_insn (mem, tmp_reg);
6751 emit_move_insn (reg, adj_reg);
6752 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6753 emit_move_insn (adj_reg, mem);
6754 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6755 emit_move_insn (tmp_reg, mem);
6756 /* Tell flow the insns that pop r4/r5 aren't dead. */
6757 emit_use (tmp_reg);
6758 emit_use (adj_reg);
6759 return;
6760 }
6761 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6762
6763 /* If SIZE is negative, subtract the positive value.
6764 This sometimes allows a constant pool entry to be shared
6765 between prologue and epilogue code. */
6766 if (size < 0)
6767 {
6768 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6769 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6770 }
6771 else
6772 {
6773 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6774 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6775 }
6776 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6777 gen_rtx_SET (reg, gen_rtx_PLUS (SImode, reg,
6778 GEN_INT (size))));
6779 }
6780 }
6781 }
6782
6783 /* Emit the specified insn and mark it as frame related. */
6784 static rtx_insn *
6785 emit_frame_insn (rtx x)
6786 {
6787 rtx_insn *insn = emit_insn (x);
6788 RTX_FRAME_RELATED_P (insn) = 1;
6789 return insn;
6790 }
6791
6792 /* Output RTL to push register RN onto the stack. */
6793 static rtx
6794 push (int rn)
6795 {
6796 rtx x;
6797 if (rn == FPUL_REG)
6798 x = gen_push_fpul ();
6799 else if (rn == FPSCR_REG)
6800 x = gen_push_fpscr ();
6801 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD
6802 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6803 {
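/* The odd half of a double-precision register pair is pushed together
with the even half as a single DFmode push, so skip it here. */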
6804 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6805 return NULL_RTX;
6806 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6807 }
6808 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6809 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6810 else
6811 x = gen_push (gen_rtx_REG (SImode, rn));
6812
6813 x = emit_frame_insn (x);
6814 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6815 return x;
6816 }
6817
6818 /* Output RTL to pop register RN from the stack. */
6819 static void
6820 pop (int rn)
6821 {
6822 rtx x, sp_reg, reg;
6823 if (rn == FPUL_REG)
6824 x = gen_pop_fpul ();
6825 else if (rn == FPSCR_REG)
6826 x = gen_pop_fpscr ();
6827 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD
6828 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6829 {
6830 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6831 return;
6832 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6833 }
6834 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6835 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6836 else
6837 x = gen_pop (gen_rtx_REG (SImode, rn));
6838
6839 x = emit_insn (x);
6840
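/* Annotate the pop for the unwinder: the register's value is restored
and the stack pointer advances by the size of the popped value. */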
6841 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6842 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
6843 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
6844 : SET_DEST (PATTERN (x)));
6845 add_reg_note (x, REG_CFA_RESTORE, reg);
6846 add_reg_note (x, REG_CFA_ADJUST_CFA,
6847 gen_rtx_SET (sp_reg,
6848 plus_constant (SImode, sp_reg,
6849 GET_MODE_SIZE (GET_MODE (reg)))));
6850 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6851 RTX_FRAME_RELATED_P (x) = 1;
6852 }
6853
6854 /* Generate code to push the regs specified in the mask. */
6855 static void
6856 push_regs (HARD_REG_SET *mask, bool interrupt_handler)
6857 {
6858 bool skip_fpscr = false;
6859
6860 /* Push PR last; this gives better latencies after the prologue, and
6861 candidates for the return delay slot when there are no general
6862 registers pushed. */
6863 for (int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6864 i < FIRST_PSEUDO_REGISTER; i++)
6865 {
6866 /* If this is an interrupt handler, and the SZ bit varies,
6867 and we have to push any floating point register, we need
6868 to switch to the correct precision first. */
6869 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6870 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6871 {
6872 HARD_REG_SET unsaved;
6873
6874 push (FPSCR_REG);
6875 COMPL_HARD_REG_SET (unsaved, *mask);
6876 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6877 skip_fpscr = true;
6878 }
6879 if (i != PR_REG
6880 && (i != FPSCR_REG || ! skip_fpscr)
6881 && TEST_HARD_REG_BIT (*mask, i))
6882 {
6883 /* If the ISR has the RESBANK attribute assigned, don't push any of
6884 the following registers: R0-R14, MACH, MACL and GBR. */
6885 if (! (sh_cfun_resbank_handler_p ()
6886 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6887 || i == MACH_REG
6888 || i == MACL_REG
6889 || i == GBR_REG)))
6890 push (i);
6891 }
6892 }
6893
6894 /* Push banked registers last to improve delay slot opportunities. */
6895 if (interrupt_handler)
6896 {
6897 bool use_movml = false;
6898
6899 if (TARGET_SH2A)
6900 {
6901 unsigned int count = 0;
6902
6903 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6904 if (TEST_HARD_REG_BIT (*mask, i))
6905 count++;
6906 else
6907 break;
6908
6909 /* Use movml when all banked registers are pushed. */
6910 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
6911 use_movml = true;
6912 }
6913
6914 if (sh_cfun_resbank_handler_p ())
6915 ; /* Do nothing. */
6916 else if (use_movml)
6917 {
6918 rtx x, mem, reg, set;
6919 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6920
6921 /* We must avoid scheduling the multiple-store insn together with
6922 other insns. */
6923 emit_insn (gen_blockage ());
6924 x = gen_movml_push_banked (sp_reg);
6925 x = emit_frame_insn (x);
6926 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6927 {
6928 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
6929 reg = gen_rtx_REG (SImode, i);
6930 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
6931 }
6932
6933 set = gen_rtx_SET (sp_reg, plus_constant (Pmode, sp_reg, - 32));
6934 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
6935 emit_insn (gen_blockage ());
6936 }
6937 else
6938 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6939 if (TEST_HARD_REG_BIT (*mask, i))
6940 push (i);
6941 }
6942
6943 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6944 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6945 push (PR_REG);
6946 }
6947
6948 /* Work out the registers which need to be saved, both as a mask and a
6949 count of saved words. Return the count.
6950
6951 If doing a pragma interrupt function, then push all regs used by the
6952 function, and if we call another function (we can tell by looking at PR),
6953 make sure that all the regs it clobbers are safe too. */
6954 static int
6955 calc_live_regs (HARD_REG_SET *live_regs_mask)
6956 {
6957 unsigned int reg;
6958 tree attrs;
6959 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6960 bool nosave_low_regs;
6961
6962 attrs = DECL_ATTRIBUTES (current_function_decl);
6963 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
6964 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
6965 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
6966 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
6967
6968 CLEAR_HARD_REG_SET (*live_regs_mask);
6969 if (TARGET_FPU_DOUBLE && TARGET_FMOVD && interrupt_handler
6970 && df_regs_ever_live_p (FPSCR_REG))
6971 target_flags &= ~MASK_FPU_SINGLE;
6972 /* If switching to double mode avoids a lot of register saves, do that. */
6973 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD && TARGET_FPU_SINGLE)
6974 for (int count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
6975 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
6976 && (! call_really_used_regs[reg]
6977 || interrupt_handler)
6978 && ++count > 2)
6979 {
6980 target_flags &= ~MASK_FPU_SINGLE;
6981 break;
6982 }
6983
6984
6985 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
6986 bool pr_live = (pr_initial
6987 ? (!REG_P (pr_initial)
6988 || REGNO (pr_initial) != (PR_REG))
6989 : df_regs_ever_live_p (PR_REG));
6990 /* For SHcompact, if not optimizing, we end up with a memory reference
6991 using the return address pointer for __builtin_return_address even
6992 though there is no actual need to put the PR register on the stack. */
6993 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
6994
6995 /* Force PR to be live if the prologue has to call the SHmedia
6996 argument decoder or register saver. */
6997 bool has_call = pr_live;
6998
6999 int count;
7000 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7001 {
7002 if (reg == PR_REG
7003 ? pr_live
7004 : interrupt_handler
7005 ? (/* Need to save all the regs ever live. */
7006 (df_regs_ever_live_p (reg)
7007 || (call_really_used_regs[reg]
7008 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7009 || reg == PIC_OFFSET_TABLE_REGNUM)
7010 && has_call))
7011 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7012 && reg != RETURN_ADDRESS_POINTER_REGNUM
7013 && reg != T_REG && reg != GBR_REG
7014 && reg != FPSCR_MODES_REG && reg != FPSCR_STAT_REG
7015 /* Push fpscr only on targets which have an FPU. */
7016 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7017 : (/* Only push those regs which are used and need to be saved. */
7018 (false)
7019 || (df_regs_ever_live_p (reg)
7020 && ((!call_really_used_regs[reg]
7021 && !(reg != PIC_OFFSET_TABLE_REGNUM
7022 && fixed_regs[reg] && call_used_regs[reg]))
7023 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7024 || (crtl->calls_eh_return
7025 && (reg == EH_RETURN_DATA_REGNO (0)
7026 || reg == EH_RETURN_DATA_REGNO (1)
7027 || reg == EH_RETURN_DATA_REGNO (2)
7028 || reg == EH_RETURN_DATA_REGNO (3)))
7029 || ((reg == MACL_REG || reg == MACH_REG)
7030 && df_regs_ever_live_p (reg)
7031 && sh_cfun_attr_renesas_p ())
7032 ))
7033 {
7034 SET_HARD_REG_BIT (*live_regs_mask, reg);
7035 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7036
7037 if (TARGET_FPU_DOUBLE && TARGET_FMOVD
7038 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7039 {
7040 if (FP_REGISTER_P (reg))
7041 {
7042 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7043 {
7044 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7045 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7046 }
7047 }
7048 else if (XD_REGISTER_P (reg))
7049 {
7050 /* Must switch to double mode to access these registers. */
7051 target_flags &= ~MASK_FPU_SINGLE;
7052 }
7053 }
7054 }
7055 if (nosave_low_regs && reg == R8_REG)
7056 break;
7057 }
7058
7059 return count;
7060 }
7061
7062 /* Code to generate prologue and epilogue sequences */
7063
7064 /* PUSHED is the number of bytes that are being pushed on the
7065 stack for register saves. Return the frame size, padded
7066 appropriately so that the stack stays properly aligned. */
7067 static HOST_WIDE_INT
7068 rounded_frame_size (int pushed)
7069 {
7070 HOST_WIDE_INT size = get_frame_size ();
7071 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7072
7073 if (ACCUMULATE_OUTGOING_ARGS)
7074 size += crtl->outgoing_args_size;
7075
7076 return ((size + pushed + align - 1) & -align) - pushed;
7077 }
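/* A worked example of the rounding above (illustrative values only, assuming
   STACK_BOUNDARY is 32 bits so ALIGN is 4): with get_frame_size () == 20 and
   PUSHED == 12, (20 + 12 + 4 - 1) & -4 == 32, so the function returns
   32 - 12 == 20.  The prologue then subtracts 12 bytes of register saves plus
   20 bytes of frame, keeping the stack pointer 4-byte aligned overall.  */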
7078
7079 /* Expand code for the function prologue. */
7080 void
7081 sh_expand_prologue (void)
7082 {
7083 int save_flags = target_flags;
7084 tree sp_switch_attr
7085 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7086
7087 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7088
7089 /* We have pretend args if we had an object sent partially in registers
7090 and partially on the stack, e.g. a large structure. */
7091 int pretend_args = crtl->args.pretend_args_size;
7092 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7093 && (NPARM_REGS(SImode)
7094 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7095 pretend_args = 0;
7096
7097 output_stack_adjust (-pretend_args, stack_pointer_rtx, 0, NULL, true);
7098 int stack_usage = pretend_args;
7099
7100 /* Emit the code for SETUP_VARARGS. */
7101 if (cfun->stdarg)
7102 {
7103 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7104 {
7105 /* Push arg regs as if they'd been provided by the caller on the stack. */
7106 for (int i = 0; i < NPARM_REGS(SImode); i++)
7107 {
7108 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7109
7110 if (i >= (NPARM_REGS(SImode)
7111 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7112 ))
7113 break;
7114 push (rn);
7115 stack_usage += GET_MODE_SIZE (SImode);
7116 }
7117 }
7118 }
7119
7120 /* If we're supposed to switch stacks at function entry, do so now. */
7121 if (sp_switch_attr)
7122 {
7123 rtx lab, newsrc;
7124 /* The argument specifies a variable holding the address of the
7125 stack the interrupt function should switch to/from at entry/exit. */
7126 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7127 const char* s = ggc_strdup (TREE_STRING_POINTER (arg));
7128 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7129
7130 lab = add_constant (sp_switch, SImode, 0);
7131 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7132
7133 emit_insn (gen_sp_switch_1 (newsrc));
7134 }
7135
7136 HARD_REG_SET live_regs_mask;
7137 int d = calc_live_regs (&live_regs_mask);
7138 /* ??? Maybe we could save some switching if we can move a mode switch
7139 that already happens to be at the function start into the prologue. */
7140 if (target_flags != save_flags && ! current_function_interrupt)
7141 emit_insn (gen_toggle_sz ());
7142
7143 push_regs (&live_regs_mask, current_function_interrupt);
7144 stack_usage += d;
7145
7146 if (flag_pic && !TARGET_FDPIC
7147 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7148 emit_insn (gen_GOTaddr2picreg (const0_rtx));
7149
7150 if (target_flags != save_flags && ! current_function_interrupt)
7151 emit_insn (gen_toggle_sz ());
7152
7153 target_flags = save_flags;
7154
7155 output_stack_adjust (-rounded_frame_size (d),
7156 stack_pointer_rtx, 0, NULL, true);
7157 stack_usage += rounded_frame_size (d);
7158
7159 if (frame_pointer_needed)
7160 emit_frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7161
7162 /* If we are profiling, make sure no instructions are scheduled before
7163 the call to mcount. Similarly if some call instructions are swapped
7164 before frame related insns, it'll confuse the unwinder because
7165 currently SH has no unwind info for function epilogues. */
7166 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7167 emit_insn (gen_blockage ());
7168
7169 if (flag_stack_usage_info)
7170 current_function_static_stack_size = stack_usage;
7171 }
7172
7173 /* Expand code for the function epilogue. */
7174 void
7175 sh_expand_epilogue (bool sibcall_p)
7176 {
7177 int save_flags = target_flags;
7178 bool fpscr_deferred = false;
7179 int e = sibcall_p ? -1 : 1;
7180
7181 HARD_REG_SET live_regs_mask;
7182 int d = calc_live_regs (&live_regs_mask);
7183
7184 int save_size = d;
7185 int frame_size = rounded_frame_size (d);
7186
7187 if (frame_pointer_needed)
7188 {
7189 /* We must avoid scheduling the epilogue with previous basic blocks.
7190 See PR/18032 and PR/40313. */
7191 emit_insn (gen_blockage ());
7192 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7193 &live_regs_mask, true);
7194
7195 /* We must avoid moving the stack pointer adjustment past code
7196 which reads from the local frame, else an interrupt could
7197 occur after the SP adjustment and clobber data in the local
7198 frame. */
7199 emit_insn (gen_blockage ());
7200 emit_frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7201 }
7202 else if (frame_size)
7203 {
7204 /* We must avoid moving the stack pointer adjustment past code
7205 which reads from the local frame, else an interrupt could
7206 occur after the SP adjustment and clobber data in the local
7207 frame. */
7208 emit_insn (gen_blockage ());
7209 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7210 &live_regs_mask, true);
7211 }
7212
7213 /* Pop all the registers. */
7214
7215 if (target_flags != save_flags && ! current_function_interrupt)
7216 emit_insn (gen_toggle_sz ());
7217
7218 {
7219 int last_reg;
7220
7221 save_size = 0;
7222 /* For an ISR with RESBANK attribute assigned, don't pop PR
7223 register. */
7224 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7225 && !sh_cfun_resbank_handler_p ())
7226 {
7227 if (!frame_pointer_needed)
7228 emit_insn (gen_blockage ());
7229 pop (PR_REG);
7230 }
7231
7232 /* Banked registers are popped first to avoid being scheduled in the
7233 delay slot. RTE switches banks before the delay-slot instruction. */
7234 if (current_function_interrupt)
7235 {
7236 bool use_movml = false;
7237
7238 if (TARGET_SH2A)
7239 {
7240 unsigned int count = 0;
7241
7242 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7243 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7244 count++;
7245 else
7246 break;
7247
7248 /* Use movml when all banked registers are popped. */
7249 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7250 use_movml = true;
7251 }
7252
7253 if (sh_cfun_resbank_handler_p ())
7254 ; /* Do nothing. */
7255 else if (use_movml)
7256 {
7257 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7258
7259 /* We must avoid scheduling the multiple-load insn together with
7260 other insns. */
7261 emit_insn (gen_blockage ());
7262 emit_insn (gen_movml_pop_banked (sp_reg));
7263 emit_insn (gen_blockage ());
7264 }
7265 else
7266 for (int i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7267 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7268 pop (i);
7269
7270 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7271 }
7272 else
7273 last_reg = FIRST_PSEUDO_REGISTER;
7274
7275 for (int i = 0; i < last_reg; i++)
7276 {
7277 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7278
7279 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7280 && hard_reg_set_intersect_p (live_regs_mask,
7281 reg_class_contents[DF_REGS]))
7282 fpscr_deferred = true;
7283 /* For an ISR with RESBANK attribute assigned, don't pop
7284 following registers, R0-R14, MACH, MACL and GBR. */
7285 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7286 && ! (sh_cfun_resbank_handler_p ()
7287 && ((j >= FIRST_GENERAL_REG
7288 && j < LAST_GENERAL_REG)
7289 || j == MACH_REG
7290 || j == MACL_REG
7291 || j == GBR_REG)))
7292 pop (j);
7293
7294 if (j == FIRST_FP_REG && fpscr_deferred)
7295 pop (FPSCR_REG);
7296 }
7297 }
7298 if (target_flags != save_flags && ! current_function_interrupt)
7299 emit_insn (gen_toggle_sz ());
7300 target_flags = save_flags;
7301
7302 output_stack_adjust (crtl->args.pretend_args_size + save_size,
7303 stack_pointer_rtx, e, NULL, true);
7304
7305 if (crtl->calls_eh_return)
7306 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7307 EH_RETURN_STACKADJ_RTX));
7308
7309 /* Switch back to the normal stack if necessary. */
7310 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7311 emit_insn (gen_sp_switch_2 ());
7312
7313 /* Tell flow the insn that pops PR isn't dead. */
7314 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7315 emit_use (gen_rtx_REG (SImode, PR_REG));
7316 }
7317
7318 /* Emit code to change the current function's return address to RA.
7319 TEMP is available as a scratch register, if needed. */
7320 void
7321 sh_set_return_address (rtx ra, rtx tmp)
7322 {
7323 HARD_REG_SET live_regs_mask;
7324 int d = calc_live_regs (&live_regs_mask);
7325
7326 /* If pr_reg isn't live, we can set it directly. */
7327 if (! TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7328 {
7329 rtx rr = gen_rtx_REG (SImode, PR_REG);
7330 emit_insn (GEN_MOV (rr, ra));
7331 /* Tell flow the register for return isn't dead. */
7332 emit_use (rr);
7333 return;
7334 }
7335
7336 int pr_offset = rounded_frame_size (d);
7337
7338 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7339
7340 if (frame_pointer_needed)
7341 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7342 else
7343 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
7344
7345 tmp = gen_frame_mem (Pmode, tmp);
7346 emit_insn (GEN_MOV (tmp, ra));
7347 /* Tell flow this store isn't dead. */
7348 emit_use (tmp);
7349 }
7350
7351 /* Clear variables at function end. */
7352 static void
7353 sh_output_function_epilogue (FILE *)
7354 {
7355 }
7356
7357 static rtx
7358 sh_builtin_saveregs (void)
7359 {
7360 /* First unnamed integer register. */
7361 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7362 /* Number of integer registers we need to save. */
7363 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7364 /* First unnamed SFmode float reg */
7365 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7366 /* Number of SFmode float regs to save. */
7367 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7368 rtx regbuf, fpregs;
7369 int bufsize, regno;
7370 alias_set_type alias_set;
7371
7372 if (!TARGET_FPU_ANY)
7373 {
7374 error ("__builtin_saveregs not supported by this subtarget");
7375 return const0_rtx;
7376 }
7377
7378 /* Allocate block of memory for the regs. */
7379 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7380 Or can assign_stack_local accept a 0 SIZE argument? */
7381 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7382
7383 if (n_floatregs & 1)
7384 {
7385 rtx addr;
7386
7387 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7388 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7389 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7390 regbuf = change_address (regbuf, BLKmode, addr);
7391 }
7392 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7393 {
7394 rtx addr, mask;
7395
7396 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7397 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
7398 XEXP (regbuf, 0), 4));
7399 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7400 emit_insn (gen_andsi3 (addr, addr, mask));
7401 regbuf = change_address (regbuf, BLKmode, addr);
7402 }
7403 else
7404 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7405 alias_set = get_varargs_alias_set ();
7406 set_mem_alias_set (regbuf, alias_set);
7407
7408 /* Save int args.
7409 This is optimized to only save the regs that are necessary. Explicitly
7410 named args need not be saved. */
7411 if (n_intregs > 0)
7412 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7413 adjust_address (regbuf, BLKmode,
7414 n_floatregs * UNITS_PER_WORD),
7415 n_intregs);
7416
7417 /* Save float args.
7418 This is optimized to only save the regs that are necessary. Explicitly
7419 named args need not be saved.
7420 We explicitly build a pointer to the buffer because it halves the insn
7421 count when not optimizing (otherwise the pointer is built for each reg
7422 saved).
7423 We emit the moves in reverse order so that we can use predecrement. */
7424
7425 fpregs = copy_to_mode_reg (Pmode,
7426 plus_constant (Pmode, XEXP (regbuf, 0),
7427 n_floatregs * UNITS_PER_WORD));
7428 if (TARGET_FPU_DOUBLE)
7429 {
7430 rtx mem;
7431 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7432 {
7433 emit_insn (gen_addsi3 (fpregs, fpregs,
7434 GEN_INT (-2 * UNITS_PER_WORD)));
7435 mem = change_address (regbuf, DFmode, fpregs);
7436 emit_move_insn (mem,
7437 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7438 }
7439 regno = first_floatreg;
7440 if (regno & 1)
7441 {
7442 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7443 mem = change_address (regbuf, SFmode, fpregs);
7444 emit_move_insn (mem,
7445 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
7446 + regno - SH_REG_MSW_OFFSET));
7447 }
7448 }
7449 else
7450 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7451 {
7452 rtx mem;
7453
7454 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7455 mem = change_address (regbuf, SFmode, fpregs);
7456 emit_move_insn (mem,
7457 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7458 }
7459
7460 /* Return the address of the regbuf. */
7461 return XEXP (regbuf, 0);
7462 }
7463
7464 /* Define the `__builtin_va_list' type for the ABI. */
7465 static tree
7466 sh_build_builtin_va_list (void)
7467 {
7468 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7469 tree record, type_decl;
7470
7471 if ((! TARGET_SH2E && ! TARGET_SH4)
7472 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7473 return ptr_type_node;
7474
7475 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7476 type_decl = build_decl (BUILTINS_LOCATION,
7477 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7478
7479 f_next_o = build_decl (BUILTINS_LOCATION,
7480 FIELD_DECL, get_identifier ("__va_next_o"),
7481 ptr_type_node);
7482 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7483 FIELD_DECL,
7484 get_identifier ("__va_next_o_limit"),
7485 ptr_type_node);
7486 f_next_fp = build_decl (BUILTINS_LOCATION,
7487 FIELD_DECL, get_identifier ("__va_next_fp"),
7488 ptr_type_node);
7489 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7490 FIELD_DECL,
7491 get_identifier ("__va_next_fp_limit"),
7492 ptr_type_node);
7493 f_next_stack = build_decl (BUILTINS_LOCATION,
7494 FIELD_DECL, get_identifier ("__va_next_stack"),
7495 ptr_type_node);
7496
7497 DECL_FIELD_CONTEXT (f_next_o) = record;
7498 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7499 DECL_FIELD_CONTEXT (f_next_fp) = record;
7500 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7501 DECL_FIELD_CONTEXT (f_next_stack) = record;
7502
7503 TYPE_STUB_DECL (record) = type_decl;
7504 TYPE_NAME (record) = type_decl;
7505 TYPE_FIELDS (record) = f_next_o;
7506 DECL_CHAIN (f_next_o) = f_next_o_limit;
7507 DECL_CHAIN (f_next_o_limit) = f_next_fp;
7508 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
7509 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
7510
7511 layout_type (record);
7512
7513 return record;
7514 }
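/* For reference, the record built above corresponds roughly to the following
   C structure (a sketch derived from the field decls above; all fields are
   plain pointers):

     struct __va_list_tag
     {
       void *__va_next_o;        // next non-FP arg in the register save area
       void *__va_next_o_limit;  // end of the integer register save area
       void *__va_next_fp;       // next FP arg in the register save area
       void *__va_next_fp_limit; // end of the FP register save area
       void *__va_next_stack;    // next argument passed on the stack
     };  */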
7515
7516 /* Implement `va_start' for varargs and stdarg. */
7517 static void
7518 sh_va_start (tree valist, rtx nextarg)
7519 {
7520 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7521 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7522 tree t, u;
7523 int nfp, nint;
7524
7525 if ((! TARGET_SH2E && ! TARGET_SH4)
7526 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7527 {
7528 std_expand_builtin_va_start (valist, nextarg);
7529 return;
7530 }
7531
7532 f_next_o = TYPE_FIELDS (va_list_type_node);
7533 f_next_o_limit = DECL_CHAIN (f_next_o);
7534 f_next_fp = DECL_CHAIN (f_next_o_limit);
7535 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7536 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7537
7538 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7539 NULL_TREE);
7540 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7541 valist, f_next_o_limit, NULL_TREE);
7542 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7543 NULL_TREE);
7544 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7545 valist, f_next_fp_limit, NULL_TREE);
7546 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7547 valist, f_next_stack, NULL_TREE);
7548
7549 /* Call __builtin_saveregs. */
7550 u = make_tree (sizetype, expand_builtin_saveregs ());
7551 u = fold_convert (ptr_type_node, u);
7552 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7553 TREE_SIDE_EFFECTS (t) = 1;
7554 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7555
7556 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7557 if (nfp < 8)
7558 nfp = 8 - nfp;
7559 else
7560 nfp = 0;
7561 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
7562 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7563 TREE_SIDE_EFFECTS (t) = 1;
7564 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7565
7566 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7567 TREE_SIDE_EFFECTS (t) = 1;
7568 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7569
7570 nint = crtl->args.info.arg_count[SH_ARG_INT];
7571 if (nint < 4)
7572 nint = 4 - nint;
7573 else
7574 nint = 0;
7575 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
7576 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7577 TREE_SIDE_EFFECTS (t) = 1;
7578 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7579
7580 u = make_tree (ptr_type_node, nextarg);
7581 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7582 TREE_SIDE_EFFECTS (t) = 1;
7583 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7584 }
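/* A summary of the assignments above (no additional machinery): the buffer
   returned by __builtin_saveregs holds the unnamed FP args first and the
   unnamed integer args right after them, so next_fp points at the start of
   the buffer, next_fp_limit == next_o marks the end of the FP area,
   next_o_limit marks the end of the integer area, and next_stack points at
   the first anonymous argument passed on the stack (NEXTARG).  */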
7585
7586 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7587 member, return it. */
7588 static tree
7589 find_sole_member (tree type)
7590 {
7591 tree field, member = NULL_TREE;
7592
7593 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7594 {
7595 if (TREE_CODE (field) != FIELD_DECL)
7596 continue;
7597 if (!DECL_SIZE (field))
7598 return NULL_TREE;
7599 if (integer_zerop (DECL_SIZE (field)))
7600 continue;
7601 if (member)
7602 return NULL_TREE;
7603 member = field;
7604 }
7605 return member;
7606 }
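/* For example (illustrative): for 'struct A { float f; }' this returns the
   FIELD_DECL for F, so the struct can be handled like its member below;
   for 'struct B { int a; int b; }' it returns NULL_TREE because there is
   more than one nonzero-sized member.  */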
7607
7608 /* Implement `va_arg'. */
7609 static tree
7610 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7611 gimple_seq *post_p ATTRIBUTE_UNUSED)
7612 {
7613 tree tmp;
7614 tree addr, lab_over = NULL, result = NULL;
7615 tree eff_type;
7616
7617 const bool pass_by_ref =
7618 !VOID_TYPE_P (type)
7619 && targetm.calls.must_pass_in_stack (TYPE_MODE (type), type);
7620
7621 if (pass_by_ref)
7622 type = build_pointer_type (type);
7623
7624 HOST_WIDE_INT size = int_size_in_bytes (type);
7625 HOST_WIDE_INT rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7626 tree pptr_type_node = build_pointer_type (ptr_type_node);
7627
7628 if ((TARGET_SH2E || TARGET_SH4)
7629 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7630 {
7631 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7632 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7633 tree lab_false;
7634 tree member;
7635
7636 f_next_o = TYPE_FIELDS (va_list_type_node);
7637 f_next_o_limit = DECL_CHAIN (f_next_o);
7638 f_next_fp = DECL_CHAIN (f_next_o_limit);
7639 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7640 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7641
7642 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7643 NULL_TREE);
7644 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7645 valist, f_next_o_limit, NULL_TREE);
7646 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7647 valist, f_next_fp, NULL_TREE);
7648 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7649 valist, f_next_fp_limit, NULL_TREE);
7650 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7651 valist, f_next_stack, NULL_TREE);
7652
7653 /* Structures with a single member with a distinct mode are passed
7654 like their member. This is relevant if the latter has a REAL_TYPE
7655 or COMPLEX_TYPE type. */
7656 eff_type = type;
7657 while (TREE_CODE (eff_type) == RECORD_TYPE
7658 && (member = find_sole_member (eff_type))
7659 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7660 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7661 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7662 {
7663 tree field_type = TREE_TYPE (member);
7664
7665 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7666 eff_type = field_type;
7667 else
7668 {
7669 gcc_assert ((TYPE_ALIGN (eff_type)
7670 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7671 || (TYPE_ALIGN (eff_type)
7672 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7673 break;
7674 }
7675 }
7676
7677 bool pass_as_float;
7678 if (TARGET_FPU_DOUBLE)
7679 {
7680 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7681 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7682 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7683 && size <= 16));
7684 }
7685 else
7686 {
7687 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7688 }
7689
7690 addr = create_tmp_var (pptr_type_node);
7691 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7692 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7693
7694 valist = build_simple_mem_ref (addr);
7695
7696 if (pass_as_float)
7697 {
7698 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp));
7699 tree cmp;
7700 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7701
7702 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7703 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7704
7705 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7706 tmp = next_fp_limit;
7707 if (size > 4 && !is_double)
7708 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
7709 tmp = build2 (GE_EXPR, boolean_type_node,
7710 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7711 cmp = build3 (COND_EXPR, void_type_node, tmp,
7712 build1 (GOTO_EXPR, void_type_node,
7713 unshare_expr (lab_false)), NULL_TREE);
7714 if (!is_double)
7715 gimplify_and_add (cmp, pre_p);
7716
7717 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7718 || (is_double || size == 16))
7719 {
7720 tmp = fold_convert (sizetype, next_fp_tmp);
7721 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7722 size_int (UNITS_PER_WORD));
7723 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
7724 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7725 }
7726 if (is_double)
7727 gimplify_and_add (cmp, pre_p);
7728
7729 #ifdef FUNCTION_ARG_SCmode_WART
7730 if (TYPE_MODE (eff_type) == SCmode
7731 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7732 {
7733 tree subtype = TREE_TYPE (eff_type);
7734 tree real, imag;
7735
7736 imag
7737 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7738 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7739
7740 real
7741 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7742 real = get_initialized_tmp_var (real, pre_p, NULL);
7743
7744 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7745 if (type != eff_type)
7746 result = build1 (VIEW_CONVERT_EXPR, type, result);
7747 result = get_initialized_tmp_var (result, pre_p, NULL);
7748 }
7749 #endif /* FUNCTION_ARG_SCmode_WART */
7750
7751 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7752 gimplify_and_add (tmp, pre_p);
7753
7754 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7755 gimplify_and_add (tmp, pre_p);
7756
7757 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7758 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7759 gimplify_assign (unshare_expr (next_fp_tmp),
7760 unshare_expr (valist), pre_p);
7761
7762 gimplify_assign (unshare_expr (valist),
7763 unshare_expr (next_fp_tmp), post_p);
7764 valist = next_fp_tmp;
7765 }
7766 else
7767 {
7768 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
7769 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7770 unshare_expr (next_o_limit));
7771 tmp = build3 (COND_EXPR, void_type_node, tmp,
7772 build1 (GOTO_EXPR, void_type_node,
7773 unshare_expr (lab_false)),
7774 NULL_TREE);
7775 gimplify_and_add (tmp, pre_p);
7776
7777 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7778 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7779
7780 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7781 gimplify_and_add (tmp, pre_p);
7782
7783 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7784 gimplify_and_add (tmp, pre_p);
7785
7786 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7787 gimplify_assign (unshare_expr (next_o),
7788 unshare_expr (next_o_limit), pre_p);
7789
7790 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7791 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7792 }
7793
7794 if (!result)
7795 {
7796 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7797 gimplify_and_add (tmp, pre_p);
7798 }
7799 }
7800
7801 /* ??? In va-sh.h, there had been code to make values larger than
7802 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7803
7804 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7805 if (result)
7806 {
7807 gimplify_assign (result, tmp, pre_p);
7808 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
7809 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7810 gimplify_and_add (tmp, pre_p);
7811 }
7812 else
7813 result = tmp;
7814
7815 if (pass_by_ref)
7816 result = build_va_arg_indirect_ref (result);
7817
7818 return result;
7819 }
7820
7821 /* 64-bit floating point memory transfers are paired single-precision loads
7822 or stores, so the DWARF information needs fixing for little endian (unless
7823 PR=SZ=1 in FPSCR). */
7824 rtx
7825 sh_dwarf_register_span (rtx reg)
7826 {
7827 unsigned regno = REGNO (reg);
7828
7829 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
7830 return NULL_RTX;
7831
7832 return
7833 gen_rtx_PARALLEL (VOIDmode,
7834 gen_rtvec (2,
7835 gen_rtx_REG (SFmode, regno + 1),
7836 gen_rtx_REG (SFmode, regno)));
7837 }
7838
7839 static machine_mode
7840 sh_promote_function_mode (const_tree type, machine_mode mode,
7841 int *punsignedp, const_tree funtype,
7842 int for_return)
7843 {
7844 if (sh_promote_prototypes (funtype))
7845 return promote_mode (type, mode, punsignedp);
7846 else
7847 return default_promote_function_mode (type, mode, punsignedp, funtype,
7848 for_return);
7849 }
7850
7851 static bool
7852 sh_promote_prototypes (const_tree type)
7853 {
7854 if (TARGET_HITACHI)
7855 return false;
7856 if (! type)
7857 return true;
7858 return ! sh_attr_renesas_p (type);
7859 }
7860
7861 static bool
7862 sh_pass_by_reference (cumulative_args_t cum_v, machine_mode mode,
7863 const_tree type, bool named ATTRIBUTE_UNUSED)
7864 {
7865 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7866
7867 if (targetm.calls.must_pass_in_stack (mode, type))
7868 return true;
7869
7870 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7871 wants to know about pass-by-reference semantics for incoming
7872 arguments. */
7873 if (! cum)
7874 return false;
7875
7876 return false;
7877 }
7878
7879 static bool
7880 sh_callee_copies (cumulative_args_t cum, machine_mode mode,
7881 const_tree type, bool named ATTRIBUTE_UNUSED)
7882 {
7883 /* ??? How can it possibly be correct to return true only on the
7884 caller side of the equation? Is there someplace else in the
7885 sh backend that's magically producing the copies? */
7886 return (get_cumulative_args (cum)->outgoing
7887 && ((mode == BLKmode ? TYPE_ALIGN (type) : GET_MODE_ALIGNMENT (mode))
7888 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7889 }
7890
7891 static sh_arg_class
7892 get_sh_arg_class (machine_mode mode)
7893 {
7894 if (TARGET_FPU_ANY && mode == SFmode)
7895 return SH_ARG_FLOAT;
7896
7897 if (TARGET_FPU_DOUBLE
7898 && (GET_MODE_CLASS (mode) == MODE_FLOAT
7899 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT))
7900 return SH_ARG_FLOAT;
7901
7902 return SH_ARG_INT;
7903 }
7904
7905 /* Round a register number up to a proper boundary for an arg of mode
7906 MODE.
7907 The SH doesn't care about double alignment, so we only
7908 round doubles to even regs when explicitly asked to. */
7909 static int
7910 sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode)
7911 {
7912 /* FIXME: This used to be a macro and has been copy pasted into this
7913 function as is. Make this more readable. */
7914 return
7915 (((TARGET_ALIGN_DOUBLE
7916 || (TARGET_FPU_DOUBLE
7917 && (mode == DFmode || mode == DCmode)
7918 && cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode)))
7919 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD)
7920 ? (cum.arg_count[(int) get_sh_arg_class (mode)]
7921 + (cum.arg_count[(int) get_sh_arg_class (mode)] & 1))
7922 : cum.arg_count[(int) get_sh_arg_class (mode)]);
7923 }
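/* For example (illustrative): with TARGET_ALIGN_DOUBLE and a DFmode argument
   arriving when three argument registers of its class are already in use,
   the expression above yields 3 + (3 & 1) == 4, i.e. the double starts on an
   even register pair.  With an even count, or for arguments no wider than a
   word, the count is returned unchanged.  */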
7924
7925 /* Return true if arg of the specified mode should be passed in a register
7926 or false otherwise. */
7927 static bool
7928 sh_pass_in_reg_p (const CUMULATIVE_ARGS& cum, machine_mode mode,
7929 const_tree type)
7930 {
7931 /* FIXME: This used to be a macro and has been copy pasted into this
7932 function as is. Make this more readable. */
7933 return
7934 ((type == 0
7935 || (! TREE_ADDRESSABLE (type)
7936 && (! (TARGET_HITACHI || cum.renesas_abi)
7937 || ! (AGGREGATE_TYPE_P (type)
7938 || (!TARGET_FPU_ANY
7939 && (GET_MODE_CLASS (mode) == MODE_FLOAT
7940 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SFmode)))))))
7941 && ! cum.force_mem
7942 && (TARGET_SH2E
7943 ? ((mode) == BLKmode
7944 ? ((cum.arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD
7945 + int_size_in_bytes (type))
7946 <= NPARM_REGS (SImode) * UNITS_PER_WORD)
7947 : ((sh_round_reg (cum, mode)
7948 + HARD_REGNO_NREGS (BASE_ARG_REG (mode), mode))
7949 <= NPARM_REGS (mode)))
7950 : sh_round_reg (cum, mode) < NPARM_REGS (mode)));
7951 }
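/* For example (illustrative, assuming 4-byte words and NPARM_REGS (SImode)
   == 4 as on SH): on an SH2E target with one integer argument register
   already used, an 8-byte BLKmode struct still qualifies, since
   1 * UNITS_PER_WORD + 8 == 12 is no larger than 4 * UNITS_PER_WORD == 16,
   so the struct is passed at least partially in registers.  */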
7952
7953 static int
7954 sh_arg_partial_bytes (cumulative_args_t cum_v, machine_mode mode,
7955 tree type, bool named ATTRIBUTE_UNUSED)
7956 {
7957 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7958 int words = 0;
7959
7960 if (sh_pass_in_reg_p (*cum, mode, type)
7961 && !TARGET_FPU_DOUBLE
7962 && (sh_round_reg (*cum, mode)
7963 + (mode != BLKmode
7964 ? CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)
7965 : CEIL (int_size_in_bytes (type), UNITS_PER_WORD))
7966 > NPARM_REGS (mode)))
7967 words = NPARM_REGS (mode) - sh_round_reg (*cum, mode);
7968
7969 return words * UNITS_PER_WORD;
7970 }
7971
7972
7973 /* Define where to put the arguments to a function.
7974 Value is zero to push the argument on the stack,
7975 or a hard register in which to store the argument.
7976
7977 MODE is the argument's machine mode.
7978 TYPE is the data type of the argument (as a tree).
7979 This is null for libcalls where that information may
7980 not be available.
7981 CUM is a variable of type CUMULATIVE_ARGS which gives info about
7982 the preceding args and about the function being called.
7983 NAMED is nonzero if this argument is a named parameter
7984 (otherwise it is an extra parameter matching an ellipsis).
7985
7986 On SH the first args are normally in registers
7987 and the rest are pushed. Any arg that starts within the first
7988 NPARM_REGS words is at least partially passed in a register unless
7989 its data type forbids. */
7990 static rtx
7991 sh_function_arg (cumulative_args_t ca_v, machine_mode mode,
7992 const_tree type, bool named)
7993 {
7994 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
7995
7996 if (mode == VOIDmode)
7997 return ca->renesas_abi ? const1_rtx : const0_rtx;
7998
7999 if (sh_pass_in_reg_p (*ca, mode, type)
8000 && (named || ! (TARGET_HITACHI || ca->renesas_abi)))
8001 {
8002 int regno;
8003
8004 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8005 && (! FUNCTION_ARG_SCmode_WART || (sh_round_reg (*ca, mode) & 1)))
8006 {
8007 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8008 gen_rtx_REG (SFmode,
8009 BASE_ARG_REG (mode)
8010 + (sh_round_reg (*ca, mode) ^ 1)),
8011 const0_rtx);
8012 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8013 gen_rtx_REG (SFmode,
8014 BASE_ARG_REG (mode)
8015 + ((sh_round_reg (*ca, mode) + 1) ^ 1)),
8016 GEN_INT (4));
8017 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8018 }
8019
8020 /* If the alignment of a DF value causes an SF register to be
8021 skipped, we will use that skipped register for the next SF
8022 value. */
8023 if ((TARGET_HITACHI || ca->renesas_abi)
8024 && ca->free_single_fp_reg
8025 && mode == SFmode)
8026 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8027
8028 regno = (BASE_ARG_REG (mode) + sh_round_reg (*ca, mode))
8029 ^ (mode == SFmode && TARGET_SH4
8030 && TARGET_LITTLE_ENDIAN
8031 && ! TARGET_HITACHI && ! ca->renesas_abi);
8032 return gen_rtx_REG (mode, regno);
8033
8034 }
8035
8036 return NULL_RTX;
8037 }
8038
8039 /* Update the data in CUM to advance over an argument
8040 of mode MODE and data type TYPE.
8041 (TYPE is null for libcalls where that information may not be
8042 available.) */
8043 static void
8044 sh_function_arg_advance (cumulative_args_t ca_v, machine_mode mode,
8045 const_tree type, bool named ATTRIBUTE_UNUSED)
8046 {
8047 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8048
8049 if (ca->force_mem)
8050 ca->force_mem = false;
8051
8052 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8053 {
8054 /* Note that we've used the skipped register. */
8055 if (mode == SFmode && ca->free_single_fp_reg)
8056 {
8057 ca->free_single_fp_reg = 0;
8058 return;
8059 }
8060 /* When we have a DF after an SF, there's an SF register that gets
8061 skipped in order to align the DF value. We note this skipped
8062 register, because the next SF value will use it, and not the
8063 SF that follows the DF. */
8064 if (mode == DFmode
8065 && sh_round_reg (*ca, DFmode) != sh_round_reg (*ca, SFmode))
8066 {
8067 ca->free_single_fp_reg = (sh_round_reg (*ca, SFmode)
8068 + BASE_ARG_REG (mode));
8069 }
8070 }
8071
8072 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8073 || sh_pass_in_reg_p (*ca, mode, type))
8074 (ca->arg_count[(int) get_sh_arg_class (mode)]
8075 = (sh_round_reg (*ca, mode)
8076 + (mode == BLKmode
8077 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
8078 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD))));
8079 }
8080
8081 /* The Renesas calling convention doesn't quite fit into this scheme since
8082 the address is passed like an invisible argument, but one that is always
8083 passed in memory. */
8084 static rtx
8085 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8086 {
8087 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8088 return NULL_RTX;
8089 return gen_rtx_REG (Pmode, 2);
8090 }
8091
8092 /* Worker function for TARGET_FUNCTION_VALUE.
8093
8094 For the SH, this is like LIBCALL_VALUE, except that we must change the
8095 mode like PROMOTE_MODE does.
8096 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8097 tested here has to be kept in sync with the one in
8098 explow.c:promote_mode. */
8099 static rtx
8100 sh_function_value (const_tree valtype,
8101 const_tree fn_decl_or_type,
8102 bool outgoing ATTRIBUTE_UNUSED)
8103 {
8104 if (fn_decl_or_type
8105 && !DECL_P (fn_decl_or_type))
8106 fn_decl_or_type = NULL;
8107
8108 return gen_rtx_REG (
8109 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8110 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8111 && (TREE_CODE (valtype) == INTEGER_TYPE
8112 || TREE_CODE (valtype) == ENUMERAL_TYPE
8113 || TREE_CODE (valtype) == BOOLEAN_TYPE
8114 || TREE_CODE (valtype) == REAL_TYPE
8115 || TREE_CODE (valtype) == OFFSET_TYPE))
8116 && sh_promote_prototypes (fn_decl_or_type)
8117 ? SImode : TYPE_MODE (valtype)),
8118 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
8119 }
8120
8121 /* Worker function for TARGET_LIBCALL_VALUE. */
8122 static rtx
8123 sh_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8124 {
8125 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8126 }
8127
8128 /* Return true if N is a possible register number of function value. */
8129 static bool
8130 sh_function_value_regno_p (const unsigned int regno)
8131 {
8132 return regno == FIRST_RET_REG || (TARGET_SH2E && regno == FIRST_FP_RET_REG);
8133 }
8134
8135 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8136 static bool
8137 sh_return_in_memory (const_tree type, const_tree fndecl)
8138 {
8139 return TYPE_MODE (type) == BLKmode
8140 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8141 && TREE_CODE (type) == RECORD_TYPE);
8142 }
8143
8144 /* We actually emit the code in sh_expand_prologue. We used to use
8145 a static variable to flag that we need to emit this code, but that
8146 doesn't work when inlining, or when functions are deferred and then
8147 emitted later. Fortunately, we already have two flags that are part of
8148 struct function that tell if a function uses varargs or stdarg. */
8149 static void
8150 sh_setup_incoming_varargs (cumulative_args_t ca,
8151 machine_mode mode,
8152 tree type,
8153 int *pretend_arg_size,
8154 int second_time ATTRIBUTE_UNUSED)
8155 {
8156 gcc_assert (cfun->stdarg);
8157 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8158 {
8159 int named_parm_regs, anon_parm_regs;
8160
8161 named_parm_regs = (sh_round_reg (*get_cumulative_args (ca), mode)
8162 + (mode == BLKmode
8163 ? CEIL (int_size_in_bytes (type), UNITS_PER_WORD)
8164 : CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD)));
8165 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8166 if (anon_parm_regs > 0)
8167 *pretend_arg_size = anon_parm_regs * 4;
8168 }
8169 }
8170
8171 static bool
8172 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
8173 {
8174 return false;
8175 }
8176
8177 static bool
8178 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
8179 {
8180 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8181
8182 return ! (TARGET_HITACHI || ca->renesas_abi);
8183 }
8184
8185
8186 /* Define the offset between two registers, one to be eliminated, and
8187 the other its replacement, at the start of a routine. */
8188 int
8189 initial_elimination_offset (int from, int to)
8190 {
8191 const int regs_saved_rounding = 0;
8192 int save_flags = target_flags;
8193 HARD_REG_SET live_regs_mask;
8194
8195 int regs_saved = calc_live_regs (&live_regs_mask);
8196
8197 int total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8198 target_flags = save_flags;
8199
8200 int total_saved_regs_space = regs_saved + regs_saved_rounding;
8201
8202 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8203 return total_saved_regs_space + total_auto_space;
8204
8205 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8206 return total_saved_regs_space + total_auto_space;
8207
8208 /* Initial gap between fp and sp is 0. */
8209 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8210 return 0;
8211
8212 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8213 return rounded_frame_size (0);
8214
8215 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8216 return rounded_frame_size (0);
8217
8218 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8219 && (to == HARD_FRAME_POINTER_REGNUM
8220 || to == STACK_POINTER_REGNUM));
8221 return total_auto_space;
8222 }
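/* A worked example (illustrative values only): if calc_live_regs reports
   12 bytes of saved registers and rounded_frame_size (12) is 20, the offset
   from the argument pointer to the hard frame pointer (or to the stack
   pointer) is 12 + 20 == 32, while the offset from the soft frame pointer
   to the stack pointer is rounded_frame_size (0).  */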
8223
8224 /* Parse the -mfixed-range= option string. */
8225 void
8226 sh_fix_range (const char *const_str)
8227 {
8228 /* str must be of the form REG1'-'REG2{,REG1'-'REG2} where REG1 and
8229 REG2 are either register names or register numbers. The effect
8230 of this option is to mark the registers in the range from REG1 to
8231 REG2 as ``fixed'' so they won't be used by the compiler. */
8232
8233 char* str = strcpy ((char*)alloca (strlen (const_str) + 1), const_str);
8234
8235 while (1)
8236 {
8237 char* dash = strchr (str, '-');
8238 if (!dash)
8239 {
8240 warning (0, "value of -mfixed-range must have form REG1-REG2");
8241 return;
8242 }
8243 *dash = '\0';
8244 char* comma = strchr (dash + 1, ',');
8245 if (comma)
8246 *comma = '\0';
8247
8248 int first = decode_reg_name (str);
8249 if (first < 0)
8250 {
8251 warning (0, "unknown register name: %s", str);
8252 return;
8253 }
8254
8255 int last = decode_reg_name (dash + 1);
8256 if (last < 0)
8257 {
8258 warning (0, "unknown register name: %s", dash + 1);
8259 return;
8260 }
8261
8262 *dash = '-';
8263
8264 if (first > last)
8265 {
8266 warning (0, "%s-%s is an empty range", str, dash + 1);
8267 return;
8268 }
8269
8270 for (int i = first; i <= last; ++i)
8271 fixed_regs[i] = call_used_regs[i] = 1;
8272
8273 if (!comma)
8274 break;
8275
8276 *comma = ',';
8277 str = comma + 1;
8278 }
8279 }
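/* Usage sketch (illustrative): -mfixed-range=r8-r10 marks r8, r9 and r10 as
   fixed and call-used; several ranges can be given separated by commas,
   e.g. -mfixed-range=r8-r9,r12-r12.  The register names are whatever
   decode_reg_name accepts for this target.  */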
8280 \f
8281 /* Insert any deferred function attributes from earlier pragmas. */
8282 static void
8283 sh_insert_attributes (tree node, tree *attributes)
8284 {
8285 if (TREE_CODE (node) != FUNCTION_DECL)
8286 return;
8287
8288 /* We are only interested in fields. */
8289 if (!DECL_P (node))
8290 return;
8291
8292 /* Append the attributes to the deferred attributes. */
8293 *sh_deferred_function_attributes_tail = *attributes;
8294 tree attrs = sh_deferred_function_attributes;
8295 if (!attrs)
8296 return;
8297
8298 /* Some attributes imply or require the interrupt attribute. */
8299 if (!lookup_attribute ("interrupt_handler", attrs)
8300 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8301 {
8302 /* If we have a trapa_handler, but no interrupt_handler attribute,
8303 insert an interrupt_handler attribute. */
8304 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8305 /* We can't use sh_pr_interrupt here because that's not in the
8306 java frontend. */
8307 attrs
8308 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8309 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8310 if the interrupt attribute is missing, we ignore the attribute
8311 and warn. */
8312 else if (lookup_attribute ("sp_switch", attrs)
8313 || lookup_attribute ("trap_exit", attrs)
8314 || lookup_attribute ("nosave_low_regs", attrs)
8315 || lookup_attribute ("resbank", attrs))
8316 {
8317 tree *tail;
8318
8319 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8320 {
8321 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8322 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8323 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8324 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8325 warning (OPT_Wattributes,
8326 "%qE attribute only applies to interrupt functions",
8327 TREE_PURPOSE (attrs));
8328 else
8329 {
8330 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8331 NULL_TREE);
8332 tail = &TREE_CHAIN (*tail);
8333 }
8334 }
8335 attrs = *attributes;
8336 }
8337 }
8338
8339 /* Install the processed list. */
8340 *attributes = attrs;
8341
8342 /* Clear deferred attributes. */
8343 sh_deferred_function_attributes = NULL_TREE;
8344 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8345
8346 return;
8347 }
8348
8349 /*------------------------------------------------------------------------------
8350 Target specific attributes
8351 Supported attributes are:
8352
8353 * interrupt_handler
8354 Specifies this function is an interrupt handler.
8355
8356 * trapa_handler
8357 Like interrupt_handler, but don't save all registers.
8358
8359 * sp_switch
8360 Specifies an alternate stack for an interrupt handler to run on.
8361
8362 * trap_exit
8363 Use a trapa to exit an interrupt function instead of rte.
8364
8365 * nosave_low_regs
8366 Don't save r0..r7 in an interrupt handler function.
8367 This is useful on SH3* and SH4*, which have a separate set of low
8368 regs for user and privileged modes.
8369 This is mainly to be used for non-reentrant interrupt handlers (i.e.
8370 those that run with interrupts disabled and thus can't be
8371 interrupted themselves).
8372
8373 * renesas
8374 Use Renesas calling/layout conventions (functions and structures).
8375
8376 * resbank
8377 In case of an interrupt handler function, use a register bank to
8378 save registers R0-R14, MACH, MACL, GBR and PR.
8379 This is available only on SH2A targets.
8380
8381 * function_vector
8382 Declares a function to be called using the TBR relative addressing
8383 mode. Takes an argument that specifies the slot number in the table
8384 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
8385 */
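/* Usage sketch for the attributes above (illustrative declarations only;
   alt_stack_top is a hypothetical variable holding the address of the
   alternate stack):

     void __attribute__ ((interrupt_handler)) irq (void);
     void __attribute__ ((interrupt_handler, sp_switch ("alt_stack_top")))
       irq_alt_stack (void);
     void __attribute__ ((interrupt_handler, trap_exit (12))) irq_trapa (void);
     void __attribute__ ((function_vector (18))) tbr_callee (void);      // SH2A
     void __attribute__ ((interrupt_handler, resbank)) irq_banked (void); // SH2A
*/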
8386
8387 /* Handle a 'resbank' attribute. */
8388 static tree
8389 sh_handle_resbank_handler_attribute (tree * node, tree name,
8390 tree args ATTRIBUTE_UNUSED,
8391 int flags ATTRIBUTE_UNUSED,
8392 bool * no_add_attrs)
8393 {
8394 if (!TARGET_SH2A)
8395 {
8396 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8397 name);
8398 *no_add_attrs = true;
8399 }
8400 if (TREE_CODE (*node) != FUNCTION_DECL)
8401 {
8402 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8403 name);
8404 *no_add_attrs = true;
8405 }
8406
8407 return NULL_TREE;
8408 }
8409
8410 /* Handle an "interrupt_handler" attribute; arguments as in
8411 struct attribute_spec.handler. */
8412 static tree
8413 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8414 tree args ATTRIBUTE_UNUSED,
8415 int flags ATTRIBUTE_UNUSED,
8416 bool *no_add_attrs)
8417 {
8418 if (TREE_CODE (*node) != FUNCTION_DECL)
8419 {
8420 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8421 name);
8422 *no_add_attrs = true;
8423 }
8424
8425 return NULL_TREE;
8426 }
8427
8428 /* Handle a 'function_vector' attribute; arguments as in
8429 struct attribute_spec.handler. */
8430 static tree
8431 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8432 tree args ATTRIBUTE_UNUSED,
8433 int flags ATTRIBUTE_UNUSED,
8434 bool * no_add_attrs)
8435 {
8436 if (!TARGET_SH2A)
8437 {
8438 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8439 name);
8440 *no_add_attrs = true;
8441 }
8442 else if (TREE_CODE (*node) != FUNCTION_DECL)
8443 {
8444 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8445 name);
8446 *no_add_attrs = true;
8447 }
8448 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8449 {
8450 /* The argument must be a constant integer. */
8451 warning (OPT_Wattributes,
8452 "%qE attribute argument not an integer constant",
8453 name);
8454 *no_add_attrs = true;
8455 }
8456 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8457 {
8458 /* The argument value must be between 0 and 255. */
8459 warning (OPT_Wattributes,
8460 "%qE attribute argument should be between 0 and 255",
8461 name);
8462 *no_add_attrs = true;
8463 }
8464 return NULL_TREE;
8465 }
8466
8467 /* Returns true if X is a symbol reference to a function that has been
8468 assigned the 'function_vector' attribute. */
8469 bool
8470 sh2a_is_function_vector_call (rtx x)
8471 {
8472 if (GET_CODE (x) == SYMBOL_REF
8473 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8474 {
8475 tree tr = SYMBOL_REF_DECL (x);
8476
8477 if (sh2a_function_vector_p (tr))
8478 return true;
8479 }
8480
8481 return false;
8482 }
8483
8484 /* Returns the function vector number, if the attribute
8485 'function_vector' is assigned, otherwise returns zero. */
8486 int
8487 sh2a_get_function_vector_number (rtx x)
8488 {
8489 if ((GET_CODE (x) == SYMBOL_REF)
8490 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8491 {
8492 tree t = SYMBOL_REF_DECL (x);
8493
8494 if (TREE_CODE (t) != FUNCTION_DECL)
8495 return 0;
8496
8497 for (tree list = SH_ATTRIBUTES (t); list; list = TREE_CHAIN (list))
8498 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8499 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8500
8501 return 0;
8502 }
8503 else
8504 return 0;
8505 }
8506
8507 /* Handle an "sp_switch" attribute; arguments as in
8508 struct attribute_spec.handler. */
8509 static tree
8510 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8511 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8512 {
8513 if (TREE_CODE (*node) != FUNCTION_DECL)
8514 {
8515 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8516 name);
8517 *no_add_attrs = true;
8518 }
8519 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8520 {
8521 /* The argument must be a constant string. */
8522 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
8523 name);
8524 *no_add_attrs = true;
8525 }
8526
8527 return NULL_TREE;
8528 }
8529
8530 /* Handle a "trap_exit" attribute; arguments as in
8531 struct attribute_spec.handler. */
8532 static tree
8533 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8534 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8535 {
8536 if (TREE_CODE (*node) != FUNCTION_DECL)
8537 {
8538 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8539 name);
8540 *no_add_attrs = true;
8541 }
8542 /* The argument specifies a trap number to be used in a trapa instruction
8543 at function exit (instead of an rte instruction). */
8544 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8545 {
8546 /* The argument must be a constant integer. */
8547 warning (OPT_Wattributes, "%qE attribute argument not an "
8548 "integer constant", name);
8549 *no_add_attrs = true;
8550 }
8551
8552 return NULL_TREE;
8553 }
8554
8555 static tree
8556 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8557 tree name ATTRIBUTE_UNUSED,
8558 tree args ATTRIBUTE_UNUSED,
8559 int flags ATTRIBUTE_UNUSED,
8560 bool *no_add_attrs ATTRIBUTE_UNUSED)
8561 {
8562 return NULL_TREE;
8563 }
8564
8565 /* True if __attribute__((renesas)) or -mrenesas. */
8566 bool
8567 sh_attr_renesas_p (const_tree td)
8568 {
8569 if (TARGET_HITACHI)
8570 return true;
8571 if (td == NULL_TREE)
8572 return false;
8573 if (DECL_P (td))
8574 td = TREE_TYPE (td);
8575 if (td == error_mark_node)
8576 return false;
8577 return lookup_attribute ("renesas", TYPE_ATTRIBUTES (td)) != NULL_TREE;
8578 }
8579
8580 /* True if __attribute__((renesas)) or -mrenesas, for the current
8581 function. */
8582 bool
8583 sh_cfun_attr_renesas_p (void)
8584 {
8585 return sh_attr_renesas_p (current_function_decl);
8586 }
8587
8588 /* Returns true if the current function has the "interrupt_handler"
8589 attribute set. */
8590 bool
8591 sh_cfun_interrupt_handler_p (void)
8592 {
8593 return (lookup_attribute ("interrupt_handler",
8594 DECL_ATTRIBUTES (current_function_decl))
8595 != NULL_TREE);
8596 }
8597
8598 /* Returns true if FUNC has been assigned the attribute
8599 "function_vector". */
8600 bool
8601 sh2a_function_vector_p (tree func)
8602 {
8603 if (TREE_CODE (func) != FUNCTION_DECL)
8604 return false;
8605
8606 for (tree list = SH_ATTRIBUTES (func); list; list = TREE_CHAIN (list))
8607 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8608 return true;
8609
8610 return false;
8611 }
8612
8613 /* Returns true if the current function has the "resbank" attribute set. */
8614 bool
8615 sh_cfun_resbank_handler_p (void)
8616 {
8617 return ((lookup_attribute ("resbank",
8618 DECL_ATTRIBUTES (current_function_decl))
8619 != NULL_TREE)
8620 && (lookup_attribute ("interrupt_handler",
8621 DECL_ATTRIBUTES (current_function_decl))
8622 != NULL_TREE) && TARGET_SH2A);
8623 }
8624
8625 /* Returns true if the current function has a "trap_exit" attribute set. */
8626 bool
8627 sh_cfun_trap_exit_p (void)
8628 {
8629 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
8630 != NULL_TREE;
8631 }
8632
8633 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8634 static const char *
8635 sh_check_pch_target_flags (int old_flags)
8636 {
8637 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8638 | MASK_SH_E | MASK_HARD_SH4
8639 | MASK_FPU_SINGLE | MASK_SH4))
8640 return _("created and used with different architectures / ABIs");
8641 if ((old_flags ^ target_flags) & MASK_HITACHI)
8642 return _("created and used with different ABIs");
8643 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8644 return _("created and used with different endianness");
8645 return NULL;
8646 }
8647 \f
8648 /* Predicates used by the templates. */
8649
8650 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
8651 Used only in general_movsrc_operand. */
8652 bool
8653 system_reg_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
8654 {
8655 switch (REGNO (op))
8656 {
8657 case PR_REG:
8658 case MACL_REG:
8659 case MACH_REG:
8660 return true;
8661 }
8662 return false;
8663 }
8664
8665 /* Returns true if OP is a floating point value with value 0.0. */
8666 bool
8667 fp_zero_operand (rtx op)
8668 {
8669 if (GET_MODE (op) != SFmode)
8670 return false;
8671
8672 const REAL_VALUE_TYPE* r = CONST_DOUBLE_REAL_VALUE (op);
8673 return real_equal (r, &dconst0) && ! REAL_VALUE_MINUS_ZERO (*r);
8674 }
8675
8676 /* Returns true if OP is a floating point value with value 1.0. */
8677 bool
8678 fp_one_operand (rtx op)
8679 {
8680 if (GET_MODE (op) != SFmode)
8681 return false;
8682
8683 return real_equal (CONST_DOUBLE_REAL_VALUE (op), &dconst1);
8684 }
8685
8686 /* Return the TLS type for TLS symbols. */
8687 enum tls_model
8688 tls_symbolic_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
8689 {
8690 if (GET_CODE (op) != SYMBOL_REF)
8691 return TLS_MODEL_NONE;
8692 return SYMBOL_REF_TLS_MODEL (op);
8693 }
8694 \f
8695 /* Return the destination address of a branch. */
8696 static int
8697 branch_dest (rtx branch)
8698 {
8699 rtx dest = SET_SRC (PATTERN (branch));
8700
8701 if (GET_CODE (dest) == IF_THEN_ELSE)
8702 dest = XEXP (dest, 1);
8703
8704 return INSN_ADDRESSES (INSN_UID (XEXP (dest, 0)));
8705 }
8706 \f
8707 /* Return nonzero if REG is not used after INSN.
8708 We assume REG is a reload reg, and therefore does
8709 not live past labels. It may live past calls or jumps though. */
8710 bool
8711 reg_unused_after (rtx reg, rtx_insn *insn)
8712 {
8713 /* If the reg is set by this instruction, then it is safe for our
8714 case. Disregard the case where this is a store to memory, since
8715 we are checking a register used in the store address. */
8716 rtx set = single_set (insn);
8717 if (set && !MEM_P (SET_DEST (set))
8718 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8719 return true;
8720
8721 while ((insn = NEXT_INSN (insn)))
8722 {
8723 if (!INSN_P (insn))
8724 continue;
8725
8726 rtx_code code = GET_CODE (insn);
8727
8728 #if 0
8729 /* If this is a label that existed before reload, then the register
8730 is dead here. However, if this is a label added by reorg, then
8731 the register may still be live here. We can't tell the difference,
8732 so we just ignore labels completely. */
8733 if (code == CODE_LABEL)
8734 return 1;
8735 /* else */
8736 #endif
8737
8738 if (code == JUMP_INSN)
8739 return false;
8740
8741 /* If this is a sequence, we must handle them all at once.
8742 We could have for instance a call that sets the target register,
8743 and an insn in a delay slot that uses the register. In this case,
8744 we must return 0. */
8745 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8746 {
8747 rtx_sequence *seq = as_a <rtx_sequence *> (PATTERN (insn));
8748 bool retval = false;
8749
8750 for (int i = 0; i < seq->len (); i++)
8751 {
8752 rtx_insn *this_insn = seq->insn (i);
8753 rtx set = single_set (this_insn);
8754
8755 if (CALL_P (this_insn))
8756 code = CALL_INSN;
8757 else if (JUMP_P (this_insn))
8758 {
8759 if (INSN_ANNULLED_BRANCH_P (this_insn))
8760 return false;
8761 code = JUMP_INSN;
8762 }
8763
8764 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8765 return false;
8766 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8767 {
8768 if (!MEM_P (SET_DEST (set)))
8769 retval = true;
8770 else
8771 return false;
8772 }
8773 if (set == NULL_RTX
8774 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8775 return false;
8776 }
8777 if (retval)
8778 return true;
8779 else if (code == JUMP_INSN)
8780 return false;
8781 }
8782
8783 rtx set = single_set (insn);
8784 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8785 return false;
8786 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8787 return !MEM_P (SET_DEST (set));
8788 if (set == NULL && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8789 return false;
8790
8791 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8792 return true;
8793 }
8794 return true;
8795 }
8796 \f
8797
8798 static GTY(()) rtx t_reg_rtx;
8799 rtx
8800 get_t_reg_rtx (void)
8801 {
8802 if (! t_reg_rtx)
8803 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
8804 return t_reg_rtx;
8805 }
8806
8807 static GTY(()) tree fpscr_values;
8808
8809 static void
8810 emit_fpu_switch (rtx scratch, int index)
8811 {
8812 if (fpscr_values == NULL)
8813 {
8814 tree t = build_index_type (integer_one_node);
8815 t = build_array_type (integer_type_node, t);
8816 t = build_decl (BUILTINS_LOCATION,
8817 VAR_DECL, get_identifier ("__fpscr_values"), t);
8818 DECL_ARTIFICIAL (t) = 1;
8819 DECL_IGNORED_P (t) = 1;
8820 DECL_EXTERNAL (t) = 1;
8821 TREE_STATIC (t) = 1;
8822 TREE_PUBLIC (t) = 1;
8823 TREE_USED (t) = 1;
8824
8825 fpscr_values = t;
8826 }
8827
8828 rtx src = DECL_RTL (fpscr_values);
8829 if (!can_create_pseudo_p ())
8830 {
8831 emit_move_insn (scratch, XEXP (src, 0));
8832 if (index != 0)
8833 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
8834 src = adjust_automodify_address (src, SImode, scratch, index * 4);
8835 }
8836 else
8837 src = adjust_address (src, SImode, index * 4);
8838
8839 emit_insn (gen_lds_fpscr (src));
8840 }
8841 \f
8842 static rtx get_free_reg (HARD_REG_SET);
8843
8844 /* This function returns a register to use for loading the address from
8845 which to load the fpscr. Currently it always returns r1 or r7, but when we are
8846 able to use pseudo registers after combine, or have a better mechanism
8847 for choosing a register, it should be done here. */
8848 /* REGS_LIVE is the liveness information for the point for which we
8849 need this allocation. In some bare-bones exit blocks, r1 is live at the
8850 start. We can even have all of r0..r3 being live:
8851 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8852 The INSN before which new insns are placed will clobber the register
8853 we return. If a basic block consists only of setting the return value
8854 register to a pseudo and using that register, the return value is not
8855 live before or after this block, yet we'll insert our insns right in
8856 the middle. */
8857 static rtx
8858 get_free_reg (HARD_REG_SET regs_live)
8859 {
8860 if (! TEST_HARD_REG_BIT (regs_live, 1))
8861 return gen_rtx_REG (Pmode, 1);
8862
8863 /* Hard reg 1 is live; since this is a small register classes target,
8864 there shouldn't be anything but a jump before the function end. */
8865 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8866 return gen_rtx_REG (Pmode, 7);
8867 }
8868
8869 /* This function will set the fpscr from memory.
8870 MODE is the mode we are setting it to. */
8871 void
8872 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8873 {
8874 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
8875 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8876
8877 rtx addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
8878 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8879 }
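/* For illustration: the FPU mode switch above works by loading one of two
   SImode words from the externally provided __fpscr_values table (defined
   in libgcc, not in this file).  A minimal sketch of the expected layout,
   derived from emit_fpu_switch above:

       extern unsigned int __fpscr_values[2];
       // __fpscr_values[0] - FPSCR word for the non-default FP mode
       // __fpscr_values[1] - FPSCR word for ACTUAL_NORMAL_MODE
       //                     (the index is the fp_mode == norm_mode test)

   The emitted sequence then amounts to an lds.l of the selected entry
   into FPSCR.  */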
8880
8881 /* Is the given character a logical line separator for the assembler? */
8882 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8883 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
8884 #endif
8885
8886 static bool
8887 sequence_insn_p (rtx_insn *insn)
8888 {
8889 rtx_insn* prev = PREV_INSN (insn);
8890 if (prev == NULL)
8891 return false;
8892
8893 rtx_insn* next = NEXT_INSN (prev);
8894 if (next == NULL)
8895 return false;
8896
8897 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
8898 }
8899
8900 int
8901 sh_insn_length_adjustment (rtx_insn *insn)
8902 {
8903 /* Instructions with unfilled delay slots take up an extra two bytes for
8904 the nop in the delay slot. */
8905 if (((NONJUMP_INSN_P (insn)
8906 && GET_CODE (PATTERN (insn)) != USE
8907 && GET_CODE (PATTERN (insn)) != CLOBBER)
8908 || CALL_P (insn) || JUMP_P (insn))
8909 && ! sequence_insn_p (insn)
8910 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8911 return 2;
8912
8913 /* Increase the insn length of a cbranch without a delay slot insn to
8914 force a delay slot which will be stuffed with a nop. */
8915 if (TARGET_CBRANCH_FORCE_DELAY_SLOT && TARGET_SH2
8916 && JUMP_P (insn) && get_attr_type (insn) == TYPE_CBRANCH
8917 && ! sequence_insn_p (insn))
8918 return 2;
8919
8920 /* sh-dsp parallel processing insns take four bytes instead of two. */
8921
8922 if (NONJUMP_INSN_P (insn))
8923 {
8924 int sum = 0;
8925 rtx body = PATTERN (insn);
8926 const char *templ;
8927 char c;
8928 bool maybe_label = true;
8929
8930 if (GET_CODE (body) == ASM_INPUT)
8931 templ = XSTR (body, 0);
8932 else if (asm_noperands (body) >= 0)
8933 templ
8934 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
8935 else
8936 return 0;
8937 do
8938 {
8939 int ppi_adjust = 0;
8940
8941 do
8942 c = *templ++;
8943 while (c == ' ' || c == '\t');
8944 /* all sh-dsp parallel-processing insns start with p.
8945 The only non-ppi sh insn starting with p is pref.
8946 The only ppi starting with pr is prnd. */
8947 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
8948 ppi_adjust = 2;
8949 /* The repeat pseudo-insn expands to three insns, a total of
8950 six bytes in size. */
8951 else if ((c == 'r' || c == 'R')
8952 && ! strncasecmp ("epeat", templ, 5))
8953 ppi_adjust = 4;
8954 while (c && c != '\n'
8955 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
8956 {
8957 /* If this is a label, it is obviously not a ppi insn. */
8958 if (c == ':' && maybe_label)
8959 {
8960 ppi_adjust = 0;
8961 break;
8962 }
8963 else if (c == '\'' || c == '"')
8964 maybe_label = false;
8965 c = *templ++;
8966 }
8967 sum += ppi_adjust;
8968 maybe_label = c != ':';
8969 }
8970 while (c);
8971 return sum;
8972 }
8973 return 0;
8974 }
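/* Illustrative examples (not taken from this file) of how the template
   scan above adjusts insn lengths for sh-dsp inline asm; the operands are
   only meant to show the prefix matching, not exact assembler syntax:

       asm ("padd   x0,y0,a0");        // starts with 'p' but not "pre" -> +2
       asm ("pref   @r1");             // 'p' followed by "re"          -> +0
       asm ("repeat start,end,#4");    // "repeat" pseudo-insn          -> +4  */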
8975 \f
8976 /* Return TRUE for a valid displacement for the REG+disp addressing
8977 with MODE. */
8978 bool
8979 sh_legitimate_index_p (machine_mode mode, rtx op, bool consider_sh2a,
8980 bool allow_zero)
8981 {
8982 if (! CONST_INT_P (op))
8983 return false;
8984
8985 {
8986 const HOST_WIDE_INT offset = INTVAL (op);
8987 const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a);
8988 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
8989
8990 /* If the mode does not support any displacement always return false.
8991 Even though an index of '0' is actually always valid, it will cause
8992 troubles when e.g. a DFmode move is split into two SFmode moves,
8993 where one SFmode move will have index '0' and the other move will
8994 have index '4'. */
8995 if (!allow_zero && max_disp < 1)
8996 return false;
8997
8998 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
8999 }
9000 }
9001
9002 /* Recognize an RTL expression that is a valid memory address for
9003 an instruction.
9004 The MODE argument is the machine mode for the MEM expression
9005 that wants to use this address.
9006 Allow REG
9007 REG+disp
9008 REG+r0
9009 REG++
9010 --REG
9011 GBR
9012 GBR+disp */
9013 static bool
9014 sh_legitimate_address_p (machine_mode mode, rtx x, bool strict)
9015 {
9016 if (REG_P (x) && REGNO (x) == GBR_REG)
9017 return true;
9018
9019 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9020 return true;
9021 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9022 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9023 return true;
9024 else if (GET_CODE (x) == PLUS)
9025 {
9026 rtx xop0 = XEXP (x, 0);
9027 rtx xop1 = XEXP (x, 1);
9028
9029 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
9030 return gbr_displacement (xop1, mode);
9031
9032 if (GET_MODE_SIZE (mode) <= 8
9033 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9034 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
9035 return true;
9036
9037 if (GET_MODE_SIZE (mode) <= 4
9038 || (TARGET_FPU_DOUBLE && TARGET_FMOVD && mode == DFmode))
9039 {
9040 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9041 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9042 return true;
9043 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9044 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9045 return true;
9046 }
9047 }
9048
9049 return false;
9050 }
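/* For illustration, the address forms accepted above correspond to the
   following SH addressing modes (assembler syntax shown informally):

       mov.l  @r4,r0          ! REG
       mov.l  @(8,r4),r0      ! REG+disp
       mov.l  @(r0,r4),r1     ! REG+r0
       mov.l  @r4+,r0         ! REG++  (post-increment)
       mov.l  r0,@-r4         ! --REG  (pre-decrement)
       mov.l  @(12,gbr),r0    ! GBR+disp  */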
9051 \f
9052 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9053 isn't protected by a PIC unspec. */
9054 bool
9055 nonpic_symbol_mentioned_p (rtx x)
9056 {
9057 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9058 || GET_CODE (x) == PC)
9059 return true;
9060
9061 /* We don't want to look into the possible MEM location of a
9062 CONST_DOUBLE, since we're not going to use it, in general. */
9063 if (GET_CODE (x) == CONST_DOUBLE)
9064 return false;
9065
9066 if (GET_CODE (x) == UNSPEC
9067 && (XINT (x, 1) == UNSPEC_PIC
9068 || XINT (x, 1) == UNSPEC_GOT
9069 || XINT (x, 1) == UNSPEC_GOTOFF
9070 || XINT (x, 1) == UNSPEC_GOTPLT
9071 || XINT (x, 1) == UNSPEC_GOTTPOFF
9072 || XINT (x, 1) == UNSPEC_DTPOFF
9073 || XINT (x, 1) == UNSPEC_TPOFF
9074 || XINT (x, 1) == UNSPEC_PLT
9075 || XINT (x, 1) == UNSPEC_PCREL
9076 || XINT (x, 1) == UNSPEC_SYMOFF
9077 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF
9078 || XINT (x, 1) == UNSPEC_GOTFUNCDESC
9079 || XINT (x, 1) == UNSPEC_GOTOFFFUNCDESC))
9080 return false;
9081
9082 const char* fmt = GET_RTX_FORMAT (GET_CODE (x));
9083 for (int i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9084 {
9085 if (fmt[i] == 'E')
9086 {
9087 for (int j = XVECLEN (x, i) - 1; j >= 0; j--)
9088 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9089 return true;
9090 }
9091 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9092 return true;
9093 }
9094
9095 return false;
9096 }
9097
9098 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9099 @GOTOFF in `reg'. */
9100 rtx
9101 legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED, rtx reg)
9102 {
9103 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9104 return orig;
9105
9106 if (GET_CODE (orig) == LABEL_REF
9107 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9108 {
9109 if (reg == NULL_RTX)
9110 reg = gen_reg_rtx (Pmode);
9111
9112 if (TARGET_FDPIC
9113 && GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (orig))
9114 {
9115 /* Weak functions may be NULL which doesn't work with
9116 GOTOFFFUNCDESC because the runtime offset is not known. */
9117 if (SYMBOL_REF_WEAK (orig))
9118 emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
9119 else
9120 emit_insn (gen_symGOTOFFFUNCDESC2reg (reg, orig));
9121 }
9122 else if (TARGET_FDPIC
9123 && (GET_CODE (orig) == LABEL_REF
9124 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_DECL (orig)
9125 && (TREE_READONLY (SYMBOL_REF_DECL (orig))
9126 || SYMBOL_REF_EXTERNAL_P (orig)
9127 || DECL_SECTION_NAME(SYMBOL_REF_DECL (orig))))))
9128 /* In FDPIC, GOTOFF can only be used for writable data. */
9129 emit_insn (gen_symGOT2reg (reg, orig));
9130 else
9131 emit_insn (gen_symGOTOFF2reg (reg, orig));
9132 return reg;
9133 }
9134 else if (GET_CODE (orig) == SYMBOL_REF)
9135 {
9136 if (reg == NULL_RTX)
9137 reg = gen_reg_rtx (Pmode);
9138
9139 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (orig))
9140 emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
9141 else
9142 emit_insn (gen_symGOT2reg (reg, orig));
9143 return reg;
9144 }
9145 return orig;
9146 }
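/* A rough sketch of what the two paths above compute; r12 is the PIC/GOT
   base register on SH, and the actual insns come from the symGOTOFF2reg /
   symGOT2reg (and FDPIC function-descriptor) expanders in sh.md:

       // local symbol or label: its address is a fixed offset from the GOT
       reg = <sym@GOTOFF from the constant pool> + r12;

       // global symbol: its address is read from the GOT entry
       reg = *(void **)(<sym@GOT from the constant pool> + r12);

   This only illustrates the GOTOFF vs. GOT distinction, not the exact
   emitted code.  */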
9147
9148 /* Given a (logical) mode size and an offset in bytes, try to find the
9149 appropriate displacement value for a mov insn. On SH the displacements
9150 are limited to a maximum of 60 bytes for SImode, 30 bytes for HImode and
9151 15 bytes for QImode. To compensate for this we create a new base address by
9152 adding an adjustment value to it.
9153
9154 If the originally requested offset is greater than 127 we prefer using
9155 values 124..127 over 128..131 to increase opportunities to use the
9156 add #imm, Rn insn.
9157
9158 In some cases it is possible that a requested offset might seem unaligned
9159 or inappropriate for the mode size, like offset = 2 and mode size = 4.
9160 This is compensated by adjusting the base address so that the effective
9161 address of the displacement move insn will be aligned.
9162
9163 This is not the best possible way of rebasing the base address, as it
9164 does not look at other present displacement addressings around it.
9165 In some cases this can create more base address adjustments than would
9166 actually be necessary. */
9167 struct disp_adjust
9168 {
9169 rtx offset_adjust;
9170 rtx mov_disp;
9171 };
9172
9173 static struct disp_adjust
9174 sh_find_mov_disp_adjust (machine_mode mode, HOST_WIDE_INT offset)
9175 {
9176 struct disp_adjust res = { NULL_RTX, NULL_RTX };
9177
9178 /* Do not try to use SH2A's large displacements here, because this would
9179 effectively disable the small displacement insns. */
9180 const int mode_sz = GET_MODE_SIZE (mode);
9181 const int mov_insn_sz = mov_insn_size (mode, false);
9182 const int max_disp = sh_max_mov_insn_displacement (mode, false);
9183 const int max_disp_next = max_disp + mov_insn_sz;
9184 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
9185 HOST_WIDE_INT offset_adjust;
9186
9187 /* In some cases this actually does happen and we must check for it. */
9188 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
9189 return res;
9190
9191 /* Keeps the previous behavior for QImode displacement addressing.
9192 This just decides how the offset is re-based. Removing this special
9193 case will result in slightly bigger code on average, but it's not that
9194 bad actually. */
9195 if (mov_insn_sz == 1)
9196 align_modifier = 0;
9197
9198 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
9199
9200 if (mode_sz + offset - offset_adjust <= max_disp_next)
9201 {
9202 res.offset_adjust = GEN_INT (offset_adjust);
9203 res.mov_disp = GEN_INT (offset - offset_adjust);
9204 }
9205
9206 return res;
9207 }
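/* Worked example (SImode, illustrative): for a requested offset of 68,
   max_disp is 60 and align_modifier is 0, so offset_adjust = 68 & ~60 = 64
   and mov_disp = 4.  An access at offset 68 is thus rebased into an
   "add #64,Rn" style base adjustment followed by a mov.l with displacement
   4, which fits the 0..60 range of the scaled 4-bit displacement field.  */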
9208
9209 /* Try to modify an illegitimate address and make it legitimate.
9210 If we find one, return the new, valid address.
9211 Otherwise, return the original address. */
9212 static rtx
9213 sh_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9214 {
9215 if (flag_pic)
9216 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9217
9218 if ((TARGET_FPU_DOUBLE && mode == DFmode)
9219 || (TARGET_SH2E && mode == SFmode))
9220 return x;
9221
9222 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
9223 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
9224 {
9225 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
9226 INTVAL (XEXP (x, 1)));
9227
9228 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
9229 {
9230 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9231 adj.offset_adjust, NULL_RTX, 0,
9232 OPTAB_LIB_WIDEN);
9233 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
9234 }
9235 }
9236 return x;
9237 }
9238
9239 /* Attempt to replace *p, which is an address that needs reloading, with
9240 a valid memory address for an operand of mode MODE.
9241 Like for sh_legitimize_address, for the SH we try to get a normal form
9242 of the address. That will allow inheritance of the address reloads. */
9243 bool
9244 sh_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
9245 int itype)
9246 {
9247 enum reload_type type = (enum reload_type) itype;
9248 const int mode_sz = GET_MODE_SIZE (mode);
9249
9250 if (sh_lra_p ())
9251 return false;
9252
9253 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
9254 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true))
9255 {
9256 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
9257 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
9258
9259 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
9260 {
9261 push_reload (*p, NULL_RTX, p, NULL,
9262 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9263 return true;
9264 }
9265
9266 if (TARGET_SH2E && mode == SFmode)
9267 {
9268 *p = copy_rtx (*p);
9269 push_reload (*p, NULL_RTX, p, NULL,
9270 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9271 return true;
9272 }
9273
9274 /* FIXME: Do not allow legitimizing QImode and HImode displacement
9275 moves, because then reload has a problem figuring out the constraint
9276 that the move insn target/source reg must be R0.
9277 Or maybe some handling is wrong in sh_secondary_reload for this
9278 to work properly? */
9279 if ((mode_sz == 4 || mode_sz == 8)
9280 && ! (TARGET_SH4 && mode == DFmode)
9281 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
9282 {
9283 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
9284 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
9285 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
9286 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9287 return true;
9288 }
9289 }
9290
9291 /* We must re-recognize what we created before. */
9292 if (GET_CODE (*p) == PLUS
9293 && (mode_sz == 4 || mode_sz == 8)
9294 && GET_CODE (XEXP (*p, 0)) == PLUS
9295 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
9296 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
9297 && CONST_INT_P (XEXP (*p, 1))
9298 && ! (TARGET_SH2E && mode == SFmode))
9299 {
9300 /* Because this address is so complex, we know it must have
9301 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9302 it is already unshared, and needs no further unsharing. */
9303 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
9304 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9305 return true;
9306 }
9307
9308 return false;
9309 }
9310
9311 /* In the name of slightly smaller debug output, and to cater to
9312 general assembler lossage, recognize various UNSPEC sequences
9313 and turn them back into a direct symbol reference. */
9314 static rtx
9315 sh_delegitimize_address (rtx orig_x)
9316 {
9317 orig_x = delegitimize_mem_from_attrs (orig_x);
9318
9319 rtx x = orig_x;
9320 if (MEM_P (x))
9321 x = XEXP (x, 0);
9322 if (GET_CODE (x) == CONST)
9323 {
9324 rtx y = XEXP (x, 0);
9325 if (GET_CODE (y) == UNSPEC)
9326 {
9327 if (XINT (y, 1) == UNSPEC_GOT
9328 || XINT (y, 1) == UNSPEC_GOTOFF
9329 || XINT (y, 1) == UNSPEC_SYMOFF)
9330 return XVECEXP (y, 0, 0);
9331 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
9332 {
9333 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
9334 {
9335 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
9336
9337 if (GET_CODE (symplt) == UNSPEC
9338 && (XINT (symplt, 1) == UNSPEC_PLT
9339 || XINT (symplt, 1) == UNSPEC_PCREL))
9340 return XVECEXP (symplt, 0, 0);
9341 }
9342 }
9343 }
9344 }
9345
9346 return orig_x;
9347 }
9348
9349 /* Mark the use of a constant in the literal table. If the constant
9350 has multiple labels, make it unique. */
9351 static rtx
9352 mark_constant_pool_use (rtx x)
9353 {
9354 if (x == NULL_RTX)
9355 return x;
9356
9357 switch (GET_CODE (x))
9358 {
9359 case LABEL_REF:
9360 x = XEXP (x, 0);
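      /* FALLTHRU */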
9361 case CODE_LABEL:
9362 break;
9363 default:
9364 return x;
9365 }
9366
9367 /* Get the first label in the list of labels for the same constant
9368 and delete the other labels in the list. */
9369 rtx_insn* lab = as_a <rtx_insn*> (x);
9370 for (rtx_insn* insn = PREV_INSN (lab); insn; insn = PREV_INSN (insn))
9371 {
9372 if (!LABEL_P (insn)
9373 || LABEL_REFS (insn) != NEXT_INSN (insn))
9374 break;
9375 lab = insn;
9376 }
9377
9378 for (rtx insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9379 as_a<rtx_insn *> (insn)->set_deleted ();
9380
9381 /* Mark constants in a window. */
9382 for (rtx_insn* insn = NEXT_INSN (as_a <rtx_insn *> (x)); insn;
9383 insn = NEXT_INSN (insn))
9384 {
9385 if (!NONJUMP_INSN_P (insn))
9386 continue;
9387
9388 rtx pattern = PATTERN (insn);
9389 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9390 continue;
9391
9392 switch (XINT (pattern, 1))
9393 {
9394 case UNSPECV_CONST2:
9395 case UNSPECV_CONST4:
9396 case UNSPECV_CONST8:
9397 XVECEXP (pattern, 0, 1) = const1_rtx;
9398 break;
9399 case UNSPECV_WINDOW_END:
9400 if (XVECEXP (pattern, 0, 0) == x)
9401 return lab;
9402 break;
9403 case UNSPECV_CONST_END:
9404 return lab;
9405 default:
9406 break;
9407 }
9408 }
9409
9410 return lab;
9411 }
9412 \f
9413 /* Return true if it's possible to redirect BRANCH1 to the destination
9414 of an unconditional jump BRANCH2. We only want to do this if the
9415 resulting branch will have a short displacement. */
9416 static bool
9417 sh_can_follow_jump (const rtx_insn *branch1, const rtx_insn *branch2)
9418 {
9419 /* Don't follow if BRANCH2 might be a jump crossing between the
9420 hot and cold partitions. */
9421 if (flag_reorder_blocks_and_partition
9422 && simplejump_p (branch2)
9423 && CROSSING_JUMP_P (branch2))
9424 return false;
9425
9426 if (flag_expensive_optimizations && simplejump_p (branch2))
9427 {
9428 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9429 rtx_insn *insn;
9430 int distance;
9431
9432 for (distance = 0, insn = NEXT_INSN (branch1);
9433 insn && distance < 256;
9434 insn = PREV_INSN (insn))
9435 {
9436 if (insn == dest)
9437 return true;
9438 else
9439 distance += get_attr_length (insn);
9440 }
9441 for (distance = 0, insn = NEXT_INSN (branch1);
9442 insn && distance < 256;
9443 insn = NEXT_INSN (insn))
9444 {
9445 if (insn == dest)
9446 return true;
9447 else
9448 distance += get_attr_length (insn);
9449 }
9450 }
9451 return false;
9452 }
9453
9454 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9455 bool
9456 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9457 unsigned int new_reg)
9458 {
9459 /* Interrupt functions can only use registers that have already been
9460 saved by the prologue, even if they would normally be
9461 call-clobbered. */
9462 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9463 return false;
9464
9465 return true;
9466 }
9467
9468 /* Function to update the integer COST
9469 based on the relationship between INSN that is dependent on
9470 DEP_INSN through the dependence LINK. The default is to make no
9471 adjustment to COST. This can be used for example to specify to
9472 the scheduler that an output- or anti-dependence does not incur
9473 the same cost as a data-dependence. The return value should be
9474 the new value for COST. */
9475 static int
9476 sh_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
9477 unsigned int)
9478 {
9479 rtx reg, use_pat;
9480
9481 if (dep_type == 0)
9482 {
9483 if (recog_memoized (insn) < 0
9484 || recog_memoized (dep_insn) < 0)
9485 return cost;
9486
9487 rtx dep_set = single_set (dep_insn);
9488
9489 /* The latency that we specify in the scheduling description refers
9490 to the actual output, not to an auto-increment register; for that,
9491 the latency is one. */
9492 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9493 {
9494 rtx set = single_set (insn);
9495
9496 if (set
9497 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9498 && (!MEM_P (SET_DEST (set))
9499 || !reg_mentioned_p (SET_DEST (dep_set),
9500 XEXP (SET_DEST (set), 0))))
9501 cost = 1;
9502 }
9503 /* The only input for a call that is timing-critical is the
9504 function's address. */
9505 if (CALL_P (insn))
9506 {
9507 rtx call = get_call_rtx_from (insn);
9508 if (call
9509 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9510 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9511 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9512 cost -= TARGET_SH4_300 ? 3 : 6;
9513 }
9514 /* Likewise, the most timing-critical input for an sfunc call
9515 is the function address. However, sfuncs typically start
9516 using their arguments pretty quickly.
9517 Assume a four cycle delay for SH4 before they are needed.
9518 Cached ST40-300 calls are quicker, so assume only a one
9519 cycle delay there.
9520 ??? Maybe we should encode the delays till input registers
9521 are needed by sfuncs into the sfunc call insn. */
9522 /* All sfunc calls are parallels with at least four components.
9523 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
9524 else if (GET_CODE (PATTERN (insn)) == PARALLEL
9525 && XVECLEN (PATTERN (insn), 0) >= 4
9526 && (reg = sfunc_uses_reg (insn)))
9527 {
9528 if (! reg_set_p (reg, dep_insn))
9529 cost -= TARGET_SH4_300 ? 1 : 4;
9530 }
9531 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
9532 {
9533 attr_type dep_type = get_attr_type (dep_insn);
9534 attr_type type;
9535 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
9536 cost--;
9537 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
9538 && (type = get_attr_type (insn)) != TYPE_CALL
9539 && type != TYPE_SFUNC)
9540 cost--;
9541 /* When the preceding instruction loads the shift amount of
9542 the following SHAD/SHLD, the latency of the load is increased
9543 by 1 cycle. */
9544 if (get_attr_type (insn) == TYPE_DYN_SHIFT
9545 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
9546 && reg_overlap_mentioned_p (SET_DEST (dep_set),
9547 XEXP (SET_SRC (single_set (insn)),
9548 1)))
9549 cost++;
9550 /* When an LS group instruction with a latency of less than
9551 3 cycles is followed by a double-precision floating-point
9552 instruction, FIPR, or FTRV, the latency of the first
9553 instruction is increased to 3 cycles. */
9554 else if (cost < 3
9555 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
9556 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
9557 cost = 3;
9558 /* The lsw register of a double-precision computation is ready one
9559 cycle earlier. */
9560 else if (reload_completed
9561 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
9562 && (use_pat = single_set (insn))
9563 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
9564 SET_SRC (use_pat)))
9565 cost -= 1;
9566
9567 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
9568 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
9569 cost -= 1;
9570 }
9571 else if (TARGET_SH4_300)
9572 {
9573 /* Stores need their input register two cycles later. */
9574 attr_type type;
9575 if (dep_set && cost >= 1
9576 && ((type = get_attr_type (insn)) == TYPE_STORE
9577 || type == TYPE_PSTORE
9578 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
9579 {
9580 rtx set = single_set (insn);
9581
9582 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
9583 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
9584 {
9585 cost -= 2;
9586 /* But don't reduce the cost below 1 if the address depends
9587 on a side effect of dep_insn. */
9588 if (cost < 1
9589 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
9590 cost = 1;
9591 }
9592 }
9593 }
9594 }
9595 /* An anti-dependence penalty of two applies if the first insn is a double
9596 precision fadd / fsub / fmul. */
9597 else if (!TARGET_SH4_300
9598 && dep_type == REG_DEP_ANTI
9599 && recog_memoized (dep_insn) >= 0
9600 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
9601 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
9602 /* A lot of alleged anti-flow dependences are fake,
9603 so check this one is real. */
9604 && flow_dependent_p (dep_insn, insn))
9605 cost = 2;
9606
9607 return cost;
9608 }
9609
9610 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9611 if DEP_INSN is anti-flow dependent on INSN. */
9612 static bool
9613 flow_dependent_p (rtx insn, rtx dep_insn)
9614 {
9615 rtx tmp = PATTERN (insn);
9616
9617 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
9618 return tmp == NULL_RTX;
9619 }
9620
9621 /* A helper function for flow_dependent_p called through note_stores. */
9622 static void
9623 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
9624 {
9625 rtx * pinsn = (rtx *) data;
9626
9627 if (*pinsn && reg_referenced_p (x, *pinsn))
9628 *pinsn = NULL_RTX;
9629 }
9630
9631 /* For use by sh_allocate_initial_value. Note that sh.md contains some
9632 'special function' patterns (type sfunc) that clobber pr, but that
9633 do not look like function calls to leaf_function_p. Hence we must
9634 do this extra check. */
9635 static int
9636 sh_pr_n_sets (void)
9637 {
9638 return DF_REG_DEF_COUNT (PR_REG);
9639 }
9640
9641 /* Return where to allocate pseudo for a given hard register initial
9642 value. */
9643 static rtx
9644 sh_allocate_initial_value (rtx hard_reg)
9645 {
9646 if (REGNO (hard_reg) == PR_REG)
9647 {
9648 if (crtl->is_leaf && ! sh_pr_n_sets ())
9649 return hard_reg;
9650 else
9651 return gen_frame_mem (Pmode, return_address_pointer_rtx);
9652 }
9653
9654 return NULL_RTX;
9655 }
9656
9657 /* This function returns 2 to indicate dual issue for superscalar (SH4)
9658 targets and 1 otherwise. To be used by the DFA pipeline description. */
9659 static int
9660 sh_issue_rate (void)
9661 {
9662 if (TARGET_SUPERSCALAR)
9663 return 2;
9664 else
9665 return 1;
9666 }
9667
9668 /* Functions for ready queue reordering for sched1. */
9669
9670 /* Get weight for mode for a set x. */
9671 static short
9672 find_set_regmode_weight (rtx x, machine_mode mode)
9673 {
9674 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9675 return 1;
9676 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9677 {
9678 if (REG_P (SET_DEST (x)))
9679 {
9680 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9681 return 1;
9682 else
9683 return 0;
9684 }
9685 return 1;
9686 }
9687 return 0;
9688 }
9689
9690 /* Get regmode weight for insn. */
9691 static short
9692 find_insn_regmode_weight (rtx insn, machine_mode mode)
9693 {
9694 /* Increment weight for each register born here. */
9695 rtx x = PATTERN (insn);
9696 short reg_weight = find_set_regmode_weight (x, mode);
9697 if (GET_CODE (x) == PARALLEL)
9698 {
9699 int j;
9700 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9701 {
9702 x = XVECEXP (PATTERN (insn), 0, j);
9703 reg_weight += find_set_regmode_weight (x, mode);
9704 }
9705 }
9706 /* Decrement weight for each register that dies here. */
9707 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9708 {
9709 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9710 {
9711 rtx note = XEXP (x, 0);
9712 if (REG_P (note) && GET_MODE (note) == mode)
9713 reg_weight--;
9714 }
9715 }
9716 return reg_weight;
9717 }
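/* Example (illustrative only): for an insn whose pattern is a single
   (set (reg:SF fr4) (plus:SF (reg:SF fr5) (reg:SF fr6))) the SFmode weight
   is 1, since one SFmode register is born here.  If the insn also carries
   a REG_DEAD note for an SFmode register, the net weight drops back to 0,
   so only values whose lifetime actually begins here add pressure.  */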
9718
9719 /* Calculate regmode weights for all insns of a basic block. */
9720 static void
9721 find_regmode_weight (basic_block b, machine_mode mode)
9722 {
9723 rtx_insn *insn, *next_tail, *head, *tail;
9724
9725 get_ebb_head_tail (b, b, &head, &tail);
9726 next_tail = NEXT_INSN (tail);
9727
9728 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9729 {
9730 /* Handle register life information. */
9731 if (!INSN_P (insn))
9732 continue;
9733
9734 if (mode == SFmode)
9735 INSN_REGMODE_WEIGHT (insn, mode) =
9736 find_insn_regmode_weight (insn, mode)
9737 + 2 * find_insn_regmode_weight (insn, DFmode);
9738 else if (mode == SImode)
9739 INSN_REGMODE_WEIGHT (insn, mode) =
9740 find_insn_regmode_weight (insn, mode)
9741 + 2 * find_insn_regmode_weight (insn, DImode);
9742 }
9743 }
9744
9745 /* Comparison function for ready queue sorting. */
9746 static int
9747 rank_for_reorder (const void *x, const void *y)
9748 {
9749 rtx_insn *tmp = *(rtx_insn * const *) y;
9750 rtx_insn *tmp2 = *(rtx_insn * const *) x;
9751
9752 /* The insn in a schedule group should be issued first. */
9753 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9754 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9755
9756 /* If insns are equally good, sort by INSN_LUID (original insn order); this
9757 minimizes instruction movement, thus minimizing sched's effect on
9758 register pressure. */
9759 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9760 }
9761
9762 /* Resort the array A, in which only the element at index N may be out of order. */
9763 static void
9764 swap_reorder (rtx_insn **a, int n)
9765 {
9766 rtx_insn *insn = a[n - 1];
9767 int i = n - 2;
9768
9769 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9770 {
9771 a[i + 1] = a[i];
9772 i -= 1;
9773 }
9774 a[i + 1] = insn;
9775 }
9776
9777 /* Sort the ready list by ascending priority. */
9778 static void
9779 ready_reorder (rtx_insn **ready, int nready)
9780 {
9781 if (nready == 2)
9782 swap_reorder (ready, nready);
9783 else if (nready > 2)
9784 qsort (ready, nready, sizeof (rtx_insn *), rank_for_reorder);
9785 }
9786
9787 /* Count life regions of r0 for a block. */
9788 static int
9789 find_r0_life_regions (basic_block b)
9790 {
9791 bool live;
9792 int set;
9793 int death = 0;
9794
9795 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
9796 {
9797 set = 1;
9798 live = true;
9799 }
9800 else
9801 {
9802 set = 0;
9803 live = false;
9804 }
9805
9806 rtx_insn* insn = BB_HEAD (b);
9807 rtx_insn* end = BB_END (b);
9808 rtx r0_reg = gen_rtx_REG (SImode, R0_REG);
9809 while (1)
9810 {
9811 if (INSN_P (insn))
9812 {
9813 if (find_regno_note (insn, REG_DEAD, R0_REG))
9814 {
9815 death++;
9816 live = false;
9817 }
9818
9819 rtx pset;
9820 if (!live
9821 && (pset = single_set (insn))
9822 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
9823 && !find_regno_note (insn, REG_UNUSED, R0_REG))
9824 {
9825 set++;
9826 live = true;
9827 }
9828 }
9829 if (insn == end)
9830 break;
9831 insn = NEXT_INSN (insn);
9832 }
9833 return set - death;
9834 }
9835
9836 /* Calculate regmode weights for all insns of all basic blocks. */
9837 static void
9838 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
9839 int verbose ATTRIBUTE_UNUSED,
9840 int old_max_uid)
9841 {
9842 basic_block b;
9843
9844 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
9845 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
9846 r0_life_regions = 0;
9847
9848 FOR_EACH_BB_REVERSE_FN (b, cfun)
9849 {
9850 find_regmode_weight (b, SImode);
9851 find_regmode_weight (b, SFmode);
9852 if (!reload_completed)
9853 r0_life_regions += find_r0_life_regions (b);
9854 }
9855
9856 CURR_REGMODE_PRESSURE (SImode) = 0;
9857 CURR_REGMODE_PRESSURE (SFmode) = 0;
9858 }
9859
9860 /* Cleanup. */
9861 static void
9862 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
9863 int verbose ATTRIBUTE_UNUSED)
9864 {
9865 if (regmode_weight[0])
9866 {
9867 free (regmode_weight[0]);
9868 regmode_weight[0] = NULL;
9869 }
9870 if (regmode_weight[1])
9871 {
9872 free (regmode_weight[1]);
9873 regmode_weight[1] = NULL;
9874 }
9875 }
9876
9877 /* Cache the can_issue_more so that we can return it from reorder2. Also,
9878 keep count of register pressures on SImode and SFmode. */
9879 static int
9880 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
9881 int sched_verbose ATTRIBUTE_UNUSED,
9882 rtx_insn *insn,
9883 int can_issue_more)
9884 {
9885 if (GET_CODE (PATTERN (insn)) != USE
9886 && GET_CODE (PATTERN (insn)) != CLOBBER)
9887 cached_can_issue_more = can_issue_more - 1;
9888 else
9889 cached_can_issue_more = can_issue_more;
9890
9891 if (reload_completed)
9892 return cached_can_issue_more;
9893
9894 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
9895 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
9896
9897 return cached_can_issue_more;
9898 }
9899
9900 static void
9901 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
9902 int verbose ATTRIBUTE_UNUSED,
9903 int veclen ATTRIBUTE_UNUSED)
9904 {
9905 CURR_REGMODE_PRESSURE (SImode) = 0;
9906 CURR_REGMODE_PRESSURE (SFmode) = 0;
9907 }
9908
9909 /* Some magic numbers. */
9910 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9911 functions that already have high pressure on r0. */
9912 #define R0_MAX_LIFE_REGIONS 2
9913 /* Register Pressure thresholds for SImode and SFmode registers. */
9914 #define SIMODE_MAX_WEIGHT 5
9915 #define SFMODE_MAX_WEIGHT 10
9916
9917 /* Return true if the pressure is high for MODE. */
9918 static bool
9919 high_pressure (machine_mode mode)
9920 {
9921 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9922 functions that already have high pressure on r0. */
9923 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
9924 return true;
9925
9926 if (mode == SFmode)
9927 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
9928 else
9929 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
9930 }
9931
9932 /* Reorder ready queue if register pressure is high. */
9933 static int
9934 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
9935 int sched_verbose ATTRIBUTE_UNUSED,
9936 rtx_insn **ready,
9937 int *n_readyp,
9938 int clock_var ATTRIBUTE_UNUSED)
9939 {
9940 if (reload_completed)
9941 return sh_issue_rate ();
9942
9943 if (high_pressure (SFmode) || high_pressure (SImode))
9944 {
9945 ready_reorder (ready, *n_readyp);
9946 }
9947
9948 return sh_issue_rate ();
9949 }
9950
9951 /* Skip cycles if the current register pressure is high. */
9952 static int
9953 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
9954 int sched_verbose ATTRIBUTE_UNUSED,
9955 rtx_insn **ready ATTRIBUTE_UNUSED,
9956 int *n_readyp ATTRIBUTE_UNUSED,
9957 int clock_var ATTRIBUTE_UNUSED)
9958 {
9959 if (reload_completed)
9960 return cached_can_issue_more;
9961
9962 if (high_pressure (SFmode) || high_pressure (SImode))
9963 skip_cycles = 1;
9964
9965 return cached_can_issue_more;
9966 }
9967
9968 /* Skip cycles without sorting the ready queue. This will move insns from
9969 Q -> R. If this is the last cycle we are skipping, allow sorting of the
9970 ready queue by sh_reorder. */
9971
9972 /* Generally, skipping this many cycles is sufficient for all insns to move
9973 from Q -> R. */
9974 #define MAX_SKIPS 8
9975
9976 static int
9977 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
9978 int sched_verbose ATTRIBUTE_UNUSED,
9979 rtx_insn *insn ATTRIBUTE_UNUSED,
9980 int last_clock_var,
9981 int clock_var,
9982 int *sort_p)
9983 {
9984 if (reload_completed)
9985 return 0;
9986
9987 if (skip_cycles)
9988 {
9989 if ((clock_var - last_clock_var) < MAX_SKIPS)
9990 {
9991 *sort_p = 0;
9992 return 1;
9993 }
9994 /* If this is the last cycle we are skipping, allow reordering of R. */
9995 if ((clock_var - last_clock_var) == MAX_SKIPS)
9996 {
9997 *sort_p = 1;
9998 return 1;
9999 }
10000 }
10001
10002 skip_cycles = 0;
10003
10004 return 0;
10005 }
10006
10007 static bool
10008 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10009 {
10010 return TARGET_HITACHI || sh_attr_renesas_p (record_type);
10011 }
10012 \f
10013 /*
10014 On the SH1..SH4, the trampoline looks like
10015 2 0002 D202 mov.l l2,r2
10016 1 0000 D301 mov.l l1,r3
10017 3 0004 422B jmp @r2
10018 4 0006 0009 nop
10019 5 0008 00000000 l1: .long area
10020 6 000c 00000000 l2: .long function
10021
10022 FDPIC needs a form that includes a function descriptor and
10023 code to load the GOT register:
10024 0 0000 00000000 .long l0
10025 1 0004 00000000 .long gotval
10026 2 0008 D302 l0: mov.l l1,r3
10027 3 000a D203 mov.l l2,r2
10028 4 000c 6122 mov.l @r2,r1
10029 5 000e 5C21 mov.l @(4,r2),r12
10030 6 0010 412B jmp @r1
10031 7 0012 0009 nop
10032 8 0014 00000000 l1: .long area
10033 9 0018 00000000 l2: .long function
10034
10035 SH5 (compact) uses r1 instead of r3 for the static chain. */
10036
10037 /* Emit insns to store a value at memory address + offset. */
10038 static void
10039 sh_emit_storesi (rtx addr, HOST_WIDE_INT offset, rtx value)
10040 {
10041 gcc_assert ((offset & 3) == 0);
10042 emit_move_insn (offset == 0
10043 ? change_address (addr, SImode, NULL_RTX)
10044 : adjust_address (addr, SImode, offset), value);
10045 }
10046
10047 /* Emit insns to store w0 at addr + offset and w1 at addr + offset + 2. */
10048 static void
10049 sh_emit_storehi (rtx addr, HOST_WIDE_INT offset, uint16_t w0, uint16_t w1)
10050 {
10051 sh_emit_storesi (addr, offset, gen_int_mode (TARGET_LITTLE_ENDIAN
10052 ? (w0 | (w1 << 16))
10053 : (w1 | (w0 << 16)), SImode));
10054 }
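/* Worked example: for the non-FDPIC trampoline below, w0 = 0xd202 and
   w1 = 0xd301.  On a little-endian target the combined SImode word is
   0xd301d202; on a big-endian target it is 0xd202d301.  In both cases the
   16-bit opcode 0xd202 ends up at ADDR + OFFSET and 0xd301 at
   ADDR + OFFSET + 2, which is what the trampoline layout requires.  */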
10055
10056 /* Emit RTL insns to initialize the variable parts of a trampoline.
10057 FNADDR is an RTX for the address of the function's pure code.
10058 CXT is an RTX for the static chain value for the function. */
10059 static void
10060 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10061 {
10062 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10063 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10064
10065 if (TARGET_FDPIC)
10066 {
10067 rtx a = force_reg (Pmode, plus_constant (Pmode, XEXP (tramp_mem, 0), 8));
10068
10069 sh_emit_storesi (tramp_mem, 0, a);
10070 sh_emit_storesi (tramp_mem, 4, sh_get_fdpic_reg_initial_val ());
10071
10072 sh_emit_storehi (tramp_mem, 8, 0xd302, 0xd203);
10073 sh_emit_storehi (tramp_mem, 12, 0x6122, 0x5c21);
10074 sh_emit_storehi (tramp_mem, 16, 0x412b, 0x0009);
10075
10076 sh_emit_storesi (tramp_mem, 20, cxt);
10077 sh_emit_storesi (tramp_mem, 24, fnaddr);
10078 }
10079 else
10080 {
10081 sh_emit_storehi (tramp_mem, 0, 0xd202, 0xd301);
10082 sh_emit_storehi (tramp_mem, 4, 0x422b, 0x0009);
10083
10084 sh_emit_storesi (tramp_mem, 8, cxt);
10085 sh_emit_storesi (tramp_mem, 12, fnaddr);
10086 }
10087 if (TARGET_HARD_SH4)
10088 {
10089 if (!TARGET_INLINE_IC_INVALIDATE
10090 || (!(TARGET_SH4A || TARGET_SH4_300) && TARGET_USERMODE))
10091 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10092 FUNCTION_ORDINARY).sym,
10093 LCT_NORMAL, VOIDmode, 1, tramp, SImode);
10094 else
10095 emit_insn (gen_ic_invalidate_line (tramp));
10096 }
10097 }
10098
10099 /* On SH5, trampolines were SHmedia code and needed an address adjustment; the remaining targets return the trampoline address unchanged. */
10100 static rtx
10101 sh_trampoline_adjust_address (rtx tramp)
10102 {
10103 return tramp;
10104 }
10105
10106 /* If PIC, we cannot make sibling calls to global functions
10107 because the PLT requires r12 to be live. */
10108 static bool
10109 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10110 {
10111 return (1
10112 && ! sh_cfun_interrupt_handler_p ()
10113 && (! flag_pic || TARGET_FDPIC
10114 || (decl && ! (TREE_PUBLIC (decl) || DECL_WEAK (decl)))
10115 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10116 }
10117
10118 /* Expand to appropriate sym*_label2reg for SYM and SIBCALL_P. */
10119 void
10120 sh_expand_sym_label2reg (rtx reg, rtx sym, rtx lab, bool sibcall_p)
10121 {
10122 const_tree decl = SYMBOL_REF_DECL (sym);
10123 bool is_weak = (decl && DECL_P (decl) && DECL_WEAK (decl));
10124
10125 if (!is_weak && SYMBOL_REF_LOCAL_P (sym))
10126 emit_insn (gen_sym_label2reg (reg, sym, lab));
10127 else if (sibcall_p && SYMBOL_REF_LOCAL_P (sym))
10128 emit_insn (gen_symPCREL_label2reg (reg, sym, lab));
10129 else
10130 emit_insn (gen_symPLT_label2reg (reg, sym, lab));
10131 }
10132 \f
10133 /* Machine specific built-in functions. */
10134
10135 struct builtin_description
10136 {
10137 bool (* const is_enabled) (void);
10138 const enum insn_code icode;
10139 const char *const name;
10140 int signature;
10141 tree fndecl;
10142 };
10143
10144 /* Predicate used for built-ins that are not SHmedia-specific; it is true
10145 for any SH1 or later target. */
10146 static bool
10147 sh1_builtin_p (void)
10148 {
10149 return TARGET_SH1;
10150 }
10151
10152 /* Describe the number and signedness of arguments; arg[0] == result
10153 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
10154 /* 9: 64-bit pointer, 10: 32-bit pointer */
10155 static const char signature_args[][4] =
10156 {
10157 #define SH_BLTIN_V2SI2 0
10158 { 4, 4 },
10159 #define SH_BLTIN_V4HI2 1
10160 { 4, 4 },
10161 #define SH_BLTIN_V2SI3 2
10162 { 4, 4, 4 },
10163 #define SH_BLTIN_V4HI3 3
10164 { 4, 4, 4 },
10165 #define SH_BLTIN_V8QI3 4
10166 { 4, 4, 4 },
10167 #define SH_BLTIN_MAC_HISI 5
10168 { 1, 4, 4, 1 },
10169 #define SH_BLTIN_SH_HI 6
10170 { 4, 4, 1 },
10171 #define SH_BLTIN_SH_SI 7
10172 { 4, 4, 1 },
10173 #define SH_BLTIN_V4HI2V2SI 8
10174 { 4, 4, 4 },
10175 #define SH_BLTIN_V4HI2V8QI 9
10176 { 4, 4, 4 },
10177 #define SH_BLTIN_SISF 10
10178 { 4, 2 },
10179 #define SH_BLTIN_LDUA_L 11
10180 { 2, 10 },
10181 #define SH_BLTIN_LDUA_Q 12
10182 { 1, 10 },
10183 #define SH_BLTIN_STUA_L 13
10184 { 0, 10, 2 },
10185 #define SH_BLTIN_STUA_Q 14
10186 { 0, 10, 1 },
10187 #define SH_BLTIN_LDUA_L64 15
10188 { 2, 9 },
10189 #define SH_BLTIN_LDUA_Q64 16
10190 { 1, 9 },
10191 #define SH_BLTIN_STUA_L64 17
10192 { 0, 9, 2 },
10193 #define SH_BLTIN_STUA_Q64 18
10194 { 0, 9, 1 },
10195 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10196 #define SH_BLTIN_2 19
10197 #define SH_BLTIN_SU 19
10198 { 1, 2 },
10199 #define SH_BLTIN_3 20
10200 #define SH_BLTIN_SUS 20
10201 { 2, 2, 1 },
10202 #define SH_BLTIN_PSSV 21
10203 { 0, 8, 2, 2 },
10204 #define SH_BLTIN_XXUU 22
10205 #define SH_BLTIN_UUUU 22
10206 { 1, 1, 1, 1 },
10207 #define SH_BLTIN_PV 23
10208 { 0, 8 },
10209 #define SH_BLTIN_VP 24
10210 { 8, 0 },
10211 #define SH_BLTIN_UV 25
10212 { 1, 0 },
10213 #define SH_BLTIN_VU 26
10214 { 0, 1 },
10215 };
10216 /* mcmv: operands considered unsigned. */
10217 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10218 /* mperm: control value considered unsigned int. */
10219 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10220 /* mshards_q: returns signed short. */
10221 /* nsb: takes long long arg, returns unsigned char. */
10222 static struct builtin_description bdesc[] =
10223 {
10224 { sh1_builtin_p,
10225 CODE_FOR_sts_fpscr, "__builtin_sh_get_fpscr", SH_BLTIN_UV, 0 },
10226 { sh1_builtin_p,
10227 CODE_FOR_set_fpscr, "__builtin_sh_set_fpscr", SH_BLTIN_VU, 0 },
10228 };
10229
10230 static tree sh_builtin_get_fpscr;
10231 static tree sh_builtin_set_fpscr;
10232
10233 static void
10234 sh_init_builtins (void)
10235 {
10236 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10237 memset (shared, 0, sizeof shared);
10238
10239 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
10240 {
10241 builtin_description* d = &bdesc[di];
10242
10243 if (!d->is_enabled ())
10244 continue;
10245
10246 tree type, arg_type = NULL_TREE;
10247 int signature = d->signature;
10248
10249 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10250 type = shared[signature];
10251 else
10252 {
10253 int has_result = signature_args[signature][0] != 0;
10254 tree args[3];
10255
10256 if (! TARGET_FPU_ANY
10257 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10258 continue;
10259 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
10260 args[i] = NULL_TREE;
10261 for (int i = 3; ; i--)
10262 {
10263 int arg = signature_args[signature][i];
10264 int opno = i - 1 + has_result;
10265
10266 if (arg & 8)
10267 arg_type = ptr_type_node;
10268 else if (arg)
10269 arg_type = (*lang_hooks.types.type_for_mode)
10270 (insn_data[d->icode].operand[opno].mode, (arg & 1));
10271 else if (i)
10272 continue;
10273 else
10274 arg_type = void_type_node;
10275 if (i == 0)
10276 break;
10277 args[i-1] = arg_type;
10278 }
10279 type = build_function_type_list (arg_type, args[0], args[1],
10280 args[2], NULL_TREE);
10281 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10282 shared[signature] = type;
10283 }
10284 d->fndecl =
10285 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
10286 NULL, NULL_TREE);
10287 /* Recode {sts,set}_fpscr decls for sh_atomic_assign_expand_fenv. */
10288 if (d->icode == CODE_FOR_sts_fpscr)
10289 sh_builtin_get_fpscr = d->fndecl;
10290 else if (d->icode == CODE_FOR_set_fpscr)
10291 sh_builtin_set_fpscr = d->fndecl;
10292 }
10293 }
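/* Usage sketch (user level, not part of this file): the two builtins
   registered above can be called directly from C on an FPU-enabled SH
   target, e.g.

       unsigned int fpscr = __builtin_sh_get_fpscr ();
       __builtin_sh_set_fpscr (fpscr);   // write it back unchanged

   sh_atomic_assign_expand_fenv below builds exactly this kind of
   read/modify/write sequence in tree form.  */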
10294
10295 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
10296
10297 static void
10298 sh_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
10299 {
10300 const unsigned SH_FE_INVALID = 64;
10301 const unsigned SH_FE_DIVBYZERO = 32;
10302 const unsigned SH_FE_OVERFLOW = 16;
10303 const unsigned SH_FE_UNDERFLOW = 8;
10304 const unsigned SH_FE_INEXACT = 4;
10305 const unsigned HOST_WIDE_INT SH_FE_ALL_EXCEPT = (SH_FE_INVALID
10306 | SH_FE_DIVBYZERO
10307 | SH_FE_OVERFLOW
10308 | SH_FE_UNDERFLOW
10309 | SH_FE_INEXACT);
10310 const unsigned HOST_WIDE_INT SH_FE_EXCEPT_SHIFT = 5;
10311 tree fenv_var, mask, ld_fenv, masked_fenv;
10312 tree new_fenv_var, reload_fenv, restore_fnenv;
10313 tree update_call, atomic_feraiseexcept, hold_fnclex;
10314
10315 if (! TARGET_FPU_ANY)
10316 return;
10317
10318 /* Generate the equivalent of :
10319 unsigned int fenv_var;
10320 fenv_var = __builtin_sh_get_fpscr ();
10321
10322 unsigned int masked_fenv;
10323 masked_fenv = fenv_var & mask;
10324
10325 __builtin_sh_set_fpscr (masked_fenv); */
10326
10327 fenv_var = create_tmp_var_raw (unsigned_type_node);
10328 mask = build_int_cst (unsigned_type_node,
10329 ~((SH_FE_ALL_EXCEPT << SH_FE_EXCEPT_SHIFT)
10330 | SH_FE_ALL_EXCEPT));
10331 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
10332 fenv_var, build_call_expr (sh_builtin_get_fpscr, 0));
10333 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
10334 hold_fnclex = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
10335 fenv_var = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
10336 build2 (COMPOUND_EXPR, void_type_node, masked_fenv,
10337 ld_fenv),
10338 NULL_TREE, NULL_TREE);
10339 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var, hold_fnclex);
10340
10341 /* Store the value of masked_fenv to clear the exceptions:
10342 __builtin_sh_set_fpscr (masked_fenv); */
10343
10344 *clear = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
10345
10346 /* Generate the equivalent of :
10347 unsigned int new_fenv_var;
10348 new_fenv_var = __builtin_sh_get_fpscr ();
10349
10350 __builtin_sh_set_fpscr (fenv_var);
10351
10352 __atomic_feraiseexcept (new_fenv_var); */
10353
10354 new_fenv_var = create_tmp_var_raw (unsigned_type_node);
10355 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
10356 build_call_expr (sh_builtin_get_fpscr, 0));
10357 restore_fnenv = build_call_expr (sh_builtin_set_fpscr, 1, fenv_var);
10358 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
10359 update_call = build_call_expr (atomic_feraiseexcept, 1,
10360 fold_convert (integer_type_node,
10361 new_fenv_var));
10362 *update = build2 (COMPOUND_EXPR, void_type_node,
10363 build2 (COMPOUND_EXPR, void_type_node,
10364 reload_fenv, restore_fnenv), update_call);
10365 }
10366
10367 /* Implements target hook vector_mode_supported_p. */
10368 bool
10369 sh_vector_mode_supported_p (machine_mode mode ATTRIBUTE_UNUSED)
10370 {
10371 return false;
10372 }
10373
10374 bool
10375 sh_frame_pointer_required (void)
10376 {
10377 /* If needed override this in other tm.h files to cope with various OS
10378 lossage requiring a frame pointer. */
10379 if (SUBTARGET_FRAME_POINTER_REQUIRED)
10380 return true;
10381
10382 if (crtl->profile)
10383 return true;
10384
10385 return false;
10386 }
10387
10388 /* Implements target hook dwarf_calling_convention. Return an enum
10389 of dwarf_calling_convention. */
10390 int
10391 sh_dwarf_calling_convention (const_tree func)
10392 {
10393 if (sh_attr_renesas_p (func))
10394 return DW_CC_GNU_renesas_sh;
10395
10396 return DW_CC_normal;
10397 }
10398
10399 /* Returns the sh builtin decl for CODE. */
10400 static tree
10401 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10402 {
10403 if (code >= ARRAY_SIZE (bdesc))
10404 return error_mark_node;
10405
10406 if (!bdesc[code].is_enabled ())
10407 return error_mark_node;
10408
10409 return bdesc[code].fndecl;
10410 }
10411
10412 /* Expand an expression EXP that calls a built-in function,
10413 with result going to TARGET if that's convenient
10414 (and in mode MODE if that's convenient).
10415 SUBTARGET may be used as the target for computing one of EXP's operands.
10416 IGNORE is nonzero if the value is to be ignored. */
10417 static rtx
10418 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10419 machine_mode mode ATTRIBUTE_UNUSED, int ignore)
10420 {
10421 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10422 unsigned int fcode = DECL_FUNCTION_CODE (fndecl);
10423 const struct builtin_description *d = &bdesc[fcode];
10424 enum insn_code icode = d->icode;
10425 int signature = d->signature;
10426 int nop = 0;
10427 rtx op[4];
10428
10429 if (signature_args[signature][0])
10430 {
10431 if (ignore)
10432 return NULL_RTX;
10433
10434 machine_mode tmode = insn_data[icode].operand[0].mode;
10435 if (! target || GET_MODE (target) != tmode
10436 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10437 target = gen_reg_rtx (tmode);
10438 op[nop++] = target;
10439 }
10440 else
10441 target = NULL_RTX;
10442
10443 for (int i = 1; i <= 3; i++, nop++)
10444 {
10445 if (! signature_args[signature][i])
10446 break;
10447 tree arg = CALL_EXPR_ARG (exp, i - 1);
10448 if (arg == error_mark_node)
10449 return const0_rtx;
10450
10451 machine_mode opmode;
10452 tree optype;
10453 if (signature_args[signature][i] & 8)
10454 {
10455 opmode = ptr_mode;
10456 optype = ptr_type_node;
10457 }
10458 else
10459 {
10460 opmode = insn_data[icode].operand[nop].mode;
10461 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
10462 }
10463
10464 machine_mode argmode = TYPE_MODE (TREE_TYPE (arg));
10465 if (argmode != opmode)
10466 arg = build1 (NOP_EXPR, optype, arg);
10467 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
10468 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
10469 op[nop] = copy_to_mode_reg (opmode, op[nop]);
10470 }
10471
10472 rtx pat = NULL_RTX;
10473
10474 switch (nop)
10475 {
10476 case 1:
10477 pat = (*insn_data[d->icode].genfun) (op[0]);
10478 break;
10479 case 2:
10480 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
10481 break;
10482 case 3:
10483 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
10484 break;
10485 case 4:
10486 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
10487 break;
10488 default:
10489 gcc_unreachable ();
10490 }
10491 if (! pat)
10492 return NULL_RTX;
10493 emit_insn (pat);
10494 return target;
10495 }
10496
10497 /* Return true if hard register REGNO can hold a value of machine-mode MODE.
10498 We can allow any mode in any general register. The special registers
10499 only allow SImode. Don't allow any mode in the PR.
10500
10501 We cannot hold DCmode values in the XD registers because alter_reg
10502 handles subregs of them incorrectly. We could work around this by
10503 spacing the XD registers like the DR registers, but this would require
10504 additional memory in every compilation to hold larger register vectors.
10505 We could hold SFmode / SCmode values in XD registers, but that
10506 would require a tertiary reload when reloading from / to memory,
10507 and a secondary reload to reload from / to general regs; that
10508 seems to be a losing proposition.
10509
10510 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
10511 it won't be ferried through GP registers first. */
10512 bool
10513 sh_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10514 {
10515 if (SPECIAL_REGISTER_P (regno))
10516 return mode == SImode;
10517
10518 if (regno == FPUL_REG)
10519 return (mode == SImode || mode == SFmode);
10520
10521 if (FP_REGISTER_P (regno) && mode == SFmode)
10522 return true;
10523
10524 if (mode == V2SFmode)
10525 {
10526 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
10527 || GENERAL_REGISTER_P (regno)))
10528 return true;
10529 else
10530 return false;
10531 }
10532
10533 if (mode == V4SFmode)
10534 {
10535 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
10536 || GENERAL_REGISTER_P (regno))
10537 return true;
10538 else
10539 return false;
10540 }
10541
10542 if (mode == V16SFmode)
10543 return regno == FIRST_XD_REG;
10544
10545 if (FP_REGISTER_P (regno))
10546 {
10547 if (mode == SFmode
10548 || mode == SImode
10549 || ((TARGET_SH2E) && mode == SCmode)
10550 || (((TARGET_FPU_DOUBLE && mode == DFmode) || mode == DCmode)
10551 && ((regno - FIRST_FP_REG) & 1) == 0)
10552 || (TARGET_SH4 && mode == TImode
10553 && ((regno - FIRST_FP_REG) & 3) == 0))
10554 return true;
10555 else
10556 return false;
10557 }
10558
10559 if (XD_REGISTER_P (regno))
10560 return mode == DFmode;
10561
10562 if (regno == PR_REG)
10563 return mode == SImode;
10564
10565 if (regno == FPSCR_REG)
10566 return mode == SImode;
10567
10568 return true;
10569 }
10570
10571 /* Specify the modes required to caller save a given hard regno.
10572 choose_hard_reg_mode chooses mode based on HARD_REGNO_MODE_OK
10573 and returns ?Imode for float regs when sh_hard_regno_mode_ok
10574 permits integer modes on them. That makes LRA's split process
10575 unhappy. See PR55212.
10576 */
10577 machine_mode
10578 sh_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
10579 machine_mode mode)
10580 {
10581 if (FP_REGISTER_P (regno)
10582 && (mode == SFmode
10583 || mode == SCmode
10584 || ((mode == DFmode || mode == DCmode)
10585 && ((regno - FIRST_FP_REG) & 1) == 0)))
10586 return mode;
10587
10588 return choose_hard_reg_mode (regno, nregs, false);
10589 }
10590
10591 /* Return true if a mode change from FROM to TO is invalid for registers
10592 in class RCLASS. */
10593 bool
10594 sh_cannot_change_mode_class (machine_mode from, machine_mode to,
10595 enum reg_class rclass)
10596 {
10597 /* We want to enable the use of SUBREGs as a means to
10598 VEC_SELECT a single element of a vector. */
10599
10600 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
10601 This can be problematic when SFmode vector subregs need to be accessed
10602 on the stack with displacement addressing, as it happens with -O0.
10603 Thus we disallow the mode change for -O0. */
10604 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
10605 return optimize ? (reg_classes_intersect_p (GENERAL_REGS, rclass)) : false;
10606
10607 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
10608 {
10609 if (TARGET_LITTLE_ENDIAN)
10610 {
10611 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
10612 return reg_classes_intersect_p (DF_REGS, rclass);
10613 }
10614 else
10615 {
10616 if (GET_MODE_SIZE (from) < 8)
10617 return reg_classes_intersect_p (DF_REGS, rclass);
10618 }
10619 }
10620 return false;
10621 }
10622
10623 /* Return true if registers in machine mode MODE will likely be
10624 allocated to registers in small register classes. */
10625 bool
10626 sh_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
10627 {
10628 return true;
10629 }
10630
10631 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
10632 that label is used. */
10633 void
10634 sh_mark_label (rtx address, int nuses)
10635 {
10636 if (GOTOFF_P (address))
10637 {
10638 /* Extract the label or symbol. */
10639 address = XEXP (address, 0);
10640 if (GET_CODE (address) == PLUS)
10641 address = XEXP (address, 0);
10642 address = XVECEXP (address, 0, 0);
10643 }
10644 if (GET_CODE (address) == LABEL_REF
10645 && LABEL_P (XEXP (address, 0)))
10646 LABEL_NUSES (XEXP (address, 0)) += nuses;
10647 }
10648
10649 /* Compute extra cost of moving data between one register class
10650 and another.
10651
10652 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
10653 uses this information. Hence, the general register <-> floating point
10654 register information here is not used for SFmode. */
10655 static int
10656 sh_register_move_cost (machine_mode mode,
10657 reg_class_t srcclass, reg_class_t dstclass)
10658 {
10659 if (dstclass == T_REGS || dstclass == PR_REGS)
10660 return 10;
10661
10662 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
10663 return 4;
10664
10665 if (mode == SImode && TARGET_FMOVD
10666 && REGCLASS_HAS_FP_REG (srcclass)
10667 && REGCLASS_HAS_FP_REG (dstclass))
10668 return 4;
10669
10670 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
10671 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
10672
10673 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
10674 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
10675 return 9;
10676
10677 if ((REGCLASS_HAS_FP_REG (dstclass)
10678 && REGCLASS_HAS_GENERAL_REG (srcclass))
10679 || (REGCLASS_HAS_GENERAL_REG (dstclass)
10680 && REGCLASS_HAS_FP_REG (srcclass)))
10681 {
10682 /* Discourage trying to use fp regs for a pointer. This also
10683 discourages fp regs with SImode because Pmode is an alias
10684 of SImode on this target. See PR target/48596. */
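/* For illustration only (figures derived from the formula below, not part
   of the original comment): with this addend, moving an SImode/Pmode value
   between a general register and an FP register costs (12 + 40) * 1 = 52
   without TARGET_FMOVD, or (8 + 40) * 1 = 48 with it, whereas a DFmode
   value (addend 0, size 8) costs just 12 or 8.  This makes the allocator
   strongly prefer keeping pointers out of FP registers.  */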
10685 int addend = (mode == Pmode) ? 40 : 0;
10686
10687 return ((TARGET_FMOVD ? 8 : 12) + addend)
10688 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10689 }
10690
10691 if ((dstclass == FPUL_REGS
10692 && REGCLASS_HAS_GENERAL_REG (srcclass))
10693 || (srcclass == FPUL_REGS
10694 && REGCLASS_HAS_GENERAL_REG (dstclass)))
10695 return 5;
10696
10697 if ((dstclass == FPUL_REGS
10698 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
10699 || (srcclass == FPUL_REGS
10700 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
10701 return 7;
10702
10703 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10704 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10705 return 4;
10706
10707 if (TARGET_FMOVD
10708 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
10709 && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10710 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10711
10712 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
10713 }
10714
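/* Load a pointer-sized value from memory at ADDR into REG, sign-extending
   it to Pmode when ptr_mode is narrower than Pmode, and return the emitted
   move insn.  */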
10715 static rtx
10716 emit_load_ptr (rtx reg, rtx addr)
10717 {
10718 rtx mem = gen_const_mem (ptr_mode, addr);
10719
10720 if (Pmode != ptr_mode)
10721 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
10722 return emit_move_insn (reg, mem);
10723 }
10724
10725 static void
10726 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10727 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10728 tree function)
10729 {
10730 CUMULATIVE_ARGS cum;
10731 int structure_value_byref = 0;
10732 rtx this_rtx, this_value, sibcall, funexp;
10733 rtx_insn *insns;
10734 tree funtype = TREE_TYPE (function);
10735 int simple_add = CONST_OK_FOR_ADD (delta);
10736 int did_load = 0;
10737 rtx scratch0, scratch1, scratch2;
10738
10739 reload_completed = 1;
10740 epilogue_completed = 1;
10741 crtl->uses_only_leaf_regs = 1;
10742
10743 emit_note (NOTE_INSN_PROLOGUE_END);
10744
10745 /* Find the "this" pointer. We have such a wide range of ABIs for the
10746 SH that it's best to do this completely machine independently.
10747 "this" is passed as first argument, unless a structure return pointer
10748 comes first, in which case "this" comes second. */
10749 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
10750 #ifndef PCC_STATIC_STRUCT_RETURN
10751 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10752 structure_value_byref = 1;
10753 #endif /* not PCC_STATIC_STRUCT_RETURN */
10754 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
10755 {
10756 tree ptype = build_pointer_type (TREE_TYPE (funtype));
10757
10758 sh_function_arg_advance (pack_cumulative_args (&cum), Pmode, ptype, true);
10759 }
10760 this_rtx
10761 = sh_function_arg (pack_cumulative_args (&cum), Pmode, ptr_type_node, true);
10762
10763 /* For SHcompact, we only have r0 for a scratch register: r1 is the
10764 static chain pointer (even if you can't have nested virtual functions
10765 right now, someone might implement them sometime), and the rest of the
10766 registers are used for argument passing, are callee-saved, or reserved. */
10767 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
10768 -ffixed-reg has been used. */
10769 if (! call_used_regs[0] || fixed_regs[0])
10770 error ("r0 needs to be available as a call-clobbered register");
10771 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
10772
10773 {
10774 if (call_used_regs[1] && ! fixed_regs[1])
10775 scratch1 = gen_rtx_REG (ptr_mode, 1);
10776 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
10777 to the location where struct values are to be returned. */
10778 if (call_used_regs[3] && ! fixed_regs[3])
10779 scratch2 = gen_rtx_REG (Pmode, 3);
10780 }
10781
10782 this_value = plus_constant (Pmode, this_rtx, delta);
10783 if (vcall_offset
10784 && (simple_add || scratch0 != scratch1)
10785 && strict_memory_address_p (ptr_mode, this_value))
10786 {
10787 emit_load_ptr (scratch0, this_value);
10788 did_load = 1;
10789 }
10790
10791 if (!delta)
10792 ; /* Do nothing. */
10793 else if (simple_add)
10794 emit_move_insn (this_rtx, this_value);
10795 else
10796 {
10797 emit_move_insn (scratch1, GEN_INT (delta));
10798 emit_insn (gen_add2_insn (this_rtx, scratch1));
10799 }
10800
10801 if (vcall_offset)
10802 {
10803 rtx offset_addr;
10804
10805 if (!did_load)
10806 emit_load_ptr (scratch0, this_rtx);
10807
10808 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
10809 if (strict_memory_address_p (ptr_mode, offset_addr))
10810 ; /* Do nothing. */
10811 else if (scratch0 != scratch1)
10812 {
10813 /* scratch0 != scratch1, and we have indexed loads. Get better
10814 schedule by loading the offset into r1 and using an indexed
10815 load - then the load of r1 can issue before the load from
10816 (this_rtx + delta) finishes. */
10817 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10818 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
10819 }
10820 else if (CONST_OK_FOR_ADD (vcall_offset))
10821 {
10822 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
10823 offset_addr = scratch0;
10824 }
10825 else if (scratch0 != scratch1)
10826 {
10827 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10828 emit_insn (gen_add2_insn (scratch0, scratch1));
10829 offset_addr = scratch0;
10830 }
10831 else
10832 gcc_unreachable (); /* FIXME */
10833 emit_load_ptr (scratch0, offset_addr);
10834
10835 if (Pmode != ptr_mode)
10836 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
10837 emit_insn (gen_add2_insn (this_rtx, scratch0));
10838 }
10839
10840 /* Generate a tail call to the target function. */
10841 if (! TREE_USED (function))
10842 {
10843 assemble_external (function);
10844 TREE_USED (function) = 1;
10845 }
10846 funexp = XEXP (DECL_RTL (function), 0);
10847 /* If the function is overridden, so is the thunk, hence we don't
10848 need GOT addressing even if this is a public symbol. */
10849 #if 0
10850 if (TARGET_SH1 && ! flag_weak)
10851 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
10852 else
10853 #endif
10854 if (TARGET_SH2 && flag_pic)
10855 {
10856 if (TARGET_FDPIC)
10857 {
10858 sibcall = gen_sibcall_pcrel_fdpic (funexp, const0_rtx);
10859 XEXP (XVECEXP (sibcall, 0, 3), 0) = scratch2;
10860 }
10861 else
10862 {
10863 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
10864 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
10865 }
10866 }
10867 else
10868 {
10869 emit_move_insn (scratch2, funexp);
10870 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
10871 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
10872 }
10873 sibcall = emit_call_insn (sibcall);
10874 SIBLING_CALL_P (sibcall) = 1;
10875 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
10876 emit_barrier ();
10877
10878 /* Run just enough of rest_of_compilation to do scheduling and get
10879 the insns emitted. Note that use_thunk calls
10880 assemble_start_function and assemble_end_function. */
10881
10882 insns = get_insns ();
10883
10884 if (optimize > 0)
10885 {
10886 if (! cfun->cfg)
10887 init_flow (cfun);
10888 split_all_insns_noflow ();
10889 }
10890
10891 sh_reorg ();
10892 shorten_branches (insns);
10893 final_start_function (insns, file, 1);
10894 final (insns, file, 1);
10895 final_end_function ();
10896
10897 reload_completed = 0;
10898 epilogue_completed = 0;
10899 }
10900
10901 /* Return an RTX pair for the address and call site label of a function
10902 NAME of kind KIND, placing the result in TARGET if not NULL. For
10903 SFUNC_STATIC, if FDPIC, the LAB member of result will be set to
10904 (const_int 0) if jsr should be used, or a label_ref if bsrf should
10905 be used. For FDPIC, both SFUNC_GOT and SFUNC_STATIC will return the
10906 address of the function itself, not a function descriptor, so they
10907 can only be used with functions not using the FDPIC register that
10908 are known to be called directly without a PLT entry. */
10909
10910 function_symbol_result
10911 function_symbol (rtx target, const char *name, sh_function_kind kind)
10912 {
10913 /* If this is not an ordinary function, the name usually comes from a
10914 string literal or an sprintf buffer. Make sure we use the same
10915 string consistently, so that cse will be able to unify address loads. */
10916 if (kind != FUNCTION_ORDINARY)
10917 name = IDENTIFIER_POINTER (get_identifier (name));
10918 rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
10919 rtx lab = const0_rtx;
10920 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
10921 if (flag_pic)
10922 switch (kind)
10923 {
10924 case FUNCTION_ORDINARY:
10925 break;
10926 case SFUNC_GOT:
10927 {
10928 rtx reg = target ? target : gen_reg_rtx (Pmode);
10929
10930 emit_insn (gen_symGOT2reg (reg, sym));
10931 sym = reg;
10932 break;
10933 }
10934 case SFUNC_STATIC:
10935 {
10936 rtx reg = target ? target : gen_reg_rtx (Pmode);
10937
10938 if (TARGET_FDPIC)
10939 {
10940 /* We use PC-relative calls, since GOTOFF can only refer
10941 to writable data. This works along with sh_sfunc_call. */
10942 lab = PATTERN (gen_call_site ());
10943 emit_insn (gen_sym_label2reg (reg, sym, lab));
10944 }
10945 else
10946 {
10947 /* ??? To allow cse to work, we use GOTOFF relocations.
10948 We could add combiner patterns to transform this into
10949 straight pc-relative calls with sym2PIC / bsrf when
10950 label load and function call are still 1:1 and in the
10951 same basic block during combine. */
10952 emit_insn (gen_symGOTOFF2reg (reg, sym));
10953 }
10954
10955 sym = reg;
10956 break;
10957 }
10958 }
10959 if (target && sym != target)
10960 {
10961 emit_move_insn (target, sym);
10962 return function_symbol_result (target, lab);
10963 }
10964 return function_symbol_result (sym, lab);
10965 }
10966
10967 /* Find the number of the first general purpose register whose bit is
10968 set in S, or return -1 if there is none. */
10969 static int
10970 scavenge_reg (HARD_REG_SET *s)
10971 {
10972 for (int r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
10973 if (TEST_HARD_REG_BIT (*s, r))
10974 return r;
10975 return -1;
10976 }
10977
10978 rtx
10979 sh_get_pr_initial_val (void)
10980 {
10981 /* If we haven't finished rtl generation, there might be a nonlocal label
10982 that we haven't seen yet.
10983 ??? get_hard_reg_initial_val fails if it is called after register
10984 allocation has started, unless it has been called before for the
10985 same register. And even then, we end in trouble if we didn't use
10986 the register in the same basic block before. So call
10987 get_hard_reg_initial_val now and wrap it in an unspec if we might
10988 need to replace it. */
10989 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
10990 combine can put the pseudo returned by get_hard_reg_initial_val into
10991 instructions that need a general purpose register, which will fail to
10992 be recognized when the pseudo becomes allocated to PR. */
10993 rtx val = get_hard_reg_initial_val (Pmode, PR_REG);
10994 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
10995 }
10996
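/* Expand an scc insn that stores the result of comparing the T register
   (operands[2]) against a constant (operands[3]) into operands[0], where
   operands[1] is the comparison code.  A minimal sketch of the mapping
   implemented below (illustrative, not from the original sources):
     T == 1  /  T != 0            ->  movt dst
     T == 0  /  T != 1            ->  movnegt dst
     T ==/!= any other constant   ->  load constant 0 / 1
   Returns false, so that the caller falls back to the generic expansion,
   for any other comparison code or when the first operand is not the
   T register.  */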
10997 bool
10998 sh_expand_t_scc (rtx operands[])
10999 {
11000 enum rtx_code code = GET_CODE (operands[1]);
11001 rtx target = operands[0];
11002 rtx op0 = operands[2];
11003 rtx op1 = operands[3];
11004 rtx result = target;
11005
11006 if (!REG_P (op0) || REGNO (op0) != T_REG
11007 || !CONST_INT_P (op1))
11008 return false;
11009 if (!REG_P (result))
11010 result = gen_reg_rtx (SImode);
11011 HOST_WIDE_INT val = INTVAL (op1);
11012 if ((code == EQ && val == 1) || (code == NE && val == 0))
11013 emit_insn (gen_movt (result, get_t_reg_rtx ()));
11014 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11015 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
11016 else if (code == EQ || code == NE)
11017 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11018 else
11019 return false;
11020 if (result != target)
11021 emit_move_insn (target, result);
11022 return true;
11023 }
11024
11025 /* INSN is an sfunc; return the rtx that describes the address used. */
11026 static rtx
11027 extract_sfunc_addr (rtx insn)
11028 {
11029 rtx pattern = PATTERN (insn);
11030 const int len = XVECLEN (pattern, 0);
11031 for (int i = 0; i < len; i++)
11032 {
11033 rtx part = XVECEXP (pattern, 0, i);
11034 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11035 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11036 return XEXP (part, 0);
11037 }
11038 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11039 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11040 }
11041
11042 /* Verify that the register in use_sfunc_addr still agrees with the address
11043 used in the sfunc. This prevents fill_slots_from_thread from changing
11044 use_sfunc_addr.
11045 INSN is the use_sfunc_addr instruction, and REG is the register it
11046 guards. */
11047 bool
11048 check_use_sfunc_addr (rtx_insn *insn, rtx reg)
11049 {
11050 /* Search for the sfunc. It should really come right after INSN. */
11051 while ((insn = NEXT_INSN (insn)))
11052 {
11053 if (LABEL_P (insn) || JUMP_P (insn))
11054 break;
11055 if (! INSN_P (insn))
11056 continue;
11057
11058 if (rtx_sequence *seq = dyn_cast<rtx_sequence *> (PATTERN (insn)))
11059 insn = seq->insn (0);
11060 if (GET_CODE (PATTERN (insn)) != PARALLEL
11061 || get_attr_type (insn) != TYPE_SFUNC)
11062 continue;
11063 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11064 }
11065 gcc_unreachable ();
11066 }
11067
11068 /* This function returns a constant rtx that represents 2**15 / pi in
11069 SFmode. It's used to scale SFmode angles, in radians, to a fixed-point
11070 signed 16.16-bit fraction of a full circle, i.e. 2*pi maps to 0x10000. */
11071 static GTY(()) rtx sh_fsca_sf2int_rtx;
11072
11073 rtx
11074 sh_fsca_sf2int (void)
11075 {
11076 if (! sh_fsca_sf2int_rtx)
11077 {
11078 REAL_VALUE_TYPE rv;
11079
11080 real_from_string (&rv, "10430.378350470453");
11081 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11082 }
11083
11084 return sh_fsca_sf2int_rtx;
11085 }
11086
11087 /* This function returns a constant rtx that represents pi / 2**15 in
11088 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
11089 of a full circle back to an SFmode value, i.e. 0x10000 maps
11090 to 2*pi. */
11091 static GTY(()) rtx sh_fsca_int2sf_rtx;
11092
11093 rtx
11094 sh_fsca_int2sf (void)
11095 {
11096 if (! sh_fsca_int2sf_rtx)
11097 {
11098 REAL_VALUE_TYPE rv;
11099
11100 real_from_string (&rv, "9.587379924285257e-5");
11101 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11102 }
11103
11104 return sh_fsca_int2sf_rtx;
11105 }
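/* Worked example for the two scaling constants above (illustrative only,
   assuming the fixed-point convention described in the comments): an angle
   of pi/2 radians multiplied by sh_fsca_sf2int ()'s value
   (2**15 / pi ~= 10430.378) gives ~16384 = 0x4000, i.e. a quarter of the
   0x10000 full circle that the fsca instruction expects; multiplying
   0x4000 by sh_fsca_int2sf ()'s value (pi / 2**15 ~= 9.5874e-5) maps it
   back to ~1.5708 radians.  */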
11106
11107 /* Initialize the CUMULATIVE_ARGS structure. */
11108 void
11109 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11110 tree fntype,
11111 rtx libname ATTRIBUTE_UNUSED,
11112 tree fndecl,
11113 signed int n_named_args,
11114 machine_mode mode)
11115 {
11116 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11117 pcum->free_single_fp_reg = 0;
11118 pcum->outgoing = n_named_args != -1;
11119
11120 /* FIXME: Should we check TARGET_HITACHI here ??? */
11121 pcum->renesas_abi = sh_attr_renesas_p (fntype);
11122
11123 if (fntype)
11124 {
11125 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11126 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11127 pcum->prototype_p = prototype_p (fntype);
11128 pcum->arg_count [(int) SH_ARG_INT] = false;
11129 }
11130 else
11131 {
11132 pcum->arg_count [(int) SH_ARG_INT] = 0;
11133 pcum->prototype_p = false;
11134 if (mode != VOIDmode)
11135 {
11136 /* If the default ABI is the Renesas ABI then all library
11137 calls must assume that the library will be using the
11138 Renesas ABI. So if the function would return its result
11139 in memory then we must force the address of this memory
11140 block onto the stack. Ideally we would like to call
11141 targetm.calls.return_in_memory() here but we do not have
11142 the TYPE or the FNDECL available so we synthesize the
11143 contents of that function as best we can. */
11144 pcum->force_mem =
11145 (TARGET_DEFAULT & MASK_HITACHI)
11146 && (mode == BLKmode
11147 || (GET_MODE_SIZE (mode) > 4
11148 && !(mode == DFmode
11149 && TARGET_FPU_DOUBLE)));
11150 }
11151 else
11152 pcum->force_mem = false;
11153 }
11154 }
11155
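/* Return an rtx that converts X to MODE, normally (truncate:MODE X).  If X
   is itself a sign/zero extension, look through it: return the inner value
   directly if it already has MODE, truncate the inner value if it is at
   least as wide as MODE, or re-apply the original extension code to the
   narrower inner value (only an existing SIGN_EXTEND is reused this way
   when NEED_SIGN_EXT is nonzero).  */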
11156 rtx
11157 sh_gen_truncate (machine_mode mode, rtx x, int need_sign_ext)
11158 {
11159 enum rtx_code code = TRUNCATE;
11160
11161 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11162 {
11163 rtx inner = XEXP (x, 0);
11164 machine_mode inner_mode = GET_MODE (inner);
11165
11166 if (inner_mode == mode)
11167 return inner;
11168 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11169 x = inner;
11170 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11171 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11172 {
11173 code = GET_CODE (x);
11174 x = inner;
11175 }
11176 }
11177 return gen_rtx_fmt_e (code, mode, x);
11178 }
11179
11180 /* Load and store depend on the highpart of the address. However,
11181 set_attr_alternative does not give well-defined results before reload,
11182 so we must look at the rtl ourselves to see if any of the feeding
11183 registers is used in a memref.
11184
11185 Return true iff INSN contains a MEM. */
11186 bool
11187 sh_contains_memref_p (rtx insn)
11188 {
11189 subrtx_iterator::array_type array;
11190 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
11191 if (MEM_P (*iter))
11192 return true;
11193 return false;
11194 }
11195
11196 /* Return true iff INSN loads a banked register. */
11197 bool
11198 sh_loads_bankedreg_p (rtx insn)
11199 {
11200 if (GET_CODE (PATTERN (insn)) == SET)
11201 {
11202 rtx op = SET_DEST (PATTERN(insn));
11203 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
11204 return true;
11205 }
11206
11207 return false;
11208 }
11209
11210 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
11211 static reg_class_t
11212 sh_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
11213 {
11214 return rclass;
11215 }
11216
11217 /* Implement TARGET_SECONDARY_RELOAD. */
11218 static reg_class_t
11219 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
11220 machine_mode mode, secondary_reload_info *sri)
11221 {
11222 enum reg_class rclass = (enum reg_class) rclass_i;
11223
11224 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
11225 && REG_P (XEXP (XEXP (x, 0), 0))
11226 && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
11227 return rclass == R0_REGS ? NO_REGS : R0_REGS;
11228
11229 if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
11230 return rclass == R0_REGS ? NO_REGS : R0_REGS;
11231
11232 if (REG_P (x) && REGNO (x) == GBR_REG)
11233 return NO_REGS;
11234
11235 if (in_p)
11236 {
11237 if (REGCLASS_HAS_FP_REG (rclass)
11238 && immediate_operand ((x), mode)
11239 && ! ((fp_zero_operand (x) || fp_one_operand (x)) && mode == SFmode))
11240 switch (mode)
11241 {
11242 case SFmode:
11243 sri->icode = CODE_FOR_reload_insf__frn;
11244 return NO_REGS;
11245 case DFmode:
11246 sri->icode = CODE_FOR_reload_indf__frn;
11247 return NO_REGS;
11248 case SImode:
11249 /* ??? If we knew that we are in the appropriate mode -
11250 single precision - we could use a reload pattern directly. */
11251 return FPUL_REGS;
11252 default:
11253 abort ();
11254 }
11255 if (rclass == FPUL_REGS
11256 && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
11257 || REGNO (x) == T_REG))
11258 || GET_CODE (x) == PLUS))
11259 return GENERAL_REGS;
11260 if (rclass == FPUL_REGS && immediate_operand (x, mode))
11261 {
11262 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
11263 return GENERAL_REGS;
11264 else if (mode == SFmode)
11265 return FP_REGS;
11266 sri->icode = CODE_FOR_reload_insi__i_fpul;
11267 return NO_REGS;
11268 }
11269 if (rclass == FPSCR_REGS
11270 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
11271 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
11272 return GENERAL_REGS;
11273 } /* end of input-only processing. */
11274
11275 if (((REGCLASS_HAS_FP_REG (rclass)
11276 && (REG_P (x)
11277 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
11278 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
11279 && TARGET_FMOVD))))
11280 || (REGCLASS_HAS_GENERAL_REG (rclass)
11281 && REG_P (x)
11282 && FP_REGISTER_P (REGNO (x))))
11283 && (mode == SFmode || mode == SImode))
11284 return FPUL_REGS;
11285 if ((rclass == FPUL_REGS
11286 || (REGCLASS_HAS_FP_REG (rclass) && mode == SImode))
11287 && (MEM_P (x)
11288 || (REG_P (x)
11289 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
11290 || REGNO (x) == T_REG
11291 || system_reg_operand (x, VOIDmode)))))
11292 {
11293 if (rclass == FPUL_REGS)
11294 return GENERAL_REGS;
11295 return NO_REGS; /* LRA wants NO_REGS here; it used to be FPUL_REGS. */
11296 }
11297
11298 if ((rclass == MAC_REGS || rclass == PR_REGS)
11299 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
11300 && rclass != REGNO_REG_CLASS (REGNO (x)))
11301 return GENERAL_REGS;
11302
11303 /* If we get here, fall back to loading the FPUL register through general registers.
11304 This case can happen when movsi_ie insn is picked initially to
11305 load/store the FPUL register from/to another register, and then the
11306 other register is allocated on the stack. */
11307 if (rclass == FPUL_REGS && true_regnum (x) == -1)
11308 return GENERAL_REGS;
11309
11310 /* Force mov.b / mov.w displacement addressing insn to use R0 as
11311 the other operand.
11312 On SH2A we could also just leave it alone here, which would result in a
11313 4 byte move insn being generated instead. However, for this to work
11314 the insns must have the appropriate alternatives. */
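/* For reference (an assumption about the underlying ISA encodings, not
   taken from the original comment): the short displacement forms are
     mov.b @(disp,Rn),R0    mov.b R0,@(disp,Rn)
     mov.w @(disp,Rn),R0    mov.w R0,@(disp,Rn)
   i.e. the data side of these insns is hard-wired to R0, hence the
   R0_REGS requirement below.  */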
11315 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
11316 && satisfies_constraint_Sdd (x)
11317 && sh_disp_addr_displacement (x)
11318 <= sh_max_mov_insn_displacement (mode, false))
11319 return R0_REGS;
11320
11321 /* When reload is trying to address a QImode or HImode subreg on the stack,
11322 force any subreg byte into R0_REGS, as this is going to become a
11323 displacement address.
11324 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
11325 is on the stack, the memref to it might already require a displacement
11326 and that has to be added to the final address. At this point we don't
11327 know the cumulative displacement so we assume the worst case. */
11328 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
11329 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
11330 return R0_REGS;
11331
11332 return NO_REGS;
11333 }
11334
11335 /* Return true if SUBST can't safely replace its equivalent during RA. */
11336 static bool
11337 sh_cannot_substitute_mem_equiv_p (rtx)
11338 {
11339 /* If SUBST is mem[base+index] or QI/HImode mem[base+disp], the insn
11340 uses R0 and may cause spill failure when R0 is already used.
11341 We have to return true for that case at least.
11342 Moreover, SH has heavy R0 pressure and not enough hard registers to
11343 make the equiv substitution a win for size or speed on average
11344 working sets. The pseudos produced to hold the equiv values can't
11345 get good hard registers in bad cases and end up as memory
11346 save/restore insns, which makes the code worse. */
11347 return true;
11348 }
11349
11350 /* Return true if DISP can be legitimized. */
11351 static bool
11352 sh_legitimize_address_displacement (rtx *disp, rtx *offs,
11353 machine_mode mode)
11354 {
11355 if ((TARGET_FPU_DOUBLE && mode == DFmode)
11356 || (TARGET_SH2E && mode == SFmode))
11357 return false;
11358
11359 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, INTVAL (*disp));
11360 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
11361 {
11362 *disp = adj.mov_disp;
11363 *offs = adj.offset_adjust;
11364 return true;
11365 }
11366
11367 return false;
11368 }
11369
11370 /* Return true if a movsf insn should be split with an additional
11371 register. */
11372 bool
11373 sh_movsf_ie_ra_split_p (rtx op0, rtx op1, rtx op2)
11374 {
11375 /* op0 == op1 */
11376 if (rtx_equal_p (op0, op1))
11377 return true;
11378 /* fy, FQ, reg */
11379 if (GET_CODE (op1) == CONST_DOUBLE
11380 && ! satisfies_constraint_G (op1)
11381 && ! satisfies_constraint_H (op1)
11382 && REG_P (op0)
11383 && REG_P (op2))
11384 return true;
11385 /* f, r, y */
11386 if (REG_P (op0) && FP_REGISTER_P (REGNO (op0))
11387 && REG_P (op1) && GENERAL_REGISTER_P (REGNO (op1))
11388 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
11389 return true;
11390 /* r, f, y */
11391 if (REG_P (op1) && FP_REGISTER_P (REGNO (op1))
11392 && REG_P (op0) && GENERAL_REGISTER_P (REGNO (op0))
11393 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
11394 return true;
11395
11396 return false;
11397 }
11398
11399 static void
11400 sh_conditional_register_usage (void)
11401 {
11402 for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
11403 if (! VALID_REGISTER_P (regno))
11404 fixed_regs[regno] = call_used_regs[regno] = 1;
11405 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
11406 if (flag_pic)
11407 {
11408 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11409 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11410 }
11411 if (TARGET_FDPIC)
11412 {
11413 fixed_regs[PIC_REG] = 1;
11414 call_used_regs[PIC_REG] = 1;
11415 call_really_used_regs[PIC_REG] = 1;
11416 }
11417 /* Renesas saves and restores mac registers on call. */
11418 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
11419 {
11420 call_really_used_regs[MACH_REG] = 0;
11421 call_really_used_regs[MACL_REG] = 0;
11422 }
11423
11424 for (int regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
11425 if (! fixed_regs[regno] && call_really_used_regs[regno])
11426 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
11427
11428 call_really_used_regs[FPSCR_MODES_REG] = 0;
11429 call_really_used_regs[FPSCR_STAT_REG] = 0;
11430 }
11431
11432 /* Implement TARGET_LEGITIMATE_CONSTANT_P
11433
11434 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
11435 static bool
11436 sh_legitimate_constant_p (machine_mode mode, rtx x)
11437 {
11438 if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
11439 {
11440 rtx base, offset;
11441 split_const (x, &base, &offset);
11442
11443 if (GET_CODE (base) == SYMBOL_REF
11444 && !offset_within_block_p (base, INTVAL (offset)))
11445 return false;
11446 }
11447
11448 if (TARGET_FDPIC
11449 && (SYMBOLIC_CONST_P (x)
11450 || (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
11451 && SYMBOLIC_CONST_P (XEXP (XEXP (x, 0), 0)))))
11452 return false;
11453
11454 return GET_CODE (x) != CONST_DOUBLE
11455 || mode == DFmode || mode == SFmode
11456 || mode == DImode || GET_MODE (x) == VOIDmode;
11457 }
11458
11459 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
11460
11461 static void
11462 sh_init_sync_libfuncs (void)
11463 {
11464 init_sync_libfuncs (UNITS_PER_WORD);
11465 }
11466
11467 /* Return true if it is appropriate to emit `ret' instructions in the
11468 body of a function. */
11469 bool
11470 sh_can_use_simple_return_p (void)
11471 {
11472 if (! reload_completed || frame_pointer_needed)
11473 return false;
11474
11475 /* Moving the prologue around doesn't reduce the size. */
11476 if (optimize_function_for_size_p (cfun))
11477 return false;
11478
11479 /* Finally, allow for pr save. */
11480 HARD_REG_SET live_regs_mask;
11481 int d = calc_live_regs (&live_regs_mask);
11482
11483 if (rounded_frame_size (d) > 4)
11484 return false;
11485
11486 return true;
11487 }
11488
11489 /*------------------------------------------------------------------------------
11490 Address mode optimization support code
11491 */
11492
11493 typedef HOST_WIDE_INT disp_t;
11494 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
11495 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
11496 static const disp_t INVALID_DISP = MAX_DISP;
11497
11498 /* A memory reference which is described by a base register and a
11499 displacement. */
11500 class base_reg_disp
11501 {
11502 public:
11503 base_reg_disp (rtx br, disp_t d);
11504
11505 bool is_reg (void) const;
11506 bool is_disp (void) const;
11507 rtx reg (void) const;
11508 disp_t disp (void) const;
11509
11510 private:
11511 rtx reg_;
11512 disp_t disp_;
11513 };
11514
11515 inline
11516 base_reg_disp::base_reg_disp (rtx br, disp_t d)
11517 : reg_ (br), disp_ (d)
11518 {
11519 }
11520
11521 inline bool
11522 base_reg_disp::is_reg (void) const
11523 {
11524 return reg_ != NULL_RTX && disp_ != INVALID_DISP;
11525 }
11526
11527 inline bool
11528 base_reg_disp::is_disp (void) const
11529 {
11530 return reg_ == NULL_RTX && disp_ != INVALID_DISP;
11531 }
11532
11533 inline rtx
11534 base_reg_disp::reg (void) const
11535 {
11536 return reg_;
11537 }
11538
11539 inline disp_t
11540 base_reg_disp::disp (void) const
11541 {
11542 return disp_;
11543 }
11544
11545 /* Find the base register and calculate the displacement for a given
11546 address rtx 'x'. */
11547 static base_reg_disp
11548 sh_find_base_reg_disp (rtx_insn* insn, rtx x, disp_t disp = 0,
11549 rtx base_reg = NULL)
11550 {
11551 if (REG_P (x))
11552 {
11553 if (REGNO (x) == GBR_REG)
11554 return base_reg_disp (x, disp);
11555
11556 /* We've reached a hard-reg. This is probably the point where
11557 function args are copied to pseudos. Do not go any further and
11558 stick to the pseudo. If the original mem addr was in a hard reg
11559 from the beginning, it will become the base reg. */
11560 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
11561 return base_reg_disp (base_reg != NULL ? base_reg : x, disp);
11562
11563 /* Find the def of the reg and trace it. If there is more than one
11564 def and they are not all the same, assume it's not safe to proceed. */
11565 rtx_insn* last_i = NULL;
11566 rtx last_set = NULL;
11567 for (df_ref d = DF_REG_DEF_CHAIN (REGNO (x)); d != NULL;
11568 d = DF_REF_NEXT_REG (d))
11569 {
11570 rtx set = const_cast<rtx> (set_of (x, DF_REF_INSN (d)));
11571
11572 /* Accept multiple defs, as long as they are equal. */
11573 if (last_set == NULL || rtx_equal_p (last_set, set))
11574 {
11575 last_i = DF_REF_INSN (d);
11576 last_set = set;
11577 }
11578 else
11579 {
11580 last_i = NULL;
11581 last_set = NULL;
11582 break;
11583 }
11584 }
11585
11586 if (last_set != NULL && last_i != NULL)
11587 return sh_find_base_reg_disp (last_i, XEXP (last_set, 1), disp,
11588 XEXP (last_set, 0));
11589
11590 /* When here, no previous insn was found that sets the reg.
11591 The input reg is already the base reg. */
11592 return base_reg_disp (x, disp);
11593 }
11594
11595 else if (GET_CODE (x) == PLUS)
11596 {
11597 base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
11598 base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));
11599
11600 /* Either left or right val must be a reg.
11601 We don't handle the case of 'reg + reg' here. */
11602 if (left_val.is_reg () && right_val.is_disp ())
11603 return base_reg_disp (left_val.reg (), left_val.disp ()
11604 + right_val.disp () + disp);
11605 else if (right_val.is_reg () && left_val.is_disp ())
11606 return base_reg_disp (right_val.reg (), right_val.disp ()
11607 + left_val.disp () + disp);
11608 else
11609 return base_reg_disp (base_reg, disp);
11610 }
11611
11612 else if (CONST_INT_P (x))
11613 return base_reg_disp (NULL, disp + INTVAL (x));
11614
11615 /* Didn't find anything useful. */
11616 return base_reg_disp (base_reg, disp);
11617 }
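/* Illustrative example (hypothetical RTL, not from the sources): given
     (set (reg 200) (plus:SI (reg GBR) (const_int 16)))
   and a use such as (mem (plus:SI (reg 200) (const_int 4))), the function
   above traces the definition of reg 200 and returns base reg GBR with
   displacement 20, which sh_find_equiv_gbr_addr below can then turn into
   a @(disp,GBR) address if the displacement fits.  */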
11618
11619 /* Given an insn and a memory operand, try to find an equivalent GBR
11620 based memory address and return the corresponding new memory address.
11621 Return NULL_RTX if not found. */
11622 rtx
11623 sh_find_equiv_gbr_addr (rtx_insn* insn, rtx mem)
11624 {
11625 if (!MEM_P (mem) || gbr_address_mem (mem, GET_MODE (mem)))
11626 return NULL_RTX;
11627
11628 /* Leave post/pre inc/dec or any other side effect addresses alone. */
11629 if (side_effects_p (XEXP (mem, 0)))
11630 return NULL_RTX;
11631
11632 /* When not optimizing there might be no dataflow available. */
11633 if (df == NULL)
11634 return NULL_RTX;
11635
11636 base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));
11637
11638 if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
11639 {
11640 /* If GBR is marked as call clobbered we bail out if we see a call.
11641 FIXME: Actually should check if this mem refers to the gbr value
11642 before or after the call. If there is a store_gbr preceding this
11643 mem, it's safe to use GBR for this mem.
11644
11645 If GBR is not marked as call clobbered, but there is some other
11646 def than a call, it's probably a load_gbr upon which we also
11647 bail out to be on the safe side.
11648 FIXME: Should check if we have a use-after-def case, such as
11649 the call case above. */
11650 for (df_ref d = DF_REG_DEF_CHAIN (GBR_REG); d != NULL;
11651 d = DF_REF_NEXT_REG (d))
11652 {
11653 if (CALL_P (DF_REF_INSN (d)))
11654 {
11655 if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG))
11656 return NULL_RTX;
11657 else
11658 continue;
11659 }
11660 else
11661 return NULL_RTX;
11662 }
11663
11664 rtx disp = GEN_INT (gbr_disp.disp ());
11665 if (gbr_displacement (disp, GET_MODE (mem)))
11666 return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
11667 }
11668
11669 return NULL_RTX;
11670 }
11671
11672 /*------------------------------------------------------------------------------
11673 Manual insn combine support code.
11674 */
11675
11676 /* Return true if the specified insn contains any UNSPECs or
11677 UNSPEC_VOLATILEs. */
11678 static bool
11679 sh_unspec_insn_p (rtx x)
11680 {
11681 subrtx_iterator::array_type array;
11682 FOR_EACH_SUBRTX (i, array, x, ALL)
11683 if (*i != NULL
11684 && (GET_CODE (*i) == UNSPEC || GET_CODE (*i) == UNSPEC_VOLATILE))
11685 return true;
11686
11687 return false;
11688 }
11689
11690 /* Return true if the register operands of the specified insn are modified
11691 between the specified from and to insns (exclusive of those two). */
11692 bool
11693 sh_insn_operands_modified_between_p (rtx_insn* operands_insn,
11694 const rtx_insn* from,
11695 const rtx_insn* to)
11696 {
11697 /* FIXME: Return true for multiple sets for now. */
11698 rtx s = single_set (operands_insn);
11699 if (s == NULL_RTX)
11700 return true;
11701
11702 subrtx_iterator::array_type array;
11703 FOR_EACH_SUBRTX (i, array, SET_SRC (s), ALL)
11704 if (*i != NULL &&
11705 ((REG_P (*i) || SUBREG_P (*i)) && reg_set_between_p (*i, from, to)))
11706 return true;
11707
11708 return false;
11709 }
11710
11711 /* Given an insn, determine whether it's a 'nott' insn, i.e. an insn that
11712 negates the T bit and stores the result in the T bit. */
11713 bool
11714 sh_is_nott_insn (const rtx_insn* i)
11715 {
11716 return i != NULL && GET_CODE (PATTERN (i)) == SET
11717 && t_reg_operand (XEXP (PATTERN (i), 0), VOIDmode)
11718 && negt_reg_operand (XEXP (PATTERN (i), 1), VOIDmode);
11719 }
11720
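/* Given an insn, check whether it's a 'movt' kind of insn, i.e. an insn
   that stores the T bit in a general purpose register, and return the
   destination register rtx, or null.  */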
11721 rtx
11722 sh_movt_set_dest (const rtx_insn* i)
11723 {
11724 return i == NULL ? NULL : sh_movt_set_dest (PATTERN (i));
11725 }
11726
11727 rtx
11728 sh_movt_set_dest (const_rtx pat)
11729 {
11730 return GET_CODE (pat) == SET
11731 && arith_reg_dest (XEXP (pat, 0), SImode)
11732 && t_reg_operand (XEXP (pat, 1), VOIDmode) ? XEXP (pat, 0) : NULL;
11733 }
11734
11735 /* Given an insn, check whether it's a 'movrt' kind of insn, i.e. an insn
11736 that stores the negated T bit in a register, and return the destination
11737 register rtx, or null. */
11738 rtx
11739 sh_movrt_set_dest (const rtx_insn* i)
11740 {
11741 return i == NULL ? NULL : sh_movrt_set_dest (PATTERN (i));
11742 }
11743
11744 rtx
11745 sh_movrt_set_dest (const_rtx pat)
11746 {
11747 /* The negc movrt replacement is inside a parallel. */
11748 if (GET_CODE (pat) == PARALLEL)
11749 pat = XVECEXP (pat, 0, 0);
11750
11751 return GET_CODE (pat) == SET
11752 && arith_reg_dest (XEXP (pat, 0), SImode)
11753 && negt_reg_operand (XEXP (pat, 1), VOIDmode) ? XEXP (pat, 0) : NULL;
11754
11755 }
11756
11757 /* Given an insn and a reg number, tell whether the reg dies or is unused
11758 after the insn. */
11759 bool
11760 sh_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno)
11761 {
11762 return find_regno_note (i, REG_DEAD, regno) != NULL
11763 || find_regno_note (i, REG_UNUSED, regno) != NULL;
11764 }
11765
11766 /* Given an insn and a reg number, remove reg dead or reg unused notes to
11767 mark it as being used after the insn. */
11768 void
11769 sh_remove_reg_dead_or_unused_notes (rtx_insn* i, int regno)
11770 {
11771 if (rtx n = find_regno_note (i, REG_DEAD, regno))
11772 remove_note (i, n);
11773 if (rtx n = find_regno_note (i, REG_UNUSED, regno))
11774 remove_note (i, n);
11775 }
11776
11777 /* Given an insn check if it contains any post/pre inc/dec mem operands and
11778 add the REG_INC notes accordingly.
11779 FIXME: This function is very similar to lra.c (add_auto_inc_notes).
11780 FIXME: This function is currently used by peephole2 patterns because
11781 the peephole2 pass does not preserve REG_INC notes. If the notes
11782 are dropped the following passes will do wrong things. */
11783 rtx_insn*
11784 sh_check_add_incdec_notes (rtx_insn* i)
11785 {
11786 struct for_each_inc_dec_clb
11787 {
11788 static int func (rtx mem ATTRIBUTE_UNUSED, rtx op ATTRIBUTE_UNUSED,
11789 rtx dest, rtx src ATTRIBUTE_UNUSED,
11790 rtx srcoff ATTRIBUTE_UNUSED, void* arg)
11791 {
11792 gcc_assert (REG_P (dest));
11793
11794 rtx_insn* i = (rtx_insn*)arg;
11795 if (find_regno_note (i, REG_INC, REGNO (dest)) == NULL)
11796 add_reg_note (i, REG_INC, dest);
11797
11798 return 0;
11799 }
11800 };
11801
11802 for_each_inc_dec (PATTERN (i), for_each_inc_dec_clb::func, i);
11803 return i;
11804 }
11805
11806 /* Given a move insn destination and a source, make sure that the move source
11807 operand is not a post-inc mem load with the same address reg as the
11808 destination. Returns the modified source operand with the post-inc removed
11809 if necessary. */
11810 rtx
11811 sh_remove_overlapping_post_inc (rtx dst, rtx src)
11812 {
11813 if (!MEM_P (src))
11814 return src;
11815
11816 rtx addr = XEXP (src, 0);
11817
11818 if (GET_CODE (addr) == POST_INC
11819 && reg_overlap_mentioned_p (XEXP (addr, 0), dst))
11820 return replace_equiv_address (src, XEXP (addr, 0));
11821
11822 gcc_assert (GET_CODE (addr) != POST_MODIFY);
11823 return src;
11824 }
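/* Example (illustrative): for a move like
     (set (reg R1) (mem (post_inc (reg R1))))
   the post-increment of R1 would conflict with the load result, so the
   function above rewrites the source to (mem (reg R1)), dropping the
   post-increment.  */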
11825
11826 /* Emit a move insn that is safe to be used in peephole patterns. */
11827 rtx_insn*
11828 sh_peephole_emit_move_insn (rtx dst, rtx src)
11829 {
11830 return sh_check_add_incdec_notes (
11831 emit_move_insn (dst, sh_remove_overlapping_post_inc (dst, src)));
11832 }
11833
11834 /* Given an op rtx and an insn, try to find out whether the result of the
11835 specified op consists only of logical operations on T bit stores. */
11836 bool
11837 sh_is_logical_t_store_expr (rtx op, rtx_insn* insn)
11838 {
11839 if (!logical_operator (op, SImode))
11840 return false;
11841
11842 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
11843 int op_is_t_count = 0;
11844
11845 for (int i = 0; i < 2; ++i)
11846 {
11847 if (t_reg_operand (ops[i], VOIDmode)
11848 || negt_reg_operand (ops[i], VOIDmode))
11849 op_is_t_count++;
11850
11851 else
11852 {
11853 set_of_reg op_set = sh_find_set_of_reg (ops[i], insn,
11854 prev_nonnote_insn_bb);
11855 if (op_set.set_src == NULL_RTX)
11856 continue;
11857
11858 if (t_reg_operand (op_set.set_src, VOIDmode)
11859 || negt_reg_operand (op_set.set_src, VOIDmode)
11860 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
11861 op_is_t_count++;
11862 }
11863 }
11864
11865 return op_is_t_count == 2;
11866 }
11867
11868 /* Given the operand that is extended in a sign/zero extend insn, and the
11869 insn, try to figure out whether the sign/zero extension can be replaced
11870 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
11871 NULL_RTX otherwise. */
11872 rtx
11873 sh_try_omit_signzero_extend (rtx extended_op, rtx_insn* insn)
11874 {
11875 if (REG_P (extended_op))
11876 ; /* Use the reg as-is. */
11877 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
11878 extended_op = SUBREG_REG (extended_op);
11879 else
11880 return NULL_RTX;
11881
11882 /* Reg moves must be of the same mode. */
11883 if (GET_MODE (extended_op) != SImode)
11884 return NULL_RTX;
11885
11886 set_of_reg s = sh_find_set_of_reg (extended_op, insn, prev_nonnote_insn_bb);
11887 if (s.set_src == NULL_RTX)
11888 return NULL_RTX;
11889
11890 if (t_reg_operand (s.set_src, VOIDmode)
11891 || negt_reg_operand (s.set_src, VOIDmode))
11892 return extended_op;
11893
11894 /* If the zero extended reg was formed by a logical operation, check the
11895 operands of the logical operation. If both originated from T bit
11896 stores the zero extension can be eliminated. */
11897 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
11898 return extended_op;
11899
11900 return NULL_RTX;
11901 }
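/* Example (illustrative): if r4 was set by a movt insn, its value is known
   to be 0 or 1, so a following zero/sign extension such as 'extu.b r4,r5'
   can be replaced by a plain reg-reg copy; the same holds when r4 is the
   result of logical operations on T bit stores.  */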
11902
11903 /* Given the current insn, which is assumed to be a movrt_negc insn, try to
11904 figure out whether it should be converted into a movt-xor sequence in
11905 the movrt_negc splitter.
11906 Returns true if insns have been modified and the splitter has succeeded. */
11907 bool
11908 sh_split_movrt_negc_to_movt_xor (rtx_insn* curr_insn, rtx operands[])
11909 {
11910 /* In cases such as
11911 tst r4,r4
11912 mov #-1,r1
11913 negc r1,r1
11914 tst r4,r4
11915 we can replace the T bit clobbering negc with a movt-xor sequence and
11916 eliminate the redundant comparison.
11917 Because the xor insn depends on register allocation results, allow this
11918 only before reload. */
11919 if (!can_create_pseudo_p ())
11920 return false;
11921
11922 set_of_reg t_before_negc = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
11923 prev_nonnote_insn_bb);
11924 set_of_reg t_after_negc = sh_find_set_of_reg (get_t_reg_rtx (), curr_insn,
11925 next_nonnote_insn_bb);
11926
11927 if (t_before_negc.set_rtx != NULL_RTX && t_after_negc.set_rtx != NULL_RTX
11928 && rtx_equal_p (t_before_negc.set_rtx, t_after_negc.set_rtx)
11929 && !reg_used_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
11930 && !sh_insn_operands_modified_between_p (t_before_negc.insn,
11931 t_before_negc.insn,
11932 t_after_negc.insn)
11933 && !modified_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
11934 && !sh_unspec_insn_p (t_after_negc.insn)
11935 && !volatile_insn_p (PATTERN (t_after_negc.insn))
11936 && !side_effects_p (PATTERN (t_after_negc.insn))
11937 && !may_trap_or_fault_p (PATTERN (t_after_negc.insn)))
11938 {
11939 emit_insn (gen_movrt_xor (operands[0], get_t_reg_rtx ()));
11940 set_insn_deleted (t_after_negc.insn);
11941 return true;
11942 }
11943 else
11944 return false;
11945 }
11946
11947 /* Given a reg and the current insn, see if the value of the reg originated
11948 from a sign or zero extension and return the discovered information. */
11949 sh_extending_set_of_reg
11950 sh_find_extending_set_of_reg (rtx reg, rtx_insn* curr_insn)
11951 {
11952 if (reg == NULL)
11953 return sh_extending_set_of_reg (curr_insn);
11954
11955 if (SUBREG_P (reg))
11956 reg = SUBREG_REG (reg);
11957
11958 if (!REG_P (reg))
11959 return sh_extending_set_of_reg (curr_insn);
11960
11961 /* FIXME: Also search the predecessor basic blocks. It seems that checking
11962 only the adjacent predecessor blocks would cover most of the cases.
11963 Also try to look through the first extension that we hit. There are some
11964 cases where a zero_extend is followed by an (implicit) sign_extend, and it
11965 fails to see the sign_extend. */
11966 sh_extending_set_of_reg result =
11967 sh_find_set_of_reg (reg, curr_insn, prev_nonnote_insn_bb, true);
11968
11969 if (result.set_src != NULL)
11970 {
11971 if (GET_CODE (result.set_src) == SIGN_EXTEND
11972 || GET_CODE (result.set_src) == ZERO_EXTEND)
11973 {
11974 if (dump_file)
11975 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
11976 "explicitly sign/zero extended in insn %d\n",
11977 REGNO (reg), INSN_UID (result.insn));
11978 result.from_mode = GET_MODE (XEXP (result.set_src, 0));
11979 result.ext_code = GET_CODE (result.set_src);
11980 }
11981 else if (MEM_P (result.set_src)
11982 && (GET_MODE (result.set_src) == QImode
11983 || GET_MODE (result.set_src) == HImode)
11984 && !sh_unspec_insn_p (result.insn))
11985 {
11986 /* On SH QIHImode memory loads always sign extend. However, in
11987 some cases where it seems that the higher bits are not
11988 interesting, the loads will not be expanded as sign extending
11989 insns, but as QIHImode loads into QIHImode regs. We report that
11990 the reg has been sign extended by the mem load. When it is used
11991 as such, we must convert the mem load into a sign extending insn,
11992 see also sh_extending_set_of_reg::use_as_extended_reg. */
11993 if (dump_file)
11994 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
11995 "implicitly sign extended in insn %d\n",
11996 REGNO (reg), INSN_UID (result.insn));
11997 result.from_mode = GET_MODE (result.set_src);
11998 result.ext_code = SIGN_EXTEND;
11999 }
12000 }
12001
12002 return result;
12003 }
12004
12005 /* Given a reg that is known to be sign or zero extended at some insn,
12006 take the appropriate measures so that the extended value can be used as
12007 a reg at the specified insn and return the resulting reg rtx. */
12008 rtx
12009 sh_extending_set_of_reg::use_as_extended_reg (rtx_insn* use_at_insn) const
12010 {
12011 gcc_assert (insn != NULL && set_src != NULL && set_rtx != NULL);
12012 gcc_assert (ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND);
12013 gcc_assert (from_mode == QImode || from_mode == HImode);
12014
12015 if (MEM_P (set_src) && ext_code == SIGN_EXTEND)
12016 {
12017 if (dump_file)
12018 fprintf (dump_file,
12019 "use_as_extended_reg: converting non-extending mem load in "
12020 "insn %d into sign-extending load\n", INSN_UID (insn));
12021
12022 rtx r = gen_reg_rtx (SImode);
12023 rtx_insn* i0;
12024 if (from_mode == QImode)
12025 i0 = emit_insn_after (gen_extendqisi2 (r, set_src), insn);
12026 else if (from_mode == HImode)
12027 i0 = emit_insn_after (gen_extendhisi2 (r, set_src), insn);
12028 else
12029 gcc_unreachable ();
12030
12031 emit_insn_after (
12032 gen_move_insn (XEXP (set_rtx, 0),
12033 gen_lowpart (GET_MODE (set_src), r)), i0);
12034 set_insn_deleted (insn);
12035 return r;
12036 }
12037 else
12038 {
12039 rtx extension_dst = XEXP (set_rtx, 0);
12040 if (GET_MODE (extension_dst) != SImode)
12041 extension_dst = simplify_gen_subreg (SImode, extension_dst,
12042 GET_MODE (extension_dst), 0);
12043 if (modified_between_p (extension_dst, insn, use_at_insn))
12044 {
12045 if (dump_file)
12046 fprintf (dump_file,
12047 "use_as_extended_reg: dest reg %d of extending insn %d is "
12048 "modified, inserting a reg-reg copy\n",
12049 REGNO (extension_dst), INSN_UID (insn));
12050
12051 rtx r = gen_reg_rtx (SImode);
12052 emit_insn_after (gen_move_insn (r, extension_dst), insn);
12053 return r;
12054 }
12055 else
12056 {
12057 sh_remove_reg_dead_or_unused_notes (insn, REGNO (extension_dst));
12058 return extension_dst;
12059 }
12060 }
12061 }
12062
12063 bool
12064 sh_extending_set_of_reg::can_use_as_unextended_reg (void) const
12065 {
12066 if ((ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND)
12067 && (from_mode == QImode || from_mode == HImode)
12068 && set_src != NULL)
12069 return arith_reg_operand (XEXP (set_src, 0), from_mode);
12070 else
12071 return false;
12072 }
12073
12074 rtx
12075 sh_extending_set_of_reg::use_as_unextended_reg (rtx_insn* use_at_insn) const
12076 {
12077 gcc_assert (can_use_as_unextended_reg ());
12078
12079 rtx r = XEXP (set_src, 0);
12080 rtx r0 = simplify_gen_subreg (SImode, r, from_mode, 0);
12081
12082 if (modified_between_p (r, insn, use_at_insn))
12083 {
12084 rtx r1 = gen_reg_rtx (SImode);
12085 emit_insn_after (gen_move_insn (r1, r0), insn);
12086 return r1;
12087 }
12088 else
12089 {
12090 sh_remove_reg_dead_or_unused_notes (insn, SUBREG_P (r)
12091 ? REGNO (SUBREG_REG (r))
12092 : REGNO (r));
12093 return r0;
12094 }
12095 }
12096
12097 /* Given the current insn, which is assumed to be the *tst<mode>_t_subregs insn,
12098 perform the necessary checks on the operands and split it accordingly. */
12099 void
12100 sh_split_tst_subregs (rtx_insn* curr_insn, machine_mode subreg_mode,
12101 int subreg_offset, rtx operands[])
12102 {
12103 gcc_assert (subreg_mode == QImode || subreg_mode == HImode);
12104
12105 sh_extending_set_of_reg eop0 = sh_find_extending_set_of_reg (operands[0],
12106 curr_insn);
12107 sh_extending_set_of_reg eop1 = sh_find_extending_set_of_reg (operands[1],
12108 curr_insn);
12109
12110 /* If one of the operands is known to be zero extended, that's already
12111 sufficient to mask out the unwanted high bits. */
12112 if (eop0.ext_code == ZERO_EXTEND && eop0.from_mode == subreg_mode)
12113 {
12114 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
12115 operands[1]));
12116 return;
12117 }
12118 if (eop1.ext_code == ZERO_EXTEND && eop1.from_mode == subreg_mode)
12119 {
12120 emit_insn (gen_tstsi_t (operands[0],
12121 eop1.use_as_extended_reg (curr_insn)));
12122 return;
12123 }
12124
12125 /* None of the operands seem to be zero extended.
12126 If both are sign extended it's OK, too. */
12127 if (eop0.ext_code == SIGN_EXTEND && eop1.ext_code == SIGN_EXTEND
12128 && eop0.from_mode == subreg_mode && eop1.from_mode == subreg_mode)
12129 {
12130 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
12131 eop1.use_as_extended_reg (curr_insn)));
12132 return;
12133 }
12134
12135 /* Otherwise we have to insert a zero extension on one of the operands to
12136 mask out the unwanted high bits.
12137 Prefer the operand that has no known extension. */
12138 if (eop0.ext_code != UNKNOWN && eop1.ext_code == UNKNOWN)
12139 std::swap (operands[0], operands[1]);
12140
12141 rtx tmp0 = gen_reg_rtx (SImode);
12142 rtx tmp1 = simplify_gen_subreg (subreg_mode, operands[0],
12143 GET_MODE (operands[0]), subreg_offset);
12144 emit_insn (subreg_mode == QImode
12145 ? gen_zero_extendqisi2 (tmp0, tmp1)
12146 : gen_zero_extendhisi2 (tmp0, tmp1));
12147 emit_insn (gen_tstsi_t (tmp0, operands[1]));
12148 }
12149
12150 /* A helper class to increment/decrement a counter variable each time a
12151 function is entered/left. */
12152 class scope_counter
12153 {
12154 public:
12155 scope_counter (int& counter) : m_counter (counter) { ++m_counter; }
12156
12157 ~scope_counter (void)
12158 {
12159 --m_counter;
12160 gcc_assert (m_counter >= 0);
12161 }
12162
12163 int count (void) const { return m_counter; }
12164
12165 private:
12166 int& m_counter;
12167 };
12168
12169 /* Given an rtx x, determine whether the expression can be used to create
12170 an insn that calculates x and stores the result in the T bit.
12171 This is used by the 'treg_set_expr' predicate to construct insns sequences
12172 where T bit results are fed into other insns, such as addc, subc, negc
12173 insns.
12174
12175 FIXME: The patterns that expand 'treg_set_expr' operands tend to
12176 distinguish between 'positive' and 'negative' forms. For now this has to
12177 be done in the preparation code. We could also introduce
12178 'pos_treg_set_expr' and 'neg_treg_set_expr' predicates for that and write
12179 two different patterns for the 'positive' and 'negative' forms. However,
12180 the total number of lines of code seems to be about the same and the
12181 '{pos|neg}_treg_set_expr' predicates would be more expensive, because the
12182 recog function would need to look inside the expression by temporarily
12183 splitting it. */
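/* Example of a 'treg_set_expr' (illustrative, not from the original
   comment): an operand such as
     (eq (and:SI (reg r4) (const_int 1)) (const_int 0))
   can be recognized below because wrapping it as
     (set (reg T) (eq (and:SI (reg r4) (const_int 1)) (const_int 0)))
   matches the tst insn pattern; its T bit result can then be consumed
   directly by addc / subc / negc style patterns.  */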
12184 static int sh_recog_treg_set_expr_reent_count = 0;
12185
12186 bool
12187 sh_recog_treg_set_expr (rtx op, machine_mode mode)
12188 {
12189 scope_counter recursion (sh_recog_treg_set_expr_reent_count);
12190
12191 /* Limit the recursion count to avoid nested expressions which we can't
12192 resolve to a single treg set insn. */
12193 if (recursion.count () > 1)
12194 return false;
12195
12196 /* Early accept known possible operands before doing recog. */
12197 if (op == const0_rtx || op == const1_rtx || t_reg_operand (op, mode)
12198 || negt_reg_operand (op, mode))
12199 return true;
12200
12201 /* Early reject impossible operands before doing recog.
12202 There are some (set ((t) (subreg ...))) patterns, but we must be careful
12203 not to allow any invalid reg-reg or mem-reg moves, or else other passes
12204 such as lower-subreg will bail out. Some insns such as SH4A movua are
12205 done with UNSPEC, so must reject those, too, or else it would result
12206 in an invalid reg -> treg move. */
12207 if (CONST_INT_P (op) || register_operand (op, mode)
12208 || memory_operand (op, mode) || sh_unspec_insn_p (op))
12209 return false;
12210
12211 if (!can_create_pseudo_p ())
12212 return false;
12213
12214 /* expand_debug_locations may call this to compute rtx costs at
12215 very early stage. In that case, don't make new insns here to
12216 avoid codegen differences with -g. */
12217 if (currently_expanding_to_rtl)
12218 return false;
12219
12220 /* We are going to invoke recog in a re-entrant way and thus
12221 have to capture its current state and restore it afterwards. */
12222 recog_data_d prev_recog_data = recog_data;
12223
12224 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), op));
12225 SET_PREV_INSN (i) = NULL;
12226 SET_NEXT_INSN (i) = NULL;
12227
12228 /* If the comparison op doesn't have a result mode, set it to SImode. */
12229 machine_mode prev_op_mode = GET_MODE (op);
12230 if (COMPARISON_P (op) && prev_op_mode == VOIDmode)
12231 PUT_MODE (op, SImode);
12232
12233 int result = recog (PATTERN (i), i, 0);
12234
12235 /* It seems there is no insn like that. Create a negated version and
12236 try again. If we hit a negated form, we'll allow that and append a
12237 nott sequence when splitting out the insns. Insns that do the split
12238 can then remove the trailing nott if they know how to deal with it. */
12239 if (result < 0 && COMPARISON_P (op))
12240 {
12241 machine_mode cmp_mode = GET_MODE (XEXP (op, 0));
12242 if (cmp_mode == VOIDmode)
12243 cmp_mode = GET_MODE (XEXP (op, 1));
12244
12245 rtx_code prev_code = GET_CODE (op);
12246 PUT_CODE (op, reverse_condition (GET_CODE (op)));
12247 result = recog (PATTERN (i), i, 0);
12248 PUT_CODE (op, prev_code);
12249 }
12250
12251 PUT_MODE (op, prev_op_mode);
12252 recog_data = prev_recog_data;
12253 return result >= 0;
12254 }
12255
12256 /* Returns true when recog of a 'treg_set_expr' is currently in progress.
12257 This can be used as a condition for insn/split patterns to allow certain
12258 T bit setting patterns only to be matched as sub expressions of other
12259 patterns. */
12260 bool
12261 sh_in_recog_treg_set_expr (void)
12262 {
12263 return sh_recog_treg_set_expr_reent_count > 0;
12264 }
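
/* Hypothetical sh.md fragment (the insn name below is made up) showing how
   the function above could be used as an insn condition, so that the pattern
   participates in matching only while a treg_set_expr is being recognized or
   split:

     (define_insn "*example_treg_subexpr"
       [(set (reg:SI T_REG) (match_operand:SI 0 "arith_operand"))]
       "TARGET_SH1 && sh_in_recog_treg_set_expr ()"
       "...")
*/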
12265
12266 /* Given an rtx x, which is assumed to be some expression that has been
12267 matched by the 'treg_set_expr' predicate before, split and emit the
12268 insns that are necessary to calculate the expression and store the result
12269 in the T bit.
12270 The splitting is done recursively, similar to 'try_split' in emit-rtl.c.
12271 Unfortunately we can't use 'try_split' here directly, as it tries to invoke
12272 'delete_insn' which then causes the DF parts to bail out, because we
12273 currently are inside another gen_split* function and would invoke
12274 'try_split' in a reentrant way. */
12275 static std::pair<rtx_insn*, rtx_insn*>
12276 sh_try_split_insn_simple (rtx_insn* i, rtx_insn* curr_insn, int n = 0)
12277 {
12278 if (dump_file)
12279 {
12280 fprintf (dump_file, "sh_try_split_insn_simple n = %d i = \n", n);
12281 print_rtl_single (dump_file, i);
12282 fprintf (dump_file, "\n");
12283 }
12284
12285 rtx_insn* seq = split_insns (PATTERN (i), curr_insn);
12286
12287 if (seq == NULL)
12288 return std::make_pair (i, i);
12289
12290 /* Avoid infinite splitter loops if any insn of the result matches
12291 the original pattern. */
12292 for (rtx_insn* s = seq; s != NULL; s = NEXT_INSN (s))
12293 if (INSN_P (s) && rtx_equal_p (PATTERN (s), PATTERN (i)))
12294 return std::make_pair (i, i);
12295
12296 unshare_all_rtl_in_chain (seq);
12297
12298 /* 'seq' is now a replacement for 'i'. Assuming that 'i' is an insn in
12299 a linked list, replace the single insn with the new insns. */
12300 rtx_insn* seqlast = seq;
12301 while (NEXT_INSN (seqlast) != NULL)
12302 seqlast = NEXT_INSN (seqlast);
12303
12304 if (rtx_insn* iprev = PREV_INSN (i))
12305 SET_NEXT_INSN (iprev) = seq;
12306 if (rtx_insn* inext = NEXT_INSN (i))
12307 SET_PREV_INSN (inext) = seqlast;
12308
12309 SET_PREV_INSN (seq) = PREV_INSN (i);
12310 SET_NEXT_INSN (seqlast) = NEXT_INSN (i);
12311
12312 SET_PREV_INSN (i) = NULL;
12313 SET_NEXT_INSN (i) = NULL;
12314
12315 /* Recursively split all insns. */
12316 for (i = seq; ; i = NEXT_INSN (i))
12317 {
12318 std::pair<rtx_insn*, rtx_insn*> ii =
12319 sh_try_split_insn_simple (i, curr_insn, n + 1);
12320 if (i == seq)
12321 seq = ii.first;
12322 if (i == seqlast)
12323 {
12324 seqlast = ii.second;
12325 break;
12326 }
12327 i = ii.second;
12328 }
12329
12330 return std::make_pair (seq, seqlast);
12331 }
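
/* For illustration: if 'i' initially splits into the chain A -> B, and the
   recursive pass then splits A into A1 -> A2, the list becomes A1 -> A2 -> B
   and the returned pair is (A1, B) -- i.e. the first and last insn of the
   fully split replacement sequence for the original 'i'.  */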
12332
12333 sh_treg_insns
12334 sh_split_treg_set_expr (rtx x, rtx_insn* curr_insn)
12335 {
12336 if (t_reg_operand (x, VOIDmode))
12337 return sh_treg_insns ();
12338
12339 scope_counter in_treg_set_expr (sh_recog_treg_set_expr_reent_count);
12340
12341 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), x));
12342 SET_PREV_INSN (i) = NULL;
12343 SET_NEXT_INSN (i) = NULL;
12344
12345 if (dump_file)
12346 {
12347 fprintf (dump_file, "split_treg_set_expr insn:\n");
12348 print_rtl (dump_file, i);
12349 fprintf (dump_file, "\n");
12350 }
12351
12352 /* If the insn is not found, we will try a negated form and append
12353 a nott. */
12354 bool append_nott = false;
12355
12356 /* We are going to invoke recog/split_insns in a re-entrant way and thus
12357 have to capture its current state and restore it afterwards. */
12358 recog_data_d prev_recog_data = recog_data;
12359
12360 if (negt_reg_operand (x, GET_MODE (x)))
12361 {
12362 /* This is a normal movt followed by a nott. It will be converted
12363 into a movrt after initial expansion. */
12364 XEXP (PATTERN (i), 1) = get_t_reg_rtx ();
12365 append_nott = true;
12366 }
12367 else
12368 {
12369 /* If the comparison op doesn't have a mode set, set it to SImode. */
12370 if (COMPARISON_P (x) && GET_MODE (x) == VOIDmode)
12371 PUT_MODE (x, SImode);
12372
12373 int insn_code = recog (PATTERN (i), i, 0);
12374
12375 if (insn_code < 0 && COMPARISON_P (x))
12376 {
12377 machine_mode cmp_mode = GET_MODE (XEXP (x, 0));
12378 if (cmp_mode == VOIDmode)
12379 cmp_mode = GET_MODE (XEXP (x, 1));
12380
12381 PUT_CODE (x, reverse_condition (GET_CODE (x)));
12382 insn_code = recog (PATTERN (i), i, 0);
12383 append_nott = true;
12384 }
12385
12386 gcc_assert (insn_code >= 0);
12387 }
12388
12389 /* Try to recursively split the insn. Some insns might refuse to split
12390 any further while we are in the treg_set_expr splitting phase. They
12391 will be emitted as part of the outer insn and then split again. */
12392 std::pair<rtx_insn*, rtx_insn*> insnlist =
12393 sh_try_split_insn_simple (i, curr_insn);
12394
12395 /* Restore recog state. */
12396 recog_data = prev_recog_data;
12397
12398 rtx_insn* nott_insn = sh_is_nott_insn (insnlist.second)
12399 ? insnlist.second
12400 : NULL;
12401 if (dump_file)
12402 {
12403 fprintf (dump_file, "split_treg_set_expr insnlist:\n");
12404 print_rtl (dump_file, insnlist.first);
12405 fprintf (dump_file, "\n");
12406
12407 if (nott_insn != NULL)
12408 fprintf (dump_file, "trailing nott insn %d\n", INSN_UID (nott_insn));
12409 }
12410
12411 emit_insn (insnlist.first);
12412
12413 if (nott_insn != NULL && append_nott)
12414 {
12415 if (dump_file)
12416 fprintf (dump_file, "removing trailing nott\n");
12417 remove_insn (nott_insn);
12418 nott_insn = NULL;
12419 append_nott = false;
12420 }
12421
12422 if (append_nott)
12423 nott_insn = emit_insn (gen_nott (get_t_reg_rtx ()));
12424
12425 rtx_insn* first_insn = get_insns ();
12426
12427 if (dump_file)
12428 {
12429 fprintf (dump_file, "resulting insns:\n");
12430 print_rtl (dump_file, first_insn);
12431 fprintf (dump_file, "\n");
12432 }
12433
12434 return sh_treg_insns (first_insn, nott_insn);
12435 }
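
/* Illustrative sketch (not from the original sources): splitting
     x = (le:SI (reg:SI 4) (reg:SI 5))
   when only the reversed (gt) form has an insn pattern emits the insns for
   the gt comparison followed by a nott.  The returned sh_treg_insns records
   both the first emitted insn and the trailing nott, so callers that can
   invert their use of the T bit may remove the nott again.  */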
12436
12437 /*------------------------------------------------------------------------------
12438 Mode switching support code.
12439 */
12440
12441 static void
12442 sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode,
12443 int prev_mode, HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
12444 {
12445 if ((TARGET_SH4A_FP || TARGET_SH4_300)
12446 && prev_mode != FP_MODE_NONE && prev_mode != mode)
12447 {
12448 emit_insn (gen_toggle_pr ());
12449 if (TARGET_FMOVD)
12450 emit_insn (gen_toggle_sz ());
12451 }
12452 else if (mode != FP_MODE_NONE)
12453 {
12454 rtx tmp = gen_reg_rtx (SImode);
12455 emit_insn (gen_sts_fpscr (tmp));
12456 rtx i = NULL;
12457
12458 const unsigned HOST_WIDE_INT fpbits =
12459 TARGET_FMOVD ? (FPSCR_PR | FPSCR_SZ) : FPSCR_PR;
12460
12461 if (prev_mode != FP_MODE_NONE && prev_mode != mode)
12462 i = gen_xorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
12463 else if (mode == FP_MODE_SINGLE)
12464 i = gen_andsi3 (tmp, tmp, force_reg (SImode, GEN_INT (~fpbits)));
12465 else if (mode == FP_MODE_DOUBLE)
12466 i = gen_iorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
12467 else
12468 gcc_unreachable ();
12469
12470 emit_insn (i);
12471 emit_insn (gen_lds_fpscr (tmp));
12472 }
12473 }
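
/* Sketch of what the generic (non-SH4A-FP / non-SH4-300) path above emits for
   a SINGLE <-> DOUBLE transition with TARGET_FMOVD, where fpbits is
   FPSCR_PR | FPSCR_SZ:

     gen_sts_fpscr (tmp)        read FPSCR into a pseudo
     gen_xorsi3 (tmp, tmp, c)   toggle the PR/SZ bits (c = fpbits in a reg)
     gen_lds_fpscr (tmp)        write the result back to FPSCR

   Entering FP mode from FP_MODE_NONE (or re-entering the same mode) instead
   uses gen_andsi3 (FP_MODE_SINGLE, clears the bits) or gen_iorsi3
   (FP_MODE_DOUBLE, sets them) to force a known state.  */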
12474
12475 static int
12476 sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn)
12477 {
12478 return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE;
12479 }
12480
12481 static int
12482 sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn)
12483 {
12484 if (TARGET_HITACHI && recog_memoized (insn) >= 0
12485 && get_attr_fp_set (insn) != FP_SET_NONE)
12486 return (int) get_attr_fp_set (insn);
12487 else
12488 return mode;
12489 }
12490
12491 static int
12492 sh_mode_entry (int entity ATTRIBUTE_UNUSED)
12493 {
12494 return NORMAL_MODE (entity);
12495 }
12496
12497 static int
12498 sh_mode_exit (int entity ATTRIBUTE_UNUSED)
12499 {
12500 return sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (entity);
12501 }
12502
12503 static int
12504 sh_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
12505 {
12506 return ((TARGET_FPU_SINGLE != 0) ^ (n) ? FP_MODE_SINGLE : FP_MODE_DOUBLE);
12507 }
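
/* E.g. with TARGET_FPU_SINGLE the highest priority mode (n == 0) evaluates to
   FP_MODE_SINGLE and n == 1 to FP_MODE_DOUBLE; without TARGET_FPU_SINGLE the
   order is reversed.  */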
12508
12509 /*------------------------------------------------------------------------------
12510 Misc
12511 */
12512
12513 /* Return true if we use LRA instead of the reload pass. */
12514 bool
12515 sh_lra_p (void)
12516 {
12517 return sh_lra_flag;
12518 }
12519
12520 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
12521
12522 static bool
12523 sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
12524 unsigned int align,
12525 enum by_pieces_operation op,
12526 bool speed_p)
12527 {
12528 switch (op)
12529 {
12530 case MOVE_BY_PIECES:
12531 return by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1, op)
12532 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
12533 case STORE_BY_PIECES:
12534 case SET_BY_PIECES:
12535 return by_pieces_ninsns (size, align, STORE_MAX_PIECES + 1, op)
12536 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
12537 default:
12538 return default_use_by_pieces_infrastructure_p (size, align,
12539 op, speed_p);
12540 }
12541 }
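
/* Worked example (illustrative): for MOVE_BY_PIECES on a speed-optimized path
   with align >= 32 the insn-count threshold is 16, so a small aligned block
   copy is expanded inline as long as by_pieces_ninsns estimates fewer than 16
   move insns; with !speed_p (size optimization) the threshold drops to 2.  */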
12542
12543 bool
12544 sh_cannot_force_const_mem_p (machine_mode mode ATTRIBUTE_UNUSED,
12545 rtx x ATTRIBUTE_UNUSED)
12546 {
12547 return TARGET_FDPIC;
12548 }
12549
12550 /* Emit insns to load the function address from FUNCDESC (an FDPIC
12551 function descriptor) into r1 and the GOT address into r12,
12552 returning an rtx for r1. */
12553
12554 rtx
12555 sh_load_function_descriptor (rtx funcdesc)
12556 {
12557 rtx r1 = gen_rtx_REG (Pmode, R1_REG);
12558 rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
12559 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
12560 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
12561
12562 emit_move_insn (r1, fnaddr);
12563 /* The ABI requires the entry point address to be loaded first, so
12564 prevent the load from being moved after that of the GOT
12565 address. */
12566 emit_insn (gen_blockage ());
12567 emit_move_insn (pic_reg, gotaddr);
12568 return r1;
12569 }
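
/* Layout assumed by the loads above (see the FDPIC ABI for the authoritative
   definition):

     funcdesc + 0 : function entry point  -> loaded into r1
     funcdesc + 4 : GOT / FDPIC pointer   -> loaded into r12 (PIC_REG)

   The blockage insn keeps the two loads in this order as the ABI requires.  */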
12570
12571 /* Return an rtx holding the initial value of the FDPIC register (the
12572 FDPIC pointer passed in from the caller). */
12573
12574 rtx
12575 sh_get_fdpic_reg_initial_val (void)
12576 {
12577 return get_hard_reg_initial_val (Pmode, PIC_REG);
12578 }
12579
12580 #include "gt-sh.h"