gcc/config/sh/sh.c
1 /* Output routines for GCC for Renesas / SuperH SH.
2 Copyright (C) 1993-2019 Free Software Foundation, Inc.
3 Contributed by Steve Chamberlain (sac@cygnus.com).
4 Improved by Jim Wilson (wilson@cygnus.com).
5
6 This file is part of GCC.
7
8 GCC is free software; you can redistribute it and/or modify
9 it under the terms of the GNU General Public License as published by
10 the Free Software Foundation; either version 3, or (at your option)
11 any later version.
12
13 GCC is distributed in the hope that it will be useful,
14 but WITHOUT ANY WARRANTY; without even the implied warranty of
15 MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the
16 GNU General Public License for more details.
17
18 You should have received a copy of the GNU General Public License
19 along with GCC; see the file COPYING3. If not see
20 <http://www.gnu.org/licenses/>. */
21
22 #include <sstream>
23
24 #define IN_TARGET_CODE 1
25
26 #include "config.h"
27 #define INCLUDE_VECTOR
28 #include "system.h"
29 #include "coretypes.h"
30 #include "backend.h"
31 #include "target.h"
32 #include "rtl.h"
33 #include "tree.h"
34 #include "gimple.h"
35 #include "cfghooks.h"
36 #include "df.h"
37 #include "memmodel.h"
38 #include "tm_p.h"
39 #include "stringpool.h"
40 #include "attribs.h"
41 #include "optabs.h"
42 #include "emit-rtl.h"
43 #include "recog.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "flags.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "reload.h"
54 #include "output.h"
55 #include "insn-attr.h"
56 #include "dwarf2.h"
57 #include "langhooks.h"
58 #include "cfgrtl.h"
59 #include "intl.h"
60 #include "sched-int.h"
61 #include "gimplify.h"
62 #include "tm-constrs.h"
63 #include "opts.h"
64 #include "tree-pass.h"
65 #include "context.h"
66 #include "builtins.h"
67 #include "rtl-iter.h"
68 #include "regs.h"
69 #include "toplev.h"
70
71 /* This file should be included last. */
72 #include "target-def.h"
73
74 int code_for_indirect_jump_scratch = CODE_FOR_indirect_jump_scratch;
75
76 #define CONST_OK_FOR_ADD(size) CONST_OK_FOR_I08 (size)
77 #define GEN_MOV (*(gen_movsi))
78 #define GEN_ADD3 (*(gen_addsi3))
79 #define GEN_SUB3 (*(gen_subsi3))
80
81 /* Used to simplify the logic below. Find the attributes wherever
82 they may be. */
83 #define SH_ATTRIBUTES(decl) \
84 (TYPE_P (decl)) ? TYPE_ATTRIBUTES (decl) \
85 : DECL_ATTRIBUTES (decl) \
86 ? (DECL_ATTRIBUTES (decl)) \
87 : TYPE_ATTRIBUTES (TREE_TYPE (decl))
88
89 /* Set to true by expand_prologue() when the function is an
90 interrupt handler. */
91 bool current_function_interrupt;
92
93 tree sh_deferred_function_attributes;
94 tree *sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
95
96 /* Global variables for machine-dependent things. */
97
98 /* Which cpu are we scheduling for. */
99 enum processor_type sh_cpu;
100
101 /* Definitions used in ready queue reordering for first scheduling pass. */
102
103 /* Reg weights arrays for modes SFmode and SImode, indexed by insn LUID. */
104 static short *regmode_weight[2];
105
106 /* Total SFmode and SImode weights of scheduled insns. */
107 static int curr_regmode_pressure[2];
108
109 /* Number of r0 life regions. */
110 static int r0_life_regions;
111
112 /* If true, skip cycles for Q -> R movement. */
113 static int skip_cycles = 0;
114
115 /* Cached value of can_issue_more. This is cached in sh_variable_issue hook
116 and returned from sh_reorder2. */
117 static short cached_can_issue_more;
118
119 /* Unique number for UNSPEC_BBR pattern. */
120 static unsigned int unspec_bbr_uid = 1;
121
122 /* Provides the class number of the smallest class containing
123 reg number. */
124 enum reg_class regno_reg_class[FIRST_PSEUDO_REGISTER] =
125 {
126 R0_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
127 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
128 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
129 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
130 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
131 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
132 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
133 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
134 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
135 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
136 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
137 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
138 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
139 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
140 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
141 GENERAL_REGS, GENERAL_REGS, GENERAL_REGS, GENERAL_REGS,
142 FP0_REGS,FP_REGS, FP_REGS, FP_REGS,
143 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
144 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
145 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
146 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
147 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
148 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
149 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
150 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
151 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
152 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
153 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
154 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
155 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
156 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
157 FP_REGS, FP_REGS, FP_REGS, FP_REGS,
158 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
159 TARGET_REGS, TARGET_REGS, TARGET_REGS, TARGET_REGS,
160 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
161 DF_REGS, DF_REGS, DF_REGS, DF_REGS,
162 NO_REGS, GENERAL_REGS, PR_REGS, T_REGS,
163 MAC_REGS, MAC_REGS, FPUL_REGS, FPSCR_REGS,
164 GENERAL_REGS, GENERAL_REGS,
165 };
166
167 char sh_register_names[FIRST_PSEUDO_REGISTER] \
168 [MAX_REGISTER_NAME_LENGTH + 1] = SH_REGISTER_NAMES_INITIALIZER;
169
170 char sh_additional_register_names[ADDREGNAMES_SIZE] \
171 [MAX_ADDITIONAL_REGISTER_NAME_LENGTH + 1]
172 = SH_ADDITIONAL_REGISTER_NAMES_INITIALIZER;
173
174 int assembler_dialect;
175
176 static void split_branches (rtx_insn *);
177 static int branch_dest (rtx);
178 static void print_slot (rtx_sequence *);
179 static rtx_code_label *add_constant (rtx, machine_mode, rtx);
180 static void dump_table (rtx_insn *, rtx_insn *);
181 static bool broken_move (rtx_insn *);
182 static bool mova_p (rtx_insn *);
183 static rtx_insn *find_barrier (int, rtx_insn *, rtx_insn *);
184 static bool noncall_uses_reg (rtx, rtx_insn *, rtx *);
185 static rtx_insn *gen_block_redirect (rtx_insn *, int, int);
186 static void sh_reorg (void);
187 static void sh_option_override (void);
188 static void sh_override_options_after_change (void);
189 static void output_stack_adjust (int, rtx, int, HARD_REG_SET *, bool);
190 static rtx_insn* emit_frame_insn (rtx);
191 static rtx push (int);
192 static void pop (int);
193 static void push_regs (HARD_REG_SET* mask, bool interrupt_handler);
194 static int calc_live_regs (HARD_REG_SET *);
195 static HOST_WIDE_INT rounded_frame_size (int);
196 static bool sh_frame_pointer_required (void);
197 static void sh_emit_mode_set (int, int, int, HARD_REG_SET);
198 static int sh_mode_needed (int, rtx_insn *);
199 static int sh_mode_after (int, int, rtx_insn *);
200 static int sh_mode_entry (int);
201 static int sh_mode_exit (int);
202 static int sh_mode_priority (int entity, int n);
203
204 static rtx mark_constant_pool_use (rtx);
205 static tree sh_handle_interrupt_handler_attribute (tree *, tree, tree,
206 int, bool *);
207 static tree sh_handle_resbank_handler_attribute (tree *, tree,
208 tree, int, bool *);
209 static tree sh2a_handle_function_vector_handler_attribute (tree *, tree,
210 tree, int, bool *);
211 static tree sh_handle_sp_switch_attribute (tree *, tree, tree, int, bool *);
212 static tree sh_handle_trap_exit_attribute (tree *, tree, tree, int, bool *);
213 static tree sh_handle_renesas_attribute (tree *, tree, tree, int, bool *);
214 static void sh_print_operand (FILE *, rtx, int);
215 static void sh_print_operand_address (FILE *, machine_mode, rtx);
216 static bool sh_print_operand_punct_valid_p (unsigned char code);
217 static bool sh_asm_output_addr_const_extra (FILE *file, rtx x);
218 static void sh_output_function_epilogue (FILE *);
219 static void sh_insert_attributes (tree, tree *);
220 static const char *sh_check_pch_target_flags (int);
221 static int sh_register_move_cost (machine_mode, reg_class_t, reg_class_t);
222 static int sh_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
223 static int sh_issue_rate (void);
224 static int sh_dfa_new_cycle (FILE *, int, rtx_insn *, int, int, int *sort_p);
225 static short find_set_regmode_weight (rtx, machine_mode);
226 static short find_insn_regmode_weight (rtx, machine_mode);
227 static void find_regmode_weight (basic_block, machine_mode);
228 static int find_r0_life_regions (basic_block);
229 static void sh_md_init_global (FILE *, int, int);
230 static void sh_md_finish_global (FILE *, int);
231 static int rank_for_reorder (const void *, const void *);
232 static void swap_reorder (rtx_insn **, int);
233 static void ready_reorder (rtx_insn **, int);
234 static bool high_pressure (machine_mode);
235 static int sh_reorder (FILE *, int, rtx_insn **, int *, int);
236 static int sh_reorder2 (FILE *, int, rtx_insn **, int *, int);
237 static void sh_md_init (FILE *, int, int);
238 static int sh_variable_issue (FILE *, int, rtx_insn *, int);
239
240 static bool sh_function_ok_for_sibcall (tree, tree);
241
242 static bool sh_can_follow_jump (const rtx_insn *, const rtx_insn *);
243 static bool sh_ms_bitfield_layout_p (const_tree);
244
245 static void sh_init_builtins (void);
246 static tree sh_builtin_decl (unsigned, bool);
247 static rtx sh_expand_builtin (tree, rtx, rtx, machine_mode, int);
248 static void sh_output_mi_thunk (FILE *, tree, HOST_WIDE_INT,
249 HOST_WIDE_INT, tree);
250 static void sh_file_start (void);
251 static bool sh_assemble_integer (rtx, unsigned int, int);
252 static bool flow_dependent_p (rtx, rtx);
253 static void flow_dependent_p_1 (rtx, const_rtx, void *);
254 static int shiftcosts (rtx);
255 static int and_xor_ior_costs (rtx, int);
256 static int addsubcosts (rtx);
257 static int multcosts (rtx);
258 static bool unspec_caller_rtx_p (rtx);
259 static bool sh_cannot_copy_insn_p (rtx_insn *);
260 static bool sh_cannot_force_const_mem_p (machine_mode, rtx);
261 static bool sh_rtx_costs (rtx, machine_mode, int, int, int *, bool);
262 static int sh_address_cost (rtx, machine_mode, addr_space_t, bool);
263 static int sh_pr_n_sets (void);
264 static rtx sh_allocate_initial_value (rtx);
265 static reg_class_t sh_preferred_reload_class (rtx, reg_class_t);
266 static reg_class_t sh_secondary_reload (bool, rtx, reg_class_t,
267 machine_mode,
268 struct secondary_reload_info *);
269 static bool sh_legitimate_address_p (machine_mode, rtx, bool);
270 static rtx sh_legitimize_address (rtx, rtx, machine_mode);
271 static rtx sh_delegitimize_address (rtx);
272 static bool sh_cannot_substitute_mem_equiv_p (rtx);
273 static bool sh_legitimize_address_displacement (rtx *, rtx *,
274 poly_int64, machine_mode);
275 static int scavenge_reg (HARD_REG_SET *s);
276
277 static rtx sh_struct_value_rtx (tree, int);
278 static rtx sh_function_value (const_tree, const_tree, bool);
279 static bool sh_function_value_regno_p (const unsigned int);
280 static rtx sh_libcall_value (machine_mode, const_rtx);
281 static bool sh_return_in_memory (const_tree, const_tree);
282 static rtx sh_builtin_saveregs (void);
283 static void sh_setup_incoming_varargs (cumulative_args_t,
284 const function_arg_info &, int *, int);
285 static bool sh_strict_argument_naming (cumulative_args_t);
286 static bool sh_pretend_outgoing_varargs_named (cumulative_args_t);
287 static void sh_atomic_assign_expand_fenv (tree *, tree *, tree *);
288 static tree sh_build_builtin_va_list (void);
289 static void sh_va_start (tree, rtx);
290 static tree sh_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
291 static bool sh_promote_prototypes (const_tree);
292 static machine_mode sh_promote_function_mode (const_tree type,
293 machine_mode,
294 int *punsignedp,
295 const_tree funtype,
296 int for_return);
297 static bool sh_pass_by_reference (cumulative_args_t,
298 const function_arg_info &);
299 static bool sh_callee_copies (cumulative_args_t, const function_arg_info &);
300 static int sh_arg_partial_bytes (cumulative_args_t, const function_arg_info &);
301 static void sh_function_arg_advance (cumulative_args_t,
302 const function_arg_info &);
303 static rtx sh_function_arg (cumulative_args_t, const function_arg_info &);
304 static int sh_dwarf_calling_convention (const_tree);
305 static void sh_encode_section_info (tree, rtx, int);
306 static bool sh2a_function_vector_p (tree);
307 static void sh_trampoline_init (rtx, tree, rtx);
308 static rtx sh_trampoline_adjust_address (rtx);
309 static void sh_conditional_register_usage (void);
310 static bool sh_legitimate_constant_p (machine_mode, rtx);
311 static int mov_insn_size (machine_mode, bool);
312 static int mov_insn_alignment_mask (machine_mode, bool);
313 static bool sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT,
314 unsigned int,
315 enum by_pieces_operation,
316 bool);
317 static bool sequence_insn_p (rtx_insn *);
318 static void sh_canonicalize_comparison (int *, rtx *, rtx *, bool);
319 static void sh_canonicalize_comparison (enum rtx_code&, rtx&, rtx&,
320 machine_mode, bool);
321 static bool sh_legitimate_combined_insn (rtx_insn* insn);
322
323 static bool sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2);
324
325 static void sh_init_sync_libfuncs (void) ATTRIBUTE_UNUSED;
326 static unsigned int sh_hard_regno_nregs (unsigned int, machine_mode);
327 static bool sh_hard_regno_mode_ok (unsigned int, machine_mode);
328 static bool sh_modes_tieable_p (machine_mode, machine_mode);
329 static bool sh_can_change_mode_class (machine_mode, machine_mode, reg_class_t);
330 \f
331 static const struct attribute_spec sh_attribute_table[] =
332 {
333 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
334 affects_type_identity, handler, exclude } */
335 { "interrupt_handler", 0, 0, true, false, false, false,
336 sh_handle_interrupt_handler_attribute, NULL },
337 { "sp_switch", 1, 1, true, false, false, false,
338 sh_handle_sp_switch_attribute, NULL },
339 { "trap_exit", 1, 1, true, false, false, false,
340 sh_handle_trap_exit_attribute, NULL },
341 { "renesas", 0, 0, false, true, false, false,
342 sh_handle_renesas_attribute, NULL },
343 { "trapa_handler", 0, 0, true, false, false, false,
344 sh_handle_interrupt_handler_attribute, NULL },
345 { "nosave_low_regs", 0, 0, true, false, false, false,
346 sh_handle_interrupt_handler_attribute, NULL },
347 { "resbank", 0, 0, true, false, false, false,
348 sh_handle_resbank_handler_attribute, NULL },
349 { "function_vector", 1, 1, true, false, false, false,
350 sh2a_handle_function_vector_handler_attribute, NULL },
351 { NULL, 0, 0, false, false, false, false, NULL, NULL }
352 };
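/* Editorial illustration -- not part of the original sh.c.  A rough sketch of
   how the attributes accepted by the table above might be spelled in user
   code.  The argument forms are inferred from the min_len/max_len fields and
   the handler names above rather than quoted from the GCC manual, so treat
   them as assumptions:

     void isr (void) __attribute__ ((interrupt_handler));
     void isr_alt (void) __attribute__ ((interrupt_handler,
                                         sp_switch ("alt_stack")));
     void isr_trap (void) __attribute__ ((interrupt_handler, trap_exit (11)));
     void isr_bank (void) __attribute__ ((interrupt_handler, resbank));
     void vec_fn (void) __attribute__ ((function_vector (18)));    (SH2A)
*/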
353 \f
354 /* Initialize the GCC target structure. */
355 #undef TARGET_ATTRIBUTE_TABLE
356 #define TARGET_ATTRIBUTE_TABLE sh_attribute_table
357
358 /* The next two are used for debug info when compiling with -gdwarf. */
359 #undef TARGET_ASM_UNALIGNED_HI_OP
360 #define TARGET_ASM_UNALIGNED_HI_OP "\t.uaword\t"
361 #undef TARGET_ASM_UNALIGNED_SI_OP
362 #define TARGET_ASM_UNALIGNED_SI_OP "\t.ualong\t"
363
364 #undef TARGET_OPTION_OVERRIDE
365 #define TARGET_OPTION_OVERRIDE sh_option_override
366
367 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
368 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE \
369 sh_override_options_after_change
370
371 #undef TARGET_PRINT_OPERAND
372 #define TARGET_PRINT_OPERAND sh_print_operand
373 #undef TARGET_PRINT_OPERAND_ADDRESS
374 #define TARGET_PRINT_OPERAND_ADDRESS sh_print_operand_address
375 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
376 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P sh_print_operand_punct_valid_p
377 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
378 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA sh_asm_output_addr_const_extra
379
380 #undef TARGET_ASM_FUNCTION_EPILOGUE
381 #define TARGET_ASM_FUNCTION_EPILOGUE sh_output_function_epilogue
382
383 #undef TARGET_ASM_OUTPUT_MI_THUNK
384 #define TARGET_ASM_OUTPUT_MI_THUNK sh_output_mi_thunk
385
386 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
387 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK \
388 hook_bool_const_tree_hwi_hwi_const_tree_true
389
390 #undef TARGET_ASM_FILE_START
391 #define TARGET_ASM_FILE_START sh_file_start
392 #undef TARGET_ASM_FILE_START_FILE_DIRECTIVE
393 #define TARGET_ASM_FILE_START_FILE_DIRECTIVE true
394
395 #undef TARGET_ASM_INTEGER
396 #define TARGET_ASM_INTEGER sh_assemble_integer
397
398 #undef TARGET_REGISTER_MOVE_COST
399 #define TARGET_REGISTER_MOVE_COST sh_register_move_cost
400
401 #undef TARGET_INSERT_ATTRIBUTES
402 #define TARGET_INSERT_ATTRIBUTES sh_insert_attributes
403
404 #undef TARGET_SCHED_ADJUST_COST
405 #define TARGET_SCHED_ADJUST_COST sh_adjust_cost
406
407 #undef TARGET_SCHED_ISSUE_RATE
408 #define TARGET_SCHED_ISSUE_RATE sh_issue_rate
409
410 /* The next 5 hooks have been implemented for reenabling sched1. With the
411 help of these macros we are limiting the movement of insns in sched1 to
412 reduce the register pressure. The overall idea is to keep count of SImode
413 and SFmode regs required by already scheduled insns. When these counts
414 cross some threshold values, give priority to insns that free registers.
415 The insn that frees registers is most likely to be the insn with the lowest
416 LUID (original insn order), but such an insn might be sitting in the stalled
417 queue (Q) instead of the ready queue (R). To solve this, we skip cycles
418 up to a max of 8 cycles so that such insns may move from Q -> R.
419
420 The description of the hooks is as follows:
421
422 TARGET_SCHED_INIT_GLOBAL: A new target hook added to the generic
423 scheduler; it is called inside the sched_init function just after
424 the find_insn_reg_weights function call. It is used to calculate the
425 SImode and SFmode weights of the insns of basic blocks, much like what
426 find_insn_reg_weights does.
427 TARGET_SCHED_FINISH_GLOBAL: Corresponding cleanup hook.
428
429 TARGET_SCHED_DFA_NEW_CYCLE: Skip cycles if high register pressure is
430 indicated by TARGET_SCHED_REORDER2; doing this may move insns from
431 (Q)->(R).
432
433 TARGET_SCHED_REORDER: If the register pressure for SImode or SFmode is
434 high, reorder the ready queue so that the insn with the lowest LUID will be
435 issued next.
436
437 TARGET_SCHED_REORDER2: If the register pressure is high, indicate to
438 TARGET_SCHED_DFA_NEW_CYCLE to skip cycles.
439
440 TARGET_SCHED_VARIABLE_ISSUE: Cache the value of can_issue_more so that it
441 can be returned from TARGET_SCHED_REORDER2.
442
443 TARGET_SCHED_INIT: Reset the register pressure counting variables. */
444
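/* Editorial illustration -- not part of the original sh.c.  A minimal,
   self-contained sketch (guarded out) of the decision described above: once
   the running SImode/SFmode pressure crosses a threshold, prefer the insn
   with the lowest LUID, since it is the one most likely to free registers.
   The threshold value and the helper name are hypothetical.  */
#if 0
static bool
sketch_prefer_low_luid_insn (int simode_pressure, int sfmode_pressure)
{
  const int sketch_pressure_threshold = 5;  /* hypothetical value */
  return simode_pressure > sketch_pressure_threshold
         || sfmode_pressure > sketch_pressure_threshold;
}
#endif
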
445 #undef TARGET_SCHED_DFA_NEW_CYCLE
446 #define TARGET_SCHED_DFA_NEW_CYCLE sh_dfa_new_cycle
447
448 #undef TARGET_SCHED_INIT_GLOBAL
449 #define TARGET_SCHED_INIT_GLOBAL sh_md_init_global
450
451 #undef TARGET_SCHED_FINISH_GLOBAL
452 #define TARGET_SCHED_FINISH_GLOBAL sh_md_finish_global
453
454 #undef TARGET_SCHED_VARIABLE_ISSUE
455 #define TARGET_SCHED_VARIABLE_ISSUE sh_variable_issue
456
457 #undef TARGET_SCHED_REORDER
458 #define TARGET_SCHED_REORDER sh_reorder
459
460 #undef TARGET_SCHED_REORDER2
461 #define TARGET_SCHED_REORDER2 sh_reorder2
462
463 #undef TARGET_SCHED_INIT
464 #define TARGET_SCHED_INIT sh_md_init
465
466 #undef TARGET_DELEGITIMIZE_ADDRESS
467 #define TARGET_DELEGITIMIZE_ADDRESS sh_delegitimize_address
468
469 #undef TARGET_LEGITIMIZE_ADDRESS
470 #define TARGET_LEGITIMIZE_ADDRESS sh_legitimize_address
471
472 #undef TARGET_CAN_FOLLOW_JUMP
473 #define TARGET_CAN_FOLLOW_JUMP sh_can_follow_jump
474
475 #undef TARGET_MS_BITFIELD_LAYOUT_P
476 #define TARGET_MS_BITFIELD_LAYOUT_P sh_ms_bitfield_layout_p
477
478 #undef TARGET_INIT_BUILTINS
479 #define TARGET_INIT_BUILTINS sh_init_builtins
480 #undef TARGET_BUILTIN_DECL
481 #define TARGET_BUILTIN_DECL sh_builtin_decl
482 #undef TARGET_EXPAND_BUILTIN
483 #define TARGET_EXPAND_BUILTIN sh_expand_builtin
484
485 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
486 #define TARGET_FUNCTION_OK_FOR_SIBCALL sh_function_ok_for_sibcall
487
488 #undef TARGET_CANNOT_COPY_INSN_P
489 #define TARGET_CANNOT_COPY_INSN_P sh_cannot_copy_insn_p
490 #undef TARGET_RTX_COSTS
491 #define TARGET_RTX_COSTS sh_rtx_costs
492 #undef TARGET_ADDRESS_COST
493 #define TARGET_ADDRESS_COST sh_address_cost
494 #undef TARGET_ALLOCATE_INITIAL_VALUE
495 #define TARGET_ALLOCATE_INITIAL_VALUE sh_allocate_initial_value
496
497 #undef TARGET_MACHINE_DEPENDENT_REORG
498 #define TARGET_MACHINE_DEPENDENT_REORG sh_reorg
499
500 #undef TARGET_DWARF_REGISTER_SPAN
501 #define TARGET_DWARF_REGISTER_SPAN sh_dwarf_register_span
502
503 #ifdef HAVE_AS_TLS
504 #undef TARGET_HAVE_TLS
505 #define TARGET_HAVE_TLS true
506 #endif
507
508 #undef TARGET_PROMOTE_PROTOTYPES
509 #define TARGET_PROMOTE_PROTOTYPES sh_promote_prototypes
510 #undef TARGET_PROMOTE_FUNCTION_MODE
511 #define TARGET_PROMOTE_FUNCTION_MODE sh_promote_function_mode
512
513 #undef TARGET_FUNCTION_VALUE
514 #define TARGET_FUNCTION_VALUE sh_function_value
515 #undef TARGET_FUNCTION_VALUE_REGNO_P
516 #define TARGET_FUNCTION_VALUE_REGNO_P sh_function_value_regno_p
517 #undef TARGET_LIBCALL_VALUE
518 #define TARGET_LIBCALL_VALUE sh_libcall_value
519 #undef TARGET_STRUCT_VALUE_RTX
520 #define TARGET_STRUCT_VALUE_RTX sh_struct_value_rtx
521 #undef TARGET_RETURN_IN_MEMORY
522 #define TARGET_RETURN_IN_MEMORY sh_return_in_memory
523
524 #undef TARGET_EXPAND_BUILTIN_SAVEREGS
525 #define TARGET_EXPAND_BUILTIN_SAVEREGS sh_builtin_saveregs
526 #undef TARGET_SETUP_INCOMING_VARARGS
527 #define TARGET_SETUP_INCOMING_VARARGS sh_setup_incoming_varargs
528 #undef TARGET_STRICT_ARGUMENT_NAMING
529 #define TARGET_STRICT_ARGUMENT_NAMING sh_strict_argument_naming
530 #undef TARGET_PRETEND_OUTGOING_VARARGS_NAMED
531 #define TARGET_PRETEND_OUTGOING_VARARGS_NAMED sh_pretend_outgoing_varargs_named
532 #undef TARGET_MUST_PASS_IN_STACK
533 #define TARGET_MUST_PASS_IN_STACK must_pass_in_stack_var_size
534 #undef TARGET_PASS_BY_REFERENCE
535 #define TARGET_PASS_BY_REFERENCE sh_pass_by_reference
536 #undef TARGET_CALLEE_COPIES
537 #define TARGET_CALLEE_COPIES sh_callee_copies
538 #undef TARGET_ARG_PARTIAL_BYTES
539 #define TARGET_ARG_PARTIAL_BYTES sh_arg_partial_bytes
540 #undef TARGET_FUNCTION_ARG
541 #define TARGET_FUNCTION_ARG sh_function_arg
542 #undef TARGET_FUNCTION_ARG_ADVANCE
543 #define TARGET_FUNCTION_ARG_ADVANCE sh_function_arg_advance
544
545 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
546 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV sh_atomic_assign_expand_fenv
547
548 #undef TARGET_BUILD_BUILTIN_VA_LIST
549 #define TARGET_BUILD_BUILTIN_VA_LIST sh_build_builtin_va_list
550 #undef TARGET_EXPAND_BUILTIN_VA_START
551 #define TARGET_EXPAND_BUILTIN_VA_START sh_va_start
552 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
553 #define TARGET_GIMPLIFY_VA_ARG_EXPR sh_gimplify_va_arg_expr
554
555 #undef TARGET_VECTOR_MODE_SUPPORTED_P
556 #define TARGET_VECTOR_MODE_SUPPORTED_P sh_vector_mode_supported_p
557
558 #undef TARGET_CHECK_PCH_TARGET_FLAGS
559 #define TARGET_CHECK_PCH_TARGET_FLAGS sh_check_pch_target_flags
560
561 #undef TARGET_DWARF_CALLING_CONVENTION
562 #define TARGET_DWARF_CALLING_CONVENTION sh_dwarf_calling_convention
563
564 #undef TARGET_FRAME_POINTER_REQUIRED
565 #define TARGET_FRAME_POINTER_REQUIRED sh_frame_pointer_required
566
567 #undef TARGET_MODE_EMIT
568 #define TARGET_MODE_EMIT sh_emit_mode_set
569
570 #undef TARGET_MODE_NEEDED
571 #define TARGET_MODE_NEEDED sh_mode_needed
572
573 #undef TARGET_MODE_AFTER
574 #define TARGET_MODE_AFTER sh_mode_after
575
576 #undef TARGET_MODE_ENTRY
577 #define TARGET_MODE_ENTRY sh_mode_entry
578
579 #undef TARGET_MODE_EXIT
580 #define TARGET_MODE_EXIT sh_mode_exit
581
582 #undef TARGET_MODE_PRIORITY
583 #define TARGET_MODE_PRIORITY sh_mode_priority
584
585 /* Return regmode weight for insn. */
586 #define INSN_REGMODE_WEIGHT(INSN, MODE)\
587 regmode_weight[((MODE) == SImode) ? 0 : 1][INSN_UID (INSN)]
588
589 /* Return current register pressure for regmode. */
590 #define CURR_REGMODE_PRESSURE(MODE)\
591 curr_regmode_pressure[((MODE) == SImode) ? 0 : 1]
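/* Editorial illustration -- not part of the original sh.c.  A minimal sketch
   (guarded out) of how the two macros above might be used together; the
   helper name is hypothetical.  */
#if 0
static void
sketch_account_scheduled_insn (rtx_insn *insn, machine_mode mode)
{
  /* MODE picks slot 0 (SImode) or slot 1 (anything else, i.e. SFmode),
     matching the indexing used by both macros.  */
  CURR_REGMODE_PRESSURE (mode) += INSN_REGMODE_WEIGHT (insn, mode);
}
#endif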
592
593 #undef TARGET_ENCODE_SECTION_INFO
594 #define TARGET_ENCODE_SECTION_INFO sh_encode_section_info
595
596 #undef TARGET_LRA_P
597 #define TARGET_LRA_P sh_lra_p
598
599 #undef TARGET_SECONDARY_RELOAD
600 #define TARGET_SECONDARY_RELOAD sh_secondary_reload
601
602 #undef TARGET_PREFERRED_RELOAD_CLASS
603 #define TARGET_PREFERRED_RELOAD_CLASS sh_preferred_reload_class
604
605 #undef TARGET_CONDITIONAL_REGISTER_USAGE
606 #define TARGET_CONDITIONAL_REGISTER_USAGE sh_conditional_register_usage
607
608 #undef TARGET_LEGITIMATE_ADDRESS_P
609 #define TARGET_LEGITIMATE_ADDRESS_P sh_legitimate_address_p
610
611 #undef TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P
612 #define TARGET_CANNOT_SUBSTITUTE_MEM_EQUIV_P sh_cannot_substitute_mem_equiv_p
613
614 #undef TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT
615 #define TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT \
616 sh_legitimize_address_displacement
617
618 #undef TARGET_TRAMPOLINE_INIT
619 #define TARGET_TRAMPOLINE_INIT sh_trampoline_init
620 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
621 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS sh_trampoline_adjust_address
622
623 #undef TARGET_LEGITIMATE_CONSTANT_P
624 #define TARGET_LEGITIMATE_CONSTANT_P sh_legitimate_constant_p
625
626 #undef TARGET_CANONICALIZE_COMPARISON
627 #define TARGET_CANONICALIZE_COMPARISON sh_canonicalize_comparison
628
629 #undef TARGET_LEGITIMATE_COMBINED_INSN
630 #define TARGET_LEGITIMATE_COMBINED_INSN sh_legitimate_combined_insn
631
632 #undef TARGET_FIXED_CONDITION_CODE_REGS
633 #define TARGET_FIXED_CONDITION_CODE_REGS sh_fixed_condition_code_regs
634
635 #undef TARGET_USE_BY_PIECES_INFRASTRUCTURE_P
636 #define TARGET_USE_BY_PIECES_INFRASTRUCTURE_P \
637 sh_use_by_pieces_infrastructure_p
638
639 /* Machine-specific symbol_ref flags. */
640 #define SYMBOL_FLAG_FUNCVEC_FUNCTION (SYMBOL_FLAG_MACH_DEP << 0)
641
642 /* The tas.b instruction sets the 7th bit in the byte, i.e. 0x80. This value
643 is used by optabs.c atomic op expansion code as well as in sync.md. */
644 #undef TARGET_ATOMIC_TEST_AND_SET_TRUEVAL
645 #define TARGET_ATOMIC_TEST_AND_SET_TRUEVAL 0x80
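/* Editorial illustration -- not part of the original sh.c.  Conceptually,
   tas.b atomically behaves roughly like this (a paraphrase, not text from
   the SH manual), which is why a byte that has been "set" reads back as
   0x80 rather than 1, and why the hook above reports 0x80 as the truth
   value:

     unsigned char old = *mem;    (T bit is set iff old == 0)
     *mem = old | 0x80;           (set bit 7)
*/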
646
647 #undef TARGET_CANNOT_FORCE_CONST_MEM
648 #define TARGET_CANNOT_FORCE_CONST_MEM sh_cannot_force_const_mem_p
649
650 #undef TARGET_HARD_REGNO_NREGS
651 #define TARGET_HARD_REGNO_NREGS sh_hard_regno_nregs
652 #undef TARGET_HARD_REGNO_MODE_OK
653 #define TARGET_HARD_REGNO_MODE_OK sh_hard_regno_mode_ok
654
655 #undef TARGET_MODES_TIEABLE_P
656 #define TARGET_MODES_TIEABLE_P sh_modes_tieable_p
657
658 #undef TARGET_CAN_CHANGE_MODE_CLASS
659 #define TARGET_CAN_CHANGE_MODE_CLASS sh_can_change_mode_class
660
661 #undef TARGET_CONSTANT_ALIGNMENT
662 #define TARGET_CONSTANT_ALIGNMENT constant_alignment_word_strings
663
664 struct gcc_target targetm = TARGET_INITIALIZER;
665 \f
666
667 /* Information on the currently selected atomic model.
668 This is initialized in sh_option_override. */
669 static sh_atomic_model selected_atomic_model_;
670
671 const sh_atomic_model&
672 selected_atomic_model (void)
673 {
674 return selected_atomic_model_;
675 }
676
677 static sh_atomic_model
678 parse_validate_atomic_model_option (const char* str)
679 {
680 const char* model_names[sh_atomic_model::num_models];
681 model_names[sh_atomic_model::none] = "none";
682 model_names[sh_atomic_model::soft_gusa] = "soft-gusa";
683 model_names[sh_atomic_model::hard_llcs] = "hard-llcs";
684 model_names[sh_atomic_model::soft_tcb] = "soft-tcb";
685 model_names[sh_atomic_model::soft_imask] = "soft-imask";
686
687 const char* model_cdef_names[sh_atomic_model::num_models];
688 model_cdef_names[sh_atomic_model::none] = "NONE";
689 model_cdef_names[sh_atomic_model::soft_gusa] = "SOFT_GUSA";
690 model_cdef_names[sh_atomic_model::hard_llcs] = "HARD_LLCS";
691 model_cdef_names[sh_atomic_model::soft_tcb] = "SOFT_TCB";
692 model_cdef_names[sh_atomic_model::soft_imask] = "SOFT_IMASK";
693
694 sh_atomic_model ret;
695 ret.type = sh_atomic_model::none;
696 ret.name = model_names[sh_atomic_model::none];
697 ret.cdef_name = model_cdef_names[sh_atomic_model::none];
698 ret.strict = false;
699 ret.tcb_gbr_offset = -1;
700
701 /* Handle empty string as 'none'. */
702 if (str == NULL || *str == '\0')
703 return ret;
704
705 #define err_ret(...) do { error (__VA_ARGS__); return ret; } while (0)
706
707 std::vector<std::string> tokens;
708 for (std::stringstream ss (str); ss.good (); )
709 {
710 tokens.push_back (std::string ());
711 std::getline (ss, tokens.back (), ',');
712 }
713
714 if (tokens.empty ())
715 err_ret ("invalid atomic model option");
716
717 /* The first token must be the atomic model name. */
718 {
719 for (size_t i = 0; i < sh_atomic_model::num_models; ++i)
720 if (tokens.front () == model_names[i])
721 {
722 ret.type = (sh_atomic_model::enum_type)i;
723 ret.name = model_names[i];
724 ret.cdef_name = model_cdef_names[i];
725 goto got_mode_name;
726 }
727
728 err_ret ("invalid atomic model name \"%s\"", tokens.front ().c_str ());
729 got_mode_name:;
730 }
731
732 /* Go through the remaining tokens. */
733 for (size_t i = 1; i < tokens.size (); ++i)
734 {
735 if (tokens[i] == "strict")
736 ret.strict = true;
737 else if (tokens[i].find ("gbr-offset=") == 0)
738 {
739 std::string offset_str = tokens[i].substr (strlen ("gbr-offset="));
740 ret.tcb_gbr_offset = integral_argument (offset_str.c_str ());
741 if (offset_str.empty () || ret.tcb_gbr_offset == -1)
742 err_ret ("could not parse gbr-offset value \"%s\" in atomic model "
743 "option", offset_str.c_str ());
744 }
745 else
746 err_ret ("unknown parameter \"%s\" in atomic model option",
747 tokens[i].c_str ());
748 }
749
750 /* Check that the selection makes sense. */
751 if (ret.type == sh_atomic_model::soft_gusa && !TARGET_SH3)
752 err_ret ("atomic model %s is only available on SH3 and SH4 targets",
753 ret.name);
754
755 if (ret.type == sh_atomic_model::hard_llcs && !TARGET_SH4A)
756 err_ret ("atomic model %s is only available on SH4A targets", ret.name);
757
758 if (ret.type == sh_atomic_model::soft_tcb && ret.tcb_gbr_offset == -1)
759 err_ret ("atomic model %s requires gbr-offset parameter", ret.name);
760
761 if (ret.type == sh_atomic_model::soft_tcb
762 && (ret.tcb_gbr_offset < 0 || ret.tcb_gbr_offset > 1020
763 || (ret.tcb_gbr_offset & 3) != 0))
764 err_ret ("invalid gbr-offset value \"%d\" for atomic model %s; it must be "
765 "a multiple of 4 in the range 0-1020", ret.tcb_gbr_offset,
766 ret.name);
767
768 if (ret.type == sh_atomic_model::soft_imask && TARGET_USERMODE)
769 err_ret ("cannot use atomic model %s in user mode", ret.name);
770
771 return ret;
772
773 #undef err_ret
774 }
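/* Editorial illustration -- not part of the original sh.c.  Example strings
   accepted by the parser above (the first comma-separated token is the model
   name, the remaining tokens are parameters); the -matomic-model= spelling of
   the surrounding option is an assumption based on sh_atomic_model_str:

     ""                          -> none
     "soft-gusa,strict"          -> soft_gusa with strict checking
     "soft-tcb,gbr-offset=32"    -> soft_tcb, TCB field at GBR offset 32  */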
775
776 /* Register SH specific RTL passes. */
777 extern opt_pass* make_pass_sh_treg_combine (gcc::context* ctx, bool split_insns,
778 const char* name);
779 extern opt_pass* make_pass_sh_optimize_sett_clrt (gcc::context* ctx,
780 const char* name);
781 static void
782 register_sh_passes (void)
783 {
784 /* Running the sh_treg_combine pass after ce1 generates better code when
785 comparisons are combined and reg-reg moves are introduced, because
786 reg-reg moves will be eliminated afterwards. However, there are quite
787 a few cases where combine will be unable to fold comparison related insns,
788 so for now don't do it.
789 register_pass (make_pass_sh_treg_combine (g, false, "sh_treg_combine1"),
790 PASS_POS_INSERT_AFTER, "ce1", 1);
791 */
792
793 /* Run sh_treg_combine pass after combine but before register allocation. */
794 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine2"),
795 PASS_POS_INSERT_AFTER, "split1", 1);
796
797 /* Run sh_treg_combine pass after register allocation and basic block
798 reordering as this sometimes creates new opportunities. */
799 register_pass (make_pass_sh_treg_combine (g, true, "sh_treg_combine3"),
800 PASS_POS_INSERT_AFTER, "split4", 1);
801
802 /* Optimize sett and clrt insns, e.g. by removing them if the T bit value
803 is known after a conditional branch.
804 This must be done after basic blocks and branch conditions have
805 stabilized and won't be changed by further passes. */
806 register_pass (make_pass_sh_optimize_sett_clrt (g, "sh_optimize_sett_clrt"),
807 PASS_POS_INSERT_BEFORE, "sched2", 1);
808 }
809
810 /* Implement TARGET_OPTION_OVERRIDE macro. Validate and override
811 various options, and do some machine dependent initialization. */
812 static void
813 sh_option_override (void)
814 {
815 int regno;
816
817 SUBTARGET_OVERRIDE_OPTIONS;
818
819 sh_cpu = PROCESSOR_SH1;
820 assembler_dialect = 0;
821 if (TARGET_SH2)
822 sh_cpu = PROCESSOR_SH2;
823 if (TARGET_SH2E)
824 sh_cpu = PROCESSOR_SH2E;
825 if (TARGET_SH2A)
826 sh_cpu = PROCESSOR_SH2A;
827 if (TARGET_SH3)
828 sh_cpu = PROCESSOR_SH3;
829 if (TARGET_SH3E)
830 sh_cpu = PROCESSOR_SH3E;
831 if (TARGET_SH4)
832 {
833 assembler_dialect = 1;
834 sh_cpu = PROCESSOR_SH4;
835 }
836 if (TARGET_SH4A)
837 {
838 assembler_dialect = 1;
839 sh_cpu = PROCESSOR_SH4A;
840 }
841
842 /* User/privileged mode is supported only on SH3* and SH4*.
843 Disable it for everything else. */
844 if (!TARGET_SH3 && TARGET_USERMODE)
845 TARGET_USERMODE = false;
846
847 if (! strcmp (sh_div_str, "call-div1"))
848 sh_div_strategy = SH_DIV_CALL_DIV1;
849 else if (! strcmp (sh_div_str, "call-fp") && TARGET_FPU_ANY)
850 sh_div_strategy = SH_DIV_CALL_FP;
851 else if (! strcmp (sh_div_str, "call-table") && TARGET_DYNSHIFT)
852 sh_div_strategy = SH_DIV_CALL_TABLE;
853 else
854 {
855 /* Pick one that makes the most sense for the target in general.
856 It is not much good to use different functions depending on -Os,
857 since then we'll end up with two different functions when some of
858 the code is compiled for size, and some for speed. */
859
860 /* SH4 tends to emphasize speed. */
861 if (TARGET_HARD_SH4)
862 sh_div_strategy = SH_DIV_CALL_TABLE;
863 /* These have their own way of doing things. */
864 else if (TARGET_SH2A)
865 sh_div_strategy = SH_DIV_INTRINSIC;
866 /* SH1 .. SH3 cores often go into small-footprint systems, so
867 default to the smallest implementation available. */
868 else
869 sh_div_strategy = SH_DIV_CALL_DIV1;
870 }
871
872 if (sh_divsi3_libfunc[0])
873 ; /* User supplied - leave it alone. */
874 else if (TARGET_DIVIDE_CALL_FP)
875 sh_divsi3_libfunc = "__sdivsi3_i4";
876 else if (TARGET_DIVIDE_CALL_TABLE)
877 sh_divsi3_libfunc = "__sdivsi3_i4i";
878 else
879 sh_divsi3_libfunc = "__sdivsi3";
880
881 if (sh_branch_cost == -1)
882 {
883 /* The SH1 does not have delay slots, hence we get a pipeline stall
884 at every branch. The SH4 is superscalar, so the single delay slot
885 is not sufficient to keep both pipelines filled.
886 In any case, set the default branch cost to '2', as it results in
887 slightly smaller code overall and also enables some if conversions
888 that are required for matching special T bit related insns. */
889 sh_branch_cost = 2;
890 }
891
892 /* Set -mzdcbranch for SH4 / SH4A if not otherwise specified by the user. */
893 if (! global_options_set.x_TARGET_ZDCBRANCH && TARGET_HARD_SH4)
894 TARGET_ZDCBRANCH = 1;
895
896 /* FDPIC code is a special form of PIC, and the vast majority of code
897 generation constraints that apply to PIC also apply to FDPIC, so we
898 set flag_pic to avoid the need to check TARGET_FDPIC everywhere
899 flag_pic is checked. */
900 if (TARGET_FDPIC && !flag_pic)
901 flag_pic = 2;
902
903 for (regno = 0; regno < FIRST_PSEUDO_REGISTER; regno++)
904 if (! VALID_REGISTER_P (regno))
905 sh_register_names[regno][0] = '\0';
906
907 for (regno = 0; regno < ADDREGNAMES_SIZE; regno++)
908 if (! VALID_REGISTER_P (ADDREGNAMES_REGNO (regno)))
909 sh_additional_register_names[regno][0] = '\0';
910
911 if (flag_pic && ! TARGET_PREFERGOT)
912 flag_no_function_cse = 1;
913
914 if (targetm.small_register_classes_for_mode_p (VOIDmode))
915 {
916 /* Never run scheduling before reload, since that can
917 break global alloc, and generates slower code anyway due
918 to the pressure on R0. */
919 /* Enable sched1 for SH4 if the user explicitly requests it.
920 When sched1 is enabled, the ready queue will be reordered by
921 the target hooks if pressure is high. We cannot do this for
922 PIC, SH3 and lower as they give spill failures for R0. */
923 if (!TARGET_HARD_SH4 || flag_pic)
924 flag_schedule_insns = 0;
925 /* ??? Current exception handling places basic block boundaries
926 after call_insns. This causes high pressure on R0 and gives
927 spill failures for R0 in reload. See PR 22553 and the thread
928 on gcc-patches
929 <http://gcc.gnu.org/ml/gcc-patches/2005-10/msg00816.html>. */
930 else if (flag_exceptions)
931 {
932 if (flag_schedule_insns && global_options_set.x_flag_schedule_insns)
933 warning (0, "ignoring %<-fschedule-insns%> because of exception "
934 "handling bug");
935 flag_schedule_insns = 0;
936 }
937 else if (flag_schedule_insns
938 && !global_options_set.x_flag_schedule_insns)
939 flag_schedule_insns = 0;
940 }
941
942 /* Unwind info is not correct around the CFG unless either a frame
943 pointer is present or M_A_O_A is set. Fixing this requires rewriting
944 unwind info generation to be aware of the CFG and propagating states
945 around edges. */
946 if ((flag_unwind_tables || flag_asynchronous_unwind_tables
947 || flag_exceptions || flag_non_call_exceptions)
948 && flag_omit_frame_pointer && !TARGET_ACCUMULATE_OUTGOING_ARGS)
949 {
950 warning (0, "unwind tables currently require either a frame pointer "
951 "or %<-maccumulate-outgoing-args%> for correctness");
952 TARGET_ACCUMULATE_OUTGOING_ARGS = 1;
953 }
954
955 if (flag_unsafe_math_optimizations)
956 {
957 /* Enable fsca insn for SH4A if not otherwise specified by the user. */
958 if (global_options_set.x_TARGET_FSCA == 0 && TARGET_SH4A_FP)
959 TARGET_FSCA = 1;
960
961 /* Enable fsrra insn for SH4A if not otherwise specified by the user. */
962 if (global_options_set.x_TARGET_FSRRA == 0 && TARGET_SH4A_FP)
963 TARGET_FSRRA = 1;
964 }
965
966 /* Allow the fsrra insn only if both -funsafe-math-optimizations and
967 -ffinite-math-only are enabled. */
968 TARGET_FSRRA = TARGET_FSRRA
969 && flag_unsafe_math_optimizations
970 && flag_finite_math_only;
971
972 /* If the -mieee option was not explicitly set by the user, turn it on
973 unless -ffinite-math-only was specified. See also PR 33135. */
974 if (! global_options_set.x_TARGET_IEEE)
975 TARGET_IEEE = ! flag_finite_math_only;
976
977 if (sh_fixed_range_str)
978 sh_fix_range (sh_fixed_range_str);
979
980 /* This target defaults to strict volatile bitfields. */
981 if (flag_strict_volatile_bitfields < 0 && abi_version_at_least(2))
982 flag_strict_volatile_bitfields = 1;
983
984 sh_override_options_after_change ();
985
986 /* Parse atomic model option and make sure it is valid for the current
987 target CPU. */
988 selected_atomic_model_
989 = parse_validate_atomic_model_option (sh_atomic_model_str);
990
991 register_sh_passes ();
992 }
993
994 /* Implement targetm.override_options_after_change. */
995
996 static void
997 sh_override_options_after_change (void)
998 {
999 /* Adjust loop, jump and function alignment values (in bytes), if those
1000 were not specified by the user using -falign-loops, -falign-jumps
1001 and -falign-functions options.
1002 32 bit alignment is better for speed, because instructions can be
1003 fetched as a pair from a longword boundary. For size use 16 bit
1004 alignment to get more compact code.
1005 Aligning all jumps increases the code size, even if it might
1006 result in slightly faster code. Thus, it is set to the smallest
1007 alignment possible if not specified by the user. */
1008 if (flag_align_loops && !str_align_loops)
1009 str_align_loops = optimize_size ? "2" : "4";
1010
1011 /* Parse values so that we can compare for current value. */
1012 parse_alignment_opts ();
1013 if (flag_align_jumps && !str_align_jumps)
1014 str_align_jumps = "2";
1015 else if (align_jumps.levels[0].get_value () < 2)
1016 str_align_jumps = "2";
1017
1018 if (flag_align_functions && !str_align_functions)
1019 str_align_functions = optimize_size ? "2" : "4";
1020
1021 /* The linker relaxation code breaks when a function contains
1022 alignments that are larger than that at the start of a
1023 compilation unit. */
1024 if (TARGET_RELAX)
1025 {
1026 /* Parse values so that we can compare for current value. */
1027 parse_alignment_opts ();
1028 int min_align = MAX (align_loops.levels[0].get_value (),
1029 align_jumps.levels[0].get_value ());
1030
1031 /* Also take possible .long constants / mova tables into account. */
1032 if (min_align < 4)
1033 min_align = 4;
1034 if (align_functions.levels[0].get_value () < min_align)
1035 {
1036 char *r = XNEWVEC (char, 16);
1037 sprintf (r, "%d", min_align);
1038 str_align_functions = r;
1039 }
1040 }
1041 }
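/* Editorial illustration -- not part of the original sh.c.  A worked example
   of the -mrelax clamp above: if align_loops resolves to 4 and align_jumps
   to 2, then min_align = MAX (4, 2) = 4; the .long constant / mova table
   floor keeps it at 4; and if align_functions resolved to less than 4, it is
   rewritten to the string "4".  The concrete numbers are only an example.  */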
1042 \f
1043 /* Print the operand address in x to the stream. */
1044 static void
1045 sh_print_operand_address (FILE *stream, machine_mode /*mode*/, rtx x)
1046 {
1047 switch (GET_CODE (x))
1048 {
1049 case REG:
1050 case SUBREG:
1051 fprintf (stream, "@%s", reg_names[true_regnum (x)]);
1052 break;
1053
1054 case PLUS:
1055 {
1056 rtx base = XEXP (x, 0);
1057 rtx index = XEXP (x, 1);
1058
1059 switch (GET_CODE (index))
1060 {
1061 case CONST_INT:
1062 fprintf (stream, "@(%d,%s)", (int) INTVAL (index),
1063 reg_names[true_regnum (base)]);
1064 break;
1065
1066 case REG:
1067 case SUBREG:
1068 {
1069 int base_num = true_regnum (base);
1070 int index_num = true_regnum (index);
1071
1072 /* If base or index is R0, make sure that it comes first.
1073 Usually one of them will be R0, but the order might be wrong.
1074 If neither base nor index is R0, it's an error and we just
1075 pass it on to the assembler. This avoids silent wrong code
1076 bugs. */
1077 if (base_num == 0 && index_num != 0)
1078 std::swap (base_num, index_num);
1079
1080 fprintf (stream, "@(%s,%s)", reg_names[index_num],
1081 reg_names[base_num]);
1082 break;
1083 }
1084
1085 default:
1086 gcc_unreachable ();
1087 }
1088 }
1089 break;
1090
1091 case PRE_DEC:
1092 fprintf (stream, "@-%s", reg_names[true_regnum (XEXP (x, 0))]);
1093 break;
1094
1095 case POST_INC:
1096 fprintf (stream, "@%s+", reg_names[true_regnum (XEXP (x, 0))]);
1097 break;
1098
1099 default:
1100 x = mark_constant_pool_use (x);
1101 output_addr_const (stream, x);
1102 break;
1103 }
1104 }
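/* Editorial illustration -- not part of the original sh.c.  Sample outputs of
   sh_print_operand_address, derived directly from the fprintf formats above;
   the register numbers are arbitrary:

     (reg r4)                   -> @r4
     (plus (reg r4) (const 8))  -> @(8,r4)
     (plus (reg r5) (reg r0))   -> @(r0,r5)   (R0 is forced to come first)
     (pre_dec (reg r15))        -> @-r15
     (post_inc (reg r4))        -> @r4+  */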
1105
1106 /* Print operand x (an rtx) in assembler syntax to file stream
1107 according to modifier code.
1108
1109 '.' print a .s if insn needs delay slot
1110 ',' print LOCAL_LABEL_PREFIX
1111 '@' print trapa, rte or rts depending on the function's interrupt attributes
1112 '#' output a nop if there is nothing to put in the delay slot
1113 ''' print likelihood suffix (/u for unlikely).
1114 '>' print branch target if -fverbose-asm
1115 'O' print a constant without the #
1116 'R' print the LSW of a dp value - changes if in little endian
1117 'S' print the MSW of a dp value - changes if in little endian
1118 'T' print the next word of a dp value - same as 'R' in big endian mode.
1119 'M' print .b / .w / .l / .s / .d suffix if operand is a MEM.
1120 'N' print 'r63' if the operand is (const_int 0).
1121 'd' print a V2SF reg as dN instead of fpN.
1122 'm' print a pair `base,offset' or `base,index', for LD and ST.
1123 'U' Likewise for {LD,ST}{HI,LO}.
1124 'V' print the position of a single bit set.
1125 'W' print the position of a single bit cleared.
1126 't' print a memory address which is a register.
1127 'u' prints the lowest 16 bits of CONST_INT, as an unsigned value.
1128 'o' output an operator. */
1129 static void
1130 sh_print_operand (FILE *stream, rtx x, int code)
1131 {
1132 int regno;
1133 machine_mode mode;
1134
1135 switch (code)
1136 {
1137 tree trapa_attr;
1138
1139 case '.':
1140 if (final_sequence
1141 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
1142 && get_attr_length (final_sequence->insn (1)))
1143 fprintf (stream, ASSEMBLER_DIALECT ? "/s" : ".s");
1144 break;
1145 case ',':
1146 fprintf (stream, "%s", LOCAL_LABEL_PREFIX);
1147 break;
1148 case '@':
1149 trapa_attr = lookup_attribute ("trap_exit",
1150 DECL_ATTRIBUTES (current_function_decl));
1151 if (trapa_attr)
1152 fprintf (stream, "trapa #%ld",
1153 (long) TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (trapa_attr))));
1154 else if (sh_cfun_interrupt_handler_p ())
1155 {
1156 if (sh_cfun_resbank_handler_p ())
1157 fprintf (stream, "resbank\n");
1158 fprintf (stream, "rte");
1159 }
1160 else
1161 fprintf (stream, "rts");
1162 break;
1163 case '#':
1164 /* Output a nop if there's nothing in the delay slot. */
1165 if (dbr_sequence_length () == 0)
1166 fprintf (stream, "\n\tnop");
1167 break;
1168 case '\'':
1169 {
1170 rtx note = find_reg_note (current_output_insn, REG_BR_PROB, 0);
1171
1172 if (note
1173 && profile_probability::from_reg_br_prob_note (XINT (note, 0))
1174 < profile_probability::even ())
1175 fputs ("/u", stream);
1176 break;
1177 }
1178 case '>':
1179 if (flag_verbose_asm && JUMP_LABEL (current_output_insn))
1180 {
1181 fputs ("\t! target: ", stream);
1182 output_addr_const (stream, JUMP_LABEL (current_output_insn));
1183 }
1184 break;
1185 case 'O':
1186 x = mark_constant_pool_use (x);
1187 output_addr_const (stream, x);
1188 break;
1189 /* N.B.: %R / %S / %T adjust memory addresses by four.
1190 While they can be used to access 64 bit parts of a larger value
1191 held in general purpose registers, that won't work with memory,
1192 nor with fp registers, since the frxx names are used. */
1193 case 'R':
1194 if (REG_P (x) || GET_CODE (x) == SUBREG)
1195 {
1196 regno = true_regnum (x);
1197 regno += FP_REGISTER_P (regno) ? 1 : SH_REG_LSW_OFFSET;
1198 fputs (reg_names[regno], (stream));
1199 }
1200 else if (MEM_P (x))
1201 {
1202 x = adjust_address (x, SImode, 4 * SH_REG_LSW_OFFSET);
1203 sh_print_operand_address (stream, GET_MODE (x), XEXP (x, 0));
1204 }
1205 else
1206 {
1207 rtx sub = NULL_RTX;
1208
1209 mode = GET_MODE (x);
1210 if (mode == VOIDmode)
1211 mode = DImode;
1212 if (GET_MODE_SIZE (mode) >= 8)
1213 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_LSW_OFFSET);
1214 if (sub)
1215 sh_print_operand (stream, sub, 0);
1216 else
1217 output_operand_lossage ("invalid operand to %%R");
1218 }
1219 break;
1220 case 'S':
1221 if (REG_P (x) || GET_CODE (x) == SUBREG)
1222 {
1223 regno = true_regnum (x);
1224 regno += FP_REGISTER_P (regno) ? 0 : SH_REG_MSW_OFFSET;
1225 fputs (reg_names[regno], (stream));
1226 }
1227 else if (MEM_P (x))
1228 {
1229 x = adjust_address (x, SImode, 4 * SH_REG_MSW_OFFSET);
1230 sh_print_operand_address (stream, GET_MODE (x), XEXP (x, 0));
1231 }
1232 else
1233 {
1234 rtx sub = NULL_RTX;
1235
1236 mode = GET_MODE (x);
1237 if (mode == VOIDmode)
1238 mode = DImode;
1239 if (GET_MODE_SIZE (mode) >= 8)
1240 sub = simplify_subreg (SImode, x, mode, 4 * SH_REG_MSW_OFFSET);
1241 if (sub)
1242 sh_print_operand (stream, sub, 0);
1243 else
1244 output_operand_lossage ("invalid operand to %%S");
1245 }
1246 break;
1247 case 'T':
1248 /* Next word of a double. */
1249 switch (GET_CODE (x))
1250 {
1251 case REG:
1252 fputs (reg_names[REGNO (x) + 1], (stream));
1253 break;
1254 case MEM:
1255 {
1256 machine_mode mode = GET_MODE (x);
1257 if (GET_CODE (XEXP (x, 0)) != PRE_DEC
1258 && GET_CODE (XEXP (x, 0)) != POST_INC)
1259 x = adjust_address (x, SImode, 4);
1260 sh_print_operand_address (stream, mode, XEXP (x, 0));
1261 }
1262 break;
1263 default:
1264 break;
1265 }
1266 break;
1267
1268 case 't':
1269 gcc_assert (MEM_P (x));
1270 x = XEXP (x, 0);
1271 switch (GET_CODE (x))
1272 {
1273 case REG:
1274 case SUBREG:
1275 sh_print_operand (stream, x, 0);
1276 break;
1277 default:
1278 break;
1279 }
1280 break;
1281
1282 case 'o':
1283 switch (GET_CODE (x))
1284 {
1285 case PLUS: fputs ("add", stream); break;
1286 case MINUS: fputs ("sub", stream); break;
1287 case MULT: fputs ("mul", stream); break;
1288 case DIV: fputs ("div", stream); break;
1289 case EQ: fputs ("eq", stream); break;
1290 case NE: fputs ("ne", stream); break;
1291 case GT: case LT: fputs ("gt", stream); break;
1292 case GE: case LE: fputs ("ge", stream); break;
1293 case GTU: case LTU: fputs ("gtu", stream); break;
1294 case GEU: case LEU: fputs ("geu", stream); break;
1295 default:
1296 break;
1297 }
1298 break;
1299 case 'M':
1300 if (MEM_P (x))
1301 {
1302 switch (GET_MODE (x))
1303 {
1304 case E_QImode: fputs (".b", stream); break;
1305 case E_HImode: fputs (".w", stream); break;
1306 case E_SImode: fputs (".l", stream); break;
1307 case E_SFmode: fputs (".s", stream); break;
1308 case E_DFmode: fputs (".d", stream); break;
1309 default: gcc_unreachable ();
1310 }
1311 }
1312 break;
1313
1314 case 'm':
1315 gcc_assert (MEM_P (x));
1316 x = XEXP (x, 0);
1317 /* Fall through. */
1318 case 'U':
1319 switch (GET_CODE (x))
1320 {
1321 case REG:
1322 case SUBREG:
1323 sh_print_operand (stream, x, 0);
1324 fputs (", 0", stream);
1325 break;
1326
1327 case PLUS:
1328 sh_print_operand (stream, XEXP (x, 0), 0);
1329 fputs (", ", stream);
1330 sh_print_operand (stream, XEXP (x, 1), 0);
1331 break;
1332
1333 default:
1334 gcc_unreachable ();
1335 }
1336 break;
1337
1338 case 'V':
1339 {
1340 int num = exact_log2 (INTVAL (x));
1341 gcc_assert (num >= 0);
1342 fprintf (stream, "#%d", num);
1343 }
1344 break;
1345
1346 case 'W':
1347 {
1348 int num = exact_log2 (~INTVAL (x));
1349 gcc_assert (num >= 0);
1350 fprintf (stream, "#%d", num);
1351 }
1352 break;
1353
1354 case 'd':
1355 gcc_assert (REG_P (x) && GET_MODE (x) == V2SFmode);
1356
1357 fprintf ((stream), "d%s", reg_names[REGNO (x)] + 1);
1358 break;
1359
1360 case 'N':
1361 if (x == CONST0_RTX (GET_MODE (x)))
1362 {
1363 fprintf ((stream), "r63");
1364 break;
1365 }
1366 goto default_output;
1367 case 'u':
1368 if (CONST_INT_P (x))
1369 {
1370 fprintf ((stream), "%u", (unsigned) INTVAL (x) & (0x10000 - 1));
1371 break;
1372 }
1373 /* Fall through. */
1374
1375 default_output:
1376 default:
1377 regno = 0;
1378 mode = GET_MODE (x);
1379
1380 switch (GET_CODE (x))
1381 {
1382 case TRUNCATE:
1383 {
1384 rtx inner = XEXP (x, 0);
1385 int offset = 0;
1386 machine_mode inner_mode;
1387
1388 /* We might see SUBREGs with vector mode registers inside. */
1389 if (GET_CODE (inner) == SUBREG
1390 && (GET_MODE_SIZE (GET_MODE (inner))
1391 == GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1392 && subreg_lowpart_p (inner))
1393 inner = SUBREG_REG (inner);
1394 if (CONST_INT_P (inner))
1395 {
1396 x = GEN_INT (trunc_int_for_mode (INTVAL (inner), GET_MODE (x)));
1397 goto default_output;
1398 }
1399 inner_mode = GET_MODE (inner);
1400 if (GET_CODE (inner) == SUBREG
1401 && (GET_MODE_SIZE (GET_MODE (inner))
1402 < GET_MODE_SIZE (GET_MODE (SUBREG_REG (inner))))
1403 && REG_P (SUBREG_REG (inner)))
1404 {
1405 offset = subreg_regno_offset (REGNO (SUBREG_REG (inner)),
1406 GET_MODE (SUBREG_REG (inner)),
1407 SUBREG_BYTE (inner),
1408 GET_MODE (inner));
1409 inner = SUBREG_REG (inner);
1410 }
1411 if (!REG_P (inner) || GET_MODE_SIZE (inner_mode) > 8)
1412 abort ();
1413 /* Floating point register pairs are always big endian;
1414 general purpose registers are 64 bit wide. */
1415 regno = REGNO (inner);
1416 regno = (hard_regno_nregs (regno, inner_mode)
1417 - hard_regno_nregs (regno, mode))
1418 + offset;
1419 x = inner;
1420 goto reg;
1421 }
1422 case SIGN_EXTEND:
1423 x = XEXP (x, 0);
1424 goto reg;
1425 case SUBREG:
1426 gcc_assert (SUBREG_BYTE (x) == 0
1427 && REG_P (SUBREG_REG (x)));
1428
1429 x = SUBREG_REG (x);
1430 /* Fall through. */
1431
1432 reg:
1433 case REG:
1434 regno += REGNO (x);
1435 if (FP_REGISTER_P (regno)
1436 && mode == V16SFmode)
1437 fprintf ((stream), "mtrx%s", reg_names[regno] + 2);
1438 else if (FP_REGISTER_P (REGNO (x))
1439 && mode == V4SFmode)
1440 fprintf ((stream), "fv%s", reg_names[regno] + 2);
1441 else if (REG_P (x)
1442 && mode == V2SFmode)
1443 fprintf ((stream), "fp%s", reg_names[regno] + 2);
1444 else if (FP_REGISTER_P (REGNO (x))
1445 && GET_MODE_SIZE (mode) > 4)
1446 fprintf ((stream), "d%s", reg_names[regno] + 1);
1447 else
1448 fputs (reg_names[regno], (stream));
1449 break;
1450
1451 case MEM:
1452 output_address (GET_MODE (x), XEXP (x, 0));
1453 break;
1454
1455 default:
1456 fputc ('#', stream);
1457 output_addr_const (stream, x);
1458 break;
1459 }
1460 break;
1461 }
1462 }
1463
1464 static bool
1465 sh_print_operand_punct_valid_p (unsigned char code)
1466 {
1467 return (code == '.' || code == '#' || code == '@' || code == ','
1468 || code == '$' || code == '\'' || code == '>');
1469 }
1470
1471 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
1472 static bool
1473 sh_asm_output_addr_const_extra (FILE *file, rtx x)
1474 {
1475 if (GET_CODE (x) == UNSPEC)
1476 {
1477 switch (XINT (x, 1))
1478 {
1479 case UNSPEC_PIC:
1480 /* GLOBAL_OFFSET_TABLE or local symbols, no suffix. */
1481 output_addr_const (file, XVECEXP (x, 0, 0));
1482 break;
1483 case UNSPEC_GOT:
1484 output_addr_const (file, XVECEXP (x, 0, 0));
1485 fputs ("@GOT", file);
1486 break;
1487 case UNSPEC_GOTOFF:
1488 output_addr_const (file, XVECEXP (x, 0, 0));
1489 fputs ("@GOTOFF", file);
1490 break;
1491 case UNSPEC_PLT:
1492 output_addr_const (file, XVECEXP (x, 0, 0));
1493 fputs ("@PLT", file);
1494 break;
1495 case UNSPEC_GOTPLT:
1496 output_addr_const (file, XVECEXP (x, 0, 0));
1497 fputs ("@GOTPLT", file);
1498 break;
1499 case UNSPEC_PCREL:
1500 output_addr_const (file, XVECEXP (x, 0, 0));
1501 fputs ("@PCREL", file);
1502 break;
1503 case UNSPEC_DTPOFF:
1504 output_addr_const (file, XVECEXP (x, 0, 0));
1505 fputs ("@DTPOFF", file);
1506 break;
1507 case UNSPEC_GOTTPOFF:
1508 output_addr_const (file, XVECEXP (x, 0, 0));
1509 fputs ("@GOTTPOFF", file);
1510 break;
1511 case UNSPEC_TPOFF:
1512 output_addr_const (file, XVECEXP (x, 0, 0));
1513 fputs ("@TPOFF", file);
1514 break;
1515 case UNSPEC_CALLER:
1516 {
1517 char name[32];
1518 /* LPCS stands for Label for PIC Call Site. */
1519 targetm.asm_out.generate_internal_label (name, "LPCS",
1520 INTVAL (XVECEXP (x, 0, 0)));
1521 assemble_name (file, name);
1522 }
1523 break;
1524 case UNSPEC_SYMOFF:
1525 output_addr_const (file, XVECEXP (x, 0, 0));
1526 fputc ('-', file);
1527 if (GET_CODE (XVECEXP (x, 0, 1)) == CONST)
1528 {
1529 fputc ('(', file);
1530 output_addr_const (file, XVECEXP (x, 0, 1));
1531 fputc (')', file);
1532 }
1533 else
1534 output_addr_const (file, XVECEXP (x, 0, 1));
1535 break;
1536 case UNSPEC_PCREL_SYMOFF:
1537 output_addr_const (file, XVECEXP (x, 0, 0));
1538 fputs ("-(", file);
1539 output_addr_const (file, XVECEXP (x, 0, 1));
1540 fputs ("-.)", file);
1541 break;
1542 case UNSPEC_GOTFUNCDESC:
1543 output_addr_const (file, XVECEXP (x, 0, 0));
1544 fputs ("@GOTFUNCDESC", file);
1545 break;
1546 case UNSPEC_GOTOFFFUNCDESC:
1547 output_addr_const (file, XVECEXP (x, 0, 0));
1548 fputs ("@GOTOFFFUNCDESC", file);
1549 break;
1550 default:
1551 return false;
1552 }
1553 return true;
1554 }
1555 else
1556 return false;
1557 }
1558 \f
1559 /* Encode symbol attributes of a SYMBOL_REF into its
1560 SYMBOL_REF_FLAGS. */
1561 static void
1562 sh_encode_section_info (tree decl, rtx rtl, int first)
1563 {
1564 default_encode_section_info (decl, rtl, first);
1565
1566 if (TREE_CODE (decl) == FUNCTION_DECL
1567 && sh2a_function_vector_p (decl) && TARGET_SH2A)
1568 SYMBOL_REF_FLAGS (XEXP (rtl, 0)) |= SYMBOL_FLAG_FUNCVEC_FUNCTION;
1569 }
1570
1571 /* Prepare operands for a move define_expand; specifically, one of the
1572 operands must be in a register. */
1573 void
1574 prepare_move_operands (rtx operands[], machine_mode mode)
1575 {
1576 if ((mode == SImode || mode == DImode)
1577 && flag_pic
1578 && ! ((mode == Pmode || mode == ptr_mode)
1579 && tls_symbolic_operand (operands[1], Pmode) != TLS_MODEL_NONE))
1580 {
1581 rtx temp;
1582 if (SYMBOLIC_CONST_P (operands[1]))
1583 {
1584 if (MEM_P (operands[0]))
1585 operands[1] = force_reg (Pmode, operands[1]);
1586 else
1587 {
1588 temp = (!can_create_pseudo_p ()
1589 ? operands[0]
1590 : gen_reg_rtx (Pmode));
1591 operands[1] = legitimize_pic_address (operands[1], mode, temp);
1592 }
1593 }
1594 else if (GET_CODE (operands[1]) == CONST
1595 && GET_CODE (XEXP (operands[1], 0)) == PLUS
1596 && SYMBOLIC_CONST_P (XEXP (XEXP (operands[1], 0), 0)))
1597 {
1598 temp = !can_create_pseudo_p () ? operands[0] : gen_reg_rtx (Pmode);
1599 temp = legitimize_pic_address (XEXP (XEXP (operands[1], 0), 0),
1600 mode, temp);
1601 operands[1] = expand_binop (mode, add_optab, temp,
1602 XEXP (XEXP (operands[1], 0), 1),
1603 (!can_create_pseudo_p ()
1604 ? temp
1605 : gen_reg_rtx (Pmode)),
1606 0, OPTAB_LIB_WIDEN);
1607 }
1608 }
1609
1610 if (! reload_in_progress && ! reload_completed)
1611 {
1612 /* Copy the source to a register if neither operand is a register. */
1613 if (! register_operand (operands[0], mode)
1614 && ! register_operand (operands[1], mode))
1615 operands[1] = copy_to_mode_reg (mode, operands[1]);
1616
1617 if (MEM_P (operands[0]) && ! memory_operand (operands[0], mode))
1618 {
1619 /* This is like change_address_1 (operands[0], mode, 0, 1),
1620 except that we can't use that function because it is static. */
1621 rtx new_rtx = change_address (operands[0], mode, 0);
1622 MEM_COPY_ATTRIBUTES (new_rtx, operands[0]);
1623 operands[0] = new_rtx;
1624 }
1625
1626 /* This case can happen while generating code to move the result
1627 of a library call to the target. Reject `st r0,@(rX,rY)' because
1628 reload will fail to find a spill register for rX, since r0 is already
1629 being used for the source. */
1630 else if (refers_to_regno_p (R0_REG, operands[1])
1631 && MEM_P (operands[0])
1632 && GET_CODE (XEXP (operands[0], 0)) == PLUS
1633 && REG_P (XEXP (XEXP (operands[0], 0), 1)))
1634 operands[1] = copy_to_mode_reg (mode, operands[1]);
1635
1636 /* When displacement addressing is used, RA will assign r0 to
1637 the pseudo register operand of a QI/HImode load/store. This
1638 tends to create a long live range for R0 and might cause
1639 anomalous register spills in some cases with LRA. See PR
1640 target/55212.
1641 We split such a load/store into two move insns via r0 so as to
1642 shorten the R0 live range (an example is sketched after this
1643 block). This makes some code worse but wins on average for LRA.
1644 Also, when base+index addressing is used and the index term is
1645 a subreg, LRA assumes that more hard registers are available
1646 in some situations. That isn't the case for SH in the problematic
1647 case. We can pre-allocate R0 for that index term to avoid
1648 the issue. See PR target/66591. */
1649 else if (sh_lra_p ()
1650 && ! TARGET_SH2A
1651 && ((REG_P (operands[0]) && MEM_P (operands[1]))
1652 || (REG_P (operands[1]) && MEM_P (operands[0]))))
1653 {
1654 bool load_p = REG_P (operands[0]);
1655 rtx reg = operands[load_p ? 0 : 1];
1656 rtx adr = XEXP (operands[load_p ? 1 : 0], 0);
1657
1658 if ((mode == QImode || mode == HImode)
1659 && REGNO (reg) >= FIRST_PSEUDO_REGISTER
1660 && GET_CODE (adr) == PLUS
1661 && REG_P (XEXP (adr, 0))
1662 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1663 && CONST_INT_P (XEXP (adr, 1))
1664 && INTVAL (XEXP (adr, 1)) != 0
1665 && sh_legitimate_index_p (mode, XEXP (adr, 1), false, true))
1666 {
1667 rtx r0_rtx = gen_rtx_REG (mode, R0_REG);
1668 emit_move_insn (r0_rtx, operands[1]);
1669 operands[1] = r0_rtx;
1670 }
1671 if (REGNO (reg) >= FIRST_PSEUDO_REGISTER
1672 && GET_CODE (adr) == PLUS
1673 && REG_P (XEXP (adr, 0))
1674 && (REGNO (XEXP (adr, 0)) >= FIRST_PSEUDO_REGISTER)
1675 && SUBREG_P (XEXP (adr, 1))
1676 && REG_P (SUBREG_REG (XEXP (adr, 1))))
1677 {
1678 rtx r0_rtx = gen_rtx_REG (GET_MODE (XEXP (adr, 1)), R0_REG);
1679 emit_move_insn (r0_rtx, XEXP (adr, 1));
1680 XEXP (adr, 1) = r0_rtx;
1681 }
1682 }
1683 }
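/* For illustration only -- a sketch of the effect of the r0 split above
   (register numbers are arbitrary examples, not real compiler output):
   the data operand of a QI/HImode displacement-addressed access must be
   r0, so a store such as

	mov.b	r8,@(4,r9)

   would force RA to put the pseudo behind r8 into r0 for its whole live
   range.  With the split it is emitted as two moves through r0,

	mov	r8,r0
	mov.b	r0,@(4,r9)

   which confines the r0 live range to these two insns.  */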
1684
1685 if (mode == Pmode || mode == ptr_mode)
1686 {
1687 rtx op0 = operands[0];
1688 rtx op1 = operands[1];
1689 rtx opc;
1690 if (GET_CODE (op1) == CONST
1691 && GET_CODE (XEXP (op1, 0)) == PLUS
1692 && (tls_symbolic_operand (XEXP (XEXP (op1, 0), 0), Pmode)
1693 != TLS_MODEL_NONE))
1694 {
1695 opc = XEXP (XEXP (op1, 0), 1);
1696 op1 = XEXP (XEXP (op1, 0), 0);
1697 }
1698 else
1699 opc = NULL_RTX;
1700
1701 enum tls_model tls_kind;
1702
1703 if (! reload_in_progress && ! reload_completed
1704 && (tls_kind = tls_symbolic_operand (op1, Pmode)) != TLS_MODEL_NONE)
1705 {
1706 rtx tga_op1, tga_ret, tmp, tmp2;
1707
1708 if (! flag_pic
1709 && (tls_kind == TLS_MODEL_GLOBAL_DYNAMIC
1710 || tls_kind == TLS_MODEL_LOCAL_DYNAMIC
1711 || tls_kind == TLS_MODEL_INITIAL_EXEC))
1712 {
1713 static int got_labelno;
1714 /* Don't schedule insns for getting the GOT address when
1715 the first scheduling pass is enabled, to avoid spill
1716 failures for R0. */
1717 if (flag_schedule_insns)
1718 emit_insn (gen_blockage ());
1719 emit_insn (gen_GOTaddr2picreg (GEN_INT (++got_labelno)));
1720 emit_use (gen_rtx_REG (SImode, PIC_REG));
1721 if (flag_schedule_insns)
1722 emit_insn (gen_blockage ());
1723 }
1724
1725 switch (tls_kind)
1726 {
1727 case TLS_MODEL_GLOBAL_DYNAMIC:
1728 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1729 if (TARGET_FDPIC)
1730 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
1731 sh_get_fdpic_reg_initial_val ());
1732 emit_call_insn (gen_tls_global_dynamic (tga_ret, op1));
1733 tmp = gen_reg_rtx (Pmode);
1734 emit_move_insn (tmp, tga_ret);
1735 op1 = tmp;
1736 break;
1737
1738 case TLS_MODEL_LOCAL_DYNAMIC:
1739 tga_ret = gen_rtx_REG (Pmode, R0_REG);
1740 if (TARGET_FDPIC)
1741 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
1742 sh_get_fdpic_reg_initial_val ());
1743 emit_call_insn (gen_tls_local_dynamic (tga_ret, op1));
1744
1745 tmp = gen_reg_rtx (Pmode);
1746 emit_move_insn (tmp, tga_ret);
1747
1748 if (register_operand (op0, Pmode))
1749 tmp2 = op0;
1750 else
1751 tmp2 = gen_reg_rtx (Pmode);
1752
1753 emit_insn (gen_symDTPOFF2reg (tmp2, op1, tmp));
1754 op1 = tmp2;
1755 break;
1756
1757 case TLS_MODEL_INITIAL_EXEC:
1758 tga_op1 = !can_create_pseudo_p () ? op0 : gen_reg_rtx (Pmode);
1759 tmp = gen_sym2GOTTPOFF (op1);
1760 if (TARGET_FDPIC)
1761 emit_move_insn (gen_rtx_REG (Pmode, PIC_REG),
1762 sh_get_fdpic_reg_initial_val ());
1763 emit_insn (gen_tls_initial_exec (tga_op1, tmp));
1764 op1 = tga_op1;
1765 break;
1766
1767 case TLS_MODEL_LOCAL_EXEC:
1768 tmp2 = gen_reg_rtx (Pmode);
1769 emit_insn (gen_store_gbr (tmp2));
1770 tmp = gen_reg_rtx (Pmode);
1771 emit_insn (gen_symTPOFF2reg (tmp, op1));
1772
1773 if (register_operand (op0, Pmode))
1774 op1 = op0;
1775 else
1776 op1 = gen_reg_rtx (Pmode);
1777
1778 emit_insn (gen_addsi3 (op1, tmp, tmp2));
1779 break;
1780
1781 default:
1782 gcc_unreachable ();
1783 }
1784 if (opc)
1785 emit_insn (gen_addsi3 (op1, op1, force_reg (SImode, opc)));
1786 operands[1] = op1;
1787 }
1788 }
1789
1790 if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
1791 {
1792 rtx base, offset;
1793 split_const (operands[1], &base, &offset);
1794
1795 if (GET_CODE (base) == SYMBOL_REF
1796 && !offset_within_block_p (base, INTVAL (offset)))
1797 {
1798 rtx tmp = can_create_pseudo_p () ? gen_reg_rtx (mode) : operands[0];
1799 emit_move_insn (tmp, base);
1800 if (!arith_operand (offset, mode))
1801 offset = force_reg (mode, offset);
1802 emit_insn (gen_add3_insn (operands[0], tmp, offset));
1803 }
1804 }
1805 }
1806
1807 /* Implement the canonicalize_comparison target hook for the combine
1808 pass. For the target hook this function is invoked via
1809 sh_canonicalize_comparison. This function is also re-used to
1810 canonicalize comparisons in cbranch pattern expanders. */
1811 static void
1812 sh_canonicalize_comparison (enum rtx_code& cmp, rtx& op0, rtx& op1,
1813 machine_mode mode,
1814 bool op0_preserve_value)
1815 {
1816 /* When invoked from within the combine pass the mode is not specified,
1817 so try to get it from one of the operands. */
1818 if (mode == VOIDmode)
1819 mode = GET_MODE (op0);
1820 if (mode == VOIDmode)
1821 mode = GET_MODE (op1);
1822
1823 // We need to have a mode to do something useful here.
1824 if (mode == VOIDmode)
1825 return;
1826
1827 // Currently, we don't deal with floats here.
1828 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
1829 return;
1830
1831 // Make sure that the constant operand is the second operand.
1832 if (CONST_INT_P (op0) && !CONST_INT_P (op1))
1833 {
1834 if (op0_preserve_value)
1835 return;
1836
1837 std::swap (op0, op1);
1838 cmp = swap_condition (cmp);
1839 }
1840
1841 if (CONST_INT_P (op1))
1842 {
1843 /* Try to adjust the constant operand in such a way that available
1844 comparison insns can be utilized better and the constant can be
1845 loaded with a 'mov #imm,Rm' insn. This avoids a load from the
1846 constant pool. */
1847 const HOST_WIDE_INT val = INTVAL (op1);
1848
1849 /* x > -1 --> x >= 0
1850 x > 0xFFFFFF7F --> x >= 0xFFFFFF80
1851 x <= -1 --> x < 0
1852 x <= 0xFFFFFF7F --> x < 0xFFFFFF80 */
1853 if ((val == -1 || val == -0x81) && (cmp == GT || cmp == LE))
1854 {
1855 cmp = cmp == GT ? GE : LT;
1856 op1 = gen_int_mode (val + 1, mode);
1857 }
1858
1859 /* x >= 1 --> x > 0
1860 x >= 0x80 --> x > 0x7F
1861 x < 1 --> x <= 0
1862 x < 0x80 --> x <= 0x7F */
1863 else if ((val == 1 || val == 0x80) && (cmp == GE || cmp == LT))
1864 {
1865 cmp = cmp == GE ? GT : LE;
1866 op1 = gen_int_mode (val - 1, mode);
1867 }
1868
1869 /* unsigned x >= 1 --> x != 0
1870 unsigned x < 1 --> x == 0 */
1871 else if (val == 1 && (cmp == GEU || cmp == LTU))
1872 {
1873 cmp = cmp == GEU ? NE : EQ;
1874 op1 = CONST0_RTX (mode);
1875 }
1876
1877 /* unsigned x >= 0x80 --> unsigned x > 0x7F
1878 unsigned x < 0x80 --> unsigned x <= 0x7F */
1879 else if (val == 0x80 && (cmp == GEU || cmp == LTU))
1880 {
1881 cmp = cmp == GEU ? GTU : LEU;
1882 op1 = gen_int_mode (val - 1, mode);
1883 }
1884
1885 /* unsigned x > 0 --> x != 0
1886 unsigned x <= 0 --> x == 0 */
1887 else if (val == 0 && (cmp == GTU || cmp == LEU))
1888 cmp = cmp == GTU ? NE : EQ;
1889
1890 /* unsigned x > 0x7FFFFFFF --> signed x < 0
1891 unsigned x <= 0x7FFFFFFF --> signed x >= 0 */
1892 else if (mode == SImode && (cmp == GTU || cmp == LEU)
1893 && val == 0x7FFFFFFF)
1894 {
1895 cmp = cmp == GTU ? LT : GE;
1896 op1 = const0_rtx;
1897 }
1898
1899 /* unsigned x >= 0x80000000 --> signed x < 0
1900 unsigned x < 0x80000000 --> signed x >= 0 */
1901 else if (mode == SImode && (cmp == GEU || cmp == LTU)
1902 && (unsigned HOST_WIDE_INT)val
1903 == ((unsigned HOST_WIDE_INT)0x7FFFFFFF + 1))
1904 {
1905 cmp = cmp == GEU ? LT : GE;
1906 op1 = const0_rtx;
1907 }
1908 }
1909 }
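/* A concrete illustration of the adjustments above (a sketch of the usual
   SH idioms, not a statement about what later passes will finally emit):
   SH has cmp/pz ("Rn >= 0") and cmp/pl ("Rn > 0"), but the only
   compare-with-immediate is cmp/eq #imm8,r0.  Rewriting

	x > -1	-->  x >= 0	(cmp/pz, no constant needed)
	x >= 1	-->  x > 0	(cmp/pl, no constant needed)

   therefore avoids materializing -1 or 1 in a register just for the
   comparison.  */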
1910
1911 /* This function implements the canonicalize_comparison target hook.
1912 This wrapper around the internally used sh_canonicalize_comparison
1913 function is needed to do the enum rtx_code <-> int conversion.
1914 Target hooks cannot use enum rtx_code in their definitions. */
1915 static void
1916 sh_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
1917 bool op0_preserve_value)
1918 {
1919 enum rtx_code tmp_code = (enum rtx_code)*code;
1920 sh_canonicalize_comparison (tmp_code, *op0, *op1,
1921 VOIDmode, op0_preserve_value);
1922 *code = (int)tmp_code;
1923 }
1924
1925 /* This function implements the legitimate_combined_insn target hook,
1926 which the combine pass uses to reject combined insns early, before
1927 it tries to recog the insn and determine its cost. */
1928 static bool
1929 sh_legitimate_combined_insn (rtx_insn* insn)
1930 {
1931 /* Reject combinations of memory loads and zero extensions, as these
1932 interfere with other combine patterns such as zero extracts and bit
1933 tests. The SH2A movu.{b|w} insns are formed later in the
1934 'sh_optimize_extu_exts' pass after combine/split1. */
1935 rtx p = PATTERN (insn);
1936 if (GET_CODE (p) == SET
1937 && REG_P (XEXP (p, 0)) && GET_MODE (XEXP (p, 0)) == SImode
1938 && GET_CODE (XEXP (p, 1)) == ZERO_EXTEND
1939 && MEM_P (XEXP (XEXP (p, 1), 0)))
1940 return false;
1941
1942 return true;
1943 }
1944
1945 bool
1946 sh_fixed_condition_code_regs (unsigned int* p1, unsigned int* p2)
1947 {
1948 *p1 = T_REG;
1949 *p2 = INVALID_REGNUM;
1950 return true;
1951 }
1952
1953 /* Try to calculate the branch distance of a conditional branch in bytes.
1954
1955 FIXME: Because of PR 59189 we can't use the CFG here. Instead just
1956 walk from this insn into the next (fall-through) basic block and see if
1957 we hit the label. */
1958 unsigned int
1959 sh_cbranch_distance (rtx_insn* _cbranch_insn, unsigned int max_dist)
1960 {
1961 rtx_jump_insn* cbranch_insn = safe_as_a<rtx_jump_insn*> (_cbranch_insn);
1962
1963 if (dump_file)
1964 {
1965 fprintf (dump_file, "sh_cbranch_distance insn = \n");
1966 print_rtl_single (dump_file, cbranch_insn);
1967 }
1968
1969 unsigned int dist = 0;
1970
1971 for (rtx_insn* i = next_nonnote_insn (cbranch_insn);
1972 i != NULL && dist < max_dist; i = next_nonnote_insn (i))
1973 {
1974 const unsigned int i_len = get_attr_length (i);
1975 dist += i_len;
1976
1977 if (dump_file)
1978 fprintf (dump_file, " insn %d length = %u dist = %u\n",
1979 INSN_UID (i), i_len, dist);
1980
1981 if (rtx_code_label* l = dyn_cast<rtx_code_label*> (i))
1982 {
1983 if (l == cbranch_insn->jump_target ())
1984 {
1985 if (dump_file)
1986 fprintf (dump_file, " cbranch dist = %u\n", dist);
1987 return dist;
1988 }
1989 break;
1990 }
1991 }
1992
1993 if (dump_file)
1994 fprintf (dump_file, " cbranch dist = unknown\n");
1995
1996 return unknown_cbranch_distance;
1997 }
1998
1999 enum rtx_code
2000 prepare_cbranch_operands (rtx *operands, machine_mode mode,
2001 enum rtx_code comparison)
2002 {
2003 gcc_assert (can_create_pseudo_p ());
2004
2005 if (comparison == LAST_AND_UNUSED_RTX_CODE)
2006 comparison = GET_CODE (operands[0]);
2007
2008 sh_canonicalize_comparison (comparison, operands[1], operands[2],
2009 mode, false);
2010
2011 rtx op1 = operands[1];
2012 operands[1] = force_reg (mode, op1);
2013
2014 /* When we are handling DImode comparisons, we want to keep constants so
2015 that we can optimize the component comparisons; however, memory loads
2016 are better issued as a whole so that they can be scheduled well.
2017 SImode equality comparisons allow I08 constants, but only when they
2018 compare r0. Hence, if operands[1] has to be loaded from somewhere else
2019 into a register, that register might as well be r0, and we allow the
2020 constant. If it is already in a register, this is likely to be
2021 allocated to a different hard register, thus we load the constant into
2022 a register unless it is zero. */
2023 if (!REG_P (operands[2])
2024 && (!CONST_INT_P (operands[2])
2025 || (mode == SImode && operands[2] != CONST0_RTX (SImode)
2026 && ((comparison != EQ && comparison != NE)
2027 || (REG_P (op1) && REGNO (op1) != R0_REG)
2028 || !satisfies_constraint_I08 (operands[2])))))
2029 operands[2] = force_reg (mode, operands[2]);
2030
2031 return comparison;
2032 }
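/* For illustration of the constraint described above (arbitrary registers,
   a sketch only): an SImode equality test against a small constant can use
   the immediate form only through r0,

	cmp/eq	#5,r0		! I08 constant compared with r0
	bt	.Ltarget

   whereas with any other register the constant has to be loaded first,

	mov	#5,r1
	cmp/eq	r1,r2
	bt	.Ltarget

   hence the code above keeps an I08 constant only for EQ/NE comparisons
   whose other operand can plausibly end up in r0, and forces it into a
   register otherwise.  */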
2033
2034 static void
2035 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison,
2036 profile_probability probability)
2037 {
2038 rtx (*branch_expander) (rtx) = gen_branch_true;
2039 comparison = prepare_cbranch_operands (operands, SImode, comparison);
2040 switch (comparison)
2041 {
2042 case NE: case LT: case LE: case LTU: case LEU:
2043 comparison = reverse_condition (comparison);
2044 branch_expander = gen_branch_false;
2045 default: ;
2046 }
2047 emit_insn (gen_rtx_SET (get_t_reg_rtx (),
2048 gen_rtx_fmt_ee (comparison, SImode,
2049 operands[1], operands[2])));
2050 rtx_insn *jump = emit_jump_insn (branch_expander (operands[3]));
2051 if (probability.initialized_p ())
2052 add_reg_br_prob_note (jump, probability);
2053 }
2054
2055 void
2056 expand_cbranchsi4 (rtx *operands, enum rtx_code comparison)
2057 {
2058 expand_cbranchsi4 (operands, comparison,
2059 profile_probability::uninitialized ());
2060 }
2061
2062 /* ??? How should we distribute probabilities when more than one branch
2063 is generated? So far we only have some ad-hoc observations:
2064 - If the operands are random, they are likely to differ in both parts.
2065 - If comparing items in a hash chain, the operands are random or equal;
2066 operation should be EQ or NE.
2067 - If items are searched in an ordered tree from the root, we can expect
2068 the highpart to be unequal about half of the time; operation should be
2069 an inequality comparison, operands non-constant, and overall probability
2070 about 50%. Likewise for quicksort.
2071 - Range checks will be often made against constants. Even if we assume for
2072 simplicity an even distribution of the non-constant operand over a
2073 sub-range here, the same probability could be generated with differently
2074 wide sub-ranges - as long as the ratio of the part of the subrange that
2075 is before the threshold to the part that comes after the threshold stays
2076 the same. Thus, we can't really tell anything here;
2077 assuming random distribution is at least simple.
2078 */
2079 bool
2080 expand_cbranchdi4 (rtx *operands, enum rtx_code comparison)
2081 {
2082 enum rtx_code msw_taken, msw_skip, lsw_taken;
2083 rtx_code_label *skip_label = NULL;
2084 rtx op1h, op1l, op2h, op2l;
2085 int num_branches;
2086 profile_probability prob, rev_prob;
2087 profile_probability msw_taken_prob = profile_probability::uninitialized (),
2088 msw_skip_prob = profile_probability::uninitialized (),
2089 lsw_taken_prob = profile_probability::uninitialized ();
2090
2091 comparison = prepare_cbranch_operands (operands, DImode, comparison);
2092 op1h = gen_highpart_mode (SImode, DImode, operands[1]);
2093 op2h = gen_highpart_mode (SImode, DImode, operands[2]);
2094 op1l = gen_lowpart (SImode, operands[1]);
2095 op2l = gen_lowpart (SImode, operands[2]);
2096 msw_taken = msw_skip = lsw_taken = LAST_AND_UNUSED_RTX_CODE;
2097 prob = split_branch_probability;
2098 rev_prob = prob.invert ();
2099 switch (comparison)
2100 {
2101 case EQ:
2102 msw_skip = NE;
2103 lsw_taken = EQ;
2104 if (prob.initialized_p ())
2105 {
2106 /* FIXME: This is not optimal. We do not really know the probability
2107 that values differ in the MSW only, but we should probably distribute
2108 probabilities more evenly. */
2109 msw_skip_prob = rev_prob;
2110 lsw_taken_prob = prob > profile_probability::never ()
2111 ? profile_probability::guessed_always ()
2112 : profile_probability::guessed_never ();
2113 }
2114 break;
2115 case NE:
2116 msw_taken = NE;
2117 msw_taken_prob = prob;
2118 lsw_taken = NE;
2119 lsw_taken_prob = profile_probability::guessed_never ();
2120 break;
2121 case GTU: case GT:
2122 msw_taken = comparison;
2123 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2124 break;
2125 if (comparison != GTU || op2h != CONST0_RTX (SImode))
2126 msw_skip = swap_condition (msw_taken);
2127 lsw_taken = GTU;
2128 break;
2129 case GEU: case GE:
2130 if (op2l == CONST0_RTX (SImode))
2131 msw_taken = comparison;
2132 else
2133 {
2134 msw_taken = comparison == GE ? GT : GTU;
2135 msw_skip = swap_condition (msw_taken);
2136 lsw_taken = GEU;
2137 }
2138 break;
2139 case LTU: case LT:
2140 msw_taken = comparison;
2141 if (op2l == CONST0_RTX (SImode))
2142 break;
2143 msw_skip = swap_condition (msw_taken);
2144 lsw_taken = LTU;
2145 break;
2146 case LEU: case LE:
2147 if (CONST_INT_P (op2l) && INTVAL (op2l) == -1)
2148 msw_taken = comparison;
2149 else
2150 {
2151 lsw_taken = LEU;
2152 if (comparison == LE)
2153 msw_taken = LT;
2154 else if (op2h != CONST0_RTX (SImode))
2155 msw_taken = LTU;
2156 else
2157 {
2158 msw_skip = swap_condition (LTU);
2159 break;
2160 }
2161 msw_skip = swap_condition (msw_taken);
2162 }
2163 break;
2164 default: return false;
2165 }
2166 num_branches = ((msw_taken != LAST_AND_UNUSED_RTX_CODE)
2167 + (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2168 + (lsw_taken != LAST_AND_UNUSED_RTX_CODE));
2169 if (comparison != EQ && comparison != NE && num_branches > 1)
2170 {
2171 if (!CONSTANT_P (operands[2])
2172 && prob.initialized_p ()
2173 && prob.to_reg_br_prob_base () >= (int) (REG_BR_PROB_BASE * 3 / 8U)
2174 && prob.to_reg_br_prob_base () <= (int) (REG_BR_PROB_BASE * 5 / 8U))
2175 {
2176 msw_taken_prob = prob.apply_scale (1, 2);
2177 msw_skip_prob = rev_prob.apply_scale (REG_BR_PROB_BASE,
2178 rev_prob.to_reg_br_prob_base ()
2179 + REG_BR_PROB_BASE);
2180 lsw_taken_prob = prob;
2181 }
2182 else
2183 {
2184 msw_taken_prob = prob;
2185 msw_skip_prob = profile_probability::guessed_always ();
2186 /* ??? If we have a constant op2h, should we use that when
2187 calculating lsw_taken_prob? */
2188 lsw_taken_prob = prob;
2189 }
2190 }
2191 operands[1] = op1h;
2192 operands[2] = op2h;
2193
2194 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2195 expand_cbranchsi4 (operands, msw_taken, msw_taken_prob);
2196 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2197 {
2198 rtx taken_label = operands[3];
2199
2200 /* Operands were possibly modified, but msw_skip doesn't expect this.
2201 Always use the original ones. */
2202 if (msw_taken != LAST_AND_UNUSED_RTX_CODE)
2203 {
2204 operands[1] = op1h;
2205 operands[2] = op2h;
2206 }
2207
2208 operands[3] = skip_label = gen_label_rtx ();
2209 expand_cbranchsi4 (operands, msw_skip, msw_skip_prob);
2210 operands[3] = taken_label;
2211 }
2212 operands[1] = op1l;
2213 operands[2] = op2l;
2214 if (lsw_taken != LAST_AND_UNUSED_RTX_CODE)
2215 expand_cbranchsi4 (operands, lsw_taken, lsw_taken_prob);
2216 if (msw_skip != LAST_AND_UNUSED_RTX_CODE)
2217 emit_label (skip_label);
2218 return true;
2219 }
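/* For illustration, a rough sketch of the shape produced by the splitting
   above for a DImode equality branch `if (a == b) goto L' (symbolic
   operands; the real output depends on register allocation and later
   passes):

	cmp/eq	a_hi,b_hi	! msw_skip = NE
	bf	.Lskip		! high words differ -> cannot be equal
	cmp/eq	a_lo,b_lo	! lsw_taken = EQ
	bt	.L
   .Lskip:

   i.e. one SImode comparison per word plus at most two branches.  */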
2220
2221 /* Given an operand, return 1 if the evaluated operand plugged into an
2222 if_then_else will result in a branch_true, 0 if branch_false, or
2223 -1 if neither applies. The truth table goes like this:
2224
2225 op | cmpval | code | result
2226 ---------+--------+---------+--------------------
2227 T (0) | 0 | EQ (1) | 0 = 0 ^ (0 == 1)
2228 T (0) | 1 | EQ (1) | 1 = 0 ^ (1 == 1)
2229 T (0) | 0 | NE (0) | 1 = 0 ^ (0 == 0)
2230 T (0) | 1 | NE (0) | 0 = 0 ^ (1 == 0)
2231 !T (1) | 0 | EQ (1) | 1 = 1 ^ (0 == 1)
2232 !T (1) | 1 | EQ (1) | 0 = 1 ^ (1 == 1)
2233 !T (1) | 0 | NE (0) | 0 = 1 ^ (0 == 0)
2234 !T (1) | 1 | NE (0) | 1 = 1 ^ (1 == 0) */
2235 int
2236 sh_eval_treg_value (rtx op)
2237 {
2238 if (t_reg_operand (op, GET_MODE (op)))
2239 return 1;
2240 if (negt_reg_operand (op, GET_MODE (op)))
2241 return 0;
2242
2243 rtx_code code = GET_CODE (op);
2244 if ((code != EQ && code != NE) || !CONST_INT_P (XEXP (op, 1)))
2245 return -1;
2246
2247 int cmpop = code == EQ ? 1 : 0;
2248 int cmpval = INTVAL (XEXP (op, 1));
2249 if (cmpval != 0 && cmpval != 1)
2250 return -1;
2251
2252 int t;
2253 if (t_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2254 t = 0;
2255 else if (negt_reg_operand (XEXP (op, 0), GET_MODE (XEXP (op, 0))))
2256 t = 1;
2257 else
2258 return -1;
2259
2260 return t ^ (cmpval == cmpop);
2261 }
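/* Worked example for the table above: for op = (eq (reg T) (const_int 0))
   we get t = 0 (plain T operand), cmpop = 1 (EQ) and cmpval = 0, so the
   result is 0 ^ (0 == 1) = 0, i.e. the operand behaves like a
   branch_false -- the first row of the truth table.  */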
2262
2263 /* Emit INSN, possibly in a PARALLEL with an USE/CLOBBER of FPSCR bits in case
2264 of floating-point comparisons. */
2265 static void
2266 sh_emit_set_t_insn (rtx insn, machine_mode mode)
2267 {
2268 if (TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT
2269 && GET_CODE (insn) != PARALLEL)
2270 {
2271 insn = gen_rtx_PARALLEL (VOIDmode,
2272 gen_rtvec (3, insn,
2273 gen_rtx_CLOBBER (VOIDmode, gen_rtx_REG (SImode, FPSCR_STAT_REG)),
2274 gen_rtx_USE (VOIDmode, gen_rtx_REG (SImode, FPSCR_MODES_REG))));
2275 }
2276 emit_insn (insn);
2277 }
2278
2279 /* Prepare the operands for an scc instruction; make sure that the
2280 compare has been done and the result is in T_REG. */
2281 void
2282 sh_emit_scc_to_t (enum rtx_code code, rtx op0, rtx op1)
2283 {
2284 rtx t_reg = get_t_reg_rtx ();
2285 enum rtx_code oldcode = code;
2286
2287 /* First need a compare insn. */
2288 switch (code)
2289 {
2290 case NE:
2291 /* It isn't possible to handle this case. */
2292 gcc_unreachable ();
2293 case LT:
2294 code = GT;
2295 break;
2296 case LE:
2297 code = GE;
2298 break;
2299 case LTU:
2300 code = GTU;
2301 break;
2302 case LEU:
2303 code = GEU;
2304 break;
2305 default:
2306 break;
2307 }
2308 if (code != oldcode)
2309 std::swap (op0, op1);
2310
2311 machine_mode mode = GET_MODE (op0);
2312 if (mode == VOIDmode)
2313 mode = GET_MODE (op1);
2314
2315 op0 = force_reg (mode, op0);
2316 if ((code != EQ && code != NE
2317 && (op1 != const0_rtx
2318 || code == GTU || code == GEU || code == LTU || code == LEU))
2319 || (mode == DImode && op1 != const0_rtx)
2320 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2321 op1 = force_reg (mode, op1);
2322
2323 sh_emit_set_t_insn (gen_rtx_SET (t_reg,
2324 gen_rtx_fmt_ee (code, SImode, op0, op1)),
2325 mode);
2326 }
2327
2328 /* Called from the md file, set up the operands of a compare instruction. */
2329 void
2330 sh_emit_compare_and_branch (rtx *operands, machine_mode mode)
2331 {
2332 enum rtx_code code = GET_CODE (operands[0]);
2333 enum rtx_code branch_code;
2334 rtx op0 = operands[1];
2335 rtx op1 = operands[2];
2336 rtx insn;
2337 bool need_ccmpeq = false;
2338
2339 if (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT)
2340 {
2341 op0 = force_reg (mode, op0);
2342 op1 = force_reg (mode, op1);
2343 }
2344 else
2345 {
2346 if (code != EQ || mode == DImode)
2347 {
2348 /* Force args into regs, since we can't use constants here. */
2349 op0 = force_reg (mode, op0);
2350 if (op1 != const0_rtx || code == GTU || code == GEU)
2351 op1 = force_reg (mode, op1);
2352 }
2353 }
2354
2355 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2356 {
2357 if (code == LT
2358 || (code == LE && TARGET_IEEE && TARGET_SH2E)
2359 || (code == GE && !(TARGET_IEEE && TARGET_SH2E)))
2360 {
2361 std::swap (op0, op1);
2362 code = swap_condition (code);
2363 }
2364
2365 /* GE becomes fcmp/gt+fcmp/eq, for SH2E and TARGET_IEEE only. */
2366 if (code == GE)
2367 {
2368 gcc_assert (TARGET_IEEE && TARGET_SH2E);
2369 need_ccmpeq = true;
2370 code = GT;
2371 }
2372
2373 /* Now we can have EQ, NE, GT, LE. NE and LE are then transformed
2374 to EQ/GT respectively. */
2375 gcc_assert (code == EQ || code == GT || code == NE || code == LE);
2376 }
2377
2378 switch (code)
2379 {
2380 case EQ:
2381 case GT:
2382 case GE:
2383 case GTU:
2384 case GEU:
2385 branch_code = code;
2386 break;
2387 case NE:
2388 case LT:
2389 case LE:
2390 case LTU:
2391 case LEU:
2392 branch_code = reverse_condition (code);
2393 break;
2394 default:
2395 gcc_unreachable ();
2396 }
2397
2398 insn = gen_rtx_SET (get_t_reg_rtx (),
2399 gen_rtx_fmt_ee (branch_code, SImode, op0, op1));
2400
2401 sh_emit_set_t_insn (insn, mode);
2402 if (need_ccmpeq)
2403 sh_emit_set_t_insn (gen_ieee_ccmpeqsf_t (op0, op1), mode);
2404
2405 if (branch_code == code)
2406 emit_jump_insn (gen_branch_true (operands[3]));
2407 else
2408 emit_jump_insn (gen_branch_false (operands[3]));
2409 }
2410
2411 void
2412 sh_emit_compare_and_set (rtx *operands, machine_mode mode)
2413 {
2414 enum rtx_code code = GET_CODE (operands[1]);
2415 rtx op0 = operands[2];
2416 rtx op1 = operands[3];
2417 rtx_code_label *lab = NULL;
2418 bool invert = false;
2419
2420 op0 = force_reg (mode, op0);
2421 if ((code != EQ && code != NE
2422 && (op1 != const0_rtx
2423 || code == GTU || code == GEU || code == LTU || code == LEU))
2424 || (mode == DImode && op1 != const0_rtx)
2425 || (TARGET_SH2E && GET_MODE_CLASS (mode) == MODE_FLOAT))
2426 op1 = force_reg (mode, op1);
2427
2428 if (GET_MODE_CLASS (mode) == MODE_FLOAT)
2429 {
2430 if (code == LT || code == LE)
2431 {
2432 std::swap (op0, op1);
2433 code = swap_condition (code);
2434 }
2435 if (code == GE)
2436 {
2437 if (TARGET_IEEE)
2438 {
2439 lab = gen_label_rtx ();
2440 sh_emit_scc_to_t (EQ, op0, op1);
2441 emit_jump_insn (gen_branch_true (lab));
2442 code = GT;
2443 }
2444 else
2445 {
2446 code = LT;
2447 invert = true;
2448 }
2449 }
2450 }
2451
2452 if (code == NE)
2453 {
2454 code = EQ;
2455 invert = true;
2456 }
2457
2458 sh_emit_scc_to_t (code, op0, op1);
2459 if (lab)
2460 emit_label (lab);
2461 if (invert)
2462 emit_insn (gen_movnegt (operands[0], get_t_reg_rtx ()));
2463 else
2464 emit_move_insn (operands[0], get_t_reg_rtx ());
2465 }
2466 \f
2467 /* Functions to output assembly code. */
2468
2469 /* Return a sequence of instructions to perform DI or DF move.
2470
2471 Since the SH cannot move a DI or DF in one instruction, we have
2472 to take care when we see overlapping source and dest registers. */
2473 const char *
2474 output_movedouble (rtx insn ATTRIBUTE_UNUSED, rtx operands[],
2475 machine_mode mode)
2476 {
2477 rtx dst = operands[0];
2478 rtx src = operands[1];
2479
2480 if (MEM_P (dst)
2481 && GET_CODE (XEXP (dst, 0)) == PRE_DEC)
2482 return "mov.l %T1,%0" "\n"
2483 " mov.l %1,%0";
2484
2485 if (register_operand (dst, mode)
2486 && register_operand (src, mode))
2487 {
2488 if (REGNO (src) == MACH_REG)
2489 return "sts mach,%S0" "\n"
2490 " sts macl,%R0";
2491
2492 /* When mov.d r1,r2 do r2->r3 then r1->r2;
2493 when mov.d r1,r0 do r1->r0 then r2->r1. */
2494 if (REGNO (src) + 1 == REGNO (dst))
2495 return "mov %T1,%T0" "\n"
2496 " mov %1,%0";
2497 else
2498 return "mov %1,%0" "\n"
2499 " mov %T1,%T0";
2500 }
2501 else if (CONST_INT_P (src))
2502 {
2503 if (INTVAL (src) < 0)
2504 output_asm_insn ("mov #-1,%S0", operands);
2505 else
2506 output_asm_insn ("mov #0,%S0", operands);
2507
2508 return "mov %1,%R0";
2509 }
2510 else if (MEM_P (src))
2511 {
2512 int ptrreg = -1;
2513 int dreg = REGNO (dst);
2514 rtx inside = XEXP (src, 0);
2515
2516 switch (GET_CODE (inside))
2517 {
2518 case REG:
2519 ptrreg = REGNO (inside);
2520 break;
2521
2522 case SUBREG:
2523 ptrreg = subreg_regno (inside);
2524 break;
2525
2526 case PLUS:
2527 ptrreg = REGNO (XEXP (inside, 0));
2528 /* ??? A r0+REG address shouldn't be possible here, because it isn't
2529 an offsettable address. Unfortunately, offsettable addresses use
2530 QImode to check the offset, and a QImode offsettable address
2531 requires r0 for the other operand, which is not currently
2532 supported, so we can't use the 'o' constraint.
2533 Thus we must check for and handle r0+REG addresses here.
2534 We punt for now, since this is likely very rare. */
2535 gcc_assert (!REG_P (XEXP (inside, 1)));
2536 break;
2537
2538 case LABEL_REF:
2539 return "mov.l %1,%0" "\n"
2540 " mov.l %1+4,%T0";
2541 case POST_INC:
2542 return "mov.l %1,%0" "\n"
2543 " mov.l %1,%T0";
2544 default:
2545 gcc_unreachable ();
2546 }
2547
2548 /* Work out the safe way to copy. Copy into the second half first. */
2549 if (dreg == ptrreg)
2550 return "mov.l %T1,%T0" "\n"
2551 " mov.l %1,%0";
2552 }
2553
2554 return "mov.l %1,%0" "\n"
2555 " mov.l %T1,%T0";
2556 }
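/* For illustration (arbitrary registers, a sketch only): when the
   destination overlaps the pointer, e.g. a DImode load from @r4 into the
   r4/r5 pair, the "second half first" rule above yields

	mov.l	@(4,r4),r5
	mov.l	@r4,r4

   so the base register is still intact when the remaining word is
   loaded.  */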
2557
2558 /* Print an instruction which would have gone into a delay slot after
2559 another instruction, but couldn't because the other instruction expanded
2560 into a sequence where putting the slot insn at the end wouldn't work. */
2561 static void
2562 print_slot (rtx_sequence *seq)
2563 {
2564 final_scan_insn (seq->insn (1), asm_out_file, optimize, 1, NULL);
2565
2566 seq->insn (1)->set_deleted ();
2567 }
2568
2569 const char *
2570 output_far_jump (rtx_insn *insn, rtx op)
2571 {
2572 struct { rtx lab, reg, op; } this_jmp;
2573 rtx_code_label *braf_base_lab = NULL;
2574 const char *jump;
2575 int far;
2576 int offset = branch_dest (insn) - INSN_ADDRESSES (INSN_UID (insn));
2577 rtx_insn *prev;
2578
2579 this_jmp.lab = gen_label_rtx ();
2580
2581 if (TARGET_SH2
2582 && offset >= -32764
2583 && offset - get_attr_length (insn) <= 32766
2584 && ! CROSSING_JUMP_P (insn))
2585 {
2586 far = 0;
2587 jump = "mov.w %O0,%1" "\n"
2588 " braf %1";
2589 }
2590 else
2591 {
2592 far = 1;
2593 if (flag_pic)
2594 {
2595 if (TARGET_SH2)
2596 jump = "mov.l %O0,%1" "\n"
2597 " braf %1";
2598 else
2599 jump = "mov.l r0,@-r15" "\n"
2600 " mova %O0,r0" "\n"
2601 " mov.l @r0,%1" "\n"
2602 " add r0,%1" "\n"
2603 " mov.l @r15+,r0" "\n"
2604 " jmp @%1";
2605 }
2606 else
2607 jump = "mov.l %O0,%1" "\n"
2608 " jmp @%1";
2609 }
2610 /* If we have a scratch register available, use it. */
2611 if (NONJUMP_INSN_P ((prev = prev_nonnote_insn (insn)))
2612 && INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
2613 {
2614 this_jmp.reg = SET_DEST (XVECEXP (PATTERN (prev), 0, 0));
2615 if (REGNO (this_jmp.reg) == R0_REG && flag_pic && ! TARGET_SH2)
2616 jump = "mov.l r1,@-r15" "\n"
2617 " mova %O0,r0" "\n"
2618 " mov.l @r0,r1" "\n"
2619 " add r1,r0" "\n"
2620 " mov.l @r15+,r1" "\n"
2621 " jmp @%1";
2622 output_asm_insn (jump, &this_jmp.lab);
2623 if (dbr_sequence_length ())
2624 print_slot (final_sequence);
2625 else
2626 output_asm_insn ("nop", 0);
2627 }
2628 else
2629 {
2630 /* Output the delay slot insn first if any. */
2631 if (dbr_sequence_length ())
2632 print_slot (final_sequence);
2633
2634 this_jmp.reg = gen_rtx_REG (SImode, 13);
2635 output_asm_insn ("mov.l r13,@-r15", 0);
2636 output_asm_insn (jump, &this_jmp.lab);
2637 output_asm_insn ("mov.l @r15+,r13", 0);
2638 }
2639 if (far && flag_pic && TARGET_SH2)
2640 {
2641 braf_base_lab = gen_label_rtx ();
2642 (*targetm.asm_out.internal_label) (asm_out_file, "L",
2643 CODE_LABEL_NUMBER (braf_base_lab));
2644 }
2645 if (far)
2646 output_asm_insn (".align 2", 0);
2647 (*targetm.asm_out.internal_label) (asm_out_file, "L", CODE_LABEL_NUMBER (this_jmp.lab));
2648 this_jmp.op = op;
2649 if (far && flag_pic)
2650 {
2651 if (TARGET_SH2)
2652 this_jmp.lab = braf_base_lab;
2653 output_asm_insn (".long %O2-%O0", &this_jmp.lab);
2654 }
2655 else
2656 output_asm_insn (far ? ".long %O2" : ".word %O2-%O0", &this_jmp.lab);
2657 return "";
2658 }
2659
2660 /* Local label counter, used for constants in the pool and inside
2661 pattern branches. */
2662 static int lf = 100;
2663
2664 /* Output code for ordinary branches. */
2665 const char *
2666 output_branch (int logic, rtx_insn *insn, rtx *operands)
2667 {
2668 switch (get_attr_length (insn))
2669 {
2670 case 6:
2671 /* This can happen if filling the delay slot has caused a forward
2672 branch to exceed its range (we could reverse it, but only
2673 when we know we won't overextend other branches; this should
2674 best be handled by relaxation).
2675 It can also happen when other condbranches hoist the delay slot insn
2676 from their destination, thus leading to a code size increase.
2677 But the branch will still be in the range -4092..+4098 bytes. */
2678 if (! TARGET_RELAX)
2679 {
2680 int label = lf++;
2681 /* The call to print_slot will clobber the operands. */
2682 rtx op0 = operands[0];
2683
2684 /* If the instruction in the delay slot is annulled (true), then
2685 there is no delay slot where we can put it now. The only safe
2686 place for it is after the label. final will do that by default. */
2687
2688 if (final_sequence
2689 && ! INSN_ANNULLED_BRANCH_P (final_sequence->insn (0))
2690 && get_attr_length (final_sequence->insn (1)))
2691 {
2692 asm_fprintf (asm_out_file, "\tb%s%ss\t%LLF%d\n", logic ? "f" : "t",
2693 ASSEMBLER_DIALECT ? "/" : ".", label);
2694 print_slot (final_sequence);
2695 }
2696 else
2697 asm_fprintf (asm_out_file, "\tb%s\t%LLF%d\n", logic ? "f" : "t", label);
2698
2699 output_asm_insn ("bra\t%l0", &op0);
2700 fprintf (asm_out_file, "\tnop\n");
2701 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2702
2703 return "";
2704 }
2705 /* FALLTHRU */
2706 /* When relaxing, handle this like a short branch. The linker
2707 will fix it up if it still doesn't fit after relaxation. */
2708 case 2:
2709 return logic ? "bt%.\t%l0" : "bf%.\t%l0";
2710
2711 /* These are for SH2e, in which we have to account for the
2712 extra nop because of the hardware bug in annulled branches. */
2713 case 8:
2714 if (! TARGET_RELAX)
2715 {
2716 int label = lf++;
2717
2718 gcc_assert (!final_sequence
2719 || !(INSN_ANNULLED_BRANCH_P
2720 (XVECEXP (final_sequence, 0, 0))));
2721 asm_fprintf (asm_out_file, "b%s%ss\t%LLF%d\n",
2722 logic ? "f" : "t",
2723 ASSEMBLER_DIALECT ? "/" : ".", label);
2724 fprintf (asm_out_file, "\tnop\n");
2725 output_asm_insn ("bra\t%l0", operands);
2726 fprintf (asm_out_file, "\tnop\n");
2727 (*targetm.asm_out.internal_label) (asm_out_file, "LF", label);
2728
2729 return "";
2730 }
2731 /* FALLTHRU */
2732 case 4:
2733 {
2734 char buffer[10];
2735
2736 sprintf (buffer, "b%s%ss\t%%l0",
2737 logic ? "t" : "f",
2738 ASSEMBLER_DIALECT ? "/" : ".");
2739 output_asm_insn (buffer, &operands[0]);
2740 return "nop";
2741 }
2742
2743 default:
2744 /* There should be no branches longer than this at this point - that
2745 would indicate that something has destroyed the branches set
2746 up in machine_dependent_reorg. */
2747 gcc_unreachable ();
2748 }
2749 }
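/* For illustration, the length-6 case above turns an out-of-range
   branch-if-true into an inverted short branch around an unconditional
   one (label names are schematic):

	bf	LF100
	bra	.Ltarget
	nop
   LF100:

   The length-8 SH2e variant emits roughly the same shape with an extra
   nop, to work around the annulled-branch hardware bug noted above.  */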
2750
2751 /* Output a code sequence for INSN using TEMPL with OPERANDS; but before,
2752 fill in operands[9] with a label to the successor insn.
2753 We try to use jump threading where possible.
2754 If CODE matches the comparison in the IF_THEN_ELSE of a following jump,
2755 we assume the jump is taken. I.e. EQ means follow jmp and bf, NE means
2756 follow jmp and bt, if the address is in range. */
2757 const char *
2758 output_branchy_insn (enum rtx_code code, const char *templ,
2759 rtx_insn *insn, rtx *operands)
2760 {
2761 rtx_insn *next_insn = NEXT_INSN (insn);
2762
2763 if (next_insn && JUMP_P (next_insn) && condjump_p (next_insn))
2764 {
2765 rtx src = SET_SRC (PATTERN (next_insn));
2766 if (GET_CODE (src) == IF_THEN_ELSE && GET_CODE (XEXP (src, 0)) != code)
2767 {
2768 /* Following branch not taken */
2769 rtx_code_label *lab = gen_label_rtx ();
2770 emit_label_after (lab, next_insn);
2771 INSN_ADDRESSES_NEW (lab,
2772 INSN_ADDRESSES (INSN_UID (next_insn))
2773 + get_attr_length (next_insn));
2774 operands[9] = lab;
2775 return templ;
2776 }
2777 else
2778 {
2779 int offset = (branch_dest (next_insn)
2780 - INSN_ADDRESSES (INSN_UID (next_insn)) + 4);
2781 if (offset >= -252 && offset <= 258)
2782 {
2783 if (GET_CODE (src) == IF_THEN_ELSE)
2784 /* branch_true */
2785 src = XEXP (src, 1);
2786 operands[9] = src;
2787 return templ;
2788 }
2789 }
2790 }
2791 rtx_code_label *lab = gen_label_rtx ();
2792 emit_label_after (lab, insn);
2793 INSN_ADDRESSES_NEW (lab,
2794 INSN_ADDRESSES (INSN_UID (insn))
2795 + get_attr_length (insn));
2796 operands[9] = lab;
2797 return templ;
2798 }
2799
2800 const char *
2801 output_ieee_ccmpeq (rtx_insn *insn, rtx *operands)
2802 {
2803 return output_branchy_insn (NE, "bt %l9" "\n"
2804 " fcmp/eq %1,%0",
2805 insn, operands);
2806 }
2807 \f
2808 /* Output the start of the assembler file. */
2809 static void
2810 sh_file_start (void)
2811 {
2812 default_file_start ();
2813
2814 if (TARGET_ELF)
2815 /* We need to show the text section with the proper
2816 attributes as in TEXT_SECTION_ASM_OP, before dwarf2out
2817 emits it without attributes in TEXT_SECTION_ASM_OP, else GAS
2818 will complain. We can teach GAS specifically about the
2819 default attributes for our choice of text section, but
2820 then we would have to change GAS again if/when we change
2821 the text section name. */
2822 fprintf (asm_out_file, "%s\n", TEXT_SECTION_ASM_OP);
2823 else
2824 /* Switch to the data section so that the coffsem symbol
2825 isn't in the text section. */
2826 switch_to_section (data_section);
2827
2828 if (TARGET_LITTLE_ENDIAN)
2829 fputs ("\t.little\n", asm_out_file);
2830 }
2831 \f
2832 /* Implementation of TARGET_ASM_INTEGER for SH. Pointers to functions
2833 need to be output as pointers to function descriptors for
2834 FDPIC. */
2835
2836 static bool
2837 sh_assemble_integer (rtx value, unsigned int size, int aligned_p)
2838 {
2839 if (TARGET_FDPIC && size == UNITS_PER_WORD
2840 && GET_CODE (value) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (value))
2841 {
2842 fputs ("\t.long\t", asm_out_file);
2843 output_addr_const (asm_out_file, value);
2844 fputs ("@FUNCDESC\n", asm_out_file);
2845 return true;
2846 }
2847 return default_assemble_integer (value, size, aligned_p);
2848 }
2849 \f
2850 /* Check if PAT includes UNSPEC_CALLER unspec pattern. */
2851 static bool
2852 unspec_caller_rtx_p (rtx pat)
2853 {
2854 rtx base, offset;
2855 split_const (pat, &base, &offset);
2856
2857 if (GET_CODE (base) == UNSPEC)
2858 {
2859 if (XINT (base, 1) == UNSPEC_CALLER)
2860 return true;
2861 for (int i = 0; i < XVECLEN (base, 0); i++)
2862 if (unspec_caller_rtx_p (XVECEXP (base, 0, i)))
2863 return true;
2864 }
2865 return false;
2866 }
2867
2868 /* Indicate that INSN cannot be duplicated. This is true for insns
2869 that generate a unique label. */
2870 static bool
2871 sh_cannot_copy_insn_p (rtx_insn *insn)
2872 {
2873 if (!reload_completed || !flag_pic)
2874 return false;
2875
2876 if (!NONJUMP_INSN_P (insn))
2877 return false;
2878 if (asm_noperands (insn) >= 0)
2879 return false;
2880
2881 rtx pat = PATTERN (insn);
2882
2883 if (GET_CODE (pat) == CLOBBER || GET_CODE (pat) == USE)
2884 return false;
2885
2886 if (TARGET_FDPIC && GET_CODE (pat) == PARALLEL)
2887 {
2888 rtx t = XVECEXP (pat, 0, XVECLEN (pat, 0) - 1);
2889 if (GET_CODE (t) == USE && unspec_caller_rtx_p (XEXP (t, 0)))
2890 return true;
2891 }
2892
2893 if (GET_CODE (pat) != SET)
2894 return false;
2895 pat = SET_SRC (pat);
2896
2897 if (unspec_caller_rtx_p (pat))
2898 return true;
2899
2900 return false;
2901 }
2902 \f
2903 /* Number of instructions used to make an arithmetic right shift by N. */
2904 static const char ashiftrt_insns[] =
2905 { 0,1,2,3,4,5,8,8,8,8,8,8,8,8,8,8,2,3,4,5,8,8,8,8,8,8,8,8,8,8,8,2};
2906
2907 /* Description of a logical left or right shift, when expanded to a sequence
2908 of 1/2/8/16 shifts.
2909 Notice that one bit right shifts clobber the T bit. One bit left shifts
2910 are done with an 'add Rn,Rm' insn and thus do not clobber the T bit. */
2911 enum
2912 {
2913 ASHL_CLOBBERS_T = 1 << 0,
2914 LSHR_CLOBBERS_T = 1 << 1
2915 };
2916
2917 struct ashl_lshr_sequence
2918 {
2919 char insn_count;
2920 signed char amount[6];
2921 char clobbers_t;
2922 };
2923
2924 static const struct ashl_lshr_sequence ashl_lshr_seq[32] =
2925 {
2926 { 0, { 0 }, 0 }, // 0
2927 { 1, { 1 }, LSHR_CLOBBERS_T },
2928 { 1, { 2 }, 0 },
2929 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
2930 { 2, { 2, 2 }, 0 }, // 4
2931 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
2932 { 3, { 2, 2, 2 }, 0 },
2933 { 4, { 2, 2, 1, 2 }, LSHR_CLOBBERS_T },
2934 { 1, { 8 }, 0 }, // 8
2935 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
2936 { 2, { 8, 2 }, 0 },
2937 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
2938 { 3, { 8, 2, 2 }, 0 }, // 12
2939 { 4, { 8, 2, 1, 2 }, LSHR_CLOBBERS_T },
2940 { 3, { 8, -2, 8 }, 0 },
2941 { 3, { 8, -1, 8 }, ASHL_CLOBBERS_T },
2942 { 1, { 16 }, 0 }, // 16
2943 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
2944 { 2, { 16, 2 }, 0 },
2945 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
2946 { 3, { 16, 2, 2 }, 0 }, // 20
2947 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
2948 { 3, { 16, -2, 8 }, 0 },
2949 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
2950 { 2, { 16, 8 }, 0 }, // 24
2951 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
2952 { 3, { 16, 8, 2 }, 0 },
2953 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
2954 { 4, { 16, 8, 2, 2 }, 0 }, // 28
2955 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
2956 { 3, { 16, -2, 16 }, 0 },
2957
2958 /* For a right shift by 31 a 2 insn shll-movt sequence can be used.
2959 For a left shift by 31 a 2 insn and-rotl sequence can be used.
2960 However, the shift-and combiner code needs this entry here to be in
2961 terms of real shift insns. */
2962 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
2963 };
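/* Reading the table above, for example (a sketch; rN stands for any
   general register): a shift by 13 uses the sequence { 8, 2, 1, 2 }, so

	left  by 13:	shll8 rN; shll2 rN; add rN,rN; shll2 rN
	right by 13:	shlr8 rN; shlr2 rN; shlr rN;   shlr2 rN

   The left variant does the 1-bit step with an add and leaves T alone;
   the shlr in the right variant is what gives this entry its
   LSHR_CLOBBERS_T flag.  */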
2964
2965 /* Alternative sequences for shift amounts < 16; up to the three highmost
2966 bits might be clobbered. This is typically used in combination with some
2967 kind of sign or zero extension. */
2968 static const struct ashl_lshr_sequence ext_ashl_lshr_seq[32] =
2969 {
2970 { 0, { 0 }, 0 }, // 0
2971 { 1, { 1 }, LSHR_CLOBBERS_T },
2972 { 1, { 2 }, 0 },
2973 { 2, { 2, 1 }, LSHR_CLOBBERS_T },
2974 { 2, { 2, 2 }, 0 }, // 4
2975 { 3, { 2, 1, 2 }, LSHR_CLOBBERS_T },
2976 { 2, { 8, -2 }, 0 },
2977 { 2, { 8, -1 }, ASHL_CLOBBERS_T },
2978 { 1, { 8 }, 0 }, // 8
2979 { 2, { 8, 1 }, LSHR_CLOBBERS_T },
2980 { 2, { 8, 2 }, 0 },
2981 { 3, { 8, 1, 2 }, LSHR_CLOBBERS_T },
2982 { 3, { 8, 2, 2 }, 0 }, // 12
2983 { 3, { 16, -2, -1 }, ASHL_CLOBBERS_T },
2984 { 2, { 16, -2 }, 0 },
2985 { 2, { 16, -1 }, ASHL_CLOBBERS_T },
2986 { 1, { 16 }, 0 }, // 16
2987 { 2, { 16, 1 }, LSHR_CLOBBERS_T },
2988 { 2, { 16, 2 }, 0 },
2989 { 3, { 16, 1, 2 }, LSHR_CLOBBERS_T },
2990 { 3, { 16, 2, 2 }, 0 }, // 20
2991 { 4, { 16, 2, 1, 2 }, LSHR_CLOBBERS_T },
2992 { 3, { 16, -2, 8 }, 0 },
2993 { 3, { 16, -1, 8 }, ASHL_CLOBBERS_T },
2994 { 2, { 16, 8 }, 0 }, // 24
2995 { 3, { 16, 1, 8 }, LSHR_CLOBBERS_T },
2996 { 3, { 16, 8, 2 }, 0 },
2997 { 4, { 16, 8, 1, 2 }, LSHR_CLOBBERS_T },
2998 { 4, { 16, 8, 2, 2 }, 0 }, // 28
2999 { 4, { 16, -1, -2, 16 }, ASHL_CLOBBERS_T },
3000 { 3, { 16, -2, 16 }, 0 },
3001 { 3, { 16, -1, 16 }, ASHL_CLOBBERS_T }
3002 };
3003
3004 /* Return true if a shift left consisting of 1/2/8/16 shift instructions
3005 will clobber the T bit. */
3006 bool
3007 sh_ashlsi_clobbers_t_reg_p (rtx shift_amount)
3008 {
3009 gcc_assert (CONST_INT_P (shift_amount));
3010
3011 const int shift_amount_i = INTVAL (shift_amount) & 31;
3012
3013 /* Special case for shift count of 31: use and-rotl sequence. */
3014 if (shift_amount_i == 31)
3015 return true;
3016
3017 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3018 & ASHL_CLOBBERS_T) != 0;
3019 }
3020
3021 /* Return true if a logical right shift consisting of 1/2/8/16 shift
3022 instructions will clobber the T bit. */
3023 bool
3024 sh_lshrsi_clobbers_t_reg_p (rtx shift_amount)
3025 {
3026 gcc_assert (CONST_INT_P (shift_amount));
3027
3028 /* For right shifts the constant might be negative. */
3029 const int shift_amount_i = std::abs (INTVAL (shift_amount)) & 31;
3030
3031 /* Special case for shift count of 31: use shll-movt sequence. */
3032 if (shift_amount_i == 31)
3033 return true;
3034
3035 return (ashl_lshr_seq[shift_amount_i].clobbers_t
3036 & LSHR_CLOBBERS_T) != 0;
3037 }
3038
3039 /* Return true if it is potentially beneficial to use a dynamic shift
3040 instruction (shad / shld) instead of a combination of 1/2/8/16
3041 shift instructions for the specified shift count.
3042 If dynamic shifts are not available, always return false. */
3043 bool
3044 sh_dynamicalize_shift_p (rtx count)
3045 {
3046 gcc_assert (CONST_INT_P (count));
3047
3048 /* For right shifts the constant might be negative. */
3049 const int shift_amount_i = std::abs (INTVAL (count)) & 31;
3050 int insn_count;
3051
3052 /* For left and right shifts, there are shorter 2 insn sequences for
3053 shift amounts of 31. */
3054 if (shift_amount_i == 31)
3055 insn_count = 2;
3056 else
3057 insn_count = ashl_lshr_seq[shift_amount_i].insn_count;
3058
3059 return TARGET_DYNSHIFT && (insn_count > 1 + SH_DYNAMIC_SHIFT_COST);
3060 }
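/* For example (a sketch, assuming a core with dynamic shifts and an
   SH_DYNAMIC_SHIFT_COST of 1): a constant logical left shift by 13 needs
   4 insns from the table above, while the dynamic form needs only

	mov	#13,r1
	shld	r1,r2

   i.e. 2 insns, so sh_dynamicalize_shift_p returns true for that count.  */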
3061
3062 /* Assuming we have a value that has been sign-extended by at least one bit,
3063 can we use the ext_ashl_lshr_seq sequences with the last shift turned to an
3064 arithmetic shift to shift it by N without data loss, and quicker than by
3065 other means? */
3066 #define EXT_SHIFT_SIGNED(n) (((n) | 8) == 15)
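/* Note: ((n) | 8) == 15 holds exactly for n == 7 and n == 15 -- the
   ext_ashl_lshr_seq entries { 8, -1 } and { 16, -1 }, whose final 1-bit
   right shift is the one that can be turned into an arithmetic shift.  */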
3067
3068 /* Return the cost of a shift. */
3069 static inline int
3070 shiftcosts (rtx x)
3071 {
3072 if (GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD)
3073 {
3074 if (GET_MODE (x) == DImode
3075 && CONST_INT_P (XEXP (x, 1))
3076 && INTVAL (XEXP (x, 1)) == 1)
3077 return 2;
3078
3079 /* Everything else is invalid, because there is no pattern for it. */
3080 return -1;
3081 }
3082 /* If shift by a non constant, then this will be expensive. */
3083 if (!CONST_INT_P (XEXP (x, 1)))
3084 return SH_DYNAMIC_SHIFT_COST;
3085
3086 /* Otherwise, return the true cost in instructions. Cope with out of range
3087 shift counts more or less arbitrarily. */
3088 int value = INTVAL (XEXP (x, 1)) & 31;
3089
3090 if (GET_CODE (x) == ASHIFTRT)
3091 {
3092 int cost = ashiftrt_insns[value];
3093 /* If dynamic shifts are available and profitable in this case, then we
3094 put the constant in a reg and use shad. */
3095 if (cost > 1 + SH_DYNAMIC_SHIFT_COST)
3096 cost = 1 + SH_DYNAMIC_SHIFT_COST;
3097 return cost;
3098 }
3099 else
3100 return ashl_lshr_seq[value].insn_count;
3101 }
3102
3103 /* Return the cost of an AND/XOR/IOR operation. */
3104 static inline int
3105 and_xor_ior_costs (rtx x, int code)
3106 {
3107 /* On SH1-4 we have only max. SImode operations.
3108 Double the cost for modes > SImode. */
3109 const int cost_scale = GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD ? 2 : 1;
3110
3111 /* A logical operation with two registers is a single cycle
3112 instruction. */
3113 if (!CONST_INT_P (XEXP (x, 1)))
3114 return 1 * cost_scale;
3115
3116 int i = INTVAL (XEXP (x, 1));
3117
3118 /* These constants are single cycle extu.[bw] instructions. */
3119 if ((i == 0xff || i == 0xffff) && code == AND)
3120 return 1 * cost_scale;
3121 /* Constants that can be used in an instruction as an immediate are
3122 a single cycle, but this requires r0, so make it a little more
3123 expensive. */
3124 if (CONST_OK_FOR_K08 (i))
3125 return 2 * cost_scale;
3126 /* Constants that can be loaded with a mov immediate need one more cycle.
3127 This case is probably unnecessary. */
3128 if (CONST_OK_FOR_I08 (i))
3129 return 2 * cost_scale;
3130 /* Any other constant requires an additional 2 cycle pc-relative load.
3131 This case is probably unnecessary. */
3132 return 3 * cost_scale;
3133 }
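/* For example (a sketch; the numbers are the heuristic costs used above,
   not measured cycle counts): `x & 0xff' is a single extu.b and costs 1,
   `x & 0x3f' can use `and #0x3f,r0' but is charged 2 because it ties the
   result to r0, and `x & 0x12345' needs a constant-pool load first and is
   charged 3.  */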
3134
3135 /* Return the cost of an addition or a subtraction. */
3136 static inline int
3137 addsubcosts (rtx x)
3138 {
3139 if (GET_MODE (x) == SImode)
3140 {
3141 /* The addc or subc patterns will eventually become one or two
3142 instructions. Below are some costs for some of the patterns
3143 which combine would reject because the costs of the individual
3144 insns in the patterns are lower.
3145
3146 FIXME: It would be much easier if we had something like insn cost
3147 attributes and the cost calculation machinery used those attributes
3148 in the first place. This would eliminate redundant recog-like C
3149 code to calculate costs of complex patterns. */
3150 rtx op0 = XEXP (x, 0);
3151 rtx op1 = XEXP (x, 1);
3152
3153 if (GET_CODE (x) == PLUS)
3154 {
3155 if (GET_CODE (op0) == AND
3156 && XEXP (op0, 1) == const1_rtx
3157 && (GET_CODE (op1) == PLUS
3158 || (GET_CODE (op1) == MULT && XEXP (op1, 1) == const2_rtx)))
3159 return 1;
3160
3161 if (GET_CODE (op0) == MULT && XEXP (op0, 1) == const2_rtx
3162 && GET_CODE (op1) == LSHIFTRT
3163 && CONST_INT_P (XEXP (op1, 1)) && INTVAL (XEXP (op1, 1)) == 31)
3164 return 1;
3165 }
3166 /* Let's assume that adding the result of an insn that stores into
3167 the T bit is cheap. */
3168 if (treg_set_expr (op1, SImode))
3169 return 1;
3170 if (treg_set_expr (op0, SImode))
3171 return 1;
3172 }
3173
3174 /* On SH1-4 we have only max. SImode operations.
3175 Double the cost for modes > SImode. */
3176 const int cost_scale = GET_MODE_SIZE (GET_MODE (x)) > UNITS_PER_WORD ? 2 : 1;
3177
3178 /* Adding a register is a single cycle insn. */
3179 if (REG_P (XEXP (x, 1))
3180 || GET_CODE (XEXP (x, 1)) == SUBREG)
3181 return 1 * cost_scale;
3182
3183 /* Likewise for small constants. */
3184 if (CONST_INT_P (XEXP (x, 1))
3185 && CONST_OK_FOR_ADD (INTVAL (XEXP (x, 1))))
3186 return 1 * cost_scale;
3187
3188 /* Any other constant requires a 2 cycle pc-relative load plus an
3189 addition. */
3190 return 3 * cost_scale;
3191 }
3192
3193 /* Return the cost of a multiply. */
3194 static inline int
3195 multcosts (rtx x ATTRIBUTE_UNUSED)
3196 {
3197 if (sh_multcost >= 0)
3198 return sh_multcost;
3199
3200 if (TARGET_SH2)
3201 {
3202 /* We have a mul insn, so we can never take more than the mul and the
3203 read of the mac reg, but count more because of the latency and extra
3204 reg usage. */
3205 if (optimize_size)
3206 return 2;
3207 return 3;
3208 }
3209
3210 /* If we're aiming at small code, then just count the number of
3211 insns in a multiply call sequence. */
3212 if (optimize_size)
3213 return 5;
3214
3215 /* Otherwise count all the insns in the routine we'd be calling too. */
3216 return 20;
3217 }
3218
3219 /* Compute a (partial) cost for rtx X. Return true if the complete
3220 cost has been computed, and false if subexpressions should be
3221 scanned. In either case, *TOTAL contains the cost result. */
3222 static bool
3223 sh_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
3224 int opno ATTRIBUTE_UNUSED,
3225 int *total, bool speed ATTRIBUTE_UNUSED)
3226 {
3227 int code = GET_CODE (x);
3228
3229 switch (code)
3230 {
3231 /* The lower-subreg pass decides whether to split multi-word regs
3232 into individual regs by looking at the cost for a SET of certain
3233 modes with the following patterns:
3234 (set (reg) (reg))
3235 (set (reg) (const_int 0))
3236 On machines that support vector-move operations a multi-word move
3237 is the same cost as an individual reg move. On SH there is no
3238 vector-move, so we have to provide the correct cost in the number
3239 of move insns to load/store the reg of the mode in question. */
3240 case SET:
3241 if (sh_movt_set_dest (x) != NULL || sh_movrt_set_dest (x) != NULL)
3242 {
3243 *total = COSTS_N_INSNS (1);
3244 return true;
3245 }
3246
3247 if (register_operand (SET_DEST (x), VOIDmode)
3248 && (register_operand (SET_SRC (x), VOIDmode)
3249 || satisfies_constraint_Z (SET_SRC (x))))
3250 {
3251 const machine_mode mode = GET_MODE (SET_DEST (x));
3252 *total = COSTS_N_INSNS (GET_MODE_SIZE (mode)
3253 / mov_insn_size (mode, TARGET_SH2A));
3254 return true;
3255 }
3256 return false;
3257
3258 /* The cost of a mem access is mainly the cost of the address mode. */
3259 case MEM:
3260 *total = sh_address_cost (XEXP (x, 0), GET_MODE (x), MEM_ADDR_SPACE (x),
3261 true);
3262 return true;
3263
3264 case IF_THEN_ELSE:
3265 /* This case is required for the if_then_else negc pattern. */
3266 if (treg_set_expr (XEXP (x, 0), SImode))
3267 {
3268 *total = COSTS_N_INSNS (1);
3269 return true;
3270 }
3271 else
3272 return false;
3273
3274 /* Zero extracts of single bits are usually combine patterns for the
3275 tst insns. */
3276 case ZERO_EXTRACT:
3277 if (GET_CODE (XEXP (x, 0)) == XOR
3278 && arith_reg_operand (XEXP (XEXP (x, 0), 0), VOIDmode)
3279 && XEXP (x, 1) == const1_rtx
3280 && CONST_INT_P (XEXP (x, 2))
3281 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3282 /* Check that the xor constant overlaps with the extracted bit. */
3283 && (INTVAL (XEXP (XEXP (x, 0), 1)) & (1LL << INTVAL (XEXP (x, 2)))))
3284 {
3285 *total = 1; //COSTS_N_INSNS (1);
3286 return true;
3287 }
3288
3289 /* div0s variant. */
3290 if (GET_CODE (XEXP (x, 0)) == XOR
3291 && GET_CODE (XEXP (XEXP (x, 0), 0)) == XOR
3292 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))
3293 {
3294 *total = 1;
3295 return true;
3296 }
3297 return false;
3298
3299 /* The cost of a sign or zero extend depends on whether the source is a
3300 reg or a mem. In case of a mem take the address into account. */
3301 case SIGN_EXTEND:
3302 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3303 {
3304 *total = COSTS_N_INSNS (1);
3305 return true;
3306 }
3307 if (MEM_P (XEXP (x, 0)))
3308 {
3309 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3310 GET_MODE (XEXP (x, 0)),
3311 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3312 return true;
3313 }
3314 return false;
3315
3316 case ZERO_EXTEND:
3317 if (arith_reg_operand (XEXP (x, 0), GET_MODE (XEXP (x, 0))))
3318 {
3319 *total = COSTS_N_INSNS (1);
3320 return true;
3321 }
3322 else if (TARGET_SH2A && MEM_P (XEXP (x, 0))
3323 && (GET_MODE (XEXP (x, 0)) == QImode
3324 || GET_MODE (XEXP (x, 0)) == HImode))
3325 {
3326 /* Handle SH2A's movu.b and movu.w insn. */
3327 *total = sh_address_cost (XEXP (XEXP (x, 0), 0),
3328 GET_MODE (XEXP (x, 0)),
3329 MEM_ADDR_SPACE (XEXP (x, 0)), true);
3330 return true;
3331 }
3332 return false;
3333
3334 /* mems for SFmode and DFmode can be inside a parallel due to
3335 the way the fpscr is handled. */
3336 case PARALLEL:
3337 for (int i = 0; i < XVECLEN (x, 0); i++)
3338 {
3339 rtx xx = XVECEXP (x, 0, i);
3340 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 0)))
3341 {
3342 *total = sh_address_cost (XEXP (XEXP (xx, 0), 0),
3343 GET_MODE (XEXP (xx, 0)),
3344 MEM_ADDR_SPACE (XEXP (xx, 0)), true);
3345 return true;
3346 }
3347 if (GET_CODE (xx) == SET && MEM_P (XEXP (xx, 1)))
3348 {
3349 *total = sh_address_cost (XEXP (XEXP (xx, 1), 0),
3350 GET_MODE (XEXP (xx, 1)),
3351 MEM_ADDR_SPACE (XEXP (xx, 1)), true);
3352 return true;
3353 }
3354 }
3355
3356 if (sh_1el_vec (x, VOIDmode))
3357 *total = outer_code != SET;
3358 else if (sh_rep_vec (x, VOIDmode))
3359 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3360 + (outer_code != SET));
3361 else
3362 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3363 return true;
3364
3365 case CONST_INT:
3366 if (CONST_OK_FOR_I08 (INTVAL (x)))
3367 *total = 0;
3368 else if ((outer_code == AND || outer_code == IOR || outer_code == XOR)
3369 && CONST_OK_FOR_K08 (INTVAL (x)))
3370 *total = 1;
3371 /* prepare_cmp_insn will force costly constants into registers before
3372 the cbranch[sd]i4 patterns can see them, so preserve potentially
3373 interesting ones not covered by I08 above. */
3374 else if (outer_code == COMPARE
3375 && ((unsigned HOST_WIDE_INT) INTVAL (x)
3376 == (unsigned HOST_WIDE_INT) 0x7fffffff + 1
3377 || INTVAL (x) == 0x7fffffff
3378 || INTVAL (x) == 0x80 || INTVAL (x) == -0x81))
3379 *total = 1;
3380 else
3381 *total = 8;
3382 return true;
3383
3384 case EQ:
3385 /* An and with a constant compared against zero is
3386 most likely going to be a TST #imm, R0 instruction. */
3387 if (XEXP (x, 1) == const0_rtx
3388 && ((GET_CODE (XEXP (x, 0)) == AND
3389 || (SUBREG_P (XEXP (x, 0))
3390 && GET_CODE (SUBREG_REG (XEXP (x, 0))) == AND))
3391 || GET_CODE (XEXP (x, 0)) == ZERO_EXTRACT))
3392 {
3393 *total = 1;
3394 return true;
3395 }
3396
3397 else if (XEXP (x, 1) == const0_rtx
3398 && GET_CODE (XEXP (x, 0)) == AND
3399 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3400 && GET_CODE (XEXP (XEXP (x, 0), 0)) == ASHIFT
3401 && arith_reg_operand (XEXP (XEXP (XEXP (x, 0), 0), 0), SImode)
3402 && CONST_INT_P (XEXP (XEXP (XEXP (x, 0), 0), 1)))
3403 {
3404 *total = 1;
3405 return true;
3406 }
3407 else
3408 return false;
3409
3410 case SMIN:
3411 case SMAX:
3412 /* This is most likely a clips.b or clips.w insn that is being made up
3413 by combine. */
3414 if (TARGET_SH2A
3415 && (GET_CODE (XEXP (x, 0)) == SMAX || GET_CODE (XEXP (x, 0)) == SMIN)
3416 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
3417 && REG_P (XEXP (XEXP (x, 0), 0))
3418 && CONST_INT_P (XEXP (x, 1)))
3419 {
3420 *total = COSTS_N_INSNS (1);
3421 return true;
3422 }
3423 else
3424 return false;
3425
3426 case CONST:
3427 case LABEL_REF:
3428 case SYMBOL_REF:
3429 *total = 5;
3430 return true;
3431
3432 case CONST_DOUBLE:
3433 /* prepare_cmp_insn will force costly constants into registers before
3434 the cbranchdi4 pattern can see them, so preserve potentially
3435 interesting ones. */
3436 if (outer_code == COMPARE && GET_MODE (x) == DImode)
3437 *total = 1;
3438 else
3439 *total = 10;
3440 return true;
3441
3442 case CONST_VECTOR:
3443 /* FIXME: This looks broken. Only the last statement has any effect.
3444 Probably this could be folded with the PARALLEL case? */
3445 if (x == CONST0_RTX (GET_MODE (x)))
3446 *total = 0;
3447 else if (sh_1el_vec (x, VOIDmode))
3448 *total = outer_code != SET;
3449 if (sh_rep_vec (x, VOIDmode))
3450 *total = ((GET_MODE_UNIT_SIZE (GET_MODE (x)) + 3) / 4
3451 + (outer_code != SET));
3452 *total = COSTS_N_INSNS (3) + (outer_code != SET);
3453 return true;
3454
3455 case PLUS:
3456 case MINUS:
3457 *total = COSTS_N_INSNS (addsubcosts (x));
3458 return true;
3459
3460 case AND:
3461 /* Check for (and (not (reg)) (const_int 1)) which is a tst insn. */
3462 if (GET_CODE (XEXP (x, 0)) == NOT && XEXP (x, 1) == const1_rtx)
3463 {
3464 *total = COSTS_N_INSNS (1);
3465 return true;
3466 }
3467 /* Fall through. */
3468
3469 case XOR:
3470 case IOR:
3471 *total = COSTS_N_INSNS (and_xor_ior_costs (x, code));
3472 return true;
3473
3474 case MULT:
3475 *total = COSTS_N_INSNS (multcosts (x));
3476 return true;
3477
3478 case LT:
3479 case GE:
3480 /* div0s sign comparison. */
3481 if (GET_CODE (XEXP (x, 0)) == XOR
3482 && REG_P ((XEXP (XEXP (x, 0), 0)))
3483 && REG_P ((XEXP (XEXP (x, 0), 1)))
3484 && satisfies_constraint_Z (XEXP (x, 1)))
3485 {
3486 *total = COSTS_N_INSNS (1);
3487 return true;
3488 }
3489 else
3490 return false;
3491
3492 case LSHIFTRT:
3493 /* div0s sign comparison. */
3494 if (GET_CODE (XEXP (x, 0)) == XOR
3495 && REG_P ((XEXP (XEXP (x, 0), 0)))
3496 && REG_P ((XEXP (XEXP (x, 0), 1)))
3497 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 31)
3498 {
3499 *total = COSTS_N_INSNS (1);
3500 return true;
3501 }
3502 /* FALLTHRU */
3503 case ASHIFT:
3504 case ASHIFTRT:
3505 {
3506 int cost = shiftcosts (x);
3507 if (cost < 0)
3508 return false;
3509 *total = COSTS_N_INSNS (cost);
3510 return true;
3511 }
3512
3513 case DIV:
3514 case UDIV:
3515 case MOD:
3516 case UMOD:
3517 *total = COSTS_N_INSNS (20);
3518 return true;
3519
3520 case FLOAT:
3521 case FIX:
3522 *total = 100;
3523 return true;
3524
3525 default:
3526 return false;
3527 }
3528 }
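/* Note that several cases above store small raw values such as 1, 5, 8 or 10
   into *TOTAL rather than COSTS_N_INSNS units (COSTS_N_INSNS (N) expands to
   N * 4), so those rtxes end up rated considerably cheaper than a full
   instruction; the commented-out COSTS_N_INSNS (1) above hints at this
   distinction.  */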
3529
3530 /* Determine the size of the fundamental move insn that will be used
3531 for the specified mode. */
3532 static inline int
3533 mov_insn_size (machine_mode mode, bool consider_sh2a)
3534 {
3535 const int mode_sz = GET_MODE_SIZE (mode);
3536
3537 if ((consider_sh2a && TARGET_SH2A_DOUBLE && mode == DFmode)
3538 || (TARGET_FMOVD && mode == DFmode))
3539 return mode_sz;
3540 else
3541 {
3542 /* The max. available mode for actual move insns is SImode.
3543 Larger accesses will be split into multiple loads/stores. */
3544 const int max_mov_sz = GET_MODE_SIZE (SImode);
3545 return mode_sz >= max_mov_sz ? max_mov_sz : mode_sz;
3546 }
3547 }
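/* For example, with neither TARGET_SH2A_DOUBLE nor TARGET_FMOVD in effect,
   mov_insn_size returns GET_MODE_SIZE (SImode) == 4 for DImode and DFmode,
   reflecting that such values are moved as (at least) two 4-byte accesses,
   while QImode and HImode return 1 and 2 respectively.  */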
3548
3549 /* Determine the maximum possible displacement for a move insn for the
3550 specified mode. */
3551 int
3552 sh_max_mov_insn_displacement (machine_mode mode, bool consider_sh2a)
3553 {
3554 /* The 4 byte displacement move insns are the same as the 2 byte
3555 versions but take a 12 bit displacement. All we need to do is to
3556 scale the max. displacement value accordingly. */
3557 const int disp_scale = consider_sh2a ? (4095 / 15) : 1;
3558
3559 /* SH2A supports FPU move insns with 12 bit displacements.
3560 Other variants do not support any kind of displacements for
3561 FPU move insns. */
3562 if (! consider_sh2a && TARGET_FPU_ANY && GET_MODE_CLASS (mode) == MODE_FLOAT)
3563 return 0;
3564 else
3565 {
3566 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3567 const int mode_sz = GET_MODE_SIZE (mode);
3568 int r = 15 * mov_insn_sz * disp_scale;
3569
3570 /* If the mov insn will be split into multiple loads/stores, the
3571 maximum possible displacement is a bit smaller. */
3572 if (mode_sz > mov_insn_sz)
3573 r -= mode_sz - mov_insn_sz;
3574 return r;
3575 }
3576 }
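/* Worked examples of the formula above:
     SImode, !consider_sh2a:  15 * 4 * 1   = 60    (mov.l @(disp,Rn))
     SImode,  consider_sh2a:  15 * 4 * 273 = 16380 (= 4095 * 4)
     DImode, !consider_sh2a:  60 - (8 - 4) = 56, because the access is split
                              and the last part must still be reachable.  */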
3577
3578 /* Determine the alignment mask for a move insn of the
3579 specified mode. */
3580 static inline int
3581 mov_insn_alignment_mask (machine_mode mode, bool consider_sh2a)
3582 {
3583 const int mov_insn_sz = mov_insn_size (mode, consider_sh2a);
3584 return mov_insn_sz > 0 ? (mov_insn_sz - 1) : 0;
3585 }
3586
3587 /* Return the displacement value of a displacement address. */
3588 HOST_WIDE_INT
3589 sh_disp_addr_displacement (rtx x)
3590 {
3591 gcc_assert (satisfies_constraint_Sdd (x));
3592 return INTVAL (XEXP (XEXP (x, 0), 1));
3593 }
3594
3595 /* Compute the cost of an address. */
3596 static int
3597 sh_address_cost (rtx x, machine_mode mode,
3598 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
3599 {
3600 /* 'GBR + 0'. Account one more because of R0 restriction. */
3601 if (REG_P (x) && REGNO (x) == GBR_REG)
3602 return 2;
3603
3604 /* Simple reg, post-inc, pre-dec addressing. */
3605 if (REG_P (x) || GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
3606 return 1;
3607
3608 /* 'reg + disp' addressing. */
3609 if (GET_CODE (x) == PLUS
3610 && REG_P (XEXP (x, 0)) && CONST_INT_P (XEXP (x, 1)))
3611 {
3612 /* 'GBR + disp'. Account one more because of R0 restriction. */
3613 if (REGNO (XEXP (x, 0)) == GBR_REG
3614 && gbr_displacement (XEXP (x, 1), mode))
3615 return 2;
3616
3617 const HOST_WIDE_INT offset = INTVAL (XEXP (x, 1));
3618
3619 if (offset == 0)
3620 return 1;
3621
3622 /* The displacement would fit into a 2 byte move insn.
3623 HImode and QImode loads/stores with displacement put pressure on
3624 R0 which will most likely require another reg copy. Thus account
3625 a higher cost for that. */
3626 if (offset > 0 && offset <= sh_max_mov_insn_displacement (mode, false))
3627 return (mode == HImode || mode == QImode) ? 2 : 1;
3628
3629 /* The displacement would fit into a 4 byte move insn (SH2A). */
3630 if (TARGET_SH2A
3631 && offset > 0 && offset <= sh_max_mov_insn_displacement (mode, true))
3632 return 2;
3633
3634 /* The displacement is probably out of range and will require extra
3635 calculations. */
3636 return 3;
3637 }
3638
3639 /* 'reg + reg' addressing. Account a slightly higher cost because of
3640 increased pressure on R0. */
3641 if (GET_CODE (x) == PLUS && ! CONSTANT_P (XEXP (x, 1)))
3642 return 3;
3643
3644 /* Not sure what it is - probably expensive. */
3645 return 10;
3646 }
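/* Summary of the cost values returned above:
     @Rn, @Rn+, @-Rn                        -> 1
     @(0,Rn)                                -> 1
     @(disp,Rn) within 2-byte insn range    -> 1 (2 for QImode/HImode, R0)
     @(disp,Rn) within SH2A 4-byte range    -> 2
     @(0,GBR), @(disp,GBR)                  -> 2 (extra because of R0)
     @(R0,Rn)                               -> 3
     out-of-range displacement              -> 3
     anything else                          -> 10  */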
3647
3648 /* Code to expand a shift. */
3649 static void
3650 gen_ashift (int type, int n, rtx reg)
3651 {
3652 rtx n_rtx;
3653
3654 /* Negative values here come from the shift_amounts array. */
3655 if (n < 0)
3656 {
3657 if (type == ASHIFT)
3658 type = LSHIFTRT;
3659 else
3660 type = ASHIFT;
3661 n = -n;
3662 }
3663
3664 n_rtx = GEN_INT (n);
3665 gcc_assert (satisfies_constraint_P27 (n_rtx));
3666
3667 switch (type)
3668 {
3669 case ASHIFTRT:
3670 emit_insn (gen_ashrsi3_k (reg, reg, n_rtx));
3671 break;
3672 case LSHIFTRT:
3673 if (n == 1)
3674 emit_insn (gen_shlr (reg, reg));
3675 else
3676 emit_insn (gen_lshrsi3_k (reg, reg, n_rtx));
3677 break;
3678 case ASHIFT:
3679 emit_insn (gen_ashlsi3_k (reg, reg, n_rtx));
3680 break;
3681 default:
3682 gcc_unreachable ();
3683 }
3684 }
3685
3686 /* Code to expand a HImode shift. */
3687 static void
3688 gen_ashift_hi (int type, int n, rtx reg)
3689 {
3690 /* Negative values here come from the shift_amounts array. */
3691 if (n < 0)
3692 {
3693 if (type == ASHIFT)
3694 type = LSHIFTRT;
3695 else
3696 type = ASHIFT;
3697 n = -n;
3698 }
3699
3700 switch (type)
3701 {
3702 case ASHIFTRT:
3703 case LSHIFTRT:
3704 /* We don't have HImode right shift operations because using the
3705 ordinary 32 bit shift instructions for that doesn't generate proper
3706 zero/sign extension.
3707 gen_ashift_hi is only called in contexts where we know that the
3708 sign extension works out correctly. */
3709 {
3710 int offset = 0;
3711 if (GET_CODE (reg) == SUBREG)
3712 {
3713 offset = SUBREG_BYTE (reg);
3714 reg = SUBREG_REG (reg);
3715 }
3716 gen_ashift (type, n, gen_rtx_SUBREG (SImode, reg, offset));
3717 break;
3718 }
3719 case ASHIFT:
3720 emit_insn (gen_ashlhi3_k (reg, reg, GEN_INT (n)));
3721 break;
3722 }
3723 }
3724
3725 /* Output RTL to split a constant shift into its component SH constant
3726 shift instructions. */
3727 void
3728 gen_shifty_op (int code, rtx *operands)
3729 {
3730 int value = INTVAL (operands[2]);
3731 int max, i;
3732
3733 /* Truncate the shift count in case it is out of bounds. */
3734 value = value & 31;
3735
3736 if (value == 31)
3737 {
3738 if (code == LSHIFTRT)
3739 {
3740 emit_insn (gen_rotlsi3_1 (operands[0], operands[0]));
3741 emit_insn (gen_movt (operands[0], get_t_reg_rtx ()));
3742 return;
3743 }
3744 else if (code == ASHIFT)
3745 {
3746 /* There is a two instruction sequence for 31 bit left shifts,
3747 but it requires r0. */
3748 if (REG_P (operands[0]) && REGNO (operands[0]) == 0)
3749 {
3750 emit_insn (gen_andsi3 (operands[0], operands[0], const1_rtx));
3751 emit_insn (gen_rotlsi3_31 (operands[0], operands[0]));
3752 return;
3753 }
3754 }
3755 }
3756 else if (value == 0)
3757 {
3758 /* This can happen even when optimizing, if there were subregs before
3759 reload. Don't output a nop here, as this is never optimized away;
3760 use a no-op move instead. */
3761 emit_insn (gen_rtx_SET (operands[0], operands[0]));
3762 return;
3763 }
3764
3765 max = ashl_lshr_seq[value].insn_count;
3766 for (i = 0; i < max; i++)
3767 gen_ashift (code, ashl_lshr_seq[value].amount[i], operands[0]);
3768 }
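/* The ashl_lshr_seq table (defined elsewhere in this file) decomposes an
   arbitrary constant shift into amounts the hardware can do in a single
   instruction; classic SH only has constant logical shifts by 1, 2, 8 and 16
   (shll/shlr and their 2/8/16 forms), so e.g. a left shift by 6 can be done
   as three shifts by 2.  A negative amount in the table means a correcting
   shift in the opposite direction, as handled by gen_ashift above.  */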
3769
3770 /* Same as gen_shifty_op, but optimized for values where the topmost bits
3771 don't matter. */
3772 void
3773 gen_shifty_hi_op (int code, rtx *operands)
3774 {
3775 int value = INTVAL (operands[2]);
3776 int max, i;
3777 void (*gen_fun) (int, int, rtx);
3778
3779 /* This operation is used by and_shl for SImode values with a few
3780 high bits known to be cleared. */
3781 value &= 31;
3782 if (value == 0)
3783 {
3784 emit_insn (gen_nop ());
3785 return;
3786 }
3787
3788 gen_fun = GET_MODE (operands[0]) == HImode ? gen_ashift_hi : gen_ashift;
3789 if (code == ASHIFT)
3790 {
3791 max = ext_ashl_lshr_seq[value].insn_count;
3792 for (i = 0; i < max; i++)
3793 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3794 }
3795 else
3796 /* When shifting right, emit the shifts in reverse order, so that
3797 solitary negative values come first. */
3798 for (i = ext_ashl_lshr_seq[value].insn_count - 1; i >= 0; i--)
3799 gen_fun (code, ext_ashl_lshr_seq[value].amount[i], operands[0]);
3800 }
3801
3802 /* Output RTL for an arithmetic right shift.
3803 ??? Rewrite to use super-optimizer sequences. */
3804 bool
3805 expand_ashiftrt (rtx *operands)
3806 {
3807 rtx wrk;
3808 char func[18];
3809 int value;
3810
3811 if (TARGET_DYNSHIFT)
3812 {
3813 if (!CONST_INT_P (operands[2]))
3814 {
3815 rtx count = copy_to_mode_reg (SImode, operands[2]);
3816 emit_insn (gen_negsi2 (count, count));
3817 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3818 return true;
3819 }
3820 else if (ashiftrt_insns[INTVAL (operands[2]) & 31]
3821 > 1 + SH_DYNAMIC_SHIFT_COST)
3822 {
3823 rtx count
3824 = force_reg (SImode, GEN_INT (- (INTVAL (operands[2]) & 31)));
3825 emit_insn (gen_ashrsi3_d (operands[0], operands[1], count));
3826 return true;
3827 }
3828 }
3829 if (!CONST_INT_P (operands[2]))
3830 return false;
3831
3832 value = INTVAL (operands[2]) & 31;
3833
3834 if (value == 31)
3835 {
3836 /* If we are called from abs expansion, arrange things so that we
3837 can use a single MT instruction that doesn't clobber the source,
3838 if LICM can hoist out the load of the constant zero. */
3839 if (currently_expanding_to_rtl)
3840 {
3841 emit_insn (gen_cmpgtsi_t (force_reg (SImode, CONST0_RTX (SImode)),
3842 operands[1]));
3843 emit_insn (gen_mov_neg_si_t (operands[0], get_t_reg_rtx ()));
3844 return true;
3845 }
3846 emit_insn (gen_ashrsi2_31 (operands[0], operands[1]));
3847 return true;
3848 }
3849 else if (value >= 16 && value <= 19)
3850 {
3851 wrk = gen_reg_rtx (SImode);
3852 emit_insn (gen_ashrsi2_16 (wrk, operands[1]));
3853 value -= 16;
3854 while (value--)
3855 gen_ashift (ASHIFTRT, 1, wrk);
3856 emit_move_insn (operands[0], wrk);
3857 return true;
3858 }
3859 /* Expand short sequences inline; for longer ones, call a library helper routine. */
3860 else if (value <= 5)
3861 {
3862 wrk = gen_reg_rtx (SImode);
3863 emit_move_insn (wrk, operands[1]);
3864 while (value--)
3865 gen_ashift (ASHIFTRT, 1, wrk);
3866 emit_move_insn (operands[0], wrk);
3867 return true;
3868 }
3869
3870 wrk = gen_reg_rtx (Pmode);
3871
3872 /* Load the value into an arg reg and call a helper. */
3873 emit_move_insn (gen_rtx_REG (SImode, 4), operands[1]);
3874 sprintf (func, "__ashiftrt_r4_%d", value);
3875 rtx lab = function_symbol (wrk, func, SFUNC_STATIC).lab;
3876 emit_insn (gen_ashrsi3_n (GEN_INT (value), wrk, lab));
3877 emit_move_insn (operands[0], gen_rtx_REG (SImode, 4));
3878 return true;
3879 }
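/* To summarize the strategy above: with dynamic shifts the count is negated
   and fed to ashrsi3_d; a shift by 31 becomes a sign-bit broadcast; shifts
   by 16..19 start from ashrsi2_16 and finish with single-bit shifts; shifts
   up to 5 are done with single-bit shifts only; everything else goes through
   the __ashiftrt_r4_<n> library helpers with the value passed in r4.  */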
3880
3881 /* Try to find a good way to implement the combiner pattern
3882 [(set (match_operand:SI 0 "register_operand" "r")
3883 (and:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
3884 (match_operand:SI 2 "const_int_operand" "n"))
3885 (match_operand:SI 3 "const_int_operand" "n"))) .
3886 LEFT_RTX is operand 2 in the above pattern, and MASK_RTX is operand 3.
3887 return 0 for simple right / left or left/right shift combination.
3888 return 1 for a combination of shifts with zero_extend.
3889 return 2 for a combination of shifts with an AND that needs r0.
3890 return 3 for a combination of shifts with an AND that needs an extra
3891 scratch register, when the three highmost bits of the AND mask are clear.
3892 return 4 for a combination of shifts with an AND that needs an extra
3893 scratch register, when any of the three highmost bits of the AND mask
3894 is set.
3895 If ATTRP is set, store an initial right shift width in ATTRP[0],
3896 and the instruction length in ATTRP[1] . These values are not valid
3897 when returning 0.
3898 When ATTRP is set and returning 1, ATTRP[2] gets set to the index into
3899 shift_amounts for the last shift value that is to be used before the
3900 sign extend. */
3901 int
3902 shl_and_kind (rtx left_rtx, rtx mask_rtx, int *attrp)
3903 {
3904 unsigned HOST_WIDE_INT mask, lsb, mask2, lsb2;
3905 int left = INTVAL (left_rtx), right;
3906 int best = 0;
3907 int cost, best_cost = 10000;
3908 int best_right = 0, best_len = 0;
3909 int i;
3910 int can_ext;
3911
3912 if (left < 0 || left > 31)
3913 return 0;
3914 if (CONST_INT_P (mask_rtx))
3915 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> left;
3916 else
3917 mask = (unsigned HOST_WIDE_INT) GET_MODE_MASK (SImode) >> left;
3918 /* Can this be expressed as a right shift / left shift pair? */
3919 lsb = ((mask ^ (mask - 1)) >> 1) + 1;
3920 right = exact_log2 (lsb);
3921 mask2 = ~(mask + lsb - 1);
3922 lsb2 = ((mask2 ^ (mask2 - 1)) >> 1) + 1;
3923 /* mask has no zeroes but trailing zeroes <==> ! mask2 */
3924 if (! mask2)
3925 best_cost = ashl_lshr_seq[right].insn_count
3926 + ashl_lshr_seq[right + left].insn_count;
3927 /* mask has no trailing zeroes <==> ! right */
3928 else if (! right && mask2 == ~(lsb2 - 1))
3929 {
3930 int late_right = exact_log2 (lsb2);
3931 best_cost = ashl_lshr_seq[left + late_right].insn_count
3932 + ashl_lshr_seq[late_right].insn_count;
3933 }
3934 /* Try to use zero extend. */
3935 if (mask2 == ~(lsb2 - 1))
3936 {
3937 int width, first;
3938
3939 for (width = 8; width <= 16; width += 8)
3940 {
3941 /* Can we zero-extend right away? */
3942 if (lsb2 == (unsigned HOST_WIDE_INT) 1 << width)
3943 {
3944 cost = 1 + ext_ashl_lshr_seq[right].insn_count
3945 + ext_ashl_lshr_seq[left + right].insn_count;
3946 if (cost < best_cost)
3947 {
3948 best = 1;
3949 best_cost = cost;
3950 best_right = right;
3951 best_len = cost;
3952 if (attrp)
3953 attrp[2] = -1;
3954 }
3955 continue;
3956 }
3957 /* ??? Could try to put zero extend into initial right shift,
3958 or even shift a bit left before the right shift. */
3959 /* Determine value of first part of left shift, to get to the
3960 zero extend cut-off point. */
3961 first = width - exact_log2 (lsb2) + right;
3962 if (first >= 0 && right + left - first >= 0)
3963 {
3964 cost = ext_ashl_lshr_seq[right].insn_count
3965 + ext_ashl_lshr_seq[first].insn_count + 1
3966 + ext_ashl_lshr_seq[right + left - first].insn_count;
3967
3968 if (cost < best_cost)
3969 {
3970 best = 1;
3971 best_cost = cost;
3972 best_right = right;
3973 best_len = cost;
3974 if (attrp)
3975 attrp[2] = first;
3976 }
3977 }
3978 }
3979 }
3980 /* Try to use r0 AND pattern */
3981 for (i = 0; i <= 2; i++)
3982 {
3983 if (i > right)
3984 break;
3985 if (! CONST_OK_FOR_K08 (mask >> i))
3986 continue;
3987 cost = (i != 0) + 2 + ext_ashl_lshr_seq[left + i].insn_count;
3988 if (cost < best_cost)
3989 {
3990 best = 2;
3991 best_cost = cost;
3992 best_right = i;
3993 best_len = cost - 1;
3994 }
3995 }
3996 /* Try to use a scratch register to hold the AND operand. */
3997 can_ext = ((mask << left) & ((unsigned HOST_WIDE_INT) 3 << 30)) == 0;
3998 for (i = 0; i <= 2; i++)
3999 {
4000 if (i > right)
4001 break;
4002 cost = (i != 0) + (CONST_OK_FOR_I08 (mask >> i) ? 2 : 3)
4003 + (can_ext
4004 ? ext_ashl_lshr_seq
4005 : ashl_lshr_seq)[left + i].insn_count;
4006 if (cost < best_cost)
4007 {
4008 best = 4 - can_ext;
4009 best_cost = cost;
4010 best_right = i;
4011 best_len = cost - 1 - ! CONST_OK_FOR_I08 (mask >> i);
4012 }
4013 }
4014
4015 if (attrp)
4016 {
4017 attrp[0] = best_right;
4018 attrp[1] = best_len;
4019 }
4020 return best;
4021 }
4022
4023 /* This is used in length attributes of the unnamed instructions
4024 corresponding to shl_and_kind return values of 1 and 2. */
4025 int
4026 shl_and_length (rtx insn)
4027 {
4028 rtx set_src, left_rtx, mask_rtx;
4029 int attributes[3];
4030
4031 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4032 left_rtx = XEXP (XEXP (set_src, 0), 1);
4033 mask_rtx = XEXP (set_src, 1);
4034 shl_and_kind (left_rtx, mask_rtx, attributes);
4035 return attributes[1];
4036 }
4037
4038 /* This is used in length attribute of the and_shl_scratch instruction. */
4039 int
4040 shl_and_scr_length (rtx insn)
4041 {
4042 rtx set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4043 int len = ashl_lshr_seq[INTVAL (XEXP (set_src, 1)) & 31].insn_count;
4044 rtx op = XEXP (set_src, 0);
4045 len += ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count + 1;
4046 op = XEXP (XEXP (op, 0), 0);
4047 return len + ashl_lshr_seq[INTVAL (XEXP (op, 1)) & 31].insn_count;
4048 }
4049
4050 /* Generate rtl for instructions for which shl_and_kind advised a particular
4051 method of generating them, i.e. returned zero. */
4052 bool
4053 gen_shl_and (rtx dest, rtx left_rtx, rtx mask_rtx, rtx source)
4054 {
4055 int attributes[3];
4056 unsigned HOST_WIDE_INT mask;
4057 int kind = shl_and_kind (left_rtx, mask_rtx, attributes);
4058 int right, total_shift;
4059 void (*shift_gen_fun) (int, rtx *) = gen_shifty_hi_op;
4060
4061 right = attributes[0];
4062 total_shift = INTVAL (left_rtx) + right;
4063 mask = (unsigned HOST_WIDE_INT) INTVAL (mask_rtx) >> total_shift;
4064 switch (kind)
4065 {
4066 default:
4067 return true;
4068 case 1:
4069 {
4070 int first = attributes[2];
4071 rtx operands[3];
4072
4073 if (first < 0)
4074 {
4075 emit_insn ((mask << right) <= 0xff
4076 ? gen_zero_extendqisi2 (dest,
4077 gen_lowpart (QImode, source))
4078 : gen_zero_extendhisi2 (dest,
4079 gen_lowpart (HImode, source)));
4080 source = dest;
4081 }
4082 if (source != dest)
4083 emit_insn (gen_movsi (dest, source));
4084 operands[0] = dest;
4085 if (right)
4086 {
4087 operands[2] = GEN_INT (right);
4088 gen_shifty_hi_op (LSHIFTRT, operands);
4089 }
4090 if (first > 0)
4091 {
4092 operands[2] = GEN_INT (first);
4093 gen_shifty_hi_op (ASHIFT, operands);
4094 total_shift -= first;
4095 mask <<= first;
4096 }
4097 if (first >= 0)
4098 emit_insn (mask <= 0xff
4099 ? gen_zero_extendqisi2 (dest, gen_lowpart (QImode, dest))
4100 : gen_zero_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4101 if (total_shift > 0)
4102 {
4103 operands[2] = GEN_INT (total_shift);
4104 gen_shifty_hi_op (ASHIFT, operands);
4105 }
4106 break;
4107 }
4108 case 4:
4109 shift_gen_fun = gen_shifty_op;
4110 /* FALLTHRU */
4111 case 3:
4112 /* If the topmost bit that matters is set, set the topmost bits
4113 that don't matter. This way, we might be able to get a shorter
4114 signed constant. */
4115 if (mask & ((HOST_WIDE_INT) 1 << (31 - total_shift)))
4116 mask |= (HOST_WIDE_INT) ((HOST_WIDE_INT_M1U) << (31 - total_shift));
4117 /* FALLTHRU */
4118 case 2:
4119 /* Don't expand fine-grained when combining, because that will
4120 make the pattern fail. */
4121 if (currently_expanding_to_rtl
4122 || reload_in_progress || reload_completed)
4123 {
4124 rtx operands[3];
4125
4126 /* Cases 3 and 4 should be handled by this split
4127 only while combining. */
4128 gcc_assert (kind <= 2);
4129 if (right)
4130 {
4131 emit_insn (gen_lshrsi3 (dest, source, GEN_INT (right)));
4132 source = dest;
4133 }
4134 emit_insn (gen_andsi3 (dest, source, GEN_INT (mask)));
4135 if (total_shift)
4136 {
4137 operands[0] = dest;
4138 operands[1] = dest;
4139 operands[2] = GEN_INT (total_shift);
4140 shift_gen_fun (ASHIFT, operands);
4141 }
4142 break;
4143 }
4144 else
4145 {
4146 int neg = 0;
4147 if (kind != 4 && total_shift < 16)
4148 {
4149 neg = -ext_ashl_lshr_seq[total_shift].amount[1];
4150 if (neg > 0)
4151 neg -= ext_ashl_lshr_seq[total_shift].amount[2];
4152 else
4153 neg = 0;
4154 }
4155 emit_insn (gen_and_shl_scratch (dest, source,
4156 GEN_INT (right),
4157 GEN_INT (mask),
4158 GEN_INT (total_shift + neg),
4159 GEN_INT (neg)));
4160 emit_insn (gen_movsi (dest, dest));
4161 break;
4162 }
4163 }
4164 return false;
4165 }
4166
4167 /* Try to find a good way to implement the combiner pattern
4168 [(set (match_operand:SI 0 "register_operand" "=r")
4169 (sign_extract:SI (ashift:SI (match_operand:SI 1 "register_operand" "r")
4170 (match_operand:SI 2 "const_int_operand" "n")
4171 (match_operand:SI 3 "const_int_operand" "n")
4172 (const_int 0)))
4173 (clobber (reg:SI T_REG))]
4174 LEFT_RTX is operand 2 in the above pattern, and SIZE_RTX is operand 3.
4175 return 0 for simple left / right shift combination.
4176 return 1 for left shift / 8 bit sign extend / left shift.
4177 return 2 for left shift / 16 bit sign extend / left shift.
4178 return 3 for left shift / 8 bit sign extend / shift / sign extend.
4179 return 4 for left shift / 16 bit sign extend / shift / sign extend.
4180 return 5 for left shift / 16 bit sign extend / right shift
4181 return 6 for < 8 bit sign extend / left shift.
4182 return 7 for < 8 bit sign extend / left shift / single right shift.
4183 If COSTP is nonzero, assign the calculated cost to *COSTP. */
4184 int
4185 shl_sext_kind (rtx left_rtx, rtx size_rtx, int *costp)
4186 {
4187 int left, size, insize, ext;
4188 int cost = 0, best_cost;
4189 int kind;
4190
4191 left = INTVAL (left_rtx);
4192 size = INTVAL (size_rtx);
4193 insize = size - left;
4194 gcc_assert (insize > 0);
4195 /* Default to left / right shift. */
4196 kind = 0;
4197 best_cost = ashl_lshr_seq[32 - insize].insn_count
4198 + ashl_lshr_seq[32 - size].insn_count;
4199 if (size <= 16)
4200 {
4201 /* 16 bit shift / sign extend / 16 bit shift */
4202 cost = ashl_lshr_seq[16 - insize].insn_count + 1
4203 + ashl_lshr_seq[16 - size].insn_count;
4204 /* If ashiftrt_insns[16 - size] is 8, this choice will be overridden
4205 below, by alternative 3 or something even better. */
4206 if (cost < best_cost)
4207 {
4208 kind = 5;
4209 best_cost = cost;
4210 }
4211 }
4212 /* Try a plain sign extend between two shifts. */
4213 for (ext = 16; ext >= insize; ext -= 8)
4214 {
4215 if (ext <= size)
4216 {
4217 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4218 + ashl_lshr_seq[size - ext].insn_count;
4219 if (cost < best_cost)
4220 {
4221 kind = ext / (unsigned) 8;
4222 best_cost = cost;
4223 }
4224 }
4225 /* Check if we can do a sloppy shift with a final signed shift
4226 restoring the sign. */
4227 if (EXT_SHIFT_SIGNED (size - ext))
4228 cost = ext_ashl_lshr_seq[ext - insize].insn_count
4229 + ext_ashl_lshr_seq[size - ext].insn_count + 1;
4230 /* If not, maybe it's still cheaper to do the second shift sloppy,
4231 and do a final sign extend? */
4232 else if (size <= 16)
4233 cost = ext_ashl_lshr_seq[ext - insize].insn_count + 1
4234 + ext_ashl_lshr_seq[size > ext ? size - ext : ext - size].insn_count
4235 + 1;
4236 else
4237 continue;
4238 if (cost < best_cost)
4239 {
4240 kind = ext / (unsigned) 8 + 2;
4241 best_cost = cost;
4242 }
4243 }
4244 /* Check if we can sign extend in r0 */
4245 if (insize < 8)
4246 {
4247 cost = 3 + ashl_lshr_seq[left].insn_count;
4248 if (cost < best_cost)
4249 {
4250 kind = 6;
4251 best_cost = cost;
4252 }
4253 /* Try the same with a final signed shift. */
4254 if (left < 31)
4255 {
4256 cost = 3 + ext_ashl_lshr_seq[left + 1].insn_count + 1;
4257 if (cost < best_cost)
4258 {
4259 kind = 7;
4260 best_cost = cost;
4261 }
4262 }
4263 }
4264 if (TARGET_DYNSHIFT)
4265 {
4266 /* Try to use a dynamic shift. */
4267 cost = ashl_lshr_seq[32 - insize].insn_count + 1 + SH_DYNAMIC_SHIFT_COST;
4268 if (cost < best_cost)
4269 {
4270 kind = 0;
4271 best_cost = cost;
4272 }
4273 }
4274 if (costp)
4275 *costp = cost;
4276 return kind;
4277 }
4278
4279 /* Function to be used in the length attribute of the instructions
4280 implementing this pattern. */
4281 int
4282 shl_sext_length (rtx insn)
4283 {
4284 rtx set_src, left_rtx, size_rtx;
4285 int cost;
4286
4287 set_src = SET_SRC (XVECEXP (PATTERN (insn), 0, 0));
4288 left_rtx = XEXP (XEXP (set_src, 0), 1);
4289 size_rtx = XEXP (set_src, 1);
4290 shl_sext_kind (left_rtx, size_rtx, &cost);
4291 return cost;
4292 }
4293
4294 /* Generate rtl for this pattern */
4295 bool
4296 gen_shl_sext (rtx dest, rtx left_rtx, rtx size_rtx, rtx source)
4297 {
4298 int kind;
4299 int left, size, insize, cost;
4300 rtx operands[3];
4301
4302 kind = shl_sext_kind (left_rtx, size_rtx, &cost);
4303 left = INTVAL (left_rtx);
4304 size = INTVAL (size_rtx);
4305 insize = size - left;
4306 switch (kind)
4307 {
4308 case 1:
4309 case 2:
4310 case 3:
4311 case 4:
4312 {
4313 int ext = kind & 1 ? 8 : 16;
4314 int shift2 = size - ext;
4315
4316 /* Don't expand fine-grained when combining, because that will
4317 make the pattern fail. */
4318 if (! currently_expanding_to_rtl
4319 && ! reload_in_progress && ! reload_completed)
4320 {
4321 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4322 emit_insn (gen_movsi (dest, source));
4323 break;
4324 }
4325 if (dest != source)
4326 emit_insn (gen_movsi (dest, source));
4327 operands[0] = dest;
4328 if (ext - insize)
4329 {
4330 operands[2] = GEN_INT (ext - insize);
4331 gen_shifty_hi_op (ASHIFT, operands);
4332 }
4333 emit_insn (kind & 1
4334 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4335 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4336 if (kind <= 2)
4337 {
4338 if (shift2)
4339 {
4340 operands[2] = GEN_INT (shift2);
4341 gen_shifty_op (ASHIFT, operands);
4342 }
4343 }
4344 else
4345 {
4346 if (shift2 > 0)
4347 {
4348 if (EXT_SHIFT_SIGNED (shift2))
4349 {
4350 operands[2] = GEN_INT (shift2 + 1);
4351 gen_shifty_op (ASHIFT, operands);
4352 operands[2] = const1_rtx;
4353 gen_shifty_op (ASHIFTRT, operands);
4354 break;
4355 }
4356 operands[2] = GEN_INT (shift2);
4357 gen_shifty_hi_op (ASHIFT, operands);
4358 }
4359 else if (shift2)
4360 {
4361 operands[2] = GEN_INT (-shift2);
4362 gen_shifty_hi_op (LSHIFTRT, operands);
4363 }
4364 emit_insn (size <= 8
4365 ? gen_extendqisi2 (dest, gen_lowpart (QImode, dest))
4366 : gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4367 }
4368 break;
4369 }
4370 case 5:
4371 {
4372 int i = 16 - size;
4373 if (! currently_expanding_to_rtl
4374 && ! reload_in_progress && ! reload_completed)
4375 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4376 else
4377 {
4378 operands[0] = dest;
4379 operands[2] = GEN_INT (16 - insize);
4380 gen_shifty_hi_op (ASHIFT, operands);
4381 emit_insn (gen_extendhisi2 (dest, gen_lowpart (HImode, dest)));
4382 }
4383 /* Don't use gen_ashrsi3 because it generates new pseudos. */
4384 while (--i >= 0)
4385 gen_ashift (ASHIFTRT, 1, dest);
4386 break;
4387 }
4388 case 6:
4389 case 7:
4390 /* Don't expand fine-grained when combining, because that will
4391 make the pattern fail. */
4392 if (! currently_expanding_to_rtl
4393 && ! reload_in_progress && ! reload_completed)
4394 {
4395 emit_insn (gen_shl_sext_ext (dest, source, left_rtx, size_rtx));
4396 emit_insn (gen_movsi (dest, source));
4397 break;
4398 }
4399 emit_insn (gen_andsi3 (dest, source, GEN_INT ((1 << insize) - 1)));
4400 emit_insn (gen_xorsi3 (dest, dest, GEN_INT (1 << (insize - 1))));
4401 emit_insn (gen_addsi3 (dest, dest, GEN_INT (HOST_WIDE_INT_M1U << (insize - 1))));
4402 operands[0] = dest;
4403 operands[2] = kind == 7 ? GEN_INT (left + 1) : left_rtx;
4404 gen_shifty_op (ASHIFT, operands);
4405 if (kind == 7)
4406 emit_insn (gen_ashrsi3_k (dest, dest, const1_rtx));
4407 break;
4408 default:
4409 return true;
4410 }
4411 return false;
4412 }
4413
4414 typedef struct label_ref_list_d
4415 {
4416 rtx_code_label *label;
4417 struct label_ref_list_d *next;
4418 } *label_ref_list_t;
4419
4420 static object_allocator<label_ref_list_d> label_ref_list_d_pool
4421 ("label references list");
4422
4423 /* The SH cannot load a large constant into a register; constants have to
4424 come from a pc relative load. The constant referenced by a pc relative
4425 load instruction must lie less than 1k ahead of the instruction. This
4426 means that we often have to dump a constant inside a function, and
4427 generate code to branch around it.
4428
4429 It is important to minimize this, since the branches will slow things
4430 down and make things bigger.
4431
4432 Worst case code looks like:
4433
4434 mov.l L1,rn
4435 bra L2
4436 nop
4437 align
4438 L1: .long value
4439 L2:
4440 ..
4441
4442 mov.l L3,rn
4443 bra L4
4444 nop
4445 align
4446 L3: .long value
4447 L4:
4448 ..
4449
4450 We fix this by performing a scan before scheduling, which notices which
4451 instructions need to have their operands fetched from the constant table
4452 and builds the table.
4453
4454 The algorithm is:
4455
4456 Scan to find an instruction which needs a pcrel move. Look forward to
4457 find the last barrier which is within MAX_COUNT bytes of the requirement.
4458 If there isn't one, make one. Process all the instructions between
4459 the found instruction and the barrier.
4460
4461 In the above example, we can tell that L3 is within 1k of L1, so
4462 the first move can be shrunk from the 3 insn+constant sequence into
4463 just 1 insn, and the constant moved to L3 to make:
4464
4465 mov.l L1,rn
4466 ..
4467 mov.l L3,rn
4468 bra L4
4469 nop
4470 align
4471 L3:.long value
4472 L4:.long value
4473
4474 Then the second move becomes the target for the shortening process. */
4475
4476 typedef struct
4477 {
4478 rtx value; /* Value in table. */
4479 rtx_code_label *label; /* Label of value. */
4480 label_ref_list_t wend; /* End of window. */
4481 machine_mode mode; /* Mode of value. */
4482
4483 /* True if this constant is accessed as part of a post-increment
4484 sequence. Note that HImode constants are never accessed in this way. */
4485 bool part_of_sequence_p;
4486 } pool_node;
4487
4488 /* The maximum number of constants that can fit into one pool, since
4489 constants in the range 0..510 are at least 2 bytes long, and in the
4490 range from there to 1018 at least 4 bytes. */
4491
4492 #define MAX_POOL_SIZE 372
4493 static pool_node pool_vector[MAX_POOL_SIZE];
4494 static int pool_size;
4495 static rtx_code_label *pool_window_label;
4496 static int pool_window_last;
4497
4498 static int max_labelno_before_reorg;
4499
4500 /* ??? If we need a constant in HImode which is the truncated value of a
4501 constant we need in SImode, we could combine the two entries thus saving
4502 two bytes. Is this common enough to be worth the effort of implementing
4503 it? */
4504
4505 /* ??? This stuff should be done at the same time that we shorten branches.
4506 As it is now, we must assume that all branches are the maximum size, and
4507 this causes us to almost always output constant pools sooner than
4508 necessary. */
4509
4510 /* Add a constant to the pool and return its label. */
4511 static rtx_code_label *
4512 add_constant (rtx x, machine_mode mode, rtx last_value)
4513 {
4514 rtx_code_label *lab, *new_rtx;
4515 label_ref_list_t ref, newref;
4516
4517 /* First see if we've already got it. */
4518 for (int i = 0; i < pool_size; i++)
4519 {
4520 if (x->code == pool_vector[i].value->code
4521 && mode == pool_vector[i].mode)
4522 {
4523 if (x->code == CODE_LABEL)
4524 {
4525 if (XINT (x, 3) != XINT (pool_vector[i].value, 3))
4526 continue;
4527 }
4528 if (rtx_equal_p (x, pool_vector[i].value))
4529 {
4530 lab = new_rtx = 0;
4531 if (! last_value
4532 || ! i
4533 || ! rtx_equal_p (last_value, pool_vector[i-1].value))
4534 {
4535 new_rtx = gen_label_rtx ();
4536 LABEL_REFS (new_rtx) = pool_vector[i].label;
4537 pool_vector[i].label = lab = new_rtx;
4538 }
4539 if (lab && pool_window_label)
4540 {
4541 newref = label_ref_list_d_pool.allocate ();
4542 newref->label = pool_window_label;
4543 ref = pool_vector[pool_window_last].wend;
4544 newref->next = ref;
4545 pool_vector[pool_window_last].wend = newref;
4546 }
4547 if (new_rtx)
4548 pool_window_label = new_rtx;
4549 pool_window_last = i;
4550 return lab;
4551 }
4552 }
4553 }
4554
4555 /* Need a new one. */
4556 pool_vector[pool_size].value = x;
4557 if (last_value && rtx_equal_p (last_value, pool_vector[pool_size - 1].value))
4558 {
4559 lab = 0;
4560 pool_vector[pool_size - 1].part_of_sequence_p = true;
4561 }
4562 else
4563 lab = gen_label_rtx ();
4564 pool_vector[pool_size].mode = mode;
4565 pool_vector[pool_size].label = lab;
4566 pool_vector[pool_size].wend = NULL;
4567 pool_vector[pool_size].part_of_sequence_p = (lab == 0);
4568 if (lab && pool_window_label)
4569 {
4570 newref = label_ref_list_d_pool.allocate ();
4571 newref->label = pool_window_label;
4572 ref = pool_vector[pool_window_last].wend;
4573 newref->next = ref;
4574 pool_vector[pool_window_last].wend = newref;
4575 }
4576 if (lab)
4577 pool_window_label = lab;
4578 pool_window_last = pool_size;
4579 pool_size++;
4580 return lab;
4581 }
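/* Roughly, pool_window_label / pool_window_last remember the label and the
   index of the entry that was added (or reused) most recently.  Whenever a
   new label is created here, the previous window label is chained onto the
   wend list of that last entry, and dump_table below emits a
   consttable_window_end marker for every label on such a list.  */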
4582
4583 /* Output the literal table. START, if nonzero, is the first instruction
4584 this table is needed for, and also indicates that there is at least one
4585 casesi_worker_2 instruction; we have to emit the operand3 labels from
4586 these insns at a 4-byte aligned position. BARRIER is the barrier
4587 after which we are to place the table. */
4588 static void
4589 dump_table (rtx_insn *start, rtx_insn *barrier)
4590 {
4591 rtx_insn *scan = barrier;
4592 bool need_align = true;
4593 rtx_code_label *lab;
4594 label_ref_list_t ref;
4595 bool have_df = false;
4596
4597 /* Do two passes, first time dump out the HI sized constants. */
4598
4599 for (int i = 0; i < pool_size; i++)
4600 {
4601 pool_node *p = &pool_vector[i];
4602
4603 if (p->mode == HImode)
4604 {
4605 if (need_align)
4606 {
4607 scan = emit_insn_after (gen_align_2 (), scan);
4608 need_align = false;
4609 }
4610 for (lab = p->label; lab;
4611 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
4612 scan = emit_label_after (lab, scan);
4613 scan = emit_insn_after (gen_consttable_2 (p->value, const0_rtx),
4614 scan);
4615 for (ref = p->wend; ref; ref = ref->next)
4616 {
4617 lab = ref->label;
4618 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4619 }
4620 }
4621 else if (p->mode == DFmode)
4622 have_df = true;
4623 }
4624
4625 need_align = true;
4626
4627 if (start)
4628 {
4629 scan = emit_insn_after (gen_align_4 (), scan);
4630 need_align = false;
4631 for (; start != barrier; start = NEXT_INSN (start))
4632 if (NONJUMP_INSN_P (start)
4633 && recog_memoized (start) == CODE_FOR_casesi_worker_2)
4634 {
4635 rtx src = SET_SRC (XVECEXP (PATTERN (start), 0, 0));
4636 rtx lab = XEXP (XVECEXP (src, 0, 3), 0);
4637
4638 scan = emit_label_after (as_a <rtx_insn *> (lab), scan);
4639 }
4640 }
4641 if (TARGET_FMOVD && TARGET_ALIGN_DOUBLE && have_df)
4642 {
4643 rtx_insn *align_insn = NULL;
4644
4645 scan = emit_label_after (gen_label_rtx (), scan);
4646 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4647 need_align = false;
4648
4649 for (int i = 0; i < pool_size; i++)
4650 {
4651 pool_node *p = &pool_vector[i];
4652
4653 switch (p->mode)
4654 {
4655 case E_HImode:
4656 break;
4657 case E_SImode:
4658 case E_SFmode:
4659 if (align_insn && !p->part_of_sequence_p)
4660 {
4661 for (lab = p->label; lab;
4662 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
4663 emit_label_before (lab, align_insn);
4664 emit_insn_before (gen_consttable_4 (p->value, const0_rtx),
4665 align_insn);
4666 for (ref = p->wend; ref; ref = ref->next)
4667 {
4668 lab = ref->label;
4669 emit_insn_before (gen_consttable_window_end (lab),
4670 align_insn);
4671 }
4672 delete_insn (align_insn);
4673 align_insn = NULL;
4674 continue;
4675 }
4676 else
4677 {
4678 for (lab = p->label; lab;
4679 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
4680 scan = emit_label_after (lab, scan);
4681 scan = emit_insn_after (gen_consttable_4 (p->value,
4682 const0_rtx), scan);
4683 need_align = ! need_align;
4684 }
4685 break;
4686 case E_DFmode:
4687 if (need_align)
4688 {
4689 scan = emit_insn_after (gen_align_log (GEN_INT (3)), scan);
4690 align_insn = scan;
4691 need_align = false;
4692 }
4693 /* FALLTHRU */
4694 case E_DImode:
4695 for (lab = p->label; lab;
4696 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
4697 scan = emit_label_after (lab, scan);
4698 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4699 scan);
4700 break;
4701 default:
4702 gcc_unreachable ();
4703 }
4704
4705 if (p->mode != HImode)
4706 {
4707 for (ref = p->wend; ref; ref = ref->next)
4708 {
4709 lab = ref->label;
4710 scan = emit_insn_after (gen_consttable_window_end (lab),
4711 scan);
4712 }
4713 }
4714 }
4715
4716 pool_size = 0;
4717 }
4718
4719 for (int i = 0; i < pool_size; i++)
4720 {
4721 pool_node *p = &pool_vector[i];
4722
4723 switch (p->mode)
4724 {
4725 case E_HImode:
4726 break;
4727 case E_SImode:
4728 case E_SFmode:
4729 if (need_align)
4730 {
4731 need_align = false;
4732 scan = emit_label_after (gen_label_rtx (), scan);
4733 scan = emit_insn_after (gen_align_4 (), scan);
4734 }
4735 for (lab = p->label; lab;
4736 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
4737 scan = emit_label_after (lab, scan);
4738 scan = emit_insn_after (gen_consttable_4 (p->value, const0_rtx),
4739 scan);
4740 break;
4741 case E_DFmode:
4742 case E_DImode:
4743 if (need_align)
4744 {
4745 need_align = false;
4746 scan = emit_label_after (gen_label_rtx (), scan);
4747 scan = emit_insn_after (gen_align_4 (), scan);
4748 }
4749 for (lab = p->label; lab;
4750 lab = safe_as_a <rtx_code_label *> (LABEL_REFS (lab)))
4751 scan = emit_label_after (lab, scan);
4752 scan = emit_insn_after (gen_consttable_8 (p->value, const0_rtx),
4753 scan);
4754 break;
4755 default:
4756 gcc_unreachable ();
4757 }
4758
4759 if (p->mode != HImode)
4760 {
4761 for (ref = p->wend; ref; ref = ref->next)
4762 {
4763 lab = ref->label;
4764 scan = emit_insn_after (gen_consttable_window_end (lab), scan);
4765 }
4766 }
4767 }
4768
4769 scan = emit_insn_after (gen_consttable_end (), scan);
4770 scan = emit_barrier_after (scan);
4771 pool_size = 0;
4772 pool_window_label = NULL;
4773 pool_window_last = 0;
4774 }
4775
4776 #define MOVA_LABELREF(mova) XVECEXP (SET_SRC (PATTERN (mova)), 0, 0)
4777
4778 /* Nonzero if the insn is a move instruction which needs to be fixed. */
4779
4780 /* ??? For DImode/DFmode moves, we don't need to fix it if each half of the
4781 CONST_DOUBLE input value is CONST_OK_FOR_I08. For an SFmode move, we don't
4782 need to fix it if the input value is CONST_OK_FOR_I08. */
4783 static bool
4784 broken_move (rtx_insn *insn)
4785 {
4786 if (NONJUMP_INSN_P (insn))
4787 {
4788 rtx pat = PATTERN (insn);
4789 if (GET_CODE (pat) == PARALLEL)
4790 pat = XVECEXP (pat, 0, 0);
4791 if (GET_CODE (pat) == SET
4792 /* We can load any 8-bit value if we don't care what the high
4793 order bits end up as. */
4794 && GET_MODE (SET_DEST (pat)) != QImode
4795 && (CONSTANT_P (SET_SRC (pat))
4796 || (GET_CODE (SET_SRC (pat)) == UNSPEC_VOLATILE
4797 && XINT (SET_SRC (pat), 1) == UNSPECV_SP_SWITCH_B)
4798 /* Match mova_const. */
4799 || (GET_CODE (SET_SRC (pat)) == UNSPEC
4800 && XINT (SET_SRC (pat), 1) == UNSPEC_MOVA
4801 && GET_CODE (XVECEXP (SET_SRC (pat), 0, 0)) == CONST))
4802 && ! (TARGET_SH2E
4803 && GET_CODE (SET_SRC (pat)) == CONST_DOUBLE
4804 && (fp_zero_operand (SET_SRC (pat))
4805 || fp_one_operand (SET_SRC (pat)))
4806 /* In general we don't know the current setting of fpscr, so
4807 disable fldi.
4808 There is an exception if this was a register-register move
4809 before reload - and hence it was ascertained that we have
4810 single precision setting - and in a post-reload optimization
4811 we changed this to do a constant load. In that case
4812 we don't have an r0 clobber, hence we must use fldi. */
4813 && (TARGET_FMOVD
4814 || (GET_CODE (XEXP (XVECEXP (PATTERN (insn), 0, 2), 0))
4815 == SCRATCH))
4816 && REG_P (SET_DEST (pat))
4817 && FP_REGISTER_P (REGNO (SET_DEST (pat))))
4818 && ! (TARGET_SH2A
4819 && GET_MODE (SET_DEST (pat)) == SImode
4820 && (satisfies_constraint_I20 (SET_SRC (pat))
4821 || satisfies_constraint_I28 (SET_SRC (pat))))
4822 && ! satisfies_constraint_I08 (SET_SRC (pat)))
4823 return true;
4824 }
4825
4826 return false;
4827 }
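/* For example, a mov of a full 32-bit constant into an SImode register
   cannot be encoded as mov #imm,Rn (the I08 constraint) and, unless SH2A's
   20/28-bit immediates (I20/I28) apply, must instead be rewritten as a
   pc relative load from a constant pool entry created by add_constant and
   emitted by dump_table.  */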
4828
4829 /* Return true if the specified insn is a mova insn. */
4830 static bool
4831 mova_p (rtx_insn *insn)
4832 {
4833 return (NONJUMP_INSN_P (insn)
4834 && GET_CODE (PATTERN (insn)) == SET
4835 && GET_CODE (SET_SRC (PATTERN (insn))) == UNSPEC
4836 && XINT (SET_SRC (PATTERN (insn)), 1) == UNSPEC_MOVA
4837 /* Don't match mova_const. */
4838 && GET_CODE (MOVA_LABELREF (insn)) == LABEL_REF);
4839 }
4840
4841 /* Fix up a mova from a switch that went out of range. */
4842 static void
4843 fixup_mova (rtx_insn *mova)
4844 {
4845 PUT_MODE (XEXP (MOVA_LABELREF (mova), 0), QImode);
4846 if (! flag_pic)
4847 {
4848 SET_SRC (PATTERN (mova)) = MOVA_LABELREF (mova);
4849 INSN_CODE (mova) = -1;
4850 }
4851 else
4852 {
4853 rtx_insn *worker = mova;
4854 rtx_code_label *lab = gen_label_rtx ();
4855 rtx wpat, wpat0, wpat1, wsrc, target, base, diff;
4856
4857 do
4858 {
4859 worker = NEXT_INSN (worker);
4860 gcc_assert (worker
4861 && !LABEL_P (worker)
4862 && !JUMP_P (worker));
4863 } while (NOTE_P (worker)
4864 || recog_memoized (worker) != CODE_FOR_casesi_worker_1);
4865 wpat = PATTERN (worker);
4866 wpat0 = XVECEXP (wpat, 0, 0);
4867 wpat1 = XVECEXP (wpat, 0, 1);
4868 wsrc = SET_SRC (wpat0);
4869 PATTERN (worker) = (gen_casesi_worker_2
4870 (SET_DEST (wpat0), XVECEXP (wsrc, 0, 1),
4871 XEXP (XVECEXP (wsrc, 0, 2), 0), lab,
4872 XEXP (wpat1, 0)));
4873 INSN_CODE (worker) = -1;
4874 target = XVECEXP (SET_SRC (PATTERN (mova)), 0, 0);
4875 base = gen_rtx_LABEL_REF (Pmode, lab);
4876 diff = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, target, base), UNSPEC_SYMOFF);
4877 SET_SRC (PATTERN (mova)) = gen_rtx_CONST (Pmode, diff);
4878 INSN_CODE (mova) = -1;
4879 }
4880 }
4881
4882 /* NEW_MOVA is a mova we've just encountered while scanning forward. Update
4883 *num_mova, and check that the new mova is not nested within the first one.
4884 Return 0 if *first_mova was replaced, 1 if new_mova was replaced,
4885 2 if new_mova has been assigned to *first_mova, -1 otherwise. */
4886 static int
4887 untangle_mova (int *num_mova, rtx_insn **first_mova, rtx_insn *new_mova)
4888 {
4889 int n_addr = 0; /* Initialization to shut up spurious warning. */
4890 int f_target, n_target = 0; /* Likewise. */
4891
4892 if (optimize)
4893 {
4894 /* If NEW_MOVA has no address yet, it will be handled later. */
4895 if (INSN_ADDRESSES_SIZE() <= (unsigned) INSN_UID (new_mova))
4896 return -1;
4897
4898 n_addr = INSN_ADDRESSES (INSN_UID (new_mova));
4899 n_target = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (new_mova), 0)));
4900 if (n_addr > n_target || n_addr + 1022 < n_target)
4901 {
4902 /* Change the mova into a load.
4903 broken_move will then return true for it. */
4904 fixup_mova (new_mova);
4905 return 1;
4906 }
4907 }
4908 if (!(*num_mova)++)
4909 {
4910 *first_mova = new_mova;
4911 return 2;
4912 }
4913 if (!optimize
4914 || ((f_target
4915 = INSN_ADDRESSES (INSN_UID (XEXP (MOVA_LABELREF (*first_mova), 0))))
4916 >= n_target))
4917 return -1;
4918
4919 (*num_mova)--;
4920 if (f_target - INSN_ADDRESSES (INSN_UID (*first_mova))
4921 > n_target - n_addr)
4922 {
4923 fixup_mova (*first_mova);
4924 return 0;
4925 }
4926 else
4927 {
4928 fixup_mova (new_mova);
4929 return 1;
4930 }
4931 }
4932
4933 /* Find the last barrier from insn FROM which is close enough to hold the
4934 constant pool. If we can't find one, then create one near the end of
4935 the range. */
4936 static rtx_insn *
4937 find_barrier (int num_mova, rtx_insn *mova, rtx_insn *from)
4938 {
4939 int count_si = 0;
4940 int count_hi = 0;
4941 int found_hi = 0;
4942 int found_si = 0;
4943 int hi_align = 2;
4944 int si_align = 2;
4945 int leading_mova = num_mova;
4946 rtx_insn *barrier_before_mova = NULL;
4947 rtx_insn *found_barrier = NULL;
4948 rtx_insn *good_barrier = NULL;
4949 int si_limit;
4950 int hi_limit;
4951 rtx_insn *orig = from;
4952 rtx_insn *last_got = NULL;
4953 rtx_insn *last_symoff = NULL;
4954
4955 /* For HImode: range is 510, add 4 because pc counts from address of
4956 second instruction after this one, subtract 2 for the jump instruction
4957 that we may need to emit before the table, subtract 2 for the instruction
4958 that fills the jump delay slot (in very rare cases, reorg will take an
4959 instruction from after the constant pool or will leave the delay slot
4960 empty). This gives 510.
4961 For SImode: range is 1020, add 4 because pc counts from address of
4962 second instruction after this one, subtract 2 in case pc is 2 byte
4963 aligned, subtract 2 for the jump instruction that we may need to emit
4964 before the table, subtract 2 for the instruction that fills the jump
4965 delay slot. This gives 1018. */
4966
4967 /* The branch will always be shortened now that the reference address for
4968 forward branches is the successor address, thus we need no longer make
4969 adjustments to the [sh]i_limit for -O0. */
4970
4971 si_limit = 1018;
4972 hi_limit = 510;
4973
4974 while (from && count_si < si_limit && count_hi < hi_limit)
4975 {
4976 int inc = get_attr_length (from);
4977 int new_align = 1;
4978
4979 /* If this is a label that existed at the time of the compute_alignments
4980 call, determine the alignment. N.B. When find_barrier recurses for
4981 an out-of-reach mova, we might see labels at the start of previously
4982 inserted constant tables. */
4983 if (LABEL_P (from)
4984 && CODE_LABEL_NUMBER (from) <= max_labelno_before_reorg)
4985 {
4986 if (optimize)
4987 new_align = 1 << label_to_alignment (from).levels[0].log;
4988 else if (BARRIER_P (prev_nonnote_insn (from)))
4989 new_align = 1 << barrier_align (from);
4990 else
4991 new_align = 1;
4992 inc = 0;
4993 }
4994 /* In case we are scanning a constant table because of recursion, check
4995 for explicit alignments. If the table is long, we might be forced
4996 to emit the new table in front of it; the length of the alignment
4997 might be the last straw. */
4998 else if (NONJUMP_INSN_P (from)
4999 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5000 && XINT (PATTERN (from), 1) == UNSPECV_ALIGN)
5001 new_align = INTVAL (XVECEXP (PATTERN (from), 0, 0));
5002 /* When we find the end of a constant table, paste the new constant
5003 at the end. That is better than putting it in front because
5004 this way, we don't need extra alignment for adding a 4-byte-aligned
5005 mov(a) label to a 2/4 or 8/4 byte aligned table. */
5006 else if (NONJUMP_INSN_P (from)
5007 && GET_CODE (PATTERN (from)) == UNSPEC_VOLATILE
5008 && XINT (PATTERN (from), 1) == UNSPECV_CONST_END)
5009 return from;
5010
5011 if (BARRIER_P (from))
5012 {
5013 rtx_insn *next;
5014
5015 found_barrier = from;
5016
5017 /* If we are at the end of the function, or in front of an alignment
5018 instruction, we need not insert an extra alignment. We prefer
5019 this kind of barrier. */
5020 if (barrier_align (from) > 2)
5021 good_barrier = from;
5022
5023 /* If we are at the end of a hot/cold block, dump the constants
5024 here. */
5025 next = NEXT_INSN (from);
5026 if (next
5027 && NOTE_P (next)
5028 && NOTE_KIND (next) == NOTE_INSN_SWITCH_TEXT_SECTIONS)
5029 break;
5030 }
5031
5032 if (broken_move (from))
5033 {
5034 rtx pat, src, dst;
5035 machine_mode mode;
5036
5037 pat = PATTERN (from);
5038 if (GET_CODE (pat) == PARALLEL)
5039 pat = XVECEXP (pat, 0, 0);
5040 src = SET_SRC (pat);
5041 dst = SET_DEST (pat);
5042 mode = GET_MODE (dst);
5043
5044 /* GOT pc-relative setup comes as a pair of
5045 mova .L8,r0
5046 mov.l .L8,r12
5047 instructions (plus an add r0,r12).
5048 Remember if we see one without the other. */
5049 if (GET_CODE (src) == UNSPEC && PIC_ADDR_P (XVECEXP (src, 0, 0)))
5050 last_got = last_got ? NULL : from;
5051 else if (PIC_ADDR_P (src))
5052 last_got = last_got ? NULL : from;
5053
5054 /* We must explicitly check the mode, because sometimes the
5055 front end will generate code to load unsigned constants into
5056 HImode targets without properly sign extending them. */
5057 if (mode == HImode
5058 || (mode == SImode && satisfies_constraint_I16 (src)
5059 && REGNO (dst) != FPUL_REG))
5060 {
5061 found_hi += 2;
5062 /* We put the short constants before the long constants, so
5063 we must count the length of short constants in the range
5064 for the long constants. */
5065 /* ??? This isn't optimal, but is easy to do. */
5066 si_limit -= 2;
5067 }
5068 else
5069 {
5070 /* We dump DF/DI constants before SF/SI ones, because
5071 the limit is the same, but the alignment requirements
5072 are higher. We may waste up to 4 additional bytes
5073 for alignment, and the DF/DI constant may have
5074 another SF/SI constant placed before it. */
5075 while (si_align > 2 && found_si + si_align - 2 > count_si)
5076 si_align >>= 1;
5077 if (found_si > count_si)
5078 count_si = found_si;
5079 found_si += GET_MODE_SIZE (mode);
5080 if (num_mova)
5081 si_limit -= GET_MODE_SIZE (mode);
5082 }
5083 }
5084
5085 if (mova_p (from))
5086 {
5087 switch (untangle_mova (&num_mova, &mova, from))
5088 {
5089 case 1:
5090 if (flag_pic)
5091 {
5092 rtx src = SET_SRC (PATTERN (from));
5093 if (GET_CODE (src) == CONST
5094 && GET_CODE (XEXP (src, 0)) == UNSPEC
5095 && XINT (XEXP (src, 0), 1) == UNSPEC_SYMOFF)
5096 last_symoff = from;
5097 }
5098 break;
5099 case 0: return find_barrier (0, 0, mova);
5100 case 2:
5101 {
5102 leading_mova = 0;
5103 barrier_before_mova
5104 = good_barrier ? good_barrier : found_barrier;
5105 }
5106 default: break;
5107 }
5108 if (found_si > count_si)
5109 count_si = found_si;
5110 }
5111 else if (JUMP_TABLE_DATA_P (from)
5112 && GET_CODE (PATTERN (from)) == ADDR_DIFF_VEC)
5113 {
5114 if ((num_mova > 1 && GET_MODE (prev_nonnote_insn (from)) == VOIDmode)
5115 || (num_mova
5116 && (prev_nonnote_insn (from)
5117 == XEXP (MOVA_LABELREF (mova), 0))))
5118 num_mova--;
5119 if (barrier_align (next_real_insn (from)) == align_jumps.levels[0].log)
5120 {
5121 /* We have just passed the barrier in front of the
5122 ADDR_DIFF_VEC, which is stored in found_barrier. Since
5123 the ADDR_DIFF_VEC is accessed as data, just like our pool
5124 constants, this is a good opportunity to accommodate what
5125 we have gathered so far.
5126 If we waited any longer, we could end up at a barrier in
5127 front of code, which gives worse cache usage for separated
5128 instruction / data caches. */
5129 good_barrier = found_barrier;
5130 break;
5131 }
5132 else
5133 {
5134 rtx body = PATTERN (from);
5135 inc = XVECLEN (body, 1) * GET_MODE_SIZE (GET_MODE (body));
5136 }
5137 }
5138 /* For the SH1, we generate alignments even after jumps-around-jumps. */
5139 else if (JUMP_P (from)
5140 && ! TARGET_SH2
5141 && ! optimize_size)
5142 new_align = 4;
5143
5144 /* There is a possibility that a bf is transformed into a bf/s by the
5145 delay slot scheduler. */
5146 if (JUMP_P (from)
5147 && get_attr_type (from) == TYPE_CBRANCH
5148 && ! sequence_insn_p (from))
5149 inc += 2;
5150
5151 if (found_si)
5152 {
5153 count_si += inc;
5154 if (new_align > si_align)
5155 {
5156 si_limit -= (count_si - 1) & (new_align - si_align);
5157 si_align = new_align;
5158 }
5159 count_si = (count_si + new_align - 1) & -new_align;
5160 }
5161 if (found_hi)
5162 {
5163 count_hi += inc;
5164 if (new_align > hi_align)
5165 {
5166 hi_limit -= (count_hi - 1) & (new_align - hi_align);
5167 hi_align = new_align;
5168 }
5169 count_hi = (count_hi + new_align - 1) & -new_align;
5170 }
5171 from = NEXT_INSN (from);
5172 }
5173
5174 if (num_mova)
5175 {
5176 if (leading_mova)
5177 {
5178 /* Try as we might, the leading mova is out of range. Change
5179 it into a load (which will become a pcload) and retry. */
5180 fixup_mova (mova);
5181 return find_barrier (0, 0, mova);
5182 }
5183 else
5184 {
5185 /* Insert the constant pool table before the mova instruction,
5186 to prevent the mova label reference from going out of range. */
5187 from = mova;
5188 good_barrier = found_barrier = barrier_before_mova;
5189 }
5190 }
5191
5192 if (found_barrier)
5193 {
5194 if (good_barrier && next_real_insn (found_barrier))
5195 found_barrier = good_barrier;
5196 }
5197 else
5198 {
5199 /* We didn't find a barrier in time to dump our stuff,
5200 so we'll make one. */
5201 rtx_code_label *label = gen_label_rtx ();
5202
5203 /* Don't emit a constant table in the middle of insns for
5204 casesi_worker_2. This is a bit of overkill, but it is enough
5205 because casesi_worker_2 does not appear very frequently. */
5206 if (last_symoff)
5207 from = last_symoff;
5208
5209 /* If we exceeded the range, then we must back up over the last
5210 instruction we looked at. Otherwise, we just need to undo the
5211 NEXT_INSN at the end of the loop. */
5212 if (PREV_INSN (from) != orig
5213 && (count_hi > hi_limit || count_si > si_limit))
5214 from = PREV_INSN (PREV_INSN (from));
5215 else
5216 from = PREV_INSN (from);
5217
5218 /* Don't emit a constant table in the middle of global pointer setting,
5219 since that would move the addressing base GOT into another table.
5220 We need the first mov instruction before the _GLOBAL_OFFSET_TABLE_
5221 in the pool anyway, so just move up the whole constant pool.
5222
5223 However, avoid doing so when the last single GOT mov is the starting
5224 insn itself. Going back past the start insn would create a negative
5225 offset, causing errors. */
5226 if (last_got && last_got != orig)
5227 from = PREV_INSN (last_got);
5228
5229 /* Don't insert the constant pool table at the position which
5230 may be the landing pad. */
5231 if (flag_exceptions
5232 && CALL_P (from)
5233 && find_reg_note (from, REG_EH_REGION, NULL_RTX))
5234 from = PREV_INSN (from);
5235
5236 /* Walk back to be just before any jump or label.
5237 Putting it before a label reduces the number of times the branch
5238 around the constant pool table will be hit. Putting it before
5239 a jump makes it more likely that the bra delay slot will be
5240 filled. */
5241 while (NOTE_P (from) || JUMP_P (from) || LABEL_P (from))
5242 from = PREV_INSN (from);
5243
5244 if (CALL_P (from))
5245 {
5246 bool sibcall_p = SIBLING_CALL_P (from);
5247
5248 /* If FROM was a sibling call, then we know that control
5249 will not return. In fact, we were guaranteed to hit
5250 a barrier before another real insn.
5251
5252 The jump around the constant pool is unnecessary. It
5253 costs space, but more importantly it confuses dwarf2cfi
5254 generation. */
5255 if (sibcall_p)
5256 return emit_barrier_after (from);
5257 }
5258
5259 from = emit_jump_insn_after (gen_jump (label), from);
5260 JUMP_LABEL (from) = label;
5261 LABEL_NUSES (label) = 1;
5262 found_barrier = emit_barrier_after (from);
5263 emit_label_after (label, found_barrier);
5264 }
5265
5266 return found_barrier;
5267 }
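/* Illustration only (not part of the original code): when no usable barrier
   is found and one has to be manufactured, the stream around FROM ends up
   looking roughly like

       ...                  (last insn considered)
       (jump to L)          (branch around the future constant pool)
       --- barrier ---
     L:
       ...                  (execution resumes here)

   The constant pool is later dumped at the returned barrier by sh_reorg,
   so it is never reached by falling through.  */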
5268
5269 /* If the instruction INSN is implemented by a special function, and we can
5270 positively find the register that is used to call the sfunc, and this
5271 register is not used anywhere else in this instruction, except as the
5272 destination of a set, return this register; else return NULL_RTX. */
5273 rtx
5274 sfunc_uses_reg (rtx_insn *insn)
5275 {
5276 int i;
5277 rtx pattern, part, reg_part, reg;
5278
5279 if (!NONJUMP_INSN_P (insn))
5280 return NULL_RTX;
5281 pattern = PATTERN (insn);
5282 if (GET_CODE (pattern) != PARALLEL || get_attr_type (insn) != TYPE_SFUNC)
5283 return NULL_RTX;
5284
5285 for (reg_part = NULL_RTX, i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5286 {
5287 part = XVECEXP (pattern, 0, i);
5288 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == SImode)
5289 reg_part = part;
5290 }
5291 if (! reg_part)
5292 return NULL_RTX;
5293 reg = XEXP (reg_part, 0);
5294 for (int i = XVECLEN (pattern, 0) - 1; i >= 0; i--)
5295 {
5296 part = XVECEXP (pattern, 0, i);
5297 if (part == reg_part || GET_CODE (part) == CLOBBER)
5298 continue;
5299 if (reg_mentioned_p (reg, ((GET_CODE (part) == SET
5300 && REG_P (SET_DEST (part)))
5301 ? SET_SRC (part) : part)))
5302 return NULL_RTX;
5303 }
5304 return reg;
5305 }
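/* Schematic illustration (not an actual pattern from sh.md): an sfunc call
   is a PARALLEL that, besides the SET of its result and some clobbers,
   contains a USE of the SImode register that holds the sfunc address,
   roughly

       (parallel [(set (reg:SI 0) (unspec ...))
                  (clobber ...)
                  (use (reg:SI 2))])

   For such a pattern sfunc_uses_reg returns (reg:SI 2), provided that
   register is not mentioned anywhere else except as a SET destination.  */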
5306
5307 /* See if the only way in which INSN uses REG is by calling it, or by
5308 setting it while calling it. Set *SET to a SET rtx if the register
5309 is set by INSN. */
5310 static bool
5311 noncall_uses_reg (rtx reg, rtx_insn *insn, rtx *set)
5312 {
5313 *set = NULL_RTX;
5314
5315 rtx reg2 = sfunc_uses_reg (insn);
5316 if (reg2 && REGNO (reg2) == REGNO (reg))
5317 {
5318 rtx pattern = single_set (insn);
5319 if (pattern
5320 && REG_P (SET_DEST (pattern))
5321 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5322 *set = pattern;
5323 return false;
5324 }
5325 if (!CALL_P (insn))
5326 {
5327 /* We don't use rtx_equal_p because we don't care if the mode is
5328 different. */
5329 rtx pattern = single_set (insn);
5330 if (pattern
5331 && REG_P (SET_DEST (pattern))
5332 && REGNO (reg) == REGNO (SET_DEST (pattern)))
5333 {
5334 rtx par, part;
5335 int i;
5336
5337 *set = pattern;
5338 par = PATTERN (insn);
5339 if (GET_CODE (par) == PARALLEL)
5340 for (i = XVECLEN (par, 0) - 1; i >= 0; i--)
5341 {
5342 part = XVECEXP (par, 0, i);
5343 if (GET_CODE (part) != SET && reg_mentioned_p (reg, part))
5344 return true;
5345 }
5346 return reg_mentioned_p (reg, SET_SRC (pattern));
5347 }
5348
5349 return true;
5350 }
5351
5352 rtx pattern = PATTERN (insn);
5353
5354 if (GET_CODE (pattern) == PARALLEL)
5355 {
5356 for (int i = XVECLEN (pattern, 0) - 1; i >= 1; i--)
5357 if (reg_mentioned_p (reg, XVECEXP (pattern, 0, i)))
5358 return true;
5359 pattern = XVECEXP (pattern, 0, 0);
5360 }
5361
5362 if (GET_CODE (pattern) == SET)
5363 {
5364 if (reg_mentioned_p (reg, SET_DEST (pattern)))
5365 {
5366 /* We don't use rtx_equal_p, because we don't care if the
5367 mode is different. */
5368 if (!REG_P (SET_DEST (pattern))
5369 || REGNO (reg) != REGNO (SET_DEST (pattern)))
5370 return true;
5371
5372 *set = pattern;
5373 }
5374
5375 pattern = SET_SRC (pattern);
5376 }
5377
5378 if (GET_CODE (pattern) != CALL
5379 || !MEM_P (XEXP (pattern, 0))
5380 || ! rtx_equal_p (reg, XEXP (XEXP (pattern, 0), 0)))
5381 return true;
5382
5383 return false;
5384 }
5385
5386 /* Given X, a pattern of an insn or a part of it, return a mask of used
5387 general registers. Bits 0..15 mean that the respective registers
5388 are used as inputs in the instruction. Bits 16..31 mean that the
5389 registers 0..15, respectively, are used as outputs, or are clobbered.
5390 IS_DEST should be set to 16 if X is the destination of a SET, else to 0. */
5391 int
5392 regs_used (rtx x, int is_dest)
5393 {
5394 enum rtx_code code;
5395 const char *fmt;
5396 int used = 0;
5397
5398 if (! x)
5399 return used;
5400 code = GET_CODE (x);
5401 switch (code)
5402 {
5403 case REG:
5404 if (REGNO (x) < 16)
5405 return (((1 << hard_regno_nregs (0, GET_MODE (x))) - 1)
5406 << (REGNO (x) + is_dest));
5407 return 0;
5408 case SUBREG:
5409 {
5410 rtx y = SUBREG_REG (x);
5411
5412 if (!REG_P (y))
5413 break;
5414 if (REGNO (y) < 16)
5415 return (((1 << hard_regno_nregs (0, GET_MODE (x))) - 1)
5416 << (REGNO (y) +
5417 subreg_regno_offset (REGNO (y),
5418 GET_MODE (y),
5419 SUBREG_BYTE (x),
5420 GET_MODE (x)) + is_dest));
5421 return 0;
5422 }
5423 case SET:
5424 return regs_used (SET_SRC (x), 0) | regs_used (SET_DEST (x), 16);
5425 case RETURN:
5426 /* If there was a return value, it must have been indicated with USE. */
5427 return 0x00ffff00;
5428 case CLOBBER:
5429 is_dest = 1;
5430 break;
5431 case MEM:
5432 is_dest = 0;
5433 break;
5434 case CALL:
5435 used |= 0x00ff00f0;
5436 break;
5437 default:
5438 break;
5439 }
5440
5441 fmt = GET_RTX_FORMAT (code);
5442
5443 for (int i = GET_RTX_LENGTH (code) - 1; i >= 0; i--)
5444 {
5445 if (fmt[i] == 'E')
5446 {
5447 for (int j = XVECLEN (x, i) - 1; j >= 0; j--)
5448 used |= regs_used (XVECEXP (x, i, j), is_dest);
5449 }
5450 else if (fmt[i] == 'e')
5451 used |= regs_used (XEXP (x, i), is_dest);
5452 }
5453 return used;
5454 }
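/* Worked example (illustrative): assuming SImode occupies a single general
   register, calling regs_used on

       (set (reg:SI 2) (plus:SI (reg:SI 4) (reg:SI 5)))

   sets bit 4 and bit 5 for the inputs r4 and r5, and bit 2 + 16 for the
   output r2, i.e. it returns 0x00040030.  */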
5455
5456 /* Create an instruction that prevents redirection of a conditional branch
5457 to the destination of the JUMP with address ADDR.
5458 If the branch needs to be implemented as an indirect jump, try to find
5459 a scratch register for it.
5460 If NEED_BLOCK is 0, don't do anything unless we need a scratch register.
5461 Pass 1 if any preceding insn that doesn't fit into a delay slot is
5462 good enough; pass 2 if a definite blocking insn is needed.
5463 -1 is used internally to avoid deep recursion.
5464 If a blocking instruction is made or recognized, return it. */
5465 static rtx_insn *
5466 gen_block_redirect (rtx_insn *jump, int addr, int need_block)
5467 {
5468 int dead = 0;
5469 rtx_insn *prev = prev_nonnote_insn (jump);
5470
5471 /* First, check if we already have an instruction that satisfies our need. */
5472 if (prev && NONJUMP_INSN_P (prev) && ! prev->deleted ())
5473 {
5474 if (INSN_CODE (prev) == CODE_FOR_indirect_jump_scratch)
5475 return prev;
5476 if (GET_CODE (PATTERN (prev)) == USE
5477 || GET_CODE (PATTERN (prev)) == CLOBBER
5478 || get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5479 prev = jump;
5480 else if ((need_block &= ~1) < 0)
5481 return prev;
5482 else if (recog_memoized (prev) == CODE_FOR_block_branch_redirect)
5483 need_block = 0;
5484 }
5485 if (GET_CODE (PATTERN (jump)) == RETURN)
5486 {
5487 if (! need_block)
5488 return prev;
5489 /* Reorg even does nasty things with return insns that cause branches
5490 to go out of range - see find_end_label and callers. */
5491 return emit_insn_before (gen_block_branch_redirect (const0_rtx) , jump);
5492 }
5493 /* We can't use JUMP_LABEL here because it might be undefined
5494 when not optimizing. */
5495 rtx dest = XEXP (SET_SRC (PATTERN (jump)), 0);
5496 /* If the branch is out of range, try to find a scratch register for it. */
5497 if (optimize
5498 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5499 > 4092 + 4098))
5500 {
5501 rtx_insn *scan;
5502 /* Don't look for the stack pointer as a scratch register;
5503 it would cause trouble if an interrupt occurred. */
5504 unsigned attempt = 0x7fff, used;
5505 int jump_left = flag_expensive_optimizations + 1;
5506
5507 /* It is likely that the most recent eligible instruction is wanted for
5508 the delay slot. Therefore, find out which registers it uses, and
5509 try to avoid using them. */
5510
5511 for (scan = jump; (scan = PREV_INSN (scan)); )
5512 {
5513 if (scan->deleted ())
5514 continue;
5515 rtx_code code = GET_CODE (scan);
5516 if (code == CODE_LABEL || code == JUMP_INSN)
5517 break;
5518 if (code == INSN
5519 && GET_CODE (PATTERN (scan)) != USE
5520 && GET_CODE (PATTERN (scan)) != CLOBBER
5521 && get_attr_in_delay_slot (scan) == IN_DELAY_SLOT_YES)
5522 {
5523 attempt &= ~regs_used (PATTERN (scan), 0);
5524 break;
5525 }
5526 }
5527 for (used = dead = 0, scan = JUMP_LABEL_AS_INSN (jump);
5528 (scan = NEXT_INSN (scan)); )
5529 {
5530 if (scan->deleted ())
5531 continue;
5532 rtx_code code = GET_CODE (scan);
5533 if (INSN_P (scan))
5534 {
5535 used |= regs_used (PATTERN (scan), 0);
5536 if (code == CALL_INSN)
5537 used |= regs_used (CALL_INSN_FUNCTION_USAGE (scan), 0);
5538 dead |= (used >> 16) & ~used;
5539 if (dead & attempt)
5540 {
5541 dead &= attempt;
5542 break;
5543 }
5544 if (code == JUMP_INSN)
5545 {
5546 if (jump_left-- && simplejump_p (scan))
5547 scan = JUMP_LABEL_AS_INSN (scan);
5548 else
5549 break;
5550 }
5551 }
5552 }
5553 /* Mask out the stack pointer again, in case it was
5554 the only 'free' register we have found. */
5555 dead &= 0x7fff;
5556 }
5557 /* If the immediate destination is still in range, check for possible
5558 threading with a jump beyond the delay slot insn.
5559 Don't check if we are called recursively; the jump has been or will be
5560 checked in a different invocation in that case. */
5561
5562 else if (optimize && need_block >= 0)
5563 {
5564 rtx_insn *next = next_active_insn (as_a<rtx_insn *> (dest));
5565 next = next_active_insn (next);
5566 if (next && JUMP_P (next)
5567 && GET_CODE (PATTERN (next)) == SET
5568 && recog_memoized (next) == CODE_FOR_jump_compact)
5569 {
5570 dest = JUMP_LABEL (next);
5571 if (dest
5572 && (INSN_ADDRESSES (INSN_UID (dest)) - addr + (unsigned) 4092
5573 > 4092 + 4098))
5574 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), -1);
5575 }
5576 }
5577
5578 if (dead)
5579 {
5580 rtx reg = gen_rtx_REG (SImode, exact_log2 (dead & -dead));
5581
5582 /* It would be nice if we could convert the jump into an indirect
5583 jump / far branch right now, thus exposing all constituent
5584 instructions to further optimization. However, reorg uses
5585 simplejump_p to determine if there is an unconditional jump where
5586 it should try to schedule instructions from the target of the
5587 branch; simplejump_p fails for indirect jumps even if they have
5588 a JUMP_LABEL. */
5589 rtx_insn *insn = emit_insn_before (gen_indirect_jump_scratch
5590 (reg, GEN_INT (unspec_bbr_uid++)),
5591 jump);
5592 /* ??? We would like this to have the scope of the jump, but that
5593 scope will change when a delay slot insn of an inner scope is added.
5594 Hence, after delay slot scheduling, we'll have to expect
5595 NOTE_INSN_BLOCK_END notes between the indirect_jump_scratch and
5596 the jump. */
5597
5598 INSN_LOCATION (insn) = INSN_LOCATION (jump);
5599 INSN_CODE (insn) = CODE_FOR_indirect_jump_scratch;
5600 return insn;
5601 }
5602 else if (need_block)
5603 /* We can't use JUMP_LABEL here because it might be undefined
5604 when not optimizing. */
5605 return emit_insn_before (gen_block_branch_redirect
5606 (GEN_INT (unspec_bbr_uid++)),
5607 jump);
5608 return prev;
5609 }
5610
5611 #define CONDJUMP_MIN -252
5612 #define CONDJUMP_MAX 262
5613 struct far_branch
5614 {
5615 /* A label (to be placed) in front of the jump
5616 that jumps to our ultimate destination. */
5617 rtx_insn *near_label;
5618 /* Where we are going to insert it if we cannot move the jump any farther,
5619 or the jump itself if we have picked up an existing jump. */
5620 rtx_insn *insert_place;
5621 /* The ultimate destination. */
5622 rtx_insn *far_label;
5623 struct far_branch *prev;
5624 /* If the branch has already been created, its address;
5625 else the address of its first prospective user. */
5626 int address;
5627 };
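/* Illustration only: roughly speaking, a conditional branch at address ADDR
   can directly reach a target at address TARGET when the displacement lies
   within [CONDJUMP_MIN, CONDJUMP_MAX]; the checks in split_branches below
   additionally account for branch length and insertion point.  */
#if 0
/* Not compiled; a sketch of the basic reachability test described above.  */
static bool
example_condjump_in_range (int addr, int target)
{
  int disp = target - addr;
  return disp >= CONDJUMP_MIN && disp <= CONDJUMP_MAX;
}
#endif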
5628
5629 enum mdep_reorg_phase_e mdep_reorg_phase;
5630
5631 static void
5632 gen_far_branch (struct far_branch *bp)
5633 {
5634 rtx_insn *insn = bp->insert_place;
5635 rtx_jump_insn *jump;
5636 rtx_code_label *label = gen_label_rtx ();
5637
5638 emit_label_after (label, insn);
5639 if (bp->far_label)
5640 {
5641 jump = emit_jump_insn_after (gen_jump (bp->far_label), insn);
5642 LABEL_NUSES (bp->far_label)++;
5643 }
5644 else
5645 jump = emit_jump_insn_after (gen_return (), insn);
5646
5647 /* Emit a barrier so that reorg knows that any following instructions
5648 are not reachable via a fall-through path.
5649 But don't do this when not optimizing, since we wouldn't suppress the
5650 alignment for the barrier then, and could end up with out-of-range
5651 pc-relative loads. */
5652 if (optimize)
5653 emit_barrier_after (jump);
5654 emit_label_after (bp->near_label, insn);
5655
5656 if (bp->far_label)
5657 JUMP_LABEL (jump) = bp->far_label;
5658 else
5659 {
5660 rtx pat = PATTERN (jump);
5661 gcc_assert (ANY_RETURN_P (pat));
5662 JUMP_LABEL (jump) = pat;
5663 }
5664
5665 bool ok = invert_jump (as_a <rtx_jump_insn *> (insn), label, 1);
5666 gcc_assert (ok);
5667
5668 /* If we are branching around a jump (rather than a return), prevent
5669 reorg from using an insn from the jump target as the delay slot insn;
5670 when reorg did this, it pessimized code (we'd rather hide the delay slot)
5671 and it could cause branches to go out of range. */
5672 if (bp->far_label)
5673 (emit_insn_after
5674 (gen_stuff_delay_slot
5675 (GEN_INT (unspec_bbr_uid++),
5676 GEN_INT (recog_memoized (insn) == CODE_FOR_branch_false)),
5677 insn));
5678 /* Prevent reorg from undoing our splits. */
5679 gen_block_redirect (jump, bp->address += 2, 2);
5680 }
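/* Illustration (not from the original sources): for a conditional branch at
   bp->insert_place whose target is out of range, gen_far_branch rewrites
   the stream roughly as

       (conditional branch, inverted)   --> L_after
     near_label:
       (unconditional jump)             --> far_label
       --- barrier ---                  (only when optimizing)
     L_after:
       ...

   so the short conditional branch only has to reach near_label, and the
   unconditional jump covers the remaining distance to far_label.  */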
5681
5682 /* Fix up ADDR_DIFF_VECs. */
5683 void
5684 fixup_addr_diff_vecs (rtx_insn *first)
5685 {
5686 rtx_insn *insn;
5687
5688 for (insn = first; insn; insn = NEXT_INSN (insn))
5689 {
5690 rtx vec_lab, pat, prevpat, x, braf_label;
5691 rtx_insn *prev;
5692
5693 if (! JUMP_TABLE_DATA_P (insn)
5694 || GET_CODE (PATTERN (insn)) != ADDR_DIFF_VEC)
5695 continue;
5696 pat = PATTERN (insn);
5697 vec_lab = XEXP (XEXP (pat, 0), 0);
5698
5699 /* Search for the matching casesi_jump_2. */
5700 for (prev = as_a <rtx_insn *> (vec_lab); ; prev = PREV_INSN (prev))
5701 {
5702 if (!JUMP_P (prev))
5703 continue;
5704 prevpat = PATTERN (prev);
5705 if (GET_CODE (prevpat) != PARALLEL || XVECLEN (prevpat, 0) != 2)
5706 continue;
5707 x = XVECEXP (prevpat, 0, 1);
5708 if (GET_CODE (x) != USE)
5709 continue;
5710 x = XEXP (x, 0);
5711 if (GET_CODE (x) == LABEL_REF && XEXP (x, 0) == vec_lab)
5712 break;
5713 }
5714 /* FIXME: This is a bug in the optimizer, but it seems harmless
5715 to just avoid panicking. */
5716 if (!prev)
5717 continue;
5718
5719 /* Emit the reference label of the braf where it belongs, right after
5720 the casesi_jump_2 (i.e. braf). */
5721 braf_label = XEXP (XEXP (SET_SRC (XVECEXP (prevpat, 0, 0)), 1), 0);
5722 emit_label_after (as_a <rtx_insn *> (braf_label), prev);
5723
5724 /* Fix up the ADDR_DIFF_VEC to be relative
5725 to the reference address of the braf. */
5726 XEXP (XEXP (pat, 0), 0) = braf_label;
5727 }
5728 }
5729
5730 /* BARRIER_OR_LABEL is either a BARRIER or a CODE_LABEL immediately following
5731 a barrier. Return the base 2 logarithm of the desired alignment. */
5732 int
5733 barrier_align (rtx_insn *barrier_or_label)
5734 {
5735 if (! barrier_or_label)
5736 return 0;
5737
5738 if (LABEL_P (barrier_or_label)
5739 && NEXT_INSN (barrier_or_label)
5740 && JUMP_TABLE_DATA_P (NEXT_INSN (barrier_or_label)))
5741 return 2;
5742
5743 if (BARRIER_P (barrier_or_label)
5744 && PREV_INSN (barrier_or_label)
5745 && JUMP_TABLE_DATA_P (PREV_INSN (barrier_or_label)))
5746 {
5747 rtx pat = PATTERN (PREV_INSN (barrier_or_label));
5748 /* If this is a very small table, we want to keep the alignment after
5749 the table to the minimum for proper code alignment. */
5750 return ((optimize_size
5751 || ((unsigned) XVECLEN (pat, 1) * GET_MODE_SIZE (GET_MODE (pat))
5752 <= (unsigned) 1 << (CACHE_LOG - 2)))
5753 ? 1 : align_jumps.levels[0].log);
5754 }
5755
5756 rtx_insn *next = next_active_insn (barrier_or_label);
5757
5758 if (! next)
5759 return 0;
5760
5761 rtx pat = PATTERN (next);
5762
5763 if (GET_CODE (pat) == UNSPEC_VOLATILE && XINT (pat, 1) == UNSPECV_ALIGN)
5764 /* This is a barrier in front of a constant table. */
5765 return 0;
5766
5767 if (optimize_size)
5768 return 0;
5769
5770 if (! TARGET_SH2 || ! optimize)
5771 return align_jumps.levels[0].log;
5772
5773 /* When fixing up pcloads, a constant table might be inserted just before
5774 the basic block that ends with the barrier. Thus, we can't trust the
5775 instruction lengths before that. */
5776 if (mdep_reorg_phase > SH_FIXUP_PCLOAD)
5777 {
5778 /* Check if there is an immediately preceding branch to the insn beyond
5779 the barrier. We must weigh the cost of discarding useful information
5780 from the current cache line when executing this branch and there is
5781 an alignment, against that of fetching unneeded insns in front of the
5782 branch target when there is no alignment. */
5783
5784 /* There are two delay_slot cases to consider. One is the simple case
5785 where the preceding branch is to the insn beyond the barrier (simple
5786 delay slot filling), and the other is where the preceding branch has
5787 a delay slot that is a duplicate of the insn after the barrier
5788 (fill_eager_delay_slots) and the branch is to the insn after the insn
5789 after the barrier. */
5790
5791 int slot, credit;
5792 bool jump_to_next = false;
5793
5794 /* Skip to the insn before the JUMP_INSN before the barrier under
5795 investigation. */
5796 rtx_insn *prev = prev_real_insn (prev_active_insn (barrier_or_label));
5797
5798 for (slot = 2, credit = (1 << (CACHE_LOG - 2)) + 2;
5799 credit >= 0 && prev && NONJUMP_INSN_P (prev);
5800 prev = prev_real_insn (prev))
5801 {
5802 jump_to_next = false;
5803 if (GET_CODE (PATTERN (prev)) == USE
5804 || GET_CODE (PATTERN (prev)) == CLOBBER)
5805 continue;
5806 if (rtx_sequence *prev_seq = dyn_cast <rtx_sequence *> (PATTERN (prev)))
5807 {
5808 prev = prev_seq->insn (1);
5809 if (INSN_UID (prev) == INSN_UID (next))
5810 {
5811 /* Delay slot was filled with insn at jump target. */
5812 jump_to_next = true;
5813 continue;
5814 }
5815 }
5816
5817 if (slot
5818 && get_attr_in_delay_slot (prev) == IN_DELAY_SLOT_YES)
5819 slot = 0;
5820 credit -= get_attr_length (prev);
5821 }
5822 if (prev && jump_to_label_p (prev))
5823 {
5824 rtx_insn *x;
5825 if (jump_to_next
5826 || next_real_insn (JUMP_LABEL_AS_INSN (prev)) == next
5827 /* If relax_delay_slots() decides NEXT was redundant
5828 with some previous instruction, it will have
5829 redirected PREV's jump to the following insn. */
5830 || JUMP_LABEL (prev) == next_nonnote_insn (next)
5831 /* There is no upper bound on redundant instructions
5832 that might have been skipped, but we must not put an
5833 alignment where none had been before. */
5834 || (x = (NEXT_INSN (NEXT_INSN (PREV_INSN (prev)))),
5835 (INSN_P (x)
5836 && (INSN_CODE (x) == CODE_FOR_block_branch_redirect
5837 || INSN_CODE (x) == CODE_FOR_indirect_jump_scratch
5838 || INSN_CODE (x) == CODE_FOR_stuff_delay_slot))))
5839 {
5840 rtx pat = PATTERN (prev);
5841 if (GET_CODE (pat) == PARALLEL)
5842 pat = XVECEXP (pat, 0, 0);
5843 if (credit - slot >= (GET_CODE (SET_SRC (pat)) == PC ? 2 : 0))
5844 return 0;
5845 }
5846 }
5847 }
5848
5849 return align_jumps.levels[0].log;
5850 }
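/* Examples derived from the code above: a CODE_LABEL that is immediately
   followed by its jump table yields a return value of 2, i.e. 4-byte
   alignment, while a barrier in front of a constant table (UNSPECV_ALIGN)
   yields 0, presumably because the align insn at the head of the table
   already provides the needed alignment.  */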
5851
5852 /* If we are inside a phony loop, almost any kind of label can turn up as the
5853 first one in the loop. Aligning a braf label causes incorrect switch
5854 destination addresses; we can detect braf labels because they are
5855 followed by a BARRIER.
5856 Applying loop alignment to small constant or switch tables is a waste
5857 of space, so we suppress this too. */
5858 int
5859 sh_loop_align (rtx_insn *label)
5860 {
5861 rtx_insn *next = label;
5862
5863 if (! optimize || optimize_size)
5864 return 0;
5865
5866 do
5867 next = next_nonnote_insn (next);
5868 while (next && LABEL_P (next));
5869
5870 if (! next
5871 || ! INSN_P (next)
5872 || recog_memoized (next) == CODE_FOR_consttable_2)
5873 return 0;
5874
5875 return align_loops.levels[0].log;
5876 }
5877
5878 /* Do a final pass over the function, just before delayed branch
5879 scheduling. */
5880 static void
5881 sh_reorg (void)
5882 {
5883 rtx_insn *first, *insn, *mova = NULL;
5884 int num_mova;
5885 rtx r0_rtx = gen_rtx_REG (Pmode, 0);
5886 rtx r0_inc_rtx = gen_rtx_POST_INC (Pmode, r0_rtx);
5887
5888 first = get_insns ();
5889 max_labelno_before_reorg = max_label_num ();
5890
5891 /* We must split call insns before introducing `mova's. If we're
5892 optimizing, they'll have already been split. Otherwise, make
5893 sure we don't split them too late. */
5894 if (! optimize)
5895 split_all_insns_noflow ();
5896
5897 /* If relaxing, generate pseudo-ops to associate function calls with
5898 the symbols they call. It does no harm to not generate these
5899 pseudo-ops. However, when we can generate them, it enables the
5900 linker to potentially relax the jsr to a bsr, and eliminate the
5901 register load and, possibly, the constant pool entry. */
5902
5903 mdep_reorg_phase = SH_INSERT_USES_LABELS;
5904 if (TARGET_RELAX)
5905 {
5906 /* Remove all REG_LABEL_OPERAND notes. We want to use them for our
5907 own purposes. This works because none of the remaining passes
5908 need to look at them.
5909
5910 ??? But it may break in the future. We should use a machine
5911 dependent REG_NOTE, or some other approach entirely. */
5912 for (insn = first; insn; insn = NEXT_INSN (insn))
5913 {
5914 if (INSN_P (insn))
5915 {
5916 rtx note;
5917
5918 while ((note = find_reg_note (insn, REG_LABEL_OPERAND,
5919 NULL_RTX)) != 0)
5920 remove_note (insn, note);
5921 }
5922 }
5923
5924 for (insn = first; insn; insn = NEXT_INSN (insn))
5925 {
5926 rtx pattern, reg, set, dies;
5927 rtx_code_label *label;
5928 rtx_insn *link, *scan;
5929 int rescan = 0, foundinsn = 0;
5930
5931 if (CALL_P (insn))
5932 {
5933 pattern = PATTERN (insn);
5934
5935 if (GET_CODE (pattern) == PARALLEL)
5936 pattern = XVECEXP (pattern, 0, 0);
5937 if (GET_CODE (pattern) == SET)
5938 pattern = SET_SRC (pattern);
5939
5940 if (GET_CODE (pattern) != CALL
5941 || !MEM_P (XEXP (pattern, 0)))
5942 continue;
5943
5944 reg = XEXP (XEXP (pattern, 0), 0);
5945 }
5946 else
5947 {
5948 reg = sfunc_uses_reg (insn);
5949 if (! reg)
5950 continue;
5951 }
5952
5953 if (!REG_P (reg))
5954 continue;
5955
5956 /* Try scanning backward to find where the register is set. */
5957 link = NULL;
5958 for (scan = PREV_INSN (insn);
5959 scan && !LABEL_P (scan);
5960 scan = PREV_INSN (scan))
5961 {
5962 if (! INSN_P (scan))
5963 continue;
5964
5965 if (! reg_mentioned_p (reg, scan))
5966 continue;
5967
5968 if (noncall_uses_reg (reg, scan, &set))
5969 break;
5970
5971 if (set)
5972 {
5973 link = scan;
5974 break;
5975 }
5976 }
5977
5978 if (! link)
5979 continue;
5980
5981 /* The register is set at LINK. */
5982
5983 /* We can only optimize the function call if the register is
5984 being set to a symbol. In theory, we could sometimes
5985 optimize calls to a constant location, but the assembler
5986 and linker do not support that at present. */
5987 if (GET_CODE (SET_SRC (set)) != SYMBOL_REF
5988 && GET_CODE (SET_SRC (set)) != LABEL_REF)
5989 continue;
5990
5991 /* Scan forward from LINK to the place where REG dies, and
5992 make sure that the only insns which use REG are
5993 themselves function calls. */
5994
5995 /* ??? This doesn't work for call targets that were allocated
5996 by reload, since there may not be a REG_DEAD note for the
5997 register. */
5998
5999 dies = NULL_RTX;
6000 for (scan = NEXT_INSN (link); scan; scan = NEXT_INSN (scan))
6001 {
6002 rtx scanset;
6003
6004 /* Don't try to trace forward past a CODE_LABEL if we haven't
6005 seen INSN yet. Ordinarily, we will only find the setting insn
6006 if it is in the same basic block. However,
6007 cross-jumping can insert code labels in between the load and
6008 the call, and can result in situations where a single call
6009 insn may have two targets depending on where we came from. */
6010
6011 if (LABEL_P (scan) && ! foundinsn)
6012 break;
6013
6014 if (! INSN_P (scan))
6015 continue;
6016
6017 /* Don't try to trace forward past a JUMP. To optimize
6018 safely, we would have to check that all the
6019 instructions at the jump destination did not use REG. */
6020
6021 if (JUMP_P (scan))
6022 break;
6023
6024 if (! reg_mentioned_p (reg, scan))
6025 continue;
6026
6027 if (noncall_uses_reg (reg, scan, &scanset))
6028 break;
6029
6030 if (scan == insn)
6031 foundinsn = 1;
6032
6033 if (scan != insn
6034 && (CALL_P (scan) || sfunc_uses_reg (scan)))
6035 {
6036 /* There is a function call to this register other
6037 than the one we are checking. If we optimize
6038 this call, we need to rescan again below. */
6039 rescan = 1;
6040 }
6041
6042 /* ??? We shouldn't have to worry about SCANSET here.
6043 We should just be able to check for a REG_DEAD note
6044 on a function call. However, the REG_DEAD notes are
6045 apparently not dependable around libcalls; c-torture
6046 execute/920501-2 is a test case. If SCANSET is set,
6047 then this insn sets the register, so it must have
6048 died earlier. Unfortunately, this will only handle
6049 the cases in which the register is, in fact, set in a
6050 later insn. */
6051
6052 /* ??? We shouldn't have to use FOUNDINSN here.
6053 This dates back to when we used LOG_LINKS to find
6054 the most recent insn which sets the register. */
6055
6056 if (foundinsn
6057 && (scanset
6058 || find_reg_note (scan, REG_DEAD, reg)))
6059 {
6060 dies = scan;
6061 break;
6062 }
6063 }
6064
6065 if (! dies)
6066 {
6067 /* Either there was a branch, or some insn used REG
6068 other than as a function call address. */
6069 continue;
6070 }
6071
6072 /* Create a code label, and put it in a REG_LABEL_OPERAND note
6073 on the insn which sets the register, and on each call insn
6074 which uses the register. In final_prescan_insn we look for
6075 the REG_LABEL_OPERAND notes, and output the appropriate label
6076 or pseudo-op. */
6077
6078 label = gen_label_rtx ();
6079 add_reg_note (link, REG_LABEL_OPERAND, label);
6080 add_reg_note (insn, REG_LABEL_OPERAND, label);
6081 if (rescan)
6082 {
6083 scan = link;
6084 do
6085 {
6086 rtx reg2;
6087
6088 scan = NEXT_INSN (scan);
6089 if (scan != insn
6090 && ((CALL_P (scan)
6091 && reg_mentioned_p (reg, scan))
6092 || ((reg2 = sfunc_uses_reg (scan))
6093 && REGNO (reg2) == REGNO (reg))))
6094 add_reg_note (scan, REG_LABEL_OPERAND, label);
6095 }
6096 while (scan != dies);
6097 }
6098 }
6099 }
6100
6101 if (TARGET_SH2)
6102 fixup_addr_diff_vecs (first);
6103
6104 if (optimize)
6105 {
6106 mdep_reorg_phase = SH_SHORTEN_BRANCHES0;
6107 shorten_branches (first);
6108 }
6109
6110 /* Scan the function looking for move instructions which have to be
6111 changed to pc-relative loads and insert the literal tables. */
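/* For illustration (assumed output, not taken from the sources): a "broken
   move" such as loading the 32-bit constant 0x12345678, which has no
   immediate form on SH, becomes a pc-relative load from a label, with the
   constant placed in a literal table dumped behind a nearby barrier:

       mov.l  .L100, rN    ! pc-relative load
       ...
       .align 2
     .L100:
       .long  0x12345678

   (Register and label names are made up for the example.)  */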
6112 mdep_reorg_phase = SH_FIXUP_PCLOAD;
6113 for (insn = first, num_mova = 0; insn; insn = NEXT_INSN (insn))
6114 {
6115 if (mova_p (insn))
6116 {
6117 /* ??? basic block reordering can move a switch table dispatch
6118 below the switch table. Check if that has happened.
6119 We only have the addresses available when optimizing; but then,
6120 this check shouldn't be needed when not optimizing. */
6121 if (!untangle_mova (&num_mova, &mova, insn))
6122 {
6123 insn = mova;
6124 num_mova = 0;
6125 }
6126 }
6127 else if (JUMP_TABLE_DATA_P (insn)
6128 && GET_CODE (PATTERN (insn)) == ADDR_DIFF_VEC
6129 && num_mova
6130 /* ??? loop invariant motion can also move a mova out of a
6131 loop. Since loop does this code motion anyway, maybe we
6132 should wrap UNSPEC_MOVA into a CONST, so that reload can
6133 move it back. */
6134 && ((num_mova > 1
6135 && GET_MODE (prev_nonnote_insn (insn)) == VOIDmode)
6136 || (prev_nonnote_insn (insn)
6137 == XEXP (MOVA_LABELREF (mova), 0))))
6138 {
6139 rtx_insn *scan;
6140 int total;
6141
6142 num_mova--;
6143
6144 /* Some code might have been inserted between the mova and
6145 its ADDR_DIFF_VEC. Check if the mova is still in range. */
6146 for (scan = mova, total = 0; scan != insn; scan = NEXT_INSN (scan))
6147 total += get_attr_length (scan);
6148
6149 /* The range of mova is 1020; add 4 because the PC counts from the
6150 address of the second instruction after this one, and subtract 2 in
6151 case the PC is 2-byte aligned. Possible alignment needed for the
6152 ADDR_DIFF_VEC cancels out with the alignment effects of the mova itself. */
6153 if (total > 1022)
6154 {
6155 /* Change the mova into a load, and restart scanning
6156 there. broken_move will then return true for mova. */
6157 fixup_mova (mova);
6158 insn = mova;
6159 }
6160 }
6161 if (broken_move (insn)
6162 || (NONJUMP_INSN_P (insn)
6163 && recog_memoized (insn) == CODE_FOR_casesi_worker_2))
6164 {
6165 rtx_insn *scan;
6166 /* Scan ahead looking for a barrier to stick the constant table
6167 behind. */
6168 rtx_insn *barrier = find_barrier (num_mova, mova, insn);
6169 rtx_insn *last_float_move = NULL;
6170 rtx last_float = 0, *last_float_addr = NULL;
6171 int need_aligned_label = 0;
6172
6173 if (num_mova && ! mova_p (mova))
6174 {
6175 /* find_barrier had to change the first mova into a
6176 pcload; thus, we have to start with this new pcload. */
6177 insn = mova;
6178 num_mova = 0;
6179 }
6180 /* Now find all the moves between the points and modify them. */
6181 for (scan = insn; scan != barrier; scan = NEXT_INSN (scan))
6182 {
6183 if (LABEL_P (scan))
6184 last_float = 0;
6185 if (NONJUMP_INSN_P (scan)
6186 && recog_memoized (scan) == CODE_FOR_casesi_worker_2)
6187 need_aligned_label = 1;
6188 if (broken_move (scan))
6189 {
6190 rtx *patp = &PATTERN (scan), pat = *patp;
6191 rtx src, dst;
6192 rtx lab;
6193 rtx newsrc;
6194 machine_mode mode;
6195
6196 if (GET_CODE (pat) == PARALLEL)
6197 patp = &XVECEXP (pat, 0, 0), pat = *patp;
6198 src = SET_SRC (pat);
6199 dst = SET_DEST (pat);
6200 mode = GET_MODE (dst);
6201
6202 if (mode == SImode && satisfies_constraint_I16 (src)
6203 && REGNO (dst) != FPUL_REG)
6204 {
6205 int offset = 0;
6206
6207 mode = HImode;
6208 while (GET_CODE (dst) == SUBREG)
6209 {
6210 offset += subreg_regno_offset (REGNO (SUBREG_REG (dst)),
6211 GET_MODE (SUBREG_REG (dst)),
6212 SUBREG_BYTE (dst),
6213 GET_MODE (dst));
6214 dst = SUBREG_REG (dst);
6215 }
6216 dst = gen_rtx_REG (HImode, REGNO (dst) + offset);
6217 }
6218 if (REG_P (dst) && FP_ANY_REGISTER_P (REGNO (dst)))
6219 {
6220 /* This must be an insn that clobbers r0. */
6221 rtx *clobberp = &XVECEXP (PATTERN (scan), 0,
6222 XVECLEN (PATTERN (scan), 0)
6223 - 1);
6224 rtx clobber = *clobberp;
6225
6226 gcc_assert (GET_CODE (clobber) == CLOBBER
6227 && rtx_equal_p (XEXP (clobber, 0), r0_rtx));
6228
6229 if (last_float
6230 && reg_set_between_p (r0_rtx, last_float_move, scan))
6231 last_float = 0;
6232 lab = add_constant (src, mode, last_float);
6233 if (lab)
6234 emit_insn_before (gen_mova (lab), scan);
6235 else
6236 {
6237 /* There will be a REG_UNUSED note for r0 on
6238 LAST_FLOAT_MOVE; we have to change it to REG_INC;
6239 otherwise reorg's mark_target_live_regs will not
6240 consider r0 to be used, and we could end up with a delay
6241 slot insn in front of SCAN that clobbers r0. */
6242 rtx note
6243 = find_regno_note (last_float_move, REG_UNUSED, 0);
6244
6245 /* If we are not optimizing, then there may not be
6246 a note. */
6247 if (note)
6248 PUT_REG_NOTE_KIND (note, REG_INC);
6249
6250 *last_float_addr = r0_inc_rtx;
6251 }
6252 last_float_move = scan;
6253 last_float = src;
6254 newsrc = gen_const_mem (mode,
6255 (((TARGET_SH4 && ! TARGET_FMOVD)
6256 || REGNO (dst) == FPUL_REG)
6257 ? r0_inc_rtx
6258 : r0_rtx));
6259 last_float_addr = &XEXP (newsrc, 0);
6260
6261 /* Remove the clobber of r0. */
6262 *clobberp = gen_rtx_CLOBBER (GET_MODE (clobber),
6263 gen_rtx_SCRATCH (Pmode));
6264 }
6265 /* This is a mova needing a label. Create it. */
6266 else if (GET_CODE (src) == UNSPEC
6267 && XINT (src, 1) == UNSPEC_MOVA
6268 && GET_CODE (XVECEXP (src, 0, 0)) == CONST)
6269 {
6270 lab = add_constant (XVECEXP (src, 0, 0), mode, 0);
6271 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6272 newsrc = gen_rtx_UNSPEC (SImode,
6273 gen_rtvec (1, newsrc),
6274 UNSPEC_MOVA);
6275 }
6276 else if (GET_CODE (src) == UNSPEC_VOLATILE
6277 && XINT (src, 1) == UNSPECV_SP_SWITCH_B)
6278 {
6279 newsrc = XVECEXP (src, 0, 0);
6280 XVECEXP (src, 0, 0) = gen_const_mem (mode, newsrc);
6281 INSN_CODE (scan) = -1;
6282 continue;
6283 }
6284 else
6285 {
6286 lab = add_constant (src, mode, 0);
6287 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
6288 newsrc = gen_const_mem (mode, newsrc);
6289 }
6290 *patp = gen_rtx_SET (dst, newsrc);
6291 INSN_CODE (scan) = -1;
6292 }
6293 }
6294 dump_table (need_aligned_label ? insn : 0, barrier);
6295 insn = barrier;
6296 }
6297 }
6298 label_ref_list_d_pool.release ();
6299 for (insn = first; insn; insn = NEXT_INSN (insn))
6300 PUT_MODE (insn, VOIDmode);
6301
6302 mdep_reorg_phase = SH_SHORTEN_BRANCHES1;
6303 INSN_ADDRESSES_FREE ();
6304 split_branches (first);
6305
6306 /* The INSN_REFERENCES_ARE_DELAYED in sh.h is problematic because it
6307 also has an effect on the register that holds the address of the sfunc.
6308 Insert an extra dummy insn in front of each sfunc that pretends to
6309 use this register. */
6310 if (flag_delayed_branch)
6311 {
6312 for (insn = first; insn; insn = NEXT_INSN (insn))
6313 {
6314 rtx reg = sfunc_uses_reg (insn);
6315
6316 if (! reg)
6317 continue;
6318 emit_insn_before (gen_use_sfunc_addr (reg), insn);
6319 }
6320 }
6321 mdep_reorg_phase = SH_AFTER_MDEP_REORG;
6322 }
6323
6324 /* Return the UID of the insn that follows the specified label. */
6325 int
6326 get_dest_uid (rtx_insn *label, int max_uid)
6327 {
6328 rtx_insn *dest = next_real_insn (label);
6329
6330 if (! dest)
6331 /* This can happen for an undefined label. */
6332 return 0;
6333 int dest_uid = INSN_UID (dest);
6334 /* If this is a newly created branch redirection blocking instruction,
6335 we cannot index the branch_uid or insn_addresses arrays with its
6336 uid. But then, we won't need to, because the actual destination is
6337 the following branch. */
6338 while (dest_uid >= max_uid)
6339 {
6340 dest = NEXT_INSN (dest);
6341 dest_uid = INSN_UID (dest);
6342 }
6343 if (JUMP_P (dest) && GET_CODE (PATTERN (dest)) == RETURN)
6344 return 0;
6345 return dest_uid;
6346 }
6347
6348 /* Split condbranches that are out of range. Also add clobbers for
6349 scratch registers that are needed in far jumps.
6350 We do this before delay slot scheduling, so that it can take our
6351 newly created instructions into account. It also allows us to
6352 find branches with common targets more easily. */
6353 static void
6354 split_branches (rtx_insn *first)
6355 {
6356 rtx_insn *insn;
6357 struct far_branch **uid_branch, *far_branch_list = 0;
6358 int max_uid = get_max_uid ();
6359 int ok;
6360
6361 /* Find out which branches are out of range. */
6362 shorten_branches (first);
6363
6364 uid_branch = (struct far_branch **) alloca (max_uid * sizeof *uid_branch);
6365 memset ((char *) uid_branch, 0, max_uid * sizeof *uid_branch);
6366
6367 for (insn = first; insn; insn = NEXT_INSN (insn))
6368 if (! INSN_P (insn))
6369 continue;
6370 else if (insn->deleted ())
6371 {
6372 /* Shorten_branches would split this instruction again,
6373 so transform it into a note. */
6374 SET_INSN_DELETED (insn);
6375 }
6376 else if (JUMP_P (insn))
6377 {
6378 enum attr_type type = get_attr_type (insn);
6379 if (type == TYPE_CBRANCH)
6380 {
6381 rtx_insn *next, *beyond;
6382
6383 if (get_attr_length (insn) > 4)
6384 {
6385 rtx src = SET_SRC (PATTERN (insn));
6386 rtx_insn *olabel = safe_as_a <rtx_insn *> (XEXP (XEXP (src, 1), 0));
6387 int addr = INSN_ADDRESSES (INSN_UID (insn));
6388 rtx_insn *label = 0;
6389 int dest_uid = get_dest_uid (olabel, max_uid);
6390 struct far_branch *bp = uid_branch[dest_uid];
6391
6392 /* redirect_jump needs a valid JUMP_LABEL, and it might delete
6393 the label if the LABEL_NUSES count drops to zero. There is
6394 always a jump_optimize pass that sets these values, but it
6395 proceeds to delete unreferenced code, and then if not
6396 optimizing, to un-delete the deleted instructions, thus
6397 leaving labels with use counts that are too low. */
6398 if (! optimize)
6399 {
6400 JUMP_LABEL (insn) = olabel;
6401 LABEL_NUSES (olabel)++;
6402 }
6403 if (! bp)
6404 {
6405 bp = (struct far_branch *) alloca (sizeof *bp);
6406 uid_branch[dest_uid] = bp;
6407 bp->prev = far_branch_list;
6408 far_branch_list = bp;
6409 bp->far_label = as_a <rtx_insn *> (
6410 XEXP (XEXP (SET_SRC (PATTERN (insn)), 1),
6411 0));
6412 LABEL_NUSES (bp->far_label)++;
6413 }
6414 else
6415 {
6416 label = bp->near_label;
6417 if (! label && bp->address - addr >= CONDJUMP_MIN)
6418 {
6419 rtx_insn *block = bp->insert_place;
6420
6421 if (GET_CODE (PATTERN (block)) == RETURN)
6422 block = PREV_INSN (block);
6423 else
6424 block = gen_block_redirect (block,
6425 bp->address, 2);
6426 label = emit_label_after (gen_label_rtx (),
6427 PREV_INSN (block));
6428 bp->near_label = label;
6429 }
6430 else if (label && ! NEXT_INSN (label))
6431 {
6432 if (addr + 2 - bp->address <= CONDJUMP_MAX)
6433 bp->insert_place = insn;
6434 else
6435 gen_far_branch (bp);
6436 }
6437 }
6438 if (! label
6439 || (NEXT_INSN (label) && bp->address - addr < CONDJUMP_MIN))
6440 {
6441 bp->near_label = label = gen_label_rtx ();
6442 bp->insert_place = insn;
6443 bp->address = addr;
6444 }
6445 ok = redirect_jump (as_a <rtx_jump_insn *> (insn), label, 0);
6446 gcc_assert (ok);
6447 }
6448 else
6449 {
6450 /* get_attr_length (insn) == 2 */
6451 /* Check if we have a pattern where reorg wants to redirect
6452 the branch to a label from an unconditional branch that
6453 is too far away. */
6454 /* We can't use JUMP_LABEL here because it might be undefined
6455 when not optimizing. */
6456 /* A syntax error might cause beyond to be NULL_RTX. */
6457 rtx temp = XEXP (XEXP (SET_SRC (PATTERN (insn)), 1), 0);
6458 beyond = next_active_insn (as_a<rtx_insn *> (temp));
6459
6460 if (beyond
6461 && (JUMP_P (beyond)
6462 || ((beyond = next_active_insn (beyond))
6463 && JUMP_P (beyond)))
6464 && GET_CODE (PATTERN (beyond)) == SET
6465 && recog_memoized (beyond) == CODE_FOR_jump_compact
6466 && ((INSN_ADDRESSES
6467 (INSN_UID (XEXP (SET_SRC (PATTERN (beyond)), 0)))
6468 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6469 > 252 + 258 + 2))
6470 gen_block_redirect (beyond,
6471 INSN_ADDRESSES (INSN_UID (beyond)), 1);
6472 }
6473
6474 next = next_active_insn (insn);
6475
6476 if (next
6477 && (JUMP_P (next)
6478 || ((next = next_active_insn (next))
6479 && JUMP_P (next)))
6480 && GET_CODE (PATTERN (next)) == SET
6481 && recog_memoized (next) == CODE_FOR_jump_compact
6482 && ((INSN_ADDRESSES
6483 (INSN_UID (XEXP (SET_SRC (PATTERN (next)), 0)))
6484 - INSN_ADDRESSES (INSN_UID (insn)) + (unsigned) 252)
6485 > 252 + 258 + 2))
6486 gen_block_redirect (next, INSN_ADDRESSES (INSN_UID (next)), 1);
6487 }
6488 else if (type == TYPE_JUMP || type == TYPE_RETURN)
6489 {
6490 int addr = INSN_ADDRESSES (INSN_UID (insn));
6491 rtx_insn *far_label = 0;
6492 int dest_uid = 0;
6493 struct far_branch *bp;
6494
6495 if (type == TYPE_JUMP)
6496 {
6497 if (CROSSING_JUMP_P (insn))
6498 {
6499 emit_insn_before (gen_block_branch_redirect (const0_rtx),
6500 insn);
6501 continue;
6502 }
6503
6504 far_label = as_a <rtx_insn *> (
6505 XEXP (SET_SRC (PATTERN (insn)), 0));
6506 dest_uid = get_dest_uid (far_label, max_uid);
6507 if (! dest_uid)
6508 {
6509 /* Parse errors can lead to labels outside
6510 the insn stream. */
6511 if (! NEXT_INSN (far_label))
6512 continue;
6513
6514 if (! optimize)
6515 {
6516 JUMP_LABEL (insn) = far_label;
6517 LABEL_NUSES (far_label)++;
6518 }
6519 redirect_jump (as_a <rtx_jump_insn *> (insn), ret_rtx, 1);
6520 far_label = 0;
6521 }
6522 }
6523 bp = uid_branch[dest_uid];
6524 if (! bp)
6525 {
6526 bp = (struct far_branch *) alloca (sizeof *bp);
6527 uid_branch[dest_uid] = bp;
6528 bp->prev = far_branch_list;
6529 far_branch_list = bp;
6530 bp->near_label = 0;
6531 bp->far_label = far_label;
6532 if (far_label)
6533 LABEL_NUSES (far_label)++;
6534 }
6535 else if (bp->near_label && ! NEXT_INSN (bp->near_label))
6536 if (addr - bp->address <= CONDJUMP_MAX)
6537 emit_label_after (bp->near_label, PREV_INSN (insn));
6538 else
6539 {
6540 gen_far_branch (bp);
6541 bp->near_label = 0;
6542 }
6543 else
6544 bp->near_label = 0;
6545 bp->address = addr;
6546 bp->insert_place = insn;
6547 if (! far_label)
6548 emit_insn_before (gen_block_branch_redirect (const0_rtx), insn);
6549 else
6550 gen_block_redirect (insn, addr, bp->near_label ? 2 : 0);
6551 }
6552 }
6553 /* Generate all pending far branches,
6554 and free our references to the far labels. */
6555 while (far_branch_list)
6556 {
6557 if (far_branch_list->near_label
6558 && ! NEXT_INSN (far_branch_list->near_label))
6559 gen_far_branch (far_branch_list);
6560 if (optimize
6561 && far_branch_list->far_label
6562 && ! --LABEL_NUSES (far_branch_list->far_label))
6563 delete_insn (far_branch_list->far_label);
6564 far_branch_list = far_branch_list->prev;
6565 }
6566
6567 /* Instruction length information is no longer valid due to the new
6568 instructions that have been generated. */
6569 init_insn_lengths ();
6570 }
6571
6572 /* Dump out instruction addresses, which is useful for debugging the
6573 constant pool table stuff.
6574
6575 If relaxing, output the label and pseudo-ops used to link together
6576 calls and the instruction which set the registers.
6577
6578 ??? The addresses printed by this routine for insns are nonsense for
6579 insns which are inside of a sequence where none of the inner insns have
6580 variable length. This is because the second pass of shorten_branches
6581 does not bother to update them. */
6582 void
6583 final_prescan_insn (rtx_insn *insn, rtx *opvec ATTRIBUTE_UNUSED,
6584 int noperands ATTRIBUTE_UNUSED)
6585 {
6586 if (TARGET_DUMPISIZE)
6587 fprintf (asm_out_file, "\n! at %04x\n", INSN_ADDRESSES (INSN_UID (insn)));
6588
6589 if (TARGET_RELAX)
6590 {
6591 if (rtx note = find_reg_note (insn, REG_LABEL_OPERAND, NULL_RTX))
6592 {
6593 rtx pattern = PATTERN (insn);
6594 if (GET_CODE (pattern) == PARALLEL)
6595 pattern = XVECEXP (pattern, 0, 0);
6596 switch (GET_CODE (pattern))
6597 {
6598 case SET:
6599 if (GET_CODE (SET_SRC (pattern)) != CALL
6600 && get_attr_type (insn) != TYPE_SFUNC)
6601 {
6602 targetm.asm_out.internal_label
6603 (asm_out_file, "L", CODE_LABEL_NUMBER (XEXP (note, 0)));
6604 break;
6605 }
6606 /* FALLTHROUGH */
6607 case CALL:
6608 asm_fprintf (asm_out_file, "\t.uses %LL%d\n",
6609 CODE_LABEL_NUMBER (XEXP (note, 0)));
6610 break;
6611
6612 default:
6613 gcc_unreachable ();
6614 }
6615 }
6616 }
6617 }
6618
6619 /* Dump out any constants accumulated in the final pass. These will
6620 only be labels. */
6621 const char *
6622 output_jump_label_table (void)
6623 {
6624 if (pool_size)
6625 {
6626 fprintf (asm_out_file, "\t.align 2\n");
6627 for (int i = 0; i < pool_size; i++)
6628 {
6629 pool_node *p = &pool_vector[i];
6630
6631 (*targetm.asm_out.internal_label) (asm_out_file, "L",
6632 CODE_LABEL_NUMBER (p->label));
6633 output_asm_insn (".long %O0", &p->value);
6634 }
6635 pool_size = 0;
6636 }
6637
6638 return "";
6639 }
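/* Illustrative output only (the exact label spelling depends on the
   target's local label prefix): with two accumulated pool entries whose
   values are the labels .L5 and .L7, the code above would emit something
   like

       .align 2
     .L12:
       .long  .L5
     .L13:
       .long  .L7

   where .L12 and .L13 are the pool entries' own labels.  */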
6640 \f
6641 /* A full frame looks like:
6642
6643 arg-5
6644 arg-4
6645 [ if current_function_anonymous_args
6646 arg-3
6647 arg-2
6648 arg-1
6649 arg-0 ]
6650 saved-fp
6651 saved-r10
6652 saved-r11
6653 saved-r12
6654 saved-pr
6655 local-n
6656 ..
6657 local-1
6658 local-0 <- fp points here.
6659
6660 Number of bytes pushed for anonymous args, used to pass information
6661 between expand_prologue and expand_epilogue.
6662
6663 Adjust the stack by SIZE bytes. REG holds the rtl of the register to be
6664 adjusted. If epilogue_p is zero, this is for a prologue; otherwise, it's
6665 for an epilogue and a negative value means that it's for a sibcall
6666 epilogue. If LIVE_REGS_MASK is nonzero, it points to a HARD_REG_SET of
6667 all the registers that are about to be restored, and hence dead. */
6668 static void
6669 output_stack_adjust (int size, rtx reg, int epilogue_p,
6670 HARD_REG_SET *live_regs_mask, bool frame_p)
6671 {
6672 rtx_insn *(*emit_fn) (rtx) = frame_p ? &emit_frame_insn : &emit_insn;
6673 if (size)
6674 {
6675 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
6676
6677 /* This test is bogus, as output_stack_adjust is used to re-align the
6678 stack. */
6679 #if 0
6680 gcc_assert (!(size % align));
6681 #endif
6682
6683 if (CONST_OK_FOR_ADD (size))
6684 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size)));
6685 /* Try to do it with two partial adjustments; however, we must make
6686 sure that the stack is properly aligned at all times, in case
6687 an interrupt occurs between the two partial adjustments. */
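/* Worked example (illustrative): with SIZE == 200 and a 4-byte alignment,
   the two emitted adjustments are (200 / 2 & -4) == 100 and
   200 - 100 == 100; each fits the add-immediate range, and the
   intermediate stack pointer value stays 4-byte aligned.  */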
6688 else if (CONST_OK_FOR_ADD (size / 2 & -align)
6689 && CONST_OK_FOR_ADD (size - (size / 2 & -align)))
6690 {
6691 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size / 2 & -align)));
6692 emit_fn (GEN_ADD3 (reg, reg, GEN_INT (size - (size / 2 & -align))));
6693 }
6694 else
6695 {
6696 rtx const_reg;
6697 rtx insn;
6698 int temp = epilogue_p ? 7 : 1;
6699 int i;
6700
6701 /* If TEMP is invalid, we could temporarily save a general
6702 register to MACL. However, there is currently no need
6703 to handle this case, so just die when we see it. */
6704 if (epilogue_p < 0
6705 || current_function_interrupt
6706 || ! call_really_used_regs[temp] || fixed_regs[temp])
6707 temp = -1;
6708 if (temp < 0 && ! current_function_interrupt && epilogue_p >= 0)
6709 {
6710 HARD_REG_SET temps;
6711 COPY_HARD_REG_SET (temps, call_used_reg_set);
6712 AND_COMPL_HARD_REG_SET (temps, call_fixed_reg_set);
6713 if (epilogue_p > 0)
6714 {
6715 int nreg = 0;
6716 if (crtl->return_rtx)
6717 {
6718 machine_mode mode;
6719 mode = GET_MODE (crtl->return_rtx);
6720 if (BASE_RETURN_VALUE_REG (mode) == FIRST_RET_REG)
6721 nreg = hard_regno_nregs (FIRST_RET_REG, mode);
6722 }
6723 for (i = 0; i < nreg; i++)
6724 CLEAR_HARD_REG_BIT (temps, FIRST_RET_REG + i);
6725 if (crtl->calls_eh_return)
6726 {
6727 CLEAR_HARD_REG_BIT (temps, EH_RETURN_STACKADJ_REGNO);
6728 for (i = 0; i <= 3; i++)
6729 CLEAR_HARD_REG_BIT (temps, EH_RETURN_DATA_REGNO (i));
6730 }
6731 }
6732 if (epilogue_p <= 0)
6733 {
6734 for (i = FIRST_PARM_REG;
6735 i < FIRST_PARM_REG + NPARM_REGS (SImode); i++)
6736 CLEAR_HARD_REG_BIT (temps, i);
6737 if (cfun->static_chain_decl != NULL)
6738 CLEAR_HARD_REG_BIT (temps, STATIC_CHAIN_REGNUM);
6739 }
6740 temp = scavenge_reg (&temps);
6741 }
6742 if (temp < 0 && live_regs_mask)
6743 {
6744 HARD_REG_SET temps;
6745
6746 COPY_HARD_REG_SET (temps, *live_regs_mask);
6747 CLEAR_HARD_REG_BIT (temps, REGNO (reg));
6748 temp = scavenge_reg (&temps);
6749 }
6750 if (temp < 0)
6751 {
6752 rtx adj_reg, tmp_reg, mem;
6753
6754 /* If we reached here, the most likely case is the (sibcall)
6755 epilogue. Put a special push/pop sequence for such a case as
6756 a last resort. This looks lengthy, but it should not be a problem
6757 because this case is very rare. */
6758 gcc_assert (epilogue_p);
6759
6760 /* ??? There is still the slight possibility that r4 or
6761 r5 have been reserved as fixed registers or assigned
6762 as global registers, and they change during an
6763 interrupt. There are possible ways to handle this:
6764
6765 - If we are adjusting the frame pointer (r14), we can do
6766 with a single temp register and an ordinary push / pop
6767 on the stack.
6768 - Grab any call-used or call-saved registers (i.e. not
6769 fixed or globals) for the temps we need. We might
6770 also grab r14 if we are adjusting the stack pointer.
6771 If we can't find enough available registers, issue
6772 a diagnostic and die - the user must have reserved
6773 way too many registers.
6774 But since all this is rather unlikely to happen and
6775 would require extra testing, we just die if r4 / r5
6776 are not available. */
6777 gcc_assert (!fixed_regs[4] && !fixed_regs[5]
6778 && !global_regs[4] && !global_regs[5]);
6779
6780 adj_reg = gen_rtx_REG (GET_MODE (reg), 4);
6781 tmp_reg = gen_rtx_REG (GET_MODE (reg), 5);
6782 emit_move_insn (gen_tmp_stack_mem (Pmode, reg), adj_reg);
6783 emit_insn (GEN_MOV (adj_reg, GEN_INT (size)));
6784 emit_insn (GEN_ADD3 (adj_reg, adj_reg, reg));
6785 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6786 emit_move_insn (mem, tmp_reg);
6787 emit_move_insn (tmp_reg, gen_tmp_stack_mem (Pmode, reg));
6788 mem = gen_tmp_stack_mem (Pmode, gen_rtx_PRE_DEC (Pmode, adj_reg));
6789 emit_move_insn (mem, tmp_reg);
6790 emit_move_insn (reg, adj_reg);
6791 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6792 emit_move_insn (adj_reg, mem);
6793 mem = gen_tmp_stack_mem (Pmode, gen_rtx_POST_INC (Pmode, reg));
6794 emit_move_insn (tmp_reg, mem);
6795 /* Tell flow the insns that pop r4/r5 aren't dead. */
6796 emit_use (tmp_reg);
6797 emit_use (adj_reg);
6798 return;
6799 }
6800 const_reg = gen_rtx_REG (GET_MODE (reg), temp);
6801
6802 /* If SIZE is negative, subtract the positive value.
6803 This sometimes allows a constant pool entry to be shared
6804 between prologue and epilogue code. */
6805 if (size < 0)
6806 {
6807 emit_insn (GEN_MOV (const_reg, GEN_INT (-size)));
6808 insn = emit_fn (GEN_SUB3 (reg, reg, const_reg));
6809 }
6810 else
6811 {
6812 emit_insn (GEN_MOV (const_reg, GEN_INT (size)));
6813 insn = emit_fn (GEN_ADD3 (reg, reg, const_reg));
6814 }
6815 add_reg_note (insn, REG_FRAME_RELATED_EXPR,
6816 gen_rtx_SET (reg, gen_rtx_PLUS (SImode, reg,
6817 GEN_INT (size))));
6818 }
6819 }
6820 }
6821
6822 /* Emit the specified insn and mark it as frame related. */
6823 static rtx_insn *
6824 emit_frame_insn (rtx x)
6825 {
6826 rtx_insn *insn = emit_insn (x);
6827 RTX_FRAME_RELATED_P (insn) = 1;
6828 return insn;
6829 }
6830
6831 /* Output RTL to push register RN onto the stack. */
6832 static rtx
6833 push (int rn)
6834 {
6835 rtx x;
6836 if (rn == FPUL_REG)
6837 x = gen_push_fpul ();
6838 else if (rn == FPSCR_REG)
6839 x = gen_push_fpscr ();
6840 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD
6841 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6842 {
6843 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6844 return NULL_RTX;
6845 x = gen_push_4 (gen_rtx_REG (DFmode, rn));
6846 }
6847 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6848 x = gen_push_e (gen_rtx_REG (SFmode, rn));
6849 else
6850 x = gen_push (gen_rtx_REG (SImode, rn));
6851
6852 x = emit_frame_insn (x);
6853 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6854 return x;
6855 }
6856
6857 /* Output RTL to pop register RN from the stack. */
6858 static void
6859 pop (int rn)
6860 {
6861 rtx x, sp_reg, reg;
6862 if (rn == FPUL_REG)
6863 x = gen_pop_fpul ();
6864 else if (rn == FPSCR_REG)
6865 x = gen_pop_fpscr ();
6866 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD
6867 && ! TARGET_FPU_SINGLE && FP_OR_XD_REGISTER_P (rn))
6868 {
6869 if (FP_REGISTER_P (rn) && (rn - FIRST_FP_REG) & 1)
6870 return;
6871 x = gen_pop_4 (gen_rtx_REG (DFmode, rn));
6872 }
6873 else if (TARGET_SH2E && FP_REGISTER_P (rn))
6874 x = gen_pop_e (gen_rtx_REG (SFmode, rn));
6875 else
6876 x = gen_pop (gen_rtx_REG (SImode, rn));
6877
6878 x = emit_insn (x);
6879
6880 sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6881 reg = copy_rtx (GET_CODE (PATTERN (x)) == PARALLEL
6882 ? SET_DEST (XVECEXP (PATTERN (x), 0, 0))
6883 : SET_DEST (PATTERN (x)));
6884 add_reg_note (x, REG_CFA_RESTORE, reg);
6885 add_reg_note (x, REG_CFA_ADJUST_CFA,
6886 gen_rtx_SET (sp_reg,
6887 plus_constant (SImode, sp_reg,
6888 GET_MODE_SIZE (GET_MODE (reg)))));
6889 add_reg_note (x, REG_INC, gen_rtx_REG (SImode, STACK_POINTER_REGNUM));
6890 RTX_FRAME_RELATED_P (x) = 1;
6891 }
6892
6893 /* Generate code to push the regs specified in the mask. */
6894 static void
6895 push_regs (HARD_REG_SET *mask, bool interrupt_handler)
6896 {
6897 bool skip_fpscr = false;
6898
6899 /* Push PR last; this gives better latencies after the prologue, and
6900 candidates for the return delay slot when there are no general
6901 registers pushed. */
6902 for (int i = interrupt_handler ? LAST_BANKED_REG + 1 : 0;
6903 i < FIRST_PSEUDO_REGISTER; i++)
6904 {
6905 /* If this is an interrupt handler, and the SZ bit varies,
6906 and we have to push any floating point register, we need
6907 to switch to the correct precision first. */
6908 if (i == FIRST_FP_REG && interrupt_handler && TARGET_FMOVD
6909 && hard_reg_set_intersect_p (*mask, reg_class_contents[DF_REGS]))
6910 {
6911 HARD_REG_SET unsaved;
6912
6913 push (FPSCR_REG);
6914 COMPL_HARD_REG_SET (unsaved, *mask);
6915 fpscr_set_from_mem (NORMAL_MODE (FP_MODE), unsaved);
6916 skip_fpscr = true;
6917 }
6918 if (i != PR_REG
6919 && (i != FPSCR_REG || ! skip_fpscr)
6920 && TEST_HARD_REG_BIT (*mask, i))
6921 {
6922 /* If the ISR has RESBANK attribute assigned, don't push any of
6923 the following registers: R0-R14, MACH, MACL and GBR. */
6924 if (! (sh_cfun_resbank_handler_p ()
6925 && ((i >= FIRST_GENERAL_REG && i < LAST_GENERAL_REG)
6926 || i == MACH_REG
6927 || i == MACL_REG
6928 || i == GBR_REG)))
6929 push (i);
6930 }
6931 }
6932
6933 /* Push banked registers last to improve delay slot opportunities. */
6934 if (interrupt_handler)
6935 {
6936 bool use_movml = false;
6937
6938 if (TARGET_SH2A)
6939 {
6940 unsigned int count = 0;
6941
6942 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6943 if (TEST_HARD_REG_BIT (*mask, i))
6944 count++;
6945 else
6946 break;
6947
6948 /* Use movml when all banked registers are pushed. */
6949 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
6950 use_movml = true;
6951 }
6952
6953 if (sh_cfun_resbank_handler_p ())
6954 ; /* Do nothing. */
6955 else if (use_movml)
6956 {
6957 rtx x, mem, reg, set;
6958 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
6959
6960 /* We must avoid scheduling the multiple-store insn together
6961 with other insns. */
6962 emit_insn (gen_blockage ());
6963 x = gen_movml_push_banked (sp_reg);
6964 x = emit_frame_insn (x);
6965 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6966 {
6967 mem = gen_rtx_MEM (SImode, plus_constant (Pmode, sp_reg, i * 4));
6968 reg = gen_rtx_REG (SImode, i);
6969 add_reg_note (x, REG_CFA_OFFSET, gen_rtx_SET (mem, reg));
6970 }
6971
6972 set = gen_rtx_SET (sp_reg, plus_constant (Pmode, sp_reg, - 32));
6973 add_reg_note (x, REG_CFA_ADJUST_CFA, set);
6974 emit_insn (gen_blockage ());
6975 }
6976 else
6977 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
6978 if (TEST_HARD_REG_BIT (*mask, i))
6979 push (i);
6980 }
6981
6982 /* Don't push PR register for an ISR with RESBANK attribute assigned. */
6983 if (TEST_HARD_REG_BIT (*mask, PR_REG) && !sh_cfun_resbank_handler_p ())
6984 push (PR_REG);
6985 }
6986
6987 /* Work out the registers which need to be saved, both as a mask and a
6988 count of saved words. Return the count.
6989
6990 If doing a pragma interrupt function, then push all regs used by the
6991 function, and if we call another function (we can tell by looking at PR),
6992 make sure that all the regs it clobbers are safe too. */
6993 static int
6994 calc_live_regs (HARD_REG_SET *live_regs_mask)
6995 {
6996 unsigned int reg;
6997 tree attrs;
6998 bool interrupt_or_trapa_handler, trapa_handler, interrupt_handler;
6999 bool nosave_low_regs;
7000
7001 attrs = DECL_ATTRIBUTES (current_function_decl);
7002 interrupt_or_trapa_handler = sh_cfun_interrupt_handler_p ();
7003 trapa_handler = lookup_attribute ("trapa_handler", attrs) != NULL_TREE;
7004 interrupt_handler = interrupt_or_trapa_handler && ! trapa_handler;
7005 nosave_low_regs = lookup_attribute ("nosave_low_regs", attrs) != NULL_TREE;
7006
7007 CLEAR_HARD_REG_SET (*live_regs_mask);
7008 if (TARGET_FPU_DOUBLE && TARGET_FMOVD && interrupt_handler
7009 && df_regs_ever_live_p (FPSCR_REG))
7010 target_flags &= ~MASK_FPU_SINGLE;
7011 /* If we can save a lot of save insns by switching to double mode, do that. */
7012 else if (TARGET_FPU_DOUBLE && TARGET_FMOVD && TARGET_FPU_SINGLE)
7013 for (int count = 0, reg = FIRST_FP_REG; reg <= LAST_FP_REG; reg += 2)
7014 if (df_regs_ever_live_p (reg) && df_regs_ever_live_p (reg+1)
7015 && (! call_really_used_regs[reg]
7016 || interrupt_handler)
7017 && ++count > 2)
7018 {
7019 target_flags &= ~MASK_FPU_SINGLE;
7020 break;
7021 }
7022
7023
7024 rtx pr_initial = has_hard_reg_initial_val (Pmode, PR_REG);
7025 bool pr_live = (pr_initial
7026 ? (!REG_P (pr_initial)
7027 || REGNO (pr_initial) != (PR_REG))
7028 : df_regs_ever_live_p (PR_REG));
7029 /* For Shcompact, if not optimizing, we end up with a memory reference
7030 using the return address pointer for __builtin_return_address even
7031 though there is no actual need to put the PR register on the stack. */
7032 pr_live |= df_regs_ever_live_p (RETURN_ADDRESS_POINTER_REGNUM);
7033
7034 /* Force PR to be live if the prologue has to call the SHmedia
7035 argument decoder or register saver. */
7036 bool has_call = pr_live;
7037
7038 int count;
7039 for (count = 0, reg = FIRST_PSEUDO_REGISTER; reg-- != 0; )
7040 {
7041 if (reg == PR_REG
7042 ? pr_live
7043 : interrupt_handler
7044 ? (/* Need to save all the regs ever live. */
7045 (df_regs_ever_live_p (reg)
7046 || (call_really_used_regs[reg]
7047 && (! fixed_regs[reg] || reg == MACH_REG || reg == MACL_REG
7048 || reg == PIC_OFFSET_TABLE_REGNUM)
7049 && has_call))
7050 && reg != STACK_POINTER_REGNUM && reg != ARG_POINTER_REGNUM
7051 && reg != RETURN_ADDRESS_POINTER_REGNUM
7052 && reg != T_REG && reg != GBR_REG
7053 && reg != FPSCR_MODES_REG && reg != FPSCR_STAT_REG
7054 /* Push fpscr only on targets which have an FPU.  */
7055 && (reg != FPSCR_REG || TARGET_FPU_ANY))
7056 : (/* Only push those regs which are used and need to be saved. */
7057 (false)
7058 || (df_regs_ever_live_p (reg)
7059 && ((!call_really_used_regs[reg]
7060 && !(reg != PIC_OFFSET_TABLE_REGNUM
7061 && fixed_regs[reg] && call_used_regs[reg]))
7062 || (trapa_handler && reg == FPSCR_REG && TARGET_FPU_ANY)))
7063 || (crtl->calls_eh_return
7064 && (reg == EH_RETURN_DATA_REGNO (0)
7065 || reg == EH_RETURN_DATA_REGNO (1)
7066 || reg == EH_RETURN_DATA_REGNO (2)
7067 || reg == EH_RETURN_DATA_REGNO (3)))
7068 || ((reg == MACL_REG || reg == MACH_REG)
7069 && df_regs_ever_live_p (reg)
7070 && sh_cfun_attr_renesas_p ())
7071 ))
7072 {
7073 SET_HARD_REG_BIT (*live_regs_mask, reg);
7074 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg));
7075
7076 if (TARGET_FPU_DOUBLE && TARGET_FMOVD
7077 && GET_MODE_CLASS (REGISTER_NATURAL_MODE (reg)) == MODE_FLOAT)
7078 {
7079 if (FP_REGISTER_P (reg))
7080 {
7081 if (! TARGET_FPU_SINGLE && ! df_regs_ever_live_p (reg ^ 1))
7082 {
7083 SET_HARD_REG_BIT (*live_regs_mask, (reg ^ 1));
7084 count += GET_MODE_SIZE (REGISTER_NATURAL_MODE (reg ^ 1));
7085 }
7086 }
7087 else if (XD_REGISTER_P (reg))
7088 {
7089 /* Must switch to double mode to access these registers. */
7090 target_flags &= ~MASK_FPU_SINGLE;
7091 }
7092 }
7093 }
7094 if (nosave_low_regs && reg == R8_REG)
7095 break;
7096 }
7097
7098 return count;
7099 }
7100
7101 /* Code to generate prologue and epilogue sequences */
7102
7103 /* PUSHED is the number of bytes that are being pushed on the
7104 stack for register saves. Return the frame size, padded
7105 appropriately so that the stack stays properly aligned. */
7106 static HOST_WIDE_INT
7107 rounded_frame_size (int pushed)
7108 {
7109 HOST_WIDE_INT size = get_frame_size ();
7110 HOST_WIDE_INT align = STACK_BOUNDARY / BITS_PER_UNIT;
7111
7112 if (ACCUMULATE_OUTGOING_ARGS)
7113 size += crtl->outgoing_args_size;
7114
7115 return ((size + pushed + align - 1) & -align) - pushed;
7116 }
7117
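/* A worked example of the rounding above, assuming a 32-bit STACK_BOUNDARY
   (ALIGN == 4), no accumulated outgoing args, get_frame_size () == 20 and
   PUSHED == 8:

     ((20 + 8 + 4 - 1) & -4) - 8  ==  (31 & -4) - 8  ==  28 - 8  ==  20

   i.e. 20 bytes of locals are allocated on top of the 8 bytes of register
   saves, keeping the 28-byte total a multiple of the alignment.  */
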
7118 /* Expand code for the function prologue. */
7119 void
7120 sh_expand_prologue (void)
7121 {
7122 int save_flags = target_flags;
7123 tree sp_switch_attr
7124 = lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl));
7125
7126 current_function_interrupt = sh_cfun_interrupt_handler_p ();
7127
7128 /* We have pretend args if we had an object sent partially in registers
7129 and partially on the stack, e.g. a large structure. */
7130 int pretend_args = crtl->args.pretend_args_size;
7131 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl)
7132 && (NPARM_REGS(SImode)
7133 > crtl->args.info.arg_count[(int) SH_ARG_INT]))
7134 pretend_args = 0;
7135
7136 output_stack_adjust (-pretend_args, stack_pointer_rtx, 0, NULL, true);
7137 int stack_usage = pretend_args;
7138
7139 /* Emit the code for SETUP_VARARGS. */
7140 if (cfun->stdarg)
7141 {
7142 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
7143 {
7144 /* Push arg regs as if they'd been provided by the caller on the stack. */
7145 for (int i = 0; i < NPARM_REGS(SImode); i++)
7146 {
7147 int rn = NPARM_REGS(SImode) + FIRST_PARM_REG - i - 1;
7148
7149 if (i >= (NPARM_REGS(SImode)
7150 - crtl->args.info.arg_count[(int) SH_ARG_INT]
7151 ))
7152 break;
7153 push (rn);
7154 stack_usage += GET_MODE_SIZE (SImode);
7155 }
7156 }
7157 }
7158
7159 /* If we're supposed to switch stacks at function entry, do so now. */
7160 if (sp_switch_attr)
7161 {
7162 rtx lab, newsrc;
7163 /* The argument specifies a variable holding the address of the
7164 stack the interrupt function should switch to/from at entry/exit. */
7165 tree arg = TREE_VALUE ( TREE_VALUE (sp_switch_attr));
7166 const char* s = ggc_strdup (TREE_STRING_POINTER (arg));
7167 rtx sp_switch = gen_rtx_SYMBOL_REF (Pmode, s);
7168
7169 lab = add_constant (sp_switch, SImode, 0);
7170 newsrc = gen_rtx_LABEL_REF (VOIDmode, lab);
7171
7172 emit_insn (gen_sp_switch_1 (newsrc));
7173 }
7174
7175 HARD_REG_SET live_regs_mask;
7176 int d = calc_live_regs (&live_regs_mask);
7177 /* ??? Maybe we could save some switching if we can move a mode switch
7178 that already happens to be at the function start into the prologue. */
7179 if (target_flags != save_flags && ! current_function_interrupt)
7180 emit_insn (gen_toggle_sz ());
7181
7182 push_regs (&live_regs_mask, current_function_interrupt);
7183 stack_usage += d;
7184
7185 if (flag_pic && !TARGET_FDPIC
7186 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
7187 emit_insn (gen_GOTaddr2picreg (const0_rtx));
7188
7189 if (target_flags != save_flags && ! current_function_interrupt)
7190 emit_insn (gen_toggle_sz ());
7191
7192 target_flags = save_flags;
7193
7194 output_stack_adjust (-rounded_frame_size (d),
7195 stack_pointer_rtx, 0, NULL, true);
7196 stack_usage += rounded_frame_size (d);
7197
7198 if (frame_pointer_needed)
7199 emit_frame_insn (GEN_MOV (hard_frame_pointer_rtx, stack_pointer_rtx));
7200
7201 /* If we are profiling, make sure no instructions are scheduled before
7202 the call to mcount. Similarly if some call instructions are swapped
7203 before frame-related insns, it'll confuse the unwinder because
7204 currently SH has no unwind info for function epilogues. */
7205 if (crtl->profile || flag_exceptions || flag_unwind_tables)
7206 emit_insn (gen_blockage ());
7207
7208 if (flag_stack_usage_info)
7209 current_function_static_stack_size = stack_usage;
7210 }
7211
7212 /* Expand code for the function epilogue. */
7213 void
7214 sh_expand_epilogue (bool sibcall_p)
7215 {
7216 int save_flags = target_flags;
7217 bool fpscr_deferred = false;
7218 int e = sibcall_p ? -1 : 1;
7219
7220 HARD_REG_SET live_regs_mask;
7221 int d = calc_live_regs (&live_regs_mask);
7222
7223 int save_size = d;
7224 int frame_size = rounded_frame_size (d);
7225
7226 if (frame_pointer_needed)
7227 {
7228 /* We must avoid scheduling the epilogue with previous basic blocks.
7229 See PR/18032 and PR/40313. */
7230 emit_insn (gen_blockage ());
7231 output_stack_adjust (frame_size, hard_frame_pointer_rtx, e,
7232 &live_regs_mask, true);
7233
7234 /* We must avoid moving the stack pointer adjustment past code
7235 which reads from the local frame, else an interrupt could
7236 occur after the SP adjustment and clobber data in the local
7237 frame. */
7238 emit_insn (gen_blockage ());
7239 emit_frame_insn (GEN_MOV (stack_pointer_rtx, hard_frame_pointer_rtx));
7240 }
7241 else if (frame_size)
7242 {
7243 /* We must avoid moving the stack pointer adjustment past code
7244 which reads from the local frame, else an interrupt could
7245 occur after the SP adjustment and clobber data in the local
7246 frame. */
7247 emit_insn (gen_blockage ());
7248 output_stack_adjust (frame_size, stack_pointer_rtx, e,
7249 &live_regs_mask, true);
7250 }
7251
7252 /* Pop all the registers. */
7253
7254 if (target_flags != save_flags && ! current_function_interrupt)
7255 emit_insn (gen_toggle_sz ());
7256
7257 {
7258 int last_reg;
7259
7260 save_size = 0;
7261 /* For an ISR with RESBANK attribute assigned, don't pop PR
7262 register. */
7263 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG)
7264 && !sh_cfun_resbank_handler_p ())
7265 {
7266 if (!frame_pointer_needed)
7267 emit_insn (gen_blockage ());
7268 pop (PR_REG);
7269 }
7270
7271 /* Banked registers are popped first to avoid being scheduled in the
7272 delay slot. RTE switches banks before the ds instruction. */
7273 if (current_function_interrupt)
7274 {
7275 bool use_movml = false;
7276
7277 if (TARGET_SH2A)
7278 {
7279 unsigned int count = 0;
7280
7281 for (int i = FIRST_BANKED_REG; i <= LAST_BANKED_REG; i++)
7282 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7283 count++;
7284 else
7285 break;
7286
7287 /* Use movml when all banked registers are popped.  */
7288 if (count == LAST_BANKED_REG - FIRST_BANKED_REG + 1)
7289 use_movml = true;
7290 }
7291
7292 if (sh_cfun_resbank_handler_p ())
7293 ; /* Do nothing. */
7294 else if (use_movml)
7295 {
7296 rtx sp_reg = gen_rtx_REG (SImode, STACK_POINTER_REGNUM);
7297
7298 /* We must avoid scheduling the multiple-register load insn together
7299 with other insns.  */
7300 emit_insn (gen_blockage ());
7301 emit_insn (gen_movml_pop_banked (sp_reg));
7302 emit_insn (gen_blockage ());
7303 }
7304 else
7305 for (int i = LAST_BANKED_REG; i >= FIRST_BANKED_REG; i--)
7306 if (TEST_HARD_REG_BIT (live_regs_mask, i))
7307 pop (i);
7308
7309 last_reg = FIRST_PSEUDO_REGISTER - LAST_BANKED_REG - 1;
7310 }
7311 else
7312 last_reg = FIRST_PSEUDO_REGISTER;
7313
7314 for (int i = 0; i < last_reg; i++)
7315 {
7316 int j = (FIRST_PSEUDO_REGISTER - 1) - i;
7317
7318 if (j == FPSCR_REG && current_function_interrupt && TARGET_FMOVD
7319 && hard_reg_set_intersect_p (live_regs_mask,
7320 reg_class_contents[DF_REGS]))
7321 fpscr_deferred = true;
7322 /* For an ISR with RESBANK attribute assigned, don't pop
7323 following registers, R0-R14, MACH, MACL and GBR. */
7324 else if (j != PR_REG && TEST_HARD_REG_BIT (live_regs_mask, j)
7325 && ! (sh_cfun_resbank_handler_p ()
7326 && ((j >= FIRST_GENERAL_REG
7327 && j < LAST_GENERAL_REG)
7328 || j == MACH_REG
7329 || j == MACL_REG
7330 || j == GBR_REG)))
7331 pop (j);
7332
7333 if (j == FIRST_FP_REG && fpscr_deferred)
7334 pop (FPSCR_REG);
7335 }
7336 }
7337 if (target_flags != save_flags && ! current_function_interrupt)
7338 emit_insn (gen_toggle_sz ());
7339 target_flags = save_flags;
7340
7341 output_stack_adjust (crtl->args.pretend_args_size + save_size,
7342 stack_pointer_rtx, e, NULL, true);
7343
7344 if (crtl->calls_eh_return)
7345 emit_insn (GEN_ADD3 (stack_pointer_rtx, stack_pointer_rtx,
7346 EH_RETURN_STACKADJ_RTX));
7347
7348 /* Switch back to the normal stack if necessary. */
7349 if (lookup_attribute ("sp_switch", DECL_ATTRIBUTES (current_function_decl)))
7350 emit_insn (gen_sp_switch_2 ());
7351
7352 /* Tell flow the insn that pops PR isn't dead. */
7353 if (TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7354 emit_use (gen_rtx_REG (SImode, PR_REG));
7355 }
7356
7357 /* Emit code to change the current function's return address to RA.
7358 TEMP is available as a scratch register, if needed. */
7359 void
7360 sh_set_return_address (rtx ra, rtx tmp)
7361 {
7362 HARD_REG_SET live_regs_mask;
7363 int d = calc_live_regs (&live_regs_mask);
7364
7365 /* If the PR register isn't live, we can set it directly. */
7366 if (! TEST_HARD_REG_BIT (live_regs_mask, PR_REG))
7367 {
7368 rtx rr = gen_rtx_REG (SImode, PR_REG);
7369 emit_insn (GEN_MOV (rr, ra));
7370 /* Tell flow the register for return isn't dead. */
7371 emit_use (rr);
7372 return;
7373 }
7374
7375 int pr_offset = rounded_frame_size (d);
7376
7377 emit_insn (GEN_MOV (tmp, GEN_INT (pr_offset)));
7378
7379 if (frame_pointer_needed)
7380 emit_insn (GEN_ADD3 (tmp, tmp, hard_frame_pointer_rtx));
7381 else
7382 emit_insn (GEN_ADD3 (tmp, tmp, stack_pointer_rtx));
7383
7384 tmp = gen_frame_mem (Pmode, tmp);
7385 emit_insn (GEN_MOV (tmp, ra));
7386 /* Tell flow this store isn't dead. */
7387 emit_use (tmp);
7388 }
7389
7390 /* Clear variables at function end. */
7391 static void
7392 sh_output_function_epilogue (FILE *)
7393 {
7394 }
7395
7396 static rtx
7397 sh_builtin_saveregs (void)
7398 {
7399 /* First unnamed integer register. */
7400 int first_intreg = crtl->args.info.arg_count[(int) SH_ARG_INT];
7401 /* Number of integer registers we need to save. */
7402 int n_intregs = MAX (0, NPARM_REGS (SImode) - first_intreg);
7403 /* First unnamed SFmode float reg */
7404 int first_floatreg = crtl->args.info.arg_count[(int) SH_ARG_FLOAT];
7405 /* Number of SFmode float regs to save. */
7406 int n_floatregs = MAX (0, NPARM_REGS (SFmode) - first_floatreg);
7407 rtx regbuf, fpregs;
7408 int bufsize, regno;
7409 alias_set_type alias_set;
7410
7411 if (!TARGET_FPU_ANY)
7412 {
7413 error ("%<__builtin_saveregs%> not supported by this subtarget");
7414 return const0_rtx;
7415 }
7416
7417 /* Allocate block of memory for the regs. */
7418 /* ??? If n_intregs + n_floatregs == 0, should we allocate at least 1 byte?
7419 Or can assign_stack_local accept a 0 SIZE argument? */
7420 bufsize = (n_intregs * UNITS_PER_WORD) + (n_floatregs * UNITS_PER_WORD);
7421
7422 if (n_floatregs & 1)
7423 {
7424 rtx addr;
7425
7426 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7427 addr = copy_to_mode_reg (Pmode, XEXP (regbuf, 0));
7428 emit_insn (gen_iorsi3 (addr, addr, GEN_INT (UNITS_PER_WORD)));
7429 regbuf = change_address (regbuf, BLKmode, addr);
7430 }
7431 else if (STACK_BOUNDARY < 64 && TARGET_FPU_DOUBLE && n_floatregs)
7432 {
7433 rtx addr, mask;
7434
7435 regbuf = assign_stack_local (BLKmode, bufsize + UNITS_PER_WORD, 0);
7436 addr = copy_to_mode_reg (Pmode, plus_constant (Pmode,
7437 XEXP (regbuf, 0), 4));
7438 mask = copy_to_mode_reg (Pmode, GEN_INT (-8));
7439 emit_insn (gen_andsi3 (addr, addr, mask));
7440 regbuf = change_address (regbuf, BLKmode, addr);
7441 }
7442 else
7443 regbuf = assign_stack_local (BLKmode, bufsize, TARGET_FPU_DOUBLE ? 64 : 0);
7444 alias_set = get_varargs_alias_set ();
7445 set_mem_alias_set (regbuf, alias_set);
7446
7447 /* Save int args.
7448 This is optimized to only save the regs that are necessary. Explicitly
7449 named args need not be saved. */
7450 if (n_intregs > 0)
7451 move_block_from_reg (BASE_ARG_REG (SImode) + first_intreg,
7452 adjust_address (regbuf, BLKmode,
7453 n_floatregs * UNITS_PER_WORD),
7454 n_intregs);
7455
7456 /* Save float args.
7457 This is optimized to only save the regs that are necessary. Explicitly
7458 named args need not be saved.
7459 We explicitly build a pointer to the buffer because it halves the insn
7460 count when not optimizing (otherwise the pointer is built for each reg
7461 saved).
7462 We emit the moves in reverse order so that we can use predecrement. */
7463
7464 fpregs = copy_to_mode_reg (Pmode,
7465 plus_constant (Pmode, XEXP (regbuf, 0),
7466 n_floatregs * UNITS_PER_WORD));
7467 if (TARGET_FPU_DOUBLE)
7468 {
7469 rtx mem;
7470 for (regno = NPARM_REGS (DFmode) - 2; regno >= first_floatreg; regno -= 2)
7471 {
7472 emit_insn (gen_addsi3 (fpregs, fpregs,
7473 GEN_INT (-2 * UNITS_PER_WORD)));
7474 mem = change_address (regbuf, DFmode, fpregs);
7475 emit_move_insn (mem,
7476 gen_rtx_REG (DFmode, BASE_ARG_REG (DFmode) + regno));
7477 }
7478 regno = first_floatreg;
7479 if (regno & 1)
7480 {
7481 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7482 mem = change_address (regbuf, SFmode, fpregs);
7483 emit_move_insn (mem,
7484 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode)
7485 + regno - SH_REG_MSW_OFFSET));
7486 }
7487 }
7488 else
7489 for (regno = NPARM_REGS (SFmode) - 1; regno >= first_floatreg; regno--)
7490 {
7491 rtx mem;
7492
7493 emit_insn (gen_addsi3 (fpregs, fpregs, GEN_INT (-UNITS_PER_WORD)));
7494 mem = change_address (regbuf, SFmode, fpregs);
7495 emit_move_insn (mem,
7496 gen_rtx_REG (SFmode, BASE_ARG_REG (SFmode) + regno));
7497 }
7498
7499 /* Return the address of the regbuf. */
7500 return XEXP (regbuf, 0);
7501 }
7502
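/* A sketch of the buffer laid out above (byte offsets from REGBUF, assuming
   UNITS_PER_WORD == 4 and ignoring the extra alignment word):

     [0, n_floatregs * 4)        FP argument registers, filled downwards
                                 through the predecremented FPREGS pointer
     [n_floatregs * 4, bufsize)  integer argument registers, filled upwards
                                 by move_block_from_reg

   sh_va_start below hands out pieces of this buffer through the va_list
   pointers.  */
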
7503 /* Define the `__builtin_va_list' type for the ABI. */
7504 static tree
7505 sh_build_builtin_va_list (void)
7506 {
7507 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7508 tree record, type_decl;
7509
7510 if ((! TARGET_SH2E && ! TARGET_SH4)
7511 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7512 return ptr_type_node;
7513
7514 record = (*lang_hooks.types.make_type) (RECORD_TYPE);
7515 type_decl = build_decl (BUILTINS_LOCATION,
7516 TYPE_DECL, get_identifier ("__va_list_tag"), record);
7517
7518 f_next_o = build_decl (BUILTINS_LOCATION,
7519 FIELD_DECL, get_identifier ("__va_next_o"),
7520 ptr_type_node);
7521 f_next_o_limit = build_decl (BUILTINS_LOCATION,
7522 FIELD_DECL,
7523 get_identifier ("__va_next_o_limit"),
7524 ptr_type_node);
7525 f_next_fp = build_decl (BUILTINS_LOCATION,
7526 FIELD_DECL, get_identifier ("__va_next_fp"),
7527 ptr_type_node);
7528 f_next_fp_limit = build_decl (BUILTINS_LOCATION,
7529 FIELD_DECL,
7530 get_identifier ("__va_next_fp_limit"),
7531 ptr_type_node);
7532 f_next_stack = build_decl (BUILTINS_LOCATION,
7533 FIELD_DECL, get_identifier ("__va_next_stack"),
7534 ptr_type_node);
7535
7536 DECL_FIELD_CONTEXT (f_next_o) = record;
7537 DECL_FIELD_CONTEXT (f_next_o_limit) = record;
7538 DECL_FIELD_CONTEXT (f_next_fp) = record;
7539 DECL_FIELD_CONTEXT (f_next_fp_limit) = record;
7540 DECL_FIELD_CONTEXT (f_next_stack) = record;
7541
7542 TYPE_STUB_DECL (record) = type_decl;
7543 TYPE_NAME (record) = type_decl;
7544 TYPE_FIELDS (record) = f_next_o;
7545 DECL_CHAIN (f_next_o) = f_next_o_limit;
7546 DECL_CHAIN (f_next_o_limit) = f_next_fp;
7547 DECL_CHAIN (f_next_fp) = f_next_fp_limit;
7548 DECL_CHAIN (f_next_fp_limit) = f_next_stack;
7549
7550 layout_type (record);
7551
7552 return record;
7553 }
7554
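/* The record built above is roughly equivalent to the following C
   declaration (a sketch; the real type is constructed through the tree
   machinery):

     struct __va_list_tag
     {
       void *__va_next_o;         // next unread slot in the int save area
       void *__va_next_o_limit;   // end of the int save area
       void *__va_next_fp;        // next unread slot in the FP save area
       void *__va_next_fp_limit;  // end of the FP save area
       void *__va_next_stack;     // next stack-passed argument
     };  */
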
7555 /* Implement `va_start' for varargs and stdarg. */
7556 static void
7557 sh_va_start (tree valist, rtx nextarg)
7558 {
7559 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7560 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7561 tree t, u;
7562 int nfp, nint;
7563
7564 if ((! TARGET_SH2E && ! TARGET_SH4)
7565 || TARGET_HITACHI || sh_cfun_attr_renesas_p ())
7566 {
7567 std_expand_builtin_va_start (valist, nextarg);
7568 return;
7569 }
7570
7571 f_next_o = TYPE_FIELDS (va_list_type_node);
7572 f_next_o_limit = DECL_CHAIN (f_next_o);
7573 f_next_fp = DECL_CHAIN (f_next_o_limit);
7574 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7575 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7576
7577 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7578 NULL_TREE);
7579 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7580 valist, f_next_o_limit, NULL_TREE);
7581 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp), valist, f_next_fp,
7582 NULL_TREE);
7583 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7584 valist, f_next_fp_limit, NULL_TREE);
7585 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7586 valist, f_next_stack, NULL_TREE);
7587
7588 /* Call __builtin_saveregs. */
7589 u = make_tree (sizetype, expand_builtin_saveregs ());
7590 u = fold_convert (ptr_type_node, u);
7591 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp, u);
7592 TREE_SIDE_EFFECTS (t) = 1;
7593 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7594
7595 nfp = crtl->args.info.arg_count[SH_ARG_FLOAT];
7596 if (nfp < 8)
7597 nfp = 8 - nfp;
7598 else
7599 nfp = 0;
7600 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nfp);
7601 t = build2 (MODIFY_EXPR, ptr_type_node, next_fp_limit, u);
7602 TREE_SIDE_EFFECTS (t) = 1;
7603 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7604
7605 t = build2 (MODIFY_EXPR, ptr_type_node, next_o, u);
7606 TREE_SIDE_EFFECTS (t) = 1;
7607 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7608
7609 nint = crtl->args.info.arg_count[SH_ARG_INT];
7610 if (nint < 4)
7611 nint = 4 - nint;
7612 else
7613 nint = 0;
7614 u = fold_build_pointer_plus_hwi (u, UNITS_PER_WORD * nint);
7615 t = build2 (MODIFY_EXPR, ptr_type_node, next_o_limit, u);
7616 TREE_SIDE_EFFECTS (t) = 1;
7617 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7618
7619 u = make_tree (ptr_type_node, nextarg);
7620 t = build2 (MODIFY_EXPR, ptr_type_node, next_stack, u);
7621 TREE_SIDE_EFFECTS (t) = 1;
7622 expand_expr (t, const0_rtx, VOIDmode, EXPAND_NORMAL);
7623 }
7624
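/* An example of the limit computation above, assuming UNITS_PER_WORD == 4
   and a hypothetical prototype "int f (int a, const char *fmt, ...)", i.e.
   arg_count[SH_ARG_INT] == 2 and arg_count[SH_ARG_FLOAT] == 0:

     nfp  = 8 - 0 = 8   =>  next_fp_limit = next_fp + 32
     nint = 4 - 2 = 2   =>  next_o_limit  = next_o  + 8

   so all eight SFmode argument registers and the two SImode argument
   registers left over after the named args may still carry anonymous
   arguments.  */
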
7625 /* TYPE is a RECORD_TYPE. If there is only a single nonzero-sized
7626 member, return it. */
7627 static tree
7628 find_sole_member (tree type)
7629 {
7630 tree field, member = NULL_TREE;
7631
7632 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
7633 {
7634 if (TREE_CODE (field) != FIELD_DECL)
7635 continue;
7636 if (!DECL_SIZE (field))
7637 return NULL_TREE;
7638 if (integer_zerop (DECL_SIZE (field)))
7639 continue;
7640 if (member)
7641 return NULL_TREE;
7642 member = field;
7643 }
7644 return member;
7645 }
7646
7647 /* Implement `va_arg'. */
7648 static tree
7649 sh_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
7650 gimple_seq *post_p ATTRIBUTE_UNUSED)
7651 {
7652 tree tmp;
7653 tree addr, lab_over = NULL, result = NULL;
7654 tree eff_type;
7655
7656 const bool pass_by_ref
7657 = !VOID_TYPE_P (type) && must_pass_va_arg_in_stack (type);
7658
7659 if (pass_by_ref)
7660 type = build_pointer_type (type);
7661
7662 HOST_WIDE_INT size = int_size_in_bytes (type);
7663 HOST_WIDE_INT rsize = (size + UNITS_PER_WORD - 1) & -UNITS_PER_WORD;
7664 tree pptr_type_node = build_pointer_type (ptr_type_node);
7665
7666 if ((TARGET_SH2E || TARGET_SH4)
7667 && ! (TARGET_HITACHI || sh_cfun_attr_renesas_p ()))
7668 {
7669 tree f_next_o, f_next_o_limit, f_next_fp, f_next_fp_limit, f_next_stack;
7670 tree next_o, next_o_limit, next_fp, next_fp_limit, next_stack;
7671 tree lab_false;
7672 tree member;
7673
7674 f_next_o = TYPE_FIELDS (va_list_type_node);
7675 f_next_o_limit = DECL_CHAIN (f_next_o);
7676 f_next_fp = DECL_CHAIN (f_next_o_limit);
7677 f_next_fp_limit = DECL_CHAIN (f_next_fp);
7678 f_next_stack = DECL_CHAIN (f_next_fp_limit);
7679
7680 next_o = build3 (COMPONENT_REF, TREE_TYPE (f_next_o), valist, f_next_o,
7681 NULL_TREE);
7682 next_o_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_o_limit),
7683 valist, f_next_o_limit, NULL_TREE);
7684 next_fp = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp),
7685 valist, f_next_fp, NULL_TREE);
7686 next_fp_limit = build3 (COMPONENT_REF, TREE_TYPE (f_next_fp_limit),
7687 valist, f_next_fp_limit, NULL_TREE);
7688 next_stack = build3 (COMPONENT_REF, TREE_TYPE (f_next_stack),
7689 valist, f_next_stack, NULL_TREE);
7690
7691 /* Structures with a single member with a distinct mode are passed
7692 like their member. This is relevant if the latter has a REAL_TYPE
7693 or COMPLEX_TYPE type. */
7694 eff_type = type;
7695 while (TREE_CODE (eff_type) == RECORD_TYPE
7696 && (member = find_sole_member (eff_type))
7697 && (TREE_CODE (TREE_TYPE (member)) == REAL_TYPE
7698 || TREE_CODE (TREE_TYPE (member)) == COMPLEX_TYPE
7699 || TREE_CODE (TREE_TYPE (member)) == RECORD_TYPE))
7700 {
7701 tree field_type = TREE_TYPE (member);
7702
7703 if (TYPE_MODE (eff_type) == TYPE_MODE (field_type))
7704 eff_type = field_type;
7705 else
7706 {
7707 gcc_assert ((TYPE_ALIGN (eff_type)
7708 < GET_MODE_ALIGNMENT (TYPE_MODE (field_type)))
7709 || (TYPE_ALIGN (eff_type)
7710 > GET_MODE_BITSIZE (TYPE_MODE (field_type))));
7711 break;
7712 }
7713 }
7714
7715 bool pass_as_float;
7716 if (TARGET_FPU_DOUBLE)
7717 {
7718 pass_as_float = ((TREE_CODE (eff_type) == REAL_TYPE && size <= 8)
7719 || (TREE_CODE (eff_type) == COMPLEX_TYPE
7720 && TREE_CODE (TREE_TYPE (eff_type)) == REAL_TYPE
7721 && size <= 16));
7722 }
7723 else
7724 {
7725 pass_as_float = (TREE_CODE (eff_type) == REAL_TYPE && size == 4);
7726 }
7727
7728 addr = create_tmp_var (pptr_type_node);
7729 lab_false = create_artificial_label (UNKNOWN_LOCATION);
7730 lab_over = create_artificial_label (UNKNOWN_LOCATION);
7731
7732 valist = build_simple_mem_ref (addr);
7733
7734 if (pass_as_float)
7735 {
7736 tree next_fp_tmp = create_tmp_var (TREE_TYPE (f_next_fp));
7737 tree cmp;
7738 bool is_double = size == 8 && TREE_CODE (eff_type) == REAL_TYPE;
7739
7740 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_fp));
7741 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7742
7743 gimplify_assign (unshare_expr (next_fp_tmp), valist, pre_p);
7744 tmp = next_fp_limit;
7745 if (size > 4 && !is_double)
7746 tmp = fold_build_pointer_plus_hwi (unshare_expr (tmp), 4 - size);
7747 tmp = build2 (GE_EXPR, boolean_type_node,
7748 unshare_expr (next_fp_tmp), unshare_expr (tmp));
7749 cmp = build3 (COND_EXPR, void_type_node, tmp,
7750 build1 (GOTO_EXPR, void_type_node,
7751 unshare_expr (lab_false)), NULL_TREE);
7752 if (!is_double)
7753 gimplify_and_add (cmp, pre_p);
7754
7755 if (TYPE_ALIGN (eff_type) > BITS_PER_WORD
7756 || (is_double || size == 16))
7757 {
7758 tmp = fold_convert (sizetype, next_fp_tmp);
7759 tmp = build2 (BIT_AND_EXPR, sizetype, tmp,
7760 size_int (UNITS_PER_WORD));
7761 tmp = fold_build_pointer_plus (unshare_expr (next_fp_tmp), tmp);
7762 gimplify_assign (unshare_expr (next_fp_tmp), tmp, pre_p);
7763 }
7764 if (is_double)
7765 gimplify_and_add (cmp, pre_p);
7766
7767 #ifdef FUNCTION_ARG_SCmode_WART
7768 if (TYPE_MODE (eff_type) == SCmode
7769 && TARGET_SH4 && TARGET_LITTLE_ENDIAN)
7770 {
7771 tree subtype = TREE_TYPE (eff_type);
7772 tree real, imag;
7773
7774 imag
7775 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7776 imag = get_initialized_tmp_var (imag, pre_p, NULL);
7777
7778 real
7779 = std_gimplify_va_arg_expr (next_fp_tmp, subtype, pre_p, NULL);
7780 real = get_initialized_tmp_var (real, pre_p, NULL);
7781
7782 result = build2 (COMPLEX_EXPR, eff_type, real, imag);
7783 if (type != eff_type)
7784 result = build1 (VIEW_CONVERT_EXPR, type, result);
7785 result = get_initialized_tmp_var (result, pre_p, NULL);
7786 }
7787 #endif /* FUNCTION_ARG_SCmode_WART */
7788
7789 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7790 gimplify_and_add (tmp, pre_p);
7791
7792 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7793 gimplify_and_add (tmp, pre_p);
7794
7795 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7796 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7797 gimplify_assign (unshare_expr (next_fp_tmp),
7798 unshare_expr (valist), pre_p);
7799
7800 gimplify_assign (unshare_expr (valist),
7801 unshare_expr (next_fp_tmp), post_p);
7802 valist = next_fp_tmp;
7803 }
7804 else
7805 {
7806 tmp = fold_build_pointer_plus_hwi (unshare_expr (next_o), rsize);
7807 tmp = build2 (GT_EXPR, boolean_type_node, tmp,
7808 unshare_expr (next_o_limit));
7809 tmp = build3 (COND_EXPR, void_type_node, tmp,
7810 build1 (GOTO_EXPR, void_type_node,
7811 unshare_expr (lab_false)),
7812 NULL_TREE);
7813 gimplify_and_add (tmp, pre_p);
7814
7815 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_o));
7816 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7817
7818 tmp = build1 (GOTO_EXPR, void_type_node, unshare_expr (lab_over));
7819 gimplify_and_add (tmp, pre_p);
7820
7821 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_false));
7822 gimplify_and_add (tmp, pre_p);
7823
7824 if (size > 4 && ! (TARGET_SH4 || TARGET_SH2A))
7825 gimplify_assign (unshare_expr (next_o),
7826 unshare_expr (next_o_limit), pre_p);
7827
7828 tmp = build1 (ADDR_EXPR, pptr_type_node, unshare_expr (next_stack));
7829 gimplify_assign (unshare_expr (addr), tmp, pre_p);
7830 }
7831
7832 if (!result)
7833 {
7834 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7835 gimplify_and_add (tmp, pre_p);
7836 }
7837 }
7838
7839 /* ??? In va-sh.h, there had been code to make values larger than
7840 size 8 indirect. This does not match the FUNCTION_ARG macros. */
7841
7842 tmp = std_gimplify_va_arg_expr (valist, type, pre_p, NULL);
7843 if (result)
7844 {
7845 gimplify_assign (result, tmp, pre_p);
7846 result = build1 (NOP_EXPR, TREE_TYPE (result), result);
7847 tmp = build1 (LABEL_EXPR, void_type_node, unshare_expr (lab_over));
7848 gimplify_and_add (tmp, pre_p);
7849 }
7850 else
7851 result = tmp;
7852
7853 if (pass_by_ref)
7854 result = build_va_arg_indirect_ref (result);
7855
7856 return result;
7857 }
7858
7859 /* 64-bit floating point memory transfers are paired single precision loads
7860 or stores, so the DWARF information needs fixing in little endian mode
7861 (unless PR=SZ=1 in FPSCR). */
7862 rtx
7863 sh_dwarf_register_span (rtx reg)
7864 {
7865 unsigned regno = REGNO (reg);
7866
7867 if (WORDS_BIG_ENDIAN || GET_MODE (reg) != DFmode)
7868 return NULL_RTX;
7869
7870 return
7871 gen_rtx_PARALLEL (VOIDmode,
7872 gen_rtvec (2,
7873 gen_rtx_REG (SFmode, regno + 1),
7874 gen_rtx_REG (SFmode, regno)));
7875 }
7876
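/* As a sketch, for a little-endian DFmode value living in the register pair
   that starts at hard register REGNO the span returned above is

     (parallel [(reg:SF REGNO+1) (reg:SF REGNO)])

   which tells the DWARF consumer to describe the value as those two SFmode
   halves, in that order.  */
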
7877 static machine_mode
7878 sh_promote_function_mode (const_tree type, machine_mode mode,
7879 int *punsignedp, const_tree funtype,
7880 int for_return)
7881 {
7882 if (sh_promote_prototypes (funtype))
7883 return promote_mode (type, mode, punsignedp);
7884 else
7885 return default_promote_function_mode (type, mode, punsignedp, funtype,
7886 for_return);
7887 }
7888
7889 static bool
7890 sh_promote_prototypes (const_tree type)
7891 {
7892 if (TARGET_HITACHI)
7893 return false;
7894 if (! type)
7895 return true;
7896 return ! sh_attr_renesas_p (type);
7897 }
7898
7899 static bool
7900 sh_pass_by_reference (cumulative_args_t cum_v, const function_arg_info &arg)
7901 {
7902 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7903
7904 if (targetm.calls.must_pass_in_stack (arg.mode, arg.type))
7905 return true;
7906
7907 /* ??? std_gimplify_va_arg_expr passes NULL for cum. That function
7908 wants to know about pass-by-reference semantics for incoming
7909 arguments. */
7910 if (! cum)
7911 return false;
7912
7913 return false;
7914 }
7915
7916 static bool
7917 sh_callee_copies (cumulative_args_t cum, const function_arg_info &arg)
7918 {
7919 /* ??? How can it possibly be correct to return true only on the
7920 caller side of the equation? Is there someplace else in the
7921 sh backend that's magically producing the copies? */
7922 return (get_cumulative_args (cum)->outgoing
7923 && ((arg.mode == BLKmode
7924 ? TYPE_ALIGN (arg.type)
7925 : GET_MODE_ALIGNMENT (arg.mode))
7926 % SH_MIN_ALIGN_FOR_CALLEE_COPY == 0));
7927 }
7928
7929 static sh_arg_class
7930 get_sh_arg_class (machine_mode mode)
7931 {
7932 if (TARGET_FPU_ANY && mode == SFmode)
7933 return SH_ARG_FLOAT;
7934
7935 if (TARGET_FPU_DOUBLE
7936 && (GET_MODE_CLASS (mode) == MODE_FLOAT
7937 || GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT))
7938 return SH_ARG_FLOAT;
7939
7940 return SH_ARG_INT;
7941 }
7942
7943 /* Round a register number up to a proper boundary for an arg of mode
7944 MODE.
7945 The SH doesn't care about double alignment, so we only
7946 round doubles to even regs when asked to explicitly. */
7947 static int
7948 sh_round_reg (const CUMULATIVE_ARGS& cum, machine_mode mode)
7949 {
7950 /* FIXME: This used to be a macro and has been copy pasted into this
7951 function as is. Make this more readable. */
7952 return
7953 (((TARGET_ALIGN_DOUBLE
7954 || (TARGET_FPU_DOUBLE
7955 && (mode == DFmode || mode == DCmode)
7956 && cum.arg_count[(int) SH_ARG_FLOAT] < NPARM_REGS (mode)))
7957 && GET_MODE_UNIT_SIZE (mode) > UNITS_PER_WORD)
7958 ? (cum.arg_count[(int) get_sh_arg_class (mode)]
7959 + (cum.arg_count[(int) get_sh_arg_class (mode)] & 1))
7960 : cum.arg_count[(int) get_sh_arg_class (mode)]);
7961 }
7962
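/* Example of the rounding above: when a DFmode argument arrives with
   arg_count[SH_ARG_FLOAT] == 3 and the alignment condition holds, the slot
   becomes 3 + (3 & 1) == 4, i.e. one SFmode register is skipped so the
   double starts on an even register number; an even count is returned
   unchanged.  */
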
7963 /* Return true if arg of the specified mode should be passed in a register
7964 or false otherwise. */
7965 static bool
7966 sh_pass_in_reg_p (const CUMULATIVE_ARGS& cum, machine_mode mode,
7967 const_tree type)
7968 {
7969 /* FIXME: This used to be a macro and has been copy pasted into this
7970 function as is. Make this more readable. */
7971 return
7972 ((type == 0
7973 || (! TREE_ADDRESSABLE (type)
7974 && (! (TARGET_HITACHI || cum.renesas_abi)
7975 || ! (AGGREGATE_TYPE_P (type)
7976 || (!TARGET_FPU_ANY
7977 && (GET_MODE_CLASS (mode) == MODE_FLOAT
7978 && GET_MODE_SIZE (mode) > GET_MODE_SIZE (SFmode)))))))
7979 && ! cum.force_mem
7980 && (TARGET_SH2E
7981 ? ((mode) == BLKmode
7982 ? ((cum.arg_count[(int) SH_ARG_INT] * UNITS_PER_WORD
7983 + int_size_in_bytes (type))
7984 <= NPARM_REGS (SImode) * UNITS_PER_WORD)
7985 : ((sh_round_reg (cum, mode)
7986 + sh_hard_regno_nregs (BASE_ARG_REG (mode), mode))
7987 <= NPARM_REGS (mode)))
7988 : sh_round_reg (cum, mode) < NPARM_REGS (mode)));
7989 }
7990
7991 static int
7992 sh_arg_partial_bytes (cumulative_args_t cum_v, const function_arg_info &arg)
7993 {
7994 CUMULATIVE_ARGS *cum = get_cumulative_args (cum_v);
7995 int words = 0;
7996
7997 if (sh_pass_in_reg_p (*cum, arg.mode, arg.type)
7998 && !TARGET_FPU_DOUBLE
7999 && (sh_round_reg (*cum, arg.mode)
8000 + CEIL (arg.promoted_size_in_bytes (), UNITS_PER_WORD)
8001 > NPARM_REGS (arg.mode)))
8002 words = NPARM_REGS (arg.mode) - sh_round_reg (*cum, arg.mode);
8003
8004 return words * UNITS_PER_WORD;
8005 }
8006
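/* A sketch of the arithmetic above, assuming four SImode argument registers,
   UNITS_PER_WORD == 4, no double-precision FPU and an argument that still
   satisfies sh_pass_in_reg_p: after three int arguments, a DImode argument
   sees sh_round_reg == 3 and needs CEIL (8, 4) == 2 slots, so 3 + 2 > 4 and
   words == 4 - 3 == 1; 4 bytes travel in the last register and the rest
   goes on the stack.  */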
8007
8008 /* Define where to put the arguments to a function.
8009 Value is zero to push the argument on the stack,
8010 or a hard register in which to store the argument.
8011
8012 CUM is a variable of type CUMULATIVE_ARGS which gives info about
8013 the preceding args and about the function being called.
8014 ARG is a description of the argument.
8015
8016 On SH the first args are normally in registers
8017 and the rest are pushed. Any arg that starts within the first
8018 NPARM_REGS words is at least partially passed in a register unless
8019 its data type forbids. */
8020 static rtx
8021 sh_function_arg (cumulative_args_t ca_v, const function_arg_info &arg)
8022 {
8023 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8024 machine_mode mode = arg.mode;
8025
8026 if (arg.end_marker_p ())
8027 return ca->renesas_abi ? const1_rtx : const0_rtx;
8028
8029 if (sh_pass_in_reg_p (*ca, mode, arg.type)
8030 && (arg.named || ! (TARGET_HITACHI || ca->renesas_abi)))
8031 {
8032 int regno;
8033
8034 if (mode == SCmode && TARGET_SH4 && TARGET_LITTLE_ENDIAN
8035 && (! FUNCTION_ARG_SCmode_WART || (sh_round_reg (*ca, mode) & 1)))
8036 {
8037 rtx r1 = gen_rtx_EXPR_LIST (VOIDmode,
8038 gen_rtx_REG (SFmode,
8039 BASE_ARG_REG (mode)
8040 + (sh_round_reg (*ca, mode) ^ 1)),
8041 const0_rtx);
8042 rtx r2 = gen_rtx_EXPR_LIST (VOIDmode,
8043 gen_rtx_REG (SFmode,
8044 BASE_ARG_REG (mode)
8045 + ((sh_round_reg (*ca, mode) + 1) ^ 1)),
8046 GEN_INT (4));
8047 return gen_rtx_PARALLEL(SCmode, gen_rtvec(2, r1, r2));
8048 }
8049
8050 /* If the alignment of a DF value causes an SF register to be
8051 skipped, we will use that skipped register for the next SF
8052 value. */
8053 if ((TARGET_HITACHI || ca->renesas_abi)
8054 && ca->free_single_fp_reg
8055 && mode == SFmode)
8056 return gen_rtx_REG (mode, ca->free_single_fp_reg);
8057
8058 regno = (BASE_ARG_REG (mode) + sh_round_reg (*ca, mode))
8059 ^ (mode == SFmode && TARGET_SH4
8060 && TARGET_LITTLE_ENDIAN
8061 && ! TARGET_HITACHI && ! ca->renesas_abi);
8062 return gen_rtx_REG (mode, regno);
8063
8064 }
8065
8066 return NULL_RTX;
8067 }
8068
8069 /* Update the data in CUM to advance over argument ARG. */
8070 static void
8071 sh_function_arg_advance (cumulative_args_t ca_v,
8072 const function_arg_info &arg)
8073 {
8074 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8075
8076 if (ca->force_mem)
8077 ca->force_mem = false;
8078
8079 if ((TARGET_HITACHI || ca->renesas_abi) && TARGET_FPU_DOUBLE)
8080 {
8081 /* Note that we've used the skipped register. */
8082 if (arg.mode == SFmode && ca->free_single_fp_reg)
8083 {
8084 ca->free_single_fp_reg = 0;
8085 return;
8086 }
8087 /* When we have a DF after an SF, there's an SF register that gets
8088 skipped in order to align the DF value. We note this skipped
8089 register, because the next SF value will use it, and not the
8090 SF that follows the DF. */
8091 if (arg.mode == DFmode
8092 && sh_round_reg (*ca, DFmode) != sh_round_reg (*ca, SFmode))
8093 {
8094 ca->free_single_fp_reg = (sh_round_reg (*ca, SFmode)
8095 + BASE_ARG_REG (arg.mode));
8096 }
8097 }
8098
8099 if (! ((TARGET_SH4 || TARGET_SH2A) || ca->renesas_abi)
8100 || sh_pass_in_reg_p (*ca, arg.mode, arg.type))
8101 (ca->arg_count[(int) get_sh_arg_class (arg.mode)]
8102 = (sh_round_reg (*ca, arg.mode)
8103 + CEIL (arg.promoted_size_in_bytes (), UNITS_PER_WORD)));
8104 }
8105
8106 /* The Renesas calling convention doesn't quite fit into this scheme since
8107 the address is passed like an invisible argument, but one that is always
8108 passed in memory. */
8109 static rtx
8110 sh_struct_value_rtx (tree fndecl, int incoming ATTRIBUTE_UNUSED)
8111 {
8112 if (TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8113 return NULL_RTX;
8114 return gen_rtx_REG (Pmode, 2);
8115 }
8116
8117 /* Worker function for TARGET_FUNCTION_VALUE.
8118
8119 For the SH, this is like LIBCALL_VALUE, except that we must change the
8120 mode like PROMOTE_MODE does.
8121 ??? PROMOTE_MODE is ignored for non-scalar types. The set of types
8122 tested here has to be kept in sync with the one in
8123 explow.c:promote_mode. */
8124 static rtx
8125 sh_function_value (const_tree valtype,
8126 const_tree fn_decl_or_type,
8127 bool outgoing ATTRIBUTE_UNUSED)
8128 {
8129 if (fn_decl_or_type
8130 && !DECL_P (fn_decl_or_type))
8131 fn_decl_or_type = NULL;
8132
8133 return gen_rtx_REG (
8134 ((GET_MODE_CLASS (TYPE_MODE (valtype)) == MODE_INT
8135 && GET_MODE_SIZE (TYPE_MODE (valtype)) < 4
8136 && (TREE_CODE (valtype) == INTEGER_TYPE
8137 || TREE_CODE (valtype) == ENUMERAL_TYPE
8138 || TREE_CODE (valtype) == BOOLEAN_TYPE
8139 || TREE_CODE (valtype) == REAL_TYPE
8140 || TREE_CODE (valtype) == OFFSET_TYPE))
8141 && sh_promote_prototypes (fn_decl_or_type)
8142 ? SImode : TYPE_MODE (valtype)),
8143 BASE_RETURN_VALUE_REG (TYPE_MODE (valtype)));
8144 }
8145
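/* For example, a function declared as returning "short" (a sub-word
   MODE_INT type) and subject to sh_promote_prototypes gets its return value
   widened to SImode here, so callers read a full return register rather
   than a 16-bit value.  */
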
8146 /* Worker function for TARGET_LIBCALL_VALUE. */
8147 static rtx
8148 sh_libcall_value (machine_mode mode, const_rtx fun ATTRIBUTE_UNUSED)
8149 {
8150 return gen_rtx_REG (mode, BASE_RETURN_VALUE_REG (mode));
8151 }
8152
8153 /* Return true if N is a possible register number of function value. */
8154 static bool
8155 sh_function_value_regno_p (const unsigned int regno)
8156 {
8157 return regno == FIRST_RET_REG || (TARGET_SH2E && regno == FIRST_FP_RET_REG);
8158 }
8159
8160 /* Worker function for TARGET_RETURN_IN_MEMORY. */
8161 static bool
8162 sh_return_in_memory (const_tree type, const_tree fndecl)
8163 {
8164 return TYPE_MODE (type) == BLKmode
8165 || ((TARGET_HITACHI || sh_attr_renesas_p (fndecl))
8166 && TREE_CODE (type) == RECORD_TYPE);
8167 }
8168
8169 /* We actually emit the code in sh_expand_prologue. We used to use
8170 a static variable to flag that we need to emit this code, but that
8171 doesn't work when inlining, when functions are deferred and then emitted
8172 later. Fortunately, we already have two flags that are part of struct
8173 function that tell if a function uses varargs or stdarg. */
8174 static void
8175 sh_setup_incoming_varargs (cumulative_args_t ca,
8176 const function_arg_info &arg,
8177 int *pretend_arg_size,
8178 int second_time ATTRIBUTE_UNUSED)
8179 {
8180 gcc_assert (cfun->stdarg);
8181 if (TARGET_VARARGS_PRETEND_ARGS (current_function_decl))
8182 {
8183 int named_parm_regs, anon_parm_regs;
8184
8185 named_parm_regs = (sh_round_reg (*get_cumulative_args (ca), arg.mode)
8186 + CEIL (arg.promoted_size_in_bytes (),
8187 UNITS_PER_WORD));
8188 anon_parm_regs = NPARM_REGS (SImode) - named_parm_regs;
8189 if (anon_parm_regs > 0)
8190 *pretend_arg_size = anon_parm_regs * 4;
8191 }
8192 }
8193
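/* Example: in a stdarg function whose named arguments occupy two SImode
   registers, the code above yields named_parm_regs == 2 and, assuming
   NPARM_REGS (SImode) == 4, anon_parm_regs == 2, so *pretend_arg_size
   becomes 8 and the prologue reserves room for the two register-passed
   anonymous arguments.  */
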
8194 static bool
8195 sh_strict_argument_naming (cumulative_args_t ca ATTRIBUTE_UNUSED)
8196 {
8197 return false;
8198 }
8199
8200 static bool
8201 sh_pretend_outgoing_varargs_named (cumulative_args_t ca_v)
8202 {
8203 CUMULATIVE_ARGS *ca = get_cumulative_args (ca_v);
8204
8205 return ! (TARGET_HITACHI || ca->renesas_abi);
8206 }
8207
8208
8209 /* Define the offset between two registers, one to be eliminated, and
8210 the other its replacement, at the start of a routine. */
8211 int
8212 initial_elimination_offset (int from, int to)
8213 {
8214 const int regs_saved_rounding = 0;
8215 int save_flags = target_flags;
8216 HARD_REG_SET live_regs_mask;
8217
8218 int regs_saved = calc_live_regs (&live_regs_mask);
8219
8220 int total_auto_space = rounded_frame_size (regs_saved) - regs_saved_rounding;
8221 target_flags = save_flags;
8222
8223 int total_saved_regs_space = regs_saved + regs_saved_rounding;
8224
8225 if (from == ARG_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8226 return total_saved_regs_space + total_auto_space;
8227
8228 if (from == ARG_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8229 return total_saved_regs_space + total_auto_space;
8230
8231 /* Initial gap between fp and sp is 0. */
8232 if (from == HARD_FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8233 return 0;
8234
8235 if (from == FRAME_POINTER_REGNUM && to == STACK_POINTER_REGNUM)
8236 return rounded_frame_size (0);
8237
8238 if (from == FRAME_POINTER_REGNUM && to == HARD_FRAME_POINTER_REGNUM)
8239 return rounded_frame_size (0);
8240
8241 gcc_assert (from == RETURN_ADDRESS_POINTER_REGNUM
8242 && (to == HARD_FRAME_POINTER_REGNUM
8243 || to == STACK_POINTER_REGNUM));
8244 return total_auto_space;
8245 }
8246
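/* A numeric sketch of the eliminations above: with 12 bytes of saved
   registers and a rounded frame of 16 bytes, replacing the arg pointer by
   either the hard frame pointer or the stack pointer uses an offset of
   12 + 16 == 28, while the hard frame pointer and the stack pointer
   coincide (offset 0) because the frame pointer is set from sp only after
   the whole frame has been allocated.  */
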
8247 /* Parse the -mfixed-range= option string. */
8248 void
8249 sh_fix_range (const char *const_str)
8250 {
8251 /* str must be of the form REG1'-'REG2{,REG1'-'REG} where REG1 and
8252 REG2 are either register names or register numbers. The effect
8253 of this option is to mark the registers in the range from REG1 to
8254 REG2 as ``fixed'' so they won't be used by the compiler. */
8255
8256 char* str = strcpy ((char*)alloca (strlen (const_str) + 1), const_str);
8257
8258 while (1)
8259 {
8260 char* dash = strchr (str, '-');
8261 if (!dash)
8262 {
8263 warning (0, "value of %<-mfixed-range%> must have form REG1-REG2");
8264 return;
8265 }
8266 *dash = '\0';
8267 char* comma = strchr (dash + 1, ',');
8268 if (comma)
8269 *comma = '\0';
8270
8271 int first = decode_reg_name (str);
8272 if (first < 0)
8273 {
8274 warning (0, "unknown register name: %s", str);
8275 return;
8276 }
8277
8278 int last = decode_reg_name (dash + 1);
8279 if (last < 0)
8280 {
8281 warning (0, "unknown register name: %s", dash + 1);
8282 return;
8283 }
8284
8285 *dash = '-';
8286
8287 if (first > last)
8288 {
8289 warning (0, "%s-%s is an empty range", str, dash + 1);
8290 return;
8291 }
8292
8293 for (int i = first; i <= last; ++i)
8294 fixed_regs[i] = call_used_regs[i] = 1;
8295
8296 if (!comma)
8297 break;
8298
8299 *comma = ',';
8300 str = comma + 1;
8301 }
8302 }
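
/* For example, a hypothetical -mfixed-range=r10-r11,r13-r13 would mark r10,
   r11 and r13 as fixed and call-used, keeping the register allocator away
   from them.  */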
8303 \f
8304 /* Insert any deferred function attributes from earlier pragmas. */
8305 static void
8306 sh_insert_attributes (tree node, tree *attributes)
8307 {
8308 if (TREE_CODE (node) != FUNCTION_DECL)
8309 return;
8310
8311 /* We are only interested in fields. */
8312 if (!DECL_P (node))
8313 return;
8314
8315 /* Append the attributes to the deferred attributes. */
8316 *sh_deferred_function_attributes_tail = *attributes;
8317 tree attrs = sh_deferred_function_attributes;
8318 if (!attrs)
8319 return;
8320
8321 /* Some attributes imply or require the interrupt attribute. */
8322 if (!lookup_attribute ("interrupt_handler", attrs)
8323 && !lookup_attribute ("interrupt_handler", DECL_ATTRIBUTES (node)))
8324 {
8325 /* If we have a trapa_handler, but no interrupt_handler attribute,
8326 insert an interrupt_handler attribute. */
8327 if (lookup_attribute ("trapa_handler", attrs) != NULL_TREE)
8328 /* We can't use sh_pr_interrupt here because that's not in the
8329 java frontend. */
8330 attrs
8331 = tree_cons (get_identifier("interrupt_handler"), NULL_TREE, attrs);
8332 /* However, for sp_switch, trap_exit, nosave_low_regs and resbank,
8333 if the interrupt attribute is missing, we ignore the attribute
8334 and warn. */
8335 else if (lookup_attribute ("sp_switch", attrs)
8336 || lookup_attribute ("trap_exit", attrs)
8337 || lookup_attribute ("nosave_low_regs", attrs)
8338 || lookup_attribute ("resbank", attrs))
8339 {
8340 tree *tail;
8341
8342 for (tail = attributes; attrs; attrs = TREE_CHAIN (attrs))
8343 {
8344 if (is_attribute_p ("sp_switch", TREE_PURPOSE (attrs))
8345 || is_attribute_p ("trap_exit", TREE_PURPOSE (attrs))
8346 || is_attribute_p ("nosave_low_regs", TREE_PURPOSE (attrs))
8347 || is_attribute_p ("resbank", TREE_PURPOSE (attrs)))
8348 warning (OPT_Wattributes,
8349 "%qE attribute only applies to interrupt functions",
8350 TREE_PURPOSE (attrs));
8351 else
8352 {
8353 *tail = tree_cons (TREE_PURPOSE (attrs), NULL_TREE,
8354 NULL_TREE);
8355 tail = &TREE_CHAIN (*tail);
8356 }
8357 }
8358 attrs = *attributes;
8359 }
8360 }
8361
8362 /* Install the processed list. */
8363 *attributes = attrs;
8364
8365 /* Clear deferred attributes. */
8366 sh_deferred_function_attributes = NULL_TREE;
8367 sh_deferred_function_attributes_tail = &sh_deferred_function_attributes;
8368
8369 return;
8370 }
8371
8372 /*------------------------------------------------------------------------------
8373 Target specific attributes
8374 Supported attributes are:
8375
8376 * interrupt_handler
8377 Specifies this function is an interrupt handler.
8378
8379 * trapa_handler
8380 Like interrupt_handler, but don't save all registers.
8381
8382 * sp_switch
8383 Specifies an alternate stack for an interrupt handler to run on.
8384
8385 * trap_exit
8386 Use a trapa to exit an interrupt function instead of rte.
8387
8388 * nosave_low_regs
8389 Don't save r0..r7 in an interrupt handler function.
8390 This is useful on SH3* and SH4*, which have a separate set of low
8391 regs for user and privileged modes.
8392 This is mainly to be used for non-reentrant interrupt handlers (i.e.
8393 those that run with interrupts disabled and thus can't be
8394 interrupted themselves).
8395
8396 * renesas
8397 Use Renesas calling/layout conventions (functions and structures).
8398
8399 * resbank
8400 In case of an interrupt handler function, use a register bank to
8401 save registers R0-R14, MACH, MACL, GBR and PR.
8402 This is available only on SH2A targets.
8403
8404 * function_vector
8405 Declares a function to be called using the TBR relative addressing
8406 mode. Takes an argument that specifies the slot number in the table
8407 where this function can be looked up by the JSR/N @@(disp8,TBR) insn.
8408 */
8409
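/* An illustrative (hypothetical) use of several of these attributes:

     void __attribute__ ((interrupt_handler, sp_switch ("alt_stack"),
                          trap_exit (11), nosave_low_regs))
     timer_isr (void);

   where "alt_stack" names a variable holding the address of the alternate
   stack and 11 is the trapa number used at exit, matching the argument
   checks performed by the handlers below.  */
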
8410 /* Handle a 'resbank' attribute. */
8411 static tree
8412 sh_handle_resbank_handler_attribute (tree * node, tree name,
8413 tree args ATTRIBUTE_UNUSED,
8414 int flags ATTRIBUTE_UNUSED,
8415 bool * no_add_attrs)
8416 {
8417 if (!TARGET_SH2A)
8418 {
8419 warning (OPT_Wattributes, "%qE attribute is supported only for SH2A",
8420 name);
8421 *no_add_attrs = true;
8422 }
8423 if (TREE_CODE (*node) != FUNCTION_DECL)
8424 {
8425 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8426 name);
8427 *no_add_attrs = true;
8428 }
8429
8430 return NULL_TREE;
8431 }
8432
8433 /* Handle an "interrupt_handler" attribute; arguments as in
8434 struct attribute_spec.handler. */
8435 static tree
8436 sh_handle_interrupt_handler_attribute (tree *node, tree name,
8437 tree args ATTRIBUTE_UNUSED,
8438 int flags ATTRIBUTE_UNUSED,
8439 bool *no_add_attrs)
8440 {
8441 if (TREE_CODE (*node) != FUNCTION_DECL)
8442 {
8443 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8444 name);
8445 *no_add_attrs = true;
8446 }
8447
8448 return NULL_TREE;
8449 }
8450
8451 /* Handle a 'function_vector' attribute; arguments as in
8452 struct attribute_spec.handler. */
8453 static tree
8454 sh2a_handle_function_vector_handler_attribute (tree * node, tree name,
8455 tree args ATTRIBUTE_UNUSED,
8456 int flags ATTRIBUTE_UNUSED,
8457 bool * no_add_attrs)
8458 {
8459 if (!TARGET_SH2A)
8460 {
8461 warning (OPT_Wattributes, "%qE attribute only applies to SH2A",
8462 name);
8463 *no_add_attrs = true;
8464 }
8465 else if (TREE_CODE (*node) != FUNCTION_DECL)
8466 {
8467 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8468 name);
8469 *no_add_attrs = true;
8470 }
8471 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8472 {
8473 /* The argument must be a constant integer. */
8474 warning (OPT_Wattributes,
8475 "%qE attribute argument not an integer constant",
8476 name);
8477 *no_add_attrs = true;
8478 }
8479 else if (TREE_INT_CST_LOW (TREE_VALUE (args)) > 255)
8480 {
8481 /* The argument value must be between 0 and 255. */
8482 warning (OPT_Wattributes,
8483 "%qE attribute argument should be between 0 to 255",
8484 name);
8485 *no_add_attrs = true;
8486 }
8487 return NULL_TREE;
8488 }
8489
8490 /* Returns true if the function referred to by the symbol ref X has been
8491 assigned the attribute 'function_vector'. */
8492 bool
8493 sh2a_is_function_vector_call (rtx x)
8494 {
8495 if (GET_CODE (x) == SYMBOL_REF
8496 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8497 {
8498 tree tr = SYMBOL_REF_DECL (x);
8499
8500 if (sh2a_function_vector_p (tr))
8501 return true;
8502 }
8503
8504 return false;
8505 }
8506
8507 /* Returns the function vector number, if the attribute
8508 'function_vector' is assigned, otherwise returns zero. */
8509 int
8510 sh2a_get_function_vector_number (rtx x)
8511 {
8512 if ((GET_CODE (x) == SYMBOL_REF)
8513 && (SYMBOL_REF_FLAGS (x) & SYMBOL_FLAG_FUNCVEC_FUNCTION))
8514 {
8515 tree t = SYMBOL_REF_DECL (x);
8516
8517 if (TREE_CODE (t) != FUNCTION_DECL)
8518 return 0;
8519
8520 for (tree list = SH_ATTRIBUTES (t); list; list = TREE_CHAIN (list))
8521 if (is_attribute_p ("function_vector", TREE_PURPOSE (list)))
8522 return TREE_INT_CST_LOW (TREE_VALUE (TREE_VALUE (list)));
8523
8524 return 0;
8525 }
8526 else
8527 return 0;
8528 }
8529
8530 /* Handle an "sp_switch" attribute; arguments as in
8531 struct attribute_spec.handler. */
8532 static tree
8533 sh_handle_sp_switch_attribute (tree *node, tree name, tree args,
8534 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8535 {
8536 if (TREE_CODE (*node) != FUNCTION_DECL)
8537 {
8538 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8539 name);
8540 *no_add_attrs = true;
8541 }
8542 else if (TREE_CODE (TREE_VALUE (args)) != STRING_CST)
8543 {
8544 /* The argument must be a constant string. */
8545 warning (OPT_Wattributes, "%qE attribute argument not a string constant",
8546 name);
8547 *no_add_attrs = true;
8548 }
8549
8550 return NULL_TREE;
8551 }
8552
8553 /* Handle an "trap_exit" attribute; arguments as in
8554 struct attribute_spec.handler. */
8555 static tree
8556 sh_handle_trap_exit_attribute (tree *node, tree name, tree args,
8557 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
8558 {
8559 if (TREE_CODE (*node) != FUNCTION_DECL)
8560 {
8561 warning (OPT_Wattributes, "%qE attribute only applies to functions",
8562 name);
8563 *no_add_attrs = true;
8564 }
8565 /* The argument specifies a trap number to be used in a trapa instruction
8566 at function exit (instead of an rte instruction). */
8567 else if (TREE_CODE (TREE_VALUE (args)) != INTEGER_CST)
8568 {
8569 /* The argument must be a constant integer. */
8570 warning (OPT_Wattributes, "%qE attribute argument not an "
8571 "integer constant", name);
8572 *no_add_attrs = true;
8573 }
8574
8575 return NULL_TREE;
8576 }
8577
8578 static tree
8579 sh_handle_renesas_attribute (tree *node ATTRIBUTE_UNUSED,
8580 tree name ATTRIBUTE_UNUSED,
8581 tree args ATTRIBUTE_UNUSED,
8582 int flags ATTRIBUTE_UNUSED,
8583 bool *no_add_attrs ATTRIBUTE_UNUSED)
8584 {
8585 return NULL_TREE;
8586 }
8587
8588 /* True if __attribute__((renesas)) or -mrenesas. */
8589 bool
8590 sh_attr_renesas_p (const_tree td)
8591 {
8592 if (TARGET_HITACHI)
8593 return true;
8594 if (td == NULL_TREE)
8595 return false;
8596 if (DECL_P (td))
8597 td = TREE_TYPE (td);
8598 if (td == error_mark_node)
8599 return false;
8600 return lookup_attribute ("renesas", TYPE_ATTRIBUTES (td)) != NULL_TREE;
8601 }
8602
8603 /* True if __attribute__((renesas)) or -mrenesas, for the current
8604 function. */
8605 bool
8606 sh_cfun_attr_renesas_p (void)
8607 {
8608 return sh_attr_renesas_p (current_function_decl);
8609 }
8610
8611 /* Returns true if the current function has the "interrupt_handler"
8612 attribute set. */
8613 bool
8614 sh_cfun_interrupt_handler_p (void)
8615 {
8616 return (lookup_attribute ("interrupt_handler",
8617 DECL_ATTRIBUTES (current_function_decl))
8618 != NULL_TREE);
8619 }
8620
8621 /* Returns true if FUNC has been assigned the attribute
8622 "function_vector". */
8623 bool
8624 sh2a_function_vector_p (tree func)
8625 {
8626 if (TREE_CODE (func) != FUNCTION_DECL)
8627 return false;
8628
8629 for (tree list = SH_ATTRIBUTES (func); list; list = TREE_CHAIN (list))
8630 if (is_attribute_p ("function_vector", get_attribute_name (list)))
8631 return true;
8632
8633 return false;
8634 }
8635
8636 /* Returns true if the current function has the "resbank" attribute set. */
8637 bool
8638 sh_cfun_resbank_handler_p (void)
8639 {
8640 return ((lookup_attribute ("resbank",
8641 DECL_ATTRIBUTES (current_function_decl))
8642 != NULL_TREE)
8643 && (lookup_attribute ("interrupt_handler",
8644 DECL_ATTRIBUTES (current_function_decl))
8645 != NULL_TREE) && TARGET_SH2A);
8646 }
8647
8648 /* Returns true if the current function has a "trap_exit" attribute set. */
8649 bool
8650 sh_cfun_trap_exit_p (void)
8651 {
8652 return lookup_attribute ("trap_exit", DECL_ATTRIBUTES (current_function_decl))
8653 != NULL_TREE;
8654 }
8655
8656 /* Implement TARGET_CHECK_PCH_TARGET_FLAGS. */
8657 static const char *
8658 sh_check_pch_target_flags (int old_flags)
8659 {
8660 if ((old_flags ^ target_flags) & (MASK_SH1 | MASK_SH2 | MASK_SH3
8661 | MASK_SH_E | MASK_HARD_SH4
8662 | MASK_FPU_SINGLE | MASK_SH4))
8663 return _("created and used with different architectures / ABIs");
8664 if ((old_flags ^ target_flags) & MASK_HITACHI)
8665 return _("created and used with different ABIs");
8666 if ((old_flags ^ target_flags) & MASK_LITTLE_ENDIAN)
8667 return _("created and used with different endianness");
8668 return NULL;
8669 }
8670 \f
8671 /* Predicates used by the templates. */
8672
8673 /* Returns true if OP is MACL, MACH or PR. The input must be a REG rtx.
8674 Used only in general_movsrc_operand. */
8675 bool
8676 system_reg_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
8677 {
8678 switch (REGNO (op))
8679 {
8680 case PR_REG:
8681 case MACL_REG:
8682 case MACH_REG:
8683 return true;
8684 }
8685 return false;
8686 }
8687
8688 /* Returns true if OP is a floating point value with value 0.0. */
8689 bool
8690 fp_zero_operand (rtx op)
8691 {
8692 if (GET_MODE (op) != SFmode)
8693 return false;
8694
8695 const REAL_VALUE_TYPE* r = CONST_DOUBLE_REAL_VALUE (op);
8696 return real_equal (r, &dconst0) && ! REAL_VALUE_MINUS_ZERO (*r);
8697 }
8698
8699 /* Returns true if OP is a floating point value with value 1.0. */
8700 bool
8701 fp_one_operand (rtx op)
8702 {
8703 if (GET_MODE (op) != SFmode)
8704 return false;
8705
8706 return real_equal (CONST_DOUBLE_REAL_VALUE (op), &dconst1);
8707 }
8708
8709 /* Return the TLS type for TLS symbols. */
8710 enum tls_model
8711 tls_symbolic_operand (rtx op, machine_mode mode ATTRIBUTE_UNUSED)
8712 {
8713 if (GET_CODE (op) != SYMBOL_REF)
8714 return TLS_MODEL_NONE;
8715 return SYMBOL_REF_TLS_MODEL (op);
8716 }
8717 \f
8718 /* Return the destination address of a branch. */
8719 static int
8720 branch_dest (rtx branch)
8721 {
8722 rtx dest = SET_SRC (PATTERN (branch));
8723
8724 if (GET_CODE (dest) == IF_THEN_ELSE)
8725 dest = XEXP (dest, 1);
8726
8727 return INSN_ADDRESSES (INSN_UID (XEXP (dest, 0)));
8728 }
8729 \f
8730 /* Return true if REG is not used after INSN.
8731 We assume REG is a reload reg, and therefore does
8732 not live past labels. It may live past calls or jumps though. */
8733 bool
8734 reg_unused_after (rtx reg, rtx_insn *insn)
8735 {
8736 /* If the reg is set by this instruction, then it is safe for our
8737 case. Disregard the case where this is a store to memory, since
8738 we are checking a register used in the store address. */
8739 rtx set = single_set (insn);
8740 if (set && !MEM_P (SET_DEST (set))
8741 && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8742 return true;
8743
8744 while ((insn = NEXT_INSN (insn)))
8745 {
8746 if (!INSN_P (insn))
8747 continue;
8748
8749 rtx_code code = GET_CODE (insn);
8750
8751 #if 0
8752 /* If this is a label that existed before reload, then the register
8753 is dead here. However, if this is a label added by reorg, then
8754 the register may still be live here. We can't tell the difference,
8755 so we just ignore labels completely. */
8756 if (code == CODE_LABEL)
8757 return 1;
8758 /* else */
8759 #endif
8760
8761 if (code == JUMP_INSN)
8762 return false;
8763
8764 /* If this is a sequence, we must handle them all at once.
8765 We could have for instance a call that sets the target register,
8766 and an insn in a delay slot that uses the register. In this case,
8767 we must return 0. */
8768 else if (code == INSN && GET_CODE (PATTERN (insn)) == SEQUENCE)
8769 {
8770 rtx_sequence *seq = as_a <rtx_sequence *> (PATTERN (insn));
8771 bool retval = false;
8772
8773 for (int i = 0; i < seq->len (); i++)
8774 {
8775 rtx_insn *this_insn = seq->insn (i);
8776 rtx set = single_set (this_insn);
8777
8778 if (CALL_P (this_insn))
8779 code = CALL_INSN;
8780 else if (JUMP_P (this_insn))
8781 {
8782 if (INSN_ANNULLED_BRANCH_P (this_insn))
8783 return false;
8784 code = JUMP_INSN;
8785 }
8786
8787 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8788 return false;
8789 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8790 {
8791 if (!MEM_P (SET_DEST (set)))
8792 retval = true;
8793 else
8794 return false;
8795 }
8796 if (set == NULL_RTX
8797 && reg_overlap_mentioned_p (reg, PATTERN (this_insn)))
8798 return false;
8799 }
8800 if (retval)
8801 return true;
8802 else if (code == JUMP_INSN)
8803 return false;
8804 }
8805
8806 rtx set = single_set (insn);
8807 if (set && reg_overlap_mentioned_p (reg, SET_SRC (set)))
8808 return false;
8809 if (set && reg_overlap_mentioned_p (reg, SET_DEST (set)))
8810 return !MEM_P (SET_DEST (set));
8811 if (set == NULL && reg_overlap_mentioned_p (reg, PATTERN (insn)))
8812 return false;
8813
8814 if (code == CALL_INSN && call_really_used_regs[REGNO (reg)])
8815 return true;
8816 }
8817 return true;
8818 }
8819 \f
8820
8821 static GTY(()) rtx t_reg_rtx;
8822 rtx
8823 get_t_reg_rtx (void)
8824 {
8825 if (! t_reg_rtx)
8826 t_reg_rtx = gen_rtx_REG (SImode, T_REG);
8827 return t_reg_rtx;
8828 }
8829
8830 static GTY(()) tree fpscr_values;
8831
8832 static void
8833 emit_fpu_switch (rtx scratch, int index)
8834 {
8835 if (fpscr_values == NULL)
8836 {
8837 tree t = build_index_type (integer_one_node);
8838 t = build_array_type (integer_type_node, t);
8839 t = build_decl (BUILTINS_LOCATION,
8840 VAR_DECL, get_identifier ("__fpscr_values"), t);
8841 DECL_ARTIFICIAL (t) = 1;
8842 DECL_IGNORED_P (t) = 1;
8843 DECL_EXTERNAL (t) = 1;
8844 TREE_STATIC (t) = 1;
8845 TREE_PUBLIC (t) = 1;
8846 TREE_USED (t) = 1;
8847
8848 fpscr_values = t;
8849 }
8850
8851 rtx src = DECL_RTL (fpscr_values);
8852 if (!can_create_pseudo_p ())
8853 {
8854 emit_move_insn (scratch, XEXP (src, 0));
8855 if (index != 0)
8856 emit_insn (gen_addsi3 (scratch, scratch, GEN_INT (index * 4)));
8857 src = adjust_automodify_address (src, SImode, scratch, index * 4);
8858 }
8859 else
8860 src = adjust_address (src, SImode, index * 4);
8861
8862 emit_insn (gen_lds_fpscr (src));
8863 }
8864 \f
8865 static rtx get_free_reg (HARD_REG_SET);
8866
8867 /* This function returns a register to use to load the address from which
8868 the fpscr is loaded. Currently it always returns r1 or r7, but when we are
8869 able to use pseudo registers after combine, or have a better mechanism
8870 for choosing a register, it should be done here. */
8871 /* REGS_LIVE is the liveness information for the point for which we
8872 need this allocation. In some bare-bones exit blocks, r1 is live at the
8873 start. We can even have all of r0..r3 being live:
8874 __complex__ long long f (double d) { if (d == 0) return 2; else return 3; }
8875 The INSN before which new insns are placed will clobber the register
8876 we return. If a basic block consists only of setting the return value
8877 register to a pseudo and using that register, the return value is not
8878 live before or after this block, yet we'll insert our insns right in
8879 the middle. */
8880 static rtx
8881 get_free_reg (HARD_REG_SET regs_live)
8882 {
8883 if (! TEST_HARD_REG_BIT (regs_live, 1))
8884 return gen_rtx_REG (Pmode, 1);
8885
8886 /* Hard reg 1 is live; since this is a small register classes target,
8887 there shouldn't be anything but a jump before the function end. */
8888 gcc_assert (!TEST_HARD_REG_BIT (regs_live, 7));
8889 return gen_rtx_REG (Pmode, 7);
8890 }
8891
8892 /* This function will set the fpscr from memory.
8893 MODE is the mode we are setting it to. */
8894 void
8895 fpscr_set_from_mem (int mode, HARD_REG_SET regs_live)
8896 {
8897 enum attr_fp_mode fp_mode = (enum attr_fp_mode) mode;
8898 enum attr_fp_mode norm_mode = ACTUAL_NORMAL_MODE (FP_MODE);
8899
8900 rtx addr_reg = !can_create_pseudo_p () ? get_free_reg (regs_live) : NULL_RTX;
8901 emit_fpu_switch (addr_reg, fp_mode == norm_mode);
8902 }
8903
8904 /* Is the given character a logical line separator for the assembler? */
8905 #ifndef IS_ASM_LOGICAL_LINE_SEPARATOR
8906 #define IS_ASM_LOGICAL_LINE_SEPARATOR(C, STR) ((C) == ';')
8907 #endif
8908
8909 static bool
8910 sequence_insn_p (rtx_insn *insn)
8911 {
8912 rtx_insn* prev = PREV_INSN (insn);
8913 if (prev == NULL)
8914 return false;
8915
8916 rtx_insn* next = NEXT_INSN (prev);
8917 if (next == NULL)
8918 return false;
8919
8920 return INSN_P (next) && GET_CODE (PATTERN (next)) == SEQUENCE;
8921 }
8922
8923 int
8924 sh_insn_length_adjustment (rtx_insn *insn)
8925 {
8926 /* Instructions with unfilled delay slots take up an extra two bytes for
8927 the nop in the delay slot. */
8928 if (((NONJUMP_INSN_P (insn)
8929 && GET_CODE (PATTERN (insn)) != USE
8930 && GET_CODE (PATTERN (insn)) != CLOBBER)
8931 || CALL_P (insn) || JUMP_P (insn))
8932 && ! sequence_insn_p (insn)
8933 && get_attr_needs_delay_slot (insn) == NEEDS_DELAY_SLOT_YES)
8934 return 2;
8935
8936 /* Increase the insn length of a cbranch without a delay slot insn to
8937 force a delay slot which will be stuffed with a nop. */
8938 if (TARGET_CBRANCH_FORCE_DELAY_SLOT && TARGET_SH2
8939 && JUMP_P (insn) && get_attr_type (insn) == TYPE_CBRANCH
8940 && ! sequence_insn_p (insn))
8941 return 2;
8942
8943 /* sh-dsp parallel processing insns take four bytes instead of two. */
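/* Illustrative example (editorial note, not in the original sources): in an
   inline asm template, a ppi line such as "padd x0,y0,a0" makes the loop
   below add 2 to the length, so the insn counts as 4 bytes, while a line
   starting with "pref" or one that turns out to be a label, e.g. "ploop:",
   adds nothing.  */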
8944
8945 if (NONJUMP_INSN_P (insn))
8946 {
8947 int sum = 0;
8948 rtx body = PATTERN (insn);
8949 const char *templ;
8950 char c;
8951 bool maybe_label = true;
8952
8953 if (GET_CODE (body) == ASM_INPUT)
8954 templ = XSTR (body, 0);
8955 else if (asm_noperands (body) >= 0)
8956 templ
8957 = decode_asm_operands (body, NULL, NULL, NULL, NULL, NULL);
8958 else
8959 return 0;
8960 do
8961 {
8962 int ppi_adjust = 0;
8963
8964 do
8965 c = *templ++;
8966 while (c == ' ' || c == '\t');
8967 /* All sh-dsp parallel-processing insns start with p.
8968 The only non-ppi sh insn starting with p is pref.
8969 The only ppi starting with pr is prnd. */
8970 if ((c == 'p' || c == 'P') && strncasecmp ("re", templ, 2))
8971 ppi_adjust = 2;
8972 /* The repeat pseudo-insn expands to three insns, a total of
8973 six bytes in size. */
8974 else if ((c == 'r' || c == 'R')
8975 && ! strncasecmp ("epeat", templ, 5))
8976 ppi_adjust = 4;
8977 while (c && c != '\n'
8978 && ! IS_ASM_LOGICAL_LINE_SEPARATOR (c, templ))
8979 {
8980 /* If this is a label, it is obviously not a ppi insn. */
8981 if (c == ':' && maybe_label)
8982 {
8983 ppi_adjust = 0;
8984 break;
8985 }
8986 else if (c == '\'' || c == '"')
8987 maybe_label = false;
8988 c = *templ++;
8989 }
8990 sum += ppi_adjust;
8991 maybe_label = c != ':';
8992 }
8993 while (c);
8994 return sum;
8995 }
8996 return 0;
8997 }
8998 \f
8999 /* Return TRUE for a valid displacement for the REG+disp addressing
9000 with MODE. */
9001 bool
9002 sh_legitimate_index_p (machine_mode mode, rtx op, bool consider_sh2a,
9003 bool allow_zero)
9004 {
9005 if (! CONST_INT_P (op))
9006 return false;
9007
9008 {
9009 const HOST_WIDE_INT offset = INTVAL (op);
9010 const int max_disp = sh_max_mov_insn_displacement (mode, consider_sh2a);
9011 const int align_mask = mov_insn_alignment_mask (mode, consider_sh2a);
9012
9013 /* If the mode does not support any displacement, always return false.
9014 Even though an index of '0' is actually always valid, it will cause
9015 trouble when e.g. a DFmode move is split into two SFmode moves,
9016 where one SFmode move will have index '0' and the other move will
9017 have index '4'. */
9018 if (!allow_zero && max_disp < 1)
9019 return false;
9020
9021 return offset >= 0 && offset <= max_disp && (offset & align_mask) == 0;
9022 }
9023 }
9024
9025 /* Recognize an RTL expression that is a valid memory address for
9026 an instruction.
9027 The MODE argument is the machine mode for the MEM expression
9028 that wants to use this address.
9029 Allow REG
9030 REG+disp
9031 REG+r0
9032 REG++
9033 --REG
9034 GBR
9035 GBR+disp */
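/* For illustration (editorial addition), these forms correspond to RTL
   addresses such as:
     (reg Rn)                          REG
     (plus (reg Rn) (const_int d))     REG+disp
     (plus (reg Rn) (reg R0))          REG+r0
     (post_inc (reg Rn))               REG++
     (pre_dec (reg Rn))                --REG
     (reg GBR)                         GBR
     (plus (reg GBR) (const_int d))    GBR+disp  */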
9036 static bool
9037 sh_legitimate_address_p (machine_mode mode, rtx x, bool strict)
9038 {
9039 if (REG_P (x) && REGNO (x) == GBR_REG)
9040 return true;
9041
9042 if (MAYBE_BASE_REGISTER_RTX_P (x, strict))
9043 return true;
9044 else if ((GET_CODE (x) == POST_INC || GET_CODE (x) == PRE_DEC)
9045 && MAYBE_BASE_REGISTER_RTX_P (XEXP (x, 0), strict))
9046 return true;
9047 else if (GET_CODE (x) == PLUS)
9048 {
9049 rtx xop0 = XEXP (x, 0);
9050 rtx xop1 = XEXP (x, 1);
9051
9052 if (REG_P (xop0) && REGNO (xop0) == GBR_REG)
9053 return gbr_displacement (xop1, mode);
9054
9055 if (GET_MODE_SIZE (mode) <= 8
9056 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict)
9057 && sh_legitimate_index_p (mode, xop1, TARGET_SH2A, false))
9058 return true;
9059
9060 if (GET_MODE_SIZE (mode) <= 4
9061 || (TARGET_FPU_DOUBLE && TARGET_FMOVD && mode == DFmode))
9062 {
9063 if (MAYBE_BASE_REGISTER_RTX_P (xop1, strict)
9064 && MAYBE_INDEX_REGISTER_RTX_P (xop0, strict))
9065 return true;
9066 if (MAYBE_INDEX_REGISTER_RTX_P (xop1, strict)
9067 && MAYBE_BASE_REGISTER_RTX_P (xop0, strict))
9068 return true;
9069 }
9070 }
9071
9072 return false;
9073 }
9074 \f
9075 /* Return TRUE if X references a SYMBOL_REF or LABEL_REF whose symbol
9076 isn't protected by a PIC unspec. */
9077 bool
9078 nonpic_symbol_mentioned_p (rtx x)
9079 {
9080 if (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF
9081 || GET_CODE (x) == PC)
9082 return true;
9083
9084 /* We don't want to look into the possible MEM location of a
9085 CONST_DOUBLE, since we're not going to use it, in general. */
9086 if (GET_CODE (x) == CONST_DOUBLE)
9087 return false;
9088
9089 if (GET_CODE (x) == UNSPEC
9090 && (XINT (x, 1) == UNSPEC_PIC
9091 || XINT (x, 1) == UNSPEC_GOT
9092 || XINT (x, 1) == UNSPEC_GOTOFF
9093 || XINT (x, 1) == UNSPEC_GOTPLT
9094 || XINT (x, 1) == UNSPEC_GOTTPOFF
9095 || XINT (x, 1) == UNSPEC_DTPOFF
9096 || XINT (x, 1) == UNSPEC_TPOFF
9097 || XINT (x, 1) == UNSPEC_PLT
9098 || XINT (x, 1) == UNSPEC_PCREL
9099 || XINT (x, 1) == UNSPEC_SYMOFF
9100 || XINT (x, 1) == UNSPEC_PCREL_SYMOFF
9101 || XINT (x, 1) == UNSPEC_GOTFUNCDESC
9102 || XINT (x, 1) == UNSPEC_GOTOFFFUNCDESC))
9103 return false;
9104
9105 const char* fmt = GET_RTX_FORMAT (GET_CODE (x));
9106 for (int i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
9107 {
9108 if (fmt[i] == 'E')
9109 {
9110 for (int j = XVECLEN (x, i) - 1; j >= 0; j--)
9111 if (nonpic_symbol_mentioned_p (XVECEXP (x, i, j)))
9112 return true;
9113 }
9114 else if (fmt[i] == 'e' && nonpic_symbol_mentioned_p (XEXP (x, i)))
9115 return true;
9116 }
9117
9118 return false;
9119 }
9120
9121 /* Convert a non-PIC address in `orig' to a PIC address using @GOT or
9122 @GOTOFF in `reg'. */
9123 rtx
9124 legitimize_pic_address (rtx orig, machine_mode mode ATTRIBUTE_UNUSED, rtx reg)
9125 {
9126 if (tls_symbolic_operand (orig, Pmode) != TLS_MODEL_NONE)
9127 return orig;
9128
9129 if (GET_CODE (orig) == LABEL_REF
9130 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_LOCAL_P (orig)))
9131 {
9132 if (reg == NULL_RTX)
9133 reg = gen_reg_rtx (Pmode);
9134
9135 if (TARGET_FDPIC
9136 && GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_FUNCTION_P (orig))
9137 {
9138 /* Weak functions may be NULL which doesn't work with
9139 GOTOFFFUNCDESC because the runtime offset is not known. */
9140 if (SYMBOL_REF_WEAK (orig))
9141 emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
9142 else
9143 emit_insn (gen_symGOTOFFFUNCDESC2reg (reg, orig));
9144 }
9145 else if (TARGET_FDPIC
9146 && (GET_CODE (orig) == LABEL_REF
9147 || (GET_CODE (orig) == SYMBOL_REF && SYMBOL_REF_DECL (orig)
9148 && (TREE_READONLY (SYMBOL_REF_DECL (orig))
9149 || SYMBOL_REF_EXTERNAL_P (orig)
9150 || DECL_SECTION_NAME (SYMBOL_REF_DECL (orig))))))
9151 /* In FDPIC, GOTOFF can only be used for writable data. */
9152 emit_insn (gen_symGOT2reg (reg, orig));
9153 else
9154 emit_insn (gen_symGOTOFF2reg (reg, orig));
9155 return reg;
9156 }
9157 else if (GET_CODE (orig) == SYMBOL_REF)
9158 {
9159 if (reg == NULL_RTX)
9160 reg = gen_reg_rtx (Pmode);
9161
9162 if (TARGET_FDPIC && SYMBOL_REF_FUNCTION_P (orig))
9163 emit_insn (gen_symGOTFUNCDESC2reg (reg, orig));
9164 else
9165 emit_insn (gen_symGOT2reg (reg, orig));
9166 return reg;
9167 }
9168 return orig;
9169 }
9170
9171 /* Given a (logical) mode size and an offset in bytes, try to find the
9172 appropriate displacement value for a mov insn. On SH the displacements
9173 are limited to max. 60 bytes for SImode, max. 30 bytes in HImode and max.
9174 15 bytes in QImode. To compensate for this, we create a new base address by
9175 adding an adjustment value to it.
9176
9177 If the originally requested offset is greater than 127 we prefer using
9178 values 124..127 over 128..131 to increase opportunities to use the
9179 add #imm, Rn insn.
9180
9181 In some cases it is possible that a requested offset might seem unaligned
9182 or inappropriate for the mode size, like offset = 2 and mode size = 4.
9183 This is compensated by adjusting the base address so that the effective
9184 address of the displacement move insn will be aligned.
9185
9186 This is not the best possible way of rebasing the base address, as it
9187 does not look at other present displacement addressings around it.
9188 In some cases this can create more base address adjustments than would
9189 actually be necessary. */
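/* Worked example (editorial illustration): for an SImode access at offset
   132 the mov insn displacement limit is 60, so the address is re-based.
   Because 132 > 127, the align_modifier of 4 (the SImode mov insn size)
   biases the rounding in sh_find_mov_disp_adjust below:
     offset_adjust = ((132 + 4) & ~60) - 4 = 124,  mov_disp = 132 - 124 = 8
   i.e. the access becomes @(8, Rn+124).  The adjustment 124 still fits the
   signed 8-bit immediate of "add #imm,Rn"; rounding without the bias would
   have yielded 128, which does not.  */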
9190 struct disp_adjust
9191 {
9192 rtx offset_adjust;
9193 rtx mov_disp;
9194 };
9195
9196 static struct disp_adjust
9197 sh_find_mov_disp_adjust (machine_mode mode, HOST_WIDE_INT offset)
9198 {
9199 struct disp_adjust res = { NULL_RTX, NULL_RTX };
9200
9201 /* Do not try to use SH2A's large displacements here, because this would
9202 effectively disable the small displacement insns. */
9203 const int mode_sz = GET_MODE_SIZE (mode);
9204 const int mov_insn_sz = mov_insn_size (mode, false);
9205 const int max_disp = sh_max_mov_insn_displacement (mode, false);
9206 const int max_disp_next = max_disp + mov_insn_sz;
9207 HOST_WIDE_INT align_modifier = offset > 127 ? mov_insn_sz : 0;
9208 HOST_WIDE_INT offset_adjust;
9209
9210 /* In some cases this actually does happen and we must check for it. */
9211 if (mode_sz < 1 || mode_sz > 8 || max_disp < 1)
9212 return res;
9213
9214 /* Keeps the previous behavior for QImode displacement addressing.
9215 This just decides how the offset is re-based. Removing this special
9216 case will result in slightly bigger code on average, but it's not that
9217 bad actually. */
9218 if (mov_insn_sz == 1)
9219 align_modifier = 0;
9220
9221 offset_adjust = ((offset + align_modifier) & ~max_disp) - align_modifier;
9222
9223 if (mode_sz + offset - offset_adjust <= max_disp_next)
9224 {
9225 res.offset_adjust = GEN_INT (offset_adjust);
9226 res.mov_disp = GEN_INT (offset - offset_adjust);
9227 }
9228
9229 return res;
9230 }
9231
9232 /* Try to modify an illegitimate address and make it legitimate.
9233 If we find one, return the new, valid address.
9234 Otherwise, return the original address. */
9235 static rtx
9236 sh_legitimize_address (rtx x, rtx oldx, machine_mode mode)
9237 {
9238 if (flag_pic)
9239 x = legitimize_pic_address (oldx, mode, NULL_RTX);
9240
9241 if ((TARGET_FPU_DOUBLE && mode == DFmode)
9242 || (TARGET_SH2E && mode == SFmode))
9243 return x;
9244
9245 if (GET_CODE (x) == PLUS && CONST_INT_P (XEXP (x, 1))
9246 && BASE_REGISTER_RTX_P (XEXP (x, 0)))
9247 {
9248 struct disp_adjust adj = sh_find_mov_disp_adjust (mode,
9249 INTVAL (XEXP (x, 1)));
9250
9251 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
9252 {
9253 rtx sum = expand_binop (Pmode, add_optab, XEXP (x, 0),
9254 adj.offset_adjust, NULL_RTX, 0,
9255 OPTAB_LIB_WIDEN);
9256 return gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
9257 }
9258 }
9259 return x;
9260 }
9261
9262 /* Attempt to replace *p, which is an address that needs reloading, with
9263 a valid memory address for an operand of mode MODE.
9264 Like for sh_legitimize_address, for the SH we try to get a normal form
9265 of the address. That will allow inheritance of the address reloads. */
9266 bool
9267 sh_legitimize_reload_address (rtx *p, machine_mode mode, int opnum,
9268 int itype)
9269 {
9270 enum reload_type type = (enum reload_type) itype;
9271 const int mode_sz = GET_MODE_SIZE (mode);
9272
9273 if (sh_lra_p ())
9274 return false;
9275
9276 if (GET_CODE (*p) == PLUS && CONST_INT_P (XEXP (*p, 1))
9277 && MAYBE_BASE_REGISTER_RTX_P (XEXP (*p, 0), true))
9278 {
9279 const HOST_WIDE_INT offset = INTVAL (XEXP (*p, 1));
9280 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, offset);
9281
9282 if (TARGET_SH2A && mode == DFmode && (offset & 0x7))
9283 {
9284 push_reload (*p, NULL_RTX, p, NULL,
9285 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9286 return true;
9287 }
9288
9289 if (TARGET_SH2E && mode == SFmode)
9290 {
9291 *p = copy_rtx (*p);
9292 push_reload (*p, NULL_RTX, p, NULL,
9293 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9294 return true;
9295 }
9296
9297 /* FIXME: Do not allow legitimizing QImode and HImode displacement
9298 moves, because reload then has a problem figuring out the constraint
9299 that the move insn target/source reg must be R0.
9300 Or maybe some handling is wrong in sh_secondary_reload for this
9301 to work properly? */
9302 if ((mode_sz == 4 || mode_sz == 8)
9303 && ! (TARGET_SH4 && mode == DFmode)
9304 && adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
9305 {
9306 rtx sum = gen_rtx_PLUS (Pmode, XEXP (*p, 0), adj.offset_adjust);
9307 *p = gen_rtx_PLUS (Pmode, sum, adj.mov_disp);
9308 push_reload (sum, NULL_RTX, &XEXP (*p, 0), NULL,
9309 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9310 return true;
9311 }
9312 }
9313
9314 /* We must re-recognize what we created before. */
9315 if (GET_CODE (*p) == PLUS
9316 && (mode_sz == 4 || mode_sz == 8)
9317 && GET_CODE (XEXP (*p, 0)) == PLUS
9318 && CONST_INT_P (XEXP (XEXP (*p, 0), 1))
9319 && MAYBE_BASE_REGISTER_RTX_P (XEXP (XEXP (*p, 0), 0), true)
9320 && CONST_INT_P (XEXP (*p, 1))
9321 && ! (TARGET_SH2E && mode == SFmode))
9322 {
9323 /* Because this address is so complex, we know it must have
9324 been created by LEGITIMIZE_RELOAD_ADDRESS before; thus,
9325 it is already unshared, and needs no further unsharing. */
9326 push_reload (XEXP (*p, 0), NULL_RTX, &XEXP (*p, 0), NULL,
9327 BASE_REG_CLASS, Pmode, VOIDmode, 0, 0, opnum, type);
9328 return true;
9329 }
9330
9331 return false;
9332 }
9333
9334 /* In the name of slightly smaller debug output, and to cater to
9335 general assembler lossage, recognize various UNSPEC sequences
9336 and turn them back into a direct symbol reference. */
9337 static rtx
9338 sh_delegitimize_address (rtx orig_x)
9339 {
9340 orig_x = delegitimize_mem_from_attrs (orig_x);
9341
9342 rtx x = orig_x;
9343 if (MEM_P (x))
9344 x = XEXP (x, 0);
9345 if (GET_CODE (x) == CONST)
9346 {
9347 rtx y = XEXP (x, 0);
9348 if (GET_CODE (y) == UNSPEC)
9349 {
9350 if (XINT (y, 1) == UNSPEC_GOT
9351 || XINT (y, 1) == UNSPEC_GOTOFF
9352 || XINT (y, 1) == UNSPEC_SYMOFF)
9353 return XVECEXP (y, 0, 0);
9354 else if (XINT (y, 1) == UNSPEC_PCREL_SYMOFF)
9355 {
9356 if (GET_CODE (XVECEXP (y, 0, 0)) == CONST)
9357 {
9358 rtx symplt = XEXP (XVECEXP (y, 0, 0), 0);
9359
9360 if (GET_CODE (symplt) == UNSPEC
9361 && (XINT (symplt, 1) == UNSPEC_PLT
9362 || XINT (symplt, 1) == UNSPEC_PCREL))
9363 return XVECEXP (symplt, 0, 0);
9364 }
9365 }
9366 }
9367 }
9368
9369 return orig_x;
9370 }
9371
9372 /* Mark the use of a constant in the literal table. If the constant
9373 has multiple labels, make it unique. */
9374 static rtx
9375 mark_constant_pool_use (rtx x)
9376 {
9377 if (x == NULL_RTX)
9378 return x;
9379
9380 switch (GET_CODE (x))
9381 {
9382 case LABEL_REF:
9383 x = XEXP (x, 0);
9384 case CODE_LABEL:
9385 break;
9386 default:
9387 return x;
9388 }
9389
9390 /* Get the first label in the list of labels for the same constant
9391 and delete the other labels in the list. */
9392 rtx_insn* lab = as_a <rtx_insn*> (x);
9393 for (rtx_insn* insn = PREV_INSN (lab); insn; insn = PREV_INSN (insn))
9394 {
9395 if (!LABEL_P (insn)
9396 || LABEL_REFS (insn) != NEXT_INSN (insn))
9397 break;
9398 lab = insn;
9399 }
9400
9401 for (rtx insn = LABEL_REFS (lab); insn; insn = LABEL_REFS (insn))
9402 as_a<rtx_insn *> (insn)->set_deleted ();
9403
9404 /* Mark constants in a window. */
9405 for (rtx_insn* insn = NEXT_INSN (as_a <rtx_insn *> (x)); insn;
9406 insn = NEXT_INSN (insn))
9407 {
9408 if (!NONJUMP_INSN_P (insn))
9409 continue;
9410
9411 rtx pattern = PATTERN (insn);
9412 if (GET_CODE (pattern) != UNSPEC_VOLATILE)
9413 continue;
9414
9415 switch (XINT (pattern, 1))
9416 {
9417 case UNSPECV_CONST2:
9418 case UNSPECV_CONST4:
9419 case UNSPECV_CONST8:
9420 XVECEXP (pattern, 0, 1) = const1_rtx;
9421 break;
9422 case UNSPECV_WINDOW_END:
9423 if (XVECEXP (pattern, 0, 0) == x)
9424 return lab;
9425 break;
9426 case UNSPECV_CONST_END:
9427 return lab;
9428 default:
9429 break;
9430 }
9431 }
9432
9433 return lab;
9434 }
9435 \f
9436 /* Return true if it's possible to redirect BRANCH1 to the destination
9437 of an unconditional jump BRANCH2. We only want to do this if the
9438 resulting branch will have a short displacement. */
9439 static bool
9440 sh_can_follow_jump (const rtx_insn *branch1, const rtx_insn *branch2)
9441 {
9442 /* Don't follow if BRANCH2 might be a jump crossing between the
9443 hot and cold partitions. */
9444 if (flag_reorder_blocks_and_partition
9445 && simplejump_p (branch2)
9446 && CROSSING_JUMP_P (branch2))
9447 return false;
9448
9449 if (flag_expensive_optimizations && simplejump_p (branch2))
9450 {
9451 rtx dest = XEXP (SET_SRC (single_set (branch2)), 0);
9452 rtx_insn *insn;
9453 int distance;
9454
9455 for (distance = 0, insn = NEXT_INSN (branch1);
9456 insn && distance < 256;
9457 insn = PREV_INSN (insn))
9458 {
9459 if (insn == dest)
9460 return true;
9461 else
9462 distance += get_attr_length (insn);
9463 }
9464 for (distance = 0, insn = NEXT_INSN (branch1);
9465 insn && distance < 256;
9466 insn = NEXT_INSN (insn))
9467 {
9468 if (insn == dest)
9469 return true;
9470 else
9471 distance += get_attr_length (insn);
9472 }
9473 }
9474 return false;
9475 }
9476
9477 /* Return nonzero if register old_reg can be renamed to register new_reg. */
9478 bool
9479 sh_hard_regno_rename_ok (unsigned int old_reg ATTRIBUTE_UNUSED,
9480 unsigned int new_reg)
9481 {
9482 /* Interrupt functions can only use registers that have already been
9483 saved by the prologue, even if they would normally be
9484 call-clobbered. */
9485 if (sh_cfun_interrupt_handler_p () && !df_regs_ever_live_p (new_reg))
9486 return false;
9487
9488 return true;
9489 }
9490
9491 /* Function to update the integer COST
9492 based on the relationship between INSN, which is dependent on
9493 DEP_INSN through a dependence of kind DEP_TYPE. The default is to make no
9494 adjustment to COST. This can be used for example to specify to
9495 the scheduler that an output- or anti-dependence does not incur
9496 the same cost as a data-dependence. The return value should be
9497 the new value for COST. */
9498 static int
9499 sh_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep_insn, int cost,
9500 unsigned int)
9501 {
9502 rtx reg, use_pat;
9503
9504 if (dep_type == 0)
9505 {
9506 if (recog_memoized (insn) < 0
9507 || recog_memoized (dep_insn) < 0)
9508 return cost;
9509
9510 rtx dep_set = single_set (dep_insn);
9511
9512 /* The latency that we specify in the scheduling description refers
9513 to the actual output, not to an auto-increment register; for that,
9514 the latency is one. */
9515 if (dep_set && MEM_P (SET_SRC (dep_set)) && cost > 1)
9516 {
9517 rtx set = single_set (insn);
9518
9519 if (set
9520 && !reg_mentioned_p (SET_DEST (dep_set), SET_SRC (set))
9521 && (!MEM_P (SET_DEST (set))
9522 || !reg_mentioned_p (SET_DEST (dep_set),
9523 XEXP (SET_DEST (set), 0))))
9524 cost = 1;
9525 }
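/* For example (illustrative): after a load such as "mov.l @r4+,r1", a
   dependent insn that only needs the post-incremented r4, and neither reads
   r1 nor stores through an address using r1, is charged a cost of 1 instead
   of the full load latency.  */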
9526 /* The only input for a call that is timing-critical is the
9527 function's address. */
9528 if (CALL_P (insn))
9529 {
9530 rtx call = get_call_rtx_from (insn);
9531 if (call
9532 /* sibcalli_thunk uses a symbol_ref in an unspec. */
9533 && (GET_CODE (XEXP (XEXP (call, 0), 0)) == UNSPEC
9534 || ! reg_set_p (XEXP (XEXP (call, 0), 0), dep_insn)))
9535 cost -= TARGET_SH4_300 ? 3 : 6;
9536 }
9537 /* Likewise, the most timing-critical input for an sfunc call
9538 is the function address. However, sfuncs typically start
9539 using their arguments pretty quickly.
9540 Assume a four cycle delay for SH4 before they are needed.
9541 Cached ST40-300 calls are quicker, so assume only a one
9542 cycle delay there.
9543 ??? Maybe we should encode the delays till input registers
9544 are needed by sfuncs into the sfunc call insn. */
9545 /* All sfunc calls are parallels with at least four components.
9546 Exploit this to avoid unnecessary calls to sfunc_uses_reg. */
9547 else if (GET_CODE (PATTERN (insn)) == PARALLEL
9548 && XVECLEN (PATTERN (insn), 0) >= 4
9549 && (reg = sfunc_uses_reg (insn)))
9550 {
9551 if (! reg_set_p (reg, dep_insn))
9552 cost -= TARGET_SH4_300 ? 1 : 4;
9553 }
9554 if (TARGET_HARD_SH4 && !TARGET_SH4_300)
9555 {
9556 attr_type dep_type = get_attr_type (dep_insn);
9557 attr_type type;
9558 if (dep_type == TYPE_FLOAD || dep_type == TYPE_PCFLOAD)
9559 cost--;
9560 else if ((dep_type == TYPE_LOAD_SI || dep_type == TYPE_PCLOAD_SI)
9561 && (type = get_attr_type (insn)) != TYPE_CALL
9562 && type != TYPE_SFUNC)
9563 cost--;
9564 /* When the preceding instruction loads the shift amount of
9565 the following SHAD/SHLD, the latency of the load is increased
9566 by 1 cycle. */
9567 if (get_attr_type (insn) == TYPE_DYN_SHIFT
9568 && get_attr_any_int_load (dep_insn) == ANY_INT_LOAD_YES
9569 && reg_overlap_mentioned_p (SET_DEST (dep_set),
9570 XEXP (SET_SRC (single_set (insn)),
9571 1)))
9572 cost++;
9573 /* When an LS group instruction with a latency of less than
9574 3 cycles is followed by a double-precision floating-point
9575 instruction, FIPR, or FTRV, the latency of the first
9576 instruction is increased to 3 cycles. */
9577 else if (cost < 3
9578 && get_attr_insn_class (dep_insn) == INSN_CLASS_LS_GROUP
9579 && get_attr_dfp_comp (insn) == DFP_COMP_YES)
9580 cost = 3;
9581 /* The lsw register of a double-precision computation is ready one
9582 cycle earlier. */
9583 else if (reload_completed
9584 && get_attr_dfp_comp (dep_insn) == DFP_COMP_YES
9585 && (use_pat = single_set (insn))
9586 && ! regno_use_in (REGNO (SET_DEST (single_set (dep_insn))),
9587 SET_SRC (use_pat)))
9588 cost -= 1;
9589
9590 if (get_attr_any_fp_comp (dep_insn) == ANY_FP_COMP_YES
9591 && get_attr_late_fp_use (insn) == LATE_FP_USE_YES)
9592 cost -= 1;
9593 }
9594 else if (TARGET_SH4_300)
9595 {
9596 /* Stores need their input register two cycles later. */
9597 attr_type type;
9598 if (dep_set && cost >= 1
9599 && ((type = get_attr_type (insn)) == TYPE_STORE
9600 || type == TYPE_PSTORE
9601 || type == TYPE_FSTORE || type == TYPE_MAC_MEM))
9602 {
9603 rtx set = single_set (insn);
9604
9605 if (!reg_mentioned_p (SET_SRC (set), XEXP (SET_DEST (set), 0))
9606 && rtx_equal_p (SET_SRC (set), SET_DEST (dep_set)))
9607 {
9608 cost -= 2;
9609 /* But don't reduce the cost below 1 if the address depends
9610 on a side effect of dep_insn. */
9611 if (cost < 1
9612 && modified_in_p (XEXP (SET_DEST (set), 0), dep_insn))
9613 cost = 1;
9614 }
9615 }
9616 }
9617 }
9618 /* An anti-dependence penalty of two applies if the first insn is a double
9619 precision fadd / fsub / fmul. */
9620 else if (!TARGET_SH4_300
9621 && dep_type == REG_DEP_ANTI
9622 && recog_memoized (dep_insn) >= 0
9623 && (get_attr_type (dep_insn) == TYPE_DFP_ARITH
9624 || get_attr_type (dep_insn) == TYPE_DFP_MUL)
9625 /* A lot of alleged anti-flow dependences are fake,
9626 so check this one is real. */
9627 && flow_dependent_p (dep_insn, insn))
9628 cost = 2;
9629
9630 return cost;
9631 }
9632
9633 /* Check if INSN is flow-dependent on DEP_INSN. Can also be used to check
9634 if DEP_INSN is anti-flow dependent on INSN. */
9635 static bool
9636 flow_dependent_p (rtx insn, rtx dep_insn)
9637 {
9638 rtx tmp = PATTERN (insn);
9639
9640 note_stores (PATTERN (dep_insn), flow_dependent_p_1, &tmp);
9641 return tmp == NULL_RTX;
9642 }
9643
9644 /* A helper function for flow_dependent_p called through note_stores. */
9645 static void
9646 flow_dependent_p_1 (rtx x, const_rtx pat ATTRIBUTE_UNUSED, void *data)
9647 {
9648 rtx * pinsn = (rtx *) data;
9649
9650 if (*pinsn && reg_referenced_p (x, *pinsn))
9651 *pinsn = NULL_RTX;
9652 }
9653
9654 /* For use by sh_allocate_initial_value. Note that sh.md contains some
9655 'special function' patterns (type sfunc) that clobber pr, but that
9656 do not look like function calls to leaf_function_p. Hence we must
9657 do this extra check. */
9658 static int
9659 sh_pr_n_sets (void)
9660 {
9661 return DF_REG_DEF_COUNT (PR_REG);
9662 }
9663
9664 /* Return where to allocate a pseudo for a given hard register initial
9665 value. */
9666 static rtx
9667 sh_allocate_initial_value (rtx hard_reg)
9668 {
9669 if (REGNO (hard_reg) == PR_REG)
9670 {
9671 if (crtl->is_leaf && ! sh_pr_n_sets ())
9672 return hard_reg;
9673 else
9674 return gen_frame_mem (Pmode, return_address_pointer_rtx);
9675 }
9676
9677 return NULL_RTX;
9678 }
9679
9680 /* This function returns "2" to indicate dual issue for the SH4
9681 processor. To be used by the DFA pipeline description. */
9682 static int
9683 sh_issue_rate (void)
9684 {
9685 if (TARGET_SUPERSCALAR)
9686 return 2;
9687 else
9688 return 1;
9689 }
9690
9691 /* Functions for ready queue reordering for sched1. */
9692
9693 /* Get the weight for MODE of a set X. */
9694 static short
9695 find_set_regmode_weight (rtx x, machine_mode mode)
9696 {
9697 if (GET_CODE (x) == CLOBBER && register_operand (SET_DEST (x), mode))
9698 return 1;
9699 if (GET_CODE (x) == SET && register_operand (SET_DEST (x), mode))
9700 {
9701 if (REG_P (SET_DEST (x)))
9702 {
9703 if (!reg_mentioned_p (SET_DEST (x), SET_SRC (x)))
9704 return 1;
9705 else
9706 return 0;
9707 }
9708 return 1;
9709 }
9710 return 0;
9711 }
9712
9713 /* Get regmode weight for insn. */
9714 static short
9715 find_insn_regmode_weight (rtx insn, machine_mode mode)
9716 {
9717 /* Increment weight for each register born here. */
9718 rtx x = PATTERN (insn);
9719 short reg_weight = find_set_regmode_weight (x, mode);
9720 if (GET_CODE (x) == PARALLEL)
9721 {
9722 int j;
9723 for (j = XVECLEN (x, 0) - 1; j >= 0; j--)
9724 {
9725 x = XVECEXP (PATTERN (insn), 0, j);
9726 reg_weight += find_set_regmode_weight (x, mode);
9727 }
9728 }
9729 /* Decrement weight for each register that dies here. */
9730 for (x = REG_NOTES (insn); x; x = XEXP (x, 1))
9731 {
9732 if (REG_NOTE_KIND (x) == REG_DEAD || REG_NOTE_KIND (x) == REG_UNUSED)
9733 {
9734 rtx note = XEXP (x, 0);
9735 if (REG_P (note) && GET_MODE (note) == mode)
9736 reg_weight--;
9737 }
9738 }
9739 return reg_weight;
9740 }
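/* Example (illustrative): for an SImode copy "mov rA,rB" the SET contributes
   +1 (rB is born) and a REG_DEAD note for rA contributes -1, giving a net
   SImode weight of 0; loading a constant into rB with nothing dying would
   give +1.  */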
9741
9742 /* Calculate regmode weights for all insns of a basic block. */
9743 static void
9744 find_regmode_weight (basic_block b, machine_mode mode)
9745 {
9746 rtx_insn *insn, *next_tail, *head, *tail;
9747
9748 get_ebb_head_tail (b, b, &head, &tail);
9749 next_tail = NEXT_INSN (tail);
9750
9751 for (insn = head; insn != next_tail; insn = NEXT_INSN (insn))
9752 {
9753 /* Handle register life information. */
9754 if (!INSN_P (insn))
9755 continue;
9756
9757 if (mode == SFmode)
9758 INSN_REGMODE_WEIGHT (insn, mode) =
9759 find_insn_regmode_weight (insn, mode)
9760 + 2 * find_insn_regmode_weight (insn, DFmode);
9761 else if (mode == SImode)
9762 INSN_REGMODE_WEIGHT (insn, mode) =
9763 find_insn_regmode_weight (insn, mode)
9764 + 2 * find_insn_regmode_weight (insn, DImode);
9765 }
9766 }
9767
9768 /* Comparison function for ready queue sorting. */
9769 static int
9770 rank_for_reorder (const void *x, const void *y)
9771 {
9772 rtx_insn *tmp = *(rtx_insn * const *) y;
9773 rtx_insn *tmp2 = *(rtx_insn * const *) x;
9774
9775 /* The insn in a schedule group should be issued first. */
9776 if (SCHED_GROUP_P (tmp) != SCHED_GROUP_P (tmp2))
9777 return SCHED_GROUP_P (tmp2) ? 1 : -1;
9778
9779 /* If insns are equally good, sort by INSN_LUID (original insn order); this
9780 minimizes instruction movement, thus minimizing sched's effect on
9781 register pressure. */
9782 return INSN_LUID (tmp) - INSN_LUID (tmp2);
9783 }
9784
9785 /* Resort the array A, in which only the element at index N may be out of order. */
9786 static void
9787 swap_reorder (rtx_insn **a, int n)
9788 {
9789 rtx_insn *insn = a[n - 1];
9790 int i = n - 2;
9791
9792 while (i >= 0 && rank_for_reorder (a + i, &insn) >= 0)
9793 {
9794 a[i + 1] = a[i];
9795 i -= 1;
9796 }
9797 a[i + 1] = insn;
9798 }
9799
9800 /* Sort the ready list by ascending priority. */
9801 static void
9802 ready_reorder (rtx_insn **ready, int nready)
9803 {
9804 if (nready == 2)
9805 swap_reorder (ready, nready);
9806 else if (nready > 2)
9807 qsort (ready, nready, sizeof (rtx_insn *), rank_for_reorder);
9808 }
9809
9810 /* Count life regions of r0 for a block. */
9811 static int
9812 find_r0_life_regions (basic_block b)
9813 {
9814 bool live;
9815 int set;
9816 int death = 0;
9817
9818 if (REGNO_REG_SET_P (df_get_live_in (b), R0_REG))
9819 {
9820 set = 1;
9821 live = true;
9822 }
9823 else
9824 {
9825 set = 0;
9826 live = false;
9827 }
9828
9829 rtx_insn* insn = BB_HEAD (b);
9830 rtx_insn* end = BB_END (b);
9831 rtx r0_reg = gen_rtx_REG (SImode, R0_REG);
9832 while (1)
9833 {
9834 if (INSN_P (insn))
9835 {
9836 if (find_regno_note (insn, REG_DEAD, R0_REG))
9837 {
9838 death++;
9839 live = false;
9840 }
9841
9842 rtx pset;
9843 if (!live
9844 && (pset = single_set (insn))
9845 && reg_overlap_mentioned_p (r0_reg, SET_DEST (pset))
9846 && !find_regno_note (insn, REG_UNUSED, R0_REG))
9847 {
9848 set++;
9849 live = true;
9850 }
9851 }
9852 if (insn == end)
9853 break;
9854 insn = NEXT_INSN (insn);
9855 }
9856 return set - death;
9857 }
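/* Example (illustrative): for a block where r0 is live on entry, dies at
   some insn, and is set again further down, we end up with set == 2 and
   death == 1, so the block contributes one r0 life region.  */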
9858
9859 /* Calculate regmode weights for all insns of all basic blocks. */
9860 static void
9861 sh_md_init_global (FILE *dump ATTRIBUTE_UNUSED,
9862 int verbose ATTRIBUTE_UNUSED,
9863 int old_max_uid)
9864 {
9865 basic_block b;
9866
9867 regmode_weight[0] = (short *) xcalloc (old_max_uid, sizeof (short));
9868 regmode_weight[1] = (short *) xcalloc (old_max_uid, sizeof (short));
9869 r0_life_regions = 0;
9870
9871 FOR_EACH_BB_REVERSE_FN (b, cfun)
9872 {
9873 find_regmode_weight (b, SImode);
9874 find_regmode_weight (b, SFmode);
9875 if (!reload_completed)
9876 r0_life_regions += find_r0_life_regions (b);
9877 }
9878
9879 CURR_REGMODE_PRESSURE (SImode) = 0;
9880 CURR_REGMODE_PRESSURE (SFmode) = 0;
9881 }
9882
9883 /* Cleanup. */
9884 static void
9885 sh_md_finish_global (FILE *dump ATTRIBUTE_UNUSED,
9886 int verbose ATTRIBUTE_UNUSED)
9887 {
9888 if (regmode_weight[0])
9889 {
9890 free (regmode_weight[0]);
9891 regmode_weight[0] = NULL;
9892 }
9893 if (regmode_weight[1])
9894 {
9895 free (regmode_weight[1]);
9896 regmode_weight[1] = NULL;
9897 }
9898 }
9899
9900 /* Cache can_issue_more so that we can return it from reorder2. Also
9901 keep count of the register pressure for SImode and SFmode. */
9902 static int
9903 sh_variable_issue (FILE *dump ATTRIBUTE_UNUSED,
9904 int sched_verbose ATTRIBUTE_UNUSED,
9905 rtx_insn *insn,
9906 int can_issue_more)
9907 {
9908 if (GET_CODE (PATTERN (insn)) != USE
9909 && GET_CODE (PATTERN (insn)) != CLOBBER)
9910 cached_can_issue_more = can_issue_more - 1;
9911 else
9912 cached_can_issue_more = can_issue_more;
9913
9914 if (reload_completed)
9915 return cached_can_issue_more;
9916
9917 CURR_REGMODE_PRESSURE (SImode) += INSN_REGMODE_WEIGHT (insn, SImode);
9918 CURR_REGMODE_PRESSURE (SFmode) += INSN_REGMODE_WEIGHT (insn, SFmode);
9919
9920 return cached_can_issue_more;
9921 }
9922
9923 static void
9924 sh_md_init (FILE *dump ATTRIBUTE_UNUSED,
9925 int verbose ATTRIBUTE_UNUSED,
9926 int veclen ATTRIBUTE_UNUSED)
9927 {
9928 CURR_REGMODE_PRESSURE (SImode) = 0;
9929 CURR_REGMODE_PRESSURE (SFmode) = 0;
9930 }
9931
9932 /* Some magic numbers. */
9933 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9934 functions that already have high pressure on r0. */
9935 #define R0_MAX_LIFE_REGIONS 2
9936 /* Register pressure thresholds for SImode and SFmode registers. */
9937 #define SIMODE_MAX_WEIGHT 5
9938 #define SFMODE_MAX_WEIGHT 10
9939
9940 /* Return true if the pressure is high for MODE. */
9941 static bool
9942 high_pressure (machine_mode mode)
9943 {
9944 /* Pressure on register r0 can lead to spill failures, so avoid sched1 for
9945 functions that already have high pressure on r0. */
9946 if (r0_life_regions >= R0_MAX_LIFE_REGIONS)
9947 return true;
9948
9949 if (mode == SFmode)
9950 return (CURR_REGMODE_PRESSURE (SFmode) > SFMODE_MAX_WEIGHT);
9951 else
9952 return (CURR_REGMODE_PRESSURE (SImode) > SIMODE_MAX_WEIGHT);
9953 }
9954
9955 /* Reorder ready queue if register pressure is high. */
9956 static int
9957 sh_reorder (FILE *dump ATTRIBUTE_UNUSED,
9958 int sched_verbose ATTRIBUTE_UNUSED,
9959 rtx_insn **ready,
9960 int *n_readyp,
9961 int clock_var ATTRIBUTE_UNUSED)
9962 {
9963 if (reload_completed)
9964 return sh_issue_rate ();
9965
9966 if (high_pressure (SFmode) || high_pressure (SImode))
9967 {
9968 ready_reorder (ready, *n_readyp);
9969 }
9970
9971 return sh_issue_rate ();
9972 }
9973
9974 /* Skip cycles if the current register pressure is high. */
9975 static int
9976 sh_reorder2 (FILE *dump ATTRIBUTE_UNUSED,
9977 int sched_verbose ATTRIBUTE_UNUSED,
9978 rtx_insn **ready ATTRIBUTE_UNUSED,
9979 int *n_readyp ATTRIBUTE_UNUSED,
9980 int clock_var ATTRIBUTE_UNUSED)
9981 {
9982 if (reload_completed)
9983 return cached_can_issue_more;
9984
9985 if (high_pressure (SFmode) || high_pressure (SImode))
9986 skip_cycles = 1;
9987
9988 return cached_can_issue_more;
9989 }
9990
9991 /* Skip cycles without sorting the ready queue. This will move insns from
9992 Q->R. If this is the last cycle we are skipping, allow sorting of the ready
9993 queue by sh_reorder. */
9994
9995 /* Generally, skipping this many cycles is sufficient for all insns to move
9996 from Q -> R. */
9997 #define MAX_SKIPS 8
9998
9999 static int
10000 sh_dfa_new_cycle (FILE *sched_dump ATTRIBUTE_UNUSED,
10001 int sched_verbose ATTRIBUTE_UNUSED,
10002 rtx_insn *insn ATTRIBUTE_UNUSED,
10003 int last_clock_var,
10004 int clock_var,
10005 int *sort_p)
10006 {
10007 if (reload_completed)
10008 return 0;
10009
10010 if (skip_cycles)
10011 {
10012 if ((clock_var - last_clock_var) < MAX_SKIPS)
10013 {
10014 *sort_p = 0;
10015 return 1;
10016 }
10017 /* If this is the last cycle we are skipping, allow reordering of R. */
10018 if ((clock_var - last_clock_var) == MAX_SKIPS)
10019 {
10020 *sort_p = 1;
10021 return 1;
10022 }
10023 }
10024
10025 skip_cycles = 0;
10026
10027 return 0;
10028 }
10029
10030 static bool
10031 sh_ms_bitfield_layout_p (const_tree record_type ATTRIBUTE_UNUSED)
10032 {
10033 return TARGET_HITACHI || sh_attr_renesas_p (record_type);
10034 }
10035 \f
10036 /*
10037 On the SH1..SH4, the trampoline looks like
10038 2 0002 D202 mov.l l2,r2
10039 1 0000 D301 mov.l l1,r3
10040 3 0004 422B jmp @r2
10041 4 0006 0009 nop
10042 5 0008 00000000 l1: .long area
10043 6 000c 00000000 l2: .long function
10044
10045 FDPIC needs a form that includes a function descriptor and
10046 code to load the GOT register:
10047 0 0000 00000000 .long l0
10048 1 0004 00000000 .long gotval
10049 2 0008 D302 l0: mov.l l1,r3
10050 3 000a D203 mov.l l2,r2
10051 4 000c 6122 mov.l @r2,r1
10052 5 000e 5C21 mov.l @(4,r2),r12
10053 6 0010 412B jmp @r1
10054 7 0012 0009 nop
10055 8 0014 00000000 l1: .long area
10056 9 0018 00000000 l2: .long function
10057
10058 SH5 (compact) uses r1 instead of r3 for the static chain. */
10059
10060 /* Emit insns to store a value at memory address + offset. */
10061 static void
10062 sh_emit_storesi (rtx addr, HOST_WIDE_INT offset, rtx value)
10063 {
10064 gcc_assert ((offset & 3) == 0);
10065 emit_move_insn (offset == 0
10066 ? change_address (addr, SImode, NULL_RTX)
10067 : adjust_address (addr, SImode, offset), value);
10068 }
10069
10070 /* Emit insns to store w0 at addr + offset and w1 at addr + offset + 2. */
10071 static void
10072 sh_emit_storehi (rtx addr, HOST_WIDE_INT offset, uint16_t w0, uint16_t w1)
10073 {
10074 sh_emit_storesi (addr, offset, gen_int_mode (TARGET_LITTLE_ENDIAN
10075 ? (w0 | (w1 << 16))
10076 : (w1 | (w0 << 16)), SImode));
10077 }
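/* For example (illustrative): sh_emit_storehi (mem, 0, 0xd202, 0xd301)
   stores the SImode constant 0xd301d202 on a little-endian target and
   0xd202d301 on a big-endian one, so the halfword 0xd202 lands at the
   lower address either way.  */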
10078
10079 /* Emit RTL insns to initialize the variable parts of a trampoline.
10080 FNADDR is an RTX for the address of the function's pure code.
10081 CXT is an RTX for the static chain value for the function. */
10082 static void
10083 sh_trampoline_init (rtx tramp_mem, tree fndecl, rtx cxt)
10084 {
10085 rtx fnaddr = XEXP (DECL_RTL (fndecl), 0);
10086 rtx tramp = force_reg (Pmode, XEXP (tramp_mem, 0));
10087
10088 if (TARGET_FDPIC)
10089 {
10090 rtx a = force_reg (Pmode, plus_constant (Pmode, XEXP (tramp_mem, 0), 8));
10091
10092 sh_emit_storesi (tramp_mem, 0, a);
10093 sh_emit_storesi (tramp_mem, 4, sh_get_fdpic_reg_initial_val ());
10094
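/* The halfword pairs stored below are the opcodes from the FDPIC trampoline
   layout shown above: mov.l l1,r3 / mov.l l2,r2, then mov.l @r2,r1 /
   mov.l @(4,r2),r12, then jmp @r1 / nop.  */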
10095 sh_emit_storehi (tramp_mem, 8, 0xd302, 0xd203);
10096 sh_emit_storehi (tramp_mem, 12, 0x6122, 0x5c21);
10097 sh_emit_storehi (tramp_mem, 16, 0x412b, 0x0009);
10098
10099 sh_emit_storesi (tramp_mem, 20, cxt);
10100 sh_emit_storesi (tramp_mem, 24, fnaddr);
10101 }
10102 else
10103 {
10104 sh_emit_storehi (tramp_mem, 0, 0xd202, 0xd301);
10105 sh_emit_storehi (tramp_mem, 4, 0x422b, 0x0009);
10106
10107 sh_emit_storesi (tramp_mem, 8, cxt);
10108 sh_emit_storesi (tramp_mem, 12, fnaddr);
10109 }
10110 if (TARGET_HARD_SH4)
10111 {
10112 if (!TARGET_INLINE_IC_INVALIDATE
10113 || (!(TARGET_SH4A || TARGET_SH4_300) && TARGET_USERMODE))
10114 emit_library_call (function_symbol (NULL, "__ic_invalidate",
10115 FUNCTION_ORDINARY).sym,
10116 LCT_NORMAL, VOIDmode, tramp, SImode);
10117 else
10118 emit_insn (gen_ic_invalidate_line (tramp));
10119 }
10120 }
10121
10122 /* On SH5, trampolines were SHmedia code, which required adding 1 to the address; the remaining SH targets return the address unchanged. */
10123 static rtx
10124 sh_trampoline_adjust_address (rtx tramp)
10125 {
10126 return tramp;
10127 }
10128
10129 /* If PIC, we cannot make sibling calls to global functions
10130 because the PLT requires r12 to be live. */
10131 static bool
10132 sh_function_ok_for_sibcall (tree decl, tree exp ATTRIBUTE_UNUSED)
10133 {
10134 return (! sh_cfun_interrupt_handler_p ()
10135 && (! flag_pic
10136 || TARGET_FDPIC
10137 || (decl && ! (TREE_PUBLIC (decl) || DECL_WEAK (decl)))
10138 || (decl && DECL_VISIBILITY (decl) != VISIBILITY_DEFAULT)));
10139 }
10140
10141 /* Expand to appropriate sym*_label2reg for SYM and SIBCALL_P. */
10142 void
10143 sh_expand_sym_label2reg (rtx reg, rtx sym, rtx lab, bool sibcall_p)
10144 {
10145 const_tree decl = SYMBOL_REF_DECL (sym);
10146 bool is_weak = (decl && DECL_P (decl) && DECL_WEAK (decl));
10147
10148 if (!is_weak && SYMBOL_REF_LOCAL_P (sym))
10149 emit_insn (gen_sym_label2reg (reg, sym, lab));
10150 else if (sibcall_p && SYMBOL_REF_LOCAL_P (sym))
10151 emit_insn (gen_symPCREL_label2reg (reg, sym, lab));
10152 else
10153 emit_insn (gen_symPLT_label2reg (reg, sym, lab));
10154 }
10155 \f
10156 /* Machine specific built-in functions. */
10157
10158 struct builtin_description
10159 {
10160 bool (* const is_enabled) (void);
10161 const enum insn_code icode;
10162 const char *const name;
10163 int signature;
10164 tree fndecl;
10165 };
10166
10167 /* Predicate used in the bdesc table below: enables built-ins that are
10168 available on SH1 and up. */
10169 static bool
10170 sh1_builtin_p (void)
10171 {
10172 return TARGET_SH1;
10173 }
10174
10175 /* Describe the number and signedness of arguments; arg[0] == result
10176 (1: unsigned, 2: signed, 4: don't care, 8: pointer, 0: no argument). */
10177 /* 9: 64-bit pointer, 10: 32-bit pointer */
10178 static const char signature_args[][4] =
10179 {
10180 #define SH_BLTIN_V2SI2 0
10181 { 4, 4 },
10182 #define SH_BLTIN_V4HI2 1
10183 { 4, 4 },
10184 #define SH_BLTIN_V2SI3 2
10185 { 4, 4, 4 },
10186 #define SH_BLTIN_V4HI3 3
10187 { 4, 4, 4 },
10188 #define SH_BLTIN_V8QI3 4
10189 { 4, 4, 4 },
10190 #define SH_BLTIN_MAC_HISI 5
10191 { 1, 4, 4, 1 },
10192 #define SH_BLTIN_SH_HI 6
10193 { 4, 4, 1 },
10194 #define SH_BLTIN_SH_SI 7
10195 { 4, 4, 1 },
10196 #define SH_BLTIN_V4HI2V2SI 8
10197 { 4, 4, 4 },
10198 #define SH_BLTIN_V4HI2V8QI 9
10199 { 4, 4, 4 },
10200 #define SH_BLTIN_SISF 10
10201 { 4, 2 },
10202 #define SH_BLTIN_LDUA_L 11
10203 { 2, 10 },
10204 #define SH_BLTIN_LDUA_Q 12
10205 { 1, 10 },
10206 #define SH_BLTIN_STUA_L 13
10207 { 0, 10, 2 },
10208 #define SH_BLTIN_STUA_Q 14
10209 { 0, 10, 1 },
10210 #define SH_BLTIN_LDUA_L64 15
10211 { 2, 9 },
10212 #define SH_BLTIN_LDUA_Q64 16
10213 { 1, 9 },
10214 #define SH_BLTIN_STUA_L64 17
10215 { 0, 9, 2 },
10216 #define SH_BLTIN_STUA_Q64 18
10217 { 0, 9, 1 },
10218 #define SH_BLTIN_NUM_SHARED_SIGNATURES 19
10219 #define SH_BLTIN_2 19
10220 #define SH_BLTIN_SU 19
10221 { 1, 2 },
10222 #define SH_BLTIN_3 20
10223 #define SH_BLTIN_SUS 20
10224 { 2, 2, 1 },
10225 #define SH_BLTIN_PSSV 21
10226 { 0, 8, 2, 2 },
10227 #define SH_BLTIN_XXUU 22
10228 #define SH_BLTIN_UUUU 22
10229 { 1, 1, 1, 1 },
10230 #define SH_BLTIN_PV 23
10231 { 0, 8 },
10232 #define SH_BLTIN_VP 24
10233 { 8, 0 },
10234 #define SH_BLTIN_UV 25
10235 { 1, 0 },
10236 #define SH_BLTIN_VU 26
10237 { 0, 1 },
10238 };
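/* Decoding example (editorial note): SH_BLTIN_UV is { 1, 0 }, i.e. an
   unsigned result and no arguments, which matches __builtin_sh_get_fpscr,
   while SH_BLTIN_VU is { 0, 1 }, a void result taking one unsigned
   argument, matching __builtin_sh_set_fpscr.  */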
10239 /* mcmv: operands considered unsigned. */
10240 /* mmulsum_wq, msad_ubq: result considered unsigned long long. */
10241 /* mperm: control value considered unsigned int. */
10242 /* mshalds, mshard, mshards, mshlld, mshlrd: shift count is unsigned int. */
10243 /* mshards_q: returns signed short. */
10244 /* nsb: takes long long arg, returns unsigned char. */
10245 static struct builtin_description bdesc[] =
10246 {
10247 { sh1_builtin_p,
10248 CODE_FOR_sts_fpscr, "__builtin_sh_get_fpscr", SH_BLTIN_UV, 0 },
10249 { sh1_builtin_p,
10250 CODE_FOR_set_fpscr, "__builtin_sh_set_fpscr", SH_BLTIN_VU, 0 },
10251 };
10252
10253 static tree sh_builtin_get_fpscr;
10254 static tree sh_builtin_set_fpscr;
10255
10256 static void
10257 sh_init_builtins (void)
10258 {
10259 tree shared[SH_BLTIN_NUM_SHARED_SIGNATURES];
10260 memset (shared, 0, sizeof shared);
10261
10262 for (unsigned int di = 0; di < ARRAY_SIZE (bdesc); ++di)
10263 {
10264 builtin_description* d = &bdesc[di];
10265
10266 if (!d->is_enabled ())
10267 continue;
10268
10269 tree type, arg_type = NULL_TREE;
10270 int signature = d->signature;
10271
10272 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES && shared[signature])
10273 type = shared[signature];
10274 else
10275 {
10276 int has_result = signature_args[signature][0] != 0;
10277 tree args[3];
10278
10279 if (! TARGET_FPU_ANY
10280 && FLOAT_MODE_P (insn_data[d->icode].operand[0].mode))
10281 continue;
10282 for (unsigned int i = 0; i < ARRAY_SIZE (args); i++)
10283 args[i] = NULL_TREE;
10284 for (int i = 3; ; i--)
10285 {
10286 int arg = signature_args[signature][i];
10287 int opno = i - 1 + has_result;
10288
10289 if (arg & 8)
10290 arg_type = ptr_type_node;
10291 else if (arg)
10292 arg_type = (*lang_hooks.types.type_for_mode)
10293 (insn_data[d->icode].operand[opno].mode, (arg & 1));
10294 else if (i)
10295 continue;
10296 else
10297 arg_type = void_type_node;
10298 if (i == 0)
10299 break;
10300 args[i-1] = arg_type;
10301 }
10302 type = build_function_type_list (arg_type, args[0], args[1],
10303 args[2], NULL_TREE);
10304 if (signature < SH_BLTIN_NUM_SHARED_SIGNATURES)
10305 shared[signature] = type;
10306 }
10307 d->fndecl =
10308 add_builtin_function (d->name, type, d - bdesc, BUILT_IN_MD,
10309 NULL, NULL_TREE);
10310 /* Record the {sts,set}_fpscr decls for sh_atomic_assign_expand_fenv. */
10311 if (d->icode == CODE_FOR_sts_fpscr)
10312 sh_builtin_get_fpscr = d->fndecl;
10313 else if (d->icode == CODE_FOR_set_fpscr)
10314 sh_builtin_set_fpscr = d->fndecl;
10315 }
10316 }
10317
10318 /* Implement TARGET_ATOMIC_ASSIGN_EXPAND_FENV. */
10319
10320 static void
10321 sh_atomic_assign_expand_fenv (tree *hold, tree *clear, tree *update)
10322 {
10323 const unsigned SH_FE_INVALID = 64;
10324 const unsigned SH_FE_DIVBYZERO = 32;
10325 const unsigned SH_FE_OVERFLOW = 16;
10326 const unsigned SH_FE_UNDERFLOW = 8;
10327 const unsigned SH_FE_INEXACT = 4;
10328 const unsigned HOST_WIDE_INT SH_FE_ALL_EXCEPT = (SH_FE_INVALID
10329 | SH_FE_DIVBYZERO
10330 | SH_FE_OVERFLOW
10331 | SH_FE_UNDERFLOW
10332 | SH_FE_INEXACT);
10333 const unsigned HOST_WIDE_INT SH_FE_EXCEPT_SHIFT = 5;
10334 tree fenv_var, mask, ld_fenv, masked_fenv;
10335 tree new_fenv_var, reload_fenv, restore_fnenv;
10336 tree update_call, atomic_feraiseexcept, hold_fnclex;
10337
10338 if (! TARGET_FPU_ANY)
10339 return;
10340
10341 /* Generate the equivalent of:
10342 unsigned int fenv_var;
10343 fenv_var = __builtin_sh_get_fpscr ();
10344
10345 unsigned int masked_fenv;
10346 masked_fenv = fenv_var & mask;
10347
10348 __builtin_sh_set_fpscr (masked_fenv); */
10349
10350 fenv_var = create_tmp_var_raw (unsigned_type_node);
10351 mask = build_int_cst (unsigned_type_node,
10352 ~((SH_FE_ALL_EXCEPT << SH_FE_EXCEPT_SHIFT)
10353 | SH_FE_ALL_EXCEPT));
10354 ld_fenv = build2 (MODIFY_EXPR, unsigned_type_node,
10355 fenv_var, build_call_expr (sh_builtin_get_fpscr, 0));
10356 masked_fenv = build2 (BIT_AND_EXPR, unsigned_type_node, fenv_var, mask);
10357 hold_fnclex = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
10358 fenv_var = build4 (TARGET_EXPR, unsigned_type_node, fenv_var,
10359 build2 (COMPOUND_EXPR, void_type_node, masked_fenv,
10360 ld_fenv),
10361 NULL_TREE, NULL_TREE);
10362 *hold = build2 (COMPOUND_EXPR, void_type_node, fenv_var, hold_fnclex);
10363
10364 /* Store the value of masked_fenv to clear the exceptions:
10365 __builtin_sh_set_fpscr (masked_fenv); */
10366
10367 *clear = build_call_expr (sh_builtin_set_fpscr, 1, masked_fenv);
10368
10369 /* Generate the equivalent of:
10370 unsigned int new_fenv_var;
10371 new_fenv_var = __builtin_sh_get_fpscr ();
10372
10373 __builtin_sh_set_fpscr (fenv_var);
10374
10375 __atomic_feraiseexcept (new_fenv_var); */
10376
10377 new_fenv_var = create_tmp_var_raw (unsigned_type_node);
10378 reload_fenv = build2 (MODIFY_EXPR, unsigned_type_node, new_fenv_var,
10379 build_call_expr (sh_builtin_get_fpscr, 0));
10380 restore_fnenv = build_call_expr (sh_builtin_set_fpscr, 1, fenv_var);
10381 atomic_feraiseexcept = builtin_decl_implicit (BUILT_IN_ATOMIC_FERAISEEXCEPT);
10382 update_call = build_call_expr (atomic_feraiseexcept, 1,
10383 fold_convert (integer_type_node,
10384 new_fenv_var));
10385 *update = build2 (COMPOUND_EXPR, void_type_node,
10386 build2 (COMPOUND_EXPR, void_type_node,
10387 reload_fenv, restore_fnenv), update_call);
10388 }
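/* Taken together (an illustrative sketch of how these trees are used, not
   part of the original comments): *HOLD runs before the protected operation
   and saves FPSCR while clearing the exception enable and flag bits, *CLEAR
   re-clears the exception flags before each retry, and *UPDATE restores the
   saved FPSCR and re-raises any accumulated exceptions via
   __atomic_feraiseexcept.  */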
10389
10390 /* Implements target hook vector_mode_supported_p. */
10391 bool
10392 sh_vector_mode_supported_p (machine_mode mode ATTRIBUTE_UNUSED)
10393 {
10394 return false;
10395 }
10396
10397 bool
10398 sh_frame_pointer_required (void)
10399 {
10400 /* If needed override this in other tm.h files to cope with various OS
10401 lossage requiring a frame pointer. */
10402 if (SUBTARGET_FRAME_POINTER_REQUIRED)
10403 return true;
10404
10405 if (crtl->profile)
10406 return true;
10407
10408 return false;
10409 }
10410
10411 /* Implements target hook dwarf_calling_convention. Return an enum
10412 of dwarf_calling_convention. */
10413 int
10414 sh_dwarf_calling_convention (const_tree func)
10415 {
10416 if (sh_attr_renesas_p (func))
10417 return DW_CC_GNU_renesas_sh;
10418
10419 return DW_CC_normal;
10420 }
10421
10422 /* Returns the sh builtin decl for CODE. */
10423 static tree
10424 sh_builtin_decl (unsigned code, bool initialize_p ATTRIBUTE_UNUSED)
10425 {
10426 if (code >= ARRAY_SIZE (bdesc))
10427 return error_mark_node;
10428
10429 if (!bdesc[code].is_enabled ())
10430 return error_mark_node;
10431
10432 return bdesc[code].fndecl;
10433 }
10434
10435 /* Expand an expression EXP that calls a built-in function,
10436 with result going to TARGET if that's convenient
10437 (and in mode MODE if that's convenient).
10438 SUBTARGET may be used as the target for computing one of EXP's operands.
10439 IGNORE is nonzero if the value is to be ignored. */
10440 static rtx
10441 sh_expand_builtin (tree exp, rtx target, rtx subtarget ATTRIBUTE_UNUSED,
10442 machine_mode mode ATTRIBUTE_UNUSED, int ignore)
10443 {
10444 tree fndecl = TREE_OPERAND (CALL_EXPR_FN (exp), 0);
10445 unsigned int fcode = DECL_MD_FUNCTION_CODE (fndecl);
10446 const struct builtin_description *d = &bdesc[fcode];
10447 enum insn_code icode = d->icode;
10448 int signature = d->signature;
10449 int nop = 0;
10450 rtx op[4];
10451
10452 if (signature_args[signature][0])
10453 {
10454 if (ignore)
10455 return NULL_RTX;
10456
10457 machine_mode tmode = insn_data[icode].operand[0].mode;
10458 if (! target || GET_MODE (target) != tmode
10459 || ! (*insn_data[icode].operand[0].predicate) (target, tmode))
10460 target = gen_reg_rtx (tmode);
10461 op[nop++] = target;
10462 }
10463 else
10464 target = NULL_RTX;
10465
10466 for (int i = 1; i <= 3; i++, nop++)
10467 {
10468 if (! signature_args[signature][i])
10469 break;
10470 tree arg = CALL_EXPR_ARG (exp, i - 1);
10471 if (arg == error_mark_node)
10472 return const0_rtx;
10473
10474 machine_mode opmode;
10475 tree optype;
10476 if (signature_args[signature][i] & 8)
10477 {
10478 opmode = ptr_mode;
10479 optype = ptr_type_node;
10480 }
10481 else
10482 {
10483 opmode = insn_data[icode].operand[nop].mode;
10484 optype = (*lang_hooks.types.type_for_mode) (opmode, 0);
10485 }
10486
10487 machine_mode argmode = TYPE_MODE (TREE_TYPE (arg));
10488 if (argmode != opmode)
10489 arg = build1 (NOP_EXPR, optype, arg);
10490 op[nop] = expand_expr (arg, NULL_RTX, opmode, EXPAND_NORMAL);
10491 if (! (*insn_data[icode].operand[nop].predicate) (op[nop], opmode))
10492 op[nop] = copy_to_mode_reg (opmode, op[nop]);
10493 }
10494
10495 rtx pat = NULL_RTX;
10496
10497 switch (nop)
10498 {
10499 case 1:
10500 pat = (*insn_data[d->icode].genfun) (op[0]);
10501 break;
10502 case 2:
10503 pat = (*insn_data[d->icode].genfun) (op[0], op[1]);
10504 break;
10505 case 3:
10506 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2]);
10507 break;
10508 case 4:
10509 pat = (*insn_data[d->icode].genfun) (op[0], op[1], op[2], op[3]);
10510 break;
10511 default:
10512 gcc_unreachable ();
10513 }
10514 if (! pat)
10515 return NULL_RTX;
10516 emit_insn (pat);
10517 return target;
10518 }
10519
10520 /* Implement TARGET_HARD_REGNO_NREGS. On the SH all but the XD regs are
10521 UNITS_PER_WORD bytes wide. */
10522
10523 static unsigned int
10524 sh_hard_regno_nregs (unsigned int regno, machine_mode mode)
10525 {
10526 if (XD_REGISTER_P (regno))
10527 return CEIL (GET_MODE_SIZE (mode), 2 * UNITS_PER_WORD);
10528 return CEIL (GET_MODE_SIZE (mode), UNITS_PER_WORD);
10529 }
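/* For example (illustrative): with UNITS_PER_WORD == 4, a DFmode value
   (8 bytes) needs CEIL (8, 4) = 2 general registers, but only
   CEIL (8, 8) = 1 XD register.  */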
10530
10531 /* Implement TARGET_HARD_REGNO_MODE_OK.
10532
10533 We can allow any mode in any general register. The special registers
10534 only allow SImode. Don't allow any mode in the PR.
10535
10536 We cannot hold DCmode values in the XD registers because alter_reg
10537 handles subregs of them incorrectly. We could work around this by
10538 spacing the XD registers like the DR registers, but this would require
10539 additional memory in every compilation to hold larger register vectors.
10540 We could hold SFmode / SCmode values in XD registers, but that
10541 would require a tertiary reload when reloading from / to memory,
10542 and a secondary reload to reload from / to general regs; that
10543 seems to be a losing proposition.
10544
10545 We want to allow TImode FP regs so that when V4SFmode is loaded as TImode,
10546 it won't be ferried through GP registers first. */
10547 static bool
10548 sh_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
10549 {
10550 if (SPECIAL_REGISTER_P (regno))
10551 return mode == SImode;
10552
10553 if (regno == FPUL_REG)
10554 return (mode == SImode || mode == SFmode);
10555
10556 if (FP_REGISTER_P (regno) && mode == SFmode)
10557 return true;
10558
10559 if (mode == V2SFmode)
10560 {
10561 if (((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 2 == 0)
10562 || GENERAL_REGISTER_P (regno)))
10563 return true;
10564 else
10565 return false;
10566 }
10567
10568 if (mode == V4SFmode)
10569 {
10570 if ((FP_REGISTER_P (regno) && (regno - FIRST_FP_REG) % 4 == 0)
10571 || GENERAL_REGISTER_P (regno))
10572 return true;
10573 else
10574 return false;
10575 }
10576
10577 if (mode == V16SFmode)
10578 return regno == FIRST_XD_REG;
10579
10580 if (FP_REGISTER_P (regno))
10581 {
10582 if (mode == SFmode
10583 || mode == SImode
10584 || ((TARGET_SH2E) && mode == SCmode)
10585 || (((TARGET_FPU_DOUBLE && mode == DFmode) || mode == DCmode)
10586 && ((regno - FIRST_FP_REG) & 1) == 0)
10587 || (TARGET_SH4 && mode == TImode
10588 && ((regno - FIRST_FP_REG) & 3) == 0))
10589 return true;
10590 else
10591 return false;
10592 }
10593
10594 if (XD_REGISTER_P (regno))
10595 return mode == DFmode;
10596
10597 if (regno == PR_REG)
10598 return mode == SImode;
10599
10600 if (regno == FPSCR_REG)
10601 return mode == SImode;
10602
10603 return true;
10604 }
10605
10606 /* Implement TARGET_MODES_TIEABLE_P.
10607
10608 If TARGET_HARD_REGNO_MODE_OK could produce different values for MODE1
10609 and MODE2, for any hard reg, then this must be false for correct output.
10610 That's the case for xd registers: we don't hold SFmode values in
10611 them, so we can't tie an SFmode pseudo with one in another
10612 floating-point mode. */
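/* For example, an SFmode value must not be tied with a DFmode value:
   DFmode can live in an XD register, where SFmode is not allowed. */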
10613
10614 static bool
10615 sh_modes_tieable_p (machine_mode mode1, machine_mode mode2)
10616 {
10617 return (mode1 == mode2
10618 || (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2)
10619 && (mode1 != SFmode && mode2 != SFmode)));
10620 }
10621
10622 /* Specify the modes required to caller save a given hard regno.
10623 choose_hard_reg_mode chooses mode based on TARGET_HARD_REGNO_MODE_OK
10624 and returns ?Imode for float regs when sh_hard_regno_mode_ok
10625 permits integer modes on them. That makes LRA's split process
10626 unhappy. See PR55212.
10627 */
10628 machine_mode
10629 sh_hard_regno_caller_save_mode (unsigned int regno, unsigned int nregs,
10630 machine_mode mode)
10631 {
10632 if (FP_REGISTER_P (regno)
10633 && (mode == SFmode
10634 || mode == SCmode
10635 || ((mode == DFmode || mode == DCmode)
10636 && ((regno - FIRST_FP_REG) & 1) == 0)))
10637 return mode;
10638
10639 return choose_hard_reg_mode (regno, nregs, false);
10640 }
10641
10642 /* Implement TARGET_CAN_CHANGE_MODE_CLASS. */
10643 static bool
10644 sh_can_change_mode_class (machine_mode from, machine_mode to,
10645 reg_class_t rclass)
10646 {
10647 /* We want to enable the use of SUBREGs as a means to
10648 VEC_SELECT a single element of a vector. */
10649
10650 /* This effectively disallows using GENERAL_REGS for SFmode vector subregs.
10651 This can be problematic when SFmode vector subregs need to be accessed
10652 on the stack with displacement addressing, as it happens with -O0.
10653 Thus we disallow the mode change for -O0. */
10654 if (to == SFmode && VECTOR_MODE_P (from) && GET_MODE_INNER (from) == SFmode)
10655 return optimize ? !reg_classes_intersect_p (GENERAL_REGS, rclass) : true;
10656
10657 if (GET_MODE_SIZE (from) != GET_MODE_SIZE (to))
10658 {
10659 if (TARGET_LITTLE_ENDIAN)
10660 {
10661 if (GET_MODE_SIZE (to) < 8 || GET_MODE_SIZE (from) < 8)
10662 return !reg_classes_intersect_p (DF_REGS, rclass);
10663 }
10664 else
10665 {
10666 if (GET_MODE_SIZE (from) < 8)
10667 return !reg_classes_intersect_p (DF_REGS, rclass);
10668 }
10669 }
10670 return true;
10671 }
10672
10673 /* Return true if registers in machine mode MODE will likely be
10674 allocated to registers in small register classes. */
10675 bool
10676 sh_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
10677 {
10678 return true;
10679 }
10680
10681 /* If ADDRESS refers to a CODE_LABEL, add NUSES to the number of times
10682 that label is used. */
10683 void
10684 sh_mark_label (rtx address, int nuses)
10685 {
10686 if (GOTOFF_P (address))
10687 {
10688 /* Extract the label or symbol. */
10689 address = XEXP (address, 0);
10690 if (GET_CODE (address) == PLUS)
10691 address = XEXP (address, 0);
10692 address = XVECEXP (address, 0, 0);
10693 }
10694 if (GET_CODE (address) == LABEL_REF
10695 && LABEL_P (XEXP (address, 0)))
10696 LABEL_NUSES (XEXP (address, 0)) += nuses;
10697 }
10698
10699 /* Compute extra cost of moving data between one register class
10700 and another.
10701
10702 If SECONDARY*_RELOAD_CLASS says something about the src/dst pair, regclass
10703 uses this information. Hence, the general register <-> floating point
10704 register information here is not used for SFmode. */
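/* The cost of a plain move between general registers works out to 2 per
   word (the final return below); the other costs are relative to that. */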
10705 static int
10706 sh_register_move_cost (machine_mode mode,
10707 reg_class_t srcclass, reg_class_t dstclass)
10708 {
10709 if (dstclass == T_REGS || dstclass == PR_REGS)
10710 return 10;
10711
10712 if (dstclass == MAC_REGS && srcclass == MAC_REGS)
10713 return 4;
10714
10715 if (mode == SImode && TARGET_FMOVD
10716 && REGCLASS_HAS_FP_REG (srcclass)
10717 && REGCLASS_HAS_FP_REG (dstclass))
10718 return 4;
10719
10720 if (REGCLASS_HAS_FP_REG (dstclass) && srcclass == T_REGS)
10721 return ((TARGET_HARD_SH4 && !optimize_size) ? 10 : 7);
10722
10723 if ((REGCLASS_HAS_FP_REG (dstclass) && srcclass == MAC_REGS)
10724 || (dstclass == MAC_REGS && REGCLASS_HAS_FP_REG (srcclass)))
10725 return 9;
10726
10727 if ((REGCLASS_HAS_FP_REG (dstclass)
10728 && REGCLASS_HAS_GENERAL_REG (srcclass))
10729 || (REGCLASS_HAS_GENERAL_REG (dstclass)
10730 && REGCLASS_HAS_FP_REG (srcclass)))
10731 {
10732 /* Discourage trying to use fp regs for a pointer. This also
10733 discourages fp regs with SImode because Pmode is an alias
10734 of SImode on this target. See PR target/48596. */
10735 int addend = (mode == Pmode) ? 40 : 0;
10736
10737 return ((TARGET_FMOVD ? 8 : 12) + addend)
10738 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10739 }
10740
10741 if ((dstclass == FPUL_REGS
10742 && REGCLASS_HAS_GENERAL_REG (srcclass))
10743 || (srcclass == FPUL_REGS
10744 && REGCLASS_HAS_GENERAL_REG (dstclass)))
10745 return 5;
10746
10747 if ((dstclass == FPUL_REGS
10748 && (srcclass == PR_REGS || srcclass == MAC_REGS || srcclass == T_REGS))
10749 || (srcclass == FPUL_REGS
10750 && (dstclass == PR_REGS || dstclass == MAC_REGS)))
10751 return 7;
10752
10753 if ((srcclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10754 || (dstclass == FPSCR_REGS && ! REGCLASS_HAS_GENERAL_REG (srcclass)))
10755 return 4;
10756
10757 if (TARGET_FMOVD
10758 && ! REGCLASS_HAS_GENERAL_REG (srcclass)
10759 && ! REGCLASS_HAS_GENERAL_REG (dstclass))
10760 return 2 * ((GET_MODE_SIZE (mode) + 7) / 8U);
10761
10762 return 2 * ((GET_MODE_SIZE (mode) + 3) / 4U);
10763 }
10764
10765 static rtx
10766 emit_load_ptr (rtx reg, rtx addr)
10767 {
10768 rtx mem = gen_const_mem (ptr_mode, addr);
10769
10770 if (Pmode != ptr_mode)
10771 mem = gen_rtx_SIGN_EXTEND (Pmode, mem);
10772 return emit_move_insn (reg, mem);
10773 }
10774
10775 static void
10776 sh_output_mi_thunk (FILE *file, tree thunk_fndecl ATTRIBUTE_UNUSED,
10777 HOST_WIDE_INT delta, HOST_WIDE_INT vcall_offset,
10778 tree function)
10779 {
10780 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk_fndecl));
10781 CUMULATIVE_ARGS cum;
10782 int structure_value_byref = 0;
10783 rtx this_rtx, this_value, sibcall, funexp;
10784 rtx_insn *insns;
10785 tree funtype = TREE_TYPE (function);
10786 int simple_add = CONST_OK_FOR_ADD (delta);
10787 int did_load = 0;
10788 rtx scratch0, scratch1, scratch2;
10789
10790 reload_completed = 1;
10791 epilogue_completed = 1;
10792 crtl->uses_only_leaf_regs = 1;
10793
10794 emit_note (NOTE_INSN_PROLOGUE_END);
10795
10796 /* Find the "this" pointer. We have such a wide range of ABIs for the
10797 SH that it's best to do this completely machine independently.
10798 "this" is passed as first argument, unless a structure return pointer
10799 comes first, in which case "this" comes second. */
10800 INIT_CUMULATIVE_ARGS (cum, funtype, NULL_RTX, 0, 1);
10801 #ifndef PCC_STATIC_STRUCT_RETURN
10802 if (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function))
10803 structure_value_byref = 1;
10804 #endif /* not PCC_STATIC_STRUCT_RETURN */
10805 if (structure_value_byref && sh_struct_value_rtx (function, 0) == 0)
10806 {
10807 tree ptype = build_pointer_type (TREE_TYPE (funtype));
10808
10809 function_arg_info ptr_arg (ptype, Pmode, /*named=*/true);
10810 sh_function_arg_advance (pack_cumulative_args (&cum), ptr_arg);
10811 }
10812 function_arg_info ptr_arg (ptr_type_node, Pmode, /*named=*/true);
10813 this_rtx = sh_function_arg (pack_cumulative_args (&cum), ptr_arg);
10814
10815 /* For SHcompact, we only have r0 for a scratch register: r1 is the
10816 static chain pointer (even if you can't have nested virtual functions
10817 right now, someone might implement them sometime), and the rest of the
10818 registers are used for argument passing, are callee-saved, or reserved. */
10819 /* We need to check call_used_regs / fixed_regs in case -fcall-saved-reg /
10820 -ffixed-reg has been used. */
10821 if (! call_used_regs[0] || fixed_regs[0])
10822 error ("r0 needs to be available as a call-clobbered register");
10823 scratch0 = scratch1 = scratch2 = gen_rtx_REG (Pmode, 0);
10824
10825 {
10826 if (call_used_regs[1] && ! fixed_regs[1])
10827 scratch1 = gen_rtx_REG (ptr_mode, 1);
10828 /* N.B., if not TARGET_HITACHI, register 2 is used to pass the pointer
10829 to the location where struct values are returned. */
10830 if (call_used_regs[3] && ! fixed_regs[3])
10831 scratch2 = gen_rtx_REG (Pmode, 3);
10832 }
10833
10834 this_value = plus_constant (Pmode, this_rtx, delta);
10835 if (vcall_offset
10836 && (simple_add || scratch0 != scratch1)
10837 && strict_memory_address_p (ptr_mode, this_value))
10838 {
10839 emit_load_ptr (scratch0, this_value);
10840 did_load = 1;
10841 }
10842
10843 if (!delta)
10844 ; /* Do nothing. */
10845 else if (simple_add)
10846 emit_move_insn (this_rtx, this_value);
10847 else
10848 {
10849 emit_move_insn (scratch1, GEN_INT (delta));
10850 emit_insn (gen_add2_insn (this_rtx, scratch1));
10851 }
10852
10853 if (vcall_offset)
10854 {
10855 rtx offset_addr;
10856
10857 if (!did_load)
10858 emit_load_ptr (scratch0, this_rtx);
10859
10860 offset_addr = plus_constant (Pmode, scratch0, vcall_offset);
10861 if (strict_memory_address_p (ptr_mode, offset_addr))
10862 ; /* Do nothing. */
10863 else if (scratch0 != scratch1)
10864 {
10865 /* scratch0 != scratch1, and we have indexed loads. Get better
10866 schedule by loading the offset into r1 and using an indexed
10867 load - then the load of r1 can issue before the load from
10868 (this_rtx + delta) finishes. */
10869 emit_move_insn (scratch1, GEN_INT (vcall_offset));
10870 offset_addr = gen_rtx_PLUS (Pmode, scratch0, scratch1);
10871 }
10872 else if (CONST_OK_FOR_ADD (vcall_offset))
10873 {
10874 emit_insn (gen_add2_insn (scratch0, GEN_INT (vcall_offset)));
10875 offset_addr = scratch0;
10876 }
10877 else
10878 gcc_unreachable (); /* FIXME */
10879 emit_load_ptr (scratch0, offset_addr);
10880
10881 if (Pmode != ptr_mode)
10882 scratch0 = gen_rtx_TRUNCATE (ptr_mode, scratch0);
10883 emit_insn (gen_add2_insn (this_rtx, scratch0));
10884 }
10885
10886 /* Generate a tail call to the target function. */
10887 if (! TREE_USED (function))
10888 {
10889 assemble_external (function);
10890 TREE_USED (function) = 1;
10891 }
10892 funexp = XEXP (DECL_RTL (function), 0);
10893 /* If the function is overridden, so is the thunk, hence we don't
10894 need GOT addressing even if this is a public symbol. */
10895 #if 0
10896 if (TARGET_SH1 && ! flag_weak)
10897 sibcall = gen_sibcalli_thunk (funexp, const0_rtx);
10898 else
10899 #endif
10900 if (TARGET_SH2 && flag_pic)
10901 {
10902 if (TARGET_FDPIC)
10903 {
10904 sibcall = gen_sibcall_pcrel_fdpic (funexp, const0_rtx);
10905 XEXP (XVECEXP (sibcall, 0, 3), 0) = scratch2;
10906 }
10907 else
10908 {
10909 sibcall = gen_sibcall_pcrel (funexp, const0_rtx);
10910 XEXP (XVECEXP (sibcall, 0, 2), 0) = scratch2;
10911 }
10912 }
10913 else
10914 {
10915 emit_move_insn (scratch2, funexp);
10916 funexp = gen_rtx_MEM (FUNCTION_MODE, scratch2);
10917 sibcall = gen_sibcall (funexp, const0_rtx, NULL_RTX);
10918 }
10919 sibcall = emit_call_insn (sibcall);
10920 SIBLING_CALL_P (sibcall) = 1;
10921 use_reg (&CALL_INSN_FUNCTION_USAGE (sibcall), this_rtx);
10922 emit_barrier ();
10923
10924 /* Run just enough of rest_of_compilation to do scheduling and get
10925 the insns emitted. */
10926
10927 insns = get_insns ();
10928
10929 if (optimize > 0)
10930 {
10931 if (! cfun->cfg)
10932 init_flow (cfun);
10933 split_all_insns_noflow ();
10934 }
10935
10936 sh_reorg ();
10937 shorten_branches (insns);
10938 assemble_start_function (thunk_fndecl, fnname);
10939 final_start_function (insns, file, 1);
10940 final (insns, file, 1);
10941 final_end_function ();
10942 assemble_end_function (thunk_fndecl, fnname);
10943
10944 reload_completed = 0;
10945 epilogue_completed = 0;
10946 }
10947
10948 /* Return an RTX pair for the address and call site label of a function
10949 NAME of kind KIND, placing the result in TARGET if not NULL. For
10950 SFUNC_STATIC, if FDPIC, the LAB member of result will be set to
10951 (const_int 0) if jsr should be used, or a label_ref if bsrf should
10952 be used. For FDPIC, both SFUNC_GOT and SFUNC_STATIC will return the
10953 address of the function itself, not a function descriptor, so they
10954 can only be used with functions not using the FDPIC register that
10955 are known to be called directly, without a PLT entry. */
10956
10957 function_symbol_result
10958 function_symbol (rtx target, const char *name, sh_function_kind kind)
10959 {
10960 /* If this is not an ordinary function, the name usually comes from a
10961 string literal or an sprintf buffer. Make sure we use the same
10962 string consistently, so that cse will be able to unify address loads. */
10963 if (kind != FUNCTION_ORDINARY)
10964 name = IDENTIFIER_POINTER (get_identifier (name));
10965 rtx sym = gen_rtx_SYMBOL_REF (Pmode, name);
10966 rtx lab = const0_rtx;
10967 SYMBOL_REF_FLAGS (sym) = SYMBOL_FLAG_FUNCTION;
10968 if (flag_pic)
10969 switch (kind)
10970 {
10971 case FUNCTION_ORDINARY:
10972 break;
10973 case SFUNC_GOT:
10974 {
10975 rtx reg = target ? target : gen_reg_rtx (Pmode);
10976
10977 emit_insn (gen_symGOT2reg (reg, sym));
10978 sym = reg;
10979 break;
10980 }
10981 case SFUNC_STATIC:
10982 {
10983 rtx reg = target ? target : gen_reg_rtx (Pmode);
10984
10985 if (TARGET_FDPIC)
10986 {
10987 /* We use PC-relative calls, since GOTOFF can only refer
10988 to writable data. This works along with sh_sfunc_call. */
10989 lab = PATTERN (gen_call_site ());
10990 emit_insn (gen_sym_label2reg (reg, sym, lab));
10991 }
10992 else
10993 {
10994 /* ??? To allow cse to work, we use GOTOFF relocations.
10995 we could add combiner patterns to transform this into
10996 straight pc-relative calls with sym2PIC / bsrf when
10997 label load and function call are still 1:1 and in the
10998 same basic block during combine. */
10999 emit_insn (gen_symGOTOFF2reg (reg, sym));
11000 }
11001
11002 sym = reg;
11003 break;
11004 }
11005 }
11006 if (target && sym != target)
11007 {
11008 emit_move_insn (target, sym);
11009 return function_symbol_result (target, lab);
11010 }
11011 return function_symbol_result (sym, lab);
11012 }
11013
11014 /* Return the number of the first general purpose register whose bit is
11015 set in S, or -1 if there is none. */
11016 static int
11017 scavenge_reg (HARD_REG_SET *s)
11018 {
11019 for (int r = FIRST_GENERAL_REG; r <= LAST_GENERAL_REG; r++)
11020 if (TEST_HARD_REG_BIT (*s, r))
11021 return r;
11022 return -1;
11023 }
11024
11025 rtx
11026 sh_get_pr_initial_val (void)
11027 {
11028 /* If we haven't finished rtl generation, there might be a nonlocal label
11029 that we haven't seen yet.
11030 ??? get_hard_reg_initial_val fails if it is called after register
11031 allocation has started, unless it has been called before for the
11032 same register. And even then, we end up in trouble if we didn't use
11033 the register in the same basic block before. So call
11034 get_hard_reg_initial_val now and wrap it in an unspec if we might
11035 need to replace it. */
11036 /* ??? We also must do this for TARGET_SH1 in general, because otherwise
11037 combine can put the pseudo returned by get_hard_reg_initial_val into
11038 instructions that need a general purpose register, which will fail to
11039 be recognized when the pseudo becomes allocated to PR. */
11040 rtx val = get_hard_reg_initial_val (Pmode, PR_REG);
11041 return gen_rtx_UNSPEC (SImode, gen_rtvec (1, val), UNSPEC_RA);
11042 }
11043
11044 bool
11045 sh_expand_t_scc (rtx operands[])
11046 {
11047 enum rtx_code code = GET_CODE (operands[1]);
11048 rtx target = operands[0];
11049 rtx op0 = operands[2];
11050 rtx op1 = operands[3];
11051 rtx result = target;
11052
11053 if (!REG_P (op0) || REGNO (op0) != T_REG
11054 || !CONST_INT_P (op1))
11055 return false;
11056 if (!REG_P (result))
11057 result = gen_reg_rtx (SImode);
11058 HOST_WIDE_INT val = INTVAL (op1);
11059 if ((code == EQ && val == 1) || (code == NE && val == 0))
11060 emit_insn (gen_movt (result, get_t_reg_rtx ()));
11061 else if ((code == EQ && val == 0) || (code == NE && val == 1))
11062 emit_insn (gen_movnegt (result, get_t_reg_rtx ()));
11063 else if (code == EQ || code == NE)
11064 emit_insn (gen_move_insn (result, GEN_INT (code == NE)));
11065 else
11066 return false;
11067 if (result != target)
11068 emit_move_insn (target, result);
11069 return true;
11070 }
11071
11072 /* INSN is an sfunc; return the rtx that describes the address used. */
11073 static rtx
11074 extract_sfunc_addr (rtx insn)
11075 {
11076 rtx pattern = PATTERN (insn);
11077 const int len = XVECLEN (pattern, 0);
11078 for (int i = 0; i < len; i++)
11079 {
11080 rtx part = XVECEXP (pattern, 0, i);
11081 if (GET_CODE (part) == USE && GET_MODE (XEXP (part, 0)) == Pmode
11082 && GENERAL_REGISTER_P (true_regnum (XEXP (part, 0))))
11083 return XEXP (part, 0);
11084 }
11085 gcc_assert (GET_CODE (XVECEXP (pattern, 0, 0)) == UNSPEC_VOLATILE);
11086 return XVECEXP (XVECEXP (pattern, 0, 0), 0, 1);
11087 }
11088
11089 /* Verify that the register in use_sfunc_addr still agrees with the address
11090 used in the sfunc. This prevents fill_slots_from_thread from changing
11091 use_sfunc_addr.
11092 INSN is the use_sfunc_addr instruction, and REG is the register it
11093 guards. */
11094 bool
11095 check_use_sfunc_addr (rtx_insn *insn, rtx reg)
11096 {
11097 /* Search for the sfunc. It should really come right after INSN. */
11098 while ((insn = NEXT_INSN (insn)))
11099 {
11100 if (LABEL_P (insn) || JUMP_P (insn))
11101 break;
11102 if (! INSN_P (insn))
11103 continue;
11104
11105 if (rtx_sequence *seq = dyn_cast<rtx_sequence *> (PATTERN (insn)))
11106 insn = seq->insn (0);
11107 if (GET_CODE (PATTERN (insn)) != PARALLEL
11108 || get_attr_type (insn) != TYPE_SFUNC)
11109 continue;
11110 return rtx_equal_p (extract_sfunc_addr (insn), reg);
11111 }
11112 gcc_unreachable ();
11113 }
11114
11115 /* This function returns a constant rtx that represents 2**15 / pi in
11116 SFmode. It's used to scale SFmode angles, in radians, to a fixed-point
11117 signed 16.16-bit fraction of a full circle, i.e. 2*pi maps to 0x10000. */
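/* For example, an angle of pi/2 radians multiplied by 2**15 / pi gives
   0x4000, i.e. one quarter of the 0x10000 full circle. */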
11118 static GTY(()) rtx sh_fsca_sf2int_rtx;
11119
11120 rtx
11121 sh_fsca_sf2int (void)
11122 {
11123 if (! sh_fsca_sf2int_rtx)
11124 {
11125 REAL_VALUE_TYPE rv;
11126
11127 real_from_string (&rv, "10430.378350470453");
11128 sh_fsca_sf2int_rtx = const_double_from_real_value (rv, SFmode);
11129 }
11130
11131 return sh_fsca_sf2int_rtx;
11132 }
11133
11134 /* This function returns a constant rtx that represents pi / 2**15 in
11135 SFmode. It's used to scale a fixed-point signed 16.16-bit fraction
11136 of a full circle back to an SFmode angle in radians, i.e. 0x10000
11137 maps to 2*pi. */
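/* For example, the fixed-point value 0x4000 multiplied by pi / 2**15
   gives back pi/2 radians. */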
11138 static GTY(()) rtx sh_fsca_int2sf_rtx;
11139
11140 rtx
11141 sh_fsca_int2sf (void)
11142 {
11143 if (! sh_fsca_int2sf_rtx)
11144 {
11145 REAL_VALUE_TYPE rv;
11146
11147 real_from_string (&rv, "9.587379924285257e-5");
11148 sh_fsca_int2sf_rtx = const_double_from_real_value (rv, SFmode);
11149 }
11150
11151 return sh_fsca_int2sf_rtx;
11152 }
11153
11154 /* Initialize the CUMULATIVE_ARGS structure. */
11155 void
11156 sh_init_cumulative_args (CUMULATIVE_ARGS * pcum,
11157 tree fntype,
11158 rtx libname ATTRIBUTE_UNUSED,
11159 tree fndecl,
11160 signed int n_named_args,
11161 machine_mode mode)
11162 {
11163 pcum->arg_count [(int) SH_ARG_FLOAT] = 0;
11164 pcum->free_single_fp_reg = 0;
11165 pcum->outgoing = n_named_args != -1;
11166
11167 /* FIXME: Should we check TARGET_HITACHI here ??? */
11168 pcum->renesas_abi = sh_attr_renesas_p (fntype);
11169
11170 if (fntype)
11171 {
11172 pcum->force_mem = ((TARGET_HITACHI || pcum->renesas_abi)
11173 && aggregate_value_p (TREE_TYPE (fntype), fndecl));
11174 pcum->prototype_p = prototype_p (fntype);
11175 pcum->arg_count [(int) SH_ARG_INT] = 0;
11176 }
11177 else
11178 {
11179 pcum->arg_count [(int) SH_ARG_INT] = 0;
11180 pcum->prototype_p = false;
11181 if (mode != VOIDmode)
11182 {
11183 /* If the default ABI is the Renesas ABI then all library
11184 calls must assume that the library will be using the
11185 Renesas ABI. So if the function would return its result
11186 in memory then we must force the address of this memory
11187 block onto the stack. Ideally we would like to call
11188 targetm.calls.return_in_memory() here but we do not have
11189 the TYPE or the FNDECL available so we synthesize the
11190 contents of that function as best we can. */
11191 pcum->force_mem =
11192 (TARGET_DEFAULT & MASK_HITACHI)
11193 && (mode == BLKmode
11194 || (GET_MODE_SIZE (mode) > 4
11195 && !(mode == DFmode
11196 && TARGET_FPU_DOUBLE)));
11197 }
11198 else
11199 pcum->force_mem = false;
11200 }
11201 }
11202
11203 rtx
11204 sh_gen_truncate (machine_mode mode, rtx x, int need_sign_ext)
11205 {
11206 enum rtx_code code = TRUNCATE;
11207
11208 if (GET_CODE (x) == ZERO_EXTEND || GET_CODE (x) == SIGN_EXTEND)
11209 {
11210 rtx inner = XEXP (x, 0);
11211 machine_mode inner_mode = GET_MODE (inner);
11212
11213 if (inner_mode == mode)
11214 return inner;
11215 else if (GET_MODE_SIZE (inner_mode) >= GET_MODE_SIZE (mode))
11216 x = inner;
11217 else if (GET_MODE_SIZE (inner_mode) < GET_MODE_SIZE (mode)
11218 && (! need_sign_ext || GET_CODE (x) == SIGN_EXTEND))
11219 {
11220 code = GET_CODE (x);
11221 x = inner;
11222 }
11223 }
11224 return gen_rtx_fmt_e (code, mode, x);
11225 }
11226
11227 /* Load and store depend on the highpart of the address. However,
11228 set_attr_alternative does not give well-defined results before reload,
11229 so we must look at the rtl ourselves to see if any of the feeding
11230 registers is used in a memref.
11231
11232 Return true iff INSN contains a MEM. */
11233 bool
11234 sh_contains_memref_p (rtx insn)
11235 {
11236 subrtx_iterator::array_type array;
11237 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), NONCONST)
11238 if (MEM_P (*iter))
11239 return true;
11240 return false;
11241 }
11242
11243 /* Return true iff INSN loads a banked register. */
11244 bool
11245 sh_loads_bankedreg_p (rtx insn)
11246 {
11247 if (GET_CODE (PATTERN (insn)) == SET)
11248 {
11249 rtx op = SET_DEST (PATTERN(insn));
11250 if (REG_P (op) && BANKED_REGISTER_P (REGNO (op)))
11251 return true;
11252 }
11253
11254 return false;
11255 }
11256
11257 /* Implement TARGET_PREFERRED_RELOAD_CLASS. */
11258 static reg_class_t
11259 sh_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
11260 {
11261 return rclass;
11262 }
11263
11264 /* Implement TARGET_SECONDARY_RELOAD. */
11265 static reg_class_t
11266 sh_secondary_reload (bool in_p, rtx x, reg_class_t rclass_i,
11267 machine_mode mode, secondary_reload_info *sri)
11268 {
11269 enum reg_class rclass = (enum reg_class) rclass_i;
11270
11271 if (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS
11272 && REG_P (XEXP (XEXP (x, 0), 0))
11273 && REGNO (XEXP (XEXP (x, 0), 0)) == GBR_REG)
11274 return rclass == R0_REGS ? NO_REGS : R0_REGS;
11275
11276 if (MEM_P (x) && REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == GBR_REG)
11277 return rclass == R0_REGS ? NO_REGS : R0_REGS;
11278
11279 if (REG_P (x) && REGNO (x) == GBR_REG)
11280 return NO_REGS;
11281
11282 if (in_p)
11283 {
11284 if (REGCLASS_HAS_FP_REG (rclass)
11285 && immediate_operand ((x), mode)
11286 && ! ((fp_zero_operand (x) || fp_one_operand (x)) && mode == SFmode))
11287 switch (mode)
11288 {
11289 case E_SFmode:
11290 sri->icode = CODE_FOR_reload_insf__frn;
11291 return NO_REGS;
11292 case E_DFmode:
11293 sri->icode = CODE_FOR_reload_indf__frn;
11294 return NO_REGS;
11295 case E_SImode:
11296 /* ??? If we knew that we are in the appropriate mode -
11297 single precision - we could use a reload pattern directly. */
11298 return FPUL_REGS;
11299 default:
11300 abort ();
11301 }
11302 if (rclass == FPUL_REGS
11303 && ((REG_P (x) && (REGNO (x) == MACL_REG || REGNO (x) == MACH_REG
11304 || REGNO (x) == T_REG))
11305 || GET_CODE (x) == PLUS))
11306 return GENERAL_REGS;
11307 if (rclass == FPUL_REGS && immediate_operand (x, mode))
11308 {
11309 if (satisfies_constraint_I08 (x) || fp_zero_operand (x))
11310 return GENERAL_REGS;
11311 else if (mode == SFmode)
11312 return FP_REGS;
11313 sri->icode = CODE_FOR_reload_insi__i_fpul;
11314 return NO_REGS;
11315 }
11316 if (rclass == FPSCR_REGS
11317 && ((REG_P (x) && REGNO (x) >= FIRST_PSEUDO_REGISTER)
11318 || (MEM_P (x) && GET_CODE (XEXP (x, 0)) == PLUS)))
11319 return GENERAL_REGS;
11320 } /* end of input-only processing. */
11321
11322 if (((REGCLASS_HAS_FP_REG (rclass)
11323 && (REG_P (x)
11324 && (GENERAL_OR_AP_REGISTER_P (REGNO (x))
11325 || (FP_REGISTER_P (REGNO (x)) && mode == SImode
11326 && TARGET_FMOVD))))
11327 || (REGCLASS_HAS_GENERAL_REG (rclass)
11328 && REG_P (x)
11329 && FP_REGISTER_P (REGNO (x))))
11330 && (mode == SFmode || mode == SImode))
11331 return FPUL_REGS;
11332 if ((rclass == FPUL_REGS
11333 || (REGCLASS_HAS_FP_REG (rclass) && mode == SImode))
11334 && (MEM_P (x)
11335 || (REG_P (x)
11336 && (REGNO (x) >= FIRST_PSEUDO_REGISTER
11337 || REGNO (x) == T_REG
11338 || system_reg_operand (x, VOIDmode)))))
11339 {
11340 if (rclass == FPUL_REGS)
11341 return GENERAL_REGS;
11342 return NO_REGS; // LRA wants NO_REGS here, it used to be FPUL_REGS;
11343 }
11344
11345 if ((rclass == MAC_REGS || rclass == PR_REGS)
11346 && REG_P (x) && ! GENERAL_REGISTER_P (REGNO (x))
11347 && rclass != REGNO_REG_CLASS (REGNO (x)))
11348 return GENERAL_REGS;
11349
11350 /* If here fall back to loading FPUL register through general registers.
11351 This case can happen when movsi_ie insn is picked initially to
11352 load/store the FPUL register from/to another register, and then the
11353 other register is allocated on the stack. */
11354 if (rclass == FPUL_REGS && true_regnum (x) == -1)
11355 return GENERAL_REGS;
11356
11357 /* Force mov.b / mov.w displacement addressing insn to use R0 as
11358 the other operand.
11359 On SH2A we could also just leave it alone here, which would result in a
11360 4 byte move insn being generated instead. However, for this to work
11361 the insns must have the appropriate alternatives. */
11362 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
11363 && satisfies_constraint_Sdd (x)
11364 && sh_disp_addr_displacement (x)
11365 <= sh_max_mov_insn_displacement (mode, false))
11366 return R0_REGS;
11367
11368 /* When reload is trying to address a QImode or HImode subreg on the stack,
11369 force any subreg byte into R0_REGS, as this is going to become a
11370 displacement address.
11371 We could restrict this to SUBREG_BYTE (x) > 0, but if the actual reg
11372 is on the stack, the memref to it might already require a displacement
11373 and that has to be added to the final address. At this point we don't
11374 know the cumulative displacement so we assume the worst case. */
11375 if ((mode == QImode || mode == HImode) && rclass != R0_REGS
11376 && GET_CODE (x) == SUBREG && true_regnum (x) == -1)
11377 return R0_REGS;
11378
11379 return NO_REGS;
11380 }
11381
11382 /* Return true if SUBST can't safely replace its equivalent during RA. */
11383 static bool
11384 sh_cannot_substitute_mem_equiv_p (rtx)
11385 {
11386 /* If SUBST is mem[base+index] or QI/HImode mem[base+disp], the insn
11387 uses R0 and may cause spill failure when R0 is already used.
11388 We have to return true for that case at least.
11389 Moreover, SH has strong R0 parity and also does not have enough hard
11390 registers to make the equiv substitution a win in size or speed on
11391 average working sets. The pseudos produced to hold the equiv values
11392 can't get good hard registers in the bad cases and end up as memory
11393 save/restore insns, which makes the code worse. */
11394 return true;
11395 }
11396
11397 /* Implement TARGET_LEGITIMIZE_ADDRESS_DISPLACEMENT. */
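/* The idea is to split a large displacement into an adjustment added to
   the base address (adj.offset_adjust below) plus a small residual
   displacement (adj.mov_disp) that the mov insn can encode directly. */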
11398 static bool
11399 sh_legitimize_address_displacement (rtx *offset1, rtx *offset2,
11400 poly_int64 orig_offset,
11401 machine_mode mode)
11402 {
11403 if ((TARGET_FPU_DOUBLE && mode == DFmode)
11404 || (TARGET_SH2E && mode == SFmode))
11405 return false;
11406
11407 struct disp_adjust adj = sh_find_mov_disp_adjust (mode, orig_offset);
11408 if (adj.offset_adjust != NULL_RTX && adj.mov_disp != NULL_RTX)
11409 {
11410 *offset1 = adj.offset_adjust;
11411 *offset2 = adj.mov_disp;
11412 return true;
11413 }
11414
11415 return false;
11416 }
11417
11418 /* Return true if a movsf insn should be split using an additional
11419 register. */
11420 bool
11421 sh_movsf_ie_ra_split_p (rtx op0, rtx op1, rtx op2)
11422 {
11423 /* op0 == op1 */
11424 if (rtx_equal_p (op0, op1))
11425 return true;
11426 /* fy, FQ, reg */
11427 if (GET_CODE (op1) == CONST_DOUBLE
11428 && ! satisfies_constraint_G (op1)
11429 && ! satisfies_constraint_H (op1)
11430 && REG_P (op0)
11431 && REG_P (op2))
11432 return true;
11433 /* f, r, y */
11434 if (REG_P (op0) && FP_REGISTER_P (REGNO (op0))
11435 && REG_P (op1) && GENERAL_REGISTER_P (REGNO (op1))
11436 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
11437 return true;
11438 /* r, f, y */
11439 if (REG_P (op1) && FP_REGISTER_P (REGNO (op1))
11440 && REG_P (op0) && GENERAL_REGISTER_P (REGNO (op0))
11441 && REG_P (op2) && (REGNO (op2) == FPUL_REG))
11442 return true;
11443
11444 return false;
11445 }
11446
11447 static void
11448 sh_conditional_register_usage (void)
11449 {
11450 for (int regno = 0; regno < FIRST_PSEUDO_REGISTER; regno ++)
11451 if (! VALID_REGISTER_P (regno))
11452 fixed_regs[regno] = call_used_regs[regno] = 1;
11453 /* R8 and R9 are call-clobbered on SH5, but not on earlier SH ABIs. */
11454 if (flag_pic)
11455 {
11456 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11457 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
11458 }
11459 if (TARGET_FDPIC)
11460 {
11461 fixed_regs[PIC_REG] = 1;
11462 call_used_regs[PIC_REG] = 1;
11463 call_really_used_regs[PIC_REG] = 1;
11464 }
11465 /* Renesas saves and restores mac registers on call. */
11466 if (TARGET_HITACHI && ! TARGET_NOMACSAVE)
11467 {
11468 call_really_used_regs[MACH_REG] = 0;
11469 call_really_used_regs[MACL_REG] = 0;
11470 }
11471
11472 for (int regno = FIRST_GENERAL_REG; regno <= LAST_GENERAL_REG; regno++)
11473 if (! fixed_regs[regno] && call_really_used_regs[regno])
11474 SET_HARD_REG_BIT (reg_class_contents[SIBCALL_REGS], regno);
11475
11476 call_really_used_regs[FPSCR_MODES_REG] = 0;
11477 call_really_used_regs[FPSCR_STAT_REG] = 0;
11478 }
11479
11480 /* Implement TARGET_LEGITIMATE_CONSTANT_P
11481
11482 can_store_by_pieces constructs VOIDmode CONST_DOUBLEs. */
11483 static bool
11484 sh_legitimate_constant_p (machine_mode mode, rtx x)
11485 {
11486 if (SH_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
11487 {
11488 rtx base, offset;
11489 split_const (x, &base, &offset);
11490
11491 if (GET_CODE (base) == SYMBOL_REF
11492 && !offset_within_block_p (base, INTVAL (offset)))
11493 return false;
11494 }
11495
11496 if (TARGET_FDPIC
11497 && (SYMBOLIC_CONST_P (x)
11498 || (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS
11499 && SYMBOLIC_CONST_P (XEXP (XEXP (x, 0), 0)))))
11500 return false;
11501
11502 return GET_CODE (x) != CONST_DOUBLE
11503 || mode == DFmode || mode == SFmode
11504 || mode == DImode || GET_MODE (x) == VOIDmode;
11505 }
11506
11507 enum sh_divide_strategy_e sh_div_strategy = SH_DIV_STRATEGY_DEFAULT;
11508
11509 static void
11510 sh_init_sync_libfuncs (void)
11511 {
11512 init_sync_libfuncs (UNITS_PER_WORD);
11513 }
11514
11515 /* Return true if it is appropriate to emit `ret' instructions in the
11516 body of a function. */
11517 bool
11518 sh_can_use_simple_return_p (void)
11519 {
11520 if (! reload_completed || frame_pointer_needed)
11521 return false;
11522
11523 /* Moving the prologue around doesn't reduce the size. */
11524 if (optimize_function_for_size_p (cfun))
11525 return false;
11526
11527 /* Finally, allow a frame no larger than the 4-byte PR save. */
11528 HARD_REG_SET live_regs_mask;
11529 int d = calc_live_regs (&live_regs_mask);
11530
11531 if (rounded_frame_size (d) > 4)
11532 return false;
11533
11534 return true;
11535 }
11536
11537 /*------------------------------------------------------------------------------
11538 Address mode optimization support code
11539 */
11540
11541 typedef HOST_WIDE_INT disp_t;
11542 static const disp_t MIN_DISP = HOST_WIDE_INT_MIN;
11543 static const disp_t MAX_DISP = HOST_WIDE_INT_MAX;
11544 static const disp_t INVALID_DISP = MAX_DISP;
11545
11546 /* A memory reference which is described by a base register and a
11547 displacement. */
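/* For example, an address such as (plus (reg R) (const_int 4)) is
   described by base register R and displacement 4. */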
11548 class base_reg_disp
11549 {
11550 public:
11551 base_reg_disp (rtx br, disp_t d);
11552
11553 bool is_reg (void) const;
11554 bool is_disp (void) const;
11555 rtx reg (void) const;
11556 disp_t disp (void) const;
11557
11558 private:
11559 rtx reg_;
11560 disp_t disp_;
11561 };
11562
11563 inline
11564 base_reg_disp::base_reg_disp (rtx br, disp_t d)
11565 : reg_ (br), disp_ (d)
11566 {
11567 }
11568
11569 inline bool
11570 base_reg_disp::is_reg (void) const
11571 {
11572 return reg_ != NULL_RTX && disp_ != INVALID_DISP;
11573 }
11574
11575 inline bool
11576 base_reg_disp::is_disp (void) const
11577 {
11578 return reg_ == NULL_RTX && disp_ != INVALID_DISP;
11579 }
11580
11581 inline rtx
11582 base_reg_disp::reg (void) const
11583 {
11584 return reg_;
11585 }
11586
11587 inline disp_t
11588 base_reg_disp::disp (void) const
11589 {
11590 return disp_;
11591 }
11592
11593 /* Find the base register and calculate the displacement for a given
11594 address rtx 'x'. */
11595 static base_reg_disp
11596 sh_find_base_reg_disp (rtx_insn* insn, rtx x, disp_t disp = 0,
11597 rtx base_reg = NULL)
11598 {
11599 if (REG_P (x))
11600 {
11601 if (REGNO (x) == GBR_REG)
11602 return base_reg_disp (x, disp);
11603
11604 /* We've reached a hard-reg. This is probably the point where
11605 function args are copied to pseudos. Do not go any further and
11606 stick to the pseudo. If the original mem addr was in a hard reg
11607 from the beginning, it will become the base reg. */
11608 if (REGNO (x) < FIRST_PSEUDO_REGISTER)
11609 return base_reg_disp (base_reg != NULL ? base_reg : x, disp);
11610
11611 /* Find the def of the reg and trace it. If there is more than one
11612 def and they are not all the same, assume it's not safe to proceed. */
11613 rtx_insn* last_i = NULL;
11614 rtx last_set = NULL;
11615 for (df_ref d = DF_REG_DEF_CHAIN (REGNO (x)); d != NULL;
11616 d = DF_REF_NEXT_REG (d))
11617 {
11618 rtx set = const_cast<rtx> (set_of (x, DF_REF_INSN (d)));
11619
11620 /* Accept multiple defs, as long as they are equal. */
11621 if (last_set == NULL || rtx_equal_p (last_set, set))
11622 {
11623 last_i = DF_REF_INSN (d);
11624 last_set = set;
11625 }
11626 else
11627 {
11628 last_i = NULL;
11629 last_set = NULL;
11630 break;
11631 }
11632 }
11633
11634 if (last_set != NULL && last_i != NULL)
11635 return sh_find_base_reg_disp (last_i, XEXP (last_set, 1), disp,
11636 XEXP (last_set, 0));
11637
11638 /* If we get here, no previous insn was found that sets the reg.
11639 The input reg is already the base reg. */
11640 return base_reg_disp (x, disp);
11641 }
11642
11643 else if (GET_CODE (x) == PLUS)
11644 {
11645 base_reg_disp left_val = sh_find_base_reg_disp (insn, XEXP (x, 0));
11646 base_reg_disp right_val = sh_find_base_reg_disp (insn, XEXP (x, 1));
11647
11648 /* Either left or right val must be a reg.
11649 We don't handle the case of 'reg + reg' here. */
11650 if (left_val.is_reg () && right_val.is_disp ())
11651 return base_reg_disp (left_val.reg (), left_val.disp ()
11652 + right_val.disp () + disp);
11653 else if (right_val.is_reg () && left_val.is_disp ())
11654 return base_reg_disp (right_val.reg (), right_val.disp ()
11655 + left_val.disp () + disp);
11656 else
11657 return base_reg_disp (base_reg, disp);
11658 }
11659
11660 else if (CONST_INT_P (x))
11661 return base_reg_disp (NULL, disp + INTVAL (x));
11662
11663 /* Didn't find anything useful. */
11664 return base_reg_disp (base_reg, disp);
11665 }
11666
11667 /* Given an insn and a memory operand, try to find an equivalent GBR
11668 based memory address and return the corresponding new memory address.
11669 Return NULL_RTX if not found. */
11670 rtx
11671 sh_find_equiv_gbr_addr (rtx_insn* insn, rtx mem)
11672 {
11673 if (!MEM_P (mem) || gbr_address_mem (mem, GET_MODE (mem)))
11674 return NULL_RTX;
11675
11676 /* Leave post/pre inc/dec or any other side effect addresses alone. */
11677 if (side_effects_p (XEXP (mem, 0)))
11678 return NULL_RTX;
11679
11680 /* When not optimizing there might be no dataflow available. */
11681 if (df == NULL)
11682 return NULL_RTX;
11683
11684 base_reg_disp gbr_disp = sh_find_base_reg_disp (insn, XEXP (mem, 0));
11685
11686 if (gbr_disp.is_reg () && REGNO (gbr_disp.reg ()) == GBR_REG)
11687 {
11688 /* If GBR is marked as call clobbered we bail out if we see a call.
11689 FIXME: Actually we should check whether this mem refers to the gbr value
11690 before or after the call. If there is a store_gbr preceding this
11691 mem, it's safe to use GBR for this mem.
11692
11693 If GBR is not marked as call clobbered, but there is some other
11694 def than a call, it's probably a load_gbr upon which we also
11695 bail out to be on the safe side.
11696 FIXME: Should check if we have a use-after-def case, such as
11697 the call case above. */
11698 for (df_ref d = DF_REG_DEF_CHAIN (GBR_REG); d != NULL;
11699 d = DF_REF_NEXT_REG (d))
11700 {
11701 if (CALL_P (DF_REF_INSN (d)))
11702 {
11703 if (REGNO_REG_SET_P (regs_invalidated_by_call_regset, GBR_REG))
11704 return NULL_RTX;
11705 else
11706 continue;
11707 }
11708 else
11709 return NULL_RTX;
11710 }
11711
11712 rtx disp = GEN_INT (gbr_disp.disp ());
11713 if (gbr_displacement (disp, GET_MODE (mem)))
11714 return gen_rtx_PLUS (SImode, gen_rtx_REG (SImode, GBR_REG), disp);
11715 }
11716
11717 return NULL_RTX;
11718 }
11719
11720 /*------------------------------------------------------------------------------
11721 Manual insn combine support code.
11722 */
11723
11724 /* Return true if the specified insn contains any UNSPECs or
11725 UNSPEC_VOLATILEs. */
11726 static bool
11727 sh_unspec_insn_p (rtx x)
11728 {
11729 subrtx_iterator::array_type array;
11730 FOR_EACH_SUBRTX (i, array, x, ALL)
11731 if (*i != NULL
11732 && (GET_CODE (*i) == UNSPEC || GET_CODE (*i) == UNSPEC_VOLATILE))
11733 return true;
11734
11735 return false;
11736 }
11737
11738 /* Return true if the register operands of the specified insn are modified
11739 between the specified from and to insns (exclusive of those two). */
11740 bool
11741 sh_insn_operands_modified_between_p (rtx_insn* operands_insn,
11742 const rtx_insn* from,
11743 const rtx_insn* to)
11744 {
11745 /* FIXME: Return true for multiple sets for now. */
11746 rtx s = single_set (operands_insn);
11747 if (s == NULL_RTX)
11748 return true;
11749
11750 subrtx_iterator::array_type array;
11751 FOR_EACH_SUBRTX (i, array, SET_SRC (s), ALL)
11752 if (*i != NULL &&
11753 ((REG_P (*i) || SUBREG_P (*i)) && reg_set_between_p (*i, from, to)))
11754 return true;
11755
11756 return false;
11757 }
11758
11759 /* Given an insn, determine whether it's a 'nott' insn, i.e. an insn that
11760 negates the T bit and stores the result in the T bit. */
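/* That is, a single (set (reg T) ...) whose source matches
   negt_reg_operand, typically an XOR of the T bit with 1. */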
11761 bool
11762 sh_is_nott_insn (const rtx_insn* i)
11763 {
11764 return i != NULL && GET_CODE (PATTERN (i)) == SET
11765 && t_reg_operand (XEXP (PATTERN (i), 0), VOIDmode)
11766 && negt_reg_operand (XEXP (PATTERN (i), 1), VOIDmode);
11767 }
11768
11769 rtx
11770 sh_movt_set_dest (const rtx_insn* i)
11771 {
11772 return i == NULL ? NULL : sh_movt_set_dest (PATTERN (i));
11773 }
11774
11775 rtx
11776 sh_movt_set_dest (const_rtx pat)
11777 {
11778 return GET_CODE (pat) == SET
11779 && arith_reg_dest (XEXP (pat, 0), SImode)
11780 && t_reg_operand (XEXP (pat, 1), VOIDmode) ? XEXP (pat, 0) : NULL;
11781 }
11782
11783 /* Given an insn, check whether it's a 'movrt' kind of insn, i.e. an insn
11784 that stores the negated T bit in a register, and return the destination
11785 register rtx, or null. */
11786 rtx
11787 sh_movrt_set_dest (const rtx_insn* i)
11788 {
11789 return i == NULL ? NULL : sh_movrt_set_dest (PATTERN (i));
11790 }
11791
11792 rtx
11793 sh_movrt_set_dest (const_rtx pat)
11794 {
11795 /* The negc movrt replacement is inside a parallel. */
11796 if (GET_CODE (pat) == PARALLEL)
11797 pat = XVECEXP (pat, 0, 0);
11798
11799 return GET_CODE (pat) == SET
11800 && arith_reg_dest (XEXP (pat, 0), SImode)
11801 && negt_reg_operand (XEXP (pat, 1), VOIDmode) ? XEXP (pat, 0) : NULL;
11802
11803 }
11804
11805 /* Given an insn and a reg number, tell whether the reg dies or is unused
11806 after the insn. */
11807 bool
11808 sh_reg_dead_or_unused_after_insn (const rtx_insn* i, int regno)
11809 {
11810 return find_regno_note (i, REG_DEAD, regno) != NULL
11811 || find_regno_note (i, REG_UNUSED, regno) != NULL;
11812 }
11813
11814 /* Given an insn and a reg number, remove reg dead or reg unused notes to
11815 mark it as being used after the insn. */
11816 void
11817 sh_remove_reg_dead_or_unused_notes (rtx_insn* i, int regno)
11818 {
11819 if (rtx n = find_regno_note (i, REG_DEAD, regno))
11820 remove_note (i, n);
11821 if (rtx n = find_regno_note (i, REG_UNUSED, regno))
11822 remove_note (i, n);
11823 }
11824
11825 /* Given an insn check if it contains any post/pre inc/dec mem operands and
11826 add the REG_INC notes accordingly.
11827 FIXME: This function is very similar to lra.c (add_auto_inc_notes).
11828 FIXME: This function is currently used by peephole2 patterns because
11829 the peephole2 pass does not preserve REG_INC notes. If the notes
11830 are dropped the following passes will do wrong things. */
11831 rtx_insn*
11832 sh_check_add_incdec_notes (rtx_insn* i)
11833 {
11834 struct for_each_inc_dec_clb
11835 {
11836 static int func (rtx mem ATTRIBUTE_UNUSED, rtx op ATTRIBUTE_UNUSED,
11837 rtx dest, rtx src ATTRIBUTE_UNUSED,
11838 rtx srcoff ATTRIBUTE_UNUSED, void* arg)
11839 {
11840 gcc_assert (REG_P (dest));
11841
11842 rtx_insn* i = (rtx_insn*)arg;
11843 if (find_regno_note (i, REG_INC, REGNO (dest)) == NULL)
11844 add_reg_note (i, REG_INC, dest);
11845
11846 return 0;
11847 }
11848 };
11849
11850 for_each_inc_dec (PATTERN (i), for_each_inc_dec_clb::func, i);
11851 return i;
11852 }
11853
11854 /* Given a move insn destination and a source, make sure that the move source
11855 operand is not a post-inc mem load with the same address reg as the
11856 destination. Returns the modified source operand with the post-inc removed
11857 if necessary. */
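/* For example, when moving (mem (post_inc (reg R))) into (reg R), the
   address is rewritten to the plain (mem (reg R)), since the increment
   of R would conflict with R also being the move destination. */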
11858 rtx
11859 sh_remove_overlapping_post_inc (rtx dst, rtx src)
11860 {
11861 if (!MEM_P (src))
11862 return src;
11863
11864 rtx addr = XEXP (src, 0);
11865
11866 if (GET_CODE (addr) == POST_INC
11867 && reg_overlap_mentioned_p (XEXP (addr, 0), dst))
11868 return replace_equiv_address (src, XEXP (addr, 0));
11869
11870 gcc_assert (GET_CODE (addr) != POST_MODIFY);
11871 return src;
11872 }
11873
11874 /* Emit a move insn that is safe to be used in peephole patterns. */
11875 rtx_insn*
11876 sh_peephole_emit_move_insn (rtx dst, rtx src)
11877 {
11878 return sh_check_add_incdec_notes (
11879 emit_move_insn (dst, sh_remove_overlapping_post_inc (dst, src)));
11880 }
11881
11882 /* Given an op rtx and an insn, try to find out whether the result of the
11883 specified op consists only of logical operations on T bit stores. */
11884 bool
11885 sh_is_logical_t_store_expr (rtx op, rtx_insn* insn)
11886 {
11887 if (!logical_operator (op, SImode))
11888 return false;
11889
11890 rtx ops[2] = { XEXP (op, 0), XEXP (op, 1) };
11891 int op_is_t_count = 0;
11892
11893 for (int i = 0; i < 2; ++i)
11894 {
11895 if (t_reg_operand (ops[i], VOIDmode)
11896 || negt_reg_operand (ops[i], VOIDmode))
11897 op_is_t_count++;
11898
11899 else
11900 {
11901 set_of_reg op_set = sh_find_set_of_reg
11902 (ops[i], insn, prev_nonnote_nondebug_insn_bb);
11903 if (op_set.set_src == NULL_RTX)
11904 continue;
11905
11906 if (t_reg_operand (op_set.set_src, VOIDmode)
11907 || negt_reg_operand (op_set.set_src, VOIDmode)
11908 || sh_is_logical_t_store_expr (op_set.set_src, op_set.insn))
11909 op_is_t_count++;
11910 }
11911 }
11912
11913 return op_is_t_count == 2;
11914 }
11915
11916 /* Given the operand that is extended in a sign/zero extend insn, and the
11917 insn, try to figure out whether the sign/zero extension can be replaced
11918 by a simple reg-reg copy. If so, the replacement reg rtx is returned,
11919 NULL_RTX otherwise. */
11920 rtx
11921 sh_try_omit_signzero_extend (rtx extended_op, rtx_insn* insn)
11922 {
11923 if (REG_P (extended_op))
11924 ; /* Use the reg as-is. */
11925 else if (GET_CODE (extended_op) == SUBREG && REG_P (SUBREG_REG (extended_op)))
11926 extended_op = SUBREG_REG (extended_op);
11927 else
11928 return NULL_RTX;
11929
11930 /* Reg moves must be of the same mode. */
11931 if (GET_MODE (extended_op) != SImode)
11932 return NULL_RTX;
11933
11934 set_of_reg s = sh_find_set_of_reg (extended_op, insn,
11935 prev_nonnote_nondebug_insn_bb);
11936 if (s.set_src == NULL_RTX)
11937 return NULL_RTX;
11938
11939 if (t_reg_operand (s.set_src, VOIDmode)
11940 || negt_reg_operand (s.set_src, VOIDmode))
11941 return extended_op;
11942
11943 /* If the zero extended reg was formed by a logical operation, check the
11944 operands of the logical operation. If both originated from T bit
11945 stores the zero extension can be eliminated. */
11946 else if (sh_is_logical_t_store_expr (s.set_src, s.insn))
11947 return extended_op;
11948
11949 return NULL_RTX;
11950 }
11951
11952 /* Given the current insn, which is assumed to be a movrt_negc insn, try to
11953 figure out whether it should be converted into a movt-xor sequence in
11954 the movrt_negc splitter.
11955 Returns true if insns have been modified and the splitter has succeeded. */
11956 bool
11957 sh_split_movrt_negc_to_movt_xor (rtx_insn* curr_insn, rtx operands[])
11958 {
11959 /* In cases such as
11960 tst r4,r4
11961 mov #-1,r1
11962 negc r1,r1
11963 tst r4,r4
11964 we can replace the T bit clobbering negc with a movt-xor sequence and
11965 eliminate the redundant comparison.
11966 Because the xor insn depends on register allocation results, allow this
11967 only before reload. */
11968 if (!can_create_pseudo_p ())
11969 return false;
11970
11971 set_of_reg t_before_negc = sh_find_set_of_reg
11972 (get_t_reg_rtx (), curr_insn, prev_nonnote_nondebug_insn_bb);
11973 set_of_reg t_after_negc = sh_find_set_of_reg
11974 (get_t_reg_rtx (), curr_insn, next_nonnote_nondebug_insn_bb);
11975
11976 if (t_before_negc.set_rtx != NULL_RTX && t_after_negc.set_rtx != NULL_RTX
11977 && rtx_equal_p (t_before_negc.set_rtx, t_after_negc.set_rtx)
11978 && !reg_used_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
11979 && !sh_insn_operands_modified_between_p (t_before_negc.insn,
11980 t_before_negc.insn,
11981 t_after_negc.insn)
11982 && !modified_between_p (get_t_reg_rtx (), curr_insn, t_after_negc.insn)
11983 && !sh_unspec_insn_p (t_after_negc.insn)
11984 && !volatile_insn_p (PATTERN (t_after_negc.insn))
11985 && !side_effects_p (PATTERN (t_after_negc.insn))
11986 && !may_trap_or_fault_p (PATTERN (t_after_negc.insn)))
11987 {
11988 emit_insn (gen_movrt_xor (operands[0], get_t_reg_rtx ()));
11989 set_insn_deleted (t_after_negc.insn);
11990 return true;
11991 }
11992 else
11993 return false;
11994 }
11995
11996 /* Given a reg and the current insn, see if the value of the reg originated
11997 from a sign or zero extension and return the discovered information. */
11998 sh_extending_set_of_reg
11999 sh_find_extending_set_of_reg (rtx reg, rtx_insn* curr_insn)
12000 {
12001 if (reg == NULL)
12002 return sh_extending_set_of_reg (curr_insn);
12003
12004 if (SUBREG_P (reg))
12005 reg = SUBREG_REG (reg);
12006
12007 if (!REG_P (reg))
12008 return sh_extending_set_of_reg (curr_insn);
12009
12010 /* FIXME: Also search the predecessor basic blocks. It seems that checking
12011 only the adjacent predecessor blocks would cover most of the cases.
12012 Also try to look through the first extension that we hit. There are some
12013 cases where a zero_extend is followed by an (implicit) sign_extend, and it
12014 fails to see the sign_extend. */
12015 sh_extending_set_of_reg result = sh_find_set_of_reg
12016 (reg, curr_insn, prev_nonnote_nondebug_insn_bb, true);
12017
12018 if (result.set_src != NULL)
12019 {
12020 if (GET_CODE (result.set_src) == SIGN_EXTEND
12021 || GET_CODE (result.set_src) == ZERO_EXTEND)
12022 {
12023 if (dump_file)
12024 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
12025 "explicitly sign/zero extended in insn %d\n",
12026 REGNO (reg), INSN_UID (result.insn));
12027 result.from_mode = GET_MODE (XEXP (result.set_src, 0));
12028 result.ext_code = GET_CODE (result.set_src);
12029 }
12030 else if (MEM_P (result.set_src)
12031 && (GET_MODE (result.set_src) == QImode
12032 || GET_MODE (result.set_src) == HImode)
12033 && !sh_unspec_insn_p (result.insn))
12034 {
12035 /* On SH, QImode and HImode memory loads always sign extend. However, in
12036 some cases where it seems that the higher bits are not
12037 interesting, the loads will not be expanded as sign extending
12038 insns, but as QIHImode loads into QIHImode regs. We report that
12039 the reg has been sign extended by the mem load. When it is used
12040 as such, we must convert the mem load into a sign extending insn,
12041 see also sh_extending_set_of_reg::use_as_extended_reg. */
12042 if (dump_file)
12043 fprintf (dump_file, "sh_find_extending_set_of_reg: reg %d is "
12044 "implicitly sign extended in insn %d\n",
12045 REGNO (reg), INSN_UID (result.insn));
12046 result.from_mode = GET_MODE (result.set_src);
12047 result.ext_code = SIGN_EXTEND;
12048 }
12049 }
12050
12051 return result;
12052 }
12053
12054 /* Given a reg that is known to be sign or zero extended at some insn,
12055 take the appropriate measures so that the extended value can be used as
12056 a reg at the specified insn and return the resulting reg rtx. */
12057 rtx
12058 sh_extending_set_of_reg::use_as_extended_reg (rtx_insn* use_at_insn) const
12059 {
12060 gcc_assert (insn != NULL && set_src != NULL && set_rtx != NULL);
12061 gcc_assert (ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND);
12062 gcc_assert (from_mode == QImode || from_mode == HImode);
12063
12064 if (MEM_P (set_src) && ext_code == SIGN_EXTEND)
12065 {
12066 if (dump_file)
12067 fprintf (dump_file,
12068 "use_as_extended_reg: converting non-extending mem load in "
12069 "insn %d into sign-extending load\n", INSN_UID (insn));
12070
12071 rtx r = gen_reg_rtx (SImode);
12072 rtx_insn* i0;
12073 if (from_mode == QImode)
12074 i0 = emit_insn_after (gen_extendqisi2 (r, set_src), insn);
12075 else if (from_mode == HImode)
12076 i0 = emit_insn_after (gen_extendhisi2 (r, set_src), insn);
12077 else
12078 gcc_unreachable ();
12079
12080 emit_insn_after (
12081 gen_move_insn (XEXP (set_rtx, 0),
12082 gen_lowpart (GET_MODE (set_src), r)), i0);
12083 set_insn_deleted (insn);
12084 return r;
12085 }
12086 else
12087 {
12088 rtx extension_dst = XEXP (set_rtx, 0);
12089 if (GET_MODE (extension_dst) != SImode)
12090 extension_dst = simplify_gen_subreg (SImode, extension_dst,
12091 GET_MODE (extension_dst), 0);
12092 if (modified_between_p (extension_dst, insn, use_at_insn))
12093 {
12094 if (dump_file)
12095 fprintf (dump_file,
12096 "use_as_extended_reg: dest reg %d of extending insn %d is "
12097 "modified, inserting a reg-reg copy\n",
12098 REGNO (extension_dst), INSN_UID (insn));
12099
12100 rtx r = gen_reg_rtx (SImode);
12101 emit_insn_after (gen_move_insn (r, extension_dst), insn);
12102 return r;
12103 }
12104 else
12105 {
12106 sh_remove_reg_dead_or_unused_notes (insn, REGNO (extension_dst));
12107 return extension_dst;
12108 }
12109 }
12110 }
12111
12112 bool
12113 sh_extending_set_of_reg::can_use_as_unextended_reg (void) const
12114 {
12115 if ((ext_code == SIGN_EXTEND || ext_code == ZERO_EXTEND)
12116 && (from_mode == QImode || from_mode == HImode)
12117 && set_src != NULL)
12118 return arith_reg_operand (XEXP (set_src, 0), from_mode);
12119 else
12120 return false;
12121 }
12122
12123 rtx
12124 sh_extending_set_of_reg::use_as_unextended_reg (rtx_insn* use_at_insn) const
12125 {
12126 gcc_assert (can_use_as_unextended_reg ());
12127
12128 rtx r = XEXP (set_src, 0);
12129 rtx r0 = simplify_gen_subreg (SImode, r, from_mode, 0);
12130
12131 if (modified_between_p (r, insn, use_at_insn))
12132 {
12133 rtx r1 = gen_reg_rtx (SImode);
12134 emit_insn_after (gen_move_insn (r1, r0), insn);
12135 return r1;
12136 }
12137 else
12138 {
12139 sh_remove_reg_dead_or_unused_notes (insn, SUBREG_P (r)
12140 ? REGNO (SUBREG_REG (r))
12141 : REGNO (r));
12142 return r0;
12143 }
12144 }
12145
12146 /* Given the current insn, which is assumed to be the *tst<mode>_t_subregs insn,
12147 perform the necessary checks on the operands and split it accordingly. */
12148 void
12149 sh_split_tst_subregs (rtx_insn* curr_insn, machine_mode subreg_mode,
12150 int subreg_offset, rtx operands[])
12151 {
12152 gcc_assert (subreg_mode == QImode || subreg_mode == HImode);
12153
12154 sh_extending_set_of_reg eop0 = sh_find_extending_set_of_reg (operands[0],
12155 curr_insn);
12156 sh_extending_set_of_reg eop1 = sh_find_extending_set_of_reg (operands[1],
12157 curr_insn);
12158
12159 /* If one of the operands is known to be zero extended, that's already
12160 sufficient to mask out the unwanted high bits. */
12161 if (eop0.ext_code == ZERO_EXTEND && eop0.from_mode == subreg_mode)
12162 {
12163 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
12164 operands[1]));
12165 return;
12166 }
12167 if (eop1.ext_code == ZERO_EXTEND && eop1.from_mode == subreg_mode)
12168 {
12169 emit_insn (gen_tstsi_t (operands[0],
12170 eop1.use_as_extended_reg (curr_insn)));
12171 return;
12172 }
12173
12174 /* None of the operands seem to be zero extended.
12175 If both are sign extended it's OK, too. */
12176 if (eop0.ext_code == SIGN_EXTEND && eop1.ext_code == SIGN_EXTEND
12177 && eop0.from_mode == subreg_mode && eop1.from_mode == subreg_mode)
12178 {
12179 emit_insn (gen_tstsi_t (eop0.use_as_extended_reg (curr_insn),
12180 eop1.use_as_extended_reg (curr_insn)));
12181 return;
12182 }
12183
12184 /* Otherwise we have to insert a zero extension on one of the operands to
12185 mask out the unwanted high bits.
12186 Prefer the operand that has no known extension. */
12187 if (eop0.ext_code != UNKNOWN && eop1.ext_code == UNKNOWN)
12188 std::swap (operands[0], operands[1]);
12189
12190 rtx tmp0 = gen_reg_rtx (SImode);
12191 rtx tmp1 = simplify_gen_subreg (subreg_mode, operands[0],
12192 GET_MODE (operands[0]), subreg_offset);
12193 emit_insn (subreg_mode == QImode
12194 ? gen_zero_extendqisi2 (tmp0, tmp1)
12195 : gen_zero_extendhisi2 (tmp0, tmp1));
12196 emit_insn (gen_tstsi_t (tmp0, operands[1]));
12197 }
12198
12199 /* A helper class to increment/decrement a counter variable each time a
12200 function is entered/left. */
12201 class scope_counter
12202 {
12203 public:
12204 scope_counter (int& counter) : m_counter (counter) { ++m_counter; }
12205
12206 ~scope_counter (void)
12207 {
12208 --m_counter;
12209 gcc_assert (m_counter >= 0);
12210 }
12211
12212 int count (void) const { return m_counter; }
12213
12214 private:
12215 int& m_counter;
12216 };
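/* A scope_counter constructed on a static counter at function entry
   increments it, and the destructor decrements it again when the scope
   is left; see sh_recog_treg_set_expr below. */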
12217
12218 /* Given an rtx x, determine whether the expression can be used to create
12219 an insn that calculates x and stores the result in the T bit.
12220 This is used by the 'treg_set_expr' predicate to construct insns sequences
12221 where T bit results are fed into other insns, such as addc, subc, negc
12222 insns.
12223
12224 FIXME: The patterns that expand 'treg_set_expr' operands tend to
12225 distinguish between 'positive' and 'negative' forms. For now this has to
12226 be done in the preparation code. We could also introduce
12227 'pos_treg_set_expr' and 'neg_treg_set_expr' predicates for that and write
12228 two different patterns for the 'positive' and 'negative' forms. However,
12229 the total amount of lines of code seems to be about the same and the
12230 '{pos|neg}_treg_set_expr' predicates would be more expensive, because the
12231 recog function would need to look inside the expression by temporarily
12232 splitting it. */
12233 static int sh_recog_treg_set_expr_reent_count = 0;
12234
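/* For example, an operand such as (eq (reg A) (const_int 0)) can be
   recognized here and later split into a tst insn that sets the T bit,
   possibly followed by a trailing nott (see below). */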
12235 bool
12236 sh_recog_treg_set_expr (rtx op, machine_mode mode)
12237 {
12238 scope_counter recursion (sh_recog_treg_set_expr_reent_count);
12239
12240 /* Limit the recursion count to avoid nested expressions which we can't
12241 resolve to a single treg set insn. */
12242 if (recursion.count () > 1)
12243 return false;
12244
12245 /* Early accept known possible operands before doing recog. */
12246 if (op == const0_rtx || op == const1_rtx || t_reg_operand (op, mode)
12247 || negt_reg_operand (op, mode))
12248 return true;
12249
12250 /* Early reject impossible operands before doing recog.
12251 There are some (set ((t) (subreg ...))) patterns, but we must be careful
12252 not to allow any invalid reg-reg or mem-reg moves, or else other passes
12253 such as lower-subreg will bail out. Some insns such as SH4A movua are
12254 done with UNSPEC, so we must reject those, too, or else it would result
12255 in an invalid reg -> treg move. */
12256 if (CONST_INT_P (op) || register_operand (op, mode)
12257 || memory_operand (op, mode) || sh_unspec_insn_p (op))
12258 return false;
12259
12260 if (!can_create_pseudo_p ())
12261 return false;
12262
12263 /* expand_debug_locations may call this to compute rtx costs at
12264 a very early stage. In that case, don't make new insns here to
12265 avoid codegen differences with -g. */
12266 if (currently_expanding_to_rtl)
12267 return false;
12268
12269 /* We are going to invoke recog in a re-entrant way and thus
12270 have to capture its current state and restore it afterwards. */
12271 recog_data_d prev_recog_data = recog_data;
12272
12273 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), op));
12274 SET_PREV_INSN (i) = NULL;
12275 SET_NEXT_INSN (i) = NULL;
12276
12277 /* If the comparison op doesn't have a result mode, set it to SImode. */
12278 machine_mode prev_op_mode = GET_MODE (op);
12279 if (COMPARISON_P (op) && prev_op_mode == VOIDmode)
12280 PUT_MODE (op, SImode);
12281
12282 int result = recog (PATTERN (i), i, 0);
12283
12284 /* No insn matched the expression as-is. Create a negated version and
12285 try again. If we hit a negated form, we'll allow that and append a
12286 nott sequence when splitting out the insns. Insns that do the split
12287 can then remove the trailing nott if they know how to deal with it. */
12288 if (result < 0 && COMPARISON_P (op))
12289 {
12290 machine_mode cmp_mode = GET_MODE (XEXP (op, 0));
12291 if (cmp_mode == VOIDmode)
12292 cmp_mode = GET_MODE (XEXP (op, 1));
12293
12294 rtx_code prev_code = GET_CODE (op);
12295 PUT_CODE (op, reverse_condition (GET_CODE (op)));
12296 result = recog (PATTERN (i), i, 0);
12297 PUT_CODE (op, prev_code);
12298 }
12299
12300 PUT_MODE (op, prev_op_mode);
12301 recog_data = prev_recog_data;
12302 return result >= 0;
12303 }
12304
12305 /* Returns true when recog of a 'treg_set_expr' is currently in progress.
12306 This can be used as a condition for insn/split patterns to allow certain
12307 T bit setting patterns only to be matched as sub-expressions of other
12308 patterns. */
12309 bool
12310 sh_in_recog_treg_set_expr (void)
12311 {
12312 return sh_recog_treg_set_expr_reent_count > 0;
12313 }
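/* For example, an insn or split pattern in sh.md could use this in its
   condition, e.g. "TARGET_SH1 && sh_in_recog_treg_set_expr ()", so that it
   is only considered while a treg_set_expr is being recognized.  (The
   condition shown here is illustrative, not a quote from sh.md.)  */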
12314
12315 /* Given an rtx x, which is assumed to be some expression that has been
12316 matched by the 'treg_set_expr' predicate before, split and emit the
12317 insns that are necessary to calculate the expression and store the result
12318 in the T bit.
12319 The splitting is done recursively, similar to 'try_split' in emit-rtl.c.
12320 Unfortunately we can't use 'try_split' here directly, as it tries to invoke
12321 'delete_insn' which then causes the DF parts to bail out, because we
12322 currently are inside another gen_split* function and would invoke
12323 'try_split' in a reentrant way. */
12324 static std::pair<rtx_insn*, rtx_insn*>
12325 sh_try_split_insn_simple (rtx_insn* i, rtx_insn* curr_insn, int n = 0)
12326 {
12327 if (dump_file)
12328 {
12329 fprintf (dump_file, "sh_try_split_insn_simple n = %d i = \n", n);
12330 print_rtl_single (dump_file, i);
12331 fprintf (dump_file, "\n");
12332 }
12333
12334 rtx_insn* seq = split_insns (PATTERN (i), curr_insn);
12335
12336 if (seq == NULL)
12337 return std::make_pair (i, i);
12338
12339 /* Avoid infinite splitter loops if any insn of the result matches
12340 the original pattern. */
12341 for (rtx_insn* s = seq; s != NULL; s = NEXT_INSN (s))
12342 if (INSN_P (s) && rtx_equal_p (PATTERN (s), PATTERN (i)))
12343 return std::make_pair (i, i);
12344
12345 unshare_all_rtl_in_chain (seq);
12346
12347 /* 'seq' is now a replacement for 'i'. Assuming that 'i' is an insn in
12348 a linked list, replace the single insn with the new insns. */
12349 rtx_insn* seqlast = seq;
12350 while (NEXT_INSN (seqlast) != NULL)
12351 seqlast = NEXT_INSN (seqlast);
12352
12353 if (rtx_insn* iprev = PREV_INSN (i))
12354 SET_NEXT_INSN (iprev) = seq;
12355 if (rtx_insn* inext = NEXT_INSN (i))
12356 SET_PREV_INSN (inext) = seqlast;
12357
12358 SET_PREV_INSN (seq) = PREV_INSN (i);
12359 SET_NEXT_INSN (seqlast) = NEXT_INSN (i);
12360
12361 SET_PREV_INSN (i) = NULL;
12362 SET_NEXT_INSN (i) = NULL;
12363
12364 /* Recursively split all insns. */
12365 for (i = seq; ; i = NEXT_INSN (i))
12366 {
12367 std::pair<rtx_insn*, rtx_insn*> ii =
12368 sh_try_split_insn_simple (i, curr_insn, n + 1);
12369 if (i == seq)
12370 seq = ii.first;
12371 if (i == seqlast)
12372 {
12373 seqlast = ii.second;
12374 break;
12375 }
12376 i = ii.first;
12377 }
12378
12379 return std::make_pair (seq, seqlast);
12380 }
12381
12382 sh_treg_insns
12383 sh_split_treg_set_expr (rtx x, rtx_insn* curr_insn)
12384 {
12385 if (t_reg_operand (x, VOIDmode))
12386 return sh_treg_insns ();
12387
12388 scope_counter in_treg_set_expr (sh_recog_treg_set_expr_reent_count);
12389
12390 rtx_insn* i = make_insn_raw (gen_rtx_SET (get_t_reg_rtx (), x));
12391 SET_PREV_INSN (i) = NULL;
12392 SET_NEXT_INSN (i) = NULL;
12393
12394 if (dump_file)
12395 {
12396 fprintf (dump_file, "split_treg_set_expr insn:\n");
12397 print_rtl (dump_file, i);
12398 fprintf (dump_file, "\n");
12399 }
12400
12401 /* If the insn is not found, we will try a negated form and append
12402 a nott. */
12403 bool append_nott = false;
12404
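  /* For example, if x is (le:SI (reg A) (reg B)) and only the reversed
     comparison (gt) can be matched as a T-bit setting insn (SH has cmp/gt
     but no cmp/le), the code below matches the reversed form and appends a
     nott insn that inverts the T bit afterwards.  */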
12405 /* We are going to invoke recog/split_insns in a re-entrant way and thus
12406 have to capture its current state and restore it afterwards. */
12407 recog_data_d prev_recog_data = recog_data;
12408
12409 if (negt_reg_operand (x, GET_MODE (x)))
12410 {
12411 /* This is a normal movt followed by a nott. It will be converted
12412 into a movrt after initial expansion. */
12413 XEXP (PATTERN (i), 1) = get_t_reg_rtx ();
12414 append_nott = true;
12415 }
12416 else
12417 {
12418 /* If the comparison op doesn't have a mode set, set it to SImode. */
12419 if (COMPARISON_P (x) && GET_MODE (x) == VOIDmode)
12420 PUT_MODE (x, SImode);
12421
12422 int insn_code = recog (PATTERN (i), i, 0);
12423
12424 if (insn_code < 0 && COMPARISON_P (x))
12425 {
12426 machine_mode cmp_mode = GET_MODE (XEXP (x, 0));
12427 if (cmp_mode == VOIDmode)
12428 cmp_mode = GET_MODE (XEXP (x, 1));
12429
12430 PUT_CODE (x, reverse_condition (GET_CODE (x)));
12431 insn_code = recog (PATTERN (i), i, 0);
12432 append_nott = true;
12433 }
12434
12435 gcc_assert (insn_code >= 0);
12436 }
12437
12438 /* Try to recursively split the insn. Some insns might refuse to split
12439 any further while we are in the treg_set_expr splitting phase. They
12440 will be emitted as part of the outer insn and then split again. */
12441 std::pair<rtx_insn*, rtx_insn*> insnlist =
12442 sh_try_split_insn_simple (i, curr_insn);
12443
12444 /* Restore recog state. */
12445 recog_data = prev_recog_data;
12446
12447 rtx_insn* nott_insn = sh_is_nott_insn (insnlist.second)
12448 ? insnlist.second
12449 : NULL;
12450 if (dump_file)
12451 {
12452 fprintf (dump_file, "split_treg_set_expr insnlist:\n");
12453 print_rtl (dump_file, insnlist.first);
12454 fprintf (dump_file, "\n");
12455
12456 if (nott_insn != NULL)
12457 fprintf (dump_file, "trailing nott insn %d\n", INSN_UID (nott_insn));
12458 }
12459
12460 emit_insn (insnlist.first);
12461
12462 if (nott_insn != NULL && append_nott)
12463 {
12464 if (dump_file)
12465 fprintf (dump_file, "removing trailing nott\n");
12466 remove_insn (nott_insn);
12467 nott_insn = NULL;
12468 append_nott = false;
12469 }
12470
12471 if (append_nott)
12472 nott_insn = emit_insn (gen_nott (get_t_reg_rtx ()));
12473
12474 rtx_insn* first_insn = get_insns ();
12475
12476 if (dump_file)
12477 {
12478 fprintf (dump_file, "resulting insns:\n");
12479 print_rtl (dump_file, first_insn);
12480 fprintf (dump_file, "\n");
12481 }
12482
12483 return sh_treg_insns (first_insn, nott_insn);
12484 }
12485
12486 /*------------------------------------------------------------------------------
12487 Mode switching support code.
12488 */
12489
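/* Background: on SH4-class FPUs the FPSCR.PR bit selects double-precision
   operation and FPSCR.SZ selects 64-bit FMOV transfers.  The code below
   switches these bits around insns that require a particular FP mode,
   based on the fp_mode insn attribute.  */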
12490 static void
12491 sh_emit_mode_set (int entity ATTRIBUTE_UNUSED, int mode,
12492 int prev_mode, HARD_REG_SET regs_live ATTRIBUTE_UNUSED)
12493 {
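  /* Two strategies: targets with the FPSCR toggle insns (toggle_pr and,
     when TARGET_FMOVD, toggle_sz) can simply flip the bits; otherwise
     FPSCR is read into a register, the PR/SZ bits are adjusted and the
     value is written back.  */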
12494 if ((TARGET_SH4A_FP || TARGET_SH4_300)
12495 && prev_mode != FP_MODE_NONE && prev_mode != mode)
12496 {
12497 emit_insn (gen_toggle_pr ());
12498 if (TARGET_FMOVD)
12499 emit_insn (gen_toggle_sz ());
12500 }
12501 else if (mode != FP_MODE_NONE)
12502 {
12503 rtx tmp = gen_reg_rtx (SImode);
12504 emit_insn (gen_sts_fpscr (tmp));
12505 rtx i = NULL;
12506
12507 const unsigned HOST_WIDE_INT fpbits =
12508 TARGET_FMOVD ? (FPSCR_PR | FPSCR_SZ) : FPSCR_PR;
12509
12510 if (prev_mode != FP_MODE_NONE && prev_mode != mode)
12511 i = gen_xorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
12512 else if (mode == FP_MODE_SINGLE)
12513 i = gen_andsi3 (tmp, tmp, force_reg (SImode, GEN_INT (~fpbits)));
12514 else if (mode == FP_MODE_DOUBLE)
12515 i = gen_iorsi3 (tmp, tmp, force_reg (SImode, GEN_INT (fpbits)));
12516 else
12517 gcc_unreachable ();
12518
12519 emit_insn (i);
12520 emit_insn (gen_lds_fpscr (tmp));
12521 }
12522 }
12523
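/* The functions below presumably back the TARGET_MODE_NEEDED,
   TARGET_MODE_AFTER, TARGET_MODE_ENTRY, TARGET_MODE_EXIT and
   TARGET_MODE_PRIORITY hooks registered elsewhere in this file.  */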
12524 static int
12525 sh_mode_needed (int entity ATTRIBUTE_UNUSED, rtx_insn *insn)
12526 {
12527 return recog_memoized (insn) >= 0 ? get_attr_fp_mode (insn) : FP_MODE_NONE;
12528 }
12529
12530 static int
12531 sh_mode_after (int entity ATTRIBUTE_UNUSED, int mode, rtx_insn *insn)
12532 {
12533   if (TARGET_HITACHI && recog_memoized (insn) >= 0
12534       && get_attr_fp_set (insn) != FP_SET_NONE)
12535 return (int) get_attr_fp_set (insn);
12536 else
12537 return mode;
12538 }
12539
12540 static int
12541 sh_mode_entry (int entity ATTRIBUTE_UNUSED)
12542 {
12543 return NORMAL_MODE (entity);
12544 }
12545
12546 static int
12547 sh_mode_exit (int entity ATTRIBUTE_UNUSED)
12548 {
12549 return sh_cfun_attr_renesas_p () ? FP_MODE_NONE : NORMAL_MODE (entity);
12550 }
12551
12552 static int
12553 sh_mode_priority (int entity ATTRIBUTE_UNUSED, int n)
12554 {
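  /* Assuming n == 0 asks for the highest-priority mode: prefer
     FP_MODE_SINGLE when the default FPU mode is single precision
     (TARGET_FPU_SINGLE), otherwise prefer FP_MODE_DOUBLE; n == 1 yields
     the other mode.  */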
12555 return ((TARGET_FPU_SINGLE != 0) ^ (n) ? FP_MODE_SINGLE : FP_MODE_DOUBLE);
12556 }
12557
12558 /*------------------------------------------------------------------------------
12559 Misc
12560 */
12561
12562 /* Return true if we use LRA instead of the reload pass. */
12563 bool
12564 sh_lra_p (void)
12565 {
12566 return sh_lra_flag;
12567 }
12568
12569 /* Implement TARGET_USE_BY_PIECES_INFRASTRUCTURE_P. */
12570
12571 static bool
12572 sh_use_by_pieces_infrastructure_p (unsigned HOST_WIDE_INT size,
12573 unsigned int align,
12574 enum by_pieces_operation op,
12575 bool speed_p)
12576 {
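  /* Reading of the thresholds below: when optimizing for size or when the
     data is not at least 32-bit aligned, only a single-insn by-pieces
     expansion is allowed; for 32-bit (or better) aligned data compiled for
     speed, up to 15 insns are allowed.  */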
12577 switch (op)
12578 {
12579 case MOVE_BY_PIECES:
12580 return by_pieces_ninsns (size, align, MOVE_MAX_PIECES + 1, op)
12581 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
12582 case STORE_BY_PIECES:
12583 case SET_BY_PIECES:
12584 return by_pieces_ninsns (size, align, STORE_MAX_PIECES + 1, op)
12585 < (!speed_p ? 2 : (align >= 32) ? 16 : 2);
12586 default:
12587 return default_use_by_pieces_infrastructure_p (size, align,
12588 op, speed_p);
12589 }
12590 }
12591
12592 bool
12593 sh_cannot_force_const_mem_p (machine_mode mode ATTRIBUTE_UNUSED,
12594 rtx x ATTRIBUTE_UNUSED)
12595 {
12596 return TARGET_FDPIC;
12597 }
12598
12599 /* Emit insns to load the function address from FUNCDESC (an FDPIC
12600 function descriptor) into r1 and the GOT address into r12,
12601 returning an rtx for r1. */
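/* For reference: an FDPIC function descriptor is a two-word object in
   memory; word 0 (offset 0) holds the function's entry point and word 1
   (offset 4) holds the GOT/FDPIC pointer value to be used while executing
   the callee, which matches the two loads below.  */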
12602
12603 rtx
12604 sh_load_function_descriptor (rtx funcdesc)
12605 {
12606 rtx r1 = gen_rtx_REG (Pmode, R1_REG);
12607 rtx pic_reg = gen_rtx_REG (Pmode, PIC_REG);
12608 rtx fnaddr = gen_rtx_MEM (Pmode, funcdesc);
12609 rtx gotaddr = gen_rtx_MEM (Pmode, plus_constant (Pmode, funcdesc, 4));
12610
12611 emit_move_insn (r1, fnaddr);
12612 /* The ABI requires the entry point address to be loaded first, so
12613 prevent the load from being moved after that of the GOT
12614 address. */
12615 emit_insn (gen_blockage ());
12616 emit_move_insn (pic_reg, gotaddr);
12617 return r1;
12618 }
12619
12620 /* Return an rtx holding the initial value of the FDPIC register (the
12621 FDPIC pointer passed in from the caller). */
12622
12623 rtx
12624 sh_get_fdpic_reg_initial_val (void)
12625 {
12626 return get_hard_reg_initial_val (Pmode, PIC_REG);
12627 }
12628
12629 #include "gt-sh.h"