1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #define INCLUDE_STRING
25 #include "system.h"
26 #include "coretypes.h"
27 #include "backend.h"
28 #include "target.h"
29 #include "rtl.h"
30 #include "tree.h"
31 #include "memmodel.h"
32 #include "cfghooks.h"
33 #include "df.h"
34 #include "tm_p.h"
35 #include "stringpool.h"
36 #include "attribs.h"
37 #include "optabs.h"
38 #include "regs.h"
39 #include "emit-rtl.h"
40 #include "recog.h"
41 #include "cgraph.h"
42 #include "diagnostic-core.h"
43 #include "alias.h"
44 #include "fold-const.h"
45 #include "stor-layout.h"
46 #include "calls.h"
47 #include "varasm.h"
48 #include "output.h"
49 #include "insn-attr.h"
50 #include "flags.h"
51 #include "reload.h"
52 #include "explow.h"
53 #include "expr.h"
54 #include "cfgrtl.h"
55 #include "sched-int.h"
56 #include "common/common-target.h"
57 #include "langhooks.h"
58 #include "intl.h"
59 #include "libfuncs.h"
60 #include "params.h"
61 #include "opts.h"
62 #include "dumpfile.h"
63 #include "target-globals.h"
64 #include "builtins.h"
65 #include "tm-constrs.h"
66 #include "rtl-iter.h"
67 #include "optabs-libfuncs.h"
68 #include "gimplify.h"
69 #include "gimple.h"
70 #include "selftest.h"
71
72 /* This file should be included last. */
73 #include "target-def.h"
74
75 /* Forward definitions of types. */
76 typedef struct minipool_node Mnode;
77 typedef struct minipool_fixup Mfix;
78
79 void (*arm_lang_output_object_attributes_hook)(void);
80
81 struct four_ints
82 {
83 int i[4];
84 };
85
86 /* Forward function declarations. */
87 static bool arm_const_not_ok_for_debug_p (rtx);
88 static int arm_needs_doubleword_align (machine_mode, const_tree);
89 static int arm_compute_static_chain_stack_bytes (void);
90 static arm_stack_offsets *arm_get_frame_offsets (void);
91 static void arm_compute_frame_layout (void);
92 static void arm_add_gc_roots (void);
93 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
94 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
95 static unsigned bit_count (unsigned long);
96 static unsigned bitmap_popcount (const sbitmap);
97 static int arm_address_register_rtx_p (rtx, int);
98 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
99 static bool is_called_in_ARM_mode (tree);
100 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
101 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
102 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
103 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
104 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
105 inline static int thumb1_index_register_rtx_p (rtx, int);
106 static int thumb_far_jump_used_p (void);
107 static bool thumb_force_lr_save (void);
108 static unsigned arm_size_return_regs (void);
109 static bool arm_assemble_integer (rtx, unsigned int, int);
110 static void arm_print_operand (FILE *, rtx, int);
111 static void arm_print_operand_address (FILE *, machine_mode, rtx);
112 static bool arm_print_operand_punct_valid_p (unsigned char code);
113 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
114 static arm_cc get_arm_condition_code (rtx);
115 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
116 static const char *output_multi_immediate (rtx *, const char *, const char *,
117 int, HOST_WIDE_INT);
118 static const char *shift_op (rtx, HOST_WIDE_INT *);
119 static struct machine_function *arm_init_machine_status (void);
120 static void thumb_exit (FILE *, int);
121 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
122 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
123 static Mnode *add_minipool_forward_ref (Mfix *);
124 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
125 static Mnode *add_minipool_backward_ref (Mfix *);
126 static void assign_minipool_offsets (Mfix *);
127 static void arm_print_value (FILE *, rtx);
128 static void dump_minipool (rtx_insn *);
129 static int arm_barrier_cost (rtx_insn *);
130 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
131 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
132 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
133 machine_mode, rtx);
134 static void arm_reorg (void);
135 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
136 static unsigned long arm_compute_save_reg0_reg12_mask (void);
137 static unsigned long arm_compute_save_core_reg_mask (void);
138 static unsigned long arm_isr_value (tree);
139 static unsigned long arm_compute_func_type (void);
140 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
141 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
142 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
143 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
144 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
145 #endif
146 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
147 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
148 static void arm_output_function_epilogue (FILE *);
149 static void arm_output_function_prologue (FILE *);
150 static int arm_comp_type_attributes (const_tree, const_tree);
151 static void arm_set_default_type_attributes (tree);
152 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
153 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
154 static int optimal_immediate_sequence (enum rtx_code code,
155 unsigned HOST_WIDE_INT val,
156 struct four_ints *return_sequence);
157 static int optimal_immediate_sequence_1 (enum rtx_code code,
158 unsigned HOST_WIDE_INT val,
159 struct four_ints *return_sequence,
160 int i);
161 static int arm_get_strip_length (int);
162 static bool arm_function_ok_for_sibcall (tree, tree);
163 static machine_mode arm_promote_function_mode (const_tree,
164 machine_mode, int *,
165 const_tree, int);
166 static bool arm_return_in_memory (const_tree, const_tree);
167 static rtx arm_function_value (const_tree, const_tree, bool);
168 static rtx arm_libcall_value_1 (machine_mode);
169 static rtx arm_libcall_value (machine_mode, const_rtx);
170 static bool arm_function_value_regno_p (const unsigned int);
171 static void arm_internal_label (FILE *, const char *, unsigned long);
172 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
173 tree);
174 static bool arm_have_conditional_execution (void);
175 static bool arm_cannot_force_const_mem (machine_mode, rtx);
176 static bool arm_legitimate_constant_p (machine_mode, rtx);
177 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
178 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
179 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
180 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
181 static void emit_constant_insn (rtx cond, rtx pattern);
182 static rtx_insn *emit_set_insn (rtx, rtx);
183 static rtx emit_multi_reg_push (unsigned long, unsigned long);
184 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
185 tree, bool);
186 static rtx arm_function_arg (cumulative_args_t, machine_mode,
187 const_tree, bool);
188 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
189 const_tree, bool);
190 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
191 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
192 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
193 const_tree);
194 static rtx aapcs_libcall_value (machine_mode);
195 static int aapcs_select_return_coproc (const_tree, const_tree);
196
197 #ifdef OBJECT_FORMAT_ELF
198 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
199 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
200 #endif
201 #ifndef ARM_PE
202 static void arm_encode_section_info (tree, rtx, int);
203 #endif
204
205 static void arm_file_end (void);
206 static void arm_file_start (void);
207 static void arm_insert_attributes (tree, tree *);
208
209 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
210 tree, int *, int);
211 static bool arm_pass_by_reference (cumulative_args_t,
212 machine_mode, const_tree, bool);
213 static bool arm_promote_prototypes (const_tree);
214 static bool arm_default_short_enums (void);
215 static bool arm_align_anon_bitfield (void);
216 static bool arm_return_in_msb (const_tree);
217 static bool arm_must_pass_in_stack (machine_mode, const_tree);
218 static bool arm_return_in_memory (const_tree, const_tree);
219 #if ARM_UNWIND_INFO
220 static void arm_unwind_emit (FILE *, rtx_insn *);
221 static bool arm_output_ttype (rtx);
222 static void arm_asm_emit_except_personality (rtx);
223 #endif
224 static void arm_asm_init_sections (void);
225 static rtx arm_dwarf_register_span (rtx);
226
227 static tree arm_cxx_guard_type (void);
228 static bool arm_cxx_guard_mask_bit (void);
229 static tree arm_get_cookie_size (tree);
230 static bool arm_cookie_has_size (void);
231 static bool arm_cxx_cdtor_returns_this (void);
232 static bool arm_cxx_key_method_may_be_inline (void);
233 static void arm_cxx_determine_class_data_visibility (tree);
234 static bool arm_cxx_class_data_always_comdat (void);
235 static bool arm_cxx_use_aeabi_atexit (void);
236 static void arm_init_libfuncs (void);
237 static tree arm_build_builtin_va_list (void);
238 static void arm_expand_builtin_va_start (tree, rtx);
239 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
240 static void arm_option_override (void);
241 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
242 static void arm_option_restore (struct gcc_options *,
243 struct cl_target_option *);
244 static void arm_override_options_after_change (void);
245 static void arm_option_print (FILE *, int, struct cl_target_option *);
246 static void arm_set_current_function (tree);
247 static bool arm_can_inline_p (tree, tree);
248 static void arm_relayout_function (tree);
249 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
250 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
251 static bool arm_sched_can_speculate_insn (rtx_insn *);
252 static bool arm_macro_fusion_p (void);
253 static bool arm_cannot_copy_insn_p (rtx_insn *);
254 static int arm_issue_rate (void);
255 static int arm_first_cycle_multipass_dfa_lookahead (void);
256 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
257 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
258 static bool arm_output_addr_const_extra (FILE *, rtx);
259 static bool arm_allocate_stack_slots_for_args (void);
260 static bool arm_warn_func_return (tree);
261 static tree arm_promoted_type (const_tree t);
262 static bool arm_scalar_mode_supported_p (scalar_mode);
263 static bool arm_frame_pointer_required (void);
264 static bool arm_can_eliminate (const int, const int);
265 static void arm_asm_trampoline_template (FILE *);
266 static void arm_trampoline_init (rtx, tree, rtx);
267 static rtx arm_trampoline_adjust_address (rtx);
268 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
269 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
270 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
271 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
272 static bool arm_array_mode_supported_p (machine_mode,
273 unsigned HOST_WIDE_INT);
274 static machine_mode arm_preferred_simd_mode (scalar_mode);
275 static bool arm_class_likely_spilled_p (reg_class_t);
276 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
277 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
278 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
279 const_tree type,
280 int misalignment,
281 bool is_packed);
282 static void arm_conditional_register_usage (void);
283 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
284 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
285 static unsigned int arm_autovectorize_vector_sizes (void);
286 static int arm_default_branch_cost (bool, bool);
287 static int arm_cortex_a5_branch_cost (bool, bool);
288 static int arm_cortex_m_branch_cost (bool, bool);
289 static int arm_cortex_m7_branch_cost (bool, bool);
290
291 static bool arm_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices);
292
293 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
294
295 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
296 tree vectype,
297 int misalign ATTRIBUTE_UNUSED);
298 static unsigned arm_add_stmt_cost (void *data, int count,
299 enum vect_cost_for_stmt kind,
300 struct _stmt_vec_info *stmt_info,
301 int misalign,
302 enum vect_cost_model_location where);
303
304 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
305 bool op0_preserve_value);
306 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
307
308 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
309 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
310 const_tree);
311 static section *arm_function_section (tree, enum node_frequency, bool, bool);
312 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
313 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
314 int reloc);
315 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
316 static opt_scalar_float_mode arm_floatn_mode (int, bool);
317 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
318 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
319 static bool arm_modes_tieable_p (machine_mode, machine_mode);
320 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
321 \f
322 /* Table of machine attributes. */
323 static const struct attribute_spec arm_attribute_table[] =
324 {
325 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
326 affects_type_identity, exclusions } */
327 /* Function calls made to this symbol must be done indirectly, because
328 it may lie outside of the 26 bit addressing range of a normal function
329 call. */
330 { "long_call", 0, 0, false, true, true, NULL, false, NULL },
331 /* Whereas these functions are always known to reside within the 26 bit
332 addressing range. */
333 { "short_call", 0, 0, false, true, true, NULL, false, NULL },
334 /* Specify the procedure call conventions for a function. */
335 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
336 false, NULL },
337 /* Interrupt Service Routines have special prologue and epilogue requirements. */
338 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
339 false, NULL },
340 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
341 false, NULL },
342 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
343 false, NULL },
344 #ifdef ARM_PE
345 /* ARM/PE has three new attributes:
346 interfacearm - ?
347 dllexport - for exporting a function/variable that will live in a dll
348 dllimport - for importing a function/variable from a dll
349
350 Microsoft allows multiple declspecs in one __declspec, separating
351 them with spaces. We do NOT support this. Instead, use __declspec
352 multiple times.
353 */
354 { "dllimport", 0, 0, true, false, false, NULL, false, NULL },
355 { "dllexport", 0, 0, true, false, false, NULL, false, NULL },
356 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
357 false, NULL },
358 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
359 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false,
360 NULL },
361 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false,
362 NULL },
363 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
364 false, NULL },
365 #endif
366 /* ARMv8-M Security Extensions support. */
367 { "cmse_nonsecure_entry", 0, 0, true, false, false,
368 arm_handle_cmse_nonsecure_entry, false, NULL },
369 { "cmse_nonsecure_call", 0, 0, true, false, false,
370 arm_handle_cmse_nonsecure_call, true, NULL },
371 { NULL, 0, 0, false, false, false, NULL, false, NULL }
372 };
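/* Usage sketch (editorial illustration, not part of the original file):
   how user code might apply some of the attributes declared above.  The
   function names are hypothetical.

     extern int far_helper (int) __attribute__ ((long_call));

     void __attribute__ ((interrupt ("IRQ"))) my_irq_handler (void);

     int __attribute__ ((cmse_nonsecure_entry)) ns_read_counter (void);

   "long_call" forces an indirect call sequence, "interrupt" selects the
   ISR prologue/epilogue handled by arm_handle_isr_attribute, and
   "cmse_nonsecure_entry" marks an ARMv8-M secure entry function and
   requires compiling with -mcmse.  */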
373 \f
374 /* Initialize the GCC target structure. */
375 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
376 #undef TARGET_MERGE_DECL_ATTRIBUTES
377 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
378 #endif
379
380 #undef TARGET_LEGITIMIZE_ADDRESS
381 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
382
383 #undef TARGET_ATTRIBUTE_TABLE
384 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
385
386 #undef TARGET_INSERT_ATTRIBUTES
387 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
388
389 #undef TARGET_ASM_FILE_START
390 #define TARGET_ASM_FILE_START arm_file_start
391 #undef TARGET_ASM_FILE_END
392 #define TARGET_ASM_FILE_END arm_file_end
393
394 #undef TARGET_ASM_ALIGNED_SI_OP
395 #define TARGET_ASM_ALIGNED_SI_OP NULL
396 #undef TARGET_ASM_INTEGER
397 #define TARGET_ASM_INTEGER arm_assemble_integer
398
399 #undef TARGET_PRINT_OPERAND
400 #define TARGET_PRINT_OPERAND arm_print_operand
401 #undef TARGET_PRINT_OPERAND_ADDRESS
402 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
403 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
404 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
405
406 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
407 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
408
409 #undef TARGET_ASM_FUNCTION_PROLOGUE
410 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
411
412 #undef TARGET_ASM_FUNCTION_EPILOGUE
413 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
414
415 #undef TARGET_CAN_INLINE_P
416 #define TARGET_CAN_INLINE_P arm_can_inline_p
417
418 #undef TARGET_RELAYOUT_FUNCTION
419 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
420
421 #undef TARGET_OPTION_OVERRIDE
422 #define TARGET_OPTION_OVERRIDE arm_option_override
423
424 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
425 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
426
427 #undef TARGET_OPTION_SAVE
428 #define TARGET_OPTION_SAVE arm_option_save
429
430 #undef TARGET_OPTION_RESTORE
431 #define TARGET_OPTION_RESTORE arm_option_restore
432
433 #undef TARGET_OPTION_PRINT
434 #define TARGET_OPTION_PRINT arm_option_print
435
436 #undef TARGET_COMP_TYPE_ATTRIBUTES
437 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
438
439 #undef TARGET_SCHED_CAN_SPECULATE_INSN
440 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
441
442 #undef TARGET_SCHED_MACRO_FUSION_P
443 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
444
445 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
446 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
447
448 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
449 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
450
451 #undef TARGET_SCHED_ADJUST_COST
452 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
453
454 #undef TARGET_SET_CURRENT_FUNCTION
455 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
456
457 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
458 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
459
460 #undef TARGET_SCHED_REORDER
461 #define TARGET_SCHED_REORDER arm_sched_reorder
462
463 #undef TARGET_REGISTER_MOVE_COST
464 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
465
466 #undef TARGET_MEMORY_MOVE_COST
467 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
468
469 #undef TARGET_ENCODE_SECTION_INFO
470 #ifdef ARM_PE
471 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
472 #else
473 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
474 #endif
475
476 #undef TARGET_STRIP_NAME_ENCODING
477 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
478
479 #undef TARGET_ASM_INTERNAL_LABEL
480 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
481
482 #undef TARGET_FLOATN_MODE
483 #define TARGET_FLOATN_MODE arm_floatn_mode
484
485 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
486 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
487
488 #undef TARGET_FUNCTION_VALUE
489 #define TARGET_FUNCTION_VALUE arm_function_value
490
491 #undef TARGET_LIBCALL_VALUE
492 #define TARGET_LIBCALL_VALUE arm_libcall_value
493
494 #undef TARGET_FUNCTION_VALUE_REGNO_P
495 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
496
497 #undef TARGET_ASM_OUTPUT_MI_THUNK
498 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
499 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
500 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
501
502 #undef TARGET_RTX_COSTS
503 #define TARGET_RTX_COSTS arm_rtx_costs
504 #undef TARGET_ADDRESS_COST
505 #define TARGET_ADDRESS_COST arm_address_cost
506
507 #undef TARGET_SHIFT_TRUNCATION_MASK
508 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
509 #undef TARGET_VECTOR_MODE_SUPPORTED_P
510 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
511 #undef TARGET_ARRAY_MODE_SUPPORTED_P
512 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
513 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
514 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
515 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
516 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
517 arm_autovectorize_vector_sizes
518
519 #undef TARGET_MACHINE_DEPENDENT_REORG
520 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
521
522 #undef TARGET_INIT_BUILTINS
523 #define TARGET_INIT_BUILTINS arm_init_builtins
524 #undef TARGET_EXPAND_BUILTIN
525 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
526 #undef TARGET_BUILTIN_DECL
527 #define TARGET_BUILTIN_DECL arm_builtin_decl
528
529 #undef TARGET_INIT_LIBFUNCS
530 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
531
532 #undef TARGET_PROMOTE_FUNCTION_MODE
533 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
534 #undef TARGET_PROMOTE_PROTOTYPES
535 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
536 #undef TARGET_PASS_BY_REFERENCE
537 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
538 #undef TARGET_ARG_PARTIAL_BYTES
539 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
540 #undef TARGET_FUNCTION_ARG
541 #define TARGET_FUNCTION_ARG arm_function_arg
542 #undef TARGET_FUNCTION_ARG_ADVANCE
543 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
544 #undef TARGET_FUNCTION_ARG_PADDING
545 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
546 #undef TARGET_FUNCTION_ARG_BOUNDARY
547 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
548
549 #undef TARGET_SETUP_INCOMING_VARARGS
550 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
551
552 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
553 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
554
555 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
556 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
557 #undef TARGET_TRAMPOLINE_INIT
558 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
559 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
560 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
561
562 #undef TARGET_WARN_FUNC_RETURN
563 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
564
565 #undef TARGET_DEFAULT_SHORT_ENUMS
566 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
567
568 #undef TARGET_ALIGN_ANON_BITFIELD
569 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
570
571 #undef TARGET_NARROW_VOLATILE_BITFIELD
572 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
573
574 #undef TARGET_CXX_GUARD_TYPE
575 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
576
577 #undef TARGET_CXX_GUARD_MASK_BIT
578 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
579
580 #undef TARGET_CXX_GET_COOKIE_SIZE
581 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
582
583 #undef TARGET_CXX_COOKIE_HAS_SIZE
584 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
585
586 #undef TARGET_CXX_CDTOR_RETURNS_THIS
587 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
588
589 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
590 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
591
592 #undef TARGET_CXX_USE_AEABI_ATEXIT
593 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
594
595 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
596 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
597 arm_cxx_determine_class_data_visibility
598
599 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
600 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
601
602 #undef TARGET_RETURN_IN_MSB
603 #define TARGET_RETURN_IN_MSB arm_return_in_msb
604
605 #undef TARGET_RETURN_IN_MEMORY
606 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
607
608 #undef TARGET_MUST_PASS_IN_STACK
609 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
610
611 #if ARM_UNWIND_INFO
612 #undef TARGET_ASM_UNWIND_EMIT
613 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
614
615 /* EABI unwinding tables use a different format for the typeinfo tables. */
616 #undef TARGET_ASM_TTYPE
617 #define TARGET_ASM_TTYPE arm_output_ttype
618
619 #undef TARGET_ARM_EABI_UNWINDER
620 #define TARGET_ARM_EABI_UNWINDER true
621
622 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
623 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
624
625 #endif /* ARM_UNWIND_INFO */
626
627 #undef TARGET_ASM_INIT_SECTIONS
628 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
629
630 #undef TARGET_DWARF_REGISTER_SPAN
631 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
632
633 #undef TARGET_CANNOT_COPY_INSN_P
634 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
635
636 #ifdef HAVE_AS_TLS
637 #undef TARGET_HAVE_TLS
638 #define TARGET_HAVE_TLS true
639 #endif
640
641 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
642 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
643
644 #undef TARGET_LEGITIMATE_CONSTANT_P
645 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
646
647 #undef TARGET_CANNOT_FORCE_CONST_MEM
648 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
649
650 #undef TARGET_MAX_ANCHOR_OFFSET
651 #define TARGET_MAX_ANCHOR_OFFSET 4095
652
653 /* The minimum is set such that the total size of the block
654 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
655 divisible by eight, ensuring natural spacing of anchors. */
656 #undef TARGET_MIN_ANCHOR_OFFSET
657 #define TARGET_MIN_ANCHOR_OFFSET -4088
658
659 #undef TARGET_SCHED_ISSUE_RATE
660 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
661
662 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
663 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
664 arm_first_cycle_multipass_dfa_lookahead
665
666 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
667 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
668 arm_first_cycle_multipass_dfa_lookahead_guard
669
670 #undef TARGET_MANGLE_TYPE
671 #define TARGET_MANGLE_TYPE arm_mangle_type
672
673 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
674 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
675
676 #undef TARGET_BUILD_BUILTIN_VA_LIST
677 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
678 #undef TARGET_EXPAND_BUILTIN_VA_START
679 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
680 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
681 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
682
683 #ifdef HAVE_AS_TLS
684 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
685 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
686 #endif
687
688 #undef TARGET_LEGITIMATE_ADDRESS_P
689 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
690
691 #undef TARGET_PREFERRED_RELOAD_CLASS
692 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
693
694 #undef TARGET_PROMOTED_TYPE
695 #define TARGET_PROMOTED_TYPE arm_promoted_type
696
697 #undef TARGET_SCALAR_MODE_SUPPORTED_P
698 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
699
700 #undef TARGET_COMPUTE_FRAME_LAYOUT
701 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
702
703 #undef TARGET_FRAME_POINTER_REQUIRED
704 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
705
706 #undef TARGET_CAN_ELIMINATE
707 #define TARGET_CAN_ELIMINATE arm_can_eliminate
708
709 #undef TARGET_CONDITIONAL_REGISTER_USAGE
710 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
711
712 #undef TARGET_CLASS_LIKELY_SPILLED_P
713 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
714
715 #undef TARGET_VECTORIZE_BUILTINS
716 #define TARGET_VECTORIZE_BUILTINS
717
718 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
719 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
720 arm_builtin_vectorized_function
721
722 #undef TARGET_VECTOR_ALIGNMENT
723 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
724
725 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
726 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
727 arm_vector_alignment_reachable
728
729 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
730 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
731 arm_builtin_support_vector_misalignment
732
733 #undef TARGET_PREFERRED_RENAME_CLASS
734 #define TARGET_PREFERRED_RENAME_CLASS \
735 arm_preferred_rename_class
736
737 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
738 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
739 arm_vectorize_vec_perm_const_ok
740
741 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
742 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
743 arm_builtin_vectorization_cost
744 #undef TARGET_VECTORIZE_ADD_STMT_COST
745 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
746
747 #undef TARGET_CANONICALIZE_COMPARISON
748 #define TARGET_CANONICALIZE_COMPARISON \
749 arm_canonicalize_comparison
750
751 #undef TARGET_ASAN_SHADOW_OFFSET
752 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
753
754 #undef MAX_INSN_PER_IT_BLOCK
755 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
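/* Editorial illustration: without -mrestrict-it a single Thumb-2 IT
   block may guard up to four conditional instructions, e.g.

     itte   eq
     moveq  r0, #1
     addeq  r1, r1, #4
     movne  r0, #0

   whereas with -mrestrict-it the macro above limits each IT block to a
   single instruction.  */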
756
757 #undef TARGET_CAN_USE_DOLOOP_P
758 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
759
760 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
761 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
762
763 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
764 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
765
766 #undef TARGET_SCHED_FUSION_PRIORITY
767 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
768
769 #undef TARGET_ASM_FUNCTION_SECTION
770 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
771
772 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
773 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
774
775 #undef TARGET_SECTION_TYPE_FLAGS
776 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
777
778 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
779 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
780
781 #undef TARGET_C_EXCESS_PRECISION
782 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
783
784 /* Although the architecture reserves bits 0 and 1, only the former is
785 used for ARM/Thumb ISA selection in v7 and earlier versions. */
786 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
787 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
788
789 #undef TARGET_FIXED_CONDITION_CODE_REGS
790 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
791
792 #undef TARGET_HARD_REGNO_NREGS
793 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
794 #undef TARGET_HARD_REGNO_MODE_OK
795 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
796
797 #undef TARGET_MODES_TIEABLE_P
798 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
799
800 #undef TARGET_CAN_CHANGE_MODE_CLASS
801 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
802
803 #undef TARGET_CONSTANT_ALIGNMENT
804 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
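/* In the usual GCC backend pattern, the TARGET_* macros defined above
   feed the TARGET_INITIALIZER built by target-def.h, and the target
   vector is instantiated elsewhere in this file as

     struct gcc_target targetm = TARGET_INITIALIZER;

   Hooks not overridden here keep their default implementations.  */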
805 \f
806 /* Obstack for minipool constant handling. */
807 static struct obstack minipool_obstack;
808 static char * minipool_startobj;
809
810 /* The maximum number of insns skipped which
811 will be conditionalised if possible. */
812 static int max_insns_skipped = 5;
813
814 extern FILE * asm_out_file;
815
816 /* True if we are currently building a constant table. */
817 int making_const_table;
818
819 /* The processor for which instructions should be scheduled. */
820 enum processor_type arm_tune = TARGET_CPU_arm_none;
821
822 /* The current tuning set. */
823 const struct tune_params *current_tune;
824
825 /* Which floating point hardware to schedule for. */
826 int arm_fpu_attr;
827
828 /* Used for Thumb call_via trampolines. */
829 rtx thumb_call_via_label[14];
830 static int thumb_call_reg_needed;
831
832 /* The bits in this mask specify which instruction scheduling options should
833 be used. */
834 unsigned int tune_flags = 0;
835
836 /* The highest ARM architecture version supported by the
837 target. */
838 enum base_architecture arm_base_arch = BASE_ARCH_0;
839
840 /* Active target architecture and tuning. */
841
842 struct arm_build_target arm_active_target;
843
844 /* The following are used in the arm.md file as equivalents to bits
845 in the above two flag variables. */
846
847 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
848 int arm_arch3m = 0;
849
850 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
851 int arm_arch4 = 0;
852
853 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
854 int arm_arch4t = 0;
855
856 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
857 int arm_arch5 = 0;
858
859 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
860 int arm_arch5e = 0;
861
862 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
863 int arm_arch5te = 0;
864
865 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
866 int arm_arch6 = 0;
867
868 /* Nonzero if this chip supports the ARM 6K extensions. */
869 int arm_arch6k = 0;
870
871 /* Nonzero if this chip supports the ARM 6KZ extensions. */
872 int arm_arch6kz = 0;
873
874 /* Nonzero if instructions present in ARMv6-M can be used. */
875 int arm_arch6m = 0;
876
877 /* Nonzero if this chip supports the ARM 7 extensions. */
878 int arm_arch7 = 0;
879
880 /* Nonzero if this chip supports the Large Physical Address Extension. */
881 int arm_arch_lpae = 0;
882
883 /* Nonzero if instructions not present in the 'M' profile can be used. */
884 int arm_arch_notm = 0;
885
886 /* Nonzero if instructions present in ARMv7E-M can be used. */
887 int arm_arch7em = 0;
888
889 /* Nonzero if instructions present in ARMv8 can be used. */
890 int arm_arch8 = 0;
891
892 /* Nonzero if this chip supports the ARMv8.1 extensions. */
893 int arm_arch8_1 = 0;
894
895 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
896 int arm_arch8_2 = 0;
897
898 /* Nonzero if this chip supports the FP16 instructions extension of ARM
899 Architecture 8.2. */
900 int arm_fp16_inst = 0;
901
902 /* Nonzero if this chip can benefit from load scheduling. */
903 int arm_ld_sched = 0;
904
905 /* Nonzero if this chip is a StrongARM. */
906 int arm_tune_strongarm = 0;
907
908 /* Nonzero if this chip supports Intel Wireless MMX technology. */
909 int arm_arch_iwmmxt = 0;
910
911 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
912 int arm_arch_iwmmxt2 = 0;
913
914 /* Nonzero if this chip is an XScale. */
915 int arm_arch_xscale = 0;
916
917 /* Nonzero if tuning for XScale. */
918 int arm_tune_xscale = 0;
919
920 /* Nonzero if we want to tune for stores that access the write-buffer.
921 This typically means an ARM6 or ARM7 with MMU or MPU. */
922 int arm_tune_wbuf = 0;
923
924 /* Nonzero if tuning for Cortex-A9. */
925 int arm_tune_cortex_a9 = 0;
926
927 /* Nonzero if we should define __THUMB_INTERWORK__ in the
928 preprocessor.
929 XXX This is a bit of a hack; it's intended to help work around
930 problems in GLD which doesn't understand that armv5t code is
931 interworking clean. */
932 int arm_cpp_interwork = 0;
933
934 /* Nonzero if chip supports Thumb 1. */
935 int arm_arch_thumb1;
936
937 /* Nonzero if chip supports Thumb 2. */
938 int arm_arch_thumb2;
939
940 /* Nonzero if chip supports integer division instruction. */
941 int arm_arch_arm_hwdiv;
942 int arm_arch_thumb_hwdiv;
943
944 /* Nonzero if chip disallows volatile memory access in IT block. */
945 int arm_arch_no_volatile_ce;
946
947 /* Nonzero if we should use Neon to handle 64-bit operations rather
948 than core registers. */
949 int prefer_neon_for_64bits = 0;
950
951 /* Nonzero if we shouldn't use literal pools. */
952 bool arm_disable_literal_pool = false;
953
954 /* The register number to be used for the PIC offset register. */
955 unsigned arm_pic_register = INVALID_REGNUM;
956
957 enum arm_pcs arm_pcs_default;
958
959 /* For an explanation of these variables, see final_prescan_insn below. */
960 int arm_ccfsm_state;
961 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
962 enum arm_cond_code arm_current_cc;
963
964 rtx arm_target_insn;
965 int arm_target_label;
966 /* The number of conditionally executed insns, including the current insn. */
967 int arm_condexec_count = 0;
968 /* A bitmask specifying the patterns for the IT block.
969 Zero means do not output an IT block before this insn. */
970 int arm_condexec_mask = 0;
971 /* The number of bits used in arm_condexec_mask. */
972 int arm_condexec_masklen = 0;
973
974 /* Nonzero if chip supports the ARMv8 CRC instructions. */
975 int arm_arch_crc = 0;
976
977 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
978 int arm_arch_dotprod = 0;
979
980 /* Nonzero if chip supports the ARMv8-M security extensions. */
981 int arm_arch_cmse = 0;
982
983 /* Nonzero if the core has a very small, high-latency multiply unit. */
984 int arm_m_profile_small_mul = 0;
985
986 /* The condition codes of the ARM, and the inverse function. */
987 static const char * const arm_condition_codes[] =
988 {
989 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
990 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
991 };
992
993 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
994 int arm_regs_in_sequence[] =
995 {
996 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
997 };
998
999 #define ARM_LSL_NAME "lsl"
1000 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1001
1002 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1003 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1004 | (1 << PIC_OFFSET_TABLE_REGNUM)))
1005 \f
1006 /* Initialization code. */
1007
1008 struct cpu_tune
1009 {
1010 enum processor_type scheduler;
1011 unsigned int tune_flags;
1012 const struct tune_params *tune;
1013 };
1014
1015 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1016 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1017 { \
1018 num_slots, \
1019 l1_size, \
1020 l1_line_size \
1021 }
1022
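/* Expansion sketch (hypothetical argument values): a tune_params entry
   would use one of the macros above, e.g.

     ARM_PREFETCH_BENEFICIAL (4, 32768, 64)

   expands to { 4, 32768, 64 }, i.e. num_slots, l1_size and l1_line_size
   in that order, while ARM_PREFETCH_NOT_BENEFICIAL expands to
   { 0, -1, -1 }.  */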
1023 /* arm generic vectorizer costs. */
1024 static const
1025 struct cpu_vec_costs arm_default_vec_cost = {
1026 1, /* scalar_stmt_cost. */
1027 1, /* scalar_load_cost. */
1028 1, /* scalar_store_cost. */
1029 1, /* vec_stmt_cost. */
1030 1, /* vec_to_scalar_cost. */
1031 1, /* scalar_to_vec_cost. */
1032 1, /* vec_align_load_cost. */
1033 1, /* vec_unalign_load_cost. */
1034 1, /* vec_unalign_store_cost. */
1035 1, /* vec_store_cost. */
1036 3, /* cond_taken_branch_cost. */
1037 1, /* cond_not_taken_branch_cost. */
1038 };
1039
1040 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1041 #include "aarch-cost-tables.h"
1042
1043
1044
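/* Reference note: the cost-table entries below are expressed in
   COSTS_N_INSNS units (rtl.h defines COSTS_N_INSNS (N) as (N) * 4), so
   for example COSTS_N_INSNS (2) == 8, and a zero entry means no extra
   cost on top of the baseline the rtx-cost code already charges.  */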
1045 const struct cpu_cost_table cortexa9_extra_costs =
1046 {
1047 /* ALU */
1048 {
1049 0, /* arith. */
1050 0, /* logical. */
1051 0, /* shift. */
1052 COSTS_N_INSNS (1), /* shift_reg. */
1053 COSTS_N_INSNS (1), /* arith_shift. */
1054 COSTS_N_INSNS (2), /* arith_shift_reg. */
1055 0, /* log_shift. */
1056 COSTS_N_INSNS (1), /* log_shift_reg. */
1057 COSTS_N_INSNS (1), /* extend. */
1058 COSTS_N_INSNS (2), /* extend_arith. */
1059 COSTS_N_INSNS (1), /* bfi. */
1060 COSTS_N_INSNS (1), /* bfx. */
1061 0, /* clz. */
1062 0, /* rev. */
1063 0, /* non_exec. */
1064 true /* non_exec_costs_exec. */
1065 },
1066 {
1067 /* MULT SImode */
1068 {
1069 COSTS_N_INSNS (3), /* simple. */
1070 COSTS_N_INSNS (3), /* flag_setting. */
1071 COSTS_N_INSNS (2), /* extend. */
1072 COSTS_N_INSNS (3), /* add. */
1073 COSTS_N_INSNS (2), /* extend_add. */
1074 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1075 },
1076 /* MULT DImode */
1077 {
1078 0, /* simple (N/A). */
1079 0, /* flag_setting (N/A). */
1080 COSTS_N_INSNS (4), /* extend. */
1081 0, /* add (N/A). */
1082 COSTS_N_INSNS (4), /* extend_add. */
1083 0 /* idiv (N/A). */
1084 }
1085 },
1086 /* LD/ST */
1087 {
1088 COSTS_N_INSNS (2), /* load. */
1089 COSTS_N_INSNS (2), /* load_sign_extend. */
1090 COSTS_N_INSNS (2), /* ldrd. */
1091 COSTS_N_INSNS (2), /* ldm_1st. */
1092 1, /* ldm_regs_per_insn_1st. */
1093 2, /* ldm_regs_per_insn_subsequent. */
1094 COSTS_N_INSNS (5), /* loadf. */
1095 COSTS_N_INSNS (5), /* loadd. */
1096 COSTS_N_INSNS (1), /* load_unaligned. */
1097 COSTS_N_INSNS (2), /* store. */
1098 COSTS_N_INSNS (2), /* strd. */
1099 COSTS_N_INSNS (2), /* stm_1st. */
1100 1, /* stm_regs_per_insn_1st. */
1101 2, /* stm_regs_per_insn_subsequent. */
1102 COSTS_N_INSNS (1), /* storef. */
1103 COSTS_N_INSNS (1), /* stored. */
1104 COSTS_N_INSNS (1), /* store_unaligned. */
1105 COSTS_N_INSNS (1), /* loadv. */
1106 COSTS_N_INSNS (1) /* storev. */
1107 },
1108 {
1109 /* FP SFmode */
1110 {
1111 COSTS_N_INSNS (14), /* div. */
1112 COSTS_N_INSNS (4), /* mult. */
1113 COSTS_N_INSNS (7), /* mult_addsub. */
1114 COSTS_N_INSNS (30), /* fma. */
1115 COSTS_N_INSNS (3), /* addsub. */
1116 COSTS_N_INSNS (1), /* fpconst. */
1117 COSTS_N_INSNS (1), /* neg. */
1118 COSTS_N_INSNS (3), /* compare. */
1119 COSTS_N_INSNS (3), /* widen. */
1120 COSTS_N_INSNS (3), /* narrow. */
1121 COSTS_N_INSNS (3), /* toint. */
1122 COSTS_N_INSNS (3), /* fromint. */
1123 COSTS_N_INSNS (3) /* roundint. */
1124 },
1125 /* FP DFmode */
1126 {
1127 COSTS_N_INSNS (24), /* div. */
1128 COSTS_N_INSNS (5), /* mult. */
1129 COSTS_N_INSNS (8), /* mult_addsub. */
1130 COSTS_N_INSNS (30), /* fma. */
1131 COSTS_N_INSNS (3), /* addsub. */
1132 COSTS_N_INSNS (1), /* fpconst. */
1133 COSTS_N_INSNS (1), /* neg. */
1134 COSTS_N_INSNS (3), /* compare. */
1135 COSTS_N_INSNS (3), /* widen. */
1136 COSTS_N_INSNS (3), /* narrow. */
1137 COSTS_N_INSNS (3), /* toint. */
1138 COSTS_N_INSNS (3), /* fromint. */
1139 COSTS_N_INSNS (3) /* roundint. */
1140 }
1141 },
1142 /* Vector */
1143 {
1144 COSTS_N_INSNS (1) /* alu. */
1145 }
1146 };
1147
1148 const struct cpu_cost_table cortexa8_extra_costs =
1149 {
1150 /* ALU */
1151 {
1152 0, /* arith. */
1153 0, /* logical. */
1154 COSTS_N_INSNS (1), /* shift. */
1155 0, /* shift_reg. */
1156 COSTS_N_INSNS (1), /* arith_shift. */
1157 0, /* arith_shift_reg. */
1158 COSTS_N_INSNS (1), /* log_shift. */
1159 0, /* log_shift_reg. */
1160 0, /* extend. */
1161 0, /* extend_arith. */
1162 0, /* bfi. */
1163 0, /* bfx. */
1164 0, /* clz. */
1165 0, /* rev. */
1166 0, /* non_exec. */
1167 true /* non_exec_costs_exec. */
1168 },
1169 {
1170 /* MULT SImode */
1171 {
1172 COSTS_N_INSNS (1), /* simple. */
1173 COSTS_N_INSNS (1), /* flag_setting. */
1174 COSTS_N_INSNS (1), /* extend. */
1175 COSTS_N_INSNS (1), /* add. */
1176 COSTS_N_INSNS (1), /* extend_add. */
1177 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1178 },
1179 /* MULT DImode */
1180 {
1181 0, /* simple (N/A). */
1182 0, /* flag_setting (N/A). */
1183 COSTS_N_INSNS (2), /* extend. */
1184 0, /* add (N/A). */
1185 COSTS_N_INSNS (2), /* extend_add. */
1186 0 /* idiv (N/A). */
1187 }
1188 },
1189 /* LD/ST */
1190 {
1191 COSTS_N_INSNS (1), /* load. */
1192 COSTS_N_INSNS (1), /* load_sign_extend. */
1193 COSTS_N_INSNS (1), /* ldrd. */
1194 COSTS_N_INSNS (1), /* ldm_1st. */
1195 1, /* ldm_regs_per_insn_1st. */
1196 2, /* ldm_regs_per_insn_subsequent. */
1197 COSTS_N_INSNS (1), /* loadf. */
1198 COSTS_N_INSNS (1), /* loadd. */
1199 COSTS_N_INSNS (1), /* load_unaligned. */
1200 COSTS_N_INSNS (1), /* store. */
1201 COSTS_N_INSNS (1), /* strd. */
1202 COSTS_N_INSNS (1), /* stm_1st. */
1203 1, /* stm_regs_per_insn_1st. */
1204 2, /* stm_regs_per_insn_subsequent. */
1205 COSTS_N_INSNS (1), /* storef. */
1206 COSTS_N_INSNS (1), /* stored. */
1207 COSTS_N_INSNS (1), /* store_unaligned. */
1208 COSTS_N_INSNS (1), /* loadv. */
1209 COSTS_N_INSNS (1) /* storev. */
1210 },
1211 {
1212 /* FP SFmode */
1213 {
1214 COSTS_N_INSNS (36), /* div. */
1215 COSTS_N_INSNS (11), /* mult. */
1216 COSTS_N_INSNS (20), /* mult_addsub. */
1217 COSTS_N_INSNS (30), /* fma. */
1218 COSTS_N_INSNS (9), /* addsub. */
1219 COSTS_N_INSNS (3), /* fpconst. */
1220 COSTS_N_INSNS (3), /* neg. */
1221 COSTS_N_INSNS (6), /* compare. */
1222 COSTS_N_INSNS (4), /* widen. */
1223 COSTS_N_INSNS (4), /* narrow. */
1224 COSTS_N_INSNS (8), /* toint. */
1225 COSTS_N_INSNS (8), /* fromint. */
1226 COSTS_N_INSNS (8) /* roundint. */
1227 },
1228 /* FP DFmode */
1229 {
1230 COSTS_N_INSNS (64), /* div. */
1231 COSTS_N_INSNS (16), /* mult. */
1232 COSTS_N_INSNS (25), /* mult_addsub. */
1233 COSTS_N_INSNS (30), /* fma. */
1234 COSTS_N_INSNS (9), /* addsub. */
1235 COSTS_N_INSNS (3), /* fpconst. */
1236 COSTS_N_INSNS (3), /* neg. */
1237 COSTS_N_INSNS (6), /* compare. */
1238 COSTS_N_INSNS (6), /* widen. */
1239 COSTS_N_INSNS (6), /* narrow. */
1240 COSTS_N_INSNS (8), /* toint. */
1241 COSTS_N_INSNS (8), /* fromint. */
1242 COSTS_N_INSNS (8) /* roundint. */
1243 }
1244 },
1245 /* Vector */
1246 {
1247 COSTS_N_INSNS (1) /* alu. */
1248 }
1249 };
1250
1251 const struct cpu_cost_table cortexa5_extra_costs =
1252 {
1253 /* ALU */
1254 {
1255 0, /* arith. */
1256 0, /* logical. */
1257 COSTS_N_INSNS (1), /* shift. */
1258 COSTS_N_INSNS (1), /* shift_reg. */
1259 COSTS_N_INSNS (1), /* arith_shift. */
1260 COSTS_N_INSNS (1), /* arith_shift_reg. */
1261 COSTS_N_INSNS (1), /* log_shift. */
1262 COSTS_N_INSNS (1), /* log_shift_reg. */
1263 COSTS_N_INSNS (1), /* extend. */
1264 COSTS_N_INSNS (1), /* extend_arith. */
1265 COSTS_N_INSNS (1), /* bfi. */
1266 COSTS_N_INSNS (1), /* bfx. */
1267 COSTS_N_INSNS (1), /* clz. */
1268 COSTS_N_INSNS (1), /* rev. */
1269 0, /* non_exec. */
1270 true /* non_exec_costs_exec. */
1271 },
1272
1273 {
1274 /* MULT SImode */
1275 {
1276 0, /* simple. */
1277 COSTS_N_INSNS (1), /* flag_setting. */
1278 COSTS_N_INSNS (1), /* extend. */
1279 COSTS_N_INSNS (1), /* add. */
1280 COSTS_N_INSNS (1), /* extend_add. */
1281 COSTS_N_INSNS (7) /* idiv. */
1282 },
1283 /* MULT DImode */
1284 {
1285 0, /* simple (N/A). */
1286 0, /* flag_setting (N/A). */
1287 COSTS_N_INSNS (1), /* extend. */
1288 0, /* add. */
1289 COSTS_N_INSNS (2), /* extend_add. */
1290 0 /* idiv (N/A). */
1291 }
1292 },
1293 /* LD/ST */
1294 {
1295 COSTS_N_INSNS (1), /* load. */
1296 COSTS_N_INSNS (1), /* load_sign_extend. */
1297 COSTS_N_INSNS (6), /* ldrd. */
1298 COSTS_N_INSNS (1), /* ldm_1st. */
1299 1, /* ldm_regs_per_insn_1st. */
1300 2, /* ldm_regs_per_insn_subsequent. */
1301 COSTS_N_INSNS (2), /* loadf. */
1302 COSTS_N_INSNS (4), /* loadd. */
1303 COSTS_N_INSNS (1), /* load_unaligned. */
1304 COSTS_N_INSNS (1), /* store. */
1305 COSTS_N_INSNS (3), /* strd. */
1306 COSTS_N_INSNS (1), /* stm_1st. */
1307 1, /* stm_regs_per_insn_1st. */
1308 2, /* stm_regs_per_insn_subsequent. */
1309 COSTS_N_INSNS (2), /* storef. */
1310 COSTS_N_INSNS (2), /* stored. */
1311 COSTS_N_INSNS (1), /* store_unaligned. */
1312 COSTS_N_INSNS (1), /* loadv. */
1313 COSTS_N_INSNS (1) /* storev. */
1314 },
1315 {
1316 /* FP SFmode */
1317 {
1318 COSTS_N_INSNS (15), /* div. */
1319 COSTS_N_INSNS (3), /* mult. */
1320 COSTS_N_INSNS (7), /* mult_addsub. */
1321 COSTS_N_INSNS (7), /* fma. */
1322 COSTS_N_INSNS (3), /* addsub. */
1323 COSTS_N_INSNS (3), /* fpconst. */
1324 COSTS_N_INSNS (3), /* neg. */
1325 COSTS_N_INSNS (3), /* compare. */
1326 COSTS_N_INSNS (3), /* widen. */
1327 COSTS_N_INSNS (3), /* narrow. */
1328 COSTS_N_INSNS (3), /* toint. */
1329 COSTS_N_INSNS (3), /* fromint. */
1330 COSTS_N_INSNS (3) /* roundint. */
1331 },
1332 /* FP DFmode */
1333 {
1334 COSTS_N_INSNS (30), /* div. */
1335 COSTS_N_INSNS (6), /* mult. */
1336 COSTS_N_INSNS (10), /* mult_addsub. */
1337 COSTS_N_INSNS (7), /* fma. */
1338 COSTS_N_INSNS (3), /* addsub. */
1339 COSTS_N_INSNS (3), /* fpconst. */
1340 COSTS_N_INSNS (3), /* neg. */
1341 COSTS_N_INSNS (3), /* compare. */
1342 COSTS_N_INSNS (3), /* widen. */
1343 COSTS_N_INSNS (3), /* narrow. */
1344 COSTS_N_INSNS (3), /* toint. */
1345 COSTS_N_INSNS (3), /* fromint. */
1346 COSTS_N_INSNS (3) /* roundint. */
1347 }
1348 },
1349 /* Vector */
1350 {
1351 COSTS_N_INSNS (1) /* alu. */
1352 }
1353 };
1354
1355
1356 const struct cpu_cost_table cortexa7_extra_costs =
1357 {
1358 /* ALU */
1359 {
1360 0, /* arith. */
1361 0, /* logical. */
1362 COSTS_N_INSNS (1), /* shift. */
1363 COSTS_N_INSNS (1), /* shift_reg. */
1364 COSTS_N_INSNS (1), /* arith_shift. */
1365 COSTS_N_INSNS (1), /* arith_shift_reg. */
1366 COSTS_N_INSNS (1), /* log_shift. */
1367 COSTS_N_INSNS (1), /* log_shift_reg. */
1368 COSTS_N_INSNS (1), /* extend. */
1369 COSTS_N_INSNS (1), /* extend_arith. */
1370 COSTS_N_INSNS (1), /* bfi. */
1371 COSTS_N_INSNS (1), /* bfx. */
1372 COSTS_N_INSNS (1), /* clz. */
1373 COSTS_N_INSNS (1), /* rev. */
1374 0, /* non_exec. */
1375 true /* non_exec_costs_exec. */
1376 },
1377
1378 {
1379 /* MULT SImode */
1380 {
1381 0, /* simple. */
1382 COSTS_N_INSNS (1), /* flag_setting. */
1383 COSTS_N_INSNS (1), /* extend. */
1384 COSTS_N_INSNS (1), /* add. */
1385 COSTS_N_INSNS (1), /* extend_add. */
1386 COSTS_N_INSNS (7) /* idiv. */
1387 },
1388 /* MULT DImode */
1389 {
1390 0, /* simple (N/A). */
1391 0, /* flag_setting (N/A). */
1392 COSTS_N_INSNS (1), /* extend. */
1393 0, /* add. */
1394 COSTS_N_INSNS (2), /* extend_add. */
1395 0 /* idiv (N/A). */
1396 }
1397 },
1398 /* LD/ST */
1399 {
1400 COSTS_N_INSNS (1), /* load. */
1401 COSTS_N_INSNS (1), /* load_sign_extend. */
1402 COSTS_N_INSNS (3), /* ldrd. */
1403 COSTS_N_INSNS (1), /* ldm_1st. */
1404 1, /* ldm_regs_per_insn_1st. */
1405 2, /* ldm_regs_per_insn_subsequent. */
1406 COSTS_N_INSNS (2), /* loadf. */
1407 COSTS_N_INSNS (2), /* loadd. */
1408 COSTS_N_INSNS (1), /* load_unaligned. */
1409 COSTS_N_INSNS (1), /* store. */
1410 COSTS_N_INSNS (3), /* strd. */
1411 COSTS_N_INSNS (1), /* stm_1st. */
1412 1, /* stm_regs_per_insn_1st. */
1413 2, /* stm_regs_per_insn_subsequent. */
1414 COSTS_N_INSNS (2), /* storef. */
1415 COSTS_N_INSNS (2), /* stored. */
1416 COSTS_N_INSNS (1), /* store_unaligned. */
1417 COSTS_N_INSNS (1), /* loadv. */
1418 COSTS_N_INSNS (1) /* storev. */
1419 },
1420 {
1421 /* FP SFmode */
1422 {
1423 COSTS_N_INSNS (15), /* div. */
1424 COSTS_N_INSNS (3), /* mult. */
1425 COSTS_N_INSNS (7), /* mult_addsub. */
1426 COSTS_N_INSNS (7), /* fma. */
1427 COSTS_N_INSNS (3), /* addsub. */
1428 COSTS_N_INSNS (3), /* fpconst. */
1429 COSTS_N_INSNS (3), /* neg. */
1430 COSTS_N_INSNS (3), /* compare. */
1431 COSTS_N_INSNS (3), /* widen. */
1432 COSTS_N_INSNS (3), /* narrow. */
1433 COSTS_N_INSNS (3), /* toint. */
1434 COSTS_N_INSNS (3), /* fromint. */
1435 COSTS_N_INSNS (3) /* roundint. */
1436 },
1437 /* FP DFmode */
1438 {
1439 COSTS_N_INSNS (30), /* div. */
1440 COSTS_N_INSNS (6), /* mult. */
1441 COSTS_N_INSNS (10), /* mult_addsub. */
1442 COSTS_N_INSNS (7), /* fma. */
1443 COSTS_N_INSNS (3), /* addsub. */
1444 COSTS_N_INSNS (3), /* fpconst. */
1445 COSTS_N_INSNS (3), /* neg. */
1446 COSTS_N_INSNS (3), /* compare. */
1447 COSTS_N_INSNS (3), /* widen. */
1448 COSTS_N_INSNS (3), /* narrow. */
1449 COSTS_N_INSNS (3), /* toint. */
1450 COSTS_N_INSNS (3), /* fromint. */
1451 COSTS_N_INSNS (3) /* roundint. */
1452 }
1453 },
1454 /* Vector */
1455 {
1456 COSTS_N_INSNS (1) /* alu. */
1457 }
1458 };
1459
1460 const struct cpu_cost_table cortexa12_extra_costs =
1461 {
1462 /* ALU */
1463 {
1464 0, /* arith. */
1465 0, /* logical. */
1466 0, /* shift. */
1467 COSTS_N_INSNS (1), /* shift_reg. */
1468 COSTS_N_INSNS (1), /* arith_shift. */
1469 COSTS_N_INSNS (1), /* arith_shift_reg. */
1470 COSTS_N_INSNS (1), /* log_shift. */
1471 COSTS_N_INSNS (1), /* log_shift_reg. */
1472 0, /* extend. */
1473 COSTS_N_INSNS (1), /* extend_arith. */
1474 0, /* bfi. */
1475 COSTS_N_INSNS (1), /* bfx. */
1476 COSTS_N_INSNS (1), /* clz. */
1477 COSTS_N_INSNS (1), /* rev. */
1478 0, /* non_exec. */
1479 true /* non_exec_costs_exec. */
1480 },
1481 /* MULT SImode */
1482 {
1483 {
1484 COSTS_N_INSNS (2), /* simple. */
1485 COSTS_N_INSNS (3), /* flag_setting. */
1486 COSTS_N_INSNS (2), /* extend. */
1487 COSTS_N_INSNS (3), /* add. */
1488 COSTS_N_INSNS (2), /* extend_add. */
1489 COSTS_N_INSNS (18) /* idiv. */
1490 },
1491 /* MULT DImode */
1492 {
1493 0, /* simple (N/A). */
1494 0, /* flag_setting (N/A). */
1495 COSTS_N_INSNS (3), /* extend. */
1496 0, /* add (N/A). */
1497 COSTS_N_INSNS (3), /* extend_add. */
1498 0 /* idiv (N/A). */
1499 }
1500 },
1501 /* LD/ST */
1502 {
1503 COSTS_N_INSNS (3), /* load. */
1504 COSTS_N_INSNS (3), /* load_sign_extend. */
1505 COSTS_N_INSNS (3), /* ldrd. */
1506 COSTS_N_INSNS (3), /* ldm_1st. */
1507 1, /* ldm_regs_per_insn_1st. */
1508 2, /* ldm_regs_per_insn_subsequent. */
1509 COSTS_N_INSNS (3), /* loadf. */
1510 COSTS_N_INSNS (3), /* loadd. */
1511 0, /* load_unaligned. */
1512 0, /* store. */
1513 0, /* strd. */
1514 0, /* stm_1st. */
1515 1, /* stm_regs_per_insn_1st. */
1516 2, /* stm_regs_per_insn_subsequent. */
1517 COSTS_N_INSNS (2), /* storef. */
1518 COSTS_N_INSNS (2), /* stored. */
1519 0, /* store_unaligned. */
1520 COSTS_N_INSNS (1), /* loadv. */
1521 COSTS_N_INSNS (1) /* storev. */
1522 },
1523 {
1524 /* FP SFmode */
1525 {
1526 COSTS_N_INSNS (17), /* div. */
1527 COSTS_N_INSNS (4), /* mult. */
1528 COSTS_N_INSNS (8), /* mult_addsub. */
1529 COSTS_N_INSNS (8), /* fma. */
1530 COSTS_N_INSNS (4), /* addsub. */
1531 COSTS_N_INSNS (2), /* fpconst. */
1532 COSTS_N_INSNS (2), /* neg. */
1533 COSTS_N_INSNS (2), /* compare. */
1534 COSTS_N_INSNS (4), /* widen. */
1535 COSTS_N_INSNS (4), /* narrow. */
1536 COSTS_N_INSNS (4), /* toint. */
1537 COSTS_N_INSNS (4), /* fromint. */
1538 COSTS_N_INSNS (4) /* roundint. */
1539 },
1540 /* FP DFmode */
1541 {
1542 COSTS_N_INSNS (31), /* div. */
1543 COSTS_N_INSNS (4), /* mult. */
1544 COSTS_N_INSNS (8), /* mult_addsub. */
1545 COSTS_N_INSNS (8), /* fma. */
1546 COSTS_N_INSNS (4), /* addsub. */
1547 COSTS_N_INSNS (2), /* fpconst. */
1548 COSTS_N_INSNS (2), /* neg. */
1549 COSTS_N_INSNS (2), /* compare. */
1550 COSTS_N_INSNS (4), /* widen. */
1551 COSTS_N_INSNS (4), /* narrow. */
1552 COSTS_N_INSNS (4), /* toint. */
1553 COSTS_N_INSNS (4), /* fromint. */
1554 COSTS_N_INSNS (4) /* roundint. */
1555 }
1556 },
1557 /* Vector */
1558 {
1559 COSTS_N_INSNS (1) /* alu. */
1560 }
1561 };
1562
1563 const struct cpu_cost_table cortexa15_extra_costs =
1564 {
1565 /* ALU */
1566 {
1567 0, /* arith. */
1568 0, /* logical. */
1569 0, /* shift. */
1570 0, /* shift_reg. */
1571 COSTS_N_INSNS (1), /* arith_shift. */
1572 COSTS_N_INSNS (1), /* arith_shift_reg. */
1573 COSTS_N_INSNS (1), /* log_shift. */
1574 COSTS_N_INSNS (1), /* log_shift_reg. */
1575 0, /* extend. */
1576 COSTS_N_INSNS (1), /* extend_arith. */
1577 COSTS_N_INSNS (1), /* bfi. */
1578 0, /* bfx. */
1579 0, /* clz. */
1580 0, /* rev. */
1581 0, /* non_exec. */
1582 true /* non_exec_costs_exec. */
1583 },
1584 /* MULT SImode */
1585 {
1586 {
1587 COSTS_N_INSNS (2), /* simple. */
1588 COSTS_N_INSNS (3), /* flag_setting. */
1589 COSTS_N_INSNS (2), /* extend. */
1590 COSTS_N_INSNS (2), /* add. */
1591 COSTS_N_INSNS (2), /* extend_add. */
1592 COSTS_N_INSNS (18) /* idiv. */
1593 },
1594 /* MULT DImode */
1595 {
1596 0, /* simple (N/A). */
1597 0, /* flag_setting (N/A). */
1598 COSTS_N_INSNS (3), /* extend. */
1599 0, /* add (N/A). */
1600 COSTS_N_INSNS (3), /* extend_add. */
1601 0 /* idiv (N/A). */
1602 }
1603 },
1604 /* LD/ST */
1605 {
1606 COSTS_N_INSNS (3), /* load. */
1607 COSTS_N_INSNS (3), /* load_sign_extend. */
1608 COSTS_N_INSNS (3), /* ldrd. */
1609 COSTS_N_INSNS (4), /* ldm_1st. */
1610 1, /* ldm_regs_per_insn_1st. */
1611 2, /* ldm_regs_per_insn_subsequent. */
1612 COSTS_N_INSNS (4), /* loadf. */
1613 COSTS_N_INSNS (4), /* loadd. */
1614 0, /* load_unaligned. */
1615 0, /* store. */
1616 0, /* strd. */
1617 COSTS_N_INSNS (1), /* stm_1st. */
1618 1, /* stm_regs_per_insn_1st. */
1619 2, /* stm_regs_per_insn_subsequent. */
1620 0, /* storef. */
1621 0, /* stored. */
1622 0, /* store_unaligned. */
1623 COSTS_N_INSNS (1), /* loadv. */
1624 COSTS_N_INSNS (1) /* storev. */
1625 },
1626 {
1627 /* FP SFmode */
1628 {
1629 COSTS_N_INSNS (17), /* div. */
1630 COSTS_N_INSNS (4), /* mult. */
1631 COSTS_N_INSNS (8), /* mult_addsub. */
1632 COSTS_N_INSNS (8), /* fma. */
1633 COSTS_N_INSNS (4), /* addsub. */
1634 COSTS_N_INSNS (2), /* fpconst. */
1635 COSTS_N_INSNS (2), /* neg. */
1636 COSTS_N_INSNS (5), /* compare. */
1637 COSTS_N_INSNS (4), /* widen. */
1638 COSTS_N_INSNS (4), /* narrow. */
1639 COSTS_N_INSNS (4), /* toint. */
1640 COSTS_N_INSNS (4), /* fromint. */
1641 COSTS_N_INSNS (4) /* roundint. */
1642 },
1643 /* FP DFmode */
1644 {
1645 COSTS_N_INSNS (31), /* div. */
1646 COSTS_N_INSNS (4), /* mult. */
1647 COSTS_N_INSNS (8), /* mult_addsub. */
1648 COSTS_N_INSNS (8), /* fma. */
1649 COSTS_N_INSNS (4), /* addsub. */
1650 COSTS_N_INSNS (2), /* fpconst. */
1651 COSTS_N_INSNS (2), /* neg. */
1652 COSTS_N_INSNS (2), /* compare. */
1653 COSTS_N_INSNS (4), /* widen. */
1654 COSTS_N_INSNS (4), /* narrow. */
1655 COSTS_N_INSNS (4), /* toint. */
1656 COSTS_N_INSNS (4), /* fromint. */
1657 COSTS_N_INSNS (4) /* roundint. */
1658 }
1659 },
1660 /* Vector */
1661 {
1662 COSTS_N_INSNS (1) /* alu. */
1663 }
1664 };
1665
1666 const struct cpu_cost_table v7m_extra_costs =
1667 {
1668 /* ALU */
1669 {
1670 0, /* arith. */
1671 0, /* logical. */
1672 0, /* shift. */
1673 0, /* shift_reg. */
1674 0, /* arith_shift. */
1675 COSTS_N_INSNS (1), /* arith_shift_reg. */
1676 0, /* log_shift. */
1677 COSTS_N_INSNS (1), /* log_shift_reg. */
1678 0, /* extend. */
1679 COSTS_N_INSNS (1), /* extend_arith. */
1680 0, /* bfi. */
1681 0, /* bfx. */
1682 0, /* clz. */
1683 0, /* rev. */
1684 COSTS_N_INSNS (1), /* non_exec. */
1685 false /* non_exec_costs_exec. */
1686 },
1687 {
1688 /* MULT SImode */
1689 {
1690 COSTS_N_INSNS (1), /* simple. */
1691 COSTS_N_INSNS (1), /* flag_setting. */
1692 COSTS_N_INSNS (2), /* extend. */
1693 COSTS_N_INSNS (1), /* add. */
1694 COSTS_N_INSNS (3), /* extend_add. */
1695 COSTS_N_INSNS (8) /* idiv. */
1696 },
1697 /* MULT DImode */
1698 {
1699 0, /* simple (N/A). */
1700 0, /* flag_setting (N/A). */
1701 COSTS_N_INSNS (2), /* extend. */
1702 0, /* add (N/A). */
1703 COSTS_N_INSNS (3), /* extend_add. */
1704 0 /* idiv (N/A). */
1705 }
1706 },
1707 /* LD/ST */
1708 {
1709 COSTS_N_INSNS (2), /* load. */
1710 0, /* load_sign_extend. */
1711 COSTS_N_INSNS (3), /* ldrd. */
1712 COSTS_N_INSNS (2), /* ldm_1st. */
1713 1, /* ldm_regs_per_insn_1st. */
1714 1, /* ldm_regs_per_insn_subsequent. */
1715 COSTS_N_INSNS (2), /* loadf. */
1716 COSTS_N_INSNS (3), /* loadd. */
1717 COSTS_N_INSNS (1), /* load_unaligned. */
1718 COSTS_N_INSNS (2), /* store. */
1719 COSTS_N_INSNS (3), /* strd. */
1720 COSTS_N_INSNS (2), /* stm_1st. */
1721 1, /* stm_regs_per_insn_1st. */
1722 1, /* stm_regs_per_insn_subsequent. */
1723 COSTS_N_INSNS (2), /* storef. */
1724 COSTS_N_INSNS (3), /* stored. */
1725 COSTS_N_INSNS (1), /* store_unaligned. */
1726 COSTS_N_INSNS (1), /* loadv. */
1727 COSTS_N_INSNS (1) /* storev. */
1728 },
1729 {
1730 /* FP SFmode */
1731 {
1732 COSTS_N_INSNS (7), /* div. */
1733 COSTS_N_INSNS (2), /* mult. */
1734 COSTS_N_INSNS (5), /* mult_addsub. */
1735 COSTS_N_INSNS (3), /* fma. */
1736 COSTS_N_INSNS (1), /* addsub. */
1737 0, /* fpconst. */
1738 0, /* neg. */
1739 0, /* compare. */
1740 0, /* widen. */
1741 0, /* narrow. */
1742 0, /* toint. */
1743 0, /* fromint. */
1744 0 /* roundint. */
1745 },
1746 /* FP DFmode */
1747 {
1748 COSTS_N_INSNS (15), /* div. */
1749 COSTS_N_INSNS (5), /* mult. */
1750 COSTS_N_INSNS (7), /* mult_addsub. */
1751 COSTS_N_INSNS (7), /* fma. */
1752 COSTS_N_INSNS (3), /* addsub. */
1753 0, /* fpconst. */
1754 0, /* neg. */
1755 0, /* compare. */
1756 0, /* widen. */
1757 0, /* narrow. */
1758 0, /* toint. */
1759 0, /* fromint. */
1760 0 /* roundint. */
1761 }
1762 },
1763 /* Vector */
1764 {
1765 COSTS_N_INSNS (1) /* alu. */
1766 }
1767 };
1768
1769 const struct addr_mode_cost_table generic_addr_mode_costs =
1770 {
1771 /* int. */
1772 {
1773 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1774 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1775 COSTS_N_INSNS (0) /* AMO_WB. */
1776 },
1777 /* float. */
1778 {
1779 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1780 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1781 COSTS_N_INSNS (0) /* AMO_WB. */
1782 },
1783 /* vector. */
1784 {
1785 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1786 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1787 COSTS_N_INSNS (0) /* AMO_WB. */
1788 }
1789 };
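/* Editor's note (not part of the original source): COSTS_N_INSNS (N) scales N
   into the RTL cost units used by the middle end (N * 4 with the usual rtl.h
   definition), and the *_extra_costs tables above are added on top of a
   baseline single-instruction cost, so a 0 entry means "no cost beyond an
   ordinary instruction".  */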
1790
1791 const struct tune_params arm_slowmul_tune =
1792 {
1793 &generic_extra_costs, /* Insn extra costs. */
1794 &generic_addr_mode_costs, /* Addressing mode costs. */
1795 NULL, /* Sched adj cost. */
1796 arm_default_branch_cost,
1797 &arm_default_vec_cost,
1798 3, /* Constant limit. */
1799 5, /* Max cond insns. */
1800 8, /* Memset max inline. */
1801 1, /* Issue rate. */
1802 ARM_PREFETCH_NOT_BENEFICIAL,
1803 tune_params::PREF_CONST_POOL_TRUE,
1804 tune_params::PREF_LDRD_FALSE,
1805 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1806 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1807 tune_params::DISPARAGE_FLAGS_NEITHER,
1808 tune_params::PREF_NEON_64_FALSE,
1809 tune_params::PREF_NEON_STRINGOPS_FALSE,
1810 tune_params::FUSE_NOTHING,
1811 tune_params::SCHED_AUTOPREF_OFF
1812 };
1813
1814 const struct tune_params arm_fastmul_tune =
1815 {
1816 &generic_extra_costs, /* Insn extra costs. */
1817 &generic_addr_mode_costs, /* Addressing mode costs. */
1818 NULL, /* Sched adj cost. */
1819 arm_default_branch_cost,
1820 &arm_default_vec_cost,
1821 1, /* Constant limit. */
1822 5, /* Max cond insns. */
1823 8, /* Memset max inline. */
1824 1, /* Issue rate. */
1825 ARM_PREFETCH_NOT_BENEFICIAL,
1826 tune_params::PREF_CONST_POOL_TRUE,
1827 tune_params::PREF_LDRD_FALSE,
1828 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1829 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1830 tune_params::DISPARAGE_FLAGS_NEITHER,
1831 tune_params::PREF_NEON_64_FALSE,
1832 tune_params::PREF_NEON_STRINGOPS_FALSE,
1833 tune_params::FUSE_NOTHING,
1834 tune_params::SCHED_AUTOPREF_OFF
1835 };
1836
1837 /* StrongARM has early execution of branches, so a sequence that is worth
1838 skipping is shorter. Set max_insns_skipped to a lower value. */
1839
1840 const struct tune_params arm_strongarm_tune =
1841 {
1842 &generic_extra_costs, /* Insn extra costs. */
1843 &generic_addr_mode_costs, /* Addressing mode costs. */
1844 NULL, /* Sched adj cost. */
1845 arm_default_branch_cost,
1846 &arm_default_vec_cost,
1847 1, /* Constant limit. */
1848 3, /* Max cond insns. */
1849 8, /* Memset max inline. */
1850 1, /* Issue rate. */
1851 ARM_PREFETCH_NOT_BENEFICIAL,
1852 tune_params::PREF_CONST_POOL_TRUE,
1853 tune_params::PREF_LDRD_FALSE,
1854 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1855 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1856 tune_params::DISPARAGE_FLAGS_NEITHER,
1857 tune_params::PREF_NEON_64_FALSE,
1858 tune_params::PREF_NEON_STRINGOPS_FALSE,
1859 tune_params::FUSE_NOTHING,
1860 tune_params::SCHED_AUTOPREF_OFF
1861 };
1862
1863 const struct tune_params arm_xscale_tune =
1864 {
1865 &generic_extra_costs, /* Insn extra costs. */
1866 &generic_addr_mode_costs, /* Addressing mode costs. */
1867 xscale_sched_adjust_cost,
1868 arm_default_branch_cost,
1869 &arm_default_vec_cost,
1870 2, /* Constant limit. */
1871 3, /* Max cond insns. */
1872 8, /* Memset max inline. */
1873 1, /* Issue rate. */
1874 ARM_PREFETCH_NOT_BENEFICIAL,
1875 tune_params::PREF_CONST_POOL_TRUE,
1876 tune_params::PREF_LDRD_FALSE,
1877 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1878 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1879 tune_params::DISPARAGE_FLAGS_NEITHER,
1880 tune_params::PREF_NEON_64_FALSE,
1881 tune_params::PREF_NEON_STRINGOPS_FALSE,
1882 tune_params::FUSE_NOTHING,
1883 tune_params::SCHED_AUTOPREF_OFF
1884 };
1885
1886 const struct tune_params arm_9e_tune =
1887 {
1888 &generic_extra_costs, /* Insn extra costs. */
1889 &generic_addr_mode_costs, /* Addressing mode costs. */
1890 NULL, /* Sched adj cost. */
1891 arm_default_branch_cost,
1892 &arm_default_vec_cost,
1893 1, /* Constant limit. */
1894 5, /* Max cond insns. */
1895 8, /* Memset max inline. */
1896 1, /* Issue rate. */
1897 ARM_PREFETCH_NOT_BENEFICIAL,
1898 tune_params::PREF_CONST_POOL_TRUE,
1899 tune_params::PREF_LDRD_FALSE,
1900 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1901 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1902 tune_params::DISPARAGE_FLAGS_NEITHER,
1903 tune_params::PREF_NEON_64_FALSE,
1904 tune_params::PREF_NEON_STRINGOPS_FALSE,
1905 tune_params::FUSE_NOTHING,
1906 tune_params::SCHED_AUTOPREF_OFF
1907 };
1908
1909 const struct tune_params arm_marvell_pj4_tune =
1910 {
1911 &generic_extra_costs, /* Insn extra costs. */
1912 &generic_addr_mode_costs, /* Addressing mode costs. */
1913 NULL, /* Sched adj cost. */
1914 arm_default_branch_cost,
1915 &arm_default_vec_cost,
1916 1, /* Constant limit. */
1917 5, /* Max cond insns. */
1918 8, /* Memset max inline. */
1919 2, /* Issue rate. */
1920 ARM_PREFETCH_NOT_BENEFICIAL,
1921 tune_params::PREF_CONST_POOL_TRUE,
1922 tune_params::PREF_LDRD_FALSE,
1923 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1924 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1925 tune_params::DISPARAGE_FLAGS_NEITHER,
1926 tune_params::PREF_NEON_64_FALSE,
1927 tune_params::PREF_NEON_STRINGOPS_FALSE,
1928 tune_params::FUSE_NOTHING,
1929 tune_params::SCHED_AUTOPREF_OFF
1930 };
1931
1932 const struct tune_params arm_v6t2_tune =
1933 {
1934 &generic_extra_costs, /* Insn extra costs. */
1935 &generic_addr_mode_costs, /* Addressing mode costs. */
1936 NULL, /* Sched adj cost. */
1937 arm_default_branch_cost,
1938 &arm_default_vec_cost,
1939 1, /* Constant limit. */
1940 5, /* Max cond insns. */
1941 8, /* Memset max inline. */
1942 1, /* Issue rate. */
1943 ARM_PREFETCH_NOT_BENEFICIAL,
1944 tune_params::PREF_CONST_POOL_FALSE,
1945 tune_params::PREF_LDRD_FALSE,
1946 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1947 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1948 tune_params::DISPARAGE_FLAGS_NEITHER,
1949 tune_params::PREF_NEON_64_FALSE,
1950 tune_params::PREF_NEON_STRINGOPS_FALSE,
1951 tune_params::FUSE_NOTHING,
1952 tune_params::SCHED_AUTOPREF_OFF
1953 };
1954
1955
1956 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1957 const struct tune_params arm_cortex_tune =
1958 {
1959 &generic_extra_costs,
1960 &generic_addr_mode_costs, /* Addressing mode costs. */
1961 NULL, /* Sched adj cost. */
1962 arm_default_branch_cost,
1963 &arm_default_vec_cost,
1964 1, /* Constant limit. */
1965 5, /* Max cond insns. */
1966 8, /* Memset max inline. */
1967 2, /* Issue rate. */
1968 ARM_PREFETCH_NOT_BENEFICIAL,
1969 tune_params::PREF_CONST_POOL_FALSE,
1970 tune_params::PREF_LDRD_FALSE,
1971 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1972 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1973 tune_params::DISPARAGE_FLAGS_NEITHER,
1974 tune_params::PREF_NEON_64_FALSE,
1975 tune_params::PREF_NEON_STRINGOPS_FALSE,
1976 tune_params::FUSE_NOTHING,
1977 tune_params::SCHED_AUTOPREF_OFF
1978 };
1979
1980 const struct tune_params arm_cortex_a8_tune =
1981 {
1982 &cortexa8_extra_costs,
1983 &generic_addr_mode_costs, /* Addressing mode costs. */
1984 NULL, /* Sched adj cost. */
1985 arm_default_branch_cost,
1986 &arm_default_vec_cost,
1987 1, /* Constant limit. */
1988 5, /* Max cond insns. */
1989 8, /* Memset max inline. */
1990 2, /* Issue rate. */
1991 ARM_PREFETCH_NOT_BENEFICIAL,
1992 tune_params::PREF_CONST_POOL_FALSE,
1993 tune_params::PREF_LDRD_FALSE,
1994 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1995 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1996 tune_params::DISPARAGE_FLAGS_NEITHER,
1997 tune_params::PREF_NEON_64_FALSE,
1998 tune_params::PREF_NEON_STRINGOPS_TRUE,
1999 tune_params::FUSE_NOTHING,
2000 tune_params::SCHED_AUTOPREF_OFF
2001 };
2002
2003 const struct tune_params arm_cortex_a7_tune =
2004 {
2005 &cortexa7_extra_costs,
2006 &generic_addr_mode_costs, /* Addressing mode costs. */
2007 NULL, /* Sched adj cost. */
2008 arm_default_branch_cost,
2009 &arm_default_vec_cost,
2010 1, /* Constant limit. */
2011 5, /* Max cond insns. */
2012 8, /* Memset max inline. */
2013 2, /* Issue rate. */
2014 ARM_PREFETCH_NOT_BENEFICIAL,
2015 tune_params::PREF_CONST_POOL_FALSE,
2016 tune_params::PREF_LDRD_FALSE,
2017 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2018 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2019 tune_params::DISPARAGE_FLAGS_NEITHER,
2020 tune_params::PREF_NEON_64_FALSE,
2021 tune_params::PREF_NEON_STRINGOPS_TRUE,
2022 tune_params::FUSE_NOTHING,
2023 tune_params::SCHED_AUTOPREF_OFF
2024 };
2025
2026 const struct tune_params arm_cortex_a15_tune =
2027 {
2028 &cortexa15_extra_costs,
2029 &generic_addr_mode_costs, /* Addressing mode costs. */
2030 NULL, /* Sched adj cost. */
2031 arm_default_branch_cost,
2032 &arm_default_vec_cost,
2033 1, /* Constant limit. */
2034 2, /* Max cond insns. */
2035 8, /* Memset max inline. */
2036 3, /* Issue rate. */
2037 ARM_PREFETCH_NOT_BENEFICIAL,
2038 tune_params::PREF_CONST_POOL_FALSE,
2039 tune_params::PREF_LDRD_TRUE,
2040 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2041 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2042 tune_params::DISPARAGE_FLAGS_ALL,
2043 tune_params::PREF_NEON_64_FALSE,
2044 tune_params::PREF_NEON_STRINGOPS_TRUE,
2045 tune_params::FUSE_NOTHING,
2046 tune_params::SCHED_AUTOPREF_FULL
2047 };
2048
2049 const struct tune_params arm_cortex_a35_tune =
2050 {
2051 &cortexa53_extra_costs,
2052 &generic_addr_mode_costs, /* Addressing mode costs. */
2053 NULL, /* Sched adj cost. */
2054 arm_default_branch_cost,
2055 &arm_default_vec_cost,
2056 1, /* Constant limit. */
2057 5, /* Max cond insns. */
2058 8, /* Memset max inline. */
2059 1, /* Issue rate. */
2060 ARM_PREFETCH_NOT_BENEFICIAL,
2061 tune_params::PREF_CONST_POOL_FALSE,
2062 tune_params::PREF_LDRD_FALSE,
2063 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2064 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2065 tune_params::DISPARAGE_FLAGS_NEITHER,
2066 tune_params::PREF_NEON_64_FALSE,
2067 tune_params::PREF_NEON_STRINGOPS_TRUE,
2068 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2069 tune_params::SCHED_AUTOPREF_OFF
2070 };
2071
2072 const struct tune_params arm_cortex_a53_tune =
2073 {
2074 &cortexa53_extra_costs,
2075 &generic_addr_mode_costs, /* Addressing mode costs. */
2076 NULL, /* Sched adj cost. */
2077 arm_default_branch_cost,
2078 &arm_default_vec_cost,
2079 1, /* Constant limit. */
2080 5, /* Max cond insns. */
2081 8, /* Memset max inline. */
2082 2, /* Issue rate. */
2083 ARM_PREFETCH_NOT_BENEFICIAL,
2084 tune_params::PREF_CONST_POOL_FALSE,
2085 tune_params::PREF_LDRD_FALSE,
2086 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2087 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2088 tune_params::DISPARAGE_FLAGS_NEITHER,
2089 tune_params::PREF_NEON_64_FALSE,
2090 tune_params::PREF_NEON_STRINGOPS_TRUE,
2091 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2092 tune_params::SCHED_AUTOPREF_OFF
2093 };
2094
2095 const struct tune_params arm_cortex_a57_tune =
2096 {
2097 &cortexa57_extra_costs,
2098 &generic_addr_mode_costs, /* Addressing mode costs. */
2099 NULL, /* Sched adj cost. */
2100 arm_default_branch_cost,
2101 &arm_default_vec_cost,
2102 1, /* Constant limit. */
2103 2, /* Max cond insns. */
2104 8, /* Memset max inline. */
2105 3, /* Issue rate. */
2106 ARM_PREFETCH_NOT_BENEFICIAL,
2107 tune_params::PREF_CONST_POOL_FALSE,
2108 tune_params::PREF_LDRD_TRUE,
2109 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2110 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2111 tune_params::DISPARAGE_FLAGS_ALL,
2112 tune_params::PREF_NEON_64_FALSE,
2113 tune_params::PREF_NEON_STRINGOPS_TRUE,
2114 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2115 tune_params::SCHED_AUTOPREF_FULL
2116 };
2117
2118 const struct tune_params arm_exynosm1_tune =
2119 {
2120 &exynosm1_extra_costs,
2121 &generic_addr_mode_costs, /* Addressing mode costs. */
2122 NULL, /* Sched adj cost. */
2123 arm_default_branch_cost,
2124 &arm_default_vec_cost,
2125 1, /* Constant limit. */
2126 2, /* Max cond insns. */
2127 8, /* Memset max inline. */
2128 3, /* Issue rate. */
2129 ARM_PREFETCH_NOT_BENEFICIAL,
2130 tune_params::PREF_CONST_POOL_FALSE,
2131 tune_params::PREF_LDRD_TRUE,
2132 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2133 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2134 tune_params::DISPARAGE_FLAGS_ALL,
2135 tune_params::PREF_NEON_64_FALSE,
2136 tune_params::PREF_NEON_STRINGOPS_TRUE,
2137 tune_params::FUSE_NOTHING,
2138 tune_params::SCHED_AUTOPREF_OFF
2139 };
2140
2141 const struct tune_params arm_xgene1_tune =
2142 {
2143 &xgene1_extra_costs,
2144 &generic_addr_mode_costs, /* Addressing mode costs. */
2145 NULL, /* Sched adj cost. */
2146 arm_default_branch_cost,
2147 &arm_default_vec_cost,
2148 1, /* Constant limit. */
2149 2, /* Max cond insns. */
2150 32, /* Memset max inline. */
2151 4, /* Issue rate. */
2152 ARM_PREFETCH_NOT_BENEFICIAL,
2153 tune_params::PREF_CONST_POOL_FALSE,
2154 tune_params::PREF_LDRD_TRUE,
2155 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2156 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2157 tune_params::DISPARAGE_FLAGS_ALL,
2158 tune_params::PREF_NEON_64_FALSE,
2159 tune_params::PREF_NEON_STRINGOPS_FALSE,
2160 tune_params::FUSE_NOTHING,
2161 tune_params::SCHED_AUTOPREF_OFF
2162 };
2163
2164 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2165 less appealing. Set max_insns_skipped to a low value. */
2166
2167 const struct tune_params arm_cortex_a5_tune =
2168 {
2169 &cortexa5_extra_costs,
2170 &generic_addr_mode_costs, /* Addressing mode costs. */
2171 NULL, /* Sched adj cost. */
2172 arm_cortex_a5_branch_cost,
2173 &arm_default_vec_cost,
2174 1, /* Constant limit. */
2175 1, /* Max cond insns. */
2176 8, /* Memset max inline. */
2177 2, /* Issue rate. */
2178 ARM_PREFETCH_NOT_BENEFICIAL,
2179 tune_params::PREF_CONST_POOL_FALSE,
2180 tune_params::PREF_LDRD_FALSE,
2181 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2182 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2183 tune_params::DISPARAGE_FLAGS_NEITHER,
2184 tune_params::PREF_NEON_64_FALSE,
2185 tune_params::PREF_NEON_STRINGOPS_TRUE,
2186 tune_params::FUSE_NOTHING,
2187 tune_params::SCHED_AUTOPREF_OFF
2188 };
2189
2190 const struct tune_params arm_cortex_a9_tune =
2191 {
2192 &cortexa9_extra_costs,
2193 &generic_addr_mode_costs, /* Addressing mode costs. */
2194 cortex_a9_sched_adjust_cost,
2195 arm_default_branch_cost,
2196 &arm_default_vec_cost,
2197 1, /* Constant limit. */
2198 5, /* Max cond insns. */
2199 8, /* Memset max inline. */
2200 2, /* Issue rate. */
2201 ARM_PREFETCH_BENEFICIAL(4,32,32),
2202 tune_params::PREF_CONST_POOL_FALSE,
2203 tune_params::PREF_LDRD_FALSE,
2204 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2205 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2206 tune_params::DISPARAGE_FLAGS_NEITHER,
2207 tune_params::PREF_NEON_64_FALSE,
2208 tune_params::PREF_NEON_STRINGOPS_FALSE,
2209 tune_params::FUSE_NOTHING,
2210 tune_params::SCHED_AUTOPREF_OFF
2211 };
2212
2213 const struct tune_params arm_cortex_a12_tune =
2214 {
2215 &cortexa12_extra_costs,
2216 &generic_addr_mode_costs, /* Addressing mode costs. */
2217 NULL, /* Sched adj cost. */
2218 arm_default_branch_cost,
2219 &arm_default_vec_cost, /* Vectorizer costs. */
2220 1, /* Constant limit. */
2221 2, /* Max cond insns. */
2222 8, /* Memset max inline. */
2223 2, /* Issue rate. */
2224 ARM_PREFETCH_NOT_BENEFICIAL,
2225 tune_params::PREF_CONST_POOL_FALSE,
2226 tune_params::PREF_LDRD_TRUE,
2227 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2228 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2229 tune_params::DISPARAGE_FLAGS_ALL,
2230 tune_params::PREF_NEON_64_FALSE,
2231 tune_params::PREF_NEON_STRINGOPS_TRUE,
2232 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2233 tune_params::SCHED_AUTOPREF_OFF
2234 };
2235
2236 const struct tune_params arm_cortex_a73_tune =
2237 {
2238 &cortexa57_extra_costs,
2239 &generic_addr_mode_costs, /* Addressing mode costs. */
2240 NULL, /* Sched adj cost. */
2241 arm_default_branch_cost,
2242 &arm_default_vec_cost, /* Vectorizer costs. */
2243 1, /* Constant limit. */
2244 2, /* Max cond insns. */
2245 8, /* Memset max inline. */
2246 2, /* Issue rate. */
2247 ARM_PREFETCH_NOT_BENEFICIAL,
2248 tune_params::PREF_CONST_POOL_FALSE,
2249 tune_params::PREF_LDRD_TRUE,
2250 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2251 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2252 tune_params::DISPARAGE_FLAGS_ALL,
2253 tune_params::PREF_NEON_64_FALSE,
2254 tune_params::PREF_NEON_STRINGOPS_TRUE,
2255 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2256 tune_params::SCHED_AUTOPREF_FULL
2257 };
2258
2259 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW and MOVT each take a
2260 single cycle, so materialising a 32-bit constant with the pair costs two
2261 cycles. An LDR from the constant pool likewise takes two cycles to execute,
2262 but mildly increases pipelining opportunity (consecutive loads/stores can be
2263 pipelined together, saving one cycle), and may also improve icache
2264 utilisation. Hence we prefer the constant pool for such processors. */
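/* Editor's illustration (not part of the original source): the two sequences
   being compared are, roughly,

       movw    r0, #:lower16:CONST     @ 1 cycle
       movt    r0, #:upper16:CONST     @ 1 cycle

   versus a literal-pool load such as

       ldr     r0, .Lpool_entry        @ 2 cycles, but may pair with a
                                       @ neighbouring load or store

   where CONST and .Lpool_entry are placeholder names.  */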
2265
2266 const struct tune_params arm_v7m_tune =
2267 {
2268 &v7m_extra_costs,
2269 &generic_addr_mode_costs, /* Addressing mode costs. */
2270 NULL, /* Sched adj cost. */
2271 arm_cortex_m_branch_cost,
2272 &arm_default_vec_cost,
2273 1, /* Constant limit. */
2274 2, /* Max cond insns. */
2275 8, /* Memset max inline. */
2276 1, /* Issue rate. */
2277 ARM_PREFETCH_NOT_BENEFICIAL,
2278 tune_params::PREF_CONST_POOL_TRUE,
2279 tune_params::PREF_LDRD_FALSE,
2280 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2281 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2282 tune_params::DISPARAGE_FLAGS_NEITHER,
2283 tune_params::PREF_NEON_64_FALSE,
2284 tune_params::PREF_NEON_STRINGOPS_FALSE,
2285 tune_params::FUSE_NOTHING,
2286 tune_params::SCHED_AUTOPREF_OFF
2287 };
2288
2289 /* Cortex-M7 tuning. */
2290
2291 const struct tune_params arm_cortex_m7_tune =
2292 {
2293 &v7m_extra_costs,
2294 &generic_addr_mode_costs, /* Addressing mode costs. */
2295 NULL, /* Sched adj cost. */
2296 arm_cortex_m7_branch_cost,
2297 &arm_default_vec_cost,
2298 0, /* Constant limit. */
2299 1, /* Max cond insns. */
2300 8, /* Memset max inline. */
2301 2, /* Issue rate. */
2302 ARM_PREFETCH_NOT_BENEFICIAL,
2303 tune_params::PREF_CONST_POOL_TRUE,
2304 tune_params::PREF_LDRD_FALSE,
2305 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2306 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2307 tune_params::DISPARAGE_FLAGS_NEITHER,
2308 tune_params::PREF_NEON_64_FALSE,
2309 tune_params::PREF_NEON_STRINGOPS_FALSE,
2310 tune_params::FUSE_NOTHING,
2311 tune_params::SCHED_AUTOPREF_OFF
2312 };
2313
2314 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2315 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2316 cortex-m23. */
2317 const struct tune_params arm_v6m_tune =
2318 {
2319 &generic_extra_costs, /* Insn extra costs. */
2320 &generic_addr_mode_costs, /* Addressing mode costs. */
2321 NULL, /* Sched adj cost. */
2322 arm_default_branch_cost,
2323 &arm_default_vec_cost, /* Vectorizer costs. */
2324 1, /* Constant limit. */
2325 5, /* Max cond insns. */
2326 8, /* Memset max inline. */
2327 1, /* Issue rate. */
2328 ARM_PREFETCH_NOT_BENEFICIAL,
2329 tune_params::PREF_CONST_POOL_FALSE,
2330 tune_params::PREF_LDRD_FALSE,
2331 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2332 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2333 tune_params::DISPARAGE_FLAGS_NEITHER,
2334 tune_params::PREF_NEON_64_FALSE,
2335 tune_params::PREF_NEON_STRINGOPS_FALSE,
2336 tune_params::FUSE_NOTHING,
2337 tune_params::SCHED_AUTOPREF_OFF
2338 };
2339
2340 const struct tune_params arm_fa726te_tune =
2341 {
2342 &generic_extra_costs, /* Insn extra costs. */
2343 &generic_addr_mode_costs, /* Addressing mode costs. */
2344 fa726te_sched_adjust_cost,
2345 arm_default_branch_cost,
2346 &arm_default_vec_cost,
2347 1, /* Constant limit. */
2348 5, /* Max cond insns. */
2349 8, /* Memset max inline. */
2350 2, /* Issue rate. */
2351 ARM_PREFETCH_NOT_BENEFICIAL,
2352 tune_params::PREF_CONST_POOL_TRUE,
2353 tune_params::PREF_LDRD_FALSE,
2354 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2355 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2356 tune_params::DISPARAGE_FLAGS_NEITHER,
2357 tune_params::PREF_NEON_64_FALSE,
2358 tune_params::PREF_NEON_STRINGOPS_FALSE,
2359 tune_params::FUSE_NOTHING,
2360 tune_params::SCHED_AUTOPREF_OFF
2361 };
2362
2363 /* Auto-generated CPU, FPU and architecture tables. */
2364 #include "arm-cpu-data.h"
2365
2366 /* The name of the preprocessor macro to define for this architecture. PROFILE
2367 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2368 is thus chosen to be big enough to hold the longest architecture name. */
2369
2370 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
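/* Editor's note (not part of the original source): the "PROFILE" part of the
   buffer above is later overwritten with the real architecture name, so for
   -march=armv8-a, for instance, the macro that ends up being defined is
   __ARM_ARCH_8A__.  */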
2371
2372 /* Supported TLS relocations. */
2373
2374 enum tls_reloc {
2375 TLS_GD32,
2376 TLS_LDM32,
2377 TLS_LDO32,
2378 TLS_IE32,
2379 TLS_LE32,
2380 TLS_DESCSEQ /* GNU scheme */
2381 };
2382
2383 /* The maximum number of insns to be used when loading a constant. */
2384 inline static int
2385 arm_constant_limit (bool size_p)
2386 {
2387 return size_p ? 1 : current_tune->constant_limit;
2388 }
2389
2390 /* Emit an insn that's a simple single-set. Both the operands must be known
2391 to be valid. */
2392 inline static rtx_insn *
2393 emit_set_insn (rtx x, rtx y)
2394 {
2395 return emit_insn (gen_rtx_SET (x, y));
2396 }
2397
2398 /* Return the number of bits set in VALUE. */
2399 static unsigned
2400 bit_count (unsigned long value)
2401 {
2402 unsigned long count = 0;
2403
2404 while (value)
2405 {
2406 count++;
2407 value &= value - 1; /* Clear the least-significant set bit. */
2408 }
2409
2410 return count;
2411 }
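/* Editor's note (not part of the original source): each "value &= value - 1"
   clears the lowest set bit, so the loop above iterates once per set bit.
   For example, with value = 0b101100:

     0b101100 -> 0b101000 -> 0b100000 -> 0, giving bit_count () == 3.  */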
2412
2413 /* Return the number of bits set in BMAP. */
2414 static unsigned
2415 bitmap_popcount (const sbitmap bmap)
2416 {
2417 unsigned int count = 0;
2418 unsigned int n = 0;
2419 sbitmap_iterator sbi;
2420
2421 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2422 count++;
2423 return count;
2424 }
2425
2426 typedef struct
2427 {
2428 machine_mode mode;
2429 const char *name;
2430 } arm_fixed_mode_set;
2431
2432 /* A small helper for setting fixed-point libfuncs. */
2433
2434 static void
2435 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2436 const char *funcname, const char *modename,
2437 int num_suffix)
2438 {
2439 char buffer[50];
2440
2441 if (num_suffix == 0)
2442 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2443 else
2444 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2445
2446 set_optab_libfunc (optable, mode, buffer);
2447 }
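/* Editor's illustration (not part of the original source): a call such as

     arm_set_fixed_optab_libfunc (ssadd_optab, E_SAmode, "ssadd", "sa", 3);

   (see the loop further down) registers the libcall name "__gnu_ssaddsa3"
   for saturating SAmode addition.  */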
2448
2449 static void
2450 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2451 machine_mode from, const char *funcname,
2452 const char *toname, const char *fromname)
2453 {
2454 char buffer[50];
2455 const char *maybe_suffix_2 = "";
2456
2457 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2458 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2459 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2460 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2461 maybe_suffix_2 = "2";
2462
2463 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2464 maybe_suffix_2);
2465
2466 set_conv_libfunc (optable, to, from, buffer);
2467 }
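/* Editor's illustration (not part of the original source): with the sprintf
   above, converting HQmode to SQmode (both signed fract modes) registers
   "__gnu_fracthqsq2", while converting SImode to SAmode (only one side is a
   fixed-point mode) gets no "2" suffix and registers "__gnu_fractsisa".  */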
2468
2469 /* Set up library functions unique to ARM. */
2470
2471 static void
2472 arm_init_libfuncs (void)
2473 {
2474 /* For Linux, we have access to kernel support for atomic operations. */
2475 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2476 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2477
2478 /* There are no special library functions unless we are using the
2479 ARM BPABI. */
2480 if (!TARGET_BPABI)
2481 return;
2482
2483 /* The functions below are described in Section 4 of the "Run-Time
2484 ABI for the ARM architecture", Version 1.0. */
2485
2486 /* Double-precision floating-point arithmetic. Table 2. */
2487 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2488 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2489 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2490 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2491 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2492
2493 /* Double-precision comparisons. Table 3. */
2494 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2495 set_optab_libfunc (ne_optab, DFmode, NULL);
2496 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2497 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2498 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2499 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2500 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2501
2502 /* Single-precision floating-point arithmetic. Table 4. */
2503 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2504 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2505 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2506 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2507 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2508
2509 /* Single-precision comparisons. Table 5. */
2510 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2511 set_optab_libfunc (ne_optab, SFmode, NULL);
2512 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2513 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2514 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2515 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2516 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2517
2518 /* Floating-point to integer conversions. Table 6. */
2519 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2520 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2521 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2522 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2523 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2524 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2525 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2526 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2527
2528 /* Conversions between floating types. Table 7. */
2529 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2530 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2531
2532 /* Integer to floating-point conversions. Table 8. */
2533 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2534 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2535 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2536 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2537 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2538 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2539 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2540 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2541
2542 /* Long long. Table 9. */
2543 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2544 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2545 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2546 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2547 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2548 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2549 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2550 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2551
2552 /* Integer (32/32->32) division. \S 4.3.1. */
2553 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2554 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2555
2556 /* The divmod functions are designed so that they can be used for
2557 plain division, even though they return both the quotient and the
2558 remainder. The quotient is returned in the usual location (i.e.,
2559 r0 for SImode, {r0, r1} for DImode), just as would be expected
2560 for an ordinary division routine. Because the AAPCS calling
2561 conventions specify that all of { r0, r1, r2, r3 } are
2562 call-clobbered registers, there is no need to tell the compiler
2563 explicitly that those registers are clobbered by these
2564 routines. */
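/* Editor's illustration (not part of the original source): given the two
   registrations below, a plain DImode division such as

     long long quotient (long long a, long long b) { return a / b; }

   compiles to a call to __aeabi_ldivmod; the quotient comes back in
   {r0, r1} and the remainder in {r2, r3} is simply ignored.  */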
2565 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2566 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2567
2568 /* For SImode division the ABI provides div-without-mod routines,
2569 which are faster. */
2570 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2571 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2572
2573 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2574 divmod libcalls instead. */
2575 set_optab_libfunc (smod_optab, DImode, NULL);
2576 set_optab_libfunc (umod_optab, DImode, NULL);
2577 set_optab_libfunc (smod_optab, SImode, NULL);
2578 set_optab_libfunc (umod_optab, SImode, NULL);
2579
2580 /* Half-precision float operations. The compiler handles all operations
2581 with NULL libfuncs by converting to SFmode. */
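/* Editor's sketch (not part of the original source): with the arithmetic and
   comparison libfuncs left NULL below, an HFmode operation such as a + b is
   performed by widening to SFmode and narrowing the result, conceptually

       __gnu_f2h_ieee (__gnu_h2f_ieee (a) + __gnu_h2f_ieee (b))

   (or the _alternative variants for the non-IEEE format).  */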
2582 switch (arm_fp16_format)
2583 {
2584 case ARM_FP16_FORMAT_IEEE:
2585 case ARM_FP16_FORMAT_ALTERNATIVE:
2586
2587 /* Conversions. */
2588 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2589 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2590 ? "__gnu_f2h_ieee"
2591 : "__gnu_f2h_alternative"));
2592 set_conv_libfunc (sext_optab, SFmode, HFmode,
2593 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2594 ? "__gnu_h2f_ieee"
2595 : "__gnu_h2f_alternative"));
2596
2597 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2598 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2599 ? "__gnu_d2h_ieee"
2600 : "__gnu_d2h_alternative"));
2601
2602 /* Arithmetic. */
2603 set_optab_libfunc (add_optab, HFmode, NULL);
2604 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2605 set_optab_libfunc (smul_optab, HFmode, NULL);
2606 set_optab_libfunc (neg_optab, HFmode, NULL);
2607 set_optab_libfunc (sub_optab, HFmode, NULL);
2608
2609 /* Comparisons. */
2610 set_optab_libfunc (eq_optab, HFmode, NULL);
2611 set_optab_libfunc (ne_optab, HFmode, NULL);
2612 set_optab_libfunc (lt_optab, HFmode, NULL);
2613 set_optab_libfunc (le_optab, HFmode, NULL);
2614 set_optab_libfunc (ge_optab, HFmode, NULL);
2615 set_optab_libfunc (gt_optab, HFmode, NULL);
2616 set_optab_libfunc (unord_optab, HFmode, NULL);
2617 break;
2618
2619 default:
2620 break;
2621 }
2622
2623 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2624 {
2625 const arm_fixed_mode_set fixed_arith_modes[] =
2626 {
2627 { E_QQmode, "qq" },
2628 { E_UQQmode, "uqq" },
2629 { E_HQmode, "hq" },
2630 { E_UHQmode, "uhq" },
2631 { E_SQmode, "sq" },
2632 { E_USQmode, "usq" },
2633 { E_DQmode, "dq" },
2634 { E_UDQmode, "udq" },
2635 { E_TQmode, "tq" },
2636 { E_UTQmode, "utq" },
2637 { E_HAmode, "ha" },
2638 { E_UHAmode, "uha" },
2639 { E_SAmode, "sa" },
2640 { E_USAmode, "usa" },
2641 { E_DAmode, "da" },
2642 { E_UDAmode, "uda" },
2643 { E_TAmode, "ta" },
2644 { E_UTAmode, "uta" }
2645 };
2646 const arm_fixed_mode_set fixed_conv_modes[] =
2647 {
2648 { E_QQmode, "qq" },
2649 { E_UQQmode, "uqq" },
2650 { E_HQmode, "hq" },
2651 { E_UHQmode, "uhq" },
2652 { E_SQmode, "sq" },
2653 { E_USQmode, "usq" },
2654 { E_DQmode, "dq" },
2655 { E_UDQmode, "udq" },
2656 { E_TQmode, "tq" },
2657 { E_UTQmode, "utq" },
2658 { E_HAmode, "ha" },
2659 { E_UHAmode, "uha" },
2660 { E_SAmode, "sa" },
2661 { E_USAmode, "usa" },
2662 { E_DAmode, "da" },
2663 { E_UDAmode, "uda" },
2664 { E_TAmode, "ta" },
2665 { E_UTAmode, "uta" },
2666 { E_QImode, "qi" },
2667 { E_HImode, "hi" },
2668 { E_SImode, "si" },
2669 { E_DImode, "di" },
2670 { E_TImode, "ti" },
2671 { E_SFmode, "sf" },
2672 { E_DFmode, "df" }
2673 };
2674 unsigned int i, j;
2675
2676 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2677 {
2678 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2679 "add", fixed_arith_modes[i].name, 3);
2680 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2681 "ssadd", fixed_arith_modes[i].name, 3);
2682 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2683 "usadd", fixed_arith_modes[i].name, 3);
2684 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2685 "sub", fixed_arith_modes[i].name, 3);
2686 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2687 "sssub", fixed_arith_modes[i].name, 3);
2688 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2689 "ussub", fixed_arith_modes[i].name, 3);
2690 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2691 "mul", fixed_arith_modes[i].name, 3);
2692 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2693 "ssmul", fixed_arith_modes[i].name, 3);
2694 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2695 "usmul", fixed_arith_modes[i].name, 3);
2696 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2697 "div", fixed_arith_modes[i].name, 3);
2698 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2699 "udiv", fixed_arith_modes[i].name, 3);
2700 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2701 "ssdiv", fixed_arith_modes[i].name, 3);
2702 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2703 "usdiv", fixed_arith_modes[i].name, 3);
2704 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2705 "neg", fixed_arith_modes[i].name, 2);
2706 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2707 "ssneg", fixed_arith_modes[i].name, 2);
2708 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2709 "usneg", fixed_arith_modes[i].name, 2);
2710 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2711 "ashl", fixed_arith_modes[i].name, 3);
2712 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2713 "ashr", fixed_arith_modes[i].name, 3);
2714 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2715 "lshr", fixed_arith_modes[i].name, 3);
2716 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2717 "ssashl", fixed_arith_modes[i].name, 3);
2718 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2719 "usashl", fixed_arith_modes[i].name, 3);
2720 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2721 "cmp", fixed_arith_modes[i].name, 2);
2722 }
2723
2724 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2725 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2726 {
2727 if (i == j
2728 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2729 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2730 continue;
2731
2732 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2733 fixed_conv_modes[j].mode, "fract",
2734 fixed_conv_modes[i].name,
2735 fixed_conv_modes[j].name);
2736 arm_set_fixed_conv_libfunc (satfract_optab,
2737 fixed_conv_modes[i].mode,
2738 fixed_conv_modes[j].mode, "satfract",
2739 fixed_conv_modes[i].name,
2740 fixed_conv_modes[j].name);
2741 arm_set_fixed_conv_libfunc (fractuns_optab,
2742 fixed_conv_modes[i].mode,
2743 fixed_conv_modes[j].mode, "fractuns",
2744 fixed_conv_modes[i].name,
2745 fixed_conv_modes[j].name);
2746 arm_set_fixed_conv_libfunc (satfractuns_optab,
2747 fixed_conv_modes[i].mode,
2748 fixed_conv_modes[j].mode, "satfractuns",
2749 fixed_conv_modes[i].name,
2750 fixed_conv_modes[j].name);
2751 }
2752 }
2753
2754 if (TARGET_AAPCS_BASED)
2755 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2756 }
2757
2758 /* On AAPCS systems, this is the "struct __va_list". */
2759 static GTY(()) tree va_list_type;
2760
2761 /* Return the type to use as __builtin_va_list. */
2762 static tree
2763 arm_build_builtin_va_list (void)
2764 {
2765 tree va_list_name;
2766 tree ap_field;
2767
2768 if (!TARGET_AAPCS_BASED)
2769 return std_build_builtin_va_list ();
2770
2771 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2772 defined as:
2773
2774 struct __va_list
2775 {
2776 void *__ap;
2777 };
2778
2779 The C Library ABI further reinforces this definition in \S
2780 4.1.
2781
2782 We must follow this definition exactly. The structure tag
2783 name is visible in C++ mangled names, and thus forms a part
2784 of the ABI. The field name may be used by people who
2785 #include <stdarg.h>. */
2786 /* Create the type. */
2787 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2788 /* Give it the required name. */
2789 va_list_name = build_decl (BUILTINS_LOCATION,
2790 TYPE_DECL,
2791 get_identifier ("__va_list"),
2792 va_list_type);
2793 DECL_ARTIFICIAL (va_list_name) = 1;
2794 TYPE_NAME (va_list_type) = va_list_name;
2795 TYPE_STUB_DECL (va_list_type) = va_list_name;
2796 /* Create the __ap field. */
2797 ap_field = build_decl (BUILTINS_LOCATION,
2798 FIELD_DECL,
2799 get_identifier ("__ap"),
2800 ptr_type_node);
2801 DECL_ARTIFICIAL (ap_field) = 1;
2802 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2803 TYPE_FIELDS (va_list_type) = ap_field;
2804 /* Compute its layout. */
2805 layout_type (va_list_type);
2806
2807 return va_list_type;
2808 }
2809
2810 /* Return an expression of type "void *" pointing to the next
2811 available argument in a variable-argument list. VALIST is the
2812 user-level va_list object, of type __builtin_va_list. */
2813 static tree
2814 arm_extract_valist_ptr (tree valist)
2815 {
2816 if (TREE_TYPE (valist) == error_mark_node)
2817 return error_mark_node;
2818
2819 /* On an AAPCS target, the pointer is stored within "struct
2820 va_list". */
2821 if (TARGET_AAPCS_BASED)
2822 {
2823 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2824 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2825 valist, ap_field, NULL_TREE);
2826 }
2827
2828 return valist;
2829 }
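/* Editor's note (not part of the original source): on an AAPCS target the
   tree built above is equivalent to the C expression VALIST.__ap, i.e. the
   single pointer field laid out in arm_build_builtin_va_list; on other
   targets VALIST is already the pointer and is returned unchanged.  */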
2830
2831 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2832 static void
2833 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2834 {
2835 valist = arm_extract_valist_ptr (valist);
2836 std_expand_builtin_va_start (valist, nextarg);
2837 }
2838
2839 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2840 static tree
2841 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2842 gimple_seq *post_p)
2843 {
2844 valist = arm_extract_valist_ptr (valist);
2845 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2846 }
2847
2848 /* Check any incompatible options that the user has specified. */
2849 static void
2850 arm_option_check_internal (struct gcc_options *opts)
2851 {
2852 int flags = opts->x_target_flags;
2853
2854 /* iWMMXt and NEON are incompatible. */
2855 if (TARGET_IWMMXT
2856 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2857 error ("iWMMXt and NEON are incompatible");
2858
2859 /* Make sure that the processor choice does not conflict with any of the
2860 other command line choices. */
2861 if (TARGET_ARM_P (flags)
2862 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2863 error ("target CPU does not support ARM mode");
2864
2865 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2866 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2867 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2868
2869 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2870 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2871
2872 /* If this target is normally configured to use APCS frames, warn if they
2873 are turned off and debugging is turned on. */
2874 if (TARGET_ARM_P (flags)
2875 && write_symbols != NO_DEBUG
2876 && !TARGET_APCS_FRAME
2877 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2878 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2879
2880 /* iWMMXt unsupported under Thumb mode. */
2881 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2882 error ("iWMMXt unsupported under Thumb mode");
2883
2884 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2885 error ("can not use -mtp=cp15 with 16-bit Thumb");
2886
2887 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2888 {
2889 error ("RTP PIC is incompatible with Thumb");
2890 flag_pic = 0;
2891 }
2892
2893 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2894 with MOVT. */
2895 if ((target_pure_code || target_slow_flash_data)
2896 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2897 {
2898 const char *flag = (target_pure_code ? "-mpure-code" :
2899 "-mslow-flash-data");
2900 error ("%s only supports non-pic code on M-profile targets with the "
2901 "MOVT instruction", flag);
2902 }
2903
2904 }
2905
2906 /* Recompute the global settings depending on target attribute options. */
2907
2908 static void
2909 arm_option_params_internal (void)
2910 {
2911 /* If we are not using the default (ARM mode) section anchor offset
2912 ranges, then set the correct ranges now. */
2913 if (TARGET_THUMB1)
2914 {
2915 /* Thumb-1 LDR instructions cannot have negative offsets.
2916 Permissible positive offset ranges are 5-bit (for byte loads),
2917 6-bit (for halfword loads), or 7-bit (for word loads).
2918 Empirical results suggest a 7-bit anchor range gives the best
2919 overall code size. */
2920 targetm.min_anchor_offset = 0;
2921 targetm.max_anchor_offset = 127;
2922 }
2923 else if (TARGET_THUMB2)
2924 {
2925 /* The minimum is set such that the total size of the block
2926 for a particular anchor is 248 + 1 + 4095 bytes, which is
2927 divisible by eight, ensuring natural spacing of anchors. */
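/* Editor's note: 248 + 1 + 4095 = 4344 = 8 * 543, hence the divisibility
   claim above.  */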
2928 targetm.min_anchor_offset = -248;
2929 targetm.max_anchor_offset = 4095;
2930 }
2931 else
2932 {
2933 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2934 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2935 }
2936
2937 /* With -Os, use a fixed limit of four conditional instructions instead of the tuning's value, to favour code size. */
2938 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2939
2940 /* For Thumb-2, we limit the conditional sequence to one IT block (at most four instructions). */
2941 if (TARGET_THUMB2)
2942 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
2943 }
2944
2945 /* True if -mflip-thumb should next add an attribute for the default
2946 mode, false if it should next add an attribute for the opposite mode. */
2947 static GTY(()) bool thumb_flipper;
2948
2949 /* Options after initial target override. */
2950 static GTY(()) tree init_optimize;
2951
2952 static void
2953 arm_override_options_after_change_1 (struct gcc_options *opts)
2954 {
2955 if (opts->x_align_functions <= 0)
2956 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2957 && opts->x_optimize_size ? 2 : 4;
2958 }
2959
2960 /* Implement targetm.override_options_after_change. */
2961
2962 static void
2963 arm_override_options_after_change (void)
2964 {
2965 arm_configure_build_target (&arm_active_target,
2966 TREE_TARGET_OPTION (target_option_default_node),
2967 &global_options_set, false);
2968
2969 arm_override_options_after_change_1 (&global_options);
2970 }
2971
2972 /* Implement TARGET_OPTION_SAVE. */
2973 static void
2974 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2975 {
2976 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2977 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2978 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2979 }
2980
2981 /* Implement TARGET_OPTION_RESTORE. */
2982 static void
2983 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2984 {
2985 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2986 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2987 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2988 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2989 false);
2990 }
2991
2992 /* Reset options between modes that the user has specified. */
2993 static void
2994 arm_option_override_internal (struct gcc_options *opts,
2995 struct gcc_options *opts_set)
2996 {
2997 arm_override_options_after_change_1 (opts);
2998
2999 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3000 {
3001 /* The default is to enable interworking, so this warning message would
3002 be confusing to users who have just compiled with, e.g., -march=armv3. */
3003 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3004 opts->x_target_flags &= ~MASK_INTERWORK;
3005 }
3006
3007 if (TARGET_THUMB_P (opts->x_target_flags)
3008 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3009 {
3010 warning (0, "target CPU does not support THUMB instructions");
3011 opts->x_target_flags &= ~MASK_THUMB;
3012 }
3013
3014 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3015 {
3016 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3017 opts->x_target_flags &= ~MASK_APCS_FRAME;
3018 }
3019
3020 /* Callee super interworking implies thumb interworking. Adding
3021 this to the flags here simplifies the logic elsewhere. */
3022 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3023 opts->x_target_flags |= MASK_INTERWORK;
3024
3025 /* Need to remember the initial values so that combinations of options like
3026 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3027 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3028
3029 if (! opts_set->x_arm_restrict_it)
3030 opts->x_arm_restrict_it = arm_arch8;
3031
3032 /* ARM execution state and M-profile targets don't have restricted IT. */
3033 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3034 opts->x_arm_restrict_it = 0;
3035
3036 /* Enable -munaligned-access by default for
3037 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA,
3038 i.e. Thumb2 and ARM state only.
3039 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3040 - ARMv8 architecture-based processors.
3041
3042 Disable -munaligned-access by default for
3043 - all pre-ARMv6 architecture-based processors
3044 - ARMv6-M architecture-based processors
3045 - ARMv8-M Baseline processors. */
3046
3047 if (! opts_set->x_unaligned_access)
3048 {
3049 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3050 && arm_arch6 && (arm_arch_notm || arm_arch7));
3051 }
3052 else if (opts->x_unaligned_access == 1
3053 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3054 {
3055 warning (0, "target CPU does not support unaligned accesses");
3056 opts->x_unaligned_access = 0;
3057 }
3058
3059 /* Don't warn since it's on by default in -O2. */
3060 if (TARGET_THUMB1_P (opts->x_target_flags))
3061 opts->x_flag_schedule_insns = 0;
3062 else
3063 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3064
3065 /* Disable shrink-wrap when optimizing function for size, since it tends to
3066 generate additional returns. */
3067 if (optimize_function_for_size_p (cfun)
3068 && TARGET_THUMB2_P (opts->x_target_flags))
3069 opts->x_flag_shrink_wrap = false;
3070 else
3071 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3072
3073 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3074 - epilogue_insns - does not accurately model the corresponding insns
3075 emitted in the asm file. In particular, see the comment in thumb_exit
3076 'Find out how many of the (return) argument registers we can corrupt'.
3077 As a consequence, the epilogue may clobber registers without fipa-ra
3078 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3079 TODO: Accurately model clobbers for epilogue_insns and reenable
3080 fipa-ra. */
3081 if (TARGET_THUMB1_P (opts->x_target_flags))
3082 opts->x_flag_ipa_ra = 0;
3083 else
3084 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3085
3086 /* Thumb2 inline assembly code should always use unified syntax.
3087 This will apply to ARM and Thumb1 eventually. */
3088 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3089
3090 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3091 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3092 #endif
3093 }
3094
3095 static sbitmap isa_all_fpubits;
3096 static sbitmap isa_quirkbits;
3097
3098 /* Configure a build target TARGET from the user-specified options OPTS and
3099 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3100 architecture have been specified, but the two are not identical. */
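/* Editor's illustration (not part of the original source): with, for example,
   "-march=armv7-a -mcpu=cortex-m3", the ISA delta computed below is non-empty
   even after masking the quirk and FPU bits, so when WARN_COMPATIBLE is set
   the user sees "switch -mcpu=cortex-m3 conflicts with -march=armv7-a switch";
   -march then wins for code generation while -mcpu still provides the default
   tuning.  */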
3101 void
3102 arm_configure_build_target (struct arm_build_target *target,
3103 struct cl_target_option *opts,
3104 struct gcc_options *opts_set,
3105 bool warn_compatible)
3106 {
3107 const cpu_option *arm_selected_tune = NULL;
3108 const arch_option *arm_selected_arch = NULL;
3109 const cpu_option *arm_selected_cpu = NULL;
3110 const arm_fpu_desc *arm_selected_fpu = NULL;
3111 const char *tune_opts = NULL;
3112 const char *arch_opts = NULL;
3113 const char *cpu_opts = NULL;
3114
3115 bitmap_clear (target->isa);
3116 target->core_name = NULL;
3117 target->arch_name = NULL;
3118
3119 if (opts_set->x_arm_arch_string)
3120 {
3121 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3122 "-march",
3123 opts->x_arm_arch_string);
3124 arch_opts = strchr (opts->x_arm_arch_string, '+');
3125 }
3126
3127 if (opts_set->x_arm_cpu_string)
3128 {
3129 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3130 opts->x_arm_cpu_string);
3131 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3132 arm_selected_tune = arm_selected_cpu;
3133 /* If taking the tuning from -mcpu, we don't need to rescan the
3134 options for tuning. */
3135 }
3136
3137 if (opts_set->x_arm_tune_string)
3138 {
3139 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3140 opts->x_arm_tune_string);
3141 tune_opts = strchr (opts->x_arm_tune_string, '+');
3142 }
3143
3144 if (arm_selected_arch)
3145 {
3146 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3147 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3148 arch_opts);
3149
3150 if (arm_selected_cpu)
3151 {
3152 auto_sbitmap cpu_isa (isa_num_bits);
3153 auto_sbitmap isa_delta (isa_num_bits);
3154
3155 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3156 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3157 cpu_opts);
3158 bitmap_xor (isa_delta, cpu_isa, target->isa);
3159 /* Ignore any bits that are quirk bits. */
3160 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3161 /* Ignore (for now) any bits that might be set by -mfpu. */
3162 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3163
3164 if (!bitmap_empty_p (isa_delta))
3165 {
3166 if (warn_compatible)
3167 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3168 arm_selected_cpu->common.name,
3169 arm_selected_arch->common.name);
3170 /* -march wins for code generation.
3171 -mcpu wins for default tuning. */
3172 if (!arm_selected_tune)
3173 arm_selected_tune = arm_selected_cpu;
3174
3175 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3176 target->arch_name = arm_selected_arch->common.name;
3177 }
3178 else
3179 {
3180 /* Architecture and CPU are essentially the same.
3181 Prefer the CPU setting. */
3182 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3183 target->core_name = arm_selected_cpu->common.name;
3184 /* Copy the CPU's capabilities, so that we inherit the
3185 appropriate extensions and quirks. */
3186 bitmap_copy (target->isa, cpu_isa);
3187 }
3188 }
3189 else
3190 {
3191 /* Pick a CPU based on the architecture. */
3192 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3193 target->arch_name = arm_selected_arch->common.name;
3194 /* Note: target->core_name is left unset in this path. */
3195 }
3196 }
3197 else if (arm_selected_cpu)
3198 {
3199 target->core_name = arm_selected_cpu->common.name;
3200 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3201 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3202 cpu_opts);
3203 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3204 }
3205 /* If the user did not specify a processor or architecture, choose
3206 one for them. */
3207 else
3208 {
3209 const cpu_option *sel;
3210 auto_sbitmap sought_isa (isa_num_bits);
3211 bitmap_clear (sought_isa);
3212 auto_sbitmap default_isa (isa_num_bits);
3213
3214 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3215 TARGET_CPU_DEFAULT);
3216 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3217 gcc_assert (arm_selected_cpu->common.name);
3218
3219 /* RWE: All of the selection logic below (to the end of this
3220 'if' clause) looks somewhat suspect. It appears to be mostly
3221 there to support forcing thumb support when the default CPU
3222 does not have thumb (somewhat dubious in terms of what the
3223 user might be expecting). I think it should be removed once
3224 support for the pre-thumb era cores is removed. */
3225 sel = arm_selected_cpu;
3226 arm_initialize_isa (default_isa, sel->common.isa_bits);
3227 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3228 cpu_opts);
3229
3230 /* Now check to see if the user has specified any command line
3231 switches that require certain abilities from the cpu. */
3232
3233 if (TARGET_INTERWORK || TARGET_THUMB)
3234 {
3235 bitmap_set_bit (sought_isa, isa_bit_thumb);
3236 bitmap_set_bit (sought_isa, isa_bit_mode32);
3237
3238 /* There are no ARM processors that support both APCS-26 and
3239 interworking. Therefore we forcibly remove MODE26 from
3240 the isa features here (if it was set), so that the
3241 search below will always be able to find a compatible
3242 processor. */
3243 bitmap_clear_bit (default_isa, isa_bit_mode26);
3244 }
3245
3246 /* If there are such requirements and the default CPU does not
3247 satisfy them, we need to run over the complete list of
3248 cores looking for one that is satisfactory. */
3249 if (!bitmap_empty_p (sought_isa)
3250 && !bitmap_subset_p (sought_isa, default_isa))
3251 {
3252 auto_sbitmap candidate_isa (isa_num_bits);
3253 /* We're only interested in a CPU with at least the
3254 capabilities of the default CPU and the required
3255 additional features. */
3256 bitmap_ior (default_isa, default_isa, sought_isa);
3257
3258 /* Try to locate a CPU type that supports all of the abilities
3259 of the default CPU, plus the extra abilities requested by
3260 the user. */
3261 for (sel = all_cores; sel->common.name != NULL; sel++)
3262 {
3263 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3264 /* An exact match? */
3265 if (bitmap_equal_p (default_isa, candidate_isa))
3266 break;
3267 }
3268
3269 if (sel->common.name == NULL)
3270 {
3271 unsigned current_bit_count = isa_num_bits;
3272 const cpu_option *best_fit = NULL;
3273
3274 /* Ideally we would like to issue an error message here
3275 saying that it was not possible to find a CPU compatible
3276 with the default CPU, but which also supports the command
3277 line options specified by the programmer, and so they
3278 ought to use the -mcpu=<name> command line option to
3279 override the default CPU type.
3280
3281 If we cannot find a CPU that has exactly the
3282 characteristics of the default CPU and the given
3283 command line options we scan the array again looking
3284 for a best match. The best match must have at least
3285 the capabilities of the perfect match. */
3286 for (sel = all_cores; sel->common.name != NULL; sel++)
3287 {
3288 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3289
3290 if (bitmap_subset_p (default_isa, candidate_isa))
3291 {
3292 unsigned count;
3293
3294 bitmap_and_compl (candidate_isa, candidate_isa,
3295 default_isa);
3296 count = bitmap_popcount (candidate_isa);
3297
3298 if (count < current_bit_count)
3299 {
3300 best_fit = sel;
3301 current_bit_count = count;
3302 }
3303 }
3304
3305 gcc_assert (best_fit);
3306 sel = best_fit;
3307 }
3308 }
3309 arm_selected_cpu = sel;
3310 }
3311
3312 /* Now we know the CPU, we can finally initialize the target
3313 structure. */
3314 target->core_name = arm_selected_cpu->common.name;
3315 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3316 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3317 cpu_opts);
3318 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3319 }
3320
3321 gcc_assert (arm_selected_cpu);
3322 gcc_assert (arm_selected_arch);
3323
3324 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3325 {
3326 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3327 auto_sbitmap fpu_bits (isa_num_bits);
3328
3329 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3330 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3331 bitmap_ior (target->isa, target->isa, fpu_bits);
3332 }
3333
3334 if (!arm_selected_tune)
3335 arm_selected_tune = arm_selected_cpu;
3336 else /* Validate the features passed to -mtune. */
3337 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3338
3339 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3340
3341 /* Finish initializing the target structure. */
3342 target->arch_pp_name = arm_selected_arch->arch;
3343 target->base_arch = arm_selected_arch->base_arch;
3344 target->profile = arm_selected_arch->profile;
3345
3346 target->tune_flags = tune_data->tune_flags;
3347 target->tune = tune_data->tune;
3348 target->tune_core = tune_data->scheduler;
3349 arm_option_reconfigure_globals ();
3350 }
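
/* Illustrative example of the behaviour above (a hedged sketch, not taken
   from this file): if both -mcpu and -march are given and they disagree in
   bits other than quirks or FPU features, for instance

     gcc -march=armv7-a -mcpu=cortex-m3 ...

   then, with warn_compatible set, we expect the diagnostic
   "switch -mcpu=cortex-m3 conflicts with -march=armv7-a switch";
   -march wins for code generation while -mcpu only supplies the default
   tuning, as the comments in arm_configure_build_target state.  */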
3351
3352 /* Fix up any incompatible options that the user has specified. */
3353 static void
3354 arm_option_override (void)
3355 {
3356 static const enum isa_feature fpu_bitlist[]
3357 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3358 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3359 cl_target_option opts;
3360
3361 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3362 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3363
3364 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3365 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3366
3367 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3368
3369 if (!global_options_set.x_arm_fpu_index)
3370 {
3371 bool ok;
3372 int fpu_index;
3373
3374 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3375 CL_TARGET);
3376 gcc_assert (ok);
3377 arm_fpu_index = (enum fpu_type) fpu_index;
3378 }
3379
3380 cl_target_option_save (&opts, &global_options);
3381 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3382 true);
3383
3384 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3385 SUBTARGET_OVERRIDE_OPTIONS;
3386 #endif
3387
3388 /* Initialize boolean versions of the architectural flags, for use
3389 in the arm.md file and for enabling feature flags. */
3390 arm_option_reconfigure_globals ();
3391
3392 arm_tune = arm_active_target.tune_core;
3393 tune_flags = arm_active_target.tune_flags;
3394 current_tune = arm_active_target.tune;
3395
3396 /* TBD: Dwarf info for apcs frame is not handled yet. */
3397 if (TARGET_APCS_FRAME)
3398 flag_shrink_wrap = false;
3399
3400 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3401 {
3402 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3403 target_flags |= MASK_APCS_FRAME;
3404 }
3405
3406 if (TARGET_POKE_FUNCTION_NAME)
3407 target_flags |= MASK_APCS_FRAME;
3408
3409 if (TARGET_APCS_REENT && flag_pic)
3410 error ("-fpic and -mapcs-reent are incompatible");
3411
3412 if (TARGET_APCS_REENT)
3413 warning (0, "APCS reentrant code not supported. Ignored");
3414
3415 /* Set up some tuning parameters. */
3416 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3417 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3418 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3419 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3420 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3421 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3422
3423 /* For arm2/3 there is no need to do any scheduling if we are doing
3424 software floating-point. */
3425 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3426 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3427
3428 /* Override the default structure alignment for AAPCS ABI. */
3429 if (!global_options_set.x_arm_structure_size_boundary)
3430 {
3431 if (TARGET_AAPCS_BASED)
3432 arm_structure_size_boundary = 8;
3433 }
3434 else
3435 {
3436 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3437
3438 if (arm_structure_size_boundary != 8
3439 && arm_structure_size_boundary != 32
3440 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3441 {
3442 if (ARM_DOUBLEWORD_ALIGN)
3443 warning (0,
3444 "structure size boundary can only be set to 8, 32 or 64");
3445 else
3446 warning (0, "structure size boundary can only be set to 8 or 32");
3447 arm_structure_size_boundary
3448 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3449 }
3450 }
3451
3452 if (TARGET_VXWORKS_RTP)
3453 {
3454 if (!global_options_set.x_arm_pic_data_is_text_relative)
3455 arm_pic_data_is_text_relative = 0;
3456 }
3457 else if (flag_pic
3458 && !arm_pic_data_is_text_relative
3459 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3460 /* When text & data segments don't have a fixed displacement, the
3461 intended use is with a single, read only, pic base register.
3462 Unless the user explicitly requested not to do that, set
3463 it. */
3464 target_flags |= MASK_SINGLE_PIC_BASE;
3465
3466 /* If stack checking is disabled, we can use r10 as the PIC register,
3467 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3468 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3469 {
3470 if (TARGET_VXWORKS_RTP)
3471 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3472 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3473 }
3474
3475 if (flag_pic && TARGET_VXWORKS_RTP)
3476 arm_pic_register = 9;
3477
3478 if (arm_pic_register_string != NULL)
3479 {
3480 int pic_register = decode_reg_name (arm_pic_register_string);
3481
3482 if (!flag_pic)
3483 warning (0, "-mpic-register= is useless without -fpic");
3484
3485 /* Prevent the user from choosing an obviously stupid PIC register. */
3486 else if (pic_register < 0 || call_used_regs[pic_register]
3487 || pic_register == HARD_FRAME_POINTER_REGNUM
3488 || pic_register == STACK_POINTER_REGNUM
3489 || pic_register >= PC_REGNUM
3490 || (TARGET_VXWORKS_RTP
3491 && (unsigned int) pic_register != arm_pic_register))
3492 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3493 else
3494 arm_pic_register = pic_register;
3495 }
3496
3497 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3498 if (fix_cm3_ldrd == 2)
3499 {
3500 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3501 fix_cm3_ldrd = 1;
3502 else
3503 fix_cm3_ldrd = 0;
3504 }
3505
3506 /* Hot/Cold partitioning is not currently supported, since we can't
3507 handle literal pool placement in that case. */
3508 if (flag_reorder_blocks_and_partition)
3509 {
3510 inform (input_location,
3511 "-freorder-blocks-and-partition not supported on this architecture");
3512 flag_reorder_blocks_and_partition = 0;
3513 flag_reorder_blocks = 1;
3514 }
3515
3516 if (flag_pic)
3517 /* Hoisting PIC address calculations more aggressively provides a small,
3518 but measurable, size reduction for PIC code. Therefore, we decrease
3519 the bar for unrestricted expression hoisting to the cost of PIC address
3520 calculation, which is 2 instructions. */
3521 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3522 global_options.x_param_values,
3523 global_options_set.x_param_values);
3524
3525 /* ARM EABI defaults to strict volatile bitfields. */
3526 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3527 && abi_version_at_least(2))
3528 flag_strict_volatile_bitfields = 1;
3529
3530 /* Enable sw prefetching at -O3 for CPUs that have prefetch, and where
3531 we have deemed it beneficial (signified by setting
3532 prefetch.num_slots to 1 or more). */
3533 if (flag_prefetch_loop_arrays < 0
3534 && HAVE_prefetch
3535 && optimize >= 3
3536 && current_tune->prefetch.num_slots > 0)
3537 flag_prefetch_loop_arrays = 1;
3538
3539 /* Set up parameters to be used in prefetching algorithm. Do not
3540 override the defaults unless we are tuning for a core we have
3541 researched values for. */
3542 if (current_tune->prefetch.num_slots > 0)
3543 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3544 current_tune->prefetch.num_slots,
3545 global_options.x_param_values,
3546 global_options_set.x_param_values);
3547 if (current_tune->prefetch.l1_cache_line_size >= 0)
3548 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3549 current_tune->prefetch.l1_cache_line_size,
3550 global_options.x_param_values,
3551 global_options_set.x_param_values);
3552 if (current_tune->prefetch.l1_cache_size >= 0)
3553 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3554 current_tune->prefetch.l1_cache_size,
3555 global_options.x_param_values,
3556 global_options_set.x_param_values);
3557
3558 /* Use Neon rather than core registers to perform 64-bit
3559 operations. */
3560 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3561 if (use_neon_for_64bits == 1)
3562 prefer_neon_for_64bits = true;
3563
3564 /* Use the alternative scheduling-pressure algorithm by default. */
3565 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3566 global_options.x_param_values,
3567 global_options_set.x_param_values);
3568
3569 /* Look through ready list and all of queue for instructions
3570 relevant for L2 auto-prefetcher. */
3571 int param_sched_autopref_queue_depth;
3572
3573 switch (current_tune->sched_autopref)
3574 {
3575 case tune_params::SCHED_AUTOPREF_OFF:
3576 param_sched_autopref_queue_depth = -1;
3577 break;
3578
3579 case tune_params::SCHED_AUTOPREF_RANK:
3580 param_sched_autopref_queue_depth = 0;
3581 break;
3582
3583 case tune_params::SCHED_AUTOPREF_FULL:
3584 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3585 break;
3586
3587 default:
3588 gcc_unreachable ();
3589 }
3590
3591 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3592 param_sched_autopref_queue_depth,
3593 global_options.x_param_values,
3594 global_options_set.x_param_values);
3595
3596 /* Currently, for slow flash data, we just disable literal pools. We also
3597 disable them for pure-code. */
3598 if (target_slow_flash_data || target_pure_code)
3599 arm_disable_literal_pool = true;
3600
3601 /* Disable scheduling fusion by default if the processor is not ARMv7
3602 or does not prefer ldrd/strd. */
3603 if (flag_schedule_fusion == 2
3604 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3605 flag_schedule_fusion = 0;
3606
3607 /* Need to remember the initial options before they are overridden. */
3608 init_optimize = build_optimization_node (&global_options);
3609
3610 arm_options_perform_arch_sanity_checks ();
3611 arm_option_override_internal (&global_options, &global_options_set);
3612 arm_option_check_internal (&global_options);
3613 arm_option_params_internal ();
3614
3615 /* Create the default target_options structure. */
3616 target_option_default_node = target_option_current_node
3617 = build_target_option_node (&global_options);
3618
3619 /* Register global variables with the garbage collector. */
3620 arm_add_gc_roots ();
3621
3622 /* Init initial mode for testing. */
3623 thumb_flipper = TARGET_THUMB;
3624 }
3625
3626
3627 /* Reconfigure global status flags from the active_target.isa. */
3628 void
3629 arm_option_reconfigure_globals (void)
3630 {
3631 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3632 arm_base_arch = arm_active_target.base_arch;
3633
3634 /* Initialize boolean versions of the architectural flags, for use
3635 in the arm.md file. */
3636 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_armv3m);
3637 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3638 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3639 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5);
3640 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5e);
3641 arm_arch5te = arm_arch5e
3642 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3643 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3644 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3645 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3646 arm_arch6m = arm_arch6 && !arm_arch_notm;
3647 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3648 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3649 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3650 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3651 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3652 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3653 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3654 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3655 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3656 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3657 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3658 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3659 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3660 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3661 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3662 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3663 if (arm_fp16_inst)
3664 {
3665 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3666 error ("selected fp16 options are incompatible");
3667 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3668 }
3669
3670 /* And finally, set up some quirks. */
3671 arm_arch_no_volatile_ce
3672 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3673 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3674 isa_bit_quirk_armv6kz);
3675
3676 /* Use the cp15 method if it is available. */
3677 if (target_thread_pointer == TP_AUTO)
3678 {
3679 if (arm_arch6k && !TARGET_THUMB1)
3680 target_thread_pointer = TP_CP15;
3681 else
3682 target_thread_pointer = TP_SOFT;
3683 }
3684 }
3685
3686 /* Perform some validation between the desired architecture and the rest of the
3687 options. */
3688 void
3689 arm_options_perform_arch_sanity_checks (void)
3690 {
3691 /* V5 code we generate is completely interworking capable, so we turn off
3692 TARGET_INTERWORK here to avoid many tests later on. */
3693
3694 /* XXX However, we must pass the right pre-processor defines to CPP
3695 or GLD can get confused. This is a hack. */
3696 if (TARGET_INTERWORK)
3697 arm_cpp_interwork = 1;
3698
3699 if (arm_arch5)
3700 target_flags &= ~MASK_INTERWORK;
3701
3702 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3703 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3704
3705 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3706 error ("iwmmxt abi requires an iwmmxt capable cpu");
3707
3708 /* BPABI targets use linker tricks to allow interworking on cores
3709 without thumb support. */
3710 if (TARGET_INTERWORK
3711 && !TARGET_BPABI
3712 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3713 {
3714 warning (0, "target CPU does not support interworking");
3715 target_flags &= ~MASK_INTERWORK;
3716 }
3717
3718 /* If soft-float is specified then don't use FPU. */
3719 if (TARGET_SOFT_FLOAT)
3720 arm_fpu_attr = FPU_NONE;
3721 else
3722 arm_fpu_attr = FPU_VFP;
3723
3724 if (TARGET_AAPCS_BASED)
3725 {
3726 if (TARGET_CALLER_INTERWORKING)
3727 error ("AAPCS does not support -mcaller-super-interworking");
3728 else
3729 if (TARGET_CALLEE_INTERWORKING)
3730 error ("AAPCS does not support -mcallee-super-interworking");
3731 }
3732
3733 /* __fp16 support currently assumes the core has ldrh. */
3734 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3735 sorry ("__fp16 and no ldrh");
3736
3737 if (use_cmse && !arm_arch_cmse)
3738 error ("target CPU does not support ARMv8-M Security Extensions");
3739
3740 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3741 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3742 if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3743 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3744
3745
3746 if (TARGET_AAPCS_BASED)
3747 {
3748 if (arm_abi == ARM_ABI_IWMMXT)
3749 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3750 else if (TARGET_HARD_FLOAT_ABI)
3751 {
3752 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3753 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3754 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3755 }
3756 else
3757 arm_pcs_default = ARM_PCS_AAPCS;
3758 }
3759 else
3760 {
3761 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3762 sorry ("-mfloat-abi=hard and VFP");
3763
3764 if (arm_abi == ARM_ABI_APCS)
3765 arm_pcs_default = ARM_PCS_APCS;
3766 else
3767 arm_pcs_default = ARM_PCS_ATPCS;
3768 }
3769 }
3770
3771 static void
3772 arm_add_gc_roots (void)
3773 {
3774 gcc_obstack_init(&minipool_obstack);
3775 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3776 }
3777 \f
3778 /* A table of known ARM exception types.
3779 For use with the interrupt function attribute. */
3780
3781 typedef struct
3782 {
3783 const char *const arg;
3784 const unsigned long return_value;
3785 }
3786 isr_attribute_arg;
3787
3788 static const isr_attribute_arg isr_attribute_args [] =
3789 {
3790 { "IRQ", ARM_FT_ISR },
3791 { "irq", ARM_FT_ISR },
3792 { "FIQ", ARM_FT_FIQ },
3793 { "fiq", ARM_FT_FIQ },
3794 { "ABORT", ARM_FT_ISR },
3795 { "abort", ARM_FT_ISR },
3796 { "ABORT", ARM_FT_ISR },
3797 { "abort", ARM_FT_ISR },
3798 { "UNDEF", ARM_FT_EXCEPTION },
3799 { "undef", ARM_FT_EXCEPTION },
3800 { "SWI", ARM_FT_EXCEPTION },
3801 { "swi", ARM_FT_EXCEPTION },
3802 { NULL, ARM_FT_NORMAL }
3803 };
3804
3805 /* Returns the (interrupt) function type of the current
3806 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3807
3808 static unsigned long
3809 arm_isr_value (tree argument)
3810 {
3811 const isr_attribute_arg * ptr;
3812 const char * arg;
3813
3814 if (!arm_arch_notm)
3815 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3816
3817 /* No argument - default to IRQ. */
3818 if (argument == NULL_TREE)
3819 return ARM_FT_ISR;
3820
3821 /* Get the value of the argument. */
3822 if (TREE_VALUE (argument) == NULL_TREE
3823 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3824 return ARM_FT_UNKNOWN;
3825
3826 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3827
3828 /* Check it against the list of known arguments. */
3829 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3830 if (streq (arg, ptr->arg))
3831 return ptr->return_value;
3832
3833 /* An unrecognized interrupt type. */
3834 return ARM_FT_UNKNOWN;
3835 }
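
/* A hedged usage sketch (not part of this file): the strings in
   isr_attribute_args correspond to the argument of the "interrupt" (or
   "isr") function attribute in user code, e.g.

     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));
     void swi_handler (void) __attribute__ ((interrupt ("SWI")));

   With no argument the handler type defaults to ARM_FT_ISR, as handled
   above.  */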
3836
3837 /* Computes the type of the current function. */
3838
3839 static unsigned long
3840 arm_compute_func_type (void)
3841 {
3842 unsigned long type = ARM_FT_UNKNOWN;
3843 tree a;
3844 tree attr;
3845
3846 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3847
3848 /* Decide if the current function is volatile. Such functions
3849 never return, and many memory cycles can be saved by not storing
3850 register values that will never be needed again. This optimization
3851 was added to speed up context switching in a kernel application. */
3852 if (optimize > 0
3853 && (TREE_NOTHROW (current_function_decl)
3854 || !(flag_unwind_tables
3855 || (flag_exceptions
3856 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3857 && TREE_THIS_VOLATILE (current_function_decl))
3858 type |= ARM_FT_VOLATILE;
3859
3860 if (cfun->static_chain_decl != NULL)
3861 type |= ARM_FT_NESTED;
3862
3863 attr = DECL_ATTRIBUTES (current_function_decl);
3864
3865 a = lookup_attribute ("naked", attr);
3866 if (a != NULL_TREE)
3867 type |= ARM_FT_NAKED;
3868
3869 a = lookup_attribute ("isr", attr);
3870 if (a == NULL_TREE)
3871 a = lookup_attribute ("interrupt", attr);
3872
3873 if (a == NULL_TREE)
3874 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3875 else
3876 type |= arm_isr_value (TREE_VALUE (a));
3877
3878 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3879 type |= ARM_FT_CMSE_ENTRY;
3880
3881 return type;
3882 }
3883
3884 /* Returns the type of the current function. */
3885
3886 unsigned long
3887 arm_current_func_type (void)
3888 {
3889 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3890 cfun->machine->func_type = arm_compute_func_type ();
3891
3892 return cfun->machine->func_type;
3893 }
3894
3895 bool
3896 arm_allocate_stack_slots_for_args (void)
3897 {
3898 /* Naked functions should not allocate stack slots for arguments. */
3899 return !IS_NAKED (arm_current_func_type ());
3900 }
3901
3902 static bool
3903 arm_warn_func_return (tree decl)
3904 {
3905 /* Naked functions are implemented entirely in assembly, including the
3906 return sequence, so suppress warnings about this. */
3907 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3908 }
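
/* A hedged usage sketch (not part of this file): a "naked" function must
   provide its own prologue, epilogue and return sequence in assembly, which
   is why the missing-return warning is suppressed above, e.g.

     void __attribute__ ((naked)) trampoline_stub (void)
     {
       __asm__ volatile ("bx lr");
     }
*/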
3909
3910 \f
3911 /* Output assembler code for a block containing the constant parts
3912 of a trampoline, leaving space for the variable parts.
3913
3914 On the ARM, (if r8 is the static chain regnum, and remembering that
3915 referencing pc adds an offset of 8) the trampoline looks like:
3916 ldr r8, [pc, #0]
3917 ldr pc, [pc]
3918 .word static chain value
3919 .word function's address
3920 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3921
3922 static void
3923 arm_asm_trampoline_template (FILE *f)
3924 {
3925 fprintf (f, "\t.syntax unified\n");
3926
3927 if (TARGET_ARM)
3928 {
3929 fprintf (f, "\t.arm\n");
3930 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3931 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3932 }
3933 else if (TARGET_THUMB2)
3934 {
3935 fprintf (f, "\t.thumb\n");
3936 /* The Thumb-2 trampoline is similar to the arm implementation.
3937 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3938 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3939 STATIC_CHAIN_REGNUM, PC_REGNUM);
3940 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3941 }
3942 else
3943 {
3944 ASM_OUTPUT_ALIGN (f, 2);
3945 fprintf (f, "\t.code\t16\n");
3946 fprintf (f, ".Ltrampoline_start:\n");
3947 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3948 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3949 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3950 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3951 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3952 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3953 }
3954 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3955 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3956 }
3957
3958 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3959
3960 static void
3961 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3962 {
3963 rtx fnaddr, mem, a_tramp;
3964
3965 emit_block_move (m_tramp, assemble_trampoline_template (),
3966 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3967
3968 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3969 emit_move_insn (mem, chain_value);
3970
3971 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3972 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3973 emit_move_insn (mem, fnaddr);
3974
3975 a_tramp = XEXP (m_tramp, 0);
3976 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3977 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3978 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3979 }
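
/* Illustrative layout of the initialized ARM-mode trampoline (a hedged
   sketch derived from the template and the offsets used above, assuming r8
   is the static chain register as in the earlier comment):

     offset  0:  ldr r8, [pc, #0]   ; pc reads offset 8  -> static chain
     offset  4:  ldr pc, [pc, #0]   ; pc reads offset 12 -> function address
     offset  8:  <static chain value>
     offset 12:  <function's address>

   arm_trampoline_init then flushes the whole block with __clear_cache.  */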
3980
3981 /* Thumb trampolines should be entered in thumb mode, so set
3982 the bottom bit of the address. */
3983
3984 static rtx
3985 arm_trampoline_adjust_address (rtx addr)
3986 {
3987 if (TARGET_THUMB)
3988 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3989 NULL, 0, OPTAB_LIB_WIDEN);
3990 return addr;
3991 }
3992 \f
3993 /* Return 1 if it is possible to return using a single instruction.
3994 If SIBLING is non-null, this is a test for a return before a sibling
3995 call. SIBLING is the call insn, so we can examine its register usage. */
3996
3997 int
3998 use_return_insn (int iscond, rtx sibling)
3999 {
4000 int regno;
4001 unsigned int func_type;
4002 unsigned long saved_int_regs;
4003 unsigned HOST_WIDE_INT stack_adjust;
4004 arm_stack_offsets *offsets;
4005
4006 /* Never use a return instruction before reload has run. */
4007 if (!reload_completed)
4008 return 0;
4009
4010 func_type = arm_current_func_type ();
4011
4012 /* Naked, volatile and stack alignment functions need special
4013 consideration. */
4014 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4015 return 0;
4016
4017 /* So do interrupt functions that use the frame pointer and Thumb
4018 interrupt functions. */
4019 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4020 return 0;
4021
4022 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4023 && !optimize_function_for_size_p (cfun))
4024 return 0;
4025
4026 offsets = arm_get_frame_offsets ();
4027 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4028
4029 /* As do variadic functions. */
4030 if (crtl->args.pretend_args_size
4031 || cfun->machine->uses_anonymous_args
4032 /* Or if the function calls __builtin_eh_return () */
4033 || crtl->calls_eh_return
4034 /* Or if the function calls alloca */
4035 || cfun->calls_alloca
4036 /* Or if there is a stack adjustment. However, if the stack pointer
4037 is saved on the stack, we can use a pre-incrementing stack load. */
4038 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4039 && stack_adjust == 4))
4040 /* Or if the static chain register was saved above the frame, under the
4041 assumption that the stack pointer isn't saved on the stack. */
4042 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4043 && arm_compute_static_chain_stack_bytes() != 0))
4044 return 0;
4045
4046 saved_int_regs = offsets->saved_regs_mask;
4047
4048 /* Unfortunately, the insn
4049
4050 ldmib sp, {..., sp, ...}
4051
4052 triggers a bug on most SA-110 based devices, such that the stack
4053 pointer won't be correctly restored if the instruction takes a
4054 page fault. We work around this problem by popping r3 along with
4055 the other registers, since that is never slower than executing
4056 another instruction.
4057
4058 We test for !arm_arch5 here, because code for any architecture
4059 less than this could potentially be run on one of the buggy
4060 chips. */
4061 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
4062 {
4063 /* Validate that r3 is a call-clobbered register (always true in
4064 the default abi) ... */
4065 if (!call_used_regs[3])
4066 return 0;
4067
4068 /* ... that it isn't being used for a return value ... */
4069 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4070 return 0;
4071
4072 /* ... or for a tail-call argument ... */
4073 if (sibling)
4074 {
4075 gcc_assert (CALL_P (sibling));
4076
4077 if (find_regno_fusage (sibling, USE, 3))
4078 return 0;
4079 }
4080
4081 /* ... and that there are no call-saved registers in r0-r2
4082 (always true in the default ABI). */
4083 if (saved_int_regs & 0x7)
4084 return 0;
4085 }
4086
4087 /* Can't be done if interworking with Thumb, and any registers have been
4088 stacked. */
4089 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4090 return 0;
4091
4092 /* On StrongARM, conditional returns are expensive if they aren't
4093 taken and multiple registers have been stacked. */
4094 if (iscond && arm_tune_strongarm)
4095 {
4096 /* Conditional return when just the LR is stored is a simple
4097 conditional-load instruction, that's not expensive. */
4098 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4099 return 0;
4100
4101 if (flag_pic
4102 && arm_pic_register != INVALID_REGNUM
4103 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4104 return 0;
4105 }
4106
4107 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4108 several instructions if anything needs to be popped. */
4109 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4110 return 0;
4111
4112 /* If there are saved registers but the LR isn't saved, then we need
4113 two instructions for the return. */
4114 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4115 return 0;
4116
4117 /* Can't be done if any of the VFP regs are pushed,
4118 since this also requires an insn. */
4119 if (TARGET_HARD_FLOAT)
4120 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4121 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4122 return 0;
4123
4124 if (TARGET_REALLY_IWMMXT)
4125 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4126 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4127 return 0;
4128
4129 return 1;
4130 }
4131
4132 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4133 shrink-wrapping if possible. This is the case if we need to emit a
4134 prologue, which we can test by looking at the offsets. */
4135 bool
4136 use_simple_return_p (void)
4137 {
4138 arm_stack_offsets *offsets;
4139
4140 /* Note this function can be called before or after reload. */
4141 if (!reload_completed)
4142 arm_compute_frame_layout ();
4143
4144 offsets = arm_get_frame_offsets ();
4145 return offsets->outgoing_args != 0;
4146 }
4147
4148 /* Return TRUE if int I is a valid immediate ARM constant. */
4149
4150 int
4151 const_ok_for_arm (HOST_WIDE_INT i)
4152 {
4153 int lowbit;
4154
4155 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4156 be all zero, or all one. */
4157 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4158 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4159 != ((~(unsigned HOST_WIDE_INT) 0)
4160 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4161 return FALSE;
4162
4163 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4164
4165 /* Fast return for 0 and small values. We must do this for zero, since
4166 the code below can't handle that one case. */
4167 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4168 return TRUE;
4169
4170 /* Get the number of trailing zeros. */
4171 lowbit = ffs((int) i) - 1;
4172
4173 /* Only even shifts are allowed in ARM mode so round down to the
4174 nearest even number. */
4175 if (TARGET_ARM)
4176 lowbit &= ~1;
4177
4178 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4179 return TRUE;
4180
4181 if (TARGET_ARM)
4182 {
4183 /* Allow rotated constants in ARM mode. */
4184 if (lowbit <= 4
4185 && ((i & ~0xc000003f) == 0
4186 || (i & ~0xf000000f) == 0
4187 || (i & ~0xfc000003) == 0))
4188 return TRUE;
4189 }
4190 else if (TARGET_THUMB2)
4191 {
4192 HOST_WIDE_INT v;
4193
4194 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4195 v = i & 0xff;
4196 v |= v << 16;
4197 if (i == v || i == (v | (v << 8)))
4198 return TRUE;
4199
4200 /* Allow repeated pattern 0xXY00XY00. */
4201 v = i & 0xff00;
4202 v |= v << 16;
4203 if (i == v)
4204 return TRUE;
4205 }
4206 else if (TARGET_HAVE_MOVT)
4207 {
4208 /* Thumb-1 Targets with MOVT. */
4209 if (i > 0xffff)
4210 return FALSE;
4211 else
4212 return TRUE;
4213 }
4214
4215 return FALSE;
4216 }
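
/* Worked examples for the ARM-mode rule above (illustrative only): a valid
   immediate is an 8-bit value rotated right by an even amount.  A hedged,
   host-side sketch of that check (arm_mode_imm_ok_sketch is hypothetical):

     static int
     arm_mode_imm_ok_sketch (unsigned int x)
     {
       for (int rot = 0; rot < 32; rot += 2)
         {
           /* Rotate X left by ROT; if the result fits in 8 bits, X is a
              valid immediate.  */
           unsigned int v = rot ? ((x << rot) | (x >> (32 - rot))) : x;
           if ((v & ~0xffU) == 0)
             return 1;
         }
       return 0;
     }

   E.g. 0xff000000 is valid (0xff rotated right by 8) while 0x101 is not;
   Thumb-2 additionally accepts the replicated forms checked above, such as
   0x00120012 or 0x12121212.  */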
4217
4218 /* Return true if I is a valid constant for the operation CODE. */
4219 int
4220 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4221 {
4222 if (const_ok_for_arm (i))
4223 return 1;
4224
4225 switch (code)
4226 {
4227 case SET:
4228 /* See if we can use movw. */
4229 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4230 return 1;
4231 else
4232 /* Otherwise, try mvn. */
4233 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4234
4235 case PLUS:
4236 /* See if we can use addw or subw. */
4237 if (TARGET_THUMB2
4238 && ((i & 0xfffff000) == 0
4239 || ((-i) & 0xfffff000) == 0))
4240 return 1;
4241 /* Fall through. */
4242 case COMPARE:
4243 case EQ:
4244 case NE:
4245 case GT:
4246 case LE:
4247 case LT:
4248 case GE:
4249 case GEU:
4250 case LTU:
4251 case GTU:
4252 case LEU:
4253 case UNORDERED:
4254 case ORDERED:
4255 case UNEQ:
4256 case UNGE:
4257 case UNLT:
4258 case UNGT:
4259 case UNLE:
4260 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4261
4262 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4263 case XOR:
4264 return 0;
4265
4266 case IOR:
4267 if (TARGET_THUMB2)
4268 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4269 return 0;
4270
4271 case AND:
4272 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4273
4274 default:
4275 gcc_unreachable ();
4276 }
4277 }
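
/* Worked example for the AND case above (illustrative): 0xffffff00 is not
   itself a valid ARM immediate, but its complement 0xff is, so
   const_ok_for_op (0xffffff00, AND) returns 1 and the mask can be applied
   with a single bic:

     bic r0, r1, #255    @ r0 = r1 & 0xffffff00

   The SET case similarly falls back to mvn when the complement of the
   constant is representable.  */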
4278
4279 /* Return true if I is a valid di mode constant for the operation CODE. */
4280 int
4281 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4282 {
4283 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4284 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4285 rtx hi = GEN_INT (hi_val);
4286 rtx lo = GEN_INT (lo_val);
4287
4288 if (TARGET_THUMB1)
4289 return 0;
4290
4291 switch (code)
4292 {
4293 case AND:
4294 case IOR:
4295 case XOR:
4296 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4297 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4298 case PLUS:
4299 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4300
4301 default:
4302 return 0;
4303 }
4304 }
4305
4306 /* Emit a sequence of insns to handle a large constant.
4307 CODE is the code of the operation required, it can be any of SET, PLUS,
4308 IOR, AND, XOR, MINUS;
4309 MODE is the mode in which the operation is being performed;
4310 VAL is the integer to operate on;
4311 SOURCE is the other operand (a register, or a null-pointer for SET);
4312 SUBTARGETS means it is safe to create scratch registers if that will
4313 either produce a simpler sequence, or we will want to cse the values.
4314 Return value is the number of insns emitted. */
4315
4316 /* ??? Tweak this for thumb2. */
4317 int
4318 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4319 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4320 {
4321 rtx cond;
4322
4323 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4324 cond = COND_EXEC_TEST (PATTERN (insn));
4325 else
4326 cond = NULL_RTX;
4327
4328 if (subtargets || code == SET
4329 || (REG_P (target) && REG_P (source)
4330 && REGNO (target) != REGNO (source)))
4331 {
4332 /* After arm_reorg has been called, we can't fix up expensive
4333 constants by pushing them into memory so we must synthesize
4334 them in-line, regardless of the cost. This is only likely to
4335 be more costly on chips that have load delay slots and we are
4336 compiling without running the scheduler (so no splitting
4337 occurred before the final instruction emission).
4338
4339 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4340 */
4341 if (!cfun->machine->after_arm_reorg
4342 && !cond
4343 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4344 1, 0)
4345 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4346 + (code != SET))))
4347 {
4348 if (code == SET)
4349 {
4350 /* Currently SET is the only monadic value for CODE; all
4351 the rest are dyadic. */
4352 if (TARGET_USE_MOVT)
4353 arm_emit_movpair (target, GEN_INT (val));
4354 else
4355 emit_set_insn (target, GEN_INT (val));
4356
4357 return 1;
4358 }
4359 else
4360 {
4361 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4362
4363 if (TARGET_USE_MOVT)
4364 arm_emit_movpair (temp, GEN_INT (val));
4365 else
4366 emit_set_insn (temp, GEN_INT (val));
4367
4368 /* For MINUS, the source is subtracted from the constant, since we
4369 never have subtraction of a constant. */
4370 if (code == MINUS)
4371 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4372 else
4373 emit_set_insn (target,
4374 gen_rtx_fmt_ee (code, mode, source, temp));
4375 return 2;
4376 }
4377 }
4378 }
4379
4380 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4381 1);
4382 }
4383
4384 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4385 ARM/THUMB2 immediates and add up to VAL.
4386 The function return value gives the number of insns required. */
4387 static int
4388 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4389 struct four_ints *return_sequence)
4390 {
4391 int best_consecutive_zeros = 0;
4392 int i;
4393 int best_start = 0;
4394 int insns1, insns2;
4395 struct four_ints tmp_sequence;
4396
4397 /* If we aren't targeting ARM, the best place to start is always at
4398 the bottom, otherwise look more closely. */
4399 if (TARGET_ARM)
4400 {
4401 for (i = 0; i < 32; i += 2)
4402 {
4403 int consecutive_zeros = 0;
4404
4405 if (!(val & (3 << i)))
4406 {
4407 while ((i < 32) && !(val & (3 << i)))
4408 {
4409 consecutive_zeros += 2;
4410 i += 2;
4411 }
4412 if (consecutive_zeros > best_consecutive_zeros)
4413 {
4414 best_consecutive_zeros = consecutive_zeros;
4415 best_start = i - consecutive_zeros;
4416 }
4417 i -= 2;
4418 }
4419 }
4420 }
4421
4422 /* So long as it won't require any more insns to do so, it's
4423 desirable to emit a small constant (in bits 0...9) in the last
4424 insn. This way there is more chance that it can be combined with
4425 a later addressing insn to form a pre-indexed load or store
4426 operation. Consider:
4427
4428 *((volatile int *)0xe0000100) = 1;
4429 *((volatile int *)0xe0000110) = 2;
4430
4431 We want this to wind up as:
4432
4433 mov rA, #0xe0000000
4434 mov rB, #1
4435 str rB, [rA, #0x100]
4436 mov rB, #2
4437 str rB, [rA, #0x110]
4438
4439 rather than having to synthesize both large constants from scratch.
4440
4441 Therefore, we calculate how many insns would be required to emit
4442 the constant starting from `best_start', and also starting from
4443 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4444 yield a shorter sequence, we may as well use zero. */
4445 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4446 if (best_start != 0
4447 && ((HOST_WIDE_INT_1U << best_start) < val))
4448 {
4449 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4450 if (insns2 <= insns1)
4451 {
4452 *return_sequence = tmp_sequence;
4453 insns1 = insns2;
4454 }
4455 }
4456
4457 return insns1;
4458 }
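
/* Worked example (illustrative; the sequence actually chosen may differ):
   on a core without movw, 0xfff is not a valid ARM-mode immediate, but it
   splits into two 8-bit rotated immediates, e.g.

     mov r0, #0xff0
     orr r0, r0, #0xf

   For a PLUS, Thumb-2 can instead fold the low 12 bits into a single
   addw/subw, as handled in optimal_immediate_sequence_1 below.  */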
4459
4460 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4461 static int
4462 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4463 struct four_ints *return_sequence, int i)
4464 {
4465 int remainder = val & 0xffffffff;
4466 int insns = 0;
4467
4468 /* Try and find a way of doing the job in either two or three
4469 instructions.
4470
4471 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4472 location. We start at position I. This may be the MSB, or
4473 optimal_immediate_sequence may have positioned it at the largest block
4474 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4475 wrapping around to the top of the word when we drop off the bottom.
4476 In the worst case this code should produce no more than four insns.
4477
4478 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4479 constants, shifted to any arbitrary location. We should always start
4480 at the MSB. */
4481 do
4482 {
4483 int end;
4484 unsigned int b1, b2, b3, b4;
4485 unsigned HOST_WIDE_INT result;
4486 int loc;
4487
4488 gcc_assert (insns < 4);
4489
4490 if (i <= 0)
4491 i += 32;
4492
4493 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4494 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4495 {
4496 loc = i;
4497 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4498 /* We can use addw/subw for the last 12 bits. */
4499 result = remainder;
4500 else
4501 {
4502 /* Use an 8-bit shifted/rotated immediate. */
4503 end = i - 8;
4504 if (end < 0)
4505 end += 32;
4506 result = remainder & ((0x0ff << end)
4507 | ((i < end) ? (0xff >> (32 - end))
4508 : 0));
4509 i -= 8;
4510 }
4511 }
4512 else
4513 {
4514 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4515 arbitrary shifts. */
4516 i -= TARGET_ARM ? 2 : 1;
4517 continue;
4518 }
4519
4520 /* Next, see if we can do a better job with a thumb2 replicated
4521 constant.
4522
4523 We do it this way around to catch the cases like 0x01F001E0 where
4524 two 8-bit immediates would work, but a replicated constant would
4525 make it worse.
4526
4527 TODO: 16-bit constants that don't clear all the bits, but still win.
4528 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4529 if (TARGET_THUMB2)
4530 {
4531 b1 = (remainder & 0xff000000) >> 24;
4532 b2 = (remainder & 0x00ff0000) >> 16;
4533 b3 = (remainder & 0x0000ff00) >> 8;
4534 b4 = remainder & 0xff;
4535
4536 if (loc > 24)
4537 {
4538 /* The 8-bit immediate already found clears b1 (and maybe b2),
4539 but must leave b3 and b4 alone. */
4540
4541 /* First try to find a 32-bit replicated constant that clears
4542 almost everything. We can assume that we can't do it in one,
4543 or else we wouldn't be here. */
4544 unsigned int tmp = b1 & b2 & b3 & b4;
4545 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4546 + (tmp << 24);
4547 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4548 + (tmp == b3) + (tmp == b4);
4549 if (tmp
4550 && (matching_bytes >= 3
4551 || (matching_bytes == 2
4552 && const_ok_for_op (remainder & ~tmp2, code))))
4553 {
4554 /* At least 3 of the bytes match, and the fourth has at
4555 least as many bits set, or two of the bytes match
4556 and it will only require one more insn to finish. */
4557 result = tmp2;
4558 i = tmp != b1 ? 32
4559 : tmp != b2 ? 24
4560 : tmp != b3 ? 16
4561 : 8;
4562 }
4563
4564 /* Second, try to find a 16-bit replicated constant that can
4565 leave three of the bytes clear. If b2 or b4 is already
4566 zero, then we can. If the 8-bit from above would not
4567 clear b2 anyway, then we still win. */
4568 else if (b1 == b3 && (!b2 || !b4
4569 || (remainder & 0x00ff0000 & ~result)))
4570 {
4571 result = remainder & 0xff00ff00;
4572 i = 24;
4573 }
4574 }
4575 else if (loc > 16)
4576 {
4577 /* The 8-bit immediate already found clears b2 (and maybe b3)
4578 and we don't get here unless b1 is already clear, but it will
4579 leave b4 unchanged. */
4580
4581 /* If we can clear b2 and b4 at once, then we win, since the
4582 8-bits couldn't possibly reach that far. */
4583 if (b2 == b4)
4584 {
4585 result = remainder & 0x00ff00ff;
4586 i = 16;
4587 }
4588 }
4589 }
4590
4591 return_sequence->i[insns++] = result;
4592 remainder &= ~result;
4593
4594 if (code == SET || code == MINUS)
4595 code = PLUS;
4596 }
4597 while (remainder);
4598
4599 return insns;
4600 }
4601
4602 /* Emit an instruction with the indicated PATTERN. If COND is
4603 non-NULL, conditionalize the execution of the instruction on COND
4604 being true. */
4605
4606 static void
4607 emit_constant_insn (rtx cond, rtx pattern)
4608 {
4609 if (cond)
4610 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4611 emit_insn (pattern);
4612 }
4613
4614 /* As above, but extra parameter GENERATE which, if clear, suppresses
4615 RTL generation. */
4616
4617 static int
4618 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4619 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4620 int subtargets, int generate)
4621 {
4622 int can_invert = 0;
4623 int can_negate = 0;
4624 int final_invert = 0;
4625 int i;
4626 int set_sign_bit_copies = 0;
4627 int clear_sign_bit_copies = 0;
4628 int clear_zero_bit_copies = 0;
4629 int set_zero_bit_copies = 0;
4630 int insns = 0, neg_insns, inv_insns;
4631 unsigned HOST_WIDE_INT temp1, temp2;
4632 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4633 struct four_ints *immediates;
4634 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4635
4636 /* Find out which operations are safe for a given CODE. Also do a quick
4637 check for degenerate cases; these can occur when DImode operations
4638 are split. */
4639 switch (code)
4640 {
4641 case SET:
4642 can_invert = 1;
4643 break;
4644
4645 case PLUS:
4646 can_negate = 1;
4647 break;
4648
4649 case IOR:
4650 if (remainder == 0xffffffff)
4651 {
4652 if (generate)
4653 emit_constant_insn (cond,
4654 gen_rtx_SET (target,
4655 GEN_INT (ARM_SIGN_EXTEND (val))));
4656 return 1;
4657 }
4658
4659 if (remainder == 0)
4660 {
4661 if (reload_completed && rtx_equal_p (target, source))
4662 return 0;
4663
4664 if (generate)
4665 emit_constant_insn (cond, gen_rtx_SET (target, source));
4666 return 1;
4667 }
4668 break;
4669
4670 case AND:
4671 if (remainder == 0)
4672 {
4673 if (generate)
4674 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4675 return 1;
4676 }
4677 if (remainder == 0xffffffff)
4678 {
4679 if (reload_completed && rtx_equal_p (target, source))
4680 return 0;
4681 if (generate)
4682 emit_constant_insn (cond, gen_rtx_SET (target, source));
4683 return 1;
4684 }
4685 can_invert = 1;
4686 break;
4687
4688 case XOR:
4689 if (remainder == 0)
4690 {
4691 if (reload_completed && rtx_equal_p (target, source))
4692 return 0;
4693 if (generate)
4694 emit_constant_insn (cond, gen_rtx_SET (target, source));
4695 return 1;
4696 }
4697
4698 if (remainder == 0xffffffff)
4699 {
4700 if (generate)
4701 emit_constant_insn (cond,
4702 gen_rtx_SET (target,
4703 gen_rtx_NOT (mode, source)));
4704 return 1;
4705 }
4706 final_invert = 1;
4707 break;
4708
4709 case MINUS:
4710 /* We treat MINUS as (val - source), since (source - val) is always
4711 passed as (source + (-val)). */
4712 if (remainder == 0)
4713 {
4714 if (generate)
4715 emit_constant_insn (cond,
4716 gen_rtx_SET (target,
4717 gen_rtx_NEG (mode, source)));
4718 return 1;
4719 }
4720 if (const_ok_for_arm (val))
4721 {
4722 if (generate)
4723 emit_constant_insn (cond,
4724 gen_rtx_SET (target,
4725 gen_rtx_MINUS (mode, GEN_INT (val),
4726 source)));
4727 return 1;
4728 }
4729
4730 break;
4731
4732 default:
4733 gcc_unreachable ();
4734 }
4735
4736 /* If we can do it in one insn get out quickly. */
4737 if (const_ok_for_op (val, code))
4738 {
4739 if (generate)
4740 emit_constant_insn (cond,
4741 gen_rtx_SET (target,
4742 (source
4743 ? gen_rtx_fmt_ee (code, mode, source,
4744 GEN_INT (val))
4745 : GEN_INT (val))));
4746 return 1;
4747 }
4748
4749 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4750 insn. */
4751 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4752 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4753 {
4754 if (generate)
4755 {
4756 if (mode == SImode && i == 16)
4757 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4758 smaller insn. */
4759 emit_constant_insn (cond,
4760 gen_zero_extendhisi2
4761 (target, gen_lowpart (HImode, source)));
4762 else
4763 /* Extz only supports SImode, but we can coerce the operands
4764 into that mode. */
4765 emit_constant_insn (cond,
4766 gen_extzv_t2 (gen_lowpart (SImode, target),
4767 gen_lowpart (SImode, source),
4768 GEN_INT (i), const0_rtx));
4769 }
4770
4771 return 1;
4772 }
4773
4774 /* Calculate a few attributes that may be useful for specific
4775 optimizations. */
4776 /* Count number of leading zeros. */
4777 for (i = 31; i >= 0; i--)
4778 {
4779 if ((remainder & (1 << i)) == 0)
4780 clear_sign_bit_copies++;
4781 else
4782 break;
4783 }
4784
4785 /* Count number of leading 1's. */
4786 for (i = 31; i >= 0; i--)
4787 {
4788 if ((remainder & (1 << i)) != 0)
4789 set_sign_bit_copies++;
4790 else
4791 break;
4792 }
4793
4794 /* Count number of trailing zeros. */
4795 for (i = 0; i <= 31; i++)
4796 {
4797 if ((remainder & (1 << i)) == 0)
4798 clear_zero_bit_copies++;
4799 else
4800 break;
4801 }
4802
4803 /* Count number of trailing 1's. */
4804 for (i = 0; i <= 31; i++)
4805 {
4806 if ((remainder & (1 << i)) != 0)
4807 set_zero_bit_copies++;
4808 else
4809 break;
4810 }
4811
4812 switch (code)
4813 {
4814 case SET:
4815 /* See if we can do this by sign_extending a constant that is known
4816 to be negative. This is a good way of doing it, since the shift
4817 may well merge into a subsequent insn. */
4818 if (set_sign_bit_copies > 1)
4819 {
4820 if (const_ok_for_arm
4821 (temp1 = ARM_SIGN_EXTEND (remainder
4822 << (set_sign_bit_copies - 1))))
4823 {
4824 if (generate)
4825 {
4826 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4827 emit_constant_insn (cond,
4828 gen_rtx_SET (new_src, GEN_INT (temp1)));
4829 emit_constant_insn (cond,
4830 gen_ashrsi3 (target, new_src,
4831 GEN_INT (set_sign_bit_copies - 1)));
4832 }
4833 return 2;
4834 }
4835 /* For an inverted constant, we will need to set the low bits;
4836 these will be shifted out of harm's way. */
4837 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4838 if (const_ok_for_arm (~temp1))
4839 {
4840 if (generate)
4841 {
4842 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4843 emit_constant_insn (cond,
4844 gen_rtx_SET (new_src, GEN_INT (temp1)));
4845 emit_constant_insn (cond,
4846 gen_ashrsi3 (target, new_src,
4847 GEN_INT (set_sign_bit_copies - 1)));
4848 }
4849 return 2;
4850 }
4851 }
4852
4853 /* See if we can calculate the value as the difference between two
4854 valid immediates. */
4855 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4856 {
4857 int topshift = clear_sign_bit_copies & ~1;
4858
4859 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4860 & (0xff000000 >> topshift));
4861
4862 /* If temp1 is zero, then that means the 9 most significant
4863 bits of remainder were 1 and we've caused it to overflow.
4864 When topshift is 0 we don't need to do anything since we
4865 can borrow from 'bit 32'. */
4866 if (temp1 == 0 && topshift != 0)
4867 temp1 = 0x80000000 >> (topshift - 1);
4868
4869 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4870
4871 if (const_ok_for_arm (temp2))
4872 {
4873 if (generate)
4874 {
4875 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4876 emit_constant_insn (cond,
4877 gen_rtx_SET (new_src, GEN_INT (temp1)));
4878 emit_constant_insn (cond,
4879 gen_addsi3 (target, new_src,
4880 GEN_INT (-temp2)));
4881 }
4882
4883 return 2;
4884 }
4885 }
4886
4887 /* See if we can generate this by setting the bottom (or the top)
4888 16 bits, and then shifting these into the other half of the
4889 word. We only look for the simplest cases; to do more would cost
4890 too much. Be careful, however, not to generate this when the
4891 alternative would take fewer insns. */
4892 if (val & 0xffff0000)
4893 {
4894 temp1 = remainder & 0xffff0000;
4895 temp2 = remainder & 0x0000ffff;
4896
4897 /* Overlaps outside this range are best done using other methods. */
4898 for (i = 9; i < 24; i++)
4899 {
4900 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4901 && !const_ok_for_arm (temp2))
4902 {
4903 rtx new_src = (subtargets
4904 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4905 : target);
4906 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4907 source, subtargets, generate);
4908 source = new_src;
4909 if (generate)
4910 emit_constant_insn
4911 (cond,
4912 gen_rtx_SET
4913 (target,
4914 gen_rtx_IOR (mode,
4915 gen_rtx_ASHIFT (mode, source,
4916 GEN_INT (i)),
4917 source)));
4918 return insns + 1;
4919 }
4920 }
4921
4922 /* Don't duplicate cases already considered. */
4923 for (i = 17; i < 24; i++)
4924 {
4925 if (((temp1 | (temp1 >> i)) == remainder)
4926 && !const_ok_for_arm (temp1))
4927 {
4928 rtx new_src = (subtargets
4929 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4930 : target);
4931 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4932 source, subtargets, generate);
4933 source = new_src;
4934 if (generate)
4935 emit_constant_insn
4936 (cond,
4937 gen_rtx_SET (target,
4938 gen_rtx_IOR
4939 (mode,
4940 gen_rtx_LSHIFTRT (mode, source,
4941 GEN_INT (i)),
4942 source)));
4943 return insns + 1;
4944 }
4945 }
4946 }
4947 break;
4948
4949 case IOR:
4950 case XOR:
4951 /* If we have IOR or XOR, and the constant can be loaded in a
4952 single instruction, and we can find a temporary to put it in,
4953 then this can be done in two instructions instead of 3-4. */
4954 if (subtargets
4955 /* TARGET can't be NULL if SUBTARGETS is 0.  */
4956 || (reload_completed && !reg_mentioned_p (target, source)))
4957 {
4958 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4959 {
4960 if (generate)
4961 {
4962 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4963
4964 emit_constant_insn (cond,
4965 gen_rtx_SET (sub, GEN_INT (val)));
4966 emit_constant_insn (cond,
4967 gen_rtx_SET (target,
4968 gen_rtx_fmt_ee (code, mode,
4969 source, sub)));
4970 }
4971 return 2;
4972 }
4973 }
4974
4975 if (code == XOR)
4976 break;
4977
4978 /* Convert
4979 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4980 followed by 0s, e.g. 0xfff00000) to
4981 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
4982
4983 This can be done in 2 instructions by using shifts with mov or mvn.
4984 E.g. for
4985 x = x | 0xfff00000;
4986 we generate:
4987 mvn r0, r0, asl #12
4988 mvn r0, r0, lsr #12 */
4989 if (set_sign_bit_copies > 8
4990 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4991 {
4992 if (generate)
4993 {
4994 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4995 rtx shift = GEN_INT (set_sign_bit_copies);
4996
4997 emit_constant_insn
4998 (cond,
4999 gen_rtx_SET (sub,
5000 gen_rtx_NOT (mode,
5001 gen_rtx_ASHIFT (mode,
5002 source,
5003 shift))));
5004 emit_constant_insn
5005 (cond,
5006 gen_rtx_SET (target,
5007 gen_rtx_NOT (mode,
5008 gen_rtx_LSHIFTRT (mode, sub,
5009 shift))));
5010 }
5011 return 2;
5012 }
5013
5014 /* Convert
5015 x = y | constant (which has set_zero_bit_copies trailing ones)
5016 to
5017 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5018
5019 This can also be done in 2 instructions.  E.g. for
5020 r0 = r0 | 0xfff
5021 we generate
5022 mvn r0, r0, lsr #12
5023 mvn r0, r0, asl #12  */
5024 if (set_zero_bit_copies > 8
5025 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5026 {
5027 if (generate)
5028 {
5029 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5030 rtx shift = GEN_INT (set_zero_bit_copies);
5031
5032 emit_constant_insn
5033 (cond,
5034 gen_rtx_SET (sub,
5035 gen_rtx_NOT (mode,
5036 gen_rtx_LSHIFTRT (mode,
5037 source,
5038 shift))));
5039 emit_constant_insn
5040 (cond,
5041 gen_rtx_SET (target,
5042 gen_rtx_NOT (mode,
5043 gen_rtx_ASHIFT (mode, sub,
5044 shift))));
5045 }
5046 return 2;
5047 }
5048
5049 /* This will never be reached for Thumb2 because orn is a valid
5050 instruction. This is for Thumb1 and the 32-bit ARM cases.
5051
5052 x = y | constant (such that ~constant is a valid constant)
5053 Transform this to
5054 x = ~(~y & ~constant).
5055 */
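      /* Sketch only (registers are placeholders): for x = y | 0xffff00ff,
	 ~0xffff00ff = 0x0000ff00 is a valid immediate, so we can emit
	     mvn	rT, y
	     and	rT, rT, #0xff00
	     mvn	x, rT
	 which is the three-instruction count returned below.  */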
5056 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5057 {
5058 if (generate)
5059 {
5060 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5061 emit_constant_insn (cond,
5062 gen_rtx_SET (sub,
5063 gen_rtx_NOT (mode, source)));
5064 source = sub;
5065 if (subtargets)
5066 sub = gen_reg_rtx (mode);
5067 emit_constant_insn (cond,
5068 gen_rtx_SET (sub,
5069 gen_rtx_AND (mode, source,
5070 GEN_INT (temp1))));
5071 emit_constant_insn (cond,
5072 gen_rtx_SET (target,
5073 gen_rtx_NOT (mode, sub)));
5074 }
5075 return 3;
5076 }
5077 break;
5078
5079 case AND:
5080 /* See if two shifts will do 2 or more insn's worth of work. */
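      /* For example (registers are placeholders): x & 0x0000ffff cannot use
	 AND or BIC with an immediate (neither 0xffff nor 0xffff0000 is a
	 valid ARM immediate), but two shifts clear the top half:
	     mov	rT, x, asl #16
	     mov	rD, rT, lsr #16  */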
5081 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5082 {
5083 HOST_WIDE_INT shift_mask = ((0xffffffff
5084 << (32 - clear_sign_bit_copies))
5085 & 0xffffffff);
5086
5087 if ((remainder | shift_mask) != 0xffffffff)
5088 {
5089 HOST_WIDE_INT new_val
5090 = ARM_SIGN_EXTEND (remainder | shift_mask);
5091
5092 if (generate)
5093 {
5094 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5095 insns = arm_gen_constant (AND, SImode, cond, new_val,
5096 new_src, source, subtargets, 1);
5097 source = new_src;
5098 }
5099 else
5100 {
5101 rtx targ = subtargets ? NULL_RTX : target;
5102 insns = arm_gen_constant (AND, mode, cond, new_val,
5103 targ, source, subtargets, 0);
5104 }
5105 }
5106
5107 if (generate)
5108 {
5109 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5110 rtx shift = GEN_INT (clear_sign_bit_copies);
5111
5112 emit_insn (gen_ashlsi3 (new_src, source, shift));
5113 emit_insn (gen_lshrsi3 (target, new_src, shift));
5114 }
5115
5116 return insns + 2;
5117 }
5118
5119 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5120 {
5121 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5122
5123 if ((remainder | shift_mask) != 0xffffffff)
5124 {
5125 HOST_WIDE_INT new_val
5126 = ARM_SIGN_EXTEND (remainder | shift_mask);
5127 if (generate)
5128 {
5129 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5130
5131 insns = arm_gen_constant (AND, mode, cond, new_val,
5132 new_src, source, subtargets, 1);
5133 source = new_src;
5134 }
5135 else
5136 {
5137 rtx targ = subtargets ? NULL_RTX : target;
5138
5139 insns = arm_gen_constant (AND, mode, cond, new_val,
5140 targ, source, subtargets, 0);
5141 }
5142 }
5143
5144 if (generate)
5145 {
5146 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5147 rtx shift = GEN_INT (clear_zero_bit_copies);
5148
5149 emit_insn (gen_lshrsi3 (new_src, source, shift));
5150 emit_insn (gen_ashlsi3 (target, new_src, shift));
5151 }
5152
5153 return insns + 2;
5154 }
5155
5156 break;
5157
5158 default:
5159 break;
5160 }
5161
5162 /* Calculate what the instruction sequences would be if we generated it
5163 normally, negated, or inverted. */
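   /* As an example of the selection below: for PLUS with the constant
      0xffffff00, the positive form needs three immediates (0xff000000,
      0x00ff0000, 0x0000ff00), but the negated form needs only one, since
      -0xffffff00 == 0x100, so a single subtract of #256 is preferred.  */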
5164 if (code == AND)
5165 /* AND cannot be split into multiple insns, so invert and use BIC. */
5166 insns = 99;
5167 else
5168 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5169
5170 if (can_negate)
5171 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5172 &neg_immediates);
5173 else
5174 neg_insns = 99;
5175
5176 if (can_invert || final_invert)
5177 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5178 &inv_immediates);
5179 else
5180 inv_insns = 99;
5181
5182 immediates = &pos_immediates;
5183
5184 /* Is the negated immediate sequence more efficient? */
5185 if (neg_insns < insns && neg_insns <= inv_insns)
5186 {
5187 insns = neg_insns;
5188 immediates = &neg_immediates;
5189 }
5190 else
5191 can_negate = 0;
5192
5193 /* Is the inverted immediate sequence more efficient?
5194 We must allow for an extra NOT instruction for XOR operations, although
5195 there is some chance that the final 'mvn' will get optimized later. */
5196 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5197 {
5198 insns = inv_insns;
5199 immediates = &inv_immediates;
5200 }
5201 else
5202 {
5203 can_invert = 0;
5204 final_invert = 0;
5205 }
5206
5207 /* Now output the chosen sequence as instructions. */
5208 if (generate)
5209 {
5210 for (i = 0; i < insns; i++)
5211 {
5212 rtx new_src, temp1_rtx;
5213
5214 temp1 = immediates->i[i];
5215
5216 if (code == SET || code == MINUS)
5217 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5218 else if ((final_invert || i < (insns - 1)) && subtargets)
5219 new_src = gen_reg_rtx (mode);
5220 else
5221 new_src = target;
5222
5223 if (can_invert)
5224 temp1 = ~temp1;
5225 else if (can_negate)
5226 temp1 = -temp1;
5227
5228 temp1 = trunc_int_for_mode (temp1, mode);
5229 temp1_rtx = GEN_INT (temp1);
5230
5231 if (code == SET)
5232 ;
5233 else if (code == MINUS)
5234 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5235 else
5236 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5237
5238 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5239 source = new_src;
5240
5241 if (code == SET)
5242 {
5243 can_negate = can_invert;
5244 can_invert = 0;
5245 code = PLUS;
5246 }
5247 else if (code == MINUS)
5248 code = PLUS;
5249 }
5250 }
5251
5252 if (final_invert)
5253 {
5254 if (generate)
5255 emit_constant_insn (cond, gen_rtx_SET (target,
5256 gen_rtx_NOT (mode, source)));
5257 insns++;
5258 }
5259
5260 return insns;
5261 }
5262
5263 /* Canonicalize a comparison so that we are more likely to recognize it.
5264 This can be done for a few constant compares, where we can make the
5265 immediate value easier to load. */
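   /* For instance, (x > 0xfff) cannot use CMP with #0xfff (not a valid
      immediate), but it is equivalent to (x >= 0x1000), and #0x1000 is
      valid, so the code below adjusts both the constant and the
      condition.  */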
5266
5267 static void
5268 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5269 bool op0_preserve_value)
5270 {
5271 machine_mode mode;
5272 unsigned HOST_WIDE_INT i, maxval;
5273
5274 mode = GET_MODE (*op0);
5275 if (mode == VOIDmode)
5276 mode = GET_MODE (*op1);
5277
5278 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5279
5280 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5281 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5282 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5283 for GTU/LEU in Thumb mode. */
5284 if (mode == DImode)
5285 {
5286
5287 if (*code == GT || *code == LE
5288 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5289 {
5290 /* Missing comparison. First try to use an available
5291 comparison. */
5292 if (CONST_INT_P (*op1))
5293 {
5294 i = INTVAL (*op1);
5295 switch (*code)
5296 {
5297 case GT:
5298 case LE:
5299 if (i != maxval
5300 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5301 {
5302 *op1 = GEN_INT (i + 1);
5303 *code = *code == GT ? GE : LT;
5304 return;
5305 }
5306 break;
5307 case GTU:
5308 case LEU:
5309 if (i != ~((unsigned HOST_WIDE_INT) 0)
5310 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5311 {
5312 *op1 = GEN_INT (i + 1);
5313 *code = *code == GTU ? GEU : LTU;
5314 return;
5315 }
5316 break;
5317 default:
5318 gcc_unreachable ();
5319 }
5320 }
5321
5322 /* If that did not work, reverse the condition. */
5323 if (!op0_preserve_value)
5324 {
5325 std::swap (*op0, *op1);
5326 *code = (int)swap_condition ((enum rtx_code)*code);
5327 }
5328 }
5329 return;
5330 }
5331
5332 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5333 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5334 to facilitate possible combining with a cmp into 'ands'. */
5335 if (mode == SImode
5336 && GET_CODE (*op0) == ZERO_EXTEND
5337 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5338 && GET_MODE (XEXP (*op0, 0)) == QImode
5339 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5340 && subreg_lowpart_p (XEXP (*op0, 0))
5341 && *op1 == const0_rtx)
5342 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5343 GEN_INT (255));
5344
5345 /* Comparisons smaller than DImode. Only adjust comparisons against
5346 an out-of-range constant. */
5347 if (!CONST_INT_P (*op1)
5348 || const_ok_for_arm (INTVAL (*op1))
5349 || const_ok_for_arm (- INTVAL (*op1)))
5350 return;
5351
5352 i = INTVAL (*op1);
5353
5354 switch (*code)
5355 {
5356 case EQ:
5357 case NE:
5358 return;
5359
5360 case GT:
5361 case LE:
5362 if (i != maxval
5363 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5364 {
5365 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5366 *code = *code == GT ? GE : LT;
5367 return;
5368 }
5369 break;
5370
5371 case GE:
5372 case LT:
5373 if (i != ~maxval
5374 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5375 {
5376 *op1 = GEN_INT (i - 1);
5377 *code = *code == GE ? GT : LE;
5378 return;
5379 }
5380 break;
5381
5382 case GTU:
5383 case LEU:
5384 if (i != ~((unsigned HOST_WIDE_INT) 0)
5385 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5386 {
5387 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5388 *code = *code == GTU ? GEU : LTU;
5389 return;
5390 }
5391 break;
5392
5393 case GEU:
5394 case LTU:
5395 if (i != 0
5396 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5397 {
5398 *op1 = GEN_INT (i - 1);
5399 *code = *code == GEU ? GTU : LEU;
5400 return;
5401 }
5402 break;
5403
5404 default:
5405 gcc_unreachable ();
5406 }
5407 }
5408
5409
5410 /* Define how to find the value returned by a function. */
5411
5412 static rtx
5413 arm_function_value(const_tree type, const_tree func,
5414 bool outgoing ATTRIBUTE_UNUSED)
5415 {
5416 machine_mode mode;
5417 int unsignedp ATTRIBUTE_UNUSED;
5418 rtx r ATTRIBUTE_UNUSED;
5419
5420 mode = TYPE_MODE (type);
5421
5422 if (TARGET_AAPCS_BASED)
5423 return aapcs_allocate_return_reg (mode, type, func);
5424
5425 /* Promote integer types. */
5426 if (INTEGRAL_TYPE_P (type))
5427 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5428
5429 /* Promote small structs returned in a register to full-word size
5430 for big-endian AAPCS. */
5431 if (arm_return_in_msb (type))
5432 {
5433 HOST_WIDE_INT size = int_size_in_bytes (type);
5434 if (size % UNITS_PER_WORD != 0)
5435 {
5436 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5437 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5438 }
5439 }
5440
5441 return arm_libcall_value_1 (mode);
5442 }
5443
5444 /* libcall hashtable helpers. */
5445
5446 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5447 {
5448 static inline hashval_t hash (const rtx_def *);
5449 static inline bool equal (const rtx_def *, const rtx_def *);
5450 static inline void remove (rtx_def *);
5451 };
5452
5453 inline bool
5454 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5455 {
5456 return rtx_equal_p (p1, p2);
5457 }
5458
5459 inline hashval_t
5460 libcall_hasher::hash (const rtx_def *p1)
5461 {
5462 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5463 }
5464
5465 typedef hash_table<libcall_hasher> libcall_table_type;
5466
5467 static void
5468 add_libcall (libcall_table_type *htab, rtx libcall)
5469 {
5470 *htab->find_slot (libcall, INSERT) = libcall;
5471 }
5472
5473 static bool
5474 arm_libcall_uses_aapcs_base (const_rtx libcall)
5475 {
5476 static bool init_done = false;
5477 static libcall_table_type *libcall_htab = NULL;
5478
5479 if (!init_done)
5480 {
5481 init_done = true;
5482
5483 libcall_htab = new libcall_table_type (31);
5484 add_libcall (libcall_htab,
5485 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5486 add_libcall (libcall_htab,
5487 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5488 add_libcall (libcall_htab,
5489 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5490 add_libcall (libcall_htab,
5491 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5492
5493 add_libcall (libcall_htab,
5494 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5495 add_libcall (libcall_htab,
5496 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5497 add_libcall (libcall_htab,
5498 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5499 add_libcall (libcall_htab,
5500 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5501
5502 add_libcall (libcall_htab,
5503 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5504 add_libcall (libcall_htab,
5505 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5506 add_libcall (libcall_htab,
5507 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5508 add_libcall (libcall_htab,
5509 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5510 add_libcall (libcall_htab,
5511 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5512 add_libcall (libcall_htab,
5513 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5514 add_libcall (libcall_htab,
5515 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5516 add_libcall (libcall_htab,
5517 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5518
5519 /* Values from double-precision helper functions are returned in core
5520 registers if the selected core only supports single-precision
5521 arithmetic, even if we are using the hard-float ABI. The same is
5522 true for single-precision helpers, but we will never be using the
5523 hard-float ABI on a CPU which doesn't support single-precision
5524 operations in hardware. */
5525 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5526 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5527 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5528 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5529 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5530 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5531 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5532 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5533 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5534 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5535 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5536 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5537 SFmode));
5538 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5539 DFmode));
5540 add_libcall (libcall_htab,
5541 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5542 }
5543
5544 return libcall && libcall_htab->find (libcall) != NULL;
5545 }
5546
5547 static rtx
5548 arm_libcall_value_1 (machine_mode mode)
5549 {
5550 if (TARGET_AAPCS_BASED)
5551 return aapcs_libcall_value (mode);
5552 else if (TARGET_IWMMXT_ABI
5553 && arm_vector_mode_supported_p (mode))
5554 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5555 else
5556 return gen_rtx_REG (mode, ARG_REGISTER (1));
5557 }
5558
5559 /* Define how to find the value returned by a library function
5560 assuming the value has mode MODE. */
5561
5562 static rtx
5563 arm_libcall_value (machine_mode mode, const_rtx libcall)
5564 {
5565 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5566 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5567 {
5568 /* The following libcalls return their result in integer registers,
5569 even though they return a floating point value. */
5570 if (arm_libcall_uses_aapcs_base (libcall))
5571 return gen_rtx_REG (mode, ARG_REGISTER(1));
5572
5573 }
5574
5575 return arm_libcall_value_1 (mode);
5576 }
5577
5578 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5579
5580 static bool
5581 arm_function_value_regno_p (const unsigned int regno)
5582 {
5583 if (regno == ARG_REGISTER (1)
5584 || (TARGET_32BIT
5585 && TARGET_AAPCS_BASED
5586 && TARGET_HARD_FLOAT
5587 && regno == FIRST_VFP_REGNUM)
5588 || (TARGET_IWMMXT_ABI
5589 && regno == FIRST_IWMMXT_REGNUM))
5590 return true;
5591
5592 return false;
5593 }
5594
5595 /* Determine the amount of memory needed to store the possible return
5596 registers of an untyped call. */
5597 int
5598 arm_apply_result_size (void)
5599 {
5600 int size = 16;
5601
5602 if (TARGET_32BIT)
5603 {
5604 if (TARGET_HARD_FLOAT_ABI)
5605 size += 32;
5606 if (TARGET_IWMMXT_ABI)
5607 size += 8;
5608 }
5609
5610 return size;
5611 }
5612
5613 /* Decide whether TYPE should be returned in memory (true)
5614 or in a register (false). FNTYPE is the type of the function making
5615 the call. */
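 /* For example, under AAPCS a 4-byte struct { int a; } fits in one word and
    is returned in a register, while an 8-byte struct { int a, b; } that is
    not a co-processor candidate is returned in memory.  */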
5616 static bool
5617 arm_return_in_memory (const_tree type, const_tree fntype)
5618 {
5619 HOST_WIDE_INT size;
5620
5621 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5622
5623 if (TARGET_AAPCS_BASED)
5624 {
5625 /* Simple, non-aggregate types (i.e. not including vectors and
5626 complex) are always returned in a register (or registers).
5627 We don't care about which register here, so we can short-cut
5628 some of the detail. */
5629 if (!AGGREGATE_TYPE_P (type)
5630 && TREE_CODE (type) != VECTOR_TYPE
5631 && TREE_CODE (type) != COMPLEX_TYPE)
5632 return false;
5633
5634 /* Any return value that is no larger than one word can be
5635 returned in r0. */
5636 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5637 return false;
5638
5639 /* Check any available co-processors to see if they accept the
5640 type as a register candidate (VFP, for example, can return
5641 some aggregates in consecutive registers). These aren't
5642 available if the call is variadic. */
5643 if (aapcs_select_return_coproc (type, fntype) >= 0)
5644 return false;
5645
5646 /* Vector values should be returned using ARM registers, not
5647 memory (unless they're over 16 bytes, which will break since
5648 we only have four call-clobbered registers to play with). */
5649 if (TREE_CODE (type) == VECTOR_TYPE)
5650 return (size < 0 || size > (4 * UNITS_PER_WORD));
5651
5652 /* The rest go in memory. */
5653 return true;
5654 }
5655
5656 if (TREE_CODE (type) == VECTOR_TYPE)
5657 return (size < 0 || size > (4 * UNITS_PER_WORD));
5658
5659 if (!AGGREGATE_TYPE_P (type)
5660 && TREE_CODE (type) != VECTOR_TYPE)
5661 /* All simple types are returned in registers. */
5662 return false;
5663
5664 if (arm_abi != ARM_ABI_APCS)
5665 {
5666 /* ATPCS and later return aggregate types in memory only if they are
5667 larger than a word (or are variable size). */
5668 return (size < 0 || size > UNITS_PER_WORD);
5669 }
5670
5671 /* For the arm-wince targets we choose to be compatible with Microsoft's
5672 ARM and Thumb compilers, which always return aggregates in memory. */
5673 #ifndef ARM_WINCE
5674 /* All structures/unions bigger than one word are returned in memory.
5675 Also catch the case where int_size_in_bytes returns -1. In this case
5676 the aggregate is either huge or of variable size, and in either case
5677 we will want to return it via memory and not in a register. */
5678 if (size < 0 || size > UNITS_PER_WORD)
5679 return true;
5680
5681 if (TREE_CODE (type) == RECORD_TYPE)
5682 {
5683 tree field;
5684
5685 /* For a struct the APCS says that we only return in a register
5686 if the type is 'integer like' and every addressable element
5687 has an offset of zero. For practical purposes this means
5688 that the structure can have at most one non bit-field element
5689 and that this element must be the first one in the structure. */
5690
5691 /* Find the first field, ignoring non FIELD_DECL things which will
5692 have been created by C++. */
5693 for (field = TYPE_FIELDS (type);
5694 field && TREE_CODE (field) != FIELD_DECL;
5695 field = DECL_CHAIN (field))
5696 continue;
5697
5698 if (field == NULL)
5699 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5700
5701 /* Check that the first field is valid for returning in a register. */
5702
5703 /* ... Floats are not allowed */
5704 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5705 return true;
5706
5707 /* ... Aggregates that are not themselves valid for returning in
5708 a register are not allowed. */
5709 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5710 return true;
5711
5712 /* Now check the remaining fields, if any. Only bitfields are allowed,
5713 since they are not addressable. */
5714 for (field = DECL_CHAIN (field);
5715 field;
5716 field = DECL_CHAIN (field))
5717 {
5718 if (TREE_CODE (field) != FIELD_DECL)
5719 continue;
5720
5721 if (!DECL_BIT_FIELD_TYPE (field))
5722 return true;
5723 }
5724
5725 return false;
5726 }
5727
5728 if (TREE_CODE (type) == UNION_TYPE)
5729 {
5730 tree field;
5731
5732 /* Unions can be returned in registers if every element is
5733 integral, or can be returned in an integer register. */
5734 for (field = TYPE_FIELDS (type);
5735 field;
5736 field = DECL_CHAIN (field))
5737 {
5738 if (TREE_CODE (field) != FIELD_DECL)
5739 continue;
5740
5741 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5742 return true;
5743
5744 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5745 return true;
5746 }
5747
5748 return false;
5749 }
5750 #endif /* not ARM_WINCE */
5751
5752 /* Return all other types in memory. */
5753 return true;
5754 }
5755
5756 const struct pcs_attribute_arg
5757 {
5758 const char *arg;
5759 enum arm_pcs value;
5760 } pcs_attribute_args[] =
5761 {
5762 {"aapcs", ARM_PCS_AAPCS},
5763 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5764 #if 0
5765 /* We could recognize these, but changes would be needed elsewhere
5766 to implement them. */
5767 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5768 {"atpcs", ARM_PCS_ATPCS},
5769 {"apcs", ARM_PCS_APCS},
5770 #endif
5771 {NULL, ARM_PCS_UNKNOWN}
5772 };
5773
5774 static enum arm_pcs
5775 arm_pcs_from_attribute (tree attr)
5776 {
5777 const struct pcs_attribute_arg *ptr;
5778 const char *arg;
5779
5780 /* Get the value of the argument. */
5781 if (TREE_VALUE (attr) == NULL_TREE
5782 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5783 return ARM_PCS_UNKNOWN;
5784
5785 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5786
5787 /* Check it against the list of known arguments. */
5788 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5789 if (streq (arg, ptr->arg))
5790 return ptr->value;
5791
5792 /* An unrecognized PCS variant. */
5793 return ARM_PCS_UNKNOWN;
5794 }
5795
5796 /* Get the PCS variant to use for this call. TYPE is the function's type
5797 specification, DECL is the specific declaration. DECL may be null if
5798 the call could be indirect or if this is a library call. */
5799 static enum arm_pcs
5800 arm_get_pcs_model (const_tree type, const_tree decl)
5801 {
5802 bool user_convention = false;
5803 enum arm_pcs user_pcs = arm_pcs_default;
5804 tree attr;
5805
5806 gcc_assert (type);
5807
5808 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5809 if (attr)
5810 {
5811 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5812 user_convention = true;
5813 }
5814
5815 if (TARGET_AAPCS_BASED)
5816 {
5817 /* Detect varargs functions. These always use the base rules
5818 (no argument is ever a candidate for a co-processor
5819 register). */
5820 bool base_rules = stdarg_p (type);
5821
5822 if (user_convention)
5823 {
5824 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5825 sorry ("non-AAPCS derived PCS variant");
5826 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5827 error ("variadic functions must use the base AAPCS variant");
5828 }
5829
5830 if (base_rules)
5831 return ARM_PCS_AAPCS;
5832 else if (user_convention)
5833 return user_pcs;
5834 else if (decl && flag_unit_at_a_time)
5835 {
5836 /* Local functions never leak outside this compilation unit,
5837 so we are free to use whatever conventions are
5838 appropriate. */
5839 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5840 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5841 if (i && i->local)
5842 return ARM_PCS_AAPCS_LOCAL;
5843 }
5844 }
5845 else if (user_convention && user_pcs != arm_pcs_default)
5846 sorry ("PCS variant");
5847
5848 /* For everything else we use the target's default. */
5849 return arm_pcs_default;
5850 }
5851
5852
5853 static void
5854 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5855 const_tree fntype ATTRIBUTE_UNUSED,
5856 rtx libcall ATTRIBUTE_UNUSED,
5857 const_tree fndecl ATTRIBUTE_UNUSED)
5858 {
5859 /* Record the unallocated VFP registers. */
5860 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5861 pcum->aapcs_vfp_reg_alloc = 0;
5862 }
5863
5864 /* Walk down the type tree of TYPE counting consecutive base elements.
5865 If *MODEP is VOIDmode, then set it to the first valid floating point
5866 type. If a non-floating point type is found, or if a floating point
5867 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5868 otherwise return the count in the sub-tree. */
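 /* For instance, struct { double re, im; } yields a count of 2 with *MODEP
    set to DFmode, float v[4] yields 4 with SFmode, and a struct mixing
    float and double members yields -1 (not a homogeneous aggregate).  */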
5869 static int
5870 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5871 {
5872 machine_mode mode;
5873 HOST_WIDE_INT size;
5874
5875 switch (TREE_CODE (type))
5876 {
5877 case REAL_TYPE:
5878 mode = TYPE_MODE (type);
5879 if (mode != DFmode && mode != SFmode && mode != HFmode)
5880 return -1;
5881
5882 if (*modep == VOIDmode)
5883 *modep = mode;
5884
5885 if (*modep == mode)
5886 return 1;
5887
5888 break;
5889
5890 case COMPLEX_TYPE:
5891 mode = TYPE_MODE (TREE_TYPE (type));
5892 if (mode != DFmode && mode != SFmode)
5893 return -1;
5894
5895 if (*modep == VOIDmode)
5896 *modep = mode;
5897
5898 if (*modep == mode)
5899 return 2;
5900
5901 break;
5902
5903 case VECTOR_TYPE:
5904 /* Use V2SImode and V4SImode as representatives of all 64-bit
5905 and 128-bit vector types, whether or not those modes are
5906 supported with the present options. */
5907 size = int_size_in_bytes (type);
5908 switch (size)
5909 {
5910 case 8:
5911 mode = V2SImode;
5912 break;
5913 case 16:
5914 mode = V4SImode;
5915 break;
5916 default:
5917 return -1;
5918 }
5919
5920 if (*modep == VOIDmode)
5921 *modep = mode;
5922
5923 /* Vector modes are considered to be opaque: two vectors are
5924 equivalent for the purposes of being homogeneous aggregates
5925 if they are the same size. */
5926 if (*modep == mode)
5927 return 1;
5928
5929 break;
5930
5931 case ARRAY_TYPE:
5932 {
5933 int count;
5934 tree index = TYPE_DOMAIN (type);
5935
5936 /* Can't handle incomplete types nor sizes that are not
5937 fixed. */
5938 if (!COMPLETE_TYPE_P (type)
5939 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5940 return -1;
5941
5942 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5943 if (count == -1
5944 || !index
5945 || !TYPE_MAX_VALUE (index)
5946 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5947 || !TYPE_MIN_VALUE (index)
5948 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5949 || count < 0)
5950 return -1;
5951
5952 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5953 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5954
5955 /* There must be no padding. */
5956 if (wi::to_wide (TYPE_SIZE (type))
5957 != count * GET_MODE_BITSIZE (*modep))
5958 return -1;
5959
5960 return count;
5961 }
5962
5963 case RECORD_TYPE:
5964 {
5965 int count = 0;
5966 int sub_count;
5967 tree field;
5968
5969 /* Can't handle incomplete types nor sizes that are not
5970 fixed. */
5971 if (!COMPLETE_TYPE_P (type)
5972 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5973 return -1;
5974
5975 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5976 {
5977 if (TREE_CODE (field) != FIELD_DECL)
5978 continue;
5979
5980 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5981 if (sub_count < 0)
5982 return -1;
5983 count += sub_count;
5984 }
5985
5986 /* There must be no padding. */
5987 if (wi::to_wide (TYPE_SIZE (type))
5988 != count * GET_MODE_BITSIZE (*modep))
5989 return -1;
5990
5991 return count;
5992 }
5993
5994 case UNION_TYPE:
5995 case QUAL_UNION_TYPE:
5996 {
5997 /* These aren't very interesting except in a degenerate case. */
5998 int count = 0;
5999 int sub_count;
6000 tree field;
6001
6002 /* Can't handle incomplete types nor sizes that are not
6003 fixed. */
6004 if (!COMPLETE_TYPE_P (type)
6005 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6006 return -1;
6007
6008 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6009 {
6010 if (TREE_CODE (field) != FIELD_DECL)
6011 continue;
6012
6013 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6014 if (sub_count < 0)
6015 return -1;
6016 count = count > sub_count ? count : sub_count;
6017 }
6018
6019 /* There must be no padding. */
6020 if (wi::to_wide (TYPE_SIZE (type))
6021 != count * GET_MODE_BITSIZE (*modep))
6022 return -1;
6023
6024 return count;
6025 }
6026
6027 default:
6028 break;
6029 }
6030
6031 return -1;
6032 }
6033
6034 /* Return true if PCS_VARIANT should use VFP registers. */
6035 static bool
6036 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6037 {
6038 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6039 {
6040 static bool seen_thumb1_vfp = false;
6041
6042 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6043 {
6044 sorry ("Thumb-1 hard-float VFP ABI");
6045 /* sorry() is not immediately fatal, so only display this once. */
6046 seen_thumb1_vfp = true;
6047 }
6048
6049 return true;
6050 }
6051
6052 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6053 return false;
6054
6055 return (TARGET_32BIT && TARGET_HARD_FLOAT
6056 && (TARGET_VFP_DOUBLE || !is_double));
6057 }
6058
6059 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6060 suitable for passing or returning in VFP registers for the PCS
6061 variant selected. If it is, then *BASE_MODE is updated to contain
6062 a machine mode describing each element of the argument's type and
6063 *COUNT to hold the number of such elements. */
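 /* E.g. a complex double (DCmode) argument sets *BASE_MODE to DFmode and
    *COUNT to 2, and struct { float x, y, z; } sets SFmode and 3; anything
    with more than four elements is rejected.  */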
6064 static bool
6065 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6066 machine_mode mode, const_tree type,
6067 machine_mode *base_mode, int *count)
6068 {
6069 machine_mode new_mode = VOIDmode;
6070
6071 /* If we have the type information, prefer that to working things
6072 out from the mode. */
6073 if (type)
6074 {
6075 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6076
6077 if (ag_count > 0 && ag_count <= 4)
6078 *count = ag_count;
6079 else
6080 return false;
6081 }
6082 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6083 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6084 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6085 {
6086 *count = 1;
6087 new_mode = mode;
6088 }
6089 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6090 {
6091 *count = 2;
6092 new_mode = (mode == DCmode ? DFmode : SFmode);
6093 }
6094 else
6095 return false;
6096
6097
6098 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6099 return false;
6100
6101 *base_mode = new_mode;
6102 return true;
6103 }
6104
6105 static bool
6106 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6107 machine_mode mode, const_tree type)
6108 {
6109 int count ATTRIBUTE_UNUSED;
6110 machine_mode ag_mode ATTRIBUTE_UNUSED;
6111
6112 if (!use_vfp_abi (pcs_variant, false))
6113 return false;
6114 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6115 &ag_mode, &count);
6116 }
6117
6118 static bool
6119 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6120 const_tree type)
6121 {
6122 if (!use_vfp_abi (pcum->pcs_variant, false))
6123 return false;
6124
6125 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6126 &pcum->aapcs_vfp_rmode,
6127 &pcum->aapcs_vfp_rcount);
6128 }
6129
6130 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6131 for the behaviour of this function. */
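 /* As a rough illustration: for a homogeneous aggregate of two doubles,
    the register mode occupies two single-precision slots (shift == 2), so
    MASK covers four consecutive S registers and the scan below allocates
    the first free block starting at an even S register, e.g. d0-d1
    (s0-s3).  */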
6132
6133 static bool
6134 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6135 const_tree type ATTRIBUTE_UNUSED)
6136 {
6137 int rmode_size
6138 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6139 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6140 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6141 int regno;
6142
6143 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6144 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6145 {
6146 pcum->aapcs_vfp_reg_alloc = mask << regno;
6147 if (mode == BLKmode
6148 || (mode == TImode && ! TARGET_NEON)
6149 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6150 {
6151 int i;
6152 int rcount = pcum->aapcs_vfp_rcount;
6153 int rshift = shift;
6154 machine_mode rmode = pcum->aapcs_vfp_rmode;
6155 rtx par;
6156 if (!TARGET_NEON)
6157 {
6158 /* Avoid using unsupported vector modes. */
6159 if (rmode == V2SImode)
6160 rmode = DImode;
6161 else if (rmode == V4SImode)
6162 {
6163 rmode = DImode;
6164 rcount *= 2;
6165 rshift /= 2;
6166 }
6167 }
6168 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6169 for (i = 0; i < rcount; i++)
6170 {
6171 rtx tmp = gen_rtx_REG (rmode,
6172 FIRST_VFP_REGNUM + regno + i * rshift);
6173 tmp = gen_rtx_EXPR_LIST
6174 (VOIDmode, tmp,
6175 GEN_INT (i * GET_MODE_SIZE (rmode)));
6176 XVECEXP (par, 0, i) = tmp;
6177 }
6178
6179 pcum->aapcs_reg = par;
6180 }
6181 else
6182 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6183 return true;
6184 }
6185 return false;
6186 }
6187
6188 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6189 comment there for the behaviour of this function. */
6190
6191 static rtx
6192 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6193 machine_mode mode,
6194 const_tree type ATTRIBUTE_UNUSED)
6195 {
6196 if (!use_vfp_abi (pcs_variant, false))
6197 return NULL;
6198
6199 if (mode == BLKmode
6200 || (GET_MODE_CLASS (mode) == MODE_INT
6201 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6202 && !TARGET_NEON))
6203 {
6204 int count;
6205 machine_mode ag_mode;
6206 int i;
6207 rtx par;
6208 int shift;
6209
6210 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6211 &ag_mode, &count);
6212
6213 if (!TARGET_NEON)
6214 {
6215 if (ag_mode == V2SImode)
6216 ag_mode = DImode;
6217 else if (ag_mode == V4SImode)
6218 {
6219 ag_mode = DImode;
6220 count *= 2;
6221 }
6222 }
6223 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6224 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6225 for (i = 0; i < count; i++)
6226 {
6227 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6228 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6229 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6230 XVECEXP (par, 0, i) = tmp;
6231 }
6232
6233 return par;
6234 }
6235
6236 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6237 }
6238
6239 static void
6240 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6241 machine_mode mode ATTRIBUTE_UNUSED,
6242 const_tree type ATTRIBUTE_UNUSED)
6243 {
6244 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6245 pcum->aapcs_vfp_reg_alloc = 0;
6246 return;
6247 }
6248
6249 #define AAPCS_CP(X) \
6250 { \
6251 aapcs_ ## X ## _cum_init, \
6252 aapcs_ ## X ## _is_call_candidate, \
6253 aapcs_ ## X ## _allocate, \
6254 aapcs_ ## X ## _is_return_candidate, \
6255 aapcs_ ## X ## _allocate_return_reg, \
6256 aapcs_ ## X ## _advance \
6257 }
6258
6259 /* Table of co-processors that can be used to pass arguments in
6260 registers. Ideally no argument should be a candidate for more than
6261 one co-processor table entry, but the table is processed in order
6262 and stops after the first match. If that entry then fails to put
6263 the argument into a co-processor register, the argument will go on
6264 the stack. */
6265 static struct
6266 {
6267 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6268 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6269
6270 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6271 BLKmode) is a candidate for this co-processor's registers; this
6272 function should ignore any position-dependent state in
6273 CUMULATIVE_ARGS and only use call-type dependent information. */
6274 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6275
6276 /* Return true if the argument does get a co-processor register; it
6277 should set aapcs_reg to an RTX of the register allocated as is
6278 required for a return from FUNCTION_ARG. */
6279 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6280
6281 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6282 be returned in this co-processor's registers. */
6283 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6284
6285 /* Allocate and return an RTX element to hold the return value of a call. This
6286 routine must not fail and will only be called if is_return_candidate
6287 returned true with the same parameters. */
6288 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6289
6290 /* Finish processing this argument and prepare to start processing
6291 the next one. */
6292 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6293 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6294 {
6295 AAPCS_CP(vfp)
6296 };
6297
6298 #undef AAPCS_CP
6299
6300 static int
6301 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6302 const_tree type)
6303 {
6304 int i;
6305
6306 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6307 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6308 return i;
6309
6310 return -1;
6311 }
6312
6313 static int
6314 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6315 {
6316 /* We aren't passed a decl, so we can't check that a call is local.
6317 However, it isn't clear that that would be a win anyway, since it
6318 might limit some tail-calling opportunities. */
6319 enum arm_pcs pcs_variant;
6320
6321 if (fntype)
6322 {
6323 const_tree fndecl = NULL_TREE;
6324
6325 if (TREE_CODE (fntype) == FUNCTION_DECL)
6326 {
6327 fndecl = fntype;
6328 fntype = TREE_TYPE (fntype);
6329 }
6330
6331 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6332 }
6333 else
6334 pcs_variant = arm_pcs_default;
6335
6336 if (pcs_variant != ARM_PCS_AAPCS)
6337 {
6338 int i;
6339
6340 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6341 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6342 TYPE_MODE (type),
6343 type))
6344 return i;
6345 }
6346 return -1;
6347 }
6348
6349 static rtx
6350 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6351 const_tree fntype)
6352 {
6353 /* We aren't passed a decl, so we can't check that a call is local.
6354 However, it isn't clear that that would be a win anyway, since it
6355 might limit some tail-calling opportunities. */
6356 enum arm_pcs pcs_variant;
6357 int unsignedp ATTRIBUTE_UNUSED;
6358
6359 if (fntype)
6360 {
6361 const_tree fndecl = NULL_TREE;
6362
6363 if (TREE_CODE (fntype) == FUNCTION_DECL)
6364 {
6365 fndecl = fntype;
6366 fntype = TREE_TYPE (fntype);
6367 }
6368
6369 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6370 }
6371 else
6372 pcs_variant = arm_pcs_default;
6373
6374 /* Promote integer types. */
6375 if (type && INTEGRAL_TYPE_P (type))
6376 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6377
6378 if (pcs_variant != ARM_PCS_AAPCS)
6379 {
6380 int i;
6381
6382 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6383 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6384 type))
6385 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6386 mode, type);
6387 }
6388
6389 /* Promote small structs returned in a register to full-word size
6390 for big-endian AAPCS. */
6391 if (type && arm_return_in_msb (type))
6392 {
6393 HOST_WIDE_INT size = int_size_in_bytes (type);
6394 if (size % UNITS_PER_WORD != 0)
6395 {
6396 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6397 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6398 }
6399 }
6400
6401 return gen_rtx_REG (mode, R0_REGNUM);
6402 }
6403
6404 static rtx
6405 aapcs_libcall_value (machine_mode mode)
6406 {
6407 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6408 && GET_MODE_SIZE (mode) <= 4)
6409 mode = SImode;
6410
6411 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6412 }
6413
6414 /* Lay out a function argument using the AAPCS rules. The rule
6415 numbers referred to here are those in the AAPCS. */
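 /* For instance, a doubleword-aligned DImode argument arriving with
    NCRN == 1 is first rounded up to NCRN == 2 (rule C3) and then assigned
    to the r2/r3 pair (rule C4).  */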
6416 static void
6417 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6418 const_tree type, bool named)
6419 {
6420 int nregs, nregs2;
6421 int ncrn;
6422
6423 /* We only need to do this once per argument. */
6424 if (pcum->aapcs_arg_processed)
6425 return;
6426
6427 pcum->aapcs_arg_processed = true;
6428
6429 /* Special case: if named is false then we are handling an incoming
6430 anonymous argument which is on the stack. */
6431 if (!named)
6432 return;
6433
6434 /* Is this a potential co-processor register candidate? */
6435 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6436 {
6437 int slot = aapcs_select_call_coproc (pcum, mode, type);
6438 pcum->aapcs_cprc_slot = slot;
6439
6440 /* We don't have to apply any of the rules from part B of the
6441 preparation phase, these are handled elsewhere in the
6442 compiler. */
6443
6444 if (slot >= 0)
6445 {
6446 /* A Co-processor register candidate goes either in its own
6447 class of registers or on the stack. */
6448 if (!pcum->aapcs_cprc_failed[slot])
6449 {
6450 /* C1.cp - Try to allocate the argument to co-processor
6451 registers. */
6452 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6453 return;
6454
6455 /* C2.cp - Put the argument on the stack and note that we
6456 can't assign any more candidates in this slot. We also
6457 need to note that we have allocated stack space, so that
6458 we won't later try to split a non-cprc candidate between
6459 core registers and the stack. */
6460 pcum->aapcs_cprc_failed[slot] = true;
6461 pcum->can_split = false;
6462 }
6463
6464 /* We didn't get a register, so this argument goes on the
6465 stack. */
6466 gcc_assert (pcum->can_split == false);
6467 return;
6468 }
6469 }
6470
6471 /* C3 - For double-word aligned arguments, round the NCRN up to the
6472 next even number. */
6473 ncrn = pcum->aapcs_ncrn;
6474 if (ncrn & 1)
6475 {
6476 int res = arm_needs_doubleword_align (mode, type);
6477 /* Only warn during RTL expansion of call stmts, otherwise we would
6478 warn e.g. during gimplification even on functions that will be
6479 always inlined, and we'd warn multiple times. Don't warn when
6480 called in expand_function_start either, as we warn instead in
6481 arm_function_arg_boundary in that case. */
6482 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6483 inform (input_location, "parameter passing for argument of type "
6484 "%qT changed in GCC 7.1", type);
6485 else if (res > 0)
6486 ncrn++;
6487 }
6488
6489 nregs = ARM_NUM_REGS2 (mode, type);
6490
6491 /* Sigh, this test should really assert that nregs > 0, but a GCC
6492 extension allows empty structs and then gives them empty size; it
6493 then allows such a structure to be passed by value. For some of
6494 the code below we have to pretend that such an argument has
6495 non-zero size so that we 'locate' it correctly either in
6496 registers or on the stack. */
6497 gcc_assert (nregs >= 0);
6498
6499 nregs2 = nregs ? nregs : 1;
6500
6501 /* C4 - Argument fits entirely in core registers. */
6502 if (ncrn + nregs2 <= NUM_ARG_REGS)
6503 {
6504 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6505 pcum->aapcs_next_ncrn = ncrn + nregs;
6506 return;
6507 }
6508
6509 /* C5 - Some core registers left and there are no arguments already
6510 on the stack: split this argument between the remaining core
6511 registers and the stack. */
6512 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6513 {
6514 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6515 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6516 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6517 return;
6518 }
6519
6520 /* C6 - NCRN is set to 4. */
6521 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6522
6523 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6524 return;
6525 }
6526
6527 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6528 for a call to a function whose data type is FNTYPE.
6529 For a library call, FNTYPE is NULL. */
6530 void
6531 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6532 rtx libname,
6533 tree fndecl ATTRIBUTE_UNUSED)
6534 {
6535 /* Determine the PCS variant to use for this call. */
6536 if (fntype)
6537 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6538 else
6539 pcum->pcs_variant = arm_pcs_default;
6540
6541 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6542 {
6543 if (arm_libcall_uses_aapcs_base (libname))
6544 pcum->pcs_variant = ARM_PCS_AAPCS;
6545
6546 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6547 pcum->aapcs_reg = NULL_RTX;
6548 pcum->aapcs_partial = 0;
6549 pcum->aapcs_arg_processed = false;
6550 pcum->aapcs_cprc_slot = -1;
6551 pcum->can_split = true;
6552
6553 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6554 {
6555 int i;
6556
6557 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6558 {
6559 pcum->aapcs_cprc_failed[i] = false;
6560 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6561 }
6562 }
6563 return;
6564 }
6565
6566 /* Legacy ABIs */
6567
6568 /* On the ARM, the offset starts at 0. */
6569 pcum->nregs = 0;
6570 pcum->iwmmxt_nregs = 0;
6571 pcum->can_split = true;
6572
6573 /* Varargs vectors are treated the same as long long.
6574 named_count avoids having to change the way arm handles 'named'. */
6575 pcum->named_count = 0;
6576 pcum->nargs = 0;
6577
6578 if (TARGET_REALLY_IWMMXT && fntype)
6579 {
6580 tree fn_arg;
6581
6582 for (fn_arg = TYPE_ARG_TYPES (fntype);
6583 fn_arg;
6584 fn_arg = TREE_CHAIN (fn_arg))
6585 pcum->named_count += 1;
6586
6587 if (! pcum->named_count)
6588 pcum->named_count = INT_MAX;
6589 }
6590 }
6591
6592 /* Return 1 if double word alignment is required for argument passing.
6593 Return -1 if double word alignment used to be required for argument
6594 passing before PR77728 ABI fix, but is not required anymore.
6595 Return 0 if double word alignment is not required and wasn't required
6596 before either. */
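 /* For example, a long long or double argument (8-byte alignment, greater
    than the 32-bit PARM_BOUNDARY) returns 1, which makes callers such as
    arm_function_arg start it at an even register number; a plain int
    returns 0.  */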
6597 static int
6598 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6599 {
6600 if (!type)
6601 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6602
6603 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6604 if (!AGGREGATE_TYPE_P (type))
6605 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6606
6607 /* Array types: Use member alignment of element type. */
6608 if (TREE_CODE (type) == ARRAY_TYPE)
6609 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6610
6611 int ret = 0;
6612 /* Record/aggregate types: Use greatest member alignment of any member. */
6613 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6614 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6615 {
6616 if (TREE_CODE (field) == FIELD_DECL)
6617 return 1;
6618 else
6619 /* Before PR77728 fix, we were incorrectly considering also
6620 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6621 Make sure we can warn about that with -Wpsabi. */
6622 ret = -1;
6623 }
6624
6625 return ret;
6626 }
6627
6628
6629 /* Determine where to put an argument to a function.
6630 Value is zero to push the argument on the stack,
6631 or a hard register in which to store the argument.
6632
6633 MODE is the argument's machine mode.
6634 TYPE is the data type of the argument (as a tree).
6635 This is null for libcalls where that information may
6636 not be available.
6637 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6638 the preceding args and about the function being called.
6639 NAMED is nonzero if this argument is a named parameter
6640 (otherwise it is an extra parameter matching an ellipsis).
6641
6642 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6643 other arguments are passed on the stack. If (NAMED == 0) (which happens
6644 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6645 defined), say it is passed on the stack (function_prologue will
6646 indeed place it on the stack if necessary). */
6647
6648 static rtx
6649 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6650 const_tree type, bool named)
6651 {
6652 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6653 int nregs;
6654
6655 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6656 a call insn (op3 of a call_value insn). */
6657 if (mode == VOIDmode)
6658 return const0_rtx;
6659
6660 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6661 {
6662 aapcs_layout_arg (pcum, mode, type, named);
6663 return pcum->aapcs_reg;
6664 }
6665
6666 /* Varargs vectors are treated the same as long long.
6667 named_count avoids having to change the way arm handles 'named'. */
6668 if (TARGET_IWMMXT_ABI
6669 && arm_vector_mode_supported_p (mode)
6670 && pcum->named_count > pcum->nargs + 1)
6671 {
6672 if (pcum->iwmmxt_nregs <= 9)
6673 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6674 else
6675 {
6676 pcum->can_split = false;
6677 return NULL_RTX;
6678 }
6679 }
6680
6681 /* Put doubleword aligned quantities in even register pairs. */
6682 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6683 {
6684 int res = arm_needs_doubleword_align (mode, type);
6685 if (res < 0 && warn_psabi)
6686 inform (input_location, "parameter passing for argument of type "
6687 "%qT changed in GCC 7.1", type);
6688 else if (res > 0)
6689 pcum->nregs++;
6690 }
6691
6692 /* Only allow splitting an arg between regs and memory if all preceding
6693 args were allocated to regs. For args passed by reference we only count
6694 the reference pointer. */
6695 if (pcum->can_split)
6696 nregs = 1;
6697 else
6698 nregs = ARM_NUM_REGS2 (mode, type);
6699
6700 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6701 return NULL_RTX;
6702
6703 return gen_rtx_REG (mode, pcum->nregs);
6704 }
6705
6706 static unsigned int
6707 arm_function_arg_boundary (machine_mode mode, const_tree type)
6708 {
6709 if (!ARM_DOUBLEWORD_ALIGN)
6710 return PARM_BOUNDARY;
6711
6712 int res = arm_needs_doubleword_align (mode, type);
6713 if (res < 0 && warn_psabi)
6714 inform (input_location, "parameter passing for argument of type %qT "
6715 "changed in GCC 7.1", type);
6716
6717 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6718 }
6719
6720 static int
6721 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6722 tree type, bool named)
6723 {
6724 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6725 int nregs = pcum->nregs;
6726
6727 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6728 {
6729 aapcs_layout_arg (pcum, mode, type, named);
6730 return pcum->aapcs_partial;
6731 }
6732
6733 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6734 return 0;
6735
6736 if (NUM_ARG_REGS > nregs
6737 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6738 && pcum->can_split)
6739 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6740
6741 return 0;
6742 }
6743
6744 /* Update the data in PCUM to advance over an argument
6745 of mode MODE and data type TYPE.
6746 (TYPE is null for libcalls where that information may not be available.) */
6747
6748 static void
6749 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6750 const_tree type, bool named)
6751 {
6752 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6753
6754 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6755 {
6756 aapcs_layout_arg (pcum, mode, type, named);
6757
6758 if (pcum->aapcs_cprc_slot >= 0)
6759 {
6760 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6761 type);
6762 pcum->aapcs_cprc_slot = -1;
6763 }
6764
6765 /* Generic stuff. */
6766 pcum->aapcs_arg_processed = false;
6767 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6768 pcum->aapcs_reg = NULL_RTX;
6769 pcum->aapcs_partial = 0;
6770 }
6771 else
6772 {
6773 pcum->nargs += 1;
6774 if (arm_vector_mode_supported_p (mode)
6775 && pcum->named_count > pcum->nargs
6776 && TARGET_IWMMXT_ABI)
6777 pcum->iwmmxt_nregs += 1;
6778 else
6779 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6780 }
6781 }
6782
6783 /* Variable sized types are passed by reference. This is a GCC
6784 extension to the ARM ABI. */
6785
6786 static bool
6787 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6788 machine_mode mode ATTRIBUTE_UNUSED,
6789 const_tree type, bool named ATTRIBUTE_UNUSED)
6790 {
6791 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6792 }
6793 \f
6794 /* Encode the current state of the #pragma [no_]long_calls. */
6795 typedef enum
6796 {
6797 OFF, /* No #pragma [no_]long_calls is in effect. */
6798 LONG, /* #pragma long_calls is in effect. */
6799 SHORT /* #pragma no_long_calls is in effect. */
6800 } arm_pragma_enum;
6801
6802 static arm_pragma_enum arm_pragma_long_calls = OFF;
6803
6804 void
6805 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6806 {
6807 arm_pragma_long_calls = LONG;
6808 }
6809
6810 void
6811 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6812 {
6813 arm_pragma_long_calls = SHORT;
6814 }
6815
6816 void
6817 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6818 {
6819 arm_pragma_long_calls = OFF;
6820 }
6821 \f
6822 /* Handle an attribute requiring a FUNCTION_DECL;
6823 arguments as in struct attribute_spec.handler. */
6824 static tree
6825 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6826 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6827 {
6828 if (TREE_CODE (*node) != FUNCTION_DECL)
6829 {
6830 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6831 name);
6832 *no_add_attrs = true;
6833 }
6834
6835 return NULL_TREE;
6836 }
6837
6838 /* Handle an "interrupt" or "isr" attribute;
6839 arguments as in struct attribute_spec.handler. */
6840 static tree
6841 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6842 bool *no_add_attrs)
6843 {
6844 if (DECL_P (*node))
6845 {
6846 if (TREE_CODE (*node) != FUNCTION_DECL)
6847 {
6848 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6849 name);
6850 *no_add_attrs = true;
6851 }
6852 /* FIXME: the argument if any is checked for type attributes;
6853 should it be checked for decl ones? */
6854 }
6855 else
6856 {
6857 if (TREE_CODE (*node) == FUNCTION_TYPE
6858 || TREE_CODE (*node) == METHOD_TYPE)
6859 {
6860 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6861 {
6862 warning (OPT_Wattributes, "%qE attribute ignored",
6863 name);
6864 *no_add_attrs = true;
6865 }
6866 }
6867 else if (TREE_CODE (*node) == POINTER_TYPE
6868 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6869 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6870 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6871 {
6872 *node = build_variant_type_copy (*node);
6873 TREE_TYPE (*node) = build_type_attribute_variant
6874 (TREE_TYPE (*node),
6875 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6876 *no_add_attrs = true;
6877 }
6878 else
6879 {
6880 /* Possibly pass this attribute on from the type to a decl. */
6881 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6882 | (int) ATTR_FLAG_FUNCTION_NEXT
6883 | (int) ATTR_FLAG_ARRAY_NEXT))
6884 {
6885 *no_add_attrs = true;
6886 return tree_cons (name, args, NULL_TREE);
6887 }
6888 else
6889 {
6890 warning (OPT_Wattributes, "%qE attribute ignored",
6891 name);
6892 }
6893 }
6894 }
6895
6896 return NULL_TREE;
6897 }
6898
6899 /* Handle a "pcs" attribute; arguments as in struct
6900 attribute_spec.handler. */
6901 static tree
6902 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6903 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6904 {
6905 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6906 {
6907 warning (OPT_Wattributes, "%qE attribute ignored", name);
6908 *no_add_attrs = true;
6909 }
6910 return NULL_TREE;
6911 }
6912
6913 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6914 /* Handle the "notshared" attribute. This attribute is another way of
6915 requesting hidden visibility. ARM's compiler supports
6916 "__declspec(notshared)"; we support the same thing via an
6917 attribute. */
6918
6919 static tree
6920 arm_handle_notshared_attribute (tree *node,
6921 tree name ATTRIBUTE_UNUSED,
6922 tree args ATTRIBUTE_UNUSED,
6923 int flags ATTRIBUTE_UNUSED,
6924 bool *no_add_attrs)
6925 {
6926 tree decl = TYPE_NAME (*node);
6927
6928 if (decl)
6929 {
6930 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6931 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6932 *no_add_attrs = false;
6933 }
6934 return NULL_TREE;
6935 }
6936 #endif
6937
6938 /* This function returns true if a function with declaration FNDECL and type
6939 FNTYPE uses the stack to pass arguments or to return a value, and false
6940 otherwise. This is used for functions with the attributes
6941 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6942 diagnostic messages if the stack is used. NAME is the name of the attribute
6943 used. */
6944
6945 static bool
6946 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6947 {
6948 function_args_iterator args_iter;
6949 CUMULATIVE_ARGS args_so_far_v;
6950 cumulative_args_t args_so_far;
6951 bool first_param = true;
6952 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6953
6954 /* Error out if any argument is passed on the stack. */
6955 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6956 args_so_far = pack_cumulative_args (&args_so_far_v);
6957 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6958 {
6959 rtx arg_rtx;
6960 machine_mode arg_mode = TYPE_MODE (arg_type);
6961
6962 prev_arg_type = arg_type;
6963 if (VOID_TYPE_P (arg_type))
6964 continue;
6965
6966 if (!first_param)
6967 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6968 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6969 if (!arg_rtx
6970 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6971 {
6972 error ("%qE attribute not available to functions with arguments "
6973 "passed on the stack", name);
6974 return true;
6975 }
6976 first_param = false;
6977 }
6978
6979 /* Error out for variadic functions since we cannot control how many
6980 arguments will be passed and thus the stack could be used. stdarg_p () is
6981 not used for this check, to avoid traversing the arguments twice. */
6982 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6983 {
6984 error ("%qE attribute not available to functions with variable number "
6985 "of arguments", name);
6986 return true;
6987 }
6988
6989 /* Error out if return value is passed on the stack. */
6990 ret_type = TREE_TYPE (fntype);
6991 if (arm_return_in_memory (ret_type, fntype))
6992 {
6993 error ("%qE attribute not available to functions that return value on "
6994 "the stack", name);
6995 return true;
6996 }
6997 return false;
6998 }
6999
7000 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7001 function will check whether the attribute is allowed here and will add the
7002 attribute to the function declaration tree or otherwise issue a warning. */
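/* For example, when compiling with -mcmse (illustrative only):
   int __attribute__ ((cmse_nonsecure_entry)) secure_entry (int x);  */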
7003
7004 static tree
7005 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7006 tree /* args */,
7007 int /* flags */,
7008 bool *no_add_attrs)
7009 {
7010 tree fndecl;
7011
7012 if (!use_cmse)
7013 {
7014 *no_add_attrs = true;
7015 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
7016 name);
7017 return NULL_TREE;
7018 }
7019
7020 /* Ignore attribute for function types. */
7021 if (TREE_CODE (*node) != FUNCTION_DECL)
7022 {
7023 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7024 name);
7025 *no_add_attrs = true;
7026 return NULL_TREE;
7027 }
7028
7029 fndecl = *node;
7030
7031 /* Warn for static linkage functions. */
7032 if (!TREE_PUBLIC (fndecl))
7033 {
7034 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7035 "with static linkage", name);
7036 *no_add_attrs = true;
7037 return NULL_TREE;
7038 }
7039
7040 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7041 TREE_TYPE (fndecl));
7042 return NULL_TREE;
7043 }
7044
7045
7046 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7047 function will check whether the attribute is allowed here and will add the
7048 attribute to the function type tree or otherwise issue a diagnostic. The
7049 reason we check this at declaration time is to only allow the use of the
7050 attribute with declarations of function pointers and not function
7051 declarations. This function checks NODE is of the expected type and issues
7052 diagnostics otherwise using NAME. If it is not of the expected type
7053 *NO_ADD_ATTRS will be set to true. */
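/* For example, when compiling with -mcmse (illustrative only):
   void (*ns_callback) (int) __attribute__ ((cmse_nonsecure_call));  */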
7054
7055 static tree
7056 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7057 tree /* args */,
7058 int /* flags */,
7059 bool *no_add_attrs)
7060 {
7061 tree decl = NULL_TREE, fntype = NULL_TREE;
7062 tree type;
7063
7064 if (!use_cmse)
7065 {
7066 *no_add_attrs = true;
7067 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option.",
7068 name);
7069 return NULL_TREE;
7070 }
7071
7072 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7073 {
7074 decl = *node;
7075 fntype = TREE_TYPE (decl);
7076 }
7077
7078 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7079 fntype = TREE_TYPE (fntype);
7080
7081 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7082 {
7083 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7084 "function pointer", name);
7085 *no_add_attrs = true;
7086 return NULL_TREE;
7087 }
7088
7089 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7090
7091 if (*no_add_attrs)
7092 return NULL_TREE;
7093
7094 /* Prevent trees being shared among function types with and without
7095 cmse_nonsecure_call attribute. */
7096 type = TREE_TYPE (decl);
7097
7098 type = build_distinct_type_copy (type);
7099 TREE_TYPE (decl) = type;
7100 fntype = type;
7101
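/* Walk down any remaining pointer levels, giving each level its own
   distinct copy so that only this declaration's FUNCTION_TYPE ends up
   carrying the attribute.  */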
7102 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7103 {
7104 type = fntype;
7105 fntype = TREE_TYPE (fntype);
7106 fntype = build_distinct_type_copy (fntype);
7107 TREE_TYPE (type) = fntype;
7108 }
7109
7110 /* Construct a type attribute and add it to the function type. */
7111 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7112 TYPE_ATTRIBUTES (fntype));
7113 TYPE_ATTRIBUTES (fntype) = attrs;
7114 return NULL_TREE;
7115 }
7116
7117 /* Return 0 if the attributes for two types are incompatible, 1 if they
7118 are compatible, and 2 if they are nearly compatible (which causes a
7119 warning to be generated). */
7120 static int
7121 arm_comp_type_attributes (const_tree type1, const_tree type2)
7122 {
7123 int l1, l2, s1, s2;
7124
7125 /* Check for mismatch of non-default calling convention. */
7126 if (TREE_CODE (type1) != FUNCTION_TYPE)
7127 return 1;
7128
7129 /* Check for mismatched call attributes. */
7130 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7131 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7132 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7133 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7134
7135 /* Only bother to check if an attribute is defined. */
7136 if (l1 | l2 | s1 | s2)
7137 {
7138 /* If one type has an attribute, the other must have the same attribute. */
7139 if ((l1 != l2) || (s1 != s2))
7140 return 0;
7141
7142 /* Disallow mixed attributes. */
7143 if ((l1 & s2) || (l2 & s1))
7144 return 0;
7145 }
7146
7147 /* Check for mismatched ISR attribute. */
7148 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7149 if (! l1)
7150 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7151 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7152 if (! l2)
7153 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7154 if (l1 != l2)
7155 return 0;
7156
7157 l1 = lookup_attribute ("cmse_nonsecure_call",
7158 TYPE_ATTRIBUTES (type1)) != NULL;
7159 l2 = lookup_attribute ("cmse_nonsecure_call",
7160 TYPE_ATTRIBUTES (type2)) != NULL;
7161
7162 if (l1 != l2)
7163 return 0;
7164
7165 return 1;
7166 }
7167
7168 /* Assign default attributes to a newly defined type. This is used to
7169 set the short_call/long_call attributes for function types of
7170 functions defined inside the corresponding #pragma scopes. */
7171 static void
7172 arm_set_default_type_attributes (tree type)
7173 {
7174 /* Add __attribute__ ((long_call)) to all functions when inside
7175 #pragma long_calls, or __attribute__ ((short_call)) when inside
7176 #pragma no_long_calls. */
7177 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7178 {
7179 tree type_attr_list, attr_name;
7180 type_attr_list = TYPE_ATTRIBUTES (type);
7181
7182 if (arm_pragma_long_calls == LONG)
7183 attr_name = get_identifier ("long_call");
7184 else if (arm_pragma_long_calls == SHORT)
7185 attr_name = get_identifier ("short_call");
7186 else
7187 return;
7188
7189 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7190 TYPE_ATTRIBUTES (type) = type_attr_list;
7191 }
7192 }
7193 \f
7194 /* Return true if DECL is known to be linked into section SECTION. */
7195
7196 static bool
7197 arm_function_in_section_p (tree decl, section *section)
7198 {
7199 /* We can only be certain about the prevailing symbol definition. */
7200 if (!decl_binds_to_current_def_p (decl))
7201 return false;
7202
7203 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7204 if (!DECL_SECTION_NAME (decl))
7205 {
7206 /* Make sure that we will not create a unique section for DECL. */
7207 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7208 return false;
7209 }
7210
7211 return function_section (decl) == section;
7212 }
7213
7214 /* Return nonzero if a 32-bit "long_call" should be generated for
7215 a call from the current function to DECL. We generate a long_call
7216 if the function:
7217
7218 a. has an __attribute__ ((long_call))
7219 or b. is within the scope of a #pragma long_calls
7220 or c. the -mlong-calls command line switch has been specified
7221
7222 However we do not generate a long call if the function:
7223
7224 d. has an __attribute__ ((short_call))
7225 or e. is inside the scope of a #pragma no_long_calls
7226 or f. is defined in the same section as the current function. */
7227
7228 bool
7229 arm_is_long_call_p (tree decl)
7230 {
7231 tree attrs;
7232
7233 if (!decl)
7234 return TARGET_LONG_CALLS;
7235
7236 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7237 if (lookup_attribute ("short_call", attrs))
7238 return false;
7239
7240 /* For "f", be conservative, and only cater for cases in which the
7241 whole of the current function is placed in the same section. */
7242 if (!flag_reorder_blocks_and_partition
7243 && TREE_CODE (decl) == FUNCTION_DECL
7244 && arm_function_in_section_p (decl, current_function_section ()))
7245 return false;
7246
7247 if (lookup_attribute ("long_call", attrs))
7248 return true;
7249
7250 return TARGET_LONG_CALLS;
7251 }
7252
7253 /* Return nonzero if it is ok to make a tail-call to DECL. */
7254 static bool
7255 arm_function_ok_for_sibcall (tree decl, tree exp)
7256 {
7257 unsigned long func_type;
7258
7259 if (cfun->machine->sibcall_blocked)
7260 return false;
7261
7262 /* Never tailcall something if we are generating code for Thumb-1. */
7263 if (TARGET_THUMB1)
7264 return false;
7265
7266 /* The PIC register is live on entry to VxWorks PLT entries, so we
7267 must make the call before restoring the PIC register. */
7268 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7269 return false;
7270
7271 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7272 may be used both as target of the call and base register for restoring
7273 the VFP registers. */
7274 if (TARGET_APCS_FRAME && TARGET_ARM
7275 && TARGET_HARD_FLOAT
7276 && decl && arm_is_long_call_p (decl))
7277 return false;
7278
7279 /* If we are interworking and the function is not declared static
7280 then we can't tail-call it unless we know that it exists in this
7281 compilation unit (since it might be a Thumb routine). */
7282 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7283 && !TREE_ASM_WRITTEN (decl))
7284 return false;
7285
7286 func_type = arm_current_func_type ();
7287 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7288 if (IS_INTERRUPT (func_type))
7289 return false;
7290
7291 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7292 generated for entry functions themselves. */
7293 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7294 return false;
7295
7296 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7297 this would complicate matters for later code generation. */
7298 if (TREE_CODE (exp) == CALL_EXPR)
7299 {
7300 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7301 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7302 return false;
7303 }
7304
7305 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7306 {
7307 /* Check that the return value locations are the same. For
7308 example that we aren't returning a value from the sibling in
7309 a VFP register but then need to transfer it to a core
7310 register. */
7311 rtx a, b;
7312 tree decl_or_type = decl;
7313
7314 /* If it is an indirect function pointer, get the function type. */
7315 if (!decl)
7316 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7317
7318 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7319 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7320 cfun->decl, false);
7321 if (!rtx_equal_p (a, b))
7322 return false;
7323 }
7324
7325 /* Never tailcall if function may be called with a misaligned SP. */
7326 if (IS_STACKALIGN (func_type))
7327 return false;
7328
7329 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7330 references should become a NOP. Don't convert such calls into
7331 sibling calls. */
7332 if (TARGET_AAPCS_BASED
7333 && arm_abi == ARM_ABI_AAPCS
7334 && decl
7335 && DECL_WEAK (decl))
7336 return false;
7337
7338 /* We cannot do a tailcall for an indirect call by descriptor if all the
7339 argument registers are used because the only register left to load the
7340 address is IP and it will already contain the static chain. */
7341 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7342 {
7343 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7344 CUMULATIVE_ARGS cum;
7345 cumulative_args_t cum_v;
7346
7347 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7348 cum_v = pack_cumulative_args (&cum);
7349
7350 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7351 {
7352 tree type = TREE_VALUE (t);
7353 if (!VOID_TYPE_P (type))
7354 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7355 }
7356
7357 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7358 return false;
7359 }
7360
7361 /* Everything else is ok. */
7362 return true;
7363 }
7364
7365 \f
7366 /* Addressing mode support functions. */
7367
7368 /* Return nonzero if X is a legitimate immediate operand when compiling
7369 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7370 int
7371 legitimate_pic_operand_p (rtx x)
7372 {
7373 if (GET_CODE (x) == SYMBOL_REF
7374 || (GET_CODE (x) == CONST
7375 && GET_CODE (XEXP (x, 0)) == PLUS
7376 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7377 return 0;
7378
7379 return 1;
7380 }
7381
7382 /* Record that the current function needs a PIC register. Initialize
7383 cfun->machine->pic_reg if we have not already done so. */
7384
7385 static void
7386 require_pic_register (void)
7387 {
7388 /* A lot of the logic here is made obscure by the fact that this
7389 routine gets called as part of the rtx cost estimation process.
7390 We don't want those calls to affect any assumptions about the real
7391 function; and further, we can't call entry_of_function() until we
7392 start the real expansion process. */
7393 if (!crtl->uses_pic_offset_table)
7394 {
7395 gcc_assert (can_create_pseudo_p ());
7396 if (arm_pic_register != INVALID_REGNUM
7397 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7398 {
7399 if (!cfun->machine->pic_reg)
7400 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7401
7402 /* Play games to avoid marking the function as needing pic
7403 if we are being called as part of the cost-estimation
7404 process. */
7405 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7406 crtl->uses_pic_offset_table = 1;
7407 }
7408 else
7409 {
7410 rtx_insn *seq, *insn;
7411
7412 if (!cfun->machine->pic_reg)
7413 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7414
7415 /* Play games to avoid marking the function as needing pic
7416 if we are being called as part of the cost-estimation
7417 process. */
7418 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7419 {
7420 crtl->uses_pic_offset_table = 1;
7421 start_sequence ();
7422
7423 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7424 && arm_pic_register > LAST_LO_REGNUM)
7425 emit_move_insn (cfun->machine->pic_reg,
7426 gen_rtx_REG (Pmode, arm_pic_register));
7427 else
7428 arm_load_pic_register (0UL);
7429
7430 seq = get_insns ();
7431 end_sequence ();
7432
7433 for (insn = seq; insn; insn = NEXT_INSN (insn))
7434 if (INSN_P (insn))
7435 INSN_LOCATION (insn) = prologue_location;
7436
7437 /* We can be called during expansion of PHI nodes, where
7438 we can't yet emit instructions directly in the final
7439 insn stream. Queue the insns on the entry edge, they will
7440 be committed after everything else is expanded. */
7441 insert_insn_on_edge (seq,
7442 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7443 }
7444 }
7445 }
7446 }
7447
7448 rtx
7449 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7450 {
7451 if (GET_CODE (orig) == SYMBOL_REF
7452 || GET_CODE (orig) == LABEL_REF)
7453 {
7454 if (reg == 0)
7455 {
7456 gcc_assert (can_create_pseudo_p ());
7457 reg = gen_reg_rtx (Pmode);
7458 }
7459
7460 /* VxWorks does not impose a fixed gap between segments; the run-time
7461 gap can be different from the object-file gap. We therefore can't
7462 use GOTOFF unless we are absolutely sure that the symbol is in the
7463 same segment as the GOT. Unfortunately, the flexibility of linker
7464 scripts means that we can't be sure of that in general, so assume
7465 that GOTOFF is never valid on VxWorks. */
7466 /* References to weak symbols cannot be resolved locally: they
7467 may be overridden by a non-weak definition at link time. */
7468 rtx_insn *insn;
7469 if ((GET_CODE (orig) == LABEL_REF
7470 || (GET_CODE (orig) == SYMBOL_REF
7471 && SYMBOL_REF_LOCAL_P (orig)
7472 && (SYMBOL_REF_DECL (orig)
7473 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7474 && NEED_GOT_RELOC
7475 && arm_pic_data_is_text_relative)
7476 insn = arm_pic_static_addr (orig, reg);
7477 else
7478 {
7479 rtx pat;
7480 rtx mem;
7481
7482 /* If this function doesn't have a pic register, create one now. */
7483 require_pic_register ();
7484
7485 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7486
7487 /* Make the MEM as close to a constant as possible. */
7488 mem = SET_SRC (pat);
7489 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7490 MEM_READONLY_P (mem) = 1;
7491 MEM_NOTRAP_P (mem) = 1;
7492
7493 insn = emit_insn (pat);
7494 }
7495
7496 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7497 by loop. */
7498 set_unique_reg_note (insn, REG_EQUAL, orig);
7499
7500 return reg;
7501 }
7502 else if (GET_CODE (orig) == CONST)
7503 {
7504 rtx base, offset;
7505
7506 if (GET_CODE (XEXP (orig, 0)) == PLUS
7507 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7508 return orig;
7509
7510 /* Handle the case where we have: const (UNSPEC_TLS). */
7511 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7512 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7513 return orig;
7514
7515 /* Handle the case where we have:
7516 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7517 CONST_INT. */
7518 if (GET_CODE (XEXP (orig, 0)) == PLUS
7519 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7520 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7521 {
7522 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7523 return orig;
7524 }
7525
7526 if (reg == 0)
7527 {
7528 gcc_assert (can_create_pseudo_p ());
7529 reg = gen_reg_rtx (Pmode);
7530 }
7531
7532 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7533
7534 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7535 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7536 base == reg ? 0 : reg);
7537
7538 if (CONST_INT_P (offset))
7539 {
7540 /* The base register doesn't really matter; we only want to
7541 test the index for the appropriate mode. */
7542 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7543 {
7544 gcc_assert (can_create_pseudo_p ());
7545 offset = force_reg (Pmode, offset);
7546 }
7547
7548 if (CONST_INT_P (offset))
7549 return plus_constant (Pmode, base, INTVAL (offset));
7550 }
7551
7552 if (GET_MODE_SIZE (mode) > 4
7553 && (GET_MODE_CLASS (mode) == MODE_INT
7554 || TARGET_SOFT_FLOAT))
7555 {
7556 emit_insn (gen_addsi3 (reg, base, offset));
7557 return reg;
7558 }
7559
7560 return gen_rtx_PLUS (Pmode, base, offset);
7561 }
7562
7563 return orig;
7564 }
7565
7566
7567 /* Find a spare register to use during the prolog of a function. */
7568
7569 static int
7570 thumb_find_work_register (unsigned long pushed_regs_mask)
7571 {
7572 int reg;
7573
7574 /* Check the argument registers first as these are call-used. The
7575 register allocation order means that sometimes r3 might be used
7576 but earlier argument registers might not, so check them all. */
7577 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7578 if (!df_regs_ever_live_p (reg))
7579 return reg;
7580
7581 /* Before going on to check the call-saved registers we can try a couple
7582 more ways of deducing that r3 is available. The first is when we are
7583 pushing anonymous arguments onto the stack and we have fewer than 4
7584 registers' worth of fixed arguments (*). In this case r3 will be part of
7585 the variable argument list and so we can be sure that it will be
7586 pushed right at the start of the function. Hence it will be available
7587 for the rest of the prologue.
7588 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7589 if (cfun->machine->uses_anonymous_args
7590 && crtl->args.pretend_args_size > 0)
7591 return LAST_ARG_REGNUM;
7592
7593 /* The other case is when we have fixed arguments but fewer than 4 registers'
7594 worth. In this case r3 might be used in the body of the function, but
7595 it is not being used to convey an argument into the function. In theory
7596 we could just check crtl->args.size to see how many bytes are
7597 being passed in argument registers, but it seems that it is unreliable.
7598 Sometimes it will have the value 0 when in fact arguments are being
7599 passed. (See testcase execute/20021111-1.c for an example). So we also
7600 check the args_info.nregs field as well. The problem with this field is
7601 that it makes no allowances for arguments that are passed to the
7602 function but which are not used. Hence we could miss an opportunity
7603 when a function has an unused argument in r3. But it is better to be
7604 safe than to be sorry. */
7605 if (! cfun->machine->uses_anonymous_args
7606 && crtl->args.size >= 0
7607 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7608 && (TARGET_AAPCS_BASED
7609 ? crtl->args.info.aapcs_ncrn < 4
7610 : crtl->args.info.nregs < 4))
7611 return LAST_ARG_REGNUM;
7612
7613 /* Otherwise look for a call-saved register that is going to be pushed. */
7614 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7615 if (pushed_regs_mask & (1 << reg))
7616 return reg;
7617
7618 if (TARGET_THUMB2)
7619 {
7620 /* Thumb-2 can use high regs. */
7621 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7622 if (pushed_regs_mask & (1 << reg))
7623 return reg;
7624 }
7625 /* Something went wrong - thumb_compute_save_reg_mask()
7626 should have arranged for a suitable register to be pushed. */
7627 gcc_unreachable ();
7628 }
7629
7630 static GTY(()) int pic_labelno;
7631
7632 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7633 low register. */
7634
7635 void
7636 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7637 {
7638 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7639
7640 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7641 return;
7642
7643 gcc_assert (flag_pic);
7644
7645 pic_reg = cfun->machine->pic_reg;
7646 if (TARGET_VXWORKS_RTP)
7647 {
7648 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7649 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7650 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7651
7652 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7653
7654 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7655 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7656 }
7657 else
7658 {
7659 /* We use an UNSPEC rather than a LABEL_REF because this label
7660 never appears in the code stream. */
7661
7662 labelno = GEN_INT (pic_labelno++);
7663 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7664 l1 = gen_rtx_CONST (VOIDmode, l1);
7665
7666 /* On the ARM the PC register contains 'dot + 8' at the time of the
7667 addition, on the Thumb it is 'dot + 4'. */
7668 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7669 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7670 UNSPEC_GOTSYM_OFF);
7671 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7672
7673 if (TARGET_32BIT)
7674 {
7675 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7676 }
7677 else /* TARGET_THUMB1 */
7678 {
7679 if (arm_pic_register != INVALID_REGNUM
7680 && REGNO (pic_reg) > LAST_LO_REGNUM)
7681 {
7682 /* We will have pushed the pic register, so we should always be
7683 able to find a work register. */
7684 pic_tmp = gen_rtx_REG (SImode,
7685 thumb_find_work_register (saved_regs));
7686 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7687 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7688 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7689 }
7690 else if (arm_pic_register != INVALID_REGNUM
7691 && arm_pic_register > LAST_LO_REGNUM
7692 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7693 {
7694 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7695 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7696 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7697 }
7698 else
7699 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7700 }
7701 }
7702
7703 /* Need to emit this whether or not we obey regdecls,
7704 since setjmp/longjmp can cause life info to screw up. */
7705 emit_use (pic_reg);
7706 }
7707
7708 /* Generate code to load the address of a static var when flag_pic is set. */
7709 static rtx_insn *
7710 arm_pic_static_addr (rtx orig, rtx reg)
7711 {
7712 rtx l1, labelno, offset_rtx;
7713
7714 gcc_assert (flag_pic);
7715
7716 /* We use an UNSPEC rather than a LABEL_REF because this label
7717 never appears in the code stream. */
7718 labelno = GEN_INT (pic_labelno++);
7719 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7720 l1 = gen_rtx_CONST (VOIDmode, l1);
7721
7722 /* On the ARM the PC register contains 'dot + 8' at the time of the
7723 addition, on the Thumb it is 'dot + 4'. */
7724 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7725 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7726 UNSPEC_SYMBOL_OFFSET);
7727 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7728
7729 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7730 }
7731
7732 /* Return nonzero if X is valid as an ARM state addressing register. */
7733 static int
7734 arm_address_register_rtx_p (rtx x, int strict_p)
7735 {
7736 int regno;
7737
7738 if (!REG_P (x))
7739 return 0;
7740
7741 regno = REGNO (x);
7742
7743 if (strict_p)
7744 return ARM_REGNO_OK_FOR_BASE_P (regno);
7745
7746 return (regno <= LAST_ARM_REGNUM
7747 || regno >= FIRST_PSEUDO_REGISTER
7748 || regno == FRAME_POINTER_REGNUM
7749 || regno == ARG_POINTER_REGNUM);
7750 }
7751
7752 /* Return TRUE if this rtx is the difference of a symbol and a label,
7753 and will reduce to a PC-relative relocation in the object file.
7754 Expressions like this can be left alone when generating PIC, rather
7755 than forced through the GOT. */
7756 static int
7757 pcrel_constant_p (rtx x)
7758 {
7759 if (GET_CODE (x) == MINUS)
7760 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7761
7762 return FALSE;
7763 }
7764
7765 /* Return true if X will surely end up in an index register after the
7766 next splitting pass. */
7767 static bool
7768 will_be_in_index_register (const_rtx x)
7769 {
7770 /* arm.md: calculate_pic_address will split this into a register. */
7771 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7772 }
7773
7774 /* Return nonzero if X is a valid ARM state address operand. */
7775 int
7776 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7777 int strict_p)
7778 {
7779 bool use_ldrd;
7780 enum rtx_code code = GET_CODE (x);
7781
7782 if (arm_address_register_rtx_p (x, strict_p))
7783 return 1;
7784
7785 use_ldrd = (TARGET_LDRD
7786 && (mode == DImode || mode == DFmode));
7787
7788 if (code == POST_INC || code == PRE_DEC
7789 || ((code == PRE_INC || code == POST_DEC)
7790 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7791 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7792
7793 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7794 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7795 && GET_CODE (XEXP (x, 1)) == PLUS
7796 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7797 {
7798 rtx addend = XEXP (XEXP (x, 1), 1);
7799
7800 /* Don't allow ldrd post-increment by register because it's hard
7801 to fix up invalid register choices. */
7802 if (use_ldrd
7803 && GET_CODE (x) == POST_MODIFY
7804 && REG_P (addend))
7805 return 0;
7806
7807 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7808 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7809 }
7810
7811 /* After reload, constants split into minipools will have addresses
7812 from a LABEL_REF. */
7813 else if (reload_completed
7814 && (code == LABEL_REF
7815 || (code == CONST
7816 && GET_CODE (XEXP (x, 0)) == PLUS
7817 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7818 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7819 return 1;
7820
7821 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7822 return 0;
7823
7824 else if (code == PLUS)
7825 {
7826 rtx xop0 = XEXP (x, 0);
7827 rtx xop1 = XEXP (x, 1);
7828
7829 return ((arm_address_register_rtx_p (xop0, strict_p)
7830 && ((CONST_INT_P (xop1)
7831 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7832 || (!strict_p && will_be_in_index_register (xop1))))
7833 || (arm_address_register_rtx_p (xop1, strict_p)
7834 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7835 }
7836
7837 #if 0
7838 /* Reload currently can't handle MINUS, so disable this for now */
7839 else if (GET_CODE (x) == MINUS)
7840 {
7841 rtx xop0 = XEXP (x, 0);
7842 rtx xop1 = XEXP (x, 1);
7843
7844 return (arm_address_register_rtx_p (xop0, strict_p)
7845 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7846 }
7847 #endif
7848
7849 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7850 && code == SYMBOL_REF
7851 && CONSTANT_POOL_ADDRESS_P (x)
7852 && ! (flag_pic
7853 && symbol_mentioned_p (get_pool_constant (x))
7854 && ! pcrel_constant_p (get_pool_constant (x))))
7855 return 1;
7856
7857 return 0;
7858 }
7859
7860 /* Return true if we can avoid creating a constant pool entry for x. */
7861 static bool
7862 can_avoid_literal_pool_for_label_p (rtx x)
7863 {
7864 /* Normally we can assign constant values to target registers without
7865 the help of the constant pool. But there are cases where we have to
7866 use the constant pool, such as:
7867 1) assigning a label to a register;
7868 2) sign-extending an 8-bit value to 32 bits and assigning it to a register.
7869
7870 Constant pool access in format:
7871 (set (reg r0) (mem (symbol_ref (".LC0"))))
7872 will cause the use of literal pool (later in function arm_reorg).
7873 So here we mark such format as an invalid format, then the compiler
7874 will adjust it into:
7875 (set (reg r0) (symbol_ref (".LC0")))
7876 (set (reg r0) (mem (reg r0))).
7877 No extra register is required, and (mem (reg r0)) won't cause the use
7878 of literal pools. */
7879 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7880 && CONSTANT_POOL_ADDRESS_P (x))
7881 return 1;
7882 return 0;
7883 }
7884
7885
7886 /* Return nonzero if X is a valid Thumb-2 address operand. */
7887 static int
7888 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7889 {
7890 bool use_ldrd;
7891 enum rtx_code code = GET_CODE (x);
7892
7893 if (arm_address_register_rtx_p (x, strict_p))
7894 return 1;
7895
7896 use_ldrd = (TARGET_LDRD
7897 && (mode == DImode || mode == DFmode));
7898
7899 if (code == POST_INC || code == PRE_DEC
7900 || ((code == PRE_INC || code == POST_DEC)
7901 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7902 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7903
7904 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7905 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7906 && GET_CODE (XEXP (x, 1)) == PLUS
7907 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7908 {
7909 /* Thumb-2 only has autoincrement by constant. */
7910 rtx addend = XEXP (XEXP (x, 1), 1);
7911 HOST_WIDE_INT offset;
7912
7913 if (!CONST_INT_P (addend))
7914 return 0;
7915
7916 offset = INTVAL (addend);
7917 if (GET_MODE_SIZE (mode) <= 4)
7918 return (offset > -256 && offset < 256);
7919
7920 return (use_ldrd && offset > -1024 && offset < 1024
7921 && (offset & 3) == 0);
7922 }
7923
7924 /* After reload, constants split into minipools will have addresses
7925 from a LABEL_REF. */
7926 else if (reload_completed
7927 && (code == LABEL_REF
7928 || (code == CONST
7929 && GET_CODE (XEXP (x, 0)) == PLUS
7930 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7931 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7932 return 1;
7933
7934 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7935 return 0;
7936
7937 else if (code == PLUS)
7938 {
7939 rtx xop0 = XEXP (x, 0);
7940 rtx xop1 = XEXP (x, 1);
7941
7942 return ((arm_address_register_rtx_p (xop0, strict_p)
7943 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7944 || (!strict_p && will_be_in_index_register (xop1))))
7945 || (arm_address_register_rtx_p (xop1, strict_p)
7946 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7947 }
7948
7949 else if (can_avoid_literal_pool_for_label_p (x))
7950 return 0;
7951
7952 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7953 && code == SYMBOL_REF
7954 && CONSTANT_POOL_ADDRESS_P (x)
7955 && ! (flag_pic
7956 && symbol_mentioned_p (get_pool_constant (x))
7957 && ! pcrel_constant_p (get_pool_constant (x))))
7958 return 1;
7959
7960 return 0;
7961 }
7962
7963 /* Return nonzero if INDEX is valid for an address index operand in
7964 ARM state. */
7965 static int
7966 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7967 int strict_p)
7968 {
7969 HOST_WIDE_INT range;
7970 enum rtx_code code = GET_CODE (index);
7971
7972 /* Standard coprocessor addressing modes. */
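/* VFP loads and stores take an 8-bit immediate scaled by 4, i.e. a byte
   offset in the range -1020..1020 in multiples of 4, which is what the
   check below enforces.  */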
7973 if (TARGET_HARD_FLOAT
7974 && (mode == SFmode || mode == DFmode))
7975 return (code == CONST_INT && INTVAL (index) < 1024
7976 && INTVAL (index) > -1024
7977 && (INTVAL (index) & 3) == 0);
7978
7979 /* For quad modes, we restrict the constant offset to be slightly less
7980 than what the instruction format permits. We do this because for
7981 quad mode moves, we will actually decompose them into two separate
7982 double-mode reads or writes. INDEX must therefore be a valid
7983 (double-mode) offset and so should INDEX+8. */
7984 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7985 return (code == CONST_INT
7986 && INTVAL (index) < 1016
7987 && INTVAL (index) > -1024
7988 && (INTVAL (index) & 3) == 0);
7989
7990 /* We have no such constraint on double mode offsets, so we permit the
7991 full range of the instruction format. */
7992 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7993 return (code == CONST_INT
7994 && INTVAL (index) < 1024
7995 && INTVAL (index) > -1024
7996 && (INTVAL (index) & 3) == 0);
7997
7998 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7999 return (code == CONST_INT
8000 && INTVAL (index) < 1024
8001 && INTVAL (index) > -1024
8002 && (INTVAL (index) & 3) == 0);
8003
8004 if (arm_address_register_rtx_p (index, strict_p)
8005 && (GET_MODE_SIZE (mode) <= 4))
8006 return 1;
8007
8008 if (mode == DImode || mode == DFmode)
8009 {
8010 if (code == CONST_INT)
8011 {
8012 HOST_WIDE_INT val = INTVAL (index);
8013
8014 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8015 If vldr is selected it uses arm_coproc_mem_operand. */
8016 if (TARGET_LDRD)
8017 return val > -256 && val < 256;
8018 else
8019 return val > -4096 && val < 4092;
8020 }
8021
8022 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8023 }
8024
8025 if (GET_MODE_SIZE (mode) <= 4
8026 && ! (arm_arch4
8027 && (mode == HImode
8028 || mode == HFmode
8029 || (mode == QImode && outer == SIGN_EXTEND))))
8030 {
8031 if (code == MULT)
8032 {
8033 rtx xiop0 = XEXP (index, 0);
8034 rtx xiop1 = XEXP (index, 1);
8035
8036 return ((arm_address_register_rtx_p (xiop0, strict_p)
8037 && power_of_two_operand (xiop1, SImode))
8038 || (arm_address_register_rtx_p (xiop1, strict_p)
8039 && power_of_two_operand (xiop0, SImode)));
8040 }
8041 else if (code == LSHIFTRT || code == ASHIFTRT
8042 || code == ASHIFT || code == ROTATERT)
8043 {
8044 rtx op = XEXP (index, 1);
8045
8046 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8047 && CONST_INT_P (op)
8048 && INTVAL (op) > 0
8049 && INTVAL (op) <= 31);
8050 }
8051 }
8052
8053 /* For ARM v4 we may be doing a sign-extend operation during the
8054 load. */
8055 if (arm_arch4)
8056 {
8057 if (mode == HImode
8058 || mode == HFmode
8059 || (outer == SIGN_EXTEND && mode == QImode))
8060 range = 256;
8061 else
8062 range = 4096;
8063 }
8064 else
8065 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8066
8067 return (code == CONST_INT
8068 && INTVAL (index) < range
8069 && INTVAL (index) > -range);
8070 }
8071
8072 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
8073 index operand, i.e. 1, 2, 4 or 8. */
8074 static bool
8075 thumb2_index_mul_operand (rtx op)
8076 {
8077 HOST_WIDE_INT val;
8078
8079 if (!CONST_INT_P (op))
8080 return false;
8081
8082 val = INTVAL (op);
8083 return (val == 1 || val == 2 || val == 4 || val == 8);
8084 }
8085
8086 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8087 static int
8088 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8089 {
8090 enum rtx_code code = GET_CODE (index);
8091
8092 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8093 /* Standard coprocessor addressing modes. */
8094 if (TARGET_HARD_FLOAT
8095 && (mode == SFmode || mode == DFmode))
8096 return (code == CONST_INT && INTVAL (index) < 1024
8097 /* Thumb-2 allows only a > -256 index range for its core register
8098 load/stores. Since we allow SF/DF in core registers, we have
8099 to use the intersection between -256~4096 (core) and -1024~1024
8100 (coprocessor). */
8101 && INTVAL (index) > -256
8102 && (INTVAL (index) & 3) == 0);
8103
8104 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8105 {
8106 /* For DImode assume values will usually live in core regs
8107 and only allow LDRD addressing modes. */
8108 if (!TARGET_LDRD || mode != DImode)
8109 return (code == CONST_INT
8110 && INTVAL (index) < 1024
8111 && INTVAL (index) > -1024
8112 && (INTVAL (index) & 3) == 0);
8113 }
8114
8115 /* For quad modes, we restrict the constant offset to be slightly less
8116 than what the instruction format permits. We do this because for
8117 quad mode moves, we will actually decompose them into two separate
8118 double-mode reads or writes. INDEX must therefore be a valid
8119 (double-mode) offset and so should INDEX+8. */
8120 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8121 return (code == CONST_INT
8122 && INTVAL (index) < 1016
8123 && INTVAL (index) > -1024
8124 && (INTVAL (index) & 3) == 0);
8125
8126 /* We have no such constraint on double mode offsets, so we permit the
8127 full range of the instruction format. */
8128 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8129 return (code == CONST_INT
8130 && INTVAL (index) < 1024
8131 && INTVAL (index) > -1024
8132 && (INTVAL (index) & 3) == 0);
8133
8134 if (arm_address_register_rtx_p (index, strict_p)
8135 && (GET_MODE_SIZE (mode) <= 4))
8136 return 1;
8137
8138 if (mode == DImode || mode == DFmode)
8139 {
8140 if (code == CONST_INT)
8141 {
8142 HOST_WIDE_INT val = INTVAL (index);
8143 /* Thumb-2 ldrd only has reg+const addressing modes.
8144 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8145 If vldr is selected it uses arm_coproc_mem_operand. */
8146 if (TARGET_LDRD)
8147 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8148 else
8149 return IN_RANGE (val, -255, 4095 - 4);
8150 }
8151 else
8152 return 0;
8153 }
8154
8155 if (code == MULT)
8156 {
8157 rtx xiop0 = XEXP (index, 0);
8158 rtx xiop1 = XEXP (index, 1);
8159
8160 return ((arm_address_register_rtx_p (xiop0, strict_p)
8161 && thumb2_index_mul_operand (xiop1))
8162 || (arm_address_register_rtx_p (xiop1, strict_p)
8163 && thumb2_index_mul_operand (xiop0)));
8164 }
8165 else if (code == ASHIFT)
8166 {
8167 rtx op = XEXP (index, 1);
8168
8169 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8170 && CONST_INT_P (op)
8171 && INTVAL (op) > 0
8172 && INTVAL (op) <= 3);
8173 }
8174
8175 return (code == CONST_INT
8176 && INTVAL (index) < 4096
8177 && INTVAL (index) > -256);
8178 }
8179
8180 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8181 static int
8182 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8183 {
8184 int regno;
8185
8186 if (!REG_P (x))
8187 return 0;
8188
8189 regno = REGNO (x);
8190
8191 if (strict_p)
8192 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8193
8194 return (regno <= LAST_LO_REGNUM
8195 || regno > LAST_VIRTUAL_REGISTER
8196 || regno == FRAME_POINTER_REGNUM
8197 || (GET_MODE_SIZE (mode) >= 4
8198 && (regno == STACK_POINTER_REGNUM
8199 || regno >= FIRST_PSEUDO_REGISTER
8200 || x == hard_frame_pointer_rtx
8201 || x == arg_pointer_rtx)));
8202 }
8203
8204 /* Return nonzero if x is a legitimate index register. This is the case
8205 for any base register that can access a QImode object. */
8206 inline static int
8207 thumb1_index_register_rtx_p (rtx x, int strict_p)
8208 {
8209 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8210 }
8211
8212 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8213
8214 The AP may be eliminated to either the SP or the FP, so we use the
8215 least common denominator, e.g. SImode, and offsets from 0 to 64.
8216
8217 ??? Verify whether the above is the right approach.
8218
8219 ??? Also, the FP may be eliminated to the SP, so perhaps that
8220 needs special handling also.
8221
8222 ??? Look at how the mips16 port solves this problem. It probably uses
8223 better ways to solve some of these problems.
8224
8225 Although it is not incorrect, we don't accept QImode and HImode
8226 addresses based on the frame pointer or arg pointer until the
8227 reload pass starts. This is so that eliminating such addresses
8228 into stack based ones won't produce impossible code. */
8229 int
8230 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8231 {
8232 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8233 return 0;
8234
8235 /* ??? Not clear if this is right. Experiment. */
8236 if (GET_MODE_SIZE (mode) < 4
8237 && !(reload_in_progress || reload_completed)
8238 && (reg_mentioned_p (frame_pointer_rtx, x)
8239 || reg_mentioned_p (arg_pointer_rtx, x)
8240 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8241 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8242 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8243 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8244 return 0;
8245
8246 /* Accept any base register. SP only in SImode or larger. */
8247 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8248 return 1;
8249
8250 /* This is PC relative data before arm_reorg runs. */
8251 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8252 && GET_CODE (x) == SYMBOL_REF
8253 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8254 return 1;
8255
8256 /* This is PC relative data after arm_reorg runs. */
8257 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8258 && reload_completed
8259 && (GET_CODE (x) == LABEL_REF
8260 || (GET_CODE (x) == CONST
8261 && GET_CODE (XEXP (x, 0)) == PLUS
8262 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8263 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8264 return 1;
8265
8266 /* Post-inc indexing only supported for SImode and larger. */
8267 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8268 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8269 return 1;
8270
8271 else if (GET_CODE (x) == PLUS)
8272 {
8273 /* REG+REG address can be any two index registers. */
8274 /* We disallow FRAME+REG addressing since we know that FRAME
8275 will be replaced with STACK, and SP relative addressing only
8276 permits SP+OFFSET. */
8277 if (GET_MODE_SIZE (mode) <= 4
8278 && XEXP (x, 0) != frame_pointer_rtx
8279 && XEXP (x, 1) != frame_pointer_rtx
8280 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8281 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8282 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8283 return 1;
8284
8285 /* REG+const has 5-7 bit offset for non-SP registers. */
8286 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8287 || XEXP (x, 0) == arg_pointer_rtx)
8288 && CONST_INT_P (XEXP (x, 1))
8289 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8290 return 1;
8291
8292 /* REG+const has 10-bit offset for SP, but only SImode and
8293 larger is supported. */
8294 /* ??? Should probably check for DI/DFmode overflow here
8295 just like GO_IF_LEGITIMATE_OFFSET does. */
8296 else if (REG_P (XEXP (x, 0))
8297 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8298 && GET_MODE_SIZE (mode) >= 4
8299 && CONST_INT_P (XEXP (x, 1))
8300 && INTVAL (XEXP (x, 1)) >= 0
8301 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8302 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8303 return 1;
8304
8305 else if (REG_P (XEXP (x, 0))
8306 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8307 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8308 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8309 && REGNO (XEXP (x, 0))
8310 <= LAST_VIRTUAL_POINTER_REGISTER))
8311 && GET_MODE_SIZE (mode) >= 4
8312 && CONST_INT_P (XEXP (x, 1))
8313 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8314 return 1;
8315 }
8316
8317 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8318 && GET_MODE_SIZE (mode) == 4
8319 && GET_CODE (x) == SYMBOL_REF
8320 && CONSTANT_POOL_ADDRESS_P (x)
8321 && ! (flag_pic
8322 && symbol_mentioned_p (get_pool_constant (x))
8323 && ! pcrel_constant_p (get_pool_constant (x))))
8324 return 1;
8325
8326 return 0;
8327 }
8328
8329 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8330 instruction of mode MODE. */
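/* These ranges mirror the 5-bit immediate offset field of the Thumb-1
   load/store instructions, scaled by the access size: 0-31 for bytes,
   0-62 (even) for halfwords, and 0-124 (multiple of 4) for words, with
   the word case adjusted for accesses wider than 4 bytes.  */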
8331 int
8332 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8333 {
8334 switch (GET_MODE_SIZE (mode))
8335 {
8336 case 1:
8337 return val >= 0 && val < 32;
8338
8339 case 2:
8340 return val >= 0 && val < 64 && (val & 1) == 0;
8341
8342 default:
8343 return (val >= 0
8344 && (val + GET_MODE_SIZE (mode)) <= 128
8345 && (val & 3) == 0);
8346 }
8347 }
8348
8349 bool
8350 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8351 {
8352 if (TARGET_ARM)
8353 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8354 else if (TARGET_THUMB2)
8355 return thumb2_legitimate_address_p (mode, x, strict_p);
8356 else /* if (TARGET_THUMB1) */
8357 return thumb1_legitimate_address_p (mode, x, strict_p);
8358 }
8359
8360 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8361
8362 Given an rtx X being reloaded into a reg required to be
8363 in class CLASS, return the class of reg to actually use.
8364 In general this is just CLASS, but for the Thumb core registers and
8365 immediate constants we prefer a LO_REGS class or a subset. */
8366
8367 static reg_class_t
8368 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8369 {
8370 if (TARGET_32BIT)
8371 return rclass;
8372 else
8373 {
8374 if (rclass == GENERAL_REGS)
8375 return LO_REGS;
8376 else
8377 return rclass;
8378 }
8379 }
8380
8381 /* Build the SYMBOL_REF for __tls_get_addr. */
8382
8383 static GTY(()) rtx tls_get_addr_libfunc;
8384
8385 static rtx
8386 get_tls_get_addr (void)
8387 {
8388 if (!tls_get_addr_libfunc)
8389 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8390 return tls_get_addr_libfunc;
8391 }
8392
8393 rtx
8394 arm_load_tp (rtx target)
8395 {
8396 if (!target)
8397 target = gen_reg_rtx (SImode);
8398
8399 if (TARGET_HARD_TP)
8400 {
8401 /* Can return in any reg. */
8402 emit_insn (gen_load_tp_hard (target));
8403 }
8404 else
8405 {
8406 /* Always returned in r0. Immediately copy the result into a pseudo,
8407 otherwise other uses of r0 (e.g. setting up function arguments) may
8408 clobber the value. */
8409
8410 rtx tmp;
8411
8412 emit_insn (gen_load_tp_soft ());
8413
8414 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8415 emit_move_insn (target, tmp);
8416 }
8417 return target;
8418 }
8419
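/* Wrap X in a CONST and move it into REG (allocating a fresh pseudo if
   REG is NULL_RTX), returning the register that now holds the TLS
   operand.  */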
8420 static rtx
8421 load_tls_operand (rtx x, rtx reg)
8422 {
8423 rtx tmp;
8424
8425 if (reg == NULL_RTX)
8426 reg = gen_reg_rtx (SImode);
8427
8428 tmp = gen_rtx_CONST (SImode, x);
8429
8430 emit_move_insn (reg, tmp);
8431
8432 return reg;
8433 }
8434
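/* Emit, as a detached sequence, the PC-relative address computation and the
   library call to __tls_get_addr for symbol X under relocation RELOC
   (TLS_GD32 or TLS_LDM32).  The call's result rtx is stored in *VALUEP and
   the generated insns are returned.  */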
8435 static rtx_insn *
8436 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8437 {
8438 rtx label, labelno, sum;
8439
8440 gcc_assert (reloc != TLS_DESCSEQ);
8441 start_sequence ();
8442
8443 labelno = GEN_INT (pic_labelno++);
8444 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8445 label = gen_rtx_CONST (VOIDmode, label);
8446
8447 sum = gen_rtx_UNSPEC (Pmode,
8448 gen_rtvec (4, x, GEN_INT (reloc), label,
8449 GEN_INT (TARGET_ARM ? 8 : 4)),
8450 UNSPEC_TLS);
8451 reg = load_tls_operand (sum, reg);
8452
8453 if (TARGET_ARM)
8454 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8455 else
8456 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8457
8458 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8459 LCT_PURE, /* LCT_CONST? */
8460 Pmode, reg, Pmode);
8461
8462 rtx_insn *insns = get_insns ();
8463 end_sequence ();
8464
8465 return insns;
8466 }
8467
8468 static rtx
8469 arm_tls_descseq_addr (rtx x, rtx reg)
8470 {
8471 rtx labelno = GEN_INT (pic_labelno++);
8472 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8473 rtx sum = gen_rtx_UNSPEC (Pmode,
8474 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8475 gen_rtx_CONST (VOIDmode, label),
8476 GEN_INT (!TARGET_ARM)),
8477 UNSPEC_TLS);
8478 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8479
8480 emit_insn (gen_tlscall (x, labelno));
8481 if (!reg)
8482 reg = gen_reg_rtx (SImode);
8483 else
8484 gcc_assert (REGNO (reg) != R0_REGNUM);
8485
8486 emit_move_insn (reg, reg0);
8487
8488 return reg;
8489 }
8490
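/* Expand a reference to TLS symbol X into RTL appropriate for its access
   model (global-dynamic, local-dynamic, initial-exec or local-exec),
   using REG as a scratch register when one is supplied.  */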
8491 rtx
8492 legitimize_tls_address (rtx x, rtx reg)
8493 {
8494 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8495 rtx_insn *insns;
8496 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8497
8498 switch (model)
8499 {
8500 case TLS_MODEL_GLOBAL_DYNAMIC:
8501 if (TARGET_GNU2_TLS)
8502 {
8503 reg = arm_tls_descseq_addr (x, reg);
8504
8505 tp = arm_load_tp (NULL_RTX);
8506
8507 dest = gen_rtx_PLUS (Pmode, tp, reg);
8508 }
8509 else
8510 {
8511 /* Original scheme */
8512 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8513 dest = gen_reg_rtx (Pmode);
8514 emit_libcall_block (insns, dest, ret, x);
8515 }
8516 return dest;
8517
8518 case TLS_MODEL_LOCAL_DYNAMIC:
8519 if (TARGET_GNU2_TLS)
8520 {
8521 reg = arm_tls_descseq_addr (x, reg);
8522
8523 tp = arm_load_tp (NULL_RTX);
8524
8525 dest = gen_rtx_PLUS (Pmode, tp, reg);
8526 }
8527 else
8528 {
8529 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8530
8531 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8532 share the LDM result with other LD model accesses. */
8533 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8534 UNSPEC_TLS);
8535 dest = gen_reg_rtx (Pmode);
8536 emit_libcall_block (insns, dest, ret, eqv);
8537
8538 /* Load the addend. */
8539 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8540 GEN_INT (TLS_LDO32)),
8541 UNSPEC_TLS);
8542 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8543 dest = gen_rtx_PLUS (Pmode, dest, addend);
8544 }
8545 return dest;
8546
8547 case TLS_MODEL_INITIAL_EXEC:
8548 labelno = GEN_INT (pic_labelno++);
8549 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8550 label = gen_rtx_CONST (VOIDmode, label);
8551 sum = gen_rtx_UNSPEC (Pmode,
8552 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8553 GEN_INT (TARGET_ARM ? 8 : 4)),
8554 UNSPEC_TLS);
8555 reg = load_tls_operand (sum, reg);
8556
8557 if (TARGET_ARM)
8558 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8559 else if (TARGET_THUMB2)
8560 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8561 else
8562 {
8563 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8564 emit_move_insn (reg, gen_const_mem (SImode, reg));
8565 }
8566
8567 tp = arm_load_tp (NULL_RTX);
8568
8569 return gen_rtx_PLUS (Pmode, tp, reg);
8570
8571 case TLS_MODEL_LOCAL_EXEC:
8572 tp = arm_load_tp (NULL_RTX);
8573
8574 reg = gen_rtx_UNSPEC (Pmode,
8575 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8576 UNSPEC_TLS);
8577 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8578
8579 return gen_rtx_PLUS (Pmode, tp, reg);
8580
8581 default:
8582 abort ();
8583 }
8584 }
8585
8586 /* Try machine-dependent ways of modifying an illegitimate address
8587 to be legitimate. If we find one, return the new, valid address. */
8588 rtx
8589 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8590 {
8591 if (arm_tls_referenced_p (x))
8592 {
8593 rtx addend = NULL;
8594
8595 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8596 {
8597 addend = XEXP (XEXP (x, 0), 1);
8598 x = XEXP (XEXP (x, 0), 0);
8599 }
8600
8601 if (GET_CODE (x) != SYMBOL_REF)
8602 return x;
8603
8604 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8605
8606 x = legitimize_tls_address (x, NULL_RTX);
8607
8608 if (addend)
8609 {
8610 x = gen_rtx_PLUS (SImode, x, addend);
8611 orig_x = x;
8612 }
8613 else
8614 return x;
8615 }
8616
8617 if (!TARGET_ARM)
8618 {
8619 /* TODO: legitimize_address for Thumb2. */
8620 if (TARGET_THUMB2)
8621 return x;
8622 return thumb_legitimize_address (x, orig_x, mode);
8623 }
8624
8625 if (GET_CODE (x) == PLUS)
8626 {
8627 rtx xop0 = XEXP (x, 0);
8628 rtx xop1 = XEXP (x, 1);
8629
8630 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8631 xop0 = force_reg (SImode, xop0);
8632
8633 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8634 && !symbol_mentioned_p (xop1))
8635 xop1 = force_reg (SImode, xop1);
8636
8637 if (ARM_BASE_REGISTER_RTX_P (xop0)
8638 && CONST_INT_P (xop1))
8639 {
8640 HOST_WIDE_INT n, low_n;
8641 rtx base_reg, val;
8642 n = INTVAL (xop1);
8643
8644 /* VFP addressing modes actually allow greater offsets, but for
8645 now we just stick with the lowest common denominator. */
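/* A rough worked example of the split below, not part of the original
   comment: for n == 22 we get low_n == 6 and n == 16; since low_n > 4
   this becomes n == 32, low_n == -10, so the access is rewritten as
   (base + 32) - 10 and the residual offset stays within [-11, +4]. */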
8646 if (mode == DImode || mode == DFmode)
8647 {
8648 low_n = n & 0x0f;
8649 n &= ~0x0f;
8650 if (low_n > 4)
8651 {
8652 n += 16;
8653 low_n -= 16;
8654 }
8655 }
8656 else
8657 {
8658 low_n = ((mode) == TImode ? 0
8659 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8660 n -= low_n;
8661 }
8662
8663 base_reg = gen_reg_rtx (SImode);
8664 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8665 emit_move_insn (base_reg, val);
8666 x = plus_constant (Pmode, base_reg, low_n);
8667 }
8668 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8669 x = gen_rtx_PLUS (SImode, xop0, xop1);
8670 }
8671
8672 /* XXX We don't allow MINUS any more -- see comment in
8673 arm_legitimate_address_outer_p (). */
8674 else if (GET_CODE (x) == MINUS)
8675 {
8676 rtx xop0 = XEXP (x, 0);
8677 rtx xop1 = XEXP (x, 1);
8678
8679 if (CONSTANT_P (xop0))
8680 xop0 = force_reg (SImode, xop0);
8681
8682 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8683 xop1 = force_reg (SImode, xop1);
8684
8685 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8686 x = gen_rtx_MINUS (SImode, xop0, xop1);
8687 }
8688
8689 /* Make sure to take full advantage of the pre-indexed addressing mode
8690 with absolute addresses, which often allows the base register to
8691 be factorized for multiple adjacent memory references, and might
8692 even allow the minipool to be avoided entirely. */
8693 else if (CONST_INT_P (x) && optimize > 0)
8694 {
8695 unsigned int bits;
8696 HOST_WIDE_INT mask, base, index;
8697 rtx base_reg;
8698
8699 /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
8700 use an 8-bit index. So let's use a 12-bit index for SImode only and
8701 hope that arm_gen_constant will enable ldrb to use more bits. */
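/* An illustrative example, not from the original comment: for SImode the
   address 0x0FFFF004 splits into base 0x0FFFF000 and index 0x004; since
   the base has 16 bits set (more than (32 - 12)/2), the code below flips
   to base 0x0FFFFFFF with negative index -4091, which still sums to the
   original address. */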
8702 bits = (mode == SImode) ? 12 : 8;
8703 mask = (1 << bits) - 1;
8704 base = INTVAL (x) & ~mask;
8705 index = INTVAL (x) & mask;
8706 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8707 {
8708 /* It'll most probably be more efficient to generate the base
8709 with more bits set and use a negative index instead. */
8710 base |= mask;
8711 index -= mask;
8712 }
8713 base_reg = force_reg (SImode, GEN_INT (base));
8714 x = plus_constant (Pmode, base_reg, index);
8715 }
8716
8717 if (flag_pic)
8718 {
8719 /* We need to find and carefully transform any SYMBOL and LABEL
8720 references; so go back to the original address expression. */
8721 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8722
8723 if (new_x != orig_x)
8724 x = new_x;
8725 }
8726
8727 return x;
8728 }
8729
8730
8731 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8732 to be legitimate. If we find one, return the new, valid address. */
8733 rtx
8734 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8735 {
8736 if (GET_CODE (x) == PLUS
8737 && CONST_INT_P (XEXP (x, 1))
8738 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8739 || INTVAL (XEXP (x, 1)) < 0))
8740 {
8741 rtx xop0 = XEXP (x, 0);
8742 rtx xop1 = XEXP (x, 1);
8743 HOST_WIDE_INT offset = INTVAL (xop1);
8744
8745 /* Try to fold the offset into a biasing of the base register and
8746 then offsetting that. Don't do this when optimizing for space
8747 since it can cause too many CSEs. */
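/* A hypothetical worked example of the biasing below: for SImode
   (GET_MODE_SIZE == 4) and offset == 260, delta == 260 - 252 == 8, so the
   base register is biased by 252 and the load itself uses offset 8,
   which together still address base + 260. */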
8748 if (optimize_size && offset >= 0
8749 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8750 {
8751 HOST_WIDE_INT delta;
8752
8753 if (offset >= 256)
8754 delta = offset - (256 - GET_MODE_SIZE (mode));
8755 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8756 delta = 31 * GET_MODE_SIZE (mode);
8757 else
8758 delta = offset & (~31 * GET_MODE_SIZE (mode));
8759
8760 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8761 NULL_RTX);
8762 x = plus_constant (Pmode, xop0, delta);
8763 }
8764 else if (offset < 0 && offset > -256)
8765 /* Small negative offsets are best done with a subtract before the
8766 dereference; forcing these into a register normally takes two
8767 instructions. */
8768 x = force_operand (x, NULL_RTX);
8769 else
8770 {
8771 /* For the remaining cases, force the constant into a register. */
8772 xop1 = force_reg (SImode, xop1);
8773 x = gen_rtx_PLUS (SImode, xop0, xop1);
8774 }
8775 }
8776 else if (GET_CODE (x) == PLUS
8777 && s_register_operand (XEXP (x, 1), SImode)
8778 && !s_register_operand (XEXP (x, 0), SImode))
8779 {
8780 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8781
8782 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8783 }
8784
8785 if (flag_pic)
8786 {
8787 /* We need to find and carefully transform any SYMBOL and LABEL
8788 references; so go back to the original address expression. */
8789 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8790
8791 if (new_x != orig_x)
8792 x = new_x;
8793 }
8794
8795 return x;
8796 }
8797
8798 /* Return TRUE if X contains any TLS symbol references. */
8799
8800 bool
8801 arm_tls_referenced_p (rtx x)
8802 {
8803 if (! TARGET_HAVE_TLS)
8804 return false;
8805
8806 subrtx_iterator::array_type array;
8807 FOR_EACH_SUBRTX (iter, array, x, ALL)
8808 {
8809 const_rtx x = *iter;
8810 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8811 {
8812 /* ARM currently does not provide relocations to encode TLS variables
8813 into AArch32 instructions, only data, so there is currently no way
8814 to implement these if a literal pool is disabled. */
8815 if (arm_disable_literal_pool)
8816 sorry ("accessing thread-local storage is not currently supported "
8817 "with -mpure-code or -mslow-flash-data");
8818
8819 return true;
8820 }
8821
8822 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8823 TLS offsets, not real symbol references. */
8824 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8825 iter.skip_subrtxes ();
8826 }
8827 return false;
8828 }
8829
8830 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8831
8832 On the ARM, allow any integer (invalid ones are removed later by insn
8833 patterns), nice doubles and symbol_refs which refer to the function's
8834 constant pool XXX.
8835
8836 When generating PIC, allow anything. */
8837
8838 static bool
8839 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8840 {
8841 return flag_pic || !label_mentioned_p (x);
8842 }
8843
8844 static bool
8845 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8846 {
8847 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8848 RTXs. These must therefore be allowed for Thumb-1 so that, when run
8849 for ARMv8-M Baseline or later, the result is valid. */
8850 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8851 x = XEXP (x, 0);
8852
8853 return (CONST_INT_P (x)
8854 || CONST_DOUBLE_P (x)
8855 || CONSTANT_ADDRESS_P (x)
8856 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8857 || flag_pic);
8858 }
8859
8860 static bool
8861 arm_legitimate_constant_p (machine_mode mode, rtx x)
8862 {
8863 return (!arm_cannot_force_const_mem (mode, x)
8864 && (TARGET_32BIT
8865 ? arm_legitimate_constant_p_1 (mode, x)
8866 : thumb_legitimate_constant_p (mode, x)));
8867 }
8868
8869 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8870
8871 static bool
8872 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8873 {
8874 rtx base, offset;
8875
8876 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8877 {
8878 split_const (x, &base, &offset);
8879 if (GET_CODE (base) == SYMBOL_REF
8880 && !offset_within_block_p (base, INTVAL (offset)))
8881 return true;
8882 }
8883 return arm_tls_referenced_p (x);
8884 }
8885 \f
8886 #define REG_OR_SUBREG_REG(X) \
8887 (REG_P (X) \
8888 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8889
8890 #define REG_OR_SUBREG_RTX(X) \
8891 (REG_P (X) ? (X) : SUBREG_REG (X))
8892
8893 static inline int
8894 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8895 {
8896 machine_mode mode = GET_MODE (x);
8897 int total, words;
8898
8899 switch (code)
8900 {
8901 case ASHIFT:
8902 case ASHIFTRT:
8903 case LSHIFTRT:
8904 case ROTATERT:
8905 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8906
8907 case PLUS:
8908 case MINUS:
8909 case COMPARE:
8910 case NEG:
8911 case NOT:
8912 return COSTS_N_INSNS (1);
8913
8914 case MULT:
8915 if (arm_arch6m && arm_m_profile_small_mul)
8916 return COSTS_N_INSNS (32);
8917
8918 if (CONST_INT_P (XEXP (x, 1)))
8919 {
8920 int cycles = 0;
8921 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8922
8923 while (i)
8924 {
8925 i >>= 2;
8926 cycles++;
8927 }
8928 return COSTS_N_INSNS (2) + cycles;
8929 }
8930 return COSTS_N_INSNS (1) + 16;
8931
8932 case SET:
8933 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8934 the mode. */
8935 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8936 return (COSTS_N_INSNS (words)
8937 + 4 * ((MEM_P (SET_SRC (x)))
8938 + MEM_P (SET_DEST (x))));
8939
8940 case CONST_INT:
8941 if (outer == SET)
8942 {
8943 if (UINTVAL (x) < 256
8944 /* 16-bit constant. */
8945 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8946 return 0;
8947 if (thumb_shiftable_const (INTVAL (x)))
8948 return COSTS_N_INSNS (2);
8949 return COSTS_N_INSNS (3);
8950 }
8951 else if ((outer == PLUS || outer == COMPARE)
8952 && INTVAL (x) < 256 && INTVAL (x) > -256)
8953 return 0;
8954 else if ((outer == IOR || outer == XOR || outer == AND)
8955 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8956 return COSTS_N_INSNS (1);
8957 else if (outer == AND)
8958 {
8959 int i;
8960 /* This duplicates the tests in the andsi3 expander. */
8961 for (i = 9; i <= 31; i++)
8962 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8963 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8964 return COSTS_N_INSNS (2);
8965 }
8966 else if (outer == ASHIFT || outer == ASHIFTRT
8967 || outer == LSHIFTRT)
8968 return 0;
8969 return COSTS_N_INSNS (2);
8970
8971 case CONST:
8972 case CONST_DOUBLE:
8973 case LABEL_REF:
8974 case SYMBOL_REF:
8975 return COSTS_N_INSNS (3);
8976
8977 case UDIV:
8978 case UMOD:
8979 case DIV:
8980 case MOD:
8981 return 100;
8982
8983 case TRUNCATE:
8984 return 99;
8985
8986 case AND:
8987 case XOR:
8988 case IOR:
8989 /* XXX guess. */
8990 return 8;
8991
8992 case MEM:
8993 /* XXX another guess. */
8994 /* Memory costs quite a lot for the first word, but subsequent words
8995 load at the equivalent of a single insn each. */
8996 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8997 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8998 ? 4 : 0));
8999
9000 case IF_THEN_ELSE:
9001 /* XXX a guess. */
9002 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9003 return 14;
9004 return 2;
9005
9006 case SIGN_EXTEND:
9007 case ZERO_EXTEND:
9008 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9009 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9010
9011 if (mode == SImode)
9012 return total;
9013
9014 if (arm_arch6)
9015 return total + COSTS_N_INSNS (1);
9016
9017 /* Assume a two-shift sequence. Increase the cost slightly so
9018 we prefer actual shifts over an extend operation. */
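/* E.g. without SXTB, a QImode-to-SImode sign extension is typically
   synthesized as "lsl rD, rN, #24; asr rD, rD, #24" (illustrative
   assembly, not from the original comment). */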
9019 return total + 1 + COSTS_N_INSNS (2);
9020
9021 default:
9022 return 99;
9023 }
9024 }
9025
9026 /* Estimates the size cost of thumb1 instructions.
9027 For now most of the code is copied from thumb1_rtx_costs. We need more
9028 fine-grained tuning when we have more related test cases. */
9029 static inline int
9030 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9031 {
9032 machine_mode mode = GET_MODE (x);
9033 int words, cost;
9034
9035 switch (code)
9036 {
9037 case ASHIFT:
9038 case ASHIFTRT:
9039 case LSHIFTRT:
9040 case ROTATERT:
9041 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9042
9043 case PLUS:
9044 case MINUS:
9045 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
9046 defined by RTL expansion, especially for the expansion of
9047 multiplication. */
9048 if ((GET_CODE (XEXP (x, 0)) == MULT
9049 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9050 || (GET_CODE (XEXP (x, 1)) == MULT
9051 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9052 return COSTS_N_INSNS (2);
9053 /* Fall through. */
9054 case COMPARE:
9055 case NEG:
9056 case NOT:
9057 return COSTS_N_INSNS (1);
9058
9059 case MULT:
9060 if (CONST_INT_P (XEXP (x, 1)))
9061 {
9062 /* The Thumb-1 mul instruction can't operate on a constant; we must
9063 load it into a register first. */
9064 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9065 /* For the targets which have a very small and high-latency multiply
9066 unit, we prefer to synthesize the mult with up to 5 instructions,
9067 giving a good balance between size and performance. */
9068 if (arm_arch6m && arm_m_profile_small_mul)
9069 return COSTS_N_INSNS (5);
9070 else
9071 return COSTS_N_INSNS (1) + const_size;
9072 }
9073 return COSTS_N_INSNS (1);
9074
9075 case SET:
9076 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9077 the mode. */
9078 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9079 cost = COSTS_N_INSNS (words);
9080 if (satisfies_constraint_J (SET_SRC (x))
9081 || satisfies_constraint_K (SET_SRC (x))
9082 /* Too big an immediate for a 2-byte mov, using MOVT. */
9083 || (CONST_INT_P (SET_SRC (x))
9084 && UINTVAL (SET_SRC (x)) >= 256
9085 && TARGET_HAVE_MOVT
9086 && satisfies_constraint_j (SET_SRC (x)))
9087 /* thumb1_movdi_insn. */
9088 || ((words > 1) && MEM_P (SET_SRC (x))))
9089 cost += COSTS_N_INSNS (1);
9090 return cost;
9091
9092 case CONST_INT:
9093 if (outer == SET)
9094 {
9095 if (UINTVAL (x) < 256)
9096 return COSTS_N_INSNS (1);
9097 /* movw is 4 bytes long. */
9098 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9099 return COSTS_N_INSNS (2);
9100 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9101 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9102 return COSTS_N_INSNS (2);
9103 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9104 if (thumb_shiftable_const (INTVAL (x)))
9105 return COSTS_N_INSNS (2);
9106 return COSTS_N_INSNS (3);
9107 }
9108 else if ((outer == PLUS || outer == COMPARE)
9109 && INTVAL (x) < 256 && INTVAL (x) > -256)
9110 return 0;
9111 else if ((outer == IOR || outer == XOR || outer == AND)
9112 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9113 return COSTS_N_INSNS (1);
9114 else if (outer == AND)
9115 {
9116 int i;
9117 /* This duplicates the tests in the andsi3 expander. */
9118 for (i = 9; i <= 31; i++)
9119 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9120 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9121 return COSTS_N_INSNS (2);
9122 }
9123 else if (outer == ASHIFT || outer == ASHIFTRT
9124 || outer == LSHIFTRT)
9125 return 0;
9126 return COSTS_N_INSNS (2);
9127
9128 case CONST:
9129 case CONST_DOUBLE:
9130 case LABEL_REF:
9131 case SYMBOL_REF:
9132 return COSTS_N_INSNS (3);
9133
9134 case UDIV:
9135 case UMOD:
9136 case DIV:
9137 case MOD:
9138 return 100;
9139
9140 case TRUNCATE:
9141 return 99;
9142
9143 case AND:
9144 case XOR:
9145 case IOR:
9146 return COSTS_N_INSNS (1);
9147
9148 case MEM:
9149 return (COSTS_N_INSNS (1)
9150 + COSTS_N_INSNS (1)
9151 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9152 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9153 ? COSTS_N_INSNS (1) : 0));
9154
9155 case IF_THEN_ELSE:
9156 /* XXX a guess. */
9157 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9158 return 14;
9159 return 2;
9160
9161 case ZERO_EXTEND:
9162 /* XXX still guessing. */
9163 switch (GET_MODE (XEXP (x, 0)))
9164 {
9165 case E_QImode:
9166 return (1 + (mode == DImode ? 4 : 0)
9167 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9168
9169 case E_HImode:
9170 return (4 + (mode == DImode ? 4 : 0)
9171 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9172
9173 case E_SImode:
9174 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9175
9176 default:
9177 return 99;
9178 }
9179
9180 default:
9181 return 99;
9182 }
9183 }
9184
9185 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9186 operand, then return the operand that is being shifted. If the shift
9187 is not by a constant, then set SHIFT_REG to point to the operand.
9188 Return NULL if OP is not a shifter operand. */
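/* For example (an illustrative note, not from the original comment):
   (mult:SI (reg:SI r1) (const_int 4)) is treated as r1 shifted left by
   two, so r1 is returned; for (ashift:SI (reg:SI r1) (reg:SI r2)) the
   function returns r1 and sets *SHIFT_REG to r2. */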
9189 static rtx
9190 shifter_op_p (rtx op, rtx *shift_reg)
9191 {
9192 enum rtx_code code = GET_CODE (op);
9193
9194 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9195 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9196 return XEXP (op, 0);
9197 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9198 return XEXP (op, 0);
9199 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9200 || code == ASHIFTRT)
9201 {
9202 if (!CONST_INT_P (XEXP (op, 1)))
9203 *shift_reg = XEXP (op, 1);
9204 return XEXP (op, 0);
9205 }
9206
9207 return NULL;
9208 }
9209
9210 static bool
9211 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9212 {
9213 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9214 rtx_code code = GET_CODE (x);
9215 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9216
9217 switch (XINT (x, 1))
9218 {
9219 case UNSPEC_UNALIGNED_LOAD:
9220 /* We can only do unaligned loads into the integer unit, and we can't
9221 use LDM or LDRD. */
9222 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9223 if (speed_p)
9224 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9225 + extra_cost->ldst.load_unaligned);
9226
9227 #ifdef NOT_YET
9228 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9229 ADDR_SPACE_GENERIC, speed_p);
9230 #endif
9231 return true;
9232
9233 case UNSPEC_UNALIGNED_STORE:
9234 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9235 if (speed_p)
9236 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9237 + extra_cost->ldst.store_unaligned);
9238
9239 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9240 #ifdef NOT_YET
9241 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9242 ADDR_SPACE_GENERIC, speed_p);
9243 #endif
9244 return true;
9245
9246 case UNSPEC_VRINTZ:
9247 case UNSPEC_VRINTP:
9248 case UNSPEC_VRINTM:
9249 case UNSPEC_VRINTR:
9250 case UNSPEC_VRINTX:
9251 case UNSPEC_VRINTA:
9252 if (speed_p)
9253 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9254
9255 return true;
9256 default:
9257 *cost = COSTS_N_INSNS (2);
9258 break;
9259 }
9260 return true;
9261 }
9262
9263 /* Cost of a libcall. We assume one insn per argument, an amount for the
9264 call (one insn for -Os) and then one for processing the result. */
9265 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
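/* For instance, under this assumption a two-argument libcall is costed at
   COSTS_N_INSNS (2 + 18) when optimizing for speed and COSTS_N_INSNS (2 + 2)
   when speed_p is false. */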
9266
9267 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9268 do \
9269 { \
9270 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9271 if (shift_op != NULL \
9272 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9273 { \
9274 if (shift_reg) \
9275 { \
9276 if (speed_p) \
9277 *cost += extra_cost->alu.arith_shift_reg; \
9278 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9279 ASHIFT, 1, speed_p); \
9280 } \
9281 else if (speed_p) \
9282 *cost += extra_cost->alu.arith_shift; \
9283 \
9284 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9285 ASHIFT, 0, speed_p) \
9286 + rtx_cost (XEXP (x, 1 - IDX), \
9287 GET_MODE (shift_op), \
9288 OP, 1, speed_p)); \
9289 return true; \
9290 } \
9291 } \
9292 while (0)
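/* An illustrative note, not from the original sources: for a narrow-mode
   expression such as (plus:HI (ashift:HI (reg:HI a) (const_int 2)) (reg:HI b)),
   HANDLE_NARROW_SHIFT_ARITH (PLUS, 0) recognizes the left shift in operand 0,
   adds the shift-plus-arithmetic cost together with the costs of the shifted
   operand and of operand 1, and makes the caller return early. */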
9293
9294 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
9295 considering the costs of the addressing mode and memory access
9296 separately. */
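/* For example (a sketch of how the pieces below combine, assuming speed_p
   and no PIC splitting): a DImode integer load from (plus (reg) (const_int 8))
   costs COSTS_N_INSNS (1) + current_tune->addr_mode_costs->integer[AMO_NO_WB]
   + extra_cost->ldst.ldrd. */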
9297 static bool
9298 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
9299 int *cost, bool speed_p)
9300 {
9301 machine_mode mode = GET_MODE (x);
9302
9303 *cost = COSTS_N_INSNS (1);
9304
9305 if (flag_pic
9306 && GET_CODE (XEXP (x, 0)) == PLUS
9307 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9308 /* This will be split into two instructions. Add the cost of the
9309 additional instruction here. The cost of the memory access is computed
9310 below. See arm.md:calculate_pic_address. */
9311 *cost += COSTS_N_INSNS (1);
9312
9313 /* Calculate cost of the addressing mode. */
9314 if (speed_p)
9315 {
9316 arm_addr_mode_op op_type;
9317 switch (GET_CODE (XEXP (x, 0)))
9318 {
9319 default:
9320 case REG:
9321 op_type = AMO_DEFAULT;
9322 break;
9323 case MINUS:
9324 /* MINUS does not appear in RTL, but the architecture supports it,
9325 so handle this case defensively. */
9326 /* fall through */
9327 case PLUS:
9328 op_type = AMO_NO_WB;
9329 break;
9330 case PRE_INC:
9331 case PRE_DEC:
9332 case POST_INC:
9333 case POST_DEC:
9334 case PRE_MODIFY:
9335 case POST_MODIFY:
9336 op_type = AMO_WB;
9337 break;
9338 }
9339
9340 if (VECTOR_MODE_P (mode))
9341 *cost += current_tune->addr_mode_costs->vector[op_type];
9342 else if (FLOAT_MODE_P (mode))
9343 *cost += current_tune->addr_mode_costs->fp[op_type];
9344 else
9345 *cost += current_tune->addr_mode_costs->integer[op_type];
9346 }
9347
9348 /* Calculate cost of memory access. */
9349 if (speed_p)
9350 {
9351 if (FLOAT_MODE_P (mode))
9352 {
9353 if (GET_MODE_SIZE (mode) == 8)
9354 *cost += extra_cost->ldst.loadd;
9355 else
9356 *cost += extra_cost->ldst.loadf;
9357 }
9358 else if (VECTOR_MODE_P (mode))
9359 *cost += extra_cost->ldst.loadv;
9360 else
9361 {
9362 /* Integer modes */
9363 if (GET_MODE_SIZE (mode) == 8)
9364 *cost += extra_cost->ldst.ldrd;
9365 else
9366 *cost += extra_cost->ldst.load;
9367 }
9368 }
9369
9370 return true;
9371 }
9372
9373 /* RTX costs. Make an estimate of the cost of executing the operation
9374 X, which is contained within an operation with code OUTER_CODE.
9375 SPEED_P indicates whether the cost desired is the performance cost,
9376 or the size cost. The estimate is stored in COST and the return
9377 value is TRUE if the cost calculation is final, or FALSE if the
9378 caller should recurse through the operands of X to add additional
9379 costs.
9380
9381 We currently make no attempt to model the size savings of Thumb-2
9382 16-bit instructions. At the normal points in compilation where
9383 this code is called we have no measure of whether the condition
9384 flags are live or not, and thus no realistic way to determine what
9385 the size will eventually be. */
9386 static bool
9387 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9388 const struct cpu_cost_table *extra_cost,
9389 int *cost, bool speed_p)
9390 {
9391 machine_mode mode = GET_MODE (x);
9392
9393 *cost = COSTS_N_INSNS (1);
9394
9395 if (TARGET_THUMB1)
9396 {
9397 if (speed_p)
9398 *cost = thumb1_rtx_costs (x, code, outer_code);
9399 else
9400 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9401 return true;
9402 }
9403
9404 switch (code)
9405 {
9406 case SET:
9407 *cost = 0;
9408 /* SET RTXs don't have a mode so we get it from the destination. */
9409 mode = GET_MODE (SET_DEST (x));
9410
9411 if (REG_P (SET_SRC (x))
9412 && REG_P (SET_DEST (x)))
9413 {
9414 /* Assume that most copies can be done with a single insn,
9415 unless we don't have HW FP, in which case everything
9416 larger than word mode will require two insns. */
9417 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9418 && GET_MODE_SIZE (mode) > 4)
9419 || mode == DImode)
9420 ? 2 : 1);
9421 /* Conditional register moves can be encoded
9422 in 16 bits in Thumb mode. */
9423 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9424 *cost >>= 1;
9425
9426 return true;
9427 }
9428
9429 if (CONST_INT_P (SET_SRC (x)))
9430 {
9431 /* Handle CONST_INT here, since the value doesn't have a mode
9432 and we would otherwise be unable to work out the true cost. */
9433 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9434 0, speed_p);
9435 outer_code = SET;
9436 /* Slightly lower the cost of setting a core reg to a constant.
9437 This helps break up chains and allows for better scheduling. */
9438 if (REG_P (SET_DEST (x))
9439 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9440 *cost -= 1;
9441 x = SET_SRC (x);
9442 /* Immediate moves with an immediate in the range [0, 255] can be
9443 encoded in 16 bits in Thumb mode. */
9444 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9445 && INTVAL (x) >= 0 && INTVAL (x) <=255)
9446 *cost >>= 1;
9447 goto const_int_cost;
9448 }
9449
9450 return false;
9451
9452 case MEM:
9453 return arm_mem_costs (x, extra_cost, cost, speed_p);
9454
9455 case PARALLEL:
9456 {
9457 /* Calculations of LDM costs are complex. We assume an initial cost
9458 (ldm_1st) which will load the number of registers mentioned in
9459 ldm_regs_per_insn_1st registers; then each additional
9460 ldm_regs_per_insn_subsequent registers cost one more insn. The
9461 formula for N regs is thus:
9462
9463 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9464 + ldm_regs_per_insn_subsequent - 1)
9465 / ldm_regs_per_insn_subsequent).
9466
9467 Additional costs may also be added for addressing. A similar
9468 formula is used for STM. */
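/* A worked example of the formula above, with illustrative numbers only:
   for ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2,
   an 8-register LDM costs ldm_1st + COSTS_N_INSNS ((6 + 2 - 1) / 2)
   = ldm_1st + COSTS_N_INSNS (3). */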
9469
9470 bool is_ldm = load_multiple_operation (x, SImode);
9471 bool is_stm = store_multiple_operation (x, SImode);
9472
9473 if (is_ldm || is_stm)
9474 {
9475 if (speed_p)
9476 {
9477 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9478 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9479 ? extra_cost->ldst.ldm_regs_per_insn_1st
9480 : extra_cost->ldst.stm_regs_per_insn_1st;
9481 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9482 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9483 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9484
9485 *cost += regs_per_insn_1st
9486 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9487 + regs_per_insn_sub - 1)
9488 / regs_per_insn_sub);
9489 return true;
9490 }
9491
9492 }
9493 return false;
9494 }
9495 case DIV:
9496 case UDIV:
9497 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9498 && (mode == SFmode || !TARGET_VFP_SINGLE))
9499 *cost += COSTS_N_INSNS (speed_p
9500 ? extra_cost->fp[mode != SFmode].div : 0);
9501 else if (mode == SImode && TARGET_IDIV)
9502 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9503 else
9504 *cost = LIBCALL_COST (2);
9505
9506 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9507 are possible, udiv is preferred. */
9508 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9509 return false; /* All arguments must be in registers. */
9510
9511 case MOD:
9512 /* MOD by a power of 2 can be expanded as:
9513 rsbs r1, r0, #0
9514 and r0, r0, #(n - 1)
9515 and r1, r1, #(n - 1)
9516 rsbpl r0, r1, #0. */
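/* An added note: that expansion is four instructions, which matches the
   COSTS_N_INSNS (3) added below on top of the base cost of one insn. */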
9517 if (CONST_INT_P (XEXP (x, 1))
9518 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9519 && mode == SImode)
9520 {
9521 *cost += COSTS_N_INSNS (3);
9522
9523 if (speed_p)
9524 *cost += 2 * extra_cost->alu.logical
9525 + extra_cost->alu.arith;
9526 return true;
9527 }
9528
9529 /* Fall-through. */
9530 case UMOD:
9531 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9532 are possible, udiv is preferred. */
9533 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9534 return false; /* All arguments must be in registers. */
9535
9536 case ROTATE:
9537 if (mode == SImode && REG_P (XEXP (x, 1)))
9538 {
9539 *cost += (COSTS_N_INSNS (1)
9540 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9541 if (speed_p)
9542 *cost += extra_cost->alu.shift_reg;
9543 return true;
9544 }
9545 /* Fall through */
9546 case ROTATERT:
9547 case ASHIFT:
9548 case LSHIFTRT:
9549 case ASHIFTRT:
9550 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9551 {
9552 *cost += (COSTS_N_INSNS (2)
9553 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9554 if (speed_p)
9555 *cost += 2 * extra_cost->alu.shift;
9556 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
9557 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
9558 *cost += 1;
9559 return true;
9560 }
9561 else if (mode == SImode)
9562 {
9563 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9564 /* Slightly disparage register shifts at -Os, but not by much. */
9565 if (!CONST_INT_P (XEXP (x, 1)))
9566 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9567 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9568 return true;
9569 }
9570 else if (GET_MODE_CLASS (mode) == MODE_INT
9571 && GET_MODE_SIZE (mode) < 4)
9572 {
9573 if (code == ASHIFT)
9574 {
9575 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9576 /* Slightly disparage register shifts at -Os, but not by
9577 much. */
9578 if (!CONST_INT_P (XEXP (x, 1)))
9579 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9580 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9581 }
9582 else if (code == LSHIFTRT || code == ASHIFTRT)
9583 {
9584 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9585 {
9586 /* Can use SBFX/UBFX. */
9587 if (speed_p)
9588 *cost += extra_cost->alu.bfx;
9589 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9590 }
9591 else
9592 {
9593 *cost += COSTS_N_INSNS (1);
9594 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9595 if (speed_p)
9596 {
9597 if (CONST_INT_P (XEXP (x, 1)))
9598 *cost += 2 * extra_cost->alu.shift;
9599 else
9600 *cost += (extra_cost->alu.shift
9601 + extra_cost->alu.shift_reg);
9602 }
9603 else
9604 /* Slightly disparage register shifts. */
9605 *cost += !CONST_INT_P (XEXP (x, 1));
9606 }
9607 }
9608 else /* Rotates. */
9609 {
9610 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9611 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9612 if (speed_p)
9613 {
9614 if (CONST_INT_P (XEXP (x, 1)))
9615 *cost += (2 * extra_cost->alu.shift
9616 + extra_cost->alu.log_shift);
9617 else
9618 *cost += (extra_cost->alu.shift
9619 + extra_cost->alu.shift_reg
9620 + extra_cost->alu.log_shift_reg);
9621 }
9622 }
9623 return true;
9624 }
9625
9626 *cost = LIBCALL_COST (2);
9627 return false;
9628
9629 case BSWAP:
9630 if (arm_arch6)
9631 {
9632 if (mode == SImode)
9633 {
9634 if (speed_p)
9635 *cost += extra_cost->alu.rev;
9636
9637 return false;
9638 }
9639 }
9640 else
9641 {
9642 /* No rev instruction available. Look at arm_legacy_rev
9643 and thumb_legacy_rev for the form of RTL used then. */
9644 if (TARGET_THUMB)
9645 {
9646 *cost += COSTS_N_INSNS (9);
9647
9648 if (speed_p)
9649 {
9650 *cost += 6 * extra_cost->alu.shift;
9651 *cost += 3 * extra_cost->alu.logical;
9652 }
9653 }
9654 else
9655 {
9656 *cost += COSTS_N_INSNS (4);
9657
9658 if (speed_p)
9659 {
9660 *cost += 2 * extra_cost->alu.shift;
9661 *cost += extra_cost->alu.arith_shift;
9662 *cost += 2 * extra_cost->alu.logical;
9663 }
9664 }
9665 return true;
9666 }
9667 return false;
9668
9669 case MINUS:
9670 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9671 && (mode == SFmode || !TARGET_VFP_SINGLE))
9672 {
9673 if (GET_CODE (XEXP (x, 0)) == MULT
9674 || GET_CODE (XEXP (x, 1)) == MULT)
9675 {
9676 rtx mul_op0, mul_op1, sub_op;
9677
9678 if (speed_p)
9679 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9680
9681 if (GET_CODE (XEXP (x, 0)) == MULT)
9682 {
9683 mul_op0 = XEXP (XEXP (x, 0), 0);
9684 mul_op1 = XEXP (XEXP (x, 0), 1);
9685 sub_op = XEXP (x, 1);
9686 }
9687 else
9688 {
9689 mul_op0 = XEXP (XEXP (x, 1), 0);
9690 mul_op1 = XEXP (XEXP (x, 1), 1);
9691 sub_op = XEXP (x, 0);
9692 }
9693
9694 /* The first operand of the multiply may be optionally
9695 negated. */
9696 if (GET_CODE (mul_op0) == NEG)
9697 mul_op0 = XEXP (mul_op0, 0);
9698
9699 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9700 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9701 + rtx_cost (sub_op, mode, code, 0, speed_p));
9702
9703 return true;
9704 }
9705
9706 if (speed_p)
9707 *cost += extra_cost->fp[mode != SFmode].addsub;
9708 return false;
9709 }
9710
9711 if (mode == SImode)
9712 {
9713 rtx shift_by_reg = NULL;
9714 rtx shift_op;
9715 rtx non_shift_op;
9716
9717 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9718 if (shift_op == NULL)
9719 {
9720 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9721 non_shift_op = XEXP (x, 0);
9722 }
9723 else
9724 non_shift_op = XEXP (x, 1);
9725
9726 if (shift_op != NULL)
9727 {
9728 if (shift_by_reg != NULL)
9729 {
9730 if (speed_p)
9731 *cost += extra_cost->alu.arith_shift_reg;
9732 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9733 }
9734 else if (speed_p)
9735 *cost += extra_cost->alu.arith_shift;
9736
9737 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9738 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9739 return true;
9740 }
9741
9742 if (arm_arch_thumb2
9743 && GET_CODE (XEXP (x, 1)) == MULT)
9744 {
9745 /* MLS. */
9746 if (speed_p)
9747 *cost += extra_cost->mult[0].add;
9748 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9749 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9750 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9751 return true;
9752 }
9753
9754 if (CONST_INT_P (XEXP (x, 0)))
9755 {
9756 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9757 INTVAL (XEXP (x, 0)), NULL_RTX,
9758 NULL_RTX, 1, 0);
9759 *cost = COSTS_N_INSNS (insns);
9760 if (speed_p)
9761 *cost += insns * extra_cost->alu.arith;
9762 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9763 return true;
9764 }
9765 else if (speed_p)
9766 *cost += extra_cost->alu.arith;
9767
9768 return false;
9769 }
9770
9771 if (GET_MODE_CLASS (mode) == MODE_INT
9772 && GET_MODE_SIZE (mode) < 4)
9773 {
9774 rtx shift_op, shift_reg;
9775 shift_reg = NULL;
9776
9777 /* We check both sides of the MINUS for shifter operands since,
9778 unlike PLUS, it's not commutative. */
9779
9780 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
9781 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
9782
9783 /* Slightly disparage, as we might need to widen the result. */
9784 *cost += 1;
9785 if (speed_p)
9786 *cost += extra_cost->alu.arith;
9787
9788 if (CONST_INT_P (XEXP (x, 0)))
9789 {
9790 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9791 return true;
9792 }
9793
9794 return false;
9795 }
9796
9797 if (mode == DImode)
9798 {
9799 *cost += COSTS_N_INSNS (1);
9800
9801 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9802 {
9803 rtx op1 = XEXP (x, 1);
9804
9805 if (speed_p)
9806 *cost += 2 * extra_cost->alu.arith;
9807
9808 if (GET_CODE (op1) == ZERO_EXTEND)
9809 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9810 0, speed_p);
9811 else
9812 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9813 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9814 0, speed_p);
9815 return true;
9816 }
9817 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9818 {
9819 if (speed_p)
9820 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9821 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9822 0, speed_p)
9823 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9824 return true;
9825 }
9826 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9827 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9828 {
9829 if (speed_p)
9830 *cost += (extra_cost->alu.arith
9831 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9832 ? extra_cost->alu.arith
9833 : extra_cost->alu.arith_shift));
9834 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9835 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9836 GET_CODE (XEXP (x, 1)), 0, speed_p));
9837 return true;
9838 }
9839
9840 if (speed_p)
9841 *cost += 2 * extra_cost->alu.arith;
9842 return false;
9843 }
9844
9845 /* Vector mode? */
9846
9847 *cost = LIBCALL_COST (2);
9848 return false;
9849
9850 case PLUS:
9851 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9852 && (mode == SFmode || !TARGET_VFP_SINGLE))
9853 {
9854 if (GET_CODE (XEXP (x, 0)) == MULT)
9855 {
9856 rtx mul_op0, mul_op1, add_op;
9857
9858 if (speed_p)
9859 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9860
9861 mul_op0 = XEXP (XEXP (x, 0), 0);
9862 mul_op1 = XEXP (XEXP (x, 0), 1);
9863 add_op = XEXP (x, 1);
9864
9865 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9866 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9867 + rtx_cost (add_op, mode, code, 0, speed_p));
9868
9869 return true;
9870 }
9871
9872 if (speed_p)
9873 *cost += extra_cost->fp[mode != SFmode].addsub;
9874 return false;
9875 }
9876 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9877 {
9878 *cost = LIBCALL_COST (2);
9879 return false;
9880 }
9881
9882 /* Narrow modes can be synthesized in SImode, but the range
9883 of useful sub-operations is limited. Check for shift operations
9884 on one of the operands. Only left shifts can be used in the
9885 narrow modes. */
9886 if (GET_MODE_CLASS (mode) == MODE_INT
9887 && GET_MODE_SIZE (mode) < 4)
9888 {
9889 rtx shift_op, shift_reg;
9890 shift_reg = NULL;
9891
9892 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
9893
9894 if (CONST_INT_P (XEXP (x, 1)))
9895 {
9896 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9897 INTVAL (XEXP (x, 1)), NULL_RTX,
9898 NULL_RTX, 1, 0);
9899 *cost = COSTS_N_INSNS (insns);
9900 if (speed_p)
9901 *cost += insns * extra_cost->alu.arith;
9902 /* Slightly penalize a narrow operation as the result may
9903 need widening. */
9904 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9905 return true;
9906 }
9907
9908 /* Slightly penalize a narrow operation as the result may
9909 need widening. */
9910 *cost += 1;
9911 if (speed_p)
9912 *cost += extra_cost->alu.arith;
9913
9914 return false;
9915 }
9916
9917 if (mode == SImode)
9918 {
9919 rtx shift_op, shift_reg;
9920
9921 if (TARGET_INT_SIMD
9922 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9923 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9924 {
9925 /* UXTA[BH] or SXTA[BH]. */
9926 if (speed_p)
9927 *cost += extra_cost->alu.extend_arith;
9928 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9929 0, speed_p)
9930 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9931 return true;
9932 }
9933
9934 shift_reg = NULL;
9935 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9936 if (shift_op != NULL)
9937 {
9938 if (shift_reg)
9939 {
9940 if (speed_p)
9941 *cost += extra_cost->alu.arith_shift_reg;
9942 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9943 }
9944 else if (speed_p)
9945 *cost += extra_cost->alu.arith_shift;
9946
9947 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9948 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9949 return true;
9950 }
9951 if (GET_CODE (XEXP (x, 0)) == MULT)
9952 {
9953 rtx mul_op = XEXP (x, 0);
9954
9955 if (TARGET_DSP_MULTIPLY
9956 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9957 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9958 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9959 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9960 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9961 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9962 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9963 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9964 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9965 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9966 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9967 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9968 == 16))))))
9969 {
9970 /* SMLA[BT][BT]. */
9971 if (speed_p)
9972 *cost += extra_cost->mult[0].extend_add;
9973 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9974 SIGN_EXTEND, 0, speed_p)
9975 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9976 SIGN_EXTEND, 0, speed_p)
9977 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9978 return true;
9979 }
9980
9981 if (speed_p)
9982 *cost += extra_cost->mult[0].add;
9983 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9984 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9985 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9986 return true;
9987 }
9988 if (CONST_INT_P (XEXP (x, 1)))
9989 {
9990 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9991 INTVAL (XEXP (x, 1)), NULL_RTX,
9992 NULL_RTX, 1, 0);
9993 *cost = COSTS_N_INSNS (insns);
9994 if (speed_p)
9995 *cost += insns * extra_cost->alu.arith;
9996 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9997 return true;
9998 }
9999 else if (speed_p)
10000 *cost += extra_cost->alu.arith;
10001
10002 return false;
10003 }
10004
10005 if (mode == DImode)
10006 {
10007 if (arm_arch3m
10008 && GET_CODE (XEXP (x, 0)) == MULT
10009 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10010 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10011 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10012 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10013 {
10014 if (speed_p)
10015 *cost += extra_cost->mult[1].extend_add;
10016 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10017 ZERO_EXTEND, 0, speed_p)
10018 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10019 ZERO_EXTEND, 0, speed_p)
10020 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10021 return true;
10022 }
10023
10024 *cost += COSTS_N_INSNS (1);
10025
10026 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10027 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10028 {
10029 if (speed_p)
10030 *cost += (extra_cost->alu.arith
10031 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10032 ? extra_cost->alu.arith
10033 : extra_cost->alu.arith_shift));
10034
10035 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10036 0, speed_p)
10037 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10038 return true;
10039 }
10040
10041 if (speed_p)
10042 *cost += 2 * extra_cost->alu.arith;
10043 return false;
10044 }
10045
10046 /* Vector mode? */
10047 *cost = LIBCALL_COST (2);
10048 return false;
10049 case IOR:
10050 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10051 {
10052 if (speed_p)
10053 *cost += extra_cost->alu.rev;
10054
10055 return true;
10056 }
10057 /* Fall through. */
10058 case AND: case XOR:
10059 if (mode == SImode)
10060 {
10061 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10062 rtx op0 = XEXP (x, 0);
10063 rtx shift_op, shift_reg;
10064
10065 if (subcode == NOT
10066 && (code == AND
10067 || (code == IOR && TARGET_THUMB2)))
10068 op0 = XEXP (op0, 0);
10069
10070 shift_reg = NULL;
10071 shift_op = shifter_op_p (op0, &shift_reg);
10072 if (shift_op != NULL)
10073 {
10074 if (shift_reg)
10075 {
10076 if (speed_p)
10077 *cost += extra_cost->alu.log_shift_reg;
10078 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10079 }
10080 else if (speed_p)
10081 *cost += extra_cost->alu.log_shift;
10082
10083 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10084 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10085 return true;
10086 }
10087
10088 if (CONST_INT_P (XEXP (x, 1)))
10089 {
10090 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10091 INTVAL (XEXP (x, 1)), NULL_RTX,
10092 NULL_RTX, 1, 0);
10093
10094 *cost = COSTS_N_INSNS (insns);
10095 if (speed_p)
10096 *cost += insns * extra_cost->alu.logical;
10097 *cost += rtx_cost (op0, mode, code, 0, speed_p);
10098 return true;
10099 }
10100
10101 if (speed_p)
10102 *cost += extra_cost->alu.logical;
10103 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10104 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10105 return true;
10106 }
10107
10108 if (mode == DImode)
10109 {
10110 rtx op0 = XEXP (x, 0);
10111 enum rtx_code subcode = GET_CODE (op0);
10112
10113 *cost += COSTS_N_INSNS (1);
10114
10115 if (subcode == NOT
10116 && (code == AND
10117 || (code == IOR && TARGET_THUMB2)))
10118 op0 = XEXP (op0, 0);
10119
10120 if (GET_CODE (op0) == ZERO_EXTEND)
10121 {
10122 if (speed_p)
10123 *cost += 2 * extra_cost->alu.logical;
10124
10125 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10126 0, speed_p)
10127 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10128 return true;
10129 }
10130 else if (GET_CODE (op0) == SIGN_EXTEND)
10131 {
10132 if (speed_p)
10133 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10134
10135 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10136 0, speed_p)
10137 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10138 return true;
10139 }
10140
10141 if (speed_p)
10142 *cost += 2 * extra_cost->alu.logical;
10143
10144 return true;
10145 }
10146 /* Vector mode? */
10147
10148 *cost = LIBCALL_COST (2);
10149 return false;
10150
10151 case MULT:
10152 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10153 && (mode == SFmode || !TARGET_VFP_SINGLE))
10154 {
10155 rtx op0 = XEXP (x, 0);
10156
10157 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10158 op0 = XEXP (op0, 0);
10159
10160 if (speed_p)
10161 *cost += extra_cost->fp[mode != SFmode].mult;
10162
10163 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10164 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10165 return true;
10166 }
10167 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10168 {
10169 *cost = LIBCALL_COST (2);
10170 return false;
10171 }
10172
10173 if (mode == SImode)
10174 {
10175 if (TARGET_DSP_MULTIPLY
10176 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10177 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10178 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10179 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10180 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10181 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10182 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10183 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10184 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10185 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10186 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10187 && (INTVAL (XEXP (XEXP (x, 1), 1))
10188 == 16))))))
10189 {
10190 /* SMUL[TB][TB]. */
10191 if (speed_p)
10192 *cost += extra_cost->mult[0].extend;
10193 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10194 SIGN_EXTEND, 0, speed_p);
10195 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10196 SIGN_EXTEND, 1, speed_p);
10197 return true;
10198 }
10199 if (speed_p)
10200 *cost += extra_cost->mult[0].simple;
10201 return false;
10202 }
10203
10204 if (mode == DImode)
10205 {
10206 if (arm_arch3m
10207 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10208 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10209 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10210 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10211 {
10212 if (speed_p)
10213 *cost += extra_cost->mult[1].extend;
10214 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10215 ZERO_EXTEND, 0, speed_p)
10216 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10217 ZERO_EXTEND, 0, speed_p));
10218 return true;
10219 }
10220
10221 *cost = LIBCALL_COST (2);
10222 return false;
10223 }
10224
10225 /* Vector mode? */
10226 *cost = LIBCALL_COST (2);
10227 return false;
10228
10229 case NEG:
10230 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10231 && (mode == SFmode || !TARGET_VFP_SINGLE))
10232 {
10233 if (GET_CODE (XEXP (x, 0)) == MULT)
10234 {
10235 /* VNMUL. */
10236 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10237 return true;
10238 }
10239
10240 if (speed_p)
10241 *cost += extra_cost->fp[mode != SFmode].neg;
10242
10243 return false;
10244 }
10245 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10246 {
10247 *cost = LIBCALL_COST (1);
10248 return false;
10249 }
10250
10251 if (mode == SImode)
10252 {
10253 if (GET_CODE (XEXP (x, 0)) == ABS)
10254 {
10255 *cost += COSTS_N_INSNS (1);
10256 /* Assume the non-flag-changing variant. */
10257 if (speed_p)
10258 *cost += (extra_cost->alu.log_shift
10259 + extra_cost->alu.arith_shift);
10260 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10261 return true;
10262 }
10263
10264 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10265 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10266 {
10267 *cost += COSTS_N_INSNS (1);
10268 /* No extra cost for MOV imm and MVN imm. */
10269 /* If the comparison op is using the flags, there's no further
10270 cost; otherwise we need to add the cost of the comparison. */
10271 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10272 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10273 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10274 {
10275 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10276 *cost += (COSTS_N_INSNS (1)
10277 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10278 0, speed_p)
10279 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10280 1, speed_p));
10281 if (speed_p)
10282 *cost += extra_cost->alu.arith;
10283 }
10284 return true;
10285 }
10286
10287 if (speed_p)
10288 *cost += extra_cost->alu.arith;
10289 return false;
10290 }
10291
10292 if (GET_MODE_CLASS (mode) == MODE_INT
10293 && GET_MODE_SIZE (mode) < 4)
10294 {
10295 /* Slightly disparage, as we might need an extend operation. */
10296 *cost += 1;
10297 if (speed_p)
10298 *cost += extra_cost->alu.arith;
10299 return false;
10300 }
10301
10302 if (mode == DImode)
10303 {
10304 *cost += COSTS_N_INSNS (1);
10305 if (speed_p)
10306 *cost += 2 * extra_cost->alu.arith;
10307 return false;
10308 }
10309
10310 /* Vector mode? */
10311 *cost = LIBCALL_COST (1);
10312 return false;
10313
10314 case NOT:
10315 if (mode == SImode)
10316 {
10317 rtx shift_op;
10318 rtx shift_reg = NULL;
10319
10320 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10321
10322 if (shift_op)
10323 {
10324 if (shift_reg != NULL)
10325 {
10326 if (speed_p)
10327 *cost += extra_cost->alu.log_shift_reg;
10328 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10329 }
10330 else if (speed_p)
10331 *cost += extra_cost->alu.log_shift;
10332 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10333 return true;
10334 }
10335
10336 if (speed_p)
10337 *cost += extra_cost->alu.logical;
10338 return false;
10339 }
10340 if (mode == DImode)
10341 {
10342 *cost += COSTS_N_INSNS (1);
10343 return false;
10344 }
10345
10346 /* Vector mode? */
10347
10348 *cost += LIBCALL_COST (1);
10349 return false;
10350
10351 case IF_THEN_ELSE:
10352 {
10353 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10354 {
10355 *cost += COSTS_N_INSNS (3);
10356 return true;
10357 }
10358 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10359 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10360
10361 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10362 /* Assume that if one arm of the if_then_else is a register,
10363 it will be tied with the result, eliminating the
10364 conditional insn. */
10365 if (REG_P (XEXP (x, 1)))
10366 *cost += op2cost;
10367 else if (REG_P (XEXP (x, 2)))
10368 *cost += op1cost;
10369 else
10370 {
10371 if (speed_p)
10372 {
10373 if (extra_cost->alu.non_exec_costs_exec)
10374 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10375 else
10376 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10377 }
10378 else
10379 *cost += op1cost + op2cost;
10380 }
10381 }
10382 return true;
10383
10384 case COMPARE:
10385 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10386 *cost = 0;
10387 else
10388 {
10389 machine_mode op0mode;
10390 /* We'll mostly assume that the cost of a compare is the cost of the
10391 LHS. However, there are some notable exceptions. */
10392
10393 /* Floating point compares are never done as side-effects. */
10394 op0mode = GET_MODE (XEXP (x, 0));
10395 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10396 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10397 {
10398 if (speed_p)
10399 *cost += extra_cost->fp[op0mode != SFmode].compare;
10400
10401 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10402 {
10403 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10404 return true;
10405 }
10406
10407 return false;
10408 }
10409 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10410 {
10411 *cost = LIBCALL_COST (2);
10412 return false;
10413 }
10414
10415 /* DImode compares normally take two insns. */
10416 if (op0mode == DImode)
10417 {
10418 *cost += COSTS_N_INSNS (1);
10419 if (speed_p)
10420 *cost += 2 * extra_cost->alu.arith;
10421 return false;
10422 }
10423
10424 if (op0mode == SImode)
10425 {
10426 rtx shift_op;
10427 rtx shift_reg;
10428
10429 if (XEXP (x, 1) == const0_rtx
10430 && !(REG_P (XEXP (x, 0))
10431 || (GET_CODE (XEXP (x, 0)) == SUBREG
10432 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10433 {
10434 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10435
10436 /* Multiply operations that set the flags are often
10437 significantly more expensive. */
10438 if (speed_p
10439 && GET_CODE (XEXP (x, 0)) == MULT
10440 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10441 *cost += extra_cost->mult[0].flag_setting;
10442
10443 if (speed_p
10444 && GET_CODE (XEXP (x, 0)) == PLUS
10445 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10446 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10447 0), 1), mode))
10448 *cost += extra_cost->mult[0].flag_setting;
10449 return true;
10450 }
10451
10452 shift_reg = NULL;
10453 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10454 if (shift_op != NULL)
10455 {
10456 if (shift_reg != NULL)
10457 {
10458 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10459 1, speed_p);
10460 if (speed_p)
10461 *cost += extra_cost->alu.arith_shift_reg;
10462 }
10463 else if (speed_p)
10464 *cost += extra_cost->alu.arith_shift;
10465 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10466 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10467 return true;
10468 }
10469
10470 if (speed_p)
10471 *cost += extra_cost->alu.arith;
10472 if (CONST_INT_P (XEXP (x, 1))
10473 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10474 {
10475 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10476 return true;
10477 }
10478 return false;
10479 }
10480
10481 /* Vector mode? */
10482
10483 *cost = LIBCALL_COST (2);
10484 return false;
10485 }
10486 return true;
10487
10488 case EQ:
10489 case NE:
10490 case LT:
10491 case LE:
10492 case GT:
10493 case GE:
10494 case LTU:
10495 case LEU:
10496 case GEU:
10497 case GTU:
10498 case ORDERED:
10499 case UNORDERED:
10500 case UNEQ:
10501 case UNLE:
10502 case UNLT:
10503 case UNGE:
10504 case UNGT:
10505 case LTGT:
10506 if (outer_code == SET)
10507 {
10508 /* Is it a store-flag operation? */
10509 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10510 && XEXP (x, 1) == const0_rtx)
10511 {
10512 /* Thumb also needs an IT insn. */
10513 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10514 return true;
10515 }
10516 if (XEXP (x, 1) == const0_rtx)
10517 {
10518 switch (code)
10519 {
10520 case LT:
10521 /* LSR Rd, Rn, #31. */
10522 if (speed_p)
10523 *cost += extra_cost->alu.shift;
10524 break;
10525
10526 case EQ:
10527 /* RSBS T1, Rn, #0
10528 ADC Rd, Rn, T1. */
10529
10530 case NE:
10531 /* SUBS T1, Rn, #1
10532 SBC Rd, Rn, T1. */
10533 *cost += COSTS_N_INSNS (1);
10534 break;
10535
10536 case LE:
10537 /* RSBS T1, Rn, Rn, LSR #31
10538 ADC Rd, Rn, T1. */
10539 *cost += COSTS_N_INSNS (1);
10540 if (speed_p)
10541 *cost += extra_cost->alu.arith_shift;
10542 break;
10543
10544 case GT:
10545 /* RSB Rd, Rn, Rn, ASR #1
10546 LSR Rd, Rd, #31. */
10547 *cost += COSTS_N_INSNS (1);
10548 if (speed_p)
10549 *cost += (extra_cost->alu.arith_shift
10550 + extra_cost->alu.shift);
10551 break;
10552
10553 case GE:
10554 /* ASR Rd, Rn, #31
10555 ADD Rd, Rn, #1. */
10556 *cost += COSTS_N_INSNS (1);
10557 if (speed_p)
10558 *cost += extra_cost->alu.shift;
10559 break;
10560
10561 default:
10562 /* Remaining cases are either meaningless or would take
10563 three insns anyway. */
10564 *cost = COSTS_N_INSNS (3);
10565 break;
10566 }
10567 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10568 return true;
10569 }
10570 else
10571 {
10572 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10573 if (CONST_INT_P (XEXP (x, 1))
10574 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10575 {
10576 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10577 return true;
10578 }
10579
10580 return false;
10581 }
10582 }
10583 /* Not directly inside a set. If it involves the condition code
10584 register, it must be the condition for a branch, cond_exec or
10585 I_T_E operation. Since the comparison is performed elsewhere,
10586 this is just the control part, which has no additional
10587 cost. */
10588 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10589 && XEXP (x, 1) == const0_rtx)
10590 {
10591 *cost = 0;
10592 return true;
10593 }
10594 return false;
10595
10596 case ABS:
10597 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10598 && (mode == SFmode || !TARGET_VFP_SINGLE))
10599 {
10600 if (speed_p)
10601 *cost += extra_cost->fp[mode != SFmode].neg;
10602
10603 return false;
10604 }
10605 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10606 {
10607 *cost = LIBCALL_COST (1);
10608 return false;
10609 }
10610
10611 if (mode == SImode)
10612 {
10613 if (speed_p)
10614 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10615 return false;
10616 }
10617 /* Vector mode? */
10618 *cost = LIBCALL_COST (1);
10619 return false;
10620
10621 case SIGN_EXTEND:
10622 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10623 && MEM_P (XEXP (x, 0)))
10624 {
10625 if (mode == DImode)
10626 *cost += COSTS_N_INSNS (1);
10627
10628 if (!speed_p)
10629 return true;
10630
10631 if (GET_MODE (XEXP (x, 0)) == SImode)
10632 *cost += extra_cost->ldst.load;
10633 else
10634 *cost += extra_cost->ldst.load_sign_extend;
10635
10636 if (mode == DImode)
10637 *cost += extra_cost->alu.shift;
10638
10639 return true;
10640 }
10641
10642 /* Widening from less than 32-bits requires an extend operation. */
10643 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10644 {
10645 /* We have SXTB/SXTH. */
10646 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10647 if (speed_p)
10648 *cost += extra_cost->alu.extend;
10649 }
10650 else if (GET_MODE (XEXP (x, 0)) != SImode)
10651 {
10652 /* Needs two shifts. */
10653 *cost += COSTS_N_INSNS (1);
10654 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10655 if (speed_p)
10656 *cost += 2 * extra_cost->alu.shift;
10657 }
10658
10659 /* Widening beyond 32 bits requires one more insn. */
10660 if (mode == DImode)
10661 {
10662 *cost += COSTS_N_INSNS (1);
10663 if (speed_p)
10664 *cost += extra_cost->alu.shift;
10665 }
10666
10667 return true;
10668
10669 case ZERO_EXTEND:
10670 if ((arm_arch4
10671 || GET_MODE (XEXP (x, 0)) == SImode
10672 || GET_MODE (XEXP (x, 0)) == QImode)
10673 && MEM_P (XEXP (x, 0)))
10674 {
10675 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10676
10677 if (mode == DImode)
10678 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10679
10680 return true;
10681 }
10682
10683 /* Widening from less than 32 bits requires an extend operation. */
10684 if (GET_MODE (XEXP (x, 0)) == QImode)
10685 {
10686 /* UXTB can be a shorter instruction in Thumb2, but it might
10687 be slower than the AND Rd, Rn, #255 alternative. When
10688 optimizing for speed it should never be slower to use
10689 AND, and we don't really model 16-bit vs 32-bit insns
10690 here. */
10691 if (speed_p)
10692 *cost += extra_cost->alu.logical;
10693 }
10694 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10695 {
10696 /* We have UXTB/UXTH. */
10697 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10698 if (speed_p)
10699 *cost += extra_cost->alu.extend;
10700 }
10701 else if (GET_MODE (XEXP (x, 0)) != SImode)
10702 {
10703 /* Needs two shifts. It's marginally preferable to use
10704 shifts rather than two BIC instructions as the second
10705 shift may merge with a subsequent insn as a shifter
10706 op. */
10707 *cost = COSTS_N_INSNS (2);
10708 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10709 if (speed_p)
10710 *cost += 2 * extra_cost->alu.shift;
10711 }
10712
10713 /* Widening beyond 32 bits requires one more insn. */
10714 if (mode == DImode)
10715 {
10716 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10717 }
10718
10719 return true;
10720
10721 case CONST_INT:
10722 *cost = 0;
10723 /* CONST_INT has no mode, so we cannot tell for sure how many
10724 insns are really going to be needed. The best we can do is
10725 look at the value passed. If it fits in SImode, then assume
10726 that's the mode it will be used for. Otherwise assume it
10727 will be used in DImode. */
10728 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10729 mode = SImode;
10730 else
10731 mode = DImode;
10732
10733 /* Avoid blowing up in arm_gen_constant (). */
10734 if (!(outer_code == PLUS
10735 || outer_code == AND
10736 || outer_code == IOR
10737 || outer_code == XOR
10738 || outer_code == MINUS))
10739 outer_code = SET;
10740
10741 const_int_cost:
10742 if (mode == SImode)
10743 {
10744 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10745 INTVAL (x), NULL, NULL,
10746 0, 0));
10747 /* Extra costs? */
10748 }
10749 else
10750 {
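/* DImode constant: cost the low and high 32-bit halves separately.  */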
10751 *cost += COSTS_N_INSNS (arm_gen_constant
10752 (outer_code, SImode, NULL,
10753 trunc_int_for_mode (INTVAL (x), SImode),
10754 NULL, NULL, 0, 0)
10755 + arm_gen_constant (outer_code, SImode, NULL,
10756 INTVAL (x) >> 32, NULL,
10757 NULL, 0, 0));
10758 /* Extra costs? */
10759 }
10760
10761 return true;
10762
10763 case CONST:
10764 case LABEL_REF:
10765 case SYMBOL_REF:
10766 if (speed_p)
10767 {
10768 if (arm_arch_thumb2 && !flag_pic)
10769 *cost += COSTS_N_INSNS (1);
10770 else
10771 *cost += extra_cost->ldst.load;
10772 }
10773 else
10774 *cost += COSTS_N_INSNS (1);
10775
10776 if (flag_pic)
10777 {
10778 *cost += COSTS_N_INSNS (1);
10779 if (speed_p)
10780 *cost += extra_cost->alu.arith;
10781 }
10782
10783 return true;
10784
10785 case CONST_FIXED:
10786 *cost = COSTS_N_INSNS (4);
10787 /* Fixme. */
10788 return true;
10789
10790 case CONST_DOUBLE:
10791 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10792 && (mode == SFmode || !TARGET_VFP_SINGLE))
10793 {
10794 if (vfp3_const_double_rtx (x))
10795 {
10796 if (speed_p)
10797 *cost += extra_cost->fp[mode == DFmode].fpconst;
10798 return true;
10799 }
10800
10801 if (speed_p)
10802 {
10803 if (mode == DFmode)
10804 *cost += extra_cost->ldst.loadd;
10805 else
10806 *cost += extra_cost->ldst.loadf;
10807 }
10808 else
10809 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10810
10811 return true;
10812 }
10813 *cost = COSTS_N_INSNS (4);
10814 return true;
10815
10816 case CONST_VECTOR:
10817 /* Fixme. */
10818 if (TARGET_NEON
10819 && TARGET_HARD_FLOAT
10820 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10821 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10822 *cost = COSTS_N_INSNS (1);
10823 else
10824 *cost = COSTS_N_INSNS (4);
10825 return true;
10826
10827 case HIGH:
10828 case LO_SUM:
10829 /* When optimizing for size, we prefer constant pool entries to
10830 MOVW/MOVT pairs, so bump the cost of these slightly. */
10831 if (!speed_p)
10832 *cost += 1;
10833 return true;
10834
10835 case CLZ:
10836 if (speed_p)
10837 *cost += extra_cost->alu.clz;
10838 return false;
10839
10840 case SMIN:
10841 if (XEXP (x, 1) == const0_rtx)
10842 {
10843 if (speed_p)
10844 *cost += extra_cost->alu.log_shift;
10845 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10846 return true;
10847 }
10848 /* Fall through. */
10849 case SMAX:
10850 case UMIN:
10851 case UMAX:
10852 *cost += COSTS_N_INSNS (1);
10853 return false;
10854
10855 case TRUNCATE:
10856 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10857 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10858 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10859 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10860 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10861 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10862 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10863 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10864 == ZERO_EXTEND))))
10865 {
10866 if (speed_p)
10867 *cost += extra_cost->mult[1].extend;
10868 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10869 ZERO_EXTEND, 0, speed_p)
10870 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10871 ZERO_EXTEND, 0, speed_p));
10872 return true;
10873 }
10874 *cost = LIBCALL_COST (1);
10875 return false;
10876
10877 case UNSPEC_VOLATILE:
10878 case UNSPEC:
10879 return arm_unspec_cost (x, outer_code, speed_p, cost);
10880
10881 case PC:
10882 /* Reading the PC is like reading any other register. Writing it
10883 is more expensive, but we take that into account elsewhere. */
10884 *cost = 0;
10885 return true;
10886
10887 case ZERO_EXTRACT:
10888 /* TODO: Simple zero_extract of bottom bits using AND. */
10889 /* Fall through. */
10890 case SIGN_EXTRACT:
10891 if (arm_arch6
10892 && mode == SImode
10893 && CONST_INT_P (XEXP (x, 1))
10894 && CONST_INT_P (XEXP (x, 2)))
10895 {
10896 if (speed_p)
10897 *cost += extra_cost->alu.bfx;
10898 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10899 return true;
10900 }
10901 /* Without UBFX/SBFX, need to resort to shift operations. */
10902 *cost += COSTS_N_INSNS (1);
10903 if (speed_p)
10904 *cost += 2 * extra_cost->alu.shift;
10905 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10906 return true;
10907
10908 case FLOAT_EXTEND:
10909 if (TARGET_HARD_FLOAT)
10910 {
10911 if (speed_p)
10912 *cost += extra_cost->fp[mode == DFmode].widen;
10913 if (!TARGET_VFP5
10914 && GET_MODE (XEXP (x, 0)) == HFmode)
10915 {
10916 /* Pre v8, widening HF->DF is a two-step process, first
10917 widening to SFmode. */
10918 *cost += COSTS_N_INSNS (1);
10919 if (speed_p)
10920 *cost += extra_cost->fp[0].widen;
10921 }
10922 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10923 return true;
10924 }
10925
10926 *cost = LIBCALL_COST (1);
10927 return false;
10928
10929 case FLOAT_TRUNCATE:
10930 if (TARGET_HARD_FLOAT)
10931 {
10932 if (speed_p)
10933 *cost += extra_cost->fp[mode == DFmode].narrow;
10934 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10935 return true;
10936 /* Vector modes? */
10937 }
10938 *cost = LIBCALL_COST (1);
10939 return false;
10940
10941 case FMA:
10942 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10943 {
10944 rtx op0 = XEXP (x, 0);
10945 rtx op1 = XEXP (x, 1);
10946 rtx op2 = XEXP (x, 2);
10947
10948
10949 /* vfms or vfnma. */
10950 if (GET_CODE (op0) == NEG)
10951 op0 = XEXP (op0, 0);
10952
10953 /* vfnms or vfnma. */
10954 if (GET_CODE (op2) == NEG)
10955 op2 = XEXP (op2, 0);
10956
10957 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10958 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10959 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10960
10961 if (speed_p)
10962 *cost += extra_cost->fp[mode == DFmode].fma;
10963
10964 return true;
10965 }
10966
10967 *cost = LIBCALL_COST (3);
10968 return false;
10969
10970 case FIX:
10971 case UNSIGNED_FIX:
10972 if (TARGET_HARD_FLOAT)
10973 {
10974 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10975 a vcvt fixed-point conversion. */
10976 if (code == FIX && mode == SImode
10977 && GET_CODE (XEXP (x, 0)) == FIX
10978 && GET_MODE (XEXP (x, 0)) == SFmode
10979 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10980 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10981 > 0)
10982 {
10983 if (speed_p)
10984 *cost += extra_cost->fp[0].toint;
10985
10986 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10987 code, 0, speed_p);
10988 return true;
10989 }
10990
10991 if (GET_MODE_CLASS (mode) == MODE_INT)
10992 {
10993 mode = GET_MODE (XEXP (x, 0));
10994 if (speed_p)
10995 *cost += extra_cost->fp[mode == DFmode].toint;
10996 /* Strip off the 'cost' of rounding towards zero. */
10997 if (GET_CODE (XEXP (x, 0)) == FIX)
10998 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10999 0, speed_p);
11000 else
11001 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11002 /* ??? Increase the cost to deal with transferring from
11003 FP -> CORE registers? */
11004 return true;
11005 }
11006 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11007 && TARGET_VFP5)
11008 {
11009 if (speed_p)
11010 *cost += extra_cost->fp[mode == DFmode].roundint;
11011 return false;
11012 }
11013 /* Vector costs? */
11014 }
11015 *cost = LIBCALL_COST (1);
11016 return false;
11017
11018 case FLOAT:
11019 case UNSIGNED_FLOAT:
11020 if (TARGET_HARD_FLOAT)
11021 {
11022 /* ??? Increase the cost to deal with transferring from CORE
11023 -> FP registers? */
11024 if (speed_p)
11025 *cost += extra_cost->fp[mode == DFmode].fromint;
11026 return false;
11027 }
11028 *cost = LIBCALL_COST (1);
11029 return false;
11030
11031 case CALL:
11032 return true;
11033
11034 case ASM_OPERANDS:
11035 {
11036 /* Just a guess: assume the number of instructions in the asm,
11037 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11038 though (see PR60663). */
11039 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11040 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11041
11042 *cost = COSTS_N_INSNS (asm_length + num_operands);
11043 return true;
11044 }
11045 default:
11046 if (mode != VOIDmode)
11047 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11048 else
11049 *cost = COSTS_N_INSNS (4); /* Who knows? */
11050 return false;
11051 }
11052 }
11053
11054 #undef HANDLE_NARROW_SHIFT_ARITH
11055
11056 /* RTX costs entry point. */
11057
11058 static bool
11059 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11060 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11061 {
11062 bool result;
11063 int code = GET_CODE (x);
11064 gcc_assert (current_tune->insn_extra_cost);
11065
11066 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
11067 (enum rtx_code) outer_code,
11068 current_tune->insn_extra_cost,
11069 total, speed);
11070
11071 if (dump_file && (dump_flags & TDF_DETAILS))
11072 {
11073 print_rtl_single (dump_file, x);
11074 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11075 *total, result ? "final" : "partial");
11076 }
11077 return result;
11078 }
11079
11080 /* All address computations that can be done are free, but rtx cost returns
11081 the same for practically all of them. So we weight the different types
11082 of address here in the order (most pref first):
11083 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11084 static inline int
11085 arm_arm_address_cost (rtx x)
11086 {
11087 enum rtx_code c = GET_CODE (x);
11088
11089 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11090 return 0;
11091 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11092 return 10;
11093
11094 if (c == PLUS)
11095 {
11096 if (CONST_INT_P (XEXP (x, 1)))
11097 return 2;
11098
11099 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11100 return 3;
11101
11102 return 4;
11103 }
11104
11105 return 6;
11106 }
11107
11108 static inline int
11109 arm_thumb_address_cost (rtx x)
11110 {
11111 enum rtx_code c = GET_CODE (x);
11112
11113 if (c == REG)
11114 return 1;
11115 if (c == PLUS
11116 && REG_P (XEXP (x, 0))
11117 && CONST_INT_P (XEXP (x, 1)))
11118 return 1;
11119
11120 return 2;
11121 }
11122
11123 static int
11124 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11125 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11126 {
11127 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11128 }
11129
11130 /* Adjust cost hook for XScale. */
11131 static bool
11132 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11133 int * cost)
11134 {
11135 /* Some true dependencies can have a higher cost depending
11136 on precisely how certain input operands are used. */
11137 if (dep_type == 0
11138 && recog_memoized (insn) >= 0
11139 && recog_memoized (dep) >= 0)
11140 {
11141 int shift_opnum = get_attr_shift (insn);
11142 enum attr_type attr_type = get_attr_type (dep);
11143
11144 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11145 operand for INSN. If we have a shifted input operand and the
11146 instruction we depend on is another ALU instruction, then we may
11147 have to account for an additional stall. */
11148 if (shift_opnum != 0
11149 && (attr_type == TYPE_ALU_SHIFT_IMM
11150 || attr_type == TYPE_ALUS_SHIFT_IMM
11151 || attr_type == TYPE_LOGIC_SHIFT_IMM
11152 || attr_type == TYPE_LOGICS_SHIFT_IMM
11153 || attr_type == TYPE_ALU_SHIFT_REG
11154 || attr_type == TYPE_ALUS_SHIFT_REG
11155 || attr_type == TYPE_LOGIC_SHIFT_REG
11156 || attr_type == TYPE_LOGICS_SHIFT_REG
11157 || attr_type == TYPE_MOV_SHIFT
11158 || attr_type == TYPE_MVN_SHIFT
11159 || attr_type == TYPE_MOV_SHIFT_REG
11160 || attr_type == TYPE_MVN_SHIFT_REG))
11161 {
11162 rtx shifted_operand;
11163 int opno;
11164
11165 /* Get the shifted operand. */
11166 extract_insn (insn);
11167 shifted_operand = recog_data.operand[shift_opnum];
11168
11169 /* Iterate over all the operands in DEP. If we write an operand
11170 that overlaps with SHIFTED_OPERAND, then we have to increase the
11171 cost of this dependency. */
11172 extract_insn (dep);
11173 preprocess_constraints (dep);
11174 for (opno = 0; opno < recog_data.n_operands; opno++)
11175 {
11176 /* We can ignore strict inputs. */
11177 if (recog_data.operand_type[opno] == OP_IN)
11178 continue;
11179
11180 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11181 shifted_operand))
11182 {
11183 *cost = 2;
11184 return false;
11185 }
11186 }
11187 }
11188 }
11189 return true;
11190 }
11191
11192 /* Adjust cost hook for Cortex A9. */
11193 static bool
11194 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11195 int * cost)
11196 {
11197 switch (dep_type)
11198 {
11199 case REG_DEP_ANTI:
11200 *cost = 0;
11201 return false;
11202
11203 case REG_DEP_TRUE:
11204 case REG_DEP_OUTPUT:
11205 if (recog_memoized (insn) >= 0
11206 && recog_memoized (dep) >= 0)
11207 {
11208 if (GET_CODE (PATTERN (insn)) == SET)
11209 {
11210 if (GET_MODE_CLASS
11211 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11212 || GET_MODE_CLASS
11213 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11214 {
11215 enum attr_type attr_type_insn = get_attr_type (insn);
11216 enum attr_type attr_type_dep = get_attr_type (dep);
11217
11218 /* By default all dependencies of the form
11219 s0 = s0 <op> s1
11220 s0 = s0 <op> s2
11221 have an extra latency of 1 cycle because
11222 of the input and output dependency in this
11223 case. However this gets modeled as a true
11224 dependency and hence all these checks. */
11225 if (REG_P (SET_DEST (PATTERN (insn)))
11226 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11227 {
11228 /* FMACS is a special case where the dependent
11229 instruction can be issued 3 cycles before
11230 the normal latency in case of an output
11231 dependency. */
11232 if ((attr_type_insn == TYPE_FMACS
11233 || attr_type_insn == TYPE_FMACD)
11234 && (attr_type_dep == TYPE_FMACS
11235 || attr_type_dep == TYPE_FMACD))
11236 {
11237 if (dep_type == REG_DEP_OUTPUT)
11238 *cost = insn_default_latency (dep) - 3;
11239 else
11240 *cost = insn_default_latency (dep);
11241 return false;
11242 }
11243 else
11244 {
11245 if (dep_type == REG_DEP_OUTPUT)
11246 *cost = insn_default_latency (dep) + 1;
11247 else
11248 *cost = insn_default_latency (dep);
11249 }
11250 return false;
11251 }
11252 }
11253 }
11254 }
11255 break;
11256
11257 default:
11258 gcc_unreachable ();
11259 }
11260
11261 return true;
11262 }
11263
11264 /* Adjust cost hook for FA726TE. */
11265 static bool
11266 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11267 int * cost)
11268 {
11269 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
11270 followed by a predicated one) has a penalty of 3. */
11271 if (dep_type == REG_DEP_TRUE
11272 && recog_memoized (insn) >= 0
11273 && recog_memoized (dep) >= 0
11274 && get_attr_conds (dep) == CONDS_SET)
11275 {
11276 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11277 if (get_attr_conds (insn) == CONDS_USE
11278 && get_attr_type (insn) != TYPE_BRANCH)
11279 {
11280 *cost = 3;
11281 return false;
11282 }
11283
11284 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11285 || get_attr_conds (insn) == CONDS_USE)
11286 {
11287 *cost = 0;
11288 return false;
11289 }
11290 }
11291
11292 return true;
11293 }
11294
11295 /* Implement TARGET_REGISTER_MOVE_COST.
11296
11297 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11298 typically more expensive than a single memory access. We set
11299 the cost below that of two memory accesses so that floating
11300 point to integer conversion does not go through memory. */
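/* For reference, arm_memory_move_cost below returns 10 on 32-bit targets,
   so the value 15 used here for VFP<->core moves sits above the cost of one
   memory access but below that of two.  */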
11301
11302 int
11303 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11304 reg_class_t from, reg_class_t to)
11305 {
11306 if (TARGET_32BIT)
11307 {
11308 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11309 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11310 return 15;
11311 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11312 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11313 return 4;
11314 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11315 return 20;
11316 else
11317 return 2;
11318 }
11319 else
11320 {
11321 if (from == HI_REGS || to == HI_REGS)
11322 return 4;
11323 else
11324 return 2;
11325 }
11326 }
11327
11328 /* Implement TARGET_MEMORY_MOVE_COST. */
11329
11330 int
11331 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11332 bool in ATTRIBUTE_UNUSED)
11333 {
11334 if (TARGET_32BIT)
11335 return 10;
11336 else
11337 {
11338 if (GET_MODE_SIZE (mode) < 4)
11339 return 8;
11340 else
11341 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11342 }
11343 }
11344
11345 /* Vectorizer cost model implementation. */
11346
11347 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11348 static int
11349 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11350 tree vectype,
11351 int misalign ATTRIBUTE_UNUSED)
11352 {
11353 unsigned elements;
11354
11355 switch (type_of_cost)
11356 {
11357 case scalar_stmt:
11358 return current_tune->vec_costs->scalar_stmt_cost;
11359
11360 case scalar_load:
11361 return current_tune->vec_costs->scalar_load_cost;
11362
11363 case scalar_store:
11364 return current_tune->vec_costs->scalar_store_cost;
11365
11366 case vector_stmt:
11367 return current_tune->vec_costs->vec_stmt_cost;
11368
11369 case vector_load:
11370 return current_tune->vec_costs->vec_align_load_cost;
11371
11372 case vector_store:
11373 return current_tune->vec_costs->vec_store_cost;
11374
11375 case vec_to_scalar:
11376 return current_tune->vec_costs->vec_to_scalar_cost;
11377
11378 case scalar_to_vec:
11379 return current_tune->vec_costs->scalar_to_vec_cost;
11380
11381 case unaligned_load:
11382 case vector_gather_load:
11383 return current_tune->vec_costs->vec_unalign_load_cost;
11384
11385 case unaligned_store:
11386 case vector_scatter_store:
11387 return current_tune->vec_costs->vec_unalign_store_cost;
11388
11389 case cond_branch_taken:
11390 return current_tune->vec_costs->cond_taken_branch_cost;
11391
11392 case cond_branch_not_taken:
11393 return current_tune->vec_costs->cond_not_taken_branch_cost;
11394
11395 case vec_perm:
11396 case vec_promote_demote:
11397 return current_tune->vec_costs->vec_stmt_cost;
11398
11399 case vec_construct:
11400 elements = TYPE_VECTOR_SUBPARTS (vectype);
11401 return elements / 2 + 1;
11402
11403 default:
11404 gcc_unreachable ();
11405 }
11406 }
11407
11408 /* Implement targetm.vectorize.add_stmt_cost. */
11409
11410 static unsigned
11411 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11412 struct _stmt_vec_info *stmt_info, int misalign,
11413 enum vect_cost_model_location where)
11414 {
11415 unsigned *cost = (unsigned *) data;
11416 unsigned retval = 0;
11417
11418 if (flag_vect_cost_model)
11419 {
11420 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11421 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11422
11423 /* Statements in an inner loop relative to the loop being
11424 vectorized are weighted more heavily. The value here is
11425 arbitrary and could potentially be improved with analysis. */
11426 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11427 count *= 50; /* FIXME. */
11428
11429 retval = (unsigned) (count * stmt_cost);
11430 cost[where] += retval;
11431 }
11432
11433 return retval;
11434 }
11435
11436 /* Return true if and only if this insn can dual-issue only as older. */
11437 static bool
11438 cortexa7_older_only (rtx_insn *insn)
11439 {
11440 if (recog_memoized (insn) < 0)
11441 return false;
11442
11443 switch (get_attr_type (insn))
11444 {
11445 case TYPE_ALU_DSP_REG:
11446 case TYPE_ALU_SREG:
11447 case TYPE_ALUS_SREG:
11448 case TYPE_LOGIC_REG:
11449 case TYPE_LOGICS_REG:
11450 case TYPE_ADC_REG:
11451 case TYPE_ADCS_REG:
11452 case TYPE_ADR:
11453 case TYPE_BFM:
11454 case TYPE_REV:
11455 case TYPE_MVN_REG:
11456 case TYPE_SHIFT_IMM:
11457 case TYPE_SHIFT_REG:
11458 case TYPE_LOAD_BYTE:
11459 case TYPE_LOAD_4:
11460 case TYPE_STORE_4:
11461 case TYPE_FFARITHS:
11462 case TYPE_FADDS:
11463 case TYPE_FFARITHD:
11464 case TYPE_FADDD:
11465 case TYPE_FMOV:
11466 case TYPE_F_CVT:
11467 case TYPE_FCMPS:
11468 case TYPE_FCMPD:
11469 case TYPE_FCONSTS:
11470 case TYPE_FCONSTD:
11471 case TYPE_FMULS:
11472 case TYPE_FMACS:
11473 case TYPE_FMULD:
11474 case TYPE_FMACD:
11475 case TYPE_FDIVS:
11476 case TYPE_FDIVD:
11477 case TYPE_F_MRC:
11478 case TYPE_F_MRRC:
11479 case TYPE_F_FLAG:
11480 case TYPE_F_LOADS:
11481 case TYPE_F_STORES:
11482 return true;
11483 default:
11484 return false;
11485 }
11486 }
11487
11488 /* Return true if and only if this insn can dual-issue as younger. */
11489 static bool
11490 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11491 {
11492 if (recog_memoized (insn) < 0)
11493 {
11494 if (verbose > 5)
11495 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11496 return false;
11497 }
11498
11499 switch (get_attr_type (insn))
11500 {
11501 case TYPE_ALU_IMM:
11502 case TYPE_ALUS_IMM:
11503 case TYPE_LOGIC_IMM:
11504 case TYPE_LOGICS_IMM:
11505 case TYPE_EXTEND:
11506 case TYPE_MVN_IMM:
11507 case TYPE_MOV_IMM:
11508 case TYPE_MOV_REG:
11509 case TYPE_MOV_SHIFT:
11510 case TYPE_MOV_SHIFT_REG:
11511 case TYPE_BRANCH:
11512 case TYPE_CALL:
11513 return true;
11514 default:
11515 return false;
11516 }
11517 }
11518
11519
11520 /* Look for an instruction that can dual issue only as an older
11521 instruction, and move it in front of any instructions that can
11522 dual-issue as younger, while preserving the relative order of all
11523 other instructions in the ready list. This is a heuristic to help
11524 dual-issue in later cycles, by postponing issue of more flexible
11525 instructions. This heuristic may affect dual issue opportunities
11526 in the current cycle. */
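/* An illustrative example: with a ready list of { D, C, B, A } (head last),
   where C can dual-issue only as older, A can dual-issue as younger and B is
   neither, C is moved into A's slot, giving { D, B, A, C }, so C is
   considered for issue ahead of A and B.  */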
11527 static void
11528 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11529 int *n_readyp, int clock)
11530 {
11531 int i;
11532 int first_older_only = -1, first_younger = -1;
11533
11534 if (verbose > 5)
11535 fprintf (file,
11536 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11537 clock,
11538 *n_readyp);
11539
11540 /* Traverse the ready list from the head (the instruction to issue
11541 first), looking for the first instruction that can issue as
11542 younger and the first instruction that can dual-issue only as
11543 older. */
11544 for (i = *n_readyp - 1; i >= 0; i--)
11545 {
11546 rtx_insn *insn = ready[i];
11547 if (cortexa7_older_only (insn))
11548 {
11549 first_older_only = i;
11550 if (verbose > 5)
11551 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11552 break;
11553 }
11554 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11555 first_younger = i;
11556 }
11557
11558 /* Nothing to reorder: either no younger insn was found, or an insn
11559 that can dual-issue only as older already appears before any insn
11560 that can dual-issue as younger. */
11561 if (first_younger == -1)
11562 {
11563 if (verbose > 5)
11564 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11565 return;
11566 }
11567
11568 /* Nothing to reorder because no older-only insn in the ready list. */
11569 if (first_older_only == -1)
11570 {
11571 if (verbose > 5)
11572 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11573 return;
11574 }
11575
11576 /* Move first_older_only insn before first_younger. */
11577 if (verbose > 5)
11578 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11579 INSN_UID(ready [first_older_only]),
11580 INSN_UID(ready [first_younger]));
11581 rtx_insn *first_older_only_insn = ready [first_older_only];
11582 for (i = first_older_only; i < first_younger; i++)
11583 {
11584 ready[i] = ready[i+1];
11585 }
11586
11587 ready[i] = first_older_only_insn;
11588 return;
11589 }
11590
11591 /* Implement TARGET_SCHED_REORDER. */
11592 static int
11593 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11594 int clock)
11595 {
11596 switch (arm_tune)
11597 {
11598 case TARGET_CPU_cortexa7:
11599 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11600 break;
11601 default:
11602 /* Do nothing for other cores. */
11603 break;
11604 }
11605
11606 return arm_issue_rate ();
11607 }
11608
11609 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11610 It corrects the value of COST based on the relationship between
11611 INSN and DEP and the dependence type DEP_TYPE. It returns the new
11612 value. There is a per-core adjust_cost hook to adjust scheduler costs
11613 and the per-core hook can choose to completely override the generic
11614 adjust_cost function. Only put bits of code into arm_adjust_cost that
11615 are common across all cores. */
11616 static int
11617 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11618 unsigned int)
11619 {
11620 rtx i_pat, d_pat;
11621
11622 /* When generating Thumb-1 code, we want to place flag-setting operations
11623 close to a conditional branch which depends on them, so that we can
11624 omit the comparison. */
11625 if (TARGET_THUMB1
11626 && dep_type == 0
11627 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11628 && recog_memoized (dep) >= 0
11629 && get_attr_conds (dep) == CONDS_SET)
11630 return 0;
11631
11632 if (current_tune->sched_adjust_cost != NULL)
11633 {
11634 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11635 return cost;
11636 }
11637
11638 /* XXX Is this strictly true? */
11639 if (dep_type == REG_DEP_ANTI
11640 || dep_type == REG_DEP_OUTPUT)
11641 return 0;
11642
11643 /* Call insns don't incur a stall, even if they follow a load. */
11644 if (dep_type == 0
11645 && CALL_P (insn))
11646 return 1;
11647
11648 if ((i_pat = single_set (insn)) != NULL
11649 && MEM_P (SET_SRC (i_pat))
11650 && (d_pat = single_set (dep)) != NULL
11651 && MEM_P (SET_DEST (d_pat)))
11652 {
11653 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11654 /* This is a load after a store; there is no conflict if the load reads
11655 from a cached area. Assume that loads from the stack and from the
11656 constant pool are cached, and that others will miss. This is a
11657 hack. */
11658
11659 if ((GET_CODE (src_mem) == SYMBOL_REF
11660 && CONSTANT_POOL_ADDRESS_P (src_mem))
11661 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11662 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11663 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11664 return 1;
11665 }
11666
11667 return cost;
11668 }
11669
11670 int
11671 arm_max_conditional_execute (void)
11672 {
11673 return max_insns_skipped;
11674 }
11675
11676 static int
11677 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11678 {
11679 if (TARGET_32BIT)
11680 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11681 else
11682 return (optimize > 0) ? 2 : 0;
11683 }
11684
11685 static int
11686 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11687 {
11688 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11689 }
11690
11691 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11692 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11693 sequences of non-executed instructions in IT blocks probably take the same
11694 amount of time as executed instructions (and the IT instruction itself takes
11695 space in icache). This function was experimentally determined to give good
11696 results on a popular embedded benchmark. */
11697
11698 static int
11699 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11700 {
11701 return (TARGET_32BIT && speed_p) ? 1
11702 : arm_default_branch_cost (speed_p, predictable_p);
11703 }
11704
11705 static int
11706 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11707 {
11708 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11709 }
11710
11711 static bool fp_consts_inited = false;
11712
11713 static REAL_VALUE_TYPE value_fp0;
11714
11715 static void
11716 init_fp_table (void)
11717 {
11718 REAL_VALUE_TYPE r;
11719
11720 r = REAL_VALUE_ATOF ("0", DFmode);
11721 value_fp0 = r;
11722 fp_consts_inited = true;
11723 }
11724
11725 /* Return TRUE if rtx X is a valid immediate FP constant. */
11726 int
11727 arm_const_double_rtx (rtx x)
11728 {
11729 const REAL_VALUE_TYPE *r;
11730
11731 if (!fp_consts_inited)
11732 init_fp_table ();
11733
11734 r = CONST_DOUBLE_REAL_VALUE (x);
11735 if (REAL_VALUE_MINUS_ZERO (*r))
11736 return 0;
11737
11738 if (real_equal (r, &value_fp0))
11739 return 1;
11740
11741 return 0;
11742 }
11743
11744 /* VFPv3 has a fairly wide range of representable immediates, formed from
11745 "quarter-precision" floating-point values. These can be evaluated using this
11746 formula (with ^ for exponentiation):
11747
11748 (-1)^s * n * 2^-r
11749
11750 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11751 16 <= n <= 31 and 0 <= r <= 7.
11752
11753 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11754
11755 - A (most-significant) is the sign bit.
11756 - BCD are the exponent (encoded as r XOR 3).
11757 - EFGH are the mantissa (encoded as n - 16).
11758 */
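/* A worked example of the encoding above: 1.0 = +16 * 2^-4, so s = 0,
   n = 16 and r = 4, giving A = 0, BCD = 4 XOR 3 = 0b111 and
   EFGH = 16 - 16 = 0b0000, i.e. the index 0x70.  Likewise
   0.5 = +16 * 2^-5 encodes as 0x60.  */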
11759
11760 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11761 fconst[sd] instruction, or -1 if X isn't suitable. */
11762 static int
11763 vfp3_const_double_index (rtx x)
11764 {
11765 REAL_VALUE_TYPE r, m;
11766 int sign, exponent;
11767 unsigned HOST_WIDE_INT mantissa, mant_hi;
11768 unsigned HOST_WIDE_INT mask;
11769 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11770 bool fail;
11771
11772 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11773 return -1;
11774
11775 r = *CONST_DOUBLE_REAL_VALUE (x);
11776
11777 /* We can't represent these things, so detect them first. */
11778 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11779 return -1;
11780
11781 /* Extract sign, exponent and mantissa. */
11782 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11783 r = real_value_abs (&r);
11784 exponent = REAL_EXP (&r);
11785 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11786 highest (sign) bit, with a fixed binary point at bit point_pos.
11787 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11788 bits for the mantissa, this may fail (low bits would be lost). */
11789 real_ldexp (&m, &r, point_pos - exponent);
11790 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11791 mantissa = w.elt (0);
11792 mant_hi = w.elt (1);
11793
11794 /* If there are bits set in the low part of the mantissa, we can't
11795 represent this value. */
11796 if (mantissa != 0)
11797 return -1;
11798
11799 /* Now make it so that mantissa contains the most-significant bits, and move
11800 the point_pos to indicate that the least-significant bits have been
11801 discarded. */
11802 point_pos -= HOST_BITS_PER_WIDE_INT;
11803 mantissa = mant_hi;
11804
11805 /* We can permit four significant bits of mantissa only, plus a high bit
11806 which is always 1. */
11807 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11808 if ((mantissa & mask) != 0)
11809 return -1;
11810
11811 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11812 mantissa >>= point_pos - 5;
11813
11814 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11815 floating-point immediate zero with Neon using an integer-zero load, but
11816 that case is handled elsewhere.) */
11817 if (mantissa == 0)
11818 return -1;
11819
11820 gcc_assert (mantissa >= 16 && mantissa <= 31);
11821
11822 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11823 normalized significands are in the range [1, 2). (Our mantissa is shifted
11824 left 4 places at this point relative to normalized IEEE754 values). GCC
11825 internally uses [0.5, 1) (see real.c), so the exponent returned from
11826 REAL_EXP must be altered. */
11827 exponent = 5 - exponent;
11828
11829 if (exponent < 0 || exponent > 7)
11830 return -1;
11831
11832 /* Sign, mantissa and exponent are now in the correct form to plug into the
11833 formula described in the comment above. */
11834 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11835 }
11836
11837 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11838 int
11839 vfp3_const_double_rtx (rtx x)
11840 {
11841 if (!TARGET_VFP3)
11842 return 0;
11843
11844 return vfp3_const_double_index (x) != -1;
11845 }
11846
11847 /* Recognize immediates which can be used in various Neon instructions. Legal
11848 immediates are described by the following table (for VMVN variants, the
11849 bitwise inverse of the constant shown is recognized. In either case, VMOV
11850 is output and the correct instruction to use for a given constant is chosen
11851 by the assembler). The constant shown is replicated across all elements of
11852 the destination vector.
11853
11854 insn elems variant constant (binary)
11855 ---- ----- ------- -----------------
11856 vmov i32 0 00000000 00000000 00000000 abcdefgh
11857 vmov i32 1 00000000 00000000 abcdefgh 00000000
11858 vmov i32 2 00000000 abcdefgh 00000000 00000000
11859 vmov i32 3 abcdefgh 00000000 00000000 00000000
11860 vmov i16 4 00000000 abcdefgh
11861 vmov i16 5 abcdefgh 00000000
11862 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11863 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11864 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11865 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11866 vmvn i16 10 00000000 abcdefgh
11867 vmvn i16 11 abcdefgh 00000000
11868 vmov i32 12 00000000 00000000 abcdefgh 11111111
11869 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11870 vmov i32 14 00000000 abcdefgh 11111111 11111111
11871 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11872 vmov i8 16 abcdefgh
11873 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11874 eeeeeeee ffffffff gggggggg hhhhhhhh
11875 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11876 vmov f32 19 00000000 00000000 00000000 00000000
11877
11878 For case 18, B = !b. Representable values are exactly those accepted by
11879 vfp3_const_double_index, but are output as floating-point numbers rather
11880 than indices.
11881
11882 For case 19, we will change it to vmov.i32 when assembling.
11883
11884 Variants 0-5 (inclusive) may also be used as immediates for the second
11885 operand of VORR/VBIC instructions.
11886
11887 The INVERSE argument causes the bitwise inverse of the given operand to be
11888 recognized instead (used for recognizing legal immediates for the VAND/VORN
11889 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11890 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11891 output, rather than the real insns vbic/vorr).
11892
11893 INVERSE makes no difference to the recognition of float vectors.
11894
11895 The return value is the variant of immediate as shown in the above table, or
11896 -1 if the given value doesn't match any of the listed patterns.
11897 */
11898 static int
11899 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11900 rtx *modconst, int *elementwidth)
11901 {
11902 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11903 matches = 1; \
11904 for (i = 0; i < idx; i += (STRIDE)) \
11905 if (!(TEST)) \
11906 matches = 0; \
11907 if (matches) \
11908 { \
11909 immtype = (CLASS); \
11910 elsize = (ELSIZE); \
11911 break; \
11912 }
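/* CHECK matches the splatted byte image against TEST at every STRIDE'th
   group of bytes; on success it records the immediate class and element
   size and breaks out of the do { ... } while (0) scan below.  */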
11913
11914 unsigned int i, elsize = 0, idx = 0, n_elts;
11915 unsigned int innersize;
11916 unsigned char bytes[16];
11917 int immtype = -1, matches;
11918 unsigned int invmask = inverse ? 0xff : 0;
11919 bool vector = GET_CODE (op) == CONST_VECTOR;
11920
11921 if (vector)
11922 n_elts = CONST_VECTOR_NUNITS (op);
11923 else
11924 {
11925 n_elts = 1;
11926 if (mode == VOIDmode)
11927 mode = DImode;
11928 }
11929
11930 innersize = GET_MODE_UNIT_SIZE (mode);
11931
11932 /* Vectors of float constants. */
11933 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11934 {
11935 rtx el0 = CONST_VECTOR_ELT (op, 0);
11936
11937 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11938 return -1;
11939
11940 /* FP16 vectors cannot be represented. */
11941 if (GET_MODE_INNER (mode) == HFmode)
11942 return -1;
11943
11944 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11945 are distinct in this context. */
11946 if (!const_vec_duplicate_p (op))
11947 return -1;
11948
11949 if (modconst)
11950 *modconst = CONST_VECTOR_ELT (op, 0);
11951
11952 if (elementwidth)
11953 *elementwidth = 0;
11954
11955 if (el0 == CONST0_RTX (GET_MODE (el0)))
11956 return 19;
11957 else
11958 return 18;
11959 }
11960
11961 /* The tricks done in the code below apply for little-endian vector layout.
11962 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11963 FIXME: Implement logic for big-endian vectors. */
11964 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11965 return -1;
11966
11967 /* Splat vector constant out into a byte vector. */
11968 for (i = 0; i < n_elts; i++)
11969 {
11970 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11971 unsigned HOST_WIDE_INT elpart;
11972
11973 gcc_assert (CONST_INT_P (el));
11974 elpart = INTVAL (el);
11975
11976 for (unsigned int byte = 0; byte < innersize; byte++)
11977 {
11978 bytes[idx++] = (elpart & 0xff) ^ invmask;
11979 elpart >>= BITS_PER_UNIT;
11980 }
11981 }
11982
11983 /* Sanity check. */
11984 gcc_assert (idx == GET_MODE_SIZE (mode));
11985
11986 do
11987 {
11988 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11989 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11990
11991 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11992 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11993
11994 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11995 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11996
11997 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11998 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11999
12000 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12001
12002 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12003
12004 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12005 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12006
12007 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12008 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12009
12010 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12011 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12012
12013 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12014 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12015
12016 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12017
12018 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12019
12020 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12021 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12022
12023 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12024 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12025
12026 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12027 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12028
12029 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12030 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12031
12032 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12033
12034 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12035 && bytes[i] == bytes[(i + 8) % idx]);
12036 }
12037 while (0);
12038
12039 if (immtype == -1)
12040 return -1;
12041
12042 if (elementwidth)
12043 *elementwidth = elsize;
12044
12045 if (modconst)
12046 {
12047 unsigned HOST_WIDE_INT imm = 0;
12048
12049 /* Un-invert bytes of recognized vector, if necessary. */
12050 if (invmask != 0)
12051 for (i = 0; i < idx; i++)
12052 bytes[i] ^= invmask;
12053
12054 if (immtype == 17)
12055 {
12056 /* FIXME: Broken on 32-bit H_W_I hosts. */
12057 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12058
12059 for (i = 0; i < 8; i++)
12060 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12061 << (i * BITS_PER_UNIT);
12062
12063 *modconst = GEN_INT (imm);
12064 }
12065 else
12066 {
12067 unsigned HOST_WIDE_INT imm = 0;
12068
12069 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12070 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12071
12072 *modconst = GEN_INT (imm);
12073 }
12074 }
12075
12076 return immtype;
12077 #undef CHECK
12078 }
12079
12080 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12081 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12082 float elements), and a modified constant (whatever should be output for a
12083 VMOV) in *MODCONST. */
12084
12085 int
12086 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12087 rtx *modconst, int *elementwidth)
12088 {
12089 rtx tmpconst;
12090 int tmpwidth;
12091 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12092
12093 if (retval == -1)
12094 return 0;
12095
12096 if (modconst)
12097 *modconst = tmpconst;
12098
12099 if (elementwidth)
12100 *elementwidth = tmpwidth;
12101
12102 return 1;
12103 }
12104
12105 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12106 the immediate is valid, write a constant suitable for using as an operand
12107 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12108 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12109
12110 int
12111 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12112 rtx *modconst, int *elementwidth)
12113 {
12114 rtx tmpconst;
12115 int tmpwidth;
12116 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12117
12118 if (retval < 0 || retval > 5)
12119 return 0;
12120
12121 if (modconst)
12122 *modconst = tmpconst;
12123
12124 if (elementwidth)
12125 *elementwidth = tmpwidth;
12126
12127 return 1;
12128 }
12129
12130 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12131 the immediate is valid, write a constant suitable for using as an operand
12132 to VSHR/VSHL to *MODCONST and the corresponding element width to
12133 *ELEMENTWIDTH. ISLEFTSHIFT says whether this is a left or a right shift,
12134 since the two have different immediate ranges. */
12135
12136 int
12137 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12138 rtx *modconst, int *elementwidth,
12139 bool isleftshift)
12140 {
12141 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12142 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12143 unsigned HOST_WIDE_INT last_elt = 0;
12144 unsigned HOST_WIDE_INT maxshift;
12145
12146 /* Check that all elements of the vector constant are identical. */
12147 for (i = 0; i < n_elts; i++)
12148 {
12149 rtx el = CONST_VECTOR_ELT (op, i);
12150 unsigned HOST_WIDE_INT elpart;
12151
12152 if (CONST_INT_P (el))
12153 elpart = INTVAL (el);
12154 else if (CONST_DOUBLE_P (el))
12155 return 0;
12156 else
12157 gcc_unreachable ();
12158
12159 if (i != 0 && elpart != last_elt)
12160 return 0;
12161
12162 last_elt = elpart;
12163 }
12164
12165 /* Shift less than element size. */
12166 maxshift = innersize * 8;
12167
12168 if (isleftshift)
12169 {
12170 /* Left shift immediate value can be from 0 to <size>-1. */
12171 if (last_elt >= maxshift)
12172 return 0;
12173 }
12174 else
12175 {
12176 /* Right shift immediate value can be from 1 to <size>. */
12177 if (last_elt == 0 || last_elt > maxshift)
12178 return 0;
12179 }
12180
12181 if (elementwidth)
12182 *elementwidth = innersize * 8;
12183
12184 if (modconst)
12185 *modconst = CONST_VECTOR_ELT (op, 0);
12186
12187 return 1;
12188 }
12189
12190 /* Return a string suitable for output of Neon immediate logic operation
12191 MNEM. */
12192
12193 char *
12194 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12195 int inverse, int quad)
12196 {
12197 int width, is_valid;
12198 static char templ[40];
12199
12200 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12201
12202 gcc_assert (is_valid != 0);
12203
12204 if (quad)
12205 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12206 else
12207 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12208
12209 return templ;
12210 }
12211
12212 /* Return a string suitable for output of Neon immediate shift operation
12213 (VSHR or VSHL) MNEM. */
12214
12215 char *
12216 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12217 machine_mode mode, int quad,
12218 bool isleftshift)
12219 {
12220 int width, is_valid;
12221 static char templ[40];
12222
12223 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12224 gcc_assert (is_valid != 0);
12225
12226 if (quad)
12227 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12228 else
12229 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12230
12231 return templ;
12232 }
12233
12234 /* Output a sequence of pairwise operations to implement a reduction.
12235 NOTE: We do "too much work" here, because pairwise operations work on two
12236 registers-worth of operands in one go. Unfortunately, I don't think we can
12237 exploit those extra calculations to do the full operation in fewer steps.
12238 Although all vector elements of the result but the first are ignored, we
12239 actually calculate the same result in each of the elements. An alternative
12240 such as initially loading a vector with zero to use as each of the second
12241 operands would use up an additional register and take an extra instruction,
12242 for no particular gain. */
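/* For example, reducing a four-element vector takes two pairwise operations
   below: the first into a scratch register and the second writing the final
   result to OP0.  */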
12243
12244 void
12245 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12246 rtx (*reduc) (rtx, rtx, rtx))
12247 {
12248 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12249 rtx tmpsum = op1;
12250
12251 for (i = parts / 2; i >= 1; i /= 2)
12252 {
12253 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12254 emit_insn (reduc (dest, tmpsum, tmpsum));
12255 tmpsum = dest;
12256 }
12257 }
12258
12259 /* If VALS is a vector constant that can be loaded into a register
12260 using VDUP, generate instructions to do so and return an RTX to
12261 assign to the register. Otherwise return NULL_RTX. */
12262
12263 static rtx
12264 neon_vdup_constant (rtx vals)
12265 {
12266 machine_mode mode = GET_MODE (vals);
12267 machine_mode inner_mode = GET_MODE_INNER (mode);
12268 rtx x;
12269
12270 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12271 return NULL_RTX;
12272
12273 if (!const_vec_duplicate_p (vals, &x))
12274 /* The elements are not all the same. We could handle repeating
12275 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12276 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12277 vdup.i16). */
12278 return NULL_RTX;
12279
12280 /* We can load this constant by using VDUP and a constant in a
12281 single ARM register. This will be cheaper than a vector
12282 load. */
12283
12284 x = copy_to_mode_reg (inner_mode, x);
12285 return gen_vec_duplicate (mode, x);
12286 }
12287
12288 /* Generate code to load VALS, which is a PARALLEL containing only
12289 constants (for vec_init) or CONST_VECTOR, efficiently into a
12290 register. Returns an RTX to copy into the register, or NULL_RTX
12291 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12292
12293 rtx
12294 neon_make_constant (rtx vals)
12295 {
12296 machine_mode mode = GET_MODE (vals);
12297 rtx target;
12298 rtx const_vec = NULL_RTX;
12299 int n_elts = GET_MODE_NUNITS (mode);
12300 int n_const = 0;
12301 int i;
12302
12303 if (GET_CODE (vals) == CONST_VECTOR)
12304 const_vec = vals;
12305 else if (GET_CODE (vals) == PARALLEL)
12306 {
12307 /* A CONST_VECTOR must contain only CONST_INTs and
12308 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12309 Only store valid constants in a CONST_VECTOR. */
12310 for (i = 0; i < n_elts; ++i)
12311 {
12312 rtx x = XVECEXP (vals, 0, i);
12313 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12314 n_const++;
12315 }
12316 if (n_const == n_elts)
12317 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12318 }
12319 else
12320 gcc_unreachable ();
12321
12322 if (const_vec != NULL
12323 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12324 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12325 return const_vec;
12326 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12327 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12328 pipeline cycle; creating the constant takes one or two ARM
12329 pipeline cycles. */
12330 return target;
12331 else if (const_vec != NULL_RTX)
12332 /* Load from constant pool. On Cortex-A8 this takes two cycles
12333 (for either double or quad vectors). We can not take advantage
12334 of single-cycle VLD1 because we need a PC-relative addressing
12335 mode. */
12336 return const_vec;
12337 else
12338 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12339 We can not construct an initializer. */
12340 return NULL_RTX;
12341 }
12342
12343 /* Initialize vector TARGET to VALS. */
12344
12345 void
12346 neon_expand_vector_init (rtx target, rtx vals)
12347 {
12348 machine_mode mode = GET_MODE (target);
12349 machine_mode inner_mode = GET_MODE_INNER (mode);
12350 int n_elts = GET_MODE_NUNITS (mode);
12351 int n_var = 0, one_var = -1;
12352 bool all_same = true;
12353 rtx x, mem;
12354 int i;
12355
12356 for (i = 0; i < n_elts; ++i)
12357 {
12358 x = XVECEXP (vals, 0, i);
12359 if (!CONSTANT_P (x))
12360 ++n_var, one_var = i;
12361
12362 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12363 all_same = false;
12364 }
12365
12366 if (n_var == 0)
12367 {
12368 rtx constant = neon_make_constant (vals);
12369 if (constant != NULL_RTX)
12370 {
12371 emit_move_insn (target, constant);
12372 return;
12373 }
12374 }
12375
12376 /* Splat a single non-constant element if we can. */
12377 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12378 {
12379 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12380 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
12381 return;
12382 }
12383
12384 /* One field is non-constant. Load constant then overwrite varying
12385 field. This is more efficient than using the stack. */
12386 if (n_var == 1)
12387 {
12388 rtx copy = copy_rtx (vals);
12389 rtx index = GEN_INT (one_var);
12390
12391 /* Load constant part of vector, substitute neighboring value for
12392 varying element. */
12393 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12394 neon_expand_vector_init (target, copy);
12395
12396 /* Insert variable. */
12397 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12398 switch (mode)
12399 {
12400 case E_V8QImode:
12401 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12402 break;
12403 case E_V16QImode:
12404 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12405 break;
12406 case E_V4HImode:
12407 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12408 break;
12409 case E_V8HImode:
12410 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12411 break;
12412 case E_V2SImode:
12413 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12414 break;
12415 case E_V4SImode:
12416 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12417 break;
12418 case E_V2SFmode:
12419 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12420 break;
12421 case E_V4SFmode:
12422 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12423 break;
12424 case E_V2DImode:
12425 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12426 break;
12427 default:
12428 gcc_unreachable ();
12429 }
12430 return;
12431 }
12432
12433 /* Construct the vector in memory one field at a time
12434 and load the whole vector. */
12435 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12436 for (i = 0; i < n_elts; i++)
12437 emit_move_insn (adjust_address_nv (mem, inner_mode,
12438 i * GET_MODE_SIZE (inner_mode)),
12439 XVECEXP (vals, 0, i));
12440 emit_move_insn (target, mem);
12441 }
12442
12443 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12444 an error if it doesn't. EXP indicates the source location, which includes the
12445 inlining history for intrinsics. */
12446
12447 static void
12448 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12449 const_tree exp, const char *desc)
12450 {
12451 HOST_WIDE_INT lane;
12452
12453 gcc_assert (CONST_INT_P (operand));
12454
12455 lane = INTVAL (operand);
12456
12457 if (lane < low || lane >= high)
12458 {
12459 if (exp)
12460 error ("%K%s %wd out of range %wd - %wd",
12461 exp, desc, lane, low, high - 1);
12462 else
12463 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12464 }
12465 }
12466
12467 /* Bounds-check lanes. */
12468
12469 void
12470 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12471 const_tree exp)
12472 {
12473 bounds_check (operand, low, high, exp, "lane");
12474 }
12475
12476 /* Bounds-check constants. */
12477
12478 void
12479 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12480 {
12481 bounds_check (operand, low, high, NULL_TREE, "constant");
12482 }
12483
12484 HOST_WIDE_INT
12485 neon_element_bits (machine_mode mode)
12486 {
12487 return GET_MODE_UNIT_BITSIZE (mode);
12488 }
12489
12490 \f
12491 /* Predicates for `match_operand' and `match_operator'. */
12492
12493 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12494 WB is true if full writeback address modes are allowed and is false
12495 if limited writeback address modes (POST_INC and PRE_DEC) are
12496 allowed. */
12497
12498 int
12499 arm_coproc_mem_operand (rtx op, bool wb)
12500 {
12501 rtx ind;
12502
12503 /* Reject eliminable registers. */
12504 if (! (reload_in_progress || reload_completed || lra_in_progress)
12505 && ( reg_mentioned_p (frame_pointer_rtx, op)
12506 || reg_mentioned_p (arg_pointer_rtx, op)
12507 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12508 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12509 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12510 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12511 return FALSE;
12512
12513 /* Constants are converted into offsets from labels. */
12514 if (!MEM_P (op))
12515 return FALSE;
12516
12517 ind = XEXP (op, 0);
12518
12519 if (reload_completed
12520 && (GET_CODE (ind) == LABEL_REF
12521 || (GET_CODE (ind) == CONST
12522 && GET_CODE (XEXP (ind, 0)) == PLUS
12523 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12524 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12525 return TRUE;
12526
12527 /* Match: (mem (reg)). */
12528 if (REG_P (ind))
12529 return arm_address_register_rtx_p (ind, 0);
12530
12531 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12532 acceptable in any case (subject to verification by
12533 arm_address_register_rtx_p). We need WB to be true to accept
12534 PRE_INC and POST_DEC. */
12535 if (GET_CODE (ind) == POST_INC
12536 || GET_CODE (ind) == PRE_DEC
12537 || (wb
12538 && (GET_CODE (ind) == PRE_INC
12539 || GET_CODE (ind) == POST_DEC)))
12540 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12541
12542 if (wb
12543 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12544 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12545 && GET_CODE (XEXP (ind, 1)) == PLUS
12546 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12547 ind = XEXP (ind, 1);
12548
12549 /* Match:
12550 (plus (reg)
12551 (const)). */
12552 if (GET_CODE (ind) == PLUS
12553 && REG_P (XEXP (ind, 0))
12554 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12555 && CONST_INT_P (XEXP (ind, 1))
12556 && INTVAL (XEXP (ind, 1)) > -1024
12557 && INTVAL (XEXP (ind, 1)) < 1024
12558 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12559 return TRUE;
12560
12561 return FALSE;
12562 }
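
/* For example, arm_coproc_mem_operand accepts, among others, addresses of
   the forms (the register number RN is only illustrative):

	(mem (reg rn))
	(mem (post_inc (reg rn)))
	(mem (pre_dec (reg rn)))
	(mem (plus (reg rn) (const_int off)))

   where OFF is a multiple of 4 in the range -1020..1020, and, when WB is
   true, also the PRE_INC, POST_DEC and PRE/POST_MODIFY forms checked
   above.  */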
12563
12564 /* Return TRUE if OP is a memory operand which we can load or store a vector
12565 to/from. TYPE is one of the following values:
12566 0 - Vector load/store (vldr)
12567 1 - Core registers (ldm)
12568 2 - Element/structure loads (vld1)
12569 */
12570 int
12571 neon_vector_mem_operand (rtx op, int type, bool strict)
12572 {
12573 rtx ind;
12574
12575 /* Reject eliminable registers. */
12576 if (strict && ! (reload_in_progress || reload_completed)
12577 && (reg_mentioned_p (frame_pointer_rtx, op)
12578 || reg_mentioned_p (arg_pointer_rtx, op)
12579 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12580 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12581 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12582 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12583 return FALSE;
12584
12585 /* Constants are converted into offsets from labels. */
12586 if (!MEM_P (op))
12587 return FALSE;
12588
12589 ind = XEXP (op, 0);
12590
12591 if (reload_completed
12592 && (GET_CODE (ind) == LABEL_REF
12593 || (GET_CODE (ind) == CONST
12594 && GET_CODE (XEXP (ind, 0)) == PLUS
12595 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12596 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12597 return TRUE;
12598
12599 /* Match: (mem (reg)). */
12600 if (REG_P (ind))
12601 return arm_address_register_rtx_p (ind, 0);
12602
12603 /* Allow post-increment with Neon registers. */
12604 if ((type != 1 && GET_CODE (ind) == POST_INC)
12605 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12606 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12607
12608 /* Allow post-increment by register for VLDn. */
12609 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12610 && GET_CODE (XEXP (ind, 1)) == PLUS
12611 && REG_P (XEXP (XEXP (ind, 1), 1)))
12612 return true;
12613
12614 /* Match:
12615 (plus (reg)
12616 (const)). */
12617 if (type == 0
12618 && GET_CODE (ind) == PLUS
12619 && REG_P (XEXP (ind, 0))
12620 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12621 && CONST_INT_P (XEXP (ind, 1))
12622 && INTVAL (XEXP (ind, 1)) > -1024
12623 /* For quad modes, we restrict the constant offset to be slightly less
12624 than what the instruction format permits. We have no such constraint
12625 on double mode offsets. (This must match arm_legitimate_index_p.) */
12626 && (INTVAL (XEXP (ind, 1))
12627 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12628 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12629 return TRUE;
12630
12631 return FALSE;
12632 }
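
/* By way of example, for TYPE 0 (vldr/vstr) the constant-offset form above
   allows offsets that are multiples of 4 in roughly the range -1020..1020
   for double-word modes, but only up to 1012 for quad-word modes, mirroring
   the tighter quad-mode limit in arm_legitimate_index_p.  TYPE 2 (vld1)
   additionally accepts a POST_MODIFY by register, i.e. a post-incremented
   address such as [rn], rm.  */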
12633
12634 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12635 type. */
12636 int
12637 neon_struct_mem_operand (rtx op)
12638 {
12639 rtx ind;
12640
12641 /* Reject eliminable registers. */
12642 if (! (reload_in_progress || reload_completed)
12643 && ( reg_mentioned_p (frame_pointer_rtx, op)
12644 || reg_mentioned_p (arg_pointer_rtx, op)
12645 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12646 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12647 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12648 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12649 return FALSE;
12650
12651 /* Constants are converted into offsets from labels. */
12652 if (!MEM_P (op))
12653 return FALSE;
12654
12655 ind = XEXP (op, 0);
12656
12657 if (reload_completed
12658 && (GET_CODE (ind) == LABEL_REF
12659 || (GET_CODE (ind) == CONST
12660 && GET_CODE (XEXP (ind, 0)) == PLUS
12661 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12662 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12663 return TRUE;
12664
12665 /* Match: (mem (reg)). */
12666 if (REG_P (ind))
12667 return arm_address_register_rtx_p (ind, 0);
12668
12669 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12670 if (GET_CODE (ind) == POST_INC
12671 || GET_CODE (ind) == PRE_DEC)
12672 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12673
12674 return FALSE;
12675 }
12676
12677 /* Return true if X is a register that will be eliminated later on. */
12678 int
12679 arm_eliminable_register (rtx x)
12680 {
12681 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12682 || REGNO (x) == ARG_POINTER_REGNUM
12683 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12684 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12685 }
12686
12687 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12688 coprocessor registers. Otherwise return NO_REGS. */
12689
12690 enum reg_class
12691 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12692 {
12693 if (mode == HFmode)
12694 {
12695 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12696 return GENERAL_REGS;
12697 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12698 return NO_REGS;
12699 return GENERAL_REGS;
12700 }
12701
12702 /* The neon move patterns handle all legitimate vector and struct
12703 addresses. */
12704 if (TARGET_NEON
12705 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12706 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12707 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12708 || VALID_NEON_STRUCT_MODE (mode)))
12709 return NO_REGS;
12710
12711 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12712 return NO_REGS;
12713
12714 return GENERAL_REGS;
12715 }
12716
12717 /* Values which must be returned in the most-significant end of the return
12718 register. */
12719
12720 static bool
12721 arm_return_in_msb (const_tree valtype)
12722 {
12723 return (TARGET_AAPCS_BASED
12724 && BYTES_BIG_ENDIAN
12725 && (AGGREGATE_TYPE_P (valtype)
12726 || TREE_CODE (valtype) == COMPLEX_TYPE
12727 || FIXED_POINT_TYPE_P (valtype)));
12728 }
12729
12730 /* Return TRUE if X references a SYMBOL_REF. */
12731 int
12732 symbol_mentioned_p (rtx x)
12733 {
12734 const char * fmt;
12735 int i;
12736
12737 if (GET_CODE (x) == SYMBOL_REF)
12738 return 1;
12739
12740 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12741 are constant offsets, not symbols. */
12742 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12743 return 0;
12744
12745 fmt = GET_RTX_FORMAT (GET_CODE (x));
12746
12747 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12748 {
12749 if (fmt[i] == 'E')
12750 {
12751 int j;
12752
12753 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12754 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12755 return 1;
12756 }
12757 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12758 return 1;
12759 }
12760
12761 return 0;
12762 }
12763
12764 /* Return TRUE if X references a LABEL_REF. */
12765 int
12766 label_mentioned_p (rtx x)
12767 {
12768 const char * fmt;
12769 int i;
12770
12771 if (GET_CODE (x) == LABEL_REF)
12772 return 1;
12773
12774 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12775 instruction, but they are constant offsets, not symbols. */
12776 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12777 return 0;
12778
12779 fmt = GET_RTX_FORMAT (GET_CODE (x));
12780 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12781 {
12782 if (fmt[i] == 'E')
12783 {
12784 int j;
12785
12786 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12787 if (label_mentioned_p (XVECEXP (x, i, j)))
12788 return 1;
12789 }
12790 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12791 return 1;
12792 }
12793
12794 return 0;
12795 }
12796
12797 int
12798 tls_mentioned_p (rtx x)
12799 {
12800 switch (GET_CODE (x))
12801 {
12802 case CONST:
12803 return tls_mentioned_p (XEXP (x, 0));
12804
12805 case UNSPEC:
12806 if (XINT (x, 1) == UNSPEC_TLS)
12807 return 1;
12808
12809 /* Fall through. */
12810 default:
12811 return 0;
12812 }
12813 }
12814
12815 /* Must not copy any rtx that uses a pc-relative address.
12816 Also, disallow copying of load-exclusive instructions that
12817 may appear after splitting of compare-and-swap-style operations
12818 so as to prevent those loops from being transformed away from their
12819 canonical forms (see PR 69904). */
12820
12821 static bool
12822 arm_cannot_copy_insn_p (rtx_insn *insn)
12823 {
12824 /* The tls call insn cannot be copied, as it is paired with a data
12825 word. */
12826 if (recog_memoized (insn) == CODE_FOR_tlscall)
12827 return true;
12828
12829 subrtx_iterator::array_type array;
12830 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12831 {
12832 const_rtx x = *iter;
12833 if (GET_CODE (x) == UNSPEC
12834 && (XINT (x, 1) == UNSPEC_PIC_BASE
12835 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12836 return true;
12837 }
12838
12839 rtx set = single_set (insn);
12840 if (set)
12841 {
12842 rtx src = SET_SRC (set);
12843 if (GET_CODE (src) == ZERO_EXTEND)
12844 src = XEXP (src, 0);
12845
12846 /* Catch the load-exclusive and load-acquire operations. */
12847 if (GET_CODE (src) == UNSPEC_VOLATILE
12848 && (XINT (src, 1) == VUNSPEC_LL
12849 || XINT (src, 1) == VUNSPEC_LAX))
12850 return true;
12851 }
12852 return false;
12853 }
12854
12855 enum rtx_code
12856 minmax_code (rtx x)
12857 {
12858 enum rtx_code code = GET_CODE (x);
12859
12860 switch (code)
12861 {
12862 case SMAX:
12863 return GE;
12864 case SMIN:
12865 return LE;
12866 case UMIN:
12867 return LEU;
12868 case UMAX:
12869 return GEU;
12870 default:
12871 gcc_unreachable ();
12872 }
12873 }
12874
12875 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12876
12877 bool
12878 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12879 int *mask, bool *signed_sat)
12880 {
12881 /* The high bound must be a power of two minus one. */
12882 int log = exact_log2 (INTVAL (hi_bound) + 1);
12883 if (log == -1)
12884 return false;
12885
12886 /* The low bound is either zero (for usat) or one less than the
12887 negation of the high bound (for ssat). */
12888 if (INTVAL (lo_bound) == 0)
12889 {
12890 if (mask)
12891 *mask = log;
12892 if (signed_sat)
12893 *signed_sat = false;
12894
12895 return true;
12896 }
12897
12898 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12899 {
12900 if (mask)
12901 *mask = log + 1;
12902 if (signed_sat)
12903 *signed_sat = true;
12904
12905 return true;
12906 }
12907
12908 return false;
12909 }
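
/* Worked examples for the checks above (values only, no particular insn
   implied): clamping to [0, 255] gives HI_BOUND = 255, so LOG = 8 and the
   operation can use usat with *MASK == 8; clamping to [-128, 127] gives
   LOG = 7 and LO_BOUND == -HI_BOUND - 1 == -128, so it can use ssat with
   *MASK == LOG + 1 == 8.  A range such as [0, 100] fails because 101 is
   not a power of two.  */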
12910
12911 /* Return 1 if memory locations are adjacent. */
12912 int
12913 adjacent_mem_locations (rtx a, rtx b)
12914 {
12915 /* We don't guarantee to preserve the order of these memory refs. */
12916 if (volatile_refs_p (a) || volatile_refs_p (b))
12917 return 0;
12918
12919 if ((REG_P (XEXP (a, 0))
12920 || (GET_CODE (XEXP (a, 0)) == PLUS
12921 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12922 && (REG_P (XEXP (b, 0))
12923 || (GET_CODE (XEXP (b, 0)) == PLUS
12924 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12925 {
12926 HOST_WIDE_INT val0 = 0, val1 = 0;
12927 rtx reg0, reg1;
12928 int val_diff;
12929
12930 if (GET_CODE (XEXP (a, 0)) == PLUS)
12931 {
12932 reg0 = XEXP (XEXP (a, 0), 0);
12933 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12934 }
12935 else
12936 reg0 = XEXP (a, 0);
12937
12938 if (GET_CODE (XEXP (b, 0)) == PLUS)
12939 {
12940 reg1 = XEXP (XEXP (b, 0), 0);
12941 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12942 }
12943 else
12944 reg1 = XEXP (b, 0);
12945
12946 /* Don't accept any offset that will require multiple
12947 instructions to handle, since this would cause the
12948 arith_adjacentmem pattern to output an overlong sequence. */
12949 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12950 return 0;
12951
12952 /* Don't allow an eliminable register: register elimination can make
12953 the offset too large. */
12954 if (arm_eliminable_register (reg0))
12955 return 0;
12956
12957 val_diff = val1 - val0;
12958
12959 if (arm_ld_sched)
12960 {
12961 /* If the target has load delay slots, then there's no benefit
12962 to using an ldm instruction unless the offset is zero and
12963 we are optimizing for size. */
12964 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12965 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12966 && (val_diff == 4 || val_diff == -4));
12967 }
12968
12969 return ((REGNO (reg0) == REGNO (reg1))
12970 && (val_diff == 4 || val_diff == -4));
12971 }
12972
12973 return 0;
12974 }
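
/* For instance, the memory references [r3] and [r3, #4] (same base
   register, offsets differing by exactly 4) are considered adjacent,
   whereas [r3] and [r4, #4], or [r3] and [r3, #8], are not.  On
   arm_ld_sched cores the pair is additionally only accepted when
   optimizing for size and one of the offsets is 0 or 4.  The register
   numbers are, of course, only examples.  */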
12975
12976 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12977 for load operations, false for store operations. CONSECUTIVE is true
12978 if the register numbers in the operation must be consecutive in the register
12979 bank. RETURN_PC is true if value is to be loaded in PC.
12980 The pattern we are trying to match for load is:
12981 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12982 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12983 :
12984 :
12985 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12986 ]
12987 where
12988 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12989 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12990 3. If consecutive is TRUE, then for kth register being loaded,
12991 REGNO (R_dk) = REGNO (R_d0) + k.
12992 The pattern for store is similar. */
12993 bool
12994 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12995 bool consecutive, bool return_pc)
12996 {
12997 HOST_WIDE_INT count = XVECLEN (op, 0);
12998 rtx reg, mem, addr;
12999 unsigned regno;
13000 unsigned first_regno;
13001 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13002 rtx elt;
13003 bool addr_reg_in_reglist = false;
13004 bool update = false;
13005 int reg_increment;
13006 int offset_adj;
13007 int regs_per_val;
13008
13009 /* If not in SImode, then registers must be consecutive
13010 (e.g., VLDM instructions for DFmode). */
13011 gcc_assert ((mode == SImode) || consecutive);
13012 /* Setting return_pc for stores is illegal. */
13013 gcc_assert (!return_pc || load);
13014
13015 /* Set up the increments and the regs per val based on the mode. */
13016 reg_increment = GET_MODE_SIZE (mode);
13017 regs_per_val = reg_increment / 4;
13018 offset_adj = return_pc ? 1 : 0;
13019
13020 if (count <= 1
13021 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13022 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13023 return false;
13024
13025 /* Check if this is a write-back. */
13026 elt = XVECEXP (op, 0, offset_adj);
13027 if (GET_CODE (SET_SRC (elt)) == PLUS)
13028 {
13029 i++;
13030 base = 1;
13031 update = true;
13032
13033 /* The offset adjustment must be the number of registers being
13034 popped times the size of a single register. */
13035 if (!REG_P (SET_DEST (elt))
13036 || !REG_P (XEXP (SET_SRC (elt), 0))
13037 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13038 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13039 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13040 ((count - 1 - offset_adj) * reg_increment))
13041 return false;
13042 }
13043
13044 i = i + offset_adj;
13045 base = base + offset_adj;
13046 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13047 success depends on the type: VLDM can do just one reg,
13048 LDM must do at least two. */
13049 if ((count <= i) && (mode == SImode))
13050 return false;
13051
13052 elt = XVECEXP (op, 0, i - 1);
13053 if (GET_CODE (elt) != SET)
13054 return false;
13055
13056 if (load)
13057 {
13058 reg = SET_DEST (elt);
13059 mem = SET_SRC (elt);
13060 }
13061 else
13062 {
13063 reg = SET_SRC (elt);
13064 mem = SET_DEST (elt);
13065 }
13066
13067 if (!REG_P (reg) || !MEM_P (mem))
13068 return false;
13069
13070 regno = REGNO (reg);
13071 first_regno = regno;
13072 addr = XEXP (mem, 0);
13073 if (GET_CODE (addr) == PLUS)
13074 {
13075 if (!CONST_INT_P (XEXP (addr, 1)))
13076 return false;
13077
13078 offset = INTVAL (XEXP (addr, 1));
13079 addr = XEXP (addr, 0);
13080 }
13081
13082 if (!REG_P (addr))
13083 return false;
13084
13085 /* Don't allow SP to be loaded unless it is also the base register. It
13086 guarantees that SP is reset correctly when an LDM instruction
13087 is interrupted. Otherwise, we might end up with a corrupt stack. */
13088 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13089 return false;
13090
13091 for (; i < count; i++)
13092 {
13093 elt = XVECEXP (op, 0, i);
13094 if (GET_CODE (elt) != SET)
13095 return false;
13096
13097 if (load)
13098 {
13099 reg = SET_DEST (elt);
13100 mem = SET_SRC (elt);
13101 }
13102 else
13103 {
13104 reg = SET_SRC (elt);
13105 mem = SET_DEST (elt);
13106 }
13107
13108 if (!REG_P (reg)
13109 || GET_MODE (reg) != mode
13110 || REGNO (reg) <= regno
13111 || (consecutive
13112 && (REGNO (reg) !=
13113 (unsigned int) (first_regno + regs_per_val * (i - base))))
13114 /* Don't allow SP to be loaded unless it is also the base register. It
13115 guarantees that SP is reset correctly when an LDM instruction
13116 is interrupted. Otherwise, we might end up with a corrupt stack. */
13117 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13118 || !MEM_P (mem)
13119 || GET_MODE (mem) != mode
13120 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13121 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13122 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13123 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13124 offset + (i - base) * reg_increment))
13125 && (!REG_P (XEXP (mem, 0))
13126 || offset + (i - base) * reg_increment != 0)))
13127 return false;
13128
13129 regno = REGNO (reg);
13130 if (regno == REGNO (addr))
13131 addr_reg_in_reglist = true;
13132 }
13133
13134 if (load)
13135 {
13136 if (update && addr_reg_in_reglist)
13137 return false;
13138
13139 /* For Thumb-1, address register is always modified - either by write-back
13140 or by explicit load. If the pattern does not describe an update,
13141 then the address register must be in the list of loaded registers. */
13142 if (TARGET_THUMB1)
13143 return update || addr_reg_in_reglist;
13144 }
13145
13146 return true;
13147 }
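
/* As a concrete (purely illustrative) example, an ldmia of two registers
   would be represented as

	(parallel [(set (reg:SI r4) (mem:SI (reg:SI r0)))
		   (set (reg:SI r5) (mem:SI (plus:SI (reg:SI r0)
						     (const_int 4))))])

   and satisfies the checks above with LOAD true, MODE SImode and
   CONSECUTIVE false.  A write-back variant would carry an extra leading
   SET that adds 8 to r0, matching the update check near the top.  */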
13148
13149 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13150 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13151 instruction. ADD_OFFSET is nonzero if the base address register needs
13152 to be modified with an add instruction before we can use it. */
13153
13154 static bool
13155 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13156 int nops, HOST_WIDE_INT add_offset)
13157 {
13158 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13159 if the offset isn't small enough. The reason 2 ldrs are faster
13160 is because these ARMs are able to do more than one cache access
13161 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13162 whilst the ARM8 has a double bandwidth cache. This means that
13163 these cores can do both an instruction fetch and a data fetch in
13164 a single cycle, so the trick of calculating the address into a
13165 scratch register (one of the result regs) and then doing a load
13166 multiple actually becomes slower (and no smaller in code size).
13167 That is the transformation
13168
13169 ldr rd1, [rbase + offset]
13170 ldr rd2, [rbase + offset + 4]
13171
13172 to
13173
13174 add rd1, rbase, offset
13175 ldmia rd1, {rd1, rd2}
13176
13177 produces worse code -- '3 cycles + any stalls on rd2' instead of
13178 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13179 access per cycle, the first sequence could never complete in less
13180 than 6 cycles, whereas the ldm sequence would only take 5 and
13181 would make better use of sequential accesses if not hitting the
13182 cache.
13183
13184 We cheat here and test 'arm_ld_sched' which we currently know to
13185 only be true for the ARM8, ARM9 and StrongARM. If this ever
13186 changes, then the test below needs to be reworked. */
13187 if (nops == 2 && arm_ld_sched && add_offset != 0)
13188 return false;
13189
13190 /* XScale has load-store double instructions, but they have stricter
13191 alignment requirements than load-store multiple, so we cannot
13192 use them.
13193
13194 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13195 the pipeline until completion.
13196
13197 NREGS CYCLES
13198 1 3
13199 2 4
13200 3 5
13201 4 6
13202
13203 An ldr instruction takes 1-3 cycles, but does not block the
13204 pipeline.
13205
13206 NREGS CYCLES
13207 1 1-3
13208 2 2-6
13209 3 3-9
13210 4 4-12
13211
13212 Best case ldr will always win. However, the more ldr instructions
13213 we issue, the less likely we are to be able to schedule them well.
13214 Using ldr instructions also increases code size.
13215
13216 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13217 for counts of 3 or 4 regs. */
13218 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13219 return false;
13220 return true;
13221 }
13222
13223 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13224 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13225 an array ORDER which describes the sequence to use when accessing the
13226 offsets that produces an ascending order. In this sequence, each
13227 offset must be larger by exactly 4 than the previous one. ORDER[0]
13228 must have been filled in with the lowest offset by the caller.
13229 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13230 we use to verify that ORDER produces an ascending order of registers.
13231 Return true if it was possible to construct such an order, false if
13232 not. */
13233
13234 static bool
13235 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13236 int *unsorted_regs)
13237 {
13238 int i;
13239 for (i = 1; i < nops; i++)
13240 {
13241 int j;
13242
13243 order[i] = order[i - 1];
13244 for (j = 0; j < nops; j++)
13245 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13246 {
13247 /* We must find exactly one offset that is higher than the
13248 previous one by 4. */
13249 if (order[i] != order[i - 1])
13250 return false;
13251 order[i] = j;
13252 }
13253 if (order[i] == order[i - 1])
13254 return false;
13255 /* The register numbers must be ascending. */
13256 if (unsorted_regs != NULL
13257 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13258 return false;
13259 }
13260 return true;
13261 }
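
/* For example, with UNSORTED_OFFSETS = { 4, 12, 0, 8 } and ORDER[0]
   preset to 2 (the index of the lowest offset), the loop above produces
   ORDER = { 2, 0, 3, 1 }, i.e. the ascending sequence 0, 4, 8, 12.  If
   any gap were not exactly 4 -- say offsets { 0, 4, 12 } -- the function
   would return false.  */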
13262
13263 /* Used to determine in a peephole whether a sequence of load
13264 instructions can be changed into a load-multiple instruction.
13265 NOPS is the number of separate load instructions we are examining. The
13266 first NOPS entries in OPERANDS are the destination registers, the
13267 next NOPS entries are memory operands. If this function is
13268 successful, *BASE is set to the common base register of the memory
13269 accesses; *LOAD_OFFSET is set to the first memory location's offset
13270 from that base register.
13271 REGS is an array filled in with the destination register numbers.
13272 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13273 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13274 the sequence of registers in REGS matches the loads from ascending memory
13275 locations, and the function verifies that the register numbers are
13276 themselves ascending. If CHECK_REGS is false, the register numbers
13277 are stored in the order they are found in the operands. */
13278 static int
13279 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13280 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13281 {
13282 int unsorted_regs[MAX_LDM_STM_OPS];
13283 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13284 int order[MAX_LDM_STM_OPS];
13285 rtx base_reg_rtx = NULL;
13286 int base_reg = -1;
13287 int i, ldm_case;
13288
13289 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13290 easily extended if required. */
13291 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13292
13293 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13294
13295 /* Loop over the operands and check that the memory references are
13296 suitable (i.e. immediate offsets from the same base register). At
13297 the same time, extract the target register, and the memory
13298 offsets. */
13299 for (i = 0; i < nops; i++)
13300 {
13301 rtx reg;
13302 rtx offset;
13303
13304 /* Convert a subreg of a mem into the mem itself. */
13305 if (GET_CODE (operands[nops + i]) == SUBREG)
13306 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13307
13308 gcc_assert (MEM_P (operands[nops + i]));
13309
13310 /* Don't reorder volatile memory references; it doesn't seem worth
13311 looking for the case where the order is ok anyway. */
13312 if (MEM_VOLATILE_P (operands[nops + i]))
13313 return 0;
13314
13315 offset = const0_rtx;
13316
13317 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13318 || (GET_CODE (reg) == SUBREG
13319 && REG_P (reg = SUBREG_REG (reg))))
13320 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13321 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13322 || (GET_CODE (reg) == SUBREG
13323 && REG_P (reg = SUBREG_REG (reg))))
13324 && (CONST_INT_P (offset
13325 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13326 {
13327 if (i == 0)
13328 {
13329 base_reg = REGNO (reg);
13330 base_reg_rtx = reg;
13331 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13332 return 0;
13333 }
13334 else if (base_reg != (int) REGNO (reg))
13335 /* Not addressed from the same base register. */
13336 return 0;
13337
13338 unsorted_regs[i] = (REG_P (operands[i])
13339 ? REGNO (operands[i])
13340 : REGNO (SUBREG_REG (operands[i])));
13341
13342 /* If it isn't an integer register, or if it overwrites the
13343 base register but isn't the last insn in the list, then
13344 we can't do this. */
13345 if (unsorted_regs[i] < 0
13346 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13347 || unsorted_regs[i] > 14
13348 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13349 return 0;
13350
13351 /* Don't allow SP to be loaded unless it is also the base
13352 register. It guarantees that SP is reset correctly when
13353 an LDM instruction is interrupted. Otherwise, we might
13354 end up with a corrupt stack. */
13355 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13356 return 0;
13357
13358 unsorted_offsets[i] = INTVAL (offset);
13359 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13360 order[0] = i;
13361 }
13362 else
13363 /* Not a suitable memory address. */
13364 return 0;
13365 }
13366
13367 /* All the useful information has now been extracted from the
13368 operands into unsorted_regs and unsorted_offsets; additionally,
13369 order[0] has been set to the lowest offset in the list. Sort
13370 the offsets into order, verifying that they are adjacent, and
13371 check that the register numbers are ascending. */
13372 if (!compute_offset_order (nops, unsorted_offsets, order,
13373 check_regs ? unsorted_regs : NULL))
13374 return 0;
13375
13376 if (saved_order)
13377 memcpy (saved_order, order, sizeof order);
13378
13379 if (base)
13380 {
13381 *base = base_reg;
13382
13383 for (i = 0; i < nops; i++)
13384 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13385
13386 *load_offset = unsorted_offsets[order[0]];
13387 }
13388
13389 if (TARGET_THUMB1
13390 && !peep2_reg_dead_p (nops, base_reg_rtx))
13391 return 0;
13392
13393 if (unsorted_offsets[order[0]] == 0)
13394 ldm_case = 1; /* ldmia */
13395 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13396 ldm_case = 2; /* ldmib */
13397 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13398 ldm_case = 3; /* ldmda */
13399 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13400 ldm_case = 4; /* ldmdb */
13401 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13402 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13403 ldm_case = 5;
13404 else
13405 return 0;
13406
13407 if (!multiple_operation_profitable_p (false, nops,
13408 ldm_case == 5
13409 ? unsorted_offsets[order[0]] : 0))
13410 return 0;
13411
13412 return ldm_case;
13413 }
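
/* To illustrate the ldm_case values (registers and offsets arbitrary):
   ldr r4, [r0]; ldr r5, [r0, #4] yields case 1 (ldmia); a sequence
   starting at offset 4 yields case 2 (ldmib, ARM only); ending at
   offset 0 yields case 3 (ldmda, ARM only); ending at offset -4 yields
   case 4 (ldmdb); and any other base offset that is a valid ARM
   immediate (or whose negation is) yields case 5, where the caller must
   first add the offset into a scratch register.  */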
13414
13415 /* Used to determine in a peephole whether a sequence of store instructions can
13416 be changed into a store-multiple instruction.
13417 NOPS is the number of separate store instructions we are examining.
13418 NOPS_TOTAL is the total number of instructions recognized by the peephole
13419 pattern.
13420 The first NOPS entries in OPERANDS are the source registers, the next
13421 NOPS entries are memory operands. If this function is successful, *BASE is
13422 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13423 to the first memory location's offset from that base register. REGS is an
13424 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13425 likewise filled with the corresponding rtx's.
13426 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13427 numbers to an ascending order of stores.
13428 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13429 from ascending memory locations, and the function verifies that the register
13430 numbers are themselves ascending. If CHECK_REGS is false, the register
13431 numbers are stored in the order they are found in the operands. */
13432 static int
13433 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13434 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13435 HOST_WIDE_INT *load_offset, bool check_regs)
13436 {
13437 int unsorted_regs[MAX_LDM_STM_OPS];
13438 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13439 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13440 int order[MAX_LDM_STM_OPS];
13441 int base_reg = -1;
13442 rtx base_reg_rtx = NULL;
13443 int i, stm_case;
13444
13445 /* Write back of base register is currently only supported for Thumb 1. */
13446 int base_writeback = TARGET_THUMB1;
13447
13448 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13449 easily extended if required. */
13450 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13451
13452 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13453
13454 /* Loop over the operands and check that the memory references are
13455 suitable (i.e. immediate offsets from the same base register). At
13456 the same time, extract the target register, and the memory
13457 offsets. */
13458 for (i = 0; i < nops; i++)
13459 {
13460 rtx reg;
13461 rtx offset;
13462
13463 /* Convert a subreg of a mem into the mem itself. */
13464 if (GET_CODE (operands[nops + i]) == SUBREG)
13465 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13466
13467 gcc_assert (MEM_P (operands[nops + i]));
13468
13469 /* Don't reorder volatile memory references; it doesn't seem worth
13470 looking for the case where the order is ok anyway. */
13471 if (MEM_VOLATILE_P (operands[nops + i]))
13472 return 0;
13473
13474 offset = const0_rtx;
13475
13476 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13477 || (GET_CODE (reg) == SUBREG
13478 && REG_P (reg = SUBREG_REG (reg))))
13479 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13480 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13481 || (GET_CODE (reg) == SUBREG
13482 && REG_P (reg = SUBREG_REG (reg))))
13483 && (CONST_INT_P (offset
13484 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13485 {
13486 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13487 ? operands[i] : SUBREG_REG (operands[i]));
13488 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13489
13490 if (i == 0)
13491 {
13492 base_reg = REGNO (reg);
13493 base_reg_rtx = reg;
13494 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13495 return 0;
13496 }
13497 else if (base_reg != (int) REGNO (reg))
13498 /* Not addressed from the same base register. */
13499 return 0;
13500
13501 /* If it isn't an integer register, then we can't do this. */
13502 if (unsorted_regs[i] < 0
13503 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13504 /* The effects are unpredictable if the base register is
13505 both updated and stored. */
13506 || (base_writeback && unsorted_regs[i] == base_reg)
13507 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13508 || unsorted_regs[i] > 14)
13509 return 0;
13510
13511 unsorted_offsets[i] = INTVAL (offset);
13512 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13513 order[0] = i;
13514 }
13515 else
13516 /* Not a suitable memory address. */
13517 return 0;
13518 }
13519
13520 /* All the useful information has now been extracted from the
13521 operands into unsorted_regs and unsorted_offsets; additionally,
13522 order[0] has been set to the lowest offset in the list. Sort
13523 the offsets into order, verifying that they are adjacent, and
13524 check that the register numbers are ascending. */
13525 if (!compute_offset_order (nops, unsorted_offsets, order,
13526 check_regs ? unsorted_regs : NULL))
13527 return 0;
13528
13529 if (saved_order)
13530 memcpy (saved_order, order, sizeof order);
13531
13532 if (base)
13533 {
13534 *base = base_reg;
13535
13536 for (i = 0; i < nops; i++)
13537 {
13538 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13539 if (reg_rtxs)
13540 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13541 }
13542
13543 *load_offset = unsorted_offsets[order[0]];
13544 }
13545
13546 if (TARGET_THUMB1
13547 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13548 return 0;
13549
13550 if (unsorted_offsets[order[0]] == 0)
13551 stm_case = 1; /* stmia */
13552 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13553 stm_case = 2; /* stmib */
13554 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13555 stm_case = 3; /* stmda */
13556 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13557 stm_case = 4; /* stmdb */
13558 else
13559 return 0;
13560
13561 if (!multiple_operation_profitable_p (false, nops, 0))
13562 return 0;
13563
13564 return stm_case;
13565 }
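
/* Analogously to the load case: str r4, [r0]; str r5, [r0, #4] maps to
   stm_case 1 (stmia), with cases 2-4 covering stmib, stmda and stmdb.
   Other base offsets simply reject the combination here.  */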
13566 \f
13567 /* Routines for use in generating RTL. */
13568
13569 /* Generate a load-multiple instruction. COUNT is the number of loads in
13570 the instruction; REGS and MEMS are arrays containing the operands.
13571 BASEREG is the base register to be used in addressing the memory operands.
13572 WBACK_OFFSET is nonzero if the instruction should update the base
13573 register. */
13574
13575 static rtx
13576 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13577 HOST_WIDE_INT wback_offset)
13578 {
13579 int i = 0, j;
13580 rtx result;
13581
13582 if (!multiple_operation_profitable_p (false, count, 0))
13583 {
13584 rtx seq;
13585
13586 start_sequence ();
13587
13588 for (i = 0; i < count; i++)
13589 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13590
13591 if (wback_offset != 0)
13592 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13593
13594 seq = get_insns ();
13595 end_sequence ();
13596
13597 return seq;
13598 }
13599
13600 result = gen_rtx_PARALLEL (VOIDmode,
13601 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13602 if (wback_offset != 0)
13603 {
13604 XVECEXP (result, 0, 0)
13605 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13606 i = 1;
13607 count++;
13608 }
13609
13610 for (j = 0; i < count; i++, j++)
13611 XVECEXP (result, 0, i)
13612 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13613
13614 return result;
13615 }
13616
13617 /* Generate a store-multiple instruction. COUNT is the number of stores in
13618 the instruction; REGS and MEMS are arrays containing the operands.
13619 BASEREG is the base register to be used in addressing the memory operands.
13620 WBACK_OFFSET is nonzero if the instruction should update the base
13621 register. */
13622
13623 static rtx
13624 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13625 HOST_WIDE_INT wback_offset)
13626 {
13627 int i = 0, j;
13628 rtx result;
13629
13630 if (GET_CODE (basereg) == PLUS)
13631 basereg = XEXP (basereg, 0);
13632
13633 if (!multiple_operation_profitable_p (false, count, 0))
13634 {
13635 rtx seq;
13636
13637 start_sequence ();
13638
13639 for (i = 0; i < count; i++)
13640 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13641
13642 if (wback_offset != 0)
13643 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13644
13645 seq = get_insns ();
13646 end_sequence ();
13647
13648 return seq;
13649 }
13650
13651 result = gen_rtx_PARALLEL (VOIDmode,
13652 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13653 if (wback_offset != 0)
13654 {
13655 XVECEXP (result, 0, 0)
13656 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13657 i = 1;
13658 count++;
13659 }
13660
13661 for (j = 0; i < count; i++, j++)
13662 XVECEXP (result, 0, i)
13663 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13664
13665 return result;
13666 }
13667
13668 /* Generate either a load-multiple or a store-multiple instruction. This
13669 function can be used in situations where we can start with a single MEM
13670 rtx and adjust its address upwards.
13671 COUNT is the number of operations in the instruction, not counting a
13672 possible update of the base register. REGS is an array containing the
13673 register operands.
13674 BASEREG is the base register to be used in addressing the memory operands,
13675 which are constructed from BASEMEM.
13676 WRITE_BACK specifies whether the generated instruction should include an
13677 update of the base register.
13678 OFFSETP is used to pass an offset to and from this function; this offset
13679 is not used when constructing the address (instead BASEMEM should have an
13680 appropriate offset in its address), it is used only for setting
13681 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13682
13683 static rtx
13684 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13685 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13686 {
13687 rtx mems[MAX_LDM_STM_OPS];
13688 HOST_WIDE_INT offset = *offsetp;
13689 int i;
13690
13691 gcc_assert (count <= MAX_LDM_STM_OPS);
13692
13693 if (GET_CODE (basereg) == PLUS)
13694 basereg = XEXP (basereg, 0);
13695
13696 for (i = 0; i < count; i++)
13697 {
13698 rtx addr = plus_constant (Pmode, basereg, i * 4);
13699 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13700 offset += 4;
13701 }
13702
13703 if (write_back)
13704 *offsetp = offset;
13705
13706 if (is_load)
13707 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13708 write_back ? 4 * count : 0);
13709 else
13710 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13711 write_back ? 4 * count : 0);
13712 }
13713
13714 rtx
13715 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13716 rtx basemem, HOST_WIDE_INT *offsetp)
13717 {
13718 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13719 offsetp);
13720 }
13721
13722 rtx
13723 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13724 rtx basemem, HOST_WIDE_INT *offsetp)
13725 {
13726 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13727 offsetp);
13728 }
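
/* Typical usage (mirroring the block-move code further down, with the
   register numbers purely illustrative): to load three words starting at
   SRCBASE and post-update the base register,

	int regnos[3] = { 4, 5, 6 };
	emit_insn (arm_gen_load_multiple (regnos, 3, src, TRUE, srcbase,
					  &srcoffset));

   which emits either a single ldm-style PARALLEL or, when
   multiple_operation_profitable_p says otherwise, an equivalent sequence
   of individual loads.  */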
13729
13730 /* Called from a peephole2 expander to turn a sequence of loads into an
13731 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13732 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13733 is true if we can reorder the registers because they are used commutatively
13734 subsequently.
13735 Returns true iff we could generate a new instruction. */
13736
13737 bool
13738 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13739 {
13740 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13741 rtx mems[MAX_LDM_STM_OPS];
13742 int i, j, base_reg;
13743 rtx base_reg_rtx;
13744 HOST_WIDE_INT offset;
13745 int write_back = FALSE;
13746 int ldm_case;
13747 rtx addr;
13748
13749 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13750 &base_reg, &offset, !sort_regs);
13751
13752 if (ldm_case == 0)
13753 return false;
13754
13755 if (sort_regs)
13756 for (i = 0; i < nops - 1; i++)
13757 for (j = i + 1; j < nops; j++)
13758 if (regs[i] > regs[j])
13759 {
13760 int t = regs[i];
13761 regs[i] = regs[j];
13762 regs[j] = t;
13763 }
13764 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13765
13766 if (TARGET_THUMB1)
13767 {
13768 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13769 gcc_assert (ldm_case == 1 || ldm_case == 5);
13770 write_back = TRUE;
13771 }
13772
13773 if (ldm_case == 5)
13774 {
13775 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13776 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13777 offset = 0;
13778 if (!TARGET_THUMB1)
13779 base_reg_rtx = newbase;
13780 }
13781
13782 for (i = 0; i < nops; i++)
13783 {
13784 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13785 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13786 SImode, addr, 0);
13787 }
13788 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13789 write_back ? offset + i * 4 : 0));
13790 return true;
13791 }
13792
13793 /* Called from a peephole2 expander to turn a sequence of stores into an
13794 STM instruction. OPERANDS are the operands found by the peephole matcher;
13795 NOPS indicates how many separate stores we are trying to combine.
13796 Returns true iff we could generate a new instruction. */
13797
13798 bool
13799 gen_stm_seq (rtx *operands, int nops)
13800 {
13801 int i;
13802 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13803 rtx mems[MAX_LDM_STM_OPS];
13804 int base_reg;
13805 rtx base_reg_rtx;
13806 HOST_WIDE_INT offset;
13807 int write_back = FALSE;
13808 int stm_case;
13809 rtx addr;
13810 bool base_reg_dies;
13811
13812 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13813 mem_order, &base_reg, &offset, true);
13814
13815 if (stm_case == 0)
13816 return false;
13817
13818 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13819
13820 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13821 if (TARGET_THUMB1)
13822 {
13823 gcc_assert (base_reg_dies);
13824 write_back = TRUE;
13825 }
13826
13827 if (stm_case == 5)
13828 {
13829 gcc_assert (base_reg_dies);
13830 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13831 offset = 0;
13832 }
13833
13834 addr = plus_constant (Pmode, base_reg_rtx, offset);
13835
13836 for (i = 0; i < nops; i++)
13837 {
13838 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13839 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13840 SImode, addr, 0);
13841 }
13842 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13843 write_back ? offset + i * 4 : 0));
13844 return true;
13845 }
13846
13847 /* Called from a peephole2 expander to turn a sequence of stores that are
13848 preceded by constant loads into an STM instruction. OPERANDS are the
13849 operands found by the peephole matcher; NOPS indicates how many
13850 separate stores we are trying to combine; there are 2 * NOPS
13851 instructions in the peephole.
13852 Returns true iff we could generate a new instruction. */
13853
13854 bool
13855 gen_const_stm_seq (rtx *operands, int nops)
13856 {
13857 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13858 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13859 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13860 rtx mems[MAX_LDM_STM_OPS];
13861 int base_reg;
13862 rtx base_reg_rtx;
13863 HOST_WIDE_INT offset;
13864 int write_back = FALSE;
13865 int stm_case;
13866 rtx addr;
13867 bool base_reg_dies;
13868 int i, j;
13869 HARD_REG_SET allocated;
13870
13871 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13872 mem_order, &base_reg, &offset, false);
13873
13874 if (stm_case == 0)
13875 return false;
13876
13877 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13878
13879 /* If the same register is used more than once, try to find a free
13880 register. */
13881 CLEAR_HARD_REG_SET (allocated);
13882 for (i = 0; i < nops; i++)
13883 {
13884 for (j = i + 1; j < nops; j++)
13885 if (regs[i] == regs[j])
13886 {
13887 rtx t = peep2_find_free_register (0, nops * 2,
13888 TARGET_THUMB1 ? "l" : "r",
13889 SImode, &allocated);
13890 if (t == NULL_RTX)
13891 return false;
13892 reg_rtxs[i] = t;
13893 regs[i] = REGNO (t);
13894 }
13895 }
13896
13897 /* Compute an ordering that maps the register numbers to an ascending
13898 sequence. */
13899 reg_order[0] = 0;
13900 for (i = 0; i < nops; i++)
13901 if (regs[i] < regs[reg_order[0]])
13902 reg_order[0] = i;
13903
13904 for (i = 1; i < nops; i++)
13905 {
13906 int this_order = reg_order[i - 1];
13907 for (j = 0; j < nops; j++)
13908 if (regs[j] > regs[reg_order[i - 1]]
13909 && (this_order == reg_order[i - 1]
13910 || regs[j] < regs[this_order]))
13911 this_order = j;
13912 reg_order[i] = this_order;
13913 }
13914
13915 /* Ensure that registers that must be live after the instruction end
13916 up with the correct value. */
13917 for (i = 0; i < nops; i++)
13918 {
13919 int this_order = reg_order[i];
13920 if ((this_order != mem_order[i]
13921 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13922 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13923 return false;
13924 }
13925
13926 /* Load the constants. */
13927 for (i = 0; i < nops; i++)
13928 {
13929 rtx op = operands[2 * nops + mem_order[i]];
13930 sorted_regs[i] = regs[reg_order[i]];
13931 emit_move_insn (reg_rtxs[reg_order[i]], op);
13932 }
13933
13934 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13935
13936 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13937 if (TARGET_THUMB1)
13938 {
13939 gcc_assert (base_reg_dies);
13940 write_back = TRUE;
13941 }
13942
13943 if (stm_case == 5)
13944 {
13945 gcc_assert (base_reg_dies);
13946 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13947 offset = 0;
13948 }
13949
13950 addr = plus_constant (Pmode, base_reg_rtx, offset);
13951
13952 for (i = 0; i < nops; i++)
13953 {
13954 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13955 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13956 SImode, addr, 0);
13957 }
13958 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13959 write_back ? offset + i * 4 : 0));
13960 return true;
13961 }
13962
13963 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13964 unaligned copies on processors which support unaligned semantics for those
13965 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13966 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13967 An interleave factor of 1 (the minimum) will perform no interleaving.
13968 Load/store multiple are used for aligned addresses where possible. */
13969
13970 static void
13971 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13972 HOST_WIDE_INT length,
13973 unsigned int interleave_factor)
13974 {
13975 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13976 int *regnos = XALLOCAVEC (int, interleave_factor);
13977 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13978 HOST_WIDE_INT i, j;
13979 HOST_WIDE_INT remaining = length, words;
13980 rtx halfword_tmp = NULL, byte_tmp = NULL;
13981 rtx dst, src;
13982 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13983 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13984 HOST_WIDE_INT srcoffset, dstoffset;
13985 HOST_WIDE_INT src_autoinc, dst_autoinc;
13986 rtx mem, addr;
13987
13988 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13989
13990 /* Use hard registers if we have aligned source or destination so we can use
13991 load/store multiple with contiguous registers. */
13992 if (dst_aligned || src_aligned)
13993 for (i = 0; i < interleave_factor; i++)
13994 regs[i] = gen_rtx_REG (SImode, i);
13995 else
13996 for (i = 0; i < interleave_factor; i++)
13997 regs[i] = gen_reg_rtx (SImode);
13998
13999 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14000 src = copy_addr_to_reg (XEXP (srcbase, 0));
14001
14002 srcoffset = dstoffset = 0;
14003
14004 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14005 For copying the last bytes we want to subtract this offset again. */
14006 src_autoinc = dst_autoinc = 0;
14007
14008 for (i = 0; i < interleave_factor; i++)
14009 regnos[i] = i;
14010
14011 /* Copy BLOCK_SIZE_BYTES chunks. */
14012
14013 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14014 {
14015 /* Load words. */
14016 if (src_aligned && interleave_factor > 1)
14017 {
14018 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14019 TRUE, srcbase, &srcoffset));
14020 src_autoinc += UNITS_PER_WORD * interleave_factor;
14021 }
14022 else
14023 {
14024 for (j = 0; j < interleave_factor; j++)
14025 {
14026 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14027 - src_autoinc));
14028 mem = adjust_automodify_address (srcbase, SImode, addr,
14029 srcoffset + j * UNITS_PER_WORD);
14030 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14031 }
14032 srcoffset += block_size_bytes;
14033 }
14034
14035 /* Store words. */
14036 if (dst_aligned && interleave_factor > 1)
14037 {
14038 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14039 TRUE, dstbase, &dstoffset));
14040 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14041 }
14042 else
14043 {
14044 for (j = 0; j < interleave_factor; j++)
14045 {
14046 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14047 - dst_autoinc));
14048 mem = adjust_automodify_address (dstbase, SImode, addr,
14049 dstoffset + j * UNITS_PER_WORD);
14050 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14051 }
14052 dstoffset += block_size_bytes;
14053 }
14054
14055 remaining -= block_size_bytes;
14056 }
14057
14058 /* Copy any whole words left (note these aren't interleaved with any
14059 subsequent halfword/byte load/stores in the interests of simplicity). */
14060
14061 words = remaining / UNITS_PER_WORD;
14062
14063 gcc_assert (words < interleave_factor);
14064
14065 if (src_aligned && words > 1)
14066 {
14067 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14068 &srcoffset));
14069 src_autoinc += UNITS_PER_WORD * words;
14070 }
14071 else
14072 {
14073 for (j = 0; j < words; j++)
14074 {
14075 addr = plus_constant (Pmode, src,
14076 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14077 mem = adjust_automodify_address (srcbase, SImode, addr,
14078 srcoffset + j * UNITS_PER_WORD);
14079 if (src_aligned)
14080 emit_move_insn (regs[j], mem);
14081 else
14082 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14083 }
14084 srcoffset += words * UNITS_PER_WORD;
14085 }
14086
14087 if (dst_aligned && words > 1)
14088 {
14089 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14090 &dstoffset));
14091 dst_autoinc += words * UNITS_PER_WORD;
14092 }
14093 else
14094 {
14095 for (j = 0; j < words; j++)
14096 {
14097 addr = plus_constant (Pmode, dst,
14098 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14099 mem = adjust_automodify_address (dstbase, SImode, addr,
14100 dstoffset + j * UNITS_PER_WORD);
14101 if (dst_aligned)
14102 emit_move_insn (mem, regs[j]);
14103 else
14104 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14105 }
14106 dstoffset += words * UNITS_PER_WORD;
14107 }
14108
14109 remaining -= words * UNITS_PER_WORD;
14110
14111 gcc_assert (remaining < 4);
14112
14113 /* Copy a halfword if necessary. */
14114
14115 if (remaining >= 2)
14116 {
14117 halfword_tmp = gen_reg_rtx (SImode);
14118
14119 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14120 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14121 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14122
14123 /* Either write out immediately, or delay until we've loaded the last
14124 byte, depending on interleave factor. */
14125 if (interleave_factor == 1)
14126 {
14127 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14128 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14129 emit_insn (gen_unaligned_storehi (mem,
14130 gen_lowpart (HImode, halfword_tmp)));
14131 halfword_tmp = NULL;
14132 dstoffset += 2;
14133 }
14134
14135 remaining -= 2;
14136 srcoffset += 2;
14137 }
14138
14139 gcc_assert (remaining < 2);
14140
14141 /* Copy last byte. */
14142
14143 if ((remaining & 1) != 0)
14144 {
14145 byte_tmp = gen_reg_rtx (SImode);
14146
14147 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14148 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14149 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14150
14151 if (interleave_factor == 1)
14152 {
14153 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14154 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14155 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14156 byte_tmp = NULL;
14157 dstoffset++;
14158 }
14159
14160 remaining--;
14161 srcoffset++;
14162 }
14163
14164 /* Store last halfword if we haven't done so already. */
14165
14166 if (halfword_tmp)
14167 {
14168 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14169 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14170 emit_insn (gen_unaligned_storehi (mem,
14171 gen_lowpart (HImode, halfword_tmp)));
14172 dstoffset += 2;
14173 }
14174
14175 /* Likewise for last byte. */
14176
14177 if (byte_tmp)
14178 {
14179 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14180 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14181 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14182 dstoffset++;
14183 }
14184
14185 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14186 }
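
/* Sketch of the output for a 17-byte copy with INTERLEAVE_FACTOR 2 and
   neither side word-aligned (register choices illustrative only): two
   iterations of ldr/ldr followed by str/str using the unaligned-access
   patterns copy 16 bytes, and the trailing byte is then moved with an
   ldrb/strb pair.  With an aligned source or destination, the word
   copies on that side become ldmia/stmia instead.  */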
14187
14188 /* From mips_adjust_block_mem:
14189
14190 Helper function for doing a loop-based block operation on memory
14191 reference MEM. Each iteration of the loop will operate on LENGTH
14192 bytes of MEM.
14193
14194 Create a new base register for use within the loop and point it to
14195 the start of MEM. Create a new memory reference that uses this
14196 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14197
14198 static void
14199 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14200 rtx *loop_mem)
14201 {
14202 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14203
14204 /* Although the new mem does not refer to a known location,
14205 it does keep up to LENGTH bytes of alignment. */
14206 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14207 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14208 }
14209
14210 /* From mips_block_move_loop:
14211
14212 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14213 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14214 the memory regions do not overlap. */
14215
14216 static void
14217 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14218 unsigned int interleave_factor,
14219 HOST_WIDE_INT bytes_per_iter)
14220 {
14221 rtx src_reg, dest_reg, final_src, test;
14222 HOST_WIDE_INT leftover;
14223
14224 leftover = length % bytes_per_iter;
14225 length -= leftover;
14226
14227 /* Create registers and memory references for use within the loop. */
14228 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14229 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14230
14231 /* Calculate the value that SRC_REG should have after the last iteration of
14232 the loop. */
14233 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14234 0, 0, OPTAB_WIDEN);
14235
14236 /* Emit the start of the loop. */
14237 rtx_code_label *label = gen_label_rtx ();
14238 emit_label (label);
14239
14240 /* Emit the loop body. */
14241 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14242 interleave_factor);
14243
14244 /* Move on to the next block. */
14245 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14246 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14247
14248 /* Emit the loop condition. */
14249 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14250 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14251
14252 /* Mop up any left-over bytes. */
14253 if (leftover)
14254 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14255 }
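/* For illustration: with LENGTH == 40, INTERLEAVE_FACTOR == 4 and
   BYTES_PER_ITER == 16, the loop above copies 16 bytes per iteration and
   SRC_REG reaches FINAL_SRC after two iterations (32 bytes); the remaining
   8 bytes are then copied by the straight-line call after the loop.  */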
14256
14257 /* Emit a block move when either the source or destination is unaligned (not
14258 aligned to a four-byte boundary). This may need further tuning depending on
14259 core type, optimize_size setting, etc. */
14260
14261 static int
14262 arm_movmemqi_unaligned (rtx *operands)
14263 {
14264 HOST_WIDE_INT length = INTVAL (operands[2]);
14265
14266 if (optimize_size)
14267 {
14268 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14269 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14270 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14271 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14272 or dst_aligned though: allow more interleaving in those cases since the
14273 resulting code can be smaller. */
14274 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14275 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14276
14277 if (length > 12)
14278 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14279 interleave_factor, bytes_per_iter);
14280 else
14281 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14282 interleave_factor);
14283 }
14284 else
14285 {
14286 /* Note that the loop created by arm_block_move_unaligned_loop may be
14287 subject to loop unrolling, which makes tuning this condition a little
14288 redundant. */
14289 if (length > 32)
14290 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14291 else
14292 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14293 }
14294
14295 return 1;
14296 }
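/* For illustration: when optimizing for size with neither buffer known to be
   word aligned, a 20-byte copy takes the loop path with INTERLEAVE_FACTOR == 1
   and BYTES_PER_ITER == 4 (five iterations of one unaligned word copy each),
   while an 11-byte copy is expanded straight-line as two unaligned word
   accesses, one halfword access and one byte access.  */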
14297
14298 int
14299 arm_gen_movmemqi (rtx *operands)
14300 {
14301 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14302 HOST_WIDE_INT srcoffset, dstoffset;
14303 rtx src, dst, srcbase, dstbase;
14304 rtx part_bytes_reg = NULL;
14305 rtx mem;
14306
14307 if (!CONST_INT_P (operands[2])
14308 || !CONST_INT_P (operands[3])
14309 || INTVAL (operands[2]) > 64)
14310 return 0;
14311
14312 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14313 return arm_movmemqi_unaligned (operands);
14314
14315 if (INTVAL (operands[3]) & 3)
14316 return 0;
14317
14318 dstbase = operands[0];
14319 srcbase = operands[1];
14320
14321 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14322 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14323
14324 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14325 out_words_to_go = INTVAL (operands[2]) / 4;
14326 last_bytes = INTVAL (operands[2]) & 3;
14327 dstoffset = srcoffset = 0;
14328
14329 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14330 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14331
14332 while (in_words_to_go >= 2)
14333 {
14334 if (in_words_to_go > 4)
14335 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14336 TRUE, srcbase, &srcoffset));
14337 else
14338 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14339 src, FALSE, srcbase,
14340 &srcoffset));
14341
14342 if (out_words_to_go)
14343 {
14344 if (out_words_to_go > 4)
14345 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14346 TRUE, dstbase, &dstoffset));
14347 else if (out_words_to_go != 1)
14348 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14349 out_words_to_go, dst,
14350 (last_bytes == 0
14351 ? FALSE : TRUE),
14352 dstbase, &dstoffset));
14353 else
14354 {
14355 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14356 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14357 if (last_bytes != 0)
14358 {
14359 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14360 dstoffset += 4;
14361 }
14362 }
14363 }
14364
14365 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14366 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14367 }
14368
14369 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14370 if (out_words_to_go)
14371 {
14372 rtx sreg;
14373
14374 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14375 sreg = copy_to_reg (mem);
14376
14377 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14378 emit_move_insn (mem, sreg);
14379 in_words_to_go--;
14380
14381 gcc_assert (!in_words_to_go); /* Sanity check */
14382 }
14383
14384 if (in_words_to_go)
14385 {
14386 gcc_assert (in_words_to_go > 0);
14387
14388 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14389 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14390 }
14391
14392 gcc_assert (!last_bytes || part_bytes_reg);
14393
14394 if (BYTES_BIG_ENDIAN && last_bytes)
14395 {
14396 rtx tmp = gen_reg_rtx (SImode);
14397
14398 /* The bytes we want are in the top end of the word. */
14399 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14400 GEN_INT (8 * (4 - last_bytes))));
14401 part_bytes_reg = tmp;
14402
14403 while (last_bytes)
14404 {
14405 mem = adjust_automodify_address (dstbase, QImode,
14406 plus_constant (Pmode, dst,
14407 last_bytes - 1),
14408 dstoffset + last_bytes - 1);
14409 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14410
14411 if (--last_bytes)
14412 {
14413 tmp = gen_reg_rtx (SImode);
14414 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14415 part_bytes_reg = tmp;
14416 }
14417 }
14418
14419 }
14420 else
14421 {
14422 if (last_bytes > 1)
14423 {
14424 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14425 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14426 last_bytes -= 2;
14427 if (last_bytes)
14428 {
14429 rtx tmp = gen_reg_rtx (SImode);
14430 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14431 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14432 part_bytes_reg = tmp;
14433 dstoffset += 2;
14434 }
14435 }
14436
14437 if (last_bytes)
14438 {
14439 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14440 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14441 }
14442 }
14443
14444 return 1;
14445 }
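/* For illustration: a 16-byte copy with word-aligned operands gives
   IN_WORDS_TO_GO == OUT_WORDS_TO_GO == 4 and LAST_BYTES == 0, so the loop
   above emits a single load-multiple/store-multiple pair, roughly
     ldmia  rS, {r0, r1, r2, r3}
     stmia  rD, {r0, r1, r2, r3}
   (the register choice shown is illustrative only).  A 14-byte copy instead
   leaves LAST_BYTES == 2, which are stored separately at the end (as a
   halfword store on little-endian targets).  */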
14446
14447 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14448 by mode size. */
14449 inline static rtx
14450 next_consecutive_mem (rtx mem)
14451 {
14452 machine_mode mode = GET_MODE (mem);
14453 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14454 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14455
14456 return adjust_automodify_address (mem, mode, addr, offset);
14457 }
14458
14459 /* Copy using LDRD/STRD instructions whenever possible.
14460 Returns true upon success. */
14461 bool
14462 gen_movmem_ldrd_strd (rtx *operands)
14463 {
14464 unsigned HOST_WIDE_INT len;
14465 HOST_WIDE_INT align;
14466 rtx src, dst, base;
14467 rtx reg0;
14468 bool src_aligned, dst_aligned;
14469 bool src_volatile, dst_volatile;
14470
14471 gcc_assert (CONST_INT_P (operands[2]));
14472 gcc_assert (CONST_INT_P (operands[3]));
14473
14474 len = UINTVAL (operands[2]);
14475 if (len > 64)
14476 return false;
14477
14478 /* Maximum alignment we can assume for both src and dst buffers. */
14479 align = INTVAL (operands[3]);
14480
14481 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14482 return false;
14483
14484 /* Place src and dst addresses in registers
14485 and update the corresponding mem rtx. */
14486 dst = operands[0];
14487 dst_volatile = MEM_VOLATILE_P (dst);
14488 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14489 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14490 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14491
14492 src = operands[1];
14493 src_volatile = MEM_VOLATILE_P (src);
14494 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14495 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14496 src = adjust_automodify_address (src, VOIDmode, base, 0);
14497
14498 if (!unaligned_access && !(src_aligned && dst_aligned))
14499 return false;
14500
14501 if (src_volatile || dst_volatile)
14502 return false;
14503
14504 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14505 if (!(dst_aligned || src_aligned))
14506 return arm_gen_movmemqi (operands);
14507
14508 /* If either the src or dst is unaligned, we'll be accessing it as pairs
14509 of unaligned SImode accesses. Otherwise we can generate DImode
14510 ldrd/strd instructions. */
14511 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14512 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14513
14514 while (len >= 8)
14515 {
14516 len -= 8;
14517 reg0 = gen_reg_rtx (DImode);
14518 rtx low_reg = NULL_RTX;
14519 rtx hi_reg = NULL_RTX;
14520
14521 if (!src_aligned || !dst_aligned)
14522 {
14523 low_reg = gen_lowpart (SImode, reg0);
14524 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14525 }
14526 if (src_aligned)
14527 emit_move_insn (reg0, src);
14528 else
14529 {
14530 emit_insn (gen_unaligned_loadsi (low_reg, src));
14531 src = next_consecutive_mem (src);
14532 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14533 }
14534
14535 if (dst_aligned)
14536 emit_move_insn (dst, reg0);
14537 else
14538 {
14539 emit_insn (gen_unaligned_storesi (dst, low_reg));
14540 dst = next_consecutive_mem (dst);
14541 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14542 }
14543
14544 src = next_consecutive_mem (src);
14545 dst = next_consecutive_mem (dst);
14546 }
14547
14548 gcc_assert (len < 8);
14549 if (len >= 4)
14550 {
14551 /* At least a word but less than a double-word left to copy. Copy a word. */
14552 reg0 = gen_reg_rtx (SImode);
14553 src = adjust_address (src, SImode, 0);
14554 dst = adjust_address (dst, SImode, 0);
14555 if (src_aligned)
14556 emit_move_insn (reg0, src);
14557 else
14558 emit_insn (gen_unaligned_loadsi (reg0, src));
14559
14560 if (dst_aligned)
14561 emit_move_insn (dst, reg0);
14562 else
14563 emit_insn (gen_unaligned_storesi (dst, reg0));
14564
14565 src = next_consecutive_mem (src);
14566 dst = next_consecutive_mem (dst);
14567 len -= 4;
14568 }
14569
14570 if (len == 0)
14571 return true;
14572
14573 /* Copy the remaining bytes. */
14574 if (len >= 2)
14575 {
14576 dst = adjust_address (dst, HImode, 0);
14577 src = adjust_address (src, HImode, 0);
14578 reg0 = gen_reg_rtx (SImode);
14579 if (src_aligned)
14580 emit_insn (gen_zero_extendhisi2 (reg0, src));
14581 else
14582 emit_insn (gen_unaligned_loadhiu (reg0, src));
14583
14584 if (dst_aligned)
14585 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14586 else
14587 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14588
14589 src = next_consecutive_mem (src);
14590 dst = next_consecutive_mem (dst);
14591 if (len == 2)
14592 return true;
14593 }
14594
14595 dst = adjust_address (dst, QImode, 0);
14596 src = adjust_address (src, QImode, 0);
14597 reg0 = gen_reg_rtx (QImode);
14598 emit_move_insn (reg0, src);
14599 emit_move_insn (dst, reg0);
14600 return true;
14601 }
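/* For illustration: a 16-byte copy with both MEMs word aligned is expanded
   above into two DImode loads and two DImode stores, which the backend can
   emit as ldrd/strd; with an unaligned source and an aligned destination,
   each iteration instead uses a pair of unaligned SImode loads feeding one
   aligned DImode store.  */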
14602
14603 /* Select a dominance comparison mode if possible for a test of the general
14604 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14605 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14606 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14607 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14608 In all cases OP will be either EQ or NE, but we don't need to know which
14609 here. If we are unable to support a dominance comparison we return
14610 CC mode. This will then fail to match for the RTL expressions that
14611 generate this call. */
14612 machine_mode
14613 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14614 {
14615 enum rtx_code cond1, cond2;
14616 int swapped = 0;
14617
14618 /* Currently we will probably get the wrong result if the individual
14619 comparisons are not simple. This also ensures that it is safe to
14620 reverse a comparison if necessary. */
14621 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14622 != CCmode)
14623 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14624 != CCmode))
14625 return CCmode;
14626
14627 /* The if_then_else variant of this tests the second condition if the
14628 first passes, but is true if the first fails. Reverse the first
14629 condition to get a true "inclusive-or" expression. */
14630 if (cond_or == DOM_CC_NX_OR_Y)
14631 cond1 = reverse_condition (cond1);
14632
14633 /* If the comparisons are not equal, and one doesn't dominate the other,
14634 then we can't do this. */
14635 if (cond1 != cond2
14636 && !comparison_dominates_p (cond1, cond2)
14637 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14638 return CCmode;
14639
14640 if (swapped)
14641 std::swap (cond1, cond2);
14642
14643 switch (cond1)
14644 {
14645 case EQ:
14646 if (cond_or == DOM_CC_X_AND_Y)
14647 return CC_DEQmode;
14648
14649 switch (cond2)
14650 {
14651 case EQ: return CC_DEQmode;
14652 case LE: return CC_DLEmode;
14653 case LEU: return CC_DLEUmode;
14654 case GE: return CC_DGEmode;
14655 case GEU: return CC_DGEUmode;
14656 default: gcc_unreachable ();
14657 }
14658
14659 case LT:
14660 if (cond_or == DOM_CC_X_AND_Y)
14661 return CC_DLTmode;
14662
14663 switch (cond2)
14664 {
14665 case LT:
14666 return CC_DLTmode;
14667 case LE:
14668 return CC_DLEmode;
14669 case NE:
14670 return CC_DNEmode;
14671 default:
14672 gcc_unreachable ();
14673 }
14674
14675 case GT:
14676 if (cond_or == DOM_CC_X_AND_Y)
14677 return CC_DGTmode;
14678
14679 switch (cond2)
14680 {
14681 case GT:
14682 return CC_DGTmode;
14683 case GE:
14684 return CC_DGEmode;
14685 case NE:
14686 return CC_DNEmode;
14687 default:
14688 gcc_unreachable ();
14689 }
14690
14691 case LTU:
14692 if (cond_or == DOM_CC_X_AND_Y)
14693 return CC_DLTUmode;
14694
14695 switch (cond2)
14696 {
14697 case LTU:
14698 return CC_DLTUmode;
14699 case LEU:
14700 return CC_DLEUmode;
14701 case NE:
14702 return CC_DNEmode;
14703 default:
14704 gcc_unreachable ();
14705 }
14706
14707 case GTU:
14708 if (cond_or == DOM_CC_X_AND_Y)
14709 return CC_DGTUmode;
14710
14711 switch (cond2)
14712 {
14713 case GTU:
14714 return CC_DGTUmode;
14715 case GEU:
14716 return CC_DGEUmode;
14717 case NE:
14718 return CC_DNEmode;
14719 default:
14720 gcc_unreachable ();
14721 }
14722
14723 /* The remaining cases only occur when both comparisons are the
14724 same. */
14725 case NE:
14726 gcc_assert (cond1 == cond2);
14727 return CC_DNEmode;
14728
14729 case LE:
14730 gcc_assert (cond1 == cond2);
14731 return CC_DLEmode;
14732
14733 case GE:
14734 gcc_assert (cond1 == cond2);
14735 return CC_DGEmode;
14736
14737 case LEU:
14738 gcc_assert (cond1 == cond2);
14739 return CC_DLEUmode;
14740
14741 case GEU:
14742 gcc_assert (cond1 == cond2);
14743 return CC_DGEUmode;
14744
14745 default:
14746 gcc_unreachable ();
14747 }
14748 }
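/* For example, with COND_OR == DOM_CC_X_OR_Y, X == (a == b) and
   Y == (c >= d), EQ dominates GE and the function returns CC_DGEmode.  If
   neither comparison dominates the other (say LT and GTU), CCmode is
   returned and the combined pattern will fail to match.  */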
14749
14750 machine_mode
14751 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14752 {
14753 /* All floating point compares return CCFP if it is an equality
14754 comparison, and CCFPE otherwise. */
14755 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14756 {
14757 switch (op)
14758 {
14759 case EQ:
14760 case NE:
14761 case UNORDERED:
14762 case ORDERED:
14763 case UNLT:
14764 case UNLE:
14765 case UNGT:
14766 case UNGE:
14767 case UNEQ:
14768 case LTGT:
14769 return CCFPmode;
14770
14771 case LT:
14772 case LE:
14773 case GT:
14774 case GE:
14775 return CCFPEmode;
14776
14777 default:
14778 gcc_unreachable ();
14779 }
14780 }
14781
14782 /* A compare with a shifted operand. Because of canonicalization, the
14783 comparison will have to be swapped when we emit the assembler. */
14784 if (GET_MODE (y) == SImode
14785 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14786 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14787 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14788 || GET_CODE (x) == ROTATERT))
14789 return CC_SWPmode;
14790
14791 /* This operation is performed swapped, but since we only rely on the Z
14792 flag we don't need an additional mode. */
14793 if (GET_MODE (y) == SImode
14794 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14795 && GET_CODE (x) == NEG
14796 && (op == EQ || op == NE))
14797 return CC_Zmode;
14798
14799 /* This is a special case that is used by combine to allow a
14800 comparison of a shifted byte load to be split into a zero-extend
14801 followed by a comparison of the shifted integer (only valid for
14802 equalities and unsigned inequalities). */
14803 if (GET_MODE (x) == SImode
14804 && GET_CODE (x) == ASHIFT
14805 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14806 && GET_CODE (XEXP (x, 0)) == SUBREG
14807 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14808 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14809 && (op == EQ || op == NE
14810 || op == GEU || op == GTU || op == LTU || op == LEU)
14811 && CONST_INT_P (y))
14812 return CC_Zmode;
14813
14814 /* A construct for a conditional compare, if the false arm contains
14815 0, then both conditions must be true, otherwise either condition
14816 must be true. Not all conditions are possible, so CCmode is
14817 returned if it can't be done. */
14818 if (GET_CODE (x) == IF_THEN_ELSE
14819 && (XEXP (x, 2) == const0_rtx
14820 || XEXP (x, 2) == const1_rtx)
14821 && COMPARISON_P (XEXP (x, 0))
14822 && COMPARISON_P (XEXP (x, 1)))
14823 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14824 INTVAL (XEXP (x, 2)));
14825
14826 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14827 if (GET_CODE (x) == AND
14828 && (op == EQ || op == NE)
14829 && COMPARISON_P (XEXP (x, 0))
14830 && COMPARISON_P (XEXP (x, 1)))
14831 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14832 DOM_CC_X_AND_Y);
14833
14834 if (GET_CODE (x) == IOR
14835 && (op == EQ || op == NE)
14836 && COMPARISON_P (XEXP (x, 0))
14837 && COMPARISON_P (XEXP (x, 1)))
14838 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14839 DOM_CC_X_OR_Y);
14840
14841 /* An operation (on Thumb) where we want to test for a single bit.
14842 This is done by shifting that bit up into the top bit of a
14843 scratch register; we can then branch on the sign bit. */
14844 if (TARGET_THUMB1
14845 && GET_MODE (x) == SImode
14846 && (op == EQ || op == NE)
14847 && GET_CODE (x) == ZERO_EXTRACT
14848 && XEXP (x, 1) == const1_rtx)
14849 return CC_Nmode;
14850
14851 /* An operation that sets the condition codes as a side-effect, the
14852 V flag is not set correctly, so we can only use comparisons where
14853 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14854 instead.) */
14855 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14856 if (GET_MODE (x) == SImode
14857 && y == const0_rtx
14858 && (op == EQ || op == NE || op == LT || op == GE)
14859 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14860 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14861 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14862 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14863 || GET_CODE (x) == LSHIFTRT
14864 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14865 || GET_CODE (x) == ROTATERT
14866 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14867 return CC_NOOVmode;
14868
14869 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14870 return CC_Zmode;
14871
14872 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14873 && GET_CODE (x) == PLUS
14874 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14875 return CC_Cmode;
14876
14877 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14878 {
14879 switch (op)
14880 {
14881 case EQ:
14882 case NE:
14883 /* A DImode comparison against zero can be implemented by
14884 or'ing the two halves together. */
14885 if (y == const0_rtx)
14886 return CC_Zmode;
14887
14888 /* We can do an equality test in three Thumb instructions. */
14889 if (!TARGET_32BIT)
14890 return CC_Zmode;
14891
14892 /* FALLTHROUGH */
14893
14894 case LTU:
14895 case LEU:
14896 case GTU:
14897 case GEU:
14898 /* DImode unsigned comparisons can be implemented by cmp +
14899 cmpeq without a scratch register. Not worth doing in
14900 Thumb-2. */
14901 if (TARGET_32BIT)
14902 return CC_CZmode;
14903
14904 /* FALLTHROUGH */
14905
14906 case LT:
14907 case LE:
14908 case GT:
14909 case GE:
14910 /* DImode signed and unsigned comparisons can be implemented
14911 by cmp + sbcs with a scratch register, but that does not
14912 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14913 gcc_assert (op != EQ && op != NE);
14914 return CC_NCVmode;
14915
14916 default:
14917 gcc_unreachable ();
14918 }
14919 }
14920
14921 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14922 return GET_MODE (x);
14923
14924 return CCmode;
14925 }
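/* For example, an SImode test of (a + b) <u b selects CC_Cmode, since only
   the carry flag is needed, while an equality test of (neg:SI a) against a
   register selects CC_Zmode because only the Z flag is relied upon.  */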
14926
14927 /* X and Y are two things to compare using CODE. Emit the compare insn and
14928 return the rtx for the condition-code register in the proper mode.
14929 SCRATCH may provide a scratch register for DImode comparisons. */
14930 rtx
14931 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14932 {
14933 machine_mode mode;
14934 rtx cc_reg;
14935 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14936
14937 /* We might have X as a constant, Y as a register because of the predicates
14938 used for cmpdi. If so, force X to a register here. */
14939 if (dimode_comparison && !REG_P (x))
14940 x = force_reg (DImode, x);
14941
14942 mode = SELECT_CC_MODE (code, x, y);
14943 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14944
14945 if (dimode_comparison
14946 && mode != CC_CZmode)
14947 {
14948 rtx clobber, set;
14949
14950 /* To compare two non-zero values for equality, XOR them and
14951 then compare against zero. Not used for ARM mode; there
14952 CC_CZmode is cheaper. */
14953 if (mode == CC_Zmode && y != const0_rtx)
14954 {
14955 gcc_assert (!reload_completed);
14956 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14957 y = const0_rtx;
14958 }
14959
14960 /* A scratch register is required. */
14961 if (reload_completed)
14962 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14963 else
14964 scratch = gen_rtx_SCRATCH (SImode);
14965
14966 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14967 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14968 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14969 }
14970 else
14971 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14972
14973 return cc_reg;
14974 }
14975
14976 /* Generate a sequence of insns that will generate the correct return
14977 address mask depending on the physical architecture that the program
14978 is running on. */
14979 rtx
14980 arm_gen_return_addr_mask (void)
14981 {
14982 rtx reg = gen_reg_rtx (Pmode);
14983
14984 emit_insn (gen_return_addr_mask (reg));
14985 return reg;
14986 }
14987
14988 void
14989 arm_reload_in_hi (rtx *operands)
14990 {
14991 rtx ref = operands[1];
14992 rtx base, scratch;
14993 HOST_WIDE_INT offset = 0;
14994
14995 if (GET_CODE (ref) == SUBREG)
14996 {
14997 offset = SUBREG_BYTE (ref);
14998 ref = SUBREG_REG (ref);
14999 }
15000
15001 if (REG_P (ref))
15002 {
15003 /* We have a pseudo which has been spilt onto the stack; there
15004 are two cases here: the first where there is a simple
15005 stack-slot replacement and a second where the stack-slot is
15006 out of range, or is used as a subreg. */
15007 if (reg_equiv_mem (REGNO (ref)))
15008 {
15009 ref = reg_equiv_mem (REGNO (ref));
15010 base = find_replacement (&XEXP (ref, 0));
15011 }
15012 else
15013 /* The slot is out of range, or was dressed up in a SUBREG. */
15014 base = reg_equiv_address (REGNO (ref));
15015
15016 /* PR 62554: If there is no equivalent memory location then just move
15017 the value as an SImode register move. This happens when the target
15018 architecture variant does not have an HImode register move. */
15019 if (base == NULL)
15020 {
15021 gcc_assert (REG_P (operands[0]));
15022 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
15023 gen_rtx_SUBREG (SImode, ref, 0)));
15024 return;
15025 }
15026 }
15027 else
15028 base = find_replacement (&XEXP (ref, 0));
15029
15030 /* Handle the case where the address is too complex to be offset by 1. */
15031 if (GET_CODE (base) == MINUS
15032 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15033 {
15034 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15035
15036 emit_set_insn (base_plus, base);
15037 base = base_plus;
15038 }
15039 else if (GET_CODE (base) == PLUS)
15040 {
15041 /* The addend must be CONST_INT, or we would have dealt with it above. */
15042 HOST_WIDE_INT hi, lo;
15043
15044 offset += INTVAL (XEXP (base, 1));
15045 base = XEXP (base, 0);
15046
15047 /* Rework the address into a legal sequence of insns. */
15048 /* Valid range for lo is -4095 -> 4095 */
15049 lo = (offset >= 0
15050 ? (offset & 0xfff)
15051 : -((-offset) & 0xfff));
15052
15053 /* Corner case, if lo is the max offset then we would be out of range
15054 once we have added the additional 1 below, so bump the msb into the
15055 pre-loading insn(s). */
15056 if (lo == 4095)
15057 lo &= 0x7ff;
15058
15059 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15060 ^ (HOST_WIDE_INT) 0x80000000)
15061 - (HOST_WIDE_INT) 0x80000000);
15062
15063 gcc_assert (hi + lo == offset);
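/* Worked example: for OFFSET == 0x1234, LO == 0x234 and HI == 0x1000;
   for OFFSET == -4200, LO == -104 and HI == -4096.  In both cases
   HI + LO == OFFSET and LO stays within the +/- 4095 load range.  */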
15064
15065 if (hi != 0)
15066 {
15067 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15068
15069 /* Get the base address; addsi3 knows how to handle constants
15070 that require more than one insn. */
15071 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15072 base = base_plus;
15073 offset = lo;
15074 }
15075 }
15076
15077 /* Operands[2] may overlap operands[0] (though it won't overlap
15078 operands[1]), that's why we asked for a DImode reg -- so we can
15079 use the bit that does not overlap. */
15080 if (REGNO (operands[2]) == REGNO (operands[0]))
15081 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15082 else
15083 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15084
15085 emit_insn (gen_zero_extendqisi2 (scratch,
15086 gen_rtx_MEM (QImode,
15087 plus_constant (Pmode, base,
15088 offset))));
15089 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15090 gen_rtx_MEM (QImode,
15091 plus_constant (Pmode, base,
15092 offset + 1))));
15093 if (!BYTES_BIG_ENDIAN)
15094 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15095 gen_rtx_IOR (SImode,
15096 gen_rtx_ASHIFT
15097 (SImode,
15098 gen_rtx_SUBREG (SImode, operands[0], 0),
15099 GEN_INT (8)),
15100 scratch));
15101 else
15102 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15103 gen_rtx_IOR (SImode,
15104 gen_rtx_ASHIFT (SImode, scratch,
15105 GEN_INT (8)),
15106 gen_rtx_SUBREG (SImode, operands[0], 0)));
15107 }
15108
15109 /* Handle storing a half-word to memory during reload by synthesizing as two
15110 byte stores. Take care not to clobber the input values until after we
15111 have moved them somewhere safe. This code assumes that if the DImode
15112 scratch in operands[2] overlaps either the input value or output address
15113 in some way, then that value must die in this insn (we absolutely need
15114 two scratch registers for some corner cases). */
15115 void
15116 arm_reload_out_hi (rtx *operands)
15117 {
15118 rtx ref = operands[0];
15119 rtx outval = operands[1];
15120 rtx base, scratch;
15121 HOST_WIDE_INT offset = 0;
15122
15123 if (GET_CODE (ref) == SUBREG)
15124 {
15125 offset = SUBREG_BYTE (ref);
15126 ref = SUBREG_REG (ref);
15127 }
15128
15129 if (REG_P (ref))
15130 {
15131 /* We have a pseudo which has been spilt onto the stack; there
15132 are two cases here: the first where there is a simple
15133 stack-slot replacement and a second where the stack-slot is
15134 out of range, or is used as a subreg. */
15135 if (reg_equiv_mem (REGNO (ref)))
15136 {
15137 ref = reg_equiv_mem (REGNO (ref));
15138 base = find_replacement (&XEXP (ref, 0));
15139 }
15140 else
15141 /* The slot is out of range, or was dressed up in a SUBREG. */
15142 base = reg_equiv_address (REGNO (ref));
15143
15144 /* PR 62254: If there is no equivalent memory location then just move
15145 the value as an SImode register move. This happens when the target
15146 architecture variant does not have an HImode register move. */
15147 if (base == NULL)
15148 {
15149 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15150
15151 if (REG_P (outval))
15152 {
15153 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15154 gen_rtx_SUBREG (SImode, outval, 0)));
15155 }
15156 else /* SUBREG_P (outval) */
15157 {
15158 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15159 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15160 SUBREG_REG (outval)));
15161 else
15162 /* FIXME: Handle other cases ? */
15163 gcc_unreachable ();
15164 }
15165 return;
15166 }
15167 }
15168 else
15169 base = find_replacement (&XEXP (ref, 0));
15170
15171 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15172
15173 /* Handle the case where the address is too complex to be offset by 1. */
15174 if (GET_CODE (base) == MINUS
15175 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15176 {
15177 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15178
15179 /* Be careful not to destroy OUTVAL. */
15180 if (reg_overlap_mentioned_p (base_plus, outval))
15181 {
15182 /* Updating base_plus might destroy outval, see if we can
15183 swap the scratch and base_plus. */
15184 if (!reg_overlap_mentioned_p (scratch, outval))
15185 std::swap (scratch, base_plus);
15186 else
15187 {
15188 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15189
15190 /* Be conservative and copy OUTVAL into the scratch now,
15191 this should only be necessary if outval is a subreg
15192 of something larger than a word. */
15193 /* XXX Might this clobber base? I can't see how it can,
15194 since scratch is known to overlap with OUTVAL, and
15195 must be wider than a word. */
15196 emit_insn (gen_movhi (scratch_hi, outval));
15197 outval = scratch_hi;
15198 }
15199 }
15200
15201 emit_set_insn (base_plus, base);
15202 base = base_plus;
15203 }
15204 else if (GET_CODE (base) == PLUS)
15205 {
15206 /* The addend must be CONST_INT, or we would have dealt with it above. */
15207 HOST_WIDE_INT hi, lo;
15208
15209 offset += INTVAL (XEXP (base, 1));
15210 base = XEXP (base, 0);
15211
15212 /* Rework the address into a legal sequence of insns. */
15213 /* Valid range for lo is -4095 -> 4095 */
15214 lo = (offset >= 0
15215 ? (offset & 0xfff)
15216 : -((-offset) & 0xfff));
15217
15218 /* Corner case, if lo is the max offset then we would be out of range
15219 once we have added the additional 1 below, so bump the msb into the
15220 pre-loading insn(s). */
15221 if (lo == 4095)
15222 lo &= 0x7ff;
15223
15224 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15225 ^ (HOST_WIDE_INT) 0x80000000)
15226 - (HOST_WIDE_INT) 0x80000000);
15227
15228 gcc_assert (hi + lo == offset);
15229
15230 if (hi != 0)
15231 {
15232 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15233
15234 /* Be careful not to destroy OUTVAL. */
15235 if (reg_overlap_mentioned_p (base_plus, outval))
15236 {
15237 /* Updating base_plus might destroy outval, see if we
15238 can swap the scratch and base_plus. */
15239 if (!reg_overlap_mentioned_p (scratch, outval))
15240 std::swap (scratch, base_plus);
15241 else
15242 {
15243 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15244
15245 /* Be conservative and copy outval into scratch now,
15246 this should only be necessary if outval is a
15247 subreg of something larger than a word. */
15248 /* XXX Might this clobber base? I can't see how it
15249 can, since scratch is known to overlap with
15250 outval. */
15251 emit_insn (gen_movhi (scratch_hi, outval));
15252 outval = scratch_hi;
15253 }
15254 }
15255
15256 /* Get the base address; addsi3 knows how to handle constants
15257 that require more than one insn. */
15258 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15259 base = base_plus;
15260 offset = lo;
15261 }
15262 }
15263
15264 if (BYTES_BIG_ENDIAN)
15265 {
15266 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15267 plus_constant (Pmode, base,
15268 offset + 1)),
15269 gen_lowpart (QImode, outval)));
15270 emit_insn (gen_lshrsi3 (scratch,
15271 gen_rtx_SUBREG (SImode, outval, 0),
15272 GEN_INT (8)));
15273 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15274 offset)),
15275 gen_lowpart (QImode, scratch)));
15276 }
15277 else
15278 {
15279 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15280 offset)),
15281 gen_lowpart (QImode, outval)));
15282 emit_insn (gen_lshrsi3 (scratch,
15283 gen_rtx_SUBREG (SImode, outval, 0),
15284 GEN_INT (8)));
15285 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15286 plus_constant (Pmode, base,
15287 offset + 1)),
15288 gen_lowpart (QImode, scratch)));
15289 }
15290 }
15291
15292 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15293 (padded to the size of a word) should be passed in a register. */
15294
15295 static bool
15296 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15297 {
15298 if (TARGET_AAPCS_BASED)
15299 return must_pass_in_stack_var_size (mode, type);
15300 else
15301 return must_pass_in_stack_var_size_or_pad (mode, type);
15302 }
15303
15304
15305 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15306 byte of a stack argument has useful data. For legacy APCS ABIs we use
15307 the default. For AAPCS based ABIs small aggregate types are placed
15308 in the lowest memory address. */
15309
15310 static pad_direction
15311 arm_function_arg_padding (machine_mode mode, const_tree type)
15312 {
15313 if (!TARGET_AAPCS_BASED)
15314 return default_function_arg_padding (mode, type);
15315
15316 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15317 return PAD_DOWNWARD;
15318
15319 return PAD_UPWARD;
15320 }
15321
15322
15323 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15324 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15325 register has useful data, and return the opposite if the most
15326 significant byte does. */
15327
15328 bool
15329 arm_pad_reg_upward (machine_mode mode,
15330 tree type, int first ATTRIBUTE_UNUSED)
15331 {
15332 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15333 {
15334 /* For AAPCS, small aggregates, small fixed-point types,
15335 and small complex types are always padded upwards. */
15336 if (type)
15337 {
15338 if ((AGGREGATE_TYPE_P (type)
15339 || TREE_CODE (type) == COMPLEX_TYPE
15340 || FIXED_POINT_TYPE_P (type))
15341 && int_size_in_bytes (type) <= 4)
15342 return true;
15343 }
15344 else
15345 {
15346 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15347 && GET_MODE_SIZE (mode) <= 4)
15348 return true;
15349 }
15350 }
15351
15352 /* Otherwise, use default padding. */
15353 return !BYTES_BIG_ENDIAN;
15354 }
15355
15356 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15357 assuming that the address in the base register is word aligned. */
15358 bool
15359 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15360 {
15361 HOST_WIDE_INT max_offset;
15362
15363 /* Offset must be a multiple of 4 in Thumb mode. */
15364 if (TARGET_THUMB2 && ((offset & 3) != 0))
15365 return false;
15366
15367 if (TARGET_THUMB2)
15368 max_offset = 1020;
15369 else if (TARGET_ARM)
15370 max_offset = 255;
15371 else
15372 return false;
15373
15374 return ((offset <= max_offset) && (offset >= -max_offset));
15375 }
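/* For example, in Thumb-2 an offset of 1020 is accepted, 1022 is rejected
   (not a multiple of 4) and 1024 is rejected (out of range); in ARM mode the
   limit is +/- 255 and there is no multiple-of-4 restriction.  */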
15376
15377 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15378 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15379 Assumes that the address in the base register RN is word aligned. Pattern
15380 guarantees that both memory accesses use the same base register,
15381 that the offsets are constants within range, and that the gap between the offsets is 4.
15382 If reload is complete, also check that the registers are legal. WBACK indicates whether
15383 the address is updated. LOAD indicates whether the memory access is a load or a store. */
15384 bool
15385 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15386 bool wback, bool load)
15387 {
15388 unsigned int t, t2, n;
15389
15390 if (!reload_completed)
15391 return true;
15392
15393 if (!offset_ok_for_ldrd_strd (offset))
15394 return false;
15395
15396 t = REGNO (rt);
15397 t2 = REGNO (rt2);
15398 n = REGNO (rn);
15399
15400 if ((TARGET_THUMB2)
15401 && ((wback && (n == t || n == t2))
15402 || (t == SP_REGNUM)
15403 || (t == PC_REGNUM)
15404 || (t2 == SP_REGNUM)
15405 || (t2 == PC_REGNUM)
15406 || (!load && (n == PC_REGNUM))
15407 || (load && (t == t2))
15408 /* Triggers the Cortex-M3 LDRD erratum. */
15409 || (!wback && load && fix_cm3_ldrd && (n == t))))
15410 return false;
15411
15412 if ((TARGET_ARM)
15413 && ((wback && (n == t || n == t2))
15414 || (t2 == PC_REGNUM)
15415 || (t % 2 != 0) /* First destination register is not even. */
15416 || (t2 != t + 1)
15417 /* PC can be used as base register (for offset addressing only),
15418 but it is deprecated. */
15419 || (n == PC_REGNUM)))
15420 return false;
15421
15422 return true;
15423 }
15424
15425 /* Return true if a 64-bit access with alignment ALIGN and with a
15426 constant offset OFFSET from the base pointer is permitted on this
15427 architecture. */
15428 static bool
15429 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
15430 {
15431 return (unaligned_access
15432 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
15433 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
15434 }
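/* For example, with unaligned_access enabled a word-aligned access at offset
   4 qualifies; with it disabled the access must be doubleword aligned and the
   offset a multiple of 8.  */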
15435
15436 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15437 operand MEM's address contains an immediate offset from the base
15438 register and has no side effects, in which case it sets BASE,
15439 OFFSET and ALIGN accordingly. */
15440 static bool
15441 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
15442 {
15443 rtx addr;
15444
15445 gcc_assert (base != NULL && offset != NULL);
15446
15447 /* TODO: Handle more general memory operand patterns, such as
15448 PRE_DEC and PRE_INC. */
15449
15450 if (side_effects_p (mem))
15451 return false;
15452
15453 /* Can't deal with subregs. */
15454 if (GET_CODE (mem) == SUBREG)
15455 return false;
15456
15457 gcc_assert (MEM_P (mem));
15458
15459 *offset = const0_rtx;
15460 *align = MEM_ALIGN (mem);
15461
15462 addr = XEXP (mem, 0);
15463
15464 /* If addr isn't valid for DImode, then we can't handle it. */
15465 if (!arm_legitimate_address_p (DImode, addr,
15466 reload_in_progress || reload_completed))
15467 return false;
15468
15469 if (REG_P (addr))
15470 {
15471 *base = addr;
15472 return true;
15473 }
15474 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15475 {
15476 *base = XEXP (addr, 0);
15477 *offset = XEXP (addr, 1);
15478 return (REG_P (*base) && CONST_INT_P (*offset));
15479 }
15480
15481 return false;
15482 }
15483
15484 /* Called from a peephole2 to replace two word-size accesses with a
15485 single LDRD/STRD instruction. Returns true iff we can generate a
15486 new instruction sequence. That is, both accesses use the same base
15487 register and the gap between constant offsets is 4. This function
15488 may reorder its operands to match ldrd/strd RTL templates.
15489 OPERANDS are the operands found by the peephole matcher;
15490 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15491 corresponding memory operands. LOAD indicates whether the access
15492 is a load or a store. CONST_STORE indicates a store of constant
15493 integer values held in OPERANDS[4,5], and assumes that the pattern
15494 is four insns long, for the purpose of checking dead registers.
15495 COMMUTE indicates that register operands may be reordered. */
15496 bool
15497 gen_operands_ldrd_strd (rtx *operands, bool load,
15498 bool const_store, bool commute)
15499 {
15500 int nops = 2;
15501 HOST_WIDE_INT offsets[2], offset, align[2];
15502 rtx base = NULL_RTX;
15503 rtx cur_base, cur_offset, tmp;
15504 int i, gap;
15505 HARD_REG_SET regset;
15506
15507 gcc_assert (!const_store || !load);
15508 /* Check that the memory references are immediate offsets from the
15509 same base register. Extract the base register, the destination
15510 registers, and the corresponding memory offsets. */
15511 for (i = 0; i < nops; i++)
15512 {
15513 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
15514 &align[i]))
15515 return false;
15516
15517 if (i == 0)
15518 base = cur_base;
15519 else if (REGNO (base) != REGNO (cur_base))
15520 return false;
15521
15522 offsets[i] = INTVAL (cur_offset);
15523 if (GET_CODE (operands[i]) == SUBREG)
15524 {
15525 tmp = SUBREG_REG (operands[i]);
15526 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15527 operands[i] = tmp;
15528 }
15529 }
15530
15531 /* Make sure there is no dependency between the individual loads. */
15532 if (load && REGNO (operands[0]) == REGNO (base))
15533 return false; /* RAW */
15534
15535 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15536 return false; /* WAW */
15537
15538 /* If the same input register is used in both stores
15539 when storing different constants, try to find a free register.
15540 For example, the code
15541 mov r0, 0
15542 str r0, [r2]
15543 mov r0, 1
15544 str r0, [r2, #4]
15545 can be transformed into
15546 mov r1, 0
15547 mov r0, 1
15548 strd r1, r0, [r2]
15549 in Thumb mode assuming that r1 is free.
15550 For ARM mode do the same but only if the starting register
15551 can be made to be even. */
15552 if (const_store
15553 && REGNO (operands[0]) == REGNO (operands[1])
15554 && INTVAL (operands[4]) != INTVAL (operands[5]))
15555 {
15556 if (TARGET_THUMB2)
15557 {
15558 CLEAR_HARD_REG_SET (regset);
15559 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15560 if (tmp == NULL_RTX)
15561 return false;
15562
15563 /* Use the new register in the first load to ensure that
15564 if the original input register is not dead after peephole,
15565 then it will have the correct constant value. */
15566 operands[0] = tmp;
15567 }
15568 else if (TARGET_ARM)
15569 {
15570 int regno = REGNO (operands[0]);
15571 if (!peep2_reg_dead_p (4, operands[0]))
15572 {
15573 /* When the input register is even and is not dead after the
15574 pattern, it has to hold the second constant but we cannot
15575 form a legal STRD in ARM mode with this register as the second
15576 register. */
15577 if (regno % 2 == 0)
15578 return false;
15579
15580 /* Is regno-1 free? */
15581 SET_HARD_REG_SET (regset);
15582 CLEAR_HARD_REG_BIT(regset, regno - 1);
15583 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15584 if (tmp == NULL_RTX)
15585 return false;
15586
15587 operands[0] = tmp;
15588 }
15589 else
15590 {
15591 /* Find a DImode register. */
15592 CLEAR_HARD_REG_SET (regset);
15593 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15594 if (tmp != NULL_RTX)
15595 {
15596 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15597 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15598 }
15599 else
15600 {
15601 /* Can we use the input register to form a DI register? */
15602 SET_HARD_REG_SET (regset);
15603 CLEAR_HARD_REG_BIT(regset,
15604 regno % 2 == 0 ? regno + 1 : regno - 1);
15605 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15606 if (tmp == NULL_RTX)
15607 return false;
15608 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15609 }
15610 }
15611
15612 gcc_assert (operands[0] != NULL_RTX);
15613 gcc_assert (operands[1] != NULL_RTX);
15614 gcc_assert (REGNO (operands[0]) % 2 == 0);
15615 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15616 }
15617 }
15618
15619 /* Make sure the instructions are ordered with lower memory access first. */
15620 if (offsets[0] > offsets[1])
15621 {
15622 gap = offsets[0] - offsets[1];
15623 offset = offsets[1];
15624
15625 /* Swap the instructions such that lower memory is accessed first. */
15626 std::swap (operands[0], operands[1]);
15627 std::swap (operands[2], operands[3]);
15628 std::swap (align[0], align[1]);
15629 if (const_store)
15630 std::swap (operands[4], operands[5]);
15631 }
15632 else
15633 {
15634 gap = offsets[1] - offsets[0];
15635 offset = offsets[0];
15636 }
15637
15638 /* Make sure accesses are to consecutive memory locations. */
15639 if (gap != 4)
15640 return false;
15641
15642 if (!align_ok_ldrd_strd (align[0], offset))
15643 return false;
15644
15645 /* Make sure we generate legal instructions. */
15646 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15647 false, load))
15648 return true;
15649
15650 /* In Thumb state, where registers are almost unconstrained, there
15651 is little hope of fixing it. */
15652 if (TARGET_THUMB2)
15653 return false;
15654
15655 if (load && commute)
15656 {
15657 /* Try reordering registers. */
15658 std::swap (operands[0], operands[1]);
15659 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15660 false, load))
15661 return true;
15662 }
15663
15664 if (const_store)
15665 {
15666 /* If input registers are dead after this pattern, they can be
15667 reordered or replaced by other registers that are free in the
15668 current pattern. */
15669 if (!peep2_reg_dead_p (4, operands[0])
15670 || !peep2_reg_dead_p (4, operands[1]))
15671 return false;
15672
15673 /* Try to reorder the input registers. */
15674 /* For example, the code
15675 mov r0, 0
15676 mov r1, 1
15677 str r1, [r2]
15678 str r0, [r2, #4]
15679 can be transformed into
15680 mov r1, 0
15681 mov r0, 1
15682 strd r0, [r2]
15683 */
15684 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15685 false, false))
15686 {
15687 std::swap (operands[0], operands[1]);
15688 return true;
15689 }
15690
15691 /* Try to find a free DI register. */
15692 CLEAR_HARD_REG_SET (regset);
15693 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15694 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15695 while (true)
15696 {
15697 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15698 if (tmp == NULL_RTX)
15699 return false;
15700
15701 /* DREG must be an even-numbered register in DImode.
15702 Split it into SI registers. */
15703 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15704 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15705 gcc_assert (operands[0] != NULL_RTX);
15706 gcc_assert (operands[1] != NULL_RTX);
15707 gcc_assert (REGNO (operands[0]) % 2 == 0);
15708 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15709
15710 return (operands_ok_ldrd_strd (operands[0], operands[1],
15711 base, offset,
15712 false, load));
15713 }
15714 }
15715
15716 return false;
15717 }
15718
15719
15720
15721 \f
15722 /* Print a symbolic form of X to the debug file, F. */
15723 static void
15724 arm_print_value (FILE *f, rtx x)
15725 {
15726 switch (GET_CODE (x))
15727 {
15728 case CONST_INT:
15729 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15730 return;
15731
15732 case CONST_DOUBLE:
15733 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15734 return;
15735
15736 case CONST_VECTOR:
15737 {
15738 int i;
15739
15740 fprintf (f, "<");
15741 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15742 {
15743 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15744 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15745 fputc (',', f);
15746 }
15747 fprintf (f, ">");
15748 }
15749 return;
15750
15751 case CONST_STRING:
15752 fprintf (f, "\"%s\"", XSTR (x, 0));
15753 return;
15754
15755 case SYMBOL_REF:
15756 fprintf (f, "`%s'", XSTR (x, 0));
15757 return;
15758
15759 case LABEL_REF:
15760 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15761 return;
15762
15763 case CONST:
15764 arm_print_value (f, XEXP (x, 0));
15765 return;
15766
15767 case PLUS:
15768 arm_print_value (f, XEXP (x, 0));
15769 fprintf (f, "+");
15770 arm_print_value (f, XEXP (x, 1));
15771 return;
15772
15773 case PC:
15774 fprintf (f, "pc");
15775 return;
15776
15777 default:
15778 fprintf (f, "????");
15779 return;
15780 }
15781 }
15782 \f
15783 /* Routines for manipulation of the constant pool. */
15784
15785 /* Arm instructions cannot load a large constant directly into a
15786 register; they have to come from a pc relative load. The constant
15787 must therefore be placed in the addressable range of the pc
15788 relative load. Depending on the precise pc relative load
15789 instruction the range is somewhere between 256 bytes and 4k. This
15790 means that we often have to dump a constant inside a function, and
15791 generate code to branch around it.
15792
15793 It is important to minimize this, since the branches will slow
15794 things down and make the code larger.
15795
15796 Normally we can hide the table after an existing unconditional
15797 branch so that there is no interruption of the flow, but in the
15798 worst case the code looks like this:
15799
15800 ldr rn, L1
15801 ...
15802 b L2
15803 align
15804 L1: .long value
15805 L2:
15806 ...
15807
15808 ldr rn, L3
15809 ...
15810 b L4
15811 align
15812 L3: .long value
15813 L4:
15814 ...
15815
15816 We fix this by performing a scan after scheduling, which notices
15817 which instructions need to have their operands fetched from the
15818 constant table and builds the table.
15819
15820 The algorithm starts by building a table of all the constants that
15821 need fixing up and all the natural barriers in the function (places
15822 where a constant table can be dropped without breaking the flow).
15823 For each fixup we note how far the pc-relative replacement will be
15824 able to reach and the offset of the instruction into the function.
15825
15826 Having built the table we then group the fixes together to form
15827 tables that are as large as possible (subject to addressing
15828 constraints) and emit each table of constants after the last
15829 barrier that is within range of all the instructions in the group.
15830 If a group does not contain a barrier, then we forcibly create one
15831 by inserting a jump instruction into the flow. Once the table has
15832 been inserted, the insns are then modified to reference the
15833 relevant entry in the pool.
15834
15835 Possible enhancements to the algorithm (not implemented) are:
15836
15837 1) For some processors and object formats, there may be benefit in
15838 aligning the pools to the start of cache lines; this alignment
15839 would need to be taken into account when calculating addressability
15840 of a pool. */
15841
15842 /* These typedefs are located at the start of this file, so that
15843 they can be used in the prototypes there. This comment is to
15844 remind readers of that fact so that the following structures
15845 can be understood more easily.
15846
15847 typedef struct minipool_node Mnode;
15848 typedef struct minipool_fixup Mfix; */
15849
15850 struct minipool_node
15851 {
15852 /* Doubly linked chain of entries. */
15853 Mnode * next;
15854 Mnode * prev;
15855 /* The maximum offset into the code at which this entry can be placed. While
15856 pushing fixes for forward references, all entries are sorted in order
15857 of increasing max_address. */
15858 HOST_WIDE_INT max_address;
15859 /* Similarly for an entry inserted for a backwards ref. */
15860 HOST_WIDE_INT min_address;
15861 /* The number of fixes referencing this entry. This can become zero
15862 if we "unpush" an entry. In this case we ignore the entry when we
15863 come to emit the code. */
15864 int refcount;
15865 /* The offset from the start of the minipool. */
15866 HOST_WIDE_INT offset;
15867 /* The value in the table. */
15868 rtx value;
15869 /* The mode of value. */
15870 machine_mode mode;
15871 /* The size of the value. With iWMMXt enabled
15872 sizes > 4 also imply an alignment of 8 bytes. */
15873 int fix_size;
15874 };
15875
15876 struct minipool_fixup
15877 {
15878 Mfix * next;
15879 rtx_insn * insn;
15880 HOST_WIDE_INT address;
15881 rtx * loc;
15882 machine_mode mode;
15883 int fix_size;
15884 rtx value;
15885 Mnode * minipool;
15886 HOST_WIDE_INT forwards;
15887 HOST_WIDE_INT backwards;
15888 };
15889
15890 /* Fixes less than a word need padding out to a word boundary. */
15891 #define MINIPOOL_FIX_SIZE(mode) \
15892 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
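/* For example, MINIPOOL_FIX_SIZE (HImode) is 4 (padded up to a word) and
   MINIPOOL_FIX_SIZE (DImode) is 8.  */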
15893
15894 static Mnode * minipool_vector_head;
15895 static Mnode * minipool_vector_tail;
15896 static rtx_code_label *minipool_vector_label;
15897 static int minipool_pad;
15898
15899 /* The linked list of all minipool fixes required for this function. */
15900 Mfix * minipool_fix_head;
15901 Mfix * minipool_fix_tail;
15902 /* The fix entry for the current minipool, once it has been placed. */
15903 Mfix * minipool_barrier;
15904
15905 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15906 #define JUMP_TABLES_IN_TEXT_SECTION 0
15907 #endif
15908
15909 static HOST_WIDE_INT
15910 get_jump_table_size (rtx_jump_table_data *insn)
15911 {
15912 /* ADDR_VECs only take room if read-only data goes into the text
15913 section. */
15914 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15915 {
15916 rtx body = PATTERN (insn);
15917 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15918 HOST_WIDE_INT size;
15919 HOST_WIDE_INT modesize;
15920
15921 modesize = GET_MODE_SIZE (GET_MODE (body));
15922 size = modesize * XVECLEN (body, elt);
15923 switch (modesize)
15924 {
15925 case 1:
15926 /* Round up size of TBB table to a halfword boundary. */
15927 size = (size + 1) & ~HOST_WIDE_INT_1;
15928 break;
15929 case 2:
15930 /* No padding necessary for TBH. */
15931 break;
15932 case 4:
15933 /* Add two bytes for alignment on Thumb. */
15934 if (TARGET_THUMB)
15935 size += 2;
15936 break;
15937 default:
15938 gcc_unreachable ();
15939 }
15940 return size;
15941 }
15942
15943 return 0;
15944 }
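/* For illustration: a QImode ADDR_DIFF_VEC (a TBB table) with 7 entries
   occupies 7 bytes, rounded up to 8; an SImode table with 3 entries occupies
   12 bytes, plus 2 bytes of alignment padding on Thumb.  */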
15945
15946 /* Return the maximum amount of padding that will be inserted before
15947 label LABEL. */
15948
15949 static HOST_WIDE_INT
15950 get_label_padding (rtx label)
15951 {
15952 HOST_WIDE_INT align, min_insn_size;
15953
15954 align = 1 << label_to_alignment (label);
15955 min_insn_size = TARGET_THUMB ? 2 : 4;
15956 return align > min_insn_size ? align - min_insn_size : 0;
15957 }
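/* For example, a label aligned to 8 bytes can be preceded by up to 6 bytes of
   padding on Thumb (8 - 2) and up to 4 bytes on ARM (8 - 4).  */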
15958
15959 /* Move a minipool fix MP from its current location to before MAX_MP.
15960 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15961 constraints may need updating. */
15962 static Mnode *
15963 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15964 HOST_WIDE_INT max_address)
15965 {
15966 /* The code below assumes these are different. */
15967 gcc_assert (mp != max_mp);
15968
15969 if (max_mp == NULL)
15970 {
15971 if (max_address < mp->max_address)
15972 mp->max_address = max_address;
15973 }
15974 else
15975 {
15976 if (max_address > max_mp->max_address - mp->fix_size)
15977 mp->max_address = max_mp->max_address - mp->fix_size;
15978 else
15979 mp->max_address = max_address;
15980
15981 /* Unlink MP from its current position. Since max_mp is non-null,
15982 mp->prev must be non-null. */
15983 mp->prev->next = mp->next;
15984 if (mp->next != NULL)
15985 mp->next->prev = mp->prev;
15986 else
15987 minipool_vector_tail = mp->prev;
15988
15989 /* Re-insert it before MAX_MP. */
15990 mp->next = max_mp;
15991 mp->prev = max_mp->prev;
15992 max_mp->prev = mp;
15993
15994 if (mp->prev != NULL)
15995 mp->prev->next = mp;
15996 else
15997 minipool_vector_head = mp;
15998 }
15999
16000 /* Save the new entry. */
16001 max_mp = mp;
16002
16003 /* Scan over the preceding entries and adjust their addresses as
16004 required. */
16005 while (mp->prev != NULL
16006 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16007 {
16008 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16009 mp = mp->prev;
16010 }
16011
16012 return max_mp;
16013 }
16014
16015 /* Add a constant to the minipool for a forward reference. Returns the
16016 node added or NULL if the constant will not fit in this pool. */
16017 static Mnode *
16018 add_minipool_forward_ref (Mfix *fix)
16019 {
16020 /* If set, max_mp is the first pool_entry that has a lower
16021 constraint than the one we are trying to add. */
16022 Mnode * max_mp = NULL;
16023 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
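/* For example an insn at address 1000 whose pool_range is 4096, with 4
   bytes of pool padding, must see its pool entry no later than address
   1000 + 4096 - 4 == 5092. */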
16024 Mnode * mp;
16025
16026 /* If the minipool starts before the end of FIX->INSN then this FIX
16027 can not be placed into the current pool. Furthermore, adding the
16028 new constant pool entry may cause the pool to start FIX_SIZE bytes
16029 earlier. */
16030 if (minipool_vector_head &&
16031 (fix->address + get_attr_length (fix->insn)
16032 >= minipool_vector_head->max_address - fix->fix_size))
16033 return NULL;
16034
16035 /* Scan the pool to see if a constant with the same value has
16036 already been added. While we are doing this, also note the
16037 location where we must insert the constant if it doesn't already
16038 exist. */
16039 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16040 {
16041 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16042 && fix->mode == mp->mode
16043 && (!LABEL_P (fix->value)
16044 || (CODE_LABEL_NUMBER (fix->value)
16045 == CODE_LABEL_NUMBER (mp->value)))
16046 && rtx_equal_p (fix->value, mp->value))
16047 {
16048 /* More than one fix references this entry. */
16049 mp->refcount++;
16050 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16051 }
16052
16053 /* Note the insertion point if necessary. */
16054 if (max_mp == NULL
16055 && mp->max_address > max_address)
16056 max_mp = mp;
16057
16058 /* If we are inserting an 8-byte aligned quantity and
16059 we have not already found an insertion point, then
16060 make sure that all such 8-byte aligned quantities are
16061 placed at the start of the pool. */
16062 if (ARM_DOUBLEWORD_ALIGN
16063 && max_mp == NULL
16064 && fix->fix_size >= 8
16065 && mp->fix_size < 8)
16066 {
16067 max_mp = mp;
16068 max_address = mp->max_address;
16069 }
16070 }
16071
16072 /* The value is not currently in the minipool, so we need to create
16073 a new entry for it. If MAX_MP is NULL, the entry will be put on
16074 the end of the list since the placement is less constrained than
16075 any existing entry. Otherwise, we insert the new fix before
16076 MAX_MP and, if necessary, adjust the constraints on the other
16077 entries. */
16078 mp = XNEW (Mnode);
16079 mp->fix_size = fix->fix_size;
16080 mp->mode = fix->mode;
16081 mp->value = fix->value;
16082 mp->refcount = 1;
16083 /* Not yet required for a backwards ref. */
16084 mp->min_address = -65536;
16085
16086 if (max_mp == NULL)
16087 {
16088 mp->max_address = max_address;
16089 mp->next = NULL;
16090 mp->prev = minipool_vector_tail;
16091
16092 if (mp->prev == NULL)
16093 {
16094 minipool_vector_head = mp;
16095 minipool_vector_label = gen_label_rtx ();
16096 }
16097 else
16098 mp->prev->next = mp;
16099
16100 minipool_vector_tail = mp;
16101 }
16102 else
16103 {
16104 if (max_address > max_mp->max_address - mp->fix_size)
16105 mp->max_address = max_mp->max_address - mp->fix_size;
16106 else
16107 mp->max_address = max_address;
16108
16109 mp->next = max_mp;
16110 mp->prev = max_mp->prev;
16111 max_mp->prev = mp;
16112 if (mp->prev != NULL)
16113 mp->prev->next = mp;
16114 else
16115 minipool_vector_head = mp;
16116 }
16117
16118 /* Save the new entry. */
16119 max_mp = mp;
16120
16121 /* Scan over the preceding entries and adjust their addresses as
16122 required. */
16123 while (mp->prev != NULL
16124 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16125 {
16126 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16127 mp = mp->prev;
16128 }
16129
16130 return max_mp;
16131 }
16132
16133 static Mnode *
16134 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16135 HOST_WIDE_INT min_address)
16136 {
16137 HOST_WIDE_INT offset;
16138
16139 /* The code below assumes these are different. */
16140 gcc_assert (mp != min_mp);
16141
16142 if (min_mp == NULL)
16143 {
16144 if (min_address > mp->min_address)
16145 mp->min_address = min_address;
16146 }
16147 else
16148 {
16149 /* We will adjust this below if it is too loose. */
16150 mp->min_address = min_address;
16151
16152 /* Unlink MP from its current position. Since min_mp is non-null,
16153 mp->next must be non-null. */
16154 mp->next->prev = mp->prev;
16155 if (mp->prev != NULL)
16156 mp->prev->next = mp->next;
16157 else
16158 minipool_vector_head = mp->next;
16159
16160 /* Reinsert it after MIN_MP. */
16161 mp->prev = min_mp;
16162 mp->next = min_mp->next;
16163 min_mp->next = mp;
16164 if (mp->next != NULL)
16165 mp->next->prev = mp;
16166 else
16167 minipool_vector_tail = mp;
16168 }
16169
16170 min_mp = mp;
16171
16172 offset = 0;
16173 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16174 {
16175 mp->offset = offset;
16176 if (mp->refcount > 0)
16177 offset += mp->fix_size;
16178
16179 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16180 mp->next->min_address = mp->min_address + mp->fix_size;
16181 }
16182
16183 return min_mp;
16184 }
16185
16186 /* Add a constant to the minipool for a backward reference. Returns the
16187 node added or NULL if the constant will not fit in this pool.
16188
16189 Note that the code for insertion for a backwards reference can be
16190 somewhat confusing because the calculated offsets for each fix do
16191 not take into account the size of the pool (which is still under
16192 construction). */
16193 static Mnode *
16194 add_minipool_backward_ref (Mfix *fix)
16195 {
16196 /* If set, min_mp is the last pool_entry that has a lower constraint
16197 than the one we are trying to add. */
16198 Mnode *min_mp = NULL;
16199 /* This can be negative, since it is only a constraint. */
16200 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16201 Mnode *mp;
16202
16203 /* If we can't reach the current pool from this insn, or if we can't
16204 insert this entry at the end of the pool without pushing other
16205 fixes out of range, then we don't try. This ensures that we
16206 can't fail later on. */
16207 if (min_address >= minipool_barrier->address
16208 || (minipool_vector_tail->min_address + fix->fix_size
16209 >= minipool_barrier->address))
16210 return NULL;
16211
16212 /* Scan the pool to see if a constant with the same value has
16213 already been added. While we are doing this, also note the
16214 location where we must insert the constant if it doesn't already
16215 exist. */
16216 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16217 {
16218 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16219 && fix->mode == mp->mode
16220 && (!LABEL_P (fix->value)
16221 || (CODE_LABEL_NUMBER (fix->value)
16222 == CODE_LABEL_NUMBER (mp->value)))
16223 && rtx_equal_p (fix->value, mp->value)
16224 /* Check that there is enough slack to move this entry to the
16225 end of the table (this is conservative). */
16226 && (mp->max_address
16227 > (minipool_barrier->address
16228 + minipool_vector_tail->offset
16229 + minipool_vector_tail->fix_size)))
16230 {
16231 mp->refcount++;
16232 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16233 }
16234
16235 if (min_mp != NULL)
16236 mp->min_address += fix->fix_size;
16237 else
16238 {
16239 /* Note the insertion point if necessary. */
16240 if (mp->min_address < min_address)
16241 {
16242 /* For now, we do not allow the insertion of 8-byte alignment
16243 requiring nodes anywhere but at the start of the pool. */
16244 if (ARM_DOUBLEWORD_ALIGN
16245 && fix->fix_size >= 8 && mp->fix_size < 8)
16246 return NULL;
16247 else
16248 min_mp = mp;
16249 }
16250 else if (mp->max_address
16251 < minipool_barrier->address + mp->offset + fix->fix_size)
16252 {
16253 /* Inserting before this entry would push the fix beyond
16254 its maximum address (which can happen if we have
16255 re-located a forwards fix); force the new fix to come
16256 after it. */
16257 if (ARM_DOUBLEWORD_ALIGN
16258 && fix->fix_size >= 8 && mp->fix_size < 8)
16259 return NULL;
16260 else
16261 {
16262 min_mp = mp;
16263 min_address = mp->min_address + fix->fix_size;
16264 }
16265 }
16266 /* Do not insert a non-8-byte aligned quantity before 8-byte
16267 aligned quantities. */
16268 else if (ARM_DOUBLEWORD_ALIGN
16269 && fix->fix_size < 8
16270 && mp->fix_size >= 8)
16271 {
16272 min_mp = mp;
16273 min_address = mp->min_address + fix->fix_size;
16274 }
16275 }
16276 }
16277
16278 /* We need to create a new entry. */
16279 mp = XNEW (Mnode);
16280 mp->fix_size = fix->fix_size;
16281 mp->mode = fix->mode;
16282 mp->value = fix->value;
16283 mp->refcount = 1;
16284 mp->max_address = minipool_barrier->address + 65536;
16285
16286 mp->min_address = min_address;
16287
16288 if (min_mp == NULL)
16289 {
16290 mp->prev = NULL;
16291 mp->next = minipool_vector_head;
16292
16293 if (mp->next == NULL)
16294 {
16295 minipool_vector_tail = mp;
16296 minipool_vector_label = gen_label_rtx ();
16297 }
16298 else
16299 mp->next->prev = mp;
16300
16301 minipool_vector_head = mp;
16302 }
16303 else
16304 {
16305 mp->next = min_mp->next;
16306 mp->prev = min_mp;
16307 min_mp->next = mp;
16308
16309 if (mp->next != NULL)
16310 mp->next->prev = mp;
16311 else
16312 minipool_vector_tail = mp;
16313 }
16314
16315 /* Save the new entry. */
16316 min_mp = mp;
16317
16318 if (mp->prev)
16319 mp = mp->prev;
16320 else
16321 mp->offset = 0;
16322
16323 /* Scan over the following entries and adjust their offsets. */
16324 while (mp->next != NULL)
16325 {
16326 if (mp->next->min_address < mp->min_address + mp->fix_size)
16327 mp->next->min_address = mp->min_address + mp->fix_size;
16328
16329 if (mp->refcount)
16330 mp->next->offset = mp->offset + mp->fix_size;
16331 else
16332 mp->next->offset = mp->offset;
16333
16334 mp = mp->next;
16335 }
16336
16337 return min_mp;
16338 }
16339
16340 static void
16341 assign_minipool_offsets (Mfix *barrier)
16342 {
16343 HOST_WIDE_INT offset = 0;
16344 Mnode *mp;
16345
16346 minipool_barrier = barrier;
16347
16348 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16349 {
16350 mp->offset = offset;
16351
16352 if (mp->refcount > 0)
16353 offset += mp->fix_size;
16354 }
16355 }
16356
16357 /* Output the literal table */
16358 static void
16359 dump_minipool (rtx_insn *scan)
16360 {
16361 Mnode * mp;
16362 Mnode * nmp;
16363 int align64 = 0;
16364
16365 if (ARM_DOUBLEWORD_ALIGN)
16366 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16367 if (mp->refcount > 0 && mp->fix_size >= 8)
16368 {
16369 align64 = 1;
16370 break;
16371 }
16372
16373 if (dump_file)
16374 fprintf (dump_file,
16375 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16376 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16377
16378 scan = emit_label_after (gen_label_rtx (), scan);
16379 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16380 scan = emit_label_after (minipool_vector_label, scan);
16381
16382 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16383 {
16384 if (mp->refcount > 0)
16385 {
16386 if (dump_file)
16387 {
16388 fprintf (dump_file,
16389 ";; Offset %u, min %ld, max %ld ",
16390 (unsigned) mp->offset, (unsigned long) mp->min_address,
16391 (unsigned long) mp->max_address);
16392 arm_print_value (dump_file, mp->value);
16393 fputc ('\n', dump_file);
16394 }
16395
16396 rtx val = copy_rtx (mp->value);
16397
16398 switch (GET_MODE_SIZE (mp->mode))
16399 {
16400 #ifdef HAVE_consttable_1
16401 case 1:
16402 scan = emit_insn_after (gen_consttable_1 (val), scan);
16403 break;
16404
16405 #endif
16406 #ifdef HAVE_consttable_2
16407 case 2:
16408 scan = emit_insn_after (gen_consttable_2 (val), scan);
16409 break;
16410
16411 #endif
16412 #ifdef HAVE_consttable_4
16413 case 4:
16414 scan = emit_insn_after (gen_consttable_4 (val), scan);
16415 break;
16416
16417 #endif
16418 #ifdef HAVE_consttable_8
16419 case 8:
16420 scan = emit_insn_after (gen_consttable_8 (val), scan);
16421 break;
16422
16423 #endif
16424 #ifdef HAVE_consttable_16
16425 case 16:
16426 scan = emit_insn_after (gen_consttable_16 (val), scan);
16427 break;
16428
16429 #endif
16430 default:
16431 gcc_unreachable ();
16432 }
16433 }
16434
16435 nmp = mp->next;
16436 free (mp);
16437 }
16438
16439 minipool_vector_head = minipool_vector_tail = NULL;
16440 scan = emit_insn_after (gen_consttable_end (), scan);
16441 scan = emit_barrier_after (scan);
16442 }
16443
16444 /* Return the cost of forcibly inserting a barrier after INSN. */
16445 static int
16446 arm_barrier_cost (rtx_insn *insn)
16447 {
16448 /* Basing the location of the pool on the loop depth is preferable,
16449 but at the moment, the basic block information seems to be
16450 corrupt by this stage of the compilation. */
16451 int base_cost = 50;
16452 rtx_insn *next = next_nonnote_insn (insn);
16453
16454 if (next != NULL && LABEL_P (next))
16455 base_cost -= 20;
16456
16457 switch (GET_CODE (insn))
16458 {
16459 case CODE_LABEL:
16460 /* It will always be better to place the table before the label, rather
16461 than after it. */
16462 return 50;
16463
16464 case INSN:
16465 case CALL_INSN:
16466 return base_cost;
16467
16468 case JUMP_INSN:
16469 return base_cost - 10;
16470
16471 default:
16472 return base_cost + 10;
16473 }
16474 }
16475
16476 /* Find the best place in the insn stream in the range
16477 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16478 Create the barrier by inserting a jump and add a new fix entry for
16479 it. */
16480 static Mfix *
16481 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16482 {
16483 HOST_WIDE_INT count = 0;
16484 rtx_barrier *barrier;
16485 rtx_insn *from = fix->insn;
16486 /* The instruction after which we will insert the jump. */
16487 rtx_insn *selected = NULL;
16488 int selected_cost;
16489 /* The address at which the jump instruction will be placed. */
16490 HOST_WIDE_INT selected_address;
16491 Mfix * new_fix;
16492 HOST_WIDE_INT max_count = max_address - fix->address;
16493 rtx_code_label *label = gen_label_rtx ();
16494
16495 selected_cost = arm_barrier_cost (from);
16496 selected_address = fix->address;
16497
16498 while (from && count < max_count)
16499 {
16500 rtx_jump_table_data *tmp;
16501 int new_cost;
16502
16503 /* This code shouldn't have been called if there was a natural barrier
16504 within range. */
16505 gcc_assert (!BARRIER_P (from));
16506
16507 /* Count the length of this insn. This must stay in sync with the
16508 code that pushes minipool fixes. */
16509 if (LABEL_P (from))
16510 count += get_label_padding (from);
16511 else
16512 count += get_attr_length (from);
16513
16514 /* If there is a jump table, add its length. */
16515 if (tablejump_p (from, NULL, &tmp))
16516 {
16517 count += get_jump_table_size (tmp);
16518
16519 /* Jump tables aren't in a basic block, so base the cost on
16520 the dispatch insn. If we select this location, we will
16521 still put the pool after the table. */
16522 new_cost = arm_barrier_cost (from);
16523
16524 if (count < max_count
16525 && (!selected || new_cost <= selected_cost))
16526 {
16527 selected = tmp;
16528 selected_cost = new_cost;
16529 selected_address = fix->address + count;
16530 }
16531
16532 /* Continue after the dispatch table. */
16533 from = NEXT_INSN (tmp);
16534 continue;
16535 }
16536
16537 new_cost = arm_barrier_cost (from);
16538
16539 if (count < max_count
16540 && (!selected || new_cost <= selected_cost))
16541 {
16542 selected = from;
16543 selected_cost = new_cost;
16544 selected_address = fix->address + count;
16545 }
16546
16547 from = NEXT_INSN (from);
16548 }
16549
16550 /* Make sure that we found a place to insert the jump. */
16551 gcc_assert (selected);
16552
16553 /* Make sure we do not split a call and its corresponding
16554 CALL_ARG_LOCATION note. */
16555 if (CALL_P (selected))
16556 {
16557 rtx_insn *next = NEXT_INSN (selected);
16558 if (next && NOTE_P (next)
16559 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16560 selected = next;
16561 }
16562
16563 /* Create a new JUMP_INSN that branches around a barrier. */
16564 from = emit_jump_insn_after (gen_jump (label), selected);
16565 JUMP_LABEL (from) = label;
16566 barrier = emit_barrier_after (from);
16567 emit_label_after (label, barrier);
16568
16569 /* Create a minipool barrier entry for the new barrier. */
16570 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16571 new_fix->insn = barrier;
16572 new_fix->address = selected_address;
16573 new_fix->next = fix->next;
16574 fix->next = new_fix;
16575
16576 return new_fix;
16577 }
16578
16579 /* Record that there is a natural barrier in the insn stream at
16580 ADDRESS. */
16581 static void
16582 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16583 {
16584 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16585
16586 fix->insn = insn;
16587 fix->address = address;
16588
16589 fix->next = NULL;
16590 if (minipool_fix_head != NULL)
16591 minipool_fix_tail->next = fix;
16592 else
16593 minipool_fix_head = fix;
16594
16595 minipool_fix_tail = fix;
16596 }
16597
16598 /* Record INSN, which will need fixing up to load a value from the
16599 minipool. ADDRESS is the offset of the insn since the start of the
16600 function; LOC is a pointer to the part of the insn which requires
16601 fixing; VALUE is the constant that must be loaded, which is of type
16602 MODE. */
16603 static void
16604 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16605 machine_mode mode, rtx value)
16606 {
16607 gcc_assert (!arm_disable_literal_pool);
16608 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16609
16610 fix->insn = insn;
16611 fix->address = address;
16612 fix->loc = loc;
16613 fix->mode = mode;
16614 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16615 fix->value = value;
16616 fix->forwards = get_attr_pool_range (insn);
16617 fix->backwards = get_attr_neg_pool_range (insn);
16618 fix->minipool = NULL;
16619
16620 /* If an insn doesn't have a range defined for it, then it isn't
16621 expecting to be reworked by this code. Better to stop now than
16622 to generate duff assembly code. */
16623 gcc_assert (fix->forwards || fix->backwards);
16624
16625 /* If an entry requires 8-byte alignment then assume all constant pools
16626 require 4 bytes of padding. Trying to do this later on a per-pool
16627 basis is awkward because existing pool entries have to be modified. */
16628 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16629 minipool_pad = 4;
16630
16631 if (dump_file)
16632 {
16633 fprintf (dump_file,
16634 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16635 GET_MODE_NAME (mode),
16636 INSN_UID (insn), (unsigned long) address,
16637 -1 * (long)fix->backwards, (long)fix->forwards);
16638 arm_print_value (dump_file, fix->value);
16639 fprintf (dump_file, "\n");
16640 }
16641
16642 /* Add it to the chain of fixes. */
16643 fix->next = NULL;
16644
16645 if (minipool_fix_head != NULL)
16646 minipool_fix_tail->next = fix;
16647 else
16648 minipool_fix_head = fix;
16649
16650 minipool_fix_tail = fix;
16651 }
16652
16653 /* Return the maximum allowed cost, in insns, of synthesizing a 64-bit
16654 constant inline rather than loading it from the constant pool. */
16656 int
16657 arm_max_const_double_inline_cost ()
16658 {
16659 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16660 }
16661
16662 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16663 Returns the number of insns needed, or 99 if we don't know how to
16664 do it. */
16665 int
16666 arm_const_double_inline_cost (rtx val)
16667 {
16668 rtx lowpart, highpart;
16669 machine_mode mode;
16670
16671 mode = GET_MODE (val);
16672
16673 if (mode == VOIDmode)
16674 mode = DImode;
16675
16676 gcc_assert (GET_MODE_SIZE (mode) == 8);
16677
16678 lowpart = gen_lowpart (SImode, val);
16679 highpart = gen_highpart_mode (SImode, mode, val);
16680
16681 gcc_assert (CONST_INT_P (lowpart));
16682 gcc_assert (CONST_INT_P (highpart));
16683
16684 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16685 NULL_RTX, NULL_RTX, 0, 0)
16686 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16687 NULL_RTX, NULL_RTX, 0, 0));
16688 }
16689
16690 /* Cost of loading a SImode constant. */
16691 static inline int
16692 arm_const_inline_cost (enum rtx_code code, rtx val)
16693 {
16694 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16695 NULL_RTX, NULL_RTX, 1, 0);
16696 }
16697
16698 /* Return true if it is worthwhile to split a 64-bit constant into two
16699 32-bit operations. This is the case if optimizing for size, or
16700 if we have load delay slots, or if one 32-bit part can be done with
16701 a single data operation. */
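/* For example both 32-bit halves of 0x000000ff000000ff are valid ARM
   immediates, so the constant is better built with two data-processing
   instructions than loaded from the literal pool. */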
16702 bool
16703 arm_const_double_by_parts (rtx val)
16704 {
16705 machine_mode mode = GET_MODE (val);
16706 rtx part;
16707
16708 if (optimize_size || arm_ld_sched)
16709 return true;
16710
16711 if (mode == VOIDmode)
16712 mode = DImode;
16713
16714 part = gen_highpart_mode (SImode, mode, val);
16715
16716 gcc_assert (CONST_INT_P (part));
16717
16718 if (const_ok_for_arm (INTVAL (part))
16719 || const_ok_for_arm (~INTVAL (part)))
16720 return true;
16721
16722 part = gen_lowpart (SImode, val);
16723
16724 gcc_assert (CONST_INT_P (part));
16725
16726 if (const_ok_for_arm (INTVAL (part))
16727 || const_ok_for_arm (~INTVAL (part)))
16728 return true;
16729
16730 return false;
16731 }
16732
16733 /* Return true if it is possible to inline both the high and low parts
16734 of a 64-bit constant into 32-bit data processing instructions. */
16735 bool
16736 arm_const_double_by_immediates (rtx val)
16737 {
16738 machine_mode mode = GET_MODE (val);
16739 rtx part;
16740
16741 if (mode == VOIDmode)
16742 mode = DImode;
16743
16744 part = gen_highpart_mode (SImode, mode, val);
16745
16746 gcc_assert (CONST_INT_P (part));
16747
16748 if (!const_ok_for_arm (INTVAL (part)))
16749 return false;
16750
16751 part = gen_lowpart (SImode, val);
16752
16753 gcc_assert (CONST_INT_P (part));
16754
16755 if (!const_ok_for_arm (INTVAL (part)))
16756 return false;
16757
16758 return true;
16759 }
16760
16761 /* Scan INSN and note any of its operands that need fixing.
16762 If DO_PUSHES is false we do not actually push any of the fixups
16763 needed. */
16764 static void
16765 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16766 {
16767 int opno;
16768
16769 extract_constrain_insn (insn);
16770
16771 if (recog_data.n_alternatives == 0)
16772 return;
16773
16774 /* Fill in recog_op_alt with information about the constraints of
16775 this insn. */
16776 preprocess_constraints (insn);
16777
16778 const operand_alternative *op_alt = which_op_alt ();
16779 for (opno = 0; opno < recog_data.n_operands; opno++)
16780 {
16781 /* Things we need to fix can only occur in inputs. */
16782 if (recog_data.operand_type[opno] != OP_IN)
16783 continue;
16784
16785 /* If this alternative is a memory reference, then any mention
16786 of constants in this alternative is really to fool reload
16787 into allowing us to accept one there. We need to fix them up
16788 now so that we output the right code. */
16789 if (op_alt[opno].memory_ok)
16790 {
16791 rtx op = recog_data.operand[opno];
16792
16793 if (CONSTANT_P (op))
16794 {
16795 if (do_pushes)
16796 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16797 recog_data.operand_mode[opno], op);
16798 }
16799 else if (MEM_P (op)
16800 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16801 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16802 {
16803 if (do_pushes)
16804 {
16805 rtx cop = avoid_constant_pool_reference (op);
16806
16807 /* Casting the address of something to a mode narrower
16808 than a word can cause avoid_constant_pool_reference()
16809 to return the pool reference itself. That's no good to
16810 us here. Lets just hope that we can use the
16811 constant pool value directly. */
16812 if (op == cop)
16813 cop = get_pool_constant (XEXP (op, 0));
16814
16815 push_minipool_fix (insn, address,
16816 recog_data.operand_loc[opno],
16817 recog_data.operand_mode[opno], cop);
16818 }
16819
16820 }
16821 }
16822 }
16823
16824 return;
16825 }
16826
16827 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16828 and unions in the context of ARMv8-M Security Extensions. It is used as a
16829 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16830 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
16831 or four masks, depending on whether it is being computed for a
16832 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16833 respectively. The tree for the type of the argument or a field within an
16834 argument is passed in ARG_TYPE, the current register this argument or field
16835 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16836 argument or field starts at is passed in STARTING_BIT and the last used bit
16837 is kept in LAST_USED_BIT which is also updated accordingly. */
16838
16839 static unsigned HOST_WIDE_INT
16840 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16841 uint32_t * padding_bits_to_clear,
16842 unsigned starting_bit, int * last_used_bit)
16843
16844 {
16845 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16846
16847 if (TREE_CODE (arg_type) == RECORD_TYPE)
16848 {
16849 unsigned current_bit = starting_bit;
16850 tree field;
16851 long int offset, size;
16852
16853
16854 field = TYPE_FIELDS (arg_type);
16855 while (field)
16856 {
16857 /* The offset within a structure is always an offset from
16858 the start of that structure. Make sure we take that into account in
16859 the calculation of the register-based offset that we use here. */
16860 offset = starting_bit;
16861 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16862 offset %= 32;
16863
16864 /* This is the actual size of the field, for bitfields this is the
16865 bitfield width and not the container size. */
16866 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16867
16868 if (*last_used_bit != offset)
16869 {
16870 if (offset < *last_used_bit)
16871 {
16872 /* This field's offset is before the 'last_used_bit', that
16873 means this field goes on the next register. So we need to
16874 pad the rest of the current register and increase the
16875 register number. */
16876 uint32_t mask;
16877 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16878 mask++;
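/* For example *last_used_bit == 4 gives mask == 0xfffffff0, marking
   bits 4..31 of the current register as padding. */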
16879
16880 padding_bits_to_clear[*regno] |= mask;
16881 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16882 (*regno)++;
16883 }
16884 else
16885 {
16886 /* Otherwise we pad the bits between the last field's end and
16887 the start of the new field. */
16888 uint32_t mask;
16889
16890 mask = ((uint32_t)-1) >> (32 - offset);
16891 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
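/* For example *last_used_bit == 4 and offset == 8 give mask == 0xf0:
   only bits 4..7, between the end of the previous field and the start
   of this one, are padding. */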
16892 padding_bits_to_clear[*regno] |= mask;
16893 }
16894 current_bit = offset;
16895 }
16896
16897 /* Calculate further padding bits for inner structs/unions too. */
16898 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16899 {
16900 *last_used_bit = current_bit;
16901 not_to_clear_reg_mask
16902 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16903 padding_bits_to_clear, offset,
16904 last_used_bit);
16905 }
16906 else
16907 {
16908 /* Update 'current_bit' with this field's size. If the
16909 'current_bit' lies in a subsequent register, update 'regno' and
16910 reset 'current_bit' to point to the current bit in that new
16911 register. */
16912 current_bit += size;
16913 while (current_bit >= 32)
16914 {
16915 current_bit-=32;
16916 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16917 (*regno)++;
16918 }
16919 *last_used_bit = current_bit;
16920 }
16921
16922 field = TREE_CHAIN (field);
16923 }
16924 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16925 }
16926 else if (TREE_CODE (arg_type) == UNION_TYPE)
16927 {
16928 tree field, field_t;
16929 int i, regno_t, field_size;
16930 int max_reg = -1;
16931 int max_bit = -1;
16932 uint32_t mask;
16933 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16934 = {-1, -1, -1, -1};
16935
16936 /* To compute the padding bits in a union we only consider bits as
16937 padding bits if they are always either a padding bit or fall outside a
16938 field's size, for all fields in the union. */
16939 field = TYPE_FIELDS (arg_type);
16940 while (field)
16941 {
16942 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16943 = {0U, 0U, 0U, 0U};
16944 int last_used_bit_t = *last_used_bit;
16945 regno_t = *regno;
16946 field_t = TREE_TYPE (field);
16947
16948 /* If the field's type is either a record or a union make sure to
16949 compute their padding bits too. */
16950 if (RECORD_OR_UNION_TYPE_P (field_t))
16951 not_to_clear_reg_mask
16952 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16953 &padding_bits_to_clear_t[0],
16954 starting_bit, &last_used_bit_t);
16955 else
16956 {
16957 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16958 regno_t = (field_size / 32) + *regno;
16959 last_used_bit_t = (starting_bit + field_size) % 32;
16960 }
16961
16962 for (i = *regno; i < regno_t; i++)
16963 {
16964 /* For all but the last register used by this field only keep the
16965 padding bits that were padding bits in this field. */
16966 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16967 }
16968
16969 /* For the last register, keep all padding bits that were padding
16970 bits in this field and any padding bits that are still valid
16971 as padding bits but fall outside of this field's size. */
16972 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16973 padding_bits_to_clear_res[regno_t]
16974 &= padding_bits_to_clear_t[regno_t] | mask;
16975
16976 /* Update the maximum size of the fields in terms of registers used
16977 ('max_reg') and the 'last_used_bit' in said register. */
16978 if (max_reg < regno_t)
16979 {
16980 max_reg = regno_t;
16981 max_bit = last_used_bit_t;
16982 }
16983 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16984 max_bit = last_used_bit_t;
16985
16986 field = TREE_CHAIN (field);
16987 }
16988
16989 /* Update the current padding_bits_to_clear using the intersection of the
16990 padding bits of all the fields. */
16991 for (i=*regno; i < max_reg; i++)
16992 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16993
16994 /* Do not keep trailing padding bits, we do not know yet whether this
16995 is the end of the argument. */
16996 mask = ((uint32_t) 1 << max_bit) - 1;
16997 padding_bits_to_clear[max_reg]
16998 |= padding_bits_to_clear_res[max_reg] & mask;
16999
17000 *regno = max_reg;
17001 *last_used_bit = max_bit;
17002 }
17003 else
17004 /* This function should only be used for structs and unions. */
17005 gcc_unreachable ();
17006
17007 return not_to_clear_reg_mask;
17008 }
17009
17010 /* In the context of ARMv8-M Security Extensions, this function is used for both
17011 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
17012 registers are used when returning or passing arguments, which is then
17013 returned as a mask. It will also compute a mask to indicate padding/unused
17014 bits for each of these registers, and passes this through the
17015 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
17016 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
17017 the starting register used to pass this argument or return value is passed
17018 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
17019 for struct and union types. */
17020
17021 static unsigned HOST_WIDE_INT
17022 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
17023 uint32_t * padding_bits_to_clear)
17024
17025 {
17026 int last_used_bit = 0;
17027 unsigned HOST_WIDE_INT not_to_clear_mask;
17028
17029 if (RECORD_OR_UNION_TYPE_P (arg_type))
17030 {
17031 not_to_clear_mask
17032 = comp_not_to_clear_mask_str_un (arg_type, &regno,
17033 padding_bits_to_clear, 0,
17034 &last_used_bit);
17035
17036
17037 /* If the 'last_used_bit' is not zero, that means we are still using a
17038 part of the last 'regno'. In such cases we must clear the trailing
17039 bits. Otherwise we are not using regno at all and should mark it to be
17040 cleared. */
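/* For example last_used_bit == 16 yields the mask 0xffff0000 below,
   marking the upper half of the last register used as padding to be
   cleared. */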
17041 if (last_used_bit != 0)
17042 padding_bits_to_clear[regno]
17043 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
17044 else
17045 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
17046 }
17047 else
17048 {
17049 not_to_clear_mask = 0;
17050 /* We are not dealing with structs or unions, so these arguments may be
17051 passed in floating point registers too. In some cases a BLKmode is
17052 used when returning or passing arguments in multiple VFP registers. */
17053 if (GET_MODE (arg_rtx) == BLKmode)
17054 {
17055 int i, arg_regs;
17056 rtx reg;
17057
17058 /* This should really only occur when dealing with the hard-float
17059 ABI. */
17060 gcc_assert (TARGET_HARD_FLOAT_ABI);
17061
17062 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
17063 {
17064 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
17065 gcc_assert (REG_P (reg));
17066
17067 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
17068
17069 /* If we are dealing with DF mode, make sure we don't
17070 clear either of the registers it addresses. */
17071 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
17072 if (arg_regs > 1)
17073 {
17074 unsigned HOST_WIDE_INT mask;
17075 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
17076 mask -= HOST_WIDE_INT_1U << REGNO (reg);
17077 not_to_clear_mask |= mask;
17078 }
17079 }
17080 }
17081 else
17082 {
17083 /* Otherwise we can rely on the MODE to determine how many registers
17084 are being used by this argument. */
17085 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
17086 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17087 if (arg_regs > 1)
17088 {
17089 unsigned HOST_WIDE_INT
17090 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
17091 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17092 not_to_clear_mask |= mask;
17093 }
17094 }
17095 }
17096
17097 return not_to_clear_mask;
17098 }
17099
17100 /* Clear registers that may hold secrets before doing a cmse_nonsecure_call
17101 or returning from a cmse_nonsecure_entry function. TO_CLEAR_BITMAP
17102 indicates which registers are to be fully cleared, using the value in
17103 register CLEARING_REG if more efficient. The PADDING_BITS_TO_CLEAR array
17104 of PADDING_BITS_LEN entries gives the bits that need to be cleared in
17105 caller-saved core registers, with SCRATCH_REG used as a scratch register for that clearing.
17106
17107 NOTE: one of three following assertions must hold:
17108 - SCRATCH_REG is a low register
17109 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
17110 in TO_CLEAR_BITMAP)
17111 - CLEARING_REG is a low register. */
17112
17113 static void
17114 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
17115 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
17116 {
17117 bool saved_clearing = false;
17118 rtx saved_clearing_reg = NULL_RTX;
17119 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
17120
17121 gcc_assert (arm_arch_cmse);
17122
17123 if (!bitmap_empty_p (to_clear_bitmap))
17124 {
17125 minregno = bitmap_first_set_bit (to_clear_bitmap);
17126 maxregno = bitmap_last_set_bit (to_clear_bitmap);
17127 }
17128 clearing_regno = REGNO (clearing_reg);
17129
17130 /* Clear padding bits. */
17131 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
17132 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
17133 {
17134 uint64_t mask;
17135 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
17136
17137 if (padding_bits_to_clear[i] == 0)
17138 continue;
17139
17140 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
17141 CLEARING_REG as scratch. */
17142 if (TARGET_THUMB1
17143 && REGNO (scratch_reg) > LAST_LO_REGNUM)
17144 {
17145 /* clearing_reg is not to be cleared, copy its value into scratch_reg
17146 such that we can use clearing_reg to clear the unused bits in the
17147 arguments. */
17148 if ((clearing_regno > maxregno
17149 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
17150 && !saved_clearing)
17151 {
17152 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
17153 emit_move_insn (scratch_reg, clearing_reg);
17154 saved_clearing = true;
17155 saved_clearing_reg = scratch_reg;
17156 }
17157 scratch_reg = clearing_reg;
17158 }
17159
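/* For example padding_bits_to_clear[i] == 0x0000ff00 builds 0xffff00ff
   in SCRATCH_REG (the lower half by a move, the upper half by an
   insertion into bits 16..31), and the AND below then zeroes bits 8..15
   of the argument register. */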
17160 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
17161 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
17162 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
17163
17164 /* Fill the top half of the negated padding_bits_to_clear[i]. */
17165 mask = (~padding_bits_to_clear[i]) >> 16;
17166 rtx16 = gen_int_mode (16, SImode);
17167 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
17168 if (mask)
17169 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
17170
17171 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
17172 }
17173 if (saved_clearing)
17174 emit_move_insn (clearing_reg, saved_clearing_reg);
17175
17176
17177 /* Clear full registers. */
17178
17179 /* If not marked for clearing, clearing_reg already does not contain
17180 any secret. */
17181 if (clearing_regno <= maxregno
17182 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
17183 {
17184 emit_move_insn (clearing_reg, const0_rtx);
17185 emit_use (clearing_reg);
17186 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
17187 }
17188
17189 for (regno = minregno; regno <= maxregno; regno++)
17190 {
17191 if (!bitmap_bit_p (to_clear_bitmap, regno))
17192 continue;
17193
17194 if (IS_VFP_REGNUM (regno))
17195 {
17196 /* If regno is an even vfp register and its successor is also to
17197 be cleared, use vmov. */
17198 if (TARGET_VFP_DOUBLE
17199 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17200 && bitmap_bit_p (to_clear_bitmap, regno + 1))
17201 {
17202 emit_move_insn (gen_rtx_REG (DFmode, regno),
17203 CONST1_RTX (DFmode));
17204 emit_use (gen_rtx_REG (DFmode, regno));
17205 regno++;
17206 }
17207 else
17208 {
17209 emit_move_insn (gen_rtx_REG (SFmode, regno),
17210 CONST1_RTX (SFmode));
17211 emit_use (gen_rtx_REG (SFmode, regno));
17212 }
17213 }
17214 else
17215 {
17216 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
17217 emit_use (gen_rtx_REG (SImode, regno));
17218 }
17219 }
17220 }
17221
17222 /* Clears caller saved registers not used to pass arguments before a
17223 cmse_nonsecure_call. Saving, clearing and restoring of callee saved
17224 registers is done in __gnu_cmse_nonsecure_call libcall.
17225 See libgcc/config/arm/cmse_nonsecure_call.S. */
17226
17227 static void
17228 cmse_nonsecure_call_clear_caller_saved (void)
17229 {
17230 basic_block bb;
17231
17232 FOR_EACH_BB_FN (bb, cfun)
17233 {
17234 rtx_insn *insn;
17235
17236 FOR_BB_INSNS (bb, insn)
17237 {
17238 unsigned address_regnum, regno, maxregno =
17239 TARGET_HARD_FLOAT_ABI ? D7_VFP_REGNUM : NUM_ARG_REGS - 1;
17240 auto_sbitmap to_clear_bitmap (maxregno + 1);
17241 rtx_insn *seq;
17242 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
17243 rtx address;
17244 CUMULATIVE_ARGS args_so_far_v;
17245 cumulative_args_t args_so_far;
17246 tree arg_type, fntype;
17247 bool first_param = true;
17248 function_args_iterator args_iter;
17249 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
17250
17251 if (!NONDEBUG_INSN_P (insn))
17252 continue;
17253
17254 if (!CALL_P (insn))
17255 continue;
17256
17257 pat = PATTERN (insn);
17258 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
17259 call = XVECEXP (pat, 0, 0);
17260
17261 /* Get the real call RTX if the insn sets a value, ie. returns. */
17262 if (GET_CODE (call) == SET)
17263 call = SET_SRC (call);
17264
17265 /* Check if it is a cmse_nonsecure_call. */
17266 unspec = XEXP (call, 0);
17267 if (GET_CODE (unspec) != UNSPEC
17268 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
17269 continue;
17270
17271 /* Determine the caller-saved registers we need to clear. */
17272 bitmap_clear (to_clear_bitmap);
17273 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
17274
17275 /* Only look at the caller-saved floating point registers in case of
17276 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
17277 lazy store and loads which clear both caller- and callee-saved
17278 registers. */
17279 if (TARGET_HARD_FLOAT_ABI)
17280 {
17281 auto_sbitmap float_bitmap (maxregno + 1);
17282
17283 bitmap_clear (float_bitmap);
17284 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
17285 D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1);
17286 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
17287 }
17288
17289 /* Make sure the register used to hold the function address is not
17290 cleared. */
17291 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17292 gcc_assert (MEM_P (address));
17293 gcc_assert (REG_P (XEXP (address, 0)));
17294 address_regnum = REGNO (XEXP (address, 0));
17295 if (address_regnum < R0_REGNUM + NUM_ARG_REGS)
17296 bitmap_clear_bit (to_clear_bitmap, address_regnum);
17297
17298 /* Set basic block of call insn so that df rescan is performed on
17299 insns inserted here. */
17300 set_block_for_insn (insn, bb);
17301 df_set_flags (DF_DEFER_INSN_RESCAN);
17302 start_sequence ();
17303
17304 /* Make sure the scheduler doesn't schedule other insns beyond
17305 here. */
17306 emit_insn (gen_blockage ());
17307
17308 /* Walk through all arguments and clear registers appropriately. */
17310 fntype = TREE_TYPE (MEM_EXPR (address));
17311 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17312 NULL_TREE);
17313 args_so_far = pack_cumulative_args (&args_so_far_v);
17314 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17315 {
17316 rtx arg_rtx;
17317 uint64_t to_clear_args_mask;
17318 machine_mode arg_mode = TYPE_MODE (arg_type);
17319
17320 if (VOID_TYPE_P (arg_type))
17321 continue;
17322
17323 if (!first_param)
17324 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17325 true);
17326
17327 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17328 true);
17329 gcc_assert (REG_P (arg_rtx));
17330 to_clear_args_mask
17331 = compute_not_to_clear_mask (arg_type, arg_rtx,
17332 REGNO (arg_rtx),
17333 &padding_bits_to_clear[0]);
17334 if (to_clear_args_mask)
17335 {
17336 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17337 {
17338 if (to_clear_args_mask & (1ULL << regno))
17339 bitmap_clear_bit (to_clear_bitmap, regno);
17340 }
17341 }
17342
17343 first_param = false;
17344 }
17345
17346 /* We use right shift and left shift to clear the LSB of the address
17347 we jump to instead of using bic, to avoid having to use an extra
17348 register on Thumb-1. */
17349 clearing_reg = XEXP (address, 0);
17350 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
17351 emit_insn (gen_rtx_SET (clearing_reg, shift));
17352 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
17353 emit_insn (gen_rtx_SET (clearing_reg, shift));
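/* The two shifts leave the address with bit zero clear, e.g.
   0x08000123 becomes 0x08000122. */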
17354
17355 /* Clear caller-saved registers that could leak secrets before doing a
17356 non-secure call. */
17357 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
17358 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
17359 NUM_ARG_REGS, ip_reg, clearing_reg);
17360
17361 seq = get_insns ();
17362 end_sequence ();
17363 emit_insn_before (seq, insn);
17364 }
17365 }
17366 }
17367
17368 /* Rewrite move insn into subtract of 0 if the condition codes will
17369 be useful in the next conditional jump insn. */
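/* For instance, when a move such as "movs r1, r2" feeds a later compare of
   r1 against zero, the move is re-expressed as "subs r1, r2, #0" so that
   the condition codes it sets can be reused by the compare-and-branch. */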
17370
17371 static void
17372 thumb1_reorg (void)
17373 {
17374 basic_block bb;
17375
17376 FOR_EACH_BB_FN (bb, cfun)
17377 {
17378 rtx dest, src;
17379 rtx cmp, op0, op1, set = NULL;
17380 rtx_insn *prev, *insn = BB_END (bb);
17381 bool insn_clobbered = false;
17382
17383 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17384 insn = PREV_INSN (insn);
17385
17386 /* Find the last cbranchsi4_insn in basic block BB. */
17387 if (insn == BB_HEAD (bb)
17388 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17389 continue;
17390
17391 /* Get the register with which we are comparing. */
17392 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17393 op0 = XEXP (cmp, 0);
17394 op1 = XEXP (cmp, 1);
17395
17396 /* Check that comparison is against ZERO. */
17397 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17398 continue;
17399
17400 /* Find the first flag setting insn before INSN in basic block BB. */
17401 gcc_assert (insn != BB_HEAD (bb));
17402 for (prev = PREV_INSN (insn);
17403 (!insn_clobbered
17404 && prev != BB_HEAD (bb)
17405 && (NOTE_P (prev)
17406 || DEBUG_INSN_P (prev)
17407 || ((set = single_set (prev)) != NULL
17408 && get_attr_conds (prev) == CONDS_NOCOND)));
17409 prev = PREV_INSN (prev))
17410 {
17411 if (reg_set_p (op0, prev))
17412 insn_clobbered = true;
17413 }
17414
17415 /* Skip if op0 is clobbered by insn other than prev. */
17416 if (insn_clobbered)
17417 continue;
17418
17419 if (!set)
17420 continue;
17421
17422 dest = SET_DEST (set);
17423 src = SET_SRC (set);
17424 if (!low_register_operand (dest, SImode)
17425 || !low_register_operand (src, SImode))
17426 continue;
17427
17428 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17429 in INSN. Both src and dest of the move insn are checked. */
17430 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17431 {
17432 dest = copy_rtx (dest);
17433 src = copy_rtx (src);
17434 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17435 PATTERN (prev) = gen_rtx_SET (dest, src);
17436 INSN_CODE (prev) = -1;
17437 /* Set test register in INSN to dest. */
17438 XEXP (cmp, 0) = copy_rtx (dest);
17439 INSN_CODE (insn) = -1;
17440 }
17441 }
17442 }
17443
17444 /* Convert instructions to their cc-clobbering variant if possible, since
17445 that allows us to use smaller encodings. */
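/* For instance the three-register "add r0, r1, r2" needs the 32-bit add.w
   encoding, whereas the flag-setting "adds r0, r1, r2" has a 16-bit
   encoding that becomes usable once we know the condition codes are dead. */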
17446
17447 static void
17448 thumb2_reorg (void)
17449 {
17450 basic_block bb;
17451 regset_head live;
17452
17453 INIT_REG_SET (&live);
17454
17455 /* We are freeing block_for_insn in the toplev to keep compatibility
17456 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17457 compute_bb_for_insn ();
17458 df_analyze ();
17459
17460 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17461
17462 FOR_EACH_BB_FN (bb, cfun)
17463 {
17464 if ((current_tune->disparage_flag_setting_t16_encodings
17465 == tune_params::DISPARAGE_FLAGS_ALL)
17466 && optimize_bb_for_speed_p (bb))
17467 continue;
17468
17469 rtx_insn *insn;
17470 Convert_Action action = SKIP;
17471 Convert_Action action_for_partial_flag_setting
17472 = ((current_tune->disparage_flag_setting_t16_encodings
17473 != tune_params::DISPARAGE_FLAGS_NEITHER)
17474 && optimize_bb_for_speed_p (bb))
17475 ? SKIP : CONV;
17476
17477 COPY_REG_SET (&live, DF_LR_OUT (bb));
17478 df_simulate_initialize_backwards (bb, &live);
17479 FOR_BB_INSNS_REVERSE (bb, insn)
17480 {
17481 if (NONJUMP_INSN_P (insn)
17482 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17483 && GET_CODE (PATTERN (insn)) == SET)
17484 {
17485 action = SKIP;
17486 rtx pat = PATTERN (insn);
17487 rtx dst = XEXP (pat, 0);
17488 rtx src = XEXP (pat, 1);
17489 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17490
17491 if (UNARY_P (src) || BINARY_P (src))
17492 op0 = XEXP (src, 0);
17493
17494 if (BINARY_P (src))
17495 op1 = XEXP (src, 1);
17496
17497 if (low_register_operand (dst, SImode))
17498 {
17499 switch (GET_CODE (src))
17500 {
17501 case PLUS:
17502 /* Adding two registers and storing the result
17503 in the first source is already a 16-bit
17504 operation. */
17505 if (rtx_equal_p (dst, op0)
17506 && register_operand (op1, SImode))
17507 break;
17508
17509 if (low_register_operand (op0, SImode))
17510 {
17511 /* ADDS <Rd>,<Rn>,<Rm> */
17512 if (low_register_operand (op1, SImode))
17513 action = CONV;
17514 /* ADDS <Rdn>,#<imm8> */
17515 /* SUBS <Rdn>,#<imm8> */
17516 else if (rtx_equal_p (dst, op0)
17517 && CONST_INT_P (op1)
17518 && IN_RANGE (INTVAL (op1), -255, 255))
17519 action = CONV;
17520 /* ADDS <Rd>,<Rn>,#<imm3> */
17521 /* SUBS <Rd>,<Rn>,#<imm3> */
17522 else if (CONST_INT_P (op1)
17523 && IN_RANGE (INTVAL (op1), -7, 7))
17524 action = CONV;
17525 }
17526 /* ADCS <Rd>, <Rn> */
17527 else if (GET_CODE (XEXP (src, 0)) == PLUS
17528 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17529 && low_register_operand (XEXP (XEXP (src, 0), 1),
17530 SImode)
17531 && COMPARISON_P (op1)
17532 && cc_register (XEXP (op1, 0), VOIDmode)
17533 && maybe_get_arm_condition_code (op1) == ARM_CS
17534 && XEXP (op1, 1) == const0_rtx)
17535 action = CONV;
17536 break;
17537
17538 case MINUS:
17539 /* RSBS <Rd>,<Rn>,#0
17540 Not handled here: see NEG below. */
17541 /* SUBS <Rd>,<Rn>,#<imm3>
17542 SUBS <Rdn>,#<imm8>
17543 Not handled here: see PLUS above. */
17544 /* SUBS <Rd>,<Rn>,<Rm> */
17545 if (low_register_operand (op0, SImode)
17546 && low_register_operand (op1, SImode))
17547 action = CONV;
17548 break;
17549
17550 case MULT:
17551 /* MULS <Rdm>,<Rn>,<Rdm>
17552 As an exception to the rule, this is only used
17553 when optimizing for size since MULS is slow on all
17554 known implementations. We do not even want to use
17555 MULS in cold code, if optimizing for speed, so we
17556 test the global flag here. */
17557 if (!optimize_size)
17558 break;
17559 /* Fall through. */
17560 case AND:
17561 case IOR:
17562 case XOR:
17563 /* ANDS <Rdn>,<Rm> */
17564 if (rtx_equal_p (dst, op0)
17565 && low_register_operand (op1, SImode))
17566 action = action_for_partial_flag_setting;
17567 else if (rtx_equal_p (dst, op1)
17568 && low_register_operand (op0, SImode))
17569 action = action_for_partial_flag_setting == SKIP
17570 ? SKIP : SWAP_CONV;
17571 break;
17572
17573 case ASHIFTRT:
17574 case ASHIFT:
17575 case LSHIFTRT:
17576 /* ASRS <Rdn>,<Rm> */
17577 /* LSRS <Rdn>,<Rm> */
17578 /* LSLS <Rdn>,<Rm> */
17579 if (rtx_equal_p (dst, op0)
17580 && low_register_operand (op1, SImode))
17581 action = action_for_partial_flag_setting;
17582 /* ASRS <Rd>,<Rm>,#<imm5> */
17583 /* LSRS <Rd>,<Rm>,#<imm5> */
17584 /* LSLS <Rd>,<Rm>,#<imm5> */
17585 else if (low_register_operand (op0, SImode)
17586 && CONST_INT_P (op1)
17587 && IN_RANGE (INTVAL (op1), 0, 31))
17588 action = action_for_partial_flag_setting;
17589 break;
17590
17591 case ROTATERT:
17592 /* RORS <Rdn>,<Rm> */
17593 if (rtx_equal_p (dst, op0)
17594 && low_register_operand (op1, SImode))
17595 action = action_for_partial_flag_setting;
17596 break;
17597
17598 case NOT:
17599 /* MVNS <Rd>,<Rm> */
17600 if (low_register_operand (op0, SImode))
17601 action = action_for_partial_flag_setting;
17602 break;
17603
17604 case NEG:
17605 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17606 if (low_register_operand (op0, SImode))
17607 action = CONV;
17608 break;
17609
17610 case CONST_INT:
17611 /* MOVS <Rd>,#<imm8> */
17612 if (CONST_INT_P (src)
17613 && IN_RANGE (INTVAL (src), 0, 255))
17614 action = action_for_partial_flag_setting;
17615 break;
17616
17617 case REG:
17618 /* MOVS and MOV<c> with registers have different
17619 encodings, so are not relevant here. */
17620 break;
17621
17622 default:
17623 break;
17624 }
17625 }
17626
17627 if (action != SKIP)
17628 {
17629 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17630 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17631 rtvec vec;
17632
17633 if (action == SWAP_CONV)
17634 {
17635 src = copy_rtx (src);
17636 XEXP (src, 0) = op1;
17637 XEXP (src, 1) = op0;
17638 pat = gen_rtx_SET (dst, src);
17639 vec = gen_rtvec (2, pat, clobber);
17640 }
17641 else /* action == CONV */
17642 vec = gen_rtvec (2, pat, clobber);
17643
17644 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17645 INSN_CODE (insn) = -1;
17646 }
17647 }
17648
17649 if (NONDEBUG_INSN_P (insn))
17650 df_simulate_one_insn_backwards (bb, insn, &live);
17651 }
17652 }
17653
17654 CLEAR_REG_SET (&live);
17655 }
17656
17657 /* Gcc puts the pool in the wrong place for ARM, since we can only
17658 load from addresses within a limited distance of the pc. We do some
17659 special munging to move the constant pool values to the correct
17660 point in the code. */
17661 static void
17662 arm_reorg (void)
17663 {
17664 rtx_insn *insn;
17665 HOST_WIDE_INT address = 0;
17666 Mfix * fix;
17667
17668 if (use_cmse)
17669 cmse_nonsecure_call_clear_caller_saved ();
17670 if (TARGET_THUMB1)
17671 thumb1_reorg ();
17672 else if (TARGET_THUMB2)
17673 thumb2_reorg ();
17674
17675 /* Ensure all insns that must be split have been split at this point.
17676 Otherwise, the pool placement code below may compute incorrect
17677 insn lengths. Note that when optimizing, all insns have already
17678 been split at this point. */
17679 if (!optimize)
17680 split_all_insns_noflow ();
17681
17682 /* Make sure we do not attempt to create a literal pool even though it should
17683 no longer be necessary to create any. */
17684 if (arm_disable_literal_pool)
17685 return;
17686
17687 minipool_fix_head = minipool_fix_tail = NULL;
17688
17689 /* The first insn must always be a note, or the code below won't
17690 scan it properly. */
17691 insn = get_insns ();
17692 gcc_assert (NOTE_P (insn));
17693 minipool_pad = 0;
17694
17695 /* Scan all the insns and record the operands that will need fixing. */
17696 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17697 {
17698 if (BARRIER_P (insn))
17699 push_minipool_barrier (insn, address);
17700 else if (INSN_P (insn))
17701 {
17702 rtx_jump_table_data *table;
17703
17704 note_invalid_constants (insn, address, true);
17705 address += get_attr_length (insn);
17706
17707 /* If the insn is a vector jump, add the size of the table
17708 and skip the table. */
17709 if (tablejump_p (insn, NULL, &table))
17710 {
17711 address += get_jump_table_size (table);
17712 insn = table;
17713 }
17714 }
17715 else if (LABEL_P (insn))
17716 /* Add the worst-case padding due to alignment. We don't add
17717 the _current_ padding because the minipool insertions
17718 themselves might change it. */
17719 address += get_label_padding (insn);
17720 }
17721
17722 fix = minipool_fix_head;
17723
17724 /* Now scan the fixups and perform the required changes. */
17725 while (fix)
17726 {
17727 Mfix * ftmp;
17728 Mfix * fdel;
17729 Mfix * last_added_fix;
17730 Mfix * last_barrier = NULL;
17731 Mfix * this_fix;
17732
17733 /* Skip any further barriers before the next fix. */
17734 while (fix && BARRIER_P (fix->insn))
17735 fix = fix->next;
17736
17737 /* No more fixes. */
17738 if (fix == NULL)
17739 break;
17740
17741 last_added_fix = NULL;
17742
17743 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17744 {
17745 if (BARRIER_P (ftmp->insn))
17746 {
17747 if (ftmp->address >= minipool_vector_head->max_address)
17748 break;
17749
17750 last_barrier = ftmp;
17751 }
17752 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17753 break;
17754
17755 last_added_fix = ftmp; /* Keep track of the last fix added. */
17756 }
17757
17758 /* If we found a barrier, drop back to that; any fixes that we
17759 could have reached but come after the barrier will now go in
17760 the next mini-pool. */
17761 if (last_barrier != NULL)
17762 {
17763 /* Reduce the refcount for those fixes that won't go into this
17764 pool after all. */
17765 for (fdel = last_barrier->next;
17766 fdel && fdel != ftmp;
17767 fdel = fdel->next)
17768 {
17769 fdel->minipool->refcount--;
17770 fdel->minipool = NULL;
17771 }
17772
17773 ftmp = last_barrier;
17774 }
17775 else
17776 {
17777 /* ftmp is first fix that we can't fit into this pool and
17778 there are no natural barriers that we could use. Insert a
17779 new barrier in the code somewhere between the previous
17780 fix and this one, and arrange to jump around it. */
17781 HOST_WIDE_INT max_address;
17782
17783 /* The last item on the list of fixes must be a barrier, so
17784 we can never run off the end of the list of fixes without
17785 last_barrier being set. */
17786 gcc_assert (ftmp);
17787
17788 max_address = minipool_vector_head->max_address;
17789 /* Check that there isn't another fix that is in range that
17790 we couldn't fit into this pool because the pool was
17791 already too large: we need to put the pool before such an
17792 instruction. The pool itself may come just after the
17793 fix because create_fix_barrier also allows space for a
17794 jump instruction. */
17795 if (ftmp->address < max_address)
17796 max_address = ftmp->address + 1;
17797
17798 last_barrier = create_fix_barrier (last_added_fix, max_address);
17799 }
17800
17801 assign_minipool_offsets (last_barrier);
17802
17803 while (ftmp)
17804 {
17805 if (!BARRIER_P (ftmp->insn)
17806 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17807 == NULL))
17808 break;
17809
17810 ftmp = ftmp->next;
17811 }
17812
17813 /* Scan over the fixes we have identified for this pool, fixing them
17814 up and adding the constants to the pool itself. */
17815 for (this_fix = fix; this_fix && ftmp != this_fix;
17816 this_fix = this_fix->next)
17817 if (!BARRIER_P (this_fix->insn))
17818 {
17819 rtx addr
17820 = plus_constant (Pmode,
17821 gen_rtx_LABEL_REF (VOIDmode,
17822 minipool_vector_label),
17823 this_fix->minipool->offset);
17824 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17825 }
17826
17827 dump_minipool (last_barrier->insn);
17828 fix = ftmp;
17829 }
17830
17831 /* From now on we must synthesize any constants that we can't handle
17832 directly. This can happen if the RTL gets split during final
17833 instruction generation. */
17834 cfun->machine->after_arm_reorg = 1;
17835
17836 /* Free the minipool memory. */
17837 obstack_free (&minipool_obstack, minipool_startobj);
17838 }
17839 \f
17840 /* Routines to output assembly language. */
17841
17842 /* Return string representation of passed in real value. */
17843 static const char *
17844 fp_const_from_val (REAL_VALUE_TYPE *r)
17845 {
17846 if (!fp_consts_inited)
17847 init_fp_table ();
17848
17849 gcc_assert (real_equal (r, &value_fp0));
17850 return "0";
17851 }
17852
17853 /* OPERANDS[0] is the entire list of insns that constitute pop,
17854 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17855 is in the list, UPDATE is true iff the list contains explicit
17856 update of base register. */
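/* Purely illustrative (register numbers invented, derived from the code
   below rather than from any insn pattern): restoring r4, r5 and the
   return address with an SP update typically prints as "pop {r4, r5, pc}",
   whereas the same registers loaded from a non-SP base without writeback
   come out as "ldm r7, {r4, r5, pc}".  */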
17857 void
17858 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17859 bool update)
17860 {
17861 int i;
17862 char pattern[100];
17863 int offset;
17864 const char *conditional;
17865 int num_saves = XVECLEN (operands[0], 0);
17866 unsigned int regno;
17867 unsigned int regno_base = REGNO (operands[1]);
17868 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17869
17870 offset = 0;
17871 offset += update ? 1 : 0;
17872 offset += return_pc ? 1 : 0;
17873
17874 /* Is the base register in the list? */
17875 for (i = offset; i < num_saves; i++)
17876 {
17877 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17878 /* If SP is in the list, then the base register must be SP. */
17879 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17880 /* If base register is in the list, there must be no explicit update. */
17881 if (regno == regno_base)
17882 gcc_assert (!update);
17883 }
17884
17885 conditional = reverse ? "%?%D0" : "%?%d0";
17886 /* Can't use POP if returning from an interrupt. */
17887 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17888 sprintf (pattern, "pop%s\t{", conditional);
17889 else
17890 {
17891 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17892 It's just a convention; their semantics are identical. */
17893 if (regno_base == SP_REGNUM)
17894 sprintf (pattern, "ldmfd%s\t", conditional);
17895 else if (update)
17896 sprintf (pattern, "ldmia%s\t", conditional);
17897 else
17898 sprintf (pattern, "ldm%s\t", conditional);
17899
17900 strcat (pattern, reg_names[regno_base]);
17901 if (update)
17902 strcat (pattern, "!, {");
17903 else
17904 strcat (pattern, ", {");
17905 }
17906
17907 /* Output the first destination register. */
17908 strcat (pattern,
17909 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17910
17911 /* Output the rest of the destination registers. */
17912 for (i = offset + 1; i < num_saves; i++)
17913 {
17914 strcat (pattern, ", ");
17915 strcat (pattern,
17916 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17917 }
17918
17919 strcat (pattern, "}");
17920
17921 if (interrupt_p && return_pc)
17922 strcat (pattern, "^");
17923
17924 output_asm_insn (pattern, &cond);
17925 }
17926
17927
17928 /* Output the assembly for a store multiple. */
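/* For illustration only: with the stack pointer as the base this prints
   something like "vpush.64 {d8, d9, d10}", and with another base register
   it becomes "vstmdb.64 r4!, {d8, d9, d10}" (the register numbers here
   are hypothetical).  */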
17929
17930 const char *
17931 vfp_output_vstmd (rtx * operands)
17932 {
17933 char pattern[100];
17934 int p;
17935 int base;
17936 int i;
17937 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17938 ? XEXP (operands[0], 0)
17939 : XEXP (XEXP (operands[0], 0), 0);
17940 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17941
17942 if (push_p)
17943 strcpy (pattern, "vpush%?.64\t{%P1");
17944 else
17945 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17946
17947 p = strlen (pattern);
17948
17949 gcc_assert (REG_P (operands[1]));
17950
17951 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17952 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17953 {
17954 p += sprintf (&pattern[p], ", d%d", base + i);
17955 }
17956 strcpy (&pattern[p], "}");
17957
17958 output_asm_insn (pattern, operands);
17959 return "";
17960 }
17961
17962
17963 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17964 number of bytes pushed. */
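/* A sketch of the splitting behaviour implemented below: a request to
   save 20 D registers is emitted as one FSTMD for the upper 4 registers
   followed by one FSTMD for the lower 16, and the two byte counts are
   summed.  The extra-register push for the ARM10 VFPr1 erratum only
   applies when exactly two registers would otherwise be stored.  */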
17965
17966 static int
17967 vfp_emit_fstmd (int base_reg, int count)
17968 {
17969 rtx par;
17970 rtx dwarf;
17971 rtx tmp, reg;
17972 int i;
17973
17974 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17975 register pairs are stored by a store multiple insn. We avoid this
17976 by pushing an extra pair. */
17977 if (count == 2 && !arm_arch6)
17978 {
17979 if (base_reg == LAST_VFP_REGNUM - 3)
17980 base_reg -= 2;
17981 count++;
17982 }
17983
17984 /* FSTMD may not store more than 16 doubleword registers at once. Split
17985 larger stores into multiple parts (up to a maximum of two, in
17986 practice). */
17987 if (count > 16)
17988 {
17989 int saved;
17990 /* NOTE: base_reg is an internal register number, so each D register
17991 counts as 2. */
17992 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17993 saved += vfp_emit_fstmd (base_reg, 16);
17994 return saved;
17995 }
17996
17997 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17998 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17999
18000 reg = gen_rtx_REG (DFmode, base_reg);
18001 base_reg += 2;
18002
18003 XVECEXP (par, 0, 0)
18004 = gen_rtx_SET (gen_frame_mem
18005 (BLKmode,
18006 gen_rtx_PRE_MODIFY (Pmode,
18007 stack_pointer_rtx,
18008 plus_constant
18009 (Pmode, stack_pointer_rtx,
18010 - (count * 8)))
18011 ),
18012 gen_rtx_UNSPEC (BLKmode,
18013 gen_rtvec (1, reg),
18014 UNSPEC_PUSH_MULT));
18015
18016 tmp = gen_rtx_SET (stack_pointer_rtx,
18017 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
18018 RTX_FRAME_RELATED_P (tmp) = 1;
18019 XVECEXP (dwarf, 0, 0) = tmp;
18020
18021 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
18022 RTX_FRAME_RELATED_P (tmp) = 1;
18023 XVECEXP (dwarf, 0, 1) = tmp;
18024
18025 for (i = 1; i < count; i++)
18026 {
18027 reg = gen_rtx_REG (DFmode, base_reg);
18028 base_reg += 2;
18029 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
18030
18031 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
18032 plus_constant (Pmode,
18033 stack_pointer_rtx,
18034 i * 8)),
18035 reg);
18036 RTX_FRAME_RELATED_P (tmp) = 1;
18037 XVECEXP (dwarf, 0, i + 1) = tmp;
18038 }
18039
18040 par = emit_insn (par);
18041 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
18042 RTX_FRAME_RELATED_P (par) = 1;
18043
18044 return count * 8;
18045 }
18046
18047 /* Return true if -mcmse has been passed and the function pointed to by 'addr'
18048 has the cmse_nonsecure_call attribute; return false otherwise. */
18049
18050 bool
18051 detect_cmse_nonsecure_call (tree addr)
18052 {
18053 if (!addr)
18054 return FALSE;
18055
18056 tree fntype = TREE_TYPE (addr);
18057 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
18058 TYPE_ATTRIBUTES (fntype)))
18059 return TRUE;
18060 return FALSE;
18061 }
18062
18063
18064 /* Emit a call instruction with pattern PAT. ADDR is the address of
18065 the call target. */
18066
18067 void
18068 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
18069 {
18070 rtx insn;
18071
18072 insn = emit_call_insn (pat);
18073
18074 /* The PIC register is live on entry to VxWorks PIC PLT entries.
18075 If the call might use such an entry, add a use of the PIC register
18076 to the instruction's CALL_INSN_FUNCTION_USAGE. */
18077 if (TARGET_VXWORKS_RTP
18078 && flag_pic
18079 && !sibcall
18080 && GET_CODE (addr) == SYMBOL_REF
18081 && (SYMBOL_REF_DECL (addr)
18082 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
18083 : !SYMBOL_REF_LOCAL_P (addr)))
18084 {
18085 require_pic_register ();
18086 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
18087 }
18088
18089 if (TARGET_AAPCS_BASED)
18090 {
18091 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
18092 linker. We need to add an IP clobber to allow setting
18093 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
18094 is not needed since it's a fixed register. */
18095 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
18096 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
18097 }
18098 }
18099
18100 /* Output a 'call' insn. */
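/* Illustrative output only (this path is for pre-ARMv5 targets; ARMv5+
   patterns use BLX directly): a call through r2 is emitted as
   "mov lr, pc" followed by "bx r2" when interworking or ARMv4T is
   available, or by "mov pc, r2" otherwise.  */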
18101 const char *
18102 output_call (rtx *operands)
18103 {
18104 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
18105
18106 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
18107 if (REGNO (operands[0]) == LR_REGNUM)
18108 {
18109 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
18110 output_asm_insn ("mov%?\t%0, %|lr", operands);
18111 }
18112
18113 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
18114
18115 if (TARGET_INTERWORK || arm_arch4t)
18116 output_asm_insn ("bx%?\t%0", operands);
18117 else
18118 output_asm_insn ("mov%?\t%|pc, %0", operands);
18119
18120 return "";
18121 }
18122
18123 /* Output a move from arm registers to arm registers of a long double.
18124 OPERANDS[0] is the destination.
18125 OPERANDS[1] is the source. */
18126 const char *
18127 output_mov_long_double_arm_from_arm (rtx *operands)
18128 {
18129 /* We have to be careful here because the two might overlap. */
18130 int dest_start = REGNO (operands[0]);
18131 int src_start = REGNO (operands[1]);
18132 rtx ops[2];
18133 int i;
18134
18135 if (dest_start < src_start)
18136 {
18137 for (i = 0; i < 3; i++)
18138 {
18139 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18140 ops[1] = gen_rtx_REG (SImode, src_start + i);
18141 output_asm_insn ("mov%?\t%0, %1", ops);
18142 }
18143 }
18144 else
18145 {
18146 for (i = 2; i >= 0; i--)
18147 {
18148 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18149 ops[1] = gen_rtx_REG (SImode, src_start + i);
18150 output_asm_insn ("mov%?\t%0, %1", ops);
18151 }
18152 }
18153
18154 return "";
18155 }
18156
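/* Emit the sequence needed to load SRC into SImode register DEST,
   typically a movw/movt-style pair.  As a rough illustration, a constant
   such as 0x12345678 expands to the equivalent of "movw rD, #0x5678"
   followed by "movt rD, #0x1234"; a symbolic SRC is expanded as a
   HIGH/LO_SUM pair instead.  */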
18157 void
18158 arm_emit_movpair (rtx dest, rtx src)
18159 {
18160 /* If the src is an immediate, simplify it. */
18161 if (CONST_INT_P (src))
18162 {
18163 HOST_WIDE_INT val = INTVAL (src);
18164 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
18165 if ((val >> 16) & 0x0000ffff)
18166 {
18167 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
18168 GEN_INT (16)),
18169 GEN_INT ((val >> 16) & 0x0000ffff));
18170 rtx_insn *insn = get_last_insn ();
18171 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18172 }
18173 return;
18174 }
18175 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
18176 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
18177 rtx_insn *insn = get_last_insn ();
18178 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18179 }
18180
18181 /* Output a move between double words. It must be REG<-MEM
18182 or MEM<-REG. */
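/* For orientation only, the common REG<-MEM case with a plain register
   address comes out as either "ldrd r0, [r2]" on LDRD-capable targets or
   "ldmia r2, {r0, r1}" otherwise; the register numbers are purely
   illustrative.  */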
18183 const char *
18184 output_move_double (rtx *operands, bool emit, int *count)
18185 {
18186 enum rtx_code code0 = GET_CODE (operands[0]);
18187 enum rtx_code code1 = GET_CODE (operands[1]);
18188 rtx otherops[3];
18189 if (count)
18190 *count = 1;
18191
18192 /* The only case when this might happen is when
18193 you are looking at the length of a DImode instruction
18194 that has an invalid constant in it. */
18195 if (code0 == REG && code1 != MEM)
18196 {
18197 gcc_assert (!emit);
18198 *count = 2;
18199 return "";
18200 }
18201
18202 if (code0 == REG)
18203 {
18204 unsigned int reg0 = REGNO (operands[0]);
18205
18206 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18207
18208 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18209
18210 switch (GET_CODE (XEXP (operands[1], 0)))
18211 {
18212 case REG:
18213
18214 if (emit)
18215 {
18216 if (TARGET_LDRD
18217 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18218 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
18219 else
18220 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18221 }
18222 break;
18223
18224 case PRE_INC:
18225 gcc_assert (TARGET_LDRD);
18226 if (emit)
18227 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18228 break;
18229
18230 case PRE_DEC:
18231 if (emit)
18232 {
18233 if (TARGET_LDRD)
18234 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18235 else
18236 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18237 }
18238 break;
18239
18240 case POST_INC:
18241 if (emit)
18242 {
18243 if (TARGET_LDRD)
18244 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18245 else
18246 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18247 }
18248 break;
18249
18250 case POST_DEC:
18251 gcc_assert (TARGET_LDRD);
18252 if (emit)
18253 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18254 break;
18255
18256 case PRE_MODIFY:
18257 case POST_MODIFY:
18258 /* Autoincrement addressing modes should never have overlapping
18259 base and destination registers, and overlapping index registers
18260 are already prohibited, so this doesn't need to worry about
18261 fix_cm3_ldrd. */
18262 otherops[0] = operands[0];
18263 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18264 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18265
18266 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18267 {
18268 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18269 {
18270 /* Registers overlap so split out the increment. */
18271 if (emit)
18272 {
18273 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18274 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18275 }
18276 if (count)
18277 *count = 2;
18278 }
18279 else
18280 {
18281 /* Use a single insn if we can.
18282 FIXME: IWMMXT allows offsets larger than ldrd can
18283 handle, fix these up with a pair of ldr. */
18284 if (TARGET_THUMB2
18285 || !CONST_INT_P (otherops[2])
18286 || (INTVAL (otherops[2]) > -256
18287 && INTVAL (otherops[2]) < 256))
18288 {
18289 if (emit)
18290 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18291 }
18292 else
18293 {
18294 if (emit)
18295 {
18296 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18297 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18298 }
18299 if (count)
18300 *count = 2;
18301
18302 }
18303 }
18304 }
18305 else
18306 {
18307 /* Use a single insn if we can.
18308 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18309 fix these up with a pair of ldr. */
18310 if (TARGET_THUMB2
18311 || !CONST_INT_P (otherops[2])
18312 || (INTVAL (otherops[2]) > -256
18313 && INTVAL (otherops[2]) < 256))
18314 {
18315 if (emit)
18316 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18317 }
18318 else
18319 {
18320 if (emit)
18321 {
18322 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18323 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18324 }
18325 if (count)
18326 *count = 2;
18327 }
18328 }
18329 break;
18330
18331 case LABEL_REF:
18332 case CONST:
18333 /* We might be able to use ldrd %0, %1 here. However the range is
18334 different to ldr/adr, and it is broken on some ARMv7-M
18335 implementations. */
18336 /* Use the second register of the pair to avoid problematic
18337 overlap. */
18338 otherops[1] = operands[1];
18339 if (emit)
18340 output_asm_insn ("adr%?\t%0, %1", otherops);
18341 operands[1] = otherops[0];
18342 if (emit)
18343 {
18344 if (TARGET_LDRD)
18345 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18346 else
18347 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18348 }
18349
18350 if (count)
18351 *count = 2;
18352 break;
18353
18354 /* ??? This needs checking for thumb2. */
18355 default:
18356 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18357 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18358 {
18359 otherops[0] = operands[0];
18360 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18361 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18362
18363 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18364 {
18365 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18366 {
18367 switch ((int) INTVAL (otherops[2]))
18368 {
18369 case -8:
18370 if (emit)
18371 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18372 return "";
18373 case -4:
18374 if (TARGET_THUMB2)
18375 break;
18376 if (emit)
18377 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18378 return "";
18379 case 4:
18380 if (TARGET_THUMB2)
18381 break;
18382 if (emit)
18383 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18384 return "";
18385 }
18386 }
18387 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18388 operands[1] = otherops[0];
18389 if (TARGET_LDRD
18390 && (REG_P (otherops[2])
18391 || TARGET_THUMB2
18392 || (CONST_INT_P (otherops[2])
18393 && INTVAL (otherops[2]) > -256
18394 && INTVAL (otherops[2]) < 256)))
18395 {
18396 if (reg_overlap_mentioned_p (operands[0],
18397 otherops[2]))
18398 {
18399 /* Swap base and index registers over to
18400 avoid a conflict. */
18401 std::swap (otherops[1], otherops[2]);
18402 }
18403 /* If both registers conflict, it will usually
18404 have been fixed by a splitter. */
18405 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18406 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18407 {
18408 if (emit)
18409 {
18410 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18411 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18412 }
18413 if (count)
18414 *count = 2;
18415 }
18416 else
18417 {
18418 otherops[0] = operands[0];
18419 if (emit)
18420 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18421 }
18422 return "";
18423 }
18424
18425 if (CONST_INT_P (otherops[2]))
18426 {
18427 if (emit)
18428 {
18429 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18430 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18431 else
18432 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18433 }
18434 }
18435 else
18436 {
18437 if (emit)
18438 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18439 }
18440 }
18441 else
18442 {
18443 if (emit)
18444 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18445 }
18446
18447 if (count)
18448 *count = 2;
18449
18450 if (TARGET_LDRD)
18451 return "ldrd%?\t%0, [%1]";
18452
18453 return "ldmia%?\t%1, %M0";
18454 }
18455 else
18456 {
18457 otherops[1] = adjust_address (operands[1], SImode, 4);
18458 /* Take care of overlapping base/data reg. */
18459 if (reg_mentioned_p (operands[0], operands[1]))
18460 {
18461 if (emit)
18462 {
18463 output_asm_insn ("ldr%?\t%0, %1", otherops);
18464 output_asm_insn ("ldr%?\t%0, %1", operands);
18465 }
18466 if (count)
18467 *count = 2;
18468
18469 }
18470 else
18471 {
18472 if (emit)
18473 {
18474 output_asm_insn ("ldr%?\t%0, %1", operands);
18475 output_asm_insn ("ldr%?\t%0, %1", otherops);
18476 }
18477 if (count)
18478 *count = 2;
18479 }
18480 }
18481 }
18482 }
18483 else
18484 {
18485 /* Constraints should ensure this. */
18486 gcc_assert (code0 == MEM && code1 == REG);
18487 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18488 || (TARGET_ARM && TARGET_LDRD));
18489
18490 switch (GET_CODE (XEXP (operands[0], 0)))
18491 {
18492 case REG:
18493 if (emit)
18494 {
18495 if (TARGET_LDRD)
18496 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18497 else
18498 output_asm_insn ("stm%?\t%m0, %M1", operands);
18499 }
18500 break;
18501
18502 case PRE_INC:
18503 gcc_assert (TARGET_LDRD);
18504 if (emit)
18505 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18506 break;
18507
18508 case PRE_DEC:
18509 if (emit)
18510 {
18511 if (TARGET_LDRD)
18512 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18513 else
18514 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18515 }
18516 break;
18517
18518 case POST_INC:
18519 if (emit)
18520 {
18521 if (TARGET_LDRD)
18522 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18523 else
18524 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18525 }
18526 break;
18527
18528 case POST_DEC:
18529 gcc_assert (TARGET_LDRD);
18530 if (emit)
18531 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18532 break;
18533
18534 case PRE_MODIFY:
18535 case POST_MODIFY:
18536 otherops[0] = operands[1];
18537 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18538 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18539
18540 /* IWMMXT allows offsets larger than ldrd can handle,
18541 fix these up with a pair of ldr. */
18542 if (!TARGET_THUMB2
18543 && CONST_INT_P (otherops[2])
18544 && (INTVAL(otherops[2]) <= -256
18545 || INTVAL(otherops[2]) >= 256))
18546 {
18547 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18548 {
18549 if (emit)
18550 {
18551 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18552 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18553 }
18554 if (count)
18555 *count = 2;
18556 }
18557 else
18558 {
18559 if (emit)
18560 {
18561 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18562 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18563 }
18564 if (count)
18565 *count = 2;
18566 }
18567 }
18568 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18569 {
18570 if (emit)
18571 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18572 }
18573 else
18574 {
18575 if (emit)
18576 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18577 }
18578 break;
18579
18580 case PLUS:
18581 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18582 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18583 {
18584 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18585 {
18586 case -8:
18587 if (emit)
18588 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18589 return "";
18590
18591 case -4:
18592 if (TARGET_THUMB2)
18593 break;
18594 if (emit)
18595 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18596 return "";
18597
18598 case 4:
18599 if (TARGET_THUMB2)
18600 break;
18601 if (emit)
18602 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18603 return "";
18604 }
18605 }
18606 if (TARGET_LDRD
18607 && (REG_P (otherops[2])
18608 || TARGET_THUMB2
18609 || (CONST_INT_P (otherops[2])
18610 && INTVAL (otherops[2]) > -256
18611 && INTVAL (otherops[2]) < 256)))
18612 {
18613 otherops[0] = operands[1];
18614 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18615 if (emit)
18616 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18617 return "";
18618 }
18619 /* Fall through */
18620
18621 default:
18622 otherops[0] = adjust_address (operands[0], SImode, 4);
18623 otherops[1] = operands[1];
18624 if (emit)
18625 {
18626 output_asm_insn ("str%?\t%1, %0", operands);
18627 output_asm_insn ("str%?\t%H1, %0", otherops);
18628 }
18629 if (count)
18630 *count = 2;
18631 }
18632 }
18633
18634 return "";
18635 }
18636
18637 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18638 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18639
18640 const char *
18641 output_move_quad (rtx *operands)
18642 {
18643 if (REG_P (operands[0]))
18644 {
18645 /* Load, or reg->reg move. */
18646
18647 if (MEM_P (operands[1]))
18648 {
18649 switch (GET_CODE (XEXP (operands[1], 0)))
18650 {
18651 case REG:
18652 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18653 break;
18654
18655 case LABEL_REF:
18656 case CONST:
18657 output_asm_insn ("adr%?\t%0, %1", operands);
18658 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18659 break;
18660
18661 default:
18662 gcc_unreachable ();
18663 }
18664 }
18665 else
18666 {
18667 rtx ops[2];
18668 int dest, src, i;
18669
18670 gcc_assert (REG_P (operands[1]));
18671
18672 dest = REGNO (operands[0]);
18673 src = REGNO (operands[1]);
18674
18675 /* This seems pretty dumb, but hopefully GCC won't try to do it
18676 very often. */
18677 if (dest < src)
18678 for (i = 0; i < 4; i++)
18679 {
18680 ops[0] = gen_rtx_REG (SImode, dest + i);
18681 ops[1] = gen_rtx_REG (SImode, src + i);
18682 output_asm_insn ("mov%?\t%0, %1", ops);
18683 }
18684 else
18685 for (i = 3; i >= 0; i--)
18686 {
18687 ops[0] = gen_rtx_REG (SImode, dest + i);
18688 ops[1] = gen_rtx_REG (SImode, src + i);
18689 output_asm_insn ("mov%?\t%0, %1", ops);
18690 }
18691 }
18692 }
18693 else
18694 {
18695 gcc_assert (MEM_P (operands[0]));
18696 gcc_assert (REG_P (operands[1]));
18697 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18698
18699 switch (GET_CODE (XEXP (operands[0], 0)))
18700 {
18701 case REG:
18702 output_asm_insn ("stm%?\t%m0, %M1", operands);
18703 break;
18704
18705 default:
18706 gcc_unreachable ();
18707 }
18708 }
18709
18710 return "";
18711 }
18712
18713 /* Output a VFP load or store instruction. */
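/* Example spellings produced by the templates below (operands invented
   for illustration): "vldr.64 d8, [r0]" for a DFmode load,
   "vstr.32 s1, [sp, #4]" for an SFmode store, and "vldmia.64 r3!, {d9}"
   for a post-increment address.  */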
18714
18715 const char *
18716 output_move_vfp (rtx *operands)
18717 {
18718 rtx reg, mem, addr, ops[2];
18719 int load = REG_P (operands[0]);
18720 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18721 int sp = (!TARGET_VFP_FP16INST
18722 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18723 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18724 const char *templ;
18725 char buff[50];
18726 machine_mode mode;
18727
18728 reg = operands[!load];
18729 mem = operands[load];
18730
18731 mode = GET_MODE (reg);
18732
18733 gcc_assert (REG_P (reg));
18734 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18735 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18736 || mode == SFmode
18737 || mode == DFmode
18738 || mode == HImode
18739 || mode == SImode
18740 || mode == DImode
18741 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18742 gcc_assert (MEM_P (mem));
18743
18744 addr = XEXP (mem, 0);
18745
18746 switch (GET_CODE (addr))
18747 {
18748 case PRE_DEC:
18749 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18750 ops[0] = XEXP (addr, 0);
18751 ops[1] = reg;
18752 break;
18753
18754 case POST_INC:
18755 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18756 ops[0] = XEXP (addr, 0);
18757 ops[1] = reg;
18758 break;
18759
18760 default:
18761 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18762 ops[0] = reg;
18763 ops[1] = mem;
18764 break;
18765 }
18766
18767 sprintf (buff, templ,
18768 load ? "ld" : "st",
18769 dp ? "64" : sp ? "32" : "16",
18770 dp ? "P" : "",
18771 integer_p ? "\t%@ int" : "");
18772 output_asm_insn (buff, ops);
18773
18774 return "";
18775 }
18776
18777 /* Output a Neon double-word or quad-word load or store, or a load
18778 or store for larger structure modes.
18779
18780 WARNING: The ordering of elements is weird in big-endian mode,
18781 because the EABI requires that vectors stored in memory appear
18782 as though they were stored by a VSTM instruction.
18783 GCC RTL defines element ordering based on in-memory order.
18784 This can be different from the architectural ordering of elements
18785 within a NEON register. The intrinsics defined in arm_neon.h use the
18786 NEON register element ordering, not the GCC RTL element ordering.
18787
18788 For example, the in-memory ordering of a big-endian quadword
18789 vector with 16-bit elements when stored from register pair {d0,d1}
18790 will be (lowest address first, d0[N] is NEON register element N):
18791
18792 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18793
18794 When necessary, quadword registers (dN, dN+1) are moved to ARM
18795 registers starting at rN, in the order:
18796
18797 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18798
18799 So that STM/LDM can be used on vectors in ARM registers, and the
18800 same memory layout will result as if VSTM/VLDM were used.
18801
18802 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18803 possible, which allows use of appropriate alignment tags.
18804 Note that the choice of "64" is independent of the actual vector
18805 element size; this size simply ensures that the behavior is
18806 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18807
18808 Due to limitations of those instructions, use of VST1.64/VLD1.64
18809 is not possible if:
18810 - the address contains PRE_DEC, or
18811 - the mode refers to more than 4 double-word registers
18812
18813 In those cases, it would be possible to replace VSTM/VLDM by a
18814 sequence of instructions; this is not currently implemented since
18815 this is not certain to actually improve performance. */
18816
18817 const char *
18818 output_move_neon (rtx *operands)
18819 {
18820 rtx reg, mem, addr, ops[2];
18821 int regno, nregs, load = REG_P (operands[0]);
18822 const char *templ;
18823 char buff[50];
18824 machine_mode mode;
18825
18826 reg = operands[!load];
18827 mem = operands[load];
18828
18829 mode = GET_MODE (reg);
18830
18831 gcc_assert (REG_P (reg));
18832 regno = REGNO (reg);
18833 nregs = REG_NREGS (reg) / 2;
18834 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18835 || NEON_REGNO_OK_FOR_QUAD (regno));
18836 gcc_assert (VALID_NEON_DREG_MODE (mode)
18837 || VALID_NEON_QREG_MODE (mode)
18838 || VALID_NEON_STRUCT_MODE (mode));
18839 gcc_assert (MEM_P (mem));
18840
18841 addr = XEXP (mem, 0);
18842
18843 /* Strip off const from addresses like (const (plus (...))). */
18844 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18845 addr = XEXP (addr, 0);
18846
18847 switch (GET_CODE (addr))
18848 {
18849 case POST_INC:
18850 /* We have to use vldm / vstm for too-large modes. */
18851 if (nregs > 4)
18852 {
18853 templ = "v%smia%%?\t%%0!, %%h1";
18854 ops[0] = XEXP (addr, 0);
18855 }
18856 else
18857 {
18858 templ = "v%s1.64\t%%h1, %%A0";
18859 ops[0] = mem;
18860 }
18861 ops[1] = reg;
18862 break;
18863
18864 case PRE_DEC:
18865 /* We have to use vldm / vstm in this case, since there is no
18866 pre-decrement form of the vld1 / vst1 instructions. */
18867 templ = "v%smdb%%?\t%%0!, %%h1";
18868 ops[0] = XEXP (addr, 0);
18869 ops[1] = reg;
18870 break;
18871
18872 case POST_MODIFY:
18873 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18874 gcc_unreachable ();
18875
18876 case REG:
18877 /* We have to use vldm / vstm for too-large modes. */
18878 if (nregs > 1)
18879 {
18880 if (nregs > 4)
18881 templ = "v%smia%%?\t%%m0, %%h1";
18882 else
18883 templ = "v%s1.64\t%%h1, %%A0";
18884
18885 ops[0] = mem;
18886 ops[1] = reg;
18887 break;
18888 }
18889 /* Fall through. */
18890 case LABEL_REF:
18891 case PLUS:
18892 {
18893 int i;
18894 int overlap = -1;
18895 for (i = 0; i < nregs; i++)
18896 {
18897 /* We're only using DImode here because it's a convenient size. */
18898 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18899 ops[1] = adjust_address (mem, DImode, 8 * i);
18900 if (reg_overlap_mentioned_p (ops[0], mem))
18901 {
18902 gcc_assert (overlap == -1);
18903 overlap = i;
18904 }
18905 else
18906 {
18907 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18908 output_asm_insn (buff, ops);
18909 }
18910 }
18911 if (overlap != -1)
18912 {
18913 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18914 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18915 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18916 output_asm_insn (buff, ops);
18917 }
18918
18919 return "";
18920 }
18921
18922 default:
18923 gcc_unreachable ();
18924 }
18925
18926 sprintf (buff, templ, load ? "ld" : "st");
18927 output_asm_insn (buff, ops);
18928
18929 return "";
18930 }
18931
18932 /* Compute and return the length of neon_mov<mode>, where <mode> is
18933 one of VSTRUCT modes: EI, OI, CI or XI. */
18934 int
18935 arm_attr_length_move_neon (rtx_insn *insn)
18936 {
18937 rtx reg, mem, addr;
18938 int load;
18939 machine_mode mode;
18940
18941 extract_insn_cached (insn);
18942
18943 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18944 {
18945 mode = GET_MODE (recog_data.operand[0]);
18946 switch (mode)
18947 {
18948 case E_EImode:
18949 case E_OImode:
18950 return 8;
18951 case E_CImode:
18952 return 12;
18953 case E_XImode:
18954 return 16;
18955 default:
18956 gcc_unreachable ();
18957 }
18958 }
18959
18960 load = REG_P (recog_data.operand[0]);
18961 reg = recog_data.operand[!load];
18962 mem = recog_data.operand[load];
18963
18964 gcc_assert (MEM_P (mem));
18965
18966 addr = XEXP (mem, 0);
18967
18968 /* Strip off const from addresses like (const (plus (...))). */
18969 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18970 addr = XEXP (addr, 0);
18971
18972 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18973 {
18974 int insns = REG_NREGS (reg) / 2;
18975 return insns * 4;
18976 }
18977 else
18978 return 4;
18979 }
18980
18981 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18982 return zero. */
18983
18984 int
18985 arm_address_offset_is_imm (rtx_insn *insn)
18986 {
18987 rtx mem, addr;
18988
18989 extract_insn_cached (insn);
18990
18991 if (REG_P (recog_data.operand[0]))
18992 return 0;
18993
18994 mem = recog_data.operand[0];
18995
18996 gcc_assert (MEM_P (mem));
18997
18998 addr = XEXP (mem, 0);
18999
19000 if (REG_P (addr)
19001 || (GET_CODE (addr) == PLUS
19002 && REG_P (XEXP (addr, 0))
19003 && CONST_INT_P (XEXP (addr, 1))))
19004 return 1;
19005 else
19006 return 0;
19007 }
19008
19009 /* Output an ADD r, s, #n where n may be too big for one instruction.
19010 If adding zero to one register, output nothing. */
19011 const char *
19012 output_add_immediate (rtx *operands)
19013 {
19014 HOST_WIDE_INT n = INTVAL (operands[2]);
19015
19016 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
19017 {
19018 if (n < 0)
19019 output_multi_immediate (operands,
19020 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
19021 -n);
19022 else
19023 output_multi_immediate (operands,
19024 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
19025 n);
19026 }
19027
19028 return "";
19029 }
19030
19031 /* Output a multiple immediate operation.
19032 OPERANDS is the vector of operands referred to in the output patterns.
19033 INSTR1 is the output pattern to use for the first constant.
19034 INSTR2 is the output pattern to use for subsequent constants.
19035 IMMED_OP is the index of the constant slot in OPERANDS.
19036 N is the constant value. */
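/* Worked example (operands invented for illustration): splitting
   N = 0x10001 with the "add" patterns used by output_add_immediate gives
   "add r0, r1, #1" followed by "add r0, r0, #65536", since the constant
   is broken into 8-bit chunks at even bit positions.  */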
19037 static const char *
19038 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
19039 int immed_op, HOST_WIDE_INT n)
19040 {
19041 #if HOST_BITS_PER_WIDE_INT > 32
19042 n &= 0xffffffff;
19043 #endif
19044
19045 if (n == 0)
19046 {
19047 /* Quick and easy output. */
19048 operands[immed_op] = const0_rtx;
19049 output_asm_insn (instr1, operands);
19050 }
19051 else
19052 {
19053 int i;
19054 const char * instr = instr1;
19055
19056 /* Note that n is never zero here (which would give no output). */
19057 for (i = 0; i < 32; i += 2)
19058 {
19059 if (n & (3 << i))
19060 {
19061 operands[immed_op] = GEN_INT (n & (255 << i));
19062 output_asm_insn (instr, operands);
19063 instr = instr2;
19064 i += 6;
19065 }
19066 }
19067 }
19068
19069 return "";
19070 }
19071
19072 /* Return the name of a shifter operation. */
19073 static const char *
19074 arm_shift_nmem (enum rtx_code code)
19075 {
19076 switch (code)
19077 {
19078 case ASHIFT:
19079 return ARM_LSL_NAME;
19080
19081 case ASHIFTRT:
19082 return "asr";
19083
19084 case LSHIFTRT:
19085 return "lsr";
19086
19087 case ROTATERT:
19088 return "ror";
19089
19090 default:
19091 abort();
19092 }
19093 }
19094
19095 /* Return the appropriate ARM instruction for the operation code.
19096 The returned result should not be overwritten. OP is the rtx of the
19097 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
19098 was shifted. */
19099 const char *
19100 arithmetic_instr (rtx op, int shift_first_arg)
19101 {
19102 switch (GET_CODE (op))
19103 {
19104 case PLUS:
19105 return "add";
19106
19107 case MINUS:
19108 return shift_first_arg ? "rsb" : "sub";
19109
19110 case IOR:
19111 return "orr";
19112
19113 case XOR:
19114 return "eor";
19115
19116 case AND:
19117 return "and";
19118
19119 case ASHIFT:
19120 case ASHIFTRT:
19121 case LSHIFTRT:
19122 case ROTATERT:
19123 return arm_shift_nmem(GET_CODE(op));
19124
19125 default:
19126 gcc_unreachable ();
19127 }
19128 }
19129
19130 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19131 for the operation code. The returned result should not be overwritten.
19132 OP is the rtx code of the shift.
19133 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
19134 constant shift amount otherwise. */
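/* For example (illustrative only), a MULT by 8 is rewritten as an "lsl"
   with *AMOUNTP set to 3, and a ROTATE by 10 becomes a ROTATERT ("ror")
   by 22.  */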
19135 static const char *
19136 shift_op (rtx op, HOST_WIDE_INT *amountp)
19137 {
19138 const char * mnem;
19139 enum rtx_code code = GET_CODE (op);
19140
19141 switch (code)
19142 {
19143 case ROTATE:
19144 if (!CONST_INT_P (XEXP (op, 1)))
19145 {
19146 output_operand_lossage ("invalid shift operand");
19147 return NULL;
19148 }
19149
19150 code = ROTATERT;
19151 *amountp = 32 - INTVAL (XEXP (op, 1));
19152 mnem = "ror";
19153 break;
19154
19155 case ASHIFT:
19156 case ASHIFTRT:
19157 case LSHIFTRT:
19158 case ROTATERT:
19159 mnem = arm_shift_nmem(code);
19160 if (CONST_INT_P (XEXP (op, 1)))
19161 {
19162 *amountp = INTVAL (XEXP (op, 1));
19163 }
19164 else if (REG_P (XEXP (op, 1)))
19165 {
19166 *amountp = -1;
19167 return mnem;
19168 }
19169 else
19170 {
19171 output_operand_lossage ("invalid shift operand");
19172 return NULL;
19173 }
19174 break;
19175
19176 case MULT:
19177 /* We never have to worry about the amount being other than a
19178 power of 2, since this case can never be reloaded from a reg. */
19179 if (!CONST_INT_P (XEXP (op, 1)))
19180 {
19181 output_operand_lossage ("invalid shift operand");
19182 return NULL;
19183 }
19184
19185 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
19186
19187 /* Amount must be a power of two. */
19188 if (*amountp & (*amountp - 1))
19189 {
19190 output_operand_lossage ("invalid shift operand");
19191 return NULL;
19192 }
19193
19194 *amountp = exact_log2 (*amountp);
19195 gcc_assert (IN_RANGE (*amountp, 0, 31));
19196 return ARM_LSL_NAME;
19197
19198 default:
19199 output_operand_lossage ("invalid shift operand");
19200 return NULL;
19201 }
19202
19203 /* This is not 100% correct, but follows from the desire to merge
19204 multiplication by a power of 2 with the recognizer for a
19205 shift. >=32 is not a valid shift for "lsl", so we must try and
19206 output a shift that produces the correct arithmetical result.
19207 Using lsr #32 is identical except for the fact that the carry bit
19208 is not set correctly if we set the flags; but we never use the
19209 carry bit from such an operation, so we can ignore that. */
19210 if (code == ROTATERT)
19211 /* Rotate is just modulo 32. */
19212 *amountp &= 31;
19213 else if (*amountp != (*amountp & 31))
19214 {
19215 if (code == ASHIFT)
19216 mnem = "lsr";
19217 *amountp = 32;
19218 }
19219
19220 /* Shifts of 0 are no-ops. */
19221 if (*amountp == 0)
19222 return NULL;
19223
19224 return mnem;
19225 }
19226
19227 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19228 because /bin/as is horribly restrictive. The judgement about
19229 whether or not each character is 'printable' (and can be output as
19230 is) or not (and must be printed with an octal escape) must be made
19231 with reference to the *host* character set -- the situation is
19232 similar to that discussed in the comments above pp_c_char in
19233 c-pretty-print.c. */
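/* For instance, the three bytes 'a', '"' and 0x07 would be emitted as
   .ascii "a\"\007" -- a sketch of the escaping rules applied below.  */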
19234
19235 #define MAX_ASCII_LEN 51
19236
19237 void
19238 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19239 {
19240 int i;
19241 int len_so_far = 0;
19242
19243 fputs ("\t.ascii\t\"", stream);
19244
19245 for (i = 0; i < len; i++)
19246 {
19247 int c = p[i];
19248
19249 if (len_so_far >= MAX_ASCII_LEN)
19250 {
19251 fputs ("\"\n\t.ascii\t\"", stream);
19252 len_so_far = 0;
19253 }
19254
19255 if (ISPRINT (c))
19256 {
19257 if (c == '\\' || c == '\"')
19258 {
19259 putc ('\\', stream);
19260 len_so_far++;
19261 }
19262 putc (c, stream);
19263 len_so_far++;
19264 }
19265 else
19266 {
19267 fprintf (stream, "\\%03o", c);
19268 len_so_far += 4;
19269 }
19270 }
19271
19272 fputs ("\"\n", stream);
19273 }
19274 \f
19275 /* Whether a register is callee saved or not. This is necessary because high
19276 registers are marked as caller saved when optimizing for size on Thumb-1
19277 targets, despite being callee saved, in order to avoid using them. */
19278 #define callee_saved_reg_p(reg) \
19279 (!call_used_regs[reg] \
19280 || (TARGET_THUMB1 && optimize_size \
19281 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19282
19283 /* Compute the register save mask for registers 0 through 12
19284 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19285
19286 static unsigned long
19287 arm_compute_save_reg0_reg12_mask (void)
19288 {
19289 unsigned long func_type = arm_current_func_type ();
19290 unsigned long save_reg_mask = 0;
19291 unsigned int reg;
19292
19293 if (IS_INTERRUPT (func_type))
19294 {
19295 unsigned int max_reg;
19296 /* Interrupt functions must not corrupt any registers,
19297 even call clobbered ones. If this is a leaf function
19298 we can just examine the registers used by the RTL, but
19299 otherwise we have to assume that whatever function is
19300 called might clobber anything, and so we have to save
19301 all the call-clobbered registers as well. */
19302 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19303 /* FIQ handlers have registers r8 - r12 banked, so
19304 we only need to check r0 - r7. Normal ISRs only
19305 bank r14 and r15, so we must check up to r12.
19306 r13 is the stack pointer which is always preserved,
19307 so we do not need to consider it here. */
19308 max_reg = 7;
19309 else
19310 max_reg = 12;
19311
19312 for (reg = 0; reg <= max_reg; reg++)
19313 if (df_regs_ever_live_p (reg)
19314 || (! crtl->is_leaf && call_used_regs[reg]))
19315 save_reg_mask |= (1 << reg);
19316
19317 /* Also save the pic base register if necessary. */
19318 if (flag_pic
19319 && !TARGET_SINGLE_PIC_BASE
19320 && arm_pic_register != INVALID_REGNUM
19321 && crtl->uses_pic_offset_table)
19322 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19323 }
19324 else if (IS_VOLATILE(func_type))
19325 {
19326 /* For noreturn functions we historically omitted register saves
19327 altogether. However this really messes up debugging. As a
19328 compromise save just the frame pointers. Combined with the link
19329 register saved elsewhere this should be sufficient to get
19330 a backtrace. */
19331 if (frame_pointer_needed)
19332 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19333 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19334 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19335 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19336 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19337 }
19338 else
19339 {
19340 /* In the normal case we only need to save those registers
19341 which are call saved and which are used by this function. */
19342 for (reg = 0; reg <= 11; reg++)
19343 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19344 save_reg_mask |= (1 << reg);
19345
19346 /* Handle the frame pointer as a special case. */
19347 if (frame_pointer_needed)
19348 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19349
19350 /* If we aren't loading the PIC register,
19351 don't stack it even though it may be live. */
19352 if (flag_pic
19353 && !TARGET_SINGLE_PIC_BASE
19354 && arm_pic_register != INVALID_REGNUM
19355 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19356 || crtl->uses_pic_offset_table))
19357 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19358
19359 /* The prologue will copy SP into R0, so save it. */
19360 if (IS_STACKALIGN (func_type))
19361 save_reg_mask |= 1;
19362 }
19363
19364 /* Save registers so the exception handler can modify them. */
19365 if (crtl->calls_eh_return)
19366 {
19367 unsigned int i;
19368
19369 for (i = 0; ; i++)
19370 {
19371 reg = EH_RETURN_DATA_REGNO (i);
19372 if (reg == INVALID_REGNUM)
19373 break;
19374 save_reg_mask |= 1 << reg;
19375 }
19376 }
19377
19378 return save_reg_mask;
19379 }
19380
19381 /* Return true if r3 is live at the start of the function. */
19382
19383 static bool
19384 arm_r3_live_at_start_p (void)
19385 {
19386 /* Just look at cfg info, which is still close enough to correct at this
19387 point. This gives false positives for broken functions that might use
19388 uninitialized data that happens to be allocated in r3, but who cares? */
19389 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19390 }
19391
19392 /* Compute the number of bytes used to store the static chain register on the
19393 stack, above the stack frame. We need to know this accurately to get the
19394 alignment of the rest of the stack frame correct. */
19395
19396 static int
19397 arm_compute_static_chain_stack_bytes (void)
19398 {
19399 /* See the defining assertion in arm_expand_prologue. */
19400 if (IS_NESTED (arm_current_func_type ())
19401 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19402 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19403 || flag_stack_clash_protection)
19404 && !df_regs_ever_live_p (LR_REGNUM)))
19405 && arm_r3_live_at_start_p ()
19406 && crtl->args.pretend_args_size == 0)
19407 return 4;
19408
19409 return 0;
19410 }
19411
19412 /* Compute a bit mask of which core registers need to be
19413 saved on the stack for the current function.
19414 This is used by arm_compute_frame_layout, which may add extra registers. */
19415
19416 static unsigned long
19417 arm_compute_save_core_reg_mask (void)
19418 {
19419 unsigned int save_reg_mask = 0;
19420 unsigned long func_type = arm_current_func_type ();
19421 unsigned int reg;
19422
19423 if (IS_NAKED (func_type))
19424 /* This should never really happen. */
19425 return 0;
19426
19427 /* If we are creating a stack frame, then we must save the frame pointer,
19428 IP (which will hold the old stack pointer), LR and the PC. */
19429 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19430 save_reg_mask |=
19431 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19432 | (1 << IP_REGNUM)
19433 | (1 << LR_REGNUM)
19434 | (1 << PC_REGNUM);
19435
19436 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19437
19438 /* Decide if we need to save the link register.
19439 Interrupt routines have their own banked link register,
19440 so they never need to save it.
19441 Otherwise if we do not use the link register we do not need to save
19442 it. If we are pushing other registers onto the stack however, we
19443 can save an instruction in the epilogue by pushing the link register
19444 now and then popping it back into the PC. This incurs extra memory
19445 accesses though, so we only do it when optimizing for size, and only
19446 if we know that we will not need a fancy return sequence. */
19447 if (df_regs_ever_live_p (LR_REGNUM)
19448 || (save_reg_mask
19449 && optimize_size
19450 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19451 && !crtl->tail_call_emit
19452 && !crtl->calls_eh_return))
19453 save_reg_mask |= 1 << LR_REGNUM;
19454
19455 if (cfun->machine->lr_save_eliminated)
19456 save_reg_mask &= ~ (1 << LR_REGNUM);
19457
19458 if (TARGET_REALLY_IWMMXT
19459 && ((bit_count (save_reg_mask)
19460 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19461 arm_compute_static_chain_stack_bytes())
19462 ) % 2) != 0)
19463 {
19464 /* The total number of registers that are going to be pushed
19465 onto the stack is odd. We need to ensure that the stack
19466 is 64-bit aligned before we start to save iWMMXt registers,
19467 and also before we start to create locals. (A local variable
19468 might be a double or long long which we will load/store using
19469 an iWMMXt instruction). Therefore we need to push another
19470 ARM register, so that the stack will be 64-bit aligned. We
19471 try to avoid using the arg registers (r0 -r3) as they might be
19472 used to pass values in a tail call. */
19473 for (reg = 4; reg <= 12; reg++)
19474 if ((save_reg_mask & (1 << reg)) == 0)
19475 break;
19476
19477 if (reg <= 12)
19478 save_reg_mask |= (1 << reg);
19479 else
19480 {
19481 cfun->machine->sibcall_blocked = 1;
19482 save_reg_mask |= (1 << 3);
19483 }
19484 }
19485
19486 /* We may need to push an additional register for use initializing the
19487 PIC base register. */
19488 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19489 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19490 {
19491 reg = thumb_find_work_register (1 << 4);
19492 if (!call_used_regs[reg])
19493 save_reg_mask |= (1 << reg);
19494 }
19495
19496 return save_reg_mask;
19497 }
19498
19499 /* Compute a bit mask of which core registers need to be
19500 saved on the stack for the current function. */
19501 static unsigned long
19502 thumb1_compute_save_core_reg_mask (void)
19503 {
19504 unsigned long mask;
19505 unsigned reg;
19506
19507 mask = 0;
19508 for (reg = 0; reg < 12; reg ++)
19509 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19510 mask |= 1 << reg;
19511
19512 /* Handle the frame pointer as a special case. */
19513 if (frame_pointer_needed)
19514 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19515
19516 if (flag_pic
19517 && !TARGET_SINGLE_PIC_BASE
19518 && arm_pic_register != INVALID_REGNUM
19519 && crtl->uses_pic_offset_table)
19520 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19521
19522 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19523 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19524 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19525
19526 /* LR will also be pushed if any lo regs are pushed. */
19527 if (mask & 0xff || thumb_force_lr_save ())
19528 mask |= (1 << LR_REGNUM);
19529
19530 /* Make sure we have a low work register if we need one.
19531 We will need one if we are going to push a high register,
19532 but we are not currently intending to push a low register. */
19533 if ((mask & 0xff) == 0
19534 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19535 {
19536 /* Use thumb_find_work_register to choose which register
19537 we will use. If the register is live then we will
19538 have to push it. Use LAST_LO_REGNUM as our fallback
19539 choice for the register to select. */
19540 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19541 /* Make sure the register returned by thumb_find_work_register is
19542 not part of the return value. */
19543 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19544 reg = LAST_LO_REGNUM;
19545
19546 if (callee_saved_reg_p (reg))
19547 mask |= 1 << reg;
19548 }
19549
19550 /* The 504 below is 8 bytes less than 512 because there are two possible
19551 alignment words. We can't tell here if they will be present or not so we
19552 have to play it safe and assume that they are. */
19553 if ((CALLER_INTERWORKING_SLOT_SIZE +
19554 ROUND_UP_WORD (get_frame_size ()) +
19555 crtl->outgoing_args_size) >= 504)
19556 {
19557 /* This is the same as the code in thumb1_expand_prologue() which
19558 determines which register to use for stack decrement. */
19559 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19560 if (mask & (1 << reg))
19561 break;
19562
19563 if (reg > LAST_LO_REGNUM)
19564 {
19565 /* Make sure we have a register available for stack decrement. */
19566 mask |= 1 << LAST_LO_REGNUM;
19567 }
19568 }
19569
19570 return mask;
19571 }
19572
19573
19574 /* Return the number of bytes required to save VFP registers. */
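/* As a rough example, if only d8 and d9 need saving this returns 16
   (2 registers * 8 bytes), or 24 on pre-ARMv6 cores subject to the ARM10
   VFPr1 erratum, since the workaround stores one extra D register.  */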
19575 static int
19576 arm_get_vfp_saved_size (void)
19577 {
19578 unsigned int regno;
19579 int count;
19580 int saved;
19581
19582 saved = 0;
19583 /* Space for saved VFP registers. */
19584 if (TARGET_HARD_FLOAT)
19585 {
19586 count = 0;
19587 for (regno = FIRST_VFP_REGNUM;
19588 regno < LAST_VFP_REGNUM;
19589 regno += 2)
19590 {
19591 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19592 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19593 {
19594 if (count > 0)
19595 {
19596 /* Work around the ARM10 VFPr1 bug. */
19597 if (count == 2 && !arm_arch6)
19598 count++;
19599 saved += count * 8;
19600 }
19601 count = 0;
19602 }
19603 else
19604 count++;
19605 }
19606 if (count > 0)
19607 {
19608 if (count == 2 && !arm_arch6)
19609 count++;
19610 saved += count * 8;
19611 }
19612 }
19613 return saved;
19614 }
19615
19616
19617 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19618 everything bar the final return instruction. If simple_return is true,
19619 then do not output the epilogue, because it has already been emitted in RTL.
19620
19621 Note: do not forget to update length attribute of corresponding insn pattern
19622 when changing assembly output (eg. length attribute of
19623 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
19624 register clearing sequences). */
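/* Typical outputs, with register numbers chosen purely for illustration:
   a normal function restoring r4-r6 and its return address prints
   "pop {r4, r5, r6, pc}", while an interrupt handler uses
   "ldmfd sp!, {r4, r5, r6, pc}^" so that SPSR is copied back into the
   CPSR on return.  */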
19625 const char *
19626 output_return_instruction (rtx operand, bool really_return, bool reverse,
19627 bool simple_return)
19628 {
19629 char conditional[10];
19630 char instr[100];
19631 unsigned reg;
19632 unsigned long live_regs_mask;
19633 unsigned long func_type;
19634 arm_stack_offsets *offsets;
19635
19636 func_type = arm_current_func_type ();
19637
19638 if (IS_NAKED (func_type))
19639 return "";
19640
19641 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19642 {
19643 /* If this function was declared non-returning, and we have
19644 found a tail call, then we have to trust that the called
19645 function won't return. */
19646 if (really_return)
19647 {
19648 rtx ops[2];
19649
19650 /* Otherwise, trap an attempted return by aborting. */
19651 ops[0] = operand;
19652 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19653 : "abort");
19654 assemble_external_libcall (ops[1]);
19655 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19656 }
19657
19658 return "";
19659 }
19660
19661 gcc_assert (!cfun->calls_alloca || really_return);
19662
19663 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19664
19665 cfun->machine->return_used_this_function = 1;
19666
19667 offsets = arm_get_frame_offsets ();
19668 live_regs_mask = offsets->saved_regs_mask;
19669
19670 if (!simple_return && live_regs_mask)
19671 {
19672 const char * return_reg;
19673
19674 /* If we do not have any special requirements for function exit
19675 (e.g. interworking) then we can load the return address
19676 directly into the PC. Otherwise we must load it into LR. */
19677 if (really_return
19678 && !IS_CMSE_ENTRY (func_type)
19679 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19680 return_reg = reg_names[PC_REGNUM];
19681 else
19682 return_reg = reg_names[LR_REGNUM];
19683
19684 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19685 {
19686 /* There are three possible reasons for the IP register
19687 being saved: 1) a stack frame was created, in which case
19688 IP contains the old stack pointer, or 2) an ISR routine
19689 corrupted it, or 3) it was saved to align the stack on
19690 iWMMXt. In case 1, restore IP into SP, otherwise just
19691 restore IP. */
19692 if (frame_pointer_needed)
19693 {
19694 live_regs_mask &= ~ (1 << IP_REGNUM);
19695 live_regs_mask |= (1 << SP_REGNUM);
19696 }
19697 else
19698 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19699 }
19700
19701 /* On some ARM architectures it is faster to use LDR rather than
19702 LDM to load a single register. On other architectures, the
19703 cost is the same. In 26 bit mode, or for exception handlers,
19704 we have to use LDM to load the PC so that the CPSR is also
19705 restored. */
19706 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19707 if (live_regs_mask == (1U << reg))
19708 break;
19709
19710 if (reg <= LAST_ARM_REGNUM
19711 && (reg != LR_REGNUM
19712 || ! really_return
19713 || ! IS_INTERRUPT (func_type)))
19714 {
19715 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19716 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19717 }
19718 else
19719 {
19720 char *p;
19721 int first = 1;
19722
19723 /* Generate the load multiple instruction to restore the
19724 registers. Note we can get here, even if
19725 frame_pointer_needed is true, but only if sp already
19726 points to the base of the saved core registers. */
19727 if (live_regs_mask & (1 << SP_REGNUM))
19728 {
19729 unsigned HOST_WIDE_INT stack_adjust;
19730
19731 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19732 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19733
19734 if (stack_adjust && arm_arch5 && TARGET_ARM)
19735 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19736 else
19737 {
19738 /* If we can't use ldmib (SA110 bug),
19739 then try to pop r3 instead. */
19740 if (stack_adjust)
19741 live_regs_mask |= 1 << 3;
19742
19743 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19744 }
19745 }
19746 /* For interrupt returns we have to use an LDM rather than
19747 a POP so that we can use the exception return variant. */
19748 else if (IS_INTERRUPT (func_type))
19749 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19750 else
19751 sprintf (instr, "pop%s\t{", conditional);
19752
19753 p = instr + strlen (instr);
19754
19755 for (reg = 0; reg <= SP_REGNUM; reg++)
19756 if (live_regs_mask & (1 << reg))
19757 {
19758 int l = strlen (reg_names[reg]);
19759
19760 if (first)
19761 first = 0;
19762 else
19763 {
19764 memcpy (p, ", ", 2);
19765 p += 2;
19766 }
19767
19768 memcpy (p, "%|", 2);
19769 memcpy (p + 2, reg_names[reg], l);
19770 p += l + 2;
19771 }
19772
19773 if (live_regs_mask & (1 << LR_REGNUM))
19774 {
19775 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19776 /* If returning from an interrupt, restore the CPSR. */
19777 if (IS_INTERRUPT (func_type))
19778 strcat (p, "^");
19779 }
19780 else
19781 strcpy (p, "}");
19782 }
19783
19784 output_asm_insn (instr, & operand);
19785
19786 /* See if we need to generate an extra instruction to
19787 perform the actual function return. */
19788 if (really_return
19789 && func_type != ARM_FT_INTERWORKED
19790 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19791 {
19792 /* The return has already been handled
19793 by loading the LR into the PC. */
19794 return "";
19795 }
19796 }
19797
19798 if (really_return)
19799 {
19800 switch ((int) ARM_FUNC_TYPE (func_type))
19801 {
19802 case ARM_FT_ISR:
19803 case ARM_FT_FIQ:
19804 /* ??? This is wrong for unified assembly syntax. */
19805 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19806 break;
19807
19808 case ARM_FT_INTERWORKED:
19809 gcc_assert (arm_arch5 || arm_arch4t);
19810 sprintf (instr, "bx%s\t%%|lr", conditional);
19811 break;
19812
19813 case ARM_FT_EXCEPTION:
19814 /* ??? This is wrong for unified assembly syntax. */
19815 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19816 break;
19817
19818 default:
19819 if (IS_CMSE_ENTRY (func_type))
19820 {
19821 /* Check if we have to clear the 'GE bits', which are only used if
19822 the parallel add and subtract instructions are available. */
19823 if (TARGET_INT_SIMD)
19824 snprintf (instr, sizeof (instr),
19825 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19826 else
19827 snprintf (instr, sizeof (instr),
19828 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19829
19830 output_asm_insn (instr, & operand);
19831 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19832 {
19833 /* Clear the cumulative exception-status bits (0-4,7) and the
19834 condition code bits (28-31) of the FPSCR. We need to
19835 remember to clear the first scratch register used (IP) and
19836 save and restore the second (r4). */
19837 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19838 output_asm_insn (instr, & operand);
19839 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19840 output_asm_insn (instr, & operand);
19841 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19842 output_asm_insn (instr, & operand);
19843 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19844 output_asm_insn (instr, & operand);
19845 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19846 output_asm_insn (instr, & operand);
19847 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19848 output_asm_insn (instr, & operand);
19849 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19850 output_asm_insn (instr, & operand);
19851 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19852 output_asm_insn (instr, & operand);
19853 }
19854 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19855 }
19856 /* Use bx if it's available. */
19857 else if (arm_arch5 || arm_arch4t)
19858 sprintf (instr, "bx%s\t%%|lr", conditional);
19859 else
19860 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19861 break;
19862 }
19863
19864 output_asm_insn (instr, & operand);
19865 }
19866
19867 return "";
19868 }
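/* For illustration: for a plain ARM-mode function whose saved_regs_mask
   is {r4, lr}, with really_return set and no interworking or CMSE
   requirements, the sequence produced above is roughly:

        pop     {r4, pc}

   while a function that saved only LR would instead get:

        ldr     pc, [sp], #4

   These are only sketches; the exact instructions (pop/ldmfd/ldmib, with
   or without a trailing bx/subs) depend on the function type, the
   condition code and the target architecture, as handled above.  */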
19869
19870 /* Output in FILE asm statements needed to declare the NAME of the function
19871 defined by its DECL node. */
19872
19873 void
19874 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19875 {
19876 size_t cmse_name_len;
19877 char *cmse_name = 0;
19878 char cmse_prefix[] = "__acle_se_";
19879
19880 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19881 extra function label for each function with the 'cmse_nonsecure_entry'
19882 attribute. This extra function label should be prepended with
19883 '__acle_se_', telling the linker that it needs to create secure gateway
19884 veneers for this function. */
19885 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19886 DECL_ATTRIBUTES (decl)))
19887 {
19888 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19889 cmse_name = XALLOCAVEC (char, cmse_name_len);
19890 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19891 targetm.asm_out.globalize_label (file, cmse_name);
19892
19893 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19894 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19895 }
19896
19897 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19898 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19899 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19900 ASM_OUTPUT_LABEL (file, name);
19901
19902 if (cmse_name)
19903 ASM_OUTPUT_LABEL (file, cmse_name);
19904
19905 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19906 }
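/* For illustration, for a hypothetical function 'foo' carrying the
   'cmse_nonsecure_entry' attribute and compiled with CMSE enabled, the
   directives emitted above look roughly like:

        .global __acle_se_foo
        .type   __acle_se_foo, %function
        .type   foo, %function
   foo:
   __acle_se_foo:

   The exact directives depend on ARM_DECLARE_FUNCTION_NAME and the other
   target macros used above; this is only a sketch of the double-label
   scheme the linker uses to create secure gateway veneers.  */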
19907
19908 /* Write the function name into the code section, directly preceding
19909 the function prologue.
19910
19911 Code will be output similar to this:
19912 t0
19913 .ascii "arm_poke_function_name", 0
19914 .align
19915 t1
19916 .word 0xff000000 + (t1 - t0)
19917 arm_poke_function_name
19918 mov ip, sp
19919 stmfd sp!, {fp, ip, lr, pc}
19920 sub fp, ip, #4
19921
19922 When performing a stack backtrace, code can inspect the value
19923 of 'pc' stored at 'fp' + 0. If the trace function then looks
19924 at location pc - 12 and the top 8 bits are set, then we know
19925 that there is a function name embedded immediately preceding this
19926 location, whose length is given by ((pc[-3]) & ~0xff000000).
19927
19928 We assume that pc is declared as a pointer to an unsigned long.
19929
19930 It is of no benefit to output the function name if we are assembling
19931 a leaf function. These function types will not contain a stack
19932 backtrace structure, therefore it is not possible to determine the
19933 function name. */
19934 void
19935 arm_poke_function_name (FILE *stream, const char *name)
19936 {
19937 unsigned long alignlength;
19938 unsigned long length;
19939 rtx x;
19940
19941 length = strlen (name) + 1;
19942 alignlength = ROUND_UP_WORD (length);
19943
19944 ASM_OUTPUT_ASCII (stream, name, length);
19945 ASM_OUTPUT_ALIGN (stream, 2);
19946 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19947 assemble_aligned_integer (UNITS_PER_WORD, x);
19948 }
19949
19950 /* Place some comments into the assembler stream
19951 describing the current function. */
19952 static void
19953 arm_output_function_prologue (FILE *f)
19954 {
19955 unsigned long func_type;
19956
19957 /* Sanity check. */
19958 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19959
19960 func_type = arm_current_func_type ();
19961
19962 switch ((int) ARM_FUNC_TYPE (func_type))
19963 {
19964 default:
19965 case ARM_FT_NORMAL:
19966 break;
19967 case ARM_FT_INTERWORKED:
19968 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19969 break;
19970 case ARM_FT_ISR:
19971 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19972 break;
19973 case ARM_FT_FIQ:
19974 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19975 break;
19976 case ARM_FT_EXCEPTION:
19977 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19978 break;
19979 }
19980
19981 if (IS_NAKED (func_type))
19982 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19983
19984 if (IS_VOLATILE (func_type))
19985 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19986
19987 if (IS_NESTED (func_type))
19988 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19989 if (IS_STACKALIGN (func_type))
19990 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19991 if (IS_CMSE_ENTRY (func_type))
19992 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19993
19994 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19995 crtl->args.size,
19996 crtl->args.pretend_args_size,
19997 (HOST_WIDE_INT) get_frame_size ());
19998
19999 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
20000 frame_pointer_needed,
20001 cfun->machine->uses_anonymous_args);
20002
20003 if (cfun->machine->lr_save_eliminated)
20004 asm_fprintf (f, "\t%@ link register save eliminated.\n");
20005
20006 if (crtl->calls_eh_return)
20007 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
20008
20009 }
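/* For illustration, for a hypothetical function with no arguments, an
   8-byte frame and a frame pointer, the comments emitted above look
   roughly like:

        @ args = 0, pretend = 0, frame = 8
        @ frame_needed = 1, uses_anonymous_args = 0

   possibly followed by the link-register and __builtin_eh_return notes;
   the values shown are examples only.  */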
20010
20011 static void
20012 arm_output_function_epilogue (FILE *)
20013 {
20014 arm_stack_offsets *offsets;
20015
20016 if (TARGET_THUMB1)
20017 {
20018 int regno;
20019
20020 /* Emit any call-via-reg trampolines that are needed for v4t support
20021 of call_reg and call_value_reg type insns. */
20022 for (regno = 0; regno < LR_REGNUM; regno++)
20023 {
20024 rtx label = cfun->machine->call_via[regno];
20025
20026 if (label != NULL)
20027 {
20028 switch_to_section (function_section (current_function_decl));
20029 targetm.asm_out.internal_label (asm_out_file, "L",
20030 CODE_LABEL_NUMBER (label));
20031 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20032 }
20033 }
20034
20035 /* ??? Probably not safe to set this here, since it assumes that a
20036 function will be emitted as assembly immediately after we generate
20037 RTL for it. This does not happen for inline functions. */
20038 cfun->machine->return_used_this_function = 0;
20039 }
20040 else /* TARGET_32BIT */
20041 {
20042 /* We need to take into account any stack-frame rounding. */
20043 offsets = arm_get_frame_offsets ();
20044
20045 gcc_assert (!use_return_insn (FALSE, NULL)
20046 || (cfun->machine->return_used_this_function != 0)
20047 || offsets->saved_regs == offsets->outgoing_args
20048 || frame_pointer_needed);
20049 }
20050 }
20051
20052 /* Generate and emit a sequence of insns equivalent to PUSH, but using
20053 STR and STRD. If an even number of registers is being pushed, STRD
20054 patterns are created, one for each register pair. If an
20055 odd number of registers is pushed, emit an initial STR followed by
20056 as many STRD instructions as are needed. This works best when the
20057 stack is initially 64-bit aligned (the normal case), since it
20058 ensures that each STRD is also 64-bit aligned. */
20059 static void
20060 thumb2_emit_strd_push (unsigned long saved_regs_mask)
20061 {
20062 int num_regs = 0;
20063 int i;
20064 int regno;
20065 rtx par = NULL_RTX;
20066 rtx dwarf = NULL_RTX;
20067 rtx tmp;
20068 bool first = true;
20069
20070 num_regs = bit_count (saved_regs_mask);
20071
20072 /* Must be at least one register to save, and can't save SP or PC. */
20073 gcc_assert (num_regs > 0 && num_regs <= 14);
20074 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20075 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20076
20077 /* Create sequence for DWARF info. All the frame-related data for
20078 debugging is held in this wrapper. */
20079 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20080
20081 /* Describe the stack adjustment. */
20082 tmp = gen_rtx_SET (stack_pointer_rtx,
20083 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20084 RTX_FRAME_RELATED_P (tmp) = 1;
20085 XVECEXP (dwarf, 0, 0) = tmp;
20086
20087 /* Find the first register. */
20088 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
20089 ;
20090
20091 i = 0;
20092
20093 /* If there's an odd number of registers to push, start off by
20094 pushing a single register. This ensures that subsequent strd
20095 operations are dword aligned (assuming that SP was originally
20096 64-bit aligned). */
20097 if ((num_regs & 1) != 0)
20098 {
20099 rtx reg, mem, insn;
20100
20101 reg = gen_rtx_REG (SImode, regno);
20102 if (num_regs == 1)
20103 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
20104 stack_pointer_rtx));
20105 else
20106 mem = gen_frame_mem (Pmode,
20107 gen_rtx_PRE_MODIFY
20108 (Pmode, stack_pointer_rtx,
20109 plus_constant (Pmode, stack_pointer_rtx,
20110 -4 * num_regs)));
20111
20112 tmp = gen_rtx_SET (mem, reg);
20113 RTX_FRAME_RELATED_P (tmp) = 1;
20114 insn = emit_insn (tmp);
20115 RTX_FRAME_RELATED_P (insn) = 1;
20116 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20117 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
20118 RTX_FRAME_RELATED_P (tmp) = 1;
20119 i++;
20120 regno++;
20121 XVECEXP (dwarf, 0, i) = tmp;
20122 first = false;
20123 }
20124
20125 while (i < num_regs)
20126 if (saved_regs_mask & (1 << regno))
20127 {
20128 rtx reg1, reg2, mem1, mem2;
20129 rtx tmp0, tmp1, tmp2;
20130 int regno2;
20131
20132 /* Find the register to pair with this one. */
20133 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
20134 regno2++)
20135 ;
20136
20137 reg1 = gen_rtx_REG (SImode, regno);
20138 reg2 = gen_rtx_REG (SImode, regno2);
20139
20140 if (first)
20141 {
20142 rtx insn;
20143
20144 first = false;
20145 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20146 stack_pointer_rtx,
20147 -4 * num_regs));
20148 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20149 stack_pointer_rtx,
20150 -4 * (num_regs - 1)));
20151 tmp0 = gen_rtx_SET (stack_pointer_rtx,
20152 plus_constant (Pmode, stack_pointer_rtx,
20153 -4 * (num_regs)));
20154 tmp1 = gen_rtx_SET (mem1, reg1);
20155 tmp2 = gen_rtx_SET (mem2, reg2);
20156 RTX_FRAME_RELATED_P (tmp0) = 1;
20157 RTX_FRAME_RELATED_P (tmp1) = 1;
20158 RTX_FRAME_RELATED_P (tmp2) = 1;
20159 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
20160 XVECEXP (par, 0, 0) = tmp0;
20161 XVECEXP (par, 0, 1) = tmp1;
20162 XVECEXP (par, 0, 2) = tmp2;
20163 insn = emit_insn (par);
20164 RTX_FRAME_RELATED_P (insn) = 1;
20165 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20166 }
20167 else
20168 {
20169 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20170 stack_pointer_rtx,
20171 4 * i));
20172 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20173 stack_pointer_rtx,
20174 4 * (i + 1)));
20175 tmp1 = gen_rtx_SET (mem1, reg1);
20176 tmp2 = gen_rtx_SET (mem2, reg2);
20177 RTX_FRAME_RELATED_P (tmp1) = 1;
20178 RTX_FRAME_RELATED_P (tmp2) = 1;
20179 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20180 XVECEXP (par, 0, 0) = tmp1;
20181 XVECEXP (par, 0, 1) = tmp2;
20182 emit_insn (par);
20183 }
20184
20185 /* Create unwind information. This is an approximation. */
20186 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
20187 plus_constant (Pmode,
20188 stack_pointer_rtx,
20189 4 * i)),
20190 reg1);
20191 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
20192 plus_constant (Pmode,
20193 stack_pointer_rtx,
20194 4 * (i + 1))),
20195 reg2);
20196
20197 RTX_FRAME_RELATED_P (tmp1) = 1;
20198 RTX_FRAME_RELATED_P (tmp2) = 1;
20199 XVECEXP (dwarf, 0, i + 1) = tmp1;
20200 XVECEXP (dwarf, 0, i + 2) = tmp2;
20201 i += 2;
20202 regno = regno2 + 1;
20203 }
20204 else
20205 regno++;
20206
20207 return;
20208 }
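/* For illustration, for a hypothetical saved_regs_mask of
   {r4, r5, r6, r7, lr} (five registers, so an odd count) the sequence
   generated above corresponds roughly to:

        str     r4, [sp, #-20]!
        strd    r5, r6, [sp, #4]
        strd    r7, lr, [sp, #12]

   i.e. a single STR that also performs the whole stack allocation,
   followed by doubleword-aligned STRDs, assuming SP was 64-bit aligned
   on entry.  */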
20209
20210 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20211 whenever possible, otherwise it emits single-word stores. The first store
20212 also allocates stack space for all saved registers, using pre-indexed
20213 addressing with writeback. All other stores use offset addressing. If no STRD
20214 can be emitted, this function emits a sequence of single-word stores,
20215 and not an STM as before, because single-word stores provide more
20216 scheduling freedom and can be turned into an STM by peephole optimizations. */
20217 static void
20218 arm_emit_strd_push (unsigned long saved_regs_mask)
20219 {
20220 int num_regs = 0;
20221 int i, j, dwarf_index = 0;
20222 int offset = 0;
20223 rtx dwarf = NULL_RTX;
20224 rtx insn = NULL_RTX;
20225 rtx tmp, mem;
20226
20227 /* TODO: More efficient code can be emitted by changing the
20228 layout, e.g., first push all pairs that can use STRD to keep the
20229 stack aligned, and then push all other registers. */
20230 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20231 if (saved_regs_mask & (1 << i))
20232 num_regs++;
20233
20234 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20235 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20236 gcc_assert (num_regs > 0);
20237
20238 /* Create sequence for DWARF info. */
20239 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20240
20241 /* For dwarf info, we generate an explicit stack update. */
20242 tmp = gen_rtx_SET (stack_pointer_rtx,
20243 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20244 RTX_FRAME_RELATED_P (tmp) = 1;
20245 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20246
20247 /* Save registers. */
20248 offset = - 4 * num_regs;
20249 j = 0;
20250 while (j <= LAST_ARM_REGNUM)
20251 if (saved_regs_mask & (1 << j))
20252 {
20253 if ((j % 2 == 0)
20254 && (saved_regs_mask & (1 << (j + 1))))
20255 {
20256 /* The current and next registers form a register pair for
20257 which STRD can be generated. */
20258 if (offset < 0)
20259 {
20260 /* Allocate stack space for all saved registers. */
20261 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20262 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20263 mem = gen_frame_mem (DImode, tmp);
20264 offset = 0;
20265 }
20266 else if (offset > 0)
20267 mem = gen_frame_mem (DImode,
20268 plus_constant (Pmode,
20269 stack_pointer_rtx,
20270 offset));
20271 else
20272 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20273
20274 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20275 RTX_FRAME_RELATED_P (tmp) = 1;
20276 tmp = emit_insn (tmp);
20277
20278 /* Record the first store insn. */
20279 if (dwarf_index == 1)
20280 insn = tmp;
20281
20282 /* Generate dwarf info. */
20283 mem = gen_frame_mem (SImode,
20284 plus_constant (Pmode,
20285 stack_pointer_rtx,
20286 offset));
20287 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20288 RTX_FRAME_RELATED_P (tmp) = 1;
20289 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20290
20291 mem = gen_frame_mem (SImode,
20292 plus_constant (Pmode,
20293 stack_pointer_rtx,
20294 offset + 4));
20295 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20296 RTX_FRAME_RELATED_P (tmp) = 1;
20297 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20298
20299 offset += 8;
20300 j += 2;
20301 }
20302 else
20303 {
20304 /* Emit a single word store. */
20305 if (offset < 0)
20306 {
20307 /* Allocate stack space for all saved registers. */
20308 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20309 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20310 mem = gen_frame_mem (SImode, tmp);
20311 offset = 0;
20312 }
20313 else if (offset > 0)
20314 mem = gen_frame_mem (SImode,
20315 plus_constant (Pmode,
20316 stack_pointer_rtx,
20317 offset));
20318 else
20319 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20320
20321 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20322 RTX_FRAME_RELATED_P (tmp) = 1;
20323 tmp = emit_insn (tmp);
20324
20325 /* Record the first store insn. */
20326 if (dwarf_index == 1)
20327 insn = tmp;
20328
20329 /* Generate dwarf info. */
20330 mem = gen_frame_mem (SImode,
20331 plus_constant(Pmode,
20332 stack_pointer_rtx,
20333 offset));
20334 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20335 RTX_FRAME_RELATED_P (tmp) = 1;
20336 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20337
20338 offset += 4;
20339 j += 1;
20340 }
20341 }
20342 else
20343 j++;
20344
20345 /* Attach dwarf info to the first insn we generate. */
20346 gcc_assert (insn != NULL_RTX);
20347 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20348 RTX_FRAME_RELATED_P (insn) = 1;
20349 }
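/* For illustration, for a hypothetical saved_regs_mask of {r4, r5, r6, r7}
   the code above emits roughly:

        strd    r4, r5, [sp, #-16]!
        strd    r6, r7, [sp, #8]

   whereas a mask such as {r4, r5, r7}, where r7 has no partner, falls
   back to a single-word store for the unpaired register.  This is only
   a sketch of the intended shape of the output.  */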
20350
20351 /* Generate and emit an insn that we will recognize as a push_multi.
20352 Unfortunately, since this insn does not reflect very well the actual
20353 semantics of the operation, we need to annotate the insn for the benefit
20354 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20355 MASK for registers that should be annotated for DWARF2 frame unwind
20356 information. */
20357 static rtx
20358 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20359 {
20360 int num_regs = 0;
20361 int num_dwarf_regs = 0;
20362 int i, j;
20363 rtx par;
20364 rtx dwarf;
20365 int dwarf_par_index;
20366 rtx tmp, reg;
20367
20368 /* We don't record the PC in the dwarf frame information. */
20369 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20370
20371 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20372 {
20373 if (mask & (1 << i))
20374 num_regs++;
20375 if (dwarf_regs_mask & (1 << i))
20376 num_dwarf_regs++;
20377 }
20378
20379 gcc_assert (num_regs && num_regs <= 16);
20380 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20381
20382 /* For the body of the insn we are going to generate an UNSPEC in
20383 parallel with several USEs. This allows the insn to be recognized
20384 by the push_multi pattern in the arm.md file.
20385
20386 The body of the insn looks something like this:
20387
20388 (parallel [
20389 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20390 (const_int:SI <num>)))
20391 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20392 (use (reg:SI XX))
20393 (use (reg:SI YY))
20394 ...
20395 ])
20396
20397 For the frame note however, we try to be more explicit and actually
20398 show each register being stored into the stack frame, plus a (single)
20399 decrement of the stack pointer. We do it this way in order to be
20400 friendly to the stack unwinding code, which only wants to see a single
20401 stack decrement per instruction. The RTL we generate for the note looks
20402 something like this:
20403
20404 (sequence [
20405 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20406 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20407 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20408 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20409 ...
20410 ])
20411
20412 FIXME: In an ideal world the PRE_MODIFY would not exist and
20413 instead we'd have a parallel expression detailing all
20414 the stores to the various memory addresses so that debug
20415 information is more up-to-date. Remember however while writing
20416 this to take care of the constraints with the push instruction.
20417
20418 Note also that this has to be taken care of for the VFP registers.
20419
20420 For more see PR43399. */
20421
20422 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20423 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20424 dwarf_par_index = 1;
20425
20426 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20427 {
20428 if (mask & (1 << i))
20429 {
20430 reg = gen_rtx_REG (SImode, i);
20431
20432 XVECEXP (par, 0, 0)
20433 = gen_rtx_SET (gen_frame_mem
20434 (BLKmode,
20435 gen_rtx_PRE_MODIFY (Pmode,
20436 stack_pointer_rtx,
20437 plus_constant
20438 (Pmode, stack_pointer_rtx,
20439 -4 * num_regs))
20440 ),
20441 gen_rtx_UNSPEC (BLKmode,
20442 gen_rtvec (1, reg),
20443 UNSPEC_PUSH_MULT));
20444
20445 if (dwarf_regs_mask & (1 << i))
20446 {
20447 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20448 reg);
20449 RTX_FRAME_RELATED_P (tmp) = 1;
20450 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20451 }
20452
20453 break;
20454 }
20455 }
20456
20457 for (j = 1, i++; j < num_regs; i++)
20458 {
20459 if (mask & (1 << i))
20460 {
20461 reg = gen_rtx_REG (SImode, i);
20462
20463 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20464
20465 if (dwarf_regs_mask & (1 << i))
20466 {
20467 tmp
20468 = gen_rtx_SET (gen_frame_mem
20469 (SImode,
20470 plus_constant (Pmode, stack_pointer_rtx,
20471 4 * j)),
20472 reg);
20473 RTX_FRAME_RELATED_P (tmp) = 1;
20474 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20475 }
20476
20477 j++;
20478 }
20479 }
20480
20481 par = emit_insn (par);
20482
20483 tmp = gen_rtx_SET (stack_pointer_rtx,
20484 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20485 RTX_FRAME_RELATED_P (tmp) = 1;
20486 XVECEXP (dwarf, 0, 0) = tmp;
20487
20488 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20489
20490 return par;
20491 }
20492
20493 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20494 SIZE is the offset to be adjusted.
20495 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20496 static void
20497 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20498 {
20499 rtx dwarf;
20500
20501 RTX_FRAME_RELATED_P (insn) = 1;
20502 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20503 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20504 }
20505
20506 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20507 SAVED_REGS_MASK shows which registers need to be restored.
20508
20509 Unfortunately, since this insn does not reflect very well the actual
20510 semantics of the operation, we need to annotate the insn for the benefit
20511 of DWARF2 frame unwind information. */
20512 static void
20513 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20514 {
20515 int num_regs = 0;
20516 int i, j;
20517 rtx par;
20518 rtx dwarf = NULL_RTX;
20519 rtx tmp, reg;
20520 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20521 int offset_adj;
20522 int emit_update;
20523
20524 offset_adj = return_in_pc ? 1 : 0;
20525 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20526 if (saved_regs_mask & (1 << i))
20527 num_regs++;
20528
20529 gcc_assert (num_regs && num_regs <= 16);
20530
20531 /* If SP is in the reglist, then we don't emit the SP update insn. */
20532 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20533
20534 /* The parallel needs to hold num_regs SETs
20535 and one SET for the stack update. */
20536 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20537
20538 if (return_in_pc)
20539 XVECEXP (par, 0, 0) = ret_rtx;
20540
20541 if (emit_update)
20542 {
20543 /* Increment the stack pointer, based on there being
20544 num_regs 4-byte registers to restore. */
20545 tmp = gen_rtx_SET (stack_pointer_rtx,
20546 plus_constant (Pmode,
20547 stack_pointer_rtx,
20548 4 * num_regs));
20549 RTX_FRAME_RELATED_P (tmp) = 1;
20550 XVECEXP (par, 0, offset_adj) = tmp;
20551 }
20552
20553 /* Now restore every reg, which may include PC. */
20554 for (j = 0, i = 0; j < num_regs; i++)
20555 if (saved_regs_mask & (1 << i))
20556 {
20557 reg = gen_rtx_REG (SImode, i);
20558 if ((num_regs == 1) && emit_update && !return_in_pc)
20559 {
20560 /* Emit single load with writeback. */
20561 tmp = gen_frame_mem (SImode,
20562 gen_rtx_POST_INC (Pmode,
20563 stack_pointer_rtx));
20564 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20565 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20566 return;
20567 }
20568
20569 tmp = gen_rtx_SET (reg,
20570 gen_frame_mem
20571 (SImode,
20572 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20573 RTX_FRAME_RELATED_P (tmp) = 1;
20574 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20575
20576 /* We need to maintain a sequence for DWARF info too. As dwarf info
20577 should not have PC, skip PC. */
20578 if (i != PC_REGNUM)
20579 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20580
20581 j++;
20582 }
20583
20584 if (return_in_pc)
20585 par = emit_jump_insn (par);
20586 else
20587 par = emit_insn (par);
20588
20589 REG_NOTES (par) = dwarf;
20590 if (!return_in_pc)
20591 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20592 stack_pointer_rtx, stack_pointer_rtx);
20593 }
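/* For illustration, for a hypothetical saved_regs_mask of {r4, r5, pc}
   the PARALLEL built above has the shape:

        (parallel [(return)
                   (set (reg sp) (plus (reg sp) (const_int 12)))
                   (set (reg r4) (mem (reg sp)))
                   (set (reg r5) (mem (plus (reg sp) (const_int 4))))
                   (set (reg pc) (mem (plus (reg sp) (const_int 8))))])

   which is matched by the pop_multi patterns and typically prints as
   something like "pop {r4, r5, pc}".  This is a sketch; the precise RTL
   and assembly depend on the target and the pattern that matches.  */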
20594
20595 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20596 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20597
20598 Unfortunately, since this insn does not reflect very well the actual
20599 semantics of the operation, we need to annotate the insn for the benefit
20600 of DWARF2 frame unwind information. */
20601 static void
20602 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20603 {
20604 int i, j;
20605 rtx par;
20606 rtx dwarf = NULL_RTX;
20607 rtx tmp, reg;
20608
20609 gcc_assert (num_regs && num_regs <= 32);
20610
20611 /* Workaround ARM10 VFPr1 bug. */
20612 if (num_regs == 2 && !arm_arch6)
20613 {
20614 if (first_reg == 15)
20615 first_reg--;
20616
20617 num_regs++;
20618 }
20619
20620 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20621 there could be up to 32 D-registers to restore.
20622 If there are more than 16 D-registers, make two recursive calls,
20623 each of which emits one pop_multi instruction. */
20624 if (num_regs > 16)
20625 {
20626 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20627 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20628 return;
20629 }
20630
20631 /* The parallel needs to hold num_regs SETs
20632 and one SET for the stack update. */
20633 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20634
20635 /* Increment the stack pointer, based on there being
20636 num_regs 8-byte registers to restore. */
20637 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20638 RTX_FRAME_RELATED_P (tmp) = 1;
20639 XVECEXP (par, 0, 0) = tmp;
20640
20641 /* Now show every reg that will be restored, using a SET for each. */
20642 for (j = 0, i=first_reg; j < num_regs; i += 2)
20643 {
20644 reg = gen_rtx_REG (DFmode, i);
20645
20646 tmp = gen_rtx_SET (reg,
20647 gen_frame_mem
20648 (DFmode,
20649 plus_constant (Pmode, base_reg, 8 * j)));
20650 RTX_FRAME_RELATED_P (tmp) = 1;
20651 XVECEXP (par, 0, j + 1) = tmp;
20652
20653 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20654
20655 j++;
20656 }
20657
20658 par = emit_insn (par);
20659 REG_NOTES (par) = dwarf;
20660
20661 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
20662 if (REGNO (base_reg) == IP_REGNUM)
20663 {
20664 RTX_FRAME_RELATED_P (par) = 1;
20665 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20666 }
20667 else
20668 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20669 base_reg, base_reg);
20670 }
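/* For illustration, restoring the hypothetical range d8-d11 from the
   stack pointer builds a PARALLEL containing the base-register adjustment
   by 32 and four DFmode loads at offsets 0, 8, 16 and 24, which is matched
   by the VFP pop pattern and normally prints as something like:

        vldm    sp!, {d8-d11}

   This is only a sketch; the exact mnemonic and operands depend on the
   assembly dialect and the matching pattern.  */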
20671
20672 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20673 even number of registers is being popped, LDRD patterns are created for
20674 all register pairs. If an odd number of registers is popped, the last register
20675 is loaded using an LDR pattern. */
20676 static void
20677 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20678 {
20679 int num_regs = 0;
20680 int i, j;
20681 rtx par = NULL_RTX;
20682 rtx dwarf = NULL_RTX;
20683 rtx tmp, reg, tmp1;
20684 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20685
20686 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20687 if (saved_regs_mask & (1 << i))
20688 num_regs++;
20689
20690 gcc_assert (num_regs && num_regs <= 16);
20691
20692 /* We cannot generate ldrd for PC, so reduce the count if PC is
20693 to be popped. If num_regs was even it now becomes odd, and we
20694 can generate a pop with PC. If num_regs was odd, it becomes
20695 even, and an ldr with return can be generated for PC. */
20696 if (return_in_pc)
20697 num_regs--;
20698
20699 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20700
20701 /* Var j iterates over all the registers in saved_regs_mask. Var i gives
20702 the index of each saved register in the stack frame.
20703 A PARALLEL RTX for a register pair is created here, so that the pattern
20704 for LDRD can be matched. As PC is always the last register to be popped,
20705 and we have already decremented num_regs if PC is present, we don't have
20706 to worry about PC in this loop. */
20707 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20708 if (saved_regs_mask & (1 << j))
20709 {
20710 /* Create RTX for memory load. */
20711 reg = gen_rtx_REG (SImode, j);
20712 tmp = gen_rtx_SET (reg,
20713 gen_frame_mem (SImode,
20714 plus_constant (Pmode,
20715 stack_pointer_rtx, 4 * i)));
20716 RTX_FRAME_RELATED_P (tmp) = 1;
20717
20718 if (i % 2 == 0)
20719 {
20720 /* When saved-register index (i) is even, the RTX to be emitted is
20721 yet to be created. Hence create it first. The LDRD pattern we
20722 are generating is :
20723 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20724 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20725 where target registers need not be consecutive. */
20726 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20727 dwarf = NULL_RTX;
20728 }
20729
20730 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
20731 added as the 0th element; if i is odd, reg_i is added as the 1st element
20732 of the LDRD pattern shown above. */
20733 XVECEXP (par, 0, (i % 2)) = tmp;
20734 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20735
20736 if ((i % 2) == 1)
20737 {
20738 /* When saved-register index (i) is odd, RTXs for both the registers
20739 to be loaded are generated in above given LDRD pattern, and the
20740 pattern can be emitted now. */
20741 par = emit_insn (par);
20742 REG_NOTES (par) = dwarf;
20743 RTX_FRAME_RELATED_P (par) = 1;
20744 }
20745
20746 i++;
20747 }
20748
20749 /* If the number of registers popped is odd and return_in_pc is false, or
20750 the number of registers is even and return_in_pc is true, the last
20751 register is popped using LDR. It can be PC as well. Hence, adjust the
20752 stack first and then use LDR with post-increment. */
20753
20754 /* Increment the stack pointer, based on there being
20755 num_regs 4-byte registers to restore. */
20756 tmp = gen_rtx_SET (stack_pointer_rtx,
20757 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20758 RTX_FRAME_RELATED_P (tmp) = 1;
20759 tmp = emit_insn (tmp);
20760 if (!return_in_pc)
20761 {
20762 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20763 stack_pointer_rtx, stack_pointer_rtx);
20764 }
20765
20766 dwarf = NULL_RTX;
20767
20768 if (((num_regs % 2) == 1 && !return_in_pc)
20769 || ((num_regs % 2) == 0 && return_in_pc))
20770 {
20771 /* Scan for the single register to be popped. Skip until the saved
20772 register is found. */
20773 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20774
20775 /* Gen LDR with post increment here. */
20776 tmp1 = gen_rtx_MEM (SImode,
20777 gen_rtx_POST_INC (SImode,
20778 stack_pointer_rtx));
20779 set_mem_alias_set (tmp1, get_frame_alias_set ());
20780
20781 reg = gen_rtx_REG (SImode, j);
20782 tmp = gen_rtx_SET (reg, tmp1);
20783 RTX_FRAME_RELATED_P (tmp) = 1;
20784 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20785
20786 if (return_in_pc)
20787 {
20788 /* If return_in_pc, j must be PC_REGNUM. */
20789 gcc_assert (j == PC_REGNUM);
20790 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20791 XVECEXP (par, 0, 0) = ret_rtx;
20792 XVECEXP (par, 0, 1) = tmp;
20793 par = emit_jump_insn (par);
20794 }
20795 else
20796 {
20797 par = emit_insn (tmp);
20798 REG_NOTES (par) = dwarf;
20799 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20800 stack_pointer_rtx, stack_pointer_rtx);
20801 }
20802
20803 }
20804 else if ((num_regs % 2) == 1 && return_in_pc)
20805 {
20806 /* There are 2 registers to be popped. So, generate the pattern
20807 pop_multiple_with_stack_update_and_return to pop in PC. */
20808 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20809 }
20810
20811 return;
20812 }
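/* For illustration, for a hypothetical saved_regs_mask of
   {r4, r6, r8, lr} (Thumb-2 LDRD does not require consecutive target
   registers) the code above emits roughly:

        ldrd    r4, r6, [sp]
        ldrd    r8, lr, [sp, #8]
        add     sp, sp, #16

   with the final stack adjustment emitted separately as described
   above; this is a sketch of the intended output only.  */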
20813
20814 /* LDRD in ARM mode needs consecutive registers as operands. This function
20815 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20816 offset addressing and then generates one separate stack update. This provides
20817 more scheduling freedom, compared to writeback on every load. However,
20818 if the function returns using load into PC directly
20819 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20820 before the last load. TODO: Add a peephole optimization to recognize
20821 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20822 peephole optimization to merge the load at stack-offset zero
20823 with the stack update instruction using load with writeback
20824 in post-index addressing mode. */
20825 static void
20826 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20827 {
20828 int j = 0;
20829 int offset = 0;
20830 rtx par = NULL_RTX;
20831 rtx dwarf = NULL_RTX;
20832 rtx tmp, mem;
20833
20834 /* Restore saved registers. */
20835 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20836 j = 0;
20837 while (j <= LAST_ARM_REGNUM)
20838 if (saved_regs_mask & (1 << j))
20839 {
20840 if ((j % 2) == 0
20841 && (saved_regs_mask & (1 << (j + 1)))
20842 && (j + 1) != PC_REGNUM)
20843 {
20844 /* The current and next registers form a register pair for which
20845 LDRD can be generated. PC is always the last register popped, and
20846 we handle it separately. */
20847 if (offset > 0)
20848 mem = gen_frame_mem (DImode,
20849 plus_constant (Pmode,
20850 stack_pointer_rtx,
20851 offset));
20852 else
20853 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20854
20855 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20856 tmp = emit_insn (tmp);
20857 RTX_FRAME_RELATED_P (tmp) = 1;
20858
20859 /* Generate dwarf info. */
20860
20861 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20862 gen_rtx_REG (SImode, j),
20863 NULL_RTX);
20864 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20865 gen_rtx_REG (SImode, j + 1),
20866 dwarf);
20867
20868 REG_NOTES (tmp) = dwarf;
20869
20870 offset += 8;
20871 j += 2;
20872 }
20873 else if (j != PC_REGNUM)
20874 {
20875 /* Emit a single word load. */
20876 if (offset > 0)
20877 mem = gen_frame_mem (SImode,
20878 plus_constant (Pmode,
20879 stack_pointer_rtx,
20880 offset));
20881 else
20882 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20883
20884 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20885 tmp = emit_insn (tmp);
20886 RTX_FRAME_RELATED_P (tmp) = 1;
20887
20888 /* Generate dwarf info. */
20889 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20890 gen_rtx_REG (SImode, j),
20891 NULL_RTX);
20892
20893 offset += 4;
20894 j += 1;
20895 }
20896 else /* j == PC_REGNUM */
20897 j++;
20898 }
20899 else
20900 j++;
20901
20902 /* Update the stack. */
20903 if (offset > 0)
20904 {
20905 tmp = gen_rtx_SET (stack_pointer_rtx,
20906 plus_constant (Pmode,
20907 stack_pointer_rtx,
20908 offset));
20909 tmp = emit_insn (tmp);
20910 arm_add_cfa_adjust_cfa_note (tmp, offset,
20911 stack_pointer_rtx, stack_pointer_rtx);
20912 offset = 0;
20913 }
20914
20915 if (saved_regs_mask & (1 << PC_REGNUM))
20916 {
20917 /* Only PC is to be popped. */
20918 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20919 XVECEXP (par, 0, 0) = ret_rtx;
20920 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20921 gen_frame_mem (SImode,
20922 gen_rtx_POST_INC (SImode,
20923 stack_pointer_rtx)));
20924 RTX_FRAME_RELATED_P (tmp) = 1;
20925 XVECEXP (par, 0, 1) = tmp;
20926 par = emit_jump_insn (par);
20927
20928 /* Generate dwarf info. */
20929 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20930 gen_rtx_REG (SImode, PC_REGNUM),
20931 NULL_RTX);
20932 REG_NOTES (par) = dwarf;
20933 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20934 stack_pointer_rtx, stack_pointer_rtx);
20935 }
20936 }
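/* For illustration, for a hypothetical saved_regs_mask of
   {r4, r5, r6, lr} (ARM-mode LDRD needs consecutive registers) the code
   above emits roughly:

        ldrd    r4, r5, [sp]
        ldr     r6, [sp, #8]
        ldr     lr, [sp, #12]
        add     sp, sp, #16

   r6 and lr cannot be paired, so they are restored with single-word
   loads; this is a sketch of the intended output only.  */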
20937
20938 /* Calculate the size of the return value that is passed in registers. */
20939 static unsigned
20940 arm_size_return_regs (void)
20941 {
20942 machine_mode mode;
20943
20944 if (crtl->return_rtx != 0)
20945 mode = GET_MODE (crtl->return_rtx);
20946 else
20947 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20948
20949 return GET_MODE_SIZE (mode);
20950 }
20951
20952 /* Return true if the current function needs to save/restore LR. */
20953 static bool
20954 thumb_force_lr_save (void)
20955 {
20956 return !cfun->machine->lr_save_eliminated
20957 && (!crtl->is_leaf
20958 || thumb_far_jump_used_p ()
20959 || df_regs_ever_live_p (LR_REGNUM));
20960 }
20961
20962 /* Return true if CALL is an indirect tail call; in that case we do
20963 not know whether r3 will be available, since the call target is
20964 held in a register. */
20965 static bool
20966 is_indirect_tailcall_p (rtx call)
20967 {
20968 rtx pat = PATTERN (call);
20969
20970 /* Indirect tail call. */
20971 pat = XVECEXP (pat, 0, 0);
20972 if (GET_CODE (pat) == SET)
20973 pat = SET_SRC (pat);
20974
20975 pat = XEXP (XEXP (pat, 0), 0);
20976 return REG_P (pat);
20977 }
20978
20979 /* Return true if r3 is used by any of the tail call insns in the
20980 current function. */
20981 static bool
20982 any_sibcall_could_use_r3 (void)
20983 {
20984 edge_iterator ei;
20985 edge e;
20986
20987 if (!crtl->tail_call_emit)
20988 return false;
20989 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20990 if (e->flags & EDGE_SIBCALL)
20991 {
20992 rtx_insn *call = BB_END (e->src);
20993 if (!CALL_P (call))
20994 call = prev_nonnote_nondebug_insn (call);
20995 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20996 if (find_regno_fusage (call, USE, 3)
20997 || is_indirect_tailcall_p (call))
20998 return true;
20999 }
21000 return false;
21001 }
21002
21003
21004 /* Compute the distance from register FROM to register TO.
21005 These can be the arg pointer (26), the soft frame pointer (25),
21006 the stack pointer (13) or the hard frame pointer (11).
21007 In thumb mode r7 is used as the soft frame pointer, if needed.
21008 Typical stack layout looks like this:
21009
21010 old stack pointer -> | |
21011 ----
21012 | | \
21013 | | saved arguments for
21014 | | vararg functions
21015 | | /
21016 --
21017 hard FP & arg pointer -> | | \
21018 | | stack
21019 | | frame
21020 | | /
21021 --
21022 | | \
21023 | | call saved
21024 | | registers
21025 soft frame pointer -> | | /
21026 --
21027 | | \
21028 | | local
21029 | | variables
21030 locals base pointer -> | | /
21031 --
21032 | | \
21033 | | outgoing
21034 | | arguments
21035 current stack pointer -> | | /
21036 --
21037
21038 For a given function some or all of these stack components
21039 may not be needed, giving rise to the possibility of
21040 eliminating some of the registers.
21041
21042 The values returned by this function must reflect the behavior
21043 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
21044
21045 The sign of the number returned reflects the direction of stack
21046 growth, so the values are positive for all eliminations except
21047 from the soft frame pointer to the hard frame pointer.
21048
21049 SFP may point just inside the local variables block to ensure correct
21050 alignment. */
21051
21052
21053 /* Return cached stack offsets. */
21054
21055 static arm_stack_offsets *
21056 arm_get_frame_offsets (void)
21057 {
21058 struct arm_stack_offsets *offsets;
21059
21060 offsets = &cfun->machine->stack_offsets;
21061
21062 return offsets;
21063 }
21064
21065
21066 /* Calculate stack offsets. These are used to calculate register elimination
21067 offsets and in prologue/epilogue code. Also calculates which registers
21068 should be saved. */
21069
21070 static void
21071 arm_compute_frame_layout (void)
21072 {
21073 struct arm_stack_offsets *offsets;
21074 unsigned long func_type;
21075 int saved;
21076 int core_saved;
21077 HOST_WIDE_INT frame_size;
21078 int i;
21079
21080 offsets = &cfun->machine->stack_offsets;
21081
21082 /* Initially this is the size of the local variables. It will be translated
21083 into an offset once we have determined the size of the preceding data. */
21084 frame_size = ROUND_UP_WORD (get_frame_size ());
21085
21086 /* Space for variadic functions. */
21087 offsets->saved_args = crtl->args.pretend_args_size;
21088
21089 /* In Thumb mode this is incorrect, but never used. */
21090 offsets->frame
21091 = (offsets->saved_args
21092 + arm_compute_static_chain_stack_bytes ()
21093 + (frame_pointer_needed ? 4 : 0));
21094
21095 if (TARGET_32BIT)
21096 {
21097 unsigned int regno;
21098
21099 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
21100 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21101 saved = core_saved;
21102
21103 /* We know that SP will be doubleword aligned on entry, and we must
21104 preserve that condition at any subroutine call. We also require the
21105 soft frame pointer to be doubleword aligned. */
21106
21107 if (TARGET_REALLY_IWMMXT)
21108 {
21109 /* Check for the call-saved iWMMXt registers. */
21110 for (regno = FIRST_IWMMXT_REGNUM;
21111 regno <= LAST_IWMMXT_REGNUM;
21112 regno++)
21113 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
21114 saved += 8;
21115 }
21116
21117 func_type = arm_current_func_type ();
21118 /* Space for saved VFP registers. */
21119 if (! IS_VOLATILE (func_type)
21120 && TARGET_HARD_FLOAT)
21121 saved += arm_get_vfp_saved_size ();
21122 }
21123 else /* TARGET_THUMB1 */
21124 {
21125 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
21126 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21127 saved = core_saved;
21128 if (TARGET_BACKTRACE)
21129 saved += 16;
21130 }
21131
21132 /* Saved registers include the stack frame. */
21133 offsets->saved_regs
21134 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
21135 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
21136
21137 /* A leaf function does not need any stack alignment if it has nothing
21138 on the stack. */
21139 if (crtl->is_leaf && frame_size == 0
21140 /* However if it calls alloca(), we have a dynamically allocated
21141 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
21142 && ! cfun->calls_alloca)
21143 {
21144 offsets->outgoing_args = offsets->soft_frame;
21145 offsets->locals_base = offsets->soft_frame;
21146 return;
21147 }
21148
21149 /* Ensure SFP has the correct alignment. */
21150 if (ARM_DOUBLEWORD_ALIGN
21151 && (offsets->soft_frame & 7))
21152 {
21153 offsets->soft_frame += 4;
21154 /* Try to align stack by pushing an extra reg. Don't bother doing this
21155 when there is a stack frame as the alignment will be rolled into
21156 the normal stack adjustment. */
21157 if (frame_size + crtl->outgoing_args_size == 0)
21158 {
21159 int reg = -1;
21160
21161 /* Register r3 is caller-saved. Normally it does not need to be
21162 saved on entry by the prologue. However if we choose to save
21163 it for padding then we may confuse the compiler into thinking
21164 a prologue sequence is required when in fact it is not. This
21165 will occur when shrink-wrapping if r3 is used as a scratch
21166 register and there are no other callee-saved writes.
21167
21168 This situation can be avoided when other callee-saved registers
21169 are available and r3 is not mandatory, by choosing a callee-saved
21170 register for padding instead. */
21171 bool prefer_callee_reg_p = false;
21172
21173 /* If it is safe to use r3, then do so. This sometimes
21174 generates better code on Thumb-2 by avoiding the need to
21175 use 32-bit push/pop instructions. */
21176 if (! any_sibcall_could_use_r3 ()
21177 && arm_size_return_regs () <= 12
21178 && (offsets->saved_regs_mask & (1 << 3)) == 0
21179 && (TARGET_THUMB2
21180 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
21181 {
21182 reg = 3;
21183 if (!TARGET_THUMB2)
21184 prefer_callee_reg_p = true;
21185 }
21186 if (reg == -1
21187 || prefer_callee_reg_p)
21188 {
21189 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
21190 {
21191 /* Avoid fixed registers; they may be changed at
21192 arbitrary times so it's unsafe to restore them
21193 during the epilogue. */
21194 if (!fixed_regs[i]
21195 && (offsets->saved_regs_mask & (1 << i)) == 0)
21196 {
21197 reg = i;
21198 break;
21199 }
21200 }
21201 }
21202
21203 if (reg != -1)
21204 {
21205 offsets->saved_regs += 4;
21206 offsets->saved_regs_mask |= (1 << reg);
21207 }
21208 }
21209 }
21210
21211 offsets->locals_base = offsets->soft_frame + frame_size;
21212 offsets->outgoing_args = (offsets->locals_base
21213 + crtl->outgoing_args_size);
21214
21215 if (ARM_DOUBLEWORD_ALIGN)
21216 {
21217 /* Ensure SP remains doubleword aligned. */
21218 if (offsets->outgoing_args & 7)
21219 offsets->outgoing_args += 4;
21220 gcc_assert (!(offsets->outgoing_args & 7));
21221 }
21222 }
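/* For illustration, consider a hypothetical non-leaf ARM-mode function
   whose callees take all arguments in registers, with no frame pointer,
   no static chain, no pretend or outgoing arguments, no locals, and a
   core save mask of {r4, r5, lr} (12 bytes).  With doubleword alignment
   required, soft_frame is padded from 12 to 16, and because there is no
   other stack adjustment to fold the padding into, an extra register
   (r3 when it is safe, otherwise a call-saved register) is added to
   saved_regs_mask so that 16 bytes are pushed and SP stays 64-bit
   aligned.  The numbers are examples only and assume
   CALLER_INTERWORKING_SLOT_SIZE is zero.  */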
21223
21224
21225 /* Calculate the relative offsets for the different stack pointers. Positive
21226 offsets are in the direction of stack growth. */
21227
21228 HOST_WIDE_INT
21229 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21230 {
21231 arm_stack_offsets *offsets;
21232
21233 offsets = arm_get_frame_offsets ();
21234
21235 /* OK, now we have enough information to compute the distances.
21236 There must be an entry in these switch tables for each pair
21237 of registers in ELIMINABLE_REGS, even if some of the entries
21238 seem to be redundant or useless. */
21239 switch (from)
21240 {
21241 case ARG_POINTER_REGNUM:
21242 switch (to)
21243 {
21244 case THUMB_HARD_FRAME_POINTER_REGNUM:
21245 return 0;
21246
21247 case FRAME_POINTER_REGNUM:
21248 /* This is the reverse of the soft frame pointer
21249 to hard frame pointer elimination below. */
21250 return offsets->soft_frame - offsets->saved_args;
21251
21252 case ARM_HARD_FRAME_POINTER_REGNUM:
21253 /* This is only non-zero in the case where the static chain register
21254 is stored above the frame. */
21255 return offsets->frame - offsets->saved_args - 4;
21256
21257 case STACK_POINTER_REGNUM:
21258 /* If nothing has been pushed on the stack at all
21259 then this will return -4. This *is* correct! */
21260 return offsets->outgoing_args - (offsets->saved_args + 4);
21261
21262 default:
21263 gcc_unreachable ();
21264 }
21265 gcc_unreachable ();
21266
21267 case FRAME_POINTER_REGNUM:
21268 switch (to)
21269 {
21270 case THUMB_HARD_FRAME_POINTER_REGNUM:
21271 return 0;
21272
21273 case ARM_HARD_FRAME_POINTER_REGNUM:
21274 /* The hard frame pointer points to the top entry in the
21275 stack frame. The soft frame pointer to the bottom entry
21276 in the stack frame. If there is no stack frame at all,
21277 then they are identical. */
21278
21279 return offsets->frame - offsets->soft_frame;
21280
21281 case STACK_POINTER_REGNUM:
21282 return offsets->outgoing_args - offsets->soft_frame;
21283
21284 default:
21285 gcc_unreachable ();
21286 }
21287 gcc_unreachable ();
21288
21289 default:
21290 /* You cannot eliminate from the stack pointer.
21291 In theory you could eliminate from the hard frame
21292 pointer to the stack pointer, but this will never
21293 happen, since if a stack frame is not needed the
21294 hard frame pointer will never be used. */
21295 gcc_unreachable ();
21296 }
21297 }
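/* For illustration, with hypothetical offsets saved_args == 0,
   soft_frame == 16 and outgoing_args == 24, the eliminations above give:

        ARG_POINTER   -> FRAME_POINTER : 16 - 0 = 16
        ARG_POINTER   -> STACK_POINTER : 24 - (0 + 4) = 20
        FRAME_POINTER -> STACK_POINTER : 24 - 16 = 8

   The values are examples only; real offsets come from
   arm_compute_frame_layout.  */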
21298
21299 /* Given FROM and TO register numbers, say whether this elimination is
21300 allowed. Frame pointer elimination is automatically handled.
21301
21302 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21303 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21304 pointer, we must eliminate FRAME_POINTER_REGNUM into
21305 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21306 ARG_POINTER_REGNUM. */
21307
21308 bool
21309 arm_can_eliminate (const int from, const int to)
21310 {
21311 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21312 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21313 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21314 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21315 true);
21316 }
21317
21318 /* Emit RTL to save coprocessor registers on function entry. Returns the
21319 number of bytes pushed. */
21320
21321 static int
21322 arm_save_coproc_regs(void)
21323 {
21324 int saved_size = 0;
21325 unsigned reg;
21326 unsigned start_reg;
21327 rtx insn;
21328
21329 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21330 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21331 {
21332 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21333 insn = gen_rtx_MEM (V2SImode, insn);
21334 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21335 RTX_FRAME_RELATED_P (insn) = 1;
21336 saved_size += 8;
21337 }
21338
21339 if (TARGET_HARD_FLOAT)
21340 {
21341 start_reg = FIRST_VFP_REGNUM;
21342
21343 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21344 {
21345 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21346 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21347 {
21348 if (start_reg != reg)
21349 saved_size += vfp_emit_fstmd (start_reg,
21350 (reg - start_reg) / 2);
21351 start_reg = reg + 2;
21352 }
21353 }
21354 if (start_reg != reg)
21355 saved_size += vfp_emit_fstmd (start_reg,
21356 (reg - start_reg) / 2);
21357 }
21358 return saved_size;
21359 }
21360
21361
21362 /* Set the Thumb frame pointer from the stack pointer. */
21363
21364 static void
21365 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21366 {
21367 HOST_WIDE_INT amount;
21368 rtx insn, dwarf;
21369
21370 amount = offsets->outgoing_args - offsets->locals_base;
21371 if (amount < 1024)
21372 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21373 stack_pointer_rtx, GEN_INT (amount)));
21374 else
21375 {
21376 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21377 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21378 expects the first two operands to be the same. */
21379 if (TARGET_THUMB2)
21380 {
21381 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21382 stack_pointer_rtx,
21383 hard_frame_pointer_rtx));
21384 }
21385 else
21386 {
21387 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21388 hard_frame_pointer_rtx,
21389 stack_pointer_rtx));
21390 }
21391 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21392 plus_constant (Pmode, stack_pointer_rtx, amount));
21393 RTX_FRAME_RELATED_P (dwarf) = 1;
21394 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21395 }
21396
21397 RTX_FRAME_RELATED_P (insn) = 1;
21398 }
21399
21400 struct scratch_reg {
21401 rtx reg;
21402 bool saved;
21403 };
21404
21405 /* Return a short-lived scratch register for use as a 2nd scratch register on
21406 function entry after the registers are saved in the prologue. This register
21407 must be released by means of release_scratch_register_on_entry. IP is not
21408 considered since it is always used as the 1st scratch register if available.
21409
21410 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21411 mask of live registers. */
21412
21413 static void
21414 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21415 unsigned long live_regs)
21416 {
21417 int regno = -1;
21418
21419 sr->saved = false;
21420
21421 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21422 regno = LR_REGNUM;
21423 else
21424 {
21425 unsigned int i;
21426
21427 for (i = 4; i < 11; i++)
21428 if (regno1 != i && (live_regs & (1 << i)) != 0)
21429 {
21430 regno = i;
21431 break;
21432 }
21433
21434 if (regno < 0)
21435 {
21436 /* If IP is used as the 1st scratch register for a nested function,
21437 then either r3 wasn't available or it is used to preserve IP. */
21438 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21439 regno1 = 3;
21440 regno = (regno1 == 3 ? 2 : 3);
21441 sr->saved
21442 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21443 regno);
21444 }
21445 }
21446
21447 sr->reg = gen_rtx_REG (SImode, regno);
21448 if (sr->saved)
21449 {
21450 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21451 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21452 rtx x = gen_rtx_SET (stack_pointer_rtx,
21453 plus_constant (Pmode, stack_pointer_rtx, -4));
21454 RTX_FRAME_RELATED_P (insn) = 1;
21455 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21456 }
21457 }
21458
21459 /* Release a scratch register obtained from the preceding function. */
21460
21461 static void
21462 release_scratch_register_on_entry (struct scratch_reg *sr)
21463 {
21464 if (sr->saved)
21465 {
21466 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21467 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21468 rtx x = gen_rtx_SET (stack_pointer_rtx,
21469 plus_constant (Pmode, stack_pointer_rtx, 4));
21470 RTX_FRAME_RELATED_P (insn) = 1;
21471 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21472 }
21473 }
21474
21475 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21476
21477 #if PROBE_INTERVAL > 4096
21478 #error Cannot use indexed addressing mode for stack probing
21479 #endif
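/* For illustration, if PROBE_INTERVAL is 4096, FIRST is 0 and SIZE is
   10000, the "small constant" case below emits probes at SP - 4096,
   SP - 8192 and finally SP - 10000, so that every page in the range is
   touched.  The numbers are an example only; the exact sequence depends
   on PROBE_INTERVAL and on whether the residual offset fits the
   addressing mode.  */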
21480
21481 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21482 inclusive. These are offsets from the current stack pointer. REGNO1
21483 is the index number of the 1st scratch register and LIVE_REGS is the
21484 mask of live registers. */
21485
21486 static void
21487 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21488 unsigned int regno1, unsigned long live_regs)
21489 {
21490 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21491
21492 /* See if we have a constant small number of probes to generate. If so,
21493 that's the easy case. */
21494 if (size <= PROBE_INTERVAL)
21495 {
21496 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21497 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21498 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21499 }
21500
21501 /* The run-time loop is made up of 10 insns in the generic case while the
21502 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
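/* (For example, at n == 5 the unrolled sequence costs 4 + 2*(5-2) == 10
   insns, the same as the run-time loop, which is why the unrolled form is
   only used up to 5 * PROBE_INTERVAL.)  */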
21503 else if (size <= 5 * PROBE_INTERVAL)
21504 {
21505 HOST_WIDE_INT i, rem;
21506
21507 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21508 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21509 emit_stack_probe (reg1);
21510
21511 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21512 it exceeds SIZE. If only two probes are needed, this will not
21513 generate any code. Then probe at FIRST + SIZE. */
21514 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21515 {
21516 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21517 emit_stack_probe (reg1);
21518 }
21519
21520 rem = size - (i - PROBE_INTERVAL);
21521 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21522 {
21523 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21524 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21525 }
21526 else
21527 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21528 }
21529
21530 /* Otherwise, do the same as above, but in a loop. Note that we must be
21531 extra careful with variables wrapping around because we might be at
21532 the very top (or the very bottom) of the address space and we have
21533 to be able to handle this case properly; in particular, we use an
21534 equality test for the loop condition. */
21535 else
21536 {
21537 HOST_WIDE_INT rounded_size;
21538 struct scratch_reg sr;
21539
21540 get_scratch_register_on_entry (&sr, regno1, live_regs);
21541
21542 emit_move_insn (reg1, GEN_INT (first));
21543
21544
21545 /* Step 1: round SIZE to the previous multiple of the interval. */
21546
21547 rounded_size = size & -PROBE_INTERVAL;
21548 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21549
21550
21551 /* Step 2: compute initial and final value of the loop counter. */
21552
21553 /* TEST_ADDR = SP + FIRST. */
21554 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21555
21556 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21557 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21558
21559
21560 /* Step 3: the loop
21561
21562 do
21563 {
21564 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21565 probe at TEST_ADDR
21566 }
21567 while (TEST_ADDR != LAST_ADDR)
21568
21569 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21570 until it is equal to ROUNDED_SIZE. */
21571
21572 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21573
21574
21575 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21576 that SIZE is equal to ROUNDED_SIZE. */
21577
21578 if (size != rounded_size)
21579 {
21580 HOST_WIDE_INT rem = size - rounded_size;
21581
21582 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21583 {
21584 emit_set_insn (sr.reg,
21585 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21586 emit_stack_probe (plus_constant (Pmode, sr.reg,
21587 PROBE_INTERVAL - rem));
21588 }
21589 else
21590 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21591 }
21592
21593 release_scratch_register_on_entry (&sr);
21594 }
21595
21596 /* Make sure nothing is scheduled before we are done. */
21597 emit_insn (gen_blockage ());
21598 }
21599
21600 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21601 absolute addresses. */
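/* Illustrative output (assuming PROBE_INTERVAL == 4096, with REG1 in r4 and
   REG2 in r5; the register numbers and label name are hypothetical):

       .LPSRL0:
               sub     r4, r4, #4096
               str     r0, [r4, #0]
               cmp     r4, r5
               bne     .LPSRL0
*/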
21602
21603 const char *
21604 output_probe_stack_range (rtx reg1, rtx reg2)
21605 {
21606 static int labelno = 0;
21607 char loop_lab[32];
21608 rtx xops[2];
21609
21610 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21611
21612 /* Loop. */
21613 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21614
21615 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21616 xops[0] = reg1;
21617 xops[1] = GEN_INT (PROBE_INTERVAL);
21618 output_asm_insn ("sub\t%0, %0, %1", xops);
21619
21620 /* Probe at TEST_ADDR. */
21621 output_asm_insn ("str\tr0, [%0, #0]", xops);
21622
21623 /* Test if TEST_ADDR == LAST_ADDR. */
21624 xops[1] = reg2;
21625 output_asm_insn ("cmp\t%0, %1", xops);
21626
21627 /* Branch. */
21628 fputs ("\tbne\t", asm_out_file);
21629 assemble_name_raw (asm_out_file, loop_lab);
21630 fputc ('\n', asm_out_file);
21631
21632 return "";
21633 }
21634
21635 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21636 function. */
21637 void
21638 arm_expand_prologue (void)
21639 {
21640 rtx amount;
21641 rtx insn;
21642 rtx ip_rtx;
21643 unsigned long live_regs_mask;
21644 unsigned long func_type;
21645 int fp_offset = 0;
21646 int saved_pretend_args = 0;
21647 int saved_regs = 0;
21648 unsigned HOST_WIDE_INT args_to_push;
21649 HOST_WIDE_INT size;
21650 arm_stack_offsets *offsets;
21651 bool clobber_ip;
21652
21653 func_type = arm_current_func_type ();
21654
21655 /* Naked functions don't have prologues. */
21656 if (IS_NAKED (func_type))
21657 {
21658 if (flag_stack_usage_info)
21659 current_function_static_stack_size = 0;
21660 return;
21661 }
21662
21663 /* Make a copy of crtl->args.pretend_args_size (c_f_p_a_s) as we may need to modify it locally. */
21664 args_to_push = crtl->args.pretend_args_size;
21665
21666 /* Compute which registers we will have to save onto the stack. */
21667 offsets = arm_get_frame_offsets ();
21668 live_regs_mask = offsets->saved_regs_mask;
21669
21670 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21671
21672 if (IS_STACKALIGN (func_type))
21673 {
21674 rtx r0, r1;
21675
21676 /* Handle a word-aligned stack pointer. We generate the following:
21677
21678 mov r0, sp
21679 bic r1, r0, #7
21680 mov sp, r1
21681 <save and restore r0 in normal prologue/epilogue>
21682 mov sp, r0
21683 bx lr
21684
21685 The unwinder doesn't need to know about the stack realignment.
21686 Just tell it we saved SP in r0. */
21687 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21688
21689 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21690 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21691
21692 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21693 RTX_FRAME_RELATED_P (insn) = 1;
21694 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21695
21696 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21697
21698 /* ??? The CFA changes here, which may cause GDB to conclude that it
21699 has entered a different function. That said, the unwind info is
21700 correct, individually, before and after this instruction because
21701 we've described the save of SP, which will override the default
21702 handling of SP as restoring from the CFA. */
21703 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21704 }
21705
21706 /* The static chain register is the same as the IP register. If it is
21707 clobbered when creating the frame, we need to save and restore it. */
21708 clobber_ip = IS_NESTED (func_type)
21709 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21710 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21711 || flag_stack_clash_protection)
21712 && !df_regs_ever_live_p (LR_REGNUM)
21713 && arm_r3_live_at_start_p ()));
21714
21715 /* Find somewhere to store IP whilst the frame is being created.
21716 We try the following places in order:
21717
21718 1. The last argument register r3 if it is available.
21719 2. A slot on the stack above the frame if there are no
21720 arguments to push onto the stack.
21721 3. Register r3 again, after pushing the argument registers
21722 onto the stack, if this is a varargs function.
21723 4. The last slot on the stack created for the arguments to
21724 push, if this isn't a varargs function.
21725
21726 Note - we only need to tell the dwarf2 backend about the SP
21727 adjustment in the second variant; the static chain register
21728 doesn't need to be unwound, as it doesn't contain a value
21729 inherited from the caller. */
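/* For reference, those cases map onto the code below as follows: case 1 is
   the !arm_r3_live_at_start_p () branch, case 2 the args_to_push == 0
   branch, case 3 the uses_anonymous_args branch, and case 4 the final else
   branch that stores IP into the last argument slot.  */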
21730 if (clobber_ip)
21731 {
21732 if (!arm_r3_live_at_start_p ())
21733 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21734 else if (args_to_push == 0)
21735 {
21736 rtx addr, dwarf;
21737
21738 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21739 saved_regs += 4;
21740
21741 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21742 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21743 fp_offset = 4;
21744
21745 /* Just tell the dwarf backend that we adjusted SP. */
21746 dwarf = gen_rtx_SET (stack_pointer_rtx,
21747 plus_constant (Pmode, stack_pointer_rtx,
21748 -fp_offset));
21749 RTX_FRAME_RELATED_P (insn) = 1;
21750 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21751 }
21752 else
21753 {
21754 /* Store the args on the stack. */
21755 if (cfun->machine->uses_anonymous_args)
21756 {
21757 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21758 (0xf0 >> (args_to_push / 4)) & 0xf);
21759 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21760 saved_pretend_args = 1;
21761 }
21762 else
21763 {
21764 rtx addr, dwarf;
21765
21766 if (args_to_push == 4)
21767 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21768 else
21769 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21770 plus_constant (Pmode,
21771 stack_pointer_rtx,
21772 -args_to_push));
21773
21774 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21775
21776 /* Just tell the dwarf backend that we adjusted SP. */
21777 dwarf = gen_rtx_SET (stack_pointer_rtx,
21778 plus_constant (Pmode, stack_pointer_rtx,
21779 -args_to_push));
21780 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21781 }
21782
21783 RTX_FRAME_RELATED_P (insn) = 1;
21784 fp_offset = args_to_push;
21785 args_to_push = 0;
21786 }
21787 }
21788
21789 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21790 {
21791 if (IS_INTERRUPT (func_type))
21792 {
21793 /* Interrupt functions must not corrupt any registers.
21794 Creating a frame pointer however, corrupts the IP
21795 register, so we must push it first. */
21796 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21797
21798 /* Do not set RTX_FRAME_RELATED_P on this insn.
21799 The dwarf stack unwinding code only wants to see one
21800 stack decrement per function, and this is not it. If
21801 this instruction is labeled as being part of the frame
21802 creation sequence then dwarf2out_frame_debug_expr will
21803 die when it encounters the assignment of IP to FP
21804 later on, since the use of SP here establishes SP as
21805 the CFA register and not IP.
21806
21807 Anyway this instruction is not really part of the stack
21808 frame creation although it is part of the prologue. */
21809 }
21810
21811 insn = emit_set_insn (ip_rtx,
21812 plus_constant (Pmode, stack_pointer_rtx,
21813 fp_offset));
21814 RTX_FRAME_RELATED_P (insn) = 1;
21815 }
21816
21817 if (args_to_push)
21818 {
21819 /* Push the argument registers, or reserve space for them. */
21820 if (cfun->machine->uses_anonymous_args)
21821 insn = emit_multi_reg_push
21822 ((0xf0 >> (args_to_push / 4)) & 0xf,
21823 (0xf0 >> (args_to_push / 4)) & 0xf);
21824 else
21825 insn = emit_insn
21826 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21827 GEN_INT (- args_to_push)));
21828 RTX_FRAME_RELATED_P (insn) = 1;
21829 }
21830
21831 /* If this is an interrupt service routine, and the link register
21832 is going to be pushed, and we're not generating the extra push
21833 of IP (needed when a frame pointer is needed and the frame layout is APCS),
21834 then subtracting four from LR now will mean that the function return
21835 can be done with a single instruction. */
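/* For illustration: with LR already biased by -4, the epilogue can return
   with a single "ldmfd sp!, {..., pc}^" instead of first restoring LR and
   then executing "subs pc, lr, #4".  */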
21836 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21837 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21838 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21839 && TARGET_ARM)
21840 {
21841 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21842
21843 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21844 }
21845
21846 if (live_regs_mask)
21847 {
21848 unsigned long dwarf_regs_mask = live_regs_mask;
21849
21850 saved_regs += bit_count (live_regs_mask) * 4;
21851 if (optimize_size && !frame_pointer_needed
21852 && saved_regs == offsets->saved_regs - offsets->saved_args)
21853 {
21854 /* If no coprocessor registers are being pushed and we don't have
21855 to worry about a frame pointer then push extra registers to
21856 create the stack frame. This is done in a way that does not
21857 alter the frame layout, so is independent of the epilogue. */
21858 int n;
21859 int frame;
21860 n = 0;
21861 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21862 n++;
21863 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21864 if (frame && n * 4 >= frame)
21865 {
21866 n = frame / 4;
21867 live_regs_mask |= (1 << n) - 1;
21868 saved_regs += frame;
21869 }
21870 }
21871
21872 if (TARGET_LDRD
21873 && current_tune->prefer_ldrd_strd
21874 && !optimize_function_for_size_p (cfun))
21875 {
21876 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21877 if (TARGET_THUMB2)
21878 thumb2_emit_strd_push (live_regs_mask);
21879 else if (TARGET_ARM
21880 && !TARGET_APCS_FRAME
21881 && !IS_INTERRUPT (func_type))
21882 arm_emit_strd_push (live_regs_mask);
21883 else
21884 {
21885 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21886 RTX_FRAME_RELATED_P (insn) = 1;
21887 }
21888 }
21889 else
21890 {
21891 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21892 RTX_FRAME_RELATED_P (insn) = 1;
21893 }
21894 }
21895
21896 if (! IS_VOLATILE (func_type))
21897 saved_regs += arm_save_coproc_regs ();
21898
21899 if (frame_pointer_needed && TARGET_ARM)
21900 {
21901 /* Create the new frame pointer. */
21902 if (TARGET_APCS_FRAME)
21903 {
21904 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21905 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21906 RTX_FRAME_RELATED_P (insn) = 1;
21907 }
21908 else
21909 {
21910 insn = GEN_INT (saved_regs - (4 + fp_offset));
21911 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21912 stack_pointer_rtx, insn));
21913 RTX_FRAME_RELATED_P (insn) = 1;
21914 }
21915 }
21916
21917 size = offsets->outgoing_args - offsets->saved_args;
21918 if (flag_stack_usage_info)
21919 current_function_static_stack_size = size;
21920
21921 /* If this isn't an interrupt service routine and we have a frame, then do
21922 stack checking. We use IP as the first scratch register, except for the
21923 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21924 if (!IS_INTERRUPT (func_type)
21925 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21926 || flag_stack_clash_protection))
21927 {
21928 unsigned int regno;
21929
21930 if (!IS_NESTED (func_type) || clobber_ip)
21931 regno = IP_REGNUM;
21932 else if (df_regs_ever_live_p (LR_REGNUM))
21933 regno = LR_REGNUM;
21934 else
21935 regno = 3;
21936
21937 if (crtl->is_leaf && !cfun->calls_alloca)
21938 {
21939 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
21940 arm_emit_probe_stack_range (get_stack_check_protect (),
21941 size - get_stack_check_protect (),
21942 regno, live_regs_mask);
21943 }
21944 else if (size > 0)
21945 arm_emit_probe_stack_range (get_stack_check_protect (), size,
21946 regno, live_regs_mask);
21947 }
21948
21949 /* Recover the static chain register. */
21950 if (clobber_ip)
21951 {
21952 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21953 insn = gen_rtx_REG (SImode, 3);
21954 else
21955 {
21956 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21957 insn = gen_frame_mem (SImode, insn);
21958 }
21959 emit_set_insn (ip_rtx, insn);
21960 emit_insn (gen_force_register_use (ip_rtx));
21961 }
21962
21963 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21964 {
21965 /* This add can produce multiple insns for a large constant, so we
21966 need to get tricky. */
21967 rtx_insn *last = get_last_insn ();
21968
21969 amount = GEN_INT (offsets->saved_args + saved_regs
21970 - offsets->outgoing_args);
21971
21972 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21973 amount));
21974 do
21975 {
21976 last = last ? NEXT_INSN (last) : get_insns ();
21977 RTX_FRAME_RELATED_P (last) = 1;
21978 }
21979 while (last != insn);
21980
21981 /* If the frame pointer is needed, emit a special barrier that
21982 will prevent the scheduler from moving stores to the frame
21983 before the stack adjustment. */
21984 if (frame_pointer_needed)
21985 emit_insn (gen_stack_tie (stack_pointer_rtx,
21986 hard_frame_pointer_rtx));
21987 }
21988
21989
21990 if (frame_pointer_needed && TARGET_THUMB2)
21991 thumb_set_frame_pointer (offsets);
21992
21993 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21994 {
21995 unsigned long mask;
21996
21997 mask = live_regs_mask;
21998 mask &= THUMB2_WORK_REGS;
21999 if (!IS_NESTED (func_type))
22000 mask |= (1 << IP_REGNUM);
22001 arm_load_pic_register (mask);
22002 }
22003
22004 /* If we are profiling, make sure no instructions are scheduled before
22005 the call to mcount. Similarly if the user has requested no
22006 scheduling in the prologue. Similarly if we want non-call exceptions
22007 using the EABI unwinder, to prevent faulting instructions from being
22008 swapped with a stack adjustment. */
22009 if (crtl->profile || !TARGET_SCHED_PROLOG
22010 || (arm_except_unwind_info (&global_options) == UI_TARGET
22011 && cfun->can_throw_non_call_exceptions))
22012 emit_insn (gen_blockage ());
22013
22014 /* If the link register is being kept alive, with the return address in it,
22015 then make sure that it does not get reused by the ce2 pass. */
22016 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
22017 cfun->machine->lr_save_eliminated = 1;
22018 }
22019 \f
22020 /* Print condition code to STREAM. Helper function for arm_print_operand. */
22021 static void
22022 arm_print_condition (FILE *stream)
22023 {
22024 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
22025 {
22026 /* Branch conversion is not implemented for Thumb-2. */
22027 if (TARGET_THUMB)
22028 {
22029 output_operand_lossage ("predicated Thumb instruction");
22030 return;
22031 }
22032 if (current_insn_predicate != NULL)
22033 {
22034 output_operand_lossage
22035 ("predicated instruction in conditional sequence");
22036 return;
22037 }
22038
22039 fputs (arm_condition_codes[arm_current_cc], stream);
22040 }
22041 else if (current_insn_predicate)
22042 {
22043 enum arm_cond_code code;
22044
22045 if (TARGET_THUMB1)
22046 {
22047 output_operand_lossage ("predicated Thumb instruction");
22048 return;
22049 }
22050
22051 code = get_arm_condition_code (current_insn_predicate);
22052 fputs (arm_condition_codes[code], stream);
22053 }
22054 }
22055
22056
22057 /* Globally reserved letters: acln
22058 Punctuation letters currently used: @_|?().!#
22059 Lower case letters currently used: bcdefhimpqtvwxyz
22060 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
22061 Letters previously used, but now deprecated/obsolete: sVWXYZ.
22062
22063 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
22064
22065 If CODE is 'd', then the X is a condition operand and the instruction
22066 should only be executed if the condition is true.
22067 If CODE is 'D', then the X is a condition operand and the instruction
22068 should only be executed if the condition is false: however, if the mode
22069 of the comparison is CCFPEmode, then always execute the instruction -- we
22070 do this because in these circumstances !GE does not necessarily imply LT;
22071 in these cases the instruction pattern will take care to make sure that
22072 an instruction containing %d will follow, thereby undoing the effects of
22073 doing this instruction unconditionally.
22074 If CODE is 'N' then X is a floating point operand that must be negated
22075 before output.
22076 If CODE is 'B' then output a bitwise inverted value of X (a const int).
22077 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
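/* Illustrative (hypothetical) uses of a few of these codes, as a reading
   aid: in a template such as "add%d3\t%0, %1, %2" the "%d3" prints the
   condition of operand 3, so the ADD only executes when it holds, while
   "#%B2" applied to (const_int 5) prints "#-6", its bitwise inverse.  */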
22078 static void
22079 arm_print_operand (FILE *stream, rtx x, int code)
22080 {
22081 switch (code)
22082 {
22083 case '@':
22084 fputs (ASM_COMMENT_START, stream);
22085 return;
22086
22087 case '_':
22088 fputs (user_label_prefix, stream);
22089 return;
22090
22091 case '|':
22092 fputs (REGISTER_PREFIX, stream);
22093 return;
22094
22095 case '?':
22096 arm_print_condition (stream);
22097 return;
22098
22099 case '.':
22100 /* The current condition code for a condition code setting instruction.
22101 Preceded by 's' in unified syntax, otherwise followed by 's'. */
22102 fputc('s', stream);
22103 arm_print_condition (stream);
22104 return;
22105
22106 case '!':
22107 /* If the instruction is conditionally executed then print
22108 the current condition code, otherwise print 's'. */
22109 gcc_assert (TARGET_THUMB2);
22110 if (current_insn_predicate)
22111 arm_print_condition (stream);
22112 else
22113 fputc('s', stream);
22114 break;
22115
22116 /* %# is a "break" sequence. It doesn't output anything, but is used to
22117 separate e.g. operand numbers from following text, if that text consists
22118 of further digits which we don't want to be part of the operand
22119 number. */
22120 case '#':
22121 return;
22122
22123 case 'N':
22124 {
22125 REAL_VALUE_TYPE r;
22126 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
22127 fprintf (stream, "%s", fp_const_from_val (&r));
22128 }
22129 return;
22130
22131 /* An integer or symbol address without a preceding # sign. */
22132 case 'c':
22133 switch (GET_CODE (x))
22134 {
22135 case CONST_INT:
22136 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
22137 break;
22138
22139 case SYMBOL_REF:
22140 output_addr_const (stream, x);
22141 break;
22142
22143 case CONST:
22144 if (GET_CODE (XEXP (x, 0)) == PLUS
22145 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
22146 {
22147 output_addr_const (stream, x);
22148 break;
22149 }
22150 /* Fall through. */
22151
22152 default:
22153 output_operand_lossage ("Unsupported operand for code '%c'", code);
22154 }
22155 return;
22156
22157 /* An integer that we want to print in HEX. */
22158 case 'x':
22159 switch (GET_CODE (x))
22160 {
22161 case CONST_INT:
22162 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
22163 break;
22164
22165 default:
22166 output_operand_lossage ("Unsupported operand for code '%c'", code);
22167 }
22168 return;
22169
22170 case 'B':
22171 if (CONST_INT_P (x))
22172 {
22173 HOST_WIDE_INT val;
22174 val = ARM_SIGN_EXTEND (~INTVAL (x));
22175 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
22176 }
22177 else
22178 {
22179 putc ('~', stream);
22180 output_addr_const (stream, x);
22181 }
22182 return;
22183
22184 case 'b':
22185 /* Print the log2 of a CONST_INT. */
22186 {
22187 HOST_WIDE_INT val;
22188
22189 if (!CONST_INT_P (x)
22190 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
22191 output_operand_lossage ("Unsupported operand for code '%c'", code);
22192 else
22193 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22194 }
22195 return;
22196
22197 case 'L':
22198 /* The low 16 bits of an immediate constant. */
22199 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
22200 return;
22201
22202 case 'i':
22203 fprintf (stream, "%s", arithmetic_instr (x, 1));
22204 return;
22205
22206 case 'I':
22207 fprintf (stream, "%s", arithmetic_instr (x, 0));
22208 return;
22209
22210 case 'S':
22211 {
22212 HOST_WIDE_INT val;
22213 const char *shift;
22214
22215 shift = shift_op (x, &val);
22216
22217 if (shift)
22218 {
22219 fprintf (stream, ", %s ", shift);
22220 if (val == -1)
22221 arm_print_operand (stream, XEXP (x, 1), 0);
22222 else
22223 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22224 }
22225 }
22226 return;
22227
22228 /* An explanation of the 'Q', 'R' and 'H' register operands:
22229
22230 In a pair of registers containing a DI or DF value the 'Q'
22231 operand returns the register number of the register containing
22232 the least significant part of the value. The 'R' operand returns
22233 the register number of the register containing the most
22234 significant part of the value.
22235
22236 The 'H' operand returns the higher of the two register numbers.
22237 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22238 same as the 'Q' operand, since the most significant part of the
22239 value is held in the lower number register. The reverse is true
22240 on systems where WORDS_BIG_ENDIAN is false.
22241
22242 The purpose of these operands is to distinguish between cases
22243 where the endian-ness of the values is important (for example
22244 when they are added together), and cases where the endian-ness
22245 is irrelevant, but the order of register operations is important.
22246 For example when loading a value from memory into a register
22247 pair, the endian-ness does not matter. Provided that the value
22248 from the lower memory address is put into the lower numbered
22249 register, and the value from the higher address is put into the
22250 higher numbered register, the load will work regardless of whether
22251 the value being loaded is big-wordian or little-wordian. The
22252 order of the two register loads can matter however, if the address
22253 of the memory location is actually held in one of the registers
22254 being overwritten by the load.
22255
22256 The 'Q' and 'R' constraints are also available for 64-bit
22257 constants. */
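/* Concrete example: for a DImode value held in r4/r5 on a target where
   WORDS_BIG_ENDIAN is false, %Q prints r4, %R prints r5 and %H prints r5;
   with WORDS_BIG_ENDIAN true, %Q prints r5, %R prints r4 and %H still
   prints r5.  */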
22258 case 'Q':
22259 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22260 {
22261 rtx part = gen_lowpart (SImode, x);
22262 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22263 return;
22264 }
22265
22266 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22267 {
22268 output_operand_lossage ("invalid operand for code '%c'", code);
22269 return;
22270 }
22271
22272 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22273 return;
22274
22275 case 'R':
22276 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22277 {
22278 machine_mode mode = GET_MODE (x);
22279 rtx part;
22280
22281 if (mode == VOIDmode)
22282 mode = DImode;
22283 part = gen_highpart_mode (SImode, mode, x);
22284 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22285 return;
22286 }
22287
22288 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22289 {
22290 output_operand_lossage ("invalid operand for code '%c'", code);
22291 return;
22292 }
22293
22294 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22295 return;
22296
22297 case 'H':
22298 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22299 {
22300 output_operand_lossage ("invalid operand for code '%c'", code);
22301 return;
22302 }
22303
22304 asm_fprintf (stream, "%r", REGNO (x) + 1);
22305 return;
22306
22307 case 'J':
22308 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22309 {
22310 output_operand_lossage ("invalid operand for code '%c'", code);
22311 return;
22312 }
22313
22314 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22315 return;
22316
22317 case 'K':
22318 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22319 {
22320 output_operand_lossage ("invalid operand for code '%c'", code);
22321 return;
22322 }
22323
22324 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22325 return;
22326
22327 case 'm':
22328 asm_fprintf (stream, "%r",
22329 REG_P (XEXP (x, 0))
22330 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22331 return;
22332
22333 case 'M':
22334 asm_fprintf (stream, "{%r-%r}",
22335 REGNO (x),
22336 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22337 return;
22338
22339 /* Like 'M', but writing doubleword vector registers, for use by Neon
22340 insns. */
22341 case 'h':
22342 {
22343 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22344 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22345 if (numregs == 1)
22346 asm_fprintf (stream, "{d%d}", regno);
22347 else
22348 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22349 }
22350 return;
22351
22352 case 'd':
22353 /* CONST_TRUE_RTX means always -- that's the default. */
22354 if (x == const_true_rtx)
22355 return;
22356
22357 if (!COMPARISON_P (x))
22358 {
22359 output_operand_lossage ("invalid operand for code '%c'", code);
22360 return;
22361 }
22362
22363 fputs (arm_condition_codes[get_arm_condition_code (x)],
22364 stream);
22365 return;
22366
22367 case 'D':
22368 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22369 want to do that. */
22370 if (x == const_true_rtx)
22371 {
22372 output_operand_lossage ("instruction never executed");
22373 return;
22374 }
22375 if (!COMPARISON_P (x))
22376 {
22377 output_operand_lossage ("invalid operand for code '%c'", code);
22378 return;
22379 }
22380
22381 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22382 (get_arm_condition_code (x))],
22383 stream);
22384 return;
22385
22386 case 's':
22387 case 'V':
22388 case 'W':
22389 case 'X':
22390 case 'Y':
22391 case 'Z':
22392 /* Former Maverick support, removed after GCC-4.7. */
22393 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22394 return;
22395
22396 case 'U':
22397 if (!REG_P (x)
22398 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22399 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22400 /* Bad value for wCG register number. */
22401 {
22402 output_operand_lossage ("invalid operand for code '%c'", code);
22403 return;
22404 }
22405
22406 else
22407 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22408 return;
22409
22410 /* Print an iWMMXt control register name. */
22411 case 'w':
22412 if (!CONST_INT_P (x)
22413 || INTVAL (x) < 0
22414 || INTVAL (x) >= 16)
22415 /* Bad value for wC register number. */
22416 {
22417 output_operand_lossage ("invalid operand for code '%c'", code);
22418 return;
22419 }
22420
22421 else
22422 {
22423 static const char * wc_reg_names [16] =
22424 {
22425 "wCID", "wCon", "wCSSF", "wCASF",
22426 "wC4", "wC5", "wC6", "wC7",
22427 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22428 "wC12", "wC13", "wC14", "wC15"
22429 };
22430
22431 fputs (wc_reg_names [INTVAL (x)], stream);
22432 }
22433 return;
22434
22435 /* Print the high single-precision register of a VFP double-precision
22436 register. */
22437 case 'p':
22438 {
22439 machine_mode mode = GET_MODE (x);
22440 int regno;
22441
22442 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22443 {
22444 output_operand_lossage ("invalid operand for code '%c'", code);
22445 return;
22446 }
22447
22448 regno = REGNO (x);
22449 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22450 {
22451 output_operand_lossage ("invalid operand for code '%c'", code);
22452 return;
22453 }
22454
22455 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22456 }
22457 return;
22458
22459 /* Print a VFP/Neon double precision or quad precision register name. */
22460 case 'P':
22461 case 'q':
22462 {
22463 machine_mode mode = GET_MODE (x);
22464 int is_quad = (code == 'q');
22465 int regno;
22466
22467 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22468 {
22469 output_operand_lossage ("invalid operand for code '%c'", code);
22470 return;
22471 }
22472
22473 if (!REG_P (x)
22474 || !IS_VFP_REGNUM (REGNO (x)))
22475 {
22476 output_operand_lossage ("invalid operand for code '%c'", code);
22477 return;
22478 }
22479
22480 regno = REGNO (x);
22481 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22482 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22483 {
22484 output_operand_lossage ("invalid operand for code '%c'", code);
22485 return;
22486 }
22487
22488 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22489 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22490 }
22491 return;
22492
22493 /* These two codes print the low/high doubleword register of a Neon quad
22494 register, respectively. For pair-structure types, can also print
22495 low/high quadword registers. */
22496 case 'e':
22497 case 'f':
22498 {
22499 machine_mode mode = GET_MODE (x);
22500 int regno;
22501
22502 if ((GET_MODE_SIZE (mode) != 16
22503 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22504 {
22505 output_operand_lossage ("invalid operand for code '%c'", code);
22506 return;
22507 }
22508
22509 regno = REGNO (x);
22510 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22511 {
22512 output_operand_lossage ("invalid operand for code '%c'", code);
22513 return;
22514 }
22515
22516 if (GET_MODE_SIZE (mode) == 16)
22517 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22518 + (code == 'f' ? 1 : 0));
22519 else
22520 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22521 + (code == 'f' ? 1 : 0));
22522 }
22523 return;
22524
22525 /* Print a VFPv3 floating-point constant, represented as an integer
22526 index. */
22527 case 'G':
22528 {
22529 int index = vfp3_const_double_index (x);
22530 gcc_assert (index != -1);
22531 fprintf (stream, "%d", index);
22532 }
22533 return;
22534
22535 /* Print bits representing opcode features for Neon.
22536
22537 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22538 and polynomials as unsigned.
22539
22540 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22541
22542 Bit 2 is 1 for rounding functions, 0 otherwise. */
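/* That is, for the low two bits: 0 prints 'u' (unsigned integer), 1 prints
   's' (signed integer), 2 prints 'p' (polynomial) and 3 prints 'f' (float);
   bit 2 makes the 'O' code below print an "r" suffix for the rounding
   variants.  */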
22543
22544 /* Identify the type as 's', 'u', 'p' or 'f'. */
22545 case 'T':
22546 {
22547 HOST_WIDE_INT bits = INTVAL (x);
22548 fputc ("uspf"[bits & 3], stream);
22549 }
22550 return;
22551
22552 /* Likewise, but signed and unsigned integers are both 'i'. */
22553 case 'F':
22554 {
22555 HOST_WIDE_INT bits = INTVAL (x);
22556 fputc ("iipf"[bits & 3], stream);
22557 }
22558 return;
22559
22560 /* As for 'T', but emit 'u' instead of 'p'. */
22561 case 't':
22562 {
22563 HOST_WIDE_INT bits = INTVAL (x);
22564 fputc ("usuf"[bits & 3], stream);
22565 }
22566 return;
22567
22568 /* Bit 2: rounding (vs none). */
22569 case 'O':
22570 {
22571 HOST_WIDE_INT bits = INTVAL (x);
22572 fputs ((bits & 4) != 0 ? "r" : "", stream);
22573 }
22574 return;
22575
22576 /* Memory operand for vld1/vst1 instruction. */
22577 case 'A':
22578 {
22579 rtx addr;
22580 bool postinc = FALSE;
22581 rtx postinc_reg = NULL;
22582 unsigned align, memsize, align_bits;
22583
22584 gcc_assert (MEM_P (x));
22585 addr = XEXP (x, 0);
22586 if (GET_CODE (addr) == POST_INC)
22587 {
22588 postinc = 1;
22589 addr = XEXP (addr, 0);
22590 }
22591 if (GET_CODE (addr) == POST_MODIFY)
22592 {
22593 postinc_reg = XEXP( XEXP (addr, 1), 1);
22594 addr = XEXP (addr, 0);
22595 }
22596 asm_fprintf (stream, "[%r", REGNO (addr));
22597
22598 /* We know the alignment of this access, so we can emit a hint in the
22599 instruction (for some alignments) as an aid to the memory subsystem
22600 of the target. */
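/* For example, a 16-byte access known to be 16-byte aligned prints its
   address as "[r0:128]" (assuming the base register is r0).  */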
22601 align = MEM_ALIGN (x) >> 3;
22602 memsize = MEM_SIZE (x);
22603
22604 /* Only certain alignment specifiers are supported by the hardware. */
22605 if (memsize == 32 && (align % 32) == 0)
22606 align_bits = 256;
22607 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22608 align_bits = 128;
22609 else if (memsize >= 8 && (align % 8) == 0)
22610 align_bits = 64;
22611 else
22612 align_bits = 0;
22613
22614 if (align_bits != 0)
22615 asm_fprintf (stream, ":%d", align_bits);
22616
22617 asm_fprintf (stream, "]");
22618
22619 if (postinc)
22620 fputs("!", stream);
22621 if (postinc_reg)
22622 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22623 }
22624 return;
22625
22626 case 'C':
22627 {
22628 rtx addr;
22629
22630 gcc_assert (MEM_P (x));
22631 addr = XEXP (x, 0);
22632 gcc_assert (REG_P (addr));
22633 asm_fprintf (stream, "[%r]", REGNO (addr));
22634 }
22635 return;
22636
22637 /* Translate an S register number into a D register number and element index. */
22638 case 'y':
22639 {
22640 machine_mode mode = GET_MODE (x);
22641 int regno;
22642
22643 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22644 {
22645 output_operand_lossage ("invalid operand for code '%c'", code);
22646 return;
22647 }
22648
22649 regno = REGNO (x);
22650 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22651 {
22652 output_operand_lossage ("invalid operand for code '%c'", code);
22653 return;
22654 }
22655
22656 regno = regno - FIRST_VFP_REGNUM;
22657 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22658 }
22659 return;
22660
22661 case 'v':
22662 gcc_assert (CONST_DOUBLE_P (x));
22663 int result;
22664 result = vfp3_const_double_for_fract_bits (x);
22665 if (result == 0)
22666 result = vfp3_const_double_for_bits (x);
22667 fprintf (stream, "#%d", result);
22668 return;
22669
22670 /* Register specifier for vld1.16/vst1.16. Translate the S register
22671 number into a D register number and element index. */
22672 case 'z':
22673 {
22674 machine_mode mode = GET_MODE (x);
22675 int regno;
22676
22677 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22678 {
22679 output_operand_lossage ("invalid operand for code '%c'", code);
22680 return;
22681 }
22682
22683 regno = REGNO (x);
22684 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22685 {
22686 output_operand_lossage ("invalid operand for code '%c'", code);
22687 return;
22688 }
22689
22690 regno = regno - FIRST_VFP_REGNUM;
22691 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22692 }
22693 return;
22694
22695 default:
22696 if (x == 0)
22697 {
22698 output_operand_lossage ("missing operand");
22699 return;
22700 }
22701
22702 switch (GET_CODE (x))
22703 {
22704 case REG:
22705 asm_fprintf (stream, "%r", REGNO (x));
22706 break;
22707
22708 case MEM:
22709 output_address (GET_MODE (x), XEXP (x, 0));
22710 break;
22711
22712 case CONST_DOUBLE:
22713 {
22714 char fpstr[20];
22715 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22716 sizeof (fpstr), 0, 1);
22717 fprintf (stream, "#%s", fpstr);
22718 }
22719 break;
22720
22721 default:
22722 gcc_assert (GET_CODE (x) != NEG);
22723 fputc ('#', stream);
22724 if (GET_CODE (x) == HIGH)
22725 {
22726 fputs (":lower16:", stream);
22727 x = XEXP (x, 0);
22728 }
22729
22730 output_addr_const (stream, x);
22731 break;
22732 }
22733 }
22734 }
22735 \f
22736 /* Target hook for printing a memory address. */
22737 static void
22738 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22739 {
22740 if (TARGET_32BIT)
22741 {
22742 int is_minus = GET_CODE (x) == MINUS;
22743
22744 if (REG_P (x))
22745 asm_fprintf (stream, "[%r]", REGNO (x));
22746 else if (GET_CODE (x) == PLUS || is_minus)
22747 {
22748 rtx base = XEXP (x, 0);
22749 rtx index = XEXP (x, 1);
22750 HOST_WIDE_INT offset = 0;
22751 if (!REG_P (base)
22752 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22753 {
22754 /* Ensure that BASE is a register. */
22755 /* (one of them must be). */
22756 /* Also ensure the SP is not used as an index register. */
22757 std::swap (base, index);
22758 }
22759 switch (GET_CODE (index))
22760 {
22761 case CONST_INT:
22762 offset = INTVAL (index);
22763 if (is_minus)
22764 offset = -offset;
22765 asm_fprintf (stream, "[%r, #%wd]",
22766 REGNO (base), offset);
22767 break;
22768
22769 case REG:
22770 asm_fprintf (stream, "[%r, %s%r]",
22771 REGNO (base), is_minus ? "-" : "",
22772 REGNO (index));
22773 break;
22774
22775 case MULT:
22776 case ASHIFTRT:
22777 case LSHIFTRT:
22778 case ASHIFT:
22779 case ROTATERT:
22780 {
22781 asm_fprintf (stream, "[%r, %s%r",
22782 REGNO (base), is_minus ? "-" : "",
22783 REGNO (XEXP (index, 0)));
22784 arm_print_operand (stream, index, 'S');
22785 fputs ("]", stream);
22786 break;
22787 }
22788
22789 default:
22790 gcc_unreachable ();
22791 }
22792 }
22793 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22794 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22795 {
22796 gcc_assert (REG_P (XEXP (x, 0)));
22797
22798 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22799 asm_fprintf (stream, "[%r, #%s%d]!",
22800 REGNO (XEXP (x, 0)),
22801 GET_CODE (x) == PRE_DEC ? "-" : "",
22802 GET_MODE_SIZE (mode));
22803 else
22804 asm_fprintf (stream, "[%r], #%s%d",
22805 REGNO (XEXP (x, 0)),
22806 GET_CODE (x) == POST_DEC ? "-" : "",
22807 GET_MODE_SIZE (mode));
22808 }
22809 else if (GET_CODE (x) == PRE_MODIFY)
22810 {
22811 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22812 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22813 asm_fprintf (stream, "#%wd]!",
22814 INTVAL (XEXP (XEXP (x, 1), 1)));
22815 else
22816 asm_fprintf (stream, "%r]!",
22817 REGNO (XEXP (XEXP (x, 1), 1)));
22818 }
22819 else if (GET_CODE (x) == POST_MODIFY)
22820 {
22821 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22822 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22823 asm_fprintf (stream, "#%wd",
22824 INTVAL (XEXP (XEXP (x, 1), 1)));
22825 else
22826 asm_fprintf (stream, "%r",
22827 REGNO (XEXP (XEXP (x, 1), 1)));
22828 }
22829 else output_addr_const (stream, x);
22830 }
22831 else
22832 {
22833 if (REG_P (x))
22834 asm_fprintf (stream, "[%r]", REGNO (x));
22835 else if (GET_CODE (x) == POST_INC)
22836 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22837 else if (GET_CODE (x) == PLUS)
22838 {
22839 gcc_assert (REG_P (XEXP (x, 0)));
22840 if (CONST_INT_P (XEXP (x, 1)))
22841 asm_fprintf (stream, "[%r, #%wd]",
22842 REGNO (XEXP (x, 0)),
22843 INTVAL (XEXP (x, 1)));
22844 else
22845 asm_fprintf (stream, "[%r, %r]",
22846 REGNO (XEXP (x, 0)),
22847 REGNO (XEXP (x, 1)));
22848 }
22849 else
22850 output_addr_const (stream, x);
22851 }
22852 }
22853 \f
22854 /* Target hook for indicating whether a punctuation character for
22855 TARGET_PRINT_OPERAND is valid. */
22856 static bool
22857 arm_print_operand_punct_valid_p (unsigned char code)
22858 {
22859 return (code == '@' || code == '|' || code == '.'
22860 || code == '(' || code == ')' || code == '#'
22861 || (TARGET_32BIT && (code == '?'))
22862 || (TARGET_THUMB2 && (code == '!'))
22863 || (TARGET_THUMB && (code == '_')));
22864 }
22865 \f
22866 /* Target hook for assembling integer objects. The ARM version needs to
22867 handle word-sized values specially. */
22868 static bool
22869 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22870 {
22871 machine_mode mode;
22872
22873 if (size == UNITS_PER_WORD && aligned_p)
22874 {
22875 fputs ("\t.word\t", asm_out_file);
22876 output_addr_const (asm_out_file, x);
22877
22878 /* Mark symbols as position independent. We only do this in the
22879 .text segment, not in the .data segment. */
22880 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22881 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22882 {
22883 /* See legitimize_pic_address for an explanation of the
22884 TARGET_VXWORKS_RTP check. */
22885 /* References to weak symbols cannot be resolved locally:
22886 they may be overridden by a non-weak definition at link
22887 time. */
22888 if (!arm_pic_data_is_text_relative
22889 || (GET_CODE (x) == SYMBOL_REF
22890 && (!SYMBOL_REF_LOCAL_P (x)
22891 || (SYMBOL_REF_DECL (x)
22892 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22893 fputs ("(GOT)", asm_out_file);
22894 else
22895 fputs ("(GOTOFF)", asm_out_file);
22896 }
22897 fputc ('\n', asm_out_file);
22898 return true;
22899 }
22900
22901 mode = GET_MODE (x);
22902
22903 if (arm_vector_mode_supported_p (mode))
22904 {
22905 int i, units;
22906
22907 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22908
22909 units = CONST_VECTOR_NUNITS (x);
22910 size = GET_MODE_UNIT_SIZE (mode);
22911
22912 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22913 for (i = 0; i < units; i++)
22914 {
22915 rtx elt = CONST_VECTOR_ELT (x, i);
22916 assemble_integer
22917 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22918 }
22919 else
22920 for (i = 0; i < units; i++)
22921 {
22922 rtx elt = CONST_VECTOR_ELT (x, i);
22923 assemble_real
22924 (*CONST_DOUBLE_REAL_VALUE (elt),
22925 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22926 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22927 }
22928
22929 return true;
22930 }
22931
22932 return default_assemble_integer (x, size, aligned_p);
22933 }
22934
22935 static void
22936 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22937 {
22938 section *s;
22939
22940 if (!TARGET_AAPCS_BASED)
22941 {
22942 (is_ctor ?
22943 default_named_section_asm_out_constructor
22944 : default_named_section_asm_out_destructor) (symbol, priority);
22945 return;
22946 }
22947
22948 /* Put these in the .init_array section, using a special relocation. */
22949 if (priority != DEFAULT_INIT_PRIORITY)
22950 {
22951 char buf[18];
22952 sprintf (buf, "%s.%.5u",
22953 is_ctor ? ".init_array" : ".fini_array",
22954 priority);
22955 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22956 }
22957 else if (is_ctor)
22958 s = ctors_section;
22959 else
22960 s = dtors_section;
22961
22962 switch_to_section (s);
22963 assemble_align (POINTER_SIZE);
22964 fputs ("\t.word\t", asm_out_file);
22965 output_addr_const (asm_out_file, symbol);
22966 fputs ("(target1)\n", asm_out_file);
22967 }
22968
22969 /* Add a function to the list of static constructors. */
22970
22971 static void
22972 arm_elf_asm_constructor (rtx symbol, int priority)
22973 {
22974 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22975 }
22976
22977 /* Add a function to the list of static destructors. */
22978
22979 static void
22980 arm_elf_asm_destructor (rtx symbol, int priority)
22981 {
22982 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22983 }
22984 \f
22985 /* A finite state machine takes care of noticing whether or not instructions
22986 can be conditionally executed, and thus decrease execution time and code
22987 size by deleting branch instructions. The fsm is controlled by
22988 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22989
22990 /* The states of the fsm controlling condition codes are:
22991 0: normal, do nothing special
22992 1: make ASM_OUTPUT_OPCODE not output this instruction
22993 2: make ASM_OUTPUT_OPCODE not output this instruction
22994 3: make instructions conditional
22995 4: make instructions conditional
22996
22997 State transitions (state->state by whom under condition):
22998 0 -> 1 final_prescan_insn if the `target' is a label
22999 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
23000 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
23001 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
23002 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
23003 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
23004 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
23005 (the target insn is arm_target_insn).
23006
23007 If the jump clobbers the conditions then we use states 2 and 4.
23008
23009 A similar thing can be done with conditional return insns.
23010
23011 XXX In case the `target' is an unconditional branch, this conditionalising
23012 of the instructions always reduces code size, but not always execution
23013 time. But then, I want to reduce the code size to somewhere near what
23014 /bin/cc produces. */
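/* For illustration (hypothetical input, ARM state): a sequence such as

       cmp     r0, #0
       beq     .L1
       add     r1, r1, #1
   .L1:

   can be output as

       cmp     r0, #0
       addne   r1, r1, #1

   with the branch and its label suppressed and the skipped instruction made
   conditional on the inverse condition.  */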
23015
23016 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
23017 instructions. When a COND_EXEC instruction is seen the subsequent
23018 instructions are scanned so that multiple conditional instructions can be
23019 combined into a single IT block. arm_condexec_count and arm_condexec_mask
23020 specify the length and true/false mask for the IT block. These will be
23021 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
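/* For illustration (hypothetical Thumb-2 input): three consecutive
   COND_EXEC insns addeq / subeq / movne can be emitted as one block,
   "itte eq" followed by the three instructions, rather than as three
   separate IT blocks.  */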
23022
23023 /* Returns the index of the ARM condition code string in
23024 `arm_condition_codes', or ARM_NV if the comparison is invalid.
23025 COMPARISON should be an rtx like `(eq (...) (...))'. */
23026
23027 enum arm_cond_code
23028 maybe_get_arm_condition_code (rtx comparison)
23029 {
23030 machine_mode mode = GET_MODE (XEXP (comparison, 0));
23031 enum arm_cond_code code;
23032 enum rtx_code comp_code = GET_CODE (comparison);
23033
23034 if (GET_MODE_CLASS (mode) != MODE_CC)
23035 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
23036 XEXP (comparison, 1));
23037
23038 switch (mode)
23039 {
23040 case E_CC_DNEmode: code = ARM_NE; goto dominance;
23041 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
23042 case E_CC_DGEmode: code = ARM_GE; goto dominance;
23043 case E_CC_DGTmode: code = ARM_GT; goto dominance;
23044 case E_CC_DLEmode: code = ARM_LE; goto dominance;
23045 case E_CC_DLTmode: code = ARM_LT; goto dominance;
23046 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
23047 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
23048 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
23049 case E_CC_DLTUmode: code = ARM_CC;
23050
23051 dominance:
23052 if (comp_code == EQ)
23053 return ARM_INVERSE_CONDITION_CODE (code);
23054 if (comp_code == NE)
23055 return code;
23056 return ARM_NV;
23057
23058 case E_CC_NOOVmode:
23059 switch (comp_code)
23060 {
23061 case NE: return ARM_NE;
23062 case EQ: return ARM_EQ;
23063 case GE: return ARM_PL;
23064 case LT: return ARM_MI;
23065 default: return ARM_NV;
23066 }
23067
23068 case E_CC_Zmode:
23069 switch (comp_code)
23070 {
23071 case NE: return ARM_NE;
23072 case EQ: return ARM_EQ;
23073 default: return ARM_NV;
23074 }
23075
23076 case E_CC_Nmode:
23077 switch (comp_code)
23078 {
23079 case NE: return ARM_MI;
23080 case EQ: return ARM_PL;
23081 default: return ARM_NV;
23082 }
23083
23084 case E_CCFPEmode:
23085 case E_CCFPmode:
23086 /* We can handle all cases except UNEQ and LTGT. */
23087 switch (comp_code)
23088 {
23089 case GE: return ARM_GE;
23090 case GT: return ARM_GT;
23091 case LE: return ARM_LS;
23092 case LT: return ARM_MI;
23093 case NE: return ARM_NE;
23094 case EQ: return ARM_EQ;
23095 case ORDERED: return ARM_VC;
23096 case UNORDERED: return ARM_VS;
23097 case UNLT: return ARM_LT;
23098 case UNLE: return ARM_LE;
23099 case UNGT: return ARM_HI;
23100 case UNGE: return ARM_PL;
23101 /* UNEQ and LTGT do not have a representation. */
23102 case UNEQ: /* Fall through. */
23103 case LTGT: /* Fall through. */
23104 default: return ARM_NV;
23105 }
23106
23107 case E_CC_SWPmode:
23108 switch (comp_code)
23109 {
23110 case NE: return ARM_NE;
23111 case EQ: return ARM_EQ;
23112 case GE: return ARM_LE;
23113 case GT: return ARM_LT;
23114 case LE: return ARM_GE;
23115 case LT: return ARM_GT;
23116 case GEU: return ARM_LS;
23117 case GTU: return ARM_CC;
23118 case LEU: return ARM_CS;
23119 case LTU: return ARM_HI;
23120 default: return ARM_NV;
23121 }
23122
23123 case E_CC_Cmode:
23124 switch (comp_code)
23125 {
23126 case LTU: return ARM_CS;
23127 case GEU: return ARM_CC;
23128 case NE: return ARM_CS;
23129 case EQ: return ARM_CC;
23130 default: return ARM_NV;
23131 }
23132
23133 case E_CC_CZmode:
23134 switch (comp_code)
23135 {
23136 case NE: return ARM_NE;
23137 case EQ: return ARM_EQ;
23138 case GEU: return ARM_CS;
23139 case GTU: return ARM_HI;
23140 case LEU: return ARM_LS;
23141 case LTU: return ARM_CC;
23142 default: return ARM_NV;
23143 }
23144
23145 case E_CC_NCVmode:
23146 switch (comp_code)
23147 {
23148 case GE: return ARM_GE;
23149 case LT: return ARM_LT;
23150 case GEU: return ARM_CS;
23151 case LTU: return ARM_CC;
23152 default: return ARM_NV;
23153 }
23154
23155 case E_CC_Vmode:
23156 switch (comp_code)
23157 {
23158 case NE: return ARM_VS;
23159 case EQ: return ARM_VC;
23160 default: return ARM_NV;
23161 }
23162
23163 case E_CCmode:
23164 switch (comp_code)
23165 {
23166 case NE: return ARM_NE;
23167 case EQ: return ARM_EQ;
23168 case GE: return ARM_GE;
23169 case GT: return ARM_GT;
23170 case LE: return ARM_LE;
23171 case LT: return ARM_LT;
23172 case GEU: return ARM_CS;
23173 case GTU: return ARM_HI;
23174 case LEU: return ARM_LS;
23175 case LTU: return ARM_CC;
23176 default: return ARM_NV;
23177 }
23178
23179 default: gcc_unreachable ();
23180 }
23181 }
23182
23183 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
23184 static enum arm_cond_code
23185 get_arm_condition_code (rtx comparison)
23186 {
23187 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
23188 gcc_assert (code != ARM_NV);
23189 return code;
23190 }
23191
23192 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
23193 code registers when not targeting Thumb1. The VFP condition register
23194 only exists when generating hard-float code. */
23195 static bool
23196 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
23197 {
23198 if (!TARGET_32BIT)
23199 return false;
23200
23201 *p1 = CC_REGNUM;
23202 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
23203 return true;
23204 }
23205
23206 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23207 instructions. */
23208 void
23209 thumb2_final_prescan_insn (rtx_insn *insn)
23210 {
23211 rtx_insn *first_insn = insn;
23212 rtx body = PATTERN (insn);
23213 rtx predicate;
23214 enum arm_cond_code code;
23215 int n;
23216 int mask;
23217 int max;
23218
23219 /* max_insns_skipped in the tune was already taken into account in the
23220 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
23221 just emit the IT blocks as best we can. It does not make sense to split
23222 the IT blocks. */
23223 max = MAX_INSN_PER_IT_BLOCK;
23224
23225 /* Remove the previous insn from the count of insns to be output. */
23226 if (arm_condexec_count)
23227 arm_condexec_count--;
23228
23229 /* Nothing to do if we are already inside a conditional block. */
23230 if (arm_condexec_count)
23231 return;
23232
23233 if (GET_CODE (body) != COND_EXEC)
23234 return;
23235
23236 /* Conditional jumps are implemented directly. */
23237 if (JUMP_P (insn))
23238 return;
23239
23240 predicate = COND_EXEC_TEST (body);
23241 arm_current_cc = get_arm_condition_code (predicate);
23242
23243 n = get_attr_ce_count (insn);
23244 arm_condexec_count = 1;
23245 arm_condexec_mask = (1 << n) - 1;
23246 arm_condexec_masklen = n;
23247 /* See if subsequent instructions can be combined into the same block. */
23248 for (;;)
23249 {
23250 insn = next_nonnote_insn (insn);
23251
23252 /* Jumping into the middle of an IT block is illegal, so a label or
23253 barrier terminates the block. */
23254 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23255 break;
23256
23257 body = PATTERN (insn);
23258 /* USE and CLOBBER aren't really insns, so just skip them. */
23259 if (GET_CODE (body) == USE
23260 || GET_CODE (body) == CLOBBER)
23261 continue;
23262
23263 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23264 if (GET_CODE (body) != COND_EXEC)
23265 break;
23266 /* Maximum number of conditionally executed instructions in a block. */
23267 n = get_attr_ce_count (insn);
23268 if (arm_condexec_masklen + n > max)
23269 break;
23270
23271 predicate = COND_EXEC_TEST (body);
23272 code = get_arm_condition_code (predicate);
23273 mask = (1 << n) - 1;
23274 if (arm_current_cc == code)
23275 arm_condexec_mask |= (mask << arm_condexec_masklen);
23276 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23277 break;
23278
23279 arm_condexec_count++;
23280 arm_condexec_masklen += n;
23281
23282 /* A jump must be the last instruction in a conditional block. */
23283 if (JUMP_P (insn))
23284 break;
23285 }
23286 /* Restore recog_data (getting the attributes of other insns can
23287 destroy this array, but final.c assumes that it remains intact
23288 across this call). */
23289 extract_constrain_insn_cached (first_insn);
23290 }
23291
23292 void
23293 arm_final_prescan_insn (rtx_insn *insn)
23294 {
23295 /* BODY will hold the body of INSN. */
23296 rtx body = PATTERN (insn);
23297
23298 /* This will be 1 if trying to repeat the trick, and things need to be
23299 reversed if it appears to fail. */
23300 int reverse = 0;
23301
23302 /* If we start with a return insn, we only succeed if we find another one. */
23303 int seeking_return = 0;
23304 enum rtx_code return_code = UNKNOWN;
23305
23306 /* START_INSN will hold the insn from where we start looking. This is the
23307 first insn after the following code_label if REVERSE is true. */
23308 rtx_insn *start_insn = insn;
23309
23310 /* If in state 4, check if the target branch is reached, in order to
23311 change back to state 0. */
23312 if (arm_ccfsm_state == 4)
23313 {
23314 if (insn == arm_target_insn)
23315 {
23316 arm_target_insn = NULL;
23317 arm_ccfsm_state = 0;
23318 }
23319 return;
23320 }
23321
23322 /* If in state 3, it is possible to repeat the trick, if this insn is an
23323 unconditional branch to a label, and immediately following this branch
23324 is the previous target label which is only used once, and the label this
23325 branch jumps to is not too far off. */
23326 if (arm_ccfsm_state == 3)
23327 {
23328 if (simplejump_p (insn))
23329 {
23330 start_insn = next_nonnote_insn (start_insn);
23331 if (BARRIER_P (start_insn))
23332 {
23333 /* XXX Isn't this always a barrier? */
23334 start_insn = next_nonnote_insn (start_insn);
23335 }
23336 if (LABEL_P (start_insn)
23337 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23338 && LABEL_NUSES (start_insn) == 1)
23339 reverse = TRUE;
23340 else
23341 return;
23342 }
23343 else if (ANY_RETURN_P (body))
23344 {
23345 start_insn = next_nonnote_insn (start_insn);
23346 if (BARRIER_P (start_insn))
23347 start_insn = next_nonnote_insn (start_insn);
23348 if (LABEL_P (start_insn)
23349 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23350 && LABEL_NUSES (start_insn) == 1)
23351 {
23352 reverse = TRUE;
23353 seeking_return = 1;
23354 return_code = GET_CODE (body);
23355 }
23356 else
23357 return;
23358 }
23359 else
23360 return;
23361 }
23362
23363 gcc_assert (!arm_ccfsm_state || reverse);
23364 if (!JUMP_P (insn))
23365 return;
23366
23367 /* This jump might be in a PARALLEL with a clobber of the condition codes;
23368 the jump should always come first. */
23369 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23370 body = XVECEXP (body, 0, 0);
23371
23372 if (reverse
23373 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23374 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23375 {
23376 int insns_skipped;
23377 int fail = FALSE, succeed = FALSE;
23378 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23379 int then_not_else = TRUE;
23380 rtx_insn *this_insn = start_insn;
23381 rtx label = 0;
23382
23383 /* Register the insn jumped to. */
23384 if (reverse)
23385 {
23386 if (!seeking_return)
23387 label = XEXP (SET_SRC (body), 0);
23388 }
23389 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23390 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23391 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23392 {
23393 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23394 then_not_else = FALSE;
23395 }
23396 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23397 {
23398 seeking_return = 1;
23399 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23400 }
23401 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23402 {
23403 seeking_return = 1;
23404 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23405 then_not_else = FALSE;
23406 }
23407 else
23408 gcc_unreachable ();
23409
23410 /* See how many insns this branch skips, and what kind of insns. If all
23411 insns are okay, and the label or unconditional branch to the same
23412 label is not too far away, succeed. */
23413 for (insns_skipped = 0;
23414 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23415 {
23416 rtx scanbody;
23417
23418 this_insn = next_nonnote_insn (this_insn);
23419 if (!this_insn)
23420 break;
23421
23422 switch (GET_CODE (this_insn))
23423 {
23424 case CODE_LABEL:
23425 /* Succeed if it is the target label, otherwise fail since
23426 control falls in from somewhere else. */
23427 if (this_insn == label)
23428 {
23429 arm_ccfsm_state = 1;
23430 succeed = TRUE;
23431 }
23432 else
23433 fail = TRUE;
23434 break;
23435
23436 case BARRIER:
23437 /* Succeed if the following insn is the target label.
23438 Otherwise fail.
23439 If return insns are used then the last insn in a function
23440 will be a barrier. */
23441 this_insn = next_nonnote_insn (this_insn);
23442 if (this_insn && this_insn == label)
23443 {
23444 arm_ccfsm_state = 1;
23445 succeed = TRUE;
23446 }
23447 else
23448 fail = TRUE;
23449 break;
23450
23451 case CALL_INSN:
23452 /* The AAPCS says that conditional calls should not be
23453 used since they make interworking inefficient (the
23454 linker can't transform BL<cond> into BLX). That's
23455 only a problem if the machine has BLX. */
23456 if (arm_arch5)
23457 {
23458 fail = TRUE;
23459 break;
23460 }
23461
23462 /* Succeed if the following insn is the target label, or
23463 if the following two insns are a barrier and the
23464 target label. */
23465 this_insn = next_nonnote_insn (this_insn);
23466 if (this_insn && BARRIER_P (this_insn))
23467 this_insn = next_nonnote_insn (this_insn);
23468
23469 if (this_insn && this_insn == label
23470 && insns_skipped < max_insns_skipped)
23471 {
23472 arm_ccfsm_state = 1;
23473 succeed = TRUE;
23474 }
23475 else
23476 fail = TRUE;
23477 break;
23478
23479 case JUMP_INSN:
23480 /* If this is an unconditional branch to the same label, succeed.
23481 If it is to another label, do nothing. If it is conditional,
23482 fail. */
23483 /* XXX Probably, the tests for SET and the PC are
23484 unnecessary. */
23485
23486 scanbody = PATTERN (this_insn);
23487 if (GET_CODE (scanbody) == SET
23488 && GET_CODE (SET_DEST (scanbody)) == PC)
23489 {
23490 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23491 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23492 {
23493 arm_ccfsm_state = 2;
23494 succeed = TRUE;
23495 }
23496 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23497 fail = TRUE;
23498 }
23499 /* Fail if a conditional return is undesirable (e.g. on a
23500 StrongARM), but still allow this if optimizing for size. */
23501 else if (GET_CODE (scanbody) == return_code
23502 && !use_return_insn (TRUE, NULL)
23503 && !optimize_size)
23504 fail = TRUE;
23505 else if (GET_CODE (scanbody) == return_code)
23506 {
23507 arm_ccfsm_state = 2;
23508 succeed = TRUE;
23509 }
23510 else if (GET_CODE (scanbody) == PARALLEL)
23511 {
23512 switch (get_attr_conds (this_insn))
23513 {
23514 case CONDS_NOCOND:
23515 break;
23516 default:
23517 fail = TRUE;
23518 break;
23519 }
23520 }
23521 else
23522 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23523
23524 break;
23525
23526 case INSN:
23527 /* Instructions using or affecting the condition codes make it
23528 fail. */
23529 scanbody = PATTERN (this_insn);
23530 if (!(GET_CODE (scanbody) == SET
23531 || GET_CODE (scanbody) == PARALLEL)
23532 || get_attr_conds (this_insn) != CONDS_NOCOND)
23533 fail = TRUE;
23534 break;
23535
23536 default:
23537 break;
23538 }
23539 }
23540 if (succeed)
23541 {
23542 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23543 arm_target_label = CODE_LABEL_NUMBER (label);
23544 else
23545 {
23546 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23547
23548 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23549 {
23550 this_insn = next_nonnote_insn (this_insn);
23551 gcc_assert (!this_insn
23552 || (!BARRIER_P (this_insn)
23553 && !LABEL_P (this_insn)));
23554 }
23555 if (!this_insn)
23556 {
23557 /* Oh dear! We ran off the end; give up. */
23558 extract_constrain_insn_cached (insn);
23559 arm_ccfsm_state = 0;
23560 arm_target_insn = NULL;
23561 return;
23562 }
23563 arm_target_insn = this_insn;
23564 }
23565
23566 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23567 what it was. */
23568 if (!reverse)
23569 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23570
23571 if (reverse || then_not_else)
23572 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23573 }
23574
23575 /* Restore recog_data (getting the attributes of other insns can
23576 destroy this array, but final.c assumes that it remains intact
23577 across this call). */
23578 extract_constrain_insn_cached (insn);
23579 }
23580 }
23581
23582 /* Output IT instructions. */
23583 void
23584 thumb2_asm_output_opcode (FILE * stream)
23585 {
23586 char buff[5];
23587 int n;
23588
23589 if (arm_condexec_mask)
23590 {
23591 for (n = 0; n < arm_condexec_masklen; n++)
23592 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23593 buff[n] = 0;
23594 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23595 arm_condition_codes[arm_current_cc]);
23596 arm_condexec_mask = 0;
23597 }
23598 }
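/* For example, if the current conditional block holds two instructions,
   the first executed when EQ holds and the second when NE holds, then
   arm_condexec_mask is 0b01 over a length of two, buff becomes "te",
   and the code above emits "ite eq" immediately before the first
   instruction (an illustrative sketch of the usual Thumb-2 IT syntax).  */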
23599
23600 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
23601 UNITS_PER_WORD bytes wide. */
23602 static unsigned int
23603 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
23604 {
23605 if (TARGET_32BIT
23606 && regno > PC_REGNUM
23607 && regno != FRAME_POINTER_REGNUM
23608 && regno != ARG_POINTER_REGNUM
23609 && !IS_VFP_REGNUM (regno))
23610 return 1;
23611
23612 return ARM_NUM_REGS (mode);
23613 }
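/* Illustrative sketch: SImode occupies a single core register and DImode
   two consecutive core registers, since ARM_NUM_REGS rounds the mode size
   up to whole words; a status register such as CC_REGNUM counts as a
   single register regardless of mode on 32-bit targets.  */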
23614
23615 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23616 static bool
23617 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23618 {
23619 if (GET_MODE_CLASS (mode) == MODE_CC)
23620 return (regno == CC_REGNUM
23621 || (TARGET_HARD_FLOAT
23622 && regno == VFPCC_REGNUM));
23623
23624 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23625 return false;
23626
23627 if (TARGET_THUMB1)
23628 /* For the Thumb we only allow values bigger than SImode in
23629 registers 0 - 6, so that there is always a second low
23630 register available to hold the upper part of the value.
23631 We probably ought to ensure that the register is the
23632 start of an even numbered register pair. */
23633 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23634
23635 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23636 {
23637 if (mode == SFmode || mode == SImode)
23638 return VFP_REGNO_OK_FOR_SINGLE (regno);
23639
23640 if (mode == DFmode)
23641 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23642
23643 if (mode == HFmode)
23644 return VFP_REGNO_OK_FOR_SINGLE (regno);
23645
23646 /* VFP registers can hold HImode values. */
23647 if (mode == HImode)
23648 return VFP_REGNO_OK_FOR_SINGLE (regno);
23649
23650 if (TARGET_NEON)
23651 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23652 || (VALID_NEON_QREG_MODE (mode)
23653 && NEON_REGNO_OK_FOR_QUAD (regno))
23654 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23655 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23656 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23657 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23658 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23659
23660 return false;
23661 }
23662
23663 if (TARGET_REALLY_IWMMXT)
23664 {
23665 if (IS_IWMMXT_GR_REGNUM (regno))
23666 return mode == SImode;
23667
23668 if (IS_IWMMXT_REGNUM (regno))
23669 return VALID_IWMMXT_REG_MODE (mode);
23670 }
23671
23672 /* We allow almost any value to be stored in the general registers.
23673 Restrict doubleword quantities to even register pairs in ARM state
23674 so that we can use ldrd. Do not allow very large Neon structure
23675 opaque modes in general registers; they would use too many. */
23676 if (regno <= LAST_ARM_REGNUM)
23677 {
23678 if (ARM_NUM_REGS (mode) > 4)
23679 return false;
23680
23681 if (TARGET_THUMB2)
23682 return true;
23683
23684 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23685 }
23686
23687 if (regno == FRAME_POINTER_REGNUM
23688 || regno == ARG_POINTER_REGNUM)
23689 /* We only allow integers in the fake hard registers. */
23690 return GET_MODE_CLASS (mode) == MODE_INT;
23691
23692 return false;
23693 }
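/* A few illustrative consequences of the checks above (a sketch, not an
   exhaustive list): in ARM state with LDRD available a DImode value may
   start in an even core register such as r0 but not in an odd one such
   as r1; Thumb-2 drops that odd/even restriction; and large NEON
   structure modes such as CImode or XImode are kept out of the core
   registers because they would need more than four of them.  */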
23694
23695 /* Implement TARGET_MODES_TIEABLE_P. */
23696
23697 static bool
23698 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23699 {
23700 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23701 return true;
23702
23703 /* We specifically want to allow elements of "structure" modes to
23704 be tieable to the structure. This more general condition allows
23705 other rarer situations too. */
23706 if (TARGET_NEON
23707 && (VALID_NEON_DREG_MODE (mode1)
23708 || VALID_NEON_QREG_MODE (mode1)
23709 || VALID_NEON_STRUCT_MODE (mode1))
23710 && (VALID_NEON_DREG_MODE (mode2)
23711 || VALID_NEON_QREG_MODE (mode2)
23712 || VALID_NEON_STRUCT_MODE (mode2)))
23713 return true;
23714
23715 return false;
23716 }
23717
23718 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23719 not used in ARM mode. */
23720
23721 enum reg_class
23722 arm_regno_class (int regno)
23723 {
23724 if (regno == PC_REGNUM)
23725 return NO_REGS;
23726
23727 if (TARGET_THUMB1)
23728 {
23729 if (regno == STACK_POINTER_REGNUM)
23730 return STACK_REG;
23731 if (regno == CC_REGNUM)
23732 return CC_REG;
23733 if (regno < 8)
23734 return LO_REGS;
23735 return HI_REGS;
23736 }
23737
23738 if (TARGET_THUMB2 && regno < 8)
23739 return LO_REGS;
23740
23741 if ( regno <= LAST_ARM_REGNUM
23742 || regno == FRAME_POINTER_REGNUM
23743 || regno == ARG_POINTER_REGNUM)
23744 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23745
23746 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23747 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23748
23749 if (IS_VFP_REGNUM (regno))
23750 {
23751 if (regno <= D7_VFP_REGNUM)
23752 return VFP_D0_D7_REGS;
23753 else if (regno <= LAST_LO_VFP_REGNUM)
23754 return VFP_LO_REGS;
23755 else
23756 return VFP_HI_REGS;
23757 }
23758
23759 if (IS_IWMMXT_REGNUM (regno))
23760 return IWMMXT_REGS;
23761
23762 if (IS_IWMMXT_GR_REGNUM (regno))
23763 return IWMMXT_GR_REGS;
23764
23765 return NO_REGS;
23766 }
23767
23768 /* Handle a special case when computing the offset
23769 of an argument from the frame pointer. */
23770 int
23771 arm_debugger_arg_offset (int value, rtx addr)
23772 {
23773 rtx_insn *insn;
23774
23775 /* We are only interested in the case where dbxout_parms() failed to compute the offset. */
23776 if (value != 0)
23777 return 0;
23778
23779 /* We can only cope with the case where the address is held in a register. */
23780 if (!REG_P (addr))
23781 return 0;
23782
23783 /* If we are using the frame pointer to point at the argument, then
23784 an offset of 0 is correct. */
23785 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23786 return 0;
23787
23788 /* If we are using the stack pointer to point at the
23789 argument, then an offset of 0 is correct. */
23790 /* ??? Check this is consistent with thumb2 frame layout. */
23791 if ((TARGET_THUMB || !frame_pointer_needed)
23792 && REGNO (addr) == SP_REGNUM)
23793 return 0;
23794
23795 /* Oh dear. The argument is pointed to by a register rather
23796 than being held in a register, or being stored at a known
23797 offset from the frame pointer. Since GDB only understands
23798 those two kinds of argument we must translate the address
23799 held in the register into an offset from the frame pointer.
23800 We do this by searching through the insns for the function
23801 looking to see where this register gets its value. If the
23802 register is initialized from the frame pointer plus an offset
23803 then we are in luck and we can continue, otherwise we give up.
23804
23805 This code is exercised by producing debugging information
23806 for a function with arguments like this:
23807
23808 double func (double a, double b, int c, double d) {return d;}
23809
23810 Without this code the stab for parameter 'd' will be set to
23811 an offset of 0 from the frame pointer, rather than 8. */
23812
23813 /* The if() statement says:
23814
23815 If the insn is a normal instruction
23816 and if the insn is setting the value in a register
23817 and if the register being set is the register holding the address of the argument
23818 and if the address is computed by an addition
23819 that involves adding to a register
23820 which is the frame pointer
23821 a constant integer
23822
23823 then... */
23824
23825 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23826 {
23827 if ( NONJUMP_INSN_P (insn)
23828 && GET_CODE (PATTERN (insn)) == SET
23829 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23830 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23831 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23832 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23833 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23834 )
23835 {
23836 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23837
23838 break;
23839 }
23840 }
23841
23842 if (value == 0)
23843 {
23844 debug_rtx (addr);
23845 warning (0, "unable to compute real location of stacked parameter");
23846 value = 8; /* XXX magic hack */
23847 }
23848
23849 return value;
23850 }
23851 \f
23852 /* Implement TARGET_PROMOTED_TYPE. */
23853
23854 static tree
23855 arm_promoted_type (const_tree t)
23856 {
23857 if (SCALAR_FLOAT_TYPE_P (t)
23858 && TYPE_PRECISION (t) == 16
23859 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23860 return float_type_node;
23861 return NULL_TREE;
23862 }
23863
23864 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23865 This simply adds HFmode as a supported mode; even though we don't
23866 implement arithmetic on this type directly, it's supported by
23867 optabs conversions, much the way the double-word arithmetic is
23868 special-cased in the default hook. */
23869
23870 static bool
23871 arm_scalar_mode_supported_p (scalar_mode mode)
23872 {
23873 if (mode == HFmode)
23874 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23875 else if (ALL_FIXED_POINT_MODE_P (mode))
23876 return true;
23877 else
23878 return default_scalar_mode_supported_p (mode);
23879 }
23880
23881 /* Set the value of FLT_EVAL_METHOD.
23882 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23883
23884 0: evaluate all operations and constants, whose semantic type has at
23885 most the range and precision of type float, to the range and
23886 precision of float; evaluate all other operations and constants to
23887 the range and precision of the semantic type;
23888
23889 N, where _FloatN is a supported interchange floating type:
23890 evaluate all operations and constants, whose semantic type has at
23891 most the range and precision of _FloatN type, to the range and
23892 precision of the _FloatN type; evaluate all other operations and
23893 constants to the range and precision of the semantic type;
23894
23895 If we have the ARMv8.2-A extensions then we support _Float16 in native
23896 precision, so we should set this to 16. Otherwise, we support the type,
23897 but want to evaluate expressions in float precision, so set this to
23898 0. */
23899
23900 static enum flt_eval_method
23901 arm_excess_precision (enum excess_precision_type type)
23902 {
23903 switch (type)
23904 {
23905 case EXCESS_PRECISION_TYPE_FAST:
23906 case EXCESS_PRECISION_TYPE_STANDARD:
23907 /* We can calculate either in 16-bit range and precision or
23908 32-bit range and precision. Make that decision based on whether
23909 we have native support for the ARMv8.2-A 16-bit floating-point
23910 instructions or not. */
23911 return (TARGET_VFP_FP16INST
23912 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23913 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23914 case EXCESS_PRECISION_TYPE_IMPLICIT:
23915 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23916 default:
23917 gcc_unreachable ();
23918 }
23919 return FLT_EVAL_METHOD_UNPREDICTABLE;
23920 }
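/* Sketch of the user-visible effect under the standard excess-precision
   settings: given _Float16 operands a, b and c, an expression such as
   a * b + c is evaluated in 32-bit float precision (FLT_EVAL_METHOD == 0)
   unless the ARMv8.2-A FP16 instructions are available, in which case it
   stays in _Float16 precision (FLT_EVAL_METHOD == 16).  */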
23921
23922
23923 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23924 _Float16 if we are using anything other than ieee format for 16-bit
23925 floating point. Otherwise, punt to the default implementation. */
23926 static opt_scalar_float_mode
23927 arm_floatn_mode (int n, bool extended)
23928 {
23929 if (!extended && n == 16)
23930 {
23931 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23932 return HFmode;
23933 return opt_scalar_float_mode ();
23934 }
23935
23936 return default_floatn_mode (n, extended);
23937 }
23938
23939
23940 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23941 not to early-clobber SRC registers in the process.
23942
23943 We assume that the operands described by SRC and DEST represent a
23944 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23945 number of components into which the copy has been decomposed. */
23946 void
23947 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23948 {
23949 unsigned int i;
23950
23951 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23952 || REGNO (operands[0]) < REGNO (operands[1]))
23953 {
23954 for (i = 0; i < count; i++)
23955 {
23956 operands[2 * i] = dest[i];
23957 operands[2 * i + 1] = src[i];
23958 }
23959 }
23960 else
23961 {
23962 for (i = 0; i < count; i++)
23963 {
23964 operands[2 * i] = dest[count - i - 1];
23965 operands[2 * i + 1] = src[count - i - 1];
23966 }
23967 }
23968 }
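/* Hypothetical example: copying a two-component value held in {d1, d2}
   into {d2, d3}. The destination starts at a higher register number and
   overlaps the source, so the loop above orders the component moves in
   reverse (d3 <- d2, then d2 <- d1) and no source register is
   overwritten before it has been read.  */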
23969
23970 /* Split operands into moves from op[1] + op[2] into op[0]. */
23971
23972 void
23973 neon_split_vcombine (rtx operands[3])
23974 {
23975 unsigned int dest = REGNO (operands[0]);
23976 unsigned int src1 = REGNO (operands[1]);
23977 unsigned int src2 = REGNO (operands[2]);
23978 machine_mode halfmode = GET_MODE (operands[1]);
23979 unsigned int halfregs = REG_NREGS (operands[1]);
23980 rtx destlo, desthi;
23981
23982 if (src1 == dest && src2 == dest + halfregs)
23983 {
23984 /* No-op move. Can't split to nothing; emit something. */
23985 emit_note (NOTE_INSN_DELETED);
23986 return;
23987 }
23988
23989 /* Preserve register attributes for variable tracking. */
23990 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23991 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23992 GET_MODE_SIZE (halfmode));
23993
23994 /* Special case of reversed high/low parts. Use VSWP. */
23995 if (src2 == dest && src1 == dest + halfregs)
23996 {
23997 rtx x = gen_rtx_SET (destlo, operands[1]);
23998 rtx y = gen_rtx_SET (desthi, operands[2]);
23999 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
24000 return;
24001 }
24002
24003 if (!reg_overlap_mentioned_p (operands[2], destlo))
24004 {
24005 /* Try to avoid unnecessary moves if part of the result
24006 is in the right place already. */
24007 if (src1 != dest)
24008 emit_move_insn (destlo, operands[1]);
24009 if (src2 != dest + halfregs)
24010 emit_move_insn (desthi, operands[2]);
24011 }
24012 else
24013 {
24014 if (src2 != dest + halfregs)
24015 emit_move_insn (desthi, operands[2]);
24016 if (src1 != dest)
24017 emit_move_insn (destlo, operands[1]);
24018 }
24019 }
24020 \f
24021 /* Return the number (counting from 0) of
24022 the least significant set bit in MASK. */
24023
24024 inline static int
24025 number_of_first_bit_set (unsigned mask)
24026 {
24027 return ctz_hwi (mask);
24028 }
24029
24030 /* Like emit_multi_reg_push, but allowing for a different set of
24031 registers to be described as saved. MASK is the set of registers
24032 to be saved; REAL_REGS is the set of registers to be described as
24033 saved. If REAL_REGS is 0, only describe the stack adjustment. */
24034
24035 static rtx_insn *
24036 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
24037 {
24038 unsigned long regno;
24039 rtx par[10], tmp, reg;
24040 rtx_insn *insn;
24041 int i, j;
24042
24043 /* Build the parallel of the registers actually being stored. */
24044 for (i = 0; mask; ++i, mask &= mask - 1)
24045 {
24046 regno = ctz_hwi (mask);
24047 reg = gen_rtx_REG (SImode, regno);
24048
24049 if (i == 0)
24050 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
24051 else
24052 tmp = gen_rtx_USE (VOIDmode, reg);
24053
24054 par[i] = tmp;
24055 }
24056
24057 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24058 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
24059 tmp = gen_frame_mem (BLKmode, tmp);
24060 tmp = gen_rtx_SET (tmp, par[0]);
24061 par[0] = tmp;
24062
24063 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
24064 insn = emit_insn (tmp);
24065
24066 /* Always build the stack adjustment note for unwind info. */
24067 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24068 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
24069 par[0] = tmp;
24070
24071 /* Build the parallel of the registers recorded as saved for unwind. */
24072 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
24073 {
24074 regno = ctz_hwi (real_regs);
24075 reg = gen_rtx_REG (SImode, regno);
24076
24077 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
24078 tmp = gen_frame_mem (SImode, tmp);
24079 tmp = gen_rtx_SET (tmp, reg);
24080 RTX_FRAME_RELATED_P (tmp) = 1;
24081 par[j + 1] = tmp;
24082 }
24083
24084 if (j == 0)
24085 tmp = par[0];
24086 else
24087 {
24088 RTX_FRAME_RELATED_P (par[0]) = 1;
24089 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
24090 }
24091
24092 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
24093
24094 return insn;
24095 }
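/* Worked example (illustrative only): with MASK == REAL_REGS ==
   (1 << 4) | (1 << 5) | (1 << LR_REGNUM) the loop above builds a
   three-element PARALLEL, the PRE_MODIFY drops the stack pointer by
   12 bytes, and the REG_FRAME_RELATED_EXPR note describes the three
   word stores at offsets 0, 4 and 8 for the unwinder.  */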
24096
24097 /* Emit code to push or pop registers to or from the stack. F is the
24098 assembly file. MASK is the registers to pop. */
24099 static void
24100 thumb_pop (FILE *f, unsigned long mask)
24101 {
24102 int regno;
24103 int lo_mask = mask & 0xFF;
24104
24105 gcc_assert (mask);
24106
24107 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
24108 {
24109 /* Special case. Do not generate a POP PC statement here; do it in
24110 thumb_exit(). */
24111 thumb_exit (f, -1);
24112 return;
24113 }
24114
24115 fprintf (f, "\tpop\t{");
24116
24117 /* Look at the low registers first. */
24118 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
24119 {
24120 if (lo_mask & 1)
24121 {
24122 asm_fprintf (f, "%r", regno);
24123
24124 if ((lo_mask & ~1) != 0)
24125 fprintf (f, ", ");
24126 }
24127 }
24128
24129 if (mask & (1 << PC_REGNUM))
24130 {
24131 /* Catch popping the PC. */
24132 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
24133 || IS_CMSE_ENTRY (arm_current_func_type ()))
24134 {
24135 /* The PC is never popped directly; instead
24136 it is popped into r3 and then BX is used. */
24137 fprintf (f, "}\n");
24138
24139 thumb_exit (f, -1);
24140
24141 return;
24142 }
24143 else
24144 {
24145 if (mask & 0xFF)
24146 fprintf (f, ", ");
24147
24148 asm_fprintf (f, "%r", PC_REGNUM);
24149 }
24150 }
24151
24152 fprintf (f, "}\n");
24153 }
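/* Typical output (a sketch): a MASK containing r4, r5 and the PC
   produces "pop {r4, r5, pc}", unless interworking, a backtrace
   structure, an EH return or a CMSE entry is involved, in which case
   the PC is instead returned through thumb_exit as handled above.  */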
24154
24155 /* Generate code to return from a thumb function.
24156 If 'reg_containing_return_addr' is -1, then the return address is
24157 actually on the stack, at the stack pointer.
24158
24159 Note: do not forget to update length attribute of corresponding insn pattern
24160 when changing assembly output (e.g. length attribute of epilogue_insns when
24161 updating Armv8-M Baseline Security Extensions register clearing
24162 sequences). */
24163 static void
24164 thumb_exit (FILE *f, int reg_containing_return_addr)
24165 {
24166 unsigned regs_available_for_popping;
24167 unsigned regs_to_pop;
24168 int pops_needed;
24169 unsigned available;
24170 unsigned required;
24171 machine_mode mode;
24172 int size;
24173 int restore_a4 = FALSE;
24174
24175 /* Compute the registers we need to pop. */
24176 regs_to_pop = 0;
24177 pops_needed = 0;
24178
24179 if (reg_containing_return_addr == -1)
24180 {
24181 regs_to_pop |= 1 << LR_REGNUM;
24182 ++pops_needed;
24183 }
24184
24185 if (TARGET_BACKTRACE)
24186 {
24187 /* Restore the (ARM) frame pointer and stack pointer. */
24188 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
24189 pops_needed += 2;
24190 }
24191
24192 /* If there is nothing to pop then just emit the BX instruction and
24193 return. */
24194 if (pops_needed == 0)
24195 {
24196 if (crtl->calls_eh_return)
24197 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24198
24199 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24200 {
24201 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
24202 reg_containing_return_addr);
24203 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24204 }
24205 else
24206 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24207 return;
24208 }
24209 /* Otherwise if we are not supporting interworking and we have not created
24210 a backtrace structure and the function was not entered in ARM mode then
24211 just pop the return address straight into the PC. */
24212 else if (!TARGET_INTERWORK
24213 && !TARGET_BACKTRACE
24214 && !is_called_in_ARM_mode (current_function_decl)
24215 && !crtl->calls_eh_return
24216 && !IS_CMSE_ENTRY (arm_current_func_type ()))
24217 {
24218 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
24219 return;
24220 }
24221
24222 /* Find out how many of the (return) argument registers we can corrupt. */
24223 regs_available_for_popping = 0;
24224
24225 /* If returning via __builtin_eh_return, the bottom three registers
24226 all contain information needed for the return. */
24227 if (crtl->calls_eh_return)
24228 size = 12;
24229 else
24230 {
24231 /* We can deduce the registers used from the function's
24232 return value. This is more reliable than examining
24233 df_regs_ever_live_p () because that will be set if the register is
24234 ever used in the function, not just if the register is used
24235 to hold a return value. */
24236
24237 if (crtl->return_rtx != 0)
24238 mode = GET_MODE (crtl->return_rtx);
24239 else
24240 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24241
24242 size = GET_MODE_SIZE (mode);
24243
24244 if (size == 0)
24245 {
24246 /* In a void function we can use any argument register.
24247 In a function that returns a structure on the stack
24248 we can use the second and third argument registers. */
24249 if (mode == VOIDmode)
24250 regs_available_for_popping =
24251 (1 << ARG_REGISTER (1))
24252 | (1 << ARG_REGISTER (2))
24253 | (1 << ARG_REGISTER (3));
24254 else
24255 regs_available_for_popping =
24256 (1 << ARG_REGISTER (2))
24257 | (1 << ARG_REGISTER (3));
24258 }
24259 else if (size <= 4)
24260 regs_available_for_popping =
24261 (1 << ARG_REGISTER (2))
24262 | (1 << ARG_REGISTER (3));
24263 else if (size <= 8)
24264 regs_available_for_popping =
24265 (1 << ARG_REGISTER (3));
24266 }
24267
24268 /* Match registers to be popped with registers into which we pop them. */
24269 for (available = regs_available_for_popping,
24270 required = regs_to_pop;
24271 required != 0 && available != 0;
24272 available &= ~(available & - available),
24273 required &= ~(required & - required))
24274 -- pops_needed;
24275
24276 /* If we have any popping registers left over, remove them. */
24277 if (available > 0)
24278 regs_available_for_popping &= ~available;
24279
24280 /* Otherwise if we need another popping register we can use
24281 the fourth argument register. */
24282 else if (pops_needed)
24283 {
24284 /* If we have not found any free argument registers and
24285 reg a4 contains the return address, we must move it. */
24286 if (regs_available_for_popping == 0
24287 && reg_containing_return_addr == LAST_ARG_REGNUM)
24288 {
24289 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24290 reg_containing_return_addr = LR_REGNUM;
24291 }
24292 else if (size > 12)
24293 {
24294 /* Register a4 is being used to hold part of the return value,
24295 but we have dire need of a free, low register. */
24296 restore_a4 = TRUE;
24297
24298 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
24299 }
24300
24301 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24302 {
24303 /* The fourth argument register is available. */
24304 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24305
24306 --pops_needed;
24307 }
24308 }
24309
24310 /* Pop as many registers as we can. */
24311 thumb_pop (f, regs_available_for_popping);
24312
24313 /* Process the registers we popped. */
24314 if (reg_containing_return_addr == -1)
24315 {
24316 /* The return address was popped into the lowest numbered register. */
24317 regs_to_pop &= ~(1 << LR_REGNUM);
24318
24319 reg_containing_return_addr =
24320 number_of_first_bit_set (regs_available_for_popping);
24321
24322 /* Remove this register from the mask of available registers, so that
24323 the return address will not be corrupted by further pops. */
24324 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24325 }
24326
24327 /* If we popped other registers then handle them here. */
24328 if (regs_available_for_popping)
24329 {
24330 int frame_pointer;
24331
24332 /* Work out which register currently contains the frame pointer. */
24333 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24334
24335 /* Move it into the correct place. */
24336 asm_fprintf (f, "\tmov\t%r, %r\n",
24337 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24338
24339 /* (Temporarily) remove it from the mask of popped registers. */
24340 regs_available_for_popping &= ~(1 << frame_pointer);
24341 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24342
24343 if (regs_available_for_popping)
24344 {
24345 int stack_pointer;
24346
24347 /* We popped the stack pointer as well;
24348 find the register that contains it. */
24349 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24350
24351 /* Move it into the stack register. */
24352 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24353
24354 /* At this point we have popped all necessary registers, so
24355 do not worry about restoring regs_available_for_popping
24356 to its correct value:
24357
24358 assert (pops_needed == 0)
24359 assert (regs_available_for_popping == (1 << frame_pointer))
24360 assert (regs_to_pop == (1 << STACK_POINTER)) */
24361 }
24362 else
24363 {
24364 /* Since we have just moved the popped value into the frame
24365 pointer, the popping register is available for reuse, and
24366 we know that we still have the stack pointer left to pop. */
24367 regs_available_for_popping |= (1 << frame_pointer);
24368 }
24369 }
24370
24371 /* If we still have registers left on the stack, but we no longer have
24372 any registers into which we can pop them, then we must move the return
24373 address into the link register and make available the register that
24374 contained it. */
24375 if (regs_available_for_popping == 0 && pops_needed > 0)
24376 {
24377 regs_available_for_popping |= 1 << reg_containing_return_addr;
24378
24379 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24380 reg_containing_return_addr);
24381
24382 reg_containing_return_addr = LR_REGNUM;
24383 }
24384
24385 /* If we have registers left on the stack then pop some more.
24386 We know that at most we will want to pop FP and SP. */
24387 if (pops_needed > 0)
24388 {
24389 int popped_into;
24390 int move_to;
24391
24392 thumb_pop (f, regs_available_for_popping);
24393
24394 /* We have popped either FP or SP.
24395 Move whichever one it is into the correct register. */
24396 popped_into = number_of_first_bit_set (regs_available_for_popping);
24397 move_to = number_of_first_bit_set (regs_to_pop);
24398
24399 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24400 --pops_needed;
24401 }
24402
24403 /* If we still have not popped everything then we must have only
24404 had one register available to us and we are now popping the SP. */
24405 if (pops_needed > 0)
24406 {
24407 int popped_into;
24408
24409 thumb_pop (f, regs_available_for_popping);
24410
24411 popped_into = number_of_first_bit_set (regs_available_for_popping);
24412
24413 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24414 /*
24415 assert (regs_to_pop == (1 << STACK_POINTER))
24416 assert (pops_needed == 1)
24417 */
24418 }
24419
24420 /* If necessary restore the a4 register. */
24421 if (restore_a4)
24422 {
24423 if (reg_containing_return_addr != LR_REGNUM)
24424 {
24425 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24426 reg_containing_return_addr = LR_REGNUM;
24427 }
24428
24429 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24430 }
24431
24432 if (crtl->calls_eh_return)
24433 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24434
24435 /* Return to caller. */
24436 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24437 {
24438 /* This is for the cases where LR is not being used to contain the return
24439 address. It may therefore contain information that we might not want
24440 to leak, hence it must be cleared. The value in R0 will never be a
24441 secret at this point, so it is safe to use it, see the clearing code
24442 in 'cmse_nonsecure_entry_clear_before_return'. */
24443 if (reg_containing_return_addr != LR_REGNUM)
24444 asm_fprintf (f, "\tmov\tlr, r0\n");
24445
24446 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24447 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24448 }
24449 else
24450 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24451 }
24452 \f
24453 /* Scan INSN just before assembler is output for it.
24454 For Thumb-1, we track the status of the condition codes; this
24455 information is used in the cbranchsi4_insn pattern. */
24456 void
24457 thumb1_final_prescan_insn (rtx_insn *insn)
24458 {
24459 if (flag_print_asm_name)
24460 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24461 INSN_ADDRESSES (INSN_UID (insn)));
24462 /* Don't overwrite the previous setter when we get to a cbranch. */
24463 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24464 {
24465 enum attr_conds conds;
24466
24467 if (cfun->machine->thumb1_cc_insn)
24468 {
24469 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24470 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24471 CC_STATUS_INIT;
24472 }
24473 conds = get_attr_conds (insn);
24474 if (conds == CONDS_SET)
24475 {
24476 rtx set = single_set (insn);
24477 cfun->machine->thumb1_cc_insn = insn;
24478 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24479 cfun->machine->thumb1_cc_op1 = const0_rtx;
24480 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24481 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24482 {
24483 rtx src1 = XEXP (SET_SRC (set), 1);
24484 if (src1 == const0_rtx)
24485 cfun->machine->thumb1_cc_mode = CCmode;
24486 }
24487 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24488 {
24489 /* Record the src register operand instead of dest because
24490 cprop_hardreg pass propagates src. */
24491 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24492 }
24493 }
24494 else if (conds != CONDS_NOCOND)
24495 cfun->machine->thumb1_cc_insn = NULL_RTX;
24496 }
24497
24498 /* Check if an unexpected far jump is used. */
24499 if (cfun->machine->lr_save_eliminated
24500 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24501 internal_error("Unexpected thumb1 far jump");
24502 }
24503
24504 int
24505 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24506 {
24507 unsigned HOST_WIDE_INT mask = 0xff;
24508 int i;
24509
24510 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24511 if (val == 0) /* XXX */
24512 return 0;
24513
24514 for (i = 0; i < 25; i++)
24515 if ((val & (mask << i)) == val)
24516 return 1;
24517
24518 return 0;
24519 }
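/* For example, 0x000ff000 is accepted (it is 0xff shifted left by 12),
   whereas 0x00100001 is rejected because its set bits do not fit inside
   any single 8-bit window.  */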
24520
24521 /* Returns nonzero if the current function contains,
24522 or might contain a far jump. */
24523 static int
24524 thumb_far_jump_used_p (void)
24525 {
24526 rtx_insn *insn;
24527 bool far_jump = false;
24528 unsigned int func_size = 0;
24529
24530 /* If we have already decided that far jumps may be used,
24531 do not bother checking again, and always return true even if
24532 it turns out that they are not being used. Once we have made
24533 the decision that far jumps are present (and that hence the link
24534 register will be pushed onto the stack) we cannot go back on it. */
24535 if (cfun->machine->far_jump_used)
24536 return 1;
24537
24538 /* If this function is not being called from the prologue/epilogue
24539 generation code then it must be being called from the
24540 INITIAL_ELIMINATION_OFFSET macro. */
24541 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24542 {
24543 /* In this case we know that we are being asked about the elimination
24544 of the arg pointer register. If that register is not being used,
24545 then there are no arguments on the stack, and we do not have to
24546 worry that a far jump might force the prologue to push the link
24547 register, changing the stack offsets. In this case we can just
24548 return false, since the presence of far jumps in the function will
24549 not affect stack offsets.
24550
24551 If the arg pointer is live (or if it was live, but has now been
24552 eliminated and so set to dead) then we do have to test to see if
24553 the function might contain a far jump. This test can lead to some
24554 false negatives, since before reload is completed, the length of
24555 branch instructions is not known, so gcc defaults to returning their
24556 longest length, which in turn sets the far jump attribute to true.
24557
24558 A false negative will not result in bad code being generated, but it
24559 will result in a needless push and pop of the link register. We
24560 hope that this does not occur too often.
24561
24562 If we need doubleword stack alignment this could affect the other
24563 elimination offsets so we can't risk getting it wrong. */
24564 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24565 cfun->machine->arg_pointer_live = 1;
24566 else if (!cfun->machine->arg_pointer_live)
24567 return 0;
24568 }
24569
24570 /* We should not change far_jump_used during or after reload, as there is
24571 no chance to change stack frame layout. */
24572 if (reload_in_progress || reload_completed)
24573 return 0;
24574
24575 /* Check to see if the function contains a branch
24576 insn with the far jump attribute set. */
24577 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24578 {
24579 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24580 {
24581 far_jump = true;
24582 }
24583 func_size += get_attr_length (insn);
24584 }
24585
24586 /* The far_jump attribute will always be true for Thumb-1 before the
24587 shorten_branch pass, so checking the far_jump attribute before
24588 shorten_branch is not very useful.
24589 
24590 The following heuristic tries to estimate more accurately whether a far jump
24591 will actually be used. The heuristic is very conservative, as there is
24592 no chance to roll back the decision not to use a far jump.
24593 
24594 The Thumb-1 long branch offset range is -2048 to 2046. The worst case is that each
24595 2-byte insn is associated with a 4-byte constant pool entry. Using
24596 function size 2048/3 as the threshold is conservative enough. */
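/* Spelling out that arithmetic (a rough sketch): FUNC_SIZE below sums
   only the instruction lengths, so in the worst case described above the
   eventual footprint can be roughly three times FUNC_SIZE once the
   per-insn constant pool entries are laid out. Hence once
   FUNC_SIZE * 3 reaches 2048 the roughly +/-2048 byte branch range can
   no longer be assumed to cover the function.  */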
24597 if (far_jump)
24598 {
24599 if ((func_size * 3) >= 2048)
24600 {
24601 /* Record the fact that we have decided that
24602 the function does use far jumps. */
24603 cfun->machine->far_jump_used = 1;
24604 return 1;
24605 }
24606 }
24607
24608 return 0;
24609 }
24610
24611 /* Return nonzero if FUNC must be entered in ARM mode. */
24612 static bool
24613 is_called_in_ARM_mode (tree func)
24614 {
24615 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24616
24617 /* Ignore the problem about functions whose address is taken. */
24618 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24619 return true;
24620
24621 #ifdef ARM_PE
24622 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24623 #else
24624 return false;
24625 #endif
24626 }
24627
24628 /* Given the stack offsets and register mask in OFFSETS, decide how
24629 many additional registers to push instead of subtracting a constant
24630 from SP. For epilogues the principle is the same except we use pop.
24631 FOR_PROLOGUE indicates which we're generating. */
24632 static int
24633 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24634 {
24635 HOST_WIDE_INT amount;
24636 unsigned long live_regs_mask = offsets->saved_regs_mask;
24637 /* Extract a mask of the ones we can give to the Thumb's push/pop
24638 instruction. */
24639 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24640 /* Then count how many other high registers will need to be pushed. */
24641 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24642 int n_free, reg_base, size;
24643
24644 if (!for_prologue && frame_pointer_needed)
24645 amount = offsets->locals_base - offsets->saved_regs;
24646 else
24647 amount = offsets->outgoing_args - offsets->saved_regs;
24648
24649 /* If the stack frame size is 512 exactly, we can save one load
24650 instruction, which should make this a win even when optimizing
24651 for speed. */
24652 if (!optimize_size && amount != 512)
24653 return 0;
24654
24655 /* Can't do this if there are high registers to push. */
24656 if (high_regs_pushed != 0)
24657 return 0;
24658
24659 /* Shouldn't do it in the prologue if no registers would normally
24660 be pushed at all. In the epilogue, also allow it if we'll have
24661 a pop insn for the PC. */
24662 if (l_mask == 0
24663 && (for_prologue
24664 || TARGET_BACKTRACE
24665 || (live_regs_mask & 1 << LR_REGNUM) == 0
24666 || TARGET_INTERWORK
24667 || crtl->args.pretend_args_size != 0))
24668 return 0;
24669
24670 /* Don't do this if thumb_expand_prologue wants to emit instructions
24671 between the push and the stack frame allocation. */
24672 if (for_prologue
24673 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24674 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24675 return 0;
24676
24677 reg_base = 0;
24678 n_free = 0;
24679 if (!for_prologue)
24680 {
24681 size = arm_size_return_regs ();
24682 reg_base = ARM_NUM_INTS (size);
24683 live_regs_mask >>= reg_base;
24684 }
24685
24686 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24687 && (for_prologue || call_used_regs[reg_base + n_free]))
24688 {
24689 live_regs_mask >>= 1;
24690 n_free++;
24691 }
24692
24693 if (n_free == 0)
24694 return 0;
24695 gcc_assert (amount / 4 * 4 == amount);
24696
24697 if (amount >= 512 && (amount - n_free * 4) < 512)
24698 return (amount - 508) / 4;
24699 if (amount <= n_free * 4)
24700 return amount / 4;
24701 return 0;
24702 }
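/* Worked example (illustrative): with AMOUNT == 512 and one free low
   register, pushing that extra register shrinks the explicit stack
   adjustment to 508 bytes, which fits a single Thumb-1 SP-adjusting
   instruction, so the function returns (512 - 508) / 4 == 1 extra
   register to push.  */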
24703
24704 /* The bits which aren't usefully expanded as rtl. */
24705 const char *
24706 thumb1_unexpanded_epilogue (void)
24707 {
24708 arm_stack_offsets *offsets;
24709 int regno;
24710 unsigned long live_regs_mask = 0;
24711 int high_regs_pushed = 0;
24712 int extra_pop;
24713 int had_to_push_lr;
24714 int size;
24715
24716 if (cfun->machine->return_used_this_function != 0)
24717 return "";
24718
24719 if (IS_NAKED (arm_current_func_type ()))
24720 return "";
24721
24722 offsets = arm_get_frame_offsets ();
24723 live_regs_mask = offsets->saved_regs_mask;
24724 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24725
24726 /* We can deduce the registers used from the function's return value.
24727 This is more reliable than examining df_regs_ever_live_p () because that
24728 will be set if the register is ever used in the function, not just if
24729 the register is used to hold a return value. */
24730 size = arm_size_return_regs ();
24731
24732 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24733 if (extra_pop > 0)
24734 {
24735 unsigned long extra_mask = (1 << extra_pop) - 1;
24736 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24737 }
24738
24739 /* The prolog may have pushed some high registers to use as
24740 work registers. e.g. the testsuite file:
24741 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24742 compiles to produce:
24743 push {r4, r5, r6, r7, lr}
24744 mov r7, r9
24745 mov r6, r8
24746 push {r6, r7}
24747 as part of the prolog. We have to undo that pushing here. */
24748
24749 if (high_regs_pushed)
24750 {
24751 unsigned long mask = live_regs_mask & 0xff;
24752 int next_hi_reg;
24753
24754 /* The available low registers depend on the size of the value we are
24755 returning. */
24756 if (size <= 12)
24757 mask |= 1 << 3;
24758 if (size <= 8)
24759 mask |= 1 << 2;
24760
24761 if (mask == 0)
24762 /* Oh dear! We have no low registers into which we can pop
24763 high registers! */
24764 internal_error
24765 ("no low registers available for popping high registers");
24766
24767 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24768 if (live_regs_mask & (1 << next_hi_reg))
24769 break;
24770
24771 while (high_regs_pushed)
24772 {
24773 /* Find lo register(s) into which the high register(s) can
24774 be popped. */
24775 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24776 {
24777 if (mask & (1 << regno))
24778 high_regs_pushed--;
24779 if (high_regs_pushed == 0)
24780 break;
24781 }
24782
24783 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24784
24785 /* Pop the values into the low register(s). */
24786 thumb_pop (asm_out_file, mask);
24787
24788 /* Move the value(s) into the high registers. */
24789 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24790 {
24791 if (mask & (1 << regno))
24792 {
24793 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24794 regno);
24795
24796 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24797 if (live_regs_mask & (1 << next_hi_reg))
24798 break;
24799 }
24800 }
24801 }
24802 live_regs_mask &= ~0x0f00;
24803 }
24804
24805 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24806 live_regs_mask &= 0xff;
24807
24808 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24809 {
24810 /* Pop the return address into the PC. */
24811 if (had_to_push_lr)
24812 live_regs_mask |= 1 << PC_REGNUM;
24813
24814 /* Either no argument registers were pushed or a backtrace
24815 structure was created which includes an adjusted stack
24816 pointer, so just pop everything. */
24817 if (live_regs_mask)
24818 thumb_pop (asm_out_file, live_regs_mask);
24819
24820 /* We have either just popped the return address into the
24821 PC or it was kept in LR for the entire function.
24822 Note that thumb_pop has already called thumb_exit if the
24823 PC was in the list. */
24824 if (!had_to_push_lr)
24825 thumb_exit (asm_out_file, LR_REGNUM);
24826 }
24827 else
24828 {
24829 /* Pop everything but the return address. */
24830 if (live_regs_mask)
24831 thumb_pop (asm_out_file, live_regs_mask);
24832
24833 if (had_to_push_lr)
24834 {
24835 if (size > 12)
24836 {
24837 /* We have no free low regs, so save one. */
24838 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24839 LAST_ARG_REGNUM);
24840 }
24841
24842 /* Get the return address into a temporary register. */
24843 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24844
24845 if (size > 12)
24846 {
24847 /* Move the return address to lr. */
24848 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24849 LAST_ARG_REGNUM);
24850 /* Restore the low register. */
24851 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24852 IP_REGNUM);
24853 regno = LR_REGNUM;
24854 }
24855 else
24856 regno = LAST_ARG_REGNUM;
24857 }
24858 else
24859 regno = LR_REGNUM;
24860
24861 /* Remove the argument registers that were pushed onto the stack. */
24862 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24863 SP_REGNUM, SP_REGNUM,
24864 crtl->args.pretend_args_size);
24865
24866 thumb_exit (asm_out_file, regno);
24867 }
24868
24869 return "";
24870 }
24871
24872 /* Functions to save and restore machine-specific function data. */
24873 static struct machine_function *
24874 arm_init_machine_status (void)
24875 {
24876 struct machine_function *machine;
24877 machine = ggc_cleared_alloc<machine_function> ();
24878
24879 #if ARM_FT_UNKNOWN != 0
24880 machine->func_type = ARM_FT_UNKNOWN;
24881 #endif
24882 return machine;
24883 }
24884
24885 /* Return an RTX indicating where the return address to the
24886 calling function can be found. */
24887 rtx
24888 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24889 {
24890 if (count != 0)
24891 return NULL_RTX;
24892
24893 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24894 }
24895
24896 /* Do anything needed before RTL is emitted for each function. */
24897 void
24898 arm_init_expanders (void)
24899 {
24900 /* Arrange to initialize and mark the machine per-function status. */
24901 init_machine_status = arm_init_machine_status;
24902
24903 /* This is to stop the combine pass optimizing away the alignment
24904 adjustment of va_arg. */
24905 /* ??? It is claimed that this should not be necessary. */
24906 if (cfun)
24907 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24908 }
24909
24910 /* Check whether FUNC is called with a different mode. */
24911
24912 bool
24913 arm_change_mode_p (tree func)
24914 {
24915 if (TREE_CODE (func) != FUNCTION_DECL)
24916 return false;
24917
24918 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24919
24920 if (!callee_tree)
24921 callee_tree = target_option_default_node;
24922
24923 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24924 int flags = callee_opts->x_target_flags;
24925
24926 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24927 }
24928
24929 /* Like arm_compute_initial_elimination_offset. Simpler because there
24930 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24931 to point at the base of the local variables after static stack
24932 space for a function has been allocated. */
24933
24934 HOST_WIDE_INT
24935 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24936 {
24937 arm_stack_offsets *offsets;
24938
24939 offsets = arm_get_frame_offsets ();
24940
24941 switch (from)
24942 {
24943 case ARG_POINTER_REGNUM:
24944 switch (to)
24945 {
24946 case STACK_POINTER_REGNUM:
24947 return offsets->outgoing_args - offsets->saved_args;
24948
24949 case FRAME_POINTER_REGNUM:
24950 return offsets->soft_frame - offsets->saved_args;
24951
24952 case ARM_HARD_FRAME_POINTER_REGNUM:
24953 return offsets->saved_regs - offsets->saved_args;
24954
24955 case THUMB_HARD_FRAME_POINTER_REGNUM:
24956 return offsets->locals_base - offsets->saved_args;
24957
24958 default:
24959 gcc_unreachable ();
24960 }
24961 break;
24962
24963 case FRAME_POINTER_REGNUM:
24964 switch (to)
24965 {
24966 case STACK_POINTER_REGNUM:
24967 return offsets->outgoing_args - offsets->soft_frame;
24968
24969 case ARM_HARD_FRAME_POINTER_REGNUM:
24970 return offsets->saved_regs - offsets->soft_frame;
24971
24972 case THUMB_HARD_FRAME_POINTER_REGNUM:
24973 return offsets->locals_base - offsets->soft_frame;
24974
24975 default:
24976 gcc_unreachable ();
24977 }
24978 break;
24979
24980 default:
24981 gcc_unreachable ();
24982 }
24983 }
24984
24985 /* Generate the function's prologue. */
24986
24987 void
24988 thumb1_expand_prologue (void)
24989 {
24990 rtx_insn *insn;
24991
24992 HOST_WIDE_INT amount;
24993 HOST_WIDE_INT size;
24994 arm_stack_offsets *offsets;
24995 unsigned long func_type;
24996 int regno;
24997 unsigned long live_regs_mask;
24998 unsigned long l_mask;
24999 unsigned high_regs_pushed = 0;
25000 bool lr_needs_saving;
25001
25002 func_type = arm_current_func_type ();
25003
25004 /* Naked functions don't have prologues. */
25005 if (IS_NAKED (func_type))
25006 {
25007 if (flag_stack_usage_info)
25008 current_function_static_stack_size = 0;
25009 return;
25010 }
25011
25012 if (IS_INTERRUPT (func_type))
25013 {
25014 error ("interrupt Service Routines cannot be coded in Thumb mode");
25015 return;
25016 }
25017
25018 if (is_called_in_ARM_mode (current_function_decl))
25019 emit_insn (gen_prologue_thumb1_interwork ());
25020
25021 offsets = arm_get_frame_offsets ();
25022 live_regs_mask = offsets->saved_regs_mask;
25023 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
25024
25025 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
25026 l_mask = live_regs_mask & 0x40ff;
25027 /* Then count how many other high registers will need to be pushed. */
25028 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25029
25030 if (crtl->args.pretend_args_size)
25031 {
25032 rtx x = GEN_INT (-crtl->args.pretend_args_size);
25033
25034 if (cfun->machine->uses_anonymous_args)
25035 {
25036 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
25037 unsigned long mask;
25038
25039 mask = 1ul << (LAST_ARG_REGNUM + 1);
25040 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
25041
25042 insn = thumb1_emit_multi_reg_push (mask, 0);
25043 }
25044 else
25045 {
25046 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25047 stack_pointer_rtx, x));
25048 }
25049 RTX_FRAME_RELATED_P (insn) = 1;
25050 }
25051
25052 if (TARGET_BACKTRACE)
25053 {
25054 HOST_WIDE_INT offset = 0;
25055 unsigned work_register;
25056 rtx work_reg, x, arm_hfp_rtx;
25057
25058 /* We have been asked to create a stack backtrace structure.
25059 The code looks like this:
25060
25061 0 .align 2
25062 0 func:
25063 0 sub SP, #16 Reserve space for 4 registers.
25064 2 push {R7} Push low registers.
25065 4 add R7, SP, #20 Get the stack pointer before the push.
25066 6 str R7, [SP, #8] Store the stack pointer
25067 (before reserving the space).
25068 8 mov R7, PC Get hold of the start of this code + 12.
25069 10 str R7, [SP, #16] Store it.
25070 12 mov R7, FP Get hold of the current frame pointer.
25071 14 str R7, [SP, #4] Store it.
25072 16 mov R7, LR Get hold of the current return address.
25073 18 str R7, [SP, #12] Store it.
25074 20 add R7, SP, #16 Point at the start of the
25075 backtrace structure.
25076 22 mov FP, R7 Put this value into the frame pointer. */
25077
25078 work_register = thumb_find_work_register (live_regs_mask);
25079 work_reg = gen_rtx_REG (SImode, work_register);
25080 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
25081
25082 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25083 stack_pointer_rtx, GEN_INT (-16)));
25084 RTX_FRAME_RELATED_P (insn) = 1;
25085
25086 if (l_mask)
25087 {
25088 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
25089 RTX_FRAME_RELATED_P (insn) = 1;
25090 lr_needs_saving = false;
25091
25092 offset = bit_count (l_mask) * UNITS_PER_WORD;
25093 }
25094
25095 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
25096 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25097
25098 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
25099 x = gen_frame_mem (SImode, x);
25100 emit_move_insn (x, work_reg);
25101
25102 /* Make sure that the instruction fetching the PC is in the right place
25103 to calculate "start of backtrace creation code + 12". */
25104 /* ??? The stores using the common WORK_REG ought to be enough to
25105 prevent the scheduler from doing anything weird. Failing that
25106 we could always move all of the following into an UNSPEC_VOLATILE. */
25107 if (l_mask)
25108 {
25109 x = gen_rtx_REG (SImode, PC_REGNUM);
25110 emit_move_insn (work_reg, x);
25111
25112 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25113 x = gen_frame_mem (SImode, x);
25114 emit_move_insn (x, work_reg);
25115
25116 emit_move_insn (work_reg, arm_hfp_rtx);
25117
25118 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25119 x = gen_frame_mem (SImode, x);
25120 emit_move_insn (x, work_reg);
25121 }
25122 else
25123 {
25124 emit_move_insn (work_reg, arm_hfp_rtx);
25125
25126 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25127 x = gen_frame_mem (SImode, x);
25128 emit_move_insn (x, work_reg);
25129
25130 x = gen_rtx_REG (SImode, PC_REGNUM);
25131 emit_move_insn (work_reg, x);
25132
25133 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25134 x = gen_frame_mem (SImode, x);
25135 emit_move_insn (x, work_reg);
25136 }
25137
25138 x = gen_rtx_REG (SImode, LR_REGNUM);
25139 emit_move_insn (work_reg, x);
25140
25141 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
25142 x = gen_frame_mem (SImode, x);
25143 emit_move_insn (x, work_reg);
25144
25145 x = GEN_INT (offset + 12);
25146 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25147
25148 emit_move_insn (arm_hfp_rtx, work_reg);
25149 }
25150 /* Optimization: If we are not pushing any low registers but we are going
25151 to push some high registers then delay our first push. This will just
25152 be a push of LR and we can combine it with the push of the first high
25153 register. */
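/* For instance (illustrative only): a function that saves just r8 and LR
 skips the push here; later the high-register code copies r8 into a low
 work register and emits a single "push {rN, lr}" covering both. */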
25154 else if ((l_mask & 0xff) != 0
25155 || (high_regs_pushed == 0 && lr_needs_saving))
25156 {
25157 unsigned long mask = l_mask;
25158 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
25159 insn = thumb1_emit_multi_reg_push (mask, mask);
25160 RTX_FRAME_RELATED_P (insn) = 1;
25161 lr_needs_saving = false;
25162 }
25163
25164 if (high_regs_pushed)
25165 {
25166 unsigned pushable_regs;
25167 unsigned next_hi_reg;
25168 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
25169 : crtl->args.info.nregs;
25170 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
25171
25172 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
25173 if (live_regs_mask & (1 << next_hi_reg))
25174 break;
25175
25176 /* Here we need to mask out registers used for passing arguments
25177 even if they could otherwise be pushed. This is to avoid using them
25178 to stash the high registers, since that would clobber the incoming argument values. */
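/* Illustrative sketch: on an AAPCS target whose function takes two core
 register arguments, arg_regs_mask covers r0-r1, so even if r0-r3 all
 appear in l_mask only r2 and r3 remain usable below for staging the
 high registers; the incoming values in r0-r1 are never overwritten. */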
25179 pushable_regs = l_mask & (~arg_regs_mask);
25180 if (lr_needs_saving)
25181 pushable_regs &= ~(1 << LR_REGNUM);
25182
25183 if (pushable_regs == 0)
25184 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
25185
25186 while (high_regs_pushed > 0)
25187 {
25188 unsigned long real_regs_mask = 0;
25189 unsigned long push_mask = 0;
25190
25191 for (regno = LR_REGNUM; regno >= 0; regno --)
25192 {
25193 if (pushable_regs & (1 << regno))
25194 {
25195 emit_move_insn (gen_rtx_REG (SImode, regno),
25196 gen_rtx_REG (SImode, next_hi_reg));
25197
25198 high_regs_pushed --;
25199 real_regs_mask |= (1 << next_hi_reg);
25200 push_mask |= (1 << regno);
25201
25202 if (high_regs_pushed)
25203 {
25204 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
25205 next_hi_reg --)
25206 if (live_regs_mask & (1 << next_hi_reg))
25207 break;
25208 }
25209 else
25210 break;
25211 }
25212 }
25213
25214 /* If we had to find a work register and we have not yet
25215 saved the LR then add it to the list of regs to push. */
25216 if (lr_needs_saving)
25217 {
25218 push_mask |= 1 << LR_REGNUM;
25219 real_regs_mask |= 1 << LR_REGNUM;
25220 lr_needs_saving = false;
25221 }
25222
25223 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
25224 RTX_FRAME_RELATED_P (insn) = 1;
25225 }
25226 }
25227
25228 /* Load the pic register before setting the frame pointer,
25229 so we can use r7 as a temporary work register. */
25230 if (flag_pic && arm_pic_register != INVALID_REGNUM)
25231 arm_load_pic_register (live_regs_mask);
25232
25233 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
25234 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
25235 stack_pointer_rtx);
25236
25237 size = offsets->outgoing_args - offsets->saved_args;
25238 if (flag_stack_usage_info)
25239 current_function_static_stack_size = size;
25240
25241 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25242 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
25243 || flag_stack_clash_protection)
25244 && size)
25245 sorry ("-fstack-check=specific for Thumb-1");
25246
25247 amount = offsets->outgoing_args - offsets->saved_regs;
25248 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25249 if (amount)
25250 {
25251 if (amount < 512)
25252 {
25253 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25254 GEN_INT (- amount)));
25255 RTX_FRAME_RELATED_P (insn) = 1;
25256 }
25257 else
25258 {
25259 rtx reg, dwarf;
25260
25261 /* The stack decrement is too big for an immediate value in a single
25262 insn. In theory we could issue multiple subtracts, but after
25263 three of them it becomes more space efficient to place the full
25264 value in the constant pool and load into a register. (Also the
25265 ARM debugger really likes to see only one stack decrement per
25266 function). So instead we look for a scratch register into which
25267 we can load the decrement, and then we subtract this from the
25268 stack pointer. Unfortunately on the thumb the only available
25269 scratch registers are the argument registers, and we cannot use
25270 these as they may hold arguments to the function. Instead we
25271 attempt to locate a call preserved register which is used by this
25272 function. If we can find one, then we know that it will have
25273 been pushed at the start of the prologue and so we can corrupt
25274 it now. */
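/* Illustrative sketch only: with amount == 1024 and r4 among the saved
 low registers, this path emits roughly

 ldr r4, .Lpool @ .Lpool holds the constant -1024
 add sp, sp, r4

 plus a REG_FRAME_RELATED_EXPR note recording sp = sp - 1024, since the
 register-based add cannot be interpreted by the unwinder directly. */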
25275 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25276 if (live_regs_mask & (1 << regno))
25277 break;
25278
25279 gcc_assert (regno <= LAST_LO_REGNUM);
25280
25281 reg = gen_rtx_REG (SImode, regno);
25282
25283 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25284
25285 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25286 stack_pointer_rtx, reg));
25287
25288 dwarf = gen_rtx_SET (stack_pointer_rtx,
25289 plus_constant (Pmode, stack_pointer_rtx,
25290 -amount));
25291 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25292 RTX_FRAME_RELATED_P (insn) = 1;
25293 }
25294 }
25295
25296 if (frame_pointer_needed)
25297 thumb_set_frame_pointer (offsets);
25298
25299 /* If we are profiling, make sure no instructions are scheduled before
25300 the call to mcount. Similarly if the user has requested no
25301 scheduling in the prolog. Similarly if we want non-call exceptions
25302 using the EABI unwinder, to prevent faulting instructions from being
25303 swapped with a stack adjustment. */
25304 if (crtl->profile || !TARGET_SCHED_PROLOG
25305 || (arm_except_unwind_info (&global_options) == UI_TARGET
25306 && cfun->can_throw_non_call_exceptions))
25307 emit_insn (gen_blockage ());
25308
25309 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25310 if (live_regs_mask & 0xff)
25311 cfun->machine->lr_save_eliminated = 0;
25312 }
25313
25314 /* Clear caller-saved registers that are not used to pass return values, as
25315 well as leaked condition flags, before exiting a cmse_nonsecure_entry function. */
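/* Illustrative sketch: with -mfloat-abi=soft, a cmse_nonsecure_entry
 function returning an int in r0 ends up clearing r1-r3 and ip here,
 while r0 and the callee-saved registers, which the normal epilogue
 has already restored, are left untouched. */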
25316
25317 void
25318 cmse_nonsecure_entry_clear_before_return (void)
25319 {
25320 int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
25321 uint32_t padding_bits_to_clear = 0;
25322 auto_sbitmap to_clear_bitmap (maxregno + 1);
25323 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
25324 tree result_type;
25325
25326 bitmap_clear (to_clear_bitmap);
25327 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
25328 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
25329
25330 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25331 registers. */
25332 if (TARGET_HARD_FLOAT)
25333 {
25334 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
25335
25336 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
25337
25338 /* Make sure we don't clear the two scratch registers used to clear the
25339 relevant FPSCR bits in output_return_instruction. */
25340 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25341 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
25342 emit_use (gen_rtx_REG (SImode, 4));
25343 bitmap_clear_bit (to_clear_bitmap, 4);
25344 }
25345
25346 /* If the user has defined registers to be caller saved, these are no longer
25347 restored by the function before returning and must thus be cleared for
25348 security purposes. */
25349 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
25350 {
25351 /* We do not touch registers that can be used to pass arguments as per
25352 the AAPCS, since these should never be made callee-saved by user
25353 options. */
25354 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25355 continue;
25356 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25357 continue;
25358 if (call_used_regs[regno])
25359 bitmap_set_bit (to_clear_bitmap, regno);
25360 }
25361
25362 /* Make sure we do not clear the registers in which the result is returned. */
25363 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25364 if (!VOID_TYPE_P (result_type))
25365 {
25366 uint64_t to_clear_return_mask;
25367 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25368
25369 /* No need to check that we return in registers, because we don't
25370 support returning on stack yet. */
25371 gcc_assert (REG_P (result_rtl));
25372 to_clear_return_mask
25373 = compute_not_to_clear_mask (result_type, result_rtl, 0,
25374 &padding_bits_to_clear);
25375 if (to_clear_return_mask)
25376 {
25377 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
25378 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25379 {
25380 if (to_clear_return_mask & (1ULL << regno))
25381 bitmap_clear_bit (to_clear_bitmap, regno);
25382 }
25383 }
25384 }
25385
25386 if (padding_bits_to_clear != 0)
25387 {
25388 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
25389 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
25390
25391 /* Padding_bits_to_clear is not 0 so we know we are dealing with
25392 returning a composite type, which only uses r0. Let's make sure that
25393 r1-r3 are cleared too. */
25394 bitmap_clear (to_clear_arg_regs_bitmap);
25395 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
25396 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
25397 }
25398
25399 /* Clear full registers that leak before returning. */
25400 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
25401 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
25402 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
25403 clearing_reg);
25404 }
25405
25406 /* Generate pattern *pop_multiple_with_stack_update_and_return if single
25407 POP instruction can be generated. LR should be replaced by PC. All
25408 the checks required are already done by USE_RETURN_INSN (). Hence,
25409 all we really need to check here is whether a single register or
25410 multiple registers are to be popped. */
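/* Illustrative sketch: when only LR was saved, the PARALLEL built below
 is a return combined with a single post-increment load of the PC from
 [sp], i.e. the "pop {pc}" style return; when several registers were
 saved, LR is simply replaced by PC in the mask and the whole set is
 popped at once via arm_emit_multi_reg_pop. */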
25411 void
25412 thumb2_expand_return (bool simple_return)
25413 {
25414 int i, num_regs;
25415 unsigned long saved_regs_mask;
25416 arm_stack_offsets *offsets;
25417
25418 offsets = arm_get_frame_offsets ();
25419 saved_regs_mask = offsets->saved_regs_mask;
25420
25421 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25422 if (saved_regs_mask & (1 << i))
25423 num_regs++;
25424
25425 if (!simple_return && saved_regs_mask)
25426 {
25427 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25428 functions or adapt code to handle according to ACLE. This path should
25429 not be reachable for cmse_nonsecure_entry functions though we prefer
25430 to assert it for now to ensure that future code changes do not silently
25431 change this behavior. */
25432 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25433 if (num_regs == 1)
25434 {
25435 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25436 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25437 rtx addr = gen_rtx_MEM (SImode,
25438 gen_rtx_POST_INC (SImode,
25439 stack_pointer_rtx));
25440 set_mem_alias_set (addr, get_frame_alias_set ());
25441 XVECEXP (par, 0, 0) = ret_rtx;
25442 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25443 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25444 emit_jump_insn (par);
25445 }
25446 else
25447 {
25448 saved_regs_mask &= ~ (1 << LR_REGNUM);
25449 saved_regs_mask |= (1 << PC_REGNUM);
25450 arm_emit_multi_reg_pop (saved_regs_mask);
25451 }
25452 }
25453 else
25454 {
25455 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25456 cmse_nonsecure_entry_clear_before_return ();
25457 emit_jump_insn (simple_return_rtx);
25458 }
25459 }
25460
25461 void
25462 thumb1_expand_epilogue (void)
25463 {
25464 HOST_WIDE_INT amount;
25465 arm_stack_offsets *offsets;
25466 int regno;
25467
25468 /* Naked functions don't have epilogues. */
25469 if (IS_NAKED (arm_current_func_type ()))
25470 return;
25471
25472 offsets = arm_get_frame_offsets ();
25473 amount = offsets->outgoing_args - offsets->saved_regs;
25474
25475 if (frame_pointer_needed)
25476 {
25477 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25478 amount = offsets->locals_base - offsets->saved_regs;
25479 }
25480 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25481
25482 gcc_assert (amount >= 0);
25483 if (amount)
25484 {
25485 emit_insn (gen_blockage ());
25486
25487 if (amount < 512)
25488 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25489 GEN_INT (amount)));
25490 else
25491 {
25492 /* r3 is always free in the epilogue. */
25493 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25494
25495 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25496 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25497 }
25498 }
25499
25500 /* Emit a USE (stack_pointer_rtx), so that
25501 the stack adjustment will not be deleted. */
25502 emit_insn (gen_force_register_use (stack_pointer_rtx));
25503
25504 if (crtl->profile || !TARGET_SCHED_PROLOG)
25505 emit_insn (gen_blockage ());
25506
25507 /* Emit a clobber for each register that will be restored in the epilogue,
25508 so that flow2 will get register lifetimes correct. */
25509 for (regno = 0; regno < 13; regno++)
25510 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25511 emit_clobber (gen_rtx_REG (SImode, regno));
25512
25513 if (! df_regs_ever_live_p (LR_REGNUM))
25514 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25515
25516 /* Clear all caller-saved regs that are not used to return. */
25517 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25518 cmse_nonsecure_entry_clear_before_return ();
25519 }
25520
25521 /* Epilogue code for APCS frame. */
25522 static void
25523 arm_expand_epilogue_apcs_frame (bool really_return)
25524 {
25525 unsigned long func_type;
25526 unsigned long saved_regs_mask;
25527 int num_regs = 0;
25528 int i;
25529 int floats_from_frame = 0;
25530 arm_stack_offsets *offsets;
25531
25532 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25533 func_type = arm_current_func_type ();
25534
25535 /* Get frame offsets for ARM. */
25536 offsets = arm_get_frame_offsets ();
25537 saved_regs_mask = offsets->saved_regs_mask;
25538
25539 /* Find the offset of the floating-point save area in the frame. */
25540 floats_from_frame
25541 = (offsets->saved_args
25542 + arm_compute_static_chain_stack_bytes ()
25543 - offsets->frame);
25544
25545 /* Compute how many core registers are saved and how far away the floats are. */
25546 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25547 if (saved_regs_mask & (1 << i))
25548 {
25549 num_regs++;
25550 floats_from_frame += 4;
25551 }
25552
25553 if (TARGET_HARD_FLOAT)
25554 {
25555 int start_reg;
25556 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25557
25558 /* The offset is from IP_REGNUM. */
25559 int saved_size = arm_get_vfp_saved_size ();
25560 if (saved_size > 0)
25561 {
25562 rtx_insn *insn;
25563 floats_from_frame += saved_size;
25564 insn = emit_insn (gen_addsi3 (ip_rtx,
25565 hard_frame_pointer_rtx,
25566 GEN_INT (-floats_from_frame)));
25567 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25568 ip_rtx, hard_frame_pointer_rtx);
25569 }
25570
25571 /* Generate VFP register multi-pop. */
25572 start_reg = FIRST_VFP_REGNUM;
25573
25574 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25575 /* Look for a case where a reg does not need restoring. */
25576 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25577 && (!df_regs_ever_live_p (i + 1)
25578 || call_used_regs[i + 1]))
25579 {
25580 if (start_reg != i)
25581 arm_emit_vfp_multi_reg_pop (start_reg,
25582 (i - start_reg) / 2,
25583 gen_rtx_REG (SImode,
25584 IP_REGNUM));
25585 start_reg = i + 2;
25586 }
25587
25588 /* Restore the remaining regs that we have discovered (or possibly
25589 even all of them, if the conditional in the for loop never
25590 fired). */
25591 if (start_reg != i)
25592 arm_emit_vfp_multi_reg_pop (start_reg,
25593 (i - start_reg) / 2,
25594 gen_rtx_REG (SImode, IP_REGNUM));
25595 }
25596
25597 if (TARGET_IWMMXT)
25598 {
25599 /* The frame pointer is guaranteed to be non-double-word aligned, as
25600 it is set to double-word-aligned old_stack_pointer - 4. */
25601 rtx_insn *insn;
25602 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25603
25604 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25605 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25606 {
25607 rtx addr = gen_frame_mem (V2SImode,
25608 plus_constant (Pmode, hard_frame_pointer_rtx,
25609 - lrm_count * 4));
25610 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25611 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25612 gen_rtx_REG (V2SImode, i),
25613 NULL_RTX);
25614 lrm_count += 2;
25615 }
25616 }
25617
25618 /* saved_regs_mask should contain IP, which holds the old stack pointer
25619 saved at the time the activation record was created. Since SP and IP are
25620 adjacent registers, we can restore the value directly into SP. */
25621 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25622 saved_regs_mask &= ~(1 << IP_REGNUM);
25623 saved_regs_mask |= (1 << SP_REGNUM);
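/* Illustrative sketch: for a frame built with the classic APCS sequence
 "mov ip, sp; stmfd sp!, {r4, fp, ip, lr, pc}", the adjusted mask now
 describes {r4, fp, sp, lr, pc}, so the multi-register pop below reloads
 the caller's SP from the slot in which IP (the old SP) was stored. */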
25624
25625 /* There are two registers left in saved_regs_mask - LR and PC. We
25626 only need to restore LR (the return address), but to
25627 save time we can load it directly into PC, unless we need a
25628 special function exit sequence, or we are not really returning. */
25629 if (really_return
25630 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25631 && !crtl->calls_eh_return)
25632 /* Delete LR from the register mask, so that LR on
25633 the stack is loaded into the PC in the register mask. */
25634 saved_regs_mask &= ~(1 << LR_REGNUM);
25635 else
25636 saved_regs_mask &= ~(1 << PC_REGNUM);
25637
25638 num_regs = bit_count (saved_regs_mask);
25639 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25640 {
25641 rtx_insn *insn;
25642 emit_insn (gen_blockage ());
25643 /* Unwind the stack to just below the saved registers. */
25644 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25645 hard_frame_pointer_rtx,
25646 GEN_INT (- 4 * num_regs)));
25647
25648 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25649 stack_pointer_rtx, hard_frame_pointer_rtx);
25650 }
25651
25652 arm_emit_multi_reg_pop (saved_regs_mask);
25653
25654 if (IS_INTERRUPT (func_type))
25655 {
25656 /* Interrupt handlers will have pushed the
25657 IP onto the stack, so restore it now. */
25658 rtx_insn *insn;
25659 rtx addr = gen_rtx_MEM (SImode,
25660 gen_rtx_POST_INC (SImode,
25661 stack_pointer_rtx));
25662 set_mem_alias_set (addr, get_frame_alias_set ());
25663 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25664 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25665 gen_rtx_REG (SImode, IP_REGNUM),
25666 NULL_RTX);
25667 }
25668
25669 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25670 return;
25671
25672 if (crtl->calls_eh_return)
25673 emit_insn (gen_addsi3 (stack_pointer_rtx,
25674 stack_pointer_rtx,
25675 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25676
25677 if (IS_STACKALIGN (func_type))
25678 /* Restore the original stack pointer. Before prologue, the stack was
25679 realigned and the original stack pointer saved in r0. For details,
25680 see comment in arm_expand_prologue. */
25681 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25682
25683 emit_jump_insn (simple_return_rtx);
25684 }
25685
25686 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25687 function is not a sibcall. */
25688 void
25689 arm_expand_epilogue (bool really_return)
25690 {
25691 unsigned long func_type;
25692 unsigned long saved_regs_mask;
25693 int num_regs = 0;
25694 int i;
25695 int amount;
25696 arm_stack_offsets *offsets;
25697
25698 func_type = arm_current_func_type ();
25699
25700 /* Naked functions don't have epilogues. Hence, generate the return pattern
25701 and let output_return_instruction take care of any instruction emission. */
25702 if (IS_NAKED (func_type)
25703 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25704 {
25705 if (really_return)
25706 emit_jump_insn (simple_return_rtx);
25707 return;
25708 }
25709
25710 /* If we are throwing an exception, then we really must be doing a
25711 return, so we can't tail-call. */
25712 gcc_assert (!crtl->calls_eh_return || really_return);
25713
25714 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25715 {
25716 arm_expand_epilogue_apcs_frame (really_return);
25717 return;
25718 }
25719
25720 /* Get frame offsets for ARM. */
25721 offsets = arm_get_frame_offsets ();
25722 saved_regs_mask = offsets->saved_regs_mask;
25723 num_regs = bit_count (saved_regs_mask);
25724
25725 if (frame_pointer_needed)
25726 {
25727 rtx_insn *insn;
25728 /* Restore stack pointer if necessary. */
25729 if (TARGET_ARM)
25730 {
25731 /* In ARM mode, frame pointer points to first saved register.
25732 Restore stack pointer to last saved register. */
25733 amount = offsets->frame - offsets->saved_regs;
25734
25735 /* Force out any pending memory operations that reference stacked data
25736 before stack de-allocation occurs. */
25737 emit_insn (gen_blockage ());
25738 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25739 hard_frame_pointer_rtx,
25740 GEN_INT (amount)));
25741 arm_add_cfa_adjust_cfa_note (insn, amount,
25742 stack_pointer_rtx,
25743 hard_frame_pointer_rtx);
25744
25745 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25746 deleted. */
25747 emit_insn (gen_force_register_use (stack_pointer_rtx));
25748 }
25749 else
25750 {
25751 /* In Thumb-2 mode, the frame pointer points to the last saved
25752 register. */
25753 amount = offsets->locals_base - offsets->saved_regs;
25754 if (amount)
25755 {
25756 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25757 hard_frame_pointer_rtx,
25758 GEN_INT (amount)));
25759 arm_add_cfa_adjust_cfa_note (insn, amount,
25760 hard_frame_pointer_rtx,
25761 hard_frame_pointer_rtx);
25762 }
25763
25764 /* Force out any pending memory operations that reference stacked data
25765 before stack de-allocation occurs. */
25766 emit_insn (gen_blockage ());
25767 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25768 hard_frame_pointer_rtx));
25769 arm_add_cfa_adjust_cfa_note (insn, 0,
25770 stack_pointer_rtx,
25771 hard_frame_pointer_rtx);
25772 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25773 deleted. */
25774 emit_insn (gen_force_register_use (stack_pointer_rtx));
25775 }
25776 }
25777 else
25778 {
25779 /* Pop off outgoing args and local frame to adjust stack pointer to
25780 last saved register. */
25781 amount = offsets->outgoing_args - offsets->saved_regs;
25782 if (amount)
25783 {
25784 rtx_insn *tmp;
25785 /* Force out any pending memory operations that reference stacked data
25786 before stack de-allocation occurs. */
25787 emit_insn (gen_blockage ());
25788 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25789 stack_pointer_rtx,
25790 GEN_INT (amount)));
25791 arm_add_cfa_adjust_cfa_note (tmp, amount,
25792 stack_pointer_rtx, stack_pointer_rtx);
25793 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25794 not deleted. */
25795 emit_insn (gen_force_register_use (stack_pointer_rtx));
25796 }
25797 }
25798
25799 if (TARGET_HARD_FLOAT)
25800 {
25801 /* Generate VFP register multi-pop. */
25802 int end_reg = LAST_VFP_REGNUM + 1;
25803
25804 /* Scan the registers in reverse order. We need to match
25805 any groupings made in the prologue and generate matching
25806 vldm operations. The need to match groups is because,
25807 unlike pop, vldm can only do consecutive regs. */
25808 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25809 /* Look for a case where a reg does not need restoring. */
25810 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25811 && (!df_regs_ever_live_p (i + 1)
25812 || call_used_regs[i + 1]))
25813 {
25814 /* Restore the regs discovered so far (from reg+2 to
25815 end_reg). */
25816 if (end_reg > i + 2)
25817 arm_emit_vfp_multi_reg_pop (i + 2,
25818 (end_reg - (i + 2)) / 2,
25819 stack_pointer_rtx);
25820 end_reg = i;
25821 }
25822
25823 /* Restore the remaining regs that we have discovered (or possibly
25824 even all of them, if the conditional in the for loop never
25825 fired). */
25826 if (end_reg > i + 2)
25827 arm_emit_vfp_multi_reg_pop (i + 2,
25828 (end_reg - (i + 2)) / 2,
25829 stack_pointer_rtx);
25830 }
25831
25832 if (TARGET_IWMMXT)
25833 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25834 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25835 {
25836 rtx_insn *insn;
25837 rtx addr = gen_rtx_MEM (V2SImode,
25838 gen_rtx_POST_INC (SImode,
25839 stack_pointer_rtx));
25840 set_mem_alias_set (addr, get_frame_alias_set ());
25841 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25842 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25843 gen_rtx_REG (V2SImode, i),
25844 NULL_RTX);
25845 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25846 stack_pointer_rtx, stack_pointer_rtx);
25847 }
25848
25849 if (saved_regs_mask)
25850 {
25851 rtx insn;
25852 bool return_in_pc = false;
25853
25854 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25855 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25856 && !IS_CMSE_ENTRY (func_type)
25857 && !IS_STACKALIGN (func_type)
25858 && really_return
25859 && crtl->args.pretend_args_size == 0
25860 && saved_regs_mask & (1 << LR_REGNUM)
25861 && !crtl->calls_eh_return)
25862 {
25863 saved_regs_mask &= ~(1 << LR_REGNUM);
25864 saved_regs_mask |= (1 << PC_REGNUM);
25865 return_in_pc = true;
25866 }
25867
25868 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25869 {
25870 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25871 if (saved_regs_mask & (1 << i))
25872 {
25873 rtx addr = gen_rtx_MEM (SImode,
25874 gen_rtx_POST_INC (SImode,
25875 stack_pointer_rtx));
25876 set_mem_alias_set (addr, get_frame_alias_set ());
25877
25878 if (i == PC_REGNUM)
25879 {
25880 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25881 XVECEXP (insn, 0, 0) = ret_rtx;
25882 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25883 addr);
25884 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25885 insn = emit_jump_insn (insn);
25886 }
25887 else
25888 {
25889 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25890 addr));
25891 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25892 gen_rtx_REG (SImode, i),
25893 NULL_RTX);
25894 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25895 stack_pointer_rtx,
25896 stack_pointer_rtx);
25897 }
25898 }
25899 }
25900 else
25901 {
25902 if (TARGET_LDRD
25903 && current_tune->prefer_ldrd_strd
25904 && !optimize_function_for_size_p (cfun))
25905 {
25906 if (TARGET_THUMB2)
25907 thumb2_emit_ldrd_pop (saved_regs_mask);
25908 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25909 arm_emit_ldrd_pop (saved_regs_mask);
25910 else
25911 arm_emit_multi_reg_pop (saved_regs_mask);
25912 }
25913 else
25914 arm_emit_multi_reg_pop (saved_regs_mask);
25915 }
25916
25917 if (return_in_pc)
25918 return;
25919 }
25920
25921 amount
25922 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes ();
25923 if (amount)
25924 {
25925 int i, j;
25926 rtx dwarf = NULL_RTX;
25927 rtx_insn *tmp =
25928 emit_insn (gen_addsi3 (stack_pointer_rtx,
25929 stack_pointer_rtx,
25930 GEN_INT (amount)));
25931
25932 RTX_FRAME_RELATED_P (tmp) = 1;
25933
25934 if (cfun->machine->uses_anonymous_args)
25935 {
25936 /* Restore the pretend args. Refer to arm_expand_prologue for how the
25937 pretend args are saved on the stack. */
25938 int num_regs = crtl->args.pretend_args_size / 4;
25939 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25940 for (j = 0, i = 0; j < num_regs; i++)
25941 if (saved_regs_mask & (1 << i))
25942 {
25943 rtx reg = gen_rtx_REG (SImode, i);
25944 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25945 j++;
25946 }
25947 REG_NOTES (tmp) = dwarf;
25948 }
25949 arm_add_cfa_adjust_cfa_note (tmp, amount,
25950 stack_pointer_rtx, stack_pointer_rtx);
25951 }
25952
25953 /* Clear all caller-saved regs that are not used to return. */
25954 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25955 {
25956 /* CMSE_ENTRY always returns. */
25957 gcc_assert (really_return);
25958 cmse_nonsecure_entry_clear_before_return ();
25959 }
25960
25961 if (!really_return)
25962 return;
25963
25964 if (crtl->calls_eh_return)
25965 emit_insn (gen_addsi3 (stack_pointer_rtx,
25966 stack_pointer_rtx,
25967 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25968
25969 if (IS_STACKALIGN (func_type))
25970 /* Restore the original stack pointer. Before prologue, the stack was
25971 realigned and the original stack pointer saved in r0. For details,
25972 see comment in arm_expand_prologue. */
25973 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25974
25975 emit_jump_insn (simple_return_rtx);
25976 }
25977
25978 /* Implementation of insn prologue_thumb1_interwork. This is the first
25979 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25980
25981 const char *
25982 thumb1_output_interwork (void)
25983 {
25984 const char * name;
25985 FILE *f = asm_out_file;
25986
25987 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25988 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25989 == SYMBOL_REF);
25990 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25991
25992 /* Generate code sequence to switch us into Thumb mode. */
25993 /* The .code 32 directive has already been emitted by
25994 ASM_DECLARE_FUNCTION_NAME. */
25995 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25996 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25997
25998 /* Generate a label, so that the debugger will notice the
25999 change in instruction sets. This label is also used by
26000 the assembler to bypass the ARM code when this function
26001 is called from a Thumb encoded function elsewhere in the
26002 same file. Hence the definition of STUB_NAME here must
26003 agree with the definition in gas/config/tc-arm.c. */
26004
26005 #define STUB_NAME ".real_start_of"
26006
26007 fprintf (f, "\t.code\t16\n");
26008 #ifdef ARM_PE
26009 if (arm_dllexport_name_p (name))
26010 name = arm_strip_name_encoding (name);
26011 #endif
26012 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
26013 fprintf (f, "\t.thumb_func\n");
26014 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
26015
26016 return "";
26017 }
26018
26019 /* Handle the case of a double word load into a low register from
26020 a computed memory address. The computed address may involve a
26021 register which is overwritten by the load. */
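/* Illustrative sketch: loading a 64-bit value into r0/r1 from the
 address held in r0 must fetch the high word first, i.e.

 ldr r1, [r0, #4]
 ldr r0, [r0]

 otherwise the first load would overwrite the base register. */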
26022 const char *
26023 thumb_load_double_from_address (rtx *operands)
26024 {
26025 rtx addr;
26026 rtx base;
26027 rtx offset;
26028 rtx arg1;
26029 rtx arg2;
26030
26031 gcc_assert (REG_P (operands[0]));
26032 gcc_assert (MEM_P (operands[1]));
26033
26034 /* Get the memory address. */
26035 addr = XEXP (operands[1], 0);
26036
26037 /* Work out how the memory address is computed. */
26038 switch (GET_CODE (addr))
26039 {
26040 case REG:
26041 operands[2] = adjust_address (operands[1], SImode, 4);
26042
26043 if (REGNO (operands[0]) == REGNO (addr))
26044 {
26045 output_asm_insn ("ldr\t%H0, %2", operands);
26046 output_asm_insn ("ldr\t%0, %1", operands);
26047 }
26048 else
26049 {
26050 output_asm_insn ("ldr\t%0, %1", operands);
26051 output_asm_insn ("ldr\t%H0, %2", operands);
26052 }
26053 break;
26054
26055 case CONST:
26056 /* Compute <address> + 4 for the high order load. */
26057 operands[2] = adjust_address (operands[1], SImode, 4);
26058
26059 output_asm_insn ("ldr\t%0, %1", operands);
26060 output_asm_insn ("ldr\t%H0, %2", operands);
26061 break;
26062
26063 case PLUS:
26064 arg1 = XEXP (addr, 0);
26065 arg2 = XEXP (addr, 1);
26066
26067 if (CONSTANT_P (arg1))
26068 base = arg2, offset = arg1;
26069 else
26070 base = arg1, offset = arg2;
26071
26072 gcc_assert (REG_P (base));
26073
26074 /* Catch the case of <address> = <reg> + <reg> */
26075 if (REG_P (offset))
26076 {
26077 int reg_offset = REGNO (offset);
26078 int reg_base = REGNO (base);
26079 int reg_dest = REGNO (operands[0]);
26080
26081 /* Add the base and offset registers together into the
26082 higher destination register. */
26083 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
26084 reg_dest + 1, reg_base, reg_offset);
26085
26086 /* Load the lower destination register from the address in
26087 the higher destination register. */
26088 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
26089 reg_dest, reg_dest + 1);
26090
26091 /* Load the higher destination register from its own address
26092 plus 4. */
26093 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
26094 reg_dest + 1, reg_dest + 1);
26095 }
26096 else
26097 {
26098 /* Compute <address> + 4 for the high order load. */
26099 operands[2] = adjust_address (operands[1], SImode, 4);
26100
26101 /* If the computed address is held in the low order register
26102 then load the high order register first, otherwise always
26103 load the low order register first. */
26104 if (REGNO (operands[0]) == REGNO (base))
26105 {
26106 output_asm_insn ("ldr\t%H0, %2", operands);
26107 output_asm_insn ("ldr\t%0, %1", operands);
26108 }
26109 else
26110 {
26111 output_asm_insn ("ldr\t%0, %1", operands);
26112 output_asm_insn ("ldr\t%H0, %2", operands);
26113 }
26114 }
26115 break;
26116
26117 case LABEL_REF:
26118 /* With no registers to worry about we can just load the value
26119 directly. */
26120 operands[2] = adjust_address (operands[1], SImode, 4);
26121
26122 output_asm_insn ("ldr\t%H0, %2", operands);
26123 output_asm_insn ("ldr\t%0, %1", operands);
26124 break;
26125
26126 default:
26127 gcc_unreachable ();
26128 }
26129
26130 return "";
26131 }
26132
26133 const char *
26134 thumb_output_move_mem_multiple (int n, rtx *operands)
26135 {
26136 switch (n)
26137 {
26138 case 2:
26139 if (REGNO (operands[4]) > REGNO (operands[5]))
26140 std::swap (operands[4], operands[5]);
26141
26142 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
26143 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
26144 break;
26145
26146 case 3:
26147 if (REGNO (operands[4]) > REGNO (operands[5]))
26148 std::swap (operands[4], operands[5]);
26149 if (REGNO (operands[5]) > REGNO (operands[6]))
26150 std::swap (operands[5], operands[6]);
26151 if (REGNO (operands[4]) > REGNO (operands[5]))
26152 std::swap (operands[4], operands[5]);
26153
26154 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
26155 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
26156 break;
26157
26158 default:
26159 gcc_unreachable ();
26160 }
26161
26162 return "";
26163 }
26164
26165 /* Output a call-via instruction for thumb state. */
26166 const char *
26167 thumb_call_via_reg (rtx reg)
26168 {
26169 int regno = REGNO (reg);
26170 rtx *labelp;
26171
26172 gcc_assert (regno < LR_REGNUM);
26173
26174 /* If we are in the normal text section we can use a single instance
26175 per compilation unit. If we are doing function sections, then we need
26176 an entry per section, since we can't rely on reachability. */
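/* Illustrative sketch: a call through r4 becomes "bl .LN" here, where
 the ".LN: bx r4" stub is emitted separately, either once per
 compilation unit at the end of the file when we are in the normal
 text section (see arm_file_end below) or once per section otherwise. */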
26177 if (in_section == text_section)
26178 {
26179 thumb_call_reg_needed = 1;
26180
26181 if (thumb_call_via_label[regno] == NULL)
26182 thumb_call_via_label[regno] = gen_label_rtx ();
26183 labelp = thumb_call_via_label + regno;
26184 }
26185 else
26186 {
26187 if (cfun->machine->call_via[regno] == NULL)
26188 cfun->machine->call_via[regno] = gen_label_rtx ();
26189 labelp = cfun->machine->call_via + regno;
26190 }
26191
26192 output_asm_insn ("bl\t%a0", labelp);
26193 return "";
26194 }
26195
26196 /* Routines for generating rtl. */
26197 void
26198 thumb_expand_movmemqi (rtx *operands)
26199 {
26200 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26201 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26202 HOST_WIDE_INT len = INTVAL (operands[2]);
26203 HOST_WIDE_INT offset = 0;
26204
26205 while (len >= 12)
26206 {
26207 emit_insn (gen_movmem12b (out, in, out, in));
26208 len -= 12;
26209 }
26210
26211 if (len >= 8)
26212 {
26213 emit_insn (gen_movmem8b (out, in, out, in));
26214 len -= 8;
26215 }
26216
26217 if (len >= 4)
26218 {
26219 rtx reg = gen_reg_rtx (SImode);
26220 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26221 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26222 len -= 4;
26223 offset += 4;
26224 }
26225
26226 if (len >= 2)
26227 {
26228 rtx reg = gen_reg_rtx (HImode);
26229 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26230 plus_constant (Pmode, in,
26231 offset))));
26232 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26233 offset)),
26234 reg));
26235 len -= 2;
26236 offset += 2;
26237 }
26238
26239 if (len)
26240 {
26241 rtx reg = gen_reg_rtx (QImode);
26242 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26243 plus_constant (Pmode, in,
26244 offset))));
26245 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26246 offset)),
26247 reg));
26248 }
26249 }
26250
26251 void
26252 thumb_reload_out_hi (rtx *operands)
26253 {
26254 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26255 }
26256
26257 /* Return the length of a function name prefix
26258 that starts with the character 'c'. */
26259 static int
26260 arm_get_strip_length (int c)
26261 {
26262 switch (c)
26263 {
26264 ARM_NAME_ENCODING_LENGTHS
26265 default: return 0;
26266 }
26267 }
26268
26269 /* Return a pointer to a function's name with any
26270 and all prefix encodings stripped from it. */
26271 const char *
26272 arm_strip_name_encoding (const char *name)
26273 {
26274 int skip;
26275
26276 while ((skip = arm_get_strip_length (* name)))
26277 name += skip;
26278
26279 return name;
26280 }
26281
26282 /* If there is a '*' anywhere in the name's prefix, then
26283 emit the stripped name verbatim, otherwise prepend an
26284 underscore if leading underscores are being used. */
26285 void
26286 arm_asm_output_labelref (FILE *stream, const char *name)
26287 {
26288 int skip;
26289 int verbatim = 0;
26290
26291 while ((skip = arm_get_strip_length (* name)))
26292 {
26293 verbatim |= (*name == '*');
26294 name += skip;
26295 }
26296
26297 if (verbatim)
26298 fputs (name, stream);
26299 else
26300 asm_fprintf (stream, "%U%s", name);
26301 }
26302
26303 /* This function is used to emit an EABI tag and its associated value.
26304 We emit the numerical value of the tag in case the assembler does not
26305 support textual tags (e.g. gas prior to 2.20). If requested we include
26306 the tag name in a comment so that anyone reading the assembler output
26307 will know which tag is being set.
26308
26309 This function is not static because arm-c.c needs it too. */
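/* Illustrative sketch of the output with -fverbose-asm:

 .eabi_attribute 30, 2 @ Tag_ABI_optimization_goals

 without -fverbose-asm only the numeric form is emitted. */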
26310
26311 void
26312 arm_emit_eabi_attribute (const char *name, int num, int val)
26313 {
26314 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26315 if (flag_verbose_asm || flag_debug_asm)
26316 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26317 asm_fprintf (asm_out_file, "\n");
26318 }
26319
26320 /* This function is used to print CPU tuning information as a comment
26321 in the assembler file. Pointers are not printed for now. */
26322
26323 void
26324 arm_print_tune_info (void)
26325 {
26326 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26327 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26328 current_tune->constant_limit);
26329 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26330 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26331 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26332 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26333 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26334 "prefetch.l1_cache_size:\t%d\n",
26335 current_tune->prefetch.l1_cache_size);
26336 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26337 "prefetch.l1_cache_line_size:\t%d\n",
26338 current_tune->prefetch.l1_cache_line_size);
26339 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26340 "prefer_constant_pool:\t%d\n",
26341 (int) current_tune->prefer_constant_pool);
26342 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26343 "branch_cost:\t(s:speed, p:predictable)\n");
26344 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26345 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26346 current_tune->branch_cost (false, false));
26347 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26348 current_tune->branch_cost (false, true));
26349 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26350 current_tune->branch_cost (true, false));
26351 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26352 current_tune->branch_cost (true, true));
26353 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26354 "prefer_ldrd_strd:\t%d\n",
26355 (int) current_tune->prefer_ldrd_strd);
26356 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26357 "logical_op_non_short_circuit:\t[%d,%d]\n",
26358 (int) current_tune->logical_op_non_short_circuit_thumb,
26359 (int) current_tune->logical_op_non_short_circuit_arm);
26360 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26361 "prefer_neon_for_64bits:\t%d\n",
26362 (int) current_tune->prefer_neon_for_64bits);
26363 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26364 "disparage_flag_setting_t16_encodings:\t%d\n",
26365 (int) current_tune->disparage_flag_setting_t16_encodings);
26366 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26367 "string_ops_prefer_neon:\t%d\n",
26368 (int) current_tune->string_ops_prefer_neon);
26369 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26370 "max_insns_inline_memset:\t%d\n",
26371 current_tune->max_insns_inline_memset);
26372 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26373 current_tune->fusible_ops);
26374 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26375 (int) current_tune->sched_autopref);
26376 }
26377
26378 /* Print .arch and .arch_extension directives corresponding to the
26379 current architecture configuration. */
26380 static void
26381 arm_print_asm_arch_directives ()
26382 {
26383 const arch_option *arch
26384 = arm_parse_arch_option_name (all_architectures, "-march",
26385 arm_active_target.arch_name);
26386 auto_sbitmap opt_bits (isa_num_bits);
26387
26388 gcc_assert (arch);
26389
26390 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26391 if (!arch->common.extensions)
26392 return;
26393
26394 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26395 opt->name != NULL;
26396 opt++)
26397 {
26398 if (!opt->remove)
26399 {
26400 arm_initialize_isa (opt_bits, opt->isa_bits);
26401
26402 /* If every feature bit of this option is set in the target
26403 ISA specification, print out the option name. However,
26404 don't print anything if all the bits are part of the
26405 FPU specification. */
26406 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26407 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26408 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26409 }
26410 }
26411 }
26412
26413 static void
26414 arm_file_start (void)
26415 {
26416 int val;
26417
26418 if (TARGET_BPABI)
26419 {
26420 /* If we don't have a specified CPU, use the architecture to
26421 generate the tags.
26422
26423 Note: it might be better to do this unconditionally, then the
26424 assembler would not need to know about all new CPU names as
26425 they are added. */
26426 if (!arm_active_target.core_name)
26427 {
26428 /* armv7ve doesn't support any extensions. */
26429 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26430 {
26431 /* Keep backward compatibility for assemblers
26432 which don't support armv7ve. */
26433 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26434 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26435 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26436 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26437 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26438 }
26439 else
26440 arm_print_asm_arch_directives ();
26441 }
26442 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26443 asm_fprintf (asm_out_file, "\t.arch %s\n",
26444 arm_active_target.core_name + 8);
26445 else
26446 {
26447 const char* truncated_name
26448 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26449 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26450 }
26451
26452 if (print_tune_info)
26453 arm_print_tune_info ();
26454
26455 if (! TARGET_SOFT_FLOAT)
26456 {
26457 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26458 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26459
26460 if (TARGET_HARD_FLOAT_ABI)
26461 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26462 }
26463
26464 /* Some of these attributes only apply when the corresponding features
26465 are used. However we don't have any easy way of figuring this out.
26466 Conservatively record the setting that would have been used. */
26467
26468 if (flag_rounding_math)
26469 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26470
26471 if (!flag_unsafe_math_optimizations)
26472 {
26473 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26474 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26475 }
26476 if (flag_signaling_nans)
26477 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26478
26479 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26480 flag_finite_math_only ? 1 : 3);
26481
26482 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26483 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26484 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26485 flag_short_enums ? 1 : 2);
26486
26487 /* Tag_ABI_optimization_goals. */
26488 if (optimize_size)
26489 val = 4;
26490 else if (optimize >= 2)
26491 val = 2;
26492 else if (optimize)
26493 val = 1;
26494 else
26495 val = 6;
26496 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26497
26498 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26499 unaligned_access);
26500
26501 if (arm_fp16_format)
26502 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26503 (int) arm_fp16_format);
26504
26505 if (arm_lang_output_object_attributes_hook)
26506 arm_lang_output_object_attributes_hook ();
26507 }
26508
26509 default_file_start ();
26510 }
26511
26512 static void
26513 arm_file_end (void)
26514 {
26515 int regno;
26516
26517 if (NEED_INDICATE_EXEC_STACK)
26518 /* Add .note.GNU-stack. */
26519 file_end_indicate_exec_stack ();
26520
26521 if (! thumb_call_reg_needed)
26522 return;
26523
26524 switch_to_section (text_section);
26525 asm_fprintf (asm_out_file, "\t.code 16\n");
26526 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26527
26528 for (regno = 0; regno < LR_REGNUM; regno++)
26529 {
26530 rtx label = thumb_call_via_label[regno];
26531
26532 if (label != 0)
26533 {
26534 targetm.asm_out.internal_label (asm_out_file, "L",
26535 CODE_LABEL_NUMBER (label));
26536 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26537 }
26538 }
26539 }
26540
26541 #ifndef ARM_PE
26542 /* Symbols in the text segment can be accessed without indirecting via the
26543 constant pool; it may take an extra binary operation, but this is still
26544 faster than indirecting via memory. Don't do this when not optimizing,
26545 since we won't be calculating all of the offsets necessary to do this
26546 simplification. */
26547
26548 static void
26549 arm_encode_section_info (tree decl, rtx rtl, int first)
26550 {
26551 if (optimize > 0 && TREE_CONSTANT (decl))
26552 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26553
26554 default_encode_section_info (decl, rtl, first);
26555 }
26556 #endif /* !ARM_PE */
26557
26558 static void
26559 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26560 {
26561 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26562 && !strcmp (prefix, "L"))
26563 {
26564 arm_ccfsm_state = 0;
26565 arm_target_insn = NULL;
26566 }
26567 default_internal_label (stream, prefix, labelno);
26568 }
26569
26570 /* Output code to add DELTA to the first argument, and then jump
26571 to FUNCTION. Used for C++ multiple inheritance. */
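/* Hypothetical illustration of why these thunks exist:

 struct A { virtual void f (); int a; };
 struct B { virtual void g (); int b; };
 struct C : A, B { void g (); };

 Calling g() through a B* that points into a C object must reach C::g
 via a thunk which first subtracts the offset of the B subobject from
 the incoming "this" and then branches to the real implementation. */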
26572
26573 static void
26574 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26575 HOST_WIDE_INT, tree function)
26576 {
26577 static int thunk_label = 0;
26578 char label[256];
26579 char labelpc[256];
26580 int mi_delta = delta;
26581 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26582 int shift = 0;
26583 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26584 ? 1 : 0);
26585 if (mi_delta < 0)
26586 mi_delta = - mi_delta;
26587
26588 final_start_function (emit_barrier (), file, 1);
26589
26590 if (TARGET_THUMB1)
26591 {
26592 int labelno = thunk_label++;
26593 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26594 /* Thunks are entered in ARM mode when available. */
26595 if (TARGET_THUMB1_ONLY)
26596 {
26597 /* push r3 so we can use it as a temporary. */
26598 /* TODO: Omit this save if r3 is not used. */
26599 fputs ("\tpush {r3}\n", file);
26600 fputs ("\tldr\tr3, ", file);
26601 }
26602 else
26603 {
26604 fputs ("\tldr\tr12, ", file);
26605 }
26606 assemble_name (file, label);
26607 fputc ('\n', file);
26608 if (flag_pic)
26609 {
26610 /* If we are generating PIC, the ldr instruction below loads
26611 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26612 the address of the add + 8, so we have:
26613
26614 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26615 = target + 1.
26616
26617 Note that we have "+ 1" because some versions of GNU ld
26618 don't set the low bit of the result for R_ARM_REL32
26619 relocations against thumb function symbols.
26620 On ARMv6M this is +4, not +8. */
26621 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26622 assemble_name (file, labelpc);
26623 fputs (":\n", file);
26624 if (TARGET_THUMB1_ONLY)
26625 {
26626 /* This is 2 insns after the start of the thunk, so we know it
26627 is 4-byte aligned. */
26628 fputs ("\tadd\tr3, pc, r3\n", file);
26629 fputs ("\tmov r12, r3\n", file);
26630 }
26631 else
26632 fputs ("\tadd\tr12, pc, r12\n", file);
26633 }
26634 else if (TARGET_THUMB1_ONLY)
26635 fputs ("\tmov r12, r3\n", file);
26636 }
26637 if (TARGET_THUMB1_ONLY)
26638 {
26639 if (mi_delta > 255)
26640 {
26641 fputs ("\tldr\tr3, ", file);
26642 assemble_name (file, label);
26643 fputs ("+4\n", file);
26644 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26645 mi_op, this_regno, this_regno);
26646 }
26647 else if (mi_delta != 0)
26648 {
26649 /* Thumb1 unified syntax requires an "s" suffix in the instruction name
26650 when one of the operands is an immediate. */
26651 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26652 mi_op, this_regno, this_regno,
26653 mi_delta);
26654 }
26655 }
26656 else
26657 {
26658 /* TODO: Use movw/movt for large constants when available. */
26659 while (mi_delta != 0)
26660 {
26661 if ((mi_delta & (3 << shift)) == 0)
26662 shift += 2;
26663 else
26664 {
26665 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26666 mi_op, this_regno, this_regno,
26667 mi_delta & (0xff << shift));
26668 mi_delta &= ~(0xff << shift);
26669 shift += 8;
26670 }
26671 }
26672 }
26673 if (TARGET_THUMB1)
26674 {
26675 if (TARGET_THUMB1_ONLY)
26676 fputs ("\tpop\t{r3}\n", file);
26677
26678 fprintf (file, "\tbx\tr12\n");
26679 ASM_OUTPUT_ALIGN (file, 2);
26680 assemble_name (file, label);
26681 fputs (":\n", file);
26682 if (flag_pic)
26683 {
26684 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26685 rtx tem = XEXP (DECL_RTL (function), 0);
26686 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26687 pipeline offset is four rather than eight. Adjust the offset
26688 accordingly. */
26689 tem = plus_constant (GET_MODE (tem), tem,
26690 TARGET_THUMB1_ONLY ? -3 : -7);
26691 tem = gen_rtx_MINUS (GET_MODE (tem),
26692 tem,
26693 gen_rtx_SYMBOL_REF (Pmode,
26694 ggc_strdup (labelpc)));
26695 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26696 }
26697 else
26698 /* Output ".word .LTHUNKn". */
26699 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26700
26701 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26702 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26703 }
26704 else
26705 {
26706 fputs ("\tb\t", file);
26707 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26708 if (NEED_PLT_RELOC)
26709 fputs ("(PLT)", file);
26710 fputc ('\n', file);
26711 }
26712
26713 final_end_function ();
26714 }
26715
26716 /* MI thunk handling for TARGET_32BIT. */
26717
26718 static void
26719 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26720 HOST_WIDE_INT vcall_offset, tree function)
26721 {
26722 /* On ARM, this_regno is R0 or R1 depending on
26723 whether the function returns an aggregate or not. */
26724
26725 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26726 function)
26727 ? R1_REGNUM : R0_REGNUM);
26728
26729 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26730 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26731 reload_completed = 1;
26732 emit_note (NOTE_INSN_PROLOGUE_END);
26733
26734 /* Add DELTA to THIS_RTX. */
26735 if (delta != 0)
26736 arm_split_constant (PLUS, Pmode, NULL_RTX,
26737 delta, this_rtx, this_rtx, false);
26738
26739 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26740 if (vcall_offset != 0)
26741 {
26742 /* Load *THIS_RTX. */
26743 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26744 /* Compute *THIS_RTX + VCALL_OFFSET. */
26745 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26746 false);
26747 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26748 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26749 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26750 }
26751
26752 /* Generate a tail call to the target function. */
26753 if (!TREE_USED (function))
26754 {
26755 assemble_external (function);
26756 TREE_USED (function) = 1;
26757 }
26758 rtx funexp = XEXP (DECL_RTL (function), 0);
26759 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26760 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26761 SIBLING_CALL_P (insn) = 1;
26762
26763 insn = get_insns ();
26764 shorten_branches (insn);
26765 final_start_function (insn, file, 1);
26766 final (insn, file, 1);
26767 final_end_function ();
26768
26769 /* Stop pretending this is a post-reload pass. */
26770 reload_completed = 0;
26771 }
26772
26773 /* Output code to add DELTA to the first argument, and then jump
26774 to FUNCTION. Used for C++ multiple inheritance. */
26775
26776 static void
26777 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26778 HOST_WIDE_INT vcall_offset, tree function)
26779 {
26780 if (TARGET_32BIT)
26781 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26782 else
26783 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26784 }
26785
26786 int
26787 arm_emit_vector_const (FILE *file, rtx x)
26788 {
26789 int i;
26790 const char * pattern;
26791
26792 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26793
26794 switch (GET_MODE (x))
26795 {
26796 case E_V2SImode: pattern = "%08x"; break;
26797 case E_V4HImode: pattern = "%04x"; break;
26798 case E_V8QImode: pattern = "%02x"; break;
26799 default: gcc_unreachable ();
26800 }
26801
26802 fprintf (file, "0x");
26803 for (i = CONST_VECTOR_NUNITS (x); i--;)
26804 {
26805 rtx element;
26806
26807 element = CONST_VECTOR_ELT (x, i);
26808 fprintf (file, pattern, INTVAL (element));
26809 }
26810
26811 return 1;
26812 }
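
/* For example, a V8QImode constant vector {1, 2, 3, 4, 5, 6, 7, 8} is
   printed as the single hexadecimal word 0x0807060504030201, with the
   highest-numbered element first.  Illustrative only.  */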
26813
26814 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
26815 HFmode constant pool entries are actually loaded with ldr. */
26816 void
26817 arm_emit_fp16_const (rtx c)
26818 {
26819 long bits;
26820
26821 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26822 if (WORDS_BIG_ENDIAN)
26823 assemble_zeros (2);
26824 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26825 if (!WORDS_BIG_ENDIAN)
26826 assemble_zeros (2);
26827 }
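
/* For example, on a little-endian target the HFmode constant 1.0 (bit
   pattern 0x3c00) is emitted as the 16-bit value 0x3c00 followed by two
   bytes of zero padding; with big-endian word order the padding comes
   first.  Illustrative only.  */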
26828
26829 const char *
26830 arm_output_load_gr (rtx *operands)
26831 {
26832 rtx reg;
26833 rtx offset;
26834 rtx wcgr;
26835 rtx sum;
26836
26837 if (!MEM_P (operands [1])
26838 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26839 || !REG_P (reg = XEXP (sum, 0))
26840 || !CONST_INT_P (offset = XEXP (sum, 1))
26841 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26842 return "wldrw%?\t%0, %1";
26843
26844 /* Fix up an out-of-range load of a GR register. */
26845 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26846 wcgr = operands[0];
26847 operands[0] = reg;
26848 output_asm_insn ("ldr%?\t%0, %1", operands);
26849
26850 operands[0] = wcgr;
26851 operands[1] = reg;
26852 output_asm_insn ("tmcr%?\t%0, %1", operands);
26853 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26854
26855 return "";
26856 }
26857
26858 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26859
26860 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26861 named arg and all anonymous args onto the stack.
26862 XXX I know the prologue shouldn't be pushing registers, but it is faster
26863 that way. */
26864
26865 static void
26866 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26867 machine_mode mode,
26868 tree type,
26869 int *pretend_size,
26870 int second_time ATTRIBUTE_UNUSED)
26871 {
26872 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26873 int nregs;
26874
26875 cfun->machine->uses_anonymous_args = 1;
26876 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26877 {
26878 nregs = pcum->aapcs_ncrn;
26879 if (nregs & 1)
26880 {
26881 int res = arm_needs_doubleword_align (mode, type);
26882 if (res < 0 && warn_psabi)
26883 inform (input_location, "parameter passing for argument of "
26884 "type %qT changed in GCC 7.1", type);
26885 else if (res > 0)
26886 nregs++;
26887 }
26888 }
26889 else
26890 nregs = pcum->nregs;
26891
26892 if (nregs < NUM_ARG_REGS)
26893 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26894 }
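
/* For example, a variadic function whose named arguments occupy r0 and r1
   has *PRETEND_SIZE set to 8 so that the prologue also pushes r2 and r3
   for the anonymous arguments.  Illustrative only.  */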
26895
26896 /* We can't rely on the caller doing the proper promotion when
26897 using APCS or ATPCS. */
26898
26899 static bool
26900 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26901 {
26902 return !TARGET_AAPCS_BASED;
26903 }
26904
26905 static machine_mode
26906 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26907 machine_mode mode,
26908 int *punsignedp ATTRIBUTE_UNUSED,
26909 const_tree fntype ATTRIBUTE_UNUSED,
26910 int for_return ATTRIBUTE_UNUSED)
26911 {
26912 if (GET_MODE_CLASS (mode) == MODE_INT
26913 && GET_MODE_SIZE (mode) < 4)
26914 return SImode;
26915
26916 return mode;
26917 }
26918
26919
26920 static bool
26921 arm_default_short_enums (void)
26922 {
26923 return ARM_DEFAULT_SHORT_ENUMS;
26924 }
26925
26926
26927 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26928
26929 static bool
26930 arm_align_anon_bitfield (void)
26931 {
26932 return TARGET_AAPCS_BASED;
26933 }
26934
26935
26936 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26937
26938 static tree
26939 arm_cxx_guard_type (void)
26940 {
26941 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26942 }
26943
26944
26945 /* The EABI says test the least significant bit of a guard variable. */
26946
26947 static bool
26948 arm_cxx_guard_mask_bit (void)
26949 {
26950 return TARGET_AAPCS_BASED;
26951 }
26952
26953
26954 /* The EABI specifies that all array cookies are 8 bytes long. */
26955
26956 static tree
26957 arm_get_cookie_size (tree type)
26958 {
26959 tree size;
26960
26961 if (!TARGET_AAPCS_BASED)
26962 return default_cxx_get_cookie_size (type);
26963
26964 size = build_int_cst (sizetype, 8);
26965 return size;
26966 }
26967
26968
26969 /* The EABI says that array cookies should also contain the element size. */
26970
26971 static bool
26972 arm_cookie_has_size (void)
26973 {
26974 return TARGET_AAPCS_BASED;
26975 }
26976
26977
26978 /* The EABI says constructors and destructors should return a pointer to
26979 the object constructed/destroyed. */
26980
26981 static bool
26982 arm_cxx_cdtor_returns_this (void)
26983 {
26984 return TARGET_AAPCS_BASED;
26985 }
26986
26987 /* The EABI says that an inline function may never be the key
26988 method. */
26989
26990 static bool
26991 arm_cxx_key_method_may_be_inline (void)
26992 {
26993 return !TARGET_AAPCS_BASED;
26994 }
26995
26996 static void
26997 arm_cxx_determine_class_data_visibility (tree decl)
26998 {
26999 if (!TARGET_AAPCS_BASED
27000 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
27001 return;
27002
27003 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
27004 is exported. However, on systems without dynamic vague linkage,
27005 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
27006 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
27007 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
27008 else
27009 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
27010 DECL_VISIBILITY_SPECIFIED (decl) = 1;
27011 }
27012
27013 static bool
27014 arm_cxx_class_data_always_comdat (void)
27015 {
27016 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
27017 vague linkage if the class has no key function. */
27018 return !TARGET_AAPCS_BASED;
27019 }
27020
27021
27022 /* The EABI says __aeabi_atexit should be used to register static
27023 destructors. */
27024
27025 static bool
27026 arm_cxx_use_aeabi_atexit (void)
27027 {
27028 return TARGET_AAPCS_BASED;
27029 }
27030
27031
27032 void
27033 arm_set_return_address (rtx source, rtx scratch)
27034 {
27035 arm_stack_offsets *offsets;
27036 HOST_WIDE_INT delta;
27037 rtx addr, mem;
27038 unsigned long saved_regs;
27039
27040 offsets = arm_get_frame_offsets ();
27041 saved_regs = offsets->saved_regs_mask;
27042
27043 if ((saved_regs & (1 << LR_REGNUM)) == 0)
27044 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27045 else
27046 {
27047 if (frame_pointer_needed)
27048 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
27049 else
27050 {
27051 /* LR will be the first saved register. */
27052 delta = offsets->outgoing_args - (offsets->frame + 4);
27053
27054
27055 if (delta >= 4096)
27056 {
27057 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
27058 GEN_INT (delta & ~4095)));
27059 addr = scratch;
27060 delta &= 4095;
27061 }
27062 else
27063 addr = stack_pointer_rtx;
27064
27065 addr = plus_constant (Pmode, addr, delta);
27066 }
27067
27068 /* The store needs to be marked to prevent DSE from deleting
27069 it as dead if it is based on fp. */
27070 mem = gen_frame_mem (Pmode, addr);
27071 MEM_VOLATILE_P (mem) = true;
27072 emit_move_insn (mem, source);
27073 }
27074 }
27075
27076
27077 void
27078 thumb_set_return_address (rtx source, rtx scratch)
27079 {
27080 arm_stack_offsets *offsets;
27081 HOST_WIDE_INT delta;
27082 HOST_WIDE_INT limit;
27083 int reg;
27084 rtx addr, mem;
27085 unsigned long mask;
27086
27087 emit_use (source);
27088
27089 offsets = arm_get_frame_offsets ();
27090 mask = offsets->saved_regs_mask;
27091 if (mask & (1 << LR_REGNUM))
27092 {
27093 limit = 1024;
27094 /* Find the saved regs. */
27095 if (frame_pointer_needed)
27096 {
27097 delta = offsets->soft_frame - offsets->saved_args;
27098 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
27099 if (TARGET_THUMB1)
27100 limit = 128;
27101 }
27102 else
27103 {
27104 delta = offsets->outgoing_args - offsets->saved_args;
27105 reg = SP_REGNUM;
27106 }
27107 /* Allow for the stack frame. */
27108 if (TARGET_THUMB1 && TARGET_BACKTRACE)
27109 delta -= 16;
27110 /* The link register is always the first saved register. */
27111 delta -= 4;
27112
27113 /* Construct the address. */
27114 addr = gen_rtx_REG (SImode, reg);
27115 if (delta > limit)
27116 {
27117 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
27118 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
27119 addr = scratch;
27120 }
27121 else
27122 addr = plus_constant (Pmode, addr, delta);
27123
27124 /* The store needs to be marked to prevent DSE from deleting
27125 it as dead if it is based on fp. */
27126 mem = gen_frame_mem (Pmode, addr);
27127 MEM_VOLATILE_P (mem) = true;
27128 emit_move_insn (mem, source);
27129 }
27130 else
27131 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27132 }
27133
27134 /* Implements target hook vector_mode_supported_p. */
27135 bool
27136 arm_vector_mode_supported_p (machine_mode mode)
27137 {
27138 /* Neon also supports V2SImode, etc. listed in the clause below. */
27139 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
27140 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
27141 || mode == V2DImode || mode == V8HFmode))
27142 return true;
27143
27144 if ((TARGET_NEON || TARGET_IWMMXT)
27145 && ((mode == V2SImode)
27146 || (mode == V4HImode)
27147 || (mode == V8QImode)))
27148 return true;
27149
27150 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
27151 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
27152 || mode == V2HAmode))
27153 return true;
27154
27155 return false;
27156 }
27157
27158 /* Implements target hook array_mode_supported_p. */
27159
27160 static bool
27161 arm_array_mode_supported_p (machine_mode mode,
27162 unsigned HOST_WIDE_INT nelems)
27163 {
27164 if (TARGET_NEON
27165 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
27166 && (nelems >= 2 && nelems <= 4))
27167 return true;
27168
27169 return false;
27170 }
27171
27172 /* Use the option -mvectorize-with-neon-double to override the use of quadword
27173 registers when autovectorizing for Neon, at least until multiple vector
27174 widths are supported properly by the middle-end. */
27175
27176 static machine_mode
27177 arm_preferred_simd_mode (scalar_mode mode)
27178 {
27179 if (TARGET_NEON)
27180 switch (mode)
27181 {
27182 case E_SFmode:
27183 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27184 case E_SImode:
27185 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27186 case E_HImode:
27187 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27188 case E_QImode:
27189 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27190 case E_DImode:
27191 if (!TARGET_NEON_VECTORIZE_DOUBLE)
27192 return V2DImode;
27193 break;
27194
27195 default:;
27196 }
27197
27198 if (TARGET_REALLY_IWMMXT)
27199 switch (mode)
27200 {
27201 case E_SImode:
27202 return V2SImode;
27203 case E_HImode:
27204 return V4HImode;
27205 case E_QImode:
27206 return V8QImode;
27207
27208 default:;
27209 }
27210
27211 return word_mode;
27212 }
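
/* For example, with Neon enabled and the default quad-word preference,
   SFmode data is vectorized using V4SFmode; with
   -mvectorize-with-neon-double it is vectorized using V2SFmode instead.
   Illustrative only.  */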
27213
27214 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27215
27216 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27217 using r0-r4 for function arguments, r7 for the stack frame and not have
27218 enough left over to do doubleword arithmetic. For Thumb-2 all the
27219 potentially problematic instructions accept high registers so this is not
27220 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27221 that require many low registers. */
27222 static bool
27223 arm_class_likely_spilled_p (reg_class_t rclass)
27224 {
27225 if ((TARGET_THUMB1 && rclass == LO_REGS)
27226 || rclass == CC_REG)
27227 return true;
27228
27229 return false;
27230 }
27231
27232 /* Implements target hook small_register_classes_for_mode_p. */
27233 bool
27234 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27235 {
27236 return TARGET_THUMB1;
27237 }
27238
27239 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27240 ARM insns and therefore guarantee that the shift count is modulo 256.
27241 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27242 guarantee no particular behavior for out-of-range counts. */
27243
27244 static unsigned HOST_WIDE_INT
27245 arm_shift_truncation_mask (machine_mode mode)
27246 {
27247 return mode == SImode ? 255 : 0;
27248 }
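
/* For example, an SImode shift by a register holding 257 behaves like a
   shift by 1 (257 & 255), whereas nothing is guaranteed for out-of-range
   DImode shift counts.  Illustrative only.  */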
27249
27250
27251 /* Map internal gcc register numbers to DWARF2 register numbers. */
27252
27253 unsigned int
27254 arm_dbx_register_number (unsigned int regno)
27255 {
27256 if (regno < 16)
27257 return regno;
27258
27259 if (IS_VFP_REGNUM (regno))
27260 {
27261 /* See comment in arm_dwarf_register_span. */
27262 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27263 return 64 + regno - FIRST_VFP_REGNUM;
27264 else
27265 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27266 }
27267
27268 if (IS_IWMMXT_GR_REGNUM (regno))
27269 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27270
27271 if (IS_IWMMXT_REGNUM (regno))
27272 return 112 + regno - FIRST_IWMMXT_REGNUM;
27273
27274 return DWARF_FRAME_REGISTERS;
27275 }
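
/* For example, r13 (sp) maps to DWARF register 13, s0 maps to 64, d16
   maps to 272 (256 + 16) and wR0 maps to 112 under the scheme above.
   Illustrative only.  */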
27276
27277 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27278 GCC models them as 64 32-bit registers, so we need to describe this to
27279 the DWARF generation code. Other registers can use the default. */
27280 static rtx
27281 arm_dwarf_register_span (rtx rtl)
27282 {
27283 machine_mode mode;
27284 unsigned regno;
27285 rtx parts[16];
27286 int nregs;
27287 int i;
27288
27289 regno = REGNO (rtl);
27290 if (!IS_VFP_REGNUM (regno))
27291 return NULL_RTX;
27292
27293 /* XXX FIXME: The EABI defines two VFP register ranges:
27294 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27295 256-287: D0-D31
27296 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27297 corresponding D register. Until GDB supports this, we shall use the
27298 legacy encodings. We also use these encodings for D0-D15 for
27299 compatibility with older debuggers. */
27300 mode = GET_MODE (rtl);
27301 if (GET_MODE_SIZE (mode) < 8)
27302 return NULL_RTX;
27303
27304 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27305 {
27306 nregs = GET_MODE_SIZE (mode) / 4;
27307 for (i = 0; i < nregs; i += 2)
27308 if (TARGET_BIG_END)
27309 {
27310 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27311 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27312 }
27313 else
27314 {
27315 parts[i] = gen_rtx_REG (SImode, regno + i);
27316 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27317 }
27318 }
27319 else
27320 {
27321 nregs = GET_MODE_SIZE (mode) / 8;
27322 for (i = 0; i < nregs; i++)
27323 parts[i] = gen_rtx_REG (DImode, regno + i);
27324 }
27325
27326 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27327 }
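
/* For example, a DFmode value in d0 is described as the pair {s0, s1}
   (swapped for big-endian word order), whereas a value in d16 is described
   as a single DImode piece because d16 has no single-precision aliases.
   Illustrative only.  */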
27328
27329 #if ARM_UNWIND_INFO
27330 /* Emit unwind directives for a store-multiple instruction or stack pointer
27331 push during alignment.
27332 These should only ever be generated by the function prologue code, so
27333 expect them to have a particular form.
27334 The store-multiple instruction sometimes pushes pc as the last register,
27335 although it should not be tracked in the unwind information; for -Os it
27336 sometimes pushes some dummy registers before the first register that needs
27337 to be tracked in the unwind information; such dummy registers are there just
27338 to avoid separate stack adjustment, and will not be restored in the
27339 epilogue. */
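
/* For example, a prologue "push {r4, r5, lr}" is annotated as
   ".save {r4, r5, lr}", a VFP store-multiple of d8/d9 as ".vsave {d8, d9}",
   and a trailing dummy push of pc as an extra ".pad #4".  Illustrative
   only.  */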
27340
27341 static void
27342 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27343 {
27344 int i;
27345 HOST_WIDE_INT offset;
27346 HOST_WIDE_INT nregs;
27347 int reg_size;
27348 unsigned reg;
27349 unsigned lastreg;
27350 unsigned padfirst = 0, padlast = 0;
27351 rtx e;
27352
27353 e = XVECEXP (p, 0, 0);
27354 gcc_assert (GET_CODE (e) == SET);
27355
27356 /* First insn will adjust the stack pointer. */
27357 gcc_assert (GET_CODE (e) == SET
27358 && REG_P (SET_DEST (e))
27359 && REGNO (SET_DEST (e)) == SP_REGNUM
27360 && GET_CODE (SET_SRC (e)) == PLUS);
27361
27362 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27363 nregs = XVECLEN (p, 0) - 1;
27364 gcc_assert (nregs);
27365
27366 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27367 if (reg < 16)
27368 {
27369 /* For -Os dummy registers can be pushed at the beginning to
27370 avoid separate stack pointer adjustment. */
27371 e = XVECEXP (p, 0, 1);
27372 e = XEXP (SET_DEST (e), 0);
27373 if (GET_CODE (e) == PLUS)
27374 padfirst = INTVAL (XEXP (e, 1));
27375 gcc_assert (padfirst == 0 || optimize_size);
27376 /* The function prologue may also push pc, but not annotate it as it is
27377 never restored. We turn this into a stack pointer adjustment. */
27378 e = XVECEXP (p, 0, nregs);
27379 e = XEXP (SET_DEST (e), 0);
27380 if (GET_CODE (e) == PLUS)
27381 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27382 else
27383 padlast = offset - 4;
27384 gcc_assert (padlast == 0 || padlast == 4);
27385 if (padlast == 4)
27386 fprintf (asm_out_file, "\t.pad #4\n");
27387 reg_size = 4;
27388 fprintf (asm_out_file, "\t.save {");
27389 }
27390 else if (IS_VFP_REGNUM (reg))
27391 {
27392 reg_size = 8;
27393 fprintf (asm_out_file, "\t.vsave {");
27394 }
27395 else
27396 /* Unknown register type. */
27397 gcc_unreachable ();
27398
27399 /* If the stack increment doesn't match the size of the saved registers,
27400 something has gone horribly wrong. */
27401 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27402
27403 offset = padfirst;
27404 lastreg = 0;
27405 /* The remaining insns will describe the stores. */
27406 for (i = 1; i <= nregs; i++)
27407 {
27408 /* Expect (set (mem <addr>) (reg)).
27409 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27410 e = XVECEXP (p, 0, i);
27411 gcc_assert (GET_CODE (e) == SET
27412 && MEM_P (SET_DEST (e))
27413 && REG_P (SET_SRC (e)));
27414
27415 reg = REGNO (SET_SRC (e));
27416 gcc_assert (reg >= lastreg);
27417
27418 if (i != 1)
27419 fprintf (asm_out_file, ", ");
27420 /* We can't use %r for vfp because we need to use the
27421 double precision register names. */
27422 if (IS_VFP_REGNUM (reg))
27423 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27424 else
27425 asm_fprintf (asm_out_file, "%r", reg);
27426
27427 if (flag_checking)
27428 {
27429 /* Check that the addresses are consecutive. */
27430 e = XEXP (SET_DEST (e), 0);
27431 if (GET_CODE (e) == PLUS)
27432 gcc_assert (REG_P (XEXP (e, 0))
27433 && REGNO (XEXP (e, 0)) == SP_REGNUM
27434 && CONST_INT_P (XEXP (e, 1))
27435 && offset == INTVAL (XEXP (e, 1)));
27436 else
27437 gcc_assert (i == 1
27438 && REG_P (e)
27439 && REGNO (e) == SP_REGNUM);
27440 offset += reg_size;
27441 }
27442 }
27443 fprintf (asm_out_file, "}\n");
27444 if (padfirst)
27445 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27446 }
27447
27448 /* Emit unwind directives for a SET. */
27449
27450 static void
27451 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27452 {
27453 rtx e0;
27454 rtx e1;
27455 unsigned reg;
27456
27457 e0 = XEXP (p, 0);
27458 e1 = XEXP (p, 1);
27459 switch (GET_CODE (e0))
27460 {
27461 case MEM:
27462 /* Pushing a single register. */
27463 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27464 || !REG_P (XEXP (XEXP (e0, 0), 0))
27465 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27466 abort ();
27467
27468 asm_fprintf (asm_out_file, "\t.save ");
27469 if (IS_VFP_REGNUM (REGNO (e1)))
27470 asm_fprintf (asm_out_file, "{d%d}\n",
27471 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27472 else
27473 asm_fprintf (asm_out_file, "{%r}\n", REGNO (e1));
27474 break;
27475
27476 case REG:
27477 if (REGNO (e0) == SP_REGNUM)
27478 {
27479 /* A stack increment. */
27480 if (GET_CODE (e1) != PLUS
27481 || !REG_P (XEXP (e1, 0))
27482 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27483 || !CONST_INT_P (XEXP (e1, 1)))
27484 abort ();
27485
27486 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27487 -INTVAL (XEXP (e1, 1)));
27488 }
27489 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27490 {
27491 HOST_WIDE_INT offset;
27492
27493 if (GET_CODE (e1) == PLUS)
27494 {
27495 if (!REG_P (XEXP (e1, 0))
27496 || !CONST_INT_P (XEXP (e1, 1)))
27497 abort ();
27498 reg = REGNO (XEXP (e1, 0));
27499 offset = INTVAL (XEXP (e1, 1));
27500 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27501 HARD_FRAME_POINTER_REGNUM, reg,
27502 offset);
27503 }
27504 else if (REG_P (e1))
27505 {
27506 reg = REGNO (e1);
27507 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27508 HARD_FRAME_POINTER_REGNUM, reg);
27509 }
27510 else
27511 abort ();
27512 }
27513 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27514 {
27515 /* Move from sp to reg. */
27516 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27517 }
27518 else if (GET_CODE (e1) == PLUS
27519 && REG_P (XEXP (e1, 0))
27520 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27521 && CONST_INT_P (XEXP (e1, 1)))
27522 {
27523 /* Set reg to offset from sp. */
27524 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27525 REGNO (e0), (int) INTVAL (XEXP (e1, 1)));
27526 }
27527 else
27528 abort ();
27529 break;
27530
27531 default:
27532 abort ();
27533 }
27534 }
27535
27536
27537 /* Emit unwind directives for the given insn. */
27538
27539 static void
27540 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27541 {
27542 rtx note, pat;
27543 bool handled_one = false;
27544
27545 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27546 return;
27547
27548 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27549 && (TREE_NOTHROW (current_function_decl)
27550 || crtl->all_throwers_are_sibcalls))
27551 return;
27552
27553 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27554 return;
27555
27556 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27557 {
27558 switch (REG_NOTE_KIND (note))
27559 {
27560 case REG_FRAME_RELATED_EXPR:
27561 pat = XEXP (note, 0);
27562 goto found;
27563
27564 case REG_CFA_REGISTER:
27565 pat = XEXP (note, 0);
27566 if (pat == NULL)
27567 {
27568 pat = PATTERN (insn);
27569 if (GET_CODE (pat) == PARALLEL)
27570 pat = XVECEXP (pat, 0, 0);
27571 }
27572
27573 /* Only emitted for IS_STACKALIGN re-alignment. */
27574 {
27575 rtx dest, src;
27576 unsigned reg;
27577
27578 src = SET_SRC (pat);
27579 dest = SET_DEST (pat);
27580
27581 gcc_assert (src == stack_pointer_rtx);
27582 reg = REGNO (dest);
27583 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27584 reg + 0x90, reg);
27585 }
27586 handled_one = true;
27587 break;
27588
27589 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27590 to get correct dwarf information for shrink-wrapping. We should not
27591 emit unwind information for it because these notes are used either for
27592 pretend arguments or to adjust sp and restore registers from the
27593 stack. */
27594 case REG_CFA_DEF_CFA:
27595 case REG_CFA_ADJUST_CFA:
27596 case REG_CFA_RESTORE:
27597 return;
27598
27599 case REG_CFA_EXPRESSION:
27600 case REG_CFA_OFFSET:
27601 /* ??? Only handling here what we actually emit. */
27602 gcc_unreachable ();
27603
27604 default:
27605 break;
27606 }
27607 }
27608 if (handled_one)
27609 return;
27610 pat = PATTERN (insn);
27611 found:
27612
27613 switch (GET_CODE (pat))
27614 {
27615 case SET:
27616 arm_unwind_emit_set (asm_out_file, pat);
27617 break;
27618
27619 case SEQUENCE:
27620 /* Store multiple. */
27621 arm_unwind_emit_sequence (asm_out_file, pat);
27622 break;
27623
27624 default:
27625 abort();
27626 }
27627 }
27628
27629
27630 /* Output a reference from a function exception table to the type_info
27631 object X. The EABI specifies that the symbol should be relocated by
27632 an R_ARM_TARGET2 relocation. */
27633
27634 static bool
27635 arm_output_ttype (rtx x)
27636 {
27637 fputs ("\t.word\t", asm_out_file);
27638 output_addr_const (asm_out_file, x);
27639 /* Use special relocations for symbol references. */
27640 if (!CONST_INT_P (x))
27641 fputs ("(TARGET2)", asm_out_file);
27642 fputc ('\n', asm_out_file);
27643
27644 return TRUE;
27645 }
27646
27647 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27648
27649 static void
27650 arm_asm_emit_except_personality (rtx personality)
27651 {
27652 fputs ("\t.personality\t", asm_out_file);
27653 output_addr_const (asm_out_file, personality);
27654 fputc ('\n', asm_out_file);
27655 }
27656 #endif /* ARM_UNWIND_INFO */
27657
27658 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27659
27660 static void
27661 arm_asm_init_sections (void)
27662 {
27663 #if ARM_UNWIND_INFO
27664 exception_section = get_unnamed_section (0, output_section_asm_op,
27665 "\t.handlerdata");
27666 #endif /* ARM_UNWIND_INFO */
27667
27668 #ifdef OBJECT_FORMAT_ELF
27669 if (target_pure_code)
27670 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27671 #endif
27672 }
27673
27674 /* Output unwind directives for the start/end of a function. */
27675
27676 void
27677 arm_output_fn_unwind (FILE * f, bool prologue)
27678 {
27679 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27680 return;
27681
27682 if (prologue)
27683 fputs ("\t.fnstart\n", f);
27684 else
27685 {
27686 /* If this function will never be unwound, then mark it as such.
27687 The same condition is used in arm_unwind_emit to suppress
27688 the frame annotations. */
27689 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27690 && (TREE_NOTHROW (current_function_decl)
27691 || crtl->all_throwers_are_sibcalls))
27692 fputs("\t.cantunwind\n", f);
27693
27694 fputs ("\t.fnend\n", f);
27695 }
27696 }
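
/* A function with target unwind info is therefore bracketed as

     .fnstart
     ... body and unwind directives ...
     .fnend

   with ".cantunwind" emitted just before ".fnend" when the function can
   never be unwound.  Illustrative only.  */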
27697
27698 static bool
27699 arm_emit_tls_decoration (FILE *fp, rtx x)
27700 {
27701 enum tls_reloc reloc;
27702 rtx val;
27703
27704 val = XVECEXP (x, 0, 0);
27705 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27706
27707 output_addr_const (fp, val);
27708
27709 switch (reloc)
27710 {
27711 case TLS_GD32:
27712 fputs ("(tlsgd)", fp);
27713 break;
27714 case TLS_LDM32:
27715 fputs ("(tlsldm)", fp);
27716 break;
27717 case TLS_LDO32:
27718 fputs ("(tlsldo)", fp);
27719 break;
27720 case TLS_IE32:
27721 fputs ("(gottpoff)", fp);
27722 break;
27723 case TLS_LE32:
27724 fputs ("(tpoff)", fp);
27725 break;
27726 case TLS_DESCSEQ:
27727 fputs ("(tlsdesc)", fp);
27728 break;
27729 default:
27730 gcc_unreachable ();
27731 }
27732
27733 switch (reloc)
27734 {
27735 case TLS_GD32:
27736 case TLS_LDM32:
27737 case TLS_IE32:
27738 case TLS_DESCSEQ:
27739 fputs (" + (. - ", fp);
27740 output_addr_const (fp, XVECEXP (x, 0, 2));
27741 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27742 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27743 output_addr_const (fp, XVECEXP (x, 0, 3));
27744 fputc (')', fp);
27745 break;
27746 default:
27747 break;
27748 }
27749
27750 return TRUE;
27751 }
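
/* The PC-relative forms therefore have the general shape
   "sym(tlsgd) + (. - LABEL1 - LABEL2)", with the final "-" replaced by "+"
   for the TLS descriptor sequence.  Illustrative only; the operands come
   from the UNSPEC_TLS vector.  */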
27752
27753 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27754
27755 static void
27756 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27757 {
27758 gcc_assert (size == 4);
27759 fputs ("\t.word\t", file);
27760 output_addr_const (file, x);
27761 fputs ("(tlsldo)", file);
27762 }
27763
27764 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27765
27766 static bool
27767 arm_output_addr_const_extra (FILE *fp, rtx x)
27768 {
27769 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27770 return arm_emit_tls_decoration (fp, x);
27771 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27772 {
27773 char label[256];
27774 int labelno = INTVAL (XVECEXP (x, 0, 0));
27775
27776 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27777 assemble_name_raw (fp, label);
27778
27779 return TRUE;
27780 }
27781 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27782 {
27783 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27784 if (GOT_PCREL)
27785 fputs ("+.", fp);
27786 fputs ("-(", fp);
27787 output_addr_const (fp, XVECEXP (x, 0, 0));
27788 fputc (')', fp);
27789 return TRUE;
27790 }
27791 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27792 {
27793 output_addr_const (fp, XVECEXP (x, 0, 0));
27794 if (GOT_PCREL)
27795 fputs ("+.", fp);
27796 fputs ("-(", fp);
27797 output_addr_const (fp, XVECEXP (x, 0, 1));
27798 fputc (')', fp);
27799 return TRUE;
27800 }
27801 else if (GET_CODE (x) == CONST_VECTOR)
27802 return arm_emit_vector_const (fp, x);
27803
27804 return FALSE;
27805 }
27806
27807 /* Output assembly for a shift instruction.
27808 SET_FLAGS determines how the instruction modifies the condition codes.
27809 0 - Do not set condition codes.
27810 1 - Set condition codes.
27811 2 - Use smallest instruction. */
27812 const char *
27813 arm_output_shift(rtx * operands, int set_flags)
27814 {
27815 char pattern[100];
27816 static const char flag_chars[3] = {'?', '.', '!'};
27817 const char *shift;
27818 HOST_WIDE_INT val;
27819 char c;
27820
27821 c = flag_chars[set_flags];
27822 shift = shift_op(operands[3], &val);
27823 if (shift)
27824 {
27825 if (val != -1)
27826 operands[2] = GEN_INT(val);
27827 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27828 }
27829 else
27830 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27831
27832 output_asm_insn (pattern, operands);
27833 return "";
27834 }
27835
27836 /* Output assembly for a WMMX immediate shift instruction. */
27837 const char *
27838 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27839 {
27840 int shift = INTVAL (operands[2]);
27841 char templ[50];
27842 machine_mode opmode = GET_MODE (operands[0]);
27843
27844 gcc_assert (shift >= 0);
27845
27846 /* Handle the case where the shift value is greater than 63 (for the D
27847 qualifier), 31 (for the W qualifier) or 15 (for the H qualifier). */
27848 if (((opmode == V4HImode) && (shift > 15))
27849 || ((opmode == V2SImode) && (shift > 31))
27850 || ((opmode == DImode) && (shift > 63)))
27851 {
27852 if (wror_or_wsra)
27853 {
27854 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27855 output_asm_insn (templ, operands);
27856 if (opmode == DImode)
27857 {
27858 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27859 output_asm_insn (templ, operands);
27860 }
27861 }
27862 else
27863 {
27864 /* The destination register will contain all zeros. */
27865 sprintf (templ, "wzero\t%%0");
27866 output_asm_insn (templ, operands);
27867 }
27868 return "";
27869 }
27870
27871 if ((opmode == DImode) && (shift > 32))
27872 {
27873 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27874 output_asm_insn (templ, operands);
27875 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27876 output_asm_insn (templ, operands);
27877 }
27878 else
27879 {
27880 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27881 output_asm_insn (templ, operands);
27882 }
27883 return "";
27884 }
27885
27886 /* Output assembly for a WMMX tinsr instruction. */
27887 const char *
27888 arm_output_iwmmxt_tinsr (rtx *operands)
27889 {
27890 int mask = INTVAL (operands[3]);
27891 int i;
27892 char templ[50];
27893 int units = mode_nunits[GET_MODE (operands[0])];
27894 gcc_assert ((mask & (mask - 1)) == 0);
27895 for (i = 0; i < units; ++i)
27896 {
27897 if ((mask & 0x01) == 1)
27898 {
27899 break;
27900 }
27901 mask >>= 1;
27902 }
27903 gcc_assert (i < units);
27904 {
27905 switch (GET_MODE (operands[0]))
27906 {
27907 case E_V8QImode:
27908 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27909 break;
27910 case E_V4HImode:
27911 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27912 break;
27913 case E_V2SImode:
27914 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27915 break;
27916 default:
27917 gcc_unreachable ();
27918 break;
27919 }
27920 output_asm_insn (templ, operands);
27921 }
27922 return "";
27923 }
27924
27925 /* Output a Thumb-1 casesi dispatch sequence. */
27926 const char *
27927 thumb1_output_casesi (rtx *operands)
27928 {
27929 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27930
27931 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27932
27933 switch (GET_MODE(diff_vec))
27934 {
27935 case E_QImode:
27936 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27937 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27938 case E_HImode:
27939 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27940 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27941 case E_SImode:
27942 return "bl\t%___gnu_thumb1_case_si";
27943 default:
27944 gcc_unreachable ();
27945 }
27946 }
27947
27948 /* Output a Thumb-2 casesi instruction. */
27949 const char *
27950 thumb2_output_casesi (rtx *operands)
27951 {
27952 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27953
27954 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27955
27956 output_asm_insn ("cmp\t%0, %1", operands);
27957 output_asm_insn ("bhi\t%l3", operands);
27958 switch (GET_MODE(diff_vec))
27959 {
27960 case E_QImode:
27961 return "tbb\t[%|pc, %0]";
27962 case E_HImode:
27963 return "tbh\t[%|pc, %0, lsl #1]";
27964 case E_SImode:
27965 if (flag_pic)
27966 {
27967 output_asm_insn ("adr\t%4, %l2", operands);
27968 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27969 output_asm_insn ("add\t%4, %4, %5", operands);
27970 return "bx\t%4";
27971 }
27972 else
27973 {
27974 output_asm_insn ("adr\t%4, %l2", operands);
27975 return "ldr\t%|pc, [%4, %0, lsl #2]";
27976 }
27977 default:
27978 gcc_unreachable ();
27979 }
27980 }
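
/* For a QImode dispatch table the emitted sequence is therefore of the form

     cmp   rINDEX, rBOUND
     bhi   .Ldefault
     tbb   [pc, rINDEX]

   while HImode uses tbh and SImode uses an adr/ldr sequence.  Illustrative
   only; the operands and labels are supplied by the casesi expansion.  */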
27981
27982 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27983 per-core tuning structs. */
27984 static int
27985 arm_issue_rate (void)
27986 {
27987 return current_tune->issue_rate;
27988 }
27989
27990 /* Return how many instructions the scheduler should look ahead to choose
27991 the best one. */
27992 static int
27993 arm_first_cycle_multipass_dfa_lookahead (void)
27994 {
27995 int issue_rate = arm_issue_rate ();
27996
27997 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27998 }
27999
28000 /* Enable modeling of L2 auto-prefetcher. */
28001 static int
28002 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
28003 {
28004 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
28005 }
28006
28007 const char *
28008 arm_mangle_type (const_tree type)
28009 {
28010 /* The ARM ABI documents (10th October 2008) say that "__va_list"
28011 has to be mangled as if it is in the "std" namespace. */
28012 if (TARGET_AAPCS_BASED
28013 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
28014 return "St9__va_list";
28015
28016 /* Half-precision float. */
28017 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
28018 return "Dh";
28019
28020 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
28021 builtin type. */
28022 if (TYPE_NAME (type) != NULL)
28023 return arm_mangle_builtin_type (type);
28024
28025 /* Use the default mangling. */
28026 return NULL;
28027 }
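
/* For example, on an AAPCS target a __fp16 argument mangles as "Dh" and a
   va_list argument as "St9__va_list".  Illustrative only.  */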
28028
28029 /* Order of allocation of core registers for Thumb: this allocation is
28030 written over the corresponding initial entries of the array
28031 initialized with REG_ALLOC_ORDER. We allocate all low registers
28032 first. Saving and restoring a low register is usually cheaper than
28033 using a call-clobbered high register. */
28034
28035 static const int thumb_core_reg_alloc_order[] =
28036 {
28037 3, 2, 1, 0, 4, 5, 6, 7,
28038 12, 14, 8, 9, 10, 11
28039 };
28040
28041 /* Adjust register allocation order when compiling for Thumb. */
28042
28043 void
28044 arm_order_regs_for_local_alloc (void)
28045 {
28046 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
28047 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
28048 if (TARGET_THUMB)
28049 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
28050 sizeof (thumb_core_reg_alloc_order));
28051 }
28052
28053 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
28054
28055 bool
28056 arm_frame_pointer_required (void)
28057 {
28058 if (SUBTARGET_FRAME_POINTER_REQUIRED)
28059 return true;
28060
28061 /* If the function receives nonlocal gotos, it needs to save the frame
28062 pointer in the nonlocal_goto_save_area object. */
28063 if (cfun->has_nonlocal_label)
28064 return true;
28065
28066 /* The frame pointer is required for non-leaf APCS frames. */
28067 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
28068 return true;
28069
28070 /* If we are probing the stack in the prologue, we will have a faulting
28071 instruction prior to the stack adjustment and this requires a frame
28072 pointer if we want to catch the exception using the EABI unwinder. */
28073 if (!IS_INTERRUPT (arm_current_func_type ())
28074 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
28075 || flag_stack_clash_protection)
28076 && arm_except_unwind_info (&global_options) == UI_TARGET
28077 && cfun->can_throw_non_call_exceptions)
28078 {
28079 HOST_WIDE_INT size = get_frame_size ();
28080
28081 /* That's irrelevant if there is no stack adjustment. */
28082 if (size <= 0)
28083 return false;
28084
28085 /* That's relevant only if there is a stack probe. */
28086 if (crtl->is_leaf && !cfun->calls_alloca)
28087 {
28088 /* We don't have the final size of the frame so adjust. */
28089 size += 32 * UNITS_PER_WORD;
28090 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
28091 return true;
28092 }
28093 else
28094 return true;
28095 }
28096
28097 return false;
28098 }
28099
28100 /* Only Thumb-1 lacks support for conditional execution, so return true if
28101 the target is not Thumb-1. */
28102 static bool
28103 arm_have_conditional_execution (void)
28104 {
28105 return !TARGET_THUMB1;
28106 }
28107
28108 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
28109 static HOST_WIDE_INT
28110 arm_vector_alignment (const_tree type)
28111 {
28112 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
28113
28114 if (TARGET_AAPCS_BASED)
28115 align = MIN (align, 64);
28116
28117 return align;
28118 }
28119
28120 static unsigned int
28121 arm_autovectorize_vector_sizes (void)
28122 {
28123 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
28124 }
28125
28126 static bool
28127 arm_vector_alignment_reachable (const_tree type, bool is_packed)
28128 {
28129 /* Vectors which aren't in packed structures will not be less aligned than
28130 the natural alignment of their element type, so this is safe. */
28131 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28132 return !is_packed;
28133
28134 return default_builtin_vector_alignment_reachable (type, is_packed);
28135 }
28136
28137 static bool
28138 arm_builtin_support_vector_misalignment (machine_mode mode,
28139 const_tree type, int misalignment,
28140 bool is_packed)
28141 {
28142 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28143 {
28144 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
28145
28146 if (is_packed)
28147 return align == 1;
28148
28149 /* If the misalignment is unknown, we should be able to handle the access
28150 so long as it is not to a member of a packed data structure. */
28151 if (misalignment == -1)
28152 return true;
28153
28154 /* Return true if the misalignment is a multiple of the natural alignment
28155 of the vector's element type. This is probably always going to be
28156 true in practice, since we've already established that this isn't a
28157 packed access. */
28158 return ((misalignment % align) == 0);
28159 }
28160
28161 return default_builtin_support_vector_misalignment (mode, type, misalignment,
28162 is_packed);
28163 }
28164
28165 static void
28166 arm_conditional_register_usage (void)
28167 {
28168 int regno;
28169
28170 if (TARGET_THUMB1 && optimize_size)
28171 {
28172 /* When optimizing for size on Thumb-1, it's better not
28173 to use the HI regs, because of the overhead of
28174 stacking them. */
28175 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
28176 fixed_regs[regno] = call_used_regs[regno] = 1;
28177 }
28178
28179 /* The link register can be clobbered by any branch insn,
28180 but we have no way to track that at present, so mark
28181 it as unavailable. */
28182 if (TARGET_THUMB1)
28183 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28184
28185 if (TARGET_32BIT && TARGET_HARD_FLOAT)
28186 {
28187 /* VFPv3 registers are disabled when earlier VFP
28188 versions are selected due to the definition of
28189 LAST_VFP_REGNUM. */
28190 for (regno = FIRST_VFP_REGNUM;
28191 regno <= LAST_VFP_REGNUM; ++ regno)
28192 {
28193 fixed_regs[regno] = 0;
28194 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28195 || regno >= FIRST_VFP_REGNUM + 32;
28196 }
28197 }
28198
28199 if (TARGET_REALLY_IWMMXT)
28200 {
28201 regno = FIRST_IWMMXT_GR_REGNUM;
28202 /* The 2002/10/09 revision of the XScale ABI has wCG0
28203 and wCG1 as call-preserved registers. The 2002/11/21
28204 revision changed this so that all wCG registers are
28205 scratch registers. */
28206 for (regno = FIRST_IWMMXT_GR_REGNUM;
28207 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28208 fixed_regs[regno] = 0;
28209 /* The XScale ABI has wR0 - wR9 as scratch registers,
28210 the rest as call-preserved registers. */
28211 for (regno = FIRST_IWMMXT_REGNUM;
28212 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28213 {
28214 fixed_regs[regno] = 0;
28215 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28216 }
28217 }
28218
28219 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28220 {
28221 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28222 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28223 }
28224 else if (TARGET_APCS_STACK)
28225 {
28226 fixed_regs[10] = 1;
28227 call_used_regs[10] = 1;
28228 }
28229 /* -mcaller-super-interworking reserves r11 for calls to
28230 _interwork_r11_call_via_rN(). Making the register global
28231 is an easy way of ensuring that it remains valid for all
28232 calls. */
28233 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28234 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28235 {
28236 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28237 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28238 if (TARGET_CALLER_INTERWORKING)
28239 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28240 }
28241 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28242 }
28243
28244 static reg_class_t
28245 arm_preferred_rename_class (reg_class_t rclass)
28246 {
28247 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28248 using GENERAL_REGS. During the register rename pass we prefer LO_REGS,
28249 so that code size can be reduced. */
28250 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28251 return LO_REGS;
28252 else
28253 return NO_REGS;
28254 }
28255
28256 /* Compute the attribute "length" of insn "*push_multi".
28257 So this function MUST be kept in sync with that insn pattern. */
28258 int
28259 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28260 {
28261 int i, regno, hi_reg;
28262 int num_saves = XVECLEN (parallel_op, 0);
28263
28264 /* ARM mode. */
28265 if (TARGET_ARM)
28266 return 4;
28267 /* Thumb1 mode. */
28268 if (TARGET_THUMB1)
28269 return 2;
28270
28271 /* Thumb2 mode. */
28272 regno = REGNO (first_op);
28273 /* For PUSH/STM under Thumb-2 mode, we can use 16-bit encodings if the register
28274 list is 8-bit. Normally this means all registers in the list must be
28275 LO_REGS, that is (R0-R7). If any HI_REGS register is used, then we must use
28276 32-bit encodings. The one exception is PUSH, where LR (a HI_REGS register)
28277 can still be used with the 16-bit encoding. */
28278 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28279 for (i = 1; i < num_saves && !hi_reg; i++)
28280 {
28281 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28282 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28283 }
28284
28285 if (!hi_reg)
28286 return 2;
28287 return 4;
28288 }
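
/* For example, under Thumb-2 "push {r4, r5, lr}" has length 2 (16-bit
   encoding) while "push {r4, r8}" has length 4.  Illustrative only.  */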
28289
28290 /* Compute the attribute "length" of insn. Currently, this function is used
28291 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28292 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28293 rtx. RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
28294 true if OPERANDS contains an insn which explicitly updates the base register. */
28295
28296 int
28297 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28298 {
28299 /* ARM mode. */
28300 if (TARGET_ARM)
28301 return 4;
28302 /* Thumb1 mode. */
28303 if (TARGET_THUMB1)
28304 return 2;
28305
28306 rtx parallel_op = operands[0];
28307 /* Initialize INDX to the index of the last element of the PARALLEL. */
28308 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28309 /* Initialize REGNO to the base register. */
28310 unsigned regno = REGNO (operands[1]);
28311 /* Skip the return and write-back patterns;
28312 we only need the register pop patterns for later analysis. */
28313 unsigned first_indx = 0;
28314 first_indx += return_pc ? 1 : 0;
28315 first_indx += write_back_p ? 1 : 0;
28316
28317 /* A pop operation can be done through LDM or POP. If the base register is SP
28318 and write-back is used, then an LDM is an alias of POP. */
28319 bool pop_p = (regno == SP_REGNUM && write_back_p);
28320 bool ldm_p = !pop_p;
28321
28322 /* Check base register for LDM. */
28323 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28324 return 4;
28325
28326 /* Check each register in the list. */
28327 for (; indx >= first_indx; indx--)
28328 {
28329 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28330 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28331 comment in arm_attr_length_push_multi. */
28332 if (REGNO_REG_CLASS (regno) == HI_REGS
28333 && (regno != PC_REGNUM || ldm_p))
28334 return 4;
28335 }
28336
28337 return 2;
28338 }
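
/* For example, under Thumb-2 "pop {r4, r5, pc}" (SP base with write-back)
   has length 2, whereas an LDM whose register list includes PC, or whose
   base register is a high register, has length 4.  Illustrative only.  */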
28339
28340 /* Compute the number of instructions emitted by output_move_double. */
28341 int
28342 arm_count_output_move_double_insns (rtx *operands)
28343 {
28344 int count;
28345 rtx ops[2];
28346 /* output_move_double may modify the operands array, so call it
28347 here on a copy of the array. */
28348 ops[0] = operands[0];
28349 ops[1] = operands[1];
28350 output_move_double (ops, false, &count);
28351 return count;
28352 }
28353
28354 int
28355 vfp3_const_double_for_fract_bits (rtx operand)
28356 {
28357 REAL_VALUE_TYPE r0;
28358
28359 if (!CONST_DOUBLE_P (operand))
28360 return 0;
28361
28362 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28363 if (exact_real_inverse (DFmode, &r0)
28364 && !REAL_VALUE_NEGATIVE (r0))
28365 {
28366 if (exact_real_truncate (DFmode, &r0))
28367 {
28368 HOST_WIDE_INT value = real_to_integer (&r0);
28369 value = value & 0xffffffff;
28370 if ((value != 0) && ( (value & (value - 1)) == 0))
28371 {
28372 int ret = exact_log2 (value);
28373 gcc_assert (IN_RANGE (ret, 0, 31));
28374 return ret;
28375 }
28376 }
28377 }
28378 return 0;
28379 }
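
/* For example, the constant 0.125 (exact inverse 8.0 == 2^3) yields 3,
   while 0.3 yields 0 because it has no exact power-of-two inverse.
   Illustrative only.  */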
28380
28381 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28382 log2 is in [1, 32], return that log2. Otherwise return -1.
28383 This is used in the patterns for vcvt.s32.f32 floating-point to
28384 fixed-point conversions. */
28385
28386 int
28387 vfp3_const_double_for_bits (rtx x)
28388 {
28389 const REAL_VALUE_TYPE *r;
28390
28391 if (!CONST_DOUBLE_P (x))
28392 return -1;
28393
28394 r = CONST_DOUBLE_REAL_VALUE (x);
28395
28396 if (REAL_VALUE_NEGATIVE (*r)
28397 || REAL_VALUE_ISNAN (*r)
28398 || REAL_VALUE_ISINF (*r)
28399 || !real_isinteger (r, SFmode))
28400 return -1;
28401
28402 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28403
28404 /* The exact_log2 above will have returned -1 if this is
28405 not an exact log2. */
28406 if (!IN_RANGE (hwint, 1, 32))
28407 return -1;
28408
28409 return hwint;
28410 }
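
/* For example, 256.0 yields 8, whereas 3.0 (not a power of two) and 2^33
   (log2 out of range) both yield -1.  Illustrative only.  */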
28411
28412 \f
28413 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28414
28415 static void
28416 arm_pre_atomic_barrier (enum memmodel model)
28417 {
28418 if (need_atomic_barrier_p (model, true))
28419 emit_insn (gen_memory_barrier ());
28420 }
28421
28422 static void
28423 arm_post_atomic_barrier (enum memmodel model)
28424 {
28425 if (need_atomic_barrier_p (model, false))
28426 emit_insn (gen_memory_barrier ());
28427 }
28428
28429 /* Emit the load-exclusive and store-exclusive instructions.
28430 Use acquire and release versions if necessary. */
28431
28432 static void
28433 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28434 {
28435 rtx (*gen) (rtx, rtx);
28436
28437 if (acq)
28438 {
28439 switch (mode)
28440 {
28441 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28442 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28443 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28444 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28445 default:
28446 gcc_unreachable ();
28447 }
28448 }
28449 else
28450 {
28451 switch (mode)
28452 {
28453 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28454 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28455 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28456 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28457 default:
28458 gcc_unreachable ();
28459 }
28460 }
28461
28462 emit_insn (gen (rval, mem));
28463 }
28464
28465 static void
28466 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28467 rtx mem, bool rel)
28468 {
28469 rtx (*gen) (rtx, rtx, rtx);
28470
28471 if (rel)
28472 {
28473 switch (mode)
28474 {
28475 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28476 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28477 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28478 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28479 default:
28480 gcc_unreachable ();
28481 }
28482 }
28483 else
28484 {
28485 switch (mode)
28486 {
28487 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28488 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28489 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28490 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28491 default:
28492 gcc_unreachable ();
28493 }
28494 }
28495
28496 emit_insn (gen (bval, rval, mem));
28497 }
28498
28499 /* Mark the previous jump instruction as unlikely. */
28500
28501 static void
28502 emit_unlikely_jump (rtx insn)
28503 {
28504 rtx_insn *jump = emit_jump_insn (insn);
28505 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28506 }
28507
28508 /* Expand a compare and swap pattern. */
28509
28510 void
28511 arm_expand_compare_and_swap (rtx operands[])
28512 {
28513 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28514 machine_mode mode;
28515 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28516
28517 bval = operands[0];
28518 rval = operands[1];
28519 mem = operands[2];
28520 oldval = operands[3];
28521 newval = operands[4];
28522 is_weak = operands[5];
28523 mod_s = operands[6];
28524 mod_f = operands[7];
28525 mode = GET_MODE (mem);
28526
28527 /* Normally the succ memory model must be stronger than fail, but in the
28528 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28529 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28530
28531 if (TARGET_HAVE_LDACQ
28532 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28533 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28534 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28535
28536 switch (mode)
28537 {
28538 case E_QImode:
28539 case E_HImode:
28540 /* For narrow modes, we're going to perform the comparison in SImode,
28541 so do the zero-extension now. */
28542 rval = gen_reg_rtx (SImode);
28543 oldval = convert_modes (SImode, mode, oldval, true);
28544 /* FALLTHRU */
28545
28546 case E_SImode:
28547 /* Force the value into a register if needed. We waited until after
28548 the zero-extension above to do this properly. */
28549 if (!arm_add_operand (oldval, SImode))
28550 oldval = force_reg (SImode, oldval);
28551 break;
28552
28553 case E_DImode:
28554 if (!cmpdi_operand (oldval, mode))
28555 oldval = force_reg (mode, oldval);
28556 break;
28557
28558 default:
28559 gcc_unreachable ();
28560 }
28561
28562 if (TARGET_THUMB1)
28563 {
28564 switch (mode)
28565 {
28566 case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28567 case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28568 case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28569 case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28570 default:
28571 gcc_unreachable ();
28572 }
28573 }
28574 else
28575 {
28576 switch (mode)
28577 {
28578 case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28579 case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28580 case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28581 case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28582 default:
28583 gcc_unreachable ();
28584 }
28585 }
28586
28587 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28588 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28589
28590 if (mode == QImode || mode == HImode)
28591 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28592
28593 /* In all cases, we arrange for success to be signaled by Z set.
28594 This arrangement allows for the boolean result to be used directly
28595 in a subsequent branch, post optimization. For Thumb-1 targets, the
28596 boolean negation of the result is also stored in bval because the Thumb-1
28597 backend lacks dependency tracking for the CC flag, as flag-setting is not
28598 represented at the RTL level. */
28599 if (TARGET_THUMB1)
28600 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28601 else
28602 {
28603 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28604 emit_insn (gen_rtx_SET (bval, x));
28605 }
28606 }
28607
28608 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28609 another memory store between the load-exclusive and store-exclusive can
28610 reset the monitor from Exclusive to Open state. This means we must wait
28611 until after reload to split the pattern, lest we get a register spill in
28612 the middle of the atomic sequence. Success of the compare and swap is
28613 indicated by the Z flag being set for 32-bit targets and by neg_bval being
28614 zero for Thumb-1 targets (i.e. the negation of the boolean value returned by
28615 atomic_compare_and_swapmode standard pattern in operand 0). */
28616
28617 void
28618 arm_split_compare_and_swap (rtx operands[])
28619 {
28620 rtx rval, mem, oldval, newval, neg_bval;
28621 machine_mode mode;
28622 enum memmodel mod_s, mod_f;
28623 bool is_weak;
28624 rtx_code_label *label1, *label2;
28625 rtx x, cond;
28626
28627 rval = operands[1];
28628 mem = operands[2];
28629 oldval = operands[3];
28630 newval = operands[4];
28631 is_weak = (operands[5] != const0_rtx);
28632 mod_s = memmodel_from_int (INTVAL (operands[6]));
28633 mod_f = memmodel_from_int (INTVAL (operands[7]));
28634 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28635 mode = GET_MODE (mem);
28636
28637 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28638
28639 bool use_acquire = TARGET_HAVE_LDACQ
28640 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28641 || is_mm_release (mod_s));
28642
28643 bool use_release = TARGET_HAVE_LDACQ
28644 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28645 || is_mm_acquire (mod_s));
28646
28647 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28648 a full barrier is emitted after the store-release. */
28649 if (is_armv8_sync)
28650 use_acquire = false;
28651
28652 /* Checks whether a barrier is needed and emits one accordingly. */
28653 if (!(use_acquire || use_release))
28654 arm_pre_atomic_barrier (mod_s);
28655
28656 label1 = NULL;
28657 if (!is_weak)
28658 {
28659 label1 = gen_label_rtx ();
28660 emit_label (label1);
28661 }
28662 label2 = gen_label_rtx ();
28663
28664 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28665
28666 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
28667 as required to communicate with arm_expand_compare_and_swap. */
28668 if (TARGET_32BIT)
28669 {
28670 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28671 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28672 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28673 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28674 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28675 }
28676 else
28677 {
28678 emit_move_insn (neg_bval, const1_rtx);
28679 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28680 if (thumb1_cmpneg_operand (oldval, SImode))
28681 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28682 label2, cond));
28683 else
28684 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28685 }
28686
28687 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28688
28689 /* Weak or strong, we want EQ to be true for success, so that we
28690 match the flags that we got from the compare above. */
28691 if (TARGET_32BIT)
28692 {
28693 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28694 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28695 emit_insn (gen_rtx_SET (cond, x));
28696 }
28697
28698 if (!is_weak)
28699 {
28700 /* Z is set to the boolean value of !neg_bval, as required to communicate
28701 with arm_expand_compare_and_swap. */
28702 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28703 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28704 }
28705
28706 if (!is_mm_relaxed (mod_f))
28707 emit_label (label2);
28708
28709 /* Checks whether a barrier is needed and emits one accordingly. */
28710 if (is_armv8_sync
28711 || !(use_acquire || use_release))
28712 arm_post_atomic_barrier (mod_s);
28713
28714 if (is_mm_relaxed (mod_f))
28715 emit_label (label2);
28716 }
28717
28718 /* Split an atomic operation pattern. The operation is given by CODE and is
28719 one of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a
28720 NAND operation). The operation is performed on the content at MEM and on VALUE
28721 following the memory model MODEL_RTX. The content at MEM before and after
28722 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28723 success of the operation is returned in COND. Using a scratch register or
28724 an operand register for these determines what result is returned for that
28725 pattern. */
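/* As a rough illustration for a 32-bit IOR (barriers and the
   acquire/release variants omitted), the split below emits:

     repeat:
       ldrex   old_out, [mem]
       orr     new_out, old_out, value
       strex   cond, new_out, [mem]
       cmp     cond, #0
       bne     repeat  */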
28726
28727 void
28728 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28729 rtx value, rtx model_rtx, rtx cond)
28730 {
28731 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28732 machine_mode mode = GET_MODE (mem);
28733 machine_mode wmode = (mode == DImode ? DImode : SImode);
28734 rtx_code_label *label;
28735 bool all_low_regs, bind_old_new;
28736 rtx x;
28737
28738 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28739
28740 bool use_acquire = TARGET_HAVE_LDACQ
28741 && !(is_mm_relaxed (model) || is_mm_consume (model)
28742 || is_mm_release (model));
28743
28744 bool use_release = TARGET_HAVE_LDACQ
28745 && !(is_mm_relaxed (model) || is_mm_consume (model)
28746 || is_mm_acquire (model));
28747
28748 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28749 a full barrier is emitted after the store-release. */
28750 if (is_armv8_sync)
28751 use_acquire = false;
28752
28753 /* Checks whether a barrier is needed and emits one accordingly. */
28754 if (!(use_acquire || use_release))
28755 arm_pre_atomic_barrier (model);
28756
28757 label = gen_label_rtx ();
28758 emit_label (label);
28759
28760 if (new_out)
28761 new_out = gen_lowpart (wmode, new_out);
28762 if (old_out)
28763 old_out = gen_lowpart (wmode, old_out);
28764 else
28765 old_out = new_out;
28766 value = simplify_gen_subreg (wmode, value, mode, 0);
28767
28768 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28769
28770 /* Does the operation require the destination and first operand to use the
28771 same register? This is decided by the register constraints of the relevant
28772 insn patterns in thumb1.md. */
28773 gcc_assert (!new_out || REG_P (new_out));
28774 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28775 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28776 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28777 bind_old_new =
28778 (TARGET_THUMB1
28779 && code != SET
28780 && code != MINUS
28781 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28782
28783 /* We want to return the old value while putting the result of the operation
28784 in the same register as the old value, so copy the old value over to the
28785 destination register and use that register for the operation. */
28786 if (old_out && bind_old_new)
28787 {
28788 emit_move_insn (new_out, old_out);
28789 old_out = new_out;
28790 }
28791
28792 switch (code)
28793 {
28794 case SET:
28795 new_out = value;
28796 break;
28797
28798 case NOT:
28799 x = gen_rtx_AND (wmode, old_out, value);
28800 emit_insn (gen_rtx_SET (new_out, x));
28801 x = gen_rtx_NOT (wmode, new_out);
28802 emit_insn (gen_rtx_SET (new_out, x));
28803 break;
28804
28805 case MINUS:
28806 if (CONST_INT_P (value))
28807 {
28808 value = GEN_INT (-INTVAL (value));
28809 code = PLUS;
28810 }
28811 /* FALLTHRU */
28812
28813 case PLUS:
28814 if (mode == DImode)
28815 {
28816 /* DImode plus/minus need to clobber flags. */
28817 /* The adddi3 and subdi3 patterns are incorrectly written so that
28818 they require matching operands, even when we could easily support
28819 three operands. Thankfully, this can be fixed up post-splitting,
28820 as the individual add+adc patterns do accept three operands and
28821 post-reload cprop can make these moves go away. */
28822 emit_move_insn (new_out, old_out);
28823 if (code == PLUS)
28824 x = gen_adddi3 (new_out, new_out, value);
28825 else
28826 x = gen_subdi3 (new_out, new_out, value);
28827 emit_insn (x);
28828 break;
28829 }
28830 /* FALLTHRU */
28831
28832 default:
28833 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28834 emit_insn (gen_rtx_SET (new_out, x));
28835 break;
28836 }
28837
28838 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28839 use_release);
28840
28841 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28842 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28843
28844 /* Checks whether a barrier is needed and emits one accordingly. */
28845 if (is_armv8_sync
28846 || !(use_acquire || use_release))
28847 arm_post_atomic_barrier (model);
28848 }
28849 \f
28850 #define MAX_VECT_LEN 16
28851
28852 struct expand_vec_perm_d
28853 {
28854 rtx target, op0, op1;
28855 auto_vec_perm_indices perm;
28856 machine_mode vmode;
28857 bool one_vector_p;
28858 bool testing_p;
28859 };
28860
28861 /* Generate a variable permutation. */
28862
28863 static void
28864 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28865 {
28866 machine_mode vmode = GET_MODE (target);
28867 bool one_vector_p = rtx_equal_p (op0, op1);
28868
28869 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28870 gcc_checking_assert (GET_MODE (op0) == vmode);
28871 gcc_checking_assert (GET_MODE (op1) == vmode);
28872 gcc_checking_assert (GET_MODE (sel) == vmode);
28873 gcc_checking_assert (TARGET_NEON);
28874
28875 if (one_vector_p)
28876 {
28877 if (vmode == V8QImode)
28878 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28879 else
28880 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28881 }
28882 else
28883 {
28884 rtx pair;
28885
28886 if (vmode == V8QImode)
28887 {
28888 pair = gen_reg_rtx (V16QImode);
28889 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28890 pair = gen_lowpart (TImode, pair);
28891 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28892 }
28893 else
28894 {
28895 pair = gen_reg_rtx (OImode);
28896 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28897 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28898 }
28899 }
28900 }
28901
28902 void
28903 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28904 {
28905 machine_mode vmode = GET_MODE (target);
28906 unsigned int nelt = GET_MODE_NUNITS (vmode);
28907 bool one_vector_p = rtx_equal_p (op0, op1);
28908 rtx mask;
28909
28910 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28911 numbering of elements for big-endian, we must reverse the order. */
28912 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28913
28914 /* The VTBL instruction does not use a modulo index, so we must take care
28915 of that ourselves. */
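/* For example, with two distinct V8QI inputs nelt is 8, so the mask is 15
   and a selector element of 17 is reduced to index 1 of the concatenated
   pair.  */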
28916 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28917 mask = gen_const_vec_duplicate (vmode, mask);
28918 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28919
28920 arm_expand_vec_perm_1 (target, op0, op1, sel);
28921 }
28922
28923 /* Map lane ordering between the architectural lane order and GCC's lane order,
28924 taking the ABI into account. See the comment above output_move_neon for details. */
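/* For example, on big-endian neon_endian_lane_map (V4SImode, 0) yields
   (4 - 1 - 0) ^ (4 / 2) == 1, reflecting the reversed D register order.  */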
28925
28926 static int
28927 neon_endian_lane_map (machine_mode mode, int lane)
28928 {
28929 if (BYTES_BIG_ENDIAN)
28930 {
28931 int nelems = GET_MODE_NUNITS (mode);
28932 /* Reverse lane order. */
28933 lane = (nelems - 1 - lane);
28934 /* Reverse D register order, to match ABI. */
28935 if (GET_MODE_SIZE (mode) == 16)
28936 lane = lane ^ (nelems / 2);
28937 }
28938 return lane;
28939 }
28940
28941 /* Some permutations index into pairs of vectors; this is a helper function
28942 to map indexes into those pairs of vectors. */
28943
28944 static int
28945 neon_pair_endian_lane_map (machine_mode mode, int lane)
28946 {
28947 int nelem = GET_MODE_NUNITS (mode);
28948 if (BYTES_BIG_ENDIAN)
28949 lane =
28950 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28951 return lane;
28952 }
28953
28954 /* Generate or test for an insn that supports a constant permutation. */
28955
28956 /* Recognize patterns for the VUZP insns. */
28957
28958 static bool
28959 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28960 {
28961 unsigned int i, odd, mask, nelt = d->perm.length ();
28962 rtx out0, out1, in0, in1;
28963 rtx (*gen)(rtx, rtx, rtx, rtx);
28964 int first_elem;
28965 int swap_nelt;
28966
28967 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28968 return false;
28969
28970 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28971 big-endian pattern on 64-bit vectors, so we correct for that. */
28972 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28973 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
28974
28975 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28976
28977 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28978 odd = 0;
28979 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28980 odd = 1;
28981 else
28982 return false;
28983 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28984
28985 for (i = 0; i < nelt; i++)
28986 {
28987 unsigned elt =
28988 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28989 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28990 return false;
28991 }
28992
28993 /* Success! */
28994 if (d->testing_p)
28995 return true;
28996
28997 switch (d->vmode)
28998 {
28999 case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
29000 case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
29001 case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
29002 case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
29003 case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
29004 case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
29005 case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
29006 case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
29007 case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
29008 case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
29009 default:
29010 gcc_unreachable ();
29011 }
29012
29013 in0 = d->op0;
29014 in1 = d->op1;
29015 if (swap_nelt != 0)
29016 std::swap (in0, in1);
29017
29018 out0 = d->target;
29019 out1 = gen_reg_rtx (d->vmode);
29020 if (odd)
29021 std::swap (out0, out1);
29022
29023 emit_insn (gen (out0, in0, in1, out1));
29024 return true;
29025 }
29026
29027 /* Recognize patterns for the VZIP insns. */
29028
29029 static bool
29030 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
29031 {
29032 unsigned int i, high, mask, nelt = d->perm.length ();
29033 rtx out0, out1, in0, in1;
29034 rtx (*gen)(rtx, rtx, rtx, rtx);
29035 int first_elem;
29036 bool is_swapped;
29037
29038 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29039 return false;
29040
29041 is_swapped = BYTES_BIG_ENDIAN;
29042
29043 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
29044
29045 high = nelt / 2;
29046 if (first_elem == neon_endian_lane_map (d->vmode, high))
29047 ;
29048 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
29049 high = 0;
29050 else
29051 return false;
29052 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29053
29054 for (i = 0; i < nelt / 2; i++)
29055 {
29056 unsigned elt =
29057 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
29058 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
29059 != elt)
29060 return false;
29061 elt =
29062 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
29063 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
29064 != elt)
29065 return false;
29066 }
29067
29068 /* Success! */
29069 if (d->testing_p)
29070 return true;
29071
29072 switch (d->vmode)
29073 {
29074 case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
29075 case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
29076 case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
29077 case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
29078 case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
29079 case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
29080 case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
29081 case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
29082 case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
29083 case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
29084 default:
29085 gcc_unreachable ();
29086 }
29087
29088 in0 = d->op0;
29089 in1 = d->op1;
29090 if (is_swapped)
29091 std::swap (in0, in1);
29092
29093 out0 = d->target;
29094 out1 = gen_reg_rtx (d->vmode);
29095 if (high)
29096 std::swap (out0, out1);
29097
29098 emit_insn (gen (out0, in0, in1, out1));
29099 return true;
29100 }
29101
29102 /* Recognize patterns for the VREV insns. */
29103
29104 static bool
29105 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
29106 {
29107 unsigned int i, j, diff, nelt = d->perm.length ();
29108 rtx (*gen)(rtx, rtx);
29109
29110 if (!d->one_vector_p)
29111 return false;
29112
29113 diff = d->perm[0];
29114 switch (diff)
29115 {
29116 case 7:
29117 switch (d->vmode)
29118 {
29119 case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
29120 case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
29121 default:
29122 return false;
29123 }
29124 break;
29125 case 3:
29126 switch (d->vmode)
29127 {
29128 case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
29129 case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
29130 case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
29131 case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
29132 case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
29133 case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
29134 default:
29135 return false;
29136 }
29137 break;
29138 case 1:
29139 switch (d->vmode)
29140 {
29141 case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
29142 case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
29143 case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
29144 case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
29145 case E_V4SImode: gen = gen_neon_vrev64v4si; break;
29146 case E_V2SImode: gen = gen_neon_vrev64v2si; break;
29147 case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
29148 case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
29149 default:
29150 return false;
29151 }
29152 break;
29153 default:
29154 return false;
29155 }
29156
29157 for (i = 0; i < nelt ; i += diff + 1)
29158 for (j = 0; j <= diff; j += 1)
29159 {
29160 /* This is guaranteed to be true, as the value of diff
29161 is 7, 3 or 1 and we should have enough elements in the
29162 queue to generate this. Getting a vector mask with a
29163 value of diff other than these values implies that
29164 something has gone wrong by the time we get here. */
29165 gcc_assert (i + j < nelt);
29166 if (d->perm[i + j] != i + diff - j)
29167 return false;
29168 }
29169
29170 /* Success! */
29171 if (d->testing_p)
29172 return true;
29173
29174 emit_insn (gen (d->target, d->op0));
29175 return true;
29176 }
29177
29178 /* Recognize patterns for the VTRN insns. */
29179
29180 static bool
29181 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29182 {
29183 unsigned int i, odd, mask, nelt = d->perm.length ();
29184 rtx out0, out1, in0, in1;
29185 rtx (*gen)(rtx, rtx, rtx, rtx);
29186
29187 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29188 return false;
29189
29190 /* Note that these are little-endian tests. Adjust for big-endian later. */
29191 if (d->perm[0] == 0)
29192 odd = 0;
29193 else if (d->perm[0] == 1)
29194 odd = 1;
29195 else
29196 return false;
29197 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29198
29199 for (i = 0; i < nelt; i += 2)
29200 {
29201 if (d->perm[i] != i + odd)
29202 return false;
29203 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29204 return false;
29205 }
29206
29207 /* Success! */
29208 if (d->testing_p)
29209 return true;
29210
29211 switch (d->vmode)
29212 {
29213 case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
29214 case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
29215 case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
29216 case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
29217 case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
29218 case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
29219 case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29220 case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29221 case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29222 case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29223 default:
29224 gcc_unreachable ();
29225 }
29226
29227 in0 = d->op0;
29228 in1 = d->op1;
29229 if (BYTES_BIG_ENDIAN)
29230 {
29231 std::swap (in0, in1);
29232 odd = !odd;
29233 }
29234
29235 out0 = d->target;
29236 out1 = gen_reg_rtx (d->vmode);
29237 if (odd)
29238 std::swap (out0, out1);
29239
29240 emit_insn (gen (out0, in0, in1, out1));
29241 return true;
29242 }
29243
29244 /* Recognize patterns for the VEXT insns. */
29245
29246 static bool
29247 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29248 {
29249 unsigned int i, nelt = d->perm.length ();
29250 rtx (*gen) (rtx, rtx, rtx, rtx);
29251 rtx offset;
29252
29253 unsigned int location;
29254
29255 unsigned int next = d->perm[0] + 1;
29256
29257 /* TODO: Handle GCC's numbering of elements for big-endian. */
29258 if (BYTES_BIG_ENDIAN)
29259 return false;
29260
29261 /* Check if the extracted indexes are increasing by one. */
29262 for (i = 1; i < nelt; next++, i++)
29263 {
29264 /* If we hit the most significant element of the 2nd vector in
29265 the previous iteration, no need to test further. */
29266 if (next == 2 * nelt)
29267 return false;
29268
29269 /* If we are operating on only one vector, it could be a
29270 rotation. If there are only two elements of size < 64, let
29271 arm_evpc_neon_vrev catch it. */
29272 if (d->one_vector_p && (next == nelt))
29273 {
29274 if ((nelt == 2) && (d->vmode != V2DImode))
29275 return false;
29276 else
29277 next = 0;
29278 }
29279
29280 if (d->perm[i] != next)
29281 return false;
29282 }
29283
29284 location = d->perm[0];
29285
29286 switch (d->vmode)
29287 {
29288 case E_V16QImode: gen = gen_neon_vextv16qi; break;
29289 case E_V8QImode: gen = gen_neon_vextv8qi; break;
29290 case E_V4HImode: gen = gen_neon_vextv4hi; break;
29291 case E_V8HImode: gen = gen_neon_vextv8hi; break;
29292 case E_V2SImode: gen = gen_neon_vextv2si; break;
29293 case E_V4SImode: gen = gen_neon_vextv4si; break;
29294 case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29295 case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29296 case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29297 case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29298 case E_V2DImode: gen = gen_neon_vextv2di; break;
29299 default:
29300 return false;
29301 }
29302
29303 /* Success! */
29304 if (d->testing_p)
29305 return true;
29306
29307 offset = GEN_INT (location);
29308 emit_insn (gen (d->target, d->op0, d->op1, offset));
29309 return true;
29310 }
29311
29312 /* The NEON VTBL instruction is a fully variable permutation that's even
29313 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29314 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29315 can do slightly better by expanding this as a constant where we don't
29316 have to apply a mask. */
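/* For example, a constant V8QI permutation {1, 0, 3, 2, 5, 4, 7, 6} is
   loaded into a register and used directly as the VTBL selector, with no
   masking applied.  */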
29317
29318 static bool
29319 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29320 {
29321 rtx rperm[MAX_VECT_LEN], sel;
29322 machine_mode vmode = d->vmode;
29323 unsigned int i, nelt = d->perm.length ();
29324
29325 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29326 numbering of elements for big-endian, we must reverse the order. */
29327 if (BYTES_BIG_ENDIAN)
29328 return false;
29329
29330 if (d->testing_p)
29331 return true;
29332
29333 /* Generic code will try constant permutation twice: once with the
29334 original mode and again with the elements lowered to QImode.
29335 So wait and don't do the selector expansion ourselves. */
29336 if (vmode != V8QImode && vmode != V16QImode)
29337 return false;
29338
29339 for (i = 0; i < nelt; ++i)
29340 rperm[i] = GEN_INT (d->perm[i]);
29341 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29342 sel = force_reg (vmode, sel);
29343
29344 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29345 return true;
29346 }
29347
29348 static bool
29349 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29350 {
29351 /* Check if the input mask matches vext before reordering the
29352 operands. */
29353 if (TARGET_NEON)
29354 if (arm_evpc_neon_vext (d))
29355 return true;
29356
29357 /* The pattern matching functions above are written to look for a small
29358 number to begin the sequence (0, 1, N/2). If we begin with an index
29359 from the second operand, we can swap the operands. */
29360 unsigned int nelt = d->perm.length ();
29361 if (d->perm[0] >= nelt)
29362 {
29363 for (unsigned int i = 0; i < nelt; ++i)
29364 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
29365
29366 std::swap (d->op0, d->op1);
29367 }
29368
29369 if (TARGET_NEON)
29370 {
29371 if (arm_evpc_neon_vuzp (d))
29372 return true;
29373 if (arm_evpc_neon_vzip (d))
29374 return true;
29375 if (arm_evpc_neon_vrev (d))
29376 return true;
29377 if (arm_evpc_neon_vtrn (d))
29378 return true;
29379 return arm_evpc_neon_vtbl (d);
29380 }
29381 return false;
29382 }
29383
29384 /* Expand a vec_perm_const pattern. */
29385
29386 bool
29387 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29388 {
29389 struct expand_vec_perm_d d;
29390 int i, nelt, which;
29391
29392 d.target = target;
29393 d.op0 = op0;
29394 d.op1 = op1;
29395
29396 d.vmode = GET_MODE (target);
29397 gcc_assert (VECTOR_MODE_P (d.vmode));
29398 d.testing_p = false;
29399
29400 nelt = GET_MODE_NUNITS (d.vmode);
29401 d.perm.reserve (nelt);
29402 for (i = which = 0; i < nelt; ++i)
29403 {
29404 rtx e = XVECEXP (sel, 0, i);
29405 int ei = INTVAL (e) & (2 * nelt - 1);
29406 which |= (ei < nelt ? 1 : 2);
29407 d.perm.quick_push (ei);
29408 }
29409
29410 switch (which)
29411 {
29412 default:
29413 gcc_unreachable ();
29414
29415 case 3:
29416 d.one_vector_p = false;
29417 if (!rtx_equal_p (op0, op1))
29418 break;
29419
29420 /* The elements of PERM do not suggest that only the first operand
29421 is used, but both operands are identical. Allow easier matching
29422 of the permutation by folding the permutation into the single
29423 input vector. */
29424 /* FALLTHRU */
29425 case 2:
29426 for (i = 0; i < nelt; ++i)
29427 d.perm[i] &= nelt - 1;
29428 d.op0 = op1;
29429 d.one_vector_p = true;
29430 break;
29431
29432 case 1:
29433 d.op1 = op0;
29434 d.one_vector_p = true;
29435 break;
29436 }
29437
29438 return arm_expand_vec_perm_const_1 (&d);
29439 }
29440
29441 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29442
29443 static bool
29444 arm_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
29445 {
29446 struct expand_vec_perm_d d;
29447 unsigned int i, nelt, which;
29448 bool ret;
29449
29450 d.vmode = vmode;
29451 d.testing_p = true;
29452 d.perm.safe_splice (sel);
29453
29454 /* Categorize the set of elements in the selector. */
29455 nelt = GET_MODE_NUNITS (d.vmode);
29456 for (i = which = 0; i < nelt; ++i)
29457 {
29458 unsigned int e = d.perm[i];
29459 gcc_assert (e < 2 * nelt);
29460 which |= (e < nelt ? 1 : 2);
29461 }
29462
29463 /* For all elements from the second vector, fold them to the first. */
29464 if (which == 2)
29465 for (i = 0; i < nelt; ++i)
29466 d.perm[i] -= nelt;
29467
29468 /* Check whether the mask can be applied to the vector type. */
29469 d.one_vector_p = (which != 3);
29470
29471 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29472 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29473 if (!d.one_vector_p)
29474 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29475
29476 start_sequence ();
29477 ret = arm_expand_vec_perm_const_1 (&d);
29478 end_sequence ();
29479
29480 return ret;
29481 }
29482
29483 bool
29484 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29485 {
29486 /* If we are soft float and either have LDRD or the mode fits in a
29487 single word, then all auto-increment forms are OK. */
29488 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29489 return true;
29490
29491 switch (code)
29492 {
29493 /* Post-increment and pre-decrement are supported for all
29494 instruction forms except for vector forms. */
29495 case ARM_POST_INC:
29496 case ARM_PRE_DEC:
29497 if (VECTOR_MODE_P (mode))
29498 {
29499 if (code != ARM_PRE_DEC)
29500 return true;
29501 else
29502 return false;
29503 }
29504
29505 return true;
29506
29507 case ARM_POST_DEC:
29508 case ARM_PRE_INC:
29509 /* Without LDRD, and with a mode size greater than the
29510 word size, there is no point in auto-incrementing
29511 because ldm and stm will not have these forms. */
29512 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29513 return false;
29514
29515 /* Vector and floating point modes do not support
29516 these auto increment forms. */
29517 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29518 return false;
29519
29520 return true;
29521
29522 default:
29523 return false;
29524
29525 }
29526
29527 return false;
29528 }
29529
29530 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
29531 on ARM, since we know that shifts by negative amounts are no-ops.
29532 Additionally, the default expansion code is not available or suitable
29533 for post-reload insn splits (this can occur when the register allocator
29534 chooses not to do a shift in NEON).
29535
29536 This function is used in both initial expand and post-reload splits, and
29537 handles all kinds of 64-bit shifts.
29538
29539 Input requirements:
29540 - It is safe for the input and output to be the same register, but
29541 early-clobber rules apply for the shift amount and scratch registers.
29542 - Shift by register requires both scratch registers. In all other cases
29543 the scratch registers may be NULL.
29544 - Ashiftrt by a register also clobbers the CC register. */
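/* For instance, a constant logical right shift of a register pair by 8 is
   emitted roughly as:

     out_low  = in_low >> 8;
     out_low |= in_high << 24;
     out_high = in_high >> 8;  */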
29545 void
29546 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29547 rtx amount, rtx scratch1, rtx scratch2)
29548 {
29549 rtx out_high = gen_highpart (SImode, out);
29550 rtx out_low = gen_lowpart (SImode, out);
29551 rtx in_high = gen_highpart (SImode, in);
29552 rtx in_low = gen_lowpart (SImode, in);
29553
29554 /* Terminology:
29555 in = the register pair containing the input value.
29556 out = the destination register pair.
29557 up = the high- or low-part of each pair.
29558 down = the opposite part to "up".
29559 In a shift, we can consider bits to shift from "up"-stream to
29560 "down"-stream, so in a left-shift "up" is the low-part and "down"
29561 is the high-part of each register pair. */
29562
29563 rtx out_up = code == ASHIFT ? out_low : out_high;
29564 rtx out_down = code == ASHIFT ? out_high : out_low;
29565 rtx in_up = code == ASHIFT ? in_low : in_high;
29566 rtx in_down = code == ASHIFT ? in_high : in_low;
29567
29568 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29569 gcc_assert (out
29570 && (REG_P (out) || GET_CODE (out) == SUBREG)
29571 && GET_MODE (out) == DImode);
29572 gcc_assert (in
29573 && (REG_P (in) || GET_CODE (in) == SUBREG)
29574 && GET_MODE (in) == DImode);
29575 gcc_assert (amount
29576 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29577 && GET_MODE (amount) == SImode)
29578 || CONST_INT_P (amount)));
29579 gcc_assert (scratch1 == NULL
29580 || (GET_CODE (scratch1) == SCRATCH)
29581 || (GET_MODE (scratch1) == SImode
29582 && REG_P (scratch1)));
29583 gcc_assert (scratch2 == NULL
29584 || (GET_CODE (scratch2) == SCRATCH)
29585 || (GET_MODE (scratch2) == SImode
29586 && REG_P (scratch2)));
29587 gcc_assert (!REG_P (out) || !REG_P (amount)
29588 || !HARD_REGISTER_P (out)
29589 || (REGNO (out) != REGNO (amount)
29590 && REGNO (out) + 1 != REGNO (amount)));
29591
29592 /* Macros to make the following code more readable. */
29593 #define SUB_32(DEST,SRC) \
29594 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29595 #define RSB_32(DEST,SRC) \
29596 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29597 #define SUB_S_32(DEST,SRC) \
29598 gen_addsi3_compare0 ((DEST), (SRC), \
29599 GEN_INT (-32))
29600 #define SET(DEST,SRC) \
29601 gen_rtx_SET ((DEST), (SRC))
29602 #define SHIFT(CODE,SRC,AMOUNT) \
29603 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29604 #define LSHIFT(CODE,SRC,AMOUNT) \
29605 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29606 SImode, (SRC), (AMOUNT))
29607 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29608 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29609 SImode, (SRC), (AMOUNT))
29610 #define ORR(A,B) \
29611 gen_rtx_IOR (SImode, (A), (B))
29612 #define BRANCH(COND,LABEL) \
29613 gen_arm_cond_branch ((LABEL), \
29614 gen_rtx_ ## COND (CCmode, cc_reg, \
29615 const0_rtx), \
29616 cc_reg)
29617
29618 /* Shifts by register and shifts by constant are handled separately. */
29619 if (CONST_INT_P (amount))
29620 {
29621 /* We have a shift-by-constant. */
29622
29623 /* First, handle out-of-range shift amounts.
29624 In both cases we try to match the result that an ARM instruction in a
29625 shift-by-register would give. This helps reduce execution
29626 differences between optimization levels, but it won't stop other
29627 parts of the compiler doing different things. This is "undefined
29628 behavior", in any case. */
29629 if (INTVAL (amount) <= 0)
29630 emit_insn (gen_movdi (out, in));
29631 else if (INTVAL (amount) >= 64)
29632 {
29633 if (code == ASHIFTRT)
29634 {
29635 rtx const31_rtx = GEN_INT (31);
29636 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29637 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29638 }
29639 else
29640 emit_insn (gen_movdi (out, const0_rtx));
29641 }
29642
29643 /* Now handle valid shifts. */
29644 else if (INTVAL (amount) < 32)
29645 {
29646 /* Shifts by a constant less than 32. */
29647 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29648
29649 /* Clearing the out register in DImode first avoids lots
29650 of spilling and results in less stack usage.
29651 Later this redundant insn is completely removed.
29652 Do that only if "in" and "out" are different registers. */
29653 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29654 emit_insn (SET (out, const0_rtx));
29655 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29656 emit_insn (SET (out_down,
29657 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29658 out_down)));
29659 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29660 }
29661 else
29662 {
29663 /* Shifts by a constant greater than 31. */
29664 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29665
29666 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29667 emit_insn (SET (out, const0_rtx));
29668 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29669 if (code == ASHIFTRT)
29670 emit_insn (gen_ashrsi3 (out_up, in_up,
29671 GEN_INT (31)));
29672 else
29673 emit_insn (SET (out_up, const0_rtx));
29674 }
29675 }
29676 else
29677 {
29678 /* We have a shift-by-register. */
29679 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29680
29681 /* This alternative requires the scratch registers. */
29682 gcc_assert (scratch1 && REG_P (scratch1));
29683 gcc_assert (scratch2 && REG_P (scratch2));
29684
29685 /* We will need the values "amount-32" and "32-amount" later.
29686 Swapping them around now allows the later code to be more general. */
29687 switch (code)
29688 {
29689 case ASHIFT:
29690 emit_insn (SUB_32 (scratch1, amount));
29691 emit_insn (RSB_32 (scratch2, amount));
29692 break;
29693 case ASHIFTRT:
29694 emit_insn (RSB_32 (scratch1, amount));
29695 /* Also set CC from amount - 32, for the amount >= 32 test below. */
29696 emit_insn (SUB_S_32 (scratch2, amount));
29697 break;
29698 case LSHIFTRT:
29699 emit_insn (RSB_32 (scratch1, amount));
29700 emit_insn (SUB_32 (scratch2, amount));
29701 break;
29702 default:
29703 gcc_unreachable ();
29704 }
29705
29706 /* Emit code like this:
29707
29708 arithmetic-left:
29709 out_down = in_down << amount;
29710 out_down = (in_up << (amount - 32)) | out_down;
29711 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29712 out_up = in_up << amount;
29713
29714 arithmetic-right:
29715 out_down = in_down >> amount;
29716 out_down = (in_up << (32 - amount)) | out_down;
29717 if (amount >= 32)
29718 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29719 out_up = in_up >> amount;
29720
29721 logical-right:
29722 out_down = in_down >> amount;
29723 out_down = (in_up << (32 - amount)) | out_down;
29724 if (amount >= 32)
29725 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29726 out_up = in_up >> amount;
29727
29728 The ARM and Thumb2 variants are the same but implemented slightly
29729 differently. If this were only called during expand we could just
29730 use the Thumb2 case and let combine do the right thing, but this
29731 can also be called from post-reload splitters. */
29732
29733 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29734
29735 if (!TARGET_THUMB2)
29736 {
29737 /* Emit code for ARM mode. */
29738 emit_insn (SET (out_down,
29739 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29740 if (code == ASHIFTRT)
29741 {
29742 rtx_code_label *done_label = gen_label_rtx ();
29743 emit_jump_insn (BRANCH (LT, done_label));
29744 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29745 out_down)));
29746 emit_label (done_label);
29747 }
29748 else
29749 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29750 out_down)));
29751 }
29752 else
29753 {
29754 /* Emit code for Thumb2 mode.
29755 Thumb2 can't do shift and or in one insn. */
29756 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29757 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29758
29759 if (code == ASHIFTRT)
29760 {
29761 rtx_code_label *done_label = gen_label_rtx ();
29762 emit_jump_insn (BRANCH (LT, done_label));
29763 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29764 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29765 emit_label (done_label);
29766 }
29767 else
29768 {
29769 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29770 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29771 }
29772 }
29773
29774 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29775 }
29776
29777 #undef SUB_32
29778 #undef RSB_32
29779 #undef SUB_S_32
29780 #undef SET
29781 #undef SHIFT
29782 #undef LSHIFT
29783 #undef REV_LSHIFT
29784 #undef ORR
29785 #undef BRANCH
29786 }
29787
29788 /* Returns true if the pattern is a valid symbolic address, which is either a
29789 symbol_ref or (symbol_ref + addend).
29790
29791 According to the ARM ELF ABI, the initial addend of REL-type relocations
29792 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29793 literal field of the instruction as a 16-bit signed value in the range
29794 -32768 <= A < 32768. */
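/* For instance, a bare (symbol_ref "foo") is accepted, as is
   (const (plus (symbol_ref "foo") (const_int 0x7fff))), whereas an addend
   of 0x8000 falls outside the representable range and is rejected.
   ("foo" is just an illustrative symbol name.)  */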
29795
29796 bool
29797 arm_valid_symbolic_address_p (rtx addr)
29798 {
29799 rtx xop0, xop1 = NULL_RTX;
29800 rtx tmp = addr;
29801
29802 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29803 return true;
29804
29805 /* (const (plus: symbol_ref const_int)) */
29806 if (GET_CODE (addr) == CONST)
29807 tmp = XEXP (addr, 0);
29808
29809 if (GET_CODE (tmp) == PLUS)
29810 {
29811 xop0 = XEXP (tmp, 0);
29812 xop1 = XEXP (tmp, 1);
29813
29814 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29815 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29816 }
29817
29818 return false;
29819 }
29820
29821 /* Returns true if *COMPARISON is a valid comparison operation, and puts
29822 the operands into a form that is valid. */
29823 bool
29824 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29825 {
29826 enum rtx_code code = GET_CODE (*comparison);
29827 int code_int;
29828 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29829 ? GET_MODE (*op2) : GET_MODE (*op1);
29830
29831 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29832
29833 if (code == UNEQ || code == LTGT)
29834 return false;
29835
29836 code_int = (int)code;
29837 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29838 PUT_CODE (*comparison, (enum rtx_code)code_int);
29839
29840 switch (mode)
29841 {
29842 case E_SImode:
29843 if (!arm_add_operand (*op1, mode))
29844 *op1 = force_reg (mode, *op1);
29845 if (!arm_add_operand (*op2, mode))
29846 *op2 = force_reg (mode, *op2);
29847 return true;
29848
29849 case E_DImode:
29850 if (!cmpdi_operand (*op1, mode))
29851 *op1 = force_reg (mode, *op1);
29852 if (!cmpdi_operand (*op2, mode))
29853 *op2 = force_reg (mode, *op2);
29854 return true;
29855
29856 case E_HFmode:
29857 if (!TARGET_VFP_FP16INST)
29858 break;
29859 /* FP16 comparisons are done in SF mode. */
29860 mode = SFmode;
29861 *op1 = convert_to_mode (mode, *op1, 1);
29862 *op2 = convert_to_mode (mode, *op2, 1);
29863 /* Fall through. */
29864 case E_SFmode:
29865 case E_DFmode:
29866 if (!vfp_compare_operand (*op1, mode))
29867 *op1 = force_reg (mode, *op1);
29868 if (!vfp_compare_operand (*op2, mode))
29869 *op2 = force_reg (mode, *op2);
29870 return true;
29871 default:
29872 break;
29873 }
29874
29875 return false;
29876
29877 }
29878
29879 /* Maximum number of instructions to set a block of memory. */
29880 static int
29881 arm_block_set_max_insns (void)
29882 {
29883 if (optimize_function_for_size_p (cfun))
29884 return 4;
29885 else
29886 return current_tune->max_insns_inline_memset;
29887 }
29888
29889 /* Return TRUE if it's profitable to set a block of memory for the
29890 non-vectorized case. VAL is the value to set the memory
29891 with. LENGTH is the number of bytes to set. ALIGN is the
29892 alignment of the destination memory in bytes. UNALIGNED_P
29893 is TRUE if we can only set the memory with instructions
29894 meeting alignment requirements. USE_STRD_P is TRUE if we
29895 can use strd to set the memory. */
29896 static bool
29897 arm_block_set_non_vect_profit_p (rtx val,
29898 unsigned HOST_WIDE_INT length,
29899 unsigned HOST_WIDE_INT align,
29900 bool unaligned_p, bool use_strd_p)
29901 {
29902 int num = 0;
29903 /* For a leftover of 0-7 bytes, we can set the memory block using
29904 strb/strh/str with the minimum number of instructions. */
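/* For instance, a 5-byte leftover is covered by one STR plus one STRB,
   so leftover[5] is 2.  */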
29905 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29906
29907 if (unaligned_p)
29908 {
29909 num = arm_const_inline_cost (SET, val);
29910 num += length / align + length % align;
29911 }
29912 else if (use_strd_p)
29913 {
29914 num = arm_const_double_inline_cost (val);
29915 num += (length >> 3) + leftover[length & 7];
29916 }
29917 else
29918 {
29919 num = arm_const_inline_cost (SET, val);
29920 num += (length >> 2) + leftover[length & 3];
29921 }
29922
29923 /* We may be able to combine last pair STRH/STRB into a single STR
29924 by shifting one byte back. */
29925 if (unaligned_access && length > 3 && (length & 3) == 3)
29926 num--;
29927
29928 return (num <= arm_block_set_max_insns ());
29929 }
29930
29931 /* Return TRUE if it's profitable to set a block of memory for the
29932 vectorized case. LENGTH is the number of bytes to set.
29933 ALIGN is the alignment of destination memory in bytes.
29934 MODE is the vector mode used to set the memory. */
29935 static bool
29936 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29937 unsigned HOST_WIDE_INT align,
29938 machine_mode mode)
29939 {
29940 int num;
29941 bool unaligned_p = ((align & 3) != 0);
29942 unsigned int nelt = GET_MODE_NUNITS (mode);
29943
29944 /* Instruction loading constant value. */
29945 num = 1;
29946 /* Instructions storing the memory. */
29947 num += (length + nelt - 1) / nelt;
29948 /* Instructions adjusting the address expression. We only need to
29949 adjust the address expression if it's 4-byte aligned and the
29950 leftover bytes can only be stored by a misaligned store instruction. */
29951 if (!unaligned_p && (length & 3) != 0)
29952 num++;
29953
29954 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29955 if (!unaligned_p && mode == V16QImode)
29956 num--;
29957
29958 return (num <= arm_block_set_max_insns ());
29959 }
29960
29961 /* Set a block of memory using vectorization instructions for the
29962 unaligned case. We fill the first LENGTH bytes of the memory
29963 area starting from DSTBASE with byte constant VALUE. ALIGN is
29964 the alignment requirement of memory. Return TRUE if succeeded. */
29965 static bool
29966 arm_block_set_unaligned_vect (rtx dstbase,
29967 unsigned HOST_WIDE_INT length,
29968 unsigned HOST_WIDE_INT value,
29969 unsigned HOST_WIDE_INT align)
29970 {
29971 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
29972 rtx dst, mem;
29973 rtx val_vec, reg;
29974 rtx (*gen_func) (rtx, rtx);
29975 machine_mode mode;
29976 unsigned HOST_WIDE_INT v = value;
29977 unsigned int offset = 0;
29978 gcc_assert ((align & 0x3) != 0);
29979 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29980 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29981 if (length >= nelt_v16)
29982 {
29983 mode = V16QImode;
29984 gen_func = gen_movmisalignv16qi;
29985 }
29986 else
29987 {
29988 mode = V8QImode;
29989 gen_func = gen_movmisalignv8qi;
29990 }
29991 nelt_mode = GET_MODE_NUNITS (mode);
29992 gcc_assert (length >= nelt_mode);
29993 /* Skip if it isn't profitable. */
29994 if (!arm_block_set_vect_profit_p (length, align, mode))
29995 return false;
29996
29997 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29998 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29999
30000 v = sext_hwi (v, BITS_PER_WORD);
30001
30002 reg = gen_reg_rtx (mode);
30003 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
30004 /* Emit instruction loading the constant value. */
30005 emit_move_insn (reg, val_vec);
30006
30007 /* Handle nelt_mode bytes in a vector. */
30008 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
30009 {
30010 emit_insn ((*gen_func) (mem, reg));
30011 if (i + 2 * nelt_mode <= length)
30012 {
30013 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
30014 offset += nelt_mode;
30015 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30016 }
30017 }
30018
30019 /* If there are no fewer than nelt_v8 bytes leftover, we must be in
30020 V16QI mode. */
30021 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
30022
30023 /* Handle (8, 16) bytes leftover. */
30024 if (i + nelt_v8 < length)
30025 {
30026 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
30027 offset += length - i;
30028 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30029
30030 /* We are shifting bytes back, set the alignment accordingly. */
30031 if ((length & 1) != 0 && align >= 2)
30032 set_mem_align (mem, BITS_PER_UNIT);
30033
30034 emit_insn (gen_movmisalignv16qi (mem, reg));
30035 }
30036 /* Handle (0, 8] bytes leftover. */
30037 else if (i < length && i + nelt_v8 >= length)
30038 {
30039 if (mode == V16QImode)
30040 reg = gen_lowpart (V8QImode, reg);
30041
30042 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
30043 + (nelt_mode - nelt_v8))));
30044 offset += (length - i) + (nelt_mode - nelt_v8);
30045 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
30046
30047 /* We are shifting bytes back, set the alignment accordingly. */
30048 if ((length & 1) != 0 && align >= 2)
30049 set_mem_align (mem, BITS_PER_UNIT);
30050
30051 emit_insn (gen_movmisalignv8qi (mem, reg));
30052 }
30053
30054 return true;
30055 }
30056
30057 /* Set a block of memory using vectorization instructions for the
30058 aligned case. We fill the first LENGTH bytes of the memory area
30059 starting from DSTBASE with byte constant VALUE. ALIGN is the
30060 alignment requirement of memory. Return TRUE if succeeded. */
30061 static bool
30062 arm_block_set_aligned_vect (rtx dstbase,
30063 unsigned HOST_WIDE_INT length,
30064 unsigned HOST_WIDE_INT value,
30065 unsigned HOST_WIDE_INT align)
30066 {
30067 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
30068 rtx dst, addr, mem;
30069 rtx val_vec, reg;
30070 machine_mode mode;
30071 unsigned HOST_WIDE_INT v = value;
30072 unsigned int offset = 0;
30073
30074 gcc_assert ((align & 0x3) == 0);
30075 nelt_v8 = GET_MODE_NUNITS (V8QImode);
30076 nelt_v16 = GET_MODE_NUNITS (V16QImode);
30077 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
30078 mode = V16QImode;
30079 else
30080 mode = V8QImode;
30081
30082 nelt_mode = GET_MODE_NUNITS (mode);
30083 gcc_assert (length >= nelt_mode);
30084 /* Skip if it isn't profitable. */
30085 if (!arm_block_set_vect_profit_p (length, align, mode))
30086 return false;
30087
30088 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30089
30090 v = sext_hwi (v, BITS_PER_WORD);
30091
30092 reg = gen_reg_rtx (mode);
30093 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
30094 /* Emit instruction loading the constant value. */
30095 emit_move_insn (reg, val_vec);
30096
30097 i = 0;
30098 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
30099 if (mode == V16QImode)
30100 {
30101 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30102 emit_insn (gen_movmisalignv16qi (mem, reg));
30103 i += nelt_mode;
30104 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
30105 if (i + nelt_v8 < length && i + nelt_v16 > length)
30106 {
30107 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30108 offset += length - nelt_mode;
30109 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30110 /* We are shifting bytes back, set the alignment accordingly. */
30111 if ((length & 0x3) == 0)
30112 set_mem_align (mem, BITS_PER_UNIT * 4);
30113 else if ((length & 0x1) == 0)
30114 set_mem_align (mem, BITS_PER_UNIT * 2);
30115 else
30116 set_mem_align (mem, BITS_PER_UNIT);
30117
30118 emit_insn (gen_movmisalignv16qi (mem, reg));
30119 return true;
30120 }
30121 /* Fall through for bytes leftover. */
30122 mode = V8QImode;
30123 nelt_mode = GET_MODE_NUNITS (mode);
30124 reg = gen_lowpart (V8QImode, reg);
30125 }
30126
30127 /* Handle 8 bytes in a vector. */
30128 for (; (i + nelt_mode <= length); i += nelt_mode)
30129 {
30130 addr = plus_constant (Pmode, dst, i);
30131 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
30132 emit_move_insn (mem, reg);
30133 }
30134
30135 /* Handle single word leftover by shifting 4 bytes back. We can
30136 use aligned access for this case. */
30137 if (i + UNITS_PER_WORD == length)
30138 {
30139 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
30140 offset += i - UNITS_PER_WORD;
30141 mem = adjust_automodify_address (dstbase, mode, addr, offset);
30142 /* We are shifting 4 bytes back, set the alignment accordingly. */
30143 if (align > UNITS_PER_WORD)
30144 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
30145
30146 emit_move_insn (mem, reg);
30147 }
30148 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30149 We have to use unaligned access for this case. */
30150 else if (i < length)
30151 {
30152 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30153 offset += length - nelt_mode;
30154 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30155 /* We are shifting bytes back, set the alignment accordingly. */
30156 if ((length & 1) == 0)
30157 set_mem_align (mem, BITS_PER_UNIT * 2);
30158 else
30159 set_mem_align (mem, BITS_PER_UNIT);
30160
30161 emit_insn (gen_movmisalignv8qi (mem, reg));
30162 }
30163
30164 return true;
30165 }
30166
30167 /* Set a block of memory using plain strh/strb instructions, only
30168 using instructions allowed by ALIGN on the processor. We fill the
30169 first LENGTH bytes of the memory area starting from DSTBASE
30170 with byte constant VALUE. ALIGN is the alignment requirement
30171 of memory. */
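/* For instance, assuming the size check below passes, ALIGN == 2 and
   LENGTH == 5 result in two STRHs followed by one STRB for the trailing
   byte.  */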
30172 static bool
30173 arm_block_set_unaligned_non_vect (rtx dstbase,
30174 unsigned HOST_WIDE_INT length,
30175 unsigned HOST_WIDE_INT value,
30176 unsigned HOST_WIDE_INT align)
30177 {
30178 unsigned int i;
30179 rtx dst, addr, mem;
30180 rtx val_exp, val_reg, reg;
30181 machine_mode mode;
30182 HOST_WIDE_INT v = value;
30183
30184 gcc_assert (align == 1 || align == 2);
30185
30186 if (align == 2)
30187 v |= (value << BITS_PER_UNIT);
30188
30189 v = sext_hwi (v, BITS_PER_WORD);
30190 val_exp = GEN_INT (v);
30191 /* Skip if it isn't profitable. */
30192 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30193 align, true, false))
30194 return false;
30195
30196 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30197 mode = (align == 2 ? HImode : QImode);
30198 val_reg = force_reg (SImode, val_exp);
30199 reg = gen_lowpart (mode, val_reg);
30200
30201 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30202 {
30203 addr = plus_constant (Pmode, dst, i);
30204 mem = adjust_automodify_address (dstbase, mode, addr, i);
30205 emit_move_insn (mem, reg);
30206 }
30207
30208 /* Handle single byte leftover. */
30209 if (i + 1 == length)
30210 {
30211 reg = gen_lowpart (QImode, val_reg);
30212 addr = plus_constant (Pmode, dst, i);
30213 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30214 emit_move_insn (mem, reg);
30215 i++;
30216 }
30217
30218 gcc_assert (i == length);
30219 return true;
30220 }
30221
30222 /* Set a block of memory using plain strd/str/strh/strb instructions,
30223 to permit unaligned copies on processors which support unaligned
30224 semantics for those instructions. We fill the first LENGTH bytes
30225 of the memory area starting from DSTBASE with byte constant VALUE.
30226 ALIGN is the alignment requirement of memory. */
30227 static bool
30228 arm_block_set_aligned_non_vect (rtx dstbase,
30229 unsigned HOST_WIDE_INT length,
30230 unsigned HOST_WIDE_INT value,
30231 unsigned HOST_WIDE_INT align)
30232 {
30233 unsigned int i;
30234 rtx dst, addr, mem;
30235 rtx val_exp, val_reg, reg;
30236 unsigned HOST_WIDE_INT v;
30237 bool use_strd_p;
30238
30239 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30240 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30241
30242 v = (value | (value << 8) | (value << 16) | (value << 24));
30243 if (length < UNITS_PER_WORD)
30244 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30245
30246 if (use_strd_p)
30247 v |= (v << BITS_PER_WORD);
30248 else
30249 v = sext_hwi (v, BITS_PER_WORD);
30250
30251 val_exp = GEN_INT (v);
30252 /* Skip if it isn't profitable. */
30253 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30254 align, false, use_strd_p))
30255 {
30256 if (!use_strd_p)
30257 return false;
30258
30259 /* Try without strd. */
30260 v = (v >> BITS_PER_WORD);
30261 v = sext_hwi (v, BITS_PER_WORD);
30262 val_exp = GEN_INT (v);
30263 use_strd_p = false;
30264 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30265 align, false, use_strd_p))
30266 return false;
30267 }
30268
30269 i = 0;
30270 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30271 /* Handle double words using strd if possible. */
30272 if (use_strd_p)
30273 {
30274 val_reg = force_reg (DImode, val_exp);
30275 reg = val_reg;
30276 for (; (i + 8 <= length); i += 8)
30277 {
30278 addr = plus_constant (Pmode, dst, i);
30279 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30280 emit_move_insn (mem, reg);
30281 }
30282 }
30283 else
30284 val_reg = force_reg (SImode, val_exp);
30285
30286 /* Handle words. */
30287 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30288 for (; (i + 4 <= length); i += 4)
30289 {
30290 addr = plus_constant (Pmode, dst, i);
30291 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30292 if ((align & 3) == 0)
30293 emit_move_insn (mem, reg);
30294 else
30295 emit_insn (gen_unaligned_storesi (mem, reg));
30296 }
30297
30298 /* Merge last pair of STRH and STRB into a STR if possible. */
30299 if (unaligned_access && i > 0 && (i + 3) == length)
30300 {
30301 addr = plus_constant (Pmode, dst, i - 1);
30302 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30303 /* We are shifting one byte back, set the alignment accordingly. */
30304 if ((align & 1) == 0)
30305 set_mem_align (mem, BITS_PER_UNIT);
30306
30307 /* Most likely this is an unaligned access, and we can't tell at
30308 compilation time. */
30309 emit_insn (gen_unaligned_storesi (mem, reg));
30310 return true;
30311 }
30312
30313 /* Handle half word leftover. */
30314 if (i + 2 <= length)
30315 {
30316 reg = gen_lowpart (HImode, val_reg);
30317 addr = plus_constant (Pmode, dst, i);
30318 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30319 if ((align & 1) == 0)
30320 emit_move_insn (mem, reg);
30321 else
30322 emit_insn (gen_unaligned_storehi (mem, reg));
30323
30324 i += 2;
30325 }
30326
30327 /* Handle single byte leftover. */
30328 if (i + 1 == length)
30329 {
30330 reg = gen_lowpart (QImode, val_reg);
30331 addr = plus_constant (Pmode, dst, i);
30332 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30333 emit_move_insn (mem, reg);
30334 }
30335
30336 return true;
30337 }
30338
30339 /* Set a block of memory using vectorization instructions for both
30340 aligned and unaligned cases. We fill the first LENGTH bytes of
30341 the memory area starting from DSTBASE with byte constant VALUE.
30342 ALIGN is the alignment requirement of memory. */
30343 static bool
30344 arm_block_set_vect (rtx dstbase,
30345 unsigned HOST_WIDE_INT length,
30346 unsigned HOST_WIDE_INT value,
30347 unsigned HOST_WIDE_INT align)
30348 {
30349 /* Check whether we need to use unaligned store instruction. */
30350 if (((align & 3) != 0 || (length & 3) != 0)
30351 /* Check whether unaligned store instruction is available. */
30352 && (!unaligned_access || BYTES_BIG_ENDIAN))
30353 return false;
30354
30355 if ((align & 3) == 0)
30356 return arm_block_set_aligned_vect (dstbase, length, value, align);
30357 else
30358 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30359 }
30360
30361 /* Expand a string store operation. First we try to do that by using
30362 vectorization instructions, then try with ARM unaligned access and
30363 double-word stores if profitable. OPERANDS[0] is the destination,
30364 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30365 initialize the memory with, OPERANDS[3] is the known alignment of the
30366 destination. */
30367 bool
30368 arm_gen_setmem (rtx *operands)
30369 {
30370 rtx dstbase = operands[0];
30371 unsigned HOST_WIDE_INT length;
30372 unsigned HOST_WIDE_INT value;
30373 unsigned HOST_WIDE_INT align;
30374
30375 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30376 return false;
30377
30378 length = UINTVAL (operands[1]);
30379 if (length > 64)
30380 return false;
30381
30382 value = (UINTVAL (operands[2]) & 0xFF);
30383 align = UINTVAL (operands[3]);
30384 if (TARGET_NEON && length >= 8
30385 && current_tune->string_ops_prefer_neon
30386 && arm_block_set_vect (dstbase, length, value, align))
30387 return true;
30388
30389 if (!unaligned_access && (align & 3) != 0)
30390 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30391
30392 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30393 }
30394
30395
30396 static bool
30397 arm_macro_fusion_p (void)
30398 {
30399 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30400 }
30401
30402 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30403 for MOVW / MOVT macro fusion. */
30404
30405 static bool
30406 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30407 {
30408 /* We are trying to fuse
30409 movw imm / movt imm
30410 instructions as a group that gets scheduled together. */
30411
30412 rtx set_dest = SET_DEST (curr_set);
30413
30414 if (GET_MODE (set_dest) != SImode)
30415 return false;
30416
30417 /* We are trying to match:
30418 prev (movw) == (set (reg r0) (const_int imm16))
30419 curr (movt) == (set (zero_extract (reg r0)
30420 (const_int 16)
30421 (const_int 16))
30422 (const_int imm16_1))
30423 or
30424 prev (movw) == (set (reg r1)
30425 (high (symbol_ref ("SYM"))))
30426 curr (movt) == (set (reg r0)
30427 (lo_sum (reg r1)
30428 (symbol_ref ("SYM")))) */
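/* For example, the pair

     movw r0, #0x1234
     movt r0, #0x5678

   matches the first (ZERO_EXTRACT) form above.  */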
30429
30430 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30431 {
30432 if (CONST_INT_P (SET_SRC (curr_set))
30433 && CONST_INT_P (SET_SRC (prev_set))
30434 && REG_P (XEXP (set_dest, 0))
30435 && REG_P (SET_DEST (prev_set))
30436 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30437 return true;
30438
30439 }
30440 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30441 && REG_P (SET_DEST (curr_set))
30442 && REG_P (SET_DEST (prev_set))
30443 && GET_CODE (SET_SRC (prev_set)) == HIGH
30444 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30445 return true;
30446
30447 return false;
30448 }
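/* Illustrative note (added): the fusible pair recognised above typically
   corresponds to assembly of the form

     movw  r0, #:lower16:SYM      (or movw r0, #imm16)
     movt  r0, #:upper16:SYM      (or movt r0, #imm16_1)

   i.e. a 32-bit constant or address built in two halves; keeping the two
   instructions adjacent lets cores that fuse them issue the pair together.  */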
30449
30450 static bool
30451 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30452 {
30453 rtx prev_set = single_set (prev);
30454 rtx curr_set = single_set (curr);
30455
30456 if (!prev_set
30457 || !curr_set)
30458 return false;
30459
30460 if (any_condjump_p (curr))
30461 return false;
30462
30463 if (!arm_macro_fusion_p ())
30464 return false;
30465
30466 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30467 && aarch_crypto_can_dual_issue (prev, curr))
30468 return true;
30469
30470 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30471 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30472 return true;
30473
30474 return false;
30475 }
30476
30477 /* Return true iff the instruction fusion described by OP is enabled. */
30478 bool
30479 arm_fusion_enabled_p (tune_params::fuse_ops op)
30480 {
30481 return current_tune->fusible_ops & op;
30482 }
30483
30484 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30485 scheduled for speculative execution. Reject the long-running division
30486 and square-root instructions. */
30487
30488 static bool
30489 arm_sched_can_speculate_insn (rtx_insn *insn)
30490 {
30491 switch (get_attr_type (insn))
30492 {
30493 case TYPE_SDIV:
30494 case TYPE_UDIV:
30495 case TYPE_FDIVS:
30496 case TYPE_FDIVD:
30497 case TYPE_FSQRTS:
30498 case TYPE_FSQRTD:
30499 case TYPE_NEON_FP_SQRT_S:
30500 case TYPE_NEON_FP_SQRT_D:
30501 case TYPE_NEON_FP_SQRT_S_Q:
30502 case TYPE_NEON_FP_SQRT_D_Q:
30503 case TYPE_NEON_FP_DIV_S:
30504 case TYPE_NEON_FP_DIV_D:
30505 case TYPE_NEON_FP_DIV_S_Q:
30506 case TYPE_NEON_FP_DIV_D_Q:
30507 return false;
30508 default:
30509 return true;
30510 }
30511 }
30512
30513 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30514
30515 static unsigned HOST_WIDE_INT
30516 arm_asan_shadow_offset (void)
30517 {
30518 return HOST_WIDE_INT_1U << 29;
30519 }
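/* Illustrative note (added, assuming the usual AddressSanitizer shadow
   scale of 8 application bytes per shadow byte): with the offset above a
   shadow address is computed roughly as

     shadow = (addr >> 3) + (1 << 29);

   so the shadow region for a 32-bit address space starts at 0x20000000.  */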
30520
30521
30522 /* This is a temporary fix for PR60655. Ideally we need
30523 to handle most of these cases in the generic part but
30524 currently we reject minus (..) (sym_ref). We try to
30525 ameliorate the case with minus (sym_ref1) (sym_ref2)
30526 where they are in the same section. */
30527
30528 static bool
30529 arm_const_not_ok_for_debug_p (rtx p)
30530 {
30531 tree decl_op0 = NULL;
30532 tree decl_op1 = NULL;
30533
30534 if (GET_CODE (p) == UNSPEC)
30535 return true;
30536 if (GET_CODE (p) == MINUS)
30537 {
30538 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30539 {
30540 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30541 if (decl_op1
30542 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30543 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30544 {
30545 if ((VAR_P (decl_op1)
30546 || TREE_CODE (decl_op1) == CONST_DECL)
30547 && (VAR_P (decl_op0)
30548 || TREE_CODE (decl_op0) == CONST_DECL))
30549 return (get_variable_section (decl_op1, false)
30550 != get_variable_section (decl_op0, false));
30551
30552 if (TREE_CODE (decl_op1) == LABEL_DECL
30553 && TREE_CODE (decl_op0) == LABEL_DECL)
30554 return (DECL_CONTEXT (decl_op1)
30555 != DECL_CONTEXT (decl_op0));
30556 }
30557
30558 return true;
30559 }
30560 }
30561
30562 return false;
30563 }
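/* Illustrative note (added): the case handled above is a debug expression
   such as

     (minus (symbol_ref "a") (symbol_ref "b"))

   which is only allowed to stay in the debug info when "a" and "b" are
   known to live in the same section (or, for labels, the same function).  */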
30564
30565 /* Return TRUE if X is a reference to a value in a constant pool.  */
30566 extern bool
30567 arm_is_constant_pool_ref (rtx x)
30568 {
30569 return (MEM_P (x)
30570 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30571 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30572 }
30573
30574 /* Remember the last target of arm_set_current_function. */
30575 static GTY(()) tree arm_previous_fndecl;
30576
30577 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30578
30579 void
30580 save_restore_target_globals (tree new_tree)
30581 {
30582 /* If we have a previous state, use it. */
30583 if (TREE_TARGET_GLOBALS (new_tree))
30584 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30585 else if (new_tree == target_option_default_node)
30586 restore_target_globals (&default_target_globals);
30587 else
30588 {
30589 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30590 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30591 }
30592
30593 arm_option_params_internal ();
30594 }
30595
30596 /* Invalidate arm_previous_fndecl. */
30597
30598 void
30599 arm_reset_previous_fndecl (void)
30600 {
30601 arm_previous_fndecl = NULL_TREE;
30602 }
30603
30604 /* Establish appropriate back-end context for processing the function
30605 FNDECL. The argument might be NULL to indicate processing at top
30606 level, outside of any function scope. */
30607
30608 static void
30609 arm_set_current_function (tree fndecl)
30610 {
30611 if (!fndecl || fndecl == arm_previous_fndecl)
30612 return;
30613
30614 tree old_tree = (arm_previous_fndecl
30615 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30616 : NULL_TREE);
30617
30618 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30619
30620 /* If current function has no attributes but previous one did,
30621 use the default node. */
30622 if (! new_tree && old_tree)
30623 new_tree = target_option_default_node;
30624
30625 /* If there is nothing to do, return.  #pragma GCC reset or #pragma GCC pop to
30626 the default has been handled by save_restore_target_globals from
30627 arm_pragma_target_parse.  */
30628 if (old_tree == new_tree)
30629 return;
30630
30631 arm_previous_fndecl = fndecl;
30632
30633 /* First set the target options. */
30634 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30635
30636 save_restore_target_globals (new_tree);
30637 }
30638
30639 /* Implement TARGET_OPTION_PRINT. */
30640
30641 static void
30642 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30643 {
30644 int flags = ptr->x_target_flags;
30645 const char *fpu_name;
30646
30647 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30648 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30649
30650 fprintf (file, "%*sselected isa %s\n", indent, "",
30651 TARGET_THUMB2_P (flags) ? "thumb2" :
30652 TARGET_THUMB_P (flags) ? "thumb1" :
30653 "arm");
30654
30655 if (ptr->x_arm_arch_string)
30656 fprintf (file, "%*sselected architecture %s\n", indent, "",
30657 ptr->x_arm_arch_string);
30658
30659 if (ptr->x_arm_cpu_string)
30660 fprintf (file, "%*sselected CPU %s\n", indent, "",
30661 ptr->x_arm_cpu_string);
30662
30663 if (ptr->x_arm_tune_string)
30664 fprintf (file, "%*sselected tune %s\n", indent, "",
30665 ptr->x_arm_tune_string);
30666
30667 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30668 }
30669
30670 /* Hook to determine if one function can safely inline another. */
30671
30672 static bool
30673 arm_can_inline_p (tree caller, tree callee)
30674 {
30675 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30676 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30677 bool can_inline = true;
30678
30679 struct cl_target_option *caller_opts
30680 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30681 : target_option_default_node);
30682
30683 struct cl_target_option *callee_opts
30684 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30685 : target_option_default_node);
30686
30687 if (callee_opts == caller_opts)
30688 return true;
30689
30690 /* Callee's ISA features should be a subset of the caller's. */
30691 struct arm_build_target caller_target;
30692 struct arm_build_target callee_target;
30693 caller_target.isa = sbitmap_alloc (isa_num_bits);
30694 callee_target.isa = sbitmap_alloc (isa_num_bits);
30695
30696 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30697 false);
30698 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30699 false);
30700 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30701 can_inline = false;
30702
30703 sbitmap_free (caller_target.isa);
30704 sbitmap_free (callee_target.isa);
30705
30706 /* OK to inline between different modes.
30707 Functions with mode-specific instructions, e.g. using asm,
30708 must be explicitly protected with noinline.  */
30709 return can_inline;
30710 }
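/* Illustrative note (added; the attribute string is just an example of a
   value accepted by the target attribute parser below): under the subset
   rule above, a callee such as

     __attribute__((target("fpu=neon"))) static inline void g (void) {}

   can only be inlined into a caller whose ISA feature set already
   includes the NEON bits; callers whose ISA lacks them reject the inline.  */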
30711
30712 /* Hook to fix a function's alignment when it is affected by the target attribute.  */
30713
30714 static void
30715 arm_relayout_function (tree fndecl)
30716 {
30717 if (DECL_USER_ALIGN (fndecl))
30718 return;
30719
30720 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30721
30722 if (!callee_tree)
30723 callee_tree = target_option_default_node;
30724
30725 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30726 SET_DECL_ALIGN
30727 (fndecl,
30728 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30729 }
30730
30731 /* Inner function to process the attribute ((target (...))): take an argument
30732 and set the current options from that argument.  If we have a list,
30733 recursively go over the list.  */
30734
30735 static bool
30736 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30737 {
30738 if (TREE_CODE (args) == TREE_LIST)
30739 {
30740 bool ret = true;
30741
30742 for (; args; args = TREE_CHAIN (args))
30743 if (TREE_VALUE (args)
30744 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30745 ret = false;
30746 return ret;
30747 }
30748
30749 else if (TREE_CODE (args) != STRING_CST)
30750 {
30751 error ("attribute %<target%> argument not a string");
30752 return false;
30753 }
30754
30755 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30756 char *q;
30757
30758 while ((q = strtok (argstr, ",")) != NULL)
30759 {
30760 while (ISSPACE (*q)) ++q;
30761
30762 argstr = NULL;
30763 if (!strncmp (q, "thumb", 5))
30764 opts->x_target_flags |= MASK_THUMB;
30765
30766 else if (!strncmp (q, "arm", 3))
30767 opts->x_target_flags &= ~MASK_THUMB;
30768
30769 else if (!strncmp (q, "fpu=", 4))
30770 {
30771 int fpu_index;
30772 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30773 &fpu_index, CL_TARGET))
30774 {
30775 error ("invalid fpu for target attribute or pragma %qs", q);
30776 return false;
30777 }
30778 if (fpu_index == TARGET_FPU_auto)
30779 {
30780 /* This doesn't really make sense until we support
30781 general dynamic selection of the architecture and all
30782 sub-features. */
30783 sorry ("auto fpu selection not currently permitted here");
30784 return false;
30785 }
30786 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30787 }
30788 else if (!strncmp (q, "arch=", 5))
30789 {
30790 char* arch = q+5;
30791 const arch_option *arm_selected_arch
30792 = arm_parse_arch_option_name (all_architectures, "arch", arch);
30793
30794 if (!arm_selected_arch)
30795 {
30796 error ("invalid architecture for target attribute or pragma %qs",
30797 q);
30798 return false;
30799 }
30800
30801 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
30802 }
30803 else if (q[0] == '+')
30804 {
30805 opts->x_arm_arch_string
30806 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
30807 }
30808 else
30809 {
30810 error ("unknown target attribute or pragma %qs", q);
30811 return false;
30812 }
30813 }
30814
30815 return true;
30816 }
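/* Illustrative note (added; the particular architecture and FPU names are
   only examples of values accepted by -march= and -mfpu=): the parser
   above accepts comma-separated items such as

     __attribute__((target("thumb")))
     __attribute__((target("arm,fpu=neon")))
     __attribute__((target("arch=armv7-a")))

   and an item starting with '+' appends an extension to the current
   architecture string.  */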
30817
30818 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30819
30820 tree
30821 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30822 struct gcc_options *opts_set)
30823 {
30824 struct cl_target_option cl_opts;
30825
30826 if (!arm_valid_target_attribute_rec (args, opts))
30827 return NULL_TREE;
30828
30829 cl_target_option_save (&cl_opts, opts);
30830 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30831 arm_option_check_internal (opts);
30832 /* Do any overrides, such as global options arch=xxx.
30833 We do this since arm_active_target was overridden. */
30834 arm_option_reconfigure_globals ();
30835 arm_options_perform_arch_sanity_checks ();
30836 arm_option_override_internal (opts, opts_set);
30837
30838 return build_target_option_node (opts);
30839 }
30840
30841 static void
30842 add_attribute (const char * mode, tree *attributes)
30843 {
30844 size_t len = strlen (mode);
30845 tree value = build_string (len, mode);
30846
30847 TREE_TYPE (value) = build_array_type (char_type_node,
30848 build_index_type (size_int (len)));
30849
30850 *attributes = tree_cons (get_identifier ("target"),
30851 build_tree_list (NULL_TREE, value),
30852 *attributes);
30853 }
30854
30855 /* For testing.  Insert thumb or arm modes alternately on functions.  */
30856
30857 static void
30858 arm_insert_attributes (tree fndecl, tree * attributes)
30859 {
30860 const char *mode;
30861
30862 if (! TARGET_FLIP_THUMB)
30863 return;
30864
30865 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30866 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30867 return;
30868
30869 /* Nested definitions must inherit mode. */
30870 if (current_function_decl)
30871 {
30872 mode = TARGET_THUMB ? "thumb" : "arm";
30873 add_attribute (mode, attributes);
30874 return;
30875 }
30876
30877 /* If there is already a setting don't change it. */
30878 if (lookup_attribute ("target", *attributes) != NULL)
30879 return;
30880
30881 mode = thumb_flipper ? "thumb" : "arm";
30882 add_attribute (mode, attributes);
30883
30884 thumb_flipper = !thumb_flipper;
30885 }
30886
30887 /* Hook to validate attribute((target("string"))). */
30888
30889 static bool
30890 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30891 tree args, int ARG_UNUSED (flags))
30892 {
30893 bool ret = true;
30894 struct gcc_options func_options;
30895 tree cur_tree, new_optimize;
30896 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30897
30898 /* Get the optimization options of the current function. */
30899 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30900
30901 /* If the function changed the optimization levels as well as setting target
30902 options, start with the optimizations specified. */
30903 if (!func_optimize)
30904 func_optimize = optimization_default_node;
30905
30906 /* Init func_options. */
30907 memset (&func_options, 0, sizeof (func_options));
30908 init_options_struct (&func_options, NULL);
30909 lang_hooks.init_options_struct (&func_options);
30910
30911 /* Initialize func_options to the defaults. */
30912 cl_optimization_restore (&func_options,
30913 TREE_OPTIMIZATION (func_optimize));
30914
30915 cl_target_option_restore (&func_options,
30916 TREE_TARGET_OPTION (target_option_default_node));
30917
30918 /* Set func_options flags with new target mode. */
30919 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30920 &global_options_set);
30921
30922 if (cur_tree == NULL_TREE)
30923 ret = false;
30924
30925 new_optimize = build_optimization_node (&func_options);
30926
30927 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30928
30929 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30930
30931 finalize_options_struct (&func_options);
30932
30933 return ret;
30934 }
30935
30936 /* Match an ISA feature bitmap to a named FPU. We always use the
30937 first entry that exactly matches the feature set, so that we
30938 effectively canonicalize the FPU name for the assembler. */
30939 static const char*
30940 arm_identify_fpu_from_isa (sbitmap isa)
30941 {
30942 auto_sbitmap fpubits (isa_num_bits);
30943 auto_sbitmap cand_fpubits (isa_num_bits);
30944
30945 bitmap_and (fpubits, isa, isa_all_fpubits);
30946
30947 /* If there are no ISA feature bits relating to the FPU, we must be
30948 doing soft-float. */
30949 if (bitmap_empty_p (fpubits))
30950 return "softvfp";
30951
30952 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30953 {
30954 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30955 if (bitmap_equal_p (fpubits, cand_fpubits))
30956 return all_fpus[i].name;
30957 }
30958 /* We must find an entry, or things have gone wrong. */
30959 gcc_unreachable ();
30960 }
30961
30962 /* The last .arch and .fpu assembly strings that we printed. */
30963 static std::string arm_last_printed_arch_string;
30964 static std::string arm_last_printed_fpu_string;
30965
30966 /* Implement ASM_DECLARE_FUNCTION_NAME.  Output the ISA features used
30967 by the function DECL.  */
30968 void
30969 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30970 {
30971 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
30972
30973 struct cl_target_option *targ_options;
30974 if (target_parts)
30975 targ_options = TREE_TARGET_OPTION (target_parts);
30976 else
30977 targ_options = TREE_TARGET_OPTION (target_option_current_node);
30978 gcc_assert (targ_options);
30979
30980 /* Only update the assembler .arch string if it is distinct from the last
30981 such string we printed. */
30982 std::string arch_to_print = targ_options->x_arm_arch_string;
30983 if (arch_to_print != arm_last_printed_arch_string)
30984 {
30985 std::string arch_name
30986 = arch_to_print.substr (0, arch_to_print.find ("+"));
30987 asm_fprintf (asm_out_file, "\t.arch %s\n", arch_name.c_str ());
30988 const arch_option *arch
30989 = arm_parse_arch_option_name (all_architectures, "-march",
30990 targ_options->x_arm_arch_string);
30991 auto_sbitmap opt_bits (isa_num_bits);
30992
30993 gcc_assert (arch);
30994 if (arch->common.extensions)
30995 {
30996 for (const struct cpu_arch_extension *opt = arch->common.extensions;
30997 opt->name != NULL;
30998 opt++)
30999 {
31000 if (!opt->remove)
31001 {
31002 arm_initialize_isa (opt_bits, opt->isa_bits);
31003 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
31004 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
31005 asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
31006 opt->name);
31007 }
31008 }
31009 }
31010
31011 arm_last_printed_arch_string = arch_to_print;
31012 }
31013
31014 fprintf (stream, "\t.syntax unified\n");
31015
31016 if (TARGET_THUMB)
31017 {
31018 if (is_called_in_ARM_mode (decl)
31019 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
31020 && cfun->is_thunk))
31021 fprintf (stream, "\t.code 32\n");
31022 else if (TARGET_THUMB1)
31023 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
31024 else
31025 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
31026 }
31027 else
31028 fprintf (stream, "\t.arm\n");
31029
31030 std::string fpu_to_print
31031 = TARGET_SOFT_FLOAT
31032 ? "softvfp" : arm_identify_fpu_from_isa (arm_active_target.isa);
31033
31034 if (fpu_to_print != arm_last_printed_fpu_string)
31035 {
31036 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_to_print.c_str ());
31037 arm_last_printed_fpu_string = fpu_to_print;
31038 }
31039
31040 if (TARGET_POKE_FUNCTION_NAME)
31041 arm_poke_function_name (stream, (const char *) name);
31042 }
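/* Illustrative note (added; the concrete names are examples): for a
   Thumb-2 function built for -march=armv7-a with a NEON FPU, the code
   above emits directives along the lines of

     .arch armv7-a
     .syntax unified
     .thumb
     .thumb_func
     .fpu neon

   with the .arch and .fpu lines only re-emitted when they differ from the
   last ones printed.  */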
31043
31044 /* If MEM is in the form of [base+offset], extract the two parts
31045 of the address and store them in BASE and OFFSET; otherwise return false
31046 after clearing BASE and OFFSET.  */
31047
31048 static bool
31049 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
31050 {
31051 rtx addr;
31052
31053 gcc_assert (MEM_P (mem));
31054
31055 addr = XEXP (mem, 0);
31056
31057 /* Strip off const from addresses like (const (addr)). */
31058 if (GET_CODE (addr) == CONST)
31059 addr = XEXP (addr, 0);
31060
31061 if (GET_CODE (addr) == REG)
31062 {
31063 *base = addr;
31064 *offset = const0_rtx;
31065 return true;
31066 }
31067
31068 if (GET_CODE (addr) == PLUS
31069 && GET_CODE (XEXP (addr, 0)) == REG
31070 && CONST_INT_P (XEXP (addr, 1)))
31071 {
31072 *base = XEXP (addr, 0);
31073 *offset = XEXP (addr, 1);
31074 return true;
31075 }
31076
31077 *base = NULL_RTX;
31078 *offset = NULL_RTX;
31079
31080 return false;
31081 }
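/* Illustrative note (added): the two accepted address shapes are

     (reg Rb)                        -> BASE = Rb, OFFSET = 0
     (plus (reg Rb) (const_int 8))   -> BASE = Rb, OFFSET = 8

   anything else (e.g. a post-increment address) clears BASE and OFFSET
   and returns false.  */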
31082
31083 /* If INSN is a load or store whose address has the form [base+offset],
31084 extract the two parts and store them in BASE and OFFSET.  IS_LOAD is set
31085 to TRUE if it is a load.  Return TRUE if INSN is such an instruction,
31086 otherwise return FALSE.  */
31087
31088 static bool
31089 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
31090 {
31091 rtx x, dest, src;
31092
31093 gcc_assert (INSN_P (insn));
31094 x = PATTERN (insn);
31095 if (GET_CODE (x) != SET)
31096 return false;
31097
31098 src = SET_SRC (x);
31099 dest = SET_DEST (x);
31100 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
31101 {
31102 *is_load = false;
31103 extract_base_offset_in_addr (dest, base, offset);
31104 }
31105 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
31106 {
31107 *is_load = true;
31108 extract_base_offset_in_addr (src, base, offset);
31109 }
31110 else
31111 return false;
31112
31113 return (*base != NULL_RTX && *offset != NULL_RTX);
31114 }
31115
31116 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
31117
31118 Currently we only support fusing ldr and str instructions, so FUSION_PRI
31119 and PRI are only calculated for these instructions.  For other instructions,
31120 FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds
31121 of instruction fusion can be supported by returning different priorities.
31122
31123 It's important that irrelevant instructions get the largest FUSION_PRI. */
31124
31125 static void
31126 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
31127 int *fusion_pri, int *pri)
31128 {
31129 int tmp, off_val;
31130 bool is_load;
31131 rtx base, offset;
31132
31133 gcc_assert (INSN_P (insn));
31134
31135 tmp = max_pri - 1;
31136 if (!fusion_load_store (insn, &base, &offset, &is_load))
31137 {
31138 *pri = tmp;
31139 *fusion_pri = tmp;
31140 return;
31141 }
31142
31143 /* Load goes first. */
31144 if (is_load)
31145 *fusion_pri = tmp - 1;
31146 else
31147 *fusion_pri = tmp - 2;
31148
31149 tmp /= 2;
31150
31151 /* INSN with smaller base register goes first. */
31152 tmp -= ((REGNO (base) & 0xff) << 20);
31153
31154 /* INSN with smaller offset goes first. */
31155 off_val = (int)(INTVAL (offset));
31156 if (off_val >= 0)
31157 tmp -= (off_val & 0xfffff);
31158 else
31159 tmp += ((- off_val) & 0xfffff);
31160
31161 *pri = tmp;
31162 return;
31163 }
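/* Illustrative note (added): for two loads from [r1, #0] and [r1, #4] the
   code above assigns the same FUSION_PRI (loads rank just ahead of stores),
   while PRI decreases as the base register number and the signed offset
   grow, so the scheduler tends to keep such accesses adjacent and in
   offset order.  */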
31164
31165
31166 /* Construct and return a PARALLEL RTX vector with elements numbering the
31167 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
31168 the vector - from the perspective of the architecture. This does not
31169 line up with GCC's perspective on lane numbers, so we end up with
31170 different masks depending on our target endian-ness. The diagram
31171 below may help. We must draw the distinction when building masks
31172 which select one half of the vector. An instruction selecting
31173 architectural low-lanes for a big-endian target, must be described using
31174 a mask selecting GCC high-lanes.
31175
31176 Big-Endian Little-Endian
31177
31178 GCC 0 1 2 3 3 2 1 0
31179 | x | x | x | x | | x | x | x | x |
31180 Architecture 3 2 1 0 3 2 1 0
31181
31182 Low Mask: { 2, 3 } { 0, 1 }
31183 High Mask: { 0, 1 } { 2, 3 }
31184 */
31185
31186 rtx
31187 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
31188 {
31189 int nunits = GET_MODE_NUNITS (mode);
31190 rtvec v = rtvec_alloc (nunits / 2);
31191 int high_base = nunits / 2;
31192 int low_base = 0;
31193 int base;
31194 rtx t1;
31195 int i;
31196
31197 if (BYTES_BIG_ENDIAN)
31198 base = high ? low_base : high_base;
31199 else
31200 base = high ? high_base : low_base;
31201
31202 for (i = 0; i < nunits / 2; i++)
31203 RTVEC_ELT (v, i) = GEN_INT (base + i);
31204
31205 t1 = gen_rtx_PARALLEL (mode, v);
31206 return t1;
31207 }
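/* Illustrative note (added): for V4SImode on a little-endian target,
   HIGH == true yields (parallel [(const_int 2) (const_int 3)]) and
   HIGH == false yields (parallel [(const_int 0) (const_int 1)]); on a
   big-endian target the two masks are swapped, as in the diagram above.  */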
31208
31209 /* Check OP for validity as a PARALLEL RTX vector with elements
31210 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
31211 from the perspective of the architecture. See the diagram above
31212 arm_simd_vect_par_cnst_half for more details.  */
31213
31214 bool
31215 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
31216 bool high)
31217 {
31218 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
31219 HOST_WIDE_INT count_op = XVECLEN (op, 0);
31220 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
31221 int i = 0;
31222
31223 if (!VECTOR_MODE_P (mode))
31224 return false;
31225
31226 if (count_op != count_ideal)
31227 return false;
31228
31229 for (i = 0; i < count_ideal; i++)
31230 {
31231 rtx elt_op = XVECEXP (op, 0, i);
31232 rtx elt_ideal = XVECEXP (ideal, 0, i);
31233
31234 if (!CONST_INT_P (elt_op)
31235 || INTVAL (elt_ideal) != INTVAL (elt_op))
31236 return false;
31237 }
31238 return true;
31239 }
31240
31241 /* Can output mi_thunk for all cases except for non-zero vcall_offset
31242 in Thumb1. */
31243 static bool
31244 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
31245 const_tree)
31246 {
31247 /* For now, we punt and do not handle this for TARGET_THUMB1.  */
31248 if (vcall_offset && TARGET_THUMB1)
31249 return false;
31250
31251 /* Otherwise ok. */
31252 return true;
31253 }
31254
31255 /* Generate RTL for a conditional branch with rtx comparison CODE in
31256 mode CC_MODE. The destination of the unlikely conditional branch
31257 is LABEL_REF. */
31258
31259 void
31260 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
31261 rtx label_ref)
31262 {
31263 rtx x;
31264 x = gen_rtx_fmt_ee (code, VOIDmode,
31265 gen_rtx_REG (cc_mode, CC_REGNUM),
31266 const0_rtx);
31267
31268 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31269 gen_rtx_LABEL_REF (VOIDmode, label_ref),
31270 pc_rtx);
31271 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31272 }
31273
31274 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31275
31276 For pure-code sections there is no letter code for this attribute, so
31277 output all the section flags numerically when this is needed. */
31278
31279 static bool
31280 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31281 {
31282
31283 if (flags & SECTION_ARM_PURECODE)
31284 {
31285 *num = 0x20000000;
31286
31287 if (!(flags & SECTION_DEBUG))
31288 *num |= 0x2;
31289 if (flags & SECTION_EXCLUDE)
31290 *num |= 0x80000000;
31291 if (flags & SECTION_WRITE)
31292 *num |= 0x1;
31293 if (flags & SECTION_CODE)
31294 *num |= 0x4;
31295 if (flags & SECTION_MERGE)
31296 *num |= 0x10;
31297 if (flags & SECTION_STRINGS)
31298 *num |= 0x20;
31299 if (flags & SECTION_TLS)
31300 *num |= 0x400;
31301 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31302 *num |= 0x200;
31303
31304 return true;
31305 }
31306
31307 return false;
31308 }
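/* Illustrative note (added): for a typical pure-code text section the
   flags above combine as

     0x20000000 (SHF_ARM_PURECODE) | 0x4 (SHF_EXECINSTR) | 0x2 (SHF_ALLOC,
     i.e. not a debug section) == 0x20000006

   which is the value emitted in the numeric section flags field.  */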
31309
31310 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31311
31312 If pure-code is passed as an option, make sure all functions are in
31313 sections that have the SHF_ARM_PURECODE attribute. */
31314
31315 static section *
31316 arm_function_section (tree decl, enum node_frequency freq,
31317 bool startup, bool exit)
31318 {
31319 const char * section_name;
31320 section * sec;
31321
31322 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31323 return default_function_section (decl, freq, startup, exit);
31324
31325 if (!target_pure_code)
31326 return default_function_section (decl, freq, startup, exit);
31327
31328
31329 section_name = DECL_SECTION_NAME (decl);
31330
31331 /* If a function is not in a named section then it falls under the 'default'
31332 text section, also known as '.text'. We can preserve previous behavior as
31333 the default text section already has the SHF_ARM_PURECODE section
31334 attribute. */
31335 if (!section_name)
31336 {
31337 section *default_sec = default_function_section (decl, freq, startup,
31338 exit);
31339
31340 /* If default_sec is not null, then it must be a special section like for
31341 example .text.startup. We set the pure-code attribute and return the
31342 same section to preserve existing behavior. */
31343 if (default_sec)
31344 default_sec->common.flags |= SECTION_ARM_PURECODE;
31345 return default_sec;
31346 }
31347
31348 /* Otherwise look whether a section has already been created with
31349 'section_name'. */
31350 sec = get_named_section (decl, section_name, 0);
31351 if (!sec)
31352 /* If that is not the case, passing NULL as the section's name to
31353 'get_named_section' will create a section with the declaration's
31354 section name. */
31355 sec = get_named_section (decl, NULL, 0);
31356
31357 /* Set the SHF_ARM_PURECODE attribute. */
31358 sec->common.flags |= SECTION_ARM_PURECODE;
31359
31360 return sec;
31361 }
31362
31363 /* Implements the TARGET_SECTION_TYPE_FLAGS hook.
31364 
31365 If DECL is a function declaration and pure-code is passed as an option
31366 then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
31367 section's name and RELOC indicates whether the declaration's initializer may
31368 contain runtime relocations.  */
31369
31370 static unsigned int
31371 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31372 {
31373 unsigned int flags = default_section_type_flags (decl, name, reloc);
31374
31375 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31376 flags |= SECTION_ARM_PURECODE;
31377
31378 return flags;
31379 }
31380
31381 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31382
31383 static void
31384 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31385 rtx op0, rtx op1,
31386 rtx *quot_p, rtx *rem_p)
31387 {
31388 if (mode == SImode)
31389 gcc_assert (!TARGET_IDIV);
31390
31391 scalar_int_mode libval_mode
31392 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31393
31394 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31395 libval_mode,
31396 op0, GET_MODE (op0),
31397 op1, GET_MODE (op1));
31398
31399 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31400 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31401 GET_MODE_SIZE (mode));
31402
31403 gcc_assert (quotient);
31404 gcc_assert (remainder);
31405
31406 *quot_p = quotient;
31407 *rem_p = remainder;
31408 }
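/* Illustrative note (added): for a SImode division the library call (e.g.
   __aeabi_idivmod) returns its result in a DImode value; the code above
   then takes the quotient as the subreg at byte offset 0 and the remainder
   as the subreg at byte offset GET_MODE_SIZE (SImode) == 4 of that value.  */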
31409
31410 /* This function checks for the availability of the coprocessor builtin passed
31411 in BUILTIN for the current target. Returns true if it is available and
31412 false otherwise. If a BUILTIN is passed for which this function has not
31413 been implemented it will trigger an internal compiler error (gcc_unreachable).  */
31414
31415 bool
31416 arm_coproc_builtin_available (enum unspecv builtin)
31417 {
31418 /* None of these builtins are available in Thumb mode if the target only
31419 supports Thumb-1. */
31420 if (TARGET_THUMB1)
31421 return false;
31422
31423 switch (builtin)
31424 {
31425 case VUNSPEC_CDP:
31426 case VUNSPEC_LDC:
31427 case VUNSPEC_LDCL:
31428 case VUNSPEC_STC:
31429 case VUNSPEC_STCL:
31430 case VUNSPEC_MCR:
31431 case VUNSPEC_MRC:
31432 if (arm_arch4)
31433 return true;
31434 break;
31435 case VUNSPEC_CDP2:
31436 case VUNSPEC_LDC2:
31437 case VUNSPEC_LDC2L:
31438 case VUNSPEC_STC2:
31439 case VUNSPEC_STC2L:
31440 case VUNSPEC_MCR2:
31441 case VUNSPEC_MRC2:
31442 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31443 ARMv8-{A,M}. */
31444 if (arm_arch5)
31445 return true;
31446 break;
31447 case VUNSPEC_MCRR:
31448 case VUNSPEC_MRRC:
31449 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31450 ARMv8-{A,M}. */
31451 if (arm_arch6 || arm_arch5te)
31452 return true;
31453 break;
31454 case VUNSPEC_MCRR2:
31455 case VUNSPEC_MRRC2:
31456 if (arm_arch6)
31457 return true;
31458 break;
31459 default:
31460 gcc_unreachable ();
31461 }
31462 return false;
31463 }
31464
31465 /* This function returns true if OP is a valid memory operand for the ldc and
31466 stc coprocessor instructions and false otherwise. */
31467
31468 bool
31469 arm_coproc_ldc_stc_legitimate_address (rtx op)
31470 {
31471 HOST_WIDE_INT range;
31472 /* Has to be a memory operand. */
31473 if (!MEM_P (op))
31474 return false;
31475
31476 op = XEXP (op, 0);
31477
31478 /* We accept registers. */
31479 if (REG_P (op))
31480 return true;
31481
31482 switch (GET_CODE (op))
31483 {
31484 case PLUS:
31485 {
31486 /* Or registers with an offset. */
31487 if (!REG_P (XEXP (op, 0)))
31488 return false;
31489
31490 op = XEXP (op, 1);
31491
31492 /* The offset must be an immediate though. */
31493 if (!CONST_INT_P (op))
31494 return false;
31495
31496 range = INTVAL (op);
31497
31498 /* Within the range of [-1020,1020]. */
31499 if (!IN_RANGE (range, -1020, 1020))
31500 return false;
31501
31502 /* And a multiple of 4. */
31503 return (range % 4) == 0;
31504 }
31505 case PRE_INC:
31506 case POST_INC:
31507 case PRE_DEC:
31508 case POST_DEC:
31509 return REG_P (XEXP (op, 0));
31510 default:
31511 gcc_unreachable ();
31512 }
31513 return false;
31514 }
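/* Illustrative note (added): with the checks above, addresses such as
   [r0], [r0, #8] and [r0, #-1020] are accepted for LDC/STC, while
   [r0, #6] (not a multiple of 4) and [r0, #1024] (outside [-1020, 1020])
   are rejected; pre/post increment and decrement forms on a plain base
   register are also accepted.  */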
31515
31516 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31517
31518 In VFPv1, VFP registers could only be accessed in the mode they were
31519 set, so subregs would be invalid there. However, we don't support
31520 VFPv1 at the moment, and the restriction was lifted in VFPv2.
31521
31522 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31523 VFP registers in little-endian order. We can't describe that accurately to
31524 GCC, so avoid taking subregs of such values.
31525
31526 The only exception is going from a 128-bit to a 64-bit type. In that
31527 case the data layout happens to be consistent for big-endian, so we
31528 explicitly allow that case. */
31529
31530 static bool
31531 arm_can_change_mode_class (machine_mode from, machine_mode to,
31532 reg_class_t rclass)
31533 {
31534 if (TARGET_BIG_END
31535 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
31536 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
31537 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
31538 && reg_classes_intersect_p (VFP_REGS, rclass))
31539 return false;
31540 return true;
31541 }
31542
31543 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
31544 strcpy from constants will be faster. */
31545
31546 static HOST_WIDE_INT
31547 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
31548 {
31549 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
31550 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
31551 return MAX (align, BITS_PER_WORD * factor);
31552 return align;
31553 }
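/* Illustrative note (added): with the hook above a string constant is
   aligned to at least BITS_PER_WORD (32 bits on ARM), or to 64 bits when
   tuning for XScale in ARM state, unless the compilation optimizes for
   size.  */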
31554
31555 #if CHECKING_P
31556 namespace selftest {
31557
31558 /* Scan the static data tables generated by parsecpu.awk looking for
31559 potential issues with the data. We primarily check for
31560 inconsistencies in the option extensions at present (extensions
31561 that duplicate others but aren't marked as aliases). Furthermore,
31562 for correct canonicalization later options must never be a subset
31563 of an earlier option. Any extension should also only specify other
31564 feature bits and never an architecture bit. The architecture is inferred
31565 from the declaration of the extension. */
31566 static void
31567 arm_test_cpu_arch_data (void)
31568 {
31569 const arch_option *arch;
31570 const cpu_option *cpu;
31571 auto_sbitmap target_isa (isa_num_bits);
31572 auto_sbitmap isa1 (isa_num_bits);
31573 auto_sbitmap isa2 (isa_num_bits);
31574
31575 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31576 {
31577 const cpu_arch_extension *ext1, *ext2;
31578
31579 if (arch->common.extensions == NULL)
31580 continue;
31581
31582 arm_initialize_isa (target_isa, arch->common.isa_bits);
31583
31584 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31585 {
31586 if (ext1->alias)
31587 continue;
31588
31589 arm_initialize_isa (isa1, ext1->isa_bits);
31590 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31591 {
31592 if (ext2->alias || ext1->remove != ext2->remove)
31593 continue;
31594
31595 arm_initialize_isa (isa2, ext2->isa_bits);
31596 /* If the option is a subset of the parent option, it doesn't
31597 add anything and so isn't useful. */
31598 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31599
31600 /* If the extension specifies any architectural bits then
31601 disallow it. Extensions should only specify feature bits. */
31602 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31603 }
31604 }
31605 }
31606
31607 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31608 {
31609 const cpu_arch_extension *ext1, *ext2;
31610
31611 if (cpu->common.extensions == NULL)
31612 continue;
31613
31614 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31615
31616 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31617 {
31618 if (ext1->alias)
31619 continue;
31620
31621 arm_initialize_isa (isa1, ext1->isa_bits);
31622 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31623 {
31624 if (ext2->alias || ext1->remove != ext2->remove)
31625 continue;
31626
31627 arm_initialize_isa (isa2, ext2->isa_bits);
31628 /* If the option is a subset of the parent option, it doesn't
31629 add anything and so isn't useful. */
31630 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31631
31632 /* If the extension specifies any architectural bits then
31633 disallow it. Extensions should only specify feature bits. */
31634 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31635 }
31636 }
31637 }
31638 }
31639
31640 /* Scan the static data tables generated by parsecpu.awk looking for
31641 potential issues with the data. Here we check for consistency between the
31642 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
31643 a feature bit that is not defined by any FPU flag. */
31644 static void
31645 arm_test_fpu_data (void)
31646 {
31647 auto_sbitmap isa_all_fpubits (isa_num_bits);
31648 auto_sbitmap fpubits (isa_num_bits);
31649 auto_sbitmap tmpset (isa_num_bits);
31650
31651 static const enum isa_feature fpu_bitlist[]
31652 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
31653 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
31654
31655 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31656 {
31657 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
31658 bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
31659 bitmap_clear (isa_all_fpubits);
31660 bitmap_copy (isa_all_fpubits, tmpset);
31661 }
31662
31663 if (!bitmap_empty_p (isa_all_fpubits))
31664 {
31665 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
31666 " group that are not defined by any FPU.\n"
31667 " Check your arm-cpus.in.\n");
31668 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
31669 }
31670 }
31671
31672 static void
31673 arm_run_selftests (void)
31674 {
31675 arm_test_cpu_arch_data ();
31676 arm_test_fpu_data ();
31677 }
31678 } /* Namespace selftest. */
31679
31680 #undef TARGET_RUN_TARGET_SELFTESTS
31681 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31682 #endif /* CHECKING_P */
31683
31684 struct gcc_target targetm = TARGET_INITIALIZER;
31685
31686 #include "gt-arm.h"