1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "attribs.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic-core.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
45 #include "calls.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "reload.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "cfgrtl.h"
54 #include "sched-int.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60 #include "opts.h"
61 #include "dumpfile.h"
62 #include "target-globals.h"
63 #include "builtins.h"
64 #include "tm-constrs.h"
65 #include "rtl-iter.h"
66 #include "optabs-libfuncs.h"
67 #include "gimplify.h"
68 #include "gimple.h"
69 #include "selftest.h"
70
71 /* This file should be included last. */
72 #include "target-def.h"
73
74 /* Forward definitions of types. */
75 typedef struct minipool_node Mnode;
76 typedef struct minipool_fixup Mfix;
77
78 void (*arm_lang_output_object_attributes_hook)(void);
79
80 struct four_ints
81 {
82 int i[4];
83 };
84
85 /* Forward function declarations. */
86 static bool arm_const_not_ok_for_debug_p (rtx);
87 static int arm_needs_doubleword_align (machine_mode, const_tree);
88 static int arm_compute_static_chain_stack_bytes (void);
89 static arm_stack_offsets *arm_get_frame_offsets (void);
90 static void arm_compute_frame_layout (void);
91 static void arm_add_gc_roots (void);
92 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
93 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
94 static unsigned bit_count (unsigned long);
95 static unsigned bitmap_popcount (const sbitmap);
96 static int arm_address_register_rtx_p (rtx, int);
97 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
98 static bool is_called_in_ARM_mode (tree);
99 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
100 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
101 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
102 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
103 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
104 inline static int thumb1_index_register_rtx_p (rtx, int);
105 static int thumb_far_jump_used_p (void);
106 static bool thumb_force_lr_save (void);
107 static unsigned arm_size_return_regs (void);
108 static bool arm_assemble_integer (rtx, unsigned int, int);
109 static void arm_print_operand (FILE *, rtx, int);
110 static void arm_print_operand_address (FILE *, machine_mode, rtx);
111 static bool arm_print_operand_punct_valid_p (unsigned char code);
112 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
113 static arm_cc get_arm_condition_code (rtx);
114 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
115 static const char *output_multi_immediate (rtx *, const char *, const char *,
116 int, HOST_WIDE_INT);
117 static const char *shift_op (rtx, HOST_WIDE_INT *);
118 static struct machine_function *arm_init_machine_status (void);
119 static void thumb_exit (FILE *, int);
120 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
121 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
122 static Mnode *add_minipool_forward_ref (Mfix *);
123 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
124 static Mnode *add_minipool_backward_ref (Mfix *);
125 static void assign_minipool_offsets (Mfix *);
126 static void arm_print_value (FILE *, rtx);
127 static void dump_minipool (rtx_insn *);
128 static int arm_barrier_cost (rtx_insn *);
129 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
130 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
131 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
132 machine_mode, rtx);
133 static void arm_reorg (void);
134 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
135 static unsigned long arm_compute_save_reg0_reg12_mask (void);
136 static unsigned long arm_compute_save_core_reg_mask (void);
137 static unsigned long arm_isr_value (tree);
138 static unsigned long arm_compute_func_type (void);
139 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
140 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
141 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
142 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
143 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
144 #endif
145 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
146 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
147 static void arm_output_function_epilogue (FILE *);
148 static void arm_output_function_prologue (FILE *);
149 static int arm_comp_type_attributes (const_tree, const_tree);
150 static void arm_set_default_type_attributes (tree);
151 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
152 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
153 static int optimal_immediate_sequence (enum rtx_code code,
154 unsigned HOST_WIDE_INT val,
155 struct four_ints *return_sequence);
156 static int optimal_immediate_sequence_1 (enum rtx_code code,
157 unsigned HOST_WIDE_INT val,
158 struct four_ints *return_sequence,
159 int i);
160 static int arm_get_strip_length (int);
161 static bool arm_function_ok_for_sibcall (tree, tree);
162 static machine_mode arm_promote_function_mode (const_tree,
163 machine_mode, int *,
164 const_tree, int);
165 static bool arm_return_in_memory (const_tree, const_tree);
166 static rtx arm_function_value (const_tree, const_tree, bool);
167 static rtx arm_libcall_value_1 (machine_mode);
168 static rtx arm_libcall_value (machine_mode, const_rtx);
169 static bool arm_function_value_regno_p (const unsigned int);
170 static void arm_internal_label (FILE *, const char *, unsigned long);
171 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
172 tree);
173 static bool arm_have_conditional_execution (void);
174 static bool arm_cannot_force_const_mem (machine_mode, rtx);
175 static bool arm_legitimate_constant_p (machine_mode, rtx);
176 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
177 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
178 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
179 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
180 static void emit_constant_insn (rtx cond, rtx pattern);
181 static rtx_insn *emit_set_insn (rtx, rtx);
182 static rtx emit_multi_reg_push (unsigned long, unsigned long);
183 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
184 tree, bool);
185 static rtx arm_function_arg (cumulative_args_t, machine_mode,
186 const_tree, bool);
187 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
188 const_tree, bool);
189 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
190 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
191 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
192 const_tree);
193 static rtx aapcs_libcall_value (machine_mode);
194 static int aapcs_select_return_coproc (const_tree, const_tree);
195
196 #ifdef OBJECT_FORMAT_ELF
197 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
198 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
199 #endif
200 #ifndef ARM_PE
201 static void arm_encode_section_info (tree, rtx, int);
202 #endif
203
204 static void arm_file_end (void);
205 static void arm_file_start (void);
206 static void arm_insert_attributes (tree, tree *);
207
208 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
209 tree, int *, int);
210 static bool arm_pass_by_reference (cumulative_args_t,
211 machine_mode, const_tree, bool);
212 static bool arm_promote_prototypes (const_tree);
213 static bool arm_default_short_enums (void);
214 static bool arm_align_anon_bitfield (void);
215 static bool arm_return_in_msb (const_tree);
216 static bool arm_must_pass_in_stack (machine_mode, const_tree);
217 static bool arm_return_in_memory (const_tree, const_tree);
218 #if ARM_UNWIND_INFO
219 static void arm_unwind_emit (FILE *, rtx_insn *);
220 static bool arm_output_ttype (rtx);
221 static void arm_asm_emit_except_personality (rtx);
222 #endif
223 static void arm_asm_init_sections (void);
224 static rtx arm_dwarf_register_span (rtx);
225
226 static tree arm_cxx_guard_type (void);
227 static bool arm_cxx_guard_mask_bit (void);
228 static tree arm_get_cookie_size (tree);
229 static bool arm_cookie_has_size (void);
230 static bool arm_cxx_cdtor_returns_this (void);
231 static bool arm_cxx_key_method_may_be_inline (void);
232 static void arm_cxx_determine_class_data_visibility (tree);
233 static bool arm_cxx_class_data_always_comdat (void);
234 static bool arm_cxx_use_aeabi_atexit (void);
235 static void arm_init_libfuncs (void);
236 static tree arm_build_builtin_va_list (void);
237 static void arm_expand_builtin_va_start (tree, rtx);
238 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
239 static void arm_option_override (void);
240 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
241 static void arm_option_restore (struct gcc_options *,
242 struct cl_target_option *);
243 static void arm_override_options_after_change (void);
244 static void arm_option_print (FILE *, int, struct cl_target_option *);
245 static void arm_set_current_function (tree);
246 static bool arm_can_inline_p (tree, tree);
247 static void arm_relayout_function (tree);
248 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
249 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
250 static bool arm_sched_can_speculate_insn (rtx_insn *);
251 static bool arm_macro_fusion_p (void);
252 static bool arm_cannot_copy_insn_p (rtx_insn *);
253 static int arm_issue_rate (void);
254 static int arm_first_cycle_multipass_dfa_lookahead (void);
255 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
256 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
257 static bool arm_output_addr_const_extra (FILE *, rtx);
258 static bool arm_allocate_stack_slots_for_args (void);
259 static bool arm_warn_func_return (tree);
260 static tree arm_promoted_type (const_tree t);
261 static bool arm_scalar_mode_supported_p (scalar_mode);
262 static bool arm_frame_pointer_required (void);
263 static bool arm_can_eliminate (const int, const int);
264 static void arm_asm_trampoline_template (FILE *);
265 static void arm_trampoline_init (rtx, tree, rtx);
266 static rtx arm_trampoline_adjust_address (rtx);
267 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
268 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
269 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
270 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
271 static bool arm_array_mode_supported_p (machine_mode,
272 unsigned HOST_WIDE_INT);
273 static machine_mode arm_preferred_simd_mode (scalar_mode);
274 static bool arm_class_likely_spilled_p (reg_class_t);
275 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
276 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
277 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
278 const_tree type,
279 int misalignment,
280 bool is_packed);
281 static void arm_conditional_register_usage (void);
282 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
283 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
284 static unsigned int arm_autovectorize_vector_sizes (void);
285 static int arm_default_branch_cost (bool, bool);
286 static int arm_cortex_a5_branch_cost (bool, bool);
287 static int arm_cortex_m_branch_cost (bool, bool);
288 static int arm_cortex_m7_branch_cost (bool, bool);
289
290 static bool arm_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices);
291
292 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
293
294 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
295 tree vectype,
296 int misalign ATTRIBUTE_UNUSED);
297 static unsigned arm_add_stmt_cost (void *data, int count,
298 enum vect_cost_for_stmt kind,
299 struct _stmt_vec_info *stmt_info,
300 int misalign,
301 enum vect_cost_model_location where);
302
303 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
304 bool op0_preserve_value);
305 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
306
307 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
308 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
309 const_tree);
310 static section *arm_function_section (tree, enum node_frequency, bool, bool);
311 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
312 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
313 int reloc);
314 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
315 static opt_scalar_float_mode arm_floatn_mode (int, bool);
316 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
317 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
318 static bool arm_modes_tieable_p (machine_mode, machine_mode);
319 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
320 \f
321 /* Table of machine attributes. */
322 static const struct attribute_spec arm_attribute_table[] =
323 {
324 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
325 affects_type_identity } */
326 /* Function calls made to this symbol must be done indirectly, because
327 it may lie outside of the 26 bit addressing range of a normal function
328 call. */
329 { "long_call", 0, 0, false, true, true, NULL, false },
330 /* Whereas these functions are always known to reside within the 26 bit
331 addressing range. */
332 { "short_call", 0, 0, false, true, true, NULL, false },
333 /* Specify the procedure call conventions for a function. */
334 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
335 false },
336 /* Interrupt Service Routines have special prologue and epilogue requirements. */
337 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
338 false },
339 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
340 false },
341 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
342 false },
343 #ifdef ARM_PE
344 /* ARM/PE has three new attributes:
345 interfacearm - ?
346 dllexport - for exporting a function/variable that will live in a dll
347 dllimport - for importing a function/variable from a dll
348
349 Microsoft allows multiple declspecs in one __declspec, separating
350 them with spaces. We do NOT support this. Instead, use __declspec
351 multiple times.
352 */
353 { "dllimport", 0, 0, true, false, false, NULL, false },
354 { "dllexport", 0, 0, true, false, false, NULL, false },
355 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
356 false },
357 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
358 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
359 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
360 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
361 false },
362 #endif
363 /* ARMv8-M Security Extensions support. */
364 { "cmse_nonsecure_entry", 0, 0, true, false, false,
365 arm_handle_cmse_nonsecure_entry, false },
366 { "cmse_nonsecure_call", 0, 0, true, false, false,
367 arm_handle_cmse_nonsecure_call, true },
368 { NULL, 0, 0, false, false, false, NULL, false }
369 };
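/* Editorial note, not part of upstream arm.c: a minimal sketch of how the
   attributes registered in the table above are spelled in user code. The
   __attribute__ forms follow the "ARM Function Attributes" section of the
   GCC manual; the function names are hypothetical.

     extern void far_fn (void) __attribute__ ((long_call));
     extern void near_fn (void) __attribute__ ((short_call));
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     double vfp_fn (double x) __attribute__ ((pcs ("aapcs-vfp")));
     void gateway (void) __attribute__ ((cmse_nonsecure_entry));  */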
370 \f
371 /* Initialize the GCC target structure. */
372 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
373 #undef TARGET_MERGE_DECL_ATTRIBUTES
374 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
375 #endif
376
377 #undef TARGET_LEGITIMIZE_ADDRESS
378 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
379
380 #undef TARGET_ATTRIBUTE_TABLE
381 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
382
383 #undef TARGET_INSERT_ATTRIBUTES
384 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
385
386 #undef TARGET_ASM_FILE_START
387 #define TARGET_ASM_FILE_START arm_file_start
388 #undef TARGET_ASM_FILE_END
389 #define TARGET_ASM_FILE_END arm_file_end
390
391 #undef TARGET_ASM_ALIGNED_SI_OP
392 #define TARGET_ASM_ALIGNED_SI_OP NULL
393 #undef TARGET_ASM_INTEGER
394 #define TARGET_ASM_INTEGER arm_assemble_integer
395
396 #undef TARGET_PRINT_OPERAND
397 #define TARGET_PRINT_OPERAND arm_print_operand
398 #undef TARGET_PRINT_OPERAND_ADDRESS
399 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
400 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
401 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
402
403 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
404 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
405
406 #undef TARGET_ASM_FUNCTION_PROLOGUE
407 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
408
409 #undef TARGET_ASM_FUNCTION_EPILOGUE
410 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
411
412 #undef TARGET_CAN_INLINE_P
413 #define TARGET_CAN_INLINE_P arm_can_inline_p
414
415 #undef TARGET_RELAYOUT_FUNCTION
416 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
417
418 #undef TARGET_OPTION_OVERRIDE
419 #define TARGET_OPTION_OVERRIDE arm_option_override
420
421 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
422 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
423
424 #undef TARGET_OPTION_SAVE
425 #define TARGET_OPTION_SAVE arm_option_save
426
427 #undef TARGET_OPTION_RESTORE
428 #define TARGET_OPTION_RESTORE arm_option_restore
429
430 #undef TARGET_OPTION_PRINT
431 #define TARGET_OPTION_PRINT arm_option_print
432
433 #undef TARGET_COMP_TYPE_ATTRIBUTES
434 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
435
436 #undef TARGET_SCHED_CAN_SPECULATE_INSN
437 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
438
439 #undef TARGET_SCHED_MACRO_FUSION_P
440 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
441
442 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
443 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
444
445 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
446 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
447
448 #undef TARGET_SCHED_ADJUST_COST
449 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
450
451 #undef TARGET_SET_CURRENT_FUNCTION
452 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
453
454 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
455 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
456
457 #undef TARGET_SCHED_REORDER
458 #define TARGET_SCHED_REORDER arm_sched_reorder
459
460 #undef TARGET_REGISTER_MOVE_COST
461 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
462
463 #undef TARGET_MEMORY_MOVE_COST
464 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
465
466 #undef TARGET_ENCODE_SECTION_INFO
467 #ifdef ARM_PE
468 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
469 #else
470 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
471 #endif
472
473 #undef TARGET_STRIP_NAME_ENCODING
474 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
475
476 #undef TARGET_ASM_INTERNAL_LABEL
477 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
478
479 #undef TARGET_FLOATN_MODE
480 #define TARGET_FLOATN_MODE arm_floatn_mode
481
482 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
483 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
484
485 #undef TARGET_FUNCTION_VALUE
486 #define TARGET_FUNCTION_VALUE arm_function_value
487
488 #undef TARGET_LIBCALL_VALUE
489 #define TARGET_LIBCALL_VALUE arm_libcall_value
490
491 #undef TARGET_FUNCTION_VALUE_REGNO_P
492 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
493
494 #undef TARGET_ASM_OUTPUT_MI_THUNK
495 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
496 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
497 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
498
499 #undef TARGET_RTX_COSTS
500 #define TARGET_RTX_COSTS arm_rtx_costs
501 #undef TARGET_ADDRESS_COST
502 #define TARGET_ADDRESS_COST arm_address_cost
503
504 #undef TARGET_SHIFT_TRUNCATION_MASK
505 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
506 #undef TARGET_VECTOR_MODE_SUPPORTED_P
507 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
508 #undef TARGET_ARRAY_MODE_SUPPORTED_P
509 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
510 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
511 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
512 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
513 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
514 arm_autovectorize_vector_sizes
515
516 #undef TARGET_MACHINE_DEPENDENT_REORG
517 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
518
519 #undef TARGET_INIT_BUILTINS
520 #define TARGET_INIT_BUILTINS arm_init_builtins
521 #undef TARGET_EXPAND_BUILTIN
522 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
523 #undef TARGET_BUILTIN_DECL
524 #define TARGET_BUILTIN_DECL arm_builtin_decl
525
526 #undef TARGET_INIT_LIBFUNCS
527 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
528
529 #undef TARGET_PROMOTE_FUNCTION_MODE
530 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
531 #undef TARGET_PROMOTE_PROTOTYPES
532 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
533 #undef TARGET_PASS_BY_REFERENCE
534 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
535 #undef TARGET_ARG_PARTIAL_BYTES
536 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
537 #undef TARGET_FUNCTION_ARG
538 #define TARGET_FUNCTION_ARG arm_function_arg
539 #undef TARGET_FUNCTION_ARG_ADVANCE
540 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
541 #undef TARGET_FUNCTION_ARG_PADDING
542 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
543 #undef TARGET_FUNCTION_ARG_BOUNDARY
544 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
545
546 #undef TARGET_SETUP_INCOMING_VARARGS
547 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
548
549 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
550 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
551
552 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
553 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
554 #undef TARGET_TRAMPOLINE_INIT
555 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
556 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
557 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
558
559 #undef TARGET_WARN_FUNC_RETURN
560 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
561
562 #undef TARGET_DEFAULT_SHORT_ENUMS
563 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
564
565 #undef TARGET_ALIGN_ANON_BITFIELD
566 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
567
568 #undef TARGET_NARROW_VOLATILE_BITFIELD
569 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
570
571 #undef TARGET_CXX_GUARD_TYPE
572 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
573
574 #undef TARGET_CXX_GUARD_MASK_BIT
575 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
576
577 #undef TARGET_CXX_GET_COOKIE_SIZE
578 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
579
580 #undef TARGET_CXX_COOKIE_HAS_SIZE
581 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
582
583 #undef TARGET_CXX_CDTOR_RETURNS_THIS
584 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
585
586 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
587 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
588
589 #undef TARGET_CXX_USE_AEABI_ATEXIT
590 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
591
592 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
593 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
594 arm_cxx_determine_class_data_visibility
595
596 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
597 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
598
599 #undef TARGET_RETURN_IN_MSB
600 #define TARGET_RETURN_IN_MSB arm_return_in_msb
601
602 #undef TARGET_RETURN_IN_MEMORY
603 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
604
605 #undef TARGET_MUST_PASS_IN_STACK
606 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
607
608 #if ARM_UNWIND_INFO
609 #undef TARGET_ASM_UNWIND_EMIT
610 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
611
612 /* EABI unwinding tables use a different format for the typeinfo tables. */
613 #undef TARGET_ASM_TTYPE
614 #define TARGET_ASM_TTYPE arm_output_ttype
615
616 #undef TARGET_ARM_EABI_UNWINDER
617 #define TARGET_ARM_EABI_UNWINDER true
618
619 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
620 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
621
622 #endif /* ARM_UNWIND_INFO */
623
624 #undef TARGET_ASM_INIT_SECTIONS
625 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
626
627 #undef TARGET_DWARF_REGISTER_SPAN
628 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
629
630 #undef TARGET_CANNOT_COPY_INSN_P
631 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
632
633 #ifdef HAVE_AS_TLS
634 #undef TARGET_HAVE_TLS
635 #define TARGET_HAVE_TLS true
636 #endif
637
638 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
639 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
640
641 #undef TARGET_LEGITIMATE_CONSTANT_P
642 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
643
644 #undef TARGET_CANNOT_FORCE_CONST_MEM
645 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
646
647 #undef TARGET_MAX_ANCHOR_OFFSET
648 #define TARGET_MAX_ANCHOR_OFFSET 4095
649
650 /* The minimum is set such that the total size of the block
651 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
652 divisible by eight, ensuring natural spacing of anchors. */
653 #undef TARGET_MIN_ANCHOR_OFFSET
654 #define TARGET_MIN_ANCHOR_OFFSET -4088
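/* Editorial check: with the two bounds above, the block addressed from one
   anchor covers offsets -4088 through +4095, i.e. 4088 + 1 + 4095 = 8184
   bytes, and 8184 / 8 = 1023 with no remainder, which is the divisibility
   property the comment above relies on.  */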
655
656 #undef TARGET_SCHED_ISSUE_RATE
657 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
658
659 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
660 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
661 arm_first_cycle_multipass_dfa_lookahead
662
663 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
664 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
665 arm_first_cycle_multipass_dfa_lookahead_guard
666
667 #undef TARGET_MANGLE_TYPE
668 #define TARGET_MANGLE_TYPE arm_mangle_type
669
670 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
671 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
672
673 #undef TARGET_BUILD_BUILTIN_VA_LIST
674 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
675 #undef TARGET_EXPAND_BUILTIN_VA_START
676 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
677 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
678 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
679
680 #ifdef HAVE_AS_TLS
681 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
682 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
683 #endif
684
685 #undef TARGET_LEGITIMATE_ADDRESS_P
686 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
687
688 #undef TARGET_PREFERRED_RELOAD_CLASS
689 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
690
691 #undef TARGET_PROMOTED_TYPE
692 #define TARGET_PROMOTED_TYPE arm_promoted_type
693
694 #undef TARGET_SCALAR_MODE_SUPPORTED_P
695 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
696
697 #undef TARGET_COMPUTE_FRAME_LAYOUT
698 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
699
700 #undef TARGET_FRAME_POINTER_REQUIRED
701 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
702
703 #undef TARGET_CAN_ELIMINATE
704 #define TARGET_CAN_ELIMINATE arm_can_eliminate
705
706 #undef TARGET_CONDITIONAL_REGISTER_USAGE
707 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
708
709 #undef TARGET_CLASS_LIKELY_SPILLED_P
710 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
711
712 #undef TARGET_VECTORIZE_BUILTINS
713 #define TARGET_VECTORIZE_BUILTINS
714
715 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
716 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
717 arm_builtin_vectorized_function
718
719 #undef TARGET_VECTOR_ALIGNMENT
720 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
721
722 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
723 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
724 arm_vector_alignment_reachable
725
726 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
727 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
728 arm_builtin_support_vector_misalignment
729
730 #undef TARGET_PREFERRED_RENAME_CLASS
731 #define TARGET_PREFERRED_RENAME_CLASS \
732 arm_preferred_rename_class
733
734 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
735 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
736 arm_vectorize_vec_perm_const_ok
737
738 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
739 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
740 arm_builtin_vectorization_cost
741 #undef TARGET_VECTORIZE_ADD_STMT_COST
742 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
743
744 #undef TARGET_CANONICALIZE_COMPARISON
745 #define TARGET_CANONICALIZE_COMPARISON \
746 arm_canonicalize_comparison
747
748 #undef TARGET_ASAN_SHADOW_OFFSET
749 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
750
751 #undef MAX_INSN_PER_IT_BLOCK
752 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
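/* Editorial note: arm_restrict_it corresponds to the -mrestrict-it option
   (the ARMv8 "restricted IT" rules), under which an IT block may cover only
   a single conditional instruction; otherwise up to four instructions may
   share one IT block.  */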
753
754 #undef TARGET_CAN_USE_DOLOOP_P
755 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
756
757 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
758 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
759
760 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
761 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
762
763 #undef TARGET_SCHED_FUSION_PRIORITY
764 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
765
766 #undef TARGET_ASM_FUNCTION_SECTION
767 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
768
769 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
770 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
771
772 #undef TARGET_SECTION_TYPE_FLAGS
773 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
774
775 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
776 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
777
778 #undef TARGET_C_EXCESS_PRECISION
779 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
780
781 /* Although the architecture reserves bits 0 and 1, only the former is
782 used for ARM/Thumb ISA selection in v7 and earlier versions. */
783 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
784 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
785
786 #undef TARGET_FIXED_CONDITION_CODE_REGS
787 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
788
789 #undef TARGET_HARD_REGNO_NREGS
790 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
791 #undef TARGET_HARD_REGNO_MODE_OK
792 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
793
794 #undef TARGET_MODES_TIEABLE_P
795 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
796
797 #undef TARGET_CAN_CHANGE_MODE_CLASS
798 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
799
800 #undef TARGET_CONSTANT_ALIGNMENT
801 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
802 \f
803 /* Obstack for minipool constant handling. */
804 static struct obstack minipool_obstack;
805 static char * minipool_startobj;
806
807 /* The maximum number of insns skipped which
808 will be conditionalised if possible. */
809 static int max_insns_skipped = 5;
810
811 extern FILE * asm_out_file;
812
813 /* True if we are currently building a constant table. */
814 int making_const_table;
815
816 /* The processor for which instructions should be scheduled. */
817 enum processor_type arm_tune = TARGET_CPU_arm_none;
818
819 /* The current tuning set. */
820 const struct tune_params *current_tune;
821
822 /* Which floating point hardware to schedule for. */
823 int arm_fpu_attr;
824
825 /* Used for Thumb call_via trampolines. */
826 rtx thumb_call_via_label[14];
827 static int thumb_call_reg_needed;
828
829 /* The bits in this mask specify which instruction scheduling options should
830 be used. */
831 unsigned int tune_flags = 0;
832
833 /* The highest ARM architecture version supported by the
834 target. */
835 enum base_architecture arm_base_arch = BASE_ARCH_0;
836
837 /* Active target architecture and tuning. */
838
839 struct arm_build_target arm_active_target;
840
841 /* The following are used in the arm.md file as equivalents to bits
842 in the above two flag variables. */
843
844 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
845 int arm_arch3m = 0;
846
847 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
848 int arm_arch4 = 0;
849
850 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
851 int arm_arch4t = 0;
852
853 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
854 int arm_arch5 = 0;
855
856 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
857 int arm_arch5e = 0;
858
859 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
860 int arm_arch5te = 0;
861
862 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
863 int arm_arch6 = 0;
864
865 /* Nonzero if this chip supports the ARM 6K extensions. */
866 int arm_arch6k = 0;
867
868 /* Nonzero if this chip supports the ARM 6KZ extensions. */
869 int arm_arch6kz = 0;
870
871 /* Nonzero if instructions present in ARMv6-M can be used. */
872 int arm_arch6m = 0;
873
874 /* Nonzero if this chip supports the ARM 7 extensions. */
875 int arm_arch7 = 0;
876
877 /* Nonzero if this chip supports the Large Physical Address Extension. */
878 int arm_arch_lpae = 0;
879
880 /* Nonzero if instructions not present in the 'M' profile can be used. */
881 int arm_arch_notm = 0;
882
883 /* Nonzero if instructions present in ARMv7E-M can be used. */
884 int arm_arch7em = 0;
885
886 /* Nonzero if instructions present in ARMv8 can be used. */
887 int arm_arch8 = 0;
888
889 /* Nonzero if this chip supports the ARMv8.1 extensions. */
890 int arm_arch8_1 = 0;
891
892 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
893 int arm_arch8_2 = 0;
894
895 /* Nonzero if this chip supports the FP16 instructions extension of ARM
896 Architecture 8.2. */
897 int arm_fp16_inst = 0;
898
899 /* Nonzero if this chip can benefit from load scheduling. */
900 int arm_ld_sched = 0;
901
902 /* Nonzero if this chip is a StrongARM. */
903 int arm_tune_strongarm = 0;
904
905 /* Nonzero if this chip supports Intel Wireless MMX technology. */
906 int arm_arch_iwmmxt = 0;
907
908 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
909 int arm_arch_iwmmxt2 = 0;
910
911 /* Nonzero if this chip is an XScale. */
912 int arm_arch_xscale = 0;
913
914 /* Nonzero if tuning for XScale.  */
915 int arm_tune_xscale = 0;
916
917 /* Nonzero if we want to tune for stores that access the write-buffer.
918 This typically means an ARM6 or ARM7 with MMU or MPU. */
919 int arm_tune_wbuf = 0;
920
921 /* Nonzero if tuning for Cortex-A9. */
922 int arm_tune_cortex_a9 = 0;
923
924 /* Nonzero if we should define __THUMB_INTERWORK__ in the
925 preprocessor.
926 XXX This is a bit of a hack; it's intended to help work around
927 problems in GLD, which doesn't understand that armv5t code is
928 interworking clean. */
929 int arm_cpp_interwork = 0;
930
931 /* Nonzero if chip supports Thumb 1. */
932 int arm_arch_thumb1;
933
934 /* Nonzero if chip supports Thumb 2. */
935 int arm_arch_thumb2;
936
937 /* Nonzero if chip supports integer division instruction. */
938 int arm_arch_arm_hwdiv;
939 int arm_arch_thumb_hwdiv;
940
941 /* Nonzero if chip disallows volatile memory access in IT block. */
942 int arm_arch_no_volatile_ce;
943
944 /* Nonzero if we should use Neon to handle 64-bit operations rather
945 than core registers. */
946 int prefer_neon_for_64bits = 0;
947
948 /* Nonzero if we shouldn't use literal pools. */
949 bool arm_disable_literal_pool = false;
950
951 /* The register number to be used for the PIC offset register. */
952 unsigned arm_pic_register = INVALID_REGNUM;
953
954 enum arm_pcs arm_pcs_default;
955
956 /* For an explanation of these variables, see final_prescan_insn below. */
957 int arm_ccfsm_state;
958 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
959 enum arm_cond_code arm_current_cc;
960
961 rtx arm_target_insn;
962 int arm_target_label;
963 /* The number of conditionally executed insns, including the current insn. */
964 int arm_condexec_count = 0;
965 /* A bitmask specifying the patterns for the IT block.
966 Zero means do not output an IT block before this insn. */
967 int arm_condexec_mask = 0;
968 /* The number of bits used in arm_condexec_mask. */
969 int arm_condexec_masklen = 0;
970
971 /* Nonzero if chip supports the ARMv8 CRC instructions. */
972 int arm_arch_crc = 0;
973
974 /* Nonzero if chip supports the ARMv8-M security extensions. */
975 int arm_arch_cmse = 0;
976
977 /* Nonzero if the core has a very small, high-latency multiply unit. */
978 int arm_m_profile_small_mul = 0;
979
980 /* The condition codes of the ARM, and the inverse function. */
981 static const char * const arm_condition_codes[] =
982 {
983 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
984 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
985 };
986
987 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
988 int arm_regs_in_sequence[] =
989 {
990 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
991 };
992
993 #define ARM_LSL_NAME "lsl"
994 #define streq(string1, string2) (strcmp (string1, string2) == 0)
995
996 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
997 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
998 | (1 << PIC_OFFSET_TABLE_REGNUM)))
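/* Editorial note: the THUMB2_WORK_REGS mask starts from the low registers
   r0-r7 (0xff) and then clears the bits for the Thumb hard frame pointer,
   SP, PC and the PIC register, leaving a pool of candidate work/scratch
   registers.  */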
999 \f
1000 /* Initialization code. */
1001
1002 struct cpu_tune
1003 {
1004 enum processor_type scheduler;
1005 unsigned int tune_flags;
1006 const struct tune_params *tune;
1007 };
1008
1009 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1010 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1011 { \
1012 num_slots, \
1013 l1_size, \
1014 l1_line_size \
1015 }
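/* Editorial note, illustrative only: the tune_params initializers later in
   this file plug one of these into their prefetch fields, either
   ARM_PREFETCH_NOT_BENEFICIAL or, for a core that benefits from software
   prefetching, something along the lines of
     ARM_PREFETCH_BENEFICIAL (4, 32768, 64)
   giving num_slots = 4, l1_size = 32768 and l1_line_size = 64; the numbers
   shown here are hypothetical.  */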
1016
1017 /* arm generic vectorizer costs. */
1018 static const
1019 struct cpu_vec_costs arm_default_vec_cost = {
1020 1, /* scalar_stmt_cost. */
1021 1, /* scalar_load_cost. */
1022 1, /* scalar_store_cost. */
1023 1, /* vec_stmt_cost. */
1024 1, /* vec_to_scalar_cost. */
1025 1, /* scalar_to_vec_cost. */
1026 1, /* vec_align_load_cost. */
1027 1, /* vec_unalign_load_cost. */
1028 1, /* vec_unalign_store_cost. */
1029 1, /* vec_store_cost. */
1030 3, /* cond_taken_branch_cost. */
1031 1, /* cond_not_taken_branch_cost. */
1032 };
1033
1034 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1035 #include "aarch-cost-tables.h"
1036
1037
1038
1039 const struct cpu_cost_table cortexa9_extra_costs =
1040 {
1041 /* ALU */
1042 {
1043 0, /* arith. */
1044 0, /* logical. */
1045 0, /* shift. */
1046 COSTS_N_INSNS (1), /* shift_reg. */
1047 COSTS_N_INSNS (1), /* arith_shift. */
1048 COSTS_N_INSNS (2), /* arith_shift_reg. */
1049 0, /* log_shift. */
1050 COSTS_N_INSNS (1), /* log_shift_reg. */
1051 COSTS_N_INSNS (1), /* extend. */
1052 COSTS_N_INSNS (2), /* extend_arith. */
1053 COSTS_N_INSNS (1), /* bfi. */
1054 COSTS_N_INSNS (1), /* bfx. */
1055 0, /* clz. */
1056 0, /* rev. */
1057 0, /* non_exec. */
1058 true /* non_exec_costs_exec. */
1059 },
1060 {
1061 /* MULT SImode */
1062 {
1063 COSTS_N_INSNS (3), /* simple. */
1064 COSTS_N_INSNS (3), /* flag_setting. */
1065 COSTS_N_INSNS (2), /* extend. */
1066 COSTS_N_INSNS (3), /* add. */
1067 COSTS_N_INSNS (2), /* extend_add. */
1068 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1069 },
1070 /* MULT DImode */
1071 {
1072 0, /* simple (N/A). */
1073 0, /* flag_setting (N/A). */
1074 COSTS_N_INSNS (4), /* extend. */
1075 0, /* add (N/A). */
1076 COSTS_N_INSNS (4), /* extend_add. */
1077 0 /* idiv (N/A). */
1078 }
1079 },
1080 /* LD/ST */
1081 {
1082 COSTS_N_INSNS (2), /* load. */
1083 COSTS_N_INSNS (2), /* load_sign_extend. */
1084 COSTS_N_INSNS (2), /* ldrd. */
1085 COSTS_N_INSNS (2), /* ldm_1st. */
1086 1, /* ldm_regs_per_insn_1st. */
1087 2, /* ldm_regs_per_insn_subsequent. */
1088 COSTS_N_INSNS (5), /* loadf. */
1089 COSTS_N_INSNS (5), /* loadd. */
1090 COSTS_N_INSNS (1), /* load_unaligned. */
1091 COSTS_N_INSNS (2), /* store. */
1092 COSTS_N_INSNS (2), /* strd. */
1093 COSTS_N_INSNS (2), /* stm_1st. */
1094 1, /* stm_regs_per_insn_1st. */
1095 2, /* stm_regs_per_insn_subsequent. */
1096 COSTS_N_INSNS (1), /* storef. */
1097 COSTS_N_INSNS (1), /* stored. */
1098 COSTS_N_INSNS (1), /* store_unaligned. */
1099 COSTS_N_INSNS (1), /* loadv. */
1100 COSTS_N_INSNS (1) /* storev. */
1101 },
1102 {
1103 /* FP SFmode */
1104 {
1105 COSTS_N_INSNS (14), /* div. */
1106 COSTS_N_INSNS (4), /* mult. */
1107 COSTS_N_INSNS (7), /* mult_addsub. */
1108 COSTS_N_INSNS (30), /* fma. */
1109 COSTS_N_INSNS (3), /* addsub. */
1110 COSTS_N_INSNS (1), /* fpconst. */
1111 COSTS_N_INSNS (1), /* neg. */
1112 COSTS_N_INSNS (3), /* compare. */
1113 COSTS_N_INSNS (3), /* widen. */
1114 COSTS_N_INSNS (3), /* narrow. */
1115 COSTS_N_INSNS (3), /* toint. */
1116 COSTS_N_INSNS (3), /* fromint. */
1117 COSTS_N_INSNS (3) /* roundint. */
1118 },
1119 /* FP DFmode */
1120 {
1121 COSTS_N_INSNS (24), /* div. */
1122 COSTS_N_INSNS (5), /* mult. */
1123 COSTS_N_INSNS (8), /* mult_addsub. */
1124 COSTS_N_INSNS (30), /* fma. */
1125 COSTS_N_INSNS (3), /* addsub. */
1126 COSTS_N_INSNS (1), /* fpconst. */
1127 COSTS_N_INSNS (1), /* neg. */
1128 COSTS_N_INSNS (3), /* compare. */
1129 COSTS_N_INSNS (3), /* widen. */
1130 COSTS_N_INSNS (3), /* narrow. */
1131 COSTS_N_INSNS (3), /* toint. */
1132 COSTS_N_INSNS (3), /* fromint. */
1133 COSTS_N_INSNS (3) /* roundint. */
1134 }
1135 },
1136 /* Vector */
1137 {
1138 COSTS_N_INSNS (1) /* alu. */
1139 }
1140 };
1141
1142 const struct cpu_cost_table cortexa8_extra_costs =
1143 {
1144 /* ALU */
1145 {
1146 0, /* arith. */
1147 0, /* logical. */
1148 COSTS_N_INSNS (1), /* shift. */
1149 0, /* shift_reg. */
1150 COSTS_N_INSNS (1), /* arith_shift. */
1151 0, /* arith_shift_reg. */
1152 COSTS_N_INSNS (1), /* log_shift. */
1153 0, /* log_shift_reg. */
1154 0, /* extend. */
1155 0, /* extend_arith. */
1156 0, /* bfi. */
1157 0, /* bfx. */
1158 0, /* clz. */
1159 0, /* rev. */
1160 0, /* non_exec. */
1161 true /* non_exec_costs_exec. */
1162 },
1163 {
1164 /* MULT SImode */
1165 {
1166 COSTS_N_INSNS (1), /* simple. */
1167 COSTS_N_INSNS (1), /* flag_setting. */
1168 COSTS_N_INSNS (1), /* extend. */
1169 COSTS_N_INSNS (1), /* add. */
1170 COSTS_N_INSNS (1), /* extend_add. */
1171 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1172 },
1173 /* MULT DImode */
1174 {
1175 0, /* simple (N/A). */
1176 0, /* flag_setting (N/A). */
1177 COSTS_N_INSNS (2), /* extend. */
1178 0, /* add (N/A). */
1179 COSTS_N_INSNS (2), /* extend_add. */
1180 0 /* idiv (N/A). */
1181 }
1182 },
1183 /* LD/ST */
1184 {
1185 COSTS_N_INSNS (1), /* load. */
1186 COSTS_N_INSNS (1), /* load_sign_extend. */
1187 COSTS_N_INSNS (1), /* ldrd. */
1188 COSTS_N_INSNS (1), /* ldm_1st. */
1189 1, /* ldm_regs_per_insn_1st. */
1190 2, /* ldm_regs_per_insn_subsequent. */
1191 COSTS_N_INSNS (1), /* loadf. */
1192 COSTS_N_INSNS (1), /* loadd. */
1193 COSTS_N_INSNS (1), /* load_unaligned. */
1194 COSTS_N_INSNS (1), /* store. */
1195 COSTS_N_INSNS (1), /* strd. */
1196 COSTS_N_INSNS (1), /* stm_1st. */
1197 1, /* stm_regs_per_insn_1st. */
1198 2, /* stm_regs_per_insn_subsequent. */
1199 COSTS_N_INSNS (1), /* storef. */
1200 COSTS_N_INSNS (1), /* stored. */
1201 COSTS_N_INSNS (1), /* store_unaligned. */
1202 COSTS_N_INSNS (1), /* loadv. */
1203 COSTS_N_INSNS (1) /* storev. */
1204 },
1205 {
1206 /* FP SFmode */
1207 {
1208 COSTS_N_INSNS (36), /* div. */
1209 COSTS_N_INSNS (11), /* mult. */
1210 COSTS_N_INSNS (20), /* mult_addsub. */
1211 COSTS_N_INSNS (30), /* fma. */
1212 COSTS_N_INSNS (9), /* addsub. */
1213 COSTS_N_INSNS (3), /* fpconst. */
1214 COSTS_N_INSNS (3), /* neg. */
1215 COSTS_N_INSNS (6), /* compare. */
1216 COSTS_N_INSNS (4), /* widen. */
1217 COSTS_N_INSNS (4), /* narrow. */
1218 COSTS_N_INSNS (8), /* toint. */
1219 COSTS_N_INSNS (8), /* fromint. */
1220 COSTS_N_INSNS (8) /* roundint. */
1221 },
1222 /* FP DFmode */
1223 {
1224 COSTS_N_INSNS (64), /* div. */
1225 COSTS_N_INSNS (16), /* mult. */
1226 COSTS_N_INSNS (25), /* mult_addsub. */
1227 COSTS_N_INSNS (30), /* fma. */
1228 COSTS_N_INSNS (9), /* addsub. */
1229 COSTS_N_INSNS (3), /* fpconst. */
1230 COSTS_N_INSNS (3), /* neg. */
1231 COSTS_N_INSNS (6), /* compare. */
1232 COSTS_N_INSNS (6), /* widen. */
1233 COSTS_N_INSNS (6), /* narrow. */
1234 COSTS_N_INSNS (8), /* toint. */
1235 COSTS_N_INSNS (8), /* fromint. */
1236 COSTS_N_INSNS (8) /* roundint. */
1237 }
1238 },
1239 /* Vector */
1240 {
1241 COSTS_N_INSNS (1) /* alu. */
1242 }
1243 };
1244
1245 const struct cpu_cost_table cortexa5_extra_costs =
1246 {
1247 /* ALU */
1248 {
1249 0, /* arith. */
1250 0, /* logical. */
1251 COSTS_N_INSNS (1), /* shift. */
1252 COSTS_N_INSNS (1), /* shift_reg. */
1253 COSTS_N_INSNS (1), /* arith_shift. */
1254 COSTS_N_INSNS (1), /* arith_shift_reg. */
1255 COSTS_N_INSNS (1), /* log_shift. */
1256 COSTS_N_INSNS (1), /* log_shift_reg. */
1257 COSTS_N_INSNS (1), /* extend. */
1258 COSTS_N_INSNS (1), /* extend_arith. */
1259 COSTS_N_INSNS (1), /* bfi. */
1260 COSTS_N_INSNS (1), /* bfx. */
1261 COSTS_N_INSNS (1), /* clz. */
1262 COSTS_N_INSNS (1), /* rev. */
1263 0, /* non_exec. */
1264 true /* non_exec_costs_exec. */
1265 },
1266
1267 {
1268 /* MULT SImode */
1269 {
1270 0, /* simple. */
1271 COSTS_N_INSNS (1), /* flag_setting. */
1272 COSTS_N_INSNS (1), /* extend. */
1273 COSTS_N_INSNS (1), /* add. */
1274 COSTS_N_INSNS (1), /* extend_add. */
1275 COSTS_N_INSNS (7) /* idiv. */
1276 },
1277 /* MULT DImode */
1278 {
1279 0, /* simple (N/A). */
1280 0, /* flag_setting (N/A). */
1281 COSTS_N_INSNS (1), /* extend. */
1282 0, /* add. */
1283 COSTS_N_INSNS (2), /* extend_add. */
1284 0 /* idiv (N/A). */
1285 }
1286 },
1287 /* LD/ST */
1288 {
1289 COSTS_N_INSNS (1), /* load. */
1290 COSTS_N_INSNS (1), /* load_sign_extend. */
1291 COSTS_N_INSNS (6), /* ldrd. */
1292 COSTS_N_INSNS (1), /* ldm_1st. */
1293 1, /* ldm_regs_per_insn_1st. */
1294 2, /* ldm_regs_per_insn_subsequent. */
1295 COSTS_N_INSNS (2), /* loadf. */
1296 COSTS_N_INSNS (4), /* loadd. */
1297 COSTS_N_INSNS (1), /* load_unaligned. */
1298 COSTS_N_INSNS (1), /* store. */
1299 COSTS_N_INSNS (3), /* strd. */
1300 COSTS_N_INSNS (1), /* stm_1st. */
1301 1, /* stm_regs_per_insn_1st. */
1302 2, /* stm_regs_per_insn_subsequent. */
1303 COSTS_N_INSNS (2), /* storef. */
1304 COSTS_N_INSNS (2), /* stored. */
1305 COSTS_N_INSNS (1), /* store_unaligned. */
1306 COSTS_N_INSNS (1), /* loadv. */
1307 COSTS_N_INSNS (1) /* storev. */
1308 },
1309 {
1310 /* FP SFmode */
1311 {
1312 COSTS_N_INSNS (15), /* div. */
1313 COSTS_N_INSNS (3), /* mult. */
1314 COSTS_N_INSNS (7), /* mult_addsub. */
1315 COSTS_N_INSNS (7), /* fma. */
1316 COSTS_N_INSNS (3), /* addsub. */
1317 COSTS_N_INSNS (3), /* fpconst. */
1318 COSTS_N_INSNS (3), /* neg. */
1319 COSTS_N_INSNS (3), /* compare. */
1320 COSTS_N_INSNS (3), /* widen. */
1321 COSTS_N_INSNS (3), /* narrow. */
1322 COSTS_N_INSNS (3), /* toint. */
1323 COSTS_N_INSNS (3), /* fromint. */
1324 COSTS_N_INSNS (3) /* roundint. */
1325 },
1326 /* FP DFmode */
1327 {
1328 COSTS_N_INSNS (30), /* div. */
1329 COSTS_N_INSNS (6), /* mult. */
1330 COSTS_N_INSNS (10), /* mult_addsub. */
1331 COSTS_N_INSNS (7), /* fma. */
1332 COSTS_N_INSNS (3), /* addsub. */
1333 COSTS_N_INSNS (3), /* fpconst. */
1334 COSTS_N_INSNS (3), /* neg. */
1335 COSTS_N_INSNS (3), /* compare. */
1336 COSTS_N_INSNS (3), /* widen. */
1337 COSTS_N_INSNS (3), /* narrow. */
1338 COSTS_N_INSNS (3), /* toint. */
1339 COSTS_N_INSNS (3), /* fromint. */
1340 COSTS_N_INSNS (3) /* roundint. */
1341 }
1342 },
1343 /* Vector */
1344 {
1345 COSTS_N_INSNS (1) /* alu. */
1346 }
1347 };
1348
1349
1350 const struct cpu_cost_table cortexa7_extra_costs =
1351 {
1352 /* ALU */
1353 {
1354 0, /* arith. */
1355 0, /* logical. */
1356 COSTS_N_INSNS (1), /* shift. */
1357 COSTS_N_INSNS (1), /* shift_reg. */
1358 COSTS_N_INSNS (1), /* arith_shift. */
1359 COSTS_N_INSNS (1), /* arith_shift_reg. */
1360 COSTS_N_INSNS (1), /* log_shift. */
1361 COSTS_N_INSNS (1), /* log_shift_reg. */
1362 COSTS_N_INSNS (1), /* extend. */
1363 COSTS_N_INSNS (1), /* extend_arith. */
1364 COSTS_N_INSNS (1), /* bfi. */
1365 COSTS_N_INSNS (1), /* bfx. */
1366 COSTS_N_INSNS (1), /* clz. */
1367 COSTS_N_INSNS (1), /* rev. */
1368 0, /* non_exec. */
1369 true /* non_exec_costs_exec. */
1370 },
1371
1372 {
1373 /* MULT SImode */
1374 {
1375 0, /* simple. */
1376 COSTS_N_INSNS (1), /* flag_setting. */
1377 COSTS_N_INSNS (1), /* extend. */
1378 COSTS_N_INSNS (1), /* add. */
1379 COSTS_N_INSNS (1), /* extend_add. */
1380 COSTS_N_INSNS (7) /* idiv. */
1381 },
1382 /* MULT DImode */
1383 {
1384 0, /* simple (N/A). */
1385 0, /* flag_setting (N/A). */
1386 COSTS_N_INSNS (1), /* extend. */
1387 0, /* add. */
1388 COSTS_N_INSNS (2), /* extend_add. */
1389 0 /* idiv (N/A). */
1390 }
1391 },
1392 /* LD/ST */
1393 {
1394 COSTS_N_INSNS (1), /* load. */
1395 COSTS_N_INSNS (1), /* load_sign_extend. */
1396 COSTS_N_INSNS (3), /* ldrd. */
1397 COSTS_N_INSNS (1), /* ldm_1st. */
1398 1, /* ldm_regs_per_insn_1st. */
1399 2, /* ldm_regs_per_insn_subsequent. */
1400 COSTS_N_INSNS (2), /* loadf. */
1401 COSTS_N_INSNS (2), /* loadd. */
1402 COSTS_N_INSNS (1), /* load_unaligned. */
1403 COSTS_N_INSNS (1), /* store. */
1404 COSTS_N_INSNS (3), /* strd. */
1405 COSTS_N_INSNS (1), /* stm_1st. */
1406 1, /* stm_regs_per_insn_1st. */
1407 2, /* stm_regs_per_insn_subsequent. */
1408 COSTS_N_INSNS (2), /* storef. */
1409 COSTS_N_INSNS (2), /* stored. */
1410 COSTS_N_INSNS (1), /* store_unaligned. */
1411 COSTS_N_INSNS (1), /* loadv. */
1412 COSTS_N_INSNS (1) /* storev. */
1413 },
1414 {
1415 /* FP SFmode */
1416 {
1417 COSTS_N_INSNS (15), /* div. */
1418 COSTS_N_INSNS (3), /* mult. */
1419 COSTS_N_INSNS (7), /* mult_addsub. */
1420 COSTS_N_INSNS (7), /* fma. */
1421 COSTS_N_INSNS (3), /* addsub. */
1422 COSTS_N_INSNS (3), /* fpconst. */
1423 COSTS_N_INSNS (3), /* neg. */
1424 COSTS_N_INSNS (3), /* compare. */
1425 COSTS_N_INSNS (3), /* widen. */
1426 COSTS_N_INSNS (3), /* narrow. */
1427 COSTS_N_INSNS (3), /* toint. */
1428 COSTS_N_INSNS (3), /* fromint. */
1429 COSTS_N_INSNS (3) /* roundint. */
1430 },
1431 /* FP DFmode */
1432 {
1433 COSTS_N_INSNS (30), /* div. */
1434 COSTS_N_INSNS (6), /* mult. */
1435 COSTS_N_INSNS (10), /* mult_addsub. */
1436 COSTS_N_INSNS (7), /* fma. */
1437 COSTS_N_INSNS (3), /* addsub. */
1438 COSTS_N_INSNS (3), /* fpconst. */
1439 COSTS_N_INSNS (3), /* neg. */
1440 COSTS_N_INSNS (3), /* compare. */
1441 COSTS_N_INSNS (3), /* widen. */
1442 COSTS_N_INSNS (3), /* narrow. */
1443 COSTS_N_INSNS (3), /* toint. */
1444 COSTS_N_INSNS (3), /* fromint. */
1445 COSTS_N_INSNS (3) /* roundint. */
1446 }
1447 },
1448 /* Vector */
1449 {
1450 COSTS_N_INSNS (1) /* alu. */
1451 }
1452 };
1453
1454 const struct cpu_cost_table cortexa12_extra_costs =
1455 {
1456 /* ALU */
1457 {
1458 0, /* arith. */
1459 0, /* logical. */
1460 0, /* shift. */
1461 COSTS_N_INSNS (1), /* shift_reg. */
1462 COSTS_N_INSNS (1), /* arith_shift. */
1463 COSTS_N_INSNS (1), /* arith_shift_reg. */
1464 COSTS_N_INSNS (1), /* log_shift. */
1465 COSTS_N_INSNS (1), /* log_shift_reg. */
1466 0, /* extend. */
1467 COSTS_N_INSNS (1), /* extend_arith. */
1468 0, /* bfi. */
1469 COSTS_N_INSNS (1), /* bfx. */
1470 COSTS_N_INSNS (1), /* clz. */
1471 COSTS_N_INSNS (1), /* rev. */
1472 0, /* non_exec. */
1473 true /* non_exec_costs_exec. */
1474 },
1475 /* MULT SImode */
1476 {
1477 {
1478 COSTS_N_INSNS (2), /* simple. */
1479 COSTS_N_INSNS (3), /* flag_setting. */
1480 COSTS_N_INSNS (2), /* extend. */
1481 COSTS_N_INSNS (3), /* add. */
1482 COSTS_N_INSNS (2), /* extend_add. */
1483 COSTS_N_INSNS (18) /* idiv. */
1484 },
1485 /* MULT DImode */
1486 {
1487 0, /* simple (N/A). */
1488 0, /* flag_setting (N/A). */
1489 COSTS_N_INSNS (3), /* extend. */
1490 0, /* add (N/A). */
1491 COSTS_N_INSNS (3), /* extend_add. */
1492 0 /* idiv (N/A). */
1493 }
1494 },
1495 /* LD/ST */
1496 {
1497 COSTS_N_INSNS (3), /* load. */
1498 COSTS_N_INSNS (3), /* load_sign_extend. */
1499 COSTS_N_INSNS (3), /* ldrd. */
1500 COSTS_N_INSNS (3), /* ldm_1st. */
1501 1, /* ldm_regs_per_insn_1st. */
1502 2, /* ldm_regs_per_insn_subsequent. */
1503 COSTS_N_INSNS (3), /* loadf. */
1504 COSTS_N_INSNS (3), /* loadd. */
1505 0, /* load_unaligned. */
1506 0, /* store. */
1507 0, /* strd. */
1508 0, /* stm_1st. */
1509 1, /* stm_regs_per_insn_1st. */
1510 2, /* stm_regs_per_insn_subsequent. */
1511 COSTS_N_INSNS (2), /* storef. */
1512 COSTS_N_INSNS (2), /* stored. */
1513 0, /* store_unaligned. */
1514 COSTS_N_INSNS (1), /* loadv. */
1515 COSTS_N_INSNS (1) /* storev. */
1516 },
1517 {
1518 /* FP SFmode */
1519 {
1520 COSTS_N_INSNS (17), /* div. */
1521 COSTS_N_INSNS (4), /* mult. */
1522 COSTS_N_INSNS (8), /* mult_addsub. */
1523 COSTS_N_INSNS (8), /* fma. */
1524 COSTS_N_INSNS (4), /* addsub. */
1525 COSTS_N_INSNS (2), /* fpconst. */
1526 COSTS_N_INSNS (2), /* neg. */
1527 COSTS_N_INSNS (2), /* compare. */
1528 COSTS_N_INSNS (4), /* widen. */
1529 COSTS_N_INSNS (4), /* narrow. */
1530 COSTS_N_INSNS (4), /* toint. */
1531 COSTS_N_INSNS (4), /* fromint. */
1532 COSTS_N_INSNS (4) /* roundint. */
1533 },
1534 /* FP DFmode */
1535 {
1536 COSTS_N_INSNS (31), /* div. */
1537 COSTS_N_INSNS (4), /* mult. */
1538 COSTS_N_INSNS (8), /* mult_addsub. */
1539 COSTS_N_INSNS (8), /* fma. */
1540 COSTS_N_INSNS (4), /* addsub. */
1541 COSTS_N_INSNS (2), /* fpconst. */
1542 COSTS_N_INSNS (2), /* neg. */
1543 COSTS_N_INSNS (2), /* compare. */
1544 COSTS_N_INSNS (4), /* widen. */
1545 COSTS_N_INSNS (4), /* narrow. */
1546 COSTS_N_INSNS (4), /* toint. */
1547 COSTS_N_INSNS (4), /* fromint. */
1548 COSTS_N_INSNS (4) /* roundint. */
1549 }
1550 },
1551 /* Vector */
1552 {
1553 COSTS_N_INSNS (1) /* alu. */
1554 }
1555 };
1556
1557 const struct cpu_cost_table cortexa15_extra_costs =
1558 {
1559 /* ALU */
1560 {
1561 0, /* arith. */
1562 0, /* logical. */
1563 0, /* shift. */
1564 0, /* shift_reg. */
1565 COSTS_N_INSNS (1), /* arith_shift. */
1566 COSTS_N_INSNS (1), /* arith_shift_reg. */
1567 COSTS_N_INSNS (1), /* log_shift. */
1568 COSTS_N_INSNS (1), /* log_shift_reg. */
1569 0, /* extend. */
1570 COSTS_N_INSNS (1), /* extend_arith. */
1571 COSTS_N_INSNS (1), /* bfi. */
1572 0, /* bfx. */
1573 0, /* clz. */
1574 0, /* rev. */
1575 0, /* non_exec. */
1576 true /* non_exec_costs_exec. */
1577 },
1578 /* MULT SImode */
1579 {
1580 {
1581 COSTS_N_INSNS (2), /* simple. */
1582 COSTS_N_INSNS (3), /* flag_setting. */
1583 COSTS_N_INSNS (2), /* extend. */
1584 COSTS_N_INSNS (2), /* add. */
1585 COSTS_N_INSNS (2), /* extend_add. */
1586 COSTS_N_INSNS (18) /* idiv. */
1587 },
1588 /* MULT DImode */
1589 {
1590 0, /* simple (N/A). */
1591 0, /* flag_setting (N/A). */
1592 COSTS_N_INSNS (3), /* extend. */
1593 0, /* add (N/A). */
1594 COSTS_N_INSNS (3), /* extend_add. */
1595 0 /* idiv (N/A). */
1596 }
1597 },
1598 /* LD/ST */
1599 {
1600 COSTS_N_INSNS (3), /* load. */
1601 COSTS_N_INSNS (3), /* load_sign_extend. */
1602 COSTS_N_INSNS (3), /* ldrd. */
1603 COSTS_N_INSNS (4), /* ldm_1st. */
1604 1, /* ldm_regs_per_insn_1st. */
1605 2, /* ldm_regs_per_insn_subsequent. */
1606 COSTS_N_INSNS (4), /* loadf. */
1607 COSTS_N_INSNS (4), /* loadd. */
1608 0, /* load_unaligned. */
1609 0, /* store. */
1610 0, /* strd. */
1611 COSTS_N_INSNS (1), /* stm_1st. */
1612 1, /* stm_regs_per_insn_1st. */
1613 2, /* stm_regs_per_insn_subsequent. */
1614 0, /* storef. */
1615 0, /* stored. */
1616 0, /* store_unaligned. */
1617 COSTS_N_INSNS (1), /* loadv. */
1618 COSTS_N_INSNS (1) /* storev. */
1619 },
1620 {
1621 /* FP SFmode */
1622 {
1623 COSTS_N_INSNS (17), /* div. */
1624 COSTS_N_INSNS (4), /* mult. */
1625 COSTS_N_INSNS (8), /* mult_addsub. */
1626 COSTS_N_INSNS (8), /* fma. */
1627 COSTS_N_INSNS (4), /* addsub. */
1628 COSTS_N_INSNS (2), /* fpconst. */
1629 COSTS_N_INSNS (2), /* neg. */
1630 COSTS_N_INSNS (5), /* compare. */
1631 COSTS_N_INSNS (4), /* widen. */
1632 COSTS_N_INSNS (4), /* narrow. */
1633 COSTS_N_INSNS (4), /* toint. */
1634 COSTS_N_INSNS (4), /* fromint. */
1635 COSTS_N_INSNS (4) /* roundint. */
1636 },
1637 /* FP DFmode */
1638 {
1639 COSTS_N_INSNS (31), /* div. */
1640 COSTS_N_INSNS (4), /* mult. */
1641 COSTS_N_INSNS (8), /* mult_addsub. */
1642 COSTS_N_INSNS (8), /* fma. */
1643 COSTS_N_INSNS (4), /* addsub. */
1644 COSTS_N_INSNS (2), /* fpconst. */
1645 COSTS_N_INSNS (2), /* neg. */
1646 COSTS_N_INSNS (2), /* compare. */
1647 COSTS_N_INSNS (4), /* widen. */
1648 COSTS_N_INSNS (4), /* narrow. */
1649 COSTS_N_INSNS (4), /* toint. */
1650 COSTS_N_INSNS (4), /* fromint. */
1651 COSTS_N_INSNS (4) /* roundint. */
1652 }
1653 },
1654 /* Vector */
1655 {
1656 COSTS_N_INSNS (1) /* alu. */
1657 }
1658 };
1659
1660 const struct cpu_cost_table v7m_extra_costs =
1661 {
1662 /* ALU */
1663 {
1664 0, /* arith. */
1665 0, /* logical. */
1666 0, /* shift. */
1667 0, /* shift_reg. */
1668 0, /* arith_shift. */
1669 COSTS_N_INSNS (1), /* arith_shift_reg. */
1670 0, /* log_shift. */
1671 COSTS_N_INSNS (1), /* log_shift_reg. */
1672 0, /* extend. */
1673 COSTS_N_INSNS (1), /* extend_arith. */
1674 0, /* bfi. */
1675 0, /* bfx. */
1676 0, /* clz. */
1677 0, /* rev. */
1678 COSTS_N_INSNS (1), /* non_exec. */
1679 false /* non_exec_costs_exec. */
1680 },
1681 {
1682 /* MULT SImode */
1683 {
1684 COSTS_N_INSNS (1), /* simple. */
1685 COSTS_N_INSNS (1), /* flag_setting. */
1686 COSTS_N_INSNS (2), /* extend. */
1687 COSTS_N_INSNS (1), /* add. */
1688 COSTS_N_INSNS (3), /* extend_add. */
1689 COSTS_N_INSNS (8) /* idiv. */
1690 },
1691 /* MULT DImode */
1692 {
1693 0, /* simple (N/A). */
1694 0, /* flag_setting (N/A). */
1695 COSTS_N_INSNS (2), /* extend. */
1696 0, /* add (N/A). */
1697 COSTS_N_INSNS (3), /* extend_add. */
1698 0 /* idiv (N/A). */
1699 }
1700 },
1701 /* LD/ST */
1702 {
1703 COSTS_N_INSNS (2), /* load. */
1704 0, /* load_sign_extend. */
1705 COSTS_N_INSNS (3), /* ldrd. */
1706 COSTS_N_INSNS (2), /* ldm_1st. */
1707 1, /* ldm_regs_per_insn_1st. */
1708 1, /* ldm_regs_per_insn_subsequent. */
1709 COSTS_N_INSNS (2), /* loadf. */
1710 COSTS_N_INSNS (3), /* loadd. */
1711 COSTS_N_INSNS (1), /* load_unaligned. */
1712 COSTS_N_INSNS (2), /* store. */
1713 COSTS_N_INSNS (3), /* strd. */
1714 COSTS_N_INSNS (2), /* stm_1st. */
1715 1, /* stm_regs_per_insn_1st. */
1716 1, /* stm_regs_per_insn_subsequent. */
1717 COSTS_N_INSNS (2), /* storef. */
1718 COSTS_N_INSNS (3), /* stored. */
1719 COSTS_N_INSNS (1), /* store_unaligned. */
1720 COSTS_N_INSNS (1), /* loadv. */
1721 COSTS_N_INSNS (1) /* storev. */
1722 },
1723 {
1724 /* FP SFmode */
1725 {
1726 COSTS_N_INSNS (7), /* div. */
1727 COSTS_N_INSNS (2), /* mult. */
1728 COSTS_N_INSNS (5), /* mult_addsub. */
1729 COSTS_N_INSNS (3), /* fma. */
1730 COSTS_N_INSNS (1), /* addsub. */
1731 0, /* fpconst. */
1732 0, /* neg. */
1733 0, /* compare. */
1734 0, /* widen. */
1735 0, /* narrow. */
1736 0, /* toint. */
1737 0, /* fromint. */
1738 0 /* roundint. */
1739 },
1740 /* FP DFmode */
1741 {
1742 COSTS_N_INSNS (15), /* div. */
1743 COSTS_N_INSNS (5), /* mult. */
1744 COSTS_N_INSNS (7), /* mult_addsub. */
1745 COSTS_N_INSNS (7), /* fma. */
1746 COSTS_N_INSNS (3), /* addsub. */
1747 0, /* fpconst. */
1748 0, /* neg. */
1749 0, /* compare. */
1750 0, /* widen. */
1751 0, /* narrow. */
1752 0, /* toint. */
1753 0, /* fromint. */
1754 0 /* roundint. */
1755 }
1756 },
1757 /* Vector */
1758 {
1759 COSTS_N_INSNS (1) /* alu. */
1760 }
1761 };
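/* Note (illustrative, assuming the usual pattern in the arm rtx-cost hooks):
   the tables above hold *extra* costs that are added on top of a base cost of
   COSTS_N_INSNS (1), so with v7m_extra_costs a SImode multiply-accumulate is
   costed roughly as COSTS_N_INSNS (1) + COSTS_N_INSNS (1), while entries of 0
   (e.g. simple ALU operations) leave only the base cost.  */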
1762
1763 const struct tune_params arm_slowmul_tune =
1764 {
1765 &generic_extra_costs, /* Insn extra costs. */
1766 NULL, /* Sched adj cost. */
1767 arm_default_branch_cost,
1768 &arm_default_vec_cost,
1769 3, /* Constant limit. */
1770 5, /* Max cond insns. */
1771 8, /* Memset max inline. */
1772 1, /* Issue rate. */
1773 ARM_PREFETCH_NOT_BENEFICIAL,
1774 tune_params::PREF_CONST_POOL_TRUE,
1775 tune_params::PREF_LDRD_FALSE,
1776 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1777 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1778 tune_params::DISPARAGE_FLAGS_NEITHER,
1779 tune_params::PREF_NEON_64_FALSE,
1780 tune_params::PREF_NEON_STRINGOPS_FALSE,
1781 tune_params::FUSE_NOTHING,
1782 tune_params::SCHED_AUTOPREF_OFF
1783 };
1784
1785 const struct tune_params arm_fastmul_tune =
1786 {
1787 &generic_extra_costs, /* Insn extra costs. */
1788 NULL, /* Sched adj cost. */
1789 arm_default_branch_cost,
1790 &arm_default_vec_cost,
1791 1, /* Constant limit. */
1792 5, /* Max cond insns. */
1793 8, /* Memset max inline. */
1794 1, /* Issue rate. */
1795 ARM_PREFETCH_NOT_BENEFICIAL,
1796 tune_params::PREF_CONST_POOL_TRUE,
1797 tune_params::PREF_LDRD_FALSE,
1798 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1799 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1800 tune_params::DISPARAGE_FLAGS_NEITHER,
1801 tune_params::PREF_NEON_64_FALSE,
1802 tune_params::PREF_NEON_STRINGOPS_FALSE,
1803 tune_params::FUSE_NOTHING,
1804 tune_params::SCHED_AUTOPREF_OFF
1805 };
1806
1807 /* StrongARM has early execution of branches, so a sequence that is worth
1808 skipping is shorter. Set max_insns_skipped to a lower value. */
1809
1810 const struct tune_params arm_strongarm_tune =
1811 {
1812 &generic_extra_costs, /* Insn extra costs. */
1813 NULL, /* Sched adj cost. */
1814 arm_default_branch_cost,
1815 &arm_default_vec_cost,
1816 1, /* Constant limit. */
1817 3, /* Max cond insns. */
1818 8, /* Memset max inline. */
1819 1, /* Issue rate. */
1820 ARM_PREFETCH_NOT_BENEFICIAL,
1821 tune_params::PREF_CONST_POOL_TRUE,
1822 tune_params::PREF_LDRD_FALSE,
1823 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1824 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1825 tune_params::DISPARAGE_FLAGS_NEITHER,
1826 tune_params::PREF_NEON_64_FALSE,
1827 tune_params::PREF_NEON_STRINGOPS_FALSE,
1828 tune_params::FUSE_NOTHING,
1829 tune_params::SCHED_AUTOPREF_OFF
1830 };
1831
1832 const struct tune_params arm_xscale_tune =
1833 {
1834 &generic_extra_costs, /* Insn extra costs. */
1835 xscale_sched_adjust_cost,
1836 arm_default_branch_cost,
1837 &arm_default_vec_cost,
1838 2, /* Constant limit. */
1839 3, /* Max cond insns. */
1840 8, /* Memset max inline. */
1841 1, /* Issue rate. */
1842 ARM_PREFETCH_NOT_BENEFICIAL,
1843 tune_params::PREF_CONST_POOL_TRUE,
1844 tune_params::PREF_LDRD_FALSE,
1845 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1846 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1847 tune_params::DISPARAGE_FLAGS_NEITHER,
1848 tune_params::PREF_NEON_64_FALSE,
1849 tune_params::PREF_NEON_STRINGOPS_FALSE,
1850 tune_params::FUSE_NOTHING,
1851 tune_params::SCHED_AUTOPREF_OFF
1852 };
1853
1854 const struct tune_params arm_9e_tune =
1855 {
1856 &generic_extra_costs, /* Insn extra costs. */
1857 NULL, /* Sched adj cost. */
1858 arm_default_branch_cost,
1859 &arm_default_vec_cost,
1860 1, /* Constant limit. */
1861 5, /* Max cond insns. */
1862 8, /* Memset max inline. */
1863 1, /* Issue rate. */
1864 ARM_PREFETCH_NOT_BENEFICIAL,
1865 tune_params::PREF_CONST_POOL_TRUE,
1866 tune_params::PREF_LDRD_FALSE,
1867 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1868 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1869 tune_params::DISPARAGE_FLAGS_NEITHER,
1870 tune_params::PREF_NEON_64_FALSE,
1871 tune_params::PREF_NEON_STRINGOPS_FALSE,
1872 tune_params::FUSE_NOTHING,
1873 tune_params::SCHED_AUTOPREF_OFF
1874 };
1875
1876 const struct tune_params arm_marvell_pj4_tune =
1877 {
1878 &generic_extra_costs, /* Insn extra costs. */
1879 NULL, /* Sched adj cost. */
1880 arm_default_branch_cost,
1881 &arm_default_vec_cost,
1882 1, /* Constant limit. */
1883 5, /* Max cond insns. */
1884 8, /* Memset max inline. */
1885 2, /* Issue rate. */
1886 ARM_PREFETCH_NOT_BENEFICIAL,
1887 tune_params::PREF_CONST_POOL_TRUE,
1888 tune_params::PREF_LDRD_FALSE,
1889 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1890 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1891 tune_params::DISPARAGE_FLAGS_NEITHER,
1892 tune_params::PREF_NEON_64_FALSE,
1893 tune_params::PREF_NEON_STRINGOPS_FALSE,
1894 tune_params::FUSE_NOTHING,
1895 tune_params::SCHED_AUTOPREF_OFF
1896 };
1897
1898 const struct tune_params arm_v6t2_tune =
1899 {
1900 &generic_extra_costs, /* Insn extra costs. */
1901 NULL, /* Sched adj cost. */
1902 arm_default_branch_cost,
1903 &arm_default_vec_cost,
1904 1, /* Constant limit. */
1905 5, /* Max cond insns. */
1906 8, /* Memset max inline. */
1907 1, /* Issue rate. */
1908 ARM_PREFETCH_NOT_BENEFICIAL,
1909 tune_params::PREF_CONST_POOL_FALSE,
1910 tune_params::PREF_LDRD_FALSE,
1911 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1912 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1913 tune_params::DISPARAGE_FLAGS_NEITHER,
1914 tune_params::PREF_NEON_64_FALSE,
1915 tune_params::PREF_NEON_STRINGOPS_FALSE,
1916 tune_params::FUSE_NOTHING,
1917 tune_params::SCHED_AUTOPREF_OFF
1918 };
1919
1920
1921 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1922 const struct tune_params arm_cortex_tune =
1923 {
1924 &generic_extra_costs,
1925 NULL, /* Sched adj cost. */
1926 arm_default_branch_cost,
1927 &arm_default_vec_cost,
1928 1, /* Constant limit. */
1929 5, /* Max cond insns. */
1930 8, /* Memset max inline. */
1931 2, /* Issue rate. */
1932 ARM_PREFETCH_NOT_BENEFICIAL,
1933 tune_params::PREF_CONST_POOL_FALSE,
1934 tune_params::PREF_LDRD_FALSE,
1935 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1936 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1937 tune_params::DISPARAGE_FLAGS_NEITHER,
1938 tune_params::PREF_NEON_64_FALSE,
1939 tune_params::PREF_NEON_STRINGOPS_FALSE,
1940 tune_params::FUSE_NOTHING,
1941 tune_params::SCHED_AUTOPREF_OFF
1942 };
1943
1944 const struct tune_params arm_cortex_a8_tune =
1945 {
1946 &cortexa8_extra_costs,
1947 NULL, /* Sched adj cost. */
1948 arm_default_branch_cost,
1949 &arm_default_vec_cost,
1950 1, /* Constant limit. */
1951 5, /* Max cond insns. */
1952 8, /* Memset max inline. */
1953 2, /* Issue rate. */
1954 ARM_PREFETCH_NOT_BENEFICIAL,
1955 tune_params::PREF_CONST_POOL_FALSE,
1956 tune_params::PREF_LDRD_FALSE,
1957 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1958 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1959 tune_params::DISPARAGE_FLAGS_NEITHER,
1960 tune_params::PREF_NEON_64_FALSE,
1961 tune_params::PREF_NEON_STRINGOPS_TRUE,
1962 tune_params::FUSE_NOTHING,
1963 tune_params::SCHED_AUTOPREF_OFF
1964 };
1965
1966 const struct tune_params arm_cortex_a7_tune =
1967 {
1968 &cortexa7_extra_costs,
1969 NULL, /* Sched adj cost. */
1970 arm_default_branch_cost,
1971 &arm_default_vec_cost,
1972 1, /* Constant limit. */
1973 5, /* Max cond insns. */
1974 8, /* Memset max inline. */
1975 2, /* Issue rate. */
1976 ARM_PREFETCH_NOT_BENEFICIAL,
1977 tune_params::PREF_CONST_POOL_FALSE,
1978 tune_params::PREF_LDRD_FALSE,
1979 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1980 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1981 tune_params::DISPARAGE_FLAGS_NEITHER,
1982 tune_params::PREF_NEON_64_FALSE,
1983 tune_params::PREF_NEON_STRINGOPS_TRUE,
1984 tune_params::FUSE_NOTHING,
1985 tune_params::SCHED_AUTOPREF_OFF
1986 };
1987
1988 const struct tune_params arm_cortex_a15_tune =
1989 {
1990 &cortexa15_extra_costs,
1991 NULL, /* Sched adj cost. */
1992 arm_default_branch_cost,
1993 &arm_default_vec_cost,
1994 1, /* Constant limit. */
1995 2, /* Max cond insns. */
1996 8, /* Memset max inline. */
1997 3, /* Issue rate. */
1998 ARM_PREFETCH_NOT_BENEFICIAL,
1999 tune_params::PREF_CONST_POOL_FALSE,
2000 tune_params::PREF_LDRD_TRUE,
2001 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2002 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2003 tune_params::DISPARAGE_FLAGS_ALL,
2004 tune_params::PREF_NEON_64_FALSE,
2005 tune_params::PREF_NEON_STRINGOPS_TRUE,
2006 tune_params::FUSE_NOTHING,
2007 tune_params::SCHED_AUTOPREF_FULL
2008 };
2009
2010 const struct tune_params arm_cortex_a35_tune =
2011 {
2012 &cortexa53_extra_costs,
2013 NULL, /* Sched adj cost. */
2014 arm_default_branch_cost,
2015 &arm_default_vec_cost,
2016 1, /* Constant limit. */
2017 5, /* Max cond insns. */
2018 8, /* Memset max inline. */
2019 1, /* Issue rate. */
2020 ARM_PREFETCH_NOT_BENEFICIAL,
2021 tune_params::PREF_CONST_POOL_FALSE,
2022 tune_params::PREF_LDRD_FALSE,
2023 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2024 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2025 tune_params::DISPARAGE_FLAGS_NEITHER,
2026 tune_params::PREF_NEON_64_FALSE,
2027 tune_params::PREF_NEON_STRINGOPS_TRUE,
2028 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2029 tune_params::SCHED_AUTOPREF_OFF
2030 };
2031
2032 const struct tune_params arm_cortex_a53_tune =
2033 {
2034 &cortexa53_extra_costs,
2035 NULL, /* Sched adj cost. */
2036 arm_default_branch_cost,
2037 &arm_default_vec_cost,
2038 1, /* Constant limit. */
2039 5, /* Max cond insns. */
2040 8, /* Memset max inline. */
2041 2, /* Issue rate. */
2042 ARM_PREFETCH_NOT_BENEFICIAL,
2043 tune_params::PREF_CONST_POOL_FALSE,
2044 tune_params::PREF_LDRD_FALSE,
2045 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2046 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2047 tune_params::DISPARAGE_FLAGS_NEITHER,
2048 tune_params::PREF_NEON_64_FALSE,
2049 tune_params::PREF_NEON_STRINGOPS_TRUE,
2050 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2051 tune_params::SCHED_AUTOPREF_OFF
2052 };
2053
2054 const struct tune_params arm_cortex_a57_tune =
2055 {
2056 &cortexa57_extra_costs,
2057 NULL, /* Sched adj cost. */
2058 arm_default_branch_cost,
2059 &arm_default_vec_cost,
2060 1, /* Constant limit. */
2061 2, /* Max cond insns. */
2062 8, /* Memset max inline. */
2063 3, /* Issue rate. */
2064 ARM_PREFETCH_NOT_BENEFICIAL,
2065 tune_params::PREF_CONST_POOL_FALSE,
2066 tune_params::PREF_LDRD_TRUE,
2067 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2068 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2069 tune_params::DISPARAGE_FLAGS_ALL,
2070 tune_params::PREF_NEON_64_FALSE,
2071 tune_params::PREF_NEON_STRINGOPS_TRUE,
2072 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2073 tune_params::SCHED_AUTOPREF_FULL
2074 };
2075
2076 const struct tune_params arm_exynosm1_tune =
2077 {
2078 &exynosm1_extra_costs,
2079 NULL, /* Sched adj cost. */
2080 arm_default_branch_cost,
2081 &arm_default_vec_cost,
2082 1, /* Constant limit. */
2083 2, /* Max cond insns. */
2084 8, /* Memset max inline. */
2085 3, /* Issue rate. */
2086 ARM_PREFETCH_NOT_BENEFICIAL,
2087 tune_params::PREF_CONST_POOL_FALSE,
2088 tune_params::PREF_LDRD_TRUE,
2089 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2090 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2091 tune_params::DISPARAGE_FLAGS_ALL,
2092 tune_params::PREF_NEON_64_FALSE,
2093 tune_params::PREF_NEON_STRINGOPS_TRUE,
2094 tune_params::FUSE_NOTHING,
2095 tune_params::SCHED_AUTOPREF_OFF
2096 };
2097
2098 const struct tune_params arm_xgene1_tune =
2099 {
2100 &xgene1_extra_costs,
2101 NULL, /* Sched adj cost. */
2102 arm_default_branch_cost,
2103 &arm_default_vec_cost,
2104 1, /* Constant limit. */
2105 2, /* Max cond insns. */
2106 32, /* Memset max inline. */
2107 4, /* Issue rate. */
2108 ARM_PREFETCH_NOT_BENEFICIAL,
2109 tune_params::PREF_CONST_POOL_FALSE,
2110 tune_params::PREF_LDRD_TRUE,
2111 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2112 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2113 tune_params::DISPARAGE_FLAGS_ALL,
2114 tune_params::PREF_NEON_64_FALSE,
2115 tune_params::PREF_NEON_STRINGOPS_FALSE,
2116 tune_params::FUSE_NOTHING,
2117 tune_params::SCHED_AUTOPREF_OFF
2118 };
2119
2120 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2121 less appealing. Set max_insns_skipped to a low value. */
2122
2123 const struct tune_params arm_cortex_a5_tune =
2124 {
2125 &cortexa5_extra_costs,
2126 NULL, /* Sched adj cost. */
2127 arm_cortex_a5_branch_cost,
2128 &arm_default_vec_cost,
2129 1, /* Constant limit. */
2130 1, /* Max cond insns. */
2131 8, /* Memset max inline. */
2132 2, /* Issue rate. */
2133 ARM_PREFETCH_NOT_BENEFICIAL,
2134 tune_params::PREF_CONST_POOL_FALSE,
2135 tune_params::PREF_LDRD_FALSE,
2136 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2137 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2138 tune_params::DISPARAGE_FLAGS_NEITHER,
2139 tune_params::PREF_NEON_64_FALSE,
2140 tune_params::PREF_NEON_STRINGOPS_TRUE,
2141 tune_params::FUSE_NOTHING,
2142 tune_params::SCHED_AUTOPREF_OFF
2143 };
2144
2145 const struct tune_params arm_cortex_a9_tune =
2146 {
2147 &cortexa9_extra_costs,
2148 cortex_a9_sched_adjust_cost,
2149 arm_default_branch_cost,
2150 &arm_default_vec_cost,
2151 1, /* Constant limit. */
2152 5, /* Max cond insns. */
2153 8, /* Memset max inline. */
2154 2, /* Issue rate. */
2155 ARM_PREFETCH_BENEFICIAL(4,32,32),
2156 tune_params::PREF_CONST_POOL_FALSE,
2157 tune_params::PREF_LDRD_FALSE,
2158 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2159 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2160 tune_params::DISPARAGE_FLAGS_NEITHER,
2161 tune_params::PREF_NEON_64_FALSE,
2162 tune_params::PREF_NEON_STRINGOPS_FALSE,
2163 tune_params::FUSE_NOTHING,
2164 tune_params::SCHED_AUTOPREF_OFF
2165 };
2166
2167 const struct tune_params arm_cortex_a12_tune =
2168 {
2169 &cortexa12_extra_costs,
2170 NULL, /* Sched adj cost. */
2171 arm_default_branch_cost,
2172 &arm_default_vec_cost, /* Vectorizer costs. */
2173 1, /* Constant limit. */
2174 2, /* Max cond insns. */
2175 8, /* Memset max inline. */
2176 2, /* Issue rate. */
2177 ARM_PREFETCH_NOT_BENEFICIAL,
2178 tune_params::PREF_CONST_POOL_FALSE,
2179 tune_params::PREF_LDRD_TRUE,
2180 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2181 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2182 tune_params::DISPARAGE_FLAGS_ALL,
2183 tune_params::PREF_NEON_64_FALSE,
2184 tune_params::PREF_NEON_STRINGOPS_TRUE,
2185 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2186 tune_params::SCHED_AUTOPREF_OFF
2187 };
2188
2189 const struct tune_params arm_cortex_a73_tune =
2190 {
2191 &cortexa57_extra_costs,
2192 NULL, /* Sched adj cost. */
2193 arm_default_branch_cost,
2194 &arm_default_vec_cost, /* Vectorizer costs. */
2195 1, /* Constant limit. */
2196 2, /* Max cond insns. */
2197 8, /* Memset max inline. */
2198 2, /* Issue rate. */
2199 ARM_PREFETCH_NOT_BENEFICIAL,
2200 tune_params::PREF_CONST_POOL_FALSE,
2201 tune_params::PREF_LDRD_TRUE,
2202 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2203 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2204 tune_params::DISPARAGE_FLAGS_ALL,
2205 tune_params::PREF_NEON_64_FALSE,
2206 tune_params::PREF_NEON_STRINGOPS_TRUE,
2207 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2208 tune_params::SCHED_AUTOPREF_FULL
2209 };
2210
2211 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW and MOVT each take a
2212 single cycle, so materialising a 32-bit constant with a MOVW/MOVT pair costs
2213 two cycles. An LDR from the constant pool also takes two cycles, but mildly
2214 increases pipelining opportunity (consecutive loads/stores can be pipelined
2215 together, saving one cycle) and may also improve icache utilisation. Hence
2216 we prefer the constant pool for such processors. */
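/* Illustrative sketch (not from the original comment) of the two sequences
   being compared for a 32-bit constant X:

       movw    r0, #:lower16:X     @ 1 cycle
       movt    r0, #:upper16:X     @ 1 cycle

   versus

       ldr     r0, .Lpool_entry    @ 2 cycles, but can pipeline with an
                                   @ adjacent load/store

   which is why PREF_CONST_POOL_TRUE is chosen below.  */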
2217
2218 const struct tune_params arm_v7m_tune =
2219 {
2220 &v7m_extra_costs,
2221 NULL, /* Sched adj cost. */
2222 arm_cortex_m_branch_cost,
2223 &arm_default_vec_cost,
2224 1, /* Constant limit. */
2225 2, /* Max cond insns. */
2226 8, /* Memset max inline. */
2227 1, /* Issue rate. */
2228 ARM_PREFETCH_NOT_BENEFICIAL,
2229 tune_params::PREF_CONST_POOL_TRUE,
2230 tune_params::PREF_LDRD_FALSE,
2231 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2232 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2233 tune_params::DISPARAGE_FLAGS_NEITHER,
2234 tune_params::PREF_NEON_64_FALSE,
2235 tune_params::PREF_NEON_STRINGOPS_FALSE,
2236 tune_params::FUSE_NOTHING,
2237 tune_params::SCHED_AUTOPREF_OFF
2238 };
2239
2240 /* Cortex-M7 tuning. */
2241
2242 const struct tune_params arm_cortex_m7_tune =
2243 {
2244 &v7m_extra_costs,
2245 NULL, /* Sched adj cost. */
2246 arm_cortex_m7_branch_cost,
2247 &arm_default_vec_cost,
2248 0, /* Constant limit. */
2249 1, /* Max cond insns. */
2250 8, /* Memset max inline. */
2251 2, /* Issue rate. */
2252 ARM_PREFETCH_NOT_BENEFICIAL,
2253 tune_params::PREF_CONST_POOL_TRUE,
2254 tune_params::PREF_LDRD_FALSE,
2255 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2256 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2257 tune_params::DISPARAGE_FLAGS_NEITHER,
2258 tune_params::PREF_NEON_64_FALSE,
2259 tune_params::PREF_NEON_STRINGOPS_FALSE,
2260 tune_params::FUSE_NOTHING,
2261 tune_params::SCHED_AUTOPREF_OFF
2262 };
2263
2264 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2265 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2266 cortex-m23. */
2267 const struct tune_params arm_v6m_tune =
2268 {
2269 &generic_extra_costs, /* Insn extra costs. */
2270 NULL, /* Sched adj cost. */
2271 arm_default_branch_cost,
2272 &arm_default_vec_cost, /* Vectorizer costs. */
2273 1, /* Constant limit. */
2274 5, /* Max cond insns. */
2275 8, /* Memset max inline. */
2276 1, /* Issue rate. */
2277 ARM_PREFETCH_NOT_BENEFICIAL,
2278 tune_params::PREF_CONST_POOL_FALSE,
2279 tune_params::PREF_LDRD_FALSE,
2280 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2281 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2282 tune_params::DISPARAGE_FLAGS_NEITHER,
2283 tune_params::PREF_NEON_64_FALSE,
2284 tune_params::PREF_NEON_STRINGOPS_FALSE,
2285 tune_params::FUSE_NOTHING,
2286 tune_params::SCHED_AUTOPREF_OFF
2287 };
2288
2289 const struct tune_params arm_fa726te_tune =
2290 {
2291 &generic_extra_costs, /* Insn extra costs. */
2292 fa726te_sched_adjust_cost,
2293 arm_default_branch_cost,
2294 &arm_default_vec_cost,
2295 1, /* Constant limit. */
2296 5, /* Max cond insns. */
2297 8, /* Memset max inline. */
2298 2, /* Issue rate. */
2299 ARM_PREFETCH_NOT_BENEFICIAL,
2300 tune_params::PREF_CONST_POOL_TRUE,
2301 tune_params::PREF_LDRD_FALSE,
2302 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2303 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2304 tune_params::DISPARAGE_FLAGS_NEITHER,
2305 tune_params::PREF_NEON_64_FALSE,
2306 tune_params::PREF_NEON_STRINGOPS_FALSE,
2307 tune_params::FUSE_NOTHING,
2308 tune_params::SCHED_AUTOPREF_OFF
2309 };
2310
2311 /* Auto-generated CPU, FPU and architecture tables. */
2312 #include "arm-cpu-data.h"
2313
2314 /* The name of the preprocessor macro to define for this architecture. The
2315 PROFILE placeholder is replaced by the architecture name (e.g. 8A) in
2316 arm_option_override () and is sized to hold the longest architecture name. */
2317
2318 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2319
2320 /* Supported TLS relocations. */
2321
2322 enum tls_reloc {
2323 TLS_GD32,
2324 TLS_LDM32,
2325 TLS_LDO32,
2326 TLS_IE32,
2327 TLS_LE32,
2328 TLS_DESCSEQ /* GNU scheme */
2329 };
2330
2331 /* The maximum number of insns to be used when loading a constant. */
2332 inline static int
2333 arm_constant_limit (bool size_p)
2334 {
2335 return size_p ? 1 : current_tune->constant_limit;
2336 }
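/* For instance (illustrative): with -Os this always returns 1, while a core
   using arm_slowmul_tune gets its constant_limit of 3 from the table above,
   i.e. roughly how many insns may be spent synthesising a constant inline
   before some other strategy is preferred.  */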
2337
2338 /* Emit an insn that's a simple single-set. Both operands must be known
2339 to be valid. */
2340 inline static rtx_insn *
2341 emit_set_insn (rtx x, rtx y)
2342 {
2343 return emit_insn (gen_rtx_SET (x, y));
2344 }
2345
2346 /* Return the number of bits set in VALUE. */
2347 static unsigned
2348 bit_count (unsigned long value)
2349 {
2350 unsigned long count = 0;
2351
2352 while (value)
2353 {
2354 count++;
2355 value &= value - 1; /* Clear the least-significant set bit. */
2356 }
2357
2358 return count;
2359 }
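/* Worked example (illustrative): value = 0b101100
     iteration 1: 0b101100 & 0b101011 = 0b101000
     iteration 2: 0b101000 & 0b100111 = 0b100000
     iteration 3: 0b100000 & 0b011111 = 0b000000
   giving a count of 3, i.e. one iteration per set bit.  */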
2360
2361 /* Return the number of bits set in BMAP. */
2362 static unsigned
2363 bitmap_popcount (const sbitmap bmap)
2364 {
2365 unsigned int count = 0;
2366 unsigned int n = 0;
2367 sbitmap_iterator sbi;
2368
2369 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2370 count++;
2371 return count;
2372 }
2373
2374 typedef struct
2375 {
2376 machine_mode mode;
2377 const char *name;
2378 } arm_fixed_mode_set;
2379
2380 /* A small helper for registering the fixed-point optab libfuncs. */
2381
2382 static void
2383 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2384 const char *funcname, const char *modename,
2385 int num_suffix)
2386 {
2387 char buffer[50];
2388
2389 if (num_suffix == 0)
2390 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2391 else
2392 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2393
2394 set_optab_libfunc (optable, mode, buffer);
2395 }
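/* For example (illustrative, using combinations that appear in the loops
   below): this registers names such as

       __gnu_addqq3    for add_optab in QQmode,
       __gnu_ssaddha3  for ssadd_optab in HAmode,
       __gnu_negsa2    for neg_optab in SAmode,

   which are expected to match the __gnu_-prefixed fixed-point routines
   provided by libgcc.  */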
2396
2397 static void
2398 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2399 machine_mode from, const char *funcname,
2400 const char *toname, const char *fromname)
2401 {
2402 char buffer[50];
2403 const char *maybe_suffix_2 = "";
2404
2405 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2406 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2407 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2408 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2409 maybe_suffix_2 = "2";
2410
2411 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2412 maybe_suffix_2);
2413
2414 set_conv_libfunc (optable, to, from, buffer);
2415 }
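/* For example (illustrative): a QQmode to HQmode conversion (both signed
   fractional modes) takes the "2" suffix and is registered as
   "__gnu_fractqqhq2", whereas QQmode to SImode becomes "__gnu_fractqqsi"
   because the destination is not a fixed-point mode.  */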
2416
2417 /* Set up library functions unique to ARM. */
2418
2419 static void
2420 arm_init_libfuncs (void)
2421 {
2422 /* For Linux, we have access to kernel support for atomic operations. */
2423 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2424 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2425
2426 /* There are no special library functions unless we are using the
2427 ARM BPABI. */
2428 if (!TARGET_BPABI)
2429 return;
2430
2431 /* The functions below are described in Section 4 of the "Run-Time
2432 ABI for the ARM architecture", Version 1.0. */
2433
2434 /* Double-precision floating-point arithmetic. Table 2. */
2435 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2436 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2437 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2438 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2439 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
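/* Illustration (a sketch, not part of the run-time ABI text): with the entry
   above, a soft-float double addition such as

       double add_d (double a, double b) { return a + b; }

   is expanded as a call to __aeabi_dadd instead of libgcc's default
   __adddf3.  */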
2440
2441 /* Double-precision comparisons. Table 3. */
2442 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2443 set_optab_libfunc (ne_optab, DFmode, NULL);
2444 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2445 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2446 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2447 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2448 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2449
2450 /* Single-precision floating-point arithmetic. Table 4. */
2451 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2452 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2453 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2454 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2455 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2456
2457 /* Single-precision comparisons. Table 5. */
2458 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2459 set_optab_libfunc (ne_optab, SFmode, NULL);
2460 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2461 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2462 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2463 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2464 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2465
2466 /* Floating-point to integer conversions. Table 6. */
2467 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2468 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2469 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2470 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2471 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2472 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2473 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2474 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2475
2476 /* Conversions between floating types. Table 7. */
2477 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2478 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2479
2480 /* Integer to floating-point conversions. Table 8. */
2481 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2482 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2483 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2484 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2485 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2486 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2487 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2488 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2489
2490 /* Long long. Table 9. */
2491 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2492 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2493 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2494 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2495 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2496 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2497 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2498 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2499
2500 /* Integer (32/32->32) division. \S 4.3.1. */
2501 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2502 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2503
2504 /* The divmod functions are designed so that they can be used for
2505 plain division, even though they return both the quotient and the
2506 remainder. The quotient is returned in the usual location (i.e.,
2507 r0 for SImode, {r0, r1} for DImode), just as would be expected
2508 for an ordinary division routine. Because the AAPCS calling
2509 conventions specify that all of { r0, r1, r2, r3 } are
2510 call-clobbered (caller-saved) registers, there is no need to tell the compiler
2511 explicitly that those registers are clobbered by these
2512 routines. */
2513 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2514 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
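/* Illustrative sketch of the AEABI convention relied on above: for SImode,
   __aeabi_idivmod returns the quotient in r0 and the remainder in r1, so
   using it for a plain division simply ignores r1; for DImode,
   __aeabi_ldivmod returns the quotient in {r0, r1} and the remainder in
   {r2, r3}.  */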
2515
2516 /* For SImode division the ABI provides div-without-mod routines,
2517 which are faster. */
2518 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2519 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2520
2521 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2522 divmod libcalls instead. */
2523 set_optab_libfunc (smod_optab, DImode, NULL);
2524 set_optab_libfunc (umod_optab, DImode, NULL);
2525 set_optab_libfunc (smod_optab, SImode, NULL);
2526 set_optab_libfunc (umod_optab, SImode, NULL);
2527
2528 /* Half-precision float operations. The compiler handles all operations
2529 with NULL libfuncs by converting to SFmode. */
2530 switch (arm_fp16_format)
2531 {
2532 case ARM_FP16_FORMAT_IEEE:
2533 case ARM_FP16_FORMAT_ALTERNATIVE:
2534
2535 /* Conversions. */
2536 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2537 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2538 ? "__gnu_f2h_ieee"
2539 : "__gnu_f2h_alternative"));
2540 set_conv_libfunc (sext_optab, SFmode, HFmode,
2541 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2542 ? "__gnu_h2f_ieee"
2543 : "__gnu_h2f_alternative"));
2544
2545 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2546 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2547 ? "__gnu_d2h_ieee"
2548 : "__gnu_d2h_alternative"));
2549
2550 /* Arithmetic. */
2551 set_optab_libfunc (add_optab, HFmode, NULL);
2552 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2553 set_optab_libfunc (smul_optab, HFmode, NULL);
2554 set_optab_libfunc (neg_optab, HFmode, NULL);
2555 set_optab_libfunc (sub_optab, HFmode, NULL);
2556
2557 /* Comparisons. */
2558 set_optab_libfunc (eq_optab, HFmode, NULL);
2559 set_optab_libfunc (ne_optab, HFmode, NULL);
2560 set_optab_libfunc (lt_optab, HFmode, NULL);
2561 set_optab_libfunc (le_optab, HFmode, NULL);
2562 set_optab_libfunc (ge_optab, HFmode, NULL);
2563 set_optab_libfunc (gt_optab, HFmode, NULL);
2564 set_optab_libfunc (unord_optab, HFmode, NULL);
2565 break;
2566
2567 default:
2568 break;
2569 }
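/* Illustrative consequence of the NULL entries above: an __fp16 addition is
   performed by widening both operands to SFmode (via __gnu_h2f_ieee /
   __gnu_h2f_alternative, or hardware conversions where available), adding in
   SFmode, and narrowing the result back with the matching __gnu_f2h_*
   routine.  */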
2570
2571 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2572 {
2573 const arm_fixed_mode_set fixed_arith_modes[] =
2574 {
2575 { E_QQmode, "qq" },
2576 { E_UQQmode, "uqq" },
2577 { E_HQmode, "hq" },
2578 { E_UHQmode, "uhq" },
2579 { E_SQmode, "sq" },
2580 { E_USQmode, "usq" },
2581 { E_DQmode, "dq" },
2582 { E_UDQmode, "udq" },
2583 { E_TQmode, "tq" },
2584 { E_UTQmode, "utq" },
2585 { E_HAmode, "ha" },
2586 { E_UHAmode, "uha" },
2587 { E_SAmode, "sa" },
2588 { E_USAmode, "usa" },
2589 { E_DAmode, "da" },
2590 { E_UDAmode, "uda" },
2591 { E_TAmode, "ta" },
2592 { E_UTAmode, "uta" }
2593 };
2594 const arm_fixed_mode_set fixed_conv_modes[] =
2595 {
2596 { E_QQmode, "qq" },
2597 { E_UQQmode, "uqq" },
2598 { E_HQmode, "hq" },
2599 { E_UHQmode, "uhq" },
2600 { E_SQmode, "sq" },
2601 { E_USQmode, "usq" },
2602 { E_DQmode, "dq" },
2603 { E_UDQmode, "udq" },
2604 { E_TQmode, "tq" },
2605 { E_UTQmode, "utq" },
2606 { E_HAmode, "ha" },
2607 { E_UHAmode, "uha" },
2608 { E_SAmode, "sa" },
2609 { E_USAmode, "usa" },
2610 { E_DAmode, "da" },
2611 { E_UDAmode, "uda" },
2612 { E_TAmode, "ta" },
2613 { E_UTAmode, "uta" },
2614 { E_QImode, "qi" },
2615 { E_HImode, "hi" },
2616 { E_SImode, "si" },
2617 { E_DImode, "di" },
2618 { E_TImode, "ti" },
2619 { E_SFmode, "sf" },
2620 { E_DFmode, "df" }
2621 };
2622 unsigned int i, j;
2623
2624 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2625 {
2626 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2627 "add", fixed_arith_modes[i].name, 3);
2628 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2629 "ssadd", fixed_arith_modes[i].name, 3);
2630 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2631 "usadd", fixed_arith_modes[i].name, 3);
2632 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2633 "sub", fixed_arith_modes[i].name, 3);
2634 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2635 "sssub", fixed_arith_modes[i].name, 3);
2636 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2637 "ussub", fixed_arith_modes[i].name, 3);
2638 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2639 "mul", fixed_arith_modes[i].name, 3);
2640 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2641 "ssmul", fixed_arith_modes[i].name, 3);
2642 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2643 "usmul", fixed_arith_modes[i].name, 3);
2644 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2645 "div", fixed_arith_modes[i].name, 3);
2646 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2647 "udiv", fixed_arith_modes[i].name, 3);
2648 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2649 "ssdiv", fixed_arith_modes[i].name, 3);
2650 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2651 "usdiv", fixed_arith_modes[i].name, 3);
2652 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2653 "neg", fixed_arith_modes[i].name, 2);
2654 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2655 "ssneg", fixed_arith_modes[i].name, 2);
2656 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2657 "usneg", fixed_arith_modes[i].name, 2);
2658 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2659 "ashl", fixed_arith_modes[i].name, 3);
2660 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2661 "ashr", fixed_arith_modes[i].name, 3);
2662 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2663 "lshr", fixed_arith_modes[i].name, 3);
2664 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2665 "ssashl", fixed_arith_modes[i].name, 3);
2666 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2667 "usashl", fixed_arith_modes[i].name, 3);
2668 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2669 "cmp", fixed_arith_modes[i].name, 2);
2670 }
2671
2672 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2673 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2674 {
2675 if (i == j
2676 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2677 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2678 continue;
2679
2680 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2681 fixed_conv_modes[j].mode, "fract",
2682 fixed_conv_modes[i].name,
2683 fixed_conv_modes[j].name);
2684 arm_set_fixed_conv_libfunc (satfract_optab,
2685 fixed_conv_modes[i].mode,
2686 fixed_conv_modes[j].mode, "satfract",
2687 fixed_conv_modes[i].name,
2688 fixed_conv_modes[j].name);
2689 arm_set_fixed_conv_libfunc (fractuns_optab,
2690 fixed_conv_modes[i].mode,
2691 fixed_conv_modes[j].mode, "fractuns",
2692 fixed_conv_modes[i].name,
2693 fixed_conv_modes[j].name);
2694 arm_set_fixed_conv_libfunc (satfractuns_optab,
2695 fixed_conv_modes[i].mode,
2696 fixed_conv_modes[j].mode, "satfractuns",
2697 fixed_conv_modes[i].name,
2698 fixed_conv_modes[j].name);
2699 }
2700 }
2701
2702 if (TARGET_AAPCS_BASED)
2703 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2704 }
2705
2706 /* On AAPCS systems, this is the "struct __va_list". */
2707 static GTY(()) tree va_list_type;
2708
2709 /* Return the type to use as __builtin_va_list. */
2710 static tree
2711 arm_build_builtin_va_list (void)
2712 {
2713 tree va_list_name;
2714 tree ap_field;
2715
2716 if (!TARGET_AAPCS_BASED)
2717 return std_build_builtin_va_list ();
2718
2719 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2720 defined as:
2721
2722 struct __va_list
2723 {
2724 void *__ap;
2725 };
2726
2727 The C Library ABI further reinforces this definition in \S
2728 4.1.
2729
2730 We must follow this definition exactly. The structure tag
2731 name is visible in C++ mangled names, and thus forms a part
2732 of the ABI. The field name may be used by people who
2733 #include <stdarg.h>. */
2734 /* Create the type. */
2735 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2736 /* Give it the required name. */
2737 va_list_name = build_decl (BUILTINS_LOCATION,
2738 TYPE_DECL,
2739 get_identifier ("__va_list"),
2740 va_list_type);
2741 DECL_ARTIFICIAL (va_list_name) = 1;
2742 TYPE_NAME (va_list_type) = va_list_name;
2743 TYPE_STUB_DECL (va_list_type) = va_list_name;
2744 /* Create the __ap field. */
2745 ap_field = build_decl (BUILTINS_LOCATION,
2746 FIELD_DECL,
2747 get_identifier ("__ap"),
2748 ptr_type_node);
2749 DECL_ARTIFICIAL (ap_field) = 1;
2750 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2751 TYPE_FIELDS (va_list_type) = ap_field;
2752 /* Compute its layout. */
2753 layout_type (va_list_type);
2754
2755 return va_list_type;
2756 }
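/* Illustrative example of why the tag matters (assuming the standard AAPCS
   C++ mangling of this type): a C++ function such as

       void f (va_list ap);

   mangles as _Z1fSt9__va_list, so changing the tag or layout here would be
   an ABI break.  */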
2757
2758 /* Return an expression of type "void *" pointing to the next
2759 available argument in a variable-argument list. VALIST is the
2760 user-level va_list object, of type __builtin_va_list. */
2761 static tree
2762 arm_extract_valist_ptr (tree valist)
2763 {
2764 if (TREE_TYPE (valist) == error_mark_node)
2765 return error_mark_node;
2766
2767 /* On an AAPCS target, the pointer is stored within "struct
2768 __va_list". */
2769 if (TARGET_AAPCS_BASED)
2770 {
2771 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2772 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2773 valist, ap_field, NULL_TREE);
2774 }
2775
2776 return valist;
2777 }
2778
2779 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2780 static void
2781 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2782 {
2783 valist = arm_extract_valist_ptr (valist);
2784 std_expand_builtin_va_start (valist, nextarg);
2785 }
2786
2787 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2788 static tree
2789 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2790 gimple_seq *post_p)
2791 {
2792 valist = arm_extract_valist_ptr (valist);
2793 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2794 }
2795
2796 /* Check any incompatible options that the user has specified. */
2797 static void
2798 arm_option_check_internal (struct gcc_options *opts)
2799 {
2800 int flags = opts->x_target_flags;
2801
2802 /* iWMMXt and NEON are incompatible. */
2803 if (TARGET_IWMMXT
2804 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2805 error ("iWMMXt and NEON are incompatible");
2806
2807 /* Make sure that the processor choice does not conflict with any of the
2808 other command line choices. */
2809 if (TARGET_ARM_P (flags)
2810 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2811 error ("target CPU does not support ARM mode");
2812
2813 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2814 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2815 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2816
2817 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2818 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2819
2820 /* If this target is normally configured to use APCS frames, warn if they
2821 are turned off and debugging is turned on. */
2822 if (TARGET_ARM_P (flags)
2823 && write_symbols != NO_DEBUG
2824 && !TARGET_APCS_FRAME
2825 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2826 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2827
2828 /* iWMMXt unsupported under Thumb mode. */
2829 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2830 error ("iWMMXt unsupported under Thumb mode");
2831
2832 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2833 error ("can not use -mtp=cp15 with 16-bit Thumb");
2834
2835 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2836 {
2837 error ("RTP PIC is incompatible with Thumb");
2838 flag_pic = 0;
2839 }
2840
2841 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2842 with MOVT. */
2843 if ((target_pure_code || target_slow_flash_data)
2844 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2845 {
2846 const char *flag = (target_pure_code ? "-mpure-code" :
2847 "-mslow-flash-data");
2848 error ("%s only supports non-pic code on M-profile targets with the "
2849 "MOVT instruction", flag);
2850 }
2851
2852 }
2853
2854 /* Recompute the global settings depending on target attribute options. */
2855
2856 static void
2857 arm_option_params_internal (void)
2858 {
2859 /* If we are not using the default (ARM mode) section anchor offset
2860 ranges, then set the correct ranges now. */
2861 if (TARGET_THUMB1)
2862 {
2863 /* Thumb-1 LDR instructions cannot have negative offsets.
2864 Permissible positive offset ranges are 5-bit (for byte loads),
2865 6-bit (for halfword loads), or 7-bit (for word loads).
2866 Empirical results suggest a 7-bit anchor range gives the best
2867 overall code size. */
2868 targetm.min_anchor_offset = 0;
2869 targetm.max_anchor_offset = 127;
2870 }
2871 else if (TARGET_THUMB2)
2872 {
2873 /* The minimum is set such that the total size of the block
2874 for a particular anchor is 248 + 1 + 4095 bytes, which is
2875 divisible by eight, ensuring natural spacing of anchors. */
2876 targetm.min_anchor_offset = -248;
2877 targetm.max_anchor_offset = 4095;
2878 }
2879 else
2880 {
2881 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2882 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2883 }
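/* Worked illustration of the figures above: the Thumb-1 load immediates are
   5-bit fields scaled by the access size, giving reachable byte offsets of
   0..31 for byte loads, 0..62 for halfword loads and 0..124 for word loads;
   the 0..127 anchor range therefore matches the reach of word loads, which
   is what the empirical results favour.  */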
2884
2885 /* With -Os, allow conditional sequences of up to 4 insns regardless of the tuning default. */
2886 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2887
2888 /* For THUMB2, we limit the conditional sequence to one IT block. */
2889 if (TARGET_THUMB2)
2890 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
2891 }
2892
2893 /* True if -mflip-thumb should next add an attribute for the default
2894 mode, false if it should next add an attribute for the opposite mode. */
2895 static GTY(()) bool thumb_flipper;
2896
2897 /* Options after initial target override. */
2898 static GTY(()) tree init_optimize;
2899
2900 static void
2901 arm_override_options_after_change_1 (struct gcc_options *opts)
2902 {
2903 if (opts->x_align_functions <= 0)
2904 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2905 && opts->x_optimize_size ? 2 : 4;
2906 }
2907
2908 /* Implement targetm.override_options_after_change. */
2909
2910 static void
2911 arm_override_options_after_change (void)
2912 {
2913 arm_configure_build_target (&arm_active_target,
2914 TREE_TARGET_OPTION (target_option_default_node),
2915 &global_options_set, false);
2916
2917 arm_override_options_after_change_1 (&global_options);
2918 }
2919
2920 /* Implement TARGET_OPTION_SAVE. */
2921 static void
2922 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2923 {
2924 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2925 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2926 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2927 }
2928
2929 /* Implement TARGET_OPTION_RESTORE. */
2930 static void
2931 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2932 {
2933 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2934 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2935 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2936 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2937 false);
2938 }
2939
2940 /* Reset options between modes that the user has specified. */
2941 static void
2942 arm_option_override_internal (struct gcc_options *opts,
2943 struct gcc_options *opts_set)
2944 {
2945 arm_override_options_after_change_1 (opts);
2946
2947 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2948 {
2949 /* The default is to enable interworking, so this warning message would
2950 be confusing to users who have just compiled with, e.g., -march=armv3. */
2951 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2952 opts->x_target_flags &= ~MASK_INTERWORK;
2953 }
2954
2955 if (TARGET_THUMB_P (opts->x_target_flags)
2956 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2957 {
2958 warning (0, "target CPU does not support THUMB instructions");
2959 opts->x_target_flags &= ~MASK_THUMB;
2960 }
2961
2962 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2963 {
2964 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2965 opts->x_target_flags &= ~MASK_APCS_FRAME;
2966 }
2967
2968 /* Callee super interworking implies thumb interworking. Adding
2969 this to the flags here simplifies the logic elsewhere. */
2970 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2971 opts->x_target_flags |= MASK_INTERWORK;
2972
2973 /* We need to remember the initial values so that combinations of options like
2974 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2975 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2976
2977 if (! opts_set->x_arm_restrict_it)
2978 opts->x_arm_restrict_it = arm_arch8;
2979
2980 /* Restricted IT blocks only exist for Thumb-2 code on A/R-profile cores, so force -mrestrict-it off everywhere else. */
2981 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2982 opts->x_arm_restrict_it = 0;
2983
2984 /* Enable -munaligned-access by default for
2985 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA,
2986 i.e. Thumb2 and ARM state only.
2987 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2988 - ARMv8 architecture-based processors.
2989
2990 Disable -munaligned-access by default for
2991 - all pre-ARMv6 architecture-based processors
2992 - ARMv6-M architecture-based processors
2993 - ARMv8-M Baseline processors. */
2994
2995 if (! opts_set->x_unaligned_access)
2996 {
2997 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
2998 && arm_arch6 && (arm_arch_notm || arm_arch7));
2999 }
3000 else if (opts->x_unaligned_access == 1
3001 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3002 {
3003 warning (0, "target CPU does not support unaligned accesses");
3004 opts->x_unaligned_access = 0;
3005 }
3006
3007 /* Don't warn since it's on by default in -O2. */
3008 if (TARGET_THUMB1_P (opts->x_target_flags))
3009 opts->x_flag_schedule_insns = 0;
3010 else
3011 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3012
3013 /* Disable shrink-wrap when optimizing function for size, since it tends to
3014 generate additional returns. */
3015 if (optimize_function_for_size_p (cfun)
3016 && TARGET_THUMB2_P (opts->x_target_flags))
3017 opts->x_flag_shrink_wrap = false;
3018 else
3019 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3020
3021 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3022 - epilogue_insns - does not accurately model the corresponding insns
3023 emitted in the asm file. In particular, see the comment in thumb_exit
3024 'Find out how many of the (return) argument registers we can corrupt'.
3025 As a consequence, the epilogue may clobber registers without fipa-ra
3026 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3027 TODO: Accurately model clobbers for epilogue_insns and reenable
3028 fipa-ra. */
3029 if (TARGET_THUMB1_P (opts->x_target_flags))
3030 opts->x_flag_ipa_ra = 0;
3031 else
3032 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3033
3034 /* Thumb2 inline assembly code should always use unified syntax.
3035 This will apply to ARM and Thumb1 eventually. */
3036 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3037
3038 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3039 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3040 #endif
3041 }
3042
3043 static sbitmap isa_all_fpubits;
3044 static sbitmap isa_quirkbits;
3045
3046 /* Configure a build target TARGET from the user-specified options OPTS and
3047 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3048 architecture have been specified, but the two are not identical. */
3049 void
3050 arm_configure_build_target (struct arm_build_target *target,
3051 struct cl_target_option *opts,
3052 struct gcc_options *opts_set,
3053 bool warn_compatible)
3054 {
3055 const cpu_option *arm_selected_tune = NULL;
3056 const arch_option *arm_selected_arch = NULL;
3057 const cpu_option *arm_selected_cpu = NULL;
3058 const arm_fpu_desc *arm_selected_fpu = NULL;
3059 const char *tune_opts = NULL;
3060 const char *arch_opts = NULL;
3061 const char *cpu_opts = NULL;
3062
3063 bitmap_clear (target->isa);
3064 target->core_name = NULL;
3065 target->arch_name = NULL;
3066
3067 if (opts_set->x_arm_arch_string)
3068 {
3069 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3070 "-march",
3071 opts->x_arm_arch_string);
3072 arch_opts = strchr (opts->x_arm_arch_string, '+');
3073 }
3074
3075 if (opts_set->x_arm_cpu_string)
3076 {
3077 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3078 opts->x_arm_cpu_string);
3079 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3080 arm_selected_tune = arm_selected_cpu;
3081 /* If taking the tuning from -mcpu, we don't need to rescan the
3082 options for tuning. */
3083 }
3084
3085 if (opts_set->x_arm_tune_string)
3086 {
3087 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3088 opts->x_arm_tune_string);
3089 tune_opts = strchr (opts->x_arm_tune_string, '+');
3090 }
3091
3092 if (arm_selected_arch)
3093 {
3094 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3095 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3096 arch_opts);
3097
3098 if (arm_selected_cpu)
3099 {
3100 auto_sbitmap cpu_isa (isa_num_bits);
3101 auto_sbitmap isa_delta (isa_num_bits);
3102
3103 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3104 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3105 cpu_opts);
3106 bitmap_xor (isa_delta, cpu_isa, target->isa);
3107 /* Ignore any bits that are quirk bits. */
3108 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3109 /* Ignore (for now) any bits that might be set by -mfpu. */
3110 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3111
3112 if (!bitmap_empty_p (isa_delta))
3113 {
3114 if (warn_compatible)
3115 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3116 arm_selected_cpu->common.name,
3117 arm_selected_arch->common.name);
3118 /* -march wins for code generation.
3119 -mcpu wins for default tuning. */
3120 if (!arm_selected_tune)
3121 arm_selected_tune = arm_selected_cpu;
3122
3123 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3124 target->arch_name = arm_selected_arch->common.name;
3125 }
3126 else
3127 {
3128 /* Architecture and CPU are essentially the same.
3129 Prefer the CPU setting. */
3130 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3131 target->core_name = arm_selected_cpu->common.name;
3132 /* Copy the CPU's capabilities, so that we inherit the
3133 appropriate extensions and quirks. */
3134 bitmap_copy (target->isa, cpu_isa);
3135 }
3136 }
3137 else
3138 {
3139 /* Pick a CPU based on the architecture. */
3140 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3141 target->arch_name = arm_selected_arch->common.name;
3142 /* Note: target->core_name is left unset in this path. */
3143 }
3144 }
3145 else if (arm_selected_cpu)
3146 {
3147 target->core_name = arm_selected_cpu->common.name;
3148 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3149 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3150 cpu_opts);
3151 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3152 }
3153 /* If the user did not specify a processor or architecture, choose
3154 one for them. */
3155 else
3156 {
3157 const cpu_option *sel;
3158 auto_sbitmap sought_isa (isa_num_bits);
3159 bitmap_clear (sought_isa);
3160 auto_sbitmap default_isa (isa_num_bits);
3161
3162 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3163 TARGET_CPU_DEFAULT);
3164 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3165 gcc_assert (arm_selected_cpu->common.name);
3166
3167 /* RWE: All of the selection logic below (to the end of this
3168 'if' clause) looks somewhat suspect. It appears to be mostly
3169 there to support forcing thumb support when the default CPU
3170 does not have thumb (somewhat dubious in terms of what the
3171 user might be expecting). I think it should be removed once
3172 support for the pre-thumb era cores is removed. */
3173 sel = arm_selected_cpu;
3174 arm_initialize_isa (default_isa, sel->common.isa_bits);
3175 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3176 cpu_opts);
3177
3178 /* Now check to see if the user has specified any command line
3179 switches that require certain abilities from the cpu. */
3180
3181 if (TARGET_INTERWORK || TARGET_THUMB)
3182 {
3183 bitmap_set_bit (sought_isa, isa_bit_thumb);
3184 bitmap_set_bit (sought_isa, isa_bit_mode32);
3185
3186 /* There are no ARM processors that support both APCS-26 and
3187 interworking. Therefore we forcibly remove MODE26
3188 from the isa features here (if it was set), so that the
3189 search below will always be able to find a compatible
3190 processor. */
3191 bitmap_clear_bit (default_isa, isa_bit_mode26);
3192 }
3193
3194 /* If there are such requirements and the default CPU does not
3195 satisfy them, we need to run over the complete list of
3196 cores looking for one that is satisfactory. */
3197 if (!bitmap_empty_p (sought_isa)
3198 && !bitmap_subset_p (sought_isa, default_isa))
3199 {
3200 auto_sbitmap candidate_isa (isa_num_bits);
3201 /* We're only interested in a CPU with at least the
3202 capabilities of the default CPU and the required
3203 additional features. */
3204 bitmap_ior (default_isa, default_isa, sought_isa);
3205
3206 /* Try to locate a CPU type that supports all of the abilities
3207 of the default CPU, plus the extra abilities requested by
3208 the user. */
3209 for (sel = all_cores; sel->common.name != NULL; sel++)
3210 {
3211 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3212 /* An exact match? */
3213 if (bitmap_equal_p (default_isa, candidate_isa))
3214 break;
3215 }
3216
3217 if (sel->common.name == NULL)
3218 {
3219 unsigned current_bit_count = isa_num_bits;
3220 const cpu_option *best_fit = NULL;
3221
3222 /* Ideally we would like to issue an error message here
3223 saying that it was not possible to find a CPU compatible
3224 with the default CPU, but which also supports the command
3225 line options specified by the programmer, and so they
3226 ought to use the -mcpu=<name> command line option to
3227 override the default CPU type.
3228
3229 If we cannot find a CPU that has exactly the
3230 characteristics of the default CPU and the given
3231 command line options we scan the array again looking
3232 for a best match. The best match must have at least
3233 the capabilities of the perfect match. */
3234 for (sel = all_cores; sel->common.name != NULL; sel++)
3235 {
3236 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3237
3238 if (bitmap_subset_p (default_isa, candidate_isa))
3239 {
3240 unsigned count;
3241
3242 bitmap_and_compl (candidate_isa, candidate_isa,
3243 default_isa);
3244 count = bitmap_popcount (candidate_isa);
3245
3246 if (count < current_bit_count)
3247 {
3248 best_fit = sel;
3249 current_bit_count = count;
3250 }
3251 }
3252
3253 gcc_assert (best_fit);
3254 sel = best_fit;
3255 }
3256 }
3257 arm_selected_cpu = sel;
3258 }
3259
3260 /* Now we know the CPU, we can finally initialize the target
3261 structure. */
3262 target->core_name = arm_selected_cpu->common.name;
3263 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3264 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3265 cpu_opts);
3266 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3267 }
3268
3269 gcc_assert (arm_selected_cpu);
3270 gcc_assert (arm_selected_arch);
3271
3272 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3273 {
3274 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3275 auto_sbitmap fpu_bits (isa_num_bits);
3276
3277 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3278 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3279 bitmap_ior (target->isa, target->isa, fpu_bits);
3280 }
3281
3282 if (!arm_selected_tune)
3283 arm_selected_tune = arm_selected_cpu;
3284 else /* Validate the features passed to -mtune. */
3285 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3286
3287 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3288
3289 /* Finish initializing the target structure. */
3290 target->arch_pp_name = arm_selected_arch->arch;
3291 target->base_arch = arm_selected_arch->base_arch;
3292 target->profile = arm_selected_arch->profile;
3293
3294 target->tune_flags = tune_data->tune_flags;
3295 target->tune = tune_data->tune;
3296 target->tune_core = tune_data->scheduler;
3297 }
3298
3299 /* Fix up any incompatible options that the user has specified. */
3300 static void
3301 arm_option_override (void)
3302 {
3303 static const enum isa_feature fpu_bitlist[]
3304 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3305 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3306 cl_target_option opts;
3307
3308 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3309 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3310
3311 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3312 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3313
3314 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3315
3316 if (!global_options_set.x_arm_fpu_index)
3317 {
3318 bool ok;
3319 int fpu_index;
3320
3321 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3322 CL_TARGET);
3323 gcc_assert (ok);
3324 arm_fpu_index = (enum fpu_type) fpu_index;
3325 }
3326
3327 cl_target_option_save (&opts, &global_options);
3328 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3329 true);
3330
3331 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3332 SUBTARGET_OVERRIDE_OPTIONS;
3333 #endif
3334
3335 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3336 arm_base_arch = arm_active_target.base_arch;
3337
3338 arm_tune = arm_active_target.tune_core;
3339 tune_flags = arm_active_target.tune_flags;
3340 current_tune = arm_active_target.tune;
3341
3342 /* TBD: Dwarf info for apcs frame is not handled yet. */
3343 if (TARGET_APCS_FRAME)
3344 flag_shrink_wrap = false;
3345
3346 /* BPABI targets use linker tricks to allow interworking on cores
3347 without thumb support. */
3348 if (TARGET_INTERWORK
3349 && !TARGET_BPABI
3350 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3351 {
3352 warning (0, "target CPU does not support interworking");
3353 target_flags &= ~MASK_INTERWORK;
3354 }
3355
3356 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3357 {
3358 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3359 target_flags |= MASK_APCS_FRAME;
3360 }
3361
3362 if (TARGET_POKE_FUNCTION_NAME)
3363 target_flags |= MASK_APCS_FRAME;
3364
3365 if (TARGET_APCS_REENT && flag_pic)
3366 error ("-fpic and -mapcs-reent are incompatible");
3367
3368 if (TARGET_APCS_REENT)
3369 warning (0, "APCS reentrant code not supported. Ignored");
3370
3371 /* Initialize boolean versions of the architectural flags, for use
3372 in the arm.md file. */
3373 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_armv3m);
3374 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3375 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3376 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5);
3377 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5e);
3378 arm_arch5te = arm_arch5e
3379 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3380 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3381 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3382 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3383 arm_arch6m = arm_arch6 && !arm_arch_notm;
3384 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3385 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3386 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3387 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3388 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3389 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3390 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3391 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3392 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3393 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3394 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3395 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3396 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3397 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3398 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3399 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3400 if (arm_fp16_inst)
3401 {
3402 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3403 error ("selected fp16 options are incompatible");
3404 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3405 }
3406
3407
3408 /* Set up some tuning parameters. */
3409 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3410 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3411 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3412 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3413 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3414 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3415
3416 /* And finally, set up some quirks. */
3417 arm_arch_no_volatile_ce
3418 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3419 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3420 isa_bit_quirk_armv6kz);
3421
3422 /* V5 code we generate is completely interworking capable, so we turn off
3423 TARGET_INTERWORK here to avoid many tests later on. */
3424
3425 /* XXX However, we must pass the right pre-processor defines to CPP
3426 or GLD can get confused. This is a hack. */
3427 if (TARGET_INTERWORK)
3428 arm_cpp_interwork = 1;
3429
3430 if (arm_arch5)
3431 target_flags &= ~MASK_INTERWORK;
3432
3433 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3434 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3435
3436 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3437 error ("iwmmxt abi requires an iwmmxt capable cpu");
3438
3439 /* If soft-float is specified then don't use FPU. */
3440 if (TARGET_SOFT_FLOAT)
3441 arm_fpu_attr = FPU_NONE;
3442 else
3443 arm_fpu_attr = FPU_VFP;
3444
3445 if (TARGET_AAPCS_BASED)
3446 {
3447 if (TARGET_CALLER_INTERWORKING)
3448 error ("AAPCS does not support -mcaller-super-interworking");
3449 else
3450 if (TARGET_CALLEE_INTERWORKING)
3451 error ("AAPCS does not support -mcallee-super-interworking");
3452 }
3453
3454 /* __fp16 support currently assumes the core has ldrh. */
3455 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3456 sorry ("__fp16 and no ldrh");
3457
3458 if (TARGET_AAPCS_BASED)
3459 {
3460 if (arm_abi == ARM_ABI_IWMMXT)
3461 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3462 else if (TARGET_HARD_FLOAT_ABI)
3463 {
3464 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3465 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3466 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3467 }
3468 else
3469 arm_pcs_default = ARM_PCS_AAPCS;
3470 }
3471 else
3472 {
3473 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3474 sorry ("-mfloat-abi=hard and VFP");
3475
3476 if (arm_abi == ARM_ABI_APCS)
3477 arm_pcs_default = ARM_PCS_APCS;
3478 else
3479 arm_pcs_default = ARM_PCS_ATPCS;
3480 }
3481
3482 /* For arm2/3 there is no need to do any scheduling if we are doing
3483 software floating-point. */
3484 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3485 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3486
3487 /* Use the cp15 method if it is available. */
3488 if (target_thread_pointer == TP_AUTO)
3489 {
3490 if (arm_arch6k && !TARGET_THUMB1)
3491 target_thread_pointer = TP_CP15;
3492 else
3493 target_thread_pointer = TP_SOFT;
3494 }
3495
3496 /* Override the default structure alignment for AAPCS ABI. */
3497 if (!global_options_set.x_arm_structure_size_boundary)
3498 {
3499 if (TARGET_AAPCS_BASED)
3500 arm_structure_size_boundary = 8;
3501 }
3502 else
3503 {
3504 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3505
3506 if (arm_structure_size_boundary != 8
3507 && arm_structure_size_boundary != 32
3508 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3509 {
3510 if (ARM_DOUBLEWORD_ALIGN)
3511 warning (0,
3512 "structure size boundary can only be set to 8, 32 or 64");
3513 else
3514 warning (0, "structure size boundary can only be set to 8 or 32");
3515 arm_structure_size_boundary
3516 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3517 }
3518 }
3519
3520 if (TARGET_VXWORKS_RTP)
3521 {
3522 if (!global_options_set.x_arm_pic_data_is_text_relative)
3523 arm_pic_data_is_text_relative = 0;
3524 }
3525 else if (flag_pic
3526 && !arm_pic_data_is_text_relative
3527 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3528 /* When text & data segments don't have a fixed displacement, the
3529 intended use is with a single, read only, pic base register.
3530 Unless the user explicitly requested not to do that, set
3531 it. */
3532 target_flags |= MASK_SINGLE_PIC_BASE;
3533
3534 /* If stack checking is disabled, we can use r10 as the PIC register,
3535 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3536 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3537 {
3538 if (TARGET_VXWORKS_RTP)
3539 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3540 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3541 }
3542
3543 if (flag_pic && TARGET_VXWORKS_RTP)
3544 arm_pic_register = 9;
3545
3546 if (arm_pic_register_string != NULL)
3547 {
3548 int pic_register = decode_reg_name (arm_pic_register_string);
3549
3550 if (!flag_pic)
3551 warning (0, "-mpic-register= is useless without -fpic");
3552
3553 /* Prevent the user from choosing an obviously stupid PIC register. */
3554 else if (pic_register < 0 || call_used_regs[pic_register]
3555 || pic_register == HARD_FRAME_POINTER_REGNUM
3556 || pic_register == STACK_POINTER_REGNUM
3557 || pic_register >= PC_REGNUM
3558 || (TARGET_VXWORKS_RTP
3559 && (unsigned int) pic_register != arm_pic_register))
3560 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3561 else
3562 arm_pic_register = pic_register;
3563 }
3564
3565 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3566 if (fix_cm3_ldrd == 2)
3567 {
3568 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3569 fix_cm3_ldrd = 1;
3570 else
3571 fix_cm3_ldrd = 0;
3572 }
3573
3574 /* Hot/Cold partitioning is not currently supported, since we can't
3575 handle literal pool placement in that case. */
3576 if (flag_reorder_blocks_and_partition)
3577 {
3578 inform (input_location,
3579 "-freorder-blocks-and-partition not supported on this architecture");
3580 flag_reorder_blocks_and_partition = 0;
3581 flag_reorder_blocks = 1;
3582 }
3583
3584 if (flag_pic)
3585 /* Hoisting PIC address calculations more aggressively provides a small,
3586 but measurable, size reduction for PIC code. Therefore, we decrease
3587 the bar for unrestricted expression hoisting to the cost of PIC address
3588 calculation, which is 2 instructions. */
3589 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3590 global_options.x_param_values,
3591 global_options_set.x_param_values);
3592
3593 /* ARM EABI defaults to strict volatile bitfields. */
3594 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3595 && abi_version_at_least(2))
3596 flag_strict_volatile_bitfields = 1;
3597
3598 /* Enable software prefetching at -O3 for CPUs that have prefetch, and we
3599 have deemed it beneficial (signified by setting
3600 prefetch.num_slots to 1 or more). */
3601 if (flag_prefetch_loop_arrays < 0
3602 && HAVE_prefetch
3603 && optimize >= 3
3604 && current_tune->prefetch.num_slots > 0)
3605 flag_prefetch_loop_arrays = 1;
3606
3607 /* Set up parameters to be used in prefetching algorithm. Do not
3608 override the defaults unless we are tuning for a core we have
3609 researched values for. */
3610 if (current_tune->prefetch.num_slots > 0)
3611 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3612 current_tune->prefetch.num_slots,
3613 global_options.x_param_values,
3614 global_options_set.x_param_values);
3615 if (current_tune->prefetch.l1_cache_line_size >= 0)
3616 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3617 current_tune->prefetch.l1_cache_line_size,
3618 global_options.x_param_values,
3619 global_options_set.x_param_values);
3620 if (current_tune->prefetch.l1_cache_size >= 0)
3621 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3622 current_tune->prefetch.l1_cache_size,
3623 global_options.x_param_values,
3624 global_options_set.x_param_values);
3625
3626 /* Use Neon to perform 64-bits operations rather than core
3627 registers. */
3628 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3629 if (use_neon_for_64bits == 1)
3630 prefer_neon_for_64bits = true;
3631
3632 /* Use the alternative scheduling-pressure algorithm by default. */
3633 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3634 global_options.x_param_values,
3635 global_options_set.x_param_values);
3636
3637 /* Look through ready list and all of queue for instructions
3638 relevant for L2 auto-prefetcher. */
3639 int param_sched_autopref_queue_depth;
3640
3641 switch (current_tune->sched_autopref)
3642 {
3643 case tune_params::SCHED_AUTOPREF_OFF:
3644 param_sched_autopref_queue_depth = -1;
3645 break;
3646
3647 case tune_params::SCHED_AUTOPREF_RANK:
3648 param_sched_autopref_queue_depth = 0;
3649 break;
3650
3651 case tune_params::SCHED_AUTOPREF_FULL:
3652 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3653 break;
3654
3655 default:
3656 gcc_unreachable ();
3657 }
3658
3659 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3660 param_sched_autopref_queue_depth,
3661 global_options.x_param_values,
3662 global_options_set.x_param_values);
3663
3664 /* Currently, for slow flash data, we just disable literal pools. We also
3665 disable it for pure-code. */
3666 if (target_slow_flash_data || target_pure_code)
3667 arm_disable_literal_pool = true;
3668
3669 if (use_cmse && !arm_arch_cmse)
3670 error ("target CPU does not support ARMv8-M Security Extensions");
3671
3672 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3673 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3674 if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3675 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3676
3677 /* Disable scheduling fusion by default if it's not armv7 processor
3678 or doesn't prefer ldrd/strd. */
3679 if (flag_schedule_fusion == 2
3680 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3681 flag_schedule_fusion = 0;
3682
3683 /* Need to remember the initial options before they are overridden. */
3684 init_optimize = build_optimization_node (&global_options);
3685
3686 arm_option_override_internal (&global_options, &global_options_set);
3687 arm_option_check_internal (&global_options);
3688 arm_option_params_internal ();
3689
3690 /* Create the default target_options structure. */
3691 target_option_default_node = target_option_current_node
3692 = build_target_option_node (&global_options);
3693
3694 /* Register global variables with the garbage collector. */
3695 arm_add_gc_roots ();
3696
3697 /* Init initial mode for testing. */
3698 thumb_flipper = TARGET_THUMB;
3699 }
3700
3701 static void
3702 arm_add_gc_roots (void)
3703 {
3704 gcc_obstack_init(&minipool_obstack);
3705 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3706 }
3707 \f
3708 /* A table of known ARM exception types.
3709 For use with the interrupt function attribute. */
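/* A typical use (illustrative) is:
   void handler (void) __attribute__ ((interrupt ("IRQ")));
   arm_isr_value below matches the string argument against this table;
   both upper- and lower-case spellings are accepted. */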
3710
3711 typedef struct
3712 {
3713 const char *const arg;
3714 const unsigned long return_value;
3715 }
3716 isr_attribute_arg;
3717
3718 static const isr_attribute_arg isr_attribute_args [] =
3719 {
3720 { "IRQ", ARM_FT_ISR },
3721 { "irq", ARM_FT_ISR },
3722 { "FIQ", ARM_FT_FIQ },
3723 { "fiq", ARM_FT_FIQ },
3724 { "ABORT", ARM_FT_ISR },
3725 { "abort", ARM_FT_ISR },
3726 { "ABORT", ARM_FT_ISR },
3727 { "abort", ARM_FT_ISR },
3728 { "UNDEF", ARM_FT_EXCEPTION },
3729 { "undef", ARM_FT_EXCEPTION },
3730 { "SWI", ARM_FT_EXCEPTION },
3731 { "swi", ARM_FT_EXCEPTION },
3732 { NULL, ARM_FT_NORMAL }
3733 };
3734
3735 /* Returns the (interrupt) function type of the current
3736 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3737
3738 static unsigned long
3739 arm_isr_value (tree argument)
3740 {
3741 const isr_attribute_arg * ptr;
3742 const char * arg;
3743
3744 if (!arm_arch_notm)
3745 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3746
3747 /* No argument - default to IRQ. */
3748 if (argument == NULL_TREE)
3749 return ARM_FT_ISR;
3750
3751 /* Get the value of the argument. */
3752 if (TREE_VALUE (argument) == NULL_TREE
3753 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3754 return ARM_FT_UNKNOWN;
3755
3756 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3757
3758 /* Check it against the list of known arguments. */
3759 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3760 if (streq (arg, ptr->arg))
3761 return ptr->return_value;
3762
3763 /* An unrecognized interrupt type. */
3764 return ARM_FT_UNKNOWN;
3765 }
3766
3767 /* Computes the type of the current function. */
3768
3769 static unsigned long
3770 arm_compute_func_type (void)
3771 {
3772 unsigned long type = ARM_FT_UNKNOWN;
3773 tree a;
3774 tree attr;
3775
3776 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3777
3778 /* Decide if the current function is volatile. Such functions
3779 never return, and many memory cycles can be saved by not storing
3780 register values that will never be needed again. This optimization
3781 was added to speed up context switching in a kernel application. */
3782 if (optimize > 0
3783 && (TREE_NOTHROW (current_function_decl)
3784 || !(flag_unwind_tables
3785 || (flag_exceptions
3786 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3787 && TREE_THIS_VOLATILE (current_function_decl))
3788 type |= ARM_FT_VOLATILE;
3789
3790 if (cfun->static_chain_decl != NULL)
3791 type |= ARM_FT_NESTED;
3792
3793 attr = DECL_ATTRIBUTES (current_function_decl);
3794
3795 a = lookup_attribute ("naked", attr);
3796 if (a != NULL_TREE)
3797 type |= ARM_FT_NAKED;
3798
3799 a = lookup_attribute ("isr", attr);
3800 if (a == NULL_TREE)
3801 a = lookup_attribute ("interrupt", attr);
3802
3803 if (a == NULL_TREE)
3804 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3805 else
3806 type |= arm_isr_value (TREE_VALUE (a));
3807
3808 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3809 type |= ARM_FT_CMSE_ENTRY;
3810
3811 return type;
3812 }
3813
3814 /* Returns the type of the current function. */
3815
3816 unsigned long
3817 arm_current_func_type (void)
3818 {
3819 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3820 cfun->machine->func_type = arm_compute_func_type ();
3821
3822 return cfun->machine->func_type;
3823 }
3824
3825 bool
3826 arm_allocate_stack_slots_for_args (void)
3827 {
3828 /* Naked functions should not allocate stack slots for arguments. */
3829 return !IS_NAKED (arm_current_func_type ());
3830 }
3831
3832 static bool
3833 arm_warn_func_return (tree decl)
3834 {
3835 /* Naked functions are implemented entirely in assembly, including the
3836 return sequence, so suppress warnings about this. */
3837 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3838 }
3839
3840 \f
3841 /* Output assembler code for a block containing the constant parts
3842 of a trampoline, leaving space for the variable parts.
3843
3844 On the ARM, (if r8 is the static chain regnum, and remembering that
3845 referencing pc adds an offset of 8) the trampoline looks like:
3846 ldr r8, [pc, #0]
3847 ldr pc, [pc]
3848 .word static chain value
3849 .word function's address
3850 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3851
3852 static void
3853 arm_asm_trampoline_template (FILE *f)
3854 {
3855 fprintf (f, "\t.syntax unified\n");
3856
3857 if (TARGET_ARM)
3858 {
3859 fprintf (f, "\t.arm\n");
3860 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3861 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3862 }
3863 else if (TARGET_THUMB2)
3864 {
3865 fprintf (f, "\t.thumb\n");
3866 /* The Thumb-2 trampoline is similar to the arm implementation.
3867 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3868 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3869 STATIC_CHAIN_REGNUM, PC_REGNUM);
3870 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3871 }
3872 else
3873 {
3874 ASM_OUTPUT_ALIGN (f, 2);
3875 fprintf (f, "\t.code\t16\n");
3876 fprintf (f, ".Ltrampoline_start:\n");
3877 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3878 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3879 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3880 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3881 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3882 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3883 }
3884 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3885 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3886 }
3887
3888 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3889
3890 static void
3891 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3892 {
3893 rtx fnaddr, mem, a_tramp;
3894
3895 emit_block_move (m_tramp, assemble_trampoline_template (),
3896 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3897
3898 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3899 emit_move_insn (mem, chain_value);
3900
3901 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3902 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3903 emit_move_insn (mem, fnaddr);
3904
3905 a_tramp = XEXP (m_tramp, 0);
3906 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3907 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3908 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3909 }
3910
3911 /* Thumb trampolines should be entered in thumb mode, so set
3912 the bottom bit of the address. */
3913
3914 static rtx
3915 arm_trampoline_adjust_address (rtx addr)
3916 {
3917 if (TARGET_THUMB)
3918 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3919 NULL, 0, OPTAB_LIB_WIDEN);
3920 return addr;
3921 }
3922 \f
3923 /* Return 1 if it is possible to return using a single instruction.
3924 If SIBLING is non-null, this is a test for a return before a sibling
3925 call. SIBLING is the call insn, so we can examine its register usage. */
3926
3927 int
3928 use_return_insn (int iscond, rtx sibling)
3929 {
3930 int regno;
3931 unsigned int func_type;
3932 unsigned long saved_int_regs;
3933 unsigned HOST_WIDE_INT stack_adjust;
3934 arm_stack_offsets *offsets;
3935
3936 /* Never use a return instruction before reload has run. */
3937 if (!reload_completed)
3938 return 0;
3939
3940 func_type = arm_current_func_type ();
3941
3942 /* Naked, volatile and stack alignment functions need special
3943 consideration. */
3944 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3945 return 0;
3946
3947 /* So do interrupt functions that use the frame pointer and Thumb
3948 interrupt functions. */
3949 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3950 return 0;
3951
3952 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3953 && !optimize_function_for_size_p (cfun))
3954 return 0;
3955
3956 offsets = arm_get_frame_offsets ();
3957 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3958
3959 /* As do variadic functions. */
3960 if (crtl->args.pretend_args_size
3961 || cfun->machine->uses_anonymous_args
3962 /* Or if the function calls __builtin_eh_return () */
3963 || crtl->calls_eh_return
3964 /* Or if the function calls alloca */
3965 || cfun->calls_alloca
3966 /* Or if there is a stack adjustment. However, if the stack pointer
3967 is saved on the stack, we can use a pre-incrementing stack load. */
3968 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3969 && stack_adjust == 4))
3970 /* Or if the static chain register was saved above the frame, under the
3971 assumption that the stack pointer isn't saved on the stack. */
3972 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3973 && arm_compute_static_chain_stack_bytes() != 0))
3974 return 0;
3975
3976 saved_int_regs = offsets->saved_regs_mask;
3977
3978 /* Unfortunately, the insn
3979
3980 ldmib sp, {..., sp, ...}
3981
3982 triggers a bug on most SA-110 based devices, such that the stack
3983 pointer won't be correctly restored if the instruction takes a
3984 page fault. We work around this problem by popping r3 along with
3985 the other registers, since that is never slower than executing
3986 another instruction.
3987
3988 We test for !arm_arch5 here, because code for any architecture
3989 less than this could potentially be run on one of the buggy
3990 chips. */
3991 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3992 {
3993 /* Validate that r3 is a call-clobbered register (always true in
3994 the default abi) ... */
3995 if (!call_used_regs[3])
3996 return 0;
3997
3998 /* ... that it isn't being used for a return value ... */
3999 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4000 return 0;
4001
4002 /* ... or for a tail-call argument ... */
4003 if (sibling)
4004 {
4005 gcc_assert (CALL_P (sibling));
4006
4007 if (find_regno_fusage (sibling, USE, 3))
4008 return 0;
4009 }
4010
4011 /* ... and that there are no call-saved registers in r0-r2
4012 (always true in the default ABI). */
4013 if (saved_int_regs & 0x7)
4014 return 0;
4015 }
4016
4017 /* Can't be done if interworking with Thumb, and any registers have been
4018 stacked. */
4019 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4020 return 0;
4021
4022 /* On StrongARM, conditional returns are expensive if they aren't
4023 taken and multiple registers have been stacked. */
4024 if (iscond && arm_tune_strongarm)
4025 {
4026 /* Conditional return when just the LR is stored is a simple
4027 conditional-load instruction, that's not expensive. */
4028 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4029 return 0;
4030
4031 if (flag_pic
4032 && arm_pic_register != INVALID_REGNUM
4033 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4034 return 0;
4035 }
4036
4037 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4038 several instructions if anything needs to be popped. */
4039 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4040 return 0;
4041
4042 /* If there are saved registers but the LR isn't saved, then we need
4043 two instructions for the return. */
4044 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4045 return 0;
4046
4047 /* Can't be done if any of the VFP regs are pushed,
4048 since this also requires an insn. */
4049 if (TARGET_HARD_FLOAT)
4050 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4051 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4052 return 0;
4053
4054 if (TARGET_REALLY_IWMMXT)
4055 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4056 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4057 return 0;
4058
4059 return 1;
4060 }
4061
4062 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4063 shrink-wrapping if possible. This is the case if we need to emit a
4064 prologue, which we can test by looking at the offsets. */
4065 bool
4066 use_simple_return_p (void)
4067 {
4068 arm_stack_offsets *offsets;
4069
4070 /* Note this function can be called before or after reload. */
4071 if (!reload_completed)
4072 arm_compute_frame_layout ();
4073
4074 offsets = arm_get_frame_offsets ();
4075 return offsets->outgoing_args != 0;
4076 }
4077
4078 /* Return TRUE if int I is a valid immediate ARM constant. */
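/* In ARM state a valid immediate is an 8-bit value rotated right by an
   even amount. For example (illustrative values): 0xff, 0x00ff0000 and
   0xff000000 are all encodable, while 0x101 and 0x00012345 are not.
   Thumb-2 additionally accepts the replicated byte patterns handled
   below. */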
4079
4080 int
4081 const_ok_for_arm (HOST_WIDE_INT i)
4082 {
4083 int lowbit;
4084
4085 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4086 be all zero, or all one. */
4087 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4088 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4089 != ((~(unsigned HOST_WIDE_INT) 0)
4090 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4091 return FALSE;
4092
4093 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4094
4095 /* Fast return for 0 and small values. We must do this for zero, since
4096 the code below can't handle that one case. */
4097 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4098 return TRUE;
4099
4100 /* Get the number of trailing zeros. */
4101 lowbit = ffs((int) i) - 1;
4102
4103 /* Only even shifts are allowed in ARM mode so round down to the
4104 nearest even number. */
4105 if (TARGET_ARM)
4106 lowbit &= ~1;
4107
4108 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4109 return TRUE;
4110
4111 if (TARGET_ARM)
4112 {
4113 /* Allow rotated constants in ARM mode. */
4114 if (lowbit <= 4
4115 && ((i & ~0xc000003f) == 0
4116 || (i & ~0xf000000f) == 0
4117 || (i & ~0xfc000003) == 0))
4118 return TRUE;
4119 }
4120 else if (TARGET_THUMB2)
4121 {
4122 HOST_WIDE_INT v;
4123
4124 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
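/* E.g. (illustrative) 0x00120012 and 0x34343434 match these patterns;
   0x56005600 is caught by the 0xXY00XY00 check below. */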
4125 v = i & 0xff;
4126 v |= v << 16;
4127 if (i == v || i == (v | (v << 8)))
4128 return TRUE;
4129
4130 /* Allow repeated pattern 0xXY00XY00. */
4131 v = i & 0xff00;
4132 v |= v << 16;
4133 if (i == v)
4134 return TRUE;
4135 }
4136 else if (TARGET_HAVE_MOVT)
4137 {
4138 /* Thumb-1 Targets with MOVT. */
4139 if (i > 0xffff)
4140 return FALSE;
4141 else
4142 return TRUE;
4143 }
4144
4145 return FALSE;
4146 }
4147
4148 /* Return true if I is a valid constant for the operation CODE. */
4149 int
4150 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4151 {
4152 if (const_ok_for_arm (i))
4153 return 1;
4154
4155 switch (code)
4156 {
4157 case SET:
4158 /* See if we can use movw. */
4159 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4160 return 1;
4161 else
4162 /* Otherwise, try mvn. */
4163 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4164
4165 case PLUS:
4166 /* See if we can use addw or subw. */
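/* addw/subw take a 12-bit immediate, so (illustrative) any constant in
   the range 0..4095, or whose negation is in that range, is accepted
   here. */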
4167 if (TARGET_THUMB2
4168 && ((i & 0xfffff000) == 0
4169 || ((-i) & 0xfffff000) == 0))
4170 return 1;
4171 /* Fall through. */
4172 case COMPARE:
4173 case EQ:
4174 case NE:
4175 case GT:
4176 case LE:
4177 case LT:
4178 case GE:
4179 case GEU:
4180 case LTU:
4181 case GTU:
4182 case LEU:
4183 case UNORDERED:
4184 case ORDERED:
4185 case UNEQ:
4186 case UNGE:
4187 case UNLT:
4188 case UNGT:
4189 case UNLE:
4190 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4191
4192 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4193 case XOR:
4194 return 0;
4195
4196 case IOR:
4197 if (TARGET_THUMB2)
4198 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4199 return 0;
4200
4201 case AND:
4202 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4203
4204 default:
4205 gcc_unreachable ();
4206 }
4207 }
4208
4209 /* Return true if I is a valid di mode constant for the operation CODE. */
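/* E.g. (illustrative) for a DImode AND, 0xffffffff000000ff is accepted:
   the high word is all ones (that half of the AND is a no-op) and the
   low word is itself a valid immediate. */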
4210 int
4211 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4212 {
4213 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4214 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4215 rtx hi = GEN_INT (hi_val);
4216 rtx lo = GEN_INT (lo_val);
4217
4218 if (TARGET_THUMB1)
4219 return 0;
4220
4221 switch (code)
4222 {
4223 case AND:
4224 case IOR:
4225 case XOR:
4226 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4227 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4228 case PLUS:
4229 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4230
4231 default:
4232 return 0;
4233 }
4234 }
4235
4236 /* Emit a sequence of insns to handle a large constant.
4237 CODE is the code of the operation required, it can be any of SET, PLUS,
4238 IOR, AND, XOR, MINUS;
4239 MODE is the mode in which the operation is being performed;
4240 VAL is the integer to operate on;
4241 SOURCE is the other operand (a register, or a null-pointer for SET);
4242 SUBTARGETS means it is safe to create scratch registers if that will
4243 either produce a simpler sequence, or we will want to cse the values.
4244 Return value is the number of insns emitted. */
4245
4246 /* ??? Tweak this for thumb2. */
4247 int
4248 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4249 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4250 {
4251 rtx cond;
4252
4253 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4254 cond = COND_EXEC_TEST (PATTERN (insn));
4255 else
4256 cond = NULL_RTX;
4257
4258 if (subtargets || code == SET
4259 || (REG_P (target) && REG_P (source)
4260 && REGNO (target) != REGNO (source)))
4261 {
4262 /* After arm_reorg has been called, we can't fix up expensive
4263 constants by pushing them into memory so we must synthesize
4264 them in-line, regardless of the cost. This is only likely to
4265 be more costly on chips that have load delay slots and we are
4266 compiling without running the scheduler (so no splitting
4267 occurred before the final instruction emission).
4268
4269 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4270 */
4271 if (!cfun->machine->after_arm_reorg
4272 && !cond
4273 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4274 1, 0)
4275 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4276 + (code != SET))))
4277 {
4278 if (code == SET)
4279 {
4280 /* Currently SET is the only monadic value for CODE, all
4281 the rest are dyadic. */
4282 if (TARGET_USE_MOVT)
4283 arm_emit_movpair (target, GEN_INT (val));
4284 else
4285 emit_set_insn (target, GEN_INT (val));
4286
4287 return 1;
4288 }
4289 else
4290 {
4291 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4292
4293 if (TARGET_USE_MOVT)
4294 arm_emit_movpair (temp, GEN_INT (val));
4295 else
4296 emit_set_insn (temp, GEN_INT (val));
4297
4298 /* For MINUS, the value is subtracted from, since we never
4299 have subtraction of a constant. */
4300 if (code == MINUS)
4301 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4302 else
4303 emit_set_insn (target,
4304 gen_rtx_fmt_ee (code, mode, source, temp));
4305 return 2;
4306 }
4307 }
4308 }
4309
4310 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4311 1);
4312 }
4313
4314 /* Return a sequence of integers, in RETURN_SEQUENCE that fit into
4315 ARM/THUMB2 immediates, and add up to VAL.
4316 The return value gives the number of insns required. */
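/* For example (illustrative), in ARM state 0x12345678 is split into four
   8-bit rotated immediates:
   0x12000000 + 0x00340000 + 0x00005600 + 0x00000078
   so four insns are required for that value. */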
4317 static int
4318 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4319 struct four_ints *return_sequence)
4320 {
4321 int best_consecutive_zeros = 0;
4322 int i;
4323 int best_start = 0;
4324 int insns1, insns2;
4325 struct four_ints tmp_sequence;
4326
4327 /* If we aren't targeting ARM, the best place to start is always at
4328 the bottom, otherwise look more closely. */
4329 if (TARGET_ARM)
4330 {
4331 for (i = 0; i < 32; i += 2)
4332 {
4333 int consecutive_zeros = 0;
4334
4335 if (!(val & (3 << i)))
4336 {
4337 while ((i < 32) && !(val & (3 << i)))
4338 {
4339 consecutive_zeros += 2;
4340 i += 2;
4341 }
4342 if (consecutive_zeros > best_consecutive_zeros)
4343 {
4344 best_consecutive_zeros = consecutive_zeros;
4345 best_start = i - consecutive_zeros;
4346 }
4347 i -= 2;
4348 }
4349 }
4350 }
4351
4352 /* So long as it won't require any more insns to do so, it's
4353 desirable to emit a small constant (in bits 0...9) in the last
4354 insn. This way there is more chance that it can be combined with
4355 a later addressing insn to form a pre-indexed load or store
4356 operation. Consider:
4357
4358 *((volatile int *)0xe0000100) = 1;
4359 *((volatile int *)0xe0000110) = 2;
4360
4361 We want this to wind up as:
4362
4363 mov rA, #0xe0000000
4364 mov rB, #1
4365 str rB, [rA, #0x100]
4366 mov rB, #2
4367 str rB, [rA, #0x110]
4368
4369 rather than having to synthesize both large constants from scratch.
4370
4371 Therefore, we calculate how many insns would be required to emit
4372 the constant starting from `best_start', and also starting from
4373 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4374 yield a shorter sequence, we may as well use zero. */
4375 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4376 if (best_start != 0
4377 && ((HOST_WIDE_INT_1U << best_start) < val))
4378 {
4379 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4380 if (insns2 <= insns1)
4381 {
4382 *return_sequence = tmp_sequence;
4383 insns1 = insns2;
4384 }
4385 }
4386
4387 return insns1;
4388 }
4389
4390 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4391 static int
4392 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4393 struct four_ints *return_sequence, int i)
4394 {
4395 int remainder = val & 0xffffffff;
4396 int insns = 0;
4397
4398 /* Try and find a way of doing the job in either two or three
4399 instructions.
4400
4401 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4402 location. We start at position I. This may be the MSB, or
4403 optimal_immediate_sequence may have positioned it at the largest block
4404 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4405 wrapping around to the top of the word when we drop off the bottom.
4406 In the worst case this code should produce no more than four insns.
4407
4408 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4409 constants, shifted to any arbitrary location. We should always start
4410 at the MSB. */
4411 do
4412 {
4413 int end;
4414 unsigned int b1, b2, b3, b4;
4415 unsigned HOST_WIDE_INT result;
4416 int loc;
4417
4418 gcc_assert (insns < 4);
4419
4420 if (i <= 0)
4421 i += 32;
4422
4423 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4424 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4425 {
4426 loc = i;
4427 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4428 /* We can use addw/subw for the last 12 bits. */
4429 result = remainder;
4430 else
4431 {
4432 /* Use an 8-bit shifted/rotated immediate. */
4433 end = i - 8;
4434 if (end < 0)
4435 end += 32;
4436 result = remainder & ((0x0ff << end)
4437 | ((i < end) ? (0xff >> (32 - end))
4438 : 0));
4439 i -= 8;
4440 }
4441 }
4442 else
4443 {
4444 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4445 arbitrary shifts. */
4446 i -= TARGET_ARM ? 2 : 1;
4447 continue;
4448 }
4449
4450 /* Next, see if we can do a better job with a thumb2 replicated
4451 constant.
4452
4453 We do it this way around to catch the cases like 0x01F001E0 where
4454 two 8-bit immediates would work, but a replicated constant would
4455 make it worse.
4456
4457 TODO: 16-bit constants that don't clear all the bits, but still win.
4458 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4459 if (TARGET_THUMB2)
4460 {
4461 b1 = (remainder & 0xff000000) >> 24;
4462 b2 = (remainder & 0x00ff0000) >> 16;
4463 b3 = (remainder & 0x0000ff00) >> 8;
4464 b4 = remainder & 0xff;
4465
4466 if (loc > 24)
4467 {
4468 /* The 8-bit immediate already found clears b1 (and maybe b2),
4469 but must leave b3 and b4 alone. */
4470
4471 /* First try to find a 32-bit replicated constant that clears
4472 almost everything. We can assume that we can't do it in one,
4473 or else we wouldn't be here. */
4474 unsigned int tmp = b1 & b2 & b3 & b4;
4475 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4476 + (tmp << 24);
4477 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4478 + (tmp == b3) + (tmp == b4);
4479 if (tmp
4480 && (matching_bytes >= 3
4481 || (matching_bytes == 2
4482 && const_ok_for_op (remainder & ~tmp2, code))))
4483 {
4484 /* At least 3 of the bytes match, and the fourth has at
4485 least as many bits set, or two of the bytes match
4486 and it will only require one more insn to finish. */
4487 result = tmp2;
4488 i = tmp != b1 ? 32
4489 : tmp != b2 ? 24
4490 : tmp != b3 ? 16
4491 : 8;
4492 }
4493
4494 /* Second, try to find a 16-bit replicated constant that can
4495 leave three of the bytes clear. If b2 or b4 is already
4496 zero, then we can. If the 8-bit from above would not
4497 clear b2 anyway, then we still win. */
4498 else if (b1 == b3 && (!b2 || !b4
4499 || (remainder & 0x00ff0000 & ~result)))
4500 {
4501 result = remainder & 0xff00ff00;
4502 i = 24;
4503 }
4504 }
4505 else if (loc > 16)
4506 {
4507 /* The 8-bit immediate already found clears b2 (and maybe b3)
4508 and we don't get here unless b1 is already clear, but it will
4509 leave b4 unchanged. */
4510
4511 /* If we can clear b2 and b4 at once, then we win, since the
4512 8-bits couldn't possibly reach that far. */
4513 if (b2 == b4)
4514 {
4515 result = remainder & 0x00ff00ff;
4516 i = 16;
4517 }
4518 }
4519 }
4520
4521 return_sequence->i[insns++] = result;
4522 remainder &= ~result;
4523
4524 if (code == SET || code == MINUS)
4525 code = PLUS;
4526 }
4527 while (remainder);
4528
4529 return insns;
4530 }
4531
4532 /* Emit an instruction with the indicated PATTERN. If COND is
4533 non-NULL, conditionalize the execution of the instruction on COND
4534 being true. */
4535
4536 static void
4537 emit_constant_insn (rtx cond, rtx pattern)
4538 {
4539 if (cond)
4540 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4541 emit_insn (pattern);
4542 }
4543
4544 /* As above, but extra parameter GENERATE which, if clear, suppresses
4545 RTL generation. */
4546
4547 static int
4548 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4549 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4550 int subtargets, int generate)
4551 {
4552 int can_invert = 0;
4553 int can_negate = 0;
4554 int final_invert = 0;
4555 int i;
4556 int set_sign_bit_copies = 0;
4557 int clear_sign_bit_copies = 0;
4558 int clear_zero_bit_copies = 0;
4559 int set_zero_bit_copies = 0;
4560 int insns = 0, neg_insns, inv_insns;
4561 unsigned HOST_WIDE_INT temp1, temp2;
4562 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4563 struct four_ints *immediates;
4564 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4565
4566 /* Find out which operations are safe for a given CODE. Also do a quick
4567 check for degenerate cases; these can occur when DImode operations
4568 are split. */
4569 switch (code)
4570 {
4571 case SET:
4572 can_invert = 1;
4573 break;
4574
4575 case PLUS:
4576 can_negate = 1;
4577 break;
4578
4579 case IOR:
4580 if (remainder == 0xffffffff)
4581 {
4582 if (generate)
4583 emit_constant_insn (cond,
4584 gen_rtx_SET (target,
4585 GEN_INT (ARM_SIGN_EXTEND (val))));
4586 return 1;
4587 }
4588
4589 if (remainder == 0)
4590 {
4591 if (reload_completed && rtx_equal_p (target, source))
4592 return 0;
4593
4594 if (generate)
4595 emit_constant_insn (cond, gen_rtx_SET (target, source));
4596 return 1;
4597 }
4598 break;
4599
4600 case AND:
4601 if (remainder == 0)
4602 {
4603 if (generate)
4604 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4605 return 1;
4606 }
4607 if (remainder == 0xffffffff)
4608 {
4609 if (reload_completed && rtx_equal_p (target, source))
4610 return 0;
4611 if (generate)
4612 emit_constant_insn (cond, gen_rtx_SET (target, source));
4613 return 1;
4614 }
4615 can_invert = 1;
4616 break;
4617
4618 case XOR:
4619 if (remainder == 0)
4620 {
4621 if (reload_completed && rtx_equal_p (target, source))
4622 return 0;
4623 if (generate)
4624 emit_constant_insn (cond, gen_rtx_SET (target, source));
4625 return 1;
4626 }
4627
4628 if (remainder == 0xffffffff)
4629 {
4630 if (generate)
4631 emit_constant_insn (cond,
4632 gen_rtx_SET (target,
4633 gen_rtx_NOT (mode, source)));
4634 return 1;
4635 }
4636 final_invert = 1;
4637 break;
4638
4639 case MINUS:
4640 /* We treat MINUS as (val - source), since (source - val) is always
4641 passed as (source + (-val)). */
4642 if (remainder == 0)
4643 {
4644 if (generate)
4645 emit_constant_insn (cond,
4646 gen_rtx_SET (target,
4647 gen_rtx_NEG (mode, source)));
4648 return 1;
4649 }
4650 if (const_ok_for_arm (val))
4651 {
4652 if (generate)
4653 emit_constant_insn (cond,
4654 gen_rtx_SET (target,
4655 gen_rtx_MINUS (mode, GEN_INT (val),
4656 source)));
4657 return 1;
4658 }
4659
4660 break;
4661
4662 default:
4663 gcc_unreachable ();
4664 }
4665
4666 /* If we can do it in one insn get out quickly. */
4667 if (const_ok_for_op (val, code))
4668 {
4669 if (generate)
4670 emit_constant_insn (cond,
4671 gen_rtx_SET (target,
4672 (source
4673 ? gen_rtx_fmt_ee (code, mode, source,
4674 GEN_INT (val))
4675 : GEN_INT (val))));
4676 return 1;
4677 }
4678
4679 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4680 insn. */
4681 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4682 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4683 {
4684 if (generate)
4685 {
4686 if (mode == SImode && i == 16)
4687 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4688 smaller insn. */
4689 emit_constant_insn (cond,
4690 gen_zero_extendhisi2
4691 (target, gen_lowpart (HImode, source)));
4692 else
4693 /* Extz only supports SImode, but we can coerce the operands
4694 into that mode. */
4695 emit_constant_insn (cond,
4696 gen_extzv_t2 (gen_lowpart (SImode, target),
4697 gen_lowpart (SImode, source),
4698 GEN_INT (i), const0_rtx));
4699 }
4700
4701 return 1;
4702 }
4703
4704 /* Calculate a few attributes that may be useful for specific
4705 optimizations. */
4706 /* Count number of leading zeros. */
4707 for (i = 31; i >= 0; i--)
4708 {
4709 if ((remainder & (1 << i)) == 0)
4710 clear_sign_bit_copies++;
4711 else
4712 break;
4713 }
4714
4715 /* Count number of leading 1's. */
4716 for (i = 31; i >= 0; i--)
4717 {
4718 if ((remainder & (1 << i)) != 0)
4719 set_sign_bit_copies++;
4720 else
4721 break;
4722 }
4723
4724 /* Count number of trailing zero's. */
4725 for (i = 0; i <= 31; i++)
4726 {
4727 if ((remainder & (1 << i)) == 0)
4728 clear_zero_bit_copies++;
4729 else
4730 break;
4731 }
4732
4733 /* Count number of trailing 1's. */
4734 for (i = 0; i <= 31; i++)
4735 {
4736 if ((remainder & (1 << i)) != 0)
4737 set_zero_bit_copies++;
4738 else
4739 break;
4740 }
4741
4742 switch (code)
4743 {
4744 case SET:
4745 /* See if we can do this by sign_extending a constant that is known
4746 to be negative. This is a good way of doing it, since the shift
4747 may well merge into a subsequent insn. */
4748 if (set_sign_bit_copies > 1)
4749 {
4750 if (const_ok_for_arm
4751 (temp1 = ARM_SIGN_EXTEND (remainder
4752 << (set_sign_bit_copies - 1))))
4753 {
4754 if (generate)
4755 {
4756 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4757 emit_constant_insn (cond,
4758 gen_rtx_SET (new_src, GEN_INT (temp1)));
4759 emit_constant_insn (cond,
4760 gen_ashrsi3 (target, new_src,
4761 GEN_INT (set_sign_bit_copies - 1)));
4762 }
4763 return 2;
4764 }
4765 /* For an inverted constant, we will need to set the low bits,
4766 these will be shifted out of harm's way. */
4767 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4768 if (const_ok_for_arm (~temp1))
4769 {
4770 if (generate)
4771 {
4772 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4773 emit_constant_insn (cond,
4774 gen_rtx_SET (new_src, GEN_INT (temp1)));
4775 emit_constant_insn (cond,
4776 gen_ashrsi3 (target, new_src,
4777 GEN_INT (set_sign_bit_copies - 1)));
4778 }
4779 return 2;
4780 }
4781 }
4782
4783 /* See if we can calculate the value as the difference between two
4784 valid immediates. */
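/* E.g. (illustrative) 0x0001fffe is not encodable directly, but it
   equals 0x20000 - 2, so it can be built with a MOV of one valid
   immediate followed by a SUB of another. */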
4785 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4786 {
4787 int topshift = clear_sign_bit_copies & ~1;
4788
4789 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4790 & (0xff000000 >> topshift));
4791
4792 /* If temp1 is zero, then that means the 9 most significant
4793 bits of remainder were 1 and we've caused it to overflow.
4794 When topshift is 0 we don't need to do anything since we
4795 can borrow from 'bit 32'. */
4796 if (temp1 == 0 && topshift != 0)
4797 temp1 = 0x80000000 >> (topshift - 1);
4798
4799 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4800
4801 if (const_ok_for_arm (temp2))
4802 {
4803 if (generate)
4804 {
4805 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4806 emit_constant_insn (cond,
4807 gen_rtx_SET (new_src, GEN_INT (temp1)));
4808 emit_constant_insn (cond,
4809 gen_addsi3 (target, new_src,
4810 GEN_INT (-temp2)));
4811 }
4812
4813 return 2;
4814 }
4815 }
4816
4817 /* See if we can generate this by setting the bottom (or the top)
4818 16 bits, and then shifting these into the other half of the
4819 word. We only look for the simplest cases, to do more would cost
4820 too much. Be careful, however, not to generate this when the
4821 alternative would take fewer insns. */
4822 if (val & 0xffff0000)
4823 {
4824 temp1 = remainder & 0xffff0000;
4825 temp2 = remainder & 0x0000ffff;
4826
4827 /* Overlaps outside this range are best done using other methods. */
4828 for (i = 9; i < 24; i++)
4829 {
4830 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4831 && !const_ok_for_arm (temp2))
4832 {
4833 rtx new_src = (subtargets
4834 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4835 : target);
4836 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4837 source, subtargets, generate);
4838 source = new_src;
4839 if (generate)
4840 emit_constant_insn
4841 (cond,
4842 gen_rtx_SET
4843 (target,
4844 gen_rtx_IOR (mode,
4845 gen_rtx_ASHIFT (mode, source,
4846 GEN_INT (i)),
4847 source)));
4848 return insns + 1;
4849 }
4850 }
4851
4852 /* Don't duplicate cases already considered. */
4853 for (i = 17; i < 24; i++)
4854 {
4855 if (((temp1 | (temp1 >> i)) == remainder)
4856 && !const_ok_for_arm (temp1))
4857 {
4858 rtx new_src = (subtargets
4859 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4860 : target);
4861 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4862 source, subtargets, generate);
4863 source = new_src;
4864 if (generate)
4865 emit_constant_insn
4866 (cond,
4867 gen_rtx_SET (target,
4868 gen_rtx_IOR
4869 (mode,
4870 gen_rtx_LSHIFTRT (mode, source,
4871 GEN_INT (i)),
4872 source)));
4873 return insns + 1;
4874 }
4875 }
4876 }
4877 break;
4878
4879 case IOR:
4880 case XOR:
4881 /* If we have IOR or XOR, and the constant can be loaded in a
4882 single instruction, and we can find a temporary to put it in,
4883 then this can be done in two instructions instead of 3-4. */
4884 if (subtargets
4885 /* TARGET can't be NULL if SUBTARGETS is 0. */
4886 || (reload_completed && !reg_mentioned_p (target, source)))
4887 {
4888 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4889 {
4890 if (generate)
4891 {
4892 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4893
4894 emit_constant_insn (cond,
4895 gen_rtx_SET (sub, GEN_INT (val)));
4896 emit_constant_insn (cond,
4897 gen_rtx_SET (target,
4898 gen_rtx_fmt_ee (code, mode,
4899 source, sub)));
4900 }
4901 return 2;
4902 }
4903 }
4904
4905 if (code == XOR)
4906 break;
4907
4908 /* Convert.
4909 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4910 and the remainder 0s for e.g. 0xfff00000)
4911 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4912
4913 This can be done in 2 instructions by using shifts with mov or mvn.
4914 e.g. for
4915 x = x | 0xfff00000;
4916 we generate.
4917 mvn r0, r0, asl #12
4918 mvn r0, r0, lsr #12 */
4919 if (set_sign_bit_copies > 8
4920 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4921 {
4922 if (generate)
4923 {
4924 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4925 rtx shift = GEN_INT (set_sign_bit_copies);
4926
4927 emit_constant_insn
4928 (cond,
4929 gen_rtx_SET (sub,
4930 gen_rtx_NOT (mode,
4931 gen_rtx_ASHIFT (mode,
4932 source,
4933 shift))));
4934 emit_constant_insn
4935 (cond,
4936 gen_rtx_SET (target,
4937 gen_rtx_NOT (mode,
4938 gen_rtx_LSHIFTRT (mode, sub,
4939 shift))));
4940 }
4941 return 2;
4942 }
4943
4944 /* Convert
4945 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4946 to
4947 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4948
4949 E.g. for r0 = r0 | 0xfff we generate
4950 mvn r0, r0, lsr #12
4951 mvn r0, r0, asl #12
4952
4953 */
4954 if (set_zero_bit_copies > 8
4955 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4956 {
4957 if (generate)
4958 {
4959 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4960 rtx shift = GEN_INT (set_zero_bit_copies);
4961
4962 emit_constant_insn
4963 (cond,
4964 gen_rtx_SET (sub,
4965 gen_rtx_NOT (mode,
4966 gen_rtx_LSHIFTRT (mode,
4967 source,
4968 shift))));
4969 emit_constant_insn
4970 (cond,
4971 gen_rtx_SET (target,
4972 gen_rtx_NOT (mode,
4973 gen_rtx_ASHIFT (mode, sub,
4974 shift))));
4975 }
4976 return 2;
4977 }
4978
4979 /* This will never be reached for Thumb2 because orn is a valid
4980 instruction. This is for Thumb1 and the ARM 32 bit cases.
4981
4982 x = y | constant (such that ~constant is a valid constant)
4983 Transform this to
4984 x = ~(~y & ~constant).
4985 */
4986 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4987 {
4988 if (generate)
4989 {
4990 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4991 emit_constant_insn (cond,
4992 gen_rtx_SET (sub,
4993 gen_rtx_NOT (mode, source)));
4994 source = sub;
4995 if (subtargets)
4996 sub = gen_reg_rtx (mode);
4997 emit_constant_insn (cond,
4998 gen_rtx_SET (sub,
4999 gen_rtx_AND (mode, source,
5000 GEN_INT (temp1))));
5001 emit_constant_insn (cond,
5002 gen_rtx_SET (target,
5003 gen_rtx_NOT (mode, sub)));
5004 }
5005 return 3;
5006 }
5007 break;
5008
5009 case AND:
5010 /* See if two shifts will do 2 or more insns' worth of work. */
5011 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5012 {
5013 HOST_WIDE_INT shift_mask = ((0xffffffff
5014 << (32 - clear_sign_bit_copies))
5015 & 0xffffffff);
5016
5017 if ((remainder | shift_mask) != 0xffffffff)
5018 {
5019 HOST_WIDE_INT new_val
5020 = ARM_SIGN_EXTEND (remainder | shift_mask);
5021
5022 if (generate)
5023 {
5024 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5025 insns = arm_gen_constant (AND, SImode, cond, new_val,
5026 new_src, source, subtargets, 1);
5027 source = new_src;
5028 }
5029 else
5030 {
5031 rtx targ = subtargets ? NULL_RTX : target;
5032 insns = arm_gen_constant (AND, mode, cond, new_val,
5033 targ, source, subtargets, 0);
5034 }
5035 }
5036
5037 if (generate)
5038 {
5039 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5040 rtx shift = GEN_INT (clear_sign_bit_copies);
5041
5042 emit_insn (gen_ashlsi3 (new_src, source, shift));
5043 emit_insn (gen_lshrsi3 (target, new_src, shift));
5044 }
5045
5046 return insns + 2;
5047 }
5048
5049 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5050 {
5051 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5052
5053 if ((remainder | shift_mask) != 0xffffffff)
5054 {
5055 HOST_WIDE_INT new_val
5056 = ARM_SIGN_EXTEND (remainder | shift_mask);
5057 if (generate)
5058 {
5059 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5060
5061 insns = arm_gen_constant (AND, mode, cond, new_val,
5062 new_src, source, subtargets, 1);
5063 source = new_src;
5064 }
5065 else
5066 {
5067 rtx targ = subtargets ? NULL_RTX : target;
5068
5069 insns = arm_gen_constant (AND, mode, cond, new_val,
5070 targ, source, subtargets, 0);
5071 }
5072 }
5073
5074 if (generate)
5075 {
5076 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5077 rtx shift = GEN_INT (clear_zero_bit_copies);
5078
5079 emit_insn (gen_lshrsi3 (new_src, source, shift));
5080 emit_insn (gen_ashlsi3 (target, new_src, shift));
5081 }
5082
5083 return insns + 2;
5084 }
5085
5086 break;
5087
5088 default:
5089 break;
5090 }
5091
5092 /* Calculate what the instruction sequences would be if we generated it
5093 normally, negated, or inverted. */
5094 if (code == AND)
5095 /* AND cannot be split into multiple insns, so invert and use BIC. */
5096 insns = 99;
5097 else
5098 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5099
5100 if (can_negate)
5101 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5102 &neg_immediates);
5103 else
5104 neg_insns = 99;
5105
5106 if (can_invert || final_invert)
5107 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5108 &inv_immediates);
5109 else
5110 inv_insns = 99;
5111
5112 immediates = &pos_immediates;
5113
5114 /* Is the negated immediate sequence more efficient? */
5115 if (neg_insns < insns && neg_insns <= inv_insns)
5116 {
5117 insns = neg_insns;
5118 immediates = &neg_immediates;
5119 }
5120 else
5121 can_negate = 0;
5122
5123 /* Is the inverted immediate sequence more efficient?
5124 We must allow for an extra NOT instruction for XOR operations, although
5125 there is some chance that the final 'mvn' will get optimized later. */
5126 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5127 {
5128 insns = inv_insns;
5129 immediates = &inv_immediates;
5130 }
5131 else
5132 {
5133 can_invert = 0;
5134 final_invert = 0;
5135 }
5136
5137 /* Now output the chosen sequence as instructions. */
5138 if (generate)
5139 {
5140 for (i = 0; i < insns; i++)
5141 {
5142 rtx new_src, temp1_rtx;
5143
5144 temp1 = immediates->i[i];
5145
5146 if (code == SET || code == MINUS)
5147 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5148 else if ((final_invert || i < (insns - 1)) && subtargets)
5149 new_src = gen_reg_rtx (mode);
5150 else
5151 new_src = target;
5152
5153 if (can_invert)
5154 temp1 = ~temp1;
5155 else if (can_negate)
5156 temp1 = -temp1;
5157
5158 temp1 = trunc_int_for_mode (temp1, mode);
5159 temp1_rtx = GEN_INT (temp1);
5160
5161 if (code == SET)
5162 ;
5163 else if (code == MINUS)
5164 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5165 else
5166 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5167
5168 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5169 source = new_src;
5170
5171 if (code == SET)
5172 {
5173 can_negate = can_invert;
5174 can_invert = 0;
5175 code = PLUS;
5176 }
5177 else if (code == MINUS)
5178 code = PLUS;
5179 }
5180 }
5181
5182 if (final_invert)
5183 {
5184 if (generate)
5185 emit_constant_insn (cond, gen_rtx_SET (target,
5186 gen_rtx_NOT (mode, source)));
5187 insns++;
5188 }
5189
5190 return insns;
5191 }
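
/* A rough worked example of the selection above: for x &= 0xfffe0f0f
   the positive path is abandoned up front (AND is forced to 99 insns),
   but the inverted remainder 0x0001f0f0 splits into two valid
   immediates such as 0x1f000 and 0xf0, so the whole operation is
   emitted as two BIC-style AND instructions instead of loading the
   mask from the constant pool.  */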
5192
5193 /* Canonicalize a comparison so that we are more likely to recognize it.
5194 This can be done for a few constant compares, where we can make the
5195 immediate value easier to load. */
5196
5197 static void
5198 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5199 bool op0_preserve_value)
5200 {
5201 machine_mode mode;
5202 unsigned HOST_WIDE_INT i, maxval;
5203
5204 mode = GET_MODE (*op0);
5205 if (mode == VOIDmode)
5206 mode = GET_MODE (*op1);
5207
5208 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5209
5210 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5211 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5212 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5213 for GTU/LEU in Thumb mode. */
5214 if (mode == DImode)
5215 {
5216
5217 if (*code == GT || *code == LE
5218 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5219 {
5220 /* Missing comparison. First try to use an available
5221 comparison. */
5222 if (CONST_INT_P (*op1))
5223 {
5224 i = INTVAL (*op1);
5225 switch (*code)
5226 {
5227 case GT:
5228 case LE:
5229 if (i != maxval
5230 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5231 {
5232 *op1 = GEN_INT (i + 1);
5233 *code = *code == GT ? GE : LT;
5234 return;
5235 }
5236 break;
5237 case GTU:
5238 case LEU:
5239 if (i != ~((unsigned HOST_WIDE_INT) 0)
5240 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5241 {
5242 *op1 = GEN_INT (i + 1);
5243 *code = *code == GTU ? GEU : LTU;
5244 return;
5245 }
5246 break;
5247 default:
5248 gcc_unreachable ();
5249 }
5250 }
5251
5252 /* If that did not work, reverse the condition. */
5253 if (!op0_preserve_value)
5254 {
5255 std::swap (*op0, *op1);
5256 *code = (int)swap_condition ((enum rtx_code)*code);
5257 }
5258 }
5259 return;
5260 }
5261
5262 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5263 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5264 to facilitate possible combining with a cmp into 'ands'. */
5265 if (mode == SImode
5266 && GET_CODE (*op0) == ZERO_EXTEND
5267 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5268 && GET_MODE (XEXP (*op0, 0)) == QImode
5269 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5270 && subreg_lowpart_p (XEXP (*op0, 0))
5271 && *op1 == const0_rtx)
5272 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5273 GEN_INT (255));
5274
5275 /* Comparisons smaller than DImode. Only adjust comparisons against
5276 an out-of-range constant. */
5277 if (!CONST_INT_P (*op1)
5278 || const_ok_for_arm (INTVAL (*op1))
5279 || const_ok_for_arm (- INTVAL (*op1)))
5280 return;
5281
5282 i = INTVAL (*op1);
5283
5284 switch (*code)
5285 {
5286 case EQ:
5287 case NE:
5288 return;
5289
5290 case GT:
5291 case LE:
5292 if (i != maxval
5293 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5294 {
5295 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5296 *code = *code == GT ? GE : LT;
5297 return;
5298 }
5299 break;
5300
5301 case GE:
5302 case LT:
5303 if (i != ~maxval
5304 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5305 {
5306 *op1 = GEN_INT (i - 1);
5307 *code = *code == GE ? GT : LE;
5308 return;
5309 }
5310 break;
5311
5312 case GTU:
5313 case LEU:
5314 if (i != ~((unsigned HOST_WIDE_INT) 0)
5315 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5316 {
5317 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5318 *code = *code == GTU ? GEU : LTU;
5319 return;
5320 }
5321 break;
5322
5323 case GEU:
5324 case LTU:
5325 if (i != 0
5326 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5327 {
5328 *op1 = GEN_INT (i - 1);
5329 *code = *code == GEU ? GTU : LEU;
5330 return;
5331 }
5332 break;
5333
5334 default:
5335 gcc_unreachable ();
5336 }
5337 }
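
/* Purely as an illustration of the adjustment above: "if (x > 0xfff)"
   compares against a constant that is not a valid ARM immediate, but
   0x1000 is, so the comparison is rewritten as GE against 0x1000:

       cmp     r0, #4096
       bge     .Ltaken

   avoiding a separate load of 0xfff into a scratch register.  */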
5338
5339
5340 /* Define how to find the value returned by a function. */
5341
5342 static rtx
5343 arm_function_value(const_tree type, const_tree func,
5344 bool outgoing ATTRIBUTE_UNUSED)
5345 {
5346 machine_mode mode;
5347 int unsignedp ATTRIBUTE_UNUSED;
5348 rtx r ATTRIBUTE_UNUSED;
5349
5350 mode = TYPE_MODE (type);
5351
5352 if (TARGET_AAPCS_BASED)
5353 return aapcs_allocate_return_reg (mode, type, func);
5354
5355 /* Promote integer types. */
5356 if (INTEGRAL_TYPE_P (type))
5357 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5358
5359 /* Promote small structs returned in a register to full-word size
5360 for big-endian AAPCS. */
5361 if (arm_return_in_msb (type))
5362 {
5363 HOST_WIDE_INT size = int_size_in_bytes (type);
5364 if (size % UNITS_PER_WORD != 0)
5365 {
5366 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5367 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5368 }
5369 }
5370
5371 return arm_libcall_value_1 (mode);
5372 }
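
/* As a sketch of the big-endian promotion above: on an armeb AAPCS
   target a 3-byte struct returned by value has its size rounded up to
   4, so the full SImode r0 is used, with the value placed towards the
   most significant end as arm_return_in_msb requires.  */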
5373
5374 /* libcall hashtable helpers. */
5375
5376 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5377 {
5378 static inline hashval_t hash (const rtx_def *);
5379 static inline bool equal (const rtx_def *, const rtx_def *);
5380 static inline void remove (rtx_def *);
5381 };
5382
5383 inline bool
5384 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5385 {
5386 return rtx_equal_p (p1, p2);
5387 }
5388
5389 inline hashval_t
5390 libcall_hasher::hash (const rtx_def *p1)
5391 {
5392 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5393 }
5394
5395 typedef hash_table<libcall_hasher> libcall_table_type;
5396
5397 static void
5398 add_libcall (libcall_table_type *htab, rtx libcall)
5399 {
5400 *htab->find_slot (libcall, INSERT) = libcall;
5401 }
5402
5403 static bool
5404 arm_libcall_uses_aapcs_base (const_rtx libcall)
5405 {
5406 static bool init_done = false;
5407 static libcall_table_type *libcall_htab = NULL;
5408
5409 if (!init_done)
5410 {
5411 init_done = true;
5412
5413 libcall_htab = new libcall_table_type (31);
5414 add_libcall (libcall_htab,
5415 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5416 add_libcall (libcall_htab,
5417 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5418 add_libcall (libcall_htab,
5419 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5420 add_libcall (libcall_htab,
5421 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5422
5423 add_libcall (libcall_htab,
5424 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5425 add_libcall (libcall_htab,
5426 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5427 add_libcall (libcall_htab,
5428 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5429 add_libcall (libcall_htab,
5430 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5431
5432 add_libcall (libcall_htab,
5433 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5434 add_libcall (libcall_htab,
5435 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5436 add_libcall (libcall_htab,
5437 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5438 add_libcall (libcall_htab,
5439 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5440 add_libcall (libcall_htab,
5441 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5442 add_libcall (libcall_htab,
5443 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5444 add_libcall (libcall_htab,
5445 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5446 add_libcall (libcall_htab,
5447 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5448
5449 /* Values from double-precision helper functions are returned in core
5450 registers if the selected core only supports single-precision
5451 arithmetic, even if we are using the hard-float ABI. The same is
5452 true for single-precision helpers, but we will never be using the
5453 hard-float ABI on a CPU which doesn't support single-precision
5454 operations in hardware. */
5455 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5456 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5457 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5458 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5459 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5460 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5461 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5462 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5463 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5464 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5465 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5466 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5467 SFmode));
5468 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5469 DFmode));
5470 add_libcall (libcall_htab,
5471 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5472 }
5473
5474 return libcall && libcall_htab->find (libcall) != NULL;
5475 }
5476
5477 static rtx
5478 arm_libcall_value_1 (machine_mode mode)
5479 {
5480 if (TARGET_AAPCS_BASED)
5481 return aapcs_libcall_value (mode);
5482 else if (TARGET_IWMMXT_ABI
5483 && arm_vector_mode_supported_p (mode))
5484 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5485 else
5486 return gen_rtx_REG (mode, ARG_REGISTER (1));
5487 }
5488
5489 /* Define how to find the value returned by a library function
5490 assuming the value has mode MODE. */
5491
5492 static rtx
5493 arm_libcall_value (machine_mode mode, const_rtx libcall)
5494 {
5495 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5496 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5497 {
5498 /* The following libcalls return their result in integer registers,
5499 even though they return a floating point value. */
5500 if (arm_libcall_uses_aapcs_base (libcall))
5501 return gen_rtx_REG (mode, ARG_REGISTER (1));
5502
5503 }
5504
5505 return arm_libcall_value_1 (mode);
5506 }
5507
5508 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5509
5510 static bool
5511 arm_function_value_regno_p (const unsigned int regno)
5512 {
5513 if (regno == ARG_REGISTER (1)
5514 || (TARGET_32BIT
5515 && TARGET_AAPCS_BASED
5516 && TARGET_HARD_FLOAT
5517 && regno == FIRST_VFP_REGNUM)
5518 || (TARGET_IWMMXT_ABI
5519 && regno == FIRST_IWMMXT_REGNUM))
5520 return true;
5521
5522 return false;
5523 }
5524
5525 /* Determine the amount of memory needed to store the possible return
5526 registers of an untyped call. */
5527 int
5528 arm_apply_result_size (void)
5529 {
5530 int size = 16;
5531
5532 if (TARGET_32BIT)
5533 {
5534 if (TARGET_HARD_FLOAT_ABI)
5535 size += 32;
5536 if (TARGET_IWMMXT_ABI)
5537 size += 8;
5538 }
5539
5540 return size;
5541 }
5542
5543 /* Decide whether TYPE should be returned in memory (true)
5544 or in a register (false). FNTYPE is the type of the function making
5545 the call. */
5546 static bool
5547 arm_return_in_memory (const_tree type, const_tree fntype)
5548 {
5549 HOST_WIDE_INT size;
5550
5551 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5552
5553 if (TARGET_AAPCS_BASED)
5554 {
5555 /* Simple, non-aggregate types (i.e. not including vectors and
5556 complex types) are always returned in a register (or registers).
5557 We don't care about which register here, so we can short-cut
5558 some of the detail. */
5559 if (!AGGREGATE_TYPE_P (type)
5560 && TREE_CODE (type) != VECTOR_TYPE
5561 && TREE_CODE (type) != COMPLEX_TYPE)
5562 return false;
5563
5564 /* Any return value that is no larger than one word can be
5565 returned in r0. */
5566 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5567 return false;
5568
5569 /* Check any available co-processors to see if they accept the
5570 type as a register candidate (VFP, for example, can return
5571 some aggregates in consecutive registers). These aren't
5572 available if the call is variadic. */
5573 if (aapcs_select_return_coproc (type, fntype) >= 0)
5574 return false;
5575
5576 /* Vector values should be returned using ARM registers, not
5577 memory (unless they're over 16 bytes, which will break since
5578 we only have four call-clobbered registers to play with). */
5579 if (TREE_CODE (type) == VECTOR_TYPE)
5580 return (size < 0 || size > (4 * UNITS_PER_WORD));
5581
5582 /* The rest go in memory. */
5583 return true;
5584 }
5585
5586 if (TREE_CODE (type) == VECTOR_TYPE)
5587 return (size < 0 || size > (4 * UNITS_PER_WORD));
5588
5589 if (!AGGREGATE_TYPE_P (type)
5590 && (TREE_CODE (type) != VECTOR_TYPE))
5591 /* All simple types are returned in registers. */
5592 return false;
5593
5594 if (arm_abi != ARM_ABI_APCS)
5595 {
5596 /* ATPCS and later return aggregate types in memory only if they are
5597 larger than a word (or are variable size). */
5598 return (size < 0 || size > UNITS_PER_WORD);
5599 }
5600
5601 /* For the arm-wince targets we choose to be compatible with Microsoft's
5602 ARM and Thumb compilers, which always return aggregates in memory. */
5603 #ifndef ARM_WINCE
5604 /* All structures/unions bigger than one word are returned in memory.
5605 Also catch the case where int_size_in_bytes returns -1. In this case
5606 the aggregate is either huge or of variable size, and in either case
5607 we will want to return it via memory and not in a register. */
5608 if (size < 0 || size > UNITS_PER_WORD)
5609 return true;
5610
5611 if (TREE_CODE (type) == RECORD_TYPE)
5612 {
5613 tree field;
5614
5615 /* For a struct the APCS says that we only return in a register
5616 if the type is 'integer like' and every addressable element
5617 has an offset of zero. For practical purposes this means
5618 that the structure can have at most one non bit-field element
5619 and that this element must be the first one in the structure. */
5620
5621 /* Find the first field, ignoring non FIELD_DECL things which will
5622 have been created by C++. */
5623 for (field = TYPE_FIELDS (type);
5624 field && TREE_CODE (field) != FIELD_DECL;
5625 field = DECL_CHAIN (field))
5626 continue;
5627
5628 if (field == NULL)
5629 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5630
5631 /* Check that the first field is valid for returning in a register. */
5632
5633 /* ... Floats are not allowed */
5634 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5635 return true;
5636
5637 /* ... Aggregates that are not themselves valid for returning in
5638 a register are not allowed. */
5639 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5640 return true;
5641
5642 /* Now check the remaining fields, if any. Only bitfields are allowed,
5643 since they are not addressable. */
5644 for (field = DECL_CHAIN (field);
5645 field;
5646 field = DECL_CHAIN (field))
5647 {
5648 if (TREE_CODE (field) != FIELD_DECL)
5649 continue;
5650
5651 if (!DECL_BIT_FIELD_TYPE (field))
5652 return true;
5653 }
5654
5655 return false;
5656 }
5657
5658 if (TREE_CODE (type) == UNION_TYPE)
5659 {
5660 tree field;
5661
5662 /* Unions can be returned in registers if every element is
5663 integral, or can be returned in an integer register. */
5664 for (field = TYPE_FIELDS (type);
5665 field;
5666 field = DECL_CHAIN (field))
5667 {
5668 if (TREE_CODE (field) != FIELD_DECL)
5669 continue;
5670
5671 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5672 return true;
5673
5674 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5675 return true;
5676 }
5677
5678 return false;
5679 }
5680 #endif /* not ARM_WINCE */
5681
5682 /* Return all other types in memory. */
5683 return true;
5684 }
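
/* Rough examples of the APCS 'integer like' rule above (they do not
   apply to AAPCS, where any aggregate of at most one word is returned
   in registers):

     struct a { int x; };         -- single integer member: register
     struct b { float f; };       -- first member is a float: memory
     struct c { short lo, hi; };  -- 'hi' is addressable at a non-zero
                                     offset and not a bit-field: memory  */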
5685
5686 const struct pcs_attribute_arg
5687 {
5688 const char *arg;
5689 enum arm_pcs value;
5690 } pcs_attribute_args[] =
5691 {
5692 {"aapcs", ARM_PCS_AAPCS},
5693 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5694 #if 0
5695 /* We could recognize these, but changes would be needed elsewhere
5696 * to implement them. */
5697 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5698 {"atpcs", ARM_PCS_ATPCS},
5699 {"apcs", ARM_PCS_APCS},
5700 #endif
5701 {NULL, ARM_PCS_UNKNOWN}
5702 };
5703
5704 static enum arm_pcs
5705 arm_pcs_from_attribute (tree attr)
5706 {
5707 const struct pcs_attribute_arg *ptr;
5708 const char *arg;
5709
5710 /* Get the value of the argument. */
5711 if (TREE_VALUE (attr) == NULL_TREE
5712 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5713 return ARM_PCS_UNKNOWN;
5714
5715 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5716
5717 /* Check it against the list of known arguments. */
5718 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5719 if (streq (arg, ptr->arg))
5720 return ptr->value;
5721
5722 /* An unrecognized PCS variant. */
5723 return ARM_PCS_UNKNOWN;
5724 }
5725
5726 /* Get the PCS variant to use for this call. TYPE is the function's type
5727 specification, DECL is the specific declaration. DECL may be null if
5728 the call could be indirect or if this is a library call. */
5729 static enum arm_pcs
5730 arm_get_pcs_model (const_tree type, const_tree decl)
5731 {
5732 bool user_convention = false;
5733 enum arm_pcs user_pcs = arm_pcs_default;
5734 tree attr;
5735
5736 gcc_assert (type);
5737
5738 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5739 if (attr)
5740 {
5741 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5742 user_convention = true;
5743 }
5744
5745 if (TARGET_AAPCS_BASED)
5746 {
5747 /* Detect varargs functions. These always use the base rules
5748 (no argument is ever a candidate for a co-processor
5749 register). */
5750 bool base_rules = stdarg_p (type);
5751
5752 if (user_convention)
5753 {
5754 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5755 sorry ("non-AAPCS derived PCS variant");
5756 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5757 error ("variadic functions must use the base AAPCS variant");
5758 }
5759
5760 if (base_rules)
5761 return ARM_PCS_AAPCS;
5762 else if (user_convention)
5763 return user_pcs;
5764 else if (decl && flag_unit_at_a_time)
5765 {
5766 /* Local functions never leak outside this compilation unit,
5767 so we are free to use whatever conventions are
5768 appropriate. */
5769 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5770 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5771 if (i && i->local)
5772 return ARM_PCS_AAPCS_LOCAL;
5773 }
5774 }
5775 else if (user_convention && user_pcs != arm_pcs_default)
5776 sorry ("PCS variant");
5777
5778 /* For everything else we use the target's default. */
5779 return arm_pcs_default;
5780 }
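
/* For example, on a softfp multilib the attribute below (one of the
   names accepted by arm_pcs_from_attribute) selects the VFP variant
   for this one signature, while everything else in the translation
   unit keeps arm_pcs_default:

     double dot (const double *, const double *, int)
       __attribute__ ((pcs ("aapcs-vfp")));  */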
5781
5782
5783 static void
5784 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5785 const_tree fntype ATTRIBUTE_UNUSED,
5786 rtx libcall ATTRIBUTE_UNUSED,
5787 const_tree fndecl ATTRIBUTE_UNUSED)
5788 {
5789 /* Record the unallocated VFP registers. */
5790 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5791 pcum->aapcs_vfp_reg_alloc = 0;
5792 }
5793
5794 /* Walk down the type tree of TYPE counting consecutive base elements.
5795 If *MODEP is VOIDmode, then set it to the first valid floating point
5796 type. If a non-floating point type is found, or if a floating point
5797 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5798 otherwise return the count in the sub-tree. */
5799 static int
5800 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5801 {
5802 machine_mode mode;
5803 HOST_WIDE_INT size;
5804
5805 switch (TREE_CODE (type))
5806 {
5807 case REAL_TYPE:
5808 mode = TYPE_MODE (type);
5809 if (mode != DFmode && mode != SFmode && mode != HFmode)
5810 return -1;
5811
5812 if (*modep == VOIDmode)
5813 *modep = mode;
5814
5815 if (*modep == mode)
5816 return 1;
5817
5818 break;
5819
5820 case COMPLEX_TYPE:
5821 mode = TYPE_MODE (TREE_TYPE (type));
5822 if (mode != DFmode && mode != SFmode)
5823 return -1;
5824
5825 if (*modep == VOIDmode)
5826 *modep = mode;
5827
5828 if (*modep == mode)
5829 return 2;
5830
5831 break;
5832
5833 case VECTOR_TYPE:
5834 /* Use V2SImode and V4SImode as representatives of all 64-bit
5835 and 128-bit vector types, whether or not those modes are
5836 supported with the present options. */
5837 size = int_size_in_bytes (type);
5838 switch (size)
5839 {
5840 case 8:
5841 mode = V2SImode;
5842 break;
5843 case 16:
5844 mode = V4SImode;
5845 break;
5846 default:
5847 return -1;
5848 }
5849
5850 if (*modep == VOIDmode)
5851 *modep = mode;
5852
5853 /* Vector modes are considered to be opaque: two vectors are
5854 equivalent for the purposes of being homogeneous aggregates
5855 if they are the same size. */
5856 if (*modep == mode)
5857 return 1;
5858
5859 break;
5860
5861 case ARRAY_TYPE:
5862 {
5863 int count;
5864 tree index = TYPE_DOMAIN (type);
5865
5866 /* Can't handle incomplete types nor sizes that are not
5867 fixed. */
5868 if (!COMPLETE_TYPE_P (type)
5869 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5870 return -1;
5871
5872 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5873 if (count == -1
5874 || !index
5875 || !TYPE_MAX_VALUE (index)
5876 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5877 || !TYPE_MIN_VALUE (index)
5878 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5879 || count < 0)
5880 return -1;
5881
5882 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5883 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5884
5885 /* There must be no padding. */
5886 if (wi::to_wide (TYPE_SIZE (type))
5887 != count * GET_MODE_BITSIZE (*modep))
5888 return -1;
5889
5890 return count;
5891 }
5892
5893 case RECORD_TYPE:
5894 {
5895 int count = 0;
5896 int sub_count;
5897 tree field;
5898
5899 /* Can't handle incomplete types nor sizes that are not
5900 fixed. */
5901 if (!COMPLETE_TYPE_P (type)
5902 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5903 return -1;
5904
5905 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5906 {
5907 if (TREE_CODE (field) != FIELD_DECL)
5908 continue;
5909
5910 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5911 if (sub_count < 0)
5912 return -1;
5913 count += sub_count;
5914 }
5915
5916 /* There must be no padding. */
5917 if (wi::to_wide (TYPE_SIZE (type))
5918 != count * GET_MODE_BITSIZE (*modep))
5919 return -1;
5920
5921 return count;
5922 }
5923
5924 case UNION_TYPE:
5925 case QUAL_UNION_TYPE:
5926 {
5927 /* These aren't very interesting except in a degenerate case. */
5928 int count = 0;
5929 int sub_count;
5930 tree field;
5931
5932 /* Can't handle incomplete types nor sizes that are not
5933 fixed. */
5934 if (!COMPLETE_TYPE_P (type)
5935 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5936 return -1;
5937
5938 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5939 {
5940 if (TREE_CODE (field) != FIELD_DECL)
5941 continue;
5942
5943 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5944 if (sub_count < 0)
5945 return -1;
5946 count = count > sub_count ? count : sub_count;
5947 }
5948
5949 /* There must be no padding. */
5950 if (wi::to_wide (TYPE_SIZE (type))
5951 != count * GET_MODE_BITSIZE (*modep))
5952 return -1;
5953
5954 return count;
5955 }
5956
5957 default:
5958 break;
5959 }
5960
5961 return -1;
5962 }
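
/* Some illustrative inputs for the walk above, in AAPCS "homogeneous
   aggregate" terms:

     struct s1 { double a, b; };      -- base mode DFmode, count 2
     struct s2 { float v[4]; };       -- base mode SFmode, count 4
     struct s3 { float f; int i; };   -- mixed element types: -1
     _Complex double                  -- base mode DFmode, count 2  */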
5963
5964 /* Return true if PCS_VARIANT should use VFP registers. */
5965 static bool
5966 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5967 {
5968 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5969 {
5970 static bool seen_thumb1_vfp = false;
5971
5972 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5973 {
5974 sorry ("Thumb-1 hard-float VFP ABI");
5975 /* sorry() is not immediately fatal, so only display this once. */
5976 seen_thumb1_vfp = true;
5977 }
5978
5979 return true;
5980 }
5981
5982 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5983 return false;
5984
5985 return (TARGET_32BIT && TARGET_HARD_FLOAT
5986 && (TARGET_VFP_DOUBLE || !is_double));
5987 }
5988
5989 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5990 suitable for passing or returning in VFP registers for the PCS
5991 variant selected. If it is, then *BASE_MODE is updated to contain
5992 a machine mode describing each element of the argument's type and
5993 *COUNT to hold the number of such elements. */
5994 static bool
5995 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5996 machine_mode mode, const_tree type,
5997 machine_mode *base_mode, int *count)
5998 {
5999 machine_mode new_mode = VOIDmode;
6000
6001 /* If we have the type information, prefer that to working things
6002 out from the mode. */
6003 if (type)
6004 {
6005 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6006
6007 if (ag_count > 0 && ag_count <= 4)
6008 *count = ag_count;
6009 else
6010 return false;
6011 }
6012 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6013 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6014 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6015 {
6016 *count = 1;
6017 new_mode = mode;
6018 }
6019 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6020 {
6021 *count = 2;
6022 new_mode = (mode == DCmode ? DFmode : SFmode);
6023 }
6024 else
6025 return false;
6026
6027
6028 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6029 return false;
6030
6031 *base_mode = new_mode;
6032 return true;
6033 }
6034
6035 static bool
6036 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6037 machine_mode mode, const_tree type)
6038 {
6039 int count ATTRIBUTE_UNUSED;
6040 machine_mode ag_mode ATTRIBUTE_UNUSED;
6041
6042 if (!use_vfp_abi (pcs_variant, false))
6043 return false;
6044 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6045 &ag_mode, &count);
6046 }
6047
6048 static bool
6049 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6050 const_tree type)
6051 {
6052 if (!use_vfp_abi (pcum->pcs_variant, false))
6053 return false;
6054
6055 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6056 &pcum->aapcs_vfp_rmode,
6057 &pcum->aapcs_vfp_rcount);
6058 }
6059
6060 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6061 for the behaviour of this function. */
6062
6063 static bool
6064 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6065 const_tree type ATTRIBUTE_UNUSED)
6066 {
6067 int rmode_size
6068 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6069 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6070 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6071 int regno;
6072
6073 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6074 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6075 {
6076 pcum->aapcs_vfp_reg_alloc = mask << regno;
6077 if (mode == BLKmode
6078 || (mode == TImode && ! TARGET_NEON)
6079 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6080 {
6081 int i;
6082 int rcount = pcum->aapcs_vfp_rcount;
6083 int rshift = shift;
6084 machine_mode rmode = pcum->aapcs_vfp_rmode;
6085 rtx par;
6086 if (!TARGET_NEON)
6087 {
6088 /* Avoid using unsupported vector modes. */
6089 if (rmode == V2SImode)
6090 rmode = DImode;
6091 else if (rmode == V4SImode)
6092 {
6093 rmode = DImode;
6094 rcount *= 2;
6095 rshift /= 2;
6096 }
6097 }
6098 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6099 for (i = 0; i < rcount; i++)
6100 {
6101 rtx tmp = gen_rtx_REG (rmode,
6102 FIRST_VFP_REGNUM + regno + i * rshift);
6103 tmp = gen_rtx_EXPR_LIST
6104 (VOIDmode, tmp,
6105 GEN_INT (i * GET_MODE_SIZE (rmode)));
6106 XVECEXP (par, 0, i) = tmp;
6107 }
6108
6109 pcum->aapcs_reg = par;
6110 }
6111 else
6112 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6113 return true;
6114 }
6115 return false;
6116 }
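
/* For instance, a struct of three floats arrives here with
   aapcs_vfp_rmode == SFmode and aapcs_vfp_rcount == 3, so the loop
   above claims s0-s2 (mask 0x7) and, because the struct itself is
   BLKmode, hands back a PARALLEL of three SFmode registers at byte
   offsets 0, 4 and 8.  */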
6117
6118 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6119 comment there for the behaviour of this function. */
6120
6121 static rtx
6122 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6123 machine_mode mode,
6124 const_tree type ATTRIBUTE_UNUSED)
6125 {
6126 if (!use_vfp_abi (pcs_variant, false))
6127 return NULL;
6128
6129 if (mode == BLKmode
6130 || (GET_MODE_CLASS (mode) == MODE_INT
6131 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6132 && !TARGET_NEON))
6133 {
6134 int count;
6135 machine_mode ag_mode;
6136 int i;
6137 rtx par;
6138 int shift;
6139
6140 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6141 &ag_mode, &count);
6142
6143 if (!TARGET_NEON)
6144 {
6145 if (ag_mode == V2SImode)
6146 ag_mode = DImode;
6147 else if (ag_mode == V4SImode)
6148 {
6149 ag_mode = DImode;
6150 count *= 2;
6151 }
6152 }
6153 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6154 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6155 for (i = 0; i < count; i++)
6156 {
6157 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6158 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6159 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6160 XVECEXP (par, 0, i) = tmp;
6161 }
6162
6163 return par;
6164 }
6165
6166 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6167 }
6168
6169 static void
6170 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6171 machine_mode mode ATTRIBUTE_UNUSED,
6172 const_tree type ATTRIBUTE_UNUSED)
6173 {
6174 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6175 pcum->aapcs_vfp_reg_alloc = 0;
6176 return;
6177 }
6178
6179 #define AAPCS_CP(X) \
6180 { \
6181 aapcs_ ## X ## _cum_init, \
6182 aapcs_ ## X ## _is_call_candidate, \
6183 aapcs_ ## X ## _allocate, \
6184 aapcs_ ## X ## _is_return_candidate, \
6185 aapcs_ ## X ## _allocate_return_reg, \
6186 aapcs_ ## X ## _advance \
6187 }
6188
6189 /* Table of co-processors that can be used to pass arguments in
6190 registers. Ideally no argument should be a candidate for more than
6191 one co-processor table entry, but the table is processed in order
6192 and stops after the first match. If that entry then fails to put
6193 the argument into a co-processor register, the argument will go on
6194 the stack. */
6195 static struct
6196 {
6197 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6198 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6199
6200 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6201 BLKmode) is a candidate for this co-processor's registers; this
6202 function should ignore any position-dependent state in
6203 CUMULATIVE_ARGS and only use call-type dependent information. */
6204 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6205
6206 /* Return true if the argument does get a co-processor register; it
6207 should set aapcs_reg to an RTX of the register allocated as is
6208 required for a return from FUNCTION_ARG. */
6209 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6210
6211 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6212 be returned in this co-processor's registers. */
6213 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6214
6215 /* Allocate and return an RTX element to hold the return type of a call. This
6216 routine must not fail and will only be called if is_return_candidate
6217 returned true with the same parameters. */
6218 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6219
6220 /* Finish processing this argument and prepare to start processing
6221 the next one. */
6222 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6223 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6224 {
6225 AAPCS_CP(vfp)
6226 };
6227
6228 #undef AAPCS_CP
6229
6230 static int
6231 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6232 const_tree type)
6233 {
6234 int i;
6235
6236 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6237 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6238 return i;
6239
6240 return -1;
6241 }
6242
6243 static int
6244 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6245 {
6246 /* We aren't passed a decl, so we can't check that a call is local.
6247 However, it isn't clear that that would be a win anyway, since it
6248 might limit some tail-calling opportunities. */
6249 enum arm_pcs pcs_variant;
6250
6251 if (fntype)
6252 {
6253 const_tree fndecl = NULL_TREE;
6254
6255 if (TREE_CODE (fntype) == FUNCTION_DECL)
6256 {
6257 fndecl = fntype;
6258 fntype = TREE_TYPE (fntype);
6259 }
6260
6261 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6262 }
6263 else
6264 pcs_variant = arm_pcs_default;
6265
6266 if (pcs_variant != ARM_PCS_AAPCS)
6267 {
6268 int i;
6269
6270 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6271 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6272 TYPE_MODE (type),
6273 type))
6274 return i;
6275 }
6276 return -1;
6277 }
6278
6279 static rtx
6280 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6281 const_tree fntype)
6282 {
6283 /* We aren't passed a decl, so we can't check that a call is local.
6284 However, it isn't clear that that would be a win anyway, since it
6285 might limit some tail-calling opportunities. */
6286 enum arm_pcs pcs_variant;
6287 int unsignedp ATTRIBUTE_UNUSED;
6288
6289 if (fntype)
6290 {
6291 const_tree fndecl = NULL_TREE;
6292
6293 if (TREE_CODE (fntype) == FUNCTION_DECL)
6294 {
6295 fndecl = fntype;
6296 fntype = TREE_TYPE (fntype);
6297 }
6298
6299 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6300 }
6301 else
6302 pcs_variant = arm_pcs_default;
6303
6304 /* Promote integer types. */
6305 if (type && INTEGRAL_TYPE_P (type))
6306 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6307
6308 if (pcs_variant != ARM_PCS_AAPCS)
6309 {
6310 int i;
6311
6312 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6313 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6314 type))
6315 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6316 mode, type);
6317 }
6318
6319 /* Promote small structs returned in a register to full-word size
6320 for big-endian AAPCS. */
6321 if (type && arm_return_in_msb (type))
6322 {
6323 HOST_WIDE_INT size = int_size_in_bytes (type);
6324 if (size % UNITS_PER_WORD != 0)
6325 {
6326 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6327 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6328 }
6329 }
6330
6331 return gen_rtx_REG (mode, R0_REGNUM);
6332 }
6333
6334 static rtx
6335 aapcs_libcall_value (machine_mode mode)
6336 {
6337 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6338 && GET_MODE_SIZE (mode) <= 4)
6339 mode = SImode;
6340
6341 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6342 }
6343
6344 /* Lay out a function argument using the AAPCS rules. The rule
6345 numbers referred to here are those in the AAPCS. */
6346 static void
6347 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6348 const_tree type, bool named)
6349 {
6350 int nregs, nregs2;
6351 int ncrn;
6352
6353 /* We only need to do this once per argument. */
6354 if (pcum->aapcs_arg_processed)
6355 return;
6356
6357 pcum->aapcs_arg_processed = true;
6358
6359 /* Special case: if named is false then we are handling an incoming
6360 anonymous argument which is on the stack. */
6361 if (!named)
6362 return;
6363
6364 /* Is this a potential co-processor register candidate? */
6365 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6366 {
6367 int slot = aapcs_select_call_coproc (pcum, mode, type);
6368 pcum->aapcs_cprc_slot = slot;
6369
6370 /* We don't have to apply any of the rules from part B of the
6371 preparation phase, these are handled elsewhere in the
6372 compiler. */
6373
6374 if (slot >= 0)
6375 {
6376 /* A Co-processor register candidate goes either in its own
6377 class of registers or on the stack. */
6378 if (!pcum->aapcs_cprc_failed[slot])
6379 {
6380 /* C1.cp - Try to allocate the argument to co-processor
6381 registers. */
6382 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6383 return;
6384
6385 /* C2.cp - Put the argument on the stack and note that we
6386 can't assign any more candidates in this slot. We also
6387 need to note that we have allocated stack space, so that
6388 we won't later try to split a non-cprc candidate between
6389 core registers and the stack. */
6390 pcum->aapcs_cprc_failed[slot] = true;
6391 pcum->can_split = false;
6392 }
6393
6394 /* We didn't get a register, so this argument goes on the
6395 stack. */
6396 gcc_assert (pcum->can_split == false);
6397 return;
6398 }
6399 }
6400
6401 /* C3 - For double-word aligned arguments, round the NCRN up to the
6402 next even number. */
6403 ncrn = pcum->aapcs_ncrn;
6404 if (ncrn & 1)
6405 {
6406 int res = arm_needs_doubleword_align (mode, type);
6407 /* Only warn during RTL expansion of call stmts, otherwise we would
6408 warn e.g. during gimplification even on functions that will be
6409 always inlined, and we'd warn multiple times. Don't warn when
6410 called in expand_function_start either, as we warn instead in
6411 arm_function_arg_boundary in that case. */
6412 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6413 inform (input_location, "parameter passing for argument of type "
6414 "%qT changed in GCC 7.1", type);
6415 else if (res > 0)
6416 ncrn++;
6417 }
6418
6419 nregs = ARM_NUM_REGS2 (mode, type);
6420
6421 /* Sigh, this test should really assert that nregs > 0, but a GCC
6422 extension allows empty structs and then gives them empty size; it
6423 then allows such a structure to be passed by value. For some of
6424 the code below we have to pretend that such an argument has
6425 non-zero size so that we 'locate' it correctly either in
6426 registers or on the stack. */
6427 gcc_assert (nregs >= 0);
6428
6429 nregs2 = nregs ? nregs : 1;
6430
6431 /* C4 - Argument fits entirely in core registers. */
6432 if (ncrn + nregs2 <= NUM_ARG_REGS)
6433 {
6434 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6435 pcum->aapcs_next_ncrn = ncrn + nregs;
6436 return;
6437 }
6438
6439 /* C5 - Some core registers left and there are no arguments already
6440 on the stack: split this argument between the remaining core
6441 registers and the stack. */
6442 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6443 {
6444 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6445 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6446 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6447 return;
6448 }
6449
6450 /* C6 - NCRN is set to 4. */
6451 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6452
6453 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6454 return;
6455 }
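
/* A worked example of rules C3-C7 above for the base variant, e.g.
   void f (int a, long long b, int c):

     a -> r0                                  (NCRN 0 -> 1)
     b -> C3 rounds NCRN up to 2, so r2/r3    (NCRN -> 4)
     c -> no core registers left, C6/C7: on the stack

   Note that r1 stays unused; it is not back-filled.  */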
6456
6457 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6458 for a call to a function whose data type is FNTYPE.
6459 For a library call, FNTYPE is NULL. */
6460 void
6461 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6462 rtx libname,
6463 tree fndecl ATTRIBUTE_UNUSED)
6464 {
6465 /* Long call handling. */
6466 if (fntype)
6467 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6468 else
6469 pcum->pcs_variant = arm_pcs_default;
6470
6471 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6472 {
6473 if (arm_libcall_uses_aapcs_base (libname))
6474 pcum->pcs_variant = ARM_PCS_AAPCS;
6475
6476 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6477 pcum->aapcs_reg = NULL_RTX;
6478 pcum->aapcs_partial = 0;
6479 pcum->aapcs_arg_processed = false;
6480 pcum->aapcs_cprc_slot = -1;
6481 pcum->can_split = true;
6482
6483 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6484 {
6485 int i;
6486
6487 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6488 {
6489 pcum->aapcs_cprc_failed[i] = false;
6490 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6491 }
6492 }
6493 return;
6494 }
6495
6496 /* Legacy ABIs */
6497
6498 /* On the ARM, the offset starts at 0. */
6499 pcum->nregs = 0;
6500 pcum->iwmmxt_nregs = 0;
6501 pcum->can_split = true;
6502
6503 /* Varargs vectors are treated the same as long long.
6504 named_count avoids having to change the way arm handles 'named' */
6505 pcum->named_count = 0;
6506 pcum->nargs = 0;
6507
6508 if (TARGET_REALLY_IWMMXT && fntype)
6509 {
6510 tree fn_arg;
6511
6512 for (fn_arg = TYPE_ARG_TYPES (fntype);
6513 fn_arg;
6514 fn_arg = TREE_CHAIN (fn_arg))
6515 pcum->named_count += 1;
6516
6517 if (! pcum->named_count)
6518 pcum->named_count = INT_MAX;
6519 }
6520 }
6521
6522 /* Return 1 if double word alignment is required for argument passing.
6523 Return -1 if double word alignment used to be required for argument
6524 passing before the PR77728 ABI fix, but is not required anymore.
6525 Return 0 if double word alignment is not required and wasn't required
6526 before either. */
6527 static int
6528 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6529 {
6530 if (!type)
6531 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6532
6533 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6534 if (!AGGREGATE_TYPE_P (type))
6535 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6536
6537 /* Array types: Use member alignment of element type. */
6538 if (TREE_CODE (type) == ARRAY_TYPE)
6539 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6540
6541 int ret = 0;
6542 /* Record/aggregate types: Use greatest member alignment of any member. */
6543 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6544 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6545 {
6546 if (TREE_CODE (field) == FIELD_DECL)
6547 return 1;
6548 else
6549 /* Before the PR77728 fix, we also incorrectly considered
6550 other aggregate fields, such as VAR_DECLs, TYPE_DECLs etc.
6551 Make sure we can warn about that with -Wpsabi. */
6552 ret = -1;
6553 }
6554
6555 return ret;
6556 }
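
/* E.g. 'int' (32-bit alignment) gives 0, while 'long long', 'double'
   and struct { long long x; } (64-bit alignment) give 1.  A C++ struct
   whose only doubleword-aligned entry in TYPE_FIELDS is a static data
   member (a VAR_DECL) gives -1, which callers turn into a -Wpsabi note
   rather than into extra alignment.  */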
6557
6558
6559 /* Determine where to put an argument to a function.
6560 Value is zero to push the argument on the stack,
6561 or a hard register in which to store the argument.
6562
6563 MODE is the argument's machine mode.
6564 TYPE is the data type of the argument (as a tree).
6565 This is null for libcalls where that information may
6566 not be available.
6567 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6568 the preceding args and about the function being called.
6569 NAMED is nonzero if this argument is a named parameter
6570 (otherwise it is an extra parameter matching an ellipsis).
6571
6572 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6573 other arguments are passed on the stack. If (NAMED == 0) (which happens
6574 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6575 defined), say it is passed on the stack (function_prologue will
6576 indeed make it pass on the stack if necessary). */
6577
6578 static rtx
6579 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6580 const_tree type, bool named)
6581 {
6582 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6583 int nregs;
6584
6585 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6586 a call insn (op3 of a call_value insn). */
6587 if (mode == VOIDmode)
6588 return const0_rtx;
6589
6590 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6591 {
6592 aapcs_layout_arg (pcum, mode, type, named);
6593 return pcum->aapcs_reg;
6594 }
6595
6596 /* Varargs vectors are treated the same as long long.
6597 named_count avoids having to change the way arm handles 'named' */
6598 if (TARGET_IWMMXT_ABI
6599 && arm_vector_mode_supported_p (mode)
6600 && pcum->named_count > pcum->nargs + 1)
6601 {
6602 if (pcum->iwmmxt_nregs <= 9)
6603 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6604 else
6605 {
6606 pcum->can_split = false;
6607 return NULL_RTX;
6608 }
6609 }
6610
6611 /* Put doubleword aligned quantities in even register pairs. */
6612 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6613 {
6614 int res = arm_needs_doubleword_align (mode, type);
6615 if (res < 0 && warn_psabi)
6616 inform (input_location, "parameter passing for argument of type "
6617 "%qT changed in GCC 7.1", type);
6618 else if (res > 0)
6619 pcum->nregs++;
6620 }
6621
6622 /* Only allow splitting an arg between regs and memory if all preceding
6623 args were allocated to regs. For args passed by reference we only count
6624 the reference pointer. */
6625 if (pcum->can_split)
6626 nregs = 1;
6627 else
6628 nregs = ARM_NUM_REGS2 (mode, type);
6629
6630 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6631 return NULL_RTX;
6632
6633 return gen_rtx_REG (mode, pcum->nregs);
6634 }
6635
6636 static unsigned int
6637 arm_function_arg_boundary (machine_mode mode, const_tree type)
6638 {
6639 if (!ARM_DOUBLEWORD_ALIGN)
6640 return PARM_BOUNDARY;
6641
6642 int res = arm_needs_doubleword_align (mode, type);
6643 if (res < 0 && warn_psabi)
6644 inform (input_location, "parameter passing for argument of type %qT "
6645 "changed in GCC 7.1", type);
6646
6647 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6648 }
6649
6650 static int
6651 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6652 tree type, bool named)
6653 {
6654 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6655 int nregs = pcum->nregs;
6656
6657 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6658 {
6659 aapcs_layout_arg (pcum, mode, type, named);
6660 return pcum->aapcs_partial;
6661 }
6662
6663 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6664 return 0;
6665
6666 if (NUM_ARG_REGS > nregs
6667 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6668 && pcum->can_split)
6669 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6670
6671 return 0;
6672 }
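
/* Sketch of the splitting case handled above for the legacy ABIs:
   after three int arguments in r0-r2, a long long argument (two words)
   gets r3 plus four bytes of stack, so this returns 4.  Under AAPCS
   the equivalent split (rule C5) is reported via pcum->aapcs_partial
   for, say, an 8-byte struct with only word alignment.  */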
6673
6674 /* Update the data in PCUM to advance over an argument
6675 of mode MODE and data type TYPE.
6676 (TYPE is null for libcalls where that information may not be available.) */
6677
6678 static void
6679 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6680 const_tree type, bool named)
6681 {
6682 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6683
6684 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6685 {
6686 aapcs_layout_arg (pcum, mode, type, named);
6687
6688 if (pcum->aapcs_cprc_slot >= 0)
6689 {
6690 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6691 type);
6692 pcum->aapcs_cprc_slot = -1;
6693 }
6694
6695 /* Generic stuff. */
6696 pcum->aapcs_arg_processed = false;
6697 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6698 pcum->aapcs_reg = NULL_RTX;
6699 pcum->aapcs_partial = 0;
6700 }
6701 else
6702 {
6703 pcum->nargs += 1;
6704 if (arm_vector_mode_supported_p (mode)
6705 && pcum->named_count > pcum->nargs
6706 && TARGET_IWMMXT_ABI)
6707 pcum->iwmmxt_nregs += 1;
6708 else
6709 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6710 }
6711 }
6712
6713 /* Variable sized types are passed by reference. This is a GCC
6714 extension to the ARM ABI. */
6715
6716 static bool
6717 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6718 machine_mode mode ATTRIBUTE_UNUSED,
6719 const_tree type, bool named ATTRIBUTE_UNUSED)
6720 {
6721 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6722 }
6723 \f
6724 /* Encode the current state of the #pragma [no_]long_calls. */
6725 typedef enum
6726 {
6727 OFF, /* No #pragma [no_]long_calls is in effect. */
6728 LONG, /* #pragma long_calls is in effect. */
6729 SHORT /* #pragma no_long_calls is in effect. */
6730 } arm_pragma_enum;
6731
6732 static arm_pragma_enum arm_pragma_long_calls = OFF;
6733
6734 void
6735 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6736 {
6737 arm_pragma_long_calls = LONG;
6738 }
6739
6740 void
6741 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6742 {
6743 arm_pragma_long_calls = SHORT;
6744 }
6745
6746 void
6747 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6748 {
6749 arm_pragma_long_calls = OFF;
6750 }
6751 \f
6752 /* Handle an attribute requiring a FUNCTION_DECL;
6753 arguments as in struct attribute_spec.handler. */
6754 static tree
6755 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6756 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6757 {
6758 if (TREE_CODE (*node) != FUNCTION_DECL)
6759 {
6760 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6761 name);
6762 *no_add_attrs = true;
6763 }
6764
6765 return NULL_TREE;
6766 }
6767
6768 /* Handle an "interrupt" or "isr" attribute;
6769 arguments as in struct attribute_spec.handler. */
6770 static tree
6771 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6772 bool *no_add_attrs)
6773 {
6774 if (DECL_P (*node))
6775 {
6776 if (TREE_CODE (*node) != FUNCTION_DECL)
6777 {
6778 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6779 name);
6780 *no_add_attrs = true;
6781 }
6782 /* FIXME: the argument if any is checked for type attributes;
6783 should it be checked for decl ones? */
6784 }
6785 else
6786 {
6787 if (TREE_CODE (*node) == FUNCTION_TYPE
6788 || TREE_CODE (*node) == METHOD_TYPE)
6789 {
6790 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6791 {
6792 warning (OPT_Wattributes, "%qE attribute ignored",
6793 name);
6794 *no_add_attrs = true;
6795 }
6796 }
6797 else if (TREE_CODE (*node) == POINTER_TYPE
6798 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6799 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6800 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6801 {
6802 *node = build_variant_type_copy (*node);
6803 TREE_TYPE (*node) = build_type_attribute_variant
6804 (TREE_TYPE (*node),
6805 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6806 *no_add_attrs = true;
6807 }
6808 else
6809 {
6810 /* Possibly pass this attribute on from the type to a decl. */
6811 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6812 | (int) ATTR_FLAG_FUNCTION_NEXT
6813 | (int) ATTR_FLAG_ARRAY_NEXT))
6814 {
6815 *no_add_attrs = true;
6816 return tree_cons (name, args, NULL_TREE);
6817 }
6818 else
6819 {
6820 warning (OPT_Wattributes, "%qE attribute ignored",
6821 name);
6822 }
6823 }
6824 }
6825
6826 return NULL_TREE;
6827 }
6828
6829 /* Handle a "pcs" attribute; arguments as in struct
6830 attribute_spec.handler. */
6831 static tree
6832 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6833 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6834 {
6835 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6836 {
6837 warning (OPT_Wattributes, "%qE attribute ignored", name);
6838 *no_add_attrs = true;
6839 }
6840 return NULL_TREE;
6841 }
6842
6843 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6844 /* Handle the "notshared" attribute. This attribute is another way of
6845 requesting hidden visibility. ARM's compiler supports
6846 "__declspec(notshared)"; we support the same thing via an
6847 attribute. */
6848
6849 static tree
6850 arm_handle_notshared_attribute (tree *node,
6851 tree name ATTRIBUTE_UNUSED,
6852 tree args ATTRIBUTE_UNUSED,
6853 int flags ATTRIBUTE_UNUSED,
6854 bool *no_add_attrs)
6855 {
6856 tree decl = TYPE_NAME (*node);
6857
6858 if (decl)
6859 {
6860 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6861 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6862 *no_add_attrs = false;
6863 }
6864 return NULL_TREE;
6865 }
6866 #endif
6867
6868 /* This function returns true if a function with declaration FNDECL and type
6869 FNTYPE uses the stack to pass arguments or to return its value, and false
6870 otherwise. This is used for functions with the attributes
6871 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6872 diagnostic messages if the stack is used. NAME is the name of the attribute
6873 used. */
6874
6875 static bool
6876 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6877 {
6878 function_args_iterator args_iter;
6879 CUMULATIVE_ARGS args_so_far_v;
6880 cumulative_args_t args_so_far;
6881 bool first_param = true;
6882 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6883
6884 /* Error out if any argument is passed on the stack. */
6885 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6886 args_so_far = pack_cumulative_args (&args_so_far_v);
6887 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6888 {
6889 rtx arg_rtx;
6890 machine_mode arg_mode = TYPE_MODE (arg_type);
6891
6892 prev_arg_type = arg_type;
6893 if (VOID_TYPE_P (arg_type))
6894 continue;
6895
6896 if (!first_param)
6897 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6898 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6899 if (!arg_rtx
6900 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6901 {
6902 error ("%qE attribute not available to functions with arguments "
6903 "passed on the stack", name);
6904 return true;
6905 }
6906 first_param = false;
6907 }
6908
6909 /* Error out for variadic functions since we cannot control how many
6910 arguments will be passed and thus the stack could be used. stdarg_p () is
6911 not used for this check, to avoid walking the argument list twice. */
6912 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6913 {
6914 error ("%qE attribute not available to functions with variable number "
6915 "of arguments", name);
6916 return true;
6917 }
6918
6919 /* Error out if return value is passed on the stack. */
6920 ret_type = TREE_TYPE (fntype);
6921 if (arm_return_in_memory (ret_type, fntype))
6922 {
6923 error ("%qE attribute not available to functions that return value on "
6924 "the stack", name);
6925 return true;
6926 }
6927 return false;
6928 }
6929
6930 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6931 function will check whether the attribute is allowed here and will add the
6932 attribute to the function declaration tree or otherwise issue a warning. */
6933
6934 static tree
6935 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6936 tree /* args */,
6937 int /* flags */,
6938 bool *no_add_attrs)
6939 {
6940 tree fndecl;
6941
6942 if (!use_cmse)
6943 {
6944 *no_add_attrs = true;
6945 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6946 name);
6947 return NULL_TREE;
6948 }
6949
6950 /* Ignore the attribute for anything that is not a function declaration. */
6951 if (TREE_CODE (*node) != FUNCTION_DECL)
6952 {
6953 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6954 name);
6955 *no_add_attrs = true;
6956 return NULL_TREE;
6957 }
6958
6959 fndecl = *node;
6960
6961 /* Warn for static linkage functions. */
6962 if (!TREE_PUBLIC (fndecl))
6963 {
6964 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
6965 "with static linkage", name);
6966 *no_add_attrs = true;
6967 return NULL_TREE;
6968 }
6969
6970 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
6971 TREE_TYPE (fndecl));
6972 return NULL_TREE;
6973 }
6974
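/* Illustrative sketch added by the editor (not part of the original file):
   a secure-state entry function as checked by the handler above.  The
   translation unit must be compiled with -mcmse, the function must have
   external linkage, and none of its arguments or its return value may be
   passed on the stack (see cmse_func_args_or_return_in_stack).  */
#if 0
int __attribute__ ((cmse_nonsecure_entry)) secure_service (int request);
#endif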
6975
6976 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6977 function will check whether the attribute is allowed here and will add the
6978 attribute to the function type tree or otherwise issue a diagnostic. The
6979 reason we check this at declaration time is to only allow the use of the
6980 attribute with declarations of function pointers and not function
6981 declarations. This function checks NODE is of the expected type and issues
6982 diagnostics otherwise using NAME. If it is not of the expected type
6983 *NO_ADD_ATTRS will be set to true. */
6984
6985 static tree
6986 arm_handle_cmse_nonsecure_call (tree *node, tree name,
6987 tree /* args */,
6988 int /* flags */,
6989 bool *no_add_attrs)
6990 {
6991 tree decl = NULL_TREE, fntype = NULL_TREE;
6992 tree type;
6993
6994 if (!use_cmse)
6995 {
6996 *no_add_attrs = true;
6997 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6998 name);
6999 return NULL_TREE;
7000 }
7001
7002 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7003 {
7004 decl = *node;
7005 fntype = TREE_TYPE (decl);
7006 }
7007
7008 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7009 fntype = TREE_TYPE (fntype);
7010
7011 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7012 {
7013 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7014 "function pointer", name);
7015 *no_add_attrs = true;
7016 return NULL_TREE;
7017 }
7018
7019 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7020
7021 if (*no_add_attrs)
7022 return NULL_TREE;
7023
7024 /* Prevent trees being shared among function types with and without
7025 cmse_nonsecure_call attribute. */
7026 type = TREE_TYPE (decl);
7027
7028 type = build_distinct_type_copy (type);
7029 TREE_TYPE (decl) = type;
7030 fntype = type;
7031
7032 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7033 {
7034 type = fntype;
7035 fntype = TREE_TYPE (fntype);
7036 fntype = build_distinct_type_copy (fntype);
7037 TREE_TYPE (type) = fntype;
7038 }
7039
7040 /* Construct a type attribute and add it to the function type. */
7041 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7042 TYPE_ATTRIBUTES (fntype));
7043 TYPE_ATTRIBUTES (fntype) = attrs;
7044 return NULL_TREE;
7045 }
7046
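/* Illustrative sketch added by the editor (not part of the original file):
   the handler above only accepts "cmse_nonsecure_call" on declarations
   whose type is (a pointer to) a function type, e.g. a function-pointer
   variable or a typedef, again requiring -mcmse.  */
#if 0
void (*ns_callback) (int) __attribute__ ((cmse_nonsecure_call));
#endif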
7047 /* Return 0 if the attributes for two types are incompatible, 1 if they
7048 are compatible, and 2 if they are nearly compatible (which causes a
7049 warning to be generated). */
7050 static int
7051 arm_comp_type_attributes (const_tree type1, const_tree type2)
7052 {
7053 int l1, l2, s1, s2;
7054
7055 /* Check for mismatch of non-default calling convention. */
7056 if (TREE_CODE (type1) != FUNCTION_TYPE)
7057 return 1;
7058
7059 /* Check for mismatched call attributes. */
7060 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7061 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7062 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7063 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7064
7065 /* Only bother to check if an attribute is defined. */
7066 if (l1 | l2 | s1 | s2)
7067 {
7068 /* If one type has an attribute, the other must have the same attribute. */
7069 if ((l1 != l2) || (s1 != s2))
7070 return 0;
7071
7072 /* Disallow mixed attributes. */
7073 if ((l1 & s2) || (l2 & s1))
7074 return 0;
7075 }
7076
7077 /* Check for mismatched ISR attribute. */
7078 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7079 if (! l1)
7080 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7081 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7082 if (! l2)
7083 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7084 if (l1 != l2)
7085 return 0;
7086
7087 l1 = lookup_attribute ("cmse_nonsecure_call",
7088 TYPE_ATTRIBUTES (type1)) != NULL;
7089 l2 = lookup_attribute ("cmse_nonsecure_call",
7090 TYPE_ATTRIBUTES (type2)) != NULL;
7091
7092 if (l1 != l2)
7093 return 0;
7094
7095 return 1;
7096 }
7097
7098 /* Assigns default attributes to newly defined type. This is used to
7099 set short_call/long_call attributes for function types of
7100 functions defined inside corresponding #pragma scopes. */
7101 static void
7102 arm_set_default_type_attributes (tree type)
7103 {
7104 /* Add __attribute__ ((long_call)) to all functions, when
7105 inside #pragma long_calls or __attribute__ ((short_call)),
7106 when inside #pragma no_long_calls. */
7107 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7108 {
7109 tree type_attr_list, attr_name;
7110 type_attr_list = TYPE_ATTRIBUTES (type);
7111
7112 if (arm_pragma_long_calls == LONG)
7113 attr_name = get_identifier ("long_call");
7114 else if (arm_pragma_long_calls == SHORT)
7115 attr_name = get_identifier ("short_call");
7116 else
7117 return;
7118
7119 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7120 TYPE_ATTRIBUTES (type) = type_attr_list;
7121 }
7122 }
7123 \f
7124 /* Return true if DECL is known to be linked into section SECTION. */
7125
7126 static bool
7127 arm_function_in_section_p (tree decl, section *section)
7128 {
7129 /* We can only be certain about the prevailing symbol definition. */
7130 if (!decl_binds_to_current_def_p (decl))
7131 return false;
7132
7133 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7134 if (!DECL_SECTION_NAME (decl))
7135 {
7136 /* Make sure that we will not create a unique section for DECL. */
7137 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7138 return false;
7139 }
7140
7141 return function_section (decl) == section;
7142 }
7143
7144 /* Return nonzero if a 32-bit "long_call" should be generated for
7145 a call from the current function to DECL. We generate a long_call
7146 if the function:
7147
7148 a. has an __attribute__ ((long_call))
7149 or b. is within the scope of a #pragma long_calls
7150 or c. the -mlong-calls command line switch has been specified
7151
7152 However we do not generate a long call if the function:
7153
7154 d. has an __attribute__ ((short_call))
7155 or e. is inside the scope of a #pragma no_long_calls
7156 or f. is defined in the same section as the current function. */
7157
7158 bool
7159 arm_is_long_call_p (tree decl)
7160 {
7161 tree attrs;
7162
7163 if (!decl)
7164 return TARGET_LONG_CALLS;
7165
7166 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7167 if (lookup_attribute ("short_call", attrs))
7168 return false;
7169
7170 /* For "f", be conservative, and only cater for cases in which the
7171 whole of the current function is placed in the same section. */
7172 if (!flag_reorder_blocks_and_partition
7173 && TREE_CODE (decl) == FUNCTION_DECL
7174 && arm_function_in_section_p (decl, current_function_section ()))
7175 return false;
7176
7177 if (lookup_attribute ("long_call", attrs))
7178 return true;
7179
7180 return TARGET_LONG_CALLS;
7181 }
7182
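/* Illustrative sketch added by the editor (not part of the original file):
   the source-level controls that feed the decision above.  -mlong-calls
   sets TARGET_LONG_CALLS, the pragmas set arm_pragma_long_calls (see
   arm_set_default_type_attributes), and the attributes override both.  */
#if 0
void far_away (void) __attribute__ ((long_call));   /* case a.  */
void close_by (void) __attribute__ ((short_call));  /* case d.  */
#pragma long_calls
void declared_far (void);                           /* case b.  */
#pragma long_calls_off
#endif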
7183 /* Return nonzero if it is ok to make a tail-call to DECL. */
7184 static bool
7185 arm_function_ok_for_sibcall (tree decl, tree exp)
7186 {
7187 unsigned long func_type;
7188
7189 if (cfun->machine->sibcall_blocked)
7190 return false;
7191
7192 /* Never tailcall something if we are generating code for Thumb-1. */
7193 if (TARGET_THUMB1)
7194 return false;
7195
7196 /* The PIC register is live on entry to VxWorks PLT entries, so we
7197 must make the call before restoring the PIC register. */
7198 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7199 return false;
7200
7201 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7202 may be used both as target of the call and base register for restoring
7203 the VFP registers */
7204 if (TARGET_APCS_FRAME && TARGET_ARM
7205 && TARGET_HARD_FLOAT
7206 && decl && arm_is_long_call_p (decl))
7207 return false;
7208
7209 /* If we are interworking and the function is not declared static
7210 then we can't tail-call it unless we know that it exists in this
7211 compilation unit (since it might be a Thumb routine). */
7212 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7213 && !TREE_ASM_WRITTEN (decl))
7214 return false;
7215
7216 func_type = arm_current_func_type ();
7217 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7218 if (IS_INTERRUPT (func_type))
7219 return false;
7220
7221 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7222 generated for entry functions themselves. */
7223 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7224 return false;
7225
7226 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7227 this would complicate matters for later code generation. */
7228 if (TREE_CODE (exp) == CALL_EXPR)
7229 {
7230 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7231 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7232 return false;
7233 }
7234
7235 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7236 {
7237 /* Check that the return value locations are the same. For
7238 example that we aren't returning a value from the sibling in
7239 a VFP register but then need to transfer it to a core
7240 register. */
7241 rtx a, b;
7242 tree decl_or_type = decl;
7243
7244 /* If it is an indirect function pointer, get the function type. */
7245 if (!decl)
7246 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7247
7248 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7249 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7250 cfun->decl, false);
7251 if (!rtx_equal_p (a, b))
7252 return false;
7253 }
7254
7255 /* Never tailcall if function may be called with a misaligned SP. */
7256 if (IS_STACKALIGN (func_type))
7257 return false;
7258
7259 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7260 references should become a NOP. Don't convert such calls into
7261 sibling calls. */
7262 if (TARGET_AAPCS_BASED
7263 && arm_abi == ARM_ABI_AAPCS
7264 && decl
7265 && DECL_WEAK (decl))
7266 return false;
7267
7268 /* We cannot do a tailcall for an indirect call by descriptor if all the
7269 argument registers are used because the only register left to load the
7270 address is IP and it will already contain the static chain. */
7271 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7272 {
7273 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7274 CUMULATIVE_ARGS cum;
7275 cumulative_args_t cum_v;
7276
7277 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7278 cum_v = pack_cumulative_args (&cum);
7279
7280 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7281 {
7282 tree type = TREE_VALUE (t);
7283 if (!VOID_TYPE_P (type))
7284 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7285 }
7286
7287 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7288 return false;
7289 }
7290
7291 /* Everything else is ok. */
7292 return true;
7293 }
7294
7295 \f
7296 /* Addressing mode support functions. */
7297
7298 /* Return nonzero if X is a legitimate immediate operand when compiling
7299 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7300 int
7301 legitimate_pic_operand_p (rtx x)
7302 {
7303 if (GET_CODE (x) == SYMBOL_REF
7304 || (GET_CODE (x) == CONST
7305 && GET_CODE (XEXP (x, 0)) == PLUS
7306 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7307 return 0;
7308
7309 return 1;
7310 }
7311
7312 /* Record that the current function needs a PIC register. Initialize
7313 cfun->machine->pic_reg if we have not already done so. */
7314
7315 static void
7316 require_pic_register (void)
7317 {
7318 /* A lot of the logic here is made obscure by the fact that this
7319 routine gets called as part of the rtx cost estimation process.
7320 We don't want those calls to affect any assumptions about the real
7321 function; and further, we can't call entry_of_function() until we
7322 start the real expansion process. */
7323 if (!crtl->uses_pic_offset_table)
7324 {
7325 gcc_assert (can_create_pseudo_p ());
7326 if (arm_pic_register != INVALID_REGNUM
7327 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7328 {
7329 if (!cfun->machine->pic_reg)
7330 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7331
7332 /* Play games to avoid marking the function as needing pic
7333 if we are being called as part of the cost-estimation
7334 process. */
7335 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7336 crtl->uses_pic_offset_table = 1;
7337 }
7338 else
7339 {
7340 rtx_insn *seq, *insn;
7341
7342 if (!cfun->machine->pic_reg)
7343 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7344
7345 /* Play games to avoid marking the function as needing pic
7346 if we are being called as part of the cost-estimation
7347 process. */
7348 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7349 {
7350 crtl->uses_pic_offset_table = 1;
7351 start_sequence ();
7352
7353 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7354 && arm_pic_register > LAST_LO_REGNUM)
7355 emit_move_insn (cfun->machine->pic_reg,
7356 gen_rtx_REG (Pmode, arm_pic_register));
7357 else
7358 arm_load_pic_register (0UL);
7359
7360 seq = get_insns ();
7361 end_sequence ();
7362
7363 for (insn = seq; insn; insn = NEXT_INSN (insn))
7364 if (INSN_P (insn))
7365 INSN_LOCATION (insn) = prologue_location;
7366
7367 /* We can be called during expansion of PHI nodes, where
7368 we can't yet emit instructions directly in the final
7369 insn stream. Queue the insns on the entry edge, they will
7370 be committed after everything else is expanded. */
7371 insert_insn_on_edge (seq,
7372 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7373 }
7374 }
7375 }
7376 }
7377
7378 rtx
7379 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7380 {
7381 if (GET_CODE (orig) == SYMBOL_REF
7382 || GET_CODE (orig) == LABEL_REF)
7383 {
7384 if (reg == 0)
7385 {
7386 gcc_assert (can_create_pseudo_p ());
7387 reg = gen_reg_rtx (Pmode);
7388 }
7389
7390 /* VxWorks does not impose a fixed gap between segments; the run-time
7391 gap can be different from the object-file gap. We therefore can't
7392 use GOTOFF unless we are absolutely sure that the symbol is in the
7393 same segment as the GOT. Unfortunately, the flexibility of linker
7394 scripts means that we can't be sure of that in general, so assume
7395 that GOTOFF is never valid on VxWorks. */
7396 /* References to weak symbols cannot be resolved locally: they
7397 may be overridden by a non-weak definition at link time. */
7398 rtx_insn *insn;
7399 if ((GET_CODE (orig) == LABEL_REF
7400 || (GET_CODE (orig) == SYMBOL_REF
7401 && SYMBOL_REF_LOCAL_P (orig)
7402 && (SYMBOL_REF_DECL (orig)
7403 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7404 && NEED_GOT_RELOC
7405 && arm_pic_data_is_text_relative)
7406 insn = arm_pic_static_addr (orig, reg);
7407 else
7408 {
7409 rtx pat;
7410 rtx mem;
7411
7412 /* If this function doesn't have a pic register, create one now. */
7413 require_pic_register ();
7414
7415 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7416
7417 /* Make the MEM as close to a constant as possible. */
7418 mem = SET_SRC (pat);
7419 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7420 MEM_READONLY_P (mem) = 1;
7421 MEM_NOTRAP_P (mem) = 1;
7422
7423 insn = emit_insn (pat);
7424 }
7425
7426 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7427 by loop. */
7428 set_unique_reg_note (insn, REG_EQUAL, orig);
7429
7430 return reg;
7431 }
7432 else if (GET_CODE (orig) == CONST)
7433 {
7434 rtx base, offset;
7435
7436 if (GET_CODE (XEXP (orig, 0)) == PLUS
7437 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7438 return orig;
7439
7440 /* Handle the case where we have: const (UNSPEC_TLS). */
7441 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7442 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7443 return orig;
7444
7445 /* Handle the case where we have:
7446 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7447 CONST_INT. */
7448 if (GET_CODE (XEXP (orig, 0)) == PLUS
7449 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7450 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7451 {
7452 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7453 return orig;
7454 }
7455
7456 if (reg == 0)
7457 {
7458 gcc_assert (can_create_pseudo_p ());
7459 reg = gen_reg_rtx (Pmode);
7460 }
7461
7462 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7463
7464 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7465 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7466 base == reg ? 0 : reg);
7467
7468 if (CONST_INT_P (offset))
7469 {
7470 /* The base register doesn't really matter, we only want to
7471 test the index for the appropriate mode. */
7472 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7473 {
7474 gcc_assert (can_create_pseudo_p ());
7475 offset = force_reg (Pmode, offset);
7476 }
7477
7478 if (CONST_INT_P (offset))
7479 return plus_constant (Pmode, base, INTVAL (offset));
7480 }
7481
7482 if (GET_MODE_SIZE (mode) > 4
7483 && (GET_MODE_CLASS (mode) == MODE_INT
7484 || TARGET_SOFT_FLOAT))
7485 {
7486 emit_insn (gen_addsi3 (reg, base, offset));
7487 return reg;
7488 }
7489
7490 return gen_rtx_PLUS (Pmode, base, offset);
7491 }
7492
7493 return orig;
7494 }
7495
7496
7497 /* Find a spare register to use during the prolog of a function. */
7498
7499 static int
7500 thumb_find_work_register (unsigned long pushed_regs_mask)
7501 {
7502 int reg;
7503
7504 /* Check the argument registers first as these are call-used. The
7505 register allocation order means that sometimes r3 might be used
7506 but earlier argument registers might not, so check them all. */
7507 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7508 if (!df_regs_ever_live_p (reg))
7509 return reg;
7510
7511 /* Before going on to check the call-saved registers we can try a couple
7512 more ways of deducing that r3 is available. The first is when we are
7513 pushing anonymous arguments onto the stack and we have less than 4
7514 registers worth of fixed arguments(*). In this case r3 will be part of
7515 the variable argument list and so we can be sure that it will be
7516 pushed right at the start of the function. Hence it will be available
7517 for the rest of the prologue.
7518 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7519 if (cfun->machine->uses_anonymous_args
7520 && crtl->args.pretend_args_size > 0)
7521 return LAST_ARG_REGNUM;
7522
7523 /* The other case is when we have fixed arguments but less than 4 registers
7524 worth. In this case r3 might be used in the body of the function, but
7525 it is not being used to convey an argument into the function. In theory
7526 we could just check crtl->args.size to see how many bytes are
7527 being passed in argument registers, but it seems that it is unreliable.
7528 Sometimes it will have the value 0 when in fact arguments are being
7529 passed. (See testcase execute/20021111-1.c for an example). So we also
7530 check the args_info.nregs field as well. The problem with this field is
7531 that it makes no allowances for arguments that are passed to the
7532 function but which are not used. Hence we could miss an opportunity
7533 when a function has an unused argument in r3. But it is better to be
7534 safe than to be sorry. */
7535 if (! cfun->machine->uses_anonymous_args
7536 && crtl->args.size >= 0
7537 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7538 && (TARGET_AAPCS_BASED
7539 ? crtl->args.info.aapcs_ncrn < 4
7540 : crtl->args.info.nregs < 4))
7541 return LAST_ARG_REGNUM;
7542
7543 /* Otherwise look for a call-saved register that is going to be pushed. */
7544 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7545 if (pushed_regs_mask & (1 << reg))
7546 return reg;
7547
7548 if (TARGET_THUMB2)
7549 {
7550 /* Thumb-2 can use high regs. */
7551 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7552 if (pushed_regs_mask & (1 << reg))
7553 return reg;
7554 }
7555 /* Something went wrong - thumb_compute_save_reg_mask()
7556 should have arranged for a suitable register to be pushed. */
7557 gcc_unreachable ();
7558 }
7559
7560 static GTY(()) int pic_labelno;
7561
7562 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7563 low register. */
7564
7565 void
7566 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7567 {
7568 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7569
7570 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7571 return;
7572
7573 gcc_assert (flag_pic);
7574
7575 pic_reg = cfun->machine->pic_reg;
7576 if (TARGET_VXWORKS_RTP)
7577 {
7578 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7579 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7580 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7581
7582 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7583
7584 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7585 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7586 }
7587 else
7588 {
7589 /* We use an UNSPEC rather than a LABEL_REF because this label
7590 never appears in the code stream. */
7591
7592 labelno = GEN_INT (pic_labelno++);
7593 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7594 l1 = gen_rtx_CONST (VOIDmode, l1);
7595
7596 /* On the ARM the PC register contains 'dot + 8' at the time of the
7597 addition, on the Thumb it is 'dot + 4'. */
7598 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7599 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7600 UNSPEC_GOTSYM_OFF);
7601 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7602
7603 if (TARGET_32BIT)
7604 {
7605 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7606 }
7607 else /* TARGET_THUMB1 */
7608 {
7609 if (arm_pic_register != INVALID_REGNUM
7610 && REGNO (pic_reg) > LAST_LO_REGNUM)
7611 {
7612 /* We will have pushed the pic register, so we should always be
7613 able to find a work register. */
7614 pic_tmp = gen_rtx_REG (SImode,
7615 thumb_find_work_register (saved_regs));
7616 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7617 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7618 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7619 }
7620 else if (arm_pic_register != INVALID_REGNUM
7621 && arm_pic_register > LAST_LO_REGNUM
7622 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7623 {
7624 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7625 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7626 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7627 }
7628 else
7629 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7630 }
7631 }
7632
7633 /* Need to emit this whether or not we obey regdecls,
7634 since setjmp/longjmp can cause life info to screw up. */
7635 emit_use (pic_reg);
7636 }
7637
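/* Editor's note (not part of the original file): schematically, for ARM
   state the code above expands to something like

	ldr	rPIC, .LPIC_offset
   .LPICn:
	add	rPIC, pc, rPIC
	...
   .LPIC_offset:
	.word	_GLOBAL_OFFSET_TABLE_ - (.LPICn + 8)

   where the "+ 8" matches the 'dot + 8' PC bias mentioned above ('+ 4' in
   Thumb state).  The label names and register shown are illustrative.  */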
7638 /* Generate code to load the address of a static var when flag_pic is set. */
7639 static rtx_insn *
7640 arm_pic_static_addr (rtx orig, rtx reg)
7641 {
7642 rtx l1, labelno, offset_rtx;
7643
7644 gcc_assert (flag_pic);
7645
7646 /* We use an UNSPEC rather than a LABEL_REF because this label
7647 never appears in the code stream. */
7648 labelno = GEN_INT (pic_labelno++);
7649 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7650 l1 = gen_rtx_CONST (VOIDmode, l1);
7651
7652 /* On the ARM the PC register contains 'dot + 8' at the time of the
7653 addition, on the Thumb it is 'dot + 4'. */
7654 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7655 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7656 UNSPEC_SYMBOL_OFFSET);
7657 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7658
7659 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7660 }
7661
7662 /* Return nonzero if X is valid as an ARM state addressing register. */
7663 static int
7664 arm_address_register_rtx_p (rtx x, int strict_p)
7665 {
7666 int regno;
7667
7668 if (!REG_P (x))
7669 return 0;
7670
7671 regno = REGNO (x);
7672
7673 if (strict_p)
7674 return ARM_REGNO_OK_FOR_BASE_P (regno);
7675
7676 return (regno <= LAST_ARM_REGNUM
7677 || regno >= FIRST_PSEUDO_REGISTER
7678 || regno == FRAME_POINTER_REGNUM
7679 || regno == ARG_POINTER_REGNUM);
7680 }
7681
7682 /* Return TRUE if this rtx is the difference of a symbol and a label,
7683 and will reduce to a PC-relative relocation in the object file.
7684 Expressions like this can be left alone when generating PIC, rather
7685 than forced through the GOT. */
7686 static int
7687 pcrel_constant_p (rtx x)
7688 {
7689 if (GET_CODE (x) == MINUS)
7690 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7691
7692 return FALSE;
7693 }
7694
7695 /* Return true if X will surely end up in an index register after next
7696 splitting pass. */
7697 static bool
7698 will_be_in_index_register (const_rtx x)
7699 {
7700 /* arm.md: calculate_pic_address will split this into a register. */
7701 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7702 }
7703
7704 /* Return nonzero if X is a valid ARM state address operand. */
7705 int
7706 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7707 int strict_p)
7708 {
7709 bool use_ldrd;
7710 enum rtx_code code = GET_CODE (x);
7711
7712 if (arm_address_register_rtx_p (x, strict_p))
7713 return 1;
7714
7715 use_ldrd = (TARGET_LDRD
7716 && (mode == DImode || mode == DFmode));
7717
7718 if (code == POST_INC || code == PRE_DEC
7719 || ((code == PRE_INC || code == POST_DEC)
7720 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7721 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7722
7723 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7724 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7725 && GET_CODE (XEXP (x, 1)) == PLUS
7726 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7727 {
7728 rtx addend = XEXP (XEXP (x, 1), 1);
7729
7730 /* Don't allow ldrd post increment by register because it's hard
7731 to fixup invalid register choices. */
7732 if (use_ldrd
7733 && GET_CODE (x) == POST_MODIFY
7734 && REG_P (addend))
7735 return 0;
7736
7737 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7738 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7739 }
7740
7741 /* After reload constants split into minipools will have addresses
7742 from a LABEL_REF. */
7743 else if (reload_completed
7744 && (code == LABEL_REF
7745 || (code == CONST
7746 && GET_CODE (XEXP (x, 0)) == PLUS
7747 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7748 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7749 return 1;
7750
7751 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7752 return 0;
7753
7754 else if (code == PLUS)
7755 {
7756 rtx xop0 = XEXP (x, 0);
7757 rtx xop1 = XEXP (x, 1);
7758
7759 return ((arm_address_register_rtx_p (xop0, strict_p)
7760 && ((CONST_INT_P (xop1)
7761 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7762 || (!strict_p && will_be_in_index_register (xop1))))
7763 || (arm_address_register_rtx_p (xop1, strict_p)
7764 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7765 }
7766
7767 #if 0
7768 /* Reload currently can't handle MINUS, so disable this for now */
7769 else if (GET_CODE (x) == MINUS)
7770 {
7771 rtx xop0 = XEXP (x, 0);
7772 rtx xop1 = XEXP (x, 1);
7773
7774 return (arm_address_register_rtx_p (xop0, strict_p)
7775 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7776 }
7777 #endif
7778
7779 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7780 && code == SYMBOL_REF
7781 && CONSTANT_POOL_ADDRESS_P (x)
7782 && ! (flag_pic
7783 && symbol_mentioned_p (get_pool_constant (x))
7784 && ! pcrel_constant_p (get_pool_constant (x))))
7785 return 1;
7786
7787 return 0;
7788 }
7789
7790 /* Return true if we can avoid creating a constant pool entry for x. */
7791 static bool
7792 can_avoid_literal_pool_for_label_p (rtx x)
7793 {
7794 /* Normally we can assign constant values to target registers without
7795 the help of a constant pool. But there are cases where we have to use a
7796 constant pool, for example:
7797 1) assigning a label to a register.
7798 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7799
7800 Constant pool access in format:
7801 (set (reg r0) (mem (symbol_ref (".LC0"))))
7802 will cause the use of literal pool (later in function arm_reorg).
7803 So here we mark such format as an invalid format, then the compiler
7804 will adjust it into:
7805 (set (reg r0) (symbol_ref (".LC0")))
7806 (set (reg r0) (mem (reg r0))).
7807 No extra register is required, and (mem (reg r0)) won't cause the use
7808 of literal pools. */
7809 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7810 && CONSTANT_POOL_ADDRESS_P (x))
7811 return 1;
7812 return 0;
7813 }
7814
7815
7816 /* Return nonzero if X is a valid Thumb-2 address operand. */
7817 static int
7818 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7819 {
7820 bool use_ldrd;
7821 enum rtx_code code = GET_CODE (x);
7822
7823 if (arm_address_register_rtx_p (x, strict_p))
7824 return 1;
7825
7826 use_ldrd = (TARGET_LDRD
7827 && (mode == DImode || mode == DFmode));
7828
7829 if (code == POST_INC || code == PRE_DEC
7830 || ((code == PRE_INC || code == POST_DEC)
7831 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7832 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7833
7834 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7835 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7836 && GET_CODE (XEXP (x, 1)) == PLUS
7837 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7838 {
7839 /* Thumb-2 only has autoincrement by constant. */
7840 rtx addend = XEXP (XEXP (x, 1), 1);
7841 HOST_WIDE_INT offset;
7842
7843 if (!CONST_INT_P (addend))
7844 return 0;
7845
7846 offset = INTVAL (addend);
7847 if (GET_MODE_SIZE (mode) <= 4)
7848 return (offset > -256 && offset < 256);
7849
7850 return (use_ldrd && offset > -1024 && offset < 1024
7851 && (offset & 3) == 0);
7852 }
7853
7854 /* After reload constants split into minipools will have addresses
7855 from a LABEL_REF. */
7856 else if (reload_completed
7857 && (code == LABEL_REF
7858 || (code == CONST
7859 && GET_CODE (XEXP (x, 0)) == PLUS
7860 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7861 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7862 return 1;
7863
7864 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7865 return 0;
7866
7867 else if (code == PLUS)
7868 {
7869 rtx xop0 = XEXP (x, 0);
7870 rtx xop1 = XEXP (x, 1);
7871
7872 return ((arm_address_register_rtx_p (xop0, strict_p)
7873 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7874 || (!strict_p && will_be_in_index_register (xop1))))
7875 || (arm_address_register_rtx_p (xop1, strict_p)
7876 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7877 }
7878
7879 else if (can_avoid_literal_pool_for_label_p (x))
7880 return 0;
7881
7882 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7883 && code == SYMBOL_REF
7884 && CONSTANT_POOL_ADDRESS_P (x)
7885 && ! (flag_pic
7886 && symbol_mentioned_p (get_pool_constant (x))
7887 && ! pcrel_constant_p (get_pool_constant (x))))
7888 return 1;
7889
7890 return 0;
7891 }
7892
7893 /* Return nonzero if INDEX is valid for an address index operand in
7894 ARM state. */
7895 static int
7896 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7897 int strict_p)
7898 {
7899 HOST_WIDE_INT range;
7900 enum rtx_code code = GET_CODE (index);
7901
7902 /* Standard coprocessor addressing modes. */
7903 if (TARGET_HARD_FLOAT
7904 && (mode == SFmode || mode == DFmode))
7905 return (code == CONST_INT && INTVAL (index) < 1024
7906 && INTVAL (index) > -1024
7907 && (INTVAL (index) & 3) == 0);
7908
7909 /* For quad modes, we restrict the constant offset to be slightly less
7910 than what the instruction format permits. We do this because for
7911 quad mode moves, we will actually decompose them into two separate
7912 double-mode reads or writes. INDEX must therefore be a valid
7913 (double-mode) offset and so should INDEX+8. */
7914 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7915 return (code == CONST_INT
7916 && INTVAL (index) < 1016
7917 && INTVAL (index) > -1024
7918 && (INTVAL (index) & 3) == 0);
7919
7920 /* We have no such constraint on double mode offsets, so we permit the
7921 full range of the instruction format. */
7922 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7923 return (code == CONST_INT
7924 && INTVAL (index) < 1024
7925 && INTVAL (index) > -1024
7926 && (INTVAL (index) & 3) == 0);
7927
7928 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7929 return (code == CONST_INT
7930 && INTVAL (index) < 1024
7931 && INTVAL (index) > -1024
7932 && (INTVAL (index) & 3) == 0);
7933
7934 if (arm_address_register_rtx_p (index, strict_p)
7935 && (GET_MODE_SIZE (mode) <= 4))
7936 return 1;
7937
7938 if (mode == DImode || mode == DFmode)
7939 {
7940 if (code == CONST_INT)
7941 {
7942 HOST_WIDE_INT val = INTVAL (index);
7943
7944 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
7945 If vldr is selected it uses arm_coproc_mem_operand. */
7946 if (TARGET_LDRD)
7947 return val > -256 && val < 256;
7948 else
7949 return val > -4096 && val < 4092;
7950 }
7951
7952 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7953 }
7954
7955 if (GET_MODE_SIZE (mode) <= 4
7956 && ! (arm_arch4
7957 && (mode == HImode
7958 || mode == HFmode
7959 || (mode == QImode && outer == SIGN_EXTEND))))
7960 {
7961 if (code == MULT)
7962 {
7963 rtx xiop0 = XEXP (index, 0);
7964 rtx xiop1 = XEXP (index, 1);
7965
7966 return ((arm_address_register_rtx_p (xiop0, strict_p)
7967 && power_of_two_operand (xiop1, SImode))
7968 || (arm_address_register_rtx_p (xiop1, strict_p)
7969 && power_of_two_operand (xiop0, SImode)));
7970 }
7971 else if (code == LSHIFTRT || code == ASHIFTRT
7972 || code == ASHIFT || code == ROTATERT)
7973 {
7974 rtx op = XEXP (index, 1);
7975
7976 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7977 && CONST_INT_P (op)
7978 && INTVAL (op) > 0
7979 && INTVAL (op) <= 31);
7980 }
7981 }
7982
7983 /* For ARM v4 we may be doing a sign-extend operation during the
7984 load. */
7985 if (arm_arch4)
7986 {
7987 if (mode == HImode
7988 || mode == HFmode
7989 || (outer == SIGN_EXTEND && mode == QImode))
7990 range = 256;
7991 else
7992 range = 4096;
7993 }
7994 else
7995 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7996
7997 return (code == CONST_INT
7998 && INTVAL (index) < range
7999 && INTVAL (index) > -range);
8000 }
8001
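/* Editor's note (not part of the original file): a few concrete addresses
   accepted by arm_legitimate_index_p above for ARM state, assuming the
   base register is valid:

     ldr   r0, [r1, #4095]       @ SImode, |offset| < 4096
     ldrh  r0, [r1, #255]        @ HImode with arm_arch4, |offset| < 256
     ldr   r0, [r1, r2, lsl #3]  @ MULT/shift form, power-of-two scale

   The offsets shown are the extremes of the ranges tested above.  */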
8002 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8003 index operand. i.e. 1, 2, 4 or 8. */
8004 static bool
8005 thumb2_index_mul_operand (rtx op)
8006 {
8007 HOST_WIDE_INT val;
8008
8009 if (!CONST_INT_P (op))
8010 return false;
8011
8012 val = INTVAL (op);
8013 return (val == 1 || val == 2 || val == 4 || val == 8);
8014 }
8015
8016 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8017 static int
8018 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8019 {
8020 enum rtx_code code = GET_CODE (index);
8021
8022 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8023 /* Standard coprocessor addressing modes. */
8024 if (TARGET_HARD_FLOAT
8025 && (mode == SFmode || mode == DFmode))
8026 return (code == CONST_INT && INTVAL (index) < 1024
8027 /* Thumb-2 allows only > -256 index range for its core register
8028 load/stores. Since we allow SF/DF in core registers, we have
8029 to use the intersection between -256~4096 (core) and -1024~1024
8030 (coprocessor). */
8031 && INTVAL (index) > -256
8032 && (INTVAL (index) & 3) == 0);
8033
8034 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8035 {
8036 /* For DImode assume values will usually live in core regs
8037 and only allow LDRD addressing modes. */
8038 if (!TARGET_LDRD || mode != DImode)
8039 return (code == CONST_INT
8040 && INTVAL (index) < 1024
8041 && INTVAL (index) > -1024
8042 && (INTVAL (index) & 3) == 0);
8043 }
8044
8045 /* For quad modes, we restrict the constant offset to be slightly less
8046 than what the instruction format permits. We do this because for
8047 quad mode moves, we will actually decompose them into two separate
8048 double-mode reads or writes. INDEX must therefore be a valid
8049 (double-mode) offset and so should INDEX+8. */
8050 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8051 return (code == CONST_INT
8052 && INTVAL (index) < 1016
8053 && INTVAL (index) > -1024
8054 && (INTVAL (index) & 3) == 0);
8055
8056 /* We have no such constraint on double mode offsets, so we permit the
8057 full range of the instruction format. */
8058 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8059 return (code == CONST_INT
8060 && INTVAL (index) < 1024
8061 && INTVAL (index) > -1024
8062 && (INTVAL (index) & 3) == 0);
8063
8064 if (arm_address_register_rtx_p (index, strict_p)
8065 && (GET_MODE_SIZE (mode) <= 4))
8066 return 1;
8067
8068 if (mode == DImode || mode == DFmode)
8069 {
8070 if (code == CONST_INT)
8071 {
8072 HOST_WIDE_INT val = INTVAL (index);
8073 /* Thumb-2 ldrd only has reg+const addressing modes.
8074 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8075 If vldr is selected it uses arm_coproc_mem_operand. */
8076 if (TARGET_LDRD)
8077 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8078 else
8079 return IN_RANGE (val, -255, 4095 - 4);
8080 }
8081 else
8082 return 0;
8083 }
8084
8085 if (code == MULT)
8086 {
8087 rtx xiop0 = XEXP (index, 0);
8088 rtx xiop1 = XEXP (index, 1);
8089
8090 return ((arm_address_register_rtx_p (xiop0, strict_p)
8091 && thumb2_index_mul_operand (xiop1))
8092 || (arm_address_register_rtx_p (xiop1, strict_p)
8093 && thumb2_index_mul_operand (xiop0)));
8094 }
8095 else if (code == ASHIFT)
8096 {
8097 rtx op = XEXP (index, 1);
8098
8099 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8100 && CONST_INT_P (op)
8101 && INTVAL (op) > 0
8102 && INTVAL (op) <= 3);
8103 }
8104
8105 return (code == CONST_INT
8106 && INTVAL (index) < 4096
8107 && INTVAL (index) > -256);
8108 }
8109
8110 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8111 static int
8112 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8113 {
8114 int regno;
8115
8116 if (!REG_P (x))
8117 return 0;
8118
8119 regno = REGNO (x);
8120
8121 if (strict_p)
8122 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8123
8124 return (regno <= LAST_LO_REGNUM
8125 || regno > LAST_VIRTUAL_REGISTER
8126 || regno == FRAME_POINTER_REGNUM
8127 || (GET_MODE_SIZE (mode) >= 4
8128 && (regno == STACK_POINTER_REGNUM
8129 || regno >= FIRST_PSEUDO_REGISTER
8130 || x == hard_frame_pointer_rtx
8131 || x == arg_pointer_rtx)));
8132 }
8133
8134 /* Return nonzero if x is a legitimate index register. This is the case
8135 for any base register that can access a QImode object. */
8136 inline static int
8137 thumb1_index_register_rtx_p (rtx x, int strict_p)
8138 {
8139 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8140 }
8141
8142 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8143
8144 The AP may be eliminated to either the SP or the FP, so we use the
8145 least common denominator, e.g. SImode, and offsets from 0 to 64.
8146
8147 ??? Verify whether the above is the right approach.
8148
8149 ??? Also, the FP may be eliminated to the SP, so perhaps that
8150 needs special handling also.
8151
8152 ??? Look at how the mips16 port solves this problem. It probably uses
8153 better ways to solve some of these problems.
8154
8155 Although it is not incorrect, we don't accept QImode and HImode
8156 addresses based on the frame pointer or arg pointer until the
8157 reload pass starts. This is so that eliminating such addresses
8158 into stack based ones won't produce impossible code. */
8159 int
8160 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8161 {
8162 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8163 return 0;
8164
8165 /* ??? Not clear if this is right. Experiment. */
8166 if (GET_MODE_SIZE (mode) < 4
8167 && !(reload_in_progress || reload_completed)
8168 && (reg_mentioned_p (frame_pointer_rtx, x)
8169 || reg_mentioned_p (arg_pointer_rtx, x)
8170 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8171 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8172 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8173 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8174 return 0;
8175
8176 /* Accept any base register. SP only in SImode or larger. */
8177 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8178 return 1;
8179
8180 /* This is PC relative data before arm_reorg runs. */
8181 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8182 && GET_CODE (x) == SYMBOL_REF
8183 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8184 return 1;
8185
8186 /* This is PC relative data after arm_reorg runs. */
8187 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8188 && reload_completed
8189 && (GET_CODE (x) == LABEL_REF
8190 || (GET_CODE (x) == CONST
8191 && GET_CODE (XEXP (x, 0)) == PLUS
8192 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8193 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8194 return 1;
8195
8196 /* Post-inc indexing only supported for SImode and larger. */
8197 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8198 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8199 return 1;
8200
8201 else if (GET_CODE (x) == PLUS)
8202 {
8203 /* REG+REG address can be any two index registers. */
8204 /* We disallow FRAME+REG addressing since we know that FRAME
8205 will be replaced with STACK, and SP relative addressing only
8206 permits SP+OFFSET. */
8207 if (GET_MODE_SIZE (mode) <= 4
8208 && XEXP (x, 0) != frame_pointer_rtx
8209 && XEXP (x, 1) != frame_pointer_rtx
8210 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8211 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8212 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8213 return 1;
8214
8215 /* REG+const has 5-7 bit offset for non-SP registers. */
8216 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8217 || XEXP (x, 0) == arg_pointer_rtx)
8218 && CONST_INT_P (XEXP (x, 1))
8219 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8220 return 1;
8221
8222 /* REG+const has 10-bit offset for SP, but only SImode and
8223 larger is supported. */
8224 /* ??? Should probably check for DI/DFmode overflow here
8225 just like GO_IF_LEGITIMATE_OFFSET does. */
8226 else if (REG_P (XEXP (x, 0))
8227 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8228 && GET_MODE_SIZE (mode) >= 4
8229 && CONST_INT_P (XEXP (x, 1))
8230 && INTVAL (XEXP (x, 1)) >= 0
8231 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8232 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8233 return 1;
8234
8235 else if (REG_P (XEXP (x, 0))
8236 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8237 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8238 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8239 && REGNO (XEXP (x, 0))
8240 <= LAST_VIRTUAL_POINTER_REGISTER))
8241 && GET_MODE_SIZE (mode) >= 4
8242 && CONST_INT_P (XEXP (x, 1))
8243 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8244 return 1;
8245 }
8246
8247 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8248 && GET_MODE_SIZE (mode) == 4
8249 && GET_CODE (x) == SYMBOL_REF
8250 && CONSTANT_POOL_ADDRESS_P (x)
8251 && ! (flag_pic
8252 && symbol_mentioned_p (get_pool_constant (x))
8253 && ! pcrel_constant_p (get_pool_constant (x))))
8254 return 1;
8255
8256 return 0;
8257 }
8258
8259 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8260 instruction of mode MODE. */
8261 int
8262 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8263 {
8264 switch (GET_MODE_SIZE (mode))
8265 {
8266 case 1:
8267 return val >= 0 && val < 32;
8268
8269 case 2:
8270 return val >= 0 && val < 64 && (val & 1) == 0;
8271
8272 default:
8273 return (val >= 0
8274 && (val + GET_MODE_SIZE (mode)) <= 128
8275 && (val & 3) == 0);
8276 }
8277 }
8278
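/* Editor's note (not part of the original file): the ranges checked above
   correspond to the Thumb-1 5-bit scaled immediate forms, e.g.

     ldrb  r0, [r1, #31]    @ byte:      0..31
     ldrh  r0, [r1, #62]    @ halfword:  0..62, even
     ldr   r0, [r1, #124]   @ word:      0..124, multiple of 4

   (for larger modes the val + size <= 128 test shrinks the upper bound).  */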
8279 bool
8280 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8281 {
8282 if (TARGET_ARM)
8283 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8284 else if (TARGET_THUMB2)
8285 return thumb2_legitimate_address_p (mode, x, strict_p);
8286 else /* if (TARGET_THUMB1) */
8287 return thumb1_legitimate_address_p (mode, x, strict_p);
8288 }
8289
8290 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8291
8292 Given an rtx X being reloaded into a reg required to be
8293 in class CLASS, return the class of reg to actually use.
8294 In general this is just CLASS, but for the Thumb core registers and
8295 immediate constants we prefer a LO_REGS class or a subset. */
8296
8297 static reg_class_t
8298 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8299 {
8300 if (TARGET_32BIT)
8301 return rclass;
8302 else
8303 {
8304 if (rclass == GENERAL_REGS)
8305 return LO_REGS;
8306 else
8307 return rclass;
8308 }
8309 }
8310
8311 /* Build the SYMBOL_REF for __tls_get_addr. */
8312
8313 static GTY(()) rtx tls_get_addr_libfunc;
8314
8315 static rtx
8316 get_tls_get_addr (void)
8317 {
8318 if (!tls_get_addr_libfunc)
8319 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8320 return tls_get_addr_libfunc;
8321 }
8322
8323 rtx
8324 arm_load_tp (rtx target)
8325 {
8326 if (!target)
8327 target = gen_reg_rtx (SImode);
8328
8329 if (TARGET_HARD_TP)
8330 {
8331 /* Can return in any reg. */
8332 emit_insn (gen_load_tp_hard (target));
8333 }
8334 else
8335 {
8336 /* Always returned in r0. Immediately copy the result into a pseudo,
8337 otherwise other uses of r0 (e.g. setting up function arguments) may
8338 clobber the value. */
8339
8340 rtx tmp;
8341
8342 emit_insn (gen_load_tp_soft ());
8343
8344 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8345 emit_move_insn (target, tmp);
8346 }
8347 return target;
8348 }
8349
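/* Editor's note (not part of the original file): with TARGET_HARD_TP the
   load_tp_hard pattern reads the CP15 user thread register (roughly
   "mrc p15, 0, <reg>, c13, c0, 3"), while the soft variant calls the
   __aeabi_read_tp helper, whose result comes back in r0 as the comment
   above notes.  */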
8350 static rtx
8351 load_tls_operand (rtx x, rtx reg)
8352 {
8353 rtx tmp;
8354
8355 if (reg == NULL_RTX)
8356 reg = gen_reg_rtx (SImode);
8357
8358 tmp = gen_rtx_CONST (SImode, x);
8359
8360 emit_move_insn (reg, tmp);
8361
8362 return reg;
8363 }
8364
8365 static rtx_insn *
8366 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8367 {
8368 rtx label, labelno, sum;
8369
8370 gcc_assert (reloc != TLS_DESCSEQ);
8371 start_sequence ();
8372
8373 labelno = GEN_INT (pic_labelno++);
8374 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8375 label = gen_rtx_CONST (VOIDmode, label);
8376
8377 sum = gen_rtx_UNSPEC (Pmode,
8378 gen_rtvec (4, x, GEN_INT (reloc), label,
8379 GEN_INT (TARGET_ARM ? 8 : 4)),
8380 UNSPEC_TLS);
8381 reg = load_tls_operand (sum, reg);
8382
8383 if (TARGET_ARM)
8384 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8385 else
8386 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8387
8388 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8389 LCT_PURE, /* LCT_CONST? */
8390 Pmode, reg, Pmode);
8391
8392 rtx_insn *insns = get_insns ();
8393 end_sequence ();
8394
8395 return insns;
8396 }
8397
8398 static rtx
8399 arm_tls_descseq_addr (rtx x, rtx reg)
8400 {
8401 rtx labelno = GEN_INT (pic_labelno++);
8402 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8403 rtx sum = gen_rtx_UNSPEC (Pmode,
8404 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8405 gen_rtx_CONST (VOIDmode, label),
8406 GEN_INT (!TARGET_ARM)),
8407 UNSPEC_TLS);
8408 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8409
8410 emit_insn (gen_tlscall (x, labelno));
8411 if (!reg)
8412 reg = gen_reg_rtx (SImode);
8413 else
8414 gcc_assert (REGNO (reg) != R0_REGNUM);
8415
8416 emit_move_insn (reg, reg0);
8417
8418 return reg;
8419 }
8420
8421 rtx
8422 legitimize_tls_address (rtx x, rtx reg)
8423 {
8424 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8425 rtx_insn *insns;
8426 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8427
8428 switch (model)
8429 {
8430 case TLS_MODEL_GLOBAL_DYNAMIC:
8431 if (TARGET_GNU2_TLS)
8432 {
8433 reg = arm_tls_descseq_addr (x, reg);
8434
8435 tp = arm_load_tp (NULL_RTX);
8436
8437 dest = gen_rtx_PLUS (Pmode, tp, reg);
8438 }
8439 else
8440 {
8441 /* Original scheme */
8442 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8443 dest = gen_reg_rtx (Pmode);
8444 emit_libcall_block (insns, dest, ret, x);
8445 }
8446 return dest;
8447
8448 case TLS_MODEL_LOCAL_DYNAMIC:
8449 if (TARGET_GNU2_TLS)
8450 {
8451 reg = arm_tls_descseq_addr (x, reg);
8452
8453 tp = arm_load_tp (NULL_RTX);
8454
8455 dest = gen_rtx_PLUS (Pmode, tp, reg);
8456 }
8457 else
8458 {
8459 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8460
8461 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8462 share the LDM result with other LD model accesses. */
8463 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8464 UNSPEC_TLS);
8465 dest = gen_reg_rtx (Pmode);
8466 emit_libcall_block (insns, dest, ret, eqv);
8467
8468 /* Load the addend. */
8469 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8470 GEN_INT (TLS_LDO32)),
8471 UNSPEC_TLS);
8472 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8473 dest = gen_rtx_PLUS (Pmode, dest, addend);
8474 }
8475 return dest;
8476
8477 case TLS_MODEL_INITIAL_EXEC:
8478 labelno = GEN_INT (pic_labelno++);
8479 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8480 label = gen_rtx_CONST (VOIDmode, label);
8481 sum = gen_rtx_UNSPEC (Pmode,
8482 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8483 GEN_INT (TARGET_ARM ? 8 : 4)),
8484 UNSPEC_TLS);
8485 reg = load_tls_operand (sum, reg);
8486
8487 if (TARGET_ARM)
8488 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8489 else if (TARGET_THUMB2)
8490 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8491 else
8492 {
8493 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8494 emit_move_insn (reg, gen_const_mem (SImode, reg));
8495 }
8496
8497 tp = arm_load_tp (NULL_RTX);
8498
8499 return gen_rtx_PLUS (Pmode, tp, reg);
8500
8501 case TLS_MODEL_LOCAL_EXEC:
8502 tp = arm_load_tp (NULL_RTX);
8503
8504 reg = gen_rtx_UNSPEC (Pmode,
8505 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8506 UNSPEC_TLS);
8507 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8508
8509 return gen_rtx_PLUS (Pmode, tp, reg);
8510
8511 default:
8512 abort ();
8513 }
8514 }
8515
8516 /* Try machine-dependent ways of modifying an illegitimate address
8517 to be legitimate. If we find one, return the new, valid address. */
8518 rtx
8519 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8520 {
8521 if (arm_tls_referenced_p (x))
8522 {
8523 rtx addend = NULL;
8524
8525 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8526 {
8527 addend = XEXP (XEXP (x, 0), 1);
8528 x = XEXP (XEXP (x, 0), 0);
8529 }
8530
8531 if (GET_CODE (x) != SYMBOL_REF)
8532 return x;
8533
8534 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8535
8536 x = legitimize_tls_address (x, NULL_RTX);
8537
8538 if (addend)
8539 {
8540 x = gen_rtx_PLUS (SImode, x, addend);
8541 orig_x = x;
8542 }
8543 else
8544 return x;
8545 }
8546
8547 if (!TARGET_ARM)
8548 {
8549 /* TODO: legitimize_address for Thumb2. */
8550 if (TARGET_THUMB2)
8551 return x;
8552 return thumb_legitimize_address (x, orig_x, mode);
8553 }
8554
8555 if (GET_CODE (x) == PLUS)
8556 {
8557 rtx xop0 = XEXP (x, 0);
8558 rtx xop1 = XEXP (x, 1);
8559
8560 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8561 xop0 = force_reg (SImode, xop0);
8562
8563 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8564 && !symbol_mentioned_p (xop1))
8565 xop1 = force_reg (SImode, xop1);
8566
8567 if (ARM_BASE_REGISTER_RTX_P (xop0)
8568 && CONST_INT_P (xop1))
8569 {
8570 HOST_WIDE_INT n, low_n;
8571 rtx base_reg, val;
8572 n = INTVAL (xop1);
8573
8574 /* VFP addressing modes actually allow greater offsets, but for
8575 now we just stick with the lowest common denominator. */
8576 if (mode == DImode || mode == DFmode)
8577 {
8578 low_n = n & 0x0f;
8579 n &= ~0x0f;
8580 if (low_n > 4)
8581 {
8582 n += 16;
8583 low_n -= 16;
8584 }
8585 }
8586 else
8587 {
8588 low_n = ((mode) == TImode ? 0
8589 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8590 n -= low_n;
8591 }
8592
8593 base_reg = gen_reg_rtx (SImode);
8594 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8595 emit_move_insn (base_reg, val);
8596 x = plus_constant (Pmode, base_reg, low_n);
8597 }
8598 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8599 x = gen_rtx_PLUS (SImode, xop0, xop1);
8600 }
8601
8602 /* XXX We don't allow MINUS any more -- see comment in
8603 arm_legitimate_address_outer_p (). */
8604 else if (GET_CODE (x) == MINUS)
8605 {
8606 rtx xop0 = XEXP (x, 0);
8607 rtx xop1 = XEXP (x, 1);
8608
8609 if (CONSTANT_P (xop0))
8610 xop0 = force_reg (SImode, xop0);
8611
8612 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8613 xop1 = force_reg (SImode, xop1);
8614
8615 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8616 x = gen_rtx_MINUS (SImode, xop0, xop1);
8617 }
8618
8619 /* Make sure to take full advantage of the pre-indexed addressing mode
8620 with absolute addresses, which often allows the base register to be
8621 factorized across multiple adjacent memory references, and might even
8622 allow the minipool to be avoided entirely. */
8623 else if (CONST_INT_P (x) && optimize > 0)
8624 {
8625 unsigned int bits;
8626 HOST_WIDE_INT mask, base, index;
8627 rtx base_reg;
8628
8629 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8630 use an 8-bit index. So let's use a 12-bit index for SImode only and
8631 hope that arm_gen_constant will enable ldrb to use more bits. */
8632 bits = (mode == SImode) ? 12 : 8;
8633 mask = (1 << bits) - 1;
8634 base = INTVAL (x) & ~mask;
8635 index = INTVAL (x) & mask;
8636 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8637 {
8638 /* It'll most probably be more efficient to generate the base
8639 with more bits set and use a negative index instead. */
8640 base |= mask;
8641 index -= mask;
8642 }
8643 base_reg = force_reg (SImode, GEN_INT (base));
8644 x = plus_constant (Pmode, base_reg, index);
8645 }
8646
8647 if (flag_pic)
8648 {
8649 /* We need to find and carefully transform any SYMBOL and LABEL
8650 references; so go back to the original address expression. */
8651 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8652
8653 if (new_x != orig_x)
8654 x = new_x;
8655 }
8656
8657 return x;
8658 }
8659
8660
8661 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8662 to be legitimate. If we find one, return the new, valid address. */
8663 rtx
8664 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8665 {
8666 if (GET_CODE (x) == PLUS
8667 && CONST_INT_P (XEXP (x, 1))
8668 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8669 || INTVAL (XEXP (x, 1)) < 0))
8670 {
8671 rtx xop0 = XEXP (x, 0);
8672 rtx xop1 = XEXP (x, 1);
8673 HOST_WIDE_INT offset = INTVAL (xop1);
8674
8675 /* Try to fold the offset into a biasing of the base register and
8676 then offsetting that. Don't do this when optimizing for space
8677 since it can cause too many CSEs. */
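/* Illustrative example (editorial sketch, not from the original source;
   values arbitrary): when optimizing for size, with SImode and
   offset == 300, the guard 256 <= 300 < 256 + 31 * 4 holds, so
   delta == 300 - (256 - 4) == 48; the base is biased by 300 - 48 == 252
   and the access itself then uses offset 48, which the Thumb-1 word
   load can encode directly.  */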
8678 if (optimize_size && offset >= 0
8679 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8680 {
8681 HOST_WIDE_INT delta;
8682
8683 if (offset >= 256)
8684 delta = offset - (256 - GET_MODE_SIZE (mode));
8685 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8686 delta = 31 * GET_MODE_SIZE (mode);
8687 else
8688 delta = offset & (~31 * GET_MODE_SIZE (mode));
8689
8690 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8691 NULL_RTX);
8692 x = plus_constant (Pmode, xop0, delta);
8693 }
8694 else if (offset < 0 && offset > -256)
8695 /* Small negative offsets are best done with a subtract before the
8696 dereference; forcing these into a register normally takes two
8697 instructions. */
8698 x = force_operand (x, NULL_RTX);
8699 else
8700 {
8701 /* For the remaining cases, force the constant into a register. */
8702 xop1 = force_reg (SImode, xop1);
8703 x = gen_rtx_PLUS (SImode, xop0, xop1);
8704 }
8705 }
8706 else if (GET_CODE (x) == PLUS
8707 && s_register_operand (XEXP (x, 1), SImode)
8708 && !s_register_operand (XEXP (x, 0), SImode))
8709 {
8710 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8711
8712 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8713 }
8714
8715 if (flag_pic)
8716 {
8717 /* We need to find and carefully transform any SYMBOL and LABEL
8718 references; so go back to the original address expression. */
8719 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8720
8721 if (new_x != orig_x)
8722 x = new_x;
8723 }
8724
8725 return x;
8726 }
8727
8728 /* Return TRUE if X contains any TLS symbol references. */
8729
8730 bool
8731 arm_tls_referenced_p (rtx x)
8732 {
8733 if (! TARGET_HAVE_TLS)
8734 return false;
8735
8736 subrtx_iterator::array_type array;
8737 FOR_EACH_SUBRTX (iter, array, x, ALL)
8738 {
8739 const_rtx x = *iter;
8740 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8741 {
8742 /* ARM currently does not provide relocations to encode TLS variables
8743 into AArch32 instructions, only into data, so there is currently no
8744 way to implement these if a literal pool is disabled. */
8745 if (arm_disable_literal_pool)
8746 sorry ("accessing thread-local storage is not currently supported "
8747 "with -mpure-code or -mslow-flash-data");
8748
8749 return true;
8750 }
8751
8752 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8753 TLS offsets, not real symbol references. */
8754 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8755 iter.skip_subrtxes ();
8756 }
8757 return false;
8758 }
8759
8760 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8761
8762 On the ARM, allow any integer (invalid ones are removed later by insn
8763 patterns), nice doubles and symbol_refs which refer to the function's
8764 constant pool XXX.
8765
8766 When generating PIC, allow anything. */
8767
8768 static bool
8769 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8770 {
8771 return flag_pic || !label_mentioned_p (x);
8772 }
8773
8774 static bool
8775 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8776 {
8777 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8778 RTXs. These RTXs must therefore be allowed for Thumb-1 so that, when run
8779 for ARMv8-M Baseline or later, the result is valid. */
8780 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8781 x = XEXP (x, 0);
8782
8783 return (CONST_INT_P (x)
8784 || CONST_DOUBLE_P (x)
8785 || CONSTANT_ADDRESS_P (x)
8786 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8787 || flag_pic);
8788 }
8789
8790 static bool
8791 arm_legitimate_constant_p (machine_mode mode, rtx x)
8792 {
8793 return (!arm_cannot_force_const_mem (mode, x)
8794 && (TARGET_32BIT
8795 ? arm_legitimate_constant_p_1 (mode, x)
8796 : thumb_legitimate_constant_p (mode, x)));
8797 }
8798
8799 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8800
8801 static bool
8802 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8803 {
8804 rtx base, offset;
8805
8806 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8807 {
8808 split_const (x, &base, &offset);
8809 if (GET_CODE (base) == SYMBOL_REF
8810 && !offset_within_block_p (base, INTVAL (offset)))
8811 return true;
8812 }
8813 return arm_tls_referenced_p (x);
8814 }
8815 \f
8816 #define REG_OR_SUBREG_REG(X) \
8817 (REG_P (X) \
8818 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8819
8820 #define REG_OR_SUBREG_RTX(X) \
8821 (REG_P (X) ? (X) : SUBREG_REG (X))
8822
8823 static inline int
8824 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8825 {
8826 machine_mode mode = GET_MODE (x);
8827 int total, words;
8828
8829 switch (code)
8830 {
8831 case ASHIFT:
8832 case ASHIFTRT:
8833 case LSHIFTRT:
8834 case ROTATERT:
8835 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8836
8837 case PLUS:
8838 case MINUS:
8839 case COMPARE:
8840 case NEG:
8841 case NOT:
8842 return COSTS_N_INSNS (1);
8843
8844 case MULT:
8845 if (arm_arch6m && arm_m_profile_small_mul)
8846 return COSTS_N_INSNS (32);
8847
8848 if (CONST_INT_P (XEXP (x, 1)))
8849 {
8850 int cycles = 0;
8851 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8852
8853 while (i)
8854 {
8855 i >>= 2;
8856 cycles++;
8857 }
8858 return COSTS_N_INSNS (2) + cycles;
8859 }
8860 return COSTS_N_INSNS (1) + 16;
8861
8862 case SET:
8863 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8864 the mode. */
8865 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8866 return (COSTS_N_INSNS (words)
8867 + 4 * ((MEM_P (SET_SRC (x)))
8868 + MEM_P (SET_DEST (x))));
8869
8870 case CONST_INT:
8871 if (outer == SET)
8872 {
8873 if (UINTVAL (x) < 256
8874 /* 16-bit constant. */
8875 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8876 return 0;
8877 if (thumb_shiftable_const (INTVAL (x)))
8878 return COSTS_N_INSNS (2);
8879 return COSTS_N_INSNS (3);
8880 }
8881 else if ((outer == PLUS || outer == COMPARE)
8882 && INTVAL (x) < 256 && INTVAL (x) > -256)
8883 return 0;
8884 else if ((outer == IOR || outer == XOR || outer == AND)
8885 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8886 return COSTS_N_INSNS (1);
8887 else if (outer == AND)
8888 {
8889 int i;
8890 /* This duplicates the tests in the andsi3 expander. */
8891 for (i = 9; i <= 31; i++)
8892 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8893 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8894 return COSTS_N_INSNS (2);
8895 }
8896 else if (outer == ASHIFT || outer == ASHIFTRT
8897 || outer == LSHIFTRT)
8898 return 0;
8899 return COSTS_N_INSNS (2);
8900
8901 case CONST:
8902 case CONST_DOUBLE:
8903 case LABEL_REF:
8904 case SYMBOL_REF:
8905 return COSTS_N_INSNS (3);
8906
8907 case UDIV:
8908 case UMOD:
8909 case DIV:
8910 case MOD:
8911 return 100;
8912
8913 case TRUNCATE:
8914 return 99;
8915
8916 case AND:
8917 case XOR:
8918 case IOR:
8919 /* XXX guess. */
8920 return 8;
8921
8922 case MEM:
8923 /* XXX another guess. */
8924 /* Memory costs quite a lot for the first word, but subsequent words
8925 load at the equivalent of a single insn each. */
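/* Illustrative example (editorial sketch, not from the original source;
   assumes UNITS_PER_WORD == 4): a DImode load costs
   10 + 4 * ((8 - 1) / 4) == 14, plus a further 4 if the address is a
   constant-pool SYMBOL_REF.  */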
8926 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8927 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8928 ? 4 : 0));
8929
8930 case IF_THEN_ELSE:
8931 /* XXX a guess. */
8932 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8933 return 14;
8934 return 2;
8935
8936 case SIGN_EXTEND:
8937 case ZERO_EXTEND:
8938 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8939 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8940
8941 if (mode == SImode)
8942 return total;
8943
8944 if (arm_arch6)
8945 return total + COSTS_N_INSNS (1);
8946
8947 /* Assume a two-shift sequence. Increase the cost slightly so
8948 we prefer actual shifts over an extend operation. */
8949 return total + 1 + COSTS_N_INSNS (2);
8950
8951 default:
8952 return 99;
8953 }
8954 }
8955
8956 /* Estimate the size cost of Thumb-1 instructions.
8957 For now most of the code is copied from thumb1_rtx_costs. We need more
8958 fine-grained tuning when we have more related test cases. */
8959 static inline int
8960 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8961 {
8962 machine_mode mode = GET_MODE (x);
8963 int words, cost;
8964
8965 switch (code)
8966 {
8967 case ASHIFT:
8968 case ASHIFTRT:
8969 case LSHIFTRT:
8970 case ROTATERT:
8971 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8972
8973 case PLUS:
8974 case MINUS:
8975 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/
8976 shiftsub1 patterns defined by RTL expansion, especially for the
8977 expansion of multiplication. */
8978 if ((GET_CODE (XEXP (x, 0)) == MULT
8979 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
8980 || (GET_CODE (XEXP (x, 1)) == MULT
8981 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8982 return COSTS_N_INSNS (2);
8983 /* Fall through. */
8984 case COMPARE:
8985 case NEG:
8986 case NOT:
8987 return COSTS_N_INSNS (1);
8988
8989 case MULT:
8990 if (CONST_INT_P (XEXP (x, 1)))
8991 {
8992 /* The Thumb-1 mul instruction can't operate on a constant; we must load
8993 it into a register first. */
8994 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8995 /* For targets that have a very small and high-latency multiply
8996 unit, we prefer to synthesize the mult with up to 5 instructions,
8997 giving a good balance between size and performance. */
8998 if (arm_arch6m && arm_m_profile_small_mul)
8999 return COSTS_N_INSNS (5);
9000 else
9001 return COSTS_N_INSNS (1) + const_size;
9002 }
9003 return COSTS_N_INSNS (1);
9004
9005 case SET:
9006 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9007 the mode. */
9008 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9009 cost = COSTS_N_INSNS (words);
9010 if (satisfies_constraint_J (SET_SRC (x))
9011 || satisfies_constraint_K (SET_SRC (x))
9012 /* Too big an immediate for a 2-byte mov, so MOVT is used. */
9013 || (CONST_INT_P (SET_SRC (x))
9014 && UINTVAL (SET_SRC (x)) >= 256
9015 && TARGET_HAVE_MOVT
9016 && satisfies_constraint_j (SET_SRC (x)))
9017 /* thumb1_movdi_insn. */
9018 || ((words > 1) && MEM_P (SET_SRC (x))))
9019 cost += COSTS_N_INSNS (1);
9020 return cost;
9021
9022 case CONST_INT:
9023 if (outer == SET)
9024 {
9025 if (UINTVAL (x) < 256)
9026 return COSTS_N_INSNS (1);
9027 /* movw is 4 bytes long. */
9028 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9029 return COSTS_N_INSNS (2);
9030 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9031 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9032 return COSTS_N_INSNS (2);
9033 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9034 if (thumb_shiftable_const (INTVAL (x)))
9035 return COSTS_N_INSNS (2);
9036 return COSTS_N_INSNS (3);
9037 }
9038 else if ((outer == PLUS || outer == COMPARE)
9039 && INTVAL (x) < 256 && INTVAL (x) > -256)
9040 return 0;
9041 else if ((outer == IOR || outer == XOR || outer == AND)
9042 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9043 return COSTS_N_INSNS (1);
9044 else if (outer == AND)
9045 {
9046 int i;
9047 /* This duplicates the tests in the andsi3 expander. */
9048 for (i = 9; i <= 31; i++)
9049 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9050 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9051 return COSTS_N_INSNS (2);
9052 }
9053 else if (outer == ASHIFT || outer == ASHIFTRT
9054 || outer == LSHIFTRT)
9055 return 0;
9056 return COSTS_N_INSNS (2);
9057
9058 case CONST:
9059 case CONST_DOUBLE:
9060 case LABEL_REF:
9061 case SYMBOL_REF:
9062 return COSTS_N_INSNS (3);
9063
9064 case UDIV:
9065 case UMOD:
9066 case DIV:
9067 case MOD:
9068 return 100;
9069
9070 case TRUNCATE:
9071 return 99;
9072
9073 case AND:
9074 case XOR:
9075 case IOR:
9076 return COSTS_N_INSNS (1);
9077
9078 case MEM:
9079 return (COSTS_N_INSNS (1)
9080 + COSTS_N_INSNS (1)
9081 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9082 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9083 ? COSTS_N_INSNS (1) : 0));
9084
9085 case IF_THEN_ELSE:
9086 /* XXX a guess. */
9087 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9088 return 14;
9089 return 2;
9090
9091 case ZERO_EXTEND:
9092 /* XXX still guessing. */
9093 switch (GET_MODE (XEXP (x, 0)))
9094 {
9095 case E_QImode:
9096 return (1 + (mode == DImode ? 4 : 0)
9097 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9098
9099 case E_HImode:
9100 return (4 + (mode == DImode ? 4 : 0)
9101 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9102
9103 case E_SImode:
9104 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9105
9106 default:
9107 return 99;
9108 }
9109
9110 default:
9111 return 99;
9112 }
9113 }
9114
9115 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9116 operand, then return the operand that is being shifted. If the shift
9117 is not by a constant, then set *SHIFT_REG to point to the shift-amount
9118 operand. Return NULL if OP is not a shifter operand. */
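/* Illustrative examples (editorial sketch, not from the original source):
     (mult (reg r0) (const_int 8))    -> returns r0 (power-of-two multiply,
                                         i.e. a left shift by 3)
     (ashift (reg r0) (const_int 3))  -> returns r0, *SHIFT_REG untouched
     (ashift (reg r0) (reg r1))       -> returns r0, *SHIFT_REG set to r1
     (plus (reg r0) (reg r1))         -> returns NULL.  */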
9119 static rtx
9120 shifter_op_p (rtx op, rtx *shift_reg)
9121 {
9122 enum rtx_code code = GET_CODE (op);
9123
9124 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9125 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9126 return XEXP (op, 0);
9127 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9128 return XEXP (op, 0);
9129 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9130 || code == ASHIFTRT)
9131 {
9132 if (!CONST_INT_P (XEXP (op, 1)))
9133 *shift_reg = XEXP (op, 1);
9134 return XEXP (op, 0);
9135 }
9136
9137 return NULL;
9138 }
9139
9140 static bool
9141 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9142 {
9143 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9144 rtx_code code = GET_CODE (x);
9145 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9146
9147 switch (XINT (x, 1))
9148 {
9149 case UNSPEC_UNALIGNED_LOAD:
9150 /* We can only do unaligned loads into the integer unit, and we can't
9151 use LDM or LDRD. */
9152 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9153 if (speed_p)
9154 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9155 + extra_cost->ldst.load_unaligned);
9156
9157 #ifdef NOT_YET
9158 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9159 ADDR_SPACE_GENERIC, speed_p);
9160 #endif
9161 return true;
9162
9163 case UNSPEC_UNALIGNED_STORE:
9164 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9165 if (speed_p)
9166 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9167 + extra_cost->ldst.store_unaligned);
9168
9169 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9170 #ifdef NOT_YET
9171 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9172 ADDR_SPACE_GENERIC, speed_p);
9173 #endif
9174 return true;
9175
9176 case UNSPEC_VRINTZ:
9177 case UNSPEC_VRINTP:
9178 case UNSPEC_VRINTM:
9179 case UNSPEC_VRINTR:
9180 case UNSPEC_VRINTX:
9181 case UNSPEC_VRINTA:
9182 if (speed_p)
9183 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9184
9185 return true;
9186 default:
9187 *cost = COSTS_N_INSNS (2);
9188 break;
9189 }
9190 return true;
9191 }
9192
9193 /* Cost of a libcall. We assume one insn per argument, an amount for the
9194 call itself (one insn for -Os), and then one for processing the result. */
9195 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
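/* Illustrative example (editorial sketch, not from the original source):
   LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when optimizing for
   speed and to COSTS_N_INSNS (4) when optimizing for size.  */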
9196
9197 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9198 do \
9199 { \
9200 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9201 if (shift_op != NULL \
9202 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9203 { \
9204 if (shift_reg) \
9205 { \
9206 if (speed_p) \
9207 *cost += extra_cost->alu.arith_shift_reg; \
9208 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9209 ASHIFT, 1, speed_p); \
9210 } \
9211 else if (speed_p) \
9212 *cost += extra_cost->alu.arith_shift; \
9213 \
9214 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9215 ASHIFT, 0, speed_p) \
9216 + rtx_cost (XEXP (x, 1 - IDX), \
9217 GET_MODE (shift_op), \
9218 OP, 1, speed_p)); \
9219 return true; \
9220 } \
9221 } \
9222 while (0);
9223
9224 /* RTX costs. Make an estimate of the cost of executing the operation
9225 X, which is contained within an operation with code OUTER_CODE.
9226 SPEED_P indicates whether the cost desired is the performance cost,
9227 or the size cost. The estimate is stored in COST and the return
9228 value is TRUE if the cost calculation is final, or FALSE if the
9229 caller should recurse through the operands of X to add additional
9230 costs.
9231
9232 We currently make no attempt to model the size savings of Thumb-2
9233 16-bit instructions. At the normal points in compilation where
9234 this code is called we have no measure of whether the condition
9235 flags are live or not, and thus no realistic way to determine what
9236 the size will eventually be. */
9237 static bool
9238 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9239 const struct cpu_cost_table *extra_cost,
9240 int *cost, bool speed_p)
9241 {
9242 machine_mode mode = GET_MODE (x);
9243
9244 *cost = COSTS_N_INSNS (1);
9245
9246 if (TARGET_THUMB1)
9247 {
9248 if (speed_p)
9249 *cost = thumb1_rtx_costs (x, code, outer_code);
9250 else
9251 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9252 return true;
9253 }
9254
9255 switch (code)
9256 {
9257 case SET:
9258 *cost = 0;
9259 /* SET RTXs don't have a mode so we get it from the destination. */
9260 mode = GET_MODE (SET_DEST (x));
9261
9262 if (REG_P (SET_SRC (x))
9263 && REG_P (SET_DEST (x)))
9264 {
9265 /* Assume that most copies can be done with a single insn,
9266 unless we don't have HW FP, in which case everything
9267 larger than word mode will require two insns. */
9268 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9269 && GET_MODE_SIZE (mode) > 4)
9270 || mode == DImode)
9271 ? 2 : 1);
9272 /* Conditional register moves can be encoded
9273 in 16 bits in Thumb mode. */
9274 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9275 *cost >>= 1;
9276
9277 return true;
9278 }
9279
9280 if (CONST_INT_P (SET_SRC (x)))
9281 {
9282 /* Handle CONST_INT here, since the value doesn't have a mode
9283 and we would otherwise be unable to work out the true cost. */
9284 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9285 0, speed_p);
9286 outer_code = SET;
9287 /* Slightly lower the cost of setting a core reg to a constant.
9288 This helps break up chains and allows for better scheduling. */
9289 if (REG_P (SET_DEST (x))
9290 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9291 *cost -= 1;
9292 x = SET_SRC (x);
9293 /* Immediate moves with an immediate in the range [0, 255] can be
9294 encoded in 16 bits in Thumb mode. */
9295 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9296 && INTVAL (x) >= 0 && INTVAL (x) <=255)
9297 *cost >>= 1;
9298 goto const_int_cost;
9299 }
9300
9301 return false;
9302
9303 case MEM:
9304 /* A memory access costs 1 insn if the mode is small or the address is
9305 a single register; otherwise it costs one insn per word. */
9306 if (REG_P (XEXP (x, 0)))
9307 *cost = COSTS_N_INSNS (1);
9308 else if (flag_pic
9309 && GET_CODE (XEXP (x, 0)) == PLUS
9310 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9311 /* This will be split into two instructions.
9312 See arm.md:calculate_pic_address. */
9313 *cost = COSTS_N_INSNS (2);
9314 else
9315 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9316
9317 /* For speed optimizations, add the costs of the address and
9318 accessing memory. */
9319 if (speed_p)
9320 #ifdef NOT_YET
9321 *cost += (extra_cost->ldst.load
9322 + arm_address_cost (XEXP (x, 0), mode,
9323 ADDR_SPACE_GENERIC, speed_p));
9324 #else
9325 *cost += extra_cost->ldst.load;
9326 #endif
9327 return true;
9328
9329 case PARALLEL:
9330 {
9331 /* Calculations of LDM costs are complex. We assume an initial cost
9332 (ldm_1st) which will load the number of registers mentioned in
9333 ldm_regs_per_insn_1st registers; then each additional
9334 ldm_regs_per_insn_subsequent registers cost one more insn. The
9335 formula for N regs is thus:
9336
9337 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9338 + ldm_regs_per_insn_subsequent - 1)
9339 / ldm_regs_per_insn_subsequent).
9340
9341 Additional costs may also be added for addressing. A similar
9342 formula is used for STM. */
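/* Illustrative example with made-up tuning values (editorial sketch, not
   from the original source): for N == 6, ldm_regs_per_insn_1st == 2 and
   ldm_regs_per_insn_subsequent == 2, the formula gives
   ldm_1st + COSTS_N_INSNS ((4 + 2 - 1) / 2) == ldm_1st + COSTS_N_INSNS (2).  */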
9343
9344 bool is_ldm = load_multiple_operation (x, SImode);
9345 bool is_stm = store_multiple_operation (x, SImode);
9346
9347 if (is_ldm || is_stm)
9348 {
9349 if (speed_p)
9350 {
9351 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9352 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9353 ? extra_cost->ldst.ldm_regs_per_insn_1st
9354 : extra_cost->ldst.stm_regs_per_insn_1st;
9355 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9356 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9357 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9358
9359 *cost += regs_per_insn_1st
9360 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9361 + regs_per_insn_sub - 1)
9362 / regs_per_insn_sub);
9363 return true;
9364 }
9365
9366 }
9367 return false;
9368 }
9369 case DIV:
9370 case UDIV:
9371 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9372 && (mode == SFmode || !TARGET_VFP_SINGLE))
9373 *cost += COSTS_N_INSNS (speed_p
9374 ? extra_cost->fp[mode != SFmode].div : 0);
9375 else if (mode == SImode && TARGET_IDIV)
9376 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9377 else
9378 *cost = LIBCALL_COST (2);
9379
9380 /* Make the cost of sdiv more expensive so that, when both sdiv and udiv
9381 are possible, udiv is preferred. */
9382 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9383 return false; /* All arguments must be in registers. */
9384
9385 case MOD:
9386 /* MOD by a power of 2 can be expanded as:
9387 rsbs r1, r0, #0
9388 and r0, r0, #(n - 1)
9389 and r1, r1, #(n - 1)
9390 rsbpl r0, r1, #0. */
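/* Illustrative example (editorial sketch, not from the original source):
   for r0 % 8 the masks above are #(8 - 1) == #7, and the case below
   charges COSTS_N_INSNS (3) on top of the initial single-insn estimate,
   matching the four-instruction sequence.  */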
9391 if (CONST_INT_P (XEXP (x, 1))
9392 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9393 && mode == SImode)
9394 {
9395 *cost += COSTS_N_INSNS (3);
9396
9397 if (speed_p)
9398 *cost += 2 * extra_cost->alu.logical
9399 + extra_cost->alu.arith;
9400 return true;
9401 }
9402
9403 /* Fall-through. */
9404 case UMOD:
9405 /* Make the cost of sdiv more expensive so that, when both sdiv and udiv
9406 are possible, udiv is preferred. */
9407 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9408 return false; /* All arguments must be in registers. */
9409
9410 case ROTATE:
9411 if (mode == SImode && REG_P (XEXP (x, 1)))
9412 {
9413 *cost += (COSTS_N_INSNS (1)
9414 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9415 if (speed_p)
9416 *cost += extra_cost->alu.shift_reg;
9417 return true;
9418 }
9419 /* Fall through */
9420 case ROTATERT:
9421 case ASHIFT:
9422 case LSHIFTRT:
9423 case ASHIFTRT:
9424 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9425 {
9426 *cost += (COSTS_N_INSNS (2)
9427 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9428 if (speed_p)
9429 *cost += 2 * extra_cost->alu.shift;
9430 return true;
9431 }
9432 else if (mode == SImode)
9433 {
9434 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9435 /* Slightly disparage register shifts at -Os, but not by much. */
9436 if (!CONST_INT_P (XEXP (x, 1)))
9437 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9438 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9439 return true;
9440 }
9441 else if (GET_MODE_CLASS (mode) == MODE_INT
9442 && GET_MODE_SIZE (mode) < 4)
9443 {
9444 if (code == ASHIFT)
9445 {
9446 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9447 /* Slightly disparage register shifts at -Os, but not by
9448 much. */
9449 if (!CONST_INT_P (XEXP (x, 1)))
9450 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9451 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9452 }
9453 else if (code == LSHIFTRT || code == ASHIFTRT)
9454 {
9455 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9456 {
9457 /* Can use SBFX/UBFX. */
9458 if (speed_p)
9459 *cost += extra_cost->alu.bfx;
9460 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9461 }
9462 else
9463 {
9464 *cost += COSTS_N_INSNS (1);
9465 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9466 if (speed_p)
9467 {
9468 if (CONST_INT_P (XEXP (x, 1)))
9469 *cost += 2 * extra_cost->alu.shift;
9470 else
9471 *cost += (extra_cost->alu.shift
9472 + extra_cost->alu.shift_reg);
9473 }
9474 else
9475 /* Slightly disparage register shifts. */
9476 *cost += !CONST_INT_P (XEXP (x, 1));
9477 }
9478 }
9479 else /* Rotates. */
9480 {
9481 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9482 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9483 if (speed_p)
9484 {
9485 if (CONST_INT_P (XEXP (x, 1)))
9486 *cost += (2 * extra_cost->alu.shift
9487 + extra_cost->alu.log_shift);
9488 else
9489 *cost += (extra_cost->alu.shift
9490 + extra_cost->alu.shift_reg
9491 + extra_cost->alu.log_shift_reg);
9492 }
9493 }
9494 return true;
9495 }
9496
9497 *cost = LIBCALL_COST (2);
9498 return false;
9499
9500 case BSWAP:
9501 if (arm_arch6)
9502 {
9503 if (mode == SImode)
9504 {
9505 if (speed_p)
9506 *cost += extra_cost->alu.rev;
9507
9508 return false;
9509 }
9510 }
9511 else
9512 {
9513 /* No rev instruction available. Look at arm_legacy_rev
9514 and thumb_legacy_rev for the form of RTL used then. */
9515 if (TARGET_THUMB)
9516 {
9517 *cost += COSTS_N_INSNS (9);
9518
9519 if (speed_p)
9520 {
9521 *cost += 6 * extra_cost->alu.shift;
9522 *cost += 3 * extra_cost->alu.logical;
9523 }
9524 }
9525 else
9526 {
9527 *cost += COSTS_N_INSNS (4);
9528
9529 if (speed_p)
9530 {
9531 *cost += 2 * extra_cost->alu.shift;
9532 *cost += extra_cost->alu.arith_shift;
9533 *cost += 2 * extra_cost->alu.logical;
9534 }
9535 }
9536 return true;
9537 }
9538 return false;
9539
9540 case MINUS:
9541 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9542 && (mode == SFmode || !TARGET_VFP_SINGLE))
9543 {
9544 if (GET_CODE (XEXP (x, 0)) == MULT
9545 || GET_CODE (XEXP (x, 1)) == MULT)
9546 {
9547 rtx mul_op0, mul_op1, sub_op;
9548
9549 if (speed_p)
9550 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9551
9552 if (GET_CODE (XEXP (x, 0)) == MULT)
9553 {
9554 mul_op0 = XEXP (XEXP (x, 0), 0);
9555 mul_op1 = XEXP (XEXP (x, 0), 1);
9556 sub_op = XEXP (x, 1);
9557 }
9558 else
9559 {
9560 mul_op0 = XEXP (XEXP (x, 1), 0);
9561 mul_op1 = XEXP (XEXP (x, 1), 1);
9562 sub_op = XEXP (x, 0);
9563 }
9564
9565 /* The first operand of the multiply may be optionally
9566 negated. */
9567 if (GET_CODE (mul_op0) == NEG)
9568 mul_op0 = XEXP (mul_op0, 0);
9569
9570 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9571 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9572 + rtx_cost (sub_op, mode, code, 0, speed_p));
9573
9574 return true;
9575 }
9576
9577 if (speed_p)
9578 *cost += extra_cost->fp[mode != SFmode].addsub;
9579 return false;
9580 }
9581
9582 if (mode == SImode)
9583 {
9584 rtx shift_by_reg = NULL;
9585 rtx shift_op;
9586 rtx non_shift_op;
9587
9588 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9589 if (shift_op == NULL)
9590 {
9591 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9592 non_shift_op = XEXP (x, 0);
9593 }
9594 else
9595 non_shift_op = XEXP (x, 1);
9596
9597 if (shift_op != NULL)
9598 {
9599 if (shift_by_reg != NULL)
9600 {
9601 if (speed_p)
9602 *cost += extra_cost->alu.arith_shift_reg;
9603 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9604 }
9605 else if (speed_p)
9606 *cost += extra_cost->alu.arith_shift;
9607
9608 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9609 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9610 return true;
9611 }
9612
9613 if (arm_arch_thumb2
9614 && GET_CODE (XEXP (x, 1)) == MULT)
9615 {
9616 /* MLS. */
9617 if (speed_p)
9618 *cost += extra_cost->mult[0].add;
9619 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9620 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9621 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9622 return true;
9623 }
9624
9625 if (CONST_INT_P (XEXP (x, 0)))
9626 {
9627 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9628 INTVAL (XEXP (x, 0)), NULL_RTX,
9629 NULL_RTX, 1, 0);
9630 *cost = COSTS_N_INSNS (insns);
9631 if (speed_p)
9632 *cost += insns * extra_cost->alu.arith;
9633 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9634 return true;
9635 }
9636 else if (speed_p)
9637 *cost += extra_cost->alu.arith;
9638
9639 return false;
9640 }
9641
9642 if (GET_MODE_CLASS (mode) == MODE_INT
9643 && GET_MODE_SIZE (mode) < 4)
9644 {
9645 rtx shift_op, shift_reg;
9646 shift_reg = NULL;
9647
9648 /* We check both sides of the MINUS for shifter operands since,
9649 unlike PLUS, it's not commutative. */
9650
9651 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9652 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9653
9654 /* Slightly disparage, as we might need to widen the result. */
9655 *cost += 1;
9656 if (speed_p)
9657 *cost += extra_cost->alu.arith;
9658
9659 if (CONST_INT_P (XEXP (x, 0)))
9660 {
9661 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9662 return true;
9663 }
9664
9665 return false;
9666 }
9667
9668 if (mode == DImode)
9669 {
9670 *cost += COSTS_N_INSNS (1);
9671
9672 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9673 {
9674 rtx op1 = XEXP (x, 1);
9675
9676 if (speed_p)
9677 *cost += 2 * extra_cost->alu.arith;
9678
9679 if (GET_CODE (op1) == ZERO_EXTEND)
9680 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9681 0, speed_p);
9682 else
9683 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9684 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9685 0, speed_p);
9686 return true;
9687 }
9688 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9689 {
9690 if (speed_p)
9691 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9692 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9693 0, speed_p)
9694 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9695 return true;
9696 }
9697 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9698 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9699 {
9700 if (speed_p)
9701 *cost += (extra_cost->alu.arith
9702 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9703 ? extra_cost->alu.arith
9704 : extra_cost->alu.arith_shift));
9705 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9706 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9707 GET_CODE (XEXP (x, 1)), 0, speed_p));
9708 return true;
9709 }
9710
9711 if (speed_p)
9712 *cost += 2 * extra_cost->alu.arith;
9713 return false;
9714 }
9715
9716 /* Vector mode? */
9717
9718 *cost = LIBCALL_COST (2);
9719 return false;
9720
9721 case PLUS:
9722 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9723 && (mode == SFmode || !TARGET_VFP_SINGLE))
9724 {
9725 if (GET_CODE (XEXP (x, 0)) == MULT)
9726 {
9727 rtx mul_op0, mul_op1, add_op;
9728
9729 if (speed_p)
9730 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9731
9732 mul_op0 = XEXP (XEXP (x, 0), 0);
9733 mul_op1 = XEXP (XEXP (x, 0), 1);
9734 add_op = XEXP (x, 1);
9735
9736 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9737 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9738 + rtx_cost (add_op, mode, code, 0, speed_p));
9739
9740 return true;
9741 }
9742
9743 if (speed_p)
9744 *cost += extra_cost->fp[mode != SFmode].addsub;
9745 return false;
9746 }
9747 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9748 {
9749 *cost = LIBCALL_COST (2);
9750 return false;
9751 }
9752
9753 /* Narrow modes can be synthesized in SImode, but the range
9754 of useful sub-operations is limited. Check for shift operations
9755 on one of the operands. Only left shifts can be used in the
9756 narrow modes. */
9757 if (GET_MODE_CLASS (mode) == MODE_INT
9758 && GET_MODE_SIZE (mode) < 4)
9759 {
9760 rtx shift_op, shift_reg;
9761 shift_reg = NULL;
9762
9763 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9764
9765 if (CONST_INT_P (XEXP (x, 1)))
9766 {
9767 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9768 INTVAL (XEXP (x, 1)), NULL_RTX,
9769 NULL_RTX, 1, 0);
9770 *cost = COSTS_N_INSNS (insns);
9771 if (speed_p)
9772 *cost += insns * extra_cost->alu.arith;
9773 /* Slightly penalize a narrow operation as the result may
9774 need widening. */
9775 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9776 return true;
9777 }
9778
9779 /* Slightly penalize a narrow operation as the result may
9780 need widening. */
9781 *cost += 1;
9782 if (speed_p)
9783 *cost += extra_cost->alu.arith;
9784
9785 return false;
9786 }
9787
9788 if (mode == SImode)
9789 {
9790 rtx shift_op, shift_reg;
9791
9792 if (TARGET_INT_SIMD
9793 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9794 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9795 {
9796 /* UXTA[BH] or SXTA[BH]. */
9797 if (speed_p)
9798 *cost += extra_cost->alu.extend_arith;
9799 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9800 0, speed_p)
9801 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9802 return true;
9803 }
9804
9805 shift_reg = NULL;
9806 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9807 if (shift_op != NULL)
9808 {
9809 if (shift_reg)
9810 {
9811 if (speed_p)
9812 *cost += extra_cost->alu.arith_shift_reg;
9813 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9814 }
9815 else if (speed_p)
9816 *cost += extra_cost->alu.arith_shift;
9817
9818 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9819 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9820 return true;
9821 }
9822 if (GET_CODE (XEXP (x, 0)) == MULT)
9823 {
9824 rtx mul_op = XEXP (x, 0);
9825
9826 if (TARGET_DSP_MULTIPLY
9827 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9828 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9829 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9830 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9831 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9832 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9833 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9834 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9835 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9836 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9837 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9838 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9839 == 16))))))
9840 {
9841 /* SMLA[BT][BT]. */
9842 if (speed_p)
9843 *cost += extra_cost->mult[0].extend_add;
9844 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9845 SIGN_EXTEND, 0, speed_p)
9846 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9847 SIGN_EXTEND, 0, speed_p)
9848 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9849 return true;
9850 }
9851
9852 if (speed_p)
9853 *cost += extra_cost->mult[0].add;
9854 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9855 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9856 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9857 return true;
9858 }
9859 if (CONST_INT_P (XEXP (x, 1)))
9860 {
9861 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9862 INTVAL (XEXP (x, 1)), NULL_RTX,
9863 NULL_RTX, 1, 0);
9864 *cost = COSTS_N_INSNS (insns);
9865 if (speed_p)
9866 *cost += insns * extra_cost->alu.arith;
9867 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9868 return true;
9869 }
9870 else if (speed_p)
9871 *cost += extra_cost->alu.arith;
9872
9873 return false;
9874 }
9875
9876 if (mode == DImode)
9877 {
9878 if (arm_arch3m
9879 && GET_CODE (XEXP (x, 0)) == MULT
9880 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9881 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9882 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9883 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9884 {
9885 if (speed_p)
9886 *cost += extra_cost->mult[1].extend_add;
9887 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9888 ZERO_EXTEND, 0, speed_p)
9889 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9890 ZERO_EXTEND, 0, speed_p)
9891 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9892 return true;
9893 }
9894
9895 *cost += COSTS_N_INSNS (1);
9896
9897 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9898 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9899 {
9900 if (speed_p)
9901 *cost += (extra_cost->alu.arith
9902 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9903 ? extra_cost->alu.arith
9904 : extra_cost->alu.arith_shift));
9905
9906 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9907 0, speed_p)
9908 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9909 return true;
9910 }
9911
9912 if (speed_p)
9913 *cost += 2 * extra_cost->alu.arith;
9914 return false;
9915 }
9916
9917 /* Vector mode? */
9918 *cost = LIBCALL_COST (2);
9919 return false;
9920 case IOR:
9921 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9922 {
9923 if (speed_p)
9924 *cost += extra_cost->alu.rev;
9925
9926 return true;
9927 }
9928 /* Fall through. */
9929 case AND: case XOR:
9930 if (mode == SImode)
9931 {
9932 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9933 rtx op0 = XEXP (x, 0);
9934 rtx shift_op, shift_reg;
9935
9936 if (subcode == NOT
9937 && (code == AND
9938 || (code == IOR && TARGET_THUMB2)))
9939 op0 = XEXP (op0, 0);
9940
9941 shift_reg = NULL;
9942 shift_op = shifter_op_p (op0, &shift_reg);
9943 if (shift_op != NULL)
9944 {
9945 if (shift_reg)
9946 {
9947 if (speed_p)
9948 *cost += extra_cost->alu.log_shift_reg;
9949 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9950 }
9951 else if (speed_p)
9952 *cost += extra_cost->alu.log_shift;
9953
9954 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9955 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9956 return true;
9957 }
9958
9959 if (CONST_INT_P (XEXP (x, 1)))
9960 {
9961 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9962 INTVAL (XEXP (x, 1)), NULL_RTX,
9963 NULL_RTX, 1, 0);
9964
9965 *cost = COSTS_N_INSNS (insns);
9966 if (speed_p)
9967 *cost += insns * extra_cost->alu.logical;
9968 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9969 return true;
9970 }
9971
9972 if (speed_p)
9973 *cost += extra_cost->alu.logical;
9974 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9975 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9976 return true;
9977 }
9978
9979 if (mode == DImode)
9980 {
9981 rtx op0 = XEXP (x, 0);
9982 enum rtx_code subcode = GET_CODE (op0);
9983
9984 *cost += COSTS_N_INSNS (1);
9985
9986 if (subcode == NOT
9987 && (code == AND
9988 || (code == IOR && TARGET_THUMB2)))
9989 op0 = XEXP (op0, 0);
9990
9991 if (GET_CODE (op0) == ZERO_EXTEND)
9992 {
9993 if (speed_p)
9994 *cost += 2 * extra_cost->alu.logical;
9995
9996 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
9997 0, speed_p)
9998 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9999 return true;
10000 }
10001 else if (GET_CODE (op0) == SIGN_EXTEND)
10002 {
10003 if (speed_p)
10004 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10005
10006 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10007 0, speed_p)
10008 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10009 return true;
10010 }
10011
10012 if (speed_p)
10013 *cost += 2 * extra_cost->alu.logical;
10014
10015 return true;
10016 }
10017 /* Vector mode? */
10018
10019 *cost = LIBCALL_COST (2);
10020 return false;
10021
10022 case MULT:
10023 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10024 && (mode == SFmode || !TARGET_VFP_SINGLE))
10025 {
10026 rtx op0 = XEXP (x, 0);
10027
10028 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10029 op0 = XEXP (op0, 0);
10030
10031 if (speed_p)
10032 *cost += extra_cost->fp[mode != SFmode].mult;
10033
10034 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10035 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10036 return true;
10037 }
10038 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10039 {
10040 *cost = LIBCALL_COST (2);
10041 return false;
10042 }
10043
10044 if (mode == SImode)
10045 {
10046 if (TARGET_DSP_MULTIPLY
10047 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10048 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10049 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10050 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10051 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10052 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10053 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10054 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10055 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10056 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10057 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10058 && (INTVAL (XEXP (XEXP (x, 1), 1))
10059 == 16))))))
10060 {
10061 /* SMUL[TB][TB]. */
10062 if (speed_p)
10063 *cost += extra_cost->mult[0].extend;
10064 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10065 SIGN_EXTEND, 0, speed_p);
10066 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10067 SIGN_EXTEND, 1, speed_p);
10068 return true;
10069 }
10070 if (speed_p)
10071 *cost += extra_cost->mult[0].simple;
10072 return false;
10073 }
10074
10075 if (mode == DImode)
10076 {
10077 if (arm_arch3m
10078 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10079 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10080 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10081 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10082 {
10083 if (speed_p)
10084 *cost += extra_cost->mult[1].extend;
10085 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10086 ZERO_EXTEND, 0, speed_p)
10087 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10088 ZERO_EXTEND, 0, speed_p));
10089 return true;
10090 }
10091
10092 *cost = LIBCALL_COST (2);
10093 return false;
10094 }
10095
10096 /* Vector mode? */
10097 *cost = LIBCALL_COST (2);
10098 return false;
10099
10100 case NEG:
10101 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10102 && (mode == SFmode || !TARGET_VFP_SINGLE))
10103 {
10104 if (GET_CODE (XEXP (x, 0)) == MULT)
10105 {
10106 /* VNMUL. */
10107 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10108 return true;
10109 }
10110
10111 if (speed_p)
10112 *cost += extra_cost->fp[mode != SFmode].neg;
10113
10114 return false;
10115 }
10116 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10117 {
10118 *cost = LIBCALL_COST (1);
10119 return false;
10120 }
10121
10122 if (mode == SImode)
10123 {
10124 if (GET_CODE (XEXP (x, 0)) == ABS)
10125 {
10126 *cost += COSTS_N_INSNS (1);
10127 /* Assume the non-flag-changing variant. */
10128 if (speed_p)
10129 *cost += (extra_cost->alu.log_shift
10130 + extra_cost->alu.arith_shift);
10131 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10132 return true;
10133 }
10134
10135 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10136 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10137 {
10138 *cost += COSTS_N_INSNS (1);
10139 /* No extra cost for MOV imm and MVN imm. */
10140 /* If the comparison op is using the flags, there's no further
10141 cost; otherwise we need to add the cost of the comparison. */
10142 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10143 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10144 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10145 {
10146 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10147 *cost += (COSTS_N_INSNS (1)
10148 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10149 0, speed_p)
10150 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10151 1, speed_p));
10152 if (speed_p)
10153 *cost += extra_cost->alu.arith;
10154 }
10155 return true;
10156 }
10157
10158 if (speed_p)
10159 *cost += extra_cost->alu.arith;
10160 return false;
10161 }
10162
10163 if (GET_MODE_CLASS (mode) == MODE_INT
10164 && GET_MODE_SIZE (mode) < 4)
10165 {
10166 /* Slightly disparage, as we might need an extend operation. */
10167 *cost += 1;
10168 if (speed_p)
10169 *cost += extra_cost->alu.arith;
10170 return false;
10171 }
10172
10173 if (mode == DImode)
10174 {
10175 *cost += COSTS_N_INSNS (1);
10176 if (speed_p)
10177 *cost += 2 * extra_cost->alu.arith;
10178 return false;
10179 }
10180
10181 /* Vector mode? */
10182 *cost = LIBCALL_COST (1);
10183 return false;
10184
10185 case NOT:
10186 if (mode == SImode)
10187 {
10188 rtx shift_op;
10189 rtx shift_reg = NULL;
10190
10191 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10192
10193 if (shift_op)
10194 {
10195 if (shift_reg != NULL)
10196 {
10197 if (speed_p)
10198 *cost += extra_cost->alu.log_shift_reg;
10199 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10200 }
10201 else if (speed_p)
10202 *cost += extra_cost->alu.log_shift;
10203 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10204 return true;
10205 }
10206
10207 if (speed_p)
10208 *cost += extra_cost->alu.logical;
10209 return false;
10210 }
10211 if (mode == DImode)
10212 {
10213 *cost += COSTS_N_INSNS (1);
10214 return false;
10215 }
10216
10217 /* Vector mode? */
10218
10219 *cost += LIBCALL_COST (1);
10220 return false;
10221
10222 case IF_THEN_ELSE:
10223 {
10224 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10225 {
10226 *cost += COSTS_N_INSNS (3);
10227 return true;
10228 }
10229 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10230 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10231
10232 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10233 /* Assume that if one arm of the if_then_else is a register,
10234 that it will be tied with the result and eliminate the
10235 conditional insn. */
10236 if (REG_P (XEXP (x, 1)))
10237 *cost += op2cost;
10238 else if (REG_P (XEXP (x, 2)))
10239 *cost += op1cost;
10240 else
10241 {
10242 if (speed_p)
10243 {
10244 if (extra_cost->alu.non_exec_costs_exec)
10245 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10246 else
10247 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10248 }
10249 else
10250 *cost += op1cost + op2cost;
10251 }
10252 }
10253 return true;
10254
10255 case COMPARE:
10256 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10257 *cost = 0;
10258 else
10259 {
10260 machine_mode op0mode;
10261 /* We'll mostly assume that the cost of a compare is the cost of the
10262 LHS. However, there are some notable exceptions. */
10263
10264 /* Floating point compares are never done as side-effects. */
10265 op0mode = GET_MODE (XEXP (x, 0));
10266 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10267 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10268 {
10269 if (speed_p)
10270 *cost += extra_cost->fp[op0mode != SFmode].compare;
10271
10272 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10273 {
10274 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10275 return true;
10276 }
10277
10278 return false;
10279 }
10280 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10281 {
10282 *cost = LIBCALL_COST (2);
10283 return false;
10284 }
10285
10286 /* DImode compares normally take two insns. */
10287 if (op0mode == DImode)
10288 {
10289 *cost += COSTS_N_INSNS (1);
10290 if (speed_p)
10291 *cost += 2 * extra_cost->alu.arith;
10292 return false;
10293 }
10294
10295 if (op0mode == SImode)
10296 {
10297 rtx shift_op;
10298 rtx shift_reg;
10299
10300 if (XEXP (x, 1) == const0_rtx
10301 && !(REG_P (XEXP (x, 0))
10302 || (GET_CODE (XEXP (x, 0)) == SUBREG
10303 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10304 {
10305 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10306
10307 /* Multiply operations that set the flags are often
10308 significantly more expensive. */
10309 if (speed_p
10310 && GET_CODE (XEXP (x, 0)) == MULT
10311 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10312 *cost += extra_cost->mult[0].flag_setting;
10313
10314 if (speed_p
10315 && GET_CODE (XEXP (x, 0)) == PLUS
10316 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10317 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10318 0), 1), mode))
10319 *cost += extra_cost->mult[0].flag_setting;
10320 return true;
10321 }
10322
10323 shift_reg = NULL;
10324 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10325 if (shift_op != NULL)
10326 {
10327 if (shift_reg != NULL)
10328 {
10329 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10330 1, speed_p);
10331 if (speed_p)
10332 *cost += extra_cost->alu.arith_shift_reg;
10333 }
10334 else if (speed_p)
10335 *cost += extra_cost->alu.arith_shift;
10336 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10337 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10338 return true;
10339 }
10340
10341 if (speed_p)
10342 *cost += extra_cost->alu.arith;
10343 if (CONST_INT_P (XEXP (x, 1))
10344 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10345 {
10346 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10347 return true;
10348 }
10349 return false;
10350 }
10351
10352 /* Vector mode? */
10353
10354 *cost = LIBCALL_COST (2);
10355 return false;
10356 }
10357 return true;
10358
10359 case EQ:
10360 case NE:
10361 case LT:
10362 case LE:
10363 case GT:
10364 case GE:
10365 case LTU:
10366 case LEU:
10367 case GEU:
10368 case GTU:
10369 case ORDERED:
10370 case UNORDERED:
10371 case UNEQ:
10372 case UNLE:
10373 case UNLT:
10374 case UNGE:
10375 case UNGT:
10376 case LTGT:
10377 if (outer_code == SET)
10378 {
10379 /* Is it a store-flag operation? */
10380 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10381 && XEXP (x, 1) == const0_rtx)
10382 {
10383 /* Thumb also needs an IT insn. */
10384 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10385 return true;
10386 }
10387 if (XEXP (x, 1) == const0_rtx)
10388 {
10389 switch (code)
10390 {
10391 case LT:
10392 /* LSR Rd, Rn, #31. */
10393 if (speed_p)
10394 *cost += extra_cost->alu.shift;
10395 break;
10396
10397 case EQ:
10398 /* RSBS T1, Rn, #0
10399 ADC Rd, Rn, T1. */
10400
10401 case NE:
10402 /* SUBS T1, Rn, #1
10403 SBC Rd, Rn, T1. */
10404 *cost += COSTS_N_INSNS (1);
10405 break;
10406
10407 case LE:
10408 /* RSBS T1, Rn, Rn, LSR #31
10409 ADC Rd, Rn, T1. */
10410 *cost += COSTS_N_INSNS (1);
10411 if (speed_p)
10412 *cost += extra_cost->alu.arith_shift;
10413 break;
10414
10415 case GT:
10416 /* RSB Rd, Rn, Rn, ASR #1
10417 LSR Rd, Rd, #31. */
10418 *cost += COSTS_N_INSNS (1);
10419 if (speed_p)
10420 *cost += (extra_cost->alu.arith_shift
10421 + extra_cost->alu.shift);
10422 break;
10423
10424 case GE:
10425 /* ASR Rd, Rn, #31
10426 ADD Rd, Rn, #1. */
10427 *cost += COSTS_N_INSNS (1);
10428 if (speed_p)
10429 *cost += extra_cost->alu.shift;
10430 break;
10431
10432 default:
10433 /* Remaining cases are either meaningless or would take
10434 three insns anyway. */
10435 *cost = COSTS_N_INSNS (3);
10436 break;
10437 }
10438 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10439 return true;
10440 }
10441 else
10442 {
10443 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10444 if (CONST_INT_P (XEXP (x, 1))
10445 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10446 {
10447 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10448 return true;
10449 }
10450
10451 return false;
10452 }
10453 }
10454 /* Not directly inside a set. If it involves the condition code
10455 register it must be the condition for a branch, cond_exec or
10456 if_then_else operation. Since the comparison is performed elsewhere,
10457 this is just the control part, which has no additional
10458 cost. */
10459 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10460 && XEXP (x, 1) == const0_rtx)
10461 {
10462 *cost = 0;
10463 return true;
10464 }
10465 return false;
10466
10467 case ABS:
10468 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10469 && (mode == SFmode || !TARGET_VFP_SINGLE))
10470 {
10471 if (speed_p)
10472 *cost += extra_cost->fp[mode != SFmode].neg;
10473
10474 return false;
10475 }
10476 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10477 {
10478 *cost = LIBCALL_COST (1);
10479 return false;
10480 }
10481
10482 if (mode == SImode)
10483 {
10484 if (speed_p)
10485 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10486 return false;
10487 }
10488 /* Vector mode? */
10489 *cost = LIBCALL_COST (1);
10490 return false;
10491
10492 case SIGN_EXTEND:
10493 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10494 && MEM_P (XEXP (x, 0)))
10495 {
10496 if (mode == DImode)
10497 *cost += COSTS_N_INSNS (1);
10498
10499 if (!speed_p)
10500 return true;
10501
10502 if (GET_MODE (XEXP (x, 0)) == SImode)
10503 *cost += extra_cost->ldst.load;
10504 else
10505 *cost += extra_cost->ldst.load_sign_extend;
10506
10507 if (mode == DImode)
10508 *cost += extra_cost->alu.shift;
10509
10510 return true;
10511 }
10512
10513 /* Widening from less than 32 bits requires an extend operation. */
10514 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10515 {
10516 /* We have SXTB/SXTH. */
10517 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10518 if (speed_p)
10519 *cost += extra_cost->alu.extend;
10520 }
10521 else if (GET_MODE (XEXP (x, 0)) != SImode)
10522 {
10523 /* Needs two shifts. */
10524 *cost += COSTS_N_INSNS (1);
10525 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10526 if (speed_p)
10527 *cost += 2 * extra_cost->alu.shift;
10528 }
10529
10530 /* Widening beyond 32 bits requires one more insn. */
10531 if (mode == DImode)
10532 {
10533 *cost += COSTS_N_INSNS (1);
10534 if (speed_p)
10535 *cost += extra_cost->alu.shift;
10536 }
10537
10538 return true;
10539
10540 case ZERO_EXTEND:
10541 if ((arm_arch4
10542 || GET_MODE (XEXP (x, 0)) == SImode
10543 || GET_MODE (XEXP (x, 0)) == QImode)
10544 && MEM_P (XEXP (x, 0)))
10545 {
10546 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10547
10548 if (mode == DImode)
10549 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10550
10551 return true;
10552 }
10553
10554 /* Widening from less than 32 bits requires an extend operation. */
10555 if (GET_MODE (XEXP (x, 0)) == QImode)
10556 {
10557 /* UXTB can be a shorter instruction in Thumb2, but it might
10558 be slower than the AND Rd, Rn, #255 alternative. When
10559 optimizing for speed it should never be slower to use
10560 AND, and we don't really model 16-bit vs 32-bit insns
10561 here. */
10562 if (speed_p)
10563 *cost += extra_cost->alu.logical;
10564 }
10565 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10566 {
10567 /* We have UXTB/UXTH. */
10568 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10569 if (speed_p)
10570 *cost += extra_cost->alu.extend;
10571 }
10572 else if (GET_MODE (XEXP (x, 0)) != SImode)
10573 {
10574 /* Needs two shifts. It's marginally preferable to use
10575 shifts rather than two BIC instructions as the second
10576 shift may merge with a subsequent insn as a shifter
10577 op. */
10578 *cost = COSTS_N_INSNS (2);
10579 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10580 if (speed_p)
10581 *cost += 2 * extra_cost->alu.shift;
10582 }
10583
10584 /* Widening beyond 32 bits requires one more insn. */
10585 if (mode == DImode)
10586 {
10587 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10588 }
10589
10590 return true;
10591
10592 case CONST_INT:
10593 *cost = 0;
10594 /* CONST_INT has no mode, so we cannot tell for sure how many
10595 insns are really going to be needed. The best we can do is
10596 look at the value passed. If it fits in SImode, then assume
10597 that's the mode it will be used for. Otherwise assume it
10598 will be used in DImode. */
10599 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10600 mode = SImode;
10601 else
10602 mode = DImode;
10603
10604 /* Avoid blowing up in arm_gen_constant (). */
10605 if (!(outer_code == PLUS
10606 || outer_code == AND
10607 || outer_code == IOR
10608 || outer_code == XOR
10609 || outer_code == MINUS))
10610 outer_code = SET;
10611
10612 const_int_cost:
10613 if (mode == SImode)
10614 {
10615 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10616 INTVAL (x), NULL, NULL,
10617 0, 0));
10618 /* Extra costs? */
10619 }
10620 else
10621 {
10622 *cost += COSTS_N_INSNS (arm_gen_constant
10623 (outer_code, SImode, NULL,
10624 trunc_int_for_mode (INTVAL (x), SImode),
10625 NULL, NULL, 0, 0)
10626 + arm_gen_constant (outer_code, SImode, NULL,
10627 INTVAL (x) >> 32, NULL,
10628 NULL, 0, 0));
10629 /* Extra costs? */
10630 }
10631
10632 return true;
10633
10634 case CONST:
10635 case LABEL_REF:
10636 case SYMBOL_REF:
10637 if (speed_p)
10638 {
10639 if (arm_arch_thumb2 && !flag_pic)
10640 *cost += COSTS_N_INSNS (1);
10641 else
10642 *cost += extra_cost->ldst.load;
10643 }
10644 else
10645 *cost += COSTS_N_INSNS (1);
10646
10647 if (flag_pic)
10648 {
10649 *cost += COSTS_N_INSNS (1);
10650 if (speed_p)
10651 *cost += extra_cost->alu.arith;
10652 }
10653
10654 return true;
10655
10656 case CONST_FIXED:
10657 *cost = COSTS_N_INSNS (4);
10658 /* Fixme. */
10659 return true;
10660
10661 case CONST_DOUBLE:
10662 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10663 && (mode == SFmode || !TARGET_VFP_SINGLE))
10664 {
10665 if (vfp3_const_double_rtx (x))
10666 {
10667 if (speed_p)
10668 *cost += extra_cost->fp[mode == DFmode].fpconst;
10669 return true;
10670 }
10671
10672 if (speed_p)
10673 {
10674 if (mode == DFmode)
10675 *cost += extra_cost->ldst.loadd;
10676 else
10677 *cost += extra_cost->ldst.loadf;
10678 }
10679 else
10680 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10681
10682 return true;
10683 }
10684 *cost = COSTS_N_INSNS (4);
10685 return true;
10686
10687 case CONST_VECTOR:
10688 /* Fixme. */
10689 if (TARGET_NEON
10690 && TARGET_HARD_FLOAT
10691 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10692 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10693 *cost = COSTS_N_INSNS (1);
10694 else
10695 *cost = COSTS_N_INSNS (4);
10696 return true;
10697
10698 case HIGH:
10699 case LO_SUM:
10700 /* When optimizing for size, we prefer constant pool entries to
10701 MOVW/MOVT pairs, so bump the cost of these slightly. */
10702 if (!speed_p)
10703 *cost += 1;
10704 return true;
10705
10706 case CLZ:
10707 if (speed_p)
10708 *cost += extra_cost->alu.clz;
10709 return false;
10710
10711 case SMIN:
10712 if (XEXP (x, 1) == const0_rtx)
10713 {
10714 if (speed_p)
10715 *cost += extra_cost->alu.log_shift;
10716 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10717 return true;
10718 }
10719 /* Fall through. */
10720 case SMAX:
10721 case UMIN:
10722 case UMAX:
10723 *cost += COSTS_N_INSNS (1);
10724 return false;
10725
10726 case TRUNCATE:
10727 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10728 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10729 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10730 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10731 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10732 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10733 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10734 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10735 == ZERO_EXTEND))))
10736 {
10737 if (speed_p)
10738 *cost += extra_cost->mult[1].extend;
10739 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10740 ZERO_EXTEND, 0, speed_p)
10741 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10742 ZERO_EXTEND, 0, speed_p));
10743 return true;
10744 }
10745 *cost = LIBCALL_COST (1);
10746 return false;
10747
10748 case UNSPEC_VOLATILE:
10749 case UNSPEC:
10750 return arm_unspec_cost (x, outer_code, speed_p, cost);
10751
10752 case PC:
10753 /* Reading the PC is like reading any other register. Writing it
10754 is more expensive, but we take that into account elsewhere. */
10755 *cost = 0;
10756 return true;
10757
10758 case ZERO_EXTRACT:
10759 /* TODO: Simple zero_extract of bottom bits using AND. */
10760 /* Fall through. */
10761 case SIGN_EXTRACT:
10762 if (arm_arch6
10763 && mode == SImode
10764 && CONST_INT_P (XEXP (x, 1))
10765 && CONST_INT_P (XEXP (x, 2)))
10766 {
10767 if (speed_p)
10768 *cost += extra_cost->alu.bfx;
10769 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10770 return true;
10771 }
10772 /* Without UBFX/SBFX, need to resort to shift operations. */
10773 *cost += COSTS_N_INSNS (1);
10774 if (speed_p)
10775 *cost += 2 * extra_cost->alu.shift;
10776 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10777 return true;
10778
10779 case FLOAT_EXTEND:
10780 if (TARGET_HARD_FLOAT)
10781 {
10782 if (speed_p)
10783 *cost += extra_cost->fp[mode == DFmode].widen;
10784 if (!TARGET_VFP5
10785 && GET_MODE (XEXP (x, 0)) == HFmode)
10786 {
10787 /* Pre v8, widening HF->DF is a two-step process, first
10788 widening to SFmode. */
10789 *cost += COSTS_N_INSNS (1);
10790 if (speed_p)
10791 *cost += extra_cost->fp[0].widen;
10792 }
10793 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10794 return true;
10795 }
10796
10797 *cost = LIBCALL_COST (1);
10798 return false;
10799
10800 case FLOAT_TRUNCATE:
10801 if (TARGET_HARD_FLOAT)
10802 {
10803 if (speed_p)
10804 *cost += extra_cost->fp[mode == DFmode].narrow;
10805 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10806 return true;
10807 /* Vector modes? */
10808 }
10809 *cost = LIBCALL_COST (1);
10810 return false;
10811
10812 case FMA:
10813 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10814 {
10815 rtx op0 = XEXP (x, 0);
10816 rtx op1 = XEXP (x, 1);
10817 rtx op2 = XEXP (x, 2);
10818
10819
10820 /* vfms or vfnma. */
10821 if (GET_CODE (op0) == NEG)
10822 op0 = XEXP (op0, 0);
10823
10824 /* vfnms or vfnma. */
10825 if (GET_CODE (op2) == NEG)
10826 op2 = XEXP (op2, 0);
10827
10828 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10829 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10830 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10831
10832 if (speed_p)
10833 *cost += extra_cost->fp[mode == DFmode].fma;
10834
10835 return true;
10836 }
10837
10838 *cost = LIBCALL_COST (3);
10839 return false;
10840
10841 case FIX:
10842 case UNSIGNED_FIX:
10843 if (TARGET_HARD_FLOAT)
10844 {
10845 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10846 a vcvt fixed-point conversion. */
10847 if (code == FIX && mode == SImode
10848 && GET_CODE (XEXP (x, 0)) == FIX
10849 && GET_MODE (XEXP (x, 0)) == SFmode
10850 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10851 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10852 > 0)
10853 {
10854 if (speed_p)
10855 *cost += extra_cost->fp[0].toint;
10856
10857 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10858 code, 0, speed_p);
10859 return true;
10860 }
10861
10862 if (GET_MODE_CLASS (mode) == MODE_INT)
10863 {
10864 mode = GET_MODE (XEXP (x, 0));
10865 if (speed_p)
10866 *cost += extra_cost->fp[mode == DFmode].toint;
10867 /* Strip off the 'cost' of rounding towards zero. */
10868 if (GET_CODE (XEXP (x, 0)) == FIX)
10869 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10870 0, speed_p);
10871 else
10872 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10873 /* ??? Increase the cost to deal with transferring from
10874 FP -> CORE registers? */
10875 return true;
10876 }
10877 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10878 && TARGET_VFP5)
10879 {
10880 if (speed_p)
10881 *cost += extra_cost->fp[mode == DFmode].roundint;
10882 return false;
10883 }
10884 /* Vector costs? */
10885 }
10886 *cost = LIBCALL_COST (1);
10887 return false;
10888
10889 case FLOAT:
10890 case UNSIGNED_FLOAT:
10891 if (TARGET_HARD_FLOAT)
10892 {
10893 /* ??? Increase the cost to deal with transferring from CORE
10894 -> FP registers? */
10895 if (speed_p)
10896 *cost += extra_cost->fp[mode == DFmode].fromint;
10897 return false;
10898 }
10899 *cost = LIBCALL_COST (1);
10900 return false;
10901
10902 case CALL:
10903 return true;
10904
10905 case ASM_OPERANDS:
10906 {
10907 /* Just a guess. Guess number of instructions in the asm
10908 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10909 though (see PR60663). */
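	/* E.g. an asm template containing three instructions with two inputs
	   is costed as COSTS_N_INSNS (5).  */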
10910 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10911 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10912
10913 *cost = COSTS_N_INSNS (asm_length + num_operands);
10914 return true;
10915 }
10916 default:
10917 if (mode != VOIDmode)
10918 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10919 else
10920 *cost = COSTS_N_INSNS (4); /* Who knows? */
10921 return false;
10922 }
10923 }
10924
10925 #undef HANDLE_NARROW_SHIFT_ARITH
10926
10927 /* RTX costs entry point. */
10928
10929 static bool
10930 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10931 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10932 {
10933 bool result;
10934 int code = GET_CODE (x);
10935 gcc_assert (current_tune->insn_extra_cost);
10936
10937 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10938 (enum rtx_code) outer_code,
10939 current_tune->insn_extra_cost,
10940 total, speed);
10941
10942 if (dump_file && (dump_flags & TDF_DETAILS))
10943 {
10944 print_rtl_single (dump_file, x);
10945 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10946 *total, result ? "final" : "partial");
10947 }
10948 return result;
10949 }
10950
10951 /* All address computations that can be done are free, but rtx cost returns
10952 the same for practically all of them. So we weight the different types
10953 of address here in the order (most pref first):
10954 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
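   /* With the weights below, for instance, a POST_INC address costs 0,
      [reg, #imm] costs 2, [reg, reg] costs 4 and a bare SYMBOL_REF costs 10.  */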
10955 static inline int
10956 arm_arm_address_cost (rtx x)
10957 {
10958 enum rtx_code c = GET_CODE (x);
10959
10960 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10961 return 0;
10962 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10963 return 10;
10964
10965 if (c == PLUS)
10966 {
10967 if (CONST_INT_P (XEXP (x, 1)))
10968 return 2;
10969
10970 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10971 return 3;
10972
10973 return 4;
10974 }
10975
10976 return 6;
10977 }
10978
10979 static inline int
10980 arm_thumb_address_cost (rtx x)
10981 {
10982 enum rtx_code c = GET_CODE (x);
10983
10984 if (c == REG)
10985 return 1;
10986 if (c == PLUS
10987 && REG_P (XEXP (x, 0))
10988 && CONST_INT_P (XEXP (x, 1)))
10989 return 1;
10990
10991 return 2;
10992 }
10993
10994 static int
10995 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
10996 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
10997 {
10998 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
10999 }
11000
11001 /* Adjust cost hook for XScale. */
11002 static bool
11003 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11004 int * cost)
11005 {
11006 /* Some true dependencies can have a higher cost depending
11007 on precisely how certain input operands are used. */
11008 if (dep_type == 0
11009 && recog_memoized (insn) >= 0
11010 && recog_memoized (dep) >= 0)
11011 {
11012 int shift_opnum = get_attr_shift (insn);
11013 enum attr_type attr_type = get_attr_type (dep);
11014
11015 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11016 operand for INSN. If we have a shifted input operand and the
11017 instruction we depend on is another ALU instruction, then we may
11018 have to account for an additional stall. */
11019 if (shift_opnum != 0
11020 && (attr_type == TYPE_ALU_SHIFT_IMM
11021 || attr_type == TYPE_ALUS_SHIFT_IMM
11022 || attr_type == TYPE_LOGIC_SHIFT_IMM
11023 || attr_type == TYPE_LOGICS_SHIFT_IMM
11024 || attr_type == TYPE_ALU_SHIFT_REG
11025 || attr_type == TYPE_ALUS_SHIFT_REG
11026 || attr_type == TYPE_LOGIC_SHIFT_REG
11027 || attr_type == TYPE_LOGICS_SHIFT_REG
11028 || attr_type == TYPE_MOV_SHIFT
11029 || attr_type == TYPE_MVN_SHIFT
11030 || attr_type == TYPE_MOV_SHIFT_REG
11031 || attr_type == TYPE_MVN_SHIFT_REG))
11032 {
11033 rtx shifted_operand;
11034 int opno;
11035
11036 /* Get the shifted operand. */
11037 extract_insn (insn);
11038 shifted_operand = recog_data.operand[shift_opnum];
11039
11040 /* Iterate over all the operands in DEP. If we write an operand
11041 that overlaps with SHIFTED_OPERAND, then we have to increase the
11042 cost of this dependency. */
11043 extract_insn (dep);
11044 preprocess_constraints (dep);
11045 for (opno = 0; opno < recog_data.n_operands; opno++)
11046 {
11047 /* We can ignore strict inputs. */
11048 if (recog_data.operand_type[opno] == OP_IN)
11049 continue;
11050
11051 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11052 shifted_operand))
11053 {
11054 *cost = 2;
11055 return false;
11056 }
11057 }
11058 }
11059 }
11060 return true;
11061 }
11062
11063 /* Adjust cost hook for Cortex A9. */
11064 static bool
11065 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11066 int * cost)
11067 {
11068 switch (dep_type)
11069 {
11070 case REG_DEP_ANTI:
11071 *cost = 0;
11072 return false;
11073
11074 case REG_DEP_TRUE:
11075 case REG_DEP_OUTPUT:
11076 if (recog_memoized (insn) >= 0
11077 && recog_memoized (dep) >= 0)
11078 {
11079 if (GET_CODE (PATTERN (insn)) == SET)
11080 {
11081 if (GET_MODE_CLASS
11082 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11083 || GET_MODE_CLASS
11084 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11085 {
11086 enum attr_type attr_type_insn = get_attr_type (insn);
11087 enum attr_type attr_type_dep = get_attr_type (dep);
11088
11089 /* By default all dependencies of the form
11090 s0 = s0 <op> s1
11091 s0 = s0 <op> s2
11092 have an extra latency of 1 cycle because
11093 of the input and output dependency in this
11094 case. However, this gets modeled as a true
11095 dependency, hence all these checks. */
11096 if (REG_P (SET_DEST (PATTERN (insn)))
11097 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11098 {
11099 /* FMACS is a special case where the dependent
11100 instruction can be issued 3 cycles before
11101 the normal latency in case of an output
11102 dependency. */
11103 if ((attr_type_insn == TYPE_FMACS
11104 || attr_type_insn == TYPE_FMACD)
11105 && (attr_type_dep == TYPE_FMACS
11106 || attr_type_dep == TYPE_FMACD))
11107 {
11108 if (dep_type == REG_DEP_OUTPUT)
11109 *cost = insn_default_latency (dep) - 3;
11110 else
11111 *cost = insn_default_latency (dep);
11112 return false;
11113 }
11114 else
11115 {
11116 if (dep_type == REG_DEP_OUTPUT)
11117 *cost = insn_default_latency (dep) + 1;
11118 else
11119 *cost = insn_default_latency (dep);
11120 }
11121 return false;
11122 }
11123 }
11124 }
11125 }
11126 break;
11127
11128 default:
11129 gcc_unreachable ();
11130 }
11131
11132 return true;
11133 }
11134
11135 /* Adjust cost hook for FA726TE. */
11136 static bool
11137 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11138 int * cost)
11139 {
11140 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
11141 followed by a predicated one) has a penalty of 3. */
11142 if (dep_type == REG_DEP_TRUE
11143 && recog_memoized (insn) >= 0
11144 && recog_memoized (dep) >= 0
11145 && get_attr_conds (dep) == CONDS_SET)
11146 {
11147 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11148 if (get_attr_conds (insn) == CONDS_USE
11149 && get_attr_type (insn) != TYPE_BRANCH)
11150 {
11151 *cost = 3;
11152 return false;
11153 }
11154
11155 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11156 || get_attr_conds (insn) == CONDS_USE)
11157 {
11158 *cost = 0;
11159 return false;
11160 }
11161 }
11162
11163 return true;
11164 }
11165
11166 /* Implement TARGET_REGISTER_MOVE_COST.
11167
11168 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11169 such a move is typically more expensive than a single memory access. We set
11170 the cost to less than two memory accesses so that floating
11171 point to integer conversion does not go through memory. */
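   /* With the figures below a VFP<->core transfer is costed at 15, which is
      less than twice the TARGET_32BIT memory move cost of 10 returned by
      arm_memory_move_cost.  */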
11172
11173 int
11174 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11175 reg_class_t from, reg_class_t to)
11176 {
11177 if (TARGET_32BIT)
11178 {
11179 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11180 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11181 return 15;
11182 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11183 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11184 return 4;
11185 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11186 return 20;
11187 else
11188 return 2;
11189 }
11190 else
11191 {
11192 if (from == HI_REGS || to == HI_REGS)
11193 return 4;
11194 else
11195 return 2;
11196 }
11197 }
11198
11199 /* Implement TARGET_MEMORY_MOVE_COST. */
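   /* For TARGET_32BIT the cost is a flat 10.  On Thumb-1, for example, a
      DImode move costs (2 * 8) * 1 = 16 for LO_REGS and 32 for other classes.  */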
11200
11201 int
11202 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11203 bool in ATTRIBUTE_UNUSED)
11204 {
11205 if (TARGET_32BIT)
11206 return 10;
11207 else
11208 {
11209 if (GET_MODE_SIZE (mode) < 4)
11210 return 8;
11211 else
11212 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11213 }
11214 }
11215
11216 /* Vectorizer cost model implementation. */
11217
11218 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11219 static int
11220 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11221 tree vectype,
11222 int misalign ATTRIBUTE_UNUSED)
11223 {
11224 unsigned elements;
11225
11226 switch (type_of_cost)
11227 {
11228 case scalar_stmt:
11229 return current_tune->vec_costs->scalar_stmt_cost;
11230
11231 case scalar_load:
11232 return current_tune->vec_costs->scalar_load_cost;
11233
11234 case scalar_store:
11235 return current_tune->vec_costs->scalar_store_cost;
11236
11237 case vector_stmt:
11238 return current_tune->vec_costs->vec_stmt_cost;
11239
11240 case vector_load:
11241 return current_tune->vec_costs->vec_align_load_cost;
11242
11243 case vector_store:
11244 return current_tune->vec_costs->vec_store_cost;
11245
11246 case vec_to_scalar:
11247 return current_tune->vec_costs->vec_to_scalar_cost;
11248
11249 case scalar_to_vec:
11250 return current_tune->vec_costs->scalar_to_vec_cost;
11251
11252 case unaligned_load:
11253 return current_tune->vec_costs->vec_unalign_load_cost;
11254
11255 case unaligned_store:
11256 return current_tune->vec_costs->vec_unalign_store_cost;
11257
11258 case cond_branch_taken:
11259 return current_tune->vec_costs->cond_taken_branch_cost;
11260
11261 case cond_branch_not_taken:
11262 return current_tune->vec_costs->cond_not_taken_branch_cost;
11263
11264 case vec_perm:
11265 case vec_promote_demote:
11266 return current_tune->vec_costs->vec_stmt_cost;
11267
11268 case vec_construct:
11269 elements = TYPE_VECTOR_SUBPARTS (vectype);
11270 return elements / 2 + 1;
11271
11272 default:
11273 gcc_unreachable ();
11274 }
11275 }
11276
11277 /* Implement targetm.vectorize.add_stmt_cost. */
11278
11279 static unsigned
11280 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11281 struct _stmt_vec_info *stmt_info, int misalign,
11282 enum vect_cost_model_location where)
11283 {
11284 unsigned *cost = (unsigned *) data;
11285 unsigned retval = 0;
11286
11287 if (flag_vect_cost_model)
11288 {
11289 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11290 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11291
11292 /* Statements in an inner loop relative to the loop being
11293 vectorized are weighted more heavily. The value here is
11294 arbitrary and could potentially be improved with analysis. */
11295 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11296 count *= 50; /* FIXME. */
11297
11298 retval = (unsigned) (count * stmt_cost);
11299 cost[where] += retval;
11300 }
11301
11302 return retval;
11303 }
11304
11305 /* Return true if and only if this insn can dual-issue only as older. */
11306 static bool
11307 cortexa7_older_only (rtx_insn *insn)
11308 {
11309 if (recog_memoized (insn) < 0)
11310 return false;
11311
11312 switch (get_attr_type (insn))
11313 {
11314 case TYPE_ALU_DSP_REG:
11315 case TYPE_ALU_SREG:
11316 case TYPE_ALUS_SREG:
11317 case TYPE_LOGIC_REG:
11318 case TYPE_LOGICS_REG:
11319 case TYPE_ADC_REG:
11320 case TYPE_ADCS_REG:
11321 case TYPE_ADR:
11322 case TYPE_BFM:
11323 case TYPE_REV:
11324 case TYPE_MVN_REG:
11325 case TYPE_SHIFT_IMM:
11326 case TYPE_SHIFT_REG:
11327 case TYPE_LOAD_BYTE:
11328 case TYPE_LOAD_4:
11329 case TYPE_STORE_4:
11330 case TYPE_FFARITHS:
11331 case TYPE_FADDS:
11332 case TYPE_FFARITHD:
11333 case TYPE_FADDD:
11334 case TYPE_FMOV:
11335 case TYPE_F_CVT:
11336 case TYPE_FCMPS:
11337 case TYPE_FCMPD:
11338 case TYPE_FCONSTS:
11339 case TYPE_FCONSTD:
11340 case TYPE_FMULS:
11341 case TYPE_FMACS:
11342 case TYPE_FMULD:
11343 case TYPE_FMACD:
11344 case TYPE_FDIVS:
11345 case TYPE_FDIVD:
11346 case TYPE_F_MRC:
11347 case TYPE_F_MRRC:
11348 case TYPE_F_FLAG:
11349 case TYPE_F_LOADS:
11350 case TYPE_F_STORES:
11351 return true;
11352 default:
11353 return false;
11354 }
11355 }
11356
11357 /* Return true if and only if this insn can dual-issue as younger. */
11358 static bool
11359 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11360 {
11361 if (recog_memoized (insn) < 0)
11362 {
11363 if (verbose > 5)
11364 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11365 return false;
11366 }
11367
11368 switch (get_attr_type (insn))
11369 {
11370 case TYPE_ALU_IMM:
11371 case TYPE_ALUS_IMM:
11372 case TYPE_LOGIC_IMM:
11373 case TYPE_LOGICS_IMM:
11374 case TYPE_EXTEND:
11375 case TYPE_MVN_IMM:
11376 case TYPE_MOV_IMM:
11377 case TYPE_MOV_REG:
11378 case TYPE_MOV_SHIFT:
11379 case TYPE_MOV_SHIFT_REG:
11380 case TYPE_BRANCH:
11381 case TYPE_CALL:
11382 return true;
11383 default:
11384 return false;
11385 }
11386 }
11387
11388
11389 /* Look for an instruction that can dual issue only as an older
11390 instruction, and move it in front of any instructions that can
11391 dual-issue as younger, while preserving the relative order of all
11392 other instructions in the ready list. This is a heuristic to help
11393 dual-issue in later cycles, by postponing issue of more flexible
11394 instructions. This heuristic may affect dual issue opportunities
11395 in the current cycle. */
11396 static void
11397 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11398 int *n_readyp, int clock)
11399 {
11400 int i;
11401 int first_older_only = -1, first_younger = -1;
11402
11403 if (verbose > 5)
11404 fprintf (file,
11405 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11406 clock,
11407 *n_readyp);
11408
11409 /* Traverse the ready list from the head (the instruction to issue
11410 first), looking for the first instruction that can issue as
11411 younger and the first instruction that can dual-issue only as
11412 older. */
11413 for (i = *n_readyp - 1; i >= 0; i--)
11414 {
11415 rtx_insn *insn = ready[i];
11416 if (cortexa7_older_only (insn))
11417 {
11418 first_older_only = i;
11419 if (verbose > 5)
11420 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11421 break;
11422 }
11423 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11424 first_younger = i;
11425 }
11426
11427 /* Nothing to reorder because either no younger insn was found, or an insn
11428 that can dual-issue only as older appears before any insn that
11429 can dual-issue as younger. */
11430 if (first_younger == -1)
11431 {
11432 if (verbose > 5)
11433 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11434 return;
11435 }
11436
11437 /* Nothing to reorder because no older-only insn in the ready list. */
11438 if (first_older_only == -1)
11439 {
11440 if (verbose > 5)
11441 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11442 return;
11443 }
11444
11445 /* Move first_older_only insn before first_younger. */
11446 if (verbose > 5)
11447 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11448 INSN_UID (ready[first_older_only]),
11449 INSN_UID (ready[first_younger]));
11450 rtx_insn *first_older_only_insn = ready[first_older_only];
11451 for (i = first_older_only; i < first_younger; i++)
11452 {
11453 ready[i] = ready[i+1];
11454 }
11455
11456 ready[i] = first_older_only_insn;
11457 return;
11458 }
11459
11460 /* Implement TARGET_SCHED_REORDER. */
11461 static int
11462 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11463 int clock)
11464 {
11465 switch (arm_tune)
11466 {
11467 case TARGET_CPU_cortexa7:
11468 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11469 break;
11470 default:
11471 /* Do nothing for other cores. */
11472 break;
11473 }
11474
11475 return arm_issue_rate ();
11476 }
11477
11478 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11479 It corrects the value of COST based on the relationship between
11480 INSN and DEP, given the dependence type DEP_TYPE. It returns the new
11481 value. There is a per-core adjust_cost hook to adjust scheduler costs
11482 and the per-core hook can choose to completely override the generic
11483 adjust_cost function. Only put bits of code into arm_adjust_cost that
11484 are common across all cores. */
11485 static int
11486 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11487 unsigned int)
11488 {
11489 rtx i_pat, d_pat;
11490
11491 /* When generating Thumb-1 code, we want to place flag-setting operations
11492 close to a conditional branch which depends on them, so that we can
11493 omit the comparison. */
11494 if (TARGET_THUMB1
11495 && dep_type == 0
11496 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11497 && recog_memoized (dep) >= 0
11498 && get_attr_conds (dep) == CONDS_SET)
11499 return 0;
11500
11501 if (current_tune->sched_adjust_cost != NULL)
11502 {
11503 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11504 return cost;
11505 }
11506
11507 /* XXX Is this strictly true? */
11508 if (dep_type == REG_DEP_ANTI
11509 || dep_type == REG_DEP_OUTPUT)
11510 return 0;
11511
11512 /* Call insns don't incur a stall, even if they follow a load. */
11513 if (dep_type == 0
11514 && CALL_P (insn))
11515 return 1;
11516
11517 if ((i_pat = single_set (insn)) != NULL
11518 && MEM_P (SET_SRC (i_pat))
11519 && (d_pat = single_set (dep)) != NULL
11520 && MEM_P (SET_DEST (d_pat)))
11521 {
11522 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11523 /* This is a load after a store; there is no conflict if the load reads
11524 from a cached area. Assume that loads from the stack, and from the
11525 constant pool are cached, and that others will miss. This is a
11526 hack. */
11527
11528 if ((GET_CODE (src_mem) == SYMBOL_REF
11529 && CONSTANT_POOL_ADDRESS_P (src_mem))
11530 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11531 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11532 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11533 return 1;
11534 }
11535
11536 return cost;
11537 }
11538
11539 int
11540 arm_max_conditional_execute (void)
11541 {
11542 return max_insns_skipped;
11543 }
11544
11545 static int
11546 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11547 {
11548 if (TARGET_32BIT)
11549 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11550 else
11551 return (optimize > 0) ? 2 : 0;
11552 }
11553
11554 static int
11555 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11556 {
11557 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11558 }
11559
11560 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11561 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11562 sequences of non-executed instructions in IT blocks probably take the same
11563 amount of time as executed instructions (and the IT instruction itself takes
11564 space in icache). This function was experimentally determined to give good
11565 results on a popular embedded benchmark. */
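   /* In practice this costs a branch at 1 when optimizing 32-bit code for
      speed and falls back to arm_default_branch_cost otherwise.  */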
11566
11567 static int
11568 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11569 {
11570 return (TARGET_32BIT && speed_p) ? 1
11571 : arm_default_branch_cost (speed_p, predictable_p);
11572 }
11573
11574 static int
11575 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11576 {
11577 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11578 }
11579
11580 static bool fp_consts_inited = false;
11581
11582 static REAL_VALUE_TYPE value_fp0;
11583
11584 static void
11585 init_fp_table (void)
11586 {
11587 REAL_VALUE_TYPE r;
11588
11589 r = REAL_VALUE_ATOF ("0", DFmode);
11590 value_fp0 = r;
11591 fp_consts_inited = true;
11592 }
11593
11594 /* Return TRUE if rtx X is a valid immediate FP constant. */
11595 int
11596 arm_const_double_rtx (rtx x)
11597 {
11598 const REAL_VALUE_TYPE *r;
11599
11600 if (!fp_consts_inited)
11601 init_fp_table ();
11602
11603 r = CONST_DOUBLE_REAL_VALUE (x);
11604 if (REAL_VALUE_MINUS_ZERO (*r))
11605 return 0;
11606
11607 if (real_equal (r, &value_fp0))
11608 return 1;
11609
11610 return 0;
11611 }
11612
11613 /* VFPv3 has a fairly wide range of representable immediates, formed from
11614 "quarter-precision" floating-point values. These can be evaluated using this
11615 formula (with ^ for exponentiation):
11616
11617 (-1)^s * n * 2^-r
11618
11619 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11620 16 <= n <= 31 and 0 <= r <= 7.
11621
11622 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11623
11624 - A (most-significant) is the sign bit.
11625 - BCD are the exponent (encoded as r XOR 3).
11626 - EFGH are the mantissa (encoded as n - 16).
11627 */
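   /* For example, 1.0 = 16 * 2^-4, i.e. s = 0, n = 16, r = 4, which encodes
      as (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70.  */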
11628
11629 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11630 fconst[sd] instruction, or -1 if X isn't suitable. */
11631 static int
11632 vfp3_const_double_index (rtx x)
11633 {
11634 REAL_VALUE_TYPE r, m;
11635 int sign, exponent;
11636 unsigned HOST_WIDE_INT mantissa, mant_hi;
11637 unsigned HOST_WIDE_INT mask;
11638 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11639 bool fail;
11640
11641 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11642 return -1;
11643
11644 r = *CONST_DOUBLE_REAL_VALUE (x);
11645
11646 /* We can't represent these things, so detect them first. */
11647 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11648 return -1;
11649
11650 /* Extract sign, exponent and mantissa. */
11651 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11652 r = real_value_abs (&r);
11653 exponent = REAL_EXP (&r);
11654 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11655 highest (sign) bit, with a fixed binary point at bit point_pos.
11656 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11657 bits for the mantissa, this may fail (low bits would be lost). */
11658 real_ldexp (&m, &r, point_pos - exponent);
11659 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11660 mantissa = w.elt (0);
11661 mant_hi = w.elt (1);
11662
11663 /* If there are bits set in the low part of the mantissa, we can't
11664 represent this value. */
11665 if (mantissa != 0)
11666 return -1;
11667
11668 /* Now make it so that mantissa contains the most-significant bits, and move
11669 the point_pos to indicate that the least-significant bits have been
11670 discarded. */
11671 point_pos -= HOST_BITS_PER_WIDE_INT;
11672 mantissa = mant_hi;
11673
11674 /* We can permit four significant bits of mantissa only, plus a high bit
11675 which is always 1. */
11676 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11677 if ((mantissa & mask) != 0)
11678 return -1;
11679
11680 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11681 mantissa >>= point_pos - 5;
11682
11683 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11684 floating-point immediate zero with Neon using an integer-zero load, but
11685 that case is handled elsewhere.) */
11686 if (mantissa == 0)
11687 return -1;
11688
11689 gcc_assert (mantissa >= 16 && mantissa <= 31);
11690
11691 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11692 normalized significands are in the range [1, 2). (Our mantissa is shifted
11693 left 4 places at this point relative to normalized IEEE754 values). GCC
11694 internally uses [0.5, 1) (see real.c), so the exponent returned from
11695 REAL_EXP must be altered. */
11696 exponent = 5 - exponent;
11697
11698 if (exponent < 0 || exponent > 7)
11699 return -1;
11700
11701 /* Sign, mantissa and exponent are now in the correct form to plug into the
11702 formula described in the comment above. */
11703 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11704 }
11705
11706 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11707 int
11708 vfp3_const_double_rtx (rtx x)
11709 {
11710 if (!TARGET_VFP3)
11711 return 0;
11712
11713 return vfp3_const_double_index (x) != -1;
11714 }
11715
11716 /* Recognize immediates which can be used in various Neon instructions. Legal
11717 immediates are described by the following table (for VMVN variants, the
11718 bitwise inverse of the constant shown is recognized. In either case, VMOV
11719 is output and the correct instruction to use for a given constant is chosen
11720 by the assembler). The constant shown is replicated across all elements of
11721 the destination vector.
11722
11723 insn elems variant constant (binary)
11724 ---- ----- ------- -----------------
11725 vmov i32 0 00000000 00000000 00000000 abcdefgh
11726 vmov i32 1 00000000 00000000 abcdefgh 00000000
11727 vmov i32 2 00000000 abcdefgh 00000000 00000000
11728 vmov i32 3 abcdefgh 00000000 00000000 00000000
11729 vmov i16 4 00000000 abcdefgh
11730 vmov i16 5 abcdefgh 00000000
11731 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11732 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11733 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11734 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11735 vmvn i16 10 00000000 abcdefgh
11736 vmvn i16 11 abcdefgh 00000000
11737 vmov i32 12 00000000 00000000 abcdefgh 11111111
11738 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11739 vmov i32 14 00000000 abcdefgh 11111111 11111111
11740 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11741 vmov i8 16 abcdefgh
11742 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11743 eeeeeeee ffffffff gggggggg hhhhhhhh
11744 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11745 vmov f32 19 00000000 00000000 00000000 00000000
11746
11747 For case 18, B = !b. Representable values are exactly those accepted by
11748 vfp3_const_double_index, but are output as floating-point numbers rather
11749 than indices.
11750
11751 For case 19, we will change it to vmov.i32 when assembling.
11752
11753 Variants 0-5 (inclusive) may also be used as immediates for the second
11754 operand of VORR/VBIC instructions.
11755
11756 The INVERSE argument causes the bitwise inverse of the given operand to be
11757 recognized instead (used for recognizing legal immediates for the VAND/VORN
11758 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11759 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11760 output, rather than the real insns vbic/vorr).
11761
11762 INVERSE makes no difference to the recognition of float vectors.
11763
11764 The return value is the variant of immediate as shown in the above table, or
11765 -1 if the given value doesn't match any of the listed patterns.
11766 */
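   /* E.g. a V4SImode constant with every element equal to 0x0000ab00 matches
      variant 1 above: element width 32, modified constant 0x0000ab00.  */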
11767 static int
11768 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11769 rtx *modconst, int *elementwidth)
11770 {
11771 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11772 matches = 1; \
11773 for (i = 0; i < idx; i += (STRIDE)) \
11774 if (!(TEST)) \
11775 matches = 0; \
11776 if (matches) \
11777 { \
11778 immtype = (CLASS); \
11779 elsize = (ELSIZE); \
11780 break; \
11781 }
11782
11783 unsigned int i, elsize = 0, idx = 0, n_elts;
11784 unsigned int innersize;
11785 unsigned char bytes[16];
11786 int immtype = -1, matches;
11787 unsigned int invmask = inverse ? 0xff : 0;
11788 bool vector = GET_CODE (op) == CONST_VECTOR;
11789
11790 if (vector)
11791 n_elts = CONST_VECTOR_NUNITS (op);
11792 else
11793 {
11794 n_elts = 1;
11795 if (mode == VOIDmode)
11796 mode = DImode;
11797 }
11798
11799 innersize = GET_MODE_UNIT_SIZE (mode);
11800
11801 /* Vectors of float constants. */
11802 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11803 {
11804 rtx el0 = CONST_VECTOR_ELT (op, 0);
11805
11806 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11807 return -1;
11808
11809 /* FP16 vectors cannot be represented. */
11810 if (GET_MODE_INNER (mode) == HFmode)
11811 return -1;
11812
11813 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11814 are distinct in this context. */
11815 if (!const_vec_duplicate_p (op))
11816 return -1;
11817
11818 if (modconst)
11819 *modconst = CONST_VECTOR_ELT (op, 0);
11820
11821 if (elementwidth)
11822 *elementwidth = 0;
11823
11824 if (el0 == CONST0_RTX (GET_MODE (el0)))
11825 return 19;
11826 else
11827 return 18;
11828 }
11829
11830 /* The tricks done in the code below apply for little-endian vector layout.
11831 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11832 FIXME: Implement logic for big-endian vectors. */
11833 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11834 return -1;
11835
11836 /* Splat vector constant out into a byte vector. */
11837 for (i = 0; i < n_elts; i++)
11838 {
11839 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11840 unsigned HOST_WIDE_INT elpart;
11841
11842 gcc_assert (CONST_INT_P (el));
11843 elpart = INTVAL (el);
11844
11845 for (unsigned int byte = 0; byte < innersize; byte++)
11846 {
11847 bytes[idx++] = (elpart & 0xff) ^ invmask;
11848 elpart >>= BITS_PER_UNIT;
11849 }
11850 }
11851
11852 /* Sanity check. */
11853 gcc_assert (idx == GET_MODE_SIZE (mode));
11854
11855 do
11856 {
11857 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11858 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11859
11860 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11861 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11862
11863 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11864 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11865
11866 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11867 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11868
11869 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11870
11871 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11872
11873 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11874 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11875
11876 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11877 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11878
11879 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11880 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11881
11882 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11883 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11884
11885 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11886
11887 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11888
11889 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11890 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11891
11892 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11893 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11894
11895 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11896 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11897
11898 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11899 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11900
11901 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11902
11903 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11904 && bytes[i] == bytes[(i + 8) % idx]);
11905 }
11906 while (0);
11907
11908 if (immtype == -1)
11909 return -1;
11910
11911 if (elementwidth)
11912 *elementwidth = elsize;
11913
11914 if (modconst)
11915 {
11916 unsigned HOST_WIDE_INT imm = 0;
11917
11918 /* Un-invert bytes of recognized vector, if necessary. */
11919 if (invmask != 0)
11920 for (i = 0; i < idx; i++)
11921 bytes[i] ^= invmask;
11922
11923 if (immtype == 17)
11924 {
11925 /* FIXME: Broken on 32-bit H_W_I hosts. */
11926 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11927
11928 for (i = 0; i < 8; i++)
11929 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11930 << (i * BITS_PER_UNIT);
11931
11932 *modconst = GEN_INT (imm);
11933 }
11934 else
11935 {
11936 unsigned HOST_WIDE_INT imm = 0;
11937
11938 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11939 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11940
11941 *modconst = GEN_INT (imm);
11942 }
11943 }
11944
11945 return immtype;
11946 #undef CHECK
11947 }
11948
11949 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11950 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11951 float elements), and a modified constant (whatever should be output for a
11952 VMOV) in *MODCONST. */
11953
11954 int
11955 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11956 rtx *modconst, int *elementwidth)
11957 {
11958 rtx tmpconst;
11959 int tmpwidth;
11960 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11961
11962 if (retval == -1)
11963 return 0;
11964
11965 if (modconst)
11966 *modconst = tmpconst;
11967
11968 if (elementwidth)
11969 *elementwidth = tmpwidth;
11970
11971 return 1;
11972 }
11973
11974 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11975 the immediate is valid, write a constant suitable for using as an operand
11976 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11977 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11978
11979 int
11980 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
11981 rtx *modconst, int *elementwidth)
11982 {
11983 rtx tmpconst;
11984 int tmpwidth;
11985 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11986
11987 if (retval < 0 || retval > 5)
11988 return 0;
11989
11990 if (modconst)
11991 *modconst = tmpconst;
11992
11993 if (elementwidth)
11994 *elementwidth = tmpwidth;
11995
11996 return 1;
11997 }
11998
11999 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12000 the immediate is valid, write a constant suitable for using as an operand
12001 to VSHR/VSHL to *MODCONST and the corresponding element width to
12002 *ELEMENTWIDTH. ISLEFTSHIFT says whether the shift is a left shift,
12003 because left and right shifts have different limitations. */
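   /* For example, with 8-bit elements a VSHL immediate must lie in the range
      0-7, while a VSHR immediate must lie in the range 1-8.  */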
12004
12005 int
12006 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12007 rtx *modconst, int *elementwidth,
12008 bool isleftshift)
12009 {
12010 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12011 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12012 unsigned HOST_WIDE_INT last_elt = 0;
12013 unsigned HOST_WIDE_INT maxshift;
12014
12015 /* All elements of the vector constant must be identical; extract and check them. */
12016 for (i = 0; i < n_elts; i++)
12017 {
12018 rtx el = CONST_VECTOR_ELT (op, i);
12019 unsigned HOST_WIDE_INT elpart;
12020
12021 if (CONST_INT_P (el))
12022 elpart = INTVAL (el);
12023 else if (CONST_DOUBLE_P (el))
12024 return 0;
12025 else
12026 gcc_unreachable ();
12027
12028 if (i != 0 && elpart != last_elt)
12029 return 0;
12030
12031 last_elt = elpart;
12032 }
12033
12034 /* Shift less than element size. */
12035 maxshift = innersize * 8;
12036
12037 if (isleftshift)
12038 {
12039 /* Left shift immediate value can be from 0 to <size>-1. */
12040 if (last_elt >= maxshift)
12041 return 0;
12042 }
12043 else
12044 {
12045 /* Right shift immediate value can be from 1 to <size>. */
12046 if (last_elt == 0 || last_elt > maxshift)
12047 return 0;
12048 }
12049
12050 if (elementwidth)
12051 *elementwidth = innersize * 8;
12052
12053 if (modconst)
12054 *modconst = CONST_VECTOR_ELT (op, 0);
12055
12056 return 1;
12057 }
12058
12059 /* Return a string suitable for output of Neon immediate logic operation
12060 MNEM. */
12061
12062 char *
12063 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12064 int inverse, int quad)
12065 {
12066 int width, is_valid;
12067 static char templ[40];
12068
12069 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12070
12071 gcc_assert (is_valid != 0);
12072
12073 if (quad)
12074 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12075 else
12076 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12077
12078 return templ;
12079 }
12080
12081 /* Return a string suitable for output of Neon immediate shift operation
12082 (VSHR or VSHL) MNEM. */
12083
12084 char *
12085 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12086 machine_mode mode, int quad,
12087 bool isleftshift)
12088 {
12089 int width, is_valid;
12090 static char templ[40];
12091
12092 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12093 gcc_assert (is_valid != 0);
12094
12095 if (quad)
12096 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12097 else
12098 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12099
12100 return templ;
12101 }
12102
12103 /* Output a sequence of pairwise operations to implement a reduction.
12104 NOTE: We do "too much work" here, because pairwise operations work on two
12105 registers-worth of operands in one go. Unfortunately it does not seem possible
12106 to exploit those extra calculations to do the full operation in fewer steps.
12107 Although all vector elements of the result but the first are ignored, we
12108 actually calculate the same result in each of the elements. An alternative
12109 such as initially loading a vector with zero to use as each of the second
12110 operands would use up an additional register and take an extra instruction,
12111 for no particular gain. */
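   /* E.g. reducing {a, b, c, d} with a pairwise add takes two steps:
      {a+b, c+d, a+b, c+d} and then the total a+b+c+d in every element.  */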
12112
12113 void
12114 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12115 rtx (*reduc) (rtx, rtx, rtx))
12116 {
12117 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12118 rtx tmpsum = op1;
12119
12120 for (i = parts / 2; i >= 1; i /= 2)
12121 {
12122 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12123 emit_insn (reduc (dest, tmpsum, tmpsum));
12124 tmpsum = dest;
12125 }
12126 }
12127
12128 /* If VALS is a vector constant that can be loaded into a register
12129 using VDUP, generate instructions to do so and return an RTX to
12130 assign to the register. Otherwise return NULL_RTX. */
12131
12132 static rtx
12133 neon_vdup_constant (rtx vals)
12134 {
12135 machine_mode mode = GET_MODE (vals);
12136 machine_mode inner_mode = GET_MODE_INNER (mode);
12137 rtx x;
12138
12139 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12140 return NULL_RTX;
12141
12142 if (!const_vec_duplicate_p (vals, &x))
12143 /* The elements are not all the same. We could handle repeating
12144 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12145 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12146 vdup.i16). */
12147 return NULL_RTX;
12148
12149 /* We can load this constant by using VDUP and a constant in a
12150 single ARM register. This will be cheaper than a vector
12151 load. */
12152
12153 x = copy_to_mode_reg (inner_mode, x);
12154 return gen_rtx_VEC_DUPLICATE (mode, x);
12155 }
12156
12157 /* Generate code to load VALS, which is a PARALLEL containing only
12158 constants (for vec_init) or CONST_VECTOR, efficiently into a
12159 register. Returns an RTX to copy into the register, or NULL_RTX
12160 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12161
12162 rtx
12163 neon_make_constant (rtx vals)
12164 {
12165 machine_mode mode = GET_MODE (vals);
12166 rtx target;
12167 rtx const_vec = NULL_RTX;
12168 int n_elts = GET_MODE_NUNITS (mode);
12169 int n_const = 0;
12170 int i;
12171
12172 if (GET_CODE (vals) == CONST_VECTOR)
12173 const_vec = vals;
12174 else if (GET_CODE (vals) == PARALLEL)
12175 {
12176 /* A CONST_VECTOR must contain only CONST_INTs and
12177 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12178 Only store valid constants in a CONST_VECTOR. */
12179 for (i = 0; i < n_elts; ++i)
12180 {
12181 rtx x = XVECEXP (vals, 0, i);
12182 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12183 n_const++;
12184 }
12185 if (n_const == n_elts)
12186 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12187 }
12188 else
12189 gcc_unreachable ();
12190
12191 if (const_vec != NULL
12192 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12193 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12194 return const_vec;
12195 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12196 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12197 pipeline cycle; creating the constant takes one or two ARM
12198 pipeline cycles. */
12199 return target;
12200 else if (const_vec != NULL_RTX)
12201 /* Load from constant pool. On Cortex-A8 this takes two cycles
12202 (for either double or quad vectors). We can not take advantage
12203 of single-cycle VLD1 because we need a PC-relative addressing
12204 mode. */
12205 return const_vec;
12206 else
12207 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12208 We can not construct an initializer. */
12209 return NULL_RTX;
12210 }
12211
12212 /* Initialize vector TARGET to VALS. */
12213
12214 void
12215 neon_expand_vector_init (rtx target, rtx vals)
12216 {
12217 machine_mode mode = GET_MODE (target);
12218 machine_mode inner_mode = GET_MODE_INNER (mode);
12219 int n_elts = GET_MODE_NUNITS (mode);
12220 int n_var = 0, one_var = -1;
12221 bool all_same = true;
12222 rtx x, mem;
12223 int i;
12224
12225 for (i = 0; i < n_elts; ++i)
12226 {
12227 x = XVECEXP (vals, 0, i);
12228 if (!CONSTANT_P (x))
12229 ++n_var, one_var = i;
12230
12231 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12232 all_same = false;
12233 }
12234
12235 if (n_var == 0)
12236 {
12237 rtx constant = neon_make_constant (vals);
12238 if (constant != NULL_RTX)
12239 {
12240 emit_move_insn (target, constant);
12241 return;
12242 }
12243 }
12244
12245 /* Splat a single non-constant element if we can. */
12246 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12247 {
12248 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12249 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12250 return;
12251 }
12252
12253 /* One field is non-constant. Load constant then overwrite varying
12254 field. This is more efficient than using the stack. */
12255 if (n_var == 1)
12256 {
12257 rtx copy = copy_rtx (vals);
12258 rtx index = GEN_INT (one_var);
12259
12260 /* Load constant part of vector, substitute neighboring value for
12261 varying element. */
12262 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12263 neon_expand_vector_init (target, copy);
12264
12265 /* Insert variable. */
12266 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12267 switch (mode)
12268 {
12269 case E_V8QImode:
12270 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12271 break;
12272 case E_V16QImode:
12273 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12274 break;
12275 case E_V4HImode:
12276 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12277 break;
12278 case E_V8HImode:
12279 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12280 break;
12281 case E_V2SImode:
12282 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12283 break;
12284 case E_V4SImode:
12285 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12286 break;
12287 case E_V2SFmode:
12288 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12289 break;
12290 case E_V4SFmode:
12291 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12292 break;
12293 case E_V2DImode:
12294 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12295 break;
12296 default:
12297 gcc_unreachable ();
12298 }
12299 return;
12300 }
12301
12302 /* Construct the vector in memory one field at a time
12303 and load the whole vector. */
12304 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12305 for (i = 0; i < n_elts; i++)
12306 emit_move_insn (adjust_address_nv (mem, inner_mode,
12307 i * GET_MODE_SIZE (inner_mode)),
12308 XVECEXP (vals, 0, i));
12309 emit_move_insn (target, mem);
12310 }
12311
12312 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12313 an error mentioning DESC if it doesn't. EXP indicates the source location,
12314 which includes the inlining history for intrinsics. */
12315
12316 static void
12317 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12318 const_tree exp, const char *desc)
12319 {
12320 HOST_WIDE_INT lane;
12321
12322 gcc_assert (CONST_INT_P (operand));
12323
12324 lane = INTVAL (operand);
12325
12326 if (lane < low || lane >= high)
12327 {
12328 if (exp)
12329 error ("%K%s %wd out of range %wd - %wd",
12330 exp, desc, lane, low, high - 1);
12331 else
12332 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12333 }
12334 }
12335
12336 /* Bounds-check lanes. */
12337
12338 void
12339 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12340 const_tree exp)
12341 {
12342 bounds_check (operand, low, high, exp, "lane");
12343 }
12344
12345 /* Bounds-check constants. */
12346
12347 void
12348 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12349 {
12350 bounds_check (operand, low, high, NULL_TREE, "constant");
12351 }
12352
12353 HOST_WIDE_INT
12354 neon_element_bits (machine_mode mode)
12355 {
12356 return GET_MODE_UNIT_BITSIZE (mode);
12357 }
12358
12359 \f
12360 /* Predicates for `match_operand' and `match_operator'. */
12361
12362 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12363 WB is true if full writeback address modes are allowed and is false
12364 if limited writeback address modes (POST_INC and PRE_DEC) are
12365 allowed. */
12366
12367 int
12368 arm_coproc_mem_operand (rtx op, bool wb)
12369 {
12370 rtx ind;
12371
12372 /* Reject eliminable registers. */
12373 if (! (reload_in_progress || reload_completed || lra_in_progress)
12374 && ( reg_mentioned_p (frame_pointer_rtx, op)
12375 || reg_mentioned_p (arg_pointer_rtx, op)
12376 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12377 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12378 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12379 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12380 return FALSE;
12381
12382 /* Constants are converted into offsets from labels. */
12383 if (!MEM_P (op))
12384 return FALSE;
12385
12386 ind = XEXP (op, 0);
12387
12388 if (reload_completed
12389 && (GET_CODE (ind) == LABEL_REF
12390 || (GET_CODE (ind) == CONST
12391 && GET_CODE (XEXP (ind, 0)) == PLUS
12392 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12393 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12394 return TRUE;
12395
12396 /* Match: (mem (reg)). */
12397 if (REG_P (ind))
12398 return arm_address_register_rtx_p (ind, 0);
12399
12400 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12401 acceptable in any case (subject to verification by
12402 arm_address_register_rtx_p). We need WB to be true to accept
12403 PRE_INC and POST_DEC. */
12404 if (GET_CODE (ind) == POST_INC
12405 || GET_CODE (ind) == PRE_DEC
12406 || (wb
12407 && (GET_CODE (ind) == PRE_INC
12408 || GET_CODE (ind) == POST_DEC)))
12409 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12410
12411 if (wb
12412 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12413 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12414 && GET_CODE (XEXP (ind, 1)) == PLUS
12415 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12416 ind = XEXP (ind, 1);
12417
12418 /* Match:
12419 (plus (reg)
12420 (const)). */
12421 if (GET_CODE (ind) == PLUS
12422 && REG_P (XEXP (ind, 0))
12423 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12424 && CONST_INT_P (XEXP (ind, 1))
12425 && INTVAL (XEXP (ind, 1)) > -1024
12426 && INTVAL (XEXP (ind, 1)) < 1024
12427 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12428 return TRUE;
12429
12430 return FALSE;
12431 }
12432
12433 /* Return TRUE if OP is a memory operand which we can load or store a vector
12434 to/from. TYPE is one of the following values:
12435 0 - Vector load/store (vldr)
12436 1 - Core registers (ldm)
12437 2 - Element/structure loads (vld1)
12438 */
12439 int
12440 neon_vector_mem_operand (rtx op, int type, bool strict)
12441 {
12442 rtx ind;
12443
12444 /* Reject eliminable registers. */
12445 if (strict && ! (reload_in_progress || reload_completed)
12446 && (reg_mentioned_p (frame_pointer_rtx, op)
12447 || reg_mentioned_p (arg_pointer_rtx, op)
12448 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12449 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12450 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12451 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12452 return FALSE;
12453
12454 /* Constants are converted into offsets from labels. */
12455 if (!MEM_P (op))
12456 return FALSE;
12457
12458 ind = XEXP (op, 0);
12459
12460 if (reload_completed
12461 && (GET_CODE (ind) == LABEL_REF
12462 || (GET_CODE (ind) == CONST
12463 && GET_CODE (XEXP (ind, 0)) == PLUS
12464 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12465 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12466 return TRUE;
12467
12468 /* Match: (mem (reg)). */
12469 if (REG_P (ind))
12470 return arm_address_register_rtx_p (ind, 0);
12471
12472 /* Allow post-increment with Neon registers. */
12473 if ((type != 1 && GET_CODE (ind) == POST_INC)
12474 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12475 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12476
12477 /* Allow post-increment by register for VLDn. */
12478 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12479 && GET_CODE (XEXP (ind, 1)) == PLUS
12480 && REG_P (XEXP (XEXP (ind, 1), 1)))
12481 return true;
12482
12483 /* Match:
12484 (plus (reg)
12485 (const)). */
12486 if (type == 0
12487 && GET_CODE (ind) == PLUS
12488 && REG_P (XEXP (ind, 0))
12489 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12490 && CONST_INT_P (XEXP (ind, 1))
12491 && INTVAL (XEXP (ind, 1)) > -1024
12492 /* For quad modes, we restrict the constant offset to be slightly less
12493 than what the instruction format permits. We have no such constraint
12494 on double mode offsets. (This must match arm_legitimate_index_p.) */
12495 && (INTVAL (XEXP (ind, 1))
12496 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12497 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12498 return TRUE;
12499
12500 return FALSE;
12501 }
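/* A rough sketch of the address forms accepted above (register numbers are
   arbitrary and purely illustrative):

     type 0 (vldr):  (mem (reg rN))
                     (mem (post_inc (reg rN)))  or  (mem (pre_dec (reg rN)))
                     (mem (plus (reg rN) (const_int off)))
                       with -1024 < off < 1024 (less than 1016 for quad
                       modes) and off a multiple of 4
     type 1 (ldm):   (mem (reg rN)) only
     type 2 (vld1):  (mem (reg rN)), (mem (post_inc (reg rN))), or
                     (mem (post_modify (reg rN) (plus (reg rN) (reg rM))))

   Label-relative addresses are additionally accepted once reload has
   completed, since constants have been converted into literal-pool
   references by then.  */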
12502
12503 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12504 type. */
12505 int
12506 neon_struct_mem_operand (rtx op)
12507 {
12508 rtx ind;
12509
12510 /* Reject eliminable registers. */
12511 if (! (reload_in_progress || reload_completed)
12512 && ( reg_mentioned_p (frame_pointer_rtx, op)
12513 || reg_mentioned_p (arg_pointer_rtx, op)
12514 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12515 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12516 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12517 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12518 return FALSE;
12519
12520 /* Constants are converted into offsets from labels. */
12521 if (!MEM_P (op))
12522 return FALSE;
12523
12524 ind = XEXP (op, 0);
12525
12526 if (reload_completed
12527 && (GET_CODE (ind) == LABEL_REF
12528 || (GET_CODE (ind) == CONST
12529 && GET_CODE (XEXP (ind, 0)) == PLUS
12530 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12531 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12532 return TRUE;
12533
12534 /* Match: (mem (reg)). */
12535 if (REG_P (ind))
12536 return arm_address_register_rtx_p (ind, 0);
12537
12538 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12539 if (GET_CODE (ind) == POST_INC
12540 || GET_CODE (ind) == PRE_DEC)
12541 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12542
12543 return FALSE;
12544 }
12545
12546 /* Return true if X is a register that will be eliminated later on. */
12547 int
12548 arm_eliminable_register (rtx x)
12549 {
12550 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12551 || REGNO (x) == ARG_POINTER_REGNUM
12552 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12553 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12554 }
12555
12556 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12557 coprocessor registers. Otherwise return NO_REGS. */
12558
12559 enum reg_class
12560 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12561 {
12562 if (mode == HFmode)
12563 {
12564 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12565 return GENERAL_REGS;
12566 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12567 return NO_REGS;
12568 return GENERAL_REGS;
12569 }
12570
12571 /* The neon move patterns handle all legitimate vector and struct
12572 addresses. */
12573 if (TARGET_NEON
12574 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12575 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12576 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12577 || VALID_NEON_STRUCT_MODE (mode)))
12578 return NO_REGS;
12579
12580 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12581 return NO_REGS;
12582
12583 return GENERAL_REGS;
12584 }
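/* Rough illustration (modes and operands invented for the example): an
   HFmode reload returns GENERAL_REGS whenever neither the NEON FP16 nor
   the VFP FP16 instruction extensions are available, since the value then
   has to travel through a core register; a DFmode access such as
   (mem:DF (reg:SI r4)) is already a valid coprocessor memory operand, so
   NO_REGS is returned and no scratch register is needed.  */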
12585
12586 /* Values which must be returned in the most-significant end of the return
12587 register. */
12588
12589 static bool
12590 arm_return_in_msb (const_tree valtype)
12591 {
12592 return (TARGET_AAPCS_BASED
12593 && BYTES_BIG_ENDIAN
12594 && (AGGREGATE_TYPE_P (valtype)
12595 || TREE_CODE (valtype) == COMPLEX_TYPE
12596 || FIXED_POINT_TYPE_P (valtype)));
12597 }
12598
12599 /* Return TRUE if X references a SYMBOL_REF. */
12600 int
12601 symbol_mentioned_p (rtx x)
12602 {
12603 const char * fmt;
12604 int i;
12605
12606 if (GET_CODE (x) == SYMBOL_REF)
12607 return 1;
12608
12609 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12610 are constant offsets, not symbols. */
12611 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12612 return 0;
12613
12614 fmt = GET_RTX_FORMAT (GET_CODE (x));
12615
12616 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12617 {
12618 if (fmt[i] == 'E')
12619 {
12620 int j;
12621
12622 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12623 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12624 return 1;
12625 }
12626 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12627 return 1;
12628 }
12629
12630 return 0;
12631 }
12632
12633 /* Return TRUE if X references a LABEL_REF. */
12634 int
12635 label_mentioned_p (rtx x)
12636 {
12637 const char * fmt;
12638 int i;
12639
12640 if (GET_CODE (x) == LABEL_REF)
12641 return 1;
12642
12643 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12644 instruction, but they are constant offsets, not symbols. */
12645 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12646 return 0;
12647
12648 fmt = GET_RTX_FORMAT (GET_CODE (x));
12649 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12650 {
12651 if (fmt[i] == 'E')
12652 {
12653 int j;
12654
12655 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12656 if (label_mentioned_p (XVECEXP (x, i, j)))
12657 return 1;
12658 }
12659 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12660 return 1;
12661 }
12662
12663 return 0;
12664 }
12665
12666 int
12667 tls_mentioned_p (rtx x)
12668 {
12669 switch (GET_CODE (x))
12670 {
12671 case CONST:
12672 return tls_mentioned_p (XEXP (x, 0));
12673
12674 case UNSPEC:
12675 if (XINT (x, 1) == UNSPEC_TLS)
12676 return 1;
12677
12678 /* Fall through. */
12679 default:
12680 return 0;
12681 }
12682 }
12683
12684 /* Must not copy any rtx that uses a pc-relative address.
12685 Also, disallow copying of load-exclusive instructions that
12686 may appear after splitting of compare-and-swap-style operations
12687 so as to prevent those loops from being transformed away from their
12688 canonical forms (see PR 69904). */
12689
12690 static bool
12691 arm_cannot_copy_insn_p (rtx_insn *insn)
12692 {
12693 /* The tls call insn cannot be copied, as it is paired with a data
12694 word. */
12695 if (recog_memoized (insn) == CODE_FOR_tlscall)
12696 return true;
12697
12698 subrtx_iterator::array_type array;
12699 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12700 {
12701 const_rtx x = *iter;
12702 if (GET_CODE (x) == UNSPEC
12703 && (XINT (x, 1) == UNSPEC_PIC_BASE
12704 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12705 return true;
12706 }
12707
12708 rtx set = single_set (insn);
12709 if (set)
12710 {
12711 rtx src = SET_SRC (set);
12712 if (GET_CODE (src) == ZERO_EXTEND)
12713 src = XEXP (src, 0);
12714
12715 /* Catch the load-exclusive and load-acquire operations. */
12716 if (GET_CODE (src) == UNSPEC_VOLATILE
12717 && (XINT (src, 1) == VUNSPEC_LL
12718 || XINT (src, 1) == VUNSPEC_LAX))
12719 return true;
12720 }
12721 return false;
12722 }
12723
12724 enum rtx_code
12725 minmax_code (rtx x)
12726 {
12727 enum rtx_code code = GET_CODE (x);
12728
12729 switch (code)
12730 {
12731 case SMAX:
12732 return GE;
12733 case SMIN:
12734 return LE;
12735 case UMIN:
12736 return LEU;
12737 case UMAX:
12738 return GEU;
12739 default:
12740 gcc_unreachable ();
12741 }
12742 }
12743
12744 /* Match a pair of min/max operators that can be implemented via usat/ssat. */
12745
12746 bool
12747 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12748 int *mask, bool *signed_sat)
12749 {
12750 /* The high bound must be a power of two minus one. */
12751 int log = exact_log2 (INTVAL (hi_bound) + 1);
12752 if (log == -1)
12753 return false;
12754
12755 /* The low bound is either zero (for usat) or one less than the
12756 negation of the high bound (for ssat). */
12757 if (INTVAL (lo_bound) == 0)
12758 {
12759 if (mask)
12760 *mask = log;
12761 if (signed_sat)
12762 *signed_sat = false;
12763
12764 return true;
12765 }
12766
12767 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12768 {
12769 if (mask)
12770 *mask = log + 1;
12771 if (signed_sat)
12772 *signed_sat = true;
12773
12774 return true;
12775 }
12776
12777 return false;
12778 }
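/* Worked examples (bound values chosen purely for illustration):

     lo_bound = 0,    hi_bound = 255  -> *mask = 8, *signed_sat = false,
                                         i.e. the range of "usat ..., #8, ..."
     lo_bound = -128, hi_bound = 127  -> *mask = 8, *signed_sat = true,
                                         i.e. the range of "ssat ..., #8, ..."

   A pair such as lo_bound = -100, hi_bound = 100 fails the power-of-two
   test and is rejected.  */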
12779
12780 /* Return 1 if memory locations are adjacent. */
12781 int
12782 adjacent_mem_locations (rtx a, rtx b)
12783 {
12784 /* We don't guarantee to preserve the order of these memory refs. */
12785 if (volatile_refs_p (a) || volatile_refs_p (b))
12786 return 0;
12787
12788 if ((REG_P (XEXP (a, 0))
12789 || (GET_CODE (XEXP (a, 0)) == PLUS
12790 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12791 && (REG_P (XEXP (b, 0))
12792 || (GET_CODE (XEXP (b, 0)) == PLUS
12793 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12794 {
12795 HOST_WIDE_INT val0 = 0, val1 = 0;
12796 rtx reg0, reg1;
12797 int val_diff;
12798
12799 if (GET_CODE (XEXP (a, 0)) == PLUS)
12800 {
12801 reg0 = XEXP (XEXP (a, 0), 0);
12802 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12803 }
12804 else
12805 reg0 = XEXP (a, 0);
12806
12807 if (GET_CODE (XEXP (b, 0)) == PLUS)
12808 {
12809 reg1 = XEXP (XEXP (b, 0), 0);
12810 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12811 }
12812 else
12813 reg1 = XEXP (b, 0);
12814
12815 /* Don't accept any offset that will require multiple
12816 instructions to handle, since this would cause the
12817 arith_adjacentmem pattern to output an overlong sequence. */
12818 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12819 return 0;
12820
12821 /* Don't allow an eliminable register: register elimination can make
12822 the offset too large. */
12823 if (arm_eliminable_register (reg0))
12824 return 0;
12825
12826 val_diff = val1 - val0;
12827
12828 if (arm_ld_sched)
12829 {
12830 /* If the target has load delay slots, then there's no benefit
12831 to using an ldm instruction unless the offset is zero and
12832 we are optimizing for size. */
12833 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12834 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12835 && (val_diff == 4 || val_diff == -4));
12836 }
12837
12838 return ((REGNO (reg0) == REGNO (reg1))
12839 && (val_diff == 4 || val_diff == -4));
12840 }
12841
12842 return 0;
12843 }
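/* For example (operands invented for illustration):
   (mem (plus (reg r3) (const_int 8))) and
   (mem (plus (reg r3) (const_int 12))) are adjacent, provided r3 is not an
   eliminable register; the same two references based on different
   registers, or placed 8 bytes apart, are not.  On cores with load delay
   slots (arm_ld_sched) adjacency is additionally only reported when
   optimizing for size and one of the offsets is 0 or 4.  */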
12844
12845 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12846 for load operations, false for store operations. CONSECUTIVE is true
12847 if the register numbers in the operation must be consecutive in the register
12848 bank. RETURN_PC is true if the value is to be loaded into the PC.
12849 The pattern we are trying to match for load is:
12850 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12851 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12852 :
12853 :
12854 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12855 ]
12856 where
12857 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12858 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12859 3. If consecutive is TRUE, then for kth register being loaded,
12860 REGNO (R_dk) = REGNO (R_d0) + k.
12861 The pattern for store is similar. */
12862 bool
12863 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12864 bool consecutive, bool return_pc)
12865 {
12866 HOST_WIDE_INT count = XVECLEN (op, 0);
12867 rtx reg, mem, addr;
12868 unsigned regno;
12869 unsigned first_regno;
12870 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12871 rtx elt;
12872 bool addr_reg_in_reglist = false;
12873 bool update = false;
12874 int reg_increment;
12875 int offset_adj;
12876 int regs_per_val;
12877
12878 /* If not in SImode, then registers must be consecutive
12879 (e.g., VLDM instructions for DFmode). */
12880 gcc_assert ((mode == SImode) || consecutive);
12881 /* Setting return_pc for stores is illegal. */
12882 gcc_assert (!return_pc || load);
12883
12884 /* Set up the increments and the regs per val based on the mode. */
12885 reg_increment = GET_MODE_SIZE (mode);
12886 regs_per_val = reg_increment / 4;
12887 offset_adj = return_pc ? 1 : 0;
12888
12889 if (count <= 1
12890 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12891 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12892 return false;
12893
12894 /* Check if this is a write-back. */
12895 elt = XVECEXP (op, 0, offset_adj);
12896 if (GET_CODE (SET_SRC (elt)) == PLUS)
12897 {
12898 i++;
12899 base = 1;
12900 update = true;
12901
12902 /* The offset adjustment must be the number of registers being
12903 popped times the size of a single register. */
12904 if (!REG_P (SET_DEST (elt))
12905 || !REG_P (XEXP (SET_SRC (elt), 0))
12906 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12907 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12908 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12909 ((count - 1 - offset_adj) * reg_increment))
12910 return false;
12911 }
12912
12913 i = i + offset_adj;
12914 base = base + offset_adj;
12915 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12916 success depends on the type: VLDM can do just one reg,
12917 LDM must do at least two. */
12918 if ((count <= i) && (mode == SImode))
12919 return false;
12920
12921 elt = XVECEXP (op, 0, i - 1);
12922 if (GET_CODE (elt) != SET)
12923 return false;
12924
12925 if (load)
12926 {
12927 reg = SET_DEST (elt);
12928 mem = SET_SRC (elt);
12929 }
12930 else
12931 {
12932 reg = SET_SRC (elt);
12933 mem = SET_DEST (elt);
12934 }
12935
12936 if (!REG_P (reg) || !MEM_P (mem))
12937 return false;
12938
12939 regno = REGNO (reg);
12940 first_regno = regno;
12941 addr = XEXP (mem, 0);
12942 if (GET_CODE (addr) == PLUS)
12943 {
12944 if (!CONST_INT_P (XEXP (addr, 1)))
12945 return false;
12946
12947 offset = INTVAL (XEXP (addr, 1));
12948 addr = XEXP (addr, 0);
12949 }
12950
12951 if (!REG_P (addr))
12952 return false;
12953
12954 /* Don't allow SP to be loaded unless it is also the base register. It
12955 guarantees that SP is reset correctly when an LDM instruction
12956 is interrupted. Otherwise, we might end up with a corrupt stack. */
12957 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12958 return false;
12959
12960 for (; i < count; i++)
12961 {
12962 elt = XVECEXP (op, 0, i);
12963 if (GET_CODE (elt) != SET)
12964 return false;
12965
12966 if (load)
12967 {
12968 reg = SET_DEST (elt);
12969 mem = SET_SRC (elt);
12970 }
12971 else
12972 {
12973 reg = SET_SRC (elt);
12974 mem = SET_DEST (elt);
12975 }
12976
12977 if (!REG_P (reg)
12978 || GET_MODE (reg) != mode
12979 || REGNO (reg) <= regno
12980 || (consecutive
12981 && (REGNO (reg) !=
12982 (unsigned int) (first_regno + regs_per_val * (i - base))))
12983 /* Don't allow SP to be loaded unless it is also the base register. It
12984 guarantees that SP is reset correctly when an LDM instruction
12985 is interrupted. Otherwise, we might end up with a corrupt stack. */
12986 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12987 || !MEM_P (mem)
12988 || GET_MODE (mem) != mode
12989 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12990 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12991 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
12992 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
12993 offset + (i - base) * reg_increment))
12994 && (!REG_P (XEXP (mem, 0))
12995 || offset + (i - base) * reg_increment != 0)))
12996 return false;
12997
12998 regno = REGNO (reg);
12999 if (regno == REGNO (addr))
13000 addr_reg_in_reglist = true;
13001 }
13002
13003 if (load)
13004 {
13005 if (update && addr_reg_in_reglist)
13006 return false;
13007
13008 /* For Thumb-1, the address register is always modified - either by write-back
13009 or by an explicit load. If the pattern does not describe an update,
13010 then the address register must be in the list of loaded registers. */
13011 if (TARGET_THUMB1)
13012 return update || addr_reg_in_reglist;
13013 }
13014
13015 return true;
13016 }
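/* As a concrete sketch (hypothetical registers), the following PARALLEL is
   accepted as a load multiple with write-back, corresponding roughly to
   "ldmia r0!, {r4, r5}":

     (parallel
       [(set (reg:SI r0) (plus:SI (reg:SI r0) (const_int 8)))
        (set (reg:SI r4) (mem:SI (reg:SI r0)))
        (set (reg:SI r5) (mem:SI (plus:SI (reg:SI r0) (const_int 4))))])

   The write-back increment must equal 4 * <number of data registers>, the
   data registers must be strictly ascending, and each memory offset must
   advance by the register size.  */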
13017
13018 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13019 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13020 instruction. ADD_OFFSET is nonzero if the base address register needs
13021 to be modified with an add instruction before we can use it. */
13022
13023 static bool
13024 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13025 int nops, HOST_WIDE_INT add_offset)
13026 {
13027 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13028 if the offset isn't small enough. The reason 2 ldrs are faster
13029 is because these ARMs are able to do more than one cache access
13030 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13031 whilst the ARM8 has a double bandwidth cache. This means that
13032 these cores can do both an instruction fetch and a data fetch in
13033 a single cycle, so the trick of calculating the address into a
13034 scratch register (one of the result regs) and then doing a load
13035 multiple actually becomes slower (and no smaller in code size).
13036 That is the transformation
13037
13038 ldr rd1, [rbase + offset]
13039 ldr rd2, [rbase + offset + 4]
13040
13041 to
13042
13043 add rd1, rbase, offset
13044 ldmia rd1, {rd1, rd2}
13045
13046 produces worse code -- '3 cycles + any stalls on rd2' instead of
13047 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13048 access per cycle, the first sequence could never complete in less
13049 than 6 cycles, whereas the ldm sequence would only take 5 and
13050 would make better use of sequential accesses if not hitting the
13051 cache.
13052
13053 We cheat here and test 'arm_ld_sched' which we currently know to
13054 only be true for the ARM8, ARM9 and StrongARM. If this ever
13055 changes, then the test below needs to be reworked. */
13056 if (nops == 2 && arm_ld_sched && add_offset != 0)
13057 return false;
13058
13059 /* XScale has load-store double instructions, but they have stricter
13060 alignment requirements than load-store multiple, so we cannot
13061 use them.
13062
13063 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13064 the pipeline until completion.
13065
13066 NREGS CYCLES
13067 1 3
13068 2 4
13069 3 5
13070 4 6
13071
13072 An ldr instruction takes 1-3 cycles, but does not block the
13073 pipeline.
13074
13075 NREGS CYCLES
13076 1 1-3
13077 2 2-6
13078 3 3-9
13079 4 4-12
13080
13081 Best case ldr will always win. However, the more ldr instructions
13082 we issue, the less likely we are to be able to schedule them well.
13083 Using ldr instructions also increases code size.
13084
13085 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13086 for counts of 3 or 4 regs. */
13087 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13088 return false;
13089 return true;
13090 }
13091
13092 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13093 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13094 an array ORDER which describes the sequence in which to access the
13095 offsets so that they are visited in ascending order. In this sequence, each
13096 offset must be larger by exactly 4 than the previous one. ORDER[0]
13097 must have been filled in with the lowest offset by the caller.
13098 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13099 we use to verify that ORDER produces an ascending order of registers.
13100 Return true if it was possible to construct such an order, false if
13101 not. */
13102
13103 static bool
13104 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13105 int *unsorted_regs)
13106 {
13107 int i;
13108 for (i = 1; i < nops; i++)
13109 {
13110 int j;
13111
13112 order[i] = order[i - 1];
13113 for (j = 0; j < nops; j++)
13114 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13115 {
13116 /* We must find exactly one offset that is higher than the
13117 previous one by 4. */
13118 if (order[i] != order[i - 1])
13119 return false;
13120 order[i] = j;
13121 }
13122 if (order[i] == order[i - 1])
13123 return false;
13124 /* The register numbers must be ascending. */
13125 if (unsorted_regs != NULL
13126 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13127 return false;
13128 }
13129 return true;
13130 }
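/* A small worked example (offsets invented for illustration): with
   UNSORTED_OFFSETS = { 8, 0, 4, 12 } and ORDER[0] pre-set to 1 (the index
   of the lowest offset), the loop fills ORDER with { 1, 2, 0, 3 }, i.e.
   offsets 0, 4, 8, 12.  If any step fails to find exactly one offset that
   is larger by 4 than the previous one - say the offsets were
   { 8, 0, 4, 16 } - the function returns false.  */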
13131
13132 /* Used to determine in a peephole whether a sequence of load
13133 instructions can be changed into a load-multiple instruction.
13134 NOPS is the number of separate load instructions we are examining. The
13135 first NOPS entries in OPERANDS are the destination registers, the
13136 next NOPS entries are memory operands. If this function is
13137 successful, *BASE is set to the common base register of the memory
13138 accesses; *LOAD_OFFSET is set to the first memory location's offset
13139 from that base register.
13140 REGS is an array filled in with the destination register numbers.
13141 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13142 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13143 the sequence of registers in REGS matches the loads from ascending memory
13144 locations, and the function verifies that the register numbers are
13145 themselves ascending. If CHECK_REGS is false, the register numbers
13146 are stored in the order they are found in the operands. */
13147 static int
13148 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13149 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13150 {
13151 int unsorted_regs[MAX_LDM_STM_OPS];
13152 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13153 int order[MAX_LDM_STM_OPS];
13154 rtx base_reg_rtx = NULL;
13155 int base_reg = -1;
13156 int i, ldm_case;
13157
13158 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13159 easily extended if required. */
13160 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13161
13162 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13163
13164 /* Loop over the operands and check that the memory references are
13165 suitable (i.e. immediate offsets from the same base register). At
13166 the same time, extract the target register, and the memory
13167 offsets. */
13168 for (i = 0; i < nops; i++)
13169 {
13170 rtx reg;
13171 rtx offset;
13172
13173 /* Convert a subreg of a mem into the mem itself. */
13174 if (GET_CODE (operands[nops + i]) == SUBREG)
13175 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13176
13177 gcc_assert (MEM_P (operands[nops + i]));
13178
13179 /* Don't reorder volatile memory references; it doesn't seem worth
13180 looking for the case where the order is ok anyway. */
13181 if (MEM_VOLATILE_P (operands[nops + i]))
13182 return 0;
13183
13184 offset = const0_rtx;
13185
13186 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13187 || (GET_CODE (reg) == SUBREG
13188 && REG_P (reg = SUBREG_REG (reg))))
13189 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13190 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13191 || (GET_CODE (reg) == SUBREG
13192 && REG_P (reg = SUBREG_REG (reg))))
13193 && (CONST_INT_P (offset
13194 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13195 {
13196 if (i == 0)
13197 {
13198 base_reg = REGNO (reg);
13199 base_reg_rtx = reg;
13200 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13201 return 0;
13202 }
13203 else if (base_reg != (int) REGNO (reg))
13204 /* Not addressed from the same base register. */
13205 return 0;
13206
13207 unsorted_regs[i] = (REG_P (operands[i])
13208 ? REGNO (operands[i])
13209 : REGNO (SUBREG_REG (operands[i])));
13210
13211 /* If it isn't an integer register, or if it overwrites the
13212 base register but isn't the last insn in the list, then
13213 we can't do this. */
13214 if (unsorted_regs[i] < 0
13215 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13216 || unsorted_regs[i] > 14
13217 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13218 return 0;
13219
13220 /* Don't allow SP to be loaded unless it is also the base
13221 register. It guarantees that SP is reset correctly when
13222 an LDM instruction is interrupted. Otherwise, we might
13223 end up with a corrupt stack. */
13224 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13225 return 0;
13226
13227 unsorted_offsets[i] = INTVAL (offset);
13228 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13229 order[0] = i;
13230 }
13231 else
13232 /* Not a suitable memory address. */
13233 return 0;
13234 }
13235
13236 /* All the useful information has now been extracted from the
13237 operands into unsorted_regs and unsorted_offsets; additionally,
13238 order[0] has been set to the lowest offset in the list. Sort
13239 the offsets into order, verifying that they are adjacent, and
13240 check that the register numbers are ascending. */
13241 if (!compute_offset_order (nops, unsorted_offsets, order,
13242 check_regs ? unsorted_regs : NULL))
13243 return 0;
13244
13245 if (saved_order)
13246 memcpy (saved_order, order, sizeof order);
13247
13248 if (base)
13249 {
13250 *base = base_reg;
13251
13252 for (i = 0; i < nops; i++)
13253 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13254
13255 *load_offset = unsorted_offsets[order[0]];
13256 }
13257
13258 if (TARGET_THUMB1
13259 && !peep2_reg_dead_p (nops, base_reg_rtx))
13260 return 0;
13261
13262 if (unsorted_offsets[order[0]] == 0)
13263 ldm_case = 1; /* ldmia */
13264 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13265 ldm_case = 2; /* ldmib */
13266 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13267 ldm_case = 3; /* ldmda */
13268 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13269 ldm_case = 4; /* ldmdb */
13270 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13271 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13272 ldm_case = 5;
13273 else
13274 return 0;
13275
13276 if (!multiple_operation_profitable_p (false, nops,
13277 ldm_case == 5
13278 ? unsorted_offsets[order[0]] : 0))
13279 return 0;
13280
13281 return ldm_case;
13282 }
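/* Example (hypothetical peephole operands): for the two loads
     ldr r4, [r0, #4]
     ldr r5, [r0, #8]
   the function fills REGS = { 4, 5 }, sets *BASE = 0 (r0) and
   *LOAD_OFFSET = 4, and returns 2 (the ldmib case) on ARM targets where
   the combination is considered profitable.  Swapping the destination
   registers would fail the ascending-register check when CHECK_REGS is
   true, and a pair of loads based on different registers returns 0.  */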
13283
13284 /* Used to determine in a peephole whether a sequence of store instructions can
13285 be changed into a store-multiple instruction.
13286 NOPS is the number of separate store instructions we are examining.
13287 NOPS_TOTAL is the total number of instructions recognized by the peephole
13288 pattern.
13289 The first NOPS entries in OPERANDS are the source registers, the next
13290 NOPS entries are memory operands. If this function is successful, *BASE is
13291 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13292 to the first memory location's offset from that base register. REGS is an
13293 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13294 likewise filled with the corresponding rtx's.
13295 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13296 numbers to an ascending order of stores.
13297 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13298 from ascending memory locations, and the function verifies that the register
13299 numbers are themselves ascending. If CHECK_REGS is false, the register
13300 numbers are stored in the order they are found in the operands. */
13301 static int
13302 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13303 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13304 HOST_WIDE_INT *load_offset, bool check_regs)
13305 {
13306 int unsorted_regs[MAX_LDM_STM_OPS];
13307 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13308 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13309 int order[MAX_LDM_STM_OPS];
13310 int base_reg = -1;
13311 rtx base_reg_rtx = NULL;
13312 int i, stm_case;
13313
13314 /* Write back of base register is currently only supported for Thumb 1. */
13315 int base_writeback = TARGET_THUMB1;
13316
13317 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13318 easily extended if required. */
13319 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13320
13321 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13322
13323 /* Loop over the operands and check that the memory references are
13324 suitable (i.e. immediate offsets from the same base register). At
13325 the same time, extract the target register, and the memory
13326 offsets. */
13327 for (i = 0; i < nops; i++)
13328 {
13329 rtx reg;
13330 rtx offset;
13331
13332 /* Convert a subreg of a mem into the mem itself. */
13333 if (GET_CODE (operands[nops + i]) == SUBREG)
13334 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13335
13336 gcc_assert (MEM_P (operands[nops + i]));
13337
13338 /* Don't reorder volatile memory references; it doesn't seem worth
13339 looking for the case where the order is ok anyway. */
13340 if (MEM_VOLATILE_P (operands[nops + i]))
13341 return 0;
13342
13343 offset = const0_rtx;
13344
13345 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13346 || (GET_CODE (reg) == SUBREG
13347 && REG_P (reg = SUBREG_REG (reg))))
13348 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13349 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13350 || (GET_CODE (reg) == SUBREG
13351 && REG_P (reg = SUBREG_REG (reg))))
13352 && (CONST_INT_P (offset
13353 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13354 {
13355 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13356 ? operands[i] : SUBREG_REG (operands[i]));
13357 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13358
13359 if (i == 0)
13360 {
13361 base_reg = REGNO (reg);
13362 base_reg_rtx = reg;
13363 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13364 return 0;
13365 }
13366 else if (base_reg != (int) REGNO (reg))
13367 /* Not addressed from the same base register. */
13368 return 0;
13369
13370 /* If it isn't an integer register, then we can't do this. */
13371 if (unsorted_regs[i] < 0
13372 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13373 /* The effects are unpredictable if the base register is
13374 both updated and stored. */
13375 || (base_writeback && unsorted_regs[i] == base_reg)
13376 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13377 || unsorted_regs[i] > 14)
13378 return 0;
13379
13380 unsorted_offsets[i] = INTVAL (offset);
13381 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13382 order[0] = i;
13383 }
13384 else
13385 /* Not a suitable memory address. */
13386 return 0;
13387 }
13388
13389 /* All the useful information has now been extracted from the
13390 operands into unsorted_regs and unsorted_offsets; additionally,
13391 order[0] has been set to the lowest offset in the list. Sort
13392 the offsets into order, verifying that they are adjacent, and
13393 check that the register numbers are ascending. */
13394 if (!compute_offset_order (nops, unsorted_offsets, order,
13395 check_regs ? unsorted_regs : NULL))
13396 return 0;
13397
13398 if (saved_order)
13399 memcpy (saved_order, order, sizeof order);
13400
13401 if (base)
13402 {
13403 *base = base_reg;
13404
13405 for (i = 0; i < nops; i++)
13406 {
13407 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13408 if (reg_rtxs)
13409 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13410 }
13411
13412 *load_offset = unsorted_offsets[order[0]];
13413 }
13414
13415 if (TARGET_THUMB1
13416 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13417 return 0;
13418
13419 if (unsorted_offsets[order[0]] == 0)
13420 stm_case = 1; /* stmia */
13421 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13422 stm_case = 2; /* stmib */
13423 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13424 stm_case = 3; /* stmda */
13425 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13426 stm_case = 4; /* stmdb */
13427 else
13428 return 0;
13429
13430 if (!multiple_operation_profitable_p (false, nops, 0))
13431 return 0;
13432
13433 return stm_case;
13434 }
13435 \f
13436 /* Routines for use in generating RTL. */
13437
13438 /* Generate a load-multiple instruction. COUNT is the number of loads in
13439 the instruction; REGS and MEMS are arrays containing the operands.
13440 BASEREG is the base register to be used in addressing the memory operands.
13441 WBACK_OFFSET is nonzero if the instruction should update the base
13442 register. */
13443
13444 static rtx
13445 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13446 HOST_WIDE_INT wback_offset)
13447 {
13448 int i = 0, j;
13449 rtx result;
13450
13451 if (!multiple_operation_profitable_p (false, count, 0))
13452 {
13453 rtx seq;
13454
13455 start_sequence ();
13456
13457 for (i = 0; i < count; i++)
13458 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13459
13460 if (wback_offset != 0)
13461 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13462
13463 seq = get_insns ();
13464 end_sequence ();
13465
13466 return seq;
13467 }
13468
13469 result = gen_rtx_PARALLEL (VOIDmode,
13470 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13471 if (wback_offset != 0)
13472 {
13473 XVECEXP (result, 0, 0)
13474 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13475 i = 1;
13476 count++;
13477 }
13478
13479 for (j = 0; i < count; i++, j++)
13480 XVECEXP (result, 0, i)
13481 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13482
13483 return result;
13484 }
13485
13486 /* Generate a store-multiple instruction. COUNT is the number of stores in
13487 the instruction; REGS and MEMS are arrays containing the operands.
13488 BASEREG is the base register to be used in addressing the memory operands.
13489 WBACK_OFFSET is nonzero if the instruction should update the base
13490 register. */
13491
13492 static rtx
13493 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13494 HOST_WIDE_INT wback_offset)
13495 {
13496 int i = 0, j;
13497 rtx result;
13498
13499 if (GET_CODE (basereg) == PLUS)
13500 basereg = XEXP (basereg, 0);
13501
13502 if (!multiple_operation_profitable_p (false, count, 0))
13503 {
13504 rtx seq;
13505
13506 start_sequence ();
13507
13508 for (i = 0; i < count; i++)
13509 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13510
13511 if (wback_offset != 0)
13512 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13513
13514 seq = get_insns ();
13515 end_sequence ();
13516
13517 return seq;
13518 }
13519
13520 result = gen_rtx_PARALLEL (VOIDmode,
13521 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13522 if (wback_offset != 0)
13523 {
13524 XVECEXP (result, 0, 0)
13525 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13526 i = 1;
13527 count++;
13528 }
13529
13530 for (j = 0; i < count; i++, j++)
13531 XVECEXP (result, 0, i)
13532 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13533
13534 return result;
13535 }
13536
13537 /* Generate either a load-multiple or a store-multiple instruction. This
13538 function can be used in situations where we can start with a single MEM
13539 rtx and adjust its address upwards.
13540 COUNT is the number of operations in the instruction, not counting a
13541 possible update of the base register. REGS is an array containing the
13542 register operands.
13543 BASEREG is the base register to be used in addressing the memory operands,
13544 which are constructed from BASEMEM.
13545 WRITE_BACK specifies whether the generated instruction should include an
13546 update of the base register.
13547 OFFSETP is used to pass an offset to and from this function; this offset
13548 is not used when constructing the address (instead BASEMEM should have an
13549 appropriate offset in its address), it is used only for setting
13550 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13551
13552 static rtx
13553 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13554 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13555 {
13556 rtx mems[MAX_LDM_STM_OPS];
13557 HOST_WIDE_INT offset = *offsetp;
13558 int i;
13559
13560 gcc_assert (count <= MAX_LDM_STM_OPS);
13561
13562 if (GET_CODE (basereg) == PLUS)
13563 basereg = XEXP (basereg, 0);
13564
13565 for (i = 0; i < count; i++)
13566 {
13567 rtx addr = plus_constant (Pmode, basereg, i * 4);
13568 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13569 offset += 4;
13570 }
13571
13572 if (write_back)
13573 *offsetp = offset;
13574
13575 if (is_load)
13576 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13577 write_back ? 4 * count : 0);
13578 else
13579 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13580 write_back ? 4 * count : 0);
13581 }
13582
13583 rtx
13584 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13585 rtx basemem, HOST_WIDE_INT *offsetp)
13586 {
13587 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13588 offsetp);
13589 }
13590
13591 rtx
13592 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13593 rtx basemem, HOST_WIDE_INT *offsetp)
13594 {
13595 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13596 offsetp);
13597 }
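/* Usage sketch (register numbers and variable names are illustrative only):

     int regs[4] = { 4, 5, 6, 7 };
     HOST_WIDE_INT off = 0;
     emit_insn (arm_gen_load_multiple (regs, 4, base, TRUE, basemem, &off));

   builds four SImode loads from consecutive words of BASEMEM plus a
   write-back of 16 on the base register (roughly "ldmia rB!, {r4-r7}"),
   and leaves OFF at 16 so the caller can continue with any trailing bytes.
   This mirrors the calls made from arm_gen_movmemqi below.  */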
13598
13599 /* Called from a peephole2 expander to turn a sequence of loads into an
13600 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13601 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13602 is true if we can reorder the registers because their values are subsequently
13603 used commutatively.
13604 Returns true iff we could generate a new instruction. */
13605
13606 bool
13607 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13608 {
13609 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13610 rtx mems[MAX_LDM_STM_OPS];
13611 int i, j, base_reg;
13612 rtx base_reg_rtx;
13613 HOST_WIDE_INT offset;
13614 int write_back = FALSE;
13615 int ldm_case;
13616 rtx addr;
13617
13618 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13619 &base_reg, &offset, !sort_regs);
13620
13621 if (ldm_case == 0)
13622 return false;
13623
13624 if (sort_regs)
13625 for (i = 0; i < nops - 1; i++)
13626 for (j = i + 1; j < nops; j++)
13627 if (regs[i] > regs[j])
13628 {
13629 int t = regs[i];
13630 regs[i] = regs[j];
13631 regs[j] = t;
13632 }
13633 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13634
13635 if (TARGET_THUMB1)
13636 {
13637 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13638 gcc_assert (ldm_case == 1 || ldm_case == 5);
13639 write_back = TRUE;
13640 }
13641
13642 if (ldm_case == 5)
13643 {
13644 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13645 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13646 offset = 0;
13647 if (!TARGET_THUMB1)
13648 base_reg_rtx = newbase;
13649 }
13650
13651 for (i = 0; i < nops; i++)
13652 {
13653 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13654 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13655 SImode, addr, 0);
13656 }
13657 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13658 write_back ? offset + i * 4 : 0));
13659 return true;
13660 }
13661
13662 /* Called from a peephole2 expander to turn a sequence of stores into an
13663 STM instruction. OPERANDS are the operands found by the peephole matcher;
13664 NOPS indicates how many separate stores we are trying to combine.
13665 Returns true iff we could generate a new instruction. */
13666
13667 bool
13668 gen_stm_seq (rtx *operands, int nops)
13669 {
13670 int i;
13671 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13672 rtx mems[MAX_LDM_STM_OPS];
13673 int base_reg;
13674 rtx base_reg_rtx;
13675 HOST_WIDE_INT offset;
13676 int write_back = FALSE;
13677 int stm_case;
13678 rtx addr;
13679 bool base_reg_dies;
13680
13681 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13682 mem_order, &base_reg, &offset, true);
13683
13684 if (stm_case == 0)
13685 return false;
13686
13687 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13688
13689 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13690 if (TARGET_THUMB1)
13691 {
13692 gcc_assert (base_reg_dies);
13693 write_back = TRUE;
13694 }
13695
13696 if (stm_case == 5)
13697 {
13698 gcc_assert (base_reg_dies);
13699 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13700 offset = 0;
13701 }
13702
13703 addr = plus_constant (Pmode, base_reg_rtx, offset);
13704
13705 for (i = 0; i < nops; i++)
13706 {
13707 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13708 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13709 SImode, addr, 0);
13710 }
13711 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13712 write_back ? offset + i * 4 : 0));
13713 return true;
13714 }
13715
13716 /* Called from a peephole2 expander to turn a sequence of stores that are
13717 preceded by constant loads into an STM instruction. OPERANDS are the
13718 operands found by the peephole matcher; NOPS indicates how many
13719 separate stores we are trying to combine; there are 2 * NOPS
13720 instructions in the peephole.
13721 Returns true iff we could generate a new instruction. */
13722
13723 bool
13724 gen_const_stm_seq (rtx *operands, int nops)
13725 {
13726 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13727 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13728 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13729 rtx mems[MAX_LDM_STM_OPS];
13730 int base_reg;
13731 rtx base_reg_rtx;
13732 HOST_WIDE_INT offset;
13733 int write_back = FALSE;
13734 int stm_case;
13735 rtx addr;
13736 bool base_reg_dies;
13737 int i, j;
13738 HARD_REG_SET allocated;
13739
13740 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13741 mem_order, &base_reg, &offset, false);
13742
13743 if (stm_case == 0)
13744 return false;
13745
13746 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13747
13748 /* If the same register is used more than once, try to find a free
13749 register. */
13750 CLEAR_HARD_REG_SET (allocated);
13751 for (i = 0; i < nops; i++)
13752 {
13753 for (j = i + 1; j < nops; j++)
13754 if (regs[i] == regs[j])
13755 {
13756 rtx t = peep2_find_free_register (0, nops * 2,
13757 TARGET_THUMB1 ? "l" : "r",
13758 SImode, &allocated);
13759 if (t == NULL_RTX)
13760 return false;
13761 reg_rtxs[i] = t;
13762 regs[i] = REGNO (t);
13763 }
13764 }
13765
13766 /* Compute an ordering that maps the register numbers to an ascending
13767 sequence. */
13768 reg_order[0] = 0;
13769 for (i = 0; i < nops; i++)
13770 if (regs[i] < regs[reg_order[0]])
13771 reg_order[0] = i;
13772
13773 for (i = 1; i < nops; i++)
13774 {
13775 int this_order = reg_order[i - 1];
13776 for (j = 0; j < nops; j++)
13777 if (regs[j] > regs[reg_order[i - 1]]
13778 && (this_order == reg_order[i - 1]
13779 || regs[j] < regs[this_order]))
13780 this_order = j;
13781 reg_order[i] = this_order;
13782 }
13783
13784 /* Ensure that registers that must be live after the instruction end
13785 up with the correct value. */
13786 for (i = 0; i < nops; i++)
13787 {
13788 int this_order = reg_order[i];
13789 if ((this_order != mem_order[i]
13790 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13791 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13792 return false;
13793 }
13794
13795 /* Load the constants. */
13796 for (i = 0; i < nops; i++)
13797 {
13798 rtx op = operands[2 * nops + mem_order[i]];
13799 sorted_regs[i] = regs[reg_order[i]];
13800 emit_move_insn (reg_rtxs[reg_order[i]], op);
13801 }
13802
13803 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13804
13805 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13806 if (TARGET_THUMB1)
13807 {
13808 gcc_assert (base_reg_dies);
13809 write_back = TRUE;
13810 }
13811
13812 if (stm_case == 5)
13813 {
13814 gcc_assert (base_reg_dies);
13815 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13816 offset = 0;
13817 }
13818
13819 addr = plus_constant (Pmode, base_reg_rtx, offset);
13820
13821 for (i = 0; i < nops; i++)
13822 {
13823 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13824 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13825 SImode, addr, 0);
13826 }
13827 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13828 write_back ? offset + i * 4 : 0));
13829 return true;
13830 }
13831
13832 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13833 unaligned copies on processors which support unaligned semantics for those
13834 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13835 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13836 An interleave factor of 1 (the minimum) will perform no interleaving.
13837 Load/store multiple are used for aligned addresses where possible. */
13838
13839 static void
13840 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13841 HOST_WIDE_INT length,
13842 unsigned int interleave_factor)
13843 {
13844 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13845 int *regnos = XALLOCAVEC (int, interleave_factor);
13846 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13847 HOST_WIDE_INT i, j;
13848 HOST_WIDE_INT remaining = length, words;
13849 rtx halfword_tmp = NULL, byte_tmp = NULL;
13850 rtx dst, src;
13851 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13852 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13853 HOST_WIDE_INT srcoffset, dstoffset;
13854 HOST_WIDE_INT src_autoinc, dst_autoinc;
13855 rtx mem, addr;
13856
13857 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13858
13859 /* Use hard registers if we have aligned source or destination so we can use
13860 load/store multiple with contiguous registers. */
13861 if (dst_aligned || src_aligned)
13862 for (i = 0; i < interleave_factor; i++)
13863 regs[i] = gen_rtx_REG (SImode, i);
13864 else
13865 for (i = 0; i < interleave_factor; i++)
13866 regs[i] = gen_reg_rtx (SImode);
13867
13868 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13869 src = copy_addr_to_reg (XEXP (srcbase, 0));
13870
13871 srcoffset = dstoffset = 0;
13872
13873 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13874 For copying the last bytes we want to subtract this offset again. */
13875 src_autoinc = dst_autoinc = 0;
13876
13877 for (i = 0; i < interleave_factor; i++)
13878 regnos[i] = i;
13879
13880 /* Copy BLOCK_SIZE_BYTES chunks. */
13881
13882 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13883 {
13884 /* Load words. */
13885 if (src_aligned && interleave_factor > 1)
13886 {
13887 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13888 TRUE, srcbase, &srcoffset));
13889 src_autoinc += UNITS_PER_WORD * interleave_factor;
13890 }
13891 else
13892 {
13893 for (j = 0; j < interleave_factor; j++)
13894 {
13895 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13896 - src_autoinc));
13897 mem = adjust_automodify_address (srcbase, SImode, addr,
13898 srcoffset + j * UNITS_PER_WORD);
13899 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13900 }
13901 srcoffset += block_size_bytes;
13902 }
13903
13904 /* Store words. */
13905 if (dst_aligned && interleave_factor > 1)
13906 {
13907 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13908 TRUE, dstbase, &dstoffset));
13909 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13910 }
13911 else
13912 {
13913 for (j = 0; j < interleave_factor; j++)
13914 {
13915 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13916 - dst_autoinc));
13917 mem = adjust_automodify_address (dstbase, SImode, addr,
13918 dstoffset + j * UNITS_PER_WORD);
13919 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13920 }
13921 dstoffset += block_size_bytes;
13922 }
13923
13924 remaining -= block_size_bytes;
13925 }
13926
13927 /* Copy any whole words left (note these aren't interleaved with any
13928 subsequent halfword/byte load/stores in the interests of simplicity). */
13929
13930 words = remaining / UNITS_PER_WORD;
13931
13932 gcc_assert (words < interleave_factor);
13933
13934 if (src_aligned && words > 1)
13935 {
13936 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13937 &srcoffset));
13938 src_autoinc += UNITS_PER_WORD * words;
13939 }
13940 else
13941 {
13942 for (j = 0; j < words; j++)
13943 {
13944 addr = plus_constant (Pmode, src,
13945 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13946 mem = adjust_automodify_address (srcbase, SImode, addr,
13947 srcoffset + j * UNITS_PER_WORD);
13948 if (src_aligned)
13949 emit_move_insn (regs[j], mem);
13950 else
13951 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13952 }
13953 srcoffset += words * UNITS_PER_WORD;
13954 }
13955
13956 if (dst_aligned && words > 1)
13957 {
13958 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13959 &dstoffset));
13960 dst_autoinc += words * UNITS_PER_WORD;
13961 }
13962 else
13963 {
13964 for (j = 0; j < words; j++)
13965 {
13966 addr = plus_constant (Pmode, dst,
13967 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13968 mem = adjust_automodify_address (dstbase, SImode, addr,
13969 dstoffset + j * UNITS_PER_WORD);
13970 if (dst_aligned)
13971 emit_move_insn (mem, regs[j]);
13972 else
13973 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13974 }
13975 dstoffset += words * UNITS_PER_WORD;
13976 }
13977
13978 remaining -= words * UNITS_PER_WORD;
13979
13980 gcc_assert (remaining < 4);
13981
13982 /* Copy a halfword if necessary. */
13983
13984 if (remaining >= 2)
13985 {
13986 halfword_tmp = gen_reg_rtx (SImode);
13987
13988 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13989 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13990 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13991
13992 /* Either write out immediately, or delay until we've loaded the last
13993 byte, depending on interleave factor. */
13994 if (interleave_factor == 1)
13995 {
13996 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13997 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13998 emit_insn (gen_unaligned_storehi (mem,
13999 gen_lowpart (HImode, halfword_tmp)));
14000 halfword_tmp = NULL;
14001 dstoffset += 2;
14002 }
14003
14004 remaining -= 2;
14005 srcoffset += 2;
14006 }
14007
14008 gcc_assert (remaining < 2);
14009
14010 /* Copy last byte. */
14011
14012 if ((remaining & 1) != 0)
14013 {
14014 byte_tmp = gen_reg_rtx (SImode);
14015
14016 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14017 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14018 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14019
14020 if (interleave_factor == 1)
14021 {
14022 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14023 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14024 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14025 byte_tmp = NULL;
14026 dstoffset++;
14027 }
14028
14029 remaining--;
14030 srcoffset++;
14031 }
14032
14033 /* Store last halfword if we haven't done so already. */
14034
14035 if (halfword_tmp)
14036 {
14037 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14038 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14039 emit_insn (gen_unaligned_storehi (mem,
14040 gen_lowpart (HImode, halfword_tmp)));
14041 dstoffset += 2;
14042 }
14043
14044 /* Likewise for last byte. */
14045
14046 if (byte_tmp)
14047 {
14048 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14049 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14050 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14051 dstoffset++;
14052 }
14053
14054 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14055 }
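/* As an illustration (sizes chosen arbitrarily): a copy with LENGTH == 23
   and INTERLEAVE_FACTOR == 4 proceeds as one 16-byte block (four word
   loads followed by four word stores, using ldm/stm on whichever side is
   word-aligned), then one leftover whole word, then a halfword, then a
   single byte, leaving SRCOFFSET == DSTOFFSET == 23 at the end.  */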
14056
14057 /* From mips_adjust_block_mem:
14058
14059 Helper function for doing a loop-based block operation on memory
14060 reference MEM. Each iteration of the loop will operate on LENGTH
14061 bytes of MEM.
14062
14063 Create a new base register for use within the loop and point it to
14064 the start of MEM. Create a new memory reference that uses this
14065 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14066
14067 static void
14068 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14069 rtx *loop_mem)
14070 {
14071 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14072
14073 /* Although the new mem does not refer to a known location,
14074 it does keep up to LENGTH bytes of alignment. */
14075 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14076 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14077 }
14078
14079 /* From mips_block_move_loop:
14080
14081 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14082 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14083 the memory regions do not overlap. */
14084
14085 static void
14086 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14087 unsigned int interleave_factor,
14088 HOST_WIDE_INT bytes_per_iter)
14089 {
14090 rtx src_reg, dest_reg, final_src, test;
14091 HOST_WIDE_INT leftover;
14092
14093 leftover = length % bytes_per_iter;
14094 length -= leftover;
14095
14096 /* Create registers and memory references for use within the loop. */
14097 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14098 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14099
14100 /* Calculate the value that SRC_REG should have after the last iteration of
14101 the loop. */
14102 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14103 0, 0, OPTAB_WIDEN);
14104
14105 /* Emit the start of the loop. */
14106 rtx_code_label *label = gen_label_rtx ();
14107 emit_label (label);
14108
14109 /* Emit the loop body. */
14110 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14111 interleave_factor);
14112
14113 /* Move on to the next block. */
14114 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14115 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14116
14117 /* Emit the loop condition. */
14118 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14119 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14120
14121 /* Mop up any left-over bytes. */
14122 if (leftover)
14123 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14124 }
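/* For example (numbers are illustrative): LENGTH == 100 with
   BYTES_PER_ITER == 16 strips LEFTOVER == 4 first, runs the loop until
   SRC_REG reaches its start value plus 96 (six iterations of 16 bytes),
   and then copies the remaining 4 bytes with a straight-line sequence.  */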
14125
14126 /* Emit a block move when either the source or destination is unaligned (not
14127 aligned to a four-byte boundary). This may need further tuning depending on
14128 core type, optimize_size setting, etc. */
14129
14130 static int
14131 arm_movmemqi_unaligned (rtx *operands)
14132 {
14133 HOST_WIDE_INT length = INTVAL (operands[2]);
14134
14135 if (optimize_size)
14136 {
14137 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14138 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14139 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14140 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14141 or dst_aligned though: allow more interleaving in those cases since the
14142 resulting code can be smaller. */
14143 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14144 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14145
14146 if (length > 12)
14147 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14148 interleave_factor, bytes_per_iter);
14149 else
14150 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14151 interleave_factor);
14152 }
14153 else
14154 {
14155 /* Note that the loop created by arm_block_move_unaligned_loop may be
14156 subject to loop unrolling, which makes tuning this condition a little
14157 redundant. */
14158 if (length > 32)
14159 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14160 else
14161 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14162 }
14163
14164 return 1;
14165 }
14166
14167 int
14168 arm_gen_movmemqi (rtx *operands)
14169 {
14170 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14171 HOST_WIDE_INT srcoffset, dstoffset;
14172 rtx src, dst, srcbase, dstbase;
14173 rtx part_bytes_reg = NULL;
14174 rtx mem;
14175
14176 if (!CONST_INT_P (operands[2])
14177 || !CONST_INT_P (operands[3])
14178 || INTVAL (operands[2]) > 64)
14179 return 0;
14180
14181 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14182 return arm_movmemqi_unaligned (operands);
14183
14184 if (INTVAL (operands[3]) & 3)
14185 return 0;
14186
14187 dstbase = operands[0];
14188 srcbase = operands[1];
14189
14190 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14191 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14192
14193 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14194 out_words_to_go = INTVAL (operands[2]) / 4;
14195 last_bytes = INTVAL (operands[2]) & 3;
14196 dstoffset = srcoffset = 0;
14197
14198 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14199 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14200
14201 while (in_words_to_go >= 2)
14202 {
14203 if (in_words_to_go > 4)
14204 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14205 TRUE, srcbase, &srcoffset));
14206 else
14207 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14208 src, FALSE, srcbase,
14209 &srcoffset));
14210
14211 if (out_words_to_go)
14212 {
14213 if (out_words_to_go > 4)
14214 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14215 TRUE, dstbase, &dstoffset));
14216 else if (out_words_to_go != 1)
14217 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14218 out_words_to_go, dst,
14219 (last_bytes == 0
14220 ? FALSE : TRUE),
14221 dstbase, &dstoffset));
14222 else
14223 {
14224 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14225 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14226 if (last_bytes != 0)
14227 {
14228 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14229 dstoffset += 4;
14230 }
14231 }
14232 }
14233
14234 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14235 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14236 }
14237
14238 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14239 if (out_words_to_go)
14240 {
14241 rtx sreg;
14242
14243 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14244 sreg = copy_to_reg (mem);
14245
14246 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14247 emit_move_insn (mem, sreg);
14248 in_words_to_go--;
14249
14250 gcc_assert (!in_words_to_go); /* Sanity check */
14251 }
14252
14253 if (in_words_to_go)
14254 {
14255 gcc_assert (in_words_to_go > 0);
14256
14257 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14258 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14259 }
14260
14261 gcc_assert (!last_bytes || part_bytes_reg);
14262
14263 if (BYTES_BIG_ENDIAN && last_bytes)
14264 {
14265 rtx tmp = gen_reg_rtx (SImode);
14266
14267 /* The bytes we want are in the top end of the word. */
14268 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14269 GEN_INT (8 * (4 - last_bytes))));
14270 part_bytes_reg = tmp;
14271
14272 while (last_bytes)
14273 {
14274 mem = adjust_automodify_address (dstbase, QImode,
14275 plus_constant (Pmode, dst,
14276 last_bytes - 1),
14277 dstoffset + last_bytes - 1);
14278 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14279
14280 if (--last_bytes)
14281 {
14282 tmp = gen_reg_rtx (SImode);
14283 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14284 part_bytes_reg = tmp;
14285 }
14286 }
14287
14288 }
14289 else
14290 {
14291 if (last_bytes > 1)
14292 {
14293 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14294 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14295 last_bytes -= 2;
14296 if (last_bytes)
14297 {
14298 rtx tmp = gen_reg_rtx (SImode);
14299 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14300 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14301 part_bytes_reg = tmp;
14302 dstoffset += 2;
14303 }
14304 }
14305
14306 if (last_bytes)
14307 {
14308 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14309 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14310 }
14311 }
14312
14313 return 1;
14314 }
14315
14316 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14317 by mode size. */
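/* E.g. given an SImode MEM at address [Rn], this returns an SImode MEM at
   [Rn + 4] (illustration).  */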
14318 inline static rtx
14319 next_consecutive_mem (rtx mem)
14320 {
14321 machine_mode mode = GET_MODE (mem);
14322 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14323 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14324
14325 return adjust_automodify_address (mem, mode, addr, offset);
14326 }
14327
14328 /* Copy using LDRD/STRD instructions whenever possible.
14329 Returns true upon success. */
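/* For a 16-byte copy with both ends word-aligned this amounts roughly to
   (illustrative sketch; the actual registers are pseudos):
	ldrd	r0, r1, [src]
	strd	r0, r1, [dst]
	ldrd	r0, r1, [src, #8]
	strd	r0, r1, [dst, #8]  */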
14330 bool
14331 gen_movmem_ldrd_strd (rtx *operands)
14332 {
14333 unsigned HOST_WIDE_INT len;
14334 HOST_WIDE_INT align;
14335 rtx src, dst, base;
14336 rtx reg0;
14337 bool src_aligned, dst_aligned;
14338 bool src_volatile, dst_volatile;
14339
14340 gcc_assert (CONST_INT_P (operands[2]));
14341 gcc_assert (CONST_INT_P (operands[3]));
14342
14343 len = UINTVAL (operands[2]);
14344 if (len > 64)
14345 return false;
14346
14347 /* Maximum alignment we can assume for both src and dst buffers. */
14348 align = INTVAL (operands[3]);
14349
14350 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14351 return false;
14352
14353 /* Place src and dst addresses in registers
14354 and update the corresponding mem rtx. */
14355 dst = operands[0];
14356 dst_volatile = MEM_VOLATILE_P (dst);
14357 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14358 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14359 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14360
14361 src = operands[1];
14362 src_volatile = MEM_VOLATILE_P (src);
14363 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14364 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14365 src = adjust_automodify_address (src, VOIDmode, base, 0);
14366
14367 if (!unaligned_access && !(src_aligned && dst_aligned))
14368 return false;
14369
14370 if (src_volatile || dst_volatile)
14371 return false;
14372
14373 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14374 if (!(dst_aligned || src_aligned))
14375 return arm_gen_movmemqi (operands);
14376
14377 /* If either the src or dst is unaligned we'll be accessing it as pairs
14378 of unaligned SImode accesses. Otherwise we can generate DImode
14379 ldrd/strd instructions. */
14380 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14381 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14382
14383 while (len >= 8)
14384 {
14385 len -= 8;
14386 reg0 = gen_reg_rtx (DImode);
14387 rtx low_reg = NULL_RTX;
14388 rtx hi_reg = NULL_RTX;
14389
14390 if (!src_aligned || !dst_aligned)
14391 {
14392 low_reg = gen_lowpart (SImode, reg0);
14393 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14394 }
14395 if (src_aligned)
14396 emit_move_insn (reg0, src);
14397 else
14398 {
14399 emit_insn (gen_unaligned_loadsi (low_reg, src));
14400 src = next_consecutive_mem (src);
14401 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14402 }
14403
14404 if (dst_aligned)
14405 emit_move_insn (dst, reg0);
14406 else
14407 {
14408 emit_insn (gen_unaligned_storesi (dst, low_reg));
14409 dst = next_consecutive_mem (dst);
14410 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14411 }
14412
14413 src = next_consecutive_mem (src);
14414 dst = next_consecutive_mem (dst);
14415 }
14416
14417 gcc_assert (len < 8);
14418 if (len >= 4)
14419 {
14420 /* More than a word but less than a double-word to copy. Copy a word. */
14421 reg0 = gen_reg_rtx (SImode);
14422 src = adjust_address (src, SImode, 0);
14423 dst = adjust_address (dst, SImode, 0);
14424 if (src_aligned)
14425 emit_move_insn (reg0, src);
14426 else
14427 emit_insn (gen_unaligned_loadsi (reg0, src));
14428
14429 if (dst_aligned)
14430 emit_move_insn (dst, reg0);
14431 else
14432 emit_insn (gen_unaligned_storesi (dst, reg0));
14433
14434 src = next_consecutive_mem (src);
14435 dst = next_consecutive_mem (dst);
14436 len -= 4;
14437 }
14438
14439 if (len == 0)
14440 return true;
14441
14442 /* Copy the remaining bytes. */
14443 if (len >= 2)
14444 {
14445 dst = adjust_address (dst, HImode, 0);
14446 src = adjust_address (src, HImode, 0);
14447 reg0 = gen_reg_rtx (SImode);
14448 if (src_aligned)
14449 emit_insn (gen_zero_extendhisi2 (reg0, src));
14450 else
14451 emit_insn (gen_unaligned_loadhiu (reg0, src));
14452
14453 if (dst_aligned)
14454 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14455 else
14456 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14457
14458 src = next_consecutive_mem (src);
14459 dst = next_consecutive_mem (dst);
14460 if (len == 2)
14461 return true;
14462 }
14463
14464 dst = adjust_address (dst, QImode, 0);
14465 src = adjust_address (src, QImode, 0);
14466 reg0 = gen_reg_rtx (QImode);
14467 emit_move_insn (reg0, src);
14468 emit_move_insn (dst, reg0);
14469 return true;
14470 }
14471
14472 /* Select a dominance comparison mode if possible for a test of the general
14473 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14474 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14475 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14476 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14477 In all cases OP will be either EQ or NE, but we don't need to know which
14478 here. If we are unable to support a dominance comparison we return
14479 CC mode. This will then fail to match for the RTL expressions that
14480 generate this call. */
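/* For example (an illustration only): with X = (eq a b), Y = (ge c d) and
   COND_OR == DOM_CC_X_OR_Y, EQ being true implies GE, so the pair is
   representable and CC_DGEmode is returned; pairing (lt a b) with
   (gtu c d) is not representable and yields CCmode.  */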
14481 machine_mode
14482 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14483 {
14484 enum rtx_code cond1, cond2;
14485 int swapped = 0;
14486
14487 /* Currently we will probably get the wrong result if the individual
14488 comparisons are not simple. This also ensures that it is safe to
14489 reverse a comparison if necessary. */
14490 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14491 != CCmode)
14492 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14493 != CCmode))
14494 return CCmode;
14495
14496 /* The if_then_else variant of this tests the second condition if the
14497 first passes, but is true if the first fails. Reverse the first
14498 condition to get a true "inclusive-or" expression. */
14499 if (cond_or == DOM_CC_NX_OR_Y)
14500 cond1 = reverse_condition (cond1);
14501
14502 /* If the comparisons are not equal, and one doesn't dominate the other,
14503 then we can't do this. */
14504 if (cond1 != cond2
14505 && !comparison_dominates_p (cond1, cond2)
14506 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14507 return CCmode;
14508
14509 if (swapped)
14510 std::swap (cond1, cond2);
14511
14512 switch (cond1)
14513 {
14514 case EQ:
14515 if (cond_or == DOM_CC_X_AND_Y)
14516 return CC_DEQmode;
14517
14518 switch (cond2)
14519 {
14520 case EQ: return CC_DEQmode;
14521 case LE: return CC_DLEmode;
14522 case LEU: return CC_DLEUmode;
14523 case GE: return CC_DGEmode;
14524 case GEU: return CC_DGEUmode;
14525 default: gcc_unreachable ();
14526 }
14527
14528 case LT:
14529 if (cond_or == DOM_CC_X_AND_Y)
14530 return CC_DLTmode;
14531
14532 switch (cond2)
14533 {
14534 case LT:
14535 return CC_DLTmode;
14536 case LE:
14537 return CC_DLEmode;
14538 case NE:
14539 return CC_DNEmode;
14540 default:
14541 gcc_unreachable ();
14542 }
14543
14544 case GT:
14545 if (cond_or == DOM_CC_X_AND_Y)
14546 return CC_DGTmode;
14547
14548 switch (cond2)
14549 {
14550 case GT:
14551 return CC_DGTmode;
14552 case GE:
14553 return CC_DGEmode;
14554 case NE:
14555 return CC_DNEmode;
14556 default:
14557 gcc_unreachable ();
14558 }
14559
14560 case LTU:
14561 if (cond_or == DOM_CC_X_AND_Y)
14562 return CC_DLTUmode;
14563
14564 switch (cond2)
14565 {
14566 case LTU:
14567 return CC_DLTUmode;
14568 case LEU:
14569 return CC_DLEUmode;
14570 case NE:
14571 return CC_DNEmode;
14572 default:
14573 gcc_unreachable ();
14574 }
14575
14576 case GTU:
14577 if (cond_or == DOM_CC_X_AND_Y)
14578 return CC_DGTUmode;
14579
14580 switch (cond2)
14581 {
14582 case GTU:
14583 return CC_DGTUmode;
14584 case GEU:
14585 return CC_DGEUmode;
14586 case NE:
14587 return CC_DNEmode;
14588 default:
14589 gcc_unreachable ();
14590 }
14591
14592 /* The remaining cases only occur when both comparisons are the
14593 same. */
14594 case NE:
14595 gcc_assert (cond1 == cond2);
14596 return CC_DNEmode;
14597
14598 case LE:
14599 gcc_assert (cond1 == cond2);
14600 return CC_DLEmode;
14601
14602 case GE:
14603 gcc_assert (cond1 == cond2);
14604 return CC_DGEmode;
14605
14606 case LEU:
14607 gcc_assert (cond1 == cond2);
14608 return CC_DLEUmode;
14609
14610 case GEU:
14611 gcc_assert (cond1 == cond2);
14612 return CC_DGEUmode;
14613
14614 default:
14615 gcc_unreachable ();
14616 }
14617 }
14618
14619 machine_mode
14620 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14621 {
14622 /* All floating point compares return CCFP for equality and unordered
14623 comparisons, and CCFPE for the ordered inequalities (LT/LE/GT/GE). */
14624 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14625 {
14626 switch (op)
14627 {
14628 case EQ:
14629 case NE:
14630 case UNORDERED:
14631 case ORDERED:
14632 case UNLT:
14633 case UNLE:
14634 case UNGT:
14635 case UNGE:
14636 case UNEQ:
14637 case LTGT:
14638 return CCFPmode;
14639
14640 case LT:
14641 case LE:
14642 case GT:
14643 case GE:
14644 return CCFPEmode;
14645
14646 default:
14647 gcc_unreachable ();
14648 }
14649 }
14650
14651 /* A compare with a shifted operand. Because of canonicalization, the
14652 comparison will have to be swapped when we emit the assembler. */
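  /* For instance (sketch): (gt (ashift r1 (const_int 2)) r0) must be output
     as "cmp r0, r1, lsl #2" and tested with the swapped condition (lt).  */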
14653 if (GET_MODE (y) == SImode
14654 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14655 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14656 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14657 || GET_CODE (x) == ROTATERT))
14658 return CC_SWPmode;
14659
14660 /* This operation is performed swapped, but since we only rely on the Z
14661 flag we don't need an additional mode. */
14662 if (GET_MODE (y) == SImode
14663 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14664 && GET_CODE (x) == NEG
14665 && (op == EQ || op == NE))
14666 return CC_Zmode;
14667
14668 /* This is a special case that is used by combine to allow a
14669 comparison of a shifted byte load to be split into a zero-extend
14670 followed by a comparison of the shifted integer (only valid for
14671 equalities and unsigned inequalities). */
14672 if (GET_MODE (x) == SImode
14673 && GET_CODE (x) == ASHIFT
14674 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14675 && GET_CODE (XEXP (x, 0)) == SUBREG
14676 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14677 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14678 && (op == EQ || op == NE
14679 || op == GEU || op == GTU || op == LTU || op == LEU)
14680 && CONST_INT_P (y))
14681 return CC_Zmode;
14682
14683 /* A construct for a conditional compare, if the false arm contains
14684 0, then both conditions must be true, otherwise either condition
14685 must be true. Not all conditions are possible, so CCmode is
14686 returned if it can't be done. */
14687 if (GET_CODE (x) == IF_THEN_ELSE
14688 && (XEXP (x, 2) == const0_rtx
14689 || XEXP (x, 2) == const1_rtx)
14690 && COMPARISON_P (XEXP (x, 0))
14691 && COMPARISON_P (XEXP (x, 1)))
14692 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14693 INTVAL (XEXP (x, 2)));
14694
14695 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14696 if (GET_CODE (x) == AND
14697 && (op == EQ || op == NE)
14698 && COMPARISON_P (XEXP (x, 0))
14699 && COMPARISON_P (XEXP (x, 1)))
14700 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14701 DOM_CC_X_AND_Y);
14702
14703 if (GET_CODE (x) == IOR
14704 && (op == EQ || op == NE)
14705 && COMPARISON_P (XEXP (x, 0))
14706 && COMPARISON_P (XEXP (x, 1)))
14707 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14708 DOM_CC_X_OR_Y);
14709
14710 /* An operation (on Thumb) where we want to test for a single bit.
14711 This is done by shifting that bit up into the top bit of a
14712 scratch register; we can then branch on the sign bit. */
14713 if (TARGET_THUMB1
14714 && GET_MODE (x) == SImode
14715 && (op == EQ || op == NE)
14716 && GET_CODE (x) == ZERO_EXTRACT
14717 && XEXP (x, 1) == const1_rtx)
14718 return CC_Nmode;
14719
14720 /* An operation that sets the condition codes as a side-effect, the
14721 V flag is not set correctly, so we can only use comparisons where
14722 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14723 instead.) */
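  /* E.g. (lt (plus a b) (const_int 0)) can be tested as "adds rT, ra, rb"
     followed by a branch on "mi" (illustrative sketch).  */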
14724 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14725 if (GET_MODE (x) == SImode
14726 && y == const0_rtx
14727 && (op == EQ || op == NE || op == LT || op == GE)
14728 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14729 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14730 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14731 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14732 || GET_CODE (x) == LSHIFTRT
14733 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14734 || GET_CODE (x) == ROTATERT
14735 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14736 return CC_NOOVmode;
14737
14738 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14739 return CC_Zmode;
14740
14741 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14742 && GET_CODE (x) == PLUS
14743 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14744 return CC_Cmode;
14745
14746 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14747 {
14748 switch (op)
14749 {
14750 case EQ:
14751 case NE:
14752 /* A DImode comparison against zero can be implemented by
14753 or'ing the two halves together. */
14754 if (y == const0_rtx)
14755 return CC_Zmode;
14756
14757 /* We can do an equality test in three Thumb instructions. */
14758 if (!TARGET_32BIT)
14759 return CC_Zmode;
14760
14761 /* FALLTHROUGH */
14762
14763 case LTU:
14764 case LEU:
14765 case GTU:
14766 case GEU:
14767 /* DImode unsigned comparisons can be implemented by cmp +
14768 cmpeq without a scratch register. Not worth doing in
14769 Thumb-2. */
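	  /* In ARM state this is roughly (illustrative sketch):
		cmp	xhi, yhi
		cmpeq	xlo, ylo
	     leaving C and Z describing the full 64-bit unsigned result,
	     hence CC_CZmode.  */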
14770 if (TARGET_32BIT)
14771 return CC_CZmode;
14772
14773 /* FALLTHROUGH */
14774
14775 case LT:
14776 case LE:
14777 case GT:
14778 case GE:
14779 /* DImode signed and unsigned comparisons can be implemented
14780 by cmp + sbcs with a scratch register, but that does not
14781 set the Z flag - we must reverse GT/LE/GTU/LEU. */
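	  /* Roughly (illustrative sketch):
		cmp	xlo, ylo
		sbcs	tmp, xhi, yhi
	     which leaves N, C and V describing the 64-bit result but not Z,
	     hence CC_NCVmode and the reversal noted above.  */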
14782 gcc_assert (op != EQ && op != NE);
14783 return CC_NCVmode;
14784
14785 default:
14786 gcc_unreachable ();
14787 }
14788 }
14789
14790 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14791 return GET_MODE (x);
14792
14793 return CCmode;
14794 }
14795
14796 /* X and Y are two things to compare using CODE. Emit the compare insn and
14797 return the rtx for the CC register in the proper mode. SCRATCH is a
14798 scratch register that may be needed for DImode comparisons. */
14799 rtx
14800 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14801 {
14802 machine_mode mode;
14803 rtx cc_reg;
14804 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14805
14806 /* We might have X as a constant, Y as a register because of the predicates
14807 used for cmpdi. If so, force X to a register here. */
14808 if (dimode_comparison && !REG_P (x))
14809 x = force_reg (DImode, x);
14810
14811 mode = SELECT_CC_MODE (code, x, y);
14812 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14813
14814 if (dimode_comparison
14815 && mode != CC_CZmode)
14816 {
14817 rtx clobber, set;
14818
14819 /* To compare two non-zero values for equality, XOR them and
14820 then compare against zero. Not used for ARM mode; there
14821 CC_CZmode is cheaper. */
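	  /* I.e. an equality test of two DImode values, (eq x y), is
	     rewritten below as ((x ^ y) == 0) so that only the Z flag is
	     needed (illustrative restatement).  */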
14822 if (mode == CC_Zmode && y != const0_rtx)
14823 {
14824 gcc_assert (!reload_completed);
14825 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14826 y = const0_rtx;
14827 }
14828
14829 /* A scratch register is required. */
14830 if (reload_completed)
14831 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14832 else
14833 scratch = gen_rtx_SCRATCH (SImode);
14834
14835 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14836 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14837 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14838 }
14839 else
14840 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14841
14842 return cc_reg;
14843 }
14844
14845 /* Generate a sequence of insns that will generate the correct return
14846 address mask depending on the physical architecture that the program
14847 is running on. */
14848 rtx
14849 arm_gen_return_addr_mask (void)
14850 {
14851 rtx reg = gen_reg_rtx (Pmode);
14852
14853 emit_insn (gen_return_addr_mask (reg));
14854 return reg;
14855 }
14856
14857 void
14858 arm_reload_in_hi (rtx *operands)
14859 {
14860 rtx ref = operands[1];
14861 rtx base, scratch;
14862 HOST_WIDE_INT offset = 0;
14863
14864 if (GET_CODE (ref) == SUBREG)
14865 {
14866 offset = SUBREG_BYTE (ref);
14867 ref = SUBREG_REG (ref);
14868 }
14869
14870 if (REG_P (ref))
14871 {
14872 /* We have a pseudo which has been spilt onto the stack; there
14873 are two cases here: the first where there is a simple
14874 stack-slot replacement and a second where the stack-slot is
14875 out of range, or is used as a subreg. */
14876 if (reg_equiv_mem (REGNO (ref)))
14877 {
14878 ref = reg_equiv_mem (REGNO (ref));
14879 base = find_replacement (&XEXP (ref, 0));
14880 }
14881 else
14882 /* The slot is out of range, or was dressed up in a SUBREG. */
14883 base = reg_equiv_address (REGNO (ref));
14884
14885 /* PR 62554: If there is no equivalent memory location then just move
14886 the value as an SImode register move. This happens when the target
14887 architecture variant does not have an HImode register move. */
14888 if (base == NULL)
14889 {
14890 gcc_assert (REG_P (operands[0]));
14891 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14892 gen_rtx_SUBREG (SImode, ref, 0)));
14893 return;
14894 }
14895 }
14896 else
14897 base = find_replacement (&XEXP (ref, 0));
14898
14899 /* Handle the case where the address is too complex to be offset by 1. */
14900 if (GET_CODE (base) == MINUS
14901 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14902 {
14903 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14904
14905 emit_set_insn (base_plus, base);
14906 base = base_plus;
14907 }
14908 else if (GET_CODE (base) == PLUS)
14909 {
14910 /* The addend must be CONST_INT, or we would have dealt with it above. */
14911 HOST_WIDE_INT hi, lo;
14912
14913 offset += INTVAL (XEXP (base, 1));
14914 base = XEXP (base, 0);
14915
14916 /* Rework the address into a legal sequence of insns. */
14917 /* Valid range for lo is -4095 -> 4095 */
14918 lo = (offset >= 0
14919 ? (offset & 0xfff)
14920 : -((-offset) & 0xfff));
14921
14922 /* Corner case, if lo is the max offset then we would be out of range
14923 once we have added the additional 1 below, so bump the msb into the
14924 pre-loading insn(s). */
14925 if (lo == 4095)
14926 lo &= 0x7ff;
14927
14928 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14929 ^ (HOST_WIDE_INT) 0x80000000)
14930 - (HOST_WIDE_INT) 0x80000000);
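      /* Worked example (illustrative): offset = 0x1003 gives lo = 3 and
	 hi = 0x1000; offset = 0xfff hits the corner case above, so lo is
	 reduced to 0x7ff and hi becomes 0x800, keeping both offset and
	 offset + 1 addressable.  */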
14931
14932 gcc_assert (hi + lo == offset);
14933
14934 if (hi != 0)
14935 {
14936 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14937
14938 /* Get the base address; addsi3 knows how to handle constants
14939 that require more than one insn. */
14940 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14941 base = base_plus;
14942 offset = lo;
14943 }
14944 }
14945
14946 /* Operands[2] may overlap operands[0] (though it won't overlap
14947 operands[1]), that's why we asked for a DImode reg -- so we can
14948 use the bit that does not overlap. */
14949 if (REGNO (operands[2]) == REGNO (operands[0]))
14950 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14951 else
14952 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14953
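  /* The halfword load itself is synthesized as two byte loads plus a merge;
     for little-endian this is roughly (illustrative sketch):
	ldrb	scratch, [base, #offset]
	ldrb	out, [base, #offset + 1]
	orr	out, scratch, out, lsl #8
     with the shift applied to the byte at OFFSET for big-endian.  */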
14954 emit_insn (gen_zero_extendqisi2 (scratch,
14955 gen_rtx_MEM (QImode,
14956 plus_constant (Pmode, base,
14957 offset))));
14958 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14959 gen_rtx_MEM (QImode,
14960 plus_constant (Pmode, base,
14961 offset + 1))));
14962 if (!BYTES_BIG_ENDIAN)
14963 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14964 gen_rtx_IOR (SImode,
14965 gen_rtx_ASHIFT
14966 (SImode,
14967 gen_rtx_SUBREG (SImode, operands[0], 0),
14968 GEN_INT (8)),
14969 scratch));
14970 else
14971 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14972 gen_rtx_IOR (SImode,
14973 gen_rtx_ASHIFT (SImode, scratch,
14974 GEN_INT (8)),
14975 gen_rtx_SUBREG (SImode, operands[0], 0)));
14976 }
14977
14978 /* Handle storing a half-word to memory during reload by synthesizing it as two
14979 byte stores. Take care not to clobber the input values until after we
14980 have moved them somewhere safe. This code assumes that if the DImode
14981 scratch in operands[2] overlaps either the input value or output address
14982 in some way, then that value must die in this insn (we absolutely need
14983 two scratch registers for some corner cases). */
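/* For the little-endian case the expansion amounts roughly to (sketch only;
   the actual operands are supplied by reload):
	strb	outval, [base, #offset]
	lsr	scratch, outval, #8
	strb	scratch, [base, #offset + 1]
   with the two byte stores issued the other way round for big-endian.  */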
14984 void
14985 arm_reload_out_hi (rtx *operands)
14986 {
14987 rtx ref = operands[0];
14988 rtx outval = operands[1];
14989 rtx base, scratch;
14990 HOST_WIDE_INT offset = 0;
14991
14992 if (GET_CODE (ref) == SUBREG)
14993 {
14994 offset = SUBREG_BYTE (ref);
14995 ref = SUBREG_REG (ref);
14996 }
14997
14998 if (REG_P (ref))
14999 {
15000 /* We have a pseudo which has been spilt onto the stack; there
15001 are two cases here: the first where there is a simple
15002 stack-slot replacement and a second where the stack-slot is
15003 out of range, or is used as a subreg. */
15004 if (reg_equiv_mem (REGNO (ref)))
15005 {
15006 ref = reg_equiv_mem (REGNO (ref));
15007 base = find_replacement (&XEXP (ref, 0));
15008 }
15009 else
15010 /* The slot is out of range, or was dressed up in a SUBREG. */
15011 base = reg_equiv_address (REGNO (ref));
15012
15013 /* PR 62254: If there is no equivalent memory location then just move
15014 the value as an SImode register move. This happens when the target
15015 architecture variant does not have an HImode register move. */
15016 if (base == NULL)
15017 {
15018 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15019
15020 if (REG_P (outval))
15021 {
15022 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15023 gen_rtx_SUBREG (SImode, outval, 0)));
15024 }
15025 else /* SUBREG_P (outval) */
15026 {
15027 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15028 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15029 SUBREG_REG (outval)));
15030 else
15031 /* FIXME: Handle other cases ? */
15032 gcc_unreachable ();
15033 }
15034 return;
15035 }
15036 }
15037 else
15038 base = find_replacement (&XEXP (ref, 0));
15039
15040 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15041
15042 /* Handle the case where the address is too complex to be offset by 1. */
15043 if (GET_CODE (base) == MINUS
15044 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15045 {
15046 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15047
15048 /* Be careful not to destroy OUTVAL. */
15049 if (reg_overlap_mentioned_p (base_plus, outval))
15050 {
15051 /* Updating base_plus might destroy outval, see if we can
15052 swap the scratch and base_plus. */
15053 if (!reg_overlap_mentioned_p (scratch, outval))
15054 std::swap (scratch, base_plus);
15055 else
15056 {
15057 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15058
15059 /* Be conservative and copy OUTVAL into the scratch now;
15060 this should only be necessary if outval is a subreg
15061 of something larger than a word. */
15062 /* XXX Might this clobber base? I can't see how it can,
15063 since scratch is known to overlap with OUTVAL, and
15064 must be wider than a word. */
15065 emit_insn (gen_movhi (scratch_hi, outval));
15066 outval = scratch_hi;
15067 }
15068 }
15069
15070 emit_set_insn (base_plus, base);
15071 base = base_plus;
15072 }
15073 else if (GET_CODE (base) == PLUS)
15074 {
15075 /* The addend must be CONST_INT, or we would have dealt with it above. */
15076 HOST_WIDE_INT hi, lo;
15077
15078 offset += INTVAL (XEXP (base, 1));
15079 base = XEXP (base, 0);
15080
15081 /* Rework the address into a legal sequence of insns. */
15082 /* Valid range for lo is -4095 -> 4095 */
15083 lo = (offset >= 0
15084 ? (offset & 0xfff)
15085 : -((-offset) & 0xfff));
15086
15087 /* Corner case, if lo is the max offset then we would be out of range
15088 once we have added the additional 1 below, so bump the msb into the
15089 pre-loading insn(s). */
15090 if (lo == 4095)
15091 lo &= 0x7ff;
15092
15093 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15094 ^ (HOST_WIDE_INT) 0x80000000)
15095 - (HOST_WIDE_INT) 0x80000000);
15096
15097 gcc_assert (hi + lo == offset);
15098
15099 if (hi != 0)
15100 {
15101 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15102
15103 /* Be careful not to destroy OUTVAL. */
15104 if (reg_overlap_mentioned_p (base_plus, outval))
15105 {
15106 /* Updating base_plus might destroy outval, see if we
15107 can swap the scratch and base_plus. */
15108 if (!reg_overlap_mentioned_p (scratch, outval))
15109 std::swap (scratch, base_plus);
15110 else
15111 {
15112 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15113
15114 /* Be conservative and copy outval into scratch now;
15115 this should only be necessary if outval is a
15116 subreg of something larger than a word. */
15117 /* XXX Might this clobber base? I can't see how it
15118 can, since scratch is known to overlap with
15119 outval. */
15120 emit_insn (gen_movhi (scratch_hi, outval));
15121 outval = scratch_hi;
15122 }
15123 }
15124
15125 /* Get the base address; addsi3 knows how to handle constants
15126 that require more than one insn. */
15127 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15128 base = base_plus;
15129 offset = lo;
15130 }
15131 }
15132
15133 if (BYTES_BIG_ENDIAN)
15134 {
15135 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15136 plus_constant (Pmode, base,
15137 offset + 1)),
15138 gen_lowpart (QImode, outval)));
15139 emit_insn (gen_lshrsi3 (scratch,
15140 gen_rtx_SUBREG (SImode, outval, 0),
15141 GEN_INT (8)));
15142 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15143 offset)),
15144 gen_lowpart (QImode, scratch)));
15145 }
15146 else
15147 {
15148 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15149 offset)),
15150 gen_lowpart (QImode, outval)));
15151 emit_insn (gen_lshrsi3 (scratch,
15152 gen_rtx_SUBREG (SImode, outval, 0),
15153 GEN_INT (8)));
15154 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15155 plus_constant (Pmode, base,
15156 offset + 1)),
15157 gen_lowpart (QImode, scratch)));
15158 }
15159 }
15160
15161 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15162 (padded to the size of a word) should be passed in a register. */
15163
15164 static bool
15165 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15166 {
15167 if (TARGET_AAPCS_BASED)
15168 return must_pass_in_stack_var_size (mode, type);
15169 else
15170 return must_pass_in_stack_var_size_or_pad (mode, type);
15171 }
15172
15173
15174 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15175 byte of a stack argument has useful data. For legacy APCS ABIs we use
15176 the default. For AAPCS based ABIs small aggregate types are placed
15177 in the lowest memory address. */
15178
15179 static pad_direction
15180 arm_function_arg_padding (machine_mode mode, const_tree type)
15181 {
15182 if (!TARGET_AAPCS_BASED)
15183 return default_function_arg_padding (mode, type);
15184
15185 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15186 return PAD_DOWNWARD;
15187
15188 return PAD_UPWARD;
15189 }
15190
15191
15192 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15193 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15194 register has useful data, and return the opposite if the most
15195 significant byte does. */
15196
15197 bool
15198 arm_pad_reg_upward (machine_mode mode,
15199 tree type, int first ATTRIBUTE_UNUSED)
15200 {
15201 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15202 {
15203 /* For AAPCS, small aggregates, small fixed-point types,
15204 and small complex types are always padded upwards. */
15205 if (type)
15206 {
15207 if ((AGGREGATE_TYPE_P (type)
15208 || TREE_CODE (type) == COMPLEX_TYPE
15209 || FIXED_POINT_TYPE_P (type))
15210 && int_size_in_bytes (type) <= 4)
15211 return true;
15212 }
15213 else
15214 {
15215 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15216 && GET_MODE_SIZE (mode) <= 4)
15217 return true;
15218 }
15219 }
15220
15221 /* Otherwise, use default padding. */
15222 return !BYTES_BIG_ENDIAN;
15223 }
15224
15225 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15226 assuming that the address in the base register is word aligned. */
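/* For instance (illustration): an offset of 1020 is accepted for Thumb-2
   (a multiple of 4 within +/-1020) but rejected for ARM, whose limit is
   +/-255; an offset of 254 is accepted for ARM but rejected for Thumb-2
   because it is not a multiple of 4.  */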
15227 bool
15228 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15229 {
15230 HOST_WIDE_INT max_offset;
15231
15232 /* Offset must be a multiple of 4 in Thumb mode. */
15233 if (TARGET_THUMB2 && ((offset & 3) != 0))
15234 return false;
15235
15236 if (TARGET_THUMB2)
15237 max_offset = 1020;
15238 else if (TARGET_ARM)
15239 max_offset = 255;
15240 else
15241 return false;
15242
15243 return ((offset <= max_offset) && (offset >= -max_offset));
15244 }
15245
15246 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15247 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15248 Assumes that the address in the base register RN is word aligned. Pattern
15249 guarantees that both memory accesses use the same base register,
15250 the offsets are constants within the range, and the gap between the offsets is 4.
15251 If reload is complete then check that the registers are legal. WBACK indicates whether
15252 address is updated. LOAD indicates whether memory access is load or store. */
15253 bool
15254 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15255 bool wback, bool load)
15256 {
15257 unsigned int t, t2, n;
15258
15259 if (!reload_completed)
15260 return true;
15261
15262 if (!offset_ok_for_ldrd_strd (offset))
15263 return false;
15264
15265 t = REGNO (rt);
15266 t2 = REGNO (rt2);
15267 n = REGNO (rn);
15268
15269 if ((TARGET_THUMB2)
15270 && ((wback && (n == t || n == t2))
15271 || (t == SP_REGNUM)
15272 || (t == PC_REGNUM)
15273 || (t2 == SP_REGNUM)
15274 || (t2 == PC_REGNUM)
15275 || (!load && (n == PC_REGNUM))
15276 || (load && (t == t2))
15277 /* Triggers Cortex-M3 LDRD errata. */
15278 || (!wback && load && fix_cm3_ldrd && (n == t))))
15279 return false;
15280
15281 if ((TARGET_ARM)
15282 && ((wback && (n == t || n == t2))
15283 || (t2 == PC_REGNUM)
15284 || (t % 2 != 0) /* First destination register is not even. */
15285 || (t2 != t + 1)
15286 /* PC can be used as a base register (for offset addressing only),
15287 but it is deprecated. */
15288 || (n == PC_REGNUM)))
15289 return false;
15290
15291 return true;
15292 }
15293
15294 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15295 operand MEM's address contains an immediate offset from the base
15296 register and has no side effects, in which case it sets BASE and
15297 OFFSET accordingly. */
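/* For example (illustration): (mem (plus (reg r4) (const_int 8))) yields
   BASE = r4 and OFFSET = 8, while a plain (mem (reg r4)) yields OFFSET = 0.  */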
15298 static bool
15299 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15300 {
15301 rtx addr;
15302
15303 gcc_assert (base != NULL && offset != NULL);
15304
15305 /* TODO: Handle more general memory operand patterns, such as
15306 PRE_DEC and PRE_INC. */
15307
15308 if (side_effects_p (mem))
15309 return false;
15310
15311 /* Can't deal with subregs. */
15312 if (GET_CODE (mem) == SUBREG)
15313 return false;
15314
15315 gcc_assert (MEM_P (mem));
15316
15317 *offset = const0_rtx;
15318
15319 addr = XEXP (mem, 0);
15320
15321 /* If addr isn't valid for DImode, then we can't handle it. */
15322 if (!arm_legitimate_address_p (DImode, addr,
15323 reload_in_progress || reload_completed))
15324 return false;
15325
15326 if (REG_P (addr))
15327 {
15328 *base = addr;
15329 return true;
15330 }
15331 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15332 {
15333 *base = XEXP (addr, 0);
15334 *offset = XEXP (addr, 1);
15335 return (REG_P (*base) && CONST_INT_P (*offset));
15336 }
15337
15338 return false;
15339 }
15340
15341 /* Called from a peephole2 to replace two word-size accesses with a
15342 single LDRD/STRD instruction. Returns true iff we can generate a
15343 new instruction sequence. That is, both accesses use the same base
15344 register and the gap between constant offsets is 4. This function
15345 may reorder its operands to match ldrd/strd RTL templates.
15346 OPERANDS are the operands found by the peephole matcher;
15347 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15348 corresponding memory operands. LOAD indicates whether the access
15349 is a load or a store. CONST_STORE indicates a store of constant
15350 integer values held in OPERANDS[4,5] and assumes that the pattern
15351 is four insns long, for the purpose of checking dead registers.
15352 COMMUTE indicates that register operands may be reordered. */
15353 bool
15354 gen_operands_ldrd_strd (rtx *operands, bool load,
15355 bool const_store, bool commute)
15356 {
15357 int nops = 2;
15358 HOST_WIDE_INT offsets[2], offset;
15359 rtx base = NULL_RTX;
15360 rtx cur_base, cur_offset, tmp;
15361 int i, gap;
15362 HARD_REG_SET regset;
15363
15364 gcc_assert (!const_store || !load);
15365 /* Check that the memory references are immediate offsets from the
15366 same base register. Extract the base register, the destination
15367 registers, and the corresponding memory offsets. */
15368 for (i = 0; i < nops; i++)
15369 {
15370 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15371 return false;
15372
15373 if (i == 0)
15374 base = cur_base;
15375 else if (REGNO (base) != REGNO (cur_base))
15376 return false;
15377
15378 offsets[i] = INTVAL (cur_offset);
15379 if (GET_CODE (operands[i]) == SUBREG)
15380 {
15381 tmp = SUBREG_REG (operands[i]);
15382 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15383 operands[i] = tmp;
15384 }
15385 }
15386
15387 /* Make sure there is no dependency between the individual loads. */
15388 if (load && REGNO (operands[0]) == REGNO (base))
15389 return false; /* RAW */
15390
15391 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15392 return false; /* WAW */
15393
15394 /* If the same input register is used in both stores
15395 when storing different constants, try to find a free register.
15396 For example, the code
15397 mov r0, 0
15398 str r0, [r2]
15399 mov r0, 1
15400 str r0, [r2, #4]
15401 can be transformed into
15402 mov r1, 0
15403 mov r0, 1
15404 strd r1, r0, [r2]
15405 in Thumb mode assuming that r1 is free.
15406 For ARM mode do the same but only if the starting register
15407 can be made to be even. */
15408 if (const_store
15409 && REGNO (operands[0]) == REGNO (operands[1])
15410 && INTVAL (operands[4]) != INTVAL (operands[5]))
15411 {
15412 if (TARGET_THUMB2)
15413 {
15414 CLEAR_HARD_REG_SET (regset);
15415 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15416 if (tmp == NULL_RTX)
15417 return false;
15418
15419 /* Use the new register in the first load to ensure that
15420 if the original input register is not dead after peephole,
15421 then it will have the correct constant value. */
15422 operands[0] = tmp;
15423 }
15424 else if (TARGET_ARM)
15425 {
15426 int regno = REGNO (operands[0]);
15427 if (!peep2_reg_dead_p (4, operands[0]))
15428 {
15429 /* When the input register is even and is not dead after the
15430 pattern, it has to hold the second constant but we cannot
15431 form a legal STRD in ARM mode with this register as the second
15432 register. */
15433 if (regno % 2 == 0)
15434 return false;
15435
15436 /* Is regno-1 free? */
15437 SET_HARD_REG_SET (regset);
15438 CLEAR_HARD_REG_BIT(regset, regno - 1);
15439 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15440 if (tmp == NULL_RTX)
15441 return false;
15442
15443 operands[0] = tmp;
15444 }
15445 else
15446 {
15447 /* Find a DImode register. */
15448 CLEAR_HARD_REG_SET (regset);
15449 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15450 if (tmp != NULL_RTX)
15451 {
15452 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15453 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15454 }
15455 else
15456 {
15457 /* Can we use the input register to form a DI register? */
15458 SET_HARD_REG_SET (regset);
15459 CLEAR_HARD_REG_BIT(regset,
15460 regno % 2 == 0 ? regno + 1 : regno - 1);
15461 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15462 if (tmp == NULL_RTX)
15463 return false;
15464 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15465 }
15466 }
15467
15468 gcc_assert (operands[0] != NULL_RTX);
15469 gcc_assert (operands[1] != NULL_RTX);
15470 gcc_assert (REGNO (operands[0]) % 2 == 0);
15471 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15472 }
15473 }
15474
15475 /* Make sure the instructions are ordered with lower memory access first. */
15476 if (offsets[0] > offsets[1])
15477 {
15478 gap = offsets[0] - offsets[1];
15479 offset = offsets[1];
15480
15481 /* Swap the instructions such that lower memory is accessed first. */
15482 std::swap (operands[0], operands[1]);
15483 std::swap (operands[2], operands[3]);
15484 if (const_store)
15485 std::swap (operands[4], operands[5]);
15486 }
15487 else
15488 {
15489 gap = offsets[1] - offsets[0];
15490 offset = offsets[0];
15491 }
15492
15493 /* Make sure accesses are to consecutive memory locations. */
15494 if (gap != 4)
15495 return false;
15496
15497 /* Make sure we generate legal instructions. */
15498 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15499 false, load))
15500 return true;
15501
15502 /* In Thumb state, where registers are almost unconstrained, there
15503 is little hope to fix it. */
15504 if (TARGET_THUMB2)
15505 return false;
15506
15507 if (load && commute)
15508 {
15509 /* Try reordering registers. */
15510 std::swap (operands[0], operands[1]);
15511 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15512 false, load))
15513 return true;
15514 }
15515
15516 if (const_store)
15517 {
15518 /* If input registers are dead after this pattern, they can be
15519 reordered or replaced by other registers that are free in the
15520 current pattern. */
15521 if (!peep2_reg_dead_p (4, operands[0])
15522 || !peep2_reg_dead_p (4, operands[1]))
15523 return false;
15524
15525 /* Try to reorder the input registers. */
15526 /* For example, the code
15527 mov r0, 0
15528 mov r1, 1
15529 str r1, [r2]
15530 str r0, [r2, #4]
15531 can be transformed into
15532 mov r1, 0
15533 mov r0, 1
15534 strd r0, r1, [r2]
15535 */
15536 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15537 false, false))
15538 {
15539 std::swap (operands[0], operands[1]);
15540 return true;
15541 }
15542
15543 /* Try to find a free DI register. */
15544 CLEAR_HARD_REG_SET (regset);
15545 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15546 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15547 while (true)
15548 {
15549 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15550 if (tmp == NULL_RTX)
15551 return false;
15552
15553 /* DREG must be an even-numbered register in DImode.
15554 Split it into SI registers. */
15555 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15556 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15557 gcc_assert (operands[0] != NULL_RTX);
15558 gcc_assert (operands[1] != NULL_RTX);
15559 gcc_assert (REGNO (operands[0]) % 2 == 0);
15560 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15561
15562 return (operands_ok_ldrd_strd (operands[0], operands[1],
15563 base, offset,
15564 false, load));
15565 }
15566 }
15567
15568 return false;
15569 }
15570
15571
15572
15573 \f
15574 /* Print a symbolic form of X to the debug file, F. */
15575 static void
15576 arm_print_value (FILE *f, rtx x)
15577 {
15578 switch (GET_CODE (x))
15579 {
15580 case CONST_INT:
15581 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15582 return;
15583
15584 case CONST_DOUBLE:
15585 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15586 return;
15587
15588 case CONST_VECTOR:
15589 {
15590 int i;
15591
15592 fprintf (f, "<");
15593 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15594 {
15595 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15596 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15597 fputc (',', f);
15598 }
15599 fprintf (f, ">");
15600 }
15601 return;
15602
15603 case CONST_STRING:
15604 fprintf (f, "\"%s\"", XSTR (x, 0));
15605 return;
15606
15607 case SYMBOL_REF:
15608 fprintf (f, "`%s'", XSTR (x, 0));
15609 return;
15610
15611 case LABEL_REF:
15612 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15613 return;
15614
15615 case CONST:
15616 arm_print_value (f, XEXP (x, 0));
15617 return;
15618
15619 case PLUS:
15620 arm_print_value (f, XEXP (x, 0));
15621 fprintf (f, "+");
15622 arm_print_value (f, XEXP (x, 1));
15623 return;
15624
15625 case PC:
15626 fprintf (f, "pc");
15627 return;
15628
15629 default:
15630 fprintf (f, "????");
15631 return;
15632 }
15633 }
15634 \f
15635 /* Routines for manipulation of the constant pool. */
15636
15637 /* Arm instructions cannot load a large constant directly into a
15638 register; they have to come from a pc relative load. The constant
15639 must therefore be placed in the addressable range of the pc
15640 relative load. Depending on the precise pc relative load
15641 instruction the range is somewhere between 256 bytes and 4k. This
15642 means that we often have to dump a constant inside a function, and
15643 generate code to branch around it.
15644
15645 It is important to minimize this, since the branches will slow
15646 things down and make the code larger.
15647
15648 Normally we can hide the table after an existing unconditional
15649 branch so that there is no interruption of the flow, but in the
15650 worst case the code looks like this:
15651
15652 ldr rn, L1
15653 ...
15654 b L2
15655 align
15656 L1: .long value
15657 L2:
15658 ...
15659
15660 ldr rn, L3
15661 ...
15662 b L4
15663 align
15664 L3: .long value
15665 L4:
15666 ...
15667
15668 We fix this by performing a scan after scheduling, which notices
15669 which instructions need to have their operands fetched from the
15670 constant table and builds the table.
15671
15672 The algorithm starts by building a table of all the constants that
15673 need fixing up and all the natural barriers in the function (places
15674 where a constant table can be dropped without breaking the flow).
15675 For each fixup we note how far the pc-relative replacement will be
15676 able to reach and the offset of the instruction into the function.
15677
15678 Having built the table we then group the fixes together to form
15679 tables that are as large as possible (subject to addressing
15680 constraints) and emit each table of constants after the last
15681 barrier that is within range of all the instructions in the group.
15682 If a group does not contain a barrier, then we forcibly create one
15683 by inserting a jump instruction into the flow. Once the table has
15684 been inserted, the insns are then modified to reference the
15685 relevant entry in the pool.
15686
15687 Possible enhancements to the algorithm (not implemented) are:
15688
15689 1) For some processors and object formats, there may be benefit in
15690 aligning the pools to the start of cache lines; this alignment
15691 would need to be taken into account when calculating addressability
15692 of a pool. */
15693
15694 /* These typedefs are located at the start of this file, so that
15695 they can be used in the prototypes there. This comment is to
15696 remind readers of that fact so that the following structures
15697 can be understood more easily.
15698
15699 typedef struct minipool_node Mnode;
15700 typedef struct minipool_fixup Mfix; */
15701
15702 struct minipool_node
15703 {
15704 /* Doubly linked chain of entries. */
15705 Mnode * next;
15706 Mnode * prev;
15707 /* The maximum offset into the code that this entry can be placed. While
15708 pushing fixes for forward references, all entries are sorted in order
15709 of increasing max_address. */
15710 HOST_WIDE_INT max_address;
15711 /* Similarly for an entry inserted for a backwards ref. */
15712 HOST_WIDE_INT min_address;
15713 /* The number of fixes referencing this entry. This can become zero
15714 if we "unpush" an entry. In this case we ignore the entry when we
15715 come to emit the code. */
15716 int refcount;
15717 /* The offset from the start of the minipool. */
15718 HOST_WIDE_INT offset;
15719 /* The value in table. */
15720 rtx value;
15721 /* The mode of value. */
15722 machine_mode mode;
15723 /* The size of the value. With iWMMXt enabled
15724 sizes > 4 also imply an alignment of 8-bytes. */
15725 int fix_size;
15726 };
15727
15728 struct minipool_fixup
15729 {
15730 Mfix * next;
15731 rtx_insn * insn;
15732 HOST_WIDE_INT address;
15733 rtx * loc;
15734 machine_mode mode;
15735 int fix_size;
15736 rtx value;
15737 Mnode * minipool;
15738 HOST_WIDE_INT forwards;
15739 HOST_WIDE_INT backwards;
15740 };
15741
15742 /* Fixes less than a word need padding out to a word boundary. */
15743 #define MINIPOOL_FIX_SIZE(mode) \
15744 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
15745
15746 static Mnode * minipool_vector_head;
15747 static Mnode * minipool_vector_tail;
15748 static rtx_code_label *minipool_vector_label;
15749 static int minipool_pad;
15750
15751 /* The linked list of all minipool fixes required for this function. */
15752 Mfix * minipool_fix_head;
15753 Mfix * minipool_fix_tail;
15754 /* The fix entry for the current minipool, once it has been placed. */
15755 Mfix * minipool_barrier;
15756
15757 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15758 #define JUMP_TABLES_IN_TEXT_SECTION 0
15759 #endif
15760
15761 static HOST_WIDE_INT
15762 get_jump_table_size (rtx_jump_table_data *insn)
15763 {
15764 /* ADDR_VECs only take room if read-only data goes into the text
15765 section. */
15766 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15767 {
15768 rtx body = PATTERN (insn);
15769 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15770 HOST_WIDE_INT size;
15771 HOST_WIDE_INT modesize;
15772
15773 modesize = GET_MODE_SIZE (GET_MODE (body));
15774 size = modesize * XVECLEN (body, elt);
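	 /* For instance (illustration): a 5-entry TBB table (1-byte entries)
	    is 5 bytes, rounded up to 6 below; the same table with 4-byte
	    entries is 20 bytes, plus 2 bytes of padding on Thumb.  */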
15775 switch (modesize)
15776 {
15777 case 1:
15778 /* Round up size of TBB table to a halfword boundary. */
15779 size = (size + 1) & ~HOST_WIDE_INT_1;
15780 break;
15781 case 2:
15782 /* No padding necessary for TBH. */
15783 break;
15784 case 4:
15785 /* Add two bytes for alignment on Thumb. */
15786 if (TARGET_THUMB)
15787 size += 2;
15788 break;
15789 default:
15790 gcc_unreachable ();
15791 }
15792 return size;
15793 }
15794
15795 return 0;
15796 }
15797
15798 /* Return the maximum amount of padding that will be inserted before
15799 label LABEL. */
15800
15801 static HOST_WIDE_INT
15802 get_label_padding (rtx label)
15803 {
15804 HOST_WIDE_INT align, min_insn_size;
15805
15806 align = 1 << label_to_alignment (label);
15807 min_insn_size = TARGET_THUMB ? 2 : 4;
15808 return align > min_insn_size ? align - min_insn_size : 0;
15809 }
15810
15811 /* Move a minipool fix MP from its current location to before MAX_MP.
15812 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15813 constraints may need updating. */
15814 static Mnode *
15815 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15816 HOST_WIDE_INT max_address)
15817 {
15818 /* The code below assumes these are different. */
15819 gcc_assert (mp != max_mp);
15820
15821 if (max_mp == NULL)
15822 {
15823 if (max_address < mp->max_address)
15824 mp->max_address = max_address;
15825 }
15826 else
15827 {
15828 if (max_address > max_mp->max_address - mp->fix_size)
15829 mp->max_address = max_mp->max_address - mp->fix_size;
15830 else
15831 mp->max_address = max_address;
15832
15833 /* Unlink MP from its current position. Since max_mp is non-null,
15834 mp->prev must be non-null. */
15835 mp->prev->next = mp->next;
15836 if (mp->next != NULL)
15837 mp->next->prev = mp->prev;
15838 else
15839 minipool_vector_tail = mp->prev;
15840
15841 /* Re-insert it before MAX_MP. */
15842 mp->next = max_mp;
15843 mp->prev = max_mp->prev;
15844 max_mp->prev = mp;
15845
15846 if (mp->prev != NULL)
15847 mp->prev->next = mp;
15848 else
15849 minipool_vector_head = mp;
15850 }
15851
15852 /* Save the new entry. */
15853 max_mp = mp;
15854
15855 /* Scan over the preceding entries and adjust their addresses as
15856 required. */
15857 while (mp->prev != NULL
15858 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15859 {
15860 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15861 mp = mp->prev;
15862 }
15863
15864 return max_mp;
15865 }
15866
15867 /* Add a constant to the minipool for a forward reference. Returns the
15868 node added or NULL if the constant will not fit in this pool. */
15869 static Mnode *
15870 add_minipool_forward_ref (Mfix *fix)
15871 {
15872 /* If set, max_mp is the first pool_entry that has a lower
15873 constraint than the one we are trying to add. */
15874 Mnode * max_mp = NULL;
15875 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15876 Mnode * mp;
15877
15878 /* If the minipool starts before the end of FIX->INSN then this FIX
15879 can not be placed into the current pool. Furthermore, adding the
15880 new constant pool entry may cause the pool to start FIX_SIZE bytes
15881 earlier. */
15882 if (minipool_vector_head &&
15883 (fix->address + get_attr_length (fix->insn)
15884 >= minipool_vector_head->max_address - fix->fix_size))
15885 return NULL;
15886
15887 /* Scan the pool to see if a constant with the same value has
15888 already been added. While we are doing this, also note the
15889 location where we must insert the constant if it doesn't already
15890 exist. */
15891 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15892 {
15893 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15894 && fix->mode == mp->mode
15895 && (!LABEL_P (fix->value)
15896 || (CODE_LABEL_NUMBER (fix->value)
15897 == CODE_LABEL_NUMBER (mp->value)))
15898 && rtx_equal_p (fix->value, mp->value))
15899 {
15900 /* More than one fix references this entry. */
15901 mp->refcount++;
15902 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15903 }
15904
15905 /* Note the insertion point if necessary. */
15906 if (max_mp == NULL
15907 && mp->max_address > max_address)
15908 max_mp = mp;
15909
15910 /* If we are inserting an 8-byte aligned quantity and
15911 we have not already found an insertion point, then
15912 make sure that all such 8-byte aligned quantities are
15913 placed at the start of the pool. */
15914 if (ARM_DOUBLEWORD_ALIGN
15915 && max_mp == NULL
15916 && fix->fix_size >= 8
15917 && mp->fix_size < 8)
15918 {
15919 max_mp = mp;
15920 max_address = mp->max_address;
15921 }
15922 }
15923
15924 /* The value is not currently in the minipool, so we need to create
15925 a new entry for it. If MAX_MP is NULL, the entry will be put on
15926 the end of the list since the placement is less constrained than
15927 any existing entry. Otherwise, we insert the new fix before
15928 MAX_MP and, if necessary, adjust the constraints on the other
15929 entries. */
15930 mp = XNEW (Mnode);
15931 mp->fix_size = fix->fix_size;
15932 mp->mode = fix->mode;
15933 mp->value = fix->value;
15934 mp->refcount = 1;
15935 /* Not yet required for a backwards ref. */
15936 mp->min_address = -65536;
15937
15938 if (max_mp == NULL)
15939 {
15940 mp->max_address = max_address;
15941 mp->next = NULL;
15942 mp->prev = minipool_vector_tail;
15943
15944 if (mp->prev == NULL)
15945 {
15946 minipool_vector_head = mp;
15947 minipool_vector_label = gen_label_rtx ();
15948 }
15949 else
15950 mp->prev->next = mp;
15951
15952 minipool_vector_tail = mp;
15953 }
15954 else
15955 {
15956 if (max_address > max_mp->max_address - mp->fix_size)
15957 mp->max_address = max_mp->max_address - mp->fix_size;
15958 else
15959 mp->max_address = max_address;
15960
15961 mp->next = max_mp;
15962 mp->prev = max_mp->prev;
15963 max_mp->prev = mp;
15964 if (mp->prev != NULL)
15965 mp->prev->next = mp;
15966 else
15967 minipool_vector_head = mp;
15968 }
15969
15970 /* Save the new entry. */
15971 max_mp = mp;
15972
15973 /* Scan over the preceding entries and adjust their addresses as
15974 required. */
15975 while (mp->prev != NULL
15976 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15977 {
15978 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15979 mp = mp->prev;
15980 }
15981
15982 return max_mp;
15983 }
15984
15985 static Mnode *
15986 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15987 HOST_WIDE_INT min_address)
15988 {
15989 HOST_WIDE_INT offset;
15990
15991 /* The code below assumes these are different. */
15992 gcc_assert (mp != min_mp);
15993
15994 if (min_mp == NULL)
15995 {
15996 if (min_address > mp->min_address)
15997 mp->min_address = min_address;
15998 }
15999 else
16000 {
16001 /* We will adjust this below if it is too loose. */
16002 mp->min_address = min_address;
16003
16004 /* Unlink MP from its current position. Since min_mp is non-null,
16005 mp->next must be non-null. */
16006 mp->next->prev = mp->prev;
16007 if (mp->prev != NULL)
16008 mp->prev->next = mp->next;
16009 else
16010 minipool_vector_head = mp->next;
16011
16012 /* Reinsert it after MIN_MP. */
16013 mp->prev = min_mp;
16014 mp->next = min_mp->next;
16015 min_mp->next = mp;
16016 if (mp->next != NULL)
16017 mp->next->prev = mp;
16018 else
16019 minipool_vector_tail = mp;
16020 }
16021
16022 min_mp = mp;
16023
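/* Recompute every entry's offset and propagate the minimum-address
   constraints forward through the list.  */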
16024 offset = 0;
16025 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16026 {
16027 mp->offset = offset;
16028 if (mp->refcount > 0)
16029 offset += mp->fix_size;
16030
16031 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16032 mp->next->min_address = mp->min_address + mp->fix_size;
16033 }
16034
16035 return min_mp;
16036 }
16037
16038 /* Add a constant to the minipool for a backward reference. Returns the
16039 node added or NULL if the constant will not fit in this pool.
16040
16041 Note that the code for inserting a backwards reference can be
16042 somewhat confusing because the calculated offsets for each fix do
16043 not take into account the size of the pool (which is still under
16044 construction).  */
16045 static Mnode *
16046 add_minipool_backward_ref (Mfix *fix)
16047 {
16048 /* If set, min_mp is the last pool_entry that has a lower constraint
16049 than the one we are trying to add. */
16050 Mnode *min_mp = NULL;
16051 /* This can be negative, since it is only a constraint. */
16052 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16053 Mnode *mp;
16054
16055 /* If we can't reach the current pool from this insn, or if we can't
16056 insert this entry at the end of the pool without pushing other
16057 fixes out of range, then we don't try. This ensures that we
16058 can't fail later on. */
16059 if (min_address >= minipool_barrier->address
16060 || (minipool_vector_tail->min_address + fix->fix_size
16061 >= minipool_barrier->address))
16062 return NULL;
16063
16064 /* Scan the pool to see if a constant with the same value has
16065 already been added. While we are doing this, also note the
16066 location where we must insert the constant if it doesn't already
16067 exist. */
16068 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16069 {
16070 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16071 && fix->mode == mp->mode
16072 && (!LABEL_P (fix->value)
16073 || (CODE_LABEL_NUMBER (fix->value)
16074 == CODE_LABEL_NUMBER (mp->value)))
16075 && rtx_equal_p (fix->value, mp->value)
16076 /* Check that there is enough slack to move this entry to the
16077 end of the table (this is conservative). */
16078 && (mp->max_address
16079 > (minipool_barrier->address
16080 + minipool_vector_tail->offset
16081 + minipool_vector_tail->fix_size)))
16082 {
16083 mp->refcount++;
16084 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16085 }
16086
16087 if (min_mp != NULL)
16088 mp->min_address += fix->fix_size;
16089 else
16090 {
16091 /* Note the insertion point if necessary. */
16092 if (mp->min_address < min_address)
16093 {
16094 /* For now, we do not allow the insertion of 8-byte alignment
16095 requiring nodes anywhere but at the start of the pool. */
16096 if (ARM_DOUBLEWORD_ALIGN
16097 && fix->fix_size >= 8 && mp->fix_size < 8)
16098 return NULL;
16099 else
16100 min_mp = mp;
16101 }
16102 else if (mp->max_address
16103 < minipool_barrier->address + mp->offset + fix->fix_size)
16104 {
16105 /* Inserting before this entry would push the fix beyond
16106 its maximum address (which can happen if we have
16107 re-located a forwards fix); force the new fix to come
16108 after it. */
16109 if (ARM_DOUBLEWORD_ALIGN
16110 && fix->fix_size >= 8 && mp->fix_size < 8)
16111 return NULL;
16112 else
16113 {
16114 min_mp = mp;
16115 min_address = mp->min_address + fix->fix_size;
16116 }
16117 }
16118 /* Do not insert a non-8-byte aligned quantity before 8-byte
16119 aligned quantities. */
16120 else if (ARM_DOUBLEWORD_ALIGN
16121 && fix->fix_size < 8
16122 && mp->fix_size >= 8)
16123 {
16124 min_mp = mp;
16125 min_address = mp->min_address + fix->fix_size;
16126 }
16127 }
16128 }
16129
16130 /* We need to create a new entry. */
16131 mp = XNEW (Mnode);
16132 mp->fix_size = fix->fix_size;
16133 mp->mode = fix->mode;
16134 mp->value = fix->value;
16135 mp->refcount = 1;
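/* Not yet constrained by any forward reference.  */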
16136 mp->max_address = minipool_barrier->address + 65536;
16137
16138 mp->min_address = min_address;
16139
16140 if (min_mp == NULL)
16141 {
16142 mp->prev = NULL;
16143 mp->next = minipool_vector_head;
16144
16145 if (mp->next == NULL)
16146 {
16147 minipool_vector_tail = mp;
16148 minipool_vector_label = gen_label_rtx ();
16149 }
16150 else
16151 mp->next->prev = mp;
16152
16153 minipool_vector_head = mp;
16154 }
16155 else
16156 {
16157 mp->next = min_mp->next;
16158 mp->prev = min_mp;
16159 min_mp->next = mp;
16160
16161 if (mp->next != NULL)
16162 mp->next->prev = mp;
16163 else
16164 minipool_vector_tail = mp;
16165 }
16166
16167 /* Save the new entry. */
16168 min_mp = mp;
16169
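/* Start the scan below from the entry before the new one so that its offset
   gets recomputed too; if the new entry is now the head, its offset is 0.  */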
16170 if (mp->prev)
16171 mp = mp->prev;
16172 else
16173 mp->offset = 0;
16174
16175 /* Scan over the following entries and adjust their offsets. */
16176 while (mp->next != NULL)
16177 {
16178 if (mp->next->min_address < mp->min_address + mp->fix_size)
16179 mp->next->min_address = mp->min_address + mp->fix_size;
16180
16181 if (mp->refcount)
16182 mp->next->offset = mp->offset + mp->fix_size;
16183 else
16184 mp->next->offset = mp->offset;
16185
16186 mp = mp->next;
16187 }
16188
16189 return min_mp;
16190 }
16191
16192 static void
16193 assign_minipool_offsets (Mfix *barrier)
16194 {
16195 HOST_WIDE_INT offset = 0;
16196 Mnode *mp;
16197
16198 minipool_barrier = barrier;
16199
16200 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16201 {
16202 mp->offset = offset;
16203
16204 if (mp->refcount > 0)
16205 offset += mp->fix_size;
16206 }
16207 }
16208
16209 /* Output the literal table.  */
16210 static void
16211 dump_minipool (rtx_insn *scan)
16212 {
16213 Mnode * mp;
16214 Mnode * nmp;
16215 int align64 = 0;
16216
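/* An 8-byte aligned pool is only needed if the target requires doubleword
   alignment and some live entry is at least 8 bytes wide.  */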
16217 if (ARM_DOUBLEWORD_ALIGN)
16218 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16219 if (mp->refcount > 0 && mp->fix_size >= 8)
16220 {
16221 align64 = 1;
16222 break;
16223 }
16224
16225 if (dump_file)
16226 fprintf (dump_file,
16227 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16228 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16229
16230 scan = emit_label_after (gen_label_rtx (), scan);
16231 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16232 scan = emit_label_after (minipool_vector_label, scan);
16233
16234 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16235 {
16236 if (mp->refcount > 0)
16237 {
16238 if (dump_file)
16239 {
16240 fprintf (dump_file,
16241 ";; Offset %u, min %ld, max %ld ",
16242 (unsigned) mp->offset, (unsigned long) mp->min_address,
16243 (unsigned long) mp->max_address);
16244 arm_print_value (dump_file, mp->value);
16245 fputc ('\n', dump_file);
16246 }
16247
16248 rtx val = copy_rtx (mp->value);
16249
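/* Emit the entry using the consttable pattern that matches its size;
   only sizes for which the target defines a pattern can occur here.  */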
16250 switch (GET_MODE_SIZE (mp->mode))
16251 {
16252 #ifdef HAVE_consttable_1
16253 case 1:
16254 scan = emit_insn_after (gen_consttable_1 (val), scan);
16255 break;
16256
16257 #endif
16258 #ifdef HAVE_consttable_2
16259 case 2:
16260 scan = emit_insn_after (gen_consttable_2 (val), scan);
16261 break;
16262
16263 #endif
16264 #ifdef HAVE_consttable_4
16265 case 4:
16266 scan = emit_insn_after (gen_consttable_4 (val), scan);
16267 break;
16268
16269 #endif
16270 #ifdef HAVE_consttable_8
16271 case 8:
16272 scan = emit_insn_after (gen_consttable_8 (val), scan);
16273 break;
16274
16275 #endif
16276 #ifdef HAVE_consttable_16
16277 case 16:
16278 scan = emit_insn_after (gen_consttable_16 (val), scan);
16279 break;
16280
16281 #endif
16282 default:
16283 gcc_unreachable ();
16284 }
16285 }
16286
16287 nmp = mp->next;
16288 free (mp);
16289 }
16290
16291 minipool_vector_head = minipool_vector_tail = NULL;
16292 scan = emit_insn_after (gen_consttable_end (), scan);
16293 scan = emit_barrier_after (scan);
16294 }
16295
16296 /* Return the cost of forcibly inserting a barrier after INSN. */
16297 static int
16298 arm_barrier_cost (rtx_insn *insn)
16299 {
16300 /* Basing the location of the pool on the loop depth is preferable,
16301 but at the moment, the basic block information seems to be
16302 corrupt by this stage of the compilation. */
16303 int base_cost = 50;
16304 rtx_insn *next = next_nonnote_insn (insn);
16305
16306 if (next != NULL && LABEL_P (next))
16307 base_cost -= 20;
16308
16309 switch (GET_CODE (insn))
16310 {
16311 case CODE_LABEL:
16312 /* It will always be better to place the table before the label, rather
16313 than after it. */
16314 return 50;
16315
16316 case INSN:
16317 case CALL_INSN:
16318 return base_cost;
16319
16320 case JUMP_INSN:
16321 return base_cost - 10;
16322
16323 default:
16324 return base_cost + 10;
16325 }
16326 }
16327
16328 /* Find the best place in the insn stream in the range
16329 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16330 Create the barrier by inserting a jump and add a new fix entry for
16331 it. */
16332 static Mfix *
16333 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16334 {
16335 HOST_WIDE_INT count = 0;
16336 rtx_barrier *barrier;
16337 rtx_insn *from = fix->insn;
16338 /* The instruction after which we will insert the jump. */
16339 rtx_insn *selected = NULL;
16340 int selected_cost;
16341 /* The address at which the jump instruction will be placed. */
16342 HOST_WIDE_INT selected_address;
16343 Mfix * new_fix;
16344 HOST_WIDE_INT max_count = max_address - fix->address;
16345 rtx_code_label *label = gen_label_rtx ();
16346
16347 selected_cost = arm_barrier_cost (from);
16348 selected_address = fix->address;
16349
16350 while (from && count < max_count)
16351 {
16352 rtx_jump_table_data *tmp;
16353 int new_cost;
16354
16355 /* This code shouldn't have been called if there was a natural barrier
16356 within range. */
16357 gcc_assert (!BARRIER_P (from));
16358
16359 /* Count the length of this insn. This must stay in sync with the
16360 code that pushes minipool fixes. */
16361 if (LABEL_P (from))
16362 count += get_label_padding (from);
16363 else
16364 count += get_attr_length (from);
16365
16366 /* If there is a jump table, add its length. */
16367 if (tablejump_p (from, NULL, &tmp))
16368 {
16369 count += get_jump_table_size (tmp);
16370
16371 /* Jump tables aren't in a basic block, so base the cost on
16372 the dispatch insn. If we select this location, we will
16373 still put the pool after the table. */
16374 new_cost = arm_barrier_cost (from);
16375
16376 if (count < max_count
16377 && (!selected || new_cost <= selected_cost))
16378 {
16379 selected = tmp;
16380 selected_cost = new_cost;
16381 selected_address = fix->address + count;
16382 }
16383
16384 /* Continue after the dispatch table. */
16385 from = NEXT_INSN (tmp);
16386 continue;
16387 }
16388
16389 new_cost = arm_barrier_cost (from);
16390
16391 if (count < max_count
16392 && (!selected || new_cost <= selected_cost))
16393 {
16394 selected = from;
16395 selected_cost = new_cost;
16396 selected_address = fix->address + count;
16397 }
16398
16399 from = NEXT_INSN (from);
16400 }
16401
16402 /* Make sure that we found a place to insert the jump. */
16403 gcc_assert (selected);
16404
16405 /* Make sure we do not split a call and its corresponding
16406 CALL_ARG_LOCATION note. */
16407 if (CALL_P (selected))
16408 {
16409 rtx_insn *next = NEXT_INSN (selected);
16410 if (next && NOTE_P (next)
16411 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16412 selected = next;
16413 }
16414
16415 /* Create a new JUMP_INSN that branches around a barrier. */
16416 from = emit_jump_insn_after (gen_jump (label), selected);
16417 JUMP_LABEL (from) = label;
16418 barrier = emit_barrier_after (from);
16419 emit_label_after (label, barrier);
16420
16421 /* Create a minipool barrier entry for the new barrier. */
16422 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16423 new_fix->insn = barrier;
16424 new_fix->address = selected_address;
16425 new_fix->next = fix->next;
16426 fix->next = new_fix;
16427
16428 return new_fix;
16429 }
16430
16431 /* Record that there is a natural barrier in the insn stream at
16432 ADDRESS. */
16433 static void
16434 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16435 {
16436 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16437
16438 fix->insn = insn;
16439 fix->address = address;
16440
16441 fix->next = NULL;
16442 if (minipool_fix_head != NULL)
16443 minipool_fix_tail->next = fix;
16444 else
16445 minipool_fix_head = fix;
16446
16447 minipool_fix_tail = fix;
16448 }
16449
16450 /* Record INSN, which will need fixing up to load a value from the
16451 minipool. ADDRESS is the offset of the insn from the start of the
16452 function; LOC is a pointer to the part of the insn which requires
16453 fixing; VALUE is the constant that must be loaded, which is of type
16454 MODE. */
16455 static void
16456 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16457 machine_mode mode, rtx value)
16458 {
16459 gcc_assert (!arm_disable_literal_pool);
16460 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16461
16462 fix->insn = insn;
16463 fix->address = address;
16464 fix->loc = loc;
16465 fix->mode = mode;
16466 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16467 fix->value = value;
16468 fix->forwards = get_attr_pool_range (insn);
16469 fix->backwards = get_attr_neg_pool_range (insn);
16470 fix->minipool = NULL;
16471
16472 /* If an insn doesn't have a range defined for it, then it isn't
16473 expecting to be reworked by this code. Better to stop now than
16474 to generate duff assembly code. */
16475 gcc_assert (fix->forwards || fix->backwards);
16476
16477 /* If an entry requires 8-byte alignment then assume all constant pools
16478 require 4 bytes of padding. Trying to do this later on a per-pool
16479 basis is awkward because existing pool entries have to be modified. */
16480 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16481 minipool_pad = 4;
16482
16483 if (dump_file)
16484 {
16485 fprintf (dump_file,
16486 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16487 GET_MODE_NAME (mode),
16488 INSN_UID (insn), (unsigned long) address,
16489 -1 * (long)fix->backwards, (long)fix->forwards);
16490 arm_print_value (dump_file, fix->value);
16491 fprintf (dump_file, "\n");
16492 }
16493
16494 /* Add it to the chain of fixes. */
16495 fix->next = NULL;
16496
16497 if (minipool_fix_head != NULL)
16498 minipool_fix_tail->next = fix;
16499 else
16500 minipool_fix_head = fix;
16501
16502 minipool_fix_tail = fix;
16503 }
16504
16505 /* Return the maximum allowed cost, in insns, of synthesizing a 64-bit
16506 constant inline; the movdi splitters compare arm_const_double_inline_cost
16507 against this threshold. */
16508 int
16509 arm_max_const_double_inline_cost ()
16510 {
16511 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16512 }
16513
16514 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16515 Returns the number of insns needed, or 99 if we don't know how to
16516 do it. */
16517 int
16518 arm_const_double_inline_cost (rtx val)
16519 {
16520 rtx lowpart, highpart;
16521 machine_mode mode;
16522
16523 mode = GET_MODE (val);
16524
16525 if (mode == VOIDmode)
16526 mode = DImode;
16527
16528 gcc_assert (GET_MODE_SIZE (mode) == 8);
16529
16530 lowpart = gen_lowpart (SImode, val);
16531 highpart = gen_highpart_mode (SImode, mode, val);
16532
16533 gcc_assert (CONST_INT_P (lowpart));
16534 gcc_assert (CONST_INT_P (highpart));
16535
16536 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16537 NULL_RTX, NULL_RTX, 0, 0)
16538 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16539 NULL_RTX, NULL_RTX, 0, 0));
16540 }
16541
16542 /* Cost of loading a SImode constant. */
16543 static inline int
16544 arm_const_inline_cost (enum rtx_code code, rtx val)
16545 {
16546 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16547 NULL_RTX, NULL_RTX, 1, 0);
16548 }
16549
16550 /* Return true if it is worthwhile to split a 64-bit constant into two
16551 32-bit operations. This is the case if optimizing for size, or
16552 if we have load delay slots, or if one 32-bit part can be done with
16553 a single data operation. */
16554 bool
16555 arm_const_double_by_parts (rtx val)
16556 {
16557 machine_mode mode = GET_MODE (val);
16558 rtx part;
16559
16560 if (optimize_size || arm_ld_sched)
16561 return true;
16562
16563 if (mode == VOIDmode)
16564 mode = DImode;
16565
16566 part = gen_highpart_mode (SImode, mode, val);
16567
16568 gcc_assert (CONST_INT_P (part));
16569
16570 if (const_ok_for_arm (INTVAL (part))
16571 || const_ok_for_arm (~INTVAL (part)))
16572 return true;
16573
16574 part = gen_lowpart (SImode, val);
16575
16576 gcc_assert (CONST_INT_P (part));
16577
16578 if (const_ok_for_arm (INTVAL (part))
16579 || const_ok_for_arm (~INTVAL (part)))
16580 return true;
16581
16582 return false;
16583 }
16584
16585 /* Return true if it is possible to inline both the high and low parts
16586 of a 64-bit constant into 32-bit data processing instructions. */
16587 bool
16588 arm_const_double_by_immediates (rtx val)
16589 {
16590 machine_mode mode = GET_MODE (val);
16591 rtx part;
16592
16593 if (mode == VOIDmode)
16594 mode = DImode;
16595
16596 part = gen_highpart_mode (SImode, mode, val);
16597
16598 gcc_assert (CONST_INT_P (part));
16599
16600 if (!const_ok_for_arm (INTVAL (part)))
16601 return false;
16602
16603 part = gen_lowpart (SImode, val);
16604
16605 gcc_assert (CONST_INT_P (part));
16606
16607 if (!const_ok_for_arm (INTVAL (part)))
16608 return false;
16609
16610 return true;
16611 }
16612
16613 /* Scan INSN and note any of its operands that need fixing.
16614 If DO_PUSHES is false we do not actually push any of the fixups
16615 needed. */
16616 static void
16617 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16618 {
16619 int opno;
16620
16621 extract_constrain_insn (insn);
16622
16623 if (recog_data.n_alternatives == 0)
16624 return;
16625
16626 /* Fill in recog_op_alt with information about the constraints of
16627 this insn. */
16628 preprocess_constraints (insn);
16629
16630 const operand_alternative *op_alt = which_op_alt ();
16631 for (opno = 0; opno < recog_data.n_operands; opno++)
16632 {
16633 /* Things we need to fix can only occur in inputs. */
16634 if (recog_data.operand_type[opno] != OP_IN)
16635 continue;
16636
16637 /* If this alternative is a memory reference, then any mention
16638 of constants in this alternative is really to fool reload
16639 into allowing us to accept one there. We need to fix them up
16640 now so that we output the right code. */
16641 if (op_alt[opno].memory_ok)
16642 {
16643 rtx op = recog_data.operand[opno];
16644
16645 if (CONSTANT_P (op))
16646 {
16647 if (do_pushes)
16648 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16649 recog_data.operand_mode[opno], op);
16650 }
16651 else if (MEM_P (op)
16652 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16653 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16654 {
16655 if (do_pushes)
16656 {
16657 rtx cop = avoid_constant_pool_reference (op);
16658
16659 /* Casting the address of something to a mode narrower
16660 than a word can cause avoid_constant_pool_reference()
16661 to return the pool reference itself. That's no good to
16662 us here. Let's just hope that we can use the
16663 constant pool value directly. */
16664 if (op == cop)
16665 cop = get_pool_constant (XEXP (op, 0));
16666
16667 push_minipool_fix (insn, address,
16668 recog_data.operand_loc[opno],
16669 recog_data.operand_mode[opno], cop);
16670 }
16671
16672 }
16673 }
16674 }
16675
16676 return;
16677 }
16678
16679 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16680 and unions in the context of ARMv8-M Security Extensions. It is used as a
16681 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16682 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
16683 or four masks, depending on whether it is being computed for a
16684 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16685 respectively. The tree for the type of the argument or a field within an
16686 argument is passed in ARG_TYPE, the current register this argument or field
16687 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16688 argument or field starts at is passed in STARTING_BIT and the last used bit
16689 is kept in LAST_USED_BIT which is also updated accordingly. */
16690
16691 static unsigned HOST_WIDE_INT
16692 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16693 uint32_t * padding_bits_to_clear,
16694 unsigned starting_bit, int * last_used_bit)
16695
16696 {
16697 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16698
16699 if (TREE_CODE (arg_type) == RECORD_TYPE)
16700 {
16701 unsigned current_bit = starting_bit;
16702 tree field;
16703 long int offset, size;
16704
16705
16706 field = TYPE_FIELDS (arg_type);
16707 while (field)
16708 {
16709 /* The offset within a structure is always an offset from
16710 the start of that structure. Make sure we take that into account
16711 in the calculation of the register-based offset used here. */
16712 offset = starting_bit;
16713 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16714 offset %= 32;
16715
16716 /* This is the actual size of the field; for bitfields this is the
16717 bitfield width and not the container size. */
16718 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16719
16720 if (*last_used_bit != offset)
16721 {
16722 if (offset < *last_used_bit)
16723 {
16724 /* This field's offset is before the 'last_used_bit', that
16725 means this field goes on the next register. So we need to
16726 pad the rest of the current register and increase the
16727 register number. */
16728 uint32_t mask;
16729 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16730 mask++;
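/* MASK now has every bit from *LAST_USED_BIT upwards set.  */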
16731
16732 padding_bits_to_clear[*regno] |= mask;
16733 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16734 (*regno)++;
16735 }
16736 else
16737 {
16738 /* Otherwise we pad the bits between the last field's end and
16739 the start of the new field. */
16740 uint32_t mask;
16741
16742 mask = ((uint32_t)-1) >> (32 - offset);
16743 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
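/* MASK covers bits *LAST_USED_BIT .. OFFSET - 1, i.e. the padding between
   the previous field's end and the start of this field.  */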
16744 padding_bits_to_clear[*regno] |= mask;
16745 }
16746 current_bit = offset;
16747 }
16748
16749 /* Calculate further padding bits for inner structs/unions too. */
16750 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16751 {
16752 *last_used_bit = current_bit;
16753 not_to_clear_reg_mask
16754 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16755 padding_bits_to_clear, offset,
16756 last_used_bit);
16757 }
16758 else
16759 {
16760 /* Update 'current_bit' with this field's size. If the
16761 'current_bit' lies in a subsequent register, update 'regno' and
16762 reset 'current_bit' to point to the current bit in that new
16763 register. */
16764 current_bit += size;
16765 while (current_bit >= 32)
16766 {
16767 current_bit -= 32;
16768 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16769 (*regno)++;
16770 }
16771 *last_used_bit = current_bit;
16772 }
16773
16774 field = TREE_CHAIN (field);
16775 }
16776 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16777 }
16778 else if (TREE_CODE (arg_type) == UNION_TYPE)
16779 {
16780 tree field, field_t;
16781 int i, regno_t, field_size;
16782 int max_reg = -1;
16783 int max_bit = -1;
16784 uint32_t mask;
16785 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16786 = {-1, -1, -1, -1};
16787
16788 /* To compute the padding bits in a union we only consider bits as
16789 padding bits if they are always either a padding bit or fall outside a
16790 field's size for all fields in the union. */
16791 field = TYPE_FIELDS (arg_type);
16792 while (field)
16793 {
16794 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16795 = {0U, 0U, 0U, 0U};
16796 int last_used_bit_t = *last_used_bit;
16797 regno_t = *regno;
16798 field_t = TREE_TYPE (field);
16799
16800 /* If the field's type is either a record or a union make sure to
16801 compute their padding bits too. */
16802 if (RECORD_OR_UNION_TYPE_P (field_t))
16803 not_to_clear_reg_mask
16804 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16805 &padding_bits_to_clear_t[0],
16806 starting_bit, &last_used_bit_t);
16807 else
16808 {
16809 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16810 regno_t = (field_size / 32) + *regno;
16811 last_used_bit_t = (starting_bit + field_size) % 32;
16812 }
16813
16814 for (i = *regno; i < regno_t; i++)
16815 {
16816 /* For all but the last register used by this field only keep the
16817 padding bits that were padding bits in this field. */
16818 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16819 }
16820
16821 /* For the last register, keep all padding bits that were padding
16822 bits in this field and any padding bits that are still valid
16823 as padding bits but fall outside of this field's size. */
16824 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16825 padding_bits_to_clear_res[regno_t]
16826 &= padding_bits_to_clear_t[regno_t] | mask;
16827
16828 /* Update the maximum size of the fields in terms of registers used
16829 ('max_reg') and the 'last_used_bit' in said register. */
16830 if (max_reg < regno_t)
16831 {
16832 max_reg = regno_t;
16833 max_bit = last_used_bit_t;
16834 }
16835 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16836 max_bit = last_used_bit_t;
16837
16838 field = TREE_CHAIN (field);
16839 }
16840
16841 /* Update the current padding_bits_to_clear using the intersection of the
16842 padding bits of all the fields. */
16843 for (i = *regno; i < max_reg; i++)
16844 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16845
16846 /* Do not keep trailing padding bits; we do not know yet whether this
16847 is the end of the argument. */
16848 mask = ((uint32_t) 1 << max_bit) - 1;
16849 padding_bits_to_clear[max_reg]
16850 |= padding_bits_to_clear_res[max_reg] & mask;
16851
16852 *regno = max_reg;
16853 *last_used_bit = max_bit;
16854 }
16855 else
16856 /* This function should only be used for structs and unions. */
16857 gcc_unreachable ();
16858
16859 return not_to_clear_reg_mask;
16860 }
16861
16862 /* In the context of ARMv8-M Security Extensions, this function is used for both
16863 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16864 registers are used when returning or passing arguments, which is then
16865 returned as a mask. It will also compute a mask to indicate padding/unused
16866 bits for each of these registers, and passes this through the
16867 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16868 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16869 the starting register used to pass this argument or return value is passed
16870 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16871 for struct and union types. */
16872
16873 static unsigned HOST_WIDE_INT
16874 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
16875 uint32_t * padding_bits_to_clear)
16876
16877 {
16878 int last_used_bit = 0;
16879 unsigned HOST_WIDE_INT not_to_clear_mask;
16880
16881 if (RECORD_OR_UNION_TYPE_P (arg_type))
16882 {
16883 not_to_clear_mask
16884 = comp_not_to_clear_mask_str_un (arg_type, &regno,
16885 padding_bits_to_clear, 0,
16886 &last_used_bit);
16887
16888
16889 /* If the 'last_used_bit' is not zero, that means we are still using a
16890 part of the last 'regno'. In such cases we must clear the trailing
16891 bits. Otherwise regno is not used at all and should be marked as a
16892 register to clear. */
16893 if (last_used_bit != 0)
16894 padding_bits_to_clear[regno]
16895 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
16896 else
16897 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
16898 }
16899 else
16900 {
16901 not_to_clear_mask = 0;
16902 /* We are not dealing with structs or unions, so these arguments may be
16903 passed in floating point registers too. In some cases a BLKmode is
16904 used when returning or passing arguments in multiple VFP registers. */
16905 if (GET_MODE (arg_rtx) == BLKmode)
16906 {
16907 int i, arg_regs;
16908 rtx reg;
16909
16910 /* This should really only occur when dealing with the hard-float
16911 ABI. */
16912 gcc_assert (TARGET_HARD_FLOAT_ABI);
16913
16914 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
16915 {
16916 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
16917 gcc_assert (REG_P (reg));
16918
16919 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
16920
16921 /* If we are dealing with DF mode, make sure we don't
16922 clear either of the registers it addresses. */
16923 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
16924 if (arg_regs > 1)
16925 {
16926 unsigned HOST_WIDE_INT mask;
16927 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
16928 mask -= HOST_WIDE_INT_1U << REGNO (reg);
16929 not_to_clear_mask |= mask;
16930 }
16931 }
16932 }
16933 else
16934 {
16935 /* Otherwise we can rely on the MODE to determine how many registers
16936 are being used by this argument. */
16937 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
16938 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16939 if (arg_regs > 1)
16940 {
16941 unsigned HOST_WIDE_INT
16942 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
16943 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16944 not_to_clear_mask |= mask;
16945 }
16946 }
16947 }
16948
16949 return not_to_clear_mask;
16950 }
16951
16952 /* Clears caller saved registers not used to pass arguments before a
16953 cmse_nonsecure_call. Saving, clearing and restoring of callee saved
16954 registers is done in __gnu_cmse_nonsecure_call libcall.
16955 See libgcc/config/arm/cmse_nonsecure_call.S. */
16956
16957 static void
16958 cmse_nonsecure_call_clear_caller_saved (void)
16959 {
16960 basic_block bb;
16961
16962 FOR_EACH_BB_FN (bb, cfun)
16963 {
16964 rtx_insn *insn;
16965
16966 FOR_BB_INSNS (bb, insn)
16967 {
16968 uint64_t to_clear_mask, float_mask;
16969 rtx_insn *seq;
16970 rtx pat, call, unspec, reg, cleared_reg, tmp;
16971 unsigned int regno, maxregno;
16972 rtx address;
16973 CUMULATIVE_ARGS args_so_far_v;
16974 cumulative_args_t args_so_far;
16975 tree arg_type, fntype;
16976 bool using_r4, first_param = true;
16977 function_args_iterator args_iter;
16978 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
16979 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
16980
16981 if (!NONDEBUG_INSN_P (insn))
16982 continue;
16983
16984 if (!CALL_P (insn))
16985 continue;
16986
16987 pat = PATTERN (insn);
16988 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
16989 call = XVECEXP (pat, 0, 0);
16990
16991 /* Get the real call RTX if the insn sets a value, ie. returns. */
16992 if (GET_CODE (call) == SET)
16993 call = SET_SRC (call);
16994
16995 /* Check if it is a cmse_nonsecure_call. */
16996 unspec = XEXP (call, 0);
16997 if (GET_CODE (unspec) != UNSPEC
16998 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
16999 continue;
17000
17001 /* Determine the caller-saved registers we need to clear. */
17002 to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
17003 maxregno = NUM_ARG_REGS - 1;
17004 /* Only look at the caller-saved floating point registers in case of
17005 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
17006 lazy store and loads which clear both caller- and callee-saved
17007 registers. */
17008 if (TARGET_HARD_FLOAT_ABI)
17009 {
17010 float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
17011 float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
17012 to_clear_mask |= float_mask;
17013 maxregno = D7_VFP_REGNUM;
17014 }
17015
17016 /* Make sure the register used to hold the function address is not
17017 cleared. */
17018 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17019 gcc_assert (MEM_P (address));
17020 gcc_assert (REG_P (XEXP (address, 0)));
17021 to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
17022
17023 /* Set basic block of call insn so that df rescan is performed on
17024 insns inserted here. */
17025 set_block_for_insn (insn, bb);
17026 df_set_flags (DF_DEFER_INSN_RESCAN);
17027 start_sequence ();
17028
17029 /* Make sure the scheduler doesn't schedule other insns beyond
17030 here. */
17031 emit_insn (gen_blockage ());
17032
17033 /* Walk through all arguments and clear registers appropriately.  */
17034
17035 fntype = TREE_TYPE (MEM_EXPR (address));
17036 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17037 NULL_TREE);
17038 args_so_far = pack_cumulative_args (&args_so_far_v);
17039 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17040 {
17041 rtx arg_rtx;
17042 machine_mode arg_mode = TYPE_MODE (arg_type);
17043
17044 if (VOID_TYPE_P (arg_type))
17045 continue;
17046
17047 if (!first_param)
17048 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17049 true);
17050
17051 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17052 true);
17053 gcc_assert (REG_P (arg_rtx));
17054 to_clear_mask
17055 &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
17056 REGNO (arg_rtx),
17057 padding_bits_to_clear_ptr);
17058
17059 first_param = false;
17060 }
17061
17062 /* Clear padding bits where needed. */
17063 cleared_reg = XEXP (address, 0);
17064 reg = gen_rtx_REG (SImode, IP_REGNUM);
17065 using_r4 = false;
17066 for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
17067 {
17068 if (padding_bits_to_clear[regno] == 0)
17069 continue;
17070
17071 /* If this is a Thumb-1 target copy the address of the function
17072 we are calling from 'r4' into 'ip' such that we can use r4 to
17073 clear the unused bits in the arguments. */
17074 if (TARGET_THUMB1 && !using_r4)
17075 {
17076 using_r4 = true;
17077 reg = cleared_reg;
17078 emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
17079 reg);
17080 }
17081
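/* Materialize the inverted padding mask in REG sixteen bits at a time:
   the low half with a plain move, the high half (if non-zero) below via
   a ZERO_EXTRACT set.  */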
17082 tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
17083 emit_move_insn (reg, tmp);
17084 /* Also fill the top half of the negated
17085 padding_bits_to_clear. */
17086 if (((~padding_bits_to_clear[regno]) >> 16) > 0)
17087 {
17088 tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
17089 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
17090 GEN_INT (16),
17091 GEN_INT (16)),
17092 tmp));
17093 }
17094
17095 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
17096 gen_rtx_REG (SImode, regno),
17097 reg));
17098
17099 }
17100 if (using_r4)
17101 emit_move_insn (cleared_reg,
17102 gen_rtx_REG (SImode, IP_REGNUM));
17103
17104 /* We use right shift and left shift to clear the LSB of the address
17105 we jump to instead of using bic, to avoid having to use an extra
17106 register on Thumb-1. */
17107 tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
17108 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17109 tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
17110 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17111
17112 /* Clear all registers that might leak values before doing the
17113 non-secure call. */
17114 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17115 {
17116 if (!(to_clear_mask & (1LL << regno)))
17117 continue;
17118
17119 /* If regno is an even vfp register and its successor is also to
17120 be cleared, use vmov. */
17121 if (IS_VFP_REGNUM (regno))
17122 {
17123 if (TARGET_VFP_DOUBLE
17124 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17125 && to_clear_mask & (1LL << (regno + 1)))
17126 emit_move_insn (gen_rtx_REG (DFmode, regno++),
17127 CONST0_RTX (DFmode));
17128 else
17129 emit_move_insn (gen_rtx_REG (SFmode, regno),
17130 CONST0_RTX (SFmode));
17131 }
17132 else
17133 emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
17134 }
17135
17136 seq = get_insns ();
17137 end_sequence ();
17138 emit_insn_before (seq, insn);
17139
17140 }
17141 }
17142 }
17143
17144 /* Rewrite move insn into subtract of 0 if the condition codes will
17145 be useful in the next conditional jump insn. */
17146
17147 static void
17148 thumb1_reorg (void)
17149 {
17150 basic_block bb;
17151
17152 FOR_EACH_BB_FN (bb, cfun)
17153 {
17154 rtx dest, src;
17155 rtx cmp, op0, op1, set = NULL;
17156 rtx_insn *prev, *insn = BB_END (bb);
17157 bool insn_clobbered = false;
17158
17159 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17160 insn = PREV_INSN (insn);
17161
17162 /* Find the last cbranchsi4_insn in basic block BB. */
17163 if (insn == BB_HEAD (bb)
17164 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17165 continue;
17166
17167 /* Get the register with which we are comparing. */
17168 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17169 op0 = XEXP (cmp, 0);
17170 op1 = XEXP (cmp, 1);
17171
17172 /* Check that comparison is against ZERO. */
17173 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17174 continue;
17175
17176 /* Find the first flag setting insn before INSN in basic block BB. */
17177 gcc_assert (insn != BB_HEAD (bb));
17178 for (prev = PREV_INSN (insn);
17179 (!insn_clobbered
17180 && prev != BB_HEAD (bb)
17181 && (NOTE_P (prev)
17182 || DEBUG_INSN_P (prev)
17183 || ((set = single_set (prev)) != NULL
17184 && get_attr_conds (prev) == CONDS_NOCOND)));
17185 prev = PREV_INSN (prev))
17186 {
17187 if (reg_set_p (op0, prev))
17188 insn_clobbered = true;
17189 }
17190
17191 /* Skip if op0 is clobbered by an insn other than prev. */
17192 if (insn_clobbered)
17193 continue;
17194
17195 if (!set)
17196 continue;
17197
17198 dest = SET_DEST (set);
17199 src = SET_SRC (set);
17200 if (!low_register_operand (dest, SImode)
17201 || !low_register_operand (src, SImode))
17202 continue;
17203
17204 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17205 in INSN. Both src and dest of the move insn are checked. */
17206 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17207 {
17208 dest = copy_rtx (dest);
17209 src = copy_rtx (src);
17210 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17211 PATTERN (prev) = gen_rtx_SET (dest, src);
17212 INSN_CODE (prev) = -1;
17213 /* Set test register in INSN to dest. */
17214 XEXP (cmp, 0) = copy_rtx (dest);
17215 INSN_CODE (insn) = -1;
17216 }
17217 }
17218 }
17219
17220 /* Convert instructions to their cc-clobbering variant if possible, since
17221 that allows us to use smaller encodings. */
17222
17223 static void
17224 thumb2_reorg (void)
17225 {
17226 basic_block bb;
17227 regset_head live;
17228
17229 INIT_REG_SET (&live);
17230
17231 /* We are freeing block_for_insn in the toplev to keep compatibility
17232 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17233 compute_bb_for_insn ();
17234 df_analyze ();
17235
17236 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17237
17238 FOR_EACH_BB_FN (bb, cfun)
17239 {
17240 if ((current_tune->disparage_flag_setting_t16_encodings
17241 == tune_params::DISPARAGE_FLAGS_ALL)
17242 && optimize_bb_for_speed_p (bb))
17243 continue;
17244
17245 rtx_insn *insn;
17246 Convert_Action action = SKIP;
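/* Instructions that only partially set the flags (logical operations,
   shifts and moves) are converted only when the tuning does not disparage
   flag-setting 16-bit encodings at all, or when this block is being
   optimized for size.  */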
17247 Convert_Action action_for_partial_flag_setting
17248 = ((current_tune->disparage_flag_setting_t16_encodings
17249 != tune_params::DISPARAGE_FLAGS_NEITHER)
17250 && optimize_bb_for_speed_p (bb))
17251 ? SKIP : CONV;
17252
17253 COPY_REG_SET (&live, DF_LR_OUT (bb));
17254 df_simulate_initialize_backwards (bb, &live);
17255 FOR_BB_INSNS_REVERSE (bb, insn)
17256 {
17257 if (NONJUMP_INSN_P (insn)
17258 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17259 && GET_CODE (PATTERN (insn)) == SET)
17260 {
17261 action = SKIP;
17262 rtx pat = PATTERN (insn);
17263 rtx dst = XEXP (pat, 0);
17264 rtx src = XEXP (pat, 1);
17265 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17266
17267 if (UNARY_P (src) || BINARY_P (src))
17268 op0 = XEXP (src, 0);
17269
17270 if (BINARY_P (src))
17271 op1 = XEXP (src, 1);
17272
17273 if (low_register_operand (dst, SImode))
17274 {
17275 switch (GET_CODE (src))
17276 {
17277 case PLUS:
17278 /* Adding two registers and storing the result
17279 in the first source is already a 16-bit
17280 operation. */
17281 if (rtx_equal_p (dst, op0)
17282 && register_operand (op1, SImode))
17283 break;
17284
17285 if (low_register_operand (op0, SImode))
17286 {
17287 /* ADDS <Rd>,<Rn>,<Rm> */
17288 if (low_register_operand (op1, SImode))
17289 action = CONV;
17290 /* ADDS <Rdn>,#<imm8> */
17291 /* SUBS <Rdn>,#<imm8> */
17292 else if (rtx_equal_p (dst, op0)
17293 && CONST_INT_P (op1)
17294 && IN_RANGE (INTVAL (op1), -255, 255))
17295 action = CONV;
17296 /* ADDS <Rd>,<Rn>,#<imm3> */
17297 /* SUBS <Rd>,<Rn>,#<imm3> */
17298 else if (CONST_INT_P (op1)
17299 && IN_RANGE (INTVAL (op1), -7, 7))
17300 action = CONV;
17301 }
17302 /* ADCS <Rd>, <Rn> */
17303 else if (GET_CODE (XEXP (src, 0)) == PLUS
17304 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17305 && low_register_operand (XEXP (XEXP (src, 0), 1),
17306 SImode)
17307 && COMPARISON_P (op1)
17308 && cc_register (XEXP (op1, 0), VOIDmode)
17309 && maybe_get_arm_condition_code (op1) == ARM_CS
17310 && XEXP (op1, 1) == const0_rtx)
17311 action = CONV;
17312 break;
17313
17314 case MINUS:
17315 /* RSBS <Rd>,<Rn>,#0
17316 Not handled here: see NEG below. */
17317 /* SUBS <Rd>,<Rn>,#<imm3>
17318 SUBS <Rdn>,#<imm8>
17319 Not handled here: see PLUS above. */
17320 /* SUBS <Rd>,<Rn>,<Rm> */
17321 if (low_register_operand (op0, SImode)
17322 && low_register_operand (op1, SImode))
17323 action = CONV;
17324 break;
17325
17326 case MULT:
17327 /* MULS <Rdm>,<Rn>,<Rdm>
17328 As an exception to the rule, this is only used
17329 when optimizing for size since MULS is slow on all
17330 known implementations. We do not even want to use
17331 MULS in cold code, if optimizing for speed, so we
17332 test the global flag here. */
17333 if (!optimize_size)
17334 break;
17335 /* Fall through. */
17336 case AND:
17337 case IOR:
17338 case XOR:
17339 /* ANDS <Rdn>,<Rm> */
17340 if (rtx_equal_p (dst, op0)
17341 && low_register_operand (op1, SImode))
17342 action = action_for_partial_flag_setting;
17343 else if (rtx_equal_p (dst, op1)
17344 && low_register_operand (op0, SImode))
17345 action = action_for_partial_flag_setting == SKIP
17346 ? SKIP : SWAP_CONV;
17347 break;
17348
17349 case ASHIFTRT:
17350 case ASHIFT:
17351 case LSHIFTRT:
17352 /* ASRS <Rdn>,<Rm> */
17353 /* LSRS <Rdn>,<Rm> */
17354 /* LSLS <Rdn>,<Rm> */
17355 if (rtx_equal_p (dst, op0)
17356 && low_register_operand (op1, SImode))
17357 action = action_for_partial_flag_setting;
17358 /* ASRS <Rd>,<Rm>,#<imm5> */
17359 /* LSRS <Rd>,<Rm>,#<imm5> */
17360 /* LSLS <Rd>,<Rm>,#<imm5> */
17361 else if (low_register_operand (op0, SImode)
17362 && CONST_INT_P (op1)
17363 && IN_RANGE (INTVAL (op1), 0, 31))
17364 action = action_for_partial_flag_setting;
17365 break;
17366
17367 case ROTATERT:
17368 /* RORS <Rdn>,<Rm> */
17369 if (rtx_equal_p (dst, op0)
17370 && low_register_operand (op1, SImode))
17371 action = action_for_partial_flag_setting;
17372 break;
17373
17374 case NOT:
17375 /* MVNS <Rd>,<Rm> */
17376 if (low_register_operand (op0, SImode))
17377 action = action_for_partial_flag_setting;
17378 break;
17379
17380 case NEG:
17381 /* NEGS <Rd>,<Rm> (a.k.a. RSBS) */
17382 if (low_register_operand (op0, SImode))
17383 action = CONV;
17384 break;
17385
17386 case CONST_INT:
17387 /* MOVS <Rd>,#<imm8> */
17388 if (CONST_INT_P (src)
17389 && IN_RANGE (INTVAL (src), 0, 255))
17390 action = action_for_partial_flag_setting;
17391 break;
17392
17393 case REG:
17394 /* MOVS and MOV<c> with registers have different
17395 encodings, so are not relevant here. */
17396 break;
17397
17398 default:
17399 break;
17400 }
17401 }
17402
17403 if (action != SKIP)
17404 {
17405 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17406 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17407 rtvec vec;
17408
17409 if (action == SWAP_CONV)
17410 {
17411 src = copy_rtx (src);
17412 XEXP (src, 0) = op1;
17413 XEXP (src, 1) = op0;
17414 pat = gen_rtx_SET (dst, src);
17415 vec = gen_rtvec (2, pat, clobber);
17416 }
17417 else /* action == CONV */
17418 vec = gen_rtvec (2, pat, clobber);
17419
17420 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17421 INSN_CODE (insn) = -1;
17422 }
17423 }
17424
17425 if (NONDEBUG_INSN_P (insn))
17426 df_simulate_one_insn_backwards (bb, insn, &live);
17427 }
17428 }
17429
17430 CLEAR_REG_SET (&live);
17431 }
17432
17433 /* GCC puts the pool in the wrong place for ARM, since we can only
17434 load addresses within a limited distance of the PC. We do some
17435 special munging to move the constant pool values to the correct
17436 point in the code. */
17437 static void
17438 arm_reorg (void)
17439 {
17440 rtx_insn *insn;
17441 HOST_WIDE_INT address = 0;
17442 Mfix * fix;
17443
17444 if (use_cmse)
17445 cmse_nonsecure_call_clear_caller_saved ();
17446 if (TARGET_THUMB1)
17447 thumb1_reorg ();
17448 else if (TARGET_THUMB2)
17449 thumb2_reorg ();
17450
17451 /* Ensure all insns that must be split have been split at this point.
17452 Otherwise, the pool placement code below may compute incorrect
17453 insn lengths. Note that when optimizing, all insns have already
17454 been split at this point. */
17455 if (!optimize)
17456 split_all_insns_noflow ();
17457
17458 /* When the literal pool is disabled it should never be necessary to
17459 create one, so do not even attempt it. */
17460 if (arm_disable_literal_pool)
17461 return;
17462
17463 minipool_fix_head = minipool_fix_tail = NULL;
17464
17465 /* The first insn must always be a note, or the code below won't
17466 scan it properly. */
17467 insn = get_insns ();
17468 gcc_assert (NOTE_P (insn));
17469 minipool_pad = 0;
17470
17471 /* Scan all the insns and record the operands that will need fixing. */
17472 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17473 {
17474 if (BARRIER_P (insn))
17475 push_minipool_barrier (insn, address);
17476 else if (INSN_P (insn))
17477 {
17478 rtx_jump_table_data *table;
17479
17480 note_invalid_constants (insn, address, true);
17481 address += get_attr_length (insn);
17482
17483 /* If the insn is a vector jump, add the size of the table
17484 and skip the table. */
17485 if (tablejump_p (insn, NULL, &table))
17486 {
17487 address += get_jump_table_size (table);
17488 insn = table;
17489 }
17490 }
17491 else if (LABEL_P (insn))
17492 /* Add the worst-case padding due to alignment. We don't add
17493 the _current_ padding because the minipool insertions
17494 themselves might change it. */
17495 address += get_label_padding (insn);
17496 }
17497
17498 fix = minipool_fix_head;
17499
17500 /* Now scan the fixups and perform the required changes. */
17501 while (fix)
17502 {
17503 Mfix * ftmp;
17504 Mfix * fdel;
17505 Mfix * last_added_fix;
17506 Mfix * last_barrier = NULL;
17507 Mfix * this_fix;
17508
17509 /* Skip any further barriers before the next fix. */
17510 while (fix && BARRIER_P (fix->insn))
17511 fix = fix->next;
17512
17513 /* No more fixes. */
17514 if (fix == NULL)
17515 break;
17516
17517 last_added_fix = NULL;
17518
17519 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17520 {
17521 if (BARRIER_P (ftmp->insn))
17522 {
17523 if (ftmp->address >= minipool_vector_head->max_address)
17524 break;
17525
17526 last_barrier = ftmp;
17527 }
17528 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17529 break;
17530
17531 last_added_fix = ftmp; /* Keep track of the last fix added. */
17532 }
17533
17534 /* If we found a barrier, drop back to that; any fixes that we
17535 could have reached but come after the barrier will now go in
17536 the next mini-pool. */
17537 if (last_barrier != NULL)
17538 {
17539 /* Reduce the refcount for those fixes that won't go into this
17540 pool after all. */
17541 for (fdel = last_barrier->next;
17542 fdel && fdel != ftmp;
17543 fdel = fdel->next)
17544 {
17545 fdel->minipool->refcount--;
17546 fdel->minipool = NULL;
17547 }
17548
17549 ftmp = last_barrier;
17550 }
17551 else
17552 {
17553 /* ftmp is the first fix that we can't fit into this pool and
17554 there are no natural barriers that we could use. Insert a
17555 new barrier in the code somewhere between the previous
17556 fix and this one, and arrange to jump around it. */
17557 HOST_WIDE_INT max_address;
17558
17559 /* The last item on the list of fixes must be a barrier, so
17560 we can never run off the end of the list of fixes without
17561 last_barrier being set. */
17562 gcc_assert (ftmp);
17563
17564 max_address = minipool_vector_head->max_address;
17565 /* Check that there isn't another fix that is in range that
17566 we couldn't fit into this pool because the pool was
17567 already too large: we need to put the pool before such an
17568 instruction. The pool itself may come just after the
17569 fix because create_fix_barrier also allows space for a
17570 jump instruction. */
17571 if (ftmp->address < max_address)
17572 max_address = ftmp->address + 1;
17573
17574 last_barrier = create_fix_barrier (last_added_fix, max_address);
17575 }
17576
17577 assign_minipool_offsets (last_barrier);
17578
17579 while (ftmp)
17580 {
17581 if (!BARRIER_P (ftmp->insn)
17582 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17583 == NULL))
17584 break;
17585
17586 ftmp = ftmp->next;
17587 }
17588
17589 /* Scan over the fixes we have identified for this pool, fixing them
17590 up and adding the constants to the pool itself. */
17591 for (this_fix = fix; this_fix && ftmp != this_fix;
17592 this_fix = this_fix->next)
17593 if (!BARRIER_P (this_fix->insn))
17594 {
17595 rtx addr
17596 = plus_constant (Pmode,
17597 gen_rtx_LABEL_REF (VOIDmode,
17598 minipool_vector_label),
17599 this_fix->minipool->offset);
17600 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17601 }
17602
17603 dump_minipool (last_barrier->insn);
17604 fix = ftmp;
17605 }
17606
17607 /* From now on we must synthesize any constants that we can't handle
17608 directly. This can happen if the RTL gets split during final
17609 instruction generation. */
17610 cfun->machine->after_arm_reorg = 1;
17611
17612 /* Free the minipool memory. */
17613 obstack_free (&minipool_obstack, minipool_startobj);
17614 }
17615 \f
17616 /* Routines to output assembly language. */
17617
17618 /* Return the string representation of the passed-in real value. */
17619 static const char *
17620 fp_const_from_val (REAL_VALUE_TYPE *r)
17621 {
17622 if (!fp_consts_inited)
17623 init_fp_table ();
17624
17625 gcc_assert (real_equal (r, &value_fp0));
17626 return "0";
17627 }
17628
17629 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17630 OPERANDS[1] is the base register, RETURN_PC is true iff the return
17631 insn is in the list, and UPDATE is true iff the list contains an
17632 explicit update of the base register. */
17633 void
17634 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17635 bool update)
17636 {
17637 int i;
17638 char pattern[100];
17639 int offset;
17640 const char *conditional;
17641 int num_saves = XVECLEN (operands[0], 0);
17642 unsigned int regno;
17643 unsigned int regno_base = REGNO (operands[1]);
17644 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17645
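/* The first OFFSET elements of the PARALLEL are the optional base-register
   update and the return, not destination-register loads; skip over them.  */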
17646 offset = 0;
17647 offset += update ? 1 : 0;
17648 offset += return_pc ? 1 : 0;
17649
17650 /* Is the base register in the list? */
17651 for (i = offset; i < num_saves; i++)
17652 {
17653 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17654 /* If SP is in the list, then the base register must be SP. */
17655 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17656 /* If base register is in the list, there must be no explicit update. */
17657 if (regno == regno_base)
17658 gcc_assert (!update);
17659 }
17660
17661 conditional = reverse ? "%?%D0" : "%?%d0";
17662 /* Can't use POP if returning from an interrupt. */
17663 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17664 sprintf (pattern, "pop%s\t{", conditional);
17665 else
17666 {
17667 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17668 It's just a convention, their semantics are identical. */
17669 if (regno_base == SP_REGNUM)
17670 sprintf (pattern, "ldmfd%s\t", conditional);
17671 else if (update)
17672 sprintf (pattern, "ldmia%s\t", conditional);
17673 else
17674 sprintf (pattern, "ldm%s\t", conditional);
17675
17676 strcat (pattern, reg_names[regno_base]);
17677 if (update)
17678 strcat (pattern, "!, {");
17679 else
17680 strcat (pattern, ", {");
17681 }
17682
17683 /* Output the first destination register. */
17684 strcat (pattern,
17685 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17686
17687 /* Output the rest of the destination registers. */
17688 for (i = offset + 1; i < num_saves; i++)
17689 {
17690 strcat (pattern, ", ");
17691 strcat (pattern,
17692 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17693 }
17694
17695 strcat (pattern, "}");
17696
17697 if (interrupt_p && return_pc)
17698 strcat (pattern, "^");
17699
17700 output_asm_insn (pattern, &cond);
17701 }
17702
17703
17704 /* Output the assembly for a store multiple. */
17705
17706 const char *
17707 vfp_output_vstmd (rtx * operands)
17708 {
17709 char pattern[100];
17710 int p;
17711 int base;
17712 int i;
17713 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17714 ? XEXP (operands[0], 0)
17715 : XEXP (XEXP (operands[0], 0), 0);
17716 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17717
17718 if (push_p)
17719 strcpy (pattern, "vpush%?.64\t{%P1");
17720 else
17721 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17722
17723 p = strlen (pattern);
17724
17725 gcc_assert (REG_P (operands[1]));
17726
17727 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17728 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17729 {
17730 p += sprintf (&pattern[p], ", d%d", base + i);
17731 }
17732 strcpy (&pattern[p], "}");
17733
17734 output_asm_insn (pattern, operands);
17735 return "";
17736 }
17737
17738
17739 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
17740 number of bytes pushed. */
17741
17742 static int
17743 vfp_emit_fstmd (int base_reg, int count)
17744 {
17745 rtx par;
17746 rtx dwarf;
17747 rtx tmp, reg;
17748 int i;
17749
17750 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17751 register pairs are stored by a store multiple insn. We avoid this
17752 by pushing an extra pair. */
17753 if (count == 2 && !arm_arch6)
17754 {
17755 if (base_reg == LAST_VFP_REGNUM - 3)
17756 base_reg -= 2;
17757 count++;
17758 }
17759
17760 /* FSTMD may not store more than 16 doubleword registers at once. Split
17761 larger stores into multiple parts (up to a maximum of two, in
17762 practice). */
17763 if (count > 16)
17764 {
17765 int saved;
17766 /* NOTE: base_reg is an internal register number, so each D register
17767 counts as 2. */
17768 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17769 saved += vfp_emit_fstmd (base_reg, 16);
17770 return saved;
17771 }
17772
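/* PAR is the store-multiple insn itself; DWARF describes the same stores
   as individual frame-related sets so the unwinder can track them.  */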
17773 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17774 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17775
17776 reg = gen_rtx_REG (DFmode, base_reg);
17777 base_reg += 2;
17778
17779 XVECEXP (par, 0, 0)
17780 = gen_rtx_SET (gen_frame_mem
17781 (BLKmode,
17782 gen_rtx_PRE_MODIFY (Pmode,
17783 stack_pointer_rtx,
17784 plus_constant
17785 (Pmode, stack_pointer_rtx,
17786 - (count * 8)))
17787 ),
17788 gen_rtx_UNSPEC (BLKmode,
17789 gen_rtvec (1, reg),
17790 UNSPEC_PUSH_MULT));
17791
17792 tmp = gen_rtx_SET (stack_pointer_rtx,
17793 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17794 RTX_FRAME_RELATED_P (tmp) = 1;
17795 XVECEXP (dwarf, 0, 0) = tmp;
17796
17797 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17798 RTX_FRAME_RELATED_P (tmp) = 1;
17799 XVECEXP (dwarf, 0, 1) = tmp;
17800
17801 for (i = 1; i < count; i++)
17802 {
17803 reg = gen_rtx_REG (DFmode, base_reg);
17804 base_reg += 2;
17805 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17806
17807 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17808 plus_constant (Pmode,
17809 stack_pointer_rtx,
17810 i * 8)),
17811 reg);
17812 RTX_FRAME_RELATED_P (tmp) = 1;
17813 XVECEXP (dwarf, 0, i + 1) = tmp;
17814 }
17815
17816 par = emit_insn (par);
17817 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17818 RTX_FRAME_RELATED_P (par) = 1;
17819
17820 return count * 8;
17821 }
17822
17823 /* Return TRUE if -mcmse has been passed and the function pointed to by 'addr'
17824 has the cmse_nonsecure_call attribute; return FALSE otherwise. */
17825
17826 bool
17827 detect_cmse_nonsecure_call (tree addr)
17828 {
17829 if (!addr)
17830 return FALSE;
17831
17832 tree fntype = TREE_TYPE (addr);
17833 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
17834 TYPE_ATTRIBUTES (fntype)))
17835 return TRUE;
17836 return FALSE;
17837 }
17838
17839
17840 /* Emit a call instruction with pattern PAT. ADDR is the address of
17841 the call target. */
17842
17843 void
17844 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17845 {
17846 rtx insn;
17847
17848 insn = emit_call_insn (pat);
17849
17850 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17851 If the call might use such an entry, add a use of the PIC register
17852 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17853 if (TARGET_VXWORKS_RTP
17854 && flag_pic
17855 && !sibcall
17856 && GET_CODE (addr) == SYMBOL_REF
17857 && (SYMBOL_REF_DECL (addr)
17858 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17859 : !SYMBOL_REF_LOCAL_P (addr)))
17860 {
17861 require_pic_register ();
17862 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17863 }
17864
17865 if (TARGET_AAPCS_BASED)
17866 {
17867 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17868 linker. We need to add an IP clobber to allow setting
17869 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17870 is not needed since it's a fixed register. */
17871 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17872 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17873 }
17874 }
17875
17876 /* Output a 'call' insn. */
17877 const char *
17878 output_call (rtx *operands)
17879 {
17880 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17881
17882 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17883 if (REGNO (operands[0]) == LR_REGNUM)
17884 {
17885 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17886 output_asm_insn ("mov%?\t%0, %|lr", operands);
17887 }
17888
17889 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17890
17891 if (TARGET_INTERWORK || arm_arch4t)
17892 output_asm_insn ("bx%?\t%0", operands);
17893 else
17894 output_asm_insn ("mov%?\t%|pc, %0", operands);
17895
17896 return "";
17897 }
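
/* For example (register choice is illustrative): on a pre-ARMv5 target a
   call through r2 comes out as either

     mov lr, pc
     bx  r2          @ TARGET_INTERWORK or ARMv4T

   or

     mov lr, pc
     mov pc, r2      @ otherwise

   with an extra "mov ip, lr" emitted first when the callee address is
   already in lr.  */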
17898
17899 /* Output a move from arm registers to arm registers of a long double
17900 OPERANDS[0] is the destination.
17901 OPERANDS[1] is the source. */
17902 const char *
17903 output_mov_long_double_arm_from_arm (rtx *operands)
17904 {
17905 /* We have to be careful here because the two might overlap. */
17906 int dest_start = REGNO (operands[0]);
17907 int src_start = REGNO (operands[1]);
17908 rtx ops[2];
17909 int i;
17910
17911 if (dest_start < src_start)
17912 {
17913 for (i = 0; i < 3; i++)
17914 {
17915 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17916 ops[1] = gen_rtx_REG (SImode, src_start + i);
17917 output_asm_insn ("mov%?\t%0, %1", ops);
17918 }
17919 }
17920 else
17921 {
17922 for (i = 2; i >= 0; i--)
17923 {
17924 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17925 ops[1] = gen_rtx_REG (SImode, src_start + i);
17926 output_asm_insn ("mov%?\t%0, %1", ops);
17927 }
17928 }
17929
17930 return "";
17931 }
17932
17933 void
17934 arm_emit_movpair (rtx dest, rtx src)
17935 {
17936 /* If the src is an immediate, simplify it. */
17937 if (CONST_INT_P (src))
17938 {
17939 HOST_WIDE_INT val = INTVAL (src);
17940 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17941 if ((val >> 16) & 0x0000ffff)
17942 {
17943 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17944 GEN_INT (16)),
17945 GEN_INT ((val >> 16) & 0x0000ffff));
17946 rtx_insn *insn = get_last_insn ();
17947 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17948 }
17949 return;
17950 }
17951 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17952 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17953 rtx_insn *insn = get_last_insn ();
17954 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17955 }
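
/* A rough illustration (constant and register invented for the example):
   for arm_emit_movpair (reg, GEN_INT (0x12345678)) the two SETs emitted
   above correspond approximately to a movw/movt pair,

     movw rD, #0x5678   @ dest = val & 0xffff
     movt rD, #0x1234   @ top halfword via the ZERO_EXTRACT

   while a symbolic SRC goes through the HIGH/LO_SUM path, i.e. the usual
   movw/movt pair with :lower16:/:upper16: relocations.  If the top
   halfword of the constant is zero, only the first instruction is
   emitted.  */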
17956
17957 /* Output a move between double words. It must be REG<-MEM
17958 or MEM<-REG. */
17959 const char *
17960 output_move_double (rtx *operands, bool emit, int *count)
17961 {
17962 enum rtx_code code0 = GET_CODE (operands[0]);
17963 enum rtx_code code1 = GET_CODE (operands[1]);
17964 rtx otherops[3];
17965 if (count)
17966 *count = 1;
17967
17968 /* The only case when this might happen is when
17969 you are looking at the length of a DImode instruction
17970 that has an invalid constant in it. */
17971 if (code0 == REG && code1 != MEM)
17972 {
17973 gcc_assert (!emit);
17974 *count = 2;
17975 return "";
17976 }
17977
17978 if (code0 == REG)
17979 {
17980 unsigned int reg0 = REGNO (operands[0]);
17981
17982 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17983
17984 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17985
17986 switch (GET_CODE (XEXP (operands[1], 0)))
17987 {
17988 case REG:
17989
17990 if (emit)
17991 {
17992 if (TARGET_LDRD
17993 && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
17994 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
17995 else
17996 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
17997 }
17998 break;
17999
18000 case PRE_INC:
18001 gcc_assert (TARGET_LDRD);
18002 if (emit)
18003 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18004 break;
18005
18006 case PRE_DEC:
18007 if (emit)
18008 {
18009 if (TARGET_LDRD)
18010 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18011 else
18012 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18013 }
18014 break;
18015
18016 case POST_INC:
18017 if (emit)
18018 {
18019 if (TARGET_LDRD)
18020 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18021 else
18022 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18023 }
18024 break;
18025
18026 case POST_DEC:
18027 gcc_assert (TARGET_LDRD);
18028 if (emit)
18029 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18030 break;
18031
18032 case PRE_MODIFY:
18033 case POST_MODIFY:
18034 /* Autoincrement addressing modes should never have overlapping
18035 base and destination registers, and overlapping index registers
18036 are already prohibited, so this doesn't need to worry about
18037 fix_cm3_ldrd. */
18038 otherops[0] = operands[0];
18039 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18040 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18041
18042 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18043 {
18044 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18045 {
18046 /* Registers overlap so split out the increment. */
18047 if (emit)
18048 {
18049 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18050 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18051 }
18052 if (count)
18053 *count = 2;
18054 }
18055 else
18056 {
18057 /* Use a single insn if we can.
18058 FIXME: IWMMXT allows offsets larger than ldrd can
18059 handle, fix these up with a pair of ldr. */
18060 if (TARGET_THUMB2
18061 || !CONST_INT_P (otherops[2])
18062 || (INTVAL (otherops[2]) > -256
18063 && INTVAL (otherops[2]) < 256))
18064 {
18065 if (emit)
18066 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18067 }
18068 else
18069 {
18070 if (emit)
18071 {
18072 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18073 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18074 }
18075 if (count)
18076 *count = 2;
18077
18078 }
18079 }
18080 }
18081 else
18082 {
18083 /* Use a single insn if we can.
18084 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18085 fix these up with a pair of ldr. */
18086 if (TARGET_THUMB2
18087 || !CONST_INT_P (otherops[2])
18088 || (INTVAL (otherops[2]) > -256
18089 && INTVAL (otherops[2]) < 256))
18090 {
18091 if (emit)
18092 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18093 }
18094 else
18095 {
18096 if (emit)
18097 {
18098 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18099 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18100 }
18101 if (count)
18102 *count = 2;
18103 }
18104 }
18105 break;
18106
18107 case LABEL_REF:
18108 case CONST:
18109 /* We might be able to use ldrd %0, %1 here. However, the range is
18110 different to ldr/adr, and it is broken on some ARMv7-M
18111 implementations. */
18112 /* Use the second register of the pair to avoid problematic
18113 overlap. */
18114 otherops[1] = operands[1];
18115 if (emit)
18116 output_asm_insn ("adr%?\t%0, %1", otherops);
18117 operands[1] = otherops[0];
18118 if (emit)
18119 {
18120 if (TARGET_LDRD)
18121 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18122 else
18123 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18124 }
18125
18126 if (count)
18127 *count = 2;
18128 break;
18129
18130 /* ??? This needs checking for thumb2. */
18131 default:
18132 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18133 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18134 {
18135 otherops[0] = operands[0];
18136 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18137 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18138
18139 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18140 {
18141 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18142 {
18143 switch ((int) INTVAL (otherops[2]))
18144 {
18145 case -8:
18146 if (emit)
18147 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18148 return "";
18149 case -4:
18150 if (TARGET_THUMB2)
18151 break;
18152 if (emit)
18153 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18154 return "";
18155 case 4:
18156 if (TARGET_THUMB2)
18157 break;
18158 if (emit)
18159 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18160 return "";
18161 }
18162 }
18163 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
18164 operands[1] = otherops[0];
18165 if (TARGET_LDRD
18166 && (REG_P (otherops[2])
18167 || TARGET_THUMB2
18168 || (CONST_INT_P (otherops[2])
18169 && INTVAL (otherops[2]) > -256
18170 && INTVAL (otherops[2]) < 256)))
18171 {
18172 if (reg_overlap_mentioned_p (operands[0],
18173 otherops[2]))
18174 {
18175 /* Swap base and index registers over to
18176 avoid a conflict. */
18177 std::swap (otherops[1], otherops[2]);
18178 }
18179 /* If both registers conflict, it will usually
18180 have been fixed by a splitter. */
18181 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18182 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18183 {
18184 if (emit)
18185 {
18186 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18187 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18188 }
18189 if (count)
18190 *count = 2;
18191 }
18192 else
18193 {
18194 otherops[0] = operands[0];
18195 if (emit)
18196 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18197 }
18198 return "";
18199 }
18200
18201 if (CONST_INT_P (otherops[2]))
18202 {
18203 if (emit)
18204 {
18205 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18206 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18207 else
18208 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18209 }
18210 }
18211 else
18212 {
18213 if (emit)
18214 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18215 }
18216 }
18217 else
18218 {
18219 if (emit)
18220 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18221 }
18222
18223 if (count)
18224 *count = 2;
18225
18226 if (TARGET_LDRD)
18227 return "ldrd%?\t%0, [%1]";
18228
18229 return "ldmia%?\t%1, %M0";
18230 }
18231 else
18232 {
18233 otherops[1] = adjust_address (operands[1], SImode, 4);
18234 /* Take care of overlapping base/data reg. */
18235 if (reg_mentioned_p (operands[0], operands[1]))
18236 {
18237 if (emit)
18238 {
18239 output_asm_insn ("ldr%?\t%0, %1", otherops);
18240 output_asm_insn ("ldr%?\t%0, %1", operands);
18241 }
18242 if (count)
18243 *count = 2;
18244
18245 }
18246 else
18247 {
18248 if (emit)
18249 {
18250 output_asm_insn ("ldr%?\t%0, %1", operands);
18251 output_asm_insn ("ldr%?\t%0, %1", otherops);
18252 }
18253 if (count)
18254 *count = 2;
18255 }
18256 }
18257 }
18258 }
18259 else
18260 {
18261 /* Constraints should ensure this. */
18262 gcc_assert (code0 == MEM && code1 == REG);
18263 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18264 || (TARGET_ARM && TARGET_LDRD));
18265
18266 switch (GET_CODE (XEXP (operands[0], 0)))
18267 {
18268 case REG:
18269 if (emit)
18270 {
18271 if (TARGET_LDRD)
18272 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18273 else
18274 output_asm_insn ("stm%?\t%m0, %M1", operands);
18275 }
18276 break;
18277
18278 case PRE_INC:
18279 gcc_assert (TARGET_LDRD);
18280 if (emit)
18281 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18282 break;
18283
18284 case PRE_DEC:
18285 if (emit)
18286 {
18287 if (TARGET_LDRD)
18288 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18289 else
18290 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18291 }
18292 break;
18293
18294 case POST_INC:
18295 if (emit)
18296 {
18297 if (TARGET_LDRD)
18298 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18299 else
18300 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18301 }
18302 break;
18303
18304 case POST_DEC:
18305 gcc_assert (TARGET_LDRD);
18306 if (emit)
18307 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18308 break;
18309
18310 case PRE_MODIFY:
18311 case POST_MODIFY:
18312 otherops[0] = operands[1];
18313 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18314 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18315
18316 /* IWMMXT allows offsets larger than ldrd can handle,
18317 fix these up with a pair of ldr. */
18318 if (!TARGET_THUMB2
18319 && CONST_INT_P (otherops[2])
18320 && (INTVAL (otherops[2]) <= -256
18321 || INTVAL (otherops[2]) >= 256))
18322 {
18323 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18324 {
18325 if (emit)
18326 {
18327 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18328 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18329 }
18330 if (count)
18331 *count = 2;
18332 }
18333 else
18334 {
18335 if (emit)
18336 {
18337 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18338 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18339 }
18340 if (count)
18341 *count = 2;
18342 }
18343 }
18344 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18345 {
18346 if (emit)
18347 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18348 }
18349 else
18350 {
18351 if (emit)
18352 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18353 }
18354 break;
18355
18356 case PLUS:
18357 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18358 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18359 {
18360 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18361 {
18362 case -8:
18363 if (emit)
18364 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18365 return "";
18366
18367 case -4:
18368 if (TARGET_THUMB2)
18369 break;
18370 if (emit)
18371 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18372 return "";
18373
18374 case 4:
18375 if (TARGET_THUMB2)
18376 break;
18377 if (emit)
18378 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18379 return "";
18380 }
18381 }
18382 if (TARGET_LDRD
18383 && (REG_P (otherops[2])
18384 || TARGET_THUMB2
18385 || (CONST_INT_P (otherops[2])
18386 && INTVAL (otherops[2]) > -256
18387 && INTVAL (otherops[2]) < 256)))
18388 {
18389 otherops[0] = operands[1];
18390 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18391 if (emit)
18392 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18393 return "";
18394 }
18395 /* Fall through */
18396
18397 default:
18398 otherops[0] = adjust_address (operands[0], SImode, 4);
18399 otherops[1] = operands[1];
18400 if (emit)
18401 {
18402 output_asm_insn ("str%?\t%1, %0", operands);
18403 output_asm_insn ("str%?\t%H1, %0", otherops);
18404 }
18405 if (count)
18406 *count = 2;
18407 }
18408 }
18409
18410 return "";
18411 }
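
/* A minimal illustration (register numbers are arbitrary): for a DImode
   load from a plain register address, (set (reg:DI r4) (mem:DI (reg:SI r0))),
   the REG case above prints

     ldrd  r4, [r0]            @ TARGET_LDRD, fix_cm3_ldrd overlap check not hit
     ldmia r0, {r4, r5}        @ otherwise

   When called with EMIT false nothing is printed; the function only reports
   through *COUNT how many instructions the chosen sequence would take.  */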
18412
18413 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18414 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18415
18416 const char *
18417 output_move_quad (rtx *operands)
18418 {
18419 if (REG_P (operands[0]))
18420 {
18421 /* Load, or reg->reg move. */
18422
18423 if (MEM_P (operands[1]))
18424 {
18425 switch (GET_CODE (XEXP (operands[1], 0)))
18426 {
18427 case REG:
18428 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18429 break;
18430
18431 case LABEL_REF:
18432 case CONST:
18433 output_asm_insn ("adr%?\t%0, %1", operands);
18434 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18435 break;
18436
18437 default:
18438 gcc_unreachable ();
18439 }
18440 }
18441 else
18442 {
18443 rtx ops[2];
18444 int dest, src, i;
18445
18446 gcc_assert (REG_P (operands[1]));
18447
18448 dest = REGNO (operands[0]);
18449 src = REGNO (operands[1]);
18450
18451 /* This seems pretty dumb, but hopefully GCC won't try to do it
18452 very often. */
18453 if (dest < src)
18454 for (i = 0; i < 4; i++)
18455 {
18456 ops[0] = gen_rtx_REG (SImode, dest + i);
18457 ops[1] = gen_rtx_REG (SImode, src + i);
18458 output_asm_insn ("mov%?\t%0, %1", ops);
18459 }
18460 else
18461 for (i = 3; i >= 0; i--)
18462 {
18463 ops[0] = gen_rtx_REG (SImode, dest + i);
18464 ops[1] = gen_rtx_REG (SImode, src + i);
18465 output_asm_insn ("mov%?\t%0, %1", ops);
18466 }
18467 }
18468 }
18469 else
18470 {
18471 gcc_assert (MEM_P (operands[0]));
18472 gcc_assert (REG_P (operands[1]));
18473 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18474
18475 switch (GET_CODE (XEXP (operands[0], 0)))
18476 {
18477 case REG:
18478 output_asm_insn ("stm%?\t%m0, %M1", operands);
18479 break;
18480
18481 default:
18482 gcc_unreachable ();
18483 }
18484 }
18485
18486 return "";
18487 }
18488
18489 /* Output a VFP load or store instruction. */
18490
18491 const char *
18492 output_move_vfp (rtx *operands)
18493 {
18494 rtx reg, mem, addr, ops[2];
18495 int load = REG_P (operands[0]);
18496 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18497 int sp = (!TARGET_VFP_FP16INST
18498 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18499 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18500 const char *templ;
18501 char buff[50];
18502 machine_mode mode;
18503
18504 reg = operands[!load];
18505 mem = operands[load];
18506
18507 mode = GET_MODE (reg);
18508
18509 gcc_assert (REG_P (reg));
18510 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18511 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18512 || mode == SFmode
18513 || mode == DFmode
18514 || mode == HImode
18515 || mode == SImode
18516 || mode == DImode
18517 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18518 gcc_assert (MEM_P (mem));
18519
18520 addr = XEXP (mem, 0);
18521
18522 switch (GET_CODE (addr))
18523 {
18524 case PRE_DEC:
18525 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18526 ops[0] = XEXP (addr, 0);
18527 ops[1] = reg;
18528 break;
18529
18530 case POST_INC:
18531 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18532 ops[0] = XEXP (addr, 0);
18533 ops[1] = reg;
18534 break;
18535
18536 default:
18537 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18538 ops[0] = reg;
18539 ops[1] = mem;
18540 break;
18541 }
18542
18543 sprintf (buff, templ,
18544 load ? "ld" : "st",
18545 dp ? "64" : sp ? "32" : "16",
18546 dp ? "P" : "",
18547 integer_p ? "\t%@ int" : "");
18548 output_asm_insn (buff, ops);
18549
18550 return "";
18551 }
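
/* Illustrative outputs (register numbers invented for the example): an
   SFmode load from [r0] goes through the default case and prints roughly

     vldr.32  s14, [r0]

   an SImode load gets a trailing "@ int" comment appended, and the
   PRE_DEC / POST_INC cases use the vstmdb / vldmia style multiple forms
   with a single register in the list.  */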
18552
18553 /* Output a Neon double-word or quad-word load or store, or a load
18554 or store for larger structure modes.
18555
18556 WARNING: The ordering of elements is weird in big-endian mode,
18557 because the EABI requires that vectors stored in memory appear
18558 as though they were stored by a VSTM instruction.
18559 GCC RTL defines element ordering based on in-memory order.
18560 This can be different from the architectural ordering of elements
18561 within a NEON register. The intrinsics defined in arm_neon.h use the
18562 NEON register element ordering, not the GCC RTL element ordering.
18563
18564 For example, the in-memory ordering of a big-endian quadword
18565 vector with 16-bit elements when stored from register pair {d0,d1}
18566 will be (lowest address first, d0[N] is NEON register element N):
18567
18568 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18569
18570 When necessary, quadword registers (dN, dN+1) are moved to ARM
18571 registers from rN in the order:
18572
18573 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18574
18575 So that STM/LDM can be used on vectors in ARM registers, and the
18576 same memory layout will result as if VSTM/VLDM were used.
18577
18578 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18579 possible, which allows use of appropriate alignment tags.
18580 Note that the choice of "64" is independent of the actual vector
18581 element size; this size simply ensures that the behavior is
18582 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18583
18584 Due to limitations of those instructions, use of VST1.64/VLD1.64
18585 is not possible if:
18586 - the address contains PRE_DEC, or
18587 - the mode refers to more than 4 double-word registers
18588
18589 In those cases, it would be possible to replace VSTM/VLDM by a
18590 sequence of instructions; this is not currently implemented since
18591 this is not certain to actually improve performance. */
18592
18593 const char *
18594 output_move_neon (rtx *operands)
18595 {
18596 rtx reg, mem, addr, ops[2];
18597 int regno, nregs, load = REG_P (operands[0]);
18598 const char *templ;
18599 char buff[50];
18600 machine_mode mode;
18601
18602 reg = operands[!load];
18603 mem = operands[load];
18604
18605 mode = GET_MODE (reg);
18606
18607 gcc_assert (REG_P (reg));
18608 regno = REGNO (reg);
18609 nregs = REG_NREGS (reg) / 2;
18610 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18611 || NEON_REGNO_OK_FOR_QUAD (regno));
18612 gcc_assert (VALID_NEON_DREG_MODE (mode)
18613 || VALID_NEON_QREG_MODE (mode)
18614 || VALID_NEON_STRUCT_MODE (mode));
18615 gcc_assert (MEM_P (mem));
18616
18617 addr = XEXP (mem, 0);
18618
18619 /* Strip off const from addresses like (const (plus (...))). */
18620 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18621 addr = XEXP (addr, 0);
18622
18623 switch (GET_CODE (addr))
18624 {
18625 case POST_INC:
18626 /* We have to use vldm / vstm for too-large modes. */
18627 if (nregs > 4)
18628 {
18629 templ = "v%smia%%?\t%%0!, %%h1";
18630 ops[0] = XEXP (addr, 0);
18631 }
18632 else
18633 {
18634 templ = "v%s1.64\t%%h1, %%A0";
18635 ops[0] = mem;
18636 }
18637 ops[1] = reg;
18638 break;
18639
18640 case PRE_DEC:
18641 /* We have to use vldm / vstm in this case, since there is no
18642 pre-decrement form of the vld1 / vst1 instructions. */
18643 templ = "v%smdb%%?\t%%0!, %%h1";
18644 ops[0] = XEXP (addr, 0);
18645 ops[1] = reg;
18646 break;
18647
18648 case POST_MODIFY:
18649 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18650 gcc_unreachable ();
18651
18652 case REG:
18653 /* We have to use vldm / vstm for too-large modes. */
18654 if (nregs > 1)
18655 {
18656 if (nregs > 4)
18657 templ = "v%smia%%?\t%%m0, %%h1";
18658 else
18659 templ = "v%s1.64\t%%h1, %%A0";
18660
18661 ops[0] = mem;
18662 ops[1] = reg;
18663 break;
18664 }
18665 /* Fall through. */
18666 case LABEL_REF:
18667 case PLUS:
18668 {
18669 int i;
18670 int overlap = -1;
18671 for (i = 0; i < nregs; i++)
18672 {
18673 /* We're only using DImode here because it's a convenient size. */
18674 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18675 ops[1] = adjust_address (mem, DImode, 8 * i);
18676 if (reg_overlap_mentioned_p (ops[0], mem))
18677 {
18678 gcc_assert (overlap == -1);
18679 overlap = i;
18680 }
18681 else
18682 {
18683 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18684 output_asm_insn (buff, ops);
18685 }
18686 }
18687 if (overlap != -1)
18688 {
18689 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18690 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18691 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18692 output_asm_insn (buff, ops);
18693 }
18694
18695 return "";
18696 }
18697
18698 default:
18699 gcc_unreachable ();
18700 }
18701
18702 sprintf (buff, templ, load ? "ld" : "st");
18703 output_asm_insn (buff, ops);
18704
18705 return "";
18706 }
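
/* A rough example (register numbers invented): loading a 16-byte quad
   vector with a POST_INC address, e.g. q0 from [r0]!, takes the POST_INC
   case with nregs == 2 and prints something of the form

     vld1.64  {d0, d1}, [r0]!

   whereas a structure mode spanning more than four D registers falls back
   to vldmia/vstmia, and a PLUS or LABEL_REF address is handled above by a
   sequence of individual vldr/vstr D-register moves.  */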
18707
18708 /* Compute and return the length of neon_mov<mode>, where <mode> is
18709 one of VSTRUCT modes: EI, OI, CI or XI. */
18710 int
18711 arm_attr_length_move_neon (rtx_insn *insn)
18712 {
18713 rtx reg, mem, addr;
18714 int load;
18715 machine_mode mode;
18716
18717 extract_insn_cached (insn);
18718
18719 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18720 {
18721 mode = GET_MODE (recog_data.operand[0]);
18722 switch (mode)
18723 {
18724 case E_EImode:
18725 case E_OImode:
18726 return 8;
18727 case E_CImode:
18728 return 12;
18729 case E_XImode:
18730 return 16;
18731 default:
18732 gcc_unreachable ();
18733 }
18734 }
18735
18736 load = REG_P (recog_data.operand[0]);
18737 reg = recog_data.operand[!load];
18738 mem = recog_data.operand[load];
18739
18740 gcc_assert (MEM_P (mem));
18741
18742 addr = XEXP (mem, 0);
18743
18744 /* Strip off const from addresses like (const (plus (...))). */
18745 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18746 addr = XEXP (addr, 0);
18747
18748 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18749 {
18750 int insns = REG_NREGS (reg) / 2;
18751 return insns * 4;
18752 }
18753 else
18754 return 4;
18755 }
18756
18757 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18758 return zero. */
18759
18760 int
18761 arm_address_offset_is_imm (rtx_insn *insn)
18762 {
18763 rtx mem, addr;
18764
18765 extract_insn_cached (insn);
18766
18767 if (REG_P (recog_data.operand[0]))
18768 return 0;
18769
18770 mem = recog_data.operand[0];
18771
18772 gcc_assert (MEM_P (mem));
18773
18774 addr = XEXP (mem, 0);
18775
18776 if (REG_P (addr)
18777 || (GET_CODE (addr) == PLUS
18778 && REG_P (XEXP (addr, 0))
18779 && CONST_INT_P (XEXP (addr, 1))))
18780 return 1;
18781 else
18782 return 0;
18783 }
18784
18785 /* Output an ADD r, s, #n where n may be too big for one instruction.
18786 If adding zero to one register, output nothing. */
18787 const char *
18788 output_add_immediate (rtx *operands)
18789 {
18790 HOST_WIDE_INT n = INTVAL (operands[2]);
18791
18792 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18793 {
18794 if (n < 0)
18795 output_multi_immediate (operands,
18796 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18797 -n);
18798 else
18799 output_multi_immediate (operands,
18800 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18801 n);
18802 }
18803
18804 return "";
18805 }
18806
18807 /* Output a multiple immediate operation.
18808 OPERANDS is the vector of operands referred to in the output patterns.
18809 INSTR1 is the output pattern to use for the first constant.
18810 INSTR2 is the output pattern to use for subsequent constants.
18811 IMMED_OP is the index of the constant slot in OPERANDS.
18812 N is the constant value. */
18813 static const char *
18814 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18815 int immed_op, HOST_WIDE_INT n)
18816 {
18817 #if HOST_BITS_PER_WIDE_INT > 32
18818 n &= 0xffffffff;
18819 #endif
18820
18821 if (n == 0)
18822 {
18823 /* Quick and easy output. */
18824 operands[immed_op] = const0_rtx;
18825 output_asm_insn (instr1, operands);
18826 }
18827 else
18828 {
18829 int i;
18830 const char * instr = instr1;
18831
18832 /* Note that n is never zero here (which would give no output). */
18833 for (i = 0; i < 32; i += 2)
18834 {
18835 if (n & (3 << i))
18836 {
18837 operands[immed_op] = GEN_INT (n & (255 << i));
18838 output_asm_insn (instr, operands);
18839 instr = instr2;
18840 i += 6;
18841 }
18842 }
18843 }
18844
18845 return "";
18846 }
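
/* Worked example (purely illustrative; rD/rS are placeholder registers):
   output_add_immediate with n = 0x12345 walks the constant two bits at a
   time and peels off one 8-bit chunk at each even bit position where
   something is set, giving

     add rD, rS, #0x45
     add rD, rD, #0x2300
     add rD, rD, #0x10000

   Each chunk is an 8-bit value at an even bit offset and is therefore a
   valid ARM rotated immediate, and the three immediates sum back to
   0x12345.  */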
18847
18848 /* Return the name of a shifter operation. */
18849 static const char *
18850 arm_shift_nmem (enum rtx_code code)
18851 {
18852 switch (code)
18853 {
18854 case ASHIFT:
18855 return ARM_LSL_NAME;
18856
18857 case ASHIFTRT:
18858 return "asr";
18859
18860 case LSHIFTRT:
18861 return "lsr";
18862
18863 case ROTATERT:
18864 return "ror";
18865
18866 default:
18867 abort ();
18868 }
18869 }
18870
18871 /* Return the appropriate ARM instruction for the operation code.
18872 The returned result should not be overwritten. OP is the rtx of the
18873 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18874 was shifted. */
18875 const char *
18876 arithmetic_instr (rtx op, int shift_first_arg)
18877 {
18878 switch (GET_CODE (op))
18879 {
18880 case PLUS:
18881 return "add";
18882
18883 case MINUS:
18884 return shift_first_arg ? "rsb" : "sub";
18885
18886 case IOR:
18887 return "orr";
18888
18889 case XOR:
18890 return "eor";
18891
18892 case AND:
18893 return "and";
18894
18895 case ASHIFT:
18896 case ASHIFTRT:
18897 case LSHIFTRT:
18898 case ROTATERT:
18899 return arm_shift_nmem (GET_CODE (op));
18900
18901 default:
18902 gcc_unreachable ();
18903 }
18904 }
18905
18906 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18907 for the operation code. The returned result should not be overwritten.
18908 OP is the rtx code of the shift.
18909 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18910 shift. */
18911 static const char *
18912 shift_op (rtx op, HOST_WIDE_INT *amountp)
18913 {
18914 const char * mnem;
18915 enum rtx_code code = GET_CODE (op);
18916
18917 switch (code)
18918 {
18919 case ROTATE:
18920 if (!CONST_INT_P (XEXP (op, 1)))
18921 {
18922 output_operand_lossage ("invalid shift operand");
18923 return NULL;
18924 }
18925
18926 code = ROTATERT;
18927 *amountp = 32 - INTVAL (XEXP (op, 1));
18928 mnem = "ror";
18929 break;
18930
18931 case ASHIFT:
18932 case ASHIFTRT:
18933 case LSHIFTRT:
18934 case ROTATERT:
18935 mnem = arm_shift_nmem (code);
18936 if (CONST_INT_P (XEXP (op, 1)))
18937 {
18938 *amountp = INTVAL (XEXP (op, 1));
18939 }
18940 else if (REG_P (XEXP (op, 1)))
18941 {
18942 *amountp = -1;
18943 return mnem;
18944 }
18945 else
18946 {
18947 output_operand_lossage ("invalid shift operand");
18948 return NULL;
18949 }
18950 break;
18951
18952 case MULT:
18953 /* We never have to worry about the amount being other than a
18954 power of 2, since this case can never be reloaded from a reg. */
18955 if (!CONST_INT_P (XEXP (op, 1)))
18956 {
18957 output_operand_lossage ("invalid shift operand");
18958 return NULL;
18959 }
18960
18961 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18962
18963 /* Amount must be a power of two. */
18964 if (*amountp & (*amountp - 1))
18965 {
18966 output_operand_lossage ("invalid shift operand");
18967 return NULL;
18968 }
18969
18970 *amountp = exact_log2 (*amountp);
18971 gcc_assert (IN_RANGE (*amountp, 0, 31));
18972 return ARM_LSL_NAME;
18973
18974 default:
18975 output_operand_lossage ("invalid shift operand");
18976 return NULL;
18977 }
18978
18979 /* This is not 100% correct, but follows from the desire to merge
18980 multiplication by a power of 2 with the recognizer for a
18981 shift. >=32 is not a valid shift for "lsl", so we must try to
18982 output a shift that produces the correct arithmetical result.
18983 Using lsr #32 is identical except for the fact that the carry bit
18984 is not set correctly if we set the flags; but we never use the
18985 carry bit from such an operation, so we can ignore that. */
18986 if (code == ROTATERT)
18987 /* Rotate is just modulo 32. */
18988 *amountp &= 31;
18989 else if (*amountp != (*amountp & 31))
18990 {
18991 if (code == ASHIFT)
18992 mnem = "lsr";
18993 *amountp = 32;
18994 }
18995
18996 /* Shifts of 0 are no-ops. */
18997 if (*amountp == 0)
18998 return NULL;
18999
19000 return mnem;
19001 }
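
/* For illustration: (mult (reg) (const_int 8)) is treated as a left shift,
   so *AMOUNTP becomes 3 and ARM_LSL_NAME is returned, printing as an
   "lsl #3" shifter operand; a shift such as (ashift (reg) (const_int 0))
   falls through to the final check and returns NULL, so no shifter
   operand is printed at all.  */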
19002
19003 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19004 because /bin/as is horribly restrictive. The judgement about
19005 whether or not each character is 'printable' (and can be output as
19006 is) or not (and must be printed with an octal escape) must be made
19007 with reference to the *host* character set -- the situation is
19008 similar to that discussed in the comments above pp_c_char in
19009 c-pretty-print.c. */
19010
19011 #define MAX_ASCII_LEN 51
19012
19013 void
19014 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19015 {
19016 int i;
19017 int len_so_far = 0;
19018
19019 fputs ("\t.ascii\t\"", stream);
19020
19021 for (i = 0; i < len; i++)
19022 {
19023 int c = p[i];
19024
19025 if (len_so_far >= MAX_ASCII_LEN)
19026 {
19027 fputs ("\"\n\t.ascii\t\"", stream);
19028 len_so_far = 0;
19029 }
19030
19031 if (ISPRINT (c))
19032 {
19033 if (c == '\\' || c == '\"')
19034 {
19035 putc ('\\', stream);
19036 len_so_far++;
19037 }
19038 putc (c, stream);
19039 len_so_far++;
19040 }
19041 else
19042 {
19043 fprintf (stream, "\\%03o", c);
19044 len_so_far += 4;
19045 }
19046 }
19047
19048 fputs ("\"\n", stream);
19049 }
19050 \f
19051 /* Whether a register is callee saved or not. This is necessary because, when
19052 optimizing for size on Thumb-1 targets, high registers are marked as caller
19053 saved even though they are callee saved, in order to discourage their use. */
19054 #define callee_saved_reg_p(reg) \
19055 (!call_used_regs[reg] \
19056 || (TARGET_THUMB1 && optimize_size \
19057 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19058
19059 /* Compute the register save mask for registers 0 through 12
19060 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19061
19062 static unsigned long
19063 arm_compute_save_reg0_reg12_mask (void)
19064 {
19065 unsigned long func_type = arm_current_func_type ();
19066 unsigned long save_reg_mask = 0;
19067 unsigned int reg;
19068
19069 if (IS_INTERRUPT (func_type))
19070 {
19071 unsigned int max_reg;
19072 /* Interrupt functions must not corrupt any registers,
19073 even call clobbered ones. If this is a leaf function
19074 we can just examine the registers used by the RTL, but
19075 otherwise we have to assume that whatever function is
19076 called might clobber anything, and so we have to save
19077 all the call-clobbered registers as well. */
19078 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19079 /* FIQ handlers have registers r8 - r12 banked, so
19080 we only need to check r0 - r7. Normal ISRs only
19081 bank r14 and r15, so we must check up to r12.
19082 r13 is the stack pointer which is always preserved,
19083 so we do not need to consider it here. */
19084 max_reg = 7;
19085 else
19086 max_reg = 12;
19087
19088 for (reg = 0; reg <= max_reg; reg++)
19089 if (df_regs_ever_live_p (reg)
19090 || (! crtl->is_leaf && call_used_regs[reg]))
19091 save_reg_mask |= (1 << reg);
19092
19093 /* Also save the pic base register if necessary. */
19094 if (flag_pic
19095 && !TARGET_SINGLE_PIC_BASE
19096 && arm_pic_register != INVALID_REGNUM
19097 && crtl->uses_pic_offset_table)
19098 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19099 }
19100 else if (IS_VOLATILE (func_type))
19101 {
19102 /* For noreturn functions we historically omitted register saves
19103 altogether. However this really messes up debugging. As a
19104 compromise save just the frame pointers. Combined with the link
19105 register saved elsewhere this should be sufficient to get
19106 a backtrace. */
19107 if (frame_pointer_needed)
19108 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19109 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19110 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19111 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19112 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19113 }
19114 else
19115 {
19116 /* In the normal case we only need to save those registers
19117 which are call saved and which are used by this function. */
19118 for (reg = 0; reg <= 11; reg++)
19119 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19120 save_reg_mask |= (1 << reg);
19121
19122 /* Handle the frame pointer as a special case. */
19123 if (frame_pointer_needed)
19124 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19125
19126 /* If we aren't loading the PIC register,
19127 don't stack it even though it may be live. */
19128 if (flag_pic
19129 && !TARGET_SINGLE_PIC_BASE
19130 && arm_pic_register != INVALID_REGNUM
19131 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19132 || crtl->uses_pic_offset_table))
19133 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19134
19135 /* The prologue will copy SP into R0, so save it. */
19136 if (IS_STACKALIGN (func_type))
19137 save_reg_mask |= 1;
19138 }
19139
19140 /* Save registers so the exception handler can modify them. */
19141 if (crtl->calls_eh_return)
19142 {
19143 unsigned int i;
19144
19145 for (i = 0; ; i++)
19146 {
19147 reg = EH_RETURN_DATA_REGNO (i);
19148 if (reg == INVALID_REGNUM)
19149 break;
19150 save_reg_mask |= 1 << reg;
19151 }
19152 }
19153
19154 return save_reg_mask;
19155 }
19156
19157 /* Return true if r3 is live at the start of the function. */
19158
19159 static bool
19160 arm_r3_live_at_start_p (void)
19161 {
19162 /* Just look at cfg info, which is still close enough to correct at this
19163 point. This gives false positives for broken functions that might use
19164 uninitialized data that happens to be allocated in r3, but who cares? */
19165 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19166 }
19167
19168 /* Compute the number of bytes used to store the static chain register on the
19169 stack, above the stack frame. We need to know this accurately to get the
19170 alignment of the rest of the stack frame correct. */
19171
19172 static int
19173 arm_compute_static_chain_stack_bytes (void)
19174 {
19175 /* See the defining assertion in arm_expand_prologue. */
19176 if (IS_NESTED (arm_current_func_type ())
19177 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19178 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19179 || flag_stack_clash_protection)
19180 && !df_regs_ever_live_p (LR_REGNUM)))
19181 && arm_r3_live_at_start_p ()
19182 && crtl->args.pretend_args_size == 0)
19183 return 4;
19184
19185 return 0;
19186 }
19187
19188 /* Compute a bit mask of which core registers need to be
19189 saved on the stack for the current function.
19190 This is used by arm_compute_frame_layout, which may add extra registers. */
19191
19192 static unsigned long
19193 arm_compute_save_core_reg_mask (void)
19194 {
19195 unsigned int save_reg_mask = 0;
19196 unsigned long func_type = arm_current_func_type ();
19197 unsigned int reg;
19198
19199 if (IS_NAKED (func_type))
19200 /* This should never really happen. */
19201 return 0;
19202
19203 /* If we are creating a stack frame, then we must save the frame pointer,
19204 IP (which will hold the old stack pointer), LR and the PC. */
19205 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19206 save_reg_mask |=
19207 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19208 | (1 << IP_REGNUM)
19209 | (1 << LR_REGNUM)
19210 | (1 << PC_REGNUM);
19211
19212 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19213
19214 /* Decide if we need to save the link register.
19215 Interrupt routines have their own banked link register,
19216 so they never need to save it.
19217 Otherwise if we do not use the link register we do not need to save
19218 it. If we are pushing other registers onto the stack however, we
19219 can save an instruction in the epilogue by pushing the link register
19220 now and then popping it back into the PC. This incurs extra memory
19221 accesses though, so we only do it when optimizing for size, and only
19222 if we know that we will not need a fancy return sequence. */
19223 if (df_regs_ever_live_p (LR_REGNUM)
19224 || (save_reg_mask
19225 && optimize_size
19226 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19227 && !crtl->tail_call_emit
19228 && !crtl->calls_eh_return))
19229 save_reg_mask |= 1 << LR_REGNUM;
19230
19231 if (cfun->machine->lr_save_eliminated)
19232 save_reg_mask &= ~ (1 << LR_REGNUM);
19233
19234 if (TARGET_REALLY_IWMMXT
19235 && ((bit_count (save_reg_mask)
19236 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19237 arm_compute_static_chain_stack_bytes ())
19238 ) % 2) != 0)
19239 {
19240 /* The total number of registers that are going to be pushed
19241 onto the stack is odd. We need to ensure that the stack
19242 is 64-bit aligned before we start to save iWMMXt registers,
19243 and also before we start to create locals. (A local variable
19244 might be a double or long long which we will load/store using
19245 an iWMMXt instruction). Therefore we need to push another
19246 ARM register, so that the stack will be 64-bit aligned. We
19247 try to avoid using the arg registers (r0 - r3) as they might be
19248 used to pass values in a tail call. */
19249 for (reg = 4; reg <= 12; reg++)
19250 if ((save_reg_mask & (1 << reg)) == 0)
19251 break;
19252
19253 if (reg <= 12)
19254 save_reg_mask |= (1 << reg);
19255 else
19256 {
19257 cfun->machine->sibcall_blocked = 1;
19258 save_reg_mask |= (1 << 3);
19259 }
19260 }
19261
19262 /* We may need to push an additional register for use initializing the
19263 PIC base register. */
19264 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19265 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19266 {
19267 reg = thumb_find_work_register (1 << 4);
19268 if (!call_used_regs[reg])
19269 save_reg_mask |= (1 << reg);
19270 }
19271
19272 return save_reg_mask;
19273 }
19274
19275 /* Compute a bit mask of which core registers need to be
19276 saved on the stack for the current function. */
19277 static unsigned long
19278 thumb1_compute_save_core_reg_mask (void)
19279 {
19280 unsigned long mask;
19281 unsigned reg;
19282
19283 mask = 0;
19284 for (reg = 0; reg < 12; reg ++)
19285 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19286 mask |= 1 << reg;
19287
19288 /* Handle the frame pointer as a special case. */
19289 if (frame_pointer_needed)
19290 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19291
19292 if (flag_pic
19293 && !TARGET_SINGLE_PIC_BASE
19294 && arm_pic_register != INVALID_REGNUM
19295 && crtl->uses_pic_offset_table)
19296 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19297
19298 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19299 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19300 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19301
19302 /* LR will also be pushed if any lo regs are pushed. */
19303 if (mask & 0xff || thumb_force_lr_save ())
19304 mask |= (1 << LR_REGNUM);
19305
19306 /* Make sure we have a low work register if we need one.
19307 We will need one if we are going to push a high register,
19308 but we are not currently intending to push a low register. */
19309 if ((mask & 0xff) == 0
19310 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19311 {
19312 /* Use thumb_find_work_register to choose which register
19313 we will use. If the register is live then we will
19314 have to push it. Use LAST_LO_REGNUM as our fallback
19315 choice for the register to select. */
19316 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19317 /* Make sure the register returned by thumb_find_work_register is
19318 not part of the return value. */
19319 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19320 reg = LAST_LO_REGNUM;
19321
19322 if (callee_saved_reg_p (reg))
19323 mask |= 1 << reg;
19324 }
19325
19326 /* The 504 below is 8 bytes less than 512 because there are two possible
19327 alignment words. We can't tell here if they will be present or not, so we
19328 have to play it safe and assume that they are. */
19329 if ((CALLER_INTERWORKING_SLOT_SIZE +
19330 ROUND_UP_WORD (get_frame_size ()) +
19331 crtl->outgoing_args_size) >= 504)
19332 {
19333 /* This is the same as the code in thumb1_expand_prologue() which
19334 determines which register to use for stack decrement. */
19335 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19336 if (mask & (1 << reg))
19337 break;
19338
19339 if (reg > LAST_LO_REGNUM)
19340 {
19341 /* Make sure we have a register available for stack decrement. */
19342 mask |= 1 << LAST_LO_REGNUM;
19343 }
19344 }
19345
19346 return mask;
19347 }
19348
19349
19350 /* Return the number of bytes required to save VFP registers. */
19351 static int
19352 arm_get_vfp_saved_size (void)
19353 {
19354 unsigned int regno;
19355 int count;
19356 int saved;
19357
19358 saved = 0;
19359 /* Space for saved VFP registers. */
19360 if (TARGET_HARD_FLOAT)
19361 {
19362 count = 0;
19363 for (regno = FIRST_VFP_REGNUM;
19364 regno < LAST_VFP_REGNUM;
19365 regno += 2)
19366 {
19367 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19368 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19369 {
19370 if (count > 0)
19371 {
19372 /* Work around the ARM10 VFPr1 bug. */
19373 if (count == 2 && !arm_arch6)
19374 count++;
19375 saved += count * 8;
19376 }
19377 count = 0;
19378 }
19379 else
19380 count++;
19381 }
19382 if (count > 0)
19383 {
19384 if (count == 2 && !arm_arch6)
19385 count++;
19386 saved += count * 8;
19387 }
19388 }
19389 return saved;
19390 }
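
/* For example (hypothetical liveness): if d8-d11 are live and call-saved,
   the contiguous run contributes 4 * 8 = 32 bytes; if only the pair d8/d9
   were live on a pre-v6 core, the ARM10 VFPr1 workaround above rounds the
   pair up to three registers, i.e. 24 bytes.  */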
19391
19392
19393 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19394 everything bar the final return instruction. If simple_return is true,
19395 then do not output epilogue, because it has already been emitted in RTL. */
19396 const char *
19397 output_return_instruction (rtx operand, bool really_return, bool reverse,
19398 bool simple_return)
19399 {
19400 char conditional[10];
19401 char instr[100];
19402 unsigned reg;
19403 unsigned long live_regs_mask;
19404 unsigned long func_type;
19405 arm_stack_offsets *offsets;
19406
19407 func_type = arm_current_func_type ();
19408
19409 if (IS_NAKED (func_type))
19410 return "";
19411
19412 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19413 {
19414 /* If this function was declared non-returning, and we have
19415 found a tail call, then we have to trust that the called
19416 function won't return. */
19417 if (really_return)
19418 {
19419 rtx ops[2];
19420
19421 /* Otherwise, trap an attempted return by aborting. */
19422 ops[0] = operand;
19423 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19424 : "abort");
19425 assemble_external_libcall (ops[1]);
19426 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19427 }
19428
19429 return "";
19430 }
19431
19432 gcc_assert (!cfun->calls_alloca || really_return);
19433
19434 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19435
19436 cfun->machine->return_used_this_function = 1;
19437
19438 offsets = arm_get_frame_offsets ();
19439 live_regs_mask = offsets->saved_regs_mask;
19440
19441 if (!simple_return && live_regs_mask)
19442 {
19443 const char * return_reg;
19444
19445 /* If we do not have any special requirements for function exit
19446 (e.g. interworking) then we can load the return address
19447 directly into the PC. Otherwise we must load it into LR. */
19448 if (really_return
19449 && !IS_CMSE_ENTRY (func_type)
19450 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19451 return_reg = reg_names[PC_REGNUM];
19452 else
19453 return_reg = reg_names[LR_REGNUM];
19454
19455 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19456 {
19457 /* There are three possible reasons for the IP register
19458 being saved: 1) a stack frame was created, in which case
19459 IP contains the old stack pointer, or 2) an ISR routine
19460 corrupted it, or 3) it was saved to align the stack on
19461 iWMMXt. In case 1, restore IP into SP, otherwise just
19462 restore IP. */
19463 if (frame_pointer_needed)
19464 {
19465 live_regs_mask &= ~ (1 << IP_REGNUM);
19466 live_regs_mask |= (1 << SP_REGNUM);
19467 }
19468 else
19469 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19470 }
19471
19472 /* On some ARM architectures it is faster to use LDR rather than
19473 LDM to load a single register. On other architectures, the
19474 cost is the same. In 26 bit mode, or for exception handlers,
19475 we have to use LDM to load the PC so that the CPSR is also
19476 restored. */
19477 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19478 if (live_regs_mask == (1U << reg))
19479 break;
19480
19481 if (reg <= LAST_ARM_REGNUM
19482 && (reg != LR_REGNUM
19483 || ! really_return
19484 || ! IS_INTERRUPT (func_type)))
19485 {
19486 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19487 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19488 }
19489 else
19490 {
19491 char *p;
19492 int first = 1;
19493
19494 /* Generate the load multiple instruction to restore the
19495 registers. Note we can get here, even if
19496 frame_pointer_needed is true, but only if sp already
19497 points to the base of the saved core registers. */
19498 if (live_regs_mask & (1 << SP_REGNUM))
19499 {
19500 unsigned HOST_WIDE_INT stack_adjust;
19501
19502 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19503 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19504
19505 if (stack_adjust && arm_arch5 && TARGET_ARM)
19506 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19507 else
19508 {
19509 /* If we can't use ldmib (SA110 bug),
19510 then try to pop r3 instead. */
19511 if (stack_adjust)
19512 live_regs_mask |= 1 << 3;
19513
19514 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19515 }
19516 }
19517 /* For interrupt returns we have to use an LDM rather than
19518 a POP so that we can use the exception return variant. */
19519 else if (IS_INTERRUPT (func_type))
19520 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19521 else
19522 sprintf (instr, "pop%s\t{", conditional);
19523
19524 p = instr + strlen (instr);
19525
19526 for (reg = 0; reg <= SP_REGNUM; reg++)
19527 if (live_regs_mask & (1 << reg))
19528 {
19529 int l = strlen (reg_names[reg]);
19530
19531 if (first)
19532 first = 0;
19533 else
19534 {
19535 memcpy (p, ", ", 2);
19536 p += 2;
19537 }
19538
19539 memcpy (p, "%|", 2);
19540 memcpy (p + 2, reg_names[reg], l);
19541 p += l + 2;
19542 }
19543
19544 if (live_regs_mask & (1 << LR_REGNUM))
19545 {
19546 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19547 /* If returning from an interrupt, restore the CPSR. */
19548 if (IS_INTERRUPT (func_type))
19549 strcat (p, "^");
19550 }
19551 else
19552 strcpy (p, "}");
19553 }
19554
19555 output_asm_insn (instr, & operand);
19556
19557 /* See if we need to generate an extra instruction to
19558 perform the actual function return. */
19559 if (really_return
19560 && func_type != ARM_FT_INTERWORKED
19561 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19562 {
19563 /* The return has already been handled
19564 by loading the LR into the PC. */
19565 return "";
19566 }
19567 }
19568
19569 if (really_return)
19570 {
19571 switch ((int) ARM_FUNC_TYPE (func_type))
19572 {
19573 case ARM_FT_ISR:
19574 case ARM_FT_FIQ:
19575 /* ??? This is wrong for unified assembly syntax. */
19576 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19577 break;
19578
19579 case ARM_FT_INTERWORKED:
19580 gcc_assert (arm_arch5 || arm_arch4t);
19581 sprintf (instr, "bx%s\t%%|lr", conditional);
19582 break;
19583
19584 case ARM_FT_EXCEPTION:
19585 /* ??? This is wrong for unified assembly syntax. */
19586 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19587 break;
19588
19589 default:
19590 if (IS_CMSE_ENTRY (func_type))
19591 {
19592 /* Check if we have to clear the 'GE bits', which are only used if
19593 parallel add and subtraction instructions are available. */
19594 if (TARGET_INT_SIMD)
19595 snprintf (instr, sizeof (instr),
19596 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19597 else
19598 snprintf (instr, sizeof (instr),
19599 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19600
19601 output_asm_insn (instr, & operand);
19602 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19603 {
19604 /* Clear the cumulative exception-status bits (0-4,7) and the
19605 condition code bits (28-31) of the FPSCR. We need to
19606 remember to clear the first scratch register used (IP) and
19607 save and restore the second (r4). */
19608 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19609 output_asm_insn (instr, & operand);
19610 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19611 output_asm_insn (instr, & operand);
19612 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19613 output_asm_insn (instr, & operand);
19614 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19615 output_asm_insn (instr, & operand);
19616 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19617 output_asm_insn (instr, & operand);
19618 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19619 output_asm_insn (instr, & operand);
19620 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19621 output_asm_insn (instr, & operand);
19622 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19623 output_asm_insn (instr, & operand);
19624 }
19625 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19626 }
19627 /* Use bx if it's available. */
19628 else if (arm_arch5 || arm_arch4t)
19629 sprintf (instr, "bx%s\t%%|lr", conditional);
19630 else
19631 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19632 break;
19633 }
19634
19635 output_asm_insn (instr, & operand);
19636 }
19637
19638 return "";
19639 }
19640
19641 /* Output in FILE asm statements needed to declare the NAME of the function
19642 defined by its DECL node. */
19643
19644 void
19645 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19646 {
19647 size_t cmse_name_len;
19648 char *cmse_name = 0;
19649 char cmse_prefix[] = "__acle_se_";
19650
19651 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19652 extra function label for each function with the 'cmse_nonsecure_entry'
19653 attribute. This extra function label should be prepended with
19654 '__acle_se_', telling the linker that it needs to create secure gateway
19655 veneers for this function. */
19656 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19657 DECL_ATTRIBUTES (decl)))
19658 {
19659 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19660 cmse_name = XALLOCAVEC (char, cmse_name_len);
19661 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19662 targetm.asm_out.globalize_label (file, cmse_name);
19663
19664 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19665 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19666 }
19667
19668 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19669 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19670 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19671 ASM_OUTPUT_LABEL (file, name);
19672
19673 if (cmse_name)
19674 ASM_OUTPUT_LABEL (file, cmse_name);
19675
19676 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19677 }
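
/* Sketch of the effect (directive spelling depends on the target macros
   involved, so this is only approximate): for a function "foo" carrying the
   cmse_nonsecure_entry attribute the output contains, in addition to the
   usual unwinding and mode directives, something like

     .global __acle_se_foo
     .type   __acle_se_foo, %function
     .type   foo, %function
   foo:
   __acle_se_foo:

   so the linker can build the secure gateway veneer for the entry point.  */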
19678
19679 /* Write the function name into the code section, directly preceding
19680 the function prologue.
19681
19682 Code will be output similar to this:
19683 t0
19684 .ascii "arm_poke_function_name", 0
19685 .align
19686 t1
19687 .word 0xff000000 + (t1 - t0)
19688 arm_poke_function_name
19689 mov ip, sp
19690 stmfd sp!, {fp, ip, lr, pc}
19691 sub fp, ip, #4
19692
19693 When performing a stack backtrace, code can inspect the value
19694 of 'pc' stored at 'fp' + 0. If the trace function then looks
19695 at location pc - 12 and the top 8 bits are set, then we know
19696 that there is a function name embedded immediately preceding this
19697 location, and that its length is (pc[-3] & ~0xff000000).
19698
19699 We assume that pc is declared as a pointer to an unsigned long.
19700
19701 It is of no benefit to output the function name if we are assembling
19702 a leaf function. These function types will not contain a stack
19703 backtrace structure, therefore it is not possible to determine the
19704 function name. */
19705 void
19706 arm_poke_function_name (FILE *stream, const char *name)
19707 {
19708 unsigned long alignlength;
19709 unsigned long length;
19710 rtx x;
19711
19712 length = strlen (name) + 1;
19713 alignlength = ROUND_UP_WORD (length);
19714
19715 ASM_OUTPUT_ASCII (stream, name, length);
19716 ASM_OUTPUT_ALIGN (stream, 2);
19717 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19718 assemble_aligned_integer (UNITS_PER_WORD, x);
19719 }
19720
19721 /* Place some comments into the assembler stream
19722 describing the current function. */
19723 static void
19724 arm_output_function_prologue (FILE *f)
19725 {
19726 unsigned long func_type;
19727
19728 /* Sanity check. */
19729 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19730
19731 func_type = arm_current_func_type ();
19732
19733 switch ((int) ARM_FUNC_TYPE (func_type))
19734 {
19735 default:
19736 case ARM_FT_NORMAL:
19737 break;
19738 case ARM_FT_INTERWORKED:
19739 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19740 break;
19741 case ARM_FT_ISR:
19742 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19743 break;
19744 case ARM_FT_FIQ:
19745 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19746 break;
19747 case ARM_FT_EXCEPTION:
19748 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19749 break;
19750 }
19751
19752 if (IS_NAKED (func_type))
19753 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19754
19755 if (IS_VOLATILE (func_type))
19756 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19757
19758 if (IS_NESTED (func_type))
19759 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19760 if (IS_STACKALIGN (func_type))
19761 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19762 if (IS_CMSE_ENTRY (func_type))
19763 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19764
19765 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19766 crtl->args.size,
19767 crtl->args.pretend_args_size,
19768 (HOST_WIDE_INT) get_frame_size ());
19769
19770 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19771 frame_pointer_needed,
19772 cfun->machine->uses_anonymous_args);
19773
19774 if (cfun->machine->lr_save_eliminated)
19775 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19776
19777 if (crtl->calls_eh_return)
19778 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19779
19780 }
19781
19782 static void
19783 arm_output_function_epilogue (FILE *)
19784 {
19785 arm_stack_offsets *offsets;
19786
19787 if (TARGET_THUMB1)
19788 {
19789 int regno;
19790
19791 /* Emit any call-via-reg trampolines that are needed for v4t support
19792 of call_reg and call_value_reg type insns. */
19793 for (regno = 0; regno < LR_REGNUM; regno++)
19794 {
19795 rtx label = cfun->machine->call_via[regno];
19796
19797 if (label != NULL)
19798 {
19799 switch_to_section (function_section (current_function_decl));
19800 targetm.asm_out.internal_label (asm_out_file, "L",
19801 CODE_LABEL_NUMBER (label));
19802 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19803 }
19804 }
19805
19806 /* ??? Probably not safe to set this here, since it assumes that a
19807 function will be emitted as assembly immediately after we generate
19808 RTL for it. This does not happen for inline functions. */
19809 cfun->machine->return_used_this_function = 0;
19810 }
19811 else /* TARGET_32BIT */
19812 {
19813 /* We need to take into account any stack-frame rounding. */
19814 offsets = arm_get_frame_offsets ();
19815
19816 gcc_assert (!use_return_insn (FALSE, NULL)
19817 || (cfun->machine->return_used_this_function != 0)
19818 || offsets->saved_regs == offsets->outgoing_args
19819 || frame_pointer_needed);
19820 }
19821 }
19822
19823 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19824 STR and STRD. If an even number of registers are being pushed, one
19825 or more STRD patterns are created for each register pair. If an
19826 odd number of registers are pushed, emit an initial STR followed by
19827 as many STRD instructions as are needed. This works best when the
19828 stack is initially 64-bit aligned (the normal case), since it
19829 ensures that each STRD is also 64-bit aligned. */
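/* For illustration only (a sketch of the expected output, not verbatim
   compiler output): with SAVED_REGS_MASK covering {r4, r5, r6} the emitted
   sequence is roughly

       str   r4, [sp, #-12]!
       strd  r5, r6, [sp, #4]

   and with {r4, r5, r6, r7} it is roughly

       strd  r4, r5, [sp, #-16]!
       strd  r6, r7, [sp, #8]  */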
19830 static void
19831 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19832 {
19833 int num_regs = 0;
19834 int i;
19835 int regno;
19836 rtx par = NULL_RTX;
19837 rtx dwarf = NULL_RTX;
19838 rtx tmp;
19839 bool first = true;
19840
19841 num_regs = bit_count (saved_regs_mask);
19842
19843 /* Must be at least one register to save, and can't save SP or PC. */
19844 gcc_assert (num_regs > 0 && num_regs <= 14);
19845 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19846 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19847
19848 /* Create sequence for DWARF info. All the frame-related data for
19849 debugging is held in this wrapper. */
19850 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19851
19852 /* Describe the stack adjustment. */
19853 tmp = gen_rtx_SET (stack_pointer_rtx,
19854 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19855 RTX_FRAME_RELATED_P (tmp) = 1;
19856 XVECEXP (dwarf, 0, 0) = tmp;
19857
19858 /* Find the first register. */
19859 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19860 ;
19861
19862 i = 0;
19863
19864 /* If there's an odd number of registers to push, start off by
19865 pushing a single register. This ensures that subsequent strd
19866 operations are dword aligned (assuming that SP was originally
19867 64-bit aligned). */
19868 if ((num_regs & 1) != 0)
19869 {
19870 rtx reg, mem, insn;
19871
19872 reg = gen_rtx_REG (SImode, regno);
19873 if (num_regs == 1)
19874 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19875 stack_pointer_rtx));
19876 else
19877 mem = gen_frame_mem (Pmode,
19878 gen_rtx_PRE_MODIFY
19879 (Pmode, stack_pointer_rtx,
19880 plus_constant (Pmode, stack_pointer_rtx,
19881 -4 * num_regs)));
19882
19883 tmp = gen_rtx_SET (mem, reg);
19884 RTX_FRAME_RELATED_P (tmp) = 1;
19885 insn = emit_insn (tmp);
19886 RTX_FRAME_RELATED_P (insn) = 1;
19887 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19888 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19889 RTX_FRAME_RELATED_P (tmp) = 1;
19890 i++;
19891 regno++;
19892 XVECEXP (dwarf, 0, i) = tmp;
19893 first = false;
19894 }
19895
19896 while (i < num_regs)
19897 if (saved_regs_mask & (1 << regno))
19898 {
19899 rtx reg1, reg2, mem1, mem2;
19900 rtx tmp0, tmp1, tmp2;
19901 int regno2;
19902
19903 /* Find the register to pair with this one. */
19904 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19905 regno2++)
19906 ;
19907
19908 reg1 = gen_rtx_REG (SImode, regno);
19909 reg2 = gen_rtx_REG (SImode, regno2);
19910
19911 if (first)
19912 {
19913 rtx insn;
19914
19915 first = false;
19916 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19917 stack_pointer_rtx,
19918 -4 * num_regs));
19919 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19920 stack_pointer_rtx,
19921 -4 * (num_regs - 1)));
19922 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19923 plus_constant (Pmode, stack_pointer_rtx,
19924 -4 * (num_regs)));
19925 tmp1 = gen_rtx_SET (mem1, reg1);
19926 tmp2 = gen_rtx_SET (mem2, reg2);
19927 RTX_FRAME_RELATED_P (tmp0) = 1;
19928 RTX_FRAME_RELATED_P (tmp1) = 1;
19929 RTX_FRAME_RELATED_P (tmp2) = 1;
19930 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19931 XVECEXP (par, 0, 0) = tmp0;
19932 XVECEXP (par, 0, 1) = tmp1;
19933 XVECEXP (par, 0, 2) = tmp2;
19934 insn = emit_insn (par);
19935 RTX_FRAME_RELATED_P (insn) = 1;
19936 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19937 }
19938 else
19939 {
19940 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19941 stack_pointer_rtx,
19942 4 * i));
19943 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19944 stack_pointer_rtx,
19945 4 * (i + 1)));
19946 tmp1 = gen_rtx_SET (mem1, reg1);
19947 tmp2 = gen_rtx_SET (mem2, reg2);
19948 RTX_FRAME_RELATED_P (tmp1) = 1;
19949 RTX_FRAME_RELATED_P (tmp2) = 1;
19950 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19951 XVECEXP (par, 0, 0) = tmp1;
19952 XVECEXP (par, 0, 1) = tmp2;
19953 emit_insn (par);
19954 }
19955
19956 /* Create unwind information. This is an approximation. */
19957 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19958 plus_constant (Pmode,
19959 stack_pointer_rtx,
19960 4 * i)),
19961 reg1);
19962 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19963 plus_constant (Pmode,
19964 stack_pointer_rtx,
19965 4 * (i + 1))),
19966 reg2);
19967
19968 RTX_FRAME_RELATED_P (tmp1) = 1;
19969 RTX_FRAME_RELATED_P (tmp2) = 1;
19970 XVECEXP (dwarf, 0, i + 1) = tmp1;
19971 XVECEXP (dwarf, 0, i + 2) = tmp2;
19972 i += 2;
19973 regno = regno2 + 1;
19974 }
19975 else
19976 regno++;
19977
19978 return;
19979 }
19980
19981 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19982 whenever possible, otherwise it emits single-word stores. The first store
19983 also allocates stack space for all saved registers, using writeback with
19984 post-addressing mode. All other stores use offset addressing. If no STRD
19985 can be emitted, this function emits a sequence of single-word stores,
19986 and not an STM as before, because single-word stores give the scheduler more
19987 freedom and can be turned into an STM by peephole optimizations. */
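/* For illustration only (a sketch, not verbatim compiler output): with
   SAVED_REGS_MASK covering {r4, r5, r6, r7, r8} the expected sequence is
   roughly

       strd  r4, r5, [sp, #-20]!
       strd  r6, r7, [sp, #8]
       str   r8, [sp, #16]  */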
19988 static void
19989 arm_emit_strd_push (unsigned long saved_regs_mask)
19990 {
19991 int num_regs = 0;
19992 int i, j, dwarf_index = 0;
19993 int offset = 0;
19994 rtx dwarf = NULL_RTX;
19995 rtx insn = NULL_RTX;
19996 rtx tmp, mem;
19997
19998 /* TODO: More efficient code can be emitted by changing the
19999 layout, e.g., first push all pairs that can use STRD to keep the
20000 stack aligned, and then push all other registers. */
20001 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20002 if (saved_regs_mask & (1 << i))
20003 num_regs++;
20004
20005 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20006 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20007 gcc_assert (num_regs > 0);
20008
20009 /* Create sequence for DWARF info. */
20010 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20011
20012 /* For dwarf info, we generate explicit stack update. */
20013 tmp = gen_rtx_SET (stack_pointer_rtx,
20014 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20015 RTX_FRAME_RELATED_P (tmp) = 1;
20016 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20017
20018 /* Save registers. */
20019 offset = - 4 * num_regs;
20020 j = 0;
20021 while (j <= LAST_ARM_REGNUM)
20022 if (saved_regs_mask & (1 << j))
20023 {
20024 if ((j % 2 == 0)
20025 && (saved_regs_mask & (1 << (j + 1))))
20026 {
20027 /* Current register and next register form a register pair for
20028 which STRD can be generated. */
20029 if (offset < 0)
20030 {
20031 /* Allocate stack space for all saved registers. */
20032 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20033 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20034 mem = gen_frame_mem (DImode, tmp);
20035 offset = 0;
20036 }
20037 else if (offset > 0)
20038 mem = gen_frame_mem (DImode,
20039 plus_constant (Pmode,
20040 stack_pointer_rtx,
20041 offset));
20042 else
20043 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20044
20045 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20046 RTX_FRAME_RELATED_P (tmp) = 1;
20047 tmp = emit_insn (tmp);
20048
20049 /* Record the first store insn. */
20050 if (dwarf_index == 1)
20051 insn = tmp;
20052
20053 /* Generate dwarf info. */
20054 mem = gen_frame_mem (SImode,
20055 plus_constant (Pmode,
20056 stack_pointer_rtx,
20057 offset));
20058 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20059 RTX_FRAME_RELATED_P (tmp) = 1;
20060 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20061
20062 mem = gen_frame_mem (SImode,
20063 plus_constant (Pmode,
20064 stack_pointer_rtx,
20065 offset + 4));
20066 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20067 RTX_FRAME_RELATED_P (tmp) = 1;
20068 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20069
20070 offset += 8;
20071 j += 2;
20072 }
20073 else
20074 {
20075 /* Emit a single word store. */
20076 if (offset < 0)
20077 {
20078 /* Allocate stack space for all saved registers. */
20079 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20080 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20081 mem = gen_frame_mem (SImode, tmp);
20082 offset = 0;
20083 }
20084 else if (offset > 0)
20085 mem = gen_frame_mem (SImode,
20086 plus_constant (Pmode,
20087 stack_pointer_rtx,
20088 offset));
20089 else
20090 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20091
20092 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20093 RTX_FRAME_RELATED_P (tmp) = 1;
20094 tmp = emit_insn (tmp);
20095
20096 /* Record the first store insn. */
20097 if (dwarf_index == 1)
20098 insn = tmp;
20099
20100 /* Generate dwarf info. */
20101 mem = gen_frame_mem (SImode,
20102 plus_constant (Pmode,
20103 stack_pointer_rtx,
20104 offset));
20105 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20106 RTX_FRAME_RELATED_P (tmp) = 1;
20107 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20108
20109 offset += 4;
20110 j += 1;
20111 }
20112 }
20113 else
20114 j++;
20115
20116 /* Attach dwarf info to the first insn we generate. */
20117 gcc_assert (insn != NULL_RTX);
20118 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20119 RTX_FRAME_RELATED_P (insn) = 1;
20120 }
20121
20122 /* Generate and emit an insn that we will recognize as a push_multi.
20123 Unfortunately, since this insn does not reflect very well the actual
20124 semantics of the operation, we need to annotate the insn for the benefit
20125 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20126 MASK for registers that should be annotated for DWARF2 frame unwind
20127 information. */
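/* For illustration only: a call such as emit_multi_reg_push (mask, mask)
   with MASK covering {r4, r5, lr} is expected to assemble to roughly

       push  {r4, r5, lr}

   while the attached REG_FRAME_RELATED_EXPR note describes the equivalent
   explicit stores and the single stack decrement, as detailed below.  */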
20128 static rtx
20129 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20130 {
20131 int num_regs = 0;
20132 int num_dwarf_regs = 0;
20133 int i, j;
20134 rtx par;
20135 rtx dwarf;
20136 int dwarf_par_index;
20137 rtx tmp, reg;
20138
20139 /* We don't record the PC in the dwarf frame information. */
20140 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20141
20142 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20143 {
20144 if (mask & (1 << i))
20145 num_regs++;
20146 if (dwarf_regs_mask & (1 << i))
20147 num_dwarf_regs++;
20148 }
20149
20150 gcc_assert (num_regs && num_regs <= 16);
20151 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20152
20153 /* For the body of the insn we are going to generate an UNSPEC in
20154 parallel with several USEs. This allows the insn to be recognized
20155 by the push_multi pattern in the arm.md file.
20156
20157 The body of the insn looks something like this:
20158
20159 (parallel [
20160 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20161 (const_int:SI <num>)))
20162 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20163 (use (reg:SI XX))
20164 (use (reg:SI YY))
20165 ...
20166 ])
20167
20168 For the frame note however, we try to be more explicit and actually
20169 show each register being stored into the stack frame, plus a (single)
20170 decrement of the stack pointer. We do it this way in order to be
20171 friendly to the stack unwinding code, which only wants to see a single
20172 stack decrement per instruction. The RTL we generate for the note looks
20173 something like this:
20174
20175 (sequence [
20176 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20177 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20178 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20179 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20180 ...
20181 ])
20182
20183 FIXME: In an ideal world the PRE_MODIFY would not exist and
20184 instead we'd have a parallel expression detailing all
20185 the stores to the various memory addresses so that debug
20186 information is more up-to-date. Remember however while writing
20187 this to take care of the constraints with the push instruction.
20188
20189 Note also that this has to be taken care of for the VFP registers.
20190
20191 For more see PR43399. */
20192
20193 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20194 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20195 dwarf_par_index = 1;
20196
20197 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20198 {
20199 if (mask & (1 << i))
20200 {
20201 reg = gen_rtx_REG (SImode, i);
20202
20203 XVECEXP (par, 0, 0)
20204 = gen_rtx_SET (gen_frame_mem
20205 (BLKmode,
20206 gen_rtx_PRE_MODIFY (Pmode,
20207 stack_pointer_rtx,
20208 plus_constant
20209 (Pmode, stack_pointer_rtx,
20210 -4 * num_regs))
20211 ),
20212 gen_rtx_UNSPEC (BLKmode,
20213 gen_rtvec (1, reg),
20214 UNSPEC_PUSH_MULT));
20215
20216 if (dwarf_regs_mask & (1 << i))
20217 {
20218 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20219 reg);
20220 RTX_FRAME_RELATED_P (tmp) = 1;
20221 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20222 }
20223
20224 break;
20225 }
20226 }
20227
20228 for (j = 1, i++; j < num_regs; i++)
20229 {
20230 if (mask & (1 << i))
20231 {
20232 reg = gen_rtx_REG (SImode, i);
20233
20234 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20235
20236 if (dwarf_regs_mask & (1 << i))
20237 {
20238 tmp
20239 = gen_rtx_SET (gen_frame_mem
20240 (SImode,
20241 plus_constant (Pmode, stack_pointer_rtx,
20242 4 * j)),
20243 reg);
20244 RTX_FRAME_RELATED_P (tmp) = 1;
20245 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20246 }
20247
20248 j++;
20249 }
20250 }
20251
20252 par = emit_insn (par);
20253
20254 tmp = gen_rtx_SET (stack_pointer_rtx,
20255 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20256 RTX_FRAME_RELATED_P (tmp) = 1;
20257 XVECEXP (dwarf, 0, 0) = tmp;
20258
20259 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20260
20261 return par;
20262 }
20263
20264 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20265 SIZE is the offset to be adjusted.
20266 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20267 static void
20268 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20269 {
20270 rtx dwarf;
20271
20272 RTX_FRAME_RELATED_P (insn) = 1;
20273 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20274 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20275 }
20276
20277 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20278 SAVED_REGS_MASK shows which registers need to be restored.
20279
20280 Unfortunately, since this insn does not reflect very well the actual
20281 semantics of the operation, we need to annotate the insn for the benefit
20282 of DWARF2 frame unwind information. */
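/* For illustration only (a sketch): with SAVED_REGS_MASK covering
   {r4, r5, pc} the expected result is roughly

       pop   {r4, r5, pc}

   i.e. a single load-multiple that both restores the registers and
   returns; the REG_CFA_RESTORE notes built below keep the unwinder in
   step with it.  */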
20283 static void
20284 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20285 {
20286 int num_regs = 0;
20287 int i, j;
20288 rtx par;
20289 rtx dwarf = NULL_RTX;
20290 rtx tmp, reg;
20291 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20292 int offset_adj;
20293 int emit_update;
20294
20295 offset_adj = return_in_pc ? 1 : 0;
20296 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20297 if (saved_regs_mask & (1 << i))
20298 num_regs++;
20299
20300 gcc_assert (num_regs && num_regs <= 16);
20301
20302 /* If SP is in reglist, then we don't emit SP update insn. */
20303 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20304
20305 /* The parallel needs to hold num_regs SETs
20306 and one SET for the stack update. */
20307 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20308
20309 if (return_in_pc)
20310 XVECEXP (par, 0, 0) = ret_rtx;
20311
20312 if (emit_update)
20313 {
20314 /* Increment the stack pointer, based on there being
20315 num_regs 4-byte registers to restore. */
20316 tmp = gen_rtx_SET (stack_pointer_rtx,
20317 plus_constant (Pmode,
20318 stack_pointer_rtx,
20319 4 * num_regs));
20320 RTX_FRAME_RELATED_P (tmp) = 1;
20321 XVECEXP (par, 0, offset_adj) = tmp;
20322 }
20323
20324 /* Now restore every reg, which may include PC. */
20325 for (j = 0, i = 0; j < num_regs; i++)
20326 if (saved_regs_mask & (1 << i))
20327 {
20328 reg = gen_rtx_REG (SImode, i);
20329 if ((num_regs == 1) && emit_update && !return_in_pc)
20330 {
20331 /* Emit single load with writeback. */
20332 tmp = gen_frame_mem (SImode,
20333 gen_rtx_POST_INC (Pmode,
20334 stack_pointer_rtx));
20335 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20336 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20337 return;
20338 }
20339
20340 tmp = gen_rtx_SET (reg,
20341 gen_frame_mem
20342 (SImode,
20343 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20344 RTX_FRAME_RELATED_P (tmp) = 1;
20345 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20346
20347 /* We need to maintain a sequence for DWARF info too. As dwarf info
20348 should not have PC, skip PC. */
20349 if (i != PC_REGNUM)
20350 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20351
20352 j++;
20353 }
20354
20355 if (return_in_pc)
20356 par = emit_jump_insn (par);
20357 else
20358 par = emit_insn (par);
20359
20360 REG_NOTES (par) = dwarf;
20361 if (!return_in_pc)
20362 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20363 stack_pointer_rtx, stack_pointer_rtx);
20364 }
20365
20366 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20367 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20368
20369 Unfortunately, since this insn does not reflect very well the actual
20370 semantics of the operation, we need to annotate the insn for the benefit
20371 of DWARF2 frame unwind information. */
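/* For illustration only: restoring four consecutive D registers starting
   at d8, with BASE_REG being the stack pointer, is expected to assemble to
   roughly

       vldmia  sp!, {d8-d11}

   (a sketch; the exact mnemonic depends on the assembler syntax in use).  */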
20372 static void
20373 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20374 {
20375 int i, j;
20376 rtx par;
20377 rtx dwarf = NULL_RTX;
20378 rtx tmp, reg;
20379
20380 gcc_assert (num_regs && num_regs <= 32);
20381
20382 /* Workaround ARM10 VFPr1 bug. */
20383 if (num_regs == 2 && !arm_arch6)
20384 {
20385 if (first_reg == 15)
20386 first_reg--;
20387
20388 num_regs++;
20389 }
20390
20391 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20392 there could be up to 32 D-registers to restore.
20393 If there are more than 16 D-registers, make two recursive calls,
20394 each of which emits one pop_multi instruction. */
20395 if (num_regs > 16)
20396 {
20397 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20398 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20399 return;
20400 }
20401
20402 /* The parallel needs to hold num_regs SETs
20403 and one SET for the stack update. */
20404 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20405
20406 /* Increment the stack pointer, based on there being
20407 num_regs 8-byte registers to restore. */
20408 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20409 RTX_FRAME_RELATED_P (tmp) = 1;
20410 XVECEXP (par, 0, 0) = tmp;
20411
20412 /* Now show every reg that will be restored, using a SET for each. */
20413 for (j = 0, i = first_reg; j < num_regs; i += 2)
20414 {
20415 reg = gen_rtx_REG (DFmode, i);
20416
20417 tmp = gen_rtx_SET (reg,
20418 gen_frame_mem
20419 (DFmode,
20420 plus_constant (Pmode, base_reg, 8 * j)));
20421 RTX_FRAME_RELATED_P (tmp) = 1;
20422 XVECEXP (par, 0, j + 1) = tmp;
20423
20424 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20425
20426 j++;
20427 }
20428
20429 par = emit_insn (par);
20430 REG_NOTES (par) = dwarf;
20431
20432 /* Make sure the CFA doesn't stay based on IP_REGNUM, to allow unwinding from FP. */
20433 if (REGNO (base_reg) == IP_REGNUM)
20434 {
20435 RTX_FRAME_RELATED_P (par) = 1;
20436 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20437 }
20438 else
20439 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20440 base_reg, base_reg);
20441 }
20442
20443 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20444 even number of registers is being popped, multiple LDRD patterns are created
20445 for all register pairs. If an odd number of registers is popped, the last
20446 register is loaded using an LDR pattern. */
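/* For illustration only (a sketch): with SAVED_REGS_MASK covering
   {r4, r5, r6} and no return in PC, the expected sequence is roughly

       ldrd  r4, r5, [sp]
       add   sp, sp, #8
       ldr   r6, [sp], #4

   Note that Thumb-2 LDRD does not require the two destination registers
   to be consecutive.  */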
20447 static void
20448 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20449 {
20450 int num_regs = 0;
20451 int i, j;
20452 rtx par = NULL_RTX;
20453 rtx dwarf = NULL_RTX;
20454 rtx tmp, reg, tmp1;
20455 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20456
20457 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20458 if (saved_regs_mask & (1 << i))
20459 num_regs++;
20460
20461 gcc_assert (num_regs && num_regs <= 16);
20462
20463 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20464 to be popped. So, if num_regs is even, now it will become odd,
20465 and we can generate pop with PC. If num_regs is odd, it will be
20466 even now, and ldr with return can be generated for PC. */
20467 if (return_in_pc)
20468 num_regs--;
20469
20470 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20471
20472 /* Var j iterates over all the registers to gather all the registers in
20473 saved_regs_mask. Var i gives index of saved registers in stack frame.
20474 A PARALLEL RTX of register-pair is created here, so that pattern for
20475 LDRD can be matched. As PC is always last register to be popped, and
20476 we have already decremented num_regs if PC, we don't have to worry
20477 about PC in this loop. */
20478 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20479 if (saved_regs_mask & (1 << j))
20480 {
20481 /* Create RTX for memory load. */
20482 reg = gen_rtx_REG (SImode, j);
20483 tmp = gen_rtx_SET (reg,
20484 gen_frame_mem (SImode,
20485 plus_constant (Pmode,
20486 stack_pointer_rtx, 4 * i)));
20487 RTX_FRAME_RELATED_P (tmp) = 1;
20488
20489 if (i % 2 == 0)
20490 {
20491 /* When saved-register index (i) is even, the RTX to be emitted is
20492 yet to be created. Hence create it first. The LDRD pattern we
20493 are generating is :
20494 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20495 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20496 where target registers need not be consecutive. */
20497 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20498 dwarf = NULL_RTX;
20499 }
20500
20501 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20502 added as 0th element and if i is odd, reg_i is added as 1st element
20503 of LDRD pattern shown above. */
20504 XVECEXP (par, 0, (i % 2)) = tmp;
20505 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20506
20507 if ((i % 2) == 1)
20508 {
20509 /* When saved-register index (i) is odd, RTXs for both the registers
20510 to be loaded are generated in above given LDRD pattern, and the
20511 pattern can be emitted now. */
20512 par = emit_insn (par);
20513 REG_NOTES (par) = dwarf;
20514 RTX_FRAME_RELATED_P (par) = 1;
20515 }
20516
20517 i++;
20518 }
20519
20520 /* If the number of registers pushed is odd AND return_in_pc is false, OR
20521 the number of registers is even AND return_in_pc is true, the last register
20522 is popped using LDR. It can be PC as well. Hence, adjust the stack first
20523 and then LDR with post increment. */
20524
20525 /* Increment the stack pointer, based on there being
20526 num_regs 4-byte registers to restore. */
20527 tmp = gen_rtx_SET (stack_pointer_rtx,
20528 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20529 RTX_FRAME_RELATED_P (tmp) = 1;
20530 tmp = emit_insn (tmp);
20531 if (!return_in_pc)
20532 {
20533 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20534 stack_pointer_rtx, stack_pointer_rtx);
20535 }
20536
20537 dwarf = NULL_RTX;
20538
20539 if (((num_regs % 2) == 1 && !return_in_pc)
20540 || ((num_regs % 2) == 0 && return_in_pc))
20541 {
20542 /* Scan for the single register to be popped. Skip until the saved
20543 register is found. */
20544 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20545
20546 /* Gen LDR with post increment here. */
20547 tmp1 = gen_rtx_MEM (SImode,
20548 gen_rtx_POST_INC (SImode,
20549 stack_pointer_rtx));
20550 set_mem_alias_set (tmp1, get_frame_alias_set ());
20551
20552 reg = gen_rtx_REG (SImode, j);
20553 tmp = gen_rtx_SET (reg, tmp1);
20554 RTX_FRAME_RELATED_P (tmp) = 1;
20555 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20556
20557 if (return_in_pc)
20558 {
20559 /* If return_in_pc, j must be PC_REGNUM. */
20560 gcc_assert (j == PC_REGNUM);
20561 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20562 XVECEXP (par, 0, 0) = ret_rtx;
20563 XVECEXP (par, 0, 1) = tmp;
20564 par = emit_jump_insn (par);
20565 }
20566 else
20567 {
20568 par = emit_insn (tmp);
20569 REG_NOTES (par) = dwarf;
20570 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20571 stack_pointer_rtx, stack_pointer_rtx);
20572 }
20573
20574 }
20575 else if ((num_regs % 2) == 1 && return_in_pc)
20576 {
20577 /* There are 2 registers to be popped. So, generate the pattern
20578 pop_multiple_with_stack_update_and_return to pop in PC. */
20579 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20580 }
20581
20582 return;
20583 }
20584
20585 /* LDRD in ARM mode needs consecutive registers as operands. This function
20586 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20587 offset addressing and then generates one separate stack update. This provides
20588 more scheduling freedom, compared to writeback on every load. However,
20589 if the function returns using load into PC directly
20590 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20591 before the last load. TODO: Add a peephole optimization to recognize
20592 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20593 peephole optimization to merge the load at stack-offset zero
20594 with the stack update instruction using load with writeback
20595 in post-index addressing mode. */
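/* For illustration only (a sketch): with SAVED_REGS_MASK covering
   {r4, r5, r6, pc} the expected sequence is roughly

       ldrd  r4, r5, [sp]
       ldr   r6, [sp, #8]
       add   sp, sp, #12
       ldr   pc, [sp], #4  */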
20596 static void
20597 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20598 {
20599 int j = 0;
20600 int offset = 0;
20601 rtx par = NULL_RTX;
20602 rtx dwarf = NULL_RTX;
20603 rtx tmp, mem;
20604
20605 /* Restore saved registers. */
20606 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20607 j = 0;
20608 while (j <= LAST_ARM_REGNUM)
20609 if (saved_regs_mask & (1 << j))
20610 {
20611 if ((j % 2) == 0
20612 && (saved_regs_mask & (1 << (j + 1)))
20613 && (j + 1) != PC_REGNUM)
20614 {
20615 /* Current register and next register form register pair for which
20616 LDRD can be generated. PC is always the last register popped, and
20617 we handle it separately. */
20618 if (offset > 0)
20619 mem = gen_frame_mem (DImode,
20620 plus_constant (Pmode,
20621 stack_pointer_rtx,
20622 offset));
20623 else
20624 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20625
20626 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20627 tmp = emit_insn (tmp);
20628 RTX_FRAME_RELATED_P (tmp) = 1;
20629
20630 /* Generate dwarf info. */
20631
20632 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20633 gen_rtx_REG (SImode, j),
20634 NULL_RTX);
20635 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20636 gen_rtx_REG (SImode, j + 1),
20637 dwarf);
20638
20639 REG_NOTES (tmp) = dwarf;
20640
20641 offset += 8;
20642 j += 2;
20643 }
20644 else if (j != PC_REGNUM)
20645 {
20646 /* Emit a single word load. */
20647 if (offset > 0)
20648 mem = gen_frame_mem (SImode,
20649 plus_constant (Pmode,
20650 stack_pointer_rtx,
20651 offset));
20652 else
20653 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20654
20655 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20656 tmp = emit_insn (tmp);
20657 RTX_FRAME_RELATED_P (tmp) = 1;
20658
20659 /* Generate dwarf info. */
20660 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20661 gen_rtx_REG (SImode, j),
20662 NULL_RTX);
20663
20664 offset += 4;
20665 j += 1;
20666 }
20667 else /* j == PC_REGNUM */
20668 j++;
20669 }
20670 else
20671 j++;
20672
20673 /* Update the stack. */
20674 if (offset > 0)
20675 {
20676 tmp = gen_rtx_SET (stack_pointer_rtx,
20677 plus_constant (Pmode,
20678 stack_pointer_rtx,
20679 offset));
20680 tmp = emit_insn (tmp);
20681 arm_add_cfa_adjust_cfa_note (tmp, offset,
20682 stack_pointer_rtx, stack_pointer_rtx);
20683 offset = 0;
20684 }
20685
20686 if (saved_regs_mask & (1 << PC_REGNUM))
20687 {
20688 /* Only PC is to be popped. */
20689 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20690 XVECEXP (par, 0, 0) = ret_rtx;
20691 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20692 gen_frame_mem (SImode,
20693 gen_rtx_POST_INC (SImode,
20694 stack_pointer_rtx)));
20695 RTX_FRAME_RELATED_P (tmp) = 1;
20696 XVECEXP (par, 0, 1) = tmp;
20697 par = emit_jump_insn (par);
20698
20699 /* Generate dwarf info. */
20700 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20701 gen_rtx_REG (SImode, PC_REGNUM),
20702 NULL_RTX);
20703 REG_NOTES (par) = dwarf;
20704 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20705 stack_pointer_rtx, stack_pointer_rtx);
20706 }
20707 }
20708
20709 /* Calculate the size of the return value that is passed in registers. */
20710 static unsigned
20711 arm_size_return_regs (void)
20712 {
20713 machine_mode mode;
20714
20715 if (crtl->return_rtx != 0)
20716 mode = GET_MODE (crtl->return_rtx);
20717 else
20718 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20719
20720 return GET_MODE_SIZE (mode);
20721 }
20722
20723 /* Return true if the current function needs to save/restore LR. */
20724 static bool
20725 thumb_force_lr_save (void)
20726 {
20727 return !cfun->machine->lr_save_eliminated
20728 && (!crtl->is_leaf
20729 || thumb_far_jump_used_p ()
20730 || df_regs_ever_live_p (LR_REGNUM));
20731 }
20732
20733 /* Return true if CALL is an indirect tail call. In that case we
20734 do not know whether r3 will be available at the call site. */
20736 static bool
20737 is_indirect_tailcall_p (rtx call)
20738 {
20739 rtx pat = PATTERN (call);
20740
20741 /* Indirect tail call. */
20742 pat = XVECEXP (pat, 0, 0);
20743 if (GET_CODE (pat) == SET)
20744 pat = SET_SRC (pat);
20745
20746 pat = XEXP (XEXP (pat, 0), 0);
20747 return REG_P (pat);
20748 }
20749
20750 /* Return true if r3 is used by any of the tail call insns in the
20751 current function. */
20752 static bool
20753 any_sibcall_could_use_r3 (void)
20754 {
20755 edge_iterator ei;
20756 edge e;
20757
20758 if (!crtl->tail_call_emit)
20759 return false;
20760 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20761 if (e->flags & EDGE_SIBCALL)
20762 {
20763 rtx_insn *call = BB_END (e->src);
20764 if (!CALL_P (call))
20765 call = prev_nonnote_nondebug_insn (call);
20766 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20767 if (find_regno_fusage (call, USE, 3)
20768 || is_indirect_tailcall_p (call))
20769 return true;
20770 }
20771 return false;
20772 }
20773
20774
20775 /* Compute the distance from register FROM to register TO.
20776 These can be the arg pointer (26), the soft frame pointer (25),
20777 the stack pointer (13) or the hard frame pointer (11).
20778 In thumb mode r7 is used as the soft frame pointer, if needed.
20779 Typical stack layout looks like this:
20780
20781 old stack pointer -> | |
20782 ----
20783 | | \
20784 | | saved arguments for
20785 | | vararg functions
20786 | | /
20787 --
20788 hard FP & arg pointer -> | | \
20789 | | stack
20790 | | frame
20791 | | /
20792 --
20793 | | \
20794 | | call saved
20795 | | registers
20796 soft frame pointer -> | | /
20797 --
20798 | | \
20799 | | local
20800 | | variables
20801 locals base pointer -> | | /
20802 --
20803 | | \
20804 | | outgoing
20805 | | arguments
20806 current stack pointer -> | | /
20807 --
20808
20809 For a given function some or all of these stack components
20810 may not be needed, giving rise to the possibility of
20811 eliminating some of the registers.
20812
20813 The values returned by this function must reflect the behavior
20814 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
20815
20816 The sign of the number returned reflects the direction of stack
20817 growth, so the values are positive for all eliminations except
20818 from the soft frame pointer to the hard frame pointer.
20819
20820 SFP may point just inside the local variables block to ensure correct
20821 alignment. */
20822
20823
20824 /* Return cached stack offsets. */
20825
20826 static arm_stack_offsets *
20827 arm_get_frame_offsets (void)
20828 {
20829 struct arm_stack_offsets *offsets;
20830
20831 offsets = &cfun->machine->stack_offsets;
20832
20833 return offsets;
20834 }
20835
20836
20837 /* Calculate stack offsets. These are used to calculate register elimination
20838 offsets and in prologue/epilogue code. Also calculates which registers
20839 should be saved. */
20840
20841 static void
20842 arm_compute_frame_layout (void)
20843 {
20844 struct arm_stack_offsets *offsets;
20845 unsigned long func_type;
20846 int saved;
20847 int core_saved;
20848 HOST_WIDE_INT frame_size;
20849 int i;
20850
20851 offsets = &cfun->machine->stack_offsets;
20852
20853 /* Initially this is the size of the local variables. It will be translated
20854 into an offset once we have determined the size of preceding data. */
20855 frame_size = ROUND_UP_WORD (get_frame_size ());
20856
20857 /* Space for variadic functions. */
20858 offsets->saved_args = crtl->args.pretend_args_size;
20859
20860 /* In Thumb mode this is incorrect, but never used. */
20861 offsets->frame
20862 = (offsets->saved_args
20863 + arm_compute_static_chain_stack_bytes ()
20864 + (frame_pointer_needed ? 4 : 0));
20865
20866 if (TARGET_32BIT)
20867 {
20868 unsigned int regno;
20869
20870 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
20871 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20872 saved = core_saved;
20873
20874 /* We know that SP will be doubleword aligned on entry, and we must
20875 preserve that condition at any subroutine call. We also require the
20876 soft frame pointer to be doubleword aligned. */
20877
20878 if (TARGET_REALLY_IWMMXT)
20879 {
20880 /* Check for the call-saved iWMMXt registers. */
20881 for (regno = FIRST_IWMMXT_REGNUM;
20882 regno <= LAST_IWMMXT_REGNUM;
20883 regno++)
20884 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20885 saved += 8;
20886 }
20887
20888 func_type = arm_current_func_type ();
20889 /* Space for saved VFP registers. */
20890 if (! IS_VOLATILE (func_type)
20891 && TARGET_HARD_FLOAT)
20892 saved += arm_get_vfp_saved_size ();
20893 }
20894 else /* TARGET_THUMB1 */
20895 {
20896 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
20897 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20898 saved = core_saved;
20899 if (TARGET_BACKTRACE)
20900 saved += 16;
20901 }
20902
20903 /* Saved registers include the stack frame. */
20904 offsets->saved_regs
20905 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20906 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20907
20908 /* A leaf function does not need any stack alignment if it has nothing
20909 on the stack. */
20910 if (crtl->is_leaf && frame_size == 0
20911 /* However if it calls alloca(), we have a dynamically allocated
20912 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20913 && ! cfun->calls_alloca)
20914 {
20915 offsets->outgoing_args = offsets->soft_frame;
20916 offsets->locals_base = offsets->soft_frame;
20917 return;
20918 }
20919
20920 /* Ensure SFP has the correct alignment. */
20921 if (ARM_DOUBLEWORD_ALIGN
20922 && (offsets->soft_frame & 7))
20923 {
20924 offsets->soft_frame += 4;
20925 /* Try to align stack by pushing an extra reg. Don't bother doing this
20926 when there is a stack frame as the alignment will be rolled into
20927 the normal stack adjustment. */
20928 if (frame_size + crtl->outgoing_args_size == 0)
20929 {
20930 int reg = -1;
20931
20932 /* Register r3 is caller-saved. Normally it does not need to be
20933 saved on entry by the prologue. However if we choose to save
20934 it for padding then we may confuse the compiler into thinking
20935 a prologue sequence is required when in fact it is not. This
20936 will occur when shrink-wrapping if r3 is used as a scratch
20937 register and there are no other callee-saved writes.
20938
20939 This situation can be avoided when other callee-saved registers
20940 are available and r3 is not mandatory if we choose a callee-saved
20941 register for padding. */
20942 bool prefer_callee_reg_p = false;
20943
20944 /* If it is safe to use r3, then do so. This sometimes
20945 generates better code on Thumb-2 by avoiding the need to
20946 use 32-bit push/pop instructions. */
20947 if (! any_sibcall_could_use_r3 ()
20948 && arm_size_return_regs () <= 12
20949 && (offsets->saved_regs_mask & (1 << 3)) == 0
20950 && (TARGET_THUMB2
20951 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20952 {
20953 reg = 3;
20954 if (!TARGET_THUMB2)
20955 prefer_callee_reg_p = true;
20956 }
20957 if (reg == -1
20958 || prefer_callee_reg_p)
20959 {
20960 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20961 {
20962 /* Avoid fixed registers; they may be changed at
20963 arbitrary times so it's unsafe to restore them
20964 during the epilogue. */
20965 if (!fixed_regs[i]
20966 && (offsets->saved_regs_mask & (1 << i)) == 0)
20967 {
20968 reg = i;
20969 break;
20970 }
20971 }
20972 }
20973
20974 if (reg != -1)
20975 {
20976 offsets->saved_regs += 4;
20977 offsets->saved_regs_mask |= (1 << reg);
20978 }
20979 }
20980 }
20981
20982 offsets->locals_base = offsets->soft_frame + frame_size;
20983 offsets->outgoing_args = (offsets->locals_base
20984 + crtl->outgoing_args_size);
20985
20986 if (ARM_DOUBLEWORD_ALIGN)
20987 {
20988 /* Ensure SP remains doubleword aligned. */
20989 if (offsets->outgoing_args & 7)
20990 offsets->outgoing_args += 4;
20991 gcc_assert (!(offsets->outgoing_args & 7));
20992 }
20993 }
20994
20995
20996 /* Calculate the relative offsets for the different stack pointers. Positive
20997 offsets are in the direction of stack growth. */
20998
20999 HOST_WIDE_INT
21000 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21001 {
21002 arm_stack_offsets *offsets;
21003
21004 offsets = arm_get_frame_offsets ();
21005
21006 /* OK, now we have enough information to compute the distances.
21007 There must be an entry in these switch tables for each pair
21008 of registers in ELIMINABLE_REGS, even if some of the entries
21009 seem to be redundant or useless. */
21010 switch (from)
21011 {
21012 case ARG_POINTER_REGNUM:
21013 switch (to)
21014 {
21015 case THUMB_HARD_FRAME_POINTER_REGNUM:
21016 return 0;
21017
21018 case FRAME_POINTER_REGNUM:
21019 /* This is the reverse of the soft frame pointer
21020 to hard frame pointer elimination below. */
21021 return offsets->soft_frame - offsets->saved_args;
21022
21023 case ARM_HARD_FRAME_POINTER_REGNUM:
21024 /* This is only non-zero in the case where the static chain register
21025 is stored above the frame. */
21026 return offsets->frame - offsets->saved_args - 4;
21027
21028 case STACK_POINTER_REGNUM:
21029 /* If nothing has been pushed on the stack at all
21030 then this will return -4. This *is* correct! */
21031 return offsets->outgoing_args - (offsets->saved_args + 4);
21032
21033 default:
21034 gcc_unreachable ();
21035 }
21036 gcc_unreachable ();
21037
21038 case FRAME_POINTER_REGNUM:
21039 switch (to)
21040 {
21041 case THUMB_HARD_FRAME_POINTER_REGNUM:
21042 return 0;
21043
21044 case ARM_HARD_FRAME_POINTER_REGNUM:
21045 /* The hard frame pointer points to the top entry in the
21046 stack frame. The soft frame pointer to the bottom entry
21047 in the stack frame. If there is no stack frame at all,
21048 then they are identical. */
21049
21050 return offsets->frame - offsets->soft_frame;
21051
21052 case STACK_POINTER_REGNUM:
21053 return offsets->outgoing_args - offsets->soft_frame;
21054
21055 default:
21056 gcc_unreachable ();
21057 }
21058 gcc_unreachable ();
21059
21060 default:
21061 /* You cannot eliminate from the stack pointer.
21062 In theory you could eliminate from the hard frame
21063 pointer to the stack pointer, but this will never
21064 happen, since if a stack frame is not needed the
21065 hard frame pointer will never be used. */
21066 gcc_unreachable ();
21067 }
21068 }
21069
21070 /* Given FROM and TO register numbers, say whether this elimination is
21071 allowed. Frame pointer elimination is automatically handled.
21072
21073 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21074 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21075 pointer, we must eliminate FRAME_POINTER_REGNUM into
21076 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21077 ARG_POINTER_REGNUM. */
21078
21079 bool
21080 arm_can_eliminate (const int from, const int to)
21081 {
21082 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21083 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21084 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21085 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21086 true);
21087 }
21088
21089 /* Emit RTL to save coprocessor registers on function entry. Returns the
21090 number of bytes pushed. */
21091
21092 static int
21093 arm_save_coproc_regs (void)
21094 {
21095 int saved_size = 0;
21096 unsigned reg;
21097 unsigned start_reg;
21098 rtx insn;
21099
21100 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21101 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21102 {
21103 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21104 insn = gen_rtx_MEM (V2SImode, insn);
21105 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21106 RTX_FRAME_RELATED_P (insn) = 1;
21107 saved_size += 8;
21108 }
21109
21110 if (TARGET_HARD_FLOAT)
21111 {
21112 start_reg = FIRST_VFP_REGNUM;
21113
21114 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21115 {
21116 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21117 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21118 {
21119 if (start_reg != reg)
21120 saved_size += vfp_emit_fstmd (start_reg,
21121 (reg - start_reg) / 2);
21122 start_reg = reg + 2;
21123 }
21124 }
21125 if (start_reg != reg)
21126 saved_size += vfp_emit_fstmd (start_reg,
21127 (reg - start_reg) / 2);
21128 }
21129 return saved_size;
21130 }
21131
21132
21133 /* Set the Thumb frame pointer from the stack pointer. */
21134
21135 static void
21136 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21137 {
21138 HOST_WIDE_INT amount;
21139 rtx insn, dwarf;
21140
21141 amount = offsets->outgoing_args - offsets->locals_base;
21142 if (amount < 1024)
21143 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21144 stack_pointer_rtx, GEN_INT (amount)));
21145 else
21146 {
21147 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21148 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21149 expects the first two operands to be the same. */
21150 if (TARGET_THUMB2)
21151 {
21152 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21153 stack_pointer_rtx,
21154 hard_frame_pointer_rtx));
21155 }
21156 else
21157 {
21158 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21159 hard_frame_pointer_rtx,
21160 stack_pointer_rtx));
21161 }
21162 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21163 plus_constant (Pmode, stack_pointer_rtx, amount));
21164 RTX_FRAME_RELATED_P (dwarf) = 1;
21165 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21166 }
21167
21168 RTX_FRAME_RELATED_P (insn) = 1;
21169 }
21170
21171 struct scratch_reg {
21172 rtx reg;
21173 bool saved;
21174 };
21175
21176 /* Return a short-lived scratch register for use as a 2nd scratch register on
21177 function entry after the registers are saved in the prologue. This register
21178 must be released by means of release_scratch_register_on_entry. IP is not
21179 considered since it is always used as the 1st scratch register if available.
21180
21181 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21182 mask of live registers. */
21183
21184 static void
21185 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21186 unsigned long live_regs)
21187 {
21188 int regno = -1;
21189
21190 sr->saved = false;
21191
21192 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21193 regno = LR_REGNUM;
21194 else
21195 {
21196 unsigned int i;
21197
21198 for (i = 4; i < 11; i++)
21199 if (regno1 != i && (live_regs & (1 << i)) != 0)
21200 {
21201 regno = i;
21202 break;
21203 }
21204
21205 if (regno < 0)
21206 {
21207 /* If IP is used as the 1st scratch register for a nested function,
21208 then either r3 wasn't available or is used to preserve IP. */
21209 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21210 regno1 = 3;
21211 regno = (regno1 == 3 ? 2 : 3);
21212 sr->saved
21213 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21214 regno);
21215 }
21216 }
21217
21218 sr->reg = gen_rtx_REG (SImode, regno);
21219 if (sr->saved)
21220 {
21221 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21222 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21223 rtx x = gen_rtx_SET (stack_pointer_rtx,
21224 plus_constant (Pmode, stack_pointer_rtx, -4));
21225 RTX_FRAME_RELATED_P (insn) = 1;
21226 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21227 }
21228 }
21229
21230 /* Release a scratch register obtained from the preceding function. */
21231
21232 static void
21233 release_scratch_register_on_entry (struct scratch_reg *sr)
21234 {
21235 if (sr->saved)
21236 {
21237 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21238 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21239 rtx x = gen_rtx_SET (stack_pointer_rtx,
21240 plus_constant (Pmode, stack_pointer_rtx, 4));
21241 RTX_FRAME_RELATED_P (insn) = 1;
21242 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21243 }
21244 }
21245
21246 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21247
21248 #if PROBE_INTERVAL > 4096
21249 #error Cannot use indexed addressing mode for stack probing
21250 #endif
21251
21252 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21253 inclusive. These are offsets from the current stack pointer. REGNO1
21254 is the index number of the 1st scratch register and LIVE_REGS is the
21255 mask of live registers. */
21256
21257 static void
21258 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21259 unsigned int regno1, unsigned long live_regs)
21260 {
21261 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21262
21263 /* See if we have a constant small number of probes to generate. If so,
21264 that's the easy case. */
21265 if (size <= PROBE_INTERVAL)
21266 {
21267 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21268 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21269 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21270 }
21271
21272 /* The run-time loop is made up of 10 insns in the generic case while the
21273 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21274 else if (size <= 5 * PROBE_INTERVAL)
21275 {
21276 HOST_WIDE_INT i, rem;
21277
21278 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21279 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21280 emit_stack_probe (reg1);
21281
21282 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21283 it exceeds SIZE. If only two probes are needed, this will not
21284 generate any code. Then probe at FIRST + SIZE. */
21285 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21286 {
21287 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21288 emit_stack_probe (reg1);
21289 }
21290
21291 rem = size - (i - PROBE_INTERVAL);
21292 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21293 {
21294 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21295 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21296 }
21297 else
21298 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21299 }
21300
21301 /* Otherwise, do the same as above, but in a loop. Note that we must be
21302 extra careful with variables wrapping around because we might be at
21303 the very top (or the very bottom) of the address space and we have
21304 to be able to handle this case properly; in particular, we use an
21305 equality test for the loop condition. */
21306 else
21307 {
21308 HOST_WIDE_INT rounded_size;
21309 struct scratch_reg sr;
21310
21311 get_scratch_register_on_entry (&sr, regno1, live_regs);
21312
21313 emit_move_insn (reg1, GEN_INT (first));
21314
21315
21316 /* Step 1: round SIZE to the previous multiple of the interval. */
21317
21318 rounded_size = size & -PROBE_INTERVAL;
21319 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21320
21321
21322 /* Step 2: compute initial and final value of the loop counter. */
21323
21324 /* TEST_ADDR = SP + FIRST. */
21325 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21326
21327 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21328 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21329
21330
21331 /* Step 3: the loop
21332
21333 do
21334 {
21335 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21336 probe at TEST_ADDR
21337 }
21338 while (TEST_ADDR != LAST_ADDR)
21339
21340 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21341 until it is equal to ROUNDED_SIZE. */
21342
21343 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21344
21345
21346 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21347 that SIZE is equal to ROUNDED_SIZE. */
21348
21349 if (size != rounded_size)
21350 {
21351 HOST_WIDE_INT rem = size - rounded_size;
21352
21353 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21354 {
21355 emit_set_insn (sr.reg,
21356 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21357 emit_stack_probe (plus_constant (Pmode, sr.reg,
21358 PROBE_INTERVAL - rem));
21359 }
21360 else
21361 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21362 }
21363
21364 release_scratch_register_on_entry (&sr);
21365 }
21366
21367 /* Make sure nothing is scheduled before we are done. */
21368 emit_insn (gen_blockage ());
21369 }
21370
21371 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21372 absolute addresses. */
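/* For illustration only: the probe loop printed by this routine looks
   roughly like

   .LPSRL0:
       sub   r4, r4, #4096
       str   r0, [r4, #0]
       cmp   r4, r5
       bne   .LPSRL0

   where r4 and r5 stand for REG1 and REG2 and 4096 for PROBE_INTERVAL
   (a sketch; the actual registers and interval may differ).  */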
21373
21374 const char *
21375 output_probe_stack_range (rtx reg1, rtx reg2)
21376 {
21377 static int labelno = 0;
21378 char loop_lab[32];
21379 rtx xops[2];
21380
21381 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21382
21383 /* Loop. */
21384 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21385
21386 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21387 xops[0] = reg1;
21388 xops[1] = GEN_INT (PROBE_INTERVAL);
21389 output_asm_insn ("sub\t%0, %0, %1", xops);
21390
21391 /* Probe at TEST_ADDR. */
21392 output_asm_insn ("str\tr0, [%0, #0]", xops);
21393
21394 /* Test if TEST_ADDR == LAST_ADDR. */
21395 xops[1] = reg2;
21396 output_asm_insn ("cmp\t%0, %1", xops);
21397
21398 /* Branch. */
21399 fputs ("\tbne\t", asm_out_file);
21400 assemble_name_raw (asm_out_file, loop_lab);
21401 fputc ('\n', asm_out_file);
21402
21403 return "";
21404 }
21405
21406 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21407 function. */
21408 void
21409 arm_expand_prologue (void)
21410 {
21411 rtx amount;
21412 rtx insn;
21413 rtx ip_rtx;
21414 unsigned long live_regs_mask;
21415 unsigned long func_type;
21416 int fp_offset = 0;
21417 int saved_pretend_args = 0;
21418 int saved_regs = 0;
21419 unsigned HOST_WIDE_INT args_to_push;
21420 HOST_WIDE_INT size;
21421 arm_stack_offsets *offsets;
21422 bool clobber_ip;
21423
21424 func_type = arm_current_func_type ();
21425
21426 /* Naked functions don't have prologues. */
21427 if (IS_NAKED (func_type))
21428 {
21429 if (flag_stack_usage_info)
21430 current_function_static_stack_size = 0;
21431 return;
21432 }
21433
21434 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
21435 args_to_push = crtl->args.pretend_args_size;
21436
21437 /* Compute which registers we will have to save onto the stack. */
21438 offsets = arm_get_frame_offsets ();
21439 live_regs_mask = offsets->saved_regs_mask;
21440
21441 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21442
21443 if (IS_STACKALIGN (func_type))
21444 {
21445 rtx r0, r1;
21446
21447 /* Handle a word-aligned stack pointer. We generate the following:
21448
21449 mov r0, sp
21450 bic r1, r0, #7
21451 mov sp, r1
21452 <save and restore r0 in normal prologue/epilogue>
21453 mov sp, r0
21454 bx lr
21455
21456 The unwinder doesn't need to know about the stack realignment.
21457 Just tell it we saved SP in r0. */
21458 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21459
21460 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21461 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21462
21463 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21464 RTX_FRAME_RELATED_P (insn) = 1;
21465 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21466
21467 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21468
21469 /* ??? The CFA changes here, which may cause GDB to conclude that it
21470 has entered a different function. That said, the unwind info is
21471 correct, individually, before and after this instruction because
21472 we've described the save of SP, which will override the default
21473 handling of SP as restoring from the CFA. */
21474 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21475 }
21476
21477 /* The static chain register is the same as the IP register. If it is
21478 clobbered when creating the frame, we need to save and restore it. */
21479 clobber_ip = IS_NESTED (func_type)
21480 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21481 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21482 || flag_stack_clash_protection)
21483 && !df_regs_ever_live_p (LR_REGNUM)
21484 && arm_r3_live_at_start_p ()));
21485
21486 /* Find somewhere to store IP whilst the frame is being created.
21487 We try the following places in order:
21488
21489 1. The last argument register r3 if it is available.
21490 2. A slot on the stack above the frame if there are no
21491 arguments to push onto the stack.
21492 3. Register r3 again, after pushing the argument registers
21493 onto the stack, if this is a varargs function.
21494 4. The last slot on the stack created for the arguments to
21495 push, if this isn't a varargs function.
21496
21497 Note - we only need to tell the dwarf2 backend about the SP
21498 adjustment in the second variant; the static chain register
21499 doesn't need to be unwound, as it doesn't contain a value
21500 inherited from the caller. */
21501 if (clobber_ip)
21502 {
21503 if (!arm_r3_live_at_start_p ())
21504 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21505 else if (args_to_push == 0)
21506 {
21507 rtx addr, dwarf;
21508
21509 gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
21510 saved_regs += 4;
21511
21512 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21513 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21514 fp_offset = 4;
21515
21516 /* Just tell the dwarf backend that we adjusted SP. */
21517 dwarf = gen_rtx_SET (stack_pointer_rtx,
21518 plus_constant (Pmode, stack_pointer_rtx,
21519 -fp_offset));
21520 RTX_FRAME_RELATED_P (insn) = 1;
21521 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21522 }
21523 else
21524 {
21525 /* Store the args on the stack. */
21526 if (cfun->machine->uses_anonymous_args)
21527 {
21528 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21529 (0xf0 >> (args_to_push / 4)) & 0xf);
21530 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21531 saved_pretend_args = 1;
21532 }
21533 else
21534 {
21535 rtx addr, dwarf;
21536
21537 if (args_to_push == 4)
21538 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21539 else
21540 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21541 plus_constant (Pmode,
21542 stack_pointer_rtx,
21543 -args_to_push));
21544
21545 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21546
21547 /* Just tell the dwarf backend that we adjusted SP. */
21548 dwarf = gen_rtx_SET (stack_pointer_rtx,
21549 plus_constant (Pmode, stack_pointer_rtx,
21550 -args_to_push));
21551 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21552 }
21553
21554 RTX_FRAME_RELATED_P (insn) = 1;
21555 fp_offset = args_to_push;
21556 args_to_push = 0;
21557 }
21558 }
21559
21560 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21561 {
21562 if (IS_INTERRUPT (func_type))
21563 {
21564 /* Interrupt functions must not corrupt any registers.
21565 Creating a frame pointer however, corrupts the IP
21566 register, so we must push it first. */
21567 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21568
21569 /* Do not set RTX_FRAME_RELATED_P on this insn.
21570 The dwarf stack unwinding code only wants to see one
21571 stack decrement per function, and this is not it. If
21572 this instruction is labeled as being part of the frame
21573 creation sequence then dwarf2out_frame_debug_expr will
21574 die when it encounters the assignment of IP to FP
21575 later on, since the use of SP here establishes SP as
21576 the CFA register and not IP.
21577
21578 Anyway this instruction is not really part of the stack
21579 frame creation although it is part of the prologue. */
21580 }
21581
21582 insn = emit_set_insn (ip_rtx,
21583 plus_constant (Pmode, stack_pointer_rtx,
21584 fp_offset));
21585 RTX_FRAME_RELATED_P (insn) = 1;
21586 }
21587
21588 if (args_to_push)
21589 {
21590 /* Push the argument registers, or reserve space for them. */
21591 if (cfun->machine->uses_anonymous_args)
21592 insn = emit_multi_reg_push
21593 ((0xf0 >> (args_to_push / 4)) & 0xf,
21594 (0xf0 >> (args_to_push / 4)) & 0xf);
21595 else
21596 insn = emit_insn
21597 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21598 GEN_INT (- args_to_push)));
21599 RTX_FRAME_RELATED_P (insn) = 1;
21600 }
21601
21602 /* If this is an interrupt service routine, and the link register
21603 is going to be pushed, and we're not generating an extra
21604 push of IP (needed when a frame pointer is needed and the frame layout is APCS),
21605 subtracting four from LR now will mean that the function return
21606 can be done with a single instruction. */
21607 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21608 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21609 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21610 && TARGET_ARM)
21611 {
21612 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21613
21614 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21615 }
21616
21617 if (live_regs_mask)
21618 {
21619 unsigned long dwarf_regs_mask = live_regs_mask;
21620
21621 saved_regs += bit_count (live_regs_mask) * 4;
21622 if (optimize_size && !frame_pointer_needed
21623 && saved_regs == offsets->saved_regs - offsets->saved_args)
21624 {
21625 /* If no coprocessor registers are being pushed and we don't have
21626 to worry about a frame pointer then push extra registers to
21627 create the stack frame. This is done in a way that does not
21628 alter the frame layout, so is independent of the epilogue. */
21629 int n;
21630 int frame;
21631 n = 0;
21632 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21633 n++;
21634 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21635 if (frame && n * 4 >= frame)
21636 {
21637 n = frame / 4;
21638 live_regs_mask |= (1 << n) - 1;
21639 saved_regs += frame;
21640 }
21641 }
21642
21643 if (TARGET_LDRD
21644 && current_tune->prefer_ldrd_strd
21645 && !optimize_function_for_size_p (cfun))
21646 {
21647 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21648 if (TARGET_THUMB2)
21649 thumb2_emit_strd_push (live_regs_mask);
21650 else if (TARGET_ARM
21651 && !TARGET_APCS_FRAME
21652 && !IS_INTERRUPT (func_type))
21653 arm_emit_strd_push (live_regs_mask);
21654 else
21655 {
21656 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21657 RTX_FRAME_RELATED_P (insn) = 1;
21658 }
21659 }
21660 else
21661 {
21662 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21663 RTX_FRAME_RELATED_P (insn) = 1;
21664 }
21665 }
21666
21667 if (! IS_VOLATILE (func_type))
21668 saved_regs += arm_save_coproc_regs ();
21669
21670 if (frame_pointer_needed && TARGET_ARM)
21671 {
21672 /* Create the new frame pointer. */
21673 if (TARGET_APCS_FRAME)
21674 {
21675 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21676 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21677 RTX_FRAME_RELATED_P (insn) = 1;
21678 }
21679 else
21680 {
21681 insn = GEN_INT (saved_regs - (4 + fp_offset));
21682 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21683 stack_pointer_rtx, insn));
21684 RTX_FRAME_RELATED_P (insn) = 1;
21685 }
21686 }
21687
21688 size = offsets->outgoing_args - offsets->saved_args;
21689 if (flag_stack_usage_info)
21690 current_function_static_stack_size = size;
21691
21692 /* If this isn't an interrupt service routine and we have a frame, then do
21693 stack checking. We use IP as the first scratch register, except for the
21694 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21695 if (!IS_INTERRUPT (func_type)
21696 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21697 || flag_stack_clash_protection))
21698 {
21699 unsigned int regno;
21700
21701 if (!IS_NESTED (func_type) || clobber_ip)
21702 regno = IP_REGNUM;
21703 else if (df_regs_ever_live_p (LR_REGNUM))
21704 regno = LR_REGNUM;
21705 else
21706 regno = 3;
21707
21708 if (crtl->is_leaf && !cfun->calls_alloca)
21709 {
21710 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
21711 arm_emit_probe_stack_range (get_stack_check_protect (),
21712 size - get_stack_check_protect (),
21713 regno, live_regs_mask);
21714 }
21715 else if (size > 0)
21716 arm_emit_probe_stack_range (get_stack_check_protect (), size,
21717 regno, live_regs_mask);
21718 }
21719
21720 /* Recover the static chain register. */
21721 if (clobber_ip)
21722 {
21723 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21724 insn = gen_rtx_REG (SImode, 3);
21725 else
21726 {
21727 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21728 insn = gen_frame_mem (SImode, insn);
21729 }
21730 emit_set_insn (ip_rtx, insn);
21731 emit_insn (gen_force_register_use (ip_rtx));
21732 }
21733
21734 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21735 {
21736 /* This add can produce multiple insns for a large constant, so we
21737 need to get tricky. */
21738 rtx_insn *last = get_last_insn ();
21739
21740 amount = GEN_INT (offsets->saved_args + saved_regs
21741 - offsets->outgoing_args);
21742
21743 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21744 amount));
21745 do
21746 {
21747 last = last ? NEXT_INSN (last) : get_insns ();
21748 RTX_FRAME_RELATED_P (last) = 1;
21749 }
21750 while (last != insn);
21751
21752 /* If the frame pointer is needed, emit a special barrier that
21753 will prevent the scheduler from moving stores to the frame
21754 before the stack adjustment. */
21755 if (frame_pointer_needed)
21756 emit_insn (gen_stack_tie (stack_pointer_rtx,
21757 hard_frame_pointer_rtx));
21758 }
21759
21760
21761 if (frame_pointer_needed && TARGET_THUMB2)
21762 thumb_set_frame_pointer (offsets);
21763
21764 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21765 {
21766 unsigned long mask;
21767
21768 mask = live_regs_mask;
21769 mask &= THUMB2_WORK_REGS;
21770 if (!IS_NESTED (func_type))
21771 mask |= (1 << IP_REGNUM);
21772 arm_load_pic_register (mask);
21773 }
21774
21775 /* If we are profiling, make sure no instructions are scheduled before
21776 the call to mcount. Similarly if the user has requested no
21777 scheduling in the prolog. Similarly if we want non-call exceptions
21778 using the EABI unwinder, to prevent faulting instructions from being
21779 swapped with a stack adjustment. */
21780 if (crtl->profile || !TARGET_SCHED_PROLOG
21781 || (arm_except_unwind_info (&global_options) == UI_TARGET
21782 && cfun->can_throw_non_call_exceptions))
21783 emit_insn (gen_blockage ());
21784
21785 /* If the link register is being kept alive, with the return address in it,
21786 then make sure that it does not get reused by the ce2 pass. */
21787 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21788 cfun->machine->lr_save_eliminated = 1;
21789 }
21790 \f
21791 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21792 static void
21793 arm_print_condition (FILE *stream)
21794 {
21795 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21796 {
21797 /* Branch conversion is not implemented for Thumb-2. */
21798 if (TARGET_THUMB)
21799 {
21800 output_operand_lossage ("predicated Thumb instruction");
21801 return;
21802 }
21803 if (current_insn_predicate != NULL)
21804 {
21805 output_operand_lossage
21806 ("predicated instruction in conditional sequence");
21807 return;
21808 }
21809
21810 fputs (arm_condition_codes[arm_current_cc], stream);
21811 }
21812 else if (current_insn_predicate)
21813 {
21814 enum arm_cond_code code;
21815
21816 if (TARGET_THUMB1)
21817 {
21818 output_operand_lossage ("predicated Thumb instruction");
21819 return;
21820 }
21821
21822 code = get_arm_condition_code (current_insn_predicate);
21823 fputs (arm_condition_codes[code], stream);
21824 }
21825 }
21826
21827
21828 /* Globally reserved letters: acln
21829 Punctuation letters currently used: @_|?().!#
21830 Lower case letters currently used: bcdefhimpqtvwxyz
21831 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21832 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21833
21834 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21835
21836 If CODE is 'd', then the X is a condition operand and the instruction
21837 should only be executed if the condition is true.
21838 if CODE is 'D', then the X is a condition operand and the instruction
21839 should only be executed if the condition is false: however, if the mode
21840 of the comparison is CCFPEmode, then always execute the instruction -- we
21841 do this because in these circumstances !GE does not necessarily imply LT;
21842 in these cases the instruction pattern will take care to make sure that
21843 an instruction containing %d will follow, thereby undoing the effects of
21844 doing this instruction unconditionally.
21845 If CODE is 'N' then X is a floating point operand that must be negated
21846 before output.
21847 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21848 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
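/* Illustrative only (the exact templates live in the machine
   description): an output template along the lines of
       "add%?\t%0, %1, %2"
   uses the '?' punctuation code handled below to emit the current
   condition suffix when the insn is conditionally executed, e.g.
   producing "addne r0, r1, r2" inside a conditional sequence.  */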
21849 static void
21850 arm_print_operand (FILE *stream, rtx x, int code)
21851 {
21852 switch (code)
21853 {
21854 case '@':
21855 fputs (ASM_COMMENT_START, stream);
21856 return;
21857
21858 case '_':
21859 fputs (user_label_prefix, stream);
21860 return;
21861
21862 case '|':
21863 fputs (REGISTER_PREFIX, stream);
21864 return;
21865
21866 case '?':
21867 arm_print_condition (stream);
21868 return;
21869
21870 case '.':
21871 /* The current condition code for a condition code setting instruction.
21872 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21873 fputc('s', stream);
21874 arm_print_condition (stream);
21875 return;
21876
21877 case '!':
21878 /* If the instruction is conditionally executed then print
21879 the current condition code, otherwise print 's'. */
21880 gcc_assert (TARGET_THUMB2);
21881 if (current_insn_predicate)
21882 arm_print_condition (stream);
21883 else
21884 fputc('s', stream);
21885 break;
21886
21887 /* %# is a "break" sequence. It doesn't output anything, but is used to
21888 separate e.g. operand numbers from following text, if that text consists
21889 of further digits which we don't want to be part of the operand
21890 number. */
21891 case '#':
21892 return;
21893
21894 case 'N':
21895 {
21896 REAL_VALUE_TYPE r;
21897 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21898 fprintf (stream, "%s", fp_const_from_val (&r));
21899 }
21900 return;
21901
21902 /* An integer or symbol address without a preceding # sign. */
21903 case 'c':
21904 switch (GET_CODE (x))
21905 {
21906 case CONST_INT:
21907 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21908 break;
21909
21910 case SYMBOL_REF:
21911 output_addr_const (stream, x);
21912 break;
21913
21914 case CONST:
21915 if (GET_CODE (XEXP (x, 0)) == PLUS
21916 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21917 {
21918 output_addr_const (stream, x);
21919 break;
21920 }
21921 /* Fall through. */
21922
21923 default:
21924 output_operand_lossage ("Unsupported operand for code '%c'", code);
21925 }
21926 return;
21927
21928 /* An integer that we want to print in HEX. */
21929 case 'x':
21930 switch (GET_CODE (x))
21931 {
21932 case CONST_INT:
21933 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21934 break;
21935
21936 default:
21937 output_operand_lossage ("Unsupported operand for code '%c'", code);
21938 }
21939 return;
21940
21941 case 'B':
21942 if (CONST_INT_P (x))
21943 {
21944 HOST_WIDE_INT val;
21945 val = ARM_SIGN_EXTEND (~INTVAL (x));
21946 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21947 }
21948 else
21949 {
21950 putc ('~', stream);
21951 output_addr_const (stream, x);
21952 }
21953 return;
21954
21955 case 'b':
21956 /* Print the log2 of a CONST_INT. */
21957 {
21958 HOST_WIDE_INT val;
21959
21960 if (!CONST_INT_P (x)
21961 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21962 output_operand_lossage ("Unsupported operand for code '%c'", code);
21963 else
21964 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21965 }
21966 return;
21967
21968 case 'L':
21969 /* The low 16 bits of an immediate constant. */
21970 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21971 return;
21972
21973 case 'i':
21974 fprintf (stream, "%s", arithmetic_instr (x, 1));
21975 return;
21976
21977 case 'I':
21978 fprintf (stream, "%s", arithmetic_instr (x, 0));
21979 return;
21980
21981 case 'S':
21982 {
21983 HOST_WIDE_INT val;
21984 const char *shift;
21985
21986 shift = shift_op (x, &val);
21987
21988 if (shift)
21989 {
21990 fprintf (stream, ", %s ", shift);
21991 if (val == -1)
21992 arm_print_operand (stream, XEXP (x, 1), 0);
21993 else
21994 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21995 }
21996 }
21997 return;
21998
21999 /* An explanation of the 'Q', 'R' and 'H' register operands:
22000
22001 In a pair of registers containing a DI or DF value the 'Q'
22002 operand returns the register number of the register containing
22003 the least significant part of the value. The 'R' operand returns
22004 the register number of the register containing the most
22005 significant part of the value.
22006
22007 The 'H' operand returns the higher of the two register numbers.
22008 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22009 same as the 'Q' operand, since the most significant part of the
22010 value is held in the lower number register. The reverse is true
22011 on systems where WORDS_BIG_ENDIAN is false.
22012
22013 The purpose of these operands is to distinguish between cases
22014 where the endian-ness of the values is important (for example
22015 when they are added together), and cases where the endian-ness
22016 is irrelevant, but the order of register operations is important.
22017 For example when loading a value from memory into a register
22018 pair, the endian-ness does not matter. Provided that the value
22019 from the lower memory address is put into the lower numbered
22020 register, and the value from the higher address is put into the
22021 higher numbered register, the load will work regardless of whether
22022 the value being loaded is big-wordian or little-wordian. The
22023 order of the two register loads can matter however, if the address
22024 of the memory location is actually held in one of the registers
22025 being overwritten by the load.
22026
22027 The 'Q' and 'R' constraints are also available for 64-bit
22028 constants. */
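/* For example, on a little-endian target (WORDS_BIG_ENDIAN false)
   with a DImode value in the register pair r0/r1: '%Q' prints "r0"
   (least significant half), '%R' prints "r1" (most significant half)
   and '%H' prints "r1" (the higher-numbered register).  When
   WORDS_BIG_ENDIAN is true, '%Q' and '%H' both print "r1" and '%R'
   prints "r0".  */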
22029 case 'Q':
22030 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22031 {
22032 rtx part = gen_lowpart (SImode, x);
22033 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22034 return;
22035 }
22036
22037 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22038 {
22039 output_operand_lossage ("invalid operand for code '%c'", code);
22040 return;
22041 }
22042
22043 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22044 return;
22045
22046 case 'R':
22047 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22048 {
22049 machine_mode mode = GET_MODE (x);
22050 rtx part;
22051
22052 if (mode == VOIDmode)
22053 mode = DImode;
22054 part = gen_highpart_mode (SImode, mode, x);
22055 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22056 return;
22057 }
22058
22059 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22060 {
22061 output_operand_lossage ("invalid operand for code '%c'", code);
22062 return;
22063 }
22064
22065 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22066 return;
22067
22068 case 'H':
22069 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22070 {
22071 output_operand_lossage ("invalid operand for code '%c'", code);
22072 return;
22073 }
22074
22075 asm_fprintf (stream, "%r", REGNO (x) + 1);
22076 return;
22077
22078 case 'J':
22079 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22080 {
22081 output_operand_lossage ("invalid operand for code '%c'", code);
22082 return;
22083 }
22084
22085 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22086 return;
22087
22088 case 'K':
22089 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22090 {
22091 output_operand_lossage ("invalid operand for code '%c'", code);
22092 return;
22093 }
22094
22095 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22096 return;
22097
22098 case 'm':
22099 asm_fprintf (stream, "%r",
22100 REG_P (XEXP (x, 0))
22101 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22102 return;
22103
22104 case 'M':
22105 asm_fprintf (stream, "{%r-%r}",
22106 REGNO (x),
22107 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22108 return;
22109
22110 /* Like 'M', but writing doubleword vector registers, for use by Neon
22111 insns. */
22112 case 'h':
22113 {
22114 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22115 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22116 if (numregs == 1)
22117 asm_fprintf (stream, "{d%d}", regno);
22118 else
22119 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22120 }
22121 return;
22122
22123 case 'd':
22124 /* CONST_TRUE_RTX means always -- that's the default. */
22125 if (x == const_true_rtx)
22126 return;
22127
22128 if (!COMPARISON_P (x))
22129 {
22130 output_operand_lossage ("invalid operand for code '%c'", code);
22131 return;
22132 }
22133
22134 fputs (arm_condition_codes[get_arm_condition_code (x)],
22135 stream);
22136 return;
22137
22138 case 'D':
22139 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22140 want to do that. */
22141 if (x == const_true_rtx)
22142 {
22143 output_operand_lossage ("instruction never executed");
22144 return;
22145 }
22146 if (!COMPARISON_P (x))
22147 {
22148 output_operand_lossage ("invalid operand for code '%c'", code);
22149 return;
22150 }
22151
22152 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22153 (get_arm_condition_code (x))],
22154 stream);
22155 return;
22156
22157 case 's':
22158 case 'V':
22159 case 'W':
22160 case 'X':
22161 case 'Y':
22162 case 'Z':
22163 /* Former Maverick support, removed after GCC-4.7. */
22164 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22165 return;
22166
22167 case 'U':
22168 if (!REG_P (x)
22169 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22170 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22171 /* Bad value for wCG register number. */
22172 {
22173 output_operand_lossage ("invalid operand for code '%c'", code);
22174 return;
22175 }
22176
22177 else
22178 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22179 return;
22180
22181 /* Print an iWMMXt control register name. */
22182 case 'w':
22183 if (!CONST_INT_P (x)
22184 || INTVAL (x) < 0
22185 || INTVAL (x) >= 16)
22186 /* Bad value for wC register number. */
22187 {
22188 output_operand_lossage ("invalid operand for code '%c'", code);
22189 return;
22190 }
22191
22192 else
22193 {
22194 static const char * wc_reg_names [16] =
22195 {
22196 "wCID", "wCon", "wCSSF", "wCASF",
22197 "wC4", "wC5", "wC6", "wC7",
22198 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22199 "wC12", "wC13", "wC14", "wC15"
22200 };
22201
22202 fputs (wc_reg_names [INTVAL (x)], stream);
22203 }
22204 return;
22205
22206 /* Print the high single-precision register of a VFP double-precision
22207 register. */
22208 case 'p':
22209 {
22210 machine_mode mode = GET_MODE (x);
22211 int regno;
22212
22213 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22214 {
22215 output_operand_lossage ("invalid operand for code '%c'", code);
22216 return;
22217 }
22218
22219 regno = REGNO (x);
22220 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22221 {
22222 output_operand_lossage ("invalid operand for code '%c'", code);
22223 return;
22224 }
22225
22226 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22227 }
22228 return;
22229
22230 /* Print a VFP/Neon double precision or quad precision register name. */
22231 case 'P':
22232 case 'q':
22233 {
22234 machine_mode mode = GET_MODE (x);
22235 int is_quad = (code == 'q');
22236 int regno;
22237
22238 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22239 {
22240 output_operand_lossage ("invalid operand for code '%c'", code);
22241 return;
22242 }
22243
22244 if (!REG_P (x)
22245 || !IS_VFP_REGNUM (REGNO (x)))
22246 {
22247 output_operand_lossage ("invalid operand for code '%c'", code);
22248 return;
22249 }
22250
22251 regno = REGNO (x);
22252 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22253 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22254 {
22255 output_operand_lossage ("invalid operand for code '%c'", code);
22256 return;
22257 }
22258
22259 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22260 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22261 }
22262 return;
22263
22264 /* These two codes print the low/high doubleword register of a Neon quad
22265 register, respectively. For pair-structure types, can also print
22266 low/high quadword registers. */
22267 case 'e':
22268 case 'f':
22269 {
22270 machine_mode mode = GET_MODE (x);
22271 int regno;
22272
22273 if ((GET_MODE_SIZE (mode) != 16
22274 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22275 {
22276 output_operand_lossage ("invalid operand for code '%c'", code);
22277 return;
22278 }
22279
22280 regno = REGNO (x);
22281 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22282 {
22283 output_operand_lossage ("invalid operand for code '%c'", code);
22284 return;
22285 }
22286
22287 if (GET_MODE_SIZE (mode) == 16)
22288 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22289 + (code == 'f' ? 1 : 0));
22290 else
22291 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22292 + (code == 'f' ? 1 : 0));
22293 }
22294 return;
22295
22296 /* Print a VFPv3 floating-point constant, represented as an integer
22297 index. */
22298 case 'G':
22299 {
22300 int index = vfp3_const_double_index (x);
22301 gcc_assert (index != -1);
22302 fprintf (stream, "%d", index);
22303 }
22304 return;
22305
22306 /* Print bits representing opcode features for Neon.
22307
22308 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22309 and polynomials as unsigned.
22310
22311 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22312
22313 Bit 2 is 1 for rounding functions, 0 otherwise. */
22314
22315 /* Identify the type as 's', 'u', 'p' or 'f'. */
22316 case 'T':
22317 {
22318 HOST_WIDE_INT bits = INTVAL (x);
22319 fputc ("uspf"[bits & 3], stream);
22320 }
22321 return;
22322
22323 /* Likewise, but signed and unsigned integers are both 'i'. */
22324 case 'F':
22325 {
22326 HOST_WIDE_INT bits = INTVAL (x);
22327 fputc ("iipf"[bits & 3], stream);
22328 }
22329 return;
22330
22331 /* As for 'T', but emit 'u' instead of 'p'. */
22332 case 't':
22333 {
22334 HOST_WIDE_INT bits = INTVAL (x);
22335 fputc ("usuf"[bits & 3], stream);
22336 }
22337 return;
22338
22339 /* Bit 2: rounding (vs none). */
22340 case 'O':
22341 {
22342 HOST_WIDE_INT bits = INTVAL (x);
22343 fputs ((bits & 4) != 0 ? "r" : "", stream);
22344 }
22345 return;
22346
22347 /* Memory operand for vld1/vst1 instruction. */
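/* A worked example (illustrative): a 16-byte access through r0 that
   is known to be 128-bit aligned and uses post-increment addressing
   is printed as "[r0:128]!"; without a usable alignment guarantee it
   would be just "[r0]!".  */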
22348 case 'A':
22349 {
22350 rtx addr;
22351 bool postinc = FALSE;
22352 rtx postinc_reg = NULL;
22353 unsigned align, memsize, align_bits;
22354
22355 gcc_assert (MEM_P (x));
22356 addr = XEXP (x, 0);
22357 if (GET_CODE (addr) == POST_INC)
22358 {
22359 postinc = 1;
22360 addr = XEXP (addr, 0);
22361 }
22362 if (GET_CODE (addr) == POST_MODIFY)
22363 {
22364 postinc_reg = XEXP (XEXP (addr, 1), 1);
22365 addr = XEXP (addr, 0);
22366 }
22367 asm_fprintf (stream, "[%r", REGNO (addr));
22368
22369 /* We know the alignment of this access, so we can emit a hint in the
22370 instruction (for some alignments) as an aid to the memory subsystem
22371 of the target. */
22372 align = MEM_ALIGN (x) >> 3;
22373 memsize = MEM_SIZE (x);
22374
22375 /* Only certain alignment specifiers are supported by the hardware. */
22376 if (memsize == 32 && (align % 32) == 0)
22377 align_bits = 256;
22378 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22379 align_bits = 128;
22380 else if (memsize >= 8 && (align % 8) == 0)
22381 align_bits = 64;
22382 else
22383 align_bits = 0;
22384
22385 if (align_bits != 0)
22386 asm_fprintf (stream, ":%d", align_bits);
22387
22388 asm_fprintf (stream, "]");
22389
22390 if (postinc)
22391 fputs("!", stream);
22392 if (postinc_reg)
22393 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22394 }
22395 return;
22396
22397 case 'C':
22398 {
22399 rtx addr;
22400
22401 gcc_assert (MEM_P (x));
22402 addr = XEXP (x, 0);
22403 gcc_assert (REG_P (addr));
22404 asm_fprintf (stream, "[%r]", REGNO (addr));
22405 }
22406 return;
22407
22408 /* Translate an S register number into a D register number and element index. */
22409 case 'y':
22410 {
22411 machine_mode mode = GET_MODE (x);
22412 int regno;
22413
22414 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22415 {
22416 output_operand_lossage ("invalid operand for code '%c'", code);
22417 return;
22418 }
22419
22420 regno = REGNO (x);
22421 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22422 {
22423 output_operand_lossage ("invalid operand for code '%c'", code);
22424 return;
22425 }
22426
22427 regno = regno - FIRST_VFP_REGNUM;
22428 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22429 }
22430 return;
22431
22432 case 'v':
22433 gcc_assert (CONST_DOUBLE_P (x));
22434 int result;
22435 result = vfp3_const_double_for_fract_bits (x);
22436 if (result == 0)
22437 result = vfp3_const_double_for_bits (x);
22438 fprintf (stream, "#%d", result);
22439 return;
22440
22441 /* Register specifier for vld1.16/vst1.16. Translate the S register
22442 number into a D register number and element index. */
22443 case 'z':
22444 {
22445 machine_mode mode = GET_MODE (x);
22446 int regno;
22447
22448 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22449 {
22450 output_operand_lossage ("invalid operand for code '%c'", code);
22451 return;
22452 }
22453
22454 regno = REGNO (x);
22455 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22456 {
22457 output_operand_lossage ("invalid operand for code '%c'", code);
22458 return;
22459 }
22460
22461 regno = regno - FIRST_VFP_REGNUM;
22462 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22463 }
22464 return;
22465
22466 default:
22467 if (x == 0)
22468 {
22469 output_operand_lossage ("missing operand");
22470 return;
22471 }
22472
22473 switch (GET_CODE (x))
22474 {
22475 case REG:
22476 asm_fprintf (stream, "%r", REGNO (x));
22477 break;
22478
22479 case MEM:
22480 output_address (GET_MODE (x), XEXP (x, 0));
22481 break;
22482
22483 case CONST_DOUBLE:
22484 {
22485 char fpstr[20];
22486 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22487 sizeof (fpstr), 0, 1);
22488 fprintf (stream, "#%s", fpstr);
22489 }
22490 break;
22491
22492 default:
22493 gcc_assert (GET_CODE (x) != NEG);
22494 fputc ('#', stream);
22495 if (GET_CODE (x) == HIGH)
22496 {
22497 fputs (":lower16:", stream);
22498 x = XEXP (x, 0);
22499 }
22500
22501 output_addr_const (stream, x);
22502 break;
22503 }
22504 }
22505 }
22506 \f
22507 /* Target hook for printing a memory address. */
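/* Typical 32-bit outputs (illustrative): "[r0]", "[r0, #4]",
   "[r0, -r1]", "[r0, r1, lsl #2]", "[r0, #8]!" for pre-indexed and
   "[r0], #8" for post-indexed addressing.  */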
22508 static void
22509 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22510 {
22511 if (TARGET_32BIT)
22512 {
22513 int is_minus = GET_CODE (x) == MINUS;
22514
22515 if (REG_P (x))
22516 asm_fprintf (stream, "[%r]", REGNO (x));
22517 else if (GET_CODE (x) == PLUS || is_minus)
22518 {
22519 rtx base = XEXP (x, 0);
22520 rtx index = XEXP (x, 1);
22521 HOST_WIDE_INT offset = 0;
22522 if (!REG_P (base)
22523 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22524 {
22525 /* Ensure that BASE is a register
22526 (one of them must be).
22527 Also ensure that SP is not used as an index register. */
22528 std::swap (base, index);
22529 }
22530 switch (GET_CODE (index))
22531 {
22532 case CONST_INT:
22533 offset = INTVAL (index);
22534 if (is_minus)
22535 offset = -offset;
22536 asm_fprintf (stream, "[%r, #%wd]",
22537 REGNO (base), offset);
22538 break;
22539
22540 case REG:
22541 asm_fprintf (stream, "[%r, %s%r]",
22542 REGNO (base), is_minus ? "-" : "",
22543 REGNO (index));
22544 break;
22545
22546 case MULT:
22547 case ASHIFTRT:
22548 case LSHIFTRT:
22549 case ASHIFT:
22550 case ROTATERT:
22551 {
22552 asm_fprintf (stream, "[%r, %s%r",
22553 REGNO (base), is_minus ? "-" : "",
22554 REGNO (XEXP (index, 0)));
22555 arm_print_operand (stream, index, 'S');
22556 fputs ("]", stream);
22557 break;
22558 }
22559
22560 default:
22561 gcc_unreachable ();
22562 }
22563 }
22564 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22565 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22566 {
22567 gcc_assert (REG_P (XEXP (x, 0)));
22568
22569 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22570 asm_fprintf (stream, "[%r, #%s%d]!",
22571 REGNO (XEXP (x, 0)),
22572 GET_CODE (x) == PRE_DEC ? "-" : "",
22573 GET_MODE_SIZE (mode));
22574 else
22575 asm_fprintf (stream, "[%r], #%s%d",
22576 REGNO (XEXP (x, 0)),
22577 GET_CODE (x) == POST_DEC ? "-" : "",
22578 GET_MODE_SIZE (mode));
22579 }
22580 else if (GET_CODE (x) == PRE_MODIFY)
22581 {
22582 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22583 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22584 asm_fprintf (stream, "#%wd]!",
22585 INTVAL (XEXP (XEXP (x, 1), 1)));
22586 else
22587 asm_fprintf (stream, "%r]!",
22588 REGNO (XEXP (XEXP (x, 1), 1)));
22589 }
22590 else if (GET_CODE (x) == POST_MODIFY)
22591 {
22592 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22593 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22594 asm_fprintf (stream, "#%wd",
22595 INTVAL (XEXP (XEXP (x, 1), 1)));
22596 else
22597 asm_fprintf (stream, "%r",
22598 REGNO (XEXP (XEXP (x, 1), 1)));
22599 }
22600 else output_addr_const (stream, x);
22601 }
22602 else
22603 {
22604 if (REG_P (x))
22605 asm_fprintf (stream, "[%r]", REGNO (x));
22606 else if (GET_CODE (x) == POST_INC)
22607 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22608 else if (GET_CODE (x) == PLUS)
22609 {
22610 gcc_assert (REG_P (XEXP (x, 0)));
22611 if (CONST_INT_P (XEXP (x, 1)))
22612 asm_fprintf (stream, "[%r, #%wd]",
22613 REGNO (XEXP (x, 0)),
22614 INTVAL (XEXP (x, 1)));
22615 else
22616 asm_fprintf (stream, "[%r, %r]",
22617 REGNO (XEXP (x, 0)),
22618 REGNO (XEXP (x, 1)));
22619 }
22620 else
22621 output_addr_const (stream, x);
22622 }
22623 }
22624 \f
22625 /* Target hook for indicating whether a punctuation character for
22626 TARGET_PRINT_OPERAND is valid. */
22627 static bool
22628 arm_print_operand_punct_valid_p (unsigned char code)
22629 {
22630 return (code == '@' || code == '|' || code == '.'
22631 || code == '(' || code == ')' || code == '#'
22632 || (TARGET_32BIT && (code == '?'))
22633 || (TARGET_THUMB2 && (code == '!'))
22634 || (TARGET_THUMB && (code == '_')));
22635 }
22636 \f
22637 /* Target hook for assembling integer objects. The ARM version needs to
22638 handle word-sized values specially. */
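/* For example, a PIC constant-pool reference to a preemptible global
   symbol "foo" is emitted as ".word foo(GOT)", while a locally
   resolvable reference gets "(GOTOFF)" instead (assuming
   NEED_GOT_RELOC).  */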
22639 static bool
22640 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22641 {
22642 machine_mode mode;
22643
22644 if (size == UNITS_PER_WORD && aligned_p)
22645 {
22646 fputs ("\t.word\t", asm_out_file);
22647 output_addr_const (asm_out_file, x);
22648
22649 /* Mark symbols as position independent. We only do this in the
22650 .text segment, not in the .data segment. */
22651 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22652 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22653 {
22654 /* See legitimize_pic_address for an explanation of the
22655 TARGET_VXWORKS_RTP check. */
22656 /* References to weak symbols cannot be resolved locally:
22657 they may be overridden by a non-weak definition at link
22658 time. */
22659 if (!arm_pic_data_is_text_relative
22660 || (GET_CODE (x) == SYMBOL_REF
22661 && (!SYMBOL_REF_LOCAL_P (x)
22662 || (SYMBOL_REF_DECL (x)
22663 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22664 fputs ("(GOT)", asm_out_file);
22665 else
22666 fputs ("(GOTOFF)", asm_out_file);
22667 }
22668 fputc ('\n', asm_out_file);
22669 return true;
22670 }
22671
22672 mode = GET_MODE (x);
22673
22674 if (arm_vector_mode_supported_p (mode))
22675 {
22676 int i, units;
22677
22678 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22679
22680 units = CONST_VECTOR_NUNITS (x);
22681 size = GET_MODE_UNIT_SIZE (mode);
22682
22683 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22684 for (i = 0; i < units; i++)
22685 {
22686 rtx elt = CONST_VECTOR_ELT (x, i);
22687 assemble_integer
22688 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22689 }
22690 else
22691 for (i = 0; i < units; i++)
22692 {
22693 rtx elt = CONST_VECTOR_ELT (x, i);
22694 assemble_real
22695 (*CONST_DOUBLE_REAL_VALUE (elt),
22696 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22697 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22698 }
22699
22700 return true;
22701 }
22702
22703 return default_assemble_integer (x, size, aligned_p);
22704 }
22705
22706 static void
22707 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22708 {
22709 section *s;
22710
22711 if (!TARGET_AAPCS_BASED)
22712 {
22713 (is_ctor ?
22714 default_named_section_asm_out_constructor
22715 : default_named_section_asm_out_destructor) (symbol, priority);
22716 return;
22717 }
22718
22719 /* Put these in the .init_array section, using a special relocation. */
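/* For example, a constructor with priority 65 goes into the section
   ".init_array.00065" (the "%.5u" below zero-pads the priority) and
   its entry is emitted further down as ".word symbol(target1)".  */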
22720 if (priority != DEFAULT_INIT_PRIORITY)
22721 {
22722 char buf[18];
22723 sprintf (buf, "%s.%.5u",
22724 is_ctor ? ".init_array" : ".fini_array",
22725 priority);
22726 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22727 }
22728 else if (is_ctor)
22729 s = ctors_section;
22730 else
22731 s = dtors_section;
22732
22733 switch_to_section (s);
22734 assemble_align (POINTER_SIZE);
22735 fputs ("\t.word\t", asm_out_file);
22736 output_addr_const (asm_out_file, symbol);
22737 fputs ("(target1)\n", asm_out_file);
22738 }
22739
22740 /* Add a function to the list of static constructors. */
22741
22742 static void
22743 arm_elf_asm_constructor (rtx symbol, int priority)
22744 {
22745 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22746 }
22747
22748 /* Add a function to the list of static destructors. */
22749
22750 static void
22751 arm_elf_asm_destructor (rtx symbol, int priority)
22752 {
22753 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22754 }
22755 \f
22756 /* A finite state machine takes care of noticing whether or not instructions
22757 can be conditionally executed, and thus decreases execution time and code
22758 size by deleting branch instructions. The fsm is controlled by
22759 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22760
22761 /* The states of the fsm controlling condition codes are:
22762 0: normal, do nothing special
22763 1: make ASM_OUTPUT_OPCODE not output this instruction
22764 2: make ASM_OUTPUT_OPCODE not output this instruction
22765 3: make instructions conditional
22766 4: make instructions conditional
22767
22768 State transitions (state->state by whom under condition):
22769 0 -> 1 final_prescan_insn if the `target' is a label
22770 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22771 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22772 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22773 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22774 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22775 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22776 (the target insn is arm_target_insn).
22777
22778 If the jump clobbers the conditions then we use states 2 and 4.
22779
22780 A similar thing can be done with conditional return insns.
22781
22782 XXX In case the `target' is an unconditional branch, this conditionalising
22783 of the instructions always reduces code size, but not always execution
22784 time. But then, I want to reduce the code size to somewhere near what
22785 /bin/cc produces. */
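/* A sketch of the transformation (illustrative ARM assembly):

       cmp     r0, #0
       beq     .L1
       add     r1, r1, #1
   .L1:

   is emitted instead as

       cmp     r0, #0
       addne   r1, r1, #1

   i.e. the branch and its label disappear and the skipped
   instruction is predicated on the inverse of the branch
   condition.  */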
22786
22787 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22788 instructions. When a COND_EXEC instruction is seen the subsequent
22789 instructions are scanned so that multiple conditional instructions can be
22790 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22791 specify the length and true/false mask for the IT block. These will be
22792 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22793
22794 /* Returns the index of the ARM condition code string in
22795 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22796 COMPARISON should be an rtx like `(eq (...) (...))'. */
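/* For instance, (eq (reg:CC_Z cc) (const_int 0)) maps to ARM_EQ,
   while under CC_SWPmode (the operands were swapped when the flags
   were set) a GT comparison maps to ARM_LT.  */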
22797
22798 enum arm_cond_code
22799 maybe_get_arm_condition_code (rtx comparison)
22800 {
22801 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22802 enum arm_cond_code code;
22803 enum rtx_code comp_code = GET_CODE (comparison);
22804
22805 if (GET_MODE_CLASS (mode) != MODE_CC)
22806 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22807 XEXP (comparison, 1));
22808
22809 switch (mode)
22810 {
22811 case E_CC_DNEmode: code = ARM_NE; goto dominance;
22812 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
22813 case E_CC_DGEmode: code = ARM_GE; goto dominance;
22814 case E_CC_DGTmode: code = ARM_GT; goto dominance;
22815 case E_CC_DLEmode: code = ARM_LE; goto dominance;
22816 case E_CC_DLTmode: code = ARM_LT; goto dominance;
22817 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
22818 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
22819 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
22820 case E_CC_DLTUmode: code = ARM_CC;
22821
22822 dominance:
22823 if (comp_code == EQ)
22824 return ARM_INVERSE_CONDITION_CODE (code);
22825 if (comp_code == NE)
22826 return code;
22827 return ARM_NV;
22828
22829 case E_CC_NOOVmode:
22830 switch (comp_code)
22831 {
22832 case NE: return ARM_NE;
22833 case EQ: return ARM_EQ;
22834 case GE: return ARM_PL;
22835 case LT: return ARM_MI;
22836 default: return ARM_NV;
22837 }
22838
22839 case E_CC_Zmode:
22840 switch (comp_code)
22841 {
22842 case NE: return ARM_NE;
22843 case EQ: return ARM_EQ;
22844 default: return ARM_NV;
22845 }
22846
22847 case E_CC_Nmode:
22848 switch (comp_code)
22849 {
22850 case NE: return ARM_MI;
22851 case EQ: return ARM_PL;
22852 default: return ARM_NV;
22853 }
22854
22855 case E_CCFPEmode:
22856 case E_CCFPmode:
22857 /* We can handle all cases except UNEQ and LTGT. */
22858 switch (comp_code)
22859 {
22860 case GE: return ARM_GE;
22861 case GT: return ARM_GT;
22862 case LE: return ARM_LS;
22863 case LT: return ARM_MI;
22864 case NE: return ARM_NE;
22865 case EQ: return ARM_EQ;
22866 case ORDERED: return ARM_VC;
22867 case UNORDERED: return ARM_VS;
22868 case UNLT: return ARM_LT;
22869 case UNLE: return ARM_LE;
22870 case UNGT: return ARM_HI;
22871 case UNGE: return ARM_PL;
22872 /* UNEQ and LTGT do not have a representation. */
22873 case UNEQ: /* Fall through. */
22874 case LTGT: /* Fall through. */
22875 default: return ARM_NV;
22876 }
22877
22878 case E_CC_SWPmode:
22879 switch (comp_code)
22880 {
22881 case NE: return ARM_NE;
22882 case EQ: return ARM_EQ;
22883 case GE: return ARM_LE;
22884 case GT: return ARM_LT;
22885 case LE: return ARM_GE;
22886 case LT: return ARM_GT;
22887 case GEU: return ARM_LS;
22888 case GTU: return ARM_CC;
22889 case LEU: return ARM_CS;
22890 case LTU: return ARM_HI;
22891 default: return ARM_NV;
22892 }
22893
22894 case E_CC_Cmode:
22895 switch (comp_code)
22896 {
22897 case LTU: return ARM_CS;
22898 case GEU: return ARM_CC;
22899 case NE: return ARM_CS;
22900 case EQ: return ARM_CC;
22901 default: return ARM_NV;
22902 }
22903
22904 case E_CC_CZmode:
22905 switch (comp_code)
22906 {
22907 case NE: return ARM_NE;
22908 case EQ: return ARM_EQ;
22909 case GEU: return ARM_CS;
22910 case GTU: return ARM_HI;
22911 case LEU: return ARM_LS;
22912 case LTU: return ARM_CC;
22913 default: return ARM_NV;
22914 }
22915
22916 case E_CC_NCVmode:
22917 switch (comp_code)
22918 {
22919 case GE: return ARM_GE;
22920 case LT: return ARM_LT;
22921 case GEU: return ARM_CS;
22922 case LTU: return ARM_CC;
22923 default: return ARM_NV;
22924 }
22925
22926 case E_CC_Vmode:
22927 switch (comp_code)
22928 {
22929 case NE: return ARM_VS;
22930 case EQ: return ARM_VC;
22931 default: return ARM_NV;
22932 }
22933
22934 case E_CCmode:
22935 switch (comp_code)
22936 {
22937 case NE: return ARM_NE;
22938 case EQ: return ARM_EQ;
22939 case GE: return ARM_GE;
22940 case GT: return ARM_GT;
22941 case LE: return ARM_LE;
22942 case LT: return ARM_LT;
22943 case GEU: return ARM_CS;
22944 case GTU: return ARM_HI;
22945 case LEU: return ARM_LS;
22946 case LTU: return ARM_CC;
22947 default: return ARM_NV;
22948 }
22949
22950 default: gcc_unreachable ();
22951 }
22952 }
22953
22954 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22955 static enum arm_cond_code
22956 get_arm_condition_code (rtx comparison)
22957 {
22958 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22959 gcc_assert (code != ARM_NV);
22960 return code;
22961 }
22962
22963 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
22964 code registers when not targeting Thumb1. The VFP condition register
22965 only exists when generating hard-float code. */
22966 static bool
22967 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
22968 {
22969 if (!TARGET_32BIT)
22970 return false;
22971
22972 *p1 = CC_REGNUM;
22973 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
22974 return true;
22975 }
22976
22977 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22978 instructions. */
22979 void
22980 thumb2_final_prescan_insn (rtx_insn *insn)
22981 {
22982 rtx_insn *first_insn = insn;
22983 rtx body = PATTERN (insn);
22984 rtx predicate;
22985 enum arm_cond_code code;
22986 int n;
22987 int mask;
22988 int max;
22989
22990 /* max_insns_skipped in the tune was already taken into account in the
22991 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22992 just emit the IT blocks as we can. It does not make sense to split
22993 the IT blocks. */
22994 max = MAX_INSN_PER_IT_BLOCK;
22995
22996 /* Remove the previous insn from the count of insns to be output. */
22997 if (arm_condexec_count)
22998 arm_condexec_count--;
22999
23000 /* Nothing to do if we are already inside a conditional block. */
23001 if (arm_condexec_count)
23002 return;
23003
23004 if (GET_CODE (body) != COND_EXEC)
23005 return;
23006
23007 /* Conditional jumps are implemented directly. */
23008 if (JUMP_P (insn))
23009 return;
23010
23011 predicate = COND_EXEC_TEST (body);
23012 arm_current_cc = get_arm_condition_code (predicate);
23013
23014 n = get_attr_ce_count (insn);
23015 arm_condexec_count = 1;
23016 arm_condexec_mask = (1 << n) - 1;
23017 arm_condexec_masklen = n;
23018 /* See if subsequent instructions can be combined into the same block. */
23019 for (;;)
23020 {
23021 insn = next_nonnote_insn (insn);
23022
23023 /* Jumping into the middle of an IT block is illegal, so a label or
23024 barrier terminates the block. */
23025 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23026 break;
23027
23028 body = PATTERN (insn);
23029 /* USE and CLOBBER aren't really insns, so just skip them. */
23030 if (GET_CODE (body) == USE
23031 || GET_CODE (body) == CLOBBER)
23032 continue;
23033
23034 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23035 if (GET_CODE (body) != COND_EXEC)
23036 break;
23037 /* Maximum number of conditionally executed instructions in a block. */
23038 n = get_attr_ce_count (insn);
23039 if (arm_condexec_masklen + n > max)
23040 break;
23041
23042 predicate = COND_EXEC_TEST (body);
23043 code = get_arm_condition_code (predicate);
23044 mask = (1 << n) - 1;
23045 if (arm_current_cc == code)
23046 arm_condexec_mask |= (mask << arm_condexec_masklen);
23047 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23048 break;
23049
23050 arm_condexec_count++;
23051 arm_condexec_masklen += n;
23052
23053 /* A jump must be the last instruction in a conditional block. */
23054 if (JUMP_P (insn))
23055 break;
23056 }
23057 /* Restore recog_data (getting the attributes of other insns can
23058 destroy this array, but final.c assumes that it remains intact
23059 across this call). */
23060 extract_constrain_insn_cached (first_insn);
23061 }
23062
23063 void
23064 arm_final_prescan_insn (rtx_insn *insn)
23065 {
23066 /* BODY will hold the body of INSN. */
23067 rtx body = PATTERN (insn);
23068
23069 /* This will be 1 if trying to repeat the trick, and things need to be
23070 reversed if it appears to fail. */
23071 int reverse = 0;
23072
23073 /* If we start with a return insn, we only succeed if we find another one. */
23074 int seeking_return = 0;
23075 enum rtx_code return_code = UNKNOWN;
23076
23077 /* START_INSN will hold the insn from where we start looking. This is the
23078 first insn after the following code_label if REVERSE is true. */
23079 rtx_insn *start_insn = insn;
23080
23081 /* If in state 4, check if the target branch is reached, in order to
23082 change back to state 0. */
23083 if (arm_ccfsm_state == 4)
23084 {
23085 if (insn == arm_target_insn)
23086 {
23087 arm_target_insn = NULL;
23088 arm_ccfsm_state = 0;
23089 }
23090 return;
23091 }
23092
23093 /* If in state 3, it is possible to repeat the trick, if this insn is an
23094 unconditional branch to a label, and immediately following this branch
23095 is the previous target label which is only used once, and the label this
23096 branch jumps to is not too far off. */
23097 if (arm_ccfsm_state == 3)
23098 {
23099 if (simplejump_p (insn))
23100 {
23101 start_insn = next_nonnote_insn (start_insn);
23102 if (BARRIER_P (start_insn))
23103 {
23104 /* XXX Isn't this always a barrier? */
23105 start_insn = next_nonnote_insn (start_insn);
23106 }
23107 if (LABEL_P (start_insn)
23108 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23109 && LABEL_NUSES (start_insn) == 1)
23110 reverse = TRUE;
23111 else
23112 return;
23113 }
23114 else if (ANY_RETURN_P (body))
23115 {
23116 start_insn = next_nonnote_insn (start_insn);
23117 if (BARRIER_P (start_insn))
23118 start_insn = next_nonnote_insn (start_insn);
23119 if (LABEL_P (start_insn)
23120 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23121 && LABEL_NUSES (start_insn) == 1)
23122 {
23123 reverse = TRUE;
23124 seeking_return = 1;
23125 return_code = GET_CODE (body);
23126 }
23127 else
23128 return;
23129 }
23130 else
23131 return;
23132 }
23133
23134 gcc_assert (!arm_ccfsm_state || reverse);
23135 if (!JUMP_P (insn))
23136 return;
23137
23138 /* This jump might be paralleled with a clobber of the condition codes;
23139 the jump should always come first. */
23140 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23141 body = XVECEXP (body, 0, 0);
23142
23143 if (reverse
23144 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23145 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23146 {
23147 int insns_skipped;
23148 int fail = FALSE, succeed = FALSE;
23149 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23150 int then_not_else = TRUE;
23151 rtx_insn *this_insn = start_insn;
23152 rtx label = 0;
23153
23154 /* Register the insn jumped to. */
23155 if (reverse)
23156 {
23157 if (!seeking_return)
23158 label = XEXP (SET_SRC (body), 0);
23159 }
23160 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23161 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23162 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23163 {
23164 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23165 then_not_else = FALSE;
23166 }
23167 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23168 {
23169 seeking_return = 1;
23170 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23171 }
23172 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23173 {
23174 seeking_return = 1;
23175 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23176 then_not_else = FALSE;
23177 }
23178 else
23179 gcc_unreachable ();
23180
23181 /* See how many insns this branch skips, and what kind of insns. If all
23182 insns are okay, and the label or unconditional branch to the same
23183 label is not too far away, succeed. */
23184 for (insns_skipped = 0;
23185 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23186 {
23187 rtx scanbody;
23188
23189 this_insn = next_nonnote_insn (this_insn);
23190 if (!this_insn)
23191 break;
23192
23193 switch (GET_CODE (this_insn))
23194 {
23195 case CODE_LABEL:
23196 /* Succeed if it is the target label, otherwise fail since
23197 control falls in from somewhere else. */
23198 if (this_insn == label)
23199 {
23200 arm_ccfsm_state = 1;
23201 succeed = TRUE;
23202 }
23203 else
23204 fail = TRUE;
23205 break;
23206
23207 case BARRIER:
23208 /* Succeed if the following insn is the target label.
23209 Otherwise fail.
23210 If return insns are used then the last insn in a function
23211 will be a barrier. */
23212 this_insn = next_nonnote_insn (this_insn);
23213 if (this_insn && this_insn == label)
23214 {
23215 arm_ccfsm_state = 1;
23216 succeed = TRUE;
23217 }
23218 else
23219 fail = TRUE;
23220 break;
23221
23222 case CALL_INSN:
23223 /* The AAPCS says that conditional calls should not be
23224 used since they make interworking inefficient (the
23225 linker can't transform BL<cond> into BLX). That's
23226 only a problem if the machine has BLX. */
23227 if (arm_arch5)
23228 {
23229 fail = TRUE;
23230 break;
23231 }
23232
23233 /* Succeed if the following insn is the target label, or
23234 if the following two insns are a barrier and the
23235 target label. */
23236 this_insn = next_nonnote_insn (this_insn);
23237 if (this_insn && BARRIER_P (this_insn))
23238 this_insn = next_nonnote_insn (this_insn);
23239
23240 if (this_insn && this_insn == label
23241 && insns_skipped < max_insns_skipped)
23242 {
23243 arm_ccfsm_state = 1;
23244 succeed = TRUE;
23245 }
23246 else
23247 fail = TRUE;
23248 break;
23249
23250 case JUMP_INSN:
23251 /* If this is an unconditional branch to the same label, succeed.
23252 If it is to another label, do nothing. If it is conditional,
23253 fail. */
23254 /* XXX Probably, the tests for SET and the PC are
23255 unnecessary. */
23256
23257 scanbody = PATTERN (this_insn);
23258 if (GET_CODE (scanbody) == SET
23259 && GET_CODE (SET_DEST (scanbody)) == PC)
23260 {
23261 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23262 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23263 {
23264 arm_ccfsm_state = 2;
23265 succeed = TRUE;
23266 }
23267 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23268 fail = TRUE;
23269 }
23270 /* Fail if a conditional return is undesirable (e.g. on a
23271 StrongARM), but still allow this if optimizing for size. */
23272 else if (GET_CODE (scanbody) == return_code
23273 && !use_return_insn (TRUE, NULL)
23274 && !optimize_size)
23275 fail = TRUE;
23276 else if (GET_CODE (scanbody) == return_code)
23277 {
23278 arm_ccfsm_state = 2;
23279 succeed = TRUE;
23280 }
23281 else if (GET_CODE (scanbody) == PARALLEL)
23282 {
23283 switch (get_attr_conds (this_insn))
23284 {
23285 case CONDS_NOCOND:
23286 break;
23287 default:
23288 fail = TRUE;
23289 break;
23290 }
23291 }
23292 else
23293 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23294
23295 break;
23296
23297 case INSN:
23298 /* Instructions using or affecting the condition codes make it
23299 fail. */
23300 scanbody = PATTERN (this_insn);
23301 if (!(GET_CODE (scanbody) == SET
23302 || GET_CODE (scanbody) == PARALLEL)
23303 || get_attr_conds (this_insn) != CONDS_NOCOND)
23304 fail = TRUE;
23305 break;
23306
23307 default:
23308 break;
23309 }
23310 }
23311 if (succeed)
23312 {
23313 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23314 arm_target_label = CODE_LABEL_NUMBER (label);
23315 else
23316 {
23317 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23318
23319 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23320 {
23321 this_insn = next_nonnote_insn (this_insn);
23322 gcc_assert (!this_insn
23323 || (!BARRIER_P (this_insn)
23324 && !LABEL_P (this_insn)));
23325 }
23326 if (!this_insn)
23327 {
23328 /* Oh, dear! We ran off the end; give up. */
23329 extract_constrain_insn_cached (insn);
23330 arm_ccfsm_state = 0;
23331 arm_target_insn = NULL;
23332 return;
23333 }
23334 arm_target_insn = this_insn;
23335 }
23336
23337 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23338 what it was. */
23339 if (!reverse)
23340 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23341
23342 if (reverse || then_not_else)
23343 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23344 }
23345
23346 /* Restore recog_data (getting the attributes of other insns can
23347 destroy this array, but final.c assumes that it remains intact
23348 across this call). */
23349 extract_constrain_insn_cached (insn);
23350 }
23351 }
23352
23353 /* Output IT instructions. */
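/* For example (illustrative): three conditional instructions, two
   under EQ followed by one under NE, give arm_condexec_mask == 0b011
   and arm_condexec_masklen == 3, so the loop below builds "tte" and
   this function emits "itte eq" before the first instruction.  */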
23354 void
23355 thumb2_asm_output_opcode (FILE * stream)
23356 {
23357 char buff[5];
23358 int n;
23359
23360 if (arm_condexec_mask)
23361 {
23362 for (n = 0; n < arm_condexec_masklen; n++)
23363 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23364 buff[n] = 0;
23365 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23366 arm_condition_codes[arm_current_cc]);
23367 arm_condexec_mask = 0;
23368 }
23369 }
23370
23371 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
23372 UNITS_PER_WORD bytes wide. */
23373 static unsigned int
23374 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
23375 {
23376 if (TARGET_32BIT
23377 && regno > PC_REGNUM
23378 && regno != FRAME_POINTER_REGNUM
23379 && regno != ARG_POINTER_REGNUM
23380 && !IS_VFP_REGNUM (regno))
23381 return 1;
23382
23383 return ARM_NUM_REGS (mode);
23384 }
23385
23386 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23387 static bool
23388 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23389 {
23390 if (GET_MODE_CLASS (mode) == MODE_CC)
23391 return (regno == CC_REGNUM
23392 || (TARGET_HARD_FLOAT
23393 && regno == VFPCC_REGNUM));
23394
23395 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23396 return false;
23397
23398 if (TARGET_THUMB1)
23399 /* For the Thumb we only allow values bigger than SImode in
23400 registers 0 - 6, so that there is always a second low
23401 register available to hold the upper part of the value.
23402 We probably ought to ensure that the register is the
23403 start of an even numbered register pair. */
23404 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23405
23406 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23407 {
23408 if (mode == SFmode || mode == SImode)
23409 return VFP_REGNO_OK_FOR_SINGLE (regno);
23410
23411 if (mode == DFmode)
23412 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23413
23414 if (mode == HFmode)
23415 return VFP_REGNO_OK_FOR_SINGLE (regno);
23416
23417 /* VFP registers can hold HImode values. */
23418 if (mode == HImode)
23419 return VFP_REGNO_OK_FOR_SINGLE (regno);
23420
23421 if (TARGET_NEON)
23422 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23423 || (VALID_NEON_QREG_MODE (mode)
23424 && NEON_REGNO_OK_FOR_QUAD (regno))
23425 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23426 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23427 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23428 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23429 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23430
23431 return false;
23432 }
23433
23434 if (TARGET_REALLY_IWMMXT)
23435 {
23436 if (IS_IWMMXT_GR_REGNUM (regno))
23437 return mode == SImode;
23438
23439 if (IS_IWMMXT_REGNUM (regno))
23440 return VALID_IWMMXT_REG_MODE (mode);
23441 }
23442
23443 /* We allow almost any value to be stored in the general registers.
23444 Restrict doubleword quantities to even register pairs in ARM state
23445 so that we can use ldrd. Do not allow very large Neon structure
23446 opaque modes in general registers; they would use too many. */
23447 if (regno <= LAST_ARM_REGNUM)
23448 {
23449 if (ARM_NUM_REGS (mode) > 4)
23450 return false;
23451
23452 if (TARGET_THUMB2)
23453 return true;
23454
23455 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23456 }
23457
23458 if (regno == FRAME_POINTER_REGNUM
23459 || regno == ARG_POINTER_REGNUM)
23460 /* We only allow integers in the fake hard registers. */
23461 return GET_MODE_CLASS (mode) == MODE_INT;
23462
23463 return false;
23464 }
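/* Some illustrative consequences of the rules above (not exhaustive): in ARM
   state with LDRD available, DImode is rejected in odd-numbered core
   registers so that ldrd/strd can be used; in Thumb-2 any mode needing at
   most four core registers is accepted in any core register; and a NEON
   Q-register mode such as V4SImode is only accepted in VFP register numbers
   satisfying NEON_REGNO_OK_FOR_QUAD.  */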
23465
23466 /* Implement TARGET_MODES_TIEABLE_P. */
23467
23468 static bool
23469 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23470 {
23471 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23472 return true;
23473
23474 /* We specifically want to allow elements of "structure" modes to
23475 be tieable to the structure. This more general condition allows
23476 other rarer situations too. */
23477 if (TARGET_NEON
23478 && (VALID_NEON_DREG_MODE (mode1)
23479 || VALID_NEON_QREG_MODE (mode1)
23480 || VALID_NEON_STRUCT_MODE (mode1))
23481 && (VALID_NEON_DREG_MODE (mode2)
23482 || VALID_NEON_QREG_MODE (mode2)
23483 || VALID_NEON_STRUCT_MODE (mode2)))
23484 return true;
23485
23486 return false;
23487 }
23488
23489 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23490 not used in arm mode. */
23491
23492 enum reg_class
23493 arm_regno_class (int regno)
23494 {
23495 if (regno == PC_REGNUM)
23496 return NO_REGS;
23497
23498 if (TARGET_THUMB1)
23499 {
23500 if (regno == STACK_POINTER_REGNUM)
23501 return STACK_REG;
23502 if (regno == CC_REGNUM)
23503 return CC_REG;
23504 if (regno < 8)
23505 return LO_REGS;
23506 return HI_REGS;
23507 }
23508
23509 if (TARGET_THUMB2 && regno < 8)
23510 return LO_REGS;
23511
23512 if ( regno <= LAST_ARM_REGNUM
23513 || regno == FRAME_POINTER_REGNUM
23514 || regno == ARG_POINTER_REGNUM)
23515 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23516
23517 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23518 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23519
23520 if (IS_VFP_REGNUM (regno))
23521 {
23522 if (regno <= D7_VFP_REGNUM)
23523 return VFP_D0_D7_REGS;
23524 else if (regno <= LAST_LO_VFP_REGNUM)
23525 return VFP_LO_REGS;
23526 else
23527 return VFP_HI_REGS;
23528 }
23529
23530 if (IS_IWMMXT_REGNUM (regno))
23531 return IWMMXT_REGS;
23532
23533 if (IS_IWMMXT_GR_REGNUM (regno))
23534 return IWMMXT_GR_REGS;
23535
23536 return NO_REGS;
23537 }
23538
23539 /* Handle a special case when computing the offset
23540 of an argument from the frame pointer. */
23541 int
23542 arm_debugger_arg_offset (int value, rtx addr)
23543 {
23544 rtx_insn *insn;
23545
23546 /* We are only interested if dbxout_parms() failed to compute the offset. */
23547 if (value != 0)
23548 return 0;
23549
23550 /* We can only cope with the case where the address is held in a register. */
23551 if (!REG_P (addr))
23552 return 0;
23553
23554 /* If we are using the frame pointer to point at the argument, then
23555 an offset of 0 is correct. */
23556 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23557 return 0;
23558
23559 /* If we are using the stack pointer to point at the
23560 argument, then an offset of 0 is correct. */
23561 /* ??? Check this is consistent with thumb2 frame layout. */
23562 if ((TARGET_THUMB || !frame_pointer_needed)
23563 && REGNO (addr) == SP_REGNUM)
23564 return 0;
23565
23566 /* Oh dear. The argument is pointed to by a register rather
23567 than being held in a register, or being stored at a known
23568 offset from the frame pointer. Since GDB only understands
23569 those two kinds of argument we must translate the address
23570 held in the register into an offset from the frame pointer.
23571 We do this by searching through the insns for the function
23572 looking to see where this register gets its value. If the
23573 register is initialized from the frame pointer plus an offset
23574 then we are in luck and we can continue, otherwise we give up.
23575
23576 This code is exercised by producing debugging information
23577 for a function with arguments like this:
23578
23579 double func (double a, double b, int c, double d) {return d;}
23580
23581 Without this code the stab for parameter 'd' will be set to
23582 an offset of 0 from the frame pointer, rather than 8. */
23583
23584 /* The if() statement says:
23585
23586 If the insn is a normal instruction
23587 and if the insn is setting the value in a register
23588 and if the register being set is the register holding the address of the argument
23589 and if the address is computed by an addition
23590 that involves adding to a register
23591 which is the frame pointer
23592 a constant integer
23593
23594 then... */
23595
23596 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23597 {
23598 if ( NONJUMP_INSN_P (insn)
23599 && GET_CODE (PATTERN (insn)) == SET
23600 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23601 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23602 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23603 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23604 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23605 )
23606 {
23607 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23608
23609 break;
23610 }
23611 }
23612
23613 if (value == 0)
23614 {
23615 debug_rtx (addr);
23616 warning (0, "unable to compute real location of stacked parameter");
23617 value = 8; /* XXX magic hack */
23618 }
23619
23620 return value;
23621 }
23622 \f
23623 /* Implement TARGET_PROMOTED_TYPE. */
23624
23625 static tree
23626 arm_promoted_type (const_tree t)
23627 {
23628 if (SCALAR_FLOAT_TYPE_P (t)
23629 && TYPE_PRECISION (t) == 16
23630 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23631 return float_type_node;
23632 return NULL_TREE;
23633 }
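/* A minimal usage sketch (assumes a 16-bit __fp16 format has been selected,
   e.g. IEEE): because __fp16 is promoted to float here, the addition below
   is carried out in single precision and narrowed again on the store:
       __fp16 a, b, c;
       c = a + b;        /+ evaluated as (float) a + (float) b +/
   _Float16, when available, is not promoted here; it is handled by the
   excess-precision logic below.  */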
23634
23635 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23636 This simply adds HFmode as a supported mode; even though we don't
23637 implement arithmetic on this type directly, it's supported by
23638 optabs conversions, much the way the double-word arithmetic is
23639 special-cased in the default hook. */
23640
23641 static bool
23642 arm_scalar_mode_supported_p (scalar_mode mode)
23643 {
23644 if (mode == HFmode)
23645 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23646 else if (ALL_FIXED_POINT_MODE_P (mode))
23647 return true;
23648 else
23649 return default_scalar_mode_supported_p (mode);
23650 }
23651
23652 /* Set the value of FLT_EVAL_METHOD.
23653 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23654
23655 0: evaluate all operations and constants, whose semantic type has at
23656 most the range and precision of type float, to the range and
23657 precision of float; evaluate all other operations and constants to
23658 the range and precision of the semantic type;
23659
23660 N, where _FloatN is a supported interchange floating type:
23661 evaluate all operations and constants, whose semantic type has at
23662 most the range and precision of _FloatN type, to the range and
23663 precision of the _FloatN type; evaluate all other operations and
23664 constants to the range and precision of the semantic type;
23665
23666 If we have the ARMv8.2-A extensions then we support _Float16 in native
23667 precision, so we should set this to 16. Otherwise, we support the type,
23668 but want to evaluate expressions in float precision, so set this to
23669 0. */
23670
23671 static enum flt_eval_method
23672 arm_excess_precision (enum excess_precision_type type)
23673 {
23674 switch (type)
23675 {
23676 case EXCESS_PRECISION_TYPE_FAST:
23677 case EXCESS_PRECISION_TYPE_STANDARD:
23678 /* We can calculate either in 16-bit range and precision or
23679 32-bit range and precision. Make that decision based on whether
23680 we have native support for the ARMv8.2-A 16-bit floating-point
23681 instructions or not. */
23682 return (TARGET_VFP_FP16INST
23683 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23684 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23685 case EXCESS_PRECISION_TYPE_IMPLICIT:
23686 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23687 default:
23688 gcc_unreachable ();
23689 }
23690 return FLT_EVAL_METHOD_UNPREDICTABLE;
23691 }
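/* Worked example (illustrative): with the ARMv8.2-A FP16 extension enabled
   (TARGET_VFP_FP16INST), code such as
       _Float16 x, y;
       float r = x * y;
   can be evaluated directly in half precision (FLT_EVAL_METHOD == 16);
   without the extension the operands are promoted and the multiplication is
   performed as float (FLT_EVAL_METHOD == 0), as described above.  */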
23692
23693
23694 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23695 _Float16 if we are using anything other than ieee format for 16-bit
23696 floating point. Otherwise, punt to the default implementation. */
23697 static opt_scalar_float_mode
23698 arm_floatn_mode (int n, bool extended)
23699 {
23700 if (!extended && n == 16)
23701 {
23702 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23703 return HFmode;
23704 return opt_scalar_float_mode ();
23705 }
23706
23707 return default_floatn_mode (n, extended);
23708 }
23709
23710
23711 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23712 not to early-clobber SRC registers in the process.
23713
23714 We assume that the operands described by SRC and DEST represent a
23715 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23716 number of components into which the copy has been decomposed. */
23717 void
23718 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23719 {
23720 unsigned int i;
23721
23722 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23723 || REGNO (operands[0]) < REGNO (operands[1]))
23724 {
23725 for (i = 0; i < count; i++)
23726 {
23727 operands[2 * i] = dest[i];
23728 operands[2 * i + 1] = src[i];
23729 }
23730 }
23731 else
23732 {
23733 for (i = 0; i < count; i++)
23734 {
23735 operands[2 * i] = dest[count - i - 1];
23736 operands[2 * i + 1] = src[count - i - 1];
23737 }
23738 }
23739 }
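/* Worked example (register numbers invented for illustration): decomposing a
   copy whose pieces are dest = { d3, d4 } and src = { d2, d3 }. The operands
   overlap and REGNO (operands[0]) > REGNO (operands[1]), so the pairs are
   emitted in reverse order:
       operands[0] = d4, operands[1] = d3   -- high part copied first
       operands[2] = d3, operands[3] = d2   -- d3 only overwritten after it
                                               has been read
   In the non-overlapping (or downward-copy) case the pairs keep their
   original order.  */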
23740
23741 /* Split operands into moves from op[1] + op[2] into op[0]. */
23742
23743 void
23744 neon_split_vcombine (rtx operands[3])
23745 {
23746 unsigned int dest = REGNO (operands[0]);
23747 unsigned int src1 = REGNO (operands[1]);
23748 unsigned int src2 = REGNO (operands[2]);
23749 machine_mode halfmode = GET_MODE (operands[1]);
23750 unsigned int halfregs = REG_NREGS (operands[1]);
23751 rtx destlo, desthi;
23752
23753 if (src1 == dest && src2 == dest + halfregs)
23754 {
23755 /* No-op move. Can't split to nothing; emit something. */
23756 emit_note (NOTE_INSN_DELETED);
23757 return;
23758 }
23759
23760 /* Preserve register attributes for variable tracking. */
23761 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23762 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23763 GET_MODE_SIZE (halfmode));
23764
23765 /* Special case of reversed high/low parts. Use VSWP. */
23766 if (src2 == dest && src1 == dest + halfregs)
23767 {
23768 rtx x = gen_rtx_SET (destlo, operands[1]);
23769 rtx y = gen_rtx_SET (desthi, operands[2]);
23770 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23771 return;
23772 }
23773
23774 if (!reg_overlap_mentioned_p (operands[2], destlo))
23775 {
23776 /* Try to avoid unnecessary moves if part of the result
23777 is in the right place already. */
23778 if (src1 != dest)
23779 emit_move_insn (destlo, operands[1]);
23780 if (src2 != dest + halfregs)
23781 emit_move_insn (desthi, operands[2]);
23782 }
23783 else
23784 {
23785 if (src2 != dest + halfregs)
23786 emit_move_insn (desthi, operands[2]);
23787 if (src1 != dest)
23788 emit_move_insn (destlo, operands[1]);
23789 }
23790 }
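/* Illustrative cases (Q/D register numbers invented):
     q0 = vcombine (d0, d1)  -- both halves already in place: only a
                                deleted-insn note is emitted, since a split
                                may not expand to nothing.
     q0 = vcombine (d1, d0)  -- halves reversed: one PARALLEL of two SETs is
                                emitted so a register-swap (VSWP) pattern can
                                match.
     q0 = vcombine (d4, d1)  -- only the low half needs a move; the order of
                                the two moves is chosen so that no source is
                                clobbered before it is read.  */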
23791 \f
23792 /* Return the number (counting from 0) of
23793 the least significant set bit in MASK. */
23794
23795 inline static int
23796 number_of_first_bit_set (unsigned mask)
23797 {
23798 return ctz_hwi (mask);
23799 }
23800
23801 /* Like emit_multi_reg_push, but allowing for a different set of
23802 registers to be described as saved. MASK is the set of registers
23803 to be saved; REAL_REGS is the set of registers to be described as
23804 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23805
23806 static rtx_insn *
23807 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23808 {
23809 unsigned long regno;
23810 rtx par[10], tmp, reg;
23811 rtx_insn *insn;
23812 int i, j;
23813
23814 /* Build the parallel of the registers actually being stored. */
23815 for (i = 0; mask; ++i, mask &= mask - 1)
23816 {
23817 regno = ctz_hwi (mask);
23818 reg = gen_rtx_REG (SImode, regno);
23819
23820 if (i == 0)
23821 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23822 else
23823 tmp = gen_rtx_USE (VOIDmode, reg);
23824
23825 par[i] = tmp;
23826 }
23827
23828 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23829 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23830 tmp = gen_frame_mem (BLKmode, tmp);
23831 tmp = gen_rtx_SET (tmp, par[0]);
23832 par[0] = tmp;
23833
23834 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23835 insn = emit_insn (tmp);
23836
23837 /* Always build the stack adjustment note for unwind info. */
23838 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23839 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23840 par[0] = tmp;
23841
23842 /* Build the parallel of the registers recorded as saved for unwind. */
23843 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23844 {
23845 regno = ctz_hwi (real_regs);
23846 reg = gen_rtx_REG (SImode, regno);
23847
23848 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23849 tmp = gen_frame_mem (SImode, tmp);
23850 tmp = gen_rtx_SET (tmp, reg);
23851 RTX_FRAME_RELATED_P (tmp) = 1;
23852 par[j + 1] = tmp;
23853 }
23854
23855 if (j == 0)
23856 tmp = par[0];
23857 else
23858 {
23859 RTX_FRAME_RELATED_P (par[0]) = 1;
23860 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23861 }
23862
23863 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23864
23865 return insn;
23866 }
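/* Sketch of the RTL produced (illustrative): for MASK = {r4, r5, lr} the
   emitted insn looks roughly like
       (parallel [(set (mem:BLK (pre_modify (reg sp) (plus (reg sp) -12)))
                       (unspec:BLK [(reg r4)] UNSPEC_PUSH_MULT))
                  (use (reg r5))
                  (use (reg lr))])
   while the REG_FRAME_RELATED_EXPR note describes the same saves in terms of
   REAL_REGS together with an explicit stack-pointer adjustment for the
   unwinder.  */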
23867
23868 /* Emit code to push or pop registers to or from the stack. F is the
23869 assembly file. MASK is the registers to pop. */
23870 static void
23871 thumb_pop (FILE *f, unsigned long mask)
23872 {
23873 int regno;
23874 int lo_mask = mask & 0xFF;
23875
23876 gcc_assert (mask);
23877
23878 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23879 {
23880 /* Special case. Do not generate a POP PC statement here; do it in
23881 thumb_exit(). */
23882 thumb_exit (f, -1);
23883 return;
23884 }
23885
23886 fprintf (f, "\tpop\t{");
23887
23888 /* Look at the low registers first. */
23889 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23890 {
23891 if (lo_mask & 1)
23892 {
23893 asm_fprintf (f, "%r", regno);
23894
23895 if ((lo_mask & ~1) != 0)
23896 fprintf (f, ", ");
23897 }
23898 }
23899
23900 if (mask & (1 << PC_REGNUM))
23901 {
23902 /* Catch popping the PC. */
23903 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
23904 || IS_CMSE_ENTRY (arm_current_func_type ()))
23905 {
23906 /* The PC is never popped directly; instead
23907 it is popped into r3 and then BX is used. */
23908 fprintf (f, "}\n");
23909
23910 thumb_exit (f, -1);
23911
23912 return;
23913 }
23914 else
23915 {
23916 if (mask & 0xFF)
23917 fprintf (f, ", ");
23918
23919 asm_fprintf (f, "%r", PC_REGNUM);
23920 }
23921 }
23922
23923 fprintf (f, "}\n");
23924 }
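/* Example of the output (illustrative): for MASK = {r4, r5, pc} on a target
   without interworking, backtrace or CMSE requirements this prints
       pop   {r4, r5, pc}
   whereas with interworking the PC is never popped directly; the low
   registers are popped and thumb_exit emits the final BX sequence.  */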
23925
23926 /* Generate code to return from a thumb function.
23927 If 'reg_containing_return_addr' is -1, then the return address is
23928 actually on the stack, at the stack pointer. */
23929 static void
23930 thumb_exit (FILE *f, int reg_containing_return_addr)
23931 {
23932 unsigned regs_available_for_popping;
23933 unsigned regs_to_pop;
23934 int pops_needed;
23935 unsigned available;
23936 unsigned required;
23937 machine_mode mode;
23938 int size;
23939 int restore_a4 = FALSE;
23940
23941 /* Compute the registers we need to pop. */
23942 regs_to_pop = 0;
23943 pops_needed = 0;
23944
23945 if (reg_containing_return_addr == -1)
23946 {
23947 regs_to_pop |= 1 << LR_REGNUM;
23948 ++pops_needed;
23949 }
23950
23951 if (TARGET_BACKTRACE)
23952 {
23953 /* Restore the (ARM) frame pointer and stack pointer. */
23954 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23955 pops_needed += 2;
23956 }
23957
23958 /* If there is nothing to pop then just emit the BX instruction and
23959 return. */
23960 if (pops_needed == 0)
23961 {
23962 if (crtl->calls_eh_return)
23963 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23964
23965 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23966 {
23967 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
23968 reg_containing_return_addr);
23969 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
23970 }
23971 else
23972 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23973 return;
23974 }
23975 /* Otherwise if we are not supporting interworking and we have not created
23976 a backtrace structure and the function was not entered in ARM mode then
23977 just pop the return address straight into the PC. */
23978 else if (!TARGET_INTERWORK
23979 && !TARGET_BACKTRACE
23980 && !is_called_in_ARM_mode (current_function_decl)
23981 && !crtl->calls_eh_return
23982 && !IS_CMSE_ENTRY (arm_current_func_type ()))
23983 {
23984 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23985 return;
23986 }
23987
23988 /* Find out how many of the (return) argument registers we can corrupt. */
23989 regs_available_for_popping = 0;
23990
23991 /* If returning via __builtin_eh_return, the bottom three registers
23992 all contain information needed for the return. */
23993 if (crtl->calls_eh_return)
23994 size = 12;
23995 else
23996 {
23997 /* See if we can deduce the registers used from the function's
23998 return value. This is more reliable than examining
23999 df_regs_ever_live_p () because that will be set if the register is
24000 ever used in the function, not just if the register is used
24001 to hold a return value. */
24002
24003 if (crtl->return_rtx != 0)
24004 mode = GET_MODE (crtl->return_rtx);
24005 else
24006 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24007
24008 size = GET_MODE_SIZE (mode);
24009
24010 if (size == 0)
24011 {
24012 /* In a void function we can use any argument register.
24013 In a function that returns a structure on the stack
24014 we can use the second and third argument registers. */
24015 if (mode == VOIDmode)
24016 regs_available_for_popping =
24017 (1 << ARG_REGISTER (1))
24018 | (1 << ARG_REGISTER (2))
24019 | (1 << ARG_REGISTER (3));
24020 else
24021 regs_available_for_popping =
24022 (1 << ARG_REGISTER (2))
24023 | (1 << ARG_REGISTER (3));
24024 }
24025 else if (size <= 4)
24026 regs_available_for_popping =
24027 (1 << ARG_REGISTER (2))
24028 | (1 << ARG_REGISTER (3));
24029 else if (size <= 8)
24030 regs_available_for_popping =
24031 (1 << ARG_REGISTER (3));
24032 }
24033
24034 /* Match registers to be popped with registers into which we pop them. */
24035 for (available = regs_available_for_popping,
24036 required = regs_to_pop;
24037 required != 0 && available != 0;
24038 available &= ~(available & - available),
24039 required &= ~(required & - required))
24040 -- pops_needed;
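/* Worked example of the bit manipulation above (illustrative): for a mask m,
   m & -m isolates the lowest set bit, so m &= ~(m & -m) clears it:
       m = 0x6 (binary 0110):  m & -m == 0x2,  m becomes 0x4 (binary 0100)
   Each loop iteration therefore pairs one register to pop with one register
   to receive it, decrementing pops_needed once per pair.  */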
24041
24042 /* If we have any popping registers left over, remove them. */
24043 if (available > 0)
24044 regs_available_for_popping &= ~available;
24045
24046 /* Otherwise if we need another popping register we can use
24047 the fourth argument register. */
24048 else if (pops_needed)
24049 {
24050 /* If we have not found any free argument registers and
24051 reg a4 contains the return address, we must move it. */
24052 if (regs_available_for_popping == 0
24053 && reg_containing_return_addr == LAST_ARG_REGNUM)
24054 {
24055 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24056 reg_containing_return_addr = LR_REGNUM;
24057 }
24058 else if (size > 12)
24059 {
24060 /* Register a4 is being used to hold part of the return value,
24061 but we have dire need of a free, low register. */
24062 restore_a4 = TRUE;
24063
24064 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
24065 }
24066
24067 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24068 {
24069 /* The fourth argument register is available. */
24070 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24071
24072 --pops_needed;
24073 }
24074 }
24075
24076 /* Pop as many registers as we can. */
24077 thumb_pop (f, regs_available_for_popping);
24078
24079 /* Process the registers we popped. */
24080 if (reg_containing_return_addr == -1)
24081 {
24082 /* The return address was popped into the lowest numbered register. */
24083 regs_to_pop &= ~(1 << LR_REGNUM);
24084
24085 reg_containing_return_addr =
24086 number_of_first_bit_set (regs_available_for_popping);
24087
24088 /* Remove this register from the mask of available registers, so that
24089 the return address will not be corrupted by further pops. */
24090 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24091 }
24092
24093 /* If we popped other registers then handle them here. */
24094 if (regs_available_for_popping)
24095 {
24096 int frame_pointer;
24097
24098 /* Work out which register currently contains the frame pointer. */
24099 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24100
24101 /* Move it into the correct place. */
24102 asm_fprintf (f, "\tmov\t%r, %r\n",
24103 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24104
24105 /* (Temporarily) remove it from the mask of popped registers. */
24106 regs_available_for_popping &= ~(1 << frame_pointer);
24107 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24108
24109 if (regs_available_for_popping)
24110 {
24111 int stack_pointer;
24112
24113 /* We popped the stack pointer as well;
24114 find the register that contains it. */
24115 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24116
24117 /* Move it into the stack register. */
24118 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24119
24120 /* At this point we have popped all necessary registers, so
24121 do not worry about restoring regs_available_for_popping
24122 to its correct value:
24123
24124 assert (pops_needed == 0)
24125 assert (regs_available_for_popping == (1 << frame_pointer))
24126 assert (regs_to_pop == (1 << STACK_POINTER)) */
24127 }
24128 else
24129 {
24130 /* Since we have just moved the popped value into the frame
24131 pointer, the popping register is available for reuse, and
24132 we know that we still have the stack pointer left to pop. */
24133 regs_available_for_popping |= (1 << frame_pointer);
24134 }
24135 }
24136
24137 /* If we still have registers left on the stack, but we no longer have
24138 any registers into which we can pop them, then we must move the return
24139 address into the link register and make available the register that
24140 contained it. */
24141 if (regs_available_for_popping == 0 && pops_needed > 0)
24142 {
24143 regs_available_for_popping |= 1 << reg_containing_return_addr;
24144
24145 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24146 reg_containing_return_addr);
24147
24148 reg_containing_return_addr = LR_REGNUM;
24149 }
24150
24151 /* If we have registers left on the stack then pop some more.
24152 We know that at most we will want to pop FP and SP. */
24153 if (pops_needed > 0)
24154 {
24155 int popped_into;
24156 int move_to;
24157
24158 thumb_pop (f, regs_available_for_popping);
24159
24160 /* We have popped either FP or SP.
24161 Move whichever one it is into the correct register. */
24162 popped_into = number_of_first_bit_set (regs_available_for_popping);
24163 move_to = number_of_first_bit_set (regs_to_pop);
24164
24165 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24166 --pops_needed;
24167 }
24168
24169 /* If we still have not popped everything then we must have only
24170 had one register available to us and we are now popping the SP. */
24171 if (pops_needed > 0)
24172 {
24173 int popped_into;
24174
24175 thumb_pop (f, regs_available_for_popping);
24176
24177 popped_into = number_of_first_bit_set (regs_available_for_popping);
24178
24179 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24180 /*
24181 assert (regs_to_pop == (1 << STACK_POINTER))
24182 assert (pops_needed == 1)
24183 */
24184 }
24185
24186 /* If necessary restore the a4 register. */
24187 if (restore_a4)
24188 {
24189 if (reg_containing_return_addr != LR_REGNUM)
24190 {
24191 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24192 reg_containing_return_addr = LR_REGNUM;
24193 }
24194
24195 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24196 }
24197
24198 if (crtl->calls_eh_return)
24199 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24200
24201 /* Return to caller. */
24202 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24203 {
24204 /* This is for the cases where LR is not being used to contain the return
24205 address. It may therefore contain information that we might not want
24206 to leak, hence it must be cleared. The value in R0 will never be a
24207 secret at this point, so it is safe to use it, see the clearing code
24208 in 'cmse_nonsecure_entry_clear_before_return'. */
24209 if (reg_containing_return_addr != LR_REGNUM)
24210 asm_fprintf (f, "\tmov\tlr, r0\n");
24211
24212 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24213 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24214 }
24215 else
24216 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24217 }
24218 \f
24219 /* Scan INSN just before assembler is output for it.
24220 For Thumb-1, we track the status of the condition codes; this
24221 information is used in the cbranchsi4_insn pattern. */
24222 void
24223 thumb1_final_prescan_insn (rtx_insn *insn)
24224 {
24225 if (flag_print_asm_name)
24226 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24227 INSN_ADDRESSES (INSN_UID (insn)));
24228 /* Don't overwrite the previous setter when we get to a cbranch. */
24229 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24230 {
24231 enum attr_conds conds;
24232
24233 if (cfun->machine->thumb1_cc_insn)
24234 {
24235 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24236 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24237 CC_STATUS_INIT;
24238 }
24239 conds = get_attr_conds (insn);
24240 if (conds == CONDS_SET)
24241 {
24242 rtx set = single_set (insn);
24243 cfun->machine->thumb1_cc_insn = insn;
24244 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24245 cfun->machine->thumb1_cc_op1 = const0_rtx;
24246 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24247 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24248 {
24249 rtx src1 = XEXP (SET_SRC (set), 1);
24250 if (src1 == const0_rtx)
24251 cfun->machine->thumb1_cc_mode = CCmode;
24252 }
24253 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24254 {
24255 /* Record the src register operand instead of dest because
24256 cprop_hardreg pass propagates src. */
24257 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24258 }
24259 }
24260 else if (conds != CONDS_NOCOND)
24261 cfun->machine->thumb1_cc_insn = NULL_RTX;
24262 }
24263
24264 /* Check if an unexpected far jump is used. */
24265 if (cfun->machine->lr_save_eliminated
24266 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24267 internal_error ("Unexpected thumb1 far jump");
24268 }
24269
24270 int
24271 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24272 {
24273 unsigned HOST_WIDE_INT mask = 0xff;
24274 int i;
24275
24276 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24277 if (val == 0) /* XXX */
24278 return 0;
24279
24280 for (i = 0; i < 25; i++)
24281 if ((val & (mask << i)) == val)
24282 return 1;
24283
24284 return 0;
24285 }
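/* Worked examples (illustrative): the loop above asks whether VAL is an
   8-bit constant shifted left by 0..24 bits, i.e. something a Thumb-1 MOV of
   an 8-bit immediate followed by an LSL can produce:
       0x000000ff  -> shiftable (shift 0)
       0x00ff0000  -> shiftable (shift 16)
       0x00000101  -> not shiftable (set bits span more than 8 positions)  */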
24286
24287 /* Returns nonzero if the current function contains,
24288 or might contain, a far jump. */
24289 static int
24290 thumb_far_jump_used_p (void)
24291 {
24292 rtx_insn *insn;
24293 bool far_jump = false;
24294 unsigned int func_size = 0;
24295
24296 /* If we have already decided that far jumps may be used,
24297 do not bother checking again, and always return true even if
24298 it turns out that they are not being used. Once we have made
24299 the decision that far jumps are present (and that hence the link
24300 register will be pushed onto the stack) we cannot go back on it. */
24301 if (cfun->machine->far_jump_used)
24302 return 1;
24303
24304 /* If this function is not being called from the prologue/epilogue
24305 generation code then it must be being called from the
24306 INITIAL_ELIMINATION_OFFSET macro. */
24307 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24308 {
24309 /* In this case we know that we are being asked about the elimination
24310 of the arg pointer register. If that register is not being used,
24311 then there are no arguments on the stack, and we do not have to
24312 worry that a far jump might force the prologue to push the link
24313 register, changing the stack offsets. In this case we can just
24314 return false, since the presence of far jumps in the function will
24315 not affect stack offsets.
24316
24317 If the arg pointer is live (or if it was live, but has now been
24318 eliminated and so set to dead) then we do have to test to see if
24319 the function might contain a far jump. This test can lead to some
24320 false negatives, since before reload is completed, the length of
24321 branch instructions is not known, so gcc defaults to returning their
24322 longest length, which in turn sets the far jump attribute to true.
24323
24324 A false negative will not result in bad code being generated, but it
24325 will result in a needless push and pop of the link register. We
24326 hope that this does not occur too often.
24327
24328 If we need doubleword stack alignment this could affect the other
24329 elimination offsets so we can't risk getting it wrong. */
24330 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24331 cfun->machine->arg_pointer_live = 1;
24332 else if (!cfun->machine->arg_pointer_live)
24333 return 0;
24334 }
24335
24336 /* We should not change far_jump_used during or after reload, as there is
24337 no chance to change stack frame layout. */
24338 if (reload_in_progress || reload_completed)
24339 return 0;
24340
24341 /* Check to see if the function contains a branch
24342 insn with the far jump attribute set. */
24343 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24344 {
24345 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24346 {
24347 far_jump = true;
24348 }
24349 func_size += get_attr_length (insn);
24350 }
24351
24352 /* The far_jump attribute will always be true for thumb1 before the
24353 shorten_branch pass, so checking the far_jump attribute before
24354 shorten_branch is not very useful.
24355
24356 The following heuristic tries to estimate more accurately whether a far
24357 jump may finally be used. The heuristic is very conservative, as there
24358 is no chance to roll back the decision not to use a far jump.
24359
24360 The Thumb1 long branch offset range is -2048 to 2046. In the worst case
24361 each 2-byte insn is associated with a 4-byte constant pool entry. Using
24362 a function size of 2048/3 as the threshold is conservative enough. */
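/* Worked example of the threshold (illustrative): each 2-byte insn may, in
   the worst case, contribute a further 4 bytes of constant pool, so a
   function whose counted size is func_size bytes may span up to
   func_size * 3 bytes. Once func_size * 3 >= 2048 (roughly 683 bytes of
   insns) a branch could exceed the -2048..2046 range, so we conservatively
   assume a far jump and keep LR saved.  */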
24363 if (far_jump)
24364 {
24365 if ((func_size * 3) >= 2048)
24366 {
24367 /* Record the fact that we have decided that
24368 the function does use far jumps. */
24369 cfun->machine->far_jump_used = 1;
24370 return 1;
24371 }
24372 }
24373
24374 return 0;
24375 }
24376
24377 /* Return nonzero if FUNC must be entered in ARM mode. */
24378 static bool
24379 is_called_in_ARM_mode (tree func)
24380 {
24381 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24382
24383 /* Ignore the problem about functions whose address is taken. */
24384 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24385 return true;
24386
24387 #ifdef ARM_PE
24388 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24389 #else
24390 return false;
24391 #endif
24392 }
24393
24394 /* Given the stack offsets and register mask in OFFSETS, decide how
24395 many additional registers to push instead of subtracting a constant
24396 from SP. For epilogues the principle is the same except we use pop.
24397 FOR_PROLOGUE indicates which we're generating. */
24398 static int
24399 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24400 {
24401 HOST_WIDE_INT amount;
24402 unsigned long live_regs_mask = offsets->saved_regs_mask;
24403 /* Extract a mask of the ones we can give to the Thumb's push/pop
24404 instruction. */
24405 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24406 /* Then count how many other high registers will need to be pushed. */
24407 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24408 int n_free, reg_base, size;
24409
24410 if (!for_prologue && frame_pointer_needed)
24411 amount = offsets->locals_base - offsets->saved_regs;
24412 else
24413 amount = offsets->outgoing_args - offsets->saved_regs;
24414
24415 /* If the stack frame size is 512 exactly, we can save one load
24416 instruction, which should make this a win even when optimizing
24417 for speed. */
24418 if (!optimize_size && amount != 512)
24419 return 0;
24420
24421 /* Can't do this if there are high registers to push. */
24422 if (high_regs_pushed != 0)
24423 return 0;
24424
24425 /* Shouldn't do it in the prologue if no registers would normally
24426 be pushed at all. In the epilogue, also allow it if we'll have
24427 a pop insn for the PC. */
24428 if (l_mask == 0
24429 && (for_prologue
24430 || TARGET_BACKTRACE
24431 || (live_regs_mask & 1 << LR_REGNUM) == 0
24432 || TARGET_INTERWORK
24433 || crtl->args.pretend_args_size != 0))
24434 return 0;
24435
24436 /* Don't do this if thumb_expand_prologue wants to emit instructions
24437 between the push and the stack frame allocation. */
24438 if (for_prologue
24439 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24440 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24441 return 0;
24442
24443 reg_base = 0;
24444 n_free = 0;
24445 if (!for_prologue)
24446 {
24447 size = arm_size_return_regs ();
24448 reg_base = ARM_NUM_INTS (size);
24449 live_regs_mask >>= reg_base;
24450 }
24451
24452 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24453 && (for_prologue || call_used_regs[reg_base + n_free]))
24454 {
24455 live_regs_mask >>= 1;
24456 n_free++;
24457 }
24458
24459 if (n_free == 0)
24460 return 0;
24461 gcc_assert (amount / 4 * 4 == amount);
24462
24463 if (amount >= 512 && (amount - n_free * 4) < 512)
24464 return (amount - 508) / 4;
24465 if (amount <= n_free * 4)
24466 return amount / 4;
24467 return 0;
24468 }
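/* Worked example (illustrative): in a prologue with amount == 512 and three
   free low registers (n_free == 3), pushing (512 - 508) / 4 == 1 extra
   register leaves an adjustment of 508 bytes, which fits a single Thumb-1
   "sub sp, #imm"; when amount <= n_free * 4 the whole adjustment can be
   folded into the push or pop.  */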
24469
24470 /* The bits which aren't usefully expanded as rtl. */
24471 const char *
24472 thumb1_unexpanded_epilogue (void)
24473 {
24474 arm_stack_offsets *offsets;
24475 int regno;
24476 unsigned long live_regs_mask = 0;
24477 int high_regs_pushed = 0;
24478 int extra_pop;
24479 int had_to_push_lr;
24480 int size;
24481
24482 if (cfun->machine->return_used_this_function != 0)
24483 return "";
24484
24485 if (IS_NAKED (arm_current_func_type ()))
24486 return "";
24487
24488 offsets = arm_get_frame_offsets ();
24489 live_regs_mask = offsets->saved_regs_mask;
24490 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24491
24492 /* See if we can deduce the registers used from the function's return value.
24493 This is more reliable than examining df_regs_ever_live_p () because that
24494 will be set if the register is ever used in the function, not just if
24495 the register is used to hold a return value. */
24496 size = arm_size_return_regs ();
24497
24498 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24499 if (extra_pop > 0)
24500 {
24501 unsigned long extra_mask = (1 << extra_pop) - 1;
24502 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24503 }
24504
24505 /* The prolog may have pushed some high registers to use as
24506 work registers. e.g. the testsuite file:
24507 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24508 compiles to produce:
24509 push {r4, r5, r6, r7, lr}
24510 mov r7, r9
24511 mov r6, r8
24512 push {r6, r7}
24513 as part of the prolog. We have to undo that pushing here. */
24514
24515 if (high_regs_pushed)
24516 {
24517 unsigned long mask = live_regs_mask & 0xff;
24518 int next_hi_reg;
24519
24520 /* The available low registers depend on the size of the value we are
24521 returning. */
24522 if (size <= 12)
24523 mask |= 1 << 3;
24524 if (size <= 8)
24525 mask |= 1 << 2;
24526
24527 if (mask == 0)
24528 /* Oh dear! We have no low registers into which we can pop
24529 high registers! */
24530 internal_error
24531 ("no low registers available for popping high registers");
24532
24533 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24534 if (live_regs_mask & (1 << next_hi_reg))
24535 break;
24536
24537 while (high_regs_pushed)
24538 {
24539 /* Find lo register(s) into which the high register(s) can
24540 be popped. */
24541 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24542 {
24543 if (mask & (1 << regno))
24544 high_regs_pushed--;
24545 if (high_regs_pushed == 0)
24546 break;
24547 }
24548
24549 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24550
24551 /* Pop the values into the low register(s). */
24552 thumb_pop (asm_out_file, mask);
24553
24554 /* Move the value(s) into the high registers. */
24555 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24556 {
24557 if (mask & (1 << regno))
24558 {
24559 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24560 regno);
24561
24562 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24563 if (live_regs_mask & (1 << next_hi_reg))
24564 break;
24565 }
24566 }
24567 }
24568 live_regs_mask &= ~0x0f00;
24569 }
24570
24571 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24572 live_regs_mask &= 0xff;
24573
24574 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24575 {
24576 /* Pop the return address into the PC. */
24577 if (had_to_push_lr)
24578 live_regs_mask |= 1 << PC_REGNUM;
24579
24580 /* Either no argument registers were pushed or a backtrace
24581 structure was created which includes an adjusted stack
24582 pointer, so just pop everything. */
24583 if (live_regs_mask)
24584 thumb_pop (asm_out_file, live_regs_mask);
24585
24586 /* We have either just popped the return address into the
24587 PC or it was kept in LR for the entire function.
24588 Note that thumb_pop has already called thumb_exit if the
24589 PC was in the list. */
24590 if (!had_to_push_lr)
24591 thumb_exit (asm_out_file, LR_REGNUM);
24592 }
24593 else
24594 {
24595 /* Pop everything but the return address. */
24596 if (live_regs_mask)
24597 thumb_pop (asm_out_file, live_regs_mask);
24598
24599 if (had_to_push_lr)
24600 {
24601 if (size > 12)
24602 {
24603 /* We have no free low regs, so save one. */
24604 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24605 LAST_ARG_REGNUM);
24606 }
24607
24608 /* Get the return address into a temporary register. */
24609 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24610
24611 if (size > 12)
24612 {
24613 /* Move the return address to lr. */
24614 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24615 LAST_ARG_REGNUM);
24616 /* Restore the low register. */
24617 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24618 IP_REGNUM);
24619 regno = LR_REGNUM;
24620 }
24621 else
24622 regno = LAST_ARG_REGNUM;
24623 }
24624 else
24625 regno = LR_REGNUM;
24626
24627 /* Remove the argument registers that were pushed onto the stack. */
24628 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24629 SP_REGNUM, SP_REGNUM,
24630 crtl->args.pretend_args_size);
24631
24632 thumb_exit (asm_out_file, regno);
24633 }
24634
24635 return "";
24636 }
24637
24638 /* Functions to save and restore machine-specific function data. */
24639 static struct machine_function *
24640 arm_init_machine_status (void)
24641 {
24642 struct machine_function *machine;
24643 machine = ggc_cleared_alloc<machine_function> ();
24644
24645 #if ARM_FT_UNKNOWN != 0
24646 machine->func_type = ARM_FT_UNKNOWN;
24647 #endif
24648 return machine;
24649 }
24650
24651 /* Return an RTX indicating where the return address to the
24652 calling function can be found. */
24653 rtx
24654 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24655 {
24656 if (count != 0)
24657 return NULL_RTX;
24658
24659 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24660 }
24661
24662 /* Do anything needed before RTL is emitted for each function. */
24663 void
24664 arm_init_expanders (void)
24665 {
24666 /* Arrange to initialize and mark the machine per-function status. */
24667 init_machine_status = arm_init_machine_status;
24668
24669 /* This is to stop the combine pass optimizing away the alignment
24670 adjustment of va_arg. */
24671 /* ??? It is claimed that this should not be necessary. */
24672 if (cfun)
24673 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24674 }
24675
24676 /* Return true if FUNC is compiled in a different (ARM/Thumb) mode than the current function. */
24677
24678 bool
24679 arm_change_mode_p (tree func)
24680 {
24681 if (TREE_CODE (func) != FUNCTION_DECL)
24682 return false;
24683
24684 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24685
24686 if (!callee_tree)
24687 callee_tree = target_option_default_node;
24688
24689 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24690 int flags = callee_opts->x_target_flags;
24691
24692 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24693 }
24694
24695 /* Like arm_compute_initial_elimination_offset. Simpler because there
24696 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24697 to point at the base of the local variables after static stack
24698 space for a function has been allocated. */
24699
24700 HOST_WIDE_INT
24701 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24702 {
24703 arm_stack_offsets *offsets;
24704
24705 offsets = arm_get_frame_offsets ();
24706
24707 switch (from)
24708 {
24709 case ARG_POINTER_REGNUM:
24710 switch (to)
24711 {
24712 case STACK_POINTER_REGNUM:
24713 return offsets->outgoing_args - offsets->saved_args;
24714
24715 case FRAME_POINTER_REGNUM:
24716 return offsets->soft_frame - offsets->saved_args;
24717
24718 case ARM_HARD_FRAME_POINTER_REGNUM:
24719 return offsets->saved_regs - offsets->saved_args;
24720
24721 case THUMB_HARD_FRAME_POINTER_REGNUM:
24722 return offsets->locals_base - offsets->saved_args;
24723
24724 default:
24725 gcc_unreachable ();
24726 }
24727 break;
24728
24729 case FRAME_POINTER_REGNUM:
24730 switch (to)
24731 {
24732 case STACK_POINTER_REGNUM:
24733 return offsets->outgoing_args - offsets->soft_frame;
24734
24735 case ARM_HARD_FRAME_POINTER_REGNUM:
24736 return offsets->saved_regs - offsets->soft_frame;
24737
24738 case THUMB_HARD_FRAME_POINTER_REGNUM:
24739 return offsets->locals_base - offsets->soft_frame;
24740
24741 default:
24742 gcc_unreachable ();
24743 }
24744 break;
24745
24746 default:
24747 gcc_unreachable ();
24748 }
24749 }
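/* Worked example (offsets invented for illustration): with
   saved_args == 0, soft_frame == 16 and outgoing_args == 40, eliminating
   ARG_POINTER_REGNUM to STACK_POINTER_REGNUM yields 40 - 0 == 40, and
   FRAME_POINTER_REGNUM to STACK_POINTER_REGNUM yields 40 - 16 == 24; the
   remaining cases are the analogous differences of the recorded offsets.  */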
24750
24751 /* Generate the function's prologue. */
24752
24753 void
24754 thumb1_expand_prologue (void)
24755 {
24756 rtx_insn *insn;
24757
24758 HOST_WIDE_INT amount;
24759 HOST_WIDE_INT size;
24760 arm_stack_offsets *offsets;
24761 unsigned long func_type;
24762 int regno;
24763 unsigned long live_regs_mask;
24764 unsigned long l_mask;
24765 unsigned high_regs_pushed = 0;
24766 bool lr_needs_saving;
24767
24768 func_type = arm_current_func_type ();
24769
24770 /* Naked functions don't have prologues. */
24771 if (IS_NAKED (func_type))
24772 {
24773 if (flag_stack_usage_info)
24774 current_function_static_stack_size = 0;
24775 return;
24776 }
24777
24778 if (IS_INTERRUPT (func_type))
24779 {
24780 error ("interrupt Service Routines cannot be coded in Thumb mode");
24781 return;
24782 }
24783
24784 if (is_called_in_ARM_mode (current_function_decl))
24785 emit_insn (gen_prologue_thumb1_interwork ());
24786
24787 offsets = arm_get_frame_offsets ();
24788 live_regs_mask = offsets->saved_regs_mask;
24789 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
24790
24791 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24792 l_mask = live_regs_mask & 0x40ff;
24793 /* Then count how many other high registers will need to be pushed. */
24794 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24795
24796 if (crtl->args.pretend_args_size)
24797 {
24798 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24799
24800 if (cfun->machine->uses_anonymous_args)
24801 {
24802 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24803 unsigned long mask;
24804
24805 mask = 1ul << (LAST_ARG_REGNUM + 1);
24806 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24807
24808 insn = thumb1_emit_multi_reg_push (mask, 0);
24809 }
24810 else
24811 {
24812 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24813 stack_pointer_rtx, x));
24814 }
24815 RTX_FRAME_RELATED_P (insn) = 1;
24816 }
24817
24818 if (TARGET_BACKTRACE)
24819 {
24820 HOST_WIDE_INT offset = 0;
24821 unsigned work_register;
24822 rtx work_reg, x, arm_hfp_rtx;
24823
24824 /* We have been asked to create a stack backtrace structure.
24825 The code looks like this:
24826
24827 0 .align 2
24828 0 func:
24829 0 sub SP, #16 Reserve space for 4 registers.
24830 2 push {R7} Push low registers.
24831 4 add R7, SP, #20 Get the stack pointer before the push.
24832 6 str R7, [SP, #8] Store the stack pointer
24833 (before reserving the space).
24834 8 mov R7, PC Get hold of the start of this code + 12.
24835 10 str R7, [SP, #16] Store it.
24836 12 mov R7, FP Get hold of the current frame pointer.
24837 14 str R7, [SP, #4] Store it.
24838 16 mov R7, LR Get hold of the current return address.
24839 18 str R7, [SP, #12] Store it.
24840 20 add R7, SP, #16 Point at the start of the
24841 backtrace structure.
24842 22 mov FP, R7 Put this value into the frame pointer. */
24843
24844 work_register = thumb_find_work_register (live_regs_mask);
24845 work_reg = gen_rtx_REG (SImode, work_register);
24846 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24847
24848 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24849 stack_pointer_rtx, GEN_INT (-16)));
24850 RTX_FRAME_RELATED_P (insn) = 1;
24851
24852 if (l_mask)
24853 {
24854 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24855 RTX_FRAME_RELATED_P (insn) = 1;
24856 lr_needs_saving = false;
24857
24858 offset = bit_count (l_mask) * UNITS_PER_WORD;
24859 }
24860
24861 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24862 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24863
24864 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24865 x = gen_frame_mem (SImode, x);
24866 emit_move_insn (x, work_reg);
24867
24868 /* Make sure that the instruction fetching the PC is in the right place
24869 to calculate "start of backtrace creation code + 12". */
24870 /* ??? The stores using the common WORK_REG ought to be enough to
24871 prevent the scheduler from doing anything weird. Failing that
24872 we could always move all of the following into an UNSPEC_VOLATILE. */
24873 if (l_mask)
24874 {
24875 x = gen_rtx_REG (SImode, PC_REGNUM);
24876 emit_move_insn (work_reg, x);
24877
24878 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24879 x = gen_frame_mem (SImode, x);
24880 emit_move_insn (x, work_reg);
24881
24882 emit_move_insn (work_reg, arm_hfp_rtx);
24883
24884 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24885 x = gen_frame_mem (SImode, x);
24886 emit_move_insn (x, work_reg);
24887 }
24888 else
24889 {
24890 emit_move_insn (work_reg, arm_hfp_rtx);
24891
24892 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24893 x = gen_frame_mem (SImode, x);
24894 emit_move_insn (x, work_reg);
24895
24896 x = gen_rtx_REG (SImode, PC_REGNUM);
24897 emit_move_insn (work_reg, x);
24898
24899 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24900 x = gen_frame_mem (SImode, x);
24901 emit_move_insn (x, work_reg);
24902 }
24903
24904 x = gen_rtx_REG (SImode, LR_REGNUM);
24905 emit_move_insn (work_reg, x);
24906
24907 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24908 x = gen_frame_mem (SImode, x);
24909 emit_move_insn (x, work_reg);
24910
24911 x = GEN_INT (offset + 12);
24912 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24913
24914 emit_move_insn (arm_hfp_rtx, work_reg);
24915 }
24916 /* Optimization: If we are not pushing any low registers but we are going
24917 to push some high registers then delay our first push. This will just
24918 be a push of LR and we can combine it with the push of the first high
24919 register. */
24920 else if ((l_mask & 0xff) != 0
24921 || (high_regs_pushed == 0 && lr_needs_saving))
24922 {
24923 unsigned long mask = l_mask;
24924 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24925 insn = thumb1_emit_multi_reg_push (mask, mask);
24926 RTX_FRAME_RELATED_P (insn) = 1;
24927 lr_needs_saving = false;
24928 }
24929
24930 if (high_regs_pushed)
24931 {
24932 unsigned pushable_regs;
24933 unsigned next_hi_reg;
24934 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24935 : crtl->args.info.nregs;
24936 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24937
24938 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24939 if (live_regs_mask & (1 << next_hi_reg))
24940 break;
24941
24942 /* Here we need to mask out registers used for passing arguments
24943 even if they can be pushed. This is to avoid using them to stash the
24944 high registers; such a stash would clobber the argument values. */
24945 pushable_regs = l_mask & (~arg_regs_mask);
24946 if (lr_needs_saving)
24947 pushable_regs &= ~(1 << LR_REGNUM);
24948
24949 if (pushable_regs == 0)
24950 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24951
24952 while (high_regs_pushed > 0)
24953 {
24954 unsigned long real_regs_mask = 0;
24955 unsigned long push_mask = 0;
24956
24957 for (regno = LR_REGNUM; regno >= 0; regno --)
24958 {
24959 if (pushable_regs & (1 << regno))
24960 {
24961 emit_move_insn (gen_rtx_REG (SImode, regno),
24962 gen_rtx_REG (SImode, next_hi_reg));
24963
24964 high_regs_pushed --;
24965 real_regs_mask |= (1 << next_hi_reg);
24966 push_mask |= (1 << regno);
24967
24968 if (high_regs_pushed)
24969 {
24970 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24971 next_hi_reg --)
24972 if (live_regs_mask & (1 << next_hi_reg))
24973 break;
24974 }
24975 else
24976 break;
24977 }
24978 }
24979
24980 /* If we had to find a work register and we have not yet
24981 saved the LR then add it to the list of regs to push. */
24982 if (lr_needs_saving)
24983 {
24984 push_mask |= 1 << LR_REGNUM;
24985 real_regs_mask |= 1 << LR_REGNUM;
24986 lr_needs_saving = false;
24987 }
24988
24989 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
24990 RTX_FRAME_RELATED_P (insn) = 1;
24991 }
24992 }
24993
24994 /* Load the pic register before setting the frame pointer,
24995 so we can use r7 as a temporary work register. */
24996 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24997 arm_load_pic_register (live_regs_mask);
24998
24999 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
25000 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
25001 stack_pointer_rtx);
25002
25003 size = offsets->outgoing_args - offsets->saved_args;
25004 if (flag_stack_usage_info)
25005 current_function_static_stack_size = size;
25006
25007 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25008 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
25009 || flag_stack_clash_protection)
25010 && size)
25011 sorry ("-fstack-check=specific for Thumb-1");
25012
25013 amount = offsets->outgoing_args - offsets->saved_regs;
25014 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25015 if (amount)
25016 {
25017 if (amount < 512)
25018 {
25019 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25020 GEN_INT (- amount)));
25021 RTX_FRAME_RELATED_P (insn) = 1;
25022 }
25023 else
25024 {
25025 rtx reg, dwarf;
25026
25027 /* The stack decrement is too big for an immediate value in a single
25028 insn. In theory we could issue multiple subtracts, but after
25029 three of them it becomes more space efficient to place the full
25030 value in the constant pool and load into a register. (Also the
25031 ARM debugger really likes to see only one stack decrement per
25032 function). So instead we look for a scratch register into which
25033 we can load the decrement, and then we subtract this from the
25034 stack pointer. Unfortunately on the thumb the only available
25035 scratch registers are the argument registers, and we cannot use
25036 these as they may hold arguments to the function. Instead we
25037 attempt to locate a call preserved register which is used by this
25038 function. If we can find one, then we know that it will have
25039 been pushed at the start of the prologue and so we can corrupt
25040 it now. */
25041 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25042 if (live_regs_mask & (1 << regno))
25043 break;
25044
25045 gcc_assert (regno <= LAST_LO_REGNUM);
25046
25047 reg = gen_rtx_REG (SImode, regno);
25048
25049 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25050
25051 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25052 stack_pointer_rtx, reg));
25053
25054 dwarf = gen_rtx_SET (stack_pointer_rtx,
25055 plus_constant (Pmode, stack_pointer_rtx,
25056 -amount));
25057 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25058 RTX_FRAME_RELATED_P (insn) = 1;
25059 }
25060 }
25061
25062 if (frame_pointer_needed)
25063 thumb_set_frame_pointer (offsets);
25064
25065 /* If we are profiling, make sure no instructions are scheduled before
25066 the call to mcount. Similarly if the user has requested no
25067 scheduling in the prolog. Similarly if we want non-call exceptions
25068 using the EABI unwinder, to prevent faulting instructions from being
25069 swapped with a stack adjustment. */
25070 if (crtl->profile || !TARGET_SCHED_PROLOG
25071 || (arm_except_unwind_info (&global_options) == UI_TARGET
25072 && cfun->can_throw_non_call_exceptions))
25073 emit_insn (gen_blockage ());
25074
25075 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25076 if (live_regs_mask & 0xff)
25077 cfun->machine->lr_save_eliminated = 0;
25078 }
25079
25080 /* Clear caller saved registers not used to pass return values and leaked
25081 condition flags before exiting a cmse_nonsecure_entry function. */
25082
25083 void
25084 cmse_nonsecure_entry_clear_before_return (void)
25085 {
25086 int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
25087 uint32_t padding_bits_to_clear = 0;
25088 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
25089 auto_sbitmap to_clear_bitmap (maxregno + 1);
25090 tree result_type;
25091 rtx result_rtl;
25092
25093 bitmap_clear (to_clear_bitmap);
25094 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
25095 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
25096
25097 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25098 registers. */
25099 if (TARGET_HARD_FLOAT)
25100 {
25101 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
25102
25103 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
25104
25105 /* Make sure we don't clear the two scratch registers used to clear the
25106 relevant FPSCR bits in output_return_instruction. */
25107 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25108 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
25109 emit_use (gen_rtx_REG (SImode, 4));
25110 bitmap_clear_bit (to_clear_bitmap, 4);
25111 }
25112
25113 /* If the user has defined registers to be caller saved, these are no longer
25114 restored by the function before returning and must thus be cleared for
25115 security purposes. */
25116 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
25117 {
25118 /* We do not touch registers that can be used to pass arguments as per
25119 the AAPCS, since these should never be made callee-saved by user
25120 options. */
25121 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25122 continue;
25123 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25124 continue;
25125 if (call_used_regs[regno])
25126 bitmap_set_bit (to_clear_bitmap, regno);
25127 }
25128
25129 /* Make sure we do not clear the registers used to return the result in. */
25130 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25131 if (!VOID_TYPE_P (result_type))
25132 {
25133 uint64_t to_clear_return_mask;
25134 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25135
25136 /* No need to check that we return in registers, because we don't
25137 support returning on stack yet. */
25138 gcc_assert (REG_P (result_rtl));
25139 to_clear_return_mask
25140 = compute_not_to_clear_mask (result_type, result_rtl, 0,
25141 padding_bits_to_clear_ptr);
25142 if (to_clear_return_mask)
25143 {
25144 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
25145 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25146 {
25147 if (to_clear_return_mask & (1ULL << regno))
25148 bitmap_clear_bit (to_clear_bitmap, regno);
25149 }
25150 }
25151 }
25152
25153 if (padding_bits_to_clear != 0)
25154 {
25155 rtx reg_rtx;
25156 auto_sbitmap to_clear_arg_regs_bitmap (R0_REGNUM + NUM_ARG_REGS);
25157
25158 /* Padding bits to clear are nonzero, so we know we are returning a
25159 composite type, which only uses r0. Make sure that r1-r3 are
25160 cleared too; we will use r1 as a scratch register. */
25161 bitmap_clear (to_clear_arg_regs_bitmap);
25162 bitmap_set_range (to_clear_arg_regs_bitmap, R0_REGNUM + 1,
25163 NUM_ARG_REGS - 1);
25164 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
25165
25166 reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
25167
25168 /* Fill the lower half of the negated padding_bits_to_clear. */
25169 emit_move_insn (reg_rtx,
25170 GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
25171
25172 /* Also fill the top half of the negated padding_bits_to_clear. */
25173 if (((~padding_bits_to_clear) >> 16) > 0)
25174 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
25175 GEN_INT (16),
25176 GEN_INT (16)),
25177 GEN_INT ((~padding_bits_to_clear) >> 16)));
25178
25179 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
25180 gen_rtx_REG (SImode, R0_REGNUM),
25181 reg_rtx));
25182 }
25183
25184 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25185 {
25186 if (!bitmap_bit_p (to_clear_bitmap, regno))
25187 continue;
25188
25189 if (IS_VFP_REGNUM (regno))
25190 {
25191 /* If regno is an even vfp register and its successor is also to
25192 be cleared, use vmov. */
25193 if (TARGET_VFP_DOUBLE
25194 && VFP_REGNO_OK_FOR_DOUBLE (regno)
25195 && bitmap_bit_p (to_clear_bitmap, regno + 1))
25196 {
25197 emit_move_insn (gen_rtx_REG (DFmode, regno),
25198 CONST1_RTX (DFmode));
25199 emit_use (gen_rtx_REG (DFmode, regno));
25200 regno++;
25201 }
25202 else
25203 {
25204 emit_move_insn (gen_rtx_REG (SFmode, regno),
25205 CONST1_RTX (SFmode));
25206 emit_use (gen_rtx_REG (SFmode, regno));
25207 }
25208 }
25209 else
25210 {
25211 if (TARGET_THUMB1)
25212 {
25213 if (regno == R0_REGNUM)
25214 emit_move_insn (gen_rtx_REG (SImode, regno),
25215 const0_rtx);
25216 else
25217 /* R0 has either been cleared above or it holds a return
25218 value; either way it does not contain secret
25219 information. */
25220 emit_move_insn (gen_rtx_REG (SImode, regno),
25221 gen_rtx_REG (SImode, R0_REGNUM));
25222 emit_use (gen_rtx_REG (SImode, regno));
25223 }
25224 else
25225 {
25226 emit_move_insn (gen_rtx_REG (SImode, regno),
25227 gen_rtx_REG (SImode, LR_REGNUM));
25228 emit_use (gen_rtx_REG (SImode, regno));
25229 }
25230 }
25231 }
25232 }
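
/* For illustration only (not literal compiler output): for a soft-float
   cmse_nonsecure_entry function returning an int, r0 carries the return
   value and is left alone, while the remaining caller-saved core
   registers are overwritten with non-secret values, roughly:

	mov	r1, lr
	mov	r2, lr
	mov	r3, lr
	mov	ip, lr

   On Thumb-1 targets r0 (which is not secret) is used as the source
   instead of lr.  */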
25233
25234 /* Generate pattern *pop_multiple_with_stack_update_and_return if a single
25235 POP instruction can be generated. LR should be replaced by PC. All
25236 the checks required are already done by USE_RETURN_INSN (). Hence,
25237 all we really need to check here is whether a single register or
25238 multiple registers are to be popped. */
25239 void
25240 thumb2_expand_return (bool simple_return)
25241 {
25242 int i, num_regs;
25243 unsigned long saved_regs_mask;
25244 arm_stack_offsets *offsets;
25245
25246 offsets = arm_get_frame_offsets ();
25247 saved_regs_mask = offsets->saved_regs_mask;
25248
25249 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25250 if (saved_regs_mask & (1 << i))
25251 num_regs++;
25252
25253 if (!simple_return && saved_regs_mask)
25254 {
25255 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25256 functions, or adapt the code to handle it according to the ACLE. This
25257 path should not be reachable for cmse_nonsecure_entry functions, but we
25258 assert it for now to ensure that future code changes do not silently
25259 change this behavior. */
25260 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25261 if (num_regs == 1)
25262 {
25263 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25264 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25265 rtx addr = gen_rtx_MEM (SImode,
25266 gen_rtx_POST_INC (SImode,
25267 stack_pointer_rtx));
25268 set_mem_alias_set (addr, get_frame_alias_set ());
25269 XVECEXP (par, 0, 0) = ret_rtx;
25270 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25271 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25272 emit_jump_insn (par);
25273 }
25274 else
25275 {
25276 saved_regs_mask &= ~ (1 << LR_REGNUM);
25277 saved_regs_mask |= (1 << PC_REGNUM);
25278 arm_emit_multi_reg_pop (saved_regs_mask);
25279 }
25280 }
25281 else
25282 {
25283 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25284 cmse_nonsecure_entry_clear_before_return ();
25285 emit_jump_insn (simple_return_rtx);
25286 }
25287 }
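
/* Rough illustration of the two non-simple return shapes produced above
   (exact output depends on the saved register set):

	ldr	pc, [sp], #4		@ only LR was saved
	pop	{r4, r5, pc}		@ several registers saved  */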
25288
25289 void
25290 thumb1_expand_epilogue (void)
25291 {
25292 HOST_WIDE_INT amount;
25293 arm_stack_offsets *offsets;
25294 int regno;
25295
25296 /* Naked functions don't have prologues. */
25297 if (IS_NAKED (arm_current_func_type ()))
25298 return;
25299
25300 offsets = arm_get_frame_offsets ();
25301 amount = offsets->outgoing_args - offsets->saved_regs;
25302
25303 if (frame_pointer_needed)
25304 {
25305 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25306 amount = offsets->locals_base - offsets->saved_regs;
25307 }
25308 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25309
25310 gcc_assert (amount >= 0);
25311 if (amount)
25312 {
25313 emit_insn (gen_blockage ());
25314
25315 if (amount < 512)
25316 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25317 GEN_INT (amount)));
25318 else
25319 {
25320 /* r3 is always free in the epilogue. */
25321 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25322
25323 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25324 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25325 }
25326 }
25327
25328 /* Emit a USE (stack_pointer_rtx), so that
25329 the stack adjustment will not be deleted. */
25330 emit_insn (gen_force_register_use (stack_pointer_rtx));
25331
25332 if (crtl->profile || !TARGET_SCHED_PROLOG)
25333 emit_insn (gen_blockage ());
25334
25335 /* Emit a clobber for each insn that will be restored in the epilogue,
25336 so that flow2 will get register lifetimes correct. */
25337 for (regno = 0; regno < 13; regno++)
25338 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25339 emit_clobber (gen_rtx_REG (SImode, regno));
25340
25341 if (! df_regs_ever_live_p (LR_REGNUM))
25342 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25343
25344 /* Clear all caller-saved regs that are not used to return. */
25345 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25346 cmse_nonsecure_entry_clear_before_return ();
25347 }
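
/* As a sketch, for a local frame of 512 bytes or more the adjustment
   emitted above looks roughly like this (the saved registers are popped
   by separately emitted code):

	ldr	r3, .Lframe_size
	add	sp, sp, r3

   Smaller frames use an immediate "add sp, sp, #N" instead.  */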
25348
25349 /* Epilogue code for APCS frame. */
25350 static void
25351 arm_expand_epilogue_apcs_frame (bool really_return)
25352 {
25353 unsigned long func_type;
25354 unsigned long saved_regs_mask;
25355 int num_regs = 0;
25356 int i;
25357 int floats_from_frame = 0;
25358 arm_stack_offsets *offsets;
25359
25360 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25361 func_type = arm_current_func_type ();
25362
25363 /* Get frame offsets for ARM. */
25364 offsets = arm_get_frame_offsets ();
25365 saved_regs_mask = offsets->saved_regs_mask;
25366
25367 /* Find the offset of the floating-point save area in the frame. */
25368 floats_from_frame
25369 = (offsets->saved_args
25370 + arm_compute_static_chain_stack_bytes ()
25371 - offsets->frame);
25372
25373 /* Compute how many core registers are saved and how far away the floats are. */
25374 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25375 if (saved_regs_mask & (1 << i))
25376 {
25377 num_regs++;
25378 floats_from_frame += 4;
25379 }
25380
25381 if (TARGET_HARD_FLOAT)
25382 {
25383 int start_reg;
25384 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25385
25386 /* The offset is from IP_REGNUM. */
25387 int saved_size = arm_get_vfp_saved_size ();
25388 if (saved_size > 0)
25389 {
25390 rtx_insn *insn;
25391 floats_from_frame += saved_size;
25392 insn = emit_insn (gen_addsi3 (ip_rtx,
25393 hard_frame_pointer_rtx,
25394 GEN_INT (-floats_from_frame)));
25395 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25396 ip_rtx, hard_frame_pointer_rtx);
25397 }
25398
25399 /* Generate VFP register multi-pop. */
25400 start_reg = FIRST_VFP_REGNUM;
25401
25402 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25403 /* Look for a case where a reg does not need restoring. */
25404 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25405 && (!df_regs_ever_live_p (i + 1)
25406 || call_used_regs[i + 1]))
25407 {
25408 if (start_reg != i)
25409 arm_emit_vfp_multi_reg_pop (start_reg,
25410 (i - start_reg) / 2,
25411 gen_rtx_REG (SImode,
25412 IP_REGNUM));
25413 start_reg = i + 2;
25414 }
25415
25416 /* Restore the remaining regs that we have discovered (or possibly
25417 even all of them, if the conditional in the for loop never
25418 fired). */
25419 if (start_reg != i)
25420 arm_emit_vfp_multi_reg_pop (start_reg,
25421 (i - start_reg) / 2,
25422 gen_rtx_REG (SImode, IP_REGNUM));
25423 }
25424
25425 if (TARGET_IWMMXT)
25426 {
25427 /* The frame pointer is guaranteed to be non-double-word aligned, as
25428 it is set to double-word-aligned old_stack_pointer - 4. */
25429 rtx_insn *insn;
25430 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25431
25432 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25433 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25434 {
25435 rtx addr = gen_frame_mem (V2SImode,
25436 plus_constant (Pmode, hard_frame_pointer_rtx,
25437 - lrm_count * 4));
25438 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25439 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25440 gen_rtx_REG (V2SImode, i),
25441 NULL_RTX);
25442 lrm_count += 2;
25443 }
25444 }
25445
25446 /* saved_regs_mask should contain IP, which holds the old stack pointer
25447 from the time the activation record was created. Since SP and IP are
25448 adjacent registers, we can restore the value directly into SP. */
25449 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25450 saved_regs_mask &= ~(1 << IP_REGNUM);
25451 saved_regs_mask |= (1 << SP_REGNUM);
25452
25453 /* There are two registers left in saved_regs_mask - LR and PC. We
25454 only need to restore LR (the return address), but to
25455 save time we can load it directly into PC, unless we need a
25456 special function exit sequence, or we are not really returning. */
25457 if (really_return
25458 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25459 && !crtl->calls_eh_return)
25460 /* Delete LR from the register mask, so that LR on
25461 the stack is loaded into the PC in the register mask. */
25462 saved_regs_mask &= ~(1 << LR_REGNUM);
25463 else
25464 saved_regs_mask &= ~(1 << PC_REGNUM);
25465
25466 num_regs = bit_count (saved_regs_mask);
25467 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25468 {
25469 rtx_insn *insn;
25470 emit_insn (gen_blockage ());
25471 /* Unwind the stack to just below the saved registers. */
25472 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25473 hard_frame_pointer_rtx,
25474 GEN_INT (- 4 * num_regs)));
25475
25476 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25477 stack_pointer_rtx, hard_frame_pointer_rtx);
25478 }
25479
25480 arm_emit_multi_reg_pop (saved_regs_mask);
25481
25482 if (IS_INTERRUPT (func_type))
25483 {
25484 /* Interrupt handlers will have pushed the
25485 IP onto the stack, so restore it now. */
25486 rtx_insn *insn;
25487 rtx addr = gen_rtx_MEM (SImode,
25488 gen_rtx_POST_INC (SImode,
25489 stack_pointer_rtx));
25490 set_mem_alias_set (addr, get_frame_alias_set ());
25491 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25492 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25493 gen_rtx_REG (SImode, IP_REGNUM),
25494 NULL_RTX);
25495 }
25496
25497 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25498 return;
25499
25500 if (crtl->calls_eh_return)
25501 emit_insn (gen_addsi3 (stack_pointer_rtx,
25502 stack_pointer_rtx,
25503 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25504
25505 if (IS_STACKALIGN (func_type))
25506 /* Restore the original stack pointer. Before prologue, the stack was
25507 realigned and the original stack pointer saved in r0. For details,
25508 see comment in arm_expand_prologue. */
25509 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25510
25511 emit_jump_insn (simple_return_rtx);
25512 }
25513
25514 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25515 function is not a sibcall. */
25516 void
25517 arm_expand_epilogue (bool really_return)
25518 {
25519 unsigned long func_type;
25520 unsigned long saved_regs_mask;
25521 int num_regs = 0;
25522 int i;
25523 int amount;
25524 arm_stack_offsets *offsets;
25525
25526 func_type = arm_current_func_type ();
25527
25528 /* Naked functions don't have epilogues. Hence, generate a return pattern and
25529 let output_return_instruction take care of any instruction emission. */
25530 if (IS_NAKED (func_type)
25531 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25532 {
25533 if (really_return)
25534 emit_jump_insn (simple_return_rtx);
25535 return;
25536 }
25537
25538 /* If we are throwing an exception, then we really must be doing a
25539 return, so we can't tail-call. */
25540 gcc_assert (!crtl->calls_eh_return || really_return);
25541
25542 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25543 {
25544 arm_expand_epilogue_apcs_frame (really_return);
25545 return;
25546 }
25547
25548 /* Get frame offsets for ARM. */
25549 offsets = arm_get_frame_offsets ();
25550 saved_regs_mask = offsets->saved_regs_mask;
25551 num_regs = bit_count (saved_regs_mask);
25552
25553 if (frame_pointer_needed)
25554 {
25555 rtx_insn *insn;
25556 /* Restore stack pointer if necessary. */
25557 if (TARGET_ARM)
25558 {
25559 /* In ARM mode, frame pointer points to first saved register.
25560 Restore stack pointer to last saved register. */
25561 amount = offsets->frame - offsets->saved_regs;
25562
25563 /* Force out any pending memory operations that reference stacked data
25564 before stack de-allocation occurs. */
25565 emit_insn (gen_blockage ());
25566 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25567 hard_frame_pointer_rtx,
25568 GEN_INT (amount)));
25569 arm_add_cfa_adjust_cfa_note (insn, amount,
25570 stack_pointer_rtx,
25571 hard_frame_pointer_rtx);
25572
25573 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25574 deleted. */
25575 emit_insn (gen_force_register_use (stack_pointer_rtx));
25576 }
25577 else
25578 {
25579 /* In Thumb-2 mode, the frame pointer points to the last saved
25580 register. */
25581 amount = offsets->locals_base - offsets->saved_regs;
25582 if (amount)
25583 {
25584 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25585 hard_frame_pointer_rtx,
25586 GEN_INT (amount)));
25587 arm_add_cfa_adjust_cfa_note (insn, amount,
25588 hard_frame_pointer_rtx,
25589 hard_frame_pointer_rtx);
25590 }
25591
25592 /* Force out any pending memory operations that reference stacked data
25593 before stack de-allocation occurs. */
25594 emit_insn (gen_blockage ());
25595 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25596 hard_frame_pointer_rtx));
25597 arm_add_cfa_adjust_cfa_note (insn, 0,
25598 stack_pointer_rtx,
25599 hard_frame_pointer_rtx);
25600 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25601 deleted. */
25602 emit_insn (gen_force_register_use (stack_pointer_rtx));
25603 }
25604 }
25605 else
25606 {
25607 /* Pop off outgoing args and local frame to adjust stack pointer to
25608 last saved register. */
25609 amount = offsets->outgoing_args - offsets->saved_regs;
25610 if (amount)
25611 {
25612 rtx_insn *tmp;
25613 /* Force out any pending memory operations that reference stacked data
25614 before stack de-allocation occurs. */
25615 emit_insn (gen_blockage ());
25616 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25617 stack_pointer_rtx,
25618 GEN_INT (amount)));
25619 arm_add_cfa_adjust_cfa_note (tmp, amount,
25620 stack_pointer_rtx, stack_pointer_rtx);
25621 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25622 not deleted. */
25623 emit_insn (gen_force_register_use (stack_pointer_rtx));
25624 }
25625 }
25626
25627 if (TARGET_HARD_FLOAT)
25628 {
25629 /* Generate VFP register multi-pop. */
25630 int end_reg = LAST_VFP_REGNUM + 1;
25631
25632 /* Scan the registers in reverse order. We need to match
25633 any groupings made in the prologue and generate matching
25634 vldm operations. The need to match groups is because,
25635 unlike pop, vldm can only do consecutive regs. */
25636 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25637 /* Look for a case where a reg does not need restoring. */
25638 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25639 && (!df_regs_ever_live_p (i + 1)
25640 || call_used_regs[i + 1]))
25641 {
25642 /* Restore the regs discovered so far (from reg+2 to
25643 end_reg). */
25644 if (end_reg > i + 2)
25645 arm_emit_vfp_multi_reg_pop (i + 2,
25646 (end_reg - (i + 2)) / 2,
25647 stack_pointer_rtx);
25648 end_reg = i;
25649 }
25650
25651 /* Restore the remaining regs that we have discovered (or possibly
25652 even all of them, if the conditional in the for loop never
25653 fired). */
25654 if (end_reg > i + 2)
25655 arm_emit_vfp_multi_reg_pop (i + 2,
25656 (end_reg - (i + 2)) / 2,
25657 stack_pointer_rtx);
25658 }
25659
25660 if (TARGET_IWMMXT)
25661 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25662 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25663 {
25664 rtx_insn *insn;
25665 rtx addr = gen_rtx_MEM (V2SImode,
25666 gen_rtx_POST_INC (SImode,
25667 stack_pointer_rtx));
25668 set_mem_alias_set (addr, get_frame_alias_set ());
25669 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25670 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25671 gen_rtx_REG (V2SImode, i),
25672 NULL_RTX);
25673 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25674 stack_pointer_rtx, stack_pointer_rtx);
25675 }
25676
25677 if (saved_regs_mask)
25678 {
25679 rtx insn;
25680 bool return_in_pc = false;
25681
25682 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25683 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25684 && !IS_CMSE_ENTRY (func_type)
25685 && !IS_STACKALIGN (func_type)
25686 && really_return
25687 && crtl->args.pretend_args_size == 0
25688 && saved_regs_mask & (1 << LR_REGNUM)
25689 && !crtl->calls_eh_return)
25690 {
25691 saved_regs_mask &= ~(1 << LR_REGNUM);
25692 saved_regs_mask |= (1 << PC_REGNUM);
25693 return_in_pc = true;
25694 }
25695
25696 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25697 {
25698 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25699 if (saved_regs_mask & (1 << i))
25700 {
25701 rtx addr = gen_rtx_MEM (SImode,
25702 gen_rtx_POST_INC (SImode,
25703 stack_pointer_rtx));
25704 set_mem_alias_set (addr, get_frame_alias_set ());
25705
25706 if (i == PC_REGNUM)
25707 {
25708 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25709 XVECEXP (insn, 0, 0) = ret_rtx;
25710 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25711 addr);
25712 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25713 insn = emit_jump_insn (insn);
25714 }
25715 else
25716 {
25717 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25718 addr));
25719 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25720 gen_rtx_REG (SImode, i),
25721 NULL_RTX);
25722 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25723 stack_pointer_rtx,
25724 stack_pointer_rtx);
25725 }
25726 }
25727 }
25728 else
25729 {
25730 if (TARGET_LDRD
25731 && current_tune->prefer_ldrd_strd
25732 && !optimize_function_for_size_p (cfun))
25733 {
25734 if (TARGET_THUMB2)
25735 thumb2_emit_ldrd_pop (saved_regs_mask);
25736 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25737 arm_emit_ldrd_pop (saved_regs_mask);
25738 else
25739 arm_emit_multi_reg_pop (saved_regs_mask);
25740 }
25741 else
25742 arm_emit_multi_reg_pop (saved_regs_mask);
25743 }
25744
25745 if (return_in_pc)
25746 return;
25747 }
25748
25749 amount
25750 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes ();
25751 if (amount)
25752 {
25753 int i, j;
25754 rtx dwarf = NULL_RTX;
25755 rtx_insn *tmp =
25756 emit_insn (gen_addsi3 (stack_pointer_rtx,
25757 stack_pointer_rtx,
25758 GEN_INT (amount)));
25759
25760 RTX_FRAME_RELATED_P (tmp) = 1;
25761
25762 if (cfun->machine->uses_anonymous_args)
25763 {
25764 /* Restore pretend args. See arm_expand_prologue for how the
25765 pretend args are saved on the stack. */
25766 int num_regs = crtl->args.pretend_args_size / 4;
25767 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25768 for (j = 0, i = 0; j < num_regs; i++)
25769 if (saved_regs_mask & (1 << i))
25770 {
25771 rtx reg = gen_rtx_REG (SImode, i);
25772 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25773 j++;
25774 }
25775 REG_NOTES (tmp) = dwarf;
25776 }
25777 arm_add_cfa_adjust_cfa_note (tmp, amount,
25778 stack_pointer_rtx, stack_pointer_rtx);
25779 }
25780
25781 /* Clear all caller-saved regs that are not used to return. */
25782 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25783 {
25784 /* CMSE_ENTRY always returns. */
25785 gcc_assert (really_return);
25786 cmse_nonsecure_entry_clear_before_return ();
25787 }
25788
25789 if (!really_return)
25790 return;
25791
25792 if (crtl->calls_eh_return)
25793 emit_insn (gen_addsi3 (stack_pointer_rtx,
25794 stack_pointer_rtx,
25795 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25796
25797 if (IS_STACKALIGN (func_type))
25798 /* Restore the original stack pointer. Before prologue, the stack was
25799 realigned and the original stack pointer saved in r0. For details,
25800 see comment in arm_expand_prologue. */
25801 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25802
25803 emit_jump_insn (simple_return_rtx);
25804 }
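
/* Typical shape of the code produced above for a simple Thumb-2 function
   with 16 bytes of locals and r4-r7/lr saved (illustrative only):

	add	sp, sp, #16
	pop	{r4, r5, r6, r7, pc}  */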
25805
25806 /* Implementation of insn prologue_thumb1_interwork. This is the first
25807 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25808
25809 const char *
25810 thumb1_output_interwork (void)
25811 {
25812 const char * name;
25813 FILE *f = asm_out_file;
25814
25815 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25816 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25817 == SYMBOL_REF);
25818 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25819
25820 /* Generate code sequence to switch us into Thumb mode. */
25821 /* The .code 32 directive has already been emitted by
25822 ASM_DECLARE_FUNCTION_NAME. */
25823 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25824 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25825
25826 /* Generate a label, so that the debugger will notice the
25827 change in instruction sets. This label is also used by
25828 the assembler to bypass the ARM code when this function
25829 is called from a Thumb encoded function elsewhere in the
25830 same file. Hence the definition of STUB_NAME here must
25831 agree with the definition in gas/config/tc-arm.c. */
25832
25833 #define STUB_NAME ".real_start_of"
25834
25835 fprintf (f, "\t.code\t16\n");
25836 #ifdef ARM_PE
25837 if (arm_dllexport_name_p (name))
25838 name = arm_strip_name_encoding (name);
25839 #endif
25840 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25841 fprintf (f, "\t.thumb_func\n");
25842 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25843
25844 return "";
25845 }
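
/* The sequence emitted above looks roughly like this for a function
   <name> (modulo the user label prefix):

	orr	ip, pc, #1
	bx	ip
	.code	16
	.globl	.real_start_of<name>
	.thumb_func
   .real_start_of<name>:  */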
25846
25847 /* Handle the case of a double word load into a low register from
25848 a computed memory address. The computed address may involve a
25849 register which is overwritten by the load. */
25850 const char *
25851 thumb_load_double_from_address (rtx *operands)
25852 {
25853 rtx addr;
25854 rtx base;
25855 rtx offset;
25856 rtx arg1;
25857 rtx arg2;
25858
25859 gcc_assert (REG_P (operands[0]));
25860 gcc_assert (MEM_P (operands[1]));
25861
25862 /* Get the memory address. */
25863 addr = XEXP (operands[1], 0);
25864
25865 /* Work out how the memory address is computed. */
25866 switch (GET_CODE (addr))
25867 {
25868 case REG:
25869 operands[2] = adjust_address (operands[1], SImode, 4);
25870
25871 if (REGNO (operands[0]) == REGNO (addr))
25872 {
25873 output_asm_insn ("ldr\t%H0, %2", operands);
25874 output_asm_insn ("ldr\t%0, %1", operands);
25875 }
25876 else
25877 {
25878 output_asm_insn ("ldr\t%0, %1", operands);
25879 output_asm_insn ("ldr\t%H0, %2", operands);
25880 }
25881 break;
25882
25883 case CONST:
25884 /* Compute <address> + 4 for the high order load. */
25885 operands[2] = adjust_address (operands[1], SImode, 4);
25886
25887 output_asm_insn ("ldr\t%0, %1", operands);
25888 output_asm_insn ("ldr\t%H0, %2", operands);
25889 break;
25890
25891 case PLUS:
25892 arg1 = XEXP (addr, 0);
25893 arg2 = XEXP (addr, 1);
25894
25895 if (CONSTANT_P (arg1))
25896 base = arg2, offset = arg1;
25897 else
25898 base = arg1, offset = arg2;
25899
25900 gcc_assert (REG_P (base));
25901
25902 /* Catch the case of <address> = <reg> + <reg> */
25903 if (REG_P (offset))
25904 {
25905 int reg_offset = REGNO (offset);
25906 int reg_base = REGNO (base);
25907 int reg_dest = REGNO (operands[0]);
25908
25909 /* Add the base and offset registers together into the
25910 higher destination register. */
25911 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25912 reg_dest + 1, reg_base, reg_offset);
25913
25914 /* Load the lower destination register from the address in
25915 the higher destination register. */
25916 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25917 reg_dest, reg_dest + 1);
25918
25919 /* Load the higher destination register from its own address
25920 plus 4. */
25921 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25922 reg_dest + 1, reg_dest + 1);
25923 }
25924 else
25925 {
25926 /* Compute <address> + 4 for the high order load. */
25927 operands[2] = adjust_address (operands[1], SImode, 4);
25928
25929 /* If the computed address is held in the low order register
25930 then load the high order register first, otherwise always
25931 load the low order register first. */
25932 if (REGNO (operands[0]) == REGNO (base))
25933 {
25934 output_asm_insn ("ldr\t%H0, %2", operands);
25935 output_asm_insn ("ldr\t%0, %1", operands);
25936 }
25937 else
25938 {
25939 output_asm_insn ("ldr\t%0, %1", operands);
25940 output_asm_insn ("ldr\t%H0, %2", operands);
25941 }
25942 }
25943 break;
25944
25945 case LABEL_REF:
25946 /* With no registers to worry about we can just load the value
25947 directly. */
25948 operands[2] = adjust_address (operands[1], SImode, 4);
25949
25950 output_asm_insn ("ldr\t%H0, %2", operands);
25951 output_asm_insn ("ldr\t%0, %1", operands);
25952 break;
25953
25954 default:
25955 gcc_unreachable ();
25956 }
25957
25958 return "";
25959 }
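
/* Example of the register + register case above, with a hypothetical
   destination pair r2/r3, base r4 and offset r5:

	add	r3, r4, r5
	ldr	r2, [r3, #0]
	ldr	r3, [r3, #4]  */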
25960
25961 const char *
25962 thumb_output_move_mem_multiple (int n, rtx *operands)
25963 {
25964 switch (n)
25965 {
25966 case 2:
25967 if (REGNO (operands[4]) > REGNO (operands[5]))
25968 std::swap (operands[4], operands[5]);
25969
25970 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25971 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25972 break;
25973
25974 case 3:
25975 if (REGNO (operands[4]) > REGNO (operands[5]))
25976 std::swap (operands[4], operands[5]);
25977 if (REGNO (operands[5]) > REGNO (operands[6]))
25978 std::swap (operands[5], operands[6]);
25979 if (REGNO (operands[4]) > REGNO (operands[5]))
25980 std::swap (operands[4], operands[5]);
25981
25982 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25983 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25984 break;
25985
25986 default:
25987 gcc_unreachable ();
25988 }
25989
25990 return "";
25991 }
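
/* Example for n == 3 with hypothetical pointer registers r0/r1 and
   scratch registers r3, r4, r5:

	ldmia	r1!, {r3, r4, r5}
	stmia	r0!, {r3, r4, r5}  */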
25992
25993 /* Output a call-via instruction for thumb state. */
25994 const char *
25995 thumb_call_via_reg (rtx reg)
25996 {
25997 int regno = REGNO (reg);
25998 rtx *labelp;
25999
26000 gcc_assert (regno < LR_REGNUM);
26001
26002 /* If we are in the normal text section we can use a single instance
26003 per compilation unit. If we are doing function sections, then we need
26004 an entry per section, since we can't rely on reachability. */
26005 if (in_section == text_section)
26006 {
26007 thumb_call_reg_needed = 1;
26008
26009 if (thumb_call_via_label[regno] == NULL)
26010 thumb_call_via_label[regno] = gen_label_rtx ();
26011 labelp = thumb_call_via_label + regno;
26012 }
26013 else
26014 {
26015 if (cfun->machine->call_via[regno] == NULL)
26016 cfun->machine->call_via[regno] = gen_label_rtx ();
26017 labelp = cfun->machine->call_via + regno;
26018 }
26019
26020 output_asm_insn ("bl\t%a0", labelp);
26021 return "";
26022 }
26023
26024 /* Routines for generating rtl. */
26025 void
26026 thumb_expand_movmemqi (rtx *operands)
26027 {
26028 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26029 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26030 HOST_WIDE_INT len = INTVAL (operands[2]);
26031 HOST_WIDE_INT offset = 0;
26032
26033 while (len >= 12)
26034 {
26035 emit_insn (gen_movmem12b (out, in, out, in));
26036 len -= 12;
26037 }
26038
26039 if (len >= 8)
26040 {
26041 emit_insn (gen_movmem8b (out, in, out, in));
26042 len -= 8;
26043 }
26044
26045 if (len >= 4)
26046 {
26047 rtx reg = gen_reg_rtx (SImode);
26048 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26049 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26050 len -= 4;
26051 offset += 4;
26052 }
26053
26054 if (len >= 2)
26055 {
26056 rtx reg = gen_reg_rtx (HImode);
26057 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26058 plus_constant (Pmode, in,
26059 offset))));
26060 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26061 offset)),
26062 reg));
26063 len -= 2;
26064 offset += 2;
26065 }
26066
26067 if (len)
26068 {
26069 rtx reg = gen_reg_rtx (QImode);
26070 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26071 plus_constant (Pmode, in,
26072 offset))));
26073 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26074 offset)),
26075 reg));
26076 }
26077 }
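
/* Worked example: a copy of 23 bytes is decomposed above into one
   12-byte block move, one 8-byte block move, a halfword copy and a
   final byte copy (12 + 8 + 2 + 1 = 23).  */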
26078
26079 void
26080 thumb_reload_out_hi (rtx *operands)
26081 {
26082 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26083 }
26084
26085 /* Return the length of a function name prefix
26086 that starts with the character 'c'. */
26087 static int
26088 arm_get_strip_length (int c)
26089 {
26090 switch (c)
26091 {
26092 ARM_NAME_ENCODING_LENGTHS
26093 default: return 0;
26094 }
26095 }
26096
26097 /* Return a pointer to a function's name with any
26098 and all prefix encodings stripped from it. */
26099 const char *
26100 arm_strip_name_encoding (const char *name)
26101 {
26102 int skip;
26103
26104 while ((skip = arm_get_strip_length (* name)))
26105 name += skip;
26106
26107 return name;
26108 }
26109
26110 /* If there is a '*' anywhere in the name's prefix, then
26111 emit the stripped name verbatim, otherwise prepend an
26112 underscore if leading underscores are being used. */
26113 void
26114 arm_asm_output_labelref (FILE *stream, const char *name)
26115 {
26116 int skip;
26117 int verbatim = 0;
26118
26119 while ((skip = arm_get_strip_length (* name)))
26120 {
26121 verbatim |= (*name == '*');
26122 name += skip;
26123 }
26124
26125 if (verbatim)
26126 fputs (name, stream);
26127 else
26128 asm_fprintf (stream, "%U%s", name);
26129 }
26130
26131 /* This function is used to emit an EABI tag and its associated value.
26132 We emit the numerical value of the tag in case the assembler does not
26133 support textual tags (e.g. gas prior to 2.20). If requested we include
26134 the tag name in a comment so that anyone reading the assembler output
26135 will know which tag is being set.
26136
26137 This function is not static because arm-c.c needs it too. */
26138
26139 void
26140 arm_emit_eabi_attribute (const char *name, int num, int val)
26141 {
26142 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26143 if (flag_verbose_asm || flag_debug_asm)
26144 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26145 asm_fprintf (asm_out_file, "\n");
26146 }
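
/* For example, arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1)
   produces (the trailing comment only with -fverbose-asm or -dA):

	.eabi_attribute 20, 1	@ Tag_ABI_FP_denormal  */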
26147
26148 /* This function is used to print CPU tuning information as comment
26149 in assembler file. Pointers are not printed for now. */
26150
26151 void
26152 arm_print_tune_info (void)
26153 {
26154 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26155 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26156 current_tune->constant_limit);
26157 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26158 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26159 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26160 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26161 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26162 "prefetch.l1_cache_size:\t%d\n",
26163 current_tune->prefetch.l1_cache_size);
26164 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26165 "prefetch.l1_cache_line_size:\t%d\n",
26166 current_tune->prefetch.l1_cache_line_size);
26167 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26168 "prefer_constant_pool:\t%d\n",
26169 (int) current_tune->prefer_constant_pool);
26170 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26171 "branch_cost:\t(s:speed, p:predictable)\n");
26172 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26173 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26174 current_tune->branch_cost (false, false));
26175 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26176 current_tune->branch_cost (false, true));
26177 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26178 current_tune->branch_cost (true, false));
26179 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26180 current_tune->branch_cost (true, true));
26181 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26182 "prefer_ldrd_strd:\t%d\n",
26183 (int) current_tune->prefer_ldrd_strd);
26184 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26185 "logical_op_non_short_circuit:\t[%d,%d]\n",
26186 (int) current_tune->logical_op_non_short_circuit_thumb,
26187 (int) current_tune->logical_op_non_short_circuit_arm);
26188 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26189 "prefer_neon_for_64bits:\t%d\n",
26190 (int) current_tune->prefer_neon_for_64bits);
26191 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26192 "disparage_flag_setting_t16_encodings:\t%d\n",
26193 (int) current_tune->disparage_flag_setting_t16_encodings);
26194 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26195 "string_ops_prefer_neon:\t%d\n",
26196 (int) current_tune->string_ops_prefer_neon);
26197 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26198 "max_insns_inline_memset:\t%d\n",
26199 current_tune->max_insns_inline_memset);
26200 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26201 current_tune->fusible_ops);
26202 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26203 (int) current_tune->sched_autopref);
26204 }
26205
26206 /* Print .arch and .arch_extension directives corresponding to the
26207 current architecture configuration. */
26208 static void
26209 arm_print_asm_arch_directives ()
26210 {
26211 const arch_option *arch
26212 = arm_parse_arch_option_name (all_architectures, "-march",
26213 arm_active_target.arch_name);
26214 auto_sbitmap opt_bits (isa_num_bits);
26215
26216 gcc_assert (arch);
26217
26218 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26219 if (!arch->common.extensions)
26220 return;
26221
26222 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26223 opt->name != NULL;
26224 opt++)
26225 {
26226 if (!opt->remove)
26227 {
26228 arm_initialize_isa (opt_bits, opt->isa_bits);
26229
26230 /* If every feature bit of this option is set in the target
26231 ISA specification, print out the option name. However,
26232 don't print anything if all the bits are part of the
26233 FPU specification. */
26234 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26235 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26236 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26237 }
26238 }
26239 }
26240
26241 static void
26242 arm_file_start (void)
26243 {
26244 int val;
26245
26246 if (TARGET_BPABI)
26247 {
26248 /* We don't have a specified CPU. Use the architecture to
26249 generate the tags.
26250
26251 Note: it might be better to do this unconditionally, then the
26252 assembler would not need to know about all new CPU names as
26253 they are added. */
26254 if (!arm_active_target.core_name)
26255 {
26256 /* armv7ve doesn't support any extensions. */
26257 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26258 {
26259 /* Keep backward compatibility for assemblers
26260 which don't support armv7ve. */
26261 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26262 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26263 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26264 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26265 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26266 }
26267 else
26268 arm_print_asm_arch_directives ();
26269 }
26270 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26271 asm_fprintf (asm_out_file, "\t.arch %s\n",
26272 arm_active_target.core_name + 8);
26273 else
26274 {
26275 const char* truncated_name
26276 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26277 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26278 }
26279
26280 if (print_tune_info)
26281 arm_print_tune_info ();
26282
26283 if (! TARGET_SOFT_FLOAT)
26284 {
26285 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26286 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26287
26288 if (TARGET_HARD_FLOAT_ABI)
26289 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26290 }
26291
26292 /* Some of these attributes only apply when the corresponding features
26293 are used. However we don't have any easy way of figuring this out.
26294 Conservatively record the setting that would have been used. */
26295
26296 if (flag_rounding_math)
26297 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26298
26299 if (!flag_unsafe_math_optimizations)
26300 {
26301 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26302 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26303 }
26304 if (flag_signaling_nans)
26305 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26306
26307 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26308 flag_finite_math_only ? 1 : 3);
26309
26310 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26311 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26312 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26313 flag_short_enums ? 1 : 2);
26314
26315 /* Tag_ABI_optimization_goals. */
26316 if (optimize_size)
26317 val = 4;
26318 else if (optimize >= 2)
26319 val = 2;
26320 else if (optimize)
26321 val = 1;
26322 else
26323 val = 6;
26324 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26325
26326 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26327 unaligned_access);
26328
26329 if (arm_fp16_format)
26330 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26331 (int) arm_fp16_format);
26332
26333 if (arm_lang_output_object_attributes_hook)
26334 arm_lang_output_object_attributes_hook();
26335 }
26336
26337 default_file_start ();
26338 }
26339
26340 static void
26341 arm_file_end (void)
26342 {
26343 int regno;
26344
26345 if (NEED_INDICATE_EXEC_STACK)
26346 /* Add .note.GNU-stack. */
26347 file_end_indicate_exec_stack ();
26348
26349 if (! thumb_call_reg_needed)
26350 return;
26351
26352 switch_to_section (text_section);
26353 asm_fprintf (asm_out_file, "\t.code 16\n");
26354 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26355
26356 for (regno = 0; regno < LR_REGNUM; regno++)
26357 {
26358 rtx label = thumb_call_via_label[regno];
26359
26360 if (label != 0)
26361 {
26362 targetm.asm_out.internal_label (asm_out_file, "L",
26363 CODE_LABEL_NUMBER (label));
26364 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26365 }
26366 }
26367 }
26368
26369 #ifndef ARM_PE
26370 /* Symbols in the text segment can be accessed without indirecting via the
26371 constant pool; it may take an extra binary operation, but this is still
26372 faster than indirecting via memory. Don't do this when not optimizing,
26373 since we won't be calculating all of the offsets necessary to do this
26374 simplification. */
26375
26376 static void
26377 arm_encode_section_info (tree decl, rtx rtl, int first)
26378 {
26379 if (optimize > 0 && TREE_CONSTANT (decl))
26380 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26381
26382 default_encode_section_info (decl, rtl, first);
26383 }
26384 #endif /* !ARM_PE */
26385
26386 static void
26387 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26388 {
26389 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26390 && !strcmp (prefix, "L"))
26391 {
26392 arm_ccfsm_state = 0;
26393 arm_target_insn = NULL;
26394 }
26395 default_internal_label (stream, prefix, labelno);
26396 }
26397
26398 /* Output code to add DELTA to the first argument, and then jump
26399 to FUNCTION. Used for C++ multiple inheritance. */
26400
26401 static void
26402 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26403 HOST_WIDE_INT, tree function)
26404 {
26405 static int thunk_label = 0;
26406 char label[256];
26407 char labelpc[256];
26408 int mi_delta = delta;
26409 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26410 int shift = 0;
26411 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26412 ? 1 : 0);
26413 if (mi_delta < 0)
26414 mi_delta = - mi_delta;
26415
26416 final_start_function (emit_barrier (), file, 1);
26417
26418 if (TARGET_THUMB1)
26419 {
26420 int labelno = thunk_label++;
26421 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26422 /* Thunks are entered in arm mode when available. */
26423 if (TARGET_THUMB1_ONLY)
26424 {
26425 /* push r3 so we can use it as a temporary. */
26426 /* TODO: Omit this save if r3 is not used. */
26427 fputs ("\tpush {r3}\n", file);
26428 fputs ("\tldr\tr3, ", file);
26429 }
26430 else
26431 {
26432 fputs ("\tldr\tr12, ", file);
26433 }
26434 assemble_name (file, label);
26435 fputc ('\n', file);
26436 if (flag_pic)
26437 {
26438 /* If we are generating PIC, the ldr instruction below loads
26439 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26440 the address of the add + 8, so we have:
26441
26442 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26443 = target + 1.
26444
26445 Note that we have "+ 1" because some versions of GNU ld
26446 don't set the low bit of the result for R_ARM_REL32
26447 relocations against thumb function symbols.
26448 On ARMv6M this is +4, not +8. */
26449 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26450 assemble_name (file, labelpc);
26451 fputs (":\n", file);
26452 if (TARGET_THUMB1_ONLY)
26453 {
26454 /* This is 2 insns after the start of the thunk, so we know it
26455 is 4-byte aligned. */
26456 fputs ("\tadd\tr3, pc, r3\n", file);
26457 fputs ("\tmov r12, r3\n", file);
26458 }
26459 else
26460 fputs ("\tadd\tr12, pc, r12\n", file);
26461 }
26462 else if (TARGET_THUMB1_ONLY)
26463 fputs ("\tmov r12, r3\n", file);
26464 }
26465 if (TARGET_THUMB1_ONLY)
26466 {
26467 if (mi_delta > 255)
26468 {
26469 fputs ("\tldr\tr3, ", file);
26470 assemble_name (file, label);
26471 fputs ("+4\n", file);
26472 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26473 mi_op, this_regno, this_regno);
26474 }
26475 else if (mi_delta != 0)
26476 {
26477 /* Thumb1 unified syntax requires s suffix in instruction name when
26478 one of the operands is immediate. */
26479 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26480 mi_op, this_regno, this_regno,
26481 mi_delta);
26482 }
26483 }
26484 else
26485 {
26486 /* TODO: Use movw/movt for large constants when available. */
26487 while (mi_delta != 0)
26488 {
26489 if ((mi_delta & (3 << shift)) == 0)
26490 shift += 2;
26491 else
26492 {
26493 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26494 mi_op, this_regno, this_regno,
26495 mi_delta & (0xff << shift));
26496 mi_delta &= ~(0xff << shift);
26497 shift += 8;
26498 }
26499 }
26500 }
26501 if (TARGET_THUMB1)
26502 {
26503 if (TARGET_THUMB1_ONLY)
26504 fputs ("\tpop\t{r3}\n", file);
26505
26506 fprintf (file, "\tbx\tr12\n");
26507 ASM_OUTPUT_ALIGN (file, 2);
26508 assemble_name (file, label);
26509 fputs (":\n", file);
26510 if (flag_pic)
26511 {
26512 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26513 rtx tem = XEXP (DECL_RTL (function), 0);
26514 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26515 pipeline offset is four rather than eight. Adjust the offset
26516 accordingly. */
26517 tem = plus_constant (GET_MODE (tem), tem,
26518 TARGET_THUMB1_ONLY ? -3 : -7);
26519 tem = gen_rtx_MINUS (GET_MODE (tem),
26520 tem,
26521 gen_rtx_SYMBOL_REF (Pmode,
26522 ggc_strdup (labelpc)));
26523 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26524 }
26525 else
26526 /* Output ".word .LTHUNKn". */
26527 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26528
26529 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26530 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
26531 }
26532 else
26533 {
26534 fputs ("\tb\t", file);
26535 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26536 if (NEED_PLT_RELOC)
26537 fputs ("(PLT)", file);
26538 fputc ('\n', file);
26539 }
26540
26541 final_end_function ();
26542 }
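
/* Rough shape of a non-PIC Thumb-1 (but not Thumb-1-only) thunk emitted
   above, for a hypothetical delta of 8 with the this pointer in r0:

	ldr	r12, .LTHUMBFUNC0
	add	r0, r0, #8
	bx	r12
	.align	2
   .LTHUMBFUNC0:
	.word	<target function>  */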
26543
26544 /* MI thunk handling for TARGET_32BIT. */
26545
26546 static void
26547 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26548 HOST_WIDE_INT vcall_offset, tree function)
26549 {
26550 /* On ARM, this_regno is R0 or R1 depending on
26551 whether the function returns an aggregate or not.
26552 */
26553 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26554 function)
26555 ? R1_REGNUM : R0_REGNUM);
26556
26557 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26558 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26559 reload_completed = 1;
26560 emit_note (NOTE_INSN_PROLOGUE_END);
26561
26562 /* Add DELTA to THIS_RTX. */
26563 if (delta != 0)
26564 arm_split_constant (PLUS, Pmode, NULL_RTX,
26565 delta, this_rtx, this_rtx, false);
26566
26567 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26568 if (vcall_offset != 0)
26569 {
26570 /* Load *THIS_RTX. */
26571 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26572 /* Compute *THIS_RTX + VCALL_OFFSET. */
26573 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26574 false);
26575 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26576 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26577 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26578 }
26579
26580 /* Generate a tail call to the target function. */
26581 if (!TREE_USED (function))
26582 {
26583 assemble_external (function);
26584 TREE_USED (function) = 1;
26585 }
26586 rtx funexp = XEXP (DECL_RTL (function), 0);
26587 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26588 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26589 SIBLING_CALL_P (insn) = 1;
26590
26591 insn = get_insns ();
26592 shorten_branches (insn);
26593 final_start_function (insn, file, 1);
26594 final (insn, file, 1);
26595 final_end_function ();
26596
26597 /* Stop pretending this is a post-reload pass. */
26598 reload_completed = 0;
26599 }
26600
26601 /* Output code to add DELTA to the first argument, and then jump
26602 to FUNCTION. Used for C++ multiple inheritance. */
26603
26604 static void
26605 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26606 HOST_WIDE_INT vcall_offset, tree function)
26607 {
26608 if (TARGET_32BIT)
26609 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26610 else
26611 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26612 }
26613
26614 int
26615 arm_emit_vector_const (FILE *file, rtx x)
26616 {
26617 int i;
26618 const char * pattern;
26619
26620 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26621
26622 switch (GET_MODE (x))
26623 {
26624 case E_V2SImode: pattern = "%08x"; break;
26625 case E_V4HImode: pattern = "%04x"; break;
26626 case E_V8QImode: pattern = "%02x"; break;
26627 default: gcc_unreachable ();
26628 }
26629
26630 fprintf (file, "0x");
26631 for (i = CONST_VECTOR_NUNITS (x); i--;)
26632 {
26633 rtx element;
26634
26635 element = CONST_VECTOR_ELT (x, i);
26636 fprintf (file, pattern, INTVAL (element));
26637 }
26638
26639 return 1;
26640 }
26641
26642 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26643 HFmode constant pool entries are actually loaded with ldr. */
26644 void
26645 arm_emit_fp16_const (rtx c)
26646 {
26647 long bits;
26648
26649 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26650 if (WORDS_BIG_ENDIAN)
26651 assemble_zeros (2);
26652 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26653 if (!WORDS_BIG_ENDIAN)
26654 assemble_zeros (2);
26655 }
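
/* For example, the HFmode constant 1.0 has the representation 0x3c00,
   so on a little-endian target the code above emits roughly:

	.short	0x3c00
	.space	2  */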
26656
26657 const char *
26658 arm_output_load_gr (rtx *operands)
26659 {
26660 rtx reg;
26661 rtx offset;
26662 rtx wcgr;
26663 rtx sum;
26664
26665 if (!MEM_P (operands [1])
26666 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26667 || !REG_P (reg = XEXP (sum, 0))
26668 || !CONST_INT_P (offset = XEXP (sum, 1))
26669 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26670 return "wldrw%?\t%0, %1";
26671
26672 /* Fix up an out-of-range load of a GR register. */
26673 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26674 wcgr = operands[0];
26675 operands[0] = reg;
26676 output_asm_insn ("ldr%?\t%0, %1", operands);
26677
26678 operands[0] = wcgr;
26679 operands[1] = reg;
26680 output_asm_insn ("tmcr%?\t%0, %1", operands);
26681 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26682
26683 return "";
26684 }
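
/* The out-of-range expansion above emits, with rN the base register of
   the original address and wcgrM the destination GR register:

	str	rN, [sp, #-4]!	@ Start of GR load expansion
	ldr	rN, <original address>
	tmcr	wcgrM, rN
	ldr	rN, [sp], #4	@ End of GR load expansion  */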
26685
26686 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26687
26688 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26689 named arg and all anonymous args onto the stack.
26690 XXX I know the prologue shouldn't be pushing registers, but it is faster
26691 that way. */
26692
26693 static void
26694 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26695 machine_mode mode,
26696 tree type,
26697 int *pretend_size,
26698 int second_time ATTRIBUTE_UNUSED)
26699 {
26700 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26701 int nregs;
26702
26703 cfun->machine->uses_anonymous_args = 1;
26704 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26705 {
26706 nregs = pcum->aapcs_ncrn;
26707 if (nregs & 1)
26708 {
26709 int res = arm_needs_doubleword_align (mode, type);
26710 if (res < 0 && warn_psabi)
26711 inform (input_location, "parameter passing for argument of "
26712 "type %qT changed in GCC 7.1", type);
26713 else if (res > 0)
26714 nregs++;
26715 }
26716 }
26717 else
26718 nregs = pcum->nregs;
26719
26720 if (nregs < NUM_ARG_REGS)
26721 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26722 }
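
/* Worked example: for "int f (int fmt, ...)" under AAPCS only r0 is
   consumed by the named argument, so nregs == 1 and *pretend_size
   becomes (4 - 1) * 4 = 12, i.e. the prologue pushes r1-r3 so that the
   anonymous arguments are contiguous with any arguments already on the
   stack.  */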
26723
26724 /* We can't rely on the caller doing the proper promotion when
26725 using APCS or ATPCS. */
26726
26727 static bool
26728 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26729 {
26730 return !TARGET_AAPCS_BASED;
26731 }
26732
26733 static machine_mode
26734 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26735 machine_mode mode,
26736 int *punsignedp ATTRIBUTE_UNUSED,
26737 const_tree fntype ATTRIBUTE_UNUSED,
26738 int for_return ATTRIBUTE_UNUSED)
26739 {
26740 if (GET_MODE_CLASS (mode) == MODE_INT
26741 && GET_MODE_SIZE (mode) < 4)
26742 return SImode;
26743
26744 return mode;
26745 }
26746
26747
26748 static bool
26749 arm_default_short_enums (void)
26750 {
26751 return ARM_DEFAULT_SHORT_ENUMS;
26752 }
26753
26754
26755 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26756
26757 static bool
26758 arm_align_anon_bitfield (void)
26759 {
26760 return TARGET_AAPCS_BASED;
26761 }
26762
26763
26764 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26765
26766 static tree
26767 arm_cxx_guard_type (void)
26768 {
26769 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26770 }
26771
26772
26773 /* The EABI says test the least significant bit of a guard variable. */
26774
26775 static bool
26776 arm_cxx_guard_mask_bit (void)
26777 {
26778 return TARGET_AAPCS_BASED;
26779 }
26780
26781
26782 /* The EABI specifies that all array cookies are 8 bytes long. */
26783
26784 static tree
26785 arm_get_cookie_size (tree type)
26786 {
26787 tree size;
26788
26789 if (!TARGET_AAPCS_BASED)
26790 return default_cxx_get_cookie_size (type);
26791
26792 size = build_int_cst (sizetype, 8);
26793 return size;
26794 }
26795
26796
26797 /* The EABI says that array cookies should also contain the element size. */
26798
26799 static bool
26800 arm_cookie_has_size (void)
26801 {
26802 return TARGET_AAPCS_BASED;
26803 }
26804
26805
26806 /* The EABI says constructors and destructors should return a pointer to
26807 the object constructed/destroyed. */
26808
26809 static bool
26810 arm_cxx_cdtor_returns_this (void)
26811 {
26812 return TARGET_AAPCS_BASED;
26813 }
26814
26815 /* The EABI says that an inline function may never be the key
26816 method. */
26817
26818 static bool
26819 arm_cxx_key_method_may_be_inline (void)
26820 {
26821 return !TARGET_AAPCS_BASED;
26822 }
26823
26824 static void
26825 arm_cxx_determine_class_data_visibility (tree decl)
26826 {
26827 if (!TARGET_AAPCS_BASED
26828 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26829 return;
26830
26831 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26832 is exported. However, on systems without dynamic vague linkage,
26833 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26834 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26835 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26836 else
26837 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26838 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26839 }
26840
26841 static bool
26842 arm_cxx_class_data_always_comdat (void)
26843 {
26844 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26845 vague linkage if the class has no key function. */
26846 return !TARGET_AAPCS_BASED;
26847 }
26848
26849
26850 /* The EABI says __aeabi_atexit should be used to register static
26851 destructors. */
26852
26853 static bool
26854 arm_cxx_use_aeabi_atexit (void)
26855 {
26856 return TARGET_AAPCS_BASED;
26857 }
26858
26859
26860 void
26861 arm_set_return_address (rtx source, rtx scratch)
26862 {
26863 arm_stack_offsets *offsets;
26864 HOST_WIDE_INT delta;
26865 rtx addr, mem;
26866 unsigned long saved_regs;
26867
26868 offsets = arm_get_frame_offsets ();
26869 saved_regs = offsets->saved_regs_mask;
26870
26871 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26872 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26873 else
26874 {
26875 if (frame_pointer_needed)
26876 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26877 else
26878 {
26879 /* LR will be the first saved register. */
26880 delta = offsets->outgoing_args - (offsets->frame + 4);
26881
26882
26883 if (delta >= 4096)
26884 {
26885 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26886 GEN_INT (delta & ~4095)));
26887 addr = scratch;
26888 delta &= 4095;
26889 }
26890 else
26891 addr = stack_pointer_rtx;
26892
26893 addr = plus_constant (Pmode, addr, delta);
26894 }
26895
26896 /* The store needs to be marked to prevent DSE from deleting
26897 it as dead if it is based on fp. */
26898 mem = gen_frame_mem (Pmode, addr);
26899 MEM_VOLATILE_P (mem) = true;
26900 emit_move_insn (mem, source);
26901 }
26902 }
26903
26904
26905 void
26906 thumb_set_return_address (rtx source, rtx scratch)
26907 {
26908 arm_stack_offsets *offsets;
26909 HOST_WIDE_INT delta;
26910 HOST_WIDE_INT limit;
26911 int reg;
26912 rtx addr, mem;
26913 unsigned long mask;
26914
26915 emit_use (source);
26916
26917 offsets = arm_get_frame_offsets ();
26918 mask = offsets->saved_regs_mask;
26919 if (mask & (1 << LR_REGNUM))
26920 {
26921 limit = 1024;
26922 /* Find the saved regs. */
26923 if (frame_pointer_needed)
26924 {
26925 delta = offsets->soft_frame - offsets->saved_args;
26926 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26927 if (TARGET_THUMB1)
26928 limit = 128;
26929 }
26930 else
26931 {
26932 delta = offsets->outgoing_args - offsets->saved_args;
26933 reg = SP_REGNUM;
26934 }
26935 /* Allow for the stack frame. */
26936 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26937 delta -= 16;
26938 /* The link register is always the first saved register. */
26939 delta -= 4;
26940
26941 /* Construct the address. */
26942 addr = gen_rtx_REG (SImode, reg);
26943 if (delta > limit)
26944 {
26945 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26946 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26947 addr = scratch;
26948 }
26949 else
26950 addr = plus_constant (Pmode, addr, delta);
26951
26952 /* The store needs to be marked to prevent DSE from deleting
26953 it as dead if it is based on fp. */
26954 mem = gen_frame_mem (Pmode, addr);
26955 MEM_VOLATILE_P (mem) = true;
26956 emit_move_insn (mem, source);
26957 }
26958 else
26959 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26960 }
26961
26962 /* Implements target hook vector_mode_supported_p. */
26963 bool
26964 arm_vector_mode_supported_p (machine_mode mode)
26965 {
26966 /* Neon also supports V2SImode, etc. listed in the clause below. */
26967 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26968 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26969 || mode == V2DImode || mode == V8HFmode))
26970 return true;
26971
26972 if ((TARGET_NEON || TARGET_IWMMXT)
26973 && ((mode == V2SImode)
26974 || (mode == V4HImode)
26975 || (mode == V8QImode)))
26976 return true;
26977
26978 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26979 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26980 || mode == V2HAmode))
26981 return true;
26982
26983 return false;
26984 }
26985
26986 /* Implements target hook array_mode_supported_p. */
26987
26988 static bool
26989 arm_array_mode_supported_p (machine_mode mode,
26990 unsigned HOST_WIDE_INT nelems)
26991 {
26992 if (TARGET_NEON
26993 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26994 && (nelems >= 2 && nelems <= 4))
26995 return true;
26996
26997 return false;
26998 }
26999
27000 /* Use the option -mvectorize-with-neon-double to override the use of quadword
27001 registers when autovectorizing for Neon, at least until multiple vector
27002 widths are supported properly by the middle-end. */
27003
27004 static machine_mode
27005 arm_preferred_simd_mode (scalar_mode mode)
27006 {
27007 if (TARGET_NEON)
27008 switch (mode)
27009 {
27010 case E_SFmode:
27011 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27012 case E_SImode:
27013 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27014 case E_HImode:
27015 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27016 case E_QImode:
27017 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27018 case E_DImode:
27019 if (!TARGET_NEON_VECTORIZE_DOUBLE)
27020 return V2DImode;
27021 break;
27022
27023 default:;
27024 }
27025
27026 if (TARGET_REALLY_IWMMXT)
27027 switch (mode)
27028 {
27029 case E_SImode:
27030 return V2SImode;
27031 case E_HImode:
27032 return V4HImode;
27033 case E_QImode:
27034 return V8QImode;
27035
27036 default:;
27037 }
27038
27039 return word_mode;
27040 }
27041
27042 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27043
27044 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27045 using r0-r4 for function arguments, r7 for the stack frame, and not have
27046 enough left over to do doubleword arithmetic. For Thumb-2 all the
27047 potentially problematic instructions accept high registers so this is not
27048 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27049 that require many low registers. */
27050 static bool
27051 arm_class_likely_spilled_p (reg_class_t rclass)
27052 {
27053 if ((TARGET_THUMB1 && rclass == LO_REGS)
27054 || rclass == CC_REG)
27055 return true;
27056
27057 return false;
27058 }
27059
27060 /* Implements target hook small_register_classes_for_mode_p. */
27061 bool
27062 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27063 {
27064 return TARGET_THUMB1;
27065 }
27066
27067 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27068 ARM insns and therefore guarantee that the shift count is modulo 256.
27069 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27070 guarantee no particular behavior for out-of-range counts. */
27071
27072 static unsigned HOST_WIDE_INT
27073 arm_shift_truncation_mask (machine_mode mode)
27074 {
27075 return mode == SImode ? 255 : 0;
27076 }
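/* For illustration: since the mask above is 255 for SImode, a source-level
   "x << (n & 255)" needs no explicit AND -- the hardware already uses only
   the low byte of the count, so a count of 260 behaves as a count of 4.
   The DImode case returns 0 and promises nothing for out-of-range counts.  */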
27077
27078
27079 /* Map internal gcc register numbers to DWARF2 register numbers. */
27080
27081 unsigned int
27082 arm_dbx_register_number (unsigned int regno)
27083 {
27084 if (regno < 16)
27085 return regno;
27086
27087 if (IS_VFP_REGNUM (regno))
27088 {
27089 /* See comment in arm_dwarf_register_span. */
27090 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27091 return 64 + regno - FIRST_VFP_REGNUM;
27092 else
27093 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27094 }
27095
27096 if (IS_IWMMXT_GR_REGNUM (regno))
27097 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27098
27099 if (IS_IWMMXT_REGNUM (regno))
27100 return 112 + regno - FIRST_IWMMXT_REGNUM;
27101
27102 return DWARF_FRAME_REGISTERS;
27103 }
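/* Worked examples of the mapping above: core registers map to themselves
   (r0-r15 -> 0-15), a single-precision register such as s5 uses the legacy
   range (64 + 5 = 69), and a double-only register such as d16 (regno
   FIRST_VFP_REGNUM + 32) maps to 256 + 32 / 2 = 272.  */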
27104
27105 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27106 GCC models them as 64 32-bit registers, so we need to describe this to
27107 the DWARF generation code. Other registers can use the default. */
27108 static rtx
27109 arm_dwarf_register_span (rtx rtl)
27110 {
27111 machine_mode mode;
27112 unsigned regno;
27113 rtx parts[16];
27114 int nregs;
27115 int i;
27116
27117 regno = REGNO (rtl);
27118 if (!IS_VFP_REGNUM (regno))
27119 return NULL_RTX;
27120
27121 /* XXX FIXME: The EABI defines two VFP register ranges:
27122 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27123 256-287: D0-D31
27124 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27125 corresponding D register. Until GDB supports this, we shall use the
27126 legacy encodings. We also use these encodings for D0-D15 for
27127 compatibility with older debuggers. */
27128 mode = GET_MODE (rtl);
27129 if (GET_MODE_SIZE (mode) < 8)
27130 return NULL_RTX;
27131
27132 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27133 {
27134 nregs = GET_MODE_SIZE (mode) / 4;
27135 for (i = 0; i < nregs; i += 2)
27136 if (TARGET_BIG_END)
27137 {
27138 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27139 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27140 }
27141 else
27142 {
27143 parts[i] = gen_rtx_REG (SImode, regno + i);
27144 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27145 }
27146 }
27147 else
27148 {
27149 nregs = GET_MODE_SIZE (mode) / 8;
27150 for (i = 0; i < nregs; i++)
27151 parts[i] = gen_rtx_REG (DImode, regno + i);
27152 }
27153
27154 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27155 }
27156
27157 #if ARM_UNWIND_INFO
27158 /* Emit unwind directives for a store-multiple instruction or stack pointer
27159 push during alignment.
27160 These should only ever be generated by the function prologue code, so
27161 expect them to have a particular form.
27162 The store-multiple instruction sometimes pushes pc as the last register,
27163 although it should not be tracked in the unwind information, or for -Os
27164 sometimes pushes some dummy registers before the first register that needs
27165 to be tracked in the unwind information; such dummy registers are only there
27166 to avoid a separate stack adjustment and will not be restored in the
27167 epilogue. */
27168
27169 static void
27170 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27171 {
27172 int i;
27173 HOST_WIDE_INT offset;
27174 HOST_WIDE_INT nregs;
27175 int reg_size;
27176 unsigned reg;
27177 unsigned lastreg;
27178 unsigned padfirst = 0, padlast = 0;
27179 rtx e;
27180
27181 e = XVECEXP (p, 0, 0);
27182 gcc_assert (GET_CODE (e) == SET);
27183
27184 /* First insn will adjust the stack pointer. */
27185 gcc_assert (GET_CODE (e) == SET
27186 && REG_P (SET_DEST (e))
27187 && REGNO (SET_DEST (e)) == SP_REGNUM
27188 && GET_CODE (SET_SRC (e)) == PLUS);
27189
27190 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27191 nregs = XVECLEN (p, 0) - 1;
27192 gcc_assert (nregs);
27193
27194 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27195 if (reg < 16)
27196 {
27197 /* For -Os dummy registers can be pushed at the beginning to
27198 avoid separate stack pointer adjustment. */
27199 e = XVECEXP (p, 0, 1);
27200 e = XEXP (SET_DEST (e), 0);
27201 if (GET_CODE (e) == PLUS)
27202 padfirst = INTVAL (XEXP (e, 1));
27203 gcc_assert (padfirst == 0 || optimize_size);
27204 /* The function prologue may also push pc, but not annotate it as it is
27205 never restored. We turn this into a stack pointer adjustment. */
27206 e = XVECEXP (p, 0, nregs);
27207 e = XEXP (SET_DEST (e), 0);
27208 if (GET_CODE (e) == PLUS)
27209 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27210 else
27211 padlast = offset - 4;
27212 gcc_assert (padlast == 0 || padlast == 4);
27213 if (padlast == 4)
27214 fprintf (asm_out_file, "\t.pad #4\n");
27215 reg_size = 4;
27216 fprintf (asm_out_file, "\t.save {");
27217 }
27218 else if (IS_VFP_REGNUM (reg))
27219 {
27220 reg_size = 8;
27221 fprintf (asm_out_file, "\t.vsave {");
27222 }
27223 else
27224 /* Unknown register type. */
27225 gcc_unreachable ();
27226
27227 /* If the stack increment doesn't match the size of the saved registers,
27228 something has gone horribly wrong. */
27229 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27230
27231 offset = padfirst;
27232 lastreg = 0;
27233 /* The remaining insns will describe the stores. */
27234 for (i = 1; i <= nregs; i++)
27235 {
27236 /* Expect (set (mem <addr>) (reg)).
27237 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27238 e = XVECEXP (p, 0, i);
27239 gcc_assert (GET_CODE (e) == SET
27240 && MEM_P (SET_DEST (e))
27241 && REG_P (SET_SRC (e)));
27242
27243 reg = REGNO (SET_SRC (e));
27244 gcc_assert (reg >= lastreg);
27245
27246 if (i != 1)
27247 fprintf (asm_out_file, ", ");
27248 /* We can't use %r for vfp because we need to use the
27249 double precision register names. */
27250 if (IS_VFP_REGNUM (reg))
27251 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27252 else
27253 asm_fprintf (asm_out_file, "%r", reg);
27254
27255 if (flag_checking)
27256 {
27257 /* Check that the addresses are consecutive. */
27258 e = XEXP (SET_DEST (e), 0);
27259 if (GET_CODE (e) == PLUS)
27260 gcc_assert (REG_P (XEXP (e, 0))
27261 && REGNO (XEXP (e, 0)) == SP_REGNUM
27262 && CONST_INT_P (XEXP (e, 1))
27263 && offset == INTVAL (XEXP (e, 1)));
27264 else
27265 gcc_assert (i == 1
27266 && REG_P (e)
27267 && REGNO (e) == SP_REGNUM);
27268 offset += reg_size;
27269 }
27270 }
27271 fprintf (asm_out_file, "}\n");
27272 if (padfirst)
27273 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27274 }
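/* For illustration, a prologue "push {r4, r5, lr}" is annotated above as
   "\t.save {r4, r5, lr}" and a "vpush {d8, d9}" as "\t.vsave {d8, d9}",
   while any dummy slots pushed before or after the tracked registers become
   "\t.pad #N" directives.  */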
27275
27276 /* Emit unwind directives for a SET. */
27277
27278 static void
27279 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27280 {
27281 rtx e0;
27282 rtx e1;
27283 unsigned reg;
27284
27285 e0 = XEXP (p, 0);
27286 e1 = XEXP (p, 1);
27287 switch (GET_CODE (e0))
27288 {
27289 case MEM:
27290 /* Pushing a single register. */
27291 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27292 || !REG_P (XEXP (XEXP (e0, 0), 0))
27293 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27294 abort ();
27295
27296 asm_fprintf (asm_out_file, "\t.save ");
27297 if (IS_VFP_REGNUM (REGNO (e1)))
27298 asm_fprintf(asm_out_file, "{d%d}\n",
27299 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27300 else
27301 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27302 break;
27303
27304 case REG:
27305 if (REGNO (e0) == SP_REGNUM)
27306 {
27307 /* A stack increment. */
27308 if (GET_CODE (e1) != PLUS
27309 || !REG_P (XEXP (e1, 0))
27310 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27311 || !CONST_INT_P (XEXP (e1, 1)))
27312 abort ();
27313
27314 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27315 -INTVAL (XEXP (e1, 1)));
27316 }
27317 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27318 {
27319 HOST_WIDE_INT offset;
27320
27321 if (GET_CODE (e1) == PLUS)
27322 {
27323 if (!REG_P (XEXP (e1, 0))
27324 || !CONST_INT_P (XEXP (e1, 1)))
27325 abort ();
27326 reg = REGNO (XEXP (e1, 0));
27327 offset = INTVAL (XEXP (e1, 1));
27328 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27329 HARD_FRAME_POINTER_REGNUM, reg,
27330 offset);
27331 }
27332 else if (REG_P (e1))
27333 {
27334 reg = REGNO (e1);
27335 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27336 HARD_FRAME_POINTER_REGNUM, reg);
27337 }
27338 else
27339 abort ();
27340 }
27341 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27342 {
27343 /* Move from sp to reg. */
27344 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27345 }
27346 else if (GET_CODE (e1) == PLUS
27347 && REG_P (XEXP (e1, 0))
27348 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27349 && CONST_INT_P (XEXP (e1, 1)))
27350 {
27351 /* Set reg to offset from sp. */
27352 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27353 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27354 }
27355 else
27356 abort ();
27357 break;
27358
27359 default:
27360 abort ();
27361 }
27362 }
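/* For illustration, typical directives produced above: a single push such as
   "str r4, [sp, #-4]!" yields "\t.save {r4}", a stack adjustment
   "sub sp, sp, #16" yields "\t.pad #16", and establishing the frame pointer
   from sp yields a "\t.setfp ..." directive.  */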
27363
27364
27365 /* Emit unwind directives for the given insn. */
27366
27367 static void
27368 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27369 {
27370 rtx note, pat;
27371 bool handled_one = false;
27372
27373 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27374 return;
27375
27376 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27377 && (TREE_NOTHROW (current_function_decl)
27378 || crtl->all_throwers_are_sibcalls))
27379 return;
27380
27381 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27382 return;
27383
27384 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27385 {
27386 switch (REG_NOTE_KIND (note))
27387 {
27388 case REG_FRAME_RELATED_EXPR:
27389 pat = XEXP (note, 0);
27390 goto found;
27391
27392 case REG_CFA_REGISTER:
27393 pat = XEXP (note, 0);
27394 if (pat == NULL)
27395 {
27396 pat = PATTERN (insn);
27397 if (GET_CODE (pat) == PARALLEL)
27398 pat = XVECEXP (pat, 0, 0);
27399 }
27400
27401 /* Only emitted for IS_STACKALIGN re-alignment. */
27402 {
27403 rtx dest, src;
27404 unsigned reg;
27405
27406 src = SET_SRC (pat);
27407 dest = SET_DEST (pat);
27408
27409 gcc_assert (src == stack_pointer_rtx);
27410 reg = REGNO (dest);
27411 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27412 reg + 0x90, reg);
27413 }
27414 handled_one = true;
27415 break;
27416
27417 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27418 to get correct dwarf information for shrink-wrap. We should not
27419 emit unwind information for it because these are used either for
27420 pretend arguments or notes to adjust sp and restore registers from
27421 stack. */
27422 case REG_CFA_DEF_CFA:
27423 case REG_CFA_ADJUST_CFA:
27424 case REG_CFA_RESTORE:
27425 return;
27426
27427 case REG_CFA_EXPRESSION:
27428 case REG_CFA_OFFSET:
27429 /* ??? Only handling here what we actually emit. */
27430 gcc_unreachable ();
27431
27432 default:
27433 break;
27434 }
27435 }
27436 if (handled_one)
27437 return;
27438 pat = PATTERN (insn);
27439 found:
27440
27441 switch (GET_CODE (pat))
27442 {
27443 case SET:
27444 arm_unwind_emit_set (asm_out_file, pat);
27445 break;
27446
27447 case SEQUENCE:
27448 /* Store multiple. */
27449 arm_unwind_emit_sequence (asm_out_file, pat);
27450 break;
27451
27452 default:
27453 abort();
27454 }
27455 }
27456
27457
27458 /* Output a reference from a function exception table to the type_info
27459 object X. The EABI specifies that the symbol should be relocated by
27460 an R_ARM_TARGET2 relocation. */
27461
27462 static bool
27463 arm_output_ttype (rtx x)
27464 {
27465 fputs ("\t.word\t", asm_out_file);
27466 output_addr_const (asm_out_file, x);
27467 /* Use special relocations for symbol references. */
27468 if (!CONST_INT_P (x))
27469 fputs ("(TARGET2)", asm_out_file);
27470 fputc ('\n', asm_out_file);
27471
27472 return TRUE;
27473 }
27474
27475 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27476
27477 static void
27478 arm_asm_emit_except_personality (rtx personality)
27479 {
27480 fputs ("\t.personality\t", asm_out_file);
27481 output_addr_const (asm_out_file, personality);
27482 fputc ('\n', asm_out_file);
27483 }
27484 #endif /* ARM_UNWIND_INFO */
27485
27486 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27487
27488 static void
27489 arm_asm_init_sections (void)
27490 {
27491 #if ARM_UNWIND_INFO
27492 exception_section = get_unnamed_section (0, output_section_asm_op,
27493 "\t.handlerdata");
27494 #endif /* ARM_UNWIND_INFO */
27495
27496 #ifdef OBJECT_FORMAT_ELF
27497 if (target_pure_code)
27498 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27499 #endif
27500 }
27501
27502 /* Output unwind directives for the start/end of a function. */
27503
27504 void
27505 arm_output_fn_unwind (FILE * f, bool prologue)
27506 {
27507 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27508 return;
27509
27510 if (prologue)
27511 fputs ("\t.fnstart\n", f);
27512 else
27513 {
27514 /* If this function will never be unwound, then mark it as such.
27515 The same condition is used in arm_unwind_emit to suppress
27516 the frame annotations. */
27517 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27518 && (TREE_NOTHROW (current_function_decl)
27519 || crtl->all_throwers_are_sibcalls))
27520 fputs("\t.cantunwind\n", f);
27521
27522 fputs ("\t.fnend\n", f);
27523 }
27524 }
27525
27526 static bool
27527 arm_emit_tls_decoration (FILE *fp, rtx x)
27528 {
27529 enum tls_reloc reloc;
27530 rtx val;
27531
27532 val = XVECEXP (x, 0, 0);
27533 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27534
27535 output_addr_const (fp, val);
27536
27537 switch (reloc)
27538 {
27539 case TLS_GD32:
27540 fputs ("(tlsgd)", fp);
27541 break;
27542 case TLS_LDM32:
27543 fputs ("(tlsldm)", fp);
27544 break;
27545 case TLS_LDO32:
27546 fputs ("(tlsldo)", fp);
27547 break;
27548 case TLS_IE32:
27549 fputs ("(gottpoff)", fp);
27550 break;
27551 case TLS_LE32:
27552 fputs ("(tpoff)", fp);
27553 break;
27554 case TLS_DESCSEQ:
27555 fputs ("(tlsdesc)", fp);
27556 break;
27557 default:
27558 gcc_unreachable ();
27559 }
27560
27561 switch (reloc)
27562 {
27563 case TLS_GD32:
27564 case TLS_LDM32:
27565 case TLS_IE32:
27566 case TLS_DESCSEQ:
27567 fputs (" + (. - ", fp);
27568 output_addr_const (fp, XVECEXP (x, 0, 2));
27569 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27570 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27571 output_addr_const (fp, XVECEXP (x, 0, 3));
27572 fputc (')', fp);
27573 break;
27574 default:
27575 break;
27576 }
27577
27578 return TRUE;
27579 }
27580
27581 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27582
27583 static void
27584 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27585 {
27586 gcc_assert (size == 4);
27587 fputs ("\t.word\t", file);
27588 output_addr_const (file, x);
27589 fputs ("(tlsldo)", file);
27590 }
27591
27592 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27593
27594 static bool
27595 arm_output_addr_const_extra (FILE *fp, rtx x)
27596 {
27597 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27598 return arm_emit_tls_decoration (fp, x);
27599 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27600 {
27601 char label[256];
27602 int labelno = INTVAL (XVECEXP (x, 0, 0));
27603
27604 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27605 assemble_name_raw (fp, label);
27606
27607 return TRUE;
27608 }
27609 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27610 {
27611 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27612 if (GOT_PCREL)
27613 fputs ("+.", fp);
27614 fputs ("-(", fp);
27615 output_addr_const (fp, XVECEXP (x, 0, 0));
27616 fputc (')', fp);
27617 return TRUE;
27618 }
27619 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27620 {
27621 output_addr_const (fp, XVECEXP (x, 0, 0));
27622 if (GOT_PCREL)
27623 fputs ("+.", fp);
27624 fputs ("-(", fp);
27625 output_addr_const (fp, XVECEXP (x, 0, 1));
27626 fputc (')', fp);
27627 return TRUE;
27628 }
27629 else if (GET_CODE (x) == CONST_VECTOR)
27630 return arm_emit_vector_const (fp, x);
27631
27632 return FALSE;
27633 }
27634
27635 /* Output assembly for a shift instruction.
27636 SET_FLAGS determines how the instruction modifies the condition codes.
27637 0 - Do not set condition codes.
27638 1 - Set condition codes.
27639 2 - Use smallest instruction. */
27640 const char *
27641 arm_output_shift(rtx * operands, int set_flags)
27642 {
27643 char pattern[100];
27644 static const char flag_chars[3] = {'?', '.', '!'};
27645 const char *shift;
27646 HOST_WIDE_INT val;
27647 char c;
27648
27649 c = flag_chars[set_flags];
27650 shift = shift_op(operands[3], &val);
27651 if (shift)
27652 {
27653 if (val != -1)
27654 operands[2] = GEN_INT(val);
27655 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27656 }
27657 else
27658 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27659
27660 output_asm_insn (pattern, operands);
27661 return "";
27662 }
27663
27664 /* Output assembly for a WMMX immediate shift instruction. */
27665 const char *
27666 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27667 {
27668 int shift = INTVAL (operands[2]);
27669 char templ[50];
27670 machine_mode opmode = GET_MODE (operands[0]);
27671
27672 gcc_assert (shift >= 0);
27673
27674 /* If the shift value is wider than the element width (> 63 for the D qualifier,
27675 > 31 for W, > 15 for H), WROR/WSRA below use #32 shifts; others clear the destination. */
27676 if (((opmode == V4HImode) && (shift > 15))
27677 || ((opmode == V2SImode) && (shift > 31))
27678 || ((opmode == DImode) && (shift > 63)))
27679 {
27680 if (wror_or_wsra)
27681 {
27682 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27683 output_asm_insn (templ, operands);
27684 if (opmode == DImode)
27685 {
27686 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27687 output_asm_insn (templ, operands);
27688 }
27689 }
27690 else
27691 {
27692 /* The destination register will contain all zeros. */
27693 sprintf (templ, "wzero\t%%0");
27694 output_asm_insn (templ, operands);
27695 }
27696 return "";
27697 }
27698
27699 if ((opmode == DImode) && (shift > 32))
27700 {
27701 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27702 output_asm_insn (templ, operands);
27703 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27704 output_asm_insn (templ, operands);
27705 }
27706 else
27707 {
27708 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27709 output_asm_insn (templ, operands);
27710 }
27711 return "";
27712 }
27713
27714 /* Output assembly for a WMMX tinsr instruction. */
27715 const char *
27716 arm_output_iwmmxt_tinsr (rtx *operands)
27717 {
27718 int mask = INTVAL (operands[3]);
27719 int i;
27720 char templ[50];
27721 int units = mode_nunits[GET_MODE (operands[0])];
27722 gcc_assert ((mask & (mask - 1)) == 0);
27723 for (i = 0; i < units; ++i)
27724 {
27725 if ((mask & 0x01) == 1)
27726 {
27727 break;
27728 }
27729 mask >>= 1;
27730 }
27731 gcc_assert (i < units);
27732 {
27733 switch (GET_MODE (operands[0]))
27734 {
27735 case E_V8QImode:
27736 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27737 break;
27738 case E_V4HImode:
27739 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27740 break;
27741 case E_V2SImode:
27742 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27743 break;
27744 default:
27745 gcc_unreachable ();
27746 break;
27747 }
27748 output_asm_insn (templ, operands);
27749 }
27750 return "";
27751 }
27752
27753 /* Output a Thumb-1 casesi dispatch sequence. */
27754 const char *
27755 thumb1_output_casesi (rtx *operands)
27756 {
27757 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27758
27759 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27760
27761 switch (GET_MODE(diff_vec))
27762 {
27763 case E_QImode:
27764 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27765 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27766 case E_HImode:
27767 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27768 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27769 case E_SImode:
27770 return "bl\t%___gnu_thumb1_case_si";
27771 default:
27772 gcc_unreachable ();
27773 }
27774 }
27775
27776 /* Output a Thumb-2 casesi instruction. */
27777 const char *
27778 thumb2_output_casesi (rtx *operands)
27779 {
27780 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27781
27782 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27783
27784 output_asm_insn ("cmp\t%0, %1", operands);
27785 output_asm_insn ("bhi\t%l3", operands);
27786 switch (GET_MODE(diff_vec))
27787 {
27788 case E_QImode:
27789 return "tbb\t[%|pc, %0]";
27790 case E_HImode:
27791 return "tbh\t[%|pc, %0, lsl #1]";
27792 case E_SImode:
27793 if (flag_pic)
27794 {
27795 output_asm_insn ("adr\t%4, %l2", operands);
27796 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27797 output_asm_insn ("add\t%4, %4, %5", operands);
27798 return "bx\t%4";
27799 }
27800 else
27801 {
27802 output_asm_insn ("adr\t%4, %l2", operands);
27803 return "ldr\t%|pc, [%4, %0, lsl #2]";
27804 }
27805 default:
27806 gcc_unreachable ();
27807 }
27808 }
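/* For illustration, with a QImode dispatch table the sequence emitted above
   is "cmp %0, %1", "bhi %l3" (branch to the default label) followed by
   "tbb [pc, %0]"; HImode uses "tbh" and SImode falls back to an
   adr/ldr-based jump.  */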
27809
27810 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27811 per-core tuning structs. */
27812 static int
27813 arm_issue_rate (void)
27814 {
27815 return current_tune->issue_rate;
27816 }
27817
27818 /* Return how many instructions the scheduler should look ahead to choose
27819 the best one. */
27820 static int
27821 arm_first_cycle_multipass_dfa_lookahead (void)
27822 {
27823 int issue_rate = arm_issue_rate ();
27824
27825 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27826 }
27827
27828 /* Enable modeling of L2 auto-prefetcher. */
27829 static int
27830 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27831 {
27832 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27833 }
27834
27835 const char *
27836 arm_mangle_type (const_tree type)
27837 {
27838 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27839 has to be mangled as if it is in the "std" namespace. */
27840 if (TARGET_AAPCS_BASED
27841 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27842 return "St9__va_list";
27843
27844 /* Half-precision float. */
27845 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27846 return "Dh";
27847
27848 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27849 builtin type. */
27850 if (TYPE_NAME (type) != NULL)
27851 return arm_mangle_builtin_type (type);
27852
27853 /* Use the default mangling. */
27854 return NULL;
27855 }
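/* For illustration: on an AAPCS target the builtin va_list mangles as
   "St9__va_list" (i.e. as std::__va_list), and the half-precision __fp16
   type mangles as "Dh"; anything else defers to the Neon builtin mangling
   or the language default.  */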
27856
27857 /* Order of allocation of core registers for Thumb: this allocation is
27858 written over the corresponding initial entries of the array
27859 initialized with REG_ALLOC_ORDER. We allocate all low registers
27860 first. Saving and restoring a low register is usually cheaper than
27861 using a call-clobbered high register. */
27862
27863 static const int thumb_core_reg_alloc_order[] =
27864 {
27865 3, 2, 1, 0, 4, 5, 6, 7,
27866 12, 14, 8, 9, 10, 11
27867 };
27868
27869 /* Adjust register allocation order when compiling for Thumb. */
27870
27871 void
27872 arm_order_regs_for_local_alloc (void)
27873 {
27874 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27875 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27876 if (TARGET_THUMB)
27877 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27878 sizeof (thumb_core_reg_alloc_order));
27879 }
27880
27881 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27882
27883 bool
27884 arm_frame_pointer_required (void)
27885 {
27886 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27887 return true;
27888
27889 /* If the function receives nonlocal gotos, it needs to save the frame
27890 pointer in the nonlocal_goto_save_area object. */
27891 if (cfun->has_nonlocal_label)
27892 return true;
27893
27894 /* The frame pointer is required for non-leaf APCS frames. */
27895 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
27896 return true;
27897
27898 /* If we are probing the stack in the prologue, we will have a faulting
27899 instruction prior to the stack adjustment and this requires a frame
27900 pointer if we want to catch the exception using the EABI unwinder. */
27901 if (!IS_INTERRUPT (arm_current_func_type ())
27902 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27903 || flag_stack_clash_protection)
27904 && arm_except_unwind_info (&global_options) == UI_TARGET
27905 && cfun->can_throw_non_call_exceptions)
27906 {
27907 HOST_WIDE_INT size = get_frame_size ();
27908
27909 /* That's irrelevant if there is no stack adjustment. */
27910 if (size <= 0)
27911 return false;
27912
27913 /* That's relevant only if there is a stack probe. */
27914 if (crtl->is_leaf && !cfun->calls_alloca)
27915 {
27916 /* We don't have the final size of the frame so adjust. */
27917 size += 32 * UNITS_PER_WORD;
27918 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
27919 return true;
27920 }
27921 else
27922 return true;
27923 }
27924
27925 return false;
27926 }
27927
27928 /* Thumb-1 is the only target without conditional execution, so return true
27929 unless the target is Thumb-1. */
27930 static bool
27931 arm_have_conditional_execution (void)
27932 {
27933 return !TARGET_THUMB1;
27934 }
27935
27936 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27937 static HOST_WIDE_INT
27938 arm_vector_alignment (const_tree type)
27939 {
27940 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27941
27942 if (TARGET_AAPCS_BASED)
27943 align = MIN (align, 64);
27944
27945 return align;
27946 }
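/* For illustration: a 128-bit Neon vector type has TYPE_SIZE 128, but on an
   AAPCS target the hook above caps its alignment at 64 bits as the ABI
   requires, while non-AAPCS targets keep the natural 128-bit alignment.  */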
27947
27948 static unsigned int
27949 arm_autovectorize_vector_sizes (void)
27950 {
27951 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27952 }
27953
27954 static bool
27955 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27956 {
27957 /* Vectors which aren't in packed structures will not be less aligned than
27958 the natural alignment of their element type, so this is safe. */
27959 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27960 return !is_packed;
27961
27962 return default_builtin_vector_alignment_reachable (type, is_packed);
27963 }
27964
27965 static bool
27966 arm_builtin_support_vector_misalignment (machine_mode mode,
27967 const_tree type, int misalignment,
27968 bool is_packed)
27969 {
27970 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27971 {
27972 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27973
27974 if (is_packed)
27975 return align == 1;
27976
27977 /* If the misalignment is unknown, we should be able to handle the access
27978 so long as it is not to a member of a packed data structure. */
27979 if (misalignment == -1)
27980 return true;
27981
27982 /* Return true if the misalignment is a multiple of the natural alignment
27983 of the vector's element type. This is probably always going to be
27984 true in practice, since we've already established that this isn't a
27985 packed access. */
27986 return ((misalignment % align) == 0);
27987 }
27988
27989 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27990 is_packed);
27991 }
27992
27993 static void
27994 arm_conditional_register_usage (void)
27995 {
27996 int regno;
27997
27998 if (TARGET_THUMB1 && optimize_size)
27999 {
28000 /* When optimizing for size on Thumb-1, it's better not
28001 to use the HI regs, because of the overhead of
28002 stacking them. */
28003 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
28004 fixed_regs[regno] = call_used_regs[regno] = 1;
28005 }
28006
28007 /* The link register can be clobbered by any branch insn,
28008 but we have no way to track that at present, so mark
28009 it as unavailable. */
28010 if (TARGET_THUMB1)
28011 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28012
28013 if (TARGET_32BIT && TARGET_HARD_FLOAT)
28014 {
28015 /* VFPv3 registers are disabled when earlier VFP
28016 versions are selected due to the definition of
28017 LAST_VFP_REGNUM. */
28018 for (regno = FIRST_VFP_REGNUM;
28019 regno <= LAST_VFP_REGNUM; ++ regno)
28020 {
28021 fixed_regs[regno] = 0;
28022 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28023 || regno >= FIRST_VFP_REGNUM + 32;
28024 }
28025 }
28026
28027 if (TARGET_REALLY_IWMMXT)
28028 {
28029 regno = FIRST_IWMMXT_GR_REGNUM;
28030 /* The 2002/10/09 revision of the XScale ABI has wCG0
28031 and wCG1 as call-preserved registers. The 2002/11/21
28032 revision changed this so that all wCG registers are
28033 scratch registers. */
28034 for (regno = FIRST_IWMMXT_GR_REGNUM;
28035 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28036 fixed_regs[regno] = 0;
28037 /* The XScale ABI has wR0 - wR9 as scratch registers,
28038 the rest as call-preserved registers. */
28039 for (regno = FIRST_IWMMXT_REGNUM;
28040 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28041 {
28042 fixed_regs[regno] = 0;
28043 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28044 }
28045 }
28046
28047 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28048 {
28049 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28050 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28051 }
28052 else if (TARGET_APCS_STACK)
28053 {
28054 fixed_regs[10] = 1;
28055 call_used_regs[10] = 1;
28056 }
28057 /* -mcaller-super-interworking reserves r11 for calls to
28058 _interwork_r11_call_via_rN(). Making the register global
28059 is an easy way of ensuring that it remains valid for all
28060 calls. */
28061 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28062 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28063 {
28064 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28065 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28066 if (TARGET_CALLER_INTERWORKING)
28067 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28068 }
28069 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28070 }
28071
28072 static reg_class_t
28073 arm_preferred_rename_class (reg_class_t rclass)
28074 {
28075 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28076 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS,
28077 and code size can be reduced. */
28078 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28079 return LO_REGS;
28080 else
28081 return NO_REGS;
28082 }
28083
28084 /* Compute the attribute "length" of insn "*push_multi".
28085 So this function MUST be kept in sync with that insn pattern. */
28086 int
28087 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28088 {
28089 int i, regno, hi_reg;
28090 int num_saves = XVECLEN (parallel_op, 0);
28091
28092 /* ARM mode. */
28093 if (TARGET_ARM)
28094 return 4;
28095 /* Thumb1 mode. */
28096 if (TARGET_THUMB1)
28097 return 2;
28098
28099 /* Thumb2 mode. */
28100 regno = REGNO (first_op);
28101 /* For PUSH/STM in Thumb-2 mode, a 16-bit encoding can be used if the register
28102 list fits in 8 bits, i.e. all registers in the list are in LO_REGS
28103 (R0-R7). If any register in HI_REGS is used, a 32-bit encoding is
28104 required, with the one exception that PUSH may include LR and still use
28105 the 16-bit encoding. */
28106 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28107 for (i = 1; i < num_saves && !hi_reg; i++)
28108 {
28109 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28110 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28111 }
28112
28113 if (!hi_reg)
28114 return 2;
28115 return 4;
28116 }
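/* Worked examples for the function above: in Thumb-2, "push {r4, r5, lr}"
   uses only low registers plus LR, so the length is 2 bytes, while
   "push {r4, r8}" contains a high register other than LR and needs the
   4-byte encoding; ARM mode is always 4 and Thumb-1 always 2.  */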
28117
28118 /* Compute the attribute "length" of insn. Currently, this function is used
28119 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28120 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28121 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
28122 true if OPERANDS contains an insn which explicitly updates the base register. */
28123
28124 int
28125 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28126 {
28127 /* ARM mode. */
28128 if (TARGET_ARM)
28129 return 4;
28130 /* Thumb1 mode. */
28131 if (TARGET_THUMB1)
28132 return 2;
28133
28134 rtx parallel_op = operands[0];
28135 /* Initialize to the number of elements in the PARALLEL. */
28136 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28137 /* Initialize to the base register number. */
28138 unsigned regno = REGNO (operands[1]);
28139 /* Skip return and write back pattern.
28140 We only need register pop pattern for later analysis. */
28141 unsigned first_indx = 0;
28142 first_indx += return_pc ? 1 : 0;
28143 first_indx += write_back_p ? 1 : 0;
28144
28145 /* A pop operation can be done through LDM or POP. If the base register is SP
28146 and write back is enabled, then an LDM is an alias of POP. */
28147 bool pop_p = (regno == SP_REGNUM && write_back_p);
28148 bool ldm_p = !pop_p;
28149
28150 /* Check base register for LDM. */
28151 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28152 return 4;
28153
28154 /* Check each register in the list. */
28155 for (; indx >= first_indx; indx--)
28156 {
28157 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28158 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28159 comment in arm_attr_length_push_multi. */
28160 if (REGNO_REG_CLASS (regno) == HI_REGS
28161 && (regno != PC_REGNUM || ldm_p))
28162 return 4;
28163 }
28164
28165 return 2;
28166 }
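/* Worked examples for the function above: "pop {r4, r5, pc}" (SP base with
   write back) is a 16-bit POP, so 2 bytes -- PC is the one high register
   allowed there -- whereas an LDM with a high base register such as
   "ldm r8, {r4, r5}" needs the 4-byte encoding.  */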
28167
28168 /* Compute the number of instructions emitted by output_move_double. */
28169 int
28170 arm_count_output_move_double_insns (rtx *operands)
28171 {
28172 int count;
28173 rtx ops[2];
28174 /* output_move_double may modify the operands array, so call it
28175 here on a copy of the array. */
28176 ops[0] = operands[0];
28177 ops[1] = operands[1];
28178 output_move_double (ops, false, &count);
28179 return count;
28180 }
28181
28182 int
28183 vfp3_const_double_for_fract_bits (rtx operand)
28184 {
28185 REAL_VALUE_TYPE r0;
28186
28187 if (!CONST_DOUBLE_P (operand))
28188 return 0;
28189
28190 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28191 if (exact_real_inverse (DFmode, &r0)
28192 && !REAL_VALUE_NEGATIVE (r0))
28193 {
28194 if (exact_real_truncate (DFmode, &r0))
28195 {
28196 HOST_WIDE_INT value = real_to_integer (&r0);
28197 value = value & 0xffffffff;
28198 if ((value != 0) && ( (value & (value - 1)) == 0))
28199 {
28200 int ret = exact_log2 (value);
28201 gcc_assert (IN_RANGE (ret, 0, 31));
28202 return ret;
28203 }
28204 }
28205 }
28206 return 0;
28207 }
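/* Worked example for the function above: for the constant 0.125 the exact
   inverse is 8.0, which truncates exactly to 8 = 2**3, so 3 is returned
   (the number of fractional bits for a fixed-point vcvt); a value such as
   0.3 has no exactly representable inverse and yields 0.  */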
28208
28209 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28210 log2 is in [1, 32], return that log2. Otherwise return -1.
28211 This is used in the patterns for vcvt.s32.f32 floating-point to
28212 fixed-point conversions. */
28213
28214 int
28215 vfp3_const_double_for_bits (rtx x)
28216 {
28217 const REAL_VALUE_TYPE *r;
28218
28219 if (!CONST_DOUBLE_P (x))
28220 return -1;
28221
28222 r = CONST_DOUBLE_REAL_VALUE (x);
28223
28224 if (REAL_VALUE_NEGATIVE (*r)
28225 || REAL_VALUE_ISNAN (*r)
28226 || REAL_VALUE_ISINF (*r)
28227 || !real_isinteger (r, SFmode))
28228 return -1;
28229
28230 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28231
28232 /* The exact_log2 above will have returned -1 if this is
28233 not an exact log2. */
28234 if (!IN_RANGE (hwint, 1, 32))
28235 return -1;
28236
28237 return hwint;
28238 }
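/* Worked examples for the function above: 65536.0 is a positive integral
   power of two with log2 = 16, which lies in [1, 32], so 16 is returned;
   1.0 (log2 = 0) and 3.0 (not a power of two) both return -1.  */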
28239
28240 \f
28241 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28242
28243 static void
28244 arm_pre_atomic_barrier (enum memmodel model)
28245 {
28246 if (need_atomic_barrier_p (model, true))
28247 emit_insn (gen_memory_barrier ());
28248 }
28249
28250 static void
28251 arm_post_atomic_barrier (enum memmodel model)
28252 {
28253 if (need_atomic_barrier_p (model, false))
28254 emit_insn (gen_memory_barrier ());
28255 }
28256
28257 /* Emit the load-exclusive and store-exclusive instructions.
28258 Use acquire and release versions if necessary. */
28259
28260 static void
28261 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28262 {
28263 rtx (*gen) (rtx, rtx);
28264
28265 if (acq)
28266 {
28267 switch (mode)
28268 {
28269 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28270 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28271 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28272 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28273 default:
28274 gcc_unreachable ();
28275 }
28276 }
28277 else
28278 {
28279 switch (mode)
28280 {
28281 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28282 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28283 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28284 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28285 default:
28286 gcc_unreachable ();
28287 }
28288 }
28289
28290 emit_insn (gen (rval, mem));
28291 }
28292
28293 static void
28294 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28295 rtx mem, bool rel)
28296 {
28297 rtx (*gen) (rtx, rtx, rtx);
28298
28299 if (rel)
28300 {
28301 switch (mode)
28302 {
28303 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28304 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28305 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28306 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28307 default:
28308 gcc_unreachable ();
28309 }
28310 }
28311 else
28312 {
28313 switch (mode)
28314 {
28315 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28316 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28317 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28318 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28319 default:
28320 gcc_unreachable ();
28321 }
28322 }
28323
28324 emit_insn (gen (bval, rval, mem));
28325 }
28326
28327 /* Mark the previous jump instruction as unlikely. */
28328
28329 static void
28330 emit_unlikely_jump (rtx insn)
28331 {
28332 rtx_insn *jump = emit_jump_insn (insn);
28333 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28334 }
28335
28336 /* Expand a compare and swap pattern. */
28337
28338 void
28339 arm_expand_compare_and_swap (rtx operands[])
28340 {
28341 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28342 machine_mode mode;
28343 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28344
28345 bval = operands[0];
28346 rval = operands[1];
28347 mem = operands[2];
28348 oldval = operands[3];
28349 newval = operands[4];
28350 is_weak = operands[5];
28351 mod_s = operands[6];
28352 mod_f = operands[7];
28353 mode = GET_MODE (mem);
28354
28355 /* Normally the succ memory model must be stronger than fail, but in the
28356 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28357 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28358
28359 if (TARGET_HAVE_LDACQ
28360 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28361 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28362 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28363
28364 switch (mode)
28365 {
28366 case E_QImode:
28367 case E_HImode:
28368 /* For narrow modes, we're going to perform the comparison in SImode,
28369 so do the zero-extension now. */
28370 rval = gen_reg_rtx (SImode);
28371 oldval = convert_modes (SImode, mode, oldval, true);
28372 /* FALLTHRU */
28373
28374 case E_SImode:
28375 /* Force the value into a register if needed. We waited until after
28376 the zero-extension above to do this properly. */
28377 if (!arm_add_operand (oldval, SImode))
28378 oldval = force_reg (SImode, oldval);
28379 break;
28380
28381 case E_DImode:
28382 if (!cmpdi_operand (oldval, mode))
28383 oldval = force_reg (mode, oldval);
28384 break;
28385
28386 default:
28387 gcc_unreachable ();
28388 }
28389
28390 if (TARGET_THUMB1)
28391 {
28392 switch (mode)
28393 {
28394 case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28395 case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28396 case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28397 case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28398 default:
28399 gcc_unreachable ();
28400 }
28401 }
28402 else
28403 {
28404 switch (mode)
28405 {
28406 case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28407 case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28408 case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28409 case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28410 default:
28411 gcc_unreachable ();
28412 }
28413 }
28414
28415 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28416 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28417
28418 if (mode == QImode || mode == HImode)
28419 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28420
28421 /* In all cases, we arrange for success to be signaled by Z set.
28422 This arrangement allows for the boolean result to be used directly
28423 in a subsequent branch, post optimization. For Thumb-1 targets, the
28424 boolean negation of the result is also stored in bval because the Thumb-1
28425 backend lacks dependency tracking for the CC flag, as flag-setting is not
28426 represented at the RTL level. */
28427 if (TARGET_THUMB1)
28428 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28429 else
28430 {
28431 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28432 emit_insn (gen_rtx_SET (bval, x));
28433 }
28434 }
28435
28436 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28437 another memory store between the load-exclusive and store-exclusive can
28438 reset the monitor from Exclusive to Open state. This means we must wait
28439 until after reload to split the pattern, lest we get a register spill in
28440 the middle of the atomic sequence. Success of the compare and swap is
28441 indicated by the Z flag set for 32-bit targets and by neg_bval being zero
28442 for Thumb-1 targets (i.e. the negation of the boolean value returned by the
28443 atomic_compare_and_swapmode standard pattern in operand 0). */
28444
28445 void
28446 arm_split_compare_and_swap (rtx operands[])
28447 {
28448 rtx rval, mem, oldval, newval, neg_bval;
28449 machine_mode mode;
28450 enum memmodel mod_s, mod_f;
28451 bool is_weak;
28452 rtx_code_label *label1, *label2;
28453 rtx x, cond;
28454
28455 rval = operands[1];
28456 mem = operands[2];
28457 oldval = operands[3];
28458 newval = operands[4];
28459 is_weak = (operands[5] != const0_rtx);
28460 mod_s = memmodel_from_int (INTVAL (operands[6]));
28461 mod_f = memmodel_from_int (INTVAL (operands[7]));
28462 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28463 mode = GET_MODE (mem);
28464
28465 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28466
28467 bool use_acquire = TARGET_HAVE_LDACQ
28468 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28469 || is_mm_release (mod_s));
28470
28471 bool use_release = TARGET_HAVE_LDACQ
28472 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28473 || is_mm_acquire (mod_s));
28474
28475 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28476 a full barrier is emitted after the store-release. */
28477 if (is_armv8_sync)
28478 use_acquire = false;
28479
28480 /* Checks whether a barrier is needed and emits one accordingly. */
28481 if (!(use_acquire || use_release))
28482 arm_pre_atomic_barrier (mod_s);
28483
28484 label1 = NULL;
28485 if (!is_weak)
28486 {
28487 label1 = gen_label_rtx ();
28488 emit_label (label1);
28489 }
28490 label2 = gen_label_rtx ();
28491
28492 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28493
28494 /* Z is set to 0 for 32-bit targets (resp. rval set to 1) if oldval != rval,
28495 as required to communicate with arm_expand_compare_and_swap. */
28496 if (TARGET_32BIT)
28497 {
28498 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28499 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28500 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28501 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28502 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28503 }
28504 else
28505 {
28506 emit_move_insn (neg_bval, const1_rtx);
28507 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28508 if (thumb1_cmpneg_operand (oldval, SImode))
28509 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28510 label2, cond));
28511 else
28512 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28513 }
28514
28515 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28516
28517 /* Weak or strong, we want EQ to be true for success, so that we
28518 match the flags that we got from the compare above. */
28519 if (TARGET_32BIT)
28520 {
28521 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28522 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28523 emit_insn (gen_rtx_SET (cond, x));
28524 }
28525
28526 if (!is_weak)
28527 {
28528 /* Z is set to boolean value of !neg_bval, as required to communicate
28529 with arm_expand_compare_and_swap. */
28530 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28531 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28532 }
28533
28534 if (!is_mm_relaxed (mod_f))
28535 emit_label (label2);
28536
28537 /* Checks whether a barrier is needed and emits one accordingly. */
28538 if (is_armv8_sync
28539 || !(use_acquire || use_release))
28540 arm_post_atomic_barrier (mod_s);
28541
28542 if (is_mm_relaxed (mod_f))
28543 emit_label (label2);
28544 }
28545
28546 /* Split an atomic operation pattern. Operation is given by CODE and is one
28547 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28548 operation). Operation is performed on the content at MEM and on VALUE
28549 following the memory model MODEL_RTX. The content at MEM before and after
28550 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28551 success of the operation is returned in COND. Using a scratch register or
28552 an operand register for these determines what result is returned for that
28553 pattern. */
28554
28555 void
28556 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28557 rtx value, rtx model_rtx, rtx cond)
28558 {
28559 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28560 machine_mode mode = GET_MODE (mem);
28561 machine_mode wmode = (mode == DImode ? DImode : SImode);
28562 rtx_code_label *label;
28563 bool all_low_regs, bind_old_new;
28564 rtx x;
28565
28566 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28567
28568 bool use_acquire = TARGET_HAVE_LDACQ
28569 && !(is_mm_relaxed (model) || is_mm_consume (model)
28570 || is_mm_release (model));
28571
28572 bool use_release = TARGET_HAVE_LDACQ
28573 && !(is_mm_relaxed (model) || is_mm_consume (model)
28574 || is_mm_acquire (model));
28575
28576 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28577 a full barrier is emitted after the store-release. */
28578 if (is_armv8_sync)
28579 use_acquire = false;
28580
28581 /* Checks whether a barrier is needed and emits one accordingly. */
28582 if (!(use_acquire || use_release))
28583 arm_pre_atomic_barrier (model);
28584
28585 label = gen_label_rtx ();
28586 emit_label (label);
28587
28588 if (new_out)
28589 new_out = gen_lowpart (wmode, new_out);
28590 if (old_out)
28591 old_out = gen_lowpart (wmode, old_out);
28592 else
28593 old_out = new_out;
28594 value = simplify_gen_subreg (wmode, value, mode, 0);
28595
28596 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28597
28598 /* Does the operation require the destination and the first operand to use
28599 the same register? This is decided by the register constraints of the
28600 relevant insn patterns in thumb1.md. */
28601 gcc_assert (!new_out || REG_P (new_out));
28602 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28603 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28604 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28605 bind_old_new =
28606 (TARGET_THUMB1
28607 && code != SET
28608 && code != MINUS
28609 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28610
28611 /* We want to return the old value while putting the result of the operation
28612 in the same register as the old value so copy the old value over to the
28613 destination register and use that register for the operation. */
28614 if (old_out && bind_old_new)
28615 {
28616 emit_move_insn (new_out, old_out);
28617 old_out = new_out;
28618 }
28619
28620 switch (code)
28621 {
28622 case SET:
28623 new_out = value;
28624 break;
28625
28626 case NOT:
28627 x = gen_rtx_AND (wmode, old_out, value);
28628 emit_insn (gen_rtx_SET (new_out, x));
28629 x = gen_rtx_NOT (wmode, new_out);
28630 emit_insn (gen_rtx_SET (new_out, x));
28631 break;
28632
28633 case MINUS:
28634 if (CONST_INT_P (value))
28635 {
28636 value = GEN_INT (-INTVAL (value));
28637 code = PLUS;
28638 }
28639 /* FALLTHRU */
28640
28641 case PLUS:
28642 if (mode == DImode)
28643 {
28644 /* DImode plus/minus need to clobber flags. */
28645 /* The adddi3 and subdi3 patterns are incorrectly written so that
28646 they require matching operands, even when we could easily support
28647 three operands. Thankfully, this can be fixed up post-splitting,
28648 as the individual add+adc patterns do accept three operands and
28649 post-reload cprop can make these moves go away. */
28650 emit_move_insn (new_out, old_out);
28651 if (code == PLUS)
28652 x = gen_adddi3 (new_out, new_out, value);
28653 else
28654 x = gen_subdi3 (new_out, new_out, value);
28655 emit_insn (x);
28656 break;
28657 }
28658 /* FALLTHRU */
28659
28660 default:
28661 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28662 emit_insn (gen_rtx_SET (new_out, x));
28663 break;
28664 }
28665
28666 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28667 use_release);
28668
28669 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28670 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28671
28672 /* Checks whether a barrier is needed and emits one accordingly. */
28673 if (is_armv8_sync
28674 || !(use_acquire || use_release))
28675 arm_post_atomic_barrier (model);
28676 }
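/* Illustrative sketch of what the split above produces for an SImode atomic
   add when acquire/release forms are not used (register numbers chosen
   arbitrarily, barriers elided):

	1:	ldrex	r0, [r2]	@ old_out = *mem
		add	r1, r0, r3	@ new_out = old_out + value
		strex	ip, r1, [r2]	@ ip = 0 iff the store succeeded
		cmp	ip, #0
		bne	1b

   With acquire/release support the plain exclusives become their
   LDAEX/STLEX forms and the explicit barriers are dropped.  */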
28677 \f
28678 #define MAX_VECT_LEN 16
28679
28680 struct expand_vec_perm_d
28681 {
28682 rtx target, op0, op1;
28683 auto_vec_perm_indices perm;
28684 machine_mode vmode;
28685 bool one_vector_p;
28686 bool testing_p;
28687 };
28688
28689 /* Generate a variable permutation. */
28690
28691 static void
28692 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28693 {
28694 machine_mode vmode = GET_MODE (target);
28695 bool one_vector_p = rtx_equal_p (op0, op1);
28696
28697 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28698 gcc_checking_assert (GET_MODE (op0) == vmode);
28699 gcc_checking_assert (GET_MODE (op1) == vmode);
28700 gcc_checking_assert (GET_MODE (sel) == vmode);
28701 gcc_checking_assert (TARGET_NEON);
28702
28703 if (one_vector_p)
28704 {
28705 if (vmode == V8QImode)
28706 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28707 else
28708 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28709 }
28710 else
28711 {
28712 rtx pair;
28713
28714 if (vmode == V8QImode)
28715 {
28716 pair = gen_reg_rtx (V16QImode);
28717 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28718 pair = gen_lowpart (TImode, pair);
28719 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28720 }
28721 else
28722 {
28723 pair = gen_reg_rtx (OImode);
28724 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28725 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28726 }
28727 }
28728 }
28729
28730 void
28731 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28732 {
28733 machine_mode vmode = GET_MODE (target);
28734 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28735 bool one_vector_p = rtx_equal_p (op0, op1);
28736 rtx rmask[MAX_VECT_LEN], mask;
28737
28738 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28739 numbering of elements for big-endian, we must reverse the order. */
28740 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28741
28742 /* The VTBL instruction does not use a modulo index, so we must take care
28743 of that ourselves. */
28744 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28745 for (i = 0; i < nelt; ++i)
28746 rmask[i] = mask;
28747 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28748 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28749
28750 arm_expand_vec_perm_1 (target, op0, op1, sel);
28751 }
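/* For example, with a single V8QImode input each selector byte is ANDed
   with 7 before the VTBL, so an out-of-range index such as 11 selects the
   same lane as 3; with two distinct inputs the mask is 15 instead.  */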
28752
28753 /* Map lane ordering between architectural lane order and GCC lane order,
28754 taking into account ABI. See comment above output_move_neon for details. */
28755
28756 static int
28757 neon_endian_lane_map (machine_mode mode, int lane)
28758 {
28759 if (BYTES_BIG_ENDIAN)
28760 {
28761 int nelems = GET_MODE_NUNITS (mode);
28762 /* Reverse lane order. */
28763 lane = (nelems - 1 - lane);
28764 /* Reverse D register order, to match ABI. */
28765 if (GET_MODE_SIZE (mode) == 16)
28766 lane = lane ^ (nelems / 2);
28767 }
28768 return lane;
28769 }
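/* For example, for V4SImode on a big-endian target the lanes are first
   reversed (0,1,2,3 -> 3,2,1,0) and the XOR with nelems / 2 then swaps
   the two D-register halves, giving the overall mapping 0->1, 1->0,
   2->3, 3->2, i.e. a reversal within each 64-bit half.  */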
28770
28771 /* Some permutations index into pairs of vectors; this is a helper function
28772 to map indexes into those pairs of vectors. */
28773
28774 static int
28775 neon_pair_endian_lane_map (machine_mode mode, int lane)
28776 {
28777 int nelem = GET_MODE_NUNITS (mode);
28778 if (BYTES_BIG_ENDIAN)
28779 lane =
28780 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28781 return lane;
28782 }
28783
28784 /* Generate or test for an insn that supports a constant permutation. */
28785
28786 /* Recognize patterns for the VUZP insns. */
28787
28788 static bool
28789 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28790 {
28791 unsigned int i, odd, mask, nelt = d->perm.length ();
28792 rtx out0, out1, in0, in1;
28793 rtx (*gen)(rtx, rtx, rtx, rtx);
28794 int first_elem;
28795 int swap_nelt;
28796
28797 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28798 return false;
28799
28800 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28801 big-endian pattern on 64-bit vectors, so we correct for that. */
28802 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28803 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
28804
28805 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28806
28807 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28808 odd = 0;
28809 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28810 odd = 1;
28811 else
28812 return false;
28813 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28814
28815 for (i = 0; i < nelt; i++)
28816 {
28817 unsigned elt =
28818 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28819 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28820 return false;
28821 }
28822
28823 /* Success! */
28824 if (d->testing_p)
28825 return true;
28826
28827 switch (d->vmode)
28828 {
28829 case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28830 case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28831 case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28832 case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28833 case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28834 case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28835 case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28836 case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28837 case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28838 case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28839 default:
28840 gcc_unreachable ();
28841 }
28842
28843 in0 = d->op0;
28844 in1 = d->op1;
28845 if (swap_nelt != 0)
28846 std::swap (in0, in1);
28847
28848 out0 = d->target;
28849 out1 = gen_reg_rtx (d->vmode);
28850 if (odd)
28851 std::swap (out0, out1);
28852
28853 emit_insn (gen (out0, in0, in1, out1));
28854 return true;
28855 }
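/* For instance, on a little-endian target a V8HImode selector of
   { 0, 2, 4, 6, 8, 10, 12, 14 } matches the even (odd == 0) VUZP case
   and { 1, 3, 5, 7, 9, 11, 13, 15 } matches the odd one.  */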
28856
28857 /* Recognize patterns for the VZIP insns. */
28858
28859 static bool
28860 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28861 {
28862 unsigned int i, high, mask, nelt = d->perm.length ();
28863 rtx out0, out1, in0, in1;
28864 rtx (*gen)(rtx, rtx, rtx, rtx);
28865 int first_elem;
28866 bool is_swapped;
28867
28868 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28869 return false;
28870
28871 is_swapped = BYTES_BIG_ENDIAN;
28872
28873 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28874
28875 high = nelt / 2;
28876 if (first_elem == neon_endian_lane_map (d->vmode, high))
28877 ;
28878 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28879 high = 0;
28880 else
28881 return false;
28882 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28883
28884 for (i = 0; i < nelt / 2; i++)
28885 {
28886 unsigned elt =
28887 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28888 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28889 != elt)
28890 return false;
28891 elt =
28892 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28893 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28894 != elt)
28895 return false;
28896 }
28897
28898 /* Success! */
28899 if (d->testing_p)
28900 return true;
28901
28902 switch (d->vmode)
28903 {
28904 case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28905 case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28906 case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28907 case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28908 case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28909 case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28910 case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
28911 case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
28912 case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28913 case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28914 default:
28915 gcc_unreachable ();
28916 }
28917
28918 in0 = d->op0;
28919 in1 = d->op1;
28920 if (is_swapped)
28921 std::swap (in0, in1);
28922
28923 out0 = d->target;
28924 out1 = gen_reg_rtx (d->vmode);
28925 if (high)
28926 std::swap (out0, out1);
28927
28928 emit_insn (gen (out0, in0, in1, out1));
28929 return true;
28930 }
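/* For instance, on a little-endian target a V8HImode selector of
   { 0, 8, 1, 9, 2, 10, 3, 11 } matches the low-half (high == 0) VZIP
   case and { 4, 12, 5, 13, 6, 14, 7, 15 } the high-half one.  */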
28931
28932 /* Recognize patterns for the VREV insns. */
28933
28934 static bool
28935 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28936 {
28937 unsigned int i, j, diff, nelt = d->perm.length ();
28938 rtx (*gen)(rtx, rtx);
28939
28940 if (!d->one_vector_p)
28941 return false;
28942
28943 diff = d->perm[0];
28944 switch (diff)
28945 {
28946 case 7:
28947 switch (d->vmode)
28948 {
28949 case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
28950 case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
28951 default:
28952 return false;
28953 }
28954 break;
28955 case 3:
28956 switch (d->vmode)
28957 {
28958 case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
28959 case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
28960 case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
28961 case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
28962 case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
28963 case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
28964 default:
28965 return false;
28966 }
28967 break;
28968 case 1:
28969 switch (d->vmode)
28970 {
28971 case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
28972 case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
28973 case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
28974 case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
28975 case E_V4SImode: gen = gen_neon_vrev64v4si; break;
28976 case E_V2SImode: gen = gen_neon_vrev64v2si; break;
28977 case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
28978 case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
28979 default:
28980 return false;
28981 }
28982 break;
28983 default:
28984 return false;
28985 }
28986
28987 for (i = 0; i < nelt ; i += diff + 1)
28988 for (j = 0; j <= diff; j += 1)
28989 {
28990 /* This is guaranteed to be true as the value of diff
28991 is 7, 3 or 1 and we should have enough elements in the
28992 queue to generate this. Getting a vector mask with a
28993 value of diff other than these values implies that
28994 something is wrong by the time we get here. */
28995 gcc_assert (i + j < nelt);
28996 if (d->perm[i + j] != i + diff - j)
28997 return false;
28998 }
28999
29000 /* Success! */
29001 if (d->testing_p)
29002 return true;
29003
29004 emit_insn (gen (d->target, d->op0));
29005 return true;
29006 }
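/* For instance, a V8HImode selector of { 3, 2, 1, 0, 7, 6, 5, 4 } has
   diff == 3 and is matched as vrev64.16, reversing the halfwords within
   each 64-bit group; more generally diff + 1 is the number of adjacent
   elements reversed by the chosen VREV variant.  */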
29007
29008 /* Recognize patterns for the VTRN insns. */
29009
29010 static bool
29011 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29012 {
29013 unsigned int i, odd, mask, nelt = d->perm.length ();
29014 rtx out0, out1, in0, in1;
29015 rtx (*gen)(rtx, rtx, rtx, rtx);
29016
29017 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29018 return false;
29019
29020 /* Note that these are little-endian tests. Adjust for big-endian later. */
29021 if (d->perm[0] == 0)
29022 odd = 0;
29023 else if (d->perm[0] == 1)
29024 odd = 1;
29025 else
29026 return false;
29027 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29028
29029 for (i = 0; i < nelt; i += 2)
29030 {
29031 if (d->perm[i] != i + odd)
29032 return false;
29033 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29034 return false;
29035 }
29036
29037 /* Success! */
29038 if (d->testing_p)
29039 return true;
29040
29041 switch (d->vmode)
29042 {
29043 case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
29044 case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
29045 case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
29046 case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
29047 case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
29048 case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
29049 case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29050 case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29051 case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29052 case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29053 default:
29054 gcc_unreachable ();
29055 }
29056
29057 in0 = d->op0;
29058 in1 = d->op1;
29059 if (BYTES_BIG_ENDIAN)
29060 {
29061 std::swap (in0, in1);
29062 odd = !odd;
29063 }
29064
29065 out0 = d->target;
29066 out1 = gen_reg_rtx (d->vmode);
29067 if (odd)
29068 std::swap (out0, out1);
29069
29070 emit_insn (gen (out0, in0, in1, out1));
29071 return true;
29072 }
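/* For instance, a V4SImode selector of { 0, 4, 2, 6 } matches the even
   (odd == 0) VTRN case and { 1, 5, 3, 7 } the odd one, interleaving the
   corresponding even or odd lanes of the two inputs.  */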
29073
29074 /* Recognize patterns for the VEXT insns. */
29075
29076 static bool
29077 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29078 {
29079 unsigned int i, nelt = d->perm.length ();
29080 rtx (*gen) (rtx, rtx, rtx, rtx);
29081 rtx offset;
29082
29083 unsigned int location;
29084
29085 unsigned int next = d->perm[0] + 1;
29086
29087 /* TODO: Handle GCC's numbering of elements for big-endian. */
29088 if (BYTES_BIG_ENDIAN)
29089 return false;
29090
29091 /* Check if the extracted indexes are increasing by one. */
29092 for (i = 1; i < nelt; next++, i++)
29093 {
29094 /* If we hit the most significant element of the 2nd vector in
29095 the previous iteration, no need to test further. */
29096 if (next == 2 * nelt)
29097 return false;
29098
29099 /* If we are operating on only one vector, it could be a
29100 rotation. If there are only two elements of size < 64, let
29101 arm_evpc_neon_vrev catch it. */
29102 if (d->one_vector_p && (next == nelt))
29103 {
29104 if ((nelt == 2) && (d->vmode != V2DImode))
29105 return false;
29106 else
29107 next = 0;
29108 }
29109
29110 if (d->perm[i] != next)
29111 return false;
29112 }
29113
29114 location = d->perm[0];
29115
29116 switch (d->vmode)
29117 {
29118 case E_V16QImode: gen = gen_neon_vextv16qi; break;
29119 case E_V8QImode: gen = gen_neon_vextv8qi; break;
29120 case E_V4HImode: gen = gen_neon_vextv4hi; break;
29121 case E_V8HImode: gen = gen_neon_vextv8hi; break;
29122 case E_V2SImode: gen = gen_neon_vextv2si; break;
29123 case E_V4SImode: gen = gen_neon_vextv4si; break;
29124 case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29125 case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29126 case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29127 case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29128 case E_V2DImode: gen = gen_neon_vextv2di; break;
29129 default:
29130 return false;
29131 }
29132
29133 /* Success! */
29134 if (d->testing_p)
29135 return true;
29136
29137 offset = GEN_INT (location);
29138 emit_insn (gen (d->target, d->op0, d->op1, offset));
29139 return true;
29140 }
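/* For instance, a V4SImode selector of { 1, 2, 3, 4 } is a consecutive
   run starting at element 1 and is matched as a VEXT with an offset of
   one, taking the last three lanes of the first operand and the first
   lane of the second.  */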
29141
29142 /* The NEON VTBL instruction is a fully variable permutation that's even
29143 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29144 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29145 can do slightly better by expanding this as a constant where we don't
29146 have to apply a mask. */
29147
29148 static bool
29149 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29150 {
29151 rtx rperm[MAX_VECT_LEN], sel;
29152 machine_mode vmode = d->vmode;
29153 unsigned int i, nelt = d->perm.length ();
29154
29155 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29156 numbering of elements for big-endian, we must reverse the order. */
29157 if (BYTES_BIG_ENDIAN)
29158 return false;
29159
29160 if (d->testing_p)
29161 return true;
29162
29163 /* Generic code will try constant permutation twice: once with the
29164 original mode and again with the elements lowered to QImode.
29165 So wait and don't do the selector expansion ourselves. */
29166 if (vmode != V8QImode && vmode != V16QImode)
29167 return false;
29168
29169 for (i = 0; i < nelt; ++i)
29170 rperm[i] = GEN_INT (d->perm[i]);
29171 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29172 sel = force_reg (vmode, sel);
29173
29174 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29175 return true;
29176 }
29177
29178 static bool
29179 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29180 {
29181 /* Check if the input mask matches vext before reordering the
29182 operands. */
29183 if (TARGET_NEON)
29184 if (arm_evpc_neon_vext (d))
29185 return true;
29186
29187 /* The pattern matching functions above are written to look for a small
29188 number to begin the sequence (0, 1, N/2). If we begin with an index
29189 from the second operand, we can swap the operands. */
29190 unsigned int nelt = d->perm.length ();
29191 if (d->perm[0] >= nelt)
29192 {
29193 for (unsigned int i = 0; i < nelt; ++i)
29194 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
29195
29196 std::swap (d->op0, d->op1);
29197 }
29198
29199 if (TARGET_NEON)
29200 {
29201 if (arm_evpc_neon_vuzp (d))
29202 return true;
29203 if (arm_evpc_neon_vzip (d))
29204 return true;
29205 if (arm_evpc_neon_vrev (d))
29206 return true;
29207 if (arm_evpc_neon_vtrn (d))
29208 return true;
29209 return arm_evpc_neon_vtbl (d);
29210 }
29211 return false;
29212 }
29213
29214 /* Expand a vec_perm_const pattern. */
29215
29216 bool
29217 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29218 {
29219 struct expand_vec_perm_d d;
29220 int i, nelt, which;
29221
29222 d.target = target;
29223 d.op0 = op0;
29224 d.op1 = op1;
29225
29226 d.vmode = GET_MODE (target);
29227 gcc_assert (VECTOR_MODE_P (d.vmode));
29228 d.testing_p = false;
29229
29230 nelt = GET_MODE_NUNITS (d.vmode);
29231 d.perm.reserve (nelt);
29232 for (i = which = 0; i < nelt; ++i)
29233 {
29234 rtx e = XVECEXP (sel, 0, i);
29235 int ei = INTVAL (e) & (2 * nelt - 1);
29236 which |= (ei < nelt ? 1 : 2);
29237 d.perm.quick_push (ei);
29238 }
29239
29240 switch (which)
29241 {
29242 default:
29243 gcc_unreachable();
29244
29245 case 3:
29246 d.one_vector_p = false;
29247 if (!rtx_equal_p (op0, op1))
29248 break;
29249
29250 /* The elements of PERM do not suggest that only the first operand
29251 is used, but both operands are identical. Allow easier matching
29252 of the permutation by folding the permutation into the single
29253 input vector. */
29254 /* FALLTHRU */
29255 case 2:
29256 for (i = 0; i < nelt; ++i)
29257 d.perm[i] &= nelt - 1;
29258 d.op0 = op1;
29259 d.one_vector_p = true;
29260 break;
29261
29262 case 1:
29263 d.op1 = op0;
29264 d.one_vector_p = true;
29265 break;
29266 }
29267
29268 return arm_expand_vec_perm_const_1 (&d);
29269 }
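/* For example, a selector whose indices all refer to the second input
   (WHICH == 2) is reduced modulo NELT and treated as a single-operand
   permutation of OP1, which lets the simpler one-vector patterns above
   match it.  */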
29270
29271 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29272
29273 static bool
29274 arm_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
29275 {
29276 struct expand_vec_perm_d d;
29277 unsigned int i, nelt, which;
29278 bool ret;
29279
29280 d.vmode = vmode;
29281 d.testing_p = true;
29282 d.perm.safe_splice (sel);
29283
29284 /* Categorize the set of elements in the selector. */
29285 nelt = GET_MODE_NUNITS (d.vmode);
29286 for (i = which = 0; i < nelt; ++i)
29287 {
29288 unsigned int e = d.perm[i];
29289 gcc_assert (e < 2 * nelt);
29290 which |= (e < nelt ? 1 : 2);
29291 }
29292
29293 /* If all elements are from the second vector, fold them onto the first. */
29294 if (which == 2)
29295 for (i = 0; i < nelt; ++i)
29296 d.perm[i] -= nelt;
29297
29298 /* Check whether the mask can be applied to the vector type. */
29299 d.one_vector_p = (which != 3);
29300
29301 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29302 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29303 if (!d.one_vector_p)
29304 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29305
29306 start_sequence ();
29307 ret = arm_expand_vec_perm_const_1 (&d);
29308 end_sequence ();
29309
29310 return ret;
29311 }
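/* Note that since TESTING_P is set the recognizers above return before
   emitting anything; the start_sequence/end_sequence pair additionally
   ensures that nothing emitted while testing reaches the insn stream.  */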
29312
29313 bool
29314 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29315 {
29316 /* If we are soft float and either have ldrd or the access fits in a
29317 single word, then all auto increment forms are ok. */
29318 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29319 return true;
29320
29321 switch (code)
29322 {
29323 /* Post-increment and pre-decrement are supported for all
29324 instruction forms except for vector forms. */
29325 case ARM_POST_INC:
29326 case ARM_PRE_DEC:
29327 if (VECTOR_MODE_P (mode))
29328 {
29329 if (code != ARM_PRE_DEC)
29330 return true;
29331 else
29332 return false;
29333 }
29334
29335 return true;
29336
29337 case ARM_POST_DEC:
29338 case ARM_PRE_INC:
29339 /* Without LDRD, if the mode size is greater than the
29340 word size there is no point in auto-incrementing
29341 because ldm and stm will not have these forms. */
29342 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29343 return false;
29344
29345 /* Vector and floating point modes do not support
29346 these auto increment forms. */
29347 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29348 return false;
29349
29350 return true;
29351
29352 default:
29353 return false;
29354
29355 }
29356
29357 return false;
29358 }
29359
29360 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29361 on ARM, since we know that shifts by negative amounts are no-ops.
29362 Additionally, the default expansion code is not available or suitable
29363 for post-reload insn splits (this can occur when the register allocator
29364 chooses not to do a shift in NEON).
29365
29366 This function is used in both initial expand and post-reload splits, and
29367 handles all kinds of 64-bit shifts.
29368
29369 Input requirements:
29370 - It is safe for the input and output to be the same register, but
29371 early-clobber rules apply for the shift amount and scratch registers.
29372 - Shift by register requires both scratch registers. In all other cases
29373 the scratch registers may be NULL.
29374 - Ashiftrt by a register also clobbers the CC register. */
29375 void
29376 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29377 rtx amount, rtx scratch1, rtx scratch2)
29378 {
29379 rtx out_high = gen_highpart (SImode, out);
29380 rtx out_low = gen_lowpart (SImode, out);
29381 rtx in_high = gen_highpart (SImode, in);
29382 rtx in_low = gen_lowpart (SImode, in);
29383
29384 /* Terminology:
29385 in = the register pair containing the input value.
29386 out = the destination register pair.
29387 up = the high- or low-part of each pair.
29388 down = the opposite part to "up".
29389 In a shift, we can consider bits to shift from "up"-stream to
29390 "down"-stream, so in a left-shift "up" is the low-part and "down"
29391 is the high-part of each register pair. */
29392
29393 rtx out_up = code == ASHIFT ? out_low : out_high;
29394 rtx out_down = code == ASHIFT ? out_high : out_low;
29395 rtx in_up = code == ASHIFT ? in_low : in_high;
29396 rtx in_down = code == ASHIFT ? in_high : in_low;
29397
29398 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29399 gcc_assert (out
29400 && (REG_P (out) || GET_CODE (out) == SUBREG)
29401 && GET_MODE (out) == DImode);
29402 gcc_assert (in
29403 && (REG_P (in) || GET_CODE (in) == SUBREG)
29404 && GET_MODE (in) == DImode);
29405 gcc_assert (amount
29406 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29407 && GET_MODE (amount) == SImode)
29408 || CONST_INT_P (amount)));
29409 gcc_assert (scratch1 == NULL
29410 || (GET_CODE (scratch1) == SCRATCH)
29411 || (GET_MODE (scratch1) == SImode
29412 && REG_P (scratch1)));
29413 gcc_assert (scratch2 == NULL
29414 || (GET_CODE (scratch2) == SCRATCH)
29415 || (GET_MODE (scratch2) == SImode
29416 && REG_P (scratch2)));
29417 gcc_assert (!REG_P (out) || !REG_P (amount)
29418 || !HARD_REGISTER_P (out)
29419 || (REGNO (out) != REGNO (amount)
29420 && REGNO (out) + 1 != REGNO (amount)));
29421
29422 /* Macros to make following code more readable. */
29423 #define SUB_32(DEST,SRC) \
29424 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29425 #define RSB_32(DEST,SRC) \
29426 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29427 #define SUB_S_32(DEST,SRC) \
29428 gen_addsi3_compare0 ((DEST), (SRC), \
29429 GEN_INT (-32))
29430 #define SET(DEST,SRC) \
29431 gen_rtx_SET ((DEST), (SRC))
29432 #define SHIFT(CODE,SRC,AMOUNT) \
29433 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29434 #define LSHIFT(CODE,SRC,AMOUNT) \
29435 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29436 SImode, (SRC), (AMOUNT))
29437 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29438 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29439 SImode, (SRC), (AMOUNT))
29440 #define ORR(A,B) \
29441 gen_rtx_IOR (SImode, (A), (B))
29442 #define BRANCH(COND,LABEL) \
29443 gen_arm_cond_branch ((LABEL), \
29444 gen_rtx_ ## COND (CCmode, cc_reg, \
29445 const0_rtx), \
29446 cc_reg)
29447
29448 /* Shifts by register and shifts by constant are handled separately. */
29449 if (CONST_INT_P (amount))
29450 {
29451 /* We have a shift-by-constant. */
29452
29453 /* First, handle out-of-range shift amounts.
29454 In both cases we try to match the result that an ARM instruction in a
29455 shift-by-register would give. This helps reduce execution
29456 differences between optimization levels, but it won't stop other
29457 parts of the compiler doing different things. This is "undefined
29458 behavior", in any case. */
29459 if (INTVAL (amount) <= 0)
29460 emit_insn (gen_movdi (out, in));
29461 else if (INTVAL (amount) >= 64)
29462 {
29463 if (code == ASHIFTRT)
29464 {
29465 rtx const31_rtx = GEN_INT (31);
29466 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29467 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29468 }
29469 else
29470 emit_insn (gen_movdi (out, const0_rtx));
29471 }
29472
29473 /* Now handle valid shifts. */
29474 else if (INTVAL (amount) < 32)
29475 {
29476 /* Shifts by a constant less than 32. */
29477 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29478
29479 /* Clearing the out register in DImode first avoids lots
29480 of spilling and results in less stack usage.
29481 Later this redundant insn is completely removed.
29482 Do that only if "in" and "out" are different registers. */
29483 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29484 emit_insn (SET (out, const0_rtx));
29485 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29486 emit_insn (SET (out_down,
29487 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29488 out_down)));
29489 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29490 }
29491 else
29492 {
29493 /* Shifts by a constant greater than 31. */
29494 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29495
29496 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29497 emit_insn (SET (out, const0_rtx));
29498 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29499 if (code == ASHIFTRT)
29500 emit_insn (gen_ashrsi3 (out_up, in_up,
29501 GEN_INT (31)));
29502 else
29503 emit_insn (SET (out_up, const0_rtx));
29504 }
29505 }
29506 else
29507 {
29508 /* We have a shift-by-register. */
29509 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29510
29511 /* This alternative requires the scratch registers. */
29512 gcc_assert (scratch1 && REG_P (scratch1));
29513 gcc_assert (scratch2 && REG_P (scratch2));
29514
29515 /* We will need the values "amount-32" and "32-amount" later.
29516 Swapping them around now allows the later code to be more general. */
29517 switch (code)
29518 {
29519 case ASHIFT:
29520 emit_insn (SUB_32 (scratch1, amount));
29521 emit_insn (RSB_32 (scratch2, amount));
29522 break;
29523 case ASHIFTRT:
29524 emit_insn (RSB_32 (scratch1, amount));
29525 /* Also set CC = amount > 32. */
29526 emit_insn (SUB_S_32 (scratch2, amount));
29527 break;
29528 case LSHIFTRT:
29529 emit_insn (RSB_32 (scratch1, amount));
29530 emit_insn (SUB_32 (scratch2, amount));
29531 break;
29532 default:
29533 gcc_unreachable ();
29534 }
29535
29536 /* Emit code like this:
29537
29538 arithmetic-left:
29539 out_down = in_down << amount;
29540 out_down = (in_up << (amount - 32)) | out_down;
29541 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29542 out_up = in_up << amount;
29543
29544 arithmetic-right:
29545 out_down = in_down >> amount;
29546 out_down = (in_up << (32 - amount)) | out_down;
29547 if (amount < 32)
29548 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29549 out_up = in_up << amount;
29550
29551 logical-right:
29552 out_down = in_down >> amount;
29553 out_down = (in_up << (32 - amount)) | out_down;
29554 if (amount < 32)
29555 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29556 out_up = in_up << amount;
29557
29558 The ARM and Thumb2 variants are the same but implemented slightly
29559 differently. If this were only called during expand we could just
29560 use the Thumb2 case and let combine do the right thing, but this
29561 can also be called from post-reload splitters. */
29562
29563 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29564
29565 if (!TARGET_THUMB2)
29566 {
29567 /* Emit code for ARM mode. */
29568 emit_insn (SET (out_down,
29569 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29570 if (code == ASHIFTRT)
29571 {
29572 rtx_code_label *done_label = gen_label_rtx ();
29573 emit_jump_insn (BRANCH (LT, done_label));
29574 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29575 out_down)));
29576 emit_label (done_label);
29577 }
29578 else
29579 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29580 out_down)));
29581 }
29582 else
29583 {
29584 /* Emit code for Thumb2 mode.
29585 Thumb2 can't do shift and or in one insn. */
29586 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29587 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29588
29589 if (code == ASHIFTRT)
29590 {
29591 rtx_code_label *done_label = gen_label_rtx ();
29592 emit_jump_insn (BRANCH (LT, done_label));
29593 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29594 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29595 emit_label (done_label);
29596 }
29597 else
29598 {
29599 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29600 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29601 }
29602 }
29603
29604 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29605 }
29606
29607 #undef SUB_32
29608 #undef RSB_32
29609 #undef SUB_S_32
29610 #undef SET
29611 #undef SHIFT
29612 #undef LSHIFT
29613 #undef REV_LSHIFT
29614 #undef ORR
29615 #undef BRANCH
29616 }
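/* For example, a 64-bit logical right shift by the constant 8 becomes
   out_down = (in_down >> 8) | (in_up << 24); out_up = in_up >> 8,
   using only the two core registers of each pair.  */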
29617
29618 /* Returns true if the pattern is a valid symbolic address, which is either a
29619 symbol_ref or (symbol_ref + addend).
29620
29621 According to the ARM ELF ABI, the initial addend of REL-type relocations
29622 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29623 literal field of the instruction as a 16-bit signed value in the range
29624 -32768 <= A < 32768. */
29625
29626 bool
29627 arm_valid_symbolic_address_p (rtx addr)
29628 {
29629 rtx xop0, xop1 = NULL_RTX;
29630 rtx tmp = addr;
29631
29632 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29633 return true;
29634
29635 /* (const (plus: symbol_ref const_int)) */
29636 if (GET_CODE (addr) == CONST)
29637 tmp = XEXP (addr, 0);
29638
29639 if (GET_CODE (tmp) == PLUS)
29640 {
29641 xop0 = XEXP (tmp, 0);
29642 xop1 = XEXP (tmp, 1);
29643
29644 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29645 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29646 }
29647
29648 return false;
29649 }
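/* For example, (const (plus (symbol_ref "foo") (const_int 100))) is
   accepted, while an addend of 0x10000 is rejected because it cannot be
   encoded in the signed 16-bit literal field described above.  */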
29650
29651 /* Returns true if COMPARISON is a valid comparison operation and puts
29652 the operands into a form that is valid. */
29653 bool
29654 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29655 {
29656 enum rtx_code code = GET_CODE (*comparison);
29657 int code_int;
29658 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29659 ? GET_MODE (*op2) : GET_MODE (*op1);
29660
29661 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29662
29663 if (code == UNEQ || code == LTGT)
29664 return false;
29665
29666 code_int = (int)code;
29667 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29668 PUT_CODE (*comparison, (enum rtx_code)code_int);
29669
29670 switch (mode)
29671 {
29672 case E_SImode:
29673 if (!arm_add_operand (*op1, mode))
29674 *op1 = force_reg (mode, *op1);
29675 if (!arm_add_operand (*op2, mode))
29676 *op2 = force_reg (mode, *op2);
29677 return true;
29678
29679 case E_DImode:
29680 if (!cmpdi_operand (*op1, mode))
29681 *op1 = force_reg (mode, *op1);
29682 if (!cmpdi_operand (*op2, mode))
29683 *op2 = force_reg (mode, *op2);
29684 return true;
29685
29686 case E_HFmode:
29687 if (!TARGET_VFP_FP16INST)
29688 break;
29689 /* FP16 comparisons are done in SF mode. */
29690 mode = SFmode;
29691 *op1 = convert_to_mode (mode, *op1, 1);
29692 *op2 = convert_to_mode (mode, *op2, 1);
29693 /* Fall through. */
29694 case E_SFmode:
29695 case E_DFmode:
29696 if (!vfp_compare_operand (*op1, mode))
29697 *op1 = force_reg (mode, *op1);
29698 if (!vfp_compare_operand (*op2, mode))
29699 *op2 = force_reg (mode, *op2);
29700 return true;
29701 default:
29702 break;
29703 }
29704
29705 return false;
29706
29707 }
29708
29709 /* Maximum number of instructions to set a block of memory. */
29710 static int
29711 arm_block_set_max_insns (void)
29712 {
29713 if (optimize_function_for_size_p (cfun))
29714 return 4;
29715 else
29716 return current_tune->max_insns_inline_memset;
29717 }
29718
29719 /* Return TRUE if it's profitable to set block of memory for
29720 non-vectorized case. VAL is the value to set the memory
29721 with. LENGTH is the number of bytes to set. ALIGN is the
29722 alignment of the destination memory in bytes. UNALIGNED_P
29723 is TRUE if we can only set the memory with instructions
29724 meeting alignment requirements. USE_STRD_P is TRUE if we
29725 can use strd to set the memory. */
29726 static bool
29727 arm_block_set_non_vect_profit_p (rtx val,
29728 unsigned HOST_WIDE_INT length,
29729 unsigned HOST_WIDE_INT align,
29730 bool unaligned_p, bool use_strd_p)
29731 {
29732 int num = 0;
29733 /* For leftovers of 0-7 bytes, we can set the memory block using
29734 strb/strh/str with the minimum number of instructions. */
29735 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29736
29737 if (unaligned_p)
29738 {
29739 num = arm_const_inline_cost (SET, val);
29740 num += length / align + length % align;
29741 }
29742 else if (use_strd_p)
29743 {
29744 num = arm_const_double_inline_cost (val);
29745 num += (length >> 3) + leftover[length & 7];
29746 }
29747 else
29748 {
29749 num = arm_const_inline_cost (SET, val);
29750 num += (length >> 2) + leftover[length & 3];
29751 }
29752
29753 /* We may be able to combine the last STRH/STRB pair into a single STR
29754 by shifting one byte back. */
29755 if (unaligned_access && length > 3 && (length & 3) == 3)
29756 num--;
29757
29758 return (num <= arm_block_set_max_insns ());
29759 }
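/* For example, for a word-aligned 15-byte block set without strd the
   store count is (15 >> 2) + leftover[15 & 3] == 3 + 2, plus the cost
   of materializing VAL; with unaligned_access the final STRH/STRB pair
   is counted as a single STR, saving one instruction.  */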
29760
29761 /* Return TRUE if it's profitable to set block of memory for
29762 vectorized case. LENGTH is the number of bytes to set.
29763 ALIGN is the alignment of destination memory in bytes.
29764 MODE is the vector mode used to set the memory. */
29765 static bool
29766 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29767 unsigned HOST_WIDE_INT align,
29768 machine_mode mode)
29769 {
29770 int num;
29771 bool unaligned_p = ((align & 3) != 0);
29772 unsigned int nelt = GET_MODE_NUNITS (mode);
29773
29774 /* Instruction loading constant value. */
29775 num = 1;
29776 /* Instructions storing the memory. */
29777 num += (length + nelt - 1) / nelt;
29778 /* Instructions adjusting the address expression. We only need to
29779 adjust the address expression if it's 4-byte aligned and the
29780 leftover bytes can only be stored by a misaligned store instruction. */
29781 if (!unaligned_p && (length & 3) != 0)
29782 num++;
29783
29784 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29785 if (!unaligned_p && mode == V16QImode)
29786 num--;
29787
29788 return (num <= arm_block_set_max_insns ());
29789 }
29790
29791 /* Set a block of memory using vectorization instructions for the
29792 unaligned case. We fill the first LENGTH bytes of the memory
29793 area starting from DSTBASE with byte constant VALUE. ALIGN is
29794 the alignment requirement of memory. Return TRUE if succeeded. */
29795 static bool
29796 arm_block_set_unaligned_vect (rtx dstbase,
29797 unsigned HOST_WIDE_INT length,
29798 unsigned HOST_WIDE_INT value,
29799 unsigned HOST_WIDE_INT align)
29800 {
29801 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29802 rtx dst, mem;
29803 rtx val_elt, val_vec, reg;
29804 rtx rval[MAX_VECT_LEN];
29805 rtx (*gen_func) (rtx, rtx);
29806 machine_mode mode;
29807 unsigned HOST_WIDE_INT v = value;
29808 unsigned int offset = 0;
29809 gcc_assert ((align & 0x3) != 0);
29810 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29811 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29812 if (length >= nelt_v16)
29813 {
29814 mode = V16QImode;
29815 gen_func = gen_movmisalignv16qi;
29816 }
29817 else
29818 {
29819 mode = V8QImode;
29820 gen_func = gen_movmisalignv8qi;
29821 }
29822 nelt_mode = GET_MODE_NUNITS (mode);
29823 gcc_assert (length >= nelt_mode);
29824 /* Skip if it isn't profitable. */
29825 if (!arm_block_set_vect_profit_p (length, align, mode))
29826 return false;
29827
29828 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29829 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29830
29831 v = sext_hwi (v, BITS_PER_WORD);
29832 val_elt = GEN_INT (v);
29833 for (j = 0; j < nelt_mode; j++)
29834 rval[j] = val_elt;
29835
29836 reg = gen_reg_rtx (mode);
29837 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29838 /* Emit instruction loading the constant value. */
29839 emit_move_insn (reg, val_vec);
29840
29841 /* Handle nelt_mode bytes in a vector. */
29842 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29843 {
29844 emit_insn ((*gen_func) (mem, reg));
29845 if (i + 2 * nelt_mode <= length)
29846 {
29847 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29848 offset += nelt_mode;
29849 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29850 }
29851 }
29852
29853 /* If at least nelt_v8 bytes are left over, we must be in
29854 V16QI mode. */
29855 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29856
29857 /* Handle (8, 16) bytes leftover. */
29858 if (i + nelt_v8 < length)
29859 {
29860 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29861 offset += length - i;
29862 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29863
29864 /* We are shifting bytes back, set the alignment accordingly. */
29865 if ((length & 1) != 0 && align >= 2)
29866 set_mem_align (mem, BITS_PER_UNIT);
29867
29868 emit_insn (gen_movmisalignv16qi (mem, reg));
29869 }
29870 /* Handle (0, 8] bytes leftover. */
29871 else if (i < length && i + nelt_v8 >= length)
29872 {
29873 if (mode == V16QImode)
29874 reg = gen_lowpart (V8QImode, reg);
29875
29876 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29877 + (nelt_mode - nelt_v8))));
29878 offset += (length - i) + (nelt_mode - nelt_v8);
29879 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29880
29881 /* We are shifting bytes back, set the alignment accordingly. */
29882 if ((length & 1) != 0 && align >= 2)
29883 set_mem_align (mem, BITS_PER_UNIT);
29884
29885 emit_insn (gen_movmisalignv8qi (mem, reg));
29886 }
29887
29888 return true;
29889 }
29890
29891 /* Set a block of memory using vectorization instructions for the
29892 aligned case. We fill the first LENGTH bytes of the memory area
29893 starting from DSTBASE with byte constant VALUE. ALIGN is the
29894 alignment requirement of memory. Return TRUE if succeeded. */
29895 static bool
29896 arm_block_set_aligned_vect (rtx dstbase,
29897 unsigned HOST_WIDE_INT length,
29898 unsigned HOST_WIDE_INT value,
29899 unsigned HOST_WIDE_INT align)
29900 {
29901 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29902 rtx dst, addr, mem;
29903 rtx val_elt, val_vec, reg;
29904 rtx rval[MAX_VECT_LEN];
29905 machine_mode mode;
29906 unsigned HOST_WIDE_INT v = value;
29907 unsigned int offset = 0;
29908
29909 gcc_assert ((align & 0x3) == 0);
29910 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29911 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29912 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29913 mode = V16QImode;
29914 else
29915 mode = V8QImode;
29916
29917 nelt_mode = GET_MODE_NUNITS (mode);
29918 gcc_assert (length >= nelt_mode);
29919 /* Skip if it isn't profitable. */
29920 if (!arm_block_set_vect_profit_p (length, align, mode))
29921 return false;
29922
29923 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29924
29925 v = sext_hwi (v, BITS_PER_WORD);
29926 val_elt = GEN_INT (v);
29927 for (j = 0; j < nelt_mode; j++)
29928 rval[j] = val_elt;
29929
29930 reg = gen_reg_rtx (mode);
29931 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29932 /* Emit instruction loading the constant value. */
29933 emit_move_insn (reg, val_vec);
29934
29935 i = 0;
29936 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29937 if (mode == V16QImode)
29938 {
29939 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29940 emit_insn (gen_movmisalignv16qi (mem, reg));
29941 i += nelt_mode;
29942 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29943 if (i + nelt_v8 < length && i + nelt_v16 > length)
29944 {
29945 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29946 offset += length - nelt_mode;
29947 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29948 /* We are shifting bytes back, set the alignment accordingly. */
29949 if ((length & 0x3) == 0)
29950 set_mem_align (mem, BITS_PER_UNIT * 4);
29951 else if ((length & 0x1) == 0)
29952 set_mem_align (mem, BITS_PER_UNIT * 2);
29953 else
29954 set_mem_align (mem, BITS_PER_UNIT);
29955
29956 emit_insn (gen_movmisalignv16qi (mem, reg));
29957 return true;
29958 }
29959 /* Fall through for bytes leftover. */
29960 mode = V8QImode;
29961 nelt_mode = GET_MODE_NUNITS (mode);
29962 reg = gen_lowpart (V8QImode, reg);
29963 }
29964
29965 /* Handle 8 bytes in a vector. */
29966 for (; (i + nelt_mode <= length); i += nelt_mode)
29967 {
29968 addr = plus_constant (Pmode, dst, i);
29969 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29970 emit_move_insn (mem, reg);
29971 }
29972
29973 /* Handle single word leftover by shifting 4 bytes back. We can
29974 use aligned access for this case. */
29975 if (i + UNITS_PER_WORD == length)
29976 {
29977 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29978 offset += i - UNITS_PER_WORD;
29979 mem = adjust_automodify_address (dstbase, mode, addr, offset);
29980 /* We are shifting 4 bytes back, set the alignment accordingly. */
29981 if (align > UNITS_PER_WORD)
29982 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29983
29984 emit_move_insn (mem, reg);
29985 }
29986 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29987 We have to use unaligned access for this case. */
29988 else if (i < length)
29989 {
29990 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29991 offset += length - nelt_mode;
29992 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29993 /* We are shifting bytes back, set the alignment accordingly. */
29994 if ((length & 1) == 0)
29995 set_mem_align (mem, BITS_PER_UNIT * 2);
29996 else
29997 set_mem_align (mem, BITS_PER_UNIT);
29998
29999 emit_insn (gen_movmisalignv8qi (mem, reg));
30000 }
30001
30002 return true;
30003 }
30004
30005 /* Set a block of memory using plain strh/strb instructions, only
30006 using instructions allowed by ALIGN on the processor. We fill the
30007 first LENGTH bytes of the memory area starting from DSTBASE
30008 with byte constant VALUE. ALIGN is the alignment requirement
30009 of memory. */
30010 static bool
30011 arm_block_set_unaligned_non_vect (rtx dstbase,
30012 unsigned HOST_WIDE_INT length,
30013 unsigned HOST_WIDE_INT value,
30014 unsigned HOST_WIDE_INT align)
30015 {
30016 unsigned int i;
30017 rtx dst, addr, mem;
30018 rtx val_exp, val_reg, reg;
30019 machine_mode mode;
30020 HOST_WIDE_INT v = value;
30021
30022 gcc_assert (align == 1 || align == 2);
30023
30024 if (align == 2)
30025 v |= (value << BITS_PER_UNIT);
30026
30027 v = sext_hwi (v, BITS_PER_WORD);
30028 val_exp = GEN_INT (v);
30029 /* Skip if it isn't profitable. */
30030 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30031 align, true, false))
30032 return false;
30033
30034 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30035 mode = (align == 2 ? HImode : QImode);
30036 val_reg = force_reg (SImode, val_exp);
30037 reg = gen_lowpart (mode, val_reg);
30038
30039 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30040 {
30041 addr = plus_constant (Pmode, dst, i);
30042 mem = adjust_automodify_address (dstbase, mode, addr, i);
30043 emit_move_insn (mem, reg);
30044 }
30045
30046 /* Handle single byte leftover. */
30047 if (i + 1 == length)
30048 {
30049 reg = gen_lowpart (QImode, val_reg);
30050 addr = plus_constant (Pmode, dst, i);
30051 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30052 emit_move_insn (mem, reg);
30053 i++;
30054 }
30055
30056 gcc_assert (i == length);
30057 return true;
30058 }
30059
30060 /* Set a block of memory using plain strd/str/strh/strb instructions,
30061 to permit unaligned copies on processors which support unaligned
30062 semantics for those instructions. We fill the first LENGTH bytes
30063 of the memory area starting from DSTBASE with byte constant VALUE.
30064 ALIGN is the alignment requirement of memory. */
30065 static bool
30066 arm_block_set_aligned_non_vect (rtx dstbase,
30067 unsigned HOST_WIDE_INT length,
30068 unsigned HOST_WIDE_INT value,
30069 unsigned HOST_WIDE_INT align)
30070 {
30071 unsigned int i;
30072 rtx dst, addr, mem;
30073 rtx val_exp, val_reg, reg;
30074 unsigned HOST_WIDE_INT v;
30075 bool use_strd_p;
30076
30077 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30078 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30079
30080 v = (value | (value << 8) | (value << 16) | (value << 24));
30081 if (length < UNITS_PER_WORD)
30082 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30083
30084 if (use_strd_p)
30085 v |= (v << BITS_PER_WORD);
30086 else
30087 v = sext_hwi (v, BITS_PER_WORD);
30088
30089 val_exp = GEN_INT (v);
30090 /* Skip if it isn't profitable. */
30091 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30092 align, false, use_strd_p))
30093 {
30094 if (!use_strd_p)
30095 return false;
30096
30097 /* Try without strd. */
30098 v = (v >> BITS_PER_WORD);
30099 v = sext_hwi (v, BITS_PER_WORD);
30100 val_exp = GEN_INT (v);
30101 use_strd_p = false;
30102 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30103 align, false, use_strd_p))
30104 return false;
30105 }
30106
30107 i = 0;
30108 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30109 /* Handle double words using strd if possible. */
30110 if (use_strd_p)
30111 {
30112 val_reg = force_reg (DImode, val_exp);
30113 reg = val_reg;
30114 for (; (i + 8 <= length); i += 8)
30115 {
30116 addr = plus_constant (Pmode, dst, i);
30117 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30118 emit_move_insn (mem, reg);
30119 }
30120 }
30121 else
30122 val_reg = force_reg (SImode, val_exp);
30123
30124 /* Handle words. */
30125 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30126 for (; (i + 4 <= length); i += 4)
30127 {
30128 addr = plus_constant (Pmode, dst, i);
30129 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30130 if ((align & 3) == 0)
30131 emit_move_insn (mem, reg);
30132 else
30133 emit_insn (gen_unaligned_storesi (mem, reg));
30134 }
30135
30136 /* Merge last pair of STRH and STRB into a STR if possible. */
30137 if (unaligned_access && i > 0 && (i + 3) == length)
30138 {
30139 addr = plus_constant (Pmode, dst, i - 1);
30140 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30141 /* We are shifting one byte back, set the alignment accordingly. */
30142 if ((align & 1) == 0)
30143 set_mem_align (mem, BITS_PER_UNIT);
30144
30145 /* Most likely this is an unaligned access, and we can't tell at
30146 compilation time. */
30147 emit_insn (gen_unaligned_storesi (mem, reg));
30148 return true;
30149 }
30150
30151 /* Handle half word leftover. */
30152 if (i + 2 <= length)
30153 {
30154 reg = gen_lowpart (HImode, val_reg);
30155 addr = plus_constant (Pmode, dst, i);
30156 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30157 if ((align & 1) == 0)
30158 emit_move_insn (mem, reg);
30159 else
30160 emit_insn (gen_unaligned_storehi (mem, reg));
30161
30162 i += 2;
30163 }
30164
30165 /* Handle single byte leftover. */
30166 if (i + 1 == length)
30167 {
30168 reg = gen_lowpart (QImode, val_reg);
30169 addr = plus_constant (Pmode, dst, i);
30170 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30171 emit_move_insn (mem, reg);
30172 }
30173
30174 return true;
30175 }
30176
30177 /* Set a block of memory using vectorization instructions for both
30178 aligned and unaligned cases. We fill the first LENGTH bytes of
30179 the memory area starting from DSTBASE with byte constant VALUE.
30180 ALIGN is the alignment requirement of memory. */
30181 static bool
30182 arm_block_set_vect (rtx dstbase,
30183 unsigned HOST_WIDE_INT length,
30184 unsigned HOST_WIDE_INT value,
30185 unsigned HOST_WIDE_INT align)
30186 {
30187 /* Check whether we need to use unaligned store instruction. */
30188 if (((align & 3) != 0 || (length & 3) != 0)
30189 /* Check whether unaligned store instruction is available. */
30190 && (!unaligned_access || BYTES_BIG_ENDIAN))
30191 return false;
30192
30193 if ((align & 3) == 0)
30194 return arm_block_set_aligned_vect (dstbase, length, value, align);
30195 else
30196 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30197 }
30198
30199 /* Expand string store operation. First we try to do that using
30200 vectorization instructions, then try with ARM unaligned access and
30201 double-word store if profitable. OPERANDS[0] is the destination,
30202 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30203 initialize the memory, OPERANDS[3] is the known alignment of the
30204 destination. */
30205 bool
30206 arm_gen_setmem (rtx *operands)
30207 {
30208 rtx dstbase = operands[0];
30209 unsigned HOST_WIDE_INT length;
30210 unsigned HOST_WIDE_INT value;
30211 unsigned HOST_WIDE_INT align;
30212
30213 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30214 return false;
30215
30216 length = UINTVAL (operands[1]);
30217 if (length > 64)
30218 return false;
30219
30220 value = (UINTVAL (operands[2]) & 0xFF);
30221 align = UINTVAL (operands[3]);
30222 if (TARGET_NEON && length >= 8
30223 && current_tune->string_ops_prefer_neon
30224 && arm_block_set_vect (dstbase, length, value, align))
30225 return true;
30226
30227 if (!unaligned_access && (align & 3) != 0)
30228 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30229
30230 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30231 }
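/* For example, a 16-byte memset of a word-aligned buffer can typically
   be handled by arm_block_set_vect when NEON is available and the
   tuning prefers NEON string operations; otherwise it falls through to
   the word-store sequence in arm_block_set_aligned_non_vect.  */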
30232
30233
30234 static bool
30235 arm_macro_fusion_p (void)
30236 {
30237 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30238 }
30239
30240 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30241 for MOVW / MOVT macro fusion. */
30242
30243 static bool
30244 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30245 {
30246 /* We are trying to fuse
30247 movw imm / movt imm
30248 instructions as a group that gets scheduled together. */
30249
30250 rtx set_dest = SET_DEST (curr_set);
30251
30252 if (GET_MODE (set_dest) != SImode)
30253 return false;
30254
30255 /* We are trying to match:
30256 prev (movw) == (set (reg r0) (const_int imm16))
30257 curr (movt) == (set (zero_extract (reg r0)
30258 (const_int 16)
30259 (const_int 16))
30260 (const_int imm16_1))
30261 or
30262 prev (movw) == (set (reg r1)
30263 (high (symbol_ref ("SYM"))))
30264 curr (movt) == (set (reg r0)
30265 (lo_sum (reg r1)
30266 (symbol_ref ("SYM")))) */
30267
30268 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30269 {
30270 if (CONST_INT_P (SET_SRC (curr_set))
30271 && CONST_INT_P (SET_SRC (prev_set))
30272 && REG_P (XEXP (set_dest, 0))
30273 && REG_P (SET_DEST (prev_set))
30274 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30275 return true;
30276
30277 }
30278 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30279 && REG_P (SET_DEST (curr_set))
30280 && REG_P (SET_DEST (prev_set))
30281 && GET_CODE (SET_SRC (prev_set)) == HIGH
30282 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30283 return true;
30284
30285 return false;
30286 }
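/* In assembly terms this keeps pairs such as "movw r0, #:lower16:sym" /
   "movt r0, #:upper16:sym" back to back, so cores with MOVW/MOVT fusion
   can issue them together.  */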
30287
30288 static bool
30289 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30290 {
30291 rtx prev_set = single_set (prev);
30292 rtx curr_set = single_set (curr);
30293
30294 if (!prev_set
30295 || !curr_set)
30296 return false;
30297
30298 if (any_condjump_p (curr))
30299 return false;
30300
30301 if (!arm_macro_fusion_p ())
30302 return false;
30303
30304 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30305 && aarch_crypto_can_dual_issue (prev, curr))
30306 return true;
30307
30308 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30309 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30310 return true;
30311
30312 return false;
30313 }
30314
30315 /* Return true iff the instruction fusion described by OP is enabled. */
30316 bool
30317 arm_fusion_enabled_p (tune_params::fuse_ops op)
30318 {
30319 return current_tune->fusible_ops & op;
30320 }
30321
30322 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30323 scheduled for speculative execution. Reject the long-running division
30324 and square-root instructions. */
30325
30326 static bool
30327 arm_sched_can_speculate_insn (rtx_insn *insn)
30328 {
30329 switch (get_attr_type (insn))
30330 {
30331 case TYPE_SDIV:
30332 case TYPE_UDIV:
30333 case TYPE_FDIVS:
30334 case TYPE_FDIVD:
30335 case TYPE_FSQRTS:
30336 case TYPE_FSQRTD:
30337 case TYPE_NEON_FP_SQRT_S:
30338 case TYPE_NEON_FP_SQRT_D:
30339 case TYPE_NEON_FP_SQRT_S_Q:
30340 case TYPE_NEON_FP_SQRT_D_Q:
30341 case TYPE_NEON_FP_DIV_S:
30342 case TYPE_NEON_FP_DIV_D:
30343 case TYPE_NEON_FP_DIV_S_Q:
30344 case TYPE_NEON_FP_DIV_D_Q:
30345 return false;
30346 default:
30347 return true;
30348 }
30349 }
30350
30351 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30352
30353 static unsigned HOST_WIDE_INT
30354 arm_asan_shadow_offset (void)
30355 {
30356 return HOST_WIDE_INT_1U << 29;
30357 }
30358
30359
30360 /* This is a temporary fix for PR60655. Ideally we need
30361 to handle most of these cases in the generic part but
30362 currently we reject minus (..) (sym_ref). We try to
30363 ameliorate the case with minus (sym_ref1) (sym_ref2)
30364 where they are in the same section. */
30365
30366 static bool
30367 arm_const_not_ok_for_debug_p (rtx p)
30368 {
30369 tree decl_op0 = NULL;
30370 tree decl_op1 = NULL;
30371
30372 if (GET_CODE (p) == MINUS)
30373 {
30374 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30375 {
30376 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30377 if (decl_op1
30378 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30379 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30380 {
30381 if ((VAR_P (decl_op1)
30382 || TREE_CODE (decl_op1) == CONST_DECL)
30383 && (VAR_P (decl_op0)
30384 || TREE_CODE (decl_op0) == CONST_DECL))
30385 return (get_variable_section (decl_op1, false)
30386 != get_variable_section (decl_op0, false));
30387
30388 if (TREE_CODE (decl_op1) == LABEL_DECL
30389 && TREE_CODE (decl_op0) == LABEL_DECL)
30390 return (DECL_CONTEXT (decl_op1)
30391 != DECL_CONTEXT (decl_op0));
30392 }
30393
30394 return true;
30395 }
30396 }
30397
30398 return false;
30399 }
30400
30401 /* Return TRUE if X is a reference to a value in a constant pool. */
30402 extern bool
30403 arm_is_constant_pool_ref (rtx x)
30404 {
30405 return (MEM_P (x)
30406 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30407 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30408 }
30409
30410 /* Remember the last target of arm_set_current_function. */
30411 static GTY(()) tree arm_previous_fndecl;
30412
30413 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30414
30415 void
30416 save_restore_target_globals (tree new_tree)
30417 {
30418 /* If we have a previous state, use it. */
30419 if (TREE_TARGET_GLOBALS (new_tree))
30420 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30421 else if (new_tree == target_option_default_node)
30422 restore_target_globals (&default_target_globals);
30423 else
30424 {
30425 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30426 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30427 }
30428
30429 arm_option_params_internal ();
30430 }
30431
30432 /* Invalidate arm_previous_fndecl. */
30433
30434 void
30435 arm_reset_previous_fndecl (void)
30436 {
30437 arm_previous_fndecl = NULL_TREE;
30438 }
30439
30440 /* Establish appropriate back-end context for processing the function
30441 FNDECL. The argument might be NULL to indicate processing at top
30442 level, outside of any function scope. */
30443
30444 static void
30445 arm_set_current_function (tree fndecl)
30446 {
30447 if (!fndecl || fndecl == arm_previous_fndecl)
30448 return;
30449
30450 tree old_tree = (arm_previous_fndecl
30451 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30452 : NULL_TREE);
30453
30454 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30455
30456 /* If the current function has no attributes but the previous one did,
30457 use the default node. */
30458 if (! new_tree && old_tree)
30459 new_tree = target_option_default_node;
30460
30461 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop to
30462 the default have been handled by save_restore_target_globals from
30463 arm_pragma_target_parse. */
30464 if (old_tree == new_tree)
30465 return;
30466
30467 arm_previous_fndecl = fndecl;
30468
30469 /* First set the target options. */
30470 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30471
30472 save_restore_target_globals (new_tree);
30473 }
30474
30475 /* Implement TARGET_OPTION_PRINT. */
30476
30477 static void
30478 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30479 {
30480 int flags = ptr->x_target_flags;
30481 const char *fpu_name;
30482
30483 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30484 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30485
30486 fprintf (file, "%*sselected isa %s\n", indent, "",
30487 TARGET_THUMB2_P (flags) ? "thumb2" :
30488 TARGET_THUMB_P (flags) ? "thumb1" :
30489 "arm");
30490
30491 if (ptr->x_arm_arch_string)
30492 fprintf (file, "%*sselected architecture %s\n", indent, "",
30493 ptr->x_arm_arch_string);
30494
30495 if (ptr->x_arm_cpu_string)
30496 fprintf (file, "%*sselected CPU %s\n", indent, "",
30497 ptr->x_arm_cpu_string);
30498
30499 if (ptr->x_arm_tune_string)
30500 fprintf (file, "%*sselected tune %s\n", indent, "",
30501 ptr->x_arm_tune_string);
30502
30503 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30504 }
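/* As a sketch of the output produced above, a function compiled with, say,
   -mthumb -march=armv7-a -mfpu=vfpv3-d16 would be dumped roughly as

     selected isa thumb2
     selected architecture armv7-a
     selected fpu vfpv3-d16

   with the CPU and tune lines only present when the corresponding strings
   were given.  */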
30505
30506 /* Hook to determine if one function can safely inline another. */
30507
30508 static bool
30509 arm_can_inline_p (tree caller, tree callee)
30510 {
30511 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30512 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30513 bool can_inline = true;
30514
30515 struct cl_target_option *caller_opts
30516 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30517 : target_option_default_node);
30518
30519 struct cl_target_option *callee_opts
30520 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30521 : target_option_default_node);
30522
30523 if (callee_opts == caller_opts)
30524 return true;
30525
30526 /* Callee's ISA features should be a subset of the caller's. */
30527 struct arm_build_target caller_target;
30528 struct arm_build_target callee_target;
30529 caller_target.isa = sbitmap_alloc (isa_num_bits);
30530 callee_target.isa = sbitmap_alloc (isa_num_bits);
30531
30532 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30533 false);
30534 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30535 false);
30536 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30537 can_inline = false;
30538
30539 sbitmap_free (caller_target.isa);
30540 sbitmap_free (callee_target.isa);
30541
30542 /* It is OK to inline between different modes.
30543 Functions with mode-specific instructions, e.g. using inline asm,
30544 must be explicitly protected with noinline.  */
30545 return can_inline;
30546 }
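/* Illustrative sketch (hypothetical user code): under the subset rule above,
   a callee built for a plain target can be inlined into a caller carrying
   extra ISA features, but not the other way round:

     __attribute__((target("fpu=neon"))) int uses_neon (int);
     int plain (int x) { return x + 1; }

     __attribute__((target("fpu=neon")))
     int ok (int x) { return plain (x); }      // callee ISA is a subset: OK

     int no (int x) { return uses_neon (x); }  // callee needs more: no inline
*/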
30547
30548 /* Hook to fix function's alignment affected by target attribute. */
30549
30550 static void
30551 arm_relayout_function (tree fndecl)
30552 {
30553 if (DECL_USER_ALIGN (fndecl))
30554 return;
30555
30556 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30557
30558 if (!callee_tree)
30559 callee_tree = target_option_default_node;
30560
30561 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30562 SET_DECL_ALIGN
30563 (fndecl,
30564 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30565 }
30566
30567 /* Inner function to process the attribute((target(...))); it takes an argument
30568 and sets the current options from that argument.  If we have a list,
30569 recursively go over the list.  */
30570
30571 static bool
30572 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30573 {
30574 if (TREE_CODE (args) == TREE_LIST)
30575 {
30576 bool ret = true;
30577
30578 for (; args; args = TREE_CHAIN (args))
30579 if (TREE_VALUE (args)
30580 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30581 ret = false;
30582 return ret;
30583 }
30584
30585 else if (TREE_CODE (args) != STRING_CST)
30586 {
30587 error ("attribute %<target%> argument not a string");
30588 return false;
30589 }
30590
30591 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30592 char *q;
30593
30594 while ((q = strtok (argstr, ",")) != NULL)
30595 {
30596 while (ISSPACE (*q)) ++q;
30597
30598 argstr = NULL;
30599 if (!strncmp (q, "thumb", 5))
30600 opts->x_target_flags |= MASK_THUMB;
30601
30602 else if (!strncmp (q, "arm", 3))
30603 opts->x_target_flags &= ~MASK_THUMB;
30604
30605 else if (!strncmp (q, "fpu=", 4))
30606 {
30607 int fpu_index;
30608 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30609 &fpu_index, CL_TARGET))
30610 {
30611 error ("invalid fpu for attribute(target(\"%s\"))", q);
30612 return false;
30613 }
30614 if (fpu_index == TARGET_FPU_auto)
30615 {
30616 /* This doesn't really make sense until we support
30617 general dynamic selection of the architecture and all
30618 sub-features. */
30619 sorry ("auto fpu selection not currently permitted here");
30620 return false;
30621 }
30622 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30623 }
30624 else
30625 {
30626 error ("attribute(target(\"%s\")) is unknown", q);
30627 return false;
30628 }
30629 }
30630
30631 return true;
30632 }
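/* For illustration, the attribute strings parsed above are comma-separated
   lists built from "thumb", "arm" and "fpu=<name>", e.g. (hypothetical user
   code):

     __attribute__((target("thumb,fpu=vfpv3-d16"))) void f (void);
     __attribute__((target("arm"))) void g (void);

   Any other token is rejected with the "is unknown" error above.  */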
30633
30634 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30635
30636 tree
30637 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30638 struct gcc_options *opts_set)
30639 {
30640 struct cl_target_option cl_opts;
30641
30642 if (!arm_valid_target_attribute_rec (args, opts))
30643 return NULL_TREE;
30644
30645 cl_target_option_save (&cl_opts, opts);
30646 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30647 arm_option_check_internal (opts);
30648 /* Do any overrides, such as global options arch=xxx. */
30649 arm_option_override_internal (opts, opts_set);
30650
30651 return build_target_option_node (opts);
30652 }
30653
30654 static void
30655 add_attribute (const char * mode, tree *attributes)
30656 {
30657 size_t len = strlen (mode);
30658 tree value = build_string (len, mode);
30659
30660 TREE_TYPE (value) = build_array_type (char_type_node,
30661 build_index_type (size_int (len)));
30662
30663 *attributes = tree_cons (get_identifier ("target"),
30664 build_tree_list (NULL_TREE, value),
30665 *attributes);
30666 }
30667
30668 /* For testing only: alternately insert thumb and arm modes on functions.  */
30669
30670 static void
30671 arm_insert_attributes (tree fndecl, tree * attributes)
30672 {
30673 const char *mode;
30674
30675 if (! TARGET_FLIP_THUMB)
30676 return;
30677
30678 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30679 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30680 return;
30681
30682 /* Nested definitions must inherit mode. */
30683 if (current_function_decl)
30684 {
30685 mode = TARGET_THUMB ? "thumb" : "arm";
30686 add_attribute (mode, attributes);
30687 return;
30688 }
30689
30690 /* If there is already a setting don't change it. */
30691 if (lookup_attribute ("target", *attributes) != NULL)
30692 return;
30693
30694 mode = thumb_flipper ? "thumb" : "arm";
30695 add_attribute (mode, attributes);
30696
30697 thumb_flipper = !thumb_flipper;
30698 }
30699
30700 /* Hook to validate attribute((target("string"))). */
30701
30702 static bool
30703 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30704 tree args, int ARG_UNUSED (flags))
30705 {
30706 bool ret = true;
30707 struct gcc_options func_options;
30708 tree cur_tree, new_optimize;
30709 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30710
30711 /* Get the optimization options of the current function. */
30712 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30713
30714 /* If the function changed the optimization levels as well as setting target
30715 options, start with the optimizations specified. */
30716 if (!func_optimize)
30717 func_optimize = optimization_default_node;
30718
30719 /* Init func_options. */
30720 memset (&func_options, 0, sizeof (func_options));
30721 init_options_struct (&func_options, NULL);
30722 lang_hooks.init_options_struct (&func_options);
30723
30724 /* Initialize func_options to the defaults. */
30725 cl_optimization_restore (&func_options,
30726 TREE_OPTIMIZATION (func_optimize));
30727
30728 cl_target_option_restore (&func_options,
30729 TREE_TARGET_OPTION (target_option_default_node));
30730
30731 /* Set func_options flags with new target mode. */
30732 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30733 &global_options_set);
30734
30735 if (cur_tree == NULL_TREE)
30736 ret = false;
30737
30738 new_optimize = build_optimization_node (&func_options);
30739
30740 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30741
30742 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30743
30744 finalize_options_struct (&func_options);
30745
30746 return ret;
30747 }
30748
30749 /* Match an ISA feature bitmap to a named FPU. We always use the
30750 first entry that exactly matches the feature set, so that we
30751 effectively canonicalize the FPU name for the assembler. */
30752 static const char*
30753 arm_identify_fpu_from_isa (sbitmap isa)
30754 {
30755 auto_sbitmap fpubits (isa_num_bits);
30756 auto_sbitmap cand_fpubits (isa_num_bits);
30757
30758 bitmap_and (fpubits, isa, isa_all_fpubits);
30759
30760 /* If there are no ISA feature bits relating to the FPU, we must be
30761 doing soft-float. */
30762 if (bitmap_empty_p (fpubits))
30763 return "softvfp";
30764
30765 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30766 {
30767 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30768 if (bitmap_equal_p (fpubits, cand_fpubits))
30769 return all_fpus[i].name;
30770 }
30771 /* We must find an entry, or things have gone wrong. */
30772 gcc_unreachable ();
30773 }
30774
30775 void
30776 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30777 {
30778
30779 fprintf (stream, "\t.syntax unified\n");
30780
30781 if (TARGET_THUMB)
30782 {
30783 if (is_called_in_ARM_mode (decl)
30784 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30785 && cfun->is_thunk))
30786 fprintf (stream, "\t.code 32\n");
30787 else if (TARGET_THUMB1)
30788 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30789 else
30790 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30791 }
30792 else
30793 fprintf (stream, "\t.arm\n");
30794
30795 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30796 (TARGET_SOFT_FLOAT
30797 ? "softvfp"
30798 : arm_identify_fpu_from_isa (arm_active_target.isa)));
30799
30800 if (TARGET_POKE_FUNCTION_NAME)
30801 arm_poke_function_name (stream, (const char *) name);
30802 }
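/* As a rough sketch, for a Thumb-2 function on a target with a VFP unit the
   directives emitted above look like

     .syntax unified
     .thumb
     .thumb_func
     .fpu vfpv3-d16

   with ".code 32"/".arm" variants for ARM-mode code and "softvfp" when
   soft-float is in effect (the FPU name shown is only an example).  */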
30803
30804 /* If MEM is in the form of [base+offset], extract the two parts
30805 of the address and store them in BASE and OFFSET; otherwise return false
30806 after clearing BASE and OFFSET.  */
30807
30808 static bool
30809 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30810 {
30811 rtx addr;
30812
30813 gcc_assert (MEM_P (mem));
30814
30815 addr = XEXP (mem, 0);
30816
30817 /* Strip off const from addresses like (const (addr)). */
30818 if (GET_CODE (addr) == CONST)
30819 addr = XEXP (addr, 0);
30820
30821 if (GET_CODE (addr) == REG)
30822 {
30823 *base = addr;
30824 *offset = const0_rtx;
30825 return true;
30826 }
30827
30828 if (GET_CODE (addr) == PLUS
30829 && GET_CODE (XEXP (addr, 0)) == REG
30830 && CONST_INT_P (XEXP (addr, 1)))
30831 {
30832 *base = XEXP (addr, 0);
30833 *offset = XEXP (addr, 1);
30834 return true;
30835 }
30836
30837 *base = NULL_RTX;
30838 *offset = NULL_RTX;
30839
30840 return false;
30841 }
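/* For illustration: given a MEM whose address is
     (plus (reg r3) (const_int 8))
   the function above returns true with *BASE = (reg r3) and
   *OFFSET = (const_int 8); a bare register address yields an offset of 0,
   and any other address form returns false.  */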
30842
30843 /* If INSN is a load or store of an address in the form of [base+offset],
30844 extract the two parts and store them in BASE and OFFSET.  IS_LOAD is set
30845 to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
30846 otherwise return FALSE.  */
30847
30848 static bool
30849 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30850 {
30851 rtx x, dest, src;
30852
30853 gcc_assert (INSN_P (insn));
30854 x = PATTERN (insn);
30855 if (GET_CODE (x) != SET)
30856 return false;
30857
30858 src = SET_SRC (x);
30859 dest = SET_DEST (x);
30860 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30861 {
30862 *is_load = false;
30863 extract_base_offset_in_addr (dest, base, offset);
30864 }
30865 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30866 {
30867 *is_load = true;
30868 extract_base_offset_in_addr (src, base, offset);
30869 }
30870 else
30871 return false;
30872
30873 return (*base != NULL_RTX && *offset != NULL_RTX);
30874 }
30875
30876 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30877
30878 Currently we only support fusing ldr and str instructions, so FUSION_PRI
30879 and PRI are only calculated for these instructions.  For other instructions,
30880 FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds of
30881 instruction fusion can be supported by returning different priorities.
30882
30883 It's important that irrelevant instructions get the largest FUSION_PRI. */
30884
30885 static void
30886 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30887 int *fusion_pri, int *pri)
30888 {
30889 int tmp, off_val;
30890 bool is_load;
30891 rtx base, offset;
30892
30893 gcc_assert (INSN_P (insn));
30894
30895 tmp = max_pri - 1;
30896 if (!fusion_load_store (insn, &base, &offset, &is_load))
30897 {
30898 *pri = tmp;
30899 *fusion_pri = tmp;
30900 return;
30901 }
30902
30903 /* Load goes first. */
30904 if (is_load)
30905 *fusion_pri = tmp - 1;
30906 else
30907 *fusion_pri = tmp - 2;
30908
30909 tmp /= 2;
30910
30911 /* INSN with smaller base register goes first. */
30912 tmp -= ((REGNO (base) & 0xff) << 20);
30913
30914 /* INSN with smaller offset goes first. */
30915 off_val = (int)(INTVAL (offset));
30916 if (off_val >= 0)
30917 tmp -= (off_val & 0xfffff);
30918 else
30919 tmp += ((- off_val) & 0xfffff);
30920
30921 *pri = tmp;
30922 return;
30923 }
30924
30925
30926 /* Construct and return a PARALLEL RTX vector with elements numbering the
30927 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30928 the vector - from the perspective of the architecture. This does not
30929 line up with GCC's perspective on lane numbers, so we end up with
30930 different masks depending on our target endian-ness. The diagram
30931 below may help. We must draw the distinction when building masks
30932 which select one half of the vector. An instruction selecting
30933 architectural low-lanes for a big-endian target must be described using
30934 a mask selecting GCC high-lanes.
30935
30936 Big-Endian Little-Endian
30937
30938 GCC 0 1 2 3 3 2 1 0
30939 | x | x | x | x | | x | x | x | x |
30940 Architecture 3 2 1 0 3 2 1 0
30941
30942 Low Mask: { 2, 3 } { 0, 1 }
30943 High Mask: { 0, 1 } { 2, 3 }
30944 */
30945
30946 rtx
30947 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30948 {
30949 int nunits = GET_MODE_NUNITS (mode);
30950 rtvec v = rtvec_alloc (nunits / 2);
30951 int high_base = nunits / 2;
30952 int low_base = 0;
30953 int base;
30954 rtx t1;
30955 int i;
30956
30957 if (BYTES_BIG_ENDIAN)
30958 base = high ? low_base : high_base;
30959 else
30960 base = high ? high_base : low_base;
30961
30962 for (i = 0; i < nunits / 2; i++)
30963 RTVEC_ELT (v, i) = GEN_INT (base + i);
30964
30965 t1 = gen_rtx_PARALLEL (mode, v);
30966 return t1;
30967 }
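/* Worked example for the function above: for V4SImode (nunits == 4) on a
   little-endian target, HIGH selects lanes { 2, 3 } and !HIGH selects
   { 0, 1 }; on a big-endian target the two masks are swapped, as in the
   diagram above.  */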
30968
30969 /* Check OP for validity as a PARALLEL RTX vector with elements
30970 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30971 from the perspective of the architecture.  See the diagram above
30972 arm_simd_vect_par_cnst_half for more details.  */
30973
30974 bool
30975 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
30976 bool high)
30977 {
30978 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
30979 HOST_WIDE_INT count_op = XVECLEN (op, 0);
30980 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
30981 int i = 0;
30982
30983 if (!VECTOR_MODE_P (mode))
30984 return false;
30985
30986 if (count_op != count_ideal)
30987 return false;
30988
30989 for (i = 0; i < count_ideal; i++)
30990 {
30991 rtx elt_op = XVECEXP (op, 0, i);
30992 rtx elt_ideal = XVECEXP (ideal, 0, i);
30993
30994 if (!CONST_INT_P (elt_op)
30995 || INTVAL (elt_ideal) != INTVAL (elt_op))
30996 return false;
30997 }
30998 return true;
30999 }
31000
31001 /* Can output mi_thunk for all cases except for non-zero vcall_offset
31002 in Thumb1. */
31003 static bool
31004 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
31005 const_tree)
31006 {
31007 /* For now, we punt and do not handle this for TARGET_THUMB1.  */
31008 if (vcall_offset && TARGET_THUMB1)
31009 return false;
31010
31011 /* Otherwise ok. */
31012 return true;
31013 }
31014
31015 /* Generate RTL for a conditional branch with rtx comparison CODE in
31016 mode CC_MODE. The destination of the unlikely conditional branch
31017 is LABEL_REF. */
31018
31019 void
31020 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
31021 rtx label_ref)
31022 {
31023 rtx x;
31024 x = gen_rtx_fmt_ee (code, VOIDmode,
31025 gen_rtx_REG (cc_mode, CC_REGNUM),
31026 const0_rtx);
31027
31028 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31029 gen_rtx_LABEL_REF (VOIDmode, label_ref),
31030 pc_rtx);
31031 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31032 }
31033
31034 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31035
31036 For pure-code sections there is no letter code for this attribute, so
31037 output all the section flags numerically when this is needed. */
31038
31039 static bool
31040 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31041 {
31042
31043 if (flags & SECTION_ARM_PURECODE)
31044 {
31045 *num = 0x20000000;
31046
31047 if (!(flags & SECTION_DEBUG))
31048 *num |= 0x2;
31049 if (flags & SECTION_EXCLUDE)
31050 *num |= 0x80000000;
31051 if (flags & SECTION_WRITE)
31052 *num |= 0x1;
31053 if (flags & SECTION_CODE)
31054 *num |= 0x4;
31055 if (flags & SECTION_MERGE)
31056 *num |= 0x10;
31057 if (flags & SECTION_STRINGS)
31058 *num |= 0x20;
31059 if (flags & SECTION_TLS)
31060 *num |= 0x400;
31061 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31062 *num |= 0x200;
31063
31064 return true;
31065 }
31066
31067 return false;
31068 }
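/* Worked example: an ordinary pure-code text section (SECTION_CODE set,
   SECTION_DEBUG and the other flags clear) gets
     0x20000000 | 0x2 | 0x4 == 0x20000006
   i.e. SHF_ARM_PURECODE together with the allocatable and executable
   section flags.  */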
31069
31070 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31071
31072 If pure-code is passed as an option, make sure all functions are in
31073 sections that have the SHF_ARM_PURECODE attribute. */
31074
31075 static section *
31076 arm_function_section (tree decl, enum node_frequency freq,
31077 bool startup, bool exit)
31078 {
31079 const char * section_name;
31080 section * sec;
31081
31082 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31083 return default_function_section (decl, freq, startup, exit);
31084
31085 if (!target_pure_code)
31086 return default_function_section (decl, freq, startup, exit);
31087
31088
31089 section_name = DECL_SECTION_NAME (decl);
31090
31091 /* If a function is not in a named section then it falls under the 'default'
31092 text section, also known as '.text'. We can preserve previous behavior as
31093 the default text section already has the SHF_ARM_PURECODE section
31094 attribute. */
31095 if (!section_name)
31096 {
31097 section *default_sec = default_function_section (decl, freq, startup,
31098 exit);
31099
31100 /* If default_sec is not null, then it must be a special section like for
31101 example .text.startup. We set the pure-code attribute and return the
31102 same section to preserve existing behavior. */
31103 if (default_sec)
31104 default_sec->common.flags |= SECTION_ARM_PURECODE;
31105 return default_sec;
31106 }
31107
31108 /* Otherwise look whether a section has already been created with
31109 'section_name'. */
31110 sec = get_named_section (decl, section_name, 0);
31111 if (!sec)
31112 /* If that is not the case passing NULL as the section's name to
31113 'get_named_section' will create a section with the declaration's
31114 section name. */
31115 sec = get_named_section (decl, NULL, 0);
31116
31117 /* Set the SHF_ARM_PURECODE attribute. */
31118 sec->common.flags |= SECTION_ARM_PURECODE;
31119
31120 return sec;
31121 }
31122
31123 /* Implements the TARGET_SECTION_FLAGS hook.
31124
31125 If DECL is a function declaration and pure-code is passed as an option
31126 then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
31127 section's name and RELOC indicates whether the declaration's initializer may
31128 contain runtime relocations. */
31129
31130 static unsigned int
31131 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31132 {
31133 unsigned int flags = default_section_type_flags (decl, name, reloc);
31134
31135 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31136 flags |= SECTION_ARM_PURECODE;
31137
31138 return flags;
31139 }
31140
31141 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31142
31143 static void
31144 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31145 rtx op0, rtx op1,
31146 rtx *quot_p, rtx *rem_p)
31147 {
31148 if (mode == SImode)
31149 gcc_assert (!TARGET_IDIV);
31150
31151 scalar_int_mode libval_mode
31152 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31153
31154 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31155 libval_mode,
31156 op0, GET_MODE (op0),
31157 op1, GET_MODE (op1));
31158
31159 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31160 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31161 GET_MODE_SIZE (mode));
31162
31163 gcc_assert (quotient);
31164 gcc_assert (remainder);
31165
31166 *quot_p = quotient;
31167 *rem_p = remainder;
31168 }
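/* For illustration: with MODE == SImode the call above returns a DImode
   value holding both results; the quotient is extracted as the subreg at
   byte offset 0 and the remainder as the subreg at byte offset
   GET_MODE_SIZE (SImode) == 4, which lines up with the __aeabi_*divmod
   routines returning the { quotient, remainder } pair.  */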
31169
31170 /* This function checks for the availability of the coprocessor builtin passed
31171 in BUILTIN for the current target.  Returns true if it is available and
31172 false otherwise.  If a BUILTIN is passed for which this function has not
31173 been implemented, it will cause an internal compiler error (gcc_unreachable).  */
31174
31175 bool
31176 arm_coproc_builtin_available (enum unspecv builtin)
31177 {
31178 /* None of these builtins are available in Thumb mode if the target only
31179 supports Thumb-1. */
31180 if (TARGET_THUMB1)
31181 return false;
31182
31183 switch (builtin)
31184 {
31185 case VUNSPEC_CDP:
31186 case VUNSPEC_LDC:
31187 case VUNSPEC_LDCL:
31188 case VUNSPEC_STC:
31189 case VUNSPEC_STCL:
31190 case VUNSPEC_MCR:
31191 case VUNSPEC_MRC:
31192 if (arm_arch4)
31193 return true;
31194 break;
31195 case VUNSPEC_CDP2:
31196 case VUNSPEC_LDC2:
31197 case VUNSPEC_LDC2L:
31198 case VUNSPEC_STC2:
31199 case VUNSPEC_STC2L:
31200 case VUNSPEC_MCR2:
31201 case VUNSPEC_MRC2:
31202 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31203 ARMv8-{A,M}. */
31204 if (arm_arch5)
31205 return true;
31206 break;
31207 case VUNSPEC_MCRR:
31208 case VUNSPEC_MRRC:
31209 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31210 ARMv8-{A,M}. */
31211 if (arm_arch6 || arm_arch5te)
31212 return true;
31213 break;
31214 case VUNSPEC_MCRR2:
31215 case VUNSPEC_MRRC2:
31216 if (arm_arch6)
31217 return true;
31218 break;
31219 default:
31220 gcc_unreachable ();
31221 }
31222 return false;
31223 }
31224
31225 /* This function returns true if OP is a valid memory operand for the ldc and
31226 stc coprocessor instructions and false otherwise. */
31227
31228 bool
31229 arm_coproc_ldc_stc_legitimate_address (rtx op)
31230 {
31231 HOST_WIDE_INT range;
31232 /* Has to be a memory operand. */
31233 if (!MEM_P (op))
31234 return false;
31235
31236 op = XEXP (op, 0);
31237
31238 /* We accept registers. */
31239 if (REG_P (op))
31240 return true;
31241
31242 switch (GET_CODE (op))
31243 {
31244 case PLUS:
31245 {
31246 /* Or registers with an offset. */
31247 if (!REG_P (XEXP (op, 0)))
31248 return false;
31249
31250 op = XEXP (op, 1);
31251
31252 /* The offset must be an immediate though. */
31253 if (!CONST_INT_P (op))
31254 return false;
31255
31256 range = INTVAL (op);
31257
31258 /* Within the range of [-1020,1020]. */
31259 if (!IN_RANGE (range, -1020, 1020))
31260 return false;
31261
31262 /* And a multiple of 4. */
31263 return (range % 4) == 0;
31264 }
31265 case PRE_INC:
31266 case POST_INC:
31267 case PRE_DEC:
31268 case POST_DEC:
31269 return REG_P (XEXP (op, 0));
31270 default:
31271 gcc_unreachable ();
31272 }
31273 return false;
31274 }
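/* For illustration: an address such as (plus (reg r2) (const_int 1020)) is
   accepted by the check above, whereas offsets of 1022 (not a multiple of 4)
   or 1024 (outside [-1020, 1020]) are rejected; plain registers and
   pre/post increment or decrement of a register are also accepted.  */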
31275
31276 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31277
31278 In VFPv1, VFP registers could only be accessed in the mode they were
31279 set, so subregs would be invalid there. However, we don't support
31280 VFPv1 at the moment, and the restriction was lifted in VFPv2.
31281
31282 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31283 VFP registers in little-endian order. We can't describe that accurately to
31284 GCC, so avoid taking subregs of such values.
31285
31286 The only exception is going from a 128-bit to a 64-bit type. In that
31287 case the data layout happens to be consistent for big-endian, so we
31288 explicitly allow that case. */
31289
31290 static bool
31291 arm_can_change_mode_class (machine_mode from, machine_mode to,
31292 reg_class_t rclass)
31293 {
31294 if (TARGET_BIG_END
31295 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
31296 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
31297 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
31298 && reg_classes_intersect_p (VFP_REGS, rclass))
31299 return false;
31300 return true;
31301 }
31302
31303 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
31304 strcpy from constants will be faster. */
31305
31306 static HOST_WIDE_INT
31307 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
31308 {
31309 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
31310 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
31311 return MAX (align, BITS_PER_WORD * factor);
31312 return align;
31313 }
31314
31315 #if CHECKING_P
31316 namespace selftest {
31317
31318 /* Scan the static data tables generated by parsecpu.awk looking for
31319 potential issues with the data. We primarily check for
31320 inconsistencies in the option extensions at present (extensions
31321 that duplicate others but aren't marked as aliases). Furthermore,
31322 for correct canonicalization, later options must never be a subset
31323 of an earlier option. Any extension should also only specify other
31324 feature bits and never an architecture bit. The architecture is inferred
31325 from the declaration of the extension. */
31326 static void
31327 arm_test_cpu_arch_data (void)
31328 {
31329 const arch_option *arch;
31330 const cpu_option *cpu;
31331 auto_sbitmap target_isa (isa_num_bits);
31332 auto_sbitmap isa1 (isa_num_bits);
31333 auto_sbitmap isa2 (isa_num_bits);
31334
31335 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31336 {
31337 const cpu_arch_extension *ext1, *ext2;
31338
31339 if (arch->common.extensions == NULL)
31340 continue;
31341
31342 arm_initialize_isa (target_isa, arch->common.isa_bits);
31343
31344 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31345 {
31346 if (ext1->alias)
31347 continue;
31348
31349 arm_initialize_isa (isa1, ext1->isa_bits);
31350 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31351 {
31352 if (ext2->alias || ext1->remove != ext2->remove)
31353 continue;
31354
31355 arm_initialize_isa (isa2, ext2->isa_bits);
31356 /* If the option is a subset of the parent option, it doesn't
31357 add anything and so isn't useful. */
31358 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31359
31360 /* If the extension specifies any architectural bits then
31361 disallow it. Extensions should only specify feature bits. */
31362 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31363 }
31364 }
31365 }
31366
31367 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31368 {
31369 const cpu_arch_extension *ext1, *ext2;
31370
31371 if (cpu->common.extensions == NULL)
31372 continue;
31373
31374 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31375
31376 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31377 {
31378 if (ext1->alias)
31379 continue;
31380
31381 arm_initialize_isa (isa1, ext1->isa_bits);
31382 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31383 {
31384 if (ext2->alias || ext1->remove != ext2->remove)
31385 continue;
31386
31387 arm_initialize_isa (isa2, ext2->isa_bits);
31388 /* If the option is a subset of the parent option, it doesn't
31389 add anything and so isn't useful. */
31390 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31391
31392 /* If the extension specifies any architectural bits then
31393 disallow it. Extensions should only specify feature bits. */
31394 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31395 }
31396 }
31397 }
31398 }
31399
31400 /* Scan the static data tables generated by parsecpu.awk looking for
31401 potential issues with the data.  Here we check the consistency of the
31402 FPU bits; in particular, we check that ISA_ALL_FPU_INTERNAL does not contain
31403 a feature bit that is not defined by any FPU flag.  */
31404 static void
31405 arm_test_fpu_data (void)
31406 {
31407 auto_sbitmap isa_all_fpubits (isa_num_bits);
31408 auto_sbitmap fpubits (isa_num_bits);
31409 auto_sbitmap tmpset (isa_num_bits);
31410
31411 static const enum isa_feature fpu_bitlist[]
31412 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
31413 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
31414
31415 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31416 {
31417 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
31418 bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
31419 bitmap_clear (isa_all_fpubits);
31420 bitmap_copy (isa_all_fpubits, tmpset);
31421 }
31422
31423 if (!bitmap_empty_p (isa_all_fpubits))
31424 {
31425 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
31426 " group that are not defined by any FPU.\n"
31427 " Check your arm-cpus.in.\n");
31428 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
31429 }
31430 }
31431
31432 static void
31433 arm_run_selftests (void)
31434 {
31435 arm_test_cpu_arch_data ();
31436 arm_test_fpu_data ();
31437 }
31438 } /* Namespace selftest. */
31439
31440 #undef TARGET_RUN_TARGET_SELFTESTS
31441 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31442 #endif /* CHECKING_P */
31443
31444 struct gcc_target targetm = TARGET_INITIALIZER;
31445
31446 #include "gt-arm.h"