gcc/config/arm/arm.c
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "attribs.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic-core.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
45 #include "calls.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "reload.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "cfgrtl.h"
54 #include "sched-int.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60 #include "opts.h"
61 #include "dumpfile.h"
62 #include "target-globals.h"
63 #include "builtins.h"
64 #include "tm-constrs.h"
65 #include "rtl-iter.h"
66 #include "optabs-libfuncs.h"
67 #include "gimplify.h"
68 #include "gimple.h"
69 #include "selftest.h"
70
71 /* This file should be included last. */
72 #include "target-def.h"
73
74 /* Forward definitions of types. */
75 typedef struct minipool_node Mnode;
76 typedef struct minipool_fixup Mfix;
77
78 void (*arm_lang_output_object_attributes_hook)(void);
79
80 struct four_ints
81 {
82 int i[4];
83 };
84
85 /* Forward function declarations. */
86 static bool arm_const_not_ok_for_debug_p (rtx);
87 static int arm_needs_doubleword_align (machine_mode, const_tree);
88 static int arm_compute_static_chain_stack_bytes (void);
89 static arm_stack_offsets *arm_get_frame_offsets (void);
90 static void arm_compute_frame_layout (void);
91 static void arm_add_gc_roots (void);
92 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
93 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
94 static unsigned bit_count (unsigned long);
95 static unsigned bitmap_popcount (const sbitmap);
96 static int arm_address_register_rtx_p (rtx, int);
97 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
98 static bool is_called_in_ARM_mode (tree);
99 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
100 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
101 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
102 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
103 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
104 inline static int thumb1_index_register_rtx_p (rtx, int);
105 static int thumb_far_jump_used_p (void);
106 static bool thumb_force_lr_save (void);
107 static unsigned arm_size_return_regs (void);
108 static bool arm_assemble_integer (rtx, unsigned int, int);
109 static void arm_print_operand (FILE *, rtx, int);
110 static void arm_print_operand_address (FILE *, machine_mode, rtx);
111 static bool arm_print_operand_punct_valid_p (unsigned char code);
112 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
113 static arm_cc get_arm_condition_code (rtx);
114 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
115 static const char *output_multi_immediate (rtx *, const char *, const char *,
116 int, HOST_WIDE_INT);
117 static const char *shift_op (rtx, HOST_WIDE_INT *);
118 static struct machine_function *arm_init_machine_status (void);
119 static void thumb_exit (FILE *, int);
120 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
121 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
122 static Mnode *add_minipool_forward_ref (Mfix *);
123 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
124 static Mnode *add_minipool_backward_ref (Mfix *);
125 static void assign_minipool_offsets (Mfix *);
126 static void arm_print_value (FILE *, rtx);
127 static void dump_minipool (rtx_insn *);
128 static int arm_barrier_cost (rtx_insn *);
129 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
130 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
131 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
132 machine_mode, rtx);
133 static void arm_reorg (void);
134 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
135 static unsigned long arm_compute_save_reg0_reg12_mask (void);
136 static unsigned long arm_compute_save_core_reg_mask (void);
137 static unsigned long arm_isr_value (tree);
138 static unsigned long arm_compute_func_type (void);
139 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
140 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
141 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
142 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
143 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
144 #endif
145 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
146 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
147 static void arm_output_function_epilogue (FILE *);
148 static void arm_output_function_prologue (FILE *);
149 static int arm_comp_type_attributes (const_tree, const_tree);
150 static void arm_set_default_type_attributes (tree);
151 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
152 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
153 static int optimal_immediate_sequence (enum rtx_code code,
154 unsigned HOST_WIDE_INT val,
155 struct four_ints *return_sequence);
156 static int optimal_immediate_sequence_1 (enum rtx_code code,
157 unsigned HOST_WIDE_INT val,
158 struct four_ints *return_sequence,
159 int i);
160 static int arm_get_strip_length (int);
161 static bool arm_function_ok_for_sibcall (tree, tree);
162 static machine_mode arm_promote_function_mode (const_tree,
163 machine_mode, int *,
164 const_tree, int);
165 static bool arm_return_in_memory (const_tree, const_tree);
166 static rtx arm_function_value (const_tree, const_tree, bool);
167 static rtx arm_libcall_value_1 (machine_mode);
168 static rtx arm_libcall_value (machine_mode, const_rtx);
169 static bool arm_function_value_regno_p (const unsigned int);
170 static void arm_internal_label (FILE *, const char *, unsigned long);
171 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
172 tree);
173 static bool arm_have_conditional_execution (void);
174 static bool arm_cannot_force_const_mem (machine_mode, rtx);
175 static bool arm_legitimate_constant_p (machine_mode, rtx);
176 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
177 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
178 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
179 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
180 static void emit_constant_insn (rtx cond, rtx pattern);
181 static rtx_insn *emit_set_insn (rtx, rtx);
182 static rtx emit_multi_reg_push (unsigned long, unsigned long);
183 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
184 tree, bool);
185 static rtx arm_function_arg (cumulative_args_t, machine_mode,
186 const_tree, bool);
187 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
188 const_tree, bool);
189 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
190 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
191 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
192 const_tree);
193 static rtx aapcs_libcall_value (machine_mode);
194 static int aapcs_select_return_coproc (const_tree, const_tree);
195
196 #ifdef OBJECT_FORMAT_ELF
197 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
198 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
199 #endif
200 #ifndef ARM_PE
201 static void arm_encode_section_info (tree, rtx, int);
202 #endif
203
204 static void arm_file_end (void);
205 static void arm_file_start (void);
206 static void arm_insert_attributes (tree, tree *);
207
208 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
209 tree, int *, int);
210 static bool arm_pass_by_reference (cumulative_args_t,
211 machine_mode, const_tree, bool);
212 static bool arm_promote_prototypes (const_tree);
213 static bool arm_default_short_enums (void);
214 static bool arm_align_anon_bitfield (void);
215 static bool arm_return_in_msb (const_tree);
216 static bool arm_must_pass_in_stack (machine_mode, const_tree);
217 static bool arm_return_in_memory (const_tree, const_tree);
218 #if ARM_UNWIND_INFO
219 static void arm_unwind_emit (FILE *, rtx_insn *);
220 static bool arm_output_ttype (rtx);
221 static void arm_asm_emit_except_personality (rtx);
222 #endif
223 static void arm_asm_init_sections (void);
224 static rtx arm_dwarf_register_span (rtx);
225
226 static tree arm_cxx_guard_type (void);
227 static bool arm_cxx_guard_mask_bit (void);
228 static tree arm_get_cookie_size (tree);
229 static bool arm_cookie_has_size (void);
230 static bool arm_cxx_cdtor_returns_this (void);
231 static bool arm_cxx_key_method_may_be_inline (void);
232 static void arm_cxx_determine_class_data_visibility (tree);
233 static bool arm_cxx_class_data_always_comdat (void);
234 static bool arm_cxx_use_aeabi_atexit (void);
235 static void arm_init_libfuncs (void);
236 static tree arm_build_builtin_va_list (void);
237 static void arm_expand_builtin_va_start (tree, rtx);
238 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
239 static void arm_option_override (void);
240 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
241 static void arm_option_restore (struct gcc_options *,
242 struct cl_target_option *);
243 static void arm_override_options_after_change (void);
244 static void arm_option_print (FILE *, int, struct cl_target_option *);
245 static void arm_set_current_function (tree);
246 static bool arm_can_inline_p (tree, tree);
247 static void arm_relayout_function (tree);
248 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
249 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
250 static bool arm_sched_can_speculate_insn (rtx_insn *);
251 static bool arm_macro_fusion_p (void);
252 static bool arm_cannot_copy_insn_p (rtx_insn *);
253 static int arm_issue_rate (void);
254 static int arm_first_cycle_multipass_dfa_lookahead (void);
255 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
256 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
257 static bool arm_output_addr_const_extra (FILE *, rtx);
258 static bool arm_allocate_stack_slots_for_args (void);
259 static bool arm_warn_func_return (tree);
260 static tree arm_promoted_type (const_tree t);
261 static bool arm_scalar_mode_supported_p (scalar_mode);
262 static bool arm_frame_pointer_required (void);
263 static bool arm_can_eliminate (const int, const int);
264 static void arm_asm_trampoline_template (FILE *);
265 static void arm_trampoline_init (rtx, tree, rtx);
266 static rtx arm_trampoline_adjust_address (rtx);
267 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
268 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
269 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
270 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
271 static bool arm_array_mode_supported_p (machine_mode,
272 unsigned HOST_WIDE_INT);
273 static machine_mode arm_preferred_simd_mode (scalar_mode);
274 static bool arm_class_likely_spilled_p (reg_class_t);
275 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
276 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
277 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
278 const_tree type,
279 int misalignment,
280 bool is_packed);
281 static void arm_conditional_register_usage (void);
282 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
283 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
284 static unsigned int arm_autovectorize_vector_sizes (void);
285 static int arm_default_branch_cost (bool, bool);
286 static int arm_cortex_a5_branch_cost (bool, bool);
287 static int arm_cortex_m_branch_cost (bool, bool);
288 static int arm_cortex_m7_branch_cost (bool, bool);
289
290 static bool arm_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices);
291
292 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
293
294 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
295 tree vectype,
296 int misalign ATTRIBUTE_UNUSED);
297 static unsigned arm_add_stmt_cost (void *data, int count,
298 enum vect_cost_for_stmt kind,
299 struct _stmt_vec_info *stmt_info,
300 int misalign,
301 enum vect_cost_model_location where);
302
303 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
304 bool op0_preserve_value);
305 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
306
307 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
308 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
309 const_tree);
310 static section *arm_function_section (tree, enum node_frequency, bool, bool);
311 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
312 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
313 int reloc);
314 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
315 static opt_scalar_float_mode arm_floatn_mode (int, bool);
316 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
317 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
318 static bool arm_modes_tieable_p (machine_mode, machine_mode);
319 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
320 \f
321 /* Table of machine attributes. */
322 static const struct attribute_spec arm_attribute_table[] =
323 {
324 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
325 affects_type_identity } */
326 /* Function calls made to this symbol must be done indirectly, because
327 it may lie outside of the 26 bit addressing range of a normal function
328 call. */
329 { "long_call", 0, 0, false, true, true, NULL, false },
330 /* Whereas these functions are always known to reside within the 26 bit
331 addressing range. */
332 { "short_call", 0, 0, false, true, true, NULL, false },
333 /* Specify the procedure call conventions for a function. */
334 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
335 false },
336 /* Interrupt Service Routines have special prologue and epilogue requirements. */
337 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
338 false },
339 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
340 false },
341 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
342 false },
343 #ifdef ARM_PE
344 /* ARM/PE has three new attributes:
345 interfacearm - ?
346 dllexport - for exporting a function/variable that will live in a dll
347 dllimport - for importing a function/variable from a dll
348
349 Microsoft allows multiple declspecs in one __declspec, separating
350 them with spaces. We do NOT support this. Instead, use __declspec
351 multiple times.
352 */
353 { "dllimport", 0, 0, true, false, false, NULL, false },
354 { "dllexport", 0, 0, true, false, false, NULL, false },
355 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
356 false },
357 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
358 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
359 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
360 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
361 false },
362 #endif
363 /* ARMv8-M Security Extensions support. */
364 { "cmse_nonsecure_entry", 0, 0, true, false, false,
365 arm_handle_cmse_nonsecure_entry, false },
366 { "cmse_nonsecure_call", 0, 0, true, false, false,
367 arm_handle_cmse_nonsecure_call, true },
368 { NULL, 0, 0, false, false, false, NULL, false }
369 };
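/* Editorial note, not part of the upstream source: the attributes in the
   table above are applied from user code with the usual GNU __attribute__
   syntax.  A minimal illustration (function names are placeholders,
   assuming a bare-metal ARM target):

     void far_helper (void) __attribute__ ((long_call));
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
     int  vfp_callback (int) __attribute__ ((pcs ("aapcs-vfp")));
     void gateway (void) __attribute__ ((cmse_nonsecure_entry));

   The handlers named in the table (arm_handle_isr_attribute,
   arm_handle_pcs_attribute, ...) validate such uses and attach the
   attribute to the declaration or type; cmse_nonsecure_entry additionally
   requires compiling with -mcmse.  */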
370 \f
371 /* Initialize the GCC target structure. */
372 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
373 #undef TARGET_MERGE_DECL_ATTRIBUTES
374 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
375 #endif
376
377 #undef TARGET_LEGITIMIZE_ADDRESS
378 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
379
380 #undef TARGET_ATTRIBUTE_TABLE
381 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
382
383 #undef TARGET_INSERT_ATTRIBUTES
384 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
385
386 #undef TARGET_ASM_FILE_START
387 #define TARGET_ASM_FILE_START arm_file_start
388 #undef TARGET_ASM_FILE_END
389 #define TARGET_ASM_FILE_END arm_file_end
390
391 #undef TARGET_ASM_ALIGNED_SI_OP
392 #define TARGET_ASM_ALIGNED_SI_OP NULL
393 #undef TARGET_ASM_INTEGER
394 #define TARGET_ASM_INTEGER arm_assemble_integer
395
396 #undef TARGET_PRINT_OPERAND
397 #define TARGET_PRINT_OPERAND arm_print_operand
398 #undef TARGET_PRINT_OPERAND_ADDRESS
399 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
400 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
401 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
402
403 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
404 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
405
406 #undef TARGET_ASM_FUNCTION_PROLOGUE
407 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
408
409 #undef TARGET_ASM_FUNCTION_EPILOGUE
410 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
411
412 #undef TARGET_CAN_INLINE_P
413 #define TARGET_CAN_INLINE_P arm_can_inline_p
414
415 #undef TARGET_RELAYOUT_FUNCTION
416 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
417
418 #undef TARGET_OPTION_OVERRIDE
419 #define TARGET_OPTION_OVERRIDE arm_option_override
420
421 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
422 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
423
424 #undef TARGET_OPTION_SAVE
425 #define TARGET_OPTION_SAVE arm_option_save
426
427 #undef TARGET_OPTION_RESTORE
428 #define TARGET_OPTION_RESTORE arm_option_restore
429
430 #undef TARGET_OPTION_PRINT
431 #define TARGET_OPTION_PRINT arm_option_print
432
433 #undef TARGET_COMP_TYPE_ATTRIBUTES
434 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
435
436 #undef TARGET_SCHED_CAN_SPECULATE_INSN
437 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
438
439 #undef TARGET_SCHED_MACRO_FUSION_P
440 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
441
442 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
443 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
444
445 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
446 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
447
448 #undef TARGET_SCHED_ADJUST_COST
449 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
450
451 #undef TARGET_SET_CURRENT_FUNCTION
452 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
453
454 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
455 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
456
457 #undef TARGET_SCHED_REORDER
458 #define TARGET_SCHED_REORDER arm_sched_reorder
459
460 #undef TARGET_REGISTER_MOVE_COST
461 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
462
463 #undef TARGET_MEMORY_MOVE_COST
464 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
465
466 #undef TARGET_ENCODE_SECTION_INFO
467 #ifdef ARM_PE
468 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
469 #else
470 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
471 #endif
472
473 #undef TARGET_STRIP_NAME_ENCODING
474 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
475
476 #undef TARGET_ASM_INTERNAL_LABEL
477 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
478
479 #undef TARGET_FLOATN_MODE
480 #define TARGET_FLOATN_MODE arm_floatn_mode
481
482 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
483 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
484
485 #undef TARGET_FUNCTION_VALUE
486 #define TARGET_FUNCTION_VALUE arm_function_value
487
488 #undef TARGET_LIBCALL_VALUE
489 #define TARGET_LIBCALL_VALUE arm_libcall_value
490
491 #undef TARGET_FUNCTION_VALUE_REGNO_P
492 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
493
494 #undef TARGET_ASM_OUTPUT_MI_THUNK
495 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
496 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
497 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
498
499 #undef TARGET_RTX_COSTS
500 #define TARGET_RTX_COSTS arm_rtx_costs
501 #undef TARGET_ADDRESS_COST
502 #define TARGET_ADDRESS_COST arm_address_cost
503
504 #undef TARGET_SHIFT_TRUNCATION_MASK
505 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
506 #undef TARGET_VECTOR_MODE_SUPPORTED_P
507 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
508 #undef TARGET_ARRAY_MODE_SUPPORTED_P
509 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
510 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
511 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
512 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
513 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
514 arm_autovectorize_vector_sizes
515
516 #undef TARGET_MACHINE_DEPENDENT_REORG
517 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
518
519 #undef TARGET_INIT_BUILTINS
520 #define TARGET_INIT_BUILTINS arm_init_builtins
521 #undef TARGET_EXPAND_BUILTIN
522 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
523 #undef TARGET_BUILTIN_DECL
524 #define TARGET_BUILTIN_DECL arm_builtin_decl
525
526 #undef TARGET_INIT_LIBFUNCS
527 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
528
529 #undef TARGET_PROMOTE_FUNCTION_MODE
530 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
531 #undef TARGET_PROMOTE_PROTOTYPES
532 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
533 #undef TARGET_PASS_BY_REFERENCE
534 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
535 #undef TARGET_ARG_PARTIAL_BYTES
536 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
537 #undef TARGET_FUNCTION_ARG
538 #define TARGET_FUNCTION_ARG arm_function_arg
539 #undef TARGET_FUNCTION_ARG_ADVANCE
540 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
541 #undef TARGET_FUNCTION_ARG_PADDING
542 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
543 #undef TARGET_FUNCTION_ARG_BOUNDARY
544 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
545
546 #undef TARGET_SETUP_INCOMING_VARARGS
547 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
548
549 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
550 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
551
552 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
553 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
554 #undef TARGET_TRAMPOLINE_INIT
555 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
556 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
557 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
558
559 #undef TARGET_WARN_FUNC_RETURN
560 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
561
562 #undef TARGET_DEFAULT_SHORT_ENUMS
563 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
564
565 #undef TARGET_ALIGN_ANON_BITFIELD
566 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
567
568 #undef TARGET_NARROW_VOLATILE_BITFIELD
569 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
570
571 #undef TARGET_CXX_GUARD_TYPE
572 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
573
574 #undef TARGET_CXX_GUARD_MASK_BIT
575 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
576
577 #undef TARGET_CXX_GET_COOKIE_SIZE
578 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
579
580 #undef TARGET_CXX_COOKIE_HAS_SIZE
581 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
582
583 #undef TARGET_CXX_CDTOR_RETURNS_THIS
584 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
585
586 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
587 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
588
589 #undef TARGET_CXX_USE_AEABI_ATEXIT
590 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
591
592 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
593 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
594 arm_cxx_determine_class_data_visibility
595
596 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
597 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
598
599 #undef TARGET_RETURN_IN_MSB
600 #define TARGET_RETURN_IN_MSB arm_return_in_msb
601
602 #undef TARGET_RETURN_IN_MEMORY
603 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
604
605 #undef TARGET_MUST_PASS_IN_STACK
606 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
607
608 #if ARM_UNWIND_INFO
609 #undef TARGET_ASM_UNWIND_EMIT
610 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
611
612 /* EABI unwinding tables use a different format for the typeinfo tables. */
613 #undef TARGET_ASM_TTYPE
614 #define TARGET_ASM_TTYPE arm_output_ttype
615
616 #undef TARGET_ARM_EABI_UNWINDER
617 #define TARGET_ARM_EABI_UNWINDER true
618
619 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
620 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
621
622 #endif /* ARM_UNWIND_INFO */
623
624 #undef TARGET_ASM_INIT_SECTIONS
625 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
626
627 #undef TARGET_DWARF_REGISTER_SPAN
628 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
629
630 #undef TARGET_CANNOT_COPY_INSN_P
631 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
632
633 #ifdef HAVE_AS_TLS
634 #undef TARGET_HAVE_TLS
635 #define TARGET_HAVE_TLS true
636 #endif
637
638 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
639 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
640
641 #undef TARGET_LEGITIMATE_CONSTANT_P
642 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
643
644 #undef TARGET_CANNOT_FORCE_CONST_MEM
645 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
646
647 #undef TARGET_MAX_ANCHOR_OFFSET
648 #define TARGET_MAX_ANCHOR_OFFSET 4095
649
650 /* The minimum is set such that the total size of the block
651 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
652 divisible by eight, ensuring natural spacing of anchors. */
653 #undef TARGET_MIN_ANCHOR_OFFSET
654 #define TARGET_MIN_ANCHOR_OFFSET -4088
655
656 #undef TARGET_SCHED_ISSUE_RATE
657 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
658
659 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
660 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
661 arm_first_cycle_multipass_dfa_lookahead
662
663 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
664 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
665 arm_first_cycle_multipass_dfa_lookahead_guard
666
667 #undef TARGET_MANGLE_TYPE
668 #define TARGET_MANGLE_TYPE arm_mangle_type
669
670 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
671 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
672
673 #undef TARGET_BUILD_BUILTIN_VA_LIST
674 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
675 #undef TARGET_EXPAND_BUILTIN_VA_START
676 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
677 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
678 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
679
680 #ifdef HAVE_AS_TLS
681 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
682 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
683 #endif
684
685 #undef TARGET_LEGITIMATE_ADDRESS_P
686 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
687
688 #undef TARGET_PREFERRED_RELOAD_CLASS
689 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
690
691 #undef TARGET_PROMOTED_TYPE
692 #define TARGET_PROMOTED_TYPE arm_promoted_type
693
694 #undef TARGET_SCALAR_MODE_SUPPORTED_P
695 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
696
697 #undef TARGET_COMPUTE_FRAME_LAYOUT
698 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
699
700 #undef TARGET_FRAME_POINTER_REQUIRED
701 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
702
703 #undef TARGET_CAN_ELIMINATE
704 #define TARGET_CAN_ELIMINATE arm_can_eliminate
705
706 #undef TARGET_CONDITIONAL_REGISTER_USAGE
707 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
708
709 #undef TARGET_CLASS_LIKELY_SPILLED_P
710 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
711
712 #undef TARGET_VECTORIZE_BUILTINS
713 #define TARGET_VECTORIZE_BUILTINS
714
715 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
716 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
717 arm_builtin_vectorized_function
718
719 #undef TARGET_VECTOR_ALIGNMENT
720 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
721
722 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
723 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
724 arm_vector_alignment_reachable
725
726 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
727 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
728 arm_builtin_support_vector_misalignment
729
730 #undef TARGET_PREFERRED_RENAME_CLASS
731 #define TARGET_PREFERRED_RENAME_CLASS \
732 arm_preferred_rename_class
733
734 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
735 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
736 arm_vectorize_vec_perm_const_ok
737
738 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
739 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
740 arm_builtin_vectorization_cost
741 #undef TARGET_VECTORIZE_ADD_STMT_COST
742 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
743
744 #undef TARGET_CANONICALIZE_COMPARISON
745 #define TARGET_CANONICALIZE_COMPARISON \
746 arm_canonicalize_comparison
747
748 #undef TARGET_ASAN_SHADOW_OFFSET
749 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
750
751 #undef MAX_INSN_PER_IT_BLOCK
752 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
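/* Editorial note, not part of the upstream source: this is the upper bound
   on the number of conditional instructions grouped into one Thumb-2 IT
   block: a single instruction when arm_restrict_it (-mrestrict-it) is in
   effect, otherwise the architectural maximum of four.  */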
753
754 #undef TARGET_CAN_USE_DOLOOP_P
755 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
756
757 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
758 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
759
760 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
761 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
762
763 #undef TARGET_SCHED_FUSION_PRIORITY
764 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
765
766 #undef TARGET_ASM_FUNCTION_SECTION
767 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
768
769 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
770 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
771
772 #undef TARGET_SECTION_TYPE_FLAGS
773 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
774
775 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
776 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
777
778 #undef TARGET_C_EXCESS_PRECISION
779 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
780
781 /* Although the architecture reserves bits 0 and 1, only the former is
782 used for ARM/Thumb ISA selection in v7 and earlier versions. */
783 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
784 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
785
786 #undef TARGET_FIXED_CONDITION_CODE_REGS
787 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
788
789 #undef TARGET_HARD_REGNO_NREGS
790 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
791 #undef TARGET_HARD_REGNO_MODE_OK
792 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
793
794 #undef TARGET_MODES_TIEABLE_P
795 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
796
797 #undef TARGET_CAN_CHANGE_MODE_CLASS
798 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
799
800 #undef TARGET_CONSTANT_ALIGNMENT
801 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
802 \f
803 /* Obstack for minipool constant handling. */
804 static struct obstack minipool_obstack;
805 static char * minipool_startobj;
806
807 /* The maximum number of insns skipped which
808 will be conditionalised if possible. */
809 static int max_insns_skipped = 5;
810
811 extern FILE * asm_out_file;
812
813 /* True if we are currently building a constant table. */
814 int making_const_table;
815
816 /* The processor for which instructions should be scheduled. */
817 enum processor_type arm_tune = TARGET_CPU_arm_none;
818
819 /* The current tuning set. */
820 const struct tune_params *current_tune;
821
822 /* Which floating point hardware to schedule for. */
823 int arm_fpu_attr;
824
825 /* Used for Thumb call_via trampolines. */
826 rtx thumb_call_via_label[14];
827 static int thumb_call_reg_needed;
828
829 /* The bits in this mask specify which instruction scheduling options should
830 be used. */
831 unsigned int tune_flags = 0;
832
833 /* The highest ARM architecture version supported by the
834 target. */
835 enum base_architecture arm_base_arch = BASE_ARCH_0;
836
837 /* Active target architecture and tuning. */
838
839 struct arm_build_target arm_active_target;
840
841 /* The following are used in the arm.md file as equivalents to bits
842 in the above two flag variables. */
843
844 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
845 int arm_arch3m = 0;
846
847 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
848 int arm_arch4 = 0;
849
850 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
851 int arm_arch4t = 0;
852
853 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
854 int arm_arch5 = 0;
855
856 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
857 int arm_arch5e = 0;
858
859 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
860 int arm_arch5te = 0;
861
862 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
863 int arm_arch6 = 0;
864
865 /* Nonzero if this chip supports the ARM 6K extensions. */
866 int arm_arch6k = 0;
867
868 /* Nonzero if this chip supports the ARM 6KZ extensions. */
869 int arm_arch6kz = 0;
870
871 /* Nonzero if instructions present in ARMv6-M can be used. */
872 int arm_arch6m = 0;
873
874 /* Nonzero if this chip supports the ARM 7 extensions. */
875 int arm_arch7 = 0;
876
877 /* Nonzero if this chip supports the Large Physical Address Extension. */
878 int arm_arch_lpae = 0;
879
880 /* Nonzero if instructions not present in the 'M' profile can be used. */
881 int arm_arch_notm = 0;
882
883 /* Nonzero if instructions present in ARMv7E-M can be used. */
884 int arm_arch7em = 0;
885
886 /* Nonzero if instructions present in ARMv8 can be used. */
887 int arm_arch8 = 0;
888
889 /* Nonzero if this chip supports the ARMv8.1 extensions. */
890 int arm_arch8_1 = 0;
891
892 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
893 int arm_arch8_2 = 0;
894
895 /* Nonzero if this chip supports the FP16 instructions extension of ARM
896 Architecture 8.2. */
897 int arm_fp16_inst = 0;
898
899 /* Nonzero if this chip can benefit from load scheduling. */
900 int arm_ld_sched = 0;
901
902 /* Nonzero if this chip is a StrongARM. */
903 int arm_tune_strongarm = 0;
904
905 /* Nonzero if this chip supports Intel Wireless MMX technology. */
906 int arm_arch_iwmmxt = 0;
907
908 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
909 int arm_arch_iwmmxt2 = 0;
910
911 /* Nonzero if this chip is an XScale. */
912 int arm_arch_xscale = 0;
913
914 /* Nonzero if tuning for XScale. */
915 int arm_tune_xscale = 0;
916
917 /* Nonzero if we want to tune for stores that access the write-buffer.
918 This typically means an ARM6 or ARM7 with MMU or MPU. */
919 int arm_tune_wbuf = 0;
920
921 /* Nonzero if tuning for Cortex-A9. */
922 int arm_tune_cortex_a9 = 0;
923
924 /* Nonzero if we should define __THUMB_INTERWORK__ in the
925 preprocessor.
926 XXX This is a bit of a hack; it's intended to help work around
927 problems in GLD, which doesn't understand that armv5t code is
928 interworking clean. */
929 int arm_cpp_interwork = 0;
930
931 /* Nonzero if chip supports Thumb 1. */
932 int arm_arch_thumb1;
933
934 /* Nonzero if chip supports Thumb 2. */
935 int arm_arch_thumb2;
936
937 /* Nonzero if chip supports integer division instruction. */
938 int arm_arch_arm_hwdiv;
939 int arm_arch_thumb_hwdiv;
940
941 /* Nonzero if chip disallows volatile memory access in IT block. */
942 int arm_arch_no_volatile_ce;
943
944 /* Nonzero if we should use Neon to handle 64-bit operations rather
945 than core registers. */
946 int prefer_neon_for_64bits = 0;
947
948 /* Nonzero if we shouldn't use literal pools. */
949 bool arm_disable_literal_pool = false;
950
951 /* The register number to be used for the PIC offset register. */
952 unsigned arm_pic_register = INVALID_REGNUM;
953
954 enum arm_pcs arm_pcs_default;
955
956 /* For an explanation of these variables, see final_prescan_insn below. */
957 int arm_ccfsm_state;
958 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
959 enum arm_cond_code arm_current_cc;
960
961 rtx arm_target_insn;
962 int arm_target_label;
963 /* The number of conditionally executed insns, including the current insn. */
964 int arm_condexec_count = 0;
965 /* A bitmask specifying the patterns for the IT block.
966 Zero means do not output an IT block before this insn. */
967 int arm_condexec_mask = 0;
968 /* The number of bits used in arm_condexec_mask. */
969 int arm_condexec_masklen = 0;
970
971 /* Nonzero if chip supports the ARMv8 CRC instructions. */
972 int arm_arch_crc = 0;
973
974 /* Nonzero if chip supports the ARMv8-M security extensions. */
975 int arm_arch_cmse = 0;
976
977 /* Nonzero if the core has a very small, high-latency multiply unit. */
978 int arm_m_profile_small_mul = 0;
979
980 /* The condition codes of the ARM, and the inverse function. */
981 static const char * const arm_condition_codes[] =
982 {
983 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
984 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
985 };
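/* Editorial note, not part of the upstream source: the order above matches
   the arm_cond_code enumeration, and the codes are laid out in inverse
   pairs (eq/ne, cs/cc, mi/pl, ...), so the "inverse function" mentioned in
   the comment is simply XOR-ing a condition code with 1.  */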
986
987 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
988 int arm_regs_in_sequence[] =
989 {
990 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
991 };
992
993 #define ARM_LSL_NAME "lsl"
994 #define streq(string1, string2) (strcmp (string1, string2) == 0)
995
996 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
997 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
998 | (1 << PIC_OFFSET_TABLE_REGNUM)))
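/* Editorial note, not part of the upstream source: THUMB2_WORK_REGS is the
   mask of low core registers (r0-r7) usable as scratch registers, with the
   Thumb hard frame pointer, stack pointer, program counter and PIC
   register explicitly excluded.  */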
999 \f
1000 /* Initialization code. */
1001
1002 struct cpu_tune
1003 {
1004 enum processor_type scheduler;
1005 unsigned int tune_flags;
1006 const struct tune_params *tune;
1007 };
1008
1009 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1010 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1011 { \
1012 num_slots, \
1013 l1_size, \
1014 l1_line_size \
1015 }
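/* Editorial note, not part of the upstream source: these two macros build
   the { num_slots, l1_size, l1_line_size } prefetch triple used in the
   tuning tables, e.g. ARM_PREFETCH_BENEFICIAL (4, -1, 64) expands to
   { 4, -1, 64 }; the numbers in that expansion are purely illustrative
   and not taken from any particular core.  */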
1016
1017 /* arm generic vectorizer costs. */
1018 static const
1019 struct cpu_vec_costs arm_default_vec_cost = {
1020 1, /* scalar_stmt_cost. */
1021 1, /* scalar_load_cost. */
1022 1, /* scalar_store_cost. */
1023 1, /* vec_stmt_cost. */
1024 1, /* vec_to_scalar_cost. */
1025 1, /* scalar_to_vec_cost. */
1026 1, /* vec_align_load_cost. */
1027 1, /* vec_unalign_load_cost. */
1028 1, /* vec_unalign_store_cost. */
1029 1, /* vec_store_cost. */
1030 3, /* cond_taken_branch_cost. */
1031 1, /* cond_not_taken_branch_cost. */
1032 };
1033
1034 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1035 #include "aarch-cost-tables.h"
1036
1037
1038
1039 const struct cpu_cost_table cortexa9_extra_costs =
1040 {
1041 /* ALU */
1042 {
1043 0, /* arith. */
1044 0, /* logical. */
1045 0, /* shift. */
1046 COSTS_N_INSNS (1), /* shift_reg. */
1047 COSTS_N_INSNS (1), /* arith_shift. */
1048 COSTS_N_INSNS (2), /* arith_shift_reg. */
1049 0, /* log_shift. */
1050 COSTS_N_INSNS (1), /* log_shift_reg. */
1051 COSTS_N_INSNS (1), /* extend. */
1052 COSTS_N_INSNS (2), /* extend_arith. */
1053 COSTS_N_INSNS (1), /* bfi. */
1054 COSTS_N_INSNS (1), /* bfx. */
1055 0, /* clz. */
1056 0, /* rev. */
1057 0, /* non_exec. */
1058 true /* non_exec_costs_exec. */
1059 },
1060 {
1061 /* MULT SImode */
1062 {
1063 COSTS_N_INSNS (3), /* simple. */
1064 COSTS_N_INSNS (3), /* flag_setting. */
1065 COSTS_N_INSNS (2), /* extend. */
1066 COSTS_N_INSNS (3), /* add. */
1067 COSTS_N_INSNS (2), /* extend_add. */
1068 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1069 },
1070 /* MULT DImode */
1071 {
1072 0, /* simple (N/A). */
1073 0, /* flag_setting (N/A). */
1074 COSTS_N_INSNS (4), /* extend. */
1075 0, /* add (N/A). */
1076 COSTS_N_INSNS (4), /* extend_add. */
1077 0 /* idiv (N/A). */
1078 }
1079 },
1080 /* LD/ST */
1081 {
1082 COSTS_N_INSNS (2), /* load. */
1083 COSTS_N_INSNS (2), /* load_sign_extend. */
1084 COSTS_N_INSNS (2), /* ldrd. */
1085 COSTS_N_INSNS (2), /* ldm_1st. */
1086 1, /* ldm_regs_per_insn_1st. */
1087 2, /* ldm_regs_per_insn_subsequent. */
1088 COSTS_N_INSNS (5), /* loadf. */
1089 COSTS_N_INSNS (5), /* loadd. */
1090 COSTS_N_INSNS (1), /* load_unaligned. */
1091 COSTS_N_INSNS (2), /* store. */
1092 COSTS_N_INSNS (2), /* strd. */
1093 COSTS_N_INSNS (2), /* stm_1st. */
1094 1, /* stm_regs_per_insn_1st. */
1095 2, /* stm_regs_per_insn_subsequent. */
1096 COSTS_N_INSNS (1), /* storef. */
1097 COSTS_N_INSNS (1), /* stored. */
1098 COSTS_N_INSNS (1), /* store_unaligned. */
1099 COSTS_N_INSNS (1), /* loadv. */
1100 COSTS_N_INSNS (1) /* storev. */
1101 },
1102 {
1103 /* FP SFmode */
1104 {
1105 COSTS_N_INSNS (14), /* div. */
1106 COSTS_N_INSNS (4), /* mult. */
1107 COSTS_N_INSNS (7), /* mult_addsub. */
1108 COSTS_N_INSNS (30), /* fma. */
1109 COSTS_N_INSNS (3), /* addsub. */
1110 COSTS_N_INSNS (1), /* fpconst. */
1111 COSTS_N_INSNS (1), /* neg. */
1112 COSTS_N_INSNS (3), /* compare. */
1113 COSTS_N_INSNS (3), /* widen. */
1114 COSTS_N_INSNS (3), /* narrow. */
1115 COSTS_N_INSNS (3), /* toint. */
1116 COSTS_N_INSNS (3), /* fromint. */
1117 COSTS_N_INSNS (3) /* roundint. */
1118 },
1119 /* FP DFmode */
1120 {
1121 COSTS_N_INSNS (24), /* div. */
1122 COSTS_N_INSNS (5), /* mult. */
1123 COSTS_N_INSNS (8), /* mult_addsub. */
1124 COSTS_N_INSNS (30), /* fma. */
1125 COSTS_N_INSNS (3), /* addsub. */
1126 COSTS_N_INSNS (1), /* fpconst. */
1127 COSTS_N_INSNS (1), /* neg. */
1128 COSTS_N_INSNS (3), /* compare. */
1129 COSTS_N_INSNS (3), /* widen. */
1130 COSTS_N_INSNS (3), /* narrow. */
1131 COSTS_N_INSNS (3), /* toint. */
1132 COSTS_N_INSNS (3), /* fromint. */
1133 COSTS_N_INSNS (3) /* roundint. */
1134 }
1135 },
1136 /* Vector */
1137 {
1138 COSTS_N_INSNS (1) /* alu. */
1139 }
1140 };
1141
1142 const struct cpu_cost_table cortexa8_extra_costs =
1143 {
1144 /* ALU */
1145 {
1146 0, /* arith. */
1147 0, /* logical. */
1148 COSTS_N_INSNS (1), /* shift. */
1149 0, /* shift_reg. */
1150 COSTS_N_INSNS (1), /* arith_shift. */
1151 0, /* arith_shift_reg. */
1152 COSTS_N_INSNS (1), /* log_shift. */
1153 0, /* log_shift_reg. */
1154 0, /* extend. */
1155 0, /* extend_arith. */
1156 0, /* bfi. */
1157 0, /* bfx. */
1158 0, /* clz. */
1159 0, /* rev. */
1160 0, /* non_exec. */
1161 true /* non_exec_costs_exec. */
1162 },
1163 {
1164 /* MULT SImode */
1165 {
1166 COSTS_N_INSNS (1), /* simple. */
1167 COSTS_N_INSNS (1), /* flag_setting. */
1168 COSTS_N_INSNS (1), /* extend. */
1169 COSTS_N_INSNS (1), /* add. */
1170 COSTS_N_INSNS (1), /* extend_add. */
1171 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1172 },
1173 /* MULT DImode */
1174 {
1175 0, /* simple (N/A). */
1176 0, /* flag_setting (N/A). */
1177 COSTS_N_INSNS (2), /* extend. */
1178 0, /* add (N/A). */
1179 COSTS_N_INSNS (2), /* extend_add. */
1180 0 /* idiv (N/A). */
1181 }
1182 },
1183 /* LD/ST */
1184 {
1185 COSTS_N_INSNS (1), /* load. */
1186 COSTS_N_INSNS (1), /* load_sign_extend. */
1187 COSTS_N_INSNS (1), /* ldrd. */
1188 COSTS_N_INSNS (1), /* ldm_1st. */
1189 1, /* ldm_regs_per_insn_1st. */
1190 2, /* ldm_regs_per_insn_subsequent. */
1191 COSTS_N_INSNS (1), /* loadf. */
1192 COSTS_N_INSNS (1), /* loadd. */
1193 COSTS_N_INSNS (1), /* load_unaligned. */
1194 COSTS_N_INSNS (1), /* store. */
1195 COSTS_N_INSNS (1), /* strd. */
1196 COSTS_N_INSNS (1), /* stm_1st. */
1197 1, /* stm_regs_per_insn_1st. */
1198 2, /* stm_regs_per_insn_subsequent. */
1199 COSTS_N_INSNS (1), /* storef. */
1200 COSTS_N_INSNS (1), /* stored. */
1201 COSTS_N_INSNS (1), /* store_unaligned. */
1202 COSTS_N_INSNS (1), /* loadv. */
1203 COSTS_N_INSNS (1) /* storev. */
1204 },
1205 {
1206 /* FP SFmode */
1207 {
1208 COSTS_N_INSNS (36), /* div. */
1209 COSTS_N_INSNS (11), /* mult. */
1210 COSTS_N_INSNS (20), /* mult_addsub. */
1211 COSTS_N_INSNS (30), /* fma. */
1212 COSTS_N_INSNS (9), /* addsub. */
1213 COSTS_N_INSNS (3), /* fpconst. */
1214 COSTS_N_INSNS (3), /* neg. */
1215 COSTS_N_INSNS (6), /* compare. */
1216 COSTS_N_INSNS (4), /* widen. */
1217 COSTS_N_INSNS (4), /* narrow. */
1218 COSTS_N_INSNS (8), /* toint. */
1219 COSTS_N_INSNS (8), /* fromint. */
1220 COSTS_N_INSNS (8) /* roundint. */
1221 },
1222 /* FP DFmode */
1223 {
1224 COSTS_N_INSNS (64), /* div. */
1225 COSTS_N_INSNS (16), /* mult. */
1226 COSTS_N_INSNS (25), /* mult_addsub. */
1227 COSTS_N_INSNS (30), /* fma. */
1228 COSTS_N_INSNS (9), /* addsub. */
1229 COSTS_N_INSNS (3), /* fpconst. */
1230 COSTS_N_INSNS (3), /* neg. */
1231 COSTS_N_INSNS (6), /* compare. */
1232 COSTS_N_INSNS (6), /* widen. */
1233 COSTS_N_INSNS (6), /* narrow. */
1234 COSTS_N_INSNS (8), /* toint. */
1235 COSTS_N_INSNS (8), /* fromint. */
1236 COSTS_N_INSNS (8) /* roundint. */
1237 }
1238 },
1239 /* Vector */
1240 {
1241 COSTS_N_INSNS (1) /* alu. */
1242 }
1243 };
1244
1245 const struct cpu_cost_table cortexa5_extra_costs =
1246 {
1247 /* ALU */
1248 {
1249 0, /* arith. */
1250 0, /* logical. */
1251 COSTS_N_INSNS (1), /* shift. */
1252 COSTS_N_INSNS (1), /* shift_reg. */
1253 COSTS_N_INSNS (1), /* arith_shift. */
1254 COSTS_N_INSNS (1), /* arith_shift_reg. */
1255 COSTS_N_INSNS (1), /* log_shift. */
1256 COSTS_N_INSNS (1), /* log_shift_reg. */
1257 COSTS_N_INSNS (1), /* extend. */
1258 COSTS_N_INSNS (1), /* extend_arith. */
1259 COSTS_N_INSNS (1), /* bfi. */
1260 COSTS_N_INSNS (1), /* bfx. */
1261 COSTS_N_INSNS (1), /* clz. */
1262 COSTS_N_INSNS (1), /* rev. */
1263 0, /* non_exec. */
1264 true /* non_exec_costs_exec. */
1265 },
1266
1267 {
1268 /* MULT SImode */
1269 {
1270 0, /* simple. */
1271 COSTS_N_INSNS (1), /* flag_setting. */
1272 COSTS_N_INSNS (1), /* extend. */
1273 COSTS_N_INSNS (1), /* add. */
1274 COSTS_N_INSNS (1), /* extend_add. */
1275 COSTS_N_INSNS (7) /* idiv. */
1276 },
1277 /* MULT DImode */
1278 {
1279 0, /* simple (N/A). */
1280 0, /* flag_setting (N/A). */
1281 COSTS_N_INSNS (1), /* extend. */
1282 0, /* add. */
1283 COSTS_N_INSNS (2), /* extend_add. */
1284 0 /* idiv (N/A). */
1285 }
1286 },
1287 /* LD/ST */
1288 {
1289 COSTS_N_INSNS (1), /* load. */
1290 COSTS_N_INSNS (1), /* load_sign_extend. */
1291 COSTS_N_INSNS (6), /* ldrd. */
1292 COSTS_N_INSNS (1), /* ldm_1st. */
1293 1, /* ldm_regs_per_insn_1st. */
1294 2, /* ldm_regs_per_insn_subsequent. */
1295 COSTS_N_INSNS (2), /* loadf. */
1296 COSTS_N_INSNS (4), /* loadd. */
1297 COSTS_N_INSNS (1), /* load_unaligned. */
1298 COSTS_N_INSNS (1), /* store. */
1299 COSTS_N_INSNS (3), /* strd. */
1300 COSTS_N_INSNS (1), /* stm_1st. */
1301 1, /* stm_regs_per_insn_1st. */
1302 2, /* stm_regs_per_insn_subsequent. */
1303 COSTS_N_INSNS (2), /* storef. */
1304 COSTS_N_INSNS (2), /* stored. */
1305 COSTS_N_INSNS (1), /* store_unaligned. */
1306 COSTS_N_INSNS (1), /* loadv. */
1307 COSTS_N_INSNS (1) /* storev. */
1308 },
1309 {
1310 /* FP SFmode */
1311 {
1312 COSTS_N_INSNS (15), /* div. */
1313 COSTS_N_INSNS (3), /* mult. */
1314 COSTS_N_INSNS (7), /* mult_addsub. */
1315 COSTS_N_INSNS (7), /* fma. */
1316 COSTS_N_INSNS (3), /* addsub. */
1317 COSTS_N_INSNS (3), /* fpconst. */
1318 COSTS_N_INSNS (3), /* neg. */
1319 COSTS_N_INSNS (3), /* compare. */
1320 COSTS_N_INSNS (3), /* widen. */
1321 COSTS_N_INSNS (3), /* narrow. */
1322 COSTS_N_INSNS (3), /* toint. */
1323 COSTS_N_INSNS (3), /* fromint. */
1324 COSTS_N_INSNS (3) /* roundint. */
1325 },
1326 /* FP DFmode */
1327 {
1328 COSTS_N_INSNS (30), /* div. */
1329 COSTS_N_INSNS (6), /* mult. */
1330 COSTS_N_INSNS (10), /* mult_addsub. */
1331 COSTS_N_INSNS (7), /* fma. */
1332 COSTS_N_INSNS (3), /* addsub. */
1333 COSTS_N_INSNS (3), /* fpconst. */
1334 COSTS_N_INSNS (3), /* neg. */
1335 COSTS_N_INSNS (3), /* compare. */
1336 COSTS_N_INSNS (3), /* widen. */
1337 COSTS_N_INSNS (3), /* narrow. */
1338 COSTS_N_INSNS (3), /* toint. */
1339 COSTS_N_INSNS (3), /* fromint. */
1340 COSTS_N_INSNS (3) /* roundint. */
1341 }
1342 },
1343 /* Vector */
1344 {
1345 COSTS_N_INSNS (1) /* alu. */
1346 }
1347 };
1348
1349
1350 const struct cpu_cost_table cortexa7_extra_costs =
1351 {
1352 /* ALU */
1353 {
1354 0, /* arith. */
1355 0, /* logical. */
1356 COSTS_N_INSNS (1), /* shift. */
1357 COSTS_N_INSNS (1), /* shift_reg. */
1358 COSTS_N_INSNS (1), /* arith_shift. */
1359 COSTS_N_INSNS (1), /* arith_shift_reg. */
1360 COSTS_N_INSNS (1), /* log_shift. */
1361 COSTS_N_INSNS (1), /* log_shift_reg. */
1362 COSTS_N_INSNS (1), /* extend. */
1363 COSTS_N_INSNS (1), /* extend_arith. */
1364 COSTS_N_INSNS (1), /* bfi. */
1365 COSTS_N_INSNS (1), /* bfx. */
1366 COSTS_N_INSNS (1), /* clz. */
1367 COSTS_N_INSNS (1), /* rev. */
1368 0, /* non_exec. */
1369 true /* non_exec_costs_exec. */
1370 },
1371
1372 {
1373 /* MULT SImode */
1374 {
1375 0, /* simple. */
1376 COSTS_N_INSNS (1), /* flag_setting. */
1377 COSTS_N_INSNS (1), /* extend. */
1378 COSTS_N_INSNS (1), /* add. */
1379 COSTS_N_INSNS (1), /* extend_add. */
1380 COSTS_N_INSNS (7) /* idiv. */
1381 },
1382 /* MULT DImode */
1383 {
1384 0, /* simple (N/A). */
1385 0, /* flag_setting (N/A). */
1386 COSTS_N_INSNS (1), /* extend. */
1387 0, /* add. */
1388 COSTS_N_INSNS (2), /* extend_add. */
1389 0 /* idiv (N/A). */
1390 }
1391 },
1392 /* LD/ST */
1393 {
1394 COSTS_N_INSNS (1), /* load. */
1395 COSTS_N_INSNS (1), /* load_sign_extend. */
1396 COSTS_N_INSNS (3), /* ldrd. */
1397 COSTS_N_INSNS (1), /* ldm_1st. */
1398 1, /* ldm_regs_per_insn_1st. */
1399 2, /* ldm_regs_per_insn_subsequent. */
1400 COSTS_N_INSNS (2), /* loadf. */
1401 COSTS_N_INSNS (2), /* loadd. */
1402 COSTS_N_INSNS (1), /* load_unaligned. */
1403 COSTS_N_INSNS (1), /* store. */
1404 COSTS_N_INSNS (3), /* strd. */
1405 COSTS_N_INSNS (1), /* stm_1st. */
1406 1, /* stm_regs_per_insn_1st. */
1407 2, /* stm_regs_per_insn_subsequent. */
1408 COSTS_N_INSNS (2), /* storef. */
1409 COSTS_N_INSNS (2), /* stored. */
1410 COSTS_N_INSNS (1), /* store_unaligned. */
1411 COSTS_N_INSNS (1), /* loadv. */
1412 COSTS_N_INSNS (1) /* storev. */
1413 },
1414 {
1415 /* FP SFmode */
1416 {
1417 COSTS_N_INSNS (15), /* div. */
1418 COSTS_N_INSNS (3), /* mult. */
1419 COSTS_N_INSNS (7), /* mult_addsub. */
1420 COSTS_N_INSNS (7), /* fma. */
1421 COSTS_N_INSNS (3), /* addsub. */
1422 COSTS_N_INSNS (3), /* fpconst. */
1423 COSTS_N_INSNS (3), /* neg. */
1424 COSTS_N_INSNS (3), /* compare. */
1425 COSTS_N_INSNS (3), /* widen. */
1426 COSTS_N_INSNS (3), /* narrow. */
1427 COSTS_N_INSNS (3), /* toint. */
1428 COSTS_N_INSNS (3), /* fromint. */
1429 COSTS_N_INSNS (3) /* roundint. */
1430 },
1431 /* FP DFmode */
1432 {
1433 COSTS_N_INSNS (30), /* div. */
1434 COSTS_N_INSNS (6), /* mult. */
1435 COSTS_N_INSNS (10), /* mult_addsub. */
1436 COSTS_N_INSNS (7), /* fma. */
1437 COSTS_N_INSNS (3), /* addsub. */
1438 COSTS_N_INSNS (3), /* fpconst. */
1439 COSTS_N_INSNS (3), /* neg. */
1440 COSTS_N_INSNS (3), /* compare. */
1441 COSTS_N_INSNS (3), /* widen. */
1442 COSTS_N_INSNS (3), /* narrow. */
1443 COSTS_N_INSNS (3), /* toint. */
1444 COSTS_N_INSNS (3), /* fromint. */
1445 COSTS_N_INSNS (3) /* roundint. */
1446 }
1447 },
1448 /* Vector */
1449 {
1450 COSTS_N_INSNS (1) /* alu. */
1451 }
1452 };
1453
1454 const struct cpu_cost_table cortexa12_extra_costs =
1455 {
1456 /* ALU */
1457 {
1458 0, /* arith. */
1459 0, /* logical. */
1460 0, /* shift. */
1461 COSTS_N_INSNS (1), /* shift_reg. */
1462 COSTS_N_INSNS (1), /* arith_shift. */
1463 COSTS_N_INSNS (1), /* arith_shift_reg. */
1464 COSTS_N_INSNS (1), /* log_shift. */
1465 COSTS_N_INSNS (1), /* log_shift_reg. */
1466 0, /* extend. */
1467 COSTS_N_INSNS (1), /* extend_arith. */
1468 0, /* bfi. */
1469 COSTS_N_INSNS (1), /* bfx. */
1470 COSTS_N_INSNS (1), /* clz. */
1471 COSTS_N_INSNS (1), /* rev. */
1472 0, /* non_exec. */
1473 true /* non_exec_costs_exec. */
1474 },
1475 /* MULT SImode */
1476 {
1477 {
1478 COSTS_N_INSNS (2), /* simple. */
1479 COSTS_N_INSNS (3), /* flag_setting. */
1480 COSTS_N_INSNS (2), /* extend. */
1481 COSTS_N_INSNS (3), /* add. */
1482 COSTS_N_INSNS (2), /* extend_add. */
1483 COSTS_N_INSNS (18) /* idiv. */
1484 },
1485 /* MULT DImode */
1486 {
1487 0, /* simple (N/A). */
1488 0, /* flag_setting (N/A). */
1489 COSTS_N_INSNS (3), /* extend. */
1490 0, /* add (N/A). */
1491 COSTS_N_INSNS (3), /* extend_add. */
1492 0 /* idiv (N/A). */
1493 }
1494 },
1495 /* LD/ST */
1496 {
1497 COSTS_N_INSNS (3), /* load. */
1498 COSTS_N_INSNS (3), /* load_sign_extend. */
1499 COSTS_N_INSNS (3), /* ldrd. */
1500 COSTS_N_INSNS (3), /* ldm_1st. */
1501 1, /* ldm_regs_per_insn_1st. */
1502 2, /* ldm_regs_per_insn_subsequent. */
1503 COSTS_N_INSNS (3), /* loadf. */
1504 COSTS_N_INSNS (3), /* loadd. */
1505 0, /* load_unaligned. */
1506 0, /* store. */
1507 0, /* strd. */
1508 0, /* stm_1st. */
1509 1, /* stm_regs_per_insn_1st. */
1510 2, /* stm_regs_per_insn_subsequent. */
1511 COSTS_N_INSNS (2), /* storef. */
1512 COSTS_N_INSNS (2), /* stored. */
1513 0, /* store_unaligned. */
1514 COSTS_N_INSNS (1), /* loadv. */
1515 COSTS_N_INSNS (1) /* storev. */
1516 },
1517 {
1518 /* FP SFmode */
1519 {
1520 COSTS_N_INSNS (17), /* div. */
1521 COSTS_N_INSNS (4), /* mult. */
1522 COSTS_N_INSNS (8), /* mult_addsub. */
1523 COSTS_N_INSNS (8), /* fma. */
1524 COSTS_N_INSNS (4), /* addsub. */
1525 COSTS_N_INSNS (2), /* fpconst. */
1526 COSTS_N_INSNS (2), /* neg. */
1527 COSTS_N_INSNS (2), /* compare. */
1528 COSTS_N_INSNS (4), /* widen. */
1529 COSTS_N_INSNS (4), /* narrow. */
1530 COSTS_N_INSNS (4), /* toint. */
1531 COSTS_N_INSNS (4), /* fromint. */
1532 COSTS_N_INSNS (4) /* roundint. */
1533 },
1534 /* FP DFmode */
1535 {
1536 COSTS_N_INSNS (31), /* div. */
1537 COSTS_N_INSNS (4), /* mult. */
1538 COSTS_N_INSNS (8), /* mult_addsub. */
1539 COSTS_N_INSNS (8), /* fma. */
1540 COSTS_N_INSNS (4), /* addsub. */
1541 COSTS_N_INSNS (2), /* fpconst. */
1542 COSTS_N_INSNS (2), /* neg. */
1543 COSTS_N_INSNS (2), /* compare. */
1544 COSTS_N_INSNS (4), /* widen. */
1545 COSTS_N_INSNS (4), /* narrow. */
1546 COSTS_N_INSNS (4), /* toint. */
1547 COSTS_N_INSNS (4), /* fromint. */
1548 COSTS_N_INSNS (4) /* roundint. */
1549 }
1550 },
1551 /* Vector */
1552 {
1553 COSTS_N_INSNS (1) /* alu. */
1554 }
1555 };
1556
1557 const struct cpu_cost_table cortexa15_extra_costs =
1558 {
1559 /* ALU */
1560 {
1561 0, /* arith. */
1562 0, /* logical. */
1563 0, /* shift. */
1564 0, /* shift_reg. */
1565 COSTS_N_INSNS (1), /* arith_shift. */
1566 COSTS_N_INSNS (1), /* arith_shift_reg. */
1567 COSTS_N_INSNS (1), /* log_shift. */
1568 COSTS_N_INSNS (1), /* log_shift_reg. */
1569 0, /* extend. */
1570 COSTS_N_INSNS (1), /* extend_arith. */
1571 COSTS_N_INSNS (1), /* bfi. */
1572 0, /* bfx. */
1573 0, /* clz. */
1574 0, /* rev. */
1575 0, /* non_exec. */
1576 true /* non_exec_costs_exec. */
1577 },
1578 /* MULT SImode */
1579 {
1580 {
1581 COSTS_N_INSNS (2), /* simple. */
1582 COSTS_N_INSNS (3), /* flag_setting. */
1583 COSTS_N_INSNS (2), /* extend. */
1584 COSTS_N_INSNS (2), /* add. */
1585 COSTS_N_INSNS (2), /* extend_add. */
1586 COSTS_N_INSNS (18) /* idiv. */
1587 },
1588 /* MULT DImode */
1589 {
1590 0, /* simple (N/A). */
1591 0, /* flag_setting (N/A). */
1592 COSTS_N_INSNS (3), /* extend. */
1593 0, /* add (N/A). */
1594 COSTS_N_INSNS (3), /* extend_add. */
1595 0 /* idiv (N/A). */
1596 }
1597 },
1598 /* LD/ST */
1599 {
1600 COSTS_N_INSNS (3), /* load. */
1601 COSTS_N_INSNS (3), /* load_sign_extend. */
1602 COSTS_N_INSNS (3), /* ldrd. */
1603 COSTS_N_INSNS (4), /* ldm_1st. */
1604 1, /* ldm_regs_per_insn_1st. */
1605 2, /* ldm_regs_per_insn_subsequent. */
1606 COSTS_N_INSNS (4), /* loadf. */
1607 COSTS_N_INSNS (4), /* loadd. */
1608 0, /* load_unaligned. */
1609 0, /* store. */
1610 0, /* strd. */
1611 COSTS_N_INSNS (1), /* stm_1st. */
1612 1, /* stm_regs_per_insn_1st. */
1613 2, /* stm_regs_per_insn_subsequent. */
1614 0, /* storef. */
1615 0, /* stored. */
1616 0, /* store_unaligned. */
1617 COSTS_N_INSNS (1), /* loadv. */
1618 COSTS_N_INSNS (1) /* storev. */
1619 },
1620 {
1621 /* FP SFmode */
1622 {
1623 COSTS_N_INSNS (17), /* div. */
1624 COSTS_N_INSNS (4), /* mult. */
1625 COSTS_N_INSNS (8), /* mult_addsub. */
1626 COSTS_N_INSNS (8), /* fma. */
1627 COSTS_N_INSNS (4), /* addsub. */
1628 COSTS_N_INSNS (2), /* fpconst. */
1629 COSTS_N_INSNS (2), /* neg. */
1630 COSTS_N_INSNS (5), /* compare. */
1631 COSTS_N_INSNS (4), /* widen. */
1632 COSTS_N_INSNS (4), /* narrow. */
1633 COSTS_N_INSNS (4), /* toint. */
1634 COSTS_N_INSNS (4), /* fromint. */
1635 COSTS_N_INSNS (4) /* roundint. */
1636 },
1637 /* FP DFmode */
1638 {
1639 COSTS_N_INSNS (31), /* div. */
1640 COSTS_N_INSNS (4), /* mult. */
1641 COSTS_N_INSNS (8), /* mult_addsub. */
1642 COSTS_N_INSNS (8), /* fma. */
1643 COSTS_N_INSNS (4), /* addsub. */
1644 COSTS_N_INSNS (2), /* fpconst. */
1645 COSTS_N_INSNS (2), /* neg. */
1646 COSTS_N_INSNS (2), /* compare. */
1647 COSTS_N_INSNS (4), /* widen. */
1648 COSTS_N_INSNS (4), /* narrow. */
1649 COSTS_N_INSNS (4), /* toint. */
1650 COSTS_N_INSNS (4), /* fromint. */
1651 COSTS_N_INSNS (4) /* roundint. */
1652 }
1653 },
1654 /* Vector */
1655 {
1656 COSTS_N_INSNS (1) /* alu. */
1657 }
1658 };
1659
1660 const struct cpu_cost_table v7m_extra_costs =
1661 {
1662 /* ALU */
1663 {
1664 0, /* arith. */
1665 0, /* logical. */
1666 0, /* shift. */
1667 0, /* shift_reg. */
1668 0, /* arith_shift. */
1669 COSTS_N_INSNS (1), /* arith_shift_reg. */
1670 0, /* log_shift. */
1671 COSTS_N_INSNS (1), /* log_shift_reg. */
1672 0, /* extend. */
1673 COSTS_N_INSNS (1), /* extend_arith. */
1674 0, /* bfi. */
1675 0, /* bfx. */
1676 0, /* clz. */
1677 0, /* rev. */
1678 COSTS_N_INSNS (1), /* non_exec. */
1679 false /* non_exec_costs_exec. */
1680 },
1681 {
1682 /* MULT SImode */
1683 {
1684 COSTS_N_INSNS (1), /* simple. */
1685 COSTS_N_INSNS (1), /* flag_setting. */
1686 COSTS_N_INSNS (2), /* extend. */
1687 COSTS_N_INSNS (1), /* add. */
1688 COSTS_N_INSNS (3), /* extend_add. */
1689 COSTS_N_INSNS (8) /* idiv. */
1690 },
1691 /* MULT DImode */
1692 {
1693 0, /* simple (N/A). */
1694 0, /* flag_setting (N/A). */
1695 COSTS_N_INSNS (2), /* extend. */
1696 0, /* add (N/A). */
1697 COSTS_N_INSNS (3), /* extend_add. */
1698 0 /* idiv (N/A). */
1699 }
1700 },
1701 /* LD/ST */
1702 {
1703 COSTS_N_INSNS (2), /* load. */
1704 0, /* load_sign_extend. */
1705 COSTS_N_INSNS (3), /* ldrd. */
1706 COSTS_N_INSNS (2), /* ldm_1st. */
1707 1, /* ldm_regs_per_insn_1st. */
1708 1, /* ldm_regs_per_insn_subsequent. */
1709 COSTS_N_INSNS (2), /* loadf. */
1710 COSTS_N_INSNS (3), /* loadd. */
1711 COSTS_N_INSNS (1), /* load_unaligned. */
1712 COSTS_N_INSNS (2), /* store. */
1713 COSTS_N_INSNS (3), /* strd. */
1714 COSTS_N_INSNS (2), /* stm_1st. */
1715 1, /* stm_regs_per_insn_1st. */
1716 1, /* stm_regs_per_insn_subsequent. */
1717 COSTS_N_INSNS (2), /* storef. */
1718 COSTS_N_INSNS (3), /* stored. */
1719 COSTS_N_INSNS (1), /* store_unaligned. */
1720 COSTS_N_INSNS (1), /* loadv. */
1721 COSTS_N_INSNS (1) /* storev. */
1722 },
1723 {
1724 /* FP SFmode */
1725 {
1726 COSTS_N_INSNS (7), /* div. */
1727 COSTS_N_INSNS (2), /* mult. */
1728 COSTS_N_INSNS (5), /* mult_addsub. */
1729 COSTS_N_INSNS (3), /* fma. */
1730 COSTS_N_INSNS (1), /* addsub. */
1731 0, /* fpconst. */
1732 0, /* neg. */
1733 0, /* compare. */
1734 0, /* widen. */
1735 0, /* narrow. */
1736 0, /* toint. */
1737 0, /* fromint. */
1738 0 /* roundint. */
1739 },
1740 /* FP DFmode */
1741 {
1742 COSTS_N_INSNS (15), /* div. */
1743 COSTS_N_INSNS (5), /* mult. */
1744 COSTS_N_INSNS (7), /* mult_addsub. */
1745 COSTS_N_INSNS (7), /* fma. */
1746 COSTS_N_INSNS (3), /* addsub. */
1747 0, /* fpconst. */
1748 0, /* neg. */
1749 0, /* compare. */
1750 0, /* widen. */
1751 0, /* narrow. */
1752 0, /* toint. */
1753 0, /* fromint. */
1754 0 /* roundint. */
1755 }
1756 },
1757 /* Vector */
1758 {
1759 COSTS_N_INSNS (1) /* alu. */
1760 }
1761 };
1762
1763 const struct tune_params arm_slowmul_tune =
1764 {
1765 &generic_extra_costs, /* Insn extra costs. */
1766 NULL, /* Sched adj cost. */
1767 arm_default_branch_cost,
1768 &arm_default_vec_cost,
1769 3, /* Constant limit. */
1770 5, /* Max cond insns. */
1771 8, /* Memset max inline. */
1772 1, /* Issue rate. */
1773 ARM_PREFETCH_NOT_BENEFICIAL,
1774 tune_params::PREF_CONST_POOL_TRUE,
1775 tune_params::PREF_LDRD_FALSE,
1776 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1777 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1778 tune_params::DISPARAGE_FLAGS_NEITHER,
1779 tune_params::PREF_NEON_64_FALSE,
1780 tune_params::PREF_NEON_STRINGOPS_FALSE,
1781 tune_params::FUSE_NOTHING,
1782 tune_params::SCHED_AUTOPREF_OFF
1783 };
1784
1785 const struct tune_params arm_fastmul_tune =
1786 {
1787 &generic_extra_costs, /* Insn extra costs. */
1788 NULL, /* Sched adj cost. */
1789 arm_default_branch_cost,
1790 &arm_default_vec_cost,
1791 1, /* Constant limit. */
1792 5, /* Max cond insns. */
1793 8, /* Memset max inline. */
1794 1, /* Issue rate. */
1795 ARM_PREFETCH_NOT_BENEFICIAL,
1796 tune_params::PREF_CONST_POOL_TRUE,
1797 tune_params::PREF_LDRD_FALSE,
1798 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1799 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1800 tune_params::DISPARAGE_FLAGS_NEITHER,
1801 tune_params::PREF_NEON_64_FALSE,
1802 tune_params::PREF_NEON_STRINGOPS_FALSE,
1803 tune_params::FUSE_NOTHING,
1804 tune_params::SCHED_AUTOPREF_OFF
1805 };
1806
1807 /* StrongARM has early execution of branches, so a sequence that is worth
1808 skipping is shorter. Set max_insns_skipped to a lower value. */
1809
1810 const struct tune_params arm_strongarm_tune =
1811 {
1812 &generic_extra_costs, /* Insn extra costs. */
1813 NULL, /* Sched adj cost. */
1814 arm_default_branch_cost,
1815 &arm_default_vec_cost,
1816 1, /* Constant limit. */
1817 3, /* Max cond insns. */
1818 8, /* Memset max inline. */
1819 1, /* Issue rate. */
1820 ARM_PREFETCH_NOT_BENEFICIAL,
1821 tune_params::PREF_CONST_POOL_TRUE,
1822 tune_params::PREF_LDRD_FALSE,
1823 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1824 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1825 tune_params::DISPARAGE_FLAGS_NEITHER,
1826 tune_params::PREF_NEON_64_FALSE,
1827 tune_params::PREF_NEON_STRINGOPS_FALSE,
1828 tune_params::FUSE_NOTHING,
1829 tune_params::SCHED_AUTOPREF_OFF
1830 };
1831
1832 const struct tune_params arm_xscale_tune =
1833 {
1834 &generic_extra_costs, /* Insn extra costs. */
1835 xscale_sched_adjust_cost,
1836 arm_default_branch_cost,
1837 &arm_default_vec_cost,
1838 2, /* Constant limit. */
1839 3, /* Max cond insns. */
1840 8, /* Memset max inline. */
1841 1, /* Issue rate. */
1842 ARM_PREFETCH_NOT_BENEFICIAL,
1843 tune_params::PREF_CONST_POOL_TRUE,
1844 tune_params::PREF_LDRD_FALSE,
1845 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1846 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1847 tune_params::DISPARAGE_FLAGS_NEITHER,
1848 tune_params::PREF_NEON_64_FALSE,
1849 tune_params::PREF_NEON_STRINGOPS_FALSE,
1850 tune_params::FUSE_NOTHING,
1851 tune_params::SCHED_AUTOPREF_OFF
1852 };
1853
1854 const struct tune_params arm_9e_tune =
1855 {
1856 &generic_extra_costs, /* Insn extra costs. */
1857 NULL, /* Sched adj cost. */
1858 arm_default_branch_cost,
1859 &arm_default_vec_cost,
1860 1, /* Constant limit. */
1861 5, /* Max cond insns. */
1862 8, /* Memset max inline. */
1863 1, /* Issue rate. */
1864 ARM_PREFETCH_NOT_BENEFICIAL,
1865 tune_params::PREF_CONST_POOL_TRUE,
1866 tune_params::PREF_LDRD_FALSE,
1867 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1868 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1869 tune_params::DISPARAGE_FLAGS_NEITHER,
1870 tune_params::PREF_NEON_64_FALSE,
1871 tune_params::PREF_NEON_STRINGOPS_FALSE,
1872 tune_params::FUSE_NOTHING,
1873 tune_params::SCHED_AUTOPREF_OFF
1874 };
1875
1876 const struct tune_params arm_marvell_pj4_tune =
1877 {
1878 &generic_extra_costs, /* Insn extra costs. */
1879 NULL, /* Sched adj cost. */
1880 arm_default_branch_cost,
1881 &arm_default_vec_cost,
1882 1, /* Constant limit. */
1883 5, /* Max cond insns. */
1884 8, /* Memset max inline. */
1885 2, /* Issue rate. */
1886 ARM_PREFETCH_NOT_BENEFICIAL,
1887 tune_params::PREF_CONST_POOL_TRUE,
1888 tune_params::PREF_LDRD_FALSE,
1889 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1890 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1891 tune_params::DISPARAGE_FLAGS_NEITHER,
1892 tune_params::PREF_NEON_64_FALSE,
1893 tune_params::PREF_NEON_STRINGOPS_FALSE,
1894 tune_params::FUSE_NOTHING,
1895 tune_params::SCHED_AUTOPREF_OFF
1896 };
1897
1898 const struct tune_params arm_v6t2_tune =
1899 {
1900 &generic_extra_costs, /* Insn extra costs. */
1901 NULL, /* Sched adj cost. */
1902 arm_default_branch_cost,
1903 &arm_default_vec_cost,
1904 1, /* Constant limit. */
1905 5, /* Max cond insns. */
1906 8, /* Memset max inline. */
1907 1, /* Issue rate. */
1908 ARM_PREFETCH_NOT_BENEFICIAL,
1909 tune_params::PREF_CONST_POOL_FALSE,
1910 tune_params::PREF_LDRD_FALSE,
1911 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1912 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1913 tune_params::DISPARAGE_FLAGS_NEITHER,
1914 tune_params::PREF_NEON_64_FALSE,
1915 tune_params::PREF_NEON_STRINGOPS_FALSE,
1916 tune_params::FUSE_NOTHING,
1917 tune_params::SCHED_AUTOPREF_OFF
1918 };
1919
1920
1921 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1922 const struct tune_params arm_cortex_tune =
1923 {
1924 &generic_extra_costs,
1925 NULL, /* Sched adj cost. */
1926 arm_default_branch_cost,
1927 &arm_default_vec_cost,
1928 1, /* Constant limit. */
1929 5, /* Max cond insns. */
1930 8, /* Memset max inline. */
1931 2, /* Issue rate. */
1932 ARM_PREFETCH_NOT_BENEFICIAL,
1933 tune_params::PREF_CONST_POOL_FALSE,
1934 tune_params::PREF_LDRD_FALSE,
1935 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1936 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1937 tune_params::DISPARAGE_FLAGS_NEITHER,
1938 tune_params::PREF_NEON_64_FALSE,
1939 tune_params::PREF_NEON_STRINGOPS_FALSE,
1940 tune_params::FUSE_NOTHING,
1941 tune_params::SCHED_AUTOPREF_OFF
1942 };
1943
1944 const struct tune_params arm_cortex_a8_tune =
1945 {
1946 &cortexa8_extra_costs,
1947 NULL, /* Sched adj cost. */
1948 arm_default_branch_cost,
1949 &arm_default_vec_cost,
1950 1, /* Constant limit. */
1951 5, /* Max cond insns. */
1952 8, /* Memset max inline. */
1953 2, /* Issue rate. */
1954 ARM_PREFETCH_NOT_BENEFICIAL,
1955 tune_params::PREF_CONST_POOL_FALSE,
1956 tune_params::PREF_LDRD_FALSE,
1957 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1958 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1959 tune_params::DISPARAGE_FLAGS_NEITHER,
1960 tune_params::PREF_NEON_64_FALSE,
1961 tune_params::PREF_NEON_STRINGOPS_TRUE,
1962 tune_params::FUSE_NOTHING,
1963 tune_params::SCHED_AUTOPREF_OFF
1964 };
1965
1966 const struct tune_params arm_cortex_a7_tune =
1967 {
1968 &cortexa7_extra_costs,
1969 NULL, /* Sched adj cost. */
1970 arm_default_branch_cost,
1971 &arm_default_vec_cost,
1972 1, /* Constant limit. */
1973 5, /* Max cond insns. */
1974 8, /* Memset max inline. */
1975 2, /* Issue rate. */
1976 ARM_PREFETCH_NOT_BENEFICIAL,
1977 tune_params::PREF_CONST_POOL_FALSE,
1978 tune_params::PREF_LDRD_FALSE,
1979 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1980 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1981 tune_params::DISPARAGE_FLAGS_NEITHER,
1982 tune_params::PREF_NEON_64_FALSE,
1983 tune_params::PREF_NEON_STRINGOPS_TRUE,
1984 tune_params::FUSE_NOTHING,
1985 tune_params::SCHED_AUTOPREF_OFF
1986 };
1987
1988 const struct tune_params arm_cortex_a15_tune =
1989 {
1990 &cortexa15_extra_costs,
1991 NULL, /* Sched adj cost. */
1992 arm_default_branch_cost,
1993 &arm_default_vec_cost,
1994 1, /* Constant limit. */
1995 2, /* Max cond insns. */
1996 8, /* Memset max inline. */
1997 3, /* Issue rate. */
1998 ARM_PREFETCH_NOT_BENEFICIAL,
1999 tune_params::PREF_CONST_POOL_FALSE,
2000 tune_params::PREF_LDRD_TRUE,
2001 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2002 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2003 tune_params::DISPARAGE_FLAGS_ALL,
2004 tune_params::PREF_NEON_64_FALSE,
2005 tune_params::PREF_NEON_STRINGOPS_TRUE,
2006 tune_params::FUSE_NOTHING,
2007 tune_params::SCHED_AUTOPREF_FULL
2008 };
2009
2010 const struct tune_params arm_cortex_a35_tune =
2011 {
2012 &cortexa53_extra_costs,
2013 NULL, /* Sched adj cost. */
2014 arm_default_branch_cost,
2015 &arm_default_vec_cost,
2016 1, /* Constant limit. */
2017 5, /* Max cond insns. */
2018 8, /* Memset max inline. */
2019 1, /* Issue rate. */
2020 ARM_PREFETCH_NOT_BENEFICIAL,
2021 tune_params::PREF_CONST_POOL_FALSE,
2022 tune_params::PREF_LDRD_FALSE,
2023 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2024 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2025 tune_params::DISPARAGE_FLAGS_NEITHER,
2026 tune_params::PREF_NEON_64_FALSE,
2027 tune_params::PREF_NEON_STRINGOPS_TRUE,
2028 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2029 tune_params::SCHED_AUTOPREF_OFF
2030 };
2031
2032 const struct tune_params arm_cortex_a53_tune =
2033 {
2034 &cortexa53_extra_costs,
2035 NULL, /* Sched adj cost. */
2036 arm_default_branch_cost,
2037 &arm_default_vec_cost,
2038 1, /* Constant limit. */
2039 5, /* Max cond insns. */
2040 8, /* Memset max inline. */
2041 2, /* Issue rate. */
2042 ARM_PREFETCH_NOT_BENEFICIAL,
2043 tune_params::PREF_CONST_POOL_FALSE,
2044 tune_params::PREF_LDRD_FALSE,
2045 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2046 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2047 tune_params::DISPARAGE_FLAGS_NEITHER,
2048 tune_params::PREF_NEON_64_FALSE,
2049 tune_params::PREF_NEON_STRINGOPS_TRUE,
2050 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2051 tune_params::SCHED_AUTOPREF_OFF
2052 };
2053
2054 const struct tune_params arm_cortex_a57_tune =
2055 {
2056 &cortexa57_extra_costs,
2057 NULL, /* Sched adj cost. */
2058 arm_default_branch_cost,
2059 &arm_default_vec_cost,
2060 1, /* Constant limit. */
2061 2, /* Max cond insns. */
2062 8, /* Memset max inline. */
2063 3, /* Issue rate. */
2064 ARM_PREFETCH_NOT_BENEFICIAL,
2065 tune_params::PREF_CONST_POOL_FALSE,
2066 tune_params::PREF_LDRD_TRUE,
2067 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2068 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2069 tune_params::DISPARAGE_FLAGS_ALL,
2070 tune_params::PREF_NEON_64_FALSE,
2071 tune_params::PREF_NEON_STRINGOPS_TRUE,
2072 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2073 tune_params::SCHED_AUTOPREF_FULL
2074 };
2075
2076 const struct tune_params arm_exynosm1_tune =
2077 {
2078 &exynosm1_extra_costs,
2079 NULL, /* Sched adj cost. */
2080 arm_default_branch_cost,
2081 &arm_default_vec_cost,
2082 1, /* Constant limit. */
2083 2, /* Max cond insns. */
2084 8, /* Memset max inline. */
2085 3, /* Issue rate. */
2086 ARM_PREFETCH_NOT_BENEFICIAL,
2087 tune_params::PREF_CONST_POOL_FALSE,
2088 tune_params::PREF_LDRD_TRUE,
2089 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2090 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2091 tune_params::DISPARAGE_FLAGS_ALL,
2092 tune_params::PREF_NEON_64_FALSE,
2093 tune_params::PREF_NEON_STRINGOPS_TRUE,
2094 tune_params::FUSE_NOTHING,
2095 tune_params::SCHED_AUTOPREF_OFF
2096 };
2097
2098 const struct tune_params arm_xgene1_tune =
2099 {
2100 &xgene1_extra_costs,
2101 NULL, /* Sched adj cost. */
2102 arm_default_branch_cost,
2103 &arm_default_vec_cost,
2104 1, /* Constant limit. */
2105 2, /* Max cond insns. */
2106 32, /* Memset max inline. */
2107 4, /* Issue rate. */
2108 ARM_PREFETCH_NOT_BENEFICIAL,
2109 tune_params::PREF_CONST_POOL_FALSE,
2110 tune_params::PREF_LDRD_TRUE,
2111 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2112 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2113 tune_params::DISPARAGE_FLAGS_ALL,
2114 tune_params::PREF_NEON_64_FALSE,
2115 tune_params::PREF_NEON_STRINGOPS_FALSE,
2116 tune_params::FUSE_NOTHING,
2117 tune_params::SCHED_AUTOPREF_OFF
2118 };
2119
2120 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2121 less appealing. Set max_insns_skipped to a low value. */
2122
2123 const struct tune_params arm_cortex_a5_tune =
2124 {
2125 &cortexa5_extra_costs,
2126 NULL, /* Sched adj cost. */
2127 arm_cortex_a5_branch_cost,
2128 &arm_default_vec_cost,
2129 1, /* Constant limit. */
2130 1, /* Max cond insns. */
2131 8, /* Memset max inline. */
2132 2, /* Issue rate. */
2133 ARM_PREFETCH_NOT_BENEFICIAL,
2134 tune_params::PREF_CONST_POOL_FALSE,
2135 tune_params::PREF_LDRD_FALSE,
2136 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2137 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2138 tune_params::DISPARAGE_FLAGS_NEITHER,
2139 tune_params::PREF_NEON_64_FALSE,
2140 tune_params::PREF_NEON_STRINGOPS_TRUE,
2141 tune_params::FUSE_NOTHING,
2142 tune_params::SCHED_AUTOPREF_OFF
2143 };
2144
2145 const struct tune_params arm_cortex_a9_tune =
2146 {
2147 &cortexa9_extra_costs,
2148 cortex_a9_sched_adjust_cost,
2149 arm_default_branch_cost,
2150 &arm_default_vec_cost,
2151 1, /* Constant limit. */
2152 5, /* Max cond insns. */
2153 8, /* Memset max inline. */
2154 2, /* Issue rate. */
2155 ARM_PREFETCH_BENEFICIAL(4,32,32),
2156 tune_params::PREF_CONST_POOL_FALSE,
2157 tune_params::PREF_LDRD_FALSE,
2158 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2159 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2160 tune_params::DISPARAGE_FLAGS_NEITHER,
2161 tune_params::PREF_NEON_64_FALSE,
2162 tune_params::PREF_NEON_STRINGOPS_FALSE,
2163 tune_params::FUSE_NOTHING,
2164 tune_params::SCHED_AUTOPREF_OFF
2165 };
2166
2167 const struct tune_params arm_cortex_a12_tune =
2168 {
2169 &cortexa12_extra_costs,
2170 NULL, /* Sched adj cost. */
2171 arm_default_branch_cost,
2172 &arm_default_vec_cost, /* Vectorizer costs. */
2173 1, /* Constant limit. */
2174 2, /* Max cond insns. */
2175 8, /* Memset max inline. */
2176 2, /* Issue rate. */
2177 ARM_PREFETCH_NOT_BENEFICIAL,
2178 tune_params::PREF_CONST_POOL_FALSE,
2179 tune_params::PREF_LDRD_TRUE,
2180 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2181 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2182 tune_params::DISPARAGE_FLAGS_ALL,
2183 tune_params::PREF_NEON_64_FALSE,
2184 tune_params::PREF_NEON_STRINGOPS_TRUE,
2185 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2186 tune_params::SCHED_AUTOPREF_OFF
2187 };
2188
2189 const struct tune_params arm_cortex_a73_tune =
2190 {
2191 &cortexa57_extra_costs,
2192 NULL, /* Sched adj cost. */
2193 arm_default_branch_cost,
2194 &arm_default_vec_cost, /* Vectorizer costs. */
2195 1, /* Constant limit. */
2196 2, /* Max cond insns. */
2197 8, /* Memset max inline. */
2198 2, /* Issue rate. */
2199 ARM_PREFETCH_NOT_BENEFICIAL,
2200 tune_params::PREF_CONST_POOL_FALSE,
2201 tune_params::PREF_LDRD_TRUE,
2202 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2203 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2204 tune_params::DISPARAGE_FLAGS_ALL,
2205 tune_params::PREF_NEON_64_FALSE,
2206 tune_params::PREF_NEON_STRINGOPS_TRUE,
2207 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2208 tune_params::SCHED_AUTOPREF_FULL
2209 };
2210
2211 /* armv7m tuning.  On Cortex-M4 cores, for example, MOVW/MOVT each take a single
2212    cycle to execute (so two cycles for the pair).  An LDR from the constant pool also takes two cycles
2213 to execute, but mildly increases pipelining opportunity (consecutive
2214 loads/stores can be pipelined together, saving one cycle), and may also
2215 improve icache utilisation. Hence we prefer the constant pool for such
2216 processors. */
2217
2218 const struct tune_params arm_v7m_tune =
2219 {
2220 &v7m_extra_costs,
2221 NULL, /* Sched adj cost. */
2222 arm_cortex_m_branch_cost,
2223 &arm_default_vec_cost,
2224 1, /* Constant limit. */
2225 2, /* Max cond insns. */
2226 8, /* Memset max inline. */
2227 1, /* Issue rate. */
2228 ARM_PREFETCH_NOT_BENEFICIAL,
2229 tune_params::PREF_CONST_POOL_TRUE,
2230 tune_params::PREF_LDRD_FALSE,
2231 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2232 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2233 tune_params::DISPARAGE_FLAGS_NEITHER,
2234 tune_params::PREF_NEON_64_FALSE,
2235 tune_params::PREF_NEON_STRINGOPS_FALSE,
2236 tune_params::FUSE_NOTHING,
2237 tune_params::SCHED_AUTOPREF_OFF
2238 };
2239
2240 /* Cortex-M7 tuning. */
2241
2242 const struct tune_params arm_cortex_m7_tune =
2243 {
2244 &v7m_extra_costs,
2245 NULL, /* Sched adj cost. */
2246 arm_cortex_m7_branch_cost,
2247 &arm_default_vec_cost,
2248 0, /* Constant limit. */
2249 1, /* Max cond insns. */
2250 8, /* Memset max inline. */
2251 2, /* Issue rate. */
2252 ARM_PREFETCH_NOT_BENEFICIAL,
2253 tune_params::PREF_CONST_POOL_TRUE,
2254 tune_params::PREF_LDRD_FALSE,
2255 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2256 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2257 tune_params::DISPARAGE_FLAGS_NEITHER,
2258 tune_params::PREF_NEON_64_FALSE,
2259 tune_params::PREF_NEON_STRINGOPS_FALSE,
2260 tune_params::FUSE_NOTHING,
2261 tune_params::SCHED_AUTOPREF_OFF
2262 };
2263
2264 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2265 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2266 cortex-m23. */
2267 const struct tune_params arm_v6m_tune =
2268 {
2269 &generic_extra_costs, /* Insn extra costs. */
2270 NULL, /* Sched adj cost. */
2271 arm_default_branch_cost,
2272 &arm_default_vec_cost, /* Vectorizer costs. */
2273 1, /* Constant limit. */
2274 5, /* Max cond insns. */
2275 8, /* Memset max inline. */
2276 1, /* Issue rate. */
2277 ARM_PREFETCH_NOT_BENEFICIAL,
2278 tune_params::PREF_CONST_POOL_FALSE,
2279 tune_params::PREF_LDRD_FALSE,
2280 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2281 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2282 tune_params::DISPARAGE_FLAGS_NEITHER,
2283 tune_params::PREF_NEON_64_FALSE,
2284 tune_params::PREF_NEON_STRINGOPS_FALSE,
2285 tune_params::FUSE_NOTHING,
2286 tune_params::SCHED_AUTOPREF_OFF
2287 };
2288
2289 const struct tune_params arm_fa726te_tune =
2290 {
2291 &generic_extra_costs, /* Insn extra costs. */
2292 fa726te_sched_adjust_cost,
2293 arm_default_branch_cost,
2294 &arm_default_vec_cost,
2295 1, /* Constant limit. */
2296 5, /* Max cond insns. */
2297 8, /* Memset max inline. */
2298 2, /* Issue rate. */
2299 ARM_PREFETCH_NOT_BENEFICIAL,
2300 tune_params::PREF_CONST_POOL_TRUE,
2301 tune_params::PREF_LDRD_FALSE,
2302 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2303 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2304 tune_params::DISPARAGE_FLAGS_NEITHER,
2305 tune_params::PREF_NEON_64_FALSE,
2306 tune_params::PREF_NEON_STRINGOPS_FALSE,
2307 tune_params::FUSE_NOTHING,
2308 tune_params::SCHED_AUTOPREF_OFF
2309 };
2310
2311 /* Auto-generated CPU, FPU and architecture tables. */
2312 #include "arm-cpu-data.h"
2313
2314 /* The name of the preprocessor macro to define for this architecture. PROFILE
2315 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2316 is thus chosen to be big enough to hold the longest architecture name. */
2317
2318 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
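/* Editorial note (illustrative, not part of the original source): with the
   "8A" example from the comment above, the PROFILE placeholder is overwritten
   so that the macro actually defined becomes __ARM_ARCH_8A__; the template
   string itself is what makes the buffer big enough for any architecture
   suffix.  */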
2319
2320 /* Supported TLS relocations. */
2321
2322 enum tls_reloc {
2323 TLS_GD32,
2324 TLS_LDM32,
2325 TLS_LDO32,
2326 TLS_IE32,
2327 TLS_LE32,
2328 TLS_DESCSEQ /* GNU scheme */
2329 };
2330
2331 /* The maximum number of insns to be used when loading a constant. */
2332 inline static int
2333 arm_constant_limit (bool size_p)
2334 {
2335 return size_p ? 1 : current_tune->constant_limit;
2336 }
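/* Editorial note (worked example, not part of the original source): when
   optimizing for size, at most one instruction is allowed for synthesising a
   constant; otherwise the limit comes from the active tuning, e.g. 3 for
   arm_slowmul_tune above and 1 for most of the other tune_params in this
   file.  */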
2337
2338 /* Emit an insn that's a simple single-set. Both the operands must be known
2339 to be valid. */
2340 inline static rtx_insn *
2341 emit_set_insn (rtx x, rtx y)
2342 {
2343 return emit_insn (gen_rtx_SET (x, y));
2344 }
2345
2346 /* Return the number of bits set in VALUE. */
2347 static unsigned
2348 bit_count (unsigned long value)
2349 {
2350 unsigned long count = 0;
2351
2352 while (value)
2353 {
2354 count++;
2355 value &= value - 1; /* Clear the least-significant set bit. */
2356 }
2357
2358 return count;
2359 }
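/* Editorial note (worked example, not part of the original source): for
   value = 0b101100 the loop runs three times:
     0b101100 & 0b101011 = 0b101000
     0b101000 & 0b100111 = 0b100000
     0b100000 & 0b011111 = 0
   so bit_count returns 3, the number of set bits.  */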
2360
2361 /* Return the number of bits set in BMAP. */
2362 static unsigned
2363 bitmap_popcount (const sbitmap bmap)
2364 {
2365 unsigned int count = 0;
2366 unsigned int n = 0;
2367 sbitmap_iterator sbi;
2368
2369 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2370 count++;
2371 return count;
2372 }
2373
2374 typedef struct
2375 {
2376 machine_mode mode;
2377 const char *name;
2378 } arm_fixed_mode_set;
2379
2380 /* A small helper for setting fixed-point library libfuncs. */
2381
2382 static void
2383 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2384 const char *funcname, const char *modename,
2385 int num_suffix)
2386 {
2387 char buffer[50];
2388
2389 if (num_suffix == 0)
2390 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2391 else
2392 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2393
2394 set_optab_libfunc (optable, mode, buffer);
2395 }
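/* Editorial note (illustrative, not part of the original source): a call such
   as
     arm_set_fixed_optab_libfunc (add_optab, E_QQmode, "add", "qq", 3);
   (as issued by the loop further down) registers the library function name
   "__gnu_addqq3" for QQmode addition; with num_suffix == 0 the trailing digit
   is simply omitted.  */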
2396
2397 static void
2398 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2399 machine_mode from, const char *funcname,
2400 const char *toname, const char *fromname)
2401 {
2402 char buffer[50];
2403 const char *maybe_suffix_2 = "";
2404
2405 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2406 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2407 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2408 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2409 maybe_suffix_2 = "2";
2410
2411 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2412 maybe_suffix_2);
2413
2414 set_conv_libfunc (optable, to, from, buffer);
2415 }
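/* Editorial note (illustrative, not part of the original source): the name is
   built as "__gnu_" + funcname + fromname + toname, with a trailing "2" when
   source and destination are both fixed-point modes of the same signedness
   and the same fract/accum class.  For instance, a signed fract-to-fract
   conversion from QQmode to DQmode registers "__gnu_fractqqdq2", while a
   conversion from QQmode to SImode registers "__gnu_fractqqsi" (no
   suffix).  */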
2416
2417 /* Set up library functions unique to ARM. */
2418
2419 static void
2420 arm_init_libfuncs (void)
2421 {
2422 /* For Linux, we have access to kernel support for atomic operations. */
2423 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2424 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2425
2426 /* There are no special library functions unless we are using the
2427 ARM BPABI. */
2428 if (!TARGET_BPABI)
2429 return;
2430
2431 /* The functions below are described in Section 4 of the "Run-Time
2432 ABI for the ARM architecture", Version 1.0. */
2433
2434 /* Double-precision floating-point arithmetic. Table 2. */
2435 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2436 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2437 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2438 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2439 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2440
2441 /* Double-precision comparisons. Table 3. */
2442 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2443 set_optab_libfunc (ne_optab, DFmode, NULL);
2444 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2445 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2446 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2447 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2448 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2449
2450 /* Single-precision floating-point arithmetic. Table 4. */
2451 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2452 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2453 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2454 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2455 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2456
2457 /* Single-precision comparisons. Table 5. */
2458 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2459 set_optab_libfunc (ne_optab, SFmode, NULL);
2460 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2461 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2462 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2463 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2464 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2465
2466 /* Floating-point to integer conversions. Table 6. */
2467 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2468 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2469 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2470 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2471 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2472 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2473 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2474 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2475
2476 /* Conversions between floating types. Table 7. */
2477 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2478 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2479
2480 /* Integer to floating-point conversions. Table 8. */
2481 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2482 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2483 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2484 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2485 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2486 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2487 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2488 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2489
2490 /* Long long. Table 9. */
2491 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2492 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2493 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2494 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2495 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2496 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2497 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2498 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2499
2500 /* Integer (32/32->32) division. \S 4.3.1. */
2501 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2502 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2503
2504 /* The divmod functions are designed so that they can be used for
2505 plain division, even though they return both the quotient and the
2506 remainder. The quotient is returned in the usual location (i.e.,
2507 r0 for SImode, {r0, r1} for DImode), just as would be expected
2508 for an ordinary division routine. Because the AAPCS calling
2509 conventions specify that all of { r0, r1, r2, r3 } are
2510 call-clobbered registers, there is no need to tell the compiler
2511 explicitly that those registers are clobbered by these
2512 routines. */
2513 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2514 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
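/* Editorial note (per the Run-time ABI for the ARM architecture, not part of
   the original source): __aeabi_idivmod returns the quotient in r0 and the
   remainder in r1, and __aeabi_ldivmod returns the quotient in {r0, r1} and
   the remainder in {r2, r3}, which is why the plain-division optabs above can
   safely point at the divmod entry points.  */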
2515
2516 /* For SImode division the ABI provides div-without-mod routines,
2517 which are faster. */
2518 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2519 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2520
2521 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2522 divmod libcalls instead. */
2523 set_optab_libfunc (smod_optab, DImode, NULL);
2524 set_optab_libfunc (umod_optab, DImode, NULL);
2525 set_optab_libfunc (smod_optab, SImode, NULL);
2526 set_optab_libfunc (umod_optab, SImode, NULL);
2527
2528 /* Half-precision float operations. The compiler handles all operations
2529 with NULL libfuncs by converting to SFmode.  */
2530 switch (arm_fp16_format)
2531 {
2532 case ARM_FP16_FORMAT_IEEE:
2533 case ARM_FP16_FORMAT_ALTERNATIVE:
2534
2535 /* Conversions. */
2536 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2537 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2538 ? "__gnu_f2h_ieee"
2539 : "__gnu_f2h_alternative"));
2540 set_conv_libfunc (sext_optab, SFmode, HFmode,
2541 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2542 ? "__gnu_h2f_ieee"
2543 : "__gnu_h2f_alternative"));
2544
2545 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2546 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2547 ? "__gnu_d2h_ieee"
2548 : "__gnu_d2h_alternative"));
2549
2550 /* Arithmetic. */
2551 set_optab_libfunc (add_optab, HFmode, NULL);
2552 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2553 set_optab_libfunc (smul_optab, HFmode, NULL);
2554 set_optab_libfunc (neg_optab, HFmode, NULL);
2555 set_optab_libfunc (sub_optab, HFmode, NULL);
2556
2557 /* Comparisons. */
2558 set_optab_libfunc (eq_optab, HFmode, NULL);
2559 set_optab_libfunc (ne_optab, HFmode, NULL);
2560 set_optab_libfunc (lt_optab, HFmode, NULL);
2561 set_optab_libfunc (le_optab, HFmode, NULL);
2562 set_optab_libfunc (ge_optab, HFmode, NULL);
2563 set_optab_libfunc (gt_optab, HFmode, NULL);
2564 set_optab_libfunc (unord_optab, HFmode, NULL);
2565 break;
2566
2567 default:
2568 break;
2569 }
2570
2571 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2572 {
2573 const arm_fixed_mode_set fixed_arith_modes[] =
2574 {
2575 { E_QQmode, "qq" },
2576 { E_UQQmode, "uqq" },
2577 { E_HQmode, "hq" },
2578 { E_UHQmode, "uhq" },
2579 { E_SQmode, "sq" },
2580 { E_USQmode, "usq" },
2581 { E_DQmode, "dq" },
2582 { E_UDQmode, "udq" },
2583 { E_TQmode, "tq" },
2584 { E_UTQmode, "utq" },
2585 { E_HAmode, "ha" },
2586 { E_UHAmode, "uha" },
2587 { E_SAmode, "sa" },
2588 { E_USAmode, "usa" },
2589 { E_DAmode, "da" },
2590 { E_UDAmode, "uda" },
2591 { E_TAmode, "ta" },
2592 { E_UTAmode, "uta" }
2593 };
2594 const arm_fixed_mode_set fixed_conv_modes[] =
2595 {
2596 { E_QQmode, "qq" },
2597 { E_UQQmode, "uqq" },
2598 { E_HQmode, "hq" },
2599 { E_UHQmode, "uhq" },
2600 { E_SQmode, "sq" },
2601 { E_USQmode, "usq" },
2602 { E_DQmode, "dq" },
2603 { E_UDQmode, "udq" },
2604 { E_TQmode, "tq" },
2605 { E_UTQmode, "utq" },
2606 { E_HAmode, "ha" },
2607 { E_UHAmode, "uha" },
2608 { E_SAmode, "sa" },
2609 { E_USAmode, "usa" },
2610 { E_DAmode, "da" },
2611 { E_UDAmode, "uda" },
2612 { E_TAmode, "ta" },
2613 { E_UTAmode, "uta" },
2614 { E_QImode, "qi" },
2615 { E_HImode, "hi" },
2616 { E_SImode, "si" },
2617 { E_DImode, "di" },
2618 { E_TImode, "ti" },
2619 { E_SFmode, "sf" },
2620 { E_DFmode, "df" }
2621 };
2622 unsigned int i, j;
2623
2624 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2625 {
2626 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2627 "add", fixed_arith_modes[i].name, 3);
2628 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2629 "ssadd", fixed_arith_modes[i].name, 3);
2630 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2631 "usadd", fixed_arith_modes[i].name, 3);
2632 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2633 "sub", fixed_arith_modes[i].name, 3);
2634 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2635 "sssub", fixed_arith_modes[i].name, 3);
2636 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2637 "ussub", fixed_arith_modes[i].name, 3);
2638 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2639 "mul", fixed_arith_modes[i].name, 3);
2640 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2641 "ssmul", fixed_arith_modes[i].name, 3);
2642 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2643 "usmul", fixed_arith_modes[i].name, 3);
2644 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2645 "div", fixed_arith_modes[i].name, 3);
2646 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2647 "udiv", fixed_arith_modes[i].name, 3);
2648 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2649 "ssdiv", fixed_arith_modes[i].name, 3);
2650 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2651 "usdiv", fixed_arith_modes[i].name, 3);
2652 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2653 "neg", fixed_arith_modes[i].name, 2);
2654 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2655 "ssneg", fixed_arith_modes[i].name, 2);
2656 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2657 "usneg", fixed_arith_modes[i].name, 2);
2658 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2659 "ashl", fixed_arith_modes[i].name, 3);
2660 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2661 "ashr", fixed_arith_modes[i].name, 3);
2662 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2663 "lshr", fixed_arith_modes[i].name, 3);
2664 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2665 "ssashl", fixed_arith_modes[i].name, 3);
2666 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2667 "usashl", fixed_arith_modes[i].name, 3);
2668 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2669 "cmp", fixed_arith_modes[i].name, 2);
2670 }
2671
2672 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2673 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2674 {
2675 if (i == j
2676 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2677 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2678 continue;
2679
2680 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2681 fixed_conv_modes[j].mode, "fract",
2682 fixed_conv_modes[i].name,
2683 fixed_conv_modes[j].name);
2684 arm_set_fixed_conv_libfunc (satfract_optab,
2685 fixed_conv_modes[i].mode,
2686 fixed_conv_modes[j].mode, "satfract",
2687 fixed_conv_modes[i].name,
2688 fixed_conv_modes[j].name);
2689 arm_set_fixed_conv_libfunc (fractuns_optab,
2690 fixed_conv_modes[i].mode,
2691 fixed_conv_modes[j].mode, "fractuns",
2692 fixed_conv_modes[i].name,
2693 fixed_conv_modes[j].name);
2694 arm_set_fixed_conv_libfunc (satfractuns_optab,
2695 fixed_conv_modes[i].mode,
2696 fixed_conv_modes[j].mode, "satfractuns",
2697 fixed_conv_modes[i].name,
2698 fixed_conv_modes[j].name);
2699 }
2700 }
2701
2702 if (TARGET_AAPCS_BASED)
2703 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2704 }
2705
2706 /* On AAPCS systems, this is the "struct __va_list". */
2707 static GTY(()) tree va_list_type;
2708
2709 /* Return the type to use as __builtin_va_list. */
2710 static tree
2711 arm_build_builtin_va_list (void)
2712 {
2713 tree va_list_name;
2714 tree ap_field;
2715
2716 if (!TARGET_AAPCS_BASED)
2717 return std_build_builtin_va_list ();
2718
2719 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2720 defined as:
2721
2722 struct __va_list
2723 {
2724 void *__ap;
2725 };
2726
2727 The C Library ABI further reinforces this definition in \S
2728 4.1.
2729
2730 We must follow this definition exactly. The structure tag
2731 name is visible in C++ mangled names, and thus forms a part
2732 of the ABI. The field name may be used by people who
2733 #include <stdarg.h>. */
2734 /* Create the type. */
2735 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2736 /* Give it the required name. */
2737 va_list_name = build_decl (BUILTINS_LOCATION,
2738 TYPE_DECL,
2739 get_identifier ("__va_list"),
2740 va_list_type);
2741 DECL_ARTIFICIAL (va_list_name) = 1;
2742 TYPE_NAME (va_list_type) = va_list_name;
2743 TYPE_STUB_DECL (va_list_type) = va_list_name;
2744 /* Create the __ap field. */
2745 ap_field = build_decl (BUILTINS_LOCATION,
2746 FIELD_DECL,
2747 get_identifier ("__ap"),
2748 ptr_type_node);
2749 DECL_ARTIFICIAL (ap_field) = 1;
2750 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2751 TYPE_FIELDS (va_list_type) = ap_field;
2752 /* Compute its layout. */
2753 layout_type (va_list_type);
2754
2755 return va_list_type;
2756 }
2757
2758 /* Return an expression of type "void *" pointing to the next
2759 available argument in a variable-argument list. VALIST is the
2760 user-level va_list object, of type __builtin_va_list. */
2761 static tree
2762 arm_extract_valist_ptr (tree valist)
2763 {
2764 if (TREE_TYPE (valist) == error_mark_node)
2765 return error_mark_node;
2766
2767 /* On an AAPCS target, the pointer is stored within "struct
2768 va_list". */
2769 if (TARGET_AAPCS_BASED)
2770 {
2771 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2772 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2773 valist, ap_field, NULL_TREE);
2774 }
2775
2776 return valist;
2777 }
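/* Editorial note (not part of the original source): on AAPCS targets the tree
   returned above denotes valist.__ap, i.e. the single pointer field of the
   struct __va_list built by arm_build_builtin_va_list; on other targets
   valist is returned unchanged.  */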
2778
2779 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2780 static void
2781 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2782 {
2783 valist = arm_extract_valist_ptr (valist);
2784 std_expand_builtin_va_start (valist, nextarg);
2785 }
2786
2787 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2788 static tree
2789 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2790 gimple_seq *post_p)
2791 {
2792 valist = arm_extract_valist_ptr (valist);
2793 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2794 }
2795
2796 /* Check any incompatible options that the user has specified. */
2797 static void
2798 arm_option_check_internal (struct gcc_options *opts)
2799 {
2800 int flags = opts->x_target_flags;
2801
2802 /* iWMMXt and NEON are incompatible. */
2803 if (TARGET_IWMMXT
2804 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2805 error ("iWMMXt and NEON are incompatible");
2806
2807 /* Make sure that the processor choice does not conflict with any of the
2808 other command line choices. */
2809 if (TARGET_ARM_P (flags)
2810 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2811 error ("target CPU does not support ARM mode");
2812
2813 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2814 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2815 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2816
2817 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2818 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2819
2820 /* If this target is normally configured to use APCS frames, warn if they
2821 are turned off and debugging is turned on. */
2822 if (TARGET_ARM_P (flags)
2823 && write_symbols != NO_DEBUG
2824 && !TARGET_APCS_FRAME
2825 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2826 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2827
2828 /* iWMMXt unsupported under Thumb mode. */
2829 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2830 error ("iWMMXt unsupported under Thumb mode");
2831
2832 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2833 error ("can not use -mtp=cp15 with 16-bit Thumb");
2834
2835 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2836 {
2837 error ("RTP PIC is incompatible with Thumb");
2838 flag_pic = 0;
2839 }
2840
2841 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2842 with MOVT. */
2843 if ((target_pure_code || target_slow_flash_data)
2844 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2845 {
2846 const char *flag = (target_pure_code ? "-mpure-code" :
2847 "-mslow-flash-data");
2848 error ("%s only supports non-pic code on M-profile targets with the "
2849 "MOVT instruction", flag);
2850 }
2851
2852 }
2853
2854 /* Recompute the global settings depending on target attribute options. */
2855
2856 static void
2857 arm_option_params_internal (void)
2858 {
2859 /* If we are not using the default (ARM mode) section anchor offset
2860 ranges, then set the correct ranges now. */
2861 if (TARGET_THUMB1)
2862 {
2863 /* Thumb-1 LDR instructions cannot have negative offsets.
2864 Permissible positive offset ranges are 5-bit (for byte loads),
2865 6-bit (for halfword loads), or 7-bit (for word loads).
2866 Empirical results suggest a 7-bit anchor range gives the best
2867 overall code size. */
2868 targetm.min_anchor_offset = 0;
2869 targetm.max_anchor_offset = 127;
2870 }
2871 else if (TARGET_THUMB2)
2872 {
2873 /* The minimum is set such that the total size of the block
2874 for a particular anchor is 248 + 1 + 4095 bytes, which is
2875 divisible by eight, ensuring natural spacing of anchors. */
2876 targetm.min_anchor_offset = -248;
2877 targetm.max_anchor_offset = 4095;
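/* Editorial check (not part of the original source): 248 + 1 + 4095 = 4344
   = 8 * 543, so the anchor block size quoted above is indeed a multiple of
   eight.  */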
2878 }
2879 else
2880 {
2881 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2882 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2883 }
2884
2885 /* With -Os, allow up to four conditional instructions, regardless of the tuning's max_insns_skipped.  */
2886 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2887
2888 /* For THUMB2, we limit the conditional sequence to one IT block. */
2889 if (TARGET_THUMB2)
2890 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
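/* Editorial note (not part of the original source): a Thumb-2 IT instruction
   can make at most four following instructions conditional, so the clamp
   above assumes MAX_INSN_PER_IT_BLOCK reflects that architectural limit.  */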
2891 }
2892
2893 /* True if -mflip-thumb should next add an attribute for the default
2894 mode, false if it should next add an attribute for the opposite mode. */
2895 static GTY(()) bool thumb_flipper;
2896
2897 /* Options after initial target override. */
2898 static GTY(()) tree init_optimize;
2899
2900 static void
2901 arm_override_options_after_change_1 (struct gcc_options *opts)
2902 {
2903 if (opts->x_align_functions <= 0)
2904 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2905 && opts->x_optimize_size ? 2 : 4;
2906 }
2907
2908 /* Implement targetm.override_options_after_change. */
2909
2910 static void
2911 arm_override_options_after_change (void)
2912 {
2913 arm_configure_build_target (&arm_active_target,
2914 TREE_TARGET_OPTION (target_option_default_node),
2915 &global_options_set, false);
2916
2917 arm_override_options_after_change_1 (&global_options);
2918 }
2919
2920 /* Implement TARGET_OPTION_SAVE. */
2921 static void
2922 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2923 {
2924 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2925 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2926 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2927 }
2928
2929 /* Implement TARGET_OPTION_RESTORE. */
2930 static void
2931 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2932 {
2933 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2934 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2935 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2936 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2937 false);
2938 }
2939
2940 /* Reset options between modes that the user has specified. */
2941 static void
2942 arm_option_override_internal (struct gcc_options *opts,
2943 struct gcc_options *opts_set)
2944 {
2945 arm_override_options_after_change_1 (opts);
2946
2947 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2948 {
2949 /* The default is to enable interworking, so this warning message would
2950 be confusing to users who have just compiled with, e.g., -march=armv3.  */
2951 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2952 opts->x_target_flags &= ~MASK_INTERWORK;
2953 }
2954
2955 if (TARGET_THUMB_P (opts->x_target_flags)
2956 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2957 {
2958 warning (0, "target CPU does not support THUMB instructions");
2959 opts->x_target_flags &= ~MASK_THUMB;
2960 }
2961
2962 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2963 {
2964 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2965 opts->x_target_flags &= ~MASK_APCS_FRAME;
2966 }
2967
2968 /* Callee super interworking implies thumb interworking. Adding
2969 this to the flags here simplifies the logic elsewhere. */
2970 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2971 opts->x_target_flags |= MASK_INTERWORK;
2972
2973 /* Need to remember the initial values so that combinations of options like
2974    -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
2975 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2976
2977 if (! opts_set->x_arm_restrict_it)
2978 opts->x_arm_restrict_it = arm_arch8;
2979
2980 /* ARM execution state and M profile don't have [restrict] IT. */
2981 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2982 opts->x_arm_restrict_it = 0;
2983
2984 /* Enable -munaligned-access by default for
2985 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2986 i.e. Thumb2 and ARM state only.
2987 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2988 - ARMv8 architecture-based processors.
2989
2990 Disable -munaligned-access by default for
2991 - all pre-ARMv6 architecture-based processors
2992 - ARMv6-M architecture-based processors
2993 - ARMv8-M Baseline processors. */
2994
2995 if (! opts_set->x_unaligned_access)
2996 {
2997 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
2998 && arm_arch6 && (arm_arch_notm || arm_arch7));
2999 }
3000 else if (opts->x_unaligned_access == 1
3001 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3002 {
3003 warning (0, "target CPU does not support unaligned accesses");
3004 opts->x_unaligned_access = 0;
3005 }
3006
3007 /* Don't warn since it's on by default in -O2. */
3008 if (TARGET_THUMB1_P (opts->x_target_flags))
3009 opts->x_flag_schedule_insns = 0;
3010 else
3011 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3012
3013 /* Disable shrink-wrap when optimizing function for size, since it tends to
3014 generate additional returns. */
3015 if (optimize_function_for_size_p (cfun)
3016 && TARGET_THUMB2_P (opts->x_target_flags))
3017 opts->x_flag_shrink_wrap = false;
3018 else
3019 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3020
3021 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3022 - epilogue_insns - does not accurately model the corresponding insns
3023 emitted in the asm file. In particular, see the comment in thumb_exit
3024 'Find out how many of the (return) argument registers we can corrupt'.
3025 As a consequence, the epilogue may clobber registers without fipa-ra
3026 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3027 TODO: Accurately model clobbers for epilogue_insns and reenable
3028 fipa-ra. */
3029 if (TARGET_THUMB1_P (opts->x_target_flags))
3030 opts->x_flag_ipa_ra = 0;
3031 else
3032 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3033
3034 /* Thumb2 inline assembly code should always use unified syntax.
3035 This will apply to ARM and Thumb1 eventually. */
3036 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3037
3038 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3039 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3040 #endif
3041 }
3042
3043 static sbitmap isa_all_fpubits;
3044 static sbitmap isa_quirkbits;
3045
3046 /* Configure a build target TARGET from the user-specified options OPTS and
3047 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3048 architecture have been specified, but the two are not identical. */
3049 void
3050 arm_configure_build_target (struct arm_build_target *target,
3051 struct cl_target_option *opts,
3052 struct gcc_options *opts_set,
3053 bool warn_compatible)
3054 {
3055 const cpu_option *arm_selected_tune = NULL;
3056 const arch_option *arm_selected_arch = NULL;
3057 const cpu_option *arm_selected_cpu = NULL;
3058 const arm_fpu_desc *arm_selected_fpu = NULL;
3059 const char *tune_opts = NULL;
3060 const char *arch_opts = NULL;
3061 const char *cpu_opts = NULL;
3062
3063 bitmap_clear (target->isa);
3064 target->core_name = NULL;
3065 target->arch_name = NULL;
3066
3067 if (opts_set->x_arm_arch_string)
3068 {
3069 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3070 "-march",
3071 opts->x_arm_arch_string);
3072 arch_opts = strchr (opts->x_arm_arch_string, '+');
3073 }
3074
3075 if (opts_set->x_arm_cpu_string)
3076 {
3077 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3078 opts->x_arm_cpu_string);
3079 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3080 arm_selected_tune = arm_selected_cpu;
3081 /* If taking the tuning from -mcpu, we don't need to rescan the
3082 options for tuning. */
3083 }
3084
3085 if (opts_set->x_arm_tune_string)
3086 {
3087 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3088 opts->x_arm_tune_string);
3089 tune_opts = strchr (opts->x_arm_tune_string, '+');
3090 }
3091
3092 if (arm_selected_arch)
3093 {
3094 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3095 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3096 arch_opts);
3097
3098 if (arm_selected_cpu)
3099 {
3100 auto_sbitmap cpu_isa (isa_num_bits);
3101 auto_sbitmap isa_delta (isa_num_bits);
3102
3103 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3104 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3105 cpu_opts);
3106 bitmap_xor (isa_delta, cpu_isa, target->isa);
3107 /* Ignore any bits that are quirk bits. */
3108 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3109 /* Ignore (for now) any bits that might be set by -mfpu. */
3110 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3111
3112 if (!bitmap_empty_p (isa_delta))
3113 {
3114 if (warn_compatible)
3115 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3116 arm_selected_cpu->common.name,
3117 arm_selected_arch->common.name);
3118 /* -march wins for code generation.
3119 -mcpu wins for default tuning. */
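/* Editorial example (illustrative, not part of the original source):
   "-mcpu=cortex-m4 -march=armv7-a" would trigger the warning above, generate
   code for armv7-a, and, absent an explicit -mtune, keep Cortex-M4 as the
   tuning target.  */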
3120 if (!arm_selected_tune)
3121 arm_selected_tune = arm_selected_cpu;
3122
3123 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3124 target->arch_name = arm_selected_arch->common.name;
3125 }
3126 else
3127 {
3128 /* Architecture and CPU are essentially the same.
3129 Prefer the CPU setting. */
3130 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3131 target->core_name = arm_selected_cpu->common.name;
3132 /* Copy the CPU's capabilities, so that we inherit the
3133 appropriate extensions and quirks. */
3134 bitmap_copy (target->isa, cpu_isa);
3135 }
3136 }
3137 else
3138 {
3139 /* Pick a CPU based on the architecture. */
3140 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3141 target->arch_name = arm_selected_arch->common.name;
3142 /* Note: target->core_name is left unset in this path. */
3143 }
3144 }
3145 else if (arm_selected_cpu)
3146 {
3147 target->core_name = arm_selected_cpu->common.name;
3148 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3149 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3150 cpu_opts);
3151 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3152 }
3153 /* If the user did not specify a processor or architecture, choose
3154 one for them. */
3155 else
3156 {
3157 const cpu_option *sel;
3158 auto_sbitmap sought_isa (isa_num_bits);
3159 bitmap_clear (sought_isa);
3160 auto_sbitmap default_isa (isa_num_bits);
3161
3162 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3163 TARGET_CPU_DEFAULT);
3164 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3165 gcc_assert (arm_selected_cpu->common.name);
3166
3167 /* RWE: All of the selection logic below (to the end of this
3168 'if' clause) looks somewhat suspect. It appears to be mostly
3169 there to support forcing thumb support when the default CPU
3170 does not have thumb (somewhat dubious in terms of what the
3171 user might be expecting). I think it should be removed once
3172 support for the pre-thumb era cores is removed. */
3173 sel = arm_selected_cpu;
3174 arm_initialize_isa (default_isa, sel->common.isa_bits);
3175 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3176 cpu_opts);
3177
3178 /* Now check to see if the user has specified any command line
3179 switches that require certain abilities from the cpu. */
3180
3181 if (TARGET_INTERWORK || TARGET_THUMB)
3182 {
3183 bitmap_set_bit (sought_isa, isa_bit_thumb);
3184 bitmap_set_bit (sought_isa, isa_bit_mode32);
3185
3186 /* There are no ARM processors that support both APCS-26 and
3187 interworking. Therefore we forcibly remove MODE26 from
3188 the isa features here (if it was set), so that the
3189 search below will always be able to find a compatible
3190 processor. */
3191 bitmap_clear_bit (default_isa, isa_bit_mode26);
3192 }
3193
3194 /* If there are such requirements and the default CPU does not
3195 satisfy them, we need to run over the complete list of
3196 cores looking for one that is satisfactory. */
3197 if (!bitmap_empty_p (sought_isa)
3198 && !bitmap_subset_p (sought_isa, default_isa))
3199 {
3200 auto_sbitmap candidate_isa (isa_num_bits);
3201 /* We're only interested in a CPU with at least the
3202 capabilities of the default CPU and the required
3203 additional features. */
3204 bitmap_ior (default_isa, default_isa, sought_isa);
3205
3206 /* Try to locate a CPU type that supports all of the abilities
3207 of the default CPU, plus the extra abilities requested by
3208 the user. */
3209 for (sel = all_cores; sel->common.name != NULL; sel++)
3210 {
3211 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3212 /* An exact match? */
3213 if (bitmap_equal_p (default_isa, candidate_isa))
3214 break;
3215 }
3216
3217 if (sel->common.name == NULL)
3218 {
3219 unsigned current_bit_count = isa_num_bits;
3220 const cpu_option *best_fit = NULL;
3221
3222 /* Ideally we would like to issue an error message here
3223 saying that it was not possible to find a CPU compatible
3224 with the default CPU, but which also supports the command
3225 line options specified by the programmer, and so they
3226 ought to use the -mcpu=<name> command line option to
3227 override the default CPU type.
3228
3229 If we cannot find a CPU that has exactly the
3230 characteristics of the default CPU and the given
3231 command line options we scan the array again looking
3232 for a best match. The best match must have at least
3233 the capabilities of the perfect match. */
3234 for (sel = all_cores; sel->common.name != NULL; sel++)
3235 {
3236 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3237
3238 if (bitmap_subset_p (default_isa, candidate_isa))
3239 {
3240 unsigned count;
3241
3242 bitmap_and_compl (candidate_isa, candidate_isa,
3243 default_isa);
3244 count = bitmap_popcount (candidate_isa);
3245
3246 if (count < current_bit_count)
3247 {
3248 best_fit = sel;
3249 current_bit_count = count;
3250 }
3251 }
3252 }
3253 
3254 gcc_assert (best_fit);
3255 sel = best_fit;
3256 }
3257 arm_selected_cpu = sel;
3258 }
3259
3260 /* Now we know the CPU, we can finally initialize the target
3261 structure. */
3262 target->core_name = arm_selected_cpu->common.name;
3263 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3264 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3265 cpu_opts);
3266 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3267 }
3268
3269 gcc_assert (arm_selected_cpu);
3270 gcc_assert (arm_selected_arch);
3271
3272 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3273 {
3274 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3275 auto_sbitmap fpu_bits (isa_num_bits);
3276
3277 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3278 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3279 bitmap_ior (target->isa, target->isa, fpu_bits);
3280 }
3281
3282 if (!arm_selected_tune)
3283 arm_selected_tune = arm_selected_cpu;
3284 else /* Validate the features passed to -mtune. */
3285 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3286
3287 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3288
3289 /* Finish initializing the target structure. */
3290 target->arch_pp_name = arm_selected_arch->arch;
3291 target->base_arch = arm_selected_arch->base_arch;
3292 target->profile = arm_selected_arch->profile;
3293
3294 target->tune_flags = tune_data->tune_flags;
3295 target->tune = tune_data->tune;
3296 target->tune_core = tune_data->scheduler;
3297 }
3298
3299 /* Fix up any incompatible options that the user has specified. */
3300 static void
3301 arm_option_override (void)
3302 {
3303 static const enum isa_feature fpu_bitlist[]
3304 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3305 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3306 cl_target_option opts;
3307
3308 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3309 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3310
3311 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3312 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3313
3314 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3315
3316 if (!global_options_set.x_arm_fpu_index)
3317 {
3318 bool ok;
3319 int fpu_index;
3320
3321 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3322 CL_TARGET);
3323 gcc_assert (ok);
3324 arm_fpu_index = (enum fpu_type) fpu_index;
3325 }
3326
3327 cl_target_option_save (&opts, &global_options);
3328 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3329 true);
3330
3331 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3332 SUBTARGET_OVERRIDE_OPTIONS;
3333 #endif
3334
3335 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3336 arm_base_arch = arm_active_target.base_arch;
3337
3338 arm_tune = arm_active_target.tune_core;
3339 tune_flags = arm_active_target.tune_flags;
3340 current_tune = arm_active_target.tune;
3341
3342 /* TBD: Dwarf info for apcs frame is not handled yet. */
3343 if (TARGET_APCS_FRAME)
3344 flag_shrink_wrap = false;
3345
3346 /* BPABI targets use linker tricks to allow interworking on cores
3347 without thumb support. */
3348 if (TARGET_INTERWORK
3349 && !TARGET_BPABI
3350 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3351 {
3352 warning (0, "target CPU does not support interworking" );
3353 target_flags &= ~MASK_INTERWORK;
3354 }
3355
3356 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3357 {
3358 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3359 target_flags |= MASK_APCS_FRAME;
3360 }
3361
3362 if (TARGET_POKE_FUNCTION_NAME)
3363 target_flags |= MASK_APCS_FRAME;
3364
3365 if (TARGET_APCS_REENT && flag_pic)
3366 error ("-fpic and -mapcs-reent are incompatible");
3367
3368 if (TARGET_APCS_REENT)
3369 warning (0, "APCS reentrant code not supported. Ignored");
3370
3371 /* Initialize boolean versions of the architectural flags, for use
3372 in the arm.md file. */
3373 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_armv3m);
3374 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3375 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3376 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5);
3377 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5e);
3378 arm_arch5te = arm_arch5e
3379 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3380 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3381 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3382 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3383 arm_arch6m = arm_arch6 && !arm_arch_notm;
3384 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3385 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3386 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3387 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3388 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3389 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3390 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3391 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3392 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3393 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3394 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3395 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3396 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3397 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3398 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3399 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3400 if (arm_fp16_inst)
3401 {
3402 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3403 error ("selected fp16 options are incompatible");
3404 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3405 }
3406
3407
3408 /* Set up some tuning parameters. */
3409 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3410 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3411 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3412 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3413 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3414 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3415
3416 /* And finally, set up some quirks. */
3417 arm_arch_no_volatile_ce
3418 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3419 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3420 isa_bit_quirk_armv6kz);
3421
3422 /* V5 code we generate is completely interworking capable, so we turn off
3423 TARGET_INTERWORK here to avoid many tests later on. */
3424
3425 /* XXX However, we must pass the right pre-processor defines to CPP
3426 or GLD can get confused. This is a hack. */
3427 if (TARGET_INTERWORK)
3428 arm_cpp_interwork = 1;
3429
3430 if (arm_arch5)
3431 target_flags &= ~MASK_INTERWORK;
3432
3433 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3434 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3435
3436 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3437 error ("iwmmxt abi requires an iwmmxt capable cpu");
3438
3439 /* If soft-float is specified then don't use FPU. */
3440 if (TARGET_SOFT_FLOAT)
3441 arm_fpu_attr = FPU_NONE;
3442 else
3443 arm_fpu_attr = FPU_VFP;
3444
3445 if (TARGET_AAPCS_BASED)
3446 {
3447 if (TARGET_CALLER_INTERWORKING)
3448 error ("AAPCS does not support -mcaller-super-interworking");
3449 else
3450 if (TARGET_CALLEE_INTERWORKING)
3451 error ("AAPCS does not support -mcallee-super-interworking");
3452 }
3453
3454 /* __fp16 support currently assumes the core has ldrh. */
3455 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3456 sorry ("__fp16 and no ldrh");
3457
3458 if (TARGET_AAPCS_BASED)
3459 {
3460 if (arm_abi == ARM_ABI_IWMMXT)
3461 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3462 else if (TARGET_HARD_FLOAT_ABI)
3463 {
3464 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3465 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3466 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3467 }
3468 else
3469 arm_pcs_default = ARM_PCS_AAPCS;
3470 }
3471 else
3472 {
3473 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3474 sorry ("-mfloat-abi=hard and VFP");
3475
3476 if (arm_abi == ARM_ABI_APCS)
3477 arm_pcs_default = ARM_PCS_APCS;
3478 else
3479 arm_pcs_default = ARM_PCS_ATPCS;
3480 }
3481
3482 /* For arm2/3 there is no need to do any scheduling if we are doing
3483 software floating-point. */
3484 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3485 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3486
3487 /* Use the cp15 method if it is available. */
3488 if (target_thread_pointer == TP_AUTO)
3489 {
3490 if (arm_arch6k && !TARGET_THUMB1)
3491 target_thread_pointer = TP_CP15;
3492 else
3493 target_thread_pointer = TP_SOFT;
3494 }
3495
3496 /* Override the default structure alignment for AAPCS ABI. */
3497 if (!global_options_set.x_arm_structure_size_boundary)
3498 {
3499 if (TARGET_AAPCS_BASED)
3500 arm_structure_size_boundary = 8;
3501 }
3502 else
3503 {
3504 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3505
3506 if (arm_structure_size_boundary != 8
3507 && arm_structure_size_boundary != 32
3508 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3509 {
3510 if (ARM_DOUBLEWORD_ALIGN)
3511 warning (0,
3512 "structure size boundary can only be set to 8, 32 or 64");
3513 else
3514 warning (0, "structure size boundary can only be set to 8 or 32");
3515 arm_structure_size_boundary
3516 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3517 }
3518 }
3519
3520 if (TARGET_VXWORKS_RTP)
3521 {
3522 if (!global_options_set.x_arm_pic_data_is_text_relative)
3523 arm_pic_data_is_text_relative = 0;
3524 }
3525 else if (flag_pic
3526 && !arm_pic_data_is_text_relative
3527 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3528 /* When text & data segments don't have a fixed displacement, the
3529 intended use is with a single, read only, pic base register.
3530 Unless the user explicitly requested not to do that, set
3531 it. */
3532 target_flags |= MASK_SINGLE_PIC_BASE;
3533
3534 /* If stack checking is disabled, we can use r10 as the PIC register,
3535 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3536 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3537 {
3538 if (TARGET_VXWORKS_RTP)
3539 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3540 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3541 }
3542
3543 if (flag_pic && TARGET_VXWORKS_RTP)
3544 arm_pic_register = 9;
3545
3546 if (arm_pic_register_string != NULL)
3547 {
3548 int pic_register = decode_reg_name (arm_pic_register_string);
3549
3550 if (!flag_pic)
3551 warning (0, "-mpic-register= is useless without -fpic");
3552
3553 /* Prevent the user from choosing an obviously stupid PIC register. */
3554 else if (pic_register < 0 || call_used_regs[pic_register]
3555 || pic_register == HARD_FRAME_POINTER_REGNUM
3556 || pic_register == STACK_POINTER_REGNUM
3557 || pic_register >= PC_REGNUM
3558 || (TARGET_VXWORKS_RTP
3559 && (unsigned int) pic_register != arm_pic_register))
3560 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3561 else
3562 arm_pic_register = pic_register;
3563 }
3564
3565 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3566 if (fix_cm3_ldrd == 2)
3567 {
3568 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3569 fix_cm3_ldrd = 1;
3570 else
3571 fix_cm3_ldrd = 0;
3572 }
3573
3574 /* Hot/Cold partitioning is not currently supported, since we can't
3575 handle literal pool placement in that case. */
3576 if (flag_reorder_blocks_and_partition)
3577 {
3578 inform (input_location,
3579 "-freorder-blocks-and-partition not supported on this architecture");
3580 flag_reorder_blocks_and_partition = 0;
3581 flag_reorder_blocks = 1;
3582 }
3583
3584 if (flag_pic)
3585 /* Hoisting PIC address calculations more aggressively provides a small,
3586 but measurable, size reduction for PIC code. Therefore, we decrease
3587 the bar for unrestricted expression hoisting to the cost of PIC address
3588 calculation, which is 2 instructions. */
3589 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3590 global_options.x_param_values,
3591 global_options_set.x_param_values);
3592
3593 /* ARM EABI defaults to strict volatile bitfields. */
3594 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3595 && abi_version_at_least(2))
3596 flag_strict_volatile_bitfields = 1;
3597
3598 /* Enable sw prefetching at -O3 for CPUs that have prefetch and where
3599 we have deemed it beneficial (signified by setting
3600 prefetch.num_slots to 1 or more). */
3601 if (flag_prefetch_loop_arrays < 0
3602 && HAVE_prefetch
3603 && optimize >= 3
3604 && current_tune->prefetch.num_slots > 0)
3605 flag_prefetch_loop_arrays = 1;
3606
3607 /* Set up parameters to be used in prefetching algorithm. Do not
3608 override the defaults unless we are tuning for a core we have
3609 researched values for. */
3610 if (current_tune->prefetch.num_slots > 0)
3611 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3612 current_tune->prefetch.num_slots,
3613 global_options.x_param_values,
3614 global_options_set.x_param_values);
3615 if (current_tune->prefetch.l1_cache_line_size >= 0)
3616 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3617 current_tune->prefetch.l1_cache_line_size,
3618 global_options.x_param_values,
3619 global_options_set.x_param_values);
3620 if (current_tune->prefetch.l1_cache_size >= 0)
3621 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3622 current_tune->prefetch.l1_cache_size,
3623 global_options.x_param_values,
3624 global_options_set.x_param_values);
3625
3626 /* Use Neon to perform 64-bit operations rather than core
3627 registers. */
3628 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3629 if (use_neon_for_64bits == 1)
3630 prefer_neon_for_64bits = true;
3631
3632 /* Use the alternative scheduling-pressure algorithm by default. */
3633 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3634 global_options.x_param_values,
3635 global_options_set.x_param_values);
3636
3637 /* Look through ready list and all of queue for instructions
3638 relevant for L2 auto-prefetcher. */
3639 int param_sched_autopref_queue_depth;
3640
3641 switch (current_tune->sched_autopref)
3642 {
3643 case tune_params::SCHED_AUTOPREF_OFF:
3644 param_sched_autopref_queue_depth = -1;
3645 break;
3646
3647 case tune_params::SCHED_AUTOPREF_RANK:
3648 param_sched_autopref_queue_depth = 0;
3649 break;
3650
3651 case tune_params::SCHED_AUTOPREF_FULL:
3652 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3653 break;
3654
3655 default:
3656 gcc_unreachable ();
3657 }
3658
3659 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3660 param_sched_autopref_queue_depth,
3661 global_options.x_param_values,
3662 global_options_set.x_param_values);
3663
3664 /* Currently, for slow flash data, we just disable literal pools. We also
3665 disable it for pure-code. */
3666 if (target_slow_flash_data || target_pure_code)
3667 arm_disable_literal_pool = true;
3668
3669 if (use_cmse && !arm_arch_cmse)
3670 error ("target CPU does not support ARMv8-M Security Extensions");
3671
3672 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions,
3673 and ARMv8-M Baseline and Mainline do not allow such a configuration. */
3674 if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3675 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3676
3677 /* Disable scheduling fusion by default if it's not armv7 processor
3678 or doesn't prefer ldrd/strd. */
3679 if (flag_schedule_fusion == 2
3680 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3681 flag_schedule_fusion = 0;
3682
3683 /* Need to remember initial options before they are overridden. */
3684 init_optimize = build_optimization_node (&global_options);
3685
3686 arm_option_override_internal (&global_options, &global_options_set);
3687 arm_option_check_internal (&global_options);
3688 arm_option_params_internal ();
3689
3690 /* Create the default target_options structure. */
3691 target_option_default_node = target_option_current_node
3692 = build_target_option_node (&global_options);
3693
3694 /* Register global variables with the garbage collector. */
3695 arm_add_gc_roots ();
3696
3697 /* Init initial mode for testing. */
3698 thumb_flipper = TARGET_THUMB;
3699 }
3700
3701 static void
3702 arm_add_gc_roots (void)
3703 {
3704 gcc_obstack_init(&minipool_obstack);
3705 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3706 }
3707 \f
3708 /* A table of known ARM exception types.
3709 For use with the interrupt function attribute. */
3710
3711 typedef struct
3712 {
3713 const char *const arg;
3714 const unsigned long return_value;
3715 }
3716 isr_attribute_arg;
3717
3718 static const isr_attribute_arg isr_attribute_args [] =
3719 {
3720 { "IRQ", ARM_FT_ISR },
3721 { "irq", ARM_FT_ISR },
3722 { "FIQ", ARM_FT_FIQ },
3723 { "fiq", ARM_FT_FIQ },
3724 { "ABORT", ARM_FT_ISR },
3725 { "abort", ARM_FT_ISR },
3726 { "ABORT", ARM_FT_ISR },
3727 { "abort", ARM_FT_ISR },
3728 { "UNDEF", ARM_FT_EXCEPTION },
3729 { "undef", ARM_FT_EXCEPTION },
3730 { "SWI", ARM_FT_EXCEPTION },
3731 { "swi", ARM_FT_EXCEPTION },
3732 { NULL, ARM_FT_NORMAL }
3733 };
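/* Example usage (illustrative; the handler name is hypothetical):
     void uart_rx_handler (void) __attribute__ ((interrupt ("IRQ")));
   The string argument is looked up in the table above, so this
   declaration is classified as ARM_FT_ISR.  */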
3734
3735 /* Returns the (interrupt) function type of the current
3736 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3737
3738 static unsigned long
3739 arm_isr_value (tree argument)
3740 {
3741 const isr_attribute_arg * ptr;
3742 const char * arg;
3743
3744 if (!arm_arch_notm)
3745 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3746
3747 /* No argument - default to IRQ. */
3748 if (argument == NULL_TREE)
3749 return ARM_FT_ISR;
3750
3751 /* Get the value of the argument. */
3752 if (TREE_VALUE (argument) == NULL_TREE
3753 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3754 return ARM_FT_UNKNOWN;
3755
3756 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3757
3758 /* Check it against the list of known arguments. */
3759 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3760 if (streq (arg, ptr->arg))
3761 return ptr->return_value;
3762
3763 /* An unrecognized interrupt type. */
3764 return ARM_FT_UNKNOWN;
3765 }
3766
3767 /* Computes the type of the current function. */
3768
3769 static unsigned long
3770 arm_compute_func_type (void)
3771 {
3772 unsigned long type = ARM_FT_UNKNOWN;
3773 tree a;
3774 tree attr;
3775
3776 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3777
3778 /* Decide if the current function is volatile. Such functions
3779 never return, and many memory cycles can be saved by not storing
3780 register values that will never be needed again. This optimization
3781 was added to speed up context switching in a kernel application. */
3782 if (optimize > 0
3783 && (TREE_NOTHROW (current_function_decl)
3784 || !(flag_unwind_tables
3785 || (flag_exceptions
3786 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3787 && TREE_THIS_VOLATILE (current_function_decl))
3788 type |= ARM_FT_VOLATILE;
3789
3790 if (cfun->static_chain_decl != NULL)
3791 type |= ARM_FT_NESTED;
3792
3793 attr = DECL_ATTRIBUTES (current_function_decl);
3794
3795 a = lookup_attribute ("naked", attr);
3796 if (a != NULL_TREE)
3797 type |= ARM_FT_NAKED;
3798
3799 a = lookup_attribute ("isr", attr);
3800 if (a == NULL_TREE)
3801 a = lookup_attribute ("interrupt", attr);
3802
3803 if (a == NULL_TREE)
3804 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3805 else
3806 type |= arm_isr_value (TREE_VALUE (a));
3807
3808 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3809 type |= ARM_FT_CMSE_ENTRY;
3810
3811 return type;
3812 }
3813
3814 /* Returns the type of the current function. */
3815
3816 unsigned long
3817 arm_current_func_type (void)
3818 {
3819 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3820 cfun->machine->func_type = arm_compute_func_type ();
3821
3822 return cfun->machine->func_type;
3823 }
3824
3825 bool
3826 arm_allocate_stack_slots_for_args (void)
3827 {
3828 /* Naked functions should not allocate stack slots for arguments. */
3829 return !IS_NAKED (arm_current_func_type ());
3830 }
3831
3832 static bool
3833 arm_warn_func_return (tree decl)
3834 {
3835 /* Naked functions are implemented entirely in assembly, including the
3836 return sequence, so suppress warnings about this. */
3837 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3838 }
3839
3840 \f
3841 /* Output assembler code for a block containing the constant parts
3842 of a trampoline, leaving space for the variable parts.
3843
3844 On the ARM, (if r8 is the static chain regnum, and remembering that
3845 referencing pc adds an offset of 8) the trampoline looks like:
3846 ldr r8, [pc, #0]
3847 ldr pc, [pc]
3848 .word static chain value
3849 .word function's address
3850 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
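/* Note: the two .word slots are filled in at run time by
   arm_trampoline_init below; the static chain value is stored at
   offset 8 and the target function's address at offset 12
   (offsets 12 and 16 for the 16-bit Thumb variant).  */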
3851
3852 static void
3853 arm_asm_trampoline_template (FILE *f)
3854 {
3855 fprintf (f, "\t.syntax unified\n");
3856
3857 if (TARGET_ARM)
3858 {
3859 fprintf (f, "\t.arm\n");
3860 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3861 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3862 }
3863 else if (TARGET_THUMB2)
3864 {
3865 fprintf (f, "\t.thumb\n");
3866 /* The Thumb-2 trampoline is similar to the arm implementation.
3867 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3868 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3869 STATIC_CHAIN_REGNUM, PC_REGNUM);
3870 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3871 }
3872 else
3873 {
3874 ASM_OUTPUT_ALIGN (f, 2);
3875 fprintf (f, "\t.code\t16\n");
3876 fprintf (f, ".Ltrampoline_start:\n");
3877 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3878 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3879 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3880 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3881 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3882 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3883 }
3884 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3885 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3886 }
3887
3888 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3889
3890 static void
3891 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3892 {
3893 rtx fnaddr, mem, a_tramp;
3894
3895 emit_block_move (m_tramp, assemble_trampoline_template (),
3896 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3897
3898 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3899 emit_move_insn (mem, chain_value);
3900
3901 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3902 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3903 emit_move_insn (mem, fnaddr);
3904
3905 a_tramp = XEXP (m_tramp, 0);
3906 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3907 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3908 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3909 }
3910
3911 /* Thumb trampolines should be entered in thumb mode, so set
3912 the bottom bit of the address. */
3913
3914 static rtx
3915 arm_trampoline_adjust_address (rtx addr)
3916 {
3917 if (TARGET_THUMB)
3918 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3919 NULL, 0, OPTAB_LIB_WIDEN);
3920 return addr;
3921 }
3922 \f
3923 /* Return 1 if it is possible to return using a single instruction.
3924 If SIBLING is non-null, this is a test for a return before a sibling
3925 call. SIBLING is the call insn, so we can examine its register usage. */
3926
3927 int
3928 use_return_insn (int iscond, rtx sibling)
3929 {
3930 int regno;
3931 unsigned int func_type;
3932 unsigned long saved_int_regs;
3933 unsigned HOST_WIDE_INT stack_adjust;
3934 arm_stack_offsets *offsets;
3935
3936 /* Never use a return instruction before reload has run. */
3937 if (!reload_completed)
3938 return 0;
3939
3940 func_type = arm_current_func_type ();
3941
3942 /* Naked, volatile and stack alignment functions need special
3943 consideration. */
3944 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3945 return 0;
3946
3947 /* So do interrupt functions that use the frame pointer and Thumb
3948 interrupt functions. */
3949 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3950 return 0;
3951
3952 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3953 && !optimize_function_for_size_p (cfun))
3954 return 0;
3955
3956 offsets = arm_get_frame_offsets ();
3957 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3958
3959 /* As do variadic functions. */
3960 if (crtl->args.pretend_args_size
3961 || cfun->machine->uses_anonymous_args
3962 /* Or if the function calls __builtin_eh_return () */
3963 || crtl->calls_eh_return
3964 /* Or if the function calls alloca */
3965 || cfun->calls_alloca
3966 /* Or if there is a stack adjustment. However, if the stack pointer
3967 is saved on the stack, we can use a pre-incrementing stack load. */
3968 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3969 && stack_adjust == 4))
3970 /* Or if the static chain register was saved above the frame, under the
3971 assumption that the stack pointer isn't saved on the stack. */
3972 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3973 && arm_compute_static_chain_stack_bytes() != 0))
3974 return 0;
3975
3976 saved_int_regs = offsets->saved_regs_mask;
3977
3978 /* Unfortunately, the insn
3979
3980 ldmib sp, {..., sp, ...}
3981
3982 triggers a bug on most SA-110 based devices, such that the stack
3983 pointer won't be correctly restored if the instruction takes a
3984 page fault. We work around this problem by popping r3 along with
3985 the other registers, since that is never slower than executing
3986 another instruction.
3987
3988 We test for !arm_arch5 here, because code for any architecture
3989 less than this could potentially be run on one of the buggy
3990 chips. */
3991 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3992 {
3993 /* Validate that r3 is a call-clobbered register (always true in
3994 the default abi) ... */
3995 if (!call_used_regs[3])
3996 return 0;
3997
3998 /* ... that it isn't being used for a return value ... */
3999 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4000 return 0;
4001
4002 /* ... or for a tail-call argument ... */
4003 if (sibling)
4004 {
4005 gcc_assert (CALL_P (sibling));
4006
4007 if (find_regno_fusage (sibling, USE, 3))
4008 return 0;
4009 }
4010
4011 /* ... and that there are no call-saved registers in r0-r2
4012 (always true in the default ABI). */
4013 if (saved_int_regs & 0x7)
4014 return 0;
4015 }
4016
4017 /* Can't be done if interworking with Thumb, and any registers have been
4018 stacked. */
4019 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4020 return 0;
4021
4022 /* On StrongARM, conditional returns are expensive if they aren't
4023 taken and multiple registers have been stacked. */
4024 if (iscond && arm_tune_strongarm)
4025 {
4026 /* Conditional return when just the LR is stored is a simple
4027 conditional-load instruction, that's not expensive. */
4028 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4029 return 0;
4030
4031 if (flag_pic
4032 && arm_pic_register != INVALID_REGNUM
4033 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4034 return 0;
4035 }
4036
4037 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4038 several instructions if anything needs to be popped. */
4039 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4040 return 0;
4041
4042 /* If there are saved registers but the LR isn't saved, then we need
4043 two instructions for the return. */
4044 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4045 return 0;
4046
4047 /* Can't be done if any of the VFP regs are pushed,
4048 since this also requires an insn. */
4049 if (TARGET_HARD_FLOAT)
4050 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4051 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4052 return 0;
4053
4054 if (TARGET_REALLY_IWMMXT)
4055 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4056 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4057 return 0;
4058
4059 return 1;
4060 }
4061
4062 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4063 shrink-wrapping if possible. This is the case if we need to emit a
4064 prologue, which we can test by looking at the offsets. */
4065 bool
4066 use_simple_return_p (void)
4067 {
4068 arm_stack_offsets *offsets;
4069
4070 /* Note this function can be called before or after reload. */
4071 if (!reload_completed)
4072 arm_compute_frame_layout ();
4073
4074 offsets = arm_get_frame_offsets ();
4075 return offsets->outgoing_args != 0;
4076 }
4077
4078 /* Return TRUE if int I is a valid immediate ARM constant. */
4079
4080 int
4081 const_ok_for_arm (HOST_WIDE_INT i)
4082 {
4083 int lowbit;
4084
4085 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4086 be all zero, or all one. */
4087 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4088 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4089 != ((~(unsigned HOST_WIDE_INT) 0)
4090 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4091 return FALSE;
4092
4093 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4094
4095 /* Fast return for 0 and small values. We must do this for zero, since
4096 the code below can't handle that one case. */
4097 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4098 return TRUE;
4099
4100 /* Get the number of trailing zeros. */
4101 lowbit = ffs((int) i) - 1;
4102
4103 /* Only even shifts are allowed in ARM mode so round down to the
4104 nearest even number. */
4105 if (TARGET_ARM)
4106 lowbit &= ~1;
4107
4108 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4109 return TRUE;
4110
4111 if (TARGET_ARM)
4112 {
4113 /* Allow rotated constants in ARM mode. */
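/* For instance (illustrative), 0xf000000f is 0xff rotated right by
   four and is accepted here, whereas 0xf000001f needs nine
   significant bits under any even rotation and is rejected.  */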
4114 if (lowbit <= 4
4115 && ((i & ~0xc000003f) == 0
4116 || (i & ~0xf000000f) == 0
4117 || (i & ~0xfc000003) == 0))
4118 return TRUE;
4119 }
4120 else if (TARGET_THUMB2)
4121 {
4122 HOST_WIDE_INT v;
4123
4124 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
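/* For example (illustrative), 0x00120012 and 0x34343434 match these
   patterns, while 0x00120034 does not.  */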
4125 v = i & 0xff;
4126 v |= v << 16;
4127 if (i == v || i == (v | (v << 8)))
4128 return TRUE;
4129
4130 /* Allow repeated pattern 0xXY00XY00. */
4131 v = i & 0xff00;
4132 v |= v << 16;
4133 if (i == v)
4134 return TRUE;
4135 }
4136 else if (TARGET_HAVE_MOVT)
4137 {
4138 /* Thumb-1 Targets with MOVT. */
4139 if (i > 0xffff)
4140 return FALSE;
4141 else
4142 return TRUE;
4143 }
4144
4145 return FALSE;
4146 }
4147
4148 /* Return true if I is a valid constant for the operation CODE. */
4149 int
4150 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4151 {
4152 if (const_ok_for_arm (i))
4153 return 1;
4154
4155 switch (code)
4156 {
4157 case SET:
4158 /* See if we can use movw. */
4159 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4160 return 1;
4161 else
4162 /* Otherwise, try mvn. */
4163 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4164
4165 case PLUS:
4166 /* See if we can use addw or subw. */
4167 if (TARGET_THUMB2
4168 && ((i & 0xfffff000) == 0
4169 || ((-i) & 0xfffff000) == 0))
4170 return 1;
4171 /* Fall through. */
4172 case COMPARE:
4173 case EQ:
4174 case NE:
4175 case GT:
4176 case LE:
4177 case LT:
4178 case GE:
4179 case GEU:
4180 case LTU:
4181 case GTU:
4182 case LEU:
4183 case UNORDERED:
4184 case ORDERED:
4185 case UNEQ:
4186 case UNGE:
4187 case UNLT:
4188 case UNGT:
4189 case UNLE:
4190 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4191
4192 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4193 case XOR:
4194 return 0;
4195
4196 case IOR:
4197 if (TARGET_THUMB2)
4198 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4199 return 0;
4200
4201 case AND:
4202 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4203
4204 default:
4205 gcc_unreachable ();
4206 }
4207 }
4208
4209 /* Return true if I is a valid di mode constant for the operation CODE. */
4210 int
4211 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4212 {
4213 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4214 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4215 rtx hi = GEN_INT (hi_val);
4216 rtx lo = GEN_INT (lo_val);
4217
4218 if (TARGET_THUMB1)
4219 return 0;
4220
4221 switch (code)
4222 {
4223 case AND:
4224 case IOR:
4225 case XOR:
4226 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4227 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4228 case PLUS:
4229 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4230
4231 default:
4232 return 0;
4233 }
4234 }
4235
4236 /* Emit a sequence of insns to handle a large constant.
4237 CODE is the code of the operation required, it can be any of SET, PLUS,
4238 IOR, AND, XOR, MINUS;
4239 MODE is the mode in which the operation is being performed;
4240 VAL is the integer to operate on;
4241 SOURCE is the other operand (a register, or a null-pointer for SET);
4242 SUBTARGETS means it is safe to create scratch registers if that will
4243 either produce a simpler sequence, or we will want to cse the values.
4244 Return value is the number of insns emitted. */
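/* For example (illustrative), a SET of 0x12345678 on a target without
   MOVW/MOVT support is synthesized as a MOV followed by three ORRs of
   8-bit rotated chunks, i.e. four insns in total.  */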
4245
4246 /* ??? Tweak this for thumb2. */
4247 int
4248 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4249 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4250 {
4251 rtx cond;
4252
4253 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4254 cond = COND_EXEC_TEST (PATTERN (insn));
4255 else
4256 cond = NULL_RTX;
4257
4258 if (subtargets || code == SET
4259 || (REG_P (target) && REG_P (source)
4260 && REGNO (target) != REGNO (source)))
4261 {
4262 /* After arm_reorg has been called, we can't fix up expensive
4263 constants by pushing them into memory so we must synthesize
4264 them in-line, regardless of the cost. This is only likely to
4265 be more costly on chips that have load delay slots and we are
4266 compiling without running the scheduler (so no splitting
4267 occurred before the final instruction emission).
4268
4269 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4270 */
4271 if (!cfun->machine->after_arm_reorg
4272 && !cond
4273 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4274 1, 0)
4275 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4276 + (code != SET))))
4277 {
4278 if (code == SET)
4279 {
4280 /* Currently SET is the only monadic value for CODE; all
4281 the rest are dyadic. */
4282 if (TARGET_USE_MOVT)
4283 arm_emit_movpair (target, GEN_INT (val));
4284 else
4285 emit_set_insn (target, GEN_INT (val));
4286
4287 return 1;
4288 }
4289 else
4290 {
4291 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4292
4293 if (TARGET_USE_MOVT)
4294 arm_emit_movpair (temp, GEN_INT (val));
4295 else
4296 emit_set_insn (temp, GEN_INT (val));
4297
4298 /* For MINUS, the value is subtracted from, since we never
4299 have subtraction of a constant. */
4300 if (code == MINUS)
4301 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4302 else
4303 emit_set_insn (target,
4304 gen_rtx_fmt_ee (code, mode, source, temp));
4305 return 2;
4306 }
4307 }
4308 }
4309
4310 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4311 1);
4312 }
4313
4314 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4315 ARM/THUMB2 immediates and add up to VAL.
4316 The function return value gives the number of insns required. */
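/* A worked example (illustrative): VAL = 0x00ffff00 splits into the two
   immediates 0x00ff0000 and 0x0000ff00, so two insns are required.  */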
4317 static int
4318 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4319 struct four_ints *return_sequence)
4320 {
4321 int best_consecutive_zeros = 0;
4322 int i;
4323 int best_start = 0;
4324 int insns1, insns2;
4325 struct four_ints tmp_sequence;
4326
4327 /* If we aren't targeting ARM, the best place to start is always at
4328 the bottom, otherwise look more closely. */
4329 if (TARGET_ARM)
4330 {
4331 for (i = 0; i < 32; i += 2)
4332 {
4333 int consecutive_zeros = 0;
4334
4335 if (!(val & (3 << i)))
4336 {
4337 while ((i < 32) && !(val & (3 << i)))
4338 {
4339 consecutive_zeros += 2;
4340 i += 2;
4341 }
4342 if (consecutive_zeros > best_consecutive_zeros)
4343 {
4344 best_consecutive_zeros = consecutive_zeros;
4345 best_start = i - consecutive_zeros;
4346 }
4347 i -= 2;
4348 }
4349 }
4350 }
4351
4352 /* So long as it won't require any more insns to do so, it's
4353 desirable to emit a small constant (in bits 0...9) in the last
4354 insn. This way there is more chance that it can be combined with
4355 a later addressing insn to form a pre-indexed load or store
4356 operation. Consider:
4357
4358 *((volatile int *)0xe0000100) = 1;
4359 *((volatile int *)0xe0000110) = 2;
4360
4361 We want this to wind up as:
4362
4363 mov rA, #0xe0000000
4364 mov rB, #1
4365 str rB, [rA, #0x100]
4366 mov rB, #2
4367 str rB, [rA, #0x110]
4368
4369 rather than having to synthesize both large constants from scratch.
4370
4371 Therefore, we calculate how many insns would be required to emit
4372 the constant starting from `best_start', and also starting from
4373 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4374 yield a shorter sequence, we may as well use zero. */
4375 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4376 if (best_start != 0
4377 && ((HOST_WIDE_INT_1U << best_start) < val))
4378 {
4379 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4380 if (insns2 <= insns1)
4381 {
4382 *return_sequence = tmp_sequence;
4383 insns1 = insns2;
4384 }
4385 }
4386
4387 return insns1;
4388 }
4389
4390 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4391 static int
4392 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4393 struct four_ints *return_sequence, int i)
4394 {
4395 int remainder = val & 0xffffffff;
4396 int insns = 0;
4397
4398 /* Try and find a way of doing the job in either two or three
4399 instructions.
4400
4401 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4402 location. We start at position I. This may be the MSB, or
4403 optimal_immediate_sequence may have positioned it at the largest block
4404 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4405 wrapping around to the top of the word when we drop off the bottom.
4406 In the worst case this code should produce no more than four insns.
4407
4408 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4409 constants, shifted to any arbitrary location. We should always start
4410 at the MSB. */
4411 do
4412 {
4413 int end;
4414 unsigned int b1, b2, b3, b4;
4415 unsigned HOST_WIDE_INT result;
4416 int loc;
4417
4418 gcc_assert (insns < 4);
4419
4420 if (i <= 0)
4421 i += 32;
4422
4423 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4424 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4425 {
4426 loc = i;
4427 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4428 /* We can use addw/subw for the last 12 bits. */
4429 result = remainder;
4430 else
4431 {
4432 /* Use an 8-bit shifted/rotated immediate. */
4433 end = i - 8;
4434 if (end < 0)
4435 end += 32;
4436 result = remainder & ((0x0ff << end)
4437 | ((i < end) ? (0xff >> (32 - end))
4438 : 0));
4439 i -= 8;
4440 }
4441 }
4442 else
4443 {
4444 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4445 arbitrary shifts. */
4446 i -= TARGET_ARM ? 2 : 1;
4447 continue;
4448 }
4449
4450 /* Next, see if we can do a better job with a thumb2 replicated
4451 constant.
4452
4453 We do it this way around to catch the cases like 0x01F001E0 where
4454 two 8-bit immediates would work, but a replicated constant would
4455 make it worse.
4456
4457 TODO: 16-bit constants that don't clear all the bits, but still win.
4458 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4459 if (TARGET_THUMB2)
4460 {
4461 b1 = (remainder & 0xff000000) >> 24;
4462 b2 = (remainder & 0x00ff0000) >> 16;
4463 b3 = (remainder & 0x0000ff00) >> 8;
4464 b4 = remainder & 0xff;
4465
4466 if (loc > 24)
4467 {
4468 /* The 8-bit immediate already found clears b1 (and maybe b2),
4469 but must leave b3 and b4 alone. */
4470
4471 /* First try to find a 32-bit replicated constant that clears
4472 almost everything. We can assume that we can't do it in one,
4473 or else we wouldn't be here. */
4474 unsigned int tmp = b1 & b2 & b3 & b4;
4475 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4476 + (tmp << 24);
4477 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4478 + (tmp == b3) + (tmp == b4);
4479 if (tmp
4480 && (matching_bytes >= 3
4481 || (matching_bytes == 2
4482 && const_ok_for_op (remainder & ~tmp2, code))))
4483 {
4484 /* At least 3 of the bytes match, and the fourth has at
4485 least as many bits set, or two of the bytes match
4486 and it will only require one more insn to finish. */
4487 result = tmp2;
4488 i = tmp != b1 ? 32
4489 : tmp != b2 ? 24
4490 : tmp != b3 ? 16
4491 : 8;
4492 }
4493
4494 /* Second, try to find a 16-bit replicated constant that can
4495 leave three of the bytes clear. If b2 or b4 is already
4496 zero, then we can. If the 8-bit from above would not
4497 clear b2 anyway, then we still win. */
4498 else if (b1 == b3 && (!b2 || !b4
4499 || (remainder & 0x00ff0000 & ~result)))
4500 {
4501 result = remainder & 0xff00ff00;
4502 i = 24;
4503 }
4504 }
4505 else if (loc > 16)
4506 {
4507 /* The 8-bit immediate already found clears b2 (and maybe b3)
4508 and we don't get here unless b1 is already clear, but it will
4509 leave b4 unchanged. */
4510
4511 /* If we can clear b2 and b4 at once, then we win, since the
4512 8-bits couldn't possibly reach that far. */
4513 if (b2 == b4)
4514 {
4515 result = remainder & 0x00ff00ff;
4516 i = 16;
4517 }
4518 }
4519 }
4520
4521 return_sequence->i[insns++] = result;
4522 remainder &= ~result;
4523
4524 if (code == SET || code == MINUS)
4525 code = PLUS;
4526 }
4527 while (remainder);
4528
4529 return insns;
4530 }
4531
4532 /* Emit an instruction with the indicated PATTERN. If COND is
4533 non-NULL, conditionalize the execution of the instruction on COND
4534 being true. */
4535
4536 static void
4537 emit_constant_insn (rtx cond, rtx pattern)
4538 {
4539 if (cond)
4540 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4541 emit_insn (pattern);
4542 }
4543
4544 /* As above, but extra parameter GENERATE which, if clear, suppresses
4545 RTL generation. */
4546
4547 static int
4548 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4549 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4550 int subtargets, int generate)
4551 {
4552 int can_invert = 0;
4553 int can_negate = 0;
4554 int final_invert = 0;
4555 int i;
4556 int set_sign_bit_copies = 0;
4557 int clear_sign_bit_copies = 0;
4558 int clear_zero_bit_copies = 0;
4559 int set_zero_bit_copies = 0;
4560 int insns = 0, neg_insns, inv_insns;
4561 unsigned HOST_WIDE_INT temp1, temp2;
4562 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4563 struct four_ints *immediates;
4564 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4565
4566 /* Find out which operations are safe for a given CODE. Also do a quick
4567 check for degenerate cases; these can occur when DImode operations
4568 are split. */
4569 switch (code)
4570 {
4571 case SET:
4572 can_invert = 1;
4573 break;
4574
4575 case PLUS:
4576 can_negate = 1;
4577 break;
4578
4579 case IOR:
4580 if (remainder == 0xffffffff)
4581 {
4582 if (generate)
4583 emit_constant_insn (cond,
4584 gen_rtx_SET (target,
4585 GEN_INT (ARM_SIGN_EXTEND (val))));
4586 return 1;
4587 }
4588
4589 if (remainder == 0)
4590 {
4591 if (reload_completed && rtx_equal_p (target, source))
4592 return 0;
4593
4594 if (generate)
4595 emit_constant_insn (cond, gen_rtx_SET (target, source));
4596 return 1;
4597 }
4598 break;
4599
4600 case AND:
4601 if (remainder == 0)
4602 {
4603 if (generate)
4604 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4605 return 1;
4606 }
4607 if (remainder == 0xffffffff)
4608 {
4609 if (reload_completed && rtx_equal_p (target, source))
4610 return 0;
4611 if (generate)
4612 emit_constant_insn (cond, gen_rtx_SET (target, source));
4613 return 1;
4614 }
4615 can_invert = 1;
4616 break;
4617
4618 case XOR:
4619 if (remainder == 0)
4620 {
4621 if (reload_completed && rtx_equal_p (target, source))
4622 return 0;
4623 if (generate)
4624 emit_constant_insn (cond, gen_rtx_SET (target, source));
4625 return 1;
4626 }
4627
4628 if (remainder == 0xffffffff)
4629 {
4630 if (generate)
4631 emit_constant_insn (cond,
4632 gen_rtx_SET (target,
4633 gen_rtx_NOT (mode, source)));
4634 return 1;
4635 }
4636 final_invert = 1;
4637 break;
4638
4639 case MINUS:
4640 /* We treat MINUS as (val - source), since (source - val) is always
4641 passed as (source + (-val)). */
4642 if (remainder == 0)
4643 {
4644 if (generate)
4645 emit_constant_insn (cond,
4646 gen_rtx_SET (target,
4647 gen_rtx_NEG (mode, source)));
4648 return 1;
4649 }
4650 if (const_ok_for_arm (val))
4651 {
4652 if (generate)
4653 emit_constant_insn (cond,
4654 gen_rtx_SET (target,
4655 gen_rtx_MINUS (mode, GEN_INT (val),
4656 source)));
4657 return 1;
4658 }
4659
4660 break;
4661
4662 default:
4663 gcc_unreachable ();
4664 }
4665
4666 /* If we can do it in one insn get out quickly. */
4667 if (const_ok_for_op (val, code))
4668 {
4669 if (generate)
4670 emit_constant_insn (cond,
4671 gen_rtx_SET (target,
4672 (source
4673 ? gen_rtx_fmt_ee (code, mode, source,
4674 GEN_INT (val))
4675 : GEN_INT (val))));
4676 return 1;
4677 }
4678
4679 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4680 insn. */
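/* For example (illustrative), on such targets x &= 0xffff becomes a
   single UXTH, and x &= 0x1fffff becomes a single UBFX of width 21.  */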
4681 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4682 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4683 {
4684 if (generate)
4685 {
4686 if (mode == SImode && i == 16)
4687 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4688 smaller insn. */
4689 emit_constant_insn (cond,
4690 gen_zero_extendhisi2
4691 (target, gen_lowpart (HImode, source)));
4692 else
4693 /* Extz only supports SImode, but we can coerce the operands
4694 into that mode. */
4695 emit_constant_insn (cond,
4696 gen_extzv_t2 (gen_lowpart (SImode, target),
4697 gen_lowpart (SImode, source),
4698 GEN_INT (i), const0_rtx));
4699 }
4700
4701 return 1;
4702 }
4703
4704 /* Calculate a few attributes that may be useful for specific
4705 optimizations. */
4706 /* Count number of leading zeros. */
4707 for (i = 31; i >= 0; i--)
4708 {
4709 if ((remainder & (1 << i)) == 0)
4710 clear_sign_bit_copies++;
4711 else
4712 break;
4713 }
4714
4715 /* Count number of leading 1's. */
4716 for (i = 31; i >= 0; i--)
4717 {
4718 if ((remainder & (1 << i)) != 0)
4719 set_sign_bit_copies++;
4720 else
4721 break;
4722 }
4723
4724 /* Count number of trailing zeros. */
4725 for (i = 0; i <= 31; i++)
4726 {
4727 if ((remainder & (1 << i)) == 0)
4728 clear_zero_bit_copies++;
4729 else
4730 break;
4731 }
4732
4733 /* Count number of trailing 1's. */
4734 for (i = 0; i <= 31; i++)
4735 {
4736 if ((remainder & (1 << i)) != 0)
4737 set_zero_bit_copies++;
4738 else
4739 break;
4740 }
4741
4742 switch (code)
4743 {
4744 case SET:
4745 /* See if we can do this by sign_extending a constant that is known
4746 to be negative. This is a good way of doing it, since the shift
4747 may well merge into a subsequent insn. */
4748 if (set_sign_bit_copies > 1)
4749 {
4750 if (const_ok_for_arm
4751 (temp1 = ARM_SIGN_EXTEND (remainder
4752 << (set_sign_bit_copies - 1))))
4753 {
4754 if (generate)
4755 {
4756 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4757 emit_constant_insn (cond,
4758 gen_rtx_SET (new_src, GEN_INT (temp1)));
4759 emit_constant_insn (cond,
4760 gen_ashrsi3 (target, new_src,
4761 GEN_INT (set_sign_bit_copies - 1)));
4762 }
4763 return 2;
4764 }
4765 /* For an inverted constant, we will need to set the low bits,
4766 these will be shifted out of harm's way. */
4767 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4768 if (const_ok_for_arm (~temp1))
4769 {
4770 if (generate)
4771 {
4772 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4773 emit_constant_insn (cond,
4774 gen_rtx_SET (new_src, GEN_INT (temp1)));
4775 emit_constant_insn (cond,
4776 gen_ashrsi3 (target, new_src,
4777 GEN_INT (set_sign_bit_copies - 1)));
4778 }
4779 return 2;
4780 }
4781 }
4782
4783 /* See if we can calculate the value as the difference between two
4784 valid immediates. */
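/* A worked example (illustrative): 0x00fffff0 is not a valid immediate,
   but it can be built as 0x01000000 minus 0x10, i.e. a MOV of one valid
   immediate followed by a SUB of another; temp1 and temp2 below work
   out to that pair in this case.  */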
4785 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4786 {
4787 int topshift = clear_sign_bit_copies & ~1;
4788
4789 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4790 & (0xff000000 >> topshift));
4791
4792 /* If temp1 is zero, then that means the 9 most significant
4793 bits of remainder were 1 and we've caused it to overflow.
4794 When topshift is 0 we don't need to do anything since we
4795 can borrow from 'bit 32'. */
4796 if (temp1 == 0 && topshift != 0)
4797 temp1 = 0x80000000 >> (topshift - 1);
4798
4799 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4800
4801 if (const_ok_for_arm (temp2))
4802 {
4803 if (generate)
4804 {
4805 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4806 emit_constant_insn (cond,
4807 gen_rtx_SET (new_src, GEN_INT (temp1)));
4808 emit_constant_insn (cond,
4809 gen_addsi3 (target, new_src,
4810 GEN_INT (-temp2)));
4811 }
4812
4813 return 2;
4814 }
4815 }
4816
4817 /* See if we can generate this by setting the bottom (or the top)
4818 16 bits, and then shifting these into the other half of the
4819 word. We only look for the simplest cases, to do more would cost
4820 too much. Be careful, however, not to generate this when the
4821 alternative would take fewer insns. */
4822 if (val & 0xffff0000)
4823 {
4824 temp1 = remainder & 0xffff0000;
4825 temp2 = remainder & 0x0000ffff;
4826
4827 /* Overlaps outside this range are best done using other methods. */
4828 for (i = 9; i < 24; i++)
4829 {
4830 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4831 && !const_ok_for_arm (temp2))
4832 {
4833 rtx new_src = (subtargets
4834 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4835 : target);
4836 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4837 source, subtargets, generate);
4838 source = new_src;
4839 if (generate)
4840 emit_constant_insn
4841 (cond,
4842 gen_rtx_SET
4843 (target,
4844 gen_rtx_IOR (mode,
4845 gen_rtx_ASHIFT (mode, source,
4846 GEN_INT (i)),
4847 source)));
4848 return insns + 1;
4849 }
4850 }
4851
4852 /* Don't duplicate cases already considered. */
4853 for (i = 17; i < 24; i++)
4854 {
4855 if (((temp1 | (temp1 >> i)) == remainder)
4856 && !const_ok_for_arm (temp1))
4857 {
4858 rtx new_src = (subtargets
4859 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4860 : target);
4861 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4862 source, subtargets, generate);
4863 source = new_src;
4864 if (generate)
4865 emit_constant_insn
4866 (cond,
4867 gen_rtx_SET (target,
4868 gen_rtx_IOR
4869 (mode,
4870 gen_rtx_LSHIFTRT (mode, source,
4871 GEN_INT (i)),
4872 source)));
4873 return insns + 1;
4874 }
4875 }
4876 }
4877 break;
4878
4879 case IOR:
4880 case XOR:
4881 /* If we have IOR or XOR, and the constant can be loaded in a
4882 single instruction, and we can find a temporary to put it in,
4883 then this can be done in two instructions instead of 3-4. */
4884 if (subtargets
4885 /* TARGET can't be NULL if SUBTARGETS is 0 */
4886 || (reload_completed && !reg_mentioned_p (target, source)))
4887 {
4888 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4889 {
4890 if (generate)
4891 {
4892 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4893
4894 emit_constant_insn (cond,
4895 gen_rtx_SET (sub, GEN_INT (val)));
4896 emit_constant_insn (cond,
4897 gen_rtx_SET (target,
4898 gen_rtx_fmt_ee (code, mode,
4899 source, sub)));
4900 }
4901 return 2;
4902 }
4903 }
4904
4905 if (code == XOR)
4906 break;
4907
4908 /* Convert.
4909 x = y | constant ( which is composed of set_sign_bit_copies of leading 1s
4910 and the remainder 0s for e.g. 0xfff00000)
4911 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4912
4913 This can be done in 2 instructions by using shifts with mov or mvn.
4914 e.g. for
4915 x = x | 0xfff00000;
4916 we generate.
4917 mvn r0, r0, asl #12
4918 mvn r0, r0, lsr #12 */
4919 if (set_sign_bit_copies > 8
4920 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4921 {
4922 if (generate)
4923 {
4924 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4925 rtx shift = GEN_INT (set_sign_bit_copies);
4926
4927 emit_constant_insn
4928 (cond,
4929 gen_rtx_SET (sub,
4930 gen_rtx_NOT (mode,
4931 gen_rtx_ASHIFT (mode,
4932 source,
4933 shift))));
4934 emit_constant_insn
4935 (cond,
4936 gen_rtx_SET (target,
4937 gen_rtx_NOT (mode,
4938 gen_rtx_LSHIFTRT (mode, sub,
4939 shift))));
4940 }
4941 return 2;
4942 }
4943
4944 /* Convert
4945 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4946 to
4947 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4948
4949 e.g. for r0 = r0 | 0xfff
4950 mvn r0, r0, lsr #12
4951 mvn r0, r0, asl #12
4952
4953 */
4954 if (set_zero_bit_copies > 8
4955 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4956 {
4957 if (generate)
4958 {
4959 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4960 rtx shift = GEN_INT (set_zero_bit_copies);
4961
4962 emit_constant_insn
4963 (cond,
4964 gen_rtx_SET (sub,
4965 gen_rtx_NOT (mode,
4966 gen_rtx_LSHIFTRT (mode,
4967 source,
4968 shift))));
4969 emit_constant_insn
4970 (cond,
4971 gen_rtx_SET (target,
4972 gen_rtx_NOT (mode,
4973 gen_rtx_ASHIFT (mode, sub,
4974 shift))));
4975 }
4976 return 2;
4977 }
4978
4979 /* This will never be reached for Thumb2 because orn is a valid
4980 instruction. This is for Thumb1 and the ARM 32 bit cases.
4981
4982 x = y | constant (such that ~constant is a valid constant)
4983 Transform this to
4984 x = ~(~y & ~constant).
4985 */
4986 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4987 {
4988 if (generate)
4989 {
4990 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4991 emit_constant_insn (cond,
4992 gen_rtx_SET (sub,
4993 gen_rtx_NOT (mode, source)));
4994 source = sub;
4995 if (subtargets)
4996 sub = gen_reg_rtx (mode);
4997 emit_constant_insn (cond,
4998 gen_rtx_SET (sub,
4999 gen_rtx_AND (mode, source,
5000 GEN_INT (temp1))));
5001 emit_constant_insn (cond,
5002 gen_rtx_SET (target,
5003 gen_rtx_NOT (mode, sub)));
5004 }
5005 return 3;
5006 }
5007 break;
5008
5009 case AND:
5010 /* See if two shifts will do 2 or more insn's worth of work. */
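/* A small illustration (constant assumed): for x &= 0x0000ffff the mask
   has 16 clear bits at the top and neither it nor its complement is a
   valid immediate, so rather than loading it into a register we can
   clear the high half with two shifts:
   mov rd, rs, asl #16
   mov rd, rd, lsr #16 */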
5011 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5012 {
5013 HOST_WIDE_INT shift_mask = ((0xffffffff
5014 << (32 - clear_sign_bit_copies))
5015 & 0xffffffff);
5016
5017 if ((remainder | shift_mask) != 0xffffffff)
5018 {
5019 HOST_WIDE_INT new_val
5020 = ARM_SIGN_EXTEND (remainder | shift_mask);
5021
5022 if (generate)
5023 {
5024 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5025 insns = arm_gen_constant (AND, SImode, cond, new_val,
5026 new_src, source, subtargets, 1);
5027 source = new_src;
5028 }
5029 else
5030 {
5031 rtx targ = subtargets ? NULL_RTX : target;
5032 insns = arm_gen_constant (AND, mode, cond, new_val,
5033 targ, source, subtargets, 0);
5034 }
5035 }
5036
5037 if (generate)
5038 {
5039 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5040 rtx shift = GEN_INT (clear_sign_bit_copies);
5041
5042 emit_insn (gen_ashlsi3 (new_src, source, shift));
5043 emit_insn (gen_lshrsi3 (target, new_src, shift));
5044 }
5045
5046 return insns + 2;
5047 }
5048
5049 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5050 {
5051 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5052
5053 if ((remainder | shift_mask) != 0xffffffff)
5054 {
5055 HOST_WIDE_INT new_val
5056 = ARM_SIGN_EXTEND (remainder | shift_mask);
5057 if (generate)
5058 {
5059 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5060
5061 insns = arm_gen_constant (AND, mode, cond, new_val,
5062 new_src, source, subtargets, 1);
5063 source = new_src;
5064 }
5065 else
5066 {
5067 rtx targ = subtargets ? NULL_RTX : target;
5068
5069 insns = arm_gen_constant (AND, mode, cond, new_val,
5070 targ, source, subtargets, 0);
5071 }
5072 }
5073
5074 if (generate)
5075 {
5076 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5077 rtx shift = GEN_INT (clear_zero_bit_copies);
5078
5079 emit_insn (gen_lshrsi3 (new_src, source, shift));
5080 emit_insn (gen_ashlsi3 (target, new_src, shift));
5081 }
5082
5083 return insns + 2;
5084 }
5085
5086 break;
5087
5088 default:
5089 break;
5090 }
5091
5092 /* Calculate what the instruction sequences would be if we generated it
5093 normally, negated, or inverted. */
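/* For illustration only (constant chosen arbitrarily): to SET a register
   to 0xfffffff0 the positive sequence must build the value piecewise in
   several instructions, whereas the inverted value 0x0000000f is a valid
   immediate, so a single mvn rd, #15 is preferred and the inverted
   sequence wins the comparison below. */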
5094 if (code == AND)
5095 /* AND cannot be split into multiple insns, so invert and use BIC. */
5096 insns = 99;
5097 else
5098 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5099
5100 if (can_negate)
5101 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5102 &neg_immediates);
5103 else
5104 neg_insns = 99;
5105
5106 if (can_invert || final_invert)
5107 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5108 &inv_immediates);
5109 else
5110 inv_insns = 99;
5111
5112 immediates = &pos_immediates;
5113
5114 /* Is the negated immediate sequence more efficient? */
5115 if (neg_insns < insns && neg_insns <= inv_insns)
5116 {
5117 insns = neg_insns;
5118 immediates = &neg_immediates;
5119 }
5120 else
5121 can_negate = 0;
5122
5123 /* Is the inverted immediate sequence more efficient?
5124 We must allow for an extra NOT instruction for XOR operations, although
5125 there is some chance that the final 'mvn' will get optimized later. */
5126 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5127 {
5128 insns = inv_insns;
5129 immediates = &inv_immediates;
5130 }
5131 else
5132 {
5133 can_invert = 0;
5134 final_invert = 0;
5135 }
5136
5137 /* Now output the chosen sequence as instructions. */
5138 if (generate)
5139 {
5140 for (i = 0; i < insns; i++)
5141 {
5142 rtx new_src, temp1_rtx;
5143
5144 temp1 = immediates->i[i];
5145
5146 if (code == SET || code == MINUS)
5147 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5148 else if ((final_invert || i < (insns - 1)) && subtargets)
5149 new_src = gen_reg_rtx (mode);
5150 else
5151 new_src = target;
5152
5153 if (can_invert)
5154 temp1 = ~temp1;
5155 else if (can_negate)
5156 temp1 = -temp1;
5157
5158 temp1 = trunc_int_for_mode (temp1, mode);
5159 temp1_rtx = GEN_INT (temp1);
5160
5161 if (code == SET)
5162 ;
5163 else if (code == MINUS)
5164 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5165 else
5166 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5167
5168 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5169 source = new_src;
5170
5171 if (code == SET)
5172 {
5173 can_negate = can_invert;
5174 can_invert = 0;
5175 code = PLUS;
5176 }
5177 else if (code == MINUS)
5178 code = PLUS;
5179 }
5180 }
5181
5182 if (final_invert)
5183 {
5184 if (generate)
5185 emit_constant_insn (cond, gen_rtx_SET (target,
5186 gen_rtx_NOT (mode, source)));
5187 insns++;
5188 }
5189
5190 return insns;
5191 }
5192
5193 /* Canonicalize a comparison so that we are more likely to recognize it.
5194 This can be done for a few constant compares, where we can make the
5195 immediate value easier to load. */
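/* A minimal example (32-bit comparison assumed): for x > 511 the
   constant 511 cannot be used directly as a cmp or cmn immediate, but
   512 can, so the test is rewritten below as x >= 512 and no extra
   constant load is needed. */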
5196
5197 static void
5198 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5199 bool op0_preserve_value)
5200 {
5201 machine_mode mode;
5202 unsigned HOST_WIDE_INT i, maxval;
5203
5204 mode = GET_MODE (*op0);
5205 if (mode == VOIDmode)
5206 mode = GET_MODE (*op1);
5207
5208 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5209
5210 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5211 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5212 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5213 for GTU/LEU in Thumb mode. */
5214 if (mode == DImode)
5215 {
5216
5217 if (*code == GT || *code == LE
5218 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5219 {
5220 /* Missing comparison. First try to use an available
5221 comparison. */
5222 if (CONST_INT_P (*op1))
5223 {
5224 i = INTVAL (*op1);
5225 switch (*code)
5226 {
5227 case GT:
5228 case LE:
5229 if (i != maxval
5230 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5231 {
5232 *op1 = GEN_INT (i + 1);
5233 *code = *code == GT ? GE : LT;
5234 return;
5235 }
5236 break;
5237 case GTU:
5238 case LEU:
5239 if (i != ~((unsigned HOST_WIDE_INT) 0)
5240 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5241 {
5242 *op1 = GEN_INT (i + 1);
5243 *code = *code == GTU ? GEU : LTU;
5244 return;
5245 }
5246 break;
5247 default:
5248 gcc_unreachable ();
5249 }
5250 }
5251
5252 /* If that did not work, reverse the condition. */
5253 if (!op0_preserve_value)
5254 {
5255 std::swap (*op0, *op1);
5256 *code = (int)swap_condition ((enum rtx_code)*code);
5257 }
5258 }
5259 return;
5260 }
5261
5262 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5263 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5264 to facilitate possible combining with a cmp into 'ands'. */
5265 if (mode == SImode
5266 && GET_CODE (*op0) == ZERO_EXTEND
5267 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5268 && GET_MODE (XEXP (*op0, 0)) == QImode
5269 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5270 && subreg_lowpart_p (XEXP (*op0, 0))
5271 && *op1 == const0_rtx)
5272 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5273 GEN_INT (255));
5274
5275 /* Comparisons smaller than DImode. Only adjust comparisons against
5276 an out-of-range constant. */
5277 if (!CONST_INT_P (*op1)
5278 || const_ok_for_arm (INTVAL (*op1))
5279 || const_ok_for_arm (- INTVAL (*op1)))
5280 return;
5281
5282 i = INTVAL (*op1);
5283
5284 switch (*code)
5285 {
5286 case EQ:
5287 case NE:
5288 return;
5289
5290 case GT:
5291 case LE:
5292 if (i != maxval
5293 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5294 {
5295 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5296 *code = *code == GT ? GE : LT;
5297 return;
5298 }
5299 break;
5300
5301 case GE:
5302 case LT:
5303 if (i != ~maxval
5304 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5305 {
5306 *op1 = GEN_INT (i - 1);
5307 *code = *code == GE ? GT : LE;
5308 return;
5309 }
5310 break;
5311
5312 case GTU:
5313 case LEU:
5314 if (i != ~((unsigned HOST_WIDE_INT) 0)
5315 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5316 {
5317 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5318 *code = *code == GTU ? GEU : LTU;
5319 return;
5320 }
5321 break;
5322
5323 case GEU:
5324 case LTU:
5325 if (i != 0
5326 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5327 {
5328 *op1 = GEN_INT (i - 1);
5329 *code = *code == GEU ? GTU : LEU;
5330 return;
5331 }
5332 break;
5333
5334 default:
5335 gcc_unreachable ();
5336 }
5337 }
5338
5339
5340 /* Define how to find the value returned by a function. */
5341
5342 static rtx
5343 arm_function_value(const_tree type, const_tree func,
5344 bool outgoing ATTRIBUTE_UNUSED)
5345 {
5346 machine_mode mode;
5347 int unsignedp ATTRIBUTE_UNUSED;
5348 rtx r ATTRIBUTE_UNUSED;
5349
5350 mode = TYPE_MODE (type);
5351
5352 if (TARGET_AAPCS_BASED)
5353 return aapcs_allocate_return_reg (mode, type, func);
5354
5355 /* Promote integer types. */
5356 if (INTEGRAL_TYPE_P (type))
5357 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5358
5359 /* Promotes small structs returned in a register to full-word size
5360 for big-endian AAPCS. */
5361 if (arm_return_in_msb (type))
5362 {
5363 HOST_WIDE_INT size = int_size_in_bytes (type);
5364 if (size % UNITS_PER_WORD != 0)
5365 {
5366 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5367 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5368 }
5369 }
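/* Worked example (illustrative): a 3-byte structure returned on a
   big-endian AAPCS target is padded up to UNITS_PER_WORD (3 -> 4 bytes),
   so MODE becomes SImode and the value ends up in the most significant
   bits of r0. */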
5370
5371 return arm_libcall_value_1 (mode);
5372 }
5373
5374 /* libcall hashtable helpers. */
5375
5376 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5377 {
5378 static inline hashval_t hash (const rtx_def *);
5379 static inline bool equal (const rtx_def *, const rtx_def *);
5380 static inline void remove (rtx_def *);
5381 };
5382
5383 inline bool
5384 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5385 {
5386 return rtx_equal_p (p1, p2);
5387 }
5388
5389 inline hashval_t
5390 libcall_hasher::hash (const rtx_def *p1)
5391 {
5392 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5393 }
5394
5395 typedef hash_table<libcall_hasher> libcall_table_type;
5396
5397 static void
5398 add_libcall (libcall_table_type *htab, rtx libcall)
5399 {
5400 *htab->find_slot (libcall, INSERT) = libcall;
5401 }
5402
5403 static bool
5404 arm_libcall_uses_aapcs_base (const_rtx libcall)
5405 {
5406 static bool init_done = false;
5407 static libcall_table_type *libcall_htab = NULL;
5408
5409 if (!init_done)
5410 {
5411 init_done = true;
5412
5413 libcall_htab = new libcall_table_type (31);
5414 add_libcall (libcall_htab,
5415 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5416 add_libcall (libcall_htab,
5417 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5418 add_libcall (libcall_htab,
5419 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5420 add_libcall (libcall_htab,
5421 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5422
5423 add_libcall (libcall_htab,
5424 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5425 add_libcall (libcall_htab,
5426 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5427 add_libcall (libcall_htab,
5428 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5429 add_libcall (libcall_htab,
5430 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5431
5432 add_libcall (libcall_htab,
5433 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5434 add_libcall (libcall_htab,
5435 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5436 add_libcall (libcall_htab,
5437 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5438 add_libcall (libcall_htab,
5439 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5440 add_libcall (libcall_htab,
5441 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5442 add_libcall (libcall_htab,
5443 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5444 add_libcall (libcall_htab,
5445 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5446 add_libcall (libcall_htab,
5447 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5448
5449 /* Values from double-precision helper functions are returned in core
5450 registers if the selected core only supports single-precision
5451 arithmetic, even if we are using the hard-float ABI. The same is
5452 true for single-precision helpers, but we will never be using the
5453 hard-float ABI on a CPU which doesn't support single-precision
5454 operations in hardware. */
5455 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5456 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5457 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5458 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5459 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5460 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5461 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5462 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5463 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5464 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5465 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5466 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5467 SFmode));
5468 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5469 DFmode));
5470 add_libcall (libcall_htab,
5471 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5472 }
5473
5474 return libcall && libcall_htab->find (libcall) != NULL;
5475 }
5476
5477 static rtx
5478 arm_libcall_value_1 (machine_mode mode)
5479 {
5480 if (TARGET_AAPCS_BASED)
5481 return aapcs_libcall_value (mode);
5482 else if (TARGET_IWMMXT_ABI
5483 && arm_vector_mode_supported_p (mode))
5484 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5485 else
5486 return gen_rtx_REG (mode, ARG_REGISTER (1));
5487 }
5488
5489 /* Define how to find the value returned by a library function
5490 assuming the value has mode MODE. */
5491
5492 static rtx
5493 arm_libcall_value (machine_mode mode, const_rtx libcall)
5494 {
5495 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5496 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5497 {
5498 /* The following libcalls return their result in integer registers,
5499 even though they return a floating point value. */
5500 if (arm_libcall_uses_aapcs_base (libcall))
5501 return gen_rtx_REG (mode, ARG_REGISTER (1));
5502
5503 }
5504
5505 return arm_libcall_value_1 (mode);
5506 }
5507
5508 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5509
5510 static bool
5511 arm_function_value_regno_p (const unsigned int regno)
5512 {
5513 if (regno == ARG_REGISTER (1)
5514 || (TARGET_32BIT
5515 && TARGET_AAPCS_BASED
5516 && TARGET_HARD_FLOAT
5517 && regno == FIRST_VFP_REGNUM)
5518 || (TARGET_IWMMXT_ABI
5519 && regno == FIRST_IWMMXT_REGNUM))
5520 return true;
5521
5522 return false;
5523 }
5524
5525 /* Determine the amount of memory needed to store the possible return
5526 registers of an untyped call. */
5527 int
5528 arm_apply_result_size (void)
5529 {
5530 int size = 16;
5531
5532 if (TARGET_32BIT)
5533 {
5534 if (TARGET_HARD_FLOAT_ABI)
5535 size += 32;
5536 if (TARGET_IWMMXT_ABI)
5537 size += 8;
5538 }
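/* For example (derived from the checks above): a 32-bit hard-float-ABI
   target without iWMMXt needs 16 + 32 = 48 bytes, while a Thumb-1
   target needs just the 16 bytes covering r0-r3. */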
5539
5540 return size;
5541 }
5542
5543 /* Decide whether TYPE should be returned in memory (true)
5544 or in a register (false). FNTYPE is the type of the function making
5545 the call. */
5546 static bool
5547 arm_return_in_memory (const_tree type, const_tree fntype)
5548 {
5549 HOST_WIDE_INT size;
5550
5551 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5552
5553 if (TARGET_AAPCS_BASED)
5554 {
5555 /* Simple, non-aggregate types (i.e. not including vectors and
5556 complex) are always returned in a register (or registers).
5557 We don't care about which register here, so we can short-cut
5558 some of the detail. */
5559 if (!AGGREGATE_TYPE_P (type)
5560 && TREE_CODE (type) != VECTOR_TYPE
5561 && TREE_CODE (type) != COMPLEX_TYPE)
5562 return false;
5563
5564 /* Any return value that is no larger than one word can be
5565 returned in r0. */
5566 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5567 return false;
5568
5569 /* Check any available co-processors to see if they accept the
5570 type as a register candidate (VFP, for example, can return
5571 some aggregates in consecutive registers). These aren't
5572 available if the call is variadic. */
5573 if (aapcs_select_return_coproc (type, fntype) >= 0)
5574 return false;
5575
5576 /* Vector values should be returned using ARM registers, not
5577 memory (unless they're over 16 bytes, which will break since
5578 we only have four call-clobbered registers to play with). */
5579 if (TREE_CODE (type) == VECTOR_TYPE)
5580 return (size < 0 || size > (4 * UNITS_PER_WORD));
5581
5582 /* The rest go in memory. */
5583 return true;
5584 }
5585
5586 if (TREE_CODE (type) == VECTOR_TYPE)
5587 return (size < 0 || size > (4 * UNITS_PER_WORD));
5588
5589 if (!AGGREGATE_TYPE_P (type)
5590 && TREE_CODE (type) != VECTOR_TYPE)
5591 /* All simple types are returned in registers. */
5592 return false;
5593
5594 if (arm_abi != ARM_ABI_APCS)
5595 {
5596 /* ATPCS and later return aggregate types in memory only if they are
5597 larger than a word (or are variable size). */
5598 return (size < 0 || size > UNITS_PER_WORD);
5599 }
5600
5601 /* For the arm-wince targets we choose to be compatible with Microsoft's
5602 ARM and Thumb compilers, which always return aggregates in memory. */
5603 #ifndef ARM_WINCE
5604 /* All structures/unions bigger than one word are returned in memory.
5605 Also catch the case where int_size_in_bytes returns -1. In this case
5606 the aggregate is either huge or of variable size, and in either case
5607 we will want to return it via memory and not in a register. */
5608 if (size < 0 || size > UNITS_PER_WORD)
5609 return true;
5610
5611 if (TREE_CODE (type) == RECORD_TYPE)
5612 {
5613 tree field;
5614
5615 /* For a struct the APCS says that we only return in a register
5616 if the type is 'integer like' and every addressable element
5617 has an offset of zero. For practical purposes this means
5618 that the structure can have at most one non bit-field element
5619 and that this element must be the first one in the structure. */
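/* Illustrative cases (APCS targets only, layouts assumed):
   struct { int i; }       -- returned in r0;
   struct { short a, b; }  -- returned in memory (B is addressable);
   struct { float f; }     -- returned in memory (floating-point member). */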
5620
5621 /* Find the first field, ignoring non FIELD_DECL things which will
5622 have been created by C++. */
5623 for (field = TYPE_FIELDS (type);
5624 field && TREE_CODE (field) != FIELD_DECL;
5625 field = DECL_CHAIN (field))
5626 continue;
5627
5628 if (field == NULL)
5629 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5630
5631 /* Check that the first field is valid for returning in a register. */
5632
5633 /* ... Floats are not allowed */
5634 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5635 return true;
5636
5637 /* ... Aggregates that are not themselves valid for returning in
5638 a register are not allowed. */
5639 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5640 return true;
5641
5642 /* Now check the remaining fields, if any. Only bitfields are allowed,
5643 since they are not addressable. */
5644 for (field = DECL_CHAIN (field);
5645 field;
5646 field = DECL_CHAIN (field))
5647 {
5648 if (TREE_CODE (field) != FIELD_DECL)
5649 continue;
5650
5651 if (!DECL_BIT_FIELD_TYPE (field))
5652 return true;
5653 }
5654
5655 return false;
5656 }
5657
5658 if (TREE_CODE (type) == UNION_TYPE)
5659 {
5660 tree field;
5661
5662 /* Unions can be returned in registers if every element is
5663 integral, or can be returned in an integer register. */
5664 for (field = TYPE_FIELDS (type);
5665 field;
5666 field = DECL_CHAIN (field))
5667 {
5668 if (TREE_CODE (field) != FIELD_DECL)
5669 continue;
5670
5671 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5672 return true;
5673
5674 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5675 return true;
5676 }
5677
5678 return false;
5679 }
5680 #endif /* not ARM_WINCE */
5681
5682 /* Return all other types in memory. */
5683 return true;
5684 }
5685
5686 const struct pcs_attribute_arg
5687 {
5688 const char *arg;
5689 enum arm_pcs value;
5690 } pcs_attribute_args[] =
5691 {
5692 {"aapcs", ARM_PCS_AAPCS},
5693 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5694 #if 0
5695 /* We could recognize these, but changes would be needed elsewhere
5696 to implement them. */
5697 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5698 {"atpcs", ARM_PCS_ATPCS},
5699 {"apcs", ARM_PCS_APCS},
5700 #endif
5701 {NULL, ARM_PCS_UNKNOWN}
5702 };
5703
5704 static enum arm_pcs
5705 arm_pcs_from_attribute (tree attr)
5706 {
5707 const struct pcs_attribute_arg *ptr;
5708 const char *arg;
5709
5710 /* Get the value of the argument. */
5711 if (TREE_VALUE (attr) == NULL_TREE
5712 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5713 return ARM_PCS_UNKNOWN;
5714
5715 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5716
5717 /* Check it against the list of known arguments. */
5718 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5719 if (streq (arg, ptr->arg))
5720 return ptr->value;
5721
5722 /* An unrecognized PCS variant. */
5723 return ARM_PCS_UNKNOWN;
5724 }
5725
5726 /* Get the PCS variant to use for this call. TYPE is the function's type
5727 specification, DECL is the specific declaration. DECL may be null if
5728 the call could be indirect or if this is a library call. */
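/* For illustration (attribute usage assumed):
   double f (double) __attribute__ ((pcs ("aapcs-vfp")));
   selects ARM_PCS_AAPCS_VFP on an AAPCS-based target; if F were also
   variadic, the combination would be diagnosed below and the base AAPCS
   rules used instead. */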
5729 static enum arm_pcs
5730 arm_get_pcs_model (const_tree type, const_tree decl)
5731 {
5732 bool user_convention = false;
5733 enum arm_pcs user_pcs = arm_pcs_default;
5734 tree attr;
5735
5736 gcc_assert (type);
5737
5738 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5739 if (attr)
5740 {
5741 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5742 user_convention = true;
5743 }
5744
5745 if (TARGET_AAPCS_BASED)
5746 {
5747 /* Detect varargs functions. These always use the base rules
5748 (no argument is ever a candidate for a co-processor
5749 register). */
5750 bool base_rules = stdarg_p (type);
5751
5752 if (user_convention)
5753 {
5754 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5755 sorry ("non-AAPCS derived PCS variant");
5756 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5757 error ("variadic functions must use the base AAPCS variant");
5758 }
5759
5760 if (base_rules)
5761 return ARM_PCS_AAPCS;
5762 else if (user_convention)
5763 return user_pcs;
5764 else if (decl && flag_unit_at_a_time)
5765 {
5766 /* Local functions never leak outside this compilation unit,
5767 so we are free to use whatever conventions are
5768 appropriate. */
5769 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5770 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5771 if (i && i->local)
5772 return ARM_PCS_AAPCS_LOCAL;
5773 }
5774 }
5775 else if (user_convention && user_pcs != arm_pcs_default)
5776 sorry ("PCS variant");
5777
5778 /* For everything else we use the target's default. */
5779 return arm_pcs_default;
5780 }
5781
5782
5783 static void
5784 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5785 const_tree fntype ATTRIBUTE_UNUSED,
5786 rtx libcall ATTRIBUTE_UNUSED,
5787 const_tree fndecl ATTRIBUTE_UNUSED)
5788 {
5789 /* Record the unallocated VFP registers. */
5790 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5791 pcum->aapcs_vfp_reg_alloc = 0;
5792 }
5793
5794 /* Walk down the type tree of TYPE counting consecutive base elements.
5795 If *MODEP is VOIDmode, then set it to the first valid floating point
5796 type. If a non-floating point type is found, or if a floating point
5797 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5798 otherwise return the count in the sub-tree. */
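/* Illustrative results (field layouts assumed):
   struct { float x, y, z; }      -- *MODEP = SFmode, count 3;
   struct { double re, im; }      -- *MODEP = DFmode, count 2;
   struct { float f; double d; }  -- mixed base types, returns -1. */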
5799 static int
5800 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5801 {
5802 machine_mode mode;
5803 HOST_WIDE_INT size;
5804
5805 switch (TREE_CODE (type))
5806 {
5807 case REAL_TYPE:
5808 mode = TYPE_MODE (type);
5809 if (mode != DFmode && mode != SFmode && mode != HFmode)
5810 return -1;
5811
5812 if (*modep == VOIDmode)
5813 *modep = mode;
5814
5815 if (*modep == mode)
5816 return 1;
5817
5818 break;
5819
5820 case COMPLEX_TYPE:
5821 mode = TYPE_MODE (TREE_TYPE (type));
5822 if (mode != DFmode && mode != SFmode)
5823 return -1;
5824
5825 if (*modep == VOIDmode)
5826 *modep = mode;
5827
5828 if (*modep == mode)
5829 return 2;
5830
5831 break;
5832
5833 case VECTOR_TYPE:
5834 /* Use V2SImode and V4SImode as representatives of all 64-bit
5835 and 128-bit vector types, whether or not those modes are
5836 supported with the present options. */
5837 size = int_size_in_bytes (type);
5838 switch (size)
5839 {
5840 case 8:
5841 mode = V2SImode;
5842 break;
5843 case 16:
5844 mode = V4SImode;
5845 break;
5846 default:
5847 return -1;
5848 }
5849
5850 if (*modep == VOIDmode)
5851 *modep = mode;
5852
5853 /* Vector modes are considered to be opaque: two vectors are
5854 equivalent for the purposes of being homogeneous aggregates
5855 if they are the same size. */
5856 if (*modep == mode)
5857 return 1;
5858
5859 break;
5860
5861 case ARRAY_TYPE:
5862 {
5863 int count;
5864 tree index = TYPE_DOMAIN (type);
5865
5866 /* Can't handle incomplete types nor sizes that are not
5867 fixed. */
5868 if (!COMPLETE_TYPE_P (type)
5869 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5870 return -1;
5871
5872 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5873 if (count == -1
5874 || !index
5875 || !TYPE_MAX_VALUE (index)
5876 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5877 || !TYPE_MIN_VALUE (index)
5878 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5879 || count < 0)
5880 return -1;
5881
5882 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5883 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5884
5885 /* There must be no padding. */
5886 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5887 return -1;
5888
5889 return count;
5890 }
5891
5892 case RECORD_TYPE:
5893 {
5894 int count = 0;
5895 int sub_count;
5896 tree field;
5897
5898 /* Can't handle incomplete types nor sizes that are not
5899 fixed. */
5900 if (!COMPLETE_TYPE_P (type)
5901 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5902 return -1;
5903
5904 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5905 {
5906 if (TREE_CODE (field) != FIELD_DECL)
5907 continue;
5908
5909 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5910 if (sub_count < 0)
5911 return -1;
5912 count += sub_count;
5913 }
5914
5915 /* There must be no padding. */
5916 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5917 return -1;
5918
5919 return count;
5920 }
5921
5922 case UNION_TYPE:
5923 case QUAL_UNION_TYPE:
5924 {
5925 /* These aren't very interesting except in a degenerate case. */
5926 int count = 0;
5927 int sub_count;
5928 tree field;
5929
5930 /* Can't handle incomplete types nor sizes that are not
5931 fixed. */
5932 if (!COMPLETE_TYPE_P (type)
5933 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5934 return -1;
5935
5936 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5937 {
5938 if (TREE_CODE (field) != FIELD_DECL)
5939 continue;
5940
5941 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5942 if (sub_count < 0)
5943 return -1;
5944 count = count > sub_count ? count : sub_count;
5945 }
5946
5947 /* There must be no padding. */
5948 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5949 return -1;
5950
5951 return count;
5952 }
5953
5954 default:
5955 break;
5956 }
5957
5958 return -1;
5959 }
5960
5961 /* Return true if PCS_VARIANT should use VFP registers. */
5962 static bool
5963 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5964 {
5965 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5966 {
5967 static bool seen_thumb1_vfp = false;
5968
5969 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5970 {
5971 sorry ("Thumb-1 hard-float VFP ABI");
5972 /* sorry() is not immediately fatal, so only display this once. */
5973 seen_thumb1_vfp = true;
5974 }
5975
5976 return true;
5977 }
5978
5979 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5980 return false;
5981
5982 return (TARGET_32BIT && TARGET_HARD_FLOAT
5983 && (TARGET_VFP_DOUBLE || !is_double));
5984 }
5985
5986 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5987 suitable for passing or returning in VFP registers for the PCS
5988 variant selected. If it is, then *BASE_MODE is updated to contain
5989 a machine mode describing each element of the argument's type and
5990 *COUNT to hold the number of such elements. */
5991 static bool
5992 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5993 machine_mode mode, const_tree type,
5994 machine_mode *base_mode, int *count)
5995 {
5996 machine_mode new_mode = VOIDmode;
5997
5998 /* If we have the type information, prefer that to working things
5999 out from the mode. */
6000 if (type)
6001 {
6002 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6003
6004 if (ag_count > 0 && ag_count <= 4)
6005 *count = ag_count;
6006 else
6007 return false;
6008 }
6009 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6010 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6011 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6012 {
6013 *count = 1;
6014 new_mode = mode;
6015 }
6016 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6017 {
6018 *count = 2;
6019 new_mode = (mode == DCmode ? DFmode : SFmode);
6020 }
6021 else
6022 return false;
6023
6024
6025 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6026 return false;
6027
6028 *base_mode = new_mode;
6029 return true;
6030 }
6031
6032 static bool
6033 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6034 machine_mode mode, const_tree type)
6035 {
6036 int count ATTRIBUTE_UNUSED;
6037 machine_mode ag_mode ATTRIBUTE_UNUSED;
6038
6039 if (!use_vfp_abi (pcs_variant, false))
6040 return false;
6041 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6042 &ag_mode, &count);
6043 }
6044
6045 static bool
6046 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6047 const_tree type)
6048 {
6049 if (!use_vfp_abi (pcum->pcs_variant, false))
6050 return false;
6051
6052 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6053 &pcum->aapcs_vfp_rmode,
6054 &pcum->aapcs_vfp_rcount);
6055 }
6056
6057 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6058 for the behaviour of this function. */
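/* A sketch of the allocation (free registers assumed): for a homogeneous
   aggregate of two doubles, aapcs_vfp_rmode is DFmode and
   aapcs_vfp_rcount is 2, giving SHIFT == 2 and MASK == 0xf, so the loop
   searches for four consecutive free single-precision registers starting
   at an even boundary, e.g. s0-s3 (d0/d1). */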
6059
6060 static bool
6061 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6062 const_tree type ATTRIBUTE_UNUSED)
6063 {
6064 int rmode_size
6065 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6066 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6067 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6068 int regno;
6069
6070 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6071 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6072 {
6073 pcum->aapcs_vfp_reg_alloc = mask << regno;
6074 if (mode == BLKmode
6075 || (mode == TImode && ! TARGET_NEON)
6076 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6077 {
6078 int i;
6079 int rcount = pcum->aapcs_vfp_rcount;
6080 int rshift = shift;
6081 machine_mode rmode = pcum->aapcs_vfp_rmode;
6082 rtx par;
6083 if (!TARGET_NEON)
6084 {
6085 /* Avoid using unsupported vector modes. */
6086 if (rmode == V2SImode)
6087 rmode = DImode;
6088 else if (rmode == V4SImode)
6089 {
6090 rmode = DImode;
6091 rcount *= 2;
6092 rshift /= 2;
6093 }
6094 }
6095 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6096 for (i = 0; i < rcount; i++)
6097 {
6098 rtx tmp = gen_rtx_REG (rmode,
6099 FIRST_VFP_REGNUM + regno + i * rshift);
6100 tmp = gen_rtx_EXPR_LIST
6101 (VOIDmode, tmp,
6102 GEN_INT (i * GET_MODE_SIZE (rmode)));
6103 XVECEXP (par, 0, i) = tmp;
6104 }
6105
6106 pcum->aapcs_reg = par;
6107 }
6108 else
6109 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6110 return true;
6111 }
6112 return false;
6113 }
6114
6115 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6116 comment there for the behaviour of this function. */
6117
6118 static rtx
6119 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6120 machine_mode mode,
6121 const_tree type ATTRIBUTE_UNUSED)
6122 {
6123 if (!use_vfp_abi (pcs_variant, false))
6124 return NULL;
6125
6126 if (mode == BLKmode
6127 || (GET_MODE_CLASS (mode) == MODE_INT
6128 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6129 && !TARGET_NEON))
6130 {
6131 int count;
6132 machine_mode ag_mode;
6133 int i;
6134 rtx par;
6135 int shift;
6136
6137 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6138 &ag_mode, &count);
6139
6140 if (!TARGET_NEON)
6141 {
6142 if (ag_mode == V2SImode)
6143 ag_mode = DImode;
6144 else if (ag_mode == V4SImode)
6145 {
6146 ag_mode = DImode;
6147 count *= 2;
6148 }
6149 }
6150 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6151 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6152 for (i = 0; i < count; i++)
6153 {
6154 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6155 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6156 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6157 XVECEXP (par, 0, i) = tmp;
6158 }
6159
6160 return par;
6161 }
6162
6163 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6164 }
6165
6166 static void
6167 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6168 machine_mode mode ATTRIBUTE_UNUSED,
6169 const_tree type ATTRIBUTE_UNUSED)
6170 {
6171 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6172 pcum->aapcs_vfp_reg_alloc = 0;
6173 return;
6174 }
6175
6176 #define AAPCS_CP(X) \
6177 { \
6178 aapcs_ ## X ## _cum_init, \
6179 aapcs_ ## X ## _is_call_candidate, \
6180 aapcs_ ## X ## _allocate, \
6181 aapcs_ ## X ## _is_return_candidate, \
6182 aapcs_ ## X ## _allocate_return_reg, \
6183 aapcs_ ## X ## _advance \
6184 }
6185
6186 /* Table of co-processors that can be used to pass arguments in
6187 registers. Ideally no argument should be a candidate for more than
6188 one co-processor table entry, but the table is processed in order
6189 and stops after the first match. If that entry then fails to put
6190 the argument into a co-processor register, the argument will go on
6191 the stack. */
6192 static struct
6193 {
6194 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6195 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6196
6197 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6198 BLKmode) is a candidate for this co-processor's registers; this
6199 function should ignore any position-dependent state in
6200 CUMULATIVE_ARGS and only use call-type dependent information. */
6201 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6202
6203 /* Return true if the argument does get a co-processor register; it
6204 should set aapcs_reg to an RTX of the register allocated as is
6205 required for a return from FUNCTION_ARG. */
6206 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6207
6208 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6209 be returned in this co-processor's registers. */
6210 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6211
6212 /* Allocate and return an RTX element to hold the return type of a call. This
6213 routine must not fail and will only be called if is_return_candidate
6214 returned true with the same parameters. */
6215 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6216
6217 /* Finish processing this argument and prepare to start processing
6218 the next one. */
6219 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6220 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6221 {
6222 AAPCS_CP(vfp)
6223 };
6224
6225 #undef AAPCS_CP
6226
6227 static int
6228 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6229 const_tree type)
6230 {
6231 int i;
6232
6233 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6234 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6235 return i;
6236
6237 return -1;
6238 }
6239
6240 static int
6241 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6242 {
6243 /* We aren't passed a decl, so we can't check that a call is local.
6244 However, it isn't clear that that would be a win anyway, since it
6245 might limit some tail-calling opportunities. */
6246 enum arm_pcs pcs_variant;
6247
6248 if (fntype)
6249 {
6250 const_tree fndecl = NULL_TREE;
6251
6252 if (TREE_CODE (fntype) == FUNCTION_DECL)
6253 {
6254 fndecl = fntype;
6255 fntype = TREE_TYPE (fntype);
6256 }
6257
6258 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6259 }
6260 else
6261 pcs_variant = arm_pcs_default;
6262
6263 if (pcs_variant != ARM_PCS_AAPCS)
6264 {
6265 int i;
6266
6267 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6268 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6269 TYPE_MODE (type),
6270 type))
6271 return i;
6272 }
6273 return -1;
6274 }
6275
6276 static rtx
6277 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6278 const_tree fntype)
6279 {
6280 /* We aren't passed a decl, so we can't check that a call is local.
6281 However, it isn't clear that that would be a win anyway, since it
6282 might limit some tail-calling opportunities. */
6283 enum arm_pcs pcs_variant;
6284 int unsignedp ATTRIBUTE_UNUSED;
6285
6286 if (fntype)
6287 {
6288 const_tree fndecl = NULL_TREE;
6289
6290 if (TREE_CODE (fntype) == FUNCTION_DECL)
6291 {
6292 fndecl = fntype;
6293 fntype = TREE_TYPE (fntype);
6294 }
6295
6296 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6297 }
6298 else
6299 pcs_variant = arm_pcs_default;
6300
6301 /* Promote integer types. */
6302 if (type && INTEGRAL_TYPE_P (type))
6303 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6304
6305 if (pcs_variant != ARM_PCS_AAPCS)
6306 {
6307 int i;
6308
6309 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6310 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6311 type))
6312 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6313 mode, type);
6314 }
6315
6316 /* Promotes small structs returned in a register to full-word size
6317 for big-endian AAPCS. */
6318 if (type && arm_return_in_msb (type))
6319 {
6320 HOST_WIDE_INT size = int_size_in_bytes (type);
6321 if (size % UNITS_PER_WORD != 0)
6322 {
6323 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6324 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6325 }
6326 }
6327
6328 return gen_rtx_REG (mode, R0_REGNUM);
6329 }
6330
6331 static rtx
6332 aapcs_libcall_value (machine_mode mode)
6333 {
6334 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6335 && GET_MODE_SIZE (mode) <= 4)
6336 mode = SImode;
6337
6338 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6339 }
6340
6341 /* Lay out a function argument using the AAPCS rules. The rule
6342 numbers referred to here are those in the AAPCS. */
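/* Worked example under the base AAPCS (core-register passing assumed):
   for void f (int a, double b) the int takes r0; rule C3 then rounds
   the NCRN up from 1 to 2 for the doubleword-aligned double, which is
   passed in r2/r3, leaving r1 unused. */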
6343 static void
6344 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6345 const_tree type, bool named)
6346 {
6347 int nregs, nregs2;
6348 int ncrn;
6349
6350 /* We only need to do this once per argument. */
6351 if (pcum->aapcs_arg_processed)
6352 return;
6353
6354 pcum->aapcs_arg_processed = true;
6355
6356 /* Special case: if named is false then we are handling an incoming
6357 anonymous argument which is on the stack. */
6358 if (!named)
6359 return;
6360
6361 /* Is this a potential co-processor register candidate? */
6362 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6363 {
6364 int slot = aapcs_select_call_coproc (pcum, mode, type);
6365 pcum->aapcs_cprc_slot = slot;
6366
6367 /* We don't have to apply any of the rules from part B of the
6368 preparation phase, these are handled elsewhere in the
6369 compiler. */
6370
6371 if (slot >= 0)
6372 {
6373 /* A Co-processor register candidate goes either in its own
6374 class of registers or on the stack. */
6375 if (!pcum->aapcs_cprc_failed[slot])
6376 {
6377 /* C1.cp - Try to allocate the argument to co-processor
6378 registers. */
6379 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6380 return;
6381
6382 /* C2.cp - Put the argument on the stack and note that we
6383 can't assign any more candidates in this slot. We also
6384 need to note that we have allocated stack space, so that
6385 we won't later try to split a non-cprc candidate between
6386 core registers and the stack. */
6387 pcum->aapcs_cprc_failed[slot] = true;
6388 pcum->can_split = false;
6389 }
6390
6391 /* We didn't get a register, so this argument goes on the
6392 stack. */
6393 gcc_assert (pcum->can_split == false);
6394 return;
6395 }
6396 }
6397
6398 /* C3 - For double-word aligned arguments, round the NCRN up to the
6399 next even number. */
6400 ncrn = pcum->aapcs_ncrn;
6401 if (ncrn & 1)
6402 {
6403 int res = arm_needs_doubleword_align (mode, type);
6404 /* Only warn during RTL expansion of call stmts, otherwise we would
6405 warn e.g. during gimplification even on functions that will be
6406 always inlined, and we'd warn multiple times. Don't warn when
6407 called in expand_function_start either, as we warn instead in
6408 arm_function_arg_boundary in that case. */
6409 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6410 inform (input_location, "parameter passing for argument of type "
6411 "%qT changed in GCC 7.1", type);
6412 else if (res > 0)
6413 ncrn++;
6414 }
6415
6416 nregs = ARM_NUM_REGS2 (mode, type);
6417
6418 /* Sigh, this test should really assert that nregs > 0, but a GCC
6419 extension allows empty structs and then gives them zero size; it
6420 then allows such a structure to be passed by value. For some of
6421 the code below we have to pretend that such an argument has
6422 non-zero size so that we 'locate' it correctly either in
6423 registers or on the stack. */
6424 gcc_assert (nregs >= 0);
6425
6426 nregs2 = nregs ? nregs : 1;
6427
6428 /* C4 - Argument fits entirely in core registers. */
6429 if (ncrn + nregs2 <= NUM_ARG_REGS)
6430 {
6431 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6432 pcum->aapcs_next_ncrn = ncrn + nregs;
6433 return;
6434 }
6435
6436 /* C5 - Some core registers left and there are no arguments already
6437 on the stack: split this argument between the remaining core
6438 registers and the stack. */
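/* For instance (sizes assumed): a 16-byte structure arriving with
   NCRN == 2 gets its first 8 bytes in r2/r3 and the remaining 8 bytes
   on the stack, recorded in aapcs_partial below. */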
6439 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6440 {
6441 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6442 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6443 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6444 return;
6445 }
6446
6447 /* C6 - NCRN is set to 4. */
6448 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6449
6450 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6451 return;
6452 }
6453
6454 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6455 for a call to a function whose data type is FNTYPE.
6456 For a library call, FNTYPE is NULL. */
6457 void
6458 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6459 rtx libname,
6460 tree fndecl ATTRIBUTE_UNUSED)
6461 {
6462 /* Long call handling. */
6463 if (fntype)
6464 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6465 else
6466 pcum->pcs_variant = arm_pcs_default;
6467
6468 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6469 {
6470 if (arm_libcall_uses_aapcs_base (libname))
6471 pcum->pcs_variant = ARM_PCS_AAPCS;
6472
6473 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6474 pcum->aapcs_reg = NULL_RTX;
6475 pcum->aapcs_partial = 0;
6476 pcum->aapcs_arg_processed = false;
6477 pcum->aapcs_cprc_slot = -1;
6478 pcum->can_split = true;
6479
6480 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6481 {
6482 int i;
6483
6484 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6485 {
6486 pcum->aapcs_cprc_failed[i] = false;
6487 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6488 }
6489 }
6490 return;
6491 }
6492
6493 /* Legacy ABIs */
6494
6495 /* On the ARM, the offset starts at 0. */
6496 pcum->nregs = 0;
6497 pcum->iwmmxt_nregs = 0;
6498 pcum->can_split = true;
6499
6500 /* Varargs vectors are treated the same as long long.
6501 named_count avoids having to change the way arm handles 'named' */
6502 pcum->named_count = 0;
6503 pcum->nargs = 0;
6504
6505 if (TARGET_REALLY_IWMMXT && fntype)
6506 {
6507 tree fn_arg;
6508
6509 for (fn_arg = TYPE_ARG_TYPES (fntype);
6510 fn_arg;
6511 fn_arg = TREE_CHAIN (fn_arg))
6512 pcum->named_count += 1;
6513
6514 if (! pcum->named_count)
6515 pcum->named_count = INT_MAX;
6516 }
6517 }
6518
6519 /* Return 1 if double word alignment is required for argument passing.
6520 Return -1 if double word alignment used to be required for argument
6521 passing before PR77728 ABI fix, but is not required anymore.
6522 Return 0 if double word alignment is not required and wasn't required
6523 before either. */
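/* Examples (types assumed): a double or long long parameter returns 1;
   a plain int returns 0; an aggregate that is doubleword-aligned only
   because of a non-FIELD_DECL entry (e.g. a static data member) returns
   -1, matching the pre-PR77728 behaviour so that -Wpsabi can warn. */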
6524 static int
6525 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6526 {
6527 if (!type)
6528 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6529
6530 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6531 if (!AGGREGATE_TYPE_P (type))
6532 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6533
6534 /* Array types: Use member alignment of element type. */
6535 if (TREE_CODE (type) == ARRAY_TYPE)
6536 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6537
6538 int ret = 0;
6539 /* Record/aggregate types: Use greatest member alignment of any member. */
6540 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6541 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6542 {
6543 if (TREE_CODE (field) == FIELD_DECL)
6544 return 1;
6545 else
6546 /* Before PR77728 fix, we were incorrectly considering also
6547 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6548 Make sure we can warn about that with -Wpsabi. */
6549 ret = -1;
6550 }
6551
6552 return ret;
6553 }
6554
6555
6556 /* Determine where to put an argument to a function.
6557 Value is zero to push the argument on the stack,
6558 or a hard register in which to store the argument.
6559
6560 MODE is the argument's machine mode.
6561 TYPE is the data type of the argument (as a tree).
6562 This is null for libcalls where that information may
6563 not be available.
6564 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6565 the preceding args and about the function being called.
6566 NAMED is nonzero if this argument is a named parameter
6567 (otherwise it is an extra parameter matching an ellipsis).
6568
6569 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6570 other arguments are passed on the stack. If (NAMED == 0) (which happens
6571 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6572 defined), say it is passed on the stack (function_prologue will
6573 indeed make it pass on the stack if necessary). */
6574
6575 static rtx
6576 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6577 const_tree type, bool named)
6578 {
6579 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6580 int nregs;
6581
6582 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6583 a call insn (op3 of a call_value insn). */
6584 if (mode == VOIDmode)
6585 return const0_rtx;
6586
6587 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6588 {
6589 aapcs_layout_arg (pcum, mode, type, named);
6590 return pcum->aapcs_reg;
6591 }
6592
6593 /* Varargs vectors are treated the same as long long.
6594 named_count avoids having to change the way arm handles 'named' */
6595 if (TARGET_IWMMXT_ABI
6596 && arm_vector_mode_supported_p (mode)
6597 && pcum->named_count > pcum->nargs + 1)
6598 {
6599 if (pcum->iwmmxt_nregs <= 9)
6600 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6601 else
6602 {
6603 pcum->can_split = false;
6604 return NULL_RTX;
6605 }
6606 }
6607
6608 /* Put doubleword aligned quantities in even register pairs. */
6609 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6610 {
6611 int res = arm_needs_doubleword_align (mode, type);
6612 if (res < 0 && warn_psabi)
6613 inform (input_location, "parameter passing for argument of type "
6614 "%qT changed in GCC 7.1", type);
6615 else if (res > 0)
6616 pcum->nregs++;
6617 }
6618
6619 /* Only allow splitting an arg between regs and memory if all preceding
6620 args were allocated to regs. For args passed by reference we only count
6621 the reference pointer. */
6622 if (pcum->can_split)
6623 nregs = 1;
6624 else
6625 nregs = ARM_NUM_REGS2 (mode, type);
6626
6627 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6628 return NULL_RTX;
6629
6630 return gen_rtx_REG (mode, pcum->nregs);
6631 }
6632
6633 static unsigned int
6634 arm_function_arg_boundary (machine_mode mode, const_tree type)
6635 {
6636 if (!ARM_DOUBLEWORD_ALIGN)
6637 return PARM_BOUNDARY;
6638
6639 int res = arm_needs_doubleword_align (mode, type);
6640 if (res < 0 && warn_psabi)
6641 inform (input_location, "parameter passing for argument of type %qT "
6642 "changed in GCC 7.1", type);
6643
6644 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6645 }
6646
6647 static int
6648 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6649 tree type, bool named)
6650 {
6651 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6652 int nregs = pcum->nregs;
6653
6654 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6655 {
6656 aapcs_layout_arg (pcum, mode, type, named);
6657 return pcum->aapcs_partial;
6658 }
6659
6660 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6661 return 0;
6662
6663 if (NUM_ARG_REGS > nregs
6664 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6665 && pcum->can_split)
6666 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6667
6668 return 0;
6669 }
6670
6671 /* Update the data in PCUM to advance over an argument
6672 of mode MODE and data type TYPE.
6673 (TYPE is null for libcalls where that information may not be available.) */
6674
6675 static void
6676 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6677 const_tree type, bool named)
6678 {
6679 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6680
6681 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6682 {
6683 aapcs_layout_arg (pcum, mode, type, named);
6684
6685 if (pcum->aapcs_cprc_slot >= 0)
6686 {
6687 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6688 type);
6689 pcum->aapcs_cprc_slot = -1;
6690 }
6691
6692 /* Generic stuff. */
6693 pcum->aapcs_arg_processed = false;
6694 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6695 pcum->aapcs_reg = NULL_RTX;
6696 pcum->aapcs_partial = 0;
6697 }
6698 else
6699 {
6700 pcum->nargs += 1;
6701 if (arm_vector_mode_supported_p (mode)
6702 && pcum->named_count > pcum->nargs
6703 && TARGET_IWMMXT_ABI)
6704 pcum->iwmmxt_nregs += 1;
6705 else
6706 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6707 }
6708 }
6709
6710 /* Variable sized types are passed by reference. This is a GCC
6711 extension to the ARM ABI. */
6712
6713 static bool
6714 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6715 machine_mode mode ATTRIBUTE_UNUSED,
6716 const_tree type, bool named ATTRIBUTE_UNUSED)
6717 {
6718 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6719 }
6720 \f
6721 /* Encode the current state of the #pragma [no_]long_calls. */
6722 typedef enum
6723 {
6724 OFF, /* No #pragma [no_]long_calls is in effect. */
6725 LONG, /* #pragma long_calls is in effect. */
6726 SHORT /* #pragma no_long_calls is in effect. */
6727 } arm_pragma_enum;
6728
6729 static arm_pragma_enum arm_pragma_long_calls = OFF;
6730
6731 void
6732 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6733 {
6734 arm_pragma_long_calls = LONG;
6735 }
6736
6737 void
6738 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6739 {
6740 arm_pragma_long_calls = SHORT;
6741 }
6742
6743 void
6744 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6745 {
6746 arm_pragma_long_calls = OFF;
6747 }
6748 \f
6749 /* Handle an attribute requiring a FUNCTION_DECL;
6750 arguments as in struct attribute_spec.handler. */
6751 static tree
6752 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6753 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6754 {
6755 if (TREE_CODE (*node) != FUNCTION_DECL)
6756 {
6757 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6758 name);
6759 *no_add_attrs = true;
6760 }
6761
6762 return NULL_TREE;
6763 }
6764
6765 /* Handle an "interrupt" or "isr" attribute;
6766 arguments as in struct attribute_spec.handler. */
6767 static tree
6768 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6769 bool *no_add_attrs)
6770 {
6771 if (DECL_P (*node))
6772 {
6773 if (TREE_CODE (*node) != FUNCTION_DECL)
6774 {
6775 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6776 name);
6777 *no_add_attrs = true;
6778 }
6779 /* FIXME: the argument if any is checked for type attributes;
6780 should it be checked for decl ones? */
6781 }
6782 else
6783 {
6784 if (TREE_CODE (*node) == FUNCTION_TYPE
6785 || TREE_CODE (*node) == METHOD_TYPE)
6786 {
6787 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6788 {
6789 warning (OPT_Wattributes, "%qE attribute ignored",
6790 name);
6791 *no_add_attrs = true;
6792 }
6793 }
6794 else if (TREE_CODE (*node) == POINTER_TYPE
6795 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6796 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6797 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6798 {
6799 *node = build_variant_type_copy (*node);
6800 TREE_TYPE (*node) = build_type_attribute_variant
6801 (TREE_TYPE (*node),
6802 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6803 *no_add_attrs = true;
6804 }
6805 else
6806 {
6807 /* Possibly pass this attribute on from the type to a decl. */
6808 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6809 | (int) ATTR_FLAG_FUNCTION_NEXT
6810 | (int) ATTR_FLAG_ARRAY_NEXT))
6811 {
6812 *no_add_attrs = true;
6813 return tree_cons (name, args, NULL_TREE);
6814 }
6815 else
6816 {
6817 warning (OPT_Wattributes, "%qE attribute ignored",
6818 name);
6819 }
6820 }
6821 }
6822
6823 return NULL_TREE;
6824 }
6825
6826 /* Handle a "pcs" attribute; arguments as in struct
6827 attribute_spec.handler. */
6828 static tree
6829 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6830 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6831 {
6832 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6833 {
6834 warning (OPT_Wattributes, "%qE attribute ignored", name);
6835 *no_add_attrs = true;
6836 }
6837 return NULL_TREE;
6838 }
6839
6840 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6841 /* Handle the "notshared" attribute. This attribute is another way of
6842 requesting hidden visibility. ARM's compiler supports
6843 "__declspec(notshared)"; we support the same thing via an
6844 attribute. */
6845
6846 static tree
6847 arm_handle_notshared_attribute (tree *node,
6848 tree name ATTRIBUTE_UNUSED,
6849 tree args ATTRIBUTE_UNUSED,
6850 int flags ATTRIBUTE_UNUSED,
6851 bool *no_add_attrs)
6852 {
6853 tree decl = TYPE_NAME (*node);
6854
6855 if (decl)
6856 {
6857 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6858 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6859 *no_add_attrs = false;
6860 }
6861 return NULL_TREE;
6862 }
6863 #endif
6864
6865 /* Return true if a function with declaration FNDECL and type FNTYPE
6866 uses the stack to pass arguments or to return its value, and false
6867 otherwise. This is used for functions with the attributes
6868 'cmse_nonsecure_call' or 'cmse_nonsecure_entry', and it issues
6869 diagnostic messages if the stack is used. NAME is the name of the
6870 attribute used. */
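/* For illustration (prototype assumed):
   __attribute__ ((cmse_nonsecure_entry))
   int f (long long a, long long b, int c);
   A and B consume r0-r3, so C would have to go on the stack and the
   attribute is rejected with an error below. */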
6871
6872 static bool
6873 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6874 {
6875 function_args_iterator args_iter;
6876 CUMULATIVE_ARGS args_so_far_v;
6877 cumulative_args_t args_so_far;
6878 bool first_param = true;
6879 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6880
6881 /* Error out if any argument is passed on the stack. */
6882 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6883 args_so_far = pack_cumulative_args (&args_so_far_v);
6884 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6885 {
6886 rtx arg_rtx;
6887 machine_mode arg_mode = TYPE_MODE (arg_type);
6888
6889 prev_arg_type = arg_type;
6890 if (VOID_TYPE_P (arg_type))
6891 continue;
6892
6893 if (!first_param)
6894 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6895 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6896 if (!arg_rtx
6897 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6898 {
6899 error ("%qE attribute not available to functions with arguments "
6900 "passed on the stack", name);
6901 return true;
6902 }
6903 first_param = false;
6904 }
6905
6906 /* Error out for variadic functions since we cannot control how many
6907 arguments will be passed and thus the stack could be used. stdarg_p () is
6908 not used for this check to avoid walking the arguments twice. */
6909 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6910 {
6911 error ("%qE attribute not available to functions with variable number "
6912 "of arguments", name);
6913 return true;
6914 }
6915
6916 /* Error out if return value is passed on the stack. */
6917 ret_type = TREE_TYPE (fntype);
6918 if (arm_return_in_memory (ret_type, fntype))
6919 {
6920 error ("%qE attribute not available to functions that return value on "
6921 "the stack", name);
6922 return true;
6923 }
6924 return false;
6925 }
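
/* As a rough illustration of what the check above rejects: under the AAPCS
   only the first four integer-sized arguments are passed in r0-r3, so a
   (hypothetical) declaration along the lines of

     int __attribute__ ((cmse_nonsecure_entry))
     sum5 (int a, int b, int c, int d, int e);

   would pass E on the stack and trigger the "passed on the stack" error,
   as would a variadic prototype or a return type that arm_return_in_memory
   says is returned in memory.  The exact set of accepted signatures is
   whatever the argument-layout code above computes.  */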
6926
6927 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6928 function will check whether the attribute is allowed here and will add the
6929 attribute to the function declaration tree or otherwise issue a warning. */
6930
6931 static tree
6932 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6933 tree /* args */,
6934 int /* flags */,
6935 bool *no_add_attrs)
6936 {
6937 tree fndecl;
6938
6939 if (!use_cmse)
6940 {
6941 *no_add_attrs = true;
6942 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6943 name);
6944 return NULL_TREE;
6945 }
6946
6947 /* Ignore attribute for function types. */
6948 if (TREE_CODE (*node) != FUNCTION_DECL)
6949 {
6950 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6951 name);
6952 *no_add_attrs = true;
6953 return NULL_TREE;
6954 }
6955
6956 fndecl = *node;
6957
6958 /* Warn for static linkage functions. */
6959 if (!TREE_PUBLIC (fndecl))
6960 {
6961 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
6962 "with static linkage", name);
6963 *no_add_attrs = true;
6964 return NULL_TREE;
6965 }
6966
6967 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
6968 TREE_TYPE (fndecl));
6969 return NULL_TREE;
6970 }
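
/* A minimal sketch of the intended use, assuming the code is compiled
   with -mcmse:

     int __attribute__ ((cmse_nonsecure_entry)) get_secure_counter (void);

   The attribute is only accepted on externally visible function
   declarations; using it on a static function, on something that is not
   a function, or without -mcmse runs into one of the warnings above.
   (The function name here is purely illustrative.)  */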
6971
6972
6973 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6974 function will check whether the attribute is allowed here and will add the
6975 attribute to the function type tree or otherwise issue a diagnostic. The
6976 reason we check this at declaration time is to only allow the use of the
6977 attribute with declarations of function pointers and not function
6978 declarations. This function checks NODE is of the expected type and issues
6979 diagnostics otherwise using NAME. If it is not of the expected type
6980 *NO_ADD_ATTRS will be set to true. */
6981
6982 static tree
6983 arm_handle_cmse_nonsecure_call (tree *node, tree name,
6984 tree /* args */,
6985 int /* flags */,
6986 bool *no_add_attrs)
6987 {
6988 tree decl = NULL_TREE, fntype = NULL_TREE;
6989 tree type;
6990
6991 if (!use_cmse)
6992 {
6993 *no_add_attrs = true;
6994 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6995 name);
6996 return NULL_TREE;
6997 }
6998
6999 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7000 {
7001 decl = *node;
7002 fntype = TREE_TYPE (decl);
7003 }
7004
7005 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7006 fntype = TREE_TYPE (fntype);
7007
7008 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7009 {
7010 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7011 "function pointer", name);
7012 *no_add_attrs = true;
7013 return NULL_TREE;
7014 }
7015
7016 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7017
7018 if (*no_add_attrs)
7019 return NULL_TREE;
7020
7021 /* Prevent trees from being shared among function types with and without
7022 the cmse_nonsecure_call attribute. */
7023 type = TREE_TYPE (decl);
7024
7025 type = build_distinct_type_copy (type);
7026 TREE_TYPE (decl) = type;
7027 fntype = type;
7028
7029 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7030 {
7031 type = fntype;
7032 fntype = TREE_TYPE (fntype);
7033 fntype = build_distinct_type_copy (fntype);
7034 TREE_TYPE (type) = fntype;
7035 }
7036
7037 /* Construct a type attribute and add it to the function type. */
7038 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7039 TYPE_ATTRIBUTES (fntype));
7040 TYPE_ATTRIBUTES (fntype) = attrs;
7041 return NULL_TREE;
7042 }
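
/* A minimal sketch of the intended use, again assuming -mcmse.  The
   attribute is attached to the function type reached through a pointer
   declaration, for instance:

     void (*ns_callback) (int) __attribute__ ((cmse_nonsecure_call));

   Calls made through NS_CALLBACK are then expanded later with the
   non-secure call sequence.  Putting the attribute directly on a function
   declaration is rejected by the check above instead.  */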
7043
7044 /* Return 0 if the attributes for two types are incompatible, 1 if they
7045 are compatible, and 2 if they are nearly compatible (which causes a
7046 warning to be generated). */
7047 static int
7048 arm_comp_type_attributes (const_tree type1, const_tree type2)
7049 {
7050 int l1, l2, s1, s2;
7051
7052 /* Check for mismatch of non-default calling convention. */
7053 if (TREE_CODE (type1) != FUNCTION_TYPE)
7054 return 1;
7055
7056 /* Check for mismatched call attributes. */
7057 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7058 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7059 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7060 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7061
7062 /* Only bother to check if an attribute is defined. */
7063 if (l1 | l2 | s1 | s2)
7064 {
7065 /* If one type has an attribute, the other must have the same attribute. */
7066 if ((l1 != l2) || (s1 != s2))
7067 return 0;
7068
7069 /* Disallow mixed attributes. */
7070 if ((l1 & s2) || (l2 & s1))
7071 return 0;
7072 }
7073
7074 /* Check for mismatched ISR attribute. */
7075 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7076 if (! l1)
7077 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7078 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7079 if (! l2)
7080 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7081 if (l1 != l2)
7082 return 0;
7083
7084 l1 = lookup_attribute ("cmse_nonsecure_call",
7085 TYPE_ATTRIBUTES (type1)) != NULL;
7086 l2 = lookup_attribute ("cmse_nonsecure_call",
7087 TYPE_ATTRIBUTES (type2)) != NULL;
7088
7089 if (l1 != l2)
7090 return 0;
7091
7092 return 1;
7093 }
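
/* A small illustration of the checks above: given

     extern void far_fn (void) __attribute__ ((long_call));
     void (*plain_fp) (void) = far_fn;

   the two function types differ in their long_call attribute, so this
   hook returns 0 and the initialization is diagnosed as mixing
   incompatible pointer types.  Mismatches of short_call, of the
   isr/interrupt attributes, or of cmse_nonsecure_call behave the same
   way.  */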
7094
7095 /* Assigns default attributes to newly defined type. This is used to
7096 set short_call/long_call attributes for function types of
7097 functions defined inside corresponding #pragma scopes. */
7098 static void
7099 arm_set_default_type_attributes (tree type)
7100 {
7101 /* Add __attribute__ ((long_call)) to all functions, when
7102 inside #pragma long_calls or __attribute__ ((short_call)),
7103 when inside #pragma no_long_calls. */
7104 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7105 {
7106 tree type_attr_list, attr_name;
7107 type_attr_list = TYPE_ATTRIBUTES (type);
7108
7109 if (arm_pragma_long_calls == LONG)
7110 attr_name = get_identifier ("long_call");
7111 else if (arm_pragma_long_calls == SHORT)
7112 attr_name = get_identifier ("short_call");
7113 else
7114 return;
7115
7116 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7117 TYPE_ATTRIBUTES (type) = type_attr_list;
7118 }
7119 }
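
/* A brief illustration of the pragma interaction handled above:

     #pragma long_calls
     void far_away (void);      (type gets an implicit long_call)
     #pragma no_long_calls
     void nearby (void);        (type gets an implicit short_call)
     #pragma long_calls_off
     void ordinary (void);      (no attribute is added)

   The current pragma state is recorded in arm_pragma_long_calls by the
   long_calls pragma handlers.  */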
7120 \f
7121 /* Return true if DECL is known to be linked into section SECTION. */
7122
7123 static bool
7124 arm_function_in_section_p (tree decl, section *section)
7125 {
7126 /* We can only be certain about the prevailing symbol definition. */
7127 if (!decl_binds_to_current_def_p (decl))
7128 return false;
7129
7130 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7131 if (!DECL_SECTION_NAME (decl))
7132 {
7133 /* Make sure that we will not create a unique section for DECL. */
7134 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7135 return false;
7136 }
7137
7138 return function_section (decl) == section;
7139 }
7140
7141 /* Return nonzero if a 32-bit "long_call" should be generated for
7142 a call from the current function to DECL. We generate a long_call
7143 if the function:
7144
7145 a. has an __attribute__ ((long_call))
7146 or b. is within the scope of a #pragma long_calls
7147 or c. the -mlong-calls command line switch has been specified
7148
7149 However we do not generate a long call if the function:
7150
7151 d. has an __attribute__ ((short_call))
7152 or e. is inside the scope of a #pragma no_long_calls
7153 or f. is defined in the same section as the current function. */
7154
7155 bool
7156 arm_is_long_call_p (tree decl)
7157 {
7158 tree attrs;
7159
7160 if (!decl)
7161 return TARGET_LONG_CALLS;
7162
7163 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7164 if (lookup_attribute ("short_call", attrs))
7165 return false;
7166
7167 /* For "f", be conservative, and only cater for cases in which the
7168 whole of the current function is placed in the same section. */
7169 if (!flag_reorder_blocks_and_partition
7170 && TREE_CODE (decl) == FUNCTION_DECL
7171 && arm_function_in_section_p (decl, current_function_section ()))
7172 return false;
7173
7174 if (lookup_attribute ("long_call", attrs))
7175 return true;
7176
7177 return TARGET_LONG_CALLS;
7178 }
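
/* A small worked example of the rules above, assuming -mlong-calls is in
   effect (declaration names are illustrative):

     extern void ext_fn (void);
     extern void near_fn (void) __attribute__ ((short_call));
     static void local_fn (void) { }
     void caller (void) { ext_fn (); near_fn (); local_fn (); }

   EXT_FN gets the long-call sequence (rule c), NEAR_FN is forced back to
   a plain BL by its short_call attribute (rule d), and LOCAL_FN is also
   called directly when it is known to end up in the same section as
   CALLER (rule f).  */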
7179
7180 /* Return nonzero if it is ok to make a tail-call to DECL. */
7181 static bool
7182 arm_function_ok_for_sibcall (tree decl, tree exp)
7183 {
7184 unsigned long func_type;
7185
7186 if (cfun->machine->sibcall_blocked)
7187 return false;
7188
7189 /* Never tailcall something if we are generating code for Thumb-1. */
7190 if (TARGET_THUMB1)
7191 return false;
7192
7193 /* The PIC register is live on entry to VxWorks PLT entries, so we
7194 must make the call before restoring the PIC register. */
7195 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7196 return false;
7197
7198 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7199 may be used both as target of the call and base register for restoring
7200 the VFP registers. */
7201 if (TARGET_APCS_FRAME && TARGET_ARM
7202 && TARGET_HARD_FLOAT
7203 && decl && arm_is_long_call_p (decl))
7204 return false;
7205
7206 /* If we are interworking and the function is not declared static
7207 then we can't tail-call it unless we know that it exists in this
7208 compilation unit (since it might be a Thumb routine). */
7209 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7210 && !TREE_ASM_WRITTEN (decl))
7211 return false;
7212
7213 func_type = arm_current_func_type ();
7214 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7215 if (IS_INTERRUPT (func_type))
7216 return false;
7217
7218 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7219 generated for entry functions themselves. */
7220 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7221 return false;
7222
7223 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7224 this would complicate matters for later code generation. */
7225 if (TREE_CODE (exp) == CALL_EXPR)
7226 {
7227 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7228 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7229 return false;
7230 }
7231
7232 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7233 {
7234 /* Check that the return value locations are the same. For
7235 example that we aren't returning a value from the sibling in
7236 a VFP register but then need to transfer it to a core
7237 register. */
7238 rtx a, b;
7239 tree decl_or_type = decl;
7240
7241 /* If it is an indirect function pointer, get the function type. */
7242 if (!decl)
7243 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7244
7245 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7246 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7247 cfun->decl, false);
7248 if (!rtx_equal_p (a, b))
7249 return false;
7250 }
7251
7252 /* Never tailcall if function may be called with a misaligned SP. */
7253 if (IS_STACKALIGN (func_type))
7254 return false;
7255
7256 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7257 references should become a NOP. Don't convert such calls into
7258 sibling calls. */
7259 if (TARGET_AAPCS_BASED
7260 && arm_abi == ARM_ABI_AAPCS
7261 && decl
7262 && DECL_WEAK (decl))
7263 return false;
7264
7265 /* We cannot do a tailcall for an indirect call by descriptor if all the
7266 argument registers are used because the only register left to load the
7267 address is IP and it will already contain the static chain. */
7268 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7269 {
7270 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7271 CUMULATIVE_ARGS cum;
7272 cumulative_args_t cum_v;
7273
7274 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7275 cum_v = pack_cumulative_args (&cum);
7276
7277 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7278 {
7279 tree type = TREE_VALUE (t);
7280 if (!VOID_TYPE_P (type))
7281 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7282 }
7283
7284 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7285 return false;
7286 }
7287
7288 /* Everything else is ok. */
7289 return true;
7290 }
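
/* As a rough illustration of one of the rejections above: with -mcmse, a
   call in tail position through a non-secure function pointer such as

     int (*ns_get) (void) __attribute__ ((cmse_nonsecure_call));
     int wrapper (void) { return ns_get (); }

   is deliberately not turned into a sibling call, since the non-secure
   call sequence is handled separately at expansion time.  */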
7291
7292 \f
7293 /* Addressing mode support functions. */
7294
7295 /* Return nonzero if X is a legitimate immediate operand when compiling
7296 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7297 int
7298 legitimate_pic_operand_p (rtx x)
7299 {
7300 if (GET_CODE (x) == SYMBOL_REF
7301 || (GET_CODE (x) == CONST
7302 && GET_CODE (XEXP (x, 0)) == PLUS
7303 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7304 return 0;
7305
7306 return 1;
7307 }
7308
7309 /* Record that the current function needs a PIC register. Initialize
7310 cfun->machine->pic_reg if we have not already done so. */
7311
7312 static void
7313 require_pic_register (void)
7314 {
7315 /* A lot of the logic here is made obscure by the fact that this
7316 routine gets called as part of the rtx cost estimation process.
7317 We don't want those calls to affect any assumptions about the real
7318 function; and further, we can't call entry_of_function() until we
7319 start the real expansion process. */
7320 if (!crtl->uses_pic_offset_table)
7321 {
7322 gcc_assert (can_create_pseudo_p ());
7323 if (arm_pic_register != INVALID_REGNUM
7324 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7325 {
7326 if (!cfun->machine->pic_reg)
7327 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7328
7329 /* Play games to avoid marking the function as needing pic
7330 if we are being called as part of the cost-estimation
7331 process. */
7332 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7333 crtl->uses_pic_offset_table = 1;
7334 }
7335 else
7336 {
7337 rtx_insn *seq, *insn;
7338
7339 if (!cfun->machine->pic_reg)
7340 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7341
7342 /* Play games to avoid marking the function as needing pic
7343 if we are being called as part of the cost-estimation
7344 process. */
7345 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7346 {
7347 crtl->uses_pic_offset_table = 1;
7348 start_sequence ();
7349
7350 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7351 && arm_pic_register > LAST_LO_REGNUM)
7352 emit_move_insn (cfun->machine->pic_reg,
7353 gen_rtx_REG (Pmode, arm_pic_register));
7354 else
7355 arm_load_pic_register (0UL);
7356
7357 seq = get_insns ();
7358 end_sequence ();
7359
7360 for (insn = seq; insn; insn = NEXT_INSN (insn))
7361 if (INSN_P (insn))
7362 INSN_LOCATION (insn) = prologue_location;
7363
7364 /* We can be called during expansion of PHI nodes, where
7365 we can't yet emit instructions directly in the final
7366 insn stream. Queue the insns on the entry edge, they will
7367 be committed after everything else is expanded. */
7368 insert_insn_on_edge (seq,
7369 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7370 }
7371 }
7372 }
7373 }
7374
7375 rtx
7376 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7377 {
7378 if (GET_CODE (orig) == SYMBOL_REF
7379 || GET_CODE (orig) == LABEL_REF)
7380 {
7381 if (reg == 0)
7382 {
7383 gcc_assert (can_create_pseudo_p ());
7384 reg = gen_reg_rtx (Pmode);
7385 }
7386
7387 /* VxWorks does not impose a fixed gap between segments; the run-time
7388 gap can be different from the object-file gap. We therefore can't
7389 use GOTOFF unless we are absolutely sure that the symbol is in the
7390 same segment as the GOT. Unfortunately, the flexibility of linker
7391 scripts means that we can't be sure of that in general, so assume
7392 that GOTOFF is never valid on VxWorks. */
7393 /* References to weak symbols cannot be resolved locally: they
7394 may be overridden by a non-weak definition at link time. */
7395 rtx_insn *insn;
7396 if ((GET_CODE (orig) == LABEL_REF
7397 || (GET_CODE (orig) == SYMBOL_REF
7398 && SYMBOL_REF_LOCAL_P (orig)
7399 && (SYMBOL_REF_DECL (orig)
7400 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7401 && NEED_GOT_RELOC
7402 && arm_pic_data_is_text_relative)
7403 insn = arm_pic_static_addr (orig, reg);
7404 else
7405 {
7406 rtx pat;
7407 rtx mem;
7408
7409 /* If this function doesn't have a pic register, create one now. */
7410 require_pic_register ();
7411
7412 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7413
7414 /* Make the MEM as close to a constant as possible. */
7415 mem = SET_SRC (pat);
7416 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7417 MEM_READONLY_P (mem) = 1;
7418 MEM_NOTRAP_P (mem) = 1;
7419
7420 insn = emit_insn (pat);
7421 }
7422
7423 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7424 by loop. */
7425 set_unique_reg_note (insn, REG_EQUAL, orig);
7426
7427 return reg;
7428 }
7429 else if (GET_CODE (orig) == CONST)
7430 {
7431 rtx base, offset;
7432
7433 if (GET_CODE (XEXP (orig, 0)) == PLUS
7434 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7435 return orig;
7436
7437 /* Handle the case where we have: const (UNSPEC_TLS). */
7438 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7439 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7440 return orig;
7441
7442 /* Handle the case where we have:
7443 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7444 CONST_INT. */
7445 if (GET_CODE (XEXP (orig, 0)) == PLUS
7446 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7447 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7448 {
7449 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7450 return orig;
7451 }
7452
7453 if (reg == 0)
7454 {
7455 gcc_assert (can_create_pseudo_p ());
7456 reg = gen_reg_rtx (Pmode);
7457 }
7458
7459 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7460
7461 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7462 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7463 base == reg ? 0 : reg);
7464
7465 if (CONST_INT_P (offset))
7466 {
7467 /* The base register doesn't really matter, we only want to
7468 test the index for the appropriate mode. */
7469 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7470 {
7471 gcc_assert (can_create_pseudo_p ());
7472 offset = force_reg (Pmode, offset);
7473 }
7474
7475 if (CONST_INT_P (offset))
7476 return plus_constant (Pmode, base, INTVAL (offset));
7477 }
7478
7479 if (GET_MODE_SIZE (mode) > 4
7480 && (GET_MODE_CLASS (mode) == MODE_INT
7481 || TARGET_SOFT_FLOAT))
7482 {
7483 emit_insn (gen_addsi3 (reg, base, offset));
7484 return reg;
7485 }
7486
7487 return gen_rtx_PLUS (Pmode, base, offset);
7488 }
7489
7490 return orig;
7491 }
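
/* For a global symbol accessed under PIC, the net effect of the code
   above is (roughly) an address of the form

     (set (reg Rn) (mem (plus (reg pic_reg)
                              (unspec [(symbol_ref "foo")] UNSPEC_PIC_SYM))))

   i.e. a load of FOO's address from its GOT slot, addressed relative to
   the PIC register; calculate_pic_address in arm.md supplies the pattern
   that is later split.  The symbol name is of course illustrative.  */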
7492
7493
7494 /* Find a spare register to use during the prolog of a function. */
7495
7496 static int
7497 thumb_find_work_register (unsigned long pushed_regs_mask)
7498 {
7499 int reg;
7500
7501 /* Check the argument registers first as these are call-used. The
7502 register allocation order means that sometimes r3 might be used
7503 but earlier argument registers might not, so check them all. */
7504 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7505 if (!df_regs_ever_live_p (reg))
7506 return reg;
7507
7508 /* Before going on to check the call-saved registers we can try a couple
7509 more ways of deducing that r3 is available. The first is when we are
7510 pushing anonymous arguments onto the stack and we have fewer than 4
7511 registers' worth of fixed arguments(*). In this case r3 will be part of
7512 the variable argument list and so we can be sure that it will be
7513 pushed right at the start of the function. Hence it will be available
7514 for the rest of the prologue.
7515 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7516 if (cfun->machine->uses_anonymous_args
7517 && crtl->args.pretend_args_size > 0)
7518 return LAST_ARG_REGNUM;
7519
7520 /* The other case is when we have fixed arguments but fewer than 4 registers'
7521 worth. In this case r3 might be used in the body of the function, but
7522 it is not being used to convey an argument into the function. In theory
7523 we could just check crtl->args.size to see how many bytes are
7524 being passed in argument registers, but it seems that it is unreliable.
7525 Sometimes it will have the value 0 when in fact arguments are being
7526 passed. (See testcase execute/20021111-1.c for an example). So we also
7527 check the args_info.nregs field as well. The problem with this field is
7528 that it makes no allowances for arguments that are passed to the
7529 function but which are not used. Hence we could miss an opportunity
7530 when a function has an unused argument in r3. But it is better to be
7531 safe than to be sorry. */
7532 if (! cfun->machine->uses_anonymous_args
7533 && crtl->args.size >= 0
7534 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7535 && (TARGET_AAPCS_BASED
7536 ? crtl->args.info.aapcs_ncrn < 4
7537 : crtl->args.info.nregs < 4))
7538 return LAST_ARG_REGNUM;
7539
7540 /* Otherwise look for a call-saved register that is going to be pushed. */
7541 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7542 if (pushed_regs_mask & (1 << reg))
7543 return reg;
7544
7545 if (TARGET_THUMB2)
7546 {
7547 /* Thumb-2 can use high regs. */
7548 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7549 if (pushed_regs_mask & (1 << reg))
7550 return reg;
7551 }
7552 /* Something went wrong - thumb_compute_save_reg_mask()
7553 should have arranged for a suitable register to be pushed. */
7554 gcc_unreachable ();
7555 }
7556
7557 static GTY(()) int pic_labelno;
7558
7559 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7560 low register. */
7561
7562 void
7563 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7564 {
7565 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7566
7567 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7568 return;
7569
7570 gcc_assert (flag_pic);
7571
7572 pic_reg = cfun->machine->pic_reg;
7573 if (TARGET_VXWORKS_RTP)
7574 {
7575 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7576 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7577 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7578
7579 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7580
7581 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7582 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7583 }
7584 else
7585 {
7586 /* We use an UNSPEC rather than a LABEL_REF because this label
7587 never appears in the code stream. */
7588
7589 labelno = GEN_INT (pic_labelno++);
7590 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7591 l1 = gen_rtx_CONST (VOIDmode, l1);
7592
7593 /* On the ARM the PC register contains 'dot + 8' at the time of the
7594 addition, on the Thumb it is 'dot + 4'. */
7595 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7596 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7597 UNSPEC_GOTSYM_OFF);
7598 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7599
7600 if (TARGET_32BIT)
7601 {
7602 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7603 }
7604 else /* TARGET_THUMB1 */
7605 {
7606 if (arm_pic_register != INVALID_REGNUM
7607 && REGNO (pic_reg) > LAST_LO_REGNUM)
7608 {
7609 /* We will have pushed the pic register, so we should always be
7610 able to find a work register. */
7611 pic_tmp = gen_rtx_REG (SImode,
7612 thumb_find_work_register (saved_regs));
7613 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7614 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7615 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7616 }
7617 else if (arm_pic_register != INVALID_REGNUM
7618 && arm_pic_register > LAST_LO_REGNUM
7619 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7620 {
7621 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7622 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7623 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7624 }
7625 else
7626 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7627 }
7628 }
7629
7630 /* Need to emit this whether or not we obey regdecls,
7631 since setjmp/longjmp can cause life info to screw up. */
7632 emit_use (pic_reg);
7633 }
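
/* For reference, in the common ELF case the sequence emitted above ends
   up looking roughly like this in ARM state (register and label names
   are illustrative):

	ldr     r4, .Lgot_offset
     .LPIC0:
	add     r4, pc, r4
	...
     .Lgot_offset:
	.word   _GLOBAL_OFFSET_TABLE_ - (.LPIC0 + 8)

   with the constant 8 (or 4 for Thumb) matching the pc bias described in
   the comment above.  */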
7634
7635 /* Generate code to load the address of a static var when flag_pic is set. */
7636 static rtx_insn *
7637 arm_pic_static_addr (rtx orig, rtx reg)
7638 {
7639 rtx l1, labelno, offset_rtx;
7640
7641 gcc_assert (flag_pic);
7642
7643 /* We use an UNSPEC rather than a LABEL_REF because this label
7644 never appears in the code stream. */
7645 labelno = GEN_INT (pic_labelno++);
7646 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7647 l1 = gen_rtx_CONST (VOIDmode, l1);
7648
7649 /* On the ARM the PC register contains 'dot + 8' at the time of the
7650 addition, on the Thumb it is 'dot + 4'. */
7651 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7652 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7653 UNSPEC_SYMBOL_OFFSET);
7654 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7655
7656 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7657 }
7658
7659 /* Return nonzero if X is valid as an ARM state addressing register. */
7660 static int
7661 arm_address_register_rtx_p (rtx x, int strict_p)
7662 {
7663 int regno;
7664
7665 if (!REG_P (x))
7666 return 0;
7667
7668 regno = REGNO (x);
7669
7670 if (strict_p)
7671 return ARM_REGNO_OK_FOR_BASE_P (regno);
7672
7673 return (regno <= LAST_ARM_REGNUM
7674 || regno >= FIRST_PSEUDO_REGISTER
7675 || regno == FRAME_POINTER_REGNUM
7676 || regno == ARG_POINTER_REGNUM);
7677 }
7678
7679 /* Return TRUE if this rtx is the difference of a symbol and a label,
7680 and will reduce to a PC-relative relocation in the object file.
7681 Expressions like this can be left alone when generating PIC, rather
7682 than forced through the GOT. */
7683 static int
7684 pcrel_constant_p (rtx x)
7685 {
7686 if (GET_CODE (x) == MINUS)
7687 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7688
7689 return FALSE;
7690 }
7691
7692 /* Return true if X will surely end up in an index register after next
7693 splitting pass. */
7694 static bool
7695 will_be_in_index_register (const_rtx x)
7696 {
7697 /* arm.md: calculate_pic_address will split this into a register. */
7698 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7699 }
7700
7701 /* Return nonzero if X is a valid ARM state address operand. */
7702 int
7703 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7704 int strict_p)
7705 {
7706 bool use_ldrd;
7707 enum rtx_code code = GET_CODE (x);
7708
7709 if (arm_address_register_rtx_p (x, strict_p))
7710 return 1;
7711
7712 use_ldrd = (TARGET_LDRD
7713 && (mode == DImode || mode == DFmode));
7714
7715 if (code == POST_INC || code == PRE_DEC
7716 || ((code == PRE_INC || code == POST_DEC)
7717 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7718 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7719
7720 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7721 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7722 && GET_CODE (XEXP (x, 1)) == PLUS
7723 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7724 {
7725 rtx addend = XEXP (XEXP (x, 1), 1);
7726
7727 /* Don't allow ldrd post increment by register because it's hard
7728 to fixup invalid register choices. */
7729 if (use_ldrd
7730 && GET_CODE (x) == POST_MODIFY
7731 && REG_P (addend))
7732 return 0;
7733
7734 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7735 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7736 }
7737
7738 /* After reload constants split into minipools will have addresses
7739 from a LABEL_REF. */
7740 else if (reload_completed
7741 && (code == LABEL_REF
7742 || (code == CONST
7743 && GET_CODE (XEXP (x, 0)) == PLUS
7744 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7745 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7746 return 1;
7747
7748 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7749 return 0;
7750
7751 else if (code == PLUS)
7752 {
7753 rtx xop0 = XEXP (x, 0);
7754 rtx xop1 = XEXP (x, 1);
7755
7756 return ((arm_address_register_rtx_p (xop0, strict_p)
7757 && ((CONST_INT_P (xop1)
7758 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7759 || (!strict_p && will_be_in_index_register (xop1))))
7760 || (arm_address_register_rtx_p (xop1, strict_p)
7761 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7762 }
7763
7764 #if 0
7765 /* Reload currently can't handle MINUS, so disable this for now */
7766 else if (GET_CODE (x) == MINUS)
7767 {
7768 rtx xop0 = XEXP (x, 0);
7769 rtx xop1 = XEXP (x, 1);
7770
7771 return (arm_address_register_rtx_p (xop0, strict_p)
7772 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7773 }
7774 #endif
7775
7776 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7777 && code == SYMBOL_REF
7778 && CONSTANT_POOL_ADDRESS_P (x)
7779 && ! (flag_pic
7780 && symbol_mentioned_p (get_pool_constant (x))
7781 && ! pcrel_constant_p (get_pool_constant (x))))
7782 return 1;
7783
7784 return 0;
7785 }
7786
7787 /* Return true if we can avoid creating a constant pool entry for x. */
7788 static bool
7789 can_avoid_literal_pool_for_label_p (rtx x)
7790 {
7791 /* Normally we can assign constant values to target registers without
7792 the help of the constant pool. But there are cases where we have to use
7793 the constant pool, for example:
7794 1) assigning a label to a register.
7795 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7796
7797 A constant pool access of the form:
7798 (set (reg r0) (mem (symbol_ref (".LC0"))))
7799 will cause the use of the literal pool (later, in arm_reorg).
7800 So here we reject such a form as an invalid address, and the compiler
7801 will then adjust it into:
7802 (set (reg r0) (symbol_ref (".LC0")))
7803 (set (reg r0) (mem (reg r0))).
7804 No extra register is required, and (mem (reg r0)) won't cause the use
7805 of literal pools. */
7806 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7807 && CONSTANT_POOL_ADDRESS_P (x))
7808 return 1;
7809 return 0;
7810 }
7811
7812
7813 /* Return nonzero if X is a valid Thumb-2 address operand. */
7814 static int
7815 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7816 {
7817 bool use_ldrd;
7818 enum rtx_code code = GET_CODE (x);
7819
7820 if (arm_address_register_rtx_p (x, strict_p))
7821 return 1;
7822
7823 use_ldrd = (TARGET_LDRD
7824 && (mode == DImode || mode == DFmode));
7825
7826 if (code == POST_INC || code == PRE_DEC
7827 || ((code == PRE_INC || code == POST_DEC)
7828 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7829 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7830
7831 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7832 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7833 && GET_CODE (XEXP (x, 1)) == PLUS
7834 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7835 {
7836 /* Thumb-2 only has autoincrement by constant. */
7837 rtx addend = XEXP (XEXP (x, 1), 1);
7838 HOST_WIDE_INT offset;
7839
7840 if (!CONST_INT_P (addend))
7841 return 0;
7842
7843 offset = INTVAL(addend);
7844 if (GET_MODE_SIZE (mode) <= 4)
7845 return (offset > -256 && offset < 256);
7846
7847 return (use_ldrd && offset > -1024 && offset < 1024
7848 && (offset & 3) == 0);
7849 }
7850
7851 /* After reload constants split into minipools will have addresses
7852 from a LABEL_REF. */
7853 else if (reload_completed
7854 && (code == LABEL_REF
7855 || (code == CONST
7856 && GET_CODE (XEXP (x, 0)) == PLUS
7857 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7858 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7859 return 1;
7860
7861 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7862 return 0;
7863
7864 else if (code == PLUS)
7865 {
7866 rtx xop0 = XEXP (x, 0);
7867 rtx xop1 = XEXP (x, 1);
7868
7869 return ((arm_address_register_rtx_p (xop0, strict_p)
7870 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7871 || (!strict_p && will_be_in_index_register (xop1))))
7872 || (arm_address_register_rtx_p (xop1, strict_p)
7873 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7874 }
7875
7876 else if (can_avoid_literal_pool_for_label_p (x))
7877 return 0;
7878
7879 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7880 && code == SYMBOL_REF
7881 && CONSTANT_POOL_ADDRESS_P (x)
7882 && ! (flag_pic
7883 && symbol_mentioned_p (get_pool_constant (x))
7884 && ! pcrel_constant_p (get_pool_constant (x))))
7885 return 1;
7886
7887 return 0;
7888 }
7889
7890 /* Return nonzero if INDEX is valid for an address index operand in
7891 ARM state. */
7892 static int
7893 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7894 int strict_p)
7895 {
7896 HOST_WIDE_INT range;
7897 enum rtx_code code = GET_CODE (index);
7898
7899 /* Standard coprocessor addressing modes. */
7900 if (TARGET_HARD_FLOAT
7901 && (mode == SFmode || mode == DFmode))
7902 return (code == CONST_INT && INTVAL (index) < 1024
7903 && INTVAL (index) > -1024
7904 && (INTVAL (index) & 3) == 0);
7905
7906 /* For quad modes, we restrict the constant offset to be slightly less
7907 than what the instruction format permits. We do this because for
7908 quad mode moves, we will actually decompose them into two separate
7909 double-mode reads or writes. INDEX must therefore be a valid
7910 (double-mode) offset and so should INDEX+8. */
7911 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7912 return (code == CONST_INT
7913 && INTVAL (index) < 1016
7914 && INTVAL (index) > -1024
7915 && (INTVAL (index) & 3) == 0);
7916
7917 /* We have no such constraint on double mode offsets, so we permit the
7918 full range of the instruction format. */
7919 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7920 return (code == CONST_INT
7921 && INTVAL (index) < 1024
7922 && INTVAL (index) > -1024
7923 && (INTVAL (index) & 3) == 0);
7924
7925 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7926 return (code == CONST_INT
7927 && INTVAL (index) < 1024
7928 && INTVAL (index) > -1024
7929 && (INTVAL (index) & 3) == 0);
7930
7931 if (arm_address_register_rtx_p (index, strict_p)
7932 && (GET_MODE_SIZE (mode) <= 4))
7933 return 1;
7934
7935 if (mode == DImode || mode == DFmode)
7936 {
7937 if (code == CONST_INT)
7938 {
7939 HOST_WIDE_INT val = INTVAL (index);
7940
7941 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
7942 If vldr is selected it uses arm_coproc_mem_operand. */
7943 if (TARGET_LDRD)
7944 return val > -256 && val < 256;
7945 else
7946 return val > -4096 && val < 4092;
7947 }
7948
7949 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7950 }
7951
7952 if (GET_MODE_SIZE (mode) <= 4
7953 && ! (arm_arch4
7954 && (mode == HImode
7955 || mode == HFmode
7956 || (mode == QImode && outer == SIGN_EXTEND))))
7957 {
7958 if (code == MULT)
7959 {
7960 rtx xiop0 = XEXP (index, 0);
7961 rtx xiop1 = XEXP (index, 1);
7962
7963 return ((arm_address_register_rtx_p (xiop0, strict_p)
7964 && power_of_two_operand (xiop1, SImode))
7965 || (arm_address_register_rtx_p (xiop1, strict_p)
7966 && power_of_two_operand (xiop0, SImode)));
7967 }
7968 else if (code == LSHIFTRT || code == ASHIFTRT
7969 || code == ASHIFT || code == ROTATERT)
7970 {
7971 rtx op = XEXP (index, 1);
7972
7973 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7974 && CONST_INT_P (op)
7975 && INTVAL (op) > 0
7976 && INTVAL (op) <= 31);
7977 }
7978 }
7979
7980 /* For ARM v4 we may be doing a sign-extend operation during the
7981 load. */
7982 if (arm_arch4)
7983 {
7984 if (mode == HImode
7985 || mode == HFmode
7986 || (outer == SIGN_EXTEND && mode == QImode))
7987 range = 256;
7988 else
7989 range = 4096;
7990 }
7991 else
7992 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7993
7994 return (code == CONST_INT
7995 && INTVAL (index) < range
7996 && INTVAL (index) > -range);
7997 }
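
/* Informally, and leaving the coprocessor/NEON cases aside, the checks
   above accept ARM-state index forms such as

     ldr   r0, [r1, r2]            plain register index
     ldr   r0, [r1, r2, lsl #2]    scaled register index
     ldr   r0, [r1, #4095]         word access, 12-bit immediate
     ldrh  r0, [r1, #255]          halfword access, 8-bit immediate (ARMv4+)

   while an offset such as #4096 falls outside the computed range and has
   to be legitimized first.  These are representative examples only; the
   precise limits are the ones computed above.  */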
7998
7999 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8000 index operand, i.e. 1, 2, 4 or 8. */
8001 static bool
8002 thumb2_index_mul_operand (rtx op)
8003 {
8004 HOST_WIDE_INT val;
8005
8006 if (!CONST_INT_P (op))
8007 return false;
8008
8009 val = INTVAL(op);
8010 return (val == 1 || val == 2 || val == 4 || val == 8);
8011 }
8012
8013 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8014 static int
8015 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8016 {
8017 enum rtx_code code = GET_CODE (index);
8018
8019 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8020 /* Standard coprocessor addressing modes. */
8021 if (TARGET_HARD_FLOAT
8022 && (mode == SFmode || mode == DFmode))
8023 return (code == CONST_INT && INTVAL (index) < 1024
8024 /* Thumb-2 allows only a > -256 index range for its core register
8025 load/stores. Since we allow SF/DF in core registers, we have
8026 to use the intersection between -256~4096 (core) and -1024~1024
8027 (coprocessor). */
8028 && INTVAL (index) > -256
8029 && (INTVAL (index) & 3) == 0);
8030
8031 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8032 {
8033 /* For DImode assume values will usually live in core regs
8034 and only allow LDRD addressing modes. */
8035 if (!TARGET_LDRD || mode != DImode)
8036 return (code == CONST_INT
8037 && INTVAL (index) < 1024
8038 && INTVAL (index) > -1024
8039 && (INTVAL (index) & 3) == 0);
8040 }
8041
8042 /* For quad modes, we restrict the constant offset to be slightly less
8043 than what the instruction format permits. We do this because for
8044 quad mode moves, we will actually decompose them into two separate
8045 double-mode reads or writes. INDEX must therefore be a valid
8046 (double-mode) offset and so should INDEX+8. */
8047 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8048 return (code == CONST_INT
8049 && INTVAL (index) < 1016
8050 && INTVAL (index) > -1024
8051 && (INTVAL (index) & 3) == 0);
8052
8053 /* We have no such constraint on double mode offsets, so we permit the
8054 full range of the instruction format. */
8055 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8056 return (code == CONST_INT
8057 && INTVAL (index) < 1024
8058 && INTVAL (index) > -1024
8059 && (INTVAL (index) & 3) == 0);
8060
8061 if (arm_address_register_rtx_p (index, strict_p)
8062 && (GET_MODE_SIZE (mode) <= 4))
8063 return 1;
8064
8065 if (mode == DImode || mode == DFmode)
8066 {
8067 if (code == CONST_INT)
8068 {
8069 HOST_WIDE_INT val = INTVAL (index);
8070 /* Thumb-2 ldrd only has reg+const addressing modes.
8071 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8072 If vldr is selected it uses arm_coproc_mem_operand. */
8073 if (TARGET_LDRD)
8074 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8075 else
8076 return IN_RANGE (val, -255, 4095 - 4);
8077 }
8078 else
8079 return 0;
8080 }
8081
8082 if (code == MULT)
8083 {
8084 rtx xiop0 = XEXP (index, 0);
8085 rtx xiop1 = XEXP (index, 1);
8086
8087 return ((arm_address_register_rtx_p (xiop0, strict_p)
8088 && thumb2_index_mul_operand (xiop1))
8089 || (arm_address_register_rtx_p (xiop1, strict_p)
8090 && thumb2_index_mul_operand (xiop0)));
8091 }
8092 else if (code == ASHIFT)
8093 {
8094 rtx op = XEXP (index, 1);
8095
8096 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8097 && CONST_INT_P (op)
8098 && INTVAL (op) > 0
8099 && INTVAL (op) <= 3);
8100 }
8101
8102 return (code == CONST_INT
8103 && INTVAL (index) < 4096
8104 && INTVAL (index) > -256);
8105 }
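
/* Representative Thumb-2 forms accepted above (again leaving the
   coprocessor cases aside):

     ldr   r0, [r1, r2, lsl #3]     shift amount limited to 0-3
     ldr   r0, [r1, #4095]          positive offsets up to 12 bits
     ldr   r0, [r1, #-255]          negative offsets limited to 8 bits
     ldrd  r0, r1, [r2, #1020]      ldrd: -1020..1020, word-aligned

   A shift of #4, or an offset of #-256, for example, is rejected and must
   be legitimized some other way.  */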
8106
8107 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8108 static int
8109 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8110 {
8111 int regno;
8112
8113 if (!REG_P (x))
8114 return 0;
8115
8116 regno = REGNO (x);
8117
8118 if (strict_p)
8119 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8120
8121 return (regno <= LAST_LO_REGNUM
8122 || regno > LAST_VIRTUAL_REGISTER
8123 || regno == FRAME_POINTER_REGNUM
8124 || (GET_MODE_SIZE (mode) >= 4
8125 && (regno == STACK_POINTER_REGNUM
8126 || regno >= FIRST_PSEUDO_REGISTER
8127 || x == hard_frame_pointer_rtx
8128 || x == arg_pointer_rtx)));
8129 }
8130
8131 /* Return nonzero if x is a legitimate index register. This is the case
8132 for any base register that can access a QImode object. */
8133 inline static int
8134 thumb1_index_register_rtx_p (rtx x, int strict_p)
8135 {
8136 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8137 }
8138
8139 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8140
8141 The AP may be eliminated to either the SP or the FP, so we use the
8142 least common denominator, e.g. SImode, and offsets from 0 to 64.
8143
8144 ??? Verify whether the above is the right approach.
8145
8146 ??? Also, the FP may be eliminated to the SP, so perhaps that
8147 needs special handling also.
8148
8149 ??? Look at how the mips16 port solves this problem. It probably uses
8150 better ways to solve some of these problems.
8151
8152 Although it is not incorrect, we don't accept QImode and HImode
8153 addresses based on the frame pointer or arg pointer until the
8154 reload pass starts. This is so that eliminating such addresses
8155 into stack based ones won't produce impossible code. */
8156 int
8157 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8158 {
8159 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8160 return 0;
8161
8162 /* ??? Not clear if this is right. Experiment. */
8163 if (GET_MODE_SIZE (mode) < 4
8164 && !(reload_in_progress || reload_completed)
8165 && (reg_mentioned_p (frame_pointer_rtx, x)
8166 || reg_mentioned_p (arg_pointer_rtx, x)
8167 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8168 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8169 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8170 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8171 return 0;
8172
8173 /* Accept any base register. SP only in SImode or larger. */
8174 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8175 return 1;
8176
8177 /* This is PC relative data before arm_reorg runs. */
8178 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8179 && GET_CODE (x) == SYMBOL_REF
8180 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8181 return 1;
8182
8183 /* This is PC relative data after arm_reorg runs. */
8184 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8185 && reload_completed
8186 && (GET_CODE (x) == LABEL_REF
8187 || (GET_CODE (x) == CONST
8188 && GET_CODE (XEXP (x, 0)) == PLUS
8189 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8190 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8191 return 1;
8192
8193 /* Post-inc indexing only supported for SImode and larger. */
8194 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8195 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8196 return 1;
8197
8198 else if (GET_CODE (x) == PLUS)
8199 {
8200 /* REG+REG address can be any two index registers. */
8201 /* We disallow FRAME+REG addressing since we know that FRAME
8202 will be replaced with STACK, and SP relative addressing only
8203 permits SP+OFFSET. */
8204 if (GET_MODE_SIZE (mode) <= 4
8205 && XEXP (x, 0) != frame_pointer_rtx
8206 && XEXP (x, 1) != frame_pointer_rtx
8207 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8208 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8209 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8210 return 1;
8211
8212 /* REG+const has 5-7 bit offset for non-SP registers. */
8213 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8214 || XEXP (x, 0) == arg_pointer_rtx)
8215 && CONST_INT_P (XEXP (x, 1))
8216 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8217 return 1;
8218
8219 /* REG+const has 10-bit offset for SP, but only SImode and
8220 larger is supported. */
8221 /* ??? Should probably check for DI/DFmode overflow here
8222 just like GO_IF_LEGITIMATE_OFFSET does. */
8223 else if (REG_P (XEXP (x, 0))
8224 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8225 && GET_MODE_SIZE (mode) >= 4
8226 && CONST_INT_P (XEXP (x, 1))
8227 && INTVAL (XEXP (x, 1)) >= 0
8228 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8229 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8230 return 1;
8231
8232 else if (REG_P (XEXP (x, 0))
8233 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8234 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8235 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8236 && REGNO (XEXP (x, 0))
8237 <= LAST_VIRTUAL_POINTER_REGISTER))
8238 && GET_MODE_SIZE (mode) >= 4
8239 && CONST_INT_P (XEXP (x, 1))
8240 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8241 return 1;
8242 }
8243
8244 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8245 && GET_MODE_SIZE (mode) == 4
8246 && GET_CODE (x) == SYMBOL_REF
8247 && CONSTANT_POOL_ADDRESS_P (x)
8248 && ! (flag_pic
8249 && symbol_mentioned_p (get_pool_constant (x))
8250 && ! pcrel_constant_p (get_pool_constant (x))))
8251 return 1;
8252
8253 return 0;
8254 }
8255
8256 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8257 instruction of mode MODE. */
8258 int
8259 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8260 {
8261 switch (GET_MODE_SIZE (mode))
8262 {
8263 case 1:
8264 return val >= 0 && val < 32;
8265
8266 case 2:
8267 return val >= 0 && val < 64 && (val & 1) == 0;
8268
8269 default:
8270 return (val >= 0
8271 && (val + GET_MODE_SIZE (mode)) <= 128
8272 && (val & 3) == 0);
8273 }
8274 }
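
/* In other words, the Thumb-1 immediate offsets accepted here are:

     byte accesses       0 .. 31
     halfword accesses   0 .. 62, even
     word and larger     0 .. 128 - size, multiples of 4

   matching the scaled 5-bit offset field of the 16-bit load/store
   encodings.  */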
8275
8276 bool
8277 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8278 {
8279 if (TARGET_ARM)
8280 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8281 else if (TARGET_THUMB2)
8282 return thumb2_legitimate_address_p (mode, x, strict_p);
8283 else /* if (TARGET_THUMB1) */
8284 return thumb1_legitimate_address_p (mode, x, strict_p);
8285 }
8286
8287 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8288
8289 Given an rtx X being reloaded into a reg required to be
8290 in class CLASS, return the class of reg to actually use.
8291 In general this is just CLASS, but for the Thumb core registers and
8292 immediate constants we prefer a LO_REGS class or a subset. */
8293
8294 static reg_class_t
8295 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8296 {
8297 if (TARGET_32BIT)
8298 return rclass;
8299 else
8300 {
8301 if (rclass == GENERAL_REGS)
8302 return LO_REGS;
8303 else
8304 return rclass;
8305 }
8306 }
8307
8308 /* Build the SYMBOL_REF for __tls_get_addr. */
8309
8310 static GTY(()) rtx tls_get_addr_libfunc;
8311
8312 static rtx
8313 get_tls_get_addr (void)
8314 {
8315 if (!tls_get_addr_libfunc)
8316 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8317 return tls_get_addr_libfunc;
8318 }
8319
8320 rtx
8321 arm_load_tp (rtx target)
8322 {
8323 if (!target)
8324 target = gen_reg_rtx (SImode);
8325
8326 if (TARGET_HARD_TP)
8327 {
8328 /* Can return in any reg. */
8329 emit_insn (gen_load_tp_hard (target));
8330 }
8331 else
8332 {
8333 /* Always returned in r0. Immediately copy the result into a pseudo,
8334 otherwise other uses of r0 (e.g. setting up function arguments) may
8335 clobber the value. */
8336
8337 rtx tmp;
8338
8339 emit_insn (gen_load_tp_soft ());
8340
8341 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8342 emit_move_insn (target, tmp);
8343 }
8344 return target;
8345 }
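
/* A quick sketch of the two cases: with a hardware thread pointer
   (-mtp=cp15) the value is read directly from the CP15 thread ID
   register, roughly

     mrc     p15, 0, r0, c13, c0, 3

   whereas the soft variant emits a call to the __aeabi_read_tp helper,
   which returns the thread pointer in r0; hence the immediate copy out
   of r0 into a pseudo above.  */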
8346
8347 static rtx
8348 load_tls_operand (rtx x, rtx reg)
8349 {
8350 rtx tmp;
8351
8352 if (reg == NULL_RTX)
8353 reg = gen_reg_rtx (SImode);
8354
8355 tmp = gen_rtx_CONST (SImode, x);
8356
8357 emit_move_insn (reg, tmp);
8358
8359 return reg;
8360 }
8361
8362 static rtx_insn *
8363 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8364 {
8365 rtx label, labelno, sum;
8366
8367 gcc_assert (reloc != TLS_DESCSEQ);
8368 start_sequence ();
8369
8370 labelno = GEN_INT (pic_labelno++);
8371 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8372 label = gen_rtx_CONST (VOIDmode, label);
8373
8374 sum = gen_rtx_UNSPEC (Pmode,
8375 gen_rtvec (4, x, GEN_INT (reloc), label,
8376 GEN_INT (TARGET_ARM ? 8 : 4)),
8377 UNSPEC_TLS);
8378 reg = load_tls_operand (sum, reg);
8379
8380 if (TARGET_ARM)
8381 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8382 else
8383 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8384
8385 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8386 LCT_PURE, /* LCT_CONST? */
8387 Pmode, reg, Pmode);
8388
8389 rtx_insn *insns = get_insns ();
8390 end_sequence ();
8391
8392 return insns;
8393 }
8394
8395 static rtx
8396 arm_tls_descseq_addr (rtx x, rtx reg)
8397 {
8398 rtx labelno = GEN_INT (pic_labelno++);
8399 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8400 rtx sum = gen_rtx_UNSPEC (Pmode,
8401 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8402 gen_rtx_CONST (VOIDmode, label),
8403 GEN_INT (!TARGET_ARM)),
8404 UNSPEC_TLS);
8405 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8406
8407 emit_insn (gen_tlscall (x, labelno));
8408 if (!reg)
8409 reg = gen_reg_rtx (SImode);
8410 else
8411 gcc_assert (REGNO (reg) != R0_REGNUM);
8412
8413 emit_move_insn (reg, reg0);
8414
8415 return reg;
8416 }
8417
8418 rtx
8419 legitimize_tls_address (rtx x, rtx reg)
8420 {
8421 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8422 rtx_insn *insns;
8423 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8424
8425 switch (model)
8426 {
8427 case TLS_MODEL_GLOBAL_DYNAMIC:
8428 if (TARGET_GNU2_TLS)
8429 {
8430 reg = arm_tls_descseq_addr (x, reg);
8431
8432 tp = arm_load_tp (NULL_RTX);
8433
8434 dest = gen_rtx_PLUS (Pmode, tp, reg);
8435 }
8436 else
8437 {
8438 /* Original scheme */
8439 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8440 dest = gen_reg_rtx (Pmode);
8441 emit_libcall_block (insns, dest, ret, x);
8442 }
8443 return dest;
8444
8445 case TLS_MODEL_LOCAL_DYNAMIC:
8446 if (TARGET_GNU2_TLS)
8447 {
8448 reg = arm_tls_descseq_addr (x, reg);
8449
8450 tp = arm_load_tp (NULL_RTX);
8451
8452 dest = gen_rtx_PLUS (Pmode, tp, reg);
8453 }
8454 else
8455 {
8456 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8457
8458 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8459 share the LDM result with other LD model accesses. */
8460 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8461 UNSPEC_TLS);
8462 dest = gen_reg_rtx (Pmode);
8463 emit_libcall_block (insns, dest, ret, eqv);
8464
8465 /* Load the addend. */
8466 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8467 GEN_INT (TLS_LDO32)),
8468 UNSPEC_TLS);
8469 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8470 dest = gen_rtx_PLUS (Pmode, dest, addend);
8471 }
8472 return dest;
8473
8474 case TLS_MODEL_INITIAL_EXEC:
8475 labelno = GEN_INT (pic_labelno++);
8476 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8477 label = gen_rtx_CONST (VOIDmode, label);
8478 sum = gen_rtx_UNSPEC (Pmode,
8479 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8480 GEN_INT (TARGET_ARM ? 8 : 4)),
8481 UNSPEC_TLS);
8482 reg = load_tls_operand (sum, reg);
8483
8484 if (TARGET_ARM)
8485 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8486 else if (TARGET_THUMB2)
8487 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8488 else
8489 {
8490 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8491 emit_move_insn (reg, gen_const_mem (SImode, reg));
8492 }
8493
8494 tp = arm_load_tp (NULL_RTX);
8495
8496 return gen_rtx_PLUS (Pmode, tp, reg);
8497
8498 case TLS_MODEL_LOCAL_EXEC:
8499 tp = arm_load_tp (NULL_RTX);
8500
8501 reg = gen_rtx_UNSPEC (Pmode,
8502 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8503 UNSPEC_TLS);
8504 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8505
8506 return gen_rtx_PLUS (Pmode, tp, reg);
8507
8508 default:
8509 abort ();
8510 }
8511 }
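
/* By way of example, for a thread-local variable such as

     __thread int tls_counter;

   a reference from PIC code normally takes the global dynamic path above
   (a call to __tls_get_addr, or the descriptor sequence when compiling
   with -mtls-dialect=gnu2), while a non-PIC executable can use the
   initial-exec or local-exec paths, which simply add the variable's
   offset to the thread pointer obtained from arm_load_tp.  The model is
   chosen earlier and recorded in SYMBOL_REF_TLS_MODEL.  */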
8512
8513 /* Try machine-dependent ways of modifying an illegitimate address
8514 to be legitimate. If we find one, return the new, valid address. */
8515 rtx
8516 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8517 {
8518 if (arm_tls_referenced_p (x))
8519 {
8520 rtx addend = NULL;
8521
8522 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8523 {
8524 addend = XEXP (XEXP (x, 0), 1);
8525 x = XEXP (XEXP (x, 0), 0);
8526 }
8527
8528 if (GET_CODE (x) != SYMBOL_REF)
8529 return x;
8530
8531 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8532
8533 x = legitimize_tls_address (x, NULL_RTX);
8534
8535 if (addend)
8536 {
8537 x = gen_rtx_PLUS (SImode, x, addend);
8538 orig_x = x;
8539 }
8540 else
8541 return x;
8542 }
8543
8544 if (!TARGET_ARM)
8545 {
8546 /* TODO: legitimize_address for Thumb2. */
8547 if (TARGET_THUMB2)
8548 return x;
8549 return thumb_legitimize_address (x, orig_x, mode);
8550 }
8551
8552 if (GET_CODE (x) == PLUS)
8553 {
8554 rtx xop0 = XEXP (x, 0);
8555 rtx xop1 = XEXP (x, 1);
8556
8557 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8558 xop0 = force_reg (SImode, xop0);
8559
8560 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8561 && !symbol_mentioned_p (xop1))
8562 xop1 = force_reg (SImode, xop1);
8563
8564 if (ARM_BASE_REGISTER_RTX_P (xop0)
8565 && CONST_INT_P (xop1))
8566 {
8567 HOST_WIDE_INT n, low_n;
8568 rtx base_reg, val;
8569 n = INTVAL (xop1);
8570
8571 /* VFP addressing modes actually allow greater offsets, but for
8572 now we just stick with the lowest common denominator. */
8573 if (mode == DImode || mode == DFmode)
8574 {
8575 low_n = n & 0x0f;
8576 n &= ~0x0f;
8577 if (low_n > 4)
8578 {
8579 n += 16;
8580 low_n -= 16;
8581 }
8582 }
8583 else
8584 {
8585 low_n = ((mode) == TImode ? 0
8586 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8587 n -= low_n;
8588 }
8589
8590 base_reg = gen_reg_rtx (SImode);
8591 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8592 emit_move_insn (base_reg, val);
8593 x = plus_constant (Pmode, base_reg, low_n);
8594 }
8595 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8596 x = gen_rtx_PLUS (SImode, xop0, xop1);
8597 }
8598
8599 /* XXX We don't allow MINUS any more -- see comment in
8600 arm_legitimate_address_outer_p (). */
8601 else if (GET_CODE (x) == MINUS)
8602 {
8603 rtx xop0 = XEXP (x, 0);
8604 rtx xop1 = XEXP (x, 1);
8605
8606 if (CONSTANT_P (xop0))
8607 xop0 = force_reg (SImode, xop0);
8608
8609 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8610 xop1 = force_reg (SImode, xop1);
8611
8612 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8613 x = gen_rtx_MINUS (SImode, xop0, xop1);
8614 }
8615
8616 /* Make sure to take full advantage of the pre-indexed addressing mode
8617 with absolute addresses, which often allows the base register to be
8618 shared between multiple adjacent memory references, and might even
8619 allow the minipool to be avoided entirely. */
8620 else if (CONST_INT_P (x) && optimize > 0)
8621 {
8622 unsigned int bits;
8623 HOST_WIDE_INT mask, base, index;
8624 rtx base_reg;
8625
8626 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8627 use an 8-bit index. So let's use a 12-bit index for SImode only and
8628 hope that arm_gen_constant will enable ldrb to use more bits. */
8629 bits = (mode == SImode) ? 12 : 8;
8630 mask = (1 << bits) - 1;
8631 base = INTVAL (x) & ~mask;
8632 index = INTVAL (x) & mask;
8633 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8634 {
8635 /* It'll most probably be more efficient to generate the base
8636 with more bits set and use a negative index instead. */
8637 base |= mask;
8638 index -= mask;
8639 }
8640 base_reg = force_reg (SImode, GEN_INT (base));
8641 x = plus_constant (Pmode, base_reg, index);
8642 }
8643
8644 if (flag_pic)
8645 {
8646 /* We need to find and carefully transform any SYMBOL and LABEL
8647 references; so go back to the original address expression. */
8648 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8649
8650 if (new_x != orig_x)
8651 x = new_x;
8652 }
8653
8654 return x;
8655 }
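
/* As a concrete example of the constant-address case above: for an SImode
   access to the absolute address 0x12345678, BITS is 12, so the address
   is split into

     base  = 0x12345000   (forced into a register once)
     index = 0x678        (folded into the load/store offset)

   which lets several neighbouring absolute accesses share the same base
   register.  */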
8656
8657
8658 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8659 to be legitimate. If we find one, return the new, valid address. */
8660 rtx
8661 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8662 {
8663 if (GET_CODE (x) == PLUS
8664 && CONST_INT_P (XEXP (x, 1))
8665 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8666 || INTVAL (XEXP (x, 1)) < 0))
8667 {
8668 rtx xop0 = XEXP (x, 0);
8669 rtx xop1 = XEXP (x, 1);
8670 HOST_WIDE_INT offset = INTVAL (xop1);
8671
8672 /* Try to fold the offset into a biasing of the base register and
8673 then offsetting that. Don't do this when optimizing for space
8674 since it can cause too many CSEs. */
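 /* Illustrative trace (hypothetical offset): for SImode and offset = 300,
    offset >= 256, so delta = 300 - (256 - 4) = 48 and the address is
    rewritten as (xop0 + 252) + 48.  */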
8675 if (optimize_size && offset >= 0
8676 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8677 {
8678 HOST_WIDE_INT delta;
8679
8680 if (offset >= 256)
8681 delta = offset - (256 - GET_MODE_SIZE (mode));
8682 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8683 delta = 31 * GET_MODE_SIZE (mode);
8684 else
8685 delta = offset & (~31 * GET_MODE_SIZE (mode));
8686
8687 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8688 NULL_RTX);
8689 x = plus_constant (Pmode, xop0, delta);
8690 }
8691 else if (offset < 0 && offset > -256)
8692 /* Small negative offsets are best done with a subtract before the
8693 dereference; forcing these into a register normally takes two
8694 instructions. */
8695 x = force_operand (x, NULL_RTX);
8696 else
8697 {
8698 /* For the remaining cases, force the constant into a register. */
8699 xop1 = force_reg (SImode, xop1);
8700 x = gen_rtx_PLUS (SImode, xop0, xop1);
8701 }
8702 }
8703 else if (GET_CODE (x) == PLUS
8704 && s_register_operand (XEXP (x, 1), SImode)
8705 && !s_register_operand (XEXP (x, 0), SImode))
8706 {
8707 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8708
8709 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8710 }
8711
8712 if (flag_pic)
8713 {
8714 /* We need to find and carefully transform any SYMBOL and LABEL
8715 references, so go back to the original address expression. */
8716 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8717
8718 if (new_x != orig_x)
8719 x = new_x;
8720 }
8721
8722 return x;
8723 }
8724
8725 /* Return TRUE if X contains any TLS symbol references. */
8726
8727 bool
8728 arm_tls_referenced_p (rtx x)
8729 {
8730 if (! TARGET_HAVE_TLS)
8731 return false;
8732
8733 subrtx_iterator::array_type array;
8734 FOR_EACH_SUBRTX (iter, array, x, ALL)
8735 {
8736 const_rtx x = *iter;
8737 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8738 {
8739 /* ARM currently provides relocations to encode TLS variables only in
8740 data, not in AArch32 instructions, so there is currently no way to
8741 implement these if the literal pool is disabled. */
8742 if (arm_disable_literal_pool)
8743 sorry ("accessing thread-local storage is not currently supported "
8744 "with -mpure-code or -mslow-flash-data");
8745
8746 return true;
8747 }
8748
8749 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8750 TLS offsets, not real symbol references. */
8751 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8752 iter.skip_subrtxes ();
8753 }
8754 return false;
8755 }
8756
8757 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8758
8759 On the ARM, allow any integer (invalid ones are removed later by insn
8760 patterns), nice doubles and symbol_refs which refer to the function's
8761 constant pool XXX.
8762
8763 When generating PIC, allow anything. */
8764
8765 static bool
8766 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8767 {
8768 return flag_pic || !label_mentioned_p (x);
8769 }
8770
8771 static bool
8772 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8773 {
8774 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8775 RTXs. These RTXs must therefore be allowed for Thumb-1 so that when run
8776 for ARMv8-M Baseline or later the result is valid. */
8777 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8778 x = XEXP (x, 0);
8779
8780 return (CONST_INT_P (x)
8781 || CONST_DOUBLE_P (x)
8782 || CONSTANT_ADDRESS_P (x)
8783 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8784 || flag_pic);
8785 }
8786
8787 static bool
8788 arm_legitimate_constant_p (machine_mode mode, rtx x)
8789 {
8790 return (!arm_cannot_force_const_mem (mode, x)
8791 && (TARGET_32BIT
8792 ? arm_legitimate_constant_p_1 (mode, x)
8793 : thumb_legitimate_constant_p (mode, x)));
8794 }
8795
8796 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8797
8798 static bool
8799 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8800 {
8801 rtx base, offset;
8802
8803 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8804 {
8805 split_const (x, &base, &offset);
8806 if (GET_CODE (base) == SYMBOL_REF
8807 && !offset_within_block_p (base, INTVAL (offset)))
8808 return true;
8809 }
8810 return arm_tls_referenced_p (x);
8811 }
8812 \f
8813 #define REG_OR_SUBREG_REG(X) \
8814 (REG_P (X) \
8815 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8816
8817 #define REG_OR_SUBREG_RTX(X) \
8818 (REG_P (X) ? (X) : SUBREG_REG (X))
8819
8820 static inline int
8821 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8822 {
8823 machine_mode mode = GET_MODE (x);
8824 int total, words;
8825
8826 switch (code)
8827 {
8828 case ASHIFT:
8829 case ASHIFTRT:
8830 case LSHIFTRT:
8831 case ROTATERT:
8832 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8833
8834 case PLUS:
8835 case MINUS:
8836 case COMPARE:
8837 case NEG:
8838 case NOT:
8839 return COSTS_N_INSNS (1);
8840
8841 case MULT:
8842 if (arm_arch6m && arm_m_profile_small_mul)
8843 return COSTS_N_INSNS (32);
8844
8845 if (CONST_INT_P (XEXP (x, 1)))
8846 {
8847 int cycles = 0;
8848 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8849
8850 while (i)
8851 {
8852 i >>= 2;
8853 cycles++;
8854 }
8855 return COSTS_N_INSNS (2) + cycles;
8856 }
8857 return COSTS_N_INSNS (1) + 16;
8858
8859 case SET:
8860 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8861 the mode. */
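 /* Illustrative example (assuming ARM_NUM_INTS (4) is 1): a SET that loads
    an SImode register from memory is costed as COSTS_N_INSNS (1) + 4,
    since exactly one of the two operands is a MEM.  */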
8862 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8863 return (COSTS_N_INSNS (words)
8864 + 4 * ((MEM_P (SET_SRC (x)))
8865 + MEM_P (SET_DEST (x))));
8866
8867 case CONST_INT:
8868 if (outer == SET)
8869 {
8870 if (UINTVAL (x) < 256
8871 /* 16-bit constant. */
8872 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8873 return 0;
8874 if (thumb_shiftable_const (INTVAL (x)))
8875 return COSTS_N_INSNS (2);
8876 return COSTS_N_INSNS (3);
8877 }
8878 else if ((outer == PLUS || outer == COMPARE)
8879 && INTVAL (x) < 256 && INTVAL (x) > -256)
8880 return 0;
8881 else if ((outer == IOR || outer == XOR || outer == AND)
8882 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8883 return COSTS_N_INSNS (1);
8884 else if (outer == AND)
8885 {
8886 int i;
8887 /* This duplicates the tests in the andsi3 expander. */
8888 for (i = 9; i <= 31; i++)
8889 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8890 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8891 return COSTS_N_INSNS (2);
8892 }
8893 else if (outer == ASHIFT || outer == ASHIFTRT
8894 || outer == LSHIFTRT)
8895 return 0;
8896 return COSTS_N_INSNS (2);
8897
8898 case CONST:
8899 case CONST_DOUBLE:
8900 case LABEL_REF:
8901 case SYMBOL_REF:
8902 return COSTS_N_INSNS (3);
8903
8904 case UDIV:
8905 case UMOD:
8906 case DIV:
8907 case MOD:
8908 return 100;
8909
8910 case TRUNCATE:
8911 return 99;
8912
8913 case AND:
8914 case XOR:
8915 case IOR:
8916 /* XXX guess. */
8917 return 8;
8918
8919 case MEM:
8920 /* XXX another guess. */
8921 /* Memory costs quite a lot for the first word, but subsequent words
8922 load at the equivalent of a single insn each. */
8923 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8924 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8925 ? 4 : 0));
8926
8927 case IF_THEN_ELSE:
8928 /* XXX a guess. */
8929 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8930 return 14;
8931 return 2;
8932
8933 case SIGN_EXTEND:
8934 case ZERO_EXTEND:
8935 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8936 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8937
8938 if (mode == SImode)
8939 return total;
8940
8941 if (arm_arch6)
8942 return total + COSTS_N_INSNS (1);
8943
8944 /* Assume a two-shift sequence. Increase the cost slightly so
8945 we prefer actual shifts over an extend operation. */
8946 return total + 1 + COSTS_N_INSNS (2);
8947
8948 default:
8949 return 99;
8950 }
8951 }
8952
8953 /* Estimates the size cost of thumb1 instructions.
8954 For now most of the code is copied from thumb1_rtx_costs. We need more
8955 fine-grained tuning when we have more related test cases. */
8956 static inline int
8957 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8958 {
8959 machine_mode mode = GET_MODE (x);
8960 int words, cost;
8961
8962 switch (code)
8963 {
8964 case ASHIFT:
8965 case ASHIFTRT:
8966 case LSHIFTRT:
8967 case ROTATERT:
8968 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8969
8970 case PLUS:
8971 case MINUS:
8972 /* Thumb-1 needs two instructions to fulfill shiftadd/shiftsub0/shiftsub1
8973 defined by RTL expansion, especially for the expansion of
8974 multiplication. */
8975 if ((GET_CODE (XEXP (x, 0)) == MULT
8976 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8977 || (GET_CODE (XEXP (x, 1)) == MULT
8978 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8979 return COSTS_N_INSNS (2);
8980 /* Fall through. */
8981 case COMPARE:
8982 case NEG:
8983 case NOT:
8984 return COSTS_N_INSNS (1);
8985
8986 case MULT:
8987 if (CONST_INT_P (XEXP (x, 1)))
8988 {
8989 /* The Thumb-1 mul instruction can't operate on a constant; we must
8990 load it into a register first. */
8991 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8992 /* For targets that have a very small and high-latency multiply
8993 unit, we prefer to synthesize the mult with up to 5 instructions,
8994 giving a good balance between size and performance. */
8995 if (arm_arch6m && arm_m_profile_small_mul)
8996 return COSTS_N_INSNS (5);
8997 else
8998 return COSTS_N_INSNS (1) + const_size;
8999 }
9000 return COSTS_N_INSNS (1);
9001
9002 case SET:
9003 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9004 the mode. */
9005 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9006 cost = COSTS_N_INSNS (words);
9007 if (satisfies_constraint_J (SET_SRC (x))
9008 || satisfies_constraint_K (SET_SRC (x))
9009 /* Too big an immediate for a 2-byte mov, so MOVT is used. */
9010 || (CONST_INT_P (SET_SRC (x))
9011 && UINTVAL (SET_SRC (x)) >= 256
9012 && TARGET_HAVE_MOVT
9013 && satisfies_constraint_j (SET_SRC (x)))
9014 /* thumb1_movdi_insn. */
9015 || ((words > 1) && MEM_P (SET_SRC (x))))
9016 cost += COSTS_N_INSNS (1);
9017 return cost;
9018
9019 case CONST_INT:
9020 if (outer == SET)
9021 {
9022 if (UINTVAL (x) < 256)
9023 return COSTS_N_INSNS (1);
9024 /* movw is 4 bytes long. */
9025 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9026 return COSTS_N_INSNS (2);
9027 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9028 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9029 return COSTS_N_INSNS (2);
9030 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9031 if (thumb_shiftable_const (INTVAL (x)))
9032 return COSTS_N_INSNS (2);
9033 return COSTS_N_INSNS (3);
9034 }
9035 else if ((outer == PLUS || outer == COMPARE)
9036 && INTVAL (x) < 256 && INTVAL (x) > -256)
9037 return 0;
9038 else if ((outer == IOR || outer == XOR || outer == AND)
9039 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9040 return COSTS_N_INSNS (1);
9041 else if (outer == AND)
9042 {
9043 int i;
9044 /* This duplicates the tests in the andsi3 expander. */
9045 for (i = 9; i <= 31; i++)
9046 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9047 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9048 return COSTS_N_INSNS (2);
9049 }
9050 else if (outer == ASHIFT || outer == ASHIFTRT
9051 || outer == LSHIFTRT)
9052 return 0;
9053 return COSTS_N_INSNS (2);
9054
9055 case CONST:
9056 case CONST_DOUBLE:
9057 case LABEL_REF:
9058 case SYMBOL_REF:
9059 return COSTS_N_INSNS (3);
9060
9061 case UDIV:
9062 case UMOD:
9063 case DIV:
9064 case MOD:
9065 return 100;
9066
9067 case TRUNCATE:
9068 return 99;
9069
9070 case AND:
9071 case XOR:
9072 case IOR:
9073 return COSTS_N_INSNS (1);
9074
9075 case MEM:
9076 return (COSTS_N_INSNS (1)
9077 + COSTS_N_INSNS (1)
9078 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9079 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9080 ? COSTS_N_INSNS (1) : 0));
9081
9082 case IF_THEN_ELSE:
9083 /* XXX a guess. */
9084 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9085 return 14;
9086 return 2;
9087
9088 case ZERO_EXTEND:
9089 /* XXX still guessing. */
9090 switch (GET_MODE (XEXP (x, 0)))
9091 {
9092 case E_QImode:
9093 return (1 + (mode == DImode ? 4 : 0)
9094 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9095
9096 case E_HImode:
9097 return (4 + (mode == DImode ? 4 : 0)
9098 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9099
9100 case E_SImode:
9101 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9102
9103 default:
9104 return 99;
9105 }
9106
9107 default:
9108 return 99;
9109 }
9110 }
9111
9112 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9113 operand, then return the operand that is being shifted. If the shift
9114 is not by a constant, then set *SHIFT_REG to the shift-amount operand.
9115 Return NULL if OP is not a shifter operand. */
9116 static rtx
9117 shifter_op_p (rtx op, rtx *shift_reg)
9118 {
9119 enum rtx_code code = GET_CODE (op);
9120
9121 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9122 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9123 return XEXP (op, 0);
9124 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9125 return XEXP (op, 0);
9126 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9127 || code == ASHIFTRT)
9128 {
9129 if (!CONST_INT_P (XEXP (op, 1)))
9130 *shift_reg = XEXP (op, 1);
9131 return XEXP (op, 0);
9132 }
9133
9134 return NULL;
9135 }
9136
9137 static bool
9138 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9139 {
9140 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9141 rtx_code code = GET_CODE (x);
9142 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9143
9144 switch (XINT (x, 1))
9145 {
9146 case UNSPEC_UNALIGNED_LOAD:
9147 /* We can only do unaligned loads into the integer unit, and we can't
9148 use LDM or LDRD. */
9149 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9150 if (speed_p)
9151 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9152 + extra_cost->ldst.load_unaligned);
9153
9154 #ifdef NOT_YET
9155 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9156 ADDR_SPACE_GENERIC, speed_p);
9157 #endif
9158 return true;
9159
9160 case UNSPEC_UNALIGNED_STORE:
9161 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9162 if (speed_p)
9163 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9164 + extra_cost->ldst.store_unaligned);
9165
9166 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9167 #ifdef NOT_YET
9168 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9169 ADDR_SPACE_GENERIC, speed_p);
9170 #endif
9171 return true;
9172
9173 case UNSPEC_VRINTZ:
9174 case UNSPEC_VRINTP:
9175 case UNSPEC_VRINTM:
9176 case UNSPEC_VRINTR:
9177 case UNSPEC_VRINTX:
9178 case UNSPEC_VRINTA:
9179 if (speed_p)
9180 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9181
9182 return true;
9183 default:
9184 *cost = COSTS_N_INSNS (2);
9185 break;
9186 }
9187 return true;
9188 }
9189
9190 /* Cost of a libcall. We assume one insn per argument, an amount for the
9191 call (one insn for -Os) and then one for processing the result. */
9192 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
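/* For example, LIBCALL_COST (2) expands to COSTS_N_INSNS (2 + 18) when
   optimizing for speed and to COSTS_N_INSNS (2 + 2) when optimizing for
   size.  */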
9193
9194 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9195 do \
9196 { \
9197 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9198 if (shift_op != NULL \
9199 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9200 { \
9201 if (shift_reg) \
9202 { \
9203 if (speed_p) \
9204 *cost += extra_cost->alu.arith_shift_reg; \
9205 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9206 ASHIFT, 1, speed_p); \
9207 } \
9208 else if (speed_p) \
9209 *cost += extra_cost->alu.arith_shift; \
9210 \
9211 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9212 ASHIFT, 0, speed_p) \
9213 + rtx_cost (XEXP (x, 1 - IDX), \
9214 GET_MODE (shift_op), \
9215 OP, 1, speed_p)); \
9216 return true; \
9217 } \
9218 } \
9219 while (0);
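/* Illustrative example (hypothetical RTL, assuming arm_rtx_shift_left_p
   accepts the ashift): for a narrow-mode
   (plus:HI (ashift:HI r1 (const_int 2)) r2), shifter_op_p returns r1 and
   leaves shift_reg as NULL, so only the arith_shift cost is added rather
   than arith_shift_reg.  */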
9220
9221 /* RTX costs. Make an estimate of the cost of executing the operation
9222 X, which is contained within an operation with code OUTER_CODE.
9223 SPEED_P indicates whether the cost desired is the performance cost,
9224 or the size cost. The estimate is stored in COST and the return
9225 value is TRUE if the cost calculation is final, or FALSE if the
9226 caller should recurse through the operands of X to add additional
9227 costs.
9228
9229 We currently make no attempt to model the size savings of Thumb-2
9230 16-bit instructions. At the normal points in compilation where
9231 this code is called we have no measure of whether the condition
9232 flags are live or not, and thus no realistic way to determine what
9233 the size will eventually be. */
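/* For example, a SET between two SImode registers is costed below as
   COSTS_N_INSNS (1) and the function returns true (final), while a SET
   from an arbitrary expression returns false so the caller costs the
   operands recursively.  */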
9234 static bool
9235 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9236 const struct cpu_cost_table *extra_cost,
9237 int *cost, bool speed_p)
9238 {
9239 machine_mode mode = GET_MODE (x);
9240
9241 *cost = COSTS_N_INSNS (1);
9242
9243 if (TARGET_THUMB1)
9244 {
9245 if (speed_p)
9246 *cost = thumb1_rtx_costs (x, code, outer_code);
9247 else
9248 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9249 return true;
9250 }
9251
9252 switch (code)
9253 {
9254 case SET:
9255 *cost = 0;
9256 /* SET RTXs don't have a mode so we get it from the destination. */
9257 mode = GET_MODE (SET_DEST (x));
9258
9259 if (REG_P (SET_SRC (x))
9260 && REG_P (SET_DEST (x)))
9261 {
9262 /* Assume that most copies can be done with a single insn,
9263 unless we don't have HW FP, in which case everything
9264 larger than word mode will require two insns. */
9265 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9266 && GET_MODE_SIZE (mode) > 4)
9267 || mode == DImode)
9268 ? 2 : 1);
9269 /* Conditional register moves can be encoded
9270 in 16 bits in Thumb mode. */
9271 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9272 *cost >>= 1;
9273
9274 return true;
9275 }
9276
9277 if (CONST_INT_P (SET_SRC (x)))
9278 {
9279 /* Handle CONST_INT here, since the value doesn't have a mode
9280 and we would otherwise be unable to work out the true cost. */
9281 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9282 0, speed_p);
9283 outer_code = SET;
9284 /* Slightly lower the cost of setting a core reg to a constant.
9285 This helps break up chains and allows for better scheduling. */
9286 if (REG_P (SET_DEST (x))
9287 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9288 *cost -= 1;
9289 x = SET_SRC (x);
9290 /* Immediate moves with an immediate in the range [0, 255] can be
9291 encoded in 16 bits in Thumb mode. */
9292 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9293 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9294 *cost >>= 1;
9295 goto const_int_cost;
9296 }
9297
9298 return false;
9299
9300 case MEM:
9301 /* A memory access costs 1 insn if the mode is small or the address is
9302 a single register; otherwise it costs one insn per word. */
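 /* E.g. a DImode load through a reg+reg address would be costed here at
    COSTS_N_INSNS (ARM_NUM_REGS (DImode)) == COSTS_N_INSNS (2)
    (illustrative; the exact value depends on the address form).  */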
9303 if (REG_P (XEXP (x, 0)))
9304 *cost = COSTS_N_INSNS (1);
9305 else if (flag_pic
9306 && GET_CODE (XEXP (x, 0)) == PLUS
9307 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9308 /* This will be split into two instructions.
9309 See arm.md:calculate_pic_address. */
9310 *cost = COSTS_N_INSNS (2);
9311 else
9312 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9313
9314 /* For speed optimizations, add the costs of the address and
9315 accessing memory. */
9316 if (speed_p)
9317 #ifdef NOT_YET
9318 *cost += (extra_cost->ldst.load
9319 + arm_address_cost (XEXP (x, 0), mode,
9320 ADDR_SPACE_GENERIC, speed_p));
9321 #else
9322 *cost += extra_cost->ldst.load;
9323 #endif
9324 return true;
9325
9326 case PARALLEL:
9327 {
9328 /* Calculations of LDM costs are complex. We assume an initial cost
9329 (ldm_1st) which will load the number of registers mentioned in
9330 ldm_regs_per_insn_1st registers; then each additional
9331 ldm_regs_per_insn_subsequent registers cost one more insn. The
9332 formula for N regs is thus:
9333
9334 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9335 + ldm_regs_per_insn_subsequent - 1)
9336 / ldm_regs_per_insn_subsequent).
9337
9338 Additional costs may also be added for addressing. A similar
9339 formula is used for STM. */
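 /* Worked example (hypothetical tuning values): with
    ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2, an
    8-register LDM adds ldm_1st + COSTS_N_INSNS ((6 + 2 - 1) / 2)
    = ldm_1st + COSTS_N_INSNS (3).  */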
9340
9341 bool is_ldm = load_multiple_operation (x, SImode);
9342 bool is_stm = store_multiple_operation (x, SImode);
9343
9344 if (is_ldm || is_stm)
9345 {
9346 if (speed_p)
9347 {
9348 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9349 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9350 ? extra_cost->ldst.ldm_regs_per_insn_1st
9351 : extra_cost->ldst.stm_regs_per_insn_1st;
9352 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9353 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9354 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9355
9356 *cost += regs_per_insn_1st
9357 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9358 + regs_per_insn_sub - 1)
9359 / regs_per_insn_sub);
9360 return true;
9361 }
9362
9363 }
9364 return false;
9365 }
9366 case DIV:
9367 case UDIV:
9368 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9369 && (mode == SFmode || !TARGET_VFP_SINGLE))
9370 *cost += COSTS_N_INSNS (speed_p
9371 ? extra_cost->fp[mode != SFmode].div : 0);
9372 else if (mode == SImode && TARGET_IDIV)
9373 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9374 else
9375 *cost = LIBCALL_COST (2);
9376
9377 /* Make sdiv more expensive so that when both sdiv and udiv are
9378 possible, udiv is preferred. */
9379 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9380 return false; /* All arguments must be in registers. */
9381
9382 case MOD:
9383 /* MOD by a power of 2 can be expanded as:
9384 rsbs r1, r0, #0
9385 and r0, r0, #(n - 1)
9386 and r1, r1, #(n - 1)
9387 rsbpl r0, r1, #0. */
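 /* For example, x % 8 (n == 8) uses #(n - 1) == #7 as the mask in both
    AND instructions of the sequence above.  */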
9388 if (CONST_INT_P (XEXP (x, 1))
9389 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9390 && mode == SImode)
9391 {
9392 *cost += COSTS_N_INSNS (3);
9393
9394 if (speed_p)
9395 *cost += 2 * extra_cost->alu.logical
9396 + extra_cost->alu.arith;
9397 return true;
9398 }
9399
9400 /* Fall-through. */
9401 case UMOD:
9402 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9403 are possible, udiv is preferred. */
9404 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9405 return false; /* All arguments must be in registers. */
9406
9407 case ROTATE:
9408 if (mode == SImode && REG_P (XEXP (x, 1)))
9409 {
9410 *cost += (COSTS_N_INSNS (1)
9411 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9412 if (speed_p)
9413 *cost += extra_cost->alu.shift_reg;
9414 return true;
9415 }
9416 /* Fall through */
9417 case ROTATERT:
9418 case ASHIFT:
9419 case LSHIFTRT:
9420 case ASHIFTRT:
9421 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9422 {
9423 *cost += (COSTS_N_INSNS (2)
9424 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9425 if (speed_p)
9426 *cost += 2 * extra_cost->alu.shift;
9427 return true;
9428 }
9429 else if (mode == SImode)
9430 {
9431 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9432 /* Slightly disparage register shifts at -Os, but not by much. */
9433 if (!CONST_INT_P (XEXP (x, 1)))
9434 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9435 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9436 return true;
9437 }
9438 else if (GET_MODE_CLASS (mode) == MODE_INT
9439 && GET_MODE_SIZE (mode) < 4)
9440 {
9441 if (code == ASHIFT)
9442 {
9443 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9444 /* Slightly disparage register shifts at -Os, but not by
9445 much. */
9446 if (!CONST_INT_P (XEXP (x, 1)))
9447 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9448 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9449 }
9450 else if (code == LSHIFTRT || code == ASHIFTRT)
9451 {
9452 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9453 {
9454 /* Can use SBFX/UBFX. */
9455 if (speed_p)
9456 *cost += extra_cost->alu.bfx;
9457 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9458 }
9459 else
9460 {
9461 *cost += COSTS_N_INSNS (1);
9462 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9463 if (speed_p)
9464 {
9465 if (CONST_INT_P (XEXP (x, 1)))
9466 *cost += 2 * extra_cost->alu.shift;
9467 else
9468 *cost += (extra_cost->alu.shift
9469 + extra_cost->alu.shift_reg);
9470 }
9471 else
9472 /* Slightly disparage register shifts. */
9473 *cost += !CONST_INT_P (XEXP (x, 1));
9474 }
9475 }
9476 else /* Rotates. */
9477 {
9478 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9479 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9480 if (speed_p)
9481 {
9482 if (CONST_INT_P (XEXP (x, 1)))
9483 *cost += (2 * extra_cost->alu.shift
9484 + extra_cost->alu.log_shift);
9485 else
9486 *cost += (extra_cost->alu.shift
9487 + extra_cost->alu.shift_reg
9488 + extra_cost->alu.log_shift_reg);
9489 }
9490 }
9491 return true;
9492 }
9493
9494 *cost = LIBCALL_COST (2);
9495 return false;
9496
9497 case BSWAP:
9498 if (arm_arch6)
9499 {
9500 if (mode == SImode)
9501 {
9502 if (speed_p)
9503 *cost += extra_cost->alu.rev;
9504
9505 return false;
9506 }
9507 }
9508 else
9509 {
9510 /* No rev instruction available. Look at arm_legacy_rev
9511 and thumb_legacy_rev for the form of RTL used then. */
9512 if (TARGET_THUMB)
9513 {
9514 *cost += COSTS_N_INSNS (9);
9515
9516 if (speed_p)
9517 {
9518 *cost += 6 * extra_cost->alu.shift;
9519 *cost += 3 * extra_cost->alu.logical;
9520 }
9521 }
9522 else
9523 {
9524 *cost += COSTS_N_INSNS (4);
9525
9526 if (speed_p)
9527 {
9528 *cost += 2 * extra_cost->alu.shift;
9529 *cost += extra_cost->alu.arith_shift;
9530 *cost += 2 * extra_cost->alu.logical;
9531 }
9532 }
9533 return true;
9534 }
9535 return false;
9536
9537 case MINUS:
9538 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9539 && (mode == SFmode || !TARGET_VFP_SINGLE))
9540 {
9541 if (GET_CODE (XEXP (x, 0)) == MULT
9542 || GET_CODE (XEXP (x, 1)) == MULT)
9543 {
9544 rtx mul_op0, mul_op1, sub_op;
9545
9546 if (speed_p)
9547 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9548
9549 if (GET_CODE (XEXP (x, 0)) == MULT)
9550 {
9551 mul_op0 = XEXP (XEXP (x, 0), 0);
9552 mul_op1 = XEXP (XEXP (x, 0), 1);
9553 sub_op = XEXP (x, 1);
9554 }
9555 else
9556 {
9557 mul_op0 = XEXP (XEXP (x, 1), 0);
9558 mul_op1 = XEXP (XEXP (x, 1), 1);
9559 sub_op = XEXP (x, 0);
9560 }
9561
9562 /* The first operand of the multiply may be optionally
9563 negated. */
9564 if (GET_CODE (mul_op0) == NEG)
9565 mul_op0 = XEXP (mul_op0, 0);
9566
9567 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9568 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9569 + rtx_cost (sub_op, mode, code, 0, speed_p));
9570
9571 return true;
9572 }
9573
9574 if (speed_p)
9575 *cost += extra_cost->fp[mode != SFmode].addsub;
9576 return false;
9577 }
9578
9579 if (mode == SImode)
9580 {
9581 rtx shift_by_reg = NULL;
9582 rtx shift_op;
9583 rtx non_shift_op;
9584
9585 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9586 if (shift_op == NULL)
9587 {
9588 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9589 non_shift_op = XEXP (x, 0);
9590 }
9591 else
9592 non_shift_op = XEXP (x, 1);
9593
9594 if (shift_op != NULL)
9595 {
9596 if (shift_by_reg != NULL)
9597 {
9598 if (speed_p)
9599 *cost += extra_cost->alu.arith_shift_reg;
9600 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9601 }
9602 else if (speed_p)
9603 *cost += extra_cost->alu.arith_shift;
9604
9605 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9606 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9607 return true;
9608 }
9609
9610 if (arm_arch_thumb2
9611 && GET_CODE (XEXP (x, 1)) == MULT)
9612 {
9613 /* MLS. */
9614 if (speed_p)
9615 *cost += extra_cost->mult[0].add;
9616 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9617 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9618 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9619 return true;
9620 }
9621
9622 if (CONST_INT_P (XEXP (x, 0)))
9623 {
9624 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9625 INTVAL (XEXP (x, 0)), NULL_RTX,
9626 NULL_RTX, 1, 0);
9627 *cost = COSTS_N_INSNS (insns);
9628 if (speed_p)
9629 *cost += insns * extra_cost->alu.arith;
9630 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9631 return true;
9632 }
9633 else if (speed_p)
9634 *cost += extra_cost->alu.arith;
9635
9636 return false;
9637 }
9638
9639 if (GET_MODE_CLASS (mode) == MODE_INT
9640 && GET_MODE_SIZE (mode) < 4)
9641 {
9642 rtx shift_op, shift_reg;
9643 shift_reg = NULL;
9644
9645 /* We check both sides of the MINUS for shifter operands since,
9646 unlike PLUS, it's not commutative. */
9647
9648 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9649 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9650
9651 /* Slightly disparage, as we might need to widen the result. */
9652 *cost += 1;
9653 if (speed_p)
9654 *cost += extra_cost->alu.arith;
9655
9656 if (CONST_INT_P (XEXP (x, 0)))
9657 {
9658 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9659 return true;
9660 }
9661
9662 return false;
9663 }
9664
9665 if (mode == DImode)
9666 {
9667 *cost += COSTS_N_INSNS (1);
9668
9669 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9670 {
9671 rtx op1 = XEXP (x, 1);
9672
9673 if (speed_p)
9674 *cost += 2 * extra_cost->alu.arith;
9675
9676 if (GET_CODE (op1) == ZERO_EXTEND)
9677 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9678 0, speed_p);
9679 else
9680 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9681 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9682 0, speed_p);
9683 return true;
9684 }
9685 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9686 {
9687 if (speed_p)
9688 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9689 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9690 0, speed_p)
9691 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9692 return true;
9693 }
9694 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9695 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9696 {
9697 if (speed_p)
9698 *cost += (extra_cost->alu.arith
9699 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9700 ? extra_cost->alu.arith
9701 : extra_cost->alu.arith_shift));
9702 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9703 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9704 GET_CODE (XEXP (x, 1)), 0, speed_p));
9705 return true;
9706 }
9707
9708 if (speed_p)
9709 *cost += 2 * extra_cost->alu.arith;
9710 return false;
9711 }
9712
9713 /* Vector mode? */
9714
9715 *cost = LIBCALL_COST (2);
9716 return false;
9717
9718 case PLUS:
9719 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9720 && (mode == SFmode || !TARGET_VFP_SINGLE))
9721 {
9722 if (GET_CODE (XEXP (x, 0)) == MULT)
9723 {
9724 rtx mul_op0, mul_op1, add_op;
9725
9726 if (speed_p)
9727 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9728
9729 mul_op0 = XEXP (XEXP (x, 0), 0);
9730 mul_op1 = XEXP (XEXP (x, 0), 1);
9731 add_op = XEXP (x, 1);
9732
9733 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9734 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9735 + rtx_cost (add_op, mode, code, 0, speed_p));
9736
9737 return true;
9738 }
9739
9740 if (speed_p)
9741 *cost += extra_cost->fp[mode != SFmode].addsub;
9742 return false;
9743 }
9744 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9745 {
9746 *cost = LIBCALL_COST (2);
9747 return false;
9748 }
9749
9750 /* Narrow modes can be synthesized in SImode, but the range
9751 of useful sub-operations is limited. Check for shift operations
9752 on one of the operands. Only left shifts can be used in the
9753 narrow modes. */
9754 if (GET_MODE_CLASS (mode) == MODE_INT
9755 && GET_MODE_SIZE (mode) < 4)
9756 {
9757 rtx shift_op, shift_reg;
9758 shift_reg = NULL;
9759
9760 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9761
9762 if (CONST_INT_P (XEXP (x, 1)))
9763 {
9764 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9765 INTVAL (XEXP (x, 1)), NULL_RTX,
9766 NULL_RTX, 1, 0);
9767 *cost = COSTS_N_INSNS (insns);
9768 if (speed_p)
9769 *cost += insns * extra_cost->alu.arith;
9770 /* Slightly penalize a narrow operation as the result may
9771 need widening. */
9772 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9773 return true;
9774 }
9775
9776 /* Slightly penalize a narrow operation as the result may
9777 need widening. */
9778 *cost += 1;
9779 if (speed_p)
9780 *cost += extra_cost->alu.arith;
9781
9782 return false;
9783 }
9784
9785 if (mode == SImode)
9786 {
9787 rtx shift_op, shift_reg;
9788
9789 if (TARGET_INT_SIMD
9790 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9791 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9792 {
9793 /* UXTA[BH] or SXTA[BH]. */
9794 if (speed_p)
9795 *cost += extra_cost->alu.extend_arith;
9796 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9797 0, speed_p)
9798 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9799 return true;
9800 }
9801
9802 shift_reg = NULL;
9803 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9804 if (shift_op != NULL)
9805 {
9806 if (shift_reg)
9807 {
9808 if (speed_p)
9809 *cost += extra_cost->alu.arith_shift_reg;
9810 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9811 }
9812 else if (speed_p)
9813 *cost += extra_cost->alu.arith_shift;
9814
9815 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9816 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9817 return true;
9818 }
9819 if (GET_CODE (XEXP (x, 0)) == MULT)
9820 {
9821 rtx mul_op = XEXP (x, 0);
9822
9823 if (TARGET_DSP_MULTIPLY
9824 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9825 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9826 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9827 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9828 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9829 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9830 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9831 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9832 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9833 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9834 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9835 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9836 == 16))))))
9837 {
9838 /* SMLA[BT][BT]. */
9839 if (speed_p)
9840 *cost += extra_cost->mult[0].extend_add;
9841 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9842 SIGN_EXTEND, 0, speed_p)
9843 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9844 SIGN_EXTEND, 0, speed_p)
9845 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9846 return true;
9847 }
9848
9849 if (speed_p)
9850 *cost += extra_cost->mult[0].add;
9851 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9852 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9853 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9854 return true;
9855 }
9856 if (CONST_INT_P (XEXP (x, 1)))
9857 {
9858 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9859 INTVAL (XEXP (x, 1)), NULL_RTX,
9860 NULL_RTX, 1, 0);
9861 *cost = COSTS_N_INSNS (insns);
9862 if (speed_p)
9863 *cost += insns * extra_cost->alu.arith;
9864 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9865 return true;
9866 }
9867 else if (speed_p)
9868 *cost += extra_cost->alu.arith;
9869
9870 return false;
9871 }
9872
9873 if (mode == DImode)
9874 {
9875 if (arm_arch3m
9876 && GET_CODE (XEXP (x, 0)) == MULT
9877 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9878 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9879 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9880 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9881 {
9882 if (speed_p)
9883 *cost += extra_cost->mult[1].extend_add;
9884 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9885 ZERO_EXTEND, 0, speed_p)
9886 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9887 ZERO_EXTEND, 0, speed_p)
9888 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9889 return true;
9890 }
9891
9892 *cost += COSTS_N_INSNS (1);
9893
9894 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9895 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9896 {
9897 if (speed_p)
9898 *cost += (extra_cost->alu.arith
9899 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9900 ? extra_cost->alu.arith
9901 : extra_cost->alu.arith_shift));
9902
9903 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9904 0, speed_p)
9905 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9906 return true;
9907 }
9908
9909 if (speed_p)
9910 *cost += 2 * extra_cost->alu.arith;
9911 return false;
9912 }
9913
9914 /* Vector mode? */
9915 *cost = LIBCALL_COST (2);
9916 return false;
9917 case IOR:
9918 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9919 {
9920 if (speed_p)
9921 *cost += extra_cost->alu.rev;
9922
9923 return true;
9924 }
9925 /* Fall through. */
9926 case AND: case XOR:
9927 if (mode == SImode)
9928 {
9929 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9930 rtx op0 = XEXP (x, 0);
9931 rtx shift_op, shift_reg;
9932
9933 if (subcode == NOT
9934 && (code == AND
9935 || (code == IOR && TARGET_THUMB2)))
9936 op0 = XEXP (op0, 0);
9937
9938 shift_reg = NULL;
9939 shift_op = shifter_op_p (op0, &shift_reg);
9940 if (shift_op != NULL)
9941 {
9942 if (shift_reg)
9943 {
9944 if (speed_p)
9945 *cost += extra_cost->alu.log_shift_reg;
9946 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9947 }
9948 else if (speed_p)
9949 *cost += extra_cost->alu.log_shift;
9950
9951 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9952 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9953 return true;
9954 }
9955
9956 if (CONST_INT_P (XEXP (x, 1)))
9957 {
9958 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9959 INTVAL (XEXP (x, 1)), NULL_RTX,
9960 NULL_RTX, 1, 0);
9961
9962 *cost = COSTS_N_INSNS (insns);
9963 if (speed_p)
9964 *cost += insns * extra_cost->alu.logical;
9965 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9966 return true;
9967 }
9968
9969 if (speed_p)
9970 *cost += extra_cost->alu.logical;
9971 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9972 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9973 return true;
9974 }
9975
9976 if (mode == DImode)
9977 {
9978 rtx op0 = XEXP (x, 0);
9979 enum rtx_code subcode = GET_CODE (op0);
9980
9981 *cost += COSTS_N_INSNS (1);
9982
9983 if (subcode == NOT
9984 && (code == AND
9985 || (code == IOR && TARGET_THUMB2)))
9986 op0 = XEXP (op0, 0);
9987
9988 if (GET_CODE (op0) == ZERO_EXTEND)
9989 {
9990 if (speed_p)
9991 *cost += 2 * extra_cost->alu.logical;
9992
9993 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
9994 0, speed_p)
9995 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9996 return true;
9997 }
9998 else if (GET_CODE (op0) == SIGN_EXTEND)
9999 {
10000 if (speed_p)
10001 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10002
10003 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10004 0, speed_p)
10005 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10006 return true;
10007 }
10008
10009 if (speed_p)
10010 *cost += 2 * extra_cost->alu.logical;
10011
10012 return true;
10013 }
10014 /* Vector mode? */
10015
10016 *cost = LIBCALL_COST (2);
10017 return false;
10018
10019 case MULT:
10020 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10021 && (mode == SFmode || !TARGET_VFP_SINGLE))
10022 {
10023 rtx op0 = XEXP (x, 0);
10024
10025 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10026 op0 = XEXP (op0, 0);
10027
10028 if (speed_p)
10029 *cost += extra_cost->fp[mode != SFmode].mult;
10030
10031 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10032 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10033 return true;
10034 }
10035 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10036 {
10037 *cost = LIBCALL_COST (2);
10038 return false;
10039 }
10040
10041 if (mode == SImode)
10042 {
10043 if (TARGET_DSP_MULTIPLY
10044 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10045 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10046 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10047 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10048 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10049 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10050 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10051 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10052 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10053 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10054 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10055 && (INTVAL (XEXP (XEXP (x, 1), 1))
10056 == 16))))))
10057 {
10058 /* SMUL[TB][TB]. */
10059 if (speed_p)
10060 *cost += extra_cost->mult[0].extend;
10061 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10062 SIGN_EXTEND, 0, speed_p);
10063 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10064 SIGN_EXTEND, 1, speed_p);
10065 return true;
10066 }
10067 if (speed_p)
10068 *cost += extra_cost->mult[0].simple;
10069 return false;
10070 }
10071
10072 if (mode == DImode)
10073 {
10074 if (arm_arch3m
10075 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10076 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10077 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10078 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10079 {
10080 if (speed_p)
10081 *cost += extra_cost->mult[1].extend;
10082 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10083 ZERO_EXTEND, 0, speed_p)
10084 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10085 ZERO_EXTEND, 0, speed_p));
10086 return true;
10087 }
10088
10089 *cost = LIBCALL_COST (2);
10090 return false;
10091 }
10092
10093 /* Vector mode? */
10094 *cost = LIBCALL_COST (2);
10095 return false;
10096
10097 case NEG:
10098 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10099 && (mode == SFmode || !TARGET_VFP_SINGLE))
10100 {
10101 if (GET_CODE (XEXP (x, 0)) == MULT)
10102 {
10103 /* VNMUL. */
10104 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10105 return true;
10106 }
10107
10108 if (speed_p)
10109 *cost += extra_cost->fp[mode != SFmode].neg;
10110
10111 return false;
10112 }
10113 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10114 {
10115 *cost = LIBCALL_COST (1);
10116 return false;
10117 }
10118
10119 if (mode == SImode)
10120 {
10121 if (GET_CODE (XEXP (x, 0)) == ABS)
10122 {
10123 *cost += COSTS_N_INSNS (1);
10124 /* Assume the non-flag-changing variant. */
10125 if (speed_p)
10126 *cost += (extra_cost->alu.log_shift
10127 + extra_cost->alu.arith_shift);
10128 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10129 return true;
10130 }
10131
10132 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10133 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10134 {
10135 *cost += COSTS_N_INSNS (1);
10136 /* No extra cost for MOV imm and MVN imm. */
10137 /* If the comparison op is using the flags, there's no further
10138 cost; otherwise we need to add the cost of the comparison. */
10139 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10140 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10141 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10142 {
10143 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10144 *cost += (COSTS_N_INSNS (1)
10145 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10146 0, speed_p)
10147 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10148 1, speed_p));
10149 if (speed_p)
10150 *cost += extra_cost->alu.arith;
10151 }
10152 return true;
10153 }
10154
10155 if (speed_p)
10156 *cost += extra_cost->alu.arith;
10157 return false;
10158 }
10159
10160 if (GET_MODE_CLASS (mode) == MODE_INT
10161 && GET_MODE_SIZE (mode) < 4)
10162 {
10163 /* Slightly disparage, as we might need an extend operation. */
10164 *cost += 1;
10165 if (speed_p)
10166 *cost += extra_cost->alu.arith;
10167 return false;
10168 }
10169
10170 if (mode == DImode)
10171 {
10172 *cost += COSTS_N_INSNS (1);
10173 if (speed_p)
10174 *cost += 2 * extra_cost->alu.arith;
10175 return false;
10176 }
10177
10178 /* Vector mode? */
10179 *cost = LIBCALL_COST (1);
10180 return false;
10181
10182 case NOT:
10183 if (mode == SImode)
10184 {
10185 rtx shift_op;
10186 rtx shift_reg = NULL;
10187
10188 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10189
10190 if (shift_op)
10191 {
10192 if (shift_reg != NULL)
10193 {
10194 if (speed_p)
10195 *cost += extra_cost->alu.log_shift_reg;
10196 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10197 }
10198 else if (speed_p)
10199 *cost += extra_cost->alu.log_shift;
10200 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10201 return true;
10202 }
10203
10204 if (speed_p)
10205 *cost += extra_cost->alu.logical;
10206 return false;
10207 }
10208 if (mode == DImode)
10209 {
10210 *cost += COSTS_N_INSNS (1);
10211 return false;
10212 }
10213
10214 /* Vector mode? */
10215
10216 *cost += LIBCALL_COST (1);
10217 return false;
10218
10219 case IF_THEN_ELSE:
10220 {
10221 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10222 {
10223 *cost += COSTS_N_INSNS (3);
10224 return true;
10225 }
10226 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10227 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10228
10229 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10230 /* Assume that if one arm of the if_then_else is a register,
10231 that it will be tied with the result and eliminate the
10232 conditional insn. */
10233 if (REG_P (XEXP (x, 1)))
10234 *cost += op2cost;
10235 else if (REG_P (XEXP (x, 2)))
10236 *cost += op1cost;
10237 else
10238 {
10239 if (speed_p)
10240 {
10241 if (extra_cost->alu.non_exec_costs_exec)
10242 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10243 else
10244 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10245 }
10246 else
10247 *cost += op1cost + op2cost;
10248 }
10249 }
10250 return true;
10251
10252 case COMPARE:
10253 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10254 *cost = 0;
10255 else
10256 {
10257 machine_mode op0mode;
10258 /* We'll mostly assume that the cost of a compare is the cost of the
10259 LHS. However, there are some notable exceptions. */
10260
10261 /* Floating point compares are never done as side-effects. */
10262 op0mode = GET_MODE (XEXP (x, 0));
10263 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10264 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10265 {
10266 if (speed_p)
10267 *cost += extra_cost->fp[op0mode != SFmode].compare;
10268
10269 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10270 {
10271 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10272 return true;
10273 }
10274
10275 return false;
10276 }
10277 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10278 {
10279 *cost = LIBCALL_COST (2);
10280 return false;
10281 }
10282
10283 /* DImode compares normally take two insns. */
10284 if (op0mode == DImode)
10285 {
10286 *cost += COSTS_N_INSNS (1);
10287 if (speed_p)
10288 *cost += 2 * extra_cost->alu.arith;
10289 return false;
10290 }
10291
10292 if (op0mode == SImode)
10293 {
10294 rtx shift_op;
10295 rtx shift_reg;
10296
10297 if (XEXP (x, 1) == const0_rtx
10298 && !(REG_P (XEXP (x, 0))
10299 || (GET_CODE (XEXP (x, 0)) == SUBREG
10300 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10301 {
10302 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10303
10304 /* Multiply operations that set the flags are often
10305 significantly more expensive. */
10306 if (speed_p
10307 && GET_CODE (XEXP (x, 0)) == MULT
10308 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10309 *cost += extra_cost->mult[0].flag_setting;
10310
10311 if (speed_p
10312 && GET_CODE (XEXP (x, 0)) == PLUS
10313 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10314 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10315 0), 1), mode))
10316 *cost += extra_cost->mult[0].flag_setting;
10317 return true;
10318 }
10319
10320 shift_reg = NULL;
10321 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10322 if (shift_op != NULL)
10323 {
10324 if (shift_reg != NULL)
10325 {
10326 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10327 1, speed_p);
10328 if (speed_p)
10329 *cost += extra_cost->alu.arith_shift_reg;
10330 }
10331 else if (speed_p)
10332 *cost += extra_cost->alu.arith_shift;
10333 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10334 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10335 return true;
10336 }
10337
10338 if (speed_p)
10339 *cost += extra_cost->alu.arith;
10340 if (CONST_INT_P (XEXP (x, 1))
10341 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10342 {
10343 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10344 return true;
10345 }
10346 return false;
10347 }
10348
10349 /* Vector mode? */
10350
10351 *cost = LIBCALL_COST (2);
10352 return false;
10353 }
10354 return true;
10355
10356 case EQ:
10357 case NE:
10358 case LT:
10359 case LE:
10360 case GT:
10361 case GE:
10362 case LTU:
10363 case LEU:
10364 case GEU:
10365 case GTU:
10366 case ORDERED:
10367 case UNORDERED:
10368 case UNEQ:
10369 case UNLE:
10370 case UNLT:
10371 case UNGE:
10372 case UNGT:
10373 case LTGT:
10374 if (outer_code == SET)
10375 {
10376 /* Is it a store-flag operation? */
10377 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10378 && XEXP (x, 1) == const0_rtx)
10379 {
10380 /* Thumb also needs an IT insn. */
10381 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10382 return true;
10383 }
10384 if (XEXP (x, 1) == const0_rtx)
10385 {
10386 switch (code)
10387 {
10388 case LT:
10389 /* LSR Rd, Rn, #31. */
10390 if (speed_p)
10391 *cost += extra_cost->alu.shift;
10392 break;
10393
10394 case EQ:
10395 /* RSBS T1, Rn, #0
10396 ADC Rd, Rn, T1. */
10397
10398 case NE:
10399 /* SUBS T1, Rn, #1
10400 SBC Rd, Rn, T1. */
10401 *cost += COSTS_N_INSNS (1);
10402 break;
10403
10404 case LE:
10405 /* RSBS T1, Rn, Rn, LSR #31
10406 ADC Rd, Rn, T1. */
10407 *cost += COSTS_N_INSNS (1);
10408 if (speed_p)
10409 *cost += extra_cost->alu.arith_shift;
10410 break;
10411
10412 case GT:
10413 /* RSB Rd, Rn, Rn, ASR #1
10414 LSR Rd, Rd, #31. */
10415 *cost += COSTS_N_INSNS (1);
10416 if (speed_p)
10417 *cost += (extra_cost->alu.arith_shift
10418 + extra_cost->alu.shift);
10419 break;
10420
10421 case GE:
10422 /* ASR Rd, Rn, #31
10423 ADD Rd, Rn, #1. */
10424 *cost += COSTS_N_INSNS (1);
10425 if (speed_p)
10426 *cost += extra_cost->alu.shift;
10427 break;
10428
10429 default:
10430 /* Remaining cases are either meaningless or would take
10431 three insns anyway. */
10432 *cost = COSTS_N_INSNS (3);
10433 break;
10434 }
10435 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10436 return true;
10437 }
10438 else
10439 {
10440 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10441 if (CONST_INT_P (XEXP (x, 1))
10442 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10443 {
10444 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10445 return true;
10446 }
10447
10448 return false;
10449 }
10450 }
10451 /* Not directly inside a set. If it involves the condition code
10452 register it must be the condition for a branch, cond_exec or
10453 I_T_E operation. Since the comparison is performed elsewhere,
10454 this is just the control part, which has no additional
10455 cost. */
10456 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10457 && XEXP (x, 1) == const0_rtx)
10458 {
10459 *cost = 0;
10460 return true;
10461 }
10462 return false;
10463
10464 case ABS:
10465 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10466 && (mode == SFmode || !TARGET_VFP_SINGLE))
10467 {
10468 if (speed_p)
10469 *cost += extra_cost->fp[mode != SFmode].neg;
10470
10471 return false;
10472 }
10473 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10474 {
10475 *cost = LIBCALL_COST (1);
10476 return false;
10477 }
10478
10479 if (mode == SImode)
10480 {
10481 if (speed_p)
10482 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10483 return false;
10484 }
10485 /* Vector mode? */
10486 *cost = LIBCALL_COST (1);
10487 return false;
10488
10489 case SIGN_EXTEND:
10490 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10491 && MEM_P (XEXP (x, 0)))
10492 {
10493 if (mode == DImode)
10494 *cost += COSTS_N_INSNS (1);
10495
10496 if (!speed_p)
10497 return true;
10498
10499 if (GET_MODE (XEXP (x, 0)) == SImode)
10500 *cost += extra_cost->ldst.load;
10501 else
10502 *cost += extra_cost->ldst.load_sign_extend;
10503
10504 if (mode == DImode)
10505 *cost += extra_cost->alu.shift;
10506
10507 return true;
10508 }
10509
10510 /* Widening from less than 32-bits requires an extend operation. */
10511 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10512 {
10513 /* We have SXTB/SXTH. */
10514 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10515 if (speed_p)
10516 *cost += extra_cost->alu.extend;
10517 }
10518 else if (GET_MODE (XEXP (x, 0)) != SImode)
10519 {
10520 /* Needs two shifts. */
10521 *cost += COSTS_N_INSNS (1);
10522 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10523 if (speed_p)
10524 *cost += 2 * extra_cost->alu.shift;
10525 }
10526
10527 /* Widening beyond 32-bits requires one more insn. */
10528 if (mode == DImode)
10529 {
10530 *cost += COSTS_N_INSNS (1);
10531 if (speed_p)
10532 *cost += extra_cost->alu.shift;
10533 }
10534
10535 return true;
10536
10537 case ZERO_EXTEND:
10538 if ((arm_arch4
10539 || GET_MODE (XEXP (x, 0)) == SImode
10540 || GET_MODE (XEXP (x, 0)) == QImode)
10541 && MEM_P (XEXP (x, 0)))
10542 {
10543 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10544
10545 if (mode == DImode)
10546 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10547
10548 return true;
10549 }
10550
10551 /* Widening from less than 32-bits requires an extend operation. */
10552 if (GET_MODE (XEXP (x, 0)) == QImode)
10553 {
10554 /* UXTB can be a shorter instruction in Thumb2, but it might
10555 be slower than the AND Rd, Rn, #255 alternative. When
10556 optimizing for speed it should never be slower to use
10557 AND, and we don't really model 16-bit vs 32-bit insns
10558 here. */
10559 if (speed_p)
10560 *cost += extra_cost->alu.logical;
10561 }
10562 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10563 {
10564 /* We have UXTB/UXTH. */
10565 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10566 if (speed_p)
10567 *cost += extra_cost->alu.extend;
10568 }
10569 else if (GET_MODE (XEXP (x, 0)) != SImode)
10570 {
10571 /* Needs two shifts. It's marginally preferable to use
10572 shifts rather than two BIC instructions as the second
10573 shift may merge with a subsequent insn as a shifter
10574 op. */
10575 *cost = COSTS_N_INSNS (2);
10576 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10577 if (speed_p)
10578 *cost += 2 * extra_cost->alu.shift;
10579 }
10580
10581 /* Widening beyond 32-bits requires one more insn. */
10582 if (mode == DImode)
10583 {
10584 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10585 }
10586
10587 return true;
10588
10589 case CONST_INT:
10590 *cost = 0;
10591 /* CONST_INT has no mode, so we cannot tell for sure how many
10592 insns are really going to be needed. The best we can do is
10593 look at the value passed. If it fits in SImode, then assume
10594 that's the mode it will be used for. Otherwise assume it
10595 will be used in DImode. */
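 /* Illustrative (hypothetical values): 0x12345 survives truncation to
    SImode, so it is costed as an SImode constant; 0x123456789 does not,
    so it is costed as the sum of arm_gen_constant for its low and high
    32-bit halves.  */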
10596 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10597 mode = SImode;
10598 else
10599 mode = DImode;
10600
10601 /* Avoid blowing up in arm_gen_constant (). */
10602 if (!(outer_code == PLUS
10603 || outer_code == AND
10604 || outer_code == IOR
10605 || outer_code == XOR
10606 || outer_code == MINUS))
10607 outer_code = SET;
10608
10609 const_int_cost:
10610 if (mode == SImode)
10611 {
10612 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10613 INTVAL (x), NULL, NULL,
10614 0, 0));
10615 /* Extra costs? */
10616 }
10617 else
10618 {
10619 *cost += COSTS_N_INSNS (arm_gen_constant
10620 (outer_code, SImode, NULL,
10621 trunc_int_for_mode (INTVAL (x), SImode),
10622 NULL, NULL, 0, 0)
10623 + arm_gen_constant (outer_code, SImode, NULL,
10624 INTVAL (x) >> 32, NULL,
10625 NULL, 0, 0));
10626 /* Extra costs? */
10627 }
10628
10629 return true;
10630
10631 case CONST:
10632 case LABEL_REF:
10633 case SYMBOL_REF:
10634 if (speed_p)
10635 {
10636 if (arm_arch_thumb2 && !flag_pic)
10637 *cost += COSTS_N_INSNS (1);
10638 else
10639 *cost += extra_cost->ldst.load;
10640 }
10641 else
10642 *cost += COSTS_N_INSNS (1);
10643
10644 if (flag_pic)
10645 {
10646 *cost += COSTS_N_INSNS (1);
10647 if (speed_p)
10648 *cost += extra_cost->alu.arith;
10649 }
10650
10651 return true;
10652
10653 case CONST_FIXED:
10654 *cost = COSTS_N_INSNS (4);
10655 /* Fixme. */
10656 return true;
10657
10658 case CONST_DOUBLE:
10659 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10660 && (mode == SFmode || !TARGET_VFP_SINGLE))
10661 {
10662 if (vfp3_const_double_rtx (x))
10663 {
10664 if (speed_p)
10665 *cost += extra_cost->fp[mode == DFmode].fpconst;
10666 return true;
10667 }
10668
10669 if (speed_p)
10670 {
10671 if (mode == DFmode)
10672 *cost += extra_cost->ldst.loadd;
10673 else
10674 *cost += extra_cost->ldst.loadf;
10675 }
10676 else
10677 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10678
10679 return true;
10680 }
10681 *cost = COSTS_N_INSNS (4);
10682 return true;
10683
10684 case CONST_VECTOR:
10685 /* Fixme. */
10686 if (TARGET_NEON
10687 && TARGET_HARD_FLOAT
10688 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10689 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10690 *cost = COSTS_N_INSNS (1);
10691 else
10692 *cost = COSTS_N_INSNS (4);
10693 return true;
10694
10695 case HIGH:
10696 case LO_SUM:
10697 /* When optimizing for size, we prefer constant pool entries to
10698 MOVW/MOVT pairs, so bump the cost of these slightly. */
10699 if (!speed_p)
10700 *cost += 1;
10701 return true;
10702
10703 case CLZ:
10704 if (speed_p)
10705 *cost += extra_cost->alu.clz;
10706 return false;
10707
10708 case SMIN:
10709 if (XEXP (x, 1) == const0_rtx)
10710 {
10711 if (speed_p)
10712 *cost += extra_cost->alu.log_shift;
10713 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10714 return true;
10715 }
10716 /* Fall through. */
10717 case SMAX:
10718 case UMIN:
10719 case UMAX:
10720 *cost += COSTS_N_INSNS (1);
10721 return false;
10722
10723 case TRUNCATE:
10724 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10725 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10726 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10727 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10728 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10729 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10730 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10731 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10732 == ZERO_EXTEND))))
10733 {
10734 if (speed_p)
10735 *cost += extra_cost->mult[1].extend;
10736 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10737 ZERO_EXTEND, 0, speed_p)
10738 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10739 ZERO_EXTEND, 0, speed_p));
10740 return true;
10741 }
10742 *cost = LIBCALL_COST (1);
10743 return false;
10744
10745 case UNSPEC_VOLATILE:
10746 case UNSPEC:
10747 return arm_unspec_cost (x, outer_code, speed_p, cost);
10748
10749 case PC:
10750 /* Reading the PC is like reading any other register. Writing it
10751 is more expensive, but we take that into account elsewhere. */
10752 *cost = 0;
10753 return true;
10754
10755 case ZERO_EXTRACT:
10756 /* TODO: Simple zero_extract of bottom bits using AND. */
10757 /* Fall through. */
10758 case SIGN_EXTRACT:
10759 if (arm_arch6
10760 && mode == SImode
10761 && CONST_INT_P (XEXP (x, 1))
10762 && CONST_INT_P (XEXP (x, 2)))
10763 {
10764 if (speed_p)
10765 *cost += extra_cost->alu.bfx;
10766 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10767 return true;
10768 }
10769 /* Without UBFX/SBFX, need to resort to shift operations. */
10770 *cost += COSTS_N_INSNS (1);
10771 if (speed_p)
10772 *cost += 2 * extra_cost->alu.shift;
10773 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10774 return true;
10775
10776 case FLOAT_EXTEND:
10777 if (TARGET_HARD_FLOAT)
10778 {
10779 if (speed_p)
10780 *cost += extra_cost->fp[mode == DFmode].widen;
10781 if (!TARGET_VFP5
10782 && GET_MODE (XEXP (x, 0)) == HFmode)
10783 {
10784 /* Pre v8, widening HF->DF is a two-step process, first
10785 widening to SFmode. */
10786 *cost += COSTS_N_INSNS (1);
10787 if (speed_p)
10788 *cost += extra_cost->fp[0].widen;
10789 }
10790 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10791 return true;
10792 }
10793
10794 *cost = LIBCALL_COST (1);
10795 return false;
10796
10797 case FLOAT_TRUNCATE:
10798 if (TARGET_HARD_FLOAT)
10799 {
10800 if (speed_p)
10801 *cost += extra_cost->fp[mode == DFmode].narrow;
10802 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10803 return true;
10804 /* Vector modes? */
10805 }
10806 *cost = LIBCALL_COST (1);
10807 return false;
10808
10809 case FMA:
10810 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10811 {
10812 rtx op0 = XEXP (x, 0);
10813 rtx op1 = XEXP (x, 1);
10814 rtx op2 = XEXP (x, 2);
10815
10816
10817 /* vfms or vfnma. */
10818 if (GET_CODE (op0) == NEG)
10819 op0 = XEXP (op0, 0);
10820
10821 /* vfnms or vfnma. */
10822 if (GET_CODE (op2) == NEG)
10823 op2 = XEXP (op2, 0);
10824
10825 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10826 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10827 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10828
10829 if (speed_p)
10830 *cost += extra_cost->fp[mode == DFmode].fma;
10831
10832 return true;
10833 }
10834
10835 *cost = LIBCALL_COST (3);
10836 return false;
10837
10838 case FIX:
10839 case UNSIGNED_FIX:
10840 if (TARGET_HARD_FLOAT)
10841 {
10842 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10843 a vcvt fixed-point conversion. */
10844 if (code == FIX && mode == SImode
10845 && GET_CODE (XEXP (x, 0)) == FIX
10846 && GET_MODE (XEXP (x, 0)) == SFmode
10847 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10848 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10849 > 0)
10850 {
10851 if (speed_p)
10852 *cost += extra_cost->fp[0].toint;
10853
10854 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10855 code, 0, speed_p);
10856 return true;
10857 }
10858
10859 if (GET_MODE_CLASS (mode) == MODE_INT)
10860 {
10861 mode = GET_MODE (XEXP (x, 0));
10862 if (speed_p)
10863 *cost += extra_cost->fp[mode == DFmode].toint;
10864 /* Strip off the 'cost' of rounding towards zero. */
10865 if (GET_CODE (XEXP (x, 0)) == FIX)
10866 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10867 0, speed_p);
10868 else
10869 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10870 /* ??? Increase the cost to deal with transferring from
10871 FP -> CORE registers? */
10872 return true;
10873 }
10874 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10875 && TARGET_VFP5)
10876 {
10877 if (speed_p)
10878 *cost += extra_cost->fp[mode == DFmode].roundint;
10879 return false;
10880 }
10881 /* Vector costs? */
10882 }
10883 *cost = LIBCALL_COST (1);
10884 return false;
10885
10886 case FLOAT:
10887 case UNSIGNED_FLOAT:
10888 if (TARGET_HARD_FLOAT)
10889 {
10890 /* ??? Increase the cost to deal with transferring from CORE
10891 -> FP registers? */
10892 if (speed_p)
10893 *cost += extra_cost->fp[mode == DFmode].fromint;
10894 return false;
10895 }
10896 *cost = LIBCALL_COST (1);
10897 return false;
10898
10899 case CALL:
10900 return true;
10901
10902 case ASM_OPERANDS:
10903 {
10904 /* Just a guess: estimate the number of instructions in the asm
10905 template, plus one insn per input, but always a minimum of
10906 COSTS_N_INSNS (1) (see PR60663). */
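/* For example, a template that asm_str_count sees as two instructions
   with three inputs would be costed COSTS_N_INSNS (2 + 3); an empty
   template with no inputs still costs COSTS_N_INSNS (1). */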
10907 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10908 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10909
10910 *cost = COSTS_N_INSNS (asm_length + num_operands);
10911 return true;
10912 }
10913 default:
10914 if (mode != VOIDmode)
10915 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10916 else
10917 *cost = COSTS_N_INSNS (4); /* Who knows? */
10918 return false;
10919 }
10920 }
10921
10922 #undef HANDLE_NARROW_SHIFT_ARITH
10923
10924 /* RTX costs entry point. */
10925
10926 static bool
10927 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10928 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10929 {
10930 bool result;
10931 int code = GET_CODE (x);
10932 gcc_assert (current_tune->insn_extra_cost);
10933
10934 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10935 (enum rtx_code) outer_code,
10936 current_tune->insn_extra_cost,
10937 total, speed);
10938
10939 if (dump_file && (dump_flags & TDF_DETAILS))
10940 {
10941 print_rtl_single (dump_file, x);
10942 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10943 *total, result ? "final" : "partial");
10944 }
10945 return result;
10946 }
10947
10948 /* All address computations that can be done are free, but rtx cost returns
10949 the same for practically all of them. So we weight the different types
10950 of address here in the order (most pref first):
10951 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
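/* Read against the code below: pre/post increment or decrement costs 0,
   (plus reg (const_int N)) costs 2, a sum involving another arithmetic
   term costs 3, (plus reg reg) costs 4, a bare REG costs 6, and a MEM,
   LABEL_REF or SYMBOL_REF address costs 10. */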
10952 static inline int
10953 arm_arm_address_cost (rtx x)
10954 {
10955 enum rtx_code c = GET_CODE (x);
10956
10957 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10958 return 0;
10959 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10960 return 10;
10961
10962 if (c == PLUS)
10963 {
10964 if (CONST_INT_P (XEXP (x, 1)))
10965 return 2;
10966
10967 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10968 return 3;
10969
10970 return 4;
10971 }
10972
10973 return 6;
10974 }
10975
10976 static inline int
10977 arm_thumb_address_cost (rtx x)
10978 {
10979 enum rtx_code c = GET_CODE (x);
10980
10981 if (c == REG)
10982 return 1;
10983 if (c == PLUS
10984 && REG_P (XEXP (x, 0))
10985 && CONST_INT_P (XEXP (x, 1)))
10986 return 1;
10987
10988 return 2;
10989 }
10990
10991 static int
10992 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
10993 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
10994 {
10995 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
10996 }
10997
10998 /* Adjust cost hook for XScale. */
10999 static bool
11000 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11001 int * cost)
11002 {
11003 /* Some true dependencies can have a higher cost depending
11004 on precisely how certain input operands are used. */
11005 if (dep_type == 0
11006 && recog_memoized (insn) >= 0
11007 && recog_memoized (dep) >= 0)
11008 {
11009 int shift_opnum = get_attr_shift (insn);
11010 enum attr_type attr_type = get_attr_type (dep);
11011
11012 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11013 operand for INSN. If we have a shifted input operand and the
11014 instruction we depend on is another ALU instruction, then we may
11015 have to account for an additional stall. */
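/* An illustrative (hypothetical) pairing: if INSN is
   add r0, r1, r2, lsl #2 and DEP is a shift-type ALU insn such as
   mov r2, r3, lsl #1 that writes r2, the code below bumps the
   dependency cost to 2. */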
11016 if (shift_opnum != 0
11017 && (attr_type == TYPE_ALU_SHIFT_IMM
11018 || attr_type == TYPE_ALUS_SHIFT_IMM
11019 || attr_type == TYPE_LOGIC_SHIFT_IMM
11020 || attr_type == TYPE_LOGICS_SHIFT_IMM
11021 || attr_type == TYPE_ALU_SHIFT_REG
11022 || attr_type == TYPE_ALUS_SHIFT_REG
11023 || attr_type == TYPE_LOGIC_SHIFT_REG
11024 || attr_type == TYPE_LOGICS_SHIFT_REG
11025 || attr_type == TYPE_MOV_SHIFT
11026 || attr_type == TYPE_MVN_SHIFT
11027 || attr_type == TYPE_MOV_SHIFT_REG
11028 || attr_type == TYPE_MVN_SHIFT_REG))
11029 {
11030 rtx shifted_operand;
11031 int opno;
11032
11033 /* Get the shifted operand. */
11034 extract_insn (insn);
11035 shifted_operand = recog_data.operand[shift_opnum];
11036
11037 /* Iterate over all the operands in DEP. If we write an operand
11038 that overlaps with SHIFTED_OPERAND, then we have to increase the
11039 cost of this dependency. */
11040 extract_insn (dep);
11041 preprocess_constraints (dep);
11042 for (opno = 0; opno < recog_data.n_operands; opno++)
11043 {
11044 /* We can ignore strict inputs. */
11045 if (recog_data.operand_type[opno] == OP_IN)
11046 continue;
11047
11048 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11049 shifted_operand))
11050 {
11051 *cost = 2;
11052 return false;
11053 }
11054 }
11055 }
11056 }
11057 return true;
11058 }
11059
11060 /* Adjust cost hook for Cortex A9. */
11061 static bool
11062 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11063 int * cost)
11064 {
11065 switch (dep_type)
11066 {
11067 case REG_DEP_ANTI:
11068 *cost = 0;
11069 return false;
11070
11071 case REG_DEP_TRUE:
11072 case REG_DEP_OUTPUT:
11073 if (recog_memoized (insn) >= 0
11074 && recog_memoized (dep) >= 0)
11075 {
11076 if (GET_CODE (PATTERN (insn)) == SET)
11077 {
11078 if (GET_MODE_CLASS
11079 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11080 || GET_MODE_CLASS
11081 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11082 {
11083 enum attr_type attr_type_insn = get_attr_type (insn);
11084 enum attr_type attr_type_dep = get_attr_type (dep);
11085
11086 /* By default all dependencies of the form
11087 s0 = s0 <op> s1
11088 s0 = s0 <op> s2
11089 have an extra latency of 1 cycle because
11090 of the input and output dependency in this
11091 case. However this gets modeled as a true
11092 dependency and hence all these checks. */
11093 if (REG_P (SET_DEST (PATTERN (insn)))
11094 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11095 {
11096 /* FMACS is a special case where the dependent
11097 instruction can be issued 3 cycles before
11098 the normal latency in case of an output
11099 dependency. */
11100 if ((attr_type_insn == TYPE_FMACS
11101 || attr_type_insn == TYPE_FMACD)
11102 && (attr_type_dep == TYPE_FMACS
11103 || attr_type_dep == TYPE_FMACD))
11104 {
11105 if (dep_type == REG_DEP_OUTPUT)
11106 *cost = insn_default_latency (dep) - 3;
11107 else
11108 *cost = insn_default_latency (dep);
11109 return false;
11110 }
11111 else
11112 {
11113 if (dep_type == REG_DEP_OUTPUT)
11114 *cost = insn_default_latency (dep) + 1;
11115 else
11116 *cost = insn_default_latency (dep);
11117 }
11118 return false;
11119 }
11120 }
11121 }
11122 }
11123 break;
11124
11125 default:
11126 gcc_unreachable ();
11127 }
11128
11129 return true;
11130 }
11131
11132 /* Adjust cost hook for FA726TE. */
11133 static bool
11134 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11135 int * cost)
11136 {
11137 /* For FA726TE, a true dependency on the CPSR (i.e. a flag-setting insn
11138 followed by a predicated one) has a penalty of 3. */
11139 if (dep_type == REG_DEP_TRUE
11140 && recog_memoized (insn) >= 0
11141 && recog_memoized (dep) >= 0
11142 && get_attr_conds (dep) == CONDS_SET)
11143 {
11144 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11145 if (get_attr_conds (insn) == CONDS_USE
11146 && get_attr_type (insn) != TYPE_BRANCH)
11147 {
11148 *cost = 3;
11149 return false;
11150 }
11151
11152 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11153 || get_attr_conds (insn) == CONDS_USE)
11154 {
11155 *cost = 0;
11156 return false;
11157 }
11158 }
11159
11160 return true;
11161 }
11162
11163 /* Implement TARGET_REGISTER_MOVE_COST.
11164
11165 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11166 such a move is typically more expensive than a single memory access. We set
11167 the cost to less than two memory accesses so that floating
11168 point to integer conversion does not go through memory. */
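/* With arm_memory_move_cost returning 10 for TARGET_32BIT (see below),
   the cost of 15 used here for VFP<->core moves stays under
   2 * 10 = 20, as intended. */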
11169
11170 int
11171 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11172 reg_class_t from, reg_class_t to)
11173 {
11174 if (TARGET_32BIT)
11175 {
11176 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11177 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11178 return 15;
11179 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11180 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11181 return 4;
11182 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11183 return 20;
11184 else
11185 return 2;
11186 }
11187 else
11188 {
11189 if (from == HI_REGS || to == HI_REGS)
11190 return 4;
11191 else
11192 return 2;
11193 }
11194 }
11195
11196 /* Implement TARGET_MEMORY_MOVE_COST. */
11197
11198 int
11199 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11200 bool in ATTRIBUTE_UNUSED)
11201 {
11202 if (TARGET_32BIT)
11203 return 10;
11204 else
11205 {
11206 if (GET_MODE_SIZE (mode) < 4)
11207 return 8;
11208 else
11209 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11210 }
11211 }
11212
11213 /* Vectorizer cost model implementation. */
11214
11215 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11216 static int
11217 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11218 tree vectype,
11219 int misalign ATTRIBUTE_UNUSED)
11220 {
11221 unsigned elements;
11222
11223 switch (type_of_cost)
11224 {
11225 case scalar_stmt:
11226 return current_tune->vec_costs->scalar_stmt_cost;
11227
11228 case scalar_load:
11229 return current_tune->vec_costs->scalar_load_cost;
11230
11231 case scalar_store:
11232 return current_tune->vec_costs->scalar_store_cost;
11233
11234 case vector_stmt:
11235 return current_tune->vec_costs->vec_stmt_cost;
11236
11237 case vector_load:
11238 return current_tune->vec_costs->vec_align_load_cost;
11239
11240 case vector_store:
11241 return current_tune->vec_costs->vec_store_cost;
11242
11243 case vec_to_scalar:
11244 return current_tune->vec_costs->vec_to_scalar_cost;
11245
11246 case scalar_to_vec:
11247 return current_tune->vec_costs->scalar_to_vec_cost;
11248
11249 case unaligned_load:
11250 return current_tune->vec_costs->vec_unalign_load_cost;
11251
11252 case unaligned_store:
11253 return current_tune->vec_costs->vec_unalign_store_cost;
11254
11255 case cond_branch_taken:
11256 return current_tune->vec_costs->cond_taken_branch_cost;
11257
11258 case cond_branch_not_taken:
11259 return current_tune->vec_costs->cond_not_taken_branch_cost;
11260
11261 case vec_perm:
11262 case vec_promote_demote:
11263 return current_tune->vec_costs->vec_stmt_cost;
11264
11265 case vec_construct:
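/* A rough estimate; e.g. a four-element vector gives 4 / 2 + 1 = 3. */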
11266 elements = TYPE_VECTOR_SUBPARTS (vectype);
11267 return elements / 2 + 1;
11268
11269 default:
11270 gcc_unreachable ();
11271 }
11272 }
11273
11274 /* Implement targetm.vectorize.add_stmt_cost. */
11275
11276 static unsigned
11277 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11278 struct _stmt_vec_info *stmt_info, int misalign,
11279 enum vect_cost_model_location where)
11280 {
11281 unsigned *cost = (unsigned *) data;
11282 unsigned retval = 0;
11283
11284 if (flag_vect_cost_model)
11285 {
11286 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11287 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11288
11289 /* Statements in an inner loop relative to the loop being
11290 vectorized are weighted more heavily. The value here is
11291 arbitrary and could potentially be improved with analysis. */
11292 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11293 count *= 50; /* FIXME. */
11294
11295 retval = (unsigned) (count * stmt_cost);
11296 cost[where] += retval;
11297 }
11298
11299 return retval;
11300 }
11301
11302 /* Return true if and only if this insn can dual-issue only as older. */
11303 static bool
11304 cortexa7_older_only (rtx_insn *insn)
11305 {
11306 if (recog_memoized (insn) < 0)
11307 return false;
11308
11309 switch (get_attr_type (insn))
11310 {
11311 case TYPE_ALU_DSP_REG:
11312 case TYPE_ALU_SREG:
11313 case TYPE_ALUS_SREG:
11314 case TYPE_LOGIC_REG:
11315 case TYPE_LOGICS_REG:
11316 case TYPE_ADC_REG:
11317 case TYPE_ADCS_REG:
11318 case TYPE_ADR:
11319 case TYPE_BFM:
11320 case TYPE_REV:
11321 case TYPE_MVN_REG:
11322 case TYPE_SHIFT_IMM:
11323 case TYPE_SHIFT_REG:
11324 case TYPE_LOAD_BYTE:
11325 case TYPE_LOAD_4:
11326 case TYPE_STORE_4:
11327 case TYPE_FFARITHS:
11328 case TYPE_FADDS:
11329 case TYPE_FFARITHD:
11330 case TYPE_FADDD:
11331 case TYPE_FMOV:
11332 case TYPE_F_CVT:
11333 case TYPE_FCMPS:
11334 case TYPE_FCMPD:
11335 case TYPE_FCONSTS:
11336 case TYPE_FCONSTD:
11337 case TYPE_FMULS:
11338 case TYPE_FMACS:
11339 case TYPE_FMULD:
11340 case TYPE_FMACD:
11341 case TYPE_FDIVS:
11342 case TYPE_FDIVD:
11343 case TYPE_F_MRC:
11344 case TYPE_F_MRRC:
11345 case TYPE_F_FLAG:
11346 case TYPE_F_LOADS:
11347 case TYPE_F_STORES:
11348 return true;
11349 default:
11350 return false;
11351 }
11352 }
11353
11354 /* Return true if and only if this insn can dual-issue as younger. */
11355 static bool
11356 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11357 {
11358 if (recog_memoized (insn) < 0)
11359 {
11360 if (verbose > 5)
11361 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11362 return false;
11363 }
11364
11365 switch (get_attr_type (insn))
11366 {
11367 case TYPE_ALU_IMM:
11368 case TYPE_ALUS_IMM:
11369 case TYPE_LOGIC_IMM:
11370 case TYPE_LOGICS_IMM:
11371 case TYPE_EXTEND:
11372 case TYPE_MVN_IMM:
11373 case TYPE_MOV_IMM:
11374 case TYPE_MOV_REG:
11375 case TYPE_MOV_SHIFT:
11376 case TYPE_MOV_SHIFT_REG:
11377 case TYPE_BRANCH:
11378 case TYPE_CALL:
11379 return true;
11380 default:
11381 return false;
11382 }
11383 }
11384
11385
11386 /* Look for an instruction that can dual issue only as an older
11387 instruction, and move it in front of any instructions that can
11388 dual-issue as younger, while preserving the relative order of all
11389 other instructions in the ready list. This is a heuristic to help
11390 dual-issue in later cycles, by postponing issue of more flexible
11391 instructions. This heuristic may affect dual issue opportunities
11392 in the current cycle. */
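/* For example, if the ready list (next-to-issue first) is
   { Y1, X, O, Y2 } where O is older-only and Y1/Y2 are younger, the
   code below moves O into Y1's slot, giving { O, Y1, X, Y2 }. */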
11393 static void
11394 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11395 int *n_readyp, int clock)
11396 {
11397 int i;
11398 int first_older_only = -1, first_younger = -1;
11399
11400 if (verbose > 5)
11401 fprintf (file,
11402 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11403 clock,
11404 *n_readyp);
11405
11406 /* Traverse the ready list from the head (the instruction to issue
11407 first), and looking for the first instruction that can issue as
11408 younger and the first instruction that can dual-issue only as
11409 older. */
11410 for (i = *n_readyp - 1; i >= 0; i--)
11411 {
11412 rtx_insn *insn = ready[i];
11413 if (cortexa7_older_only (insn))
11414 {
11415 first_older_only = i;
11416 if (verbose > 5)
11417 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11418 break;
11419 }
11420 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11421 first_younger = i;
11422 }
11423
11424 /* Nothing to reorder because either no younger insn found or insn
11425 that can dual-issue only as older appears before any insn that
11426 can dual-issue as younger. */
11427 if (first_younger == -1)
11428 {
11429 if (verbose > 5)
11430 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11431 return;
11432 }
11433
11434 /* Nothing to reorder because no older-only insn in the ready list. */
11435 if (first_older_only == -1)
11436 {
11437 if (verbose > 5)
11438 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11439 return;
11440 }
11441
11442 /* Move first_older_only insn before first_younger. */
11443 if (verbose > 5)
11444 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11445 INSN_UID (ready[first_older_only]),
11446 INSN_UID (ready[first_younger]));
11447 rtx_insn *first_older_only_insn = ready [first_older_only];
11448 for (i = first_older_only; i < first_younger; i++)
11449 {
11450 ready[i] = ready[i+1];
11451 }
11452
11453 ready[i] = first_older_only_insn;
11454 return;
11455 }
11456
11457 /* Implement TARGET_SCHED_REORDER. */
11458 static int
11459 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11460 int clock)
11461 {
11462 switch (arm_tune)
11463 {
11464 case TARGET_CPU_cortexa7:
11465 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11466 break;
11467 default:
11468 /* Do nothing for other cores. */
11469 break;
11470 }
11471
11472 return arm_issue_rate ();
11473 }
11474
11475 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11476 It corrects the value of COST based on the relationship between
11477 INSN and DEP through the dependence LINK. It returns the new
11478 value. There is a per-core adjust_cost hook to adjust scheduler costs
11479 and the per-core hook can choose to completely override the generic
11480 adjust_cost function. Only put bits of code into arm_adjust_cost that
11481 are common across all cores. */
11482 static int
11483 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11484 unsigned int)
11485 {
11486 rtx i_pat, d_pat;
11487
11488 /* When generating Thumb-1 code, we want to place flag-setting operations
11489 close to a conditional branch which depends on them, so that we can
11490 omit the comparison. */
11491 if (TARGET_THUMB1
11492 && dep_type == 0
11493 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11494 && recog_memoized (dep) >= 0
11495 && get_attr_conds (dep) == CONDS_SET)
11496 return 0;
11497
11498 if (current_tune->sched_adjust_cost != NULL)
11499 {
11500 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11501 return cost;
11502 }
11503
11504 /* XXX Is this strictly true? */
11505 if (dep_type == REG_DEP_ANTI
11506 || dep_type == REG_DEP_OUTPUT)
11507 return 0;
11508
11509 /* Call insns don't incur a stall, even if they follow a load. */
11510 if (dep_type == 0
11511 && CALL_P (insn))
11512 return 1;
11513
11514 if ((i_pat = single_set (insn)) != NULL
11515 && MEM_P (SET_SRC (i_pat))
11516 && (d_pat = single_set (dep)) != NULL
11517 && MEM_P (SET_DEST (d_pat)))
11518 {
11519 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11520 /* This is a load after a store; there is no conflict if the load reads
11521 from a cached area. Assume that loads from the stack, and from the
11522 constant pool are cached, and that others will miss. This is a
11523 hack. */
11524
11525 if ((GET_CODE (src_mem) == SYMBOL_REF
11526 && CONSTANT_POOL_ADDRESS_P (src_mem))
11527 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11528 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11529 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11530 return 1;
11531 }
11532
11533 return cost;
11534 }
11535
11536 int
11537 arm_max_conditional_execute (void)
11538 {
11539 return max_insns_skipped;
11540 }
11541
11542 static int
11543 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11544 {
11545 if (TARGET_32BIT)
11546 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11547 else
11548 return (optimize > 0) ? 2 : 0;
11549 }
11550
11551 static int
11552 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11553 {
11554 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11555 }
11556
11557 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11558 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11559 sequences of non-executed instructions in IT blocks probably take the same
11560 amount of time as executed instructions (and the IT instruction itself takes
11561 space in icache). This function was experimentally determined to give good
11562 results on a popular embedded benchmark. */
11563
11564 static int
11565 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11566 {
11567 return (TARGET_32BIT && speed_p) ? 1
11568 : arm_default_branch_cost (speed_p, predictable_p);
11569 }
11570
11571 static int
11572 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11573 {
11574 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11575 }
11576
11577 static bool fp_consts_inited = false;
11578
11579 static REAL_VALUE_TYPE value_fp0;
11580
11581 static void
11582 init_fp_table (void)
11583 {
11584 REAL_VALUE_TYPE r;
11585
11586 r = REAL_VALUE_ATOF ("0", DFmode);
11587 value_fp0 = r;
11588 fp_consts_inited = true;
11589 }
11590
11591 /* Return TRUE if rtx X is a valid immediate FP constant. */
11592 int
11593 arm_const_double_rtx (rtx x)
11594 {
11595 const REAL_VALUE_TYPE *r;
11596
11597 if (!fp_consts_inited)
11598 init_fp_table ();
11599
11600 r = CONST_DOUBLE_REAL_VALUE (x);
11601 if (REAL_VALUE_MINUS_ZERO (*r))
11602 return 0;
11603
11604 if (real_equal (r, &value_fp0))
11605 return 1;
11606
11607 return 0;
11608 }
11609
11610 /* VFPv3 has a fairly wide range of representable immediates, formed from
11611 "quarter-precision" floating-point values. These can be evaluated using this
11612 formula (with ^ for exponentiation):
11613
11614 -1^s * n * 2^-r
11615
11616 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11617 16 <= n <= 31 and 0 <= r <= 7.
11618
11619 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11620
11621 - A (most-significant) is the sign bit.
11622 - BCD are the exponent (encoded as r XOR 3).
11623 - EFGH are the mantissa (encoded as n - 16).
11624 */
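/* Worked example: 1.0 = -1^0 * 16 * 2^-4, i.e. s = 0, n = 16, r = 4,
   which encodes as A = 0, BCD = 4 XOR 3 = 0b111, EFGH = 16 - 16 = 0b0000,
   giving the 8-bit value 0b01110000 (0x70); -1.0 differs only in the
   sign bit (0xf0). */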
11625
11626 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11627 fconst[sd] instruction, or -1 if X isn't suitable. */
11628 static int
11629 vfp3_const_double_index (rtx x)
11630 {
11631 REAL_VALUE_TYPE r, m;
11632 int sign, exponent;
11633 unsigned HOST_WIDE_INT mantissa, mant_hi;
11634 unsigned HOST_WIDE_INT mask;
11635 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11636 bool fail;
11637
11638 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11639 return -1;
11640
11641 r = *CONST_DOUBLE_REAL_VALUE (x);
11642
11643 /* We can't represent these things, so detect them first. */
11644 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11645 return -1;
11646
11647 /* Extract sign, exponent and mantissa. */
11648 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11649 r = real_value_abs (&r);
11650 exponent = REAL_EXP (&r);
11651 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11652 highest (sign) bit, with a fixed binary point at bit point_pos.
11653 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11654 bits for the mantissa, this may fail (low bits would be lost). */
11655 real_ldexp (&m, &r, point_pos - exponent);
11656 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11657 mantissa = w.elt (0);
11658 mant_hi = w.elt (1);
11659
11660 /* If there are bits set in the low part of the mantissa, we can't
11661 represent this value. */
11662 if (mantissa != 0)
11663 return -1;
11664
11665 /* Now make it so that mantissa contains the most-significant bits, and move
11666 the point_pos to indicate that the least-significant bits have been
11667 discarded. */
11668 point_pos -= HOST_BITS_PER_WIDE_INT;
11669 mantissa = mant_hi;
11670
11671 /* We can permit four significant bits of mantissa only, plus a high bit
11672 which is always 1. */
11673 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11674 if ((mantissa & mask) != 0)
11675 return -1;
11676
11677 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11678 mantissa >>= point_pos - 5;
11679
11680 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11681 floating-point immediate zero with Neon using an integer-zero load, but
11682 that case is handled elsewhere.) */
11683 if (mantissa == 0)
11684 return -1;
11685
11686 gcc_assert (mantissa >= 16 && mantissa <= 31);
11687
11688 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11689 normalized significands are in the range [1, 2). (Our mantissa is shifted
11690 left 4 places at this point relative to normalized IEEE754 values). GCC
11691 internally uses [0.5, 1) (see real.c), so the exponent returned from
11692 REAL_EXP must be altered. */
11693 exponent = 5 - exponent;
11694
11695 if (exponent < 0 || exponent > 7)
11696 return -1;
11697
11698 /* Sign, mantissa and exponent are now in the correct form to plug into the
11699 formula described in the comment above. */
11700 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11701 }
11702
11703 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11704 int
11705 vfp3_const_double_rtx (rtx x)
11706 {
11707 if (!TARGET_VFP3)
11708 return 0;
11709
11710 return vfp3_const_double_index (x) != -1;
11711 }
11712
11713 /* Recognize immediates which can be used in various Neon instructions. Legal
11714 immediates are described by the following table (for VMVN variants, the
11715 bitwise inverse of the constant shown is recognized. In either case, VMOV
11716 is output and the correct instruction to use for a given constant is chosen
11717 by the assembler). The constant shown is replicated across all elements of
11718 the destination vector.
11719
11720 insn elems variant constant (binary)
11721 ---- ----- ------- -----------------
11722 vmov i32 0 00000000 00000000 00000000 abcdefgh
11723 vmov i32 1 00000000 00000000 abcdefgh 00000000
11724 vmov i32 2 00000000 abcdefgh 00000000 00000000
11725 vmov i32 3 abcdefgh 00000000 00000000 00000000
11726 vmov i16 4 00000000 abcdefgh
11727 vmov i16 5 abcdefgh 00000000
11728 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11729 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11730 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11731 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11732 vmvn i16 10 00000000 abcdefgh
11733 vmvn i16 11 abcdefgh 00000000
11734 vmov i32 12 00000000 00000000 abcdefgh 11111111
11735 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11736 vmov i32 14 00000000 abcdefgh 11111111 11111111
11737 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11738 vmov i8 16 abcdefgh
11739 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11740 eeeeeeee ffffffff gggggggg hhhhhhhh
11741 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11742 vmov f32 19 00000000 00000000 00000000 00000000
11743
11744 For case 18, B = !b. Representable values are exactly those accepted by
11745 vfp3_const_double_index, but are output as floating-point numbers rather
11746 than indices.
11747
11748 For case 19, we will change it to vmov.i32 when assembling.
11749
11750 Variants 0-5 (inclusive) may also be used as immediates for the second
11751 operand of VORR/VBIC instructions.
11752
11753 The INVERSE argument causes the bitwise inverse of the given operand to be
11754 recognized instead (used for recognizing legal immediates for the VAND/VORN
11755 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11756 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11757 output, rather than the real insns vbic/vorr).
11758
11759 INVERSE makes no difference to the recognition of float vectors.
11760
11761 The return value is the variant of immediate as shown in the above table, or
11762 -1 if the given value doesn't match any of the listed patterns.
11763 */
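/* For instance, a V4SImode constant with every element equal to
   0x0000ab00 matches variant 1 above: the return value is 1,
   *ELEMENTWIDTH is set to 32 and *MODCONST to 0x0000ab00. */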
11764 static int
11765 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11766 rtx *modconst, int *elementwidth)
11767 {
11768 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11769 matches = 1; \
11770 for (i = 0; i < idx; i += (STRIDE)) \
11771 if (!(TEST)) \
11772 matches = 0; \
11773 if (matches) \
11774 { \
11775 immtype = (CLASS); \
11776 elsize = (ELSIZE); \
11777 break; \
11778 }
11779
11780 unsigned int i, elsize = 0, idx = 0, n_elts;
11781 unsigned int innersize;
11782 unsigned char bytes[16];
11783 int immtype = -1, matches;
11784 unsigned int invmask = inverse ? 0xff : 0;
11785 bool vector = GET_CODE (op) == CONST_VECTOR;
11786
11787 if (vector)
11788 n_elts = CONST_VECTOR_NUNITS (op);
11789 else
11790 {
11791 n_elts = 1;
11792 if (mode == VOIDmode)
11793 mode = DImode;
11794 }
11795
11796 innersize = GET_MODE_UNIT_SIZE (mode);
11797
11798 /* Vectors of float constants. */
11799 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11800 {
11801 rtx el0 = CONST_VECTOR_ELT (op, 0);
11802
11803 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11804 return -1;
11805
11806 /* FP16 vectors cannot be represented. */
11807 if (GET_MODE_INNER (mode) == HFmode)
11808 return -1;
11809
11810 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11811 are distinct in this context. */
11812 if (!const_vec_duplicate_p (op))
11813 return -1;
11814
11815 if (modconst)
11816 *modconst = CONST_VECTOR_ELT (op, 0);
11817
11818 if (elementwidth)
11819 *elementwidth = 0;
11820
11821 if (el0 == CONST0_RTX (GET_MODE (el0)))
11822 return 19;
11823 else
11824 return 18;
11825 }
11826
11827 /* The tricks done in the code below apply for little-endian vector layout.
11828 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11829 FIXME: Implement logic for big-endian vectors. */
11830 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11831 return -1;
11832
11833 /* Splat vector constant out into a byte vector. */
11834 for (i = 0; i < n_elts; i++)
11835 {
11836 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11837 unsigned HOST_WIDE_INT elpart;
11838
11839 gcc_assert (CONST_INT_P (el));
11840 elpart = INTVAL (el);
11841
11842 for (unsigned int byte = 0; byte < innersize; byte++)
11843 {
11844 bytes[idx++] = (elpart & 0xff) ^ invmask;
11845 elpart >>= BITS_PER_UNIT;
11846 }
11847 }
11848
11849 /* Sanity check. */
11850 gcc_assert (idx == GET_MODE_SIZE (mode));
11851
11852 do
11853 {
11854 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11855 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11856
11857 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11858 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11859
11860 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11861 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11862
11863 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11864 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11865
11866 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11867
11868 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11869
11870 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11871 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11872
11873 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11874 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11875
11876 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11877 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11878
11879 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11880 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11881
11882 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11883
11884 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11885
11886 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11887 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11888
11889 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11890 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11891
11892 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11893 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11894
11895 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11896 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11897
11898 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11899
11900 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11901 && bytes[i] == bytes[(i + 8) % idx]);
11902 }
11903 while (0);
11904
11905 if (immtype == -1)
11906 return -1;
11907
11908 if (elementwidth)
11909 *elementwidth = elsize;
11910
11911 if (modconst)
11912 {
11913 unsigned HOST_WIDE_INT imm = 0;
11914
11915 /* Un-invert bytes of recognized vector, if necessary. */
11916 if (invmask != 0)
11917 for (i = 0; i < idx; i++)
11918 bytes[i] ^= invmask;
11919
11920 if (immtype == 17)
11921 {
11922 /* FIXME: Broken on 32-bit H_W_I hosts. */
11923 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11924
11925 for (i = 0; i < 8; i++)
11926 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11927 << (i * BITS_PER_UNIT);
11928
11929 *modconst = GEN_INT (imm);
11930 }
11931 else
11932 {
11933 unsigned HOST_WIDE_INT imm = 0;
11934
11935 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11936 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11937
11938 *modconst = GEN_INT (imm);
11939 }
11940 }
11941
11942 return immtype;
11943 #undef CHECK
11944 }
11945
11946 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11947 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11948 float elements), and a modified constant (whatever should be output for a
11949 VMOV) in *MODCONST. */
11950
11951 int
11952 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11953 rtx *modconst, int *elementwidth)
11954 {
11955 rtx tmpconst;
11956 int tmpwidth;
11957 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11958
11959 if (retval == -1)
11960 return 0;
11961
11962 if (modconst)
11963 *modconst = tmpconst;
11964
11965 if (elementwidth)
11966 *elementwidth = tmpwidth;
11967
11968 return 1;
11969 }
11970
11971 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11972 the immediate is valid, write a constant suitable for using as an operand
11973 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11974 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11975
11976 int
11977 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
11978 rtx *modconst, int *elementwidth)
11979 {
11980 rtx tmpconst;
11981 int tmpwidth;
11982 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11983
11984 if (retval < 0 || retval > 5)
11985 return 0;
11986
11987 if (modconst)
11988 *modconst = tmpconst;
11989
11990 if (elementwidth)
11991 *elementwidth = tmpwidth;
11992
11993 return 1;
11994 }
11995
11996 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11997 the immediate is valid, write a constant suitable for using as an operand
11998 to VSHR/VSHL to *MODCONST and the corresponding element width to
11999 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
12000 because the two have different immediate limitations. */
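/* For 8-bit elements, for example, a valid left-shift count is 0..7
   while a valid right-shift count is 1..8, matching the checks below. */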
12001
12002 int
12003 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12004 rtx *modconst, int *elementwidth,
12005 bool isleftshift)
12006 {
12007 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12008 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12009 unsigned HOST_WIDE_INT last_elt = 0;
12010 unsigned HOST_WIDE_INT maxshift;
12011
12012 /* Split vector constant out into a byte vector. */
12013 for (i = 0; i < n_elts; i++)
12014 {
12015 rtx el = CONST_VECTOR_ELT (op, i);
12016 unsigned HOST_WIDE_INT elpart;
12017
12018 if (CONST_INT_P (el))
12019 elpart = INTVAL (el);
12020 else if (CONST_DOUBLE_P (el))
12021 return 0;
12022 else
12023 gcc_unreachable ();
12024
12025 if (i != 0 && elpart != last_elt)
12026 return 0;
12027
12028 last_elt = elpart;
12029 }
12030
12031 /* Shift less than element size. */
12032 maxshift = innersize * 8;
12033
12034 if (isleftshift)
12035 {
12036 /* Left shift immediate value can be from 0 to <size>-1. */
12037 if (last_elt >= maxshift)
12038 return 0;
12039 }
12040 else
12041 {
12042 /* Right shift immediate value can be from 1 to <size>. */
12043 if (last_elt == 0 || last_elt > maxshift)
12044 return 0;
12045 }
12046
12047 if (elementwidth)
12048 *elementwidth = innersize * 8;
12049
12050 if (modconst)
12051 *modconst = CONST_VECTOR_ELT (op, 0);
12052
12053 return 1;
12054 }
12055
12056 /* Return a string suitable for output of Neon immediate logic operation
12057 MNEM. */
12058
12059 char *
12060 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12061 int inverse, int quad)
12062 {
12063 int width, is_valid;
12064 static char templ[40];
12065
12066 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12067
12068 gcc_assert (is_valid != 0);
12069
12070 if (quad)
12071 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12072 else
12073 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12074
12075 return templ;
12076 }
12077
12078 /* Return a string suitable for output of Neon immediate shift operation
12079 (VSHR or VSHL) MNEM. */
12080
12081 char *
12082 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12083 machine_mode mode, int quad,
12084 bool isleftshift)
12085 {
12086 int width, is_valid;
12087 static char templ[40];
12088
12089 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12090 gcc_assert (is_valid != 0);
12091
12092 if (quad)
12093 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12094 else
12095 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12096
12097 return templ;
12098 }
12099
12100 /* Output a sequence of pairwise operations to implement a reduction.
12101 NOTE: We do "too much work" here, because pairwise operations work on two
12102 registers-worth of operands in one go. Unfortunately I don't think we can
12103 exploit those extra calculations to do the full operation in fewer steps.
12104 Although all vector elements of the result but the first are ignored, we
12105 actually calculate the same result in each of the elements. An alternative
12106 such as initially loading a vector with zero to use as each of the second
12107 operands would use up an additional register and take an extra instruction,
12108 for no particular gain. */
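/* Concretely, the loop below halves I each time, so a four-element
   vector needs two pairwise steps (I = 2, then I = 1), the last of
   which writes directly to OP0. */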
12109
12110 void
12111 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12112 rtx (*reduc) (rtx, rtx, rtx))
12113 {
12114 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12115 rtx tmpsum = op1;
12116
12117 for (i = parts / 2; i >= 1; i /= 2)
12118 {
12119 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12120 emit_insn (reduc (dest, tmpsum, tmpsum));
12121 tmpsum = dest;
12122 }
12123 }
12124
12125 /* If VALS is a vector constant that can be loaded into a register
12126 using VDUP, generate instructions to do so and return an RTX to
12127 assign to the register. Otherwise return NULL_RTX. */
12128
12129 static rtx
12130 neon_vdup_constant (rtx vals)
12131 {
12132 machine_mode mode = GET_MODE (vals);
12133 machine_mode inner_mode = GET_MODE_INNER (mode);
12134 rtx x;
12135
12136 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12137 return NULL_RTX;
12138
12139 if (!const_vec_duplicate_p (vals, &x))
12140 /* The elements are not all the same. We could handle repeating
12141 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12142 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12143 vdup.i16). */
12144 return NULL_RTX;
12145
12146 /* We can load this constant by using VDUP and a constant in a
12147 single ARM register. This will be cheaper than a vector
12148 load. */
12149
12150 x = copy_to_mode_reg (inner_mode, x);
12151 return gen_rtx_VEC_DUPLICATE (mode, x);
12152 }
12153
12154 /* Generate code to load VALS, which is a PARALLEL containing only
12155 constants (for vec_init) or CONST_VECTOR, efficiently into a
12156 register. Returns an RTX to copy into the register, or NULL_RTX
12157 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12158
12159 rtx
12160 neon_make_constant (rtx vals)
12161 {
12162 machine_mode mode = GET_MODE (vals);
12163 rtx target;
12164 rtx const_vec = NULL_RTX;
12165 int n_elts = GET_MODE_NUNITS (mode);
12166 int n_const = 0;
12167 int i;
12168
12169 if (GET_CODE (vals) == CONST_VECTOR)
12170 const_vec = vals;
12171 else if (GET_CODE (vals) == PARALLEL)
12172 {
12173 /* A CONST_VECTOR must contain only CONST_INTs and
12174 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12175 Only store valid constants in a CONST_VECTOR. */
12176 for (i = 0; i < n_elts; ++i)
12177 {
12178 rtx x = XVECEXP (vals, 0, i);
12179 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12180 n_const++;
12181 }
12182 if (n_const == n_elts)
12183 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12184 }
12185 else
12186 gcc_unreachable ();
12187
12188 if (const_vec != NULL
12189 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12190 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12191 return const_vec;
12192 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12193 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12194 pipeline cycle; creating the constant takes one or two ARM
12195 pipeline cycles. */
12196 return target;
12197 else if (const_vec != NULL_RTX)
12198 /* Load from constant pool. On Cortex-A8 this takes two cycles
12199 (for either double or quad vectors). We can not take advantage
12200 of single-cycle VLD1 because we need a PC-relative addressing
12201 mode. */
12202 return const_vec;
12203 else
12204 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12205 We can not construct an initializer. */
12206 return NULL_RTX;
12207 }
12208
12209 /* Initialize vector TARGET to VALS. */
12210
12211 void
12212 neon_expand_vector_init (rtx target, rtx vals)
12213 {
12214 machine_mode mode = GET_MODE (target);
12215 machine_mode inner_mode = GET_MODE_INNER (mode);
12216 int n_elts = GET_MODE_NUNITS (mode);
12217 int n_var = 0, one_var = -1;
12218 bool all_same = true;
12219 rtx x, mem;
12220 int i;
12221
12222 for (i = 0; i < n_elts; ++i)
12223 {
12224 x = XVECEXP (vals, 0, i);
12225 if (!CONSTANT_P (x))
12226 ++n_var, one_var = i;
12227
12228 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12229 all_same = false;
12230 }
12231
12232 if (n_var == 0)
12233 {
12234 rtx constant = neon_make_constant (vals);
12235 if (constant != NULL_RTX)
12236 {
12237 emit_move_insn (target, constant);
12238 return;
12239 }
12240 }
12241
12242 /* Splat a single non-constant element if we can. */
12243 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12244 {
12245 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12246 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12247 return;
12248 }
12249
12250 /* One field is non-constant. Load constant then overwrite varying
12251 field. This is more efficient than using the stack. */
12252 if (n_var == 1)
12253 {
12254 rtx copy = copy_rtx (vals);
12255 rtx index = GEN_INT (one_var);
12256
12257 /* Load constant part of vector, substitute neighboring value for
12258 varying element. */
12259 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12260 neon_expand_vector_init (target, copy);
12261
12262 /* Insert variable. */
12263 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12264 switch (mode)
12265 {
12266 case E_V8QImode:
12267 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12268 break;
12269 case E_V16QImode:
12270 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12271 break;
12272 case E_V4HImode:
12273 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12274 break;
12275 case E_V8HImode:
12276 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12277 break;
12278 case E_V2SImode:
12279 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12280 break;
12281 case E_V4SImode:
12282 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12283 break;
12284 case E_V2SFmode:
12285 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12286 break;
12287 case E_V4SFmode:
12288 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12289 break;
12290 case E_V2DImode:
12291 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12292 break;
12293 default:
12294 gcc_unreachable ();
12295 }
12296 return;
12297 }
12298
12299 /* Construct the vector in memory one field at a time
12300 and load the whole vector. */
12301 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12302 for (i = 0; i < n_elts; i++)
12303 emit_move_insn (adjust_address_nv (mem, inner_mode,
12304 i * GET_MODE_SIZE (inner_mode)),
12305 XVECEXP (vals, 0, i));
12306 emit_move_insn (target, mem);
12307 }
12308
12309 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12310 an error if it doesn't. EXP indicates the source location, which includes the
12311 inlining history for intrinsics. */
12312
12313 static void
12314 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12315 const_tree exp, const char *desc)
12316 {
12317 HOST_WIDE_INT lane;
12318
12319 gcc_assert (CONST_INT_P (operand));
12320
12321 lane = INTVAL (operand);
12322
12323 if (lane < low || lane >= high)
12324 {
12325 if (exp)
12326 error ("%K%s %wd out of range %wd - %wd",
12327 exp, desc, lane, low, high - 1);
12328 else
12329 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12330 }
12331 }
12332
12333 /* Bounds-check lanes. */
12334
12335 void
12336 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12337 const_tree exp)
12338 {
12339 bounds_check (operand, low, high, exp, "lane");
12340 }
12341
12342 /* Bounds-check constants. */
12343
12344 void
12345 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12346 {
12347 bounds_check (operand, low, high, NULL_TREE, "constant");
12348 }
12349
12350 HOST_WIDE_INT
12351 neon_element_bits (machine_mode mode)
12352 {
12353 return GET_MODE_UNIT_BITSIZE (mode);
12354 }
12355
12356 \f
12357 /* Predicates for `match_operand' and `match_operator'. */
12358
12359 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12360 WB is true if full writeback address modes are allowed and is false
12361 if limited writeback address modes (POST_INC and PRE_DEC) are
12362 allowed. */
12363
12364 int
12365 arm_coproc_mem_operand (rtx op, bool wb)
12366 {
12367 rtx ind;
12368
12369 /* Reject eliminable registers. */
12370 if (! (reload_in_progress || reload_completed || lra_in_progress)
12371 && ( reg_mentioned_p (frame_pointer_rtx, op)
12372 || reg_mentioned_p (arg_pointer_rtx, op)
12373 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12374 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12375 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12376 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12377 return FALSE;
12378
12379 /* Constants are converted into offsets from labels. */
12380 if (!MEM_P (op))
12381 return FALSE;
12382
12383 ind = XEXP (op, 0);
12384
12385 if (reload_completed
12386 && (GET_CODE (ind) == LABEL_REF
12387 || (GET_CODE (ind) == CONST
12388 && GET_CODE (XEXP (ind, 0)) == PLUS
12389 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12390 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12391 return TRUE;
12392
12393 /* Match: (mem (reg)). */
12394 if (REG_P (ind))
12395 return arm_address_register_rtx_p (ind, 0);
12396
12397 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12398 acceptable in any case (subject to verification by
12399 arm_address_register_rtx_p). We need WB to be true to accept
12400 PRE_INC and POST_DEC. */
12401 if (GET_CODE (ind) == POST_INC
12402 || GET_CODE (ind) == PRE_DEC
12403 || (wb
12404 && (GET_CODE (ind) == PRE_INC
12405 || GET_CODE (ind) == POST_DEC)))
12406 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12407
12408 if (wb
12409 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12410 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12411 && GET_CODE (XEXP (ind, 1)) == PLUS
12412 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12413 ind = XEXP (ind, 1);
12414
12415 /* Match:
12416 (plus (reg)
12417 (const)). */
12418 if (GET_CODE (ind) == PLUS
12419 && REG_P (XEXP (ind, 0))
12420 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12421 && CONST_INT_P (XEXP (ind, 1))
12422 && INTVAL (XEXP (ind, 1)) > -1024
12423 && INTVAL (XEXP (ind, 1)) < 1024
12424 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12425 return TRUE;
12426
12427 return FALSE;
12428 }
12429
12430 /* Return TRUE if OP is a memory operand which we can load or store a vector
12431 to/from. TYPE is one of the following values:
12432 0 - Vector load/store (vldr)
12433 1 - Core registers (ldm)
12434 2 - Element/structure loads (vld1)
12435 */
12436 int
12437 neon_vector_mem_operand (rtx op, int type, bool strict)
12438 {
12439 rtx ind;
12440
12441 /* Reject eliminable registers. */
12442 if (strict && ! (reload_in_progress || reload_completed)
12443 && (reg_mentioned_p (frame_pointer_rtx, op)
12444 || reg_mentioned_p (arg_pointer_rtx, op)
12445 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12446 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12447 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12448 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12449 return FALSE;
12450
12451 /* Constants are converted into offsets from labels. */
12452 if (!MEM_P (op))
12453 return FALSE;
12454
12455 ind = XEXP (op, 0);
12456
12457 if (reload_completed
12458 && (GET_CODE (ind) == LABEL_REF
12459 || (GET_CODE (ind) == CONST
12460 && GET_CODE (XEXP (ind, 0)) == PLUS
12461 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12462 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12463 return TRUE;
12464
12465 /* Match: (mem (reg)). */
12466 if (REG_P (ind))
12467 return arm_address_register_rtx_p (ind, 0);
12468
12469 /* Allow post-increment with Neon registers. */
12470 if ((type != 1 && GET_CODE (ind) == POST_INC)
12471 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12472 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12473
12474 /* Allow post-increment by register for VLDn. */
12475 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12476 && GET_CODE (XEXP (ind, 1)) == PLUS
12477 && REG_P (XEXP (XEXP (ind, 1), 1)))
12478 return true;
12479
12480 /* Match:
12481 (plus (reg)
12482 (const)). */
12483 if (type == 0
12484 && GET_CODE (ind) == PLUS
12485 && REG_P (XEXP (ind, 0))
12486 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12487 && CONST_INT_P (XEXP (ind, 1))
12488 && INTVAL (XEXP (ind, 1)) > -1024
12489 /* For quad modes, we restrict the constant offset to be slightly less
12490 than what the instruction format permits. We have no such constraint
12491 on double mode offsets. (This must match arm_legitimate_index_p.) */
12492 && (INTVAL (XEXP (ind, 1))
12493 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12494 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12495 return TRUE;
12496
12497 return FALSE;
12498 }
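
/* A rough summary of the address forms accepted above, per TYPE (an
   illustrative sketch, not an exhaustive restatement of the code):

     TYPE 0 (vldr):  (reg), (post_inc (reg)), (pre_dec (reg)), and
                     (plus (reg) (const_int n)) with n word-aligned and in
                     (-1024, 1024), capped at 1016 for quad modes;
     TYPE 1 (ldm):   (reg) only -- no auto-increment forms;
     TYPE 2 (vld1):  (reg), (post_inc (reg)), and post-modify by register,
                     e.g. (post_modify (reg r0) (plus (reg r0) (reg r1))).

   After reload, label and label+offset constant addresses are accepted for
   all types.  */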
12499
12500 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12501 type. */
12502 int
12503 neon_struct_mem_operand (rtx op)
12504 {
12505 rtx ind;
12506
12507 /* Reject eliminable registers. */
12508 if (! (reload_in_progress || reload_completed)
12509 && ( reg_mentioned_p (frame_pointer_rtx, op)
12510 || reg_mentioned_p (arg_pointer_rtx, op)
12511 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12512 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12513 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12514 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12515 return FALSE;
12516
12517 /* Constants are converted into offsets from labels. */
12518 if (!MEM_P (op))
12519 return FALSE;
12520
12521 ind = XEXP (op, 0);
12522
12523 if (reload_completed
12524 && (GET_CODE (ind) == LABEL_REF
12525 || (GET_CODE (ind) == CONST
12526 && GET_CODE (XEXP (ind, 0)) == PLUS
12527 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12528 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12529 return TRUE;
12530
12531 /* Match: (mem (reg)). */
12532 if (REG_P (ind))
12533 return arm_address_register_rtx_p (ind, 0);
12534
12535   /* vldm/vstm allow POST_INC (ia) and PRE_DEC (db).  */
12536 if (GET_CODE (ind) == POST_INC
12537 || GET_CODE (ind) == PRE_DEC)
12538 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12539
12540 return FALSE;
12541 }
12542
12543 /* Return true if X is a register that will be eliminated later on. */
12544 int
12545 arm_eliminable_register (rtx x)
12546 {
12547 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12548 || REGNO (x) == ARG_POINTER_REGNUM
12549 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12550 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12551 }
12552
12553 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12554    coprocessor registers.  Otherwise return NO_REGS.  */
12555
12556 enum reg_class
12557 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12558 {
12559 if (mode == HFmode)
12560 {
12561 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12562 return GENERAL_REGS;
12563 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12564 return NO_REGS;
12565 return GENERAL_REGS;
12566 }
12567
12568 /* The neon move patterns handle all legitimate vector and struct
12569 addresses. */
12570 if (TARGET_NEON
12571 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12572 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12573 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12574 || VALID_NEON_STRUCT_MODE (mode)))
12575 return NO_REGS;
12576
12577 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12578 return NO_REGS;
12579
12580 return GENERAL_REGS;
12581 }
12582
12583 /* Values which must be returned in the most-significant end of the return
12584 register. */
12585
12586 static bool
12587 arm_return_in_msb (const_tree valtype)
12588 {
12589 return (TARGET_AAPCS_BASED
12590 && BYTES_BIG_ENDIAN
12591 && (AGGREGATE_TYPE_P (valtype)
12592 || TREE_CODE (valtype) == COMPLEX_TYPE
12593 || FIXED_POINT_TYPE_P (valtype)));
12594 }
12595
12596 /* Return TRUE if X references a SYMBOL_REF. */
12597 int
12598 symbol_mentioned_p (rtx x)
12599 {
12600 const char * fmt;
12601 int i;
12602
12603 if (GET_CODE (x) == SYMBOL_REF)
12604 return 1;
12605
12606 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12607 are constant offsets, not symbols. */
12608 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12609 return 0;
12610
12611 fmt = GET_RTX_FORMAT (GET_CODE (x));
12612
12613 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12614 {
12615 if (fmt[i] == 'E')
12616 {
12617 int j;
12618
12619 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12620 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12621 return 1;
12622 }
12623 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12624 return 1;
12625 }
12626
12627 return 0;
12628 }
12629
12630 /* Return TRUE if X references a LABEL_REF. */
12631 int
12632 label_mentioned_p (rtx x)
12633 {
12634 const char * fmt;
12635 int i;
12636
12637 if (GET_CODE (x) == LABEL_REF)
12638 return 1;
12639
12640 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12641 instruction, but they are constant offsets, not symbols. */
12642 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12643 return 0;
12644
12645 fmt = GET_RTX_FORMAT (GET_CODE (x));
12646 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12647 {
12648 if (fmt[i] == 'E')
12649 {
12650 int j;
12651
12652 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12653 if (label_mentioned_p (XVECEXP (x, i, j)))
12654 return 1;
12655 }
12656 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12657 return 1;
12658 }
12659
12660 return 0;
12661 }
12662
12663 int
12664 tls_mentioned_p (rtx x)
12665 {
12666 switch (GET_CODE (x))
12667 {
12668 case CONST:
12669 return tls_mentioned_p (XEXP (x, 0));
12670
12671 case UNSPEC:
12672 if (XINT (x, 1) == UNSPEC_TLS)
12673 return 1;
12674
12675 /* Fall through. */
12676 default:
12677 return 0;
12678 }
12679 }
12680
12681 /* Must not copy any rtx that uses a pc-relative address.
12682 Also, disallow copying of load-exclusive instructions that
12683 may appear after splitting of compare-and-swap-style operations
12684 so as to prevent those loops from being transformed away from their
12685 canonical forms (see PR 69904). */
12686
12687 static bool
12688 arm_cannot_copy_insn_p (rtx_insn *insn)
12689 {
12690 /* The tls call insn cannot be copied, as it is paired with a data
12691 word. */
12692 if (recog_memoized (insn) == CODE_FOR_tlscall)
12693 return true;
12694
12695 subrtx_iterator::array_type array;
12696 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12697 {
12698 const_rtx x = *iter;
12699 if (GET_CODE (x) == UNSPEC
12700 && (XINT (x, 1) == UNSPEC_PIC_BASE
12701 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12702 return true;
12703 }
12704
12705 rtx set = single_set (insn);
12706 if (set)
12707 {
12708 rtx src = SET_SRC (set);
12709 if (GET_CODE (src) == ZERO_EXTEND)
12710 src = XEXP (src, 0);
12711
12712 /* Catch the load-exclusive and load-acquire operations. */
12713 if (GET_CODE (src) == UNSPEC_VOLATILE
12714 && (XINT (src, 1) == VUNSPEC_LL
12715 || XINT (src, 1) == VUNSPEC_LAX))
12716 return true;
12717 }
12718 return false;
12719 }
12720
12721 enum rtx_code
12722 minmax_code (rtx x)
12723 {
12724 enum rtx_code code = GET_CODE (x);
12725
12726 switch (code)
12727 {
12728 case SMAX:
12729 return GE;
12730 case SMIN:
12731 return LE;
12732 case UMIN:
12733 return LEU;
12734 case UMAX:
12735 return GEU;
12736 default:
12737 gcc_unreachable ();
12738 }
12739 }
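
/* Worked example (register names arbitrary): smax is mapped to GE because
   "rd = smax (rn, rm)" keeps RN exactly when rn >= rm, e.g.

     cmp   rn, rm
     movlt rn, rm    @ overwrite only when the GE condition fails

   The unsigned operators map analogously to GEU and LEU.  */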
12740
12741 /* Match a pair of min/max operators that can be implemented via usat/ssat.  */
12742
12743 bool
12744 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12745 int *mask, bool *signed_sat)
12746 {
12747 /* The high bound must be a power of two minus one. */
12748 int log = exact_log2 (INTVAL (hi_bound) + 1);
12749 if (log == -1)
12750 return false;
12751
12752 /* The low bound is either zero (for usat) or one less than the
12753 negation of the high bound (for ssat). */
12754 if (INTVAL (lo_bound) == 0)
12755 {
12756 if (mask)
12757 *mask = log;
12758 if (signed_sat)
12759 *signed_sat = false;
12760
12761 return true;
12762 }
12763
12764 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12765 {
12766 if (mask)
12767 *mask = log + 1;
12768 if (signed_sat)
12769 *signed_sat = true;
12770
12771 return true;
12772 }
12773
12774 return false;
12775 }
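
/* Worked examples, assuming the usual usat/ssat semantics:

     lo_bound = 0,    hi_bound = 255  ->  *mask = 8, *signed_sat = false
                                          (i.e. "usat Rd, #8, Rm")
     lo_bound = -128, hi_bound = 127  ->  *mask = 8, *signed_sat = true
                                          (i.e. "ssat Rd, #8, Rm")
     lo_bound = 0,    hi_bound = 100  ->  no match, 101 is not a power of 2  */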
12776
12777 /* Return 1 if the memory locations referenced by A and B are adjacent,
   i.e. use the same base register at offsets exactly one word apart.  */
12778 int
12779 adjacent_mem_locations (rtx a, rtx b)
12780 {
12781 /* We don't guarantee to preserve the order of these memory refs. */
12782 if (volatile_refs_p (a) || volatile_refs_p (b))
12783 return 0;
12784
12785 if ((REG_P (XEXP (a, 0))
12786 || (GET_CODE (XEXP (a, 0)) == PLUS
12787 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12788 && (REG_P (XEXP (b, 0))
12789 || (GET_CODE (XEXP (b, 0)) == PLUS
12790 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12791 {
12792 HOST_WIDE_INT val0 = 0, val1 = 0;
12793 rtx reg0, reg1;
12794 int val_diff;
12795
12796 if (GET_CODE (XEXP (a, 0)) == PLUS)
12797 {
12798 reg0 = XEXP (XEXP (a, 0), 0);
12799 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12800 }
12801 else
12802 reg0 = XEXP (a, 0);
12803
12804 if (GET_CODE (XEXP (b, 0)) == PLUS)
12805 {
12806 reg1 = XEXP (XEXP (b, 0), 0);
12807 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12808 }
12809 else
12810 reg1 = XEXP (b, 0);
12811
12812 /* Don't accept any offset that will require multiple
12813 instructions to handle, since this would cause the
12814 arith_adjacentmem pattern to output an overlong sequence. */
12815 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12816 return 0;
12817
12818 /* Don't allow an eliminable register: register elimination can make
12819 the offset too large. */
12820 if (arm_eliminable_register (reg0))
12821 return 0;
12822
12823 val_diff = val1 - val0;
12824
12825 if (arm_ld_sched)
12826 {
12827 /* If the target has load delay slots, then there's no benefit
12828 to using an ldm instruction unless the offset is zero and
12829 we are optimizing for size. */
12830 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12831 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12832 && (val_diff == 4 || val_diff == -4));
12833 }
12834
12835 return ((REGNO (reg0) == REGNO (reg1))
12836 && (val_diff == 4 || val_diff == -4));
12837 }
12838
12839 return 0;
12840 }
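
/* Example (register numbers arbitrary): [r4, #8] and [r4, #12], i.e.
   (mem (plus (reg r4) (const_int 8))) and (mem (plus (reg r4) (const_int 12))),
   are adjacent: same base register, offsets exactly one word apart.  On cores
   with load scheduling (arm_ld_sched) the combination is additionally only
   accepted when optimizing for size and one of the two offsets is 0 or 4.  */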
12841
12842 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12843    for load operations, false for store operations.  MODE is the mode of each
12844    register in the operation.  CONSECUTIVE is true if the register numbers in
12845    the operation must be consecutive in the register bank.  RETURN_PC is true
   if the value is to be loaded into the PC.
12846 The pattern we are trying to match for load is:
12847 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12848 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12849 :
12850 :
12851 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12852 ]
12853 where
12854 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12855 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12856 3. If consecutive is TRUE, then for kth register being loaded,
12857 REGNO (R_dk) = REGNO (R_d0) + k.
12858 The pattern for store is similar. */
12859 bool
12860 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12861 bool consecutive, bool return_pc)
12862 {
12863 HOST_WIDE_INT count = XVECLEN (op, 0);
12864 rtx reg, mem, addr;
12865 unsigned regno;
12866 unsigned first_regno;
12867 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12868 rtx elt;
12869 bool addr_reg_in_reglist = false;
12870 bool update = false;
12871 int reg_increment;
12872 int offset_adj;
12873 int regs_per_val;
12874
12875 /* If not in SImode, then registers must be consecutive
12876 (e.g., VLDM instructions for DFmode). */
12877 gcc_assert ((mode == SImode) || consecutive);
12878 /* Setting return_pc for stores is illegal. */
12879 gcc_assert (!return_pc || load);
12880
12881 /* Set up the increments and the regs per val based on the mode. */
12882 reg_increment = GET_MODE_SIZE (mode);
12883 regs_per_val = reg_increment / 4;
12884 offset_adj = return_pc ? 1 : 0;
12885
12886 if (count <= 1
12887 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12888 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12889 return false;
12890
12891 /* Check if this is a write-back. */
12892 elt = XVECEXP (op, 0, offset_adj);
12893 if (GET_CODE (SET_SRC (elt)) == PLUS)
12894 {
12895 i++;
12896 base = 1;
12897 update = true;
12898
12899 /* The offset adjustment must be the number of registers being
12900 popped times the size of a single register. */
12901 if (!REG_P (SET_DEST (elt))
12902 || !REG_P (XEXP (SET_SRC (elt), 0))
12903 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12904 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12905 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12906 ((count - 1 - offset_adj) * reg_increment))
12907 return false;
12908 }
12909
12910 i = i + offset_adj;
12911 base = base + offset_adj;
12912 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12913 success depends on the type: VLDM can do just one reg,
12914 LDM must do at least two. */
12915 if ((count <= i) && (mode == SImode))
12916 return false;
12917
12918 elt = XVECEXP (op, 0, i - 1);
12919 if (GET_CODE (elt) != SET)
12920 return false;
12921
12922 if (load)
12923 {
12924 reg = SET_DEST (elt);
12925 mem = SET_SRC (elt);
12926 }
12927 else
12928 {
12929 reg = SET_SRC (elt);
12930 mem = SET_DEST (elt);
12931 }
12932
12933 if (!REG_P (reg) || !MEM_P (mem))
12934 return false;
12935
12936 regno = REGNO (reg);
12937 first_regno = regno;
12938 addr = XEXP (mem, 0);
12939 if (GET_CODE (addr) == PLUS)
12940 {
12941 if (!CONST_INT_P (XEXP (addr, 1)))
12942 return false;
12943
12944 offset = INTVAL (XEXP (addr, 1));
12945 addr = XEXP (addr, 0);
12946 }
12947
12948 if (!REG_P (addr))
12949 return false;
12950
12951 /* Don't allow SP to be loaded unless it is also the base register. It
12952 guarantees that SP is reset correctly when an LDM instruction
12953 is interrupted. Otherwise, we might end up with a corrupt stack. */
12954 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12955 return false;
12956
12957 for (; i < count; i++)
12958 {
12959 elt = XVECEXP (op, 0, i);
12960 if (GET_CODE (elt) != SET)
12961 return false;
12962
12963 if (load)
12964 {
12965 reg = SET_DEST (elt);
12966 mem = SET_SRC (elt);
12967 }
12968 else
12969 {
12970 reg = SET_SRC (elt);
12971 mem = SET_DEST (elt);
12972 }
12973
12974 if (!REG_P (reg)
12975 || GET_MODE (reg) != mode
12976 || REGNO (reg) <= regno
12977 || (consecutive
12978 && (REGNO (reg) !=
12979 (unsigned int) (first_regno + regs_per_val * (i - base))))
12980 /* Don't allow SP to be loaded unless it is also the base register. It
12981 guarantees that SP is reset correctly when an LDM instruction
12982 is interrupted. Otherwise, we might end up with a corrupt stack. */
12983 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12984 || !MEM_P (mem)
12985 || GET_MODE (mem) != mode
12986 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12987 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12988 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
12989 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
12990 offset + (i - base) * reg_increment))
12991 && (!REG_P (XEXP (mem, 0))
12992 || offset + (i - base) * reg_increment != 0)))
12993 return false;
12994
12995 regno = REGNO (reg);
12996 if (regno == REGNO (addr))
12997 addr_reg_in_reglist = true;
12998 }
12999
13000 if (load)
13001 {
13002 if (update && addr_reg_in_reglist)
13003 return false;
13004
13005      /* For Thumb-1, the address register is always modified, either by
13006         write-back or by an explicit load.  If the pattern does not describe an update,
13007 then the address register must be in the list of loaded registers. */
13008 if (TARGET_THUMB1)
13009 return update || addr_reg_in_reglist;
13010 }
13011
13012 return true;
13013 }
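
/* An illustrative sketch of the PARALLEL matched for a two-register load with
   base write-back, i.e. "ldmia r0!, {r1, r2}" (register numbers arbitrary):

     (parallel [(set (reg:SI r0) (plus:SI (reg:SI r0) (const_int 8)))
                (set (reg:SI r1) (mem:SI (reg:SI r0)))
                (set (reg:SI r2) (mem:SI (plus:SI (reg:SI r0) (const_int 4))))])

   The write-back element comes first, its adjustment (8) equals the number of
   registers transferred times reg_increment, and each subsequent memory
   offset grows by reg_increment (4 for SImode).  */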
13014
13015 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13016 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13017 instruction. ADD_OFFSET is nonzero if the base address register needs
13018 to be modified with an add instruction before we can use it. */
13019
13020 static bool
13021 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13022 int nops, HOST_WIDE_INT add_offset)
13023 {
13024   /* For the ARM8, ARM9 and StrongARM, 2 ldr instructions are faster than an ldm
13025 if the offset isn't small enough. The reason 2 ldrs are faster
13026 is because these ARMs are able to do more than one cache access
13027 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13028 whilst the ARM8 has a double bandwidth cache. This means that
13029 these cores can do both an instruction fetch and a data fetch in
13030 a single cycle, so the trick of calculating the address into a
13031 scratch register (one of the result regs) and then doing a load
13032 multiple actually becomes slower (and no smaller in code size).
13033 That is the transformation
13034
13035 ldr rd1, [rbase + offset]
13036 ldr rd2, [rbase + offset + 4]
13037
13038 to
13039
13040 add rd1, rbase, offset
13041 ldmia rd1, {rd1, rd2}
13042
13043 produces worse code -- '3 cycles + any stalls on rd2' instead of
13044 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13045 access per cycle, the first sequence could never complete in less
13046 than 6 cycles, whereas the ldm sequence would only take 5 and
13047 would make better use of sequential accesses if not hitting the
13048 cache.
13049
13050 We cheat here and test 'arm_ld_sched' which we currently know to
13051 only be true for the ARM8, ARM9 and StrongARM. If this ever
13052 changes, then the test below needs to be reworked. */
13053 if (nops == 2 && arm_ld_sched && add_offset != 0)
13054 return false;
13055
13056 /* XScale has load-store double instructions, but they have stricter
13057 alignment requirements than load-store multiple, so we cannot
13058 use them.
13059
13060 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13061 the pipeline until completion.
13062
13063 NREGS CYCLES
13064 1 3
13065 2 4
13066 3 5
13067 4 6
13068
13069 An ldr instruction takes 1-3 cycles, but does not block the
13070 pipeline.
13071
13072 NREGS CYCLES
13073 1 1-3
13074 2 2-6
13075 3 3-9
13076 4 4-12
13077
13078 Best case ldr will always win. However, the more ldr instructions
13079 we issue, the less likely we are to be able to schedule them well.
13080 Using ldr instructions also increases code size.
13081
13082 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13083 for counts of 3 or 4 regs. */
13084 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13085 return false;
13086 return true;
13087 }
13088
13089 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13090 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13091 an array ORDER which describes the sequence to use when accessing the
13092 offsets that produces an ascending order. In this sequence, each
13093 offset must be larger by exactly 4 than the previous one. ORDER[0]
13094 must have been filled in with the lowest offset by the caller.
13095 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13096 we use to verify that ORDER produces an ascending order of registers.
13097 Return true if it was possible to construct such an order, false if
13098 not. */
13099
13100 static bool
13101 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13102 int *unsorted_regs)
13103 {
13104 int i;
13105 for (i = 1; i < nops; i++)
13106 {
13107 int j;
13108
13109 order[i] = order[i - 1];
13110 for (j = 0; j < nops; j++)
13111 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13112 {
13113 /* We must find exactly one offset that is higher than the
13114 previous one by 4. */
13115 if (order[i] != order[i - 1])
13116 return false;
13117 order[i] = j;
13118 }
13119 if (order[i] == order[i - 1])
13120 return false;
13121 /* The register numbers must be ascending. */
13122 if (unsorted_regs != NULL
13123 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13124 return false;
13125 }
13126 return true;
13127 }
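
/* Worked example: with UNSORTED_OFFSETS = {8, 0, 12, 4} and ORDER[0] = 1 (the
   index of the smallest offset, filled in by the caller), the loop produces
   ORDER = {1, 3, 0, 2}, i.e. offsets 0, 4, 8, 12.  If any offset is missing
   or duplicated (say {8, 0, 12, 16}), there is no unique "previous + 4" match
   at some step and the function returns false.  */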
13128
13129 /* Used to determine in a peephole whether a sequence of load
13130 instructions can be changed into a load-multiple instruction.
13131 NOPS is the number of separate load instructions we are examining. The
13132 first NOPS entries in OPERANDS are the destination registers, the
13133 next NOPS entries are memory operands. If this function is
13134 successful, *BASE is set to the common base register of the memory
13135 accesses; *LOAD_OFFSET is set to the first memory location's offset
13136 from that base register.
13137 REGS is an array filled in with the destination register numbers.
13138    SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13139    insn numbers to an ascending order of loads.  If CHECK_REGS is true,
13140 the sequence of registers in REGS matches the loads from ascending memory
13141 locations, and the function verifies that the register numbers are
13142 themselves ascending. If CHECK_REGS is false, the register numbers
13143 are stored in the order they are found in the operands. */
13144 static int
13145 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13146 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13147 {
13148 int unsorted_regs[MAX_LDM_STM_OPS];
13149 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13150 int order[MAX_LDM_STM_OPS];
13151 rtx base_reg_rtx = NULL;
13152 int base_reg = -1;
13153 int i, ldm_case;
13154
13155 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13156 easily extended if required. */
13157 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13158
13159 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13160
13161 /* Loop over the operands and check that the memory references are
13162 suitable (i.e. immediate offsets from the same base register). At
13163 the same time, extract the target register, and the memory
13164 offsets. */
13165 for (i = 0; i < nops; i++)
13166 {
13167 rtx reg;
13168 rtx offset;
13169
13170 /* Convert a subreg of a mem into the mem itself. */
13171 if (GET_CODE (operands[nops + i]) == SUBREG)
13172 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13173
13174 gcc_assert (MEM_P (operands[nops + i]));
13175
13176 /* Don't reorder volatile memory references; it doesn't seem worth
13177 looking for the case where the order is ok anyway. */
13178 if (MEM_VOLATILE_P (operands[nops + i]))
13179 return 0;
13180
13181 offset = const0_rtx;
13182
13183 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13184 || (GET_CODE (reg) == SUBREG
13185 && REG_P (reg = SUBREG_REG (reg))))
13186 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13187 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13188 || (GET_CODE (reg) == SUBREG
13189 && REG_P (reg = SUBREG_REG (reg))))
13190 && (CONST_INT_P (offset
13191 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13192 {
13193 if (i == 0)
13194 {
13195 base_reg = REGNO (reg);
13196 base_reg_rtx = reg;
13197 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13198 return 0;
13199 }
13200 else if (base_reg != (int) REGNO (reg))
13201 /* Not addressed from the same base register. */
13202 return 0;
13203
13204 unsorted_regs[i] = (REG_P (operands[i])
13205 ? REGNO (operands[i])
13206 : REGNO (SUBREG_REG (operands[i])));
13207
13208 /* If it isn't an integer register, or if it overwrites the
13209 base register but isn't the last insn in the list, then
13210 we can't do this. */
13211 if (unsorted_regs[i] < 0
13212 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13213 || unsorted_regs[i] > 14
13214 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13215 return 0;
13216
13217 /* Don't allow SP to be loaded unless it is also the base
13218 register. It guarantees that SP is reset correctly when
13219 an LDM instruction is interrupted. Otherwise, we might
13220 end up with a corrupt stack. */
13221 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13222 return 0;
13223
13224 unsorted_offsets[i] = INTVAL (offset);
13225 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13226 order[0] = i;
13227 }
13228 else
13229 /* Not a suitable memory address. */
13230 return 0;
13231 }
13232
13233 /* All the useful information has now been extracted from the
13234 operands into unsorted_regs and unsorted_offsets; additionally,
13235 order[0] has been set to the lowest offset in the list. Sort
13236 the offsets into order, verifying that they are adjacent, and
13237 check that the register numbers are ascending. */
13238 if (!compute_offset_order (nops, unsorted_offsets, order,
13239 check_regs ? unsorted_regs : NULL))
13240 return 0;
13241
13242 if (saved_order)
13243 memcpy (saved_order, order, sizeof order);
13244
13245 if (base)
13246 {
13247 *base = base_reg;
13248
13249 for (i = 0; i < nops; i++)
13250 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13251
13252 *load_offset = unsorted_offsets[order[0]];
13253 }
13254
13255 if (TARGET_THUMB1
13256 && !peep2_reg_dead_p (nops, base_reg_rtx))
13257 return 0;
13258
13259 if (unsorted_offsets[order[0]] == 0)
13260 ldm_case = 1; /* ldmia */
13261 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13262 ldm_case = 2; /* ldmib */
13263 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13264 ldm_case = 3; /* ldmda */
13265 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13266 ldm_case = 4; /* ldmdb */
13267 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13268 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13269 ldm_case = 5;
13270 else
13271 return 0;
13272
13273 if (!multiple_operation_profitable_p (false, nops,
13274 ldm_case == 5
13275 ? unsorted_offsets[order[0]] : 0))
13276 return 0;
13277
13278 return ldm_case;
13279 }
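
/* A sketch of how the return value maps to instructions, given ascending
   word offsets O[0] < ... < O[n-1] from the common base register:

     O[0]   == 0   ->  1 (ldmia)
     O[0]   == 4   ->  2 (ldmib, ARM only)
     O[n-1] == 0   ->  3 (ldmda, ARM only)
     O[n-1] == -4  ->  4 (ldmdb, 32-bit only)
     otherwise, if O[0] or -O[0] is a valid add/sub immediate
                   ->  5 (add a scratch base first, then ldmia)

   A return value of 0 means the sequence cannot, or should not, be combined.  */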
13280
13281 /* Used to determine in a peephole whether a sequence of store instructions can
13282 be changed into a store-multiple instruction.
13283 NOPS is the number of separate store instructions we are examining.
13284 NOPS_TOTAL is the total number of instructions recognized by the peephole
13285 pattern.
13286 The first NOPS entries in OPERANDS are the source registers, the next
13287 NOPS entries are memory operands. If this function is successful, *BASE is
13288 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13289 to the first memory location's offset from that base register. REGS is an
13290 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13291 likewise filled with the corresponding rtx's.
13292    SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13293 numbers to an ascending order of stores.
13294 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13295 from ascending memory locations, and the function verifies that the register
13296 numbers are themselves ascending. If CHECK_REGS is false, the register
13297 numbers are stored in the order they are found in the operands. */
13298 static int
13299 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13300 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13301 HOST_WIDE_INT *load_offset, bool check_regs)
13302 {
13303 int unsorted_regs[MAX_LDM_STM_OPS];
13304 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13305 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13306 int order[MAX_LDM_STM_OPS];
13307 int base_reg = -1;
13308 rtx base_reg_rtx = NULL;
13309 int i, stm_case;
13310
13311   /* Write-back of the base register is currently only supported for Thumb-1.  */
13312 int base_writeback = TARGET_THUMB1;
13313
13314 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13315 easily extended if required. */
13316 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13317
13318 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13319
13320 /* Loop over the operands and check that the memory references are
13321 suitable (i.e. immediate offsets from the same base register). At
13322 the same time, extract the target register, and the memory
13323 offsets. */
13324 for (i = 0; i < nops; i++)
13325 {
13326 rtx reg;
13327 rtx offset;
13328
13329 /* Convert a subreg of a mem into the mem itself. */
13330 if (GET_CODE (operands[nops + i]) == SUBREG)
13331 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13332
13333 gcc_assert (MEM_P (operands[nops + i]));
13334
13335 /* Don't reorder volatile memory references; it doesn't seem worth
13336 looking for the case where the order is ok anyway. */
13337 if (MEM_VOLATILE_P (operands[nops + i]))
13338 return 0;
13339
13340 offset = const0_rtx;
13341
13342 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13343 || (GET_CODE (reg) == SUBREG
13344 && REG_P (reg = SUBREG_REG (reg))))
13345 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13346 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13347 || (GET_CODE (reg) == SUBREG
13348 && REG_P (reg = SUBREG_REG (reg))))
13349 && (CONST_INT_P (offset
13350 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13351 {
13352 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13353 ? operands[i] : SUBREG_REG (operands[i]));
13354 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13355
13356 if (i == 0)
13357 {
13358 base_reg = REGNO (reg);
13359 base_reg_rtx = reg;
13360 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13361 return 0;
13362 }
13363 else if (base_reg != (int) REGNO (reg))
13364 /* Not addressed from the same base register. */
13365 return 0;
13366
13367 /* If it isn't an integer register, then we can't do this. */
13368 if (unsorted_regs[i] < 0
13369 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13370 /* The effects are unpredictable if the base register is
13371 both updated and stored. */
13372 || (base_writeback && unsorted_regs[i] == base_reg)
13373 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13374 || unsorted_regs[i] > 14)
13375 return 0;
13376
13377 unsorted_offsets[i] = INTVAL (offset);
13378 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13379 order[0] = i;
13380 }
13381 else
13382 /* Not a suitable memory address. */
13383 return 0;
13384 }
13385
13386 /* All the useful information has now been extracted from the
13387 operands into unsorted_regs and unsorted_offsets; additionally,
13388 order[0] has been set to the lowest offset in the list. Sort
13389 the offsets into order, verifying that they are adjacent, and
13390 check that the register numbers are ascending. */
13391 if (!compute_offset_order (nops, unsorted_offsets, order,
13392 check_regs ? unsorted_regs : NULL))
13393 return 0;
13394
13395 if (saved_order)
13396 memcpy (saved_order, order, sizeof order);
13397
13398 if (base)
13399 {
13400 *base = base_reg;
13401
13402 for (i = 0; i < nops; i++)
13403 {
13404 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13405 if (reg_rtxs)
13406 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13407 }
13408
13409 *load_offset = unsorted_offsets[order[0]];
13410 }
13411
13412 if (TARGET_THUMB1
13413 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13414 return 0;
13415
13416 if (unsorted_offsets[order[0]] == 0)
13417 stm_case = 1; /* stmia */
13418 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13419 stm_case = 2; /* stmib */
13420 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13421 stm_case = 3; /* stmda */
13422 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13423 stm_case = 4; /* stmdb */
13424 else
13425 return 0;
13426
13427 if (!multiple_operation_profitable_p (false, nops, 0))
13428 return 0;
13429
13430 return stm_case;
13431 }
13432 \f
13433 /* Routines for use in generating RTL. */
13434
13435 /* Generate a load-multiple instruction. COUNT is the number of loads in
13436 the instruction; REGS and MEMS are arrays containing the operands.
13437 BASEREG is the base register to be used in addressing the memory operands.
13438 WBACK_OFFSET is nonzero if the instruction should update the base
13439 register. */
13440
13441 static rtx
13442 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13443 HOST_WIDE_INT wback_offset)
13444 {
13445 int i = 0, j;
13446 rtx result;
13447
13448 if (!multiple_operation_profitable_p (false, count, 0))
13449 {
13450 rtx seq;
13451
13452 start_sequence ();
13453
13454 for (i = 0; i < count; i++)
13455 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13456
13457 if (wback_offset != 0)
13458 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13459
13460 seq = get_insns ();
13461 end_sequence ();
13462
13463 return seq;
13464 }
13465
13466 result = gen_rtx_PARALLEL (VOIDmode,
13467 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13468 if (wback_offset != 0)
13469 {
13470 XVECEXP (result, 0, 0)
13471 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13472 i = 1;
13473 count++;
13474 }
13475
13476 for (j = 0; i < count; i++, j++)
13477 XVECEXP (result, 0, i)
13478 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13479
13480 return result;
13481 }
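
/* Illustrative example (register numbers arbitrary): for COUNT = 2,
   REGS = {4, 5}, BASEREG = r0 and WBACK_OFFSET = 8, the profitable path
   builds

     (parallel [(set (reg:SI r0) (plus:SI (reg:SI r0) (const_int 8)))
                (set (reg:SI r4) (mem:SI ...))     ; MEMS[0]
                (set (reg:SI r5) (mem:SI ...))])   ; MEMS[1]

   which is intended to match the ldm patterns ("ldmia r0!, {r4, r5}").  On
   the unprofitable path a plain sequence of single loads, plus an add for
   the write-back, is emitted instead.  */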
13482
13483 /* Generate a store-multiple instruction. COUNT is the number of stores in
13484 the instruction; REGS and MEMS are arrays containing the operands.
13485 BASEREG is the base register to be used in addressing the memory operands.
13486 WBACK_OFFSET is nonzero if the instruction should update the base
13487 register. */
13488
13489 static rtx
13490 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13491 HOST_WIDE_INT wback_offset)
13492 {
13493 int i = 0, j;
13494 rtx result;
13495
13496 if (GET_CODE (basereg) == PLUS)
13497 basereg = XEXP (basereg, 0);
13498
13499 if (!multiple_operation_profitable_p (false, count, 0))
13500 {
13501 rtx seq;
13502
13503 start_sequence ();
13504
13505 for (i = 0; i < count; i++)
13506 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13507
13508 if (wback_offset != 0)
13509 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13510
13511 seq = get_insns ();
13512 end_sequence ();
13513
13514 return seq;
13515 }
13516
13517 result = gen_rtx_PARALLEL (VOIDmode,
13518 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13519 if (wback_offset != 0)
13520 {
13521 XVECEXP (result, 0, 0)
13522 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13523 i = 1;
13524 count++;
13525 }
13526
13527 for (j = 0; i < count; i++, j++)
13528 XVECEXP (result, 0, i)
13529 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13530
13531 return result;
13532 }
13533
13534 /* Generate either a load-multiple or a store-multiple instruction. This
13535 function can be used in situations where we can start with a single MEM
13536 rtx and adjust its address upwards.
13537 COUNT is the number of operations in the instruction, not counting a
13538 possible update of the base register. REGS is an array containing the
13539 register operands.
13540 BASEREG is the base register to be used in addressing the memory operands,
13541 which are constructed from BASEMEM.
13542 WRITE_BACK specifies whether the generated instruction should include an
13543 update of the base register.
13544 OFFSETP is used to pass an offset to and from this function; this offset
13545 is not used when constructing the address (instead BASEMEM should have an
13546    appropriate offset in its address); it is used only for setting
13547    MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
13548
13549 static rtx
13550 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13551 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13552 {
13553 rtx mems[MAX_LDM_STM_OPS];
13554 HOST_WIDE_INT offset = *offsetp;
13555 int i;
13556
13557 gcc_assert (count <= MAX_LDM_STM_OPS);
13558
13559 if (GET_CODE (basereg) == PLUS)
13560 basereg = XEXP (basereg, 0);
13561
13562 for (i = 0; i < count; i++)
13563 {
13564 rtx addr = plus_constant (Pmode, basereg, i * 4);
13565 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13566 offset += 4;
13567 }
13568
13569 if (write_back)
13570 *offsetp = offset;
13571
13572 if (is_load)
13573 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13574 write_back ? 4 * count : 0);
13575 else
13576 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13577 write_back ? 4 * count : 0);
13578 }
13579
13580 rtx
13581 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13582 rtx basemem, HOST_WIDE_INT *offsetp)
13583 {
13584 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13585 offsetp);
13586 }
13587
13588 rtx
13589 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13590 rtx basemem, HOST_WIDE_INT *offsetp)
13591 {
13592 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13593 offsetp);
13594 }
13595
13596 /* Called from a peephole2 expander to turn a sequence of loads into an
13597 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13598 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13599    is true if we can reorder the registers because their subsequent uses are
13600    commutative.
13601 Returns true iff we could generate a new instruction. */
13602
13603 bool
13604 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13605 {
13606 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13607 rtx mems[MAX_LDM_STM_OPS];
13608 int i, j, base_reg;
13609 rtx base_reg_rtx;
13610 HOST_WIDE_INT offset;
13611 int write_back = FALSE;
13612 int ldm_case;
13613 rtx addr;
13614
13615 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13616 &base_reg, &offset, !sort_regs);
13617
13618 if (ldm_case == 0)
13619 return false;
13620
13621 if (sort_regs)
13622 for (i = 0; i < nops - 1; i++)
13623 for (j = i + 1; j < nops; j++)
13624 if (regs[i] > regs[j])
13625 {
13626 int t = regs[i];
13627 regs[i] = regs[j];
13628 regs[j] = t;
13629 }
13630 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13631
13632 if (TARGET_THUMB1)
13633 {
13634 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13635 gcc_assert (ldm_case == 1 || ldm_case == 5);
13636 write_back = TRUE;
13637 }
13638
13639 if (ldm_case == 5)
13640 {
13641 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13642 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13643 offset = 0;
13644 if (!TARGET_THUMB1)
13645 base_reg_rtx = newbase;
13646 }
13647
13648 for (i = 0; i < nops; i++)
13649 {
13650 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13651 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13652 SImode, addr, 0);
13653 }
13654 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13655 write_back ? offset + i * 4 : 0));
13656 return true;
13657 }
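
/* Illustrative transformation (register numbers arbitrary): a matched
   peephole such as

     ldr r1, [r3, #4]
     ldr r0, [r3]

   becomes, with SORT_REGS so that the destination registers may be swapped,

     ldmia r3, {r0, r1}

   For ldm_case 5 an add of the lowest offset into a scratch (or the first
   destination register) is emitted first; on Thumb-1 the base register must
   be dead afterwards so that the write-back form can be used.  */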
13658
13659 /* Called from a peephole2 expander to turn a sequence of stores into an
13660 STM instruction. OPERANDS are the operands found by the peephole matcher;
13661 NOPS indicates how many separate stores we are trying to combine.
13662 Returns true iff we could generate a new instruction. */
13663
13664 bool
13665 gen_stm_seq (rtx *operands, int nops)
13666 {
13667 int i;
13668 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13669 rtx mems[MAX_LDM_STM_OPS];
13670 int base_reg;
13671 rtx base_reg_rtx;
13672 HOST_WIDE_INT offset;
13673 int write_back = FALSE;
13674 int stm_case;
13675 rtx addr;
13676 bool base_reg_dies;
13677
13678 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13679 mem_order, &base_reg, &offset, true);
13680
13681 if (stm_case == 0)
13682 return false;
13683
13684 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13685
13686 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13687 if (TARGET_THUMB1)
13688 {
13689 gcc_assert (base_reg_dies);
13690 write_back = TRUE;
13691 }
13692
13693 if (stm_case == 5)
13694 {
13695 gcc_assert (base_reg_dies);
13696 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13697 offset = 0;
13698 }
13699
13700 addr = plus_constant (Pmode, base_reg_rtx, offset);
13701
13702 for (i = 0; i < nops; i++)
13703 {
13704 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13705 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13706 SImode, addr, 0);
13707 }
13708 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13709 write_back ? offset + i * 4 : 0));
13710 return true;
13711 }
13712
13713 /* Called from a peephole2 expander to turn a sequence of stores that are
13714 preceded by constant loads into an STM instruction. OPERANDS are the
13715 operands found by the peephole matcher; NOPS indicates how many
13716 separate stores we are trying to combine; there are 2 * NOPS
13717 instructions in the peephole.
13718 Returns true iff we could generate a new instruction. */
13719
13720 bool
13721 gen_const_stm_seq (rtx *operands, int nops)
13722 {
13723 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13724 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13725 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13726 rtx mems[MAX_LDM_STM_OPS];
13727 int base_reg;
13728 rtx base_reg_rtx;
13729 HOST_WIDE_INT offset;
13730 int write_back = FALSE;
13731 int stm_case;
13732 rtx addr;
13733 bool base_reg_dies;
13734 int i, j;
13735 HARD_REG_SET allocated;
13736
13737 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13738 mem_order, &base_reg, &offset, false);
13739
13740 if (stm_case == 0)
13741 return false;
13742
13743 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13744
13745 /* If the same register is used more than once, try to find a free
13746 register. */
13747 CLEAR_HARD_REG_SET (allocated);
13748 for (i = 0; i < nops; i++)
13749 {
13750 for (j = i + 1; j < nops; j++)
13751 if (regs[i] == regs[j])
13752 {
13753 rtx t = peep2_find_free_register (0, nops * 2,
13754 TARGET_THUMB1 ? "l" : "r",
13755 SImode, &allocated);
13756 if (t == NULL_RTX)
13757 return false;
13758 reg_rtxs[i] = t;
13759 regs[i] = REGNO (t);
13760 }
13761 }
13762
13763 /* Compute an ordering that maps the register numbers to an ascending
13764 sequence. */
13765 reg_order[0] = 0;
13766 for (i = 0; i < nops; i++)
13767 if (regs[i] < regs[reg_order[0]])
13768 reg_order[0] = i;
13769
13770 for (i = 1; i < nops; i++)
13771 {
13772 int this_order = reg_order[i - 1];
13773 for (j = 0; j < nops; j++)
13774 if (regs[j] > regs[reg_order[i - 1]]
13775 && (this_order == reg_order[i - 1]
13776 || regs[j] < regs[this_order]))
13777 this_order = j;
13778 reg_order[i] = this_order;
13779 }
13780
13781 /* Ensure that registers that must be live after the instruction end
13782 up with the correct value. */
13783 for (i = 0; i < nops; i++)
13784 {
13785 int this_order = reg_order[i];
13786 if ((this_order != mem_order[i]
13787 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13788 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13789 return false;
13790 }
13791
13792 /* Load the constants. */
13793 for (i = 0; i < nops; i++)
13794 {
13795 rtx op = operands[2 * nops + mem_order[i]];
13796 sorted_regs[i] = regs[reg_order[i]];
13797 emit_move_insn (reg_rtxs[reg_order[i]], op);
13798 }
13799
13800 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13801
13802 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13803 if (TARGET_THUMB1)
13804 {
13805 gcc_assert (base_reg_dies);
13806 write_back = TRUE;
13807 }
13808
13809 if (stm_case == 5)
13810 {
13811 gcc_assert (base_reg_dies);
13812 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13813 offset = 0;
13814 }
13815
13816 addr = plus_constant (Pmode, base_reg_rtx, offset);
13817
13818 for (i = 0; i < nops; i++)
13819 {
13820 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13821 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13822 SImode, addr, 0);
13823 }
13824 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13825 write_back ? offset + i * 4 : 0));
13826 return true;
13827 }
13828
13829 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13830 unaligned copies on processors which support unaligned semantics for those
13831 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13832 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13833 An interleave factor of 1 (the minimum) will perform no interleaving.
13834 Load/store multiple are used for aligned addresses where possible. */
13835
13836 static void
13837 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13838 HOST_WIDE_INT length,
13839 unsigned int interleave_factor)
13840 {
13841 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13842 int *regnos = XALLOCAVEC (int, interleave_factor);
13843 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13844 HOST_WIDE_INT i, j;
13845 HOST_WIDE_INT remaining = length, words;
13846 rtx halfword_tmp = NULL, byte_tmp = NULL;
13847 rtx dst, src;
13848 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13849 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13850 HOST_WIDE_INT srcoffset, dstoffset;
13851 HOST_WIDE_INT src_autoinc, dst_autoinc;
13852 rtx mem, addr;
13853
13854 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13855
13856 /* Use hard registers if we have aligned source or destination so we can use
13857 load/store multiple with contiguous registers. */
13858 if (dst_aligned || src_aligned)
13859 for (i = 0; i < interleave_factor; i++)
13860 regs[i] = gen_rtx_REG (SImode, i);
13861 else
13862 for (i = 0; i < interleave_factor; i++)
13863 regs[i] = gen_reg_rtx (SImode);
13864
13865 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13866 src = copy_addr_to_reg (XEXP (srcbase, 0));
13867
13868 srcoffset = dstoffset = 0;
13869
13870 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13871 For copying the last bytes we want to subtract this offset again. */
13872 src_autoinc = dst_autoinc = 0;
13873
13874 for (i = 0; i < interleave_factor; i++)
13875 regnos[i] = i;
13876
13877 /* Copy BLOCK_SIZE_BYTES chunks. */
13878
13879 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13880 {
13881 /* Load words. */
13882 if (src_aligned && interleave_factor > 1)
13883 {
13884 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13885 TRUE, srcbase, &srcoffset));
13886 src_autoinc += UNITS_PER_WORD * interleave_factor;
13887 }
13888 else
13889 {
13890 for (j = 0; j < interleave_factor; j++)
13891 {
13892 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13893 - src_autoinc));
13894 mem = adjust_automodify_address (srcbase, SImode, addr,
13895 srcoffset + j * UNITS_PER_WORD);
13896 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13897 }
13898 srcoffset += block_size_bytes;
13899 }
13900
13901 /* Store words. */
13902 if (dst_aligned && interleave_factor > 1)
13903 {
13904 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13905 TRUE, dstbase, &dstoffset));
13906 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13907 }
13908 else
13909 {
13910 for (j = 0; j < interleave_factor; j++)
13911 {
13912 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13913 - dst_autoinc));
13914 mem = adjust_automodify_address (dstbase, SImode, addr,
13915 dstoffset + j * UNITS_PER_WORD);
13916 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13917 }
13918 dstoffset += block_size_bytes;
13919 }
13920
13921 remaining -= block_size_bytes;
13922 }
13923
13924 /* Copy any whole words left (note these aren't interleaved with any
13925 subsequent halfword/byte load/stores in the interests of simplicity). */
13926
13927 words = remaining / UNITS_PER_WORD;
13928
13929 gcc_assert (words < interleave_factor);
13930
13931 if (src_aligned && words > 1)
13932 {
13933 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13934 &srcoffset));
13935 src_autoinc += UNITS_PER_WORD * words;
13936 }
13937 else
13938 {
13939 for (j = 0; j < words; j++)
13940 {
13941 addr = plus_constant (Pmode, src,
13942 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13943 mem = adjust_automodify_address (srcbase, SImode, addr,
13944 srcoffset + j * UNITS_PER_WORD);
13945 if (src_aligned)
13946 emit_move_insn (regs[j], mem);
13947 else
13948 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13949 }
13950 srcoffset += words * UNITS_PER_WORD;
13951 }
13952
13953 if (dst_aligned && words > 1)
13954 {
13955 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13956 &dstoffset));
13957 dst_autoinc += words * UNITS_PER_WORD;
13958 }
13959 else
13960 {
13961 for (j = 0; j < words; j++)
13962 {
13963 addr = plus_constant (Pmode, dst,
13964 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13965 mem = adjust_automodify_address (dstbase, SImode, addr,
13966 dstoffset + j * UNITS_PER_WORD);
13967 if (dst_aligned)
13968 emit_move_insn (mem, regs[j]);
13969 else
13970 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13971 }
13972 dstoffset += words * UNITS_PER_WORD;
13973 }
13974
13975 remaining -= words * UNITS_PER_WORD;
13976
13977 gcc_assert (remaining < 4);
13978
13979 /* Copy a halfword if necessary. */
13980
13981 if (remaining >= 2)
13982 {
13983 halfword_tmp = gen_reg_rtx (SImode);
13984
13985 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13986 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13987 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13988
13989 /* Either write out immediately, or delay until we've loaded the last
13990 byte, depending on interleave factor. */
13991 if (interleave_factor == 1)
13992 {
13993 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13994 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13995 emit_insn (gen_unaligned_storehi (mem,
13996 gen_lowpart (HImode, halfword_tmp)));
13997 halfword_tmp = NULL;
13998 dstoffset += 2;
13999 }
14000
14001 remaining -= 2;
14002 srcoffset += 2;
14003 }
14004
14005 gcc_assert (remaining < 2);
14006
14007 /* Copy last byte. */
14008
14009 if ((remaining & 1) != 0)
14010 {
14011 byte_tmp = gen_reg_rtx (SImode);
14012
14013 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14014 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14015 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14016
14017 if (interleave_factor == 1)
14018 {
14019 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14020 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14021 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14022 byte_tmp = NULL;
14023 dstoffset++;
14024 }
14025
14026 remaining--;
14027 srcoffset++;
14028 }
14029
14030 /* Store last halfword if we haven't done so already. */
14031
14032 if (halfword_tmp)
14033 {
14034 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14035 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14036 emit_insn (gen_unaligned_storehi (mem,
14037 gen_lowpart (HImode, halfword_tmp)));
14038 dstoffset += 2;
14039 }
14040
14041 /* Likewise for last byte. */
14042
14043 if (byte_tmp)
14044 {
14045 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14046 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14047 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14048 dstoffset++;
14049 }
14050
14051 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14052 }
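
/* Illustrative expansion, assuming LENGTH = 7, INTERLEAVE_FACTOR = 1 and both
   ends unaligned (register names arbitrary):

     ldr  rT, [src]       @ unaligned word copy
     str  rT, [dst]
     ldrh rH, [src, #4]   @ trailing halfword
     strh rH, [dst, #4]
     ldrb rB, [src, #6]   @ trailing byte
     strb rB, [dst, #6]

   With an aligned source or destination and a larger interleave factor the
   word-sized part is instead done with ldm/stm on hard registers r0 upwards.  */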
14053
14054 /* From mips_adjust_block_mem:
14055
14056 Helper function for doing a loop-based block operation on memory
14057 reference MEM. Each iteration of the loop will operate on LENGTH
14058 bytes of MEM.
14059
14060 Create a new base register for use within the loop and point it to
14061 the start of MEM. Create a new memory reference that uses this
14062 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14063
14064 static void
14065 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14066 rtx *loop_mem)
14067 {
14068 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14069
14070 /* Although the new mem does not refer to a known location,
14071 it does keep up to LENGTH bytes of alignment. */
14072 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14073 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14074 }
14075
14076 /* From mips_block_move_loop:
14077
14078 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14079 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14080 the memory regions do not overlap. */
14081
14082 static void
14083 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14084 unsigned int interleave_factor,
14085 HOST_WIDE_INT bytes_per_iter)
14086 {
14087 rtx src_reg, dest_reg, final_src, test;
14088 HOST_WIDE_INT leftover;
14089
14090 leftover = length % bytes_per_iter;
14091 length -= leftover;
14092
14093 /* Create registers and memory references for use within the loop. */
14094 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14095 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14096
14097 /* Calculate the value that SRC_REG should have after the last iteration of
14098 the loop. */
14099 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14100 0, 0, OPTAB_WIDEN);
14101
14102 /* Emit the start of the loop. */
14103 rtx_code_label *label = gen_label_rtx ();
14104 emit_label (label);
14105
14106 /* Emit the loop body. */
14107 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14108 interleave_factor);
14109
14110 /* Move on to the next block. */
14111 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14112 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14113
14114 /* Emit the loop condition. */
14115 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14116 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14117
14118 /* Mop up any left-over bytes. */
14119 if (leftover)
14120 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14121 }
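
/* Example: for LENGTH = 40 and BYTES_PER_ITER = 16 this emits a loop that
   copies 32 bytes in two iterations, advancing SRC_REG and DEST_REG by 16
   each time until SRC_REG reaches src + 32, followed by a straight-line copy
   of the remaining 8 bytes.  */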
14122
14123 /* Emit a block move when either the source or destination is unaligned (not
14124 aligned to a four-byte boundary). This may need further tuning depending on
14125 core type, optimize_size setting, etc. */
14126
14127 static int
14128 arm_movmemqi_unaligned (rtx *operands)
14129 {
14130 HOST_WIDE_INT length = INTVAL (operands[2]);
14131
14132 if (optimize_size)
14133 {
14134 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14135 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14136 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14137 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14138 or dst_aligned though: allow more interleaving in those cases since the
14139 resulting code can be smaller. */
14140 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14141 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14142
14143 if (length > 12)
14144 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14145 interleave_factor, bytes_per_iter);
14146 else
14147 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14148 interleave_factor);
14149 }
14150 else
14151 {
14152 /* Note that the loop created by arm_block_move_unaligned_loop may be
14153 subject to loop unrolling, which makes tuning this condition a little
14154 redundant. */
14155 if (length > 32)
14156 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14157 else
14158 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14159 }
14160
14161 return 1;
14162 }
14163
14164 int
14165 arm_gen_movmemqi (rtx *operands)
14166 {
14167 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14168 HOST_WIDE_INT srcoffset, dstoffset;
14169 rtx src, dst, srcbase, dstbase;
14170 rtx part_bytes_reg = NULL;
14171 rtx mem;
14172
14173 if (!CONST_INT_P (operands[2])
14174 || !CONST_INT_P (operands[3])
14175 || INTVAL (operands[2]) > 64)
14176 return 0;
14177
14178 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14179 return arm_movmemqi_unaligned (operands);
14180
14181 if (INTVAL (operands[3]) & 3)
14182 return 0;
14183
14184 dstbase = operands[0];
14185 srcbase = operands[1];
14186
14187 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14188 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14189
14190 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14191 out_words_to_go = INTVAL (operands[2]) / 4;
14192 last_bytes = INTVAL (operands[2]) & 3;
14193 dstoffset = srcoffset = 0;
14194
14195 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14196 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14197
14198 while (in_words_to_go >= 2)
14199 {
14200 if (in_words_to_go > 4)
14201 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14202 TRUE, srcbase, &srcoffset));
14203 else
14204 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14205 src, FALSE, srcbase,
14206 &srcoffset));
14207
14208 if (out_words_to_go)
14209 {
14210 if (out_words_to_go > 4)
14211 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14212 TRUE, dstbase, &dstoffset));
14213 else if (out_words_to_go != 1)
14214 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14215 out_words_to_go, dst,
14216 (last_bytes == 0
14217 ? FALSE : TRUE),
14218 dstbase, &dstoffset));
14219 else
14220 {
14221 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14222 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14223 if (last_bytes != 0)
14224 {
14225 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14226 dstoffset += 4;
14227 }
14228 }
14229 }
14230
14231 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14232 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14233 }
14234
14235 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14236 if (out_words_to_go)
14237 {
14238 rtx sreg;
14239
14240 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14241 sreg = copy_to_reg (mem);
14242
14243 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14244 emit_move_insn (mem, sreg);
14245 in_words_to_go--;
14246
14247 gcc_assert (!in_words_to_go); /* Sanity check */
14248 }
14249
14250 if (in_words_to_go)
14251 {
14252 gcc_assert (in_words_to_go > 0);
14253
14254 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14255 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14256 }
14257
14258 gcc_assert (!last_bytes || part_bytes_reg);
14259
14260 if (BYTES_BIG_ENDIAN && last_bytes)
14261 {
14262 rtx tmp = gen_reg_rtx (SImode);
14263
14264 /* The bytes we want are in the top end of the word. */
14265 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14266 GEN_INT (8 * (4 - last_bytes))));
14267 part_bytes_reg = tmp;
14268
14269 while (last_bytes)
14270 {
14271 mem = adjust_automodify_address (dstbase, QImode,
14272 plus_constant (Pmode, dst,
14273 last_bytes - 1),
14274 dstoffset + last_bytes - 1);
14275 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14276
14277 if (--last_bytes)
14278 {
14279 tmp = gen_reg_rtx (SImode);
14280 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14281 part_bytes_reg = tmp;
14282 }
14283 }
14284
14285 }
14286 else
14287 {
14288 if (last_bytes > 1)
14289 {
14290 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14291 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14292 last_bytes -= 2;
14293 if (last_bytes)
14294 {
14295 rtx tmp = gen_reg_rtx (SImode);
14296 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14297 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14298 part_bytes_reg = tmp;
14299 dstoffset += 2;
14300 }
14301 }
14302
14303 if (last_bytes)
14304 {
14305 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14306 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14307 }
14308 }
14309
14310 return 1;
14311 }
14312
14313 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14314 by mode size. */
14315 inline static rtx
14316 next_consecutive_mem (rtx mem)
14317 {
14318 machine_mode mode = GET_MODE (mem);
14319 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14320 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14321
14322 return adjust_automodify_address (mem, mode, addr, offset);
14323 }
14324
14325 /* Copy using LDRD/STRD instructions whenever possible.
14326 Returns true upon success. */
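/* As a rough illustration: for a 15-byte copy with both the source and
   destination word aligned, the loop below moves 8 bytes as one DImode
   (LDRD/STRD) access, then a 4-byte word, a 2-byte halfword and a final
   byte cover the remainder.  */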
14327 bool
14328 gen_movmem_ldrd_strd (rtx *operands)
14329 {
14330 unsigned HOST_WIDE_INT len;
14331 HOST_WIDE_INT align;
14332 rtx src, dst, base;
14333 rtx reg0;
14334 bool src_aligned, dst_aligned;
14335 bool src_volatile, dst_volatile;
14336
14337 gcc_assert (CONST_INT_P (operands[2]));
14338 gcc_assert (CONST_INT_P (operands[3]));
14339
14340 len = UINTVAL (operands[2]);
14341 if (len > 64)
14342 return false;
14343
14344 /* Maximum alignment we can assume for both src and dst buffers. */
14345 align = INTVAL (operands[3]);
14346
14347 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14348 return false;
14349
14350 /* Place src and dst addresses in registers
14351 and update the corresponding mem rtx. */
14352 dst = operands[0];
14353 dst_volatile = MEM_VOLATILE_P (dst);
14354 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14355 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14356 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14357
14358 src = operands[1];
14359 src_volatile = MEM_VOLATILE_P (src);
14360 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14361 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14362 src = adjust_automodify_address (src, VOIDmode, base, 0);
14363
14364 if (!unaligned_access && !(src_aligned && dst_aligned))
14365 return false;
14366
14367 if (src_volatile || dst_volatile)
14368 return false;
14369
14370 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14371 if (!(dst_aligned || src_aligned))
14372 return arm_gen_movmemqi (operands);
14373
14374 /* If either src or dst is unaligned we'll be accessing it as pairs
14375 of unaligned SImode accesses. Otherwise we can generate DImode
14376 ldrd/strd instructions. */
14377 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14378 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14379
14380 while (len >= 8)
14381 {
14382 len -= 8;
14383 reg0 = gen_reg_rtx (DImode);
14384 rtx low_reg = NULL_RTX;
14385 rtx hi_reg = NULL_RTX;
14386
14387 if (!src_aligned || !dst_aligned)
14388 {
14389 low_reg = gen_lowpart (SImode, reg0);
14390 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14391 }
14392 if (src_aligned)
14393 emit_move_insn (reg0, src);
14394 else
14395 {
14396 emit_insn (gen_unaligned_loadsi (low_reg, src));
14397 src = next_consecutive_mem (src);
14398 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14399 }
14400
14401 if (dst_aligned)
14402 emit_move_insn (dst, reg0);
14403 else
14404 {
14405 emit_insn (gen_unaligned_storesi (dst, low_reg));
14406 dst = next_consecutive_mem (dst);
14407 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14408 }
14409
14410 src = next_consecutive_mem (src);
14411 dst = next_consecutive_mem (dst);
14412 }
14413
14414 gcc_assert (len < 8);
14415 if (len >= 4)
14416 {
14417 /* More than a word but less than a double-word to copy. Copy a word. */
14418 reg0 = gen_reg_rtx (SImode);
14419 src = adjust_address (src, SImode, 0);
14420 dst = adjust_address (dst, SImode, 0);
14421 if (src_aligned)
14422 emit_move_insn (reg0, src);
14423 else
14424 emit_insn (gen_unaligned_loadsi (reg0, src));
14425
14426 if (dst_aligned)
14427 emit_move_insn (dst, reg0);
14428 else
14429 emit_insn (gen_unaligned_storesi (dst, reg0));
14430
14431 src = next_consecutive_mem (src);
14432 dst = next_consecutive_mem (dst);
14433 len -= 4;
14434 }
14435
14436 if (len == 0)
14437 return true;
14438
14439 /* Copy the remaining bytes. */
14440 if (len >= 2)
14441 {
14442 dst = adjust_address (dst, HImode, 0);
14443 src = adjust_address (src, HImode, 0);
14444 reg0 = gen_reg_rtx (SImode);
14445 if (src_aligned)
14446 emit_insn (gen_zero_extendhisi2 (reg0, src));
14447 else
14448 emit_insn (gen_unaligned_loadhiu (reg0, src));
14449
14450 if (dst_aligned)
14451 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14452 else
14453 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14454
14455 src = next_consecutive_mem (src);
14456 dst = next_consecutive_mem (dst);
14457 if (len == 2)
14458 return true;
14459 }
14460
14461 dst = adjust_address (dst, QImode, 0);
14462 src = adjust_address (src, QImode, 0);
14463 reg0 = gen_reg_rtx (QImode);
14464 emit_move_insn (reg0, src);
14465 emit_move_insn (dst, reg0);
14466 return true;
14467 }
14468
14469 /* Select a dominance comparison mode if possible for a test of the general
14470 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14471 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14472 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14473 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14474 In all cases OP will be either EQ or NE, but we don't need to know which
14475 here. If we are unable to support a dominance comparison we return
14476 CC mode. This will then fail to match for the RTL expressions that
14477 generate this call. */
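/* For illustration: if X is (lt r0 r1) and Y is (le r2 r3), both simple
   comparisons, then DOM_CC_X_AND_Y selects CC_DLTmode below, while
   DOM_CC_X_OR_Y selects CC_DLEmode, since anything satisfying LT also
   satisfies LE.  */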
14478 machine_mode
14479 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14480 {
14481 enum rtx_code cond1, cond2;
14482 int swapped = 0;
14483
14484 /* Currently we will probably get the wrong result if the individual
14485 comparisons are not simple. This also ensures that it is safe to
14486 reverse a comparison if necessary. */
14487 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14488 != CCmode)
14489 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14490 != CCmode))
14491 return CCmode;
14492
14493 /* The if_then_else variant of this tests the second condition if the
14494 first passes, but is true if the first fails. Reverse the first
14495 condition to get a true "inclusive-or" expression. */
14496 if (cond_or == DOM_CC_NX_OR_Y)
14497 cond1 = reverse_condition (cond1);
14498
14499 /* If the comparisons are not equal, and one doesn't dominate the other,
14500 then we can't do this. */
14501 if (cond1 != cond2
14502 && !comparison_dominates_p (cond1, cond2)
14503 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14504 return CCmode;
14505
14506 if (swapped)
14507 std::swap (cond1, cond2);
14508
14509 switch (cond1)
14510 {
14511 case EQ:
14512 if (cond_or == DOM_CC_X_AND_Y)
14513 return CC_DEQmode;
14514
14515 switch (cond2)
14516 {
14517 case EQ: return CC_DEQmode;
14518 case LE: return CC_DLEmode;
14519 case LEU: return CC_DLEUmode;
14520 case GE: return CC_DGEmode;
14521 case GEU: return CC_DGEUmode;
14522 default: gcc_unreachable ();
14523 }
14524
14525 case LT:
14526 if (cond_or == DOM_CC_X_AND_Y)
14527 return CC_DLTmode;
14528
14529 switch (cond2)
14530 {
14531 case LT:
14532 return CC_DLTmode;
14533 case LE:
14534 return CC_DLEmode;
14535 case NE:
14536 return CC_DNEmode;
14537 default:
14538 gcc_unreachable ();
14539 }
14540
14541 case GT:
14542 if (cond_or == DOM_CC_X_AND_Y)
14543 return CC_DGTmode;
14544
14545 switch (cond2)
14546 {
14547 case GT:
14548 return CC_DGTmode;
14549 case GE:
14550 return CC_DGEmode;
14551 case NE:
14552 return CC_DNEmode;
14553 default:
14554 gcc_unreachable ();
14555 }
14556
14557 case LTU:
14558 if (cond_or == DOM_CC_X_AND_Y)
14559 return CC_DLTUmode;
14560
14561 switch (cond2)
14562 {
14563 case LTU:
14564 return CC_DLTUmode;
14565 case LEU:
14566 return CC_DLEUmode;
14567 case NE:
14568 return CC_DNEmode;
14569 default:
14570 gcc_unreachable ();
14571 }
14572
14573 case GTU:
14574 if (cond_or == DOM_CC_X_AND_Y)
14575 return CC_DGTUmode;
14576
14577 switch (cond2)
14578 {
14579 case GTU:
14580 return CC_DGTUmode;
14581 case GEU:
14582 return CC_DGEUmode;
14583 case NE:
14584 return CC_DNEmode;
14585 default:
14586 gcc_unreachable ();
14587 }
14588
14589 /* The remaining cases only occur when both comparisons are the
14590 same. */
14591 case NE:
14592 gcc_assert (cond1 == cond2);
14593 return CC_DNEmode;
14594
14595 case LE:
14596 gcc_assert (cond1 == cond2);
14597 return CC_DLEmode;
14598
14599 case GE:
14600 gcc_assert (cond1 == cond2);
14601 return CC_DGEmode;
14602
14603 case LEU:
14604 gcc_assert (cond1 == cond2);
14605 return CC_DLEUmode;
14606
14607 case GEU:
14608 gcc_assert (cond1 == cond2);
14609 return CC_DGEUmode;
14610
14611 default:
14612 gcc_unreachable ();
14613 }
14614 }
14615
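/* Select the CC mode needed to compare X against Y using operator OP.
   For illustration: comparing a shifted operand such as
   (ashift r0 (const_int 3)) against a register selects CC_SWPmode, because
   the operands will have to be swapped when the comparison is output, while
   an EQ/NE test of a QImode value selects CC_Zmode.  */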
14616 machine_mode
14617 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14618 {
14619 /* All floating point compares return CCFP if it is an equality
14620 comparison, and CCFPE otherwise. */
14621 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14622 {
14623 switch (op)
14624 {
14625 case EQ:
14626 case NE:
14627 case UNORDERED:
14628 case ORDERED:
14629 case UNLT:
14630 case UNLE:
14631 case UNGT:
14632 case UNGE:
14633 case UNEQ:
14634 case LTGT:
14635 return CCFPmode;
14636
14637 case LT:
14638 case LE:
14639 case GT:
14640 case GE:
14641 return CCFPEmode;
14642
14643 default:
14644 gcc_unreachable ();
14645 }
14646 }
14647
14648 /* A compare with a shifted operand. Because of canonicalization, the
14649 comparison will have to be swapped when we emit the assembler. */
14650 if (GET_MODE (y) == SImode
14651 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14652 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14653 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14654 || GET_CODE (x) == ROTATERT))
14655 return CC_SWPmode;
14656
14657 /* This operation is performed swapped, but since we only rely on the Z
14658 flag we don't need an additional mode. */
14659 if (GET_MODE (y) == SImode
14660 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14661 && GET_CODE (x) == NEG
14662 && (op == EQ || op == NE))
14663 return CC_Zmode;
14664
14665 /* This is a special case that is used by combine to allow a
14666 comparison of a shifted byte load to be split into a zero-extend
14667 followed by a comparison of the shifted integer (only valid for
14668 equalities and unsigned inequalities). */
14669 if (GET_MODE (x) == SImode
14670 && GET_CODE (x) == ASHIFT
14671 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14672 && GET_CODE (XEXP (x, 0)) == SUBREG
14673 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14674 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14675 && (op == EQ || op == NE
14676 || op == GEU || op == GTU || op == LTU || op == LEU)
14677 && CONST_INT_P (y))
14678 return CC_Zmode;
14679
14680 /* A construct for a conditional compare, if the false arm contains
14681 0, then both conditions must be true, otherwise either condition
14682 must be true. Not all conditions are possible, so CCmode is
14683 returned if it can't be done. */
14684 if (GET_CODE (x) == IF_THEN_ELSE
14685 && (XEXP (x, 2) == const0_rtx
14686 || XEXP (x, 2) == const1_rtx)
14687 && COMPARISON_P (XEXP (x, 0))
14688 && COMPARISON_P (XEXP (x, 1)))
14689 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14690 INTVAL (XEXP (x, 2)));
14691
14692 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14693 if (GET_CODE (x) == AND
14694 && (op == EQ || op == NE)
14695 && COMPARISON_P (XEXP (x, 0))
14696 && COMPARISON_P (XEXP (x, 1)))
14697 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14698 DOM_CC_X_AND_Y);
14699
14700 if (GET_CODE (x) == IOR
14701 && (op == EQ || op == NE)
14702 && COMPARISON_P (XEXP (x, 0))
14703 && COMPARISON_P (XEXP (x, 1)))
14704 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14705 DOM_CC_X_OR_Y);
14706
14707 /* An operation (on Thumb) where we want to test for a single bit.
14708 This is done by shifting that bit up into the top bit of a
14709 scratch register; we can then branch on the sign bit. */
14710 if (TARGET_THUMB1
14711 && GET_MODE (x) == SImode
14712 && (op == EQ || op == NE)
14713 && GET_CODE (x) == ZERO_EXTRACT
14714 && XEXP (x, 1) == const1_rtx)
14715 return CC_Nmode;
14716
14717 /* For an operation that sets the condition codes as a side-effect, the
14718 V flag is not set correctly, so we can only use comparisons where
14719 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14720 instead.) */
14721 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14722 if (GET_MODE (x) == SImode
14723 && y == const0_rtx
14724 && (op == EQ || op == NE || op == LT || op == GE)
14725 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14726 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14727 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14728 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14729 || GET_CODE (x) == LSHIFTRT
14730 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14731 || GET_CODE (x) == ROTATERT
14732 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14733 return CC_NOOVmode;
14734
14735 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14736 return CC_Zmode;
14737
14738 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14739 && GET_CODE (x) == PLUS
14740 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14741 return CC_Cmode;
14742
14743 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14744 {
14745 switch (op)
14746 {
14747 case EQ:
14748 case NE:
14749 /* A DImode comparison against zero can be implemented by
14750 or'ing the two halves together. */
14751 if (y == const0_rtx)
14752 return CC_Zmode;
14753
14754 /* We can do an equality test in three Thumb instructions. */
14755 if (!TARGET_32BIT)
14756 return CC_Zmode;
14757
14758 /* FALLTHROUGH */
14759
14760 case LTU:
14761 case LEU:
14762 case GTU:
14763 case GEU:
14764 /* DImode unsigned comparisons can be implemented by cmp +
14765 cmpeq without a scratch register. Not worth doing in
14766 Thumb-2. */
14767 if (TARGET_32BIT)
14768 return CC_CZmode;
14769
14770 /* FALLTHROUGH */
14771
14772 case LT:
14773 case LE:
14774 case GT:
14775 case GE:
14776 /* DImode signed and unsigned comparisons can be implemented
14777 by cmp + sbcs with a scratch register, but that does not
14778 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14779 gcc_assert (op != EQ && op != NE);
14780 return CC_NCVmode;
14781
14782 default:
14783 gcc_unreachable ();
14784 }
14785 }
14786
14787 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14788 return GET_MODE (x);
14789
14790 return CCmode;
14791 }
14792
14793 /* X and Y are two things to compare using CODE. Emit the compare insn and
14794 return the rtx for register 0 in the proper mode. FP means this is a
14795 floating point compare: I don't think that it is needed on the arm. */
14796 rtx
14797 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14798 {
14799 machine_mode mode;
14800 rtx cc_reg;
14801 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14802
14803 /* We might have X as a constant, Y as a register because of the predicates
14804 used for cmpdi. If so, force X to a register here. */
14805 if (dimode_comparison && !REG_P (x))
14806 x = force_reg (DImode, x);
14807
14808 mode = SELECT_CC_MODE (code, x, y);
14809 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14810
14811 if (dimode_comparison
14812 && mode != CC_CZmode)
14813 {
14814 rtx clobber, set;
14815
14816 /* To compare two non-zero values for equality, XOR them and
14817 then compare against zero. Not used for ARM mode; there
14818 CC_CZmode is cheaper. */
14819 if (mode == CC_Zmode && y != const0_rtx)
14820 {
14821 gcc_assert (!reload_completed);
14822 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14823 y = const0_rtx;
14824 }
14825
14826 /* A scratch register is required. */
14827 if (reload_completed)
14828 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14829 else
14830 scratch = gen_rtx_SCRATCH (SImode);
14831
14832 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14833 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14834 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14835 }
14836 else
14837 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14838
14839 return cc_reg;
14840 }
14841
14842 /* Generate a sequence of insns that will generate the correct return
14843 address mask depending on the physical architecture that the program
14844 is running on. */
14845 rtx
14846 arm_gen_return_addr_mask (void)
14847 {
14848 rtx reg = gen_reg_rtx (Pmode);
14849
14850 emit_insn (gen_return_addr_mask (reg));
14851 return reg;
14852 }
14853
14854 void
14855 arm_reload_in_hi (rtx *operands)
14856 {
14857 rtx ref = operands[1];
14858 rtx base, scratch;
14859 HOST_WIDE_INT offset = 0;
14860
14861 if (GET_CODE (ref) == SUBREG)
14862 {
14863 offset = SUBREG_BYTE (ref);
14864 ref = SUBREG_REG (ref);
14865 }
14866
14867 if (REG_P (ref))
14868 {
14869 /* We have a pseudo which has been spilt onto the stack; there
14870 are two cases here: the first where there is a simple
14871 stack-slot replacement and a second where the stack-slot is
14872 out of range, or is used as a subreg. */
14873 if (reg_equiv_mem (REGNO (ref)))
14874 {
14875 ref = reg_equiv_mem (REGNO (ref));
14876 base = find_replacement (&XEXP (ref, 0));
14877 }
14878 else
14879 /* The slot is out of range, or was dressed up in a SUBREG. */
14880 base = reg_equiv_address (REGNO (ref));
14881
14882 /* PR 62254: If there is no equivalent memory location then just move
14883 the value as an SImode register move. This happens when the target
14884 architecture variant does not have an HImode register move. */
14885 if (base == NULL)
14886 {
14887 gcc_assert (REG_P (operands[0]));
14888 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14889 gen_rtx_SUBREG (SImode, ref, 0)));
14890 return;
14891 }
14892 }
14893 else
14894 base = find_replacement (&XEXP (ref, 0));
14895
14896 /* Handle the case where the address is too complex to be offset by 1. */
14897 if (GET_CODE (base) == MINUS
14898 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14899 {
14900 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14901
14902 emit_set_insn (base_plus, base);
14903 base = base_plus;
14904 }
14905 else if (GET_CODE (base) == PLUS)
14906 {
14907 /* The addend must be CONST_INT, or we would have dealt with it above. */
14908 HOST_WIDE_INT hi, lo;
14909
14910 offset += INTVAL (XEXP (base, 1));
14911 base = XEXP (base, 0);
14912
14913 /* Rework the address into a legal sequence of insns. */
14914 /* Valid range for lo is -4095 -> 4095 */
14915 lo = (offset >= 0
14916 ? (offset & 0xfff)
14917 : -((-offset) & 0xfff));
14918
14919 /* Corner case, if lo is the max offset then we would be out of range
14920 once we have added the additional 1 below, so bump the msb into the
14921 pre-loading insn(s). */
14922 if (lo == 4095)
14923 lo &= 0x7ff;
14924
14925 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14926 ^ (HOST_WIDE_INT) 0x80000000)
14927 - (HOST_WIDE_INT) 0x80000000);
14928
14929 gcc_assert (hi + lo == offset);
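      /* For example (illustrative values only): an offset of 0x1234 splits
         into lo = 0x234 and hi = 0x1000; an offset of 4095 has its low part
         trimmed to 2047 so that the offset + 1 access below still fits in
         the 12-bit range, giving hi = 2048.  */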
14930
14931 if (hi != 0)
14932 {
14933 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14934
14935 /* Get the base address; addsi3 knows how to handle constants
14936 that require more than one insn. */
14937 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14938 base = base_plus;
14939 offset = lo;
14940 }
14941 }
14942
14943 /* Operands[2] may overlap operands[0] (though it won't overlap
14944 operands[1]); that's why we asked for a DImode reg -- so we can
14945 use the half that does not overlap. */
14946 if (REGNO (operands[2]) == REGNO (operands[0]))
14947 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14948 else
14949 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14950
14951 emit_insn (gen_zero_extendqisi2 (scratch,
14952 gen_rtx_MEM (QImode,
14953 plus_constant (Pmode, base,
14954 offset))));
14955 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14956 gen_rtx_MEM (QImode,
14957 plus_constant (Pmode, base,
14958 offset + 1))));
14959 if (!BYTES_BIG_ENDIAN)
14960 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14961 gen_rtx_IOR (SImode,
14962 gen_rtx_ASHIFT
14963 (SImode,
14964 gen_rtx_SUBREG (SImode, operands[0], 0),
14965 GEN_INT (8)),
14966 scratch));
14967 else
14968 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14969 gen_rtx_IOR (SImode,
14970 gen_rtx_ASHIFT (SImode, scratch,
14971 GEN_INT (8)),
14972 gen_rtx_SUBREG (SImode, operands[0], 0)));
14973 }
14974
14975 /* Handle storing a half-word to memory during reload by synthesizing as two
14976 byte stores. Take care not to clobber the input values until after we
14977 have moved them somewhere safe. This code assumes that if the DImode
14978 scratch in operands[2] overlaps either the input value or output address
14979 in some way, then that value must die in this insn (we absolutely need
14980 two scratch registers for some corner cases). */
14981 void
14982 arm_reload_out_hi (rtx *operands)
14983 {
14984 rtx ref = operands[0];
14985 rtx outval = operands[1];
14986 rtx base, scratch;
14987 HOST_WIDE_INT offset = 0;
14988
14989 if (GET_CODE (ref) == SUBREG)
14990 {
14991 offset = SUBREG_BYTE (ref);
14992 ref = SUBREG_REG (ref);
14993 }
14994
14995 if (REG_P (ref))
14996 {
14997 /* We have a pseudo which has been spilt onto the stack; there
14998 are two cases here: the first where there is a simple
14999 stack-slot replacement and a second where the stack-slot is
15000 out of range, or is used as a subreg. */
15001 if (reg_equiv_mem (REGNO (ref)))
15002 {
15003 ref = reg_equiv_mem (REGNO (ref));
15004 base = find_replacement (&XEXP (ref, 0));
15005 }
15006 else
15007 /* The slot is out of range, or was dressed up in a SUBREG. */
15008 base = reg_equiv_address (REGNO (ref));
15009
15010 /* PR 62254: If there is no equivalent memory location then just move
15011 the value as an SImode register move. This happens when the target
15012 architecture variant does not have an HImode register move. */
15013 if (base == NULL)
15014 {
15015 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15016
15017 if (REG_P (outval))
15018 {
15019 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15020 gen_rtx_SUBREG (SImode, outval, 0)));
15021 }
15022 else /* SUBREG_P (outval) */
15023 {
15024 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15025 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15026 SUBREG_REG (outval)));
15027 else
15028 /* FIXME: Handle other cases ? */
15029 gcc_unreachable ();
15030 }
15031 return;
15032 }
15033 }
15034 else
15035 base = find_replacement (&XEXP (ref, 0));
15036
15037 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15038
15039 /* Handle the case where the address is too complex to be offset by 1. */
15040 if (GET_CODE (base) == MINUS
15041 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15042 {
15043 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15044
15045 /* Be careful not to destroy OUTVAL. */
15046 if (reg_overlap_mentioned_p (base_plus, outval))
15047 {
15048 /* Updating base_plus might destroy outval, see if we can
15049 swap the scratch and base_plus. */
15050 if (!reg_overlap_mentioned_p (scratch, outval))
15051 std::swap (scratch, base_plus);
15052 else
15053 {
15054 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15055
15056 /* Be conservative and copy OUTVAL into the scratch now,
15057 this should only be necessary if outval is a subreg
15058 of something larger than a word. */
15059 /* XXX Might this clobber base? I can't see how it can,
15060 since scratch is known to overlap with OUTVAL, and
15061 must be wider than a word. */
15062 emit_insn (gen_movhi (scratch_hi, outval));
15063 outval = scratch_hi;
15064 }
15065 }
15066
15067 emit_set_insn (base_plus, base);
15068 base = base_plus;
15069 }
15070 else if (GET_CODE (base) == PLUS)
15071 {
15072 /* The addend must be CONST_INT, or we would have dealt with it above. */
15073 HOST_WIDE_INT hi, lo;
15074
15075 offset += INTVAL (XEXP (base, 1));
15076 base = XEXP (base, 0);
15077
15078 /* Rework the address into a legal sequence of insns. */
15079 /* Valid range for lo is -4095 -> 4095 */
15080 lo = (offset >= 0
15081 ? (offset & 0xfff)
15082 : -((-offset) & 0xfff));
15083
15084 /* Corner case, if lo is the max offset then we would be out of range
15085 once we have added the additional 1 below, so bump the msb into the
15086 pre-loading insn(s). */
15087 if (lo == 4095)
15088 lo &= 0x7ff;
15089
15090 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15091 ^ (HOST_WIDE_INT) 0x80000000)
15092 - (HOST_WIDE_INT) 0x80000000);
15093
15094 gcc_assert (hi + lo == offset);
15095
15096 if (hi != 0)
15097 {
15098 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15099
15100 /* Be careful not to destroy OUTVAL. */
15101 if (reg_overlap_mentioned_p (base_plus, outval))
15102 {
15103 /* Updating base_plus might destroy outval, see if we
15104 can swap the scratch and base_plus. */
15105 if (!reg_overlap_mentioned_p (scratch, outval))
15106 std::swap (scratch, base_plus);
15107 else
15108 {
15109 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15110
15111 /* Be conservative and copy outval into scratch now,
15112 this should only be necessary if outval is a
15113 subreg of something larger than a word. */
15114 /* XXX Might this clobber base? I can't see how it
15115 can, since scratch is known to overlap with
15116 outval. */
15117 emit_insn (gen_movhi (scratch_hi, outval));
15118 outval = scratch_hi;
15119 }
15120 }
15121
15122 /* Get the base address; addsi3 knows how to handle constants
15123 that require more than one insn. */
15124 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15125 base = base_plus;
15126 offset = lo;
15127 }
15128 }
15129
15130 if (BYTES_BIG_ENDIAN)
15131 {
15132 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15133 plus_constant (Pmode, base,
15134 offset + 1)),
15135 gen_lowpart (QImode, outval)));
15136 emit_insn (gen_lshrsi3 (scratch,
15137 gen_rtx_SUBREG (SImode, outval, 0),
15138 GEN_INT (8)));
15139 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15140 offset)),
15141 gen_lowpart (QImode, scratch)));
15142 }
15143 else
15144 {
15145 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15146 offset)),
15147 gen_lowpart (QImode, outval)));
15148 emit_insn (gen_lshrsi3 (scratch,
15149 gen_rtx_SUBREG (SImode, outval, 0),
15150 GEN_INT (8)));
15151 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15152 plus_constant (Pmode, base,
15153 offset + 1)),
15154 gen_lowpart (QImode, scratch)));
15155 }
15156 }
15157
15158 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15159 (padded to the size of a word) should be passed in a register. */
15160
15161 static bool
15162 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15163 {
15164 if (TARGET_AAPCS_BASED)
15165 return must_pass_in_stack_var_size (mode, type);
15166 else
15167 return must_pass_in_stack_var_size_or_pad (mode, type);
15168 }
15169
15170
15171 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15172 byte of a stack argument has useful data. For legacy APCS ABIs we use
15173 the default. For AAPCS based ABIs small aggregate types are placed
15174 in the lowest memory address. */
15175
15176 static pad_direction
15177 arm_function_arg_padding (machine_mode mode, const_tree type)
15178 {
15179 if (!TARGET_AAPCS_BASED)
15180 return default_function_arg_padding (mode, type);
15181
15182 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15183 return PAD_DOWNWARD;
15184
15185 return PAD_UPWARD;
15186 }
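/* For illustration: on a big-endian AAPCS target the code above pads a
   2-byte integer argument downward, while a 2-byte structure is padded
   upward, since aggregates are not INTEGRAL_TYPE_P.  */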
15187
15188
15189 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15190 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15191 register has useful data, and return the opposite if the most
15192 significant byte does. */
15193
15194 bool
15195 arm_pad_reg_upward (machine_mode mode,
15196 tree type, int first ATTRIBUTE_UNUSED)
15197 {
15198 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15199 {
15200 /* For AAPCS, small aggregates, small fixed-point types,
15201 and small complex types are always padded upwards. */
15202 if (type)
15203 {
15204 if ((AGGREGATE_TYPE_P (type)
15205 || TREE_CODE (type) == COMPLEX_TYPE
15206 || FIXED_POINT_TYPE_P (type))
15207 && int_size_in_bytes (type) <= 4)
15208 return true;
15209 }
15210 else
15211 {
15212 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15213 && GET_MODE_SIZE (mode) <= 4)
15214 return true;
15215 }
15216 }
15217
15218 /* Otherwise, use default padding. */
15219 return !BYTES_BIG_ENDIAN;
15220 }
15221
15222 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15223 assuming that the address in the base register is word aligned. */
15224 bool
15225 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15226 {
15227 HOST_WIDE_INT max_offset;
15228
15229 /* Offset must be a multiple of 4 in Thumb mode. */
15230 if (TARGET_THUMB2 && ((offset & 3) != 0))
15231 return false;
15232
15233 if (TARGET_THUMB2)
15234 max_offset = 1020;
15235 else if (TARGET_ARM)
15236 max_offset = 255;
15237 else
15238 return false;
15239
15240 return ((offset <= max_offset) && (offset >= -max_offset));
15241 }
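/* For illustration: an offset of 1020 is accepted in Thumb-2 but rejected
   in ARM state (limit 255), while an offset of 250 is accepted in ARM state
   but rejected in Thumb-2 because it is not a multiple of four.  */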
15242
15243 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15244 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15245 Assumes that the address in the base register RN is word aligned. Pattern
15246 guarantees that both memory accesses use the same base register,
15247 the offsets are constants within the range, and the gap between the offsets is 4.
15248 If reload is complete then check that registers are legal. WBACK indicates whether
15249 address is updated. LOAD indicates whether memory access is load or store. */
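/* For illustration: after reload in ARM state the checks below accept
   ldrd r2, r3, [r5, #8] (even first register, consecutive pair, small
   offset) but reject a pair starting at an odd register such as r1, or one
   that uses the PC as the base register.  */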
15250 bool
15251 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15252 bool wback, bool load)
15253 {
15254 unsigned int t, t2, n;
15255
15256 if (!reload_completed)
15257 return true;
15258
15259 if (!offset_ok_for_ldrd_strd (offset))
15260 return false;
15261
15262 t = REGNO (rt);
15263 t2 = REGNO (rt2);
15264 n = REGNO (rn);
15265
15266 if ((TARGET_THUMB2)
15267 && ((wback && (n == t || n == t2))
15268 || (t == SP_REGNUM)
15269 || (t == PC_REGNUM)
15270 || (t2 == SP_REGNUM)
15271 || (t2 == PC_REGNUM)
15272 || (!load && (n == PC_REGNUM))
15273 || (load && (t == t2))
15274 /* Triggers Cortex-M3 LDRD errata. */
15275 || (!wback && load && fix_cm3_ldrd && (n == t))))
15276 return false;
15277
15278 if ((TARGET_ARM)
15279 && ((wback && (n == t || n == t2))
15280 || (t2 == PC_REGNUM)
15281 || (t % 2 != 0) /* First destination register is not even. */
15282 || (t2 != t + 1)
15283 /* PC can be used as base register (for offset addressing only),
15284 but it is deprecated. */
15285 || (n == PC_REGNUM)))
15286 return false;
15287
15288 return true;
15289 }
15290
15291 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15292 operand MEM's address contains an immediate offset from the base
15293 register and has no side effects, in which case it sets BASE and
15294 OFFSET accordingly. */
15295 static bool
15296 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15297 {
15298 rtx addr;
15299
15300 gcc_assert (base != NULL && offset != NULL);
15301
15302 /* TODO: Handle more general memory operand patterns, such as
15303 PRE_DEC and PRE_INC. */
15304
15305 if (side_effects_p (mem))
15306 return false;
15307
15308 /* Can't deal with subregs. */
15309 if (GET_CODE (mem) == SUBREG)
15310 return false;
15311
15312 gcc_assert (MEM_P (mem));
15313
15314 *offset = const0_rtx;
15315
15316 addr = XEXP (mem, 0);
15317
15318 /* If addr isn't valid for DImode, then we can't handle it. */
15319 if (!arm_legitimate_address_p (DImode, addr,
15320 reload_in_progress || reload_completed))
15321 return false;
15322
15323 if (REG_P (addr))
15324 {
15325 *base = addr;
15326 return true;
15327 }
15328 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15329 {
15330 *base = XEXP (addr, 0);
15331 *offset = XEXP (addr, 1);
15332 return (REG_P (*base) && CONST_INT_P (*offset));
15333 }
15334
15335 return false;
15336 }
15337
15338 /* Called from a peephole2 to replace two word-size accesses with a
15339 single LDRD/STRD instruction. Returns true iff we can generate a
15340 new instruction sequence. That is, both accesses use the same base
15341 register and the gap between constant offsets is 4. This function
15342 may reorder its operands to match ldrd/strd RTL templates.
15343 OPERANDS are the operands found by the peephole matcher;
15344 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15345 corresponding memory operands. LOAD indicates whether the access
15346 is load or store. CONST_STORE indicates a store of constant
15347 integer values held in OPERANDS[4,5] and assumes that the pattern
15348 is 4 insns long, for the purpose of checking dead registers.
15349 COMMUTE indicates that register operands may be reordered. */
15350 bool
15351 gen_operands_ldrd_strd (rtx *operands, bool load,
15352 bool const_store, bool commute)
15353 {
15354 int nops = 2;
15355 HOST_WIDE_INT offsets[2], offset;
15356 rtx base = NULL_RTX;
15357 rtx cur_base, cur_offset, tmp;
15358 int i, gap;
15359 HARD_REG_SET regset;
15360
15361 gcc_assert (!const_store || !load);
15362 /* Check that the memory references are immediate offsets from the
15363 same base register. Extract the base register, the destination
15364 registers, and the corresponding memory offsets. */
15365 for (i = 0; i < nops; i++)
15366 {
15367 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15368 return false;
15369
15370 if (i == 0)
15371 base = cur_base;
15372 else if (REGNO (base) != REGNO (cur_base))
15373 return false;
15374
15375 offsets[i] = INTVAL (cur_offset);
15376 if (GET_CODE (operands[i]) == SUBREG)
15377 {
15378 tmp = SUBREG_REG (operands[i]);
15379 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15380 operands[i] = tmp;
15381 }
15382 }
15383
15384 /* Make sure there is no dependency between the individual loads. */
15385 if (load && REGNO (operands[0]) == REGNO (base))
15386 return false; /* RAW */
15387
15388 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15389 return false; /* WAW */
15390
15391 /* If the same input register is used in both stores
15392 when storing different constants, try to find a free register.
15393 For example, the code
15394 mov r0, 0
15395 str r0, [r2]
15396 mov r0, 1
15397 str r0, [r2, #4]
15398 can be transformed into
15399 mov r1, 0
15400 mov r0, 1
15401 strd r1, r0, [r2]
15402 in Thumb mode assuming that r1 is free.
15403 For ARM mode do the same but only if the starting register
15404 can be made to be even. */
15405 if (const_store
15406 && REGNO (operands[0]) == REGNO (operands[1])
15407 && INTVAL (operands[4]) != INTVAL (operands[5]))
15408 {
15409 if (TARGET_THUMB2)
15410 {
15411 CLEAR_HARD_REG_SET (regset);
15412 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15413 if (tmp == NULL_RTX)
15414 return false;
15415
15416 /* Use the new register in the first load to ensure that
15417 if the original input register is not dead after peephole,
15418 then it will have the correct constant value. */
15419 operands[0] = tmp;
15420 }
15421 else if (TARGET_ARM)
15422 {
15423 int regno = REGNO (operands[0]);
15424 if (!peep2_reg_dead_p (4, operands[0]))
15425 {
15426 /* When the input register is even and is not dead after the
15427 pattern, it has to hold the second constant but we cannot
15428 form a legal STRD in ARM mode with this register as the second
15429 register. */
15430 if (regno % 2 == 0)
15431 return false;
15432
15433 /* Is regno-1 free? */
15434 SET_HARD_REG_SET (regset);
15435 CLEAR_HARD_REG_BIT(regset, regno - 1);
15436 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15437 if (tmp == NULL_RTX)
15438 return false;
15439
15440 operands[0] = tmp;
15441 }
15442 else
15443 {
15444 /* Find a DImode register. */
15445 CLEAR_HARD_REG_SET (regset);
15446 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15447 if (tmp != NULL_RTX)
15448 {
15449 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15450 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15451 }
15452 else
15453 {
15454 /* Can we use the input register to form a DI register? */
15455 SET_HARD_REG_SET (regset);
15456 CLEAR_HARD_REG_BIT(regset,
15457 regno % 2 == 0 ? regno + 1 : regno - 1);
15458 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15459 if (tmp == NULL_RTX)
15460 return false;
15461 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15462 }
15463 }
15464
15465 gcc_assert (operands[0] != NULL_RTX);
15466 gcc_assert (operands[1] != NULL_RTX);
15467 gcc_assert (REGNO (operands[0]) % 2 == 0);
15468 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15469 }
15470 }
15471
15472 /* Make sure the instructions are ordered with lower memory access first. */
15473 if (offsets[0] > offsets[1])
15474 {
15475 gap = offsets[0] - offsets[1];
15476 offset = offsets[1];
15477
15478 /* Swap the instructions such that lower memory is accessed first. */
15479 std::swap (operands[0], operands[1]);
15480 std::swap (operands[2], operands[3]);
15481 if (const_store)
15482 std::swap (operands[4], operands[5]);
15483 }
15484 else
15485 {
15486 gap = offsets[1] - offsets[0];
15487 offset = offsets[0];
15488 }
15489
15490 /* Make sure accesses are to consecutive memory locations. */
15491 if (gap != 4)
15492 return false;
15493
15494 /* Make sure we generate legal instructions. */
15495 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15496 false, load))
15497 return true;
15498
15499 /* In Thumb state, where registers are almost unconstrained, there
15500 is little hope of fixing it. */
15501 if (TARGET_THUMB2)
15502 return false;
15503
15504 if (load && commute)
15505 {
15506 /* Try reordering registers. */
15507 std::swap (operands[0], operands[1]);
15508 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15509 false, load))
15510 return true;
15511 }
15512
15513 if (const_store)
15514 {
15515 /* If input registers are dead after this pattern, they can be
15516 reordered or replaced by other registers that are free in the
15517 current pattern. */
15518 if (!peep2_reg_dead_p (4, operands[0])
15519 || !peep2_reg_dead_p (4, operands[1]))
15520 return false;
15521
15522 /* Try to reorder the input registers. */
15523 /* For example, the code
15524 mov r0, 0
15525 mov r1, 1
15526 str r1, [r2]
15527 str r0, [r2, #4]
15528 can be transformed into
15529 mov r1, 0
15530 mov r0, 1
15531 strd r0, [r2]
15532 */
15533 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15534 false, false))
15535 {
15536 std::swap (operands[0], operands[1]);
15537 return true;
15538 }
15539
15540 /* Try to find a free DI register. */
15541 CLEAR_HARD_REG_SET (regset);
15542 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15543 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15544 while (true)
15545 {
15546 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15547 if (tmp == NULL_RTX)
15548 return false;
15549
15550 /* DREG must be an even-numbered register in DImode.
15551 Split it into SI registers. */
15552 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15553 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15554 gcc_assert (operands[0] != NULL_RTX);
15555 gcc_assert (operands[1] != NULL_RTX);
15556 gcc_assert (REGNO (operands[0]) % 2 == 0);
15557 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15558
15559 return (operands_ok_ldrd_strd (operands[0], operands[1],
15560 base, offset,
15561 false, load));
15562 }
15563 }
15564
15565 return false;
15566 }
15567
15568
15569
15570 \f
15571 /* Print a symbolic form of X to the debug file, F. */
15572 static void
15573 arm_print_value (FILE *f, rtx x)
15574 {
15575 switch (GET_CODE (x))
15576 {
15577 case CONST_INT:
15578 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15579 return;
15580
15581 case CONST_DOUBLE:
15582 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15583 return;
15584
15585 case CONST_VECTOR:
15586 {
15587 int i;
15588
15589 fprintf (f, "<");
15590 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15591 {
15592 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15593 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15594 fputc (',', f);
15595 }
15596 fprintf (f, ">");
15597 }
15598 return;
15599
15600 case CONST_STRING:
15601 fprintf (f, "\"%s\"", XSTR (x, 0));
15602 return;
15603
15604 case SYMBOL_REF:
15605 fprintf (f, "`%s'", XSTR (x, 0));
15606 return;
15607
15608 case LABEL_REF:
15609 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15610 return;
15611
15612 case CONST:
15613 arm_print_value (f, XEXP (x, 0));
15614 return;
15615
15616 case PLUS:
15617 arm_print_value (f, XEXP (x, 0));
15618 fprintf (f, "+");
15619 arm_print_value (f, XEXP (x, 1));
15620 return;
15621
15622 case PC:
15623 fprintf (f, "pc");
15624 return;
15625
15626 default:
15627 fprintf (f, "????");
15628 return;
15629 }
15630 }
15631 \f
15632 /* Routines for manipulation of the constant pool. */
15633
15634 /* Arm instructions cannot load a large constant directly into a
15635 register; they have to come from a pc relative load. The constant
15636 must therefore be placed in the addressable range of the pc
15637 relative load. Depending on the precise pc relative load
15638 instruction the range is somewhere between 256 bytes and 4k. This
15639 means that we often have to dump a constant inside a function, and
15640 generate code to branch around it.
15641
15642 It is important to minimize this, since the branches will slow
15643 things down and make the code larger.
15644
15645 Normally we can hide the table after an existing unconditional
15646 branch so that there is no interruption of the flow, but in the
15647 worst case the code looks like this:
15648
15649 ldr rn, L1
15650 ...
15651 b L2
15652 align
15653 L1: .long value
15654 L2:
15655 ...
15656
15657 ldr rn, L3
15658 ...
15659 b L4
15660 align
15661 L3: .long value
15662 L4:
15663 ...
15664
15665 We fix this by performing a scan after scheduling, which notices
15666 which instructions need to have their operands fetched from the
15667 constant table and builds the table.
15668
15669 The algorithm starts by building a table of all the constants that
15670 need fixing up and all the natural barriers in the function (places
15671 where a constant table can be dropped without breaking the flow).
15672 For each fixup we note how far the pc-relative replacement will be
15673 able to reach and the offset of the instruction into the function.
15674
15675 Having built the table we then group the fixes together to form
15676 tables that are as large as possible (subject to addressing
15677 constraints) and emit each table of constants after the last
15678 barrier that is within range of all the instructions in the group.
15679 If a group does not contain a barrier, then we forcibly create one
15680 by inserting a jump instruction into the flow. Once the table has
15681 been inserted, the insns are then modified to reference the
15682 relevant entry in the pool.
15683
15684 Possible enhancements to the algorithm (not implemented) are:
15685
15686 1) For some processors and object formats, there may be benefit in
15687 aligning the pools to the start of cache lines; this alignment
15688 would need to be taken into account when calculating addressability
15689 of a pool. */
15690
15691 /* These typedefs are located at the start of this file, so that
15692 they can be used in the prototypes there. This comment is to
15693 remind readers of that fact so that the following structures
15694 can be understood more easily.
15695
15696 typedef struct minipool_node Mnode;
15697 typedef struct minipool_fixup Mfix; */
15698
15699 struct minipool_node
15700 {
15701 /* Doubly linked chain of entries. */
15702 Mnode * next;
15703 Mnode * prev;
15704 /* The maximum offset into the code at which this entry can be placed. While
15705 pushing fixes for forward references, all entries are sorted in order
15706 of increasing max_address. */
15707 HOST_WIDE_INT max_address;
15708 /* Similarly for an entry inserted for a backwards ref. */
15709 HOST_WIDE_INT min_address;
15710 /* The number of fixes referencing this entry. This can become zero
15711 if we "unpush" an entry. In this case we ignore the entry when we
15712 come to emit the code. */
15713 int refcount;
15714 /* The offset from the start of the minipool. */
15715 HOST_WIDE_INT offset;
15716 /* The value in table. */
15717 rtx value;
15718 /* The mode of value. */
15719 machine_mode mode;
15720 /* The size of the value. With iWMMXt enabled
15721 sizes > 4 also imply an alignment of 8-bytes. */
15722 int fix_size;
15723 };
15724
15725 struct minipool_fixup
15726 {
15727 Mfix * next;
15728 rtx_insn * insn;
15729 HOST_WIDE_INT address;
15730 rtx * loc;
15731 machine_mode mode;
15732 int fix_size;
15733 rtx value;
15734 Mnode * minipool;
15735 HOST_WIDE_INT forwards;
15736 HOST_WIDE_INT backwards;
15737 };
15738
15739 /* Fixes less than a word need padding out to a word boundary. */
15740 #define MINIPOOL_FIX_SIZE(mode) \
15741 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
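/* For example, MINIPOOL_FIX_SIZE (HImode) is 4 (padded up to a word),
   while MINIPOOL_FIX_SIZE (DImode) is 8.  */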
15742
15743 static Mnode * minipool_vector_head;
15744 static Mnode * minipool_vector_tail;
15745 static rtx_code_label *minipool_vector_label;
15746 static int minipool_pad;
15747
15748 /* The linked list of all minipool fixes required for this function. */
15749 Mfix * minipool_fix_head;
15750 Mfix * minipool_fix_tail;
15751 /* The fix entry for the current minipool, once it has been placed. */
15752 Mfix * minipool_barrier;
15753
15754 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15755 #define JUMP_TABLES_IN_TEXT_SECTION 0
15756 #endif
15757
15758 static HOST_WIDE_INT
15759 get_jump_table_size (rtx_jump_table_data *insn)
15760 {
15761 /* ADDR_VECs only take room if read-only data goes into the text
15762 section. */
15763 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15764 {
15765 rtx body = PATTERN (insn);
15766 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15767 HOST_WIDE_INT size;
15768 HOST_WIDE_INT modesize;
15769
15770 modesize = GET_MODE_SIZE (GET_MODE (body));
15771 size = modesize * XVECLEN (body, elt);
15772 switch (modesize)
15773 {
15774 case 1:
15775 /* Round up size of TBB table to a halfword boundary. */
15776 size = (size + 1) & ~HOST_WIDE_INT_1;
15777 break;
15778 case 2:
15779 /* No padding necessary for TBH. */
15780 break;
15781 case 4:
15782 /* Add two bytes for alignment on Thumb. */
15783 if (TARGET_THUMB)
15784 size += 2;
15785 break;
15786 default:
15787 gcc_unreachable ();
15788 }
15789 return size;
15790 }
15791
15792 return 0;
15793 }
15794
15795 /* Return the maximum amount of padding that will be inserted before
15796 label LABEL. */
15797
15798 static HOST_WIDE_INT
15799 get_label_padding (rtx label)
15800 {
15801 HOST_WIDE_INT align, min_insn_size;
15802
15803 align = 1 << label_to_alignment (label);
15804 min_insn_size = TARGET_THUMB ? 2 : 4;
15805 return align > min_insn_size ? align - min_insn_size : 0;
15806 }
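/* For example, a label aligned to an 8-byte boundary in Thumb code can be
   preceded by at most 8 - 2 = 6 bytes of padding, since the smallest Thumb
   instruction is 2 bytes.  */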
15807
15808 /* Move a minipool fix MP from its current location to before MAX_MP.
15809 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15810 constraints may need updating. */
15811 static Mnode *
15812 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15813 HOST_WIDE_INT max_address)
15814 {
15815 /* The code below assumes these are different. */
15816 gcc_assert (mp != max_mp);
15817
15818 if (max_mp == NULL)
15819 {
15820 if (max_address < mp->max_address)
15821 mp->max_address = max_address;
15822 }
15823 else
15824 {
15825 if (max_address > max_mp->max_address - mp->fix_size)
15826 mp->max_address = max_mp->max_address - mp->fix_size;
15827 else
15828 mp->max_address = max_address;
15829
15830 /* Unlink MP from its current position. Since max_mp is non-null,
15831 mp->prev must be non-null. */
15832 mp->prev->next = mp->next;
15833 if (mp->next != NULL)
15834 mp->next->prev = mp->prev;
15835 else
15836 minipool_vector_tail = mp->prev;
15837
15838 /* Re-insert it before MAX_MP. */
15839 mp->next = max_mp;
15840 mp->prev = max_mp->prev;
15841 max_mp->prev = mp;
15842
15843 if (mp->prev != NULL)
15844 mp->prev->next = mp;
15845 else
15846 minipool_vector_head = mp;
15847 }
15848
15849 /* Save the new entry. */
15850 max_mp = mp;
15851
15852 /* Scan over the preceding entries and adjust their addresses as
15853 required. */
15854 while (mp->prev != NULL
15855 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15856 {
15857 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15858 mp = mp->prev;
15859 }
15860
15861 return max_mp;
15862 }
15863
15864 /* Add a constant to the minipool for a forward reference. Returns the
15865 node added or NULL if the constant will not fit in this pool. */
15866 static Mnode *
15867 add_minipool_forward_ref (Mfix *fix)
15868 {
15869 /* If set, max_mp is the first pool_entry that has a lower
15870 constraint than the one we are trying to add. */
15871 Mnode * max_mp = NULL;
15872 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15873 Mnode * mp;
15874
15875 /* If the minipool starts before the end of FIX->INSN then this FIX
15876 can not be placed into the current pool. Furthermore, adding the
15877 new constant pool entry may cause the pool to start FIX_SIZE bytes
15878 earlier. */
15879 if (minipool_vector_head &&
15880 (fix->address + get_attr_length (fix->insn)
15881 >= minipool_vector_head->max_address - fix->fix_size))
15882 return NULL;
15883
15884 /* Scan the pool to see if a constant with the same value has
15885 already been added. While we are doing this, also note the
15886 location where we must insert the constant if it doesn't already
15887 exist. */
15888 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15889 {
15890 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15891 && fix->mode == mp->mode
15892 && (!LABEL_P (fix->value)
15893 || (CODE_LABEL_NUMBER (fix->value)
15894 == CODE_LABEL_NUMBER (mp->value)))
15895 && rtx_equal_p (fix->value, mp->value))
15896 {
15897 /* More than one fix references this entry. */
15898 mp->refcount++;
15899 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15900 }
15901
15902 /* Note the insertion point if necessary. */
15903 if (max_mp == NULL
15904 && mp->max_address > max_address)
15905 max_mp = mp;
15906
15907 /* If we are inserting an 8-byte aligned quantity and
15908 we have not already found an insertion point, then
15909 make sure that all such 8-byte aligned quantities are
15910 placed at the start of the pool. */
15911 if (ARM_DOUBLEWORD_ALIGN
15912 && max_mp == NULL
15913 && fix->fix_size >= 8
15914 && mp->fix_size < 8)
15915 {
15916 max_mp = mp;
15917 max_address = mp->max_address;
15918 }
15919 }
15920
15921 /* The value is not currently in the minipool, so we need to create
15922 a new entry for it. If MAX_MP is NULL, the entry will be put on
15923 the end of the list since the placement is less constrained than
15924 any existing entry. Otherwise, we insert the new fix before
15925 MAX_MP and, if necessary, adjust the constraints on the other
15926 entries. */
15927 mp = XNEW (Mnode);
15928 mp->fix_size = fix->fix_size;
15929 mp->mode = fix->mode;
15930 mp->value = fix->value;
15931 mp->refcount = 1;
15932 /* Not yet required for a backwards ref. */
15933 mp->min_address = -65536;
15934
15935 if (max_mp == NULL)
15936 {
15937 mp->max_address = max_address;
15938 mp->next = NULL;
15939 mp->prev = minipool_vector_tail;
15940
15941 if (mp->prev == NULL)
15942 {
15943 minipool_vector_head = mp;
15944 minipool_vector_label = gen_label_rtx ();
15945 }
15946 else
15947 mp->prev->next = mp;
15948
15949 minipool_vector_tail = mp;
15950 }
15951 else
15952 {
15953 if (max_address > max_mp->max_address - mp->fix_size)
15954 mp->max_address = max_mp->max_address - mp->fix_size;
15955 else
15956 mp->max_address = max_address;
15957
15958 mp->next = max_mp;
15959 mp->prev = max_mp->prev;
15960 max_mp->prev = mp;
15961 if (mp->prev != NULL)
15962 mp->prev->next = mp;
15963 else
15964 minipool_vector_head = mp;
15965 }
15966
15967 /* Save the new entry. */
15968 max_mp = mp;
15969
15970 /* Scan over the preceding entries and adjust their addresses as
15971 required. */
15972 while (mp->prev != NULL
15973 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15974 {
15975 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15976 mp = mp->prev;
15977 }
15978
15979 return max_mp;
15980 }
15981
15982 static Mnode *
15983 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15984 HOST_WIDE_INT min_address)
15985 {
15986 HOST_WIDE_INT offset;
15987
15988 /* The code below assumes these are different. */
15989 gcc_assert (mp != min_mp);
15990
15991 if (min_mp == NULL)
15992 {
15993 if (min_address > mp->min_address)
15994 mp->min_address = min_address;
15995 }
15996 else
15997 {
15998 /* We will adjust this below if it is too loose. */
15999 mp->min_address = min_address;
16000
16001 /* Unlink MP from its current position. Since min_mp is non-null,
16002 mp->next must be non-null. */
16003 mp->next->prev = mp->prev;
16004 if (mp->prev != NULL)
16005 mp->prev->next = mp->next;
16006 else
16007 minipool_vector_head = mp->next;
16008
16009 /* Reinsert it after MIN_MP. */
16010 mp->prev = min_mp;
16011 mp->next = min_mp->next;
16012 min_mp->next = mp;
16013 if (mp->next != NULL)
16014 mp->next->prev = mp;
16015 else
16016 minipool_vector_tail = mp;
16017 }
16018
16019 min_mp = mp;
16020
16021 offset = 0;
16022 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16023 {
16024 mp->offset = offset;
16025 if (mp->refcount > 0)
16026 offset += mp->fix_size;
16027
16028 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16029 mp->next->min_address = mp->min_address + mp->fix_size;
16030 }
16031
16032 return min_mp;
16033 }
16034
16035 /* Add a constant to the minipool for a backward reference. Returns the
16036 node added or NULL if the constant will not fit in this pool.
16037
16038 Note that the insertion code for a backwards reference can be
16039 somewhat confusing because the calculated offsets for each fix do
16040 not take into account the size of the pool (which is still under
16041 construction). */
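/* A small worked example (addresses invented for illustration): a fix at
   address 0x2000 whose insn has a negative pool range of 0xff0 bytes gets
   min_address = 0x2000 - 0xff0 = 0x1010, i.e. the constant may not be
   placed below 0x1010 if that insn is to reach it backwards.  */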
16042 static Mnode *
16043 add_minipool_backward_ref (Mfix *fix)
16044 {
16045 /* If set, min_mp is the last pool_entry that has a lower constraint
16046 than the one we are trying to add. */
16047 Mnode *min_mp = NULL;
16048 /* This can be negative, since it is only a constraint. */
16049 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16050 Mnode *mp;
16051
16052 /* If we can't reach the current pool from this insn, or if we can't
16053 insert this entry at the end of the pool without pushing other
16054 fixes out of range, then we don't try. This ensures that we
16055 can't fail later on. */
16056 if (min_address >= minipool_barrier->address
16057 || (minipool_vector_tail->min_address + fix->fix_size
16058 >= minipool_barrier->address))
16059 return NULL;
16060
16061 /* Scan the pool to see if a constant with the same value has
16062 already been added. While we are doing this, also note the
16063 location where we must insert the constant if it doesn't already
16064 exist. */
16065 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16066 {
16067 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16068 && fix->mode == mp->mode
16069 && (!LABEL_P (fix->value)
16070 || (CODE_LABEL_NUMBER (fix->value)
16071 == CODE_LABEL_NUMBER (mp->value)))
16072 && rtx_equal_p (fix->value, mp->value)
16073 /* Check that there is enough slack to move this entry to the
16074 end of the table (this is conservative). */
16075 && (mp->max_address
16076 > (minipool_barrier->address
16077 + minipool_vector_tail->offset
16078 + minipool_vector_tail->fix_size)))
16079 {
16080 mp->refcount++;
16081 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16082 }
16083
16084 if (min_mp != NULL)
16085 mp->min_address += fix->fix_size;
16086 else
16087 {
16088 /* Note the insertion point if necessary. */
16089 if (mp->min_address < min_address)
16090 {
16091 /* For now, we do not allow the insertion of 8-byte alignment
16092 requiring nodes anywhere but at the start of the pool. */
16093 if (ARM_DOUBLEWORD_ALIGN
16094 && fix->fix_size >= 8 && mp->fix_size < 8)
16095 return NULL;
16096 else
16097 min_mp = mp;
16098 }
16099 else if (mp->max_address
16100 < minipool_barrier->address + mp->offset + fix->fix_size)
16101 {
16102 /* Inserting before this entry would push the fix beyond
16103 its maximum address (which can happen if we have
16104 re-located a forwards fix); force the new fix to come
16105 after it. */
16106 if (ARM_DOUBLEWORD_ALIGN
16107 && fix->fix_size >= 8 && mp->fix_size < 8)
16108 return NULL;
16109 else
16110 {
16111 min_mp = mp;
16112 min_address = mp->min_address + fix->fix_size;
16113 }
16114 }
16115 /* Do not insert a non-8-byte aligned quantity before 8-byte
16116 aligned quantities. */
16117 else if (ARM_DOUBLEWORD_ALIGN
16118 && fix->fix_size < 8
16119 && mp->fix_size >= 8)
16120 {
16121 min_mp = mp;
16122 min_address = mp->min_address + fix->fix_size;
16123 }
16124 }
16125 }
16126
16127 /* We need to create a new entry. */
16128 mp = XNEW (Mnode);
16129 mp->fix_size = fix->fix_size;
16130 mp->mode = fix->mode;
16131 mp->value = fix->value;
16132 mp->refcount = 1;
16133 mp->max_address = minipool_barrier->address + 65536;
16134
16135 mp->min_address = min_address;
16136
16137 if (min_mp == NULL)
16138 {
16139 mp->prev = NULL;
16140 mp->next = minipool_vector_head;
16141
16142 if (mp->next == NULL)
16143 {
16144 minipool_vector_tail = mp;
16145 minipool_vector_label = gen_label_rtx ();
16146 }
16147 else
16148 mp->next->prev = mp;
16149
16150 minipool_vector_head = mp;
16151 }
16152 else
16153 {
16154 mp->next = min_mp->next;
16155 mp->prev = min_mp;
16156 min_mp->next = mp;
16157
16158 if (mp->next != NULL)
16159 mp->next->prev = mp;
16160 else
16161 minipool_vector_tail = mp;
16162 }
16163
16164 /* Save the new entry. */
16165 min_mp = mp;
16166
16167 if (mp->prev)
16168 mp = mp->prev;
16169 else
16170 mp->offset = 0;
16171
16172 /* Scan over the following entries and adjust their offsets. */
16173 while (mp->next != NULL)
16174 {
16175 if (mp->next->min_address < mp->min_address + mp->fix_size)
16176 mp->next->min_address = mp->min_address + mp->fix_size;
16177
16178 if (mp->refcount)
16179 mp->next->offset = mp->offset + mp->fix_size;
16180 else
16181 mp->next->offset = mp->offset;
16182
16183 mp = mp->next;
16184 }
16185
16186 return min_mp;
16187 }
16188
16189 static void
16190 assign_minipool_offsets (Mfix *barrier)
16191 {
16192 HOST_WIDE_INT offset = 0;
16193 Mnode *mp;
16194
16195 minipool_barrier = barrier;
16196
16197 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16198 {
16199 mp->offset = offset;
16200
16201 if (mp->refcount > 0)
16202 offset += mp->fix_size;
16203 }
16204 }
16205
16206 /* Output the literal table. */
16207 static void
16208 dump_minipool (rtx_insn *scan)
16209 {
16210 Mnode * mp;
16211 Mnode * nmp;
16212 int align64 = 0;
16213
16214 if (ARM_DOUBLEWORD_ALIGN)
16215 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16216 if (mp->refcount > 0 && mp->fix_size >= 8)
16217 {
16218 align64 = 1;
16219 break;
16220 }
16221
16222 if (dump_file)
16223 fprintf (dump_file,
16224 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16225 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16226
16227 scan = emit_label_after (gen_label_rtx (), scan);
16228 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16229 scan = emit_label_after (minipool_vector_label, scan);
16230
16231 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16232 {
16233 if (mp->refcount > 0)
16234 {
16235 if (dump_file)
16236 {
16237 fprintf (dump_file,
16238 ";; Offset %u, min %ld, max %ld ",
16239 (unsigned) mp->offset, (unsigned long) mp->min_address,
16240 (unsigned long) mp->max_address);
16241 arm_print_value (dump_file, mp->value);
16242 fputc ('\n', dump_file);
16243 }
16244
16245 rtx val = copy_rtx (mp->value);
16246
16247 switch (GET_MODE_SIZE (mp->mode))
16248 {
16249 #ifdef HAVE_consttable_1
16250 case 1:
16251 scan = emit_insn_after (gen_consttable_1 (val), scan);
16252 break;
16253
16254 #endif
16255 #ifdef HAVE_consttable_2
16256 case 2:
16257 scan = emit_insn_after (gen_consttable_2 (val), scan);
16258 break;
16259
16260 #endif
16261 #ifdef HAVE_consttable_4
16262 case 4:
16263 scan = emit_insn_after (gen_consttable_4 (val), scan);
16264 break;
16265
16266 #endif
16267 #ifdef HAVE_consttable_8
16268 case 8:
16269 scan = emit_insn_after (gen_consttable_8 (val), scan);
16270 break;
16271
16272 #endif
16273 #ifdef HAVE_consttable_16
16274 case 16:
16275 scan = emit_insn_after (gen_consttable_16 (val), scan);
16276 break;
16277
16278 #endif
16279 default:
16280 gcc_unreachable ();
16281 }
16282 }
16283
16284 nmp = mp->next;
16285 free (mp);
16286 }
16287
16288 minipool_vector_head = minipool_vector_tail = NULL;
16289 scan = emit_insn_after (gen_consttable_end (), scan);
16290 scan = emit_barrier_after (scan);
16291 }
16292
16293 /* Return the cost of forcibly inserting a barrier after INSN. */
16294 static int
16295 arm_barrier_cost (rtx_insn *insn)
16296 {
16297 /* Basing the location of the pool on the loop depth is preferable,
16298 but at the moment, the basic block information seems to be
16299 corrupt by this stage of the compilation. */
16300 int base_cost = 50;
16301 rtx_insn *next = next_nonnote_insn (insn);
16302
16303 if (next != NULL && LABEL_P (next))
16304 base_cost -= 20;
16305
16306 switch (GET_CODE (insn))
16307 {
16308 case CODE_LABEL:
16309 /* It will always be better to place the table before the label, rather
16310 than after it. */
16311 return 50;
16312
16313 case INSN:
16314 case CALL_INSN:
16315 return base_cost;
16316
16317 case JUMP_INSN:
16318 return base_cost - 10;
16319
16320 default:
16321 return base_cost + 10;
16322 }
16323 }
16324
16325 /* Find the best place in the insn stream in the range
16326 (FIX->address, MAX_ADDRESS) to forcibly insert a minipool barrier.
16327 Create the barrier by inserting a jump, and add a new fix entry
16328 for it. */
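/* create_fix_barrier itself emits only the jump, the barrier and the
   label; dump_minipool later fills the pool in between, so the result
   looks roughly like this (assembly sketched for illustration, labels
   invented):

       b       .Lskip          @ branch around the pool
       .align  2
     .Lpool:
       .word   <constant 0>
       .word   <constant 1>
     .Lskip:
       ...                     @ normal code continues  */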
16329 static Mfix *
16330 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16331 {
16332 HOST_WIDE_INT count = 0;
16333 rtx_barrier *barrier;
16334 rtx_insn *from = fix->insn;
16335 /* The instruction after which we will insert the jump. */
16336 rtx_insn *selected = NULL;
16337 int selected_cost;
16338 /* The address at which the jump instruction will be placed. */
16339 HOST_WIDE_INT selected_address;
16340 Mfix * new_fix;
16341 HOST_WIDE_INT max_count = max_address - fix->address;
16342 rtx_code_label *label = gen_label_rtx ();
16343
16344 selected_cost = arm_barrier_cost (from);
16345 selected_address = fix->address;
16346
16347 while (from && count < max_count)
16348 {
16349 rtx_jump_table_data *tmp;
16350 int new_cost;
16351
16352 /* This code shouldn't have been called if there was a natural barrier
16353 within range. */
16354 gcc_assert (!BARRIER_P (from));
16355
16356 /* Count the length of this insn. This must stay in sync with the
16357 code that pushes minipool fixes. */
16358 if (LABEL_P (from))
16359 count += get_label_padding (from);
16360 else
16361 count += get_attr_length (from);
16362
16363 /* If there is a jump table, add its length. */
16364 if (tablejump_p (from, NULL, &tmp))
16365 {
16366 count += get_jump_table_size (tmp);
16367
16368 /* Jump tables aren't in a basic block, so base the cost on
16369 the dispatch insn. If we select this location, we will
16370 still put the pool after the table. */
16371 new_cost = arm_barrier_cost (from);
16372
16373 if (count < max_count
16374 && (!selected || new_cost <= selected_cost))
16375 {
16376 selected = tmp;
16377 selected_cost = new_cost;
16378 selected_address = fix->address + count;
16379 }
16380
16381 /* Continue after the dispatch table. */
16382 from = NEXT_INSN (tmp);
16383 continue;
16384 }
16385
16386 new_cost = arm_barrier_cost (from);
16387
16388 if (count < max_count
16389 && (!selected || new_cost <= selected_cost))
16390 {
16391 selected = from;
16392 selected_cost = new_cost;
16393 selected_address = fix->address + count;
16394 }
16395
16396 from = NEXT_INSN (from);
16397 }
16398
16399 /* Make sure that we found a place to insert the jump. */
16400 gcc_assert (selected);
16401
16402 /* Make sure we do not split a call and its corresponding
16403 CALL_ARG_LOCATION note. */
16404 if (CALL_P (selected))
16405 {
16406 rtx_insn *next = NEXT_INSN (selected);
16407 if (next && NOTE_P (next)
16408 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16409 selected = next;
16410 }
16411
16412 /* Create a new JUMP_INSN that branches around a barrier. */
16413 from = emit_jump_insn_after (gen_jump (label), selected);
16414 JUMP_LABEL (from) = label;
16415 barrier = emit_barrier_after (from);
16416 emit_label_after (label, barrier);
16417
16418 /* Create a minipool barrier entry for the new barrier. */
16419 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16420 new_fix->insn = barrier;
16421 new_fix->address = selected_address;
16422 new_fix->next = fix->next;
16423 fix->next = new_fix;
16424
16425 return new_fix;
16426 }
16427
16428 /* Record that there is a natural barrier in the insn stream at
16429 ADDRESS. */
16430 static void
16431 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16432 {
16433 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16434
16435 fix->insn = insn;
16436 fix->address = address;
16437
16438 fix->next = NULL;
16439 if (minipool_fix_head != NULL)
16440 minipool_fix_tail->next = fix;
16441 else
16442 minipool_fix_head = fix;
16443
16444 minipool_fix_tail = fix;
16445 }
16446
16447 /* Record INSN, which will need fixing up to load a value from the
16448 minipool. ADDRESS is the offset of the insn since the start of the
16449 function; LOC is a pointer to the part of the insn which requires
16450 fixing; VALUE is the constant that must be loaded, which is of type
16451 MODE. */
16452 static void
16453 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16454 machine_mode mode, rtx value)
16455 {
16456 gcc_assert (!arm_disable_literal_pool);
16457 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16458
16459 fix->insn = insn;
16460 fix->address = address;
16461 fix->loc = loc;
16462 fix->mode = mode;
16463 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16464 fix->value = value;
16465 fix->forwards = get_attr_pool_range (insn);
16466 fix->backwards = get_attr_neg_pool_range (insn);
16467 fix->minipool = NULL;
16468
16469 /* If an insn doesn't have a range defined for it, then it isn't
16470 expecting to be reworked by this code. Better to stop now than
16471 to generate duff assembly code. */
16472 gcc_assert (fix->forwards || fix->backwards);
16473
16474 /* If an entry requires 8-byte alignment then assume all constant pools
16475 require 4 bytes of padding. Trying to do this later on a per-pool
16476 basis is awkward because existing pool entries have to be modified. */
16477 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16478 minipool_pad = 4;
16479
16480 if (dump_file)
16481 {
16482 fprintf (dump_file,
16483 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16484 GET_MODE_NAME (mode),
16485 INSN_UID (insn), (unsigned long) address,
16486 -1 * (long)fix->backwards, (long)fix->forwards);
16487 arm_print_value (dump_file, fix->value);
16488 fprintf (dump_file, "\n");
16489 }
16490
16491 /* Add it to the chain of fixes. */
16492 fix->next = NULL;
16493
16494 if (minipool_fix_head != NULL)
16495 minipool_fix_tail->next = fix;
16496 else
16497 minipool_fix_head = fix;
16498
16499 minipool_fix_tail = fix;
16500 }
16501
16502 /* Return the maximum allowed cost, in insns, of synthesizing a 64-bit
16503 constant inline; a constant whose synthesis would cost more than this
16504 is not expanded inline. */
16505 int
16506 arm_max_const_double_inline_cost ()
16507 {
16508 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16509 }
16510
16511 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16512 Returns the number of insns needed, or 99 if we don't know how to
16513 do it. */
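/* For example (values chosen purely for illustration): the DImode constant
   0x000000ff00000001 splits into lowpart 0x00000001 and highpart
   0x000000ff; each part is a valid ARM immediate, so each half costs one
   insn and the total is 2.  A value such as 0x12345678deadbeef needs
   several MOV/ORR steps per half and therefore costs considerably more.  */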
16514 int
16515 arm_const_double_inline_cost (rtx val)
16516 {
16517 rtx lowpart, highpart;
16518 machine_mode mode;
16519
16520 mode = GET_MODE (val);
16521
16522 if (mode == VOIDmode)
16523 mode = DImode;
16524
16525 gcc_assert (GET_MODE_SIZE (mode) == 8);
16526
16527 lowpart = gen_lowpart (SImode, val);
16528 highpart = gen_highpart_mode (SImode, mode, val);
16529
16530 gcc_assert (CONST_INT_P (lowpart));
16531 gcc_assert (CONST_INT_P (highpart));
16532
16533 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16534 NULL_RTX, NULL_RTX, 0, 0)
16535 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16536 NULL_RTX, NULL_RTX, 0, 0));
16537 }
16538
16539 /* Cost of loading a SImode constant. */
16540 static inline int
16541 arm_const_inline_cost (enum rtx_code code, rtx val)
16542 {
16543 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16544 NULL_RTX, NULL_RTX, 1, 0);
16545 }
16546
16547 /* Return true if it is worthwhile to split a 64-bit constant into two
16548 32-bit operations. This is the case if optimizing for size, or
16549 if we have load delay slots, or if one 32-bit part can be done with
16550 a single data operation. */
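/* For instance (an illustrative value): 0x00000000ff000000 has a high part
   of 0 and a low part of 0xff000000, both single-instruction ARM
   immediates, so splitting the 64-bit move into two 32-bit moves is
   worthwhile even without load delay slots.  */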
16551 bool
16552 arm_const_double_by_parts (rtx val)
16553 {
16554 machine_mode mode = GET_MODE (val);
16555 rtx part;
16556
16557 if (optimize_size || arm_ld_sched)
16558 return true;
16559
16560 if (mode == VOIDmode)
16561 mode = DImode;
16562
16563 part = gen_highpart_mode (SImode, mode, val);
16564
16565 gcc_assert (CONST_INT_P (part));
16566
16567 if (const_ok_for_arm (INTVAL (part))
16568 || const_ok_for_arm (~INTVAL (part)))
16569 return true;
16570
16571 part = gen_lowpart (SImode, val);
16572
16573 gcc_assert (CONST_INT_P (part));
16574
16575 if (const_ok_for_arm (INTVAL (part))
16576 || const_ok_for_arm (~INTVAL (part)))
16577 return true;
16578
16579 return false;
16580 }
16581
16582 /* Return true if it is possible to inline both the high and low parts
16583 of a 64-bit constant into 32-bit data processing instructions. */
16584 bool
16585 arm_const_double_by_immediates (rtx val)
16586 {
16587 machine_mode mode = GET_MODE (val);
16588 rtx part;
16589
16590 if (mode == VOIDmode)
16591 mode = DImode;
16592
16593 part = gen_highpart_mode (SImode, mode, val);
16594
16595 gcc_assert (CONST_INT_P (part));
16596
16597 if (!const_ok_for_arm (INTVAL (part)))
16598 return false;
16599
16600 part = gen_lowpart (SImode, val);
16601
16602 gcc_assert (CONST_INT_P (part));
16603
16604 if (!const_ok_for_arm (INTVAL (part)))
16605 return false;
16606
16607 return true;
16608 }
16609
16610 /* Scan INSN and note any of its operands that need fixing.
16611 If DO_PUSHES is false we do not actually push any of the fixups
16612 needed. */
16613 static void
16614 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16615 {
16616 int opno;
16617
16618 extract_constrain_insn (insn);
16619
16620 if (recog_data.n_alternatives == 0)
16621 return;
16622
16623 /* Fill in recog_op_alt with information about the constraints of
16624 this insn. */
16625 preprocess_constraints (insn);
16626
16627 const operand_alternative *op_alt = which_op_alt ();
16628 for (opno = 0; opno < recog_data.n_operands; opno++)
16629 {
16630 /* Things we need to fix can only occur in inputs. */
16631 if (recog_data.operand_type[opno] != OP_IN)
16632 continue;
16633
16634 /* If this alternative is a memory reference, then any mention
16635 of constants in this alternative is really to fool reload
16636 into allowing us to accept one there. We need to fix them up
16637 now so that we output the right code. */
16638 if (op_alt[opno].memory_ok)
16639 {
16640 rtx op = recog_data.operand[opno];
16641
16642 if (CONSTANT_P (op))
16643 {
16644 if (do_pushes)
16645 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16646 recog_data.operand_mode[opno], op);
16647 }
16648 else if (MEM_P (op)
16649 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16650 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16651 {
16652 if (do_pushes)
16653 {
16654 rtx cop = avoid_constant_pool_reference (op);
16655
16656 /* Casting the address of something to a mode narrower
16657 than a word can cause avoid_constant_pool_reference()
16658 to return the pool reference itself. That's no good to
16659 us here. Let's just hope that we can use the
16660 constant pool value directly. */
16661 if (op == cop)
16662 cop = get_pool_constant (XEXP (op, 0));
16663
16664 push_minipool_fix (insn, address,
16665 recog_data.operand_loc[opno],
16666 recog_data.operand_mode[opno], cop);
16667 }
16668
16669 }
16670 }
16671 }
16672
16673 return;
16674 }
16675
16676 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16677 and unions in the context of ARMv8-M Security Extensions. It is used as a
16678 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16679 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
16680 or four masks, depending on whether it is being computed for a
16681 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16682 respectively. The tree for the type of the argument or a field within an
16683 argument is passed in ARG_TYPE, the current register this argument or field
16684 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16685 argument or field starts at is passed in STARTING_BIT and the last used bit
16686 is kept in LAST_USED_BIT which is also updated accordingly. */
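/* A worked example (layout assumed for illustration, AAPCS-like): for

     struct { char c; short s; }

   passed in r0, 'c' occupies bits 0-7 and 's' bits 16-31, so bits 8-15
   are padding; the function records 0x0000ff00 in padding_bits_to_clear[0]
   and marks r0 in the returned register mask so that the register itself
   is not cleared wholesale.  */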
16687
16688 static unsigned HOST_WIDE_INT
16689 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16690 uint32_t * padding_bits_to_clear,
16691 unsigned starting_bit, int * last_used_bit)
16693 {
16694 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16695
16696 if (TREE_CODE (arg_type) == RECORD_TYPE)
16697 {
16698 unsigned current_bit = starting_bit;
16699 tree field;
16700 long int offset, size;
16701
16702
16703 field = TYPE_FIELDS (arg_type);
16704 while (field)
16705 {
16706 /* The offset within a structure is always an offset from
16707 the start of that structure. Make sure we take that into account
16708 in the calculation of the register-based offset used here. */
16709 offset = starting_bit;
16710 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16711 offset %= 32;
16712
16713 /* This is the actual size of the field, for bitfields this is the
16714 bitfield width and not the container size. */
16715 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16716
16717 if (*last_used_bit != offset)
16718 {
16719 if (offset < *last_used_bit)
16720 {
16721 /* This field's offset is before the 'last_used_bit', which
16722 means this field goes in the next register. So we need to
16723 pad the rest of the current register and increase the
16724 register number. */
16725 uint32_t mask;
16726 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16727 mask++;
16728
16729 padding_bits_to_clear[*regno] |= mask;
16730 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16731 (*regno)++;
16732 }
16733 else
16734 {
16735 /* Otherwise we pad the bits between the last field's end and
16736 the start of the new field. */
16737 uint32_t mask;
16738
16739 mask = ((uint32_t)-1) >> (32 - offset);
16740 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16741 padding_bits_to_clear[*regno] |= mask;
16742 }
16743 current_bit = offset;
16744 }
16745
16746 /* Calculate further padding bits for inner structs/unions too. */
16747 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16748 {
16749 *last_used_bit = current_bit;
16750 not_to_clear_reg_mask
16751 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16752 padding_bits_to_clear, offset,
16753 last_used_bit);
16754 }
16755 else
16756 {
16757 /* Update 'current_bit' with this field's size. If the
16758 'current_bit' lies in a subsequent register, update 'regno' and
16759 reset 'current_bit' to point to the current bit in that new
16760 register. */
16761 current_bit += size;
16762 while (current_bit >= 32)
16763 {
16764 current_bit -= 32;
16765 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16766 (*regno)++;
16767 }
16768 *last_used_bit = current_bit;
16769 }
16770
16771 field = TREE_CHAIN (field);
16772 }
16773 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16774 }
16775 else if (TREE_CODE (arg_type) == UNION_TYPE)
16776 {
16777 tree field, field_t;
16778 int i, regno_t, field_size;
16779 int max_reg = -1;
16780 int max_bit = -1;
16781 uint32_t mask;
16782 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16783 = {-1, -1, -1, -1};
16784
16785 /* To compute the padding bits in a union we only consider bits as
16786 padding bits if they are always either a padding bit or fall outside a
16787 field's size for all fields in the union. */
16788 field = TYPE_FIELDS (arg_type);
16789 while (field)
16790 {
16791 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16792 = {0U, 0U, 0U, 0U};
16793 int last_used_bit_t = *last_used_bit;
16794 regno_t = *regno;
16795 field_t = TREE_TYPE (field);
16796
16797 /* If the field's type is either a record or a union make sure to
16798 compute their padding bits too. */
16799 if (RECORD_OR_UNION_TYPE_P (field_t))
16800 not_to_clear_reg_mask
16801 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16802 &padding_bits_to_clear_t[0],
16803 starting_bit, &last_used_bit_t);
16804 else
16805 {
16806 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16807 regno_t = (field_size / 32) + *regno;
16808 last_used_bit_t = (starting_bit + field_size) % 32;
16809 }
16810
16811 for (i = *regno; i < regno_t; i++)
16812 {
16813 /* For all but the last register used by this field only keep the
16814 padding bits that were padding bits in this field. */
16815 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16816 }
16817
16818 /* For the last register, keep all padding bits that were padding
16819 bits in this field and any padding bits that are still valid
16820 as padding bits but fall outside of this field's size. */
16821 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16822 padding_bits_to_clear_res[regno_t]
16823 &= padding_bits_to_clear_t[regno_t] | mask;
16824
16825 /* Update the maximum size of the fields in terms of registers used
16826 ('max_reg') and the 'last_used_bit' in said register. */
16827 if (max_reg < regno_t)
16828 {
16829 max_reg = regno_t;
16830 max_bit = last_used_bit_t;
16831 }
16832 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16833 max_bit = last_used_bit_t;
16834
16835 field = TREE_CHAIN (field);
16836 }
16837
16838 /* Update the current padding_bits_to_clear using the intersection of the
16839 padding bits of all the fields. */
16840 for (i = *regno; i < max_reg; i++)
16841 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16842
16843 /* Do not keep trailing padding bits, we do not know yet whether this
16844 is the end of the argument. */
16845 mask = ((uint32_t) 1 << max_bit) - 1;
16846 padding_bits_to_clear[max_reg]
16847 |= padding_bits_to_clear_res[max_reg] & mask;
16848
16849 *regno = max_reg;
16850 *last_used_bit = max_bit;
16851 }
16852 else
16853 /* This function should only be used for structs and unions. */
16854 gcc_unreachable ();
16855
16856 return not_to_clear_reg_mask;
16857 }
16858
16859 /* In the context of ARMv8-M Security Extensions, this function is used for both
16860 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16861 registers are used when returning or passing arguments, which is then
16862 returned as a mask. It will also compute a mask to indicate padding/unused
16863 bits for each of these registers, and pass this back through the
16864 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16865 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16866 the starting register used to pass this argument or return value is passed
16867 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16868 for struct and union types. */
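/* For example (hypothetical call, hard-float ABI assumed): a 'double'
   argument starting in s0 occupies two VFP registers, so the bits for s0
   and s1 are set in the returned mask; a 'long long' starting in r0 sets
   the bits for r0 and r1.  Neither records any padding bits, since both
   registers are fully used.  */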
16869
16870 static unsigned HOST_WIDE_INT
16871 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
16872 uint32_t * padding_bits_to_clear)
16874 {
16875 int last_used_bit = 0;
16876 unsigned HOST_WIDE_INT not_to_clear_mask;
16877
16878 if (RECORD_OR_UNION_TYPE_P (arg_type))
16879 {
16880 not_to_clear_mask
16881 = comp_not_to_clear_mask_str_un (arg_type, &regno,
16882 padding_bits_to_clear, 0,
16883 &last_used_bit);
16884
16885
16886 /* If the 'last_used_bit' is not zero, that means we are still using a
16887 part of the last 'regno'. In such cases we must clear the trailing
16888 bits. Otherwise we are not using regno and we should mark it to be
16889 cleared. */
16890 if (last_used_bit != 0)
16891 padding_bits_to_clear[regno]
16892 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
16893 else
16894 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
16895 }
16896 else
16897 {
16898 not_to_clear_mask = 0;
16899 /* We are not dealing with structs nor unions. So these arguments may be
16900 passed in floating point registers too. In some cases a BLKmode is
16901 used when returning or passing arguments in multiple VFP registers. */
16902 if (GET_MODE (arg_rtx) == BLKmode)
16903 {
16904 int i, arg_regs;
16905 rtx reg;
16906
16907 /* This should really only occur when dealing with the hard-float
16908 ABI. */
16909 gcc_assert (TARGET_HARD_FLOAT_ABI);
16910
16911 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
16912 {
16913 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
16914 gcc_assert (REG_P (reg));
16915
16916 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
16917
16918 /* If we are dealing with DF mode, make sure we don't
16919 clear either of the registers it addresses. */
16920 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
16921 if (arg_regs > 1)
16922 {
16923 unsigned HOST_WIDE_INT mask;
16924 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
16925 mask -= HOST_WIDE_INT_1U << REGNO (reg);
16926 not_to_clear_mask |= mask;
16927 }
16928 }
16929 }
16930 else
16931 {
16932 /* Otherwise we can rely on the MODE to determine how many registers
16933 are being used by this argument. */
16934 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
16935 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16936 if (arg_regs > 1)
16937 {
16938 unsigned HOST_WIDE_INT
16939 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
16940 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16941 not_to_clear_mask |= mask;
16942 }
16943 }
16944 }
16945
16946 return not_to_clear_mask;
16947 }
16948
16949 /* Clear caller-saved registers not used to pass arguments before a
16950 cmse_nonsecure_call. Saving, clearing and restoring of callee-saved
16951 registers is done in the __gnu_cmse_nonsecure_call libcall.
16952 See libgcc/config/arm/cmse_nonsecure_call.S. */
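/* The sequence inserted before each such call looks roughly like this
   (register numbers and ordering sketched for illustration only):

       movs    r1, r4                     @ clear unused argument registers by
       movs    r2, r4                     @ copying the branch-target address,
       movs    r3, r4                     @ which carries no secret data
       bl      __gnu_cmse_nonsecure_call  @ target address is in r4

   where the LSB of the address has already been cleared by the shift pair
   emitted below.  */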
16953
16954 static void
16955 cmse_nonsecure_call_clear_caller_saved (void)
16956 {
16957 basic_block bb;
16958
16959 FOR_EACH_BB_FN (bb, cfun)
16960 {
16961 rtx_insn *insn;
16962
16963 FOR_BB_INSNS (bb, insn)
16964 {
16965 uint64_t to_clear_mask, float_mask;
16966 rtx_insn *seq;
16967 rtx pat, call, unspec, reg, cleared_reg, tmp;
16968 unsigned int regno, maxregno;
16969 rtx address;
16970 CUMULATIVE_ARGS args_so_far_v;
16971 cumulative_args_t args_so_far;
16972 tree arg_type, fntype;
16973 bool using_r4, first_param = true;
16974 function_args_iterator args_iter;
16975 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
16976 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
16977
16978 if (!NONDEBUG_INSN_P (insn))
16979 continue;
16980
16981 if (!CALL_P (insn))
16982 continue;
16983
16984 pat = PATTERN (insn);
16985 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
16986 call = XVECEXP (pat, 0, 0);
16987
16988 /* Get the real call RTX if the insn sets a value, i.e. returns. */
16989 if (GET_CODE (call) == SET)
16990 call = SET_SRC (call);
16991
16992 /* Check if it is a cmse_nonsecure_call. */
16993 unspec = XEXP (call, 0);
16994 if (GET_CODE (unspec) != UNSPEC
16995 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
16996 continue;
16997
16998 /* Determine the caller-saved registers we need to clear. */
16999 to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
17000 maxregno = NUM_ARG_REGS - 1;
17001 /* Only look at the caller-saved floating point registers in case of
17002 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
17003 lazy stores and loads, which clear both caller- and callee-saved
17004 registers. */
17005 if (TARGET_HARD_FLOAT_ABI)
17006 {
17007 float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
17008 float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
17009 to_clear_mask |= float_mask;
17010 maxregno = D7_VFP_REGNUM;
17011 }
17012
17013 /* Make sure the register used to hold the function address is not
17014 cleared. */
17015 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17016 gcc_assert (MEM_P (address));
17017 gcc_assert (REG_P (XEXP (address, 0)));
17018 to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
17019
17020 /* Set basic block of call insn so that df rescan is performed on
17021 insns inserted here. */
17022 set_block_for_insn (insn, bb);
17023 df_set_flags (DF_DEFER_INSN_RESCAN);
17024 start_sequence ();
17025
17026 /* Make sure the scheduler doesn't schedule other insns beyond
17027 here. */
17028 emit_insn (gen_blockage ());
17029
17030 /* Walk through all arguments and clear registers appropriately.
17031 */
17032 fntype = TREE_TYPE (MEM_EXPR (address));
17033 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17034 NULL_TREE);
17035 args_so_far = pack_cumulative_args (&args_so_far_v);
17036 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17037 {
17038 rtx arg_rtx;
17039 machine_mode arg_mode = TYPE_MODE (arg_type);
17040
17041 if (VOID_TYPE_P (arg_type))
17042 continue;
17043
17044 if (!first_param)
17045 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17046 true);
17047
17048 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17049 true);
17050 gcc_assert (REG_P (arg_rtx));
17051 to_clear_mask
17052 &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
17053 REGNO (arg_rtx),
17054 padding_bits_to_clear_ptr);
17055
17056 first_param = false;
17057 }
17058
17059 /* Clear padding bits where needed. */
17060 cleared_reg = XEXP (address, 0);
17061 reg = gen_rtx_REG (SImode, IP_REGNUM);
17062 using_r4 = false;
17063 for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
17064 {
17065 if (padding_bits_to_clear[regno] == 0)
17066 continue;
17067
17068 /* If this is a Thumb-1 target, copy the address of the function
17069 we are calling from 'r4' into 'ip' so that we can use r4 to
17070 clear the unused bits in the arguments. */
17071 if (TARGET_THUMB1 && !using_r4)
17072 {
17073 using_r4 = true;
17074 reg = cleared_reg;
17075 emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
17076 reg);
17077 }
17078
17079 tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
17080 emit_move_insn (reg, tmp);
17081 /* Also fill the top half of the negated
17082 padding_bits_to_clear. */
17083 if (((~padding_bits_to_clear[regno]) >> 16) > 0)
17084 {
17085 tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
17086 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
17087 GEN_INT (16),
17088 GEN_INT (16)),
17089 tmp));
17090 }
17091
17092 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
17093 gen_rtx_REG (SImode, regno),
17094 reg));
17095
17096 }
17097 if (using_r4)
17098 emit_move_insn (cleared_reg,
17099 gen_rtx_REG (SImode, IP_REGNUM));
17100
17101 /* We use right shift and left shift to clear the LSB of the address
17102 we jump to instead of using bic, to avoid having to use an extra
17103 register on Thumb-1. */
17104 tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
17105 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17106 tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
17107 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17108
17109 /* Clear all registers that could leak before doing a non-secure
17110 call. */
17111 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17112 {
17113 if (!(to_clear_mask & (1LL << regno)))
17114 continue;
17115
17116 /* If regno is an even vfp register and its successor is also to
17117 be cleared, use vmov. */
17118 if (IS_VFP_REGNUM (regno))
17119 {
17120 if (TARGET_VFP_DOUBLE
17121 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17122 && to_clear_mask & (1LL << (regno + 1)))
17123 emit_move_insn (gen_rtx_REG (DFmode, regno++),
17124 CONST0_RTX (DFmode));
17125 else
17126 emit_move_insn (gen_rtx_REG (SFmode, regno),
17127 CONST0_RTX (SFmode));
17128 }
17129 else
17130 emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
17131 }
17132
17133 seq = get_insns ();
17134 end_sequence ();
17135 emit_insn_before (seq, insn);
17136
17137 }
17138 }
17139 }
17140
17141 /* Rewrite a move insn as a subtract of 0 if the condition codes will
17142 be useful in the next conditional jump insn. */
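/* For example (illustrative Thumb-1 code): in a sequence such as

       mov   r2, r3
       ...
       cmp   r2, #0
       beq   .L1

   the move is rewritten as "subs r2, r3, #0", whose flags make the
   explicit compare against zero redundant, provided none of the
   intervening insns clobber the compared register or the condition
   codes.  */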
17143
17144 static void
17145 thumb1_reorg (void)
17146 {
17147 basic_block bb;
17148
17149 FOR_EACH_BB_FN (bb, cfun)
17150 {
17151 rtx dest, src;
17152 rtx cmp, op0, op1, set = NULL;
17153 rtx_insn *prev, *insn = BB_END (bb);
17154 bool insn_clobbered = false;
17155
17156 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17157 insn = PREV_INSN (insn);
17158
17159 /* Find the last cbranchsi4_insn in basic block BB. */
17160 if (insn == BB_HEAD (bb)
17161 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17162 continue;
17163
17164 /* Get the register with which we are comparing. */
17165 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17166 op0 = XEXP (cmp, 0);
17167 op1 = XEXP (cmp, 1);
17168
17169 /* Check that comparison is against ZERO. */
17170 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17171 continue;
17172
17173 /* Find the first flag setting insn before INSN in basic block BB. */
17174 gcc_assert (insn != BB_HEAD (bb));
17175 for (prev = PREV_INSN (insn);
17176 (!insn_clobbered
17177 && prev != BB_HEAD (bb)
17178 && (NOTE_P (prev)
17179 || DEBUG_INSN_P (prev)
17180 || ((set = single_set (prev)) != NULL
17181 && get_attr_conds (prev) == CONDS_NOCOND)));
17182 prev = PREV_INSN (prev))
17183 {
17184 if (reg_set_p (op0, prev))
17185 insn_clobbered = true;
17186 }
17187
17188 /* Skip if op0 is clobbered by an insn other than prev. */
17189 if (insn_clobbered)
17190 continue;
17191
17192 if (!set)
17193 continue;
17194
17195 dest = SET_DEST (set);
17196 src = SET_SRC (set);
17197 if (!low_register_operand (dest, SImode)
17198 || !low_register_operand (src, SImode))
17199 continue;
17200
17201 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17202 in INSN. Both src and dest of the move insn are checked. */
17203 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17204 {
17205 dest = copy_rtx (dest);
17206 src = copy_rtx (src);
17207 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17208 PATTERN (prev) = gen_rtx_SET (dest, src);
17209 INSN_CODE (prev) = -1;
17210 /* Set test register in INSN to dest. */
17211 XEXP (cmp, 0) = copy_rtx (dest);
17212 INSN_CODE (insn) = -1;
17213 }
17214 }
17215 }
17216
17217 /* Convert instructions to their cc-clobbering variant if possible, since
17218 that allows us to use smaller encodings. */
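/* For example (illustrative): when the condition codes are dead at this
   point,

       add   r0, r1, r2       @ needs a 32-bit encoding (add.w)

   can become

       adds  r0, r1, r2       @ 16-bit encoding, clobbers the flags

   which is achieved below by adding a clobber of CC_REGNUM to the insn
   pattern.  */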
17219
17220 static void
17221 thumb2_reorg (void)
17222 {
17223 basic_block bb;
17224 regset_head live;
17225
17226 INIT_REG_SET (&live);
17227
17228 /* We are freeing block_for_insn in the toplev to keep compatibility
17229 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17230 compute_bb_for_insn ();
17231 df_analyze ();
17232
17233 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17234
17235 FOR_EACH_BB_FN (bb, cfun)
17236 {
17237 if ((current_tune->disparage_flag_setting_t16_encodings
17238 == tune_params::DISPARAGE_FLAGS_ALL)
17239 && optimize_bb_for_speed_p (bb))
17240 continue;
17241
17242 rtx_insn *insn;
17243 Convert_Action action = SKIP;
17244 Convert_Action action_for_partial_flag_setting
17245 = ((current_tune->disparage_flag_setting_t16_encodings
17246 != tune_params::DISPARAGE_FLAGS_NEITHER)
17247 && optimize_bb_for_speed_p (bb))
17248 ? SKIP : CONV;
17249
17250 COPY_REG_SET (&live, DF_LR_OUT (bb));
17251 df_simulate_initialize_backwards (bb, &live);
17252 FOR_BB_INSNS_REVERSE (bb, insn)
17253 {
17254 if (NONJUMP_INSN_P (insn)
17255 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17256 && GET_CODE (PATTERN (insn)) == SET)
17257 {
17258 action = SKIP;
17259 rtx pat = PATTERN (insn);
17260 rtx dst = XEXP (pat, 0);
17261 rtx src = XEXP (pat, 1);
17262 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17263
17264 if (UNARY_P (src) || BINARY_P (src))
17265 op0 = XEXP (src, 0);
17266
17267 if (BINARY_P (src))
17268 op1 = XEXP (src, 1);
17269
17270 if (low_register_operand (dst, SImode))
17271 {
17272 switch (GET_CODE (src))
17273 {
17274 case PLUS:
17275 /* Adding two registers and storing the result
17276 in the first source is already a 16-bit
17277 operation. */
17278 if (rtx_equal_p (dst, op0)
17279 && register_operand (op1, SImode))
17280 break;
17281
17282 if (low_register_operand (op0, SImode))
17283 {
17284 /* ADDS <Rd>,<Rn>,<Rm> */
17285 if (low_register_operand (op1, SImode))
17286 action = CONV;
17287 /* ADDS <Rdn>,#<imm8> */
17288 /* SUBS <Rdn>,#<imm8> */
17289 else if (rtx_equal_p (dst, op0)
17290 && CONST_INT_P (op1)
17291 && IN_RANGE (INTVAL (op1), -255, 255))
17292 action = CONV;
17293 /* ADDS <Rd>,<Rn>,#<imm3> */
17294 /* SUBS <Rd>,<Rn>,#<imm3> */
17295 else if (CONST_INT_P (op1)
17296 && IN_RANGE (INTVAL (op1), -7, 7))
17297 action = CONV;
17298 }
17299 /* ADCS <Rd>, <Rn> */
17300 else if (GET_CODE (XEXP (src, 0)) == PLUS
17301 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17302 && low_register_operand (XEXP (XEXP (src, 0), 1),
17303 SImode)
17304 && COMPARISON_P (op1)
17305 && cc_register (XEXP (op1, 0), VOIDmode)
17306 && maybe_get_arm_condition_code (op1) == ARM_CS
17307 && XEXP (op1, 1) == const0_rtx)
17308 action = CONV;
17309 break;
17310
17311 case MINUS:
17312 /* RSBS <Rd>,<Rn>,#0
17313 Not handled here: see NEG below. */
17314 /* SUBS <Rd>,<Rn>,#<imm3>
17315 SUBS <Rdn>,#<imm8>
17316 Not handled here: see PLUS above. */
17317 /* SUBS <Rd>,<Rn>,<Rm> */
17318 if (low_register_operand (op0, SImode)
17319 && low_register_operand (op1, SImode))
17320 action = CONV;
17321 break;
17322
17323 case MULT:
17324 /* MULS <Rdm>,<Rn>,<Rdm>
17325 As an exception to the rule, this is only used
17326 when optimizing for size since MULS is slow on all
17327 known implementations. We do not even want to use
17328 MULS in cold code, if optimizing for speed, so we
17329 test the global flag here. */
17330 if (!optimize_size)
17331 break;
17332 /* Fall through. */
17333 case AND:
17334 case IOR:
17335 case XOR:
17336 /* ANDS <Rdn>,<Rm> */
17337 if (rtx_equal_p (dst, op0)
17338 && low_register_operand (op1, SImode))
17339 action = action_for_partial_flag_setting;
17340 else if (rtx_equal_p (dst, op1)
17341 && low_register_operand (op0, SImode))
17342 action = action_for_partial_flag_setting == SKIP
17343 ? SKIP : SWAP_CONV;
17344 break;
17345
17346 case ASHIFTRT:
17347 case ASHIFT:
17348 case LSHIFTRT:
17349 /* ASRS <Rdn>,<Rm> */
17350 /* LSRS <Rdn>,<Rm> */
17351 /* LSLS <Rdn>,<Rm> */
17352 if (rtx_equal_p (dst, op0)
17353 && low_register_operand (op1, SImode))
17354 action = action_for_partial_flag_setting;
17355 /* ASRS <Rd>,<Rm>,#<imm5> */
17356 /* LSRS <Rd>,<Rm>,#<imm5> */
17357 /* LSLS <Rd>,<Rm>,#<imm5> */
17358 else if (low_register_operand (op0, SImode)
17359 && CONST_INT_P (op1)
17360 && IN_RANGE (INTVAL (op1), 0, 31))
17361 action = action_for_partial_flag_setting;
17362 break;
17363
17364 case ROTATERT:
17365 /* RORS <Rdn>,<Rm> */
17366 if (rtx_equal_p (dst, op0)
17367 && low_register_operand (op1, SImode))
17368 action = action_for_partial_flag_setting;
17369 break;
17370
17371 case NOT:
17372 /* MVNS <Rd>,<Rm> */
17373 if (low_register_operand (op0, SImode))
17374 action = action_for_partial_flag_setting;
17375 break;
17376
17377 case NEG:
17378 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17379 if (low_register_operand (op0, SImode))
17380 action = CONV;
17381 break;
17382
17383 case CONST_INT:
17384 /* MOVS <Rd>,#<imm8> */
17385 if (CONST_INT_P (src)
17386 && IN_RANGE (INTVAL (src), 0, 255))
17387 action = action_for_partial_flag_setting;
17388 break;
17389
17390 case REG:
17391 /* MOVS and MOV<c> with registers have different
17392 encodings, so are not relevant here. */
17393 break;
17394
17395 default:
17396 break;
17397 }
17398 }
17399
17400 if (action != SKIP)
17401 {
17402 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17403 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17404 rtvec vec;
17405
17406 if (action == SWAP_CONV)
17407 {
17408 src = copy_rtx (src);
17409 XEXP (src, 0) = op1;
17410 XEXP (src, 1) = op0;
17411 pat = gen_rtx_SET (dst, src);
17412 vec = gen_rtvec (2, pat, clobber);
17413 }
17414 else /* action == CONV */
17415 vec = gen_rtvec (2, pat, clobber);
17416
17417 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17418 INSN_CODE (insn) = -1;
17419 }
17420 }
17421
17422 if (NONDEBUG_INSN_P (insn))
17423 df_simulate_one_insn_backwards (bb, insn, &live);
17424 }
17425 }
17426
17427 CLEAR_REG_SET (&live);
17428 }
17429
17430 /* GCC puts the pool in the wrong place for ARM, since we can only
17431 load addresses a limited distance around the pc. We do some
17432 special munging to move the constant pool values to the correct
17433 point in the code. */
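/* For example (ranges quoted approximately, from the pool_range insn
   attributes rather than recomputed here): an ARM-mode
   "ldr rN, [pc, #offset]" reaches only about +/-4K around the insn, and a
   Thumb-1 literal load only about 1K forwards, so every constant must land
   in a minipool that each insn referencing it can still address.  */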
17434 static void
17435 arm_reorg (void)
17436 {
17437 rtx_insn *insn;
17438 HOST_WIDE_INT address = 0;
17439 Mfix * fix;
17440
17441 if (use_cmse)
17442 cmse_nonsecure_call_clear_caller_saved ();
17443 if (TARGET_THUMB1)
17444 thumb1_reorg ();
17445 else if (TARGET_THUMB2)
17446 thumb2_reorg ();
17447
17448 /* Ensure all insns that must be split have been split at this point.
17449 Otherwise, the pool placement code below may compute incorrect
17450 insn lengths. Note that when optimizing, all insns have already
17451 been split at this point. */
17452 if (!optimize)
17453 split_all_insns_noflow ();
17454
17455 /* If literal pools are disabled, do not attempt to create one; none
17456 should be needed at this point. */
17457 if (arm_disable_literal_pool)
17458 return;
17459
17460 minipool_fix_head = minipool_fix_tail = NULL;
17461
17462 /* The first insn must always be a note, or the code below won't
17463 scan it properly. */
17464 insn = get_insns ();
17465 gcc_assert (NOTE_P (insn));
17466 minipool_pad = 0;
17467
17468 /* Scan all the insns and record the operands that will need fixing. */
17469 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17470 {
17471 if (BARRIER_P (insn))
17472 push_minipool_barrier (insn, address);
17473 else if (INSN_P (insn))
17474 {
17475 rtx_jump_table_data *table;
17476
17477 note_invalid_constants (insn, address, true);
17478 address += get_attr_length (insn);
17479
17480 /* If the insn is a vector jump, add the size of the table
17481 and skip the table. */
17482 if (tablejump_p (insn, NULL, &table))
17483 {
17484 address += get_jump_table_size (table);
17485 insn = table;
17486 }
17487 }
17488 else if (LABEL_P (insn))
17489 /* Add the worst-case padding due to alignment. We don't add
17490 the _current_ padding because the minipool insertions
17491 themselves might change it. */
17492 address += get_label_padding (insn);
17493 }
17494
17495 fix = minipool_fix_head;
17496
17497 /* Now scan the fixups and perform the required changes. */
17498 while (fix)
17499 {
17500 Mfix * ftmp;
17501 Mfix * fdel;
17502 Mfix * last_added_fix;
17503 Mfix * last_barrier = NULL;
17504 Mfix * this_fix;
17505
17506 /* Skip any further barriers before the next fix. */
17507 while (fix && BARRIER_P (fix->insn))
17508 fix = fix->next;
17509
17510 /* No more fixes. */
17511 if (fix == NULL)
17512 break;
17513
17514 last_added_fix = NULL;
17515
17516 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17517 {
17518 if (BARRIER_P (ftmp->insn))
17519 {
17520 if (ftmp->address >= minipool_vector_head->max_address)
17521 break;
17522
17523 last_barrier = ftmp;
17524 }
17525 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17526 break;
17527
17528 last_added_fix = ftmp; /* Keep track of the last fix added. */
17529 }
17530
17531 /* If we found a barrier, drop back to that; any fixes that we
17532 could have reached but come after the barrier will now go in
17533 the next mini-pool. */
17534 if (last_barrier != NULL)
17535 {
17536 /* Reduce the refcount for those fixes that won't go into this
17537 pool after all. */
17538 for (fdel = last_barrier->next;
17539 fdel && fdel != ftmp;
17540 fdel = fdel->next)
17541 {
17542 fdel->minipool->refcount--;
17543 fdel->minipool = NULL;
17544 }
17545
17546 ftmp = last_barrier;
17547 }
17548 else
17549 {
17550 /* ftmp is the first fix that we can't fit into this pool and
17551 there are no natural barriers that we could use. Insert a
17552 new barrier in the code somewhere between the previous
17553 fix and this one, and arrange to jump around it. */
17554 HOST_WIDE_INT max_address;
17555
17556 /* The last item on the list of fixes must be a barrier, so
17557 we can never run off the end of the list of fixes without
17558 last_barrier being set. */
17559 gcc_assert (ftmp);
17560
17561 max_address = minipool_vector_head->max_address;
17562 /* Check that there isn't another fix that is in range that
17563 we couldn't fit into this pool because the pool was
17564 already too large: we need to put the pool before such an
17565 instruction. The pool itself may come just after the
17566 fix because create_fix_barrier also allows space for a
17567 jump instruction. */
17568 if (ftmp->address < max_address)
17569 max_address = ftmp->address + 1;
17570
17571 last_barrier = create_fix_barrier (last_added_fix, max_address);
17572 }
17573
17574 assign_minipool_offsets (last_barrier);
17575
17576 while (ftmp)
17577 {
17578 if (!BARRIER_P (ftmp->insn)
17579 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17580 == NULL))
17581 break;
17582
17583 ftmp = ftmp->next;
17584 }
17585
17586 /* Scan over the fixes we have identified for this pool, fixing them
17587 up and adding the constants to the pool itself. */
17588 for (this_fix = fix; this_fix && ftmp != this_fix;
17589 this_fix = this_fix->next)
17590 if (!BARRIER_P (this_fix->insn))
17591 {
17592 rtx addr
17593 = plus_constant (Pmode,
17594 gen_rtx_LABEL_REF (VOIDmode,
17595 minipool_vector_label),
17596 this_fix->minipool->offset);
17597 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17598 }
17599
17600 dump_minipool (last_barrier->insn);
17601 fix = ftmp;
17602 }
17603
17604 /* From now on we must synthesize any constants that we can't handle
17605 directly. This can happen if the RTL gets split during final
17606 instruction generation. */
17607 cfun->machine->after_arm_reorg = 1;
17608
17609 /* Free the minipool memory. */
17610 obstack_free (&minipool_obstack, minipool_startobj);
17611 }
17612 \f
17613 /* Routines to output assembly language. */
17614
17615 /* Return the string representation of the passed-in real value. */
17616 static const char *
17617 fp_const_from_val (REAL_VALUE_TYPE *r)
17618 {
17619 if (!fp_consts_inited)
17620 init_fp_table ();
17621
17622 gcc_assert (real_equal (r, &value_fp0));
17623 return "0";
17624 }
17625
17626 /* OPERANDS[0] is the entire list of insns that constitute pop,
17627 OPERANDS[1] is the base register, RETURN_PC is true iff the return
17628 insn is in the list, UPDATE is true iff the list contains an
17629 explicit update of the base register. */
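/* Typical outputs (illustrative): with SP as the base register and an
   update, "pop {r4, r5, pc}"; with another base register and no update,
   "ldm r3, {r4, r5}"; when returning from an interrupt, the "^" suffix is
   appended so that SPSR is copied back to CPSR as well.  */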
17630 void
17631 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17632 bool update)
17633 {
17634 int i;
17635 char pattern[100];
17636 int offset;
17637 const char *conditional;
17638 int num_saves = XVECLEN (operands[0], 0);
17639 unsigned int regno;
17640 unsigned int regno_base = REGNO (operands[1]);
17641 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17642
17643 offset = 0;
17644 offset += update ? 1 : 0;
17645 offset += return_pc ? 1 : 0;
17646
17647 /* Is the base register in the list? */
17648 for (i = offset; i < num_saves; i++)
17649 {
17650 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17651 /* If SP is in the list, then the base register must be SP. */
17652 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17653 /* If base register is in the list, there must be no explicit update. */
17654 if (regno == regno_base)
17655 gcc_assert (!update);
17656 }
17657
17658 conditional = reverse ? "%?%D0" : "%?%d0";
17659 /* Can't use POP if returning from an interrupt. */
17660 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17661 sprintf (pattern, "pop%s\t{", conditional);
17662 else
17663 {
17664 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17665 It's just a convention; their semantics are identical. */
17666 if (regno_base == SP_REGNUM)
17667 sprintf (pattern, "ldmfd%s\t", conditional);
17668 else if (update)
17669 sprintf (pattern, "ldmia%s\t", conditional);
17670 else
17671 sprintf (pattern, "ldm%s\t", conditional);
17672
17673 strcat (pattern, reg_names[regno_base]);
17674 if (update)
17675 strcat (pattern, "!, {");
17676 else
17677 strcat (pattern, ", {");
17678 }
17679
17680 /* Output the first destination register. */
17681 strcat (pattern,
17682 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17683
17684 /* Output the rest of the destination registers. */
17685 for (i = offset + 1; i < num_saves; i++)
17686 {
17687 strcat (pattern, ", ");
17688 strcat (pattern,
17689 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17690 }
17691
17692 strcat (pattern, "}");
17693
17694 if (interrupt_p && return_pc)
17695 strcat (pattern, "^");
17696
17697 output_asm_insn (pattern, &cond);
17698 }
17699
17700
17701 /* Output the assembly for a store multiple. */
17702
17703 const char *
17704 vfp_output_vstmd (rtx * operands)
17705 {
17706 char pattern[100];
17707 int p;
17708 int base;
17709 int i;
17710 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17711 ? XEXP (operands[0], 0)
17712 : XEXP (XEXP (operands[0], 0), 0);
17713 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17714
17715 if (push_p)
17716 strcpy (pattern, "vpush%?.64\t{%P1");
17717 else
17718 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17719
17720 p = strlen (pattern);
17721
17722 gcc_assert (REG_P (operands[1]));
17723
17724 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17725 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17726 {
17727 p += sprintf (&pattern[p], ", d%d", base + i);
17728 }
17729 strcpy (&pattern[p], "}");
17730
17731 output_asm_insn (pattern, operands);
17732 return "";
17733 }
17734
17735
17736 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17737 number of bytes pushed. */
17738
17739 static int
17740 vfp_emit_fstmd (int base_reg, int count)
17741 {
17742 rtx par;
17743 rtx dwarf;
17744 rtx tmp, reg;
17745 int i;
17746
17747 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17748 register pairs are stored by a store multiple insn. We avoid this
17749 by pushing an extra pair. */
17750 if (count == 2 && !arm_arch6)
17751 {
17752 if (base_reg == LAST_VFP_REGNUM - 3)
17753 base_reg -= 2;
17754 count++;
17755 }
17756
17757 /* FSTMD may not store more than 16 doubleword registers at once. Split
17758 larger stores into multiple parts (up to a maximum of two, in
17759 practice). */
17760 if (count > 16)
17761 {
17762 int saved;
17763 /* NOTE: base_reg is an internal register number, so each D register
17764 counts as 2. */
17765 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17766 saved += vfp_emit_fstmd (base_reg, 16);
17767 return saved;
17768 }
17769
17770 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17771 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17772
17773 reg = gen_rtx_REG (DFmode, base_reg);
17774 base_reg += 2;
17775
17776 XVECEXP (par, 0, 0)
17777 = gen_rtx_SET (gen_frame_mem
17778 (BLKmode,
17779 gen_rtx_PRE_MODIFY (Pmode,
17780 stack_pointer_rtx,
17781 plus_constant
17782 (Pmode, stack_pointer_rtx,
17783 - (count * 8)))
17784 ),
17785 gen_rtx_UNSPEC (BLKmode,
17786 gen_rtvec (1, reg),
17787 UNSPEC_PUSH_MULT));
17788
17789 tmp = gen_rtx_SET (stack_pointer_rtx,
17790 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17791 RTX_FRAME_RELATED_P (tmp) = 1;
17792 XVECEXP (dwarf, 0, 0) = tmp;
17793
17794 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17795 RTX_FRAME_RELATED_P (tmp) = 1;
17796 XVECEXP (dwarf, 0, 1) = tmp;
17797
17798 for (i = 1; i < count; i++)
17799 {
17800 reg = gen_rtx_REG (DFmode, base_reg);
17801 base_reg += 2;
17802 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17803
17804 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17805 plus_constant (Pmode,
17806 stack_pointer_rtx,
17807 i * 8)),
17808 reg);
17809 RTX_FRAME_RELATED_P (tmp) = 1;
17810 XVECEXP (dwarf, 0, i + 1) = tmp;
17811 }
17812
17813 par = emit_insn (par);
17814 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17815 RTX_FRAME_RELATED_P (par) = 1;
17816
17817 return count * 8;
17818 }
17819
17820 /* Return true if -mcmse has been passed and the function pointed to by 'addr'
17821 has the cmse_nonsecure_call attribute; return false otherwise. */
17822
17823 bool
17824 detect_cmse_nonsecure_call (tree addr)
17825 {
17826 if (!addr)
17827 return false;
17828
17829 tree fntype = TREE_TYPE (addr);
17830 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
17831 TYPE_ATTRIBUTES (fntype)))
17832 return true;
17833 return false;
17834 }
17835
17836
17837 /* Emit a call instruction with pattern PAT. ADDR is the address of
17838 the call target. */
17839
17840 void
17841 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17842 {
17843 rtx insn;
17844
17845 insn = emit_call_insn (pat);
17846
17847 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17848 If the call might use such an entry, add a use of the PIC register
17849 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17850 if (TARGET_VXWORKS_RTP
17851 && flag_pic
17852 && !sibcall
17853 && GET_CODE (addr) == SYMBOL_REF
17854 && (SYMBOL_REF_DECL (addr)
17855 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17856 : !SYMBOL_REF_LOCAL_P (addr)))
17857 {
17858 require_pic_register ();
17859 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17860 }
17861
17862 if (TARGET_AAPCS_BASED)
17863 {
17864 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17865 linker. We need to add an IP clobber to allow setting
17866 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17867 is not needed since it's a fixed register. */
17868 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17869 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17870 }
17871 }
17872
17873 /* Output a 'call' insn. */
17874 const char *
17875 output_call (rtx *operands)
17876 {
17877 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17878
17879 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17880 if (REGNO (operands[0]) == LR_REGNUM)
17881 {
17882 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17883 output_asm_insn ("mov%?\t%0, %|lr", operands);
17884 }
17885
17886 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17887
17888 if (TARGET_INTERWORK || arm_arch4t)
17889 output_asm_insn ("bx%?\t%0", operands);
17890 else
17891 output_asm_insn ("mov%?\t%|pc, %0", operands);
17892
17893 return "";
17894 }
17895
17896 /* Output a move from ARM registers to ARM registers of a long double.
17897 OPERANDS[0] is the destination.
17898 OPERANDS[1] is the source. */
17899 const char *
17900 output_mov_long_double_arm_from_arm (rtx *operands)
17901 {
17902 /* We have to be careful here because the two might overlap. */
17903 int dest_start = REGNO (operands[0]);
17904 int src_start = REGNO (operands[1]);
17905 rtx ops[2];
17906 int i;
17907
17908 if (dest_start < src_start)
17909 {
17910 for (i = 0; i < 3; i++)
17911 {
17912 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17913 ops[1] = gen_rtx_REG (SImode, src_start + i);
17914 output_asm_insn ("mov%?\t%0, %1", ops);
17915 }
17916 }
17917 else
17918 {
17919 for (i = 2; i >= 0; i--)
17920 {
17921 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17922 ops[1] = gen_rtx_REG (SImode, src_start + i);
17923 output_asm_insn ("mov%?\t%0, %1", ops);
17924 }
17925 }
17926
17927 return "";
17928 }
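/* For instance, moving {r1, r2, r3} into {r0, r1, r2} has the destination
   starting below the source, so the first loop copies upwards:

     mov     r0, r1
     mov     r1, r2
     mov     r2, r3

   A move in the other direction is emitted from the highest word down,
   so no source register is clobbered before it has been read.  */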
17929
17930 void
17931 arm_emit_movpair (rtx dest, rtx src)
17932 {
17933 /* If the src is an immediate, simplify it. */
17934 if (CONST_INT_P (src))
17935 {
17936 HOST_WIDE_INT val = INTVAL (src);
17937 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17938 if ((val >> 16) & 0x0000ffff)
17939 {
17940 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17941 GEN_INT (16)),
17942 GEN_INT ((val >> 16) & 0x0000ffff));
17943 rtx_insn *insn = get_last_insn ();
17944 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17945 }
17946 return;
17947 }
17948 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17949 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17950 rtx_insn *insn = get_last_insn ();
17951 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17952 }
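/* By way of example (a sketch, assuming a movw/movt-capable target):
   calling arm_emit_movpair with DEST = r0 and SRC = (const_int 0x12345678)
   first sets the low half and then fills in the high half through the
   ZERO_EXTRACT above, which is normally assembled as

     movw    r0, #0x5678
     movt    r0, #0x1234

   with a REG_EQUAL note recording the full constant on the last insn.  */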
17953
17954 /* Output a move between double words. It must be REG<-MEM
17955 or MEM<-REG. */
17956 const char *
17957 output_move_double (rtx *operands, bool emit, int *count)
17958 {
17959 enum rtx_code code0 = GET_CODE (operands[0]);
17960 enum rtx_code code1 = GET_CODE (operands[1]);
17961 rtx otherops[3];
17962 if (count)
17963 *count = 1;
17964
17965 /* The only case when this might happen is when
17966 you are looking at the length of a DImode instruction
17967 that has an invalid constant in it. */
17968 if (code0 == REG && code1 != MEM)
17969 {
17970 gcc_assert (!emit);
17971 *count = 2;
17972 return "";
17973 }
17974
17975 if (code0 == REG)
17976 {
17977 unsigned int reg0 = REGNO (operands[0]);
17978
17979 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17980
17981 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17982
17983 switch (GET_CODE (XEXP (operands[1], 0)))
17984 {
17985 case REG:
17986
17987 if (emit)
17988 {
17989 if (TARGET_LDRD
17990 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17991 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
17992 else
17993 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
17994 }
17995 break;
17996
17997 case PRE_INC:
17998 gcc_assert (TARGET_LDRD);
17999 if (emit)
18000 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18001 break;
18002
18003 case PRE_DEC:
18004 if (emit)
18005 {
18006 if (TARGET_LDRD)
18007 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18008 else
18009 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18010 }
18011 break;
18012
18013 case POST_INC:
18014 if (emit)
18015 {
18016 if (TARGET_LDRD)
18017 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18018 else
18019 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18020 }
18021 break;
18022
18023 case POST_DEC:
18024 gcc_assert (TARGET_LDRD);
18025 if (emit)
18026 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18027 break;
18028
18029 case PRE_MODIFY:
18030 case POST_MODIFY:
18031 /* Autoincrement addressing modes should never have overlapping
18032 base and destination registers, and overlapping index registers
18033 are already prohibited, so this doesn't need to worry about
18034 fix_cm3_ldrd. */
18035 otherops[0] = operands[0];
18036 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18037 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18038
18039 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18040 {
18041 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18042 {
18043 /* Registers overlap so split out the increment. */
18044 if (emit)
18045 {
18046 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18047 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18048 }
18049 if (count)
18050 *count = 2;
18051 }
18052 else
18053 {
18054 /* Use a single insn if we can.
18055 FIXME: IWMMXT allows offsets larger than ldrd can
18056 handle, fix these up with a pair of ldr. */
18057 if (TARGET_THUMB2
18058 || !CONST_INT_P (otherops[2])
18059 || (INTVAL (otherops[2]) > -256
18060 && INTVAL (otherops[2]) < 256))
18061 {
18062 if (emit)
18063 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18064 }
18065 else
18066 {
18067 if (emit)
18068 {
18069 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18070 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18071 }
18072 if (count)
18073 *count = 2;
18074
18075 }
18076 }
18077 }
18078 else
18079 {
18080 /* Use a single insn if we can.
18081 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18082 fix these up with a pair of ldr. */
18083 if (TARGET_THUMB2
18084 || !CONST_INT_P (otherops[2])
18085 || (INTVAL (otherops[2]) > -256
18086 && INTVAL (otherops[2]) < 256))
18087 {
18088 if (emit)
18089 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18090 }
18091 else
18092 {
18093 if (emit)
18094 {
18095 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18096 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18097 }
18098 if (count)
18099 *count = 2;
18100 }
18101 }
18102 break;
18103
18104 case LABEL_REF:
18105 case CONST:
18106 /* We might be able to use ldrd %0, %1 here. However, the range is
18107 different from that of ldr/adr, and it is broken on some ARMv7-M
18108 implementations. */
18109 /* Use the second register of the pair to avoid problematic
18110 overlap. */
18111 otherops[1] = operands[1];
18112 if (emit)
18113 output_asm_insn ("adr%?\t%0, %1", otherops);
18114 operands[1] = otherops[0];
18115 if (emit)
18116 {
18117 if (TARGET_LDRD)
18118 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18119 else
18120 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18121 }
18122
18123 if (count)
18124 *count = 2;
18125 break;
18126
18127 /* ??? This needs checking for thumb2. */
18128 default:
18129 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18130 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18131 {
18132 otherops[0] = operands[0];
18133 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18134 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18135
18136 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18137 {
18138 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18139 {
18140 switch ((int) INTVAL (otherops[2]))
18141 {
18142 case -8:
18143 if (emit)
18144 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18145 return "";
18146 case -4:
18147 if (TARGET_THUMB2)
18148 break;
18149 if (emit)
18150 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18151 return "";
18152 case 4:
18153 if (TARGET_THUMB2)
18154 break;
18155 if (emit)
18156 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18157 return "";
18158 }
18159 }
18160 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
18161 operands[1] = otherops[0];
18162 if (TARGET_LDRD
18163 && (REG_P (otherops[2])
18164 || TARGET_THUMB2
18165 || (CONST_INT_P (otherops[2])
18166 && INTVAL (otherops[2]) > -256
18167 && INTVAL (otherops[2]) < 256)))
18168 {
18169 if (reg_overlap_mentioned_p (operands[0],
18170 otherops[2]))
18171 {
18172 /* Swap base and index registers over to
18173 avoid a conflict. */
18174 std::swap (otherops[1], otherops[2]);
18175 }
18176 /* If both registers conflict, it will usually
18177 have been fixed by a splitter. */
18178 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18179 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18180 {
18181 if (emit)
18182 {
18183 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18184 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18185 }
18186 if (count)
18187 *count = 2;
18188 }
18189 else
18190 {
18191 otherops[0] = operands[0];
18192 if (emit)
18193 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18194 }
18195 return "";
18196 }
18197
18198 if (CONST_INT_P (otherops[2]))
18199 {
18200 if (emit)
18201 {
18202 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18203 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18204 else
18205 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18206 }
18207 }
18208 else
18209 {
18210 if (emit)
18211 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18212 }
18213 }
18214 else
18215 {
18216 if (emit)
18217 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18218 }
18219
18220 if (count)
18221 *count = 2;
18222
18223 if (TARGET_LDRD)
18224 return "ldrd%?\t%0, [%1]";
18225
18226 return "ldmia%?\t%1, %M0";
18227 }
18228 else
18229 {
18230 otherops[1] = adjust_address (operands[1], SImode, 4);
18231 /* Take care of overlapping base/data reg. */
18232 if (reg_mentioned_p (operands[0], operands[1]))
18233 {
18234 if (emit)
18235 {
18236 output_asm_insn ("ldr%?\t%0, %1", otherops);
18237 output_asm_insn ("ldr%?\t%0, %1", operands);
18238 }
18239 if (count)
18240 *count = 2;
18241
18242 }
18243 else
18244 {
18245 if (emit)
18246 {
18247 output_asm_insn ("ldr%?\t%0, %1", operands);
18248 output_asm_insn ("ldr%?\t%0, %1", otherops);
18249 }
18250 if (count)
18251 *count = 2;
18252 }
18253 }
18254 }
18255 }
18256 else
18257 {
18258 /* Constraints should ensure this. */
18259 gcc_assert (code0 == MEM && code1 == REG);
18260 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18261 || (TARGET_ARM && TARGET_LDRD));
18262
18263 switch (GET_CODE (XEXP (operands[0], 0)))
18264 {
18265 case REG:
18266 if (emit)
18267 {
18268 if (TARGET_LDRD)
18269 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18270 else
18271 output_asm_insn ("stm%?\t%m0, %M1", operands);
18272 }
18273 break;
18274
18275 case PRE_INC:
18276 gcc_assert (TARGET_LDRD);
18277 if (emit)
18278 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18279 break;
18280
18281 case PRE_DEC:
18282 if (emit)
18283 {
18284 if (TARGET_LDRD)
18285 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18286 else
18287 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18288 }
18289 break;
18290
18291 case POST_INC:
18292 if (emit)
18293 {
18294 if (TARGET_LDRD)
18295 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18296 else
18297 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18298 }
18299 break;
18300
18301 case POST_DEC:
18302 gcc_assert (TARGET_LDRD);
18303 if (emit)
18304 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18305 break;
18306
18307 case PRE_MODIFY:
18308 case POST_MODIFY:
18309 otherops[0] = operands[1];
18310 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18311 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18312
18313 /* IWMMXT allows offsets larger than ldrd can handle,
18314 fix these up with a pair of ldr. */
18315 if (!TARGET_THUMB2
18316 && CONST_INT_P (otherops[2])
18317 && (INTVAL(otherops[2]) <= -256
18318 || INTVAL(otherops[2]) >= 256))
18319 {
18320 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18321 {
18322 if (emit)
18323 {
18324 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18325 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18326 }
18327 if (count)
18328 *count = 2;
18329 }
18330 else
18331 {
18332 if (emit)
18333 {
18334 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18335 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18336 }
18337 if (count)
18338 *count = 2;
18339 }
18340 }
18341 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18342 {
18343 if (emit)
18344 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18345 }
18346 else
18347 {
18348 if (emit)
18349 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18350 }
18351 break;
18352
18353 case PLUS:
18354 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18355 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18356 {
18357 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18358 {
18359 case -8:
18360 if (emit)
18361 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18362 return "";
18363
18364 case -4:
18365 if (TARGET_THUMB2)
18366 break;
18367 if (emit)
18368 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18369 return "";
18370
18371 case 4:
18372 if (TARGET_THUMB2)
18373 break;
18374 if (emit)
18375 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18376 return "";
18377 }
18378 }
18379 if (TARGET_LDRD
18380 && (REG_P (otherops[2])
18381 || TARGET_THUMB2
18382 || (CONST_INT_P (otherops[2])
18383 && INTVAL (otherops[2]) > -256
18384 && INTVAL (otherops[2]) < 256)))
18385 {
18386 otherops[0] = operands[1];
18387 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18388 if (emit)
18389 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18390 return "";
18391 }
18392 /* Fall through */
18393
18394 default:
18395 otherops[0] = adjust_address (operands[0], SImode, 4);
18396 otherops[1] = operands[1];
18397 if (emit)
18398 {
18399 output_asm_insn ("str%?\t%1, %0", operands);
18400 output_asm_insn ("str%?\t%H1, %0", otherops);
18401 }
18402 if (count)
18403 *count = 2;
18404 }
18405 }
18406
18407 return "";
18408 }
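/* Two concrete cases, for illustration (assuming an ARM-state target with
   LDRD/STRD available): a DImode load from a plain register address is
   printed as

     ldrd    r0, [r4]

   whereas a load whose base register overlaps the destination pair, or
   whose pre/post-modify offset lies outside the -255..255 ldrd range,
   falls back to one of the two-instruction sequences above and sets
   *COUNT to 2.  */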
18409
18410 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18411 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18412
18413 const char *
18414 output_move_quad (rtx *operands)
18415 {
18416 if (REG_P (operands[0]))
18417 {
18418 /* Load, or reg->reg move. */
18419
18420 if (MEM_P (operands[1]))
18421 {
18422 switch (GET_CODE (XEXP (operands[1], 0)))
18423 {
18424 case REG:
18425 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18426 break;
18427
18428 case LABEL_REF:
18429 case CONST:
18430 output_asm_insn ("adr%?\t%0, %1", operands);
18431 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18432 break;
18433
18434 default:
18435 gcc_unreachable ();
18436 }
18437 }
18438 else
18439 {
18440 rtx ops[2];
18441 int dest, src, i;
18442
18443 gcc_assert (REG_P (operands[1]));
18444
18445 dest = REGNO (operands[0]);
18446 src = REGNO (operands[1]);
18447
18448 /* This seems pretty dumb, but hopefully GCC won't try to do it
18449 very often. */
18450 if (dest < src)
18451 for (i = 0; i < 4; i++)
18452 {
18453 ops[0] = gen_rtx_REG (SImode, dest + i);
18454 ops[1] = gen_rtx_REG (SImode, src + i);
18455 output_asm_insn ("mov%?\t%0, %1", ops);
18456 }
18457 else
18458 for (i = 3; i >= 0; i--)
18459 {
18460 ops[0] = gen_rtx_REG (SImode, dest + i);
18461 ops[1] = gen_rtx_REG (SImode, src + i);
18462 output_asm_insn ("mov%?\t%0, %1", ops);
18463 }
18464 }
18465 }
18466 else
18467 {
18468 gcc_assert (MEM_P (operands[0]));
18469 gcc_assert (REG_P (operands[1]));
18470 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18471
18472 switch (GET_CODE (XEXP (operands[0], 0)))
18473 {
18474 case REG:
18475 output_asm_insn ("stm%?\t%m0, %M1", operands);
18476 break;
18477
18478 default:
18479 gcc_unreachable ();
18480 }
18481 }
18482
18483 return "";
18484 }
18485
18486 /* Output a VFP load or store instruction. */
18487
18488 const char *
18489 output_move_vfp (rtx *operands)
18490 {
18491 rtx reg, mem, addr, ops[2];
18492 int load = REG_P (operands[0]);
18493 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18494 int sp = (!TARGET_VFP_FP16INST
18495 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18496 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18497 const char *templ;
18498 char buff[50];
18499 machine_mode mode;
18500
18501 reg = operands[!load];
18502 mem = operands[load];
18503
18504 mode = GET_MODE (reg);
18505
18506 gcc_assert (REG_P (reg));
18507 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18508 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18509 || mode == SFmode
18510 || mode == DFmode
18511 || mode == HImode
18512 || mode == SImode
18513 || mode == DImode
18514 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18515 gcc_assert (MEM_P (mem));
18516
18517 addr = XEXP (mem, 0);
18518
18519 switch (GET_CODE (addr))
18520 {
18521 case PRE_DEC:
18522 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18523 ops[0] = XEXP (addr, 0);
18524 ops[1] = reg;
18525 break;
18526
18527 case POST_INC:
18528 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18529 ops[0] = XEXP (addr, 0);
18530 ops[1] = reg;
18531 break;
18532
18533 default:
18534 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18535 ops[0] = reg;
18536 ops[1] = mem;
18537 break;
18538 }
18539
18540 sprintf (buff, templ,
18541 load ? "ld" : "st",
18542 dp ? "64" : sp ? "32" : "16",
18543 dp ? "P" : "",
18544 integer_p ? "\t%@ int" : "");
18545 output_asm_insn (buff, ops);
18546
18547 return "";
18548 }
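/* For example (assuming a double-precision capable VFP unit), loading a
   DFmode value into d7 from a plain register address produces something
   like

     vldr.64 d7, [r0]

   while the PRE_DEC and POST_INC cases above instead use the vstmdb /
   vldmia writeback forms with a single-register list.  */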
18549
18550 /* Output a Neon double-word or quad-word load or store, or a load
18551 or store for larger structure modes.
18552
18553 WARNING: The ordering of elements is weird in big-endian mode,
18554 because the EABI requires that vectors stored in memory appear
18555 as though they were stored by a VSTM instruction.
18556 GCC RTL defines element ordering based on in-memory order.
18557 This can be different from the architectural ordering of elements
18558 within a NEON register. The intrinsics defined in arm_neon.h use the
18559 NEON register element ordering, not the GCC RTL element ordering.
18560
18561 For example, the in-memory ordering of a big-endian quadword
18562 vector with 16-bit elements when stored from register pair {d0,d1}
18563 will be (lowest address first, d0[N] is NEON register element N):
18564
18565 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18566
18567 When necessary, quadword registers (dN, dN+1) are moved to ARM
18568 registers from rN in the order:
18569
18570 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18571
18572 So that STM/LDM can be used on vectors in ARM registers, and the
18573 same memory layout will result as if VSTM/VLDM were used.
18574
18575 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18576 possible, which allows use of appropriate alignment tags.
18577 Note that the choice of "64" is independent of the actual vector
18578 element size; this size simply ensures that the behavior is
18579 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18580
18581 Due to limitations of those instructions, use of VST1.64/VLD1.64
18582 is not possible if:
18583 - the address contains PRE_DEC, or
18584 - the mode refers to more than 4 double-word registers
18585
18586 In those cases, it would be possible to replace VSTM/VLDM by a
18587 sequence of instructions; this is not currently implemented since
18588 this is not certain to actually improve performance. */
18589
18590 const char *
18591 output_move_neon (rtx *operands)
18592 {
18593 rtx reg, mem, addr, ops[2];
18594 int regno, nregs, load = REG_P (operands[0]);
18595 const char *templ;
18596 char buff[50];
18597 machine_mode mode;
18598
18599 reg = operands[!load];
18600 mem = operands[load];
18601
18602 mode = GET_MODE (reg);
18603
18604 gcc_assert (REG_P (reg));
18605 regno = REGNO (reg);
18606 nregs = REG_NREGS (reg) / 2;
18607 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18608 || NEON_REGNO_OK_FOR_QUAD (regno));
18609 gcc_assert (VALID_NEON_DREG_MODE (mode)
18610 || VALID_NEON_QREG_MODE (mode)
18611 || VALID_NEON_STRUCT_MODE (mode));
18612 gcc_assert (MEM_P (mem));
18613
18614 addr = XEXP (mem, 0);
18615
18616 /* Strip off const from addresses like (const (plus (...))). */
18617 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18618 addr = XEXP (addr, 0);
18619
18620 switch (GET_CODE (addr))
18621 {
18622 case POST_INC:
18623 /* We have to use vldm / vstm for too-large modes. */
18624 if (nregs > 4)
18625 {
18626 templ = "v%smia%%?\t%%0!, %%h1";
18627 ops[0] = XEXP (addr, 0);
18628 }
18629 else
18630 {
18631 templ = "v%s1.64\t%%h1, %%A0";
18632 ops[0] = mem;
18633 }
18634 ops[1] = reg;
18635 break;
18636
18637 case PRE_DEC:
18638 /* We have to use vldm / vstm in this case, since there is no
18639 pre-decrement form of the vld1 / vst1 instructions. */
18640 templ = "v%smdb%%?\t%%0!, %%h1";
18641 ops[0] = XEXP (addr, 0);
18642 ops[1] = reg;
18643 break;
18644
18645 case POST_MODIFY:
18646 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18647 gcc_unreachable ();
18648
18649 case REG:
18650 /* We have to use vldm / vstm for too-large modes. */
18651 if (nregs > 1)
18652 {
18653 if (nregs > 4)
18654 templ = "v%smia%%?\t%%m0, %%h1";
18655 else
18656 templ = "v%s1.64\t%%h1, %%A0";
18657
18658 ops[0] = mem;
18659 ops[1] = reg;
18660 break;
18661 }
18662 /* Fall through. */
18663 case LABEL_REF:
18664 case PLUS:
18665 {
18666 int i;
18667 int overlap = -1;
18668 for (i = 0; i < nregs; i++)
18669 {
18670 /* We're only using DImode here because it's a convenient size. */
18671 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18672 ops[1] = adjust_address (mem, DImode, 8 * i);
18673 if (reg_overlap_mentioned_p (ops[0], mem))
18674 {
18675 gcc_assert (overlap == -1);
18676 overlap = i;
18677 }
18678 else
18679 {
18680 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18681 output_asm_insn (buff, ops);
18682 }
18683 }
18684 if (overlap != -1)
18685 {
18686 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18687 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18688 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18689 output_asm_insn (buff, ops);
18690 }
18691
18692 return "";
18693 }
18694
18695 default:
18696 gcc_unreachable ();
18697 }
18698
18699 sprintf (buff, templ, load ? "ld" : "st");
18700 output_asm_insn (buff, ops);
18701
18702 return "";
18703 }
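/* As an illustration only: a quad-word load through POST_INC, say q0 from
   address register r0, takes the vld1.64 path above and comes out roughly
   as

     vld1.64 {d0, d1}, [r0]!

   whereas an XImode access covers eight D registers, exceeds the
   four-register limit, and is emitted as a vldmia / vstmia instead.  */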
18704
18705 /* Compute and return the length of neon_mov<mode>, where <mode> is
18706 one of VSTRUCT modes: EI, OI, CI or XI. */
18707 int
18708 arm_attr_length_move_neon (rtx_insn *insn)
18709 {
18710 rtx reg, mem, addr;
18711 int load;
18712 machine_mode mode;
18713
18714 extract_insn_cached (insn);
18715
18716 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18717 {
18718 mode = GET_MODE (recog_data.operand[0]);
18719 switch (mode)
18720 {
18721 case E_EImode:
18722 case E_OImode:
18723 return 8;
18724 case E_CImode:
18725 return 12;
18726 case E_XImode:
18727 return 16;
18728 default:
18729 gcc_unreachable ();
18730 }
18731 }
18732
18733 load = REG_P (recog_data.operand[0]);
18734 reg = recog_data.operand[!load];
18735 mem = recog_data.operand[load];
18736
18737 gcc_assert (MEM_P (mem));
18738
18739 addr = XEXP (mem, 0);
18740
18741 /* Strip off const from addresses like (const (plus (...))). */
18742 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18743 addr = XEXP (addr, 0);
18744
18745 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18746 {
18747 int insns = REG_NREGS (reg) / 2;
18748 return insns * 4;
18749 }
18750 else
18751 return 4;
18752 }
18753
18754 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18755 return zero. */
18756
18757 int
18758 arm_address_offset_is_imm (rtx_insn *insn)
18759 {
18760 rtx mem, addr;
18761
18762 extract_insn_cached (insn);
18763
18764 if (REG_P (recog_data.operand[0]))
18765 return 0;
18766
18767 mem = recog_data.operand[0];
18768
18769 gcc_assert (MEM_P (mem));
18770
18771 addr = XEXP (mem, 0);
18772
18773 if (REG_P (addr)
18774 || (GET_CODE (addr) == PLUS
18775 && REG_P (XEXP (addr, 0))
18776 && CONST_INT_P (XEXP (addr, 1))))
18777 return 1;
18778 else
18779 return 0;
18780 }
18781
18782 /* Output an ADD r, s, #n where n may be too big for one instruction.
18783 If adding zero and the source and destination are the same register, output nothing. */
18784 const char *
18785 output_add_immediate (rtx *operands)
18786 {
18787 HOST_WIDE_INT n = INTVAL (operands[2]);
18788
18789 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18790 {
18791 if (n < 0)
18792 output_multi_immediate (operands,
18793 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18794 -n);
18795 else
18796 output_multi_immediate (operands,
18797 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18798 n);
18799 }
18800
18801 return "";
18802 }
18803
18804 /* Output a multiple immediate operation.
18805 OPERANDS is the vector of operands referred to in the output patterns.
18806 INSTR1 is the output pattern to use for the first constant.
18807 INSTR2 is the output pattern to use for subsequent constants.
18808 IMMED_OP is the index of the constant slot in OPERANDS.
18809 N is the constant value. */
18810 static const char *
18811 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18812 int immed_op, HOST_WIDE_INT n)
18813 {
18814 #if HOST_BITS_PER_WIDE_INT > 32
18815 n &= 0xffffffff;
18816 #endif
18817
18818 if (n == 0)
18819 {
18820 /* Quick and easy output. */
18821 operands[immed_op] = const0_rtx;
18822 output_asm_insn (instr1, operands);
18823 }
18824 else
18825 {
18826 int i;
18827 const char * instr = instr1;
18828
18829 /* Note that n is never zero here (which would give no output). */
18830 for (i = 0; i < 32; i += 2)
18831 {
18832 if (n & (3 << i))
18833 {
18834 operands[immed_op] = GEN_INT (n & (255 << i));
18835 output_asm_insn (instr, operands);
18836 instr = instr2;
18837 i += 6;
18838 }
18839 }
18840 }
18841
18842 return "";
18843 }
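/* A worked example: adding 0x00ff00ff cannot be encoded as one immediate,
   so output_add_immediate splits it into 8-bit chunks at even rotations,
   giving roughly

     add     r0, r1, #255            @ 0x000000ff
     add     r0, r0, #16711680       @ 0x00ff0000

   Each pass of the loop above emits one such chunk and then skips past
   the byte it has just consumed.  */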
18844
18845 /* Return the name of a shifter operation. */
18846 static const char *
18847 arm_shift_nmem (enum rtx_code code)
18848 {
18849 switch (code)
18850 {
18851 case ASHIFT:
18852 return ARM_LSL_NAME;
18853
18854 case ASHIFTRT:
18855 return "asr";
18856
18857 case LSHIFTRT:
18858 return "lsr";
18859
18860 case ROTATERT:
18861 return "ror";
18862
18863 default:
18864 abort();
18865 }
18866 }
18867
18868 /* Return the appropriate ARM instruction for the operation code.
18869 The returned result should not be overwritten. OP is the rtx of the
18870 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18871 was shifted. */
18872 const char *
18873 arithmetic_instr (rtx op, int shift_first_arg)
18874 {
18875 switch (GET_CODE (op))
18876 {
18877 case PLUS:
18878 return "add";
18879
18880 case MINUS:
18881 return shift_first_arg ? "rsb" : "sub";
18882
18883 case IOR:
18884 return "orr";
18885
18886 case XOR:
18887 return "eor";
18888
18889 case AND:
18890 return "and";
18891
18892 case ASHIFT:
18893 case ASHIFTRT:
18894 case LSHIFTRT:
18895 case ROTATERT:
18896 return arm_shift_nmem (GET_CODE (op));
18897
18898 default:
18899 gcc_unreachable ();
18900 }
18901 }
18902
18903 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18904 for the operation code. The returned result should not be overwritten.
18905 OP is the rtx of the shift.
18906 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18907 constant shift amount otherwise. */
18908 static const char *
18909 shift_op (rtx op, HOST_WIDE_INT *amountp)
18910 {
18911 const char * mnem;
18912 enum rtx_code code = GET_CODE (op);
18913
18914 switch (code)
18915 {
18916 case ROTATE:
18917 if (!CONST_INT_P (XEXP (op, 1)))
18918 {
18919 output_operand_lossage ("invalid shift operand");
18920 return NULL;
18921 }
18922
18923 code = ROTATERT;
18924 *amountp = 32 - INTVAL (XEXP (op, 1));
18925 mnem = "ror";
18926 break;
18927
18928 case ASHIFT:
18929 case ASHIFTRT:
18930 case LSHIFTRT:
18931 case ROTATERT:
18932 mnem = arm_shift_nmem (code);
18933 if (CONST_INT_P (XEXP (op, 1)))
18934 {
18935 *amountp = INTVAL (XEXP (op, 1));
18936 }
18937 else if (REG_P (XEXP (op, 1)))
18938 {
18939 *amountp = -1;
18940 return mnem;
18941 }
18942 else
18943 {
18944 output_operand_lossage ("invalid shift operand");
18945 return NULL;
18946 }
18947 break;
18948
18949 case MULT:
18950 /* We never have to worry about the amount being other than a
18951 power of 2, since this case can never be reloaded from a reg. */
18952 if (!CONST_INT_P (XEXP (op, 1)))
18953 {
18954 output_operand_lossage ("invalid shift operand");
18955 return NULL;
18956 }
18957
18958 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18959
18960 /* Amount must be a power of two. */
18961 if (*amountp & (*amountp - 1))
18962 {
18963 output_operand_lossage ("invalid shift operand");
18964 return NULL;
18965 }
18966
18967 *amountp = exact_log2 (*amountp);
18968 gcc_assert (IN_RANGE (*amountp, 0, 31));
18969 return ARM_LSL_NAME;
18970
18971 default:
18972 output_operand_lossage ("invalid shift operand");
18973 return NULL;
18974 }
18975
18976 /* This is not 100% correct, but follows from the desire to merge
18977 multiplication by a power of 2 with the recognizer for a
18978 shift. >=32 is not a valid shift for "lsl", so we must try and
18979 output a shift that produces the correct arithmetical result.
18980 Using lsr #32 is identical except for the fact that the carry bit
18981 is not set correctly if we set the flags; but we never use the
18982 carry bit from such an operation, so we can ignore that. */
18983 if (code == ROTATERT)
18984 /* Rotate is just modulo 32. */
18985 *amountp &= 31;
18986 else if (*amountp != (*amountp & 31))
18987 {
18988 if (code == ASHIFT)
18989 mnem = "lsr";
18990 *amountp = 32;
18991 }
18992
18993 /* Shifts of 0 are no-ops. */
18994 if (*amountp == 0)
18995 return NULL;
18996
18997 return mnem;
18998 }
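/* Two examples of the mapping done here (illustrative only): a MULT by 8
   is rewritten as the shift "lsl #3", while an ASHIFT by a constant of 32
   or more is printed as "lsr #32", which produces the same all-zero
   result as the out-of-range left shift while staying within the range
   the assembler accepts.  */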
18999
19000 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19001 because /bin/as is horribly restrictive. The judgement about
19002 whether or not each character is 'printable' (and can be output as
19003 is) or not (and must be printed with an octal escape) must be made
19004 with reference to the *host* character set -- the situation is
19005 similar to that discussed in the comments above pp_c_char in
19006 c-pretty-print.c. */
19007
19008 #define MAX_ASCII_LEN 51
19009
19010 void
19011 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19012 {
19013 int i;
19014 int len_so_far = 0;
19015
19016 fputs ("\t.ascii\t\"", stream);
19017
19018 for (i = 0; i < len; i++)
19019 {
19020 int c = p[i];
19021
19022 if (len_so_far >= MAX_ASCII_LEN)
19023 {
19024 fputs ("\"\n\t.ascii\t\"", stream);
19025 len_so_far = 0;
19026 }
19027
19028 if (ISPRINT (c))
19029 {
19030 if (c == '\\' || c == '\"')
19031 {
19032 putc ('\\', stream);
19033 len_so_far++;
19034 }
19035 putc (c, stream);
19036 len_so_far++;
19037 }
19038 else
19039 {
19040 fprintf (stream, "\\%03o", c);
19041 len_so_far += 4;
19042 }
19043 }
19044
19045 fputs ("\"\n", stream);
19046 }
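/* For example, fed the four bytes 'a', '"', '\n' and 0 this emits

     .ascii  "a\"\012\000"

   starting a fresh .ascii directive whenever the running length passes
   MAX_ASCII_LEN.  */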
19047 \f
19048 /* Whether a register is callee saved or not. This is necessary because high
19049 registers are marked as caller saved when optimizing for size on Thumb-1
19050 targets despite being callee saved in order to avoid using them. */
19051 #define callee_saved_reg_p(reg) \
19052 (!call_used_regs[reg] \
19053 || (TARGET_THUMB1 && optimize_size \
19054 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19055
19056 /* Compute the register save mask for registers 0 through 12
19057 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19058
19059 static unsigned long
19060 arm_compute_save_reg0_reg12_mask (void)
19061 {
19062 unsigned long func_type = arm_current_func_type ();
19063 unsigned long save_reg_mask = 0;
19064 unsigned int reg;
19065
19066 if (IS_INTERRUPT (func_type))
19067 {
19068 unsigned int max_reg;
19069 /* Interrupt functions must not corrupt any registers,
19070 even call clobbered ones. If this is a leaf function
19071 we can just examine the registers used by the RTL, but
19072 otherwise we have to assume that whatever function is
19073 called might clobber anything, and so we have to save
19074 all the call-clobbered registers as well. */
19075 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19076 /* FIQ handlers have registers r8 - r12 banked, so
19077 we only need to check r0 - r7. Normal ISRs only
19078 bank r14 and r15, so we must check up to r12.
19079 r13 is the stack pointer which is always preserved,
19080 so we do not need to consider it here. */
19081 max_reg = 7;
19082 else
19083 max_reg = 12;
19084
19085 for (reg = 0; reg <= max_reg; reg++)
19086 if (df_regs_ever_live_p (reg)
19087 || (! crtl->is_leaf && call_used_regs[reg]))
19088 save_reg_mask |= (1 << reg);
19089
19090 /* Also save the pic base register if necessary. */
19091 if (flag_pic
19092 && !TARGET_SINGLE_PIC_BASE
19093 && arm_pic_register != INVALID_REGNUM
19094 && crtl->uses_pic_offset_table)
19095 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19096 }
19097 else if (IS_VOLATILE(func_type))
19098 {
19099 /* For noreturn functions we historically omitted register saves
19100 altogether. However, this really messes up debugging. As a
19101 compromise, save just the frame pointers. Combined with the link
19102 register saved elsewhere this should be sufficient to get
19103 a backtrace. */
19104 if (frame_pointer_needed)
19105 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19106 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19107 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19108 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19109 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19110 }
19111 else
19112 {
19113 /* In the normal case we only need to save those registers
19114 which are call saved and which are used by this function. */
19115 for (reg = 0; reg <= 11; reg++)
19116 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19117 save_reg_mask |= (1 << reg);
19118
19119 /* Handle the frame pointer as a special case. */
19120 if (frame_pointer_needed)
19121 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19122
19123 /* If we aren't loading the PIC register,
19124 don't stack it even though it may be live. */
19125 if (flag_pic
19126 && !TARGET_SINGLE_PIC_BASE
19127 && arm_pic_register != INVALID_REGNUM
19128 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19129 || crtl->uses_pic_offset_table))
19130 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19131
19132 /* The prologue will copy SP into R0, so save it. */
19133 if (IS_STACKALIGN (func_type))
19134 save_reg_mask |= 1;
19135 }
19136
19137 /* Save registers so the exception handler can modify them. */
19138 if (crtl->calls_eh_return)
19139 {
19140 unsigned int i;
19141
19142 for (i = 0; ; i++)
19143 {
19144 reg = EH_RETURN_DATA_REGNO (i);
19145 if (reg == INVALID_REGNUM)
19146 break;
19147 save_reg_mask |= 1 << reg;
19148 }
19149 }
19150
19151 return save_reg_mask;
19152 }
19153
19154 /* Return true if r3 is live at the start of the function. */
19155
19156 static bool
19157 arm_r3_live_at_start_p (void)
19158 {
19159 /* Just look at cfg info, which is still close enough to correct at this
19160 point. This gives false positives for broken functions that might use
19161 uninitialized data that happens to be allocated in r3, but who cares? */
19162 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19163 }
19164
19165 /* Compute the number of bytes used to store the static chain register on the
19166 stack, above the stack frame. We need to know this accurately to get the
19167 alignment of the rest of the stack frame correct. */
19168
19169 static int
19170 arm_compute_static_chain_stack_bytes (void)
19171 {
19172 /* See the defining assertion in arm_expand_prologue. */
19173 if (IS_NESTED (arm_current_func_type ())
19174 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19175 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19176 || flag_stack_clash_protection)
19177 && !df_regs_ever_live_p (LR_REGNUM)))
19178 && arm_r3_live_at_start_p ()
19179 && crtl->args.pretend_args_size == 0)
19180 return 4;
19181
19182 return 0;
19183 }
19184
19185 /* Compute a bit mask of which core registers need to be
19186 saved on the stack for the current function.
19187 This is used by arm_compute_frame_layout, which may add extra registers. */
19188
19189 static unsigned long
19190 arm_compute_save_core_reg_mask (void)
19191 {
19192 unsigned int save_reg_mask = 0;
19193 unsigned long func_type = arm_current_func_type ();
19194 unsigned int reg;
19195
19196 if (IS_NAKED (func_type))
19197 /* This should never really happen. */
19198 return 0;
19199
19200 /* If we are creating a stack frame, then we must save the frame pointer,
19201 IP (which will hold the old stack pointer), LR and the PC. */
19202 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19203 save_reg_mask |=
19204 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19205 | (1 << IP_REGNUM)
19206 | (1 << LR_REGNUM)
19207 | (1 << PC_REGNUM);
19208
19209 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19210
19211 /* Decide if we need to save the link register.
19212 Interrupt routines have their own banked link register,
19213 so they never need to save it.
19214 Otherwise if we do not use the link register we do not need to save
19215 it. If we are pushing other registers onto the stack however, we
19216 can save an instruction in the epilogue by pushing the link register
19217 now and then popping it back into the PC. This incurs extra memory
19218 accesses though, so we only do it when optimizing for size, and only
19219 if we know that we will not need a fancy return sequence. */
19220 if (df_regs_ever_live_p (LR_REGNUM)
19221 || (save_reg_mask
19222 && optimize_size
19223 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19224 && !crtl->tail_call_emit
19225 && !crtl->calls_eh_return))
19226 save_reg_mask |= 1 << LR_REGNUM;
19227
19228 if (cfun->machine->lr_save_eliminated)
19229 save_reg_mask &= ~ (1 << LR_REGNUM);
19230
19231 if (TARGET_REALLY_IWMMXT
19232 && ((bit_count (save_reg_mask)
19233 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19234 arm_compute_static_chain_stack_bytes())
19235 ) % 2) != 0)
19236 {
19237 /* The total number of registers that are going to be pushed
19238 onto the stack is odd. We need to ensure that the stack
19239 is 64-bit aligned before we start to save iWMMXt registers,
19240 and also before we start to create locals. (A local variable
19241 might be a double or long long which we will load/store using
19242 an iWMMXt instruction). Therefore we need to push another
19243 ARM register, so that the stack will be 64-bit aligned. We
19244 try to avoid using the arg registers (r0 -r3) as they might be
19245 used to pass values in a tail call. */
19246 for (reg = 4; reg <= 12; reg++)
19247 if ((save_reg_mask & (1 << reg)) == 0)
19248 break;
19249
19250 if (reg <= 12)
19251 save_reg_mask |= (1 << reg);
19252 else
19253 {
19254 cfun->machine->sibcall_blocked = 1;
19255 save_reg_mask |= (1 << 3);
19256 }
19257 }
19258
19259 /* We may need to push an additional register for use initializing the
19260 PIC base register. */
19261 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19262 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19263 {
19264 reg = thumb_find_work_register (1 << 4);
19265 if (!call_used_regs[reg])
19266 save_reg_mask |= (1 << reg);
19267 }
19268
19269 return save_reg_mask;
19270 }
19271
19272 /* Compute a bit mask of which core registers need to be
19273 saved on the stack for the current function. */
19274 static unsigned long
19275 thumb1_compute_save_core_reg_mask (void)
19276 {
19277 unsigned long mask;
19278 unsigned reg;
19279
19280 mask = 0;
19281 for (reg = 0; reg < 12; reg ++)
19282 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19283 mask |= 1 << reg;
19284
19285 /* Handle the frame pointer as a special case. */
19286 if (frame_pointer_needed)
19287 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19288
19289 if (flag_pic
19290 && !TARGET_SINGLE_PIC_BASE
19291 && arm_pic_register != INVALID_REGNUM
19292 && crtl->uses_pic_offset_table)
19293 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19294
19295 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19296 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19297 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19298
19299 /* LR will also be pushed if any lo regs are pushed. */
19300 if (mask & 0xff || thumb_force_lr_save ())
19301 mask |= (1 << LR_REGNUM);
19302
19303 /* Make sure we have a low work register if we need one.
19304 We will need one if we are going to push a high register,
19305 but we are not currently intending to push a low register. */
19306 if ((mask & 0xff) == 0
19307 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19308 {
19309 /* Use thumb_find_work_register to choose which register
19310 we will use. If the register is live then we will
19311 have to push it. Use LAST_LO_REGNUM as our fallback
19312 choice for the register to select. */
19313 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19314 /* Make sure the register returned by thumb_find_work_register is
19315 not part of the return value. */
19316 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19317 reg = LAST_LO_REGNUM;
19318
19319 if (callee_saved_reg_p (reg))
19320 mask |= 1 << reg;
19321 }
19322
19323 /* The 504 below is 8 bytes less than 512 because there are two possible
19324 alignment words. We can't tell here if they will be present or not so we
19325 have to play it safe and assume that they are. */
19326 if ((CALLER_INTERWORKING_SLOT_SIZE +
19327 ROUND_UP_WORD (get_frame_size ()) +
19328 crtl->outgoing_args_size) >= 504)
19329 {
19330 /* This is the same as the code in thumb1_expand_prologue() which
19331 determines which register to use for stack decrement. */
19332 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19333 if (mask & (1 << reg))
19334 break;
19335
19336 if (reg > LAST_LO_REGNUM)
19337 {
19338 /* Make sure we have a register available for stack decrement. */
19339 mask |= 1 << LAST_LO_REGNUM;
19340 }
19341 }
19342
19343 return mask;
19344 }
19345
19346
19347 /* Return the number of bytes required to save VFP registers. */
19348 static int
19349 arm_get_vfp_saved_size (void)
19350 {
19351 unsigned int regno;
19352 int count;
19353 int saved;
19354
19355 saved = 0;
19356 /* Space for saved VFP registers. */
19357 if (TARGET_HARD_FLOAT)
19358 {
19359 count = 0;
19360 for (regno = FIRST_VFP_REGNUM;
19361 regno < LAST_VFP_REGNUM;
19362 regno += 2)
19363 {
19364 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19365 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19366 {
19367 if (count > 0)
19368 {
19369 /* Workaround ARM10 VFPr1 bug. */
19370 if (count == 2 && !arm_arch6)
19371 count++;
19372 saved += count * 8;
19373 }
19374 count = 0;
19375 }
19376 else
19377 count++;
19378 }
19379 if (count > 0)
19380 {
19381 if (count == 2 && !arm_arch6)
19382 count++;
19383 saved += count * 8;
19384 }
19385 }
19386 return saved;
19387 }
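/* For instance, if only d8 and d9 need saving the result is normally 16
   bytes, but on a pre-ARMv6 core the ARM10 VFPr1 workaround above rounds
   the pair up to three registers and 24 bytes are reserved instead.  */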
19388
19389
19390 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19391 everything bar the final return instruction. If simple_return is true,
19392 then do not output epilogue, because it has already been emitted in RTL. */
19393 const char *
19394 output_return_instruction (rtx operand, bool really_return, bool reverse,
19395 bool simple_return)
19396 {
19397 char conditional[10];
19398 char instr[100];
19399 unsigned reg;
19400 unsigned long live_regs_mask;
19401 unsigned long func_type;
19402 arm_stack_offsets *offsets;
19403
19404 func_type = arm_current_func_type ();
19405
19406 if (IS_NAKED (func_type))
19407 return "";
19408
19409 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19410 {
19411 /* If this function was declared non-returning, and we have
19412 found a tail call, then we have to trust that the called
19413 function won't return. */
19414 if (really_return)
19415 {
19416 rtx ops[2];
19417
19418 /* Otherwise, trap an attempted return by aborting. */
19419 ops[0] = operand;
19420 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19421 : "abort");
19422 assemble_external_libcall (ops[1]);
19423 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19424 }
19425
19426 return "";
19427 }
19428
19429 gcc_assert (!cfun->calls_alloca || really_return);
19430
19431 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19432
19433 cfun->machine->return_used_this_function = 1;
19434
19435 offsets = arm_get_frame_offsets ();
19436 live_regs_mask = offsets->saved_regs_mask;
19437
19438 if (!simple_return && live_regs_mask)
19439 {
19440 const char * return_reg;
19441
19442 /* If we do not have any special requirements for function exit
19443 (e.g. interworking) then we can load the return address
19444 directly into the PC. Otherwise we must load it into LR. */
19445 if (really_return
19446 && !IS_CMSE_ENTRY (func_type)
19447 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19448 return_reg = reg_names[PC_REGNUM];
19449 else
19450 return_reg = reg_names[LR_REGNUM];
19451
19452 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19453 {
19454 /* There are three possible reasons for the IP register
19455 being saved. 1) a stack frame was created, in which case
19456 IP contains the old stack pointer, or 2) an ISR routine
19457 corrupted it, or 3) it was saved to align the stack on
19458 iWMMXt. In case 1, restore IP into SP, otherwise just
19459 restore IP. */
19460 if (frame_pointer_needed)
19461 {
19462 live_regs_mask &= ~ (1 << IP_REGNUM);
19463 live_regs_mask |= (1 << SP_REGNUM);
19464 }
19465 else
19466 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19467 }
19468
19469 /* On some ARM architectures it is faster to use LDR rather than
19470 LDM to load a single register. On other architectures, the
19471 cost is the same. In 26 bit mode, or for exception handlers,
19472 we have to use LDM to load the PC so that the CPSR is also
19473 restored. */
19474 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19475 if (live_regs_mask == (1U << reg))
19476 break;
19477
19478 if (reg <= LAST_ARM_REGNUM
19479 && (reg != LR_REGNUM
19480 || ! really_return
19481 || ! IS_INTERRUPT (func_type)))
19482 {
19483 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19484 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19485 }
19486 else
19487 {
19488 char *p;
19489 int first = 1;
19490
19491 /* Generate the load multiple instruction to restore the
19492 registers. Note we can get here, even if
19493 frame_pointer_needed is true, but only if sp already
19494 points to the base of the saved core registers. */
19495 if (live_regs_mask & (1 << SP_REGNUM))
19496 {
19497 unsigned HOST_WIDE_INT stack_adjust;
19498
19499 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19500 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19501
19502 if (stack_adjust && arm_arch5 && TARGET_ARM)
19503 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19504 else
19505 {
19506 /* If we can't use ldmib (SA110 bug),
19507 then try to pop r3 instead. */
19508 if (stack_adjust)
19509 live_regs_mask |= 1 << 3;
19510
19511 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19512 }
19513 }
19514 /* For interrupt returns we have to use an LDM rather than
19515 a POP so that we can use the exception return variant. */
19516 else if (IS_INTERRUPT (func_type))
19517 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19518 else
19519 sprintf (instr, "pop%s\t{", conditional);
19520
19521 p = instr + strlen (instr);
19522
19523 for (reg = 0; reg <= SP_REGNUM; reg++)
19524 if (live_regs_mask & (1 << reg))
19525 {
19526 int l = strlen (reg_names[reg]);
19527
19528 if (first)
19529 first = 0;
19530 else
19531 {
19532 memcpy (p, ", ", 2);
19533 p += 2;
19534 }
19535
19536 memcpy (p, "%|", 2);
19537 memcpy (p + 2, reg_names[reg], l);
19538 p += l + 2;
19539 }
19540
19541 if (live_regs_mask & (1 << LR_REGNUM))
19542 {
19543 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19544 /* If returning from an interrupt, restore the CPSR. */
19545 if (IS_INTERRUPT (func_type))
19546 strcat (p, "^");
19547 }
19548 else
19549 strcpy (p, "}");
19550 }
19551
19552 output_asm_insn (instr, & operand);
19553
19554 /* See if we need to generate an extra instruction to
19555 perform the actual function return. */
19556 if (really_return
19557 && func_type != ARM_FT_INTERWORKED
19558 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19559 {
19560 /* The return has already been handled
19561 by loading the LR into the PC. */
19562 return "";
19563 }
19564 }
19565
19566 if (really_return)
19567 {
19568 switch ((int) ARM_FUNC_TYPE (func_type))
19569 {
19570 case ARM_FT_ISR:
19571 case ARM_FT_FIQ:
19572 /* ??? This is wrong for unified assembly syntax. */
19573 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19574 break;
19575
19576 case ARM_FT_INTERWORKED:
19577 gcc_assert (arm_arch5 || arm_arch4t);
19578 sprintf (instr, "bx%s\t%%|lr", conditional);
19579 break;
19580
19581 case ARM_FT_EXCEPTION:
19582 /* ??? This is wrong for unified assembly syntax. */
19583 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19584 break;
19585
19586 default:
19587 if (IS_CMSE_ENTRY (func_type))
19588 {
19589 /* Check if we have to clear the 'GE bits' which is only used if
19590 parallel add and subtraction instructions are available. */
19591 if (TARGET_INT_SIMD)
19592 snprintf (instr, sizeof (instr),
19593 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19594 else
19595 snprintf (instr, sizeof (instr),
19596 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19597
19598 output_asm_insn (instr, & operand);
19599 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19600 {
19601 /* Clear the cumulative exception-status bits (0-4,7) and the
19602 condition code bits (28-31) of the FPSCR. We need to
19603 remember to clear the first scratch register used (IP) and
19604 save and restore the second (r4). */
19605 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19606 output_asm_insn (instr, & operand);
19607 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19608 output_asm_insn (instr, & operand);
19609 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19610 output_asm_insn (instr, & operand);
19611 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19612 output_asm_insn (instr, & operand);
19613 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19614 output_asm_insn (instr, & operand);
19615 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19616 output_asm_insn (instr, & operand);
19617 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19618 output_asm_insn (instr, & operand);
19619 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19620 output_asm_insn (instr, & operand);
19621 }
19622 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19623 }
19624 /* Use bx if it's available. */
19625 else if (arm_arch5 || arm_arch4t)
19626 sprintf (instr, "bx%s\t%%|lr", conditional);
19627 else
19628 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19629 break;
19630 }
19631
19632 output_asm_insn (instr, & operand);
19633 }
19634
19635 return "";
19636 }
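/* As a sketch of the common case (an ordinary ARM-state function that
   saved r4, r5 and lr, with no interworking requirements), the code above
   folds the restore and the return into a single

     pop     {r4, r5, pc}

   An interrupt handler instead uses an ldmfd ending in "^" so that the
   SPSR is copied back into the CPSR as the handler returns.  */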
19637
19638 /* Output in FILE asm statements needed to declare the NAME of the function
19639 defined by its DECL node. */
19640
19641 void
19642 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19643 {
19644 size_t cmse_name_len;
19645 char *cmse_name = 0;
19646 char cmse_prefix[] = "__acle_se_";
19647
19648 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19649 extra function label for each function with the 'cmse_nonsecure_entry'
19650 attribute. This extra function label should be prepended with
19651 '__acle_se_', telling the linker that it needs to create secure gateway
19652 veneers for this function. */
19653 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19654 DECL_ATTRIBUTES (decl)))
19655 {
19656 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19657 cmse_name = XALLOCAVEC (char, cmse_name_len);
19658 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19659 targetm.asm_out.globalize_label (file, cmse_name);
19660
19661 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19662 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19663 }
19664
19665 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19666 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19667 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19668 ASM_OUTPUT_LABEL (file, name);
19669
19670 if (cmse_name)
19671 ASM_OUTPUT_LABEL (file, cmse_name);
19672
19673 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19674 }
19675
19676 /* Write the function name into the code section, directly preceding
19677 the function prologue.
19678
19679 Code will be output similar to this:
19680 t0
19681 .ascii "arm_poke_function_name", 0
19682 .align
19683 t1
19684 .word 0xff000000 + (t1 - t0)
19685 arm_poke_function_name
19686 mov ip, sp
19687 stmfd sp!, {fp, ip, lr, pc}
19688 sub fp, ip, #4
19689
19690 When performing a stack backtrace, code can inspect the value
19691 of 'pc' stored at 'fp' + 0. If the trace function then looks
19692 at location pc - 12 and the top 8 bits are set, then we know
19693 that there is a function name embedded immediately preceding this
19694 location, and that its length is ((pc[-3]) & ~0xff000000).
19695
19696 We assume that pc is declared as a pointer to an unsigned long.
19697
19698 It is of no benefit to output the function name if we are assembling
19699 a leaf function. These function types will not contain a stack
19700 backtrace structure, therefore it is not possible to determine the
19701 function name. */
19702 void
19703 arm_poke_function_name (FILE *stream, const char *name)
19704 {
19705 unsigned long alignlength;
19706 unsigned long length;
19707 rtx x;
19708
19709 length = strlen (name) + 1;
19710 alignlength = ROUND_UP_WORD (length);
19711
19712 ASM_OUTPUT_ASCII (stream, name, length);
19713 ASM_OUTPUT_ALIGN (stream, 2);
19714 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19715 assemble_aligned_integer (UNITS_PER_WORD, x);
19716 }
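/* A minimal sketch of a consumer of this marker (hypothetical code, not
   part of GCC, and assuming a 32-bit target where 'unsigned long' is four
   bytes):

     static const char *
     frame_function_name (const unsigned long *pc)
     {
       unsigned long marker = pc[-3];
       // The marker word holds 0xff000000 plus the padded name length;
       // the name itself ends immediately before the marker word.
       if ((marker & 0xff000000) != 0xff000000)
         return NULL;
       return (const char *) pc - 12 - (marker & ~0xff000000UL);
     }
*/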
19717
19718 /* Place some comments into the assembler stream
19719 describing the current function. */
19720 static void
19721 arm_output_function_prologue (FILE *f)
19722 {
19723 unsigned long func_type;
19724
19725 /* Sanity check. */
19726 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19727
19728 func_type = arm_current_func_type ();
19729
19730 switch ((int) ARM_FUNC_TYPE (func_type))
19731 {
19732 default:
19733 case ARM_FT_NORMAL:
19734 break;
19735 case ARM_FT_INTERWORKED:
19736 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19737 break;
19738 case ARM_FT_ISR:
19739 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19740 break;
19741 case ARM_FT_FIQ:
19742 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19743 break;
19744 case ARM_FT_EXCEPTION:
19745 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19746 break;
19747 }
19748
19749 if (IS_NAKED (func_type))
19750 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19751
19752 if (IS_VOLATILE (func_type))
19753 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19754
19755 if (IS_NESTED (func_type))
19756 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19757 if (IS_STACKALIGN (func_type))
19758 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19759 if (IS_CMSE_ENTRY (func_type))
19760 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19761
19762 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19763 crtl->args.size,
19764 crtl->args.pretend_args_size,
19765 (HOST_WIDE_INT) get_frame_size ());
19766
19767 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19768 frame_pointer_needed,
19769 cfun->machine->uses_anonymous_args);
19770
19771 if (cfun->machine->lr_save_eliminated)
19772 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19773
19774 if (crtl->calls_eh_return)
19775 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19776
19777 }
19778
19779 static void
19780 arm_output_function_epilogue (FILE *)
19781 {
19782 arm_stack_offsets *offsets;
19783
19784 if (TARGET_THUMB1)
19785 {
19786 int regno;
19787
19788 /* Emit any call-via-reg trampolines that are needed for v4t support
19789 of call_reg and call_value_reg type insns. */
19790 for (regno = 0; regno < LR_REGNUM; regno++)
19791 {
19792 rtx label = cfun->machine->call_via[regno];
19793
19794 if (label != NULL)
19795 {
19796 switch_to_section (function_section (current_function_decl));
19797 targetm.asm_out.internal_label (asm_out_file, "L",
19798 CODE_LABEL_NUMBER (label));
19799 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19800 }
19801 }
19802
19803 /* ??? Probably not safe to set this here, since it assumes that a
19804 function will be emitted as assembly immediately after we generate
19805 RTL for it. This does not happen for inline functions. */
19806 cfun->machine->return_used_this_function = 0;
19807 }
19808 else /* TARGET_32BIT */
19809 {
19810 /* We need to take into account any stack-frame rounding. */
19811 offsets = arm_get_frame_offsets ();
19812
19813 gcc_assert (!use_return_insn (FALSE, NULL)
19814 || (cfun->machine->return_used_this_function != 0)
19815 || offsets->saved_regs == offsets->outgoing_args
19816 || frame_pointer_needed);
19817 }
19818 }
19819
19820 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19821 STR and STRD. If an even number of registers is being pushed, an
19822 STRD pattern is created for each register pair. If an odd
19823 number of registers is pushed, emit an initial STR followed by
19824 as many STRD instructions as are needed. This works best when the
19825 stack is initially 64-bit aligned (the normal case), since it
19826 ensures that each STRD is also 64-bit aligned. */
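/* For instance, with SAVED_REGS_MASK covering {r4, r5, r6} (an odd count)
   the emitted sequence would look roughly like:

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

   This is only an illustrative sketch; the exact registers and offsets
   depend on the mask passed in.  */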
19827 static void
19828 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19829 {
19830 int num_regs = 0;
19831 int i;
19832 int regno;
19833 rtx par = NULL_RTX;
19834 rtx dwarf = NULL_RTX;
19835 rtx tmp;
19836 bool first = true;
19837
19838 num_regs = bit_count (saved_regs_mask);
19839
19840 /* Must be at least one register to save, and can't save SP or PC. */
19841 gcc_assert (num_regs > 0 && num_regs <= 14);
19842 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19843 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19844
19845 /* Create sequence for DWARF info. All the frame-related data for
19846 debugging is held in this wrapper. */
19847 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19848
19849 /* Describe the stack adjustment. */
19850 tmp = gen_rtx_SET (stack_pointer_rtx,
19851 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19852 RTX_FRAME_RELATED_P (tmp) = 1;
19853 XVECEXP (dwarf, 0, 0) = tmp;
19854
19855 /* Find the first register. */
19856 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19857 ;
19858
19859 i = 0;
19860
19861 /* If there's an odd number of registers to push, start off by
19862 pushing a single register. This ensures that subsequent STRD
19863 operations are dword aligned (assuming that SP was originally
19864 64-bit aligned). */
19865 if ((num_regs & 1) != 0)
19866 {
19867 rtx reg, mem, insn;
19868
19869 reg = gen_rtx_REG (SImode, regno);
19870 if (num_regs == 1)
19871 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19872 stack_pointer_rtx));
19873 else
19874 mem = gen_frame_mem (Pmode,
19875 gen_rtx_PRE_MODIFY
19876 (Pmode, stack_pointer_rtx,
19877 plus_constant (Pmode, stack_pointer_rtx,
19878 -4 * num_regs)));
19879
19880 tmp = gen_rtx_SET (mem, reg);
19881 RTX_FRAME_RELATED_P (tmp) = 1;
19882 insn = emit_insn (tmp);
19883 RTX_FRAME_RELATED_P (insn) = 1;
19884 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19885 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19886 RTX_FRAME_RELATED_P (tmp) = 1;
19887 i++;
19888 regno++;
19889 XVECEXP (dwarf, 0, i) = tmp;
19890 first = false;
19891 }
19892
19893 while (i < num_regs)
19894 if (saved_regs_mask & (1 << regno))
19895 {
19896 rtx reg1, reg2, mem1, mem2;
19897 rtx tmp0, tmp1, tmp2;
19898 int regno2;
19899
19900 /* Find the register to pair with this one. */
19901 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19902 regno2++)
19903 ;
19904
19905 reg1 = gen_rtx_REG (SImode, regno);
19906 reg2 = gen_rtx_REG (SImode, regno2);
19907
19908 if (first)
19909 {
19910 rtx insn;
19911
19912 first = false;
19913 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19914 stack_pointer_rtx,
19915 -4 * num_regs));
19916 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19917 stack_pointer_rtx,
19918 -4 * (num_regs - 1)));
19919 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19920 plus_constant (Pmode, stack_pointer_rtx,
19921 -4 * (num_regs)));
19922 tmp1 = gen_rtx_SET (mem1, reg1);
19923 tmp2 = gen_rtx_SET (mem2, reg2);
19924 RTX_FRAME_RELATED_P (tmp0) = 1;
19925 RTX_FRAME_RELATED_P (tmp1) = 1;
19926 RTX_FRAME_RELATED_P (tmp2) = 1;
19927 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19928 XVECEXP (par, 0, 0) = tmp0;
19929 XVECEXP (par, 0, 1) = tmp1;
19930 XVECEXP (par, 0, 2) = tmp2;
19931 insn = emit_insn (par);
19932 RTX_FRAME_RELATED_P (insn) = 1;
19933 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19934 }
19935 else
19936 {
19937 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19938 stack_pointer_rtx,
19939 4 * i));
19940 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19941 stack_pointer_rtx,
19942 4 * (i + 1)));
19943 tmp1 = gen_rtx_SET (mem1, reg1);
19944 tmp2 = gen_rtx_SET (mem2, reg2);
19945 RTX_FRAME_RELATED_P (tmp1) = 1;
19946 RTX_FRAME_RELATED_P (tmp2) = 1;
19947 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19948 XVECEXP (par, 0, 0) = tmp1;
19949 XVECEXP (par, 0, 1) = tmp2;
19950 emit_insn (par);
19951 }
19952
19953 /* Create unwind information. This is an approximation. */
19954 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19955 plus_constant (Pmode,
19956 stack_pointer_rtx,
19957 4 * i)),
19958 reg1);
19959 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19960 plus_constant (Pmode,
19961 stack_pointer_rtx,
19962 4 * (i + 1))),
19963 reg2);
19964
19965 RTX_FRAME_RELATED_P (tmp1) = 1;
19966 RTX_FRAME_RELATED_P (tmp2) = 1;
19967 XVECEXP (dwarf, 0, i + 1) = tmp1;
19968 XVECEXP (dwarf, 0, i + 2) = tmp2;
19969 i += 2;
19970 regno = regno2 + 1;
19971 }
19972 else
19973 regno++;
19974
19975 return;
19976 }
19977
19978 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19979 whenever possible, otherwise it emits single-word stores. The first store
19980 also allocates stack space for all saved registers, using pre-indexed
19981 addressing with writeback. All other stores use offset addressing. If no
19982 STRD can be emitted, this function emits a sequence of single-word stores,
19983 and not an STM as before, because single-word stores provide more
19984 scheduling freedom and can be turned into an STM by peephole optimizations. */
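/* As an illustrative sketch only: with SAVED_REGS_MASK covering {r4, r5, r7}
   this would emit something like

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]

   where the first store allocates the whole 12 bytes with writeback and
   the remaining store uses plain offset addressing.  */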
19985 static void
19986 arm_emit_strd_push (unsigned long saved_regs_mask)
19987 {
19988 int num_regs = 0;
19989 int i, j, dwarf_index = 0;
19990 int offset = 0;
19991 rtx dwarf = NULL_RTX;
19992 rtx insn = NULL_RTX;
19993 rtx tmp, mem;
19994
19995 /* TODO: More efficient code could be emitted by changing the
19996 layout, e.g., first push all pairs that can use STRD to keep the
19997 stack aligned, and then push all other registers. */
19998 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19999 if (saved_regs_mask & (1 << i))
20000 num_regs++;
20001
20002 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20003 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20004 gcc_assert (num_regs > 0);
20005
20006 /* Create sequence for DWARF info. */
20007 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20008
20009 /* For dwarf info, we generate explicit stack update. */
20010 tmp = gen_rtx_SET (stack_pointer_rtx,
20011 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20012 RTX_FRAME_RELATED_P (tmp) = 1;
20013 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20014
20015 /* Save registers. */
20016 offset = - 4 * num_regs;
20017 j = 0;
20018 while (j <= LAST_ARM_REGNUM)
20019 if (saved_regs_mask & (1 << j))
20020 {
20021 if ((j % 2 == 0)
20022 && (saved_regs_mask & (1 << (j + 1))))
20023 {
20024 /* The current register and the next register form a register pair
20025 for which STRD can be generated. */
20026 if (offset < 0)
20027 {
20028 /* Allocate stack space for all saved registers. */
20029 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20030 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20031 mem = gen_frame_mem (DImode, tmp);
20032 offset = 0;
20033 }
20034 else if (offset > 0)
20035 mem = gen_frame_mem (DImode,
20036 plus_constant (Pmode,
20037 stack_pointer_rtx,
20038 offset));
20039 else
20040 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20041
20042 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20043 RTX_FRAME_RELATED_P (tmp) = 1;
20044 tmp = emit_insn (tmp);
20045
20046 /* Record the first store insn. */
20047 if (dwarf_index == 1)
20048 insn = tmp;
20049
20050 /* Generate dwarf info. */
20051 mem = gen_frame_mem (SImode,
20052 plus_constant (Pmode,
20053 stack_pointer_rtx,
20054 offset));
20055 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20056 RTX_FRAME_RELATED_P (tmp) = 1;
20057 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20058
20059 mem = gen_frame_mem (SImode,
20060 plus_constant (Pmode,
20061 stack_pointer_rtx,
20062 offset + 4));
20063 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20064 RTX_FRAME_RELATED_P (tmp) = 1;
20065 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20066
20067 offset += 8;
20068 j += 2;
20069 }
20070 else
20071 {
20072 /* Emit a single word store. */
20073 if (offset < 0)
20074 {
20075 /* Allocate stack space for all saved registers. */
20076 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20077 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20078 mem = gen_frame_mem (SImode, tmp);
20079 offset = 0;
20080 }
20081 else if (offset > 0)
20082 mem = gen_frame_mem (SImode,
20083 plus_constant (Pmode,
20084 stack_pointer_rtx,
20085 offset));
20086 else
20087 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20088
20089 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20090 RTX_FRAME_RELATED_P (tmp) = 1;
20091 tmp = emit_insn (tmp);
20092
20093 /* Record the first store insn. */
20094 if (dwarf_index == 1)
20095 insn = tmp;
20096
20097 /* Generate dwarf info. */
20098 mem = gen_frame_mem (SImode,
20099 plus_constant(Pmode,
20100 stack_pointer_rtx,
20101 offset));
20102 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20103 RTX_FRAME_RELATED_P (tmp) = 1;
20104 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20105
20106 offset += 4;
20107 j += 1;
20108 }
20109 }
20110 else
20111 j++;
20112
20113 /* Attach dwarf info to the first insn we generate. */
20114 gcc_assert (insn != NULL_RTX);
20115 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20116 RTX_FRAME_RELATED_P (insn) = 1;
20117 }
20118
20119 /* Generate and emit an insn that we will recognize as a push_multi.
20120 Unfortunately, since this insn does not reflect very well the actual
20121 semantics of the operation, we need to annotate the insn for the benefit
20122 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20123 MASK for registers that should be annotated for DWARF2 frame unwind
20124 information. */
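/* For example, a MASK covering {r4, r5, lr} ultimately assembles to a
   single "push {r4, r5, lr}" (a store-multiple with writeback), while the
   attached REG_FRAME_RELATED_EXPR note describes the three stores and the
   single 12-byte SP decrement individually, as sketched in the comment
   inside the function below.  This is only an illustration of the intent,
   not a guarantee of the exact assembly produced.  */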
20125 static rtx
20126 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20127 {
20128 int num_regs = 0;
20129 int num_dwarf_regs = 0;
20130 int i, j;
20131 rtx par;
20132 rtx dwarf;
20133 int dwarf_par_index;
20134 rtx tmp, reg;
20135
20136 /* We don't record the PC in the dwarf frame information. */
20137 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20138
20139 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20140 {
20141 if (mask & (1 << i))
20142 num_regs++;
20143 if (dwarf_regs_mask & (1 << i))
20144 num_dwarf_regs++;
20145 }
20146
20147 gcc_assert (num_regs && num_regs <= 16);
20148 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20149
20150 /* For the body of the insn we are going to generate an UNSPEC in
20151 parallel with several USEs. This allows the insn to be recognized
20152 by the push_multi pattern in the arm.md file.
20153
20154 The body of the insn looks something like this:
20155
20156 (parallel [
20157 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20158 (const_int:SI <num>)))
20159 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20160 (use (reg:SI XX))
20161 (use (reg:SI YY))
20162 ...
20163 ])
20164
20165 For the frame note however, we try to be more explicit and actually
20166 show each register being stored into the stack frame, plus a (single)
20167 decrement of the stack pointer. We do it this way in order to be
20168 friendly to the stack unwinding code, which only wants to see a single
20169 stack decrement per instruction. The RTL we generate for the note looks
20170 something like this:
20171
20172 (sequence [
20173 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20174 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20175 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20176 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20177 ...
20178 ])
20179
20180 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20181 instead we'd have a parallel expression detailing all
20182 the stores to the various memory addresses so that debug
20183 information is more up-to-date. Remember however while writing
20184 this to take care of the constraints with the push instruction.
20185
20186 Note also that this has to be taken care of for the VFP registers.
20187
20188 For more see PR43399. */
20189
20190 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20191 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20192 dwarf_par_index = 1;
20193
20194 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20195 {
20196 if (mask & (1 << i))
20197 {
20198 reg = gen_rtx_REG (SImode, i);
20199
20200 XVECEXP (par, 0, 0)
20201 = gen_rtx_SET (gen_frame_mem
20202 (BLKmode,
20203 gen_rtx_PRE_MODIFY (Pmode,
20204 stack_pointer_rtx,
20205 plus_constant
20206 (Pmode, stack_pointer_rtx,
20207 -4 * num_regs))
20208 ),
20209 gen_rtx_UNSPEC (BLKmode,
20210 gen_rtvec (1, reg),
20211 UNSPEC_PUSH_MULT));
20212
20213 if (dwarf_regs_mask & (1 << i))
20214 {
20215 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20216 reg);
20217 RTX_FRAME_RELATED_P (tmp) = 1;
20218 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20219 }
20220
20221 break;
20222 }
20223 }
20224
20225 for (j = 1, i++; j < num_regs; i++)
20226 {
20227 if (mask & (1 << i))
20228 {
20229 reg = gen_rtx_REG (SImode, i);
20230
20231 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20232
20233 if (dwarf_regs_mask & (1 << i))
20234 {
20235 tmp
20236 = gen_rtx_SET (gen_frame_mem
20237 (SImode,
20238 plus_constant (Pmode, stack_pointer_rtx,
20239 4 * j)),
20240 reg);
20241 RTX_FRAME_RELATED_P (tmp) = 1;
20242 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20243 }
20244
20245 j++;
20246 }
20247 }
20248
20249 par = emit_insn (par);
20250
20251 tmp = gen_rtx_SET (stack_pointer_rtx,
20252 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20253 RTX_FRAME_RELATED_P (tmp) = 1;
20254 XVECEXP (dwarf, 0, 0) = tmp;
20255
20256 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20257
20258 return par;
20259 }
20260
20261 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20262 SIZE is the offset to be adjusted.
20263 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20264 static void
20265 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20266 {
20267 rtx dwarf;
20268
20269 RTX_FRAME_RELATED_P (insn) = 1;
20270 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20271 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20272 }
20273
20274 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20275 SAVED_REGS_MASK shows which registers need to be restored.
20276
20277 Unfortunately, since this insn does not reflect very well the actual
20278 semantics of the operation, we need to annotate the insn for the benefit
20279 of DWARF2 frame unwind information. */
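/* As a rough illustration: with SAVED_REGS_MASK covering {r4, r5, pc} the
   emitted insn corresponds to "pop {r4, r5, pc}", annotated with
   REG_CFA_RESTORE notes for r4 and r5; for a mask that does not include
   PC a REG_CFA_ADJUST_CFA note describing the stack adjustment is added
   as well.  The exact output depends on the mask and target state.  */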
20280 static void
20281 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20282 {
20283 int num_regs = 0;
20284 int i, j;
20285 rtx par;
20286 rtx dwarf = NULL_RTX;
20287 rtx tmp, reg;
20288 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20289 int offset_adj;
20290 int emit_update;
20291
20292 offset_adj = return_in_pc ? 1 : 0;
20293 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20294 if (saved_regs_mask & (1 << i))
20295 num_regs++;
20296
20297 gcc_assert (num_regs && num_regs <= 16);
20298
20299 /* If SP is in the reglist, then we don't emit the SP update insn. */
20300 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20301
20302 /* The parallel needs to hold num_regs SETs
20303 and one SET for the stack update. */
20304 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20305
20306 if (return_in_pc)
20307 XVECEXP (par, 0, 0) = ret_rtx;
20308
20309 if (emit_update)
20310 {
20311 /* Increment the stack pointer, based on there being
20312 num_regs 4-byte registers to restore. */
20313 tmp = gen_rtx_SET (stack_pointer_rtx,
20314 plus_constant (Pmode,
20315 stack_pointer_rtx,
20316 4 * num_regs));
20317 RTX_FRAME_RELATED_P (tmp) = 1;
20318 XVECEXP (par, 0, offset_adj) = tmp;
20319 }
20320
20321 /* Now restore every reg, which may include PC. */
20322 for (j = 0, i = 0; j < num_regs; i++)
20323 if (saved_regs_mask & (1 << i))
20324 {
20325 reg = gen_rtx_REG (SImode, i);
20326 if ((num_regs == 1) && emit_update && !return_in_pc)
20327 {
20328 /* Emit single load with writeback. */
20329 tmp = gen_frame_mem (SImode,
20330 gen_rtx_POST_INC (Pmode,
20331 stack_pointer_rtx));
20332 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20333 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20334 return;
20335 }
20336
20337 tmp = gen_rtx_SET (reg,
20338 gen_frame_mem
20339 (SImode,
20340 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20341 RTX_FRAME_RELATED_P (tmp) = 1;
20342 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20343
20344 /* We need to maintain a sequence for DWARF info too. As dwarf info
20345 should not have PC, skip PC. */
20346 if (i != PC_REGNUM)
20347 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20348
20349 j++;
20350 }
20351
20352 if (return_in_pc)
20353 par = emit_jump_insn (par);
20354 else
20355 par = emit_insn (par);
20356
20357 REG_NOTES (par) = dwarf;
20358 if (!return_in_pc)
20359 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20360 stack_pointer_rtx, stack_pointer_rtx);
20361 }
20362
20363 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20364 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20365
20366 Unfortunately, since this insn does not reflect very well the actual
20367 semantics of the operation, we need to annotate the insn for the benefit
20368 of DWARF2 frame unwind information. */
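/* Illustrative sketch only: restoring the three D-registers d8-d10 with
   BASE_REG == SP corresponds to something like "vldm sp!, {d8-d10}", with
   a REG_CFA_RESTORE note for each D-register and a 24-byte CFA adjustment
   note when the base register is the stack pointer.  */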
20369 static void
20370 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20371 {
20372 int i, j;
20373 rtx par;
20374 rtx dwarf = NULL_RTX;
20375 rtx tmp, reg;
20376
20377 gcc_assert (num_regs && num_regs <= 32);
20378
20379 /* Workaround ARM10 VFPr1 bug. */
20380 if (num_regs == 2 && !arm_arch6)
20381 {
20382 if (first_reg == 15)
20383 first_reg--;
20384
20385 num_regs++;
20386 }
20387
20388 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20389 there could be up to 32 D-registers to restore.
20390 If there are more than 16 D-registers, make two recursive calls,
20391 each of which emits one pop_multi instruction. */
20392 if (num_regs > 16)
20393 {
20394 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20395 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20396 return;
20397 }
20398
20399 /* The parallel needs to hold num_regs SETs
20400 and one SET for the stack update. */
20401 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20402
20403 /* Increment the stack pointer, based on there being
20404 num_regs 8-byte registers to restore. */
20405 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20406 RTX_FRAME_RELATED_P (tmp) = 1;
20407 XVECEXP (par, 0, 0) = tmp;
20408
20409 /* Now show every reg that will be restored, using a SET for each. */
20410 for (j = 0, i=first_reg; j < num_regs; i += 2)
20411 {
20412 reg = gen_rtx_REG (DFmode, i);
20413
20414 tmp = gen_rtx_SET (reg,
20415 gen_frame_mem
20416 (DFmode,
20417 plus_constant (Pmode, base_reg, 8 * j)));
20418 RTX_FRAME_RELATED_P (tmp) = 1;
20419 XVECEXP (par, 0, j + 1) = tmp;
20420
20421 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20422
20423 j++;
20424 }
20425
20426 par = emit_insn (par);
20427 REG_NOTES (par) = dwarf;
20428
20429 /* Make sure the CFA isn't left based on IP_REGNUM, to allow unwinding from FP. */
20430 if (REGNO (base_reg) == IP_REGNUM)
20431 {
20432 RTX_FRAME_RELATED_P (par) = 1;
20433 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20434 }
20435 else
20436 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20437 base_reg, base_reg);
20438 }
20439
20440 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If
20441 an even number of registers is being popped, multiple LDRD patterns are
20442 created, one for each register pair. If an odd number of registers is popped,
20443 the last register is loaded using an LDR pattern. */
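/* A rough illustration: popping {r4, r5, r6} (an odd count, no PC) would
   emit something along the lines of

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	ldr	r6, [sp], #4

   with the final single register loaded with post-increment after the
   explicit stack adjustment.  */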
20444 static void
20445 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20446 {
20447 int num_regs = 0;
20448 int i, j;
20449 rtx par = NULL_RTX;
20450 rtx dwarf = NULL_RTX;
20451 rtx tmp, reg, tmp1;
20452 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20453
20454 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20455 if (saved_regs_mask & (1 << i))
20456 num_regs++;
20457
20458 gcc_assert (num_regs && num_regs <= 16);
20459
20460 /* We cannot generate LDRD for PC, so reduce the count if PC is to be
20461 popped. Removing PC flips the parity of num_regs: if it was even it
20462 is now odd, and we can generate a pop with PC; if it was odd it is now
20463 even, and an LDR with return can be generated for PC. */
20464 if (return_in_pc)
20465 num_regs--;
20466
20467 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20468
20469 /* Var j iterates over all the registers in saved_regs_mask; var i gives
20470 the index of each saved register in the stack frame. A PARALLEL RTX
20471 for each register pair is created here, so that the pattern for LDRD
20472 can be matched. As PC is always the last register to be popped, and
20473 we have already decremented num_regs if PC is present, we don't have
20474 to worry about PC in this loop. */
20475 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20476 if (saved_regs_mask & (1 << j))
20477 {
20478 /* Create RTX for memory load. */
20479 reg = gen_rtx_REG (SImode, j);
20480 tmp = gen_rtx_SET (reg,
20481 gen_frame_mem (SImode,
20482 plus_constant (Pmode,
20483 stack_pointer_rtx, 4 * i)));
20484 RTX_FRAME_RELATED_P (tmp) = 1;
20485
20486 if (i % 2 == 0)
20487 {
20488 /* When saved-register index (i) is even, the RTX to be emitted is
20489 yet to be created. Hence create it first. The LDRD pattern we
20490 are generating is :
20491 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20492 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20493 where target registers need not be consecutive. */
20494 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20495 dwarf = NULL_RTX;
20496 }
20497
20498 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
20499 added as the 0th element; if i is odd, reg_i is added as the 1st element
20500 of the LDRD pattern shown above. */
20501 XVECEXP (par, 0, (i % 2)) = tmp;
20502 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20503
20504 if ((i % 2) == 1)
20505 {
20506 /* When saved-register index (i) is odd, RTXs for both the registers
20507 to be loaded are generated in above given LDRD pattern, and the
20508 pattern can be emitted now. */
20509 par = emit_insn (par);
20510 REG_NOTES (par) = dwarf;
20511 RTX_FRAME_RELATED_P (par) = 1;
20512 }
20513
20514 i++;
20515 }
20516
20517 /* If the number of registers pushed is odd AND return_in_pc is false, OR
20518 the number of registers is even AND return_in_pc is true, the last
20519 register is popped using LDR. It can be PC as well. Hence, adjust the
20520 stack first and then use LDR with post-increment. */
20521
20522 /* Increment the stack pointer, based on there being
20523 num_regs 4-byte registers to restore. */
20524 tmp = gen_rtx_SET (stack_pointer_rtx,
20525 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20526 RTX_FRAME_RELATED_P (tmp) = 1;
20527 tmp = emit_insn (tmp);
20528 if (!return_in_pc)
20529 {
20530 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20531 stack_pointer_rtx, stack_pointer_rtx);
20532 }
20533
20534 dwarf = NULL_RTX;
20535
20536 if (((num_regs % 2) == 1 && !return_in_pc)
20537 || ((num_regs % 2) == 0 && return_in_pc))
20538 {
20539 /* Scan for the single register to be popped. Skip until the saved
20540 register is found. */
20541 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20542
20543 /* Gen LDR with post increment here. */
20544 tmp1 = gen_rtx_MEM (SImode,
20545 gen_rtx_POST_INC (SImode,
20546 stack_pointer_rtx));
20547 set_mem_alias_set (tmp1, get_frame_alias_set ());
20548
20549 reg = gen_rtx_REG (SImode, j);
20550 tmp = gen_rtx_SET (reg, tmp1);
20551 RTX_FRAME_RELATED_P (tmp) = 1;
20552 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20553
20554 if (return_in_pc)
20555 {
20556 /* If return_in_pc, j must be PC_REGNUM. */
20557 gcc_assert (j == PC_REGNUM);
20558 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20559 XVECEXP (par, 0, 0) = ret_rtx;
20560 XVECEXP (par, 0, 1) = tmp;
20561 par = emit_jump_insn (par);
20562 }
20563 else
20564 {
20565 par = emit_insn (tmp);
20566 REG_NOTES (par) = dwarf;
20567 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20568 stack_pointer_rtx, stack_pointer_rtx);
20569 }
20570
20571 }
20572 else if ((num_regs % 2) == 1 && return_in_pc)
20573 {
20574 /* There are 2 registers to be popped. So, generate the pattern
20575 pop_multiple_with_stack_update_and_return to pop in PC. */
20576 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20577 }
20578
20579 return;
20580 }
20581
20582 /* LDRD in ARM mode needs consecutive registers as operands. This function
20583 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20584 offset addressing and then generates one separate stack update. This provides
20585 more scheduling freedom, compared to writeback on every load. However,
20586 if the function returns using load into PC directly
20587 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20588 before the last load. TODO: Add a peephole optimization to recognize
20589 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20590 peephole optimization to merge the load at stack-offset zero
20591 with the stack update instruction using load with writeback
20592 in post-index addressing mode. */
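/* As an illustrative sketch: with SAVED_REGS_MASK covering {r4, r5, r6, pc}
   this would emit roughly

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12
	ldr	pc, [sp], #4

   where the last load both returns and performs the final 4-byte stack
   update.  */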
20593 static void
20594 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20595 {
20596 int j = 0;
20597 int offset = 0;
20598 rtx par = NULL_RTX;
20599 rtx dwarf = NULL_RTX;
20600 rtx tmp, mem;
20601
20602 /* Restore saved registers. */
20603 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20604 j = 0;
20605 while (j <= LAST_ARM_REGNUM)
20606 if (saved_regs_mask & (1 << j))
20607 {
20608 if ((j % 2) == 0
20609 && (saved_regs_mask & (1 << (j + 1)))
20610 && (j + 1) != PC_REGNUM)
20611 {
20612 /* Current register and next register form register pair for which
20613 LDRD can be generated. PC is always the last register popped, and
20614 we handle it separately. */
20615 if (offset > 0)
20616 mem = gen_frame_mem (DImode,
20617 plus_constant (Pmode,
20618 stack_pointer_rtx,
20619 offset));
20620 else
20621 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20622
20623 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20624 tmp = emit_insn (tmp);
20625 RTX_FRAME_RELATED_P (tmp) = 1;
20626
20627 /* Generate dwarf info. */
20628
20629 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20630 gen_rtx_REG (SImode, j),
20631 NULL_RTX);
20632 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20633 gen_rtx_REG (SImode, j + 1),
20634 dwarf);
20635
20636 REG_NOTES (tmp) = dwarf;
20637
20638 offset += 8;
20639 j += 2;
20640 }
20641 else if (j != PC_REGNUM)
20642 {
20643 /* Emit a single word load. */
20644 if (offset > 0)
20645 mem = gen_frame_mem (SImode,
20646 plus_constant (Pmode,
20647 stack_pointer_rtx,
20648 offset));
20649 else
20650 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20651
20652 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20653 tmp = emit_insn (tmp);
20654 RTX_FRAME_RELATED_P (tmp) = 1;
20655
20656 /* Generate dwarf info. */
20657 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20658 gen_rtx_REG (SImode, j),
20659 NULL_RTX);
20660
20661 offset += 4;
20662 j += 1;
20663 }
20664 else /* j == PC_REGNUM */
20665 j++;
20666 }
20667 else
20668 j++;
20669
20670 /* Update the stack. */
20671 if (offset > 0)
20672 {
20673 tmp = gen_rtx_SET (stack_pointer_rtx,
20674 plus_constant (Pmode,
20675 stack_pointer_rtx,
20676 offset));
20677 tmp = emit_insn (tmp);
20678 arm_add_cfa_adjust_cfa_note (tmp, offset,
20679 stack_pointer_rtx, stack_pointer_rtx);
20680 offset = 0;
20681 }
20682
20683 if (saved_regs_mask & (1 << PC_REGNUM))
20684 {
20685 /* Only PC is to be popped. */
20686 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20687 XVECEXP (par, 0, 0) = ret_rtx;
20688 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20689 gen_frame_mem (SImode,
20690 gen_rtx_POST_INC (SImode,
20691 stack_pointer_rtx)));
20692 RTX_FRAME_RELATED_P (tmp) = 1;
20693 XVECEXP (par, 0, 1) = tmp;
20694 par = emit_jump_insn (par);
20695
20696 /* Generate dwarf info. */
20697 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20698 gen_rtx_REG (SImode, PC_REGNUM),
20699 NULL_RTX);
20700 REG_NOTES (par) = dwarf;
20701 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20702 stack_pointer_rtx, stack_pointer_rtx);
20703 }
20704 }
20705
20706 /* Calculate the size of the return value that is passed in registers. */
20707 static unsigned
20708 arm_size_return_regs (void)
20709 {
20710 machine_mode mode;
20711
20712 if (crtl->return_rtx != 0)
20713 mode = GET_MODE (crtl->return_rtx);
20714 else
20715 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20716
20717 return GET_MODE_SIZE (mode);
20718 }
20719
20720 /* Return true if the current function needs to save/restore LR. */
20721 static bool
20722 thumb_force_lr_save (void)
20723 {
20724 return !cfun->machine->lr_save_eliminated
20725 && (!crtl->is_leaf
20726 || thumb_far_jump_used_p ()
20727 || df_regs_ever_live_p (LR_REGNUM));
20728 }
20729
20730 /* Return true if CALL is an indirect tail call, in which case we
20731 cannot be sure that r3 will be available on entry to the called
20732 function. */
20733 static bool
20734 is_indirect_tailcall_p (rtx call)
20735 {
20736 rtx pat = PATTERN (call);
20737
20738 /* Indirect tail call. */
20739 pat = XVECEXP (pat, 0, 0);
20740 if (GET_CODE (pat) == SET)
20741 pat = SET_SRC (pat);
20742
20743 pat = XEXP (XEXP (pat, 0), 0);
20744 return REG_P (pat);
20745 }
20746
20747 /* Return true if r3 might be used by any of the tail call insns in the
20748 current function. */
20749 static bool
20750 any_sibcall_could_use_r3 (void)
20751 {
20752 edge_iterator ei;
20753 edge e;
20754
20755 if (!crtl->tail_call_emit)
20756 return false;
20757 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20758 if (e->flags & EDGE_SIBCALL)
20759 {
20760 rtx_insn *call = BB_END (e->src);
20761 if (!CALL_P (call))
20762 call = prev_nonnote_nondebug_insn (call);
20763 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20764 if (find_regno_fusage (call, USE, 3)
20765 || is_indirect_tailcall_p (call))
20766 return true;
20767 }
20768 return false;
20769 }
20770
20771
20772 /* Compute the distance from register FROM to register TO.
20773 These can be the arg pointer (26), the soft frame pointer (25),
20774 the stack pointer (13) or the hard frame pointer (11).
20775 In thumb mode r7 is used as the hard frame pointer, if needed.
20776 Typical stack layout looks like this:
20777
20778 old stack pointer -> | |
20779 ----
20780 | | \
20781 | | saved arguments for
20782 | | vararg functions
20783 | | /
20784 --
20785 hard FP & arg pointer -> | | \
20786 | | stack
20787 | | frame
20788 | | /
20789 --
20790 | | \
20791 | | call saved
20792 | | registers
20793 soft frame pointer -> | | /
20794 --
20795 | | \
20796 | | local
20797 | | variables
20798 locals base pointer -> | | /
20799 --
20800 | | \
20801 | | outgoing
20802 | | arguments
20803 current stack pointer -> | | /
20804 --
20805
20806 For a given function some or all of these stack components
20807 may not be needed, giving rise to the possibility of
20808 eliminating some of the registers.
20809
20810 The values returned by this function must reflect the behavior
20811 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
20812
20813 The sign of the number returned reflects the direction of stack
20814 growth, so the values are positive for all eliminations except
20815 from the soft frame pointer to the hard frame pointer.
20816
20817 SFP may point just inside the local variables block to ensure correct
20818 alignment. */
20819
20820
20821 /* Return cached stack offsets. */
20822
20823 static arm_stack_offsets *
20824 arm_get_frame_offsets (void)
20825 {
20826 struct arm_stack_offsets *offsets;
20827
20828 offsets = &cfun->machine->stack_offsets;
20829
20830 return offsets;
20831 }
20832
20833
20834 /* Calculate stack offsets. These are used to calculate register elimination
20835 offsets and in prologue/epilogue code. Also calculates which registers
20836 should be saved. */
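/* As a very rough worked example (the real values depend on the ABI,
   alignment padding, the static chain, and any iWMMXt/VFP saves, and
   CALLER_INTERWORKING_SLOT_SIZE is assumed to be 0 here): an ARM-state
   function with no pretend args, two call-saved core registers, 8 bytes
   of locals and 16 bytes of outgoing arguments, and no frame pointer,
   ends up with approximately

	saved_args    = 0
	saved_regs    = 8
	soft_frame    = 8
	locals_base   = 16
	outgoing_args = 32

   with outgoing_args kept doubleword aligned.  */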
20837
20838 static void
20839 arm_compute_frame_layout (void)
20840 {
20841 struct arm_stack_offsets *offsets;
20842 unsigned long func_type;
20843 int saved;
20844 int core_saved;
20845 HOST_WIDE_INT frame_size;
20846 int i;
20847
20848 offsets = &cfun->machine->stack_offsets;
20849
20850 /* Initially this is the size of the local variables. It will be translated
20851 into an offset once we have determined the size of the preceding data. */
20852 frame_size = ROUND_UP_WORD (get_frame_size ());
20853
20854 /* Space for variadic functions. */
20855 offsets->saved_args = crtl->args.pretend_args_size;
20856
20857 /* In Thumb mode this is incorrect, but never used. */
20858 offsets->frame
20859 = (offsets->saved_args
20860 + arm_compute_static_chain_stack_bytes ()
20861 + (frame_pointer_needed ? 4 : 0));
20862
20863 if (TARGET_32BIT)
20864 {
20865 unsigned int regno;
20866
20867 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
20868 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20869 saved = core_saved;
20870
20871 /* We know that SP will be doubleword aligned on entry, and we must
20872 preserve that condition at any subroutine call. We also require the
20873 soft frame pointer to be doubleword aligned. */
20874
20875 if (TARGET_REALLY_IWMMXT)
20876 {
20877 /* Check for the call-saved iWMMXt registers. */
20878 for (regno = FIRST_IWMMXT_REGNUM;
20879 regno <= LAST_IWMMXT_REGNUM;
20880 regno++)
20881 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20882 saved += 8;
20883 }
20884
20885 func_type = arm_current_func_type ();
20886 /* Space for saved VFP registers. */
20887 if (! IS_VOLATILE (func_type)
20888 && TARGET_HARD_FLOAT)
20889 saved += arm_get_vfp_saved_size ();
20890 }
20891 else /* TARGET_THUMB1 */
20892 {
20893 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
20894 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20895 saved = core_saved;
20896 if (TARGET_BACKTRACE)
20897 saved += 16;
20898 }
20899
20900 /* Saved registers include the stack frame. */
20901 offsets->saved_regs
20902 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20903 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20904
20905 /* A leaf function does not need any stack alignment if it has nothing
20906 on the stack. */
20907 if (crtl->is_leaf && frame_size == 0
20908 /* However if it calls alloca(), we have a dynamically allocated
20909 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20910 && ! cfun->calls_alloca)
20911 {
20912 offsets->outgoing_args = offsets->soft_frame;
20913 offsets->locals_base = offsets->soft_frame;
20914 return;
20915 }
20916
20917 /* Ensure SFP has the correct alignment. */
20918 if (ARM_DOUBLEWORD_ALIGN
20919 && (offsets->soft_frame & 7))
20920 {
20921 offsets->soft_frame += 4;
20922 /* Try to align stack by pushing an extra reg. Don't bother doing this
20923 when there is a stack frame as the alignment will be rolled into
20924 the normal stack adjustment. */
20925 if (frame_size + crtl->outgoing_args_size == 0)
20926 {
20927 int reg = -1;
20928
20929 /* Register r3 is caller-saved. Normally it does not need to be
20930 saved on entry by the prologue. However if we choose to save
20931 it for padding then we may confuse the compiler into thinking
20932 a prologue sequence is required when in fact it is not. This
20933 will occur when shrink-wrapping if r3 is used as a scratch
20934 register and there are no other callee-saved writes.
20935
20936 This situation can be avoided when other callee-saved registers
20937 are available and r3 is not mandatory if we choose a callee-saved
20938 register for padding. */
20939 bool prefer_callee_reg_p = false;
20940
20941 /* If it is safe to use r3, then do so. This sometimes
20942 generates better code on Thumb-2 by avoiding the need to
20943 use 32-bit push/pop instructions. */
20944 if (! any_sibcall_could_use_r3 ()
20945 && arm_size_return_regs () <= 12
20946 && (offsets->saved_regs_mask & (1 << 3)) == 0
20947 && (TARGET_THUMB2
20948 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20949 {
20950 reg = 3;
20951 if (!TARGET_THUMB2)
20952 prefer_callee_reg_p = true;
20953 }
20954 if (reg == -1
20955 || prefer_callee_reg_p)
20956 {
20957 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20958 {
20959 /* Avoid fixed registers; they may be changed at
20960 arbitrary times so it's unsafe to restore them
20961 during the epilogue. */
20962 if (!fixed_regs[i]
20963 && (offsets->saved_regs_mask & (1 << i)) == 0)
20964 {
20965 reg = i;
20966 break;
20967 }
20968 }
20969 }
20970
20971 if (reg != -1)
20972 {
20973 offsets->saved_regs += 4;
20974 offsets->saved_regs_mask |= (1 << reg);
20975 }
20976 }
20977 }
20978
20979 offsets->locals_base = offsets->soft_frame + frame_size;
20980 offsets->outgoing_args = (offsets->locals_base
20981 + crtl->outgoing_args_size);
20982
20983 if (ARM_DOUBLEWORD_ALIGN)
20984 {
20985 /* Ensure SP remains doubleword aligned. */
20986 if (offsets->outgoing_args & 7)
20987 offsets->outgoing_args += 4;
20988 gcc_assert (!(offsets->outgoing_args & 7));
20989 }
20990 }
20991
20992
20993 /* Calculate the relative offsets for the different stack pointers. Positive
20994 offsets are in the direction of stack growth. */
20995
20996 HOST_WIDE_INT
20997 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20998 {
20999 arm_stack_offsets *offsets;
21000
21001 offsets = arm_get_frame_offsets ();
21002
21003 /* OK, now we have enough information to compute the distances.
21004 There must be an entry in these switch tables for each pair
21005 of registers in ELIMINABLE_REGS, even if some of the entries
21006 seem to be redundant or useless. */
21007 switch (from)
21008 {
21009 case ARG_POINTER_REGNUM:
21010 switch (to)
21011 {
21012 case THUMB_HARD_FRAME_POINTER_REGNUM:
21013 return 0;
21014
21015 case FRAME_POINTER_REGNUM:
21016 /* This is the reverse of the soft frame pointer
21017 to hard frame pointer elimination below. */
21018 return offsets->soft_frame - offsets->saved_args;
21019
21020 case ARM_HARD_FRAME_POINTER_REGNUM:
21021 /* This is only non-zero in the case where the static chain register
21022 is stored above the frame. */
21023 return offsets->frame - offsets->saved_args - 4;
21024
21025 case STACK_POINTER_REGNUM:
21026 /* If nothing has been pushed on the stack at all
21027 then this will return -4. This *is* correct! */
21028 return offsets->outgoing_args - (offsets->saved_args + 4);
21029
21030 default:
21031 gcc_unreachable ();
21032 }
21033 gcc_unreachable ();
21034
21035 case FRAME_POINTER_REGNUM:
21036 switch (to)
21037 {
21038 case THUMB_HARD_FRAME_POINTER_REGNUM:
21039 return 0;
21040
21041 case ARM_HARD_FRAME_POINTER_REGNUM:
21042 /* The hard frame pointer points to the top entry in the
21043 stack frame. The soft frame pointer points to the bottom entry
21044 in the stack frame. If there is no stack frame at all,
21045 then they are identical. */
21046
21047 return offsets->frame - offsets->soft_frame;
21048
21049 case STACK_POINTER_REGNUM:
21050 return offsets->outgoing_args - offsets->soft_frame;
21051
21052 default:
21053 gcc_unreachable ();
21054 }
21055 gcc_unreachable ();
21056
21057 default:
21058 /* You cannot eliminate from the stack pointer.
21059 In theory you could eliminate from the hard frame
21060 pointer to the stack pointer, but this will never
21061 happen, since if a stack frame is not needed the
21062 hard frame pointer will never be used. */
21063 gcc_unreachable ();
21064 }
21065 }
21066
21067 /* Given FROM and TO register numbers, say whether this elimination is
21068 allowed. Frame pointer elimination is automatically handled.
21069
21070 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21071 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21072 pointer, we must eliminate FRAME_POINTER_REGNUM into
21073 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21074 ARG_POINTER_REGNUM. */
21075
21076 bool
21077 arm_can_eliminate (const int from, const int to)
21078 {
21079 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21080 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21081 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21082 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21083 true);
21084 }
21085
21086 /* Emit RTL to save coprocessor registers on function entry. Returns the
21087 number of bytes pushed. */
21088
21089 static int
21090 arm_save_coproc_regs(void)
21091 {
21092 int saved_size = 0;
21093 unsigned reg;
21094 unsigned start_reg;
21095 rtx insn;
21096
21097 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21098 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21099 {
21100 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21101 insn = gen_rtx_MEM (V2SImode, insn);
21102 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21103 RTX_FRAME_RELATED_P (insn) = 1;
21104 saved_size += 8;
21105 }
21106
21107 if (TARGET_HARD_FLOAT)
21108 {
21109 start_reg = FIRST_VFP_REGNUM;
21110
21111 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21112 {
21113 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21114 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21115 {
21116 if (start_reg != reg)
21117 saved_size += vfp_emit_fstmd (start_reg,
21118 (reg - start_reg) / 2);
21119 start_reg = reg + 2;
21120 }
21121 }
21122 if (start_reg != reg)
21123 saved_size += vfp_emit_fstmd (start_reg,
21124 (reg - start_reg) / 2);
21125 }
21126 return saved_size;
21127 }
21128
21129
21130 /* Set the Thumb frame pointer from the stack pointer. */
21131
21132 static void
21133 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21134 {
21135 HOST_WIDE_INT amount;
21136 rtx insn, dwarf;
21137
21138 amount = offsets->outgoing_args - offsets->locals_base;
21139 if (amount < 1024)
21140 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21141 stack_pointer_rtx, GEN_INT (amount)));
21142 else
21143 {
21144 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21145 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21146 expects the first two operands to be the same. */
21147 if (TARGET_THUMB2)
21148 {
21149 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21150 stack_pointer_rtx,
21151 hard_frame_pointer_rtx));
21152 }
21153 else
21154 {
21155 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21156 hard_frame_pointer_rtx,
21157 stack_pointer_rtx));
21158 }
21159 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21160 plus_constant (Pmode, stack_pointer_rtx, amount));
21161 RTX_FRAME_RELATED_P (dwarf) = 1;
21162 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21163 }
21164
21165 RTX_FRAME_RELATED_P (insn) = 1;
21166 }
21167
21168 struct scratch_reg {
21169 rtx reg;
21170 bool saved;
21171 };
21172
21173 /* Return a short-lived scratch register for use as a 2nd scratch register on
21174 function entry after the registers are saved in the prologue. This register
21175 must be released by means of release_scratch_register_on_entry. IP is not
21176 considered since it is always used as the 1st scratch register if available.
21177
21178 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21179 mask of live registers. */
21180
21181 static void
21182 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21183 unsigned long live_regs)
21184 {
21185 int regno = -1;
21186
21187 sr->saved = false;
21188
21189 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21190 regno = LR_REGNUM;
21191 else
21192 {
21193 unsigned int i;
21194
21195 for (i = 4; i < 11; i++)
21196 if (regno1 != i && (live_regs & (1 << i)) != 0)
21197 {
21198 regno = i;
21199 break;
21200 }
21201
21202 if (regno < 0)
21203 {
21204 /* If IP is used as the 1st scratch register for a nested function,
21205 then either r3 wasn't available or is used to preserve IP. */
21206 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21207 regno1 = 3;
21208 regno = (regno1 == 3 ? 2 : 3);
21209 sr->saved
21210 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21211 regno);
21212 }
21213 }
21214
21215 sr->reg = gen_rtx_REG (SImode, regno);
21216 if (sr->saved)
21217 {
21218 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21219 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21220 rtx x = gen_rtx_SET (stack_pointer_rtx,
21221 plus_constant (Pmode, stack_pointer_rtx, -4));
21222 RTX_FRAME_RELATED_P (insn) = 1;
21223 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21224 }
21225 }
21226
21227 /* Release a scratch register obtained from the preceding function. */
21228
21229 static void
21230 release_scratch_register_on_entry (struct scratch_reg *sr)
21231 {
21232 if (sr->saved)
21233 {
21234 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21235 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21236 rtx x = gen_rtx_SET (stack_pointer_rtx,
21237 plus_constant (Pmode, stack_pointer_rtx, 4));
21238 RTX_FRAME_RELATED_P (insn) = 1;
21239 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21240 }
21241 }
21242
21243 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21244
21245 #if PROBE_INTERVAL > 4096
21246 #error Cannot use indexed addressing mode for stack probing
21247 #endif
21248
21249 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21250 inclusive. These are offsets from the current stack pointer. REGNO1
21251 is the index number of the 1st scratch register and LIVE_REGS is the
21252 mask of live registers. */
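/* As an illustration only (assuming the default 4K probe interval and
   REGNO1 == 4): probing FIRST == 0, SIZE == 1024 falls into the first,
   single-probe case below and emits roughly

	mov	r4, #4096
	sub	r4, sp, r4
	str	r0, [r4, #3072]	@ probes the word at sp - 1024

   Larger sizes use either the unrolled sequence or the probe loop.  */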
21253
21254 static void
21255 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21256 unsigned int regno1, unsigned long live_regs)
21257 {
21258 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21259
21260 /* See if we have a constant small number of probes to generate. If so,
21261 that's the easy case. */
21262 if (size <= PROBE_INTERVAL)
21263 {
21264 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21265 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21266 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21267 }
21268
21269 /* The run-time loop is made up of 10 insns in the generic case while the
21270 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
21271 else if (size <= 5 * PROBE_INTERVAL)
21272 {
21273 HOST_WIDE_INT i, rem;
21274
21275 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21276 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21277 emit_stack_probe (reg1);
21278
21279 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21280 it exceeds SIZE. If only two probes are needed, this will not
21281 generate any code. Then probe at FIRST + SIZE. */
21282 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21283 {
21284 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21285 emit_stack_probe (reg1);
21286 }
21287
21288 rem = size - (i - PROBE_INTERVAL);
21289 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21290 {
21291 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21292 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21293 }
21294 else
21295 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21296 }
21297
21298 /* Otherwise, do the same as above, but in a loop. Note that we must be
21299 extra careful with variables wrapping around because we might be at
21300 the very top (or the very bottom) of the address space and we have
21301 to be able to handle this case properly; in particular, we use an
21302 equality test for the loop condition. */
21303 else
21304 {
21305 HOST_WIDE_INT rounded_size;
21306 struct scratch_reg sr;
21307
21308 get_scratch_register_on_entry (&sr, regno1, live_regs);
21309
21310 emit_move_insn (reg1, GEN_INT (first));
21311
21312
21313 /* Step 1: round SIZE to the previous multiple of the interval. */
21314
21315 rounded_size = size & -PROBE_INTERVAL;
21316 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21317
21318
21319 /* Step 2: compute initial and final value of the loop counter. */
21320
21321 /* TEST_ADDR = SP + FIRST. */
21322 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21323
21324 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21325 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21326
21327
21328 /* Step 3: the loop
21329
21330 do
21331 {
21332 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21333 probe at TEST_ADDR
21334 }
21335 while (TEST_ADDR != LAST_ADDR)
21336
21337 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21338 until it is equal to ROUNDED_SIZE. */
21339
21340 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21341
21342
21343 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21344 that SIZE is equal to ROUNDED_SIZE. */
21345
21346 if (size != rounded_size)
21347 {
21348 HOST_WIDE_INT rem = size - rounded_size;
21349
21350 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21351 {
21352 emit_set_insn (sr.reg,
21353 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21354 emit_stack_probe (plus_constant (Pmode, sr.reg,
21355 PROBE_INTERVAL - rem));
21356 }
21357 else
21358 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21359 }
21360
21361 release_scratch_register_on_entry (&sr);
21362 }
21363
21364 /* Make sure nothing is scheduled before we are done. */
21365 emit_insn (gen_blockage ());
21366 }
21367
21368 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21369 absolute addresses. */
21370
21371 const char *
21372 output_probe_stack_range (rtx reg1, rtx reg2)
21373 {
21374 static int labelno = 0;
21375 char loop_lab[32];
21376 rtx xops[2];
21377
21378 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21379
21380 /* Loop. */
21381 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21382
21383 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21384 xops[0] = reg1;
21385 xops[1] = GEN_INT (PROBE_INTERVAL);
21386 output_asm_insn ("sub\t%0, %0, %1", xops);
21387
21388 /* Probe at TEST_ADDR. */
21389 output_asm_insn ("str\tr0, [%0, #0]", xops);
21390
21391 /* Test if TEST_ADDR == LAST_ADDR. */
21392 xops[1] = reg2;
21393 output_asm_insn ("cmp\t%0, %1", xops);
21394
21395 /* Branch. */
21396 fputs ("\tbne\t", asm_out_file);
21397 assemble_name_raw (asm_out_file, loop_lab);
21398 fputc ('\n', asm_out_file);
21399
21400 return "";
21401 }
21402
21403 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21404 function. */
21405 void
21406 arm_expand_prologue (void)
21407 {
21408 rtx amount;
21409 rtx insn;
21410 rtx ip_rtx;
21411 unsigned long live_regs_mask;
21412 unsigned long func_type;
21413 int fp_offset = 0;
21414 int saved_pretend_args = 0;
21415 int saved_regs = 0;
21416 unsigned HOST_WIDE_INT args_to_push;
21417 HOST_WIDE_INT size;
21418 arm_stack_offsets *offsets;
21419 bool clobber_ip;
21420
21421 func_type = arm_current_func_type ();
21422
21423 /* Naked functions don't have prologues. */
21424 if (IS_NAKED (func_type))
21425 {
21426 if (flag_stack_usage_info)
21427 current_function_static_stack_size = 0;
21428 return;
21429 }
21430
21431 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
21432 args_to_push = crtl->args.pretend_args_size;
21433
21434 /* Compute which registers we will have to save onto the stack. */
21435 offsets = arm_get_frame_offsets ();
21436 live_regs_mask = offsets->saved_regs_mask;
21437
21438 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21439
21440 if (IS_STACKALIGN (func_type))
21441 {
21442 rtx r0, r1;
21443
21444 /* Handle a word-aligned stack pointer. We generate the following:
21445
21446 mov r0, sp
21447 bic r1, r0, #7
21448 mov sp, r1
21449 <save and restore r0 in normal prologue/epilogue>
21450 mov sp, r0
21451 bx lr
21452
21453 The unwinder doesn't need to know about the stack realignment.
21454 Just tell it we saved SP in r0. */
21455 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21456
21457 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21458 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21459
21460 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21461 RTX_FRAME_RELATED_P (insn) = 1;
21462 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21463
21464 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21465
21466 /* ??? The CFA changes here, which may cause GDB to conclude that it
21467 has entered a different function. That said, the unwind info is
21468 correct, individually, before and after this instruction because
21469 we've described the save of SP, which will override the default
21470 handling of SP as restoring from the CFA. */
21471 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21472 }
21473
21474 /* The static chain register is the same as the IP register. If it is
21475 clobbered when creating the frame, we need to save and restore it. */
21476 clobber_ip = IS_NESTED (func_type)
21477 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21478 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21479 || flag_stack_clash_protection)
21480 && !df_regs_ever_live_p (LR_REGNUM)
21481 && arm_r3_live_at_start_p ()));
21482
21483 /* Find somewhere to store IP whilst the frame is being created.
21484 We try the following places in order:
21485
21486 1. The last argument register r3 if it is available.
21487 2. A slot on the stack above the frame if there are no
21488 arguments to push onto the stack.
21489 3. Register r3 again, after pushing the argument registers
21490 onto the stack, if this is a varargs function.
21491 4. The last slot on the stack created for the arguments to
21492 push, if this isn't a varargs function.
21493
21494 Note - we only need to tell the dwarf2 backend about the SP
21495 adjustment in the second variant; the static chain register
21496 doesn't need to be unwound, as it doesn't contain a value
21497 inherited from the caller. */
21498 if (clobber_ip)
21499 {
21500 if (!arm_r3_live_at_start_p ())
21501 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21502 else if (args_to_push == 0)
21503 {
21504 rtx addr, dwarf;
21505
21506 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21507 saved_regs += 4;
21508
21509 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21510 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21511 fp_offset = 4;
21512
21513 /* Just tell the dwarf backend that we adjusted SP. */
21514 dwarf = gen_rtx_SET (stack_pointer_rtx,
21515 plus_constant (Pmode, stack_pointer_rtx,
21516 -fp_offset));
21517 RTX_FRAME_RELATED_P (insn) = 1;
21518 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21519 }
21520 else
21521 {
21522 /* Store the args on the stack. */
21523 if (cfun->machine->uses_anonymous_args)
21524 {
21525 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21526 (0xf0 >> (args_to_push / 4)) & 0xf);
21527 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21528 saved_pretend_args = 1;
21529 }
21530 else
21531 {
21532 rtx addr, dwarf;
21533
21534 if (args_to_push == 4)
21535 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21536 else
21537 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21538 plus_constant (Pmode,
21539 stack_pointer_rtx,
21540 -args_to_push));
21541
21542 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21543
21544 /* Just tell the dwarf backend that we adjusted SP. */
21545 dwarf = gen_rtx_SET (stack_pointer_rtx,
21546 plus_constant (Pmode, stack_pointer_rtx,
21547 -args_to_push));
21548 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21549 }
21550
21551 RTX_FRAME_RELATED_P (insn) = 1;
21552 fp_offset = args_to_push;
21553 args_to_push = 0;
21554 }
21555 }
21556
21557 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21558 {
21559 if (IS_INTERRUPT (func_type))
21560 {
21561 /* Interrupt functions must not corrupt any registers.
21562 Creating a frame pointer, however, corrupts the IP
21563 register, so we must push it first. */
21564 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21565
21566 /* Do not set RTX_FRAME_RELATED_P on this insn.
21567 The dwarf stack unwinding code only wants to see one
21568 stack decrement per function, and this is not it. If
21569 this instruction is labeled as being part of the frame
21570 creation sequence then dwarf2out_frame_debug_expr will
21571 die when it encounters the assignment of IP to FP
21572 later on, since the use of SP here establishes SP as
21573 the CFA register and not IP.
21574
21575 Anyway this instruction is not really part of the stack
21576 frame creation although it is part of the prologue. */
21577 }
21578
21579 insn = emit_set_insn (ip_rtx,
21580 plus_constant (Pmode, stack_pointer_rtx,
21581 fp_offset));
21582 RTX_FRAME_RELATED_P (insn) = 1;
21583 }
21584
21585 if (args_to_push)
21586 {
21587 /* Push the argument registers, or reserve space for them. */
21588 if (cfun->machine->uses_anonymous_args)
21589 insn = emit_multi_reg_push
21590 ((0xf0 >> (args_to_push / 4)) & 0xf,
21591 (0xf0 >> (args_to_push / 4)) & 0xf);
21592 else
21593 insn = emit_insn
21594 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21595 GEN_INT (- args_to_push)));
21596 RTX_FRAME_RELATED_P (insn) = 1;
21597 }
21598
21599 /* If this is an interrupt service routine, and the link register
21600 is going to be pushed, and we're not generating the extra
21601 push of IP (needed when a frame pointer is required and the frame
21602 layout is APCS), then subtracting four from LR now means that the
21603 function return can be done with a single instruction. */
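/* For example, the epilogue can then pop the adjusted value directly into
   PC (something like "ldmfd sp!, {..., pc}^") instead of adjusting LR
   before returning; the exact form depends on the registers saved.  */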
21604 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21605 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21606 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21607 && TARGET_ARM)
21608 {
21609 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21610
21611 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21612 }
21613
21614 if (live_regs_mask)
21615 {
21616 unsigned long dwarf_regs_mask = live_regs_mask;
21617
21618 saved_regs += bit_count (live_regs_mask) * 4;
21619 if (optimize_size && !frame_pointer_needed
21620 && saved_regs == offsets->saved_regs - offsets->saved_args)
21621 {
21622 /* If no coprocessor registers are being pushed and we don't have
21623 to worry about a frame pointer then push extra registers to
21624 create the stack frame. This is done in a way that does not
21625 alter the frame layout, so is independent of the epilogue. */
21626 int n;
21627 int frame;
21628 n = 0;
21629 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21630 n++;
21631 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21632 if (frame && n * 4 >= frame)
21633 {
21634 n = frame / 4;
21635 live_regs_mask |= (1 << n) - 1;
21636 saved_regs += frame;
21637 }
21638 }
21639
21640 if (TARGET_LDRD
21641 && current_tune->prefer_ldrd_strd
21642 && !optimize_function_for_size_p (cfun))
21643 {
21644 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21645 if (TARGET_THUMB2)
21646 thumb2_emit_strd_push (live_regs_mask);
21647 else if (TARGET_ARM
21648 && !TARGET_APCS_FRAME
21649 && !IS_INTERRUPT (func_type))
21650 arm_emit_strd_push (live_regs_mask);
21651 else
21652 {
21653 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21654 RTX_FRAME_RELATED_P (insn) = 1;
21655 }
21656 }
21657 else
21658 {
21659 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21660 RTX_FRAME_RELATED_P (insn) = 1;
21661 }
21662 }
21663
21664 if (! IS_VOLATILE (func_type))
21665 saved_regs += arm_save_coproc_regs ();
21666
21667 if (frame_pointer_needed && TARGET_ARM)
21668 {
21669 /* Create the new frame pointer. */
21670 if (TARGET_APCS_FRAME)
21671 {
21672 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21673 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21674 RTX_FRAME_RELATED_P (insn) = 1;
21675 }
21676 else
21677 {
21678 insn = GEN_INT (saved_regs - (4 + fp_offset));
21679 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21680 stack_pointer_rtx, insn));
21681 RTX_FRAME_RELATED_P (insn) = 1;
21682 }
21683 }
21684
21685 size = offsets->outgoing_args - offsets->saved_args;
21686 if (flag_stack_usage_info)
21687 current_function_static_stack_size = size;
21688
21689 /* If this isn't an interrupt service routine and we have a frame, then do
21690 stack checking. We use IP as the first scratch register, except for the
21691 non-APCS nested functions if LR or r3 is available (see clobber_ip). */
21692 if (!IS_INTERRUPT (func_type)
21693 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21694 || flag_stack_clash_protection))
21695 {
21696 unsigned int regno;
21697
21698 if (!IS_NESTED (func_type) || clobber_ip)
21699 regno = IP_REGNUM;
21700 else if (df_regs_ever_live_p (LR_REGNUM))
21701 regno = LR_REGNUM;
21702 else
21703 regno = 3;
21704
21705 if (crtl->is_leaf && !cfun->calls_alloca)
21706 {
21707 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
21708 arm_emit_probe_stack_range (get_stack_check_protect (),
21709 size - get_stack_check_protect (),
21710 regno, live_regs_mask);
21711 }
21712 else if (size > 0)
21713 arm_emit_probe_stack_range (get_stack_check_protect (), size,
21714 regno, live_regs_mask);
21715 }
21716
21717 /* Recover the static chain register. */
21718 if (clobber_ip)
21719 {
21720 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21721 insn = gen_rtx_REG (SImode, 3);
21722 else
21723 {
21724 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21725 insn = gen_frame_mem (SImode, insn);
21726 }
21727 emit_set_insn (ip_rtx, insn);
21728 emit_insn (gen_force_register_use (ip_rtx));
21729 }
21730
21731 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21732 {
21733 /* This add can produce multiple insns for a large constant, so we
21734 need to get tricky. */
21735 rtx_insn *last = get_last_insn ();
21736
21737 amount = GEN_INT (offsets->saved_args + saved_regs
21738 - offsets->outgoing_args);
21739
21740 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21741 amount));
21742 do
21743 {
21744 last = last ? NEXT_INSN (last) : get_insns ();
21745 RTX_FRAME_RELATED_P (last) = 1;
21746 }
21747 while (last != insn);
21748
21749 /* If the frame pointer is needed, emit a special barrier that
21750 will prevent the scheduler from moving stores to the frame
21751 before the stack adjustment. */
21752 if (frame_pointer_needed)
21753 emit_insn (gen_stack_tie (stack_pointer_rtx,
21754 hard_frame_pointer_rtx));
21755 }
21756
21757
21758 if (frame_pointer_needed && TARGET_THUMB2)
21759 thumb_set_frame_pointer (offsets);
21760
21761 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21762 {
21763 unsigned long mask;
21764
21765 mask = live_regs_mask;
21766 mask &= THUMB2_WORK_REGS;
21767 if (!IS_NESTED (func_type))
21768 mask |= (1 << IP_REGNUM);
21769 arm_load_pic_register (mask);
21770 }
21771
21772 /* If we are profiling, make sure no instructions are scheduled before
21773 the call to mcount. Similarly if the user has requested no
21774 scheduling in the prolog. Similarly if we want non-call exceptions
21775 using the EABI unwinder, to prevent faulting instructions from being
21776 swapped with a stack adjustment. */
21777 if (crtl->profile || !TARGET_SCHED_PROLOG
21778 || (arm_except_unwind_info (&global_options) == UI_TARGET
21779 && cfun->can_throw_non_call_exceptions))
21780 emit_insn (gen_blockage ());
21781
21782 /* If the link register is being kept alive, with the return address in it,
21783 then make sure that it does not get reused by the ce2 pass. */
21784 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21785 cfun->machine->lr_save_eliminated = 1;
21786 }
21787 \f
21788 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21789 static void
21790 arm_print_condition (FILE *stream)
21791 {
21792 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21793 {
21794 /* Branch conversion is not implemented for Thumb-2. */
21795 if (TARGET_THUMB)
21796 {
21797 output_operand_lossage ("predicated Thumb instruction");
21798 return;
21799 }
21800 if (current_insn_predicate != NULL)
21801 {
21802 output_operand_lossage
21803 ("predicated instruction in conditional sequence");
21804 return;
21805 }
21806
21807 fputs (arm_condition_codes[arm_current_cc], stream);
21808 }
21809 else if (current_insn_predicate)
21810 {
21811 enum arm_cond_code code;
21812
21813 if (TARGET_THUMB1)
21814 {
21815 output_operand_lossage ("predicated Thumb instruction");
21816 return;
21817 }
21818
21819 code = get_arm_condition_code (current_insn_predicate);
21820 fputs (arm_condition_codes[code], stream);
21821 }
21822 }
21823
21824
21825 /* Globally reserved letters: acln
21826 Punctuation letters currently used: @_|?().!#
21827 Lower case letters currently used: bcdefhimpqtvwxyz
21828 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21829 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21830
21831 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21832
21833 If CODE is 'd', then X is a condition operand and the instruction
21834 should only be executed if the condition is true.
21835 If CODE is 'D', then X is a condition operand and the instruction
21836 should only be executed if the condition is false: however, if the mode
21837 of the comparison is CCFPEmode, then always execute the instruction -- we
21838 do this because in these circumstances !GE does not necessarily imply LT;
21839 in these cases the instruction pattern will take care to make sure that
21840 an instruction containing %d will follow, thereby undoing the effects of
21841 doing this instruction unconditionally.
21842 If CODE is 'N' then X is a floating point operand that must be negated
21843 before output.
21844 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21845 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
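/* A couple of illustrative examples: with X = (const_int 5), '%B' prints -6
   (the bitwise inverse, sign-extended); with X a DImode value held in r4,
   '%M' prints "{r4-r5}".  */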
21846 static void
21847 arm_print_operand (FILE *stream, rtx x, int code)
21848 {
21849 switch (code)
21850 {
21851 case '@':
21852 fputs (ASM_COMMENT_START, stream);
21853 return;
21854
21855 case '_':
21856 fputs (user_label_prefix, stream);
21857 return;
21858
21859 case '|':
21860 fputs (REGISTER_PREFIX, stream);
21861 return;
21862
21863 case '?':
21864 arm_print_condition (stream);
21865 return;
21866
21867 case '.':
21868 /* The current condition code for a condition code setting instruction.
21869 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21870 fputc ('s', stream);
21871 arm_print_condition (stream);
21872 return;
21873
21874 case '!':
21875 /* If the instruction is conditionally executed then print
21876 the current condition code, otherwise print 's'. */
21877 gcc_assert (TARGET_THUMB2);
21878 if (current_insn_predicate)
21879 arm_print_condition (stream);
21880 else
21881 fputc ('s', stream);
21882 break;
21883
21884 /* %# is a "break" sequence. It doesn't output anything, but is used to
21885 separate e.g. operand numbers from following text, if that text consists
21886 of further digits which we don't want to be part of the operand
21887 number. */
21888 case '#':
21889 return;
21890
21891 case 'N':
21892 {
21893 REAL_VALUE_TYPE r;
21894 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21895 fprintf (stream, "%s", fp_const_from_val (&r));
21896 }
21897 return;
21898
21899 /* An integer or symbol address without a preceding # sign. */
21900 case 'c':
21901 switch (GET_CODE (x))
21902 {
21903 case CONST_INT:
21904 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21905 break;
21906
21907 case SYMBOL_REF:
21908 output_addr_const (stream, x);
21909 break;
21910
21911 case CONST:
21912 if (GET_CODE (XEXP (x, 0)) == PLUS
21913 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21914 {
21915 output_addr_const (stream, x);
21916 break;
21917 }
21918 /* Fall through. */
21919
21920 default:
21921 output_operand_lossage ("Unsupported operand for code '%c'", code);
21922 }
21923 return;
21924
21925 /* An integer that we want to print in HEX. */
21926 case 'x':
21927 switch (GET_CODE (x))
21928 {
21929 case CONST_INT:
21930 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21931 break;
21932
21933 default:
21934 output_operand_lossage ("Unsupported operand for code '%c'", code);
21935 }
21936 return;
21937
21938 case 'B':
21939 if (CONST_INT_P (x))
21940 {
21941 HOST_WIDE_INT val;
21942 val = ARM_SIGN_EXTEND (~INTVAL (x));
21943 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21944 }
21945 else
21946 {
21947 putc ('~', stream);
21948 output_addr_const (stream, x);
21949 }
21950 return;
21951
21952 case 'b':
21953 /* Print the log2 of a CONST_INT. */
21954 {
21955 HOST_WIDE_INT val;
21956
21957 if (!CONST_INT_P (x)
21958 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21959 output_operand_lossage ("Unsupported operand for code '%c'", code);
21960 else
21961 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21962 }
21963 return;
21964
21965 case 'L':
21966 /* The low 16 bits of an immediate constant. */
21967 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
21968 return;
21969
21970 case 'i':
21971 fprintf (stream, "%s", arithmetic_instr (x, 1));
21972 return;
21973
21974 case 'I':
21975 fprintf (stream, "%s", arithmetic_instr (x, 0));
21976 return;
21977
21978 case 'S':
21979 {
21980 HOST_WIDE_INT val;
21981 const char *shift;
21982
21983 shift = shift_op (x, &val);
21984
21985 if (shift)
21986 {
21987 fprintf (stream, ", %s ", shift);
21988 if (val == -1)
21989 arm_print_operand (stream, XEXP (x, 1), 0);
21990 else
21991 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21992 }
21993 }
21994 return;
21995
21996 /* An explanation of the 'Q', 'R' and 'H' register operands:
21997
21998 In a pair of registers containing a DI or DF value the 'Q'
21999 operand returns the register number of the register containing
22000 the least significant part of the value. The 'R' operand returns
22001 the register number of the register containing the most
22002 significant part of the value.
22003
22004 The 'H' operand returns the higher of the two register numbers.
22005 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22006 same as the 'Q' operand, since the most significant part of the
22007 value is held in the lower number register. The reverse is true
22008 on systems where WORDS_BIG_ENDIAN is false.
22009
22010 The purpose of these operands is to distinguish between cases
22011 where the endian-ness of the values is important (for example
22012 when they are added together), and cases where the endian-ness
22013 is irrelevant, but the order of register operations is important.
22014 For example when loading a value from memory into a register
22015 pair, the endian-ness does not matter. Provided that the value
22016 from the lower memory address is put into the lower numbered
22017 register, and the value from the higher address is put into the
22018 higher numbered register, the load will work regardless of whether
22019 the value being loaded is big-wordian or little-wordian. The
22020 order of the two register loads can matter however, if the address
22021 of the memory location is actually held in one of the registers
22022 being overwritten by the load.
22023
22024 The 'Q' and 'R' constraints are also available for 64-bit
22025 constants. */
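/* For example, for a DImode value held in {r0, r1} with WORDS_BIG_ENDIAN
   false, '%Q' prints r0, '%R' prints r1 and '%H' prints r1; with
   WORDS_BIG_ENDIAN true, '%Q' and '%H' both print r1 while '%R' prints
   r0.  */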
22026 case 'Q':
22027 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22028 {
22029 rtx part = gen_lowpart (SImode, x);
22030 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22031 return;
22032 }
22033
22034 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22035 {
22036 output_operand_lossage ("invalid operand for code '%c'", code);
22037 return;
22038 }
22039
22040 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22041 return;
22042
22043 case 'R':
22044 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22045 {
22046 machine_mode mode = GET_MODE (x);
22047 rtx part;
22048
22049 if (mode == VOIDmode)
22050 mode = DImode;
22051 part = gen_highpart_mode (SImode, mode, x);
22052 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22053 return;
22054 }
22055
22056 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22057 {
22058 output_operand_lossage ("invalid operand for code '%c'", code);
22059 return;
22060 }
22061
22062 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22063 return;
22064
22065 case 'H':
22066 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22067 {
22068 output_operand_lossage ("invalid operand for code '%c'", code);
22069 return;
22070 }
22071
22072 asm_fprintf (stream, "%r", REGNO (x) + 1);
22073 return;
22074
22075 case 'J':
22076 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22077 {
22078 output_operand_lossage ("invalid operand for code '%c'", code);
22079 return;
22080 }
22081
22082 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22083 return;
22084
22085 case 'K':
22086 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22087 {
22088 output_operand_lossage ("invalid operand for code '%c'", code);
22089 return;
22090 }
22091
22092 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22093 return;
22094
22095 case 'm':
22096 asm_fprintf (stream, "%r",
22097 REG_P (XEXP (x, 0))
22098 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22099 return;
22100
22101 case 'M':
22102 asm_fprintf (stream, "{%r-%r}",
22103 REGNO (x),
22104 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22105 return;
22106
22107 /* Like 'M', but writing doubleword vector registers, for use by Neon
22108 insns. */
22109 case 'h':
22110 {
22111 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22112 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22113 if (numregs == 1)
22114 asm_fprintf (stream, "{d%d}", regno);
22115 else
22116 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22117 }
22118 return;
22119
22120 case 'd':
22121 /* CONST_TRUE_RTX means always -- that's the default. */
22122 if (x == const_true_rtx)
22123 return;
22124
22125 if (!COMPARISON_P (x))
22126 {
22127 output_operand_lossage ("invalid operand for code '%c'", code);
22128 return;
22129 }
22130
22131 fputs (arm_condition_codes[get_arm_condition_code (x)],
22132 stream);
22133 return;
22134
22135 case 'D':
22136 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22137 want to do that. */
22138 if (x == const_true_rtx)
22139 {
22140 output_operand_lossage ("instruction never executed");
22141 return;
22142 }
22143 if (!COMPARISON_P (x))
22144 {
22145 output_operand_lossage ("invalid operand for code '%c'", code);
22146 return;
22147 }
22148
22149 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22150 (get_arm_condition_code (x))],
22151 stream);
22152 return;
22153
22154 case 's':
22155 case 'V':
22156 case 'W':
22157 case 'X':
22158 case 'Y':
22159 case 'Z':
22160 /* Former Maverick support, removed after GCC-4.7. */
22161 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22162 return;
22163
22164 case 'U':
22165 if (!REG_P (x)
22166 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22167 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22168 /* Bad value for wCG register number. */
22169 {
22170 output_operand_lossage ("invalid operand for code '%c'", code);
22171 return;
22172 }
22173
22174 else
22175 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22176 return;
22177
22178 /* Print an iWMMXt control register name. */
22179 case 'w':
22180 if (!CONST_INT_P (x)
22181 || INTVAL (x) < 0
22182 || INTVAL (x) >= 16)
22183 /* Bad value for wC register number. */
22184 {
22185 output_operand_lossage ("invalid operand for code '%c'", code);
22186 return;
22187 }
22188
22189 else
22190 {
22191 static const char * wc_reg_names [16] =
22192 {
22193 "wCID", "wCon", "wCSSF", "wCASF",
22194 "wC4", "wC5", "wC6", "wC7",
22195 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22196 "wC12", "wC13", "wC14", "wC15"
22197 };
22198
22199 fputs (wc_reg_names [INTVAL (x)], stream);
22200 }
22201 return;
22202
22203 /* Print the high single-precision register of a VFP double-precision
22204 register. */
22205 case 'p':
22206 {
22207 machine_mode mode = GET_MODE (x);
22208 int regno;
22209
22210 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22211 {
22212 output_operand_lossage ("invalid operand for code '%c'", code);
22213 return;
22214 }
22215
22216 regno = REGNO (x);
22217 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22218 {
22219 output_operand_lossage ("invalid operand for code '%c'", code);
22220 return;
22221 }
22222
22223 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22224 }
22225 return;
22226
22227 /* Print a VFP/Neon double precision or quad precision register name. */
22228 case 'P':
22229 case 'q':
22230 {
22231 machine_mode mode = GET_MODE (x);
22232 int is_quad = (code == 'q');
22233 int regno;
22234
22235 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22236 {
22237 output_operand_lossage ("invalid operand for code '%c'", code);
22238 return;
22239 }
22240
22241 if (!REG_P (x)
22242 || !IS_VFP_REGNUM (REGNO (x)))
22243 {
22244 output_operand_lossage ("invalid operand for code '%c'", code);
22245 return;
22246 }
22247
22248 regno = REGNO (x);
22249 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22250 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22251 {
22252 output_operand_lossage ("invalid operand for code '%c'", code);
22253 return;
22254 }
22255
22256 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22257 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22258 }
22259 return;
22260
22261 /* These two codes print the low/high doubleword register of a Neon quad
22262 register, respectively. For pair-structure types, can also print
22263 low/high quadword registers. */
22264 case 'e':
22265 case 'f':
22266 {
22267 machine_mode mode = GET_MODE (x);
22268 int regno;
22269
22270 if ((GET_MODE_SIZE (mode) != 16
22271 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22272 {
22273 output_operand_lossage ("invalid operand for code '%c'", code);
22274 return;
22275 }
22276
22277 regno = REGNO (x);
22278 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22279 {
22280 output_operand_lossage ("invalid operand for code '%c'", code);
22281 return;
22282 }
22283
22284 if (GET_MODE_SIZE (mode) == 16)
22285 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22286 + (code == 'f' ? 1 : 0));
22287 else
22288 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22289 + (code == 'f' ? 1 : 0));
22290 }
22291 return;
22292
22293 /* Print a VFPv3 floating-point constant, represented as an integer
22294 index. */
22295 case 'G':
22296 {
22297 int index = vfp3_const_double_index (x);
22298 gcc_assert (index != -1);
22299 fprintf (stream, "%d", index);
22300 }
22301 return;
22302
22303 /* Print bits representing opcode features for Neon.
22304
22305 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22306 and polynomials as unsigned.
22307
22308 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22309
22310 Bit 2 is 1 for rounding functions, 0 otherwise. */
22311
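/* For instance, X = (const_int 5) encodes a signed, rounding operation:
   the codes below print 's' for '%T', 'i' for '%F', 's' for '%t' and 'r'
   for '%O'.  */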
22312 /* Identify the type as 's', 'u', 'p' or 'f'. */
22313 case 'T':
22314 {
22315 HOST_WIDE_INT bits = INTVAL (x);
22316 fputc ("uspf"[bits & 3], stream);
22317 }
22318 return;
22319
22320 /* Likewise, but signed and unsigned integers are both 'i'. */
22321 case 'F':
22322 {
22323 HOST_WIDE_INT bits = INTVAL (x);
22324 fputc ("iipf"[bits & 3], stream);
22325 }
22326 return;
22327
22328 /* As for 'T', but emit 'u' instead of 'p'. */
22329 case 't':
22330 {
22331 HOST_WIDE_INT bits = INTVAL (x);
22332 fputc ("usuf"[bits & 3], stream);
22333 }
22334 return;
22335
22336 /* Bit 2: rounding (vs none). */
22337 case 'O':
22338 {
22339 HOST_WIDE_INT bits = INTVAL (x);
22340 fputs ((bits & 4) != 0 ? "r" : "", stream);
22341 }
22342 return;
22343
22344 /* Memory operand for vld1/vst1 instruction. */
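/* E.g. a 16-byte access through r0 whose known alignment is at least
   16 bytes is printed as "[r0:128]", the ":128" being the alignment hint
   computed below.  */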
22345 case 'A':
22346 {
22347 rtx addr;
22348 bool postinc = FALSE;
22349 rtx postinc_reg = NULL;
22350 unsigned align, memsize, align_bits;
22351
22352 gcc_assert (MEM_P (x));
22353 addr = XEXP (x, 0);
22354 if (GET_CODE (addr) == POST_INC)
22355 {
22356 postinc = 1;
22357 addr = XEXP (addr, 0);
22358 }
22359 if (GET_CODE (addr) == POST_MODIFY)
22360 {
22361 postinc_reg = XEXP (XEXP (addr, 1), 1);
22362 addr = XEXP (addr, 0);
22363 }
22364 asm_fprintf (stream, "[%r", REGNO (addr));
22365
22366 /* We know the alignment of this access, so we can emit a hint in the
22367 instruction (for some alignments) as an aid to the memory subsystem
22368 of the target. */
22369 align = MEM_ALIGN (x) >> 3;
22370 memsize = MEM_SIZE (x);
22371
22372 /* Only certain alignment specifiers are supported by the hardware. */
22373 if (memsize == 32 && (align % 32) == 0)
22374 align_bits = 256;
22375 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22376 align_bits = 128;
22377 else if (memsize >= 8 && (align % 8) == 0)
22378 align_bits = 64;
22379 else
22380 align_bits = 0;
22381
22382 if (align_bits != 0)
22383 asm_fprintf (stream, ":%d", align_bits);
22384
22385 asm_fprintf (stream, "]");
22386
22387 if (postinc)
22388 fputs ("!", stream);
22389 if (postinc_reg)
22390 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22391 }
22392 return;
22393
22394 case 'C':
22395 {
22396 rtx addr;
22397
22398 gcc_assert (MEM_P (x));
22399 addr = XEXP (x, 0);
22400 gcc_assert (REG_P (addr));
22401 asm_fprintf (stream, "[%r]", REGNO (addr));
22402 }
22403 return;
22404
22405 /* Translate an S register number into a D register number and element index. */
22406 case 'y':
22407 {
22408 machine_mode mode = GET_MODE (x);
22409 int regno;
22410
22411 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22412 {
22413 output_operand_lossage ("invalid operand for code '%c'", code);
22414 return;
22415 }
22416
22417 regno = REGNO (x);
22418 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22419 {
22420 output_operand_lossage ("invalid operand for code '%c'", code);
22421 return;
22422 }
22423
22424 regno = regno - FIRST_VFP_REGNUM;
22425 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22426 }
22427 return;
22428
22429 case 'v':
22430 gcc_assert (CONST_DOUBLE_P (x));
22431 int result;
22432 result = vfp3_const_double_for_fract_bits (x);
22433 if (result == 0)
22434 result = vfp3_const_double_for_bits (x);
22435 fprintf (stream, "#%d", result);
22436 return;
22437
22438 /* Register specifier for vld1.16/vst1.16. Translate the S register
22439 number into a D register number and element index. */
22440 case 'z':
22441 {
22442 machine_mode mode = GET_MODE (x);
22443 int regno;
22444
22445 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22446 {
22447 output_operand_lossage ("invalid operand for code '%c'", code);
22448 return;
22449 }
22450
22451 regno = REGNO (x);
22452 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22453 {
22454 output_operand_lossage ("invalid operand for code '%c'", code);
22455 return;
22456 }
22457
22458 regno = regno - FIRST_VFP_REGNUM;
22459 fprintf (stream, "d%d[%d]", regno / 2, ((regno % 2) ? 2 : 0));
22460 }
22461 return;
22462
22463 default:
22464 if (x == 0)
22465 {
22466 output_operand_lossage ("missing operand");
22467 return;
22468 }
22469
22470 switch (GET_CODE (x))
22471 {
22472 case REG:
22473 asm_fprintf (stream, "%r", REGNO (x));
22474 break;
22475
22476 case MEM:
22477 output_address (GET_MODE (x), XEXP (x, 0));
22478 break;
22479
22480 case CONST_DOUBLE:
22481 {
22482 char fpstr[20];
22483 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22484 sizeof (fpstr), 0, 1);
22485 fprintf (stream, "#%s", fpstr);
22486 }
22487 break;
22488
22489 default:
22490 gcc_assert (GET_CODE (x) != NEG);
22491 fputc ('#', stream);
22492 if (GET_CODE (x) == HIGH)
22493 {
22494 fputs (":lower16:", stream);
22495 x = XEXP (x, 0);
22496 }
22497
22498 output_addr_const (stream, x);
22499 break;
22500 }
22501 }
22502 }
22503 \f
22504 /* Target hook for printing a memory address. */
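/* For example, on 32-bit targets (reg r1) prints as "[r1]",
   (plus (reg r1) (const_int 4)) as "[r1, #4]" and an SImode POST_INC of r2
   as "[r2], #4" (illustrative only).  */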
22505 static void
22506 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22507 {
22508 if (TARGET_32BIT)
22509 {
22510 int is_minus = GET_CODE (x) == MINUS;
22511
22512 if (REG_P (x))
22513 asm_fprintf (stream, "[%r]", REGNO (x));
22514 else if (GET_CODE (x) == PLUS || is_minus)
22515 {
22516 rtx base = XEXP (x, 0);
22517 rtx index = XEXP (x, 1);
22518 HOST_WIDE_INT offset = 0;
22519 if (!REG_P (base)
22520 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22521 {
22522 /* Ensure that BASE is a register
22523 (one of them must be).  */
22524 /* Also ensure that SP is not used as an index register.  */
22525 std::swap (base, index);
22526 }
22527 switch (GET_CODE (index))
22528 {
22529 case CONST_INT:
22530 offset = INTVAL (index);
22531 if (is_minus)
22532 offset = -offset;
22533 asm_fprintf (stream, "[%r, #%wd]",
22534 REGNO (base), offset);
22535 break;
22536
22537 case REG:
22538 asm_fprintf (stream, "[%r, %s%r]",
22539 REGNO (base), is_minus ? "-" : "",
22540 REGNO (index));
22541 break;
22542
22543 case MULT:
22544 case ASHIFTRT:
22545 case LSHIFTRT:
22546 case ASHIFT:
22547 case ROTATERT:
22548 {
22549 asm_fprintf (stream, "[%r, %s%r",
22550 REGNO (base), is_minus ? "-" : "",
22551 REGNO (XEXP (index, 0)));
22552 arm_print_operand (stream, index, 'S');
22553 fputs ("]", stream);
22554 break;
22555 }
22556
22557 default:
22558 gcc_unreachable ();
22559 }
22560 }
22561 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22562 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22563 {
22564 gcc_assert (REG_P (XEXP (x, 0)));
22565
22566 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22567 asm_fprintf (stream, "[%r, #%s%d]!",
22568 REGNO (XEXP (x, 0)),
22569 GET_CODE (x) == PRE_DEC ? "-" : "",
22570 GET_MODE_SIZE (mode));
22571 else
22572 asm_fprintf (stream, "[%r], #%s%d",
22573 REGNO (XEXP (x, 0)),
22574 GET_CODE (x) == POST_DEC ? "-" : "",
22575 GET_MODE_SIZE (mode));
22576 }
22577 else if (GET_CODE (x) == PRE_MODIFY)
22578 {
22579 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22580 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22581 asm_fprintf (stream, "#%wd]!",
22582 INTVAL (XEXP (XEXP (x, 1), 1)));
22583 else
22584 asm_fprintf (stream, "%r]!",
22585 REGNO (XEXP (XEXP (x, 1), 1)));
22586 }
22587 else if (GET_CODE (x) == POST_MODIFY)
22588 {
22589 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22590 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22591 asm_fprintf (stream, "#%wd",
22592 INTVAL (XEXP (XEXP (x, 1), 1)));
22593 else
22594 asm_fprintf (stream, "%r",
22595 REGNO (XEXP (XEXP (x, 1), 1)));
22596 }
22597 else output_addr_const (stream, x);
22598 }
22599 else
22600 {
22601 if (REG_P (x))
22602 asm_fprintf (stream, "[%r]", REGNO (x));
22603 else if (GET_CODE (x) == POST_INC)
22604 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22605 else if (GET_CODE (x) == PLUS)
22606 {
22607 gcc_assert (REG_P (XEXP (x, 0)));
22608 if (CONST_INT_P (XEXP (x, 1)))
22609 asm_fprintf (stream, "[%r, #%wd]",
22610 REGNO (XEXP (x, 0)),
22611 INTVAL (XEXP (x, 1)));
22612 else
22613 asm_fprintf (stream, "[%r, %r]",
22614 REGNO (XEXP (x, 0)),
22615 REGNO (XEXP (x, 1)));
22616 }
22617 else
22618 output_addr_const (stream, x);
22619 }
22620 }
22621 \f
22622 /* Target hook for indicating whether a punctuation character for
22623 TARGET_PRINT_OPERAND is valid. */
22624 static bool
22625 arm_print_operand_punct_valid_p (unsigned char code)
22626 {
22627 return (code == '@' || code == '|' || code == '.'
22628 || code == '(' || code == ')' || code == '#'
22629 || (TARGET_32BIT && (code == '?'))
22630 || (TARGET_THUMB2 && (code == '!'))
22631 || (TARGET_THUMB && (code == '_')));
22632 }
22633 \f
22634 /* Target hook for assembling integer objects. The ARM version needs to
22635 handle word-sized values specially. */
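/* For instance, a word-sized SYMBOL_REF emitted while building a PIC
   constant table comes out roughly as "\t.word\tsym(GOTOFF)", or with a
   "(GOT)" suffix for symbols that cannot be resolved locally.  */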
22636 static bool
22637 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22638 {
22639 machine_mode mode;
22640
22641 if (size == UNITS_PER_WORD && aligned_p)
22642 {
22643 fputs ("\t.word\t", asm_out_file);
22644 output_addr_const (asm_out_file, x);
22645
22646 /* Mark symbols as position independent. We only do this in the
22647 .text segment, not in the .data segment. */
22648 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22649 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22650 {
22651 /* See legitimize_pic_address for an explanation of the
22652 TARGET_VXWORKS_RTP check. */
22653 /* References to weak symbols cannot be resolved locally:
22654 they may be overridden by a non-weak definition at link
22655 time. */
22656 if (!arm_pic_data_is_text_relative
22657 || (GET_CODE (x) == SYMBOL_REF
22658 && (!SYMBOL_REF_LOCAL_P (x)
22659 || (SYMBOL_REF_DECL (x)
22660 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22661 fputs ("(GOT)", asm_out_file);
22662 else
22663 fputs ("(GOTOFF)", asm_out_file);
22664 }
22665 fputc ('\n', asm_out_file);
22666 return true;
22667 }
22668
22669 mode = GET_MODE (x);
22670
22671 if (arm_vector_mode_supported_p (mode))
22672 {
22673 int i, units;
22674
22675 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22676
22677 units = CONST_VECTOR_NUNITS (x);
22678 size = GET_MODE_UNIT_SIZE (mode);
22679
22680 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22681 for (i = 0; i < units; i++)
22682 {
22683 rtx elt = CONST_VECTOR_ELT (x, i);
22684 assemble_integer
22685 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22686 }
22687 else
22688 for (i = 0; i < units; i++)
22689 {
22690 rtx elt = CONST_VECTOR_ELT (x, i);
22691 assemble_real
22692 (*CONST_DOUBLE_REAL_VALUE (elt),
22693 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22694 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22695 }
22696
22697 return true;
22698 }
22699
22700 return default_assemble_integer (x, size, aligned_p);
22701 }
22702
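/* Output a constructor or destructor table entry.  On AAPCS targets a
   prioritized constructor, say at priority 123, is placed in a section
   named ".init_array.00123" and emitted roughly as "\t.word\tsym(target1)"
   (illustrative; see the sprintf format below).  */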
22703 static void
22704 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22705 {
22706 section *s;
22707
22708 if (!TARGET_AAPCS_BASED)
22709 {
22710 (is_ctor ?
22711 default_named_section_asm_out_constructor
22712 : default_named_section_asm_out_destructor) (symbol, priority);
22713 return;
22714 }
22715
22716 /* Put these in the .init_array section, using a special relocation. */
22717 if (priority != DEFAULT_INIT_PRIORITY)
22718 {
22719 char buf[18];
22720 sprintf (buf, "%s.%.5u",
22721 is_ctor ? ".init_array" : ".fini_array",
22722 priority);
22723 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22724 }
22725 else if (is_ctor)
22726 s = ctors_section;
22727 else
22728 s = dtors_section;
22729
22730 switch_to_section (s);
22731 assemble_align (POINTER_SIZE);
22732 fputs ("\t.word\t", asm_out_file);
22733 output_addr_const (asm_out_file, symbol);
22734 fputs ("(target1)\n", asm_out_file);
22735 }
22736
22737 /* Add a function to the list of static constructors. */
22738
22739 static void
22740 arm_elf_asm_constructor (rtx symbol, int priority)
22741 {
22742 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22743 }
22744
22745 /* Add a function to the list of static destructors. */
22746
22747 static void
22748 arm_elf_asm_destructor (rtx symbol, int priority)
22749 {
22750 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22751 }
22752 \f
22753 /* A finite state machine takes care of noticing whether or not instructions
22754 can be conditionally executed, and thus decreases execution time and code
22755 size by deleting branch instructions. The fsm is controlled by
22756 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22757
22758 /* The states of the fsm controlling condition codes are:
22759 0: normal, do nothing special
22760 1: make ASM_OUTPUT_OPCODE not output this instruction
22761 2: make ASM_OUTPUT_OPCODE not output this instruction
22762 3: make instructions conditional
22763 4: make instructions conditional
22764
22765 State transitions (state->state by whom under condition):
22766 0 -> 1 final_prescan_insn if the `target' is a label
22767 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22768 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22769 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22770 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22771 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22772 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22773 (the target insn is arm_target_insn).
22774
22775 If the jump clobbers the conditions then we use states 2 and 4.
22776
22777 A similar thing can be done with conditional return insns.
22778
22779 XXX In case the `target' is an unconditional branch, this conditionalising
22780 of the instructions always reduces code size, but not always execution
22781 time. But then, I want to reduce the code size to somewhere near what
22782 /bin/cc produces. */
22783
22784 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22785 instructions. When a COND_EXEC instruction is seen the subsequent
22786 instructions are scanned so that multiple conditional instructions can be
22787 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22788 specify the length and true/false mask for the IT block. These will be
22789 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
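/* For example, two adjacent COND_EXEC insns predicated on EQ and NE
   respectively are combined into one block (mask bits 1 and 0), and
   thumb2_asm_output_opcode below then prefixes the first of them with
   "ite\teq".  */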
22790
22791 /* Returns the index of the ARM condition code string in
22792 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22793 COMPARISON should be an rtx like `(eq (...) (...))'. */
22794
22795 enum arm_cond_code
22796 maybe_get_arm_condition_code (rtx comparison)
22797 {
22798 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22799 enum arm_cond_code code;
22800 enum rtx_code comp_code = GET_CODE (comparison);
22801
22802 if (GET_MODE_CLASS (mode) != MODE_CC)
22803 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22804 XEXP (comparison, 1));
22805
22806 switch (mode)
22807 {
22808 case E_CC_DNEmode: code = ARM_NE; goto dominance;
22809 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
22810 case E_CC_DGEmode: code = ARM_GE; goto dominance;
22811 case E_CC_DGTmode: code = ARM_GT; goto dominance;
22812 case E_CC_DLEmode: code = ARM_LE; goto dominance;
22813 case E_CC_DLTmode: code = ARM_LT; goto dominance;
22814 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
22815 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
22816 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
22817 case E_CC_DLTUmode: code = ARM_CC;
22818
22819 dominance:
22820 if (comp_code == EQ)
22821 return ARM_INVERSE_CONDITION_CODE (code);
22822 if (comp_code == NE)
22823 return code;
22824 return ARM_NV;
22825
22826 case E_CC_NOOVmode:
22827 switch (comp_code)
22828 {
22829 case NE: return ARM_NE;
22830 case EQ: return ARM_EQ;
22831 case GE: return ARM_PL;
22832 case LT: return ARM_MI;
22833 default: return ARM_NV;
22834 }
22835
22836 case E_CC_Zmode:
22837 switch (comp_code)
22838 {
22839 case NE: return ARM_NE;
22840 case EQ: return ARM_EQ;
22841 default: return ARM_NV;
22842 }
22843
22844 case E_CC_Nmode:
22845 switch (comp_code)
22846 {
22847 case NE: return ARM_MI;
22848 case EQ: return ARM_PL;
22849 default: return ARM_NV;
22850 }
22851
22852 case E_CCFPEmode:
22853 case E_CCFPmode:
22854 /* We can handle all cases except UNEQ and LTGT. */
22855 switch (comp_code)
22856 {
22857 case GE: return ARM_GE;
22858 case GT: return ARM_GT;
22859 case LE: return ARM_LS;
22860 case LT: return ARM_MI;
22861 case NE: return ARM_NE;
22862 case EQ: return ARM_EQ;
22863 case ORDERED: return ARM_VC;
22864 case UNORDERED: return ARM_VS;
22865 case UNLT: return ARM_LT;
22866 case UNLE: return ARM_LE;
22867 case UNGT: return ARM_HI;
22868 case UNGE: return ARM_PL;
22869 /* UNEQ and LTGT do not have a representation. */
22870 case UNEQ: /* Fall through. */
22871 case LTGT: /* Fall through. */
22872 default: return ARM_NV;
22873 }
22874
22875 case E_CC_SWPmode:
22876 switch (comp_code)
22877 {
22878 case NE: return ARM_NE;
22879 case EQ: return ARM_EQ;
22880 case GE: return ARM_LE;
22881 case GT: return ARM_LT;
22882 case LE: return ARM_GE;
22883 case LT: return ARM_GT;
22884 case GEU: return ARM_LS;
22885 case GTU: return ARM_CC;
22886 case LEU: return ARM_CS;
22887 case LTU: return ARM_HI;
22888 default: return ARM_NV;
22889 }
22890
22891 case E_CC_Cmode:
22892 switch (comp_code)
22893 {
22894 case LTU: return ARM_CS;
22895 case GEU: return ARM_CC;
22896 case NE: return ARM_CS;
22897 case EQ: return ARM_CC;
22898 default: return ARM_NV;
22899 }
22900
22901 case E_CC_CZmode:
22902 switch (comp_code)
22903 {
22904 case NE: return ARM_NE;
22905 case EQ: return ARM_EQ;
22906 case GEU: return ARM_CS;
22907 case GTU: return ARM_HI;
22908 case LEU: return ARM_LS;
22909 case LTU: return ARM_CC;
22910 default: return ARM_NV;
22911 }
22912
22913 case E_CC_NCVmode:
22914 switch (comp_code)
22915 {
22916 case GE: return ARM_GE;
22917 case LT: return ARM_LT;
22918 case GEU: return ARM_CS;
22919 case LTU: return ARM_CC;
22920 default: return ARM_NV;
22921 }
22922
22923 case E_CC_Vmode:
22924 switch (comp_code)
22925 {
22926 case NE: return ARM_VS;
22927 case EQ: return ARM_VC;
22928 default: return ARM_NV;
22929 }
22930
22931 case E_CCmode:
22932 switch (comp_code)
22933 {
22934 case NE: return ARM_NE;
22935 case EQ: return ARM_EQ;
22936 case GE: return ARM_GE;
22937 case GT: return ARM_GT;
22938 case LE: return ARM_LE;
22939 case LT: return ARM_LT;
22940 case GEU: return ARM_CS;
22941 case GTU: return ARM_HI;
22942 case LEU: return ARM_LS;
22943 case LTU: return ARM_CC;
22944 default: return ARM_NV;
22945 }
22946
22947 default: gcc_unreachable ();
22948 }
22949 }
22950
22951 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22952 static enum arm_cond_code
22953 get_arm_condition_code (rtx comparison)
22954 {
22955 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22956 gcc_assert (code != ARM_NV);
22957 return code;
22958 }
22959
22960 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
22961 code registers when not targeting Thumb1. The VFP condition register
22962 only exists when generating hard-float code. */
22963 static bool
22964 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
22965 {
22966 if (!TARGET_32BIT)
22967 return false;
22968
22969 *p1 = CC_REGNUM;
22970 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
22971 return true;
22972 }
22973
22974 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22975 instructions. */
22976 void
22977 thumb2_final_prescan_insn (rtx_insn *insn)
22978 {
22979 rtx_insn *first_insn = insn;
22980 rtx body = PATTERN (insn);
22981 rtx predicate;
22982 enum arm_cond_code code;
22983 int n;
22984 int mask;
22985 int max;
22986
22987 /* max_insns_skipped in the tune was already taken into account in the
22988 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22989 just emit the IT blocks as we can. It does not make sense to split
22990 the IT blocks. */
22991 max = MAX_INSN_PER_IT_BLOCK;
22992
22993 /* Remove the previous insn from the count of insns to be output. */
22994 if (arm_condexec_count)
22995 arm_condexec_count--;
22996
22997 /* Nothing to do if we are already inside a conditional block. */
22998 if (arm_condexec_count)
22999 return;
23000
23001 if (GET_CODE (body) != COND_EXEC)
23002 return;
23003
23004 /* Conditional jumps are implemented directly. */
23005 if (JUMP_P (insn))
23006 return;
23007
23008 predicate = COND_EXEC_TEST (body);
23009 arm_current_cc = get_arm_condition_code (predicate);
23010
23011 n = get_attr_ce_count (insn);
23012 arm_condexec_count = 1;
23013 arm_condexec_mask = (1 << n) - 1;
23014 arm_condexec_masklen = n;
23015 /* See if subsequent instructions can be combined into the same block. */
23016 for (;;)
23017 {
23018 insn = next_nonnote_insn (insn);
23019
23020 /* Jumping into the middle of an IT block is illegal, so a label or
23021 barrier terminates the block. */
23022 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23023 break;
23024
23025 body = PATTERN (insn);
23026 /* USE and CLOBBER aren't really insns, so just skip them. */
23027 if (GET_CODE (body) == USE
23028 || GET_CODE (body) == CLOBBER)
23029 continue;
23030
23031 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23032 if (GET_CODE (body) != COND_EXEC)
23033 break;
23034 /* Maximum number of conditionally executed instructions in a block. */
23035 n = get_attr_ce_count (insn);
23036 if (arm_condexec_masklen + n > max)
23037 break;
23038
23039 predicate = COND_EXEC_TEST (body);
23040 code = get_arm_condition_code (predicate);
23041 mask = (1 << n) - 1;
23042 if (arm_current_cc == code)
23043 arm_condexec_mask |= (mask << arm_condexec_masklen);
23044 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE (code))
23045 break;
23046
23047 arm_condexec_count++;
23048 arm_condexec_masklen += n;
23049
23050 /* A jump must be the last instruction in a conditional block. */
23051 if (JUMP_P (insn))
23052 break;
23053 }
23054 /* Restore recog_data (getting the attributes of other insns can
23055 destroy this array, but final.c assumes that it remains intact
23056 across this call). */
23057 extract_constrain_insn_cached (first_insn);
23058 }
23059
23060 void
23061 arm_final_prescan_insn (rtx_insn *insn)
23062 {
23063 /* BODY will hold the body of INSN. */
23064 rtx body = PATTERN (insn);
23065
23066 /* This will be 1 if trying to repeat the trick, and things need to be
23067 reversed if it appears to fail. */
23068 int reverse = 0;
23069
23070 /* If we start with a return insn, we only succeed if we find another one. */
23071 int seeking_return = 0;
23072 enum rtx_code return_code = UNKNOWN;
23073
23074 /* START_INSN will hold the insn from where we start looking. This is the
23075 first insn after the following code_label if REVERSE is true. */
23076 rtx_insn *start_insn = insn;
23077
23078 /* If in state 4, check if the target branch is reached, in order to
23079 change back to state 0. */
23080 if (arm_ccfsm_state == 4)
23081 {
23082 if (insn == arm_target_insn)
23083 {
23084 arm_target_insn = NULL;
23085 arm_ccfsm_state = 0;
23086 }
23087 return;
23088 }
23089
23090 /* If in state 3, it is possible to repeat the trick, if this insn is an
23091 unconditional branch to a label, and immediately following this branch
23092 is the previous target label which is only used once, and the label this
23093 branch jumps to is not too far off. */
23094 if (arm_ccfsm_state == 3)
23095 {
23096 if (simplejump_p (insn))
23097 {
23098 start_insn = next_nonnote_insn (start_insn);
23099 if (BARRIER_P (start_insn))
23100 {
23101 /* XXX Isn't this always a barrier? */
23102 start_insn = next_nonnote_insn (start_insn);
23103 }
23104 if (LABEL_P (start_insn)
23105 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23106 && LABEL_NUSES (start_insn) == 1)
23107 reverse = TRUE;
23108 else
23109 return;
23110 }
23111 else if (ANY_RETURN_P (body))
23112 {
23113 start_insn = next_nonnote_insn (start_insn);
23114 if (BARRIER_P (start_insn))
23115 start_insn = next_nonnote_insn (start_insn);
23116 if (LABEL_P (start_insn)
23117 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23118 && LABEL_NUSES (start_insn) == 1)
23119 {
23120 reverse = TRUE;
23121 seeking_return = 1;
23122 return_code = GET_CODE (body);
23123 }
23124 else
23125 return;
23126 }
23127 else
23128 return;
23129 }
23130
23131 gcc_assert (!arm_ccfsm_state || reverse);
23132 if (!JUMP_P (insn))
23133 return;
23134
23135 /* This jump might be paralleled with a clobber of the condition codes;
23136 the jump should always come first. */
23137 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23138 body = XVECEXP (body, 0, 0);
23139
23140 if (reverse
23141 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23142 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23143 {
23144 int insns_skipped;
23145 int fail = FALSE, succeed = FALSE;
23146 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23147 int then_not_else = TRUE;
23148 rtx_insn *this_insn = start_insn;
23149 rtx label = 0;
23150
23151 /* Register the insn jumped to. */
23152 if (reverse)
23153 {
23154 if (!seeking_return)
23155 label = XEXP (SET_SRC (body), 0);
23156 }
23157 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23158 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23159 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23160 {
23161 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23162 then_not_else = FALSE;
23163 }
23164 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23165 {
23166 seeking_return = 1;
23167 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23168 }
23169 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23170 {
23171 seeking_return = 1;
23172 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23173 then_not_else = FALSE;
23174 }
23175 else
23176 gcc_unreachable ();
23177
23178 /* See how many insns this branch skips, and what kind of insns. If all
23179 insns are okay, and the label or unconditional branch to the same
23180 label is not too far away, succeed. */
23181 for (insns_skipped = 0;
23182 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23183 {
23184 rtx scanbody;
23185
23186 this_insn = next_nonnote_insn (this_insn);
23187 if (!this_insn)
23188 break;
23189
23190 switch (GET_CODE (this_insn))
23191 {
23192 case CODE_LABEL:
23193 /* Succeed if it is the target label, otherwise fail since
23194 control falls in from somewhere else. */
23195 if (this_insn == label)
23196 {
23197 arm_ccfsm_state = 1;
23198 succeed = TRUE;
23199 }
23200 else
23201 fail = TRUE;
23202 break;
23203
23204 case BARRIER:
23205 /* Succeed if the following insn is the target label.
23206 Otherwise fail.
23207 If return insns are used then the last insn in a function
23208 will be a barrier. */
23209 this_insn = next_nonnote_insn (this_insn);
23210 if (this_insn && this_insn == label)
23211 {
23212 arm_ccfsm_state = 1;
23213 succeed = TRUE;
23214 }
23215 else
23216 fail = TRUE;
23217 break;
23218
23219 case CALL_INSN:
23220 /* The AAPCS says that conditional calls should not be
23221 used since they make interworking inefficient (the
23222 linker can't transform BL<cond> into BLX). That's
23223 only a problem if the machine has BLX. */
23224 if (arm_arch5)
23225 {
23226 fail = TRUE;
23227 break;
23228 }
23229
23230 /* Succeed if the following insn is the target label, or
23231 if the following two insns are a barrier and the
23232 target label. */
23233 this_insn = next_nonnote_insn (this_insn);
23234 if (this_insn && BARRIER_P (this_insn))
23235 this_insn = next_nonnote_insn (this_insn);
23236
23237 if (this_insn && this_insn == label
23238 && insns_skipped < max_insns_skipped)
23239 {
23240 arm_ccfsm_state = 1;
23241 succeed = TRUE;
23242 }
23243 else
23244 fail = TRUE;
23245 break;
23246
23247 case JUMP_INSN:
23248 /* If this is an unconditional branch to the same label, succeed.
23249 If it is to another label, do nothing. If it is conditional,
23250 fail. */
23251 /* XXX Probably, the tests for SET and the PC are
23252 unnecessary. */
23253
23254 scanbody = PATTERN (this_insn);
23255 if (GET_CODE (scanbody) == SET
23256 && GET_CODE (SET_DEST (scanbody)) == PC)
23257 {
23258 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23259 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23260 {
23261 arm_ccfsm_state = 2;
23262 succeed = TRUE;
23263 }
23264 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23265 fail = TRUE;
23266 }
23267 /* Fail if a conditional return is undesirable (e.g. on a
23268 StrongARM), but still allow this if optimizing for size. */
23269 else if (GET_CODE (scanbody) == return_code
23270 && !use_return_insn (TRUE, NULL)
23271 && !optimize_size)
23272 fail = TRUE;
23273 else if (GET_CODE (scanbody) == return_code)
23274 {
23275 arm_ccfsm_state = 2;
23276 succeed = TRUE;
23277 }
23278 else if (GET_CODE (scanbody) == PARALLEL)
23279 {
23280 switch (get_attr_conds (this_insn))
23281 {
23282 case CONDS_NOCOND:
23283 break;
23284 default:
23285 fail = TRUE;
23286 break;
23287 }
23288 }
23289 else
23290 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23291
23292 break;
23293
23294 case INSN:
23295 /* Instructions using or affecting the condition codes make it
23296 fail. */
23297 scanbody = PATTERN (this_insn);
23298 if (!(GET_CODE (scanbody) == SET
23299 || GET_CODE (scanbody) == PARALLEL)
23300 || get_attr_conds (this_insn) != CONDS_NOCOND)
23301 fail = TRUE;
23302 break;
23303
23304 default:
23305 break;
23306 }
23307 }
23308 if (succeed)
23309 {
23310 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23311 arm_target_label = CODE_LABEL_NUMBER (label);
23312 else
23313 {
23314 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23315
23316 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23317 {
23318 this_insn = next_nonnote_insn (this_insn);
23319 gcc_assert (!this_insn
23320 || (!BARRIER_P (this_insn)
23321 && !LABEL_P (this_insn)));
23322 }
23323 if (!this_insn)
23324 {
23325 /* Oh, dear! We ran off the end; give up. */
23326 extract_constrain_insn_cached (insn);
23327 arm_ccfsm_state = 0;
23328 arm_target_insn = NULL;
23329 return;
23330 }
23331 arm_target_insn = this_insn;
23332 }
23333
23334 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23335 what it was. */
23336 if (!reverse)
23337 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23338
23339 if (reverse || then_not_else)
23340 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23341 }
23342
23343 /* Restore recog_data (getting the attributes of other insns can
23344 destroy this array, but final.c assumes that it remains intact
23345 across this call). */
23346 extract_constrain_insn_cached (insn);
23347 }
23348 }
23349
23350 /* Output IT instructions. */
23351 void
23352 thumb2_asm_output_opcode (FILE * stream)
23353 {
23354 char buff[5];
23355 int n;
23356
23357 if (arm_condexec_mask)
23358 {
23359 for (n = 0; n < arm_condexec_masklen; n++)
23360 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23361 buff[n] = 0;
23362 asm_fprintf (stream, "i%s\t%s\n\t", buff,
23363 arm_condition_codes[arm_current_cc]);
23364 arm_condexec_mask = 0;
23365 }
23366 }
23367
23368 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
23369 UNITS_PER_WORD bytes wide. */
23370 static unsigned int
23371 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
23372 {
23373 if (TARGET_32BIT
23374 && regno > PC_REGNUM
23375 && regno != FRAME_POINTER_REGNUM
23376 && regno != ARG_POINTER_REGNUM
23377 && !IS_VFP_REGNUM (regno))
23378 return 1;
23379
23380 return ARM_NUM_REGS (mode);
23381 }
23382
23383 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23384 static bool
23385 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23386 {
23387 if (GET_MODE_CLASS (mode) == MODE_CC)
23388 return (regno == CC_REGNUM
23389 || (TARGET_HARD_FLOAT
23390 && regno == VFPCC_REGNUM));
23391
23392 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23393 return false;
23394
23395 if (TARGET_THUMB1)
23396 /* For the Thumb we only allow values bigger than SImode in
23397 registers 0 - 6, so that there is always a second low
23398 register available to hold the upper part of the value.
23399 We probably ought to ensure that the register is the
23400 start of an even numbered register pair. */
23401 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23402
23403 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23404 {
23405 if (mode == SFmode || mode == SImode)
23406 return VFP_REGNO_OK_FOR_SINGLE (regno);
23407
23408 if (mode == DFmode)
23409 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23410
23411 if (mode == HFmode)
23412 return VFP_REGNO_OK_FOR_SINGLE (regno);
23413
23414 /* VFP registers can hold HImode values. */
23415 if (mode == HImode)
23416 return VFP_REGNO_OK_FOR_SINGLE (regno);
23417
23418 if (TARGET_NEON)
23419 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23420 || (VALID_NEON_QREG_MODE (mode)
23421 && NEON_REGNO_OK_FOR_QUAD (regno))
23422 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23423 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23424 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23425 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23426 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23427
23428 return false;
23429 }
23430
23431 if (TARGET_REALLY_IWMMXT)
23432 {
23433 if (IS_IWMMXT_GR_REGNUM (regno))
23434 return mode == SImode;
23435
23436 if (IS_IWMMXT_REGNUM (regno))
23437 return VALID_IWMMXT_REG_MODE (mode);
23438 }
23439
23440 /* We allow almost any value to be stored in the general registers.
23441 Restrict doubleword quantities to even register pairs in ARM state
23442 so that we can use ldrd. Do not allow very large Neon structure
23443 opaque modes in general registers; they would use too many. */
23444 if (regno <= LAST_ARM_REGNUM)
23445 {
23446 if (ARM_NUM_REGS (mode) > 4)
23447 return false;
23448
23449 if (TARGET_THUMB2)
23450 return true;
23451
23452 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23453 }
23454
23455 if (regno == FRAME_POINTER_REGNUM
23456 || regno == ARG_POINTER_REGNUM)
23457 /* We only allow integers in the fake hard registers. */
23458 return GET_MODE_CLASS (mode) == MODE_INT;
23459
23460 return false;
23461 }
23462
23463 /* Implement TARGET_MODES_TIEABLE_P. */
23464
23465 static bool
23466 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23467 {
23468 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23469 return true;
23470
23471 /* We specifically want to allow elements of "structure" modes to
23472 be tieable to the structure. This more general condition allows
23473 other rarer situations too. */
23474 if (TARGET_NEON
23475 && (VALID_NEON_DREG_MODE (mode1)
23476 || VALID_NEON_QREG_MODE (mode1)
23477 || VALID_NEON_STRUCT_MODE (mode1))
23478 && (VALID_NEON_DREG_MODE (mode2)
23479 || VALID_NEON_QREG_MODE (mode2)
23480 || VALID_NEON_STRUCT_MODE (mode2)))
23481 return true;
23482
23483 return false;
23484 }
23485
23486 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23487 not used in arm mode. */
23488
23489 enum reg_class
23490 arm_regno_class (int regno)
23491 {
23492 if (regno == PC_REGNUM)
23493 return NO_REGS;
23494
23495 if (TARGET_THUMB1)
23496 {
23497 if (regno == STACK_POINTER_REGNUM)
23498 return STACK_REG;
23499 if (regno == CC_REGNUM)
23500 return CC_REG;
23501 if (regno < 8)
23502 return LO_REGS;
23503 return HI_REGS;
23504 }
23505
23506 if (TARGET_THUMB2 && regno < 8)
23507 return LO_REGS;
23508
23509 if ( regno <= LAST_ARM_REGNUM
23510 || regno == FRAME_POINTER_REGNUM
23511 || regno == ARG_POINTER_REGNUM)
23512 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23513
23514 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23515 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23516
23517 if (IS_VFP_REGNUM (regno))
23518 {
23519 if (regno <= D7_VFP_REGNUM)
23520 return VFP_D0_D7_REGS;
23521 else if (regno <= LAST_LO_VFP_REGNUM)
23522 return VFP_LO_REGS;
23523 else
23524 return VFP_HI_REGS;
23525 }
23526
23527 if (IS_IWMMXT_REGNUM (regno))
23528 return IWMMXT_REGS;
23529
23530 if (IS_IWMMXT_GR_REGNUM (regno))
23531 return IWMMXT_GR_REGS;
23532
23533 return NO_REGS;
23534 }
23535
23536 /* Handle a special case when computing the offset
23537 of an argument from the frame pointer. */
23538 int
23539 arm_debugger_arg_offset (int value, rtx addr)
23540 {
23541 rtx_insn *insn;
23542
23543 /* We are only interested if dbxout_parms() failed to compute the offset. */
23544 if (value != 0)
23545 return 0;
23546
23547 /* We can only cope with the case where the address is held in a register. */
23548 if (!REG_P (addr))
23549 return 0;
23550
23551 /* If we are using the frame pointer to point at the argument, then
23552 an offset of 0 is correct. */
23553 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23554 return 0;
23555
23556 /* If we are using the stack pointer to point at the
23557 argument, then an offset of 0 is correct. */
23558 /* ??? Check this is consistent with thumb2 frame layout. */
23559 if ((TARGET_THUMB || !frame_pointer_needed)
23560 && REGNO (addr) == SP_REGNUM)
23561 return 0;
23562
23563 /* Oh dear. The argument is pointed to by a register rather
23564 than being held in a register, or being stored at a known
23565 offset from the frame pointer. Since GDB only understands
23566 those two kinds of argument we must translate the address
23567 held in the register into an offset from the frame pointer.
23568 We do this by searching through the insns for the function
23569 looking to see where this register gets its value. If the
23570 register is initialized from the frame pointer plus an offset
23571 then we are in luck and we can continue, otherwise we give up.
23572
23573 This code is exercised by producing debugging information
23574 for a function with arguments like this:
23575
23576 double func (double a, double b, int c, double d) {return d;}
23577
23578 Without this code the stab for parameter 'd' will be set to
23579 an offset of 0 from the frame pointer, rather than 8. */
23580
23581 /* The if() statement says:
23582
23583 If the insn is a normal instruction
23584 and if the insn is setting the value in a register
23585 and if the register being set is the register holding the address of the argument
23586 and if the address is computed by an addition
23587 that involves adding to a register
23588 which is the frame pointer
23589 a constant integer
23590
23591 then... */
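/* In RTL terms we are looking for an insn of the shape
   (set (reg ADDR) (plus (reg HARD_FRAME_POINTER) (const_int OFFSET))).  */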
23592
23593 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23594 {
23595 if ( NONJUMP_INSN_P (insn)
23596 && GET_CODE (PATTERN (insn)) == SET
23597 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23598 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23599 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23600 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23601 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23602 )
23603 {
23604 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23605
23606 break;
23607 }
23608 }
23609
23610 if (value == 0)
23611 {
23612 debug_rtx (addr);
23613 warning (0, "unable to compute real location of stacked parameter");
23614 value = 8; /* XXX magic hack */
23615 }
23616
23617 return value;
23618 }
23619 \f
23620 /* Implement TARGET_PROMOTED_TYPE. */
23621
23622 static tree
23623 arm_promoted_type (const_tree t)
23624 {
23625 if (SCALAR_FLOAT_TYPE_P (t)
23626 && TYPE_PRECISION (t) == 16
23627 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23628 return float_type_node;
23629 return NULL_TREE;
23630 }
23631
23632 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23633 This simply adds HFmode as a supported mode; even though we don't
23634 implement arithmetic on this type directly, it's supported by
23635 optabs conversions, much the way the double-word arithmetic is
23636 special-cased in the default hook. */
23637
23638 static bool
23639 arm_scalar_mode_supported_p (scalar_mode mode)
23640 {
23641 if (mode == HFmode)
23642 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23643 else if (ALL_FIXED_POINT_MODE_P (mode))
23644 return true;
23645 else
23646 return default_scalar_mode_supported_p (mode);
23647 }
23648
23649 /* Set the value of FLT_EVAL_METHOD.
23650 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23651
23652 0: evaluate all operations and constants, whose semantic type has at
23653 most the range and precision of type float, to the range and
23654 precision of float; evaluate all other operations and constants to
23655 the range and precision of the semantic type;
23656
23657 N, where _FloatN is a supported interchange floating type:
23658 evaluate all operations and constants, whose semantic type has at
23659 most the range and precision of _FloatN type, to the range and
23660 precision of the _FloatN type; evaluate all other operations and
23661 constants to the range and precision of the semantic type;
23662
23663 If we have the ARMv8.2-A extensions then we support _Float16 in native
23664 precision, so we should set this to 16. Otherwise, we support the type,
23665 but want to evaluate expressions in float precision, so set this to
23666 0. */
23667
23668 static enum flt_eval_method
23669 arm_excess_precision (enum excess_precision_type type)
23670 {
23671 switch (type)
23672 {
23673 case EXCESS_PRECISION_TYPE_FAST:
23674 case EXCESS_PRECISION_TYPE_STANDARD:
23675 /* We can calculate either in 16-bit range and precision or
23676 32-bit range and precision. Make that decision based on whether
23677 we have native support for the ARMv8.2-A 16-bit floating-point
23678 instructions or not. */
23679 return (TARGET_VFP_FP16INST
23680 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23681 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23682 case EXCESS_PRECISION_TYPE_IMPLICIT:
23683 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23684 default:
23685 gcc_unreachable ();
23686 }
23687 return FLT_EVAL_METHOD_UNPREDICTABLE;
23688 }
23689
23690
23691 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23692 _Float16 if we are using anything other than ieee format for 16-bit
23693 floating point. Otherwise, punt to the default implementation. */
23694 static opt_scalar_float_mode
23695 arm_floatn_mode (int n, bool extended)
23696 {
23697 if (!extended && n == 16)
23698 {
23699 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23700 return HFmode;
23701 return opt_scalar_float_mode ();
23702 }
23703
23704 return default_floatn_mode (n, extended);
23705 }
23706
23707
23708 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23709 not to early-clobber SRC registers in the process.
23710
23711 We assume that the operands described by SRC and DEST represent a
23712 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23713 number of components into which the copy has been decomposed. */
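/* For example, if OPERANDS[0] overlaps OPERANDS[1] and starts at an equal
   or higher register number, the component moves are emitted highest part
   first, so no source register is overwritten before it has been read.  */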
23714 void
23715 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23716 {
23717 unsigned int i;
23718
23719 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23720 || REGNO (operands[0]) < REGNO (operands[1]))
23721 {
23722 for (i = 0; i < count; i++)
23723 {
23724 operands[2 * i] = dest[i];
23725 operands[2 * i + 1] = src[i];
23726 }
23727 }
23728 else
23729 {
23730 for (i = 0; i < count; i++)
23731 {
23732 operands[2 * i] = dest[count - i - 1];
23733 operands[2 * i + 1] = src[count - i - 1];
23734 }
23735 }
23736 }
23737
23738 /* Split operands into moves from op[1] + op[2] into op[0]. */
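/* OPERANDS[0] is the wide destination; its low half receives OPERANDS[1]
   and its high half receives OPERANDS[2].  The two moves are ordered (or,
   for exactly swapped halves, emitted as the single VSWP parallel below)
   so that neither half is clobbered before it is read.  */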
23739
23740 void
23741 neon_split_vcombine (rtx operands[3])
23742 {
23743 unsigned int dest = REGNO (operands[0]);
23744 unsigned int src1 = REGNO (operands[1]);
23745 unsigned int src2 = REGNO (operands[2]);
23746 machine_mode halfmode = GET_MODE (operands[1]);
23747 unsigned int halfregs = REG_NREGS (operands[1]);
23748 rtx destlo, desthi;
23749
23750 if (src1 == dest && src2 == dest + halfregs)
23751 {
23752 /* No-op move. Can't split to nothing; emit something. */
23753 emit_note (NOTE_INSN_DELETED);
23754 return;
23755 }
23756
23757 /* Preserve register attributes for variable tracking. */
23758 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23759 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23760 GET_MODE_SIZE (halfmode));
23761
23762 /* Special case of reversed high/low parts. Use VSWP. */
23763 if (src2 == dest && src1 == dest + halfregs)
23764 {
23765 rtx x = gen_rtx_SET (destlo, operands[1]);
23766 rtx y = gen_rtx_SET (desthi, operands[2]);
23767 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23768 return;
23769 }
23770
23771 if (!reg_overlap_mentioned_p (operands[2], destlo))
23772 {
23773 /* Try to avoid unnecessary moves if part of the result
23774 is in the right place already. */
23775 if (src1 != dest)
23776 emit_move_insn (destlo, operands[1]);
23777 if (src2 != dest + halfregs)
23778 emit_move_insn (desthi, operands[2]);
23779 }
23780 else
23781 {
23782 if (src2 != dest + halfregs)
23783 emit_move_insn (desthi, operands[2]);
23784 if (src1 != dest)
23785 emit_move_insn (destlo, operands[1]);
23786 }
23787 }
23788 \f
23789 /* Return the number (counting from 0) of
23790 the least significant set bit in MASK. */
23791
23792 inline static int
23793 number_of_first_bit_set (unsigned mask)
23794 {
23795 return ctz_hwi (mask);
23796 }
23797
23798 /* Like emit_multi_reg_push, but allowing for a different set of
23799 registers to be described as saved. MASK is the set of registers
23800 to be saved; REAL_REGS is the set of registers to be described as
23801 saved. If REAL_REGS is 0, only describe the stack adjustment. */
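/* As a hypothetical example, MASK == (1 << 0) | (1 << 4) (r0 and r4)
   builds a two-register parallel that is later assembled as something
   like "push {r0, r4}", adjusting SP by -8; the attached unwind note
   records stores only for the registers in REAL_REGS.  */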
23802
23803 static rtx_insn *
23804 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23805 {
23806 unsigned long regno;
23807 rtx par[10], tmp, reg;
23808 rtx_insn *insn;
23809 int i, j;
23810
23811 /* Build the parallel of the registers actually being stored. */
23812 for (i = 0; mask; ++i, mask &= mask - 1)
23813 {
23814 regno = ctz_hwi (mask);
23815 reg = gen_rtx_REG (SImode, regno);
23816
23817 if (i == 0)
23818 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23819 else
23820 tmp = gen_rtx_USE (VOIDmode, reg);
23821
23822 par[i] = tmp;
23823 }
23824
23825 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23826 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23827 tmp = gen_frame_mem (BLKmode, tmp);
23828 tmp = gen_rtx_SET (tmp, par[0]);
23829 par[0] = tmp;
23830
23831 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23832 insn = emit_insn (tmp);
23833
23834 /* Always build the stack adjustment note for unwind info. */
23835 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23836 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23837 par[0] = tmp;
23838
23839 /* Build the parallel of the registers recorded as saved for unwind. */
23840 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23841 {
23842 regno = ctz_hwi (real_regs);
23843 reg = gen_rtx_REG (SImode, regno);
23844
23845 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23846 tmp = gen_frame_mem (SImode, tmp);
23847 tmp = gen_rtx_SET (tmp, reg);
23848 RTX_FRAME_RELATED_P (tmp) = 1;
23849 par[j + 1] = tmp;
23850 }
23851
23852 if (j == 0)
23853 tmp = par[0];
23854 else
23855 {
23856 RTX_FRAME_RELATED_P (par[0]) = 1;
23857 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23858 }
23859
23860 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23861
23862 return insn;
23863 }
23864
23865 /* Emit code to pop registers from the stack.  F is the
23866 assembly file.  MASK is the registers to pop. */
23867 static void
23868 thumb_pop (FILE *f, unsigned long mask)
23869 {
23870 int regno;
23871 int lo_mask = mask & 0xFF;
23872
23873 gcc_assert (mask);
23874
23875 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23876 {
23877 /* Special case.  Do not generate a POP PC statement here; do it in
23878 thumb_exit ().  */
23879 thumb_exit (f, -1);
23880 return;
23881 }
23882
23883 fprintf (f, "\tpop\t{");
23884
23885 /* Look at the low registers first. */
23886 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23887 {
23888 if (lo_mask & 1)
23889 {
23890 asm_fprintf (f, "%r", regno);
23891
23892 if ((lo_mask & ~1) != 0)
23893 fprintf (f, ", ");
23894 }
23895 }
23896
23897 if (mask & (1 << PC_REGNUM))
23898 {
23899 /* Catch popping the PC. */
23900 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
23901 || IS_CMSE_ENTRY (arm_current_func_type ()))
23902 {
23903 /* The PC is never popped directly; instead
23904 it is popped into r3 and then BX is used. */
23905 fprintf (f, "}\n");
23906
23907 thumb_exit (f, -1);
23908
23909 return;
23910 }
23911 else
23912 {
23913 if (mask & 0xFF)
23914 fprintf (f, ", ");
23915
23916 asm_fprintf (f, "%r", PC_REGNUM);
23917 }
23918 }
23919
23920 fprintf (f, "}\n");
23921 }
23922
23923 /* Generate code to return from a thumb function.
23924 If 'reg_containing_return_addr' is -1, then the return address is
23925 actually on the stack, at the stack pointer. */
23926 static void
23927 thumb_exit (FILE *f, int reg_containing_return_addr)
23928 {
23929 unsigned regs_available_for_popping;
23930 unsigned regs_to_pop;
23931 int pops_needed;
23932 unsigned available;
23933 unsigned required;
23934 machine_mode mode;
23935 int size;
23936 int restore_a4 = FALSE;
23937
23938 /* Compute the registers we need to pop. */
23939 regs_to_pop = 0;
23940 pops_needed = 0;
23941
23942 if (reg_containing_return_addr == -1)
23943 {
23944 regs_to_pop |= 1 << LR_REGNUM;
23945 ++pops_needed;
23946 }
23947
23948 if (TARGET_BACKTRACE)
23949 {
23950 /* Restore the (ARM) frame pointer and stack pointer. */
23951 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23952 pops_needed += 2;
23953 }
23954
23955 /* If there is nothing to pop then just emit the BX instruction and
23956 return. */
23957 if (pops_needed == 0)
23958 {
23959 if (crtl->calls_eh_return)
23960 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23961
23962 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23963 {
23964 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
23965 reg_containing_return_addr);
23966 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
23967 }
23968 else
23969 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23970 return;
23971 }
23972 /* Otherwise if we are not supporting interworking and we have not created
23973 a backtrace structure and the function was not entered in ARM mode then
23974 just pop the return address straight into the PC. */
23975 else if (!TARGET_INTERWORK
23976 && !TARGET_BACKTRACE
23977 && !is_called_in_ARM_mode (current_function_decl)
23978 && !crtl->calls_eh_return
23979 && !IS_CMSE_ENTRY (arm_current_func_type ()))
23980 {
23981 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23982 return;
23983 }
23984
23985 /* Find out how many of the (return) argument registers we can corrupt. */
23986 regs_available_for_popping = 0;
23987
23988 /* If returning via __builtin_eh_return, the bottom three registers
23989 all contain information needed for the return. */
23990 if (crtl->calls_eh_return)
23991 size = 12;
23992 else
23993 {
23994 /* Deduce the registers used from the function's
23995 return value.  This is more reliable than examining
23996 df_regs_ever_live_p () because that will be set if the register is
23997 ever used in the function, not just if the register is used
23998 to hold a return value. */
23999
24000 if (crtl->return_rtx != 0)
24001 mode = GET_MODE (crtl->return_rtx);
24002 else
24003 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24004
24005 size = GET_MODE_SIZE (mode);
24006
24007 if (size == 0)
24008 {
24009 /* In a void function we can use any argument register.
24010 In a function that returns a structure on the stack
24011 we can use the second and third argument registers. */
24012 if (mode == VOIDmode)
24013 regs_available_for_popping =
24014 (1 << ARG_REGISTER (1))
24015 | (1 << ARG_REGISTER (2))
24016 | (1 << ARG_REGISTER (3));
24017 else
24018 regs_available_for_popping =
24019 (1 << ARG_REGISTER (2))
24020 | (1 << ARG_REGISTER (3));
24021 }
24022 else if (size <= 4)
24023 regs_available_for_popping =
24024 (1 << ARG_REGISTER (2))
24025 | (1 << ARG_REGISTER (3));
24026 else if (size <= 8)
24027 regs_available_for_popping =
24028 (1 << ARG_REGISTER (3));
24029 }
24030
24031 /* Match registers to be popped with registers into which we pop them. */
24032 for (available = regs_available_for_popping,
24033 required = regs_to_pop;
24034 required != 0 && available != 0;
24035 available &= ~(available & - available),
24036 required &= ~(required & - required))
24037 -- pops_needed;
24038
24039 /* If we have any popping registers left over, remove them. */
24040 if (available > 0)
24041 regs_available_for_popping &= ~available;
24042
24043 /* Otherwise if we need another popping register we can use
24044 the fourth argument register. */
24045 else if (pops_needed)
24046 {
24047 /* If we have not found any free argument registers and
24048 reg a4 contains the return address, we must move it. */
24049 if (regs_available_for_popping == 0
24050 && reg_containing_return_addr == LAST_ARG_REGNUM)
24051 {
24052 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24053 reg_containing_return_addr = LR_REGNUM;
24054 }
24055 else if (size > 12)
24056 {
24057 /* Register a4 is being used to hold part of the return value,
24058 but we have dire need of a free, low register. */
24059 restore_a4 = TRUE;
24060
24061 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
24062 }
24063
24064 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24065 {
24066 /* The fourth argument register is available. */
24067 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24068
24069 --pops_needed;
24070 }
24071 }
24072
24073 /* Pop as many registers as we can. */
24074 thumb_pop (f, regs_available_for_popping);
24075
24076 /* Process the registers we popped. */
24077 if (reg_containing_return_addr == -1)
24078 {
24079 /* The return address was popped into the lowest numbered register. */
24080 regs_to_pop &= ~(1 << LR_REGNUM);
24081
24082 reg_containing_return_addr =
24083 number_of_first_bit_set (regs_available_for_popping);
24084
24085 /* Remove this register from the mask of available registers, so that
24086 the return address will not be corrupted by further pops. */
24087 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24088 }
24089
24090 /* If we popped other registers then handle them here. */
24091 if (regs_available_for_popping)
24092 {
24093 int frame_pointer;
24094
24095 /* Work out which register currently contains the frame pointer. */
24096 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24097
24098 /* Move it into the correct place. */
24099 asm_fprintf (f, "\tmov\t%r, %r\n",
24100 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24101
24102 /* (Temporarily) remove it from the mask of popped registers. */
24103 regs_available_for_popping &= ~(1 << frame_pointer);
24104 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24105
24106 if (regs_available_for_popping)
24107 {
24108 int stack_pointer;
24109
24110 /* We popped the stack pointer as well;
24111 find the register that contains it. */
24112 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24113
24114 /* Move it into the stack register. */
24115 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24116
24117 /* At this point we have popped all necessary registers, so
24118 do not worry about restoring regs_available_for_popping
24119 to its correct value:
24120
24121 assert (pops_needed == 0)
24122 assert (regs_available_for_popping == (1 << frame_pointer))
24123 assert (regs_to_pop == (1 << STACK_POINTER)) */
24124 }
24125 else
24126 {
24127 /* Since we have just moved the popped value into the frame
24128 pointer, the popping register is available for reuse, and
24129 we know that we still have the stack pointer left to pop. */
24130 regs_available_for_popping |= (1 << frame_pointer);
24131 }
24132 }
24133
24134 /* If we still have registers left on the stack, but we no longer have
24135 any registers into which we can pop them, then we must move the return
24136 address into the link register and make available the register that
24137 contained it. */
24138 if (regs_available_for_popping == 0 && pops_needed > 0)
24139 {
24140 regs_available_for_popping |= 1 << reg_containing_return_addr;
24141
24142 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24143 reg_containing_return_addr);
24144
24145 reg_containing_return_addr = LR_REGNUM;
24146 }
24147
24148 /* If we have registers left on the stack then pop some more.
24149 We know that at most we will want to pop FP and SP. */
24150 if (pops_needed > 0)
24151 {
24152 int popped_into;
24153 int move_to;
24154
24155 thumb_pop (f, regs_available_for_popping);
24156
24157 /* We have popped either FP or SP.
24158 Move whichever one it is into the correct register. */
24159 popped_into = number_of_first_bit_set (regs_available_for_popping);
24160 move_to = number_of_first_bit_set (regs_to_pop);
24161
24162 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24163 --pops_needed;
24164 }
24165
24166 /* If we still have not popped everything then we must have only
24167 had one register available to us and we are now popping the SP. */
24168 if (pops_needed > 0)
24169 {
24170 int popped_into;
24171
24172 thumb_pop (f, regs_available_for_popping);
24173
24174 popped_into = number_of_first_bit_set (regs_available_for_popping);
24175
24176 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24177 /*
24178 assert (regs_to_pop == (1 << STACK_POINTER))
24179 assert (pops_needed == 1)
24180 */
24181 }
24182
24183 /* If necessary restore the a4 register. */
24184 if (restore_a4)
24185 {
24186 if (reg_containing_return_addr != LR_REGNUM)
24187 {
24188 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24189 reg_containing_return_addr = LR_REGNUM;
24190 }
24191
24192 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24193 }
24194
24195 if (crtl->calls_eh_return)
24196 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24197
24198 /* Return to caller. */
24199 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24200 {
24201 /* This is for the cases where LR is not being used to contain the return
24202 address. It may therefore contain information that we might not want
24203 to leak, hence it must be cleared. The value in R0 will never be a
24204 secret at this point, so it is safe to use it, see the clearing code
24205 in 'cmse_nonsecure_entry_clear_before_return'. */
24206 if (reg_containing_return_addr != LR_REGNUM)
24207 asm_fprintf (f, "\tmov\tlr, r0\n");
24208
24209 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24210 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24211 }
24212 else
24213 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24214 }
24215 \f
24216 /* Scan INSN just before assembler is output for it.
24217 For Thumb-1, we track the status of the condition codes; this
24218 information is used in the cbranchsi4_insn pattern. */
24219 void
24220 thumb1_final_prescan_insn (rtx_insn *insn)
24221 {
24222 if (flag_print_asm_name)
24223 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24224 INSN_ADDRESSES (INSN_UID (insn)));
24225 /* Don't overwrite the previous setter when we get to a cbranch. */
24226 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24227 {
24228 enum attr_conds conds;
24229
24230 if (cfun->machine->thumb1_cc_insn)
24231 {
24232 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24233 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24234 CC_STATUS_INIT;
24235 }
24236 conds = get_attr_conds (insn);
24237 if (conds == CONDS_SET)
24238 {
24239 rtx set = single_set (insn);
24240 cfun->machine->thumb1_cc_insn = insn;
24241 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24242 cfun->machine->thumb1_cc_op1 = const0_rtx;
24243 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24244 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24245 {
24246 rtx src1 = XEXP (SET_SRC (set), 1);
24247 if (src1 == const0_rtx)
24248 cfun->machine->thumb1_cc_mode = CCmode;
24249 }
24250 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24251 {
24252 /* Record the src register operand instead of dest because
24253 cprop_hardreg pass propagates src. */
24254 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24255 }
24256 }
24257 else if (conds != CONDS_NOCOND)
24258 cfun->machine->thumb1_cc_insn = NULL_RTX;
24259 }
24260
24261 /* Check if an unexpected far jump is used. */
24262 if (cfun->machine->lr_save_eliminated
24263 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24264 internal_error ("Unexpected thumb1 far jump");
24265 }
24266
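/* Return 1 if VAL (viewed as a 32-bit value) is an 8-bit constant shifted
   left by at most 24 bits, i.e. all of its set bits fit within one
   byte-wide window; return 0 otherwise (including for VAL == 0).  */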
24267 int
24268 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24269 {
24270 unsigned HOST_WIDE_INT mask = 0xff;
24271 int i;
24272
24273 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24274 if (val == 0) /* XXX */
24275 return 0;
24276
24277 for (i = 0; i < 25; i++)
24278 if ((val & (mask << i)) == val)
24279 return 1;
24280
24281 return 0;
24282 }
24283
24284 /* Return nonzero if the current function contains,
24285 or might contain, a far jump. */
24286 static int
24287 thumb_far_jump_used_p (void)
24288 {
24289 rtx_insn *insn;
24290 bool far_jump = false;
24291 unsigned int func_size = 0;
24292
24293 /* If we have already decided that far jumps may be used,
24294 do not bother checking again, and always return true even if
24295 it turns out that they are not being used. Once we have made
24296 the decision that far jumps are present (and that hence the link
24297 register will be pushed onto the stack) we cannot go back on it. */
24298 if (cfun->machine->far_jump_used)
24299 return 1;
24300
24301 /* If this function is not being called from the prologue/epilogue
24302 generation code then it must be being called from the
24303 INITIAL_ELIMINATION_OFFSET macro. */
24304 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24305 {
24306 /* In this case we know that we are being asked about the elimination
24307 of the arg pointer register. If that register is not being used,
24308 then there are no arguments on the stack, and we do not have to
24309 worry that a far jump might force the prologue to push the link
24310 register, changing the stack offsets. In this case we can just
24311 return false, since the presence of far jumps in the function will
24312 not affect stack offsets.
24313
24314 If the arg pointer is live (or if it was live, but has now been
24315 eliminated and so set to dead) then we do have to test to see if
24316 the function might contain a far jump. This test can lead to some
24317 false positives, since before reload is completed, the length of
24318 branch instructions is not known, so gcc defaults to returning their
24319 longest length, which in turn sets the far jump attribute to true.
24320
24321 A false positive will not result in bad code being generated, but it
24322 will result in a needless push and pop of the link register. We
24323 hope that this does not occur too often.
24324
24325 If we need doubleword stack alignment this could affect the other
24326 elimination offsets so we can't risk getting it wrong. */
24327 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24328 cfun->machine->arg_pointer_live = 1;
24329 else if (!cfun->machine->arg_pointer_live)
24330 return 0;
24331 }
24332
24333 /* We should not change far_jump_used during or after reload, as there is
24334 no chance to change stack frame layout. */
24335 if (reload_in_progress || reload_completed)
24336 return 0;
24337
24338 /* Check to see if the function contains a branch
24339 insn with the far jump attribute set. */
24340 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24341 {
24342 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24343 {
24344 far_jump = true;
24345 }
24346 func_size += get_attr_length (insn);
24347 }
24348
24349 /* The far_jump attribute will always be true for thumb1 before the
24350 shorten_branch pass, so checking the far_jump attribute before
24351 shorten_branch isn't very useful.
24352
24353 The following heuristic tries to estimate more accurately whether a
24354 far jump may finally be used.  The heuristic is very conservative, as
24355 there is no chance to roll back the decision not to use a far jump.
24356
24357 The Thumb1 long branch offset range is -2048 to 2046.  In the worst
24358 case each 2-byte insn is associated with a 4-byte constant pool entry.
24359 Using a function size of 2048/3 as the threshold is conservative enough. */
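/* In other words, in the worst case every 2-byte insn expands to roughly
   6 bytes (the insn plus its 4-byte literal), so the code span can
   approach 3 * func_size; only when that could exceed the 2048-byte
   branch range do we commit to the far jump.  */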
24360 if (far_jump)
24361 {
24362 if ((func_size * 3) >= 2048)
24363 {
24364 /* Record the fact that we have decided that
24365 the function does use far jumps. */
24366 cfun->machine->far_jump_used = 1;
24367 return 1;
24368 }
24369 }
24370
24371 return 0;
24372 }
24373
24374 /* Return nonzero if FUNC must be entered in ARM mode. */
24375 static bool
24376 is_called_in_ARM_mode (tree func)
24377 {
24378 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24379
24380 /* Ignore the problem of functions whose address is taken. */
24381 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24382 return true;
24383
24384 #ifdef ARM_PE
24385 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24386 #else
24387 return false;
24388 #endif
24389 }
24390
24391 /* Given the stack offsets and register mask in OFFSETS, decide how
24392 many additional registers to push instead of subtracting a constant
24393 from SP. For epilogues the principle is the same except we use pop.
24394 FOR_PROLOGUE indicates which we're generating. */
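/* The idea is that extra low registers can be added to an existing
   push/pop at no extra instruction cost, and each one moves SP by another
   4 bytes, shrinking or removing the explicit SP adjustment.  This is
   attempted when optimizing for size, or when the frame is exactly 512
   bytes (see the comment below).  */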
24395 static int
24396 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24397 {
24398 HOST_WIDE_INT amount;
24399 unsigned long live_regs_mask = offsets->saved_regs_mask;
24400 /* Extract a mask of the ones we can give to the Thumb's push/pop
24401 instruction. */
24402 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24403 /* Then count how many other high registers will need to be pushed. */
24404 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24405 int n_free, reg_base, size;
24406
24407 if (!for_prologue && frame_pointer_needed)
24408 amount = offsets->locals_base - offsets->saved_regs;
24409 else
24410 amount = offsets->outgoing_args - offsets->saved_regs;
24411
24412 /* If the stack frame size is 512 exactly, we can save one load
24413 instruction, which should make this a win even when optimizing
24414 for speed. */
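/* The Thumb-1 SP-relative add/sub immediate only reaches multiples of 4
   up to 508, so a 512-byte adjustment would otherwise need the
   load-a-constant path in thumb1_expand_prologue; pushing one extra
   register brings the remaining adjustment back into range.  */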
24415 if (!optimize_size && amount != 512)
24416 return 0;
24417
24418 /* Can't do this if there are high registers to push. */
24419 if (high_regs_pushed != 0)
24420 return 0;
24421
24422 /* Shouldn't do it in the prologue if no registers would normally
24423 be pushed at all. In the epilogue, also allow it if we'll have
24424 a pop insn for the PC. */
24425 if (l_mask == 0
24426 && (for_prologue
24427 || TARGET_BACKTRACE
24428 || (live_regs_mask & 1 << LR_REGNUM) == 0
24429 || TARGET_INTERWORK
24430 || crtl->args.pretend_args_size != 0))
24431 return 0;
24432
24433 /* Don't do this if thumb_expand_prologue wants to emit instructions
24434 between the push and the stack frame allocation. */
24435 if (for_prologue
24436 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24437 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24438 return 0;
24439
24440 reg_base = 0;
24441 n_free = 0;
24442 if (!for_prologue)
24443 {
24444 size = arm_size_return_regs ();
24445 reg_base = ARM_NUM_INTS (size);
24446 live_regs_mask >>= reg_base;
24447 }
24448
24449 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24450 && (for_prologue || call_used_regs[reg_base + n_free]))
24451 {
24452 live_regs_mask >>= 1;
24453 n_free++;
24454 }
24455
24456 if (n_free == 0)
24457 return 0;
24458 gcc_assert (amount / 4 * 4 == amount);
24459
24460 if (amount >= 512 && (amount - n_free * 4) < 512)
24461 return (amount - 508) / 4;
24462 if (amount <= n_free * 4)
24463 return amount / 4;
24464 return 0;
24465 }
24466
24467 /* The bits which aren't usefully expanded as rtl. */
24468 const char *
24469 thumb1_unexpanded_epilogue (void)
24470 {
24471 arm_stack_offsets *offsets;
24472 int regno;
24473 unsigned long live_regs_mask = 0;
24474 int high_regs_pushed = 0;
24475 int extra_pop;
24476 int had_to_push_lr;
24477 int size;
24478
24479 if (cfun->machine->return_used_this_function != 0)
24480 return "";
24481
24482 if (IS_NAKED (arm_current_func_type ()))
24483 return "";
24484
24485 offsets = arm_get_frame_offsets ();
24486 live_regs_mask = offsets->saved_regs_mask;
24487 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24488
24489 /* Deduce the registers used from the function's return value.
24490 This is more reliable than examining df_regs_ever_live_p () because that
24491 will be set if the register is ever used in the function, not just if
24492 the register is used to hold a return value. */
24493 size = arm_size_return_regs ();
24494
24495 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24496 if (extra_pop > 0)
24497 {
24498 unsigned long extra_mask = (1 << extra_pop) - 1;
24499 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24500 }
24501
24502 /* The prologue may have pushed some high registers to use as
24503 work registers, e.g. the testsuite file:
24504 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24505 compiles to produce:
24506 push {r4, r5, r6, r7, lr}
24507 mov r7, r9
24508 mov r6, r8
24509 push {r6, r7}
24510 as part of the prologue.  We have to undo that pushing here. */
24511
24512 if (high_regs_pushed)
24513 {
24514 unsigned long mask = live_regs_mask & 0xff;
24515 int next_hi_reg;
24516
24517 /* The available low registers depend on the size of the value we are
24518 returning. */
24519 if (size <= 12)
24520 mask |= 1 << 3;
24521 if (size <= 8)
24522 mask |= 1 << 2;
24523
24524 if (mask == 0)
24525 /* Oh dear! We have no low registers into which we can pop
24526 high registers! */
24527 internal_error
24528 ("no low registers available for popping high registers");
24529
24530 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24531 if (live_regs_mask & (1 << next_hi_reg))
24532 break;
24533
24534 while (high_regs_pushed)
24535 {
24536 /* Find lo register(s) into which the high register(s) can
24537 be popped. */
24538 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24539 {
24540 if (mask & (1 << regno))
24541 high_regs_pushed--;
24542 if (high_regs_pushed == 0)
24543 break;
24544 }
24545
24546 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24547
24548 /* Pop the values into the low register(s). */
24549 thumb_pop (asm_out_file, mask);
24550
24551 /* Move the value(s) into the high registers. */
24552 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24553 {
24554 if (mask & (1 << regno))
24555 {
24556 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24557 regno);
24558
24559 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24560 if (live_regs_mask & (1 << next_hi_reg))
24561 break;
24562 }
24563 }
24564 }
24565 live_regs_mask &= ~0x0f00;
24566 }
24567
24568 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24569 live_regs_mask &= 0xff;
24570
24571 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24572 {
24573 /* Pop the return address into the PC. */
24574 if (had_to_push_lr)
24575 live_regs_mask |= 1 << PC_REGNUM;
24576
24577 /* Either no argument registers were pushed or a backtrace
24578 structure was created which includes an adjusted stack
24579 pointer, so just pop everything. */
24580 if (live_regs_mask)
24581 thumb_pop (asm_out_file, live_regs_mask);
24582
24583 /* We have either just popped the return address into the
24584 PC or it was kept in LR for the entire function.
24585 Note that thumb_pop has already called thumb_exit if the
24586 PC was in the list. */
24587 if (!had_to_push_lr)
24588 thumb_exit (asm_out_file, LR_REGNUM);
24589 }
24590 else
24591 {
24592 /* Pop everything but the return address. */
24593 if (live_regs_mask)
24594 thumb_pop (asm_out_file, live_regs_mask);
24595
24596 if (had_to_push_lr)
24597 {
24598 if (size > 12)
24599 {
24600 /* We have no free low regs, so save one. */
24601 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24602 LAST_ARG_REGNUM);
24603 }
24604
24605 /* Get the return address into a temporary register. */
24606 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24607
24608 if (size > 12)
24609 {
24610 /* Move the return address to lr. */
24611 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24612 LAST_ARG_REGNUM);
24613 /* Restore the low register. */
24614 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24615 IP_REGNUM);
24616 regno = LR_REGNUM;
24617 }
24618 else
24619 regno = LAST_ARG_REGNUM;
24620 }
24621 else
24622 regno = LR_REGNUM;
24623
24624 /* Remove the argument registers that were pushed onto the stack. */
24625 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24626 SP_REGNUM, SP_REGNUM,
24627 crtl->args.pretend_args_size);
24628
24629 thumb_exit (asm_out_file, regno);
24630 }
24631
24632 return "";
24633 }
24634
24635 /* Functions to save and restore machine-specific function data. */
24636 static struct machine_function *
24637 arm_init_machine_status (void)
24638 {
24639 struct machine_function *machine;
24640 machine = ggc_cleared_alloc<machine_function> ();
24641
24642 #if ARM_FT_UNKNOWN != 0
24643 machine->func_type = ARM_FT_UNKNOWN;
24644 #endif
24645 return machine;
24646 }
24647
24648 /* Return an RTX indicating where the return address to the
24649 calling function can be found. */
24650 rtx
24651 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24652 {
24653 if (count != 0)
24654 return NULL_RTX;
24655
24656 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24657 }
24658
24659 /* Do anything needed before RTL is emitted for each function. */
24660 void
24661 arm_init_expanders (void)
24662 {
24663 /* Arrange to initialize and mark the machine per-function status. */
24664 init_machine_status = arm_init_machine_status;
24665
24666 /* This is to stop the combine pass optimizing away the alignment
24667 adjustment of va_arg. */
24668 /* ??? It is claimed that this should not be necessary. */
24669 if (cfun)
24670 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24671 }
24672
24673 /* Return true if FUNC is compiled for a different instruction set (ARM vs. Thumb) from the one currently in use. */
24674
24675 bool
24676 arm_change_mode_p (tree func)
24677 {
24678 if (TREE_CODE (func) != FUNCTION_DECL)
24679 return false;
24680
24681 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24682
24683 if (!callee_tree)
24684 callee_tree = target_option_default_node;
24685
24686 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24687 int flags = callee_opts->x_target_flags;
24688
24689 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24690 }
24691
24692 /* Like arm_compute_initial_elimination_offset.  Simpler because there
24693 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24694 to point at the base of the local variables after static stack
24695 space for a function has been allocated. */
24696
24697 HOST_WIDE_INT
24698 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24699 {
24700 arm_stack_offsets *offsets;
24701
24702 offsets = arm_get_frame_offsets ();
24703
24704 switch (from)
24705 {
24706 case ARG_POINTER_REGNUM:
24707 switch (to)
24708 {
24709 case STACK_POINTER_REGNUM:
24710 return offsets->outgoing_args - offsets->saved_args;
24711
24712 case FRAME_POINTER_REGNUM:
24713 return offsets->soft_frame - offsets->saved_args;
24714
24715 case ARM_HARD_FRAME_POINTER_REGNUM:
24716 return offsets->saved_regs - offsets->saved_args;
24717
24718 case THUMB_HARD_FRAME_POINTER_REGNUM:
24719 return offsets->locals_base - offsets->saved_args;
24720
24721 default:
24722 gcc_unreachable ();
24723 }
24724 break;
24725
24726 case FRAME_POINTER_REGNUM:
24727 switch (to)
24728 {
24729 case STACK_POINTER_REGNUM:
24730 return offsets->outgoing_args - offsets->soft_frame;
24731
24732 case ARM_HARD_FRAME_POINTER_REGNUM:
24733 return offsets->saved_regs - offsets->soft_frame;
24734
24735 case THUMB_HARD_FRAME_POINTER_REGNUM:
24736 return offsets->locals_base - offsets->soft_frame;
24737
24738 default:
24739 gcc_unreachable ();
24740 }
24741 break;
24742
24743 default:
24744 gcc_unreachable ();
24745 }
24746 }
24747
24748 /* Generate the function's prologue. */
24749
24750 void
24751 thumb1_expand_prologue (void)
24752 {
24753 rtx_insn *insn;
24754
24755 HOST_WIDE_INT amount;
24756 HOST_WIDE_INT size;
24757 arm_stack_offsets *offsets;
24758 unsigned long func_type;
24759 int regno;
24760 unsigned long live_regs_mask;
24761 unsigned long l_mask;
24762 unsigned high_regs_pushed = 0;
24763 bool lr_needs_saving;
24764
24765 func_type = arm_current_func_type ();
24766
24767 /* Naked functions don't have prologues. */
24768 if (IS_NAKED (func_type))
24769 {
24770 if (flag_stack_usage_info)
24771 current_function_static_stack_size = 0;
24772 return;
24773 }
24774
24775 if (IS_INTERRUPT (func_type))
24776 {
24777 error ("interrupt Service Routines cannot be coded in Thumb mode");
24778 return;
24779 }
24780
24781 if (is_called_in_ARM_mode (current_function_decl))
24782 emit_insn (gen_prologue_thumb1_interwork ());
24783
24784 offsets = arm_get_frame_offsets ();
24785 live_regs_mask = offsets->saved_regs_mask;
24786 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
24787
24788 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24789 l_mask = live_regs_mask & 0x40ff;
24790 /* Then count how many other high registers will need to be pushed. */
24791 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24792
24793 if (crtl->args.pretend_args_size)
24794 {
24795 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24796
24797 if (cfun->machine->uses_anonymous_args)
24798 {
24799 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24800 unsigned long mask;
24801
24802 mask = 1ul << (LAST_ARG_REGNUM + 1);
24803 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24804
24805 insn = thumb1_emit_multi_reg_push (mask, 0);
24806 }
24807 else
24808 {
24809 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24810 stack_pointer_rtx, x));
24811 }
24812 RTX_FRAME_RELATED_P (insn) = 1;
24813 }
24814
24815 if (TARGET_BACKTRACE)
24816 {
24817 HOST_WIDE_INT offset = 0;
24818 unsigned work_register;
24819 rtx work_reg, x, arm_hfp_rtx;
24820
24821 /* We have been asked to create a stack backtrace structure.
24822 The code looks like this:
24823
24824 0 .align 2
24825 0 func:
24826 0 sub SP, #16 Reserve space for 4 registers.
24827 2 push {R7} Push low registers.
24828 4 add R7, SP, #20 Get the stack pointer before the push.
24829 6 str R7, [SP, #8] Store the stack pointer
24830 (before reserving the space).
24831 8 mov R7, PC Get hold of the start of this code + 12.
24832 10 str R7, [SP, #16] Store it.
24833 12 mov R7, FP Get hold of the current frame pointer.
24834 14 str R7, [SP, #4] Store it.
24835 16 mov R7, LR Get hold of the current return address.
24836 18 str R7, [SP, #12] Store it.
24837 20 add R7, SP, #16 Point at the start of the
24838 backtrace structure.
24839 22 mov FP, R7 Put this value into the frame pointer. */
24840
24841 work_register = thumb_find_work_register (live_regs_mask);
24842 work_reg = gen_rtx_REG (SImode, work_register);
24843 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24844
24845 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24846 stack_pointer_rtx, GEN_INT (-16)));
24847 RTX_FRAME_RELATED_P (insn) = 1;
24848
24849 if (l_mask)
24850 {
24851 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24852 RTX_FRAME_RELATED_P (insn) = 1;
24853 lr_needs_saving = false;
24854
24855 offset = bit_count (l_mask) * UNITS_PER_WORD;
24856 }
24857
24858 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24859 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24860
24861 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24862 x = gen_frame_mem (SImode, x);
24863 emit_move_insn (x, work_reg);
24864
24865 /* Make sure that the instruction fetching the PC is in the right place
24866 to calculate "start of backtrace creation code + 12". */
24867 /* ??? The stores using the common WORK_REG ought to be enough to
24868 prevent the scheduler from doing anything weird. Failing that
24869 we could always move all of the following into an UNSPEC_VOLATILE. */
24870 if (l_mask)
24871 {
24872 x = gen_rtx_REG (SImode, PC_REGNUM);
24873 emit_move_insn (work_reg, x);
24874
24875 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24876 x = gen_frame_mem (SImode, x);
24877 emit_move_insn (x, work_reg);
24878
24879 emit_move_insn (work_reg, arm_hfp_rtx);
24880
24881 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24882 x = gen_frame_mem (SImode, x);
24883 emit_move_insn (x, work_reg);
24884 }
24885 else
24886 {
24887 emit_move_insn (work_reg, arm_hfp_rtx);
24888
24889 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24890 x = gen_frame_mem (SImode, x);
24891 emit_move_insn (x, work_reg);
24892
24893 x = gen_rtx_REG (SImode, PC_REGNUM);
24894 emit_move_insn (work_reg, x);
24895
24896 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24897 x = gen_frame_mem (SImode, x);
24898 emit_move_insn (x, work_reg);
24899 }
24900
24901 x = gen_rtx_REG (SImode, LR_REGNUM);
24902 emit_move_insn (work_reg, x);
24903
24904 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24905 x = gen_frame_mem (SImode, x);
24906 emit_move_insn (x, work_reg);
24907
24908 x = GEN_INT (offset + 12);
24909 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24910
24911 emit_move_insn (arm_hfp_rtx, work_reg);
24912 }
24913 /* Optimization: If we are not pushing any low registers but we are going
24914 to push some high registers then delay our first push. This will just
24915 be a push of LR and we can combine it with the push of the first high
24916 register. */
24917 else if ((l_mask & 0xff) != 0
24918 || (high_regs_pushed == 0 && lr_needs_saving))
24919 {
24920 unsigned long mask = l_mask;
24921 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24922 insn = thumb1_emit_multi_reg_push (mask, mask);
24923 RTX_FRAME_RELATED_P (insn) = 1;
24924 lr_needs_saving = false;
24925 }
24926
24927 if (high_regs_pushed)
24928 {
24929 unsigned pushable_regs;
24930 unsigned next_hi_reg;
24931 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24932 : crtl->args.info.nregs;
24933 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24934
24935 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24936 if (live_regs_mask & (1 << next_hi_reg))
24937 break;
24938
24939 /* Here we need to mask out registers used for passing arguments,
24940 even if they could be pushed.  This is to avoid using them to stash
24941 the high registers; such a stash could clobber arguments that are still live. */
24942 pushable_regs = l_mask & (~arg_regs_mask);
24943 if (lr_needs_saving)
24944 pushable_regs &= ~(1 << LR_REGNUM);
24945
24946 if (pushable_regs == 0)
24947 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24948
24949 while (high_regs_pushed > 0)
24950 {
24951 unsigned long real_regs_mask = 0;
24952 unsigned long push_mask = 0;
24953
24954 for (regno = LR_REGNUM; regno >= 0; regno --)
24955 {
24956 if (pushable_regs & (1 << regno))
24957 {
24958 emit_move_insn (gen_rtx_REG (SImode, regno),
24959 gen_rtx_REG (SImode, next_hi_reg));
24960
24961 high_regs_pushed --;
24962 real_regs_mask |= (1 << next_hi_reg);
24963 push_mask |= (1 << regno);
24964
24965 if (high_regs_pushed)
24966 {
24967 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24968 next_hi_reg --)
24969 if (live_regs_mask & (1 << next_hi_reg))
24970 break;
24971 }
24972 else
24973 break;
24974 }
24975 }
24976
24977 /* If we had to find a work register and we have not yet
24978 saved the LR then add it to the list of regs to push. */
24979 if (lr_needs_saving)
24980 {
24981 push_mask |= 1 << LR_REGNUM;
24982 real_regs_mask |= 1 << LR_REGNUM;
24983 lr_needs_saving = false;
24984 }
24985
24986 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
24987 RTX_FRAME_RELATED_P (insn) = 1;
24988 }
24989 }
24990
24991 /* Load the pic register before setting the frame pointer,
24992 so we can use r7 as a temporary work register. */
24993 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24994 arm_load_pic_register (live_regs_mask);
24995
24996 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24997 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24998 stack_pointer_rtx);
24999
25000 size = offsets->outgoing_args - offsets->saved_args;
25001 if (flag_stack_usage_info)
25002 current_function_static_stack_size = size;
25003
25004 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25005 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
25006 || flag_stack_clash_protection)
25007 && size)
25008 sorry ("-fstack-check=specific for Thumb-1");
25009
25010 amount = offsets->outgoing_args - offsets->saved_regs;
25011 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25012 if (amount)
25013 {
25014 if (amount < 512)
25015 {
25016 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25017 GEN_INT (- amount)));
25018 RTX_FRAME_RELATED_P (insn) = 1;
25019 }
25020 else
25021 {
25022 rtx reg, dwarf;
25023
25024 /* The stack decrement is too big for an immediate value in a single
25025 insn. In theory we could issue multiple subtracts, but after
25026 three of them it becomes more space efficient to place the full
25027 value in the constant pool and load into a register. (Also the
25028 ARM debugger really likes to see only one stack decrement per
25029 function). So instead we look for a scratch register into which
25030 we can load the decrement, and then we subtract this from the
25031 stack pointer. Unfortunately on the thumb the only available
25032 scratch registers are the argument registers, and we cannot use
25033 these as they may hold arguments to the function. Instead we
25034 attempt to locate a call preserved register which is used by this
25035 function. If we can find one, then we know that it will have
25036 been pushed at the start of the prologue and so we can corrupt
25037 it now. */
25038 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25039 if (live_regs_mask & (1 << regno))
25040 break;
25041
25042 gcc_assert (regno <= LAST_LO_REGNUM);
25043
25044 reg = gen_rtx_REG (SImode, regno);
25045
25046 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25047
25048 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25049 stack_pointer_rtx, reg));
25050
25051 dwarf = gen_rtx_SET (stack_pointer_rtx,
25052 plus_constant (Pmode, stack_pointer_rtx,
25053 -amount));
25054 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25055 RTX_FRAME_RELATED_P (insn) = 1;
25056 }
25057 }
25058
25059 if (frame_pointer_needed)
25060 thumb_set_frame_pointer (offsets);
25061
25062 /* If we are profiling, make sure no instructions are scheduled before
25063 the call to mcount. Similarly if the user has requested no
25064 scheduling in the prologue.  Similarly if we want non-call exceptions
25065 using the EABI unwinder, to prevent faulting instructions from being
25066 swapped with a stack adjustment. */
25067 if (crtl->profile || !TARGET_SCHED_PROLOG
25068 || (arm_except_unwind_info (&global_options) == UI_TARGET
25069 && cfun->can_throw_non_call_exceptions))
25070 emit_insn (gen_blockage ());
25071
25072 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25073 if (live_regs_mask & 0xff)
25074 cfun->machine->lr_save_eliminated = 0;
25075 }
25076
25077 /* Clear caller-saved registers that are not used to pass return values, as
25078 well as leaked condition flags, before exiting a cmse_nonsecure_entry function. */
25079
25080 void
25081 cmse_nonsecure_entry_clear_before_return (void)
25082 {
25083 int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
25084 uint32_t padding_bits_to_clear = 0;
25085 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
25086 auto_sbitmap to_clear_bitmap (maxregno + 1);
25087 tree result_type;
25088 rtx result_rtl;
25089
25090 bitmap_clear (to_clear_bitmap);
25091 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
25092 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
25093
25094 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25095 registers. */
25096 if (TARGET_HARD_FLOAT)
25097 {
25098 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
25099
25100 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
25101
25102 /* Make sure we don't clear the two scratch registers used to clear the
25103 relevant FPSCR bits in output_return_instruction. */
25104 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25105 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
25106 emit_use (gen_rtx_REG (SImode, 4));
25107 bitmap_clear_bit (to_clear_bitmap, 4);
25108 }
25109
25110 /* If the user has defined registers to be caller saved, these are no longer
25111 restored by the function before returning and must thus be cleared for
25112 security purposes. */
25113 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
25114 {
25115 /* We do not touch registers that can be used to pass arguments as per
25116 the AAPCS, since these should never be made callee-saved by user
25117 options. */
25118 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25119 continue;
25120 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25121 continue;
25122 if (call_used_regs[regno])
25123 bitmap_set_bit (to_clear_bitmap, regno);
25124 }
25125
25126 /* Make sure we do not clear the registers used to return the result. */
25127 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25128 if (!VOID_TYPE_P (result_type))
25129 {
25130 uint64_t to_clear_return_mask;
25131 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25132
25133 /* No need to check that we return in registers, because we don't
25134 support returning on the stack yet. */
25135 gcc_assert (REG_P (result_rtl));
25136 to_clear_return_mask
25137 = compute_not_to_clear_mask (result_type, result_rtl, 0,
25138 padding_bits_to_clear_ptr);
25139 if (to_clear_return_mask)
25140 {
25141 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
25142 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25143 {
25144 if (to_clear_return_mask & (1ULL << regno))
25145 bitmap_clear_bit (to_clear_bitmap, regno);
25146 }
25147 }
25148 }
25149
25150 if (padding_bits_to_clear != 0)
25151 {
25152 rtx reg_rtx;
25153 auto_sbitmap to_clear_arg_regs_bitmap (R0_REGNUM + NUM_ARG_REGS);
25154
25155 /* Padding bits to clear is not 0, so we know we are dealing with
25156 returning a composite type, which only uses r0. Let's make sure that
25157 r1-r3 are cleared too; we will use r1 as a scratch register. */
25158 bitmap_clear (to_clear_arg_regs_bitmap);
25159 bitmap_set_range (to_clear_arg_regs_bitmap, R0_REGNUM + 1,
25160 NUM_ARG_REGS - 1);
25161 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
25162
25163 reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
25164
25165 /* Fill the lower half of the negated padding_bits_to_clear. */
25166 emit_move_insn (reg_rtx,
25167 GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
25168
25169 /* Also fill the top half of the negated padding_bits_to_clear. */
25170 if (((~padding_bits_to_clear) >> 16) > 0)
25171 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
25172 GEN_INT (16),
25173 GEN_INT (16)),
25174 GEN_INT ((~padding_bits_to_clear) >> 16)));
25175
25176 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
25177 gen_rtx_REG (SImode, R0_REGNUM),
25178 reg_rtx));
25179 }
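/* Worked example (expository addition, not part of the upstream code):
   suppose padding_bits_to_clear == 0x0000ff00, i.e. bits 8-15 of the
   composite returned in r0 are padding.  Then ~padding_bits_to_clear is
   0xffff00ff and the code above emits roughly

       mov  r1, #0x00ff      @ low half of the negated mask
       movt r1, #0xffff      @ high half, via the zero_extract set
       and  r0, r0, r1       @ zero only the padding bits of r0

   The exact instructions depend on the target, but the net effect is that
   only the padding bits of r0 are cleared.  */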
25180
25181 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25182 {
25183 if (!bitmap_bit_p (to_clear_bitmap, regno))
25184 continue;
25185
25186 if (IS_VFP_REGNUM (regno))
25187 {
25188 /* If regno is an even vfp register and its successor is also to
25189 be cleared, use vmov. */
25190 if (TARGET_VFP_DOUBLE
25191 && VFP_REGNO_OK_FOR_DOUBLE (regno)
25192 && bitmap_bit_p (to_clear_bitmap, regno + 1))
25193 {
25194 emit_move_insn (gen_rtx_REG (DFmode, regno),
25195 CONST1_RTX (DFmode));
25196 emit_use (gen_rtx_REG (DFmode, regno));
25197 regno++;
25198 }
25199 else
25200 {
25201 emit_move_insn (gen_rtx_REG (SFmode, regno),
25202 CONST1_RTX (SFmode));
25203 emit_use (gen_rtx_REG (SFmode, regno));
25204 }
25205 }
25206 else
25207 {
25208 if (TARGET_THUMB1)
25209 {
25210 if (regno == R0_REGNUM)
25211 emit_move_insn (gen_rtx_REG (SImode, regno),
25212 const0_rtx);
25213 else
25214 /* R0 has either been cleared before, see code above, or it
25215 holds a return value, either way it is not secret
25216 information. */
25217 emit_move_insn (gen_rtx_REG (SImode, regno),
25218 gen_rtx_REG (SImode, R0_REGNUM));
25219 emit_use (gen_rtx_REG (SImode, regno));
25220 }
25221 else
25222 {
25223 emit_move_insn (gen_rtx_REG (SImode, regno),
25224 gen_rtx_REG (SImode, LR_REGNUM));
25225 emit_use (gen_rtx_REG (SImode, regno));
25226 }
25227 }
25228 }
25229 }
25230
25231 /* Generate pattern *pop_multiple_with_stack_update_and_return if a single
25232 POP instruction can be generated. LR should be replaced by PC. All
25233 the checks required are already done by USE_RETURN_INSN (). Hence,
25234 all we really need to check here is whether a single register or
25235 multiple registers are to be popped. */
25236 void
25237 thumb2_expand_return (bool simple_return)
25238 {
25239 int i, num_regs;
25240 unsigned long saved_regs_mask;
25241 arm_stack_offsets *offsets;
25242
25243 offsets = arm_get_frame_offsets ();
25244 saved_regs_mask = offsets->saved_regs_mask;
25245
25246 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25247 if (saved_regs_mask & (1 << i))
25248 num_regs++;
25249
25250 if (!simple_return && saved_regs_mask)
25251 {
25252 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25253 functions, or adapt the code to handle it according to the ACLE. This path
25254 should not be reachable for cmse_nonsecure_entry functions, though we prefer
25255 to assert it for now to ensure that future code changes do not silently
25256 change this behavior. */
25257 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25258 if (num_regs == 1)
25259 {
25260 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25261 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25262 rtx addr = gen_rtx_MEM (SImode,
25263 gen_rtx_POST_INC (SImode,
25264 stack_pointer_rtx));
25265 set_mem_alias_set (addr, get_frame_alias_set ());
25266 XVECEXP (par, 0, 0) = ret_rtx;
25267 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25268 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25269 emit_jump_insn (par);
25270 }
25271 else
25272 {
25273 saved_regs_mask &= ~ (1 << LR_REGNUM);
25274 saved_regs_mask |= (1 << PC_REGNUM);
25275 arm_emit_multi_reg_pop (saved_regs_mask);
25276 }
25277 }
25278 else
25279 {
25280 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25281 cmse_nonsecure_entry_clear_before_return ();
25282 emit_jump_insn (simple_return_rtx);
25283 }
25284 }
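/* Expository note (added for illustration; not from the original sources):
   for a Thumb-2 function whose only saved register is LR, the PARALLEL
   built above, a return combined with a post-increment load of PC from
   the stack, typically assembles to something like

       ldr  pc, [sp], #4

   whereas the multi-register path rewrites LR to PC in the mask and lets
   arm_emit_multi_reg_pop emit an ordinary "pop {..., pc}".  */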
25285
25286 void
25287 thumb1_expand_epilogue (void)
25288 {
25289 HOST_WIDE_INT amount;
25290 arm_stack_offsets *offsets;
25291 int regno;
25292
25293 /* Naked functions don't have epilogues. */
25294 if (IS_NAKED (arm_current_func_type ()))
25295 return;
25296
25297 offsets = arm_get_frame_offsets ();
25298 amount = offsets->outgoing_args - offsets->saved_regs;
25299
25300 if (frame_pointer_needed)
25301 {
25302 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25303 amount = offsets->locals_base - offsets->saved_regs;
25304 }
25305 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25306
25307 gcc_assert (amount >= 0);
25308 if (amount)
25309 {
25310 emit_insn (gen_blockage ());
25311
25312 if (amount < 512)
25313 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25314 GEN_INT (amount)));
25315 else
25316 {
25317 /* r3 is always free in the epilogue. */
25318 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25319
25320 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25321 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25322 }
25323 }
25324
25325 /* Emit a USE (stack_pointer_rtx), so that
25326 the stack adjustment will not be deleted. */
25327 emit_insn (gen_force_register_use (stack_pointer_rtx));
25328
25329 if (crtl->profile || !TARGET_SCHED_PROLOG)
25330 emit_insn (gen_blockage ());
25331
25332 /* Emit a clobber for each insn that will be restored in the epilogue,
25333 so that flow2 will get register lifetimes correct. */
25334 for (regno = 0; regno < 13; regno++)
25335 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25336 emit_clobber (gen_rtx_REG (SImode, regno));
25337
25338 if (! df_regs_ever_live_p (LR_REGNUM))
25339 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25340
25341 /* Clear all caller-saved regs that are not used to return. */
25342 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25343 cmse_nonsecure_entry_clear_before_return ();
25344 }
25345
25346 /* Epilogue code for APCS frame. */
25347 static void
25348 arm_expand_epilogue_apcs_frame (bool really_return)
25349 {
25350 unsigned long func_type;
25351 unsigned long saved_regs_mask;
25352 int num_regs = 0;
25353 int i;
25354 int floats_from_frame = 0;
25355 arm_stack_offsets *offsets;
25356
25357 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25358 func_type = arm_current_func_type ();
25359
25360 /* Get frame offsets for ARM. */
25361 offsets = arm_get_frame_offsets ();
25362 saved_regs_mask = offsets->saved_regs_mask;
25363
25364 /* Find the offset of the floating-point save area in the frame. */
25365 floats_from_frame
25366 = (offsets->saved_args
25367 + arm_compute_static_chain_stack_bytes ()
25368 - offsets->frame);
25369
25370 /* Compute how many core registers are saved and how far away the floats are. */
25371 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25372 if (saved_regs_mask & (1 << i))
25373 {
25374 num_regs++;
25375 floats_from_frame += 4;
25376 }
25377
25378 if (TARGET_HARD_FLOAT)
25379 {
25380 int start_reg;
25381 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25382
25383 /* The offset is from IP_REGNUM. */
25384 int saved_size = arm_get_vfp_saved_size ();
25385 if (saved_size > 0)
25386 {
25387 rtx_insn *insn;
25388 floats_from_frame += saved_size;
25389 insn = emit_insn (gen_addsi3 (ip_rtx,
25390 hard_frame_pointer_rtx,
25391 GEN_INT (-floats_from_frame)));
25392 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25393 ip_rtx, hard_frame_pointer_rtx);
25394 }
25395
25396 /* Generate VFP register multi-pop. */
25397 start_reg = FIRST_VFP_REGNUM;
25398
25399 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25400 /* Look for a case where a reg does not need restoring. */
25401 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25402 && (!df_regs_ever_live_p (i + 1)
25403 || call_used_regs[i + 1]))
25404 {
25405 if (start_reg != i)
25406 arm_emit_vfp_multi_reg_pop (start_reg,
25407 (i - start_reg) / 2,
25408 gen_rtx_REG (SImode,
25409 IP_REGNUM));
25410 start_reg = i + 2;
25411 }
25412
25413 /* Restore the remaining regs that we have discovered (or possibly
25414 even all of them, if the conditional in the for loop never
25415 fired). */
25416 if (start_reg != i)
25417 arm_emit_vfp_multi_reg_pop (start_reg,
25418 (i - start_reg) / 2,
25419 gen_rtx_REG (SImode, IP_REGNUM));
25420 }
25421
25422 if (TARGET_IWMMXT)
25423 {
25424 /* The frame pointer is guaranteed to be non-double-word aligned, as
25425 it is set to double-word-aligned old_stack_pointer - 4. */
25426 rtx_insn *insn;
25427 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25428
25429 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25430 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25431 {
25432 rtx addr = gen_frame_mem (V2SImode,
25433 plus_constant (Pmode, hard_frame_pointer_rtx,
25434 - lrm_count * 4));
25435 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25436 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25437 gen_rtx_REG (V2SImode, i),
25438 NULL_RTX);
25439 lrm_count += 2;
25440 }
25441 }
25442
25443 /* saved_regs_mask should contain IP, which holds the old stack pointer
25444 from the time the activation record was created. Since SP and IP are
25445 adjacent registers, we can restore the value directly into SP. */
25446 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25447 saved_regs_mask &= ~(1 << IP_REGNUM);
25448 saved_regs_mask |= (1 << SP_REGNUM);
25449
25450 /* There are two registers left in saved_regs_mask - LR and PC. We
25451 only need to restore LR (the return address), but to
25452 save time we can load it directly into PC, unless we need a
25453 special function exit sequence, or we are not really returning. */
25454 if (really_return
25455 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25456 && !crtl->calls_eh_return)
25457 /* Delete LR from the register mask, so that LR on
25458 the stack is loaded into the PC in the register mask. */
25459 saved_regs_mask &= ~(1 << LR_REGNUM);
25460 else
25461 saved_regs_mask &= ~(1 << PC_REGNUM);
25462
25463 num_regs = bit_count (saved_regs_mask);
25464 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25465 {
25466 rtx_insn *insn;
25467 emit_insn (gen_blockage ());
25468 /* Unwind the stack to just below the saved registers. */
25469 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25470 hard_frame_pointer_rtx,
25471 GEN_INT (- 4 * num_regs)));
25472
25473 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25474 stack_pointer_rtx, hard_frame_pointer_rtx);
25475 }
25476
25477 arm_emit_multi_reg_pop (saved_regs_mask);
25478
25479 if (IS_INTERRUPT (func_type))
25480 {
25481 /* Interrupt handlers will have pushed the
25482 IP onto the stack, so restore it now. */
25483 rtx_insn *insn;
25484 rtx addr = gen_rtx_MEM (SImode,
25485 gen_rtx_POST_INC (SImode,
25486 stack_pointer_rtx));
25487 set_mem_alias_set (addr, get_frame_alias_set ());
25488 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25489 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25490 gen_rtx_REG (SImode, IP_REGNUM),
25491 NULL_RTX);
25492 }
25493
25494 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25495 return;
25496
25497 if (crtl->calls_eh_return)
25498 emit_insn (gen_addsi3 (stack_pointer_rtx,
25499 stack_pointer_rtx,
25500 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25501
25502 if (IS_STACKALIGN (func_type))
25503 /* Restore the original stack pointer. Before prologue, the stack was
25504 realigned and the original stack pointer saved in r0. For details,
25505 see comment in arm_expand_prologue. */
25506 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25507
25508 emit_jump_insn (simple_return_rtx);
25509 }
25510
25511 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25512 function is not a sibcall. */
25513 void
25514 arm_expand_epilogue (bool really_return)
25515 {
25516 unsigned long func_type;
25517 unsigned long saved_regs_mask;
25518 int num_regs = 0;
25519 int i;
25520 int amount;
25521 arm_stack_offsets *offsets;
25522
25523 func_type = arm_current_func_type ();
25524
25525 /* Naked functions don't have an epilogue. Hence, generate a return pattern, and
25526 let output_return_instruction take care of instruction emission if any. */
25527 if (IS_NAKED (func_type)
25528 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25529 {
25530 if (really_return)
25531 emit_jump_insn (simple_return_rtx);
25532 return;
25533 }
25534
25535 /* If we are throwing an exception, then we really must be doing a
25536 return, so we can't tail-call. */
25537 gcc_assert (!crtl->calls_eh_return || really_return);
25538
25539 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25540 {
25541 arm_expand_epilogue_apcs_frame (really_return);
25542 return;
25543 }
25544
25545 /* Get frame offsets for ARM. */
25546 offsets = arm_get_frame_offsets ();
25547 saved_regs_mask = offsets->saved_regs_mask;
25548 num_regs = bit_count (saved_regs_mask);
25549
25550 if (frame_pointer_needed)
25551 {
25552 rtx_insn *insn;
25553 /* Restore stack pointer if necessary. */
25554 if (TARGET_ARM)
25555 {
25556 /* In ARM mode, frame pointer points to first saved register.
25557 Restore stack pointer to last saved register. */
25558 amount = offsets->frame - offsets->saved_regs;
25559
25560 /* Force out any pending memory operations that reference stacked data
25561 before stack de-allocation occurs. */
25562 emit_insn (gen_blockage ());
25563 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25564 hard_frame_pointer_rtx,
25565 GEN_INT (amount)));
25566 arm_add_cfa_adjust_cfa_note (insn, amount,
25567 stack_pointer_rtx,
25568 hard_frame_pointer_rtx);
25569
25570 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25571 deleted. */
25572 emit_insn (gen_force_register_use (stack_pointer_rtx));
25573 }
25574 else
25575 {
25576 /* In Thumb-2 mode, the frame pointer points to the last saved
25577 register. */
25578 amount = offsets->locals_base - offsets->saved_regs;
25579 if (amount)
25580 {
25581 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25582 hard_frame_pointer_rtx,
25583 GEN_INT (amount)));
25584 arm_add_cfa_adjust_cfa_note (insn, amount,
25585 hard_frame_pointer_rtx,
25586 hard_frame_pointer_rtx);
25587 }
25588
25589 /* Force out any pending memory operations that reference stacked data
25590 before stack de-allocation occurs. */
25591 emit_insn (gen_blockage ());
25592 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25593 hard_frame_pointer_rtx));
25594 arm_add_cfa_adjust_cfa_note (insn, 0,
25595 stack_pointer_rtx,
25596 hard_frame_pointer_rtx);
25597 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25598 deleted. */
25599 emit_insn (gen_force_register_use (stack_pointer_rtx));
25600 }
25601 }
25602 else
25603 {
25604 /* Pop off outgoing args and local frame to adjust stack pointer to
25605 last saved register. */
25606 amount = offsets->outgoing_args - offsets->saved_regs;
25607 if (amount)
25608 {
25609 rtx_insn *tmp;
25610 /* Force out any pending memory operations that reference stacked data
25611 before stack de-allocation occurs. */
25612 emit_insn (gen_blockage ());
25613 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25614 stack_pointer_rtx,
25615 GEN_INT (amount)));
25616 arm_add_cfa_adjust_cfa_note (tmp, amount,
25617 stack_pointer_rtx, stack_pointer_rtx);
25618 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25619 not deleted. */
25620 emit_insn (gen_force_register_use (stack_pointer_rtx));
25621 }
25622 }
25623
25624 if (TARGET_HARD_FLOAT)
25625 {
25626 /* Generate VFP register multi-pop. */
25627 int end_reg = LAST_VFP_REGNUM + 1;
25628
25629 /* Scan the registers in reverse order. We need to match
25630 any groupings made in the prologue and generate matching
25631 vldm operations. The groups must match because,
25632 unlike pop, vldm can only handle consecutive regs. */
25633 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25634 /* Look for a case where a reg does not need restoring. */
25635 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25636 && (!df_regs_ever_live_p (i + 1)
25637 || call_used_regs[i + 1]))
25638 {
25639 /* Restore the regs discovered so far (from reg+2 to
25640 end_reg). */
25641 if (end_reg > i + 2)
25642 arm_emit_vfp_multi_reg_pop (i + 2,
25643 (end_reg - (i + 2)) / 2,
25644 stack_pointer_rtx);
25645 end_reg = i;
25646 }
25647
25648 /* Restore the remaining regs that we have discovered (or possibly
25649 even all of them, if the conditional in the for loop never
25650 fired). */
25651 if (end_reg > i + 2)
25652 arm_emit_vfp_multi_reg_pop (i + 2,
25653 (end_reg - (i + 2)) / 2,
25654 stack_pointer_rtx);
25655 }
25656
25657 if (TARGET_IWMMXT)
25658 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25659 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25660 {
25661 rtx_insn *insn;
25662 rtx addr = gen_rtx_MEM (V2SImode,
25663 gen_rtx_POST_INC (SImode,
25664 stack_pointer_rtx));
25665 set_mem_alias_set (addr, get_frame_alias_set ());
25666 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25667 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25668 gen_rtx_REG (V2SImode, i),
25669 NULL_RTX);
25670 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25671 stack_pointer_rtx, stack_pointer_rtx);
25672 }
25673
25674 if (saved_regs_mask)
25675 {
25676 rtx insn;
25677 bool return_in_pc = false;
25678
25679 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25680 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25681 && !IS_CMSE_ENTRY (func_type)
25682 && !IS_STACKALIGN (func_type)
25683 && really_return
25684 && crtl->args.pretend_args_size == 0
25685 && saved_regs_mask & (1 << LR_REGNUM)
25686 && !crtl->calls_eh_return)
25687 {
25688 saved_regs_mask &= ~(1 << LR_REGNUM);
25689 saved_regs_mask |= (1 << PC_REGNUM);
25690 return_in_pc = true;
25691 }
25692
25693 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25694 {
25695 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25696 if (saved_regs_mask & (1 << i))
25697 {
25698 rtx addr = gen_rtx_MEM (SImode,
25699 gen_rtx_POST_INC (SImode,
25700 stack_pointer_rtx));
25701 set_mem_alias_set (addr, get_frame_alias_set ());
25702
25703 if (i == PC_REGNUM)
25704 {
25705 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25706 XVECEXP (insn, 0, 0) = ret_rtx;
25707 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25708 addr);
25709 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25710 insn = emit_jump_insn (insn);
25711 }
25712 else
25713 {
25714 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25715 addr));
25716 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25717 gen_rtx_REG (SImode, i),
25718 NULL_RTX);
25719 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25720 stack_pointer_rtx,
25721 stack_pointer_rtx);
25722 }
25723 }
25724 }
25725 else
25726 {
25727 if (TARGET_LDRD
25728 && current_tune->prefer_ldrd_strd
25729 && !optimize_function_for_size_p (cfun))
25730 {
25731 if (TARGET_THUMB2)
25732 thumb2_emit_ldrd_pop (saved_regs_mask);
25733 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25734 arm_emit_ldrd_pop (saved_regs_mask);
25735 else
25736 arm_emit_multi_reg_pop (saved_regs_mask);
25737 }
25738 else
25739 arm_emit_multi_reg_pop (saved_regs_mask);
25740 }
25741
25742 if (return_in_pc)
25743 return;
25744 }
25745
25746 amount
25747 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes ();
25748 if (amount)
25749 {
25750 int i, j;
25751 rtx dwarf = NULL_RTX;
25752 rtx_insn *tmp =
25753 emit_insn (gen_addsi3 (stack_pointer_rtx,
25754 stack_pointer_rtx,
25755 GEN_INT (amount)));
25756
25757 RTX_FRAME_RELATED_P (tmp) = 1;
25758
25759 if (cfun->machine->uses_anonymous_args)
25760 {
25761 /* Restore pretend args. Refer to arm_expand_prologue for how
25762 pretend_args are saved on the stack. */
25763 int num_regs = crtl->args.pretend_args_size / 4;
25764 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25765 for (j = 0, i = 0; j < num_regs; i++)
25766 if (saved_regs_mask & (1 << i))
25767 {
25768 rtx reg = gen_rtx_REG (SImode, i);
25769 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25770 j++;
25771 }
25772 REG_NOTES (tmp) = dwarf;
25773 }
25774 arm_add_cfa_adjust_cfa_note (tmp, amount,
25775 stack_pointer_rtx, stack_pointer_rtx);
25776 }
25777
25778 /* Clear all caller-saved regs that are not used to return. */
25779 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25780 {
25781 /* CMSE_ENTRY always returns. */
25782 gcc_assert (really_return);
25783 cmse_nonsecure_entry_clear_before_return ();
25784 }
25785
25786 if (!really_return)
25787 return;
25788
25789 if (crtl->calls_eh_return)
25790 emit_insn (gen_addsi3 (stack_pointer_rtx,
25791 stack_pointer_rtx,
25792 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25793
25794 if (IS_STACKALIGN (func_type))
25795 /* Restore the original stack pointer. Before prologue, the stack was
25796 realigned and the original stack pointer saved in r0. For details,
25797 see comment in arm_expand_prologue. */
25798 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25799
25800 emit_jump_insn (simple_return_rtx);
25801 }
25802
25803 /* Implementation of insn prologue_thumb1_interwork. This is the first
25804 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25805
25806 const char *
25807 thumb1_output_interwork (void)
25808 {
25809 const char * name;
25810 FILE *f = asm_out_file;
25811
25812 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25813 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25814 == SYMBOL_REF);
25815 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25816
25817 /* Generate code sequence to switch us into Thumb mode. */
25818 /* The .code 32 directive has already been emitted by
25819 ASM_DECLARE_FUNCTION_NAME. */
25820 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25821 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25822
25823 /* Generate a label, so that the debugger will notice the
25824 change in instruction sets. This label is also used by
25825 the assembler to bypass the ARM code when this function
25826 is called from a Thumb encoded function elsewhere in the
25827 same file. Hence the definition of STUB_NAME here must
25828 agree with the definition in gas/config/tc-arm.c. */
25829
25830 #define STUB_NAME ".real_start_of"
25831
25832 fprintf (f, "\t.code\t16\n");
25833 #ifdef ARM_PE
25834 if (arm_dllexport_name_p (name))
25835 name = arm_strip_name_encoding (name);
25836 #endif
25837 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25838 fprintf (f, "\t.thumb_func\n");
25839 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25840
25841 return "";
25842 }
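/* Illustrative output (added for exposition; not from the original sources):
   for a hypothetical function "foo" compiled for ARM/Thumb interworking,
   the routine above emits roughly

       orr  ip, pc, #1
       bx   ip
       .code 16
       .globl .real_start_offoo
       .thumb_func
   .real_start_offoo:

   where the exact label spelling depends on %U, the target's user-label
   prefix (often empty on ELF targets).  */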
25843
25844 /* Handle the case of a double word load into a low register from
25845 a computed memory address. The computed address may involve a
25846 register which is overwritten by the load. */
25847 const char *
25848 thumb_load_double_from_address (rtx *operands)
25849 {
25850 rtx addr;
25851 rtx base;
25852 rtx offset;
25853 rtx arg1;
25854 rtx arg2;
25855
25856 gcc_assert (REG_P (operands[0]));
25857 gcc_assert (MEM_P (operands[1]));
25858
25859 /* Get the memory address. */
25860 addr = XEXP (operands[1], 0);
25861
25862 /* Work out how the memory address is computed. */
25863 switch (GET_CODE (addr))
25864 {
25865 case REG:
25866 operands[2] = adjust_address (operands[1], SImode, 4);
25867
25868 if (REGNO (operands[0]) == REGNO (addr))
25869 {
25870 output_asm_insn ("ldr\t%H0, %2", operands);
25871 output_asm_insn ("ldr\t%0, %1", operands);
25872 }
25873 else
25874 {
25875 output_asm_insn ("ldr\t%0, %1", operands);
25876 output_asm_insn ("ldr\t%H0, %2", operands);
25877 }
25878 break;
25879
25880 case CONST:
25881 /* Compute <address> + 4 for the high order load. */
25882 operands[2] = adjust_address (operands[1], SImode, 4);
25883
25884 output_asm_insn ("ldr\t%0, %1", operands);
25885 output_asm_insn ("ldr\t%H0, %2", operands);
25886 break;
25887
25888 case PLUS:
25889 arg1 = XEXP (addr, 0);
25890 arg2 = XEXP (addr, 1);
25891
25892 if (CONSTANT_P (arg1))
25893 base = arg2, offset = arg1;
25894 else
25895 base = arg1, offset = arg2;
25896
25897 gcc_assert (REG_P (base));
25898
25899 /* Catch the case of <address> = <reg> + <reg> */
25900 if (REG_P (offset))
25901 {
25902 int reg_offset = REGNO (offset);
25903 int reg_base = REGNO (base);
25904 int reg_dest = REGNO (operands[0]);
25905
25906 /* Add the base and offset registers together into the
25907 higher destination register. */
25908 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25909 reg_dest + 1, reg_base, reg_offset);
25910
25911 /* Load the lower destination register from the address in
25912 the higher destination register. */
25913 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25914 reg_dest, reg_dest + 1);
25915
25916 /* Load the higher destination register from its own address
25917 plus 4. */
25918 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25919 reg_dest + 1, reg_dest + 1);
25920 }
25921 else
25922 {
25923 /* Compute <address> + 4 for the high order load. */
25924 operands[2] = adjust_address (operands[1], SImode, 4);
25925
25926 /* If the computed address is held in the low order register
25927 then load the high order register first, otherwise always
25928 load the low order register first. */
25929 if (REGNO (operands[0]) == REGNO (base))
25930 {
25931 output_asm_insn ("ldr\t%H0, %2", operands);
25932 output_asm_insn ("ldr\t%0, %1", operands);
25933 }
25934 else
25935 {
25936 output_asm_insn ("ldr\t%0, %1", operands);
25937 output_asm_insn ("ldr\t%H0, %2", operands);
25938 }
25939 }
25940 break;
25941
25942 case LABEL_REF:
25943 /* With no registers to worry about we can just load the value
25944 directly. */
25945 operands[2] = adjust_address (operands[1], SImode, 4);
25946
25947 output_asm_insn ("ldr\t%H0, %2", operands);
25948 output_asm_insn ("ldr\t%0, %1", operands);
25949 break;
25950
25951 default:
25952 gcc_unreachable ();
25953 }
25954
25955 return "";
25956 }
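/* Illustrative example (expository addition, not part of the upstream code):
   assume operands[0] is r2 (so the pair r2/r3 is loaded) and the address
   is held in r2 itself.  The REG case above then loads the high word
   first so the base is not clobbered prematurely, roughly

       ldr  r3, [r2, #4]
       ldr  r2, [r2]

   With a base register that is not overwritten (say r4), the low word is
   loaded first instead.  */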
25957
25958 const char *
25959 thumb_output_move_mem_multiple (int n, rtx *operands)
25960 {
25961 switch (n)
25962 {
25963 case 2:
25964 if (REGNO (operands[4]) > REGNO (operands[5]))
25965 std::swap (operands[4], operands[5]);
25966
25967 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25968 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25969 break;
25970
25971 case 3:
25972 if (REGNO (operands[4]) > REGNO (operands[5]))
25973 std::swap (operands[4], operands[5]);
25974 if (REGNO (operands[5]) > REGNO (operands[6]))
25975 std::swap (operands[5], operands[6]);
25976 if (REGNO (operands[4]) > REGNO (operands[5]))
25977 std::swap (operands[4], operands[5]);
25978
25979 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25980 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25981 break;
25982
25983 default:
25984 gcc_unreachable ();
25985 }
25986
25987 return "";
25988 }
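/* Illustrative example (added for exposition; not from the original sources):
   for n == 2 with scratch registers r4 and r5 and pointer registers r1
   (source) and r0 (destination), the output is simply

       ldmia r1!, {r4, r5}
       stmia r0!, {r4, r5}

   The swaps above only ensure the register lists are in ascending order,
   as required by ldmia/stmia.  */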
25989
25990 /* Output a call-via instruction for thumb state. */
25991 const char *
25992 thumb_call_via_reg (rtx reg)
25993 {
25994 int regno = REGNO (reg);
25995 rtx *labelp;
25996
25997 gcc_assert (regno < LR_REGNUM);
25998
25999 /* If we are in the normal text section we can use a single instance
26000 per compilation unit. If we are doing function sections, then we need
26001 an entry per section, since we can't rely on reachability. */
26002 if (in_section == text_section)
26003 {
26004 thumb_call_reg_needed = 1;
26005
26006 if (thumb_call_via_label[regno] == NULL)
26007 thumb_call_via_label[regno] = gen_label_rtx ();
26008 labelp = thumb_call_via_label + regno;
26009 }
26010 else
26011 {
26012 if (cfun->machine->call_via[regno] == NULL)
26013 cfun->machine->call_via[regno] = gen_label_rtx ();
26014 labelp = cfun->machine->call_via + regno;
26015 }
26016
26017 output_asm_insn ("bl\t%a0", labelp);
26018 return "";
26019 }
26020
26021 /* Routines for generating rtl. */
26022 void
26023 thumb_expand_movmemqi (rtx *operands)
26024 {
26025 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26026 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26027 HOST_WIDE_INT len = INTVAL (operands[2]);
26028 HOST_WIDE_INT offset = 0;
26029
26030 while (len >= 12)
26031 {
26032 emit_insn (gen_movmem12b (out, in, out, in));
26033 len -= 12;
26034 }
26035
26036 if (len >= 8)
26037 {
26038 emit_insn (gen_movmem8b (out, in, out, in));
26039 len -= 8;
26040 }
26041
26042 if (len >= 4)
26043 {
26044 rtx reg = gen_reg_rtx (SImode);
26045 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26046 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26047 len -= 4;
26048 offset += 4;
26049 }
26050
26051 if (len >= 2)
26052 {
26053 rtx reg = gen_reg_rtx (HImode);
26054 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26055 plus_constant (Pmode, in,
26056 offset))));
26057 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26058 offset)),
26059 reg));
26060 len -= 2;
26061 offset += 2;
26062 }
26063
26064 if (len)
26065 {
26066 rtx reg = gen_reg_rtx (QImode);
26067 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26068 plus_constant (Pmode, in,
26069 offset))));
26070 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26071 offset)),
26072 reg));
26073 }
26074 }
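/* Worked example (expository addition, not part of the upstream code):
   for a 23-byte copy the code above expands to one 12-byte block move
   (movmem12b), one 8-byte block move (movmem8b), then a halfword copy at
   offset 0 and a byte copy at offset 2 from the already-advanced
   pointers.  The block-move patterns update the pointer registers
   themselves, which is presumably why OFFSET only tracks the scalar
   tail.  */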
26075
26076 void
26077 thumb_reload_out_hi (rtx *operands)
26078 {
26079 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26080 }
26081
26082 /* Return the length of a function name prefix
26083 that starts with the character 'c'. */
26084 static int
26085 arm_get_strip_length (int c)
26086 {
26087 switch (c)
26088 {
26089 ARM_NAME_ENCODING_LENGTHS
26090 default: return 0;
26091 }
26092 }
26093
26094 /* Return a pointer to a function's name with any
26095 and all prefix encodings stripped from it. */
26096 const char *
26097 arm_strip_name_encoding (const char *name)
26098 {
26099 int skip;
26100
26101 while ((skip = arm_get_strip_length (* name)))
26102 name += skip;
26103
26104 return name;
26105 }
26106
26107 /* If there is a '*' anywhere in the name's prefix, then
26108 emit the stripped name verbatim; otherwise prepend an
26109 underscore if leading underscores are being used. */
26110 void
26111 arm_asm_output_labelref (FILE *stream, const char *name)
26112 {
26113 int skip;
26114 int verbatim = 0;
26115
26116 while ((skip = arm_get_strip_length (* name)))
26117 {
26118 verbatim |= (*name == '*');
26119 name += skip;
26120 }
26121
26122 if (verbatim)
26123 fputs (name, stream);
26124 else
26125 asm_fprintf (stream, "%U%s", name);
26126 }
26127
26128 /* This function is used to emit an EABI tag and its associated value.
26129 We emit the numerical value of the tag in case the assembler does not
26130 support textual tags (e.g. gas prior to 2.20). If requested, we include
26131 the tag name in a comment so that anyone reading the assembler output
26132 will know which tag is being set.
26133
26134 This function is not static because arm-c.c needs it too. */
26135
26136 void
26137 arm_emit_eabi_attribute (const char *name, int num, int val)
26138 {
26139 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26140 if (flag_verbose_asm || flag_debug_asm)
26141 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26142 asm_fprintf (asm_out_file, "\n");
26143 }
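/* Illustrative output (added for exposition; not from the original sources):
   arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, 2) under
   -fverbose-asm would print roughly

       .eabi_attribute 30, 2   @ Tag_ABI_optimization_goals

   where '@' stands for ASM_COMMENT_START on typical ARM assemblers.  */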
26144
26145 /* This function is used to print CPU tuning information as a comment
26146 in the assembler file. Pointers are not printed for now. */
26147
26148 void
26149 arm_print_tune_info (void)
26150 {
26151 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26152 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26153 current_tune->constant_limit);
26154 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26155 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26156 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26157 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26158 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26159 "prefetch.l1_cache_size:\t%d\n",
26160 current_tune->prefetch.l1_cache_size);
26161 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26162 "prefetch.l1_cache_line_size:\t%d\n",
26163 current_tune->prefetch.l1_cache_line_size);
26164 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26165 "prefer_constant_pool:\t%d\n",
26166 (int) current_tune->prefer_constant_pool);
26167 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26168 "branch_cost:\t(s:speed, p:predictable)\n");
26169 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26170 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26171 current_tune->branch_cost (false, false));
26172 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26173 current_tune->branch_cost (false, true));
26174 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26175 current_tune->branch_cost (true, false));
26176 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26177 current_tune->branch_cost (true, true));
26178 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26179 "prefer_ldrd_strd:\t%d\n",
26180 (int) current_tune->prefer_ldrd_strd);
26181 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26182 "logical_op_non_short_circuit:\t[%d,%d]\n",
26183 (int) current_tune->logical_op_non_short_circuit_thumb,
26184 (int) current_tune->logical_op_non_short_circuit_arm);
26185 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26186 "prefer_neon_for_64bits:\t%d\n",
26187 (int) current_tune->prefer_neon_for_64bits);
26188 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26189 "disparage_flag_setting_t16_encodings:\t%d\n",
26190 (int) current_tune->disparage_flag_setting_t16_encodings);
26191 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26192 "string_ops_prefer_neon:\t%d\n",
26193 (int) current_tune->string_ops_prefer_neon);
26194 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26195 "max_insns_inline_memset:\t%d\n",
26196 current_tune->max_insns_inline_memset);
26197 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26198 current_tune->fusible_ops);
26199 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26200 (int) current_tune->sched_autopref);
26201 }
26202
26203 /* Print .arch and .arch_extension directives corresponding to the
26204 current architecture configuration. */
26205 static void
26206 arm_print_asm_arch_directives ()
26207 {
26208 const arch_option *arch
26209 = arm_parse_arch_option_name (all_architectures, "-march",
26210 arm_active_target.arch_name);
26211 auto_sbitmap opt_bits (isa_num_bits);
26212
26213 gcc_assert (arch);
26214
26215 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26216 if (!arch->common.extensions)
26217 return;
26218
26219 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26220 opt->name != NULL;
26221 opt++)
26222 {
26223 if (!opt->remove)
26224 {
26225 arm_initialize_isa (opt_bits, opt->isa_bits);
26226
26227 /* If every feature bit of this option is set in the target
26228 ISA specification, print out the option name. However,
26229 don't print anything if all the bits are part of the
26230 FPU specification. */
26231 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26232 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26233 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26234 }
26235 }
26236 }
26237
26238 static void
26239 arm_file_start (void)
26240 {
26241 int val;
26242
26243 if (TARGET_BPABI)
26244 {
26245 /* We don't have a specified CPU. Use the architecture to
26246 generate the tags.
26247
26248 Note: it might be better to do this unconditionally; then the
26249 assembler would not need to know about all new CPU names as
26250 they are added. */
26251 if (!arm_active_target.core_name)
26252 {
26253 /* armv7ve doesn't support any extensions. */
26254 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26255 {
26256 /* Keep backward compatibility for assemblers
26257 which don't support armv7ve. */
26258 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26259 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26260 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26261 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26262 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26263 }
26264 else
26265 arm_print_asm_arch_directives ();
26266 }
26267 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26268 asm_fprintf (asm_out_file, "\t.arch %s\n",
26269 arm_active_target.core_name + 8);
26270 else
26271 {
26272 const char* truncated_name
26273 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26274 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26275 }
26276
26277 if (print_tune_info)
26278 arm_print_tune_info ();
26279
26280 if (! TARGET_SOFT_FLOAT)
26281 {
26282 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26283 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26284
26285 if (TARGET_HARD_FLOAT_ABI)
26286 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26287 }
26288
26289 /* Some of these attributes only apply when the corresponding features
26290 are used. However we don't have any easy way of figuring this out.
26291 Conservatively record the setting that would have been used. */
26292
26293 if (flag_rounding_math)
26294 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26295
26296 if (!flag_unsafe_math_optimizations)
26297 {
26298 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26299 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26300 }
26301 if (flag_signaling_nans)
26302 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26303
26304 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26305 flag_finite_math_only ? 1 : 3);
26306
26307 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26308 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26309 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26310 flag_short_enums ? 1 : 2);
26311
26312 /* Tag_ABI_optimization_goals. */
26313 if (optimize_size)
26314 val = 4;
26315 else if (optimize >= 2)
26316 val = 2;
26317 else if (optimize)
26318 val = 1;
26319 else
26320 val = 6;
26321 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26322
26323 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26324 unaligned_access);
26325
26326 if (arm_fp16_format)
26327 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26328 (int) arm_fp16_format);
26329
26330 if (arm_lang_output_object_attributes_hook)
26331 arm_lang_output_object_attributes_hook ();
26332 }
26333
26334 default_file_start ();
26335 }
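/* Illustrative output (expository addition, not part of the upstream code):
   building with -march=armv7ve and no -mcpu, the code above falls into
   the backward-compatibility branch and the file starts roughly with

       .arch armv7-a
       .arch_extension virt
       .arch_extension idiv
       .arch_extension sec
       .arch_extension mp

   followed by the .eabi_attribute directives selected from the flags
   checked above.  */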
26336
26337 static void
26338 arm_file_end (void)
26339 {
26340 int regno;
26341
26342 if (NEED_INDICATE_EXEC_STACK)
26343 /* Add .note.GNU-stack. */
26344 file_end_indicate_exec_stack ();
26345
26346 if (! thumb_call_reg_needed)
26347 return;
26348
26349 switch_to_section (text_section);
26350 asm_fprintf (asm_out_file, "\t.code 16\n");
26351 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26352
26353 for (regno = 0; regno < LR_REGNUM; regno++)
26354 {
26355 rtx label = thumb_call_via_label[regno];
26356
26357 if (label != 0)
26358 {
26359 targetm.asm_out.internal_label (asm_out_file, "L",
26360 CODE_LABEL_NUMBER (label));
26361 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26362 }
26363 }
26364 }
26365
26366 #ifndef ARM_PE
26367 /* Symbols in the text segment can be accessed without indirecting via the
26368 constant pool; it may take an extra binary operation, but this is still
26369 faster than indirecting via memory. Don't do this when not optimizing,
26370 since we won't be calculating al of the offsets necessary to do this
26371 simplification. */
26372
26373 static void
26374 arm_encode_section_info (tree decl, rtx rtl, int first)
26375 {
26376 if (optimize > 0 && TREE_CONSTANT (decl))
26377 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26378
26379 default_encode_section_info (decl, rtl, first);
26380 }
26381 #endif /* !ARM_PE */
26382
26383 static void
26384 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26385 {
26386 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26387 && !strcmp (prefix, "L"))
26388 {
26389 arm_ccfsm_state = 0;
26390 arm_target_insn = NULL;
26391 }
26392 default_internal_label (stream, prefix, labelno);
26393 }
26394
26395 /* Output code to add DELTA to the first argument, and then jump
26396 to FUNCTION. Used for C++ multiple inheritance. */
26397
26398 static void
26399 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26400 HOST_WIDE_INT, tree function)
26401 {
26402 static int thunk_label = 0;
26403 char label[256];
26404 char labelpc[256];
26405 int mi_delta = delta;
26406 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26407 int shift = 0;
26408 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26409 ? 1 : 0);
26410 if (mi_delta < 0)
26411 mi_delta = - mi_delta;
26412
26413 final_start_function (emit_barrier (), file, 1);
26414
26415 if (TARGET_THUMB1)
26416 {
26417 int labelno = thunk_label++;
26418 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26419 /* Thunks are entered in ARM mode when available. */
26420 if (TARGET_THUMB1_ONLY)
26421 {
26422 /* push r3 so we can use it as a temporary. */
26423 /* TODO: Omit this save if r3 is not used. */
26424 fputs ("\tpush {r3}\n", file);
26425 fputs ("\tldr\tr3, ", file);
26426 }
26427 else
26428 {
26429 fputs ("\tldr\tr12, ", file);
26430 }
26431 assemble_name (file, label);
26432 fputc ('\n', file);
26433 if (flag_pic)
26434 {
26435 /* If we are generating PIC, the ldr instruction below loads
26436 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26437 the address of the add + 8, so we have:
26438
26439 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26440 = target + 1.
26441
26442 Note that we have "+ 1" because some versions of GNU ld
26443 don't set the low bit of the result for R_ARM_REL32
26444 relocations against thumb function symbols.
26445 On ARMv6M this is +4, not +8. */
26446 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26447 assemble_name (file, labelpc);
26448 fputs (":\n", file);
26449 if (TARGET_THUMB1_ONLY)
26450 {
26451 /* This is 2 insns after the start of the thunk, so we know it
26452 is 4-byte aligned. */
26453 fputs ("\tadd\tr3, pc, r3\n", file);
26454 fputs ("\tmov r12, r3\n", file);
26455 }
26456 else
26457 fputs ("\tadd\tr12, pc, r12\n", file);
26458 }
26459 else if (TARGET_THUMB1_ONLY)
26460 fputs ("\tmov r12, r3\n", file);
26461 }
26462 if (TARGET_THUMB1_ONLY)
26463 {
26464 if (mi_delta > 255)
26465 {
26466 fputs ("\tldr\tr3, ", file);
26467 assemble_name (file, label);
26468 fputs ("+4\n", file);
26469 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26470 mi_op, this_regno, this_regno);
26471 }
26472 else if (mi_delta != 0)
26473 {
26474 /* Thumb1 unified syntax requires s suffix in instruction name when
26475 one of the operands is immediate. */
26476 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26477 mi_op, this_regno, this_regno,
26478 mi_delta);
26479 }
26480 }
26481 else
26482 {
26483 /* TODO: Use movw/movt for large constants when available. */
26484 while (mi_delta != 0)
26485 {
26486 if ((mi_delta & (3 << shift)) == 0)
26487 shift += 2;
26488 else
26489 {
26490 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26491 mi_op, this_regno, this_regno,
26492 mi_delta & (0xff << shift));
26493 mi_delta &= ~(0xff << shift);
26494 shift += 8;
26495 }
26496 }
26497 }
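/* Worked example (added for exposition; not from the original sources):
   the loop above splits the delta into byte-sized chunks aligned on 2-bit
   boundaries so that each chunk is a legal ARM immediate.  For a
   hypothetical delta of 0x10203 and a non-aggregate return
   (this_regno == 0) it emits

       add r0, r0, #3
       add r0, r0, #512
       add r0, r0, #65536

   i.e. one add per non-zero chunk.  */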
26498 if (TARGET_THUMB1)
26499 {
26500 if (TARGET_THUMB1_ONLY)
26501 fputs ("\tpop\t{r3}\n", file);
26502
26503 fprintf (file, "\tbx\tr12\n");
26504 ASM_OUTPUT_ALIGN (file, 2);
26505 assemble_name (file, label);
26506 fputs (":\n", file);
26507 if (flag_pic)
26508 {
26509 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26510 rtx tem = XEXP (DECL_RTL (function), 0);
26511 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26512 pipeline offset is four rather than eight. Adjust the offset
26513 accordingly. */
26514 tem = plus_constant (GET_MODE (tem), tem,
26515 TARGET_THUMB1_ONLY ? -3 : -7);
26516 tem = gen_rtx_MINUS (GET_MODE (tem),
26517 tem,
26518 gen_rtx_SYMBOL_REF (Pmode,
26519 ggc_strdup (labelpc)));
26520 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26521 }
26522 else
26523 /* Output ".word .LTHUNKn". */
26524 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26525
26526 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26527 assemble_integer (GEN_INT (mi_delta), 4, BITS_PER_WORD, 1);
26528 }
26529 else
26530 {
26531 fputs ("\tb\t", file);
26532 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26533 if (NEED_PLT_RELOC)
26534 fputs ("(PLT)", file);
26535 fputc ('\n', file);
26536 }
26537
26538 final_end_function ();
26539 }
26540
26541 /* MI thunk handling for TARGET_32BIT. */
26542
26543 static void
26544 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26545 HOST_WIDE_INT vcall_offset, tree function)
26546 {
26547 /* On ARM, this_regno is R0 or R1 depending on
26548 whether the function returns an aggregate or not.
26549 */
26550 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26551 function)
26552 ? R1_REGNUM : R0_REGNUM);
26553
26554 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26555 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26556 reload_completed = 1;
26557 emit_note (NOTE_INSN_PROLOGUE_END);
26558
26559 /* Add DELTA to THIS_RTX. */
26560 if (delta != 0)
26561 arm_split_constant (PLUS, Pmode, NULL_RTX,
26562 delta, this_rtx, this_rtx, false);
26563
26564 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26565 if (vcall_offset != 0)
26566 {
26567 /* Load *THIS_RTX. */
26568 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26569 /* Compute *THIS_RTX + VCALL_OFFSET. */
26570 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26571 false);
26572 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26573 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26574 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26575 }
26576
26577 /* Generate a tail call to the target function. */
26578 if (!TREE_USED (function))
26579 {
26580 assemble_external (function);
26581 TREE_USED (function) = 1;
26582 }
26583 rtx funexp = XEXP (DECL_RTL (function), 0);
26584 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26585 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26586 SIBLING_CALL_P (insn) = 1;
26587
26588 insn = get_insns ();
26589 shorten_branches (insn);
26590 final_start_function (insn, file, 1);
26591 final (insn, file, 1);
26592 final_end_function ();
26593
26594 /* Stop pretending this is a post-reload pass. */
26595 reload_completed = 0;
26596 }
26597
26598 /* Output code to add DELTA to the first argument, and then jump
26599 to FUNCTION. Used for C++ multiple inheritance. */
26600
26601 static void
26602 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26603 HOST_WIDE_INT vcall_offset, tree function)
26604 {
26605 if (TARGET_32BIT)
26606 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26607 else
26608 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26609 }
26610
26611 int
26612 arm_emit_vector_const (FILE *file, rtx x)
26613 {
26614 int i;
26615 const char * pattern;
26616
26617 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26618
26619 switch (GET_MODE (x))
26620 {
26621 case E_V2SImode: pattern = "%08x"; break;
26622 case E_V4HImode: pattern = "%04x"; break;
26623 case E_V8QImode: pattern = "%02x"; break;
26624 default: gcc_unreachable ();
26625 }
26626
26627 fprintf (file, "0x");
26628 for (i = CONST_VECTOR_NUNITS (x); i--;)
26629 {
26630 rtx element;
26631
26632 element = CONST_VECTOR_ELT (x, i);
26633 fprintf (file, pattern, INTVAL (element));
26634 }
26635
26636 return 1;
26637 }
26638
26639 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
26640 HFmode constant pool entries are actually loaded with ldr. */
26641 void
26642 arm_emit_fp16_const (rtx c)
26643 {
26644 long bits;
26645
26646 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26647 if (WORDS_BIG_ENDIAN)
26648 assemble_zeros (2);
26649 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26650 if (!WORDS_BIG_ENDIAN)
26651 assemble_zeros (2);
26652 }
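/* Illustrative example (expository addition, not part of the upstream code):
   the HFmode value 1.0 has the bit pattern 0x3c00, so on a little-endian
   target the routine above emits roughly

       .short 0x3c00
       .space 2

   while a WORDS_BIG_ENDIAN target pads first and then emits the two
   significant bytes.  The exact directives depend on the target's
   assemble_integer/assemble_zeros hooks.  */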
26653
26654 const char *
26655 arm_output_load_gr (rtx *operands)
26656 {
26657 rtx reg;
26658 rtx offset;
26659 rtx wcgr;
26660 rtx sum;
26661
26662 if (!MEM_P (operands [1])
26663 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26664 || !REG_P (reg = XEXP (sum, 0))
26665 || !CONST_INT_P (offset = XEXP (sum, 1))
26666 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26667 return "wldrw%?\t%0, %1";
26668
26669 /* Fix up an out-of-range load of a GR register. */
26670 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26671 wcgr = operands[0];
26672 operands[0] = reg;
26673 output_asm_insn ("ldr%?\t%0, %1", operands);
26674
26675 operands[0] = wcgr;
26676 operands[1] = reg;
26677 output_asm_insn ("tmcr%?\t%0, %1", operands);
26678 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26679
26680 return "";
26681 }
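/* Illustrative example (added for exposition; not from the original sources):
   for an out-of-range address such as [r1, #2048], the expansion above
   produces roughly

       str  r1, [sp, #-4]!   @ Start of GR load expansion
       ldr  r1, [r1, #2048]
       tmcr wcgr0, r1        @ wcgr0 stands in for operands[0]
       ldr  r1, [sp], #4     @ End of GR load expansion

   The base register doubles as the temporary, so it is saved and
   restored around the sequence.  */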
26682
26683 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26684
26685 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26686 named arg and all anonymous args onto the stack.
26687 XXX I know the prologue shouldn't be pushing registers, but it is faster
26688 that way. */
26689
26690 static void
26691 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26692 machine_mode mode,
26693 tree type,
26694 int *pretend_size,
26695 int second_time ATTRIBUTE_UNUSED)
26696 {
26697 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26698 int nregs;
26699
26700 cfun->machine->uses_anonymous_args = 1;
26701 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26702 {
26703 nregs = pcum->aapcs_ncrn;
26704 if (nregs & 1)
26705 {
26706 int res = arm_needs_doubleword_align (mode, type);
26707 if (res < 0 && warn_psabi)
26708 inform (input_location, "parameter passing for argument of "
26709 "type %qT changed in GCC 7.1", type);
26710 else if (res > 0)
26711 nregs++;
26712 }
26713 }
26714 else
26715 nregs = pcum->nregs;
26716
26717 if (nregs < NUM_ARG_REGS)
26718 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26719 }
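/* Illustrative example (expository addition, not part of the upstream code):
   for a variadic AAPCS function such as "int f (int a, ...)", one core
   register (r0) is consumed by the named argument, so nregs == 1 and
   *pretend_size becomes (4 - 1) * 4 == 12, telling the prologue to push
   r1-r3 so the anonymous arguments form a contiguous block with any
   arguments already on the stack.  */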
26720
26721 /* We can't rely on the caller doing the proper promotion when
26722 using APCS or ATPCS. */
26723
26724 static bool
26725 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26726 {
26727 return !TARGET_AAPCS_BASED;
26728 }
26729
26730 static machine_mode
26731 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26732 machine_mode mode,
26733 int *punsignedp ATTRIBUTE_UNUSED,
26734 const_tree fntype ATTRIBUTE_UNUSED,
26735 int for_return ATTRIBUTE_UNUSED)
26736 {
26737 if (GET_MODE_CLASS (mode) == MODE_INT
26738 && GET_MODE_SIZE (mode) < 4)
26739 return SImode;
26740
26741 return mode;
26742 }
26743
26744
26745 static bool
26746 arm_default_short_enums (void)
26747 {
26748 return ARM_DEFAULT_SHORT_ENUMS;
26749 }
26750
26751
26752 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26753
26754 static bool
26755 arm_align_anon_bitfield (void)
26756 {
26757 return TARGET_AAPCS_BASED;
26758 }
26759
26760
26761 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26762
26763 static tree
26764 arm_cxx_guard_type (void)
26765 {
26766 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26767 }
26768
26769
26770 /* The EABI says test the least significant bit of a guard variable. */
26771
26772 static bool
26773 arm_cxx_guard_mask_bit (void)
26774 {
26775 return TARGET_AAPCS_BASED;
26776 }
26777
26778
26779 /* The EABI specifies that all array cookies are 8 bytes long. */
26780
26781 static tree
26782 arm_get_cookie_size (tree type)
26783 {
26784 tree size;
26785
26786 if (!TARGET_AAPCS_BASED)
26787 return default_cxx_get_cookie_size (type);
26788
26789 size = build_int_cst (sizetype, 8);
26790 return size;
26791 }
26792
26793
26794 /* The EABI says that array cookies should also contain the element size. */
26795
26796 static bool
26797 arm_cookie_has_size (void)
26798 {
26799 return TARGET_AAPCS_BASED;
26800 }
26801
26802
26803 /* The EABI says constructors and destructors should return a pointer to
26804 the object constructed/destroyed. */
26805
26806 static bool
26807 arm_cxx_cdtor_returns_this (void)
26808 {
26809 return TARGET_AAPCS_BASED;
26810 }
26811
26812 /* The EABI says that an inline function may never be the key
26813 method. */
26814
26815 static bool
26816 arm_cxx_key_method_may_be_inline (void)
26817 {
26818 return !TARGET_AAPCS_BASED;
26819 }
26820
26821 static void
26822 arm_cxx_determine_class_data_visibility (tree decl)
26823 {
26824 if (!TARGET_AAPCS_BASED
26825 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26826 return;
26827
26828 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26829 is exported. However, on systems without dynamic vague linkage,
26830 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26831 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26832 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26833 else
26834 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26835 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26836 }
26837
26838 static bool
26839 arm_cxx_class_data_always_comdat (void)
26840 {
26841 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26842 vague linkage if the class has no key function. */
26843 return !TARGET_AAPCS_BASED;
26844 }
26845
26846
26847 /* The EABI says __aeabi_atexit should be used to register static
26848 destructors. */
26849
26850 static bool
26851 arm_cxx_use_aeabi_atexit (void)
26852 {
26853 return TARGET_AAPCS_BASED;
26854 }
26855
26856
26857 void
26858 arm_set_return_address (rtx source, rtx scratch)
26859 {
26860 arm_stack_offsets *offsets;
26861 HOST_WIDE_INT delta;
26862 rtx addr;
26863 unsigned long saved_regs;
26864
26865 offsets = arm_get_frame_offsets ();
26866 saved_regs = offsets->saved_regs_mask;
26867
26868 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26869 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26870 else
26871 {
26872 if (frame_pointer_needed)
26873 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26874 else
26875 {
26876 /* LR will be the first saved register. */
26877 delta = offsets->outgoing_args - (offsets->frame + 4);
26878
26879
26880 if (delta >= 4096)
26881 {
26882 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26883 GEN_INT (delta & ~4095)));
26884 addr = scratch;
26885 delta &= 4095;
26886 }
26887 else
26888 addr = stack_pointer_rtx;
26889
26890 addr = plus_constant (Pmode, addr, delta);
26891 }
26892 /* The store needs to be marked as frame related in order to prevent
26893 DSE from deleting it as dead if it is based on fp. */
26894 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26895 RTX_FRAME_RELATED_P (insn) = 1;
26896 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26897 }
26898 }
26899
26900
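/* Thumb counterpart of arm_set_return_address: write SOURCE into LR, or
   into the stack slot in which the prologue saved LR, using SCRATCH to
   build addresses whose offsets exceed the immediate range. */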
26901 void
26902 thumb_set_return_address (rtx source, rtx scratch)
26903 {
26904 arm_stack_offsets *offsets;
26905 HOST_WIDE_INT delta;
26906 HOST_WIDE_INT limit;
26907 int reg;
26908 rtx addr;
26909 unsigned long mask;
26910
26911 emit_use (source);
26912
26913 offsets = arm_get_frame_offsets ();
26914 mask = offsets->saved_regs_mask;
26915 if (mask & (1 << LR_REGNUM))
26916 {
26917 limit = 1024;
26918 /* Find the saved regs. */
26919 if (frame_pointer_needed)
26920 {
26921 delta = offsets->soft_frame - offsets->saved_args;
26922 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26923 if (TARGET_THUMB1)
26924 limit = 128;
26925 }
26926 else
26927 {
26928 delta = offsets->outgoing_args - offsets->saved_args;
26929 reg = SP_REGNUM;
26930 }
26931 /* Allow for the stack frame. */
26932 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26933 delta -= 16;
26934 /* The link register is always the first saved register. */
26935 delta -= 4;
26936
26937 /* Construct the address. */
26938 addr = gen_rtx_REG (SImode, reg);
26939 if (delta > limit)
26940 {
26941 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26942 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26943 addr = scratch;
26944 }
26945 else
26946 addr = plus_constant (Pmode, addr, delta);
26947
26948 /* The store needs to be marked as frame related in order to prevent
26949 DSE from deleting it as dead if it is based on fp. */
26950 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26951 RTX_FRAME_RELATED_P (insn) = 1;
26952 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26953 }
26954 else
26955 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26956 }
26957
26958 /* Implements target hook vector_mode_supported_p. */
26959 bool
26960 arm_vector_mode_supported_p (machine_mode mode)
26961 {
26962 /* Neon also supports V2SImode, etc. listed in the clause below. */
26963 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26964 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26965 || mode == V2DImode || mode == V8HFmode))
26966 return true;
26967
26968 if ((TARGET_NEON || TARGET_IWMMXT)
26969 && ((mode == V2SImode)
26970 || (mode == V4HImode)
26971 || (mode == V8QImode)))
26972 return true;
26973
26974 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26975 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26976 || mode == V2HAmode))
26977 return true;
26978
26979 return false;
26980 }
26981
26982 /* Implements target hook array_mode_supported_p. */
26983
26984 static bool
26985 arm_array_mode_supported_p (machine_mode mode,
26986 unsigned HOST_WIDE_INT nelems)
26987 {
26988 if (TARGET_NEON
26989 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26990 && (nelems >= 2 && nelems <= 4))
26991 return true;
26992
26993 return false;
26994 }
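/* For example (illustrative), with Neon enabled an array of three V4SImode
   vectors (nelems == 3) is supported; such array modes back the vld3/vst3
   style structure loads and stores.  Arrays of one vector, or of more than
   four, fall back to the default handling. */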
26995
26996 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26997 registers when autovectorizing for Neon, at least until multiple vector
26998 widths are supported properly by the middle-end. */
26999
27000 static machine_mode
27001 arm_preferred_simd_mode (scalar_mode mode)
27002 {
27003 if (TARGET_NEON)
27004 switch (mode)
27005 {
27006 case E_SFmode:
27007 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27008 case E_SImode:
27009 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27010 case E_HImode:
27011 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27012 case E_QImode:
27013 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27014 case E_DImode:
27015 if (!TARGET_NEON_VECTORIZE_DOUBLE)
27016 return V2DImode;
27017 break;
27018
27019 default:;
27020 }
27021
27022 if (TARGET_REALLY_IWMMXT)
27023 switch (mode)
27024 {
27025 case E_SImode:
27026 return V2SImode;
27027 case E_HImode:
27028 return V4HImode;
27029 case E_QImode:
27030 return V8QImode;
27031
27032 default:;
27033 }
27034
27035 return word_mode;
27036 }
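/* A rough example of the effect: with Neon enabled and the default options,
   an int loop such as
     for (i = 0; i < n; i++)
       a[i] = b[i] + c[i];
   is autovectorized using V4SImode (four ints per 128-bit Q register),
   whereas -mvectorize-with-neon-double selects V2SImode (one 64-bit D
   register) instead.  The exact code generated of course depends on the
   full option set and on the loop itself. */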
27037
27038 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27039
27040 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27041 using r0-r4 for function arguments, r7 for the stack frame, and not have
27042 enough left over to do doubleword arithmetic. For Thumb-2 all the
27043 potentially problematic instructions accept high registers so this is not
27044 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27045 that require many low registers. */
27046 static bool
27047 arm_class_likely_spilled_p (reg_class_t rclass)
27048 {
27049 if ((TARGET_THUMB1 && rclass == LO_REGS)
27050 || rclass == CC_REG)
27051 return true;
27052
27053 return false;
27054 }
27055
27056 /* Implements target hook small_register_classes_for_mode_p. */
27057 bool
27058 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27059 {
27060 return TARGET_THUMB1;
27061 }
27062
27063 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27064 ARM insns and therefore guarantee that the shift count is modulo 256.
27065 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27066 guarantee no particular behavior for out-of-range counts. */
27067
27068 static unsigned HOST_WIDE_INT
27069 arm_shift_truncation_mask (machine_mode mode)
27070 {
27071 return mode == SImode ? 255 : 0;
27072 }
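/* For example, an ARM register-specified SImode shift only consumes the
   bottom byte of the shift amount, so a count of 257 behaves like a count
   of 1; returning 255 here lets the middle-end omit explicit masking of
   shift counts.  DImode returns 0 because the libgcc/optabs sequences make
   no such promise. */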
27073
27074
27075 /* Map internal gcc register numbers to DWARF2 register numbers. */
27076
27077 unsigned int
27078 arm_dbx_register_number (unsigned int regno)
27079 {
27080 if (regno < 16)
27081 return regno;
27082
27083 if (IS_VFP_REGNUM (regno))
27084 {
27085 /* See comment in arm_dwarf_register_span. */
27086 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27087 return 64 + regno - FIRST_VFP_REGNUM;
27088 else
27089 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27090 }
27091
27092 if (IS_IWMMXT_GR_REGNUM (regno))
27093 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27094
27095 if (IS_IWMMXT_REGNUM (regno))
27096 return 112 + regno - FIRST_IWMMXT_REGNUM;
27097
27098 return DWARF_FRAME_REGISTERS;
27099 }
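/* Some example mappings (illustrative): r0-r15 map to DWARF numbers 0-15,
   s0 maps to 64 (the legacy VFP range), d16 maps to 256 + 16 = 272, and
   iWMMXt wR0 maps to 112; any other register falls through to
   DWARF_FRAME_REGISTERS, i.e. has no DWARF number. */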
27100
27101 /* DWARF models the VFPv3 registers as 32 64-bit registers.
27102 GCC models them as 64 32-bit registers, so we need to describe this to
27103 the DWARF generation code. Other registers can use the default. */
27104 static rtx
27105 arm_dwarf_register_span (rtx rtl)
27106 {
27107 machine_mode mode;
27108 unsigned regno;
27109 rtx parts[16];
27110 int nregs;
27111 int i;
27112
27113 regno = REGNO (rtl);
27114 if (!IS_VFP_REGNUM (regno))
27115 return NULL_RTX;
27116
27117 /* XXX FIXME: The EABI defines two VFP register ranges:
27118 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27119 256-287: D0-D31
27120 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27121 corresponding D register. Until GDB supports this, we shall use the
27122 legacy encodings. We also use these encodings for D0-D15 for
27123 compatibility with older debuggers. */
27124 mode = GET_MODE (rtl);
27125 if (GET_MODE_SIZE (mode) < 8)
27126 return NULL_RTX;
27127
27128 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27129 {
27130 nregs = GET_MODE_SIZE (mode) / 4;
27131 for (i = 0; i < nregs; i += 2)
27132 if (TARGET_BIG_END)
27133 {
27134 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27135 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27136 }
27137 else
27138 {
27139 parts[i] = gen_rtx_REG (SImode, regno + i);
27140 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27141 }
27142 }
27143 else
27144 {
27145 nregs = GET_MODE_SIZE (mode) / 8;
27146 for (i = 0; i < nregs; i++)
27147 parts[i] = gen_rtx_REG (DImode, regno + i);
27148 }
27149
27150 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27151 }
27152
27153 #if ARM_UNWIND_INFO
27154 /* Emit unwind directives for a store-multiple instruction or stack pointer
27155 push during alignment.
27156 These should only ever be generated by the function prologue code, so
27157 expect them to have a particular form.
27158 The store-multiple instruction sometimes pushes pc as the last register,
27159 although it should not be tracked in the unwind information, or for -Os
27160 sometimes pushes some dummy registers before the first register that needs
27161 to be tracked in the unwind information; such dummy registers are there just
27162 to avoid separate stack adjustment, and will not be restored in the
27163 epilogue. */
27164
27165 static void
27166 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27167 {
27168 int i;
27169 HOST_WIDE_INT offset;
27170 HOST_WIDE_INT nregs;
27171 int reg_size;
27172 unsigned reg;
27173 unsigned lastreg;
27174 unsigned padfirst = 0, padlast = 0;
27175 rtx e;
27176
27177 e = XVECEXP (p, 0, 0);
27178 gcc_assert (GET_CODE (e) == SET);
27179
27180 /* First insn will adjust the stack pointer. */
27181 gcc_assert (GET_CODE (e) == SET
27182 && REG_P (SET_DEST (e))
27183 && REGNO (SET_DEST (e)) == SP_REGNUM
27184 && GET_CODE (SET_SRC (e)) == PLUS);
27185
27186 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27187 nregs = XVECLEN (p, 0) - 1;
27188 gcc_assert (nregs);
27189
27190 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27191 if (reg < 16)
27192 {
27193 /* For -Os dummy registers can be pushed at the beginning to
27194 avoid separate stack pointer adjustment. */
27195 e = XVECEXP (p, 0, 1);
27196 e = XEXP (SET_DEST (e), 0);
27197 if (GET_CODE (e) == PLUS)
27198 padfirst = INTVAL (XEXP (e, 1));
27199 gcc_assert (padfirst == 0 || optimize_size);
27200 /* The function prologue may also push pc, but not annotate it as it is
27201 never restored. We turn this into a stack pointer adjustment. */
27202 e = XVECEXP (p, 0, nregs);
27203 e = XEXP (SET_DEST (e), 0);
27204 if (GET_CODE (e) == PLUS)
27205 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27206 else
27207 padlast = offset - 4;
27208 gcc_assert (padlast == 0 || padlast == 4);
27209 if (padlast == 4)
27210 fprintf (asm_out_file, "\t.pad #4\n");
27211 reg_size = 4;
27212 fprintf (asm_out_file, "\t.save {");
27213 }
27214 else if (IS_VFP_REGNUM (reg))
27215 {
27216 reg_size = 8;
27217 fprintf (asm_out_file, "\t.vsave {");
27218 }
27219 else
27220 /* Unknown register type. */
27221 gcc_unreachable ();
27222
27223 /* If the stack increment doesn't match the size of the saved registers,
27224 something has gone horribly wrong. */
27225 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27226
27227 offset = padfirst;
27228 lastreg = 0;
27229 /* The remaining insns will describe the stores. */
27230 for (i = 1; i <= nregs; i++)
27231 {
27232 /* Expect (set (mem <addr>) (reg)).
27233 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27234 e = XVECEXP (p, 0, i);
27235 gcc_assert (GET_CODE (e) == SET
27236 && MEM_P (SET_DEST (e))
27237 && REG_P (SET_SRC (e)));
27238
27239 reg = REGNO (SET_SRC (e));
27240 gcc_assert (reg >= lastreg);
27241
27242 if (i != 1)
27243 fprintf (asm_out_file, ", ");
27244 /* We can't use %r for vfp because we need to use the
27245 double precision register names. */
27246 if (IS_VFP_REGNUM (reg))
27247 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27248 else
27249 asm_fprintf (asm_out_file, "%r", reg);
27250
27251 if (flag_checking)
27252 {
27253 /* Check that the addresses are consecutive. */
27254 e = XEXP (SET_DEST (e), 0);
27255 if (GET_CODE (e) == PLUS)
27256 gcc_assert (REG_P (XEXP (e, 0))
27257 && REGNO (XEXP (e, 0)) == SP_REGNUM
27258 && CONST_INT_P (XEXP (e, 1))
27259 && offset == INTVAL (XEXP (e, 1)));
27260 else
27261 gcc_assert (i == 1
27262 && REG_P (e)
27263 && REGNO (e) == SP_REGNUM);
27264 offset += reg_size;
27265 }
27266 }
27267 fprintf (asm_out_file, "}\n");
27268 if (padfirst)
27269 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27270 }
27271
27272 /* Emit unwind directives for a SET. */
27273
27274 static void
27275 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27276 {
27277 rtx e0;
27278 rtx e1;
27279 unsigned reg;
27280
27281 e0 = XEXP (p, 0);
27282 e1 = XEXP (p, 1);
27283 switch (GET_CODE (e0))
27284 {
27285 case MEM:
27286 /* Pushing a single register. */
27287 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27288 || !REG_P (XEXP (XEXP (e0, 0), 0))
27289 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27290 abort ();
27291
27292 asm_fprintf (asm_out_file, "\t.save ");
27293 if (IS_VFP_REGNUM (REGNO (e1)))
27294 asm_fprintf(asm_out_file, "{d%d}\n",
27295 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27296 else
27297 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27298 break;
27299
27300 case REG:
27301 if (REGNO (e0) == SP_REGNUM)
27302 {
27303 /* A stack increment. */
27304 if (GET_CODE (e1) != PLUS
27305 || !REG_P (XEXP (e1, 0))
27306 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27307 || !CONST_INT_P (XEXP (e1, 1)))
27308 abort ();
27309
27310 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27311 -INTVAL (XEXP (e1, 1)));
27312 }
27313 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27314 {
27315 HOST_WIDE_INT offset;
27316
27317 if (GET_CODE (e1) == PLUS)
27318 {
27319 if (!REG_P (XEXP (e1, 0))
27320 || !CONST_INT_P (XEXP (e1, 1)))
27321 abort ();
27322 reg = REGNO (XEXP (e1, 0));
27323 offset = INTVAL (XEXP (e1, 1));
27324 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27325 HARD_FRAME_POINTER_REGNUM, reg,
27326 offset);
27327 }
27328 else if (REG_P (e1))
27329 {
27330 reg = REGNO (e1);
27331 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27332 HARD_FRAME_POINTER_REGNUM, reg);
27333 }
27334 else
27335 abort ();
27336 }
27337 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27338 {
27339 /* Move from sp to reg. */
27340 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27341 }
27342 else if (GET_CODE (e1) == PLUS
27343 && REG_P (XEXP (e1, 0))
27344 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27345 && CONST_INT_P (XEXP (e1, 1)))
27346 {
27347 /* Set reg to offset from sp. */
27348 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27349 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27350 }
27351 else
27352 abort ();
27353 break;
27354
27355 default:
27356 abort ();
27357 }
27358 }
27359
27360
27361 /* Emit unwind directives for the given insn. */
27362
27363 static void
27364 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27365 {
27366 rtx note, pat;
27367 bool handled_one = false;
27368
27369 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27370 return;
27371
27372 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27373 && (TREE_NOTHROW (current_function_decl)
27374 || crtl->all_throwers_are_sibcalls))
27375 return;
27376
27377 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27378 return;
27379
27380 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27381 {
27382 switch (REG_NOTE_KIND (note))
27383 {
27384 case REG_FRAME_RELATED_EXPR:
27385 pat = XEXP (note, 0);
27386 goto found;
27387
27388 case REG_CFA_REGISTER:
27389 pat = XEXP (note, 0);
27390 if (pat == NULL)
27391 {
27392 pat = PATTERN (insn);
27393 if (GET_CODE (pat) == PARALLEL)
27394 pat = XVECEXP (pat, 0, 0);
27395 }
27396
27397 /* Only emitted for IS_STACKALIGN re-alignment. */
27398 {
27399 rtx dest, src;
27400 unsigned reg;
27401
27402 src = SET_SRC (pat);
27403 dest = SET_DEST (pat);
27404
27405 gcc_assert (src == stack_pointer_rtx);
27406 reg = REGNO (dest);
27407 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27408 reg + 0x90, reg);
27409 }
27410 handled_one = true;
27411 break;
27412
27413 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27414 to get correct DWARF information for shrink-wrap. We should not
27415 emit unwind information for it because such insns are used either for
27416 pretend arguments or as notes to adjust sp and restore registers from
27417 the stack. */
27418 case REG_CFA_DEF_CFA:
27419 case REG_CFA_ADJUST_CFA:
27420 case REG_CFA_RESTORE:
27421 return;
27422
27423 case REG_CFA_EXPRESSION:
27424 case REG_CFA_OFFSET:
27425 /* ??? Only handling here what we actually emit. */
27426 gcc_unreachable ();
27427
27428 default:
27429 break;
27430 }
27431 }
27432 if (handled_one)
27433 return;
27434 pat = PATTERN (insn);
27435 found:
27436
27437 switch (GET_CODE (pat))
27438 {
27439 case SET:
27440 arm_unwind_emit_set (asm_out_file, pat);
27441 break;
27442
27443 case SEQUENCE:
27444 /* Store multiple. */
27445 arm_unwind_emit_sequence (asm_out_file, pat);
27446 break;
27447
27448 default:
27449 abort();
27450 }
27451 }
27452
27453
27454 /* Output a reference from a function exception table to the type_info
27455 object X. The EABI specifies that the symbol should be relocated by
27456 an R_ARM_TARGET2 relocation. */
27457
27458 static bool
27459 arm_output_ttype (rtx x)
27460 {
27461 fputs ("\t.word\t", asm_out_file);
27462 output_addr_const (asm_out_file, x);
27463 /* Use special relocations for symbol references. */
27464 if (!CONST_INT_P (x))
27465 fputs ("(TARGET2)", asm_out_file);
27466 fputc ('\n', asm_out_file);
27467
27468 return TRUE;
27469 }
27470
27471 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27472
27473 static void
27474 arm_asm_emit_except_personality (rtx personality)
27475 {
27476 fputs ("\t.personality\t", asm_out_file);
27477 output_addr_const (asm_out_file, personality);
27478 fputc ('\n', asm_out_file);
27479 }
27480 #endif /* ARM_UNWIND_INFO */
27481
27482 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27483
27484 static void
27485 arm_asm_init_sections (void)
27486 {
27487 #if ARM_UNWIND_INFO
27488 exception_section = get_unnamed_section (0, output_section_asm_op,
27489 "\t.handlerdata");
27490 #endif /* ARM_UNWIND_INFO */
27491
27492 #ifdef OBJECT_FORMAT_ELF
27493 if (target_pure_code)
27494 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27495 #endif
27496 }
27497
27498 /* Output unwind directives for the start/end of a function. */
27499
27500 void
27501 arm_output_fn_unwind (FILE * f, bool prologue)
27502 {
27503 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27504 return;
27505
27506 if (prologue)
27507 fputs ("\t.fnstart\n", f);
27508 else
27509 {
27510 /* If this function will never be unwound, then mark it as such.
27511 The same condition is used in arm_unwind_emit to suppress
27512 the frame annotations. */
27513 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27514 && (TREE_NOTHROW (current_function_decl)
27515 || crtl->all_throwers_are_sibcalls))
27516 fputs("\t.cantunwind\n", f);
27517
27518 fputs ("\t.fnend\n", f);
27519 }
27520 }
27521
27522 static bool
27523 arm_emit_tls_decoration (FILE *fp, rtx x)
27524 {
27525 enum tls_reloc reloc;
27526 rtx val;
27527
27528 val = XVECEXP (x, 0, 0);
27529 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27530
27531 output_addr_const (fp, val);
27532
27533 switch (reloc)
27534 {
27535 case TLS_GD32:
27536 fputs ("(tlsgd)", fp);
27537 break;
27538 case TLS_LDM32:
27539 fputs ("(tlsldm)", fp);
27540 break;
27541 case TLS_LDO32:
27542 fputs ("(tlsldo)", fp);
27543 break;
27544 case TLS_IE32:
27545 fputs ("(gottpoff)", fp);
27546 break;
27547 case TLS_LE32:
27548 fputs ("(tpoff)", fp);
27549 break;
27550 case TLS_DESCSEQ:
27551 fputs ("(tlsdesc)", fp);
27552 break;
27553 default:
27554 gcc_unreachable ();
27555 }
27556
27557 switch (reloc)
27558 {
27559 case TLS_GD32:
27560 case TLS_LDM32:
27561 case TLS_IE32:
27562 case TLS_DESCSEQ:
27563 fputs (" + (. - ", fp);
27564 output_addr_const (fp, XVECEXP (x, 0, 2));
27565 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27566 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27567 output_addr_const (fp, XVECEXP (x, 0, 3));
27568 fputc (')', fp);
27569 break;
27570 default:
27571 break;
27572 }
27573
27574 return TRUE;
27575 }
27576
27577 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27578
27579 static void
27580 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27581 {
27582 gcc_assert (size == 4);
27583 fputs ("\t.word\t", file);
27584 output_addr_const (file, x);
27585 fputs ("(tlsldo)", file);
27586 }
27587
27588 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27589
27590 static bool
27591 arm_output_addr_const_extra (FILE *fp, rtx x)
27592 {
27593 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27594 return arm_emit_tls_decoration (fp, x);
27595 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27596 {
27597 char label[256];
27598 int labelno = INTVAL (XVECEXP (x, 0, 0));
27599
27600 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27601 assemble_name_raw (fp, label);
27602
27603 return TRUE;
27604 }
27605 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27606 {
27607 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27608 if (GOT_PCREL)
27609 fputs ("+.", fp);
27610 fputs ("-(", fp);
27611 output_addr_const (fp, XVECEXP (x, 0, 0));
27612 fputc (')', fp);
27613 return TRUE;
27614 }
27615 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27616 {
27617 output_addr_const (fp, XVECEXP (x, 0, 0));
27618 if (GOT_PCREL)
27619 fputs ("+.", fp);
27620 fputs ("-(", fp);
27621 output_addr_const (fp, XVECEXP (x, 0, 1));
27622 fputc (')', fp);
27623 return TRUE;
27624 }
27625 else if (GET_CODE (x) == CONST_VECTOR)
27626 return arm_emit_vector_const (fp, x);
27627
27628 return FALSE;
27629 }
27630
27631 /* Output assembly for a shift instruction.
27632 SET_FLAGS determines how the instruction modifies the condition codes.
27633 0 - Do not set condition codes.
27634 1 - Set condition codes.
27635 2 - Use smallest instruction. */
27636 const char *
27637 arm_output_shift(rtx * operands, int set_flags)
27638 {
27639 char pattern[100];
27640 static const char flag_chars[3] = {'?', '.', '!'};
27641 const char *shift;
27642 HOST_WIDE_INT val;
27643 char c;
27644
27645 c = flag_chars[set_flags];
27646 shift = shift_op(operands[3], &val);
27647 if (shift)
27648 {
27649 if (val != -1)
27650 operands[2] = GEN_INT(val);
27651 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27652 }
27653 else
27654 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27655
27656 output_asm_insn (pattern, operands);
27657 return "";
27658 }
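/* For example, with SET_FLAGS == 0 and an arithmetic-shift-right operand,
   shift_op returns "asr" and the emitted template is "asr%?\t%0, %1, %2";
   if the operand is not a recognisable shift the fallback is
   "mov%?\t%0, %1".  SET_FLAGS values of 1 and 2 substitute the '.' and '!'
   escapes respectively, as described in the table above. */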
27659
27660 /* Output assembly for a WMMX immediate shift instruction. */
27661 const char *
27662 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27663 {
27664 int shift = INTVAL (operands[2]);
27665 char templ[50];
27666 machine_mode opmode = GET_MODE (operands[0]);
27667
27668 gcc_assert (shift >= 0);
27669
27670 /* Handle the case where the shift value exceeds the maximum for the register
27671 versions: > 63 (for D qualifier), > 31 (for W qualifier) or > 15 (for H). */
27672 if (((opmode == V4HImode) && (shift > 15))
27673 || ((opmode == V2SImode) && (shift > 31))
27674 || ((opmode == DImode) && (shift > 63)))
27675 {
27676 if (wror_or_wsra)
27677 {
27678 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27679 output_asm_insn (templ, operands);
27680 if (opmode == DImode)
27681 {
27682 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27683 output_asm_insn (templ, operands);
27684 }
27685 }
27686 else
27687 {
27688 /* The destination register will contain all zeros. */
27689 sprintf (templ, "wzero\t%%0");
27690 output_asm_insn (templ, operands);
27691 }
27692 return "";
27693 }
27694
27695 if ((opmode == DImode) && (shift > 32))
27696 {
27697 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27698 output_asm_insn (templ, operands);
27699 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27700 output_asm_insn (templ, operands);
27701 }
27702 else
27703 {
27704 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27705 output_asm_insn (templ, operands);
27706 }
27707 return "";
27708 }
27709
27710 /* Output assembly for a WMMX tinsr instruction. */
27711 const char *
27712 arm_output_iwmmxt_tinsr (rtx *operands)
27713 {
27714 int mask = INTVAL (operands[3]);
27715 int i;
27716 char templ[50];
27717 int units = mode_nunits[GET_MODE (operands[0])];
27718 gcc_assert ((mask & (mask - 1)) == 0);
27719 for (i = 0; i < units; ++i)
27720 {
27721 if ((mask & 0x01) == 1)
27722 {
27723 break;
27724 }
27725 mask >>= 1;
27726 }
27727 gcc_assert (i < units);
27728 {
27729 switch (GET_MODE (operands[0]))
27730 {
27731 case E_V8QImode:
27732 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27733 break;
27734 case E_V4HImode:
27735 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27736 break;
27737 case E_V2SImode:
27738 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27739 break;
27740 default:
27741 gcc_unreachable ();
27742 break;
27743 }
27744 output_asm_insn (templ, operands);
27745 }
27746 return "";
27747 }
27748
27749 /* Output a Thumb-1 casesi dispatch sequence. */
27750 const char *
27751 thumb1_output_casesi (rtx *operands)
27752 {
27753 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27754
27755 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27756
27757 switch (GET_MODE(diff_vec))
27758 {
27759 case E_QImode:
27760 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27761 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27762 case E_HImode:
27763 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27764 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27765 case E_SImode:
27766 return "bl\t%___gnu_thumb1_case_si";
27767 default:
27768 gcc_unreachable ();
27769 }
27770 }
27771
27772 /* Output a Thumb-2 casesi instruction. */
27773 const char *
27774 thumb2_output_casesi (rtx *operands)
27775 {
27776 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27777
27778 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27779
27780 output_asm_insn ("cmp\t%0, %1", operands);
27781 output_asm_insn ("bhi\t%l3", operands);
27782 switch (GET_MODE(diff_vec))
27783 {
27784 case E_QImode:
27785 return "tbb\t[%|pc, %0]";
27786 case E_HImode:
27787 return "tbh\t[%|pc, %0, lsl #1]";
27788 case E_SImode:
27789 if (flag_pic)
27790 {
27791 output_asm_insn ("adr\t%4, %l2", operands);
27792 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27793 output_asm_insn ("add\t%4, %4, %5", operands);
27794 return "bx\t%4";
27795 }
27796 else
27797 {
27798 output_asm_insn ("adr\t%4, %l2", operands);
27799 return "ldr\t%|pc, [%4, %0, lsl #2]";
27800 }
27801 default:
27802 gcc_unreachable ();
27803 }
27804 }
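/* As an illustration, for a QImode dispatch table the sequence emitted here
   looks roughly like
     cmp   <index>, <bound>
     bhi   <default-label>
     tbb   [pc, <index>]
   while the SImode PIC case instead materialises the target address with
   adr/ldr/add followed by bx (a sketch only; the actual registers and
   labels come from the operands). */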
27805
27806 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27807 per-core tuning structs. */
27808 static int
27809 arm_issue_rate (void)
27810 {
27811 return current_tune->issue_rate;
27812 }
27813
27814 /* Return how many instructions the scheduler should look ahead to choose
27815 the best one. */
27816 static int
27817 arm_first_cycle_multipass_dfa_lookahead (void)
27818 {
27819 int issue_rate = arm_issue_rate ();
27820
27821 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27822 }
27823
27824 /* Enable modeling of L2 auto-prefetcher. */
27825 static int
27826 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27827 {
27828 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27829 }
27830
27831 const char *
27832 arm_mangle_type (const_tree type)
27833 {
27834 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27835 has to be mangled as if it is in the "std" namespace. */
27836 if (TARGET_AAPCS_BASED
27837 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27838 return "St9__va_list";
27839
27840 /* Half-precision float. */
27841 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27842 return "Dh";
27843
27844 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
27845 builtin type. */
27846 if (TYPE_NAME (type) != NULL)
27847 return arm_mangle_builtin_type (type);
27848
27849 /* Use the default mangling. */
27850 return NULL;
27851 }
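/* Examples of the resulting manglings: a __fp16 argument mangles as "Dh",
   and a va_list argument on an AAPCS target mangles as "St9__va_list",
   i.e. as std::__va_list; Neon builtin types go through
   arm_mangle_builtin_type and everything else uses the default rules. */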
27852
27853 /* Order of allocation of core registers for Thumb: this allocation is
27854 written over the corresponding initial entries of the array
27855 initialized with REG_ALLOC_ORDER. We allocate all low registers
27856 first. Saving and restoring a low register is usually cheaper than
27857 using a call-clobbered high register. */
27858
27859 static const int thumb_core_reg_alloc_order[] =
27860 {
27861 3, 2, 1, 0, 4, 5, 6, 7,
27862 12, 14, 8, 9, 10, 11
27863 };
27864
27865 /* Adjust register allocation order when compiling for Thumb. */
27866
27867 void
27868 arm_order_regs_for_local_alloc (void)
27869 {
27870 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27871 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27872 if (TARGET_THUMB)
27873 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27874 sizeof (thumb_core_reg_alloc_order));
27875 }
27876
27877 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27878
27879 bool
27880 arm_frame_pointer_required (void)
27881 {
27882 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27883 return true;
27884
27885 /* If the function receives nonlocal gotos, it needs to save the frame
27886 pointer in the nonlocal_goto_save_area object. */
27887 if (cfun->has_nonlocal_label)
27888 return true;
27889
27890 /* The frame pointer is required for non-leaf APCS frames. */
27891 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
27892 return true;
27893
27894 /* If we are probing the stack in the prologue, we will have a faulting
27895 instruction prior to the stack adjustment and this requires a frame
27896 pointer if we want to catch the exception using the EABI unwinder. */
27897 if (!IS_INTERRUPT (arm_current_func_type ())
27898 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27899 || flag_stack_clash_protection)
27900 && arm_except_unwind_info (&global_options) == UI_TARGET
27901 && cfun->can_throw_non_call_exceptions)
27902 {
27903 HOST_WIDE_INT size = get_frame_size ();
27904
27905 /* That's irrelevant if there is no stack adjustment. */
27906 if (size <= 0)
27907 return false;
27908
27909 /* That's relevant only if there is a stack probe. */
27910 if (crtl->is_leaf && !cfun->calls_alloca)
27911 {
27912 /* We don't have the final size of the frame so adjust. */
27913 size += 32 * UNITS_PER_WORD;
27914 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
27915 return true;
27916 }
27917 else
27918 return true;
27919 }
27920
27921 return false;
27922 }
27923
27924 /* Only Thumb-1 lacks conditional execution, so return true if the
27925 target is not Thumb-1. */
27926 static bool
27927 arm_have_conditional_execution (void)
27928 {
27929 return !TARGET_THUMB1;
27930 }
27931
27932 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27933 static HOST_WIDE_INT
27934 arm_vector_alignment (const_tree type)
27935 {
27936 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27937
27938 if (TARGET_AAPCS_BASED)
27939 align = MIN (align, 64);
27940
27941 return align;
27942 }
27943
27944 static unsigned int
27945 arm_autovectorize_vector_sizes (void)
27946 {
27947 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27948 }
27949
27950 static bool
27951 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27952 {
27953 /* Vectors which aren't in packed structures will not be less aligned than
27954 the natural alignment of their element type, so this is safe. */
27955 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27956 return !is_packed;
27957
27958 return default_builtin_vector_alignment_reachable (type, is_packed);
27959 }
27960
27961 static bool
27962 arm_builtin_support_vector_misalignment (machine_mode mode,
27963 const_tree type, int misalignment,
27964 bool is_packed)
27965 {
27966 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27967 {
27968 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27969
27970 if (is_packed)
27971 return align == 1;
27972
27973 /* If the misalignment is unknown, we should be able to handle the access
27974 so long as it is not to a member of a packed data structure. */
27975 if (misalignment == -1)
27976 return true;
27977
27978 /* Return true if the misalignment is a multiple of the natural alignment
27979 of the vector's element type. This is probably always going to be
27980 true in practice, since we've already established that this isn't a
27981 packed access. */
27982 return ((misalignment % align) == 0);
27983 }
27984
27985 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27986 is_packed);
27987 }
27988
27989 static void
27990 arm_conditional_register_usage (void)
27991 {
27992 int regno;
27993
27994 if (TARGET_THUMB1 && optimize_size)
27995 {
27996 /* When optimizing for size on Thumb-1, it's better not
27997 to use the HI regs, because of the overhead of
27998 stacking them. */
27999 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
28000 fixed_regs[regno] = call_used_regs[regno] = 1;
28001 }
28002
28003 /* The link register can be clobbered by any branch insn,
28004 but we have no way to track that at present, so mark
28005 it as unavailable. */
28006 if (TARGET_THUMB1)
28007 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28008
28009 if (TARGET_32BIT && TARGET_HARD_FLOAT)
28010 {
28011 /* VFPv3 registers are disabled when earlier VFP
28012 versions are selected due to the definition of
28013 LAST_VFP_REGNUM. */
28014 for (regno = FIRST_VFP_REGNUM;
28015 regno <= LAST_VFP_REGNUM; ++ regno)
28016 {
28017 fixed_regs[regno] = 0;
28018 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28019 || regno >= FIRST_VFP_REGNUM + 32;
28020 }
28021 }
28022
28023 if (TARGET_REALLY_IWMMXT)
28024 {
28025 regno = FIRST_IWMMXT_GR_REGNUM;
28026 /* The 2002/10/09 revision of the XScale ABI has wCG0
28027 and wCG1 as call-preserved registers. The 2002/11/21
28028 revision changed this so that all wCG registers are
28029 scratch registers. */
28030 for (regno = FIRST_IWMMXT_GR_REGNUM;
28031 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28032 fixed_regs[regno] = 0;
28033 /* The XScale ABI has wR0 - wR9 as scratch registers,
28034 the rest as call-preserved registers. */
28035 for (regno = FIRST_IWMMXT_REGNUM;
28036 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28037 {
28038 fixed_regs[regno] = 0;
28039 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28040 }
28041 }
28042
28043 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28044 {
28045 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28046 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28047 }
28048 else if (TARGET_APCS_STACK)
28049 {
28050 fixed_regs[10] = 1;
28051 call_used_regs[10] = 1;
28052 }
28053 /* -mcaller-super-interworking reserves r11 for calls to
28054 _interwork_r11_call_via_rN(). Making the register global
28055 is an easy way of ensuring that it remains valid for all
28056 calls. */
28057 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28058 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28059 {
28060 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28061 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28062 if (TARGET_CALLER_INTERWORKING)
28063 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28064 }
28065 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28066 }
28067
28068 static reg_class_t
28069 arm_preferred_rename_class (reg_class_t rclass)
28070 {
28071 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28072 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS
28073 so that code size can be reduced. */
28074 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28075 return LO_REGS;
28076 else
28077 return NO_REGS;
28078 }
28079
28080 /* Compute the attribute "length" of insn "*push_multi".
28081 So this function MUST be kept in sync with that insn pattern. */
28082 int
28083 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28084 {
28085 int i, regno, hi_reg;
28086 int num_saves = XVECLEN (parallel_op, 0);
28087
28088 /* ARM mode. */
28089 if (TARGET_ARM)
28090 return 4;
28091 /* Thumb1 mode. */
28092 if (TARGET_THUMB1)
28093 return 2;
28094
28095 /* Thumb2 mode. */
28096 regno = REGNO (first_op);
28097 /* For PUSH/STM under Thumb-2, we can use the 16-bit encodings if the register
28098 list fits in 8 bits. Normally this means all registers in the list must be
28099 LO_REGS, that is R0-R7. If any HI_REGS are used, we must use the 32-bit
28100 encoding. The one exception is PUSH, where LR (a HI_REGS register) can
28101 still be used with the 16-bit encoding. */
28102 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28103 for (i = 1; i < num_saves && !hi_reg; i++)
28104 {
28105 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28106 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28107 }
28108
28109 if (!hi_reg)
28110 return 2;
28111 return 4;
28112 }
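/* Rough examples of the returned lengths in Thumb-2: "push {r0-r7}" and
   "push {r4, lr}" use the 16-bit encoding and return 2, while
   "push {r4, r8}" needs the 32-bit encoding and returns 4.  ARM mode is
   always 4 and Thumb-1 always 2. */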
28113
28114 /* Compute the attribute "length" of an insn. Currently, this function is used
28115 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28116 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28117 rtx, RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
28118 true if OPERANDS contains an insn which explicitly updates the base register. */
28119
28120 int
28121 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28122 {
28123 /* ARM mode. */
28124 if (TARGET_ARM)
28125 return 4;
28126 /* Thumb1 mode. */
28127 if (TARGET_THUMB1)
28128 return 2;
28129
28130 rtx parallel_op = operands[0];
28131 /* Start INDX at the last element of the PARALLEL. */
28132 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28133 /* REGNO starts as the base register. */
28134 unsigned regno = REGNO (operands[1]);
28135 /* Skip the return and write-back patterns.
28136 We only need the register pop patterns for the later analysis. */
28137 unsigned first_indx = 0;
28138 first_indx += return_pc ? 1 : 0;
28139 first_indx += write_back_p ? 1 : 0;
28140
28141 /* A pop operation can be done through LDM or POP. If the base register is SP
28142 and write back is used, then LDM is an alias of POP. */
28143 bool pop_p = (regno == SP_REGNUM && write_back_p);
28144 bool ldm_p = !pop_p;
28145
28146 /* Check base register for LDM. */
28147 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28148 return 4;
28149
28150 /* Check each register in the list. */
28151 for (; indx >= first_indx; indx--)
28152 {
28153 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28154 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28155 comment in arm_attr_length_push_multi. */
28156 if (REGNO_REG_CLASS (regno) == HI_REGS
28157 && (regno != PC_REGNUM || ldm_p))
28158 return 4;
28159 }
28160
28161 return 2;
28162 }
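/* Rough examples in Thumb-2: "pop {r4, pc}" (SP base with write back) is a
   16-bit encoding and returns 2, whereas an LDM with a high base register
   such as "ldmia r8!, {r0, r1}" needs the 32-bit encoding and returns 4. */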
28163
28164 /* Compute the number of instructions emitted by output_move_double. */
28165 int
28166 arm_count_output_move_double_insns (rtx *operands)
28167 {
28168 int count;
28169 rtx ops[2];
28170 /* output_move_double may modify the operands array, so call it
28171 here on a copy of the array. */
28172 ops[0] = operands[0];
28173 ops[1] = operands[1];
28174 output_move_double (ops, false, &count);
28175 return count;
28176 }
28177
28178 int
28179 vfp3_const_double_for_fract_bits (rtx operand)
28180 {
28181 REAL_VALUE_TYPE r0;
28182
28183 if (!CONST_DOUBLE_P (operand))
28184 return 0;
28185
28186 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28187 if (exact_real_inverse (DFmode, &r0)
28188 && !REAL_VALUE_NEGATIVE (r0))
28189 {
28190 if (exact_real_truncate (DFmode, &r0))
28191 {
28192 HOST_WIDE_INT value = real_to_integer (&r0);
28193 value = value & 0xffffffff;
28194 if ((value != 0) && ( (value & (value - 1)) == 0))
28195 {
28196 int ret = exact_log2 (value);
28197 gcc_assert (IN_RANGE (ret, 0, 31));
28198 return ret;
28199 }
28200 }
28201 }
28202 return 0;
28203 }
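/* For example, the constant 0.25 has the exact inverse 4.0 == 2^2, so this
   returns 2 (the number of fraction bits for a vcvt fixed-point
   conversion), whereas 0.3 has no exact power-of-two reciprocal and
   yields 0. */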
28204
28205 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28206 log2 is in [1, 32], return that log2. Otherwise return -1.
28207 This is used in the patterns for vcvt.s32.f32 floating-point to
28208 fixed-point conversions. */
28209
28210 int
28211 vfp3_const_double_for_bits (rtx x)
28212 {
28213 const REAL_VALUE_TYPE *r;
28214
28215 if (!CONST_DOUBLE_P (x))
28216 return -1;
28217
28218 r = CONST_DOUBLE_REAL_VALUE (x);
28219
28220 if (REAL_VALUE_NEGATIVE (*r)
28221 || REAL_VALUE_ISNAN (*r)
28222 || REAL_VALUE_ISINF (*r)
28223 || !real_isinteger (r, SFmode))
28224 return -1;
28225
28226 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28227
28228 /* The exact_log2 above will have returned -1 if this is
28229 not an exact log2. */
28230 if (!IN_RANGE (hwint, 1, 32))
28231 return -1;
28232
28233 return hwint;
28234 }
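/* For example, 4.0 yields 2 and 4294967296.0 (2^32) yields 32, while 1.0,
   3.0, negative values, NaNs and infinities all yield -1. */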
28235
28236 \f
28237 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28238
28239 static void
28240 arm_pre_atomic_barrier (enum memmodel model)
28241 {
28242 if (need_atomic_barrier_p (model, true))
28243 emit_insn (gen_memory_barrier ());
28244 }
28245
28246 static void
28247 arm_post_atomic_barrier (enum memmodel model)
28248 {
28249 if (need_atomic_barrier_p (model, false))
28250 emit_insn (gen_memory_barrier ());
28251 }
28252
28253 /* Emit the load-exclusive and store-exclusive instructions.
28254 Use acquire and release versions if necessary. */
28255
28256 static void
28257 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28258 {
28259 rtx (*gen) (rtx, rtx);
28260
28261 if (acq)
28262 {
28263 switch (mode)
28264 {
28265 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28266 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28267 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28268 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28269 default:
28270 gcc_unreachable ();
28271 }
28272 }
28273 else
28274 {
28275 switch (mode)
28276 {
28277 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28278 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28279 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28280 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28281 default:
28282 gcc_unreachable ();
28283 }
28284 }
28285
28286 emit_insn (gen (rval, mem));
28287 }
28288
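/* Note on the parameter order of arm_emit_store_exclusive below: as the
   call sites later in this file show, BVAL receives the success/failure
   result register, RVAL the memory operand and MEM the value to be stored,
   which presumably matches the (result, memory, value) operand order of
   the store-exclusive patterns. */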
28289 static void
28290 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28291 rtx mem, bool rel)
28292 {
28293 rtx (*gen) (rtx, rtx, rtx);
28294
28295 if (rel)
28296 {
28297 switch (mode)
28298 {
28299 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28300 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28301 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28302 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28303 default:
28304 gcc_unreachable ();
28305 }
28306 }
28307 else
28308 {
28309 switch (mode)
28310 {
28311 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28312 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28313 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28314 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28315 default:
28316 gcc_unreachable ();
28317 }
28318 }
28319
28320 emit_insn (gen (bval, rval, mem));
28321 }
28322
28323 /* Mark the previous jump instruction as unlikely. */
28324
28325 static void
28326 emit_unlikely_jump (rtx insn)
28327 {
28328 rtx_insn *jump = emit_jump_insn (insn);
28329 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28330 }
28331
28332 /* Expand a compare and swap pattern. */
28333
28334 void
28335 arm_expand_compare_and_swap (rtx operands[])
28336 {
28337 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28338 machine_mode mode;
28339 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28340
28341 bval = operands[0];
28342 rval = operands[1];
28343 mem = operands[2];
28344 oldval = operands[3];
28345 newval = operands[4];
28346 is_weak = operands[5];
28347 mod_s = operands[6];
28348 mod_f = operands[7];
28349 mode = GET_MODE (mem);
28350
28351 /* Normally the succ memory model must be stronger than fail, but in the
28352 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28353 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28354
28355 if (TARGET_HAVE_LDACQ
28356 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28357 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28358 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28359
28360 switch (mode)
28361 {
28362 case E_QImode:
28363 case E_HImode:
28364 /* For narrow modes, we're going to perform the comparison in SImode,
28365 so do the zero-extension now. */
28366 rval = gen_reg_rtx (SImode);
28367 oldval = convert_modes (SImode, mode, oldval, true);
28368 /* FALLTHRU */
28369
28370 case E_SImode:
28371 /* Force the value into a register if needed. We waited until after
28372 the zero-extension above to do this properly. */
28373 if (!arm_add_operand (oldval, SImode))
28374 oldval = force_reg (SImode, oldval);
28375 break;
28376
28377 case E_DImode:
28378 if (!cmpdi_operand (oldval, mode))
28379 oldval = force_reg (mode, oldval);
28380 break;
28381
28382 default:
28383 gcc_unreachable ();
28384 }
28385
28386 if (TARGET_THUMB1)
28387 {
28388 switch (mode)
28389 {
28390 case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28391 case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28392 case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28393 case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28394 default:
28395 gcc_unreachable ();
28396 }
28397 }
28398 else
28399 {
28400 switch (mode)
28401 {
28402 case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28403 case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28404 case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28405 case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28406 default:
28407 gcc_unreachable ();
28408 }
28409 }
28410
28411 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28412 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28413
28414 if (mode == QImode || mode == HImode)
28415 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28416
28417 /* In all cases, we arrange for success to be signaled by Z set.
28418 This arrangement allows for the boolean result to be used directly
28419 in a subsequent branch, post optimization. For Thumb-1 targets, the
28420 boolean negation of the result is also stored in bval because the Thumb-1
28421 backend lacks dependency tracking for the CC flag, due to flag-setting
28422 not being represented at the RTL level. */
28423 if (TARGET_THUMB1)
28424 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28425 else
28426 {
28427 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28428 emit_insn (gen_rtx_SET (bval, x));
28429 }
28430 }
28431
28432 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28433 another memory store between the load-exclusive and store-exclusive can
28434 reset the monitor from Exclusive to Open state. This means we must wait
28435 until after reload to split the pattern, lest we get a register spill in
28436 the middle of the atomic sequence. Success of the compare and swap is
28437 indicated by the Z flag set for 32-bit targets and by neg_bval being zero
28438 for Thumb-1 targets (i.e. the negation of the boolean value returned by
28439 atomic_compare_and_swapmode standard pattern in operand 0). */
28440
28441 void
28442 arm_split_compare_and_swap (rtx operands[])
28443 {
28444 rtx rval, mem, oldval, newval, neg_bval;
28445 machine_mode mode;
28446 enum memmodel mod_s, mod_f;
28447 bool is_weak;
28448 rtx_code_label *label1, *label2;
28449 rtx x, cond;
28450
28451 rval = operands[1];
28452 mem = operands[2];
28453 oldval = operands[3];
28454 newval = operands[4];
28455 is_weak = (operands[5] != const0_rtx);
28456 mod_s = memmodel_from_int (INTVAL (operands[6]));
28457 mod_f = memmodel_from_int (INTVAL (operands[7]));
28458 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28459 mode = GET_MODE (mem);
28460
28461 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28462
28463 bool use_acquire = TARGET_HAVE_LDACQ
28464 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28465 || is_mm_release (mod_s));
28466
28467 bool use_release = TARGET_HAVE_LDACQ
28468 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28469 || is_mm_acquire (mod_s));
28470
28471 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28472 a full barrier is emitted after the store-release. */
28473 if (is_armv8_sync)
28474 use_acquire = false;
28475
28476 /* Checks whether a barrier is needed and emits one accordingly. */
28477 if (!(use_acquire || use_release))
28478 arm_pre_atomic_barrier (mod_s);
28479
28480 label1 = NULL;
28481 if (!is_weak)
28482 {
28483 label1 = gen_label_rtx ();
28484 emit_label (label1);
28485 }
28486 label2 = gen_label_rtx ();
28487
28488 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28489
28490 /* Z is set to 0 for 32-bit targets (resp. rval set to 1) if oldval != rval,
28491 as required to communicate with arm_expand_compare_and_swap. */
28492 if (TARGET_32BIT)
28493 {
28494 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28495 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28496 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28497 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28498 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28499 }
28500 else
28501 {
28502 emit_move_insn (neg_bval, const1_rtx);
28503 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28504 if (thumb1_cmpneg_operand (oldval, SImode))
28505 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28506 label2, cond));
28507 else
28508 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28509 }
28510
28511 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28512
28513 /* Weak or strong, we want EQ to be true for success, so that we
28514 match the flags that we got from the compare above. */
28515 if (TARGET_32BIT)
28516 {
28517 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28518 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28519 emit_insn (gen_rtx_SET (cond, x));
28520 }
28521
28522 if (!is_weak)
28523 {
28524 /* Z is set to boolean value of !neg_bval, as required to communicate
28525 with arm_expand_compare_and_swap. */
28526 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28527 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28528 }
28529
28530 if (!is_mm_relaxed (mod_f))
28531 emit_label (label2);
28532
28533 /* Checks whether a barrier is needed and emits one accordingly. */
28534 if (is_armv8_sync
28535 || !(use_acquire || use_release))
28536 arm_post_atomic_barrier (mod_s);
28537
28538 if (is_mm_relaxed (mod_f))
28539 emit_label (label2);
28540 }
28541
28542 /* Split an atomic operation pattern. The operation is given by CODE and is one
28543 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28544 operation). The operation is performed on the content at MEM and on VALUE
28545 following the memory model MODEL_RTX. The content at MEM before and after
28546 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28547 success of the operation is returned in COND. Using a scratch register or
28548 an operand register for these determines what result is returned for that
28549 pattern. */
28550
28551 void
28552 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28553 rtx value, rtx model_rtx, rtx cond)
28554 {
28555 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28556 machine_mode mode = GET_MODE (mem);
28557 machine_mode wmode = (mode == DImode ? DImode : SImode);
28558 rtx_code_label *label;
28559 bool all_low_regs, bind_old_new;
28560 rtx x;
28561
28562 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28563
28564 bool use_acquire = TARGET_HAVE_LDACQ
28565 && !(is_mm_relaxed (model) || is_mm_consume (model)
28566 || is_mm_release (model));
28567
28568 bool use_release = TARGET_HAVE_LDACQ
28569 && !(is_mm_relaxed (model) || is_mm_consume (model)
28570 || is_mm_acquire (model));
28571
28572 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28573 a full barrier is emitted after the store-release. */
28574 if (is_armv8_sync)
28575 use_acquire = false;
28576
28577 /* Checks whether a barrier is needed and emits one accordingly. */
28578 if (!(use_acquire || use_release))
28579 arm_pre_atomic_barrier (model);
28580
28581 label = gen_label_rtx ();
28582 emit_label (label);
28583
28584 if (new_out)
28585 new_out = gen_lowpart (wmode, new_out);
28586 if (old_out)
28587 old_out = gen_lowpart (wmode, old_out);
28588 else
28589 old_out = new_out;
28590 value = simplify_gen_subreg (wmode, value, mode, 0);
28591
28592 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28593
28594 /* Does the operation require the destination and first operand to use the
28595 same register? This is decided by the register constraints of the relevant
28596 insn patterns in thumb1.md. */
28597 gcc_assert (!new_out || REG_P (new_out));
28598 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28599 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28600 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28601 bind_old_new =
28602 (TARGET_THUMB1
28603 && code != SET
28604 && code != MINUS
28605 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28606
28607 /* We want to return the old value while putting the result of the operation
28608 in the same register as the old value so copy the old value over to the
28609 destination register and use that register for the operation. */
28610 if (old_out && bind_old_new)
28611 {
28612 emit_move_insn (new_out, old_out);
28613 old_out = new_out;
28614 }
28615
28616 switch (code)
28617 {
28618 case SET:
28619 new_out = value;
28620 break;
28621
28622 case NOT:
28623 x = gen_rtx_AND (wmode, old_out, value);
28624 emit_insn (gen_rtx_SET (new_out, x));
28625 x = gen_rtx_NOT (wmode, new_out);
28626 emit_insn (gen_rtx_SET (new_out, x));
28627 break;
28628
28629 case MINUS:
28630 if (CONST_INT_P (value))
28631 {
28632 value = GEN_INT (-INTVAL (value));
28633 code = PLUS;
28634 }
28635 /* FALLTHRU */
28636
28637 case PLUS:
28638 if (mode == DImode)
28639 {
28640 /* DImode plus/minus need to clobber flags. */
28641 /* The adddi3 and subdi3 patterns are incorrectly written so that
28642 they require matching operands, even when we could easily support
28643 three operands. Thankfully, this can be fixed up post-splitting,
28644 as the individual add+adc patterns do accept three operands and
28645 post-reload cprop can make these moves go away. */
28646 emit_move_insn (new_out, old_out);
28647 if (code == PLUS)
28648 x = gen_adddi3 (new_out, new_out, value);
28649 else
28650 x = gen_subdi3 (new_out, new_out, value);
28651 emit_insn (x);
28652 break;
28653 }
28654 /* FALLTHRU */
28655
28656 default:
28657 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28658 emit_insn (gen_rtx_SET (new_out, x));
28659 break;
28660 }
28661
28662 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28663 use_release);
28664
28665 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28666 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28667
28668 /* Checks whether a barrier is needed and emits one accordingly. */
28669 if (is_armv8_sync
28670 || !(use_acquire || use_release))
28671 arm_post_atomic_barrier (model);
28672 }
28673 \f
28674 #define MAX_VECT_LEN 16
28675
28676 struct expand_vec_perm_d
28677 {
28678 rtx target, op0, op1;
28679 auto_vec_perm_indices perm;
28680 machine_mode vmode;
28681 bool one_vector_p;
28682 bool testing_p;
28683 };
28684
28685 /* Generate a variable permutation. */
28686
28687 static void
28688 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28689 {
28690 machine_mode vmode = GET_MODE (target);
28691 bool one_vector_p = rtx_equal_p (op0, op1);
28692
28693 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28694 gcc_checking_assert (GET_MODE (op0) == vmode);
28695 gcc_checking_assert (GET_MODE (op1) == vmode);
28696 gcc_checking_assert (GET_MODE (sel) == vmode);
28697 gcc_checking_assert (TARGET_NEON);
28698
28699 if (one_vector_p)
28700 {
28701 if (vmode == V8QImode)
28702 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28703 else
28704 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28705 }
28706 else
28707 {
28708 rtx pair;
28709
28710 if (vmode == V8QImode)
28711 {
28712 pair = gen_reg_rtx (V16QImode);
28713 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28714 pair = gen_lowpart (TImode, pair);
28715 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28716 }
28717 else
28718 {
28719 pair = gen_reg_rtx (OImode);
28720 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28721 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28722 }
28723 }
28724 }
28725
28726 void
28727 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28728 {
28729 machine_mode vmode = GET_MODE (target);
28730 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28731 bool one_vector_p = rtx_equal_p (op0, op1);
28732 rtx rmask[MAX_VECT_LEN], mask;
28733
28734 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28735 numbering of elements for big-endian, we must reverse the order. */
28736 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28737
28738 /* The VTBL instruction does not use a modulo index, so we must take care
28739 of that ourselves. */
28740 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28741 for (i = 0; i < nelt; ++i)
28742 rmask[i] = mask;
28743 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28744 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28745
28746 arm_expand_vec_perm_1 (target, op0, op1, sel);
28747 }
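/* Illustrative example: with V8QImode and op0 == op1, nelt is 8 and the mask
   built above is 7, so a selector byte of 10 is reduced to lane 2 before the
   VTBL is emitted. Without that AND, VTBL would write zero for any lane whose
   index is out of range, rather than the modulo behaviour VEC_PERM_EXPR
   expects. */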
28748
28749 /* Map lane ordering between architectural lane order, and GCC lane order,
28750 taking into account ABI. See comment above output_move_neon for details. */
28751
28752 static int
28753 neon_endian_lane_map (machine_mode mode, int lane)
28754 {
28755 if (BYTES_BIG_ENDIAN)
28756 {
28757 int nelems = GET_MODE_NUNITS (mode);
28758 /* Reverse lane order. */
28759 lane = (nelems - 1 - lane);
28760 /* Reverse D register order, to match ABI. */
28761 if (GET_MODE_SIZE (mode) == 16)
28762 lane = lane ^ (nelems / 2);
28763 }
28764 return lane;
28765 }
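/* Worked example (illustrative): on a big-endian target, V4SImode is a
   16-byte mode with four lanes, so the code above maps architectural lanes
   0, 1, 2, 3 to GCC lanes 1, 0, 3, 2 - lanes are swapped within each D
   register while the D registers keep their places. For an 8-byte mode such
   as V2SImode the mapping is a plain reversal: 0 -> 1, 1 -> 0. */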
28766
28767 /* Some permutations index into pairs of vectors; this is a helper function
28768 to map indexes into those pairs of vectors. */
28769
28770 static int
28771 neon_pair_endian_lane_map (machine_mode mode, int lane)
28772 {
28773 int nelem = GET_MODE_NUNITS (mode);
28774 if (BYTES_BIG_ENDIAN)
28775 lane =
28776 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28777 return lane;
28778 }
28779
28780 /* Generate or test for an insn that supports a constant permutation. */
28781
28782 /* Recognize patterns for the VUZP insns. */
28783
28784 static bool
28785 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28786 {
28787 unsigned int i, odd, mask, nelt = d->perm.length ();
28788 rtx out0, out1, in0, in1;
28789 rtx (*gen)(rtx, rtx, rtx, rtx);
28790 int first_elem;
28791 int swap_nelt;
28792
28793 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28794 return false;
28795
28796 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28797 big-endian pattern on 64-bit vectors, so we correct for that. */
28798 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28799 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
28800
28801 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28802
28803 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28804 odd = 0;
28805 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28806 odd = 1;
28807 else
28808 return false;
28809 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28810
28811 for (i = 0; i < nelt; i++)
28812 {
28813 unsigned elt =
28814 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28815 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28816 return false;
28817 }
28818
28819 /* Success! */
28820 if (d->testing_p)
28821 return true;
28822
28823 switch (d->vmode)
28824 {
28825 case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28826 case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28827 case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28828 case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28829 case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28830 case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28831 case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28832 case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28833 case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28834 case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28835 default:
28836 gcc_unreachable ();
28837 }
28838
28839 in0 = d->op0;
28840 in1 = d->op1;
28841 if (swap_nelt != 0)
28842 std::swap (in0, in1);
28843
28844 out0 = d->target;
28845 out1 = gen_reg_rtx (d->vmode);
28846 if (odd)
28847 std::swap (out0, out1);
28848
28849 emit_insn (gen (out0, in0, in1, out1));
28850 return true;
28851 }
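/* Illustrative example (little-endian): for a two-operand V8QImode
   permutation, the selector { 0, 2, 4, 6, 8, 10, 12, 14 } picks the
   even-numbered elements of the concatenated inputs and is matched above with
   odd == 0, while { 1, 3, 5, 7, 9, 11, 13, 15 } picks the odd elements and is
   matched with odd == 1; each expands to a single VUZP. */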
28852
28853 /* Recognize patterns for the VZIP insns. */
28854
28855 static bool
28856 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28857 {
28858 unsigned int i, high, mask, nelt = d->perm.length ();
28859 rtx out0, out1, in0, in1;
28860 rtx (*gen)(rtx, rtx, rtx, rtx);
28861 int first_elem;
28862 bool is_swapped;
28863
28864 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28865 return false;
28866
28867 is_swapped = BYTES_BIG_ENDIAN;
28868
28869 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28870
28871 high = nelt / 2;
28872 if (first_elem == neon_endian_lane_map (d->vmode, high))
28873 ;
28874 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28875 high = 0;
28876 else
28877 return false;
28878 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28879
28880 for (i = 0; i < nelt / 2; i++)
28881 {
28882 unsigned elt =
28883 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28884 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28885 != elt)
28886 return false;
28887 elt =
28888 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28889 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28890 != elt)
28891 return false;
28892 }
28893
28894 /* Success! */
28895 if (d->testing_p)
28896 return true;
28897
28898 switch (d->vmode)
28899 {
28900 case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28901 case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28902 case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28903 case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28904 case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28905 case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28906 case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
28907 case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
28908 case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28909 case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28910 default:
28911 gcc_unreachable ();
28912 }
28913
28914 in0 = d->op0;
28915 in1 = d->op1;
28916 if (is_swapped)
28917 std::swap (in0, in1);
28918
28919 out0 = d->target;
28920 out1 = gen_reg_rtx (d->vmode);
28921 if (high)
28922 std::swap (out0, out1);
28923
28924 emit_insn (gen (out0, in0, in1, out1));
28925 return true;
28926 }
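/* Illustrative example (little-endian): for a two-operand V8QImode
   permutation, the selector { 0, 8, 1, 9, 2, 10, 3, 11 } interleaves the low
   halves of the two inputs and is matched above with high == 0, while
   { 4, 12, 5, 13, 6, 14, 7, 15 } interleaves the high halves
   (high == nelt / 2); each expands to a single VZIP. */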
28927
28928 /* Recognize patterns for the VREV insns. */
28929
28930 static bool
28931 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28932 {
28933 unsigned int i, j, diff, nelt = d->perm.length ();
28934 rtx (*gen)(rtx, rtx);
28935
28936 if (!d->one_vector_p)
28937 return false;
28938
28939 diff = d->perm[0];
28940 switch (diff)
28941 {
28942 case 7:
28943 switch (d->vmode)
28944 {
28945 case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
28946 case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
28947 default:
28948 return false;
28949 }
28950 break;
28951 case 3:
28952 switch (d->vmode)
28953 {
28954 case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
28955 case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
28956 case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
28957 case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
28958 case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
28959 case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
28960 default:
28961 return false;
28962 }
28963 break;
28964 case 1:
28965 switch (d->vmode)
28966 {
28967 case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
28968 case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
28969 case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
28970 case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
28971 case E_V4SImode: gen = gen_neon_vrev64v4si; break;
28972 case E_V2SImode: gen = gen_neon_vrev64v2si; break;
28973 case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
28974 case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
28975 default:
28976 return false;
28977 }
28978 break;
28979 default:
28980 return false;
28981 }
28982
28983 for (i = 0; i < nelt ; i += diff + 1)
28984 for (j = 0; j <= diff; j += 1)
28985 {
28986 /* This is guaranteed to be true as the value of diff
28987 is 7, 3, 1 and we should have enough elements in the
28988 queue to generate this. Getting a vector mask with a
28989 value of diff other than these values implies that
28990 something is wrong by the time we get here. */
28991 gcc_assert (i + j < nelt);
28992 if (d->perm[i + j] != i + diff - j)
28993 return false;
28994 }
28995
28996 /* Success! */
28997 if (d->testing_p)
28998 return true;
28999
29000 emit_insn (gen (d->target, d->op0));
29001 return true;
29002 }
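/* Illustrative example: a single-operand V8QImode selector of
   { 7, 6, 5, 4, 3, 2, 1, 0 } has diff == 7 and expands to VREV64.8, while
   { 3, 2, 1, 0, 7, 6, 5, 4 } has diff == 3 and expands to VREV32.8; the
   instructions reverse elements within 64-bit and 32-bit groups
   respectively. */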
29003
29004 /* Recognize patterns for the VTRN insns. */
29005
29006 static bool
29007 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29008 {
29009 unsigned int i, odd, mask, nelt = d->perm.length ();
29010 rtx out0, out1, in0, in1;
29011 rtx (*gen)(rtx, rtx, rtx, rtx);
29012
29013 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29014 return false;
29015
29016 /* Note that these are little-endian tests. Adjust for big-endian later. */
29017 if (d->perm[0] == 0)
29018 odd = 0;
29019 else if (d->perm[0] == 1)
29020 odd = 1;
29021 else
29022 return false;
29023 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29024
29025 for (i = 0; i < nelt; i += 2)
29026 {
29027 if (d->perm[i] != i + odd)
29028 return false;
29029 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29030 return false;
29031 }
29032
29033 /* Success! */
29034 if (d->testing_p)
29035 return true;
29036
29037 switch (d->vmode)
29038 {
29039 case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
29040 case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
29041 case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
29042 case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
29043 case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
29044 case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
29045 case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29046 case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29047 case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29048 case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29049 default:
29050 gcc_unreachable ();
29051 }
29052
29053 in0 = d->op0;
29054 in1 = d->op1;
29055 if (BYTES_BIG_ENDIAN)
29056 {
29057 std::swap (in0, in1);
29058 odd = !odd;
29059 }
29060
29061 out0 = d->target;
29062 out1 = gen_reg_rtx (d->vmode);
29063 if (odd)
29064 std::swap (out0, out1);
29065
29066 emit_insn (gen (out0, in0, in1, out1));
29067 return true;
29068 }
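/* Illustrative example (little-endian): for a two-operand V4SImode
   permutation, the selector { 0, 4, 2, 6 } is matched above with odd == 0 and
   { 1, 5, 3, 7 } with odd == 1; each expands to a single VTRN.32, which
   transposes pairs of elements drawn from the two inputs. */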
29069
29070 /* Recognize patterns for the VEXT insns. */
29071
29072 static bool
29073 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29074 {
29075 unsigned int i, nelt = d->perm.length ();
29076 rtx (*gen) (rtx, rtx, rtx, rtx);
29077 rtx offset;
29078
29079 unsigned int location;
29080
29081 unsigned int next = d->perm[0] + 1;
29082
29083 /* TODO: Handle GCC's numbering of elements for big-endian. */
29084 if (BYTES_BIG_ENDIAN)
29085 return false;
29086
29087 /* Check if the extracted indexes are increasing by one. */
29088 for (i = 1; i < nelt; next++, i++)
29089 {
29090 /* If we hit the most significant element of the 2nd vector in
29091 the previous iteration, no need to test further. */
29092 if (next == 2 * nelt)
29093 return false;
29094
29095 /* If we are operating on only one vector, it could be a
29096 rotation. If there are only two elements of size < 64, let
29097 arm_evpc_neon_vrev catch it. */
29098 if (d->one_vector_p && (next == nelt))
29099 {
29100 if ((nelt == 2) && (d->vmode != V2DImode))
29101 return false;
29102 else
29103 next = 0;
29104 }
29105
29106 if (d->perm[i] != next)
29107 return false;
29108 }
29109
29110 location = d->perm[0];
29111
29112 switch (d->vmode)
29113 {
29114 case E_V16QImode: gen = gen_neon_vextv16qi; break;
29115 case E_V8QImode: gen = gen_neon_vextv8qi; break;
29116 case E_V4HImode: gen = gen_neon_vextv4hi; break;
29117 case E_V8HImode: gen = gen_neon_vextv8hi; break;
29118 case E_V2SImode: gen = gen_neon_vextv2si; break;
29119 case E_V4SImode: gen = gen_neon_vextv4si; break;
29120 case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29121 case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29122 case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29123 case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29124 case E_V2DImode: gen = gen_neon_vextv2di; break;
29125 default:
29126 return false;
29127 }
29128
29129 /* Success! */
29130 if (d->testing_p)
29131 return true;
29132
29133 offset = GEN_INT (location);
29134 emit_insn (gen (d->target, d->op0, d->op1, offset));
29135 return true;
29136 }
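/* Illustrative example (little-endian): a two-operand V8QImode selector of
   { 3, 4, 5, 6, 7, 8, 9, 10 } is a run of consecutive indexes starting at 3,
   so it is matched above and expands to VEXT.8 with an offset of 3. On
   big-endian the function bails out early (see the TODO above). */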
29137
29138 /* The NEON VTBL instruction is a fully variable permutation that's even
29139 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29140 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29141 can do slightly better by expanding this as a constant where we don't
29142 have to apply a mask. */
29143
29144 static bool
29145 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29146 {
29147 rtx rperm[MAX_VECT_LEN], sel;
29148 machine_mode vmode = d->vmode;
29149 unsigned int i, nelt = d->perm.length ();
29150
29151 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29152 numbering of elements for big-endian, we must reverse the order. */
29153 if (BYTES_BIG_ENDIAN)
29154 return false;
29155
29156 if (d->testing_p)
29157 return true;
29158
29159 /* Generic code will try constant permutation twice: once with the
29160 original mode and again with the elements lowered to QImode.
29161 So wait and don't do the selector expansion ourselves. */
29162 if (vmode != V8QImode && vmode != V16QImode)
29163 return false;
29164
29165 for (i = 0; i < nelt; ++i)
29166 rperm[i] = GEN_INT (d->perm[i]);
29167 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29168 sel = force_reg (vmode, sel);
29169
29170 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29171 return true;
29172 }
29173
29174 static bool
29175 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29176 {
29177 /* Check if the input mask matches vext before reordering the
29178 operands. */
29179 if (TARGET_NEON)
29180 if (arm_evpc_neon_vext (d))
29181 return true;
29182
29183 /* The pattern matching functions above are written to look for a small
29184 number to begin the sequence (0, 1, N/2). If we begin with an index
29185 from the second operand, we can swap the operands. */
29186 unsigned int nelt = d->perm.length ();
29187 if (d->perm[0] >= nelt)
29188 {
29189 for (unsigned int i = 0; i < nelt; ++i)
29190 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
29191
29192 std::swap (d->op0, d->op1);
29193 }
29194
29195 if (TARGET_NEON)
29196 {
29197 if (arm_evpc_neon_vuzp (d))
29198 return true;
29199 if (arm_evpc_neon_vzip (d))
29200 return true;
29201 if (arm_evpc_neon_vrev (d))
29202 return true;
29203 if (arm_evpc_neon_vtrn (d))
29204 return true;
29205 return arm_evpc_neon_vtbl (d);
29206 }
29207 return false;
29208 }
29209
29210 /* Expand a vec_perm_const pattern. */
29211
29212 bool
29213 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29214 {
29215 struct expand_vec_perm_d d;
29216 int i, nelt, which;
29217
29218 d.target = target;
29219 d.op0 = op0;
29220 d.op1 = op1;
29221
29222 d.vmode = GET_MODE (target);
29223 gcc_assert (VECTOR_MODE_P (d.vmode));
29224 d.testing_p = false;
29225
29226 nelt = GET_MODE_NUNITS (d.vmode);
29227 d.perm.reserve (nelt);
29228 for (i = which = 0; i < nelt; ++i)
29229 {
29230 rtx e = XVECEXP (sel, 0, i);
29231 int ei = INTVAL (e) & (2 * nelt - 1);
29232 which |= (ei < nelt ? 1 : 2);
29233 d.perm.quick_push (ei);
29234 }
29235
29236 switch (which)
29237 {
29238 default:
29239 gcc_unreachable();
29240
29241 case 3:
29242 d.one_vector_p = false;
29243 if (!rtx_equal_p (op0, op1))
29244 break;
29245
29246 /* The elements of PERM do not suggest that only the first operand
29247 is used, but both operands are identical. Allow easier matching
29248 of the permutation by folding the permutation into the single
29249 input vector. */
29250 /* FALLTHRU */
29251 case 2:
29252 for (i = 0; i < nelt; ++i)
29253 d.perm[i] &= nelt - 1;
29254 d.op0 = op1;
29255 d.one_vector_p = true;
29256 break;
29257
29258 case 1:
29259 d.op1 = op0;
29260 d.one_vector_p = true;
29261 break;
29262 }
29263
29264 return arm_expand_vec_perm_const_1 (&d);
29265 }
29266
29267 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29268
29269 static bool
29270 arm_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
29271 {
29272 struct expand_vec_perm_d d;
29273 unsigned int i, nelt, which;
29274 bool ret;
29275
29276 d.vmode = vmode;
29277 d.testing_p = true;
29278 d.perm.safe_splice (sel);
29279
29280 /* Categorize the set of elements in the selector. */
29281 nelt = GET_MODE_NUNITS (d.vmode);
29282 for (i = which = 0; i < nelt; ++i)
29283 {
29284 unsigned int e = d.perm[i];
29285 gcc_assert (e < 2 * nelt);
29286 which |= (e < nelt ? 1 : 2);
29287 }
29288
29289 /* If all elements come from the second vector, fold them onto the first. */
29290 if (which == 2)
29291 for (i = 0; i < nelt; ++i)
29292 d.perm[i] -= nelt;
29293
29294 /* Check whether the mask can be applied to the vector type. */
29295 d.one_vector_p = (which != 3);
29296
29297 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29298 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29299 if (!d.one_vector_p)
29300 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29301
29302 start_sequence ();
29303 ret = arm_expand_vec_perm_const_1 (&d);
29304 end_sequence ();
29305
29306 return ret;
29307 }
29308
29309 bool
29310 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29311 {
29312 /* In soft-float mode, all auto-increment forms are OK provided we
29313 either have LDRD or the access fits in a single word. */
29314 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29315 return true;
29316
29317 switch (code)
29318 {
29319 /* Post-increment and pre-decrement are supported for all
29320 instruction forms except for vector forms. */
29321 case ARM_POST_INC:
29322 case ARM_PRE_DEC:
29323 if (VECTOR_MODE_P (mode))
29324 {
29325 if (code != ARM_PRE_DEC)
29326 return true;
29327 else
29328 return false;
29329 }
29330
29331 return true;
29332
29333 case ARM_POST_DEC:
29334 case ARM_PRE_INC:
29335 /* Without LDRD, when the mode size is greater than the
29336 word size there is no point in auto-incrementing,
29337 because ldm and stm do not have these forms. */
29338 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29339 return false;
29340
29341 /* Vector and floating point modes do not support
29342 these auto increment forms. */
29343 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29344 return false;
29345
29346 return true;
29347
29348 default:
29349 return false;
29350
29351 }
29352
29353 return false;
29354 }
29355
29356 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29357 on ARM, since we know that shifts by negative amounts are no-ops.
29358 Additionally, the default expansion code is not available or suitable
29359 for post-reload insn splits (this can occur when the register allocator
29360 chooses not to do a shift in NEON).
29361
29362 This function is used in both initial expand and post-reload splits, and
29363 handles all kinds of 64-bit shifts.
29364
29365 Input requirements:
29366 - It is safe for the input and output to be the same register, but
29367 early-clobber rules apply for the shift amount and scratch registers.
29368 - Shift by register requires both scratch registers. In all other cases
29369 the scratch registers may be NULL.
29370 - Ashiftrt by a register also clobbers the CC register. */
29371 void
29372 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29373 rtx amount, rtx scratch1, rtx scratch2)
29374 {
29375 rtx out_high = gen_highpart (SImode, out);
29376 rtx out_low = gen_lowpart (SImode, out);
29377 rtx in_high = gen_highpart (SImode, in);
29378 rtx in_low = gen_lowpart (SImode, in);
29379
29380 /* Terminology:
29381 in = the register pair containing the input value.
29382 out = the destination register pair.
29383 up = the high- or low-part of each pair.
29384 down = the opposite part to "up".
29385 In a shift, we can consider bits to shift from "up"-stream to
29386 "down"-stream, so in a left-shift "up" is the low-part and "down"
29387 is the high-part of each register pair. */
29388
29389 rtx out_up = code == ASHIFT ? out_low : out_high;
29390 rtx out_down = code == ASHIFT ? out_high : out_low;
29391 rtx in_up = code == ASHIFT ? in_low : in_high;
29392 rtx in_down = code == ASHIFT ? in_high : in_low;
29393
29394 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29395 gcc_assert (out
29396 && (REG_P (out) || GET_CODE (out) == SUBREG)
29397 && GET_MODE (out) == DImode);
29398 gcc_assert (in
29399 && (REG_P (in) || GET_CODE (in) == SUBREG)
29400 && GET_MODE (in) == DImode);
29401 gcc_assert (amount
29402 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29403 && GET_MODE (amount) == SImode)
29404 || CONST_INT_P (amount)));
29405 gcc_assert (scratch1 == NULL
29406 || (GET_CODE (scratch1) == SCRATCH)
29407 || (GET_MODE (scratch1) == SImode
29408 && REG_P (scratch1)));
29409 gcc_assert (scratch2 == NULL
29410 || (GET_CODE (scratch2) == SCRATCH)
29411 || (GET_MODE (scratch2) == SImode
29412 && REG_P (scratch2)));
29413 gcc_assert (!REG_P (out) || !REG_P (amount)
29414 || !HARD_REGISTER_P (out)
29415 || (REGNO (out) != REGNO (amount)
29416 && REGNO (out) + 1 != REGNO (amount)));
29417
29418 /* Macros to make following code more readable. */
29419 #define SUB_32(DEST,SRC) \
29420 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29421 #define RSB_32(DEST,SRC) \
29422 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29423 #define SUB_S_32(DEST,SRC) \
29424 gen_addsi3_compare0 ((DEST), (SRC), \
29425 GEN_INT (-32))
29426 #define SET(DEST,SRC) \
29427 gen_rtx_SET ((DEST), (SRC))
29428 #define SHIFT(CODE,SRC,AMOUNT) \
29429 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29430 #define LSHIFT(CODE,SRC,AMOUNT) \
29431 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29432 SImode, (SRC), (AMOUNT))
29433 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29434 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29435 SImode, (SRC), (AMOUNT))
29436 #define ORR(A,B) \
29437 gen_rtx_IOR (SImode, (A), (B))
29438 #define BRANCH(COND,LABEL) \
29439 gen_arm_cond_branch ((LABEL), \
29440 gen_rtx_ ## COND (CCmode, cc_reg, \
29441 const0_rtx), \
29442 cc_reg)
29443
29444 /* Shifts by register and shifts by constant are handled separately. */
29445 if (CONST_INT_P (amount))
29446 {
29447 /* We have a shift-by-constant. */
29448
29449 /* First, handle out-of-range shift amounts.
29450 In both cases we try to match the result that an ARM instruction in a
29451 shift-by-register would give. This helps reduce execution
29452 differences between optimization levels, but it won't stop other
29453 parts of the compiler doing different things. This is "undefined
29454 behavior", in any case. */
29455 if (INTVAL (amount) <= 0)
29456 emit_insn (gen_movdi (out, in));
29457 else if (INTVAL (amount) >= 64)
29458 {
29459 if (code == ASHIFTRT)
29460 {
29461 rtx const31_rtx = GEN_INT (31);
29462 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29463 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29464 }
29465 else
29466 emit_insn (gen_movdi (out, const0_rtx));
29467 }
29468
29469 /* Now handle valid shifts. */
29470 else if (INTVAL (amount) < 32)
29471 {
29472 /* Shifts by a constant less than 32. */
29473 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29474
29475 /* Clearing the out register in DImode first avoids lots
29476 of spilling and results in less stack usage.
29477 Later this redundant insn is completely removed.
29478 Do that only if "in" and "out" are different registers. */
29479 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29480 emit_insn (SET (out, const0_rtx));
29481 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29482 emit_insn (SET (out_down,
29483 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29484 out_down)));
29485 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29486 }
29487 else
29488 {
29489 /* Shifts by a constant greater than 31. */
29490 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29491
29492 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29493 emit_insn (SET (out, const0_rtx));
29494 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29495 if (code == ASHIFTRT)
29496 emit_insn (gen_ashrsi3 (out_up, in_up,
29497 GEN_INT (31)));
29498 else
29499 emit_insn (SET (out_up, const0_rtx));
29500 }
29501 }
29502 else
29503 {
29504 /* We have a shift-by-register. */
29505 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29506
29507 /* This alternative requires the scratch registers. */
29508 gcc_assert (scratch1 && REG_P (scratch1));
29509 gcc_assert (scratch2 && REG_P (scratch2));
29510
29511 /* We will need the values "amount-32" and "32-amount" later.
29512 Swapping them around now allows the later code to be more general. */
29513 switch (code)
29514 {
29515 case ASHIFT:
29516 emit_insn (SUB_32 (scratch1, amount));
29517 emit_insn (RSB_32 (scratch2, amount));
29518 break;
29519 case ASHIFTRT:
29520 emit_insn (RSB_32 (scratch1, amount));
29521 /* Also set CC = amount > 32. */
29522 emit_insn (SUB_S_32 (scratch2, amount));
29523 break;
29524 case LSHIFTRT:
29525 emit_insn (RSB_32 (scratch1, amount));
29526 emit_insn (SUB_32 (scratch2, amount));
29527 break;
29528 default:
29529 gcc_unreachable ();
29530 }
29531
29532 /* Emit code like this:
29533
29534 arithmetic-left:
29535 out_down = in_down << amount;
29536 out_down = (in_up << (amount - 32)) | out_down;
29537 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29538 out_up = in_up << amount;
29539
29540 arithmetic-right:
29541 out_down = in_down >> amount;
29542 out_down = (in_up << (32 - amount)) | out_down;
29543 if (amount < 32)
29544 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29545 out_up = in_up << amount;
29546
29547 logical-right:
29548 out_down = in_down >> amount;
29549 out_down = (in_up << (32 - amount)) | out_down;
29550 if (amount < 32)
29551 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29552 out_up = in_up << amount;
29553
29554 The ARM and Thumb2 variants are the same but implemented slightly
29555 differently. If this were only called during expand we could just
29556 use the Thumb2 case and let combine do the right thing, but this
29557 can also be called from post-reload splitters. */
29558
29559 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29560
29561 if (!TARGET_THUMB2)
29562 {
29563 /* Emit code for ARM mode. */
29564 emit_insn (SET (out_down,
29565 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29566 if (code == ASHIFTRT)
29567 {
29568 rtx_code_label *done_label = gen_label_rtx ();
29569 emit_jump_insn (BRANCH (LT, done_label));
29570 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29571 out_down)));
29572 emit_label (done_label);
29573 }
29574 else
29575 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29576 out_down)));
29577 }
29578 else
29579 {
29580 /* Emit code for Thumb2 mode.
29581 Thumb2 can't do shift and or in one insn. */
29582 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29583 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29584
29585 if (code == ASHIFTRT)
29586 {
29587 rtx_code_label *done_label = gen_label_rtx ();
29588 emit_jump_insn (BRANCH (LT, done_label));
29589 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29590 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29591 emit_label (done_label);
29592 }
29593 else
29594 {
29595 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29596 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29597 }
29598 }
29599
29600 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29601 }
29602
29603 #undef SUB_32
29604 #undef RSB_32
29605 #undef SUB_S_32
29606 #undef SET
29607 #undef SHIFT
29608 #undef LSHIFT
29609 #undef REV_LSHIFT
29610 #undef ORR
29611 #undef BRANCH
29612 }
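/* Worked example (illustrative) of the constant-shift paths above, for a left
   shift of the register pair {in_high:in_low}:
     amount == 8 (< 32): out_high = (in_high << 8) | ((unsigned) in_low >> 24);
                         out_low = in_low << 8;
     amount == 40 (>= 32): out_high = in_low << 8;
                           out_low = 0;
   which matches what a full 64-bit shift by those amounts must produce. */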
29613
29614 /* Returns true if the pattern is a valid symbolic address, which is either a
29615 symbol_ref or (symbol_ref + addend).
29616
29617 According to the ARM ELF ABI, the initial addend of REL-type relocations
29618 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29619 literal field of the instruction as a 16-bit signed value in the range
29620 -32768 <= A < 32768. */
29621
29622 bool
29623 arm_valid_symbolic_address_p (rtx addr)
29624 {
29625 rtx xop0, xop1 = NULL_RTX;
29626 rtx tmp = addr;
29627
29628 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29629 return true;
29630
29631 /* (const (plus: symbol_ref const_int)) */
29632 if (GET_CODE (addr) == CONST)
29633 tmp = XEXP (addr, 0);
29634
29635 if (GET_CODE (tmp) == PLUS)
29636 {
29637 xop0 = XEXP (tmp, 0);
29638 xop1 = XEXP (tmp, 1);
29639
29640 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29641 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29642 }
29643
29644 return false;
29645 }
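/* For example, (symbol_ref "sym") is accepted, as is
   (const (plus (symbol_ref "sym") (const_int 32767))), whereas an addend of
   32768 falls outside the signed 16-bit range required by the REL-type
   relocations described above and is rejected. */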
29646
29647 /* Returns true if COMPARISON is a valid comparison operation, and
29648 puts the operands into a form that is valid for it. */
29649 bool
29650 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29651 {
29652 enum rtx_code code = GET_CODE (*comparison);
29653 int code_int;
29654 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29655 ? GET_MODE (*op2) : GET_MODE (*op1);
29656
29657 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29658
29659 if (code == UNEQ || code == LTGT)
29660 return false;
29661
29662 code_int = (int)code;
29663 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29664 PUT_CODE (*comparison, (enum rtx_code)code_int);
29665
29666 switch (mode)
29667 {
29668 case E_SImode:
29669 if (!arm_add_operand (*op1, mode))
29670 *op1 = force_reg (mode, *op1);
29671 if (!arm_add_operand (*op2, mode))
29672 *op2 = force_reg (mode, *op2);
29673 return true;
29674
29675 case E_DImode:
29676 if (!cmpdi_operand (*op1, mode))
29677 *op1 = force_reg (mode, *op1);
29678 if (!cmpdi_operand (*op2, mode))
29679 *op2 = force_reg (mode, *op2);
29680 return true;
29681
29682 case E_HFmode:
29683 if (!TARGET_VFP_FP16INST)
29684 break;
29685 /* FP16 comparisons are done in SF mode. */
29686 mode = SFmode;
29687 *op1 = convert_to_mode (mode, *op1, 1);
29688 *op2 = convert_to_mode (mode, *op2, 1);
29689 /* Fall through. */
29690 case E_SFmode:
29691 case E_DFmode:
29692 if (!vfp_compare_operand (*op1, mode))
29693 *op1 = force_reg (mode, *op1);
29694 if (!vfp_compare_operand (*op2, mode))
29695 *op2 = force_reg (mode, *op2);
29696 return true;
29697 default:
29698 break;
29699 }
29700
29701 return false;
29702
29703 }
29704
29705 /* Maximum number of instructions to set block of memory. */
29706 static int
29707 arm_block_set_max_insns (void)
29708 {
29709 if (optimize_function_for_size_p (cfun))
29710 return 4;
29711 else
29712 return current_tune->max_insns_inline_memset;
29713 }
29714
29715 /* Return TRUE if it's profitable to set block of memory for
29716 non-vectorized case. VAL is the value to set the memory
29717 with. LENGTH is the number of bytes to set. ALIGN is the
29718 alignment of the destination memory in bytes. UNALIGNED_P
29719 is TRUE if we can only set the memory with instructions
29720 meeting alignment requirements. USE_STRD_P is TRUE if we
29721 can use strd to set the memory. */
29722 static bool
29723 arm_block_set_non_vect_profit_p (rtx val,
29724 unsigned HOST_WIDE_INT length,
29725 unsigned HOST_WIDE_INT align,
29726 bool unaligned_p, bool use_strd_p)
29727 {
29728 int num = 0;
29729 /* For a leftover of 0-7 bytes, we can set the memory block using
29730 strb/strh/str with the minimum number of instructions. */
29731 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
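/* For instance, a 7-byte leftover takes str + strh + strb (3 insns),
   5 bytes take str + strb (2 insns) and 2 bytes take a single strh. */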
29732
29733 if (unaligned_p)
29734 {
29735 num = arm_const_inline_cost (SET, val);
29736 num += length / align + length % align;
29737 }
29738 else if (use_strd_p)
29739 {
29740 num = arm_const_double_inline_cost (val);
29741 num += (length >> 3) + leftover[length & 7];
29742 }
29743 else
29744 {
29745 num = arm_const_inline_cost (SET, val);
29746 num += (length >> 2) + leftover[length & 3];
29747 }
29748
29749 /* We may be able to combine last pair STRH/STRB into a single STR
29750 by shifting one byte back. */
29751 if (unaligned_access && length > 3 && (length & 3) == 3)
29752 num--;
29753
29754 return (num <= arm_block_set_max_insns ());
29755 }
29756
29757 /* Return TRUE if it's profitable to set block of memory for
29758 vectorized case. LENGTH is the number of bytes to set.
29759 ALIGN is the alignment of destination memory in bytes.
29760 MODE is the vector mode used to set the memory. */
29761 static bool
29762 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29763 unsigned HOST_WIDE_INT align,
29764 machine_mode mode)
29765 {
29766 int num;
29767 bool unaligned_p = ((align & 3) != 0);
29768 unsigned int nelt = GET_MODE_NUNITS (mode);
29769
29770 /* Instruction loading constant value. */
29771 num = 1;
29772 /* Instructions storing the memory. */
29773 num += (length + nelt - 1) / nelt;
29774 /* Instructions adjusting the address expression. We only need to
29775 adjust the address expression if it is 4-byte aligned and the
29776 leftover bytes can only be stored by a misaligned store instruction. */
29777 if (!unaligned_p && (length & 3) != 0)
29778 num++;
29779
29780 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29781 if (!unaligned_p && mode == V16QImode)
29782 num--;
29783
29784 return (num <= arm_block_set_max_insns ());
29785 }
29786
29787 /* Set a block of memory using vectorization instructions for the
29788 unaligned case. We fill the first LENGTH bytes of the memory
29789 area starting from DSTBASE with byte constant VALUE. ALIGN is
29790 the alignment requirement of memory. Return TRUE if succeeded. */
29791 static bool
29792 arm_block_set_unaligned_vect (rtx dstbase,
29793 unsigned HOST_WIDE_INT length,
29794 unsigned HOST_WIDE_INT value,
29795 unsigned HOST_WIDE_INT align)
29796 {
29797 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29798 rtx dst, mem;
29799 rtx val_elt, val_vec, reg;
29800 rtx rval[MAX_VECT_LEN];
29801 rtx (*gen_func) (rtx, rtx);
29802 machine_mode mode;
29803 unsigned HOST_WIDE_INT v = value;
29804 unsigned int offset = 0;
29805 gcc_assert ((align & 0x3) != 0);
29806 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29807 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29808 if (length >= nelt_v16)
29809 {
29810 mode = V16QImode;
29811 gen_func = gen_movmisalignv16qi;
29812 }
29813 else
29814 {
29815 mode = V8QImode;
29816 gen_func = gen_movmisalignv8qi;
29817 }
29818 nelt_mode = GET_MODE_NUNITS (mode);
29819 gcc_assert (length >= nelt_mode);
29820 /* Skip if it isn't profitable. */
29821 if (!arm_block_set_vect_profit_p (length, align, mode))
29822 return false;
29823
29824 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29825 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29826
29827 v = sext_hwi (v, BITS_PER_WORD);
29828 val_elt = GEN_INT (v);
29829 for (j = 0; j < nelt_mode; j++)
29830 rval[j] = val_elt;
29831
29832 reg = gen_reg_rtx (mode);
29833 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29834 /* Emit instruction loading the constant value. */
29835 emit_move_insn (reg, val_vec);
29836
29837 /* Handle nelt_mode bytes in a vector. */
29838 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29839 {
29840 emit_insn ((*gen_func) (mem, reg));
29841 if (i + 2 * nelt_mode <= length)
29842 {
29843 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29844 offset += nelt_mode;
29845 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29846 }
29847 }
29848
29849 /* If at least nelt_v8 bytes are left over, we must be in
29850 V16QImode. */
29851 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29852
29853 /* Handle (8, 16) bytes leftover. */
29854 if (i + nelt_v8 < length)
29855 {
29856 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29857 offset += length - i;
29858 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29859
29860 /* We are shifting bytes back, set the alignment accordingly. */
29861 if ((length & 1) != 0 && align >= 2)
29862 set_mem_align (mem, BITS_PER_UNIT);
29863
29864 emit_insn (gen_movmisalignv16qi (mem, reg));
29865 }
29866 /* Handle (0, 8] bytes leftover. */
29867 else if (i < length && i + nelt_v8 >= length)
29868 {
29869 if (mode == V16QImode)
29870 reg = gen_lowpart (V8QImode, reg);
29871
29872 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29873 + (nelt_mode - nelt_v8))));
29874 offset += (length - i) + (nelt_mode - nelt_v8);
29875 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29876
29877 /* We are shifting bytes back, set the alignment accordingly. */
29878 if ((length & 1) != 0 && align >= 2)
29879 set_mem_align (mem, BITS_PER_UNIT);
29880
29881 emit_insn (gen_movmisalignv8qi (mem, reg));
29882 }
29883
29884 return true;
29885 }
29886
29887 /* Set a block of memory using vectorization instructions for the
29888 aligned case. We fill the first LENGTH bytes of the memory area
29889 starting from DSTBASE with byte constant VALUE. ALIGN is the
29890 alignment requirement of memory. Return TRUE if succeeded. */
29891 static bool
29892 arm_block_set_aligned_vect (rtx dstbase,
29893 unsigned HOST_WIDE_INT length,
29894 unsigned HOST_WIDE_INT value,
29895 unsigned HOST_WIDE_INT align)
29896 {
29897 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29898 rtx dst, addr, mem;
29899 rtx val_elt, val_vec, reg;
29900 rtx rval[MAX_VECT_LEN];
29901 machine_mode mode;
29902 unsigned HOST_WIDE_INT v = value;
29903 unsigned int offset = 0;
29904
29905 gcc_assert ((align & 0x3) == 0);
29906 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29907 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29908 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29909 mode = V16QImode;
29910 else
29911 mode = V8QImode;
29912
29913 nelt_mode = GET_MODE_NUNITS (mode);
29914 gcc_assert (length >= nelt_mode);
29915 /* Skip if it isn't profitable. */
29916 if (!arm_block_set_vect_profit_p (length, align, mode))
29917 return false;
29918
29919 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29920
29921 v = sext_hwi (v, BITS_PER_WORD);
29922 val_elt = GEN_INT (v);
29923 for (j = 0; j < nelt_mode; j++)
29924 rval[j] = val_elt;
29925
29926 reg = gen_reg_rtx (mode);
29927 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29928 /* Emit instruction loading the constant value. */
29929 emit_move_insn (reg, val_vec);
29930
29931 i = 0;
29932 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29933 if (mode == V16QImode)
29934 {
29935 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29936 emit_insn (gen_movmisalignv16qi (mem, reg));
29937 i += nelt_mode;
29938 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29939 if (i + nelt_v8 < length && i + nelt_v16 > length)
29940 {
29941 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29942 offset += length - nelt_mode;
29943 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29944 /* We are shifting bytes back, set the alignment accordingly. */
29945 if ((length & 0x3) == 0)
29946 set_mem_align (mem, BITS_PER_UNIT * 4);
29947 else if ((length & 0x1) == 0)
29948 set_mem_align (mem, BITS_PER_UNIT * 2);
29949 else
29950 set_mem_align (mem, BITS_PER_UNIT);
29951
29952 emit_insn (gen_movmisalignv16qi (mem, reg));
29953 return true;
29954 }
29955 /* Fall through for bytes leftover. */
29956 mode = V8QImode;
29957 nelt_mode = GET_MODE_NUNITS (mode);
29958 reg = gen_lowpart (V8QImode, reg);
29959 }
29960
29961 /* Handle 8 bytes in a vector. */
29962 for (; (i + nelt_mode <= length); i += nelt_mode)
29963 {
29964 addr = plus_constant (Pmode, dst, i);
29965 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29966 emit_move_insn (mem, reg);
29967 }
29968
29969 /* Handle single word leftover by shifting 4 bytes back. We can
29970 use aligned access for this case. */
29971 if (i + UNITS_PER_WORD == length)
29972 {
29973 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29974 offset += i - UNITS_PER_WORD;
29975 mem = adjust_automodify_address (dstbase, mode, addr, offset);
29976 /* We are shifting 4 bytes back, set the alignment accordingly. */
29977 if (align > UNITS_PER_WORD)
29978 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29979
29980 emit_move_insn (mem, reg);
29981 }
29982 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29983 We have to use unaligned access for this case. */
29984 else if (i < length)
29985 {
29986 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29987 offset += length - nelt_mode;
29988 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29989 /* We are shifting bytes back, set the alignment accordingly. */
29990 if ((length & 1) == 0)
29991 set_mem_align (mem, BITS_PER_UNIT * 2);
29992 else
29993 set_mem_align (mem, BITS_PER_UNIT);
29994
29995 emit_insn (gen_movmisalignv8qi (mem, reg));
29996 }
29997
29998 return true;
29999 }
30000
30001 /* Set a block of memory using plain strh/strb instructions, only
30002 using instructions allowed by ALIGN on the processor. We fill the
30003 first LENGTH bytes of the memory area starting from DSTBASE
30004 with byte constant VALUE. ALIGN is the alignment requirement
30005 of memory. */
30006 static bool
30007 arm_block_set_unaligned_non_vect (rtx dstbase,
30008 unsigned HOST_WIDE_INT length,
30009 unsigned HOST_WIDE_INT value,
30010 unsigned HOST_WIDE_INT align)
30011 {
30012 unsigned int i;
30013 rtx dst, addr, mem;
30014 rtx val_exp, val_reg, reg;
30015 machine_mode mode;
30016 HOST_WIDE_INT v = value;
30017
30018 gcc_assert (align == 1 || align == 2);
30019
30020 if (align == 2)
30021 v |= (value << BITS_PER_UNIT);
30022
30023 v = sext_hwi (v, BITS_PER_WORD);
30024 val_exp = GEN_INT (v);
30025 /* Skip if it isn't profitable. */
30026 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30027 align, true, false))
30028 return false;
30029
30030 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30031 mode = (align == 2 ? HImode : QImode);
30032 val_reg = force_reg (SImode, val_exp);
30033 reg = gen_lowpart (mode, val_reg);
30034
30035 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30036 {
30037 addr = plus_constant (Pmode, dst, i);
30038 mem = adjust_automodify_address (dstbase, mode, addr, i);
30039 emit_move_insn (mem, reg);
30040 }
30041
30042 /* Handle single byte leftover. */
30043 if (i + 1 == length)
30044 {
30045 reg = gen_lowpart (QImode, val_reg);
30046 addr = plus_constant (Pmode, dst, i);
30047 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30048 emit_move_insn (mem, reg);
30049 i++;
30050 }
30051
30052 gcc_assert (i == length);
30053 return true;
30054 }
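/* Illustrative example: with ALIGN == 2 and VALUE == 0xab, the constant built
   above becomes 0xabab, the loop stores it halfword by halfword with strh,
   and a trailing odd byte, if any, is stored with a single strb. */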
30055
30056 /* Set a block of memory using plain strd/str/strh/strb instructions,
30057 to permit unaligned copies on processors which support unaligned
30058 semantics for those instructions. We fill the first LENGTH bytes
30059 of the memory area starting from DSTBASE with byte constant VALUE.
30060 ALIGN is the alignment requirement of memory. */
30061 static bool
30062 arm_block_set_aligned_non_vect (rtx dstbase,
30063 unsigned HOST_WIDE_INT length,
30064 unsigned HOST_WIDE_INT value,
30065 unsigned HOST_WIDE_INT align)
30066 {
30067 unsigned int i;
30068 rtx dst, addr, mem;
30069 rtx val_exp, val_reg, reg;
30070 unsigned HOST_WIDE_INT v;
30071 bool use_strd_p;
30072
30073 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30074 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30075
30076 v = (value | (value << 8) | (value << 16) | (value << 24));
30077 if (length < UNITS_PER_WORD)
30078 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30079
30080 if (use_strd_p)
30081 v |= (v << BITS_PER_WORD);
30082 else
30083 v = sext_hwi (v, BITS_PER_WORD);
30084
30085 val_exp = GEN_INT (v);
30086 /* Skip if it isn't profitable. */
30087 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30088 align, false, use_strd_p))
30089 {
30090 if (!use_strd_p)
30091 return false;
30092
30093 /* Try without strd. */
30094 v = (v >> BITS_PER_WORD);
30095 v = sext_hwi (v, BITS_PER_WORD);
30096 val_exp = GEN_INT (v);
30097 use_strd_p = false;
30098 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30099 align, false, use_strd_p))
30100 return false;
30101 }
30102
30103 i = 0;
30104 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30105 /* Handle double words using strd if possible. */
30106 if (use_strd_p)
30107 {
30108 val_reg = force_reg (DImode, val_exp);
30109 reg = val_reg;
30110 for (; (i + 8 <= length); i += 8)
30111 {
30112 addr = plus_constant (Pmode, dst, i);
30113 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30114 emit_move_insn (mem, reg);
30115 }
30116 }
30117 else
30118 val_reg = force_reg (SImode, val_exp);
30119
30120 /* Handle words. */
30121 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30122 for (; (i + 4 <= length); i += 4)
30123 {
30124 addr = plus_constant (Pmode, dst, i);
30125 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30126 if ((align & 3) == 0)
30127 emit_move_insn (mem, reg);
30128 else
30129 emit_insn (gen_unaligned_storesi (mem, reg));
30130 }
30131
30132 /* Merge last pair of STRH and STRB into a STR if possible. */
30133 if (unaligned_access && i > 0 && (i + 3) == length)
30134 {
30135 addr = plus_constant (Pmode, dst, i - 1);
30136 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30137 /* We are shifting one byte back, set the alignment accordingly. */
30138 if ((align & 1) == 0)
30139 set_mem_align (mem, BITS_PER_UNIT);
30140
30141 /* Most likely this is an unaligned access, and we can't tell at
30142 compilation time. */
30143 emit_insn (gen_unaligned_storesi (mem, reg));
30144 return true;
30145 }
30146
30147 /* Handle half word leftover. */
30148 if (i + 2 <= length)
30149 {
30150 reg = gen_lowpart (HImode, val_reg);
30151 addr = plus_constant (Pmode, dst, i);
30152 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30153 if ((align & 1) == 0)
30154 emit_move_insn (mem, reg);
30155 else
30156 emit_insn (gen_unaligned_storehi (mem, reg));
30157
30158 i += 2;
30159 }
30160
30161 /* Handle single byte leftover. */
30162 if (i + 1 == length)
30163 {
30164 reg = gen_lowpart (QImode, val_reg);
30165 addr = plus_constant (Pmode, dst, i);
30166 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30167 emit_move_insn (mem, reg);
30168 }
30169
30170 return true;
30171 }
30172
30173 /* Set a block of memory using vectorization instructions for both
30174 aligned and unaligned cases. We fill the first LENGTH bytes of
30175 the memory area starting from DSTBASE with byte constant VALUE.
30176 ALIGN is the alignment requirement of memory. */
30177 static bool
30178 arm_block_set_vect (rtx dstbase,
30179 unsigned HOST_WIDE_INT length,
30180 unsigned HOST_WIDE_INT value,
30181 unsigned HOST_WIDE_INT align)
30182 {
30183 /* Check whether we need to use unaligned store instruction. */
30184 if (((align & 3) != 0 || (length & 3) != 0)
30185 /* Check whether unaligned store instruction is available. */
30186 && (!unaligned_access || BYTES_BIG_ENDIAN))
30187 return false;
30188
30189 if ((align & 3) == 0)
30190 return arm_block_set_aligned_vect (dstbase, length, value, align);
30191 else
30192 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30193 }
30194
30195 /* Expand a string store (memset) operation. First we try to do it using
30196 vectorization instructions, then fall back to ARM unaligned access and
30197 double-word stores if profitable. OPERANDS[0] is the destination,
30198 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30199 initialize the memory, OPERANDS[3] is the known alignment of the
30200 destination. */
30201 bool
30202 arm_gen_setmem (rtx *operands)
30203 {
30204 rtx dstbase = operands[0];
30205 unsigned HOST_WIDE_INT length;
30206 unsigned HOST_WIDE_INT value;
30207 unsigned HOST_WIDE_INT align;
30208
30209 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30210 return false;
30211
30212 length = UINTVAL (operands[1]);
30213 if (length > 64)
30214 return false;
30215
30216 value = (UINTVAL (operands[2]) & 0xFF);
30217 align = UINTVAL (operands[3]);
30218 if (TARGET_NEON && length >= 8
30219 && current_tune->string_ops_prefer_neon
30220 && arm_block_set_vect (dstbase, length, value, align))
30221 return true;
30222
30223 if (!unaligned_access && (align & 3) != 0)
30224 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30225
30226 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30227 }
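/* Usage note (illustrative): only blocks of at most 64 bytes are expanded
   inline here; for anything longer the expander returns false and the caller
   typically falls back to a library call to memset. Only the low 8 bits of
   OPERANDS[2] are used, so a value of 0x1234 stores the byte 0x34, matching
   memset semantics. */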
30228
30229
30230 static bool
30231 arm_macro_fusion_p (void)
30232 {
30233 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30234 }
30235
30236 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30237 for MOVW / MOVT macro fusion. */
30238
30239 static bool
30240 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30241 {
30242 /* We are trying to fuse
30243 movw imm / movt imm
30244 instructions as a group that gets scheduled together. */
30245
30246 rtx set_dest = SET_DEST (curr_set);
30247
30248 if (GET_MODE (set_dest) != SImode)
30249 return false;
30250
30251 /* We are trying to match:
30252 prev (movw) == (set (reg r0) (const_int imm16))
30253 curr (movt) == (set (zero_extract (reg r0)
30254 (const_int 16)
30255 (const_int 16))
30256 (const_int imm16_1))
30257 or
30258 prev (movw) == (set (reg r1)
30259 (high (symbol_ref ("SYM"))))
30260 curr (movt) == (set (reg r0)
30261 (lo_sum (reg r1)
30262 (symbol_ref ("SYM")))) */
30263
30264 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30265 {
30266 if (CONST_INT_P (SET_SRC (curr_set))
30267 && CONST_INT_P (SET_SRC (prev_set))
30268 && REG_P (XEXP (set_dest, 0))
30269 && REG_P (SET_DEST (prev_set))
30270 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30271 return true;
30272
30273 }
30274 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30275 && REG_P (SET_DEST (curr_set))
30276 && REG_P (SET_DEST (prev_set))
30277 && GET_CODE (SET_SRC (prev_set)) == HIGH
30278 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30279 return true;
30280
30281 return false;
30282 }
30283
30284 static bool
30285 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30286 {
30287 rtx prev_set = single_set (prev);
30288 rtx curr_set = single_set (curr);
30289
30290 if (!prev_set
30291 || !curr_set)
30292 return false;
30293
30294 if (any_condjump_p (curr))
30295 return false;
30296
30297 if (!arm_macro_fusion_p ())
30298 return false;
30299
30300 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30301 && aarch_crypto_can_dual_issue (prev, curr))
30302 return true;
30303
30304 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30305 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30306 return true;
30307
30308 return false;
30309 }
30310
30311 /* Return true iff the instruction fusion described by OP is enabled. */
30312 bool
30313 arm_fusion_enabled_p (tune_params::fuse_ops op)
30314 {
30315 return current_tune->fusible_ops & op;
30316 }
30317
30318 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30319 scheduled for speculative execution. Reject the long-running division
30320 and square-root instructions. */
30321
30322 static bool
30323 arm_sched_can_speculate_insn (rtx_insn *insn)
30324 {
30325 switch (get_attr_type (insn))
30326 {
30327 case TYPE_SDIV:
30328 case TYPE_UDIV:
30329 case TYPE_FDIVS:
30330 case TYPE_FDIVD:
30331 case TYPE_FSQRTS:
30332 case TYPE_FSQRTD:
30333 case TYPE_NEON_FP_SQRT_S:
30334 case TYPE_NEON_FP_SQRT_D:
30335 case TYPE_NEON_FP_SQRT_S_Q:
30336 case TYPE_NEON_FP_SQRT_D_Q:
30337 case TYPE_NEON_FP_DIV_S:
30338 case TYPE_NEON_FP_DIV_D:
30339 case TYPE_NEON_FP_DIV_S_Q:
30340 case TYPE_NEON_FP_DIV_D_Q:
30341 return false;
30342 default:
30343 return true;
30344 }
30345 }
30346
30347 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30348
30349 static unsigned HOST_WIDE_INT
30350 arm_asan_shadow_offset (void)
30351 {
30352 return HOST_WIDE_INT_1U << 29;
30353 }
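/* This places the ARM ASan shadow region at 0x20000000; with the usual 1/8
   shadow mapping a shadow address is computed (roughly) as
   (addr >> 3) + 0x20000000. */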
30354
30355
30356 /* This is a temporary fix for PR60655. Ideally we need
30357 to handle most of these cases in the generic part but
30358 currently we reject minus (..) (sym_ref). We try to
30359 ameliorate the case with minus (sym_ref1) (sym_ref2)
30360 where they are in the same section. */
30361
30362 static bool
30363 arm_const_not_ok_for_debug_p (rtx p)
30364 {
30365 tree decl_op0 = NULL;
30366 tree decl_op1 = NULL;
30367
30368 if (GET_CODE (p) == MINUS)
30369 {
30370 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30371 {
30372 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30373 if (decl_op1
30374 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30375 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30376 {
30377 if ((VAR_P (decl_op1)
30378 || TREE_CODE (decl_op1) == CONST_DECL)
30379 && (VAR_P (decl_op0)
30380 || TREE_CODE (decl_op0) == CONST_DECL))
30381 return (get_variable_section (decl_op1, false)
30382 != get_variable_section (decl_op0, false));
30383
30384 if (TREE_CODE (decl_op1) == LABEL_DECL
30385 && TREE_CODE (decl_op0) == LABEL_DECL)
30386 return (DECL_CONTEXT (decl_op1)
30387 != DECL_CONTEXT (decl_op0));
30388 }
30389
30390 return true;
30391 }
30392 }
30393
30394 return false;
30395 }
30396
30397 /* Return TRUE if X is a reference to a value in a constant pool. */
30398 extern bool
30399 arm_is_constant_pool_ref (rtx x)
30400 {
30401 return (MEM_P (x)
30402 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30403 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30404 }
30405
30406 /* Remember the last target of arm_set_current_function. */
30407 static GTY(()) tree arm_previous_fndecl;
30408
30409 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30410
30411 void
30412 save_restore_target_globals (tree new_tree)
30413 {
30414 /* If we have a previous state, use it. */
30415 if (TREE_TARGET_GLOBALS (new_tree))
30416 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30417 else if (new_tree == target_option_default_node)
30418 restore_target_globals (&default_target_globals);
30419 else
30420 {
30421 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30422 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30423 }
30424
30425 arm_option_params_internal ();
30426 }
30427
30428 /* Invalidate arm_previous_fndecl. */
30429
30430 void
30431 arm_reset_previous_fndecl (void)
30432 {
30433 arm_previous_fndecl = NULL_TREE;
30434 }
30435
30436 /* Establish appropriate back-end context for processing the function
30437 FNDECL. The argument might be NULL to indicate processing at top
30438 level, outside of any function scope. */
30439
30440 static void
30441 arm_set_current_function (tree fndecl)
30442 {
30443 if (!fndecl || fndecl == arm_previous_fndecl)
30444 return;
30445
30446 tree old_tree = (arm_previous_fndecl
30447 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30448 : NULL_TREE);
30449
30450 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30451
30452 /* If the current function has no attributes but the previous one did,
30453 use the default node. */
30454 if (! new_tree && old_tree)
30455 new_tree = target_option_default_node;
30456
30457 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop to
30458 the default have been handled by save_restore_target_globals from
30459 arm_pragma_target_parse. */
30460 if (old_tree == new_tree)
30461 return;
30462
30463 arm_previous_fndecl = fndecl;
30464
30465 /* First set the target options. */
30466 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30467
30468 save_restore_target_globals (new_tree);
30469 }
30470
30471 /* Implement TARGET_OPTION_PRINT. */
30472
30473 static void
30474 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30475 {
30476 int flags = ptr->x_target_flags;
30477 const char *fpu_name;
30478
30479 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30480 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30481
30482 fprintf (file, "%*sselected isa %s\n", indent, "",
30483 TARGET_THUMB2_P (flags) ? "thumb2" :
30484 TARGET_THUMB_P (flags) ? "thumb1" :
30485 "arm");
30486
30487 if (ptr->x_arm_arch_string)
30488 fprintf (file, "%*sselected architecture %s\n", indent, "",
30489 ptr->x_arm_arch_string);
30490
30491 if (ptr->x_arm_cpu_string)
30492 fprintf (file, "%*sselected CPU %s\n", indent, "",
30493 ptr->x_arm_cpu_string);
30494
30495 if (ptr->x_arm_tune_string)
30496 fprintf (file, "%*sselected tune %s\n", indent, "",
30497 ptr->x_arm_tune_string);
30498
30499 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30500 }
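
/* Illustrative example (editorial, not from the original sources): with
options along the lines of -mthumb -march=armv7-a -mfpu=neon the hook above
would print something like

    selected isa thumb2
    selected architecture armv7-a
    selected fpu neon

with the exact lines depending on which fields of PTR are set.  */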
30501
30502 /* Hook to determine if one function can safely inline another. */
30503
30504 static bool
30505 arm_can_inline_p (tree caller, tree callee)
30506 {
30507 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30508 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30509 bool can_inline = true;
30510
30511 struct cl_target_option *caller_opts
30512 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30513 : target_option_default_node);
30514
30515 struct cl_target_option *callee_opts
30516 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30517 : target_option_default_node);
30518
30519 if (callee_opts == caller_opts)
30520 return true;
30521
30522 /* Callee's ISA features should be a subset of the caller's. */
30523 struct arm_build_target caller_target;
30524 struct arm_build_target callee_target;
30525 caller_target.isa = sbitmap_alloc (isa_num_bits);
30526 callee_target.isa = sbitmap_alloc (isa_num_bits);
30527
30528 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30529 false);
30530 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30531 false);
30532 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30533 can_inline = false;
30534
30535 sbitmap_free (caller_target.isa);
30536 sbitmap_free (callee_target.isa);
30537
30538 /* OK to inline between different modes.
30539 Functions with mode-specific instructions, e.g. using asm,
30540 must be explicitly protected with noinline. */
30541 return can_inline;
30542 }
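
/* Worked example (editorial): a callee whose target options give it only the
vfpv3-d16 FPU feature bits can be inlined into a caller configured for neon
(whose feature set includes those bits), but not the other way round, since
bitmap_subset_p would then fail and can_inline becomes false.  */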
30543
30544 /* Hook to fix function's alignment affected by target attribute. */
30545
30546 static void
30547 arm_relayout_function (tree fndecl)
30548 {
30549 if (DECL_USER_ALIGN (fndecl))
30550 return;
30551
30552 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30553
30554 if (!callee_tree)
30555 callee_tree = target_option_default_node;
30556
30557 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30558 SET_DECL_ALIGN
30559 (fndecl,
30560 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30561 }
30562
30563 /* Inner function to process the attribute((target(...))): take an argument
30564 and set the current options from it.  If we have a list, recursively
30565 process each entry in the list. */
30566
30567 static bool
30568 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30569 {
30570 if (TREE_CODE (args) == TREE_LIST)
30571 {
30572 bool ret = true;
30573
30574 for (; args; args = TREE_CHAIN (args))
30575 if (TREE_VALUE (args)
30576 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30577 ret = false;
30578 return ret;
30579 }
30580
30581 else if (TREE_CODE (args) != STRING_CST)
30582 {
30583 error ("attribute %<target%> argument not a string");
30584 return false;
30585 }
30586
30587 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30588 char *q;
30589
30590 while ((q = strtok (argstr, ",")) != NULL)
30591 {
30592 while (ISSPACE (*q)) ++q;
30593
30594 argstr = NULL;
30595 if (!strncmp (q, "thumb", 5))
30596 opts->x_target_flags |= MASK_THUMB;
30597
30598 else if (!strncmp (q, "arm", 3))
30599 opts->x_target_flags &= ~MASK_THUMB;
30600
30601 else if (!strncmp (q, "fpu=", 4))
30602 {
30603 int fpu_index;
30604 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30605 &fpu_index, CL_TARGET))
30606 {
30607 error ("invalid fpu for attribute(target(\"%s\"))", q);
30608 return false;
30609 }
30610 if (fpu_index == TARGET_FPU_auto)
30611 {
30612 /* This doesn't really make sense until we support
30613 general dynamic selection of the architecture and all
30614 sub-features. */
30615 sorry ("auto fpu selection not currently permitted here");
30616 return false;
30617 }
30618 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30619 }
30620 else
30621 {
30622 error ("attribute(target(\"%s\")) is unknown", q);
30623 return false;
30624 }
30625 }
30626
30627 return true;
30628 }
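
/* Illustrative usage (editorial): the strings parsed above come from
declarations such as

    void f (void) __attribute__ ((target ("thumb")));
    void g (void) __attribute__ ((target ("arm,fpu=vfpv3-d16")));

i.e. a comma-separated list whose elements are "thumb", "arm" or
"fpu=<name>", where <name> must be a value acceptable to -mfpu=.  */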
30629
30630 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30631
30632 tree
30633 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30634 struct gcc_options *opts_set)
30635 {
30636 struct cl_target_option cl_opts;
30637
30638 if (!arm_valid_target_attribute_rec (args, opts))
30639 return NULL_TREE;
30640
30641 cl_target_option_save (&cl_opts, opts);
30642 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30643 arm_option_check_internal (opts);
30644 /* Do any overrides, such as global options arch=xxx. */
30645 arm_option_override_internal (opts, opts_set);
30646
30647 return build_target_option_node (opts);
30648 }
30649
30650 static void
30651 add_attribute (const char * mode, tree *attributes)
30652 {
30653 size_t len = strlen (mode);
30654 tree value = build_string (len, mode);
30655
30656 TREE_TYPE (value) = build_array_type (char_type_node,
30657 build_index_type (size_int (len)));
30658
30659 *attributes = tree_cons (get_identifier ("target"),
30660 build_tree_list (NULL_TREE, value),
30661 *attributes);
30662 }
30663
30664 /* For testing.  Insert thumb or arm mode attributes alternately on functions. */
30665
30666 static void
30667 arm_insert_attributes (tree fndecl, tree * attributes)
30668 {
30669 const char *mode;
30670
30671 if (! TARGET_FLIP_THUMB)
30672 return;
30673
30674 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30675 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30676 return;
30677
30678 /* Nested definitions must inherit mode. */
30679 if (current_function_decl)
30680 {
30681 mode = TARGET_THUMB ? "thumb" : "arm";
30682 add_attribute (mode, attributes);
30683 return;
30684 }
30685
30686 /* If there is already a setting, don't change it. */
30687 if (lookup_attribute ("target", *attributes) != NULL)
30688 return;
30689
30690 mode = thumb_flipper ? "thumb" : "arm";
30691 add_attribute (mode, attributes);
30692
30693 thumb_flipper = !thumb_flipper;
30694 }
30695
30696 /* Hook to validate attribute((target("string"))). */
30697
30698 static bool
30699 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30700 tree args, int ARG_UNUSED (flags))
30701 {
30702 bool ret = true;
30703 struct gcc_options func_options;
30704 tree cur_tree, new_optimize;
30705 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30706
30707 /* Get the optimization options of the current function. */
30708 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30709
30710 /* If the function changed the optimization levels as well as setting target
30711 options, start with the optimizations specified. */
30712 if (!func_optimize)
30713 func_optimize = optimization_default_node;
30714
30715 /* Init func_options. */
30716 memset (&func_options, 0, sizeof (func_options));
30717 init_options_struct (&func_options, NULL);
30718 lang_hooks.init_options_struct (&func_options);
30719
30720 /* Initialize func_options to the defaults. */
30721 cl_optimization_restore (&func_options,
30722 TREE_OPTIMIZATION (func_optimize));
30723
30724 cl_target_option_restore (&func_options,
30725 TREE_TARGET_OPTION (target_option_default_node));
30726
30727 /* Set func_options flags with new target mode. */
30728 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30729 &global_options_set);
30730
30731 if (cur_tree == NULL_TREE)
30732 ret = false;
30733
30734 new_optimize = build_optimization_node (&func_options);
30735
30736 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30737
30738 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30739
30740 finalize_options_struct (&func_options);
30741
30742 return ret;
30743 }
30744
30745 /* Match an ISA feature bitmap to a named FPU. We always use the
30746 first entry that exactly matches the feature set, so that we
30747 effectively canonicalize the FPU name for the assembler. */
30748 static const char*
30749 arm_identify_fpu_from_isa (sbitmap isa)
30750 {
30751 auto_sbitmap fpubits (isa_num_bits);
30752 auto_sbitmap cand_fpubits (isa_num_bits);
30753
30754 bitmap_and (fpubits, isa, isa_all_fpubits);
30755
30756 /* If there are no ISA feature bits relating to the FPU, we must be
30757 doing soft-float. */
30758 if (bitmap_empty_p (fpubits))
30759 return "softvfp";
30760
30761 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30762 {
30763 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30764 if (bitmap_equal_p (fpubits, cand_fpubits))
30765 return all_fpus[i].name;
30766 }
30767 /* We must find an entry, or things have gone wrong. */
30768 gcc_unreachable ();
30769 }
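
/* For instance (editorial note): if ISA carries exactly the FPU feature bits
that arm-cpus.in lists for "vfpv3-d16", the loop above returns the string
"vfpv3-d16"; with no FPU bits set at all it returns "softvfp", which is
still a name the assembler accepts in a .fpu directive.  */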
30770
30771 void
30772 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30773 {
30774
30775 fprintf (stream, "\t.syntax unified\n");
30776
30777 if (TARGET_THUMB)
30778 {
30779 if (is_called_in_ARM_mode (decl)
30780 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30781 && cfun->is_thunk))
30782 fprintf (stream, "\t.code 32\n");
30783 else if (TARGET_THUMB1)
30784 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30785 else
30786 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30787 }
30788 else
30789 fprintf (stream, "\t.arm\n");
30790
30791 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30792 (TARGET_SOFT_FLOAT
30793 ? "softvfp"
30794 : arm_identify_fpu_from_isa (arm_active_target.isa)));
30795
30796 if (TARGET_POKE_FUNCTION_NAME)
30797 arm_poke_function_name (stream, (const char *) name);
30798 }
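
/* Example output (editorial sketch, assuming a Thumb-2 function on a target
whose active FPU identifies as vfpv4):

    .syntax unified
    .thumb
    .thumb_func
    .fpu vfpv4

A soft-float target would emit ".fpu softvfp" instead, and a Thumb function
that is called in ARM mode gets ".code 32" rather than the Thumb directives.  */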
30799
30800 /* If MEM is in the form of [base+offset], extract the two parts
30801 of the address into BASE and OFFSET; otherwise return false
30802 after clearing BASE and OFFSET. */
30803
30804 static bool
30805 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30806 {
30807 rtx addr;
30808
30809 gcc_assert (MEM_P (mem));
30810
30811 addr = XEXP (mem, 0);
30812
30813 /* Strip off const from addresses like (const (addr)). */
30814 if (GET_CODE (addr) == CONST)
30815 addr = XEXP (addr, 0);
30816
30817 if (GET_CODE (addr) == REG)
30818 {
30819 *base = addr;
30820 *offset = const0_rtx;
30821 return true;
30822 }
30823
30824 if (GET_CODE (addr) == PLUS
30825 && GET_CODE (XEXP (addr, 0)) == REG
30826 && CONST_INT_P (XEXP (addr, 1)))
30827 {
30828 *base = XEXP (addr, 0);
30829 *offset = XEXP (addr, 1);
30830 return true;
30831 }
30832
30833 *base = NULL_RTX;
30834 *offset = NULL_RTX;
30835
30836 return false;
30837 }
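
/* Editorial examples of the accepted forms: for (mem (reg r1)) this sets
BASE = r1 and OFFSET = 0; for (mem (plus (reg r1) (const_int 8))) it sets
BASE = r1 and OFFSET = 8; any other address shape (for example a
pre/post-modify address) makes it clear both outputs and return false.  */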
30838
30839 /* If INSN is a load or store of an address in the form of [base+offset],
30840 extract the two parts into BASE and OFFSET.  IS_LOAD is set
30841 to TRUE if it is a load.  Return TRUE if INSN is such an instruction,
30842 otherwise return FALSE. */
30843
30844 static bool
30845 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30846 {
30847 rtx x, dest, src;
30848
30849 gcc_assert (INSN_P (insn));
30850 x = PATTERN (insn);
30851 if (GET_CODE (x) != SET)
30852 return false;
30853
30854 src = SET_SRC (x);
30855 dest = SET_DEST (x);
30856 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30857 {
30858 *is_load = false;
30859 extract_base_offset_in_addr (dest, base, offset);
30860 }
30861 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30862 {
30863 *is_load = true;
30864 extract_base_offset_in_addr (src, base, offset);
30865 }
30866 else
30867 return false;
30868
30869 return (*base != NULL_RTX && *offset != NULL_RTX);
30870 }
30871
30872 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30873
30874 Currently we only support fusing ldr and str instructions, so FUSION_PRI
30875 and PRI are only calculated for these instructions.  For other instructions,
30876 FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds
30877 of instruction fusion can be supported by returning different priorities.
30878
30879 It's important that irrelevant instructions get the largest FUSION_PRI. */
30880
30881 static void
30882 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30883 int *fusion_pri, int *pri)
30884 {
30885 int tmp, off_val;
30886 bool is_load;
30887 rtx base, offset;
30888
30889 gcc_assert (INSN_P (insn));
30890
30891 tmp = max_pri - 1;
30892 if (!fusion_load_store (insn, &base, &offset, &is_load))
30893 {
30894 *pri = tmp;
30895 *fusion_pri = tmp;
30896 return;
30897 }
30898
30899 /* Load goes first. */
30900 if (is_load)
30901 *fusion_pri = tmp - 1;
30902 else
30903 *fusion_pri = tmp - 2;
30904
30905 tmp /= 2;
30906
30907 /* INSN with smaller base register goes first. */
30908 tmp -= ((REGNO (base) & 0xff) << 20);
30909
30910 /* INSN with smaller offset goes first. */
30911 off_val = (int)(INTVAL (offset));
30912 if (off_val >= 0)
30913 tmp -= (off_val & 0xfffff);
30914 else
30915 tmp += ((- off_val) & 0xfffff);
30916
30917 *pri = tmp;
30918 return;
30919 }
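
/* Worked example (editorial): for a load from [r2, #4] the code above starts
from tmp = max_pri - 1 and sets *fusion_pri = tmp - 1 (a store would get
tmp - 2, so loads sort first), then halves tmp and subtracts
(REGNO (r2) << 20) + 4 (ignoring the masking, which only matters for very
large register numbers or offsets).  The net effect is that loads/stores are
grouped by base register and ordered by increasing offset within a group.  */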
30920
30921
30922 /* Construct and return a PARALLEL RTX vector with elements numbering the
30923 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30924 the vector - from the perspective of the architecture. This does not
30925 line up with GCC's perspective on lane numbers, so we end up with
30926 different masks depending on our target endian-ness. The diagram
30927 below may help. We must draw the distinction when building masks
30928 which select one half of the vector. An instruction selecting
30929 architectural low-lanes for a big-endian target must be described using
30930 a mask selecting GCC high-lanes.
30931
30932 Big-Endian Little-Endian
30933
30934 GCC 0 1 2 3 3 2 1 0
30935 | x | x | x | x | | x | x | x | x |
30936 Architecture 3 2 1 0 3 2 1 0
30937
30938 Low Mask: { 2, 3 } { 0, 1 }
30939 High Mask: { 0, 1 } { 2, 3 }
30940 */
30941
30942 rtx
30943 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30944 {
30945 int nunits = GET_MODE_NUNITS (mode);
30946 rtvec v = rtvec_alloc (nunits / 2);
30947 int high_base = nunits / 2;
30948 int low_base = 0;
30949 int base;
30950 rtx t1;
30951 int i;
30952
30953 if (BYTES_BIG_ENDIAN)
30954 base = high ? low_base : high_base;
30955 else
30956 base = high ? high_base : low_base;
30957
30958 for (i = 0; i < nunits / 2; i++)
30959 RTVEC_ELT (v, i) = GEN_INT (base + i);
30960
30961 t1 = gen_rtx_PARALLEL (mode, v);
30962 return t1;
30963 }
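
/* Concrete example (editorial): for V4SImode and HIGH == true this returns
(parallel [(const_int 2) (const_int 3)]) on a little-endian target and
(parallel [(const_int 0) (const_int 1)]) on a big-endian one, matching the
High Mask row of the table in the comment above.  */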
30964
30965 /* Check OP for validity as a PARALLEL RTX vector with elements
30966 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30967 from the perspective of the architecture. See the diagram above
30968 arm_simd_vect_par_cnst_half for more details. */
30969
30970 bool
30971 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
30972 bool high)
30973 {
30974 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
30975 HOST_WIDE_INT count_op = XVECLEN (op, 0);
30976 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
30977 int i = 0;
30978
30979 if (!VECTOR_MODE_P (mode))
30980 return false;
30981
30982 if (count_op != count_ideal)
30983 return false;
30984
30985 for (i = 0; i < count_ideal; i++)
30986 {
30987 rtx elt_op = XVECEXP (op, 0, i);
30988 rtx elt_ideal = XVECEXP (ideal, 0, i);
30989
30990 if (!CONST_INT_P (elt_op)
30991 || INTVAL (elt_ideal) != INTVAL (elt_op))
30992 return false;
30993 }
30994 return true;
30995 }
30996
30997 /* Can output mi_thunk for all cases except for non-zero vcall_offset
30998 in Thumb1. */
30999 static bool
31000 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
31001 const_tree)
31002 {
31003 /* For now, we punt and do not handle this for TARGET_THUMB1. */
31004 if (vcall_offset && TARGET_THUMB1)
31005 return false;
31006
31007 /* Otherwise ok. */
31008 return true;
31009 }
31010
31011 /* Generate RTL for a conditional branch with rtx comparison CODE in
31012 mode CC_MODE. The destination of the unlikely conditional branch
31013 is LABEL_REF. */
31014
31015 void
31016 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
31017 rtx label_ref)
31018 {
31019 rtx x;
31020 x = gen_rtx_fmt_ee (code, VOIDmode,
31021 gen_rtx_REG (cc_mode, CC_REGNUM),
31022 const0_rtx);
31023
31024 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31025 gen_rtx_LABEL_REF (VOIDmode, label_ref),
31026 pc_rtx);
31027 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31028 }
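
/* Editorial sketch of the emitted RTL, assuming CODE == NE and
CC_MODE == CCmode:

    (set (pc) (if_then_else (ne (reg:CC CC_REGNUM) (const_int 0))
                            (label_ref LABEL_REF)
                            (pc)))

emit_unlikely_jump then attaches a very low branch probability to it.  */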
31029
31030 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31031
31032 For pure-code sections there is no letter code for this attribute, so
31033 output all the section flags numerically when this is needed. */
31034
31035 static bool
31036 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31037 {
31038
31039 if (flags & SECTION_ARM_PURECODE)
31040 {
31041 *num = 0x20000000;
31042
31043 if (!(flags & SECTION_DEBUG))
31044 *num |= 0x2;
31045 if (flags & SECTION_EXCLUDE)
31046 *num |= 0x80000000;
31047 if (flags & SECTION_WRITE)
31048 *num |= 0x1;
31049 if (flags & SECTION_CODE)
31050 *num |= 0x4;
31051 if (flags & SECTION_MERGE)
31052 *num |= 0x10;
31053 if (flags & SECTION_STRINGS)
31054 *num |= 0x20;
31055 if (flags & SECTION_TLS)
31056 *num |= 0x400;
31057 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31058 *num |= 0x200;
31059
31060 return true;
31061 }
31062
31063 return false;
31064 }
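
/* Worked example (editorial): a pure-code section that is executable
(SECTION_CODE), not writable and not a debug section ends up with
0x20000000 | 0x2 | 0x4 == 0x20000006, emitted as a numeric flags value in
place of the usual letter codes.  */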
31065
31066 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31067
31068 If pure-code is passed as an option, make sure all functions are in
31069 sections that have the SHF_ARM_PURECODE attribute. */
31070
31071 static section *
31072 arm_function_section (tree decl, enum node_frequency freq,
31073 bool startup, bool exit)
31074 {
31075 const char * section_name;
31076 section * sec;
31077
31078 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31079 return default_function_section (decl, freq, startup, exit);
31080
31081 if (!target_pure_code)
31082 return default_function_section (decl, freq, startup, exit);
31083
31084
31085 section_name = DECL_SECTION_NAME (decl);
31086
31087 /* If a function is not in a named section then it falls under the 'default'
31088 text section, also known as '.text'. We can preserve previous behavior as
31089 the default text section already has the SHF_ARM_PURECODE section
31090 attribute. */
31091 if (!section_name)
31092 {
31093 section *default_sec = default_function_section (decl, freq, startup,
31094 exit);
31095
31096 /* If default_sec is not null, then it must be a special section like for
31097 example .text.startup. We set the pure-code attribute and return the
31098 same section to preserve existing behavior. */
31099 if (default_sec)
31100 default_sec->common.flags |= SECTION_ARM_PURECODE;
31101 return default_sec;
31102 }
31103
31104 /* Otherwise look whether a section has already been created with
31105 'section_name'. */
31106 sec = get_named_section (decl, section_name, 0);
31107 if (!sec)
31108 /* If that is not the case passing NULL as the section's name to
31109 'get_named_section' will create a section with the declaration's
31110 section name. */
31111 sec = get_named_section (decl, NULL, 0);
31112
31113 /* Set the SHF_ARM_PURECODE attribute. */
31114 sec->common.flags |= SECTION_ARM_PURECODE;
31115
31116 return sec;
31117 }
31118
31119 /* Implements the TARGET_SECTION_TYPE_FLAGS hook.
31120
31121 If DECL is a function declaration and pure-code is passed as an option
31122 then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
31123 section's name and RELOC indicates whether the declaration's initializer may
31124 contain runtime relocations. */
31125
31126 static unsigned int
31127 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31128 {
31129 unsigned int flags = default_section_type_flags (decl, name, reloc);
31130
31131 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31132 flags |= SECTION_ARM_PURECODE;
31133
31134 return flags;
31135 }
31136
31137 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31138
31139 static void
31140 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31141 rtx op0, rtx op1,
31142 rtx *quot_p, rtx *rem_p)
31143 {
31144 if (mode == SImode)
31145 gcc_assert (!TARGET_IDIV);
31146
31147 scalar_int_mode libval_mode
31148 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31149
31150 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31151 libval_mode,
31152 op0, GET_MODE (op0),
31153 op1, GET_MODE (op1));
31154
31155 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31156 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31157 GET_MODE_SIZE (mode));
31158
31159 gcc_assert (quotient);
31160 gcc_assert (remainder);
31161
31162 *quot_p = quotient;
31163 *rem_p = remainder;
31164 }
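
/* Editorial note: for MODE == SImode the computed LIBVAL_MODE is DImode, and
the quotient and remainder are extracted as the two SImode subregs at byte
offsets 0 and 4 of the returned value, matching an __aeabi_idivmod-style
convention of returning both results in one register pair.  */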
31165
31166 /* This function checks for the availability of the coprocessor builtin passed
31167 in BUILTIN for the current target. Returns true if it is available and
31168 false otherwise. If a BUILTIN is passed for which this function has not
31169 been implemented it will cause an internal compiler error. */
31170
31171 bool
31172 arm_coproc_builtin_available (enum unspecv builtin)
31173 {
31174 /* None of these builtins are available in Thumb mode if the target only
31175 supports Thumb-1. */
31176 if (TARGET_THUMB1)
31177 return false;
31178
31179 switch (builtin)
31180 {
31181 case VUNSPEC_CDP:
31182 case VUNSPEC_LDC:
31183 case VUNSPEC_LDCL:
31184 case VUNSPEC_STC:
31185 case VUNSPEC_STCL:
31186 case VUNSPEC_MCR:
31187 case VUNSPEC_MRC:
31188 if (arm_arch4)
31189 return true;
31190 break;
31191 case VUNSPEC_CDP2:
31192 case VUNSPEC_LDC2:
31193 case VUNSPEC_LDC2L:
31194 case VUNSPEC_STC2:
31195 case VUNSPEC_STC2L:
31196 case VUNSPEC_MCR2:
31197 case VUNSPEC_MRC2:
31198 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31199 ARMv8-{A,M}. */
31200 if (arm_arch5)
31201 return true;
31202 break;
31203 case VUNSPEC_MCRR:
31204 case VUNSPEC_MRRC:
31205 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31206 ARMv8-{A,M}. */
31207 if (arm_arch6 || arm_arch5te)
31208 return true;
31209 break;
31210 case VUNSPEC_MCRR2:
31211 case VUNSPEC_MRRC2:
31212 if (arm_arch6)
31213 return true;
31214 break;
31215 default:
31216 gcc_unreachable ();
31217 }
31218 return false;
31219 }
31220
31221 /* This function returns true if OP is a valid memory operand for the ldc and
31222 stc coprocessor instructions and false otherwise. */
31223
31224 bool
31225 arm_coproc_ldc_stc_legitimate_address (rtx op)
31226 {
31227 HOST_WIDE_INT range;
31228 /* Has to be a memory operand. */
31229 if (!MEM_P (op))
31230 return false;
31231
31232 op = XEXP (op, 0);
31233
31234 /* We accept registers. */
31235 if (REG_P (op))
31236 return true;
31237
31238 switch (GET_CODE (op))
31239 {
31240 case PLUS:
31241 {
31242 /* Or registers with an offset. */
31243 if (!REG_P (XEXP (op, 0)))
31244 return false;
31245
31246 op = XEXP (op, 1);
31247
31248 /* The offset must be an immediate though. */
31249 if (!CONST_INT_P (op))
31250 return false;
31251
31252 range = INTVAL (op);
31253
31254 /* Within the range of [-1020,1020]. */
31255 if (!IN_RANGE (range, -1020, 1020))
31256 return false;
31257
31258 /* And a multiple of 4. */
31259 return (range % 4) == 0;
31260 }
31261 case PRE_INC:
31262 case POST_INC:
31263 case PRE_DEC:
31264 case POST_DEC:
31265 return REG_P (XEXP (op, 0));
31266 default:
31267 gcc_unreachable ();
31268 }
31269 return false;
31270 }
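
/* Editorial examples: (mem (reg r0)) and (mem (plus (reg r0) (const_int 8)))
are accepted, as are the register pre/post increment and decrement forms;
(mem (plus (reg r0) (const_int 6))) is rejected because 6 is not a multiple
of 4, and an offset of 1024 is rejected for being outside [-1020, 1020].  */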
31271
31272 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31273
31274 In VFPv1, VFP registers could only be accessed in the mode they were
31275 set, so subregs would be invalid there. However, we don't support
31276 VFPv1 at the moment, and the restriction was lifted in VFPv2.
31277
31278 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31279 VFP registers in little-endian order. We can't describe that accurately to
31280 GCC, so avoid taking subregs of such values.
31281
31282 The only exception is going from a 128-bit to a 64-bit type. In that
31283 case the data layout happens to be consistent for big-endian, so we
31284 explicitly allow that case. */
31285
31286 static bool
31287 arm_can_change_mode_class (machine_mode from, machine_mode to,
31288 reg_class_t rclass)
31289 {
31290 if (TARGET_BIG_END
31291 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
31292 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
31293 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
31294 && reg_classes_intersect_p (VFP_REGS, rclass))
31295 return false;
31296 return true;
31297 }
31298
31299 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
31300 strcpy from constants will be faster. */
31301
31302 static HOST_WIDE_INT
31303 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
31304 {
31305 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
31306 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
31307 return MAX (align, BITS_PER_WORD * factor);
31308 return align;
31309 }
31310
31311 #if CHECKING_P
31312 namespace selftest {
31313
31314 /* Scan the static data tables generated by parsecpu.awk looking for
31315 potential issues with the data. We primarily check for
31316 inconsistencies in the option extensions at present (extensions
31317 that duplicate others but aren't marked as aliases). Furthermore,
31318 for correct canonicalization later options must never be a subset
31319 of an earlier option. Any extension should also only specify other
31320 feature bits and never an architecture bit. The architecture is inferred
31321 from the declaration of the extension. */
31322 static void
31323 arm_test_cpu_arch_data (void)
31324 {
31325 const arch_option *arch;
31326 const cpu_option *cpu;
31327 auto_sbitmap target_isa (isa_num_bits);
31328 auto_sbitmap isa1 (isa_num_bits);
31329 auto_sbitmap isa2 (isa_num_bits);
31330
31331 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31332 {
31333 const cpu_arch_extension *ext1, *ext2;
31334
31335 if (arch->common.extensions == NULL)
31336 continue;
31337
31338 arm_initialize_isa (target_isa, arch->common.isa_bits);
31339
31340 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31341 {
31342 if (ext1->alias)
31343 continue;
31344
31345 arm_initialize_isa (isa1, ext1->isa_bits);
31346 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31347 {
31348 if (ext2->alias || ext1->remove != ext2->remove)
31349 continue;
31350
31351 arm_initialize_isa (isa2, ext2->isa_bits);
31352 /* If the option is a subset of the parent option, it doesn't
31353 add anything and so isn't useful. */
31354 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31355
31356 /* If the extension specifies any architectural bits then
31357 disallow it. Extensions should only specify feature bits. */
31358 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31359 }
31360 }
31361 }
31362
31363 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31364 {
31365 const cpu_arch_extension *ext1, *ext2;
31366
31367 if (cpu->common.extensions == NULL)
31368 continue;
31369
31370 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31371
31372 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31373 {
31374 if (ext1->alias)
31375 continue;
31376
31377 arm_initialize_isa (isa1, ext1->isa_bits);
31378 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31379 {
31380 if (ext2->alias || ext1->remove != ext2->remove)
31381 continue;
31382
31383 arm_initialize_isa (isa2, ext2->isa_bits);
31384 /* If the option is a subset of the parent option, it doesn't
31385 add anything and so isn't useful. */
31386 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31387
31388 /* If the extension specifies any architectural bits then
31389 disallow it. Extensions should only specify feature bits. */
31390 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31391 }
31392 }
31393 }
31394 }
31395
31396 /* Scan the static data tables generated by parsecpu.awk looking for
31397 potential issues with the data. Here we check for consistency between the
31398 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
31399 a feature bit that is not defined by any FPU flag. */
31400 static void
31401 arm_test_fpu_data (void)
31402 {
31403 auto_sbitmap isa_all_fpubits (isa_num_bits);
31404 auto_sbitmap fpubits (isa_num_bits);
31405 auto_sbitmap tmpset (isa_num_bits);
31406
31407 static const enum isa_feature fpu_bitlist[]
31408 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
31409 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
31410
31411 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31412 {
31413 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
31414 bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
31415 bitmap_clear (isa_all_fpubits);
31416 bitmap_copy (isa_all_fpubits, tmpset);
31417 }
31418
31419 if (!bitmap_empty_p (isa_all_fpubits))
31420 {
31421 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
31422 " group that are not defined by any FPU.\n"
31423 " Check your arm-cpus.in.\n");
31424 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
31425 }
31426 }
31427
31428 static void
31429 arm_run_selftests (void)
31430 {
31431 arm_test_cpu_arch_data ();
31432 arm_test_fpu_data ();
31433 }
31434 } /* Namespace selftest. */
31435
31436 #undef TARGET_RUN_TARGET_SELFTESTS
31437 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31438 #endif /* CHECKING_P */
31439
31440 struct gcc_target targetm = TARGET_INITIALIZER;
31441
31442 #include "gt-arm.h"