1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "attribs.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic-core.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
45 #include "calls.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "reload.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "cfgrtl.h"
54 #include "sched-int.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60 #include "opts.h"
61 #include "dumpfile.h"
62 #include "target-globals.h"
63 #include "builtins.h"
64 #include "tm-constrs.h"
65 #include "rtl-iter.h"
66 #include "optabs-libfuncs.h"
67 #include "gimplify.h"
68 #include "gimple.h"
69 #include "selftest.h"
70
71 /* This file should be included last. */
72 #include "target-def.h"
73
74 /* Forward definitions of types. */
75 typedef struct minipool_node Mnode;
76 typedef struct minipool_fixup Mfix;
77
78 void (*arm_lang_output_object_attributes_hook)(void);
79
80 struct four_ints
81 {
82 int i[4];
83 };
84
85 /* Forward function declarations. */
86 static bool arm_const_not_ok_for_debug_p (rtx);
87 static int arm_needs_doubleword_align (machine_mode, const_tree);
88 static int arm_compute_static_chain_stack_bytes (void);
89 static arm_stack_offsets *arm_get_frame_offsets (void);
90 static void arm_compute_frame_layout (void);
91 static void arm_add_gc_roots (void);
92 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
93 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
94 static unsigned bit_count (unsigned long);
95 static unsigned bitmap_popcount (const sbitmap);
96 static int arm_address_register_rtx_p (rtx, int);
97 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
98 static bool is_called_in_ARM_mode (tree);
99 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
100 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
101 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
102 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
103 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
104 inline static int thumb1_index_register_rtx_p (rtx, int);
105 static int thumb_far_jump_used_p (void);
106 static bool thumb_force_lr_save (void);
107 static unsigned arm_size_return_regs (void);
108 static bool arm_assemble_integer (rtx, unsigned int, int);
109 static void arm_print_operand (FILE *, rtx, int);
110 static void arm_print_operand_address (FILE *, machine_mode, rtx);
111 static bool arm_print_operand_punct_valid_p (unsigned char code);
112 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
113 static arm_cc get_arm_condition_code (rtx);
114 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
115 static const char *output_multi_immediate (rtx *, const char *, const char *,
116 int, HOST_WIDE_INT);
117 static const char *shift_op (rtx, HOST_WIDE_INT *);
118 static struct machine_function *arm_init_machine_status (void);
119 static void thumb_exit (FILE *, int);
120 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
121 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
122 static Mnode *add_minipool_forward_ref (Mfix *);
123 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
124 static Mnode *add_minipool_backward_ref (Mfix *);
125 static void assign_minipool_offsets (Mfix *);
126 static void arm_print_value (FILE *, rtx);
127 static void dump_minipool (rtx_insn *);
128 static int arm_barrier_cost (rtx_insn *);
129 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
130 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
131 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
132 machine_mode, rtx);
133 static void arm_reorg (void);
134 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
135 static unsigned long arm_compute_save_reg0_reg12_mask (void);
136 static unsigned long arm_compute_save_core_reg_mask (void);
137 static unsigned long arm_isr_value (tree);
138 static unsigned long arm_compute_func_type (void);
139 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
140 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
141 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
142 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
143 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
144 #endif
145 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
146 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
147 static void arm_output_function_epilogue (FILE *);
148 static void arm_output_function_prologue (FILE *);
149 static int arm_comp_type_attributes (const_tree, const_tree);
150 static void arm_set_default_type_attributes (tree);
151 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
152 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
153 static int optimal_immediate_sequence (enum rtx_code code,
154 unsigned HOST_WIDE_INT val,
155 struct four_ints *return_sequence);
156 static int optimal_immediate_sequence_1 (enum rtx_code code,
157 unsigned HOST_WIDE_INT val,
158 struct four_ints *return_sequence,
159 int i);
160 static int arm_get_strip_length (int);
161 static bool arm_function_ok_for_sibcall (tree, tree);
162 static machine_mode arm_promote_function_mode (const_tree,
163 machine_mode, int *,
164 const_tree, int);
165 static bool arm_return_in_memory (const_tree, const_tree);
166 static rtx arm_function_value (const_tree, const_tree, bool);
167 static rtx arm_libcall_value_1 (machine_mode);
168 static rtx arm_libcall_value (machine_mode, const_rtx);
169 static bool arm_function_value_regno_p (const unsigned int);
170 static void arm_internal_label (FILE *, const char *, unsigned long);
171 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
172 tree);
173 static bool arm_have_conditional_execution (void);
174 static bool arm_cannot_force_const_mem (machine_mode, rtx);
175 static bool arm_legitimate_constant_p (machine_mode, rtx);
176 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
177 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
178 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
179 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
180 static void emit_constant_insn (rtx cond, rtx pattern);
181 static rtx_insn *emit_set_insn (rtx, rtx);
182 static rtx emit_multi_reg_push (unsigned long, unsigned long);
183 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
184 tree, bool);
185 static rtx arm_function_arg (cumulative_args_t, machine_mode,
186 const_tree, bool);
187 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
188 const_tree, bool);
189 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
190 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
191 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
192 const_tree);
193 static rtx aapcs_libcall_value (machine_mode);
194 static int aapcs_select_return_coproc (const_tree, const_tree);
195
196 #ifdef OBJECT_FORMAT_ELF
197 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
198 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
199 #endif
200 #ifndef ARM_PE
201 static void arm_encode_section_info (tree, rtx, int);
202 #endif
203
204 static void arm_file_end (void);
205 static void arm_file_start (void);
206 static void arm_insert_attributes (tree, tree *);
207
208 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
209 tree, int *, int);
210 static bool arm_pass_by_reference (cumulative_args_t,
211 machine_mode, const_tree, bool);
212 static bool arm_promote_prototypes (const_tree);
213 static bool arm_default_short_enums (void);
214 static bool arm_align_anon_bitfield (void);
215 static bool arm_return_in_msb (const_tree);
216 static bool arm_must_pass_in_stack (machine_mode, const_tree);
217 static bool arm_return_in_memory (const_tree, const_tree);
218 #if ARM_UNWIND_INFO
219 static void arm_unwind_emit (FILE *, rtx_insn *);
220 static bool arm_output_ttype (rtx);
221 static void arm_asm_emit_except_personality (rtx);
222 #endif
223 static void arm_asm_init_sections (void);
224 static rtx arm_dwarf_register_span (rtx);
225
226 static tree arm_cxx_guard_type (void);
227 static bool arm_cxx_guard_mask_bit (void);
228 static tree arm_get_cookie_size (tree);
229 static bool arm_cookie_has_size (void);
230 static bool arm_cxx_cdtor_returns_this (void);
231 static bool arm_cxx_key_method_may_be_inline (void);
232 static void arm_cxx_determine_class_data_visibility (tree);
233 static bool arm_cxx_class_data_always_comdat (void);
234 static bool arm_cxx_use_aeabi_atexit (void);
235 static void arm_init_libfuncs (void);
236 static tree arm_build_builtin_va_list (void);
237 static void arm_expand_builtin_va_start (tree, rtx);
238 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
239 static void arm_option_override (void);
240 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
241 static void arm_option_restore (struct gcc_options *,
242 struct cl_target_option *);
243 static void arm_override_options_after_change (void);
244 static void arm_option_print (FILE *, int, struct cl_target_option *);
245 static void arm_set_current_function (tree);
246 static bool arm_can_inline_p (tree, tree);
247 static void arm_relayout_function (tree);
248 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
249 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
250 static bool arm_sched_can_speculate_insn (rtx_insn *);
251 static bool arm_macro_fusion_p (void);
252 static bool arm_cannot_copy_insn_p (rtx_insn *);
253 static int arm_issue_rate (void);
254 static int arm_first_cycle_multipass_dfa_lookahead (void);
255 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
256 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
257 static bool arm_output_addr_const_extra (FILE *, rtx);
258 static bool arm_allocate_stack_slots_for_args (void);
259 static bool arm_warn_func_return (tree);
260 static tree arm_promoted_type (const_tree t);
261 static bool arm_scalar_mode_supported_p (scalar_mode);
262 static bool arm_frame_pointer_required (void);
263 static bool arm_can_eliminate (const int, const int);
264 static void arm_asm_trampoline_template (FILE *);
265 static void arm_trampoline_init (rtx, tree, rtx);
266 static rtx arm_trampoline_adjust_address (rtx);
267 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
268 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
269 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
270 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
271 static bool arm_array_mode_supported_p (machine_mode,
272 unsigned HOST_WIDE_INT);
273 static machine_mode arm_preferred_simd_mode (scalar_mode);
274 static bool arm_class_likely_spilled_p (reg_class_t);
275 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
276 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
277 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
278 const_tree type,
279 int misalignment,
280 bool is_packed);
281 static void arm_conditional_register_usage (void);
282 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
283 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
284 static unsigned int arm_autovectorize_vector_sizes (void);
285 static int arm_default_branch_cost (bool, bool);
286 static int arm_cortex_a5_branch_cost (bool, bool);
287 static int arm_cortex_m_branch_cost (bool, bool);
288 static int arm_cortex_m7_branch_cost (bool, bool);
289
290 static bool arm_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices);
291
292 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
293
294 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
295 tree vectype,
296 int misalign ATTRIBUTE_UNUSED);
297 static unsigned arm_add_stmt_cost (void *data, int count,
298 enum vect_cost_for_stmt kind,
299 struct _stmt_vec_info *stmt_info,
300 int misalign,
301 enum vect_cost_model_location where);
302
303 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
304 bool op0_preserve_value);
305 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
306
307 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
308 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
309 const_tree);
310 static section *arm_function_section (tree, enum node_frequency, bool, bool);
311 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
312 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
313 int reloc);
314 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
315 static opt_scalar_float_mode arm_floatn_mode (int, bool);
316 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
317 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
318 static bool arm_modes_tieable_p (machine_mode, machine_mode);
319 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
320 \f
321 /* Table of machine attributes. */
322 static const struct attribute_spec arm_attribute_table[] =
323 {
324 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
325 affects_type_identity } */
326 /* Function calls made to this symbol must be done indirectly, because
327 it may lie outside of the 26 bit addressing range of a normal function
328 call. */
329 { "long_call", 0, 0, false, true, true, NULL, false },
330 /* Whereas these functions are always known to reside within the 26 bit
331 addressing range. */
332 { "short_call", 0, 0, false, true, true, NULL, false },
333 /* Specify the procedure call conventions for a function. */
334 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
335 false },
336 /* Interrupt Service Routines have special prologue and epilogue requirements. */
337 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
338 false },
339 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
340 false },
341 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
342 false },
343 #ifdef ARM_PE
344 /* ARM/PE has three new attributes:
345 interfacearm - ?
346 dllexport - for exporting a function/variable that will live in a dll
347 dllimport - for importing a function/variable from a dll
348
349 Microsoft allows multiple declspecs in one __declspec, separating
350 them with spaces. We do NOT support this. Instead, use __declspec
351 multiple times.
352 */
353 { "dllimport", 0, 0, true, false, false, NULL, false },
354 { "dllexport", 0, 0, true, false, false, NULL, false },
355 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
356 false },
357 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
358 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
359 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
360 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
361 false },
362 #endif
363 /* ARMv8-M Security Extensions support. */
364 { "cmse_nonsecure_entry", 0, 0, true, false, false,
365 arm_handle_cmse_nonsecure_entry, false },
366 { "cmse_nonsecure_call", 0, 0, true, false, false,
367 arm_handle_cmse_nonsecure_call, true },
368 { NULL, 0, 0, false, false, false, NULL, false }
369 };
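/* Illustrative only -- not part of the original file.  Typical source-level
   uses of the attributes declared in the table above, assuming the GNU C
   attribute syntax on an ARM target (all function names are hypothetical):

     void far_away (void) __attribute__ ((long_call));
     void near_by (void) __attribute__ ((short_call));
     double vfp_variant (double) __attribute__ ((pcs ("aapcs-vfp")));
     void handler (void) __attribute__ ((interrupt ("IRQ")));
     void __attribute__ ((naked)) reset_entry (void);

   and, when compiling with -mcmse for the ARMv8-M Security Extensions:

     int gateway (int) __attribute__ ((cmse_nonsecure_entry));
     int __attribute__ ((cmse_nonsecure_call)) (*ns_callback) (int);  */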
370 \f
371 /* Initialize the GCC target structure. */
372 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
373 #undef TARGET_MERGE_DECL_ATTRIBUTES
374 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
375 #endif
376
377 #undef TARGET_LEGITIMIZE_ADDRESS
378 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
379
380 #undef TARGET_ATTRIBUTE_TABLE
381 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
382
383 #undef TARGET_INSERT_ATTRIBUTES
384 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
385
386 #undef TARGET_ASM_FILE_START
387 #define TARGET_ASM_FILE_START arm_file_start
388 #undef TARGET_ASM_FILE_END
389 #define TARGET_ASM_FILE_END arm_file_end
390
391 #undef TARGET_ASM_ALIGNED_SI_OP
392 #define TARGET_ASM_ALIGNED_SI_OP NULL
393 #undef TARGET_ASM_INTEGER
394 #define TARGET_ASM_INTEGER arm_assemble_integer
395
396 #undef TARGET_PRINT_OPERAND
397 #define TARGET_PRINT_OPERAND arm_print_operand
398 #undef TARGET_PRINT_OPERAND_ADDRESS
399 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
400 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
401 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
402
403 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
404 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
405
406 #undef TARGET_ASM_FUNCTION_PROLOGUE
407 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
408
409 #undef TARGET_ASM_FUNCTION_EPILOGUE
410 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
411
412 #undef TARGET_CAN_INLINE_P
413 #define TARGET_CAN_INLINE_P arm_can_inline_p
414
415 #undef TARGET_RELAYOUT_FUNCTION
416 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
417
418 #undef TARGET_OPTION_OVERRIDE
419 #define TARGET_OPTION_OVERRIDE arm_option_override
420
421 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
422 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
423
424 #undef TARGET_OPTION_SAVE
425 #define TARGET_OPTION_SAVE arm_option_save
426
427 #undef TARGET_OPTION_RESTORE
428 #define TARGET_OPTION_RESTORE arm_option_restore
429
430 #undef TARGET_OPTION_PRINT
431 #define TARGET_OPTION_PRINT arm_option_print
432
433 #undef TARGET_COMP_TYPE_ATTRIBUTES
434 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
435
436 #undef TARGET_SCHED_CAN_SPECULATE_INSN
437 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
438
439 #undef TARGET_SCHED_MACRO_FUSION_P
440 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
441
442 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
443 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
444
445 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
446 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
447
448 #undef TARGET_SCHED_ADJUST_COST
449 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
450
451 #undef TARGET_SET_CURRENT_FUNCTION
452 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
453
454 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
455 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
456
457 #undef TARGET_SCHED_REORDER
458 #define TARGET_SCHED_REORDER arm_sched_reorder
459
460 #undef TARGET_REGISTER_MOVE_COST
461 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
462
463 #undef TARGET_MEMORY_MOVE_COST
464 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
465
466 #undef TARGET_ENCODE_SECTION_INFO
467 #ifdef ARM_PE
468 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
469 #else
470 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
471 #endif
472
473 #undef TARGET_STRIP_NAME_ENCODING
474 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
475
476 #undef TARGET_ASM_INTERNAL_LABEL
477 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
478
479 #undef TARGET_FLOATN_MODE
480 #define TARGET_FLOATN_MODE arm_floatn_mode
481
482 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
483 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
484
485 #undef TARGET_FUNCTION_VALUE
486 #define TARGET_FUNCTION_VALUE arm_function_value
487
488 #undef TARGET_LIBCALL_VALUE
489 #define TARGET_LIBCALL_VALUE arm_libcall_value
490
491 #undef TARGET_FUNCTION_VALUE_REGNO_P
492 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
493
494 #undef TARGET_ASM_OUTPUT_MI_THUNK
495 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
496 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
497 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
498
499 #undef TARGET_RTX_COSTS
500 #define TARGET_RTX_COSTS arm_rtx_costs
501 #undef TARGET_ADDRESS_COST
502 #define TARGET_ADDRESS_COST arm_address_cost
503
504 #undef TARGET_SHIFT_TRUNCATION_MASK
505 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
506 #undef TARGET_VECTOR_MODE_SUPPORTED_P
507 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
508 #undef TARGET_ARRAY_MODE_SUPPORTED_P
509 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
510 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
511 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
512 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
513 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
514 arm_autovectorize_vector_sizes
515
516 #undef TARGET_MACHINE_DEPENDENT_REORG
517 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
518
519 #undef TARGET_INIT_BUILTINS
520 #define TARGET_INIT_BUILTINS arm_init_builtins
521 #undef TARGET_EXPAND_BUILTIN
522 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
523 #undef TARGET_BUILTIN_DECL
524 #define TARGET_BUILTIN_DECL arm_builtin_decl
525
526 #undef TARGET_INIT_LIBFUNCS
527 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
528
529 #undef TARGET_PROMOTE_FUNCTION_MODE
530 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
531 #undef TARGET_PROMOTE_PROTOTYPES
532 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
533 #undef TARGET_PASS_BY_REFERENCE
534 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
535 #undef TARGET_ARG_PARTIAL_BYTES
536 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
537 #undef TARGET_FUNCTION_ARG
538 #define TARGET_FUNCTION_ARG arm_function_arg
539 #undef TARGET_FUNCTION_ARG_ADVANCE
540 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
541 #undef TARGET_FUNCTION_ARG_PADDING
542 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
543 #undef TARGET_FUNCTION_ARG_BOUNDARY
544 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
545
546 #undef TARGET_SETUP_INCOMING_VARARGS
547 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
548
549 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
550 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
551
552 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
553 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
554 #undef TARGET_TRAMPOLINE_INIT
555 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
556 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
557 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
558
559 #undef TARGET_WARN_FUNC_RETURN
560 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
561
562 #undef TARGET_DEFAULT_SHORT_ENUMS
563 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
564
565 #undef TARGET_ALIGN_ANON_BITFIELD
566 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
567
568 #undef TARGET_NARROW_VOLATILE_BITFIELD
569 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
570
571 #undef TARGET_CXX_GUARD_TYPE
572 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
573
574 #undef TARGET_CXX_GUARD_MASK_BIT
575 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
576
577 #undef TARGET_CXX_GET_COOKIE_SIZE
578 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
579
580 #undef TARGET_CXX_COOKIE_HAS_SIZE
581 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
582
583 #undef TARGET_CXX_CDTOR_RETURNS_THIS
584 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
585
586 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
587 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
588
589 #undef TARGET_CXX_USE_AEABI_ATEXIT
590 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
591
592 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
593 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
594 arm_cxx_determine_class_data_visibility
595
596 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
597 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
598
599 #undef TARGET_RETURN_IN_MSB
600 #define TARGET_RETURN_IN_MSB arm_return_in_msb
601
602 #undef TARGET_RETURN_IN_MEMORY
603 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
604
605 #undef TARGET_MUST_PASS_IN_STACK
606 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
607
608 #if ARM_UNWIND_INFO
609 #undef TARGET_ASM_UNWIND_EMIT
610 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
611
612 /* EABI unwinding tables use a different format for the typeinfo tables. */
613 #undef TARGET_ASM_TTYPE
614 #define TARGET_ASM_TTYPE arm_output_ttype
615
616 #undef TARGET_ARM_EABI_UNWINDER
617 #define TARGET_ARM_EABI_UNWINDER true
618
619 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
620 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
621
622 #endif /* ARM_UNWIND_INFO */
623
624 #undef TARGET_ASM_INIT_SECTIONS
625 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
626
627 #undef TARGET_DWARF_REGISTER_SPAN
628 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
629
630 #undef TARGET_CANNOT_COPY_INSN_P
631 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
632
633 #ifdef HAVE_AS_TLS
634 #undef TARGET_HAVE_TLS
635 #define TARGET_HAVE_TLS true
636 #endif
637
638 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
639 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
640
641 #undef TARGET_LEGITIMATE_CONSTANT_P
642 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
643
644 #undef TARGET_CANNOT_FORCE_CONST_MEM
645 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
646
647 #undef TARGET_MAX_ANCHOR_OFFSET
648 #define TARGET_MAX_ANCHOR_OFFSET 4095
649
 650 /* The minimum is set such that the total size of the block
 651 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
 652 divisible by eight, ensuring natural spacing of anchors. */
653 #undef TARGET_MIN_ANCHOR_OFFSET
654 #define TARGET_MIN_ANCHOR_OFFSET -4088
655
656 #undef TARGET_SCHED_ISSUE_RATE
657 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
658
659 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
660 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
661 arm_first_cycle_multipass_dfa_lookahead
662
663 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
664 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
665 arm_first_cycle_multipass_dfa_lookahead_guard
666
667 #undef TARGET_MANGLE_TYPE
668 #define TARGET_MANGLE_TYPE arm_mangle_type
669
670 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
671 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
672
673 #undef TARGET_BUILD_BUILTIN_VA_LIST
674 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
675 #undef TARGET_EXPAND_BUILTIN_VA_START
676 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
677 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
678 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
679
680 #ifdef HAVE_AS_TLS
681 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
682 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
683 #endif
684
685 #undef TARGET_LEGITIMATE_ADDRESS_P
686 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
687
688 #undef TARGET_PREFERRED_RELOAD_CLASS
689 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
690
691 #undef TARGET_PROMOTED_TYPE
692 #define TARGET_PROMOTED_TYPE arm_promoted_type
693
694 #undef TARGET_SCALAR_MODE_SUPPORTED_P
695 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
696
697 #undef TARGET_COMPUTE_FRAME_LAYOUT
698 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
699
700 #undef TARGET_FRAME_POINTER_REQUIRED
701 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
702
703 #undef TARGET_CAN_ELIMINATE
704 #define TARGET_CAN_ELIMINATE arm_can_eliminate
705
706 #undef TARGET_CONDITIONAL_REGISTER_USAGE
707 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
708
709 #undef TARGET_CLASS_LIKELY_SPILLED_P
710 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
711
712 #undef TARGET_VECTORIZE_BUILTINS
713 #define TARGET_VECTORIZE_BUILTINS
714
715 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
716 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
717 arm_builtin_vectorized_function
718
719 #undef TARGET_VECTOR_ALIGNMENT
720 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
721
722 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
723 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
724 arm_vector_alignment_reachable
725
726 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
727 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
728 arm_builtin_support_vector_misalignment
729
730 #undef TARGET_PREFERRED_RENAME_CLASS
731 #define TARGET_PREFERRED_RENAME_CLASS \
732 arm_preferred_rename_class
733
734 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
735 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
736 arm_vectorize_vec_perm_const_ok
737
738 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
739 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
740 arm_builtin_vectorization_cost
741 #undef TARGET_VECTORIZE_ADD_STMT_COST
742 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
743
744 #undef TARGET_CANONICALIZE_COMPARISON
745 #define TARGET_CANONICALIZE_COMPARISON \
746 arm_canonicalize_comparison
747
748 #undef TARGET_ASAN_SHADOW_OFFSET
749 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
750
751 #undef MAX_INSN_PER_IT_BLOCK
752 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
753
754 #undef TARGET_CAN_USE_DOLOOP_P
755 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
756
757 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
758 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
759
760 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
761 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
762
763 #undef TARGET_SCHED_FUSION_PRIORITY
764 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
765
766 #undef TARGET_ASM_FUNCTION_SECTION
767 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
768
769 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
770 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
771
772 #undef TARGET_SECTION_TYPE_FLAGS
773 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
774
775 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
776 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
777
778 #undef TARGET_C_EXCESS_PRECISION
779 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
780
781 /* Although the architecture reserves bits 0 and 1, only the former is
782 used for ARM/Thumb ISA selection in v7 and earlier versions. */
783 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
784 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
785
786 #undef TARGET_FIXED_CONDITION_CODE_REGS
787 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
788
789 #undef TARGET_HARD_REGNO_NREGS
790 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
791 #undef TARGET_HARD_REGNO_MODE_OK
792 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
793
794 #undef TARGET_MODES_TIEABLE_P
795 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
796
797 #undef TARGET_CAN_CHANGE_MODE_CLASS
798 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
799
800 #undef TARGET_CONSTANT_ALIGNMENT
801 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
802 \f
803 /* Obstack for minipool constant handling. */
804 static struct obstack minipool_obstack;
805 static char * minipool_startobj;
806
807 /* The maximum number of insns skipped which
808 will be conditionalised if possible. */
809 static int max_insns_skipped = 5;
810
811 extern FILE * asm_out_file;
812
813 /* True if we are currently building a constant table. */
814 int making_const_table;
815
816 /* The processor for which instructions should be scheduled. */
817 enum processor_type arm_tune = TARGET_CPU_arm_none;
818
819 /* The current tuning set. */
820 const struct tune_params *current_tune;
821
822 /* Which floating point hardware to schedule for. */
823 int arm_fpu_attr;
824
825 /* Used for Thumb call_via trampolines. */
826 rtx thumb_call_via_label[14];
827 static int thumb_call_reg_needed;
828
829 /* The bits in this mask specify which instruction scheduling options should
830 be used. */
831 unsigned int tune_flags = 0;
832
833 /* The highest ARM architecture version supported by the
834 target. */
835 enum base_architecture arm_base_arch = BASE_ARCH_0;
836
837 /* Active target architecture and tuning. */
838
839 struct arm_build_target arm_active_target;
840
841 /* The following are used in the arm.md file as equivalents to bits
842 in the above two flag variables. */
843
844 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
845 int arm_arch3m = 0;
846
847 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
848 int arm_arch4 = 0;
849
850 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
851 int arm_arch4t = 0;
852
853 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
854 int arm_arch5 = 0;
855
856 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
857 int arm_arch5e = 0;
858
859 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
860 int arm_arch5te = 0;
861
862 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
863 int arm_arch6 = 0;
864
865 /* Nonzero if this chip supports the ARM 6K extensions. */
866 int arm_arch6k = 0;
867
868 /* Nonzero if this chip supports the ARM 6KZ extensions. */
869 int arm_arch6kz = 0;
870
871 /* Nonzero if instructions present in ARMv6-M can be used. */
872 int arm_arch6m = 0;
873
874 /* Nonzero if this chip supports the ARM 7 extensions. */
875 int arm_arch7 = 0;
876
877 /* Nonzero if this chip supports the Large Physical Address Extension. */
878 int arm_arch_lpae = 0;
879
880 /* Nonzero if instructions not present in the 'M' profile can be used. */
881 int arm_arch_notm = 0;
882
883 /* Nonzero if instructions present in ARMv7E-M can be used. */
884 int arm_arch7em = 0;
885
886 /* Nonzero if instructions present in ARMv8 can be used. */
887 int arm_arch8 = 0;
888
889 /* Nonzero if this chip supports the ARMv8.1 extensions. */
890 int arm_arch8_1 = 0;
891
892 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
893 int arm_arch8_2 = 0;
894
895 /* Nonzero if this chip supports the FP16 instructions extension of ARM
896 Architecture 8.2. */
897 int arm_fp16_inst = 0;
898
899 /* Nonzero if this chip can benefit from load scheduling. */
900 int arm_ld_sched = 0;
901
902 /* Nonzero if this chip is a StrongARM. */
903 int arm_tune_strongarm = 0;
904
905 /* Nonzero if this chip supports Intel Wireless MMX technology. */
906 int arm_arch_iwmmxt = 0;
907
908 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
909 int arm_arch_iwmmxt2 = 0;
910
911 /* Nonzero if this chip is an XScale. */
912 int arm_arch_xscale = 0;
913
 914 /* Nonzero if tuning for XScale.  */
915 int arm_tune_xscale = 0;
916
917 /* Nonzero if we want to tune for stores that access the write-buffer.
918 This typically means an ARM6 or ARM7 with MMU or MPU. */
919 int arm_tune_wbuf = 0;
920
921 /* Nonzero if tuning for Cortex-A9. */
922 int arm_tune_cortex_a9 = 0;
923
924 /* Nonzero if we should define __THUMB_INTERWORK__ in the
925 preprocessor.
926 XXX This is a bit of a hack, it's intended to help work around
927 problems in GLD which doesn't understand that armv5t code is
928 interworking clean. */
929 int arm_cpp_interwork = 0;
930
931 /* Nonzero if chip supports Thumb 1. */
932 int arm_arch_thumb1;
933
934 /* Nonzero if chip supports Thumb 2. */
935 int arm_arch_thumb2;
936
937 /* Nonzero if chip supports integer division instruction. */
938 int arm_arch_arm_hwdiv;
939 int arm_arch_thumb_hwdiv;
940
941 /* Nonzero if chip disallows volatile memory access in IT block. */
942 int arm_arch_no_volatile_ce;
943
 944 /* Nonzero if we should use Neon to handle 64-bit operations rather
945 than core registers. */
946 int prefer_neon_for_64bits = 0;
947
948 /* Nonzero if we shouldn't use literal pools. */
949 bool arm_disable_literal_pool = false;
950
951 /* The register number to be used for the PIC offset register. */
952 unsigned arm_pic_register = INVALID_REGNUM;
953
954 enum arm_pcs arm_pcs_default;
955
956 /* For an explanation of these variables, see final_prescan_insn below. */
957 int arm_ccfsm_state;
958 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
959 enum arm_cond_code arm_current_cc;
960
961 rtx arm_target_insn;
962 int arm_target_label;
963 /* The number of conditionally executed insns, including the current insn. */
964 int arm_condexec_count = 0;
965 /* A bitmask specifying the patterns for the IT block.
966 Zero means do not output an IT block before this insn. */
967 int arm_condexec_mask = 0;
968 /* The number of bits used in arm_condexec_mask. */
969 int arm_condexec_masklen = 0;
970
971 /* Nonzero if chip supports the ARMv8 CRC instructions. */
972 int arm_arch_crc = 0;
973
974 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
975 int arm_arch_dotprod = 0;
976
977 /* Nonzero if chip supports the ARMv8-M security extensions. */
978 int arm_arch_cmse = 0;
979
 980 /* Nonzero if the core has a very small, high-latency multiply unit. */
981 int arm_m_profile_small_mul = 0;
982
983 /* The condition codes of the ARM, and the inverse function. */
984 static const char * const arm_condition_codes[] =
985 {
986 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
987 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
988 };
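/* Illustrative note -- not part of the original file.  The table is indexed
   by enum arm_cond_code, and adjacent entries are logical inverses
   ("eq"/"ne", "cs"/"cc", "mi"/"pl", ...), so the "inverse function" referred
   to above amounts to flipping the low bit of the code.  Assuming the
   ARM_INVERSE_CONDITION_CODE macro and the ARM_EQ/ARM_NE enumerators from
   arm.h:

     ARM_INVERSE_CONDITION_CODE (ARM_EQ) == ARM_NE    (index 0 ^ 1 == 1)  */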
989
990 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
991 int arm_regs_in_sequence[] =
992 {
993 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
994 };
995
996 #define ARM_LSL_NAME "lsl"
997 #define streq(string1, string2) (strcmp (string1, string2) == 0)
998
999 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1000 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1001 | (1 << PIC_OFFSET_TABLE_REGNUM)))
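/* Worked example -- not part of the original file, and assuming the usual
   Thumb register numbering from arm.h (THUMB_HARD_FRAME_POINTER_REGNUM == 7,
   SP_REGNUM == 13, PC_REGNUM == 15, PIC register typically r9): 0xff selects
   r0-r7; clearing the frame-pointer bit leaves 0x7f, i.e. r0-r6, while the
   SP, PC and PIC bits lie above bit 7 and are already outside the 0xff
   mask.  */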
1002 \f
1003 /* Initialization code. */
1004
1005 struct cpu_tune
1006 {
1007 enum processor_type scheduler;
1008 unsigned int tune_flags;
1009 const struct tune_params *tune;
1010 };
1011
1012 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1013 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1014 { \
1015 num_slots, \
1016 l1_size, \
1017 l1_line_size \
1018 }
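/* Illustrative expansion -- not part of the original file.  The macros above
   simply populate the three prefetch fields (num_slots, l1_size,
   l1_line_size) of a tuning structure, e.g.:

     ARM_PREFETCH_NOT_BENEFICIAL          expands to  { 0, -1, -1 }
     ARM_PREFETCH_BENEFICIAL (4, 32, 32)  expands to  { 4, 32, 32 }  */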
1019
1020 /* arm generic vectorizer costs. */
1021 static const
1022 struct cpu_vec_costs arm_default_vec_cost = {
1023 1, /* scalar_stmt_cost. */
 1024 1, /* scalar_load_cost. */
1025 1, /* scalar_store_cost. */
1026 1, /* vec_stmt_cost. */
1027 1, /* vec_to_scalar_cost. */
1028 1, /* scalar_to_vec_cost. */
1029 1, /* vec_align_load_cost. */
1030 1, /* vec_unalign_load_cost. */
1031 1, /* vec_unalign_store_cost. */
1032 1, /* vec_store_cost. */
1033 3, /* cond_taken_branch_cost. */
1034 1, /* cond_not_taken_branch_cost. */
1035 };
1036
1037 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1038 #include "aarch-cost-tables.h"
1039
1040
1041
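/* Note -- not part of the original file.  The entries in the extra-cost
   tables below are expressed via COSTS_N_INSNS, which (assuming the standard
   definition in rtl.h, ((N) * 4)) scales its argument to single-instruction
   units: COSTS_N_INSNS (2) marks an operation as roughly twice the cost of a
   simple ALU instruction, while a plain 0 means no cost beyond the
   baseline.  */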
1042 const struct cpu_cost_table cortexa9_extra_costs =
1043 {
1044 /* ALU */
1045 {
1046 0, /* arith. */
1047 0, /* logical. */
1048 0, /* shift. */
1049 COSTS_N_INSNS (1), /* shift_reg. */
1050 COSTS_N_INSNS (1), /* arith_shift. */
1051 COSTS_N_INSNS (2), /* arith_shift_reg. */
1052 0, /* log_shift. */
1053 COSTS_N_INSNS (1), /* log_shift_reg. */
1054 COSTS_N_INSNS (1), /* extend. */
1055 COSTS_N_INSNS (2), /* extend_arith. */
1056 COSTS_N_INSNS (1), /* bfi. */
1057 COSTS_N_INSNS (1), /* bfx. */
1058 0, /* clz. */
1059 0, /* rev. */
1060 0, /* non_exec. */
1061 true /* non_exec_costs_exec. */
1062 },
1063 {
1064 /* MULT SImode */
1065 {
1066 COSTS_N_INSNS (3), /* simple. */
1067 COSTS_N_INSNS (3), /* flag_setting. */
1068 COSTS_N_INSNS (2), /* extend. */
1069 COSTS_N_INSNS (3), /* add. */
1070 COSTS_N_INSNS (2), /* extend_add. */
1071 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1072 },
1073 /* MULT DImode */
1074 {
1075 0, /* simple (N/A). */
1076 0, /* flag_setting (N/A). */
1077 COSTS_N_INSNS (4), /* extend. */
1078 0, /* add (N/A). */
1079 COSTS_N_INSNS (4), /* extend_add. */
1080 0 /* idiv (N/A). */
1081 }
1082 },
1083 /* LD/ST */
1084 {
1085 COSTS_N_INSNS (2), /* load. */
1086 COSTS_N_INSNS (2), /* load_sign_extend. */
1087 COSTS_N_INSNS (2), /* ldrd. */
1088 COSTS_N_INSNS (2), /* ldm_1st. */
1089 1, /* ldm_regs_per_insn_1st. */
1090 2, /* ldm_regs_per_insn_subsequent. */
1091 COSTS_N_INSNS (5), /* loadf. */
1092 COSTS_N_INSNS (5), /* loadd. */
1093 COSTS_N_INSNS (1), /* load_unaligned. */
1094 COSTS_N_INSNS (2), /* store. */
1095 COSTS_N_INSNS (2), /* strd. */
1096 COSTS_N_INSNS (2), /* stm_1st. */
1097 1, /* stm_regs_per_insn_1st. */
1098 2, /* stm_regs_per_insn_subsequent. */
1099 COSTS_N_INSNS (1), /* storef. */
1100 COSTS_N_INSNS (1), /* stored. */
1101 COSTS_N_INSNS (1), /* store_unaligned. */
1102 COSTS_N_INSNS (1), /* loadv. */
1103 COSTS_N_INSNS (1) /* storev. */
1104 },
1105 {
1106 /* FP SFmode */
1107 {
1108 COSTS_N_INSNS (14), /* div. */
1109 COSTS_N_INSNS (4), /* mult. */
1110 COSTS_N_INSNS (7), /* mult_addsub. */
1111 COSTS_N_INSNS (30), /* fma. */
1112 COSTS_N_INSNS (3), /* addsub. */
1113 COSTS_N_INSNS (1), /* fpconst. */
1114 COSTS_N_INSNS (1), /* neg. */
1115 COSTS_N_INSNS (3), /* compare. */
1116 COSTS_N_INSNS (3), /* widen. */
1117 COSTS_N_INSNS (3), /* narrow. */
1118 COSTS_N_INSNS (3), /* toint. */
1119 COSTS_N_INSNS (3), /* fromint. */
1120 COSTS_N_INSNS (3) /* roundint. */
1121 },
1122 /* FP DFmode */
1123 {
1124 COSTS_N_INSNS (24), /* div. */
1125 COSTS_N_INSNS (5), /* mult. */
1126 COSTS_N_INSNS (8), /* mult_addsub. */
1127 COSTS_N_INSNS (30), /* fma. */
1128 COSTS_N_INSNS (3), /* addsub. */
1129 COSTS_N_INSNS (1), /* fpconst. */
1130 COSTS_N_INSNS (1), /* neg. */
1131 COSTS_N_INSNS (3), /* compare. */
1132 COSTS_N_INSNS (3), /* widen. */
1133 COSTS_N_INSNS (3), /* narrow. */
1134 COSTS_N_INSNS (3), /* toint. */
1135 COSTS_N_INSNS (3), /* fromint. */
1136 COSTS_N_INSNS (3) /* roundint. */
1137 }
1138 },
1139 /* Vector */
1140 {
1141 COSTS_N_INSNS (1) /* alu. */
1142 }
1143 };
1144
1145 const struct cpu_cost_table cortexa8_extra_costs =
1146 {
1147 /* ALU */
1148 {
1149 0, /* arith. */
1150 0, /* logical. */
1151 COSTS_N_INSNS (1), /* shift. */
1152 0, /* shift_reg. */
1153 COSTS_N_INSNS (1), /* arith_shift. */
1154 0, /* arith_shift_reg. */
1155 COSTS_N_INSNS (1), /* log_shift. */
1156 0, /* log_shift_reg. */
1157 0, /* extend. */
1158 0, /* extend_arith. */
1159 0, /* bfi. */
1160 0, /* bfx. */
1161 0, /* clz. */
1162 0, /* rev. */
1163 0, /* non_exec. */
1164 true /* non_exec_costs_exec. */
1165 },
1166 {
1167 /* MULT SImode */
1168 {
1169 COSTS_N_INSNS (1), /* simple. */
1170 COSTS_N_INSNS (1), /* flag_setting. */
1171 COSTS_N_INSNS (1), /* extend. */
1172 COSTS_N_INSNS (1), /* add. */
1173 COSTS_N_INSNS (1), /* extend_add. */
1174 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1175 },
1176 /* MULT DImode */
1177 {
1178 0, /* simple (N/A). */
1179 0, /* flag_setting (N/A). */
1180 COSTS_N_INSNS (2), /* extend. */
1181 0, /* add (N/A). */
1182 COSTS_N_INSNS (2), /* extend_add. */
1183 0 /* idiv (N/A). */
1184 }
1185 },
1186 /* LD/ST */
1187 {
1188 COSTS_N_INSNS (1), /* load. */
1189 COSTS_N_INSNS (1), /* load_sign_extend. */
1190 COSTS_N_INSNS (1), /* ldrd. */
1191 COSTS_N_INSNS (1), /* ldm_1st. */
1192 1, /* ldm_regs_per_insn_1st. */
1193 2, /* ldm_regs_per_insn_subsequent. */
1194 COSTS_N_INSNS (1), /* loadf. */
1195 COSTS_N_INSNS (1), /* loadd. */
1196 COSTS_N_INSNS (1), /* load_unaligned. */
1197 COSTS_N_INSNS (1), /* store. */
1198 COSTS_N_INSNS (1), /* strd. */
1199 COSTS_N_INSNS (1), /* stm_1st. */
1200 1, /* stm_regs_per_insn_1st. */
1201 2, /* stm_regs_per_insn_subsequent. */
1202 COSTS_N_INSNS (1), /* storef. */
1203 COSTS_N_INSNS (1), /* stored. */
1204 COSTS_N_INSNS (1), /* store_unaligned. */
1205 COSTS_N_INSNS (1), /* loadv. */
1206 COSTS_N_INSNS (1) /* storev. */
1207 },
1208 {
1209 /* FP SFmode */
1210 {
1211 COSTS_N_INSNS (36), /* div. */
1212 COSTS_N_INSNS (11), /* mult. */
1213 COSTS_N_INSNS (20), /* mult_addsub. */
1214 COSTS_N_INSNS (30), /* fma. */
1215 COSTS_N_INSNS (9), /* addsub. */
1216 COSTS_N_INSNS (3), /* fpconst. */
1217 COSTS_N_INSNS (3), /* neg. */
1218 COSTS_N_INSNS (6), /* compare. */
1219 COSTS_N_INSNS (4), /* widen. */
1220 COSTS_N_INSNS (4), /* narrow. */
1221 COSTS_N_INSNS (8), /* toint. */
1222 COSTS_N_INSNS (8), /* fromint. */
1223 COSTS_N_INSNS (8) /* roundint. */
1224 },
1225 /* FP DFmode */
1226 {
1227 COSTS_N_INSNS (64), /* div. */
1228 COSTS_N_INSNS (16), /* mult. */
1229 COSTS_N_INSNS (25), /* mult_addsub. */
1230 COSTS_N_INSNS (30), /* fma. */
1231 COSTS_N_INSNS (9), /* addsub. */
1232 COSTS_N_INSNS (3), /* fpconst. */
1233 COSTS_N_INSNS (3), /* neg. */
1234 COSTS_N_INSNS (6), /* compare. */
1235 COSTS_N_INSNS (6), /* widen. */
1236 COSTS_N_INSNS (6), /* narrow. */
1237 COSTS_N_INSNS (8), /* toint. */
1238 COSTS_N_INSNS (8), /* fromint. */
1239 COSTS_N_INSNS (8) /* roundint. */
1240 }
1241 },
1242 /* Vector */
1243 {
1244 COSTS_N_INSNS (1) /* alu. */
1245 }
1246 };
1247
1248 const struct cpu_cost_table cortexa5_extra_costs =
1249 {
1250 /* ALU */
1251 {
1252 0, /* arith. */
1253 0, /* logical. */
1254 COSTS_N_INSNS (1), /* shift. */
1255 COSTS_N_INSNS (1), /* shift_reg. */
1256 COSTS_N_INSNS (1), /* arith_shift. */
1257 COSTS_N_INSNS (1), /* arith_shift_reg. */
1258 COSTS_N_INSNS (1), /* log_shift. */
1259 COSTS_N_INSNS (1), /* log_shift_reg. */
1260 COSTS_N_INSNS (1), /* extend. */
1261 COSTS_N_INSNS (1), /* extend_arith. */
1262 COSTS_N_INSNS (1), /* bfi. */
1263 COSTS_N_INSNS (1), /* bfx. */
1264 COSTS_N_INSNS (1), /* clz. */
1265 COSTS_N_INSNS (1), /* rev. */
1266 0, /* non_exec. */
1267 true /* non_exec_costs_exec. */
1268 },
1269
1270 {
1271 /* MULT SImode */
1272 {
1273 0, /* simple. */
1274 COSTS_N_INSNS (1), /* flag_setting. */
1275 COSTS_N_INSNS (1), /* extend. */
1276 COSTS_N_INSNS (1), /* add. */
1277 COSTS_N_INSNS (1), /* extend_add. */
1278 COSTS_N_INSNS (7) /* idiv. */
1279 },
1280 /* MULT DImode */
1281 {
1282 0, /* simple (N/A). */
1283 0, /* flag_setting (N/A). */
1284 COSTS_N_INSNS (1), /* extend. */
1285 0, /* add. */
1286 COSTS_N_INSNS (2), /* extend_add. */
1287 0 /* idiv (N/A). */
1288 }
1289 },
1290 /* LD/ST */
1291 {
1292 COSTS_N_INSNS (1), /* load. */
1293 COSTS_N_INSNS (1), /* load_sign_extend. */
1294 COSTS_N_INSNS (6), /* ldrd. */
1295 COSTS_N_INSNS (1), /* ldm_1st. */
1296 1, /* ldm_regs_per_insn_1st. */
1297 2, /* ldm_regs_per_insn_subsequent. */
1298 COSTS_N_INSNS (2), /* loadf. */
1299 COSTS_N_INSNS (4), /* loadd. */
1300 COSTS_N_INSNS (1), /* load_unaligned. */
1301 COSTS_N_INSNS (1), /* store. */
1302 COSTS_N_INSNS (3), /* strd. */
1303 COSTS_N_INSNS (1), /* stm_1st. */
1304 1, /* stm_regs_per_insn_1st. */
1305 2, /* stm_regs_per_insn_subsequent. */
1306 COSTS_N_INSNS (2), /* storef. */
1307 COSTS_N_INSNS (2), /* stored. */
1308 COSTS_N_INSNS (1), /* store_unaligned. */
1309 COSTS_N_INSNS (1), /* loadv. */
1310 COSTS_N_INSNS (1) /* storev. */
1311 },
1312 {
1313 /* FP SFmode */
1314 {
1315 COSTS_N_INSNS (15), /* div. */
1316 COSTS_N_INSNS (3), /* mult. */
1317 COSTS_N_INSNS (7), /* mult_addsub. */
1318 COSTS_N_INSNS (7), /* fma. */
1319 COSTS_N_INSNS (3), /* addsub. */
1320 COSTS_N_INSNS (3), /* fpconst. */
1321 COSTS_N_INSNS (3), /* neg. */
1322 COSTS_N_INSNS (3), /* compare. */
1323 COSTS_N_INSNS (3), /* widen. */
1324 COSTS_N_INSNS (3), /* narrow. */
1325 COSTS_N_INSNS (3), /* toint. */
1326 COSTS_N_INSNS (3), /* fromint. */
1327 COSTS_N_INSNS (3) /* roundint. */
1328 },
1329 /* FP DFmode */
1330 {
1331 COSTS_N_INSNS (30), /* div. */
1332 COSTS_N_INSNS (6), /* mult. */
1333 COSTS_N_INSNS (10), /* mult_addsub. */
1334 COSTS_N_INSNS (7), /* fma. */
1335 COSTS_N_INSNS (3), /* addsub. */
1336 COSTS_N_INSNS (3), /* fpconst. */
1337 COSTS_N_INSNS (3), /* neg. */
1338 COSTS_N_INSNS (3), /* compare. */
1339 COSTS_N_INSNS (3), /* widen. */
1340 COSTS_N_INSNS (3), /* narrow. */
1341 COSTS_N_INSNS (3), /* toint. */
1342 COSTS_N_INSNS (3), /* fromint. */
1343 COSTS_N_INSNS (3) /* roundint. */
1344 }
1345 },
1346 /* Vector */
1347 {
1348 COSTS_N_INSNS (1) /* alu. */
1349 }
1350 };
1351
1352
1353 const struct cpu_cost_table cortexa7_extra_costs =
1354 {
1355 /* ALU */
1356 {
1357 0, /* arith. */
1358 0, /* logical. */
1359 COSTS_N_INSNS (1), /* shift. */
1360 COSTS_N_INSNS (1), /* shift_reg. */
1361 COSTS_N_INSNS (1), /* arith_shift. */
1362 COSTS_N_INSNS (1), /* arith_shift_reg. */
1363 COSTS_N_INSNS (1), /* log_shift. */
1364 COSTS_N_INSNS (1), /* log_shift_reg. */
1365 COSTS_N_INSNS (1), /* extend. */
1366 COSTS_N_INSNS (1), /* extend_arith. */
1367 COSTS_N_INSNS (1), /* bfi. */
1368 COSTS_N_INSNS (1), /* bfx. */
1369 COSTS_N_INSNS (1), /* clz. */
1370 COSTS_N_INSNS (1), /* rev. */
1371 0, /* non_exec. */
1372 true /* non_exec_costs_exec. */
1373 },
1374
1375 {
1376 /* MULT SImode */
1377 {
1378 0, /* simple. */
1379 COSTS_N_INSNS (1), /* flag_setting. */
1380 COSTS_N_INSNS (1), /* extend. */
1381 COSTS_N_INSNS (1), /* add. */
1382 COSTS_N_INSNS (1), /* extend_add. */
1383 COSTS_N_INSNS (7) /* idiv. */
1384 },
1385 /* MULT DImode */
1386 {
1387 0, /* simple (N/A). */
1388 0, /* flag_setting (N/A). */
1389 COSTS_N_INSNS (1), /* extend. */
1390 0, /* add. */
1391 COSTS_N_INSNS (2), /* extend_add. */
1392 0 /* idiv (N/A). */
1393 }
1394 },
1395 /* LD/ST */
1396 {
1397 COSTS_N_INSNS (1), /* load. */
1398 COSTS_N_INSNS (1), /* load_sign_extend. */
1399 COSTS_N_INSNS (3), /* ldrd. */
1400 COSTS_N_INSNS (1), /* ldm_1st. */
1401 1, /* ldm_regs_per_insn_1st. */
1402 2, /* ldm_regs_per_insn_subsequent. */
1403 COSTS_N_INSNS (2), /* loadf. */
1404 COSTS_N_INSNS (2), /* loadd. */
1405 COSTS_N_INSNS (1), /* load_unaligned. */
1406 COSTS_N_INSNS (1), /* store. */
1407 COSTS_N_INSNS (3), /* strd. */
1408 COSTS_N_INSNS (1), /* stm_1st. */
1409 1, /* stm_regs_per_insn_1st. */
1410 2, /* stm_regs_per_insn_subsequent. */
1411 COSTS_N_INSNS (2), /* storef. */
1412 COSTS_N_INSNS (2), /* stored. */
1413 COSTS_N_INSNS (1), /* store_unaligned. */
1414 COSTS_N_INSNS (1), /* loadv. */
1415 COSTS_N_INSNS (1) /* storev. */
1416 },
1417 {
1418 /* FP SFmode */
1419 {
1420 COSTS_N_INSNS (15), /* div. */
1421 COSTS_N_INSNS (3), /* mult. */
1422 COSTS_N_INSNS (7), /* mult_addsub. */
1423 COSTS_N_INSNS (7), /* fma. */
1424 COSTS_N_INSNS (3), /* addsub. */
1425 COSTS_N_INSNS (3), /* fpconst. */
1426 COSTS_N_INSNS (3), /* neg. */
1427 COSTS_N_INSNS (3), /* compare. */
1428 COSTS_N_INSNS (3), /* widen. */
1429 COSTS_N_INSNS (3), /* narrow. */
1430 COSTS_N_INSNS (3), /* toint. */
1431 COSTS_N_INSNS (3), /* fromint. */
1432 COSTS_N_INSNS (3) /* roundint. */
1433 },
1434 /* FP DFmode */
1435 {
1436 COSTS_N_INSNS (30), /* div. */
1437 COSTS_N_INSNS (6), /* mult. */
1438 COSTS_N_INSNS (10), /* mult_addsub. */
1439 COSTS_N_INSNS (7), /* fma. */
1440 COSTS_N_INSNS (3), /* addsub. */
1441 COSTS_N_INSNS (3), /* fpconst. */
1442 COSTS_N_INSNS (3), /* neg. */
1443 COSTS_N_INSNS (3), /* compare. */
1444 COSTS_N_INSNS (3), /* widen. */
1445 COSTS_N_INSNS (3), /* narrow. */
1446 COSTS_N_INSNS (3), /* toint. */
1447 COSTS_N_INSNS (3), /* fromint. */
1448 COSTS_N_INSNS (3) /* roundint. */
1449 }
1450 },
1451 /* Vector */
1452 {
1453 COSTS_N_INSNS (1) /* alu. */
1454 }
1455 };
1456
1457 const struct cpu_cost_table cortexa12_extra_costs =
1458 {
1459 /* ALU */
1460 {
1461 0, /* arith. */
1462 0, /* logical. */
1463 0, /* shift. */
1464 COSTS_N_INSNS (1), /* shift_reg. */
1465 COSTS_N_INSNS (1), /* arith_shift. */
1466 COSTS_N_INSNS (1), /* arith_shift_reg. */
1467 COSTS_N_INSNS (1), /* log_shift. */
1468 COSTS_N_INSNS (1), /* log_shift_reg. */
1469 0, /* extend. */
1470 COSTS_N_INSNS (1), /* extend_arith. */
1471 0, /* bfi. */
1472 COSTS_N_INSNS (1), /* bfx. */
1473 COSTS_N_INSNS (1), /* clz. */
1474 COSTS_N_INSNS (1), /* rev. */
1475 0, /* non_exec. */
1476 true /* non_exec_costs_exec. */
1477 },
1478 /* MULT SImode */
1479 {
1480 {
1481 COSTS_N_INSNS (2), /* simple. */
1482 COSTS_N_INSNS (3), /* flag_setting. */
1483 COSTS_N_INSNS (2), /* extend. */
1484 COSTS_N_INSNS (3), /* add. */
1485 COSTS_N_INSNS (2), /* extend_add. */
1486 COSTS_N_INSNS (18) /* idiv. */
1487 },
1488 /* MULT DImode */
1489 {
1490 0, /* simple (N/A). */
1491 0, /* flag_setting (N/A). */
1492 COSTS_N_INSNS (3), /* extend. */
1493 0, /* add (N/A). */
1494 COSTS_N_INSNS (3), /* extend_add. */
1495 0 /* idiv (N/A). */
1496 }
1497 },
1498 /* LD/ST */
1499 {
1500 COSTS_N_INSNS (3), /* load. */
1501 COSTS_N_INSNS (3), /* load_sign_extend. */
1502 COSTS_N_INSNS (3), /* ldrd. */
1503 COSTS_N_INSNS (3), /* ldm_1st. */
1504 1, /* ldm_regs_per_insn_1st. */
1505 2, /* ldm_regs_per_insn_subsequent. */
1506 COSTS_N_INSNS (3), /* loadf. */
1507 COSTS_N_INSNS (3), /* loadd. */
1508 0, /* load_unaligned. */
1509 0, /* store. */
1510 0, /* strd. */
1511 0, /* stm_1st. */
1512 1, /* stm_regs_per_insn_1st. */
1513 2, /* stm_regs_per_insn_subsequent. */
1514 COSTS_N_INSNS (2), /* storef. */
1515 COSTS_N_INSNS (2), /* stored. */
1516 0, /* store_unaligned. */
1517 COSTS_N_INSNS (1), /* loadv. */
1518 COSTS_N_INSNS (1) /* storev. */
1519 },
1520 {
1521 /* FP SFmode */
1522 {
1523 COSTS_N_INSNS (17), /* div. */
1524 COSTS_N_INSNS (4), /* mult. */
1525 COSTS_N_INSNS (8), /* mult_addsub. */
1526 COSTS_N_INSNS (8), /* fma. */
1527 COSTS_N_INSNS (4), /* addsub. */
1528 COSTS_N_INSNS (2), /* fpconst. */
1529 COSTS_N_INSNS (2), /* neg. */
1530 COSTS_N_INSNS (2), /* compare. */
1531 COSTS_N_INSNS (4), /* widen. */
1532 COSTS_N_INSNS (4), /* narrow. */
1533 COSTS_N_INSNS (4), /* toint. */
1534 COSTS_N_INSNS (4), /* fromint. */
1535 COSTS_N_INSNS (4) /* roundint. */
1536 },
1537 /* FP DFmode */
1538 {
1539 COSTS_N_INSNS (31), /* div. */
1540 COSTS_N_INSNS (4), /* mult. */
1541 COSTS_N_INSNS (8), /* mult_addsub. */
1542 COSTS_N_INSNS (8), /* fma. */
1543 COSTS_N_INSNS (4), /* addsub. */
1544 COSTS_N_INSNS (2), /* fpconst. */
1545 COSTS_N_INSNS (2), /* neg. */
1546 COSTS_N_INSNS (2), /* compare. */
1547 COSTS_N_INSNS (4), /* widen. */
1548 COSTS_N_INSNS (4), /* narrow. */
1549 COSTS_N_INSNS (4), /* toint. */
1550 COSTS_N_INSNS (4), /* fromint. */
1551 COSTS_N_INSNS (4) /* roundint. */
1552 }
1553 },
1554 /* Vector */
1555 {
1556 COSTS_N_INSNS (1) /* alu. */
1557 }
1558 };
1559
1560 const struct cpu_cost_table cortexa15_extra_costs =
1561 {
1562 /* ALU */
1563 {
1564 0, /* arith. */
1565 0, /* logical. */
1566 0, /* shift. */
1567 0, /* shift_reg. */
1568 COSTS_N_INSNS (1), /* arith_shift. */
1569 COSTS_N_INSNS (1), /* arith_shift_reg. */
1570 COSTS_N_INSNS (1), /* log_shift. */
1571 COSTS_N_INSNS (1), /* log_shift_reg. */
1572 0, /* extend. */
1573 COSTS_N_INSNS (1), /* extend_arith. */
1574 COSTS_N_INSNS (1), /* bfi. */
1575 0, /* bfx. */
1576 0, /* clz. */
1577 0, /* rev. */
1578 0, /* non_exec. */
1579 true /* non_exec_costs_exec. */
1580 },
1581 /* MULT SImode */
1582 {
1583 {
1584 COSTS_N_INSNS (2), /* simple. */
1585 COSTS_N_INSNS (3), /* flag_setting. */
1586 COSTS_N_INSNS (2), /* extend. */
1587 COSTS_N_INSNS (2), /* add. */
1588 COSTS_N_INSNS (2), /* extend_add. */
1589 COSTS_N_INSNS (18) /* idiv. */
1590 },
1591 /* MULT DImode */
1592 {
1593 0, /* simple (N/A). */
1594 0, /* flag_setting (N/A). */
1595 COSTS_N_INSNS (3), /* extend. */
1596 0, /* add (N/A). */
1597 COSTS_N_INSNS (3), /* extend_add. */
1598 0 /* idiv (N/A). */
1599 }
1600 },
1601 /* LD/ST */
1602 {
1603 COSTS_N_INSNS (3), /* load. */
1604 COSTS_N_INSNS (3), /* load_sign_extend. */
1605 COSTS_N_INSNS (3), /* ldrd. */
1606 COSTS_N_INSNS (4), /* ldm_1st. */
1607 1, /* ldm_regs_per_insn_1st. */
1608 2, /* ldm_regs_per_insn_subsequent. */
1609 COSTS_N_INSNS (4), /* loadf. */
1610 COSTS_N_INSNS (4), /* loadd. */
1611 0, /* load_unaligned. */
1612 0, /* store. */
1613 0, /* strd. */
1614 COSTS_N_INSNS (1), /* stm_1st. */
1615 1, /* stm_regs_per_insn_1st. */
1616 2, /* stm_regs_per_insn_subsequent. */
1617 0, /* storef. */
1618 0, /* stored. */
1619 0, /* store_unaligned. */
1620 COSTS_N_INSNS (1), /* loadv. */
1621 COSTS_N_INSNS (1) /* storev. */
1622 },
1623 {
1624 /* FP SFmode */
1625 {
1626 COSTS_N_INSNS (17), /* div. */
1627 COSTS_N_INSNS (4), /* mult. */
1628 COSTS_N_INSNS (8), /* mult_addsub. */
1629 COSTS_N_INSNS (8), /* fma. */
1630 COSTS_N_INSNS (4), /* addsub. */
1631 COSTS_N_INSNS (2), /* fpconst. */
1632 COSTS_N_INSNS (2), /* neg. */
1633 COSTS_N_INSNS (5), /* compare. */
1634 COSTS_N_INSNS (4), /* widen. */
1635 COSTS_N_INSNS (4), /* narrow. */
1636 COSTS_N_INSNS (4), /* toint. */
1637 COSTS_N_INSNS (4), /* fromint. */
1638 COSTS_N_INSNS (4) /* roundint. */
1639 },
1640 /* FP DFmode */
1641 {
1642 COSTS_N_INSNS (31), /* div. */
1643 COSTS_N_INSNS (4), /* mult. */
1644 COSTS_N_INSNS (8), /* mult_addsub. */
1645 COSTS_N_INSNS (8), /* fma. */
1646 COSTS_N_INSNS (4), /* addsub. */
1647 COSTS_N_INSNS (2), /* fpconst. */
1648 COSTS_N_INSNS (2), /* neg. */
1649 COSTS_N_INSNS (2), /* compare. */
1650 COSTS_N_INSNS (4), /* widen. */
1651 COSTS_N_INSNS (4), /* narrow. */
1652 COSTS_N_INSNS (4), /* toint. */
1653 COSTS_N_INSNS (4), /* fromint. */
1654 COSTS_N_INSNS (4) /* roundint. */
1655 }
1656 },
1657 /* Vector */
1658 {
1659 COSTS_N_INSNS (1) /* alu. */
1660 }
1661 };
1662
1663 const struct cpu_cost_table v7m_extra_costs =
1664 {
1665 /* ALU */
1666 {
1667 0, /* arith. */
1668 0, /* logical. */
1669 0, /* shift. */
1670 0, /* shift_reg. */
1671 0, /* arith_shift. */
1672 COSTS_N_INSNS (1), /* arith_shift_reg. */
1673 0, /* log_shift. */
1674 COSTS_N_INSNS (1), /* log_shift_reg. */
1675 0, /* extend. */
1676 COSTS_N_INSNS (1), /* extend_arith. */
1677 0, /* bfi. */
1678 0, /* bfx. */
1679 0, /* clz. */
1680 0, /* rev. */
1681 COSTS_N_INSNS (1), /* non_exec. */
1682 false /* non_exec_costs_exec. */
1683 },
1684 {
1685 /* MULT SImode */
1686 {
1687 COSTS_N_INSNS (1), /* simple. */
1688 COSTS_N_INSNS (1), /* flag_setting. */
1689 COSTS_N_INSNS (2), /* extend. */
1690 COSTS_N_INSNS (1), /* add. */
1691 COSTS_N_INSNS (3), /* extend_add. */
1692 COSTS_N_INSNS (8) /* idiv. */
1693 },
1694 /* MULT DImode */
1695 {
1696 0, /* simple (N/A). */
1697 0, /* flag_setting (N/A). */
1698 COSTS_N_INSNS (2), /* extend. */
1699 0, /* add (N/A). */
1700 COSTS_N_INSNS (3), /* extend_add. */
1701 0 /* idiv (N/A). */
1702 }
1703 },
1704 /* LD/ST */
1705 {
1706 COSTS_N_INSNS (2), /* load. */
1707 0, /* load_sign_extend. */
1708 COSTS_N_INSNS (3), /* ldrd. */
1709 COSTS_N_INSNS (2), /* ldm_1st. */
1710 1, /* ldm_regs_per_insn_1st. */
1711 1, /* ldm_regs_per_insn_subsequent. */
1712 COSTS_N_INSNS (2), /* loadf. */
1713 COSTS_N_INSNS (3), /* loadd. */
1714 COSTS_N_INSNS (1), /* load_unaligned. */
1715 COSTS_N_INSNS (2), /* store. */
1716 COSTS_N_INSNS (3), /* strd. */
1717 COSTS_N_INSNS (2), /* stm_1st. */
1718 1, /* stm_regs_per_insn_1st. */
1719 1, /* stm_regs_per_insn_subsequent. */
1720 COSTS_N_INSNS (2), /* storef. */
1721 COSTS_N_INSNS (3), /* stored. */
1722 COSTS_N_INSNS (1), /* store_unaligned. */
1723 COSTS_N_INSNS (1), /* loadv. */
1724 COSTS_N_INSNS (1) /* storev. */
1725 },
1726 {
1727 /* FP SFmode */
1728 {
1729 COSTS_N_INSNS (7), /* div. */
1730 COSTS_N_INSNS (2), /* mult. */
1731 COSTS_N_INSNS (5), /* mult_addsub. */
1732 COSTS_N_INSNS (3), /* fma. */
1733 COSTS_N_INSNS (1), /* addsub. */
1734 0, /* fpconst. */
1735 0, /* neg. */
1736 0, /* compare. */
1737 0, /* widen. */
1738 0, /* narrow. */
1739 0, /* toint. */
1740 0, /* fromint. */
1741 0 /* roundint. */
1742 },
1743 /* FP DFmode */
1744 {
1745 COSTS_N_INSNS (15), /* div. */
1746 COSTS_N_INSNS (5), /* mult. */
1747 COSTS_N_INSNS (7), /* mult_addsub. */
1748 COSTS_N_INSNS (7), /* fma. */
1749 COSTS_N_INSNS (3), /* addsub. */
1750 0, /* fpconst. */
1751 0, /* neg. */
1752 0, /* compare. */
1753 0, /* widen. */
1754 0, /* narrow. */
1755 0, /* toint. */
1756 0, /* fromint. */
1757 0 /* roundint. */
1758 }
1759 },
1760 /* Vector */
1761 {
1762 COSTS_N_INSNS (1) /* alu. */
1763 }
1764 };
1765
1766 const struct tune_params arm_slowmul_tune =
1767 {
1768 &generic_extra_costs, /* Insn extra costs. */
1769 NULL, /* Sched adj cost. */
1770 arm_default_branch_cost,
1771 &arm_default_vec_cost,
1772 3, /* Constant limit. */
1773 5, /* Max cond insns. */
1774 8, /* Memset max inline. */
1775 1, /* Issue rate. */
1776 ARM_PREFETCH_NOT_BENEFICIAL,
1777 tune_params::PREF_CONST_POOL_TRUE,
1778 tune_params::PREF_LDRD_FALSE,
1779 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1780 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1781 tune_params::DISPARAGE_FLAGS_NEITHER,
1782 tune_params::PREF_NEON_64_FALSE,
1783 tune_params::PREF_NEON_STRINGOPS_FALSE,
1784 tune_params::FUSE_NOTHING,
1785 tune_params::SCHED_AUTOPREF_OFF
1786 };
1787
1788 const struct tune_params arm_fastmul_tune =
1789 {
1790 &generic_extra_costs, /* Insn extra costs. */
1791 NULL, /* Sched adj cost. */
1792 arm_default_branch_cost,
1793 &arm_default_vec_cost,
1794 1, /* Constant limit. */
1795 5, /* Max cond insns. */
1796 8, /* Memset max inline. */
1797 1, /* Issue rate. */
1798 ARM_PREFETCH_NOT_BENEFICIAL,
1799 tune_params::PREF_CONST_POOL_TRUE,
1800 tune_params::PREF_LDRD_FALSE,
1801 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1802 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1803 tune_params::DISPARAGE_FLAGS_NEITHER,
1804 tune_params::PREF_NEON_64_FALSE,
1805 tune_params::PREF_NEON_STRINGOPS_FALSE,
1806 tune_params::FUSE_NOTHING,
1807 tune_params::SCHED_AUTOPREF_OFF
1808 };
1809
1810 /* StrongARM has early execution of branches, so a sequence that is worth
1811 skipping is shorter. Set max_insns_skipped to a lower value. */
1812
1813 const struct tune_params arm_strongarm_tune =
1814 {
1815 &generic_extra_costs, /* Insn extra costs. */
1816 NULL, /* Sched adj cost. */
1817 arm_default_branch_cost,
1818 &arm_default_vec_cost,
1819 1, /* Constant limit. */
1820 3, /* Max cond insns. */
1821 8, /* Memset max inline. */
1822 1, /* Issue rate. */
1823 ARM_PREFETCH_NOT_BENEFICIAL,
1824 tune_params::PREF_CONST_POOL_TRUE,
1825 tune_params::PREF_LDRD_FALSE,
1826 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1827 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1828 tune_params::DISPARAGE_FLAGS_NEITHER,
1829 tune_params::PREF_NEON_64_FALSE,
1830 tune_params::PREF_NEON_STRINGOPS_FALSE,
1831 tune_params::FUSE_NOTHING,
1832 tune_params::SCHED_AUTOPREF_OFF
1833 };
1834
1835 const struct tune_params arm_xscale_tune =
1836 {
1837 &generic_extra_costs, /* Insn extra costs. */
1838 xscale_sched_adjust_cost,
1839 arm_default_branch_cost,
1840 &arm_default_vec_cost,
1841 2, /* Constant limit. */
1842 3, /* Max cond insns. */
1843 8, /* Memset max inline. */
1844 1, /* Issue rate. */
1845 ARM_PREFETCH_NOT_BENEFICIAL,
1846 tune_params::PREF_CONST_POOL_TRUE,
1847 tune_params::PREF_LDRD_FALSE,
1848 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1849 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1850 tune_params::DISPARAGE_FLAGS_NEITHER,
1851 tune_params::PREF_NEON_64_FALSE,
1852 tune_params::PREF_NEON_STRINGOPS_FALSE,
1853 tune_params::FUSE_NOTHING,
1854 tune_params::SCHED_AUTOPREF_OFF
1855 };
1856
1857 const struct tune_params arm_9e_tune =
1858 {
1859 &generic_extra_costs, /* Insn extra costs. */
1860 NULL, /* Sched adj cost. */
1861 arm_default_branch_cost,
1862 &arm_default_vec_cost,
1863 1, /* Constant limit. */
1864 5, /* Max cond insns. */
1865 8, /* Memset max inline. */
1866 1, /* Issue rate. */
1867 ARM_PREFETCH_NOT_BENEFICIAL,
1868 tune_params::PREF_CONST_POOL_TRUE,
1869 tune_params::PREF_LDRD_FALSE,
1870 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1871 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1872 tune_params::DISPARAGE_FLAGS_NEITHER,
1873 tune_params::PREF_NEON_64_FALSE,
1874 tune_params::PREF_NEON_STRINGOPS_FALSE,
1875 tune_params::FUSE_NOTHING,
1876 tune_params::SCHED_AUTOPREF_OFF
1877 };
1878
1879 const struct tune_params arm_marvell_pj4_tune =
1880 {
1881 &generic_extra_costs, /* Insn extra costs. */
1882 NULL, /* Sched adj cost. */
1883 arm_default_branch_cost,
1884 &arm_default_vec_cost,
1885 1, /* Constant limit. */
1886 5, /* Max cond insns. */
1887 8, /* Memset max inline. */
1888 2, /* Issue rate. */
1889 ARM_PREFETCH_NOT_BENEFICIAL,
1890 tune_params::PREF_CONST_POOL_TRUE,
1891 tune_params::PREF_LDRD_FALSE,
1892 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1893 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1894 tune_params::DISPARAGE_FLAGS_NEITHER,
1895 tune_params::PREF_NEON_64_FALSE,
1896 tune_params::PREF_NEON_STRINGOPS_FALSE,
1897 tune_params::FUSE_NOTHING,
1898 tune_params::SCHED_AUTOPREF_OFF
1899 };
1900
1901 const struct tune_params arm_v6t2_tune =
1902 {
1903 &generic_extra_costs, /* Insn extra costs. */
1904 NULL, /* Sched adj cost. */
1905 arm_default_branch_cost,
1906 &arm_default_vec_cost,
1907 1, /* Constant limit. */
1908 5, /* Max cond insns. */
1909 8, /* Memset max inline. */
1910 1, /* Issue rate. */
1911 ARM_PREFETCH_NOT_BENEFICIAL,
1912 tune_params::PREF_CONST_POOL_FALSE,
1913 tune_params::PREF_LDRD_FALSE,
1914 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1915 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1916 tune_params::DISPARAGE_FLAGS_NEITHER,
1917 tune_params::PREF_NEON_64_FALSE,
1918 tune_params::PREF_NEON_STRINGOPS_FALSE,
1919 tune_params::FUSE_NOTHING,
1920 tune_params::SCHED_AUTOPREF_OFF
1921 };
1922
1923
1924 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1925 const struct tune_params arm_cortex_tune =
1926 {
1927 &generic_extra_costs,
1928 NULL, /* Sched adj cost. */
1929 arm_default_branch_cost,
1930 &arm_default_vec_cost,
1931 1, /* Constant limit. */
1932 5, /* Max cond insns. */
1933 8, /* Memset max inline. */
1934 2, /* Issue rate. */
1935 ARM_PREFETCH_NOT_BENEFICIAL,
1936 tune_params::PREF_CONST_POOL_FALSE,
1937 tune_params::PREF_LDRD_FALSE,
1938 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1939 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1940 tune_params::DISPARAGE_FLAGS_NEITHER,
1941 tune_params::PREF_NEON_64_FALSE,
1942 tune_params::PREF_NEON_STRINGOPS_FALSE,
1943 tune_params::FUSE_NOTHING,
1944 tune_params::SCHED_AUTOPREF_OFF
1945 };
1946
1947 const struct tune_params arm_cortex_a8_tune =
1948 {
1949 &cortexa8_extra_costs,
1950 NULL, /* Sched adj cost. */
1951 arm_default_branch_cost,
1952 &arm_default_vec_cost,
1953 1, /* Constant limit. */
1954 5, /* Max cond insns. */
1955 8, /* Memset max inline. */
1956 2, /* Issue rate. */
1957 ARM_PREFETCH_NOT_BENEFICIAL,
1958 tune_params::PREF_CONST_POOL_FALSE,
1959 tune_params::PREF_LDRD_FALSE,
1960 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1961 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1962 tune_params::DISPARAGE_FLAGS_NEITHER,
1963 tune_params::PREF_NEON_64_FALSE,
1964 tune_params::PREF_NEON_STRINGOPS_TRUE,
1965 tune_params::FUSE_NOTHING,
1966 tune_params::SCHED_AUTOPREF_OFF
1967 };
1968
1969 const struct tune_params arm_cortex_a7_tune =
1970 {
1971 &cortexa7_extra_costs,
1972 NULL, /* Sched adj cost. */
1973 arm_default_branch_cost,
1974 &arm_default_vec_cost,
1975 1, /* Constant limit. */
1976 5, /* Max cond insns. */
1977 8, /* Memset max inline. */
1978 2, /* Issue rate. */
1979 ARM_PREFETCH_NOT_BENEFICIAL,
1980 tune_params::PREF_CONST_POOL_FALSE,
1981 tune_params::PREF_LDRD_FALSE,
1982 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1983 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1984 tune_params::DISPARAGE_FLAGS_NEITHER,
1985 tune_params::PREF_NEON_64_FALSE,
1986 tune_params::PREF_NEON_STRINGOPS_TRUE,
1987 tune_params::FUSE_NOTHING,
1988 tune_params::SCHED_AUTOPREF_OFF
1989 };
1990
1991 const struct tune_params arm_cortex_a15_tune =
1992 {
1993 &cortexa15_extra_costs,
1994 NULL, /* Sched adj cost. */
1995 arm_default_branch_cost,
1996 &arm_default_vec_cost,
1997 1, /* Constant limit. */
1998 2, /* Max cond insns. */
1999 8, /* Memset max inline. */
2000 3, /* Issue rate. */
2001 ARM_PREFETCH_NOT_BENEFICIAL,
2002 tune_params::PREF_CONST_POOL_FALSE,
2003 tune_params::PREF_LDRD_TRUE,
2004 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2005 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2006 tune_params::DISPARAGE_FLAGS_ALL,
2007 tune_params::PREF_NEON_64_FALSE,
2008 tune_params::PREF_NEON_STRINGOPS_TRUE,
2009 tune_params::FUSE_NOTHING,
2010 tune_params::SCHED_AUTOPREF_FULL
2011 };
2012
2013 const struct tune_params arm_cortex_a35_tune =
2014 {
2015 &cortexa53_extra_costs,
2016 NULL, /* Sched adj cost. */
2017 arm_default_branch_cost,
2018 &arm_default_vec_cost,
2019 1, /* Constant limit. */
2020 5, /* Max cond insns. */
2021 8, /* Memset max inline. */
2022 1, /* Issue rate. */
2023 ARM_PREFETCH_NOT_BENEFICIAL,
2024 tune_params::PREF_CONST_POOL_FALSE,
2025 tune_params::PREF_LDRD_FALSE,
2026 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2027 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2028 tune_params::DISPARAGE_FLAGS_NEITHER,
2029 tune_params::PREF_NEON_64_FALSE,
2030 tune_params::PREF_NEON_STRINGOPS_TRUE,
2031 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2032 tune_params::SCHED_AUTOPREF_OFF
2033 };
2034
2035 const struct tune_params arm_cortex_a53_tune =
2036 {
2037 &cortexa53_extra_costs,
2038 NULL, /* Sched adj cost. */
2039 arm_default_branch_cost,
2040 &arm_default_vec_cost,
2041 1, /* Constant limit. */
2042 5, /* Max cond insns. */
2043 8, /* Memset max inline. */
2044 2, /* Issue rate. */
2045 ARM_PREFETCH_NOT_BENEFICIAL,
2046 tune_params::PREF_CONST_POOL_FALSE,
2047 tune_params::PREF_LDRD_FALSE,
2048 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2049 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2050 tune_params::DISPARAGE_FLAGS_NEITHER,
2051 tune_params::PREF_NEON_64_FALSE,
2052 tune_params::PREF_NEON_STRINGOPS_TRUE,
2053 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2054 tune_params::SCHED_AUTOPREF_OFF
2055 };
2056
2057 const struct tune_params arm_cortex_a57_tune =
2058 {
2059 &cortexa57_extra_costs,
2060 NULL, /* Sched adj cost. */
2061 arm_default_branch_cost,
2062 &arm_default_vec_cost,
2063 1, /* Constant limit. */
2064 2, /* Max cond insns. */
2065 8, /* Memset max inline. */
2066 3, /* Issue rate. */
2067 ARM_PREFETCH_NOT_BENEFICIAL,
2068 tune_params::PREF_CONST_POOL_FALSE,
2069 tune_params::PREF_LDRD_TRUE,
2070 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2071 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2072 tune_params::DISPARAGE_FLAGS_ALL,
2073 tune_params::PREF_NEON_64_FALSE,
2074 tune_params::PREF_NEON_STRINGOPS_TRUE,
2075 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2076 tune_params::SCHED_AUTOPREF_FULL
2077 };
2078
2079 const struct tune_params arm_exynosm1_tune =
2080 {
2081 &exynosm1_extra_costs,
2082 NULL, /* Sched adj cost. */
2083 arm_default_branch_cost,
2084 &arm_default_vec_cost,
2085 1, /* Constant limit. */
2086 2, /* Max cond insns. */
2087 8, /* Memset max inline. */
2088 3, /* Issue rate. */
2089 ARM_PREFETCH_NOT_BENEFICIAL,
2090 tune_params::PREF_CONST_POOL_FALSE,
2091 tune_params::PREF_LDRD_TRUE,
2092 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2093 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2094 tune_params::DISPARAGE_FLAGS_ALL,
2095 tune_params::PREF_NEON_64_FALSE,
2096 tune_params::PREF_NEON_STRINGOPS_TRUE,
2097 tune_params::FUSE_NOTHING,
2098 tune_params::SCHED_AUTOPREF_OFF
2099 };
2100
2101 const struct tune_params arm_xgene1_tune =
2102 {
2103 &xgene1_extra_costs,
2104 NULL, /* Sched adj cost. */
2105 arm_default_branch_cost,
2106 &arm_default_vec_cost,
2107 1, /* Constant limit. */
2108 2, /* Max cond insns. */
2109 32, /* Memset max inline. */
2110 4, /* Issue rate. */
2111 ARM_PREFETCH_NOT_BENEFICIAL,
2112 tune_params::PREF_CONST_POOL_FALSE,
2113 tune_params::PREF_LDRD_TRUE,
2114 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2115 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2116 tune_params::DISPARAGE_FLAGS_ALL,
2117 tune_params::PREF_NEON_64_FALSE,
2118 tune_params::PREF_NEON_STRINGOPS_FALSE,
2119 tune_params::FUSE_NOTHING,
2120 tune_params::SCHED_AUTOPREF_OFF
2121 };
2122
2123 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2124 less appealing. Set max_insns_skipped to a low value. */
2125
2126 const struct tune_params arm_cortex_a5_tune =
2127 {
2128 &cortexa5_extra_costs,
2129 NULL, /* Sched adj cost. */
2130 arm_cortex_a5_branch_cost,
2131 &arm_default_vec_cost,
2132 1, /* Constant limit. */
2133 1, /* Max cond insns. */
2134 8, /* Memset max inline. */
2135 2, /* Issue rate. */
2136 ARM_PREFETCH_NOT_BENEFICIAL,
2137 tune_params::PREF_CONST_POOL_FALSE,
2138 tune_params::PREF_LDRD_FALSE,
2139 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2140 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2141 tune_params::DISPARAGE_FLAGS_NEITHER,
2142 tune_params::PREF_NEON_64_FALSE,
2143 tune_params::PREF_NEON_STRINGOPS_TRUE,
2144 tune_params::FUSE_NOTHING,
2145 tune_params::SCHED_AUTOPREF_OFF
2146 };
2147
2148 const struct tune_params arm_cortex_a9_tune =
2149 {
2150 &cortexa9_extra_costs,
2151 cortex_a9_sched_adjust_cost,
2152 arm_default_branch_cost,
2153 &arm_default_vec_cost,
2154 1, /* Constant limit. */
2155 5, /* Max cond insns. */
2156 8, /* Memset max inline. */
2157 2, /* Issue rate. */
2158 ARM_PREFETCH_BENEFICIAL(4,32,32),
2159 tune_params::PREF_CONST_POOL_FALSE,
2160 tune_params::PREF_LDRD_FALSE,
2161 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2162 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2163 tune_params::DISPARAGE_FLAGS_NEITHER,
2164 tune_params::PREF_NEON_64_FALSE,
2165 tune_params::PREF_NEON_STRINGOPS_FALSE,
2166 tune_params::FUSE_NOTHING,
2167 tune_params::SCHED_AUTOPREF_OFF
2168 };
2169
2170 const struct tune_params arm_cortex_a12_tune =
2171 {
2172 &cortexa12_extra_costs,
2173 NULL, /* Sched adj cost. */
2174 arm_default_branch_cost,
2175 &arm_default_vec_cost, /* Vectorizer costs. */
2176 1, /* Constant limit. */
2177 2, /* Max cond insns. */
2178 8, /* Memset max inline. */
2179 2, /* Issue rate. */
2180 ARM_PREFETCH_NOT_BENEFICIAL,
2181 tune_params::PREF_CONST_POOL_FALSE,
2182 tune_params::PREF_LDRD_TRUE,
2183 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2184 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2185 tune_params::DISPARAGE_FLAGS_ALL,
2186 tune_params::PREF_NEON_64_FALSE,
2187 tune_params::PREF_NEON_STRINGOPS_TRUE,
2188 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2189 tune_params::SCHED_AUTOPREF_OFF
2190 };
2191
2192 const struct tune_params arm_cortex_a73_tune =
2193 {
2194 &cortexa57_extra_costs,
2195 NULL, /* Sched adj cost. */
2196 arm_default_branch_cost,
2197 &arm_default_vec_cost, /* Vectorizer costs. */
2198 1, /* Constant limit. */
2199 2, /* Max cond insns. */
2200 8, /* Memset max inline. */
2201 2, /* Issue rate. */
2202 ARM_PREFETCH_NOT_BENEFICIAL,
2203 tune_params::PREF_CONST_POOL_FALSE,
2204 tune_params::PREF_LDRD_TRUE,
2205 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2206 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2207 tune_params::DISPARAGE_FLAGS_ALL,
2208 tune_params::PREF_NEON_64_FALSE,
2209 tune_params::PREF_NEON_STRINGOPS_TRUE,
2210 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2211 tune_params::SCHED_AUTOPREF_FULL
2212 };
2213
2214 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT each take a single
2215 cycle, so a MOVW/MOVT pair costs two cycles. An LDR from the constant pool also takes two cycles
2216 to execute, but mildly increases pipelining opportunity (consecutive
2217 loads/stores can be pipelined together, saving one cycle), and may also
2218 improve icache utilisation. Hence we prefer the constant pool for such
2219 processors. */
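/* For example, materialising the 32-bit constant 0x12345678 costs two cycles
   either way on such a core:
       movw  r0, #0x5678          @ 1 cycle
       movt  r0, #0x1234          @ 1 cycle
   versus
       ldr   r0, =0x12345678      @ 2 cycles, but may pipeline with an
                                  @ adjacent load/store.  */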
2220
2221 const struct tune_params arm_v7m_tune =
2222 {
2223 &v7m_extra_costs,
2224 NULL, /* Sched adj cost. */
2225 arm_cortex_m_branch_cost,
2226 &arm_default_vec_cost,
2227 1, /* Constant limit. */
2228 2, /* Max cond insns. */
2229 8, /* Memset max inline. */
2230 1, /* Issue rate. */
2231 ARM_PREFETCH_NOT_BENEFICIAL,
2232 tune_params::PREF_CONST_POOL_TRUE,
2233 tune_params::PREF_LDRD_FALSE,
2234 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2235 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2236 tune_params::DISPARAGE_FLAGS_NEITHER,
2237 tune_params::PREF_NEON_64_FALSE,
2238 tune_params::PREF_NEON_STRINGOPS_FALSE,
2239 tune_params::FUSE_NOTHING,
2240 tune_params::SCHED_AUTOPREF_OFF
2241 };
2242
2243 /* Cortex-M7 tuning. */
2244
2245 const struct tune_params arm_cortex_m7_tune =
2246 {
2247 &v7m_extra_costs,
2248 NULL, /* Sched adj cost. */
2249 arm_cortex_m7_branch_cost,
2250 &arm_default_vec_cost,
2251 0, /* Constant limit. */
2252 1, /* Max cond insns. */
2253 8, /* Memset max inline. */
2254 2, /* Issue rate. */
2255 ARM_PREFETCH_NOT_BENEFICIAL,
2256 tune_params::PREF_CONST_POOL_TRUE,
2257 tune_params::PREF_LDRD_FALSE,
2258 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2259 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2260 tune_params::DISPARAGE_FLAGS_NEITHER,
2261 tune_params::PREF_NEON_64_FALSE,
2262 tune_params::PREF_NEON_STRINGOPS_FALSE,
2263 tune_params::FUSE_NOTHING,
2264 tune_params::SCHED_AUTOPREF_OFF
2265 };
2266
2267 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2268 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2269 cortex-m23. */
2270 const struct tune_params arm_v6m_tune =
2271 {
2272 &generic_extra_costs, /* Insn extra costs. */
2273 NULL, /* Sched adj cost. */
2274 arm_default_branch_cost,
2275 &arm_default_vec_cost, /* Vectorizer costs. */
2276 1, /* Constant limit. */
2277 5, /* Max cond insns. */
2278 8, /* Memset max inline. */
2279 1, /* Issue rate. */
2280 ARM_PREFETCH_NOT_BENEFICIAL,
2281 tune_params::PREF_CONST_POOL_FALSE,
2282 tune_params::PREF_LDRD_FALSE,
2283 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2284 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2285 tune_params::DISPARAGE_FLAGS_NEITHER,
2286 tune_params::PREF_NEON_64_FALSE,
2287 tune_params::PREF_NEON_STRINGOPS_FALSE,
2288 tune_params::FUSE_NOTHING,
2289 tune_params::SCHED_AUTOPREF_OFF
2290 };
2291
2292 const struct tune_params arm_fa726te_tune =
2293 {
2294 &generic_extra_costs, /* Insn extra costs. */
2295 fa726te_sched_adjust_cost,
2296 arm_default_branch_cost,
2297 &arm_default_vec_cost,
2298 1, /* Constant limit. */
2299 5, /* Max cond insns. */
2300 8, /* Memset max inline. */
2301 2, /* Issue rate. */
2302 ARM_PREFETCH_NOT_BENEFICIAL,
2303 tune_params::PREF_CONST_POOL_TRUE,
2304 tune_params::PREF_LDRD_FALSE,
2305 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2306 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2307 tune_params::DISPARAGE_FLAGS_NEITHER,
2308 tune_params::PREF_NEON_64_FALSE,
2309 tune_params::PREF_NEON_STRINGOPS_FALSE,
2310 tune_params::FUSE_NOTHING,
2311 tune_params::SCHED_AUTOPREF_OFF
2312 };
2313
2314 /* Auto-generated CPU, FPU and architecture tables. */
2315 #include "arm-cpu-data.h"
2316
2317 /* The name of the preprocessor macro to define for this architecture. PROFILE
2318 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2319 is thus chosen to be big enough to hold the longest architecture name. */
2320
2321 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2322
2323 /* Supported TLS relocations. */
2324
2325 enum tls_reloc {
2326 TLS_GD32,
2327 TLS_LDM32,
2328 TLS_LDO32,
2329 TLS_IE32,
2330 TLS_LE32,
2331 TLS_DESCSEQ /* GNU scheme */
2332 };
2333
2334 /* The maximum number of insns to be used when loading a constant. */
2335 inline static int
2336 arm_constant_limit (bool size_p)
2337 {
2338 return size_p ? 1 : current_tune->constant_limit;
2339 }
2340
2341 /* Emit an insn that's a simple single-set. Both the operands must be known
2342 to be valid. */
2343 inline static rtx_insn *
2344 emit_set_insn (rtx x, rtx y)
2345 {
2346 return emit_insn (gen_rtx_SET (x, y));
2347 }
2348
2349 /* Return the number of bits set in VALUE. */
2350 static unsigned
2351 bit_count (unsigned long value)
2352 {
2353 unsigned long count = 0;
2354
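/* Kernighan's trick: each "value &= value - 1" below clears exactly one set
   bit, so the loop iterates once per set bit.  */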
2355 while (value)
2356 {
2357 count++;
2358 value &= value - 1; /* Clear the least-significant set bit. */
2359 }
2360
2361 return count;
2362 }
2363
2364 /* Return the number of bits set in BMAP. */
2365 static unsigned
2366 bitmap_popcount (const sbitmap bmap)
2367 {
2368 unsigned int count = 0;
2369 unsigned int n = 0;
2370 sbitmap_iterator sbi;
2371
2372 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2373 count++;
2374 return count;
2375 }
2376
2377 typedef struct
2378 {
2379 machine_mode mode;
2380 const char *name;
2381 } arm_fixed_mode_set;
2382
2383 /* A small helper for registering the fixed-point libfuncs. */
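/* For example, funcname "add", modename "qq" and num_suffix 3 yield the
   libcall name "__gnu_addqq3".  */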
2384
2385 static void
2386 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2387 const char *funcname, const char *modename,
2388 int num_suffix)
2389 {
2390 char buffer[50];
2391
2392 if (num_suffix == 0)
2393 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2394 else
2395 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2396
2397 set_optab_libfunc (optable, mode, buffer);
2398 }
2399
2400 static void
2401 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2402 machine_mode from, const char *funcname,
2403 const char *toname, const char *fromname)
2404 {
2405 char buffer[50];
2406 const char *maybe_suffix_2 = "";
2407
2408 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2409 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2410 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2411 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2412 maybe_suffix_2 = "2";
2413
2414 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2415 maybe_suffix_2);
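/* For example, a fract conversion from QQmode to SFmode is registered as
   "__gnu_fractqqsf", while QQmode to HQmode (both signed fract modes) takes
   the "2" suffix and becomes "__gnu_fractqqhq2".  */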
2416
2417 set_conv_libfunc (optable, to, from, buffer);
2418 }
2419
2420 /* Set up library functions unique to ARM. */
2421
2422 static void
2423 arm_init_libfuncs (void)
2424 {
2425 /* For Linux, we have access to kernel support for atomic operations. */
2426 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2427 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2428
2429 /* There are no special library functions unless we are using the
2430 ARM BPABI. */
2431 if (!TARGET_BPABI)
2432 return;
2433
2434 /* The functions below are described in Section 4 of the "Run-Time
2435 ABI for the ARM architecture", Version 1.0. */
2436
2437 /* Double-precision floating-point arithmetic. Table 2. */
2438 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2439 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2440 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2441 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2442 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2443
2444 /* Double-precision comparisons. Table 3. */
2445 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2446 set_optab_libfunc (ne_optab, DFmode, NULL);
2447 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2448 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2449 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2450 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2451 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2452
2453 /* Single-precision floating-point arithmetic. Table 4. */
2454 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2455 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2456 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2457 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2458 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2459
2460 /* Single-precision comparisons. Table 5. */
2461 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2462 set_optab_libfunc (ne_optab, SFmode, NULL);
2463 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2464 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2465 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2466 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2467 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2468
2469 /* Floating-point to integer conversions. Table 6. */
2470 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2471 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2472 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2473 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2474 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2475 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2476 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2477 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2478
2479 /* Conversions between floating types. Table 7. */
2480 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2481 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2482
2483 /* Integer to floating-point conversions. Table 8. */
2484 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2485 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2486 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2487 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2488 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2489 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2490 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2491 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2492
2493 /* Long long. Table 9. */
2494 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2495 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2496 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2497 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2498 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2499 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2500 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2501 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2502
2503 /* Integer (32/32->32) division. \S 4.3.1. */
2504 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2505 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2506
2507 /* The divmod functions are designed so that they can be used for
2508 plain division, even though they return both the quotient and the
2509 remainder. The quotient is returned in the usual location (i.e.,
2510 r0 for SImode, {r0, r1} for DImode), just as would be expected
2511 for an ordinary division routine. Because the AAPCS calling
2512 conventions specify that all of { r0, r1, r2, r3 } are
2513 call-clobbered registers, there is no need to tell the compiler
2514 explicitly that those registers are clobbered by these
2515 routines. */
2516 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2517 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2518
2519 /* For SImode division the ABI provides div-without-mod routines,
2520 which are faster. */
2521 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2522 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
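/* Per the run-time ABI, __aeabi_idiv takes the numerator in r0 and the
   denominator in r1 and returns the quotient in r0, so a plain SImode
   division maps directly onto the call with no extra register moves.  */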
2523
2524 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2525 divmod libcalls instead. */
2526 set_optab_libfunc (smod_optab, DImode, NULL);
2527 set_optab_libfunc (umod_optab, DImode, NULL);
2528 set_optab_libfunc (smod_optab, SImode, NULL);
2529 set_optab_libfunc (umod_optab, SImode, NULL);
2530
2531 /* Half-precision float operations. The compiler handles all operations
2532 with NULL libfuncs by converting to SFmode. */
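/* For example, an HFmode addition is widened to SFmode with the __gnu_h2f_*
   routine selected below, added in SFmode, then truncated back via
   __gnu_f2h_*.  */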
2533 switch (arm_fp16_format)
2534 {
2535 case ARM_FP16_FORMAT_IEEE:
2536 case ARM_FP16_FORMAT_ALTERNATIVE:
2537
2538 /* Conversions. */
2539 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2540 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2541 ? "__gnu_f2h_ieee"
2542 : "__gnu_f2h_alternative"));
2543 set_conv_libfunc (sext_optab, SFmode, HFmode,
2544 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2545 ? "__gnu_h2f_ieee"
2546 : "__gnu_h2f_alternative"));
2547
2548 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2549 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2550 ? "__gnu_d2h_ieee"
2551 : "__gnu_d2h_alternative"));
2552
2553 /* Arithmetic. */
2554 set_optab_libfunc (add_optab, HFmode, NULL);
2555 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2556 set_optab_libfunc (smul_optab, HFmode, NULL);
2557 set_optab_libfunc (neg_optab, HFmode, NULL);
2558 set_optab_libfunc (sub_optab, HFmode, NULL);
2559
2560 /* Comparisons. */
2561 set_optab_libfunc (eq_optab, HFmode, NULL);
2562 set_optab_libfunc (ne_optab, HFmode, NULL);
2563 set_optab_libfunc (lt_optab, HFmode, NULL);
2564 set_optab_libfunc (le_optab, HFmode, NULL);
2565 set_optab_libfunc (ge_optab, HFmode, NULL);
2566 set_optab_libfunc (gt_optab, HFmode, NULL);
2567 set_optab_libfunc (unord_optab, HFmode, NULL);
2568 break;
2569
2570 default:
2571 break;
2572 }
2573
2574 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2575 {
2576 const arm_fixed_mode_set fixed_arith_modes[] =
2577 {
2578 { E_QQmode, "qq" },
2579 { E_UQQmode, "uqq" },
2580 { E_HQmode, "hq" },
2581 { E_UHQmode, "uhq" },
2582 { E_SQmode, "sq" },
2583 { E_USQmode, "usq" },
2584 { E_DQmode, "dq" },
2585 { E_UDQmode, "udq" },
2586 { E_TQmode, "tq" },
2587 { E_UTQmode, "utq" },
2588 { E_HAmode, "ha" },
2589 { E_UHAmode, "uha" },
2590 { E_SAmode, "sa" },
2591 { E_USAmode, "usa" },
2592 { E_DAmode, "da" },
2593 { E_UDAmode, "uda" },
2594 { E_TAmode, "ta" },
2595 { E_UTAmode, "uta" }
2596 };
2597 const arm_fixed_mode_set fixed_conv_modes[] =
2598 {
2599 { E_QQmode, "qq" },
2600 { E_UQQmode, "uqq" },
2601 { E_HQmode, "hq" },
2602 { E_UHQmode, "uhq" },
2603 { E_SQmode, "sq" },
2604 { E_USQmode, "usq" },
2605 { E_DQmode, "dq" },
2606 { E_UDQmode, "udq" },
2607 { E_TQmode, "tq" },
2608 { E_UTQmode, "utq" },
2609 { E_HAmode, "ha" },
2610 { E_UHAmode, "uha" },
2611 { E_SAmode, "sa" },
2612 { E_USAmode, "usa" },
2613 { E_DAmode, "da" },
2614 { E_UDAmode, "uda" },
2615 { E_TAmode, "ta" },
2616 { E_UTAmode, "uta" },
2617 { E_QImode, "qi" },
2618 { E_HImode, "hi" },
2619 { E_SImode, "si" },
2620 { E_DImode, "di" },
2621 { E_TImode, "ti" },
2622 { E_SFmode, "sf" },
2623 { E_DFmode, "df" }
2624 };
2625 unsigned int i, j;
2626
2627 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2628 {
2629 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2630 "add", fixed_arith_modes[i].name, 3);
2631 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2632 "ssadd", fixed_arith_modes[i].name, 3);
2633 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2634 "usadd", fixed_arith_modes[i].name, 3);
2635 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2636 "sub", fixed_arith_modes[i].name, 3);
2637 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2638 "sssub", fixed_arith_modes[i].name, 3);
2639 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2640 "ussub", fixed_arith_modes[i].name, 3);
2641 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2642 "mul", fixed_arith_modes[i].name, 3);
2643 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2644 "ssmul", fixed_arith_modes[i].name, 3);
2645 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2646 "usmul", fixed_arith_modes[i].name, 3);
2647 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2648 "div", fixed_arith_modes[i].name, 3);
2649 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2650 "udiv", fixed_arith_modes[i].name, 3);
2651 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2652 "ssdiv", fixed_arith_modes[i].name, 3);
2653 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2654 "usdiv", fixed_arith_modes[i].name, 3);
2655 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2656 "neg", fixed_arith_modes[i].name, 2);
2657 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2658 "ssneg", fixed_arith_modes[i].name, 2);
2659 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2660 "usneg", fixed_arith_modes[i].name, 2);
2661 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2662 "ashl", fixed_arith_modes[i].name, 3);
2663 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2664 "ashr", fixed_arith_modes[i].name, 3);
2665 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2666 "lshr", fixed_arith_modes[i].name, 3);
2667 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2668 "ssashl", fixed_arith_modes[i].name, 3);
2669 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2670 "usashl", fixed_arith_modes[i].name, 3);
2671 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2672 "cmp", fixed_arith_modes[i].name, 2);
2673 }
2674
2675 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2676 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2677 {
2678 if (i == j
2679 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2680 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2681 continue;
2682
2683 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2684 fixed_conv_modes[j].mode, "fract",
2685 fixed_conv_modes[i].name,
2686 fixed_conv_modes[j].name);
2687 arm_set_fixed_conv_libfunc (satfract_optab,
2688 fixed_conv_modes[i].mode,
2689 fixed_conv_modes[j].mode, "satfract",
2690 fixed_conv_modes[i].name,
2691 fixed_conv_modes[j].name);
2692 arm_set_fixed_conv_libfunc (fractuns_optab,
2693 fixed_conv_modes[i].mode,
2694 fixed_conv_modes[j].mode, "fractuns",
2695 fixed_conv_modes[i].name,
2696 fixed_conv_modes[j].name);
2697 arm_set_fixed_conv_libfunc (satfractuns_optab,
2698 fixed_conv_modes[i].mode,
2699 fixed_conv_modes[j].mode, "satfractuns",
2700 fixed_conv_modes[i].name,
2701 fixed_conv_modes[j].name);
2702 }
2703 }
2704
2705 if (TARGET_AAPCS_BASED)
2706 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2707 }
2708
2709 /* On AAPCS systems, this is the "struct __va_list". */
2710 static GTY(()) tree va_list_type;
2711
2712 /* Return the type to use as __builtin_va_list. */
2713 static tree
2714 arm_build_builtin_va_list (void)
2715 {
2716 tree va_list_name;
2717 tree ap_field;
2718
2719 if (!TARGET_AAPCS_BASED)
2720 return std_build_builtin_va_list ();
2721
2722 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2723 defined as:
2724
2725 struct __va_list
2726 {
2727 void *__ap;
2728 };
2729
2730 The C Library ABI further reinforces this definition in \S
2731 4.1.
2732
2733 We must follow this definition exactly. The structure tag
2734 name is visible in C++ mangled names, and thus forms a part
2735 of the ABI. The field name may be used by people who
2736 #include <stdarg.h>. */
2737 /* Create the type. */
2738 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2739 /* Give it the required name. */
2740 va_list_name = build_decl (BUILTINS_LOCATION,
2741 TYPE_DECL,
2742 get_identifier ("__va_list"),
2743 va_list_type);
2744 DECL_ARTIFICIAL (va_list_name) = 1;
2745 TYPE_NAME (va_list_type) = va_list_name;
2746 TYPE_STUB_DECL (va_list_type) = va_list_name;
2747 /* Create the __ap field. */
2748 ap_field = build_decl (BUILTINS_LOCATION,
2749 FIELD_DECL,
2750 get_identifier ("__ap"),
2751 ptr_type_node);
2752 DECL_ARTIFICIAL (ap_field) = 1;
2753 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2754 TYPE_FIELDS (va_list_type) = ap_field;
2755 /* Compute its layout. */
2756 layout_type (va_list_type);
2757
2758 return va_list_type;
2759 }
2760
2761 /* Return an expression of type "void *" pointing to the next
2762 available argument in a variable-argument list. VALIST is the
2763 user-level va_list object, of type __builtin_va_list. */
2764 static tree
2765 arm_extract_valist_ptr (tree valist)
2766 {
2767 if (TREE_TYPE (valist) == error_mark_node)
2768 return error_mark_node;
2769
2770 /* On an AAPCS target, the pointer is stored within "struct
2771 va_list". */
2772 if (TARGET_AAPCS_BASED)
2773 {
2774 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2775 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2776 valist, ap_field, NULL_TREE);
2777 }
2778
2779 return valist;
2780 }
2781
2782 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2783 static void
2784 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2785 {
2786 valist = arm_extract_valist_ptr (valist);
2787 std_expand_builtin_va_start (valist, nextarg);
2788 }
2789
2790 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2791 static tree
2792 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2793 gimple_seq *post_p)
2794 {
2795 valist = arm_extract_valist_ptr (valist);
2796 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2797 }
2798
2799 /* Check any incompatible options that the user has specified. */
2800 static void
2801 arm_option_check_internal (struct gcc_options *opts)
2802 {
2803 int flags = opts->x_target_flags;
2804
2805 /* iWMMXt and NEON are incompatible. */
2806 if (TARGET_IWMMXT
2807 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2808 error ("iWMMXt and NEON are incompatible");
2809
2810 /* Make sure that the processor choice does not conflict with any of the
2811 other command line choices. */
2812 if (TARGET_ARM_P (flags)
2813 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2814 error ("target CPU does not support ARM mode");
2815
2816 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2817 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2818 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2819
2820 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2821 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2822
2823 /* If this target is normally configured to use APCS frames, warn if they
2824 are turned off and debugging is turned on. */
2825 if (TARGET_ARM_P (flags)
2826 && write_symbols != NO_DEBUG
2827 && !TARGET_APCS_FRAME
2828 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2829 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2830
2831 /* iWMMXt unsupported under Thumb mode. */
2832 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2833 error ("iWMMXt unsupported under Thumb mode");
2834
2835 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2836 error ("can not use -mtp=cp15 with 16-bit Thumb");
2837
2838 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2839 {
2840 error ("RTP PIC is incompatible with Thumb");
2841 flag_pic = 0;
2842 }
2843
2844 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2845 with MOVT. */
2846 if ((target_pure_code || target_slow_flash_data)
2847 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2848 {
2849 const char *flag = (target_pure_code ? "-mpure-code" :
2850 "-mslow-flash-data");
2851 error ("%s only supports non-pic code on M-profile targets with the "
2852 "MOVT instruction", flag);
2853 }
2854
2855 }
2856
2857 /* Recompute the global settings depending on target attribute options. */
2858
2859 static void
2860 arm_option_params_internal (void)
2861 {
2862 /* If we are not using the default (ARM mode) section anchor offset
2863 ranges, then set the correct ranges now. */
2864 if (TARGET_THUMB1)
2865 {
2866 /* Thumb-1 LDR instructions cannot have negative offsets.
2867 Permissible positive offset ranges are 5-bit (for byte loads),
2868 6-bit (for halfword loads), or 7-bit (for word loads).
2869 Empirical results suggest a 7-bit anchor range gives the best
2870 overall code size. */
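/* Concretely: the 5-bit unsigned immediate is scaled by the access size, so
   byte loads reach offsets 0..31, halfword loads 0..62 and word loads
   0..124 from the base register.  */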
2871 targetm.min_anchor_offset = 0;
2872 targetm.max_anchor_offset = 127;
2873 }
2874 else if (TARGET_THUMB2)
2875 {
2876 /* The minimum is set such that the total size of the block
2877 for a particular anchor is 248 + 1 + 4095 bytes, which is
2878 divisible by eight, ensuring natural spacing of anchors. */
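/* That is, 248 + 1 + 4095 = 4344 bytes, and 4344 = 8 * 543.  */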
2879 targetm.min_anchor_offset = -248;
2880 targetm.max_anchor_offset = 4095;
2881 }
2882 else
2883 {
2884 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2885 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2886 }
2887
2888 /* When optimizing for size, use a fixed limit of 4 conditional instructions instead of the tuning default. */
2889 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2890
2891 /* For THUMB2, we limit the conditional sequence to one IT block. */
2892 if (TARGET_THUMB2)
2893 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
2894 }
2895
2896 /* True if -mflip-thumb should next add an attribute for the default
2897 mode, false if it should next add an attribute for the opposite mode. */
2898 static GTY(()) bool thumb_flipper;
2899
2900 /* Options after initial target override. */
2901 static GTY(()) tree init_optimize;
2902
2903 static void
2904 arm_override_options_after_change_1 (struct gcc_options *opts)
2905 {
2906 if (opts->x_align_functions <= 0)
2907 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2908 && opts->x_optimize_size ? 2 : 4;
2909 }
2910
2911 /* Implement targetm.override_options_after_change. */
2912
2913 static void
2914 arm_override_options_after_change (void)
2915 {
2916 arm_configure_build_target (&arm_active_target,
2917 TREE_TARGET_OPTION (target_option_default_node),
2918 &global_options_set, false);
2919
2920 arm_override_options_after_change_1 (&global_options);
2921 }
2922
2923 /* Implement TARGET_OPTION_SAVE. */
2924 static void
2925 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2926 {
2927 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2928 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2929 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2930 }
2931
2932 /* Implement TARGET_OPTION_RESTORE. */
2933 static void
2934 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2935 {
2936 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2937 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2938 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2939 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2940 false);
2941 }
2942
2943 /* Reset options between modes that the user has specified. */
2944 static void
2945 arm_option_override_internal (struct gcc_options *opts,
2946 struct gcc_options *opts_set)
2947 {
2948 arm_override_options_after_change_1 (opts);
2949
2950 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2951 {
2952 /* The default is to enable interworking, so this warning message would
2953 be confusing to users who have just compiled with, e.g., -march=armv3. */
2954 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2955 opts->x_target_flags &= ~MASK_INTERWORK;
2956 }
2957
2958 if (TARGET_THUMB_P (opts->x_target_flags)
2959 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2960 {
2961 warning (0, "target CPU does not support THUMB instructions");
2962 opts->x_target_flags &= ~MASK_THUMB;
2963 }
2964
2965 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2966 {
2967 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2968 opts->x_target_flags &= ~MASK_APCS_FRAME;
2969 }
2970
2971 /* Callee super interworking implies thumb interworking. Adding
2972 this to the flags here simplifies the logic elsewhere. */
2973 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2974 opts->x_target_flags |= MASK_INTERWORK;
2975
2976 /* Need to remember initial values so combinations of options like
2977 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2978 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2979
2980 if (! opts_set->x_arm_restrict_it)
2981 opts->x_arm_restrict_it = arm_arch8;
2982
2983 /* ARM execution state and M profile don't have [restrict] IT. */
2984 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2985 opts->x_arm_restrict_it = 0;
2986
2987 /* Enable -munaligned-access by default for
2988 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2989 i.e. Thumb2 and ARM state only.
2990 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2991 - ARMv8 architecture-based processors.
2992
2993 Disable -munaligned-access by default for
2994 - all pre-ARMv6 architecture-based processors
2995 - ARMv6-M architecture-based processors
2996 - ARMv8-M Baseline processors. */
2997
2998 if (! opts_set->x_unaligned_access)
2999 {
3000 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3001 && arm_arch6 && (arm_arch_notm || arm_arch7));
3002 }
3003 else if (opts->x_unaligned_access == 1
3004 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3005 {
3006 warning (0, "target CPU does not support unaligned accesses");
3007 opts->x_unaligned_access = 0;
3008 }
3009
3010 /* Don't warn since it's on by default in -O2. */
3011 if (TARGET_THUMB1_P (opts->x_target_flags))
3012 opts->x_flag_schedule_insns = 0;
3013 else
3014 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3015
3016 /* Disable shrink-wrap when optimizing function for size, since it tends to
3017 generate additional returns. */
3018 if (optimize_function_for_size_p (cfun)
3019 && TARGET_THUMB2_P (opts->x_target_flags))
3020 opts->x_flag_shrink_wrap = false;
3021 else
3022 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3023
3024 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3025 - epilogue_insns - does not accurately model the corresponding insns
3026 emitted in the asm file. In particular, see the comment in thumb_exit
3027 'Find out how many of the (return) argument registers we can corrupt'.
3028 As a consequence, the epilogue may clobber registers without fipa-ra
3029 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3030 TODO: Accurately model clobbers for epilogue_insns and reenable
3031 fipa-ra. */
3032 if (TARGET_THUMB1_P (opts->x_target_flags))
3033 opts->x_flag_ipa_ra = 0;
3034 else
3035 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3036
3037 /* Thumb2 inline assembly code should always use unified syntax.
3038 This will apply to ARM and Thumb1 eventually. */
3039 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3040
3041 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3042 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3043 #endif
3044 }
3045
3046 static sbitmap isa_all_fpubits;
3047 static sbitmap isa_quirkbits;
3048
3049 /* Configure a build target TARGET from the user-specified options OPTS and
3050 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3051 architecture have been specified, but the two are not identical. */
3052 void
3053 arm_configure_build_target (struct arm_build_target *target,
3054 struct cl_target_option *opts,
3055 struct gcc_options *opts_set,
3056 bool warn_compatible)
3057 {
3058 const cpu_option *arm_selected_tune = NULL;
3059 const arch_option *arm_selected_arch = NULL;
3060 const cpu_option *arm_selected_cpu = NULL;
3061 const arm_fpu_desc *arm_selected_fpu = NULL;
3062 const char *tune_opts = NULL;
3063 const char *arch_opts = NULL;
3064 const char *cpu_opts = NULL;
3065
3066 bitmap_clear (target->isa);
3067 target->core_name = NULL;
3068 target->arch_name = NULL;
3069
3070 if (opts_set->x_arm_arch_string)
3071 {
3072 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3073 "-march",
3074 opts->x_arm_arch_string);
3075 arch_opts = strchr (opts->x_arm_arch_string, '+');
3076 }
3077
3078 if (opts_set->x_arm_cpu_string)
3079 {
3080 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3081 opts->x_arm_cpu_string);
3082 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3083 arm_selected_tune = arm_selected_cpu;
3084 /* If taking the tuning from -mcpu, we don't need to rescan the
3085 options for tuning. */
3086 }
3087
3088 if (opts_set->x_arm_tune_string)
3089 {
3090 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3091 opts->x_arm_tune_string);
3092 tune_opts = strchr (opts->x_arm_tune_string, '+');
3093 }
3094
3095 if (arm_selected_arch)
3096 {
3097 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3098 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3099 arch_opts);
3100
3101 if (arm_selected_cpu)
3102 {
3103 auto_sbitmap cpu_isa (isa_num_bits);
3104 auto_sbitmap isa_delta (isa_num_bits);
3105
3106 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3107 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3108 cpu_opts);
3109 bitmap_xor (isa_delta, cpu_isa, target->isa);
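/* isa_delta now holds the feature bits on which the -mcpu and -march
   selections disagree.  */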
3110 /* Ignore any bits that are quirk bits. */
3111 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3112 /* Ignore (for now) any bits that might be set by -mfpu. */
3113 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3114
3115 if (!bitmap_empty_p (isa_delta))
3116 {
3117 if (warn_compatible)
3118 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3119 arm_selected_cpu->common.name,
3120 arm_selected_arch->common.name);
3121 /* -march wins for code generation.
3122 -mcpu wins for default tuning. */
3123 if (!arm_selected_tune)
3124 arm_selected_tune = arm_selected_cpu;
3125
3126 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3127 target->arch_name = arm_selected_arch->common.name;
3128 }
3129 else
3130 {
3131 /* Architecture and CPU are essentially the same.
3132 Prefer the CPU setting. */
3133 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3134 target->core_name = arm_selected_cpu->common.name;
3135 /* Copy the CPU's capabilities, so that we inherit the
3136 appropriate extensions and quirks. */
3137 bitmap_copy (target->isa, cpu_isa);
3138 }
3139 }
3140 else
3141 {
3142 /* Pick a CPU based on the architecture. */
3143 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3144 target->arch_name = arm_selected_arch->common.name;
3145 /* Note: target->core_name is left unset in this path. */
3146 }
3147 }
3148 else if (arm_selected_cpu)
3149 {
3150 target->core_name = arm_selected_cpu->common.name;
3151 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3152 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3153 cpu_opts);
3154 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3155 }
3156 /* If the user did not specify a processor or architecture, choose
3157 one for them. */
3158 else
3159 {
3160 const cpu_option *sel;
3161 auto_sbitmap sought_isa (isa_num_bits);
3162 bitmap_clear (sought_isa);
3163 auto_sbitmap default_isa (isa_num_bits);
3164
3165 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3166 TARGET_CPU_DEFAULT);
3167 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3168 gcc_assert (arm_selected_cpu->common.name);
3169
3170 /* RWE: All of the selection logic below (to the end of this
3171 'if' clause) looks somewhat suspect. It appears to be mostly
3172 there to support forcing thumb support when the default CPU
3173 does not have thumb (somewhat dubious in terms of what the
3174 user might be expecting). I think it should be removed once
3175 support for the pre-thumb era cores is removed. */
3176 sel = arm_selected_cpu;
3177 arm_initialize_isa (default_isa, sel->common.isa_bits);
3178 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3179 cpu_opts);
3180
3181 /* Now check to see if the user has specified any command line
3182 switches that require certain abilities from the cpu. */
3183
3184 if (TARGET_INTERWORK || TARGET_THUMB)
3185 {
3186 bitmap_set_bit (sought_isa, isa_bit_thumb);
3187 bitmap_set_bit (sought_isa, isa_bit_mode32);
3188
3189 /* There are no ARM processors that support both APCS-26 and
3190 interworking. Therefore we forcibly remove MODE26 from
3191 the isa features here (if it was set), so that the
3192 search below will always be able to find a compatible
3193 processor. */
3194 bitmap_clear_bit (default_isa, isa_bit_mode26);
3195 }
3196
3197 /* If there are such requirements and the default CPU does not
3198 satisfy them, we need to run over the complete list of
3199 cores looking for one that is satisfactory. */
3200 if (!bitmap_empty_p (sought_isa)
3201 && !bitmap_subset_p (sought_isa, default_isa))
3202 {
3203 auto_sbitmap candidate_isa (isa_num_bits);
3204 /* We're only interested in a CPU with at least the
3205 capabilities of the default CPU and the required
3206 additional features. */
3207 bitmap_ior (default_isa, default_isa, sought_isa);
3208
3209 /* Try to locate a CPU type that supports all of the abilities
3210 of the default CPU, plus the extra abilities requested by
3211 the user. */
3212 for (sel = all_cores; sel->common.name != NULL; sel++)
3213 {
3214 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3215 /* An exact match? */
3216 if (bitmap_equal_p (default_isa, candidate_isa))
3217 break;
3218 }
3219
3220 if (sel->common.name == NULL)
3221 {
3222 unsigned current_bit_count = isa_num_bits;
3223 const cpu_option *best_fit = NULL;
3224
3225 /* Ideally we would like to issue an error message here
3226 saying that it was not possible to find a CPU compatible
3227 with the default CPU, but which also supports the command
3228 line options specified by the programmer, and so they
3229 ought to use the -mcpu=<name> command line option to
3230 override the default CPU type.
3231
3232 If we cannot find a CPU that has exactly the
3233 characteristics of the default CPU and the given
3234 command line options we scan the array again looking
3235 for a best match. The best match must have at least
3236 the capabilities of the perfect match. */
3237 for (sel = all_cores; sel->common.name != NULL; sel++)
3238 {
3239 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3240
3241 if (bitmap_subset_p (default_isa, candidate_isa))
3242 {
3243 unsigned count;
3244
3245 bitmap_and_compl (candidate_isa, candidate_isa,
3246 default_isa);
3247 count = bitmap_popcount (candidate_isa);
3248
3249 if (count < current_bit_count)
3250 {
3251 best_fit = sel;
3252 current_bit_count = count;
3253 }
3254 }
3255
3256 gcc_assert (best_fit);
3257 sel = best_fit;
3258 }
3259 }
3260 arm_selected_cpu = sel;
3261 }
3262
3263 /* Now we know the CPU, we can finally initialize the target
3264 structure. */
3265 target->core_name = arm_selected_cpu->common.name;
3266 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3267 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3268 cpu_opts);
3269 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3270 }
3271
3272 gcc_assert (arm_selected_cpu);
3273 gcc_assert (arm_selected_arch);
3274
3275 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3276 {
3277 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3278 auto_sbitmap fpu_bits (isa_num_bits);
3279
3280 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3281 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3282 bitmap_ior (target->isa, target->isa, fpu_bits);
3283 }
3284
3285 if (!arm_selected_tune)
3286 arm_selected_tune = arm_selected_cpu;
3287 else /* Validate the features passed to -mtune. */
3288 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3289
3290 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3291
3292 /* Finish initializing the target structure. */
3293 target->arch_pp_name = arm_selected_arch->arch;
3294 target->base_arch = arm_selected_arch->base_arch;
3295 target->profile = arm_selected_arch->profile;
3296
3297 target->tune_flags = tune_data->tune_flags;
3298 target->tune = tune_data->tune;
3299 target->tune_core = tune_data->scheduler;
3300 }
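
/* [Editorial sketch -- not part of arm.c.]  A minimal standalone illustration
   of the -mcpu/-march reconciliation above, using a plain uint64_t in place
   of GCC's sbitmap.  The QUIRK_BITS/FPU_BITS masks and bit positions are
   invented for the example; only the shape of the check (XOR the two feature
   sets, mask out quirk and FPU bits, warn if anything remains) mirrors
   arm_configure_build_target.  */
#include <stdint.h>
#include <stdio.h>

#define QUIRK_BITS 0x0f00000000000000ull  /* hypothetical quirk features */
#define FPU_BITS   0x00000000ffff0000ull  /* hypothetical -mfpu features */

static void
check_cpu_arch_delta (uint64_t cpu_isa, uint64_t arch_isa)
{
  uint64_t delta = cpu_isa ^ arch_isa;    /* features present in only one */
  delta &= ~(QUIRK_BITS | FPU_BITS);      /* quirks and FPU don't count   */
  if (delta != 0)
    printf ("warning: -mcpu conflicts with -march (delta %#llx)\n",
            (unsigned long long) delta);
}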
3301
3302 /* Fix up any incompatible options that the user has specified. */
3303 static void
3304 arm_option_override (void)
3305 {
3306 static const enum isa_feature fpu_bitlist[]
3307 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3308 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3309 cl_target_option opts;
3310
3311 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3312 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3313
3314 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3315 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3316
3317 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3318
3319 if (!global_options_set.x_arm_fpu_index)
3320 {
3321 bool ok;
3322 int fpu_index;
3323
3324 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3325 CL_TARGET);
3326 gcc_assert (ok);
3327 arm_fpu_index = (enum fpu_type) fpu_index;
3328 }
3329
3330 cl_target_option_save (&opts, &global_options);
3331 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3332 true);
3333
3334 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3335 SUBTARGET_OVERRIDE_OPTIONS;
3336 #endif
3337
3338 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3339 arm_base_arch = arm_active_target.base_arch;
3340
3341 arm_tune = arm_active_target.tune_core;
3342 tune_flags = arm_active_target.tune_flags;
3343 current_tune = arm_active_target.tune;
3344
3345 /* TBD: Dwarf info for apcs frame is not handled yet. */
3346 if (TARGET_APCS_FRAME)
3347 flag_shrink_wrap = false;
3348
3349 /* BPABI targets use linker tricks to allow interworking on cores
3350 without thumb support. */
3351 if (TARGET_INTERWORK
3352 && !TARGET_BPABI
3353 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3354 {
3355 warning (0, "target CPU does not support interworking");
3356 target_flags &= ~MASK_INTERWORK;
3357 }
3358
3359 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3360 {
3361 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3362 target_flags |= MASK_APCS_FRAME;
3363 }
3364
3365 if (TARGET_POKE_FUNCTION_NAME)
3366 target_flags |= MASK_APCS_FRAME;
3367
3368 if (TARGET_APCS_REENT && flag_pic)
3369 error ("-fpic and -mapcs-reent are incompatible");
3370
3371 if (TARGET_APCS_REENT)
3372 warning (0, "APCS reentrant code not supported. Ignored");
3373
3374 /* Initialize boolean versions of the architectural flags, for use
3375 in the arm.md file. */
3376 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_armv3m);
3377 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3378 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3379 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5);
3380 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5e);
3381 arm_arch5te = arm_arch5e
3382 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3383 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3384 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3385 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3386 arm_arch6m = arm_arch6 && !arm_arch_notm;
3387 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3388 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3389 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3390 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3391 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3392 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3393 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3394 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3395 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3396 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3397 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3398 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3399 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3400 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3401 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3402 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3403 if (arm_fp16_inst)
3404 {
3405 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3406 error ("selected fp16 options are incompatible");
3407 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3408 }
3409
3410
3411 /* Set up some tuning parameters. */
3412 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3413 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3414 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3415 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3416 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3417 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3418
3419 /* And finally, set up some quirks. */
3420 arm_arch_no_volatile_ce
3421 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3422 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3423 isa_bit_quirk_armv6kz);
3424
3425 /* V5 code we generate is completely interworking capable, so we turn off
3426 TARGET_INTERWORK here to avoid many tests later on. */
3427
3428 /* XXX However, we must pass the right pre-processor defines to CPP
3429 or GLD can get confused. This is a hack. */
3430 if (TARGET_INTERWORK)
3431 arm_cpp_interwork = 1;
3432
3433 if (arm_arch5)
3434 target_flags &= ~MASK_INTERWORK;
3435
3436 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3437 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3438
3439 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3440 error ("iwmmxt abi requires an iwmmxt capable cpu");
3441
3442 /* If soft-float is specified then don't use FPU. */
3443 if (TARGET_SOFT_FLOAT)
3444 arm_fpu_attr = FPU_NONE;
3445 else
3446 arm_fpu_attr = FPU_VFP;
3447
3448 if (TARGET_AAPCS_BASED)
3449 {
3450 if (TARGET_CALLER_INTERWORKING)
3451 error ("AAPCS does not support -mcaller-super-interworking");
3452 else
3453 if (TARGET_CALLEE_INTERWORKING)
3454 error ("AAPCS does not support -mcallee-super-interworking");
3455 }
3456
3457 /* __fp16 support currently assumes the core has ldrh. */
3458 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3459 sorry ("__fp16 and no ldrh");
3460
3461 if (TARGET_AAPCS_BASED)
3462 {
3463 if (arm_abi == ARM_ABI_IWMMXT)
3464 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3465 else if (TARGET_HARD_FLOAT_ABI)
3466 {
3467 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3468 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3469 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3470 }
3471 else
3472 arm_pcs_default = ARM_PCS_AAPCS;
3473 }
3474 else
3475 {
3476 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3477 sorry ("-mfloat-abi=hard and VFP");
3478
3479 if (arm_abi == ARM_ABI_APCS)
3480 arm_pcs_default = ARM_PCS_APCS;
3481 else
3482 arm_pcs_default = ARM_PCS_ATPCS;
3483 }
3484
3485 /* For arm2/3 there is no need to do any scheduling if we are doing
3486 software floating-point. */
3487 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3488 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3489
3490 /* Use the cp15 method if it is available. */
3491 if (target_thread_pointer == TP_AUTO)
3492 {
3493 if (arm_arch6k && !TARGET_THUMB1)
3494 target_thread_pointer = TP_CP15;
3495 else
3496 target_thread_pointer = TP_SOFT;
3497 }
3498
3499 /* Override the default structure alignment for AAPCS ABI. */
3500 if (!global_options_set.x_arm_structure_size_boundary)
3501 {
3502 if (TARGET_AAPCS_BASED)
3503 arm_structure_size_boundary = 8;
3504 }
3505 else
3506 {
3507 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3508
3509 if (arm_structure_size_boundary != 8
3510 && arm_structure_size_boundary != 32
3511 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3512 {
3513 if (ARM_DOUBLEWORD_ALIGN)
3514 warning (0,
3515 "structure size boundary can only be set to 8, 32 or 64");
3516 else
3517 warning (0, "structure size boundary can only be set to 8 or 32");
3518 arm_structure_size_boundary
3519 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3520 }
3521 }
3522
3523 if (TARGET_VXWORKS_RTP)
3524 {
3525 if (!global_options_set.x_arm_pic_data_is_text_relative)
3526 arm_pic_data_is_text_relative = 0;
3527 }
3528 else if (flag_pic
3529 && !arm_pic_data_is_text_relative
3530 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3531 /* When text & data segments don't have a fixed displacement, the
3532 intended use is with a single, read only, pic base register.
3533 Unless the user explicitly requested not to do that, set
3534 it. */
3535 target_flags |= MASK_SINGLE_PIC_BASE;
3536
3537 /* If stack checking is disabled, we can use r10 as the PIC register,
3538 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3539 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3540 {
3541 if (TARGET_VXWORKS_RTP)
3542 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3543 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3544 }
3545
3546 if (flag_pic && TARGET_VXWORKS_RTP)
3547 arm_pic_register = 9;
3548
3549 if (arm_pic_register_string != NULL)
3550 {
3551 int pic_register = decode_reg_name (arm_pic_register_string);
3552
3553 if (!flag_pic)
3554 warning (0, "-mpic-register= is useless without -fpic");
3555
3556 /* Prevent the user from choosing an obviously stupid PIC register. */
3557 else if (pic_register < 0 || call_used_regs[pic_register]
3558 || pic_register == HARD_FRAME_POINTER_REGNUM
3559 || pic_register == STACK_POINTER_REGNUM
3560 || pic_register >= PC_REGNUM
3561 || (TARGET_VXWORKS_RTP
3562 && (unsigned int) pic_register != arm_pic_register))
3563 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3564 else
3565 arm_pic_register = pic_register;
3566 }
3567
3568 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3569 if (fix_cm3_ldrd == 2)
3570 {
3571 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3572 fix_cm3_ldrd = 1;
3573 else
3574 fix_cm3_ldrd = 0;
3575 }
3576
3577 /* Hot/Cold partitioning is not currently supported, since we can't
3578 handle literal pool placement in that case. */
3579 if (flag_reorder_blocks_and_partition)
3580 {
3581 inform (input_location,
3582 "-freorder-blocks-and-partition not supported on this architecture");
3583 flag_reorder_blocks_and_partition = 0;
3584 flag_reorder_blocks = 1;
3585 }
3586
3587 if (flag_pic)
3588 /* Hoisting PIC address calculations more aggressively provides a small,
3589 but measurable, size reduction for PIC code. Therefore, we decrease
3590 the bar for unrestricted expression hoisting to the cost of PIC address
3591 calculation, which is 2 instructions. */
3592 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3593 global_options.x_param_values,
3594 global_options_set.x_param_values);
3595
3596 /* ARM EABI defaults to strict volatile bitfields. */
3597 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3598 && abi_version_at_least(2))
3599 flag_strict_volatile_bitfields = 1;
3600
3601 /* Enable software prefetching at -O3 for CPUs that have prefetch, and we
3602 have deemed it beneficial (signified by setting
3603 prefetch.num_slots to 1 or more). */
3604 if (flag_prefetch_loop_arrays < 0
3605 && HAVE_prefetch
3606 && optimize >= 3
3607 && current_tune->prefetch.num_slots > 0)
3608 flag_prefetch_loop_arrays = 1;
3609
3610 /* Set up parameters to be used in the prefetching algorithm. Do not
3611 override the defaults unless we are tuning for a core we have
3612 researched values for. */
3613 if (current_tune->prefetch.num_slots > 0)
3614 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3615 current_tune->prefetch.num_slots,
3616 global_options.x_param_values,
3617 global_options_set.x_param_values);
3618 if (current_tune->prefetch.l1_cache_line_size >= 0)
3619 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3620 current_tune->prefetch.l1_cache_line_size,
3621 global_options.x_param_values,
3622 global_options_set.x_param_values);
3623 if (current_tune->prefetch.l1_cache_size >= 0)
3624 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3625 current_tune->prefetch.l1_cache_size,
3626 global_options.x_param_values,
3627 global_options_set.x_param_values);
3628
3629 /* Use Neon to perform 64-bit operations rather than core
3630 registers. */
3631 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3632 if (use_neon_for_64bits == 1)
3633 prefer_neon_for_64bits = true;
3634
3635 /* Use the alternative scheduling-pressure algorithm by default. */
3636 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3637 global_options.x_param_values,
3638 global_options_set.x_param_values);
3639
3640 /* Look through ready list and all of queue for instructions
3641 relevant for L2 auto-prefetcher. */
3642 int param_sched_autopref_queue_depth;
3643
3644 switch (current_tune->sched_autopref)
3645 {
3646 case tune_params::SCHED_AUTOPREF_OFF:
3647 param_sched_autopref_queue_depth = -1;
3648 break;
3649
3650 case tune_params::SCHED_AUTOPREF_RANK:
3651 param_sched_autopref_queue_depth = 0;
3652 break;
3653
3654 case tune_params::SCHED_AUTOPREF_FULL:
3655 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3656 break;
3657
3658 default:
3659 gcc_unreachable ();
3660 }
3661
3662 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3663 param_sched_autopref_queue_depth,
3664 global_options.x_param_values,
3665 global_options_set.x_param_values);
3666
3667 /* Currently, for slow flash data, we just disable literal pools. We also
3668 disable it for pure-code. */
3669 if (target_slow_flash_data || target_pure_code)
3670 arm_disable_literal_pool = true;
3671
3672 if (use_cmse && !arm_arch_cmse)
3673 error ("target CPU does not support ARMv8-M Security Extensions");
3674
3675 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3676 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3677 if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3678 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3679
3680 /* Disable scheduling fusion by default if the processor is not ARMv7
3681 or does not prefer ldrd/strd. */
3682 if (flag_schedule_fusion == 2
3683 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3684 flag_schedule_fusion = 0;
3685
3686 /* Need to remember initial options before they are overridden. */
3687 init_optimize = build_optimization_node (&global_options);
3688
3689 arm_option_override_internal (&global_options, &global_options_set);
3690 arm_option_check_internal (&global_options);
3691 arm_option_params_internal ();
3692
3693 /* Create the default target_options structure. */
3694 target_option_default_node = target_option_current_node
3695 = build_target_option_node (&global_options);
3696
3697 /* Register global variables with the garbage collector. */
3698 arm_add_gc_roots ();
3699
3700 /* Init initial mode for testing. */
3701 thumb_flipper = TARGET_THUMB;
3702 }
3703
3704 static void
3705 arm_add_gc_roots (void)
3706 {
3707 gcc_obstack_init(&minipool_obstack);
3708 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3709 }
3710 \f
3711 /* A table of known ARM exception types.
3712 For use with the interrupt function attribute. */
3713
3714 typedef struct
3715 {
3716 const char *const arg;
3717 const unsigned long return_value;
3718 }
3719 isr_attribute_arg;
3720
3721 static const isr_attribute_arg isr_attribute_args [] =
3722 {
3723 { "IRQ", ARM_FT_ISR },
3724 { "irq", ARM_FT_ISR },
3725 { "FIQ", ARM_FT_FIQ },
3726 { "fiq", ARM_FT_FIQ },
3727 { "ABORT", ARM_FT_ISR },
3728 { "abort", ARM_FT_ISR },
3729 { "ABORT", ARM_FT_ISR },
3730 { "abort", ARM_FT_ISR },
3731 { "UNDEF", ARM_FT_EXCEPTION },
3732 { "undef", ARM_FT_EXCEPTION },
3733 { "SWI", ARM_FT_EXCEPTION },
3734 { "swi", ARM_FT_EXCEPTION },
3735 { NULL, ARM_FT_NORMAL }
3736 };
3737
3738 /* Returns the (interrupt) function type of the current
3739 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3740
3741 static unsigned long
3742 arm_isr_value (tree argument)
3743 {
3744 const isr_attribute_arg * ptr;
3745 const char * arg;
3746
3747 if (!arm_arch_notm)
3748 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3749
3750 /* No argument - default to IRQ. */
3751 if (argument == NULL_TREE)
3752 return ARM_FT_ISR;
3753
3754 /* Get the value of the argument. */
3755 if (TREE_VALUE (argument) == NULL_TREE
3756 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3757 return ARM_FT_UNKNOWN;
3758
3759 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3760
3761 /* Check it against the list of known arguments. */
3762 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3763 if (streq (arg, ptr->arg))
3764 return ptr->return_value;
3765
3766 /* An unrecognized interrupt type. */
3767 return ARM_FT_UNKNOWN;
3768 }
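
/* [Editorial sketch -- not part of arm.c.]  An example of the source-level
   attribute whose string argument ends up in arm_isr_value above: the string
   is matched against isr_attribute_args, so "FIQ" maps to ARM_FT_FIQ and the
   argument-less form falls back to ARM_FT_ISR (IRQ).  The handler names are
   arbitrary.  */
void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));
void irq_handler (void) __attribute__ ((interrupt));   /* defaults to IRQ */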
3769
3770 /* Computes the type of the current function. */
3771
3772 static unsigned long
3773 arm_compute_func_type (void)
3774 {
3775 unsigned long type = ARM_FT_UNKNOWN;
3776 tree a;
3777 tree attr;
3778
3779 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3780
3781 /* Decide if the current function is volatile. Such functions
3782 never return, and many memory cycles can be saved by not storing
3783 register values that will never be needed again. This optimization
3784 was added to speed up context switching in a kernel application. */
3785 if (optimize > 0
3786 && (TREE_NOTHROW (current_function_decl)
3787 || !(flag_unwind_tables
3788 || (flag_exceptions
3789 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3790 && TREE_THIS_VOLATILE (current_function_decl))
3791 type |= ARM_FT_VOLATILE;
3792
3793 if (cfun->static_chain_decl != NULL)
3794 type |= ARM_FT_NESTED;
3795
3796 attr = DECL_ATTRIBUTES (current_function_decl);
3797
3798 a = lookup_attribute ("naked", attr);
3799 if (a != NULL_TREE)
3800 type |= ARM_FT_NAKED;
3801
3802 a = lookup_attribute ("isr", attr);
3803 if (a == NULL_TREE)
3804 a = lookup_attribute ("interrupt", attr);
3805
3806 if (a == NULL_TREE)
3807 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3808 else
3809 type |= arm_isr_value (TREE_VALUE (a));
3810
3811 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3812 type |= ARM_FT_CMSE_ENTRY;
3813
3814 return type;
3815 }
3816
3817 /* Returns the type of the current function. */
3818
3819 unsigned long
3820 arm_current_func_type (void)
3821 {
3822 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3823 cfun->machine->func_type = arm_compute_func_type ();
3824
3825 return cfun->machine->func_type;
3826 }
3827
3828 bool
3829 arm_allocate_stack_slots_for_args (void)
3830 {
3831 /* Naked functions should not allocate stack slots for arguments. */
3832 return !IS_NAKED (arm_current_func_type ());
3833 }
3834
3835 static bool
3836 arm_warn_func_return (tree decl)
3837 {
3838 /* Naked functions are implemented entirely in assembly, including the
3839 return sequence, so suppress warnings about this. */
3840 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3841 }
3842
3843 \f
3844 /* Output assembler code for a block containing the constant parts
3845 of a trampoline, leaving space for the variable parts.
3846
3847 On the ARM, (if r8 is the static chain regnum, and remembering that
3848 referencing pc adds an offset of 8) the trampoline looks like:
3849 ldr r8, [pc, #0]
3850 ldr pc, [pc]
3851 .word static chain value
3852 .word function's address
3853 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3854
3855 static void
3856 arm_asm_trampoline_template (FILE *f)
3857 {
3858 fprintf (f, "\t.syntax unified\n");
3859
3860 if (TARGET_ARM)
3861 {
3862 fprintf (f, "\t.arm\n");
3863 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3864 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3865 }
3866 else if (TARGET_THUMB2)
3867 {
3868 fprintf (f, "\t.thumb\n");
3869 /* The Thumb-2 trampoline is similar to the arm implementation.
3870 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3871 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3872 STATIC_CHAIN_REGNUM, PC_REGNUM);
3873 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3874 }
3875 else
3876 {
3877 ASM_OUTPUT_ALIGN (f, 2);
3878 fprintf (f, "\t.code\t16\n");
3879 fprintf (f, ".Ltrampoline_start:\n");
3880 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3881 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3882 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3883 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3884 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3885 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3886 }
3887 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3888 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3889 }
3890
3891 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3892
3893 static void
3894 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3895 {
3896 rtx fnaddr, mem, a_tramp;
3897
3898 emit_block_move (m_tramp, assemble_trampoline_template (),
3899 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3900
3901 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3902 emit_move_insn (mem, chain_value);
3903
3904 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3905 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3906 emit_move_insn (mem, fnaddr);
3907
3908 a_tramp = XEXP (m_tramp, 0);
3909 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3910 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3911 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3912 }
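
/* [Editorial sketch -- not part of arm.c.]  What an initialized 32-bit
   (ARM/Thumb-2) trampoline looks like in memory: the two fixed instructions
   emitted by arm_asm_trampoline_template, followed by the two words that
   arm_trampoline_init patches at offsets 8 and 12.  The struct is purely
   illustrative -- GCC manipulates the m_tramp memory block directly, not a
   C structure.  */
#include <stdint.h>

struct arm_trampoline_32
{
  uint32_t insn[2];       /* ldr <static chain reg>, [pc, ...]; ldr pc, [pc] */
  uint32_t static_chain;  /* offset 8: written by arm_trampoline_init        */
  uint32_t target_fn;     /* offset 12: address of the nested function       */
};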
3913
3914 /* Thumb trampolines should be entered in thumb mode, so set
3915 the bottom bit of the address. */
3916
3917 static rtx
3918 arm_trampoline_adjust_address (rtx addr)
3919 {
3920 if (TARGET_THUMB)
3921 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3922 NULL, 0, OPTAB_LIB_WIDEN);
3923 return addr;
3924 }
3925 \f
3926 /* Return 1 if it is possible to return using a single instruction.
3927 If SIBLING is non-null, this is a test for a return before a sibling
3928 call. SIBLING is the call insn, so we can examine its register usage. */
3929
3930 int
3931 use_return_insn (int iscond, rtx sibling)
3932 {
3933 int regno;
3934 unsigned int func_type;
3935 unsigned long saved_int_regs;
3936 unsigned HOST_WIDE_INT stack_adjust;
3937 arm_stack_offsets *offsets;
3938
3939 /* Never use a return instruction before reload has run. */
3940 if (!reload_completed)
3941 return 0;
3942
3943 func_type = arm_current_func_type ();
3944
3945 /* Naked, volatile and stack alignment functions need special
3946 consideration. */
3947 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3948 return 0;
3949
3950 /* So do interrupt functions that use the frame pointer and Thumb
3951 interrupt functions. */
3952 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3953 return 0;
3954
3955 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3956 && !optimize_function_for_size_p (cfun))
3957 return 0;
3958
3959 offsets = arm_get_frame_offsets ();
3960 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3961
3962 /* As do variadic functions. */
3963 if (crtl->args.pretend_args_size
3964 || cfun->machine->uses_anonymous_args
3965 /* Or if the function calls __builtin_eh_return () */
3966 || crtl->calls_eh_return
3967 /* Or if the function calls alloca */
3968 || cfun->calls_alloca
3969 /* Or if there is a stack adjustment. However, if the stack pointer
3970 is saved on the stack, we can use a pre-incrementing stack load. */
3971 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3972 && stack_adjust == 4))
3973 /* Or if the static chain register was saved above the frame, under the
3974 assumption that the stack pointer isn't saved on the stack. */
3975 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3976 && arm_compute_static_chain_stack_bytes() != 0))
3977 return 0;
3978
3979 saved_int_regs = offsets->saved_regs_mask;
3980
3981 /* Unfortunately, the insn
3982
3983 ldmib sp, {..., sp, ...}
3984
3985 triggers a bug on most SA-110 based devices, such that the stack
3986 pointer won't be correctly restored if the instruction takes a
3987 page fault. We work around this problem by popping r3 along with
3988 the other registers, since that is never slower than executing
3989 another instruction.
3990
3991 We test for !arm_arch5 here, because code for any architecture
3992 less than this could potentially be run on one of the buggy
3993 chips. */
3994 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3995 {
3996 /* Validate that r3 is a call-clobbered register (always true in
3997 the default abi) ... */
3998 if (!call_used_regs[3])
3999 return 0;
4000
4001 /* ... that it isn't being used for a return value ... */
4002 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4003 return 0;
4004
4005 /* ... or for a tail-call argument ... */
4006 if (sibling)
4007 {
4008 gcc_assert (CALL_P (sibling));
4009
4010 if (find_regno_fusage (sibling, USE, 3))
4011 return 0;
4012 }
4013
4014 /* ... and that there are no call-saved registers in r0-r2
4015 (always true in the default ABI). */
4016 if (saved_int_regs & 0x7)
4017 return 0;
4018 }
4019
4020 /* Can't be done if interworking with Thumb, and any registers have been
4021 stacked. */
4022 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4023 return 0;
4024
4025 /* On StrongARM, conditional returns are expensive if they aren't
4026 taken and multiple registers have been stacked. */
4027 if (iscond && arm_tune_strongarm)
4028 {
4029 /* Conditional return when just the LR is stored is a simple
4030 conditional-load instruction, that's not expensive. */
4031 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4032 return 0;
4033
4034 if (flag_pic
4035 && arm_pic_register != INVALID_REGNUM
4036 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4037 return 0;
4038 }
4039
4040 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4041 several instructions if anything needs to be popped. */
4042 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4043 return 0;
4044
4045 /* If there are saved registers but the LR isn't saved, then we need
4046 two instructions for the return. */
4047 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4048 return 0;
4049
4050 /* Can't be done if any of the VFP regs are pushed,
4051 since this also requires an insn. */
4052 if (TARGET_HARD_FLOAT)
4053 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4054 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4055 return 0;
4056
4057 if (TARGET_REALLY_IWMMXT)
4058 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4059 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4060 return 0;
4061
4062 return 1;
4063 }
4064
4065 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4066 shrink-wrapping if possible. This is the case if we need to emit a
4067 prologue, which we can test by looking at the offsets. */
4068 bool
4069 use_simple_return_p (void)
4070 {
4071 arm_stack_offsets *offsets;
4072
4073 /* Note this function can be called before or after reload. */
4074 if (!reload_completed)
4075 arm_compute_frame_layout ();
4076
4077 offsets = arm_get_frame_offsets ();
4078 return offsets->outgoing_args != 0;
4079 }
4080
4081 /* Return TRUE if int I is a valid immediate ARM constant. */
4082
4083 int
4084 const_ok_for_arm (HOST_WIDE_INT i)
4085 {
4086 int lowbit;
4087
4088 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4089 be all zero, or all one. */
4090 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4091 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4092 != ((~(unsigned HOST_WIDE_INT) 0)
4093 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4094 return FALSE;
4095
4096 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4097
4098 /* Fast return for 0 and small values. We must do this for zero, since
4099 the code below can't handle that one case. */
4100 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4101 return TRUE;
4102
4103 /* Get the number of trailing zeros. */
4104 lowbit = ffs((int) i) - 1;
4105
4106 /* Only even shifts are allowed in ARM mode so round down to the
4107 nearest even number. */
4108 if (TARGET_ARM)
4109 lowbit &= ~1;
4110
4111 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4112 return TRUE;
4113
4114 if (TARGET_ARM)
4115 {
4116 /* Allow rotated constants in ARM mode. */
4117 if (lowbit <= 4
4118 && ((i & ~0xc000003f) == 0
4119 || (i & ~0xf000000f) == 0
4120 || (i & ~0xfc000003) == 0))
4121 return TRUE;
4122 }
4123 else if (TARGET_THUMB2)
4124 {
4125 HOST_WIDE_INT v;
4126
4127 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4128 v = i & 0xff;
4129 v |= v << 16;
4130 if (i == v || i == (v | (v << 8)))
4131 return TRUE;
4132
4133 /* Allow repeated pattern 0xXY00XY00. */
4134 v = i & 0xff00;
4135 v |= v << 16;
4136 if (i == v)
4137 return TRUE;
4138 }
4139 else if (TARGET_HAVE_MOVT)
4140 {
4141 /* Thumb-1 Targets with MOVT. */
4142 if (i > 0xffff)
4143 return FALSE;
4144 else
4145 return TRUE;
4146 }
4147
4148 return FALSE;
4149 }
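
/* [Editorial sketch -- not part of arm.c.]  A standalone, brute-force version
   of the ARM-mode test above: a 32-bit value is a valid data-processing
   immediate if it is an 8-bit value rotated right by an even amount.
   const_ok_for_arm itself uses the trailing-zero shortcut instead of a loop;
   this form is only meant to make the encoding rule concrete.  */
#include <stdint.h>

static int
is_arm_immediate (uint32_t x)
{
  for (int rot = 0; rot < 32; rot += 2)
    {
      /* Rotating X left by ROT undoes a rotate-right of the 8-bit field.  */
      uint32_t v = rot ? (x << rot) | (x >> (32 - rot)) : x;
      if ((v & ~0xffu) == 0)
        return 1;
    }
  return 0;
}
/* For instance, 0xff000000 and 0x000003fc are accepted, 0x00000101 is not.  */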
4150
4151 /* Return true if I is a valid constant for the operation CODE. */
4152 int
4153 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4154 {
4155 if (const_ok_for_arm (i))
4156 return 1;
4157
4158 switch (code)
4159 {
4160 case SET:
4161 /* See if we can use movw. */
4162 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4163 return 1;
4164 else
4165 /* Otherwise, try mvn. */
4166 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4167
4168 case PLUS:
4169 /* See if we can use addw or subw. */
4170 if (TARGET_THUMB2
4171 && ((i & 0xfffff000) == 0
4172 || ((-i) & 0xfffff000) == 0))
4173 return 1;
4174 /* Fall through. */
4175 case COMPARE:
4176 case EQ:
4177 case NE:
4178 case GT:
4179 case LE:
4180 case LT:
4181 case GE:
4182 case GEU:
4183 case LTU:
4184 case GTU:
4185 case LEU:
4186 case UNORDERED:
4187 case ORDERED:
4188 case UNEQ:
4189 case UNGE:
4190 case UNLT:
4191 case UNGT:
4192 case UNLE:
4193 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4194
4195 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4196 case XOR:
4197 return 0;
4198
4199 case IOR:
4200 if (TARGET_THUMB2)
4201 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4202 return 0;
4203
4204 case AND:
4205 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4206
4207 default:
4208 gcc_unreachable ();
4209 }
4210 }
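
/* [Editorial sketch -- not part of arm.c.]  The broadened test above, reduced
   to its core idea in standalone form: an operation may still take the
   constant in one instruction if the constant itself, its bitwise complement
   (the mvn/bic forms) or its negation (the add<->sub / cmp<->cmn forms) is
   encodable.  is_arm_immediate is the illustrative helper sketched after
   const_ok_for_arm above; the Thumb-2 movw/addw special cases are omitted.  */
static int
const_usable_one_insn (uint32_t i)
{
  return is_arm_immediate (i)
         || is_arm_immediate (~i)
         || is_arm_immediate (-i);
}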
4211
4212 /* Return true if I is a valid di mode constant for the operation CODE. */
4213 int
4214 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4215 {
4216 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4217 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4218 rtx hi = GEN_INT (hi_val);
4219 rtx lo = GEN_INT (lo_val);
4220
4221 if (TARGET_THUMB1)
4222 return 0;
4223
4224 switch (code)
4225 {
4226 case AND:
4227 case IOR:
4228 case XOR:
4229 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4230 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4231 case PLUS:
4232 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4233
4234 default:
4235 return 0;
4236 }
4237 }
4238
4239 /* Emit a sequence of insns to handle a large constant.
4240 CODE is the code of the operation required, it can be any of SET, PLUS,
4241 IOR, AND, XOR, MINUS;
4242 MODE is the mode in which the operation is being performed;
4243 VAL is the integer to operate on;
4244 SOURCE is the other operand (a register, or a null-pointer for SET);
4245 SUBTARGETS means it is safe to create scratch registers if that will
4246 either produce a simpler sequence, or we will want to cse the values.
4247 Return value is the number of insns emitted. */
4248
4249 /* ??? Tweak this for thumb2. */
4250 int
4251 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4252 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4253 {
4254 rtx cond;
4255
4256 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4257 cond = COND_EXEC_TEST (PATTERN (insn));
4258 else
4259 cond = NULL_RTX;
4260
4261 if (subtargets || code == SET
4262 || (REG_P (target) && REG_P (source)
4263 && REGNO (target) != REGNO (source)))
4264 {
4265 /* After arm_reorg has been called, we can't fix up expensive
4266 constants by pushing them into memory so we must synthesize
4267 them in-line, regardless of the cost. This is only likely to
4268 be more costly on chips that have load delay slots and we are
4269 compiling without running the scheduler (so no splitting
4270 occurred before the final instruction emission).
4271
4272 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4273 */
4274 if (!cfun->machine->after_arm_reorg
4275 && !cond
4276 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4277 1, 0)
4278 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4279 + (code != SET))))
4280 {
4281 if (code == SET)
4282 {
4283 /* Currently SET is the only monadic value for CODE, all
4284 the rest are dyadic. */
4285 if (TARGET_USE_MOVT)
4286 arm_emit_movpair (target, GEN_INT (val));
4287 else
4288 emit_set_insn (target, GEN_INT (val));
4289
4290 return 1;
4291 }
4292 else
4293 {
4294 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4295
4296 if (TARGET_USE_MOVT)
4297 arm_emit_movpair (temp, GEN_INT (val));
4298 else
4299 emit_set_insn (temp, GEN_INT (val));
4300
4301 /* For MINUS, SOURCE is subtracted from the constant (val - source),
4302 since we never have subtraction of a constant. */
4303 if (code == MINUS)
4304 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4305 else
4306 emit_set_insn (target,
4307 gen_rtx_fmt_ee (code, mode, source, temp));
4308 return 2;
4309 }
4310 }
4311 }
4312
4313 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4314 1);
4315 }
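
/* [Editorial sketch -- not part of arm.c.]  For SET on a target with
   MOVW/MOVT the path above reduces to arm_emit_movpair, i.e. loading the two
   half-words separately ("movw rd, #lo16; movt rd, #hi16").  The helper below
   just computes those two fields; it is not how GCC emits the pair.  */
#include <stdint.h>

static void
movw_movt_fields (uint32_t val, uint16_t *lo16, uint16_t *hi16)
{
  *lo16 = (uint16_t) (val & 0xffff);  /* movw: writes bits 0-15, zeroes the rest */
  *hi16 = (uint16_t) (val >> 16);     /* movt: writes bits 16-31, keeps bits 0-15 */
}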
4316
4317 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4318 ARM/THUMB2 immediates and add up to VAL.
4319 The function return value gives the number of insns required. */
4320 static int
4321 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4322 struct four_ints *return_sequence)
4323 {
4324 int best_consecutive_zeros = 0;
4325 int i;
4326 int best_start = 0;
4327 int insns1, insns2;
4328 struct four_ints tmp_sequence;
4329
4330 /* If we aren't targeting ARM, the best place to start is always at
4331 the bottom, otherwise look more closely. */
4332 if (TARGET_ARM)
4333 {
4334 for (i = 0; i < 32; i += 2)
4335 {
4336 int consecutive_zeros = 0;
4337
4338 if (!(val & (3 << i)))
4339 {
4340 while ((i < 32) && !(val & (3 << i)))
4341 {
4342 consecutive_zeros += 2;
4343 i += 2;
4344 }
4345 if (consecutive_zeros > best_consecutive_zeros)
4346 {
4347 best_consecutive_zeros = consecutive_zeros;
4348 best_start = i - consecutive_zeros;
4349 }
4350 i -= 2;
4351 }
4352 }
4353 }
4354
4355 /* So long as it won't require any more insns to do so, it's
4356 desirable to emit a small constant (in bits 0...9) in the last
4357 insn. This way there is more chance that it can be combined with
4358 a later addressing insn to form a pre-indexed load or store
4359 operation. Consider:
4360
4361 *((volatile int *)0xe0000100) = 1;
4362 *((volatile int *)0xe0000110) = 2;
4363
4364 We want this to wind up as:
4365
4366 mov rA, #0xe0000000
4367 mov rB, #1
4368 str rB, [rA, #0x100]
4369 mov rB, #2
4370 str rB, [rA, #0x110]
4371
4372 rather than having to synthesize both large constants from scratch.
4373
4374 Therefore, we calculate how many insns would be required to emit
4375 the constant starting from `best_start', and also starting from
4376 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4377 yield a shorter sequence, we may as well use zero. */
4378 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4379 if (best_start != 0
4380 && ((HOST_WIDE_INT_1U << best_start) < val))
4381 {
4382 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4383 if (insns2 <= insns1)
4384 {
4385 *return_sequence = tmp_sequence;
4386 insns1 = insns2;
4387 }
4388 }
4389
4390 return insns1;
4391 }
4392
4393 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4394 static int
4395 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4396 struct four_ints *return_sequence, int i)
4397 {
4398 int remainder = val & 0xffffffff;
4399 int insns = 0;
4400
4401 /* Try and find a way of doing the job in either two or three
4402 instructions.
4403
4404 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4405 location. We start at position I. This may be the MSB, or
4406 optimal_immediate_sequence may have positioned it at the largest block
4407 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4408 wrapping around to the top of the word when we drop off the bottom.
4409 In the worst case this code should produce no more than four insns.
4410
4411 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4412 constants, shifted to any arbitrary location. We should always start
4413 at the MSB. */
4414 do
4415 {
4416 int end;
4417 unsigned int b1, b2, b3, b4;
4418 unsigned HOST_WIDE_INT result;
4419 int loc;
4420
4421 gcc_assert (insns < 4);
4422
4423 if (i <= 0)
4424 i += 32;
4425
4426 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4427 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4428 {
4429 loc = i;
4430 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4431 /* We can use addw/subw for the last 12 bits. */
4432 result = remainder;
4433 else
4434 {
4435 /* Use an 8-bit shifted/rotated immediate. */
4436 end = i - 8;
4437 if (end < 0)
4438 end += 32;
4439 result = remainder & ((0x0ff << end)
4440 | ((i < end) ? (0xff >> (32 - end))
4441 : 0));
4442 i -= 8;
4443 }
4444 }
4445 else
4446 {
4447 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4448 arbitrary shifts. */
4449 i -= TARGET_ARM ? 2 : 1;
4450 continue;
4451 }
4452
4453 /* Next, see if we can do a better job with a thumb2 replicated
4454 constant.
4455
4456 We do it this way around to catch the cases like 0x01F001E0 where
4457 two 8-bit immediates would work, but a replicated constant would
4458 make it worse.
4459
4460 TODO: 16-bit constants that don't clear all the bits, but still win.
4461 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4462 if (TARGET_THUMB2)
4463 {
4464 b1 = (remainder & 0xff000000) >> 24;
4465 b2 = (remainder & 0x00ff0000) >> 16;
4466 b3 = (remainder & 0x0000ff00) >> 8;
4467 b4 = remainder & 0xff;
4468
4469 if (loc > 24)
4470 {
4471 /* The 8-bit immediate already found clears b1 (and maybe b2),
4472 but must leave b3 and b4 alone. */
4473
4474 /* First try to find a 32-bit replicated constant that clears
4475 almost everything. We can assume that we can't do it in one,
4476 or else we wouldn't be here. */
4477 unsigned int tmp = b1 & b2 & b3 & b4;
4478 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4479 + (tmp << 24);
4480 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4481 + (tmp == b3) + (tmp == b4);
4482 if (tmp
4483 && (matching_bytes >= 3
4484 || (matching_bytes == 2
4485 && const_ok_for_op (remainder & ~tmp2, code))))
4486 {
4487 /* At least 3 of the bytes match, and the fourth has at
4488 least as many bits set, or two of the bytes match
4489 and it will only require one more insn to finish. */
4490 result = tmp2;
4491 i = tmp != b1 ? 32
4492 : tmp != b2 ? 24
4493 : tmp != b3 ? 16
4494 : 8;
4495 }
4496
4497 /* Second, try to find a 16-bit replicated constant that can
4498 leave three of the bytes clear. If b2 or b4 is already
4499 zero, then we can. If the 8-bit from above would not
4500 clear b2 anyway, then we still win. */
4501 else if (b1 == b3 && (!b2 || !b4
4502 || (remainder & 0x00ff0000 & ~result)))
4503 {
4504 result = remainder & 0xff00ff00;
4505 i = 24;
4506 }
4507 }
4508 else if (loc > 16)
4509 {
4510 /* The 8-bit immediate already found clears b2 (and maybe b3)
4511 and we don't get here unless b1 is already clear, but it will
4512 leave b4 unchanged. */
4513
4514 /* If we can clear b2 and b4 at once, then we win, since the
4515 8-bits couldn't possibly reach that far. */
4516 if (b2 == b4)
4517 {
4518 result = remainder & 0x00ff00ff;
4519 i = 16;
4520 }
4521 }
4522 }
4523
4524 return_sequence->i[insns++] = result;
4525 remainder &= ~result;
4526
4527 if (code == SET || code == MINUS)
4528 code = PLUS;
4529 }
4530 while (remainder);
4531
4532 return insns;
4533 }
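
/* [Editorial sketch -- not part of arm.c.]  A deliberately simplified,
   ARM-mode-only version of the decomposition above: peel off one 8-bit chunk
   (aligned on a 2-bit boundary) per instruction until nothing remains.  It
   ignores the choice of starting position, rotations that wrap around bit 31,
   and all the Thumb-2 replicated-constant and addw/subw refinements, so it
   can need more pieces than the real code; it only shows the basic
   "rotated 8-bit pieces" idea.  */
#include <stdint.h>

static int
naive_arm_split (uint32_t val, uint32_t pieces[4])
{
  int n = 0;
  while (val != 0 && n < 4)
    {
      /* Lowest set bit, rounded down to an even position.  */
      int low = __builtin_ctz (val) & ~1;
      /* Take the 8 bits starting there as one immediate.  */
      uint32_t piece = val & (0xffu << low);
      pieces[n++] = piece;
      val &= ~piece;
    }
  return n;  /* e.g. 0x0001f001 splits into 0x00000001 and 0x0001f000 */
}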
4534
4535 /* Emit an instruction with the indicated PATTERN. If COND is
4536 non-NULL, conditionalize the execution of the instruction on COND
4537 being true. */
4538
4539 static void
4540 emit_constant_insn (rtx cond, rtx pattern)
4541 {
4542 if (cond)
4543 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4544 emit_insn (pattern);
4545 }
4546
4547 /* As above, but extra parameter GENERATE which, if clear, suppresses
4548 RTL generation. */
4549
4550 static int
4551 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4552 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4553 int subtargets, int generate)
4554 {
4555 int can_invert = 0;
4556 int can_negate = 0;
4557 int final_invert = 0;
4558 int i;
4559 int set_sign_bit_copies = 0;
4560 int clear_sign_bit_copies = 0;
4561 int clear_zero_bit_copies = 0;
4562 int set_zero_bit_copies = 0;
4563 int insns = 0, neg_insns, inv_insns;
4564 unsigned HOST_WIDE_INT temp1, temp2;
4565 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4566 struct four_ints *immediates;
4567 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4568
4569 /* Find out which operations are safe for a given CODE. Also do a quick
4570 check for degenerate cases; these can occur when DImode operations
4571 are split. */
4572 switch (code)
4573 {
4574 case SET:
4575 can_invert = 1;
4576 break;
4577
4578 case PLUS:
4579 can_negate = 1;
4580 break;
4581
4582 case IOR:
4583 if (remainder == 0xffffffff)
4584 {
4585 if (generate)
4586 emit_constant_insn (cond,
4587 gen_rtx_SET (target,
4588 GEN_INT (ARM_SIGN_EXTEND (val))));
4589 return 1;
4590 }
4591
4592 if (remainder == 0)
4593 {
4594 if (reload_completed && rtx_equal_p (target, source))
4595 return 0;
4596
4597 if (generate)
4598 emit_constant_insn (cond, gen_rtx_SET (target, source));
4599 return 1;
4600 }
4601 break;
4602
4603 case AND:
4604 if (remainder == 0)
4605 {
4606 if (generate)
4607 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4608 return 1;
4609 }
4610 if (remainder == 0xffffffff)
4611 {
4612 if (reload_completed && rtx_equal_p (target, source))
4613 return 0;
4614 if (generate)
4615 emit_constant_insn (cond, gen_rtx_SET (target, source));
4616 return 1;
4617 }
4618 can_invert = 1;
4619 break;
4620
4621 case XOR:
4622 if (remainder == 0)
4623 {
4624 if (reload_completed && rtx_equal_p (target, source))
4625 return 0;
4626 if (generate)
4627 emit_constant_insn (cond, gen_rtx_SET (target, source));
4628 return 1;
4629 }
4630
4631 if (remainder == 0xffffffff)
4632 {
4633 if (generate)
4634 emit_constant_insn (cond,
4635 gen_rtx_SET (target,
4636 gen_rtx_NOT (mode, source)));
4637 return 1;
4638 }
4639 final_invert = 1;
4640 break;
4641
4642 case MINUS:
4643 /* We treat MINUS as (val - source), since (source - val) is always
4644 passed as (source + (-val)). */
4645 if (remainder == 0)
4646 {
4647 if (generate)
4648 emit_constant_insn (cond,
4649 gen_rtx_SET (target,
4650 gen_rtx_NEG (mode, source)));
4651 return 1;
4652 }
4653 if (const_ok_for_arm (val))
4654 {
4655 if (generate)
4656 emit_constant_insn (cond,
4657 gen_rtx_SET (target,
4658 gen_rtx_MINUS (mode, GEN_INT (val),
4659 source)));
4660 return 1;
4661 }
4662
4663 break;
4664
4665 default:
4666 gcc_unreachable ();
4667 }
4668
4669 /* If we can do it in one insn get out quickly. */
4670 if (const_ok_for_op (val, code))
4671 {
4672 if (generate)
4673 emit_constant_insn (cond,
4674 gen_rtx_SET (target,
4675 (source
4676 ? gen_rtx_fmt_ee (code, mode, source,
4677 GEN_INT (val))
4678 : GEN_INT (val))));
4679 return 1;
4680 }
4681
4682 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4683 insn. */
4684 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4685 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4686 {
4687 if (generate)
4688 {
4689 if (mode == SImode && i == 16)
4690 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4691 smaller insn. */
4692 emit_constant_insn (cond,
4693 gen_zero_extendhisi2
4694 (target, gen_lowpart (HImode, source)));
4695 else
4696 /* Extz only supports SImode, but we can coerce the operands
4697 into that mode. */
4698 emit_constant_insn (cond,
4699 gen_extzv_t2 (gen_lowpart (SImode, target),
4700 gen_lowpart (SImode, source),
4701 GEN_INT (i), const0_rtx));
4702 }
4703
4704 return 1;
4705 }
4706
4707 /* Calculate a few attributes that may be useful for specific
4708 optimizations. */
4709 /* Count number of leading zeros. */
4710 for (i = 31; i >= 0; i--)
4711 {
4712 if ((remainder & (1 << i)) == 0)
4713 clear_sign_bit_copies++;
4714 else
4715 break;
4716 }
4717
4718 /* Count number of leading 1's. */
4719 for (i = 31; i >= 0; i--)
4720 {
4721 if ((remainder & (1 << i)) != 0)
4722 set_sign_bit_copies++;
4723 else
4724 break;
4725 }
4726
4727 /* Count number of trailing zero's. */
4728 for (i = 0; i <= 31; i++)
4729 {
4730 if ((remainder & (1 << i)) == 0)
4731 clear_zero_bit_copies++;
4732 else
4733 break;
4734 }
4735
4736 /* Count number of trailing 1's. */
4737 for (i = 0; i <= 31; i++)
4738 {
4739 if ((remainder & (1 << i)) != 0)
4740 set_zero_bit_copies++;
4741 else
4742 break;
4743 }
4744
4745 switch (code)
4746 {
4747 case SET:
4748 /* See if we can do this by sign_extending a constant that is known
4749 to be negative. This is a good way of doing it, since the shift
4750 may well merge into a subsequent insn. */
4751 if (set_sign_bit_copies > 1)
4752 {
4753 if (const_ok_for_arm
4754 (temp1 = ARM_SIGN_EXTEND (remainder
4755 << (set_sign_bit_copies - 1))))
4756 {
4757 if (generate)
4758 {
4759 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4760 emit_constant_insn (cond,
4761 gen_rtx_SET (new_src, GEN_INT (temp1)));
4762 emit_constant_insn (cond,
4763 gen_ashrsi3 (target, new_src,
4764 GEN_INT (set_sign_bit_copies - 1)));
4765 }
4766 return 2;
4767 }
4768 /* For an inverted constant, we will need to set the low bits,
4769 these will be shifted out of harm's way. */
4770 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4771 if (const_ok_for_arm (~temp1))
4772 {
4773 if (generate)
4774 {
4775 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4776 emit_constant_insn (cond,
4777 gen_rtx_SET (new_src, GEN_INT (temp1)));
4778 emit_constant_insn (cond,
4779 gen_ashrsi3 (target, new_src,
4780 GEN_INT (set_sign_bit_copies - 1)));
4781 }
4782 return 2;
4783 }
4784 }
4785
4786 /* See if we can calculate the value as the difference between two
4787 valid immediates. */
4788 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4789 {
4790 int topshift = clear_sign_bit_copies & ~1;
4791
4792 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4793 & (0xff000000 >> topshift));
4794
4795 /* If temp1 is zero, then that means the 9 most significant
4796 bits of remainder were 1 and we've caused it to overflow.
4797 When topshift is 0 we don't need to do anything since we
4798 can borrow from 'bit 32'. */
4799 if (temp1 == 0 && topshift != 0)
4800 temp1 = 0x80000000 >> (topshift - 1);
4801
4802 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4803
4804 if (const_ok_for_arm (temp2))
4805 {
4806 if (generate)
4807 {
4808 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4809 emit_constant_insn (cond,
4810 gen_rtx_SET (new_src, GEN_INT (temp1)));
4811 emit_constant_insn (cond,
4812 gen_addsi3 (target, new_src,
4813 GEN_INT (-temp2)));
4814 }
4815
4816 return 2;
4817 }
4818 }
4819
4820 /* See if we can generate this by setting the bottom (or the top)
4821 16 bits, and then shifting these into the other half of the
4822 word. We only look for the simplest cases, to do more would cost
4823 too much. Be careful, however, not to generate this when the
4824 alternative would take fewer insns. */
4825 if (val & 0xffff0000)
4826 {
4827 temp1 = remainder & 0xffff0000;
4828 temp2 = remainder & 0x0000ffff;
4829
4830 /* Overlaps outside this range are best done using other methods. */
4831 for (i = 9; i < 24; i++)
4832 {
4833 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4834 && !const_ok_for_arm (temp2))
4835 {
4836 rtx new_src = (subtargets
4837 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4838 : target);
4839 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4840 source, subtargets, generate);
4841 source = new_src;
4842 if (generate)
4843 emit_constant_insn
4844 (cond,
4845 gen_rtx_SET
4846 (target,
4847 gen_rtx_IOR (mode,
4848 gen_rtx_ASHIFT (mode, source,
4849 GEN_INT (i)),
4850 source)));
4851 return insns + 1;
4852 }
4853 }
4854
4855 /* Don't duplicate cases already considered. */
4856 for (i = 17; i < 24; i++)
4857 {
4858 if (((temp1 | (temp1 >> i)) == remainder)
4859 && !const_ok_for_arm (temp1))
4860 {
4861 rtx new_src = (subtargets
4862 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4863 : target);
4864 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4865 source, subtargets, generate);
4866 source = new_src;
4867 if (generate)
4868 emit_constant_insn
4869 (cond,
4870 gen_rtx_SET (target,
4871 gen_rtx_IOR
4872 (mode,
4873 gen_rtx_LSHIFTRT (mode, source,
4874 GEN_INT (i)),
4875 source)));
4876 return insns + 1;
4877 }
4878 }
4879 }
4880 break;
4881
4882 case IOR:
4883 case XOR:
4884 /* If we have IOR or XOR, and the constant can be loaded in a
4885 single instruction, and we can find a temporary to put it in,
4886 then this can be done in two instructions instead of 3-4. */
4887 if (subtargets
4888 /* TARGET can't be NULL if SUBTARGETS is 0 */
4889 || (reload_completed && !reg_mentioned_p (target, source)))
4890 {
4891 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4892 {
4893 if (generate)
4894 {
4895 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4896
4897 emit_constant_insn (cond,
4898 gen_rtx_SET (sub, GEN_INT (val)));
4899 emit_constant_insn (cond,
4900 gen_rtx_SET (target,
4901 gen_rtx_fmt_ee (code, mode,
4902 source, sub)));
4903 }
4904 return 2;
4905 }
4906 }
4907
4908 if (code == XOR)
4909 break;
4910
4911 /* Convert.
4912 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4913 with the remaining bits 0, e.g. 0xfff00000)
4914 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4915
4916 This can be done in 2 instructions by using shifts with mov or mvn.
4917 e.g. for
4918 x = x | 0xfff00000;
4919 we generate:
4920 mvn r0, r0, asl #12
4921 mvn r0, r0, lsr #12 */
4922 if (set_sign_bit_copies > 8
4923 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4924 {
4925 if (generate)
4926 {
4927 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4928 rtx shift = GEN_INT (set_sign_bit_copies);
4929
4930 emit_constant_insn
4931 (cond,
4932 gen_rtx_SET (sub,
4933 gen_rtx_NOT (mode,
4934 gen_rtx_ASHIFT (mode,
4935 source,
4936 shift))));
4937 emit_constant_insn
4938 (cond,
4939 gen_rtx_SET (target,
4940 gen_rtx_NOT (mode,
4941 gen_rtx_LSHIFTRT (mode, sub,
4942 shift))));
4943 }
4944 return 2;
4945 }
4946
4947 /* Convert
4948 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4949 to
4950 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4951
4952 E.g. for r0 = r0 | 0xfff
4953 mvn r0, r0, lsr #12
4954 mvn r0, r0, asl #12
4955
4956 */
4957 if (set_zero_bit_copies > 8
4958 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4959 {
4960 if (generate)
4961 {
4962 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4963 rtx shift = GEN_INT (set_zero_bit_copies);
4964
4965 emit_constant_insn
4966 (cond,
4967 gen_rtx_SET (sub,
4968 gen_rtx_NOT (mode,
4969 gen_rtx_LSHIFTRT (mode,
4970 source,
4971 shift))));
4972 emit_constant_insn
4973 (cond,
4974 gen_rtx_SET (target,
4975 gen_rtx_NOT (mode,
4976 gen_rtx_ASHIFT (mode, sub,
4977 shift))));
4978 }
4979 return 2;
4980 }
4981
4982 /* This will never be reached for Thumb2 because orn is a valid
4983 instruction. This is for Thumb1 and the ARM 32 bit cases.
4984
4985 x = y | constant (such that ~constant is a valid constant)
4986 Transform this to
4987 x = ~(~y & ~constant).
4988 */
4989 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4990 {
4991 if (generate)
4992 {
4993 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4994 emit_constant_insn (cond,
4995 gen_rtx_SET (sub,
4996 gen_rtx_NOT (mode, source)));
4997 source = sub;
4998 if (subtargets)
4999 sub = gen_reg_rtx (mode);
5000 emit_constant_insn (cond,
5001 gen_rtx_SET (sub,
5002 gen_rtx_AND (mode, source,
5003 GEN_INT (temp1))));
5004 emit_constant_insn (cond,
5005 gen_rtx_SET (target,
5006 gen_rtx_NOT (mode, sub)));
5007 }
5008 return 3;
5009 }
5010 break;
5011
5012 case AND:
5013 /* See if two shifts will do 2 or more insns' worth of work. */
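/* For example, x & 0x0000ffff (a mask with 16 leading zero bits) needs
   no extra AND at all: a left shift by 16 followed by a logical right
   shift by 16 clears the top half-word directly. */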
5014 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5015 {
5016 HOST_WIDE_INT shift_mask = ((0xffffffff
5017 << (32 - clear_sign_bit_copies))
5018 & 0xffffffff);
5019
5020 if ((remainder | shift_mask) != 0xffffffff)
5021 {
5022 HOST_WIDE_INT new_val
5023 = ARM_SIGN_EXTEND (remainder | shift_mask);
5024
5025 if (generate)
5026 {
5027 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5028 insns = arm_gen_constant (AND, SImode, cond, new_val,
5029 new_src, source, subtargets, 1);
5030 source = new_src;
5031 }
5032 else
5033 {
5034 rtx targ = subtargets ? NULL_RTX : target;
5035 insns = arm_gen_constant (AND, mode, cond, new_val,
5036 targ, source, subtargets, 0);
5037 }
5038 }
5039
5040 if (generate)
5041 {
5042 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5043 rtx shift = GEN_INT (clear_sign_bit_copies);
5044
5045 emit_insn (gen_ashlsi3 (new_src, source, shift));
5046 emit_insn (gen_lshrsi3 (target, new_src, shift));
5047 }
5048
5049 return insns + 2;
5050 }
5051
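/* Mirror image of the case above: e.g. x & 0xffff0000 (a mask with 16
   trailing zero bits) is a logical right shift by 16 followed by a
   left shift by 16. */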
5052 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5053 {
5054 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5055
5056 if ((remainder | shift_mask) != 0xffffffff)
5057 {
5058 HOST_WIDE_INT new_val
5059 = ARM_SIGN_EXTEND (remainder | shift_mask);
5060 if (generate)
5061 {
5062 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5063
5064 insns = arm_gen_constant (AND, mode, cond, new_val,
5065 new_src, source, subtargets, 1);
5066 source = new_src;
5067 }
5068 else
5069 {
5070 rtx targ = subtargets ? NULL_RTX : target;
5071
5072 insns = arm_gen_constant (AND, mode, cond, new_val,
5073 targ, source, subtargets, 0);
5074 }
5075 }
5076
5077 if (generate)
5078 {
5079 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5080 rtx shift = GEN_INT (clear_zero_bit_copies);
5081
5082 emit_insn (gen_lshrsi3 (new_src, source, shift));
5083 emit_insn (gen_ashlsi3 (target, new_src, shift));
5084 }
5085
5086 return insns + 2;
5087 }
5088
5089 break;
5090
5091 default:
5092 break;
5093 }
5094
5095 /* Calculate what the instruction sequences would be if we generated it
5096 normally, negated, or inverted. */
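/* For example, target = source + 0xffffff00 is better expressed as a
   subtraction of 0x100; when negation is permitted for the operation,
   the cheaper negated sequence is selected below. */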
5097 if (code == AND)
5098 /* AND cannot be split into multiple insns, so invert and use BIC. */
5099 insns = 99;
5100 else
5101 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5102
5103 if (can_negate)
5104 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5105 &neg_immediates);
5106 else
5107 neg_insns = 99;
5108
5109 if (can_invert || final_invert)
5110 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5111 &inv_immediates);
5112 else
5113 inv_insns = 99;
5114
5115 immediates = &pos_immediates;
5116
5117 /* Is the negated immediate sequence more efficient? */
5118 if (neg_insns < insns && neg_insns <= inv_insns)
5119 {
5120 insns = neg_insns;
5121 immediates = &neg_immediates;
5122 }
5123 else
5124 can_negate = 0;
5125
5126 /* Is the inverted immediate sequence more efficient?
5127 We must allow for an extra NOT instruction for XOR operations, although
5128 there is some chance that the final 'mvn' will get optimized later. */
5129 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5130 {
5131 insns = inv_insns;
5132 immediates = &inv_immediates;
5133 }
5134 else
5135 {
5136 can_invert = 0;
5137 final_invert = 0;
5138 }
5139
5140 /* Now output the chosen sequence as instructions. */
5141 if (generate)
5142 {
5143 for (i = 0; i < insns; i++)
5144 {
5145 rtx new_src, temp1_rtx;
5146
5147 temp1 = immediates->i[i];
5148
5149 if (code == SET || code == MINUS)
5150 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5151 else if ((final_invert || i < (insns - 1)) && subtargets)
5152 new_src = gen_reg_rtx (mode);
5153 else
5154 new_src = target;
5155
5156 if (can_invert)
5157 temp1 = ~temp1;
5158 else if (can_negate)
5159 temp1 = -temp1;
5160
5161 temp1 = trunc_int_for_mode (temp1, mode);
5162 temp1_rtx = GEN_INT (temp1);
5163
5164 if (code == SET)
5165 ;
5166 else if (code == MINUS)
5167 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5168 else
5169 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5170
5171 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5172 source = new_src;
5173
5174 if (code == SET)
5175 {
5176 can_negate = can_invert;
5177 can_invert = 0;
5178 code = PLUS;
5179 }
5180 else if (code == MINUS)
5181 code = PLUS;
5182 }
5183 }
5184
5185 if (final_invert)
5186 {
5187 if (generate)
5188 emit_constant_insn (cond, gen_rtx_SET (target,
5189 gen_rtx_NOT (mode, source)));
5190 insns++;
5191 }
5192
5193 return insns;
5194 }
5195
5196 /* Canonicalize a comparison so that we are more likely to recognize it.
5197 This can be done for a few constant compares, where we can make the
5198 immediate value easier to load. */
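/* For example, (x > 0x1fff) needs the constant 0x1fff, which is not a
   valid ARM immediate, but it is equivalent to (x >= 0x2000), whose
   constant is. */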
5199
5200 static void
5201 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5202 bool op0_preserve_value)
5203 {
5204 machine_mode mode;
5205 unsigned HOST_WIDE_INT i, maxval;
5206
5207 mode = GET_MODE (*op0);
5208 if (mode == VOIDmode)
5209 mode = GET_MODE (*op1);
5210
5211 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5212
5213 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5214 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5215 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5216 for GTU/LEU in Thumb mode. */
5217 if (mode == DImode)
5218 {
5219
5220 if (*code == GT || *code == LE
5221 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5222 {
5223 /* Missing comparison. First try to use an available
5224 comparison. */
5225 if (CONST_INT_P (*op1))
5226 {
5227 i = INTVAL (*op1);
5228 switch (*code)
5229 {
5230 case GT:
5231 case LE:
5232 if (i != maxval
5233 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5234 {
5235 *op1 = GEN_INT (i + 1);
5236 *code = *code == GT ? GE : LT;
5237 return;
5238 }
5239 break;
5240 case GTU:
5241 case LEU:
5242 if (i != ~((unsigned HOST_WIDE_INT) 0)
5243 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5244 {
5245 *op1 = GEN_INT (i + 1);
5246 *code = *code == GTU ? GEU : LTU;
5247 return;
5248 }
5249 break;
5250 default:
5251 gcc_unreachable ();
5252 }
5253 }
5254
5255 /* If that did not work, reverse the condition. */
5256 if (!op0_preserve_value)
5257 {
5258 std::swap (*op0, *op1);
5259 *code = (int)swap_condition ((enum rtx_code)*code);
5260 }
5261 }
5262 return;
5263 }
5264
5265 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5266 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5267 to facilitate possible combining with a cmp into 'ands'. */
5268 if (mode == SImode
5269 && GET_CODE (*op0) == ZERO_EXTEND
5270 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5271 && GET_MODE (XEXP (*op0, 0)) == QImode
5272 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5273 && subreg_lowpart_p (XEXP (*op0, 0))
5274 && *op1 == const0_rtx)
5275 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5276 GEN_INT (255));
5277
5278 /* Comparisons smaller than DImode. Only adjust comparisons against
5279 an out-of-range constant. */
5280 if (!CONST_INT_P (*op1)
5281 || const_ok_for_arm (INTVAL (*op1))
5282 || const_ok_for_arm (- INTVAL (*op1)))
5283 return;
5284
5285 i = INTVAL (*op1);
5286
5287 switch (*code)
5288 {
5289 case EQ:
5290 case NE:
5291 return;
5292
5293 case GT:
5294 case LE:
5295 if (i != maxval
5296 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5297 {
5298 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5299 *code = *code == GT ? GE : LT;
5300 return;
5301 }
5302 break;
5303
5304 case GE:
5305 case LT:
5306 if (i != ~maxval
5307 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5308 {
5309 *op1 = GEN_INT (i - 1);
5310 *code = *code == GE ? GT : LE;
5311 return;
5312 }
5313 break;
5314
5315 case GTU:
5316 case LEU:
5317 if (i != ~((unsigned HOST_WIDE_INT) 0)
5318 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5319 {
5320 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5321 *code = *code == GTU ? GEU : LTU;
5322 return;
5323 }
5324 break;
5325
5326 case GEU:
5327 case LTU:
5328 if (i != 0
5329 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5330 {
5331 *op1 = GEN_INT (i - 1);
5332 *code = *code == GEU ? GTU : LEU;
5333 return;
5334 }
5335 break;
5336
5337 default:
5338 gcc_unreachable ();
5339 }
5340 }
5341
5342
5343 /* Define how to find the value returned by a function. */
5344
5345 static rtx
5346 arm_function_value(const_tree type, const_tree func,
5347 bool outgoing ATTRIBUTE_UNUSED)
5348 {
5349 machine_mode mode;
5350 int unsignedp ATTRIBUTE_UNUSED;
5351 rtx r ATTRIBUTE_UNUSED;
5352
5353 mode = TYPE_MODE (type);
5354
5355 if (TARGET_AAPCS_BASED)
5356 return aapcs_allocate_return_reg (mode, type, func);
5357
5358 /* Promote integer types. */
5359 if (INTEGRAL_TYPE_P (type))
5360 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5361
5362 /* Promotes small structs returned in a register to full-word size
5363 for big-endian AAPCS. */
5364 if (arm_return_in_msb (type))
5365 {
5366 HOST_WIDE_INT size = int_size_in_bytes (type);
5367 if (size % UNITS_PER_WORD != 0)
5368 {
5369 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5370 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5371 }
5372 }
5373
5374 return arm_libcall_value_1 (mode);
5375 }
5376
5377 /* libcall hashtable helpers. */
5378
5379 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5380 {
5381 static inline hashval_t hash (const rtx_def *);
5382 static inline bool equal (const rtx_def *, const rtx_def *);
5383 static inline void remove (rtx_def *);
5384 };
5385
5386 inline bool
5387 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5388 {
5389 return rtx_equal_p (p1, p2);
5390 }
5391
5392 inline hashval_t
5393 libcall_hasher::hash (const rtx_def *p1)
5394 {
5395 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5396 }
5397
5398 typedef hash_table<libcall_hasher> libcall_table_type;
5399
5400 static void
5401 add_libcall (libcall_table_type *htab, rtx libcall)
5402 {
5403 *htab->find_slot (libcall, INSERT) = libcall;
5404 }
5405
5406 static bool
5407 arm_libcall_uses_aapcs_base (const_rtx libcall)
5408 {
5409 static bool init_done = false;
5410 static libcall_table_type *libcall_htab = NULL;
5411
5412 if (!init_done)
5413 {
5414 init_done = true;
5415
5416 libcall_htab = new libcall_table_type (31);
5417 add_libcall (libcall_htab,
5418 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5419 add_libcall (libcall_htab,
5420 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5421 add_libcall (libcall_htab,
5422 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5423 add_libcall (libcall_htab,
5424 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5425
5426 add_libcall (libcall_htab,
5427 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5428 add_libcall (libcall_htab,
5429 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5430 add_libcall (libcall_htab,
5431 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5432 add_libcall (libcall_htab,
5433 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5434
5435 add_libcall (libcall_htab,
5436 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5437 add_libcall (libcall_htab,
5438 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5439 add_libcall (libcall_htab,
5440 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5441 add_libcall (libcall_htab,
5442 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5443 add_libcall (libcall_htab,
5444 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5445 add_libcall (libcall_htab,
5446 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5447 add_libcall (libcall_htab,
5448 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5449 add_libcall (libcall_htab,
5450 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5451
5452 /* Values from double-precision helper functions are returned in core
5453 registers if the selected core only supports single-precision
5454 arithmetic, even if we are using the hard-float ABI. The same is
5455 true for single-precision helpers, but we will never be using the
5456 hard-float ABI on a CPU which doesn't support single-precision
5457 operations in hardware. */
5458 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5459 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5460 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5461 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5462 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5463 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5464 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5465 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5466 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5467 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5468 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5469 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5470 SFmode));
5471 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5472 DFmode));
5473 add_libcall (libcall_htab,
5474 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5475 }
5476
5477 return libcall && libcall_htab->find (libcall) != NULL;
5478 }
5479
5480 static rtx
5481 arm_libcall_value_1 (machine_mode mode)
5482 {
5483 if (TARGET_AAPCS_BASED)
5484 return aapcs_libcall_value (mode);
5485 else if (TARGET_IWMMXT_ABI
5486 && arm_vector_mode_supported_p (mode))
5487 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5488 else
5489 return gen_rtx_REG (mode, ARG_REGISTER (1));
5490 }
5491
5492 /* Define how to find the value returned by a library function
5493 assuming the value has mode MODE. */
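/* For example, when the default PCS is the VFP (hard-float) variant, a
   float result from an ordinary libcall comes back in a VFP register,
   but the helpers listed in arm_libcall_uses_aapcs_base return theirs
   in the core register r0. */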
5494
5495 static rtx
5496 arm_libcall_value (machine_mode mode, const_rtx libcall)
5497 {
5498 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5499 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5500 {
5501 /* The following libcalls return their result in integer registers,
5502 even though they return a floating point value. */
5503 if (arm_libcall_uses_aapcs_base (libcall))
5504 return gen_rtx_REG (mode, ARG_REGISTER (1));
5505
5506 }
5507
5508 return arm_libcall_value_1 (mode);
5509 }
5510
5511 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5512
5513 static bool
5514 arm_function_value_regno_p (const unsigned int regno)
5515 {
5516 if (regno == ARG_REGISTER (1)
5517 || (TARGET_32BIT
5518 && TARGET_AAPCS_BASED
5519 && TARGET_HARD_FLOAT
5520 && regno == FIRST_VFP_REGNUM)
5521 || (TARGET_IWMMXT_ABI
5522 && regno == FIRST_IWMMXT_REGNUM))
5523 return true;
5524
5525 return false;
5526 }
5527
5528 /* Determine the amount of memory needed to store the possible return
5529 registers of an untyped call. */
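/* The base of 16 bytes covers the core registers r0-r3; the extra 32
   and 8 bytes below make room for values returned in VFP and iWMMXt
   registers under the corresponding ABIs. */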
5530 int
5531 arm_apply_result_size (void)
5532 {
5533 int size = 16;
5534
5535 if (TARGET_32BIT)
5536 {
5537 if (TARGET_HARD_FLOAT_ABI)
5538 size += 32;
5539 if (TARGET_IWMMXT_ABI)
5540 size += 8;
5541 }
5542
5543 return size;
5544 }
5545
5546 /* Decide whether TYPE should be returned in memory (true)
5547 or in a register (false). FNTYPE is the type of the function making
5548 the call. */
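/* For example, under AAPCS a 4-byte struct is returned in r0, whereas
   a 12-byte struct of ints is returned in memory unless a co-processor
   (e.g. VFP for a homogeneous floating-point aggregate) can return it
   in registers. */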
5549 static bool
5550 arm_return_in_memory (const_tree type, const_tree fntype)
5551 {
5552 HOST_WIDE_INT size;
5553
5554 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5555
5556 if (TARGET_AAPCS_BASED)
5557 {
5558 /* Simple, non-aggregate types (i.e. not including vectors and
5559 complex) are always returned in a register (or registers).
5560 We don't care about which register here, so we can short-cut
5561 some of the detail. */
5562 if (!AGGREGATE_TYPE_P (type)
5563 && TREE_CODE (type) != VECTOR_TYPE
5564 && TREE_CODE (type) != COMPLEX_TYPE)
5565 return false;
5566
5567 /* Any return value that is no larger than one word can be
5568 returned in r0. */
5569 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5570 return false;
5571
5572 /* Check any available co-processors to see if they accept the
5573 type as a register candidate (VFP, for example, can return
5574 some aggregates in consecutive registers). These aren't
5575 available if the call is variadic. */
5576 if (aapcs_select_return_coproc (type, fntype) >= 0)
5577 return false;
5578
5579 /* Vector values should be returned using ARM registers, not
5580 memory (unless they're over 16 bytes, which will break since
5581 we only have four call-clobbered registers to play with). */
5582 if (TREE_CODE (type) == VECTOR_TYPE)
5583 return (size < 0 || size > (4 * UNITS_PER_WORD));
5584
5585 /* The rest go in memory. */
5586 return true;
5587 }
5588
5589 if (TREE_CODE (type) == VECTOR_TYPE)
5590 return (size < 0 || size > (4 * UNITS_PER_WORD));
5591
5592 if (!AGGREGATE_TYPE_P (type)
5593 && (TREE_CODE (type) != VECTOR_TYPE))
5594 /* All simple types are returned in registers. */
5595 return false;
5596
5597 if (arm_abi != ARM_ABI_APCS)
5598 {
5599 /* ATPCS and later return aggregate types in memory only if they are
5600 larger than a word (or are variable size). */
5601 return (size < 0 || size > UNITS_PER_WORD);
5602 }
5603
5604 /* For the arm-wince targets we choose to be compatible with Microsoft's
5605 ARM and Thumb compilers, which always return aggregates in memory. */
5606 #ifndef ARM_WINCE
5607 /* All structures/unions bigger than one word are returned in memory.
5608 Also catch the case where int_size_in_bytes returns -1. In this case
5609 the aggregate is either huge or of variable size, and in either case
5610 we will want to return it via memory and not in a register. */
5611 if (size < 0 || size > UNITS_PER_WORD)
5612 return true;
5613
5614 if (TREE_CODE (type) == RECORD_TYPE)
5615 {
5616 tree field;
5617
5618 /* For a struct the APCS says that we only return in a register
5619 if the type is 'integer like' and every addressable element
5620 has an offset of zero. For practical purposes this means
5621 that the structure can have at most one non bit-field element
5622 and that this element must be the first one in the structure. */
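/* For example, struct { int x; } or a struct whose members after the
   first are all bit-fields can be returned in a register, whereas
   struct { char a; char b; } goes in memory because its second member
   is addressable. */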
5623
5624 /* Find the first field, ignoring non FIELD_DECL things which will
5625 have been created by C++. */
5626 for (field = TYPE_FIELDS (type);
5627 field && TREE_CODE (field) != FIELD_DECL;
5628 field = DECL_CHAIN (field))
5629 continue;
5630
5631 if (field == NULL)
5632 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5633
5634 /* Check that the first field is valid for returning in a register. */
5635
5636 /* ... Floats are not allowed */
5637 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5638 return true;
5639
5640 /* ... Aggregates that are not themselves valid for returning in
5641 a register are not allowed. */
5642 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5643 return true;
5644
5645 /* Now check the remaining fields, if any. Only bitfields are allowed,
5646 since they are not addressable. */
5647 for (field = DECL_CHAIN (field);
5648 field;
5649 field = DECL_CHAIN (field))
5650 {
5651 if (TREE_CODE (field) != FIELD_DECL)
5652 continue;
5653
5654 if (!DECL_BIT_FIELD_TYPE (field))
5655 return true;
5656 }
5657
5658 return false;
5659 }
5660
5661 if (TREE_CODE (type) == UNION_TYPE)
5662 {
5663 tree field;
5664
5665 /* Unions can be returned in registers if every element is
5666 integral, or can be returned in an integer register. */
5667 for (field = TYPE_FIELDS (type);
5668 field;
5669 field = DECL_CHAIN (field))
5670 {
5671 if (TREE_CODE (field) != FIELD_DECL)
5672 continue;
5673
5674 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5675 return true;
5676
5677 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5678 return true;
5679 }
5680
5681 return false;
5682 }
5683 #endif /* not ARM_WINCE */
5684
5685 /* Return all other types in memory. */
5686 return true;
5687 }
5688
5689 const struct pcs_attribute_arg
5690 {
5691 const char *arg;
5692 enum arm_pcs value;
5693 } pcs_attribute_args[] =
5694 {
5695 {"aapcs", ARM_PCS_AAPCS},
5696 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5697 #if 0
5698 /* We could recognize these, but changes would be needed elsewhere
5699 * to implement them. */
5700 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5701 {"atpcs", ARM_PCS_ATPCS},
5702 {"apcs", ARM_PCS_APCS},
5703 #endif
5704 {NULL, ARM_PCS_UNKNOWN}
5705 };
5706
5707 static enum arm_pcs
5708 arm_pcs_from_attribute (tree attr)
5709 {
5710 const struct pcs_attribute_arg *ptr;
5711 const char *arg;
5712
5713 /* Get the value of the argument. */
5714 if (TREE_VALUE (attr) == NULL_TREE
5715 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5716 return ARM_PCS_UNKNOWN;
5717
5718 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5719
5720 /* Check it against the list of known arguments. */
5721 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5722 if (streq (arg, ptr->arg))
5723 return ptr->value;
5724
5725 /* An unrecognized PCS variant name. */
5726 return ARM_PCS_UNKNOWN;
5727 }
5728
5729 /* Get the PCS variant to use for this call. TYPE is the function's type
5730 specification, DECL is the specific declaration. DECL may be null if
5731 the call could be indirect or if this is a library call. */
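/* For example, declaring
     double f (double) __attribute__ ((pcs ("aapcs")));
   makes calls to f use the base variant (arguments and result in core
   registers) even when the default is "aapcs-vfp". */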
5732 static enum arm_pcs
5733 arm_get_pcs_model (const_tree type, const_tree decl)
5734 {
5735 bool user_convention = false;
5736 enum arm_pcs user_pcs = arm_pcs_default;
5737 tree attr;
5738
5739 gcc_assert (type);
5740
5741 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5742 if (attr)
5743 {
5744 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5745 user_convention = true;
5746 }
5747
5748 if (TARGET_AAPCS_BASED)
5749 {
5750 /* Detect varargs functions. These always use the base rules
5751 (no argument is ever a candidate for a co-processor
5752 register). */
5753 bool base_rules = stdarg_p (type);
5754
5755 if (user_convention)
5756 {
5757 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5758 sorry ("non-AAPCS derived PCS variant");
5759 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5760 error ("variadic functions must use the base AAPCS variant");
5761 }
5762
5763 if (base_rules)
5764 return ARM_PCS_AAPCS;
5765 else if (user_convention)
5766 return user_pcs;
5767 else if (decl && flag_unit_at_a_time)
5768 {
5769 /* Local functions never leak outside this compilation unit,
5770 so we are free to use whatever conventions are
5771 appropriate. */
5772 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5773 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE (decl));
5774 if (i && i->local)
5775 return ARM_PCS_AAPCS_LOCAL;
5776 }
5777 }
5778 else if (user_convention && user_pcs != arm_pcs_default)
5779 sorry ("PCS variant");
5780
5781 /* For everything else we use the target's default. */
5782 return arm_pcs_default;
5783 }
5784
5785
5786 static void
5787 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5788 const_tree fntype ATTRIBUTE_UNUSED,
5789 rtx libcall ATTRIBUTE_UNUSED,
5790 const_tree fndecl ATTRIBUTE_UNUSED)
5791 {
5792 /* Record the unallocated VFP registers. */
5793 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5794 pcum->aapcs_vfp_reg_alloc = 0;
5795 }
5796
5797 /* Walk down the type tree of TYPE counting consecutive base elements.
5798 If *MODEP is VOIDmode, then set it to the first valid floating point
5799 type. If a non-floating point type is found, or if a floating point
5800 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5801 otherwise return the count in the sub-tree. */
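/* For example, struct { float x, y, z; } yields *MODEP == SFmode and a
   count of 3, whereas struct { float f; double d; } mixes element
   modes and is rejected with -1. */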
5802 static int
5803 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5804 {
5805 machine_mode mode;
5806 HOST_WIDE_INT size;
5807
5808 switch (TREE_CODE (type))
5809 {
5810 case REAL_TYPE:
5811 mode = TYPE_MODE (type);
5812 if (mode != DFmode && mode != SFmode && mode != HFmode)
5813 return -1;
5814
5815 if (*modep == VOIDmode)
5816 *modep = mode;
5817
5818 if (*modep == mode)
5819 return 1;
5820
5821 break;
5822
5823 case COMPLEX_TYPE:
5824 mode = TYPE_MODE (TREE_TYPE (type));
5825 if (mode != DFmode && mode != SFmode)
5826 return -1;
5827
5828 if (*modep == VOIDmode)
5829 *modep = mode;
5830
5831 if (*modep == mode)
5832 return 2;
5833
5834 break;
5835
5836 case VECTOR_TYPE:
5837 /* Use V2SImode and V4SImode as representatives of all 64-bit
5838 and 128-bit vector types, whether or not those modes are
5839 supported with the present options. */
5840 size = int_size_in_bytes (type);
5841 switch (size)
5842 {
5843 case 8:
5844 mode = V2SImode;
5845 break;
5846 case 16:
5847 mode = V4SImode;
5848 break;
5849 default:
5850 return -1;
5851 }
5852
5853 if (*modep == VOIDmode)
5854 *modep = mode;
5855
5856 /* Vector modes are considered to be opaque: two vectors are
5857 equivalent for the purposes of being homogeneous aggregates
5858 if they are the same size. */
5859 if (*modep == mode)
5860 return 1;
5861
5862 break;
5863
5864 case ARRAY_TYPE:
5865 {
5866 int count;
5867 tree index = TYPE_DOMAIN (type);
5868
5869 /* Can't handle incomplete types nor sizes that are not
5870 fixed. */
5871 if (!COMPLETE_TYPE_P (type)
5872 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5873 return -1;
5874
5875 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5876 if (count == -1
5877 || !index
5878 || !TYPE_MAX_VALUE (index)
5879 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5880 || !TYPE_MIN_VALUE (index)
5881 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5882 || count < 0)
5883 return -1;
5884
5885 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5886 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5887
5888 /* There must be no padding. */
5889 if (wi::to_wide (TYPE_SIZE (type))
5890 != count * GET_MODE_BITSIZE (*modep))
5891 return -1;
5892
5893 return count;
5894 }
5895
5896 case RECORD_TYPE:
5897 {
5898 int count = 0;
5899 int sub_count;
5900 tree field;
5901
5902 /* Can't handle incomplete types nor sizes that are not
5903 fixed. */
5904 if (!COMPLETE_TYPE_P (type)
5905 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5906 return -1;
5907
5908 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5909 {
5910 if (TREE_CODE (field) != FIELD_DECL)
5911 continue;
5912
5913 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5914 if (sub_count < 0)
5915 return -1;
5916 count += sub_count;
5917 }
5918
5919 /* There must be no padding. */
5920 if (wi::to_wide (TYPE_SIZE (type))
5921 != count * GET_MODE_BITSIZE (*modep))
5922 return -1;
5923
5924 return count;
5925 }
5926
5927 case UNION_TYPE:
5928 case QUAL_UNION_TYPE:
5929 {
5930 /* These aren't very interesting except in a degenerate case. */
5931 int count = 0;
5932 int sub_count;
5933 tree field;
5934
5935 /* Can't handle incomplete types nor sizes that are not
5936 fixed. */
5937 if (!COMPLETE_TYPE_P (type)
5938 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5939 return -1;
5940
5941 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5942 {
5943 if (TREE_CODE (field) != FIELD_DECL)
5944 continue;
5945
5946 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5947 if (sub_count < 0)
5948 return -1;
5949 count = count > sub_count ? count : sub_count;
5950 }
5951
5952 /* There must be no padding. */
5953 if (wi::to_wide (TYPE_SIZE (type))
5954 != count * GET_MODE_BITSIZE (*modep))
5955 return -1;
5956
5957 return count;
5958 }
5959
5960 default:
5961 break;
5962 }
5963
5964 return -1;
5965 }
5966
5967 /* Return true if PCS_VARIANT should use VFP registers. */
5968 static bool
5969 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5970 {
5971 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5972 {
5973 static bool seen_thumb1_vfp = false;
5974
5975 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5976 {
5977 sorry ("Thumb-1 hard-float VFP ABI");
5978 /* sorry() is not immediately fatal, so only display this once. */
5979 seen_thumb1_vfp = true;
5980 }
5981
5982 return true;
5983 }
5984
5985 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5986 return false;
5987
5988 return (TARGET_32BIT && TARGET_HARD_FLOAT
5989 && (TARGET_VFP_DOUBLE || !is_double));
5990 }
5991
5992 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5993 suitable for passing or returning in VFP registers for the PCS
5994 variant selected. If it is, then *BASE_MODE is updated to contain
5995 a machine mode describing each element of the argument's type and
5996 *COUNT to hold the number of such elements. */
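/* For example, a struct of four floats gives *BASE_MODE == SFmode and
   *COUNT == 4; a _Complex double gives DFmode and a count of 2. */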
5997 static bool
5998 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5999 machine_mode mode, const_tree type,
6000 machine_mode *base_mode, int *count)
6001 {
6002 machine_mode new_mode = VOIDmode;
6003
6004 /* If we have the type information, prefer that to working things
6005 out from the mode. */
6006 if (type)
6007 {
6008 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6009
6010 if (ag_count > 0 && ag_count <= 4)
6011 *count = ag_count;
6012 else
6013 return false;
6014 }
6015 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6016 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6017 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6018 {
6019 *count = 1;
6020 new_mode = mode;
6021 }
6022 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6023 {
6024 *count = 2;
6025 new_mode = (mode == DCmode ? DFmode : SFmode);
6026 }
6027 else
6028 return false;
6029
6030
6031 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6032 return false;
6033
6034 *base_mode = new_mode;
6035 return true;
6036 }
6037
6038 static bool
6039 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6040 machine_mode mode, const_tree type)
6041 {
6042 int count ATTRIBUTE_UNUSED;
6043 machine_mode ag_mode ATTRIBUTE_UNUSED;
6044
6045 if (!use_vfp_abi (pcs_variant, false))
6046 return false;
6047 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6048 &ag_mode, &count);
6049 }
6050
6051 static bool
6052 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6053 const_tree type)
6054 {
6055 if (!use_vfp_abi (pcum->pcs_variant, false))
6056 return false;
6057
6058 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6059 &pcum->aapcs_vfp_rmode,
6060 &pcum->aapcs_vfp_rcount);
6061 }
6062
6063 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6064 for the behaviour of this function. */
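/* For example, a homogeneous aggregate of three doubles has
   aapcs_vfp_rmode == DFmode and aapcs_vfp_rcount == 3, so shift == 2
   (each D register covers two S-register slots) and mask == 0x3f: the
   loop below searches for six consecutive free single-precision slots
   starting at an even register number. */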
6065
6066 static bool
6067 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6068 const_tree type ATTRIBUTE_UNUSED)
6069 {
6070 int rmode_size
6071 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6072 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6073 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6074 int regno;
6075
6076 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6077 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6078 {
6079 pcum->aapcs_vfp_reg_alloc = mask << regno;
6080 if (mode == BLKmode
6081 || (mode == TImode && ! TARGET_NEON)
6082 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6083 {
6084 int i;
6085 int rcount = pcum->aapcs_vfp_rcount;
6086 int rshift = shift;
6087 machine_mode rmode = pcum->aapcs_vfp_rmode;
6088 rtx par;
6089 if (!TARGET_NEON)
6090 {
6091 /* Avoid using unsupported vector modes. */
6092 if (rmode == V2SImode)
6093 rmode = DImode;
6094 else if (rmode == V4SImode)
6095 {
6096 rmode = DImode;
6097 rcount *= 2;
6098 rshift /= 2;
6099 }
6100 }
6101 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6102 for (i = 0; i < rcount; i++)
6103 {
6104 rtx tmp = gen_rtx_REG (rmode,
6105 FIRST_VFP_REGNUM + regno + i * rshift);
6106 tmp = gen_rtx_EXPR_LIST
6107 (VOIDmode, tmp,
6108 GEN_INT (i * GET_MODE_SIZE (rmode)));
6109 XVECEXP (par, 0, i) = tmp;
6110 }
6111
6112 pcum->aapcs_reg = par;
6113 }
6114 else
6115 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6116 return true;
6117 }
6118 return false;
6119 }
6120
6121 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6122 comment there for the behaviour of this function. */
6123
6124 static rtx
6125 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6126 machine_mode mode,
6127 const_tree type ATTRIBUTE_UNUSED)
6128 {
6129 if (!use_vfp_abi (pcs_variant, false))
6130 return NULL;
6131
6132 if (mode == BLKmode
6133 || (GET_MODE_CLASS (mode) == MODE_INT
6134 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6135 && !TARGET_NEON))
6136 {
6137 int count;
6138 machine_mode ag_mode;
6139 int i;
6140 rtx par;
6141 int shift;
6142
6143 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6144 &ag_mode, &count);
6145
6146 if (!TARGET_NEON)
6147 {
6148 if (ag_mode == V2SImode)
6149 ag_mode = DImode;
6150 else if (ag_mode == V4SImode)
6151 {
6152 ag_mode = DImode;
6153 count *= 2;
6154 }
6155 }
6156 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6157 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6158 for (i = 0; i < count; i++)
6159 {
6160 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6161 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6162 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6163 XVECEXP (par, 0, i) = tmp;
6164 }
6165
6166 return par;
6167 }
6168
6169 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6170 }
6171
6172 static void
6173 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6174 machine_mode mode ATTRIBUTE_UNUSED,
6175 const_tree type ATTRIBUTE_UNUSED)
6176 {
6177 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6178 pcum->aapcs_vfp_reg_alloc = 0;
6179 return;
6180 }
6181
6182 #define AAPCS_CP(X) \
6183 { \
6184 aapcs_ ## X ## _cum_init, \
6185 aapcs_ ## X ## _is_call_candidate, \
6186 aapcs_ ## X ## _allocate, \
6187 aapcs_ ## X ## _is_return_candidate, \
6188 aapcs_ ## X ## _allocate_return_reg, \
6189 aapcs_ ## X ## _advance \
6190 }
6191
6192 /* Table of co-processors that can be used to pass arguments in
6193 registers. Ideally no argument should be a candidate for more than
6194 one co-processor table entry, but the table is processed in order
6195 and stops after the first match. If that entry then fails to put
6196 the argument into a co-processor register, the argument will go on
6197 the stack. */
6198 static struct
6199 {
6200 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6201 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6202
6203 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6204 BLKmode) is a candidate for this co-processor's registers; this
6205 function should ignore any position-dependent state in
6206 CUMULATIVE_ARGS and only use call-type dependent information. */
6207 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6208
6209 /* Return true if the argument does get a co-processor register; it
6210 should set aapcs_reg to an RTX of the register allocated as is
6211 required for a return from FUNCTION_ARG. */
6212 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6213
6214 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6215 be returned in this co-processor's registers. */
6216 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6217
6218 /* Allocate and return an RTX element to hold the return type of a call. This
6219 routine must not fail and will only be called if is_return_candidate
6220 returned true with the same parameters. */
6221 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6222
6223 /* Finish processing this argument and prepare to start processing
6224 the next one. */
6225 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6226 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6227 {
6228 AAPCS_CP(vfp)
6229 };
6230
6231 #undef AAPCS_CP
6232
6233 static int
6234 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6235 const_tree type)
6236 {
6237 int i;
6238
6239 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6240 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6241 return i;
6242
6243 return -1;
6244 }
6245
6246 static int
6247 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6248 {
6249 /* We aren't passed a decl, so we can't check that a call is local.
6250 However, it isn't clear that that would be a win anyway, since it
6251 might limit some tail-calling opportunities. */
6252 enum arm_pcs pcs_variant;
6253
6254 if (fntype)
6255 {
6256 const_tree fndecl = NULL_TREE;
6257
6258 if (TREE_CODE (fntype) == FUNCTION_DECL)
6259 {
6260 fndecl = fntype;
6261 fntype = TREE_TYPE (fntype);
6262 }
6263
6264 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6265 }
6266 else
6267 pcs_variant = arm_pcs_default;
6268
6269 if (pcs_variant != ARM_PCS_AAPCS)
6270 {
6271 int i;
6272
6273 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6274 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6275 TYPE_MODE (type),
6276 type))
6277 return i;
6278 }
6279 return -1;
6280 }
6281
6282 static rtx
6283 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6284 const_tree fntype)
6285 {
6286 /* We aren't passed a decl, so we can't check that a call is local.
6287 However, it isn't clear that that would be a win anyway, since it
6288 might limit some tail-calling opportunities. */
6289 enum arm_pcs pcs_variant;
6290 int unsignedp ATTRIBUTE_UNUSED;
6291
6292 if (fntype)
6293 {
6294 const_tree fndecl = NULL_TREE;
6295
6296 if (TREE_CODE (fntype) == FUNCTION_DECL)
6297 {
6298 fndecl = fntype;
6299 fntype = TREE_TYPE (fntype);
6300 }
6301
6302 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6303 }
6304 else
6305 pcs_variant = arm_pcs_default;
6306
6307 /* Promote integer types. */
6308 if (type && INTEGRAL_TYPE_P (type))
6309 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6310
6311 if (pcs_variant != ARM_PCS_AAPCS)
6312 {
6313 int i;
6314
6315 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6316 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6317 type))
6318 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6319 mode, type);
6320 }
6321
6322 /* Promotes small structs returned in a register to full-word size
6323 for big-endian AAPCS. */
6324 if (type && arm_return_in_msb (type))
6325 {
6326 HOST_WIDE_INT size = int_size_in_bytes (type);
6327 if (size % UNITS_PER_WORD != 0)
6328 {
6329 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6330 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6331 }
6332 }
6333
6334 return gen_rtx_REG (mode, R0_REGNUM);
6335 }
6336
6337 static rtx
6338 aapcs_libcall_value (machine_mode mode)
6339 {
6340 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6341 && GET_MODE_SIZE (mode) <= 4)
6342 mode = SImode;
6343
6344 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6345 }
6346
6347 /* Lay out a function argument using the AAPCS rules. The rule
6348 numbers referred to here are those in the AAPCS. */
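/* For example, an 8-byte struct of two ints arriving when NCRN == 3
   and nothing has yet been placed on the stack is split by rule C5:
   one word goes in r3 and the other goes on the stack
   (aapcs_partial == 4). */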
6349 static void
6350 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6351 const_tree type, bool named)
6352 {
6353 int nregs, nregs2;
6354 int ncrn;
6355
6356 /* We only need to do this once per argument. */
6357 if (pcum->aapcs_arg_processed)
6358 return;
6359
6360 pcum->aapcs_arg_processed = true;
6361
6362 /* Special case: if named is false then we are handling an incoming
6363 anonymous argument which is on the stack. */
6364 if (!named)
6365 return;
6366
6367 /* Is this a potential co-processor register candidate? */
6368 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6369 {
6370 int slot = aapcs_select_call_coproc (pcum, mode, type);
6371 pcum->aapcs_cprc_slot = slot;
6372
6373 /* We don't have to apply any of the rules from part B of the
6374 preparation phase, these are handled elsewhere in the
6375 compiler. */
6376
6377 if (slot >= 0)
6378 {
6379 /* A Co-processor register candidate goes either in its own
6380 class of registers or on the stack. */
6381 if (!pcum->aapcs_cprc_failed[slot])
6382 {
6383 /* C1.cp - Try to allocate the argument to co-processor
6384 registers. */
6385 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6386 return;
6387
6388 /* C2.cp - Put the argument on the stack and note that we
6389 can't assign any more candidates in this slot. We also
6390 need to note that we have allocated stack space, so that
6391 we won't later try to split a non-cprc candidate between
6392 core registers and the stack. */
6393 pcum->aapcs_cprc_failed[slot] = true;
6394 pcum->can_split = false;
6395 }
6396
6397 /* We didn't get a register, so this argument goes on the
6398 stack. */
6399 gcc_assert (pcum->can_split == false);
6400 return;
6401 }
6402 }
6403
6404 /* C3 - For double-word aligned arguments, round the NCRN up to the
6405 next even number. */
6406 ncrn = pcum->aapcs_ncrn;
6407 if (ncrn & 1)
6408 {
6409 int res = arm_needs_doubleword_align (mode, type);
6410 /* Only warn during RTL expansion of call stmts, otherwise we would
6411 warn e.g. during gimplification even on functions that will be
6412 always inlined, and we'd warn multiple times. Don't warn when
6413 called in expand_function_start either, as we warn instead in
6414 arm_function_arg_boundary in that case. */
6415 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6416 inform (input_location, "parameter passing for argument of type "
6417 "%qT changed in GCC 7.1", type);
6418 else if (res > 0)
6419 ncrn++;
6420 }
6421
6422 nregs = ARM_NUM_REGS2 (mode, type);
6423
6424 /* Sigh, this test should really assert that nregs > 0, but a GCC
6425 extension allows empty structs and then gives them empty size; it
6426 then allows such a structure to be passed by value. For some of
6427 the code below we have to pretend that such an argument has
6428 non-zero size so that we 'locate' it correctly either in
6429 registers or on the stack. */
6430 gcc_assert (nregs >= 0);
6431
6432 nregs2 = nregs ? nregs : 1;
6433
6434 /* C4 - Argument fits entirely in core registers. */
6435 if (ncrn + nregs2 <= NUM_ARG_REGS)
6436 {
6437 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6438 pcum->aapcs_next_ncrn = ncrn + nregs;
6439 return;
6440 }
6441
6442 /* C5 - Some core registers left and there are no arguments already
6443 on the stack: split this argument between the remaining core
6444 registers and the stack. */
6445 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6446 {
6447 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6448 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6449 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6450 return;
6451 }
6452
6453 /* C6 - NCRN is set to 4. */
6454 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6455
6456 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6457 return;
6458 }
6459
6460 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6461 for a call to a function whose data type is FNTYPE.
6462 For a library call, FNTYPE is NULL. */
6463 void
6464 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6465 rtx libname,
6466 tree fndecl ATTRIBUTE_UNUSED)
6467 {
6468 /* Determine the procedure-call standard (PCS) variant in use. */
6469 if (fntype)
6470 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6471 else
6472 pcum->pcs_variant = arm_pcs_default;
6473
6474 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6475 {
6476 if (arm_libcall_uses_aapcs_base (libname))
6477 pcum->pcs_variant = ARM_PCS_AAPCS;
6478
6479 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6480 pcum->aapcs_reg = NULL_RTX;
6481 pcum->aapcs_partial = 0;
6482 pcum->aapcs_arg_processed = false;
6483 pcum->aapcs_cprc_slot = -1;
6484 pcum->can_split = true;
6485
6486 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6487 {
6488 int i;
6489
6490 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6491 {
6492 pcum->aapcs_cprc_failed[i] = false;
6493 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6494 }
6495 }
6496 return;
6497 }
6498
6499 /* Legacy ABIs */
6500
6501 /* On the ARM, the offset starts at 0. */
6502 pcum->nregs = 0;
6503 pcum->iwmmxt_nregs = 0;
6504 pcum->can_split = true;
6505
6506 /* Varargs vectors are treated the same as long long.
6507 named_count avoids having to change the way arm handles 'named' */
6508 pcum->named_count = 0;
6509 pcum->nargs = 0;
6510
6511 if (TARGET_REALLY_IWMMXT && fntype)
6512 {
6513 tree fn_arg;
6514
6515 for (fn_arg = TYPE_ARG_TYPES (fntype);
6516 fn_arg;
6517 fn_arg = TREE_CHAIN (fn_arg))
6518 pcum->named_count += 1;
6519
6520 if (! pcum->named_count)
6521 pcum->named_count = INT_MAX;
6522 }
6523 }
6524
6525 /* Return 1 if double word alignment is required for argument passing.
6526 Return -1 if double word alignment used to be required for argument
6527 passing before PR77728 ABI fix, but is not required anymore.
6528 Return 0 if double word alignment is not required and wasn't required
6529 before either. */
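/* For example, a long long or a struct containing a double has 8-byte
   alignment and yields 1, whereas a plain int or a struct of ints
   yields 0. */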
6530 static int
6531 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6532 {
6533 if (!type)
6534 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6535
6536 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6537 if (!AGGREGATE_TYPE_P (type))
6538 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6539
6540 /* Array types: Use member alignment of element type. */
6541 if (TREE_CODE (type) == ARRAY_TYPE)
6542 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6543
6544 int ret = 0;
6545 /* Record/aggregate types: Use greatest member alignment of any member. */
6546 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6547 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6548 {
6549 if (TREE_CODE (field) == FIELD_DECL)
6550 return 1;
6551 else
6552 /* Before PR77728 fix, we were incorrectly considering also
6553 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6554 Make sure we can warn about that with -Wpsabi. */
6555 ret = -1;
6556 }
6557
6558 return ret;
6559 }
6560
6561
6562 /* Determine where to put an argument to a function.
6563 Value is zero to push the argument on the stack,
6564 or a hard register in which to store the argument.
6565
6566 MODE is the argument's machine mode.
6567 TYPE is the data type of the argument (as a tree).
6568 This is null for libcalls where that information may
6569 not be available.
6570 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6571 the preceding args and about the function being called.
6572 NAMED is nonzero if this argument is a named parameter
6573 (otherwise it is an extra parameter matching an ellipsis).
6574
6575 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6576 other arguments are passed on the stack. If (NAMED == 0) (which happens
6577 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6578 defined), say it is passed in the stack (function_prologue will
6579 indeed make it pass in the stack if necessary). */
6580
6581 static rtx
6582 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6583 const_tree type, bool named)
6584 {
6585 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6586 int nregs;
6587
6588 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6589 a call insn (op3 of a call_value insn). */
6590 if (mode == VOIDmode)
6591 return const0_rtx;
6592
6593 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6594 {
6595 aapcs_layout_arg (pcum, mode, type, named);
6596 return pcum->aapcs_reg;
6597 }
6598
6599 /* Varargs vectors are treated the same as long long.
6600 named_count avoids having to change the way arm handles 'named'. */
6601 if (TARGET_IWMMXT_ABI
6602 && arm_vector_mode_supported_p (mode)
6603 && pcum->named_count > pcum->nargs + 1)
6604 {
6605 if (pcum->iwmmxt_nregs <= 9)
6606 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6607 else
6608 {
6609 pcum->can_split = false;
6610 return NULL_RTX;
6611 }
6612 }
6613
6614 /* Put doubleword aligned quantities in even register pairs. */
6615 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6616 {
6617 int res = arm_needs_doubleword_align (mode, type);
6618 if (res < 0 && warn_psabi)
6619 inform (input_location, "parameter passing for argument of type "
6620 "%qT changed in GCC 7.1", type);
6621 else if (res > 0)
6622 pcum->nregs++;
6623 }
6624
6625 /* Only allow splitting an arg between regs and memory if all preceding
6626 args were allocated to regs. For args passed by reference we only count
6627 the reference pointer. */
6628 if (pcum->can_split)
6629 nregs = 1;
6630 else
6631 nregs = ARM_NUM_REGS2 (mode, type);
6632
6633 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6634 return NULL_RTX;
6635
6636 return gen_rtx_REG (mode, pcum->nregs);
6637 }
6638
6639 static unsigned int
6640 arm_function_arg_boundary (machine_mode mode, const_tree type)
6641 {
6642 if (!ARM_DOUBLEWORD_ALIGN)
6643 return PARM_BOUNDARY;
6644
6645 int res = arm_needs_doubleword_align (mode, type);
6646 if (res < 0 && warn_psabi)
6647 inform (input_location, "parameter passing for argument of type %qT "
6648 "changed in GCC 7.1", type);
6649
6650 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6651 }
6652
6653 static int
6654 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6655 tree type, bool named)
6656 {
6657 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6658 int nregs = pcum->nregs;
6659
6660 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6661 {
6662 aapcs_layout_arg (pcum, mode, type, named);
6663 return pcum->aapcs_partial;
6664 }
6665
6666 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6667 return 0;
6668
6669 if (NUM_ARG_REGS > nregs
6670 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6671 && pcum->can_split)
6672 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6673
6674 return 0;
6675 }
6676
6677 /* Update the data in PCUM to advance over an argument
6678 of mode MODE and data type TYPE.
6679 (TYPE is null for libcalls where that information may not be available.) */
6680
6681 static void
6682 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6683 const_tree type, bool named)
6684 {
6685 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6686
6687 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6688 {
6689 aapcs_layout_arg (pcum, mode, type, named);
6690
6691 if (pcum->aapcs_cprc_slot >= 0)
6692 {
6693 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6694 type);
6695 pcum->aapcs_cprc_slot = -1;
6696 }
6697
6698 /* Generic stuff. */
6699 pcum->aapcs_arg_processed = false;
6700 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6701 pcum->aapcs_reg = NULL_RTX;
6702 pcum->aapcs_partial = 0;
6703 }
6704 else
6705 {
6706 pcum->nargs += 1;
6707 if (arm_vector_mode_supported_p (mode)
6708 && pcum->named_count > pcum->nargs
6709 && TARGET_IWMMXT_ABI)
6710 pcum->iwmmxt_nregs += 1;
6711 else
6712 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6713 }
6714 }
6715
6716 /* Variable sized types are passed by reference. This is a GCC
6717 extension to the ARM ABI. */
6718
6719 static bool
6720 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6721 machine_mode mode ATTRIBUTE_UNUSED,
6722 const_tree type, bool named ATTRIBUTE_UNUSED)
6723 {
6724 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6725 }
6726 \f
6727 /* Encode the current state of the #pragma [no_]long_calls. */
6728 typedef enum
6729 {
6730 OFF, /* No #pragma [no_]long_calls is in effect. */
6731 LONG, /* #pragma long_calls is in effect. */
6732 SHORT /* #pragma no_long_calls is in effect. */
6733 } arm_pragma_enum;
6734
6735 static arm_pragma_enum arm_pragma_long_calls = OFF;
6736
6737 void
6738 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6739 {
6740 arm_pragma_long_calls = LONG;
6741 }
6742
6743 void
6744 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6745 {
6746 arm_pragma_long_calls = SHORT;
6747 }
6748
6749 void
6750 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6751 {
6752 arm_pragma_long_calls = OFF;
6753 }
6754 \f
6755 /* Handle an attribute requiring a FUNCTION_DECL;
6756 arguments as in struct attribute_spec.handler. */
6757 static tree
6758 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6759 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6760 {
6761 if (TREE_CODE (*node) != FUNCTION_DECL)
6762 {
6763 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6764 name);
6765 *no_add_attrs = true;
6766 }
6767
6768 return NULL_TREE;
6769 }
6770
6771 /* Handle an "interrupt" or "isr" attribute;
6772 arguments as in struct attribute_spec.handler. */
6773 static tree
6774 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6775 bool *no_add_attrs)
6776 {
6777 if (DECL_P (*node))
6778 {
6779 if (TREE_CODE (*node) != FUNCTION_DECL)
6780 {
6781 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6782 name);
6783 *no_add_attrs = true;
6784 }
6785 /* FIXME: the argument if any is checked for type attributes;
6786 should it be checked for decl ones? */
6787 }
6788 else
6789 {
6790 if (TREE_CODE (*node) == FUNCTION_TYPE
6791 || TREE_CODE (*node) == METHOD_TYPE)
6792 {
6793 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6794 {
6795 warning (OPT_Wattributes, "%qE attribute ignored",
6796 name);
6797 *no_add_attrs = true;
6798 }
6799 }
6800 else if (TREE_CODE (*node) == POINTER_TYPE
6801 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6802 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6803 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6804 {
6805 *node = build_variant_type_copy (*node);
6806 TREE_TYPE (*node) = build_type_attribute_variant
6807 (TREE_TYPE (*node),
6808 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6809 *no_add_attrs = true;
6810 }
6811 else
6812 {
6813 /* Possibly pass this attribute on from the type to a decl. */
6814 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6815 | (int) ATTR_FLAG_FUNCTION_NEXT
6816 | (int) ATTR_FLAG_ARRAY_NEXT))
6817 {
6818 *no_add_attrs = true;
6819 return tree_cons (name, args, NULL_TREE);
6820 }
6821 else
6822 {
6823 warning (OPT_Wattributes, "%qE attribute ignored",
6824 name);
6825 }
6826 }
6827 }
6828
6829 return NULL_TREE;
6830 }
6831
6832 /* Handle a "pcs" attribute; arguments as in struct
6833 attribute_spec.handler. */
6834 static tree
6835 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6836 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6837 {
6838 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6839 {
6840 warning (OPT_Wattributes, "%qE attribute ignored", name);
6841 *no_add_attrs = true;
6842 }
6843 return NULL_TREE;
6844 }
6845
6846 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6847 /* Handle the "notshared" attribute. This attribute is another way of
6848 requesting hidden visibility. ARM's compiler supports
6849 "__declspec(notshared)"; we support the same thing via an
6850 attribute. */
6851
6852 static tree
6853 arm_handle_notshared_attribute (tree *node,
6854 tree name ATTRIBUTE_UNUSED,
6855 tree args ATTRIBUTE_UNUSED,
6856 int flags ATTRIBUTE_UNUSED,
6857 bool *no_add_attrs)
6858 {
6859 tree decl = TYPE_NAME (*node);
6860
6861 if (decl)
6862 {
6863 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6864 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6865 *no_add_attrs = false;
6866 }
6867 return NULL_TREE;
6868 }
6869 #endif
6870
6871 /* This function returns true if a function with declaration FNDECL and type
6872 FNTYPE uses the stack to pass arguments or return its value, and false
6873 otherwise. This is used for functions with the attributes
6874 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6875 diagnostic messages if the stack is used. NAME is the name of the attribute
6876 used. */
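/* For example, a cmse_nonsecure_entry function taking five int
   arguments is rejected here, because the fifth argument would have to
   be passed on the stack. */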
6877
6878 static bool
6879 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6880 {
6881 function_args_iterator args_iter;
6882 CUMULATIVE_ARGS args_so_far_v;
6883 cumulative_args_t args_so_far;
6884 bool first_param = true;
6885 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6886
6887 /* Error out if any argument is passed on the stack. */
6888 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6889 args_so_far = pack_cumulative_args (&args_so_far_v);
6890 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6891 {
6892 rtx arg_rtx;
6893 machine_mode arg_mode = TYPE_MODE (arg_type);
6894
6895 prev_arg_type = arg_type;
6896 if (VOID_TYPE_P (arg_type))
6897 continue;
6898
6899 if (!first_param)
6900 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6901 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6902 if (!arg_rtx
6903 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6904 {
6905 error ("%qE attribute not available to functions with arguments "
6906 "passed on the stack", name);
6907 return true;
6908 }
6909 first_param = false;
6910 }
6911
6912 /* Error out for variadic functions since we cannot control how many
6913 arguments will be passed and thus the stack could be used. stdarg_p () is
6914 not used for this check to avoid walking the argument list twice. */
6915 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6916 {
6917 error ("%qE attribute not available to functions with variable number "
6918 "of arguments", name);
6919 return true;
6920 }
6921
6922 /* Error out if return value is passed on the stack. */
6923 ret_type = TREE_TYPE (fntype);
6924 if (arm_return_in_memory (ret_type, fntype))
6925 {
6926 error ("%qE attribute not available to functions that return value on "
6927 "the stack", name);
6928 return true;
6929 }
6930 return false;
6931 }
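
/* Illustrative example (not part of this file): declarations that the
   check above accepts and rejects. The names are hypothetical and
   compilation with -mcmse on an ARMv8-M target is assumed.

     int __attribute__ ((cmse_nonsecure_entry))
     ok (int a, int b, int c, int d);

     int __attribute__ ((cmse_nonsecure_entry))
     bad (int a, int b, int c, int d, int e);

   All of ok's arguments fit in r0-r3, so it passes the check. bad's
   fifth argument would be passed on the stack under the AAPCS, so
   cmse_func_args_or_return_in_stack reports an error for it. */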
6932
6933 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6934 function will check whether the attribute is allowed here and will add the
6935 attribute to the function declaration tree or otherwise issue a warning. */
6936
6937 static tree
6938 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6939 tree /* args */,
6940 int /* flags */,
6941 bool *no_add_attrs)
6942 {
6943 tree fndecl;
6944
6945 if (!use_cmse)
6946 {
6947 *no_add_attrs = true;
6948 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6949 name);
6950 return NULL_TREE;
6951 }
6952
6953 /* Warn and ignore the attribute if it is not on a function declaration. */
6954 if (TREE_CODE (*node) != FUNCTION_DECL)
6955 {
6956 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6957 name);
6958 *no_add_attrs = true;
6959 return NULL_TREE;
6960 }
6961
6962 fndecl = *node;
6963
6964 /* Warn for static linkage functions. */
6965 if (!TREE_PUBLIC (fndecl))
6966 {
6967 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
6968 "with static linkage", name);
6969 *no_add_attrs = true;
6970 return NULL_TREE;
6971 }
6972
6973 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
6974 TREE_TYPE (fndecl));
6975 return NULL_TREE;
6976 }
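
/* Illustrative example (not part of this file): the handler above in
   action. The function names are hypothetical and -mcmse is assumed.

     int __attribute__ ((cmse_nonsecure_entry))
     get_status (void)
     {
       return 1;
     }

     static int __attribute__ ((cmse_nonsecure_entry))
     helper (void)
     {
       return 0;
     }

   get_status is accepted and gets the special entry-function epilogue
   (returning with BXNS); helper only draws a warning, since a function
   with static linkage cannot be an entry point for non-secure code. */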
6977
6978
6979 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6980 function will check whether the attribute is allowed here and will add the
6981 attribute to the function type tree or otherwise issue a diagnostic. The
6982 reason we check this at declaration time is to only allow the use of the
6983 attribute with declarations of function pointers and not function
6984 declarations. This function checks NODE is of the expected type and issues
6985 diagnostics otherwise using NAME. If it is not of the expected type
6986 *NO_ADD_ATTRS will be set to true. */
6987
6988 static tree
6989 arm_handle_cmse_nonsecure_call (tree *node, tree name,
6990 tree /* args */,
6991 int /* flags */,
6992 bool *no_add_attrs)
6993 {
6994 tree decl = NULL_TREE, fntype = NULL_TREE;
6995 tree type;
6996
6997 if (!use_cmse)
6998 {
6999 *no_add_attrs = true;
7000 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
7001 name);
7002 return NULL_TREE;
7003 }
7004
7005 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7006 {
7007 decl = *node;
7008 fntype = TREE_TYPE (decl);
7009 }
7010
7011 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7012 fntype = TREE_TYPE (fntype);
7013
7014 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7015 {
7016 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7017 "function pointer", name);
7018 *no_add_attrs = true;
7019 return NULL_TREE;
7020 }
7021
7022 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7023
7024 if (*no_add_attrs)
7025 return NULL_TREE;
7026
7027 /* Prevent trees from being shared among function types with and without
7028 the cmse_nonsecure_call attribute. */
7029 type = TREE_TYPE (decl);
7030
7031 type = build_distinct_type_copy (type);
7032 TREE_TYPE (decl) = type;
7033 fntype = type;
7034
7035 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7036 {
7037 type = fntype;
7038 fntype = TREE_TYPE (fntype);
7039 fntype = build_distinct_type_copy (fntype);
7040 TREE_TYPE (type) = fntype;
7041 }
7042
7043 /* Construct a type attribute and add it to the function type. */
7044 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7045 TYPE_ATTRIBUTES (fntype));
7046 TYPE_ATTRIBUTES (fntype) = attrs;
7047 return NULL_TREE;
7048 }
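
/* Illustrative example (not part of this file): declarations seen by
   the handler above. The names are hypothetical and -mcmse is assumed.

     int (*ns_callback) (int) __attribute__ ((cmse_nonsecure_call));

     int __attribute__ ((cmse_nonsecure_call)) direct (int);

   ns_callback is accepted: the attribute ends up on the function type
   at the base of the pointer, and calls through it use the non-secure
   call sequence (BLXNS). direct is rejected with a warning because the
   attribute may only be applied to the base type of a function
   pointer. */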
7049
7050 /* Return 0 if the attributes for two types are incompatible, 1 if they
7051 are compatible, and 2 if they are nearly compatible (which causes a
7052 warning to be generated). */
7053 static int
7054 arm_comp_type_attributes (const_tree type1, const_tree type2)
7055 {
7056 int l1, l2, s1, s2;
7057
7058 /* Check for mismatch of non-default calling convention. */
7059 if (TREE_CODE (type1) != FUNCTION_TYPE)
7060 return 1;
7061
7062 /* Check for mismatched call attributes. */
7063 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7064 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7065 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7066 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7067
7068 /* Only bother to check if an attribute is defined. */
7069 if (l1 | l2 | s1 | s2)
7070 {
7071 /* If one type has an attribute, the other must have the same attribute. */
7072 if ((l1 != l2) || (s1 != s2))
7073 return 0;
7074
7075 /* Disallow mixed attributes. */
7076 if ((l1 & s2) || (l2 & s1))
7077 return 0;
7078 }
7079
7080 /* Check for mismatched ISR attribute. */
7081 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7082 if (! l1)
7083 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7084 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7085 if (! l2)
7086 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7087 if (l1 != l2)
7088 return 0;
7089
7090 l1 = lookup_attribute ("cmse_nonsecure_call",
7091 TYPE_ATTRIBUTES (type1)) != NULL;
7092 l2 = lookup_attribute ("cmse_nonsecure_call",
7093 TYPE_ATTRIBUTES (type2)) != NULL;
7094
7095 if (l1 != l2)
7096 return 0;
7097
7098 return 1;
7099 }
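
/* Illustrative sketch (not part of this file) of a pair of types the
   hook above considers incompatible. The declarations are hypothetical.

     void f (void) __attribute__ ((long_call));
     void f (void) __attribute__ ((short_call));

   For these two function types the long_call/short_call attributes
   differ (and are mixed), so arm_comp_type_attributes returns 0 and the
   front end treats the redeclaration as conflicting. */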
7100
7101 /* Assigns default attributes to a newly defined type. This is used to
7102 set short_call/long_call attributes for function types of
7103 functions defined inside corresponding #pragma scopes. */
7104 static void
7105 arm_set_default_type_attributes (tree type)
7106 {
7107 /* Add __attribute__ ((long_call)) to all functions when inside
7108 #pragma long_calls, or __attribute__ ((short_call)) when inside
7109 #pragma no_long_calls. */
7110 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7111 {
7112 tree type_attr_list, attr_name;
7113 type_attr_list = TYPE_ATTRIBUTES (type);
7114
7115 if (arm_pragma_long_calls == LONG)
7116 attr_name = get_identifier ("long_call");
7117 else if (arm_pragma_long_calls == SHORT)
7118 attr_name = get_identifier ("short_call");
7119 else
7120 return;
7121
7122 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7123 TYPE_ATTRIBUTES (type) = type_attr_list;
7124 }
7125 }
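
/* Illustrative example (not part of this file): the pragmas this hook
   responds to. The function names are hypothetical.

     #pragma long_calls
     extern void far_helper (void);
     #pragma long_calls_off

     extern void near_helper (void);

   far_helper is declared while arm_pragma_long_calls == LONG, so its
   function type receives a default long_call attribute; near_helper is
   declared after #pragma long_calls_off and is left alone. #pragma
   no_long_calls would instead add short_call. */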
7126 \f
7127 /* Return true if DECL is known to be linked into section SECTION. */
7128
7129 static bool
7130 arm_function_in_section_p (tree decl, section *section)
7131 {
7132 /* We can only be certain about the prevailing symbol definition. */
7133 if (!decl_binds_to_current_def_p (decl))
7134 return false;
7135
7136 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7137 if (!DECL_SECTION_NAME (decl))
7138 {
7139 /* Make sure that we will not create a unique section for DECL. */
7140 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7141 return false;
7142 }
7143
7144 return function_section (decl) == section;
7145 }
7146
7147 /* Return nonzero if a 32-bit "long_call" should be generated for
7148 a call from the current function to DECL. We generate a long_call
7149 if the function:
7150
7151 a. has an __attribute__ ((long_call))
7152 or b. is within the scope of a #pragma long_calls
7153 or c. the -mlong-calls command line switch has been specified
7154
7155 However we do not generate a long call if the function:
7156
7157 d. has an __attribute__ ((short_call))
7158 or e. is inside the scope of a #pragma no_long_calls
7159 or f. is defined in the same section as the current function. */
7160
7161 bool
7162 arm_is_long_call_p (tree decl)
7163 {
7164 tree attrs;
7165
7166 if (!decl)
7167 return TARGET_LONG_CALLS;
7168
7169 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7170 if (lookup_attribute ("short_call", attrs))
7171 return false;
7172
7173 /* For "f", be conservative, and only cater for cases in which the
7174 whole of the current function is placed in the same section. */
7175 if (!flag_reorder_blocks_and_partition
7176 && TREE_CODE (decl) == FUNCTION_DECL
7177 && arm_function_in_section_p (decl, current_function_section ()))
7178 return false;
7179
7180 if (lookup_attribute ("long_call", attrs))
7181 return true;
7182
7183 return TARGET_LONG_CALLS;
7184 }
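
/* Illustrative example (not part of this file): how the rules above
   interact with -mlong-calls. The declarations are hypothetical.

     extern void far_away (void);
     void fast_path (void) __attribute__ ((short_call));

     void
     dispatch (int x)
     {
       if (x)
         far_away ();
       else
         fast_path ();
     }

   With -mlong-calls, the call to far_away is emitted as a long call
   (the address is loaded into a register and branched to), while the
   call to fast_path stays a plain BL because of rule d above. */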
7185
7186 /* Return nonzero if it is ok to make a tail-call to DECL. */
7187 static bool
7188 arm_function_ok_for_sibcall (tree decl, tree exp)
7189 {
7190 unsigned long func_type;
7191
7192 if (cfun->machine->sibcall_blocked)
7193 return false;
7194
7195 /* Never tailcall something if we are generating code for Thumb-1. */
7196 if (TARGET_THUMB1)
7197 return false;
7198
7199 /* The PIC register is live on entry to VxWorks PLT entries, so we
7200 must make the call before restoring the PIC register. */
7201 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7202 return false;
7203
7204 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7205 may be used both as target of the call and base register for restoring
7206 the VFP registers. */
7207 if (TARGET_APCS_FRAME && TARGET_ARM
7208 && TARGET_HARD_FLOAT
7209 && decl && arm_is_long_call_p (decl))
7210 return false;
7211
7212 /* If we are interworking and the function is not declared static
7213 then we can't tail-call it unless we know that it exists in this
7214 compilation unit (since it might be a Thumb routine). */
7215 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7216 && !TREE_ASM_WRITTEN (decl))
7217 return false;
7218
7219 func_type = arm_current_func_type ();
7220 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7221 if (IS_INTERRUPT (func_type))
7222 return false;
7223
7224 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7225 generated for entry functions themselves. */
7226 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7227 return false;
7228
7229 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7230 this would complicate matters for later code generation. */
7231 if (TREE_CODE (exp) == CALL_EXPR)
7232 {
7233 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7234 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7235 return false;
7236 }
7237
7238 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7239 {
7240 /* Check that the return value locations are the same. For
7241 example that we aren't returning a value from the sibling in
7242 a VFP register but then need to transfer it to a core
7243 register. */
7244 rtx a, b;
7245 tree decl_or_type = decl;
7246
7247 /* If it is an indirect function pointer, get the function type. */
7248 if (!decl)
7249 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7250
7251 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7252 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7253 cfun->decl, false);
7254 if (!rtx_equal_p (a, b))
7255 return false;
7256 }
7257
7258 /* Never tailcall if function may be called with a misaligned SP. */
7259 if (IS_STACKALIGN (func_type))
7260 return false;
7261
7262 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7263 references should become a NOP. Don't convert such calls into
7264 sibling calls. */
7265 if (TARGET_AAPCS_BASED
7266 && arm_abi == ARM_ABI_AAPCS
7267 && decl
7268 && DECL_WEAK (decl))
7269 return false;
7270
7271 /* We cannot do a tailcall for an indirect call by descriptor if all the
7272 argument registers are used because the only register left to load the
7273 address is IP and it will already contain the static chain. */
7274 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7275 {
7276 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7277 CUMULATIVE_ARGS cum;
7278 cumulative_args_t cum_v;
7279
7280 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7281 cum_v = pack_cumulative_args (&cum);
7282
7283 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7284 {
7285 tree type = TREE_VALUE (t);
7286 if (!VOID_TYPE_P (type))
7287 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7288 }
7289
7290 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7291 return false;
7292 }
7293
7294 /* Everything else is ok. */
7295 return true;
7296 }
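
/* Illustrative example (not part of this file): a call the checks above
   usually allow to become a sibling call. The names are hypothetical.

     int callee (int);

     int
     caller (int x)
     {
       return callee (x + 1);
     }

   At -O2 on ARM or Thumb-2 this is normally emitted as a direct branch
   to callee with no new stack frame. The checks above would block it
   if, for example, the target is Thumb-1, caller is an interrupt
   handler, or callee's type carries cmse_nonsecure_call. */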
7297
7298 \f
7299 /* Addressing mode support functions. */
7300
7301 /* Return nonzero if X is a legitimate immediate operand when compiling
7302 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7303 int
7304 legitimate_pic_operand_p (rtx x)
7305 {
7306 if (GET_CODE (x) == SYMBOL_REF
7307 || (GET_CODE (x) == CONST
7308 && GET_CODE (XEXP (x, 0)) == PLUS
7309 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7310 return 0;
7311
7312 return 1;
7313 }
7314
7315 /* Record that the current function needs a PIC register. Initialize
7316 cfun->machine->pic_reg if we have not already done so. */
7317
7318 static void
7319 require_pic_register (void)
7320 {
7321 /* A lot of the logic here is made obscure by the fact that this
7322 routine gets called as part of the rtx cost estimation process.
7323 We don't want those calls to affect any assumptions about the real
7324 function; and further, we can't call entry_of_function() until we
7325 start the real expansion process. */
7326 if (!crtl->uses_pic_offset_table)
7327 {
7328 gcc_assert (can_create_pseudo_p ());
7329 if (arm_pic_register != INVALID_REGNUM
7330 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7331 {
7332 if (!cfun->machine->pic_reg)
7333 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7334
7335 /* Play games to avoid marking the function as needing pic
7336 if we are being called as part of the cost-estimation
7337 process. */
7338 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7339 crtl->uses_pic_offset_table = 1;
7340 }
7341 else
7342 {
7343 rtx_insn *seq, *insn;
7344
7345 if (!cfun->machine->pic_reg)
7346 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7347
7348 /* Play games to avoid marking the function as needing pic
7349 if we are being called as part of the cost-estimation
7350 process. */
7351 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7352 {
7353 crtl->uses_pic_offset_table = 1;
7354 start_sequence ();
7355
7356 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7357 && arm_pic_register > LAST_LO_REGNUM)
7358 emit_move_insn (cfun->machine->pic_reg,
7359 gen_rtx_REG (Pmode, arm_pic_register));
7360 else
7361 arm_load_pic_register (0UL);
7362
7363 seq = get_insns ();
7364 end_sequence ();
7365
7366 for (insn = seq; insn; insn = NEXT_INSN (insn))
7367 if (INSN_P (insn))
7368 INSN_LOCATION (insn) = prologue_location;
7369
7370 /* We can be called during expansion of PHI nodes, where
7371 we can't yet emit instructions directly in the final
7372 insn stream. Queue the insns on the entry edge, they will
7373 be committed after everything else is expanded. */
7374 insert_insn_on_edge (seq,
7375 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7376 }
7377 }
7378 }
7379 }
7380
7381 rtx
7382 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7383 {
7384 if (GET_CODE (orig) == SYMBOL_REF
7385 || GET_CODE (orig) == LABEL_REF)
7386 {
7387 if (reg == 0)
7388 {
7389 gcc_assert (can_create_pseudo_p ());
7390 reg = gen_reg_rtx (Pmode);
7391 }
7392
7393 /* VxWorks does not impose a fixed gap between segments; the run-time
7394 gap can be different from the object-file gap. We therefore can't
7395 use GOTOFF unless we are absolutely sure that the symbol is in the
7396 same segment as the GOT. Unfortunately, the flexibility of linker
7397 scripts means that we can't be sure of that in general, so assume
7398 that GOTOFF is never valid on VxWorks. */
7399 /* References to weak symbols cannot be resolved locally: they
7400 may be overridden by a non-weak definition at link time. */
7401 rtx_insn *insn;
7402 if ((GET_CODE (orig) == LABEL_REF
7403 || (GET_CODE (orig) == SYMBOL_REF
7404 && SYMBOL_REF_LOCAL_P (orig)
7405 && (SYMBOL_REF_DECL (orig)
7406 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7407 && NEED_GOT_RELOC
7408 && arm_pic_data_is_text_relative)
7409 insn = arm_pic_static_addr (orig, reg);
7410 else
7411 {
7412 rtx pat;
7413 rtx mem;
7414
7415 /* If this function doesn't have a pic register, create one now. */
7416 require_pic_register ();
7417
7418 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7419
7420 /* Make the MEM as close to a constant as possible. */
7421 mem = SET_SRC (pat);
7422 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7423 MEM_READONLY_P (mem) = 1;
7424 MEM_NOTRAP_P (mem) = 1;
7425
7426 insn = emit_insn (pat);
7427 }
7428
7429 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7430 by loop. */
7431 set_unique_reg_note (insn, REG_EQUAL, orig);
7432
7433 return reg;
7434 }
7435 else if (GET_CODE (orig) == CONST)
7436 {
7437 rtx base, offset;
7438
7439 if (GET_CODE (XEXP (orig, 0)) == PLUS
7440 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7441 return orig;
7442
7443 /* Handle the case where we have: const (UNSPEC_TLS). */
7444 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7445 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7446 return orig;
7447
7448 /* Handle the case where we have:
7449 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7450 CONST_INT. */
7451 if (GET_CODE (XEXP (orig, 0)) == PLUS
7452 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7453 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7454 {
7455 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7456 return orig;
7457 }
7458
7459 if (reg == 0)
7460 {
7461 gcc_assert (can_create_pseudo_p ());
7462 reg = gen_reg_rtx (Pmode);
7463 }
7464
7465 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7466
7467 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7468 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7469 base == reg ? 0 : reg);
7470
7471 if (CONST_INT_P (offset))
7472 {
7473 /* The base register doesn't really matter, we only want to
7474 test the index for the appropriate mode. */
7475 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7476 {
7477 gcc_assert (can_create_pseudo_p ());
7478 offset = force_reg (Pmode, offset);
7479 }
7480
7481 if (CONST_INT_P (offset))
7482 return plus_constant (Pmode, base, INTVAL (offset));
7483 }
7484
7485 if (GET_MODE_SIZE (mode) > 4
7486 && (GET_MODE_CLASS (mode) == MODE_INT
7487 || TARGET_SOFT_FLOAT))
7488 {
7489 emit_insn (gen_addsi3 (reg, base, offset));
7490 return reg;
7491 }
7492
7493 return gen_rtx_PLUS (Pmode, base, offset);
7494 }
7495
7496 return orig;
7497 }
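
/* Illustrative example (not part of this file): the kind of access the
   routine above rewrites. The names are hypothetical.

     extern int counter;

     int
     read_counter (void)
     {
       return counter;
     }

   With -fpic the address of counter is not a link-time constant for the
   shared object, so the SYMBOL_REF is replaced by a load from the GOT
   relative to cfun->machine->pic_reg, which require_pic_register above
   arranges to have initialized by arm_load_pic_register below. */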
7498
7499
7500 /* Find a spare register to use during the prolog of a function. */
7501
7502 static int
7503 thumb_find_work_register (unsigned long pushed_regs_mask)
7504 {
7505 int reg;
7506
7507 /* Check the argument registers first as these are call-used. The
7508 register allocation order means that sometimes r3 might be used
7509 but earlier argument registers might not, so check them all. */
7510 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7511 if (!df_regs_ever_live_p (reg))
7512 return reg;
7513
7514 /* Before going on to check the call-saved registers we can try a couple
7515 more ways of deducing that r3 is available. The first is when we are
7516 pushing anonymous arguments onto the stack and we have fewer than 4
7517 registers' worth of fixed arguments (*). In this case r3 will be part of
7518 the variable argument list and so we can be sure that it will be
7519 pushed right at the start of the function. Hence it will be available
7520 for the rest of the prologue.
7521 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7522 if (cfun->machine->uses_anonymous_args
7523 && crtl->args.pretend_args_size > 0)
7524 return LAST_ARG_REGNUM;
7525
7526 /* The other case is when we have fixed arguments but fewer than 4 registers'
7527 worth. In this case r3 might be used in the body of the function, but
7528 it is not being used to convey an argument into the function. In theory
7529 we could just check crtl->args.size to see how many bytes are
7530 being passed in argument registers, but it seems that it is unreliable.
7531 Sometimes it will have the value 0 when in fact arguments are being
7532 passed. (See testcase execute/20021111-1.c for an example). So we also
7533 check the args_info.nregs field as well. The problem with this field is
7534 that it makes no allowances for arguments that are passed to the
7535 function but which are not used. Hence we could miss an opportunity
7536 when a function has an unused argument in r3. But it is better to be
7537 safe than to be sorry. */
7538 if (! cfun->machine->uses_anonymous_args
7539 && crtl->args.size >= 0
7540 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7541 && (TARGET_AAPCS_BASED
7542 ? crtl->args.info.aapcs_ncrn < 4
7543 : crtl->args.info.nregs < 4))
7544 return LAST_ARG_REGNUM;
7545
7546 /* Otherwise look for a call-saved register that is going to be pushed. */
7547 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7548 if (pushed_regs_mask & (1 << reg))
7549 return reg;
7550
7551 if (TARGET_THUMB2)
7552 {
7553 /* Thumb-2 can use high regs. */
7554 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7555 if (pushed_regs_mask & (1 << reg))
7556 return reg;
7557 }
7558 /* Something went wrong - thumb_compute_save_reg_mask()
7559 should have arranged for a suitable register to be pushed. */
7560 gcc_unreachable ();
7561 }
7562
7563 static GTY(()) int pic_labelno;
7564
7565 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7566 low register. */
7567
7568 void
7569 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7570 {
7571 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7572
7573 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7574 return;
7575
7576 gcc_assert (flag_pic);
7577
7578 pic_reg = cfun->machine->pic_reg;
7579 if (TARGET_VXWORKS_RTP)
7580 {
7581 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7582 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7583 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7584
7585 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7586
7587 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7588 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7589 }
7590 else
7591 {
7592 /* We use an UNSPEC rather than a LABEL_REF because this label
7593 never appears in the code stream. */
7594
7595 labelno = GEN_INT (pic_labelno++);
7596 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7597 l1 = gen_rtx_CONST (VOIDmode, l1);
7598
7599 /* On the ARM the PC register contains 'dot + 8' at the time of the
7600 addition, on the Thumb it is 'dot + 4'. */
7601 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7602 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7603 UNSPEC_GOTSYM_OFF);
7604 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7605
7606 if (TARGET_32BIT)
7607 {
7608 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7609 }
7610 else /* TARGET_THUMB1 */
7611 {
7612 if (arm_pic_register != INVALID_REGNUM
7613 && REGNO (pic_reg) > LAST_LO_REGNUM)
7614 {
7615 /* We will have pushed the pic register, so we should always be
7616 able to find a work register. */
7617 pic_tmp = gen_rtx_REG (SImode,
7618 thumb_find_work_register (saved_regs));
7619 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7620 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7621 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7622 }
7623 else if (arm_pic_register != INVALID_REGNUM
7624 && arm_pic_register > LAST_LO_REGNUM
7625 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7626 {
7627 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7628 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7629 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7630 }
7631 else
7632 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7633 }
7634 }
7635
7636 /* Need to emit this whether or not we obey regdecls,
7637 since setjmp/longjmp can cause life info to screw up. */
7638 emit_use (pic_reg);
7639 }
7640
7641 /* Generate code to load the address of a static var when flag_pic is set. */
7642 static rtx_insn *
7643 arm_pic_static_addr (rtx orig, rtx reg)
7644 {
7645 rtx l1, labelno, offset_rtx;
7646
7647 gcc_assert (flag_pic);
7648
7649 /* We use an UNSPEC rather than a LABEL_REF because this label
7650 never appears in the code stream. */
7651 labelno = GEN_INT (pic_labelno++);
7652 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7653 l1 = gen_rtx_CONST (VOIDmode, l1);
7654
7655 /* On the ARM the PC register contains 'dot + 8' at the time of the
7656 addition, on the Thumb it is 'dot + 4'. */
7657 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7658 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7659 UNSPEC_SYMBOL_OFFSET);
7660 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7661
7662 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7663 }
7664
7665 /* Return nonzero if X is valid as an ARM state addressing register. */
7666 static int
7667 arm_address_register_rtx_p (rtx x, int strict_p)
7668 {
7669 int regno;
7670
7671 if (!REG_P (x))
7672 return 0;
7673
7674 regno = REGNO (x);
7675
7676 if (strict_p)
7677 return ARM_REGNO_OK_FOR_BASE_P (regno);
7678
7679 return (regno <= LAST_ARM_REGNUM
7680 || regno >= FIRST_PSEUDO_REGISTER
7681 || regno == FRAME_POINTER_REGNUM
7682 || regno == ARG_POINTER_REGNUM);
7683 }
7684
7685 /* Return TRUE if this rtx is the difference of a symbol and a label,
7686 and will reduce to a PC-relative relocation in the object file.
7687 Expressions like this can be left alone when generating PIC, rather
7688 than forced through the GOT. */
7689 static int
7690 pcrel_constant_p (rtx x)
7691 {
7692 if (GET_CODE (x) == MINUS)
7693 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7694
7695 return FALSE;
7696 }
7697
7698 /* Return true if X will surely end up in an index register after next
7699 splitting pass. */
7700 static bool
7701 will_be_in_index_register (const_rtx x)
7702 {
7703 /* arm.md: calculate_pic_address will split this into a register. */
7704 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7705 }
7706
7707 /* Return nonzero if X is a valid ARM state address operand. */
7708 int
7709 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7710 int strict_p)
7711 {
7712 bool use_ldrd;
7713 enum rtx_code code = GET_CODE (x);
7714
7715 if (arm_address_register_rtx_p (x, strict_p))
7716 return 1;
7717
7718 use_ldrd = (TARGET_LDRD
7719 && (mode == DImode || mode == DFmode));
7720
7721 if (code == POST_INC || code == PRE_DEC
7722 || ((code == PRE_INC || code == POST_DEC)
7723 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7724 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7725
7726 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7727 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7728 && GET_CODE (XEXP (x, 1)) == PLUS
7729 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7730 {
7731 rtx addend = XEXP (XEXP (x, 1), 1);
7732
7733 /* Don't allow ldrd post-increment by register because it's hard
7734 to fix up invalid register choices. */
7735 if (use_ldrd
7736 && GET_CODE (x) == POST_MODIFY
7737 && REG_P (addend))
7738 return 0;
7739
7740 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7741 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7742 }
7743
7744 /* After reload constants split into minipools will have addresses
7745 from a LABEL_REF. */
7746 else if (reload_completed
7747 && (code == LABEL_REF
7748 || (code == CONST
7749 && GET_CODE (XEXP (x, 0)) == PLUS
7750 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7751 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7752 return 1;
7753
7754 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7755 return 0;
7756
7757 else if (code == PLUS)
7758 {
7759 rtx xop0 = XEXP (x, 0);
7760 rtx xop1 = XEXP (x, 1);
7761
7762 return ((arm_address_register_rtx_p (xop0, strict_p)
7763 && ((CONST_INT_P (xop1)
7764 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7765 || (!strict_p && will_be_in_index_register (xop1))))
7766 || (arm_address_register_rtx_p (xop1, strict_p)
7767 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7768 }
7769
7770 #if 0
7771 /* Reload currently can't handle MINUS, so disable this for now */
7772 else if (GET_CODE (x) == MINUS)
7773 {
7774 rtx xop0 = XEXP (x, 0);
7775 rtx xop1 = XEXP (x, 1);
7776
7777 return (arm_address_register_rtx_p (xop0, strict_p)
7778 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7779 }
7780 #endif
7781
7782 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7783 && code == SYMBOL_REF
7784 && CONSTANT_POOL_ADDRESS_P (x)
7785 && ! (flag_pic
7786 && symbol_mentioned_p (get_pool_constant (x))
7787 && ! pcrel_constant_p (get_pool_constant (x))))
7788 return 1;
7789
7790 return 0;
7791 }
7792
7793 /* Return true if we can avoid creating a constant pool entry for x. */
7794 static bool
7795 can_avoid_literal_pool_for_label_p (rtx x)
7796 {
7797 /* Normally we can assign constant values to target registers without
7798 the help of a constant pool. But there are cases where we have to
7799 use the constant pool, such as:
7800 1) assigning a label to a register;
7801 2) sign-extending an 8-bit value to 32 bits and assigning it to a register.
7802 
7803 A constant pool access of the form:
7804 (set (reg r0) (mem (symbol_ref (".LC0"))))
7805 will cause the use of the literal pool (later, in function arm_reorg).
7806 So here we mark such a form as invalid; the compiler will then
7807 adjust it into:
7808 (set (reg r0) (symbol_ref (".LC0")))
7809 (set (reg r0) (mem (reg r0))).
7810 No extra register is required, and (mem (reg r0)) won't cause the
7811 use of literal pools. */
7812 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7813 && CONSTANT_POOL_ADDRESS_P (x))
7814 return 1;
7815 return 0;
7816 }
7817
7818
7819 /* Return nonzero if X is a valid Thumb-2 address operand. */
7820 static int
7821 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7822 {
7823 bool use_ldrd;
7824 enum rtx_code code = GET_CODE (x);
7825
7826 if (arm_address_register_rtx_p (x, strict_p))
7827 return 1;
7828
7829 use_ldrd = (TARGET_LDRD
7830 && (mode == DImode || mode == DFmode));
7831
7832 if (code == POST_INC || code == PRE_DEC
7833 || ((code == PRE_INC || code == POST_DEC)
7834 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7835 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7836
7837 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7838 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7839 && GET_CODE (XEXP (x, 1)) == PLUS
7840 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7841 {
7842 /* Thumb-2 only has autoincrement by constant. */
7843 rtx addend = XEXP (XEXP (x, 1), 1);
7844 HOST_WIDE_INT offset;
7845
7846 if (!CONST_INT_P (addend))
7847 return 0;
7848
7849 offset = INTVAL (addend);
7850 if (GET_MODE_SIZE (mode) <= 4)
7851 return (offset > -256 && offset < 256);
7852
7853 return (use_ldrd && offset > -1024 && offset < 1024
7854 && (offset & 3) == 0);
7855 }
7856
7857 /* After reload constants split into minipools will have addresses
7858 from a LABEL_REF. */
7859 else if (reload_completed
7860 && (code == LABEL_REF
7861 || (code == CONST
7862 && GET_CODE (XEXP (x, 0)) == PLUS
7863 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7864 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7865 return 1;
7866
7867 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7868 return 0;
7869
7870 else if (code == PLUS)
7871 {
7872 rtx xop0 = XEXP (x, 0);
7873 rtx xop1 = XEXP (x, 1);
7874
7875 return ((arm_address_register_rtx_p (xop0, strict_p)
7876 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7877 || (!strict_p && will_be_in_index_register (xop1))))
7878 || (arm_address_register_rtx_p (xop1, strict_p)
7879 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7880 }
7881
7882 else if (can_avoid_literal_pool_for_label_p (x))
7883 return 0;
7884
7885 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7886 && code == SYMBOL_REF
7887 && CONSTANT_POOL_ADDRESS_P (x)
7888 && ! (flag_pic
7889 && symbol_mentioned_p (get_pool_constant (x))
7890 && ! pcrel_constant_p (get_pool_constant (x))))
7891 return 1;
7892
7893 return 0;
7894 }
7895
7896 /* Return nonzero if INDEX is valid for an address index operand in
7897 ARM state. */
7898 static int
7899 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7900 int strict_p)
7901 {
7902 HOST_WIDE_INT range;
7903 enum rtx_code code = GET_CODE (index);
7904
7905 /* Standard coprocessor addressing modes. */
7906 if (TARGET_HARD_FLOAT
7907 && (mode == SFmode || mode == DFmode))
7908 return (code == CONST_INT && INTVAL (index) < 1024
7909 && INTVAL (index) > -1024
7910 && (INTVAL (index) & 3) == 0);
7911
7912 /* For quad modes, we restrict the constant offset to be slightly less
7913 than what the instruction format permits. We do this because for
7914 quad mode moves, we will actually decompose them into two separate
7915 double-mode reads or writes. INDEX must therefore be a valid
7916 (double-mode) offset and so should INDEX+8. */
7917 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7918 return (code == CONST_INT
7919 && INTVAL (index) < 1016
7920 && INTVAL (index) > -1024
7921 && (INTVAL (index) & 3) == 0);
7922
7923 /* We have no such constraint on double mode offsets, so we permit the
7924 full range of the instruction format. */
7925 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7926 return (code == CONST_INT
7927 && INTVAL (index) < 1024
7928 && INTVAL (index) > -1024
7929 && (INTVAL (index) & 3) == 0);
7930
7931 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7932 return (code == CONST_INT
7933 && INTVAL (index) < 1024
7934 && INTVAL (index) > -1024
7935 && (INTVAL (index) & 3) == 0);
7936
7937 if (arm_address_register_rtx_p (index, strict_p)
7938 && (GET_MODE_SIZE (mode) <= 4))
7939 return 1;
7940
7941 if (mode == DImode || mode == DFmode)
7942 {
7943 if (code == CONST_INT)
7944 {
7945 HOST_WIDE_INT val = INTVAL (index);
7946
7947 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
7948 If vldr is selected it uses arm_coproc_mem_operand. */
7949 if (TARGET_LDRD)
7950 return val > -256 && val < 256;
7951 else
7952 return val > -4096 && val < 4092;
7953 }
7954
7955 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7956 }
7957
7958 if (GET_MODE_SIZE (mode) <= 4
7959 && ! (arm_arch4
7960 && (mode == HImode
7961 || mode == HFmode
7962 || (mode == QImode && outer == SIGN_EXTEND))))
7963 {
7964 if (code == MULT)
7965 {
7966 rtx xiop0 = XEXP (index, 0);
7967 rtx xiop1 = XEXP (index, 1);
7968
7969 return ((arm_address_register_rtx_p (xiop0, strict_p)
7970 && power_of_two_operand (xiop1, SImode))
7971 || (arm_address_register_rtx_p (xiop1, strict_p)
7972 && power_of_two_operand (xiop0, SImode)));
7973 }
7974 else if (code == LSHIFTRT || code == ASHIFTRT
7975 || code == ASHIFT || code == ROTATERT)
7976 {
7977 rtx op = XEXP (index, 1);
7978
7979 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7980 && CONST_INT_P (op)
7981 && INTVAL (op) > 0
7982 && INTVAL (op) <= 31);
7983 }
7984 }
7985
7986 /* For ARM v4 we may be doing a sign-extend operation during the
7987 load. */
7988 if (arm_arch4)
7989 {
7990 if (mode == HImode
7991 || mode == HFmode
7992 || (outer == SIGN_EXTEND && mode == QImode))
7993 range = 256;
7994 else
7995 range = 4096;
7996 }
7997 else
7998 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7999
8000 return (code == CONST_INT
8001 && INTVAL (index) < range
8002 && INTVAL (index) > -range);
8003 }
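
/* Illustrative example (not part of this file): an address accepted by
   the scaled-index case above. The function is hypothetical.

     int
     element (int *a, int i)
     {
       return a[i];
     }

   The address of a[i] is (plus (reg a) (mult (reg i) (const_int 4))),
   which in ARM state maps onto the [rA, rI, lsl #2] addressing mode and
   is accepted by the MULT branch above via power_of_two_operand. */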
8004
8005 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8006 index operand. i.e. 1, 2, 4 or 8. */
8007 static bool
8008 thumb2_index_mul_operand (rtx op)
8009 {
8010 HOST_WIDE_INT val;
8011
8012 if (!CONST_INT_P (op))
8013 return false;
8014
8015 val = INTVAL (op);
8016 return (val == 1 || val == 2 || val == 4 || val == 8);
8017 }
8018
8019 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8020 static int
8021 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8022 {
8023 enum rtx_code code = GET_CODE (index);
8024
8025 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8026 /* Standard coprocessor addressing modes. */
8027 if (TARGET_HARD_FLOAT
8028 && (mode == SFmode || mode == DFmode))
8029 return (code == CONST_INT && INTVAL (index) < 1024
8030 /* Thumb-2 allows only > -256 index range for its core register
8031 load/stores. Since we allow SF/DF in core registers, we have
8032 to use the intersection between -256~4096 (core) and -1024~1024
8033 (coprocessor). */
8034 && INTVAL (index) > -256
8035 && (INTVAL (index) & 3) == 0);
8036
8037 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8038 {
8039 /* For DImode assume values will usually live in core regs
8040 and only allow LDRD addressing modes. */
8041 if (!TARGET_LDRD || mode != DImode)
8042 return (code == CONST_INT
8043 && INTVAL (index) < 1024
8044 && INTVAL (index) > -1024
8045 && (INTVAL (index) & 3) == 0);
8046 }
8047
8048 /* For quad modes, we restrict the constant offset to be slightly less
8049 than what the instruction format permits. We do this because for
8050 quad mode moves, we will actually decompose them into two separate
8051 double-mode reads or writes. INDEX must therefore be a valid
8052 (double-mode) offset and so should INDEX+8. */
8053 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8054 return (code == CONST_INT
8055 && INTVAL (index) < 1016
8056 && INTVAL (index) > -1024
8057 && (INTVAL (index) & 3) == 0);
8058
8059 /* We have no such constraint on double mode offsets, so we permit the
8060 full range of the instruction format. */
8061 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8062 return (code == CONST_INT
8063 && INTVAL (index) < 1024
8064 && INTVAL (index) > -1024
8065 && (INTVAL (index) & 3) == 0);
8066
8067 if (arm_address_register_rtx_p (index, strict_p)
8068 && (GET_MODE_SIZE (mode) <= 4))
8069 return 1;
8070
8071 if (mode == DImode || mode == DFmode)
8072 {
8073 if (code == CONST_INT)
8074 {
8075 HOST_WIDE_INT val = INTVAL (index);
8076 /* Thumb-2 ldrd only has reg+const addressing modes.
8077 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8078 If vldr is selected it uses arm_coproc_mem_operand. */
8079 if (TARGET_LDRD)
8080 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8081 else
8082 return IN_RANGE (val, -255, 4095 - 4);
8083 }
8084 else
8085 return 0;
8086 }
8087
8088 if (code == MULT)
8089 {
8090 rtx xiop0 = XEXP (index, 0);
8091 rtx xiop1 = XEXP (index, 1);
8092
8093 return ((arm_address_register_rtx_p (xiop0, strict_p)
8094 && thumb2_index_mul_operand (xiop1))
8095 || (arm_address_register_rtx_p (xiop1, strict_p)
8096 && thumb2_index_mul_operand (xiop0)));
8097 }
8098 else if (code == ASHIFT)
8099 {
8100 rtx op = XEXP (index, 1);
8101
8102 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8103 && CONST_INT_P (op)
8104 && INTVAL (op) > 0
8105 && INTVAL (op) <= 3);
8106 }
8107
8108 return (code == CONST_INT
8109 && INTVAL (index) < 4096
8110 && INTVAL (index) > -256);
8111 }
8112
8113 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8114 static int
8115 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8116 {
8117 int regno;
8118
8119 if (!REG_P (x))
8120 return 0;
8121
8122 regno = REGNO (x);
8123
8124 if (strict_p)
8125 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8126
8127 return (regno <= LAST_LO_REGNUM
8128 || regno > LAST_VIRTUAL_REGISTER
8129 || regno == FRAME_POINTER_REGNUM
8130 || (GET_MODE_SIZE (mode) >= 4
8131 && (regno == STACK_POINTER_REGNUM
8132 || regno >= FIRST_PSEUDO_REGISTER
8133 || x == hard_frame_pointer_rtx
8134 || x == arg_pointer_rtx)));
8135 }
8136
8137 /* Return nonzero if x is a legitimate index register. This is the case
8138 for any base register that can access a QImode object. */
8139 inline static int
8140 thumb1_index_register_rtx_p (rtx x, int strict_p)
8141 {
8142 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8143 }
8144
8145 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8146
8147 The AP may be eliminated to either the SP or the FP, so we use the
8148 least common denominator, e.g. SImode, and offsets from 0 to 64.
8149
8150 ??? Verify whether the above is the right approach.
8151
8152 ??? Also, the FP may be eliminated to the SP, so perhaps that
8153 needs special handling also.
8154
8155 ??? Look at how the mips16 port solves this problem. It probably uses
8156 better ways to solve some of these problems.
8157
8158 Although it is not incorrect, we don't accept QImode and HImode
8159 addresses based on the frame pointer or arg pointer until the
8160 reload pass starts. This is so that eliminating such addresses
8161 into stack based ones won't produce impossible code. */
8162 int
8163 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8164 {
8165 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8166 return 0;
8167
8168 /* ??? Not clear if this is right. Experiment. */
8169 if (GET_MODE_SIZE (mode) < 4
8170 && !(reload_in_progress || reload_completed)
8171 && (reg_mentioned_p (frame_pointer_rtx, x)
8172 || reg_mentioned_p (arg_pointer_rtx, x)
8173 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8174 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8175 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8176 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8177 return 0;
8178
8179 /* Accept any base register. SP only in SImode or larger. */
8180 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8181 return 1;
8182
8183 /* This is PC relative data before arm_reorg runs. */
8184 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8185 && GET_CODE (x) == SYMBOL_REF
8186 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8187 return 1;
8188
8189 /* This is PC relative data after arm_reorg runs. */
8190 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8191 && reload_completed
8192 && (GET_CODE (x) == LABEL_REF
8193 || (GET_CODE (x) == CONST
8194 && GET_CODE (XEXP (x, 0)) == PLUS
8195 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8196 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8197 return 1;
8198
8199 /* Post-inc indexing only supported for SImode and larger. */
8200 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8201 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8202 return 1;
8203
8204 else if (GET_CODE (x) == PLUS)
8205 {
8206 /* REG+REG address can be any two index registers. */
8207 /* We disallow FRAME+REG addressing since we know that FRAME
8208 will be replaced with STACK, and SP relative addressing only
8209 permits SP+OFFSET. */
8210 if (GET_MODE_SIZE (mode) <= 4
8211 && XEXP (x, 0) != frame_pointer_rtx
8212 && XEXP (x, 1) != frame_pointer_rtx
8213 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8214 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8215 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8216 return 1;
8217
8218 /* REG+const has 5-7 bit offset for non-SP registers. */
8219 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8220 || XEXP (x, 0) == arg_pointer_rtx)
8221 && CONST_INT_P (XEXP (x, 1))
8222 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8223 return 1;
8224
8225 /* REG+const has 10-bit offset for SP, but only SImode and
8226 larger is supported. */
8227 /* ??? Should probably check for DI/DFmode overflow here
8228 just like GO_IF_LEGITIMATE_OFFSET does. */
8229 else if (REG_P (XEXP (x, 0))
8230 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8231 && GET_MODE_SIZE (mode) >= 4
8232 && CONST_INT_P (XEXP (x, 1))
8233 && INTVAL (XEXP (x, 1)) >= 0
8234 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8235 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8236 return 1;
8237
8238 else if (REG_P (XEXP (x, 0))
8239 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8240 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8241 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8242 && REGNO (XEXP (x, 0))
8243 <= LAST_VIRTUAL_POINTER_REGISTER))
8244 && GET_MODE_SIZE (mode) >= 4
8245 && CONST_INT_P (XEXP (x, 1))
8246 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8247 return 1;
8248 }
8249
8250 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8251 && GET_MODE_SIZE (mode) == 4
8252 && GET_CODE (x) == SYMBOL_REF
8253 && CONSTANT_POOL_ADDRESS_P (x)
8254 && ! (flag_pic
8255 && symbol_mentioned_p (get_pool_constant (x))
8256 && ! pcrel_constant_p (get_pool_constant (x))))
8257 return 1;
8258
8259 return 0;
8260 }
8261
8262 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8263 instruction of mode MODE. */
8264 int
8265 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8266 {
8267 switch (GET_MODE_SIZE (mode))
8268 {
8269 case 1:
8270 return val >= 0 && val < 32;
8271
8272 case 2:
8273 return val >= 0 && val < 64 && (val & 1) == 0;
8274
8275 default:
8276 return (val >= 0
8277 && (val + GET_MODE_SIZE (mode)) <= 128
8278 && (val & 3) == 0);
8279 }
8280 }
8281
8282 bool
8283 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8284 {
8285 if (TARGET_ARM)
8286 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8287 else if (TARGET_THUMB2)
8288 return thumb2_legitimate_address_p (mode, x, strict_p);
8289 else /* if (TARGET_THUMB1) */
8290 return thumb1_legitimate_address_p (mode, x, strict_p);
8291 }
8292
8293 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8294
8295 Given an rtx X being reloaded into a reg required to be
8296 in class CLASS, return the class of reg to actually use.
8297 In general this is just CLASS, but for the Thumb core registers and
8298 immediate constants we prefer a LO_REGS class or a subset. */
8299
8300 static reg_class_t
8301 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8302 {
8303 if (TARGET_32BIT)
8304 return rclass;
8305 else
8306 {
8307 if (rclass == GENERAL_REGS)
8308 return LO_REGS;
8309 else
8310 return rclass;
8311 }
8312 }
8313
8314 /* Build the SYMBOL_REF for __tls_get_addr. */
8315
8316 static GTY(()) rtx tls_get_addr_libfunc;
8317
8318 static rtx
8319 get_tls_get_addr (void)
8320 {
8321 if (!tls_get_addr_libfunc)
8322 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8323 return tls_get_addr_libfunc;
8324 }
8325
8326 rtx
8327 arm_load_tp (rtx target)
8328 {
8329 if (!target)
8330 target = gen_reg_rtx (SImode);
8331
8332 if (TARGET_HARD_TP)
8333 {
8334 /* Can return in any reg. */
8335 emit_insn (gen_load_tp_hard (target));
8336 }
8337 else
8338 {
8339 /* Always returned in r0. Immediately copy the result into a pseudo,
8340 otherwise other uses of r0 (e.g. setting up function arguments) may
8341 clobber the value. */
8342
8343 rtx tmp;
8344
8345 emit_insn (gen_load_tp_soft ());
8346
8347 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8348 emit_move_insn (target, tmp);
8349 }
8350 return target;
8351 }
8352
8353 static rtx
8354 load_tls_operand (rtx x, rtx reg)
8355 {
8356 rtx tmp;
8357
8358 if (reg == NULL_RTX)
8359 reg = gen_reg_rtx (SImode);
8360
8361 tmp = gen_rtx_CONST (SImode, x);
8362
8363 emit_move_insn (reg, tmp);
8364
8365 return reg;
8366 }
8367
8368 static rtx_insn *
8369 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8370 {
8371 rtx label, labelno, sum;
8372
8373 gcc_assert (reloc != TLS_DESCSEQ);
8374 start_sequence ();
8375
8376 labelno = GEN_INT (pic_labelno++);
8377 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8378 label = gen_rtx_CONST (VOIDmode, label);
8379
8380 sum = gen_rtx_UNSPEC (Pmode,
8381 gen_rtvec (4, x, GEN_INT (reloc), label,
8382 GEN_INT (TARGET_ARM ? 8 : 4)),
8383 UNSPEC_TLS);
8384 reg = load_tls_operand (sum, reg);
8385
8386 if (TARGET_ARM)
8387 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8388 else
8389 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8390
8391 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8392 LCT_PURE, /* LCT_CONST? */
8393 Pmode, reg, Pmode);
8394
8395 rtx_insn *insns = get_insns ();
8396 end_sequence ();
8397
8398 return insns;
8399 }
8400
8401 static rtx
8402 arm_tls_descseq_addr (rtx x, rtx reg)
8403 {
8404 rtx labelno = GEN_INT (pic_labelno++);
8405 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8406 rtx sum = gen_rtx_UNSPEC (Pmode,
8407 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8408 gen_rtx_CONST (VOIDmode, label),
8409 GEN_INT (!TARGET_ARM)),
8410 UNSPEC_TLS);
8411 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8412
8413 emit_insn (gen_tlscall (x, labelno));
8414 if (!reg)
8415 reg = gen_reg_rtx (SImode);
8416 else
8417 gcc_assert (REGNO (reg) != R0_REGNUM);
8418
8419 emit_move_insn (reg, reg0);
8420
8421 return reg;
8422 }
8423
8424 rtx
8425 legitimize_tls_address (rtx x, rtx reg)
8426 {
8427 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8428 rtx_insn *insns;
8429 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8430
8431 switch (model)
8432 {
8433 case TLS_MODEL_GLOBAL_DYNAMIC:
8434 if (TARGET_GNU2_TLS)
8435 {
8436 reg = arm_tls_descseq_addr (x, reg);
8437
8438 tp = arm_load_tp (NULL_RTX);
8439
8440 dest = gen_rtx_PLUS (Pmode, tp, reg);
8441 }
8442 else
8443 {
8444 /* Original scheme */
8445 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8446 dest = gen_reg_rtx (Pmode);
8447 emit_libcall_block (insns, dest, ret, x);
8448 }
8449 return dest;
8450
8451 case TLS_MODEL_LOCAL_DYNAMIC:
8452 if (TARGET_GNU2_TLS)
8453 {
8454 reg = arm_tls_descseq_addr (x, reg);
8455
8456 tp = arm_load_tp (NULL_RTX);
8457
8458 dest = gen_rtx_PLUS (Pmode, tp, reg);
8459 }
8460 else
8461 {
8462 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8463
8464 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8465 share the LDM result with other LD model accesses. */
8466 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8467 UNSPEC_TLS);
8468 dest = gen_reg_rtx (Pmode);
8469 emit_libcall_block (insns, dest, ret, eqv);
8470
8471 /* Load the addend. */
8472 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8473 GEN_INT (TLS_LDO32)),
8474 UNSPEC_TLS);
8475 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8476 dest = gen_rtx_PLUS (Pmode, dest, addend);
8477 }
8478 return dest;
8479
8480 case TLS_MODEL_INITIAL_EXEC:
8481 labelno = GEN_INT (pic_labelno++);
8482 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8483 label = gen_rtx_CONST (VOIDmode, label);
8484 sum = gen_rtx_UNSPEC (Pmode,
8485 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8486 GEN_INT (TARGET_ARM ? 8 : 4)),
8487 UNSPEC_TLS);
8488 reg = load_tls_operand (sum, reg);
8489
8490 if (TARGET_ARM)
8491 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8492 else if (TARGET_THUMB2)
8493 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8494 else
8495 {
8496 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8497 emit_move_insn (reg, gen_const_mem (SImode, reg));
8498 }
8499
8500 tp = arm_load_tp (NULL_RTX);
8501
8502 return gen_rtx_PLUS (Pmode, tp, reg);
8503
8504 case TLS_MODEL_LOCAL_EXEC:
8505 tp = arm_load_tp (NULL_RTX);
8506
8507 reg = gen_rtx_UNSPEC (Pmode,
8508 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8509 UNSPEC_TLS);
8510 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8511
8512 return gen_rtx_PLUS (Pmode, tp, reg);
8513
8514 default:
8515 abort ();
8516 }
8517 }
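
/* Illustrative example (not part of this file): a TLS access expanded
   through the routine above. The names are hypothetical.

     __thread int per_thread_count;

     int
     next_id (void)
     {
       return ++per_thread_count;
     }

   Which case above is used depends on -fpic, -ftls-model and whether
   the definition binds locally; in a non-PIC executable this typically
   ends up as TLS_MODEL_LOCAL_EXEC, i.e. the thread pointer from
   arm_load_tp plus a TLS_LE32 offset. */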
8518
8519 /* Try machine-dependent ways of modifying an illegitimate address
8520 to be legitimate. If we find one, return the new, valid address. */
8521 rtx
8522 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8523 {
8524 if (arm_tls_referenced_p (x))
8525 {
8526 rtx addend = NULL;
8527
8528 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8529 {
8530 addend = XEXP (XEXP (x, 0), 1);
8531 x = XEXP (XEXP (x, 0), 0);
8532 }
8533
8534 if (GET_CODE (x) != SYMBOL_REF)
8535 return x;
8536
8537 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8538
8539 x = legitimize_tls_address (x, NULL_RTX);
8540
8541 if (addend)
8542 {
8543 x = gen_rtx_PLUS (SImode, x, addend);
8544 orig_x = x;
8545 }
8546 else
8547 return x;
8548 }
8549
8550 if (!TARGET_ARM)
8551 {
8552 /* TODO: legitimize_address for Thumb2. */
8553 if (TARGET_THUMB2)
8554 return x;
8555 return thumb_legitimize_address (x, orig_x, mode);
8556 }
8557
8558 if (GET_CODE (x) == PLUS)
8559 {
8560 rtx xop0 = XEXP (x, 0);
8561 rtx xop1 = XEXP (x, 1);
8562
8563 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8564 xop0 = force_reg (SImode, xop0);
8565
8566 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8567 && !symbol_mentioned_p (xop1))
8568 xop1 = force_reg (SImode, xop1);
8569
8570 if (ARM_BASE_REGISTER_RTX_P (xop0)
8571 && CONST_INT_P (xop1))
8572 {
8573 HOST_WIDE_INT n, low_n;
8574 rtx base_reg, val;
8575 n = INTVAL (xop1);
8576
8577 /* VFP addressing modes actually allow greater offsets, but for
8578 now we just stick with the lowest common denominator. */
8579 if (mode == DImode || mode == DFmode)
8580 {
8581 low_n = n & 0x0f;
8582 n &= ~0x0f;
8583 if (low_n > 4)
8584 {
8585 n += 16;
8586 low_n -= 16;
8587 }
8588 }
8589 else
8590 {
8591 low_n = ((mode) == TImode ? 0
8592 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8593 n -= low_n;
8594 }
8595
8596 base_reg = gen_reg_rtx (SImode);
8597 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8598 emit_move_insn (base_reg, val);
8599 x = plus_constant (Pmode, base_reg, low_n);
8600 }
8601 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8602 x = gen_rtx_PLUS (SImode, xop0, xop1);
8603 }
8604
8605 /* XXX We don't allow MINUS any more -- see comment in
8606 arm_legitimate_address_outer_p (). */
8607 else if (GET_CODE (x) == MINUS)
8608 {
8609 rtx xop0 = XEXP (x, 0);
8610 rtx xop1 = XEXP (x, 1);
8611
8612 if (CONSTANT_P (xop0))
8613 xop0 = force_reg (SImode, xop0);
8614
8615 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8616 xop1 = force_reg (SImode, xop1);
8617
8618 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8619 x = gen_rtx_MINUS (SImode, xop0, xop1);
8620 }
8621
8622 /* Make sure to take full advantage of the pre-indexed addressing mode
8623 with absolute addresses, which often allows the base register to be
8624 shared between multiple adjacent memory references, and might even
8625 allow the minipool to be avoided entirely. */
8626 else if (CONST_INT_P (x) && optimize > 0)
8627 {
8628 unsigned int bits;
8629 HOST_WIDE_INT mask, base, index;
8630 rtx base_reg;
8631
8632 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8633 use an 8-bit index. So let's use a 12-bit index for SImode only and
8634 hope that arm_gen_constant will enable ldrb to use more bits. */
8635 bits = (mode == SImode) ? 12 : 8;
8636 mask = (1 << bits) - 1;
8637 base = INTVAL (x) & ~mask;
8638 index = INTVAL (x) & mask;
8639 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8640 {
8641 /* It'll most probably be more efficient to generate the base
8642 with more bits set and use a negative index instead. */
8643 base |= mask;
8644 index -= mask;
8645 }
8646 base_reg = force_reg (SImode, GEN_INT (base));
8647 x = plus_constant (Pmode, base_reg, index);
8648 }
8649
8650 if (flag_pic)
8651 {
8652 /* We need to find and carefully transform any SYMBOL and LABEL
8653 references; so go back to the original address expression. */
8654 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8655
8656 if (new_x != orig_x)
8657 x = new_x;
8658 }
8659
8660 return x;
8661 }
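
/* Illustrative example (not part of this file): an address the routine
   above has to legitimize. The structure is hypothetical.

     struct big
     {
       char pad[8192];
       int field;
     };

     int
     get_field (struct big *p)
     {
       return p->field;
     }

   The offset 8192 is outside the 12-bit range of an ARM word load, so
   the PLUS case above moves the out-of-range part into a separately
   computed base register and leaves only a small in-range offset. */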
8662
8663
8664 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8665 to be legitimate. If we find one, return the new, valid address. */
8666 rtx
8667 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8668 {
8669 if (GET_CODE (x) == PLUS
8670 && CONST_INT_P (XEXP (x, 1))
8671 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8672 || INTVAL (XEXP (x, 1)) < 0))
8673 {
8674 rtx xop0 = XEXP (x, 0);
8675 rtx xop1 = XEXP (x, 1);
8676 HOST_WIDE_INT offset = INTVAL (xop1);
8677
8678 /* Try to fold the offset into a biasing of the base register and
8679 then offsetting that. Don't do this when optimizing for space
8680 since it can cause too many CSEs. */
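/* For instance, with SImode (GET_MODE_SIZE == 4) and offset == 300, the
   first branch below computes delta == 300 - 252 == 48, biases the base
   by 252 and leaves a residual offset of 48, which fits the Thumb-1
   word-load immediate range (0..124). */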
8681 if (optimize_size && offset >= 0
8682 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8683 {
8684 HOST_WIDE_INT delta;
8685
8686 if (offset >= 256)
8687 delta = offset - (256 - GET_MODE_SIZE (mode));
8688 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8689 delta = 31 * GET_MODE_SIZE (mode);
8690 else
8691 delta = offset & (~31 * GET_MODE_SIZE (mode));
8692
8693 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8694 NULL_RTX);
8695 x = plus_constant (Pmode, xop0, delta);
8696 }
8697 else if (offset < 0 && offset > -256)
8698 /* Small negative offsets are best done with a subtract before the
8699 dereference; forcing these into a register normally takes two
8700 instructions. */
8701 x = force_operand (x, NULL_RTX);
8702 else
8703 {
8704 /* For the remaining cases, force the constant into a register. */
8705 xop1 = force_reg (SImode, xop1);
8706 x = gen_rtx_PLUS (SImode, xop0, xop1);
8707 }
8708 }
8709 else if (GET_CODE (x) == PLUS
8710 && s_register_operand (XEXP (x, 1), SImode)
8711 && !s_register_operand (XEXP (x, 0), SImode))
8712 {
8713 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8714
8715 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8716 }
8717
8718 if (flag_pic)
8719 {
8720 /* We need to find and carefully transform any SYMBOL and LABEL
8721 references; so go back to the original address expression. */
8722 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8723
8724 if (new_x != orig_x)
8725 x = new_x;
8726 }
8727
8728 return x;
8729 }
8730
8731 /* Return TRUE if X contains any TLS symbol references. */
8732
8733 bool
8734 arm_tls_referenced_p (rtx x)
8735 {
8736 if (! TARGET_HAVE_TLS)
8737 return false;
8738
8739 subrtx_iterator::array_type array;
8740 FOR_EACH_SUBRTX (iter, array, x, ALL)
8741 {
8742 const_rtx x = *iter;
8743 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8744 {
8745 /* ARM currently provides relocations to encode TLS variables only in
8746 data, not in AArch32 instructions, so there is currently no way to
8747 implement these when the literal pool is disabled. */
8748 if (arm_disable_literal_pool)
8749 sorry ("accessing thread-local storage is not currently supported "
8750 "with -mpure-code or -mslow-flash-data");
8751
8752 return true;
8753 }
8754
8755 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8756 TLS offsets, not real symbol references. */
8757 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8758 iter.skip_subrtxes ();
8759 }
8760 return false;
8761 }
8762
8763 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8764
8765 On the ARM, allow any integer (invalid ones are removed later by insn
8766 patterns), nice doubles, and symbol_refs that refer to the function's
8767 constant pool XXX.
8768
8769 When generating PIC, allow anything. */
8770
8771 static bool
8772 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8773 {
8774 return flag_pic || !label_mentioned_p (x);
8775 }
8776
8777 static bool
8778 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8779 {
8780 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8781 RTXs. These must therefore be allowed for Thumb-1 so that, when run
8782 for ARMv8-M Baseline or later, the result is valid. */
8783 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8784 x = XEXP (x, 0);
8785
8786 return (CONST_INT_P (x)
8787 || CONST_DOUBLE_P (x)
8788 || CONSTANT_ADDRESS_P (x)
8789 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8790 || flag_pic);
8791 }
8792
8793 static bool
8794 arm_legitimate_constant_p (machine_mode mode, rtx x)
8795 {
8796 return (!arm_cannot_force_const_mem (mode, x)
8797 && (TARGET_32BIT
8798 ? arm_legitimate_constant_p_1 (mode, x)
8799 : thumb_legitimate_constant_p (mode, x)));
8800 }
8801
8802 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8803
8804 static bool
8805 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8806 {
8807 rtx base, offset;
8808
8809 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8810 {
8811 split_const (x, &base, &offset);
8812 if (GET_CODE (base) == SYMBOL_REF
8813 && !offset_within_block_p (base, INTVAL (offset)))
8814 return true;
8815 }
8816 return arm_tls_referenced_p (x);
8817 }
8818 \f
8819 #define REG_OR_SUBREG_REG(X) \
8820 (REG_P (X) \
8821 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8822
8823 #define REG_OR_SUBREG_RTX(X) \
8824 (REG_P (X) ? (X) : SUBREG_REG (X))
8825
8826 static inline int
8827 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8828 {
8829 machine_mode mode = GET_MODE (x);
8830 int total, words;
8831
8832 switch (code)
8833 {
8834 case ASHIFT:
8835 case ASHIFTRT:
8836 case LSHIFTRT:
8837 case ROTATERT:
8838 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8839
8840 case PLUS:
8841 case MINUS:
8842 case COMPARE:
8843 case NEG:
8844 case NOT:
8845 return COSTS_N_INSNS (1);
8846
8847 case MULT:
8848 if (arm_arch6m && arm_m_profile_small_mul)
8849 return COSTS_N_INSNS (32);
8850
8851 if (CONST_INT_P (XEXP (x, 1)))
8852 {
8853 int cycles = 0;
8854 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8855
8856 while (i)
8857 {
8858 i >>= 2;
8859 cycles++;
8860 }
8861 return COSTS_N_INSNS (2) + cycles;
8862 }
8863 return COSTS_N_INSNS (1) + 16;
8864
8865 case SET:
8866 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8867 the mode. */
8868 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8869 return (COSTS_N_INSNS (words)
8870 + 4 * ((MEM_P (SET_SRC (x)))
8871 + MEM_P (SET_DEST (x))));
8872
8873 case CONST_INT:
8874 if (outer == SET)
8875 {
8876 if (UINTVAL (x) < 256
8877 /* 16-bit constant. */
8878 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8879 return 0;
8880 if (thumb_shiftable_const (INTVAL (x)))
8881 return COSTS_N_INSNS (2);
8882 return COSTS_N_INSNS (3);
8883 }
8884 else if ((outer == PLUS || outer == COMPARE)
8885 && INTVAL (x) < 256 && INTVAL (x) > -256)
8886 return 0;
8887 else if ((outer == IOR || outer == XOR || outer == AND)
8888 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8889 return COSTS_N_INSNS (1);
8890 else if (outer == AND)
8891 {
8892 int i;
8893 /* This duplicates the tests in the andsi3 expander. */
8894 for (i = 9; i <= 31; i++)
8895 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8896 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8897 return COSTS_N_INSNS (2);
8898 }
8899 else if (outer == ASHIFT || outer == ASHIFTRT
8900 || outer == LSHIFTRT)
8901 return 0;
8902 return COSTS_N_INSNS (2);
8903
8904 case CONST:
8905 case CONST_DOUBLE:
8906 case LABEL_REF:
8907 case SYMBOL_REF:
8908 return COSTS_N_INSNS (3);
8909
8910 case UDIV:
8911 case UMOD:
8912 case DIV:
8913 case MOD:
8914 return 100;
8915
8916 case TRUNCATE:
8917 return 99;
8918
8919 case AND:
8920 case XOR:
8921 case IOR:
8922 /* XXX guess. */
8923 return 8;
8924
8925 case MEM:
8926 /* XXX another guess. */
8927 /* Memory costs quite a lot for the first word, but subsequent words
8928 load at the equivalent of a single insn each. */
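/* For example, assuming UNITS_PER_WORD of 4, a DImode (8-byte) load
   costs 10 + 4 * ((8 - 1) / 4) == 14, plus another 4 if the address is
   a constant-pool SYMBOL_REF. */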
8929 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8930 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8931 ? 4 : 0));
8932
8933 case IF_THEN_ELSE:
8934 /* XXX a guess. */
8935 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8936 return 14;
8937 return 2;
8938
8939 case SIGN_EXTEND:
8940 case ZERO_EXTEND:
8941 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8942 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8943
8944 if (mode == SImode)
8945 return total;
8946
8947 if (arm_arch6)
8948 return total + COSTS_N_INSNS (1);
8949
8950 /* Assume a two-shift sequence. Increase the cost slightly so
8951 we prefer actual shifts over an extend operation. */
8952 return total + 1 + COSTS_N_INSNS (2);
8953
8954 default:
8955 return 99;
8956 }
8957 }
8958
8959 /* Estimate the size cost of thumb1 instructions.
8960 For now, most of the code is copied from thumb1_rtx_costs; we need
8961 more fine-grained tuning when we have more related test cases. */
8962 static inline int
8963 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8964 {
8965 machine_mode mode = GET_MODE (x);
8966 int words, cost;
8967
8968 switch (code)
8969 {
8970 case ASHIFT:
8971 case ASHIFTRT:
8972 case LSHIFTRT:
8973 case ROTATERT:
8974 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8975
8976 case PLUS:
8977 case MINUS:
8978 /* Thumb-1 needs two instructions for the shiftadd/shiftsub0/shiftsub1
8979 combinations used by RTL expansion, especially when expanding
8980 multiplication. */
8981 if ((GET_CODE (XEXP (x, 0)) == MULT
8982 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8983 || (GET_CODE (XEXP (x, 1)) == MULT
8984 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8985 return COSTS_N_INSNS (2);
8986 /* Fall through. */
8987 case COMPARE:
8988 case NEG:
8989 case NOT:
8990 return COSTS_N_INSNS (1);
8991
8992 case MULT:
8993 if (CONST_INT_P (XEXP (x, 1)))
8994 {
8995 /* The Thumb-1 mul instruction can't operate on a constant; we must
8996 load it into a register first. */
8997 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8998 /* For targets that have a very small, high-latency multiply unit,
8999 we prefer to synthesize the mult with up to 5 instructions,
9000 giving a good balance between size and performance. */
9001 if (arm_arch6m && arm_m_profile_small_mul)
9002 return COSTS_N_INSNS (5);
9003 else
9004 return COSTS_N_INSNS (1) + const_size;
9005 }
9006 return COSTS_N_INSNS (1);
9007
9008 case SET:
9009 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9010 the mode. */
9011 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9012 cost = COSTS_N_INSNS (words);
9013 if (satisfies_constraint_J (SET_SRC (x))
9014 || satisfies_constraint_K (SET_SRC (x))
9015 /* Too big an immediate for a 2-byte mov, using MOVT. */
9016 || (CONST_INT_P (SET_SRC (x))
9017 && UINTVAL (SET_SRC (x)) >= 256
9018 && TARGET_HAVE_MOVT
9019 && satisfies_constraint_j (SET_SRC (x)))
9020 /* thumb1_movdi_insn. */
9021 || ((words > 1) && MEM_P (SET_SRC (x))))
9022 cost += COSTS_N_INSNS (1);
9023 return cost;
9024
9025 case CONST_INT:
9026 if (outer == SET)
9027 {
9028 if (UINTVAL (x) < 256)
9029 return COSTS_N_INSNS (1);
9030 /* movw is 4 bytes long. */
9031 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9032 return COSTS_N_INSNS (2);
9033 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9034 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9035 return COSTS_N_INSNS (2);
9036 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9037 if (thumb_shiftable_const (INTVAL (x)))
9038 return COSTS_N_INSNS (2);
9039 return COSTS_N_INSNS (3);
9040 }
9041 else if ((outer == PLUS || outer == COMPARE)
9042 && INTVAL (x) < 256 && INTVAL (x) > -256)
9043 return 0;
9044 else if ((outer == IOR || outer == XOR || outer == AND)
9045 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9046 return COSTS_N_INSNS (1);
9047 else if (outer == AND)
9048 {
9049 int i;
9050 /* This duplicates the tests in the andsi3 expander. */
9051 for (i = 9; i <= 31; i++)
9052 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9053 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9054 return COSTS_N_INSNS (2);
9055 }
9056 else if (outer == ASHIFT || outer == ASHIFTRT
9057 || outer == LSHIFTRT)
9058 return 0;
9059 return COSTS_N_INSNS (2);
9060
9061 case CONST:
9062 case CONST_DOUBLE:
9063 case LABEL_REF:
9064 case SYMBOL_REF:
9065 return COSTS_N_INSNS (3);
9066
9067 case UDIV:
9068 case UMOD:
9069 case DIV:
9070 case MOD:
9071 return 100;
9072
9073 case TRUNCATE:
9074 return 99;
9075
9076 case AND:
9077 case XOR:
9078 case IOR:
9079 return COSTS_N_INSNS (1);
9080
9081 case MEM:
9082 return (COSTS_N_INSNS (1)
9083 + COSTS_N_INSNS (1)
9084 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9085 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9086 ? COSTS_N_INSNS (1) : 0));
9087
9088 case IF_THEN_ELSE:
9089 /* XXX a guess. */
9090 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9091 return 14;
9092 return 2;
9093
9094 case ZERO_EXTEND:
9095 /* XXX still guessing. */
9096 switch (GET_MODE (XEXP (x, 0)))
9097 {
9098 case E_QImode:
9099 return (1 + (mode == DImode ? 4 : 0)
9100 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9101
9102 case E_HImode:
9103 return (4 + (mode == DImode ? 4 : 0)
9104 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9105
9106 case E_SImode:
9107 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9108
9109 default:
9110 return 99;
9111 }
9112
9113 default:
9114 return 99;
9115 }
9116 }
9117
9118 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9119 operand, then return the operand that is being shifted. If the shift
9120 is not by a constant, then set SHIFT_REG to point to the shift-amount
9121 operand. Return NULL if OP is not a shifter operand. */
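/* For example, (mult X (const_int 8)) counts as X shifted left by 3
   (exact_log2 (8) == 3) and X is returned, while for (ashift X Y) with
   Y a register, X is returned and *SHIFT_REG is set to Y. */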
9122 static rtx
9123 shifter_op_p (rtx op, rtx *shift_reg)
9124 {
9125 enum rtx_code code = GET_CODE (op);
9126
9127 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9128 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9129 return XEXP (op, 0);
9130 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9131 return XEXP (op, 0);
9132 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9133 || code == ASHIFTRT)
9134 {
9135 if (!CONST_INT_P (XEXP (op, 1)))
9136 *shift_reg = XEXP (op, 1);
9137 return XEXP (op, 0);
9138 }
9139
9140 return NULL;
9141 }
9142
9143 static bool
9144 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9145 {
9146 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9147 rtx_code code = GET_CODE (x);
9148 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9149
9150 switch (XINT (x, 1))
9151 {
9152 case UNSPEC_UNALIGNED_LOAD:
9153 /* We can only do unaligned loads into the integer unit, and we can't
9154 use LDM or LDRD. */
9155 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9156 if (speed_p)
9157 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9158 + extra_cost->ldst.load_unaligned);
9159
9160 #ifdef NOT_YET
9161 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9162 ADDR_SPACE_GENERIC, speed_p);
9163 #endif
9164 return true;
9165
9166 case UNSPEC_UNALIGNED_STORE:
9167 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9168 if (speed_p)
9169 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9170 + extra_cost->ldst.store_unaligned);
9171
9172 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9173 #ifdef NOT_YET
9174 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9175 ADDR_SPACE_GENERIC, speed_p);
9176 #endif
9177 return true;
9178
9179 case UNSPEC_VRINTZ:
9180 case UNSPEC_VRINTP:
9181 case UNSPEC_VRINTM:
9182 case UNSPEC_VRINTR:
9183 case UNSPEC_VRINTX:
9184 case UNSPEC_VRINTA:
9185 if (speed_p)
9186 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9187
9188 return true;
9189 default:
9190 *cost = COSTS_N_INSNS (2);
9191 break;
9192 }
9193 return true;
9194 }
9195
9196 /* Cost of a libcall. We assume one insn per argument, an amount for the
9197 call (one insn for -Os) and then one for processing the result. */
9198 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
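/* For instance, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
   optimizing for speed and COSTS_N_INSNS (4) at -Os. */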
9199
9200 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9201 do \
9202 { \
9203 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9204 if (shift_op != NULL \
9205 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9206 { \
9207 if (shift_reg) \
9208 { \
9209 if (speed_p) \
9210 *cost += extra_cost->alu.arith_shift_reg; \
9211 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9212 ASHIFT, 1, speed_p); \
9213 } \
9214 else if (speed_p) \
9215 *cost += extra_cost->alu.arith_shift; \
9216 \
9217 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9218 ASHIFT, 0, speed_p) \
9219 + rtx_cost (XEXP (x, 1 - IDX), \
9220 GET_MODE (shift_op), \
9221 OP, 1, speed_p)); \
9222 return true; \
9223 } \
9224 } \
9225 while (0);
9226
9227 /* RTX costs. Make an estimate of the cost of executing the operation
9228 X, which is contained within an operation with code OUTER_CODE.
9229 SPEED_P indicates whether the desired cost is the performance cost
9230 or the size cost. The estimate is stored in COST and the return
9231 value is TRUE if the cost calculation is final, or FALSE if the
9232 caller should recurse through the operands of X to add additional
9233 costs.
9234
9235 We currently make no attempt to model the size savings of Thumb-2
9236 16-bit instructions. At the normal points in compilation where
9237 this code is called we have no measure of whether the condition
9238 flags are live or not, and thus no realistic way to determine what
9239 the size will eventually be. */
9240 static bool
9241 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9242 const struct cpu_cost_table *extra_cost,
9243 int *cost, bool speed_p)
9244 {
9245 machine_mode mode = GET_MODE (x);
9246
9247 *cost = COSTS_N_INSNS (1);
9248
9249 if (TARGET_THUMB1)
9250 {
9251 if (speed_p)
9252 *cost = thumb1_rtx_costs (x, code, outer_code);
9253 else
9254 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9255 return true;
9256 }
9257
9258 switch (code)
9259 {
9260 case SET:
9261 *cost = 0;
9262 /* SET RTXs don't have a mode so we get it from the destination. */
9263 mode = GET_MODE (SET_DEST (x));
9264
9265 if (REG_P (SET_SRC (x))
9266 && REG_P (SET_DEST (x)))
9267 {
9268 /* Assume that most copies can be done with a single insn,
9269 unless we don't have HW FP, in which case everything
9270 larger than word mode will require two insns. */
9271 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9272 && GET_MODE_SIZE (mode) > 4)
9273 || mode == DImode)
9274 ? 2 : 1);
9275 /* Conditional register moves can be encoded
9276 in 16 bits in Thumb mode. */
9277 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9278 *cost >>= 1;
9279
9280 return true;
9281 }
9282
9283 if (CONST_INT_P (SET_SRC (x)))
9284 {
9285 /* Handle CONST_INT here, since the value doesn't have a mode
9286 and we would otherwise be unable to work out the true cost. */
9287 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9288 0, speed_p);
9289 outer_code = SET;
9290 /* Slightly lower the cost of setting a core reg to a constant.
9291 This helps break up chains and allows for better scheduling. */
9292 if (REG_P (SET_DEST (x))
9293 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9294 *cost -= 1;
9295 x = SET_SRC (x);
9296 /* Immediate moves with an immediate in the range [0, 255] can be
9297 encoded in 16 bits in Thumb mode. */
9298 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9299 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9300 *cost >>= 1;
9301 goto const_int_cost;
9302 }
9303
9304 return false;
9305
9306 case MEM:
9307 /* A memory access costs 1 insn if the mode is small or the address is
9308 a single register; otherwise it costs one insn per word. */
9309 if (REG_P (XEXP (x, 0)))
9310 *cost = COSTS_N_INSNS (1);
9311 else if (flag_pic
9312 && GET_CODE (XEXP (x, 0)) == PLUS
9313 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9314 /* This will be split into two instructions.
9315 See arm.md:calculate_pic_address. */
9316 *cost = COSTS_N_INSNS (2);
9317 else
9318 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9319
9320 /* For speed optimizations, add the costs of the address and
9321 accessing memory. */
9322 if (speed_p)
9323 #ifdef NOT_YET
9324 *cost += (extra_cost->ldst.load
9325 + arm_address_cost (XEXP (x, 0), mode,
9326 ADDR_SPACE_GENERIC, speed_p));
9327 #else
9328 *cost += extra_cost->ldst.load;
9329 #endif
9330 return true;
9331
9332 case PARALLEL:
9333 {
9334 /* Calculations of LDM costs are complex. We assume an initial cost
9335 (ldm_1st) which covers loading up to ldm_regs_per_insn_1st
9336 registers; then each additional group of
9337 ldm_regs_per_insn_subsequent registers costs one more insn. The
9338 formula for N regs is thus:
9339
9340 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9341 + ldm_regs_per_insn_subsequent - 1)
9342 / ldm_regs_per_insn_subsequent).
9343
9344 Additional costs may also be added for addressing. A similar
9345 formula is used for STM. */
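/* For example, assuming hypothetical tuning values of 3 for
   ldm_regs_per_insn_1st and 2 for ldm_regs_per_insn_subsequent, an
   8-register LDM costs
   ldm_1st + COSTS_N_INSNS ((max (8 - 3, 0) + 2 - 1) / 2)
   == ldm_1st + COSTS_N_INSNS (3). */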
9346
9347 bool is_ldm = load_multiple_operation (x, SImode);
9348 bool is_stm = store_multiple_operation (x, SImode);
9349
9350 if (is_ldm || is_stm)
9351 {
9352 if (speed_p)
9353 {
9354 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9355 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9356 ? extra_cost->ldst.ldm_regs_per_insn_1st
9357 : extra_cost->ldst.stm_regs_per_insn_1st;
9358 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9359 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9360 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9361
9362 *cost += regs_per_insn_1st
9363 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9364 + regs_per_insn_sub - 1)
9365 / regs_per_insn_sub);
9366 return true;
9367 }
9368
9369 }
9370 return false;
9371 }
9372 case DIV:
9373 case UDIV:
9374 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9375 && (mode == SFmode || !TARGET_VFP_SINGLE))
9376 *cost += COSTS_N_INSNS (speed_p
9377 ? extra_cost->fp[mode != SFmode].div : 0);
9378 else if (mode == SImode && TARGET_IDIV)
9379 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9380 else
9381 *cost = LIBCALL_COST (2);
9382
9383 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9384 are possible, udiv is preferred. */
9385 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9386 return false; /* All arguments must be in registers. */
9387
9388 case MOD:
9389 /* MOD by a power of 2 can be expanded as:
9390 rsbs r1, r0, #0
9391 and r0, r0, #(n - 1)
9392 and r1, r1, #(n - 1)
9393 rsbpl r0, r1, #0. */
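/* E.g. for x % 16 the masks above become #15; together with the base
   COSTS_N_INSNS (1) already in *cost, the COSTS_N_INSNS (3) added below
   accounts for the full four-instruction sequence. */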
9394 if (CONST_INT_P (XEXP (x, 1))
9395 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9396 && mode == SImode)
9397 {
9398 *cost += COSTS_N_INSNS (3);
9399
9400 if (speed_p)
9401 *cost += 2 * extra_cost->alu.logical
9402 + extra_cost->alu.arith;
9403 return true;
9404 }
9405
9406 /* Fall-through. */
9407 case UMOD:
9408 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9409 are possible, udiv is preferred. */
9410 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9411 return false; /* All arguments must be in registers. */
9412
9413 case ROTATE:
9414 if (mode == SImode && REG_P (XEXP (x, 1)))
9415 {
9416 *cost += (COSTS_N_INSNS (1)
9417 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9418 if (speed_p)
9419 *cost += extra_cost->alu.shift_reg;
9420 return true;
9421 }
9422 /* Fall through */
9423 case ROTATERT:
9424 case ASHIFT:
9425 case LSHIFTRT:
9426 case ASHIFTRT:
9427 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9428 {
9429 *cost += (COSTS_N_INSNS (2)
9430 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9431 if (speed_p)
9432 *cost += 2 * extra_cost->alu.shift;
9433 return true;
9434 }
9435 else if (mode == SImode)
9436 {
9437 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9438 /* Slightly disparage register shifts at -Os, but not by much. */
9439 if (!CONST_INT_P (XEXP (x, 1)))
9440 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9441 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9442 return true;
9443 }
9444 else if (GET_MODE_CLASS (mode) == MODE_INT
9445 && GET_MODE_SIZE (mode) < 4)
9446 {
9447 if (code == ASHIFT)
9448 {
9449 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9450 /* Slightly disparage register shifts at -Os, but not by
9451 much. */
9452 if (!CONST_INT_P (XEXP (x, 1)))
9453 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9454 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9455 }
9456 else if (code == LSHIFTRT || code == ASHIFTRT)
9457 {
9458 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9459 {
9460 /* Can use SBFX/UBFX. */
9461 if (speed_p)
9462 *cost += extra_cost->alu.bfx;
9463 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9464 }
9465 else
9466 {
9467 *cost += COSTS_N_INSNS (1);
9468 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9469 if (speed_p)
9470 {
9471 if (CONST_INT_P (XEXP (x, 1)))
9472 *cost += 2 * extra_cost->alu.shift;
9473 else
9474 *cost += (extra_cost->alu.shift
9475 + extra_cost->alu.shift_reg);
9476 }
9477 else
9478 /* Slightly disparage register shifts. */
9479 *cost += !CONST_INT_P (XEXP (x, 1));
9480 }
9481 }
9482 else /* Rotates. */
9483 {
9484 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9485 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9486 if (speed_p)
9487 {
9488 if (CONST_INT_P (XEXP (x, 1)))
9489 *cost += (2 * extra_cost->alu.shift
9490 + extra_cost->alu.log_shift);
9491 else
9492 *cost += (extra_cost->alu.shift
9493 + extra_cost->alu.shift_reg
9494 + extra_cost->alu.log_shift_reg);
9495 }
9496 }
9497 return true;
9498 }
9499
9500 *cost = LIBCALL_COST (2);
9501 return false;
9502
9503 case BSWAP:
9504 if (arm_arch6)
9505 {
9506 if (mode == SImode)
9507 {
9508 if (speed_p)
9509 *cost += extra_cost->alu.rev;
9510
9511 return false;
9512 }
9513 }
9514 else
9515 {
9516 /* No rev instruction available. Look at arm_legacy_rev
9517 and thumb_legacy_rev for the form of RTL used then. */
9518 if (TARGET_THUMB)
9519 {
9520 *cost += COSTS_N_INSNS (9);
9521
9522 if (speed_p)
9523 {
9524 *cost += 6 * extra_cost->alu.shift;
9525 *cost += 3 * extra_cost->alu.logical;
9526 }
9527 }
9528 else
9529 {
9530 *cost += COSTS_N_INSNS (4);
9531
9532 if (speed_p)
9533 {
9534 *cost += 2 * extra_cost->alu.shift;
9535 *cost += extra_cost->alu.arith_shift;
9536 *cost += 2 * extra_cost->alu.logical;
9537 }
9538 }
9539 return true;
9540 }
9541 return false;
9542
9543 case MINUS:
9544 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9545 && (mode == SFmode || !TARGET_VFP_SINGLE))
9546 {
9547 if (GET_CODE (XEXP (x, 0)) == MULT
9548 || GET_CODE (XEXP (x, 1)) == MULT)
9549 {
9550 rtx mul_op0, mul_op1, sub_op;
9551
9552 if (speed_p)
9553 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9554
9555 if (GET_CODE (XEXP (x, 0)) == MULT)
9556 {
9557 mul_op0 = XEXP (XEXP (x, 0), 0);
9558 mul_op1 = XEXP (XEXP (x, 0), 1);
9559 sub_op = XEXP (x, 1);
9560 }
9561 else
9562 {
9563 mul_op0 = XEXP (XEXP (x, 1), 0);
9564 mul_op1 = XEXP (XEXP (x, 1), 1);
9565 sub_op = XEXP (x, 0);
9566 }
9567
9568 /* The first operand of the multiply may be optionally
9569 negated. */
9570 if (GET_CODE (mul_op0) == NEG)
9571 mul_op0 = XEXP (mul_op0, 0);
9572
9573 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9574 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9575 + rtx_cost (sub_op, mode, code, 0, speed_p));
9576
9577 return true;
9578 }
9579
9580 if (speed_p)
9581 *cost += extra_cost->fp[mode != SFmode].addsub;
9582 return false;
9583 }
9584
9585 if (mode == SImode)
9586 {
9587 rtx shift_by_reg = NULL;
9588 rtx shift_op;
9589 rtx non_shift_op;
9590
9591 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9592 if (shift_op == NULL)
9593 {
9594 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9595 non_shift_op = XEXP (x, 0);
9596 }
9597 else
9598 non_shift_op = XEXP (x, 1);
9599
9600 if (shift_op != NULL)
9601 {
9602 if (shift_by_reg != NULL)
9603 {
9604 if (speed_p)
9605 *cost += extra_cost->alu.arith_shift_reg;
9606 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9607 }
9608 else if (speed_p)
9609 *cost += extra_cost->alu.arith_shift;
9610
9611 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9612 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9613 return true;
9614 }
9615
9616 if (arm_arch_thumb2
9617 && GET_CODE (XEXP (x, 1)) == MULT)
9618 {
9619 /* MLS. */
9620 if (speed_p)
9621 *cost += extra_cost->mult[0].add;
9622 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9623 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9624 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9625 return true;
9626 }
9627
9628 if (CONST_INT_P (XEXP (x, 0)))
9629 {
9630 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9631 INTVAL (XEXP (x, 0)), NULL_RTX,
9632 NULL_RTX, 1, 0);
9633 *cost = COSTS_N_INSNS (insns);
9634 if (speed_p)
9635 *cost += insns * extra_cost->alu.arith;
9636 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9637 return true;
9638 }
9639 else if (speed_p)
9640 *cost += extra_cost->alu.arith;
9641
9642 return false;
9643 }
9644
9645 if (GET_MODE_CLASS (mode) == MODE_INT
9646 && GET_MODE_SIZE (mode) < 4)
9647 {
9648 rtx shift_op, shift_reg;
9649 shift_reg = NULL;
9650
9651 /* We check both sides of the MINUS for shifter operands since,
9652 unlike PLUS, it's not commutative. */
9653
9654 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9655 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9656
9657 /* Slightly disparage, as we might need to widen the result. */
9658 *cost += 1;
9659 if (speed_p)
9660 *cost += extra_cost->alu.arith;
9661
9662 if (CONST_INT_P (XEXP (x, 0)))
9663 {
9664 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9665 return true;
9666 }
9667
9668 return false;
9669 }
9670
9671 if (mode == DImode)
9672 {
9673 *cost += COSTS_N_INSNS (1);
9674
9675 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9676 {
9677 rtx op1 = XEXP (x, 1);
9678
9679 if (speed_p)
9680 *cost += 2 * extra_cost->alu.arith;
9681
9682 if (GET_CODE (op1) == ZERO_EXTEND)
9683 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9684 0, speed_p);
9685 else
9686 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9687 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9688 0, speed_p);
9689 return true;
9690 }
9691 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9692 {
9693 if (speed_p)
9694 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9695 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9696 0, speed_p)
9697 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9698 return true;
9699 }
9700 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9701 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9702 {
9703 if (speed_p)
9704 *cost += (extra_cost->alu.arith
9705 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9706 ? extra_cost->alu.arith
9707 : extra_cost->alu.arith_shift));
9708 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9709 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9710 GET_CODE (XEXP (x, 1)), 0, speed_p));
9711 return true;
9712 }
9713
9714 if (speed_p)
9715 *cost += 2 * extra_cost->alu.arith;
9716 return false;
9717 }
9718
9719 /* Vector mode? */
9720
9721 *cost = LIBCALL_COST (2);
9722 return false;
9723
9724 case PLUS:
9725 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9726 && (mode == SFmode || !TARGET_VFP_SINGLE))
9727 {
9728 if (GET_CODE (XEXP (x, 0)) == MULT)
9729 {
9730 rtx mul_op0, mul_op1, add_op;
9731
9732 if (speed_p)
9733 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9734
9735 mul_op0 = XEXP (XEXP (x, 0), 0);
9736 mul_op1 = XEXP (XEXP (x, 0), 1);
9737 add_op = XEXP (x, 1);
9738
9739 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9740 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9741 + rtx_cost (add_op, mode, code, 0, speed_p));
9742
9743 return true;
9744 }
9745
9746 if (speed_p)
9747 *cost += extra_cost->fp[mode != SFmode].addsub;
9748 return false;
9749 }
9750 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9751 {
9752 *cost = LIBCALL_COST (2);
9753 return false;
9754 }
9755
9756 /* Narrow modes can be synthesized in SImode, but the range
9757 of useful sub-operations is limited. Check for shift operations
9758 on one of the operands. Only left shifts can be used in the
9759 narrow modes. */
9760 if (GET_MODE_CLASS (mode) == MODE_INT
9761 && GET_MODE_SIZE (mode) < 4)
9762 {
9763 rtx shift_op, shift_reg;
9764 shift_reg = NULL;
9765
9766 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9767
9768 if (CONST_INT_P (XEXP (x, 1)))
9769 {
9770 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9771 INTVAL (XEXP (x, 1)), NULL_RTX,
9772 NULL_RTX, 1, 0);
9773 *cost = COSTS_N_INSNS (insns);
9774 if (speed_p)
9775 *cost += insns * extra_cost->alu.arith;
9776 /* Slightly penalize a narrow operation as the result may
9777 need widening. */
9778 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9779 return true;
9780 }
9781
9782 /* Slightly penalize a narrow operation as the result may
9783 need widening. */
9784 *cost += 1;
9785 if (speed_p)
9786 *cost += extra_cost->alu.arith;
9787
9788 return false;
9789 }
9790
9791 if (mode == SImode)
9792 {
9793 rtx shift_op, shift_reg;
9794
9795 if (TARGET_INT_SIMD
9796 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9797 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9798 {
9799 /* UXTA[BH] or SXTA[BH]. */
9800 if (speed_p)
9801 *cost += extra_cost->alu.extend_arith;
9802 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9803 0, speed_p)
9804 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9805 return true;
9806 }
9807
9808 shift_reg = NULL;
9809 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9810 if (shift_op != NULL)
9811 {
9812 if (shift_reg)
9813 {
9814 if (speed_p)
9815 *cost += extra_cost->alu.arith_shift_reg;
9816 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9817 }
9818 else if (speed_p)
9819 *cost += extra_cost->alu.arith_shift;
9820
9821 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9822 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9823 return true;
9824 }
9825 if (GET_CODE (XEXP (x, 0)) == MULT)
9826 {
9827 rtx mul_op = XEXP (x, 0);
9828
9829 if (TARGET_DSP_MULTIPLY
9830 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9831 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9832 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9833 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9834 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9835 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9836 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9837 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9838 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9839 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9840 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9841 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9842 == 16))))))
9843 {
9844 /* SMLA[BT][BT]. */
9845 if (speed_p)
9846 *cost += extra_cost->mult[0].extend_add;
9847 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9848 SIGN_EXTEND, 0, speed_p)
9849 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9850 SIGN_EXTEND, 0, speed_p)
9851 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9852 return true;
9853 }
9854
9855 if (speed_p)
9856 *cost += extra_cost->mult[0].add;
9857 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9858 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9859 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9860 return true;
9861 }
9862 if (CONST_INT_P (XEXP (x, 1)))
9863 {
9864 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9865 INTVAL (XEXP (x, 1)), NULL_RTX,
9866 NULL_RTX, 1, 0);
9867 *cost = COSTS_N_INSNS (insns);
9868 if (speed_p)
9869 *cost += insns * extra_cost->alu.arith;
9870 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9871 return true;
9872 }
9873 else if (speed_p)
9874 *cost += extra_cost->alu.arith;
9875
9876 return false;
9877 }
9878
9879 if (mode == DImode)
9880 {
9881 if (arm_arch3m
9882 && GET_CODE (XEXP (x, 0)) == MULT
9883 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9884 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9885 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9886 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9887 {
9888 if (speed_p)
9889 *cost += extra_cost->mult[1].extend_add;
9890 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9891 ZERO_EXTEND, 0, speed_p)
9892 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9893 ZERO_EXTEND, 0, speed_p)
9894 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9895 return true;
9896 }
9897
9898 *cost += COSTS_N_INSNS (1);
9899
9900 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9901 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9902 {
9903 if (speed_p)
9904 *cost += (extra_cost->alu.arith
9905 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9906 ? extra_cost->alu.arith
9907 : extra_cost->alu.arith_shift));
9908
9909 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9910 0, speed_p)
9911 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9912 return true;
9913 }
9914
9915 if (speed_p)
9916 *cost += 2 * extra_cost->alu.arith;
9917 return false;
9918 }
9919
9920 /* Vector mode? */
9921 *cost = LIBCALL_COST (2);
9922 return false;
9923 case IOR:
9924 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9925 {
9926 if (speed_p)
9927 *cost += extra_cost->alu.rev;
9928
9929 return true;
9930 }
9931 /* Fall through. */
9932 case AND: case XOR:
9933 if (mode == SImode)
9934 {
9935 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9936 rtx op0 = XEXP (x, 0);
9937 rtx shift_op, shift_reg;
9938
9939 if (subcode == NOT
9940 && (code == AND
9941 || (code == IOR && TARGET_THUMB2)))
9942 op0 = XEXP (op0, 0);
9943
9944 shift_reg = NULL;
9945 shift_op = shifter_op_p (op0, &shift_reg);
9946 if (shift_op != NULL)
9947 {
9948 if (shift_reg)
9949 {
9950 if (speed_p)
9951 *cost += extra_cost->alu.log_shift_reg;
9952 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9953 }
9954 else if (speed_p)
9955 *cost += extra_cost->alu.log_shift;
9956
9957 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9958 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9959 return true;
9960 }
9961
9962 if (CONST_INT_P (XEXP (x, 1)))
9963 {
9964 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9965 INTVAL (XEXP (x, 1)), NULL_RTX,
9966 NULL_RTX, 1, 0);
9967
9968 *cost = COSTS_N_INSNS (insns);
9969 if (speed_p)
9970 *cost += insns * extra_cost->alu.logical;
9971 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9972 return true;
9973 }
9974
9975 if (speed_p)
9976 *cost += extra_cost->alu.logical;
9977 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9978 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9979 return true;
9980 }
9981
9982 if (mode == DImode)
9983 {
9984 rtx op0 = XEXP (x, 0);
9985 enum rtx_code subcode = GET_CODE (op0);
9986
9987 *cost += COSTS_N_INSNS (1);
9988
9989 if (subcode == NOT
9990 && (code == AND
9991 || (code == IOR && TARGET_THUMB2)))
9992 op0 = XEXP (op0, 0);
9993
9994 if (GET_CODE (op0) == ZERO_EXTEND)
9995 {
9996 if (speed_p)
9997 *cost += 2 * extra_cost->alu.logical;
9998
9999 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10000 0, speed_p)
10001 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10002 return true;
10003 }
10004 else if (GET_CODE (op0) == SIGN_EXTEND)
10005 {
10006 if (speed_p)
10007 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10008
10009 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10010 0, speed_p)
10011 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10012 return true;
10013 }
10014
10015 if (speed_p)
10016 *cost += 2 * extra_cost->alu.logical;
10017
10018 return true;
10019 }
10020 /* Vector mode? */
10021
10022 *cost = LIBCALL_COST (2);
10023 return false;
10024
10025 case MULT:
10026 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10027 && (mode == SFmode || !TARGET_VFP_SINGLE))
10028 {
10029 rtx op0 = XEXP (x, 0);
10030
10031 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10032 op0 = XEXP (op0, 0);
10033
10034 if (speed_p)
10035 *cost += extra_cost->fp[mode != SFmode].mult;
10036
10037 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10038 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10039 return true;
10040 }
10041 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10042 {
10043 *cost = LIBCALL_COST (2);
10044 return false;
10045 }
10046
10047 if (mode == SImode)
10048 {
10049 if (TARGET_DSP_MULTIPLY
10050 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10051 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10052 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10053 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10054 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10055 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10056 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10057 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10058 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10059 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10060 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10061 && (INTVAL (XEXP (XEXP (x, 1), 1))
10062 == 16))))))
10063 {
10064 /* SMUL[TB][TB]. */
10065 if (speed_p)
10066 *cost += extra_cost->mult[0].extend;
10067 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10068 SIGN_EXTEND, 0, speed_p);
10069 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10070 SIGN_EXTEND, 1, speed_p);
10071 return true;
10072 }
10073 if (speed_p)
10074 *cost += extra_cost->mult[0].simple;
10075 return false;
10076 }
10077
10078 if (mode == DImode)
10079 {
10080 if (arm_arch3m
10081 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10082 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10083 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10084 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10085 {
10086 if (speed_p)
10087 *cost += extra_cost->mult[1].extend;
10088 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10089 ZERO_EXTEND, 0, speed_p)
10090 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10091 ZERO_EXTEND, 0, speed_p));
10092 return true;
10093 }
10094
10095 *cost = LIBCALL_COST (2);
10096 return false;
10097 }
10098
10099 /* Vector mode? */
10100 *cost = LIBCALL_COST (2);
10101 return false;
10102
10103 case NEG:
10104 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10105 && (mode == SFmode || !TARGET_VFP_SINGLE))
10106 {
10107 if (GET_CODE (XEXP (x, 0)) == MULT)
10108 {
10109 /* VNMUL. */
10110 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10111 return true;
10112 }
10113
10114 if (speed_p)
10115 *cost += extra_cost->fp[mode != SFmode].neg;
10116
10117 return false;
10118 }
10119 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10120 {
10121 *cost = LIBCALL_COST (1);
10122 return false;
10123 }
10124
10125 if (mode == SImode)
10126 {
10127 if (GET_CODE (XEXP (x, 0)) == ABS)
10128 {
10129 *cost += COSTS_N_INSNS (1);
10130 /* Assume the non-flag-changing variant. */
10131 if (speed_p)
10132 *cost += (extra_cost->alu.log_shift
10133 + extra_cost->alu.arith_shift);
10134 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10135 return true;
10136 }
10137
10138 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10139 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10140 {
10141 *cost += COSTS_N_INSNS (1);
10142 /* No extra cost for MOV imm and MVN imm. */
10143 /* If the comparison op is using the flags, there's no further
10144 cost; otherwise we need to add the cost of the comparison. */
10145 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10146 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10147 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10148 {
10149 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10150 *cost += (COSTS_N_INSNS (1)
10151 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10152 0, speed_p)
10153 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10154 1, speed_p));
10155 if (speed_p)
10156 *cost += extra_cost->alu.arith;
10157 }
10158 return true;
10159 }
10160
10161 if (speed_p)
10162 *cost += extra_cost->alu.arith;
10163 return false;
10164 }
10165
10166 if (GET_MODE_CLASS (mode) == MODE_INT
10167 && GET_MODE_SIZE (mode) < 4)
10168 {
10169 /* Slightly disparage, as we might need an extend operation. */
10170 *cost += 1;
10171 if (speed_p)
10172 *cost += extra_cost->alu.arith;
10173 return false;
10174 }
10175
10176 if (mode == DImode)
10177 {
10178 *cost += COSTS_N_INSNS (1);
10179 if (speed_p)
10180 *cost += 2 * extra_cost->alu.arith;
10181 return false;
10182 }
10183
10184 /* Vector mode? */
10185 *cost = LIBCALL_COST (1);
10186 return false;
10187
10188 case NOT:
10189 if (mode == SImode)
10190 {
10191 rtx shift_op;
10192 rtx shift_reg = NULL;
10193
10194 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10195
10196 if (shift_op)
10197 {
10198 if (shift_reg != NULL)
10199 {
10200 if (speed_p)
10201 *cost += extra_cost->alu.log_shift_reg;
10202 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10203 }
10204 else if (speed_p)
10205 *cost += extra_cost->alu.log_shift;
10206 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10207 return true;
10208 }
10209
10210 if (speed_p)
10211 *cost += extra_cost->alu.logical;
10212 return false;
10213 }
10214 if (mode == DImode)
10215 {
10216 *cost += COSTS_N_INSNS (1);
10217 return false;
10218 }
10219
10220 /* Vector mode? */
10221
10222 *cost += LIBCALL_COST (1);
10223 return false;
10224
10225 case IF_THEN_ELSE:
10226 {
10227 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10228 {
10229 *cost += COSTS_N_INSNS (3);
10230 return true;
10231 }
10232 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10233 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10234
10235 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10236 /* Assume that if one arm of the if_then_else is a register,
10237 that it will be tied with the result and eliminate the
10238 conditional insn. */
10239 if (REG_P (XEXP (x, 1)))
10240 *cost += op2cost;
10241 else if (REG_P (XEXP (x, 2)))
10242 *cost += op1cost;
10243 else
10244 {
10245 if (speed_p)
10246 {
10247 if (extra_cost->alu.non_exec_costs_exec)
10248 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10249 else
10250 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10251 }
10252 else
10253 *cost += op1cost + op2cost;
10254 }
10255 }
10256 return true;
10257
10258 case COMPARE:
10259 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10260 *cost = 0;
10261 else
10262 {
10263 machine_mode op0mode;
10264 /* We'll mostly assume that the cost of a compare is the cost of the
10265 LHS. However, there are some notable exceptions. */
10266
10267 /* Floating point compares are never done as side-effects. */
10268 op0mode = GET_MODE (XEXP (x, 0));
10269 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10270 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10271 {
10272 if (speed_p)
10273 *cost += extra_cost->fp[op0mode != SFmode].compare;
10274
10275 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10276 {
10277 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10278 return true;
10279 }
10280
10281 return false;
10282 }
10283 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10284 {
10285 *cost = LIBCALL_COST (2);
10286 return false;
10287 }
10288
10289 /* DImode compares normally take two insns. */
10290 if (op0mode == DImode)
10291 {
10292 *cost += COSTS_N_INSNS (1);
10293 if (speed_p)
10294 *cost += 2 * extra_cost->alu.arith;
10295 return false;
10296 }
10297
10298 if (op0mode == SImode)
10299 {
10300 rtx shift_op;
10301 rtx shift_reg;
10302
10303 if (XEXP (x, 1) == const0_rtx
10304 && !(REG_P (XEXP (x, 0))
10305 || (GET_CODE (XEXP (x, 0)) == SUBREG
10306 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10307 {
10308 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10309
10310 /* Multiply operations that set the flags are often
10311 significantly more expensive. */
10312 if (speed_p
10313 && GET_CODE (XEXP (x, 0)) == MULT
10314 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10315 *cost += extra_cost->mult[0].flag_setting;
10316
10317 if (speed_p
10318 && GET_CODE (XEXP (x, 0)) == PLUS
10319 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10320 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10321 0), 1), mode))
10322 *cost += extra_cost->mult[0].flag_setting;
10323 return true;
10324 }
10325
10326 shift_reg = NULL;
10327 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10328 if (shift_op != NULL)
10329 {
10330 if (shift_reg != NULL)
10331 {
10332 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10333 1, speed_p);
10334 if (speed_p)
10335 *cost += extra_cost->alu.arith_shift_reg;
10336 }
10337 else if (speed_p)
10338 *cost += extra_cost->alu.arith_shift;
10339 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10340 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10341 return true;
10342 }
10343
10344 if (speed_p)
10345 *cost += extra_cost->alu.arith;
10346 if (CONST_INT_P (XEXP (x, 1))
10347 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10348 {
10349 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10350 return true;
10351 }
10352 return false;
10353 }
10354
10355 /* Vector mode? */
10356
10357 *cost = LIBCALL_COST (2);
10358 return false;
10359 }
10360 return true;
10361
10362 case EQ:
10363 case NE:
10364 case LT:
10365 case LE:
10366 case GT:
10367 case GE:
10368 case LTU:
10369 case LEU:
10370 case GEU:
10371 case GTU:
10372 case ORDERED:
10373 case UNORDERED:
10374 case UNEQ:
10375 case UNLE:
10376 case UNLT:
10377 case UNGE:
10378 case UNGT:
10379 case LTGT:
10380 if (outer_code == SET)
10381 {
10382 /* Is it a store-flag operation? */
10383 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10384 && XEXP (x, 1) == const0_rtx)
10385 {
10386 /* Thumb also needs an IT insn. */
10387 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10388 return true;
10389 }
10390 if (XEXP (x, 1) == const0_rtx)
10391 {
10392 switch (code)
10393 {
10394 case LT:
10395 /* LSR Rd, Rn, #31. */
10396 if (speed_p)
10397 *cost += extra_cost->alu.shift;
10398 break;
10399
10400 case EQ:
10401 /* RSBS T1, Rn, #0
10402 ADC Rd, Rn, T1. */
10403
10404 case NE:
10405 /* SUBS T1, Rn, #1
10406 SBC Rd, Rn, T1. */
10407 *cost += COSTS_N_INSNS (1);
10408 break;
10409
10410 case LE:
10411 /* RSBS T1, Rn, Rn, LSR #31
10412 ADC Rd, Rn, T1. */
10413 *cost += COSTS_N_INSNS (1);
10414 if (speed_p)
10415 *cost += extra_cost->alu.arith_shift;
10416 break;
10417
10418 case GT:
10419 /* RSB Rd, Rn, Rn, ASR #1
10420 LSR Rd, Rd, #31. */
10421 *cost += COSTS_N_INSNS (1);
10422 if (speed_p)
10423 *cost += (extra_cost->alu.arith_shift
10424 + extra_cost->alu.shift);
10425 break;
10426
10427 case GE:
10428 /* ASR Rd, Rn, #31
10429 ADD Rd, Rn, #1. */
10430 *cost += COSTS_N_INSNS (1);
10431 if (speed_p)
10432 *cost += extra_cost->alu.shift;
10433 break;
10434
10435 default:
10436 /* Remaining cases are either meaningless or would take
10437 three insns anyway. */
10438 *cost = COSTS_N_INSNS (3);
10439 break;
10440 }
10441 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10442 return true;
10443 }
10444 else
10445 {
10446 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10447 if (CONST_INT_P (XEXP (x, 1))
10448 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10449 {
10450 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10451 return true;
10452 }
10453
10454 return false;
10455 }
10456 }
10457 /* Not directly inside a set. If it involves the condition code
10458 register, it must be the condition for a branch, cond_exec or
10459 I_T_E operation. Since the comparison is performed elsewhere,
10460 this is just the control part, which has no additional
10461 cost. */
10462 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10463 && XEXP (x, 1) == const0_rtx)
10464 {
10465 *cost = 0;
10466 return true;
10467 }
10468 return false;
10469
10470 case ABS:
10471 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10472 && (mode == SFmode || !TARGET_VFP_SINGLE))
10473 {
10474 if (speed_p)
10475 *cost += extra_cost->fp[mode != SFmode].neg;
10476
10477 return false;
10478 }
10479 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10480 {
10481 *cost = LIBCALL_COST (1);
10482 return false;
10483 }
10484
10485 if (mode == SImode)
10486 {
10487 if (speed_p)
10488 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10489 return false;
10490 }
10491 /* Vector mode? */
10492 *cost = LIBCALL_COST (1);
10493 return false;
10494
10495 case SIGN_EXTEND:
10496 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10497 && MEM_P (XEXP (x, 0)))
10498 {
10499 if (mode == DImode)
10500 *cost += COSTS_N_INSNS (1);
10501
10502 if (!speed_p)
10503 return true;
10504
10505 if (GET_MODE (XEXP (x, 0)) == SImode)
10506 *cost += extra_cost->ldst.load;
10507 else
10508 *cost += extra_cost->ldst.load_sign_extend;
10509
10510 if (mode == DImode)
10511 *cost += extra_cost->alu.shift;
10512
10513 return true;
10514 }
10515
10516 /* Widening from less than 32 bits requires an extend operation. */
10517 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10518 {
10519 /* We have SXTB/SXTH. */
10520 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10521 if (speed_p)
10522 *cost += extra_cost->alu.extend;
10523 }
10524 else if (GET_MODE (XEXP (x, 0)) != SImode)
10525 {
10526 /* Needs two shifts. */
10527 *cost += COSTS_N_INSNS (1);
10528 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10529 if (speed_p)
10530 *cost += 2 * extra_cost->alu.shift;
10531 }
10532
10533 /* Widening beyond 32 bits requires one more insn. */
10534 if (mode == DImode)
10535 {
10536 *cost += COSTS_N_INSNS (1);
10537 if (speed_p)
10538 *cost += extra_cost->alu.shift;
10539 }
10540
10541 return true;
10542
10543 case ZERO_EXTEND:
10544 if ((arm_arch4
10545 || GET_MODE (XEXP (x, 0)) == SImode
10546 || GET_MODE (XEXP (x, 0)) == QImode)
10547 && MEM_P (XEXP (x, 0)))
10548 {
10549 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10550
10551 if (mode == DImode)
10552 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10553
10554 return true;
10555 }
10556
10557 /* Widening from less than 32 bits requires an extend operation. */
10558 if (GET_MODE (XEXP (x, 0)) == QImode)
10559 {
10560 /* UXTB can be a shorter instruction in Thumb2, but it might
10561 be slower than the AND Rd, Rn, #255 alternative. When
10562 optimizing for speed it should never be slower to use
10563 AND, and we don't really model 16-bit vs 32-bit insns
10564 here. */
10565 if (speed_p)
10566 *cost += extra_cost->alu.logical;
10567 }
10568 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10569 {
10570 /* We have UXTB/UXTH. */
10571 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10572 if (speed_p)
10573 *cost += extra_cost->alu.extend;
10574 }
10575 else if (GET_MODE (XEXP (x, 0)) != SImode)
10576 {
10577 /* Needs two shifts. It's marginally preferable to use
10578 shifts rather than two BIC instructions as the second
10579 shift may merge with a subsequent insn as a shifter
10580 op. */
10581 *cost = COSTS_N_INSNS (2);
10582 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10583 if (speed_p)
10584 *cost += 2 * extra_cost->alu.shift;
10585 }
10586
10587 /* Widening beyond 32 bits requires one more insn. */
10588 if (mode == DImode)
10589 {
10590 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10591 }
10592
10593 return true;
10594
10595 case CONST_INT:
10596 *cost = 0;
10597 /* CONST_INT has no mode, so we cannot tell for sure how many
10598 insns are really going to be needed. The best we can do is
10599 look at the value passed. If it fits in SImode, then assume
10600 that's the mode it will be used for. Otherwise assume it
10601 will be used in DImode. */
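/* Illustration: 0x100000001 is not equal to its SImode truncation, so it
   is treated as DImode and costed below as two SImode constants, the low
   word 0x1 and the high word 0x1. */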
10602 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10603 mode = SImode;
10604 else
10605 mode = DImode;
10606
10607 /* Avoid blowing up in arm_gen_constant (). */
10608 if (!(outer_code == PLUS
10609 || outer_code == AND
10610 || outer_code == IOR
10611 || outer_code == XOR
10612 || outer_code == MINUS))
10613 outer_code = SET;
10614
10615 const_int_cost:
10616 if (mode == SImode)
10617 {
10618 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10619 INTVAL (x), NULL, NULL,
10620 0, 0));
10621 /* Extra costs? */
10622 }
10623 else
10624 {
10625 *cost += COSTS_N_INSNS (arm_gen_constant
10626 (outer_code, SImode, NULL,
10627 trunc_int_for_mode (INTVAL (x), SImode),
10628 NULL, NULL, 0, 0)
10629 + arm_gen_constant (outer_code, SImode, NULL,
10630 INTVAL (x) >> 32, NULL,
10631 NULL, 0, 0));
10632 /* Extra costs? */
10633 }
10634
10635 return true;
10636
10637 case CONST:
10638 case LABEL_REF:
10639 case SYMBOL_REF:
10640 if (speed_p)
10641 {
10642 if (arm_arch_thumb2 && !flag_pic)
10643 *cost += COSTS_N_INSNS (1);
10644 else
10645 *cost += extra_cost->ldst.load;
10646 }
10647 else
10648 *cost += COSTS_N_INSNS (1);
10649
10650 if (flag_pic)
10651 {
10652 *cost += COSTS_N_INSNS (1);
10653 if (speed_p)
10654 *cost += extra_cost->alu.arith;
10655 }
10656
10657 return true;
10658
10659 case CONST_FIXED:
10660 *cost = COSTS_N_INSNS (4);
10661 /* Fixme. */
10662 return true;
10663
10664 case CONST_DOUBLE:
10665 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10666 && (mode == SFmode || !TARGET_VFP_SINGLE))
10667 {
10668 if (vfp3_const_double_rtx (x))
10669 {
10670 if (speed_p)
10671 *cost += extra_cost->fp[mode == DFmode].fpconst;
10672 return true;
10673 }
10674
10675 if (speed_p)
10676 {
10677 if (mode == DFmode)
10678 *cost += extra_cost->ldst.loadd;
10679 else
10680 *cost += extra_cost->ldst.loadf;
10681 }
10682 else
10683 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10684
10685 return true;
10686 }
10687 *cost = COSTS_N_INSNS (4);
10688 return true;
10689
10690 case CONST_VECTOR:
10691 /* Fixme. */
10692 if (TARGET_NEON
10693 && TARGET_HARD_FLOAT
10694 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10695 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10696 *cost = COSTS_N_INSNS (1);
10697 else
10698 *cost = COSTS_N_INSNS (4);
10699 return true;
10700
10701 case HIGH:
10702 case LO_SUM:
10703 /* When optimizing for size, we prefer constant pool entries to
10704 MOVW/MOVT pairs, so bump the cost of these slightly. */
10705 if (!speed_p)
10706 *cost += 1;
10707 return true;
10708
10709 case CLZ:
10710 if (speed_p)
10711 *cost += extra_cost->alu.clz;
10712 return false;
10713
10714 case SMIN:
10715 if (XEXP (x, 1) == const0_rtx)
10716 {
10717 if (speed_p)
10718 *cost += extra_cost->alu.log_shift;
10719 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10720 return true;
10721 }
10722 /* Fall through. */
10723 case SMAX:
10724 case UMIN:
10725 case UMAX:
10726 *cost += COSTS_N_INSNS (1);
10727 return false;
10728
10729 case TRUNCATE:
10730 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10731 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10732 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10733 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10734 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10735 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10736 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10737 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10738 == ZERO_EXTEND))))
10739 {
10740 if (speed_p)
10741 *cost += extra_cost->mult[1].extend;
10742 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10743 ZERO_EXTEND, 0, speed_p)
10744 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10745 ZERO_EXTEND, 0, speed_p));
10746 return true;
10747 }
10748 *cost = LIBCALL_COST (1);
10749 return false;
10750
10751 case UNSPEC_VOLATILE:
10752 case UNSPEC:
10753 return arm_unspec_cost (x, outer_code, speed_p, cost);
10754
10755 case PC:
10756 /* Reading the PC is like reading any other register. Writing it
10757 is more expensive, but we take that into account elsewhere. */
10758 *cost = 0;
10759 return true;
10760
10761 case ZERO_EXTRACT:
10762 /* TODO: Simple zero_extract of bottom bits using AND. */
10763 /* Fall through. */
10764 case SIGN_EXTRACT:
10765 if (arm_arch6
10766 && mode == SImode
10767 && CONST_INT_P (XEXP (x, 1))
10768 && CONST_INT_P (XEXP (x, 2)))
10769 {
10770 if (speed_p)
10771 *cost += extra_cost->alu.bfx;
10772 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10773 return true;
10774 }
10775 /* Without UBFX/SBFX, need to resort to shift operations. */
10776 *cost += COSTS_N_INSNS (1);
10777 if (speed_p)
10778 *cost += 2 * extra_cost->alu.shift;
10779 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10780 return true;
10781
10782 case FLOAT_EXTEND:
10783 if (TARGET_HARD_FLOAT)
10784 {
10785 if (speed_p)
10786 *cost += extra_cost->fp[mode == DFmode].widen;
10787 if (!TARGET_VFP5
10788 && GET_MODE (XEXP (x, 0)) == HFmode)
10789 {
10790 /* Pre v8, widening HF->DF is a two-step process, first
10791 widening to SFmode. */
10792 *cost += COSTS_N_INSNS (1);
10793 if (speed_p)
10794 *cost += extra_cost->fp[0].widen;
10795 }
10796 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10797 return true;
10798 }
10799
10800 *cost = LIBCALL_COST (1);
10801 return false;
10802
10803 case FLOAT_TRUNCATE:
10804 if (TARGET_HARD_FLOAT)
10805 {
10806 if (speed_p)
10807 *cost += extra_cost->fp[mode == DFmode].narrow;
10808 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10809 return true;
10810 /* Vector modes? */
10811 }
10812 *cost = LIBCALL_COST (1);
10813 return false;
10814
10815 case FMA:
10816 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10817 {
10818 rtx op0 = XEXP (x, 0);
10819 rtx op1 = XEXP (x, 1);
10820 rtx op2 = XEXP (x, 2);
10821
10822
10823 /* vfms or vfnma. */
10824 if (GET_CODE (op0) == NEG)
10825 op0 = XEXP (op0, 0);
10826
10827 /* vfnms or vfnma. */
10828 if (GET_CODE (op2) == NEG)
10829 op2 = XEXP (op2, 0);
10830
10831 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10832 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10833 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10834
10835 if (speed_p)
10836 *cost += extra_cost->fp[mode == DFmode].fma;
10837
10838 return true;
10839 }
10840
10841 *cost = LIBCALL_COST (3);
10842 return false;
10843
10844 case FIX:
10845 case UNSIGNED_FIX:
10846 if (TARGET_HARD_FLOAT)
10847 {
10848 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10849 a vcvt fixed-point conversion. */
10850 if (code == FIX && mode == SImode
10851 && GET_CODE (XEXP (x, 0)) == FIX
10852 && GET_MODE (XEXP (x, 0)) == SFmode
10853 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10854 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10855 > 0)
10856 {
10857 if (speed_p)
10858 *cost += extra_cost->fp[0].toint;
10859
10860 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10861 code, 0, speed_p);
10862 return true;
10863 }
10864
10865 if (GET_MODE_CLASS (mode) == MODE_INT)
10866 {
10867 mode = GET_MODE (XEXP (x, 0));
10868 if (speed_p)
10869 *cost += extra_cost->fp[mode == DFmode].toint;
10870 /* Strip off the 'cost' of rounding towards zero. */
10871 if (GET_CODE (XEXP (x, 0)) == FIX)
10872 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10873 0, speed_p);
10874 else
10875 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10876 /* ??? Increase the cost to deal with transferring from
10877 FP -> CORE registers? */
10878 return true;
10879 }
10880 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10881 && TARGET_VFP5)
10882 {
10883 if (speed_p)
10884 *cost += extra_cost->fp[mode == DFmode].roundint;
10885 return false;
10886 }
10887 /* Vector costs? */
10888 }
10889 *cost = LIBCALL_COST (1);
10890 return false;
10891
10892 case FLOAT:
10893 case UNSIGNED_FLOAT:
10894 if (TARGET_HARD_FLOAT)
10895 {
10896 /* ??? Increase the cost to deal with transferring from CORE
10897 -> FP registers? */
10898 if (speed_p)
10899 *cost += extra_cost->fp[mode == DFmode].fromint;
10900 return false;
10901 }
10902 *cost = LIBCALL_COST (1);
10903 return false;
10904
10905 case CALL:
10906 return true;
10907
10908 case ASM_OPERANDS:
10909 {
10910 /* Just a guess. Guess number of instructions in the asm
10911 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10912 though (see PR60663). */
10913 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10914 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10915
10916 *cost = COSTS_N_INSNS (asm_length + num_operands);
10917 return true;
10918 }
10919 default:
10920 if (mode != VOIDmode)
10921 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10922 else
10923 *cost = COSTS_N_INSNS (4); /* Who knows? */
10924 return false;
10925 }
10926 }
10927
10928 #undef HANDLE_NARROW_SHIFT_ARITH
10929
10930 /* RTX costs entry point. */
10931
10932 static bool
10933 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10934 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10935 {
10936 bool result;
10937 int code = GET_CODE (x);
10938 gcc_assert (current_tune->insn_extra_cost);
10939
10940 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10941 (enum rtx_code) outer_code,
10942 current_tune->insn_extra_cost,
10943 total, speed);
10944
10945 if (dump_file && (dump_flags & TDF_DETAILS))
10946 {
10947 print_rtl_single (dump_file, x);
10948 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10949 *total, result ? "final" : "partial");
10950 }
10951 return result;
10952 }
10953
10954 /* All address computations that can be done are free, but rtx cost returns
10955 the same for practically all of them. So we weight the different types
10956 of address here in the order (most pref first):
10957 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10958 static inline int
10959 arm_arm_address_cost (rtx x)
10960 {
10961 enum rtx_code c = GET_CODE (x);
10962
10963 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10964 return 0;
10965 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10966 return 10;
10967
10968 if (c == PLUS)
10969 {
10970 if (CONST_INT_P (XEXP (x, 1)))
10971 return 2;
10972
10973 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10974 return 3;
10975
10976 return 4;
10977 }
10978
10979 return 6;
10980 }
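/* With the weights above: a PRE/POST_INC or PRE/POST_DEC address costs 0,
   [reg, #imm] costs 2, [reg, reg, lsl #2] costs 3 (one operand is an
   arithmetic sub-expression), [reg, reg] costs 4, a bare register costs 6,
   and a label or symbol reference costs 10. */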
10981
10982 static inline int
10983 arm_thumb_address_cost (rtx x)
10984 {
10985 enum rtx_code c = GET_CODE (x);
10986
10987 if (c == REG)
10988 return 1;
10989 if (c == PLUS
10990 && REG_P (XEXP (x, 0))
10991 && CONST_INT_P (XEXP (x, 1)))
10992 return 1;
10993
10994 return 2;
10995 }
10996
10997 static int
10998 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
10999 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11000 {
11001 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11002 }
11003
11004 /* Adjust cost hook for XScale. */
11005 static bool
11006 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11007 int * cost)
11008 {
11009 /* Some true dependencies can have a higher cost depending
11010 on precisely how certain input operands are used. */
11011 if (dep_type == 0
11012 && recog_memoized (insn) >= 0
11013 && recog_memoized (dep) >= 0)
11014 {
11015 int shift_opnum = get_attr_shift (insn);
11016 enum attr_type attr_type = get_attr_type (dep);
11017
11018 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11019 operand for INSN. If we have a shifted input operand and the
11020 instruction we depend on is another ALU instruction, then we may
11021 have to account for an additional stall. */
11022 if (shift_opnum != 0
11023 && (attr_type == TYPE_ALU_SHIFT_IMM
11024 || attr_type == TYPE_ALUS_SHIFT_IMM
11025 || attr_type == TYPE_LOGIC_SHIFT_IMM
11026 || attr_type == TYPE_LOGICS_SHIFT_IMM
11027 || attr_type == TYPE_ALU_SHIFT_REG
11028 || attr_type == TYPE_ALUS_SHIFT_REG
11029 || attr_type == TYPE_LOGIC_SHIFT_REG
11030 || attr_type == TYPE_LOGICS_SHIFT_REG
11031 || attr_type == TYPE_MOV_SHIFT
11032 || attr_type == TYPE_MVN_SHIFT
11033 || attr_type == TYPE_MOV_SHIFT_REG
11034 || attr_type == TYPE_MVN_SHIFT_REG))
11035 {
11036 rtx shifted_operand;
11037 int opno;
11038
11039 /* Get the shifted operand. */
11040 extract_insn (insn);
11041 shifted_operand = recog_data.operand[shift_opnum];
11042
11043 /* Iterate over all the operands in DEP. If we write an operand
11044 that overlaps with SHIFTED_OPERAND, then we have to increase the
11045 cost of this dependency. */
11046 extract_insn (dep);
11047 preprocess_constraints (dep);
11048 for (opno = 0; opno < recog_data.n_operands; opno++)
11049 {
11050 /* We can ignore strict inputs. */
11051 if (recog_data.operand_type[opno] == OP_IN)
11052 continue;
11053
11054 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11055 shifted_operand))
11056 {
11057 *cost = 2;
11058 return false;
11059 }
11060 }
11061 }
11062 }
11063 return true;
11064 }
11065
11066 /* Adjust cost hook for Cortex A9. */
11067 static bool
11068 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11069 int * cost)
11070 {
11071 switch (dep_type)
11072 {
11073 case REG_DEP_ANTI:
11074 *cost = 0;
11075 return false;
11076
11077 case REG_DEP_TRUE:
11078 case REG_DEP_OUTPUT:
11079 if (recog_memoized (insn) >= 0
11080 && recog_memoized (dep) >= 0)
11081 {
11082 if (GET_CODE (PATTERN (insn)) == SET)
11083 {
11084 if (GET_MODE_CLASS
11085 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11086 || GET_MODE_CLASS
11087 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11088 {
11089 enum attr_type attr_type_insn = get_attr_type (insn);
11090 enum attr_type attr_type_dep = get_attr_type (dep);
11091
11092 /* By default all dependencies of the form
11093 s0 = s0 <op> s1
11094 s0 = s0 <op> s2
11095 have an extra latency of 1 cycle because
11096 of the input and output dependency in this
11097 case. However, this gets modeled as a true
11098 dependency, hence all these checks. */
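/* For instance, two back-to-back FMACS operations (such as vmla.f32)
   accumulating into the same register: on an output dependency the
   second is charged insn_default_latency (dep) - 3, while a true
   dependency is charged the full default latency, as coded below. */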
11099 if (REG_P (SET_DEST (PATTERN (insn)))
11100 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11101 {
11102 /* FMACS is a special case where the dependent
11103 instruction can be issued 3 cycles before
11104 the normal latency in case of an output
11105 dependency. */
11106 if ((attr_type_insn == TYPE_FMACS
11107 || attr_type_insn == TYPE_FMACD)
11108 && (attr_type_dep == TYPE_FMACS
11109 || attr_type_dep == TYPE_FMACD))
11110 {
11111 if (dep_type == REG_DEP_OUTPUT)
11112 *cost = insn_default_latency (dep) - 3;
11113 else
11114 *cost = insn_default_latency (dep);
11115 return false;
11116 }
11117 else
11118 {
11119 if (dep_type == REG_DEP_OUTPUT)
11120 *cost = insn_default_latency (dep) + 1;
11121 else
11122 *cost = insn_default_latency (dep);
11123 }
11124 return false;
11125 }
11126 }
11127 }
11128 }
11129 break;
11130
11131 default:
11132 gcc_unreachable ();
11133 }
11134
11135 return true;
11136 }
11137
11138 /* Adjust cost hook for FA726TE. */
11139 static bool
11140 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11141 int * cost)
11142 {
11143 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11144 has a penalty of 3. */
11145 if (dep_type == REG_DEP_TRUE
11146 && recog_memoized (insn) >= 0
11147 && recog_memoized (dep) >= 0
11148 && get_attr_conds (dep) == CONDS_SET)
11149 {
11150 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11151 if (get_attr_conds (insn) == CONDS_USE
11152 && get_attr_type (insn) != TYPE_BRANCH)
11153 {
11154 *cost = 3;
11155 return false;
11156 }
11157
11158 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11159 || get_attr_conds (insn) == CONDS_USE)
11160 {
11161 *cost = 0;
11162 return false;
11163 }
11164 }
11165
11166 return true;
11167 }
11168
11169 /* Implement TARGET_REGISTER_MOVE_COST.
11170
11171 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11172 such a move is typically more expensive than a single memory access. We set
11173 the cost to less than two memory accesses so that floating
11174 point to integer conversion does not go through memory. */
11175
11176 int
11177 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11178 reg_class_t from, reg_class_t to)
11179 {
11180 if (TARGET_32BIT)
11181 {
11182 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11183 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11184 return 15;
11185 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11186 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11187 return 4;
11188 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11189 return 20;
11190 else
11191 return 2;
11192 }
11193 else
11194 {
11195 if (from == HI_REGS || to == HI_REGS)
11196 return 4;
11197 else
11198 return 2;
11199 }
11200 }
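/* With the numbers above, a VFP<->core transfer (15) is dearer than a
   single memory move (10, see arm_memory_move_cost below) but cheaper
   than the store/load pair (20) that spilling through memory would need,
   so the direct transfer is preferred. */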
11201
11202 /* Implement TARGET_MEMORY_MOVE_COST. */
11203
11204 int
11205 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11206 bool in ATTRIBUTE_UNUSED)
11207 {
11208 if (TARGET_32BIT)
11209 return 10;
11210 else
11211 {
11212 if (GET_MODE_SIZE (mode) < 4)
11213 return 8;
11214 else
11215 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11216 }
11217 }
11218
11219 /* Vectorizer cost model implementation. */
11220
11221 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11222 static int
11223 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11224 tree vectype,
11225 int misalign ATTRIBUTE_UNUSED)
11226 {
11227 unsigned elements;
11228
11229 switch (type_of_cost)
11230 {
11231 case scalar_stmt:
11232 return current_tune->vec_costs->scalar_stmt_cost;
11233
11234 case scalar_load:
11235 return current_tune->vec_costs->scalar_load_cost;
11236
11237 case scalar_store:
11238 return current_tune->vec_costs->scalar_store_cost;
11239
11240 case vector_stmt:
11241 return current_tune->vec_costs->vec_stmt_cost;
11242
11243 case vector_load:
11244 return current_tune->vec_costs->vec_align_load_cost;
11245
11246 case vector_store:
11247 return current_tune->vec_costs->vec_store_cost;
11248
11249 case vec_to_scalar:
11250 return current_tune->vec_costs->vec_to_scalar_cost;
11251
11252 case scalar_to_vec:
11253 return current_tune->vec_costs->scalar_to_vec_cost;
11254
11255 case unaligned_load:
11256 case vector_gather_load:
11257 return current_tune->vec_costs->vec_unalign_load_cost;
11258
11259 case unaligned_store:
11260 case vector_scatter_store:
11261 return current_tune->vec_costs->vec_unalign_store_cost;
11262
11263 case cond_branch_taken:
11264 return current_tune->vec_costs->cond_taken_branch_cost;
11265
11266 case cond_branch_not_taken:
11267 return current_tune->vec_costs->cond_not_taken_branch_cost;
11268
11269 case vec_perm:
11270 case vec_promote_demote:
11271 return current_tune->vec_costs->vec_stmt_cost;
11272
11273 case vec_construct:
11274 elements = TYPE_VECTOR_SUBPARTS (vectype);
11275 return elements / 2 + 1;
11276
11277 default:
11278 gcc_unreachable ();
11279 }
11280 }
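/* For reference, the vec_construct cost above means that, for example,
   building a V4SImode vector from scalars is costed as 4 / 2 + 1 = 3. */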
11281
11282 /* Implement targetm.vectorize.add_stmt_cost. */
11283
11284 static unsigned
11285 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11286 struct _stmt_vec_info *stmt_info, int misalign,
11287 enum vect_cost_model_location where)
11288 {
11289 unsigned *cost = (unsigned *) data;
11290 unsigned retval = 0;
11291
11292 if (flag_vect_cost_model)
11293 {
11294 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11295 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11296
11297 /* Statements in an inner loop relative to the loop being
11298 vectorized are weighted more heavily. The value here is
11299 arbitrary and could potentially be improved with analysis. */
11300 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11301 count *= 50; /* FIXME. */
11302
11303 retval = (unsigned) (count * stmt_cost);
11304 cost[where] += retval;
11305 }
11306
11307 return retval;
11308 }
11309
11310 /* Return true if and only if this insn can dual-issue only as older. */
11311 static bool
11312 cortexa7_older_only (rtx_insn *insn)
11313 {
11314 if (recog_memoized (insn) < 0)
11315 return false;
11316
11317 switch (get_attr_type (insn))
11318 {
11319 case TYPE_ALU_DSP_REG:
11320 case TYPE_ALU_SREG:
11321 case TYPE_ALUS_SREG:
11322 case TYPE_LOGIC_REG:
11323 case TYPE_LOGICS_REG:
11324 case TYPE_ADC_REG:
11325 case TYPE_ADCS_REG:
11326 case TYPE_ADR:
11327 case TYPE_BFM:
11328 case TYPE_REV:
11329 case TYPE_MVN_REG:
11330 case TYPE_SHIFT_IMM:
11331 case TYPE_SHIFT_REG:
11332 case TYPE_LOAD_BYTE:
11333 case TYPE_LOAD_4:
11334 case TYPE_STORE_4:
11335 case TYPE_FFARITHS:
11336 case TYPE_FADDS:
11337 case TYPE_FFARITHD:
11338 case TYPE_FADDD:
11339 case TYPE_FMOV:
11340 case TYPE_F_CVT:
11341 case TYPE_FCMPS:
11342 case TYPE_FCMPD:
11343 case TYPE_FCONSTS:
11344 case TYPE_FCONSTD:
11345 case TYPE_FMULS:
11346 case TYPE_FMACS:
11347 case TYPE_FMULD:
11348 case TYPE_FMACD:
11349 case TYPE_FDIVS:
11350 case TYPE_FDIVD:
11351 case TYPE_F_MRC:
11352 case TYPE_F_MRRC:
11353 case TYPE_F_FLAG:
11354 case TYPE_F_LOADS:
11355 case TYPE_F_STORES:
11356 return true;
11357 default:
11358 return false;
11359 }
11360 }
11361
11362 /* Return true if and only if this insn can dual-issue as younger. */
11363 static bool
11364 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11365 {
11366 if (recog_memoized (insn) < 0)
11367 {
11368 if (verbose > 5)
11369 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11370 return false;
11371 }
11372
11373 switch (get_attr_type (insn))
11374 {
11375 case TYPE_ALU_IMM:
11376 case TYPE_ALUS_IMM:
11377 case TYPE_LOGIC_IMM:
11378 case TYPE_LOGICS_IMM:
11379 case TYPE_EXTEND:
11380 case TYPE_MVN_IMM:
11381 case TYPE_MOV_IMM:
11382 case TYPE_MOV_REG:
11383 case TYPE_MOV_SHIFT:
11384 case TYPE_MOV_SHIFT_REG:
11385 case TYPE_BRANCH:
11386 case TYPE_CALL:
11387 return true;
11388 default:
11389 return false;
11390 }
11391 }
11392
11393
11394 /* Look for an instruction that can dual issue only as an older
11395 instruction, and move it in front of any instructions that can
11396 dual-issue as younger, while preserving the relative order of all
11397 other instructions in the ready list. This is a heuristic to help
11398 dual-issue in later cycles, by postponing issue of more flexible
11399 instructions. This heuristic may affect dual issue opportunities
11400 in the current cycle. */
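/* For example, if the ready list holds an ALU-immediate insn (younger,
   see cortexa7_younger) ahead of a load (older-only, see
   cortexa7_older_only), the load is moved in front of the ALU insn so
   that the more flexible insn remains available to pair in a later
   cycle. */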
11401 static void
11402 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11403 int *n_readyp, int clock)
11404 {
11405 int i;
11406 int first_older_only = -1, first_younger = -1;
11407
11408 if (verbose > 5)
11409 fprintf (file,
11410 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11411 clock,
11412 *n_readyp);
11413
11414 /* Traverse the ready list from the head (the instruction to issue
11415 first), looking for the first instruction that can issue as
11416 younger and the first instruction that can dual-issue only as
11417 older. */
11418 for (i = *n_readyp - 1; i >= 0; i--)
11419 {
11420 rtx_insn *insn = ready[i];
11421 if (cortexa7_older_only (insn))
11422 {
11423 first_older_only = i;
11424 if (verbose > 5)
11425 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11426 break;
11427 }
11428 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11429 first_younger = i;
11430 }
11431
11432 /* Nothing to reorder because either no younger insn was found, or an
11433 insn that can dual-issue only as older already appears before any
11434 insn that can dual-issue as younger. */
11435 if (first_younger == -1)
11436 {
11437 if (verbose > 5)
11438 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11439 return;
11440 }
11441
11442 /* Nothing to reorder because no older-only insn in the ready list. */
11443 if (first_older_only == -1)
11444 {
11445 if (verbose > 5)
11446 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11447 return;
11448 }
11449
11450 /* Move first_older_only insn before first_younger. */
11451 if (verbose > 5)
11452 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11453 INSN_UID(ready [first_older_only]),
11454 INSN_UID(ready [first_younger]));
11455 rtx_insn *first_older_only_insn = ready [first_older_only];
11456 for (i = first_older_only; i < first_younger; i++)
11457 {
11458 ready[i] = ready[i+1];
11459 }
11460
11461 ready[i] = first_older_only_insn;
11462 return;
11463 }
11464
11465 /* Implement TARGET_SCHED_REORDER. */
11466 static int
11467 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11468 int clock)
11469 {
11470 switch (arm_tune)
11471 {
11472 case TARGET_CPU_cortexa7:
11473 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11474 break;
11475 default:
11476 /* Do nothing for other cores. */
11477 break;
11478 }
11479
11480 return arm_issue_rate ();
11481 }
11482
11483 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11484 It corrects the value of COST based on the relationship between
11485 INSN and DEP through the dependence LINK. It returns the new
11486 value. There is a per-core adjust_cost hook to adjust scheduler costs
11487 and the per-core hook can choose to completely override the generic
11488 adjust_cost function. Only put bits of code into arm_adjust_cost that
11489 are common across all cores. */
11490 static int
11491 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11492 unsigned int)
11493 {
11494 rtx i_pat, d_pat;
11495
11496 /* When generating Thumb-1 code, we want to place flag-setting operations
11497 close to a conditional branch which depends on them, so that we can
11498 omit the comparison. */
11499 if (TARGET_THUMB1
11500 && dep_type == 0
11501 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11502 && recog_memoized (dep) >= 0
11503 && get_attr_conds (dep) == CONDS_SET)
11504 return 0;
11505
11506 if (current_tune->sched_adjust_cost != NULL)
11507 {
11508 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11509 return cost;
11510 }
11511
11512 /* XXX Is this strictly true? */
11513 if (dep_type == REG_DEP_ANTI
11514 || dep_type == REG_DEP_OUTPUT)
11515 return 0;
11516
11517 /* Call insns don't incur a stall, even if they follow a load. */
11518 if (dep_type == 0
11519 && CALL_P (insn))
11520 return 1;
11521
11522 if ((i_pat = single_set (insn)) != NULL
11523 && MEM_P (SET_SRC (i_pat))
11524 && (d_pat = single_set (dep)) != NULL
11525 && MEM_P (SET_DEST (d_pat)))
11526 {
11527 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11528 /* This is a load after a store; there is no conflict if the load reads
11529 from a cached area. Assume that loads from the stack and from the
11530 constant pool are cached, and that others will miss. This is a
11531 hack. */
11532
11533 if ((GET_CODE (src_mem) == SYMBOL_REF
11534 && CONSTANT_POOL_ADDRESS_P (src_mem))
11535 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11536 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11537 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11538 return 1;
11539 }
11540
11541 return cost;
11542 }
11543
11544 int
11545 arm_max_conditional_execute (void)
11546 {
11547 return max_insns_skipped;
11548 }
11549
11550 static int
11551 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11552 {
11553 if (TARGET_32BIT)
11554 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11555 else
11556 return (optimize > 0) ? 2 : 0;
11557 }
11558
11559 static int
11560 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11561 {
11562 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11563 }
11564
11565 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11566 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11567 sequences of non-executed instructions in IT blocks probably take the same
11568 amount of time as executed instructions (and the IT instruction itself takes
11569 space in icache). This function was experimentally determined to give good
11570 results on a popular embedded benchmark. */
11571
11572 static int
11573 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11574 {
11575 return (TARGET_32BIT && speed_p) ? 1
11576 : arm_default_branch_cost (speed_p, predictable_p);
11577 }
11578
11579 static int
11580 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11581 {
11582 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11583 }
11584
11585 static bool fp_consts_inited = false;
11586
11587 static REAL_VALUE_TYPE value_fp0;
11588
11589 static void
11590 init_fp_table (void)
11591 {
11592 REAL_VALUE_TYPE r;
11593
11594 r = REAL_VALUE_ATOF ("0", DFmode);
11595 value_fp0 = r;
11596 fp_consts_inited = true;
11597 }
11598
11599 /* Return TRUE if rtx X is a valid immediate FP constant. */
11600 int
11601 arm_const_double_rtx (rtx x)
11602 {
11603 const REAL_VALUE_TYPE *r;
11604
11605 if (!fp_consts_inited)
11606 init_fp_table ();
11607
11608 r = CONST_DOUBLE_REAL_VALUE (x);
11609 if (REAL_VALUE_MINUS_ZERO (*r))
11610 return 0;
11611
11612 if (real_equal (r, &value_fp0))
11613 return 1;
11614
11615 return 0;
11616 }
11617
11618 /* VFPv3 has a fairly wide range of representable immediates, formed from
11619 "quarter-precision" floating-point values. These can be evaluated using this
11620 formula (with ^ for exponentiation):
11621
11622 -1^s * n * 2^-r
11623
11624 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11625 16 <= n <= 31 and 0 <= r <= 7.
11626
11627 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11628
11629 - A (most-significant) is the sign bit.
11630 - BCD are the exponent (encoded as r XOR 3).
11631 - EFGH are the mantissa (encoded as n - 16).
11632 */
11633
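/* Worked example: 1.0 = -1^0 * 16 * 2^-4, i.e. s = 0, n = 16, r = 4,
   so the index is (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) = 0x70, which is
   what vfp3_const_double_index below returns for CONST_DOUBLE 1.0. */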
11634 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11635 fconst[sd] instruction, or -1 if X isn't suitable. */
11636 static int
11637 vfp3_const_double_index (rtx x)
11638 {
11639 REAL_VALUE_TYPE r, m;
11640 int sign, exponent;
11641 unsigned HOST_WIDE_INT mantissa, mant_hi;
11642 unsigned HOST_WIDE_INT mask;
11643 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11644 bool fail;
11645
11646 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11647 return -1;
11648
11649 r = *CONST_DOUBLE_REAL_VALUE (x);
11650
11651 /* We can't represent these things, so detect them first. */
11652 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11653 return -1;
11654
11655 /* Extract sign, exponent and mantissa. */
11656 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11657 r = real_value_abs (&r);
11658 exponent = REAL_EXP (&r);
11659 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11660 highest (sign) bit, with a fixed binary point at bit point_pos.
11661 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11662 bits for the mantissa, this may fail (low bits would be lost). */
11663 real_ldexp (&m, &r, point_pos - exponent);
11664 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11665 mantissa = w.elt (0);
11666 mant_hi = w.elt (1);
11667
11668 /* If there are bits set in the low part of the mantissa, we can't
11669 represent this value. */
11670 if (mantissa != 0)
11671 return -1;
11672
11673 /* Now make it so that mantissa contains the most-significant bits, and move
11674 the point_pos to indicate that the least-significant bits have been
11675 discarded. */
11676 point_pos -= HOST_BITS_PER_WIDE_INT;
11677 mantissa = mant_hi;
11678
11679 /* We can permit four significant bits of mantissa only, plus a high bit
11680 which is always 1. */
11681 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11682 if ((mantissa & mask) != 0)
11683 return -1;
11684
11685 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11686 mantissa >>= point_pos - 5;
11687
11688 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11689 floating-point immediate zero with Neon using an integer-zero load, but
11690 that case is handled elsewhere.) */
11691 if (mantissa == 0)
11692 return -1;
11693
11694 gcc_assert (mantissa >= 16 && mantissa <= 31);
11695
11696 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11697 normalized significands are in the range [1, 2). (Our mantissa is shifted
11698 left 4 places at this point relative to normalized IEEE754 values). GCC
11699 internally uses [0.5, 1) (see real.c), so the exponent returned from
11700 REAL_EXP must be altered. */
11701 exponent = 5 - exponent;
11702
11703 if (exponent < 0 || exponent > 7)
11704 return -1;
11705
11706 /* Sign, mantissa and exponent are now in the correct form to plug into the
11707 formula described in the comment above. */
11708 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11709 }
11710
11711 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11712 int
11713 vfp3_const_double_rtx (rtx x)
11714 {
11715 if (!TARGET_VFP3)
11716 return 0;
11717
11718 return vfp3_const_double_index (x) != -1;
11719 }
11720
11721 /* Recognize immediates which can be used in various Neon instructions. Legal
11722 immediates are described by the following table (for VMVN variants, the
11723 bitwise inverse of the constant shown is recognized. In either case, VMOV
11724 is output and the correct instruction to use for a given constant is chosen
11725 by the assembler). The constant shown is replicated across all elements of
11726 the destination vector.
11727
11728 insn elems variant constant (binary)
11729 ---- ----- ------- -----------------
11730 vmov i32 0 00000000 00000000 00000000 abcdefgh
11731 vmov i32 1 00000000 00000000 abcdefgh 00000000
11732 vmov i32 2 00000000 abcdefgh 00000000 00000000
11733 vmov i32 3 abcdefgh 00000000 00000000 00000000
11734 vmov i16 4 00000000 abcdefgh
11735 vmov i16 5 abcdefgh 00000000
11736 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11737 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11738 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11739 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11740 vmvn i16 10 00000000 abcdefgh
11741 vmvn i16 11 abcdefgh 00000000
11742 vmov i32 12 00000000 00000000 abcdefgh 11111111
11743 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11744 vmov i32 14 00000000 abcdefgh 11111111 11111111
11745 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11746 vmov i8 16 abcdefgh
11747 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11748 eeeeeeee ffffffff gggggggg hhhhhhhh
11749 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11750 vmov f32 19 00000000 00000000 00000000 00000000
11751
11752 For case 18, B = !b. Representable values are exactly those accepted by
11753 vfp3_const_double_index, but are output as floating-point numbers rather
11754 than indices.
11755
11756 For case 19, we will change it to vmov.i32 when assembling.
11757
11758 Variants 0-5 (inclusive) may also be used as immediates for the second
11759 operand of VORR/VBIC instructions.
11760
11761 The INVERSE argument causes the bitwise inverse of the given operand to be
11762 recognized instead (used for recognizing legal immediates for the VAND/VORN
11763 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11764 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11765 output, rather than the real insns vbic/vorr).
11766
11767 INVERSE makes no difference to the recognition of float vectors.
11768
11769 The return value is the variant of immediate as shown in the above table, or
11770 -1 if the given value doesn't match any of the listed patterns.
11771 */
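/* Worked example: a V4SImode constant whose every element is 0x0000ff00
   has little-endian bytes { 00, ff, 00, 00 } per element, so it matches
   variant 1 above (abcdefgh = 0xff) and can be emitted as a single
   vmov.i32 with element width 32. */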
11772 static int
11773 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11774 rtx *modconst, int *elementwidth)
11775 {
11776 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11777 matches = 1; \
11778 for (i = 0; i < idx; i += (STRIDE)) \
11779 if (!(TEST)) \
11780 matches = 0; \
11781 if (matches) \
11782 { \
11783 immtype = (CLASS); \
11784 elsize = (ELSIZE); \
11785 break; \
11786 }
11787
11788 unsigned int i, elsize = 0, idx = 0, n_elts;
11789 unsigned int innersize;
11790 unsigned char bytes[16];
11791 int immtype = -1, matches;
11792 unsigned int invmask = inverse ? 0xff : 0;
11793 bool vector = GET_CODE (op) == CONST_VECTOR;
11794
11795 if (vector)
11796 n_elts = CONST_VECTOR_NUNITS (op);
11797 else
11798 {
11799 n_elts = 1;
11800 if (mode == VOIDmode)
11801 mode = DImode;
11802 }
11803
11804 innersize = GET_MODE_UNIT_SIZE (mode);
11805
11806 /* Vectors of float constants. */
11807 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11808 {
11809 rtx el0 = CONST_VECTOR_ELT (op, 0);
11810
11811 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11812 return -1;
11813
11814 /* FP16 vectors cannot be represented. */
11815 if (GET_MODE_INNER (mode) == HFmode)
11816 return -1;
11817
11818 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11819 are distinct in this context. */
11820 if (!const_vec_duplicate_p (op))
11821 return -1;
11822
11823 if (modconst)
11824 *modconst = CONST_VECTOR_ELT (op, 0);
11825
11826 if (elementwidth)
11827 *elementwidth = 0;
11828
11829 if (el0 == CONST0_RTX (GET_MODE (el0)))
11830 return 19;
11831 else
11832 return 18;
11833 }
11834
11835 /* The tricks done in the code below apply for little-endian vector layout.
11836 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11837 FIXME: Implement logic for big-endian vectors. */
11838 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11839 return -1;
11840
11841 /* Splat vector constant out into a byte vector. */
11842 for (i = 0; i < n_elts; i++)
11843 {
11844 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11845 unsigned HOST_WIDE_INT elpart;
11846
11847 gcc_assert (CONST_INT_P (el));
11848 elpart = INTVAL (el);
11849
11850 for (unsigned int byte = 0; byte < innersize; byte++)
11851 {
11852 bytes[idx++] = (elpart & 0xff) ^ invmask;
11853 elpart >>= BITS_PER_UNIT;
11854 }
11855 }
11856
11857 /* Sanity check. */
11858 gcc_assert (idx == GET_MODE_SIZE (mode));
11859
11860 do
11861 {
11862 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11863 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11864
11865 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11866 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11867
11868 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11869 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11870
11871 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11872 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11873
11874 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11875
11876 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11877
11878 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11879 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11880
11881 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11882 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11883
11884 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11885 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11886
11887 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11888 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11889
11890 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11891
11892 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11893
11894 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11895 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11896
11897 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11898 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11899
11900 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11901 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11902
11903 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11904 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11905
11906 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11907
11908 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11909 && bytes[i] == bytes[(i + 8) % idx]);
11910 }
11911 while (0);
11912
11913 if (immtype == -1)
11914 return -1;
11915
11916 if (elementwidth)
11917 *elementwidth = elsize;
11918
11919 if (modconst)
11920 {
11921 unsigned HOST_WIDE_INT imm = 0;
11922
11923 /* Un-invert bytes of recognized vector, if necessary. */
11924 if (invmask != 0)
11925 for (i = 0; i < idx; i++)
11926 bytes[i] ^= invmask;
11927
11928 if (immtype == 17)
11929 {
11930 /* FIXME: Broken on 32-bit H_W_I hosts. */
11931 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11932
11933 for (i = 0; i < 8; i++)
11934 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11935 << (i * BITS_PER_UNIT);
11936
11937 *modconst = GEN_INT (imm);
11938 }
11939 else
11940 {
11941 unsigned HOST_WIDE_INT imm = 0;
11942
11943 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11944 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11945
11946 *modconst = GEN_INT (imm);
11947 }
11948 }
11949
11950 return immtype;
11951 #undef CHECK
11952 }
11953
11954 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11955 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11956 float elements), and a modified constant (whatever should be output for a
11957 VMOV) in *MODCONST. */
11958
11959 int
11960 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11961 rtx *modconst, int *elementwidth)
11962 {
11963 rtx tmpconst;
11964 int tmpwidth;
11965 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11966
11967 if (retval == -1)
11968 return 0;
11969
11970 if (modconst)
11971 *modconst = tmpconst;
11972
11973 if (elementwidth)
11974 *elementwidth = tmpwidth;
11975
11976 return 1;
11977 }
11978
11979 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11980 the immediate is valid, write a constant suitable for using as an operand
11981 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11982 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11983
11984 int
11985 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
11986 rtx *modconst, int *elementwidth)
11987 {
11988 rtx tmpconst;
11989 int tmpwidth;
11990 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11991
11992 if (retval < 0 || retval > 5)
11993 return 0;
11994
11995 if (modconst)
11996 *modconst = tmpconst;
11997
11998 if (elementwidth)
11999 *elementwidth = tmpwidth;
12000
12001 return 1;
12002 }
12003
12004 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12005 the immediate is valid, write a constant suitable for using as an operand
12006 to VSHR/VSHL to *MODCONST and the corresponding element width to
12007 *ELEMENTWIDTH. ISLEFTSHIFT selects between left and right shifts,
12008 which have different limitations. */
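/* For example, with V8QImode elements maxshift is 8, so a left-shift
   immediate must lie in [0, 7] and a right-shift immediate in [1, 8]. */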
12009
12010 int
12011 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12012 rtx *modconst, int *elementwidth,
12013 bool isleftshift)
12014 {
12015 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12016 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12017 unsigned HOST_WIDE_INT last_elt = 0;
12018 unsigned HOST_WIDE_INT maxshift;
12019
12020 /* Split vector constant out into a byte vector. */
12021 for (i = 0; i < n_elts; i++)
12022 {
12023 rtx el = CONST_VECTOR_ELT (op, i);
12024 unsigned HOST_WIDE_INT elpart;
12025
12026 if (CONST_INT_P (el))
12027 elpart = INTVAL (el);
12028 else if (CONST_DOUBLE_P (el))
12029 return 0;
12030 else
12031 gcc_unreachable ();
12032
12033 if (i != 0 && elpart != last_elt)
12034 return 0;
12035
12036 last_elt = elpart;
12037 }
12038
12039 /* Shift less than element size. */
12040 maxshift = innersize * 8;
12041
12042 if (isleftshift)
12043 {
12044 /* Left shift immediate value can be from 0 to <size>-1. */
12045 if (last_elt >= maxshift)
12046 return 0;
12047 }
12048 else
12049 {
12050 /* Right shift immediate value can be from 1 to <size>. */
12051 if (last_elt == 0 || last_elt > maxshift)
12052 return 0;
12053 }
12054
12055 if (elementwidth)
12056 *elementwidth = innersize * 8;
12057
12058 if (modconst)
12059 *modconst = CONST_VECTOR_ELT (op, 0);
12060
12061 return 1;
12062 }
12063
12064 /* Return a string suitable for output of Neon immediate logic operation
12065 MNEM. */
12066
12067 char *
12068 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12069 int inverse, int quad)
12070 {
12071 int width, is_valid;
12072 static char templ[40];
12073
12074 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12075
12076 gcc_assert (is_valid != 0);
12077
12078 if (quad)
12079 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12080 else
12081 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12082
12083 return templ;
12084 }
12085
12086 /* Return a string suitable for output of Neon immediate shift operation
12087 (VSHR or VSHL) MNEM. */
12088
12089 char *
12090 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12091 machine_mode mode, int quad,
12092 bool isleftshift)
12093 {
12094 int width, is_valid;
12095 static char templ[40];
12096
12097 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12098 gcc_assert (is_valid != 0);
12099
12100 if (quad)
12101 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12102 else
12103 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12104
12105 return templ;
12106 }
12107
12108 /* Output a sequence of pairwise operations to implement a reduction.
12109 NOTE: We do "too much work" here, because pairwise operations work on two
12110 registers' worth of operands in one go. Unfortunately, I don't think we can
12111 exploit those extra calculations to do the full operation in fewer steps.
12112 Although all vector elements of the result but the first are ignored, we
12113 actually calculate the same result in each of the elements. An alternative
12114 such as initially loading a vector with zero to use as each of the second
12115 operands would use up an additional register and take an extra instruction,
12116 for no particular gain. */
12117
12118 void
12119 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12120 rtx (*reduc) (rtx, rtx, rtx))
12121 {
12122 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12123 rtx tmpsum = op1;
12124
12125 for (i = parts / 2; i >= 1; i /= 2)
12126 {
12127 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12128 emit_insn (reduc (dest, tmpsum, tmpsum));
12129 tmpsum = dest;
12130 }
12131 }
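/* For a four-element vector the loop above runs with i = 2 and then
   i = 1, i.e. two pairwise operations: the first folds neighbouring
   pairs into a scratch register, the second folds again into OP0, whose
   element 0 then holds the full reduction. */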
12132
12133 /* If VALS is a vector constant that can be loaded into a register
12134 using VDUP, generate instructions to do so and return an RTX to
12135 assign to the register. Otherwise return NULL_RTX. */
12136
12137 static rtx
12138 neon_vdup_constant (rtx vals)
12139 {
12140 machine_mode mode = GET_MODE (vals);
12141 machine_mode inner_mode = GET_MODE_INNER (mode);
12142 rtx x;
12143
12144 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12145 return NULL_RTX;
12146
12147 if (!const_vec_duplicate_p (vals, &x))
12148 /* The elements are not all the same. We could handle repeating
12149 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12150 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12151 vdup.i16). */
12152 return NULL_RTX;
12153
12154 /* We can load this constant by using VDUP and a constant in a
12155 single ARM register. This will be cheaper than a vector
12156 load. */
12157
12158 x = copy_to_mode_reg (inner_mode, x);
12159 return gen_rtx_VEC_DUPLICATE (mode, x);
12160 }
12161
12162 /* Generate code to load VALS, which is a PARALLEL containing only
12163 constants (for vec_init) or CONST_VECTOR, efficiently into a
12164 register. Returns an RTX to copy into the register, or NULL_RTX
12165 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12166
12167 rtx
12168 neon_make_constant (rtx vals)
12169 {
12170 machine_mode mode = GET_MODE (vals);
12171 rtx target;
12172 rtx const_vec = NULL_RTX;
12173 int n_elts = GET_MODE_NUNITS (mode);
12174 int n_const = 0;
12175 int i;
12176
12177 if (GET_CODE (vals) == CONST_VECTOR)
12178 const_vec = vals;
12179 else if (GET_CODE (vals) == PARALLEL)
12180 {
12181 /* A CONST_VECTOR must contain only CONST_INTs and
12182 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12183 Only store valid constants in a CONST_VECTOR. */
12184 for (i = 0; i < n_elts; ++i)
12185 {
12186 rtx x = XVECEXP (vals, 0, i);
12187 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12188 n_const++;
12189 }
12190 if (n_const == n_elts)
12191 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12192 }
12193 else
12194 gcc_unreachable ();
12195
12196 if (const_vec != NULL
12197 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12198 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12199 return const_vec;
12200 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12201 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12202 pipeline cycle; creating the constant takes one or two ARM
12203 pipeline cycles. */
12204 return target;
12205 else if (const_vec != NULL_RTX)
12206 /* Load from constant pool. On Cortex-A8 this takes two cycles
12207 (for either double or quad vectors). We can not take advantage
12208 of single-cycle VLD1 because we need a PC-relative addressing
12209 mode. */
12210 return const_vec;
12211 else
12212 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12213 We can not construct an initializer. */
12214 return NULL_RTX;
12215 }
12216
12217 /* Initialize vector TARGET to VALS. */
12218
12219 void
12220 neon_expand_vector_init (rtx target, rtx vals)
12221 {
12222 machine_mode mode = GET_MODE (target);
12223 machine_mode inner_mode = GET_MODE_INNER (mode);
12224 int n_elts = GET_MODE_NUNITS (mode);
12225 int n_var = 0, one_var = -1;
12226 bool all_same = true;
12227 rtx x, mem;
12228 int i;
12229
12230 for (i = 0; i < n_elts; ++i)
12231 {
12232 x = XVECEXP (vals, 0, i);
12233 if (!CONSTANT_P (x))
12234 ++n_var, one_var = i;
12235
12236 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12237 all_same = false;
12238 }
12239
12240 if (n_var == 0)
12241 {
12242 rtx constant = neon_make_constant (vals);
12243 if (constant != NULL_RTX)
12244 {
12245 emit_move_insn (target, constant);
12246 return;
12247 }
12248 }
12249
12250 /* Splat a single non-constant element if we can. */
12251 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12252 {
12253 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12254 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12255 return;
12256 }
12257
12258 /* One field is non-constant. Load constant then overwrite varying
12259 field. This is more efficient than using the stack. */
12260 if (n_var == 1)
12261 {
12262 rtx copy = copy_rtx (vals);
12263 rtx index = GEN_INT (one_var);
12264
12265 /* Load constant part of vector, substitute neighboring value for
12266 varying element. */
12267 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12268 neon_expand_vector_init (target, copy);
12269
12270 /* Insert variable. */
12271 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12272 switch (mode)
12273 {
12274 case E_V8QImode:
12275 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12276 break;
12277 case E_V16QImode:
12278 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12279 break;
12280 case E_V4HImode:
12281 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12282 break;
12283 case E_V8HImode:
12284 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12285 break;
12286 case E_V2SImode:
12287 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12288 break;
12289 case E_V4SImode:
12290 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12291 break;
12292 case E_V2SFmode:
12293 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12294 break;
12295 case E_V4SFmode:
12296 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12297 break;
12298 case E_V2DImode:
12299 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12300 break;
12301 default:
12302 gcc_unreachable ();
12303 }
12304 return;
12305 }
12306
12307 /* Construct the vector in memory one field at a time
12308 and load the whole vector. */
12309 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12310 for (i = 0; i < n_elts; i++)
12311 emit_move_insn (adjust_address_nv (mem, inner_mode,
12312 i * GET_MODE_SIZE (inner_mode)),
12313 XVECEXP (vals, 0, i));
12314 emit_move_insn (target, mem);
12315 }
12316
12317 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12318 an error using DESC if it doesn't. EXP indicates the source location,
12319 which includes the inlining history for intrinsics. */
12320
12321 static void
12322 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12323 const_tree exp, const char *desc)
12324 {
12325 HOST_WIDE_INT lane;
12326
12327 gcc_assert (CONST_INT_P (operand));
12328
12329 lane = INTVAL (operand);
12330
12331 if (lane < low || lane >= high)
12332 {
12333 if (exp)
12334 error ("%K%s %wd out of range %wd - %wd",
12335 exp, desc, lane, low, high - 1);
12336 else
12337 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12338 }
12339 }
12340
12341 /* Bounds-check lanes. */
12342
12343 void
12344 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12345 const_tree exp)
12346 {
12347 bounds_check (operand, low, high, exp, "lane");
12348 }
12349
12350 /* Bounds-check constants. */
12351
12352 void
12353 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12354 {
12355 bounds_check (operand, low, high, NULL_TREE, "constant");
12356 }
12357
12358 HOST_WIDE_INT
12359 neon_element_bits (machine_mode mode)
12360 {
12361 return GET_MODE_UNIT_BITSIZE (mode);
12362 }
12363
12364 \f
12365 /* Predicates for `match_operand' and `match_operator'. */
12366
12367 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12368 WB is true if full writeback address modes are allowed and is false
12369 if limited writeback address modes (POST_INC and PRE_DEC) are
12370 allowed. */
12371
12372 int
12373 arm_coproc_mem_operand (rtx op, bool wb)
12374 {
12375 rtx ind;
12376
12377 /* Reject eliminable registers. */
12378 if (! (reload_in_progress || reload_completed || lra_in_progress)
12379 && ( reg_mentioned_p (frame_pointer_rtx, op)
12380 || reg_mentioned_p (arg_pointer_rtx, op)
12381 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12382 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12383 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12384 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12385 return FALSE;
12386
12387 /* Constants are converted into offsets from labels. */
12388 if (!MEM_P (op))
12389 return FALSE;
12390
12391 ind = XEXP (op, 0);
12392
12393 if (reload_completed
12394 && (GET_CODE (ind) == LABEL_REF
12395 || (GET_CODE (ind) == CONST
12396 && GET_CODE (XEXP (ind, 0)) == PLUS
12397 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12398 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12399 return TRUE;
12400
12401 /* Match: (mem (reg)). */
12402 if (REG_P (ind))
12403 return arm_address_register_rtx_p (ind, 0);
12404
12405 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12406 acceptable in any case (subject to verification by
12407 arm_address_register_rtx_p). We need WB to be true to accept
12408 PRE_INC and POST_DEC. */
12409 if (GET_CODE (ind) == POST_INC
12410 || GET_CODE (ind) == PRE_DEC
12411 || (wb
12412 && (GET_CODE (ind) == PRE_INC
12413 || GET_CODE (ind) == POST_DEC)))
12414 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12415
12416 if (wb
12417 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12418 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12419 && GET_CODE (XEXP (ind, 1)) == PLUS
12420 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12421 ind = XEXP (ind, 1);
12422
12423 /* Match:
12424 (plus (reg)
12425 (const)). */
12426 if (GET_CODE (ind) == PLUS
12427 && REG_P (XEXP (ind, 0))
12428 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12429 && CONST_INT_P (XEXP (ind, 1))
12430 && INTVAL (XEXP (ind, 1)) > -1024
12431 && INTVAL (XEXP (ind, 1)) < 1024
12432 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12433 return TRUE;
12434
12435 return FALSE;
12436 }
12437
12438 /* Return TRUE if OP is a memory operand which we can load or store a vector
12439 to/from. TYPE is one of the following values:
12440 0 - Vector load/store (vldr)
12441 1 - Core registers (ldm)
12442 2 - Element/structure loads (vld1)
12443 */
12444 int
12445 neon_vector_mem_operand (rtx op, int type, bool strict)
12446 {
12447 rtx ind;
12448
12449 /* Reject eliminable registers. */
12450 if (strict && ! (reload_in_progress || reload_completed)
12451 && (reg_mentioned_p (frame_pointer_rtx, op)
12452 || reg_mentioned_p (arg_pointer_rtx, op)
12453 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12454 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12455 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12456 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12457 return FALSE;
12458
12459 /* Constants are converted into offsets from labels. */
12460 if (!MEM_P (op))
12461 return FALSE;
12462
12463 ind = XEXP (op, 0);
12464
12465 if (reload_completed
12466 && (GET_CODE (ind) == LABEL_REF
12467 || (GET_CODE (ind) == CONST
12468 && GET_CODE (XEXP (ind, 0)) == PLUS
12469 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12470 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12471 return TRUE;
12472
12473 /* Match: (mem (reg)). */
12474 if (REG_P (ind))
12475 return arm_address_register_rtx_p (ind, 0);
12476
12477 /* Allow post-increment with Neon registers. */
12478 if ((type != 1 && GET_CODE (ind) == POST_INC)
12479 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12480 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12481
12482 /* Allow post-increment by register for VLDn. */
12483 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12484 && GET_CODE (XEXP (ind, 1)) == PLUS
12485 && REG_P (XEXP (XEXP (ind, 1), 1)))
12486 return true;
12487
12488 /* Match:
12489 (plus (reg)
12490 (const)). */
12491 if (type == 0
12492 && GET_CODE (ind) == PLUS
12493 && REG_P (XEXP (ind, 0))
12494 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12495 && CONST_INT_P (XEXP (ind, 1))
12496 && INTVAL (XEXP (ind, 1)) > -1024
12497 /* For quad modes, we restrict the constant offset to be slightly less
12498 than what the instruction format permits. We have no such constraint
12499 on double mode offsets. (This must match arm_legitimate_index_p.) */
12500 && (INTVAL (XEXP (ind, 1))
12501 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12502 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12503 return TRUE;
12504
12505 return FALSE;
12506 }
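
/* For illustration, the address forms accepted above for each TYPE
   (derived from the checks in neon_vector_mem_operand; after reload a
   label or label-plus-offset address is also accepted for every TYPE):

     TYPE 0 (vldr): (mem (reg)), (mem (post_inc (reg))),
                    (mem (pre_dec (reg))), and (mem (plus (reg) (const_int N)))
                    with -1024 < N < 1024 (N < 1016 for quad modes) and
                    N a multiple of 4.
     TYPE 1 (ldm):  (mem (reg)) only.
     TYPE 2 (vld1): (mem (reg)), (mem (post_inc (reg))), and post-increment
                    by a register via (post_modify ...). */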
12507
12508 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12509 type. */
12510 int
12511 neon_struct_mem_operand (rtx op)
12512 {
12513 rtx ind;
12514
12515 /* Reject eliminable registers. */
12516 if (! (reload_in_progress || reload_completed)
12517 && ( reg_mentioned_p (frame_pointer_rtx, op)
12518 || reg_mentioned_p (arg_pointer_rtx, op)
12519 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12520 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12521 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12522 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12523 return FALSE;
12524
12525 /* Constants are converted into offsets from labels. */
12526 if (!MEM_P (op))
12527 return FALSE;
12528
12529 ind = XEXP (op, 0);
12530
12531 if (reload_completed
12532 && (GET_CODE (ind) == LABEL_REF
12533 || (GET_CODE (ind) == CONST
12534 && GET_CODE (XEXP (ind, 0)) == PLUS
12535 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12536 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12537 return TRUE;
12538
12539 /* Match: (mem (reg)). */
12540 if (REG_P (ind))
12541 return arm_address_register_rtx_p (ind, 0);
12542
12543 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12544 if (GET_CODE (ind) == POST_INC
12545 || GET_CODE (ind) == PRE_DEC)
12546 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12547
12548 return FALSE;
12549 }
12550
12551 /* Return true if X is a register that will be eliminated later on. */
12552 int
12553 arm_eliminable_register (rtx x)
12554 {
12555 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12556 || REGNO (x) == ARG_POINTER_REGNUM
12557 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12558 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12559 }
12560
12561 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12562 coprocessor registers. Otherwise return NO_REGS. */
12563
12564 enum reg_class
12565 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12566 {
12567 if (mode == HFmode)
12568 {
12569 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12570 return GENERAL_REGS;
12571 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12572 return NO_REGS;
12573 return GENERAL_REGS;
12574 }
12575
12576 /* The neon move patterns handle all legitimate vector and struct
12577 addresses. */
12578 if (TARGET_NEON
12579 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12580 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12581 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12582 || VALID_NEON_STRUCT_MODE (mode)))
12583 return NO_REGS;
12584
12585 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12586 return NO_REGS;
12587
12588 return GENERAL_REGS;
12589 }
12590
12591 /* Values which must be returned in the most-significant end of the return
12592 register. */
12593
12594 static bool
12595 arm_return_in_msb (const_tree valtype)
12596 {
12597 return (TARGET_AAPCS_BASED
12598 && BYTES_BIG_ENDIAN
12599 && (AGGREGATE_TYPE_P (valtype)
12600 || TREE_CODE (valtype) == COMPLEX_TYPE
12601 || FIXED_POINT_TYPE_P (valtype)));
12602 }
12603
12604 /* Return TRUE if X references a SYMBOL_REF. */
12605 int
12606 symbol_mentioned_p (rtx x)
12607 {
12608 const char * fmt;
12609 int i;
12610
12611 if (GET_CODE (x) == SYMBOL_REF)
12612 return 1;
12613
12614 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12615 are constant offsets, not symbols. */
12616 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12617 return 0;
12618
12619 fmt = GET_RTX_FORMAT (GET_CODE (x));
12620
12621 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12622 {
12623 if (fmt[i] == 'E')
12624 {
12625 int j;
12626
12627 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12628 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12629 return 1;
12630 }
12631 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12632 return 1;
12633 }
12634
12635 return 0;
12636 }
12637
12638 /* Return TRUE if X references a LABEL_REF. */
12639 int
12640 label_mentioned_p (rtx x)
12641 {
12642 const char * fmt;
12643 int i;
12644
12645 if (GET_CODE (x) == LABEL_REF)
12646 return 1;
12647
12648 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12649 instruction, but they are constant offsets, not symbols. */
12650 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12651 return 0;
12652
12653 fmt = GET_RTX_FORMAT (GET_CODE (x));
12654 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12655 {
12656 if (fmt[i] == 'E')
12657 {
12658 int j;
12659
12660 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12661 if (label_mentioned_p (XVECEXP (x, i, j)))
12662 return 1;
12663 }
12664 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12665 return 1;
12666 }
12667
12668 return 0;
12669 }
12670
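/* Return TRUE if X is a TLS-related UNSPEC, possibly wrapped in a CONST. */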
12671 int
12672 tls_mentioned_p (rtx x)
12673 {
12674 switch (GET_CODE (x))
12675 {
12676 case CONST:
12677 return tls_mentioned_p (XEXP (x, 0));
12678
12679 case UNSPEC:
12680 if (XINT (x, 1) == UNSPEC_TLS)
12681 return 1;
12682
12683 /* Fall through. */
12684 default:
12685 return 0;
12686 }
12687 }
12688
12689 /* Must not copy any rtx that uses a pc-relative address.
12690 Also, disallow copying of load-exclusive instructions that
12691 may appear after splitting of compare-and-swap-style operations
12692 so as to prevent those loops from being transformed away from their
12693 canonical forms (see PR 69904). */
12694
12695 static bool
12696 arm_cannot_copy_insn_p (rtx_insn *insn)
12697 {
12698 /* The tls call insn cannot be copied, as it is paired with a data
12699 word. */
12700 if (recog_memoized (insn) == CODE_FOR_tlscall)
12701 return true;
12702
12703 subrtx_iterator::array_type array;
12704 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12705 {
12706 const_rtx x = *iter;
12707 if (GET_CODE (x) == UNSPEC
12708 && (XINT (x, 1) == UNSPEC_PIC_BASE
12709 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12710 return true;
12711 }
12712
12713 rtx set = single_set (insn);
12714 if (set)
12715 {
12716 rtx src = SET_SRC (set);
12717 if (GET_CODE (src) == ZERO_EXTEND)
12718 src = XEXP (src, 0);
12719
12720 /* Catch the load-exclusive and load-acquire operations. */
12721 if (GET_CODE (src) == UNSPEC_VOLATILE
12722 && (XINT (src, 1) == VUNSPEC_LL
12723 || XINT (src, 1) == VUNSPEC_LAX))
12724 return true;
12725 }
12726 return false;
12727 }
12728
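/* Return the comparison code used to implement the min/max operation X
   (e.g. SMAX corresponds to GE, UMIN to LEU). */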
12729 enum rtx_code
12730 minmax_code (rtx x)
12731 {
12732 enum rtx_code code = GET_CODE (x);
12733
12734 switch (code)
12735 {
12736 case SMAX:
12737 return GE;
12738 case SMIN:
12739 return LE;
12740 case UMIN:
12741 return LEU;
12742 case UMAX:
12743 return GEU;
12744 default:
12745 gcc_unreachable ();
12746 }
12747 }
12748
12749 /* Match a pair of saturation bounds (LO_BOUND, HI_BOUND) that can be
 implemented via usat/ssat; store the bit-width in *MASK and the signedness
 in *SIGNED_SAT. */
12750
12751 bool
12752 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12753 int *mask, bool *signed_sat)
12754 {
12755 /* The high bound must be a power of two minus one. */
12756 int log = exact_log2 (INTVAL (hi_bound) + 1);
12757 if (log == -1)
12758 return false;
12759
12760 /* The low bound is either zero (for usat) or one less than the
12761 negation of the high bound (for ssat). */
12762 if (INTVAL (lo_bound) == 0)
12763 {
12764 if (mask)
12765 *mask = log;
12766 if (signed_sat)
12767 *signed_sat = false;
12768
12769 return true;
12770 }
12771
12772 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12773 {
12774 if (mask)
12775 *mask = log + 1;
12776 if (signed_sat)
12777 *signed_sat = true;
12778
12779 return true;
12780 }
12781
12782 return false;
12783 }
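
/* For example (derived from the checks above):
     LO_BOUND = 0, HI_BOUND = 255 gives *MASK = 8, *SIGNED_SAT = false
       (a usat #8, clamping to [0, 255]);
     LO_BOUND = -128, HI_BOUND = 127 gives *MASK = 8, *SIGNED_SAT = true
       (an ssat #8, clamping to [-128, 127]).
   A pair such as (0, 100) is rejected because 101 is not a power of two. */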
12784
12785 /* Return 1 if the memory locations A and B are adjacent (4 bytes apart). */
12786 int
12787 adjacent_mem_locations (rtx a, rtx b)
12788 {
12789 /* We don't guarantee to preserve the order of these memory refs. */
12790 if (volatile_refs_p (a) || volatile_refs_p (b))
12791 return 0;
12792
12793 if ((REG_P (XEXP (a, 0))
12794 || (GET_CODE (XEXP (a, 0)) == PLUS
12795 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12796 && (REG_P (XEXP (b, 0))
12797 || (GET_CODE (XEXP (b, 0)) == PLUS
12798 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12799 {
12800 HOST_WIDE_INT val0 = 0, val1 = 0;
12801 rtx reg0, reg1;
12802 int val_diff;
12803
12804 if (GET_CODE (XEXP (a, 0)) == PLUS)
12805 {
12806 reg0 = XEXP (XEXP (a, 0), 0);
12807 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12808 }
12809 else
12810 reg0 = XEXP (a, 0);
12811
12812 if (GET_CODE (XEXP (b, 0)) == PLUS)
12813 {
12814 reg1 = XEXP (XEXP (b, 0), 0);
12815 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12816 }
12817 else
12818 reg1 = XEXP (b, 0);
12819
12820 /* Don't accept any offset that will require multiple
12821 instructions to handle, since this would cause the
12822 arith_adjacentmem pattern to output an overlong sequence. */
12823 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12824 return 0;
12825
12826 /* Don't allow an eliminable register: register elimination can make
12827 the offset too large. */
12828 if (arm_eliminable_register (reg0))
12829 return 0;
12830
12831 val_diff = val1 - val0;
12832
12833 if (arm_ld_sched)
12834 {
12835 /* If the target has load delay slots, then there's no benefit
12836 to using an ldm instruction unless the offset is zero and
12837 we are optimizing for size. */
12838 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12839 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12840 && (val_diff == 4 || val_diff == -4));
12841 }
12842
12843 return ((REGNO (reg0) == REGNO (reg1))
12844 && (val_diff == 4 || val_diff == -4));
12845 }
12846
12847 return 0;
12848 }
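
/* For example, (mem (plus (reg r3) (const_int 4))) and
   (mem (plus (reg r3) (const_int 8))) are adjacent: same base register and
   offsets differing by exactly 4. On arm_ld_sched cores such a pair is
   only accepted when optimizing for size and one of the offsets is 0 or 4. */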
12849
12850 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12851 for load operations, false for store operations. CONSECUTIVE is true
12852 if the register numbers in the operation must be consecutive in the register
12853 bank. RETURN_PC is true if the value is to be loaded into the PC. MODE is
 the mode of each individual transfer (SImode for LDM/STM; e.g. DFmode for
 VLDM/VSTM).
12854 The pattern we are trying to match for load is:
12855 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12856 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12857 :
12858 :
12859 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12860 ]
12861 where
12862 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12863 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12864 3. If consecutive is TRUE, then for kth register being loaded,
12865 REGNO (R_dk) = REGNO (R_d0) + k.
12866 The pattern for store is similar. */
12867 bool
12868 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12869 bool consecutive, bool return_pc)
12870 {
12871 HOST_WIDE_INT count = XVECLEN (op, 0);
12872 rtx reg, mem, addr;
12873 unsigned regno;
12874 unsigned first_regno;
12875 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12876 rtx elt;
12877 bool addr_reg_in_reglist = false;
12878 bool update = false;
12879 int reg_increment;
12880 int offset_adj;
12881 int regs_per_val;
12882
12883 /* If not in SImode, then registers must be consecutive
12884 (e.g., VLDM instructions for DFmode). */
12885 gcc_assert ((mode == SImode) || consecutive);
12886 /* Setting return_pc for stores is illegal. */
12887 gcc_assert (!return_pc || load);
12888
12889 /* Set up the increments and the regs per val based on the mode. */
12890 reg_increment = GET_MODE_SIZE (mode);
12891 regs_per_val = reg_increment / 4;
12892 offset_adj = return_pc ? 1 : 0;
12893
12894 if (count <= 1
12895 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12896 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12897 return false;
12898
12899 /* Check if this is a write-back. */
12900 elt = XVECEXP (op, 0, offset_adj);
12901 if (GET_CODE (SET_SRC (elt)) == PLUS)
12902 {
12903 i++;
12904 base = 1;
12905 update = true;
12906
12907 /* The offset adjustment must be the number of registers being
12908 popped times the size of a single register. */
12909 if (!REG_P (SET_DEST (elt))
12910 || !REG_P (XEXP (SET_SRC (elt), 0))
12911 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12912 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12913 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12914 ((count - 1 - offset_adj) * reg_increment))
12915 return false;
12916 }
12917
12918 i = i + offset_adj;
12919 base = base + offset_adj;
12920 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12921 success depends on the type: VLDM can do just one reg,
12922 LDM must do at least two. */
12923 if ((count <= i) && (mode == SImode))
12924 return false;
12925
12926 elt = XVECEXP (op, 0, i - 1);
12927 if (GET_CODE (elt) != SET)
12928 return false;
12929
12930 if (load)
12931 {
12932 reg = SET_DEST (elt);
12933 mem = SET_SRC (elt);
12934 }
12935 else
12936 {
12937 reg = SET_SRC (elt);
12938 mem = SET_DEST (elt);
12939 }
12940
12941 if (!REG_P (reg) || !MEM_P (mem))
12942 return false;
12943
12944 regno = REGNO (reg);
12945 first_regno = regno;
12946 addr = XEXP (mem, 0);
12947 if (GET_CODE (addr) == PLUS)
12948 {
12949 if (!CONST_INT_P (XEXP (addr, 1)))
12950 return false;
12951
12952 offset = INTVAL (XEXP (addr, 1));
12953 addr = XEXP (addr, 0);
12954 }
12955
12956 if (!REG_P (addr))
12957 return false;
12958
12959 /* Don't allow SP to be loaded unless it is also the base register. It
12960 guarantees that SP is reset correctly when an LDM instruction
12961 is interrupted. Otherwise, we might end up with a corrupt stack. */
12962 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12963 return false;
12964
12965 for (; i < count; i++)
12966 {
12967 elt = XVECEXP (op, 0, i);
12968 if (GET_CODE (elt) != SET)
12969 return false;
12970
12971 if (load)
12972 {
12973 reg = SET_DEST (elt);
12974 mem = SET_SRC (elt);
12975 }
12976 else
12977 {
12978 reg = SET_SRC (elt);
12979 mem = SET_DEST (elt);
12980 }
12981
12982 if (!REG_P (reg)
12983 || GET_MODE (reg) != mode
12984 || REGNO (reg) <= regno
12985 || (consecutive
12986 && (REGNO (reg) !=
12987 (unsigned int) (first_regno + regs_per_val * (i - base))))
12988 /* Don't allow SP to be loaded unless it is also the base register. It
12989 guarantees that SP is reset correctly when an LDM instruction
12990 is interrupted. Otherwise, we might end up with a corrupt stack. */
12991 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12992 || !MEM_P (mem)
12993 || GET_MODE (mem) != mode
12994 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12995 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12996 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
12997 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
12998 offset + (i - base) * reg_increment))
12999 && (!REG_P (XEXP (mem, 0))
13000 || offset + (i - base) * reg_increment != 0)))
13001 return false;
13002
13003 regno = REGNO (reg);
13004 if (regno == REGNO (addr))
13005 addr_reg_in_reglist = true;
13006 }
13007
13008 if (load)
13009 {
13010 if (update && addr_reg_in_reglist)
13011 return false;
13012
13013 /* For Thumb-1, the address register is always modified - either by write-back
13014 or by an explicit load. If the pattern does not describe an update,
13015 then the address register must be in the list of loaded registers. */
13016 if (TARGET_THUMB1)
13017 return update || addr_reg_in_reglist;
13018 }
13019
13020 return true;
13021 }
13022
13023 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13024 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13025 instruction. ADD_OFFSET is nonzero if the base address register needs
13026 to be modified with an add instruction before we can use it. */
13027
13028 static bool
13029 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13030 int nops, HOST_WIDE_INT add_offset)
13031 {
13032 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13033 if the offset isn't small enough. The reason 2 ldrs are faster
13034 is because these ARMs are able to do more than one cache access
13035 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13036 whilst the ARM8 has a double bandwidth cache. This means that
13037 these cores can do both an instruction fetch and a data fetch in
13038 a single cycle, so the trick of calculating the address into a
13039 scratch register (one of the result regs) and then doing a load
13040 multiple actually becomes slower (and no smaller in code size).
13041 That is the transformation
13042
13043 ldr rd1, [rbase + offset]
13044 ldr rd2, [rbase + offset + 4]
13045
13046 to
13047
13048 add rd1, rbase, offset
13049 ldmia rd1, {rd1, rd2}
13050
13051 produces worse code -- '3 cycles + any stalls on rd2' instead of
13052 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13053 access per cycle, the first sequence could never complete in less
13054 than 6 cycles, whereas the ldm sequence would only take 5 and
13055 would make better use of sequential accesses if not hitting the
13056 cache.
13057
13058 We cheat here and test 'arm_ld_sched' which we currently know to
13059 only be true for the ARM8, ARM9 and StrongARM. If this ever
13060 changes, then the test below needs to be reworked. */
13061 if (nops == 2 && arm_ld_sched && add_offset != 0)
13062 return false;
13063
13064 /* XScale has load-store double instructions, but they have stricter
13065 alignment requirements than load-store multiple, so we cannot
13066 use them.
13067
13068 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13069 the pipeline until completion.
13070
13071 NREGS CYCLES
13072 1 3
13073 2 4
13074 3 5
13075 4 6
13076
13077 An ldr instruction takes 1-3 cycles, but does not block the
13078 pipeline.
13079
13080 NREGS CYCLES
13081 1 1-3
13082 2 2-6
13083 3 3-9
13084 4 4-12
13085
13086 Best case ldr will always win. However, the more ldr instructions
13087 we issue, the less likely we are to be able to schedule them well.
13088 Using ldr instructions also increases code size.
13089
13090 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13091 for counts of 3 or 4 regs. */
13092 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13093 return false;
13094 return true;
13095 }
13096
13097 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13098 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13099 an array ORDER describing the sequence in which to access the offsets
13100 so that they form an ascending order. In this sequence, each
13101 offset must be larger by exactly 4 than the previous one. ORDER[0]
13102 must have been filled in with the lowest offset by the caller.
13103 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13104 we use to verify that ORDER produces an ascending order of registers.
13105 Return true if it was possible to construct such an order, false if
13106 not. */
13107
13108 static bool
13109 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13110 int *unsorted_regs)
13111 {
13112 int i;
13113 for (i = 1; i < nops; i++)
13114 {
13115 int j;
13116
13117 order[i] = order[i - 1];
13118 for (j = 0; j < nops; j++)
13119 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13120 {
13121 /* We must find exactly one offset that is higher than the
13122 previous one by 4. */
13123 if (order[i] != order[i - 1])
13124 return false;
13125 order[i] = j;
13126 }
13127 if (order[i] == order[i - 1])
13128 return false;
13129 /* The register numbers must be ascending. */
13130 if (unsorted_regs != NULL
13131 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13132 return false;
13133 }
13134 return true;
13135 }
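
/* For example, with NOPS = 4 and UNSORTED_OFFSETS = {8, 0, 4, 12}, the
   caller sets ORDER[0] = 1 (the index of the lowest offset) and the loop
   above fills in ORDER = {1, 2, 0, 3}, i.e. offsets 0, 4, 8, 12.
   UNSORTED_OFFSETS = {0, 4, 12, 16} fails because no offset equals 4 + 4. */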
13136
13137 /* Used to determine in a peephole whether a sequence of load
13138 instructions can be changed into a load-multiple instruction.
13139 NOPS is the number of separate load instructions we are examining. The
13140 first NOPS entries in OPERANDS are the destination registers, the
13141 next NOPS entries are memory operands. If this function is
13142 successful, *BASE is set to the common base register of the memory
13143 accesses; *LOAD_OFFSET is set to the first memory location's offset
13144 from that base register.
13145 REGS is an array filled in with the destination register numbers.
13146 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13147 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13148 the sequence of registers in REGS matches the loads from ascending memory
13149 locations, and the function verifies that the register numbers are
13150 themselves ascending. If CHECK_REGS is false, the register numbers
13151 are stored in the order they are found in the operands. */
13152 static int
13153 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13154 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13155 {
13156 int unsorted_regs[MAX_LDM_STM_OPS];
13157 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13158 int order[MAX_LDM_STM_OPS];
13159 rtx base_reg_rtx = NULL;
13160 int base_reg = -1;
13161 int i, ldm_case;
13162
13163 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13164 easily extended if required. */
13165 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13166
13167 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13168
13169 /* Loop over the operands and check that the memory references are
13170 suitable (i.e. immediate offsets from the same base register). At
13171 the same time, extract the target register, and the memory
13172 offsets. */
13173 for (i = 0; i < nops; i++)
13174 {
13175 rtx reg;
13176 rtx offset;
13177
13178 /* Convert a subreg of a mem into the mem itself. */
13179 if (GET_CODE (operands[nops + i]) == SUBREG)
13180 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13181
13182 gcc_assert (MEM_P (operands[nops + i]));
13183
13184 /* Don't reorder volatile memory references; it doesn't seem worth
13185 looking for the case where the order is ok anyway. */
13186 if (MEM_VOLATILE_P (operands[nops + i]))
13187 return 0;
13188
13189 offset = const0_rtx;
13190
13191 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13192 || (GET_CODE (reg) == SUBREG
13193 && REG_P (reg = SUBREG_REG (reg))))
13194 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13195 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13196 || (GET_CODE (reg) == SUBREG
13197 && REG_P (reg = SUBREG_REG (reg))))
13198 && (CONST_INT_P (offset
13199 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13200 {
13201 if (i == 0)
13202 {
13203 base_reg = REGNO (reg);
13204 base_reg_rtx = reg;
13205 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13206 return 0;
13207 }
13208 else if (base_reg != (int) REGNO (reg))
13209 /* Not addressed from the same base register. */
13210 return 0;
13211
13212 unsorted_regs[i] = (REG_P (operands[i])
13213 ? REGNO (operands[i])
13214 : REGNO (SUBREG_REG (operands[i])));
13215
13216 /* If it isn't an integer register, or if it overwrites the
13217 base register but isn't the last insn in the list, then
13218 we can't do this. */
13219 if (unsorted_regs[i] < 0
13220 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13221 || unsorted_regs[i] > 14
13222 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13223 return 0;
13224
13225 /* Don't allow SP to be loaded unless it is also the base
13226 register. It guarantees that SP is reset correctly when
13227 an LDM instruction is interrupted. Otherwise, we might
13228 end up with a corrupt stack. */
13229 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13230 return 0;
13231
13232 unsorted_offsets[i] = INTVAL (offset);
13233 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13234 order[0] = i;
13235 }
13236 else
13237 /* Not a suitable memory address. */
13238 return 0;
13239 }
13240
13241 /* All the useful information has now been extracted from the
13242 operands into unsorted_regs and unsorted_offsets; additionally,
13243 order[0] has been set to the lowest offset in the list. Sort
13244 the offsets into order, verifying that they are adjacent, and
13245 check that the register numbers are ascending. */
13246 if (!compute_offset_order (nops, unsorted_offsets, order,
13247 check_regs ? unsorted_regs : NULL))
13248 return 0;
13249
13250 if (saved_order)
13251 memcpy (saved_order, order, sizeof order);
13252
13253 if (base)
13254 {
13255 *base = base_reg;
13256
13257 for (i = 0; i < nops; i++)
13258 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13259
13260 *load_offset = unsorted_offsets[order[0]];
13261 }
13262
13263 if (TARGET_THUMB1
13264 && !peep2_reg_dead_p (nops, base_reg_rtx))
13265 return 0;
13266
13267 if (unsorted_offsets[order[0]] == 0)
13268 ldm_case = 1; /* ldmia */
13269 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13270 ldm_case = 2; /* ldmib */
13271 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13272 ldm_case = 3; /* ldmda */
13273 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13274 ldm_case = 4; /* ldmdb */
13275 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13276 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13277 ldm_case = 5;
13278 else
13279 return 0;
13280
13281 if (!multiple_operation_profitable_p (false, nops,
13282 ldm_case == 5
13283 ? unsorted_offsets[order[0]] : 0))
13284 return 0;
13285
13286 return ldm_case;
13287 }
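
/* For example, four loads of r4..r7 from [r1], [r1, #4], [r1, #8] and
   [r1, #12] give ldm_case 1 (ldmia); the same loads with offsets starting
   at 4 give ldm_case 2 (ldmib, ARM only). If the lowest offset is merely a
   valid add/sub immediate, ldm_case 5 is returned and the caller must first
   add that offset into the base (or a scratch) register, as gen_ldm_seq
   does below. */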
13288
13289 /* Used to determine in a peephole whether a sequence of store instructions can
13290 be changed into a store-multiple instruction.
13291 NOPS is the number of separate store instructions we are examining.
13292 NOPS_TOTAL is the total number of instructions recognized by the peephole
13293 pattern.
13294 The first NOPS entries in OPERANDS are the source registers, the next
13295 NOPS entries are memory operands. If this function is successful, *BASE is
13296 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13297 to the first memory location's offset from that base register. REGS is an
13298 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13299 likewise filled with the corresponding rtx's.
13300 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13301 numbers to an ascending order of stores.
13302 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13303 from ascending memory locations, and the function verifies that the register
13304 numbers are themselves ascending. If CHECK_REGS is false, the register
13305 numbers are stored in the order they are found in the operands. */
13306 static int
13307 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13308 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13309 HOST_WIDE_INT *load_offset, bool check_regs)
13310 {
13311 int unsorted_regs[MAX_LDM_STM_OPS];
13312 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13313 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13314 int order[MAX_LDM_STM_OPS];
13315 int base_reg = -1;
13316 rtx base_reg_rtx = NULL;
13317 int i, stm_case;
13318
13319 /* Write back of base register is currently only supported for Thumb 1. */
13320 int base_writeback = TARGET_THUMB1;
13321
13322 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13323 easily extended if required. */
13324 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13325
13326 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13327
13328 /* Loop over the operands and check that the memory references are
13329 suitable (i.e. immediate offsets from the same base register). At
13330 the same time, extract the target register, and the memory
13331 offsets. */
13332 for (i = 0; i < nops; i++)
13333 {
13334 rtx reg;
13335 rtx offset;
13336
13337 /* Convert a subreg of a mem into the mem itself. */
13338 if (GET_CODE (operands[nops + i]) == SUBREG)
13339 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13340
13341 gcc_assert (MEM_P (operands[nops + i]));
13342
13343 /* Don't reorder volatile memory references; it doesn't seem worth
13344 looking for the case where the order is ok anyway. */
13345 if (MEM_VOLATILE_P (operands[nops + i]))
13346 return 0;
13347
13348 offset = const0_rtx;
13349
13350 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13351 || (GET_CODE (reg) == SUBREG
13352 && REG_P (reg = SUBREG_REG (reg))))
13353 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13354 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13355 || (GET_CODE (reg) == SUBREG
13356 && REG_P (reg = SUBREG_REG (reg))))
13357 && (CONST_INT_P (offset
13358 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13359 {
13360 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13361 ? operands[i] : SUBREG_REG (operands[i]));
13362 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13363
13364 if (i == 0)
13365 {
13366 base_reg = REGNO (reg);
13367 base_reg_rtx = reg;
13368 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13369 return 0;
13370 }
13371 else if (base_reg != (int) REGNO (reg))
13372 /* Not addressed from the same base register. */
13373 return 0;
13374
13375 /* If it isn't an integer register, then we can't do this. */
13376 if (unsorted_regs[i] < 0
13377 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13378 /* The effects are unpredictable if the base register is
13379 both updated and stored. */
13380 || (base_writeback && unsorted_regs[i] == base_reg)
13381 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13382 || unsorted_regs[i] > 14)
13383 return 0;
13384
13385 unsorted_offsets[i] = INTVAL (offset);
13386 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13387 order[0] = i;
13388 }
13389 else
13390 /* Not a suitable memory address. */
13391 return 0;
13392 }
13393
13394 /* All the useful information has now been extracted from the
13395 operands into unsorted_regs and unsorted_offsets; additionally,
13396 order[0] has been set to the lowest offset in the list. Sort
13397 the offsets into order, verifying that they are adjacent, and
13398 check that the register numbers are ascending. */
13399 if (!compute_offset_order (nops, unsorted_offsets, order,
13400 check_regs ? unsorted_regs : NULL))
13401 return 0;
13402
13403 if (saved_order)
13404 memcpy (saved_order, order, sizeof order);
13405
13406 if (base)
13407 {
13408 *base = base_reg;
13409
13410 for (i = 0; i < nops; i++)
13411 {
13412 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13413 if (reg_rtxs)
13414 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13415 }
13416
13417 *load_offset = unsorted_offsets[order[0]];
13418 }
13419
13420 if (TARGET_THUMB1
13421 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13422 return 0;
13423
13424 if (unsorted_offsets[order[0]] == 0)
13425 stm_case = 1; /* stmia */
13426 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13427 stm_case = 2; /* stmib */
13428 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13429 stm_case = 3; /* stmda */
13430 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13431 stm_case = 4; /* stmdb */
13432 else
13433 return 0;
13434
13435 if (!multiple_operation_profitable_p (false, nops, 0))
13436 return 0;
13437
13438 return stm_case;
13439 }
13440 \f
13441 /* Routines for use in generating RTL. */
13442
13443 /* Generate a load-multiple instruction. COUNT is the number of loads in
13444 the instruction; REGS and MEMS are arrays containing the operands.
13445 BASEREG is the base register to be used in addressing the memory operands.
13446 WBACK_OFFSET is nonzero if the instruction should update the base
13447 register. */
13448
13449 static rtx
13450 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13451 HOST_WIDE_INT wback_offset)
13452 {
13453 int i = 0, j;
13454 rtx result;
13455
13456 if (!multiple_operation_profitable_p (false, count, 0))
13457 {
13458 rtx seq;
13459
13460 start_sequence ();
13461
13462 for (i = 0; i < count; i++)
13463 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13464
13465 if (wback_offset != 0)
13466 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13467
13468 seq = get_insns ();
13469 end_sequence ();
13470
13471 return seq;
13472 }
13473
13474 result = gen_rtx_PARALLEL (VOIDmode,
13475 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13476 if (wback_offset != 0)
13477 {
13478 XVECEXP (result, 0, 0)
13479 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13480 i = 1;
13481 count++;
13482 }
13483
13484 for (j = 0; i < count; i++, j++)
13485 XVECEXP (result, 0, i)
13486 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13487
13488 return result;
13489 }
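
/* For illustration, with COUNT = 2, REGS = {4, 5} and WBACK_OFFSET = 8,
   the profitable path above builds

     (parallel [(set (reg basereg) (plus (reg basereg) (const_int 8)))
                (set (reg:SI r4) MEMS[0])
                (set (reg:SI r5) MEMS[1])])

   i.e. an ldmia with write-back; the non-profitable path instead emits two
   single loads followed by a separate update of the base register. */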
13490
13491 /* Generate a store-multiple instruction. COUNT is the number of stores in
13492 the instruction; REGS and MEMS are arrays containing the operands.
13493 BASEREG is the base register to be used in addressing the memory operands.
13494 WBACK_OFFSET is nonzero if the instruction should update the base
13495 register. */
13496
13497 static rtx
13498 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13499 HOST_WIDE_INT wback_offset)
13500 {
13501 int i = 0, j;
13502 rtx result;
13503
13504 if (GET_CODE (basereg) == PLUS)
13505 basereg = XEXP (basereg, 0);
13506
13507 if (!multiple_operation_profitable_p (false, count, 0))
13508 {
13509 rtx seq;
13510
13511 start_sequence ();
13512
13513 for (i = 0; i < count; i++)
13514 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13515
13516 if (wback_offset != 0)
13517 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13518
13519 seq = get_insns ();
13520 end_sequence ();
13521
13522 return seq;
13523 }
13524
13525 result = gen_rtx_PARALLEL (VOIDmode,
13526 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13527 if (wback_offset != 0)
13528 {
13529 XVECEXP (result, 0, 0)
13530 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13531 i = 1;
13532 count++;
13533 }
13534
13535 for (j = 0; i < count; i++, j++)
13536 XVECEXP (result, 0, i)
13537 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13538
13539 return result;
13540 }
13541
13542 /* Generate either a load-multiple or a store-multiple instruction. This
13543 function can be used in situations where we can start with a single MEM
13544 rtx and adjust its address upwards.
13545 COUNT is the number of operations in the instruction, not counting a
13546 possible update of the base register. REGS is an array containing the
13547 register operands.
13548 BASEREG is the base register to be used in addressing the memory operands,
13549 which are constructed from BASEMEM.
13550 WRITE_BACK specifies whether the generated instruction should include an
13551 update of the base register.
13552 OFFSETP is used to pass an offset to and from this function; this offset
13553 is not used when constructing the address (instead BASEMEM should have an
13554 appropriate offset in its address), it is used only for setting
13555 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13556
13557 static rtx
13558 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13559 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13560 {
13561 rtx mems[MAX_LDM_STM_OPS];
13562 HOST_WIDE_INT offset = *offsetp;
13563 int i;
13564
13565 gcc_assert (count <= MAX_LDM_STM_OPS);
13566
13567 if (GET_CODE (basereg) == PLUS)
13568 basereg = XEXP (basereg, 0);
13569
13570 for (i = 0; i < count; i++)
13571 {
13572 rtx addr = plus_constant (Pmode, basereg, i * 4);
13573 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13574 offset += 4;
13575 }
13576
13577 if (write_back)
13578 *offsetp = offset;
13579
13580 if (is_load)
13581 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13582 write_back ? 4 * count : 0);
13583 else
13584 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13585 write_back ? 4 * count : 0);
13586 }
13587
13588 rtx
13589 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13590 rtx basemem, HOST_WIDE_INT *offsetp)
13591 {
13592 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13593 offsetp);
13594 }
13595
13596 rtx
13597 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13598 rtx basemem, HOST_WIDE_INT *offsetp)
13599 {
13600 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13601 offsetp);
13602 }
13603
13604 /* Called from a peephole2 expander to turn a sequence of loads into an
13605 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13606 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13607 is true if we can reorder the registers because they are subsequently used
13608 commutatively.
13609 Returns true iff we could generate a new instruction. */
13610
13611 bool
13612 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13613 {
13614 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13615 rtx mems[MAX_LDM_STM_OPS];
13616 int i, j, base_reg;
13617 rtx base_reg_rtx;
13618 HOST_WIDE_INT offset;
13619 int write_back = FALSE;
13620 int ldm_case;
13621 rtx addr;
13622
13623 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13624 &base_reg, &offset, !sort_regs);
13625
13626 if (ldm_case == 0)
13627 return false;
13628
13629 if (sort_regs)
13630 for (i = 0; i < nops - 1; i++)
13631 for (j = i + 1; j < nops; j++)
13632 if (regs[i] > regs[j])
13633 {
13634 int t = regs[i];
13635 regs[i] = regs[j];
13636 regs[j] = t;
13637 }
13638 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13639
13640 if (TARGET_THUMB1)
13641 {
13642 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13643 gcc_assert (ldm_case == 1 || ldm_case == 5);
13644 write_back = TRUE;
13645 }
13646
13647 if (ldm_case == 5)
13648 {
13649 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13650 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13651 offset = 0;
13652 if (!TARGET_THUMB1)
13653 base_reg_rtx = newbase;
13654 }
13655
13656 for (i = 0; i < nops; i++)
13657 {
13658 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13659 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13660 SImode, addr, 0);
13661 }
13662 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13663 write_back ? offset + i * 4 : 0));
13664 return true;
13665 }
13666
13667 /* Called from a peephole2 expander to turn a sequence of stores into an
13668 STM instruction. OPERANDS are the operands found by the peephole matcher;
13669 NOPS indicates how many separate stores we are trying to combine.
13670 Returns true iff we could generate a new instruction. */
13671
13672 bool
13673 gen_stm_seq (rtx *operands, int nops)
13674 {
13675 int i;
13676 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13677 rtx mems[MAX_LDM_STM_OPS];
13678 int base_reg;
13679 rtx base_reg_rtx;
13680 HOST_WIDE_INT offset;
13681 int write_back = FALSE;
13682 int stm_case;
13683 rtx addr;
13684 bool base_reg_dies;
13685
13686 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13687 mem_order, &base_reg, &offset, true);
13688
13689 if (stm_case == 0)
13690 return false;
13691
13692 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13693
13694 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13695 if (TARGET_THUMB1)
13696 {
13697 gcc_assert (base_reg_dies);
13698 write_back = TRUE;
13699 }
13700
13701 if (stm_case == 5)
13702 {
13703 gcc_assert (base_reg_dies);
13704 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13705 offset = 0;
13706 }
13707
13708 addr = plus_constant (Pmode, base_reg_rtx, offset);
13709
13710 for (i = 0; i < nops; i++)
13711 {
13712 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13713 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13714 SImode, addr, 0);
13715 }
13716 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13717 write_back ? offset + i * 4 : 0));
13718 return true;
13719 }
13720
13721 /* Called from a peephole2 expander to turn a sequence of stores that are
13722 preceded by constant loads into an STM instruction. OPERANDS are the
13723 operands found by the peephole matcher; NOPS indicates how many
13724 separate stores we are trying to combine; there are 2 * NOPS
13725 instructions in the peephole.
13726 Returns true iff we could generate a new instruction. */
13727
13728 bool
13729 gen_const_stm_seq (rtx *operands, int nops)
13730 {
13731 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13732 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13733 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13734 rtx mems[MAX_LDM_STM_OPS];
13735 int base_reg;
13736 rtx base_reg_rtx;
13737 HOST_WIDE_INT offset;
13738 int write_back = FALSE;
13739 int stm_case;
13740 rtx addr;
13741 bool base_reg_dies;
13742 int i, j;
13743 HARD_REG_SET allocated;
13744
13745 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13746 mem_order, &base_reg, &offset, false);
13747
13748 if (stm_case == 0)
13749 return false;
13750
13751 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13752
13753 /* If the same register is used more than once, try to find a free
13754 register. */
13755 CLEAR_HARD_REG_SET (allocated);
13756 for (i = 0; i < nops; i++)
13757 {
13758 for (j = i + 1; j < nops; j++)
13759 if (regs[i] == regs[j])
13760 {
13761 rtx t = peep2_find_free_register (0, nops * 2,
13762 TARGET_THUMB1 ? "l" : "r",
13763 SImode, &allocated);
13764 if (t == NULL_RTX)
13765 return false;
13766 reg_rtxs[i] = t;
13767 regs[i] = REGNO (t);
13768 }
13769 }
13770
13771 /* Compute an ordering that maps the register numbers to an ascending
13772 sequence. */
13773 reg_order[0] = 0;
13774 for (i = 0; i < nops; i++)
13775 if (regs[i] < regs[reg_order[0]])
13776 reg_order[0] = i;
13777
13778 for (i = 1; i < nops; i++)
13779 {
13780 int this_order = reg_order[i - 1];
13781 for (j = 0; j < nops; j++)
13782 if (regs[j] > regs[reg_order[i - 1]]
13783 && (this_order == reg_order[i - 1]
13784 || regs[j] < regs[this_order]))
13785 this_order = j;
13786 reg_order[i] = this_order;
13787 }
13788
13789 /* Ensure that registers that must be live after the instruction end
13790 up with the correct value. */
13791 for (i = 0; i < nops; i++)
13792 {
13793 int this_order = reg_order[i];
13794 if ((this_order != mem_order[i]
13795 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13796 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13797 return false;
13798 }
13799
13800 /* Load the constants. */
13801 for (i = 0; i < nops; i++)
13802 {
13803 rtx op = operands[2 * nops + mem_order[i]];
13804 sorted_regs[i] = regs[reg_order[i]];
13805 emit_move_insn (reg_rtxs[reg_order[i]], op);
13806 }
13807
13808 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13809
13810 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13811 if (TARGET_THUMB1)
13812 {
13813 gcc_assert (base_reg_dies);
13814 write_back = TRUE;
13815 }
13816
13817 if (stm_case == 5)
13818 {
13819 gcc_assert (base_reg_dies);
13820 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13821 offset = 0;
13822 }
13823
13824 addr = plus_constant (Pmode, base_reg_rtx, offset);
13825
13826 for (i = 0; i < nops; i++)
13827 {
13828 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13829 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13830 SImode, addr, 0);
13831 }
13832 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13833 write_back ? offset + i * 4 : 0));
13834 return true;
13835 }
13836
13837 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13838 unaligned copies on processors which support unaligned semantics for those
13839 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13840 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13841 An interleave factor of 1 (the minimum) will perform no interleaving.
13842 Load/store multiple are used for aligned addresses where possible. */
13843
13844 static void
13845 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13846 HOST_WIDE_INT length,
13847 unsigned int interleave_factor)
13848 {
13849 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13850 int *regnos = XALLOCAVEC (int, interleave_factor);
13851 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13852 HOST_WIDE_INT i, j;
13853 HOST_WIDE_INT remaining = length, words;
13854 rtx halfword_tmp = NULL, byte_tmp = NULL;
13855 rtx dst, src;
13856 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13857 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13858 HOST_WIDE_INT srcoffset, dstoffset;
13859 HOST_WIDE_INT src_autoinc, dst_autoinc;
13860 rtx mem, addr;
13861
13862 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13863
13864 /* Use hard registers if we have aligned source or destination so we can use
13865 load/store multiple with contiguous registers. */
13866 if (dst_aligned || src_aligned)
13867 for (i = 0; i < interleave_factor; i++)
13868 regs[i] = gen_rtx_REG (SImode, i);
13869 else
13870 for (i = 0; i < interleave_factor; i++)
13871 regs[i] = gen_reg_rtx (SImode);
13872
13873 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13874 src = copy_addr_to_reg (XEXP (srcbase, 0));
13875
13876 srcoffset = dstoffset = 0;
13877
13878 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13879 For copying the last bytes we want to subtract this offset again. */
13880 src_autoinc = dst_autoinc = 0;
13881
13882 for (i = 0; i < interleave_factor; i++)
13883 regnos[i] = i;
13884
13885 /* Copy BLOCK_SIZE_BYTES chunks. */
13886
13887 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13888 {
13889 /* Load words. */
13890 if (src_aligned && interleave_factor > 1)
13891 {
13892 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13893 TRUE, srcbase, &srcoffset));
13894 src_autoinc += UNITS_PER_WORD * interleave_factor;
13895 }
13896 else
13897 {
13898 for (j = 0; j < interleave_factor; j++)
13899 {
13900 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13901 - src_autoinc));
13902 mem = adjust_automodify_address (srcbase, SImode, addr,
13903 srcoffset + j * UNITS_PER_WORD);
13904 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13905 }
13906 srcoffset += block_size_bytes;
13907 }
13908
13909 /* Store words. */
13910 if (dst_aligned && interleave_factor > 1)
13911 {
13912 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13913 TRUE, dstbase, &dstoffset));
13914 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13915 }
13916 else
13917 {
13918 for (j = 0; j < interleave_factor; j++)
13919 {
13920 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13921 - dst_autoinc));
13922 mem = adjust_automodify_address (dstbase, SImode, addr,
13923 dstoffset + j * UNITS_PER_WORD);
13924 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13925 }
13926 dstoffset += block_size_bytes;
13927 }
13928
13929 remaining -= block_size_bytes;
13930 }
13931
13932 /* Copy any whole words left (note these aren't interleaved with any
13933 subsequent halfword/byte load/stores in the interests of simplicity). */
13934
13935 words = remaining / UNITS_PER_WORD;
13936
13937 gcc_assert (words < interleave_factor);
13938
13939 if (src_aligned && words > 1)
13940 {
13941 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13942 &srcoffset));
13943 src_autoinc += UNITS_PER_WORD * words;
13944 }
13945 else
13946 {
13947 for (j = 0; j < words; j++)
13948 {
13949 addr = plus_constant (Pmode, src,
13950 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13951 mem = adjust_automodify_address (srcbase, SImode, addr,
13952 srcoffset + j * UNITS_PER_WORD);
13953 if (src_aligned)
13954 emit_move_insn (regs[j], mem);
13955 else
13956 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13957 }
13958 srcoffset += words * UNITS_PER_WORD;
13959 }
13960
13961 if (dst_aligned && words > 1)
13962 {
13963 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13964 &dstoffset));
13965 dst_autoinc += words * UNITS_PER_WORD;
13966 }
13967 else
13968 {
13969 for (j = 0; j < words; j++)
13970 {
13971 addr = plus_constant (Pmode, dst,
13972 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13973 mem = adjust_automodify_address (dstbase, SImode, addr,
13974 dstoffset + j * UNITS_PER_WORD);
13975 if (dst_aligned)
13976 emit_move_insn (mem, regs[j]);
13977 else
13978 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13979 }
13980 dstoffset += words * UNITS_PER_WORD;
13981 }
13982
13983 remaining -= words * UNITS_PER_WORD;
13984
13985 gcc_assert (remaining < 4);
13986
13987 /* Copy a halfword if necessary. */
13988
13989 if (remaining >= 2)
13990 {
13991 halfword_tmp = gen_reg_rtx (SImode);
13992
13993 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13994 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13995 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13996
13997 /* Either write out immediately, or delay until we've loaded the last
13998 byte, depending on interleave factor. */
13999 if (interleave_factor == 1)
14000 {
14001 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14002 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14003 emit_insn (gen_unaligned_storehi (mem,
14004 gen_lowpart (HImode, halfword_tmp)));
14005 halfword_tmp = NULL;
14006 dstoffset += 2;
14007 }
14008
14009 remaining -= 2;
14010 srcoffset += 2;
14011 }
14012
14013 gcc_assert (remaining < 2);
14014
14015 /* Copy last byte. */
14016
14017 if ((remaining & 1) != 0)
14018 {
14019 byte_tmp = gen_reg_rtx (SImode);
14020
14021 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14022 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14023 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14024
14025 if (interleave_factor == 1)
14026 {
14027 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14028 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14029 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14030 byte_tmp = NULL;
14031 dstoffset++;
14032 }
14033
14034 remaining--;
14035 srcoffset++;
14036 }
14037
14038 /* Store last halfword if we haven't done so already. */
14039
14040 if (halfword_tmp)
14041 {
14042 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14043 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14044 emit_insn (gen_unaligned_storehi (mem,
14045 gen_lowpart (HImode, halfword_tmp)));
14046 dstoffset += 2;
14047 }
14048
14049 /* Likewise for last byte. */
14050
14051 if (byte_tmp)
14052 {
14053 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14054 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14055 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14056 dstoffset++;
14057 }
14058
14059 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14060 }
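
/* For illustration, a copy of LENGTH = 11 with INTERLEAVE_FACTOR = 2 and
   both ends unaligned is emitted by the code above as one 8-byte block
   (two unaligned word loads into pseudos, then two unaligned word stores),
   followed by an unaligned halfword copy and a byte copy. Because the
   interleave factor is greater than 1, the trailing halfword and byte
   stores are delayed until both values have been loaded. */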
14061
14062 /* From mips_adjust_block_mem:
14063
14064 Helper function for doing a loop-based block operation on memory
14065 reference MEM. Each iteration of the loop will operate on LENGTH
14066 bytes of MEM.
14067
14068 Create a new base register for use within the loop and point it to
14069 the start of MEM. Create a new memory reference that uses this
14070 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14071
14072 static void
14073 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14074 rtx *loop_mem)
14075 {
14076 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14077
14078 /* Although the new mem does not refer to a known location,
14079 it does keep up to LENGTH bytes of alignment. */
14080 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14081 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14082 }
14083
14084 /* From mips_block_move_loop:
14085
14086 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14087 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14088 the memory regions do not overlap. */
14089
14090 static void
14091 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14092 unsigned int interleave_factor,
14093 HOST_WIDE_INT bytes_per_iter)
14094 {
14095 rtx src_reg, dest_reg, final_src, test;
14096 HOST_WIDE_INT leftover;
14097
14098 leftover = length % bytes_per_iter;
14099 length -= leftover;
14100
14101 /* Create registers and memory references for use within the loop. */
14102 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14103 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14104
14105 /* Calculate the value that SRC_REG should have after the last iteration of
14106 the loop. */
14107 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14108 0, 0, OPTAB_WIDEN);
14109
14110 /* Emit the start of the loop. */
14111 rtx_code_label *label = gen_label_rtx ();
14112 emit_label (label);
14113
14114 /* Emit the loop body. */
14115 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14116 interleave_factor);
14117
14118 /* Move on to the next block. */
14119 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14120 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14121
14122 /* Emit the loop condition. */
14123 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14124 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14125
14126 /* Mop up any left-over bytes. */
14127 if (leftover)
14128 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14129 }
14130
14131 /* Emit a block move when either the source or destination is unaligned (not
14132 aligned to a four-byte boundary). This may need further tuning depending on
14133 core type, optimize_size setting, etc. */
14134
14135 static int
14136 arm_movmemqi_unaligned (rtx *operands)
14137 {
14138 HOST_WIDE_INT length = INTVAL (operands[2]);
14139
14140 if (optimize_size)
14141 {
14142 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14143 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14144 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14145 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14146 or dst_aligned though: allow more interleaving in those cases since the
14147 resulting code can be smaller. */
14148 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14149 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14150
14151 if (length > 12)
14152 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14153 interleave_factor, bytes_per_iter);
14154 else
14155 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14156 interleave_factor);
14157 }
14158 else
14159 {
14160 /* Note that the loop created by arm_block_move_unaligned_loop may be
14161 subject to loop unrolling, which makes tuning this condition a little
14162 redundant. */
14163 if (length > 32)
14164 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14165 else
14166 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14167 }
14168
14169 return 1;
14170 }
14171
14172 int
14173 arm_gen_movmemqi (rtx *operands)
14174 {
14175 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14176 HOST_WIDE_INT srcoffset, dstoffset;
14177 rtx src, dst, srcbase, dstbase;
14178 rtx part_bytes_reg = NULL;
14179 rtx mem;
14180
14181 if (!CONST_INT_P (operands[2])
14182 || !CONST_INT_P (operands[3])
14183 || INTVAL (operands[2]) > 64)
14184 return 0;
14185
14186 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14187 return arm_movmemqi_unaligned (operands);
14188
14189 if (INTVAL (operands[3]) & 3)
14190 return 0;
14191
14192 dstbase = operands[0];
14193 srcbase = operands[1];
14194
14195 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14196 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14197
14198 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14199 out_words_to_go = INTVAL (operands[2]) / 4;
14200 last_bytes = INTVAL (operands[2]) & 3;
14201 dstoffset = srcoffset = 0;
14202
14203 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14204 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14205
14206 while (in_words_to_go >= 2)
14207 {
14208 if (in_words_to_go > 4)
14209 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14210 TRUE, srcbase, &srcoffset));
14211 else
14212 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14213 src, FALSE, srcbase,
14214 &srcoffset));
14215
14216 if (out_words_to_go)
14217 {
14218 if (out_words_to_go > 4)
14219 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14220 TRUE, dstbase, &dstoffset));
14221 else if (out_words_to_go != 1)
14222 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14223 out_words_to_go, dst,
14224 (last_bytes == 0
14225 ? FALSE : TRUE),
14226 dstbase, &dstoffset));
14227 else
14228 {
14229 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14230 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14231 if (last_bytes != 0)
14232 {
14233 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14234 dstoffset += 4;
14235 }
14236 }
14237 }
14238
14239 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14240 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14241 }
14242
14243 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14244 if (out_words_to_go)
14245 {
14246 rtx sreg;
14247
14248 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14249 sreg = copy_to_reg (mem);
14250
14251 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14252 emit_move_insn (mem, sreg);
14253 in_words_to_go--;
14254
14255 gcc_assert (!in_words_to_go); /* Sanity check */
14256 }
14257
14258 if (in_words_to_go)
14259 {
14260 gcc_assert (in_words_to_go > 0);
14261
14262 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14263 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14264 }
14265
14266 gcc_assert (!last_bytes || part_bytes_reg);
14267
14268 if (BYTES_BIG_ENDIAN && last_bytes)
14269 {
14270 rtx tmp = gen_reg_rtx (SImode);
14271
14272 /* The bytes we want are in the top end of the word. */
14273 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14274 GEN_INT (8 * (4 - last_bytes))));
14275 part_bytes_reg = tmp;
14276
14277 while (last_bytes)
14278 {
14279 mem = adjust_automodify_address (dstbase, QImode,
14280 plus_constant (Pmode, dst,
14281 last_bytes - 1),
14282 dstoffset + last_bytes - 1);
14283 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14284
14285 if (--last_bytes)
14286 {
14287 tmp = gen_reg_rtx (SImode);
14288 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14289 part_bytes_reg = tmp;
14290 }
14291 }
14292
14293 }
14294 else
14295 {
14296 if (last_bytes > 1)
14297 {
14298 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14299 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14300 last_bytes -= 2;
14301 if (last_bytes)
14302 {
14303 rtx tmp = gen_reg_rtx (SImode);
14304 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14305 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14306 part_bytes_reg = tmp;
14307 dstoffset += 2;
14308 }
14309 }
14310
14311 if (last_bytes)
14312 {
14313 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14314 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14315 }
14316 }
14317
14318 return 1;
14319 }
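/* Worked example: a 14-byte copy with word-aligned operands is expanded as
   one load-multiple of four words, one store-multiple of three words, and a
   trailing halfword store for the remaining two bytes (little-endian
   case).  */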
14320
14321 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14322 by mode size. */
14323 inline static rtx
14324 next_consecutive_mem (rtx mem)
14325 {
14326 machine_mode mode = GET_MODE (mem);
14327 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14328 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14329
14330 return adjust_automodify_address (mem, mode, addr, offset);
14331 }
14332
14333 /* Copy using LDRD/STRD instructions whenever possible.
14334 Returns true upon success. */
14335 bool
14336 gen_movmem_ldrd_strd (rtx *operands)
14337 {
14338 unsigned HOST_WIDE_INT len;
14339 HOST_WIDE_INT align;
14340 rtx src, dst, base;
14341 rtx reg0;
14342 bool src_aligned, dst_aligned;
14343 bool src_volatile, dst_volatile;
14344
14345 gcc_assert (CONST_INT_P (operands[2]));
14346 gcc_assert (CONST_INT_P (operands[3]));
14347
14348 len = UINTVAL (operands[2]);
14349 if (len > 64)
14350 return false;
14351
14352 /* Maximum alignment we can assume for both src and dst buffers. */
14353 align = INTVAL (operands[3]);
14354
14355 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14356 return false;
14357
14358 /* Place src and dst addresses in registers
14359 and update the corresponding mem rtx. */
14360 dst = operands[0];
14361 dst_volatile = MEM_VOLATILE_P (dst);
14362 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14363 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14364 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14365
14366 src = operands[1];
14367 src_volatile = MEM_VOLATILE_P (src);
14368 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14369 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14370 src = adjust_automodify_address (src, VOIDmode, base, 0);
14371
14372 if (!unaligned_access && !(src_aligned && dst_aligned))
14373 return false;
14374
14375 if (src_volatile || dst_volatile)
14376 return false;
14377
14378 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14379 if (!(dst_aligned || src_aligned))
14380 return arm_gen_movmemqi (operands);
14381
14382 /* If either src or dst is unaligned we'll be accessing it as pairs
14383 of unaligned SImode accesses. Otherwise we can generate DImode
14384 ldrd/strd instructions. */
14385 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14386 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14387
14388 while (len >= 8)
14389 {
14390 len -= 8;
14391 reg0 = gen_reg_rtx (DImode);
14392 rtx low_reg = NULL_RTX;
14393 rtx hi_reg = NULL_RTX;
14394
14395 if (!src_aligned || !dst_aligned)
14396 {
14397 low_reg = gen_lowpart (SImode, reg0);
14398 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14399 }
14400 if (src_aligned)
14401 emit_move_insn (reg0, src);
14402 else
14403 {
14404 emit_insn (gen_unaligned_loadsi (low_reg, src));
14405 src = next_consecutive_mem (src);
14406 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14407 }
14408
14409 if (dst_aligned)
14410 emit_move_insn (dst, reg0);
14411 else
14412 {
14413 emit_insn (gen_unaligned_storesi (dst, low_reg));
14414 dst = next_consecutive_mem (dst);
14415 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14416 }
14417
14418 src = next_consecutive_mem (src);
14419 dst = next_consecutive_mem (dst);
14420 }
14421
14422 gcc_assert (len < 8);
14423 if (len >= 4)
14424 {
14425 /* More than a word but less than a double-word to copy. Copy a word. */
14426 reg0 = gen_reg_rtx (SImode);
14427 src = adjust_address (src, SImode, 0);
14428 dst = adjust_address (dst, SImode, 0);
14429 if (src_aligned)
14430 emit_move_insn (reg0, src);
14431 else
14432 emit_insn (gen_unaligned_loadsi (reg0, src));
14433
14434 if (dst_aligned)
14435 emit_move_insn (dst, reg0);
14436 else
14437 emit_insn (gen_unaligned_storesi (dst, reg0));
14438
14439 src = next_consecutive_mem (src);
14440 dst = next_consecutive_mem (dst);
14441 len -= 4;
14442 }
14443
14444 if (len == 0)
14445 return true;
14446
14447 /* Copy the remaining bytes. */
14448 if (len >= 2)
14449 {
14450 dst = adjust_address (dst, HImode, 0);
14451 src = adjust_address (src, HImode, 0);
14452 reg0 = gen_reg_rtx (SImode);
14453 if (src_aligned)
14454 emit_insn (gen_zero_extendhisi2 (reg0, src));
14455 else
14456 emit_insn (gen_unaligned_loadhiu (reg0, src));
14457
14458 if (dst_aligned)
14459 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14460 else
14461 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14462
14463 src = next_consecutive_mem (src);
14464 dst = next_consecutive_mem (dst);
14465 if (len == 2)
14466 return true;
14467 }
14468
14469 dst = adjust_address (dst, QImode, 0);
14470 src = adjust_address (src, QImode, 0);
14471 reg0 = gen_reg_rtx (QImode);
14472 emit_move_insn (reg0, src);
14473 emit_move_insn (dst, reg0);
14474 return true;
14475 }
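/* For example, a 17-byte copy with both operands at least word-aligned is
   expanded as two DImode (LDRD/STRD) moves covering the first 16 bytes,
   followed by a single byte copy; if only one side is word-aligned, the
   unaligned side is accessed as pairs of unaligned SImode loads or stores
   instead.  */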
14476
14477 /* Select a dominance comparison mode if possible for a test of the general
14478 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14479 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14480 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14481 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14482 In all cases OP will be either EQ or NE, but we don't need to know which
14483 here. If we are unable to support a dominance comparison we return
14484 CC mode. This will then fail to match for the RTL expressions that
14485 generate this call. */
14486 machine_mode
14487 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14488 {
14489 enum rtx_code cond1, cond2;
14490 int swapped = 0;
14491
14492 /* Currently we will probably get the wrong result if the individual
14493 comparisons are not simple. This also ensures that it is safe to
14494 reverse a comparison if necessary. */
14495 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14496 != CCmode)
14497 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14498 != CCmode))
14499 return CCmode;
14500
14501 /* The if_then_else variant of this tests the second condition if the
14502 first passes, but is true if the first fails. Reverse the first
14503 condition to get a true "inclusive-or" expression. */
14504 if (cond_or == DOM_CC_NX_OR_Y)
14505 cond1 = reverse_condition (cond1);
14506
14507 /* If the comparisons are not equal, and one doesn't dominate the other,
14508 then we can't do this. */
14509 if (cond1 != cond2
14510 && !comparison_dominates_p (cond1, cond2)
14511 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14512 return CCmode;
14513
14514 if (swapped)
14515 std::swap (cond1, cond2);
14516
14517 switch (cond1)
14518 {
14519 case EQ:
14520 if (cond_or == DOM_CC_X_AND_Y)
14521 return CC_DEQmode;
14522
14523 switch (cond2)
14524 {
14525 case EQ: return CC_DEQmode;
14526 case LE: return CC_DLEmode;
14527 case LEU: return CC_DLEUmode;
14528 case GE: return CC_DGEmode;
14529 case GEU: return CC_DGEUmode;
14530 default: gcc_unreachable ();
14531 }
14532
14533 case LT:
14534 if (cond_or == DOM_CC_X_AND_Y)
14535 return CC_DLTmode;
14536
14537 switch (cond2)
14538 {
14539 case LT:
14540 return CC_DLTmode;
14541 case LE:
14542 return CC_DLEmode;
14543 case NE:
14544 return CC_DNEmode;
14545 default:
14546 gcc_unreachable ();
14547 }
14548
14549 case GT:
14550 if (cond_or == DOM_CC_X_AND_Y)
14551 return CC_DGTmode;
14552
14553 switch (cond2)
14554 {
14555 case GT:
14556 return CC_DGTmode;
14557 case GE:
14558 return CC_DGEmode;
14559 case NE:
14560 return CC_DNEmode;
14561 default:
14562 gcc_unreachable ();
14563 }
14564
14565 case LTU:
14566 if (cond_or == DOM_CC_X_AND_Y)
14567 return CC_DLTUmode;
14568
14569 switch (cond2)
14570 {
14571 case LTU:
14572 return CC_DLTUmode;
14573 case LEU:
14574 return CC_DLEUmode;
14575 case NE:
14576 return CC_DNEmode;
14577 default:
14578 gcc_unreachable ();
14579 }
14580
14581 case GTU:
14582 if (cond_or == DOM_CC_X_AND_Y)
14583 return CC_DGTUmode;
14584
14585 switch (cond2)
14586 {
14587 case GTU:
14588 return CC_DGTUmode;
14589 case GEU:
14590 return CC_DGEUmode;
14591 case NE:
14592 return CC_DNEmode;
14593 default:
14594 gcc_unreachable ();
14595 }
14596
14597 /* The remaining cases only occur when both comparisons are the
14598 same. */
14599 case NE:
14600 gcc_assert (cond1 == cond2);
14601 return CC_DNEmode;
14602
14603 case LE:
14604 gcc_assert (cond1 == cond2);
14605 return CC_DLEmode;
14606
14607 case GE:
14608 gcc_assert (cond1 == cond2);
14609 return CC_DGEmode;
14610
14611 case LEU:
14612 gcc_assert (cond1 == cond2);
14613 return CC_DLEUmode;
14614
14615 case GEU:
14616 gcc_assert (cond1 == cond2);
14617 return CC_DGEUmode;
14618
14619 default:
14620 gcc_unreachable ();
14621 }
14622 }
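/* For example, (ne (ior (eq a b) (leu c d)) (const_int 0)) with
   COND_OR == DOM_CC_X_OR_Y selects CC_DLEUmode, since EQ dominates LEU.
   If the two conditions were EQ and LT, neither dominates the other and
   CCmode is returned, so the combined pattern fails to match.  */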
14623
14624 machine_mode
14625 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14626 {
14627 /* All floating point compares return CCFP if it is an equality
14628 comparison, and CCFPE otherwise. */
14629 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14630 {
14631 switch (op)
14632 {
14633 case EQ:
14634 case NE:
14635 case UNORDERED:
14636 case ORDERED:
14637 case UNLT:
14638 case UNLE:
14639 case UNGT:
14640 case UNGE:
14641 case UNEQ:
14642 case LTGT:
14643 return CCFPmode;
14644
14645 case LT:
14646 case LE:
14647 case GT:
14648 case GE:
14649 return CCFPEmode;
14650
14651 default:
14652 gcc_unreachable ();
14653 }
14654 }
14655
14656 /* A compare with a shifted operand. Because of canonicalization, the
14657 comparison will have to be swapped when we emit the assembler. */
14658 if (GET_MODE (y) == SImode
14659 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14660 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14661 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14662 || GET_CODE (x) == ROTATERT))
14663 return CC_SWPmode;
14664
14665 /* This operation is performed swapped, but since we only rely on the Z
14666 flag we don't need an additional mode. */
14667 if (GET_MODE (y) == SImode
14668 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14669 && GET_CODE (x) == NEG
14670 && (op == EQ || op == NE))
14671 return CC_Zmode;
14672
14673 /* This is a special case that is used by combine to allow a
14674 comparison of a shifted byte load to be split into a zero-extend
14675 followed by a comparison of the shifted integer (only valid for
14676 equalities and unsigned inequalities). */
14677 if (GET_MODE (x) == SImode
14678 && GET_CODE (x) == ASHIFT
14679 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14680 && GET_CODE (XEXP (x, 0)) == SUBREG
14681 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14682 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14683 && (op == EQ || op == NE
14684 || op == GEU || op == GTU || op == LTU || op == LEU)
14685 && CONST_INT_P (y))
14686 return CC_Zmode;
14687
14688 /* A construct for a conditional compare, if the false arm contains
14689 0, then both conditions must be true, otherwise either condition
14690 must be true. Not all conditions are possible, so CCmode is
14691 returned if it can't be done. */
14692 if (GET_CODE (x) == IF_THEN_ELSE
14693 && (XEXP (x, 2) == const0_rtx
14694 || XEXP (x, 2) == const1_rtx)
14695 && COMPARISON_P (XEXP (x, 0))
14696 && COMPARISON_P (XEXP (x, 1)))
14697 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14698 INTVAL (XEXP (x, 2)));
14699
14700 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14701 if (GET_CODE (x) == AND
14702 && (op == EQ || op == NE)
14703 && COMPARISON_P (XEXP (x, 0))
14704 && COMPARISON_P (XEXP (x, 1)))
14705 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14706 DOM_CC_X_AND_Y);
14707
14708 if (GET_CODE (x) == IOR
14709 && (op == EQ || op == NE)
14710 && COMPARISON_P (XEXP (x, 0))
14711 && COMPARISON_P (XEXP (x, 1)))
14712 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14713 DOM_CC_X_OR_Y);
14714
14715 /* An operation (on Thumb) where we want to test for a single bit.
14716 This is done by shifting that bit up into the top bit of a
14717 scratch register; we can then branch on the sign bit. */
14718 if (TARGET_THUMB1
14719 && GET_MODE (x) == SImode
14720 && (op == EQ || op == NE)
14721 && GET_CODE (x) == ZERO_EXTRACT
14722 && XEXP (x, 1) == const1_rtx)
14723 return CC_Nmode;
14724
14725 /* An operation that sets the condition codes as a side-effect, the
14726 V flag is not set correctly, so we can only use comparisons where
14727 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14728 instead.) */
14729 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14730 if (GET_MODE (x) == SImode
14731 && y == const0_rtx
14732 && (op == EQ || op == NE || op == LT || op == GE)
14733 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14734 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14735 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14736 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14737 || GET_CODE (x) == LSHIFTRT
14738 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14739 || GET_CODE (x) == ROTATERT
14740 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14741 return CC_NOOVmode;
14742
14743 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14744 return CC_Zmode;
14745
14746 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14747 && GET_CODE (x) == PLUS
14748 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14749 return CC_Cmode;
14750
14751 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14752 {
14753 switch (op)
14754 {
14755 case EQ:
14756 case NE:
14757 /* A DImode comparison against zero can be implemented by
14758 or'ing the two halves together. */
14759 if (y == const0_rtx)
14760 return CC_Zmode;
14761
14762 /* We can do an equality test in three Thumb instructions. */
14763 if (!TARGET_32BIT)
14764 return CC_Zmode;
14765
14766 /* FALLTHROUGH */
14767
14768 case LTU:
14769 case LEU:
14770 case GTU:
14771 case GEU:
14772 /* DImode unsigned comparisons can be implemented by cmp +
14773 cmpeq without a scratch register. Not worth doing in
14774 Thumb-2. */
14775 if (TARGET_32BIT)
14776 return CC_CZmode;
14777
14778 /* FALLTHROUGH */
14779
14780 case LT:
14781 case LE:
14782 case GT:
14783 case GE:
14784 /* DImode signed and unsigned comparisons can be implemented
14785 by cmp + sbcs with a scratch register, but that does not
14786 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14787 gcc_assert (op != EQ && op != NE);
14788 return CC_NCVmode;
14789
14790 default:
14791 gcc_unreachable ();
14792 }
14793 }
14794
14795 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14796 return GET_MODE (x);
14797
14798 return CCmode;
14799 }
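/* For example, comparing (plus r0 r1) against zero with EQ selects
   CC_NOOVmode, allowing the addition itself to set the condition codes;
   the same comparison with GT falls through to CCmode because the overflow
   flag is not set correctly by the flag-setting arithmetic forms.  */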
14800
14801 /* X and Y are two things to compare using CODE. Emit the compare insn and
14802 return the rtx for the CC register in the proper mode. SCRATCH, if non-null,
14803 is an SImode scratch register that may be needed for DImode comparisons. */
14804 rtx
14805 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14806 {
14807 machine_mode mode;
14808 rtx cc_reg;
14809 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14810
14811 /* We might have X as a constant, Y as a register because of the predicates
14812 used for cmpdi. If so, force X to a register here. */
14813 if (dimode_comparison && !REG_P (x))
14814 x = force_reg (DImode, x);
14815
14816 mode = SELECT_CC_MODE (code, x, y);
14817 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14818
14819 if (dimode_comparison
14820 && mode != CC_CZmode)
14821 {
14822 rtx clobber, set;
14823
14824 /* To compare two non-zero values for equality, XOR them and
14825 then compare against zero. Not used for ARM mode; there
14826 CC_CZmode is cheaper. */
14827 if (mode == CC_Zmode && y != const0_rtx)
14828 {
14829 gcc_assert (!reload_completed);
14830 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14831 y = const0_rtx;
14832 }
14833
14834 /* A scratch register is required. */
14835 if (reload_completed)
14836 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14837 else
14838 scratch = gen_rtx_SCRATCH (SImode);
14839
14840 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14841 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14842 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14843 }
14844 else
14845 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14846
14847 return cc_reg;
14848 }
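/* For example, a Thumb-1 DImode equality test against a nonzero value uses
   CC_Zmode: the operands are XORed and the result compared against zero,
   with an SImode scratch clobbered; ARM and Thumb-2 select CC_CZmode for
   the same test and emit a plain compare.  */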
14849
14850 /* Generate a sequence of insns that will generate the correct return
14851 address mask depending on the physical architecture that the program
14852 is running on. */
14853 rtx
14854 arm_gen_return_addr_mask (void)
14855 {
14856 rtx reg = gen_reg_rtx (Pmode);
14857
14858 emit_insn (gen_return_addr_mask (reg));
14859 return reg;
14860 }
14861
14862 void
14863 arm_reload_in_hi (rtx *operands)
14864 {
14865 rtx ref = operands[1];
14866 rtx base, scratch;
14867 HOST_WIDE_INT offset = 0;
14868
14869 if (GET_CODE (ref) == SUBREG)
14870 {
14871 offset = SUBREG_BYTE (ref);
14872 ref = SUBREG_REG (ref);
14873 }
14874
14875 if (REG_P (ref))
14876 {
14877 /* We have a pseudo which has been spilt onto the stack; there
14878 are two cases here: the first where there is a simple
14879 stack-slot replacement and a second where the stack-slot is
14880 out of range, or is used as a subreg. */
14881 if (reg_equiv_mem (REGNO (ref)))
14882 {
14883 ref = reg_equiv_mem (REGNO (ref));
14884 base = find_replacement (&XEXP (ref, 0));
14885 }
14886 else
14887 /* The slot is out of range, or was dressed up in a SUBREG. */
14888 base = reg_equiv_address (REGNO (ref));
14889
14890 /* PR 62554: If there is no equivalent memory location then just move
14891 the value as an SImode register move. This happens when the target
14892 architecture variant does not have an HImode register move. */
14893 if (base == NULL)
14894 {
14895 gcc_assert (REG_P (operands[0]));
14896 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14897 gen_rtx_SUBREG (SImode, ref, 0)));
14898 return;
14899 }
14900 }
14901 else
14902 base = find_replacement (&XEXP (ref, 0));
14903
14904 /* Handle the case where the address is too complex to be offset by 1. */
14905 if (GET_CODE (base) == MINUS
14906 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14907 {
14908 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14909
14910 emit_set_insn (base_plus, base);
14911 base = base_plus;
14912 }
14913 else if (GET_CODE (base) == PLUS)
14914 {
14915 /* The addend must be CONST_INT, or we would have dealt with it above. */
14916 HOST_WIDE_INT hi, lo;
14917
14918 offset += INTVAL (XEXP (base, 1));
14919 base = XEXP (base, 0);
14920
14921 /* Rework the address into a legal sequence of insns. */
14922 /* Valid range for lo is -4095 -> 4095 */
14923 lo = (offset >= 0
14924 ? (offset & 0xfff)
14925 : -((-offset) & 0xfff));
14926
14927 /* Corner case, if lo is the max offset then we would be out of range
14928 once we have added the additional 1 below, so bump the msb into the
14929 pre-loading insn(s). */
14930 if (lo == 4095)
14931 lo &= 0x7ff;
14932
14933 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14934 ^ (HOST_WIDE_INT) 0x80000000)
14935 - (HOST_WIDE_INT) 0x80000000);
14936
14937 gcc_assert (hi + lo == offset);
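/* For example, an offset of 0x1234 splits into lo = 0x234 and hi = 0x1000;
   an offset of 4095 is first trimmed to lo = 0x7ff (hi = 0x800) so that
   the additional "offset + 1" access below stays within the +/-4095
   range.  */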
14938
14939 if (hi != 0)
14940 {
14941 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14942
14943 /* Get the base address; addsi3 knows how to handle constants
14944 that require more than one insn. */
14945 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14946 base = base_plus;
14947 offset = lo;
14948 }
14949 }
14950
14951 /* Operands[2] may overlap operands[0] (though it won't overlap
14952 operands[1]), that's why we asked for a DImode reg -- so we can
14953 use the bit that does not overlap. */
14954 if (REGNO (operands[2]) == REGNO (operands[0]))
14955 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14956 else
14957 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14958
14959 emit_insn (gen_zero_extendqisi2 (scratch,
14960 gen_rtx_MEM (QImode,
14961 plus_constant (Pmode, base,
14962 offset))));
14963 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14964 gen_rtx_MEM (QImode,
14965 plus_constant (Pmode, base,
14966 offset + 1))));
14967 if (!BYTES_BIG_ENDIAN)
14968 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14969 gen_rtx_IOR (SImode,
14970 gen_rtx_ASHIFT
14971 (SImode,
14972 gen_rtx_SUBREG (SImode, operands[0], 0),
14973 GEN_INT (8)),
14974 scratch));
14975 else
14976 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14977 gen_rtx_IOR (SImode,
14978 gen_rtx_ASHIFT (SImode, scratch,
14979 GEN_INT (8)),
14980 gen_rtx_SUBREG (SImode, operands[0], 0)));
14981 }
14982
14983 /* Handle storing a half-word to memory during reload by synthesizing as two
14984 byte stores. Take care not to clobber the input values until after we
14985 have moved them somewhere safe. This code assumes that if the DImode
14986 scratch in operands[2] overlaps either the input value or output address
14987 in some way, then that value must die in this insn (we absolutely need
14988 two scratch registers for some corner cases). */
14989 void
14990 arm_reload_out_hi (rtx *operands)
14991 {
14992 rtx ref = operands[0];
14993 rtx outval = operands[1];
14994 rtx base, scratch;
14995 HOST_WIDE_INT offset = 0;
14996
14997 if (GET_CODE (ref) == SUBREG)
14998 {
14999 offset = SUBREG_BYTE (ref);
15000 ref = SUBREG_REG (ref);
15001 }
15002
15003 if (REG_P (ref))
15004 {
15005 /* We have a pseudo which has been spilt onto the stack; there
15006 are two cases here: the first where there is a simple
15007 stack-slot replacement and a second where the stack-slot is
15008 out of range, or is used as a subreg. */
15009 if (reg_equiv_mem (REGNO (ref)))
15010 {
15011 ref = reg_equiv_mem (REGNO (ref));
15012 base = find_replacement (&XEXP (ref, 0));
15013 }
15014 else
15015 /* The slot is out of range, or was dressed up in a SUBREG. */
15016 base = reg_equiv_address (REGNO (ref));
15017
15018 /* PR 62254: If there is no equivalent memory location then just move
15019 the value as an SImode register move. This happens when the target
15020 architecture variant does not have an HImode register move. */
15021 if (base == NULL)
15022 {
15023 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15024
15025 if (REG_P (outval))
15026 {
15027 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15028 gen_rtx_SUBREG (SImode, outval, 0)));
15029 }
15030 else /* SUBREG_P (outval) */
15031 {
15032 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15033 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15034 SUBREG_REG (outval)));
15035 else
15036 /* FIXME: Handle other cases ? */
15037 gcc_unreachable ();
15038 }
15039 return;
15040 }
15041 }
15042 else
15043 base = find_replacement (&XEXP (ref, 0));
15044
15045 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15046
15047 /* Handle the case where the address is too complex to be offset by 1. */
15048 if (GET_CODE (base) == MINUS
15049 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15050 {
15051 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15052
15053 /* Be careful not to destroy OUTVAL. */
15054 if (reg_overlap_mentioned_p (base_plus, outval))
15055 {
15056 /* Updating base_plus might destroy outval, see if we can
15057 swap the scratch and base_plus. */
15058 if (!reg_overlap_mentioned_p (scratch, outval))
15059 std::swap (scratch, base_plus);
15060 else
15061 {
15062 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15063
15064 /* Be conservative and copy OUTVAL into the scratch now,
15065 this should only be necessary if outval is a subreg
15066 of something larger than a word. */
15067 /* XXX Might this clobber base? I can't see how it can,
15068 since scratch is known to overlap with OUTVAL, and
15069 must be wider than a word. */
15070 emit_insn (gen_movhi (scratch_hi, outval));
15071 outval = scratch_hi;
15072 }
15073 }
15074
15075 emit_set_insn (base_plus, base);
15076 base = base_plus;
15077 }
15078 else if (GET_CODE (base) == PLUS)
15079 {
15080 /* The addend must be CONST_INT, or we would have dealt with it above. */
15081 HOST_WIDE_INT hi, lo;
15082
15083 offset += INTVAL (XEXP (base, 1));
15084 base = XEXP (base, 0);
15085
15086 /* Rework the address into a legal sequence of insns. */
15087 /* Valid range for lo is -4095 -> 4095 */
15088 lo = (offset >= 0
15089 ? (offset & 0xfff)
15090 : -((-offset) & 0xfff));
15091
15092 /* Corner case, if lo is the max offset then we would be out of range
15093 once we have added the additional 1 below, so bump the msb into the
15094 pre-loading insn(s). */
15095 if (lo == 4095)
15096 lo &= 0x7ff;
15097
15098 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15099 ^ (HOST_WIDE_INT) 0x80000000)
15100 - (HOST_WIDE_INT) 0x80000000);
15101
15102 gcc_assert (hi + lo == offset);
15103
15104 if (hi != 0)
15105 {
15106 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15107
15108 /* Be careful not to destroy OUTVAL. */
15109 if (reg_overlap_mentioned_p (base_plus, outval))
15110 {
15111 /* Updating base_plus might destroy outval, see if we
15112 can swap the scratch and base_plus. */
15113 if (!reg_overlap_mentioned_p (scratch, outval))
15114 std::swap (scratch, base_plus);
15115 else
15116 {
15117 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15118
15119 /* Be conservative and copy outval into scratch now,
15120 this should only be necessary if outval is a
15121 subreg of something larger than a word. */
15122 /* XXX Might this clobber base? I can't see how it
15123 can, since scratch is known to overlap with
15124 outval. */
15125 emit_insn (gen_movhi (scratch_hi, outval));
15126 outval = scratch_hi;
15127 }
15128 }
15129
15130 /* Get the base address; addsi3 knows how to handle constants
15131 that require more than one insn. */
15132 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15133 base = base_plus;
15134 offset = lo;
15135 }
15136 }
15137
15138 if (BYTES_BIG_ENDIAN)
15139 {
15140 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15141 plus_constant (Pmode, base,
15142 offset + 1)),
15143 gen_lowpart (QImode, outval)));
15144 emit_insn (gen_lshrsi3 (scratch,
15145 gen_rtx_SUBREG (SImode, outval, 0),
15146 GEN_INT (8)));
15147 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15148 offset)),
15149 gen_lowpart (QImode, scratch)));
15150 }
15151 else
15152 {
15153 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15154 offset)),
15155 gen_lowpart (QImode, outval)));
15156 emit_insn (gen_lshrsi3 (scratch,
15157 gen_rtx_SUBREG (SImode, outval, 0),
15158 GEN_INT (8)));
15159 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15160 plus_constant (Pmode, base,
15161 offset + 1)),
15162 gen_lowpart (QImode, scratch)));
15163 }
15164 }
15165
15166 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15167 (padded to the size of a word) should be passed in a register. */
15168
15169 static bool
15170 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15171 {
15172 if (TARGET_AAPCS_BASED)
15173 return must_pass_in_stack_var_size (mode, type);
15174 else
15175 return must_pass_in_stack_var_size_or_pad (mode, type);
15176 }
15177
15178
15179 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15180 byte of a stack argument has useful data. For legacy APCS ABIs we use
15181 the default. For AAPCS based ABIs small aggregate types are placed
15182 in the lowest memory address. */
15183
15184 static pad_direction
15185 arm_function_arg_padding (machine_mode mode, const_tree type)
15186 {
15187 if (!TARGET_AAPCS_BASED)
15188 return default_function_arg_padding (mode, type);
15189
15190 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15191 return PAD_DOWNWARD;
15192
15193 return PAD_UPWARD;
15194 }
15195
15196
15197 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15198 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15199 register has useful data, and return the opposite if the most
15200 significant byte does. */
15201
15202 bool
15203 arm_pad_reg_upward (machine_mode mode,
15204 tree type, int first ATTRIBUTE_UNUSED)
15205 {
15206 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15207 {
15208 /* For AAPCS, small aggregates, small fixed-point types,
15209 and small complex types are always padded upwards. */
15210 if (type)
15211 {
15212 if ((AGGREGATE_TYPE_P (type)
15213 || TREE_CODE (type) == COMPLEX_TYPE
15214 || FIXED_POINT_TYPE_P (type))
15215 && int_size_in_bytes (type) <= 4)
15216 return true;
15217 }
15218 else
15219 {
15220 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15221 && GET_MODE_SIZE (mode) <= 4)
15222 return true;
15223 }
15224 }
15225
15226 /* Otherwise, use default padding. */
15227 return !BYTES_BIG_ENDIAN;
15228 }
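/* For example, on a big-endian AAPCS target a 3-byte structure is padded
   upwards (this returns true), while a 16-bit scalar integer falls through
   to the default rule and is padded downwards.  */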
15229
15230 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15231 assuming that the address in the base register is word aligned. */
15232 bool
15233 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15234 {
15235 HOST_WIDE_INT max_offset;
15236
15237 /* Offset must be a multiple of 4 in Thumb mode. */
15238 if (TARGET_THUMB2 && ((offset & 3) != 0))
15239 return false;
15240
15241 if (TARGET_THUMB2)
15242 max_offset = 1020;
15243 else if (TARGET_ARM)
15244 max_offset = 255;
15245 else
15246 return false;
15247
15248 return ((offset <= max_offset) && (offset >= -max_offset));
15249 }
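/* For example, in ARM state an offset of 255 or -255 is accepted while 256
   is rejected; in Thumb-2 the limit is 1020 and the offset must also be a
   multiple of 4, so 1022 is rejected even though it is within range.  */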
15250
15251 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15252 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15253 Assumes that the address in the base register RN is word aligned. The pattern
15254 guarantees that both memory accesses use the same base register, that the
15255 offsets are constants within the valid range, and that the gap between them is 4.
15256 If reload is complete, also check that the registers are legal. WBACK indicates
15257 whether the address is updated. LOAD indicates whether the access is a load or a store. */
15258 bool
15259 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15260 bool wback, bool load)
15261 {
15262 unsigned int t, t2, n;
15263
15264 if (!reload_completed)
15265 return true;
15266
15267 if (!offset_ok_for_ldrd_strd (offset))
15268 return false;
15269
15270 t = REGNO (rt);
15271 t2 = REGNO (rt2);
15272 n = REGNO (rn);
15273
15274 if ((TARGET_THUMB2)
15275 && ((wback && (n == t || n == t2))
15276 || (t == SP_REGNUM)
15277 || (t == PC_REGNUM)
15278 || (t2 == SP_REGNUM)
15279 || (t2 == PC_REGNUM)
15280 || (!load && (n == PC_REGNUM))
15281 || (load && (t == t2))
15282 /* Triggers the Cortex-M3 LDRD erratum. */
15283 || (!wback && load && fix_cm3_ldrd && (n == t))))
15284 return false;
15285
15286 if ((TARGET_ARM)
15287 && ((wback && (n == t || n == t2))
15288 || (t2 == PC_REGNUM)
15289 || (t % 2 != 0) /* First destination register is not even. */
15290 || (t2 != t + 1)
15291 /* PC can be used as base register (for offset addressing only),
15292 but it is deprecated. */
15293 || (n == PC_REGNUM)))
15294 return false;
15295
15296 return true;
15297 }
15298
15299 /* Return true if a 64-bit access with alignment ALIGN and with a
15300 constant offset OFFSET from the base pointer is permitted on this
15301 architecture. */
15302 static bool
15303 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
15304 {
15305 return (unaligned_access
15306 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
15307 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
15308 }
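/* For example, when unaligned access is enabled a word-aligned (32-bit)
   access at a multiple-of-4 offset qualifies; with -mno-unaligned-access
   the data must be doubleword (64-bit) aligned and the offset a multiple
   of 8.  */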
15309
15310 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15311 operand MEM's address contains an immediate offset from the base
15312 register and has no side effects, in which case it sets BASE,
15313 OFFSET and ALIGN accordingly. */
15314 static bool
15315 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
15316 {
15317 rtx addr;
15318
15319 gcc_assert (base != NULL && offset != NULL);
15320
15321 /* TODO: Handle more general memory operand patterns, such as
15322 PRE_DEC and PRE_INC. */
15323
15324 if (side_effects_p (mem))
15325 return false;
15326
15327 /* Can't deal with subregs. */
15328 if (GET_CODE (mem) == SUBREG)
15329 return false;
15330
15331 gcc_assert (MEM_P (mem));
15332
15333 *offset = const0_rtx;
15334 *align = MEM_ALIGN (mem);
15335
15336 addr = XEXP (mem, 0);
15337
15338 /* If addr isn't valid for DImode, then we can't handle it. */
15339 if (!arm_legitimate_address_p (DImode, addr,
15340 reload_in_progress || reload_completed))
15341 return false;
15342
15343 if (REG_P (addr))
15344 {
15345 *base = addr;
15346 return true;
15347 }
15348 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15349 {
15350 *base = XEXP (addr, 0);
15351 *offset = XEXP (addr, 1);
15352 return (REG_P (*base) && CONST_INT_P (*offset));
15353 }
15354
15355 return false;
15356 }
15357
15358 /* Called from a peephole2 to replace two word-size accesses with a
15359 single LDRD/STRD instruction. Returns true iff we can generate a
15360 new instruction sequence. That is, both accesses use the same base
15361 register and the gap between constant offsets is 4. This function
15362 may reorder its operands to match ldrd/strd RTL templates.
15363 OPERANDS are the operands found by the peephole matcher;
15364 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15365 corresponding memory operands. LOAD indicates whether the access
15366 is a load or a store. CONST_STORE indicates a store of constant
15367 integer values held in OPERANDS[4,5] and assumes that the pattern
15368 is 4 insns long, for the purpose of checking dead registers.
15369 COMMUTE indicates that register operands may be reordered. */
15370 bool
15371 gen_operands_ldrd_strd (rtx *operands, bool load,
15372 bool const_store, bool commute)
15373 {
15374 int nops = 2;
15375 HOST_WIDE_INT offsets[2], offset, align[2];
15376 rtx base = NULL_RTX;
15377 rtx cur_base, cur_offset, tmp;
15378 int i, gap;
15379 HARD_REG_SET regset;
15380
15381 gcc_assert (!const_store || !load);
15382 /* Check that the memory references are immediate offsets from the
15383 same base register. Extract the base register, the destination
15384 registers, and the corresponding memory offsets. */
15385 for (i = 0; i < nops; i++)
15386 {
15387 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
15388 &align[i]))
15389 return false;
15390
15391 if (i == 0)
15392 base = cur_base;
15393 else if (REGNO (base) != REGNO (cur_base))
15394 return false;
15395
15396 offsets[i] = INTVAL (cur_offset);
15397 if (GET_CODE (operands[i]) == SUBREG)
15398 {
15399 tmp = SUBREG_REG (operands[i]);
15400 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15401 operands[i] = tmp;
15402 }
15403 }
15404
15405 /* Make sure there is no dependency between the individual loads. */
15406 if (load && REGNO (operands[0]) == REGNO (base))
15407 return false; /* RAW */
15408
15409 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15410 return false; /* WAW */
15411
15412 /* If the same input register is used in both stores
15413 when storing different constants, try to find a free register.
15414 For example, the code
15415 mov r0, 0
15416 str r0, [r2]
15417 mov r0, 1
15418 str r0, [r2, #4]
15419 can be transformed into
15420 mov r1, 0
15421 mov r0, 1
15422 strd r1, r0, [r2]
15423 in Thumb mode assuming that r1 is free.
15424 For ARM mode do the same but only if the starting register
15425 can be made to be even. */
15426 if (const_store
15427 && REGNO (operands[0]) == REGNO (operands[1])
15428 && INTVAL (operands[4]) != INTVAL (operands[5]))
15429 {
15430 if (TARGET_THUMB2)
15431 {
15432 CLEAR_HARD_REG_SET (regset);
15433 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15434 if (tmp == NULL_RTX)
15435 return false;
15436
15437 /* Use the new register in the first load to ensure that
15438 if the original input register is not dead after peephole,
15439 then it will have the correct constant value. */
15440 operands[0] = tmp;
15441 }
15442 else if (TARGET_ARM)
15443 {
15444 int regno = REGNO (operands[0]);
15445 if (!peep2_reg_dead_p (4, operands[0]))
15446 {
15447 /* When the input register is even and is not dead after the
15448 pattern, it has to hold the second constant but we cannot
15449 form a legal STRD in ARM mode with this register as the second
15450 register. */
15451 if (regno % 2 == 0)
15452 return false;
15453
15454 /* Is regno-1 free? */
15455 SET_HARD_REG_SET (regset);
15456 CLEAR_HARD_REG_BIT(regset, regno - 1);
15457 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15458 if (tmp == NULL_RTX)
15459 return false;
15460
15461 operands[0] = tmp;
15462 }
15463 else
15464 {
15465 /* Find a DImode register. */
15466 CLEAR_HARD_REG_SET (regset);
15467 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15468 if (tmp != NULL_RTX)
15469 {
15470 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15471 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15472 }
15473 else
15474 {
15475 /* Can we use the input register to form a DI register? */
15476 SET_HARD_REG_SET (regset);
15477 CLEAR_HARD_REG_BIT(regset,
15478 regno % 2 == 0 ? regno + 1 : regno - 1);
15479 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15480 if (tmp == NULL_RTX)
15481 return false;
15482 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15483 }
15484 }
15485
15486 gcc_assert (operands[0] != NULL_RTX);
15487 gcc_assert (operands[1] != NULL_RTX);
15488 gcc_assert (REGNO (operands[0]) % 2 == 0);
15489 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15490 }
15491 }
15492
15493 /* Make sure the instructions are ordered with lower memory access first. */
15494 if (offsets[0] > offsets[1])
15495 {
15496 gap = offsets[0] - offsets[1];
15497 offset = offsets[1];
15498
15499 /* Swap the instructions such that lower memory is accessed first. */
15500 std::swap (operands[0], operands[1]);
15501 std::swap (operands[2], operands[3]);
15502 std::swap (align[0], align[1]);
15503 if (const_store)
15504 std::swap (operands[4], operands[5]);
15505 }
15506 else
15507 {
15508 gap = offsets[1] - offsets[0];
15509 offset = offsets[0];
15510 }
15511
15512 /* Make sure accesses are to consecutive memory locations. */
15513 if (gap != 4)
15514 return false;
15515
15516 if (!align_ok_ldrd_strd (align[0], offset))
15517 return false;
15518
15519 /* Make sure we generate legal instructions. */
15520 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15521 false, load))
15522 return true;
15523
15524 /* In Thumb state, where registers are almost unconstrained, there
15525 is little hope of fixing it. */
15526 if (TARGET_THUMB2)
15527 return false;
15528
15529 if (load && commute)
15530 {
15531 /* Try reordering registers. */
15532 std::swap (operands[0], operands[1]);
15533 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15534 false, load))
15535 return true;
15536 }
15537
15538 if (const_store)
15539 {
15540 /* If input registers are dead after this pattern, they can be
15541 reordered or replaced by other registers that are free in the
15542 current pattern. */
15543 if (!peep2_reg_dead_p (4, operands[0])
15544 || !peep2_reg_dead_p (4, operands[1]))
15545 return false;
15546
15547 /* Try to reorder the input registers. */
15548 /* For example, the code
15549 mov r0, 0
15550 mov r1, 1
15551 str r1, [r2]
15552 str r0, [r2, #4]
15553 can be transformed into
15554 mov r1, 0
15555 mov r0, 1
15556 strd r0, [r2]
15557 */
15558 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15559 false, false))
15560 {
15561 std::swap (operands[0], operands[1]);
15562 return true;
15563 }
15564
15565 /* Try to find a free DI register. */
15566 CLEAR_HARD_REG_SET (regset);
15567 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15568 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15569 while (true)
15570 {
15571 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15572 if (tmp == NULL_RTX)
15573 return false;
15574
15575 /* DREG must be an even-numbered register in DImode.
15576 Split it into SI registers. */
15577 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15578 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15579 gcc_assert (operands[0] != NULL_RTX);
15580 gcc_assert (operands[1] != NULL_RTX);
15581 gcc_assert (REGNO (operands[0]) % 2 == 0);
15582 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15583
15584 return (operands_ok_ldrd_strd (operands[0], operands[1],
15585 base, offset,
15586 false, load));
15587 }
15588 }
15589
15590 return false;
15591 }
15592
15593
15594
15595 \f
15596 /* Print a symbolic form of X to the debug file, F. */
15597 static void
15598 arm_print_value (FILE *f, rtx x)
15599 {
15600 switch (GET_CODE (x))
15601 {
15602 case CONST_INT:
15603 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15604 return;
15605
15606 case CONST_DOUBLE:
15607 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15608 return;
15609
15610 case CONST_VECTOR:
15611 {
15612 int i;
15613
15614 fprintf (f, "<");
15615 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15616 {
15617 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15618 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15619 fputc (',', f);
15620 }
15621 fprintf (f, ">");
15622 }
15623 return;
15624
15625 case CONST_STRING:
15626 fprintf (f, "\"%s\"", XSTR (x, 0));
15627 return;
15628
15629 case SYMBOL_REF:
15630 fprintf (f, "`%s'", XSTR (x, 0));
15631 return;
15632
15633 case LABEL_REF:
15634 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15635 return;
15636
15637 case CONST:
15638 arm_print_value (f, XEXP (x, 0));
15639 return;
15640
15641 case PLUS:
15642 arm_print_value (f, XEXP (x, 0));
15643 fprintf (f, "+");
15644 arm_print_value (f, XEXP (x, 1));
15645 return;
15646
15647 case PC:
15648 fprintf (f, "pc");
15649 return;
15650
15651 default:
15652 fprintf (f, "????");
15653 return;
15654 }
15655 }
15656 \f
15657 /* Routines for manipulation of the constant pool. */
15658
15659 /* Arm instructions cannot load a large constant directly into a
15660 register; they have to come from a pc relative load. The constant
15661 must therefore be placed in the addressable range of the pc
15662 relative load. Depending on the precise pc relative load
15663 instruction the range is somewhere between 256 bytes and 4k. This
15664 means that we often have to dump a constant inside a function, and
15665 generate code to branch around it.
15666
15667 It is important to minimize this, since the branches will slow
15668 things down and make the code larger.
15669
15670 Normally we can hide the table after an existing unconditional
15671 branch so that there is no interruption of the flow, but in the
15672 worst case the code looks like this:
15673
15674 ldr rn, L1
15675 ...
15676 b L2
15677 align
15678 L1: .long value
15679 L2:
15680 ...
15681
15682 ldr rn, L3
15683 ...
15684 b L4
15685 align
15686 L3: .long value
15687 L4:
15688 ...
15689
15690 We fix this by performing a scan after scheduling, which notices
15691 which instructions need to have their operands fetched from the
15692 constant table and builds the table.
15693
15694 The algorithm starts by building a table of all the constants that
15695 need fixing up and all the natural barriers in the function (places
15696 where a constant table can be dropped without breaking the flow).
15697 For each fixup we note how far the pc-relative replacement will be
15698 able to reach and the offset of the instruction into the function.
15699
15700 Having built the table we then group the fixes together to form
15701 tables that are as large as possible (subject to addressing
15702 constraints) and emit each table of constants after the last
15703 barrier that is within range of all the instructions in the group.
15704 If a group does not contain a barrier, then we forcibly create one
15705 by inserting a jump instruction into the flow. Once the table has
15706 been inserted, the insns are then modified to reference the
15707 relevant entry in the pool.
15708
15709 Possible enhancements to the algorithm (not implemented) are:
15710
15711 1) For some processors and object formats, there may be benefit in
15712 aligning the pools to the start of cache lines; this alignment
15713 would need to be taken into account when calculating addressability
15714 of a pool. */
15715
15716 /* These typedefs are located at the start of this file, so that
15717 they can be used in the prototypes there. This comment is to
15718 remind readers of that fact so that the following structures
15719 can be understood more easily.
15720
15721 typedef struct minipool_node Mnode;
15722 typedef struct minipool_fixup Mfix; */
15723
15724 struct minipool_node
15725 {
15726 /* Doubly linked chain of entries. */
15727 Mnode * next;
15728 Mnode * prev;
15729 /* The maximum offset into the code at which this entry can be placed. While
15730 pushing fixes for forward references, all entries are sorted in order
15731 of increasing max_address. */
15732 HOST_WIDE_INT max_address;
15733 /* Similarly for an entry inserted for a backwards ref. */
15734 HOST_WIDE_INT min_address;
15735 /* The number of fixes referencing this entry. This can become zero
15736 if we "unpush" an entry. In this case we ignore the entry when we
15737 come to emit the code. */
15738 int refcount;
15739 /* The offset from the start of the minipool. */
15740 HOST_WIDE_INT offset;
15741 /* The value in the table. */
15742 rtx value;
15743 /* The mode of value. */
15744 machine_mode mode;
15745 /* The size of the value. With iWMMXt enabled
15746 sizes > 4 also imply an alignment of 8 bytes. */
15747 int fix_size;
15748 };
15749
15750 struct minipool_fixup
15751 {
15752 Mfix * next;
15753 rtx_insn * insn;
15754 HOST_WIDE_INT address;
15755 rtx * loc;
15756 machine_mode mode;
15757 int fix_size;
15758 rtx value;
15759 Mnode * minipool;
15760 HOST_WIDE_INT forwards;
15761 HOST_WIDE_INT backwards;
15762 };
15763
15764 /* Fixes less than a word need padding out to a word boundary. */
15765 #define MINIPOOL_FIX_SIZE(mode) \
15766 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
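/* For example, a QImode or HImode fix still occupies 4 bytes in the pool,
   while a DImode or DFmode fix occupies 8.  */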
15767
15768 static Mnode * minipool_vector_head;
15769 static Mnode * minipool_vector_tail;
15770 static rtx_code_label *minipool_vector_label;
15771 static int minipool_pad;
15772
15773 /* The linked list of all minipool fixes required for this function. */
15774 Mfix * minipool_fix_head;
15775 Mfix * minipool_fix_tail;
15776 /* The fix entry for the current minipool, once it has been placed. */
15777 Mfix * minipool_barrier;
15778
15779 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15780 #define JUMP_TABLES_IN_TEXT_SECTION 0
15781 #endif
15782
15783 static HOST_WIDE_INT
15784 get_jump_table_size (rtx_jump_table_data *insn)
15785 {
15786 /* ADDR_VECs only take room if read-only data goes into the text
15787 section. */
15788 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15789 {
15790 rtx body = PATTERN (insn);
15791 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15792 HOST_WIDE_INT size;
15793 HOST_WIDE_INT modesize;
15794
15795 modesize = GET_MODE_SIZE (GET_MODE (body));
15796 size = modesize * XVECLEN (body, elt);
15797 switch (modesize)
15798 {
15799 case 1:
15800 /* Round up size of TBB table to a halfword boundary. */
15801 size = (size + 1) & ~HOST_WIDE_INT_1;
15802 break;
15803 case 2:
15804 /* No padding necessary for TBH. */
15805 break;
15806 case 4:
15807 /* Add two bytes for alignment on Thumb. */
15808 if (TARGET_THUMB)
15809 size += 2;
15810 break;
15811 default:
15812 gcc_unreachable ();
15813 }
15814 return size;
15815 }
15816
15817 return 0;
15818 }
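/* For example, a TBB dispatch table with five one-byte entries is padded
   to 6 bytes, while a five-entry 4-byte ADDR_VEC in Thumb code takes
   5 * 4 + 2 = 22 bytes including the alignment padding.  */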
15819
15820 /* Return the maximum amount of padding that will be inserted before
15821 label LABEL. */
15822
15823 static HOST_WIDE_INT
15824 get_label_padding (rtx label)
15825 {
15826 HOST_WIDE_INT align, min_insn_size;
15827
15828 align = 1 << label_to_alignment (label);
15829 min_insn_size = TARGET_THUMB ? 2 : 4;
15830 return align > min_insn_size ? align - min_insn_size : 0;
15831 }
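/* For example, a label aligned to an 8-byte boundary in Thumb code can be
   preceded by up to 8 - 2 = 6 bytes of padding, since the smallest Thumb
   instruction is two bytes.  */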
15832
15833 /* Move a minipool fix MP from its current location to before MAX_MP.
15834 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15835 constraints may need updating. */
15836 static Mnode *
15837 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15838 HOST_WIDE_INT max_address)
15839 {
15840 /* The code below assumes these are different. */
15841 gcc_assert (mp != max_mp);
15842
15843 if (max_mp == NULL)
15844 {
15845 if (max_address < mp->max_address)
15846 mp->max_address = max_address;
15847 }
15848 else
15849 {
15850 if (max_address > max_mp->max_address - mp->fix_size)
15851 mp->max_address = max_mp->max_address - mp->fix_size;
15852 else
15853 mp->max_address = max_address;
15854
15855 /* Unlink MP from its current position. Since max_mp is non-null,
15856 mp->prev must be non-null. */
15857 mp->prev->next = mp->next;
15858 if (mp->next != NULL)
15859 mp->next->prev = mp->prev;
15860 else
15861 minipool_vector_tail = mp->prev;
15862
15863 /* Re-insert it before MAX_MP. */
15864 mp->next = max_mp;
15865 mp->prev = max_mp->prev;
15866 max_mp->prev = mp;
15867
15868 if (mp->prev != NULL)
15869 mp->prev->next = mp;
15870 else
15871 minipool_vector_head = mp;
15872 }
15873
15874 /* Save the new entry. */
15875 max_mp = mp;
15876
15877 /* Scan over the preceding entries and adjust their addresses as
15878 required. */
15879 while (mp->prev != NULL
15880 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15881 {
15882 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15883 mp = mp->prev;
15884 }
15885
15886 return max_mp;
15887 }
15888
15889 /* Add a constant to the minipool for a forward reference. Returns the
15890 node added or NULL if the constant will not fit in this pool. */
15891 static Mnode *
15892 add_minipool_forward_ref (Mfix *fix)
15893 {
15894 /* If set, max_mp is the first pool_entry that has a lower
15895 constraint than the one we are trying to add. */
15896 Mnode * max_mp = NULL;
15897 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15898 Mnode * mp;
15899
15900 /* If the minipool starts before the end of FIX->INSN then this FIX
15901 cannot be placed in the current pool. Furthermore, adding the
15902 new constant pool entry may cause the pool to start FIX_SIZE bytes
15903 earlier. */
15904 if (minipool_vector_head &&
15905 (fix->address + get_attr_length (fix->insn)
15906 >= minipool_vector_head->max_address - fix->fix_size))
15907 return NULL;
15908
15909 /* Scan the pool to see if a constant with the same value has
15910 already been added. While we are doing this, also note the
15911 location where we must insert the constant if it doesn't already
15912 exist. */
15913 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15914 {
15915 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15916 && fix->mode == mp->mode
15917 && (!LABEL_P (fix->value)
15918 || (CODE_LABEL_NUMBER (fix->value)
15919 == CODE_LABEL_NUMBER (mp->value)))
15920 && rtx_equal_p (fix->value, mp->value))
15921 {
15922 /* More than one fix references this entry. */
15923 mp->refcount++;
15924 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15925 }
15926
15927 /* Note the insertion point if necessary. */
15928 if (max_mp == NULL
15929 && mp->max_address > max_address)
15930 max_mp = mp;
15931
15932 /* If we are inserting an 8-byte aligned quantity and
15933 we have not already found an insertion point, then
15934 make sure that all such 8-byte aligned quantities are
15935 placed at the start of the pool. */
15936 if (ARM_DOUBLEWORD_ALIGN
15937 && max_mp == NULL
15938 && fix->fix_size >= 8
15939 && mp->fix_size < 8)
15940 {
15941 max_mp = mp;
15942 max_address = mp->max_address;
15943 }
15944 }
15945
15946 /* The value is not currently in the minipool, so we need to create
15947 a new entry for it. If MAX_MP is NULL, the entry will be put on
15948 the end of the list since the placement is less constrained than
15949 any existing entry. Otherwise, we insert the new fix before
15950 MAX_MP and, if necessary, adjust the constraints on the other
15951 entries. */
15952 mp = XNEW (Mnode);
15953 mp->fix_size = fix->fix_size;
15954 mp->mode = fix->mode;
15955 mp->value = fix->value;
15956 mp->refcount = 1;
15957 /* Not yet required for a backwards ref. */
15958 mp->min_address = -65536;
15959
15960 if (max_mp == NULL)
15961 {
15962 mp->max_address = max_address;
15963 mp->next = NULL;
15964 mp->prev = minipool_vector_tail;
15965
15966 if (mp->prev == NULL)
15967 {
15968 minipool_vector_head = mp;
15969 minipool_vector_label = gen_label_rtx ();
15970 }
15971 else
15972 mp->prev->next = mp;
15973
15974 minipool_vector_tail = mp;
15975 }
15976 else
15977 {
15978 if (max_address > max_mp->max_address - mp->fix_size)
15979 mp->max_address = max_mp->max_address - mp->fix_size;
15980 else
15981 mp->max_address = max_address;
15982
15983 mp->next = max_mp;
15984 mp->prev = max_mp->prev;
15985 max_mp->prev = mp;
15986 if (mp->prev != NULL)
15987 mp->prev->next = mp;
15988 else
15989 minipool_vector_head = mp;
15990 }
15991
15992 /* Save the new entry. */
15993 max_mp = mp;
15994
15995 /* Scan over the preceding entries and adjust their addresses as
15996 required. */
15997 while (mp->prev != NULL
15998 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15999 {
16000 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16001 mp = mp->prev;
16002 }
16003
16004 return max_mp;
16005 }
16006
16007 static Mnode *
16008 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16009 HOST_WIDE_INT min_address)
16010 {
16011 HOST_WIDE_INT offset;
16012
16013 /* The code below assumes these are different. */
16014 gcc_assert (mp != min_mp);
16015
16016 if (min_mp == NULL)
16017 {
16018 if (min_address > mp->min_address)
16019 mp->min_address = min_address;
16020 }
16021 else
16022 {
16023 /* We will adjust this below if it is too loose. */
16024 mp->min_address = min_address;
16025
16026 /* Unlink MP from its current position. Since min_mp is non-null,
16027 mp->next must be non-null. */
16028 mp->next->prev = mp->prev;
16029 if (mp->prev != NULL)
16030 mp->prev->next = mp->next;
16031 else
16032 minipool_vector_head = mp->next;
16033
16034 /* Reinsert it after MIN_MP. */
16035 mp->prev = min_mp;
16036 mp->next = min_mp->next;
16037 min_mp->next = mp;
16038 if (mp->next != NULL)
16039 mp->next->prev = mp;
16040 else
16041 minipool_vector_tail = mp;
16042 }
16043
16044 min_mp = mp;
16045
16046 offset = 0;
16047 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16048 {
16049 mp->offset = offset;
16050 if (mp->refcount > 0)
16051 offset += mp->fix_size;
16052
16053 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16054 mp->next->min_address = mp->min_address + mp->fix_size;
16055 }
16056
16057 return min_mp;
16058 }
16059
16060 /* Add a constant to the minipool for a backward reference. Returns the
16061 node added or NULL if the constant will not fit in this pool.
16062
16063 Note that the code for inserting a backwards reference can be
16064 somewhat confusing because the calculated offsets for each fix do
16065 not take into account the size of the pool (which is still under
16066 construction). */
16067 static Mnode *
16068 add_minipool_backward_ref (Mfix *fix)
16069 {
16070 /* If set, min_mp is the last pool_entry that has a lower constraint
16071 than the one we are trying to add. */
16072 Mnode *min_mp = NULL;
16073 /* This can be negative, since it is only a constraint. */
16074 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16075 Mnode *mp;
16076
16077 /* If we can't reach the current pool from this insn, or if we can't
16078 insert this entry at the end of the pool without pushing other
16079 fixes out of range, then we don't try. This ensures that we
16080 can't fail later on. */
16081 if (min_address >= minipool_barrier->address
16082 || (minipool_vector_tail->min_address + fix->fix_size
16083 >= minipool_barrier->address))
16084 return NULL;
16085
16086 /* Scan the pool to see if a constant with the same value has
16087 already been added. While we are doing this, also note the
16088 location where we must insert the constant if it doesn't already
16089 exist. */
16090 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16091 {
16092 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16093 && fix->mode == mp->mode
16094 && (!LABEL_P (fix->value)
16095 || (CODE_LABEL_NUMBER (fix->value)
16096 == CODE_LABEL_NUMBER (mp->value)))
16097 && rtx_equal_p (fix->value, mp->value)
16098 /* Check that there is enough slack to move this entry to the
16099 end of the table (this is conservative). */
16100 && (mp->max_address
16101 > (minipool_barrier->address
16102 + minipool_vector_tail->offset
16103 + minipool_vector_tail->fix_size)))
16104 {
16105 mp->refcount++;
16106 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16107 }
16108
16109 if (min_mp != NULL)
16110 mp->min_address += fix->fix_size;
16111 else
16112 {
16113 /* Note the insertion point if necessary. */
16114 if (mp->min_address < min_address)
16115 {
16116 /* For now, we do not allow nodes that require 8-byte alignment
16117 to be inserted anywhere but at the start of the pool. */
16118 if (ARM_DOUBLEWORD_ALIGN
16119 && fix->fix_size >= 8 && mp->fix_size < 8)
16120 return NULL;
16121 else
16122 min_mp = mp;
16123 }
16124 else if (mp->max_address
16125 < minipool_barrier->address + mp->offset + fix->fix_size)
16126 {
16127 /* Inserting before this entry would push the fix beyond
16128 its maximum address (which can happen if we have
16129 re-located a forwards fix); force the new fix to come
16130 after it. */
16131 if (ARM_DOUBLEWORD_ALIGN
16132 && fix->fix_size >= 8 && mp->fix_size < 8)
16133 return NULL;
16134 else
16135 {
16136 min_mp = mp;
16137 min_address = mp->min_address + fix->fix_size;
16138 }
16139 }
16140 /* Do not insert a non-8-byte aligned quantity before 8-byte
16141 aligned quantities. */
16142 else if (ARM_DOUBLEWORD_ALIGN
16143 && fix->fix_size < 8
16144 && mp->fix_size >= 8)
16145 {
16146 min_mp = mp;
16147 min_address = mp->min_address + fix->fix_size;
16148 }
16149 }
16150 }
16151
16152 /* We need to create a new entry. */
16153 mp = XNEW (Mnode);
16154 mp->fix_size = fix->fix_size;
16155 mp->mode = fix->mode;
16156 mp->value = fix->value;
16157 mp->refcount = 1;
16158 mp->max_address = minipool_barrier->address + 65536;
16159
16160 mp->min_address = min_address;
16161
16162 if (min_mp == NULL)
16163 {
16164 mp->prev = NULL;
16165 mp->next = minipool_vector_head;
16166
16167 if (mp->next == NULL)
16168 {
16169 minipool_vector_tail = mp;
16170 minipool_vector_label = gen_label_rtx ();
16171 }
16172 else
16173 mp->next->prev = mp;
16174
16175 minipool_vector_head = mp;
16176 }
16177 else
16178 {
16179 mp->next = min_mp->next;
16180 mp->prev = min_mp;
16181 min_mp->next = mp;
16182
16183 if (mp->next != NULL)
16184 mp->next->prev = mp;
16185 else
16186 minipool_vector_tail = mp;
16187 }
16188
16189 /* Save the new entry. */
16190 min_mp = mp;
16191
16192 if (mp->prev)
16193 mp = mp->prev;
16194 else
16195 mp->offset = 0;
16196
16197 /* Scan over the following entries and adjust their offsets. */
16198 while (mp->next != NULL)
16199 {
16200 if (mp->next->min_address < mp->min_address + mp->fix_size)
16201 mp->next->min_address = mp->min_address + mp->fix_size;
16202
16203 if (mp->refcount)
16204 mp->next->offset = mp->offset + mp->fix_size;
16205 else
16206 mp->next->offset = mp->offset;
16207
16208 mp = mp->next;
16209 }
16210
16211 return min_mp;
16212 }
16213
16214 static void
16215 assign_minipool_offsets (Mfix *barrier)
16216 {
16217 HOST_WIDE_INT offset = 0;
16218 Mnode *mp;
16219
16220 minipool_barrier = barrier;
16221
16222 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16223 {
16224 mp->offset = offset;
16225
16226 if (mp->refcount > 0)
16227 offset += mp->fix_size;
16228 }
16229 }
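
/* Worked example (counts invented for illustration): with three SImode
   entries of fix_size 4 whose refcounts are 1, 1 and 0, the loop above
   assigns offsets 0, 4 and 8.  The unreferenced third entry still
   receives an offset but contributes no space, and dump_minipool below
   skips it, so the emitted pool occupies 8 bytes.  */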
16230
16231 /* Output the literal table. */
16232 static void
16233 dump_minipool (rtx_insn *scan)
16234 {
16235 Mnode * mp;
16236 Mnode * nmp;
16237 int align64 = 0;
16238
16239 if (ARM_DOUBLEWORD_ALIGN)
16240 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16241 if (mp->refcount > 0 && mp->fix_size >= 8)
16242 {
16243 align64 = 1;
16244 break;
16245 }
16246
16247 if (dump_file)
16248 fprintf (dump_file,
16249 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16250 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16251
16252 scan = emit_label_after (gen_label_rtx (), scan);
16253 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16254 scan = emit_label_after (minipool_vector_label, scan);
16255
16256 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16257 {
16258 if (mp->refcount > 0)
16259 {
16260 if (dump_file)
16261 {
16262 fprintf (dump_file,
16263 ";; Offset %u, min %ld, max %ld ",
16264 (unsigned) mp->offset, (unsigned long) mp->min_address,
16265 (unsigned long) mp->max_address);
16266 arm_print_value (dump_file, mp->value);
16267 fputc ('\n', dump_file);
16268 }
16269
16270 rtx val = copy_rtx (mp->value);
16271
16272 switch (GET_MODE_SIZE (mp->mode))
16273 {
16274 #ifdef HAVE_consttable_1
16275 case 1:
16276 scan = emit_insn_after (gen_consttable_1 (val), scan);
16277 break;
16278
16279 #endif
16280 #ifdef HAVE_consttable_2
16281 case 2:
16282 scan = emit_insn_after (gen_consttable_2 (val), scan);
16283 break;
16284
16285 #endif
16286 #ifdef HAVE_consttable_4
16287 case 4:
16288 scan = emit_insn_after (gen_consttable_4 (val), scan);
16289 break;
16290
16291 #endif
16292 #ifdef HAVE_consttable_8
16293 case 8:
16294 scan = emit_insn_after (gen_consttable_8 (val), scan);
16295 break;
16296
16297 #endif
16298 #ifdef HAVE_consttable_16
16299 case 16:
16300 scan = emit_insn_after (gen_consttable_16 (val), scan);
16301 break;
16302
16303 #endif
16304 default:
16305 gcc_unreachable ();
16306 }
16307 }
16308
16309 nmp = mp->next;
16310 free (mp);
16311 }
16312
16313 minipool_vector_head = minipool_vector_tail = NULL;
16314 scan = emit_insn_after (gen_consttable_end (), scan);
16315 scan = emit_barrier_after (scan);
16316 }
16317
16318 /* Return the cost of forcibly inserting a barrier after INSN. */
16319 static int
16320 arm_barrier_cost (rtx_insn *insn)
16321 {
16322 /* Basing the location of the pool on the loop depth is preferable,
16323 but at the moment, the basic block information seems to be
16324 corrupted by this stage of the compilation. */
16325 int base_cost = 50;
16326 rtx_insn *next = next_nonnote_insn (insn);
16327
16328 if (next != NULL && LABEL_P (next))
16329 base_cost -= 20;
16330
16331 switch (GET_CODE (insn))
16332 {
16333 case CODE_LABEL:
16334 /* It will always be better to place the table before the label, rather
16335 than after it. */
16336 return 50;
16337
16338 case INSN:
16339 case CALL_INSN:
16340 return base_cost;
16341
16342 case JUMP_INSN:
16343 return base_cost - 10;
16344
16345 default:
16346 return base_cost + 10;
16347 }
16348 }
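
/* Illustrative sketch of the costs above: a plain INSN or CALL_INSN
   returns 50, or 30 when the next non-note insn is a label; a JUMP_INSN
   returns 10 less than that; a CODE_LABEL always returns 50.  Lower is
   cheaper, so create_fix_barrier below tends to place the pool after a
   jump or just before a label rather than in the middle of
   straight-line code.  */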
16349
16350 /* Find the best place in the insn stream in the range
16351 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16352 Create the barrier by inserting a jump and add a new fix entry for
16353 it. */
16354 static Mfix *
16355 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16356 {
16357 HOST_WIDE_INT count = 0;
16358 rtx_barrier *barrier;
16359 rtx_insn *from = fix->insn;
16360 /* The instruction after which we will insert the jump. */
16361 rtx_insn *selected = NULL;
16362 int selected_cost;
16363 /* The address at which the jump instruction will be placed. */
16364 HOST_WIDE_INT selected_address;
16365 Mfix * new_fix;
16366 HOST_WIDE_INT max_count = max_address - fix->address;
16367 rtx_code_label *label = gen_label_rtx ();
16368
16369 selected_cost = arm_barrier_cost (from);
16370 selected_address = fix->address;
16371
16372 while (from && count < max_count)
16373 {
16374 rtx_jump_table_data *tmp;
16375 int new_cost;
16376
16377 /* This code shouldn't have been called if there was a natural barrier
16378 within range. */
16379 gcc_assert (!BARRIER_P (from));
16380
16381 /* Count the length of this insn. This must stay in sync with the
16382 code that pushes minipool fixes. */
16383 if (LABEL_P (from))
16384 count += get_label_padding (from);
16385 else
16386 count += get_attr_length (from);
16387
16388 /* If there is a jump table, add its length. */
16389 if (tablejump_p (from, NULL, &tmp))
16390 {
16391 count += get_jump_table_size (tmp);
16392
16393 /* Jump tables aren't in a basic block, so base the cost on
16394 the dispatch insn. If we select this location, we will
16395 still put the pool after the table. */
16396 new_cost = arm_barrier_cost (from);
16397
16398 if (count < max_count
16399 && (!selected || new_cost <= selected_cost))
16400 {
16401 selected = tmp;
16402 selected_cost = new_cost;
16403 selected_address = fix->address + count;
16404 }
16405
16406 /* Continue after the dispatch table. */
16407 from = NEXT_INSN (tmp);
16408 continue;
16409 }
16410
16411 new_cost = arm_barrier_cost (from);
16412
16413 if (count < max_count
16414 && (!selected || new_cost <= selected_cost))
16415 {
16416 selected = from;
16417 selected_cost = new_cost;
16418 selected_address = fix->address + count;
16419 }
16420
16421 from = NEXT_INSN (from);
16422 }
16423
16424 /* Make sure that we found a place to insert the jump. */
16425 gcc_assert (selected);
16426
16427 /* Make sure we do not split a call and its corresponding
16428 CALL_ARG_LOCATION note. */
16429 if (CALL_P (selected))
16430 {
16431 rtx_insn *next = NEXT_INSN (selected);
16432 if (next && NOTE_P (next)
16433 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16434 selected = next;
16435 }
16436
16437 /* Create a new JUMP_INSN that branches around a barrier. */
16438 from = emit_jump_insn_after (gen_jump (label), selected);
16439 JUMP_LABEL (from) = label;
16440 barrier = emit_barrier_after (from);
16441 emit_label_after (label, barrier);
16442
16443 /* Create a minipool barrier entry for the new barrier. */
16444 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16445 new_fix->insn = barrier;
16446 new_fix->address = selected_address;
16447 new_fix->next = fix->next;
16448 fix->next = new_fix;
16449
16450 return new_fix;
16451 }
16452
16453 /* Record that there is a natural barrier in the insn stream at
16454 ADDRESS. */
16455 static void
16456 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16457 {
16458 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16459
16460 fix->insn = insn;
16461 fix->address = address;
16462
16463 fix->next = NULL;
16464 if (minipool_fix_head != NULL)
16465 minipool_fix_tail->next = fix;
16466 else
16467 minipool_fix_head = fix;
16468
16469 minipool_fix_tail = fix;
16470 }
16471
16472 /* Record INSN, which will need fixing up to load a value from the
16473 minipool. ADDRESS is the offset of the insn since the start of the
16474 function; LOC is a pointer to the part of the insn which requires
16475 fixing; VALUE is the constant that must be loaded, which is of type
16476 MODE. */
16477 static void
16478 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16479 machine_mode mode, rtx value)
16480 {
16481 gcc_assert (!arm_disable_literal_pool);
16482 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16483
16484 fix->insn = insn;
16485 fix->address = address;
16486 fix->loc = loc;
16487 fix->mode = mode;
16488 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16489 fix->value = value;
16490 fix->forwards = get_attr_pool_range (insn);
16491 fix->backwards = get_attr_neg_pool_range (insn);
16492 fix->minipool = NULL;
16493
16494 /* If an insn doesn't have a range defined for it, then it isn't
16495 expecting to be reworked by this code. Better to stop now than
16496 to generate duff assembly code. */
16497 gcc_assert (fix->forwards || fix->backwards);
16498
16499 /* If an entry requires 8-byte alignment then assume all constant pools
16500 require 4 bytes of padding. Trying to do this later on a per-pool
16501 basis is awkward because existing pool entries have to be modified. */
16502 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16503 minipool_pad = 4;
16504
16505 if (dump_file)
16506 {
16507 fprintf (dump_file,
16508 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16509 GET_MODE_NAME (mode),
16510 INSN_UID (insn), (unsigned long) address,
16511 -1 * (long)fix->backwards, (long)fix->forwards);
16512 arm_print_value (dump_file, fix->value);
16513 fprintf (dump_file, "\n");
16514 }
16515
16516 /* Add it to the chain of fixes. */
16517 fix->next = NULL;
16518
16519 if (minipool_fix_head != NULL)
16520 minipool_fix_tail->next = fix;
16521 else
16522 minipool_fix_head = fix;
16523
16524 minipool_fix_tail = fix;
16525 }
16526
16527 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16528 Returns the number of insns needed, or 99 if we always want to synthesize
16529 the value. */
16530 int
16531 arm_max_const_double_inline_cost ()
16532 {
16533 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16534 }
16535
16536 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16537 Returns the number of insns needed, or 99 if we don't know how to
16538 do it. */
16539 int
16540 arm_const_double_inline_cost (rtx val)
16541 {
16542 rtx lowpart, highpart;
16543 machine_mode mode;
16544
16545 mode = GET_MODE (val);
16546
16547 if (mode == VOIDmode)
16548 mode = DImode;
16549
16550 gcc_assert (GET_MODE_SIZE (mode) == 8);
16551
16552 lowpart = gen_lowpart (SImode, val);
16553 highpart = gen_highpart_mode (SImode, mode, val);
16554
16555 gcc_assert (CONST_INT_P (lowpart));
16556 gcc_assert (CONST_INT_P (highpart));
16557
16558 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16559 NULL_RTX, NULL_RTX, 0, 0)
16560 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16561 NULL_RTX, NULL_RTX, 0, 0));
16562 }
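
/* Illustrative sketch (constant invented for the example): for the
   DImode constant 0x0000ff0000000001 the low part 0x1 and the high part
   0xff00 are both valid ARM data-processing immediates, so each half
   costs one insn and the function above returns 2.  Being below
   arm_max_const_double_inline_cost (3 or 4), such a constant would
   normally be synthesized with two moves rather than loaded from the
   literal pool.  */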
16563
16564 /* Cost of loading a SImode constant. */
16565 static inline int
16566 arm_const_inline_cost (enum rtx_code code, rtx val)
16567 {
16568 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16569 NULL_RTX, NULL_RTX, 1, 0);
16570 }
16571
16572 /* Return true if it is worthwhile to split a 64-bit constant into two
16573 32-bit operations. This is the case if optimizing for size, or
16574 if we have load delay slots, or if one 32-bit part can be done with
16575 a single data operation. */
16576 bool
16577 arm_const_double_by_parts (rtx val)
16578 {
16579 machine_mode mode = GET_MODE (val);
16580 rtx part;
16581
16582 if (optimize_size || arm_ld_sched)
16583 return true;
16584
16585 if (mode == VOIDmode)
16586 mode = DImode;
16587
16588 part = gen_highpart_mode (SImode, mode, val);
16589
16590 gcc_assert (CONST_INT_P (part));
16591
16592 if (const_ok_for_arm (INTVAL (part))
16593 || const_ok_for_arm (~INTVAL (part)))
16594 return true;
16595
16596 part = gen_lowpart (SImode, val);
16597
16598 gcc_assert (CONST_INT_P (part));
16599
16600 if (const_ok_for_arm (INTVAL (part))
16601 || const_ok_for_arm (~INTVAL (part)))
16602 return true;
16603
16604 return false;
16605 }
16606
16607 /* Return true if it is possible to inline both the high and low parts
16608 of a 64-bit constant into 32-bit data processing instructions. */
16609 bool
16610 arm_const_double_by_immediates (rtx val)
16611 {
16612 machine_mode mode = GET_MODE (val);
16613 rtx part;
16614
16615 if (mode == VOIDmode)
16616 mode = DImode;
16617
16618 part = gen_highpart_mode (SImode, mode, val);
16619
16620 gcc_assert (CONST_INT_P (part));
16621
16622 if (!const_ok_for_arm (INTVAL (part)))
16623 return false;
16624
16625 part = gen_lowpart (SImode, val);
16626
16627 gcc_assert (CONST_INT_P (part));
16628
16629 if (!const_ok_for_arm (INTVAL (part)))
16630 return false;
16631
16632 return true;
16633 }
16634
16635 /* Scan INSN and note any of its operands that need fixing.
16636 If DO_PUSHES is false we do not actually push any of the fixups
16637 needed. */
16638 static void
16639 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16640 {
16641 int opno;
16642
16643 extract_constrain_insn (insn);
16644
16645 if (recog_data.n_alternatives == 0)
16646 return;
16647
16648 /* Fill in recog_op_alt with information about the constraints of
16649 this insn. */
16650 preprocess_constraints (insn);
16651
16652 const operand_alternative *op_alt = which_op_alt ();
16653 for (opno = 0; opno < recog_data.n_operands; opno++)
16654 {
16655 /* Things we need to fix can only occur in inputs. */
16656 if (recog_data.operand_type[opno] != OP_IN)
16657 continue;
16658
16659 /* If this alternative is a memory reference, then any mention
16660 of constants in this alternative is really to fool reload
16661 into allowing us to accept one there. We need to fix them up
16662 now so that we output the right code. */
16663 if (op_alt[opno].memory_ok)
16664 {
16665 rtx op = recog_data.operand[opno];
16666
16667 if (CONSTANT_P (op))
16668 {
16669 if (do_pushes)
16670 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16671 recog_data.operand_mode[opno], op);
16672 }
16673 else if (MEM_P (op)
16674 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16675 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16676 {
16677 if (do_pushes)
16678 {
16679 rtx cop = avoid_constant_pool_reference (op);
16680
16681 /* Casting the address of something to a mode narrower
16682 than a word can cause avoid_constant_pool_reference()
16683 to return the pool reference itself. That's no good to
16684 us here. Let's just hope that we can use the
16685 constant pool value directly. */
16686 if (op == cop)
16687 cop = get_pool_constant (XEXP (op, 0));
16688
16689 push_minipool_fix (insn, address,
16690 recog_data.operand_loc[opno],
16691 recog_data.operand_mode[opno], cop);
16692 }
16693
16694 }
16695 }
16696 }
16697
16698 return;
16699 }
16700
16701 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16702 and unions in the context of ARMv8-M Security Extensions. It is used as a
16703 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16704 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
16705 or four masks, depending on whether it is being computed for a
16706 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16707 respectively. The tree for the type of the argument or a field within an
16708 argument is passed in ARG_TYPE, the current register this argument or field
16709 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16710 argument or field starts at is passed in STARTING_BIT and the last used bit
16711 is kept in LAST_USED_BIT which is also updated accordingly. */
16712
16713 static unsigned HOST_WIDE_INT
16714 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16715 uint32_t * padding_bits_to_clear,
16716 unsigned starting_bit, int * last_used_bit)
16717
16718 {
16719 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16720
16721 if (TREE_CODE (arg_type) == RECORD_TYPE)
16722 {
16723 unsigned current_bit = starting_bit;
16724 tree field;
16725 long int offset, size;
16726
16727
16728 field = TYPE_FIELDS (arg_type);
16729 while (field)
16730 {
16731 /* The offset within a structure is always an offset from
16732 the start of that structure. Make sure we take that into account
16733 in the calculation of the register-based offset we use here. */
16734 offset = starting_bit;
16735 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16736 offset %= 32;
16737
16738 /* This is the actual size of the field, for bitfields this is the
16739 bitfield width and not the container size. */
16740 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16741
16742 if (*last_used_bit != offset)
16743 {
16744 if (offset < *last_used_bit)
16745 {
16746 /* This field's offset is before the 'last_used_bit', which
16747 means this field goes in the next register. So we need to
16748 pad the rest of the current register and increase the
16749 register number. */
16750 uint32_t mask;
16751 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16752 mask++;
16753
16754 padding_bits_to_clear[*regno] |= mask;
16755 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16756 (*regno)++;
16757 }
16758 else
16759 {
16760 /* Otherwise we pad the bits between the last field's end and
16761 the start of the new field. */
16762 uint32_t mask;
16763
16764 mask = ((uint32_t)-1) >> (32 - offset);
16765 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16766 padding_bits_to_clear[*regno] |= mask;
16767 }
16768 current_bit = offset;
16769 }
16770
16771 /* Calculate further padding bits for inner structs/unions too. */
16772 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16773 {
16774 *last_used_bit = current_bit;
16775 not_to_clear_reg_mask
16776 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16777 padding_bits_to_clear, offset,
16778 last_used_bit);
16779 }
16780 else
16781 {
16782 /* Update 'current_bit' with this field's size. If the
16783 'current_bit' lies in a subsequent register, update 'regno' and
16784 reset 'current_bit' to point to the current bit in that new
16785 register. */
16786 current_bit += size;
16787 while (current_bit >= 32)
16788 {
16789 current_bit -= 32;
16790 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16791 (*regno)++;
16792 }
16793 *last_used_bit = current_bit;
16794 }
16795
16796 field = TREE_CHAIN (field);
16797 }
16798 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16799 }
16800 else if (TREE_CODE (arg_type) == UNION_TYPE)
16801 {
16802 tree field, field_t;
16803 int i, regno_t, field_size;
16804 int max_reg = -1;
16805 int max_bit = -1;
16806 uint32_t mask;
16807 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16808 = {-1, -1, -1, -1};
16809
16810 /* To compute the padding bits in a union we only consider bits as
16811 padding bits if they are always either a padding bit or fall outside a
16812 field's size for all fields in the union. */
16813 field = TYPE_FIELDS (arg_type);
16814 while (field)
16815 {
16816 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16817 = {0U, 0U, 0U, 0U};
16818 int last_used_bit_t = *last_used_bit;
16819 regno_t = *regno;
16820 field_t = TREE_TYPE (field);
16821
16822 /* If the field's type is either a record or a union make sure to
16823 compute their padding bits too. */
16824 if (RECORD_OR_UNION_TYPE_P (field_t))
16825 not_to_clear_reg_mask
16826 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16827 &padding_bits_to_clear_t[0],
16828 starting_bit, &last_used_bit_t);
16829 else
16830 {
16831 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16832 regno_t = (field_size / 32) + *regno;
16833 last_used_bit_t = (starting_bit + field_size) % 32;
16834 }
16835
16836 for (i = *regno; i < regno_t; i++)
16837 {
16838 /* For all but the last register used by this field only keep the
16839 padding bits that were padding bits in this field. */
16840 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16841 }
16842
16843 /* For the last register, keep all padding bits that were padding
16844 bits in this field and any padding bits that are still valid
16845 as padding bits but fall outside of this field's size. */
16846 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16847 padding_bits_to_clear_res[regno_t]
16848 &= padding_bits_to_clear_t[regno_t] | mask;
16849
16850 /* Update the maximum size of the fields in terms of registers used
16851 ('max_reg') and the 'last_used_bit' in said register. */
16852 if (max_reg < regno_t)
16853 {
16854 max_reg = regno_t;
16855 max_bit = last_used_bit_t;
16856 }
16857 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16858 max_bit = last_used_bit_t;
16859
16860 field = TREE_CHAIN (field);
16861 }
16862
16863 /* Update the current padding_bits_to_clear using the intersection of the
16864 padding bits of all the fields. */
16865 for (i = *regno; i < max_reg; i++)
16866 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16867
16868 /* Do not keep trailing padding bits; we do not know yet whether this
16869 is the end of the argument. */
16870 mask = ((uint32_t) 1 << max_bit) - 1;
16871 padding_bits_to_clear[max_reg]
16872 |= padding_bits_to_clear_res[max_reg] & mask;
16873
16874 *regno = max_reg;
16875 *last_used_bit = max_bit;
16876 }
16877 else
16878 /* This function should only be used for structs and unions. */
16879 gcc_unreachable ();
16880
16881 return not_to_clear_reg_mask;
16882 }
16883
16884 /* In the context of ARMv8-M Security Extensions, this function is used for both
16885 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16886 registers are used when returning or passing arguments, which is then
16887 returned as a mask. It will also compute a mask to indicate padding/unused
16888 bits for each of these registers, and passes this through the
16889 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16890 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16891 the starting register used to pass this argument or return value is passed
16892 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16893 for struct and union types. */
16894
16895 static unsigned HOST_WIDE_INT
16896 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
16897 uint32_t * padding_bits_to_clear)
16898
16899 {
16900 int last_used_bit = 0;
16901 unsigned HOST_WIDE_INT not_to_clear_mask;
16902
16903 if (RECORD_OR_UNION_TYPE_P (arg_type))
16904 {
16905 not_to_clear_mask
16906 = comp_not_to_clear_mask_str_un (arg_type, &regno,
16907 padding_bits_to_clear, 0,
16908 &last_used_bit);
16909
16910
16911 /* If the 'last_used_bit' is not zero, that means we are still using a
16912 part of the last 'regno'. In such cases we must clear the trailing
16913 bits. Otherwise we are not using regno and we should mark it as to
16914 be cleared. */
16915 if (last_used_bit != 0)
16916 padding_bits_to_clear[regno]
16917 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
16918 else
16919 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
16920 }
16921 else
16922 {
16923 not_to_clear_mask = 0;
16924 /* We are not dealing with structs or unions, so these arguments may
16925 also be passed in floating-point registers. In some cases a BLKmode is
16926 used when returning or passing arguments in multiple VFP registers. */
16927 if (GET_MODE (arg_rtx) == BLKmode)
16928 {
16929 int i, arg_regs;
16930 rtx reg;
16931
16932 /* This should really only occur when dealing with the hard-float
16933 ABI. */
16934 gcc_assert (TARGET_HARD_FLOAT_ABI);
16935
16936 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
16937 {
16938 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
16939 gcc_assert (REG_P (reg));
16940
16941 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
16942
16943 /* If we are dealing with DF mode, make sure we don't
16944 clear either of the registers it addresses. */
16945 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
16946 if (arg_regs > 1)
16947 {
16948 unsigned HOST_WIDE_INT mask;
16949 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
16950 mask -= HOST_WIDE_INT_1U << REGNO (reg);
16951 not_to_clear_mask |= mask;
16952 }
16953 }
16954 }
16955 else
16956 {
16957 /* Otherwise we can rely on the MODE to determine how many registers
16958 are being used by this argument. */
16959 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
16960 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16961 if (arg_regs > 1)
16962 {
16963 unsigned HOST_WIDE_INT
16964 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
16965 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16966 not_to_clear_mask |= mask;
16967 }
16968 }
16969 }
16970
16971 return not_to_clear_mask;
16972 }
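
/* Illustrative trace of the function above for a hypothetical argument
   of type struct { uint8_t a; uint32_t b; } starting in r0, assuming
   the usual AAPCS layout (a in byte 0, three bytes of padding, b in
   bytes 4-7).  The struct occupies r0 and r1, so bits 0 and 1 of the
   returned mask are set; the three padding bytes give
   padding_bits_to_clear[0] == 0xffffff00, while
   padding_bits_to_clear[1] stays 0 because r1 is fully used.  */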
16973
16974 /* Clears caller saved registers not used to pass arguments before a
16975 cmse_nonsecure_call. Saving, clearing and restoring of callee saved
16976 registers is done in __gnu_cmse_nonsecure_call libcall.
16977 See libgcc/config/arm/cmse_nonsecure_call.S. */
16978
16979 static void
16980 cmse_nonsecure_call_clear_caller_saved (void)
16981 {
16982 basic_block bb;
16983
16984 FOR_EACH_BB_FN (bb, cfun)
16985 {
16986 rtx_insn *insn;
16987
16988 FOR_BB_INSNS (bb, insn)
16989 {
16990 uint64_t to_clear_mask, float_mask;
16991 rtx_insn *seq;
16992 rtx pat, call, unspec, reg, cleared_reg, tmp;
16993 unsigned int regno, maxregno;
16994 rtx address;
16995 CUMULATIVE_ARGS args_so_far_v;
16996 cumulative_args_t args_so_far;
16997 tree arg_type, fntype;
16998 bool using_r4, first_param = true;
16999 function_args_iterator args_iter;
17000 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
17001 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
17002
17003 if (!NONDEBUG_INSN_P (insn))
17004 continue;
17005
17006 if (!CALL_P (insn))
17007 continue;
17008
17009 pat = PATTERN (insn);
17010 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
17011 call = XVECEXP (pat, 0, 0);
17012
17013 /* Get the real call RTX if the insn sets a value, ie. returns. */
17014 if (GET_CODE (call) == SET)
17015 call = SET_SRC (call);
17016
17017 /* Check if it is a cmse_nonsecure_call. */
17018 unspec = XEXP (call, 0);
17019 if (GET_CODE (unspec) != UNSPEC
17020 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
17021 continue;
17022
17023 /* Determine the caller-saved registers we need to clear. */
17024 to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
17025 maxregno = NUM_ARG_REGS - 1;
17026 /* Only look at the caller-saved floating point registers in case of
17027 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
17028 lazy store and loads which clear both caller- and callee-saved
17029 registers. */
17030 if (TARGET_HARD_FLOAT_ABI)
17031 {
17032 float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
17033 float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
17034 to_clear_mask |= float_mask;
17035 maxregno = D7_VFP_REGNUM;
17036 }
17037
17038 /* Make sure the register used to hold the function address is not
17039 cleared. */
17040 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17041 gcc_assert (MEM_P (address));
17042 gcc_assert (REG_P (XEXP (address, 0)));
17043 to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
17044
17045 /* Set basic block of call insn so that df rescan is performed on
17046 insns inserted here. */
17047 set_block_for_insn (insn, bb);
17048 df_set_flags (DF_DEFER_INSN_RESCAN);
17049 start_sequence ();
17050
17051 /* Make sure the scheduler doesn't schedule other insns beyond
17052 here. */
17053 emit_insn (gen_blockage ());
17054
17055 /* Walk through all arguments and clear registers
17056 appropriately. */
17057 fntype = TREE_TYPE (MEM_EXPR (address));
17058 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17059 NULL_TREE);
17060 args_so_far = pack_cumulative_args (&args_so_far_v);
17061 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17062 {
17063 rtx arg_rtx;
17064 machine_mode arg_mode = TYPE_MODE (arg_type);
17065
17066 if (VOID_TYPE_P (arg_type))
17067 continue;
17068
17069 if (!first_param)
17070 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17071 true);
17072
17073 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17074 true);
17075 gcc_assert (REG_P (arg_rtx));
17076 to_clear_mask
17077 &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
17078 REGNO (arg_rtx),
17079 padding_bits_to_clear_ptr);
17080
17081 first_param = false;
17082 }
17083
17084 /* Clear padding bits where needed. */
17085 cleared_reg = XEXP (address, 0);
17086 reg = gen_rtx_REG (SImode, IP_REGNUM);
17087 using_r4 = false;
17088 for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
17089 {
17090 if (padding_bits_to_clear[regno] == 0)
17091 continue;
17092
17093 /* If this is a Thumb-1 target, copy the address of the function
17094 we are calling from 'r4' into 'ip' so that we can use r4 to
17095 clear the unused bits in the arguments. */
17096 if (TARGET_THUMB1 && !using_r4)
17097 {
17098 using_r4 = true;
17099 reg = cleared_reg;
17100 emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
17101 reg);
17102 }
17103
17104 tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
17105 emit_move_insn (reg, tmp);
17106 /* Also fill the top half of the negated
17107 padding_bits_to_clear. */
17108 if (((~padding_bits_to_clear[regno]) >> 16) > 0)
17109 {
17110 tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
17111 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
17112 GEN_INT (16),
17113 GEN_INT (16)),
17114 tmp));
17115 }
17116
17117 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
17118 gen_rtx_REG (SImode, regno),
17119 reg));
17120
17121 }
17122 if (using_r4)
17123 emit_move_insn (cleared_reg,
17124 gen_rtx_REG (SImode, IP_REGNUM));
17125
17126 /* We use right shift and left shift to clear the LSB of the address
17127 we jump to instead of using bic, to avoid having to use an extra
17128 register on Thumb-1. */
17129 tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
17130 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17131 tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
17132 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17133
17134 /* Clear all registers that could leak information before doing a
17135 non-secure call. */
17136 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17137 {
17138 if (!(to_clear_mask & (1LL << regno)))
17139 continue;
17140
17141 /* If regno is an even vfp register and its successor is also to
17142 be cleared, use vmov. */
17143 if (IS_VFP_REGNUM (regno))
17144 {
17145 if (TARGET_VFP_DOUBLE
17146 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17147 && to_clear_mask & (1LL << (regno + 1)))
17148 emit_move_insn (gen_rtx_REG (DFmode, regno++),
17149 CONST0_RTX (DFmode));
17150 else
17151 emit_move_insn (gen_rtx_REG (SFmode, regno),
17152 CONST0_RTX (SFmode));
17153 }
17154 else
17155 emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
17156 }
17157
17158 seq = get_insns ();
17159 end_sequence ();
17160 emit_insn_before (seq, insn);
17161
17162 }
17163 }
17164 }
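
/* Illustrative sketch, continuing the hypothetical
   struct { uint8_t a; uint32_t b; } example given above: the
   padding-bit loop in this function would load ~0xffffff00 == 0xff
   into the scratch register (the upper 16 bits of that value are zero,
   so no second move is needed) and then AND it into r0, zeroing the
   caller's three padding bytes so that they cannot leak to the
   non-secure callee.  */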
17165
17166 /* Rewrite move insn into subtract of 0 if the condition codes will
17167 be useful in next conditional jump insn. */
17168
17169 static void
17170 thumb1_reorg (void)
17171 {
17172 basic_block bb;
17173
17174 FOR_EACH_BB_FN (bb, cfun)
17175 {
17176 rtx dest, src;
17177 rtx cmp, op0, op1, set = NULL;
17178 rtx_insn *prev, *insn = BB_END (bb);
17179 bool insn_clobbered = false;
17180
17181 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17182 insn = PREV_INSN (insn);
17183
17184 /* Find the last cbranchsi4_insn in basic block BB. */
17185 if (insn == BB_HEAD (bb)
17186 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17187 continue;
17188
17189 /* Get the register with which we are comparing. */
17190 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17191 op0 = XEXP (cmp, 0);
17192 op1 = XEXP (cmp, 1);
17193
17194 /* Check that comparison is against ZERO. */
17195 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17196 continue;
17197
17198 /* Find the first flag setting insn before INSN in basic block BB. */
17199 gcc_assert (insn != BB_HEAD (bb));
17200 for (prev = PREV_INSN (insn);
17201 (!insn_clobbered
17202 && prev != BB_HEAD (bb)
17203 && (NOTE_P (prev)
17204 || DEBUG_INSN_P (prev)
17205 || ((set = single_set (prev)) != NULL
17206 && get_attr_conds (prev) == CONDS_NOCOND)));
17207 prev = PREV_INSN (prev))
17208 {
17209 if (reg_set_p (op0, prev))
17210 insn_clobbered = true;
17211 }
17212
17213 /* Skip if op0 is clobbered by insn other than prev. */
17214 if (insn_clobbered)
17215 continue;
17216
17217 if (!set)
17218 continue;
17219
17220 dest = SET_DEST (set);
17221 src = SET_SRC (set);
17222 if (!low_register_operand (dest, SImode)
17223 || !low_register_operand (src, SImode))
17224 continue;
17225
17226 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17227 in INSN. Both src and dest of the move insn are checked. */
17228 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17229 {
17230 dest = copy_rtx (dest);
17231 src = copy_rtx (src);
17232 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17233 PATTERN (prev) = gen_rtx_SET (dest, src);
17234 INSN_CODE (prev) = -1;
17235 /* Set test register in INSN to dest. */
17236 XEXP (cmp, 0) = copy_rtx (dest);
17237 INSN_CODE (insn) = -1;
17238 }
17239 }
17240 }
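
/* Illustrative sketch (register numbers and label invented): the
   rewrite above turns a sequence such as

       mov   r1, r2
       ...               @ no intervening flag-setting insns
       cmp   r2, #0
       bne   .L1

   into

       subs  r1, r2, #0
       ...
       @ compare r1 with zero, then bne .L1

   Since the SUBS already leaves the condition codes describing the
   compared value, the explicit compare can usually be omitted when the
   branch is output.  */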
17241
17242 /* Convert instructions to their cc-clobbering variant if possible, since
17243 that allows us to use smaller encodings. */
17244
17245 static void
17246 thumb2_reorg (void)
17247 {
17248 basic_block bb;
17249 regset_head live;
17250
17251 INIT_REG_SET (&live);
17252
17253 /* We are freeing block_for_insn in the toplev to keep compatibility
17254 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17255 compute_bb_for_insn ();
17256 df_analyze ();
17257
17258 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17259
17260 FOR_EACH_BB_FN (bb, cfun)
17261 {
17262 if ((current_tune->disparage_flag_setting_t16_encodings
17263 == tune_params::DISPARAGE_FLAGS_ALL)
17264 && optimize_bb_for_speed_p (bb))
17265 continue;
17266
17267 rtx_insn *insn;
17268 Convert_Action action = SKIP;
17269 Convert_Action action_for_partial_flag_setting
17270 = ((current_tune->disparage_flag_setting_t16_encodings
17271 != tune_params::DISPARAGE_FLAGS_NEITHER)
17272 && optimize_bb_for_speed_p (bb))
17273 ? SKIP : CONV;
17274
17275 COPY_REG_SET (&live, DF_LR_OUT (bb));
17276 df_simulate_initialize_backwards (bb, &live);
17277 FOR_BB_INSNS_REVERSE (bb, insn)
17278 {
17279 if (NONJUMP_INSN_P (insn)
17280 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17281 && GET_CODE (PATTERN (insn)) == SET)
17282 {
17283 action = SKIP;
17284 rtx pat = PATTERN (insn);
17285 rtx dst = XEXP (pat, 0);
17286 rtx src = XEXP (pat, 1);
17287 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17288
17289 if (UNARY_P (src) || BINARY_P (src))
17290 op0 = XEXP (src, 0);
17291
17292 if (BINARY_P (src))
17293 op1 = XEXP (src, 1);
17294
17295 if (low_register_operand (dst, SImode))
17296 {
17297 switch (GET_CODE (src))
17298 {
17299 case PLUS:
17300 /* Adding two registers and storing the result
17301 in the first source is already a 16-bit
17302 operation. */
17303 if (rtx_equal_p (dst, op0)
17304 && register_operand (op1, SImode))
17305 break;
17306
17307 if (low_register_operand (op0, SImode))
17308 {
17309 /* ADDS <Rd>,<Rn>,<Rm> */
17310 if (low_register_operand (op1, SImode))
17311 action = CONV;
17312 /* ADDS <Rdn>,#<imm8> */
17313 /* SUBS <Rdn>,#<imm8> */
17314 else if (rtx_equal_p (dst, op0)
17315 && CONST_INT_P (op1)
17316 && IN_RANGE (INTVAL (op1), -255, 255))
17317 action = CONV;
17318 /* ADDS <Rd>,<Rn>,#<imm3> */
17319 /* SUBS <Rd>,<Rn>,#<imm3> */
17320 else if (CONST_INT_P (op1)
17321 && IN_RANGE (INTVAL (op1), -7, 7))
17322 action = CONV;
17323 }
17324 /* ADCS <Rd>, <Rn> */
17325 else if (GET_CODE (XEXP (src, 0)) == PLUS
17326 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17327 && low_register_operand (XEXP (XEXP (src, 0), 1),
17328 SImode)
17329 && COMPARISON_P (op1)
17330 && cc_register (XEXP (op1, 0), VOIDmode)
17331 && maybe_get_arm_condition_code (op1) == ARM_CS
17332 && XEXP (op1, 1) == const0_rtx)
17333 action = CONV;
17334 break;
17335
17336 case MINUS:
17337 /* RSBS <Rd>,<Rn>,#0
17338 Not handled here: see NEG below. */
17339 /* SUBS <Rd>,<Rn>,#<imm3>
17340 SUBS <Rdn>,#<imm8>
17341 Not handled here: see PLUS above. */
17342 /* SUBS <Rd>,<Rn>,<Rm> */
17343 if (low_register_operand (op0, SImode)
17344 && low_register_operand (op1, SImode))
17345 action = CONV;
17346 break;
17347
17348 case MULT:
17349 /* MULS <Rdm>,<Rn>,<Rdm>
17350 As an exception to the rule, this is only used
17351 when optimizing for size since MULS is slow on all
17352 known implementations. We do not even want to use
17353 MULS in cold code, if optimizing for speed, so we
17354 test the global flag here. */
17355 if (!optimize_size)
17356 break;
17357 /* Fall through. */
17358 case AND:
17359 case IOR:
17360 case XOR:
17361 /* ANDS <Rdn>,<Rm> */
17362 if (rtx_equal_p (dst, op0)
17363 && low_register_operand (op1, SImode))
17364 action = action_for_partial_flag_setting;
17365 else if (rtx_equal_p (dst, op1)
17366 && low_register_operand (op0, SImode))
17367 action = action_for_partial_flag_setting == SKIP
17368 ? SKIP : SWAP_CONV;
17369 break;
17370
17371 case ASHIFTRT:
17372 case ASHIFT:
17373 case LSHIFTRT:
17374 /* ASRS <Rdn>,<Rm> */
17375 /* LSRS <Rdn>,<Rm> */
17376 /* LSLS <Rdn>,<Rm> */
17377 if (rtx_equal_p (dst, op0)
17378 && low_register_operand (op1, SImode))
17379 action = action_for_partial_flag_setting;
17380 /* ASRS <Rd>,<Rm>,#<imm5> */
17381 /* LSRS <Rd>,<Rm>,#<imm5> */
17382 /* LSLS <Rd>,<Rm>,#<imm5> */
17383 else if (low_register_operand (op0, SImode)
17384 && CONST_INT_P (op1)
17385 && IN_RANGE (INTVAL (op1), 0, 31))
17386 action = action_for_partial_flag_setting;
17387 break;
17388
17389 case ROTATERT:
17390 /* RORS <Rdn>,<Rm> */
17391 if (rtx_equal_p (dst, op0)
17392 && low_register_operand (op1, SImode))
17393 action = action_for_partial_flag_setting;
17394 break;
17395
17396 case NOT:
17397 /* MVNS <Rd>,<Rm> */
17398 if (low_register_operand (op0, SImode))
17399 action = action_for_partial_flag_setting;
17400 break;
17401
17402 case NEG:
17403 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17404 if (low_register_operand (op0, SImode))
17405 action = CONV;
17406 break;
17407
17408 case CONST_INT:
17409 /* MOVS <Rd>,#<imm8> */
17410 if (CONST_INT_P (src)
17411 && IN_RANGE (INTVAL (src), 0, 255))
17412 action = action_for_partial_flag_setting;
17413 break;
17414
17415 case REG:
17416 /* MOVS and MOV<c> with registers have different
17417 encodings, so are not relevant here. */
17418 break;
17419
17420 default:
17421 break;
17422 }
17423 }
17424
17425 if (action != SKIP)
17426 {
17427 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17428 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17429 rtvec vec;
17430
17431 if (action == SWAP_CONV)
17432 {
17433 src = copy_rtx (src);
17434 XEXP (src, 0) = op1;
17435 XEXP (src, 1) = op0;
17436 pat = gen_rtx_SET (dst, src);
17437 vec = gen_rtvec (2, pat, clobber);
17438 }
17439 else /* action == CONV */
17440 vec = gen_rtvec (2, pat, clobber);
17441
17442 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17443 INSN_CODE (insn) = -1;
17444 }
17445 }
17446
17447 if (NONDEBUG_INSN_P (insn))
17448 df_simulate_one_insn_backwards (bb, insn, &live);
17449 }
17450 }
17451
17452 CLEAR_REG_SET (&live);
17453 }
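
/* Illustrative sketch: when the condition codes are dead, the pass
   above rewrites, for example,
       (set (reg:SI r0) (plus:SI (reg:SI r0) (reg:SI r1)))
   as a PARALLEL of the same SET with a (clobber (reg:CC CC_REGNUM)),
   which matches the flag-setting patterns and so allows the 16-bit
   ADDS encoding to be used instead of the 32-bit ADD.W form.  */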
17454
17455 /* GCC puts the pool in the wrong place for ARM, since we can only
17456 load addresses a limited distance around the pc. We do some
17457 special munging to move the constant pool values to the correct
17458 point in the code. */
17459 static void
17460 arm_reorg (void)
17461 {
17462 rtx_insn *insn;
17463 HOST_WIDE_INT address = 0;
17464 Mfix * fix;
17465
17466 if (use_cmse)
17467 cmse_nonsecure_call_clear_caller_saved ();
17468 if (TARGET_THUMB1)
17469 thumb1_reorg ();
17470 else if (TARGET_THUMB2)
17471 thumb2_reorg ();
17472
17473 /* Ensure all insns that must be split have been split at this point.
17474 Otherwise, the pool placement code below may compute incorrect
17475 insn lengths. Note that when optimizing, all insns have already
17476 been split at this point. */
17477 if (!optimize)
17478 split_all_insns_noflow ();
17479
17480 /* Make sure we do not attempt to create a literal pool even though it should
17481 no longer be necessary to create any. */
17482 if (arm_disable_literal_pool)
17483 return;
17484
17485 minipool_fix_head = minipool_fix_tail = NULL;
17486
17487 /* The first insn must always be a note, or the code below won't
17488 scan it properly. */
17489 insn = get_insns ();
17490 gcc_assert (NOTE_P (insn));
17491 minipool_pad = 0;
17492
17493 /* Scan all the insns and record the operands that will need fixing. */
17494 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17495 {
17496 if (BARRIER_P (insn))
17497 push_minipool_barrier (insn, address);
17498 else if (INSN_P (insn))
17499 {
17500 rtx_jump_table_data *table;
17501
17502 note_invalid_constants (insn, address, true);
17503 address += get_attr_length (insn);
17504
17505 /* If the insn is a vector jump, add the size of the table
17506 and skip the table. */
17507 if (tablejump_p (insn, NULL, &table))
17508 {
17509 address += get_jump_table_size (table);
17510 insn = table;
17511 }
17512 }
17513 else if (LABEL_P (insn))
17514 /* Add the worst-case padding due to alignment. We don't add
17515 the _current_ padding because the minipool insertions
17516 themselves might change it. */
17517 address += get_label_padding (insn);
17518 }
17519
17520 fix = minipool_fix_head;
17521
17522 /* Now scan the fixups and perform the required changes. */
17523 while (fix)
17524 {
17525 Mfix * ftmp;
17526 Mfix * fdel;
17527 Mfix * last_added_fix;
17528 Mfix * last_barrier = NULL;
17529 Mfix * this_fix;
17530
17531 /* Skip any further barriers before the next fix. */
17532 while (fix && BARRIER_P (fix->insn))
17533 fix = fix->next;
17534
17535 /* No more fixes. */
17536 if (fix == NULL)
17537 break;
17538
17539 last_added_fix = NULL;
17540
17541 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17542 {
17543 if (BARRIER_P (ftmp->insn))
17544 {
17545 if (ftmp->address >= minipool_vector_head->max_address)
17546 break;
17547
17548 last_barrier = ftmp;
17549 }
17550 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17551 break;
17552
17553 last_added_fix = ftmp; /* Keep track of the last fix added. */
17554 }
17555
17556 /* If we found a barrier, drop back to that; any fixes that we
17557 could have reached but come after the barrier will now go in
17558 the next mini-pool. */
17559 if (last_barrier != NULL)
17560 {
17561 /* Reduce the refcount for those fixes that won't go into this
17562 pool after all. */
17563 for (fdel = last_barrier->next;
17564 fdel && fdel != ftmp;
17565 fdel = fdel->next)
17566 {
17567 fdel->minipool->refcount--;
17568 fdel->minipool = NULL;
17569 }
17570
17571 ftmp = last_barrier;
17572 }
17573 else
17574 {
17575 /* ftmp is the first fix that we can't fit into this pool and
17576 there are no natural barriers that we could use. Insert a
17577 new barrier in the code somewhere between the previous
17578 fix and this one, and arrange to jump around it. */
17579 HOST_WIDE_INT max_address;
17580
17581 /* The last item on the list of fixes must be a barrier, so
17582 we can never run off the end of the list of fixes without
17583 last_barrier being set. */
17584 gcc_assert (ftmp);
17585
17586 max_address = minipool_vector_head->max_address;
17587 /* Check that there isn't another fix that is in range that
17588 we couldn't fit into this pool because the pool was
17589 already too large: we need to put the pool before such an
17590 instruction. The pool itself may come just after the
17591 fix because create_fix_barrier also allows space for a
17592 jump instruction. */
17593 if (ftmp->address < max_address)
17594 max_address = ftmp->address + 1;
17595
17596 last_barrier = create_fix_barrier (last_added_fix, max_address);
17597 }
17598
17599 assign_minipool_offsets (last_barrier);
17600
17601 while (ftmp)
17602 {
17603 if (!BARRIER_P (ftmp->insn)
17604 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17605 == NULL))
17606 break;
17607
17608 ftmp = ftmp->next;
17609 }
17610
17611 /* Scan over the fixes we have identified for this pool, fixing them
17612 up and adding the constants to the pool itself. */
17613 for (this_fix = fix; this_fix && ftmp != this_fix;
17614 this_fix = this_fix->next)
17615 if (!BARRIER_P (this_fix->insn))
17616 {
17617 rtx addr
17618 = plus_constant (Pmode,
17619 gen_rtx_LABEL_REF (VOIDmode,
17620 minipool_vector_label),
17621 this_fix->minipool->offset);
17622 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17623 }
17624
17625 dump_minipool (last_barrier->insn);
17626 fix = ftmp;
17627 }
17628
17629 /* From now on we must synthesize any constants that we can't handle
17630 directly. This can happen if the RTL gets split during final
17631 instruction generation. */
17632 cfun->machine->after_arm_reorg = 1;
17633
17634 /* Free the minipool memory. */
17635 obstack_free (&minipool_obstack, minipool_startobj);
17636 }
17637 \f
17638 /* Routines to output assembly language. */
17639
17640 /* Return the string representation of the real value passed in. */
17641 static const char *
17642 fp_const_from_val (REAL_VALUE_TYPE *r)
17643 {
17644 if (!fp_consts_inited)
17645 init_fp_table ();
17646
17647 gcc_assert (real_equal (r, &value_fp0));
17648 return "0";
17649 }
17650
17651 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17652 OPERANDS[1] is the base register, RETURN_PC is true iff the return
17653 insn is in the list, and UPDATE is true iff the list contains an
17654 explicit update of the base register. */
17655 void
17656 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17657 bool update)
17658 {
17659 int i;
17660 char pattern[100];
17661 int offset;
17662 const char *conditional;
17663 int num_saves = XVECLEN (operands[0], 0);
17664 unsigned int regno;
17665 unsigned int regno_base = REGNO (operands[1]);
17666 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17667
17668 offset = 0;
17669 offset += update ? 1 : 0;
17670 offset += return_pc ? 1 : 0;
17671
17672 /* Is the base register in the list? */
17673 for (i = offset; i < num_saves; i++)
17674 {
17675 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17676 /* If SP is in the list, then the base register must be SP. */
17677 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17678 /* If base register is in the list, there must be no explicit update. */
17679 if (regno == regno_base)
17680 gcc_assert (!update);
17681 }
17682
17683 conditional = reverse ? "%?%D0" : "%?%d0";
17684 /* Can't use POP if returning from an interrupt. */
17685 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17686 sprintf (pattern, "pop%s\t{", conditional);
17687 else
17688 {
17689 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17690 It's just a convention; their semantics are identical. */
17691 if (regno_base == SP_REGNUM)
17692 sprintf (pattern, "ldmfd%s\t", conditional);
17693 else if (update)
17694 sprintf (pattern, "ldmia%s\t", conditional);
17695 else
17696 sprintf (pattern, "ldm%s\t", conditional);
17697
17698 strcat (pattern, reg_names[regno_base]);
17699 if (update)
17700 strcat (pattern, "!, {");
17701 else
17702 strcat (pattern, ", {");
17703 }
17704
17705 /* Output the first destination register. */
17706 strcat (pattern,
17707 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17708
17709 /* Output the rest of the destination registers. */
17710 for (i = offset + 1; i < num_saves; i++)
17711 {
17712 strcat (pattern, ", ");
17713 strcat (pattern,
17714 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17715 }
17716
17717 strcat (pattern, "}");
17718
17719 if (interrupt_p && return_pc)
17720 strcat (pattern, "^");
17721
17722 output_asm_insn (pattern, &cond);
17723 }
17724
17725
17726 /* Output the assembly for a store multiple. */
17727
17728 const char *
17729 vfp_output_vstmd (rtx * operands)
17730 {
17731 char pattern[100];
17732 int p;
17733 int base;
17734 int i;
17735 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17736 ? XEXP (operands[0], 0)
17737 : XEXP (XEXP (operands[0], 0), 0);
17738 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17739
17740 if (push_p)
17741 strcpy (pattern, "vpush%?.64\t{%P1");
17742 else
17743 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17744
17745 p = strlen (pattern);
17746
17747 gcc_assert (REG_P (operands[1]));
17748
17749 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17750 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17751 {
17752 p += sprintf (&pattern[p], ", d%d", base + i);
17753 }
17754 strcpy (&pattern[p], "}");
17755
17756 output_asm_insn (pattern, operands);
17757 return "";
17758 }
17759
17760
17761 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17762 number of bytes pushed. */
17763
17764 static int
17765 vfp_emit_fstmd (int base_reg, int count)
17766 {
17767 rtx par;
17768 rtx dwarf;
17769 rtx tmp, reg;
17770 int i;
17771
17772 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17773 register pairs are stored by a store multiple insn. We avoid this
17774 by pushing an extra pair. */
17775 if (count == 2 && !arm_arch6)
17776 {
17777 if (base_reg == LAST_VFP_REGNUM - 3)
17778 base_reg -= 2;
17779 count++;
17780 }
17781
17782 /* FSTMD may not store more than 16 doubleword registers at once. Split
17783 larger stores into multiple parts (up to a maximum of two, in
17784 practice). */
17785 if (count > 16)
17786 {
17787 int saved;
17788 /* NOTE: base_reg is an internal register number, so each D register
17789 counts as 2. */
17790 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17791 saved += vfp_emit_fstmd (base_reg, 16);
17792 return saved;
17793 }
17794
17795 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17796 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17797
17798 reg = gen_rtx_REG (DFmode, base_reg);
17799 base_reg += 2;
17800
17801 XVECEXP (par, 0, 0)
17802 = gen_rtx_SET (gen_frame_mem
17803 (BLKmode,
17804 gen_rtx_PRE_MODIFY (Pmode,
17805 stack_pointer_rtx,
17806 plus_constant
17807 (Pmode, stack_pointer_rtx,
17808 - (count * 8)))
17809 ),
17810 gen_rtx_UNSPEC (BLKmode,
17811 gen_rtvec (1, reg),
17812 UNSPEC_PUSH_MULT));
17813
17814 tmp = gen_rtx_SET (stack_pointer_rtx,
17815 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17816 RTX_FRAME_RELATED_P (tmp) = 1;
17817 XVECEXP (dwarf, 0, 0) = tmp;
17818
17819 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17820 RTX_FRAME_RELATED_P (tmp) = 1;
17821 XVECEXP (dwarf, 0, 1) = tmp;
17822
17823 for (i = 1; i < count; i++)
17824 {
17825 reg = gen_rtx_REG (DFmode, base_reg);
17826 base_reg += 2;
17827 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17828
17829 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17830 plus_constant (Pmode,
17831 stack_pointer_rtx,
17832 i * 8)),
17833 reg);
17834 RTX_FRAME_RELATED_P (tmp) = 1;
17835 XVECEXP (dwarf, 0, i + 1) = tmp;
17836 }
17837
17838 par = emit_insn (par);
17839 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17840 RTX_FRAME_RELATED_P (par) = 1;
17841
17842 return count * 8;
17843 }
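
/* For example, when arm_arch6 is not set (so the ARM10 VFPr1 workaround
   applies), a request to push the pair d8-d9 (COUNT == 2) is widened to
   d8-d10 and the function returns 24; with arm_arch6 set the same call
   stores just d8-d9 and returns 16.  */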
17844
17845 /* Return TRUE if -mcmse has been passed and the function pointed to by ADDR
17846 has the cmse_nonsecure_call attribute; return FALSE otherwise. */
17847
17848 bool
17849 detect_cmse_nonsecure_call (tree addr)
17850 {
17851 if (!addr)
17852 return FALSE;
17853
17854 tree fntype = TREE_TYPE (addr);
17855 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
17856 TYPE_ATTRIBUTES (fntype)))
17857 return TRUE;
17858 return FALSE;
17859 }
17860
17861
17862 /* Emit a call instruction with pattern PAT. ADDR is the address of
17863 the call target. */
17864
17865 void
17866 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17867 {
17868 rtx insn;
17869
17870 insn = emit_call_insn (pat);
17871
17872 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17873 If the call might use such an entry, add a use of the PIC register
17874 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17875 if (TARGET_VXWORKS_RTP
17876 && flag_pic
17877 && !sibcall
17878 && GET_CODE (addr) == SYMBOL_REF
17879 && (SYMBOL_REF_DECL (addr)
17880 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17881 : !SYMBOL_REF_LOCAL_P (addr)))
17882 {
17883 require_pic_register ();
17884 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17885 }
17886
17887 if (TARGET_AAPCS_BASED)
17888 {
17889 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17890 linker. We need to add an IP clobber to allow setting
17891 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17892 is not needed since it's a fixed register. */
17893 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17894 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17895 }
17896 }
17897
17898 /* Output a 'call' insn. */
17899 const char *
17900 output_call (rtx *operands)
17901 {
17902 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17903
17904 /* Handle calls to lr using ip (which may be clobbered in the subroutine anyway). */
17905 if (REGNO (operands[0]) == LR_REGNUM)
17906 {
17907 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17908 output_asm_insn ("mov%?\t%0, %|lr", operands);
17909 }
17910
17911 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17912
17913 if (TARGET_INTERWORK || arm_arch4t)
17914 output_asm_insn ("bx%?\t%0", operands);
17915 else
17916 output_asm_insn ("mov%?\t%|pc, %0", operands);
17917
17918 return "";
17919 }
17920
17921 /* Output a move from arm registers to arm registers of a long double
17922 OPERANDS[0] is the destination.
17923 OPERANDS[1] is the source. */
17924 const char *
17925 output_mov_long_double_arm_from_arm (rtx *operands)
17926 {
17927 /* We have to be careful here because the two might overlap. */
17928 int dest_start = REGNO (operands[0]);
17929 int src_start = REGNO (operands[1]);
17930 rtx ops[2];
17931 int i;
17932
17933 if (dest_start < src_start)
17934 {
17935 for (i = 0; i < 3; i++)
17936 {
17937 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17938 ops[1] = gen_rtx_REG (SImode, src_start + i);
17939 output_asm_insn ("mov%?\t%0, %1", ops);
17940 }
17941 }
17942 else
17943 {
17944 for (i = 2; i >= 0; i--)
17945 {
17946 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17947 ops[1] = gen_rtx_REG (SImode, src_start + i);
17948 output_asm_insn ("mov%?\t%0, %1", ops);
17949 }
17950 }
17951
17952 return "";
17953 }
17954
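/* Set DEST to SRC using at most two SETs, suitable for a movw/movt pair.
   For a CONST_INT the low 16 bits are emitted first and, if the upper half
   is nonzero, a ZERO_EXTRACT then sets the top 16 bits; for a symbolic SRC
   a HIGH/LO_SUM pair is emitted instead.  */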
17955 void
17956 arm_emit_movpair (rtx dest, rtx src)
17957 {
17958 /* If the src is an immediate, simplify it. */
17959 if (CONST_INT_P (src))
17960 {
17961 HOST_WIDE_INT val = INTVAL (src);
17962 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17963 if ((val >> 16) & 0x0000ffff)
17964 {
17965 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17966 GEN_INT (16)),
17967 GEN_INT ((val >> 16) & 0x0000ffff));
17968 rtx_insn *insn = get_last_insn ();
17969 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17970 }
17971 return;
17972 }
17973 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17974 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17975 rtx_insn *insn = get_last_insn ();
17976 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17977 }
17978
17979 /* Output a move between double words. It must be REG<-MEM
17980 or MEM<-REG. */
17981 const char *
17982 output_move_double (rtx *operands, bool emit, int *count)
17983 {
17984 enum rtx_code code0 = GET_CODE (operands[0]);
17985 enum rtx_code code1 = GET_CODE (operands[1]);
17986 rtx otherops[3];
17987 if (count)
17988 *count = 1;
17989
17990 /* The only case when this might happen is when
17991 you are looking at the length of a DImode instruction
17992 that has an invalid constant in it. */
17993 if (code0 == REG && code1 != MEM)
17994 {
17995 gcc_assert (!emit);
17996 *count = 2;
17997 return "";
17998 }
17999
18000 if (code0 == REG)
18001 {
18002 unsigned int reg0 = REGNO (operands[0]);
18003
18004 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18005
18006 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18007
18008 switch (GET_CODE (XEXP (operands[1], 0)))
18009 {
18010 case REG:
18011
18012 if (emit)
18013 {
18014 if (TARGET_LDRD
18015 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18016 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
18017 else
18018 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18019 }
18020 break;
18021
18022 case PRE_INC:
18023 gcc_assert (TARGET_LDRD);
18024 if (emit)
18025 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18026 break;
18027
18028 case PRE_DEC:
18029 if (emit)
18030 {
18031 if (TARGET_LDRD)
18032 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18033 else
18034 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18035 }
18036 break;
18037
18038 case POST_INC:
18039 if (emit)
18040 {
18041 if (TARGET_LDRD)
18042 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18043 else
18044 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18045 }
18046 break;
18047
18048 case POST_DEC:
18049 gcc_assert (TARGET_LDRD);
18050 if (emit)
18051 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18052 break;
18053
18054 case PRE_MODIFY:
18055 case POST_MODIFY:
18056 /* Auto-increment addressing modes should never have overlapping
18057 base and destination registers, and overlapping index registers
18058 are already prohibited, so this doesn't need to worry about
18059 fix_cm3_ldrd. */
18060 otherops[0] = operands[0];
18061 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18062 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18063
18064 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18065 {
18066 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18067 {
18068 /* Registers overlap so split out the increment. */
18069 if (emit)
18070 {
18071 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18072 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18073 }
18074 if (count)
18075 *count = 2;
18076 }
18077 else
18078 {
18079 /* Use a single insn if we can.
18080 FIXME: IWMMXT allows offsets larger than ldrd can
18081 handle, fix these up with a pair of ldr. */
18082 if (TARGET_THUMB2
18083 || !CONST_INT_P (otherops[2])
18084 || (INTVAL (otherops[2]) > -256
18085 && INTVAL (otherops[2]) < 256))
18086 {
18087 if (emit)
18088 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18089 }
18090 else
18091 {
18092 if (emit)
18093 {
18094 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18095 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18096 }
18097 if (count)
18098 *count = 2;
18099
18100 }
18101 }
18102 }
18103 else
18104 {
18105 /* Use a single insn if we can.
18106 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18107 fix these up with a pair of ldr. */
18108 if (TARGET_THUMB2
18109 || !CONST_INT_P (otherops[2])
18110 || (INTVAL (otherops[2]) > -256
18111 && INTVAL (otherops[2]) < 256))
18112 {
18113 if (emit)
18114 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18115 }
18116 else
18117 {
18118 if (emit)
18119 {
18120 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18121 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18122 }
18123 if (count)
18124 *count = 2;
18125 }
18126 }
18127 break;
18128
18129 case LABEL_REF:
18130 case CONST:
18131 /* We might be able to use ldrd %0, %1 here. However the range is
18132 different to ldr/adr, and it is broken on some ARMv7-M
18133 implementations. */
18134 /* Use the second register of the pair to avoid problematic
18135 overlap. */
18136 otherops[1] = operands[1];
18137 if (emit)
18138 output_asm_insn ("adr%?\t%0, %1", otherops);
18139 operands[1] = otherops[0];
18140 if (emit)
18141 {
18142 if (TARGET_LDRD)
18143 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18144 else
18145 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18146 }
18147
18148 if (count)
18149 *count = 2;
18150 break;
18151
18152 /* ??? This needs checking for thumb2. */
18153 default:
18154 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18155 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18156 {
18157 otherops[0] = operands[0];
18158 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18159 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18160
18161 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18162 {
18163 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18164 {
18165 switch ((int) INTVAL (otherops[2]))
18166 {
18167 case -8:
18168 if (emit)
18169 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18170 return "";
18171 case -4:
18172 if (TARGET_THUMB2)
18173 break;
18174 if (emit)
18175 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18176 return "";
18177 case 4:
18178 if (TARGET_THUMB2)
18179 break;
18180 if (emit)
18181 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18182 return "";
18183 }
18184 }
18185 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18186 operands[1] = otherops[0];
18187 if (TARGET_LDRD
18188 && (REG_P (otherops[2])
18189 || TARGET_THUMB2
18190 || (CONST_INT_P (otherops[2])
18191 && INTVAL (otherops[2]) > -256
18192 && INTVAL (otherops[2]) < 256)))
18193 {
18194 if (reg_overlap_mentioned_p (operands[0],
18195 otherops[2]))
18196 {
18197 /* Swap base and index registers over to
18198 avoid a conflict. */
18199 std::swap (otherops[1], otherops[2]);
18200 }
18201 /* If both registers conflict, it will usually
18202 have been fixed by a splitter. */
18203 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18204 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18205 {
18206 if (emit)
18207 {
18208 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18209 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18210 }
18211 if (count)
18212 *count = 2;
18213 }
18214 else
18215 {
18216 otherops[0] = operands[0];
18217 if (emit)
18218 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18219 }
18220 return "";
18221 }
18222
18223 if (CONST_INT_P (otherops[2]))
18224 {
18225 if (emit)
18226 {
18227 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18228 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18229 else
18230 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18231 }
18232 }
18233 else
18234 {
18235 if (emit)
18236 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18237 }
18238 }
18239 else
18240 {
18241 if (emit)
18242 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18243 }
18244
18245 if (count)
18246 *count = 2;
18247
18248 if (TARGET_LDRD)
18249 return "ldrd%?\t%0, [%1]";
18250
18251 return "ldmia%?\t%1, %M0";
18252 }
18253 else
18254 {
18255 otherops[1] = adjust_address (operands[1], SImode, 4);
18256 /* Take care of overlapping base/data reg. */
18257 if (reg_mentioned_p (operands[0], operands[1]))
18258 {
18259 if (emit)
18260 {
18261 output_asm_insn ("ldr%?\t%0, %1", otherops);
18262 output_asm_insn ("ldr%?\t%0, %1", operands);
18263 }
18264 if (count)
18265 *count = 2;
18266
18267 }
18268 else
18269 {
18270 if (emit)
18271 {
18272 output_asm_insn ("ldr%?\t%0, %1", operands);
18273 output_asm_insn ("ldr%?\t%0, %1", otherops);
18274 }
18275 if (count)
18276 *count = 2;
18277 }
18278 }
18279 }
18280 }
18281 else
18282 {
18283 /* Constraints should ensure this. */
18284 gcc_assert (code0 == MEM && code1 == REG);
18285 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18286 || (TARGET_ARM && TARGET_LDRD));
18287
18288 switch (GET_CODE (XEXP (operands[0], 0)))
18289 {
18290 case REG:
18291 if (emit)
18292 {
18293 if (TARGET_LDRD)
18294 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18295 else
18296 output_asm_insn ("stm%?\t%m0, %M1", operands);
18297 }
18298 break;
18299
18300 case PRE_INC:
18301 gcc_assert (TARGET_LDRD);
18302 if (emit)
18303 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18304 break;
18305
18306 case PRE_DEC:
18307 if (emit)
18308 {
18309 if (TARGET_LDRD)
18310 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18311 else
18312 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18313 }
18314 break;
18315
18316 case POST_INC:
18317 if (emit)
18318 {
18319 if (TARGET_LDRD)
18320 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18321 else
18322 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18323 }
18324 break;
18325
18326 case POST_DEC:
18327 gcc_assert (TARGET_LDRD);
18328 if (emit)
18329 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18330 break;
18331
18332 case PRE_MODIFY:
18333 case POST_MODIFY:
18334 otherops[0] = operands[1];
18335 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18336 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18337
18338 /* IWMMXT allows offsets larger than ldrd can handle,
18339 fix these up with a pair of ldr. */
18340 if (!TARGET_THUMB2
18341 && CONST_INT_P (otherops[2])
18342 && (INTVAL(otherops[2]) <= -256
18343 || INTVAL(otherops[2]) >= 256))
18344 {
18345 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18346 {
18347 if (emit)
18348 {
18349 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18350 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18351 }
18352 if (count)
18353 *count = 2;
18354 }
18355 else
18356 {
18357 if (emit)
18358 {
18359 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18360 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18361 }
18362 if (count)
18363 *count = 2;
18364 }
18365 }
18366 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18367 {
18368 if (emit)
18369 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18370 }
18371 else
18372 {
18373 if (emit)
18374 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18375 }
18376 break;
18377
18378 case PLUS:
18379 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18380 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18381 {
18382 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18383 {
18384 case -8:
18385 if (emit)
18386 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18387 return "";
18388
18389 case -4:
18390 if (TARGET_THUMB2)
18391 break;
18392 if (emit)
18393 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18394 return "";
18395
18396 case 4:
18397 if (TARGET_THUMB2)
18398 break;
18399 if (emit)
18400 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18401 return "";
18402 }
18403 }
18404 if (TARGET_LDRD
18405 && (REG_P (otherops[2])
18406 || TARGET_THUMB2
18407 || (CONST_INT_P (otherops[2])
18408 && INTVAL (otherops[2]) > -256
18409 && INTVAL (otherops[2]) < 256)))
18410 {
18411 otherops[0] = operands[1];
18412 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18413 if (emit)
18414 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18415 return "";
18416 }
18417 /* Fall through */
18418
18419 default:
18420 otherops[0] = adjust_address (operands[0], SImode, 4);
18421 otherops[1] = operands[1];
18422 if (emit)
18423 {
18424 output_asm_insn ("str%?\t%1, %0", operands);
18425 output_asm_insn ("str%?\t%H1, %0", otherops);
18426 }
18427 if (count)
18428 *count = 2;
18429 }
18430 }
18431
18432 return "";
18433 }
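
/* For illustration: a DImode load of r0:r1 from an address held in r2 comes
   out as "ldrd r0, [r2]" when TARGET_LDRD is set (and the Cortex-M3 ldrd
   workaround does not force a split), or as "ldmia r2, {r0, r1}" otherwise;
   the corresponding stores use strd or stm.  The more involved cases above
   exist to keep ldrd/strd usable when the addressing mode or a register
   overlap would otherwise rule it out.  */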
18434
18435 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18436 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18437
18438 const char *
18439 output_move_quad (rtx *operands)
18440 {
18441 if (REG_P (operands[0]))
18442 {
18443 /* Load, or reg->reg move. */
18444
18445 if (MEM_P (operands[1]))
18446 {
18447 switch (GET_CODE (XEXP (operands[1], 0)))
18448 {
18449 case REG:
18450 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18451 break;
18452
18453 case LABEL_REF:
18454 case CONST:
18455 output_asm_insn ("adr%?\t%0, %1", operands);
18456 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18457 break;
18458
18459 default:
18460 gcc_unreachable ();
18461 }
18462 }
18463 else
18464 {
18465 rtx ops[2];
18466 int dest, src, i;
18467
18468 gcc_assert (REG_P (operands[1]));
18469
18470 dest = REGNO (operands[0]);
18471 src = REGNO (operands[1]);
18472
18473 /* This seems pretty dumb, but hopefully GCC won't try to do it
18474 very often. */
18475 if (dest < src)
18476 for (i = 0; i < 4; i++)
18477 {
18478 ops[0] = gen_rtx_REG (SImode, dest + i);
18479 ops[1] = gen_rtx_REG (SImode, src + i);
18480 output_asm_insn ("mov%?\t%0, %1", ops);
18481 }
18482 else
18483 for (i = 3; i >= 0; i--)
18484 {
18485 ops[0] = gen_rtx_REG (SImode, dest + i);
18486 ops[1] = gen_rtx_REG (SImode, src + i);
18487 output_asm_insn ("mov%?\t%0, %1", ops);
18488 }
18489 }
18490 }
18491 else
18492 {
18493 gcc_assert (MEM_P (operands[0]));
18494 gcc_assert (REG_P (operands[1]));
18495 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18496
18497 switch (GET_CODE (XEXP (operands[0], 0)))
18498 {
18499 case REG:
18500 output_asm_insn ("stm%?\t%m0, %M1", operands);
18501 break;
18502
18503 default:
18504 gcc_unreachable ();
18505 }
18506 }
18507
18508 return "";
18509 }
18510
18511 /* Output a VFP load or store instruction. */
18512
18513 const char *
18514 output_move_vfp (rtx *operands)
18515 {
18516 rtx reg, mem, addr, ops[2];
18517 int load = REG_P (operands[0]);
18518 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18519 int sp = (!TARGET_VFP_FP16INST
18520 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18521 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18522 const char *templ;
18523 char buff[50];
18524 machine_mode mode;
18525
18526 reg = operands[!load];
18527 mem = operands[load];
18528
18529 mode = GET_MODE (reg);
18530
18531 gcc_assert (REG_P (reg));
18532 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18533 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18534 || mode == SFmode
18535 || mode == DFmode
18536 || mode == HImode
18537 || mode == SImode
18538 || mode == DImode
18539 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18540 gcc_assert (MEM_P (mem));
18541
18542 addr = XEXP (mem, 0);
18543
18544 switch (GET_CODE (addr))
18545 {
18546 case PRE_DEC:
18547 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18548 ops[0] = XEXP (addr, 0);
18549 ops[1] = reg;
18550 break;
18551
18552 case POST_INC:
18553 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18554 ops[0] = XEXP (addr, 0);
18555 ops[1] = reg;
18556 break;
18557
18558 default:
18559 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18560 ops[0] = reg;
18561 ops[1] = mem;
18562 break;
18563 }
18564
18565 sprintf (buff, templ,
18566 load ? "ld" : "st",
18567 dp ? "64" : sp ? "32" : "16",
18568 dp ? "P" : "",
18569 integer_p ? "\t%@ int" : "");
18570 output_asm_insn (buff, ops);
18571
18572 return "";
18573 }
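
/* For illustration: a DFmode load of d8 from the address in r0 is printed as
   "vldr.64 d8, [r0]", an SFmode store of s1 to [r3, #8] as
   "vstr.32 s1, [r3, #8]", and the pre-decrement and post-increment forms use
   vstmdb/vldmia with writeback on the base register instead.  */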
18574
18575 /* Output a Neon double-word or quad-word load or store, or a load
18576 or store for larger structure modes.
18577
18578 WARNING: The ordering of elements is weird in big-endian mode,
18579 because the EABI requires that vectors stored in memory appear
18580 as though they were stored by a VSTM instruction.
18581 GCC RTL defines element ordering based on in-memory order.
18582 This can be different from the architectural ordering of elements
18583 within a NEON register. The intrinsics defined in arm_neon.h use the
18584 NEON register element ordering, not the GCC RTL element ordering.
18585
18586 For example, the in-memory ordering of a big-endian quadword
18587 vector with 16-bit elements when stored from register pair {d0,d1}
18588 will be (lowest address first, d0[N] is NEON register element N):
18589
18590 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18591
18592 When necessary, quadword registers (dN, dN+1) are moved to ARM
18593 registers from rN in the order:
18594
18595 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18596
18597 So that STM/LDM can be used on vectors in ARM registers, and the
18598 same memory layout will result as if VSTM/VLDM were used.
18599
18600 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18601 possible, which allows use of appropriate alignment tags.
18602 Note that the choice of "64" is independent of the actual vector
18603 element size; this size simply ensures that the behavior is
18604 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18605
18606 Due to limitations of those instructions, use of VST1.64/VLD1.64
18607 is not possible if:
18608 - the address contains PRE_DEC, or
18609 - the mode refers to more than 4 double-word registers
18610
18611 In those cases, it would be possible to replace VSTM/VLDM by a
18612 sequence of instructions; this is not currently implemented since
18613 this is not certain to actually improve performance. */
18614
18615 const char *
18616 output_move_neon (rtx *operands)
18617 {
18618 rtx reg, mem, addr, ops[2];
18619 int regno, nregs, load = REG_P (operands[0]);
18620 const char *templ;
18621 char buff[50];
18622 machine_mode mode;
18623
18624 reg = operands[!load];
18625 mem = operands[load];
18626
18627 mode = GET_MODE (reg);
18628
18629 gcc_assert (REG_P (reg));
18630 regno = REGNO (reg);
18631 nregs = REG_NREGS (reg) / 2;
18632 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18633 || NEON_REGNO_OK_FOR_QUAD (regno));
18634 gcc_assert (VALID_NEON_DREG_MODE (mode)
18635 || VALID_NEON_QREG_MODE (mode)
18636 || VALID_NEON_STRUCT_MODE (mode));
18637 gcc_assert (MEM_P (mem));
18638
18639 addr = XEXP (mem, 0);
18640
18641 /* Strip off const from addresses like (const (plus (...))). */
18642 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18643 addr = XEXP (addr, 0);
18644
18645 switch (GET_CODE (addr))
18646 {
18647 case POST_INC:
18648 /* We have to use vldm / vstm for too-large modes. */
18649 if (nregs > 4)
18650 {
18651 templ = "v%smia%%?\t%%0!, %%h1";
18652 ops[0] = XEXP (addr, 0);
18653 }
18654 else
18655 {
18656 templ = "v%s1.64\t%%h1, %%A0";
18657 ops[0] = mem;
18658 }
18659 ops[1] = reg;
18660 break;
18661
18662 case PRE_DEC:
18663 /* We have to use vldm / vstm in this case, since there is no
18664 pre-decrement form of the vld1 / vst1 instructions. */
18665 templ = "v%smdb%%?\t%%0!, %%h1";
18666 ops[0] = XEXP (addr, 0);
18667 ops[1] = reg;
18668 break;
18669
18670 case POST_MODIFY:
18671 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18672 gcc_unreachable ();
18673
18674 case REG:
18675 /* We have to use vldm / vstm for too-large modes. */
18676 if (nregs > 1)
18677 {
18678 if (nregs > 4)
18679 templ = "v%smia%%?\t%%m0, %%h1";
18680 else
18681 templ = "v%s1.64\t%%h1, %%A0";
18682
18683 ops[0] = mem;
18684 ops[1] = reg;
18685 break;
18686 }
18687 /* Fall through. */
18688 case LABEL_REF:
18689 case PLUS:
18690 {
18691 int i;
18692 int overlap = -1;
18693 for (i = 0; i < nregs; i++)
18694 {
18695 /* We're only using DImode here because it's a convenient size. */
18696 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18697 ops[1] = adjust_address (mem, DImode, 8 * i);
18698 if (reg_overlap_mentioned_p (ops[0], mem))
18699 {
18700 gcc_assert (overlap == -1);
18701 overlap = i;
18702 }
18703 else
18704 {
18705 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18706 output_asm_insn (buff, ops);
18707 }
18708 }
18709 if (overlap != -1)
18710 {
18711 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18712 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18713 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18714 output_asm_insn (buff, ops);
18715 }
18716
18717 return "";
18718 }
18719
18720 default:
18721 gcc_unreachable ();
18722 }
18723
18724 sprintf (buff, templ, load ? "ld" : "st");
18725 output_asm_insn (buff, ops);
18726
18727 return "";
18728 }
18729
18730 /* Compute and return the length of neon_mov<mode>, where <mode> is
18731 one of VSTRUCT modes: EI, OI, CI or XI. */
18732 int
18733 arm_attr_length_move_neon (rtx_insn *insn)
18734 {
18735 rtx reg, mem, addr;
18736 int load;
18737 machine_mode mode;
18738
18739 extract_insn_cached (insn);
18740
18741 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18742 {
18743 mode = GET_MODE (recog_data.operand[0]);
18744 switch (mode)
18745 {
18746 case E_EImode:
18747 case E_OImode:
18748 return 8;
18749 case E_CImode:
18750 return 12;
18751 case E_XImode:
18752 return 16;
18753 default:
18754 gcc_unreachable ();
18755 }
18756 }
18757
18758 load = REG_P (recog_data.operand[0]);
18759 reg = recog_data.operand[!load];
18760 mem = recog_data.operand[load];
18761
18762 gcc_assert (MEM_P (mem));
18763
18764 addr = XEXP (mem, 0);
18765
18766 /* Strip off const from addresses like (const (plus (...))). */
18767 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18768 addr = XEXP (addr, 0);
18769
18770 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18771 {
18772 int insns = REG_NREGS (reg) / 2;
18773 return insns * 4;
18774 }
18775 else
18776 return 4;
18777 }
18778
18779 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18780 return zero. */
18781
18782 int
18783 arm_address_offset_is_imm (rtx_insn *insn)
18784 {
18785 rtx mem, addr;
18786
18787 extract_insn_cached (insn);
18788
18789 if (REG_P (recog_data.operand[0]))
18790 return 0;
18791
18792 mem = recog_data.operand[0];
18793
18794 gcc_assert (MEM_P (mem));
18795
18796 addr = XEXP (mem, 0);
18797
18798 if (REG_P (addr)
18799 || (GET_CODE (addr) == PLUS
18800 && REG_P (XEXP (addr, 0))
18801 && CONST_INT_P (XEXP (addr, 1))))
18802 return 1;
18803 else
18804 return 0;
18805 }
18806
18807 /* Output an ADD r, s, #n where n may be too big for one instruction.
18808 If adding zero to one register, output nothing. */
18809 const char *
18810 output_add_immediate (rtx *operands)
18811 {
18812 HOST_WIDE_INT n = INTVAL (operands[2]);
18813
18814 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18815 {
18816 if (n < 0)
18817 output_multi_immediate (operands,
18818 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18819 -n);
18820 else
18821 output_multi_immediate (operands,
18822 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18823 n);
18824 }
18825
18826 return "";
18827 }
18828
18829 /* Output a multiple immediate operation.
18830 OPERANDS is the vector of operands referred to in the output patterns.
18831 INSTR1 is the output pattern to use for the first constant.
18832 INSTR2 is the output pattern to use for subsequent constants.
18833 IMMED_OP is the index of the constant slot in OPERANDS.
18834 N is the constant value. */
18835 static const char *
18836 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18837 int immed_op, HOST_WIDE_INT n)
18838 {
18839 #if HOST_BITS_PER_WIDE_INT > 32
18840 n &= 0xffffffff;
18841 #endif
18842
18843 if (n == 0)
18844 {
18845 /* Quick and easy output. */
18846 operands[immed_op] = const0_rtx;
18847 output_asm_insn (instr1, operands);
18848 }
18849 else
18850 {
18851 int i;
18852 const char * instr = instr1;
18853
18854 /* Note that n is never zero here (which would give no output). */
18855 for (i = 0; i < 32; i += 2)
18856 {
18857 if (n & (3 << i))
18858 {
18859 operands[immed_op] = GEN_INT (n & (255 << i));
18860 output_asm_insn (instr, operands);
18861 instr = instr2;
18862 i += 6;
18863 }
18864 }
18865 }
18866
18867 return "";
18868 }
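
/* As an example of the splitting above, output_add_immediate with an addend
   of 0x10004 produces two instructions,
       add     rD, rS, #4
       add     rD, rD, #65536
   because each ARM data-processing immediate is an 8-bit value rotated by an
   even amount.  */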
18869
18870 /* Return the name of a shifter operation. */
18871 static const char *
18872 arm_shift_nmem(enum rtx_code code)
18873 {
18874 switch (code)
18875 {
18876 case ASHIFT:
18877 return ARM_LSL_NAME;
18878
18879 case ASHIFTRT:
18880 return "asr";
18881
18882 case LSHIFTRT:
18883 return "lsr";
18884
18885 case ROTATERT:
18886 return "ror";
18887
18888 default:
18889 abort();
18890 }
18891 }
18892
18893 /* Return the appropriate ARM instruction for the operation code.
18894 The returned result should not be overwritten. OP is the rtx of the
18895 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18896 was shifted. */
18897 const char *
18898 arithmetic_instr (rtx op, int shift_first_arg)
18899 {
18900 switch (GET_CODE (op))
18901 {
18902 case PLUS:
18903 return "add";
18904
18905 case MINUS:
18906 return shift_first_arg ? "rsb" : "sub";
18907
18908 case IOR:
18909 return "orr";
18910
18911 case XOR:
18912 return "eor";
18913
18914 case AND:
18915 return "and";
18916
18917 case ASHIFT:
18918 case ASHIFTRT:
18919 case LSHIFTRT:
18920 case ROTATERT:
18921 return arm_shift_nmem(GET_CODE(op));
18922
18923 default:
18924 gcc_unreachable ();
18925 }
18926 }
18927
18928 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18929 for the operation code. The returned result should not be overwritten.
18930 OP is the rtx code of the shift.
18931 On exit, *AMOUNTP will be -1 if the shift is by a register, or a constant
18932 shift. */
18933 static const char *
18934 shift_op (rtx op, HOST_WIDE_INT *amountp)
18935 {
18936 const char * mnem;
18937 enum rtx_code code = GET_CODE (op);
18938
18939 switch (code)
18940 {
18941 case ROTATE:
18942 if (!CONST_INT_P (XEXP (op, 1)))
18943 {
18944 output_operand_lossage ("invalid shift operand");
18945 return NULL;
18946 }
18947
18948 code = ROTATERT;
18949 *amountp = 32 - INTVAL (XEXP (op, 1));
18950 mnem = "ror";
18951 break;
18952
18953 case ASHIFT:
18954 case ASHIFTRT:
18955 case LSHIFTRT:
18956 case ROTATERT:
18957 mnem = arm_shift_nmem(code);
18958 if (CONST_INT_P (XEXP (op, 1)))
18959 {
18960 *amountp = INTVAL (XEXP (op, 1));
18961 }
18962 else if (REG_P (XEXP (op, 1)))
18963 {
18964 *amountp = -1;
18965 return mnem;
18966 }
18967 else
18968 {
18969 output_operand_lossage ("invalid shift operand");
18970 return NULL;
18971 }
18972 break;
18973
18974 case MULT:
18975 /* We never have to worry about the amount being other than a
18976 power of 2, since this case can never be reloaded from a reg. */
18977 if (!CONST_INT_P (XEXP (op, 1)))
18978 {
18979 output_operand_lossage ("invalid shift operand");
18980 return NULL;
18981 }
18982
18983 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18984
18985 /* Amount must be a power of two. */
18986 if (*amountp & (*amountp - 1))
18987 {
18988 output_operand_lossage ("invalid shift operand");
18989 return NULL;
18990 }
18991
18992 *amountp = exact_log2 (*amountp);
18993 gcc_assert (IN_RANGE (*amountp, 0, 31));
18994 return ARM_LSL_NAME;
18995
18996 default:
18997 output_operand_lossage ("invalid shift operand");
18998 return NULL;
18999 }
19000
19001 /* This is not 100% correct, but follows from the desire to merge
19002 multiplication by a power of 2 with the recognizer for a
19003 shift. >=32 is not a valid shift for "lsl", so we must try to
19004 output a shift that produces the correct arithmetical result.
19005 Using lsr #32 is identical except for the fact that the carry bit
19006 is not set correctly if we set the flags; but we never use the
19007 carry bit from such an operation, so we can ignore that. */
19008 if (code == ROTATERT)
19009 /* Rotate is just modulo 32. */
19010 *amountp &= 31;
19011 else if (*amountp != (*amountp & 31))
19012 {
19013 if (code == ASHIFT)
19014 mnem = "lsr";
19015 *amountp = 32;
19016 }
19017
19018 /* Shifts of 0 are no-ops. */
19019 if (*amountp == 0)
19020 return NULL;
19021
19022 return mnem;
19023 }
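
/* For example, (mult x 8) is returned as ARM_LSL_NAME with *AMOUNTP set to 3,
   (rotate x 27) becomes "ror" with *AMOUNTP set to 5, and a shift by a
   register returns its mnemonic with *AMOUNTP set to -1.  */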
19024
19025 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19026 because /bin/as is horribly restrictive. The judgement about
19027 whether or not each character is 'printable' (and can be output as
19028 is) or not (and must be printed with an octal escape) must be made
19029 with reference to the *host* character set -- the situation is
19030 similar to that discussed in the comments above pp_c_char in
19031 c-pretty-print.c. */
19032
19033 #define MAX_ASCII_LEN 51
19034
19035 void
19036 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19037 {
19038 int i;
19039 int len_so_far = 0;
19040
19041 fputs ("\t.ascii\t\"", stream);
19042
19043 for (i = 0; i < len; i++)
19044 {
19045 int c = p[i];
19046
19047 if (len_so_far >= MAX_ASCII_LEN)
19048 {
19049 fputs ("\"\n\t.ascii\t\"", stream);
19050 len_so_far = 0;
19051 }
19052
19053 if (ISPRINT (c))
19054 {
19055 if (c == '\\' || c == '\"')
19056 {
19057 putc ('\\', stream);
19058 len_so_far++;
19059 }
19060 putc (c, stream);
19061 len_so_far++;
19062 }
19063 else
19064 {
19065 fprintf (stream, "\\%03o", c);
19066 len_so_far += 4;
19067 }
19068 }
19069
19070 fputs ("\"\n", stream);
19071 }
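
/* For example, the input bytes 'a', '"' and '\n' are emitted as
       .ascii  "a\"\012"
   with the quote escaped and the non-printable newline written as an octal
   escape; a fresh .ascii directive is started whenever the current string
   reaches MAX_ASCII_LEN characters.  */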
19072 \f
19073 /* Whether a register is callee saved or not. This is necessary because high
19074 registers are marked as caller saved when optimizing for size on Thumb-1
19075 targets, despite being callee saved, in order to avoid using them. */
19076 #define callee_saved_reg_p(reg) \
19077 (!call_used_regs[reg] \
19078 || (TARGET_THUMB1 && optimize_size \
19079 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19080
19081 /* Compute the register save mask for registers 0 through 12
19082 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19083
19084 static unsigned long
19085 arm_compute_save_reg0_reg12_mask (void)
19086 {
19087 unsigned long func_type = arm_current_func_type ();
19088 unsigned long save_reg_mask = 0;
19089 unsigned int reg;
19090
19091 if (IS_INTERRUPT (func_type))
19092 {
19093 unsigned int max_reg;
19094 /* Interrupt functions must not corrupt any registers,
19095 even call clobbered ones. If this is a leaf function
19096 we can just examine the registers used by the RTL, but
19097 otherwise we have to assume that whatever function is
19098 called might clobber anything, and so we have to save
19099 all the call-clobbered registers as well. */
19100 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19101 /* FIQ handlers have registers r8 - r12 banked, so
19102 we only need to check r0 - r7. Normal ISRs only
19103 bank r14 and r15, so we must check up to r12.
19104 r13 is the stack pointer which is always preserved,
19105 so we do not need to consider it here. */
19106 max_reg = 7;
19107 else
19108 max_reg = 12;
19109
19110 for (reg = 0; reg <= max_reg; reg++)
19111 if (df_regs_ever_live_p (reg)
19112 || (! crtl->is_leaf && call_used_regs[reg]))
19113 save_reg_mask |= (1 << reg);
19114
19115 /* Also save the pic base register if necessary. */
19116 if (flag_pic
19117 && !TARGET_SINGLE_PIC_BASE
19118 && arm_pic_register != INVALID_REGNUM
19119 && crtl->uses_pic_offset_table)
19120 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19121 }
19122 else if (IS_VOLATILE(func_type))
19123 {
19124 /* For noreturn functions we historically omitted register saves
19125 altogether. However this really messes up debugging. As a
19126 compromise save just the frame pointers. Combined with the link
19127 register saved elsewhere this should be sufficient to get
19128 a backtrace. */
19129 if (frame_pointer_needed)
19130 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19131 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19132 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19133 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19134 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19135 }
19136 else
19137 {
19138 /* In the normal case we only need to save those registers
19139 which are call saved and which are used by this function. */
19140 for (reg = 0; reg <= 11; reg++)
19141 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19142 save_reg_mask |= (1 << reg);
19143
19144 /* Handle the frame pointer as a special case. */
19145 if (frame_pointer_needed)
19146 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19147
19148 /* If we aren't loading the PIC register,
19149 don't stack it even though it may be live. */
19150 if (flag_pic
19151 && !TARGET_SINGLE_PIC_BASE
19152 && arm_pic_register != INVALID_REGNUM
19153 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19154 || crtl->uses_pic_offset_table))
19155 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19156
19157 /* The prologue will copy SP into R0, so save it. */
19158 if (IS_STACKALIGN (func_type))
19159 save_reg_mask |= 1;
19160 }
19161
19162 /* Save registers so the exception handler can modify them. */
19163 if (crtl->calls_eh_return)
19164 {
19165 unsigned int i;
19166
19167 for (i = 0; ; i++)
19168 {
19169 reg = EH_RETURN_DATA_REGNO (i);
19170 if (reg == INVALID_REGNUM)
19171 break;
19172 save_reg_mask |= 1 << reg;
19173 }
19174 }
19175
19176 return save_reg_mask;
19177 }
19178
19179 /* Return true if r3 is live at the start of the function. */
19180
19181 static bool
19182 arm_r3_live_at_start_p (void)
19183 {
19184 /* Just look at cfg info, which is still close enough to correct at this
19185 point. This gives false positives for broken functions that might use
19186 uninitialized data that happens to be allocated in r3, but who cares? */
19187 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19188 }
19189
19190 /* Compute the number of bytes used to store the static chain register on the
19191 stack, above the stack frame. We need to know this accurately to get the
19192 alignment of the rest of the stack frame correct. */
19193
19194 static int
19195 arm_compute_static_chain_stack_bytes (void)
19196 {
19197 /* See the defining assertion in arm_expand_prologue. */
19198 if (IS_NESTED (arm_current_func_type ())
19199 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19200 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19201 || flag_stack_clash_protection)
19202 && !df_regs_ever_live_p (LR_REGNUM)))
19203 && arm_r3_live_at_start_p ()
19204 && crtl->args.pretend_args_size == 0)
19205 return 4;
19206
19207 return 0;
19208 }
19209
19210 /* Compute a bit mask of which core registers need to be
19211 saved on the stack for the current function.
19212 This is used by arm_compute_frame_layout, which may add extra registers. */
19213
19214 static unsigned long
19215 arm_compute_save_core_reg_mask (void)
19216 {
19217 unsigned int save_reg_mask = 0;
19218 unsigned long func_type = arm_current_func_type ();
19219 unsigned int reg;
19220
19221 if (IS_NAKED (func_type))
19222 /* This should never really happen. */
19223 return 0;
19224
19225 /* If we are creating a stack frame, then we must save the frame pointer,
19226 IP (which will hold the old stack pointer), LR and the PC. */
19227 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19228 save_reg_mask |=
19229 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19230 | (1 << IP_REGNUM)
19231 | (1 << LR_REGNUM)
19232 | (1 << PC_REGNUM);
19233
19234 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19235
19236 /* Decide if we need to save the link register.
19237 Interrupt routines have their own banked link register,
19238 so they never need to save it.
19239 Otherwise if we do not use the link register we do not need to save
19240 it. If we are pushing other registers onto the stack however, we
19241 can save an instruction in the epilogue by pushing the link register
19242 now and then popping it back into the PC. This incurs extra memory
19243 accesses though, so we only do it when optimizing for size, and only
19244 if we know that we will not need a fancy return sequence. */
19245 if (df_regs_ever_live_p (LR_REGNUM)
19246 || (save_reg_mask
19247 && optimize_size
19248 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19249 && !crtl->tail_call_emit
19250 && !crtl->calls_eh_return))
19251 save_reg_mask |= 1 << LR_REGNUM;
19252
19253 if (cfun->machine->lr_save_eliminated)
19254 save_reg_mask &= ~ (1 << LR_REGNUM);
19255
19256 if (TARGET_REALLY_IWMMXT
19257 && ((bit_count (save_reg_mask)
19258 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19259 arm_compute_static_chain_stack_bytes())
19260 ) % 2) != 0)
19261 {
19262 /* The total number of registers that are going to be pushed
19263 onto the stack is odd. We need to ensure that the stack
19264 is 64-bit aligned before we start to save iWMMXt registers,
19265 and also before we start to create locals. (A local variable
19266 might be a double or long long which we will load/store using
19267 an iWMMXt instruction). Therefore we need to push another
19268 ARM register, so that the stack will be 64-bit aligned. We
19269 try to avoid using the arg registers (r0 -r3) as they might be
19270 used to pass values in a tail call. */
19271 for (reg = 4; reg <= 12; reg++)
19272 if ((save_reg_mask & (1 << reg)) == 0)
19273 break;
19274
19275 if (reg <= 12)
19276 save_reg_mask |= (1 << reg);
19277 else
19278 {
19279 cfun->machine->sibcall_blocked = 1;
19280 save_reg_mask |= (1 << 3);
19281 }
19282 }
19283
19284 /* We may need to push an additional register for use initializing the
19285 PIC base register. */
19286 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19287 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19288 {
19289 reg = thumb_find_work_register (1 << 4);
19290 if (!call_used_regs[reg])
19291 save_reg_mask |= (1 << reg);
19292 }
19293
19294 return save_reg_mask;
19295 }
19296
19297 /* Compute a bit mask of which core registers need to be
19298 saved on the stack for the current function. */
19299 static unsigned long
19300 thumb1_compute_save_core_reg_mask (void)
19301 {
19302 unsigned long mask;
19303 unsigned reg;
19304
19305 mask = 0;
19306 for (reg = 0; reg < 12; reg ++)
19307 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19308 mask |= 1 << reg;
19309
19310 /* Handle the frame pointer as a special case. */
19311 if (frame_pointer_needed)
19312 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19313
19314 if (flag_pic
19315 && !TARGET_SINGLE_PIC_BASE
19316 && arm_pic_register != INVALID_REGNUM
19317 && crtl->uses_pic_offset_table)
19318 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19319
19320 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19321 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19322 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19323
19324 /* LR will also be pushed if any lo regs are pushed. */
19325 if (mask & 0xff || thumb_force_lr_save ())
19326 mask |= (1 << LR_REGNUM);
19327
19328 /* Make sure we have a low work register if we need one.
19329 We will need one if we are going to push a high register,
19330 but we are not currently intending to push a low register. */
19331 if ((mask & 0xff) == 0
19332 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19333 {
19334 /* Use thumb_find_work_register to choose which register
19335 we will use. If the register is live then we will
19336 have to push it. Use LAST_LO_REGNUM as our fallback
19337 choice for the register to select. */
19338 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19339 /* Make sure the register returned by thumb_find_work_register is
19340 not part of the return value. */
19341 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19342 reg = LAST_LO_REGNUM;
19343
19344 if (callee_saved_reg_p (reg))
19345 mask |= 1 << reg;
19346 }
19347
19348 /* The 504 below is 8 bytes less than 512 because there are two possible
19349 alignment words. We can't tell here if they will be present or not so we
19350 have to play it safe and assume that they are. */
19351 if ((CALLER_INTERWORKING_SLOT_SIZE +
19352 ROUND_UP_WORD (get_frame_size ()) +
19353 crtl->outgoing_args_size) >= 504)
19354 {
19355 /* This is the same as the code in thumb1_expand_prologue() which
19356 determines which register to use for stack decrement. */
19357 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19358 if (mask & (1 << reg))
19359 break;
19360
19361 if (reg > LAST_LO_REGNUM)
19362 {
19363 /* Make sure we have a register available for stack decrement. */
19364 mask |= 1 << LAST_LO_REGNUM;
19365 }
19366 }
19367
19368 return mask;
19369 }
19370
19371
19372 /* Return the number of bytes required to save VFP registers. */
19373 static int
19374 arm_get_vfp_saved_size (void)
19375 {
19376 unsigned int regno;
19377 int count;
19378 int saved;
19379
19380 saved = 0;
19381 /* Space for saved VFP registers. */
19382 if (TARGET_HARD_FLOAT)
19383 {
19384 count = 0;
19385 for (regno = FIRST_VFP_REGNUM;
19386 regno < LAST_VFP_REGNUM;
19387 regno += 2)
19388 {
19389 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19390 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19391 {
19392 if (count > 0)
19393 {
19394 /* Work around the ARM10 VFPr1 bug. */
19395 if (count == 2 && !arm_arch6)
19396 count++;
19397 saved += count * 8;
19398 }
19399 count = 0;
19400 }
19401 else
19402 count++;
19403 }
19404 if (count > 0)
19405 {
19406 if (count == 2 && !arm_arch6)
19407 count++;
19408 saved += count * 8;
19409 }
19410 }
19411 return saved;
19412 }
19413
19414
19415 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19416 everything bar the final return instruction. If SIMPLE_RETURN is true,
19417 then do not output the epilogue, because it has already been emitted in RTL. */
19418 const char *
19419 output_return_instruction (rtx operand, bool really_return, bool reverse,
19420 bool simple_return)
19421 {
19422 char conditional[10];
19423 char instr[100];
19424 unsigned reg;
19425 unsigned long live_regs_mask;
19426 unsigned long func_type;
19427 arm_stack_offsets *offsets;
19428
19429 func_type = arm_current_func_type ();
19430
19431 if (IS_NAKED (func_type))
19432 return "";
19433
19434 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19435 {
19436 /* If this function was declared non-returning, and we have
19437 found a tail call, then we have to trust that the called
19438 function won't return. */
19439 if (really_return)
19440 {
19441 rtx ops[2];
19442
19443 /* Otherwise, trap an attempted return by aborting. */
19444 ops[0] = operand;
19445 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19446 : "abort");
19447 assemble_external_libcall (ops[1]);
19448 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19449 }
19450
19451 return "";
19452 }
19453
19454 gcc_assert (!cfun->calls_alloca || really_return);
19455
19456 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19457
19458 cfun->machine->return_used_this_function = 1;
19459
19460 offsets = arm_get_frame_offsets ();
19461 live_regs_mask = offsets->saved_regs_mask;
19462
19463 if (!simple_return && live_regs_mask)
19464 {
19465 const char * return_reg;
19466
19467 /* If we do not have any special requirements for function exit
19468 (e.g. interworking) then we can load the return address
19469 directly into the PC. Otherwise we must load it into LR. */
19470 if (really_return
19471 && !IS_CMSE_ENTRY (func_type)
19472 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19473 return_reg = reg_names[PC_REGNUM];
19474 else
19475 return_reg = reg_names[LR_REGNUM];
19476
19477 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19478 {
19479 /* There are three possible reasons for the IP register
19480 being saved: 1) a stack frame was created, in which case
19481 IP contains the old stack pointer; 2) an ISR routine
19482 corrupted it; or 3) it was saved to align the stack on
19483 iWMMXt. In case 1, restore IP into SP, otherwise just
19484 restore IP. */
19485 if (frame_pointer_needed)
19486 {
19487 live_regs_mask &= ~ (1 << IP_REGNUM);
19488 live_regs_mask |= (1 << SP_REGNUM);
19489 }
19490 else
19491 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19492 }
19493
19494 /* On some ARM architectures it is faster to use LDR rather than
19495 LDM to load a single register. On other architectures, the
19496 cost is the same. In 26 bit mode, or for exception handlers,
19497 we have to use LDM to load the PC so that the CPSR is also
19498 restored. */
19499 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19500 if (live_regs_mask == (1U << reg))
19501 break;
19502
19503 if (reg <= LAST_ARM_REGNUM
19504 && (reg != LR_REGNUM
19505 || ! really_return
19506 || ! IS_INTERRUPT (func_type)))
19507 {
19508 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19509 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19510 }
19511 else
19512 {
19513 char *p;
19514 int first = 1;
19515
19516 /* Generate the load multiple instruction to restore the
19517 registers. Note we can get here, even if
19518 frame_pointer_needed is true, but only if sp already
19519 points to the base of the saved core registers. */
19520 if (live_regs_mask & (1 << SP_REGNUM))
19521 {
19522 unsigned HOST_WIDE_INT stack_adjust;
19523
19524 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19525 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19526
19527 if (stack_adjust && arm_arch5 && TARGET_ARM)
19528 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19529 else
19530 {
19531 /* If we can't use ldmib (SA110 bug),
19532 then try to pop r3 instead. */
19533 if (stack_adjust)
19534 live_regs_mask |= 1 << 3;
19535
19536 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19537 }
19538 }
19539 /* For interrupt returns we have to use an LDM rather than
19540 a POP so that we can use the exception return variant. */
19541 else if (IS_INTERRUPT (func_type))
19542 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19543 else
19544 sprintf (instr, "pop%s\t{", conditional);
19545
19546 p = instr + strlen (instr);
19547
19548 for (reg = 0; reg <= SP_REGNUM; reg++)
19549 if (live_regs_mask & (1 << reg))
19550 {
19551 int l = strlen (reg_names[reg]);
19552
19553 if (first)
19554 first = 0;
19555 else
19556 {
19557 memcpy (p, ", ", 2);
19558 p += 2;
19559 }
19560
19561 memcpy (p, "%|", 2);
19562 memcpy (p + 2, reg_names[reg], l);
19563 p += l + 2;
19564 }
19565
19566 if (live_regs_mask & (1 << LR_REGNUM))
19567 {
19568 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19569 /* If returning from an interrupt, restore the CPSR. */
19570 if (IS_INTERRUPT (func_type))
19571 strcat (p, "^");
19572 }
19573 else
19574 strcpy (p, "}");
19575 }
19576
19577 output_asm_insn (instr, & operand);
19578
19579 /* See if we need to generate an extra instruction to
19580 perform the actual function return. */
19581 if (really_return
19582 && func_type != ARM_FT_INTERWORKED
19583 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19584 {
19585 /* The return has already been handled
19586 by loading the LR into the PC. */
19587 return "";
19588 }
19589 }
19590
19591 if (really_return)
19592 {
19593 switch ((int) ARM_FUNC_TYPE (func_type))
19594 {
19595 case ARM_FT_ISR:
19596 case ARM_FT_FIQ:
19597 /* ??? This is wrong for unified assembly syntax. */
19598 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19599 break;
19600
19601 case ARM_FT_INTERWORKED:
19602 gcc_assert (arm_arch5 || arm_arch4t);
19603 sprintf (instr, "bx%s\t%%|lr", conditional);
19604 break;
19605
19606 case ARM_FT_EXCEPTION:
19607 /* ??? This is wrong for unified assembly syntax. */
19608 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19609 break;
19610
19611 default:
19612 if (IS_CMSE_ENTRY (func_type))
19613 {
19614 /* Check if we have to clear the 'GE bits', which are only used if
19615 parallel add and subtraction instructions are available. */
19616 if (TARGET_INT_SIMD)
19617 snprintf (instr, sizeof (instr),
19618 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19619 else
19620 snprintf (instr, sizeof (instr),
19621 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19622
19623 output_asm_insn (instr, & operand);
19624 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19625 {
19626 /* Clear the cumulative exception-status bits (0-4,7) and the
19627 condition code bits (28-31) of the FPSCR. We need to
19628 remember to clear the first scratch register used (IP) and
19629 save and restore the second (r4). */
19630 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19631 output_asm_insn (instr, & operand);
19632 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19633 output_asm_insn (instr, & operand);
19634 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19635 output_asm_insn (instr, & operand);
19636 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19637 output_asm_insn (instr, & operand);
19638 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19639 output_asm_insn (instr, & operand);
19640 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19641 output_asm_insn (instr, & operand);
19642 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19643 output_asm_insn (instr, & operand);
19644 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19645 output_asm_insn (instr, & operand);
19646 }
19647 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19648 }
19649 /* Use bx if it's available. */
19650 else if (arm_arch5 || arm_arch4t)
19651 sprintf (instr, "bx%s\t%%|lr", conditional);
19652 else
19653 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19654 break;
19655 }
19656
19657 output_asm_insn (instr, & operand);
19658 }
19659
19660 return "";
19661 }
19662
19663 /* Output in FILE asm statements needed to declare the NAME of the function
19664 defined by its DECL node. */
19665
19666 void
19667 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19668 {
19669 size_t cmse_name_len;
19670 char *cmse_name = 0;
19671 char cmse_prefix[] = "__acle_se_";
19672
19673 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19674 extra function label for each function with the 'cmse_nonsecure_entry'
19675 attribute. This extra function label should be prepended with
19676 '__acle_se_', telling the linker that it needs to create secure gateway
19677 veneers for this function. */
19678 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19679 DECL_ATTRIBUTES (decl)))
19680 {
19681 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19682 cmse_name = XALLOCAVEC (char, cmse_name_len);
19683 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19684 targetm.asm_out.globalize_label (file, cmse_name);
19685
19686 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19687 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19688 }
19689
19690 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19691 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19692 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19693 ASM_OUTPUT_LABEL (file, name);
19694
19695 if (cmse_name)
19696 ASM_OUTPUT_LABEL (file, cmse_name);
19697
19698 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19699 }
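
/* For a function "foo" carrying the cmse_nonsecure_entry attribute this
   emits, in addition to the usual declaration and .type directives, a
   globalized "__acle_se_foo" label at the same address as "foo"; the linker
   uses that label to create the secure gateway veneer.  */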
19700
19701 /* Write the function name into the code section, directly preceding
19702 the function prologue.
19703
19704 Code will be output similar to this:
19705 t0
19706 .ascii "arm_poke_function_name", 0
19707 .align
19708 t1
19709 .word 0xff000000 + (t1 - t0)
19710 arm_poke_function_name
19711 mov ip, sp
19712 stmfd sp!, {fp, ip, lr, pc}
19713 sub fp, ip, #4
19714
19715 When performing a stack backtrace, code can inspect the value
19716 of 'pc' stored at 'fp' + 0. If the trace function then looks
19717 at location pc - 12 and the top 8 bits are set, then we know
19718 that there is a function name embedded immediately preceding this
19719 location, and that its length is ((pc[-3]) & ~0xff000000).
19720
19721 We assume that pc is declared as a pointer to an unsigned long.
19722
19723 It is of no benefit to output the function name if we are assembling
19724 a leaf function. These function types will not contain a stack
19725 backtrace structure, therefore it is not possible to determine the
19726 function name. */
19727 void
19728 arm_poke_function_name (FILE *stream, const char *name)
19729 {
19730 unsigned long alignlength;
19731 unsigned long length;
19732 rtx x;
19733
19734 length = strlen (name) + 1;
19735 alignlength = ROUND_UP_WORD (length);
19736
19737 ASM_OUTPUT_ASCII (stream, name, length);
19738 ASM_OUTPUT_ALIGN (stream, 2);
19739 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19740 assemble_aligned_integer (UNITS_PER_WORD, x);
19741 }
19742
19743 /* Place some comments into the assembler stream
19744 describing the current function. */
19745 static void
19746 arm_output_function_prologue (FILE *f)
19747 {
19748 unsigned long func_type;
19749
19750 /* Sanity check. */
19751 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19752
19753 func_type = arm_current_func_type ();
19754
19755 switch ((int) ARM_FUNC_TYPE (func_type))
19756 {
19757 default:
19758 case ARM_FT_NORMAL:
19759 break;
19760 case ARM_FT_INTERWORKED:
19761 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19762 break;
19763 case ARM_FT_ISR:
19764 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19765 break;
19766 case ARM_FT_FIQ:
19767 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19768 break;
19769 case ARM_FT_EXCEPTION:
19770 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19771 break;
19772 }
19773
19774 if (IS_NAKED (func_type))
19775 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19776
19777 if (IS_VOLATILE (func_type))
19778 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19779
19780 if (IS_NESTED (func_type))
19781 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19782 if (IS_STACKALIGN (func_type))
19783 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19784 if (IS_CMSE_ENTRY (func_type))
19785 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19786
19787 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19788 crtl->args.size,
19789 crtl->args.pretend_args_size,
19790 (HOST_WIDE_INT) get_frame_size ());
19791
19792 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19793 frame_pointer_needed,
19794 cfun->machine->uses_anonymous_args);
19795
19796 if (cfun->machine->lr_save_eliminated)
19797 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19798
19799 if (crtl->calls_eh_return)
19800 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19801
19802 }
19803
19804 static void
19805 arm_output_function_epilogue (FILE *)
19806 {
19807 arm_stack_offsets *offsets;
19808
19809 if (TARGET_THUMB1)
19810 {
19811 int regno;
19812
19813 /* Emit any call-via-reg trampolines that are needed for v4t support
19814 of call_reg and call_value_reg type insns. */
19815 for (regno = 0; regno < LR_REGNUM; regno++)
19816 {
19817 rtx label = cfun->machine->call_via[regno];
19818
19819 if (label != NULL)
19820 {
19821 switch_to_section (function_section (current_function_decl));
19822 targetm.asm_out.internal_label (asm_out_file, "L",
19823 CODE_LABEL_NUMBER (label));
19824 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19825 }
19826 }
19827
19828 /* ??? Probably not safe to set this here, since it assumes that a
19829 function will be emitted as assembly immediately after we generate
19830 RTL for it. This does not happen for inline functions. */
19831 cfun->machine->return_used_this_function = 0;
19832 }
19833 else /* TARGET_32BIT */
19834 {
19835 /* We need to take into account any stack-frame rounding. */
19836 offsets = arm_get_frame_offsets ();
19837
19838 gcc_assert (!use_return_insn (FALSE, NULL)
19839 || (cfun->machine->return_used_this_function != 0)
19840 || offsets->saved_regs == offsets->outgoing_args
19841 || frame_pointer_needed);
19842 }
19843 }
19844
19845 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19846 STR and STRD. If an even number of registers are being pushed, one
19847 or more STRD patterns are created for each register pair. If an
19848 odd number of registers are pushed, emit an initial STR followed by
19849 as many STRD instructions as are needed. This works best when the
19850 stack is initially 64-bit aligned (the normal case), since it
19851 ensures that each STRD is also 64-bit aligned. */
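/* For example (illustrative only), pushing the odd-sized set {r4, r5, r6}
   emits a single store with writeback followed by one STRD:

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

   which leaves the stack laid out exactly as "push {r4, r5, r6}" would,
   while keeping the STRD 64-bit aligned whenever SP was 64-bit aligned on
   entry.  */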
19852 static void
19853 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19854 {
19855 int num_regs = 0;
19856 int i;
19857 int regno;
19858 rtx par = NULL_RTX;
19859 rtx dwarf = NULL_RTX;
19860 rtx tmp;
19861 bool first = true;
19862
19863 num_regs = bit_count (saved_regs_mask);
19864
19865 /* Must be at least one register to save, and can't save SP or PC. */
19866 gcc_assert (num_regs > 0 && num_regs <= 14);
19867 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19868 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19869
19870 /* Create sequence for DWARF info. All the frame-related data for
19871 debugging is held in this wrapper. */
19872 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19873
19874 /* Describe the stack adjustment. */
19875 tmp = gen_rtx_SET (stack_pointer_rtx,
19876 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19877 RTX_FRAME_RELATED_P (tmp) = 1;
19878 XVECEXP (dwarf, 0, 0) = tmp;
19879
19880 /* Find the first register. */
19881 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19882 ;
19883
19884 i = 0;
19885
19886 /* If there's an odd number of registers to push, start off by
19887 pushing a single register. This ensures that subsequent strd
19888 operations are dword aligned (assuming that SP was originally
19889 64-bit aligned). */
19890 if ((num_regs & 1) != 0)
19891 {
19892 rtx reg, mem, insn;
19893
19894 reg = gen_rtx_REG (SImode, regno);
19895 if (num_regs == 1)
19896 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19897 stack_pointer_rtx));
19898 else
19899 mem = gen_frame_mem (Pmode,
19900 gen_rtx_PRE_MODIFY
19901 (Pmode, stack_pointer_rtx,
19902 plus_constant (Pmode, stack_pointer_rtx,
19903 -4 * num_regs)));
19904
19905 tmp = gen_rtx_SET (mem, reg);
19906 RTX_FRAME_RELATED_P (tmp) = 1;
19907 insn = emit_insn (tmp);
19908 RTX_FRAME_RELATED_P (insn) = 1;
19909 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19910 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19911 RTX_FRAME_RELATED_P (tmp) = 1;
19912 i++;
19913 regno++;
19914 XVECEXP (dwarf, 0, i) = tmp;
19915 first = false;
19916 }
19917
19918 while (i < num_regs)
19919 if (saved_regs_mask & (1 << regno))
19920 {
19921 rtx reg1, reg2, mem1, mem2;
19922 rtx tmp0, tmp1, tmp2;
19923 int regno2;
19924
19925 /* Find the register to pair with this one. */
19926 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19927 regno2++)
19928 ;
19929
19930 reg1 = gen_rtx_REG (SImode, regno);
19931 reg2 = gen_rtx_REG (SImode, regno2);
19932
19933 if (first)
19934 {
19935 rtx insn;
19936
19937 first = false;
19938 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19939 stack_pointer_rtx,
19940 -4 * num_regs));
19941 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19942 stack_pointer_rtx,
19943 -4 * (num_regs - 1)));
19944 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19945 plus_constant (Pmode, stack_pointer_rtx,
19946 -4 * (num_regs)));
19947 tmp1 = gen_rtx_SET (mem1, reg1);
19948 tmp2 = gen_rtx_SET (mem2, reg2);
19949 RTX_FRAME_RELATED_P (tmp0) = 1;
19950 RTX_FRAME_RELATED_P (tmp1) = 1;
19951 RTX_FRAME_RELATED_P (tmp2) = 1;
19952 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19953 XVECEXP (par, 0, 0) = tmp0;
19954 XVECEXP (par, 0, 1) = tmp1;
19955 XVECEXP (par, 0, 2) = tmp2;
19956 insn = emit_insn (par);
19957 RTX_FRAME_RELATED_P (insn) = 1;
19958 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19959 }
19960 else
19961 {
19962 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19963 stack_pointer_rtx,
19964 4 * i));
19965 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19966 stack_pointer_rtx,
19967 4 * (i + 1)));
19968 tmp1 = gen_rtx_SET (mem1, reg1);
19969 tmp2 = gen_rtx_SET (mem2, reg2);
19970 RTX_FRAME_RELATED_P (tmp1) = 1;
19971 RTX_FRAME_RELATED_P (tmp2) = 1;
19972 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19973 XVECEXP (par, 0, 0) = tmp1;
19974 XVECEXP (par, 0, 1) = tmp2;
19975 emit_insn (par);
19976 }
19977
19978 /* Create unwind information. This is an approximation. */
19979 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19980 plus_constant (Pmode,
19981 stack_pointer_rtx,
19982 4 * i)),
19983 reg1);
19984 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19985 plus_constant (Pmode,
19986 stack_pointer_rtx,
19987 4 * (i + 1))),
19988 reg2);
19989
19990 RTX_FRAME_RELATED_P (tmp1) = 1;
19991 RTX_FRAME_RELATED_P (tmp2) = 1;
19992 XVECEXP (dwarf, 0, i + 1) = tmp1;
19993 XVECEXP (dwarf, 0, i + 2) = tmp2;
19994 i += 2;
19995 regno = regno2 + 1;
19996 }
19997 else
19998 regno++;
19999
20000 return;
20001 }
20002
20003 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20004 whenever possible, otherwise it emits single-word stores. The first store
20005 also allocates stack space for all saved registers, using writeback with
20006 pre-indexed addressing. All other stores use offset addressing. If no STRD
20007 can be emitted, this function emits a sequence of single-word stores,
20008 and not an STM as before, because single-word stores provide more freedom
20009 for scheduling and can be turned into an STM by peephole optimizations. */
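/* For example (an illustrative sketch), for a SAVED_REGS_MASK covering
   {r4, r5, r7} this emits:

	strd	r4, r5, [sp, #-12]!	@ first store allocates all 12 bytes
	str	r7, [sp, #8]

   r7 is odd-numbered and so cannot start an STRD pair; it falls back to a
   single-word store.  */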
20010 static void
20011 arm_emit_strd_push (unsigned long saved_regs_mask)
20012 {
20013 int num_regs = 0;
20014 int i, j, dwarf_index = 0;
20015 int offset = 0;
20016 rtx dwarf = NULL_RTX;
20017 rtx insn = NULL_RTX;
20018 rtx tmp, mem;
20019
20020 /* TODO: More efficient code can be emitted by changing the
20021 layout, e.g., by first pushing all pairs that can use STRD to keep the
20022 stack aligned, and then pushing all other registers. */
20023 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20024 if (saved_regs_mask & (1 << i))
20025 num_regs++;
20026
20027 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20028 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20029 gcc_assert (num_regs > 0);
20030
20031 /* Create sequence for DWARF info. */
20032 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20033
20034 /* For dwarf info, we generate explicit stack update. */
20035 tmp = gen_rtx_SET (stack_pointer_rtx,
20036 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20037 RTX_FRAME_RELATED_P (tmp) = 1;
20038 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20039
20040 /* Save registers. */
20041 offset = -4 * num_regs;
20042 j = 0;
20043 while (j <= LAST_ARM_REGNUM)
20044 if (saved_regs_mask & (1 << j))
20045 {
20046 if ((j % 2 == 0)
20047 && (saved_regs_mask & (1 << (j + 1))))
20048 {
20049 /* Current register and the next register form a register pair for
20050 which STRD can be generated. */
20051 if (offset < 0)
20052 {
20053 /* Allocate stack space for all saved registers. */
20054 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20055 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20056 mem = gen_frame_mem (DImode, tmp);
20057 offset = 0;
20058 }
20059 else if (offset > 0)
20060 mem = gen_frame_mem (DImode,
20061 plus_constant (Pmode,
20062 stack_pointer_rtx,
20063 offset));
20064 else
20065 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20066
20067 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20068 RTX_FRAME_RELATED_P (tmp) = 1;
20069 tmp = emit_insn (tmp);
20070
20071 /* Record the first store insn. */
20072 if (dwarf_index == 1)
20073 insn = tmp;
20074
20075 /* Generate dwarf info. */
20076 mem = gen_frame_mem (SImode,
20077 plus_constant (Pmode,
20078 stack_pointer_rtx,
20079 offset));
20080 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20081 RTX_FRAME_RELATED_P (tmp) = 1;
20082 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20083
20084 mem = gen_frame_mem (SImode,
20085 plus_constant (Pmode,
20086 stack_pointer_rtx,
20087 offset + 4));
20088 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20089 RTX_FRAME_RELATED_P (tmp) = 1;
20090 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20091
20092 offset += 8;
20093 j += 2;
20094 }
20095 else
20096 {
20097 /* Emit a single word store. */
20098 if (offset < 0)
20099 {
20100 /* Allocate stack space for all saved registers. */
20101 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20102 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20103 mem = gen_frame_mem (SImode, tmp);
20104 offset = 0;
20105 }
20106 else if (offset > 0)
20107 mem = gen_frame_mem (SImode,
20108 plus_constant (Pmode,
20109 stack_pointer_rtx,
20110 offset));
20111 else
20112 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20113
20114 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20115 RTX_FRAME_RELATED_P (tmp) = 1;
20116 tmp = emit_insn (tmp);
20117
20118 /* Record the first store insn. */
20119 if (dwarf_index == 1)
20120 insn = tmp;
20121
20122 /* Generate dwarf info. */
20123 mem = gen_frame_mem (SImode,
20124 plus_constant (Pmode,
20125 stack_pointer_rtx,
20126 offset));
20127 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20128 RTX_FRAME_RELATED_P (tmp) = 1;
20129 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20130
20131 offset += 4;
20132 j += 1;
20133 }
20134 }
20135 else
20136 j++;
20137
20138 /* Attach dwarf info to the first insn we generate. */
20139 gcc_assert (insn != NULL_RTX);
20140 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20141 RTX_FRAME_RELATED_P (insn) = 1;
20142 }
20143
20144 /* Generate and emit an insn that we will recognize as a push_multi.
20145 Unfortunately, since this insn does not reflect very well the actual
20146 semantics of the operation, we need to annotate the insn for the benefit
20147 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20148 MASK for registers that should be annotated for DWARF2 frame unwind
20149 information. */
20150 static rtx
20151 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20152 {
20153 int num_regs = 0;
20154 int num_dwarf_regs = 0;
20155 int i, j;
20156 rtx par;
20157 rtx dwarf;
20158 int dwarf_par_index;
20159 rtx tmp, reg;
20160
20161 /* We don't record the PC in the dwarf frame information. */
20162 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20163
20164 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20165 {
20166 if (mask & (1 << i))
20167 num_regs++;
20168 if (dwarf_regs_mask & (1 << i))
20169 num_dwarf_regs++;
20170 }
20171
20172 gcc_assert (num_regs && num_regs <= 16);
20173 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20174
20175 /* For the body of the insn we are going to generate an UNSPEC in
20176 parallel with several USEs. This allows the insn to be recognized
20177 by the push_multi pattern in the arm.md file.
20178
20179 The body of the insn looks something like this:
20180
20181 (parallel [
20182 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20183 (const_int:SI <num>)))
20184 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20185 (use (reg:SI XX))
20186 (use (reg:SI YY))
20187 ...
20188 ])
20189
20190 For the frame note however, we try to be more explicit and actually
20191 show each register being stored into the stack frame, plus a (single)
20192 decrement of the stack pointer. We do it this way in order to be
20193 friendly to the stack unwinding code, which only wants to see a single
20194 stack decrement per instruction. The RTL we generate for the note looks
20195 something like this:
20196
20197 (sequence [
20198 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20199 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20200 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20201 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20202 ...
20203 ])
20204
20205 FIXME: In an ideal world the PRE_MODIFY would not exist and
20206 instead we'd have a parallel expression detailing all
20207 the stores to the various memory addresses so that debug
20208 information is more up-to-date. Remember however while writing
20209 this to take care of the constraints with the push instruction.
20210
20211 Note also that this has to be taken care of for the VFP registers.
20212
20213 For more see PR43399. */
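 /* As a concrete (illustrative) instance, for MASK == DWARF_REGS_MASK
    covering {r4, r5, lr} the insn is intended to be recognized by the
    push_multi pattern (i.e. a single "push {r4, r5, lr}"), while the
    attached note describes:

    (sequence [
	(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -12)))
	(set (mem:SI (reg:SI sp)) (reg:SI r4))
	(set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI r5))
	(set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI lr))
    ])  */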
20214
20215 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20216 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20217 dwarf_par_index = 1;
20218
20219 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20220 {
20221 if (mask & (1 << i))
20222 {
20223 reg = gen_rtx_REG (SImode, i);
20224
20225 XVECEXP (par, 0, 0)
20226 = gen_rtx_SET (gen_frame_mem
20227 (BLKmode,
20228 gen_rtx_PRE_MODIFY (Pmode,
20229 stack_pointer_rtx,
20230 plus_constant
20231 (Pmode, stack_pointer_rtx,
20232 -4 * num_regs))
20233 ),
20234 gen_rtx_UNSPEC (BLKmode,
20235 gen_rtvec (1, reg),
20236 UNSPEC_PUSH_MULT));
20237
20238 if (dwarf_regs_mask & (1 << i))
20239 {
20240 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20241 reg);
20242 RTX_FRAME_RELATED_P (tmp) = 1;
20243 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20244 }
20245
20246 break;
20247 }
20248 }
20249
20250 for (j = 1, i++; j < num_regs; i++)
20251 {
20252 if (mask & (1 << i))
20253 {
20254 reg = gen_rtx_REG (SImode, i);
20255
20256 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20257
20258 if (dwarf_regs_mask & (1 << i))
20259 {
20260 tmp
20261 = gen_rtx_SET (gen_frame_mem
20262 (SImode,
20263 plus_constant (Pmode, stack_pointer_rtx,
20264 4 * j)),
20265 reg);
20266 RTX_FRAME_RELATED_P (tmp) = 1;
20267 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20268 }
20269
20270 j++;
20271 }
20272 }
20273
20274 par = emit_insn (par);
20275
20276 tmp = gen_rtx_SET (stack_pointer_rtx,
20277 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20278 RTX_FRAME_RELATED_P (tmp) = 1;
20279 XVECEXP (dwarf, 0, 0) = tmp;
20280
20281 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20282
20283 return par;
20284 }
20285
20286 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20287 SIZE is the offset to be adjusted.
20288 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20289 static void
20290 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20291 {
20292 rtx dwarf;
20293
20294 RTX_FRAME_RELATED_P (insn) = 1;
20295 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20296 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20297 }
20298
20299 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20300 SAVED_REGS_MASK shows which registers need to be restored.
20301
20302 Unfortunately, since this insn does not reflect very well the actual
20303 semantics of the operation, we need to annotate the insn for the benefit
20304 of DWARF2 frame unwind information. */
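/* For example (illustrative), for a SAVED_REGS_MASK covering {r4, r5, pc}
   the emitted parallel holds a return, the SP increment by 12 and three
   loads from [sp], [sp, #4] and [sp, #8]; it is intended to match as a
   single "pop {r4, r5, pc}".  */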
20305 static void
20306 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20307 {
20308 int num_regs = 0;
20309 int i, j;
20310 rtx par;
20311 rtx dwarf = NULL_RTX;
20312 rtx tmp, reg;
20313 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20314 int offset_adj;
20315 int emit_update;
20316
20317 offset_adj = return_in_pc ? 1 : 0;
20318 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20319 if (saved_regs_mask & (1 << i))
20320 num_regs++;
20321
20322 gcc_assert (num_regs && num_regs <= 16);
20323
20324 /* If SP is in the reglist, then we don't emit an SP update insn. */
20325 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20326
20327 /* The parallel needs to hold num_regs SETs
20328 and one SET for the stack update. */
20329 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20330
20331 if (return_in_pc)
20332 XVECEXP (par, 0, 0) = ret_rtx;
20333
20334 if (emit_update)
20335 {
20336 /* Increment the stack pointer, based on there being
20337 num_regs 4-byte registers to restore. */
20338 tmp = gen_rtx_SET (stack_pointer_rtx,
20339 plus_constant (Pmode,
20340 stack_pointer_rtx,
20341 4 * num_regs));
20342 RTX_FRAME_RELATED_P (tmp) = 1;
20343 XVECEXP (par, 0, offset_adj) = tmp;
20344 }
20345
20346 /* Now restore every reg, which may include PC. */
20347 for (j = 0, i = 0; j < num_regs; i++)
20348 if (saved_regs_mask & (1 << i))
20349 {
20350 reg = gen_rtx_REG (SImode, i);
20351 if ((num_regs == 1) && emit_update && !return_in_pc)
20352 {
20353 /* Emit single load with writeback. */
20354 tmp = gen_frame_mem (SImode,
20355 gen_rtx_POST_INC (Pmode,
20356 stack_pointer_rtx));
20357 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20358 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20359 return;
20360 }
20361
20362 tmp = gen_rtx_SET (reg,
20363 gen_frame_mem
20364 (SImode,
20365 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20366 RTX_FRAME_RELATED_P (tmp) = 1;
20367 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20368
20369 /* We need to maintain a sequence for DWARF info too. As dwarf info
20370 should not have PC, skip PC. */
20371 if (i != PC_REGNUM)
20372 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20373
20374 j++;
20375 }
20376
20377 if (return_in_pc)
20378 par = emit_jump_insn (par);
20379 else
20380 par = emit_insn (par);
20381
20382 REG_NOTES (par) = dwarf;
20383 if (!return_in_pc)
20384 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20385 stack_pointer_rtx, stack_pointer_rtx);
20386 }
20387
20388 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20389 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20390
20391 Unfortunately, since this insn does not reflect very well the actual
20392 semantics of the operation, we need to annotate the insn for the benefit
20393 of DWARF2 frame unwind information. */
20394 static void
20395 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20396 {
20397 int i, j;
20398 rtx par;
20399 rtx dwarf = NULL_RTX;
20400 rtx tmp, reg;
20401
20402 gcc_assert (num_regs && num_regs <= 32);
20403
20404 /* Workaround ARM10 VFPr1 bug. */
20405 if (num_regs == 2 && !arm_arch6)
20406 {
20407 if (first_reg == 15)
20408 first_reg--;
20409
20410 num_regs++;
20411 }
20412
20413 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20414 there could be up to 32 D-registers to restore.
20415 If there are more than 16 D-registers, make two recursive calls,
20416 each of which emits one pop_multi instruction. */
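  /* For example (illustrative), a request for 20 D-registers is handled as
     one pop_multi of the first 16 followed by a second pop_multi of the
     remaining 4, with BASE_REG having been advanced by the first pop.  */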
20417 if (num_regs > 16)
20418 {
20419 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20420 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20421 return;
20422 }
20423
20424 /* The parallel needs to hold num_regs SETs
20425 and one SET for the stack update. */
20426 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20427
20428 /* Increment the stack pointer, based on there being
20429 num_regs 8-byte registers to restore. */
20430 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20431 RTX_FRAME_RELATED_P (tmp) = 1;
20432 XVECEXP (par, 0, 0) = tmp;
20433
20434 /* Now show every reg that will be restored, using a SET for each. */
20435 for (j = 0, i = first_reg; j < num_regs; i += 2)
20436 {
20437 reg = gen_rtx_REG (DFmode, i);
20438
20439 tmp = gen_rtx_SET (reg,
20440 gen_frame_mem
20441 (DFmode,
20442 plus_constant (Pmode, base_reg, 8 * j)));
20443 RTX_FRAME_RELATED_P (tmp) = 1;
20444 XVECEXP (par, 0, j + 1) = tmp;
20445
20446 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20447
20448 j++;
20449 }
20450
20451 par = emit_insn (par);
20452 REG_NOTES (par) = dwarf;
20453
20454 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
20455 if (REGNO (base_reg) == IP_REGNUM)
20456 {
20457 RTX_FRAME_RELATED_P (par) = 1;
20458 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20459 }
20460 else
20461 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20462 base_reg, base_reg);
20463 }
20464
20465 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20466 even number of registers is being popped, multiple LDRD patterns are created for
20467 all register pairs. If an odd number of registers is popped, the last register
20468 is loaded using an LDR pattern. */
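/* For example (illustrative only), popping {r4, r5, r6, r7} produces two
   LDRD patterns followed by a single stack adjustment:

	ldrd	r4, r5, [sp]
	ldrd	r6, r7, [sp, #8]
	add	sp, sp, #16

   whereas popping {r4, r5, r6, pc} uses one LDRD for r4/r5, adjusts SP by
   8, and then pops {r6, pc} through arm_emit_multi_reg_pop.  */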
20469 static void
20470 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20471 {
20472 int num_regs = 0;
20473 int i, j;
20474 rtx par = NULL_RTX;
20475 rtx dwarf = NULL_RTX;
20476 rtx tmp, reg, tmp1;
20477 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20478
20479 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20480 if (saved_regs_mask & (1 << i))
20481 num_regs++;
20482
20483 gcc_assert (num_regs && num_regs <= 16);
20484
20485 /* We cannot generate LDRD for PC, so reduce the count if PC is to be
20486 popped. If num_regs was even it now becomes odd, and a pop with PC
20487 can be generated; if it was odd it becomes even, and an LDR with
20488 return can be generated for PC. */
20489 if (return_in_pc)
20490 num_regs--;
20491
20492 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20493
20494 /* Var j iterates over all the registers to find those set in
20495 saved_regs_mask. Var i gives the index of a saved register in the stack frame.
20496 A PARALLEL RTX of register-pair is created here, so that pattern for
20497 LDRD can be matched. As PC is always last register to be popped, and
20498 we have already decremented num_regs if PC, we don't have to worry
20499 about PC in this loop. */
20500 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20501 if (saved_regs_mask & (1 << j))
20502 {
20503 /* Create RTX for memory load. */
20504 reg = gen_rtx_REG (SImode, j);
20505 tmp = gen_rtx_SET (reg,
20506 gen_frame_mem (SImode,
20507 plus_constant (Pmode,
20508 stack_pointer_rtx, 4 * i)));
20509 RTX_FRAME_RELATED_P (tmp) = 1;
20510
20511 if (i % 2 == 0)
20512 {
20513 /* When saved-register index (i) is even, the RTX to be emitted is
20514 yet to be created. Hence create it first. The LDRD pattern we
20515 are generating is :
20516 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20517 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20518 where target registers need not be consecutive. */
20519 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20520 dwarf = NULL_RTX;
20521 }
20522
20523 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20524 added as 0th element and if i is odd, reg_i is added as 1st element
20525 of LDRD pattern shown above. */
20526 XVECEXP (par, 0, (i % 2)) = tmp;
20527 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20528
20529 if ((i % 2) == 1)
20530 {
20531 /* When saved-register index (i) is odd, RTXs for both the registers
20532 to be loaded are generated in above given LDRD pattern, and the
20533 pattern can be emitted now. */
20534 par = emit_insn (par);
20535 REG_NOTES (par) = dwarf;
20536 RTX_FRAME_RELATED_P (par) = 1;
20537 }
20538
20539 i++;
20540 }
20541
20542 /* If the number of registers pushed is odd and return_in_pc is false, or
20543 the number of registers is even and return_in_pc is true, the last
20544 register is popped using LDR. It can be PC as well. Hence, adjust the
20545 stack first and then use LDR with post-increment. */
20546
20547 /* Increment the stack pointer, based on there being
20548 num_regs 4-byte registers to restore. */
20549 tmp = gen_rtx_SET (stack_pointer_rtx,
20550 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20551 RTX_FRAME_RELATED_P (tmp) = 1;
20552 tmp = emit_insn (tmp);
20553 if (!return_in_pc)
20554 {
20555 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20556 stack_pointer_rtx, stack_pointer_rtx);
20557 }
20558
20559 dwarf = NULL_RTX;
20560
20561 if (((num_regs % 2) == 1 && !return_in_pc)
20562 || ((num_regs % 2) == 0 && return_in_pc))
20563 {
20564 /* Scan for the single register to be popped. Skip until the saved
20565 register is found. */
20566 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20567
20568 /* Gen LDR with post increment here. */
20569 tmp1 = gen_rtx_MEM (SImode,
20570 gen_rtx_POST_INC (SImode,
20571 stack_pointer_rtx));
20572 set_mem_alias_set (tmp1, get_frame_alias_set ());
20573
20574 reg = gen_rtx_REG (SImode, j);
20575 tmp = gen_rtx_SET (reg, tmp1);
20576 RTX_FRAME_RELATED_P (tmp) = 1;
20577 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20578
20579 if (return_in_pc)
20580 {
20581 /* If return_in_pc, j must be PC_REGNUM. */
20582 gcc_assert (j == PC_REGNUM);
20583 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20584 XVECEXP (par, 0, 0) = ret_rtx;
20585 XVECEXP (par, 0, 1) = tmp;
20586 par = emit_jump_insn (par);
20587 }
20588 else
20589 {
20590 par = emit_insn (tmp);
20591 REG_NOTES (par) = dwarf;
20592 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20593 stack_pointer_rtx, stack_pointer_rtx);
20594 }
20595
20596 }
20597 else if ((num_regs % 2) == 1 && return_in_pc)
20598 {
20599 /* There are 2 registers to be popped. So, generate the pattern
20600 pop_multiple_with_stack_update_and_return to pop in PC. */
20601 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20602 }
20603
20604 return;
20605 }
20606
20607 /* LDRD in ARM mode needs consecutive registers as operands. This function
20608 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20609 offset addressing and then generates one separate stack update. This provides
20610 more scheduling freedom, compared to writeback on every load. However,
20611 if the function returns using load into PC directly
20612 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20613 before the last load. TODO: Add a peephole optimization to recognize
20614 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20615 peephole optimization to merge the load at stack-offset zero
20616 with the stack update instruction using load with writeback
20617 in post-index addressing mode. */
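/* For example (an illustrative sketch), for a SAVED_REGS_MASK covering
   {r4, r5, r6} this emits:

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12

   and for {r4, r5, pc} it emits the LDRD, adjusts SP by 8 and then
   returns with a final "ldr pc, [sp], #4".  */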
20618 static void
20619 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20620 {
20621 int j = 0;
20622 int offset = 0;
20623 rtx par = NULL_RTX;
20624 rtx dwarf = NULL_RTX;
20625 rtx tmp, mem;
20626
20627 /* Restore saved registers. */
20628 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20629 j = 0;
20630 while (j <= LAST_ARM_REGNUM)
20631 if (saved_regs_mask & (1 << j))
20632 {
20633 if ((j % 2) == 0
20634 && (saved_regs_mask & (1 << (j + 1)))
20635 && (j + 1) != PC_REGNUM)
20636 {
20637 /* Current register and next register form register pair for which
20638 LDRD can be generated. PC is always the last register popped, and
20639 we handle it separately. */
20640 if (offset > 0)
20641 mem = gen_frame_mem (DImode,
20642 plus_constant (Pmode,
20643 stack_pointer_rtx,
20644 offset));
20645 else
20646 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20647
20648 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20649 tmp = emit_insn (tmp);
20650 RTX_FRAME_RELATED_P (tmp) = 1;
20651
20652 /* Generate dwarf info. */
20653
20654 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20655 gen_rtx_REG (SImode, j),
20656 NULL_RTX);
20657 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20658 gen_rtx_REG (SImode, j + 1),
20659 dwarf);
20660
20661 REG_NOTES (tmp) = dwarf;
20662
20663 offset += 8;
20664 j += 2;
20665 }
20666 else if (j != PC_REGNUM)
20667 {
20668 /* Emit a single word load. */
20669 if (offset > 0)
20670 mem = gen_frame_mem (SImode,
20671 plus_constant (Pmode,
20672 stack_pointer_rtx,
20673 offset));
20674 else
20675 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20676
20677 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20678 tmp = emit_insn (tmp);
20679 RTX_FRAME_RELATED_P (tmp) = 1;
20680
20681 /* Generate dwarf info. */
20682 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20683 gen_rtx_REG (SImode, j),
20684 NULL_RTX);
20685
20686 offset += 4;
20687 j += 1;
20688 }
20689 else /* j == PC_REGNUM */
20690 j++;
20691 }
20692 else
20693 j++;
20694
20695 /* Update the stack. */
20696 if (offset > 0)
20697 {
20698 tmp = gen_rtx_SET (stack_pointer_rtx,
20699 plus_constant (Pmode,
20700 stack_pointer_rtx,
20701 offset));
20702 tmp = emit_insn (tmp);
20703 arm_add_cfa_adjust_cfa_note (tmp, offset,
20704 stack_pointer_rtx, stack_pointer_rtx);
20705 offset = 0;
20706 }
20707
20708 if (saved_regs_mask & (1 << PC_REGNUM))
20709 {
20710 /* Only PC is to be popped. */
20711 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20712 XVECEXP (par, 0, 0) = ret_rtx;
20713 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20714 gen_frame_mem (SImode,
20715 gen_rtx_POST_INC (SImode,
20716 stack_pointer_rtx)));
20717 RTX_FRAME_RELATED_P (tmp) = 1;
20718 XVECEXP (par, 0, 1) = tmp;
20719 par = emit_jump_insn (par);
20720
20721 /* Generate dwarf info. */
20722 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20723 gen_rtx_REG (SImode, PC_REGNUM),
20724 NULL_RTX);
20725 REG_NOTES (par) = dwarf;
20726 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20727 stack_pointer_rtx, stack_pointer_rtx);
20728 }
20729 }
20730
20731 /* Calculate the size of the return value that is passed in registers. */
20732 static unsigned
20733 arm_size_return_regs (void)
20734 {
20735 machine_mode mode;
20736
20737 if (crtl->return_rtx != 0)
20738 mode = GET_MODE (crtl->return_rtx);
20739 else
20740 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20741
20742 return GET_MODE_SIZE (mode);
20743 }
20744
20745 /* Return true if the current function needs to save/restore LR. */
20746 static bool
20747 thumb_force_lr_save (void)
20748 {
20749 return !cfun->machine->lr_save_eliminated
20750 && (!crtl->is_leaf
20751 || thumb_far_jump_used_p ()
20752 || df_regs_ever_live_p (LR_REGNUM));
20753 }
20754
20755 /* We do not know whether r3 will be available, because
20756 there is an indirect tailcall happening in this
20757 particular case. */
20758 static bool
20759 is_indirect_tailcall_p (rtx call)
20760 {
20761 rtx pat = PATTERN (call);
20762
20763 /* Indirect tail call. */
20764 pat = XVECEXP (pat, 0, 0);
20765 if (GET_CODE (pat) == SET)
20766 pat = SET_SRC (pat);
20767
20768 pat = XEXP (XEXP (pat, 0), 0);
20769 return REG_P (pat);
20770 }
20771
20772 /* Return true if r3 is used by any of the tail call insns in the
20773 current function. */
20774 static bool
20775 any_sibcall_could_use_r3 (void)
20776 {
20777 edge_iterator ei;
20778 edge e;
20779
20780 if (!crtl->tail_call_emit)
20781 return false;
20782 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20783 if (e->flags & EDGE_SIBCALL)
20784 {
20785 rtx_insn *call = BB_END (e->src);
20786 if (!CALL_P (call))
20787 call = prev_nonnote_nondebug_insn (call);
20788 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20789 if (find_regno_fusage (call, USE, 3)
20790 || is_indirect_tailcall_p (call))
20791 return true;
20792 }
20793 return false;
20794 }
20795
20796
20797 /* Compute the distance from register FROM to register TO.
20798 These can be the arg pointer (26), the soft frame pointer (25),
20799 the stack pointer (13) or the hard frame pointer (11).
20800 In thumb mode r7 is used as the soft frame pointer, if needed.
20801 Typical stack layout looks like this:
20802
20803 old stack pointer -> | |
20804 ----
20805 | | \
20806 | | saved arguments for
20807 | | vararg functions
20808 | | /
20809 --
20810 hard FP & arg pointer -> | | \
20811 | | stack
20812 | | frame
20813 | | /
20814 --
20815 | | \
20816 | | call saved
20817 | | registers
20818 soft frame pointer -> | | /
20819 --
20820 | | \
20821 | | local
20822 | | variables
20823 locals base pointer -> | | /
20824 --
20825 | | \
20826 | | outgoing
20827 | | arguments
20828 current stack pointer -> | | /
20829 --
20830
20831 For a given function some or all of these stack components
20832 may not be needed, giving rise to the possibility of
20833 eliminating some of the registers.
20834
20835 The values returned by this function must reflect the behavior
20836 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
20837
20838 The sign of the number returned reflects the direction of stack
20839 growth, so the values are positive for all eliminations except
20840 from the soft frame pointer to the hard frame pointer.
20841
20842 SFP may point just inside the local variables block to ensure correct
20843 alignment. */
20844
20845
20846 /* Return cached stack offsets. */
20847
20848 static arm_stack_offsets *
20849 arm_get_frame_offsets (void)
20850 {
20851 struct arm_stack_offsets *offsets;
20852
20853 offsets = &cfun->machine->stack_offsets;
20854
20855 return offsets;
20856 }
20857
20858
20859 /* Calculate stack offsets. These are used to calculate register elimination
20860 offsets and in prologue/epilogue code. Also calculates which registers
20861 should be saved. */
20862
20863 static void
20864 arm_compute_frame_layout (void)
20865 {
20866 struct arm_stack_offsets *offsets;
20867 unsigned long func_type;
20868 int saved;
20869 int core_saved;
20870 HOST_WIDE_INT frame_size;
20871 int i;
20872
20873 offsets = &cfun->machine->stack_offsets;
20874
20875 /* Initially this is the size of the local variables. It will be translated
20876 into an offset once we have determined the size of preceding data. */
20877 frame_size = ROUND_UP_WORD (get_frame_size ());
20878
20879 /* Space for variadic functions. */
20880 offsets->saved_args = crtl->args.pretend_args_size;
20881
20882 /* In Thumb mode this is incorrect, but never used. */
20883 offsets->frame
20884 = (offsets->saved_args
20885 + arm_compute_static_chain_stack_bytes ()
20886 + (frame_pointer_needed ? 4 : 0));
20887
20888 if (TARGET_32BIT)
20889 {
20890 unsigned int regno;
20891
20892 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
20893 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20894 saved = core_saved;
20895
20896 /* We know that SP will be doubleword aligned on entry, and we must
20897 preserve that condition at any subroutine call. We also require the
20898 soft frame pointer to be doubleword aligned. */
20899
20900 if (TARGET_REALLY_IWMMXT)
20901 {
20902 /* Check for the call-saved iWMMXt registers. */
20903 for (regno = FIRST_IWMMXT_REGNUM;
20904 regno <= LAST_IWMMXT_REGNUM;
20905 regno++)
20906 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20907 saved += 8;
20908 }
20909
20910 func_type = arm_current_func_type ();
20911 /* Space for saved VFP registers. */
20912 if (! IS_VOLATILE (func_type)
20913 && TARGET_HARD_FLOAT)
20914 saved += arm_get_vfp_saved_size ();
20915 }
20916 else /* TARGET_THUMB1 */
20917 {
20918 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
20919 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20920 saved = core_saved;
20921 if (TARGET_BACKTRACE)
20922 saved += 16;
20923 }
20924
20925 /* Saved registers include the stack frame. */
20926 offsets->saved_regs
20927 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20928 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20929
20930 /* A leaf function does not need any stack alignment if it has nothing
20931 on the stack. */
20932 if (crtl->is_leaf && frame_size == 0
20933 /* However if it calls alloca(), we have a dynamically allocated
20934 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20935 && ! cfun->calls_alloca)
20936 {
20937 offsets->outgoing_args = offsets->soft_frame;
20938 offsets->locals_base = offsets->soft_frame;
20939 return;
20940 }
20941
20942 /* Ensure SFP has the correct alignment. */
20943 if (ARM_DOUBLEWORD_ALIGN
20944 && (offsets->soft_frame & 7))
20945 {
20946 offsets->soft_frame += 4;
20947 /* Try to align stack by pushing an extra reg. Don't bother doing this
20948 when there is a stack frame as the alignment will be rolled into
20949 the normal stack adjustment. */
20950 if (frame_size + crtl->outgoing_args_size == 0)
20951 {
20952 int reg = -1;
20953
20954 /* Register r3 is caller-saved. Normally it does not need to be
20955 saved on entry by the prologue. However if we choose to save
20956 it for padding then we may confuse the compiler into thinking
20957 a prologue sequence is required when in fact it is not. This
20958 will occur when shrink-wrapping if r3 is used as a scratch
20959 register and there are no other callee-saved writes.
20960
20961 This situation can be avoided when other callee-saved registers
20962 are available and r3 is not mandatory if we choose a callee-saved
20963 register for padding. */
20964 bool prefer_callee_reg_p = false;
20965
20966 /* If it is safe to use r3, then do so. This sometimes
20967 generates better code on Thumb-2 by avoiding the need to
20968 use 32-bit push/pop instructions. */
20969 if (! any_sibcall_could_use_r3 ()
20970 && arm_size_return_regs () <= 12
20971 && (offsets->saved_regs_mask & (1 << 3)) == 0
20972 && (TARGET_THUMB2
20973 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20974 {
20975 reg = 3;
20976 if (!TARGET_THUMB2)
20977 prefer_callee_reg_p = true;
20978 }
20979 if (reg == -1
20980 || prefer_callee_reg_p)
20981 {
20982 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20983 {
20984 /* Avoid fixed registers; they may be changed at
20985 arbitrary times so it's unsafe to restore them
20986 during the epilogue. */
20987 if (!fixed_regs[i]
20988 && (offsets->saved_regs_mask & (1 << i)) == 0)
20989 {
20990 reg = i;
20991 break;
20992 }
20993 }
20994 }
20995
20996 if (reg != -1)
20997 {
20998 offsets->saved_regs += 4;
20999 offsets->saved_regs_mask |= (1 << reg);
21000 }
21001 }
21002 }
21003
21004 offsets->locals_base = offsets->soft_frame + frame_size;
21005 offsets->outgoing_args = (offsets->locals_base
21006 + crtl->outgoing_args_size);
21007
21008 if (ARM_DOUBLEWORD_ALIGN)
21009 {
21010 /* Ensure SP remains doubleword aligned. */
21011 if (offsets->outgoing_args & 7)
21012 offsets->outgoing_args += 4;
21013 gcc_assert (!(offsets->outgoing_args & 7));
21014 }
21015 }
21016
21017
21018 /* Calculate the relative offsets for the different stack pointers. Positive
21019 offsets are in the direction of stack growth. */
21020
21021 HOST_WIDE_INT
21022 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21023 {
21024 arm_stack_offsets *offsets;
21025
21026 offsets = arm_get_frame_offsets ();
21027
21028 /* OK, now we have enough information to compute the distances.
21029 There must be an entry in these switch tables for each pair
21030 of registers in ELIMINABLE_REGS, even if some of the entries
21031 seem to be redundant or useless. */
21032 switch (from)
21033 {
21034 case ARG_POINTER_REGNUM:
21035 switch (to)
21036 {
21037 case THUMB_HARD_FRAME_POINTER_REGNUM:
21038 return 0;
21039
21040 case FRAME_POINTER_REGNUM:
21041 /* This is the reverse of the soft frame pointer
21042 to hard frame pointer elimination below. */
21043 return offsets->soft_frame - offsets->saved_args;
21044
21045 case ARM_HARD_FRAME_POINTER_REGNUM:
21046 /* This is only non-zero in the case where the static chain register
21047 is stored above the frame. */
21048 return offsets->frame - offsets->saved_args - 4;
21049
21050 case STACK_POINTER_REGNUM:
21051 /* If nothing has been pushed on the stack at all
21052 then this will return -4. This *is* correct! */
21053 return offsets->outgoing_args - (offsets->saved_args + 4);
21054
21055 default:
21056 gcc_unreachable ();
21057 }
21058 gcc_unreachable ();
21059
21060 case FRAME_POINTER_REGNUM:
21061 switch (to)
21062 {
21063 case THUMB_HARD_FRAME_POINTER_REGNUM:
21064 return 0;
21065
21066 case ARM_HARD_FRAME_POINTER_REGNUM:
21067 /* The hard frame pointer points to the top entry in the
21068 stack frame. The soft frame pointer to the bottom entry
21069 in the stack frame. If there is no stack frame at all,
21070 then they are identical. */
21071
21072 return offsets->frame - offsets->soft_frame;
21073
21074 case STACK_POINTER_REGNUM:
21075 return offsets->outgoing_args - offsets->soft_frame;
21076
21077 default:
21078 gcc_unreachable ();
21079 }
21080 gcc_unreachable ();
21081
21082 default:
21083 /* You cannot eliminate from the stack pointer.
21084 In theory you could eliminate from the hard frame
21085 pointer to the stack pointer, but this will never
21086 happen, since if a stack frame is not needed the
21087 hard frame pointer will never be used. */
21088 gcc_unreachable ();
21089 }
21090 }
21091
21092 /* Given FROM and TO register numbers, say whether this elimination is
21093 allowed. Frame pointer elimination is automatically handled.
21094
21095 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21096 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21097 pointer, we must eliminate FRAME_POINTER_REGNUM into
21098 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21099 ARG_POINTER_REGNUM. */
21100
21101 bool
21102 arm_can_eliminate (const int from, const int to)
21103 {
21104 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21105 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21106 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21107 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21108 true);
21109 }
21110
21111 /* Emit RTL to save coprocessor registers on function entry. Returns the
21112 number of bytes pushed. */
21113
21114 static int
21115 arm_save_coproc_regs (void)
21116 {
21117 int saved_size = 0;
21118 unsigned reg;
21119 unsigned start_reg;
21120 rtx insn;
21121
21122 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21123 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21124 {
21125 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21126 insn = gen_rtx_MEM (V2SImode, insn);
21127 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21128 RTX_FRAME_RELATED_P (insn) = 1;
21129 saved_size += 8;
21130 }
21131
21132 if (TARGET_HARD_FLOAT)
21133 {
21134 start_reg = FIRST_VFP_REGNUM;
21135
21136 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21137 {
21138 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21139 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21140 {
21141 if (start_reg != reg)
21142 saved_size += vfp_emit_fstmd (start_reg,
21143 (reg - start_reg) / 2);
21144 start_reg = reg + 2;
21145 }
21146 }
21147 if (start_reg != reg)
21148 saved_size += vfp_emit_fstmd (start_reg,
21149 (reg - start_reg) / 2);
21150 }
21151 return saved_size;
21152 }
21153
21154
21155 /* Set the Thumb frame pointer from the stack pointer. */
21156
21157 static void
21158 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21159 {
21160 HOST_WIDE_INT amount;
21161 rtx insn, dwarf;
21162
21163 amount = offsets->outgoing_args - offsets->locals_base;
21164 if (amount < 1024)
21165 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21166 stack_pointer_rtx, GEN_INT (amount)));
21167 else
21168 {
21169 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21170 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21171 expects the first two operands to be the same. */
21172 if (TARGET_THUMB2)
21173 {
21174 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21175 stack_pointer_rtx,
21176 hard_frame_pointer_rtx));
21177 }
21178 else
21179 {
21180 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21181 hard_frame_pointer_rtx,
21182 stack_pointer_rtx));
21183 }
21184 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21185 plus_constant (Pmode, stack_pointer_rtx, amount));
21186 RTX_FRAME_RELATED_P (dwarf) = 1;
21187 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21188 }
21189
21190 RTX_FRAME_RELATED_P (insn) = 1;
21191 }
21192
21193 struct scratch_reg {
21194 rtx reg;
21195 bool saved;
21196 };
21197
21198 /* Return a short-lived scratch register for use as a 2nd scratch register on
21199 function entry after the registers are saved in the prologue. This register
21200 must be released by means of release_scratch_register_on_entry. IP is not
21201 considered since it is always used as the 1st scratch register if available.
21202
21203 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21204 mask of live registers. */
21205
21206 static void
21207 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21208 unsigned long live_regs)
21209 {
21210 int regno = -1;
21211
21212 sr->saved = false;
21213
21214 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21215 regno = LR_REGNUM;
21216 else
21217 {
21218 unsigned int i;
21219
21220 for (i = 4; i < 11; i++)
21221 if (regno1 != i && (live_regs & (1 << i)) != 0)
21222 {
21223 regno = i;
21224 break;
21225 }
21226
21227 if (regno < 0)
21228 {
21229 /* If IP is used as the 1st scratch register for a nested function,
21230 then either r3 wasn't available or is used to preserve IP. */
21231 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21232 regno1 = 3;
21233 regno = (regno1 == 3 ? 2 : 3);
21234 sr->saved
21235 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21236 regno);
21237 }
21238 }
21239
21240 sr->reg = gen_rtx_REG (SImode, regno);
21241 if (sr->saved)
21242 {
21243 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21244 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21245 rtx x = gen_rtx_SET (stack_pointer_rtx,
21246 plus_constant (Pmode, stack_pointer_rtx, -4));
21247 RTX_FRAME_RELATED_P (insn) = 1;
21248 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21249 }
21250 }
21251
21252 /* Release a scratch register obtained from the preceding function. */
21253
21254 static void
21255 release_scratch_register_on_entry (struct scratch_reg *sr)
21256 {
21257 if (sr->saved)
21258 {
21259 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21260 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21261 rtx x = gen_rtx_SET (stack_pointer_rtx,
21262 plus_constant (Pmode, stack_pointer_rtx, 4));
21263 RTX_FRAME_RELATED_P (insn) = 1;
21264 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21265 }
21266 }
21267
21268 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21269
21270 #if PROBE_INTERVAL > 4096
21271 #error Cannot use indexed addressing mode for stack probing
21272 #endif
21273
21274 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21275 inclusive. These are offsets from the current stack pointer. REGNO1
21276 is the index number of the 1st scratch register and LIVE_REGS is the
21277 mask of live registers. */
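/* A worked example (assuming the default PROBE_INTERVAL of 4096 and a SIZE
   of 10000 bytes): this falls into the partially unrolled case and probes
   the words at FIRST + 4096, FIRST + 8192 and finally FIRST + 10000 below
   the incoming stack pointer, using REG1 as the probing address register.  */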
21278
21279 static void
21280 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21281 unsigned int regno1, unsigned long live_regs)
21282 {
21283 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21284
21285 /* See if we have a constant small number of probes to generate. If so,
21286 that's the easy case. */
21287 if (size <= PROBE_INTERVAL)
21288 {
21289 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21290 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21291 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21292 }
21293
21294 /* The run-time loop is made up of 10 insns in the generic case while the
21295 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21296 else if (size <= 5 * PROBE_INTERVAL)
21297 {
21298 HOST_WIDE_INT i, rem;
21299
21300 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21301 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21302 emit_stack_probe (reg1);
21303
21304 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21305 it exceeds SIZE. If only two probes are needed, this will not
21306 generate any code. Then probe at FIRST + SIZE. */
21307 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21308 {
21309 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21310 emit_stack_probe (reg1);
21311 }
21312
21313 rem = size - (i - PROBE_INTERVAL);
21314 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21315 {
21316 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21317 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21318 }
21319 else
21320 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21321 }
21322
21323 /* Otherwise, do the same as above, but in a loop. Note that we must be
21324 extra careful with variables wrapping around because we might be at
21325 the very top (or the very bottom) of the address space and we have
21326 to be able to handle this case properly; in particular, we use an
21327 equality test for the loop condition. */
21328 else
21329 {
21330 HOST_WIDE_INT rounded_size;
21331 struct scratch_reg sr;
21332
21333 get_scratch_register_on_entry (&sr, regno1, live_regs);
21334
21335 emit_move_insn (reg1, GEN_INT (first));
21336
21337
21338 /* Step 1: round SIZE to the previous multiple of the interval. */
21339
21340 rounded_size = size & -PROBE_INTERVAL;
21341 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21342
21343
21344 /* Step 2: compute initial and final value of the loop counter. */
21345
21346 /* TEST_ADDR = SP + FIRST. */
21347 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21348
21349 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21350 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21351
21352
21353 /* Step 3: the loop
21354
21355 do
21356 {
21357 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21358 probe at TEST_ADDR
21359 }
21360 while (TEST_ADDR != LAST_ADDR)
21361
21362 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21363 until it is equal to ROUNDED_SIZE. */
21364
21365 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21366
21367
21368 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21369 that SIZE is equal to ROUNDED_SIZE. */
21370
21371 if (size != rounded_size)
21372 {
21373 HOST_WIDE_INT rem = size - rounded_size;
21374
21375 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21376 {
21377 emit_set_insn (sr.reg,
21378 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21379 emit_stack_probe (plus_constant (Pmode, sr.reg,
21380 PROBE_INTERVAL - rem));
21381 }
21382 else
21383 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21384 }
21385
21386 release_scratch_register_on_entry (&sr);
21387 }
21388
21389 /* Make sure nothing is scheduled before we are done. */
21390 emit_insn (gen_blockage ());
21391 }
21392
21393 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21394 absolute addresses. */
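/* The loop emitted here looks like this (illustrative, assuming a
   PROBE_INTERVAL of 4096 and r4/r5 standing in for REG1/REG2):

   .LPSRL0:
	sub	r4, r4, #4096
	str	r0, [r4, #0]
	cmp	r4, r5
	bne	.LPSRL0
   */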
21395
21396 const char *
21397 output_probe_stack_range (rtx reg1, rtx reg2)
21398 {
21399 static int labelno = 0;
21400 char loop_lab[32];
21401 rtx xops[2];
21402
21403 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21404
21405 /* Loop. */
21406 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21407
21408 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21409 xops[0] = reg1;
21410 xops[1] = GEN_INT (PROBE_INTERVAL);
21411 output_asm_insn ("sub\t%0, %0, %1", xops);
21412
21413 /* Probe at TEST_ADDR. */
21414 output_asm_insn ("str\tr0, [%0, #0]", xops);
21415
21416 /* Test if TEST_ADDR == LAST_ADDR. */
21417 xops[1] = reg2;
21418 output_asm_insn ("cmp\t%0, %1", xops);
21419
21420 /* Branch. */
21421 fputs ("\tbne\t", asm_out_file);
21422 assemble_name_raw (asm_out_file, loop_lab);
21423 fputc ('\n', asm_out_file);
21424
21425 return "";
21426 }
21427
21428 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21429 function. */
21430 void
21431 arm_expand_prologue (void)
21432 {
21433 rtx amount;
21434 rtx insn;
21435 rtx ip_rtx;
21436 unsigned long live_regs_mask;
21437 unsigned long func_type;
21438 int fp_offset = 0;
21439 int saved_pretend_args = 0;
21440 int saved_regs = 0;
21441 unsigned HOST_WIDE_INT args_to_push;
21442 HOST_WIDE_INT size;
21443 arm_stack_offsets *offsets;
21444 bool clobber_ip;
21445
21446 func_type = arm_current_func_type ();
21447
21448 /* Naked functions don't have prologues. */
21449 if (IS_NAKED (func_type))
21450 {
21451 if (flag_stack_usage_info)
21452 current_function_static_stack_size = 0;
21453 return;
21454 }
21455
21456 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21457 args_to_push = crtl->args.pretend_args_size;
21458
21459 /* Compute which registers we will have to save onto the stack. */
21460 offsets = arm_get_frame_offsets ();
21461 live_regs_mask = offsets->saved_regs_mask;
21462
21463 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21464
21465 if (IS_STACKALIGN (func_type))
21466 {
21467 rtx r0, r1;
21468
21469 /* Handle a word-aligned stack pointer. We generate the following:
21470
21471 mov r0, sp
21472 bic r1, r0, #7
21473 mov sp, r1
21474 <save and restore r0 in normal prologue/epilogue>
21475 mov sp, r0
21476 bx lr
21477
21478 The unwinder doesn't need to know about the stack realignment.
21479 Just tell it we saved SP in r0. */
21480 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21481
21482 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21483 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21484
21485 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21486 RTX_FRAME_RELATED_P (insn) = 1;
21487 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21488
21489 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21490
21491 /* ??? The CFA changes here, which may cause GDB to conclude that it
21492 has entered a different function. That said, the unwind info is
21493 correct, individually, before and after this instruction because
21494 we've described the save of SP, which will override the default
21495 handling of SP as restoring from the CFA. */
21496 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21497 }
21498
21499 /* The static chain register is the same as the IP register. If it is
21500 clobbered when creating the frame, we need to save and restore it. */
21501 clobber_ip = IS_NESTED (func_type)
21502 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21503 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21504 || flag_stack_clash_protection)
21505 && !df_regs_ever_live_p (LR_REGNUM)
21506 && arm_r3_live_at_start_p ()));
21507
21508 /* Find somewhere to store IP whilst the frame is being created.
21509 We try the following places in order:
21510
21511 1. The last argument register r3 if it is available.
21512 2. A slot on the stack above the frame if there are no
21513 arguments to push onto the stack.
21514 3. Register r3 again, after pushing the argument registers
21515 onto the stack, if this is a varargs function.
21516 4. The last slot on the stack created for the arguments to
21517 push, if this isn't a varargs function.
21518
21519 Note - we only need to tell the dwarf2 backend about the SP
21520 adjustment in the second variant; the static chain register
21521 doesn't need to be unwound, as it doesn't contain a value
21522 inherited from the caller. */
21523 if (clobber_ip)
21524 {
21525 if (!arm_r3_live_at_start_p ())
21526 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21527 else if (args_to_push == 0)
21528 {
21529 rtx addr, dwarf;
21530
21531 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21532 saved_regs += 4;
21533
21534 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21535 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21536 fp_offset = 4;
21537
21538 /* Just tell the dwarf backend that we adjusted SP. */
21539 dwarf = gen_rtx_SET (stack_pointer_rtx,
21540 plus_constant (Pmode, stack_pointer_rtx,
21541 -fp_offset));
21542 RTX_FRAME_RELATED_P (insn) = 1;
21543 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21544 }
21545 else
21546 {
21547 /* Store the args on the stack. */
21548 if (cfun->machine->uses_anonymous_args)
21549 {
21550 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21551 (0xf0 >> (args_to_push / 4)) & 0xf);
21552 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21553 saved_pretend_args = 1;
21554 }
21555 else
21556 {
21557 rtx addr, dwarf;
21558
21559 if (args_to_push == 4)
21560 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21561 else
21562 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21563 plus_constant (Pmode,
21564 stack_pointer_rtx,
21565 -args_to_push));
21566
21567 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21568
21569 /* Just tell the dwarf backend that we adjusted SP. */
21570 dwarf = gen_rtx_SET (stack_pointer_rtx,
21571 plus_constant (Pmode, stack_pointer_rtx,
21572 -args_to_push));
21573 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21574 }
21575
21576 RTX_FRAME_RELATED_P (insn) = 1;
21577 fp_offset = args_to_push;
21578 args_to_push = 0;
21579 }
21580 }
21581
21582 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21583 {
21584 if (IS_INTERRUPT (func_type))
21585 {
21586 /* Interrupt functions must not corrupt any registers.
21587 Creating a frame pointer however, corrupts the IP
21588 register, so we must push it first. */
21589 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21590
21591 /* Do not set RTX_FRAME_RELATED_P on this insn.
21592 The dwarf stack unwinding code only wants to see one
21593 stack decrement per function, and this is not it. If
21594 this instruction is labeled as being part of the frame
21595 creation sequence then dwarf2out_frame_debug_expr will
21596 die when it encounters the assignment of IP to FP
21597 later on, since the use of SP here establishes SP as
21598 the CFA register and not IP.
21599
21600 Anyway this instruction is not really part of the stack
21601 frame creation although it is part of the prologue. */
21602 }
21603
21604 insn = emit_set_insn (ip_rtx,
21605 plus_constant (Pmode, stack_pointer_rtx,
21606 fp_offset));
21607 RTX_FRAME_RELATED_P (insn) = 1;
21608 }
21609
21610 if (args_to_push)
21611 {
21612 /* Push the argument registers, or reserve space for them. */
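/* Illustrative note (added): args_to_push is a byte count, so the mask
   (0xf0 >> (args_to_push / 4)) & 0xf selects the last args_to_push / 4
   argument registers.  For example, args_to_push == 8 gives
   (0xf0 >> 2) & 0xf == 0xc, i.e. {r2, r3}.  */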
21613 if (cfun->machine->uses_anonymous_args)
21614 insn = emit_multi_reg_push
21615 ((0xf0 >> (args_to_push / 4)) & 0xf,
21616 (0xf0 >> (args_to_push / 4)) & 0xf);
21617 else
21618 insn = emit_insn
21619 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21620 GEN_INT (- args_to_push)));
21621 RTX_FRAME_RELATED_P (insn) = 1;
21622 }
21623
21624 /* If this is an interrupt service routine, and the link register
21625 is going to be pushed, and we're not generating an extra
21626 push of IP (needed when a frame pointer is needed and the frame layout is APCS),
21627 then subtracting four from LR now means that the function return
21628 can be done with a single instruction. */
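/* Added rationale (hedged): on entry to an IRQ/FIQ handler the return
   address is LR minus 4, so pre-adjusting LR before it is saved lets the
   epilogue return by loading the saved value straight into the PC in a
   single instruction.  */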
21629 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21630 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21631 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21632 && TARGET_ARM)
21633 {
21634 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21635
21636 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21637 }
21638
21639 if (live_regs_mask)
21640 {
21641 unsigned long dwarf_regs_mask = live_regs_mask;
21642
21643 saved_regs += bit_count (live_regs_mask) * 4;
21644 if (optimize_size && !frame_pointer_needed
21645 && saved_regs == offsets->saved_regs - offsets->saved_args)
21646 {
21647 /* If no coprocessor registers are being pushed and we don't have
21648 to worry about a frame pointer then push extra registers to
21649 create the stack frame. This is done in a way that does not
21650 alter the frame layout, so is independent of the epilogue. */
21651 int n;
21652 int frame;
21653 n = 0;
21654 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21655 n++;
21656 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21657 if (frame && n * 4 >= frame)
21658 {
21659 n = frame / 4;
21660 live_regs_mask |= (1 << n) - 1;
21661 saved_regs += frame;
21662 }
21663 }
21664
21665 if (TARGET_LDRD
21666 && current_tune->prefer_ldrd_strd
21667 && !optimize_function_for_size_p (cfun))
21668 {
21669 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21670 if (TARGET_THUMB2)
21671 thumb2_emit_strd_push (live_regs_mask);
21672 else if (TARGET_ARM
21673 && !TARGET_APCS_FRAME
21674 && !IS_INTERRUPT (func_type))
21675 arm_emit_strd_push (live_regs_mask);
21676 else
21677 {
21678 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21679 RTX_FRAME_RELATED_P (insn) = 1;
21680 }
21681 }
21682 else
21683 {
21684 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21685 RTX_FRAME_RELATED_P (insn) = 1;
21686 }
21687 }
21688
21689 if (! IS_VOLATILE (func_type))
21690 saved_regs += arm_save_coproc_regs ();
21691
21692 if (frame_pointer_needed && TARGET_ARM)
21693 {
21694 /* Create the new frame pointer. */
21695 if (TARGET_APCS_FRAME)
21696 {
21697 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21698 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21699 RTX_FRAME_RELATED_P (insn) = 1;
21700 }
21701 else
21702 {
21703 insn = GEN_INT (saved_regs - (4 + fp_offset));
21704 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21705 stack_pointer_rtx, insn));
21706 RTX_FRAME_RELATED_P (insn) = 1;
21707 }
21708 }
21709
21710 size = offsets->outgoing_args - offsets->saved_args;
21711 if (flag_stack_usage_info)
21712 current_function_static_stack_size = size;
21713
21714 /* If this isn't an interrupt service routine and we have a frame, then do
21715 stack checking. We use IP as the first scratch register, except for the
21716 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21717 if (!IS_INTERRUPT (func_type)
21718 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21719 || flag_stack_clash_protection))
21720 {
21721 unsigned int regno;
21722
21723 if (!IS_NESTED (func_type) || clobber_ip)
21724 regno = IP_REGNUM;
21725 else if (df_regs_ever_live_p (LR_REGNUM))
21726 regno = LR_REGNUM;
21727 else
21728 regno = 3;
21729
21730 if (crtl->is_leaf && !cfun->calls_alloca)
21731 {
21732 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
21733 arm_emit_probe_stack_range (get_stack_check_protect (),
21734 size - get_stack_check_protect (),
21735 regno, live_regs_mask);
21736 }
21737 else if (size > 0)
21738 arm_emit_probe_stack_range (get_stack_check_protect (), size,
21739 regno, live_regs_mask);
21740 }
21741
21742 /* Recover the static chain register. */
21743 if (clobber_ip)
21744 {
21745 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21746 insn = gen_rtx_REG (SImode, 3);
21747 else
21748 {
21749 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21750 insn = gen_frame_mem (SImode, insn);
21751 }
21752 emit_set_insn (ip_rtx, insn);
21753 emit_insn (gen_force_register_use (ip_rtx));
21754 }
21755
21756 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21757 {
21758 /* This add can produce multiple insns for a large constant, so we
21759 need to get tricky. */
21760 rtx_insn *last = get_last_insn ();
21761
21762 amount = GEN_INT (offsets->saved_args + saved_regs
21763 - offsets->outgoing_args);
21764
21765 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21766 amount));
21767 do
21768 {
21769 last = last ? NEXT_INSN (last) : get_insns ();
21770 RTX_FRAME_RELATED_P (last) = 1;
21771 }
21772 while (last != insn);
21773
21774 /* If the frame pointer is needed, emit a special barrier that
21775 will prevent the scheduler from moving stores to the frame
21776 before the stack adjustment. */
21777 if (frame_pointer_needed)
21778 emit_insn (gen_stack_tie (stack_pointer_rtx,
21779 hard_frame_pointer_rtx));
21780 }
21781
21782
21783 if (frame_pointer_needed && TARGET_THUMB2)
21784 thumb_set_frame_pointer (offsets);
21785
21786 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21787 {
21788 unsigned long mask;
21789
21790 mask = live_regs_mask;
21791 mask &= THUMB2_WORK_REGS;
21792 if (!IS_NESTED (func_type))
21793 mask |= (1 << IP_REGNUM);
21794 arm_load_pic_register (mask);
21795 }
21796
21797 /* If we are profiling, make sure no instructions are scheduled before
21798 the call to mcount. Similarly if the user has requested no
21799 scheduling in the prologue. Similarly if we want non-call exceptions
21800 using the EABI unwinder, to prevent faulting instructions from being
21801 swapped with a stack adjustment. */
21802 if (crtl->profile || !TARGET_SCHED_PROLOG
21803 || (arm_except_unwind_info (&global_options) == UI_TARGET
21804 && cfun->can_throw_non_call_exceptions))
21805 emit_insn (gen_blockage ());
21806
21807 /* If the link register is being kept alive, with the return address in it,
21808 then make sure that it does not get reused by the ce2 pass. */
21809 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21810 cfun->machine->lr_save_eliminated = 1;
21811 }
21812 \f
21813 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21814 static void
21815 arm_print_condition (FILE *stream)
21816 {
21817 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21818 {
21819 /* Branch conversion is not implemented for Thumb-2. */
21820 if (TARGET_THUMB)
21821 {
21822 output_operand_lossage ("predicated Thumb instruction");
21823 return;
21824 }
21825 if (current_insn_predicate != NULL)
21826 {
21827 output_operand_lossage
21828 ("predicated instruction in conditional sequence");
21829 return;
21830 }
21831
21832 fputs (arm_condition_codes[arm_current_cc], stream);
21833 }
21834 else if (current_insn_predicate)
21835 {
21836 enum arm_cond_code code;
21837
21838 if (TARGET_THUMB1)
21839 {
21840 output_operand_lossage ("predicated Thumb instruction");
21841 return;
21842 }
21843
21844 code = get_arm_condition_code (current_insn_predicate);
21845 fputs (arm_condition_codes[code], stream);
21846 }
21847 }
21848
21849
21850 /* Globally reserved letters: acln
21851 Punctuation letters currently used: @_|?().!#
21852 Lower case letters currently used: bcdefhimpqtvwxyz
21853 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21854 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21855
21856 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21857
21858 If CODE is 'd', then the X is a condition operand and the instruction
21859 should only be executed if the condition is true.
21860 If CODE is 'D', then the X is a condition operand and the instruction
21861 should only be executed if the condition is false: however, if the mode
21862 of the comparison is CCFPEmode, then always execute the instruction -- we
21863 do this because in these circumstances !GE does not necessarily imply LT;
21864 in these cases the instruction pattern will take care to make sure that
21865 an instruction containing %d will follow, thereby undoing the effects of
21866 doing this instruction unconditionally.
21867 If CODE is 'N' then X is a floating point operand that must be negated
21868 before output.
21869 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21870 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
21871 static void
21872 arm_print_operand (FILE *stream, rtx x, int code)
21873 {
21874 switch (code)
21875 {
21876 case '@':
21877 fputs (ASM_COMMENT_START, stream);
21878 return;
21879
21880 case '_':
21881 fputs (user_label_prefix, stream);
21882 return;
21883
21884 case '|':
21885 fputs (REGISTER_PREFIX, stream);
21886 return;
21887
21888 case '?':
21889 arm_print_condition (stream);
21890 return;
21891
21892 case '.':
21893 /* The current condition code for a condition code setting instruction.
21894 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21895 fputc('s', stream);
21896 arm_print_condition (stream);
21897 return;
21898
21899 case '!':
21900 /* If the instruction is conditionally executed then print
21901 the current condition code, otherwise print 's'. */
21902 gcc_assert (TARGET_THUMB2);
21903 if (current_insn_predicate)
21904 arm_print_condition (stream);
21905 else
21906 fputc('s', stream);
21907 break;
21908
21909 /* %# is a "break" sequence. It doesn't output anything, but is used to
21910 separate e.g. operand numbers from following text, if that text consists
21911 of further digits which we don't want to be part of the operand
21912 number. */
21913 case '#':
21914 return;
21915
21916 case 'N':
21917 {
21918 REAL_VALUE_TYPE r;
21919 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21920 fprintf (stream, "%s", fp_const_from_val (&r));
21921 }
21922 return;
21923
21924 /* An integer or symbol address without a preceding # sign. */
21925 case 'c':
21926 switch (GET_CODE (x))
21927 {
21928 case CONST_INT:
21929 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21930 break;
21931
21932 case SYMBOL_REF:
21933 output_addr_const (stream, x);
21934 break;
21935
21936 case CONST:
21937 if (GET_CODE (XEXP (x, 0)) == PLUS
21938 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21939 {
21940 output_addr_const (stream, x);
21941 break;
21942 }
21943 /* Fall through. */
21944
21945 default:
21946 output_operand_lossage ("Unsupported operand for code '%c'", code);
21947 }
21948 return;
21949
21950 /* An integer that we want to print in HEX. */
21951 case 'x':
21952 switch (GET_CODE (x))
21953 {
21954 case CONST_INT:
21955 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21956 break;
21957
21958 default:
21959 output_operand_lossage ("Unsupported operand for code '%c'", code);
21960 }
21961 return;
21962
21963 case 'B':
21964 if (CONST_INT_P (x))
21965 {
21966 HOST_WIDE_INT val;
21967 val = ARM_SIGN_EXTEND (~INTVAL (x));
21968 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21969 }
21970 else
21971 {
21972 putc ('~', stream);
21973 output_addr_const (stream, x);
21974 }
21975 return;
21976
21977 case 'b':
21978 /* Print the log2 of a CONST_INT. */
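/* For example (illustrative), (const_int 8) is printed as "#3".  */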
21979 {
21980 HOST_WIDE_INT val;
21981
21982 if (!CONST_INT_P (x)
21983 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21984 output_operand_lossage ("Unsupported operand for code '%c'", code);
21985 else
21986 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21987 }
21988 return;
21989
21990 case 'L':
21991 /* The low 16 bits of an immediate constant. */
21992 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
21993 return;
21994
21995 case 'i':
21996 fprintf (stream, "%s", arithmetic_instr (x, 1));
21997 return;
21998
21999 case 'I':
22000 fprintf (stream, "%s", arithmetic_instr (x, 0));
22001 return;
22002
22003 case 'S':
22004 {
22005 HOST_WIDE_INT val;
22006 const char *shift;
22007
22008 shift = shift_op (x, &val);
22009
22010 if (shift)
22011 {
22012 fprintf (stream, ", %s ", shift);
22013 if (val == -1)
22014 arm_print_operand (stream, XEXP (x, 1), 0);
22015 else
22016 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22017 }
22018 }
22019 return;
22020
22021 /* An explanation of the 'Q', 'R' and 'H' register operands:
22022
22023 In a pair of registers containing a DI or DF value the 'Q'
22024 operand returns the register number of the register containing
22025 the least significant part of the value. The 'R' operand returns
22026 the register number of the register containing the most
22027 significant part of the value.
22028
22029 The 'H' operand returns the higher of the two register numbers.
22030 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22031 same as the 'Q' operand, since the most significant part of the
22032 value is held in the lower number register. The reverse is true
22033 on systems where WORDS_BIG_ENDIAN is false.
22034
22035 The purpose of these operands is to distinguish between cases
22036 where the endian-ness of the values is important (for example
22037 when they are added together), and cases where the endian-ness
22038 is irrelevant, but the order of register operations is important.
22039 For example when loading a value from memory into a register
22040 pair, the endian-ness does not matter. Provided that the value
22041 from the lower memory address is put into the lower numbered
22042 register, and the value from the higher address is put into the
22043 higher numbered register, the load will work regardless of whether
22044 the value being loaded is big-wordian or little-wordian. The
22045 order of the two register loads can matter however, if the address
22046 of the memory location is actually held in one of the registers
22047 being overwritten by the load.
22048
22049 The 'Q' and 'R' constraints are also available for 64-bit
22050 constants. */
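/* Illustrative example (added): for a little-endian DImode value held in
   the register pair {r0, r1}, '%Q' prints r0 (least significant half),
   '%R' prints r1 (most significant half) and '%H' prints r1 (the higher
   register number).  */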
22051 case 'Q':
22052 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22053 {
22054 rtx part = gen_lowpart (SImode, x);
22055 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22056 return;
22057 }
22058
22059 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22060 {
22061 output_operand_lossage ("invalid operand for code '%c'", code);
22062 return;
22063 }
22064
22065 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22066 return;
22067
22068 case 'R':
22069 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22070 {
22071 machine_mode mode = GET_MODE (x);
22072 rtx part;
22073
22074 if (mode == VOIDmode)
22075 mode = DImode;
22076 part = gen_highpart_mode (SImode, mode, x);
22077 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22078 return;
22079 }
22080
22081 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22082 {
22083 output_operand_lossage ("invalid operand for code '%c'", code);
22084 return;
22085 }
22086
22087 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22088 return;
22089
22090 case 'H':
22091 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22092 {
22093 output_operand_lossage ("invalid operand for code '%c'", code);
22094 return;
22095 }
22096
22097 asm_fprintf (stream, "%r", REGNO (x) + 1);
22098 return;
22099
22100 case 'J':
22101 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22102 {
22103 output_operand_lossage ("invalid operand for code '%c'", code);
22104 return;
22105 }
22106
22107 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22108 return;
22109
22110 case 'K':
22111 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22112 {
22113 output_operand_lossage ("invalid operand for code '%c'", code);
22114 return;
22115 }
22116
22117 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22118 return;
22119
22120 case 'm':
22121 asm_fprintf (stream, "%r",
22122 REG_P (XEXP (x, 0))
22123 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22124 return;
22125
22126 case 'M':
22127 asm_fprintf (stream, "{%r-%r}",
22128 REGNO (x),
22129 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22130 return;
22131
22132 /* Like 'M', but writing doubleword vector registers, for use by Neon
22133 insns. */
22134 case 'h':
22135 {
22136 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22137 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22138 if (numregs == 1)
22139 asm_fprintf (stream, "{d%d}", regno);
22140 else
22141 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22142 }
22143 return;
22144
22145 case 'd':
22146 /* CONST_TRUE_RTX means always -- that's the default. */
22147 if (x == const_true_rtx)
22148 return;
22149
22150 if (!COMPARISON_P (x))
22151 {
22152 output_operand_lossage ("invalid operand for code '%c'", code);
22153 return;
22154 }
22155
22156 fputs (arm_condition_codes[get_arm_condition_code (x)],
22157 stream);
22158 return;
22159
22160 case 'D':
22161 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22162 want to do that. */
22163 if (x == const_true_rtx)
22164 {
22165 output_operand_lossage ("instruction never executed");
22166 return;
22167 }
22168 if (!COMPARISON_P (x))
22169 {
22170 output_operand_lossage ("invalid operand for code '%c'", code);
22171 return;
22172 }
22173
22174 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22175 (get_arm_condition_code (x))],
22176 stream);
22177 return;
22178
22179 case 's':
22180 case 'V':
22181 case 'W':
22182 case 'X':
22183 case 'Y':
22184 case 'Z':
22185 /* Former Maverick support, removed after GCC-4.7. */
22186 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22187 return;
22188
22189 case 'U':
22190 if (!REG_P (x)
22191 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22192 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22193 /* Bad value for wCG register number. */
22194 {
22195 output_operand_lossage ("invalid operand for code '%c'", code);
22196 return;
22197 }
22198
22199 else
22200 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22201 return;
22202
22203 /* Print an iWMMXt control register name. */
22204 case 'w':
22205 if (!CONST_INT_P (x)
22206 || INTVAL (x) < 0
22207 || INTVAL (x) >= 16)
22208 /* Bad value for wC register number. */
22209 {
22210 output_operand_lossage ("invalid operand for code '%c'", code);
22211 return;
22212 }
22213
22214 else
22215 {
22216 static const char * wc_reg_names [16] =
22217 {
22218 "wCID", "wCon", "wCSSF", "wCASF",
22219 "wC4", "wC5", "wC6", "wC7",
22220 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22221 "wC12", "wC13", "wC14", "wC15"
22222 };
22223
22224 fputs (wc_reg_names [INTVAL (x)], stream);
22225 }
22226 return;
22227
22228 /* Print the high single-precision register of a VFP double-precision
22229 register. */
22230 case 'p':
22231 {
22232 machine_mode mode = GET_MODE (x);
22233 int regno;
22234
22235 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22236 {
22237 output_operand_lossage ("invalid operand for code '%c'", code);
22238 return;
22239 }
22240
22241 regno = REGNO (x);
22242 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22243 {
22244 output_operand_lossage ("invalid operand for code '%c'", code);
22245 return;
22246 }
22247
22248 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22249 }
22250 return;
22251
22252 /* Print a VFP/Neon double precision or quad precision register name. */
22253 case 'P':
22254 case 'q':
22255 {
22256 machine_mode mode = GET_MODE (x);
22257 int is_quad = (code == 'q');
22258 int regno;
22259
22260 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22261 {
22262 output_operand_lossage ("invalid operand for code '%c'", code);
22263 return;
22264 }
22265
22266 if (!REG_P (x)
22267 || !IS_VFP_REGNUM (REGNO (x)))
22268 {
22269 output_operand_lossage ("invalid operand for code '%c'", code);
22270 return;
22271 }
22272
22273 regno = REGNO (x);
22274 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22275 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22276 {
22277 output_operand_lossage ("invalid operand for code '%c'", code);
22278 return;
22279 }
22280
22281 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22282 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22283 }
22284 return;
22285
22286 /* These two codes print the low/high doubleword register of a Neon quad
22287 register, respectively. For pair-structure types, can also print
22288 low/high quadword registers. */
22289 case 'e':
22290 case 'f':
22291 {
22292 machine_mode mode = GET_MODE (x);
22293 int regno;
22294
22295 if ((GET_MODE_SIZE (mode) != 16
22296 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22297 {
22298 output_operand_lossage ("invalid operand for code '%c'", code);
22299 return;
22300 }
22301
22302 regno = REGNO (x);
22303 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22304 {
22305 output_operand_lossage ("invalid operand for code '%c'", code);
22306 return;
22307 }
22308
22309 if (GET_MODE_SIZE (mode) == 16)
22310 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22311 + (code == 'f' ? 1 : 0));
22312 else
22313 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22314 + (code == 'f' ? 1 : 0));
22315 }
22316 return;
22317
22318 /* Print a VFPv3 floating-point constant, represented as an integer
22319 index. */
22320 case 'G':
22321 {
22322 int index = vfp3_const_double_index (x);
22323 gcc_assert (index != -1);
22324 fprintf (stream, "%d", index);
22325 }
22326 return;
22327
22328 /* Print bits representing opcode features for Neon.
22329
22330 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22331 and polynomials as unsigned.
22332
22333 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22334
22335 Bit 2 is 1 for rounding functions, 0 otherwise. */
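/* Illustrative examples (added): X == 1 (signed integer) makes '%T' print
   's' and X == 3 (float) makes it print 'f', while '%F' prints 'i' for
   both signed and unsigned integers.  */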
22336
22337 /* Identify the type as 's', 'u', 'p' or 'f'. */
22338 case 'T':
22339 {
22340 HOST_WIDE_INT bits = INTVAL (x);
22341 fputc ("uspf"[bits & 3], stream);
22342 }
22343 return;
22344
22345 /* Likewise, but signed and unsigned integers are both 'i'. */
22346 case 'F':
22347 {
22348 HOST_WIDE_INT bits = INTVAL (x);
22349 fputc ("iipf"[bits & 3], stream);
22350 }
22351 return;
22352
22353 /* As for 'T', but emit 'u' instead of 'p'. */
22354 case 't':
22355 {
22356 HOST_WIDE_INT bits = INTVAL (x);
22357 fputc ("usuf"[bits & 3], stream);
22358 }
22359 return;
22360
22361 /* Bit 2: rounding (vs none). */
22362 case 'O':
22363 {
22364 HOST_WIDE_INT bits = INTVAL (x);
22365 fputs ((bits & 4) != 0 ? "r" : "", stream);
22366 }
22367 return;
22368
22369 /* Memory operand for vld1/vst1 instruction. */
22370 case 'A':
22371 {
22372 rtx addr;
22373 bool postinc = FALSE;
22374 rtx postinc_reg = NULL;
22375 unsigned align, memsize, align_bits;
22376
22377 gcc_assert (MEM_P (x));
22378 addr = XEXP (x, 0);
22379 if (GET_CODE (addr) == POST_INC)
22380 {
22381 postinc = 1;
22382 addr = XEXP (addr, 0);
22383 }
22384 if (GET_CODE (addr) == POST_MODIFY)
22385 {
22386 postinc_reg = XEXP( XEXP (addr, 1), 1);
22387 addr = XEXP (addr, 0);
22388 }
22389 asm_fprintf (stream, "[%r", REGNO (addr));
22390
22391 /* We know the alignment of this access, so we can emit a hint in the
22392 instruction (for some alignments) as an aid to the memory subsystem
22393 of the target. */
22394 align = MEM_ALIGN (x) >> 3;
22395 memsize = MEM_SIZE (x);
22396
22397 /* Only certain alignment specifiers are supported by the hardware. */
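/* For example (illustrative), a 16-byte vld1/vst1 access that is known to
   be 16-byte aligned is printed with a ":128" hint, e.g. "[r0:128]".  */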
22398 if (memsize == 32 && (align % 32) == 0)
22399 align_bits = 256;
22400 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22401 align_bits = 128;
22402 else if (memsize >= 8 && (align % 8) == 0)
22403 align_bits = 64;
22404 else
22405 align_bits = 0;
22406
22407 if (align_bits != 0)
22408 asm_fprintf (stream, ":%d", align_bits);
22409
22410 asm_fprintf (stream, "]");
22411
22412 if (postinc)
22413 fputs("!", stream);
22414 if (postinc_reg)
22415 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22416 }
22417 return;
22418
22419 case 'C':
22420 {
22421 rtx addr;
22422
22423 gcc_assert (MEM_P (x));
22424 addr = XEXP (x, 0);
22425 gcc_assert (REG_P (addr));
22426 asm_fprintf (stream, "[%r]", REGNO (addr));
22427 }
22428 return;
22429
22430 /* Translate an S register number into a D register number and element index. */
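/* For example (illustrative), s1 is printed as "d0[1]" and s2 as "d1[0]".  */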
22431 case 'y':
22432 {
22433 machine_mode mode = GET_MODE (x);
22434 int regno;
22435
22436 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22437 {
22438 output_operand_lossage ("invalid operand for code '%c'", code);
22439 return;
22440 }
22441
22442 regno = REGNO (x);
22443 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22444 {
22445 output_operand_lossage ("invalid operand for code '%c'", code);
22446 return;
22447 }
22448
22449 regno = regno - FIRST_VFP_REGNUM;
22450 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22451 }
22452 return;
22453
22454 case 'v':
22455 gcc_assert (CONST_DOUBLE_P (x));
22456 int result;
22457 result = vfp3_const_double_for_fract_bits (x);
22458 if (result == 0)
22459 result = vfp3_const_double_for_bits (x);
22460 fprintf (stream, "#%d", result);
22461 return;
22462
22463 /* Register specifier for vld1.16/vst1.16. Translate the S register
22464 number into a D register number and element index. */
22465 case 'z':
22466 {
22467 machine_mode mode = GET_MODE (x);
22468 int regno;
22469
22470 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22471 {
22472 output_operand_lossage ("invalid operand for code '%c'", code);
22473 return;
22474 }
22475
22476 regno = REGNO (x);
22477 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22478 {
22479 output_operand_lossage ("invalid operand for code '%c'", code);
22480 return;
22481 }
22482
22483 regno = regno - FIRST_VFP_REGNUM;
22484 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22485 }
22486 return;
22487
22488 default:
22489 if (x == 0)
22490 {
22491 output_operand_lossage ("missing operand");
22492 return;
22493 }
22494
22495 switch (GET_CODE (x))
22496 {
22497 case REG:
22498 asm_fprintf (stream, "%r", REGNO (x));
22499 break;
22500
22501 case MEM:
22502 output_address (GET_MODE (x), XEXP (x, 0));
22503 break;
22504
22505 case CONST_DOUBLE:
22506 {
22507 char fpstr[20];
22508 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22509 sizeof (fpstr), 0, 1);
22510 fprintf (stream, "#%s", fpstr);
22511 }
22512 break;
22513
22514 default:
22515 gcc_assert (GET_CODE (x) != NEG);
22516 fputc ('#', stream);
22517 if (GET_CODE (x) == HIGH)
22518 {
22519 fputs (":lower16:", stream);
22520 x = XEXP (x, 0);
22521 }
22522
22523 output_addr_const (stream, x);
22524 break;
22525 }
22526 }
22527 }
22528 \f
22529 /* Target hook for printing a memory address. */
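/* Illustrative examples (added) of the 32-bit output formats below:
   (reg r1)                       -> [r1]
   (plus (reg r1) (const_int 8))  -> [r1, #8]
   (post_inc (reg r1)) in SImode  -> [r1], #4  */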
22530 static void
22531 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22532 {
22533 if (TARGET_32BIT)
22534 {
22535 int is_minus = GET_CODE (x) == MINUS;
22536
22537 if (REG_P (x))
22538 asm_fprintf (stream, "[%r]", REGNO (x));
22539 else if (GET_CODE (x) == PLUS || is_minus)
22540 {
22541 rtx base = XEXP (x, 0);
22542 rtx index = XEXP (x, 1);
22543 HOST_WIDE_INT offset = 0;
22544 if (!REG_P (base)
22545 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22546 {
22547 /* Ensure that BASE is a register
22548 (one of them must be). Also ensure that
22549 SP is not used as an index register. */
22550 std::swap (base, index);
22551 }
22552 switch (GET_CODE (index))
22553 {
22554 case CONST_INT:
22555 offset = INTVAL (index);
22556 if (is_minus)
22557 offset = -offset;
22558 asm_fprintf (stream, "[%r, #%wd]",
22559 REGNO (base), offset);
22560 break;
22561
22562 case REG:
22563 asm_fprintf (stream, "[%r, %s%r]",
22564 REGNO (base), is_minus ? "-" : "",
22565 REGNO (index));
22566 break;
22567
22568 case MULT:
22569 case ASHIFTRT:
22570 case LSHIFTRT:
22571 case ASHIFT:
22572 case ROTATERT:
22573 {
22574 asm_fprintf (stream, "[%r, %s%r",
22575 REGNO (base), is_minus ? "-" : "",
22576 REGNO (XEXP (index, 0)));
22577 arm_print_operand (stream, index, 'S');
22578 fputs ("]", stream);
22579 break;
22580 }
22581
22582 default:
22583 gcc_unreachable ();
22584 }
22585 }
22586 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22587 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22588 {
22589 gcc_assert (REG_P (XEXP (x, 0)));
22590
22591 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22592 asm_fprintf (stream, "[%r, #%s%d]!",
22593 REGNO (XEXP (x, 0)),
22594 GET_CODE (x) == PRE_DEC ? "-" : "",
22595 GET_MODE_SIZE (mode));
22596 else
22597 asm_fprintf (stream, "[%r], #%s%d",
22598 REGNO (XEXP (x, 0)),
22599 GET_CODE (x) == POST_DEC ? "-" : "",
22600 GET_MODE_SIZE (mode));
22601 }
22602 else if (GET_CODE (x) == PRE_MODIFY)
22603 {
22604 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22605 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22606 asm_fprintf (stream, "#%wd]!",
22607 INTVAL (XEXP (XEXP (x, 1), 1)));
22608 else
22609 asm_fprintf (stream, "%r]!",
22610 REGNO (XEXP (XEXP (x, 1), 1)));
22611 }
22612 else if (GET_CODE (x) == POST_MODIFY)
22613 {
22614 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22615 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22616 asm_fprintf (stream, "#%wd",
22617 INTVAL (XEXP (XEXP (x, 1), 1)));
22618 else
22619 asm_fprintf (stream, "%r",
22620 REGNO (XEXP (XEXP (x, 1), 1)));
22621 }
22622 else output_addr_const (stream, x);
22623 }
22624 else
22625 {
22626 if (REG_P (x))
22627 asm_fprintf (stream, "[%r]", REGNO (x));
22628 else if (GET_CODE (x) == POST_INC)
22629 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22630 else if (GET_CODE (x) == PLUS)
22631 {
22632 gcc_assert (REG_P (XEXP (x, 0)));
22633 if (CONST_INT_P (XEXP (x, 1)))
22634 asm_fprintf (stream, "[%r, #%wd]",
22635 REGNO (XEXP (x, 0)),
22636 INTVAL (XEXP (x, 1)));
22637 else
22638 asm_fprintf (stream, "[%r, %r]",
22639 REGNO (XEXP (x, 0)),
22640 REGNO (XEXP (x, 1)));
22641 }
22642 else
22643 output_addr_const (stream, x);
22644 }
22645 }
22646 \f
22647 /* Target hook for indicating whether a punctuation character for
22648 TARGET_PRINT_OPERAND is valid. */
22649 static bool
22650 arm_print_operand_punct_valid_p (unsigned char code)
22651 {
22652 return (code == '@' || code == '|' || code == '.'
22653 || code == '(' || code == ')' || code == '#'
22654 || (TARGET_32BIT && (code == '?'))
22655 || (TARGET_THUMB2 && (code == '!'))
22656 || (TARGET_THUMB && (code == '_')));
22657 }
22658 \f
22659 /* Target hook for assembling integer objects. The ARM version needs to
22660 handle word-sized values specially. */
22661 static bool
22662 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22663 {
22664 machine_mode mode;
22665
22666 if (size == UNITS_PER_WORD && aligned_p)
22667 {
22668 fputs ("\t.word\t", asm_out_file);
22669 output_addr_const (asm_out_file, x);
22670
22671 /* Mark symbols as position independent. We only do this in the
22672 .text segment, not in the .data segment. */
22673 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22674 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22675 {
22676 /* See legitimize_pic_address for an explanation of the
22677 TARGET_VXWORKS_RTP check. */
22678 /* References to weak symbols cannot be resolved locally:
22679 they may be overridden by a non-weak definition at link
22680 time. */
22681 if (!arm_pic_data_is_text_relative
22682 || (GET_CODE (x) == SYMBOL_REF
22683 && (!SYMBOL_REF_LOCAL_P (x)
22684 || (SYMBOL_REF_DECL (x)
22685 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22686 fputs ("(GOT)", asm_out_file);
22687 else
22688 fputs ("(GOTOFF)", asm_out_file);
22689 }
22690 fputc ('\n', asm_out_file);
22691 return true;
22692 }
22693
22694 mode = GET_MODE (x);
22695
22696 if (arm_vector_mode_supported_p (mode))
22697 {
22698 int i, units;
22699
22700 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22701
22702 units = CONST_VECTOR_NUNITS (x);
22703 size = GET_MODE_UNIT_SIZE (mode);
22704
22705 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22706 for (i = 0; i < units; i++)
22707 {
22708 rtx elt = CONST_VECTOR_ELT (x, i);
22709 assemble_integer
22710 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22711 }
22712 else
22713 for (i = 0; i < units; i++)
22714 {
22715 rtx elt = CONST_VECTOR_ELT (x, i);
22716 assemble_real
22717 (*CONST_DOUBLE_REAL_VALUE (elt),
22718 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22719 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22720 }
22721
22722 return true;
22723 }
22724
22725 return default_assemble_integer (x, size, aligned_p);
22726 }
22727
22728 static void
22729 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22730 {
22731 section *s;
22732
22733 if (!TARGET_AAPCS_BASED)
22734 {
22735 (is_ctor ?
22736 default_named_section_asm_out_constructor
22737 : default_named_section_asm_out_destructor) (symbol, priority);
22738 return;
22739 }
22740
22741 /* Put these in the .init_array section, using a special relocation. */
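/* For example (illustrative), a constructor with priority 101 is placed
   in a section named ".init_array.00101" by the sprintf below.  */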
22742 if (priority != DEFAULT_INIT_PRIORITY)
22743 {
22744 char buf[18];
22745 sprintf (buf, "%s.%.5u",
22746 is_ctor ? ".init_array" : ".fini_array",
22747 priority);
22748 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22749 }
22750 else if (is_ctor)
22751 s = ctors_section;
22752 else
22753 s = dtors_section;
22754
22755 switch_to_section (s);
22756 assemble_align (POINTER_SIZE);
22757 fputs ("\t.word\t", asm_out_file);
22758 output_addr_const (asm_out_file, symbol);
22759 fputs ("(target1)\n", asm_out_file);
22760 }
22761
22762 /* Add a function to the list of static constructors. */
22763
22764 static void
22765 arm_elf_asm_constructor (rtx symbol, int priority)
22766 {
22767 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22768 }
22769
22770 /* Add a function to the list of static destructors. */
22771
22772 static void
22773 arm_elf_asm_destructor (rtx symbol, int priority)
22774 {
22775 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22776 }
22777 \f
22778 /* A finite state machine takes care of noticing whether or not instructions
22779 can be conditionally executed, and thus decrease execution time and code
22780 size by deleting branch instructions. The fsm is controlled by
22781 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22782
22783 /* The states of the fsm controlling condition codes are:
22784 0: normal, do nothing special
22785 1: make ASM_OUTPUT_OPCODE not output this instruction
22786 2: make ASM_OUTPUT_OPCODE not output this instruction
22787 3: make instructions conditional
22788 4: make instructions conditional
22789
22790 State transitions (state->state by whom under condition):
22791 0 -> 1 final_prescan_insn if the `target' is a label
22792 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22793 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22794 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22795 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22796 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22797 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22798 (the target insn is arm_target_insn).
22799
22800 If the jump clobbers the conditions then we use states 2 and 4.
22801
22802 A similar thing can be done with conditional return insns.
22803
22804 XXX In case the `target' is an unconditional branch, this conditionalising
22805 of the instructions always reduces code size, but not always execution
22806 time. But then, I want to reduce the code size to somewhere near what
22807 /bin/cc produces. */
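/* Illustrative example (added) of the effect of this state machine: a
   sequence such as "cmp r0, #0; beq L1; add r1, r1, #1; L1:" can be
   output as "cmp r0, #0; addne r1, r1, #1", deleting the branch and
   predicating the skipped instruction on the inverse condition.  */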
22808
22809 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22810 instructions. When a COND_EXEC instruction is seen the subsequent
22811 instructions are scanned so that multiple conditional instructions can be
22812 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22813 specify the length and true/false mask for the IT block. These will be
22814 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
22815
22816 /* Returns the index of the ARM condition code string in
22817 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22818 COMPARISON should be an rtx like `(eq (...) (...))'. */
22819
22820 enum arm_cond_code
22821 maybe_get_arm_condition_code (rtx comparison)
22822 {
22823 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22824 enum arm_cond_code code;
22825 enum rtx_code comp_code = GET_CODE (comparison);
22826
22827 if (GET_MODE_CLASS (mode) != MODE_CC)
22828 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22829 XEXP (comparison, 1));
22830
22831 switch (mode)
22832 {
22833 case E_CC_DNEmode: code = ARM_NE; goto dominance;
22834 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
22835 case E_CC_DGEmode: code = ARM_GE; goto dominance;
22836 case E_CC_DGTmode: code = ARM_GT; goto dominance;
22837 case E_CC_DLEmode: code = ARM_LE; goto dominance;
22838 case E_CC_DLTmode: code = ARM_LT; goto dominance;
22839 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
22840 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
22841 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
22842 case E_CC_DLTUmode: code = ARM_CC;
22843
22844 dominance:
22845 if (comp_code == EQ)
22846 return ARM_INVERSE_CONDITION_CODE (code);
22847 if (comp_code == NE)
22848 return code;
22849 return ARM_NV;
22850
22851 case E_CC_NOOVmode:
22852 switch (comp_code)
22853 {
22854 case NE: return ARM_NE;
22855 case EQ: return ARM_EQ;
22856 case GE: return ARM_PL;
22857 case LT: return ARM_MI;
22858 default: return ARM_NV;
22859 }
22860
22861 case E_CC_Zmode:
22862 switch (comp_code)
22863 {
22864 case NE: return ARM_NE;
22865 case EQ: return ARM_EQ;
22866 default: return ARM_NV;
22867 }
22868
22869 case E_CC_Nmode:
22870 switch (comp_code)
22871 {
22872 case NE: return ARM_MI;
22873 case EQ: return ARM_PL;
22874 default: return ARM_NV;
22875 }
22876
22877 case E_CCFPEmode:
22878 case E_CCFPmode:
22879 /* We can handle all cases except UNEQ and LTGT. */
22880 switch (comp_code)
22881 {
22882 case GE: return ARM_GE;
22883 case GT: return ARM_GT;
22884 case LE: return ARM_LS;
22885 case LT: return ARM_MI;
22886 case NE: return ARM_NE;
22887 case EQ: return ARM_EQ;
22888 case ORDERED: return ARM_VC;
22889 case UNORDERED: return ARM_VS;
22890 case UNLT: return ARM_LT;
22891 case UNLE: return ARM_LE;
22892 case UNGT: return ARM_HI;
22893 case UNGE: return ARM_PL;
22894 /* UNEQ and LTGT do not have a representation. */
22895 case UNEQ: /* Fall through. */
22896 case LTGT: /* Fall through. */
22897 default: return ARM_NV;
22898 }
22899
22900 case E_CC_SWPmode:
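/* Added note (assumption based on the mappings below): the operands of
   the comparison were swapped when the flags were set, so each condition
   is replaced by its swapped counterpart (GE <-> LE, GT <-> LT,
   GEU <-> LEU, GTU <-> LTU) rather than by its inverse.  */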
22901 switch (comp_code)
22902 {
22903 case NE: return ARM_NE;
22904 case EQ: return ARM_EQ;
22905 case GE: return ARM_LE;
22906 case GT: return ARM_LT;
22907 case LE: return ARM_GE;
22908 case LT: return ARM_GT;
22909 case GEU: return ARM_LS;
22910 case GTU: return ARM_CC;
22911 case LEU: return ARM_CS;
22912 case LTU: return ARM_HI;
22913 default: return ARM_NV;
22914 }
22915
22916 case E_CC_Cmode:
22917 switch (comp_code)
22918 {
22919 case LTU: return ARM_CS;
22920 case GEU: return ARM_CC;
22921 case NE: return ARM_CS;
22922 case EQ: return ARM_CC;
22923 default: return ARM_NV;
22924 }
22925
22926 case E_CC_CZmode:
22927 switch (comp_code)
22928 {
22929 case NE: return ARM_NE;
22930 case EQ: return ARM_EQ;
22931 case GEU: return ARM_CS;
22932 case GTU: return ARM_HI;
22933 case LEU: return ARM_LS;
22934 case LTU: return ARM_CC;
22935 default: return ARM_NV;
22936 }
22937
22938 case E_CC_NCVmode:
22939 switch (comp_code)
22940 {
22941 case GE: return ARM_GE;
22942 case LT: return ARM_LT;
22943 case GEU: return ARM_CS;
22944 case LTU: return ARM_CC;
22945 default: return ARM_NV;
22946 }
22947
22948 case E_CC_Vmode:
22949 switch (comp_code)
22950 {
22951 case NE: return ARM_VS;
22952 case EQ: return ARM_VC;
22953 default: return ARM_NV;
22954 }
22955
22956 case E_CCmode:
22957 switch (comp_code)
22958 {
22959 case NE: return ARM_NE;
22960 case EQ: return ARM_EQ;
22961 case GE: return ARM_GE;
22962 case GT: return ARM_GT;
22963 case LE: return ARM_LE;
22964 case LT: return ARM_LT;
22965 case GEU: return ARM_CS;
22966 case GTU: return ARM_HI;
22967 case LEU: return ARM_LS;
22968 case LTU: return ARM_CC;
22969 default: return ARM_NV;
22970 }
22971
22972 default: gcc_unreachable ();
22973 }
22974 }
22975
22976 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22977 static enum arm_cond_code
22978 get_arm_condition_code (rtx comparison)
22979 {
22980 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22981 gcc_assert (code != ARM_NV);
22982 return code;
22983 }
22984
22985 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
22986 code registers when not targeting Thumb1. The VFP condition register
22987 only exists when generating hard-float code. */
22988 static bool
22989 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
22990 {
22991 if (!TARGET_32BIT)
22992 return false;
22993
22994 *p1 = CC_REGNUM;
22995 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
22996 return true;
22997 }
22998
22999 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23000 instructions. */
23001 void
23002 thumb2_final_prescan_insn (rtx_insn *insn)
23003 {
23004 rtx_insn *first_insn = insn;
23005 rtx body = PATTERN (insn);
23006 rtx predicate;
23007 enum arm_cond_code code;
23008 int n;
23009 int mask;
23010 int max;
23011
23012 /* max_insns_skipped in the tune was already taken into account in the
23013 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
23014 just emit IT blocks as large as we can. It does not make sense to split
23015 the IT blocks. */
23016 max = MAX_INSN_PER_IT_BLOCK;
23017
23018 /* Remove the previous insn from the count of insns to be output. */
23019 if (arm_condexec_count)
23020 arm_condexec_count--;
23021
23022 /* Nothing to do if we are already inside a conditional block. */
23023 if (arm_condexec_count)
23024 return;
23025
23026 if (GET_CODE (body) != COND_EXEC)
23027 return;
23028
23029 /* Conditional jumps are implemented directly. */
23030 if (JUMP_P (insn))
23031 return;
23032
23033 predicate = COND_EXEC_TEST (body);
23034 arm_current_cc = get_arm_condition_code (predicate);
23035
23036 n = get_attr_ce_count (insn);
23037 arm_condexec_count = 1;
23038 arm_condexec_mask = (1 << n) - 1;
23039 arm_condexec_masklen = n;
23040 /* See if subsequent instructions can be combined into the same block. */
23041 for (;;)
23042 {
23043 insn = next_nonnote_insn (insn);
23044
23045 /* Jumping into the middle of an IT block is illegal, so a label or
23046 barrier terminates the block. */
23047 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23048 break;
23049
23050 body = PATTERN (insn);
23051 /* USE and CLOBBER aren't really insns, so just skip them. */
23052 if (GET_CODE (body) == USE
23053 || GET_CODE (body) == CLOBBER)
23054 continue;
23055
23056 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23057 if (GET_CODE (body) != COND_EXEC)
23058 break;
23059 /* Maximum number of conditionally executed instructions in a block. */
23060 n = get_attr_ce_count (insn);
23061 if (arm_condexec_masklen + n > max)
23062 break;
23063
23064 predicate = COND_EXEC_TEST (body);
23065 code = get_arm_condition_code (predicate);
23066 mask = (1 << n) - 1;
23067 if (arm_current_cc == code)
23068 arm_condexec_mask |= (mask << arm_condexec_masklen);
23069 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23070 break;
23071
23072 arm_condexec_count++;
23073 arm_condexec_masklen += n;
23074
23075 /* A jump must be the last instruction in a conditional block. */
23076 if (JUMP_P (insn))
23077 break;
23078 }
23079 /* Restore recog_data (getting the attributes of other insns can
23080 destroy this array, but final.c assumes that it remains intact
23081 across this call). */
23082 extract_constrain_insn_cached (first_insn);
23083 }
23084
23085 void
23086 arm_final_prescan_insn (rtx_insn *insn)
23087 {
23088 /* BODY will hold the body of INSN. */
23089 rtx body = PATTERN (insn);
23090
23091 /* This will be 1 if trying to repeat the trick, and things need to be
23092 reversed if it appears to fail. */
23093 int reverse = 0;
23094
23095 /* If we start with a return insn, we only succeed if we find another one. */
23096 int seeking_return = 0;
23097 enum rtx_code return_code = UNKNOWN;
23098
23099 /* START_INSN will hold the insn from where we start looking. This is the
23100 first insn after the following code_label if REVERSE is true. */
23101 rtx_insn *start_insn = insn;
23102
23103 /* If in state 4, check if the target branch is reached, in order to
23104 change back to state 0. */
23105 if (arm_ccfsm_state == 4)
23106 {
23107 if (insn == arm_target_insn)
23108 {
23109 arm_target_insn = NULL;
23110 arm_ccfsm_state = 0;
23111 }
23112 return;
23113 }
23114
23115 /* If in state 3, it is possible to repeat the trick, if this insn is an
23116 unconditional branch to a label, and immediately following this branch
23117 is the previous target label which is only used once, and the label this
23118 branch jumps to is not too far off. */
23119 if (arm_ccfsm_state == 3)
23120 {
23121 if (simplejump_p (insn))
23122 {
23123 start_insn = next_nonnote_insn (start_insn);
23124 if (BARRIER_P (start_insn))
23125 {
23126 /* XXX Isn't this always a barrier? */
23127 start_insn = next_nonnote_insn (start_insn);
23128 }
23129 if (LABEL_P (start_insn)
23130 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23131 && LABEL_NUSES (start_insn) == 1)
23132 reverse = TRUE;
23133 else
23134 return;
23135 }
23136 else if (ANY_RETURN_P (body))
23137 {
23138 start_insn = next_nonnote_insn (start_insn);
23139 if (BARRIER_P (start_insn))
23140 start_insn = next_nonnote_insn (start_insn);
23141 if (LABEL_P (start_insn)
23142 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23143 && LABEL_NUSES (start_insn) == 1)
23144 {
23145 reverse = TRUE;
23146 seeking_return = 1;
23147 return_code = GET_CODE (body);
23148 }
23149 else
23150 return;
23151 }
23152 else
23153 return;
23154 }
23155
23156 gcc_assert (!arm_ccfsm_state || reverse);
23157 if (!JUMP_P (insn))
23158 return;
23159
23160 /* This jump might be paralleled with a clobber of the condition codes;
23161 the jump should always come first. */
23162 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23163 body = XVECEXP (body, 0, 0);
23164
23165 if (reverse
23166 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23167 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23168 {
23169 int insns_skipped;
23170 int fail = FALSE, succeed = FALSE;
23171 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23172 int then_not_else = TRUE;
23173 rtx_insn *this_insn = start_insn;
23174 rtx label = 0;
23175
23176 /* Register the insn jumped to. */
23177 if (reverse)
23178 {
23179 if (!seeking_return)
23180 label = XEXP (SET_SRC (body), 0);
23181 }
23182 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23183 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23184 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23185 {
23186 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23187 then_not_else = FALSE;
23188 }
23189 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23190 {
23191 seeking_return = 1;
23192 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23193 }
23194 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23195 {
23196 seeking_return = 1;
23197 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23198 then_not_else = FALSE;
23199 }
23200 else
23201 gcc_unreachable ();
23202
23203 /* See how many insns this branch skips, and what kind of insns. If all
23204 insns are okay, and the label or unconditional branch to the same
23205 label is not too far away, succeed. */
23206 for (insns_skipped = 0;
23207 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23208 {
23209 rtx scanbody;
23210
23211 this_insn = next_nonnote_insn (this_insn);
23212 if (!this_insn)
23213 break;
23214
23215 switch (GET_CODE (this_insn))
23216 {
23217 case CODE_LABEL:
23218 /* Succeed if it is the target label, otherwise fail since
23219 control falls in from somewhere else. */
23220 if (this_insn == label)
23221 {
23222 arm_ccfsm_state = 1;
23223 succeed = TRUE;
23224 }
23225 else
23226 fail = TRUE;
23227 break;
23228
23229 case BARRIER:
23230 /* Succeed if the following insn is the target label.
23231 Otherwise fail.
23232 If return insns are used then the last insn in a function
23233 will be a barrier. */
23234 this_insn = next_nonnote_insn (this_insn);
23235 if (this_insn && this_insn == label)
23236 {
23237 arm_ccfsm_state = 1;
23238 succeed = TRUE;
23239 }
23240 else
23241 fail = TRUE;
23242 break;
23243
23244 case CALL_INSN:
23245 /* The AAPCS says that conditional calls should not be
23246 used since they make interworking inefficient (the
23247 linker can't transform BL<cond> into BLX). That's
23248 only a problem if the machine has BLX. */
23249 if (arm_arch5)
23250 {
23251 fail = TRUE;
23252 break;
23253 }
23254
23255 /* Succeed if the following insn is the target label, or
23256 if the following two insns are a barrier and the
23257 target label. */
23258 this_insn = next_nonnote_insn (this_insn);
23259 if (this_insn && BARRIER_P (this_insn))
23260 this_insn = next_nonnote_insn (this_insn);
23261
23262 if (this_insn && this_insn == label
23263 && insns_skipped < max_insns_skipped)
23264 {
23265 arm_ccfsm_state = 1;
23266 succeed = TRUE;
23267 }
23268 else
23269 fail = TRUE;
23270 break;
23271
23272 case JUMP_INSN:
23273 /* If this is an unconditional branch to the same label, succeed.
23274 If it is to another label, do nothing. If it is conditional,
23275 fail. */
23276 /* XXX Probably, the tests for SET and the PC are
23277 unnecessary. */
23278
23279 scanbody = PATTERN (this_insn);
23280 if (GET_CODE (scanbody) == SET
23281 && GET_CODE (SET_DEST (scanbody)) == PC)
23282 {
23283 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23284 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23285 {
23286 arm_ccfsm_state = 2;
23287 succeed = TRUE;
23288 }
23289 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23290 fail = TRUE;
23291 }
23292 /* Fail if a conditional return is undesirable (e.g. on a
23293 StrongARM), but still allow this if optimizing for size. */
23294 else if (GET_CODE (scanbody) == return_code
23295 && !use_return_insn (TRUE, NULL)
23296 && !optimize_size)
23297 fail = TRUE;
23298 else if (GET_CODE (scanbody) == return_code)
23299 {
23300 arm_ccfsm_state = 2;
23301 succeed = TRUE;
23302 }
23303 else if (GET_CODE (scanbody) == PARALLEL)
23304 {
23305 switch (get_attr_conds (this_insn))
23306 {
23307 case CONDS_NOCOND:
23308 break;
23309 default:
23310 fail = TRUE;
23311 break;
23312 }
23313 }
23314 else
23315 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23316
23317 break;
23318
23319 case INSN:
23320 /* Instructions using or affecting the condition codes make it
23321 fail. */
23322 scanbody = PATTERN (this_insn);
23323 if (!(GET_CODE (scanbody) == SET
23324 || GET_CODE (scanbody) == PARALLEL)
23325 || get_attr_conds (this_insn) != CONDS_NOCOND)
23326 fail = TRUE;
23327 break;
23328
23329 default:
23330 break;
23331 }
23332 }
23333 if (succeed)
23334 {
23335 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23336 arm_target_label = CODE_LABEL_NUMBER (label);
23337 else
23338 {
23339 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23340
23341 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23342 {
23343 this_insn = next_nonnote_insn (this_insn);
23344 gcc_assert (!this_insn
23345 || (!BARRIER_P (this_insn)
23346 && !LABEL_P (this_insn)));
23347 }
23348 if (!this_insn)
23349 {
23350 /* Oh dear! We ran off the end; give up. */
23351 extract_constrain_insn_cached (insn);
23352 arm_ccfsm_state = 0;
23353 arm_target_insn = NULL;
23354 return;
23355 }
23356 arm_target_insn = this_insn;
23357 }
23358
23359 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23360 what it was. */
23361 if (!reverse)
23362 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23363
23364 if (reverse || then_not_else)
23365 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23366 }
23367
23368 /* Restore recog_data (getting the attributes of other insns can
23369 destroy this array, but final.c assumes that it remains intact
23370 across this call). */
23371 extract_constrain_insn_cached (insn);
23372 }
23373 }
23374
23375 /* Output IT instructions. */
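/* Illustrative example (added): for a block of two conditional
   instructions where arm_current_cc is "eq", arm_condexec_masklen is 2
   and arm_condexec_mask is 0b01, the loop below builds "te" and prints
   "ite eq" before the first instruction (first insn executed if EQ, the
   second if NE).  */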
23376 void
23377 thumb2_asm_output_opcode (FILE * stream)
23378 {
23379 char buff[5];
23380 int n;
23381
23382 if (arm_condexec_mask)
23383 {
23384 for (n = 0; n < arm_condexec_masklen; n++)
23385 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23386 buff[n] = 0;
23387 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23388 arm_condition_codes[arm_current_cc]);
23389 arm_condexec_mask = 0;
23390 }
23391 }
23392
23393 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
23394 UNITS_PER_WORD bytes wide. */
23395 static unsigned int
23396 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
23397 {
23398 if (TARGET_32BIT
23399 && regno > PC_REGNUM
23400 && regno != FRAME_POINTER_REGNUM
23401 && regno != ARG_POINTER_REGNUM
23402 && !IS_VFP_REGNUM (regno))
23403 return 1;
23404
23405 return ARM_NUM_REGS (mode);
23406 }
23407
23408 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23409 static bool
23410 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23411 {
23412 if (GET_MODE_CLASS (mode) == MODE_CC)
23413 return (regno == CC_REGNUM
23414 || (TARGET_HARD_FLOAT
23415 && regno == VFPCC_REGNUM));
23416
23417 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23418 return false;
23419
23420 if (TARGET_THUMB1)
23421 /* For the Thumb we only allow values bigger than SImode in
23422 registers 0 - 6, so that there is always a second low
23423 register available to hold the upper part of the value.
23424 We probably ought to ensure that the register is the
23425 start of an even numbered register pair. */
23426 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23427
23428 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23429 {
23430 if (mode == SFmode || mode == SImode)
23431 return VFP_REGNO_OK_FOR_SINGLE (regno);
23432
23433 if (mode == DFmode)
23434 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23435
23436 if (mode == HFmode)
23437 return VFP_REGNO_OK_FOR_SINGLE (regno);
23438
23439 /* VFP registers can hold HImode values. */
23440 if (mode == HImode)
23441 return VFP_REGNO_OK_FOR_SINGLE (regno);
23442
23443 if (TARGET_NEON)
23444 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23445 || (VALID_NEON_QREG_MODE (mode)
23446 && NEON_REGNO_OK_FOR_QUAD (regno))
23447 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23448 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23449 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23450 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23451 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23452
23453 return false;
23454 }
23455
23456 if (TARGET_REALLY_IWMMXT)
23457 {
23458 if (IS_IWMMXT_GR_REGNUM (regno))
23459 return mode == SImode;
23460
23461 if (IS_IWMMXT_REGNUM (regno))
23462 return VALID_IWMMXT_REG_MODE (mode);
23463 }
23464
23465 /* We allow almost any value to be stored in the general registers.
23466 Restrict doubleword quantities to even register pairs in ARM state
23467 so that we can use ldrd. Do not allow very large Neon structure
23468 opaque modes in general registers; they would use too many. */
23469 if (regno <= LAST_ARM_REGNUM)
23470 {
23471 if (ARM_NUM_REGS (mode) > 4)
23472 return false;
23473
23474 if (TARGET_THUMB2)
23475 return true;
23476
23477 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23478 }
23479
23480 if (regno == FRAME_POINTER_REGNUM
23481 || regno == ARG_POINTER_REGNUM)
23482 /* We only allow integers in the fake hard registers. */
23483 return GET_MODE_CLASS (mode) == MODE_INT;
23484
23485 return false;
23486 }
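/* Two illustrative consequences of the checks above: in ARM state with
   TARGET_LDRD, a DImode value is allowed in the even pair r0/r1 (so
   LDRD/STRD can be used) but not starting at the odd register r1,
   whereas in Thumb-2 any core register is acceptable for modes of up
   to four registers.  */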
23487
23488 /* Implement TARGET_MODES_TIEABLE_P. */
23489
23490 static bool
23491 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23492 {
23493 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23494 return true;
23495
23496 /* We specifically want to allow elements of "structure" modes to
23497 be tieable to the structure. This more general condition allows
23498 other rarer situations too. */
23499 if (TARGET_NEON
23500 && (VALID_NEON_DREG_MODE (mode1)
23501 || VALID_NEON_QREG_MODE (mode1)
23502 || VALID_NEON_STRUCT_MODE (mode1))
23503 && (VALID_NEON_DREG_MODE (mode2)
23504 || VALID_NEON_QREG_MODE (mode2)
23505 || VALID_NEON_STRUCT_MODE (mode2)))
23506 return true;
23507
23508 return false;
23509 }
23510
23511 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23512 not used in arm mode. */
23513
23514 enum reg_class
23515 arm_regno_class (int regno)
23516 {
23517 if (regno == PC_REGNUM)
23518 return NO_REGS;
23519
23520 if (TARGET_THUMB1)
23521 {
23522 if (regno == STACK_POINTER_REGNUM)
23523 return STACK_REG;
23524 if (regno == CC_REGNUM)
23525 return CC_REG;
23526 if (regno < 8)
23527 return LO_REGS;
23528 return HI_REGS;
23529 }
23530
23531 if (TARGET_THUMB2 && regno < 8)
23532 return LO_REGS;
23533
23534 if ( regno <= LAST_ARM_REGNUM
23535 || regno == FRAME_POINTER_REGNUM
23536 || regno == ARG_POINTER_REGNUM)
23537 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23538
23539 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23540 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23541
23542 if (IS_VFP_REGNUM (regno))
23543 {
23544 if (regno <= D7_VFP_REGNUM)
23545 return VFP_D0_D7_REGS;
23546 else if (regno <= LAST_LO_VFP_REGNUM)
23547 return VFP_LO_REGS;
23548 else
23549 return VFP_HI_REGS;
23550 }
23551
23552 if (IS_IWMMXT_REGNUM (regno))
23553 return IWMMXT_REGS;
23554
23555 if (IS_IWMMXT_GR_REGNUM (regno))
23556 return IWMMXT_GR_REGS;
23557
23558 return NO_REGS;
23559 }
23560
23561 /* Handle a special case when computing the offset
23562 of an argument from the frame pointer. */
23563 int
23564 arm_debugger_arg_offset (int value, rtx addr)
23565 {
23566 rtx_insn *insn;
23567
23568 /* We are only interested if dbxout_parms() failed to compute the offset. */
23569 if (value != 0)
23570 return 0;
23571
23572 /* We can only cope with the case where the address is held in a register. */
23573 if (!REG_P (addr))
23574 return 0;
23575
23576 /* If we are using the frame pointer to point at the argument, then
23577 an offset of 0 is correct. */
23578 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23579 return 0;
23580
23581 /* If we are using the stack pointer to point at the
23582 argument, then an offset of 0 is correct. */
23583 /* ??? Check this is consistent with thumb2 frame layout. */
23584 if ((TARGET_THUMB || !frame_pointer_needed)
23585 && REGNO (addr) == SP_REGNUM)
23586 return 0;
23587
23588 /* Oh dear. The argument is pointed to by a register rather
23589 than being held in a register, or being stored at a known
23590 offset from the frame pointer. Since GDB only understands
23591 those two kinds of argument we must translate the address
23592 held in the register into an offset from the frame pointer.
23593 We do this by searching through the insns for the function
23594 looking to see where this register gets its value. If the
23595 register is initialized from the frame pointer plus an offset
23596 then we are in luck and we can continue, otherwise we give up.
23597
23598 This code is exercised by producing debugging information
23599 for a function with arguments like this:
23600
23601 double func (double a, double b, int c, double d) {return d;}
23602
23603 Without this code the stab for parameter 'd' will be set to
23604 an offset of 0 from the frame pointer, rather than 8. */
23605
23606 /* The if() statement says:
23607
23608 If the insn is a normal instruction
23609 and if the insn is setting the value in a register
23610 and if the register being set is the register holding the address of the argument
23611 and if the address is computed by an addition
23612 that involves adding to a register
23613 which is the frame pointer
23614 a constant integer
23615
23616 then... */
23617
23618 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23619 {
23620 if ( NONJUMP_INSN_P (insn)
23621 && GET_CODE (PATTERN (insn)) == SET
23622 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23623 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23624 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23625 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23626 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23627 )
23628 {
23629 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23630
23631 break;
23632 }
23633 }
23634
23635 if (value == 0)
23636 {
23637 debug_rtx (addr);
23638 warning (0, "unable to compute real location of stacked parameter");
23639 value = 8; /* XXX magic hack */
23640 }
23641
23642 return value;
23643 }
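/* A hypothetical insn matching the test above, for an argument whose
   address was copied into a scratch register from the frame pointer
   plus 8 (the register numbers are only an example):

	(set (reg:SI 4)
	     (plus:SI (reg:SI 11)	; hard frame pointer (ARM state)
		      (const_int 8)))

   Finding it sets VALUE to 8, which is the offset GDB expects.  */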
23644 \f
23645 /* Implement TARGET_PROMOTED_TYPE. */
23646
23647 static tree
23648 arm_promoted_type (const_tree t)
23649 {
23650 if (SCALAR_FLOAT_TYPE_P (t)
23651 && TYPE_PRECISION (t) == 16
23652 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23653 return float_type_node;
23654 return NULL_TREE;
23655 }
23656
23657 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23658 This simply adds HFmode as a supported mode; even though we don't
23659 implement arithmetic on this type directly, it's supported by
23660 optabs conversions, much the way the double-word arithmetic is
23661 special-cased in the default hook. */
23662
23663 static bool
23664 arm_scalar_mode_supported_p (scalar_mode mode)
23665 {
23666 if (mode == HFmode)
23667 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23668 else if (ALL_FIXED_POINT_MODE_P (mode))
23669 return true;
23670 else
23671 return default_scalar_mode_supported_p (mode);
23672 }
23673
23674 /* Set the value of FLT_EVAL_METHOD.
23675 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23676
23677 0: evaluate all operations and constants, whose semantic type has at
23678 most the range and precision of type float, to the range and
23679 precision of float; evaluate all other operations and constants to
23680 the range and precision of the semantic type;
23681
23682 N, where _FloatN is a supported interchange floating type
23683 evaluate all operations and constants, whose semantic type has at
23684 most the range and precision of _FloatN type, to the range and
23685 precision of the _FloatN type; evaluate all other operations and
23686 constants to the range and precision of the semantic type;
23687
23688 If we have the ARMv8.2-A extensions then we support _Float16 in native
23689 precision, so we should set this to 16. Otherwise, we support the type,
23690 but want to evaluate expressions in float precision, so set this to
23691 0. */
23692
23693 static enum flt_eval_method
23694 arm_excess_precision (enum excess_precision_type type)
23695 {
23696 switch (type)
23697 {
23698 case EXCESS_PRECISION_TYPE_FAST:
23699 case EXCESS_PRECISION_TYPE_STANDARD:
23700 /* We can calculate either in 16-bit range and precision or
23701 32-bit range and precision. Make that decision based on whether
23702 we have native support for the ARMv8.2-A 16-bit floating-point
23703 instructions or not. */
23704 return (TARGET_VFP_FP16INST
23705 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23706 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23707 case EXCESS_PRECISION_TYPE_IMPLICIT:
23708 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23709 default:
23710 gcc_unreachable ();
23711 }
23712 return FLT_EVAL_METHOD_UNPREDICTABLE;
23713 }
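/* For example (illustrative only), for

	_Float16 a, b, c;
	c = a * b;

   without the ARMv8.2-A FP16 instructions the multiplication is
   evaluated in 32-bit float and the result converted back to _Float16,
   while with TARGET_VFP_FP16INST it can be evaluated directly in
   16-bit precision, matching the FLT_EVAL_METHOD values returned
   above.  */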
23714
23715
23716 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23717 _Float16 if we are using anything other than ieee format for 16-bit
23718 floating point. Otherwise, punt to the default implementation. */
23719 static opt_scalar_float_mode
23720 arm_floatn_mode (int n, bool extended)
23721 {
23722 if (!extended && n == 16)
23723 {
23724 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23725 return HFmode;
23726 return opt_scalar_float_mode ();
23727 }
23728
23729 return default_floatn_mode (n, extended);
23730 }
23731
23732
23733 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23734 not to early-clobber SRC registers in the process.
23735
23736 We assume that the operands described by SRC and DEST represent a
23737 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23738 number of components into which the copy has been decomposed. */
23739 void
23740 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23741 {
23742 unsigned int i;
23743
23744 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23745 || REGNO (operands[0]) < REGNO (operands[1]))
23746 {
23747 for (i = 0; i < count; i++)
23748 {
23749 operands[2 * i] = dest[i];
23750 operands[2 * i + 1] = src[i];
23751 }
23752 }
23753 else
23754 {
23755 for (i = 0; i < count; i++)
23756 {
23757 operands[2 * i] = dest[count - i - 1];
23758 operands[2 * i + 1] = src[count - i - 1];
23759 }
23760 }
23761 }
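/* Worked example (register numbers are illustrative): for a copy
   decomposed into three doubleword moves with

	dest = { d2, d3, d4 }    src = { d1, d2, d3 }

   the source and destination overlap and REGNO (operands[0]) is
   greater than REGNO (operands[1]), so the operands are laid out in
   reverse order and the moves are emitted as d4 := d3, d3 := d2,
   d2 := d1, reading each source register before it is overwritten.  */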
23762
23763 /* Split operands into moves from op[1] + op[2] into op[0]. */
23764
23765 void
23766 neon_split_vcombine (rtx operands[3])
23767 {
23768 unsigned int dest = REGNO (operands[0]);
23769 unsigned int src1 = REGNO (operands[1]);
23770 unsigned int src2 = REGNO (operands[2]);
23771 machine_mode halfmode = GET_MODE (operands[1]);
23772 unsigned int halfregs = REG_NREGS (operands[1]);
23773 rtx destlo, desthi;
23774
23775 if (src1 == dest && src2 == dest + halfregs)
23776 {
23777 /* No-op move. Can't split to nothing; emit something. */
23778 emit_note (NOTE_INSN_DELETED);
23779 return;
23780 }
23781
23782 /* Preserve register attributes for variable tracking. */
23783 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23784 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23785 GET_MODE_SIZE (halfmode));
23786
23787 /* Special case of reversed high/low parts. Use VSWP. */
23788 if (src2 == dest && src1 == dest + halfregs)
23789 {
23790 rtx x = gen_rtx_SET (destlo, operands[1]);
23791 rtx y = gen_rtx_SET (desthi, operands[2]);
23792 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23793 return;
23794 }
23795
23796 if (!reg_overlap_mentioned_p (operands[2], destlo))
23797 {
23798 /* Try to avoid unnecessary moves if part of the result
23799 is in the right place already. */
23800 if (src1 != dest)
23801 emit_move_insn (destlo, operands[1]);
23802 if (src2 != dest + halfregs)
23803 emit_move_insn (desthi, operands[2]);
23804 }
23805 else
23806 {
23807 if (src2 != dest + halfregs)
23808 emit_move_insn (desthi, operands[2]);
23809 if (src1 != dest)
23810 emit_move_insn (destlo, operands[1]);
23811 }
23812 }
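/* For instance (illustrative register choices): combining d0 and d1
   into q0 is a no-op apart from the deleted-insn note, since both
   halves are already in place; combining d1 and d0 into q0 hits the
   swapped case and emits the single PARALLEL, which matches the VSWP
   pattern; combining d4 and d5 into q0 simply emits the two moves
   d0 := d4 and d1 := d5.  */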
23813 \f
23814 /* Return the number (counting from 0) of
23815 the least significant set bit in MASK. */
23816
23817 inline static int
23818 number_of_first_bit_set (unsigned mask)
23819 {
23820 return ctz_hwi (mask);
23821 }
23822
23823 /* Like emit_multi_reg_push, but allowing for a different set of
23824 registers to be described as saved. MASK is the set of registers
23825 to be saved; REAL_REGS is the set of registers to be described as
23826 saved. If REAL_REGS is 0, only describe the stack adjustment. */
23827
23828 static rtx_insn *
23829 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23830 {
23831 unsigned long regno;
23832 rtx par[10], tmp, reg;
23833 rtx_insn *insn;
23834 int i, j;
23835
23836 /* Build the parallel of the registers actually being stored. */
23837 for (i = 0; mask; ++i, mask &= mask - 1)
23838 {
23839 regno = ctz_hwi (mask);
23840 reg = gen_rtx_REG (SImode, regno);
23841
23842 if (i == 0)
23843 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23844 else
23845 tmp = gen_rtx_USE (VOIDmode, reg);
23846
23847 par[i] = tmp;
23848 }
23849
23850 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23851 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23852 tmp = gen_frame_mem (BLKmode, tmp);
23853 tmp = gen_rtx_SET (tmp, par[0]);
23854 par[0] = tmp;
23855
23856 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23857 insn = emit_insn (tmp);
23858
23859 /* Always build the stack adjustment note for unwind info. */
23860 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23861 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23862 par[0] = tmp;
23863
23864 /* Build the parallel of the registers recorded as saved for unwind. */
23865 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23866 {
23867 regno = ctz_hwi (real_regs);
23868 reg = gen_rtx_REG (SImode, regno);
23869
23870 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23871 tmp = gen_frame_mem (SImode, tmp);
23872 tmp = gen_rtx_SET (tmp, reg);
23873 RTX_FRAME_RELATED_P (tmp) = 1;
23874 par[j + 1] = tmp;
23875 }
23876
23877 if (j == 0)
23878 tmp = par[0];
23879 else
23880 {
23881 RTX_FRAME_RELATED_P (par[0]) = 1;
23882 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23883 }
23884
23885 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23886
23887 return insn;
23888 }
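/* As an example (hypothetical register set): for MASK == REAL_REGS ==
   {r4, r5, lr}, the insn emitted above is a PARALLEL whose first
   element stores through (pre_modify sp (sp - 12)), and the attached
   REG_FRAME_RELATED_EXPR note describes the same effect for the
   unwinder as

	sp = sp - 12
	[sp + 0] = r4,  [sp + 4] = r5,  [sp + 8] = lr

   i.e. the lowest-numbered register ends up at the lowest address,
   matching the Thumb PUSH instruction.  */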
23889
23890 /* Emit code to push or pop registers to or from the stack. F is the
23891 assembly file. MASK is the registers to pop. */
23892 static void
23893 thumb_pop (FILE *f, unsigned long mask)
23894 {
23895 int regno;
23896 int lo_mask = mask & 0xFF;
23897
23898 gcc_assert (mask);
23899
23900 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23901 {
23902 /* Special case. Do not generate a POP PC statement here, do it in
23903 thumb_exit() */
23904 thumb_exit (f, -1);
23905 return;
23906 }
23907
23908 fprintf (f, "\tpop\t{");
23909
23910 /* Look at the low registers first. */
23911 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23912 {
23913 if (lo_mask & 1)
23914 {
23915 asm_fprintf (f, "%r", regno);
23916
23917 if ((lo_mask & ~1) != 0)
23918 fprintf (f, ", ");
23919 }
23920 }
23921
23922 if (mask & (1 << PC_REGNUM))
23923 {
23924 /* Catch popping the PC. */
23925 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
23926 || IS_CMSE_ENTRY (arm_current_func_type ()))
23927 {
23928 /* The PC is never popped directly; instead
23929 it is popped into r3 and then BX is used.  */
23930 fprintf (f, "}\n");
23931
23932 thumb_exit (f, -1);
23933
23934 return;
23935 }
23936 else
23937 {
23938 if (mask & 0xFF)
23939 fprintf (f, ", ");
23940
23941 asm_fprintf (f, "%r", PC_REGNUM);
23942 }
23943 }
23944
23945 fprintf (f, "}\n");
23946 }
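/* For example (illustrative masks): a MASK of {r4, r5, pc} produces

	pop	{r4, r5, pc}

   when popping the PC directly is allowed, whereas with interworking,
   TARGET_BACKTRACE, __builtin_eh_return or a CMSE entry function it
   instead produces

	pop	{r4, r5}

   followed by the return sequence generated by thumb_exit.  */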
23947
23948 /* Generate code to return from a thumb function.
23949 If 'reg_containing_return_addr' is -1, then the return address is
23950 actually on the stack, at the stack pointer. */
23951 static void
23952 thumb_exit (FILE *f, int reg_containing_return_addr)
23953 {
23954 unsigned regs_available_for_popping;
23955 unsigned regs_to_pop;
23956 int pops_needed;
23957 unsigned available;
23958 unsigned required;
23959 machine_mode mode;
23960 int size;
23961 int restore_a4 = FALSE;
23962
23963 /* Compute the registers we need to pop. */
23964 regs_to_pop = 0;
23965 pops_needed = 0;
23966
23967 if (reg_containing_return_addr == -1)
23968 {
23969 regs_to_pop |= 1 << LR_REGNUM;
23970 ++pops_needed;
23971 }
23972
23973 if (TARGET_BACKTRACE)
23974 {
23975 /* Restore the (ARM) frame pointer and stack pointer. */
23976 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23977 pops_needed += 2;
23978 }
23979
23980 /* If there is nothing to pop then just emit the BX instruction and
23981 return. */
23982 if (pops_needed == 0)
23983 {
23984 if (crtl->calls_eh_return)
23985 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23986
23987 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23988 {
23989 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
23990 reg_containing_return_addr);
23991 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
23992 }
23993 else
23994 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23995 return;
23996 }
23997 /* Otherwise if we are not supporting interworking and we have not created
23998 a backtrace structure and the function was not entered in ARM mode then
23999 just pop the return address straight into the PC. */
24000 else if (!TARGET_INTERWORK
24001 && !TARGET_BACKTRACE
24002 && !is_called_in_ARM_mode (current_function_decl)
24003 && !crtl->calls_eh_return
24004 && !IS_CMSE_ENTRY (arm_current_func_type ()))
24005 {
24006 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
24007 return;
24008 }
24009
24010 /* Find out how many of the (return) argument registers we can corrupt. */
24011 regs_available_for_popping = 0;
24012
24013 /* If returning via __builtin_eh_return, the bottom three registers
24014 all contain information needed for the return. */
24015 if (crtl->calls_eh_return)
24016 size = 12;
24017 else
24018 {
24019 /* We can deduce the registers used from the function's
24020 return value.  This is more reliable than examining
24021 df_regs_ever_live_p () because that will be set if the register is
24022 ever used in the function, not just if the register is used
24023 to hold a return value. */
24024
24025 if (crtl->return_rtx != 0)
24026 mode = GET_MODE (crtl->return_rtx);
24027 else
24028 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24029
24030 size = GET_MODE_SIZE (mode);
24031
24032 if (size == 0)
24033 {
24034 /* In a void function we can use any argument register.
24035 In a function that returns a structure on the stack
24036 we can use the second and third argument registers. */
24037 if (mode == VOIDmode)
24038 regs_available_for_popping =
24039 (1 << ARG_REGISTER (1))
24040 | (1 << ARG_REGISTER (2))
24041 | (1 << ARG_REGISTER (3));
24042 else
24043 regs_available_for_popping =
24044 (1 << ARG_REGISTER (2))
24045 | (1 << ARG_REGISTER (3));
24046 }
24047 else if (size <= 4)
24048 regs_available_for_popping =
24049 (1 << ARG_REGISTER (2))
24050 | (1 << ARG_REGISTER (3));
24051 else if (size <= 8)
24052 regs_available_for_popping =
24053 (1 << ARG_REGISTER (3));
24054 }
24055
24056 /* Match registers to be popped with registers into which we pop them. */
24057 for (available = regs_available_for_popping,
24058 required = regs_to_pop;
24059 required != 0 && available != 0;
24060 available &= ~(available & - available),
24061 required &= ~(required & - required))
24062 -- pops_needed;
24063
24064 /* If we have any popping registers left over, remove them. */
24065 if (available > 0)
24066 regs_available_for_popping &= ~available;
24067
24068 /* Otherwise if we need another popping register we can use
24069 the fourth argument register. */
24070 else if (pops_needed)
24071 {
24072 /* If we have not found any free argument registers and
24073 reg a4 contains the return address, we must move it. */
24074 if (regs_available_for_popping == 0
24075 && reg_containing_return_addr == LAST_ARG_REGNUM)
24076 {
24077 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24078 reg_containing_return_addr = LR_REGNUM;
24079 }
24080 else if (size > 12)
24081 {
24082 /* Register a4 is being used to hold part of the return value,
24083 but we have dire need of a free, low register. */
24084 restore_a4 = TRUE;
24085
24086 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
24087 }
24088
24089 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24090 {
24091 /* The fourth argument register is available. */
24092 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24093
24094 --pops_needed;
24095 }
24096 }
24097
24098 /* Pop as many registers as we can. */
24099 thumb_pop (f, regs_available_for_popping);
24100
24101 /* Process the registers we popped. */
24102 if (reg_containing_return_addr == -1)
24103 {
24104 /* The return address was popped into the lowest numbered register. */
24105 regs_to_pop &= ~(1 << LR_REGNUM);
24106
24107 reg_containing_return_addr =
24108 number_of_first_bit_set (regs_available_for_popping);
24109
24110 /* Remove this register from the mask of available registers, so that
24111 the return address will not be corrupted by further pops. */
24112 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24113 }
24114
24115 /* If we popped other registers then handle them here. */
24116 if (regs_available_for_popping)
24117 {
24118 int frame_pointer;
24119
24120 /* Work out which register currently contains the frame pointer. */
24121 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24122
24123 /* Move it into the correct place. */
24124 asm_fprintf (f, "\tmov\t%r, %r\n",
24125 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24126
24127 /* (Temporarily) remove it from the mask of popped registers. */
24128 regs_available_for_popping &= ~(1 << frame_pointer);
24129 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24130
24131 if (regs_available_for_popping)
24132 {
24133 int stack_pointer;
24134
24135 /* We popped the stack pointer as well,
24136 find the register that contains it. */
24137 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24138
24139 /* Move it into the stack register. */
24140 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24141
24142 /* At this point we have popped all necessary registers, so
24143 do not worry about restoring regs_available_for_popping
24144 to its correct value:
24145
24146 assert (pops_needed == 0)
24147 assert (regs_available_for_popping == (1 << frame_pointer))
24148 assert (regs_to_pop == (1 << STACK_POINTER)) */
24149 }
24150 else
24151 {
24152 /* Since we have just moved the popped value into the frame
24153 pointer, the popping register is available for reuse, and
24154 we know that we still have the stack pointer left to pop. */
24155 regs_available_for_popping |= (1 << frame_pointer);
24156 }
24157 }
24158
24159 /* If we still have registers left on the stack, but we no longer have
24160 any registers into which we can pop them, then we must move the return
24161 address into the link register and make available the register that
24162 contained it. */
24163 if (regs_available_for_popping == 0 && pops_needed > 0)
24164 {
24165 regs_available_for_popping |= 1 << reg_containing_return_addr;
24166
24167 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24168 reg_containing_return_addr);
24169
24170 reg_containing_return_addr = LR_REGNUM;
24171 }
24172
24173 /* If we have registers left on the stack then pop some more.
24174 We know that at most we will want to pop FP and SP. */
24175 if (pops_needed > 0)
24176 {
24177 int popped_into;
24178 int move_to;
24179
24180 thumb_pop (f, regs_available_for_popping);
24181
24182 /* We have popped either FP or SP.
24183 Move whichever one it is into the correct register. */
24184 popped_into = number_of_first_bit_set (regs_available_for_popping);
24185 move_to = number_of_first_bit_set (regs_to_pop);
24186
24187 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24188 --pops_needed;
24189 }
24190
24191 /* If we still have not popped everything then we must have only
24192 had one register available to us and we are now popping the SP. */
24193 if (pops_needed > 0)
24194 {
24195 int popped_into;
24196
24197 thumb_pop (f, regs_available_for_popping);
24198
24199 popped_into = number_of_first_bit_set (regs_available_for_popping);
24200
24201 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24202 /*
24203 assert (regs_to_pop == (1 << STACK_POINTER))
24204 assert (pops_needed == 1)
24205 */
24206 }
24207
24208 /* If necessary restore the a4 register. */
24209 if (restore_a4)
24210 {
24211 if (reg_containing_return_addr != LR_REGNUM)
24212 {
24213 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24214 reg_containing_return_addr = LR_REGNUM;
24215 }
24216
24217 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24218 }
24219
24220 if (crtl->calls_eh_return)
24221 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24222
24223 /* Return to caller. */
24224 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24225 {
24226 /* This is for the cases where LR is not being used to contain the return
24227 address. It may therefore contain information that we might not want
24228 to leak, hence it must be cleared. The value in R0 will never be a
24229 secret at this point, so it is safe to use it; see the clearing code
24230 in 'cmse_nonsecure_entry_clear_before_return'. */
24231 if (reg_containing_return_addr != LR_REGNUM)
24232 asm_fprintf (f, "\tmov\tlr, r0\n");
24233
24234 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24235 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24236 }
24237 else
24238 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24239 }
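/* A minimal example of what this can produce (assuming interworking is
   enabled, a void return type and the return address left on the
   stack): r0-r2 are available, the return address is popped into the
   lowest of them, and the exit becomes

	pop	{r0}
	bx	r0

   In the simple non-interworking case the shortcut near the top emits
   a plain "pop {pc}" instead.  */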
24240 \f
24241 /* Scan INSN just before assembler is output for it.
24242 For Thumb-1, we track the status of the condition codes; this
24243 information is used in the cbranchsi4_insn pattern. */
24244 void
24245 thumb1_final_prescan_insn (rtx_insn *insn)
24246 {
24247 if (flag_print_asm_name)
24248 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24249 INSN_ADDRESSES (INSN_UID (insn)));
24250 /* Don't overwrite the previous setter when we get to a cbranch. */
24251 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24252 {
24253 enum attr_conds conds;
24254
24255 if (cfun->machine->thumb1_cc_insn)
24256 {
24257 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24258 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24259 CC_STATUS_INIT;
24260 }
24261 conds = get_attr_conds (insn);
24262 if (conds == CONDS_SET)
24263 {
24264 rtx set = single_set (insn);
24265 cfun->machine->thumb1_cc_insn = insn;
24266 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24267 cfun->machine->thumb1_cc_op1 = const0_rtx;
24268 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24269 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24270 {
24271 rtx src1 = XEXP (SET_SRC (set), 1);
24272 if (src1 == const0_rtx)
24273 cfun->machine->thumb1_cc_mode = CCmode;
24274 }
24275 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24276 {
24277 /* Record the src register operand instead of dest because
24278 cprop_hardreg pass propagates src. */
24279 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24280 }
24281 }
24282 else if (conds != CONDS_NOCOND)
24283 cfun->machine->thumb1_cc_insn = NULL_RTX;
24284 }
24285
24286 /* Check if unexpected far jump is used. */
24287 if (cfun->machine->lr_save_eliminated
24288 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24289 internal_error("Unexpected thumb1 far jump");
24290 }
24291
24292 int
24293 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24294 {
24295 unsigned HOST_WIDE_INT mask = 0xff;
24296 int i;
24297
24298 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24299 if (val == 0) /* XXX */
24300 return 0;
24301
24302 for (i = 0; i < 25; i++)
24303 if ((val & (mask << i)) == val)
24304 return 1;
24305
24306 return 0;
24307 }
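/* For example: 0x00ff0000 is accepted, since (0xff << 16) covers it and
   the constant can be built as a move of 0xff followed by a shift;
   0x00000101 is rejected, because its set bits span more than eight
   consecutive bit positions.  Only shifts up to 24 need checking for a
   32-bit value, hence the loop bound of 25.  */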
24308
24309 /* Returns nonzero if the current function contains,
24310 or might contain a far jump. */
24311 static int
24312 thumb_far_jump_used_p (void)
24313 {
24314 rtx_insn *insn;
24315 bool far_jump = false;
24316 unsigned int func_size = 0;
24317
24318 /* If we have already decided that far jumps may be used,
24319 do not bother checking again, and always return true even if
24320 it turns out that they are not being used. Once we have made
24321 the decision that far jumps are present (and that hence the link
24322 register will be pushed onto the stack) we cannot go back on it. */
24323 if (cfun->machine->far_jump_used)
24324 return 1;
24325
24326 /* If this function is not being called from the prologue/epilogue
24327 generation code then it must be being called from the
24328 INITIAL_ELIMINATION_OFFSET macro. */
24329 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24330 {
24331 /* In this case we know that we are being asked about the elimination
24332 of the arg pointer register. If that register is not being used,
24333 then there are no arguments on the stack, and we do not have to
24334 worry that a far jump might force the prologue to push the link
24335 register, changing the stack offsets. In this case we can just
24336 return false, since the presence of far jumps in the function will
24337 not affect stack offsets.
24338
24339 If the arg pointer is live (or if it was live, but has now been
24340 eliminated and so set to dead) then we do have to test to see if
24341 the function might contain a far jump. This test can lead to some
24342 false positives, since before reload is completed, the length of
24343 branch instructions is not known, so gcc defaults to returning their
24344 longest length, which in turn sets the far jump attribute to true.
24345
24346 A false positive will not result in bad code being generated, but it
24347 will result in a needless push and pop of the link register. We
24348 hope that this does not occur too often.
24349
24350 If we need doubleword stack alignment this could affect the other
24351 elimination offsets so we can't risk getting it wrong. */
24352 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24353 cfun->machine->arg_pointer_live = 1;
24354 else if (!cfun->machine->arg_pointer_live)
24355 return 0;
24356 }
24357
24358 /* We should not change far_jump_used during or after reload, as there is
24359 no chance to change stack frame layout. */
24360 if (reload_in_progress || reload_completed)
24361 return 0;
24362
24363 /* Check to see if the function contains a branch
24364 insn with the far jump attribute set. */
24365 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24366 {
24367 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24368 {
24369 far_jump = true;
24370 }
24371 func_size += get_attr_length (insn);
24372 }
24373
24374 /* The far_jump attribute will always be true for thumb1 before the
24375 shorten_branch pass, so checking the far_jump attribute before
24376 shorten_branch isn't very useful.
24377
24378 The following heuristic tries to estimate more accurately whether a
24379 far jump will ultimately be used.  The heuristic is very conservative,
24380 as there is no way to roll back a decision not to use a far jump.
24381
24382 The Thumb1 long branch offset range is -2048 to 2046.  The worst case
24383 is that each 2-byte insn is paired with a 4-byte constant pool entry.
24384 Using a function size of 2048/3 as the threshold is conservative enough. */
24385 if (far_jump)
24386 {
24387 if ((func_size * 3) >= 2048)
24388 {
24389 /* Record the fact that we have decided that
24390 the function does use far jumps. */
24391 cfun->machine->far_jump_used = 1;
24392 return 1;
24393 }
24394 }
24395
24396 return 0;
24397 }
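/* A worked instance of the threshold above (numbers are illustrative):
   a function containing a candidate far jump whose insn lengths sum to
   700 bytes gives 700 * 3 == 2100 >= 2048, so far_jump_used is latched;
   one summing to 600 bytes gives 1800 < 2048 and the LR save can still
   be eliminated.  The factor of three reflects the worst case of each
   2-byte insn dragging a 4-byte constant-pool entry along with it.  */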
24398
24399 /* Return nonzero if FUNC must be entered in ARM mode. */
24400 static bool
24401 is_called_in_ARM_mode (tree func)
24402 {
24403 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24404
24405 /* Ignore the problem of functions whose address is taken.  */
24406 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24407 return true;
24408
24409 #ifdef ARM_PE
24410 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24411 #else
24412 return false;
24413 #endif
24414 }
24415
24416 /* Given the stack offsets and register mask in OFFSETS, decide how
24417 many additional registers to push instead of subtracting a constant
24418 from SP. For epilogues the principle is the same except we use pop.
24419 FOR_PROLOGUE indicates which we're generating. */
24420 static int
24421 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24422 {
24423 HOST_WIDE_INT amount;
24424 unsigned long live_regs_mask = offsets->saved_regs_mask;
24425 /* Extract a mask of the ones we can give to the Thumb's push/pop
24426 instruction. */
24427 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24428 /* Then count how many other high registers will need to be pushed. */
24429 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24430 int n_free, reg_base, size;
24431
24432 if (!for_prologue && frame_pointer_needed)
24433 amount = offsets->locals_base - offsets->saved_regs;
24434 else
24435 amount = offsets->outgoing_args - offsets->saved_regs;
24436
24437 /* If the stack frame size is 512 exactly, we can save one load
24438 instruction, which should make this a win even when optimizing
24439 for speed. */
24440 if (!optimize_size && amount != 512)
24441 return 0;
24442
24443 /* Can't do this if there are high registers to push. */
24444 if (high_regs_pushed != 0)
24445 return 0;
24446
24447 /* Shouldn't do it in the prologue if no registers would normally
24448 be pushed at all. In the epilogue, also allow it if we'll have
24449 a pop insn for the PC. */
24450 if (l_mask == 0
24451 && (for_prologue
24452 || TARGET_BACKTRACE
24453 || (live_regs_mask & 1 << LR_REGNUM) == 0
24454 || TARGET_INTERWORK
24455 || crtl->args.pretend_args_size != 0))
24456 return 0;
24457
24458 /* Don't do this if thumb_expand_prologue wants to emit instructions
24459 between the push and the stack frame allocation. */
24460 if (for_prologue
24461 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24462 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24463 return 0;
24464
24465 reg_base = 0;
24466 n_free = 0;
24467 if (!for_prologue)
24468 {
24469 size = arm_size_return_regs ();
24470 reg_base = ARM_NUM_INTS (size);
24471 live_regs_mask >>= reg_base;
24472 }
24473
24474 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24475 && (for_prologue || call_used_regs[reg_base + n_free]))
24476 {
24477 live_regs_mask >>= 1;
24478 n_free++;
24479 }
24480
24481 if (n_free == 0)
24482 return 0;
24483 gcc_assert (amount / 4 * 4 == amount);
24484
24485 if (amount >= 512 && (amount - n_free * 4) < 512)
24486 return (amount - 508) / 4;
24487 if (amount <= n_free * 4)
24488 return amount / 4;
24489 return 0;
24490 }
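/* Worked example (illustrative): with amount == 512 and two suitable
   free low registers (n_free == 2), 512 - 2 * 4 == 504 is below 512,
   so the function returns (512 - 508) / 4 == 1: pushing one extra
   register reduces the remaining adjustment to 508 bytes, the largest
   value a single Thumb-1 "sub sp, #imm" can handle.  */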
24491
24492 /* The bits which aren't usefully expanded as rtl. */
24493 const char *
24494 thumb1_unexpanded_epilogue (void)
24495 {
24496 arm_stack_offsets *offsets;
24497 int regno;
24498 unsigned long live_regs_mask = 0;
24499 int high_regs_pushed = 0;
24500 int extra_pop;
24501 int had_to_push_lr;
24502 int size;
24503
24504 if (cfun->machine->return_used_this_function != 0)
24505 return "";
24506
24507 if (IS_NAKED (arm_current_func_type ()))
24508 return "";
24509
24510 offsets = arm_get_frame_offsets ();
24511 live_regs_mask = offsets->saved_regs_mask;
24512 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24513
24514 /* We can deduce the registers used from the function's return value.
24515 This is more reliable than examining df_regs_ever_live_p () because that
24516 will be set if the register is ever used in the function, not just if
24517 the register is used to hold a return value. */
24518 size = arm_size_return_regs ();
24519
24520 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24521 if (extra_pop > 0)
24522 {
24523 unsigned long extra_mask = (1 << extra_pop) - 1;
24524 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24525 }
24526
24527 /* The prolog may have pushed some high registers to use as
24528 work registers. e.g. the testsuite file:
24529 gcc/testsuite/gcc.c-torture/execute/complex-2.c
24530 compiles to produce:
24531 push {r4, r5, r6, r7, lr}
24532 mov r7, r9
24533 mov r6, r8
24534 push {r6, r7}
24535 as part of the prolog. We have to undo that pushing here. */
24536
24537 if (high_regs_pushed)
24538 {
24539 unsigned long mask = live_regs_mask & 0xff;
24540 int next_hi_reg;
24541
24542 /* The available low registers depend on the size of the value we are
24543 returning. */
24544 if (size <= 12)
24545 mask |= 1 << 3;
24546 if (size <= 8)
24547 mask |= 1 << 2;
24548
24549 if (mask == 0)
24550 /* Oh dear! We have no low registers into which we can pop
24551 high registers! */
24552 internal_error
24553 ("no low registers available for popping high registers");
24554
24555 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24556 if (live_regs_mask & (1 << next_hi_reg))
24557 break;
24558
24559 while (high_regs_pushed)
24560 {
24561 /* Find lo register(s) into which the high register(s) can
24562 be popped. */
24563 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24564 {
24565 if (mask & (1 << regno))
24566 high_regs_pushed--;
24567 if (high_regs_pushed == 0)
24568 break;
24569 }
24570
24571 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24572
24573 /* Pop the values into the low register(s). */
24574 thumb_pop (asm_out_file, mask);
24575
24576 /* Move the value(s) into the high registers. */
24577 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24578 {
24579 if (mask & (1 << regno))
24580 {
24581 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24582 regno);
24583
24584 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24585 if (live_regs_mask & (1 << next_hi_reg))
24586 break;
24587 }
24588 }
24589 }
24590 live_regs_mask &= ~0x0f00;
24591 }
24592
24593 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24594 live_regs_mask &= 0xff;
24595
24596 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24597 {
24598 /* Pop the return address into the PC. */
24599 if (had_to_push_lr)
24600 live_regs_mask |= 1 << PC_REGNUM;
24601
24602 /* Either no argument registers were pushed or a backtrace
24603 structure was created which includes an adjusted stack
24604 pointer, so just pop everything. */
24605 if (live_regs_mask)
24606 thumb_pop (asm_out_file, live_regs_mask);
24607
24608 /* We have either just popped the return address into the
24609 PC or it was kept in LR for the entire function.
24610 Note that thumb_pop has already called thumb_exit if the
24611 PC was in the list. */
24612 if (!had_to_push_lr)
24613 thumb_exit (asm_out_file, LR_REGNUM);
24614 }
24615 else
24616 {
24617 /* Pop everything but the return address. */
24618 if (live_regs_mask)
24619 thumb_pop (asm_out_file, live_regs_mask);
24620
24621 if (had_to_push_lr)
24622 {
24623 if (size > 12)
24624 {
24625 /* We have no free low regs, so save one. */
24626 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24627 LAST_ARG_REGNUM);
24628 }
24629
24630 /* Get the return address into a temporary register. */
24631 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24632
24633 if (size > 12)
24634 {
24635 /* Move the return address to lr. */
24636 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24637 LAST_ARG_REGNUM);
24638 /* Restore the low register. */
24639 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24640 IP_REGNUM);
24641 regno = LR_REGNUM;
24642 }
24643 else
24644 regno = LAST_ARG_REGNUM;
24645 }
24646 else
24647 regno = LR_REGNUM;
24648
24649 /* Remove the argument registers that were pushed onto the stack. */
24650 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24651 SP_REGNUM, SP_REGNUM,
24652 crtl->args.pretend_args_size);
24653
24654 thumb_exit (asm_out_file, regno);
24655 }
24656
24657 return "";
24658 }
24659
24660 /* Functions to save and restore machine-specific function data. */
24661 static struct machine_function *
24662 arm_init_machine_status (void)
24663 {
24664 struct machine_function *machine;
24665 machine = ggc_cleared_alloc<machine_function> ();
24666
24667 #if ARM_FT_UNKNOWN != 0
24668 machine->func_type = ARM_FT_UNKNOWN;
24669 #endif
24670 return machine;
24671 }
24672
24673 /* Return an RTX indicating where the return address to the
24674 calling function can be found. */
24675 rtx
24676 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24677 {
24678 if (count != 0)
24679 return NULL_RTX;
24680
24681 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24682 }
24683
24684 /* Do anything needed before RTL is emitted for each function. */
24685 void
24686 arm_init_expanders (void)
24687 {
24688 /* Arrange to initialize and mark the machine per-function status. */
24689 init_machine_status = arm_init_machine_status;
24690
24691 /* This is to stop the combine pass optimizing away the alignment
24692 adjustment of va_arg. */
24693 /* ??? It is claimed that this should not be necessary. */
24694 if (cfun)
24695 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24696 }
24697
24698 /* Check whether FUNC is compiled for a different mode (ARM vs. Thumb) than the current one.  */
24699
24700 bool
24701 arm_change_mode_p (tree func)
24702 {
24703 if (TREE_CODE (func) != FUNCTION_DECL)
24704 return false;
24705
24706 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24707
24708 if (!callee_tree)
24709 callee_tree = target_option_default_node;
24710
24711 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24712 int flags = callee_opts->x_target_flags;
24713
24714 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24715 }
24716
24717 /* Like arm_compute_initial_elimination_offset.  Simpler because there
24718 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24719 to point at the base of the local variables after static stack
24720 space for a function has been allocated. */
24721
24722 HOST_WIDE_INT
24723 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24724 {
24725 arm_stack_offsets *offsets;
24726
24727 offsets = arm_get_frame_offsets ();
24728
24729 switch (from)
24730 {
24731 case ARG_POINTER_REGNUM:
24732 switch (to)
24733 {
24734 case STACK_POINTER_REGNUM:
24735 return offsets->outgoing_args - offsets->saved_args;
24736
24737 case FRAME_POINTER_REGNUM:
24738 return offsets->soft_frame - offsets->saved_args;
24739
24740 case ARM_HARD_FRAME_POINTER_REGNUM:
24741 return offsets->saved_regs - offsets->saved_args;
24742
24743 case THUMB_HARD_FRAME_POINTER_REGNUM:
24744 return offsets->locals_base - offsets->saved_args;
24745
24746 default:
24747 gcc_unreachable ();
24748 }
24749 break;
24750
24751 case FRAME_POINTER_REGNUM:
24752 switch (to)
24753 {
24754 case STACK_POINTER_REGNUM:
24755 return offsets->outgoing_args - offsets->soft_frame;
24756
24757 case ARM_HARD_FRAME_POINTER_REGNUM:
24758 return offsets->saved_regs - offsets->soft_frame;
24759
24760 case THUMB_HARD_FRAME_POINTER_REGNUM:
24761 return offsets->locals_base - offsets->soft_frame;
24762
24763 default:
24764 gcc_unreachable ();
24765 }
24766 break;
24767
24768 default:
24769 gcc_unreachable ();
24770 }
24771 }
24772
24773 /* Generate the function's prologue. */
24774
24775 void
24776 thumb1_expand_prologue (void)
24777 {
24778 rtx_insn *insn;
24779
24780 HOST_WIDE_INT amount;
24781 HOST_WIDE_INT size;
24782 arm_stack_offsets *offsets;
24783 unsigned long func_type;
24784 int regno;
24785 unsigned long live_regs_mask;
24786 unsigned long l_mask;
24787 unsigned high_regs_pushed = 0;
24788 bool lr_needs_saving;
24789
24790 func_type = arm_current_func_type ();
24791
24792 /* Naked functions don't have prologues. */
24793 if (IS_NAKED (func_type))
24794 {
24795 if (flag_stack_usage_info)
24796 current_function_static_stack_size = 0;
24797 return;
24798 }
24799
24800 if (IS_INTERRUPT (func_type))
24801 {
24802 error ("interrupt Service Routines cannot be coded in Thumb mode");
24803 return;
24804 }
24805
24806 if (is_called_in_ARM_mode (current_function_decl))
24807 emit_insn (gen_prologue_thumb1_interwork ());
24808
24809 offsets = arm_get_frame_offsets ();
24810 live_regs_mask = offsets->saved_regs_mask;
24811 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
24812
24813 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24814 l_mask = live_regs_mask & 0x40ff;
24815 /* Then count how many other high registers will need to be pushed. */
24816 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24817
24818 if (crtl->args.pretend_args_size)
24819 {
24820 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24821
24822 if (cfun->machine->uses_anonymous_args)
24823 {
24824 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24825 unsigned long mask;
24826
24827 mask = 1ul << (LAST_ARG_REGNUM + 1);
24828 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24829
24830 insn = thumb1_emit_multi_reg_push (mask, 0);
24831 }
24832 else
24833 {
24834 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24835 stack_pointer_rtx, x));
24836 }
24837 RTX_FRAME_RELATED_P (insn) = 1;
24838 }
24839
24840 if (TARGET_BACKTRACE)
24841 {
24842 HOST_WIDE_INT offset = 0;
24843 unsigned work_register;
24844 rtx work_reg, x, arm_hfp_rtx;
24845
24846 /* We have been asked to create a stack backtrace structure.
24847 The code looks like this:
24848
24849 0 .align 2
24850 0 func:
24851 0 sub SP, #16 Reserve space for 4 registers.
24852 2 push {R7} Push low registers.
24853 4 add R7, SP, #20 Get the stack pointer before the push.
24854 6 str R7, [SP, #8] Store the stack pointer
24855 (before reserving the space).
24856 8 mov R7, PC Get hold of the start of this code + 12.
24857 10 str R7, [SP, #16] Store it.
24858 12 mov R7, FP Get hold of the current frame pointer.
24859 14 str R7, [SP, #4] Store it.
24860 16 mov R7, LR Get hold of the current return address.
24861 18 str R7, [SP, #12] Store it.
24862 20 add R7, SP, #16 Point at the start of the
24863 backtrace structure.
24864 22 mov FP, R7 Put this value into the frame pointer. */
24865
24866 work_register = thumb_find_work_register (live_regs_mask);
24867 work_reg = gen_rtx_REG (SImode, work_register);
24868 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24869
24870 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24871 stack_pointer_rtx, GEN_INT (-16)));
24872 RTX_FRAME_RELATED_P (insn) = 1;
24873
24874 if (l_mask)
24875 {
24876 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24877 RTX_FRAME_RELATED_P (insn) = 1;
24878 lr_needs_saving = false;
24879
24880 offset = bit_count (l_mask) * UNITS_PER_WORD;
24881 }
24882
24883 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24884 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24885
24886 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24887 x = gen_frame_mem (SImode, x);
24888 emit_move_insn (x, work_reg);
24889
24890 /* Make sure that the instruction fetching the PC is in the right place
24891 to calculate "start of backtrace creation code + 12". */
24892 /* ??? The stores using the common WORK_REG ought to be enough to
24893 prevent the scheduler from doing anything weird. Failing that
24894 we could always move all of the following into an UNSPEC_VOLATILE. */
24895 if (l_mask)
24896 {
24897 x = gen_rtx_REG (SImode, PC_REGNUM);
24898 emit_move_insn (work_reg, x);
24899
24900 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24901 x = gen_frame_mem (SImode, x);
24902 emit_move_insn (x, work_reg);
24903
24904 emit_move_insn (work_reg, arm_hfp_rtx);
24905
24906 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24907 x = gen_frame_mem (SImode, x);
24908 emit_move_insn (x, work_reg);
24909 }
24910 else
24911 {
24912 emit_move_insn (work_reg, arm_hfp_rtx);
24913
24914 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24915 x = gen_frame_mem (SImode, x);
24916 emit_move_insn (x, work_reg);
24917
24918 x = gen_rtx_REG (SImode, PC_REGNUM);
24919 emit_move_insn (work_reg, x);
24920
24921 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24922 x = gen_frame_mem (SImode, x);
24923 emit_move_insn (x, work_reg);
24924 }
24925
24926 x = gen_rtx_REG (SImode, LR_REGNUM);
24927 emit_move_insn (work_reg, x);
24928
24929 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24930 x = gen_frame_mem (SImode, x);
24931 emit_move_insn (x, work_reg);
24932
24933 x = GEN_INT (offset + 12);
24934 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24935
24936 emit_move_insn (arm_hfp_rtx, work_reg);
24937 }
24938 /* Optimization: If we are not pushing any low registers but we are going
24939 to push some high registers then delay our first push. This will just
24940 be a push of LR and we can combine it with the push of the first high
24941 register. */
24942 else if ((l_mask & 0xff) != 0
24943 || (high_regs_pushed == 0 && lr_needs_saving))
24944 {
24945 unsigned long mask = l_mask;
24946 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24947 insn = thumb1_emit_multi_reg_push (mask, mask);
24948 RTX_FRAME_RELATED_P (insn) = 1;
24949 lr_needs_saving = false;
24950 }
24951
24952 if (high_regs_pushed)
24953 {
24954 unsigned pushable_regs;
24955 unsigned next_hi_reg;
24956 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24957 : crtl->args.info.nregs;
24958 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24959
24960 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24961 if (live_regs_mask & (1 << next_hi_reg))
24962 break;
24963
24964 /* Here we need to mask out registers used for passing arguments even
24965 if they could be pushed.  This is to avoid using them to stash the high
24966 registers; such a stash could clobber arguments that are still live.  */
24967 pushable_regs = l_mask & (~arg_regs_mask);
24968 if (lr_needs_saving)
24969 pushable_regs &= ~(1 << LR_REGNUM);
24970
24971 if (pushable_regs == 0)
24972 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24973
24974 while (high_regs_pushed > 0)
24975 {
24976 unsigned long real_regs_mask = 0;
24977 unsigned long push_mask = 0;
24978
24979 for (regno = LR_REGNUM; regno >= 0; regno --)
24980 {
24981 if (pushable_regs & (1 << regno))
24982 {
24983 emit_move_insn (gen_rtx_REG (SImode, regno),
24984 gen_rtx_REG (SImode, next_hi_reg));
24985
24986 high_regs_pushed --;
24987 real_regs_mask |= (1 << next_hi_reg);
24988 push_mask |= (1 << regno);
24989
24990 if (high_regs_pushed)
24991 {
24992 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24993 next_hi_reg --)
24994 if (live_regs_mask & (1 << next_hi_reg))
24995 break;
24996 }
24997 else
24998 break;
24999 }
25000 }
25001
25002 /* If we had to find a work register and we have not yet
25003 saved the LR then add it to the list of regs to push. */
25004 if (lr_needs_saving)
25005 {
25006 push_mask |= 1 << LR_REGNUM;
25007 real_regs_mask |= 1 << LR_REGNUM;
25008 lr_needs_saving = false;
25009 }
25010
25011 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
25012 RTX_FRAME_RELATED_P (insn) = 1;
25013 }
25014 }
25015
25016 /* Load the pic register before setting the frame pointer,
25017 so we can use r7 as a temporary work register. */
25018 if (flag_pic && arm_pic_register != INVALID_REGNUM)
25019 arm_load_pic_register (live_regs_mask);
25020
25021 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
25022 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
25023 stack_pointer_rtx);
25024
25025 size = offsets->outgoing_args - offsets->saved_args;
25026 if (flag_stack_usage_info)
25027 current_function_static_stack_size = size;
25028
25029 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25030 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
25031 || flag_stack_clash_protection)
25032 && size)
25033 sorry ("-fstack-check=specific for Thumb-1");
25034
25035 amount = offsets->outgoing_args - offsets->saved_regs;
25036 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25037 if (amount)
25038 {
25039 if (amount < 512)
25040 {
25041 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25042 GEN_INT (- amount)));
25043 RTX_FRAME_RELATED_P (insn) = 1;
25044 }
25045 else
25046 {
25047 rtx reg, dwarf;
25048
25049 /* The stack decrement is too big for an immediate value in a single
25050 insn. In theory we could issue multiple subtracts, but after
25051 three of them it becomes more space efficient to place the full
25052 value in the constant pool and load into a register. (Also the
25053 ARM debugger really likes to see only one stack decrement per
25054 function). So instead we look for a scratch register into which
25055 we can load the decrement, and then we subtract this from the
25056 stack pointer. Unfortunately on the thumb the only available
25057 scratch registers are the argument registers, and we cannot use
25058 these as they may hold arguments to the function. Instead we
25059 attempt to locate a call preserved register which is used by this
25060 function. If we can find one, then we know that it will have
25061 been pushed at the start of the prologue and so we can corrupt
25062 it now. */
25063 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25064 if (live_regs_mask & (1 << regno))
25065 break;
25066
25067 gcc_assert(regno <= LAST_LO_REGNUM);
25068
25069 reg = gen_rtx_REG (SImode, regno);
25070
25071 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25072
25073 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25074 stack_pointer_rtx, reg));
25075
25076 dwarf = gen_rtx_SET (stack_pointer_rtx,
25077 plus_constant (Pmode, stack_pointer_rtx,
25078 -amount));
25079 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25080 RTX_FRAME_RELATED_P (insn) = 1;
25081 }
25082 }
25083
25084 if (frame_pointer_needed)
25085 thumb_set_frame_pointer (offsets);
25086
25087 /* If we are profiling, make sure no instructions are scheduled before
25088 the call to mcount. Similarly if the user has requested no
25089 scheduling in the prolog. Similarly if we want non-call exceptions
25090 using the EABI unwinder, to prevent faulting instructions from being
25091 swapped with a stack adjustment. */
25092 if (crtl->profile || !TARGET_SCHED_PROLOG
25093 || (arm_except_unwind_info (&global_options) == UI_TARGET
25094 && cfun->can_throw_non_call_exceptions))
25095 emit_insn (gen_blockage ());
25096
25097 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25098 if (live_regs_mask & 0xff)
25099 cfun->machine->lr_save_eliminated = 0;
25100 }
25101
25102 /* Clear caller saved registers not used to pass return values and leaked
25103 condition flags before exiting a cmse_nonsecure_entry function. */
25104
25105 void
25106 cmse_nonsecure_entry_clear_before_return (void)
25107 {
25108 int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
25109 uint32_t padding_bits_to_clear = 0;
25110 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
25111 auto_sbitmap to_clear_bitmap (maxregno + 1);
25112 tree result_type;
25113 rtx result_rtl;
25114
25115 bitmap_clear (to_clear_bitmap);
25116 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
25117 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
25118
25119 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25120 registers. */
25121 if (TARGET_HARD_FLOAT)
25122 {
25123 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
25124
25125 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
25126
25127 /* Make sure we don't clear the two scratch registers used to clear the
25128 relevant FPSCR bits in output_return_instruction. */
25129 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25130 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
25131 emit_use (gen_rtx_REG (SImode, 4));
25132 bitmap_clear_bit (to_clear_bitmap, 4);
25133 }
25134
25135 /* If the user has defined registers to be caller saved, these are no longer
25136 restored by the function before returning and must thus be cleared for
25137 security purposes. */
25138 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
25139 {
25140 /* We do not touch registers that can be used to pass arguments as per
25141 the AAPCS, since these should never be made callee-saved by user
25142 options. */
25143 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25144 continue;
25145 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25146 continue;
25147 if (call_used_regs[regno])
25148 bitmap_set_bit (to_clear_bitmap, regno);
25149 }
25150
25151 /* Make sure we do not clear the registers that the result is returned in. */
25152 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25153 if (!VOID_TYPE_P (result_type))
25154 {
25155 uint64_t to_clear_return_mask;
25156 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25157
25158 /* No need to check that we return in registers, because we don't
25159 support returning on the stack yet. */
25160 gcc_assert (REG_P (result_rtl));
25161 to_clear_return_mask
25162 = compute_not_to_clear_mask (result_type, result_rtl, 0,
25163 padding_bits_to_clear_ptr);
25164 if (to_clear_return_mask)
25165 {
25166 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
25167 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25168 {
25169 if (to_clear_return_mask & (1ULL << regno))
25170 bitmap_clear_bit (to_clear_bitmap, regno);
25171 }
25172 }
25173 }
25174
25175 if (padding_bits_to_clear != 0)
25176 {
25177 rtx reg_rtx;
25178 auto_sbitmap to_clear_arg_regs_bitmap (R0_REGNUM + NUM_ARG_REGS);
25179
25180 /* The padding bits to clear are not 0, so we know we are dealing with
25181 returning a composite type, which only uses r0. Make sure that
25182 r1-r3 are cleared too; we will use r1 as a scratch register. */
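/* Illustrative example: for padding_bits_to_clear == 0x0000ff00 the code
   below loads r1 with 0xffff00ff (lower half via a move, upper half via
   the zero_extract insert) and the final AND clears bits 8-15 of r0. */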
25183 bitmap_clear (to_clear_arg_regs_bitmap);
25184 bitmap_set_range (to_clear_arg_regs_bitmap, R0_REGNUM + 1,
25185 NUM_ARG_REGS - 1);
25186 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
25187
25188 reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
25189
25190 /* Fill the lower half of the negated padding_bits_to_clear. */
25191 emit_move_insn (reg_rtx,
25192 GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
25193
25194 /* Also fill the top half of the negated padding_bits_to_clear. */
25195 if (((~padding_bits_to_clear) >> 16) > 0)
25196 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
25197 GEN_INT (16),
25198 GEN_INT (16)),
25199 GEN_INT ((~padding_bits_to_clear) >> 16)));
25200
25201 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
25202 gen_rtx_REG (SImode, R0_REGNUM),
25203 reg_rtx));
25204 }
25205
25206 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25207 {
25208 if (!bitmap_bit_p (to_clear_bitmap, regno))
25209 continue;
25210
25211 if (IS_VFP_REGNUM (regno))
25212 {
25213 /* If regno is an even vfp register and its successor is also to
25214 be cleared, use vmov. */
25215 if (TARGET_VFP_DOUBLE
25216 && VFP_REGNO_OK_FOR_DOUBLE (regno)
25217 && bitmap_bit_p (to_clear_bitmap, regno + 1))
25218 {
25219 emit_move_insn (gen_rtx_REG (DFmode, regno),
25220 CONST1_RTX (DFmode));
25221 emit_use (gen_rtx_REG (DFmode, regno));
25222 regno++;
25223 }
25224 else
25225 {
25226 emit_move_insn (gen_rtx_REG (SFmode, regno),
25227 CONST1_RTX (SFmode));
25228 emit_use (gen_rtx_REG (SFmode, regno));
25229 }
25230 }
25231 else
25232 {
25233 if (TARGET_THUMB1)
25234 {
25235 if (regno == R0_REGNUM)
25236 emit_move_insn (gen_rtx_REG (SImode, regno),
25237 const0_rtx);
25238 else
25239 /* R0 has either been cleared before (see code above) or it
25240 holds a return value; either way it is not secret
25241 information. */
25242 emit_move_insn (gen_rtx_REG (SImode, regno),
25243 gen_rtx_REG (SImode, R0_REGNUM));
25244 emit_use (gen_rtx_REG (SImode, regno));
25245 }
25246 else
25247 {
25248 emit_move_insn (gen_rtx_REG (SImode, regno),
25249 gen_rtx_REG (SImode, LR_REGNUM));
25250 emit_use (gen_rtx_REG (SImode, regno));
25251 }
25252 }
25253 }
25254 }
25255
25256 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
25257 single POP instruction can be generated. LR should be replaced by PC. All
25258 the checks required are already done by USE_RETURN_INSN (). Hence,
25259 all we really need to check here is whether a single register or
25260 multiple registers are to be popped. */
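/* Illustrative example: a function that saved {r4, r5, lr} can return with
   a single "pop {r4, r5, pc}"; when only one register was saved, PC is
   instead reloaded directly from the stack with a post-incremented SP. */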
25261 void
25262 thumb2_expand_return (bool simple_return)
25263 {
25264 int i, num_regs;
25265 unsigned long saved_regs_mask;
25266 arm_stack_offsets *offsets;
25267
25268 offsets = arm_get_frame_offsets ();
25269 saved_regs_mask = offsets->saved_regs_mask;
25270
25271 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25272 if (saved_regs_mask & (1 << i))
25273 num_regs++;
25274
25275 if (!simple_return && saved_regs_mask)
25276 {
25277 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25278 functions, or adapt the code to handle it according to the ACLE. This
25279 path should not be reachable for cmse_nonsecure_entry functions, but we
25280 prefer to assert it for now to ensure that future code changes do not
25281 silently change this behavior. */
25282 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25283 if (num_regs == 1)
25284 {
25285 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25286 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25287 rtx addr = gen_rtx_MEM (SImode,
25288 gen_rtx_POST_INC (SImode,
25289 stack_pointer_rtx));
25290 set_mem_alias_set (addr, get_frame_alias_set ());
25291 XVECEXP (par, 0, 0) = ret_rtx;
25292 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25293 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25294 emit_jump_insn (par);
25295 }
25296 else
25297 {
25298 saved_regs_mask &= ~ (1 << LR_REGNUM);
25299 saved_regs_mask |= (1 << PC_REGNUM);
25300 arm_emit_multi_reg_pop (saved_regs_mask);
25301 }
25302 }
25303 else
25304 {
25305 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25306 cmse_nonsecure_entry_clear_before_return ();
25307 emit_jump_insn (simple_return_rtx);
25308 }
25309 }
25310
25311 void
25312 thumb1_expand_epilogue (void)
25313 {
25314 HOST_WIDE_INT amount;
25315 arm_stack_offsets *offsets;
25316 int regno;
25317
25318 /* Naked functions don't have prologues. */
25319 if (IS_NAKED (arm_current_func_type ()))
25320 return;
25321
25322 offsets = arm_get_frame_offsets ();
25323 amount = offsets->outgoing_args - offsets->saved_regs;
25324
25325 if (frame_pointer_needed)
25326 {
25327 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25328 amount = offsets->locals_base - offsets->saved_regs;
25329 }
25330 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25331
25332 gcc_assert (amount >= 0);
25333 if (amount)
25334 {
25335 emit_insn (gen_blockage ());
25336
25337 if (amount < 512)
25338 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25339 GEN_INT (amount)));
25340 else
25341 {
25342 /* r3 is always free in the epilogue. */
25343 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25344
25345 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25346 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25347 }
25348 }
25349
25350 /* Emit a USE (stack_pointer_rtx), so that
25351 the stack adjustment will not be deleted. */
25352 emit_insn (gen_force_register_use (stack_pointer_rtx));
25353
25354 if (crtl->profile || !TARGET_SCHED_PROLOG)
25355 emit_insn (gen_blockage ());
25356
25357 /* Emit a clobber for each register that will be restored in the epilogue,
25358 so that flow2 will get register lifetimes correct. */
25359 for (regno = 0; regno < 13; regno++)
25360 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25361 emit_clobber (gen_rtx_REG (SImode, regno));
25362
25363 if (! df_regs_ever_live_p (LR_REGNUM))
25364 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25365
25366 /* Clear all caller-saved regs that are not used to return. */
25367 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25368 cmse_nonsecure_entry_clear_before_return ();
25369 }
25370
25371 /* Epilogue code for APCS frame. */
25372 static void
25373 arm_expand_epilogue_apcs_frame (bool really_return)
25374 {
25375 unsigned long func_type;
25376 unsigned long saved_regs_mask;
25377 int num_regs = 0;
25378 int i;
25379 int floats_from_frame = 0;
25380 arm_stack_offsets *offsets;
25381
25382 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25383 func_type = arm_current_func_type ();
25384
25385 /* Get frame offsets for ARM. */
25386 offsets = arm_get_frame_offsets ();
25387 saved_regs_mask = offsets->saved_regs_mask;
25388
25389 /* Find the offset of the floating-point save area in the frame. */
25390 floats_from_frame
25391 = (offsets->saved_args
25392 + arm_compute_static_chain_stack_bytes ()
25393 - offsets->frame);
25394
25395 /* Compute how many core registers are saved and how far away the floats are. */
25396 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25397 if (saved_regs_mask & (1 << i))
25398 {
25399 num_regs++;
25400 floats_from_frame += 4;
25401 }
25402
25403 if (TARGET_HARD_FLOAT)
25404 {
25405 int start_reg;
25406 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25407
25408 /* The offset is from IP_REGNUM. */
25409 int saved_size = arm_get_vfp_saved_size ();
25410 if (saved_size > 0)
25411 {
25412 rtx_insn *insn;
25413 floats_from_frame += saved_size;
25414 insn = emit_insn (gen_addsi3 (ip_rtx,
25415 hard_frame_pointer_rtx,
25416 GEN_INT (-floats_from_frame)));
25417 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25418 ip_rtx, hard_frame_pointer_rtx);
25419 }
25420
25421 /* Generate VFP register multi-pop. */
25422 start_reg = FIRST_VFP_REGNUM;
25423
25424 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25425 /* Look for a case where a reg does not need restoring. */
25426 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25427 && (!df_regs_ever_live_p (i + 1)
25428 || call_used_regs[i + 1]))
25429 {
25430 if (start_reg != i)
25431 arm_emit_vfp_multi_reg_pop (start_reg,
25432 (i - start_reg) / 2,
25433 gen_rtx_REG (SImode,
25434 IP_REGNUM));
25435 start_reg = i + 2;
25436 }
25437
25438 /* Restore the remaining regs that we have discovered (or possibly
25439 even all of them, if the conditional in the for loop never
25440 fired). */
25441 if (start_reg != i)
25442 arm_emit_vfp_multi_reg_pop (start_reg,
25443 (i - start_reg) / 2,
25444 gen_rtx_REG (SImode, IP_REGNUM));
25445 }
25446
25447 if (TARGET_IWMMXT)
25448 {
25449 /* The frame pointer is guaranteed to be non-double-word aligned, as
25450 it is set to double-word-aligned old_stack_pointer - 4. */
25451 rtx_insn *insn;
25452 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25453
25454 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25455 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25456 {
25457 rtx addr = gen_frame_mem (V2SImode,
25458 plus_constant (Pmode, hard_frame_pointer_rtx,
25459 - lrm_count * 4));
25460 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25461 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25462 gen_rtx_REG (V2SImode, i),
25463 NULL_RTX);
25464 lrm_count += 2;
25465 }
25466 }
25467
25468 /* saved_regs_mask should contain IP, which holds the old stack pointer
25469 from the time the activation record was created. Since SP and IP are
25470 adjacent registers, we can restore the value directly into SP. */
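/* The multi-register pop emitted below can therefore reload SP (from the
   slot that held IP) and PC in a single ldm; the exact register list
   depends on what the prologue saved. */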
25471 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25472 saved_regs_mask &= ~(1 << IP_REGNUM);
25473 saved_regs_mask |= (1 << SP_REGNUM);
25474
25475 /* There are two registers left in saved_regs_mask - LR and PC. We
25476 only need to restore LR (the return address), but to
25477 save time we can load it directly into PC, unless we need a
25478 special function exit sequence, or we are not really returning. */
25479 if (really_return
25480 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25481 && !crtl->calls_eh_return)
25482 /* Delete LR from the register mask, so that LR on
25483 the stack is loaded into the PC in the register mask. */
25484 saved_regs_mask &= ~(1 << LR_REGNUM);
25485 else
25486 saved_regs_mask &= ~(1 << PC_REGNUM);
25487
25488 num_regs = bit_count (saved_regs_mask);
25489 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25490 {
25491 rtx_insn *insn;
25492 emit_insn (gen_blockage ());
25493 /* Unwind the stack to just below the saved registers. */
25494 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25495 hard_frame_pointer_rtx,
25496 GEN_INT (- 4 * num_regs)));
25497
25498 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25499 stack_pointer_rtx, hard_frame_pointer_rtx);
25500 }
25501
25502 arm_emit_multi_reg_pop (saved_regs_mask);
25503
25504 if (IS_INTERRUPT (func_type))
25505 {
25506 /* Interrupt handlers will have pushed the
25507 IP onto the stack, so restore it now. */
25508 rtx_insn *insn;
25509 rtx addr = gen_rtx_MEM (SImode,
25510 gen_rtx_POST_INC (SImode,
25511 stack_pointer_rtx));
25512 set_mem_alias_set (addr, get_frame_alias_set ());
25513 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25514 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25515 gen_rtx_REG (SImode, IP_REGNUM),
25516 NULL_RTX);
25517 }
25518
25519 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25520 return;
25521
25522 if (crtl->calls_eh_return)
25523 emit_insn (gen_addsi3 (stack_pointer_rtx,
25524 stack_pointer_rtx,
25525 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25526
25527 if (IS_STACKALIGN (func_type))
25528 /* Restore the original stack pointer. Before prologue, the stack was
25529 realigned and the original stack pointer saved in r0. For details,
25530 see comment in arm_expand_prologue. */
25531 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25532
25533 emit_jump_insn (simple_return_rtx);
25534 }
25535
25536 /* Generate RTL to represent the ARM epilogue. REALLY_RETURN is true if the
25537 function is not a sibcall. */
25538 void
25539 arm_expand_epilogue (bool really_return)
25540 {
25541 unsigned long func_type;
25542 unsigned long saved_regs_mask;
25543 int num_regs = 0;
25544 int i;
25545 int amount;
25546 arm_stack_offsets *offsets;
25547
25548 func_type = arm_current_func_type ();
25549
25550 /* Naked functions don't have an epilogue. Hence, generate a return pattern
25551 and let output_return_instruction take care of any instruction emission. */
25552 if (IS_NAKED (func_type)
25553 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25554 {
25555 if (really_return)
25556 emit_jump_insn (simple_return_rtx);
25557 return;
25558 }
25559
25560 /* If we are throwing an exception, then we really must be doing a
25561 return, so we can't tail-call. */
25562 gcc_assert (!crtl->calls_eh_return || really_return);
25563
25564 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25565 {
25566 arm_expand_epilogue_apcs_frame (really_return);
25567 return;
25568 }
25569
25570 /* Get frame offsets for ARM. */
25571 offsets = arm_get_frame_offsets ();
25572 saved_regs_mask = offsets->saved_regs_mask;
25573 num_regs = bit_count (saved_regs_mask);
25574
25575 if (frame_pointer_needed)
25576 {
25577 rtx_insn *insn;
25578 /* Restore stack pointer if necessary. */
25579 if (TARGET_ARM)
25580 {
25581 /* In ARM mode, the frame pointer points to the first saved register.
25582 Restore the stack pointer to the last saved register. */
25583 amount = offsets->frame - offsets->saved_regs;
25584
25585 /* Force out any pending memory operations that reference stacked data
25586 before stack de-allocation occurs. */
25587 emit_insn (gen_blockage ());
25588 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25589 hard_frame_pointer_rtx,
25590 GEN_INT (amount)));
25591 arm_add_cfa_adjust_cfa_note (insn, amount,
25592 stack_pointer_rtx,
25593 hard_frame_pointer_rtx);
25594
25595 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25596 deleted. */
25597 emit_insn (gen_force_register_use (stack_pointer_rtx));
25598 }
25599 else
25600 {
25601 /* In Thumb-2 mode, the frame pointer points to the last saved
25602 register. */
25603 amount = offsets->locals_base - offsets->saved_regs;
25604 if (amount)
25605 {
25606 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25607 hard_frame_pointer_rtx,
25608 GEN_INT (amount)));
25609 arm_add_cfa_adjust_cfa_note (insn, amount,
25610 hard_frame_pointer_rtx,
25611 hard_frame_pointer_rtx);
25612 }
25613
25614 /* Force out any pending memory operations that reference stacked data
25615 before stack de-allocation occurs. */
25616 emit_insn (gen_blockage ());
25617 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25618 hard_frame_pointer_rtx));
25619 arm_add_cfa_adjust_cfa_note (insn, 0,
25620 stack_pointer_rtx,
25621 hard_frame_pointer_rtx);
25622 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25623 deleted. */
25624 emit_insn (gen_force_register_use (stack_pointer_rtx));
25625 }
25626 }
25627 else
25628 {
25629 /* Pop off outgoing args and local frame to adjust stack pointer to
25630 last saved register. */
25631 amount = offsets->outgoing_args - offsets->saved_regs;
25632 if (amount)
25633 {
25634 rtx_insn *tmp;
25635 /* Force out any pending memory operations that reference stacked data
25636 before stack de-allocation occurs. */
25637 emit_insn (gen_blockage ());
25638 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25639 stack_pointer_rtx,
25640 GEN_INT (amount)));
25641 arm_add_cfa_adjust_cfa_note (tmp, amount,
25642 stack_pointer_rtx, stack_pointer_rtx);
25643 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25644 not deleted. */
25645 emit_insn (gen_force_register_use (stack_pointer_rtx));
25646 }
25647 }
25648
25649 if (TARGET_HARD_FLOAT)
25650 {
25651 /* Generate VFP register multi-pop. */
25652 int end_reg = LAST_VFP_REGNUM + 1;
25653
25654 /* Scan the registers in reverse order. We need to match
25655 any groupings made in the prologue and generate matching
25656 vldm operations. Groups must be matched because, unlike
25657 pop, vldm can only restore consecutive registers. */
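/* Illustrative example: if the prologue saved d8-d9 and d11 but not d10,
   this scan emits one vldm for d11 and a separate vldm for d8-d9. */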
25658 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25659 /* Look for a case where a reg does not need restoring. */
25660 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25661 && (!df_regs_ever_live_p (i + 1)
25662 || call_used_regs[i + 1]))
25663 {
25664 /* Restore the regs discovered so far (from reg+2 to
25665 end_reg). */
25666 if (end_reg > i + 2)
25667 arm_emit_vfp_multi_reg_pop (i + 2,
25668 (end_reg - (i + 2)) / 2,
25669 stack_pointer_rtx);
25670 end_reg = i;
25671 }
25672
25673 /* Restore the remaining regs that we have discovered (or possibly
25674 even all of them, if the conditional in the for loop never
25675 fired). */
25676 if (end_reg > i + 2)
25677 arm_emit_vfp_multi_reg_pop (i + 2,
25678 (end_reg - (i + 2)) / 2,
25679 stack_pointer_rtx);
25680 }
25681
25682 if (TARGET_IWMMXT)
25683 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25684 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25685 {
25686 rtx_insn *insn;
25687 rtx addr = gen_rtx_MEM (V2SImode,
25688 gen_rtx_POST_INC (SImode,
25689 stack_pointer_rtx));
25690 set_mem_alias_set (addr, get_frame_alias_set ());
25691 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25692 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25693 gen_rtx_REG (V2SImode, i),
25694 NULL_RTX);
25695 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25696 stack_pointer_rtx, stack_pointer_rtx);
25697 }
25698
25699 if (saved_regs_mask)
25700 {
25701 rtx insn;
25702 bool return_in_pc = false;
25703
25704 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25705 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25706 && !IS_CMSE_ENTRY (func_type)
25707 && !IS_STACKALIGN (func_type)
25708 && really_return
25709 && crtl->args.pretend_args_size == 0
25710 && saved_regs_mask & (1 << LR_REGNUM)
25711 && !crtl->calls_eh_return)
25712 {
25713 saved_regs_mask &= ~(1 << LR_REGNUM);
25714 saved_regs_mask |= (1 << PC_REGNUM);
25715 return_in_pc = true;
25716 }
25717
25718 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25719 {
25720 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25721 if (saved_regs_mask & (1 << i))
25722 {
25723 rtx addr = gen_rtx_MEM (SImode,
25724 gen_rtx_POST_INC (SImode,
25725 stack_pointer_rtx));
25726 set_mem_alias_set (addr, get_frame_alias_set ());
25727
25728 if (i == PC_REGNUM)
25729 {
25730 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25731 XVECEXP (insn, 0, 0) = ret_rtx;
25732 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25733 addr);
25734 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25735 insn = emit_jump_insn (insn);
25736 }
25737 else
25738 {
25739 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25740 addr));
25741 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25742 gen_rtx_REG (SImode, i),
25743 NULL_RTX);
25744 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25745 stack_pointer_rtx,
25746 stack_pointer_rtx);
25747 }
25748 }
25749 }
25750 else
25751 {
25752 if (TARGET_LDRD
25753 && current_tune->prefer_ldrd_strd
25754 && !optimize_function_for_size_p (cfun))
25755 {
25756 if (TARGET_THUMB2)
25757 thumb2_emit_ldrd_pop (saved_regs_mask);
25758 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25759 arm_emit_ldrd_pop (saved_regs_mask);
25760 else
25761 arm_emit_multi_reg_pop (saved_regs_mask);
25762 }
25763 else
25764 arm_emit_multi_reg_pop (saved_regs_mask);
25765 }
25766
25767 if (return_in_pc)
25768 return;
25769 }
25770
25771 amount
25772 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25773 if (amount)
25774 {
25775 int i, j;
25776 rtx dwarf = NULL_RTX;
25777 rtx_insn *tmp =
25778 emit_insn (gen_addsi3 (stack_pointer_rtx,
25779 stack_pointer_rtx,
25780 GEN_INT (amount)));
25781
25782 RTX_FRAME_RELATED_P (tmp) = 1;
25783
25784 if (cfun->machine->uses_anonymous_args)
25785 {
25786 /* Restore pretend args. Refer to arm_expand_prologue for how
25787 pretend args are saved on the stack. */
25788 int num_regs = crtl->args.pretend_args_size / 4;
25789 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25790 for (j = 0, i = 0; j < num_regs; i++)
25791 if (saved_regs_mask & (1 << i))
25792 {
25793 rtx reg = gen_rtx_REG (SImode, i);
25794 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25795 j++;
25796 }
25797 REG_NOTES (tmp) = dwarf;
25798 }
25799 arm_add_cfa_adjust_cfa_note (tmp, amount,
25800 stack_pointer_rtx, stack_pointer_rtx);
25801 }
25802
25803 /* Clear all caller-saved regs that are not used to return. */
25804 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25805 {
25806 /* CMSE_ENTRY always returns. */
25807 gcc_assert (really_return);
25808 cmse_nonsecure_entry_clear_before_return ();
25809 }
25810
25811 if (!really_return)
25812 return;
25813
25814 if (crtl->calls_eh_return)
25815 emit_insn (gen_addsi3 (stack_pointer_rtx,
25816 stack_pointer_rtx,
25817 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25818
25819 if (IS_STACKALIGN (func_type))
25820 /* Restore the original stack pointer. Before prologue, the stack was
25821 realigned and the original stack pointer saved in r0. For details,
25822 see comment in arm_expand_prologue. */
25823 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25824
25825 emit_jump_insn (simple_return_rtx);
25826 }
25827
25828 /* Implementation of insn prologue_thumb1_interwork. This is the first
25829 "instruction" of a function called in ARM mode. Swap to Thumb mode. */
25830
25831 const char *
25832 thumb1_output_interwork (void)
25833 {
25834 const char * name;
25835 FILE *f = asm_out_file;
25836
25837 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25838 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25839 == SYMBOL_REF);
25840 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25841
25842 /* Generate code sequence to switch us into Thumb mode. */
25843 /* The .code 32 directive has already been emitted by
25844 ASM_DECLARE_FUNCTION_NAME. */
25845 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25846 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25847
25848 /* Generate a label, so that the debugger will notice the
25849 change in instruction sets. This label is also used by
25850 the assembler to bypass the ARM code when this function
25851 is called from a Thumb encoded function elsewhere in the
25852 same file. Hence the definition of STUB_NAME here must
25853 agree with the definition in gas/config/tc-arm.c. */
25854
25855 #define STUB_NAME ".real_start_of"
25856
25857 fprintf (f, "\t.code\t16\n");
25858 #ifdef ARM_PE
25859 if (arm_dllexport_name_p (name))
25860 name = arm_strip_name_encoding (name);
25861 #endif
25862 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25863 fprintf (f, "\t.thumb_func\n");
25864 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25865
25866 return "";
25867 }
25868
25869 /* Handle the case of a double word load into a low register from
25870 a computed memory address. The computed address may involve a
25871 register which is overwritten by the load. */
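/* Illustrative example: for a load of r2:r3 from [r2], the high word is
   loaded first ("ldr r3, [r2, #4]" then "ldr r2, [r2]") so the base
   register is not clobbered before it is used. */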
25872 const char *
25873 thumb_load_double_from_address (rtx *operands)
25874 {
25875 rtx addr;
25876 rtx base;
25877 rtx offset;
25878 rtx arg1;
25879 rtx arg2;
25880
25881 gcc_assert (REG_P (operands[0]));
25882 gcc_assert (MEM_P (operands[1]));
25883
25884 /* Get the memory address. */
25885 addr = XEXP (operands[1], 0);
25886
25887 /* Work out how the memory address is computed. */
25888 switch (GET_CODE (addr))
25889 {
25890 case REG:
25891 operands[2] = adjust_address (operands[1], SImode, 4);
25892
25893 if (REGNO (operands[0]) == REGNO (addr))
25894 {
25895 output_asm_insn ("ldr\t%H0, %2", operands);
25896 output_asm_insn ("ldr\t%0, %1", operands);
25897 }
25898 else
25899 {
25900 output_asm_insn ("ldr\t%0, %1", operands);
25901 output_asm_insn ("ldr\t%H0, %2", operands);
25902 }
25903 break;
25904
25905 case CONST:
25906 /* Compute <address> + 4 for the high order load. */
25907 operands[2] = adjust_address (operands[1], SImode, 4);
25908
25909 output_asm_insn ("ldr\t%0, %1", operands);
25910 output_asm_insn ("ldr\t%H0, %2", operands);
25911 break;
25912
25913 case PLUS:
25914 arg1 = XEXP (addr, 0);
25915 arg2 = XEXP (addr, 1);
25916
25917 if (CONSTANT_P (arg1))
25918 base = arg2, offset = arg1;
25919 else
25920 base = arg1, offset = arg2;
25921
25922 gcc_assert (REG_P (base));
25923
25924 /* Catch the case of <address> = <reg> + <reg> */
25925 if (REG_P (offset))
25926 {
25927 int reg_offset = REGNO (offset);
25928 int reg_base = REGNO (base);
25929 int reg_dest = REGNO (operands[0]);
25930
25931 /* Add the base and offset registers together into the
25932 higher destination register. */
25933 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25934 reg_dest + 1, reg_base, reg_offset);
25935
25936 /* Load the lower destination register from the address in
25937 the higher destination register. */
25938 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25939 reg_dest, reg_dest + 1);
25940
25941 /* Load the higher destination register from its own address
25942 plus 4. */
25943 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25944 reg_dest + 1, reg_dest + 1);
25945 }
25946 else
25947 {
25948 /* Compute <address> + 4 for the high order load. */
25949 operands[2] = adjust_address (operands[1], SImode, 4);
25950
25951 /* If the computed address is held in the low order register
25952 then load the high order register first, otherwise always
25953 load the low order register first. */
25954 if (REGNO (operands[0]) == REGNO (base))
25955 {
25956 output_asm_insn ("ldr\t%H0, %2", operands);
25957 output_asm_insn ("ldr\t%0, %1", operands);
25958 }
25959 else
25960 {
25961 output_asm_insn ("ldr\t%0, %1", operands);
25962 output_asm_insn ("ldr\t%H0, %2", operands);
25963 }
25964 }
25965 break;
25966
25967 case LABEL_REF:
25968 /* With no registers to worry about we can just load the value
25969 directly. */
25970 operands[2] = adjust_address (operands[1], SImode, 4);
25971
25972 output_asm_insn ("ldr\t%H0, %2", operands);
25973 output_asm_insn ("ldr\t%0, %1", operands);
25974 break;
25975
25976 default:
25977 gcc_unreachable ();
25978 }
25979
25980 return "";
25981 }
25982
25983 const char *
25984 thumb_output_move_mem_multiple (int n, rtx *operands)
25985 {
25986 switch (n)
25987 {
25988 case 2:
25989 if (REGNO (operands[4]) > REGNO (operands[5]))
25990 std::swap (operands[4], operands[5]);
25991
25992 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25993 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25994 break;
25995
25996 case 3:
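/* Sort the three scratch registers into ascending order with a
   three-element exchange network, so that the ldmia/stmia register
   lists below are strictly ascending as required. */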
25997 if (REGNO (operands[4]) > REGNO (operands[5]))
25998 std::swap (operands[4], operands[5]);
25999 if (REGNO (operands[5]) > REGNO (operands[6]))
26000 std::swap (operands[5], operands[6]);
26001 if (REGNO (operands[4]) > REGNO (operands[5]))
26002 std::swap (operands[4], operands[5]);
26003
26004 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
26005 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
26006 break;
26007
26008 default:
26009 gcc_unreachable ();
26010 }
26011
26012 return "";
26013 }
26014
26015 /* Output a call-via instruction for thumb state. */
26016 const char *
26017 thumb_call_via_reg (rtx reg)
26018 {
26019 int regno = REGNO (reg);
26020 rtx *labelp;
26021
26022 gcc_assert (regno < LR_REGNUM);
26023
26024 /* If we are in the normal text section we can use a single instance
26025 per compilation unit. If we are doing function sections, then we need
26026 an entry per section, since we can't rely on reachability. */
26027 if (in_section == text_section)
26028 {
26029 thumb_call_reg_needed = 1;
26030
26031 if (thumb_call_via_label[regno] == NULL)
26032 thumb_call_via_label[regno] = gen_label_rtx ();
26033 labelp = thumb_call_via_label + regno;
26034 }
26035 else
26036 {
26037 if (cfun->machine->call_via[regno] == NULL)
26038 cfun->machine->call_via[regno] = gen_label_rtx ();
26039 labelp = cfun->machine->call_via + regno;
26040 }
26041
26042 output_asm_insn ("bl\t%a0", labelp);
26043 return "";
26044 }
26045
26046 /* Routines for generating rtl. */
26047 void
26048 thumb_expand_movmemqi (rtx *operands)
26049 {
26050 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26051 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26052 HOST_WIDE_INT len = INTVAL (operands[2]);
26053 HOST_WIDE_INT offset = 0;
26054
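/* Copy in chunks: the movmem12b/movmem8b patterns below copy 12- and
   8-byte blocks and post-increment IN and OUT, then any remaining word,
   halfword and byte are copied at increasing OFFSETs. */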
26055 while (len >= 12)
26056 {
26057 emit_insn (gen_movmem12b (out, in, out, in));
26058 len -= 12;
26059 }
26060
26061 if (len >= 8)
26062 {
26063 emit_insn (gen_movmem8b (out, in, out, in));
26064 len -= 8;
26065 }
26066
26067 if (len >= 4)
26068 {
26069 rtx reg = gen_reg_rtx (SImode);
26070 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26071 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26072 len -= 4;
26073 offset += 4;
26074 }
26075
26076 if (len >= 2)
26077 {
26078 rtx reg = gen_reg_rtx (HImode);
26079 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26080 plus_constant (Pmode, in,
26081 offset))));
26082 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26083 offset)),
26084 reg));
26085 len -= 2;
26086 offset += 2;
26087 }
26088
26089 if (len)
26090 {
26091 rtx reg = gen_reg_rtx (QImode);
26092 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26093 plus_constant (Pmode, in,
26094 offset))));
26095 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26096 offset)),
26097 reg));
26098 }
26099 }
26100
26101 void
26102 thumb_reload_out_hi (rtx *operands)
26103 {
26104 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26105 }
26106
26107 /* Return the length of a function name prefix
26108 that starts with the character C. */
26109 static int
26110 arm_get_strip_length (int c)
26111 {
26112 switch (c)
26113 {
26114 ARM_NAME_ENCODING_LENGTHS
26115 default: return 0;
26116 }
26117 }
26118
26119 /* Return a pointer to a function's name with any
26120 and all prefix encodings stripped from it. */
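/* For instance, a leading '*' (the emit-verbatim marker handled by
   arm_asm_output_labelref below) is one such prefix that gets skipped. */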
26121 const char *
26122 arm_strip_name_encoding (const char *name)
26123 {
26124 int skip;
26125
26126 while ((skip = arm_get_strip_length (* name)))
26127 name += skip;
26128
26129 return name;
26130 }
26131
26132 /* If there is a '*' anywhere in the name's prefix, then
26133 emit the stripped name verbatim, otherwise prepend an
26134 underscore if leading underscores are being used. */
26135 void
26136 arm_asm_output_labelref (FILE *stream, const char *name)
26137 {
26138 int skip;
26139 int verbatim = 0;
26140
26141 while ((skip = arm_get_strip_length (* name)))
26142 {
26143 verbatim |= (*name == '*');
26144 name += skip;
26145 }
26146
26147 if (verbatim)
26148 fputs (name, stream);
26149 else
26150 asm_fprintf (stream, "%U%s", name);
26151 }
26152
26153 /* This function is used to emit an EABI tag and its associated value.
26154 We emit the numerical value of the tag in case the assembler does not
26155 support textual tags (e.g. gas prior to 2.20). If requested we include
26156 the tag name in a comment so that anyone reading the assembler output
26157 will know which tag is being set.
26158
26159 This function is not static because arm-c.c needs it too. */
26160
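/* For example, arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1) emits
   ".eabi_attribute 27, 1", with an "@ Tag_ABI_HardFP_use" comment appended
   when verbose assembly output is requested. */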
26161 void
26162 arm_emit_eabi_attribute (const char *name, int num, int val)
26163 {
26164 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26165 if (flag_verbose_asm || flag_debug_asm)
26166 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26167 asm_fprintf (asm_out_file, "\n");
26168 }
26169
26170 /* This function is used to print CPU tuning information as comment
26171 in assembler file. Pointers are not printed for now. */
26172
26173 void
26174 arm_print_tune_info (void)
26175 {
26176 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26177 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26178 current_tune->constant_limit);
26179 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26180 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26181 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26182 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26183 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26184 "prefetch.l1_cache_size:\t%d\n",
26185 current_tune->prefetch.l1_cache_size);
26186 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26187 "prefetch.l1_cache_line_size:\t%d\n",
26188 current_tune->prefetch.l1_cache_line_size);
26189 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26190 "prefer_constant_pool:\t%d\n",
26191 (int) current_tune->prefer_constant_pool);
26192 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26193 "branch_cost:\t(s:speed, p:predictable)\n");
26194 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26195 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26196 current_tune->branch_cost (false, false));
26197 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26198 current_tune->branch_cost (false, true));
26199 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26200 current_tune->branch_cost (true, false));
26201 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26202 current_tune->branch_cost (true, true));
26203 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26204 "prefer_ldrd_strd:\t%d\n",
26205 (int) current_tune->prefer_ldrd_strd);
26206 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26207 "logical_op_non_short_circuit:\t[%d,%d]\n",
26208 (int) current_tune->logical_op_non_short_circuit_thumb,
26209 (int) current_tune->logical_op_non_short_circuit_arm);
26210 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26211 "prefer_neon_for_64bits:\t%d\n",
26212 (int) current_tune->prefer_neon_for_64bits);
26213 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26214 "disparage_flag_setting_t16_encodings:\t%d\n",
26215 (int) current_tune->disparage_flag_setting_t16_encodings);
26216 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26217 "string_ops_prefer_neon:\t%d\n",
26218 (int) current_tune->string_ops_prefer_neon);
26219 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26220 "max_insns_inline_memset:\t%d\n",
26221 current_tune->max_insns_inline_memset);
26222 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26223 current_tune->fusible_ops);
26224 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26225 (int) current_tune->sched_autopref);
26226 }
26227
26228 /* Print .arch and .arch_extension directives corresponding to the
26229 current architecture configuration. */
26230 static void
26231 arm_print_asm_arch_directives ()
26232 {
26233 const arch_option *arch
26234 = arm_parse_arch_option_name (all_architectures, "-march",
26235 arm_active_target.arch_name);
26236 auto_sbitmap opt_bits (isa_num_bits);
26237
26238 gcc_assert (arch);
26239
26240 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26241 if (!arch->common.extensions)
26242 return;
26243
26244 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26245 opt->name != NULL;
26246 opt++)
26247 {
26248 if (!opt->remove)
26249 {
26250 arm_initialize_isa (opt_bits, opt->isa_bits);
26251
26252 /* If every feature bit of this option is set in the target
26253 ISA specification, print out the option name. However,
26254 don't print anything if all the bits are part of the
26255 FPU specification. */
26256 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26257 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26258 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26259 }
26260 }
26261 }
26262
26263 static void
26264 arm_file_start (void)
26265 {
26266 int val;
26267
26268 if (TARGET_BPABI)
26269 {
26270 /* We don't have a specified CPU. Use the architecture to
26271 generate the tags.
26272
26273 Note: it might be better to do this unconditionally, then the
26274 assembler would not need to know about all new CPU names as
26275 they are added. */
26276 if (!arm_active_target.core_name)
26277 {
26278 /* armv7ve doesn't support any extensions. */
26279 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26280 {
26281 /* Keep backward compatibility for assemblers
26282 which don't support armv7ve. */
26283 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26284 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26285 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26286 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26287 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26288 }
26289 else
26290 arm_print_asm_arch_directives ();
26291 }
26292 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26293 asm_fprintf (asm_out_file, "\t.arch %s\n",
26294 arm_active_target.core_name + 8);
26295 else
26296 {
26297 const char* truncated_name
26298 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26299 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26300 }
26301
26302 if (print_tune_info)
26303 arm_print_tune_info ();
26304
26305 if (! TARGET_SOFT_FLOAT)
26306 {
26307 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26308 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26309
26310 if (TARGET_HARD_FLOAT_ABI)
26311 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26312 }
26313
26314 /* Some of these attributes only apply when the corresponding features
26315 are used. However we don't have any easy way of figuring this out.
26316 Conservatively record the setting that would have been used. */
26317
26318 if (flag_rounding_math)
26319 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26320
26321 if (!flag_unsafe_math_optimizations)
26322 {
26323 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26324 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26325 }
26326 if (flag_signaling_nans)
26327 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26328
26329 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26330 flag_finite_math_only ? 1 : 3);
26331
26332 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26333 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26334 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26335 flag_short_enums ? 1 : 2);
26336
26337 /* Tag_ABI_optimization_goals. */
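/* The values follow the attribute encoding in the ABI addenda:
   1 = prefer speed, 2 = aggressively prefer speed,
   4 = aggressively prefer size, 6 = aggressively prefer debugging
   (used here for -O0). */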
26338 if (optimize_size)
26339 val = 4;
26340 else if (optimize >= 2)
26341 val = 2;
26342 else if (optimize)
26343 val = 1;
26344 else
26345 val = 6;
26346 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26347
26348 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26349 unaligned_access);
26350
26351 if (arm_fp16_format)
26352 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26353 (int) arm_fp16_format);
26354
26355 if (arm_lang_output_object_attributes_hook)
26356 arm_lang_output_object_attributes_hook();
26357 }
26358
26359 default_file_start ();
26360 }
26361
26362 static void
26363 arm_file_end (void)
26364 {
26365 int regno;
26366
26367 if (NEED_INDICATE_EXEC_STACK)
26368 /* Add .note.GNU-stack. */
26369 file_end_indicate_exec_stack ();
26370
26371 if (! thumb_call_reg_needed)
26372 return;
26373
26374 switch_to_section (text_section);
26375 asm_fprintf (asm_out_file, "\t.code 16\n");
26376 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26377
26378 for (regno = 0; regno < LR_REGNUM; regno++)
26379 {
26380 rtx label = thumb_call_via_label[regno];
26381
26382 if (label != 0)
26383 {
26384 targetm.asm_out.internal_label (asm_out_file, "L",
26385 CODE_LABEL_NUMBER (label));
26386 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26387 }
26388 }
26389 }
26390
26391 #ifndef ARM_PE
26392 /* Symbols in the text segment can be accessed without indirecting via the
26393 constant pool; it may take an extra binary operation, but this is still
26394 faster than indirecting via memory. Don't do this when not optimizing,
26395 since we won't be calculating all of the offsets necessary to do this
26396 simplification. */
26397
26398 static void
26399 arm_encode_section_info (tree decl, rtx rtl, int first)
26400 {
26401 if (optimize > 0 && TREE_CONSTANT (decl))
26402 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26403
26404 default_encode_section_info (decl, rtl, first);
26405 }
26406 #endif /* !ARM_PE */
26407
26408 static void
26409 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26410 {
26411 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26412 && !strcmp (prefix, "L"))
26413 {
26414 arm_ccfsm_state = 0;
26415 arm_target_insn = NULL;
26416 }
26417 default_internal_label (stream, prefix, labelno);
26418 }
26419
26420 /* Output code to add DELTA to the first argument, and then jump
26421 to FUNCTION. Used for C++ multiple inheritance. */
26422
26423 static void
26424 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26425 HOST_WIDE_INT, tree function)
26426 {
26427 static int thunk_label = 0;
26428 char label[256];
26429 char labelpc[256];
26430 int mi_delta = delta;
26431 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26432 int shift = 0;
26433 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26434 ? 1 : 0);
26435 if (mi_delta < 0)
26436 mi_delta = - mi_delta;
26437
26438 final_start_function (emit_barrier (), file, 1);
26439
26440 if (TARGET_THUMB1)
26441 {
26442 int labelno = thunk_label++;
26443 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26444 /* Thunks are entered in ARM mode when available. */
26445 if (TARGET_THUMB1_ONLY)
26446 {
26447 /* Push r3 so we can use it as a temporary. */
26448 /* TODO: Omit this save if r3 is not used. */
26449 fputs ("\tpush {r3}\n", file);
26450 fputs ("\tldr\tr3, ", file);
26451 }
26452 else
26453 {
26454 fputs ("\tldr\tr12, ", file);
26455 }
26456 assemble_name (file, label);
26457 fputc ('\n', file);
26458 if (flag_pic)
26459 {
26460 /* If we are generating PIC, the ldr instruction below loads
26461 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26462 the address of the add + 8, so we have:
26463
26464 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26465 = target + 1.
26466
26467 Note that we have "+ 1" because some versions of GNU ld
26468 don't set the low bit of the result for R_ARM_REL32
26469 relocations against thumb function symbols.
26470 On ARMv6M this is +4, not +8. */
26471 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26472 assemble_name (file, labelpc);
26473 fputs (":\n", file);
26474 if (TARGET_THUMB1_ONLY)
26475 {
26476 /* This is 2 insns after the start of the thunk, so we know it
26477 is 4-byte aligned. */
26478 fputs ("\tadd\tr3, pc, r3\n", file);
26479 fputs ("\tmov r12, r3\n", file);
26480 }
26481 else
26482 fputs ("\tadd\tr12, pc, r12\n", file);
26483 }
26484 else if (TARGET_THUMB1_ONLY)
26485 fputs ("\tmov r12, r3\n", file);
26486 }
26487 if (TARGET_THUMB1_ONLY)
26488 {
26489 if (mi_delta > 255)
26490 {
26491 fputs ("\tldr\tr3, ", file);
26492 assemble_name (file, label);
26493 fputs ("+4\n", file);
26494 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26495 mi_op, this_regno, this_regno);
26496 }
26497 else if (mi_delta != 0)
26498 {
26499 /* Thumb1 unified syntax requires s suffix in instruction name when
26500 one of the operands is immediate. */
26501 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26502 mi_op, this_regno, this_regno,
26503 mi_delta);
26504 }
26505 }
26506 else
26507 {
26508 /* TODO: Use movw/movt for large constants when available. */
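/* The loop below emits the delta as a series of add/sub instructions,
   each consuming an 8-bit chunk of the constant at an even bit position,
   matching the ARM rotated-immediate encoding. */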
26509 while (mi_delta != 0)
26510 {
26511 if ((mi_delta & (3 << shift)) == 0)
26512 shift += 2;
26513 else
26514 {
26515 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26516 mi_op, this_regno, this_regno,
26517 mi_delta & (0xff << shift));
26518 mi_delta &= ~(0xff << shift);
26519 shift += 8;
26520 }
26521 }
26522 }
26523 if (TARGET_THUMB1)
26524 {
26525 if (TARGET_THUMB1_ONLY)
26526 fputs ("\tpop\t{r3}\n", file);
26527
26528 fprintf (file, "\tbx\tr12\n");
26529 ASM_OUTPUT_ALIGN (file, 2);
26530 assemble_name (file, label);
26531 fputs (":\n", file);
26532 if (flag_pic)
26533 {
26534 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26535 rtx tem = XEXP (DECL_RTL (function), 0);
26536 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26537 pipeline offset is four rather than eight. Adjust the offset
26538 accordingly. */
26539 tem = plus_constant (GET_MODE (tem), tem,
26540 TARGET_THUMB1_ONLY ? -3 : -7);
26541 tem = gen_rtx_MINUS (GET_MODE (tem),
26542 tem,
26543 gen_rtx_SYMBOL_REF (Pmode,
26544 ggc_strdup (labelpc)));
26545 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26546 }
26547 else
26548 /* Output ".word .LTHUNKn". */
26549 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26550
26551 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26552 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26553 }
26554 else
26555 {
26556 fputs ("\tb\t", file);
26557 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26558 if (NEED_PLT_RELOC)
26559 fputs ("(PLT)", file);
26560 fputc ('\n', file);
26561 }
26562
26563 final_end_function ();
26564 }
26565
26566 /* MI thunk handling for TARGET_32BIT. */
26567
26568 static void
26569 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26570 HOST_WIDE_INT vcall_offset, tree function)
26571 {
26572 /* On ARM, this_regno is R0 or R1 depending on
26573 whether the function returns an aggregate or not.
26574 */
26575 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26576 function)
26577 ? R1_REGNUM : R0_REGNUM);
26578
26579 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26580 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26581 reload_completed = 1;
26582 emit_note (NOTE_INSN_PROLOGUE_END);
26583
26584 /* Add DELTA to THIS_RTX. */
26585 if (delta != 0)
26586 arm_split_constant (PLUS, Pmode, NULL_RTX,
26587 delta, this_rtx, this_rtx, false);
26588
26589 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26590 if (vcall_offset != 0)
26591 {
26592 /* Load *THIS_RTX. */
26593 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26594 /* Compute *THIS_RTX + VCALL_OFFSET. */
26595 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26596 false);
26597 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26598 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26599 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26600 }
26601
26602 /* Generate a tail call to the target function. */
26603 if (!TREE_USED (function))
26604 {
26605 assemble_external (function);
26606 TREE_USED (function) = 1;
26607 }
26608 rtx funexp = XEXP (DECL_RTL (function), 0);
26609 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26610 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26611 SIBLING_CALL_P (insn) = 1;
26612
26613 insn = get_insns ();
26614 shorten_branches (insn);
26615 final_start_function (insn, file, 1);
26616 final (insn, file, 1);
26617 final_end_function ();
26618
26619 /* Stop pretending this is a post-reload pass. */
26620 reload_completed = 0;
26621 }
26622
26623 /* Output code to add DELTA to the first argument, and then jump
26624 to FUNCTION. Used for C++ multiple inheritance. */
26625
26626 static void
26627 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26628 HOST_WIDE_INT vcall_offset, tree function)
26629 {
26630 if (TARGET_32BIT)
26631 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26632 else
26633 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26634 }
26635
26636 int
26637 arm_emit_vector_const (FILE *file, rtx x)
26638 {
26639 int i;
26640 const char * pattern;
26641
26642 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26643
26644 switch (GET_MODE (x))
26645 {
26646 case E_V2SImode: pattern = "%08x"; break;
26647 case E_V4HImode: pattern = "%04x"; break;
26648 case E_V8QImode: pattern = "%02x"; break;
26649 default: gcc_unreachable ();
26650 }
26651
26652 fprintf (file, "0x");
26653 for (i = CONST_VECTOR_NUNITS (x); i--;)
26654 {
26655 rtx element;
26656
26657 element = CONST_VECTOR_ELT (x, i);
26658 fprintf (file, pattern, INTVAL (element));
26659 }
26660
26661 return 1;
26662 }
26663
26664 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26665 HFmode constant pool entries are actually loaded with ldr. */
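/* That is, on a little-endian target the two bytes of the value are
   emitted first, followed by two bytes of zero padding; with big-endian
   word order the padding comes first. */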
26666 void
26667 arm_emit_fp16_const (rtx c)
26668 {
26669 long bits;
26670
26671 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26672 if (WORDS_BIG_ENDIAN)
26673 assemble_zeros (2);
26674 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26675 if (!WORDS_BIG_ENDIAN)
26676 assemble_zeros (2);
26677 }
26678
26679 const char *
26680 arm_output_load_gr (rtx *operands)
26681 {
26682 rtx reg;
26683 rtx offset;
26684 rtx wcgr;
26685 rtx sum;
26686
26687 if (!MEM_P (operands [1])
26688 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26689 || !REG_P (reg = XEXP (sum, 0))
26690 || !CONST_INT_P (offset = XEXP (sum, 1))
26691 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26692 return "wldrw%?\t%0, %1";
26693
26694 /* Fix up an out-of-range load of a GR register. */
26695 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26696 wcgr = operands[0];
26697 operands[0] = reg;
26698 output_asm_insn ("ldr%?\t%0, %1", operands);
26699
26700 operands[0] = wcgr;
26701 operands[1] = reg;
26702 output_asm_insn ("tmcr%?\t%0, %1", operands);
26703 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26704
26705 return "";
26706 }
26707
26708 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26709
26710 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26711 named arg and all anonymous args onto the stack.
26712 XXX I know the prologue shouldn't be pushing registers, but it is faster
26713 that way. */
26714
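/* Illustrative example: if the named arguments consume only r0 and r1,
   NREGS is 2 and *PRETEND_SIZE becomes 8, so the prologue pushes r2 and
   r3. */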
26715 static void
26716 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26717 machine_mode mode,
26718 tree type,
26719 int *pretend_size,
26720 int second_time ATTRIBUTE_UNUSED)
26721 {
26722 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26723 int nregs;
26724
26725 cfun->machine->uses_anonymous_args = 1;
26726 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26727 {
26728 nregs = pcum->aapcs_ncrn;
26729 if (nregs & 1)
26730 {
26731 int res = arm_needs_doubleword_align (mode, type);
26732 if (res < 0 && warn_psabi)
26733 inform (input_location, "parameter passing for argument of "
26734 "type %qT changed in GCC 7.1", type);
26735 else if (res > 0)
26736 nregs++;
26737 }
26738 }
26739 else
26740 nregs = pcum->nregs;
26741
26742 if (nregs < NUM_ARG_REGS)
26743 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26744 }
26745
26746 /* We can't rely on the caller doing the proper promotion when
26747 using APCS or ATPCS. */
26748
26749 static bool
26750 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26751 {
26752 return !TARGET_AAPCS_BASED;
26753 }
26754
26755 static machine_mode
26756 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26757 machine_mode mode,
26758 int *punsignedp ATTRIBUTE_UNUSED,
26759 const_tree fntype ATTRIBUTE_UNUSED,
26760 int for_return ATTRIBUTE_UNUSED)
26761 {
26762 if (GET_MODE_CLASS (mode) == MODE_INT
26763 && GET_MODE_SIZE (mode) < 4)
26764 return SImode;
26765
26766 return mode;
26767 }
26768
26769
26770 static bool
26771 arm_default_short_enums (void)
26772 {
26773 return ARM_DEFAULT_SHORT_ENUMS;
26774 }
26775
26776
26777 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26778
26779 static bool
26780 arm_align_anon_bitfield (void)
26781 {
26782 return TARGET_AAPCS_BASED;
26783 }
26784
26785
26786 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26787
26788 static tree
26789 arm_cxx_guard_type (void)
26790 {
26791 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26792 }
26793
26794
26795 /* The EABI says test the least significant bit of a guard variable. */
26796
26797 static bool
26798 arm_cxx_guard_mask_bit (void)
26799 {
26800 return TARGET_AAPCS_BASED;
26801 }
26802
26803
26804 /* The EABI specifies that all array cookies are 8 bytes long. */
26805
26806 static tree
26807 arm_get_cookie_size (tree type)
26808 {
26809 tree size;
26810
26811 if (!TARGET_AAPCS_BASED)
26812 return default_cxx_get_cookie_size (type);
26813
26814 size = build_int_cst (sizetype, 8);
26815 return size;
26816 }
26817
26818
26819 /* The EABI says that array cookies should also contain the element size. */
26820
26821 static bool
26822 arm_cookie_has_size (void)
26823 {
26824 return TARGET_AAPCS_BASED;
26825 }
26826
26827
26828 /* The EABI says constructors and destructors should return a pointer to
26829 the object constructed/destroyed. */
26830
26831 static bool
26832 arm_cxx_cdtor_returns_this (void)
26833 {
26834 return TARGET_AAPCS_BASED;
26835 }
26836
26837 /* The EABI says that an inline function may never be the key
26838 method. */
26839
26840 static bool
26841 arm_cxx_key_method_may_be_inline (void)
26842 {
26843 return !TARGET_AAPCS_BASED;
26844 }
26845
26846 static void
26847 arm_cxx_determine_class_data_visibility (tree decl)
26848 {
26849 if (!TARGET_AAPCS_BASED
26850 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26851 return;
26852
26853 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26854 is exported. However, on systems without dynamic vague linkage,
26855 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26856 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26857 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26858 else
26859 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26860 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26861 }
26862
26863 static bool
26864 arm_cxx_class_data_always_comdat (void)
26865 {
26866 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26867 vague linkage if the class has no key function. */
26868 return !TARGET_AAPCS_BASED;
26869 }
26870
26871
26872 /* The EABI says __aeabi_atexit should be used to register static
26873 destructors. */
26874
26875 static bool
26876 arm_cxx_use_aeabi_atexit (void)
26877 {
26878 return TARGET_AAPCS_BASED;
26879 }
26880
26881
26882 void
26883 arm_set_return_address (rtx source, rtx scratch)
26884 {
26885 arm_stack_offsets *offsets;
26886 HOST_WIDE_INT delta;
26887 rtx addr, mem;
26888 unsigned long saved_regs;
26889
26890 offsets = arm_get_frame_offsets ();
26891 saved_regs = offsets->saved_regs_mask;
26892
26893 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26894 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26895 else
26896 {
26897 if (frame_pointer_needed)
26898 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26899 else
26900 {
26901 /* LR will be the first saved register. */
26902 delta = offsets->outgoing_args - (offsets->frame + 4);
26903
26904
26905 if (delta >= 4096)
26906 {
26907 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26908 GEN_INT (delta & ~4095)));
26909 addr = scratch;
26910 delta &= 4095;
26911 }
26912 else
26913 addr = stack_pointer_rtx;
26914
26915 addr = plus_constant (Pmode, addr, delta);
26916 }
26917
26918 /* The store needs to be marked to prevent DSE from deleting
26919 it as dead if it is based on fp. */
26920 mem = gen_frame_mem (Pmode, addr);
26921 MEM_VOLATILE_P (mem) = true;
26922 emit_move_insn (mem, source);
26923 }
26924 }
26925
26926
26927 void
26928 thumb_set_return_address (rtx source, rtx scratch)
26929 {
26930 arm_stack_offsets *offsets;
26931 HOST_WIDE_INT delta;
26932 HOST_WIDE_INT limit;
26933 int reg;
26934 rtx addr, mem;
26935 unsigned long mask;
26936
26937 emit_use (source);
26938
26939 offsets = arm_get_frame_offsets ();
26940 mask = offsets->saved_regs_mask;
26941 if (mask & (1 << LR_REGNUM))
26942 {
26943 limit = 1024;
26944 /* Find the saved regs. */
26945 if (frame_pointer_needed)
26946 {
26947 delta = offsets->soft_frame - offsets->saved_args;
26948 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26949 if (TARGET_THUMB1)
26950 limit = 128;
26951 }
26952 else
26953 {
26954 delta = offsets->outgoing_args - offsets->saved_args;
26955 reg = SP_REGNUM;
26956 }
26957 /* Allow for the stack frame. */
26958 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26959 delta -= 16;
26960 /* The link register is always the first saved register. */
26961 delta -= 4;
26962
26963 /* Construct the address. */
26964 addr = gen_rtx_REG (SImode, reg);
26965 if (delta > limit)
26966 {
26967 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26968 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26969 addr = scratch;
26970 }
26971 else
26972 addr = plus_constant (Pmode, addr, delta);
26973
26974 /* The store needs to be marked to prevent DSE from deleting
26975 it as dead if it is based on fp. */
26976 mem = gen_frame_mem (Pmode, addr);
26977 MEM_VOLATILE_P (mem) = true;
26978 emit_move_insn (mem, source);
26979 }
26980 else
26981 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26982 }
26983
26984 /* Implements target hook vector_mode_supported_p. */
26985 bool
26986 arm_vector_mode_supported_p (machine_mode mode)
26987 {
26988 /* Neon also supports V2SImode, etc. listed in the clause below. */
26989 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26990 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26991 || mode == V2DImode || mode == V8HFmode))
26992 return true;
26993
26994 if ((TARGET_NEON || TARGET_IWMMXT)
26995 && ((mode == V2SImode)
26996 || (mode == V4HImode)
26997 || (mode == V8QImode)))
26998 return true;
26999
27000 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
27001 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
27002 || mode == V2HAmode))
27003 return true;
27004
27005 return false;
27006 }
27007
27008 /* Implements target hook array_mode_supported_p. */
27009
27010 static bool
27011 arm_array_mode_supported_p (machine_mode mode,
27012 unsigned HOST_WIDE_INT nelems)
27013 {
27014 if (TARGET_NEON
27015 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
27016 && (nelems >= 2 && nelems <= 4))
27017 return true;
27018
27019 return false;
27020 }
27021
27022 /* Use the option -mvectorize-with-neon-double to override the use of quadword
27023 registers when autovectorizing for Neon, at least until multiple vector
27024 widths are supported properly by the middle-end. */
27025
27026 static machine_mode
27027 arm_preferred_simd_mode (scalar_mode mode)
27028 {
27029 if (TARGET_NEON)
27030 switch (mode)
27031 {
27032 case E_SFmode:
27033 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27034 case E_SImode:
27035 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27036 case E_HImode:
27037 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27038 case E_QImode:
27039 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27040 case E_DImode:
27041 if (!TARGET_NEON_VECTORIZE_DOUBLE)
27042 return V2DImode;
27043 break;
27044
27045 default:;
27046 }
27047
27048 if (TARGET_REALLY_IWMMXT)
27049 switch (mode)
27050 {
27051 case E_SImode:
27052 return V2SImode;
27053 case E_HImode:
27054 return V4HImode;
27055 case E_QImode:
27056 return V8QImode;
27057
27058 default:;
27059 }
27060
27061 return word_mode;
27062 }
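/* For example, under the mapping above a loop over 'float' data (SFmode) is
   autovectorized with V4SFmode (a quadword q register) by default, or with
   V2SFmode (a doubleword d register) when -mvectorize-with-neon-double is
   given; without NEON or iWMMXt the hook returns word_mode, i.e. no vector
   mode is preferred.  */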
27063
27064 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27065
27066 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27067 using r0-r4 for function arguments and r7 for the stack frame, and not have
27068 enough left over to do doubleword arithmetic. For Thumb-2 all the
27069 potentially problematic instructions accept high registers so this is not
27070 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27071 that require many low registers. */
27072 static bool
27073 arm_class_likely_spilled_p (reg_class_t rclass)
27074 {
27075 if ((TARGET_THUMB1 && rclass == LO_REGS)
27076 || rclass == CC_REG)
27077 return true;
27078
27079 return false;
27080 }
27081
27082 /* Implements target hook small_register_classes_for_mode_p. */
27083 bool
27084 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27085 {
27086 return TARGET_THUMB1;
27087 }
27088
27089 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27090 ARM insns and therefore guarantee that the shift count is modulo 256.
27091 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27092 guarantee no particular behavior for out-of-range counts. */
27093
27094 static unsigned HOST_WIDE_INT
27095 arm_shift_truncation_mask (machine_mode mode)
27096 {
27097 return mode == SImode ? 255 : 0;
27098 }
27099
27100
27101 /* Map internal gcc register numbers to DWARF2 register numbers. */
27102
27103 unsigned int
27104 arm_dbx_register_number (unsigned int regno)
27105 {
27106 if (regno < 16)
27107 return regno;
27108
27109 if (IS_VFP_REGNUM (regno))
27110 {
27111 /* See comment in arm_dwarf_register_span. */
27112 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27113 return 64 + regno - FIRST_VFP_REGNUM;
27114 else
27115 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27116 }
27117
27118 if (IS_IWMMXT_GR_REGNUM (regno))
27119 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27120
27121 if (IS_IWMMXT_REGNUM (regno))
27122 return 112 + regno - FIRST_IWMMXT_REGNUM;
27123
27124 return DWARF_FRAME_REGISTERS;
27125 }
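/* Worked examples of the mapping above (register names are illustrative):
     r0-r15                                           -> DWARF 0-15
     s4  (VFP single-capable, FIRST_VFP_REGNUM + 4)   -> 64 + 4  = 68
     d16 (FIRST_VFP_REGNUM + 32, no single view)      -> 256 + 16 = 272
     wCGR0 (first iWMMXt GR register)                 -> 104
     wR0   (first iWMMXt register)                    -> 112
   Anything else maps to DWARF_FRAME_REGISTERS, i.e. "no DWARF number".  */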
27126
27127 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27128 GCC models them as 64 32-bit registers, so we need to describe this to
27129 the DWARF generation code. Other registers can use the default. */
27130 static rtx
27131 arm_dwarf_register_span (rtx rtl)
27132 {
27133 machine_mode mode;
27134 unsigned regno;
27135 rtx parts[16];
27136 int nregs;
27137 int i;
27138
27139 regno = REGNO (rtl);
27140 if (!IS_VFP_REGNUM (regno))
27141 return NULL_RTX;
27142
27143 /* XXX FIXME: The EABI defines two VFP register ranges:
27144 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27145 256-287: D0-D31
27146 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27147 corresponding D register. Until GDB supports this, we shall use the
27148 legacy encodings. We also use these encodings for D0-D15 for
27149 compatibility with older debuggers. */
27150 mode = GET_MODE (rtl);
27151 if (GET_MODE_SIZE (mode) < 8)
27152 return NULL_RTX;
27153
27154 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27155 {
27156 nregs = GET_MODE_SIZE (mode) / 4;
27157 for (i = 0; i < nregs; i += 2)
27158 if (TARGET_BIG_END)
27159 {
27160 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27161 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27162 }
27163 else
27164 {
27165 parts[i] = gen_rtx_REG (SImode, regno + i);
27166 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27167 }
27168 }
27169 else
27170 {
27171 nregs = GET_MODE_SIZE (mode) / 8;
27172 for (i = 0; i < nregs; i++)
27173 parts[i] = gen_rtx_REG (DImode, regno + i);
27174 }
27175
27176 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27177 }
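/* For instance, a DFmode value held in d8 (which is also addressable as the
   single-precision pair s16/s17) is described to DWARF as a PARALLEL of two
   SImode registers, with the pair order reversed on big-endian targets.  A
   DFmode value in d16, which has no single-precision view, is described as
   one DImode register and takes the "else" branch above.  */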
27178
27179 #if ARM_UNWIND_INFO
27180 /* Emit unwind directives for a store-multiple instruction or stack pointer
27181 push during alignment.
27182 These should only ever be generated by the function prologue code, so
27183 we expect them to have a particular form.
27184 The store-multiple instruction sometimes pushes pc as the last register,
27185 although it should not be tracked in the unwind information; for -Os it
27186 sometimes pushes dummy registers before the first register that needs
27187 to be tracked in the unwind information. Such dummy registers are there
27188 just to avoid a separate stack adjustment and will not be restored in
27189 the epilogue. */
27190
27191 static void
27192 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27193 {
27194 int i;
27195 HOST_WIDE_INT offset;
27196 HOST_WIDE_INT nregs;
27197 int reg_size;
27198 unsigned reg;
27199 unsigned lastreg;
27200 unsigned padfirst = 0, padlast = 0;
27201 rtx e;
27202
27203 e = XVECEXP (p, 0, 0);
27204 gcc_assert (GET_CODE (e) == SET);
27205
27206 /* First insn will adjust the stack pointer. */
27207 gcc_assert (GET_CODE (e) == SET
27208 && REG_P (SET_DEST (e))
27209 && REGNO (SET_DEST (e)) == SP_REGNUM
27210 && GET_CODE (SET_SRC (e)) == PLUS);
27211
27212 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27213 nregs = XVECLEN (p, 0) - 1;
27214 gcc_assert (nregs);
27215
27216 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27217 if (reg < 16)
27218 {
27219 /* For -Os dummy registers can be pushed at the beginning to
27220 avoid separate stack pointer adjustment. */
27221 e = XVECEXP (p, 0, 1);
27222 e = XEXP (SET_DEST (e), 0);
27223 if (GET_CODE (e) == PLUS)
27224 padfirst = INTVAL (XEXP (e, 1));
27225 gcc_assert (padfirst == 0 || optimize_size);
27226 /* The function prologue may also push pc, but not annotate it as it is
27227 never restored. We turn this into a stack pointer adjustment. */
27228 e = XVECEXP (p, 0, nregs);
27229 e = XEXP (SET_DEST (e), 0);
27230 if (GET_CODE (e) == PLUS)
27231 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27232 else
27233 padlast = offset - 4;
27234 gcc_assert (padlast == 0 || padlast == 4);
27235 if (padlast == 4)
27236 fprintf (asm_out_file, "\t.pad #4\n");
27237 reg_size = 4;
27238 fprintf (asm_out_file, "\t.save {");
27239 }
27240 else if (IS_VFP_REGNUM (reg))
27241 {
27242 reg_size = 8;
27243 fprintf (asm_out_file, "\t.vsave {");
27244 }
27245 else
27246 /* Unknown register type. */
27247 gcc_unreachable ();
27248
27249 /* If the stack increment doesn't match the size of the saved registers,
27250 something has gone horribly wrong. */
27251 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27252
27253 offset = padfirst;
27254 lastreg = 0;
27255 /* The remaining insns will describe the stores. */
27256 for (i = 1; i <= nregs; i++)
27257 {
27258 /* Expect (set (mem <addr>) (reg)).
27259 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27260 e = XVECEXP (p, 0, i);
27261 gcc_assert (GET_CODE (e) == SET
27262 && MEM_P (SET_DEST (e))
27263 && REG_P (SET_SRC (e)));
27264
27265 reg = REGNO (SET_SRC (e));
27266 gcc_assert (reg >= lastreg);
27267
27268 if (i != 1)
27269 fprintf (asm_out_file, ", ");
27270 /* We can't use %r for vfp because we need to use the
27271 double precision register names. */
27272 if (IS_VFP_REGNUM (reg))
27273 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27274 else
27275 asm_fprintf (asm_out_file, "%r", reg);
27276
27277 if (flag_checking)
27278 {
27279 /* Check that the addresses are consecutive. */
27280 e = XEXP (SET_DEST (e), 0);
27281 if (GET_CODE (e) == PLUS)
27282 gcc_assert (REG_P (XEXP (e, 0))
27283 && REGNO (XEXP (e, 0)) == SP_REGNUM
27284 && CONST_INT_P (XEXP (e, 1))
27285 && offset == INTVAL (XEXP (e, 1)));
27286 else
27287 gcc_assert (i == 1
27288 && REG_P (e)
27289 && REGNO (e) == SP_REGNUM);
27290 offset += reg_size;
27291 }
27292 }
27293 fprintf (asm_out_file, "}\n");
27294 if (padfirst)
27295 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27296 }
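/* As an illustrative sketch, for a hypothetical prologue that does
   "push {r4, r5, lr}" this function emits

     .save {r4, r5, lr}

   while a VFP save such as "vpush {d8, d9}" produces

     .vsave {d8, d9}

   and a trailing dummy push of pc (or leading dummy pushes under -Os) is
   turned into the corresponding ".pad #N" adjustment.  */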
27297
27298 /* Emit unwind directives for a SET. */
27299
27300 static void
27301 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27302 {
27303 rtx e0;
27304 rtx e1;
27305 unsigned reg;
27306
27307 e0 = XEXP (p, 0);
27308 e1 = XEXP (p, 1);
27309 switch (GET_CODE (e0))
27310 {
27311 case MEM:
27312 /* Pushing a single register. */
27313 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27314 || !REG_P (XEXP (XEXP (e0, 0), 0))
27315 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27316 abort ();
27317
27318 asm_fprintf (asm_out_file, "\t.save ");
27319 if (IS_VFP_REGNUM (REGNO (e1)))
27320 asm_fprintf(asm_out_file, "{d%d}\n",
27321 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27322 else
27323 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27324 break;
27325
27326 case REG:
27327 if (REGNO (e0) == SP_REGNUM)
27328 {
27329 /* A stack increment. */
27330 if (GET_CODE (e1) != PLUS
27331 || !REG_P (XEXP (e1, 0))
27332 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27333 || !CONST_INT_P (XEXP (e1, 1)))
27334 abort ();
27335
27336 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27337 -INTVAL (XEXP (e1, 1)));
27338 }
27339 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27340 {
27341 HOST_WIDE_INT offset;
27342
27343 if (GET_CODE (e1) == PLUS)
27344 {
27345 if (!REG_P (XEXP (e1, 0))
27346 || !CONST_INT_P (XEXP (e1, 1)))
27347 abort ();
27348 reg = REGNO (XEXP (e1, 0));
27349 offset = INTVAL (XEXP (e1, 1));
27350 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27351 HARD_FRAME_POINTER_REGNUM, reg,
27352 offset);
27353 }
27354 else if (REG_P (e1))
27355 {
27356 reg = REGNO (e1);
27357 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27358 HARD_FRAME_POINTER_REGNUM, reg);
27359 }
27360 else
27361 abort ();
27362 }
27363 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27364 {
27365 /* Move from sp to reg. */
27366 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27367 }
27368 else if (GET_CODE (e1) == PLUS
27369 && REG_P (XEXP (e1, 0))
27370 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27371 && CONST_INT_P (XEXP (e1, 1)))
27372 {
27373 /* Set reg to offset from sp. */
27374 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27375 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27376 }
27377 else
27378 abort ();
27379 break;
27380
27381 default:
27382 abort ();
27383 }
27384 }
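/* Examples of the directives produced above (register names and offsets are
   purely illustrative):

     push of a single register r4   ->  .save {r4}
     push of a single register d8   ->  .save {d8}
     sp = sp - 16                   ->  .pad #16
     fp = sp + 8                    ->  .setfp fp, sp, #8
     r7 = sp                        ->  .movsp r7
     r7 = sp + 12                   ->  .movsp r7, #12

   The register names are printed with the target's own spelling via %r.  */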
27385
27386
27387 /* Emit unwind directives for the given insn. */
27388
27389 static void
27390 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27391 {
27392 rtx note, pat;
27393 bool handled_one = false;
27394
27395 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27396 return;
27397
27398 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27399 && (TREE_NOTHROW (current_function_decl)
27400 || crtl->all_throwers_are_sibcalls))
27401 return;
27402
27403 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27404 return;
27405
27406 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27407 {
27408 switch (REG_NOTE_KIND (note))
27409 {
27410 case REG_FRAME_RELATED_EXPR:
27411 pat = XEXP (note, 0);
27412 goto found;
27413
27414 case REG_CFA_REGISTER:
27415 pat = XEXP (note, 0);
27416 if (pat == NULL)
27417 {
27418 pat = PATTERN (insn);
27419 if (GET_CODE (pat) == PARALLEL)
27420 pat = XVECEXP (pat, 0, 0);
27421 }
27422
27423 /* Only emitted for IS_STACKALIGN re-alignment. */
27424 {
27425 rtx dest, src;
27426 unsigned reg;
27427
27428 src = SET_SRC (pat);
27429 dest = SET_DEST (pat);
27430
27431 gcc_assert (src == stack_pointer_rtx);
27432 reg = REGNO (dest);
27433 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27434 reg + 0x90, reg);
27435 }
27436 handled_one = true;
27437 break;
27438
27439 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27440 to get correct DWARF information for shrink-wrapping. We should not
27441 emit unwind information for it because these notes are used either for
27442 pretend arguments or to adjust sp and restore registers from the
27443 stack. */
27444 case REG_CFA_DEF_CFA:
27445 case REG_CFA_ADJUST_CFA:
27446 case REG_CFA_RESTORE:
27447 return;
27448
27449 case REG_CFA_EXPRESSION:
27450 case REG_CFA_OFFSET:
27451 /* ??? Only handling here what we actually emit. */
27452 gcc_unreachable ();
27453
27454 default:
27455 break;
27456 }
27457 }
27458 if (handled_one)
27459 return;
27460 pat = PATTERN (insn);
27461 found:
27462
27463 switch (GET_CODE (pat))
27464 {
27465 case SET:
27466 arm_unwind_emit_set (asm_out_file, pat);
27467 break;
27468
27469 case SEQUENCE:
27470 /* Store multiple. */
27471 arm_unwind_emit_sequence (asm_out_file, pat);
27472 break;
27473
27474 default:
27475 abort();
27476 }
27477 }
27478
27479
27480 /* Output a reference from a function exception table to the type_info
27481 object X. The EABI specifies that the symbol should be relocated by
27482 an R_ARM_TARGET2 relocation. */
27483
27484 static bool
27485 arm_output_ttype (rtx x)
27486 {
27487 fputs ("\t.word\t", asm_out_file);
27488 output_addr_const (asm_out_file, x);
27489 /* Use special relocations for symbol references. */
27490 if (!CONST_INT_P (x))
27491 fputs ("(TARGET2)", asm_out_file);
27492 fputc ('\n', asm_out_file);
27493
27494 return TRUE;
27495 }
27496
27497 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27498
27499 static void
27500 arm_asm_emit_except_personality (rtx personality)
27501 {
27502 fputs ("\t.personality\t", asm_out_file);
27503 output_addr_const (asm_out_file, personality);
27504 fputc ('\n', asm_out_file);
27505 }
27506 #endif /* ARM_UNWIND_INFO */
27507
27508 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27509
27510 static void
27511 arm_asm_init_sections (void)
27512 {
27513 #if ARM_UNWIND_INFO
27514 exception_section = get_unnamed_section (0, output_section_asm_op,
27515 "\t.handlerdata");
27516 #endif /* ARM_UNWIND_INFO */
27517
27518 #ifdef OBJECT_FORMAT_ELF
27519 if (target_pure_code)
27520 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27521 #endif
27522 }
27523
27524 /* Output unwind directives for the start/end of a function. */
27525
27526 void
27527 arm_output_fn_unwind (FILE * f, bool prologue)
27528 {
27529 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27530 return;
27531
27532 if (prologue)
27533 fputs ("\t.fnstart\n", f);
27534 else
27535 {
27536 /* If this function will never be unwound, then mark it as such.
27537 The same condition is used in arm_unwind_emit to suppress
27538 the frame annotations. */
27539 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27540 && (TREE_NOTHROW (current_function_decl)
27541 || crtl->all_throwers_are_sibcalls))
27542 fputs("\t.cantunwind\n", f);
27543
27544 fputs ("\t.fnend\n", f);
27545 }
27546 }
27547
27548 static bool
27549 arm_emit_tls_decoration (FILE *fp, rtx x)
27550 {
27551 enum tls_reloc reloc;
27552 rtx val;
27553
27554 val = XVECEXP (x, 0, 0);
27555 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27556
27557 output_addr_const (fp, val);
27558
27559 switch (reloc)
27560 {
27561 case TLS_GD32:
27562 fputs ("(tlsgd)", fp);
27563 break;
27564 case TLS_LDM32:
27565 fputs ("(tlsldm)", fp);
27566 break;
27567 case TLS_LDO32:
27568 fputs ("(tlsldo)", fp);
27569 break;
27570 case TLS_IE32:
27571 fputs ("(gottpoff)", fp);
27572 break;
27573 case TLS_LE32:
27574 fputs ("(tpoff)", fp);
27575 break;
27576 case TLS_DESCSEQ:
27577 fputs ("(tlsdesc)", fp);
27578 break;
27579 default:
27580 gcc_unreachable ();
27581 }
27582
27583 switch (reloc)
27584 {
27585 case TLS_GD32:
27586 case TLS_LDM32:
27587 case TLS_IE32:
27588 case TLS_DESCSEQ:
27589 fputs (" + (. - ", fp);
27590 output_addr_const (fp, XVECEXP (x, 0, 2));
27591 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
27592 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27593 output_addr_const (fp, XVECEXP (x, 0, 3));
27594 fputc (')', fp);
27595 break;
27596 default:
27597 break;
27598 }
27599
27600 return TRUE;
27601 }
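/* For example, for a hypothetical symbol "x" with relocation TLS_LE32 this
   prints "x(tpoff)", while for TLS_GD32 with extra operands ".LPIC7" and "8"
   (both made up for illustration) it prints

     x(tlsgd) + (. - .LPIC7 - 8)

   For TLS_DESCSEQ the final operand is added rather than subtracted.  */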
27602
27603 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27604
27605 static void
27606 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27607 {
27608 gcc_assert (size == 4);
27609 fputs ("\t.word\t", file);
27610 output_addr_const (file, x);
27611 fputs ("(tlsldo)", file);
27612 }
27613
27614 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27615
27616 static bool
27617 arm_output_addr_const_extra (FILE *fp, rtx x)
27618 {
27619 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27620 return arm_emit_tls_decoration (fp, x);
27621 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27622 {
27623 char label[256];
27624 int labelno = INTVAL (XVECEXP (x, 0, 0));
27625
27626 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27627 assemble_name_raw (fp, label);
27628
27629 return TRUE;
27630 }
27631 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27632 {
27633 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27634 if (GOT_PCREL)
27635 fputs ("+.", fp);
27636 fputs ("-(", fp);
27637 output_addr_const (fp, XVECEXP (x, 0, 0));
27638 fputc (')', fp);
27639 return TRUE;
27640 }
27641 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27642 {
27643 output_addr_const (fp, XVECEXP (x, 0, 0));
27644 if (GOT_PCREL)
27645 fputs ("+.", fp);
27646 fputs ("-(", fp);
27647 output_addr_const (fp, XVECEXP (x, 0, 1));
27648 fputc (')', fp);
27649 return TRUE;
27650 }
27651 else if (GET_CODE (x) == CONST_VECTOR)
27652 return arm_emit_vector_const (fp, x);
27653
27654 return FALSE;
27655 }
27656
27657 /* Output assembly for a shift instruction.
27658 SET_FLAGS determines how the instruction modifies the condition codes.
27659 0 - Do not set condition codes.
27660 1 - Set condition codes.
27661 2 - Use smallest instruction. */
27662 const char *
27663 arm_output_shift(rtx * operands, int set_flags)
27664 {
27665 char pattern[100];
27666 static const char flag_chars[3] = {'?', '.', '!'};
27667 const char *shift;
27668 HOST_WIDE_INT val;
27669 char c;
27670
27671 c = flag_chars[set_flags];
27672 shift = shift_op(operands[3], &val);
27673 if (shift)
27674 {
27675 if (val != -1)
27676 operands[2] = GEN_INT(val);
27677 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27678 }
27679 else
27680 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27681
27682 output_asm_insn (pattern, operands);
27683 return "";
27684 }
27685
27686 /* Output assembly for a WMMX immediate shift instruction. */
27687 const char *
27688 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27689 {
27690 int shift = INTVAL (operands[2]);
27691 char templ[50];
27692 machine_mode opmode = GET_MODE (operands[0]);
27693
27694 gcc_assert (shift >= 0);
27695
27696 /* Handle shift values that exceed the maximum for the mode: > 63 (for the
27697 D qualifier), > 31 (for the W qualifier) or > 15 (for the H qualifier). */
27698 if (((opmode == V4HImode) && (shift > 15))
27699 || ((opmode == V2SImode) && (shift > 31))
27700 || ((opmode == DImode) && (shift > 63)))
27701 {
27702 if (wror_or_wsra)
27703 {
27704 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27705 output_asm_insn (templ, operands);
27706 if (opmode == DImode)
27707 {
27708 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27709 output_asm_insn (templ, operands);
27710 }
27711 }
27712 else
27713 {
27714 /* The destination register will contain all zeros. */
27715 sprintf (templ, "wzero\t%%0");
27716 output_asm_insn (templ, operands);
27717 }
27718 return "";
27719 }
27720
27721 if ((opmode == DImode) && (shift > 32))
27722 {
27723 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27724 output_asm_insn (templ, operands);
27725 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27726 output_asm_insn (templ, operands);
27727 }
27728 else
27729 {
27730 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27731 output_asm_insn (templ, operands);
27732 }
27733 return "";
27734 }
27735
27736 /* Output assembly for a WMMX tinsr instruction. */
27737 const char *
27738 arm_output_iwmmxt_tinsr (rtx *operands)
27739 {
27740 int mask = INTVAL (operands[3]);
27741 int i;
27742 char templ[50];
27743 int units = mode_nunits[GET_MODE (operands[0])];
27744 gcc_assert ((mask & (mask - 1)) == 0);
27745 for (i = 0; i < units; ++i)
27746 {
27747 if ((mask & 0x01) == 1)
27748 {
27749 break;
27750 }
27751 mask >>= 1;
27752 }
27753 gcc_assert (i < units);
27754 {
27755 switch (GET_MODE (operands[0]))
27756 {
27757 case E_V8QImode:
27758 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27759 break;
27760 case E_V4HImode:
27761 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27762 break;
27763 case E_V2SImode:
27764 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27765 break;
27766 default:
27767 gcc_unreachable ();
27768 break;
27769 }
27770 output_asm_insn (templ, operands);
27771 }
27772 return "";
27773 }
27774
27775 /* Output a Thumb-1 casesi dispatch sequence. */
27776 const char *
27777 thumb1_output_casesi (rtx *operands)
27778 {
27779 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27780
27781 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27782
27783 switch (GET_MODE(diff_vec))
27784 {
27785 case E_QImode:
27786 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27787 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27788 case E_HImode:
27789 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27790 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27791 case E_SImode:
27792 return "bl\t%___gnu_thumb1_case_si";
27793 default:
27794 gcc_unreachable ();
27795 }
27796 }
27797
27798 /* Output a Thumb-2 casesi instruction. */
27799 const char *
27800 thumb2_output_casesi (rtx *operands)
27801 {
27802 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27803
27804 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27805
27806 output_asm_insn ("cmp\t%0, %1", operands);
27807 output_asm_insn ("bhi\t%l3", operands);
27808 switch (GET_MODE(diff_vec))
27809 {
27810 case E_QImode:
27811 return "tbb\t[%|pc, %0]";
27812 case E_HImode:
27813 return "tbh\t[%|pc, %0, lsl #1]";
27814 case E_SImode:
27815 if (flag_pic)
27816 {
27817 output_asm_insn ("adr\t%4, %l2", operands);
27818 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27819 output_asm_insn ("add\t%4, %4, %5", operands);
27820 return "bx\t%4";
27821 }
27822 else
27823 {
27824 output_asm_insn ("adr\t%4, %l2", operands);
27825 return "ldr\t%|pc, [%4, %0, lsl #2]";
27826 }
27827 default:
27828 gcc_unreachable ();
27829 }
27830 }
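/* A sketch of the dispatch code this produces, with made-up operand values
   (index register, bound, table and default labels):

     cmp   r0, #41
     bhi   .Ldefault
     tbh   [pc, r0, lsl #1]

   for a HImode table, or for an SImode table without -fpic:

     cmp   r0, #41
     bhi   .Ldefault
     adr   r4, .Ltable
     ldr   pc, [r4, r0, lsl #2]

   The SImode -fpic variant loads the offset and adds it to the table base
   before the indirect branch.  */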
27831
27832 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
27833 per-core tuning structs. */
27834 static int
27835 arm_issue_rate (void)
27836 {
27837 return current_tune->issue_rate;
27838 }
27839
27840 /* Return how many instructions the scheduler should look ahead to choose
27841 the best one. */
27842 static int
27843 arm_first_cycle_multipass_dfa_lookahead (void)
27844 {
27845 int issue_rate = arm_issue_rate ();
27846
27847 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27848 }
27849
27850 /* Enable modeling of L2 auto-prefetcher. */
27851 static int
27852 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27853 {
27854 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27855 }
27856
27857 const char *
27858 arm_mangle_type (const_tree type)
27859 {
27860 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27861 has to be mangled as if it is in the "std" namespace. */
27862 if (TARGET_AAPCS_BASED
27863 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27864 return "St9__va_list";
27865
27866 /* Half-precision float. */
27867 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27868 return "Dh";
27869
27870 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
27871 builtin type. */
27872 if (TYPE_NAME (type) != NULL)
27873 return arm_mangle_builtin_type (type);
27874
27875 /* Use the default mangling. */
27876 return NULL;
27877 }
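/* For example, on an AAPCS-based target the builtin "__va_list" type mangles
   as "St9__va_list" (as if declared in namespace std), and the half-precision
   "__fp16" type mangles as "Dh"; other types fall back to the default
   mangling.  */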
27878
27879 /* Order of allocation of core registers for Thumb: this allocation is
27880 written over the corresponding initial entries of the array
27881 initialized with REG_ALLOC_ORDER. We allocate all low registers
27882 first. Saving and restoring a low register is usually cheaper than
27883 using a call-clobbered high register. */
27884
27885 static const int thumb_core_reg_alloc_order[] =
27886 {
27887 3, 2, 1, 0, 4, 5, 6, 7,
27888 12, 14, 8, 9, 10, 11
27889 };
27890
27891 /* Adjust register allocation order when compiling for Thumb. */
27892
27893 void
27894 arm_order_regs_for_local_alloc (void)
27895 {
27896 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27897 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27898 if (TARGET_THUMB)
27899 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27900 sizeof (thumb_core_reg_alloc_order));
27901 }
27902
27903 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27904
27905 bool
27906 arm_frame_pointer_required (void)
27907 {
27908 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27909 return true;
27910
27911 /* If the function receives nonlocal gotos, it needs to save the frame
27912 pointer in the nonlocal_goto_save_area object. */
27913 if (cfun->has_nonlocal_label)
27914 return true;
27915
27916 /* The frame pointer is required for non-leaf APCS frames. */
27917 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
27918 return true;
27919
27920 /* If we are probing the stack in the prologue, we will have a faulting
27921 instruction prior to the stack adjustment and this requires a frame
27922 pointer if we want to catch the exception using the EABI unwinder. */
27923 if (!IS_INTERRUPT (arm_current_func_type ())
27924 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27925 || flag_stack_clash_protection)
27926 && arm_except_unwind_info (&global_options) == UI_TARGET
27927 && cfun->can_throw_non_call_exceptions)
27928 {
27929 HOST_WIDE_INT size = get_frame_size ();
27930
27931 /* That's irrelevant if there is no stack adjustment. */
27932 if (size <= 0)
27933 return false;
27934
27935 /* That's relevant only if there is a stack probe. */
27936 if (crtl->is_leaf && !cfun->calls_alloca)
27937 {
27938 /* We don't have the final size of the frame so adjust. */
27939 size += 32 * UNITS_PER_WORD;
27940 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
27941 return true;
27942 }
27943 else
27944 return true;
27945 }
27946
27947 return false;
27948 }
27949
27950 /* Only Thumb-1 lacks support for conditional execution, so return true if
27951 the target is not Thumb-1. */
27952 static bool
27953 arm_have_conditional_execution (void)
27954 {
27955 return !TARGET_THUMB1;
27956 }
27957
27958 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27959 static HOST_WIDE_INT
27960 arm_vector_alignment (const_tree type)
27961 {
27962 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27963
27964 if (TARGET_AAPCS_BASED)
27965 align = MIN (align, 64);
27966
27967 return align;
27968 }
27969
27970 static unsigned int
27971 arm_autovectorize_vector_sizes (void)
27972 {
27973 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27974 }
27975
27976 static bool
27977 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27978 {
27979 /* Vectors which aren't in packed structures will not be less aligned than
27980 the natural alignment of their element type, so this is safe. */
27981 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27982 return !is_packed;
27983
27984 return default_builtin_vector_alignment_reachable (type, is_packed);
27985 }
27986
27987 static bool
27988 arm_builtin_support_vector_misalignment (machine_mode mode,
27989 const_tree type, int misalignment,
27990 bool is_packed)
27991 {
27992 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27993 {
27994 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27995
27996 if (is_packed)
27997 return align == 1;
27998
27999 /* If the misalignment is unknown, we should be able to handle the access
28000 so long as it is not to a member of a packed data structure. */
28001 if (misalignment == -1)
28002 return true;
28003
28004 /* Return true if the misalignment is a multiple of the natural alignment
28005 of the vector's element type. This is probably always going to be
28006 true in practice, since we've already established that this isn't a
28007 packed access. */
28008 return ((misalignment % align) == 0);
28009 }
28010
28011 return default_builtin_support_vector_misalignment (mode, type, misalignment,
28012 is_packed);
28013 }
28014
28015 static void
28016 arm_conditional_register_usage (void)
28017 {
28018 int regno;
28019
28020 if (TARGET_THUMB1 && optimize_size)
28021 {
28022 /* When optimizing for size on Thumb-1, it's better not
28023 to use the HI regs, because of the overhead of
28024 stacking them. */
28025 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
28026 fixed_regs[regno] = call_used_regs[regno] = 1;
28027 }
28028
28029 /* The link register can be clobbered by any branch insn,
28030 but we have no way to track that at present, so mark
28031 it as unavailable. */
28032 if (TARGET_THUMB1)
28033 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28034
28035 if (TARGET_32BIT && TARGET_HARD_FLOAT)
28036 {
28037 /* VFPv3 registers are disabled when earlier VFP
28038 versions are selected due to the definition of
28039 LAST_VFP_REGNUM. */
28040 for (regno = FIRST_VFP_REGNUM;
28041 regno <= LAST_VFP_REGNUM; ++ regno)
28042 {
28043 fixed_regs[regno] = 0;
28044 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28045 || regno >= FIRST_VFP_REGNUM + 32;
28046 }
28047 }
28048
28049 if (TARGET_REALLY_IWMMXT)
28050 {
28051 regno = FIRST_IWMMXT_GR_REGNUM;
28052 /* The 2002/10/09 revision of the XScale ABI has wCG0
28053 and wCG1 as call-preserved registers. The 2002/11/21
28054 revision changed this so that all wCG registers are
28055 scratch registers. */
28056 for (regno = FIRST_IWMMXT_GR_REGNUM;
28057 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28058 fixed_regs[regno] = 0;
28059 /* The XScale ABI has wR0 - wR9 as scratch registers,
28060 the rest as call-preserved registers. */
28061 for (regno = FIRST_IWMMXT_REGNUM;
28062 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28063 {
28064 fixed_regs[regno] = 0;
28065 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28066 }
28067 }
28068
28069 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28070 {
28071 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28072 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28073 }
28074 else if (TARGET_APCS_STACK)
28075 {
28076 fixed_regs[10] = 1;
28077 call_used_regs[10] = 1;
28078 }
28079 /* -mcaller-super-interworking reserves r11 for calls to
28080 _interwork_r11_call_via_rN(). Making the register global
28081 is an easy way of ensuring that it remains valid for all
28082 calls. */
28083 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28084 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28085 {
28086 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28087 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28088 if (TARGET_CALLER_INTERWORKING)
28089 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28090 }
28091 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28092 }
28093
28094 static reg_class_t
28095 arm_preferred_rename_class (reg_class_t rclass)
28096 {
28097 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28098 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS
28099 so that code size can be reduced. */
28100 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28101 return LO_REGS;
28102 else
28103 return NO_REGS;
28104 }
28105
28106 /* Compute the attribute "length" of insn "*push_multi".
28107 So this function MUST be kept in sync with that insn pattern. */
28108 int
28109 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28110 {
28111 int i, regno, hi_reg;
28112 int num_saves = XVECLEN (parallel_op, 0);
28113
28114 /* ARM mode. */
28115 if (TARGET_ARM)
28116 return 4;
28117 /* Thumb1 mode. */
28118 if (TARGET_THUMB1)
28119 return 2;
28120
28121 /* Thumb2 mode. */
28122 regno = REGNO (first_op);
28123 /* For PUSH/STM in Thumb-2 mode, we can use a 16-bit encoding if the register
28124 list fits in 8 bits. Normally this means all registers in the list must be
28125 LO_REGS, that is (R0-R7). If any HI_REGS are used, then we must use the
28126 32-bit encoding. The one exception is PUSH, where LR (a HI_REG) can still
28127 be used with the 16-bit encoding. */
28128 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28129 for (i = 1; i < num_saves && !hi_reg; i++)
28130 {
28131 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28132 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28133 }
28134
28135 if (!hi_reg)
28136 return 2;
28137 return 4;
28138 }
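/* Examples of the length computation above for Thumb-2 (register lists are
   illustrative):
     push {r4, r5, r6, lr}   -> 2 bytes (all LO_REGS, LR allowed)
     push {r4, r8}           -> 4 bytes (r8 is a HI_REG other than LR)
   In ARM state the answer is always 4 and in Thumb-1 always 2.  */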
28139
28140 /* Compute the attribute "length" of an insn. Currently, this function is used
28141 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28142 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28143 rtx, RETURN_PC is true if OPERANDS contains a return insn, and WRITE_BACK_P
28144 is true if OPERANDS contains an insn which explicitly updates the base register. */
28145
28146 int
28147 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28148 {
28149 /* ARM mode. */
28150 if (TARGET_ARM)
28151 return 4;
28152 /* Thumb1 mode. */
28153 if (TARGET_THUMB1)
28154 return 2;
28155
28156 rtx parallel_op = operands[0];
28157 /* Initialize to the number of elements in the PARALLEL. */
28158 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28159 /* Initialize to the register number of the base register. */
28160 unsigned regno = REGNO (operands[1]);
28161 /* Skip the return and write-back patterns.
28162 We only need the register pop patterns for the analysis below. */
28163 unsigned first_indx = 0;
28164 first_indx += return_pc ? 1 : 0;
28165 first_indx += write_back_p ? 1 : 0;
28166
28167 /* A pop operation can be done through LDM or POP. If the base register is SP
28168 and write-back is used, then the LDM is an alias of POP. */
28169 bool pop_p = (regno == SP_REGNUM && write_back_p);
28170 bool ldm_p = !pop_p;
28171
28172 /* Check base register for LDM. */
28173 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28174 return 4;
28175
28176 /* Check each register in the list. */
28177 for (; indx >= first_indx; indx--)
28178 {
28179 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28180 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28181 comment in arm_attr_length_push_multi. */
28182 if (REGNO_REG_CLASS (regno) == HI_REGS
28183 && (regno != PC_REGNUM || ldm_p))
28184 return 4;
28185 }
28186
28187 return 2;
28188 }
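/* Examples for Thumb-2, with illustrative register lists:

     pop {r4, r5, pc}     -> 2 bytes (SP base with write-back; PC is allowed
                             in the 16-bit POP encoding)
     ldm r8!, {r4, r5}    -> 4 bytes (high base register forces LDM)
     ldm r0, {r4, r9}     -> 4 bytes (r9 is a HI_REG)

   ARM state always gives 4 and Thumb-1 always gives 2.  */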
28189
28190 /* Compute the number of instructions emitted by output_move_double. */
28191 int
28192 arm_count_output_move_double_insns (rtx *operands)
28193 {
28194 int count;
28195 rtx ops[2];
28196 /* output_move_double may modify the operands array, so call it
28197 here on a copy of the array. */
28198 ops[0] = operands[0];
28199 ops[1] = operands[1];
28200 output_move_double (ops, false, &count);
28201 return count;
28202 }
28203
28204 int
28205 vfp3_const_double_for_fract_bits (rtx operand)
28206 {
28207 REAL_VALUE_TYPE r0;
28208
28209 if (!CONST_DOUBLE_P (operand))
28210 return 0;
28211
28212 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28213 if (exact_real_inverse (DFmode, &r0)
28214 && !REAL_VALUE_NEGATIVE (r0))
28215 {
28216 if (exact_real_truncate (DFmode, &r0))
28217 {
28218 HOST_WIDE_INT value = real_to_integer (&r0);
28219 value = value & 0xffffffff;
28220 if ((value != 0) && ( (value & (value - 1)) == 0))
28221 {
28222 int ret = exact_log2 (value);
28223 gcc_assert (IN_RANGE (ret, 0, 31));
28224 return ret;
28225 }
28226 }
28227 }
28228 return 0;
28229 }
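/* A worked example: for the constant 0.25 the exact inverse is 4.0, which is
   non-negative, truncates exactly and is a power of two, so the function
   returns log2 (4) = 2.  A constant such as 0.3 has no exact power-of-two
   inverse and yields 0.  */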
28230
28231 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28232 log2 is in [1, 32], return that log2. Otherwise return -1.
28233 This is used in the patterns for vcvt.s32.f32 floating-point to
28234 fixed-point conversions. */
28235
28236 int
28237 vfp3_const_double_for_bits (rtx x)
28238 {
28239 const REAL_VALUE_TYPE *r;
28240
28241 if (!CONST_DOUBLE_P (x))
28242 return -1;
28243
28244 r = CONST_DOUBLE_REAL_VALUE (x);
28245
28246 if (REAL_VALUE_NEGATIVE (*r)
28247 || REAL_VALUE_ISNAN (*r)
28248 || REAL_VALUE_ISINF (*r)
28249 || !real_isinteger (r, SFmode))
28250 return -1;
28251
28252 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28253
28254 /* The exact_log2 above will have returned -1 if this is
28255 not an exact log2. */
28256 if (!IN_RANGE (hwint, 1, 32))
28257 return -1;
28258
28259 return hwint;
28260 }
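/* Worked examples: 256.0 returns 8; 1.0 gives log2 = 0, which is outside the
   accepted range [1, 32], so the result is -1; 3.0 is not a power of two and
   also returns -1.  */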
28261
28262 \f
28263 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28264
28265 static void
28266 arm_pre_atomic_barrier (enum memmodel model)
28267 {
28268 if (need_atomic_barrier_p (model, true))
28269 emit_insn (gen_memory_barrier ());
28270 }
28271
28272 static void
28273 arm_post_atomic_barrier (enum memmodel model)
28274 {
28275 if (need_atomic_barrier_p (model, false))
28276 emit_insn (gen_memory_barrier ());
28277 }
28278
28279 /* Emit the load-exclusive and store-exclusive instructions.
28280 Use acquire and release versions if necessary. */
28281
28282 static void
28283 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28284 {
28285 rtx (*gen) (rtx, rtx);
28286
28287 if (acq)
28288 {
28289 switch (mode)
28290 {
28291 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28292 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28293 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28294 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28295 default:
28296 gcc_unreachable ();
28297 }
28298 }
28299 else
28300 {
28301 switch (mode)
28302 {
28303 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28304 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28305 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28306 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28307 default:
28308 gcc_unreachable ();
28309 }
28310 }
28311
28312 emit_insn (gen (rval, mem));
28313 }
28314
28315 static void
28316 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28317 rtx mem, bool rel)
28318 {
28319 rtx (*gen) (rtx, rtx, rtx);
28320
28321 if (rel)
28322 {
28323 switch (mode)
28324 {
28325 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28326 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28327 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28328 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28329 default:
28330 gcc_unreachable ();
28331 }
28332 }
28333 else
28334 {
28335 switch (mode)
28336 {
28337 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28338 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28339 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28340 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28341 default:
28342 gcc_unreachable ();
28343 }
28344 }
28345
28346 emit_insn (gen (bval, rval, mem));
28347 }
28348
28349 /* Mark the previous jump instruction as unlikely. */
28350
28351 static void
28352 emit_unlikely_jump (rtx insn)
28353 {
28354 rtx_insn *jump = emit_jump_insn (insn);
28355 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28356 }
28357
28358 /* Expand a compare and swap pattern. */
28359
28360 void
28361 arm_expand_compare_and_swap (rtx operands[])
28362 {
28363 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28364 machine_mode mode;
28365 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28366
28367 bval = operands[0];
28368 rval = operands[1];
28369 mem = operands[2];
28370 oldval = operands[3];
28371 newval = operands[4];
28372 is_weak = operands[5];
28373 mod_s = operands[6];
28374 mod_f = operands[7];
28375 mode = GET_MODE (mem);
28376
28377 /* Normally the succ memory model must be stronger than fail, but in the
28378 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28379 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28380
28381 if (TARGET_HAVE_LDACQ
28382 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28383 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28384 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28385
28386 switch (mode)
28387 {
28388 case E_QImode:
28389 case E_HImode:
28390 /* For narrow modes, we're going to perform the comparison in SImode,
28391 so do the zero-extension now. */
28392 rval = gen_reg_rtx (SImode);
28393 oldval = convert_modes (SImode, mode, oldval, true);
28394 /* FALLTHRU */
28395
28396 case E_SImode:
28397 /* Force the value into a register if needed. We waited until after
28398 the zero-extension above to do this properly. */
28399 if (!arm_add_operand (oldval, SImode))
28400 oldval = force_reg (SImode, oldval);
28401 break;
28402
28403 case E_DImode:
28404 if (!cmpdi_operand (oldval, mode))
28405 oldval = force_reg (mode, oldval);
28406 break;
28407
28408 default:
28409 gcc_unreachable ();
28410 }
28411
28412 if (TARGET_THUMB1)
28413 {
28414 switch (mode)
28415 {
28416 case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28417 case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28418 case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28419 case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28420 default:
28421 gcc_unreachable ();
28422 }
28423 }
28424 else
28425 {
28426 switch (mode)
28427 {
28428 case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28429 case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28430 case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28431 case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28432 default:
28433 gcc_unreachable ();
28434 }
28435 }
28436
28437 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28438 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28439
28440 if (mode == QImode || mode == HImode)
28441 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28442
28443 /* In all cases, we arrange for success to be signaled by Z set.
28444 This arrangement allows for the boolean result to be used directly
28445 in a subsequent branch, post optimization. For Thumb-1 targets, the
28446 boolean negation of the result is also stored in bval because the Thumb-1
28447 backend lacks dependency tracking for the CC flag, as flag-setting is not
28448 represented at the RTL level. */
28449 if (TARGET_THUMB1)
28450 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28451 else
28452 {
28453 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28454 emit_insn (gen_rtx_SET (bval, x));
28455 }
28456 }
28457
28458 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28459 another memory store between the load-exclusive and store-exclusive can
28460 reset the monitor from Exclusive to Open state. This means we must wait
28461 until after reload to split the pattern, lest we get a register spill in
28462 the middle of the atomic sequence. Success of the compare and swap is
28463 indicated by the Z flag being set for 32-bit targets and by neg_bval being
28464 zero for Thumb-1 targets (i.e. the negation of the boolean value returned
28465 by the atomic_compare_and_swapmode standard pattern in operand 0). */
28466
28467 void
28468 arm_split_compare_and_swap (rtx operands[])
28469 {
28470 rtx rval, mem, oldval, newval, neg_bval;
28471 machine_mode mode;
28472 enum memmodel mod_s, mod_f;
28473 bool is_weak;
28474 rtx_code_label *label1, *label2;
28475 rtx x, cond;
28476
28477 rval = operands[1];
28478 mem = operands[2];
28479 oldval = operands[3];
28480 newval = operands[4];
28481 is_weak = (operands[5] != const0_rtx);
28482 mod_s = memmodel_from_int (INTVAL (operands[6]));
28483 mod_f = memmodel_from_int (INTVAL (operands[7]));
28484 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28485 mode = GET_MODE (mem);
28486
28487 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28488
28489 bool use_acquire = TARGET_HAVE_LDACQ
28490 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28491 || is_mm_release (mod_s));
28492
28493 bool use_release = TARGET_HAVE_LDACQ
28494 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28495 || is_mm_acquire (mod_s));
28496
28497 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28498 a full barrier is emitted after the store-release. */
28499 if (is_armv8_sync)
28500 use_acquire = false;
28501
28502 /* Checks whether a barrier is needed and emits one accordingly. */
28503 if (!(use_acquire || use_release))
28504 arm_pre_atomic_barrier (mod_s);
28505
28506 label1 = NULL;
28507 if (!is_weak)
28508 {
28509 label1 = gen_label_rtx ();
28510 emit_label (label1);
28511 }
28512 label2 = gen_label_rtx ();
28513
28514 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28515
28516 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
28517 as required to communicate with arm_expand_compare_and_swap. */
28518 if (TARGET_32BIT)
28519 {
28520 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28521 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28522 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28523 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28524 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28525 }
28526 else
28527 {
28528 emit_move_insn (neg_bval, const1_rtx);
28529 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28530 if (thumb1_cmpneg_operand (oldval, SImode))
28531 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28532 label2, cond));
28533 else
28534 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28535 }
28536
28537 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28538
28539 /* Weak or strong, we want EQ to be true for success, so that we
28540 match the flags that we got from the compare above. */
28541 if (TARGET_32BIT)
28542 {
28543 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28544 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28545 emit_insn (gen_rtx_SET (cond, x));
28546 }
28547
28548 if (!is_weak)
28549 {
28550 /* Z is set to boolean value of !neg_bval, as required to communicate
28551 with arm_expand_compare_and_swap. */
28552 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28553 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28554 }
28555
28556 if (!is_mm_relaxed (mod_f))
28557 emit_label (label2);
28558
28559 /* Checks whether a barrier is needed and emits one accordingly. */
28560 if (is_armv8_sync
28561 || !(use_acquire || use_release))
28562 arm_post_atomic_barrier (mod_s);
28563
28564 if (is_mm_relaxed (mod_f))
28565 emit_label (label2);
28566 }
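/* A rough sketch of the code produced for a strong SImode compare and swap
   on a 32-bit target without acquire/release instructions (registers and
   labels are purely illustrative):

       dmb                      @ pre-barrier, if the memory model needs one
     1:
       ldrex   r0, [r2]         @ rval = *mem
       cmp     r0, r3           @ rval == oldval ?
       bne     2f
       strex   r1, r4, [r2]     @ try to store newval
       cmp     r1, #0
       bne     1b               @ retry if the store-exclusive failed
     2:
       dmb                      @ post-barrier, if the memory model needs one

   When acquire/release instructions are used instead, the explicit barriers
   are normally omitted.  */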
28567
28568 /* Split an atomic operation pattern. Operation is given by CODE and is one
28569 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28570 operation). Operation is performed on the content at MEM and on VALUE
28571 following the memory model MODEL_RTX. The content at MEM before and after
28572 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28573 success of the operation is returned in COND. Using a scratch register or
28574 an operand register for these determines what result is returned for that
28575 pattern. */
28576
28577 void
28578 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28579 rtx value, rtx model_rtx, rtx cond)
28580 {
28581 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28582 machine_mode mode = GET_MODE (mem);
28583 machine_mode wmode = (mode == DImode ? DImode : SImode);
28584 rtx_code_label *label;
28585 bool all_low_regs, bind_old_new;
28586 rtx x;
28587
28588 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28589
28590 bool use_acquire = TARGET_HAVE_LDACQ
28591 && !(is_mm_relaxed (model) || is_mm_consume (model)
28592 || is_mm_release (model));
28593
28594 bool use_release = TARGET_HAVE_LDACQ
28595 && !(is_mm_relaxed (model) || is_mm_consume (model)
28596 || is_mm_acquire (model));
28597
28598 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28599 a full barrier is emitted after the store-release. */
28600 if (is_armv8_sync)
28601 use_acquire = false;
28602
28603 /* Checks whether a barrier is needed and emits one accordingly. */
28604 if (!(use_acquire || use_release))
28605 arm_pre_atomic_barrier (model);
28606
28607 label = gen_label_rtx ();
28608 emit_label (label);
28609
28610 if (new_out)
28611 new_out = gen_lowpart (wmode, new_out);
28612 if (old_out)
28613 old_out = gen_lowpart (wmode, old_out);
28614 else
28615 old_out = new_out;
28616 value = simplify_gen_subreg (wmode, value, mode, 0);
28617
28618 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28619
28620 /* Does the operation require the destination and first operand to use the
28621 same register? This is decided by the register constraints of the relevant
28622 insn patterns in thumb1.md. */
28623 gcc_assert (!new_out || REG_P (new_out));
28624 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28625 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28626 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28627 bind_old_new =
28628 (TARGET_THUMB1
28629 && code != SET
28630 && code != MINUS
28631 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28632
28633 /* We want to return the old value while putting the result of the operation
28634 in the same register as the old value, so copy the old value over to the
28635 destination register and use that register for the operation. */
28636 if (old_out && bind_old_new)
28637 {
28638 emit_move_insn (new_out, old_out);
28639 old_out = new_out;
28640 }
28641
28642 switch (code)
28643 {
28644 case SET:
28645 new_out = value;
28646 break;
28647
28648 case NOT:
28649 x = gen_rtx_AND (wmode, old_out, value);
28650 emit_insn (gen_rtx_SET (new_out, x));
28651 x = gen_rtx_NOT (wmode, new_out);
28652 emit_insn (gen_rtx_SET (new_out, x));
28653 break;
28654
28655 case MINUS:
28656 if (CONST_INT_P (value))
28657 {
28658 value = GEN_INT (-INTVAL (value));
28659 code = PLUS;
28660 }
28661 /* FALLTHRU */
28662
28663 case PLUS:
28664 if (mode == DImode)
28665 {
28666 /* DImode plus/minus need to clobber flags. */
28667 /* The adddi3 and subdi3 patterns are incorrectly written so that
28668 they require matching operands, even when we could easily support
28669 three operands. Thankfully, this can be fixed up post-splitting,
28670 as the individual add+adc patterns do accept three operands and
28671 post-reload cprop can make these moves go away. */
28672 emit_move_insn (new_out, old_out);
28673 if (code == PLUS)
28674 x = gen_adddi3 (new_out, new_out, value);
28675 else
28676 x = gen_subdi3 (new_out, new_out, value);
28677 emit_insn (x);
28678 break;
28679 }
28680 /* FALLTHRU */
28681
28682 default:
28683 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28684 emit_insn (gen_rtx_SET (new_out, x));
28685 break;
28686 }
28687
28688 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28689 use_release);
28690
28691 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28692 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28693
28694 /* Checks whether a barrier is needed and emits one accordingly. */
28695 if (is_armv8_sync
28696 || !(use_acquire || use_release))
28697 arm_post_atomic_barrier (model);
28698 }
28699 \f
28700 #define MAX_VECT_LEN 16
28701
28702 struct expand_vec_perm_d
28703 {
28704 rtx target, op0, op1;
28705 auto_vec_perm_indices perm;
28706 machine_mode vmode;
28707 bool one_vector_p;
28708 bool testing_p;
28709 };
28710
28711 /* Generate a variable permutation. */
28712
28713 static void
28714 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28715 {
28716 machine_mode vmode = GET_MODE (target);
28717 bool one_vector_p = rtx_equal_p (op0, op1);
28718
28719 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28720 gcc_checking_assert (GET_MODE (op0) == vmode);
28721 gcc_checking_assert (GET_MODE (op1) == vmode);
28722 gcc_checking_assert (GET_MODE (sel) == vmode);
28723 gcc_checking_assert (TARGET_NEON);
28724
28725 if (one_vector_p)
28726 {
28727 if (vmode == V8QImode)
28728 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28729 else
28730 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28731 }
28732 else
28733 {
28734 rtx pair;
28735
28736 if (vmode == V8QImode)
28737 {
28738 pair = gen_reg_rtx (V16QImode);
28739 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28740 pair = gen_lowpart (TImode, pair);
28741 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28742 }
28743 else
28744 {
28745 pair = gen_reg_rtx (OImode);
28746 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28747 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28748 }
28749 }
28750 }
28751
28752 void
28753 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28754 {
28755 machine_mode vmode = GET_MODE (target);
28756 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28757 bool one_vector_p = rtx_equal_p (op0, op1);
28758 rtx rmask[MAX_VECT_LEN], mask;
28759
28760 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28761 numbering of elements for big-endian, we must reverse the order. */
28762 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28763
28764 /* The VTBL instruction does not use a modulo index, so we must take care
28765 of that ourselves. */
28766 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28767 for (i = 0; i < nelt; ++i)
28768 rmask[i] = mask;
28769 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28770 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28771
28772 arm_expand_vec_perm_1 (target, op0, op1, sel);
28773 }
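
/* As an illustration of the masking above (not part of the generated code):
   for a two-operand V8QImode permutation the selector is ANDed with
   2 * 8 - 1 == 15, so an out-of-range index such as 19 selects the same byte
   as index 3; in the one-operand case the mask is 7.  This reproduces the
   modulo behaviour VEC_PERM_EXPR requires but VTBL does not provide.  */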
28774
28775 /* Map lane ordering between architectural lane order, and GCC lane order,
28776 taking into account ABI. See comment above output_move_neon for details. */
28777
28778 static int
28779 neon_endian_lane_map (machine_mode mode, int lane)
28780 {
28781 if (BYTES_BIG_ENDIAN)
28782 {
28783 int nelems = GET_MODE_NUNITS (mode);
28784 /* Reverse lane order. */
28785 lane = (nelems - 1 - lane);
28786 /* Reverse D register order, to match ABI. */
28787 if (GET_MODE_SIZE (mode) == 16)
28788 lane = lane ^ (nelems / 2);
28789 }
28790 return lane;
28791 }
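
/* Worked example of the mapping above: for V4SImode on a big-endian target,
   nelems is 4 and the mode is 16 bytes wide, so architectural lanes
   0, 1, 2, 3 map to GCC lanes 1, 0, 3, 2 (reverse the lane order, then swap
   the two D registers).  On little-endian targets the mapping is the
   identity.  */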
28792
28793 /* Some permutations index into pairs of vectors; this is a helper function
28794 to map indices into those pairs of vectors. */
28795
28796 static int
28797 neon_pair_endian_lane_map (machine_mode mode, int lane)
28798 {
28799 int nelem = GET_MODE_NUNITS (mode);
28800 if (BYTES_BIG_ENDIAN)
28801 lane =
28802 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28803 return lane;
28804 }
28805
28806 /* Generate or test for an insn that supports a constant permutation. */
28807
28808 /* Recognize patterns for the VUZP insns. */
28809
28810 static bool
28811 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28812 {
28813 unsigned int i, odd, mask, nelt = d->perm.length ();
28814 rtx out0, out1, in0, in1;
28815 rtx (*gen)(rtx, rtx, rtx, rtx);
28816 int first_elem;
28817 int swap_nelt;
28818
28819 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28820 return false;
28821
28822 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28823 big-endian pattern on 64-bit vectors, so we correct for that. */
28824 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28825 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
28826
28827 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28828
28829 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28830 odd = 0;
28831 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28832 odd = 1;
28833 else
28834 return false;
28835 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28836
28837 for (i = 0; i < nelt; i++)
28838 {
28839 unsigned elt =
28840 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28841 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28842 return false;
28843 }
28844
28845 /* Success! */
28846 if (d->testing_p)
28847 return true;
28848
28849 switch (d->vmode)
28850 {
28851 case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28852 case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28853 case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28854 case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28855 case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28856 case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28857 case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28858 case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28859 case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28860 case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28861 default:
28862 gcc_unreachable ();
28863 }
28864
28865 in0 = d->op0;
28866 in1 = d->op1;
28867 if (swap_nelt != 0)
28868 std::swap (in0, in1);
28869
28870 out0 = d->target;
28871 out1 = gen_reg_rtx (d->vmode);
28872 if (odd)
28873 std::swap (out0, out1);
28874
28875 emit_insn (gen (out0, in0, in1, out1));
28876 return true;
28877 }
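
/* Example selectors accepted above (little-endian, V4SImode, two operands):
   { 0, 2, 4, 6 } picks the even-indexed elements (odd == 0) and
   { 1, 3, 5, 7 } picks the odd-indexed elements (odd == 1); each maps onto a
   single VUZP, with the unused output going to a scratch register.  */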
28878
28879 /* Recognize patterns for the VZIP insns. */
28880
28881 static bool
28882 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28883 {
28884 unsigned int i, high, mask, nelt = d->perm.length ();
28885 rtx out0, out1, in0, in1;
28886 rtx (*gen)(rtx, rtx, rtx, rtx);
28887 int first_elem;
28888 bool is_swapped;
28889
28890 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28891 return false;
28892
28893 is_swapped = BYTES_BIG_ENDIAN;
28894
28895 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28896
28897 high = nelt / 2;
28898 if (first_elem == neon_endian_lane_map (d->vmode, high))
28899 ;
28900 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28901 high = 0;
28902 else
28903 return false;
28904 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28905
28906 for (i = 0; i < nelt / 2; i++)
28907 {
28908 unsigned elt =
28909 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28910 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28911 != elt)
28912 return false;
28913 elt =
28914 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28915 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28916 != elt)
28917 return false;
28918 }
28919
28920 /* Success! */
28921 if (d->testing_p)
28922 return true;
28923
28924 switch (d->vmode)
28925 {
28926 case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28927 case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28928 case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28929 case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28930 case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28931 case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28932 case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
28933 case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
28934 case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28935 case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28936 default:
28937 gcc_unreachable ();
28938 }
28939
28940 in0 = d->op0;
28941 in1 = d->op1;
28942 if (is_swapped)
28943 std::swap (in0, in1);
28944
28945 out0 = d->target;
28946 out1 = gen_reg_rtx (d->vmode);
28947 if (high)
28948 std::swap (out0, out1);
28949
28950 emit_insn (gen (out0, in0, in1, out1));
28951 return true;
28952 }
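
/* Example selectors accepted above (little-endian, V4SImode, two operands):
   { 0, 4, 1, 5 } interleaves the low halves of the two inputs and
   { 2, 6, 3, 7 } interleaves the high halves; the other half of the VZIP
   result is written to a scratch register.  */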
28953
28954 /* Recognize patterns for the VREV insns. */
28955
28956 static bool
28957 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28958 {
28959 unsigned int i, j, diff, nelt = d->perm.length ();
28960 rtx (*gen)(rtx, rtx);
28961
28962 if (!d->one_vector_p)
28963 return false;
28964
28965 diff = d->perm[0];
28966 switch (diff)
28967 {
28968 case 7:
28969 switch (d->vmode)
28970 {
28971 case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
28972 case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
28973 default:
28974 return false;
28975 }
28976 break;
28977 case 3:
28978 switch (d->vmode)
28979 {
28980 case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
28981 case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
28982 case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
28983 case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
28984 case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
28985 case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
28986 default:
28987 return false;
28988 }
28989 break;
28990 case 1:
28991 switch (d->vmode)
28992 {
28993 case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
28994 case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
28995 case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
28996 case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
28997 case E_V4SImode: gen = gen_neon_vrev64v4si; break;
28998 case E_V2SImode: gen = gen_neon_vrev64v2si; break;
28999 case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
29000 case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
29001 default:
29002 return false;
29003 }
29004 break;
29005 default:
29006 return false;
29007 }
29008
29009 for (i = 0; i < nelt ; i += diff + 1)
29010 for (j = 0; j <= diff; j += 1)
29011 {
29012 /* This is guaranteed to hold: diff is 7, 3 or 1, so the
29013 vector always has enough elements for this access.
29014 Reaching this point with any other value of diff in the
29015 selector would mean something has already gone wrong. */
29017 gcc_assert (i + j < nelt);
29018 if (d->perm[i + j] != i + diff - j)
29019 return false;
29020 }
29021
29022 /* Success! */
29023 if (d->testing_p)
29024 return true;
29025
29026 emit_insn (gen (d->target, d->op0));
29027 return true;
29028 }
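
/* Example: for a one-operand V16QImode permutation, the selector
   { 3, 2, 1, 0, 7, 6, 5, 4, 11, 10, 9, 8, 15, 14, 13, 12 } has diff == 3 and
   reverses the bytes within each 32-bit group, so it is emitted as a single
   VREV32.8 instruction.  */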
29029
29030 /* Recognize patterns for the VTRN insns. */
29031
29032 static bool
29033 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29034 {
29035 unsigned int i, odd, mask, nelt = d->perm.length ();
29036 rtx out0, out1, in0, in1;
29037 rtx (*gen)(rtx, rtx, rtx, rtx);
29038
29039 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29040 return false;
29041
29042 /* Note that these are little-endian tests. Adjust for big-endian later. */
29043 if (d->perm[0] == 0)
29044 odd = 0;
29045 else if (d->perm[0] == 1)
29046 odd = 1;
29047 else
29048 return false;
29049 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29050
29051 for (i = 0; i < nelt; i += 2)
29052 {
29053 if (d->perm[i] != i + odd)
29054 return false;
29055 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29056 return false;
29057 }
29058
29059 /* Success! */
29060 if (d->testing_p)
29061 return true;
29062
29063 switch (d->vmode)
29064 {
29065 case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
29066 case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
29067 case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
29068 case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
29069 case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
29070 case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
29071 case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29072 case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29073 case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29074 case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29075 default:
29076 gcc_unreachable ();
29077 }
29078
29079 in0 = d->op0;
29080 in1 = d->op1;
29081 if (BYTES_BIG_ENDIAN)
29082 {
29083 std::swap (in0, in1);
29084 odd = !odd;
29085 }
29086
29087 out0 = d->target;
29088 out1 = gen_reg_rtx (d->vmode);
29089 if (odd)
29090 std::swap (out0, out1);
29091
29092 emit_insn (gen (out0, in0, in1, out1));
29093 return true;
29094 }
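
/* Example selectors accepted above (little-endian, V4SImode, two operands):
   { 0, 4, 2, 6 } (odd == 0) and { 1, 5, 3, 7 } (odd == 1), i.e. the usual
   matrix-transpose interleaving; the second VTRN output goes to a scratch
   register.  */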
29095
29096 /* Recognize patterns for the VEXT insns. */
29097
29098 static bool
29099 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29100 {
29101 unsigned int i, nelt = d->perm.length ();
29102 rtx (*gen) (rtx, rtx, rtx, rtx);
29103 rtx offset;
29104
29105 unsigned int location;
29106
29107 unsigned int next = d->perm[0] + 1;
29108
29109 /* TODO: Handle GCC's numbering of elements for big-endian. */
29110 if (BYTES_BIG_ENDIAN)
29111 return false;
29112
29113 /* Check if the extracted indexes are increasing by one. */
29114 for (i = 1; i < nelt; next++, i++)
29115 {
29116 /* If we hit the most significant element of the 2nd vector in
29117 the previous iteration, no need to test further. */
29118 if (next == 2 * nelt)
29119 return false;
29120
29121 /* If we are operating on only one vector, it could be a
29122 rotation. If there are only two elements of size < 64, let
29123 arm_evpc_neon_vrev catch it. */
29124 if (d->one_vector_p && (next == nelt))
29125 {
29126 if ((nelt == 2) && (d->vmode != V2DImode))
29127 return false;
29128 else
29129 next = 0;
29130 }
29131
29132 if (d->perm[i] != next)
29133 return false;
29134 }
29135
29136 location = d->perm[0];
29137
29138 switch (d->vmode)
29139 {
29140 case E_V16QImode: gen = gen_neon_vextv16qi; break;
29141 case E_V8QImode: gen = gen_neon_vextv8qi; break;
29142 case E_V4HImode: gen = gen_neon_vextv4hi; break;
29143 case E_V8HImode: gen = gen_neon_vextv8hi; break;
29144 case E_V2SImode: gen = gen_neon_vextv2si; break;
29145 case E_V4SImode: gen = gen_neon_vextv4si; break;
29146 case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29147 case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29148 case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29149 case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29150 case E_V2DImode: gen = gen_neon_vextv2di; break;
29151 default:
29152 return false;
29153 }
29154
29155 /* Success! */
29156 if (d->testing_p)
29157 return true;
29158
29159 offset = GEN_INT (location);
29160 emit_insn (gen (d->target, d->op0, d->op1, offset));
29161 return true;
29162 }
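
/* Example: for V4SImode with two operands, the selector { 1, 2, 3, 4 }
   increases by one starting at element 1, so it is emitted as VEXT with an
   offset of 1, taking the top three elements of op0 followed by the first
   element of op1.  */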
29163
29164 /* The NEON VTBL instruction is a fully variable permutation that's even
29165 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29166 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29167 can do slightly better by expanding this as a constant where we don't
29168 have to apply a mask. */
29169
29170 static bool
29171 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29172 {
29173 rtx rperm[MAX_VECT_LEN], sel;
29174 machine_mode vmode = d->vmode;
29175 unsigned int i, nelt = d->perm.length ();
29176
29177 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29178 numbering of elements for big-endian, we must reverse the order. */
29179 if (BYTES_BIG_ENDIAN)
29180 return false;
29181
29182 if (d->testing_p)
29183 return true;
29184
29185 /* Generic code will try constant permutation twice: once with the
29186 original mode and again with the elements lowered to QImode.
29187 So wait and don't do the selector expansion ourselves. */
29188 if (vmode != V8QImode && vmode != V16QImode)
29189 return false;
29190
29191 for (i = 0; i < nelt; ++i)
29192 rperm[i] = GEN_INT (d->perm[i]);
29193 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29194 sel = force_reg (vmode, sel);
29195
29196 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29197 return true;
29198 }
29199
29200 static bool
29201 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29202 {
29203 /* Check if the input mask matches vext before reordering the
29204 operands. */
29205 if (TARGET_NEON)
29206 if (arm_evpc_neon_vext (d))
29207 return true;
29208
29209 /* The pattern matching functions above are written to look for a small
29210 number to begin the sequence (0, 1, N/2). If we begin with an index
29211 from the second operand, we can swap the operands. */
29212 unsigned int nelt = d->perm.length ();
29213 if (d->perm[0] >= nelt)
29214 {
29215 for (unsigned int i = 0; i < nelt; ++i)
29216 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
29217
29218 std::swap (d->op0, d->op1);
29219 }
29220
29221 if (TARGET_NEON)
29222 {
29223 if (arm_evpc_neon_vuzp (d))
29224 return true;
29225 if (arm_evpc_neon_vzip (d))
29226 return true;
29227 if (arm_evpc_neon_vrev (d))
29228 return true;
29229 if (arm_evpc_neon_vtrn (d))
29230 return true;
29231 return arm_evpc_neon_vtbl (d);
29232 }
29233 return false;
29234 }
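
/* For instance, with V4SImode and the selector { 4, 0, 5, 1 } the first index
   comes from the second operand, so the code above rewrites the selector to
   { 0, 4, 1, 5 } and swaps op0 and op1; the result is then recognized by
   arm_evpc_neon_vzip.  */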
29235
29236 /* Expand a vec_perm_const pattern. */
29237
29238 bool
29239 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29240 {
29241 struct expand_vec_perm_d d;
29242 int i, nelt, which;
29243
29244 d.target = target;
29245 d.op0 = op0;
29246 d.op1 = op1;
29247
29248 d.vmode = GET_MODE (target);
29249 gcc_assert (VECTOR_MODE_P (d.vmode));
29250 d.testing_p = false;
29251
29252 nelt = GET_MODE_NUNITS (d.vmode);
29253 d.perm.reserve (nelt);
29254 for (i = which = 0; i < nelt; ++i)
29255 {
29256 rtx e = XVECEXP (sel, 0, i);
29257 int ei = INTVAL (e) & (2 * nelt - 1);
29258 which |= (ei < nelt ? 1 : 2);
29259 d.perm.quick_push (ei);
29260 }
29261
29262 switch (which)
29263 {
29264 default:
29265 gcc_unreachable();
29266
29267 case 3:
29268 d.one_vector_p = false;
29269 if (!rtx_equal_p (op0, op1))
29270 break;
29271
29272 /* The elements of PERM do not suggest that only the first operand
29273 is used, but both operands are identical. Allow easier matching
29274 of the permutation by folding the permutation into the single
29275 input vector. */
29276 /* FALLTHRU */
29277 case 2:
29278 for (i = 0; i < nelt; ++i)
29279 d.perm[i] &= nelt - 1;
29280 d.op0 = op1;
29281 d.one_vector_p = true;
29282 break;
29283
29284 case 1:
29285 d.op1 = op0;
29286 d.one_vector_p = true;
29287 break;
29288 }
29289
29290 return arm_expand_vec_perm_const_1 (&d);
29291 }
29292
29293 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29294
29295 static bool
29296 arm_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
29297 {
29298 struct expand_vec_perm_d d;
29299 unsigned int i, nelt, which;
29300 bool ret;
29301
29302 d.vmode = vmode;
29303 d.testing_p = true;
29304 d.perm.safe_splice (sel);
29305
29306 /* Categorize the set of elements in the selector. */
29307 nelt = GET_MODE_NUNITS (d.vmode);
29308 for (i = which = 0; i < nelt; ++i)
29309 {
29310 unsigned int e = d.perm[i];
29311 gcc_assert (e < 2 * nelt);
29312 which |= (e < nelt ? 1 : 2);
29313 }
29314
29315 /* If all elements come from the second vector, fold them onto the first. */
29316 if (which == 2)
29317 for (i = 0; i < nelt; ++i)
29318 d.perm[i] -= nelt;
29319
29320 /* Check whether the mask can be applied to the vector type. */
29321 d.one_vector_p = (which != 3);
29322
29323 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29324 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29325 if (!d.one_vector_p)
29326 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29327
29328 start_sequence ();
29329 ret = arm_expand_vec_perm_const_1 (&d);
29330 end_sequence ();
29331
29332 return ret;
29333 }
29334
29335 bool
29336 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29337 {
29338 /* If we are soft float and either have ldrd or the mode fits in a
29339 single word, then all auto-increment forms are ok. */
29340 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29341 return true;
29342
29343 switch (code)
29344 {
29345 /* Post-increment is supported for all instruction forms;
29346 pre-decrement is supported for all but vector forms. */
29347 case ARM_POST_INC:
29348 case ARM_PRE_DEC:
29349 if (VECTOR_MODE_P (mode))
29350 {
29351 if (code != ARM_PRE_DEC)
29352 return true;
29353 else
29354 return false;
29355 }
29356
29357 return true;
29358
29359 case ARM_POST_DEC:
29360 case ARM_PRE_INC:
29361 /* Without LDRD, when the mode size is greater than the
29362 word size there is no point in auto-incrementing
29363 because ldm and stm will not have these forms. */
29364 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29365 return false;
29366
29367 /* Vector and floating point modes do not support
29368 these auto increment forms. */
29369 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29370 return false;
29371
29372 return true;
29373
29374 default:
29375 return false;
29376
29377 }
29378
29379 return false;
29380 }
29381
29382 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29383 on ARM, since we know that shifts by negative amounts are no-ops.
29384 Additionally, the default expansion code is not available or suitable
29385 for post-reload insn splits (this can occur when the register allocator
29386 chooses not to do a shift in NEON).
29387
29388 This function is used in both initial expand and post-reload splits, and
29389 handles all kinds of 64-bit shifts.
29390
29391 Input requirements:
29392 - It is safe for the input and output to be the same register, but
29393 early-clobber rules apply for the shift amount and scratch registers.
29394 - Shift by register requires both scratch registers. In all other cases
29395 the scratch registers may be NULL.
29396 - Ashiftrt by a register also clobbers the CC register. */
29397 void
29398 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29399 rtx amount, rtx scratch1, rtx scratch2)
29400 {
29401 rtx out_high = gen_highpart (SImode, out);
29402 rtx out_low = gen_lowpart (SImode, out);
29403 rtx in_high = gen_highpart (SImode, in);
29404 rtx in_low = gen_lowpart (SImode, in);
29405
29406 /* Terminology:
29407 in = the register pair containing the input value.
29408 out = the destination register pair.
29409 up = the high- or low-part of each pair.
29410 down = the opposite part to "up".
29411 In a shift, we can consider bits to shift from "up"-stream to
29412 "down"-stream, so in a left-shift "up" is the low-part and "down"
29413 is the high-part of each register pair. */
29414
29415 rtx out_up = code == ASHIFT ? out_low : out_high;
29416 rtx out_down = code == ASHIFT ? out_high : out_low;
29417 rtx in_up = code == ASHIFT ? in_low : in_high;
29418 rtx in_down = code == ASHIFT ? in_high : in_low;
29419
29420 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29421 gcc_assert (out
29422 && (REG_P (out) || GET_CODE (out) == SUBREG)
29423 && GET_MODE (out) == DImode);
29424 gcc_assert (in
29425 && (REG_P (in) || GET_CODE (in) == SUBREG)
29426 && GET_MODE (in) == DImode);
29427 gcc_assert (amount
29428 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29429 && GET_MODE (amount) == SImode)
29430 || CONST_INT_P (amount)));
29431 gcc_assert (scratch1 == NULL
29432 || (GET_CODE (scratch1) == SCRATCH)
29433 || (GET_MODE (scratch1) == SImode
29434 && REG_P (scratch1)));
29435 gcc_assert (scratch2 == NULL
29436 || (GET_CODE (scratch2) == SCRATCH)
29437 || (GET_MODE (scratch2) == SImode
29438 && REG_P (scratch2)));
29439 gcc_assert (!REG_P (out) || !REG_P (amount)
29440 || !HARD_REGISTER_P (out)
29441 || (REGNO (out) != REGNO (amount)
29442 && REGNO (out) + 1 != REGNO (amount)));
29443
29444 /* Macros to make following code more readable. */
29445 #define SUB_32(DEST,SRC) \
29446 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29447 #define RSB_32(DEST,SRC) \
29448 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29449 #define SUB_S_32(DEST,SRC) \
29450 gen_addsi3_compare0 ((DEST), (SRC), \
29451 GEN_INT (-32))
29452 #define SET(DEST,SRC) \
29453 gen_rtx_SET ((DEST), (SRC))
29454 #define SHIFT(CODE,SRC,AMOUNT) \
29455 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29456 #define LSHIFT(CODE,SRC,AMOUNT) \
29457 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29458 SImode, (SRC), (AMOUNT))
29459 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29460 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29461 SImode, (SRC), (AMOUNT))
29462 #define ORR(A,B) \
29463 gen_rtx_IOR (SImode, (A), (B))
29464 #define BRANCH(COND,LABEL) \
29465 gen_arm_cond_branch ((LABEL), \
29466 gen_rtx_ ## COND (CCmode, cc_reg, \
29467 const0_rtx), \
29468 cc_reg)
29469
29470 /* Shifts by register and shifts by constant are handled separately. */
29471 if (CONST_INT_P (amount))
29472 {
29473 /* We have a shift-by-constant. */
29474
29475 /* First, handle out-of-range shift amounts.
29476 In both cases we try to match the result that an ARM instruction in a
29477 shift-by-register would give. This helps reduce execution
29478 differences between optimization levels, but it won't stop other
29479 parts of the compiler doing different things. This is "undefined
29480 behavior", in any case. */
29481 if (INTVAL (amount) <= 0)
29482 emit_insn (gen_movdi (out, in));
29483 else if (INTVAL (amount) >= 64)
29484 {
29485 if (code == ASHIFTRT)
29486 {
29487 rtx const31_rtx = GEN_INT (31);
29488 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29489 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29490 }
29491 else
29492 emit_insn (gen_movdi (out, const0_rtx));
29493 }
29494
29495 /* Now handle valid shifts. */
29496 else if (INTVAL (amount) < 32)
29497 {
29498 /* Shifts by a constant less than 32. */
29499 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29500
29501 /* Clearing the out register in DImode first avoids lots
29502 of spilling and results in less stack usage.
29503 Later this redundant insn is completely removed.
29504 Do that only if "in" and "out" are different registers. */
29505 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29506 emit_insn (SET (out, const0_rtx));
29507 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29508 emit_insn (SET (out_down,
29509 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29510 out_down)));
29511 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29512 }
29513 else
29514 {
29515 /* Shifts by a constant greater than 31. */
29516 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29517
29518 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29519 emit_insn (SET (out, const0_rtx));
29520 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29521 if (code == ASHIFTRT)
29522 emit_insn (gen_ashrsi3 (out_up, in_up,
29523 GEN_INT (31)));
29524 else
29525 emit_insn (SET (out_up, const0_rtx));
29526 }
29527 }
29528 else
29529 {
29530 /* We have a shift-by-register. */
29531 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29532
29533 /* This alternative requires the scratch registers. */
29534 gcc_assert (scratch1 && REG_P (scratch1));
29535 gcc_assert (scratch2 && REG_P (scratch2));
29536
29537 /* We will need the values "amount-32" and "32-amount" later.
29538 Swapping them around now allows the later code to be more general. */
29539 switch (code)
29540 {
29541 case ASHIFT:
29542 emit_insn (SUB_32 (scratch1, amount));
29543 emit_insn (RSB_32 (scratch2, amount));
29544 break;
29545 case ASHIFTRT:
29546 emit_insn (RSB_32 (scratch1, amount));
29547 /* Also set CC = amount > 32. */
29548 emit_insn (SUB_S_32 (scratch2, amount));
29549 break;
29550 case LSHIFTRT:
29551 emit_insn (RSB_32 (scratch1, amount));
29552 emit_insn (SUB_32 (scratch2, amount));
29553 break;
29554 default:
29555 gcc_unreachable ();
29556 }
29557
29558 /* Emit code like this:
29559
29560 arithmetic-left:
29561 out_down = in_down << amount;
29562 out_down = (in_up << (amount - 32)) | out_down;
29563 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29564 out_up = in_up << amount;
29565
29566 arithmetic-right:
29567 out_down = in_down >> amount;
29568 out_down = (in_up << (32 - amount)) | out_down;
29569 if (amount < 32)
29570 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29571 out_up = in_up << amount;
29572
29573 logical-right:
29574 out_down = in_down >> amount;
29575 out_down = (in_up << (32 - amount)) | out_down;
29576 if (amount < 32)
29577 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29578 out_up = in_up << amount;
29579
29580 The ARM and Thumb2 variants are the same but implemented slightly
29581 differently. If this were only called during expand we could just
29582 use the Thumb2 case and let combine do the right thing, but this
29583 can also be called from post-reload splitters. */
29584
29585 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29586
29587 if (!TARGET_THUMB2)
29588 {
29589 /* Emit code for ARM mode. */
29590 emit_insn (SET (out_down,
29591 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29592 if (code == ASHIFTRT)
29593 {
29594 rtx_code_label *done_label = gen_label_rtx ();
29595 emit_jump_insn (BRANCH (LT, done_label));
29596 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29597 out_down)));
29598 emit_label (done_label);
29599 }
29600 else
29601 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29602 out_down)));
29603 }
29604 else
29605 {
29606 /* Emit code for Thumb2 mode.
29607 Thumb2 can't do shift and or in one insn. */
29608 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29609 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29610
29611 if (code == ASHIFTRT)
29612 {
29613 rtx_code_label *done_label = gen_label_rtx ();
29614 emit_jump_insn (BRANCH (LT, done_label));
29615 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29616 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29617 emit_label (done_label);
29618 }
29619 else
29620 {
29621 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29622 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29623 }
29624 }
29625
29626 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29627 }
29628
29629 #undef SUB_32
29630 #undef RSB_32
29631 #undef SUB_S_32
29632 #undef SET
29633 #undef SHIFT
29634 #undef LSHIFT
29635 #undef REV_LSHIFT
29636 #undef ORR
29637 #undef BRANCH
29638 }
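
/* Worked example of the constant case above (ignoring the optional clearing
   of OUT): a DImode left shift by 10 expands to
     out_high = (in_high << 10) | ((unsigned) in_low >> 22);
     out_low  = in_low << 10;
   while an arithmetic right shift by 40 uses the adjusted amount 40 - 32:
     out_low  = in_high >> 8;
     out_high = in_high >> 31;   (sign fill)  */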
29639
29640 /* Returns true if the pattern is a valid symbolic address, which is either a
29641 symbol_ref or (symbol_ref + addend).
29642
29643 According to the ARM ELF ABI, the initial addend of REL-type relocations
29644 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29645 literal field of the instruction as a 16-bit signed value in the range
29646 -32768 <= A < 32768. */
29647
29648 bool
29649 arm_valid_symbolic_address_p (rtx addr)
29650 {
29651 rtx xop0, xop1 = NULL_RTX;
29652 rtx tmp = addr;
29653
29654 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29655 return true;
29656
29657 /* (const (plus: symbol_ref const_int)) */
29658 if (GET_CODE (addr) == CONST)
29659 tmp = XEXP (addr, 0);
29660
29661 if (GET_CODE (tmp) == PLUS)
29662 {
29663 xop0 = XEXP (tmp, 0);
29664 xop1 = XEXP (tmp, 1);
29665
29666 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29667 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29668 }
29669
29670 return false;
29671 }
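
/* Examples: (symbol_ref "SYM"), a label_ref, and
   (const (plus (symbol_ref "SYM") (const_int 16))) are accepted, whereas an
   addend such as 0x10000 is rejected because it cannot be represented as a
   16-bit signed value (-32768 <= A < 32768).  */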
29672
29673 /* Returns true if *COMPARISON is a comparison operation we can handle, and
29674 puts the operands into a form that is valid for that comparison. */
29675 bool
29676 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29677 {
29678 enum rtx_code code = GET_CODE (*comparison);
29679 int code_int;
29680 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29681 ? GET_MODE (*op2) : GET_MODE (*op1);
29682
29683 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29684
29685 if (code == UNEQ || code == LTGT)
29686 return false;
29687
29688 code_int = (int)code;
29689 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29690 PUT_CODE (*comparison, (enum rtx_code)code_int);
29691
29692 switch (mode)
29693 {
29694 case E_SImode:
29695 if (!arm_add_operand (*op1, mode))
29696 *op1 = force_reg (mode, *op1);
29697 if (!arm_add_operand (*op2, mode))
29698 *op2 = force_reg (mode, *op2);
29699 return true;
29700
29701 case E_DImode:
29702 if (!cmpdi_operand (*op1, mode))
29703 *op1 = force_reg (mode, *op1);
29704 if (!cmpdi_operand (*op2, mode))
29705 *op2 = force_reg (mode, *op2);
29706 return true;
29707
29708 case E_HFmode:
29709 if (!TARGET_VFP_FP16INST)
29710 break;
29711 /* FP16 comparisons are done in SF mode. */
29712 mode = SFmode;
29713 *op1 = convert_to_mode (mode, *op1, 1);
29714 *op2 = convert_to_mode (mode, *op2, 1);
29715 /* Fall through. */
29716 case E_SFmode:
29717 case E_DFmode:
29718 if (!vfp_compare_operand (*op1, mode))
29719 *op1 = force_reg (mode, *op1);
29720 if (!vfp_compare_operand (*op2, mode))
29721 *op2 = force_reg (mode, *op2);
29722 return true;
29723 default:
29724 break;
29725 }
29726
29727 return false;
29728
29729 }
29730
29731 /* Maximum number of instructions to use when setting a block of memory. */
29732 static int
29733 arm_block_set_max_insns (void)
29734 {
29735 if (optimize_function_for_size_p (cfun))
29736 return 4;
29737 else
29738 return current_tune->max_insns_inline_memset;
29739 }
29740
29741 /* Return TRUE if it's profitable to set block of memory for
29742 non-vectorized case. VAL is the value to set the memory
29743 with. LENGTH is the number of bytes to set. ALIGN is the
29744 alignment of the destination memory in bytes. UNALIGNED_P
29745 is TRUE if we can only set the memory with instructions
29746 meeting alignment requirements. USE_STRD_P is TRUE if we
29747 can use strd to set the memory. */
29748 static bool
29749 arm_block_set_non_vect_profit_p (rtx val,
29750 unsigned HOST_WIDE_INT length,
29751 unsigned HOST_WIDE_INT align,
29752 bool unaligned_p, bool use_strd_p)
29753 {
29754 int num = 0;
29755 /* For 0-7 leftover bytes, this table gives the minimum number of
29756 strb/strh/str instructions needed to store them. */
29757 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29758
29759 if (unaligned_p)
29760 {
29761 num = arm_const_inline_cost (SET, val);
29762 num += length / align + length % align;
29763 }
29764 else if (use_strd_p)
29765 {
29766 num = arm_const_double_inline_cost (val);
29767 num += (length >> 3) + leftover[length & 7];
29768 }
29769 else
29770 {
29771 num = arm_const_inline_cost (SET, val);
29772 num += (length >> 2) + leftover[length & 3];
29773 }
29774
29775 /* We may be able to combine the last STRH/STRB pair into a single STR
29776 by shifting one byte back. */
29777 if (unaligned_access && length > 3 && (length & 3) == 3)
29778 num--;
29779
29780 return (num <= arm_block_set_max_insns ());
29781 }
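
/* Worked example of the estimate above, assuming a 4-byte aligned destination
   and no strd: for LENGTH == 15 the word loop needs 15 >> 2 == 3 stores plus
   leftover[3] == 2 stores (strh + strb), on top of the cost of loading the
   constant; when unaligned access is available the trailing strh/strb pair is
   counted as a single str, saving one instruction.  */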
29782
29783 /* Return TRUE if it's profitable to set block of memory for
29784 vectorized case. LENGTH is the number of bytes to set.
29785 ALIGN is the alignment of destination memory in bytes.
29786 MODE is the vector mode used to set the memory. */
29787 static bool
29788 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29789 unsigned HOST_WIDE_INT align,
29790 machine_mode mode)
29791 {
29792 int num;
29793 bool unaligned_p = ((align & 3) != 0);
29794 unsigned int nelt = GET_MODE_NUNITS (mode);
29795
29796 /* Instruction loading constant value. */
29797 num = 1;
29798 /* Instructions storing the memory. */
29799 num += (length + nelt - 1) / nelt;
29800 /* Instructions adjusting the address expression. We only need to
29801 adjust it if the destination is 4-byte aligned and the leftover
29802 bytes can only be stored with a misaligned store instruction. */
29803 if (!unaligned_p && (length & 3) != 0)
29804 num++;
29805
29806 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29807 if (!unaligned_p && mode == V16QImode)
29808 num--;
29809
29810 return (num <= arm_block_set_max_insns ());
29811 }
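
/* Worked example of the estimate above: for LENGTH == 16 with a 4-byte
   aligned destination and V16QImode, num is 1 (loading the constant vector)
   + 1 (one 16-byte store) - 1 (the aligned vst1:v16qi credit), i.e. 1 in
   total, so the expansion is considered profitable.  */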
29812
29813 /* Set a block of memory using vectorization instructions for the
29814 unaligned case. We fill the first LENGTH bytes of the memory
29815 area starting from DSTBASE with byte constant VALUE. ALIGN is
29816 the alignment requirement of memory. Return TRUE if succeeded. */
29817 static bool
29818 arm_block_set_unaligned_vect (rtx dstbase,
29819 unsigned HOST_WIDE_INT length,
29820 unsigned HOST_WIDE_INT value,
29821 unsigned HOST_WIDE_INT align)
29822 {
29823 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29824 rtx dst, mem;
29825 rtx val_elt, val_vec, reg;
29826 rtx rval[MAX_VECT_LEN];
29827 rtx (*gen_func) (rtx, rtx);
29828 machine_mode mode;
29829 unsigned HOST_WIDE_INT v = value;
29830 unsigned int offset = 0;
29831 gcc_assert ((align & 0x3) != 0);
29832 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29833 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29834 if (length >= nelt_v16)
29835 {
29836 mode = V16QImode;
29837 gen_func = gen_movmisalignv16qi;
29838 }
29839 else
29840 {
29841 mode = V8QImode;
29842 gen_func = gen_movmisalignv8qi;
29843 }
29844 nelt_mode = GET_MODE_NUNITS (mode);
29845 gcc_assert (length >= nelt_mode);
29846 /* Skip if it isn't profitable. */
29847 if (!arm_block_set_vect_profit_p (length, align, mode))
29848 return false;
29849
29850 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29851 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29852
29853 v = sext_hwi (v, BITS_PER_WORD);
29854 val_elt = GEN_INT (v);
29855 for (j = 0; j < nelt_mode; j++)
29856 rval[j] = val_elt;
29857
29858 reg = gen_reg_rtx (mode);
29859 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29860 /* Emit instruction loading the constant value. */
29861 emit_move_insn (reg, val_vec);
29862
29863 /* Handle nelt_mode bytes in a vector. */
29864 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29865 {
29866 emit_insn ((*gen_func) (mem, reg));
29867 if (i + 2 * nelt_mode <= length)
29868 {
29869 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29870 offset += nelt_mode;
29871 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29872 }
29873 }
29874
29875 /* If at least nelt_v8 bytes are left over, we must be in
29876 V16QI mode. */
29877 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29878
29879 /* Handle (8, 16) bytes leftover. */
29880 if (i + nelt_v8 < length)
29881 {
29882 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29883 offset += length - i;
29884 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29885
29886 /* We are shifting bytes back, set the alignment accordingly. */
29887 if ((length & 1) != 0 && align >= 2)
29888 set_mem_align (mem, BITS_PER_UNIT);
29889
29890 emit_insn (gen_movmisalignv16qi (mem, reg));
29891 }
29892 /* Handle (0, 8] bytes leftover. */
29893 else if (i < length && i + nelt_v8 >= length)
29894 {
29895 if (mode == V16QImode)
29896 reg = gen_lowpart (V8QImode, reg);
29897
29898 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29899 + (nelt_mode - nelt_v8))));
29900 offset += (length - i) + (nelt_mode - nelt_v8);
29901 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29902
29903 /* We are shifting bytes back, set the alignment accordingly. */
29904 if ((length & 1) != 0 && align >= 2)
29905 set_mem_align (mem, BITS_PER_UNIT);
29906
29907 emit_insn (gen_movmisalignv8qi (mem, reg));
29908 }
29909
29910 return true;
29911 }
29912
29913 /* Set a block of memory using vectorization instructions for the
29914 aligned case. We fill the first LENGTH bytes of the memory area
29915 starting from DSTBASE with byte constant VALUE. ALIGN is the
29916 alignment requirement of memory. Return TRUE if succeeded. */
29917 static bool
29918 arm_block_set_aligned_vect (rtx dstbase,
29919 unsigned HOST_WIDE_INT length,
29920 unsigned HOST_WIDE_INT value,
29921 unsigned HOST_WIDE_INT align)
29922 {
29923 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29924 rtx dst, addr, mem;
29925 rtx val_elt, val_vec, reg;
29926 rtx rval[MAX_VECT_LEN];
29927 machine_mode mode;
29928 unsigned HOST_WIDE_INT v = value;
29929 unsigned int offset = 0;
29930
29931 gcc_assert ((align & 0x3) == 0);
29932 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29933 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29934 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29935 mode = V16QImode;
29936 else
29937 mode = V8QImode;
29938
29939 nelt_mode = GET_MODE_NUNITS (mode);
29940 gcc_assert (length >= nelt_mode);
29941 /* Skip if it isn't profitable. */
29942 if (!arm_block_set_vect_profit_p (length, align, mode))
29943 return false;
29944
29945 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29946
29947 v = sext_hwi (v, BITS_PER_WORD);
29948 val_elt = GEN_INT (v);
29949 for (j = 0; j < nelt_mode; j++)
29950 rval[j] = val_elt;
29951
29952 reg = gen_reg_rtx (mode);
29953 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29954 /* Emit instruction loading the constant value. */
29955 emit_move_insn (reg, val_vec);
29956
29957 i = 0;
29958 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29959 if (mode == V16QImode)
29960 {
29961 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29962 emit_insn (gen_movmisalignv16qi (mem, reg));
29963 i += nelt_mode;
29964 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29965 if (i + nelt_v8 < length && i + nelt_v16 > length)
29966 {
29967 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29968 offset += length - nelt_mode;
29969 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29970 /* We are shifting bytes back, set the alignment accordingly. */
29971 if ((length & 0x3) == 0)
29972 set_mem_align (mem, BITS_PER_UNIT * 4);
29973 else if ((length & 0x1) == 0)
29974 set_mem_align (mem, BITS_PER_UNIT * 2);
29975 else
29976 set_mem_align (mem, BITS_PER_UNIT);
29977
29978 emit_insn (gen_movmisalignv16qi (mem, reg));
29979 return true;
29980 }
29981 /* Fall through for bytes leftover. */
29982 mode = V8QImode;
29983 nelt_mode = GET_MODE_NUNITS (mode);
29984 reg = gen_lowpart (V8QImode, reg);
29985 }
29986
29987 /* Handle 8 bytes in a vector. */
29988 for (; (i + nelt_mode <= length); i += nelt_mode)
29989 {
29990 addr = plus_constant (Pmode, dst, i);
29991 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29992 emit_move_insn (mem, reg);
29993 }
29994
29995 /* Handle single word leftover by shifting 4 bytes back. We can
29996 use aligned access for this case. */
29997 if (i + UNITS_PER_WORD == length)
29998 {
29999 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
30000 offset += i - UNITS_PER_WORD;
30001 mem = adjust_automodify_address (dstbase, mode, addr, offset);
30002 /* We are shifting 4 bytes back, set the alignment accordingly. */
30003 if (align > UNITS_PER_WORD)
30004 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
30005
30006 emit_move_insn (mem, reg);
30007 }
30008 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30009 We have to use unaligned access for this case. */
30010 else if (i < length)
30011 {
30012 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30013 offset += length - nelt_mode;
30014 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30015 /* We are shifting bytes back, set the alignment accordingly. */
30016 if ((length & 1) == 0)
30017 set_mem_align (mem, BITS_PER_UNIT * 2);
30018 else
30019 set_mem_align (mem, BITS_PER_UNIT);
30020
30021 emit_insn (gen_movmisalignv8qi (mem, reg));
30022 }
30023
30024 return true;
30025 }
30026
30027 /* Set a block of memory using plain strh/strb instructions, only
30028 using instructions allowed by ALIGN on the processor. We fill the
30029 first LENGTH bytes of the memory area starting from DSTBASE
30030 with byte constant VALUE. ALIGN is the alignment requirement
30031 of memory. */
30032 static bool
30033 arm_block_set_unaligned_non_vect (rtx dstbase,
30034 unsigned HOST_WIDE_INT length,
30035 unsigned HOST_WIDE_INT value,
30036 unsigned HOST_WIDE_INT align)
30037 {
30038 unsigned int i;
30039 rtx dst, addr, mem;
30040 rtx val_exp, val_reg, reg;
30041 machine_mode mode;
30042 HOST_WIDE_INT v = value;
30043
30044 gcc_assert (align == 1 || align == 2);
30045
30046 if (align == 2)
30047 v |= (value << BITS_PER_UNIT);
30048
30049 v = sext_hwi (v, BITS_PER_WORD);
30050 val_exp = GEN_INT (v);
30051 /* Skip if it isn't profitable. */
30052 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30053 align, true, false))
30054 return false;
30055
30056 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30057 mode = (align == 2 ? HImode : QImode);
30058 val_reg = force_reg (SImode, val_exp);
30059 reg = gen_lowpart (mode, val_reg);
30060
30061 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30062 {
30063 addr = plus_constant (Pmode, dst, i);
30064 mem = adjust_automodify_address (dstbase, mode, addr, i);
30065 emit_move_insn (mem, reg);
30066 }
30067
30068 /* Handle single byte leftover. */
30069 if (i + 1 == length)
30070 {
30071 reg = gen_lowpart (QImode, val_reg);
30072 addr = plus_constant (Pmode, dst, i);
30073 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30074 emit_move_insn (mem, reg);
30075 i++;
30076 }
30077
30078 gcc_assert (i == length);
30079 return true;
30080 }
30081
30082 /* Set a block of memory using plain strd/str/strh/strb instructions,
30083 to permit unaligned stores on processors which support unaligned
30084 semantics for those instructions. We fill the first LENGTH bytes
30085 of the memory area starting from DSTBASE with byte constant VALUE.
30086 ALIGN is the alignment requirement of memory. */
30087 static bool
30088 arm_block_set_aligned_non_vect (rtx dstbase,
30089 unsigned HOST_WIDE_INT length,
30090 unsigned HOST_WIDE_INT value,
30091 unsigned HOST_WIDE_INT align)
30092 {
30093 unsigned int i;
30094 rtx dst, addr, mem;
30095 rtx val_exp, val_reg, reg;
30096 unsigned HOST_WIDE_INT v;
30097 bool use_strd_p;
30098
30099 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30100 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30101
30102 v = (value | (value << 8) | (value << 16) | (value << 24));
30103 if (length < UNITS_PER_WORD)
30104 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30105
30106 if (use_strd_p)
30107 v |= (v << BITS_PER_WORD);
30108 else
30109 v = sext_hwi (v, BITS_PER_WORD);
30110
30111 val_exp = GEN_INT (v);
30112 /* Skip if it isn't profitable. */
30113 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30114 align, false, use_strd_p))
30115 {
30116 if (!use_strd_p)
30117 return false;
30118
30119 /* Try without strd. */
30120 v = (v >> BITS_PER_WORD);
30121 v = sext_hwi (v, BITS_PER_WORD);
30122 val_exp = GEN_INT (v);
30123 use_strd_p = false;
30124 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30125 align, false, use_strd_p))
30126 return false;
30127 }
30128
30129 i = 0;
30130 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30131 /* Handle double words using strd if possible. */
30132 if (use_strd_p)
30133 {
30134 val_reg = force_reg (DImode, val_exp);
30135 reg = val_reg;
30136 for (; (i + 8 <= length); i += 8)
30137 {
30138 addr = plus_constant (Pmode, dst, i);
30139 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30140 emit_move_insn (mem, reg);
30141 }
30142 }
30143 else
30144 val_reg = force_reg (SImode, val_exp);
30145
30146 /* Handle words. */
30147 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30148 for (; (i + 4 <= length); i += 4)
30149 {
30150 addr = plus_constant (Pmode, dst, i);
30151 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30152 if ((align & 3) == 0)
30153 emit_move_insn (mem, reg);
30154 else
30155 emit_insn (gen_unaligned_storesi (mem, reg));
30156 }
30157
30158 /* Merge last pair of STRH and STRB into a STR if possible. */
30159 if (unaligned_access && i > 0 && (i + 3) == length)
30160 {
30161 addr = plus_constant (Pmode, dst, i - 1);
30162 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30163 /* We are shifting one byte back, set the alignment accordingly. */
30164 if ((align & 1) == 0)
30165 set_mem_align (mem, BITS_PER_UNIT);
30166
30167 /* Most likely this is an unaligned access, and we can't tell at
30168 compilation time. */
30169 emit_insn (gen_unaligned_storesi (mem, reg));
30170 return true;
30171 }
30172
30173 /* Handle half word leftover. */
30174 if (i + 2 <= length)
30175 {
30176 reg = gen_lowpart (HImode, val_reg);
30177 addr = plus_constant (Pmode, dst, i);
30178 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30179 if ((align & 1) == 0)
30180 emit_move_insn (mem, reg);
30181 else
30182 emit_insn (gen_unaligned_storehi (mem, reg));
30183
30184 i += 2;
30185 }
30186
30187 /* Handle single byte leftover. */
30188 if (i + 1 == length)
30189 {
30190 reg = gen_lowpart (QImode, val_reg);
30191 addr = plus_constant (Pmode, dst, i);
30192 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30193 emit_move_insn (mem, reg);
30194 }
30195
30196 return true;
30197 }
30198
30199 /* Set a block of memory using vectorization instructions for both
30200 aligned and unaligned cases. We fill the first LENGTH bytes of
30201 the memory area starting from DSTBASE with byte constant VALUE.
30202 ALIGN is the alignment requirement of memory. */
30203 static bool
30204 arm_block_set_vect (rtx dstbase,
30205 unsigned HOST_WIDE_INT length,
30206 unsigned HOST_WIDE_INT value,
30207 unsigned HOST_WIDE_INT align)
30208 {
30209 /* Check whether we need to use unaligned store instruction. */
30210 if (((align & 3) != 0 || (length & 3) != 0)
30211 /* Check whether unaligned store instruction is available. */
30212 && (!unaligned_access || BYTES_BIG_ENDIAN))
30213 return false;
30214
30215 if ((align & 3) == 0)
30216 return arm_block_set_aligned_vect (dstbase, length, value, align);
30217 else
30218 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30219 }
30220
30221 /* Expand a string store operation. First we try to do it using
30222 vectorization instructions, then fall back to ARM unaligned access and
30223 double-word stores if profitable. OPERANDS[0] is the destination,
30224 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30225 initialize the memory with, OPERANDS[3] is the known alignment of the
30226 destination. */
30227 bool
30228 arm_gen_setmem (rtx *operands)
30229 {
30230 rtx dstbase = operands[0];
30231 unsigned HOST_WIDE_INT length;
30232 unsigned HOST_WIDE_INT value;
30233 unsigned HOST_WIDE_INT align;
30234
30235 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30236 return false;
30237
30238 length = UINTVAL (operands[1]);
30239 if (length > 64)
30240 return false;
30241
30242 value = (UINTVAL (operands[2]) & 0xFF);
30243 align = UINTVAL (operands[3]);
30244 if (TARGET_NEON && length >= 8
30245 && current_tune->string_ops_prefer_neon
30246 && arm_block_set_vect (dstbase, length, value, align))
30247 return true;
30248
30249 if (!unaligned_access && (align & 3) != 0)
30250 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30251
30252 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30253 }
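
/* Usage sketch (the caller is assumed to be the setmem expander in arm.md):
   for a memset of 15 bytes of value 0xAB into a 4-byte aligned destination,
   OPERANDS[1] is (const_int 15), OPERANDS[2] is (const_int 0xAB) and
   OPERANDS[3] is (const_int 4); with NEON and a tuning that prefers it the
   vector path is tried first, otherwise the str/strh/strb path is used.  */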
30254
30255
30256 static bool
30257 arm_macro_fusion_p (void)
30258 {
30259 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30260 }
30261
30262 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30263 for MOVW / MOVT macro fusion. */
30264
30265 static bool
30266 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30267 {
30268 /* We are trying to fuse
30269 movw imm / movt imm
30270 instructions as a group that gets scheduled together. */
30271
30272 rtx set_dest = SET_DEST (curr_set);
30273
30274 if (GET_MODE (set_dest) != SImode)
30275 return false;
30276
30277 /* We are trying to match:
30278 prev (movw) == (set (reg r0) (const_int imm16))
30279 curr (movt) == (set (zero_extract (reg r0)
30280 (const_int 16)
30281 (const_int 16))
30282 (const_int imm16_1))
30283 or
30284 prev (movw) == (set (reg r1)
30285 (high (symbol_ref ("SYM"))))
30286 curr (movt) == (set (reg r0)
30287 (lo_sum (reg r1)
30288 (symbol_ref ("SYM")))) */
30289
30290 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30291 {
30292 if (CONST_INT_P (SET_SRC (curr_set))
30293 && CONST_INT_P (SET_SRC (prev_set))
30294 && REG_P (XEXP (set_dest, 0))
30295 && REG_P (SET_DEST (prev_set))
30296 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30297 return true;
30298
30299 }
30300 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30301 && REG_P (SET_DEST (curr_set))
30302 && REG_P (SET_DEST (prev_set))
30303 && GET_CODE (SET_SRC (prev_set)) == HIGH
30304 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30305 return true;
30306
30307 return false;
30308 }
30309
30310 static bool
30311 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30312 {
30313 rtx prev_set = single_set (prev);
30314 rtx curr_set = single_set (curr);
30315
30316 if (!prev_set
30317 || !curr_set)
30318 return false;
30319
30320 if (any_condjump_p (curr))
30321 return false;
30322
30323 if (!arm_macro_fusion_p ())
30324 return false;
30325
30326 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30327 && aarch_crypto_can_dual_issue (prev, curr))
30328 return true;
30329
30330 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30331 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30332 return true;
30333
30334 return false;
30335 }
30336
30337 /* Return true iff the instruction fusion described by OP is enabled. */
30338 bool
30339 arm_fusion_enabled_p (tune_params::fuse_ops op)
30340 {
30341 return current_tune->fusible_ops & op;
30342 }
30343
30344 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30345 scheduled for speculative execution. Reject the long-running division
30346 and square-root instructions. */
30347
30348 static bool
30349 arm_sched_can_speculate_insn (rtx_insn *insn)
30350 {
30351 switch (get_attr_type (insn))
30352 {
30353 case TYPE_SDIV:
30354 case TYPE_UDIV:
30355 case TYPE_FDIVS:
30356 case TYPE_FDIVD:
30357 case TYPE_FSQRTS:
30358 case TYPE_FSQRTD:
30359 case TYPE_NEON_FP_SQRT_S:
30360 case TYPE_NEON_FP_SQRT_D:
30361 case TYPE_NEON_FP_SQRT_S_Q:
30362 case TYPE_NEON_FP_SQRT_D_Q:
30363 case TYPE_NEON_FP_DIV_S:
30364 case TYPE_NEON_FP_DIV_D:
30365 case TYPE_NEON_FP_DIV_S_Q:
30366 case TYPE_NEON_FP_DIV_D_Q:
30367 return false;
30368 default:
30369 return true;
30370 }
30371 }
30372
30373 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30374
30375 static unsigned HOST_WIDE_INT
30376 arm_asan_shadow_offset (void)
30377 {
30378 return HOST_WIDE_INT_1U << 29;
30379 }
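
/* With the usual shadow scale of 3, a byte at ADDR is shadowed at
   (ADDR >> 3) + (1 << 29); 1 << 29 (0x20000000) is the conventional
   AddressSanitizer shadow offset for 32-bit targets.  */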
30380
30381
30382 /* This is a temporary fix for PR60655. Ideally we need
30383 to handle most of these cases in the generic part but
30384 currently we reject minus (..) (sym_ref). We try to
30385 ameliorate the case with minus (sym_ref1) (sym_ref2)
30386 where they are in the same section. */
30387
30388 static bool
30389 arm_const_not_ok_for_debug_p (rtx p)
30390 {
30391 tree decl_op0 = NULL;
30392 tree decl_op1 = NULL;
30393
30394 if (GET_CODE (p) == MINUS)
30395 {
30396 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30397 {
30398 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30399 if (decl_op1
30400 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30401 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30402 {
30403 if ((VAR_P (decl_op1)
30404 || TREE_CODE (decl_op1) == CONST_DECL)
30405 && (VAR_P (decl_op0)
30406 || TREE_CODE (decl_op0) == CONST_DECL))
30407 return (get_variable_section (decl_op1, false)
30408 != get_variable_section (decl_op0, false));
30409
30410 if (TREE_CODE (decl_op1) == LABEL_DECL
30411 && TREE_CODE (decl_op0) == LABEL_DECL)
30412 return (DECL_CONTEXT (decl_op1)
30413 != DECL_CONTEXT (decl_op0));
30414 }
30415
30416 return true;
30417 }
30418 }
30419
30420 return false;
30421 }
30422
30423 /* Return TRUE if X is a reference to a value in a constant pool. */
30424 extern bool
30425 arm_is_constant_pool_ref (rtx x)
30426 {
30427 return (MEM_P (x)
30428 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30429 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30430 }
30431
30432 /* Remember the last target of arm_set_current_function. */
30433 static GTY(()) tree arm_previous_fndecl;
30434
30435 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30436
30437 void
30438 save_restore_target_globals (tree new_tree)
30439 {
30440 /* If we have a previous state, use it. */
30441 if (TREE_TARGET_GLOBALS (new_tree))
30442 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30443 else if (new_tree == target_option_default_node)
30444 restore_target_globals (&default_target_globals);
30445 else
30446 {
30447 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30448 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30449 }
30450
30451 arm_option_params_internal ();
30452 }
30453
30454 /* Invalidate arm_previous_fndecl. */
30455
30456 void
30457 arm_reset_previous_fndecl (void)
30458 {
30459 arm_previous_fndecl = NULL_TREE;
30460 }
30461
30462 /* Establish appropriate back-end context for processing the function
30463 FNDECL. The argument might be NULL to indicate processing at top
30464 level, outside of any function scope. */
30465
30466 static void
30467 arm_set_current_function (tree fndecl)
30468 {
30469 if (!fndecl || fndecl == arm_previous_fndecl)
30470 return;
30471
30472 tree old_tree = (arm_previous_fndecl
30473 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30474 : NULL_TREE);
30475
30476 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30477
30478 /* If the current function has no attributes but the previous one did,
30479 use the default node. */
30480 if (! new_tree && old_tree)
30481 new_tree = target_option_default_node;
30482
30483 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop
30484 to the default has already been handled by save_restore_target_globals from
30485 arm_pragma_target_parse. */
30486 if (old_tree == new_tree)
30487 return;
30488
30489 arm_previous_fndecl = fndecl;
30490
30491 /* First set the target options. */
30492 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30493
30494 save_restore_target_globals (new_tree);
30495 }
30496
30497 /* Implement TARGET_OPTION_PRINT. */
30498
30499 static void
30500 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30501 {
30502 int flags = ptr->x_target_flags;
30503 const char *fpu_name;
30504
30505 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30506 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30507
30508 fprintf (file, "%*sselected isa %s\n", indent, "",
30509 TARGET_THUMB2_P (flags) ? "thumb2" :
30510 TARGET_THUMB_P (flags) ? "thumb1" :
30511 "arm");
30512
30513 if (ptr->x_arm_arch_string)
30514 fprintf (file, "%*sselected architecture %s\n", indent, "",
30515 ptr->x_arm_arch_string);
30516
30517 if (ptr->x_arm_cpu_string)
30518 fprintf (file, "%*sselected CPU %s\n", indent, "",
30519 ptr->x_arm_cpu_string);
30520
30521 if (ptr->x_arm_tune_string)
30522 fprintf (file, "%*sselected tune %s\n", indent, "",
30523 ptr->x_arm_tune_string);
30524
30525 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30526 }
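/* A minimal sketch of the dump this produces (example values only,
   indentation omitted):
     selected isa thumb2
     selected architecture armv7-a
     selected CPU cortex-a9
     selected tune cortex-a9
     selected fpu auto
   The architecture, CPU and tune lines are only printed when the
   corresponding strings are set.  */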
30527
30528 /* Hook to determine if one function can safely inline another. */
30529
30530 static bool
30531 arm_can_inline_p (tree caller, tree callee)
30532 {
30533 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30534 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30535 bool can_inline = true;
30536
30537 struct cl_target_option *caller_opts
30538 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30539 : target_option_default_node);
30540
30541 struct cl_target_option *callee_opts
30542 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30543 : target_option_default_node);
30544
30545 if (callee_opts == caller_opts)
30546 return true;
30547
30548 /* Callee's ISA features should be a subset of the caller's. */
30549 struct arm_build_target caller_target;
30550 struct arm_build_target callee_target;
30551 caller_target.isa = sbitmap_alloc (isa_num_bits);
30552 callee_target.isa = sbitmap_alloc (isa_num_bits);
30553
30554 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30555 false);
30556 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30557 false);
30558 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30559 can_inline = false;
30560
30561 sbitmap_free (caller_target.isa);
30562 sbitmap_free (callee_target.isa);
30563
30564 /* It is OK to inline between different modes.
30565 Functions with mode-specific instructions, e.g. using asm,
30566 must be explicitly protected with noinline.  */
30567 return can_inline;
30568 }
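/* Illustrative user-level sketch (assumed source, not part of this file):
   because the callee's ISA bits must be a subset of the caller's,
     __attribute__ ((target ("fpu=neon")))
     static int callee (int x) { return x + 1; }
     int caller (int x) { return callee (x); }
   would not be inlined into a caller whose configuration lacks the NEON
   feature bits, while two functions built with identical target options
   always pass this check.  */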
30569
30570 /* Hook to fix function's alignment affected by target attribute. */
30571
30572 static void
30573 arm_relayout_function (tree fndecl)
30574 {
30575 if (DECL_USER_ALIGN (fndecl))
30576 return;
30577
30578 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30579
30580 if (!callee_tree)
30581 callee_tree = target_option_default_node;
30582
30583 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30584 SET_DECL_ALIGN
30585 (fndecl,
30586 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30587 }
30588
30589 /* Inner function to process the attribute ((target (...))): take an argument
30590 and set the current options from that argument.  If we have a list,
30591 recursively process each element of the list.  */
30592
30593 static bool
30594 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30595 {
30596 if (TREE_CODE (args) == TREE_LIST)
30597 {
30598 bool ret = true;
30599
30600 for (; args; args = TREE_CHAIN (args))
30601 if (TREE_VALUE (args)
30602 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30603 ret = false;
30604 return ret;
30605 }
30606
30607 else if (TREE_CODE (args) != STRING_CST)
30608 {
30609 error ("attribute %<target%> argument not a string");
30610 return false;
30611 }
30612
30613 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30614 char *q;
30615
30616 while ((q = strtok (argstr, ",")) != NULL)
30617 {
30618 while (ISSPACE (*q)) ++q;
30619
30620 argstr = NULL;
30621 if (!strncmp (q, "thumb", 5))
30622 opts->x_target_flags |= MASK_THUMB;
30623
30624 else if (!strncmp (q, "arm", 3))
30625 opts->x_target_flags &= ~MASK_THUMB;
30626
30627 else if (!strncmp (q, "fpu=", 4))
30628 {
30629 int fpu_index;
30630 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30631 &fpu_index, CL_TARGET))
30632 {
30633 error ("invalid fpu for attribute(target(\"%s\"))", q);
30634 return false;
30635 }
30636 if (fpu_index == TARGET_FPU_auto)
30637 {
30638 /* This doesn't really make sense until we support
30639 general dynamic selection of the architecture and all
30640 sub-features. */
30641 sorry ("auto fpu selection not currently permitted here");
30642 return false;
30643 }
30644 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30645 }
30646 else
30647 {
30648 error ("attribute(target(\"%s\")) is unknown", q);
30649 return false;
30650 }
30651 }
30652
30653 return true;
30654 }
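/* Illustrative user-level sketch (assumed source, not part of this file):
   the strings accepted above are "thumb", "arm" and "fpu=<name>", comma
   separated, e.g.
     __attribute__ ((target ("thumb,fpu=vfpv4")))
     int f (int x) { return x * 2; }
   Any other string, for example target ("arch=armv7-a"), is reported as
   unknown by the final else clause.  */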
30655
30656 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30657
30658 tree
30659 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30660 struct gcc_options *opts_set)
30661 {
30662 struct cl_target_option cl_opts;
30663
30664 if (!arm_valid_target_attribute_rec (args, opts))
30665 return NULL_TREE;
30666
30667 cl_target_option_save (&cl_opts, opts);
30668 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30669 arm_option_check_internal (opts);
30670 /* Do any overrides, such as global options arch=xxx. */
30671 arm_option_override_internal (opts, opts_set);
30672
30673 return build_target_option_node (opts);
30674 }
30675
30676 static void
30677 add_attribute (const char * mode, tree *attributes)
30678 {
30679 size_t len = strlen (mode);
30680 tree value = build_string (len, mode);
30681
30682 TREE_TYPE (value) = build_array_type (char_type_node,
30683 build_index_type (size_int (len)));
30684
30685 *attributes = tree_cons (get_identifier ("target"),
30686 build_tree_list (NULL_TREE, value),
30687 *attributes);
30688 }
30689
30690 /* For testing.  Insert thumb or arm modes alternately on functions.  */
30691
30692 static void
30693 arm_insert_attributes (tree fndecl, tree * attributes)
30694 {
30695 const char *mode;
30696
30697 if (! TARGET_FLIP_THUMB)
30698 return;
30699
30700 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30701 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30702 return;
30703
30704 /* Nested definitions must inherit mode. */
30705 if (current_function_decl)
30706 {
30707 mode = TARGET_THUMB ? "thumb" : "arm";
30708 add_attribute (mode, attributes);
30709 return;
30710 }
30711
30712 /* If there is already a setting don't change it. */
30713 if (lookup_attribute ("target", *attributes) != NULL)
30714 return;
30715
30716 mode = thumb_flipper ? "thumb" : "arm";
30717 add_attribute (mode, attributes);
30718
30719 thumb_flipper = !thumb_flipper;
30720 }
30721
30722 /* Hook to validate attribute((target("string"))). */
30723
30724 static bool
30725 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30726 tree args, int ARG_UNUSED (flags))
30727 {
30728 bool ret = true;
30729 struct gcc_options func_options;
30730 tree cur_tree, new_optimize;
30731 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30732
30733 /* Get the optimization options of the current function. */
30734 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30735
30736 /* If the function changed the optimization levels as well as setting target
30737 options, start with the optimizations specified. */
30738 if (!func_optimize)
30739 func_optimize = optimization_default_node;
30740
30741 /* Init func_options. */
30742 memset (&func_options, 0, sizeof (func_options));
30743 init_options_struct (&func_options, NULL);
30744 lang_hooks.init_options_struct (&func_options);
30745
30746 /* Initialize func_options to the defaults. */
30747 cl_optimization_restore (&func_options,
30748 TREE_OPTIMIZATION (func_optimize));
30749
30750 cl_target_option_restore (&func_options,
30751 TREE_TARGET_OPTION (target_option_default_node));
30752
30753 /* Set func_options flags with new target mode. */
30754 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30755 &global_options_set);
30756
30757 if (cur_tree == NULL_TREE)
30758 ret = false;
30759
30760 new_optimize = build_optimization_node (&func_options);
30761
30762 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30763
30764 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30765
30766 finalize_options_struct (&func_options);
30767
30768 return ret;
30769 }
30770
30771 /* Match an ISA feature bitmap to a named FPU. We always use the
30772 first entry that exactly matches the feature set, so that we
30773 effectively canonicalize the FPU name for the assembler. */
30774 static const char*
30775 arm_identify_fpu_from_isa (sbitmap isa)
30776 {
30777 auto_sbitmap fpubits (isa_num_bits);
30778 auto_sbitmap cand_fpubits (isa_num_bits);
30779
30780 bitmap_and (fpubits, isa, isa_all_fpubits);
30781
30782 /* If there are no ISA feature bits relating to the FPU, we must be
30783 doing soft-float. */
30784 if (bitmap_empty_p (fpubits))
30785 return "softvfp";
30786
30787 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30788 {
30789 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30790 if (bitmap_equal_p (fpubits, cand_fpubits))
30791 return all_fpus[i].name;
30792 }
30793 /* We must find an entry, or things have gone wrong. */
30794 gcc_unreachable ();
30795 }
30796
30797 void
30798 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30799 {
30800
30801 fprintf (stream, "\t.syntax unified\n");
30802
30803 if (TARGET_THUMB)
30804 {
30805 if (is_called_in_ARM_mode (decl)
30806 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30807 && cfun->is_thunk))
30808 fprintf (stream, "\t.code 32\n");
30809 else if (TARGET_THUMB1)
30810 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30811 else
30812 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30813 }
30814 else
30815 fprintf (stream, "\t.arm\n");
30816
30817 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30818 (TARGET_SOFT_FLOAT
30819 ? "softvfp"
30820 : arm_identify_fpu_from_isa (arm_active_target.isa)));
30821
30822 if (TARGET_POKE_FUNCTION_NAME)
30823 arm_poke_function_name (stream, (const char *) name);
30824 }
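/* A minimal sketch of the directives this emits for a Thumb-2, soft-float
   function (the exact .fpu name depends on the active target):
       .syntax unified
       .thumb
       .thumb_func
       .fpu softvfp
   followed by the poked function name when TARGET_POKE_FUNCTION_NAME.  */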
30825
30826 /* If MEM is in the form of [base+offset], extract the two parts of the
30827 address, store them in BASE and OFFSET, and return true; otherwise
30828 return false after clearing BASE and OFFSET.  */
30829
30830 static bool
30831 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30832 {
30833 rtx addr;
30834
30835 gcc_assert (MEM_P (mem));
30836
30837 addr = XEXP (mem, 0);
30838
30839 /* Strip off const from addresses like (const (addr)). */
30840 if (GET_CODE (addr) == CONST)
30841 addr = XEXP (addr, 0);
30842
30843 if (GET_CODE (addr) == REG)
30844 {
30845 *base = addr;
30846 *offset = const0_rtx;
30847 return true;
30848 }
30849
30850 if (GET_CODE (addr) == PLUS
30851 && GET_CODE (XEXP (addr, 0)) == REG
30852 && CONST_INT_P (XEXP (addr, 1)))
30853 {
30854 *base = XEXP (addr, 0);
30855 *offset = XEXP (addr, 1);
30856 return true;
30857 }
30858
30859 *base = NULL_RTX;
30860 *offset = NULL_RTX;
30861
30862 return false;
30863 }
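/* For instance (illustrative RTL, not part of the build):
     (mem (reg r3))                       gives BASE = r3, OFFSET = 0
     (mem (plus (reg r3) (const_int 8)))  gives BASE = r3, OFFSET = 8
   while a register-plus-register address is rejected and both outputs
   are cleared.  */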
30864
30865 /* If INSN is a load or store whose address has the form [base+offset],
30866 extract the two parts and store them in BASE and OFFSET.  IS_LOAD is
30867 set to TRUE if it is a load.  Return TRUE if INSN is such an
30868 instruction, otherwise return FALSE.  */
30869
30870 static bool
30871 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30872 {
30873 rtx x, dest, src;
30874
30875 gcc_assert (INSN_P (insn));
30876 x = PATTERN (insn);
30877 if (GET_CODE (x) != SET)
30878 return false;
30879
30880 src = SET_SRC (x);
30881 dest = SET_DEST (x);
30882 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30883 {
30884 *is_load = false;
30885 extract_base_offset_in_addr (dest, base, offset);
30886 }
30887 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30888 {
30889 *is_load = true;
30890 extract_base_offset_in_addr (src, base, offset);
30891 }
30892 else
30893 return false;
30894
30895 return (*base != NULL_RTX && *offset != NULL_RTX);
30896 }
30897
30898 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30899
30900 Currently we only support fusing ldr and str instructions, so FUSION_PRI
30901 and PRI are only calculated for those instructions.  For other instructions,
30902 FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds of
30903 instruction fusion can be supported by returning different priorities.
30904
30905 It's important that irrelevant instructions get the largest FUSION_PRI. */
30906
30907 static void
30908 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30909 int *fusion_pri, int *pri)
30910 {
30911 int tmp, off_val;
30912 bool is_load;
30913 rtx base, offset;
30914
30915 gcc_assert (INSN_P (insn));
30916
30917 tmp = max_pri - 1;
30918 if (!fusion_load_store (insn, &base, &offset, &is_load))
30919 {
30920 *pri = tmp;
30921 *fusion_pri = tmp;
30922 return;
30923 }
30924
30925 /* Load goes first. */
30926 if (is_load)
30927 *fusion_pri = tmp - 1;
30928 else
30929 *fusion_pri = tmp - 2;
30930
30931 tmp /= 2;
30932
30933 /* INSN with smaller base register goes first. */
30934 tmp -= ((REGNO (base) & 0xff) << 20);
30935
30936 /* INSN with smaller offset goes first. */
30937 off_val = (int)(INTVAL (offset));
30938 if (off_val >= 0)
30939 tmp -= (off_val & 0xfffff);
30940 else
30941 tmp += ((- off_val) & 0xfffff);
30942
30943 *pri = tmp;
30944 return;
30945 }
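/* As an illustrative sketch (not part of the build), two loads such as
     ldr r0, [r3]
     ldr r1, [r3, #4]
   receive the same FUSION_PRI (max_pri - 2, since both are loads) and PRI
   values that differ only through their offsets, so the scheduler keeps
   them adjacent and orders the smaller offset first; an unrelated
   instruction gets the larger priority max_pri - 1 and is not drawn into
   the pair.  */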
30946
30947
30948 /* Construct and return a PARALLEL RTX vector with elements numbering the
30949 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30950 the vector - from the perspective of the architecture. This does not
30951 line up with GCC's perspective on lane numbers, so we end up with
30952 different masks depending on our target endian-ness. The diagram
30953 below may help. We must draw the distinction when building masks
30954 which select one half of the vector. An instruction selecting
30955 architectural low-lanes for a big-endian target must be described using
30956 a mask selecting GCC high-lanes.
30957
30958 Big-Endian Little-Endian
30959
30960 GCC 0 1 2 3 3 2 1 0
30961 | x | x | x | x | | x | x | x | x |
30962 Architecture 3 2 1 0 3 2 1 0
30963
30964 Low Mask: { 2, 3 } { 0, 1 }
30965 High Mask: { 0, 1 } { 2, 3 }
30966 */
30967
30968 rtx
30969 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30970 {
30971 int nunits = GET_MODE_NUNITS (mode);
30972 rtvec v = rtvec_alloc (nunits / 2);
30973 int high_base = nunits / 2;
30974 int low_base = 0;
30975 int base;
30976 rtx t1;
30977 int i;
30978
30979 if (BYTES_BIG_ENDIAN)
30980 base = high ? low_base : high_base;
30981 else
30982 base = high ? high_base : low_base;
30983
30984 for (i = 0; i < nunits / 2; i++)
30985 RTVEC_ELT (v, i) = GEN_INT (base + i);
30986
30987 t1 = gen_rtx_PARALLEL (mode, v);
30988 return t1;
30989 }
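/* For example (illustrative), for V4SImode with HIGH == false this returns
     little-endian:  (parallel [(const_int 0) (const_int 1)])
     big-endian:     (parallel [(const_int 2) (const_int 3)])
   matching the "Low Mask" row of the diagram above.  */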
30990
30991 /* Check OP for validity as a PARALLEL RTX vector with elements
30992 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30993 from the perspective of the architecture. See the diagram above
30994 arm_simd_vect_par_cnst_half for more details.  */
30995
30996 bool
30997 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
30998 bool high)
30999 {
31000 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
31001 HOST_WIDE_INT count_op = XVECLEN (op, 0);
31002 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
31003 int i = 0;
31004
31005 if (!VECTOR_MODE_P (mode))
31006 return false;
31007
31008 if (count_op != count_ideal)
31009 return false;
31010
31011 for (i = 0; i < count_ideal; i++)
31012 {
31013 rtx elt_op = XVECEXP (op, 0, i);
31014 rtx elt_ideal = XVECEXP (ideal, 0, i);
31015
31016 if (!CONST_INT_P (elt_op)
31017 || INTVAL (elt_ideal) != INTVAL (elt_op))
31018 return false;
31019 }
31020 return true;
31021 }
31022
31023 /* Can output mi_thunk for all cases except for non-zero vcall_offset
31024 in Thumb1. */
31025 static bool
31026 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
31027 const_tree)
31028 {
31029 /* For now, we punt and do not handle this for TARGET_THUMB1.  */
31030 if (vcall_offset && TARGET_THUMB1)
31031 return false;
31032
31033 /* Otherwise ok. */
31034 return true;
31035 }
31036
31037 /* Generate RTL for a conditional branch with rtx comparison CODE in
31038 mode CC_MODE. The destination of the unlikely conditional branch
31039 is LABEL_REF. */
31040
31041 void
31042 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
31043 rtx label_ref)
31044 {
31045 rtx x;
31046 x = gen_rtx_fmt_ee (code, VOIDmode,
31047 gen_rtx_REG (cc_mode, CC_REGNUM),
31048 const0_rtx);
31049
31050 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31051 gen_rtx_LABEL_REF (VOIDmode, label_ref),
31052 pc_rtx);
31053 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31054 }
31055
31056 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31057
31058 For pure-code sections there is no letter code for this attribute, so
31059 output all the section flags numerically when this is needed. */
31060
31061 static bool
31062 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31063 {
31064
31065 if (flags & SECTION_ARM_PURECODE)
31066 {
31067 *num = 0x20000000;
31068
31069 if (!(flags & SECTION_DEBUG))
31070 *num |= 0x2;
31071 if (flags & SECTION_EXCLUDE)
31072 *num |= 0x80000000;
31073 if (flags & SECTION_WRITE)
31074 *num |= 0x1;
31075 if (flags & SECTION_CODE)
31076 *num |= 0x4;
31077 if (flags & SECTION_MERGE)
31078 *num |= 0x10;
31079 if (flags & SECTION_STRINGS)
31080 *num |= 0x20;
31081 if (flags & SECTION_TLS)
31082 *num |= 0x400;
31083 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31084 *num |= 0x200;
31085
31086 return true;
31087 }
31088
31089 return false;
31090 }
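/* As a worked example, a plain executable pure-code section (SECTION_CODE
   set, neither SECTION_DEBUG nor SECTION_WRITE) comes out as
     0x20000000 | 0x2 | 0x4 == 0x20000006
   and that numeric value is emitted for the section's flags in place of
   the usual letter codes.  */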
31091
31092 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31093
31094 If pure-code is passed as an option, make sure all functions are in
31095 sections that have the SHF_ARM_PURECODE attribute. */
31096
31097 static section *
31098 arm_function_section (tree decl, enum node_frequency freq,
31099 bool startup, bool exit)
31100 {
31101 const char * section_name;
31102 section * sec;
31103
31104 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31105 return default_function_section (decl, freq, startup, exit);
31106
31107 if (!target_pure_code)
31108 return default_function_section (decl, freq, startup, exit);
31109
31110
31111 section_name = DECL_SECTION_NAME (decl);
31112
31113 /* If a function is not in a named section then it falls under the 'default'
31114 text section, also known as '.text'. We can preserve previous behavior as
31115 the default text section already has the SHF_ARM_PURECODE section
31116 attribute. */
31117 if (!section_name)
31118 {
31119 section *default_sec = default_function_section (decl, freq, startup,
31120 exit);
31121
31122 /* If default_sec is not null, then it must be a special section like for
31123 example .text.startup. We set the pure-code attribute and return the
31124 same section to preserve existing behavior. */
31125 if (default_sec)
31126 default_sec->common.flags |= SECTION_ARM_PURECODE;
31127 return default_sec;
31128 }
31129
31130 /* Otherwise look whether a section has already been created with
31131 'section_name'. */
31132 sec = get_named_section (decl, section_name, 0);
31133 if (!sec)
31134 /* If that is not the case, passing NULL as the section's name to
31135 'get_named_section' will create a section with the declaration's
31136 section name.  */
31137 sec = get_named_section (decl, NULL, 0);
31138
31139 /* Set the SHF_ARM_PURECODE attribute. */
31140 sec->common.flags |= SECTION_ARM_PURECODE;
31141
31142 return sec;
31143 }
31144
31145 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
31146
31147 If DECL is a function declaration and pure-code is passed as an option
31148 then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
31149 section's name and RELOC indicates whether the declaration's initializer
31150 may contain runtime relocations.  */
31151
31152 static unsigned int
31153 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31154 {
31155 unsigned int flags = default_section_type_flags (decl, name, reloc);
31156
31157 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31158 flags |= SECTION_ARM_PURECODE;
31159
31160 return flags;
31161 }
31162
31163 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31164
31165 static void
31166 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31167 rtx op0, rtx op1,
31168 rtx *quot_p, rtx *rem_p)
31169 {
31170 if (mode == SImode)
31171 gcc_assert (!TARGET_IDIV);
31172
31173 scalar_int_mode libval_mode
31174 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31175
31176 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31177 libval_mode,
31178 op0, GET_MODE (op0),
31179 op1, GET_MODE (op1));
31180
31181 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31182 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31183 GET_MODE_SIZE (mode));
31184
31185 gcc_assert (quotient);
31186 gcc_assert (remainder);
31187
31188 *quot_p = quotient;
31189 *rem_p = remainder;
31190 }
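/* As an illustrative sketch, for MODE == SImode the library call (e.g.
   __aeabi_idivmod) returns a single DImode value; the quotient is taken
   from the subreg at byte offset 0 and the remainder from the subreg at
   byte offset GET_MODE_SIZE (SImode) == 4, which is what the two
   simplify_gen_subreg calls above extract.  */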
31191
31192 /* This function checks for the availability of the coprocessor builtin passed
31193 in BUILTIN for the current target. Returns true if it is available and
31194 false otherwise. If a BUILTIN is passed for which this function has not
31195 been implemented it will cause an exception. */
31196
31197 bool
31198 arm_coproc_builtin_available (enum unspecv builtin)
31199 {
31200 /* None of these builtins are available in Thumb mode if the target only
31201 supports Thumb-1. */
31202 if (TARGET_THUMB1)
31203 return false;
31204
31205 switch (builtin)
31206 {
31207 case VUNSPEC_CDP:
31208 case VUNSPEC_LDC:
31209 case VUNSPEC_LDCL:
31210 case VUNSPEC_STC:
31211 case VUNSPEC_STCL:
31212 case VUNSPEC_MCR:
31213 case VUNSPEC_MRC:
31214 if (arm_arch4)
31215 return true;
31216 break;
31217 case VUNSPEC_CDP2:
31218 case VUNSPEC_LDC2:
31219 case VUNSPEC_LDC2L:
31220 case VUNSPEC_STC2:
31221 case VUNSPEC_STC2L:
31222 case VUNSPEC_MCR2:
31223 case VUNSPEC_MRC2:
31224 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31225 ARMv8-{A,M}. */
31226 if (arm_arch5)
31227 return true;
31228 break;
31229 case VUNSPEC_MCRR:
31230 case VUNSPEC_MRRC:
31231 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31232 ARMv8-{A,M}. */
31233 if (arm_arch6 || arm_arch5te)
31234 return true;
31235 break;
31236 case VUNSPEC_MCRR2:
31237 case VUNSPEC_MRRC2:
31238 if (arm_arch6)
31239 return true;
31240 break;
31241 default:
31242 gcc_unreachable ();
31243 }
31244 return false;
31245 }
31246
31247 /* This function returns true if OP is a valid memory operand for the ldc and
31248 stc coprocessor instructions and false otherwise. */
31249
31250 bool
31251 arm_coproc_ldc_stc_legitimate_address (rtx op)
31252 {
31253 HOST_WIDE_INT range;
31254 /* Has to be a memory operand. */
31255 if (!MEM_P (op))
31256 return false;
31257
31258 op = XEXP (op, 0);
31259
31260 /* We accept registers. */
31261 if (REG_P (op))
31262 return true;
31263
31264 switch (GET_CODE (op))
31265 {
31266 case PLUS:
31267 {
31268 /* Or registers with an offset. */
31269 if (!REG_P (XEXP (op, 0)))
31270 return false;
31271
31272 op = XEXP (op, 1);
31273
31274 /* The offset must be an immediate though. */
31275 if (!CONST_INT_P (op))
31276 return false;
31277
31278 range = INTVAL (op);
31279
31280 /* Within the range of [-1020,1020]. */
31281 if (!IN_RANGE (range, -1020, 1020))
31282 return false;
31283
31284 /* And a multiple of 4. */
31285 return (range % 4) == 0;
31286 }
31287 case PRE_INC:
31288 case POST_INC:
31289 case PRE_DEC:
31290 case POST_DEC:
31291 return REG_P (XEXP (op, 0));
31292 default:
31293 gcc_unreachable ();
31294 }
31295 return false;
31296 }
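/* For instance (illustrative), addresses such as [r0], [r1, #8] and
   [r2, #-1020] are accepted, while [r1, #6] (not a multiple of 4) and
   [r1, #1024] (outside [-1020, 1020]) are rejected; pre/post
   increment and decrement forms are accepted as long as the inner
   address is a plain register.  */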
31297
31298 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31299
31300 In VFPv1, VFP registers could only be accessed in the mode they were
31301 set, so subregs would be invalid there. However, we don't support
31302 VFPv1 at the moment, and the restriction was lifted in VFPv2.
31303
31304 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31305 VFP registers in little-endian order. We can't describe that accurately to
31306 GCC, so avoid taking subregs of such values.
31307
31308 The only exception is going from a 128-bit to a 64-bit type. In that
31309 case the data layout happens to be consistent for big-endian, so we
31310 explicitly allow that case. */
31311
31312 static bool
31313 arm_can_change_mode_class (machine_mode from, machine_mode to,
31314 reg_class_t rclass)
31315 {
31316 if (TARGET_BIG_END
31317 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
31318 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
31319 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
31320 && reg_classes_intersect_p (VFP_REGS, rclass))
31321 return false;
31322 return true;
31323 }
31324
31325 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
31326 strcpy from constants will be faster. */
31327
31328 static HOST_WIDE_INT
31329 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
31330 {
31331 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
31332 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
31333 return MAX (align, BITS_PER_WORD * factor);
31334 return align;
31335 }
31336
31337 #if CHECKING_P
31338 namespace selftest {
31339
31340 /* Scan the static data tables generated by parsecpu.awk looking for
31341 potential issues with the data. We primarily check for
31342 inconsistencies in the option extensions at present (extensions
31343 that duplicate others but aren't marked as aliases). Furthermore,
31344 for correct canonicalization later options must never be a subset
31345 of an earlier option. Any extension should also only specify other
31346 feature bits and never an architecture bit. The architecture is inferred
31347 from the declaration of the extension. */
31348 static void
31349 arm_test_cpu_arch_data (void)
31350 {
31351 const arch_option *arch;
31352 const cpu_option *cpu;
31353 auto_sbitmap target_isa (isa_num_bits);
31354 auto_sbitmap isa1 (isa_num_bits);
31355 auto_sbitmap isa2 (isa_num_bits);
31356
31357 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31358 {
31359 const cpu_arch_extension *ext1, *ext2;
31360
31361 if (arch->common.extensions == NULL)
31362 continue;
31363
31364 arm_initialize_isa (target_isa, arch->common.isa_bits);
31365
31366 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31367 {
31368 if (ext1->alias)
31369 continue;
31370
31371 arm_initialize_isa (isa1, ext1->isa_bits);
31372 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31373 {
31374 if (ext2->alias || ext1->remove != ext2->remove)
31375 continue;
31376
31377 arm_initialize_isa (isa2, ext2->isa_bits);
31378 /* If the option is a subset of the parent option, it doesn't
31379 add anything and so isn't useful. */
31380 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31381
31382 /* If the extension specifies any architectural bits then
31383 disallow it. Extensions should only specify feature bits. */
31384 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31385 }
31386 }
31387 }
31388
31389 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31390 {
31391 const cpu_arch_extension *ext1, *ext2;
31392
31393 if (cpu->common.extensions == NULL)
31394 continue;
31395
31396 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31397
31398 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31399 {
31400 if (ext1->alias)
31401 continue;
31402
31403 arm_initialize_isa (isa1, ext1->isa_bits);
31404 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31405 {
31406 if (ext2->alias || ext1->remove != ext2->remove)
31407 continue;
31408
31409 arm_initialize_isa (isa2, ext2->isa_bits);
31410 /* If the option is a subset of the parent option, it doesn't
31411 add anything and so isn't useful. */
31412 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31413
31414 /* If the extension specifies any architectural bits then
31415 disallow it. Extensions should only specify feature bits. */
31416 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31417 }
31418 }
31419 }
31420 }
31421
31422 /* Scan the static data tables generated by parsecpu.awk looking for
31423 potential issues with the data. Here we check for consistency between the
31424 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
31425 a feature bit that is not defined by any FPU flag. */
31426 static void
31427 arm_test_fpu_data (void)
31428 {
31429 auto_sbitmap isa_all_fpubits (isa_num_bits);
31430 auto_sbitmap fpubits (isa_num_bits);
31431 auto_sbitmap tmpset (isa_num_bits);
31432
31433 static const enum isa_feature fpu_bitlist[]
31434 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
31435 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
31436
31437 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31438 {
31439 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
31440 bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
31441 bitmap_clear (isa_all_fpubits);
31442 bitmap_copy (isa_all_fpubits, tmpset);
31443 }
31444
31445 if (!bitmap_empty_p (isa_all_fpubits))
31446 {
31447 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
31448 " group that are not defined by any FPU.\n"
31449 " Check your arm-cpus.in.\n");
31450 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
31451 }
31452 }
31453
31454 static void
31455 arm_run_selftests (void)
31456 {
31457 arm_test_cpu_arch_data ();
31458 arm_test_fpu_data ();
31459 }
31460 } /* Namespace selftest. */
31461
31462 #undef TARGET_RUN_TARGET_SELFTESTS
31463 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31464 #endif /* CHECKING_P */
31465
31466 struct gcc_target targetm = TARGET_INITIALIZER;
31467
31468 #include "gt-arm.h"