1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #include "config.h"
24 #include "system.h"
25 #include "coretypes.h"
26 #include "backend.h"
27 #include "target.h"
28 #include "rtl.h"
29 #include "tree.h"
30 #include "memmodel.h"
31 #include "cfghooks.h"
32 #include "df.h"
33 #include "tm_p.h"
34 #include "stringpool.h"
35 #include "attribs.h"
36 #include "optabs.h"
37 #include "regs.h"
38 #include "emit-rtl.h"
39 #include "recog.h"
40 #include "cgraph.h"
41 #include "diagnostic-core.h"
42 #include "alias.h"
43 #include "fold-const.h"
44 #include "stor-layout.h"
45 #include "calls.h"
46 #include "varasm.h"
47 #include "output.h"
48 #include "insn-attr.h"
49 #include "flags.h"
50 #include "reload.h"
51 #include "explow.h"
52 #include "expr.h"
53 #include "cfgrtl.h"
54 #include "sched-int.h"
55 #include "common/common-target.h"
56 #include "langhooks.h"
57 #include "intl.h"
58 #include "libfuncs.h"
59 #include "params.h"
60 #include "opts.h"
61 #include "dumpfile.h"
62 #include "target-globals.h"
63 #include "builtins.h"
64 #include "tm-constrs.h"
65 #include "rtl-iter.h"
66 #include "optabs-libfuncs.h"
67 #include "gimplify.h"
68 #include "gimple.h"
69 #include "selftest.h"
70
71 /* This file should be included last. */
72 #include "target-def.h"
73
74 /* Forward definitions of types. */
75 typedef struct minipool_node Mnode;
76 typedef struct minipool_fixup Mfix;
77
78 void (*arm_lang_output_object_attributes_hook)(void);
79
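/* A set of four integers, used by optimal_immediate_sequence to return
   the best sequence of immediates for synthesizing a constant.  */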
80 struct four_ints
81 {
82 int i[4];
83 };
84
85 /* Forward function declarations. */
86 static bool arm_const_not_ok_for_debug_p (rtx);
87 static int arm_needs_doubleword_align (machine_mode, const_tree);
88 static int arm_compute_static_chain_stack_bytes (void);
89 static arm_stack_offsets *arm_get_frame_offsets (void);
90 static void arm_compute_frame_layout (void);
91 static void arm_add_gc_roots (void);
92 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
93 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
94 static unsigned bit_count (unsigned long);
95 static unsigned bitmap_popcount (const sbitmap);
96 static int arm_address_register_rtx_p (rtx, int);
97 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
98 static bool is_called_in_ARM_mode (tree);
99 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
100 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
101 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
102 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
103 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
104 inline static int thumb1_index_register_rtx_p (rtx, int);
105 static int thumb_far_jump_used_p (void);
106 static bool thumb_force_lr_save (void);
107 static unsigned arm_size_return_regs (void);
108 static bool arm_assemble_integer (rtx, unsigned int, int);
109 static void arm_print_operand (FILE *, rtx, int);
110 static void arm_print_operand_address (FILE *, machine_mode, rtx);
111 static bool arm_print_operand_punct_valid_p (unsigned char code);
112 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
113 static arm_cc get_arm_condition_code (rtx);
114 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
115 static const char *output_multi_immediate (rtx *, const char *, const char *,
116 int, HOST_WIDE_INT);
117 static const char *shift_op (rtx, HOST_WIDE_INT *);
118 static struct machine_function *arm_init_machine_status (void);
119 static void thumb_exit (FILE *, int);
120 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
121 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
122 static Mnode *add_minipool_forward_ref (Mfix *);
123 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
124 static Mnode *add_minipool_backward_ref (Mfix *);
125 static void assign_minipool_offsets (Mfix *);
126 static void arm_print_value (FILE *, rtx);
127 static void dump_minipool (rtx_insn *);
128 static int arm_barrier_cost (rtx_insn *);
129 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
130 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
131 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
132 machine_mode, rtx);
133 static void arm_reorg (void);
134 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
135 static unsigned long arm_compute_save_reg0_reg12_mask (void);
136 static unsigned long arm_compute_save_core_reg_mask (void);
137 static unsigned long arm_isr_value (tree);
138 static unsigned long arm_compute_func_type (void);
139 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
140 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
141 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
142 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
143 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
144 #endif
145 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
146 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
147 static void arm_output_function_epilogue (FILE *);
148 static void arm_output_function_prologue (FILE *);
149 static int arm_comp_type_attributes (const_tree, const_tree);
150 static void arm_set_default_type_attributes (tree);
151 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
152 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
153 static int optimal_immediate_sequence (enum rtx_code code,
154 unsigned HOST_WIDE_INT val,
155 struct four_ints *return_sequence);
156 static int optimal_immediate_sequence_1 (enum rtx_code code,
157 unsigned HOST_WIDE_INT val,
158 struct four_ints *return_sequence,
159 int i);
160 static int arm_get_strip_length (int);
161 static bool arm_function_ok_for_sibcall (tree, tree);
162 static machine_mode arm_promote_function_mode (const_tree,
163 machine_mode, int *,
164 const_tree, int);
165 static bool arm_return_in_memory (const_tree, const_tree);
166 static rtx arm_function_value (const_tree, const_tree, bool);
167 static rtx arm_libcall_value_1 (machine_mode);
168 static rtx arm_libcall_value (machine_mode, const_rtx);
169 static bool arm_function_value_regno_p (const unsigned int);
170 static void arm_internal_label (FILE *, const char *, unsigned long);
171 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
172 tree);
173 static bool arm_have_conditional_execution (void);
174 static bool arm_cannot_force_const_mem (machine_mode, rtx);
175 static bool arm_legitimate_constant_p (machine_mode, rtx);
176 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
177 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
178 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
179 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
180 static void emit_constant_insn (rtx cond, rtx pattern);
181 static rtx_insn *emit_set_insn (rtx, rtx);
182 static rtx emit_multi_reg_push (unsigned long, unsigned long);
183 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
184 tree, bool);
185 static rtx arm_function_arg (cumulative_args_t, machine_mode,
186 const_tree, bool);
187 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
188 const_tree, bool);
189 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
190 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
191 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
192 const_tree);
193 static rtx aapcs_libcall_value (machine_mode);
194 static int aapcs_select_return_coproc (const_tree, const_tree);
195
196 #ifdef OBJECT_FORMAT_ELF
197 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
198 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
199 #endif
200 #ifndef ARM_PE
201 static void arm_encode_section_info (tree, rtx, int);
202 #endif
203
204 static void arm_file_end (void);
205 static void arm_file_start (void);
206 static void arm_insert_attributes (tree, tree *);
207
208 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
209 tree, int *, int);
210 static bool arm_pass_by_reference (cumulative_args_t,
211 machine_mode, const_tree, bool);
212 static bool arm_promote_prototypes (const_tree);
213 static bool arm_default_short_enums (void);
214 static bool arm_align_anon_bitfield (void);
215 static bool arm_return_in_msb (const_tree);
216 static bool arm_must_pass_in_stack (machine_mode, const_tree);
217 static bool arm_return_in_memory (const_tree, const_tree);
218 #if ARM_UNWIND_INFO
219 static void arm_unwind_emit (FILE *, rtx_insn *);
220 static bool arm_output_ttype (rtx);
221 static void arm_asm_emit_except_personality (rtx);
222 #endif
223 static void arm_asm_init_sections (void);
224 static rtx arm_dwarf_register_span (rtx);
225
226 static tree arm_cxx_guard_type (void);
227 static bool arm_cxx_guard_mask_bit (void);
228 static tree arm_get_cookie_size (tree);
229 static bool arm_cookie_has_size (void);
230 static bool arm_cxx_cdtor_returns_this (void);
231 static bool arm_cxx_key_method_may_be_inline (void);
232 static void arm_cxx_determine_class_data_visibility (tree);
233 static bool arm_cxx_class_data_always_comdat (void);
234 static bool arm_cxx_use_aeabi_atexit (void);
235 static void arm_init_libfuncs (void);
236 static tree arm_build_builtin_va_list (void);
237 static void arm_expand_builtin_va_start (tree, rtx);
238 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
239 static void arm_option_override (void);
240 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
241 static void arm_option_restore (struct gcc_options *,
242 struct cl_target_option *);
243 static void arm_override_options_after_change (void);
244 static void arm_option_print (FILE *, int, struct cl_target_option *);
245 static void arm_set_current_function (tree);
246 static bool arm_can_inline_p (tree, tree);
247 static void arm_relayout_function (tree);
248 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
249 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
250 static bool arm_sched_can_speculate_insn (rtx_insn *);
251 static bool arm_macro_fusion_p (void);
252 static bool arm_cannot_copy_insn_p (rtx_insn *);
253 static int arm_issue_rate (void);
254 static int arm_first_cycle_multipass_dfa_lookahead (void);
255 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
256 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
257 static bool arm_output_addr_const_extra (FILE *, rtx);
258 static bool arm_allocate_stack_slots_for_args (void);
259 static bool arm_warn_func_return (tree);
260 static tree arm_promoted_type (const_tree t);
261 static bool arm_scalar_mode_supported_p (scalar_mode);
262 static bool arm_frame_pointer_required (void);
263 static bool arm_can_eliminate (const int, const int);
264 static void arm_asm_trampoline_template (FILE *);
265 static void arm_trampoline_init (rtx, tree, rtx);
266 static rtx arm_trampoline_adjust_address (rtx);
267 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
268 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
269 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
270 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
271 static bool arm_array_mode_supported_p (machine_mode,
272 unsigned HOST_WIDE_INT);
273 static machine_mode arm_preferred_simd_mode (scalar_mode);
274 static bool arm_class_likely_spilled_p (reg_class_t);
275 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
276 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
277 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
278 const_tree type,
279 int misalignment,
280 bool is_packed);
281 static void arm_conditional_register_usage (void);
282 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
283 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
284 static unsigned int arm_autovectorize_vector_sizes (void);
285 static int arm_default_branch_cost (bool, bool);
286 static int arm_cortex_a5_branch_cost (bool, bool);
287 static int arm_cortex_m_branch_cost (bool, bool);
288 static int arm_cortex_m7_branch_cost (bool, bool);
289
290 static bool arm_vectorize_vec_perm_const_ok (machine_mode vmode,
291 const unsigned char *sel);
292
293 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
294
295 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
296 tree vectype,
297 int misalign ATTRIBUTE_UNUSED);
298 static unsigned arm_add_stmt_cost (void *data, int count,
299 enum vect_cost_for_stmt kind,
300 struct _stmt_vec_info *stmt_info,
301 int misalign,
302 enum vect_cost_model_location where);
303
304 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
305 bool op0_preserve_value);
306 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
307
308 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
309 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
310 const_tree);
311 static section *arm_function_section (tree, enum node_frequency, bool, bool);
312 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
313 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
314 int reloc);
315 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
316 static opt_scalar_float_mode arm_floatn_mode (int, bool);
317 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
318 static bool arm_modes_tieable_p (machine_mode, machine_mode);
319 \f
320 /* Table of machine attributes. */
321 static const struct attribute_spec arm_attribute_table[] =
322 {
323 /* { name, min_len, max_len, decl_req, type_req, fn_type_req, handler,
324 affects_type_identity } */
325 /* Function calls made to this symbol must be done indirectly, because
326 it may lie outside of the 26 bit addressing range of a normal function
327 call. */
328 { "long_call", 0, 0, false, true, true, NULL, false },
329 /* Whereas these functions are always known to reside within the 26 bit
330 addressing range. */
331 { "short_call", 0, 0, false, true, true, NULL, false },
332 /* Specify the procedure call conventions for a function. */
333 { "pcs", 1, 1, false, true, true, arm_handle_pcs_attribute,
334 false },
335 /* Interrupt Service Routines have special prologue and epilogue requirements. */
336 { "isr", 0, 1, false, false, false, arm_handle_isr_attribute,
337 false },
338 { "interrupt", 0, 1, false, false, false, arm_handle_isr_attribute,
339 false },
340 { "naked", 0, 0, true, false, false, arm_handle_fndecl_attribute,
341 false },
342 #ifdef ARM_PE
343 /* ARM/PE has three new attributes:
344 interfacearm - ?
345 dllexport - for exporting a function/variable that will live in a dll
346 dllimport - for importing a function/variable from a dll
347
348 Microsoft allows multiple declspecs in one __declspec, separating
349 them with spaces. We do NOT support this. Instead, use __declspec
350 multiple times.
351 */
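/* For example, instead of `__declspec(dllexport naked)' write
   `__declspec(dllexport) __declspec(naked)'.  */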
352 { "dllimport", 0, 0, true, false, false, NULL, false },
353 { "dllexport", 0, 0, true, false, false, NULL, false },
354 { "interfacearm", 0, 0, true, false, false, arm_handle_fndecl_attribute,
355 false },
356 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
357 { "dllimport", 0, 0, false, false, false, handle_dll_attribute, false },
358 { "dllexport", 0, 0, false, false, false, handle_dll_attribute, false },
359 { "notshared", 0, 0, false, true, false, arm_handle_notshared_attribute,
360 false },
361 #endif
362 /* ARMv8-M Security Extensions support. */
363 { "cmse_nonsecure_entry", 0, 0, true, false, false,
364 arm_handle_cmse_nonsecure_entry, false },
365 { "cmse_nonsecure_call", 0, 0, true, false, false,
366 arm_handle_cmse_nonsecure_call, true },
367 { NULL, 0, 0, false, false, false, NULL, false }
368 };
369 \f
370 /* Initialize the GCC target structure. */
371 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
372 #undef TARGET_MERGE_DECL_ATTRIBUTES
373 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
374 #endif
375
376 #undef TARGET_LEGITIMIZE_ADDRESS
377 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
378
379 #undef TARGET_ATTRIBUTE_TABLE
380 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
381
382 #undef TARGET_INSERT_ATTRIBUTES
383 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
384
385 #undef TARGET_ASM_FILE_START
386 #define TARGET_ASM_FILE_START arm_file_start
387 #undef TARGET_ASM_FILE_END
388 #define TARGET_ASM_FILE_END arm_file_end
389
390 #undef TARGET_ASM_ALIGNED_SI_OP
391 #define TARGET_ASM_ALIGNED_SI_OP NULL
392 #undef TARGET_ASM_INTEGER
393 #define TARGET_ASM_INTEGER arm_assemble_integer
394
395 #undef TARGET_PRINT_OPERAND
396 #define TARGET_PRINT_OPERAND arm_print_operand
397 #undef TARGET_PRINT_OPERAND_ADDRESS
398 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
399 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
400 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
401
402 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
403 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
404
405 #undef TARGET_ASM_FUNCTION_PROLOGUE
406 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
407
408 #undef TARGET_ASM_FUNCTION_EPILOGUE
409 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
410
411 #undef TARGET_CAN_INLINE_P
412 #define TARGET_CAN_INLINE_P arm_can_inline_p
413
414 #undef TARGET_RELAYOUT_FUNCTION
415 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
416
417 #undef TARGET_OPTION_OVERRIDE
418 #define TARGET_OPTION_OVERRIDE arm_option_override
419
420 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
421 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
422
423 #undef TARGET_OPTION_SAVE
424 #define TARGET_OPTION_SAVE arm_option_save
425
426 #undef TARGET_OPTION_RESTORE
427 #define TARGET_OPTION_RESTORE arm_option_restore
428
429 #undef TARGET_OPTION_PRINT
430 #define TARGET_OPTION_PRINT arm_option_print
431
432 #undef TARGET_COMP_TYPE_ATTRIBUTES
433 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
434
435 #undef TARGET_SCHED_CAN_SPECULATE_INSN
436 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
437
438 #undef TARGET_SCHED_MACRO_FUSION_P
439 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
440
441 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
442 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
443
444 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
445 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
446
447 #undef TARGET_SCHED_ADJUST_COST
448 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
449
450 #undef TARGET_SET_CURRENT_FUNCTION
451 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
452
453 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
454 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
455
456 #undef TARGET_SCHED_REORDER
457 #define TARGET_SCHED_REORDER arm_sched_reorder
458
459 #undef TARGET_REGISTER_MOVE_COST
460 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
461
462 #undef TARGET_MEMORY_MOVE_COST
463 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
464
465 #undef TARGET_ENCODE_SECTION_INFO
466 #ifdef ARM_PE
467 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
468 #else
469 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
470 #endif
471
472 #undef TARGET_STRIP_NAME_ENCODING
473 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
474
475 #undef TARGET_ASM_INTERNAL_LABEL
476 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
477
478 #undef TARGET_FLOATN_MODE
479 #define TARGET_FLOATN_MODE arm_floatn_mode
480
481 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
482 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
483
484 #undef TARGET_FUNCTION_VALUE
485 #define TARGET_FUNCTION_VALUE arm_function_value
486
487 #undef TARGET_LIBCALL_VALUE
488 #define TARGET_LIBCALL_VALUE arm_libcall_value
489
490 #undef TARGET_FUNCTION_VALUE_REGNO_P
491 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
492
493 #undef TARGET_ASM_OUTPUT_MI_THUNK
494 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
495 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
496 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
497
498 #undef TARGET_RTX_COSTS
499 #define TARGET_RTX_COSTS arm_rtx_costs
500 #undef TARGET_ADDRESS_COST
501 #define TARGET_ADDRESS_COST arm_address_cost
502
503 #undef TARGET_SHIFT_TRUNCATION_MASK
504 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
505 #undef TARGET_VECTOR_MODE_SUPPORTED_P
506 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
507 #undef TARGET_ARRAY_MODE_SUPPORTED_P
508 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
509 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
510 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
511 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
512 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
513 arm_autovectorize_vector_sizes
514
515 #undef TARGET_MACHINE_DEPENDENT_REORG
516 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
517
518 #undef TARGET_INIT_BUILTINS
519 #define TARGET_INIT_BUILTINS arm_init_builtins
520 #undef TARGET_EXPAND_BUILTIN
521 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
522 #undef TARGET_BUILTIN_DECL
523 #define TARGET_BUILTIN_DECL arm_builtin_decl
524
525 #undef TARGET_INIT_LIBFUNCS
526 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
527
528 #undef TARGET_PROMOTE_FUNCTION_MODE
529 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
530 #undef TARGET_PROMOTE_PROTOTYPES
531 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
532 #undef TARGET_PASS_BY_REFERENCE
533 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
534 #undef TARGET_ARG_PARTIAL_BYTES
535 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
536 #undef TARGET_FUNCTION_ARG
537 #define TARGET_FUNCTION_ARG arm_function_arg
538 #undef TARGET_FUNCTION_ARG_ADVANCE
539 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
540 #undef TARGET_FUNCTION_ARG_PADDING
541 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
542 #undef TARGET_FUNCTION_ARG_BOUNDARY
543 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
544
545 #undef TARGET_SETUP_INCOMING_VARARGS
546 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
547
548 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
549 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
550
551 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
552 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
553 #undef TARGET_TRAMPOLINE_INIT
554 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
555 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
556 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
557
558 #undef TARGET_WARN_FUNC_RETURN
559 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
560
561 #undef TARGET_DEFAULT_SHORT_ENUMS
562 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
563
564 #undef TARGET_ALIGN_ANON_BITFIELD
565 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
566
567 #undef TARGET_NARROW_VOLATILE_BITFIELD
568 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
569
570 #undef TARGET_CXX_GUARD_TYPE
571 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
572
573 #undef TARGET_CXX_GUARD_MASK_BIT
574 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
575
576 #undef TARGET_CXX_GET_COOKIE_SIZE
577 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
578
579 #undef TARGET_CXX_COOKIE_HAS_SIZE
580 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
581
582 #undef TARGET_CXX_CDTOR_RETURNS_THIS
583 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
584
585 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
586 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
587
588 #undef TARGET_CXX_USE_AEABI_ATEXIT
589 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
590
591 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
592 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
593 arm_cxx_determine_class_data_visibility
594
595 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
596 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
597
598 #undef TARGET_RETURN_IN_MSB
599 #define TARGET_RETURN_IN_MSB arm_return_in_msb
600
601 #undef TARGET_RETURN_IN_MEMORY
602 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
603
604 #undef TARGET_MUST_PASS_IN_STACK
605 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
606
607 #if ARM_UNWIND_INFO
608 #undef TARGET_ASM_UNWIND_EMIT
609 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
610
611 /* EABI unwinding tables use a different format for the typeinfo tables. */
612 #undef TARGET_ASM_TTYPE
613 #define TARGET_ASM_TTYPE arm_output_ttype
614
615 #undef TARGET_ARM_EABI_UNWINDER
616 #define TARGET_ARM_EABI_UNWINDER true
617
618 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
619 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
620
621 #endif /* ARM_UNWIND_INFO */
622
623 #undef TARGET_ASM_INIT_SECTIONS
624 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
625
626 #undef TARGET_DWARF_REGISTER_SPAN
627 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
628
629 #undef TARGET_CANNOT_COPY_INSN_P
630 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
631
632 #ifdef HAVE_AS_TLS
633 #undef TARGET_HAVE_TLS
634 #define TARGET_HAVE_TLS true
635 #endif
636
637 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
638 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
639
640 #undef TARGET_LEGITIMATE_CONSTANT_P
641 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
642
643 #undef TARGET_CANNOT_FORCE_CONST_MEM
644 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
645
646 #undef TARGET_MAX_ANCHOR_OFFSET
647 #define TARGET_MAX_ANCHOR_OFFSET 4095
648
649 /* The minimum is set such that the total size of the block
650 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
651 divisible by eight, ensuring natural spacing of anchors. */
652 #undef TARGET_MIN_ANCHOR_OFFSET
653 #define TARGET_MIN_ANCHOR_OFFSET -4088
654
655 #undef TARGET_SCHED_ISSUE_RATE
656 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
657
658 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
659 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
660 arm_first_cycle_multipass_dfa_lookahead
661
662 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
663 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
664 arm_first_cycle_multipass_dfa_lookahead_guard
665
666 #undef TARGET_MANGLE_TYPE
667 #define TARGET_MANGLE_TYPE arm_mangle_type
668
669 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
670 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
671
672 #undef TARGET_BUILD_BUILTIN_VA_LIST
673 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
674 #undef TARGET_EXPAND_BUILTIN_VA_START
675 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
676 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
677 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
678
679 #ifdef HAVE_AS_TLS
680 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
681 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
682 #endif
683
684 #undef TARGET_LEGITIMATE_ADDRESS_P
685 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
686
687 #undef TARGET_PREFERRED_RELOAD_CLASS
688 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
689
690 #undef TARGET_PROMOTED_TYPE
691 #define TARGET_PROMOTED_TYPE arm_promoted_type
692
693 #undef TARGET_SCALAR_MODE_SUPPORTED_P
694 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
695
696 #undef TARGET_COMPUTE_FRAME_LAYOUT
697 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
698
699 #undef TARGET_FRAME_POINTER_REQUIRED
700 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
701
702 #undef TARGET_CAN_ELIMINATE
703 #define TARGET_CAN_ELIMINATE arm_can_eliminate
704
705 #undef TARGET_CONDITIONAL_REGISTER_USAGE
706 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
707
708 #undef TARGET_CLASS_LIKELY_SPILLED_P
709 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
710
711 #undef TARGET_VECTORIZE_BUILTINS
712 #define TARGET_VECTORIZE_BUILTINS
713
714 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
715 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
716 arm_builtin_vectorized_function
717
718 #undef TARGET_VECTOR_ALIGNMENT
719 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
720
721 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
722 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
723 arm_vector_alignment_reachable
724
725 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
726 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
727 arm_builtin_support_vector_misalignment
728
729 #undef TARGET_PREFERRED_RENAME_CLASS
730 #define TARGET_PREFERRED_RENAME_CLASS \
731 arm_preferred_rename_class
732
733 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
734 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
735 arm_vectorize_vec_perm_const_ok
736
737 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
738 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
739 arm_builtin_vectorization_cost
740 #undef TARGET_VECTORIZE_ADD_STMT_COST
741 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
742
743 #undef TARGET_CANONICALIZE_COMPARISON
744 #define TARGET_CANONICALIZE_COMPARISON \
745 arm_canonicalize_comparison
746
747 #undef TARGET_ASAN_SHADOW_OFFSET
748 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
749
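/* The maximum number of conditional instructions that may appear in an
   IT block; -mrestrict-it limits this to a single instruction.  */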
750 #undef MAX_INSN_PER_IT_BLOCK
751 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
752
753 #undef TARGET_CAN_USE_DOLOOP_P
754 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
755
756 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
757 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
758
759 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
760 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
761
762 #undef TARGET_SCHED_FUSION_PRIORITY
763 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
764
765 #undef TARGET_ASM_FUNCTION_SECTION
766 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
767
768 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
769 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
770
771 #undef TARGET_SECTION_TYPE_FLAGS
772 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
773
774 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
775 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
776
777 #undef TARGET_C_EXCESS_PRECISION
778 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
779
780 /* Although the architecture reserves bits 0 and 1, only the former is
781 used for ARM/Thumb ISA selection in v7 and earlier versions. */
782 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
783 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
784
785 #undef TARGET_FIXED_CONDITION_CODE_REGS
786 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
787
788 #undef TARGET_HARD_REGNO_MODE_OK
789 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
790
791 #undef TARGET_MODES_TIEABLE_P
792 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
793 \f
794 /* Obstack for minipool constant handling. */
795 static struct obstack minipool_obstack;
796 static char * minipool_startobj;
797
798 /* The maximum number of insns skipped which
799 will be conditionalised if possible. */
800 static int max_insns_skipped = 5;
801
802 extern FILE * asm_out_file;
803
804 /* True if we are currently building a constant table. */
805 int making_const_table;
806
807 /* The processor for which instructions should be scheduled. */
808 enum processor_type arm_tune = TARGET_CPU_arm_none;
809
810 /* The current tuning set. */
811 const struct tune_params *current_tune;
812
813 /* Which floating point hardware to schedule for. */
814 int arm_fpu_attr;
815
816 /* Used for Thumb call_via trampolines. */
817 rtx thumb_call_via_label[14];
818 static int thumb_call_reg_needed;
819
820 /* The bits in this mask specify which instruction scheduling options should
821 be used. */
822 unsigned int tune_flags = 0;
823
824 /* The highest ARM architecture version supported by the
825 target. */
826 enum base_architecture arm_base_arch = BASE_ARCH_0;
827
828 /* Active target architecture and tuning. */
829
830 struct arm_build_target arm_active_target;
831
832 /* The following are used in the arm.md file as equivalents to bits
833 in the above two flag variables. */
834
835 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
836 int arm_arch3m = 0;
837
838 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
839 int arm_arch4 = 0;
840
841 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
842 int arm_arch4t = 0;
843
844 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
845 int arm_arch5 = 0;
846
847 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
848 int arm_arch5e = 0;
849
850 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
851 int arm_arch5te = 0;
852
853 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
854 int arm_arch6 = 0;
855
856 /* Nonzero if this chip supports the ARM 6K extensions. */
857 int arm_arch6k = 0;
858
859 /* Nonzero if this chip supports the ARM 6KZ extensions. */
860 int arm_arch6kz = 0;
861
862 /* Nonzero if instructions present in ARMv6-M can be used. */
863 int arm_arch6m = 0;
864
865 /* Nonzero if this chip supports the ARM 7 extensions. */
866 int arm_arch7 = 0;
867
868 /* Nonzero if this chip supports the Large Physical Address Extension. */
869 int arm_arch_lpae = 0;
870
871 /* Nonzero if instructions not present in the 'M' profile can be used. */
872 int arm_arch_notm = 0;
873
874 /* Nonzero if instructions present in ARMv7E-M can be used. */
875 int arm_arch7em = 0;
876
877 /* Nonzero if instructions present in ARMv8 can be used. */
878 int arm_arch8 = 0;
879
880 /* Nonzero if this chip supports the ARMv8.1 extensions. */
881 int arm_arch8_1 = 0;
882
883 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
884 int arm_arch8_2 = 0;
885
886 /* Nonzero if this chip supports the FP16 instructions extension of ARM
887 Architecture 8.2. */
888 int arm_fp16_inst = 0;
889
890 /* Nonzero if this chip can benefit from load scheduling. */
891 int arm_ld_sched = 0;
892
893 /* Nonzero if this chip is a StrongARM. */
894 int arm_tune_strongarm = 0;
895
896 /* Nonzero if this chip supports Intel Wireless MMX technology. */
897 int arm_arch_iwmmxt = 0;
898
899 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
900 int arm_arch_iwmmxt2 = 0;
901
902 /* Nonzero if this chip is an XScale. */
903 int arm_arch_xscale = 0;
904
905 /* Nonzero if tuning for XScale. */
906 int arm_tune_xscale = 0;
907
908 /* Nonzero if we want to tune for stores that access the write-buffer.
909 This typically means an ARM6 or ARM7 with MMU or MPU. */
910 int arm_tune_wbuf = 0;
911
912 /* Nonzero if tuning for Cortex-A9. */
913 int arm_tune_cortex_a9 = 0;
914
915 /* Nonzero if we should define __THUMB_INTERWORK__ in the
916 preprocessor.
917 XXX This is a bit of a hack; it's intended to help work around
918 problems in GLD, which doesn't understand that armv5t code is
919 interworking clean. */
920 int arm_cpp_interwork = 0;
921
922 /* Nonzero if chip supports Thumb 1. */
923 int arm_arch_thumb1;
924
925 /* Nonzero if chip supports Thumb 2. */
926 int arm_arch_thumb2;
927
928 /* Nonzero if chip supports integer division instruction. */
929 int arm_arch_arm_hwdiv;
930 int arm_arch_thumb_hwdiv;
931
932 /* Nonzero if chip disallows volatile memory access in IT block. */
933 int arm_arch_no_volatile_ce;
934
935 /* Nonzero if we should use Neon to handle 64-bit operations rather
936 than core registers. */
937 int prefer_neon_for_64bits = 0;
938
939 /* Nonzero if we shouldn't use literal pools. */
940 bool arm_disable_literal_pool = false;
941
942 /* The register number to be used for the PIC offset register. */
943 unsigned arm_pic_register = INVALID_REGNUM;
944
945 enum arm_pcs arm_pcs_default;
946
947 /* For an explanation of these variables, see final_prescan_insn below. */
948 int arm_ccfsm_state;
949 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
950 enum arm_cond_code arm_current_cc;
951
952 rtx arm_target_insn;
953 int arm_target_label;
954 /* The number of conditionally executed insns, including the current insn. */
955 int arm_condexec_count = 0;
956 /* A bitmask specifying the patterns for the IT block.
957 Zero means do not output an IT block before this insn. */
958 int arm_condexec_mask = 0;
959 /* The number of bits used in arm_condexec_mask. */
960 int arm_condexec_masklen = 0;
961
962 /* Nonzero if chip supports the ARMv8 CRC instructions. */
963 int arm_arch_crc = 0;
964
965 /* Nonzero if chip supports the ARMv8-M security extensions. */
966 int arm_arch_cmse = 0;
967
968 /* Nonzero if the core has a very small, high-latency, multiply unit. */
969 int arm_m_profile_small_mul = 0;
970
971 /* The condition codes of the ARM, and the inverse function. */
972 static const char * const arm_condition_codes[] =
973 {
974 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
975 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
976 };
977
978 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
979 int arm_regs_in_sequence[] =
980 {
981 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
982 };
983
984 #define ARM_LSL_NAME "lsl"
985 #define streq(string1, string2) (strcmp (string1, string2) == 0)
986
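/* The set of low registers (r0-r7) that Thumb-2 code may use as work
   registers, excluding the hard frame pointer, stack pointer, program
   counter and the PIC register.  */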
987 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
988 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
989 | (1 << PIC_OFFSET_TABLE_REGNUM)))
990 \f
991 /* Initialization code. */
992
993 struct cpu_tune
994 {
995 enum processor_type scheduler;
996 unsigned int tune_flags;
997 const struct tune_params *tune;
998 };
999
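/* Initializers for the prefetch tuning parameters: the number of
   prefetch slots and the L1 cache size and line size; -1 marks a value
   as unknown or not applicable.  */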
1000 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1001 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1002 { \
1003 num_slots, \
1004 l1_size, \
1005 l1_line_size \
1006 }
1007
1008 /* arm generic vectorizer costs. */
1009 static const
1010 struct cpu_vec_costs arm_default_vec_cost = {
1011 1, /* scalar_stmt_cost. */
1012 1, /* scalar_load_cost. */
1013 1, /* scalar_store_cost. */
1014 1, /* vec_stmt_cost. */
1015 1, /* vec_to_scalar_cost. */
1016 1, /* scalar_to_vec_cost. */
1017 1, /* vec_align_load_cost. */
1018 1, /* vec_unalign_load_cost. */
1019 1, /* vec_unalign_store_cost. */
1020 1, /* vec_store_cost. */
1021 3, /* cond_taken_branch_cost. */
1022 1, /* cond_not_taken_branch_cost. */
1023 };
1024
1025 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h. */
1026 #include "aarch-cost-tables.h"
1027
1028
1029
1030 const struct cpu_cost_table cortexa9_extra_costs =
1031 {
1032 /* ALU */
1033 {
1034 0, /* arith. */
1035 0, /* logical. */
1036 0, /* shift. */
1037 COSTS_N_INSNS (1), /* shift_reg. */
1038 COSTS_N_INSNS (1), /* arith_shift. */
1039 COSTS_N_INSNS (2), /* arith_shift_reg. */
1040 0, /* log_shift. */
1041 COSTS_N_INSNS (1), /* log_shift_reg. */
1042 COSTS_N_INSNS (1), /* extend. */
1043 COSTS_N_INSNS (2), /* extend_arith. */
1044 COSTS_N_INSNS (1), /* bfi. */
1045 COSTS_N_INSNS (1), /* bfx. */
1046 0, /* clz. */
1047 0, /* rev. */
1048 0, /* non_exec. */
1049 true /* non_exec_costs_exec. */
1050 },
1051 {
1052 /* MULT SImode */
1053 {
1054 COSTS_N_INSNS (3), /* simple. */
1055 COSTS_N_INSNS (3), /* flag_setting. */
1056 COSTS_N_INSNS (2), /* extend. */
1057 COSTS_N_INSNS (3), /* add. */
1058 COSTS_N_INSNS (2), /* extend_add. */
1059 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1060 },
1061 /* MULT DImode */
1062 {
1063 0, /* simple (N/A). */
1064 0, /* flag_setting (N/A). */
1065 COSTS_N_INSNS (4), /* extend. */
1066 0, /* add (N/A). */
1067 COSTS_N_INSNS (4), /* extend_add. */
1068 0 /* idiv (N/A). */
1069 }
1070 },
1071 /* LD/ST */
1072 {
1073 COSTS_N_INSNS (2), /* load. */
1074 COSTS_N_INSNS (2), /* load_sign_extend. */
1075 COSTS_N_INSNS (2), /* ldrd. */
1076 COSTS_N_INSNS (2), /* ldm_1st. */
1077 1, /* ldm_regs_per_insn_1st. */
1078 2, /* ldm_regs_per_insn_subsequent. */
1079 COSTS_N_INSNS (5), /* loadf. */
1080 COSTS_N_INSNS (5), /* loadd. */
1081 COSTS_N_INSNS (1), /* load_unaligned. */
1082 COSTS_N_INSNS (2), /* store. */
1083 COSTS_N_INSNS (2), /* strd. */
1084 COSTS_N_INSNS (2), /* stm_1st. */
1085 1, /* stm_regs_per_insn_1st. */
1086 2, /* stm_regs_per_insn_subsequent. */
1087 COSTS_N_INSNS (1), /* storef. */
1088 COSTS_N_INSNS (1), /* stored. */
1089 COSTS_N_INSNS (1), /* store_unaligned. */
1090 COSTS_N_INSNS (1), /* loadv. */
1091 COSTS_N_INSNS (1) /* storev. */
1092 },
1093 {
1094 /* FP SFmode */
1095 {
1096 COSTS_N_INSNS (14), /* div. */
1097 COSTS_N_INSNS (4), /* mult. */
1098 COSTS_N_INSNS (7), /* mult_addsub. */
1099 COSTS_N_INSNS (30), /* fma. */
1100 COSTS_N_INSNS (3), /* addsub. */
1101 COSTS_N_INSNS (1), /* fpconst. */
1102 COSTS_N_INSNS (1), /* neg. */
1103 COSTS_N_INSNS (3), /* compare. */
1104 COSTS_N_INSNS (3), /* widen. */
1105 COSTS_N_INSNS (3), /* narrow. */
1106 COSTS_N_INSNS (3), /* toint. */
1107 COSTS_N_INSNS (3), /* fromint. */
1108 COSTS_N_INSNS (3) /* roundint. */
1109 },
1110 /* FP DFmode */
1111 {
1112 COSTS_N_INSNS (24), /* div. */
1113 COSTS_N_INSNS (5), /* mult. */
1114 COSTS_N_INSNS (8), /* mult_addsub. */
1115 COSTS_N_INSNS (30), /* fma. */
1116 COSTS_N_INSNS (3), /* addsub. */
1117 COSTS_N_INSNS (1), /* fpconst. */
1118 COSTS_N_INSNS (1), /* neg. */
1119 COSTS_N_INSNS (3), /* compare. */
1120 COSTS_N_INSNS (3), /* widen. */
1121 COSTS_N_INSNS (3), /* narrow. */
1122 COSTS_N_INSNS (3), /* toint. */
1123 COSTS_N_INSNS (3), /* fromint. */
1124 COSTS_N_INSNS (3) /* roundint. */
1125 }
1126 },
1127 /* Vector */
1128 {
1129 COSTS_N_INSNS (1) /* alu. */
1130 }
1131 };
1132
1133 const struct cpu_cost_table cortexa8_extra_costs =
1134 {
1135 /* ALU */
1136 {
1137 0, /* arith. */
1138 0, /* logical. */
1139 COSTS_N_INSNS (1), /* shift. */
1140 0, /* shift_reg. */
1141 COSTS_N_INSNS (1), /* arith_shift. */
1142 0, /* arith_shift_reg. */
1143 COSTS_N_INSNS (1), /* log_shift. */
1144 0, /* log_shift_reg. */
1145 0, /* extend. */
1146 0, /* extend_arith. */
1147 0, /* bfi. */
1148 0, /* bfx. */
1149 0, /* clz. */
1150 0, /* rev. */
1151 0, /* non_exec. */
1152 true /* non_exec_costs_exec. */
1153 },
1154 {
1155 /* MULT SImode */
1156 {
1157 COSTS_N_INSNS (1), /* simple. */
1158 COSTS_N_INSNS (1), /* flag_setting. */
1159 COSTS_N_INSNS (1), /* extend. */
1160 COSTS_N_INSNS (1), /* add. */
1161 COSTS_N_INSNS (1), /* extend_add. */
1162 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1163 },
1164 /* MULT DImode */
1165 {
1166 0, /* simple (N/A). */
1167 0, /* flag_setting (N/A). */
1168 COSTS_N_INSNS (2), /* extend. */
1169 0, /* add (N/A). */
1170 COSTS_N_INSNS (2), /* extend_add. */
1171 0 /* idiv (N/A). */
1172 }
1173 },
1174 /* LD/ST */
1175 {
1176 COSTS_N_INSNS (1), /* load. */
1177 COSTS_N_INSNS (1), /* load_sign_extend. */
1178 COSTS_N_INSNS (1), /* ldrd. */
1179 COSTS_N_INSNS (1), /* ldm_1st. */
1180 1, /* ldm_regs_per_insn_1st. */
1181 2, /* ldm_regs_per_insn_subsequent. */
1182 COSTS_N_INSNS (1), /* loadf. */
1183 COSTS_N_INSNS (1), /* loadd. */
1184 COSTS_N_INSNS (1), /* load_unaligned. */
1185 COSTS_N_INSNS (1), /* store. */
1186 COSTS_N_INSNS (1), /* strd. */
1187 COSTS_N_INSNS (1), /* stm_1st. */
1188 1, /* stm_regs_per_insn_1st. */
1189 2, /* stm_regs_per_insn_subsequent. */
1190 COSTS_N_INSNS (1), /* storef. */
1191 COSTS_N_INSNS (1), /* stored. */
1192 COSTS_N_INSNS (1), /* store_unaligned. */
1193 COSTS_N_INSNS (1), /* loadv. */
1194 COSTS_N_INSNS (1) /* storev. */
1195 },
1196 {
1197 /* FP SFmode */
1198 {
1199 COSTS_N_INSNS (36), /* div. */
1200 COSTS_N_INSNS (11), /* mult. */
1201 COSTS_N_INSNS (20), /* mult_addsub. */
1202 COSTS_N_INSNS (30), /* fma. */
1203 COSTS_N_INSNS (9), /* addsub. */
1204 COSTS_N_INSNS (3), /* fpconst. */
1205 COSTS_N_INSNS (3), /* neg. */
1206 COSTS_N_INSNS (6), /* compare. */
1207 COSTS_N_INSNS (4), /* widen. */
1208 COSTS_N_INSNS (4), /* narrow. */
1209 COSTS_N_INSNS (8), /* toint. */
1210 COSTS_N_INSNS (8), /* fromint. */
1211 COSTS_N_INSNS (8) /* roundint. */
1212 },
1213 /* FP DFmode */
1214 {
1215 COSTS_N_INSNS (64), /* div. */
1216 COSTS_N_INSNS (16), /* mult. */
1217 COSTS_N_INSNS (25), /* mult_addsub. */
1218 COSTS_N_INSNS (30), /* fma. */
1219 COSTS_N_INSNS (9), /* addsub. */
1220 COSTS_N_INSNS (3), /* fpconst. */
1221 COSTS_N_INSNS (3), /* neg. */
1222 COSTS_N_INSNS (6), /* compare. */
1223 COSTS_N_INSNS (6), /* widen. */
1224 COSTS_N_INSNS (6), /* narrow. */
1225 COSTS_N_INSNS (8), /* toint. */
1226 COSTS_N_INSNS (8), /* fromint. */
1227 COSTS_N_INSNS (8) /* roundint. */
1228 }
1229 },
1230 /* Vector */
1231 {
1232 COSTS_N_INSNS (1) /* alu. */
1233 }
1234 };
1235
1236 const struct cpu_cost_table cortexa5_extra_costs =
1237 {
1238 /* ALU */
1239 {
1240 0, /* arith. */
1241 0, /* logical. */
1242 COSTS_N_INSNS (1), /* shift. */
1243 COSTS_N_INSNS (1), /* shift_reg. */
1244 COSTS_N_INSNS (1), /* arith_shift. */
1245 COSTS_N_INSNS (1), /* arith_shift_reg. */
1246 COSTS_N_INSNS (1), /* log_shift. */
1247 COSTS_N_INSNS (1), /* log_shift_reg. */
1248 COSTS_N_INSNS (1), /* extend. */
1249 COSTS_N_INSNS (1), /* extend_arith. */
1250 COSTS_N_INSNS (1), /* bfi. */
1251 COSTS_N_INSNS (1), /* bfx. */
1252 COSTS_N_INSNS (1), /* clz. */
1253 COSTS_N_INSNS (1), /* rev. */
1254 0, /* non_exec. */
1255 true /* non_exec_costs_exec. */
1256 },
1257
1258 {
1259 /* MULT SImode */
1260 {
1261 0, /* simple. */
1262 COSTS_N_INSNS (1), /* flag_setting. */
1263 COSTS_N_INSNS (1), /* extend. */
1264 COSTS_N_INSNS (1), /* add. */
1265 COSTS_N_INSNS (1), /* extend_add. */
1266 COSTS_N_INSNS (7) /* idiv. */
1267 },
1268 /* MULT DImode */
1269 {
1270 0, /* simple (N/A). */
1271 0, /* flag_setting (N/A). */
1272 COSTS_N_INSNS (1), /* extend. */
1273 0, /* add. */
1274 COSTS_N_INSNS (2), /* extend_add. */
1275 0 /* idiv (N/A). */
1276 }
1277 },
1278 /* LD/ST */
1279 {
1280 COSTS_N_INSNS (1), /* load. */
1281 COSTS_N_INSNS (1), /* load_sign_extend. */
1282 COSTS_N_INSNS (6), /* ldrd. */
1283 COSTS_N_INSNS (1), /* ldm_1st. */
1284 1, /* ldm_regs_per_insn_1st. */
1285 2, /* ldm_regs_per_insn_subsequent. */
1286 COSTS_N_INSNS (2), /* loadf. */
1287 COSTS_N_INSNS (4), /* loadd. */
1288 COSTS_N_INSNS (1), /* load_unaligned. */
1289 COSTS_N_INSNS (1), /* store. */
1290 COSTS_N_INSNS (3), /* strd. */
1291 COSTS_N_INSNS (1), /* stm_1st. */
1292 1, /* stm_regs_per_insn_1st. */
1293 2, /* stm_regs_per_insn_subsequent. */
1294 COSTS_N_INSNS (2), /* storef. */
1295 COSTS_N_INSNS (2), /* stored. */
1296 COSTS_N_INSNS (1), /* store_unaligned. */
1297 COSTS_N_INSNS (1), /* loadv. */
1298 COSTS_N_INSNS (1) /* storev. */
1299 },
1300 {
1301 /* FP SFmode */
1302 {
1303 COSTS_N_INSNS (15), /* div. */
1304 COSTS_N_INSNS (3), /* mult. */
1305 COSTS_N_INSNS (7), /* mult_addsub. */
1306 COSTS_N_INSNS (7), /* fma. */
1307 COSTS_N_INSNS (3), /* addsub. */
1308 COSTS_N_INSNS (3), /* fpconst. */
1309 COSTS_N_INSNS (3), /* neg. */
1310 COSTS_N_INSNS (3), /* compare. */
1311 COSTS_N_INSNS (3), /* widen. */
1312 COSTS_N_INSNS (3), /* narrow. */
1313 COSTS_N_INSNS (3), /* toint. */
1314 COSTS_N_INSNS (3), /* fromint. */
1315 COSTS_N_INSNS (3) /* roundint. */
1316 },
1317 /* FP DFmode */
1318 {
1319 COSTS_N_INSNS (30), /* div. */
1320 COSTS_N_INSNS (6), /* mult. */
1321 COSTS_N_INSNS (10), /* mult_addsub. */
1322 COSTS_N_INSNS (7), /* fma. */
1323 COSTS_N_INSNS (3), /* addsub. */
1324 COSTS_N_INSNS (3), /* fpconst. */
1325 COSTS_N_INSNS (3), /* neg. */
1326 COSTS_N_INSNS (3), /* compare. */
1327 COSTS_N_INSNS (3), /* widen. */
1328 COSTS_N_INSNS (3), /* narrow. */
1329 COSTS_N_INSNS (3), /* toint. */
1330 COSTS_N_INSNS (3), /* fromint. */
1331 COSTS_N_INSNS (3) /* roundint. */
1332 }
1333 },
1334 /* Vector */
1335 {
1336 COSTS_N_INSNS (1) /* alu. */
1337 }
1338 };
1339
1340
1341 const struct cpu_cost_table cortexa7_extra_costs =
1342 {
1343 /* ALU */
1344 {
1345 0, /* arith. */
1346 0, /* logical. */
1347 COSTS_N_INSNS (1), /* shift. */
1348 COSTS_N_INSNS (1), /* shift_reg. */
1349 COSTS_N_INSNS (1), /* arith_shift. */
1350 COSTS_N_INSNS (1), /* arith_shift_reg. */
1351 COSTS_N_INSNS (1), /* log_shift. */
1352 COSTS_N_INSNS (1), /* log_shift_reg. */
1353 COSTS_N_INSNS (1), /* extend. */
1354 COSTS_N_INSNS (1), /* extend_arith. */
1355 COSTS_N_INSNS (1), /* bfi. */
1356 COSTS_N_INSNS (1), /* bfx. */
1357 COSTS_N_INSNS (1), /* clz. */
1358 COSTS_N_INSNS (1), /* rev. */
1359 0, /* non_exec. */
1360 true /* non_exec_costs_exec. */
1361 },
1362
1363 {
1364 /* MULT SImode */
1365 {
1366 0, /* simple. */
1367 COSTS_N_INSNS (1), /* flag_setting. */
1368 COSTS_N_INSNS (1), /* extend. */
1369 COSTS_N_INSNS (1), /* add. */
1370 COSTS_N_INSNS (1), /* extend_add. */
1371 COSTS_N_INSNS (7) /* idiv. */
1372 },
1373 /* MULT DImode */
1374 {
1375 0, /* simple (N/A). */
1376 0, /* flag_setting (N/A). */
1377 COSTS_N_INSNS (1), /* extend. */
1378 0, /* add. */
1379 COSTS_N_INSNS (2), /* extend_add. */
1380 0 /* idiv (N/A). */
1381 }
1382 },
1383 /* LD/ST */
1384 {
1385 COSTS_N_INSNS (1), /* load. */
1386 COSTS_N_INSNS (1), /* load_sign_extend. */
1387 COSTS_N_INSNS (3), /* ldrd. */
1388 COSTS_N_INSNS (1), /* ldm_1st. */
1389 1, /* ldm_regs_per_insn_1st. */
1390 2, /* ldm_regs_per_insn_subsequent. */
1391 COSTS_N_INSNS (2), /* loadf. */
1392 COSTS_N_INSNS (2), /* loadd. */
1393 COSTS_N_INSNS (1), /* load_unaligned. */
1394 COSTS_N_INSNS (1), /* store. */
1395 COSTS_N_INSNS (3), /* strd. */
1396 COSTS_N_INSNS (1), /* stm_1st. */
1397 1, /* stm_regs_per_insn_1st. */
1398 2, /* stm_regs_per_insn_subsequent. */
1399 COSTS_N_INSNS (2), /* storef. */
1400 COSTS_N_INSNS (2), /* stored. */
1401 COSTS_N_INSNS (1), /* store_unaligned. */
1402 COSTS_N_INSNS (1), /* loadv. */
1403 COSTS_N_INSNS (1) /* storev. */
1404 },
1405 {
1406 /* FP SFmode */
1407 {
1408 COSTS_N_INSNS (15), /* div. */
1409 COSTS_N_INSNS (3), /* mult. */
1410 COSTS_N_INSNS (7), /* mult_addsub. */
1411 COSTS_N_INSNS (7), /* fma. */
1412 COSTS_N_INSNS (3), /* addsub. */
1413 COSTS_N_INSNS (3), /* fpconst. */
1414 COSTS_N_INSNS (3), /* neg. */
1415 COSTS_N_INSNS (3), /* compare. */
1416 COSTS_N_INSNS (3), /* widen. */
1417 COSTS_N_INSNS (3), /* narrow. */
1418 COSTS_N_INSNS (3), /* toint. */
1419 COSTS_N_INSNS (3), /* fromint. */
1420 COSTS_N_INSNS (3) /* roundint. */
1421 },
1422 /* FP DFmode */
1423 {
1424 COSTS_N_INSNS (30), /* div. */
1425 COSTS_N_INSNS (6), /* mult. */
1426 COSTS_N_INSNS (10), /* mult_addsub. */
1427 COSTS_N_INSNS (7), /* fma. */
1428 COSTS_N_INSNS (3), /* addsub. */
1429 COSTS_N_INSNS (3), /* fpconst. */
1430 COSTS_N_INSNS (3), /* neg. */
1431 COSTS_N_INSNS (3), /* compare. */
1432 COSTS_N_INSNS (3), /* widen. */
1433 COSTS_N_INSNS (3), /* narrow. */
1434 COSTS_N_INSNS (3), /* toint. */
1435 COSTS_N_INSNS (3), /* fromint. */
1436 COSTS_N_INSNS (3) /* roundint. */
1437 }
1438 },
1439 /* Vector */
1440 {
1441 COSTS_N_INSNS (1) /* alu. */
1442 }
1443 };
1444
1445 const struct cpu_cost_table cortexa12_extra_costs =
1446 {
1447 /* ALU */
1448 {
1449 0, /* arith. */
1450 0, /* logical. */
1451 0, /* shift. */
1452 COSTS_N_INSNS (1), /* shift_reg. */
1453 COSTS_N_INSNS (1), /* arith_shift. */
1454 COSTS_N_INSNS (1), /* arith_shift_reg. */
1455 COSTS_N_INSNS (1), /* log_shift. */
1456 COSTS_N_INSNS (1), /* log_shift_reg. */
1457 0, /* extend. */
1458 COSTS_N_INSNS (1), /* extend_arith. */
1459 0, /* bfi. */
1460 COSTS_N_INSNS (1), /* bfx. */
1461 COSTS_N_INSNS (1), /* clz. */
1462 COSTS_N_INSNS (1), /* rev. */
1463 0, /* non_exec. */
1464 true /* non_exec_costs_exec. */
1465 },
1466 /* MULT SImode */
1467 {
1468 {
1469 COSTS_N_INSNS (2), /* simple. */
1470 COSTS_N_INSNS (3), /* flag_setting. */
1471 COSTS_N_INSNS (2), /* extend. */
1472 COSTS_N_INSNS (3), /* add. */
1473 COSTS_N_INSNS (2), /* extend_add. */
1474 COSTS_N_INSNS (18) /* idiv. */
1475 },
1476 /* MULT DImode */
1477 {
1478 0, /* simple (N/A). */
1479 0, /* flag_setting (N/A). */
1480 COSTS_N_INSNS (3), /* extend. */
1481 0, /* add (N/A). */
1482 COSTS_N_INSNS (3), /* extend_add. */
1483 0 /* idiv (N/A). */
1484 }
1485 },
1486 /* LD/ST */
1487 {
1488 COSTS_N_INSNS (3), /* load. */
1489 COSTS_N_INSNS (3), /* load_sign_extend. */
1490 COSTS_N_INSNS (3), /* ldrd. */
1491 COSTS_N_INSNS (3), /* ldm_1st. */
1492 1, /* ldm_regs_per_insn_1st. */
1493 2, /* ldm_regs_per_insn_subsequent. */
1494 COSTS_N_INSNS (3), /* loadf. */
1495 COSTS_N_INSNS (3), /* loadd. */
1496 0, /* load_unaligned. */
1497 0, /* store. */
1498 0, /* strd. */
1499 0, /* stm_1st. */
1500 1, /* stm_regs_per_insn_1st. */
1501 2, /* stm_regs_per_insn_subsequent. */
1502 COSTS_N_INSNS (2), /* storef. */
1503 COSTS_N_INSNS (2), /* stored. */
1504 0, /* store_unaligned. */
1505 COSTS_N_INSNS (1), /* loadv. */
1506 COSTS_N_INSNS (1) /* storev. */
1507 },
1508 {
1509 /* FP SFmode */
1510 {
1511 COSTS_N_INSNS (17), /* div. */
1512 COSTS_N_INSNS (4), /* mult. */
1513 COSTS_N_INSNS (8), /* mult_addsub. */
1514 COSTS_N_INSNS (8), /* fma. */
1515 COSTS_N_INSNS (4), /* addsub. */
1516 COSTS_N_INSNS (2), /* fpconst. */
1517 COSTS_N_INSNS (2), /* neg. */
1518 COSTS_N_INSNS (2), /* compare. */
1519 COSTS_N_INSNS (4), /* widen. */
1520 COSTS_N_INSNS (4), /* narrow. */
1521 COSTS_N_INSNS (4), /* toint. */
1522 COSTS_N_INSNS (4), /* fromint. */
1523 COSTS_N_INSNS (4) /* roundint. */
1524 },
1525 /* FP DFmode */
1526 {
1527 COSTS_N_INSNS (31), /* div. */
1528 COSTS_N_INSNS (4), /* mult. */
1529 COSTS_N_INSNS (8), /* mult_addsub. */
1530 COSTS_N_INSNS (8), /* fma. */
1531 COSTS_N_INSNS (4), /* addsub. */
1532 COSTS_N_INSNS (2), /* fpconst. */
1533 COSTS_N_INSNS (2), /* neg. */
1534 COSTS_N_INSNS (2), /* compare. */
1535 COSTS_N_INSNS (4), /* widen. */
1536 COSTS_N_INSNS (4), /* narrow. */
1537 COSTS_N_INSNS (4), /* toint. */
1538 COSTS_N_INSNS (4), /* fromint. */
1539 COSTS_N_INSNS (4) /* roundint. */
1540 }
1541 },
1542 /* Vector */
1543 {
1544 COSTS_N_INSNS (1) /* alu. */
1545 }
1546 };
1547
1548 const struct cpu_cost_table cortexa15_extra_costs =
1549 {
1550 /* ALU */
1551 {
1552 0, /* arith. */
1553 0, /* logical. */
1554 0, /* shift. */
1555 0, /* shift_reg. */
1556 COSTS_N_INSNS (1), /* arith_shift. */
1557 COSTS_N_INSNS (1), /* arith_shift_reg. */
1558 COSTS_N_INSNS (1), /* log_shift. */
1559 COSTS_N_INSNS (1), /* log_shift_reg. */
1560 0, /* extend. */
1561 COSTS_N_INSNS (1), /* extend_arith. */
1562 COSTS_N_INSNS (1), /* bfi. */
1563 0, /* bfx. */
1564 0, /* clz. */
1565 0, /* rev. */
1566 0, /* non_exec. */
1567 true /* non_exec_costs_exec. */
1568 },
1569 /* MULT SImode */
1570 {
1571 {
1572 COSTS_N_INSNS (2), /* simple. */
1573 COSTS_N_INSNS (3), /* flag_setting. */
1574 COSTS_N_INSNS (2), /* extend. */
1575 COSTS_N_INSNS (2), /* add. */
1576 COSTS_N_INSNS (2), /* extend_add. */
1577 COSTS_N_INSNS (18) /* idiv. */
1578 },
1579 /* MULT DImode */
1580 {
1581 0, /* simple (N/A). */
1582 0, /* flag_setting (N/A). */
1583 COSTS_N_INSNS (3), /* extend. */
1584 0, /* add (N/A). */
1585 COSTS_N_INSNS (3), /* extend_add. */
1586 0 /* idiv (N/A). */
1587 }
1588 },
1589 /* LD/ST */
1590 {
1591 COSTS_N_INSNS (3), /* load. */
1592 COSTS_N_INSNS (3), /* load_sign_extend. */
1593 COSTS_N_INSNS (3), /* ldrd. */
1594 COSTS_N_INSNS (4), /* ldm_1st. */
1595 1, /* ldm_regs_per_insn_1st. */
1596 2, /* ldm_regs_per_insn_subsequent. */
1597 COSTS_N_INSNS (4), /* loadf. */
1598 COSTS_N_INSNS (4), /* loadd. */
1599 0, /* load_unaligned. */
1600 0, /* store. */
1601 0, /* strd. */
1602 COSTS_N_INSNS (1), /* stm_1st. */
1603 1, /* stm_regs_per_insn_1st. */
1604 2, /* stm_regs_per_insn_subsequent. */
1605 0, /* storef. */
1606 0, /* stored. */
1607 0, /* store_unaligned. */
1608 COSTS_N_INSNS (1), /* loadv. */
1609 COSTS_N_INSNS (1) /* storev. */
1610 },
1611 {
1612 /* FP SFmode */
1613 {
1614 COSTS_N_INSNS (17), /* div. */
1615 COSTS_N_INSNS (4), /* mult. */
1616 COSTS_N_INSNS (8), /* mult_addsub. */
1617 COSTS_N_INSNS (8), /* fma. */
1618 COSTS_N_INSNS (4), /* addsub. */
1619 COSTS_N_INSNS (2), /* fpconst. */
1620 COSTS_N_INSNS (2), /* neg. */
1621 COSTS_N_INSNS (5), /* compare. */
1622 COSTS_N_INSNS (4), /* widen. */
1623 COSTS_N_INSNS (4), /* narrow. */
1624 COSTS_N_INSNS (4), /* toint. */
1625 COSTS_N_INSNS (4), /* fromint. */
1626 COSTS_N_INSNS (4) /* roundint. */
1627 },
1628 /* FP DFmode */
1629 {
1630 COSTS_N_INSNS (31), /* div. */
1631 COSTS_N_INSNS (4), /* mult. */
1632 COSTS_N_INSNS (8), /* mult_addsub. */
1633 COSTS_N_INSNS (8), /* fma. */
1634 COSTS_N_INSNS (4), /* addsub. */
1635 COSTS_N_INSNS (2), /* fpconst. */
1636 COSTS_N_INSNS (2), /* neg. */
1637 COSTS_N_INSNS (2), /* compare. */
1638 COSTS_N_INSNS (4), /* widen. */
1639 COSTS_N_INSNS (4), /* narrow. */
1640 COSTS_N_INSNS (4), /* toint. */
1641 COSTS_N_INSNS (4), /* fromint. */
1642 COSTS_N_INSNS (4) /* roundint. */
1643 }
1644 },
1645 /* Vector */
1646 {
1647 COSTS_N_INSNS (1) /* alu. */
1648 }
1649 };
1650
1651 const struct cpu_cost_table v7m_extra_costs =
1652 {
1653 /* ALU */
1654 {
1655 0, /* arith. */
1656 0, /* logical. */
1657 0, /* shift. */
1658 0, /* shift_reg. */
1659 0, /* arith_shift. */
1660 COSTS_N_INSNS (1), /* arith_shift_reg. */
1661 0, /* log_shift. */
1662 COSTS_N_INSNS (1), /* log_shift_reg. */
1663 0, /* extend. */
1664 COSTS_N_INSNS (1), /* extend_arith. */
1665 0, /* bfi. */
1666 0, /* bfx. */
1667 0, /* clz. */
1668 0, /* rev. */
1669 COSTS_N_INSNS (1), /* non_exec. */
1670 false /* non_exec_costs_exec. */
1671 },
1672 {
1673 /* MULT SImode */
1674 {
1675 COSTS_N_INSNS (1), /* simple. */
1676 COSTS_N_INSNS (1), /* flag_setting. */
1677 COSTS_N_INSNS (2), /* extend. */
1678 COSTS_N_INSNS (1), /* add. */
1679 COSTS_N_INSNS (3), /* extend_add. */
1680 COSTS_N_INSNS (8) /* idiv. */
1681 },
1682 /* MULT DImode */
1683 {
1684 0, /* simple (N/A). */
1685 0, /* flag_setting (N/A). */
1686 COSTS_N_INSNS (2), /* extend. */
1687 0, /* add (N/A). */
1688 COSTS_N_INSNS (3), /* extend_add. */
1689 0 /* idiv (N/A). */
1690 }
1691 },
1692 /* LD/ST */
1693 {
1694 COSTS_N_INSNS (2), /* load. */
1695 0, /* load_sign_extend. */
1696 COSTS_N_INSNS (3), /* ldrd. */
1697 COSTS_N_INSNS (2), /* ldm_1st. */
1698 1, /* ldm_regs_per_insn_1st. */
1699 1, /* ldm_regs_per_insn_subsequent. */
1700 COSTS_N_INSNS (2), /* loadf. */
1701 COSTS_N_INSNS (3), /* loadd. */
1702 COSTS_N_INSNS (1), /* load_unaligned. */
1703 COSTS_N_INSNS (2), /* store. */
1704 COSTS_N_INSNS (3), /* strd. */
1705 COSTS_N_INSNS (2), /* stm_1st. */
1706 1, /* stm_regs_per_insn_1st. */
1707 1, /* stm_regs_per_insn_subsequent. */
1708 COSTS_N_INSNS (2), /* storef. */
1709 COSTS_N_INSNS (3), /* stored. */
1710 COSTS_N_INSNS (1), /* store_unaligned. */
1711 COSTS_N_INSNS (1), /* loadv. */
1712 COSTS_N_INSNS (1) /* storev. */
1713 },
1714 {
1715 /* FP SFmode */
1716 {
1717 COSTS_N_INSNS (7), /* div. */
1718 COSTS_N_INSNS (2), /* mult. */
1719 COSTS_N_INSNS (5), /* mult_addsub. */
1720 COSTS_N_INSNS (3), /* fma. */
1721 COSTS_N_INSNS (1), /* addsub. */
1722 0, /* fpconst. */
1723 0, /* neg. */
1724 0, /* compare. */
1725 0, /* widen. */
1726 0, /* narrow. */
1727 0, /* toint. */
1728 0, /* fromint. */
1729 0 /* roundint. */
1730 },
1731 /* FP DFmode */
1732 {
1733 COSTS_N_INSNS (15), /* div. */
1734 COSTS_N_INSNS (5), /* mult. */
1735 COSTS_N_INSNS (7), /* mult_addsub. */
1736 COSTS_N_INSNS (7), /* fma. */
1737 COSTS_N_INSNS (3), /* addsub. */
1738 0, /* fpconst. */
1739 0, /* neg. */
1740 0, /* compare. */
1741 0, /* widen. */
1742 0, /* narrow. */
1743 0, /* toint. */
1744 0, /* fromint. */
1745 0 /* roundint. */
1746 }
1747 },
1748 /* Vector */
1749 {
1750 COSTS_N_INSNS (1) /* alu. */
1751 }
1752 };
1753
1754 const struct tune_params arm_slowmul_tune =
1755 {
1756 &generic_extra_costs, /* Insn extra costs. */
1757 NULL, /* Sched adj cost. */
1758 arm_default_branch_cost,
1759 &arm_default_vec_cost,
1760 3, /* Constant limit. */
1761 5, /* Max cond insns. */
1762 8, /* Memset max inline. */
1763 1, /* Issue rate. */
1764 ARM_PREFETCH_NOT_BENEFICIAL,
1765 tune_params::PREF_CONST_POOL_TRUE,
1766 tune_params::PREF_LDRD_FALSE,
1767 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1768 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1769 tune_params::DISPARAGE_FLAGS_NEITHER,
1770 tune_params::PREF_NEON_64_FALSE,
1771 tune_params::PREF_NEON_STRINGOPS_FALSE,
1772 tune_params::FUSE_NOTHING,
1773 tune_params::SCHED_AUTOPREF_OFF
1774 };
1775
1776 const struct tune_params arm_fastmul_tune =
1777 {
1778 &generic_extra_costs, /* Insn extra costs. */
1779 NULL, /* Sched adj cost. */
1780 arm_default_branch_cost,
1781 &arm_default_vec_cost,
1782 1, /* Constant limit. */
1783 5, /* Max cond insns. */
1784 8, /* Memset max inline. */
1785 1, /* Issue rate. */
1786 ARM_PREFETCH_NOT_BENEFICIAL,
1787 tune_params::PREF_CONST_POOL_TRUE,
1788 tune_params::PREF_LDRD_FALSE,
1789 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1790 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1791 tune_params::DISPARAGE_FLAGS_NEITHER,
1792 tune_params::PREF_NEON_64_FALSE,
1793 tune_params::PREF_NEON_STRINGOPS_FALSE,
1794 tune_params::FUSE_NOTHING,
1795 tune_params::SCHED_AUTOPREF_OFF
1796 };
1797
1798 /* StrongARM has early execution of branches, so a sequence that is worth
1799 skipping is shorter. Set max_insns_skipped to a lower value. */
1800
1801 const struct tune_params arm_strongarm_tune =
1802 {
1803 &generic_extra_costs, /* Insn extra costs. */
1804 NULL, /* Sched adj cost. */
1805 arm_default_branch_cost,
1806 &arm_default_vec_cost,
1807 1, /* Constant limit. */
1808 3, /* Max cond insns. */
1809 8, /* Memset max inline. */
1810 1, /* Issue rate. */
1811 ARM_PREFETCH_NOT_BENEFICIAL,
1812 tune_params::PREF_CONST_POOL_TRUE,
1813 tune_params::PREF_LDRD_FALSE,
1814 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1815 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1816 tune_params::DISPARAGE_FLAGS_NEITHER,
1817 tune_params::PREF_NEON_64_FALSE,
1818 tune_params::PREF_NEON_STRINGOPS_FALSE,
1819 tune_params::FUSE_NOTHING,
1820 tune_params::SCHED_AUTOPREF_OFF
1821 };
1822
1823 const struct tune_params arm_xscale_tune =
1824 {
1825 &generic_extra_costs, /* Insn extra costs. */
1826 xscale_sched_adjust_cost,
1827 arm_default_branch_cost,
1828 &arm_default_vec_cost,
1829 2, /* Constant limit. */
1830 3, /* Max cond insns. */
1831 8, /* Memset max inline. */
1832 1, /* Issue rate. */
1833 ARM_PREFETCH_NOT_BENEFICIAL,
1834 tune_params::PREF_CONST_POOL_TRUE,
1835 tune_params::PREF_LDRD_FALSE,
1836 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1837 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1838 tune_params::DISPARAGE_FLAGS_NEITHER,
1839 tune_params::PREF_NEON_64_FALSE,
1840 tune_params::PREF_NEON_STRINGOPS_FALSE,
1841 tune_params::FUSE_NOTHING,
1842 tune_params::SCHED_AUTOPREF_OFF
1843 };
1844
1845 const struct tune_params arm_9e_tune =
1846 {
1847 &generic_extra_costs, /* Insn extra costs. */
1848 NULL, /* Sched adj cost. */
1849 arm_default_branch_cost,
1850 &arm_default_vec_cost,
1851 1, /* Constant limit. */
1852 5, /* Max cond insns. */
1853 8, /* Memset max inline. */
1854 1, /* Issue rate. */
1855 ARM_PREFETCH_NOT_BENEFICIAL,
1856 tune_params::PREF_CONST_POOL_TRUE,
1857 tune_params::PREF_LDRD_FALSE,
1858 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1859 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1860 tune_params::DISPARAGE_FLAGS_NEITHER,
1861 tune_params::PREF_NEON_64_FALSE,
1862 tune_params::PREF_NEON_STRINGOPS_FALSE,
1863 tune_params::FUSE_NOTHING,
1864 tune_params::SCHED_AUTOPREF_OFF
1865 };
1866
1867 const struct tune_params arm_marvell_pj4_tune =
1868 {
1869 &generic_extra_costs, /* Insn extra costs. */
1870 NULL, /* Sched adj cost. */
1871 arm_default_branch_cost,
1872 &arm_default_vec_cost,
1873 1, /* Constant limit. */
1874 5, /* Max cond insns. */
1875 8, /* Memset max inline. */
1876 2, /* Issue rate. */
1877 ARM_PREFETCH_NOT_BENEFICIAL,
1878 tune_params::PREF_CONST_POOL_TRUE,
1879 tune_params::PREF_LDRD_FALSE,
1880 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1881 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1882 tune_params::DISPARAGE_FLAGS_NEITHER,
1883 tune_params::PREF_NEON_64_FALSE,
1884 tune_params::PREF_NEON_STRINGOPS_FALSE,
1885 tune_params::FUSE_NOTHING,
1886 tune_params::SCHED_AUTOPREF_OFF
1887 };
1888
1889 const struct tune_params arm_v6t2_tune =
1890 {
1891 &generic_extra_costs, /* Insn extra costs. */
1892 NULL, /* Sched adj cost. */
1893 arm_default_branch_cost,
1894 &arm_default_vec_cost,
1895 1, /* Constant limit. */
1896 5, /* Max cond insns. */
1897 8, /* Memset max inline. */
1898 1, /* Issue rate. */
1899 ARM_PREFETCH_NOT_BENEFICIAL,
1900 tune_params::PREF_CONST_POOL_FALSE,
1901 tune_params::PREF_LDRD_FALSE,
1902 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1903 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1904 tune_params::DISPARAGE_FLAGS_NEITHER,
1905 tune_params::PREF_NEON_64_FALSE,
1906 tune_params::PREF_NEON_STRINGOPS_FALSE,
1907 tune_params::FUSE_NOTHING,
1908 tune_params::SCHED_AUTOPREF_OFF
1909 };
1910
1911
1912 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1913 const struct tune_params arm_cortex_tune =
1914 {
1915 &generic_extra_costs,
1916 NULL, /* Sched adj cost. */
1917 arm_default_branch_cost,
1918 &arm_default_vec_cost,
1919 1, /* Constant limit. */
1920 5, /* Max cond insns. */
1921 8, /* Memset max inline. */
1922 2, /* Issue rate. */
1923 ARM_PREFETCH_NOT_BENEFICIAL,
1924 tune_params::PREF_CONST_POOL_FALSE,
1925 tune_params::PREF_LDRD_FALSE,
1926 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1927 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1928 tune_params::DISPARAGE_FLAGS_NEITHER,
1929 tune_params::PREF_NEON_64_FALSE,
1930 tune_params::PREF_NEON_STRINGOPS_FALSE,
1931 tune_params::FUSE_NOTHING,
1932 tune_params::SCHED_AUTOPREF_OFF
1933 };
1934
1935 const struct tune_params arm_cortex_a8_tune =
1936 {
1937 &cortexa8_extra_costs,
1938 NULL, /* Sched adj cost. */
1939 arm_default_branch_cost,
1940 &arm_default_vec_cost,
1941 1, /* Constant limit. */
1942 5, /* Max cond insns. */
1943 8, /* Memset max inline. */
1944 2, /* Issue rate. */
1945 ARM_PREFETCH_NOT_BENEFICIAL,
1946 tune_params::PREF_CONST_POOL_FALSE,
1947 tune_params::PREF_LDRD_FALSE,
1948 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1949 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1950 tune_params::DISPARAGE_FLAGS_NEITHER,
1951 tune_params::PREF_NEON_64_FALSE,
1952 tune_params::PREF_NEON_STRINGOPS_TRUE,
1953 tune_params::FUSE_NOTHING,
1954 tune_params::SCHED_AUTOPREF_OFF
1955 };
1956
1957 const struct tune_params arm_cortex_a7_tune =
1958 {
1959 &cortexa7_extra_costs,
1960 NULL, /* Sched adj cost. */
1961 arm_default_branch_cost,
1962 &arm_default_vec_cost,
1963 1, /* Constant limit. */
1964 5, /* Max cond insns. */
1965 8, /* Memset max inline. */
1966 2, /* Issue rate. */
1967 ARM_PREFETCH_NOT_BENEFICIAL,
1968 tune_params::PREF_CONST_POOL_FALSE,
1969 tune_params::PREF_LDRD_FALSE,
1970 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1971 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1972 tune_params::DISPARAGE_FLAGS_NEITHER,
1973 tune_params::PREF_NEON_64_FALSE,
1974 tune_params::PREF_NEON_STRINGOPS_TRUE,
1975 tune_params::FUSE_NOTHING,
1976 tune_params::SCHED_AUTOPREF_OFF
1977 };
1978
1979 const struct tune_params arm_cortex_a15_tune =
1980 {
1981 &cortexa15_extra_costs,
1982 NULL, /* Sched adj cost. */
1983 arm_default_branch_cost,
1984 &arm_default_vec_cost,
1985 1, /* Constant limit. */
1986 2, /* Max cond insns. */
1987 8, /* Memset max inline. */
1988 3, /* Issue rate. */
1989 ARM_PREFETCH_NOT_BENEFICIAL,
1990 tune_params::PREF_CONST_POOL_FALSE,
1991 tune_params::PREF_LDRD_TRUE,
1992 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1993 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1994 tune_params::DISPARAGE_FLAGS_ALL,
1995 tune_params::PREF_NEON_64_FALSE,
1996 tune_params::PREF_NEON_STRINGOPS_TRUE,
1997 tune_params::FUSE_NOTHING,
1998 tune_params::SCHED_AUTOPREF_FULL
1999 };
2000
2001 const struct tune_params arm_cortex_a35_tune =
2002 {
2003 &cortexa53_extra_costs,
2004 NULL, /* Sched adj cost. */
2005 arm_default_branch_cost,
2006 &arm_default_vec_cost,
2007 1, /* Constant limit. */
2008 5, /* Max cond insns. */
2009 8, /* Memset max inline. */
2010 1, /* Issue rate. */
2011 ARM_PREFETCH_NOT_BENEFICIAL,
2012 tune_params::PREF_CONST_POOL_FALSE,
2013 tune_params::PREF_LDRD_FALSE,
2014 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2015 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2016 tune_params::DISPARAGE_FLAGS_NEITHER,
2017 tune_params::PREF_NEON_64_FALSE,
2018 tune_params::PREF_NEON_STRINGOPS_TRUE,
2019 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2020 tune_params::SCHED_AUTOPREF_OFF
2021 };
2022
2023 const struct tune_params arm_cortex_a53_tune =
2024 {
2025 &cortexa53_extra_costs,
2026 NULL, /* Sched adj cost. */
2027 arm_default_branch_cost,
2028 &arm_default_vec_cost,
2029 1, /* Constant limit. */
2030 5, /* Max cond insns. */
2031 8, /* Memset max inline. */
2032 2, /* Issue rate. */
2033 ARM_PREFETCH_NOT_BENEFICIAL,
2034 tune_params::PREF_CONST_POOL_FALSE,
2035 tune_params::PREF_LDRD_FALSE,
2036 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2037 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2038 tune_params::DISPARAGE_FLAGS_NEITHER,
2039 tune_params::PREF_NEON_64_FALSE,
2040 tune_params::PREF_NEON_STRINGOPS_TRUE,
2041 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2042 tune_params::SCHED_AUTOPREF_OFF
2043 };
2044
2045 const struct tune_params arm_cortex_a57_tune =
2046 {
2047 &cortexa57_extra_costs,
2048 NULL, /* Sched adj cost. */
2049 arm_default_branch_cost,
2050 &arm_default_vec_cost,
2051 1, /* Constant limit. */
2052 2, /* Max cond insns. */
2053 8, /* Memset max inline. */
2054 3, /* Issue rate. */
2055 ARM_PREFETCH_NOT_BENEFICIAL,
2056 tune_params::PREF_CONST_POOL_FALSE,
2057 tune_params::PREF_LDRD_TRUE,
2058 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2059 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2060 tune_params::DISPARAGE_FLAGS_ALL,
2061 tune_params::PREF_NEON_64_FALSE,
2062 tune_params::PREF_NEON_STRINGOPS_TRUE,
2063 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2064 tune_params::SCHED_AUTOPREF_FULL
2065 };
2066
2067 const struct tune_params arm_exynosm1_tune =
2068 {
2069 &exynosm1_extra_costs,
2070 NULL, /* Sched adj cost. */
2071 arm_default_branch_cost,
2072 &arm_default_vec_cost,
2073 1, /* Constant limit. */
2074 2, /* Max cond insns. */
2075 8, /* Memset max inline. */
2076 3, /* Issue rate. */
2077 ARM_PREFETCH_NOT_BENEFICIAL,
2078 tune_params::PREF_CONST_POOL_FALSE,
2079 tune_params::PREF_LDRD_TRUE,
2080 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2081 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2082 tune_params::DISPARAGE_FLAGS_ALL,
2083 tune_params::PREF_NEON_64_FALSE,
2084 tune_params::PREF_NEON_STRINGOPS_TRUE,
2085 tune_params::FUSE_NOTHING,
2086 tune_params::SCHED_AUTOPREF_OFF
2087 };
2088
2089 const struct tune_params arm_xgene1_tune =
2090 {
2091 &xgene1_extra_costs,
2092 NULL, /* Sched adj cost. */
2093 arm_default_branch_cost,
2094 &arm_default_vec_cost,
2095 1, /* Constant limit. */
2096 2, /* Max cond insns. */
2097 32, /* Memset max inline. */
2098 4, /* Issue rate. */
2099 ARM_PREFETCH_NOT_BENEFICIAL,
2100 tune_params::PREF_CONST_POOL_FALSE,
2101 tune_params::PREF_LDRD_TRUE,
2102 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2103 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2104 tune_params::DISPARAGE_FLAGS_ALL,
2105 tune_params::PREF_NEON_64_FALSE,
2106 tune_params::PREF_NEON_STRINGOPS_FALSE,
2107 tune_params::FUSE_NOTHING,
2108 tune_params::SCHED_AUTOPREF_OFF
2109 };
2110
2111 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2112 less appealing. Set max_insns_skipped to a low value. */
2113
2114 const struct tune_params arm_cortex_a5_tune =
2115 {
2116 &cortexa5_extra_costs,
2117 NULL, /* Sched adj cost. */
2118 arm_cortex_a5_branch_cost,
2119 &arm_default_vec_cost,
2120 1, /* Constant limit. */
2121 1, /* Max cond insns. */
2122 8, /* Memset max inline. */
2123 2, /* Issue rate. */
2124 ARM_PREFETCH_NOT_BENEFICIAL,
2125 tune_params::PREF_CONST_POOL_FALSE,
2126 tune_params::PREF_LDRD_FALSE,
2127 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2128 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2129 tune_params::DISPARAGE_FLAGS_NEITHER,
2130 tune_params::PREF_NEON_64_FALSE,
2131 tune_params::PREF_NEON_STRINGOPS_TRUE,
2132 tune_params::FUSE_NOTHING,
2133 tune_params::SCHED_AUTOPREF_OFF
2134 };
2135
2136 const struct tune_params arm_cortex_a9_tune =
2137 {
2138 &cortexa9_extra_costs,
2139 cortex_a9_sched_adjust_cost,
2140 arm_default_branch_cost,
2141 &arm_default_vec_cost,
2142 1, /* Constant limit. */
2143 5, /* Max cond insns. */
2144 8, /* Memset max inline. */
2145 2, /* Issue rate. */
2146 ARM_PREFETCH_BENEFICIAL(4,32,32),
2147 tune_params::PREF_CONST_POOL_FALSE,
2148 tune_params::PREF_LDRD_FALSE,
2149 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2150 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2151 tune_params::DISPARAGE_FLAGS_NEITHER,
2152 tune_params::PREF_NEON_64_FALSE,
2153 tune_params::PREF_NEON_STRINGOPS_FALSE,
2154 tune_params::FUSE_NOTHING,
2155 tune_params::SCHED_AUTOPREF_OFF
2156 };
2157
2158 const struct tune_params arm_cortex_a12_tune =
2159 {
2160 &cortexa12_extra_costs,
2161 NULL, /* Sched adj cost. */
2162 arm_default_branch_cost,
2163 &arm_default_vec_cost, /* Vectorizer costs. */
2164 1, /* Constant limit. */
2165 2, /* Max cond insns. */
2166 8, /* Memset max inline. */
2167 2, /* Issue rate. */
2168 ARM_PREFETCH_NOT_BENEFICIAL,
2169 tune_params::PREF_CONST_POOL_FALSE,
2170 tune_params::PREF_LDRD_TRUE,
2171 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2172 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2173 tune_params::DISPARAGE_FLAGS_ALL,
2174 tune_params::PREF_NEON_64_FALSE,
2175 tune_params::PREF_NEON_STRINGOPS_TRUE,
2176 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2177 tune_params::SCHED_AUTOPREF_OFF
2178 };
2179
2180 const struct tune_params arm_cortex_a73_tune =
2181 {
2182 &cortexa57_extra_costs,
2183 NULL, /* Sched adj cost. */
2184 arm_default_branch_cost,
2185 &arm_default_vec_cost, /* Vectorizer costs. */
2186 1, /* Constant limit. */
2187 2, /* Max cond insns. */
2188 8, /* Memset max inline. */
2189 2, /* Issue rate. */
2190 ARM_PREFETCH_NOT_BENEFICIAL,
2191 tune_params::PREF_CONST_POOL_FALSE,
2192 tune_params::PREF_LDRD_TRUE,
2193 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2194 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2195 tune_params::DISPARAGE_FLAGS_ALL,
2196 tune_params::PREF_NEON_64_FALSE,
2197 tune_params::PREF_NEON_STRINGOPS_TRUE,
2198 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2199 tune_params::SCHED_AUTOPREF_FULL
2200 };
2201
2202 /* armv7m tuning. On Cortex-M4 cores, for example, MOVW/MOVT each take a single
2203 cycle, so materialising a constant with a MOVW/MOVT pair costs two cycles. An LDR
2204 from the constant pool also takes two cycles to execute, but mildly increases pipelining opportunity (consecutive
2205 loads/stores can be pipelined together, saving one cycle), and may also
2206 improve icache utilisation. Hence we prefer the constant pool for such
2207 processors. */
2208
2209 const struct tune_params arm_v7m_tune =
2210 {
2211 &v7m_extra_costs,
2212 NULL, /* Sched adj cost. */
2213 arm_cortex_m_branch_cost,
2214 &arm_default_vec_cost,
2215 1, /* Constant limit. */
2216 2, /* Max cond insns. */
2217 8, /* Memset max inline. */
2218 1, /* Issue rate. */
2219 ARM_PREFETCH_NOT_BENEFICIAL,
2220 tune_params::PREF_CONST_POOL_TRUE,
2221 tune_params::PREF_LDRD_FALSE,
2222 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2223 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2224 tune_params::DISPARAGE_FLAGS_NEITHER,
2225 tune_params::PREF_NEON_64_FALSE,
2226 tune_params::PREF_NEON_STRINGOPS_FALSE,
2227 tune_params::FUSE_NOTHING,
2228 tune_params::SCHED_AUTOPREF_OFF
2229 };
2230
2231 /* Cortex-M7 tuning. */
2232
2233 const struct tune_params arm_cortex_m7_tune =
2234 {
2235 &v7m_extra_costs,
2236 NULL, /* Sched adj cost. */
2237 arm_cortex_m7_branch_cost,
2238 &arm_default_vec_cost,
2239 0, /* Constant limit. */
2240 1, /* Max cond insns. */
2241 8, /* Memset max inline. */
2242 2, /* Issue rate. */
2243 ARM_PREFETCH_NOT_BENEFICIAL,
2244 tune_params::PREF_CONST_POOL_TRUE,
2245 tune_params::PREF_LDRD_FALSE,
2246 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2247 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2248 tune_params::DISPARAGE_FLAGS_NEITHER,
2249 tune_params::PREF_NEON_64_FALSE,
2250 tune_params::PREF_NEON_STRINGOPS_FALSE,
2251 tune_params::FUSE_NOTHING,
2252 tune_params::SCHED_AUTOPREF_OFF
2253 };
2254
2255 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2256 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2257 cortex-m23. */
2258 const struct tune_params arm_v6m_tune =
2259 {
2260 &generic_extra_costs, /* Insn extra costs. */
2261 NULL, /* Sched adj cost. */
2262 arm_default_branch_cost,
2263 &arm_default_vec_cost, /* Vectorizer costs. */
2264 1, /* Constant limit. */
2265 5, /* Max cond insns. */
2266 8, /* Memset max inline. */
2267 1, /* Issue rate. */
2268 ARM_PREFETCH_NOT_BENEFICIAL,
2269 tune_params::PREF_CONST_POOL_FALSE,
2270 tune_params::PREF_LDRD_FALSE,
2271 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2272 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2273 tune_params::DISPARAGE_FLAGS_NEITHER,
2274 tune_params::PREF_NEON_64_FALSE,
2275 tune_params::PREF_NEON_STRINGOPS_FALSE,
2276 tune_params::FUSE_NOTHING,
2277 tune_params::SCHED_AUTOPREF_OFF
2278 };
2279
2280 const struct tune_params arm_fa726te_tune =
2281 {
2282 &generic_extra_costs, /* Insn extra costs. */
2283 fa726te_sched_adjust_cost,
2284 arm_default_branch_cost,
2285 &arm_default_vec_cost,
2286 1, /* Constant limit. */
2287 5, /* Max cond insns. */
2288 8, /* Memset max inline. */
2289 2, /* Issue rate. */
2290 ARM_PREFETCH_NOT_BENEFICIAL,
2291 tune_params::PREF_CONST_POOL_TRUE,
2292 tune_params::PREF_LDRD_FALSE,
2293 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2294 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2295 tune_params::DISPARAGE_FLAGS_NEITHER,
2296 tune_params::PREF_NEON_64_FALSE,
2297 tune_params::PREF_NEON_STRINGOPS_FALSE,
2298 tune_params::FUSE_NOTHING,
2299 tune_params::SCHED_AUTOPREF_OFF
2300 };
2301
2302 /* Auto-generated CPU, FPU and architecture tables. */
2303 #include "arm-cpu-data.h"
2304
2305 /* The name of the preprocessor macro to define for this architecture. PROFILE
2306 is replaced by the architecture name (eg. 8A) in arm_option_override () and
2307 is thus chosen to be big enough to hold the longest architecture name. */
2308
2309 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2310
2311 /* Supported TLS relocations. */
2312
2313 enum tls_reloc {
2314 TLS_GD32,
2315 TLS_LDM32,
2316 TLS_LDO32,
2317 TLS_IE32,
2318 TLS_LE32,
2319 TLS_DESCSEQ /* GNU scheme */
2320 };
2321
2322 /* The maximum number of insns to be used when loading a constant. */
2323 inline static int
2324 arm_constant_limit (bool size_p)
2325 {
2326 return size_p ? 1 : current_tune->constant_limit;
2327 }
2328
2329 /* Emit an insn that's a simple single-set. Both the operands must be known
2330 to be valid. */
2331 inline static rtx_insn *
2332 emit_set_insn (rtx x, rtx y)
2333 {
2334 return emit_insn (gen_rtx_SET (x, y));
2335 }
2336
2337 /* Return the number of bits set in VALUE. */
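/* Illustrative note: the loop below repeatedly clears the lowest set bit
   (value &= value - 1), so e.g. 0x29 (binary 101001) is counted in three
   iterations: 101001 -> 101000 -> 100000 -> 0.  */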
2338 static unsigned
2339 bit_count (unsigned long value)
2340 {
2341 unsigned long count = 0;
2342
2343 while (value)
2344 {
2345 count++;
2346 value &= value - 1; /* Clear the least-significant set bit. */
2347 }
2348
2349 return count;
2350 }
2351
2352 /* Return the number of bits set in BMAP. */
2353 static unsigned
2354 bitmap_popcount (const sbitmap bmap)
2355 {
2356 unsigned int count = 0;
2357 unsigned int n = 0;
2358 sbitmap_iterator sbi;
2359
2360 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2361 count++;
2362 return count;
2363 }
2364
2365 typedef struct
2366 {
2367 machine_mode mode;
2368 const char *name;
2369 } arm_fixed_mode_set;
2370
2371 /* A small helper for setting fixed-point optab libfuncs. */
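/* Illustrative note: the sprintf below builds names of the form
   __gnu_<func><mode>[<n>]; e.g. arm_set_fixed_optab_libfunc (add_optab,
   E_SQmode, "add", "sq", 3) registers "__gnu_addsq3", while a num_suffix
   of 0 omits the trailing digit.  */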
2372
2373 static void
2374 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2375 const char *funcname, const char *modename,
2376 int num_suffix)
2377 {
2378 char buffer[50];
2379
2380 if (num_suffix == 0)
2381 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2382 else
2383 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2384
2385 set_optab_libfunc (optable, mode, buffer);
2386 }
2387
2388 static void
2389 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2390 machine_mode from, const char *funcname,
2391 const char *toname, const char *fromname)
2392 {
2393 char buffer[50];
2394 const char *maybe_suffix_2 = "";
2395
2396 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2397 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2398 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2399 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2400 maybe_suffix_2 = "2";
2401
2402 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2403 maybe_suffix_2);
2404
2405 set_conv_libfunc (optable, to, from, buffer);
2406 }
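/* Illustrative note for the helper above: converting QQmode to SQmode via
   fract_optab registers "__gnu_fractqqsq2" (both are signed fract modes, so
   the "2" suffix is used), whereas SImode to SAmode registers
   "__gnu_fractsisa" (an integer source mode suppresses the suffix).  */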
2407
2408 /* Set up library functions unique to ARM. */
2409
2410 static void
2411 arm_init_libfuncs (void)
2412 {
2413 /* For Linux, we have access to kernel support for atomic operations. */
2414 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2415 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2416
2417 /* There are no special library functions unless we are using the
2418 ARM BPABI. */
2419 if (!TARGET_BPABI)
2420 return;
2421
2422 /* The functions below are described in Section 4 of the "Run-Time
2423 ABI for the ARM architecture", Version 1.0. */
2424
2425 /* Double-precision floating-point arithmetic. Table 2. */
2426 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2427 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2428 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2429 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2430 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2431
2432 /* Double-precision comparisons. Table 3. */
2433 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2434 set_optab_libfunc (ne_optab, DFmode, NULL);
2435 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2436 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2437 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2438 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2439 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2440
2441 /* Single-precision floating-point arithmetic. Table 4. */
2442 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2443 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2444 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2445 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2446 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2447
2448 /* Single-precision comparisons. Table 5. */
2449 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2450 set_optab_libfunc (ne_optab, SFmode, NULL);
2451 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2452 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2453 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2454 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2455 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2456
2457 /* Floating-point to integer conversions. Table 6. */
2458 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2459 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2460 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2461 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2462 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2463 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2464 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2465 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2466
2467 /* Conversions between floating types. Table 7. */
2468 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2469 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2470
2471 /* Integer to floating-point conversions. Table 8. */
2472 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2473 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2474 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2475 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2476 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2477 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2478 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2479 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2480
2481 /* Long long. Table 9. */
2482 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2483 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2484 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2485 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2486 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2487 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2488 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2489 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2490
2491 /* Integer (32/32->32) division. \S 4.3.1. */
2492 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2493 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2494
2495 /* The divmod functions are designed so that they can be used for
2496 plain division, even though they return both the quotient and the
2497 remainder. The quotient is returned in the usual location (i.e.,
2498 r0 for SImode, {r0, r1} for DImode), just as would be expected
2499 for an ordinary division routine. Because the AAPCS calling
2500 conventions specify that all of { r0, r1, r2, r3 } are
2501 callee-clobbered registers, there is no need to tell the compiler
2502 explicitly that those registers are clobbered by these
2503 routines. */
2504 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2505 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2506
2507 /* For SImode division the ABI provides div-without-mod routines,
2508 which are faster. */
2509 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2510 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2511
2512 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2513 divmod libcalls instead. */
2514 set_optab_libfunc (smod_optab, DImode, NULL);
2515 set_optab_libfunc (umod_optab, DImode, NULL);
2516 set_optab_libfunc (smod_optab, SImode, NULL);
2517 set_optab_libfunc (umod_optab, SImode, NULL);
2518
2519 /* Half-precision float operations. The compiler handles all operations
2520 with NULL libfuncs by converting to SFmode. */
2521 switch (arm_fp16_format)
2522 {
2523 case ARM_FP16_FORMAT_IEEE:
2524 case ARM_FP16_FORMAT_ALTERNATIVE:
2525
2526 /* Conversions. */
2527 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2528 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2529 ? "__gnu_f2h_ieee"
2530 : "__gnu_f2h_alternative"));
2531 set_conv_libfunc (sext_optab, SFmode, HFmode,
2532 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2533 ? "__gnu_h2f_ieee"
2534 : "__gnu_h2f_alternative"));
2535
2536 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2537 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2538 ? "__gnu_d2h_ieee"
2539 : "__gnu_d2h_alternative"));
2540
2541 /* Arithmetic. */
2542 set_optab_libfunc (add_optab, HFmode, NULL);
2543 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2544 set_optab_libfunc (smul_optab, HFmode, NULL);
2545 set_optab_libfunc (neg_optab, HFmode, NULL);
2546 set_optab_libfunc (sub_optab, HFmode, NULL);
2547
2548 /* Comparisons. */
2549 set_optab_libfunc (eq_optab, HFmode, NULL);
2550 set_optab_libfunc (ne_optab, HFmode, NULL);
2551 set_optab_libfunc (lt_optab, HFmode, NULL);
2552 set_optab_libfunc (le_optab, HFmode, NULL);
2553 set_optab_libfunc (ge_optab, HFmode, NULL);
2554 set_optab_libfunc (gt_optab, HFmode, NULL);
2555 set_optab_libfunc (unord_optab, HFmode, NULL);
2556 break;
2557
2558 default:
2559 break;
2560 }
2561
2562 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2563 {
2564 const arm_fixed_mode_set fixed_arith_modes[] =
2565 {
2566 { E_QQmode, "qq" },
2567 { E_UQQmode, "uqq" },
2568 { E_HQmode, "hq" },
2569 { E_UHQmode, "uhq" },
2570 { E_SQmode, "sq" },
2571 { E_USQmode, "usq" },
2572 { E_DQmode, "dq" },
2573 { E_UDQmode, "udq" },
2574 { E_TQmode, "tq" },
2575 { E_UTQmode, "utq" },
2576 { E_HAmode, "ha" },
2577 { E_UHAmode, "uha" },
2578 { E_SAmode, "sa" },
2579 { E_USAmode, "usa" },
2580 { E_DAmode, "da" },
2581 { E_UDAmode, "uda" },
2582 { E_TAmode, "ta" },
2583 { E_UTAmode, "uta" }
2584 };
2585 const arm_fixed_mode_set fixed_conv_modes[] =
2586 {
2587 { E_QQmode, "qq" },
2588 { E_UQQmode, "uqq" },
2589 { E_HQmode, "hq" },
2590 { E_UHQmode, "uhq" },
2591 { E_SQmode, "sq" },
2592 { E_USQmode, "usq" },
2593 { E_DQmode, "dq" },
2594 { E_UDQmode, "udq" },
2595 { E_TQmode, "tq" },
2596 { E_UTQmode, "utq" },
2597 { E_HAmode, "ha" },
2598 { E_UHAmode, "uha" },
2599 { E_SAmode, "sa" },
2600 { E_USAmode, "usa" },
2601 { E_DAmode, "da" },
2602 { E_UDAmode, "uda" },
2603 { E_TAmode, "ta" },
2604 { E_UTAmode, "uta" },
2605 { E_QImode, "qi" },
2606 { E_HImode, "hi" },
2607 { E_SImode, "si" },
2608 { E_DImode, "di" },
2609 { E_TImode, "ti" },
2610 { E_SFmode, "sf" },
2611 { E_DFmode, "df" }
2612 };
2613 unsigned int i, j;
2614
2615 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2616 {
2617 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2618 "add", fixed_arith_modes[i].name, 3);
2619 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2620 "ssadd", fixed_arith_modes[i].name, 3);
2621 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2622 "usadd", fixed_arith_modes[i].name, 3);
2623 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2624 "sub", fixed_arith_modes[i].name, 3);
2625 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2626 "sssub", fixed_arith_modes[i].name, 3);
2627 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2628 "ussub", fixed_arith_modes[i].name, 3);
2629 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2630 "mul", fixed_arith_modes[i].name, 3);
2631 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2632 "ssmul", fixed_arith_modes[i].name, 3);
2633 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2634 "usmul", fixed_arith_modes[i].name, 3);
2635 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2636 "div", fixed_arith_modes[i].name, 3);
2637 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2638 "udiv", fixed_arith_modes[i].name, 3);
2639 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2640 "ssdiv", fixed_arith_modes[i].name, 3);
2641 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2642 "usdiv", fixed_arith_modes[i].name, 3);
2643 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2644 "neg", fixed_arith_modes[i].name, 2);
2645 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2646 "ssneg", fixed_arith_modes[i].name, 2);
2647 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2648 "usneg", fixed_arith_modes[i].name, 2);
2649 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2650 "ashl", fixed_arith_modes[i].name, 3);
2651 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2652 "ashr", fixed_arith_modes[i].name, 3);
2653 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2654 "lshr", fixed_arith_modes[i].name, 3);
2655 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2656 "ssashl", fixed_arith_modes[i].name, 3);
2657 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2658 "usashl", fixed_arith_modes[i].name, 3);
2659 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2660 "cmp", fixed_arith_modes[i].name, 2);
2661 }
2662
2663 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2664 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2665 {
2666 if (i == j
2667 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2668 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2669 continue;
2670
2671 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2672 fixed_conv_modes[j].mode, "fract",
2673 fixed_conv_modes[i].name,
2674 fixed_conv_modes[j].name);
2675 arm_set_fixed_conv_libfunc (satfract_optab,
2676 fixed_conv_modes[i].mode,
2677 fixed_conv_modes[j].mode, "satfract",
2678 fixed_conv_modes[i].name,
2679 fixed_conv_modes[j].name);
2680 arm_set_fixed_conv_libfunc (fractuns_optab,
2681 fixed_conv_modes[i].mode,
2682 fixed_conv_modes[j].mode, "fractuns",
2683 fixed_conv_modes[i].name,
2684 fixed_conv_modes[j].name);
2685 arm_set_fixed_conv_libfunc (satfractuns_optab,
2686 fixed_conv_modes[i].mode,
2687 fixed_conv_modes[j].mode, "satfractuns",
2688 fixed_conv_modes[i].name,
2689 fixed_conv_modes[j].name);
2690 }
2691 }
2692
2693 if (TARGET_AAPCS_BASED)
2694 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2695 }
2696
2697 /* On AAPCS systems, this is the "struct __va_list". */
2698 static GTY(()) tree va_list_type;
2699
2700 /* Return the type to use as __builtin_va_list. */
2701 static tree
2702 arm_build_builtin_va_list (void)
2703 {
2704 tree va_list_name;
2705 tree ap_field;
2706
2707 if (!TARGET_AAPCS_BASED)
2708 return std_build_builtin_va_list ();
2709
2710 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2711 defined as:
2712
2713 struct __va_list
2714 {
2715 void *__ap;
2716 };
2717
2718 The C Library ABI further reinforces this definition in \S
2719 4.1.
2720
2721 We must follow this definition exactly. The structure tag
2722 name is visible in C++ mangled names, and thus forms a part
2723 of the ABI. The field name may be used by people who
2724 #include <stdarg.h>. */
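/* (For reference, the backend mangles this type as "St9__va_list", i.e. as
   if it were declared in namespace std, which is what the ARM C++ ABI
   requires.)  */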
2725 /* Create the type. */
2726 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2727 /* Give it the required name. */
2728 va_list_name = build_decl (BUILTINS_LOCATION,
2729 TYPE_DECL,
2730 get_identifier ("__va_list"),
2731 va_list_type);
2732 DECL_ARTIFICIAL (va_list_name) = 1;
2733 TYPE_NAME (va_list_type) = va_list_name;
2734 TYPE_STUB_DECL (va_list_type) = va_list_name;
2735 /* Create the __ap field. */
2736 ap_field = build_decl (BUILTINS_LOCATION,
2737 FIELD_DECL,
2738 get_identifier ("__ap"),
2739 ptr_type_node);
2740 DECL_ARTIFICIAL (ap_field) = 1;
2741 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2742 TYPE_FIELDS (va_list_type) = ap_field;
2743 /* Compute its layout. */
2744 layout_type (va_list_type);
2745
2746 return va_list_type;
2747 }
2748
2749 /* Return an expression of type "void *" pointing to the next
2750 available argument in a variable-argument list. VALIST is the
2751 user-level va_list object, of type __builtin_va_list. */
2752 static tree
2753 arm_extract_valist_ptr (tree valist)
2754 {
2755 if (TREE_TYPE (valist) == error_mark_node)
2756 return error_mark_node;
2757
2758 /* On an AAPCS target, the pointer is stored within "struct
2759 va_list". */
2760 if (TARGET_AAPCS_BASED)
2761 {
2762 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2763 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2764 valist, ap_field, NULL_TREE);
2765 }
2766
2767 return valist;
2768 }
2769
2770 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2771 static void
2772 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2773 {
2774 valist = arm_extract_valist_ptr (valist);
2775 std_expand_builtin_va_start (valist, nextarg);
2776 }
2777
2778 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2779 static tree
2780 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2781 gimple_seq *post_p)
2782 {
2783 valist = arm_extract_valist_ptr (valist);
2784 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2785 }
2786
2787 /* Check any incompatible options that the user has specified. */
2788 static void
2789 arm_option_check_internal (struct gcc_options *opts)
2790 {
2791 int flags = opts->x_target_flags;
2792
2793 /* iWMMXt and NEON are incompatible. */
2794 if (TARGET_IWMMXT
2795 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2796 error ("iWMMXt and NEON are incompatible");
2797
2798 /* Make sure that the processor choice does not conflict with any of the
2799 other command line choices. */
2800 if (TARGET_ARM_P (flags)
2801 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2802 error ("target CPU does not support ARM mode");
2803
2804 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2805 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2806 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2807
2808 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2809 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2810
2811 /* If this target is normally configured to use APCS frames, warn if they
2812 are turned off and debugging is turned on. */
2813 if (TARGET_ARM_P (flags)
2814 && write_symbols != NO_DEBUG
2815 && !TARGET_APCS_FRAME
2816 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2817 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2818
2819 /* iWMMXt unsupported under Thumb mode. */
2820 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2821 error ("iWMMXt unsupported under Thumb mode");
2822
2823 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2824 error ("can not use -mtp=cp15 with 16-bit Thumb");
2825
2826 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2827 {
2828 error ("RTP PIC is incompatible with Thumb");
2829 flag_pic = 0;
2830 }
2831
2832 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2833 with MOVT. */
2834 if ((target_pure_code || target_slow_flash_data)
2835 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2836 {
2837 const char *flag = (target_pure_code ? "-mpure-code" :
2838 "-mslow-flash-data");
2839 error ("%s only supports non-pic code on M-profile targets with the "
2840 "MOVT instruction", flag);
2841 }
2842
2843 }
2844
2845 /* Recompute the global settings depending on target attribute options. */
2846
2847 static void
2848 arm_option_params_internal (void)
2849 {
2850 /* If we are not using the default (ARM mode) section anchor offset
2851 ranges, then set the correct ranges now. */
2852 if (TARGET_THUMB1)
2853 {
2854 /* Thumb-1 LDR instructions cannot have negative offsets.
2855 Permissible positive offset ranges are 5-bit (for byte loads),
2856 6-bit (for halfword loads), or 7-bit (for word loads).
2857 Empirical results suggest a 7-bit anchor range gives the best
2858 overall code size. */
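/* (Illustrative: a Thumb-1 word load encodes a 5-bit immediate scaled by 4,
   i.e. byte offsets 0..124, which is the 7-bit range referred to above;
   halfword and byte loads scale by 2 and 1 respectively.)  */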
2859 targetm.min_anchor_offset = 0;
2860 targetm.max_anchor_offset = 127;
2861 }
2862 else if (TARGET_THUMB2)
2863 {
2864 /* The minimum is set such that the total size of the block
2865 for a particular anchor is 248 + 1 + 4095 bytes, which is
2866 divisible by eight, ensuring natural spacing of anchors. */
2867 targetm.min_anchor_offset = -248;
2868 targetm.max_anchor_offset = 4095;
2869 }
2870 else
2871 {
2872 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2873 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2874 }
2875
2876 /* Increase the number of conditional instructions with -Os. */
2877 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2878
2879 /* For THUMB2, we limit the conditional sequence to one IT block. */
2880 if (TARGET_THUMB2)
2881 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
2882 }
2883
2884 /* True if -mflip-thumb should next add an attribute for the default
2885 mode, false if it should next add an attribute for the opposite mode. */
2886 static GTY(()) bool thumb_flipper;
2887
2888 /* Options after initial target override. */
2889 static GTY(()) tree init_optimize;
2890
2891 static void
2892 arm_override_options_after_change_1 (struct gcc_options *opts)
2893 {
2894 if (opts->x_align_functions <= 0)
2895 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2896 && opts->x_optimize_size ? 2 : 4;
2897 }
2898
2899 /* Implement targetm.override_options_after_change. */
2900
2901 static void
2902 arm_override_options_after_change (void)
2903 {
2904 arm_configure_build_target (&arm_active_target,
2905 TREE_TARGET_OPTION (target_option_default_node),
2906 &global_options_set, false);
2907
2908 arm_override_options_after_change_1 (&global_options);
2909 }
2910
2911 /* Implement TARGET_OPTION_SAVE. */
2912 static void
2913 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2914 {
2915 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2916 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2917 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2918 }
2919
2920 /* Implement TARGET_OPTION_RESTORE. */
2921 static void
2922 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2923 {
2924 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2925 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2926 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2927 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2928 false);
2929 }
2930
2931 /* Reset options between modes that the user has specified. */
2932 static void
2933 arm_option_override_internal (struct gcc_options *opts,
2934 struct gcc_options *opts_set)
2935 {
2936 arm_override_options_after_change_1 (opts);
2937
2938 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2939 {
2940 /* The default is to enable interworking, so this warning message would
2941 be confusing to users who have just compiled with, e.g., -march=armv3. */
2942 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
2943 opts->x_target_flags &= ~MASK_INTERWORK;
2944 }
2945
2946 if (TARGET_THUMB_P (opts->x_target_flags)
2947 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
2948 {
2949 warning (0, "target CPU does not support THUMB instructions");
2950 opts->x_target_flags &= ~MASK_THUMB;
2951 }
2952
2953 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
2954 {
2955 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
2956 opts->x_target_flags &= ~MASK_APCS_FRAME;
2957 }
2958
2959 /* Callee super interworking implies thumb interworking. Adding
2960 this to the flags here simplifies the logic elsewhere. */
2961 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
2962 opts->x_target_flags |= MASK_INTERWORK;
2963
2964 /* Need to remember initial values so combinations of options like
2965 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
2966 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
2967
2968 if (! opts_set->x_arm_restrict_it)
2969 opts->x_arm_restrict_it = arm_arch8;
2970
2971 /* ARM execution state and M profile don't have [restrict] IT. */
2972 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
2973 opts->x_arm_restrict_it = 0;
2974
2975 /* Enable -munaligned-access by default for
2976 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
2977 i.e. Thumb2 and ARM state only.
2978 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
2979 - ARMv8 architecture-based processors.
2980
2981 Disable -munaligned-access by default for
2982 - all pre-ARMv6 architecture-based processors
2983 - ARMv6-M architecture-based processors
2984 - ARMv8-M Baseline processors. */
2985
2986 if (! opts_set->x_unaligned_access)
2987 {
2988 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
2989 && arm_arch6 && (arm_arch_notm || arm_arch7));
2990 }
2991 else if (opts->x_unaligned_access == 1
2992 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
2993 {
2994 warning (0, "target CPU does not support unaligned accesses");
2995 opts->x_unaligned_access = 0;
2996 }
2997
2998 /* Don't warn since it's on by default in -O2. */
2999 if (TARGET_THUMB1_P (opts->x_target_flags))
3000 opts->x_flag_schedule_insns = 0;
3001 else
3002 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3003
3004 /* Disable shrink-wrap when optimizing function for size, since it tends to
3005 generate additional returns. */
3006 if (optimize_function_for_size_p (cfun)
3007 && TARGET_THUMB2_P (opts->x_target_flags))
3008 opts->x_flag_shrink_wrap = false;
3009 else
3010 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3011
3012 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3013 - epilogue_insns - does not accurately model the corresponding insns
3014 emitted in the asm file. In particular, see the comment in thumb_exit
3015 'Find out how many of the (return) argument registers we can corrupt'.
3016 As a consequence, the epilogue may clobber registers without fipa-ra
3017 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3018 TODO: Accurately model clobbers for epilogue_insns and reenable
3019 fipa-ra. */
3020 if (TARGET_THUMB1_P (opts->x_target_flags))
3021 opts->x_flag_ipa_ra = 0;
3022 else
3023 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3024
3025 /* Thumb2 inline assembly code should always use unified syntax.
3026 This will apply to ARM and Thumb1 eventually. */
3027 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3028
3029 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3030 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3031 #endif
3032 }
3033
3034 static sbitmap isa_all_fpubits;
3035 static sbitmap isa_quirkbits;
3036
3037 /* Configure a build target TARGET from the user-specified options OPTS and
3038 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3039 architecture have been specified, but the two are not identical. */
3040 void
3041 arm_configure_build_target (struct arm_build_target *target,
3042 struct cl_target_option *opts,
3043 struct gcc_options *opts_set,
3044 bool warn_compatible)
3045 {
3046 const cpu_option *arm_selected_tune = NULL;
3047 const arch_option *arm_selected_arch = NULL;
3048 const cpu_option *arm_selected_cpu = NULL;
3049 const arm_fpu_desc *arm_selected_fpu = NULL;
3050 const char *tune_opts = NULL;
3051 const char *arch_opts = NULL;
3052 const char *cpu_opts = NULL;
3053
3054 bitmap_clear (target->isa);
3055 target->core_name = NULL;
3056 target->arch_name = NULL;
3057
3058 if (opts_set->x_arm_arch_string)
3059 {
3060 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3061 "-march",
3062 opts->x_arm_arch_string);
3063 arch_opts = strchr (opts->x_arm_arch_string, '+');
3064 }
3065
3066 if (opts_set->x_arm_cpu_string)
3067 {
3068 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3069 opts->x_arm_cpu_string);
3070 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3071 arm_selected_tune = arm_selected_cpu;
3072 /* If taking the tuning from -mcpu, we don't need to rescan the
3073 options for tuning. */
3074 }
3075
3076 if (opts_set->x_arm_tune_string)
3077 {
3078 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3079 opts->x_arm_tune_string);
3080 tune_opts = strchr (opts->x_arm_tune_string, '+');
3081 }
3082
3083 if (arm_selected_arch)
3084 {
3085 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3086 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3087 arch_opts);
3088
3089 if (arm_selected_cpu)
3090 {
3091 auto_sbitmap cpu_isa (isa_num_bits);
3092 auto_sbitmap isa_delta (isa_num_bits);
3093
3094 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3095 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3096 cpu_opts);
3097 bitmap_xor (isa_delta, cpu_isa, target->isa);
3098 /* Ignore any bits that are quirk bits. */
3099 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3100 /* Ignore (for now) any bits that might be set by -mfpu. */
3101 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3102
3103 if (!bitmap_empty_p (isa_delta))
3104 {
3105 if (warn_compatible)
3106 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3107 arm_selected_cpu->common.name,
3108 arm_selected_arch->common.name);
3109 /* -march wins for code generation.
3110 -mcpu wins for default tuning. */
3111 if (!arm_selected_tune)
3112 arm_selected_tune = arm_selected_cpu;
3113
3114 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3115 target->arch_name = arm_selected_arch->common.name;
3116 }
3117 else
3118 {
3119 /* Architecture and CPU are essentially the same.
3120 Prefer the CPU setting. */
3121 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3122 target->core_name = arm_selected_cpu->common.name;
3123 /* Copy the CPU's capabilities, so that we inherit the
3124 appropriate extensions and quirks. */
3125 bitmap_copy (target->isa, cpu_isa);
3126 }
3127 }
3128 else
3129 {
3130 /* Pick a CPU based on the architecture. */
3131 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3132 target->arch_name = arm_selected_arch->common.name;
3133 /* Note: target->core_name is left unset in this path. */
3134 }
3135 }
3136 else if (arm_selected_cpu)
3137 {
3138 target->core_name = arm_selected_cpu->common.name;
3139 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3140 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3141 cpu_opts);
3142 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3143 }
3144 /* If the user did not specify a processor or architecture, choose
3145 one for them. */
3146 else
3147 {
3148 const cpu_option *sel;
3149 auto_sbitmap sought_isa (isa_num_bits);
3150 bitmap_clear (sought_isa);
3151 auto_sbitmap default_isa (isa_num_bits);
3152
3153 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3154 TARGET_CPU_DEFAULT);
3155 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3156 gcc_assert (arm_selected_cpu->common.name);
3157
3158 /* RWE: All of the selection logic below (to the end of this
3159 'if' clause) looks somewhat suspect. It appears to be mostly
3160 there to support forcing thumb support when the default CPU
3161 does not have thumb (somewhat dubious in terms of what the
3162 user might be expecting). I think it should be removed once
3163 support for the pre-thumb era cores is removed. */
3164 sel = arm_selected_cpu;
3165 arm_initialize_isa (default_isa, sel->common.isa_bits);
3166 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3167 cpu_opts);
3168
3169 /* Now check to see if the user has specified any command line
3170 switches that require certain abilities from the cpu. */
3171
3172 if (TARGET_INTERWORK || TARGET_THUMB)
3173 {
3174 bitmap_set_bit (sought_isa, isa_bit_thumb);
3175 bitmap_set_bit (sought_isa, isa_bit_mode32);
3176
3177 /* There are no ARM processors that support both APCS-26 and
3178 interworking. Therefore we forcibly remove MODE26 from
3179 the isa features here (if it was set), so that the
3180 search below will always be able to find a compatible
3181 processor. */
3182 bitmap_clear_bit (default_isa, isa_bit_mode26);
3183 }
3184
3185 /* If there are such requirements and the default CPU does not
3186 satisfy them, we need to run over the complete list of
3187 cores looking for one that is satisfactory. */
3188 if (!bitmap_empty_p (sought_isa)
3189 && !bitmap_subset_p (sought_isa, default_isa))
3190 {
3191 auto_sbitmap candidate_isa (isa_num_bits);
3192 /* We're only interested in a CPU with at least the
3193 capabilities of the default CPU and the required
3194 additional features. */
3195 bitmap_ior (default_isa, default_isa, sought_isa);
3196
3197 /* Try to locate a CPU type that supports all of the abilities
3198 of the default CPU, plus the extra abilities requested by
3199 the user. */
3200 for (sel = all_cores; sel->common.name != NULL; sel++)
3201 {
3202 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3203 /* An exact match? */
3204 if (bitmap_equal_p (default_isa, candidate_isa))
3205 break;
3206 }
3207
3208 if (sel->common.name == NULL)
3209 {
3210 unsigned current_bit_count = isa_num_bits;
3211 const cpu_option *best_fit = NULL;
3212
3213 /* Ideally we would like to issue an error message here
3214 saying that it was not possible to find a CPU compatible
3215 with the default CPU, but which also supports the command
3216 line options specified by the programmer, and so they
3217 ought to use the -mcpu=<name> command line option to
3218 override the default CPU type.
3219
3220 If we cannot find a CPU that has exactly the
3221 characteristics of the default CPU and the given
3222 command line options we scan the array again looking
3223 for a best match. The best match must have at least
3224 the capabilities of the perfect match. */
3225 for (sel = all_cores; sel->common.name != NULL; sel++)
3226 {
3227 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3228
3229 if (bitmap_subset_p (default_isa, candidate_isa))
3230 {
3231 unsigned count;
3232
3233 bitmap_and_compl (candidate_isa, candidate_isa,
3234 default_isa);
3235 count = bitmap_popcount (candidate_isa);
3236
3237 if (count < current_bit_count)
3238 {
3239 best_fit = sel;
3240 current_bit_count = count;
3241 }
3242 }
3243 }
3244
3245 gcc_assert (best_fit);
3246 sel = best_fit;
3247 }
3248 arm_selected_cpu = sel;
3249 }
3250
3251 /* Now we know the CPU, we can finally initialize the target
3252 structure. */
3253 target->core_name = arm_selected_cpu->common.name;
3254 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3255 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3256 cpu_opts);
3257 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3258 }
3259
3260 gcc_assert (arm_selected_cpu);
3261 gcc_assert (arm_selected_arch);
3262
3263 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3264 {
3265 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3266 auto_sbitmap fpu_bits (isa_num_bits);
3267
3268 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3269 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3270 bitmap_ior (target->isa, target->isa, fpu_bits);
3271 }
3272
3273 if (!arm_selected_tune)
3274 arm_selected_tune = arm_selected_cpu;
3275 else /* Validate the features passed to -mtune. */
3276 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3277
3278 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3279
3280 /* Finish initializing the target structure. */
3281 target->arch_pp_name = arm_selected_arch->arch;
3282 target->base_arch = arm_selected_arch->base_arch;
3283 target->profile = arm_selected_arch->profile;
3284
3285 target->tune_flags = tune_data->tune_flags;
3286 target->tune = tune_data->tune;
3287 target->tune_core = tune_data->scheduler;
3288 }
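/* Illustrative sketch of how the option resolution above plays out; the
   exact ISA bits involved are an assumption, not something this file
   guarantees.  For a combination along the lines of

       gcc -march=armv7-a -mcpu=cortex-a15 ...

   the CPU's ISA is likely to carry non-FPU, non-quirk bits (for example
   the integer divide instructions) that the plain architecture lacks, so
   isa_delta is non-empty: a conflict warning is issued, -march wins for
   code generation, and the -mcpu value is retained only as the default
   -mtune selection.  */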
3289
3290 /* Fix up any incompatible options that the user has specified. */
3291 static void
3292 arm_option_override (void)
3293 {
3294 static const enum isa_feature fpu_bitlist[]
3295 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3296 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3297 cl_target_option opts;
3298
3299 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3300 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3301
3302 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3303 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3304
3305 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3306
3307 if (!global_options_set.x_arm_fpu_index)
3308 {
3309 bool ok;
3310 int fpu_index;
3311
3312 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3313 CL_TARGET);
3314 gcc_assert (ok);
3315 arm_fpu_index = (enum fpu_type) fpu_index;
3316 }
3317
3318 cl_target_option_save (&opts, &global_options);
3319 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3320 true);
3321
3322 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3323 SUBTARGET_OVERRIDE_OPTIONS;
3324 #endif
3325
3326 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3327 arm_base_arch = arm_active_target.base_arch;
3328
3329 arm_tune = arm_active_target.tune_core;
3330 tune_flags = arm_active_target.tune_flags;
3331 current_tune = arm_active_target.tune;
3332
3333 /* TBD: Dwarf info for apcs frame is not handled yet. */
3334 if (TARGET_APCS_FRAME)
3335 flag_shrink_wrap = false;
3336
3337 /* BPABI targets use linker tricks to allow interworking on cores
3338 without thumb support. */
3339 if (TARGET_INTERWORK
3340 && !TARGET_BPABI
3341 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3342 {
3343 warning (0, "target CPU does not support interworking");
3344 target_flags &= ~MASK_INTERWORK;
3345 }
3346
3347 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3348 {
3349 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3350 target_flags |= MASK_APCS_FRAME;
3351 }
3352
3353 if (TARGET_POKE_FUNCTION_NAME)
3354 target_flags |= MASK_APCS_FRAME;
3355
3356 if (TARGET_APCS_REENT && flag_pic)
3357 error ("-fpic and -mapcs-reent are incompatible");
3358
3359 if (TARGET_APCS_REENT)
3360 warning (0, "APCS reentrant code not supported. Ignored");
3361
3362 /* Initialize boolean versions of the architectural flags, for use
3363 in the arm.md file. */
3364 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_armv3m);
3365 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3366 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3367 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5);
3368 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5e);
3369 arm_arch5te = arm_arch5e
3370 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3371 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3372 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3373 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3374 arm_arch6m = arm_arch6 && !arm_arch_notm;
3375 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3376 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3377 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3378 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3379 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3380 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3381 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3382 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3383 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3384 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3385 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3386 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3387 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3388 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3389 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3390 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3391 if (arm_fp16_inst)
3392 {
3393 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3394 error ("selected fp16 options are incompatible");
3395 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3396 }
3397
3398
3399 /* Set up some tuning parameters. */
3400 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3401 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3402 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3403 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3404 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3405 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3406
3407 /* And finally, set up some quirks. */
3408 arm_arch_no_volatile_ce
3409 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3410 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3411 isa_bit_quirk_armv6kz);
3412
3413 /* V5 code we generate is completely interworking capable, so we turn off
3414 TARGET_INTERWORK here to avoid many tests later on. */
3415
3416 /* XXX However, we must pass the right pre-processor defines to CPP
3417 or GLD can get confused. This is a hack. */
3418 if (TARGET_INTERWORK)
3419 arm_cpp_interwork = 1;
3420
3421 if (arm_arch5)
3422 target_flags &= ~MASK_INTERWORK;
3423
3424 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3425 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3426
3427 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3428 error ("iwmmxt abi requires an iwmmxt capable cpu");
3429
3430 /* If soft-float is specified then don't use FPU. */
3431 if (TARGET_SOFT_FLOAT)
3432 arm_fpu_attr = FPU_NONE;
3433 else
3434 arm_fpu_attr = FPU_VFP;
3435
3436 if (TARGET_AAPCS_BASED)
3437 {
3438 if (TARGET_CALLER_INTERWORKING)
3439 error ("AAPCS does not support -mcaller-super-interworking");
3440 else
3441 if (TARGET_CALLEE_INTERWORKING)
3442 error ("AAPCS does not support -mcallee-super-interworking");
3443 }
3444
3445 /* __fp16 support currently assumes the core has ldrh. */
3446 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3447 sorry ("__fp16 and no ldrh");
3448
3449 if (TARGET_AAPCS_BASED)
3450 {
3451 if (arm_abi == ARM_ABI_IWMMXT)
3452 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3453 else if (TARGET_HARD_FLOAT_ABI)
3454 {
3455 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3456 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3457 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3458 }
3459 else
3460 arm_pcs_default = ARM_PCS_AAPCS;
3461 }
3462 else
3463 {
3464 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3465 sorry ("-mfloat-abi=hard and VFP");
3466
3467 if (arm_abi == ARM_ABI_APCS)
3468 arm_pcs_default = ARM_PCS_APCS;
3469 else
3470 arm_pcs_default = ARM_PCS_ATPCS;
3471 }
3472
3473 /* For arm2/3 there is no need to do any scheduling if we are doing
3474 software floating-point. */
3475 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3476 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3477
3478 /* Use the cp15 method if it is available. */
3479 if (target_thread_pointer == TP_AUTO)
3480 {
3481 if (arm_arch6k && !TARGET_THUMB1)
3482 target_thread_pointer = TP_CP15;
3483 else
3484 target_thread_pointer = TP_SOFT;
3485 }
3486
3487 /* Override the default structure alignment for AAPCS ABI. */
3488 if (!global_options_set.x_arm_structure_size_boundary)
3489 {
3490 if (TARGET_AAPCS_BASED)
3491 arm_structure_size_boundary = 8;
3492 }
3493 else
3494 {
3495 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3496
3497 if (arm_structure_size_boundary != 8
3498 && arm_structure_size_boundary != 32
3499 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3500 {
3501 if (ARM_DOUBLEWORD_ALIGN)
3502 warning (0,
3503 "structure size boundary can only be set to 8, 32 or 64");
3504 else
3505 warning (0, "structure size boundary can only be set to 8 or 32");
3506 arm_structure_size_boundary
3507 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3508 }
3509 }
3510
3511 if (TARGET_VXWORKS_RTP)
3512 {
3513 if (!global_options_set.x_arm_pic_data_is_text_relative)
3514 arm_pic_data_is_text_relative = 0;
3515 }
3516 else if (flag_pic
3517 && !arm_pic_data_is_text_relative
3518 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3519 /* When text & data segments don't have a fixed displacement, the
3520 intended use is with a single, read only, pic base register.
3521 Unless the user explicitly requested not to do that, set
3522 it. */
3523 target_flags |= MASK_SINGLE_PIC_BASE;
3524
3525 /* If stack checking is disabled, we can use r10 as the PIC register,
3526 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3527 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3528 {
3529 if (TARGET_VXWORKS_RTP)
3530 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3531 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3532 }
3533
3534 if (flag_pic && TARGET_VXWORKS_RTP)
3535 arm_pic_register = 9;
3536
3537 if (arm_pic_register_string != NULL)
3538 {
3539 int pic_register = decode_reg_name (arm_pic_register_string);
3540
3541 if (!flag_pic)
3542 warning (0, "-mpic-register= is useless without -fpic");
3543
3544 /* Prevent the user from choosing an obviously stupid PIC register. */
3545 else if (pic_register < 0 || call_used_regs[pic_register]
3546 || pic_register == HARD_FRAME_POINTER_REGNUM
3547 || pic_register == STACK_POINTER_REGNUM
3548 || pic_register >= PC_REGNUM
3549 || (TARGET_VXWORKS_RTP
3550 && (unsigned int) pic_register != arm_pic_register))
3551 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3552 else
3553 arm_pic_register = pic_register;
3554 }
3555
3556 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3557 if (fix_cm3_ldrd == 2)
3558 {
3559 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3560 fix_cm3_ldrd = 1;
3561 else
3562 fix_cm3_ldrd = 0;
3563 }
3564
3565 /* Hot/Cold partitioning is not currently supported, since we can't
3566 handle literal pool placement in that case. */
3567 if (flag_reorder_blocks_and_partition)
3568 {
3569 inform (input_location,
3570 "-freorder-blocks-and-partition not supported on this architecture");
3571 flag_reorder_blocks_and_partition = 0;
3572 flag_reorder_blocks = 1;
3573 }
3574
3575 if (flag_pic)
3576 /* Hoisting PIC address calculations more aggressively provides a small,
3577 but measurable, size reduction for PIC code. Therefore, we decrease
3578 the bar for unrestricted expression hoisting to the cost of PIC address
3579 calculation, which is 2 instructions. */
3580 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3581 global_options.x_param_values,
3582 global_options_set.x_param_values);
3583
3584 /* ARM EABI defaults to strict volatile bitfields. */
3585 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3586 && abi_version_at_least(2))
3587 flag_strict_volatile_bitfields = 1;
3588
3589 /* Enable sw prefetching at -O3 for CPUs that have prefetch, and for
3590 which we have deemed it beneficial (signified by setting
3591 prefetch.num_slots to 1 or more). */
3592 if (flag_prefetch_loop_arrays < 0
3593 && HAVE_prefetch
3594 && optimize >= 3
3595 && current_tune->prefetch.num_slots > 0)
3596 flag_prefetch_loop_arrays = 1;
3597
3598 /* Set up parameters to be used in prefetching algorithm. Do not
3599 override the defaults unless we are tuning for a core we have
3600 researched values for. */
3601 if (current_tune->prefetch.num_slots > 0)
3602 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3603 current_tune->prefetch.num_slots,
3604 global_options.x_param_values,
3605 global_options_set.x_param_values);
3606 if (current_tune->prefetch.l1_cache_line_size >= 0)
3607 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3608 current_tune->prefetch.l1_cache_line_size,
3609 global_options.x_param_values,
3610 global_options_set.x_param_values);
3611 if (current_tune->prefetch.l1_cache_size >= 0)
3612 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3613 current_tune->prefetch.l1_cache_size,
3614 global_options.x_param_values,
3615 global_options_set.x_param_values);
3616
3617 /* Use Neon to perform 64-bit operations rather than core
3618 registers. */
3619 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3620 if (use_neon_for_64bits == 1)
3621 prefer_neon_for_64bits = true;
3622
3623 /* Use the alternative scheduling-pressure algorithm by default. */
3624 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3625 global_options.x_param_values,
3626 global_options_set.x_param_values);
3627
3628 /* Look through ready list and all of queue for instructions
3629 relevant for L2 auto-prefetcher. */
3630 int param_sched_autopref_queue_depth;
3631
3632 switch (current_tune->sched_autopref)
3633 {
3634 case tune_params::SCHED_AUTOPREF_OFF:
3635 param_sched_autopref_queue_depth = -1;
3636 break;
3637
3638 case tune_params::SCHED_AUTOPREF_RANK:
3639 param_sched_autopref_queue_depth = 0;
3640 break;
3641
3642 case tune_params::SCHED_AUTOPREF_FULL:
3643 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3644 break;
3645
3646 default:
3647 gcc_unreachable ();
3648 }
3649
3650 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3651 param_sched_autopref_queue_depth,
3652 global_options.x_param_values,
3653 global_options_set.x_param_values);
3654
3655 /* Currently, for slow flash data, we just disable literal pools. We also
3656 disable them for pure-code. */
3657 if (target_slow_flash_data || target_pure_code)
3658 arm_disable_literal_pool = true;
3659
3660 if (use_cmse && !arm_arch_cmse)
3661 error ("target CPU does not support ARMv8-M Security Extensions");
3662
3663 /* Disable scheduling fusion by default if it's not armv7 processor
3664 or doesn't prefer ldrd/strd. */
3665 if (flag_schedule_fusion == 2
3666 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3667 flag_schedule_fusion = 0;
3668
3669 /* Need to remember initial options before they are overriden. */
3670 init_optimize = build_optimization_node (&global_options);
3671
3672 arm_option_override_internal (&global_options, &global_options_set);
3673 arm_option_check_internal (&global_options);
3674 arm_option_params_internal ();
3675
3676 /* Create the default target_options structure. */
3677 target_option_default_node = target_option_current_node
3678 = build_target_option_node (&global_options);
3679
3680 /* Register global variables with the garbage collector. */
3681 arm_add_gc_roots ();
3682
3683 /* Init initial mode for testing. */
3684 thumb_flipper = TARGET_THUMB;
3685 }
3686
3687 static void
3688 arm_add_gc_roots (void)
3689 {
3690 gcc_obstack_init(&minipool_obstack);
3691 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3692 }
3693 \f
3694 /* A table of known ARM exception types.
3695 For use with the interrupt function attribute. */
3696
3697 typedef struct
3698 {
3699 const char *const arg;
3700 const unsigned long return_value;
3701 }
3702 isr_attribute_arg;
3703
3704 static const isr_attribute_arg isr_attribute_args [] =
3705 {
3706 { "IRQ", ARM_FT_ISR },
3707 { "irq", ARM_FT_ISR },
3708 { "FIQ", ARM_FT_FIQ },
3709 { "fiq", ARM_FT_FIQ },
3710 { "ABORT", ARM_FT_ISR },
3711 { "abort", ARM_FT_ISR },
3712 { "ABORT", ARM_FT_ISR },
3713 { "abort", ARM_FT_ISR },
3714 { "UNDEF", ARM_FT_EXCEPTION },
3715 { "undef", ARM_FT_EXCEPTION },
3716 { "SWI", ARM_FT_EXCEPTION },
3717 { "swi", ARM_FT_EXCEPTION },
3718 { NULL, ARM_FT_NORMAL }
3719 };
3720
3721 /* Returns the (interrupt) function type of the current
3722 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3723
3724 static unsigned long
3725 arm_isr_value (tree argument)
3726 {
3727 const isr_attribute_arg * ptr;
3728 const char * arg;
3729
3730 if (!arm_arch_notm)
3731 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3732
3733 /* No argument - default to IRQ. */
3734 if (argument == NULL_TREE)
3735 return ARM_FT_ISR;
3736
3737 /* Get the value of the argument. */
3738 if (TREE_VALUE (argument) == NULL_TREE
3739 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3740 return ARM_FT_UNKNOWN;
3741
3742 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3743
3744 /* Check it against the list of known arguments. */
3745 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3746 if (streq (arg, ptr->arg))
3747 return ptr->return_value;
3748
3749 /* An unrecognized interrupt type. */
3750 return ARM_FT_UNKNOWN;
3751 }
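/* For exposition: the strings in isr_attribute_args above are the values
   accepted as the argument of the "interrupt" (or "isr") function
   attribute, e.g.

       void __attribute__ ((interrupt ("IRQ"))) irq_handler (void);
       void __attribute__ ((interrupt ("FIQ"))) fiq_handler (void);

   With no argument the handler defaults to an IRQ handler (ARM_FT_ISR),
   and an unrecognized string yields ARM_FT_UNKNOWN.  */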
3752
3753 /* Computes the type of the current function. */
3754
3755 static unsigned long
3756 arm_compute_func_type (void)
3757 {
3758 unsigned long type = ARM_FT_UNKNOWN;
3759 tree a;
3760 tree attr;
3761
3762 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3763
3764 /* Decide if the current function is volatile. Such functions
3765 never return, and many memory cycles can be saved by not storing
3766 register values that will never be needed again. This optimization
3767 was added to speed up context switching in a kernel application. */
3768 if (optimize > 0
3769 && (TREE_NOTHROW (current_function_decl)
3770 || !(flag_unwind_tables
3771 || (flag_exceptions
3772 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3773 && TREE_THIS_VOLATILE (current_function_decl))
3774 type |= ARM_FT_VOLATILE;
3775
3776 if (cfun->static_chain_decl != NULL)
3777 type |= ARM_FT_NESTED;
3778
3779 attr = DECL_ATTRIBUTES (current_function_decl);
3780
3781 a = lookup_attribute ("naked", attr);
3782 if (a != NULL_TREE)
3783 type |= ARM_FT_NAKED;
3784
3785 a = lookup_attribute ("isr", attr);
3786 if (a == NULL_TREE)
3787 a = lookup_attribute ("interrupt", attr);
3788
3789 if (a == NULL_TREE)
3790 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3791 else
3792 type |= arm_isr_value (TREE_VALUE (a));
3793
3794 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3795 type |= ARM_FT_CMSE_ENTRY;
3796
3797 return type;
3798 }
3799
3800 /* Returns the type of the current function. */
3801
3802 unsigned long
3803 arm_current_func_type (void)
3804 {
3805 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3806 cfun->machine->func_type = arm_compute_func_type ();
3807
3808 return cfun->machine->func_type;
3809 }
3810
3811 bool
3812 arm_allocate_stack_slots_for_args (void)
3813 {
3814 /* Naked functions should not allocate stack slots for arguments. */
3815 return !IS_NAKED (arm_current_func_type ());
3816 }
3817
3818 static bool
3819 arm_warn_func_return (tree decl)
3820 {
3821 /* Naked functions are implemented entirely in assembly, including the
3822 return sequence, so suppress warnings about this. */
3823 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3824 }
3825
3826 \f
3827 /* Output assembler code for a block containing the constant parts
3828 of a trampoline, leaving space for the variable parts.
3829
3830 On the ARM (if r8 is the static chain regnum, and remembering that
3831 referencing pc adds an offset of 8), the trampoline looks like:
3832 ldr r8, [pc, #0]
3833 ldr pc, [pc]
3834 .word static chain value
3835 .word function's address
3836 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3837
3838 static void
3839 arm_asm_trampoline_template (FILE *f)
3840 {
3841 fprintf (f, "\t.syntax unified\n");
3842
3843 if (TARGET_ARM)
3844 {
3845 fprintf (f, "\t.arm\n");
3846 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3847 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3848 }
3849 else if (TARGET_THUMB2)
3850 {
3851 fprintf (f, "\t.thumb\n");
3852 /* The Thumb-2 trampoline is similar to the ARM implementation.
3853 Unlike 16-bit Thumb, we enter the stub in Thumb mode. */
3854 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3855 STATIC_CHAIN_REGNUM, PC_REGNUM);
3856 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3857 }
3858 else
3859 {
3860 ASM_OUTPUT_ALIGN (f, 2);
3861 fprintf (f, "\t.code\t16\n");
3862 fprintf (f, ".Ltrampoline_start:\n");
3863 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3864 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3865 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3866 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3867 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3868 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3869 }
3870 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3871 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3872 }
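/* For exposition: why the ARM-mode template above uses an offset of #0.
   If the first instruction of the trampoline sits at address A, reading
   PC from that instruction yields A + 8, so

       ldr r8, [pc, #0]   @ executed at A, loads the word at A + 8
       ldr pc, [pc, #0]   @ executed at A + 4, loads the word at A + 12

   which are exactly the static chain and function address slots that
   arm_trampoline_init below fills in at offsets 8 and 12.  */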
3873
3874 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3875
3876 static void
3877 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3878 {
3879 rtx fnaddr, mem, a_tramp;
3880
3881 emit_block_move (m_tramp, assemble_trampoline_template (),
3882 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3883
3884 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3885 emit_move_insn (mem, chain_value);
3886
3887 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3888 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3889 emit_move_insn (mem, fnaddr);
3890
3891 a_tramp = XEXP (m_tramp, 0);
3892 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3893 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3894 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3895 }
3896
3897 /* Thumb trampolines should be entered in thumb mode, so set
3898 the bottom bit of the address. */
3899
3900 static rtx
3901 arm_trampoline_adjust_address (rtx addr)
3902 {
3903 if (TARGET_THUMB)
3904 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3905 NULL, 0, OPTAB_LIB_WIDEN);
3906 return addr;
3907 }
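/* For exposition: on ARM, bit 0 of a branch target selects the
   instruction set, so a Thumb trampoline placed at, say, address 0x20000
   must be entered through address 0x20001; the IOR with const1_rtx above
   sets that bit.  */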
3908 \f
3909 /* Return 1 if it is possible to return using a single instruction.
3910 If SIBLING is non-null, this is a test for a return before a sibling
3911 call. SIBLING is the call insn, so we can examine its register usage. */
3912
3913 int
3914 use_return_insn (int iscond, rtx sibling)
3915 {
3916 int regno;
3917 unsigned int func_type;
3918 unsigned long saved_int_regs;
3919 unsigned HOST_WIDE_INT stack_adjust;
3920 arm_stack_offsets *offsets;
3921
3922 /* Never use a return instruction before reload has run. */
3923 if (!reload_completed)
3924 return 0;
3925
3926 func_type = arm_current_func_type ();
3927
3928 /* Naked, volatile and stack alignment functions need special
3929 consideration. */
3930 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
3931 return 0;
3932
3933 /* So do interrupt functions that use the frame pointer and Thumb
3934 interrupt functions. */
3935 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
3936 return 0;
3937
3938 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
3939 && !optimize_function_for_size_p (cfun))
3940 return 0;
3941
3942 offsets = arm_get_frame_offsets ();
3943 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
3944
3945 /* As do variadic functions. */
3946 if (crtl->args.pretend_args_size
3947 || cfun->machine->uses_anonymous_args
3948 /* Or if the function calls __builtin_eh_return () */
3949 || crtl->calls_eh_return
3950 /* Or if the function calls alloca */
3951 || cfun->calls_alloca
3952 /* Or if there is a stack adjustment. However, if the stack pointer
3953 is saved on the stack, we can use a pre-incrementing stack load. */
3954 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
3955 && stack_adjust == 4))
3956 /* Or if the static chain register was saved above the frame, under the
3957 assumption that the stack pointer isn't saved on the stack. */
3958 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
3959 && arm_compute_static_chain_stack_bytes() != 0))
3960 return 0;
3961
3962 saved_int_regs = offsets->saved_regs_mask;
3963
3964 /* Unfortunately, the insn
3965
3966 ldmib sp, {..., sp, ...}
3967
3968 triggers a bug on most SA-110 based devices, such that the stack
3969 pointer won't be correctly restored if the instruction takes a
3970 page fault. We work around this problem by popping r3 along with
3971 the other registers, since that is never slower than executing
3972 another instruction.
3973
3974 We test for !arm_arch5 here, because code for any architecture
3975 less than this could potentially be run on one of the buggy
3976 chips. */
3977 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
3978 {
3979 /* Validate that r3 is a call-clobbered register (always true in
3980 the default abi) ... */
3981 if (!call_used_regs[3])
3982 return 0;
3983
3984 /* ... that it isn't being used for a return value ... */
3985 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
3986 return 0;
3987
3988 /* ... or for a tail-call argument ... */
3989 if (sibling)
3990 {
3991 gcc_assert (CALL_P (sibling));
3992
3993 if (find_regno_fusage (sibling, USE, 3))
3994 return 0;
3995 }
3996
3997 /* ... and that there are no call-saved registers in r0-r2
3998 (always true in the default ABI). */
3999 if (saved_int_regs & 0x7)
4000 return 0;
4001 }
4002
4003 /* Can't be done if interworking with Thumb, and any registers have been
4004 stacked. */
4005 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4006 return 0;
4007
4008 /* On StrongARM, conditional returns are expensive if they aren't
4009 taken and multiple registers have been stacked. */
4010 if (iscond && arm_tune_strongarm)
4011 {
4012 /* Conditional return when just the LR is stored is a simple
4013 conditional-load instruction, that's not expensive. */
4014 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4015 return 0;
4016
4017 if (flag_pic
4018 && arm_pic_register != INVALID_REGNUM
4019 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4020 return 0;
4021 }
4022
4023 /* ARMv8-M non-secure entry functions need to use bxns to return and thus need
4024 several instructions if anything needs to be popped. */
4025 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4026 return 0;
4027
4028 /* If there are saved registers but the LR isn't saved, then we need
4029 two instructions for the return. */
4030 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4031 return 0;
4032
4033 /* Can't be done if any of the VFP regs are pushed,
4034 since this also requires an insn. */
4035 if (TARGET_HARD_FLOAT)
4036 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4037 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4038 return 0;
4039
4040 if (TARGET_REALLY_IWMMXT)
4041 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4042 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4043 return 0;
4044
4045 return 1;
4046 }
4047
4048 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4049 shrink-wrapping if possible. This is the case if we need to emit a
4050 prologue, which we can test by looking at the offsets. */
4051 bool
4052 use_simple_return_p (void)
4053 {
4054 arm_stack_offsets *offsets;
4055
4056 /* Note this function can be called before or after reload. */
4057 if (!reload_completed)
4058 arm_compute_frame_layout ();
4059
4060 offsets = arm_get_frame_offsets ();
4061 return offsets->outgoing_args != 0;
4062 }
4063
4064 /* Return TRUE if int I is a valid immediate ARM constant. */
4065
4066 int
4067 const_ok_for_arm (HOST_WIDE_INT i)
4068 {
4069 int lowbit;
4070
4071 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4072 be all zero, or all one. */
4073 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4074 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4075 != ((~(unsigned HOST_WIDE_INT) 0)
4076 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4077 return FALSE;
4078
4079 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4080
4081 /* Fast return for 0 and small values. We must do this for zero, since
4082 the code below can't handle that one case. */
4083 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4084 return TRUE;
4085
4086 /* Get the number of trailing zeros. */
4087 lowbit = ffs((int) i) - 1;
4088
4089 /* Only even shifts are allowed in ARM mode so round down to the
4090 nearest even number. */
4091 if (TARGET_ARM)
4092 lowbit &= ~1;
4093
4094 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4095 return TRUE;
4096
4097 if (TARGET_ARM)
4098 {
4099 /* Allow rotated constants in ARM mode. */
4100 if (lowbit <= 4
4101 && ((i & ~0xc000003f) == 0
4102 || (i & ~0xf000000f) == 0
4103 || (i & ~0xfc000003) == 0))
4104 return TRUE;
4105 }
4106 else if (TARGET_THUMB2)
4107 {
4108 HOST_WIDE_INT v;
4109
4110 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4111 v = i & 0xff;
4112 v |= v << 16;
4113 if (i == v || i == (v | (v << 8)))
4114 return TRUE;
4115
4116 /* Allow repeated pattern 0xXY00XY00. */
4117 v = i & 0xff00;
4118 v |= v << 16;
4119 if (i == v)
4120 return TRUE;
4121 }
4122 else if (TARGET_HAVE_MOVT)
4123 {
4124 /* Thumb-1 Targets with MOVT. */
4125 if (i > 0xffff)
4126 return FALSE;
4127 else
4128 return TRUE;
4129 }
4130
4131 return FALSE;
4132 }
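/* Illustrative sketch, for exposition only (the helper name and the
   assumption of a 32-bit unsigned int are not part of this file): the
   ARM-mode test above accepts any value expressible as an 8-bit constant
   rotated right by an even amount.  A direct, self-contained version of
   that check could look like:

       static int
       arm_mode_immediate_p (unsigned int x)
       {
         unsigned int r;

         for (r = 0; r < 32; r += 2)
           {
             unsigned int rotated = r ? (x << r) | (x >> (32 - r)) : x;
             if (rotated <= 0xff)          rotating X left by R undoes a
               return 1;                   rotate-right of an 8-bit value
           }
         return 0;
       }

   For example, 0x0003FC00 is 0xFF rotated right by 22 and is accepted,
   while 0x0001FE00 would need an odd rotation and is rejected in ARM
   mode (Thumb-2 accepts it, as 0xFF shifted left by 9).  */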
4133
4134 /* Return true if I is a valid constant for the operation CODE. */
4135 int
4136 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4137 {
4138 if (const_ok_for_arm (i))
4139 return 1;
4140
4141 switch (code)
4142 {
4143 case SET:
4144 /* See if we can use movw. */
4145 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4146 return 1;
4147 else
4148 /* Otherwise, try mvn. */
4149 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4150
4151 case PLUS:
4152 /* See if we can use addw or subw. */
4153 if (TARGET_THUMB2
4154 && ((i & 0xfffff000) == 0
4155 || ((-i) & 0xfffff000) == 0))
4156 return 1;
4157 /* Fall through. */
4158 case COMPARE:
4159 case EQ:
4160 case NE:
4161 case GT:
4162 case LE:
4163 case LT:
4164 case GE:
4165 case GEU:
4166 case LTU:
4167 case GTU:
4168 case LEU:
4169 case UNORDERED:
4170 case ORDERED:
4171 case UNEQ:
4172 case UNGE:
4173 case UNLT:
4174 case UNGT:
4175 case UNLE:
4176 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4177
4178 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4179 case XOR:
4180 return 0;
4181
4182 case IOR:
4183 if (TARGET_THUMB2)
4184 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4185 return 0;
4186
4187 case AND:
4188 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4189
4190 default:
4191 gcc_unreachable ();
4192 }
4193 }
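/* For exposition: the negation and inversion cases above mean, for
   instance, that PLUS with the constant 0xFFFFFF00 (-256) is acceptable
   because 256 is a valid immediate (the add can be emitted as a sub),
   and AND with 0xFFFFFF00 is acceptable because its inverse 0xFF is a
   valid immediate (the and can be emitted as a bic).  */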
4194
4195 /* Return true if I is a valid di mode constant for the operation CODE. */
4196 int
4197 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4198 {
4199 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4200 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4201 rtx hi = GEN_INT (hi_val);
4202 rtx lo = GEN_INT (lo_val);
4203
4204 if (TARGET_THUMB1)
4205 return 0;
4206
4207 switch (code)
4208 {
4209 case AND:
4210 case IOR:
4211 case XOR:
4212 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4213 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4214 case PLUS:
4215 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4216
4217 default:
4218 return 0;
4219 }
4220 }
4221
4222 /* Emit a sequence of insns to handle a large constant.
4223 CODE is the code of the operation required, it can be any of SET, PLUS,
4224 IOR, AND, XOR, MINUS;
4225 MODE is the mode in which the operation is being performed;
4226 VAL is the integer to operate on;
4227 SOURCE is the other operand (a register, or a null-pointer for SET);
4228 SUBTARGETS means it is safe to create scratch registers if that will
4229 either produce a simpler sequence, or we will want to cse the values.
4230 Return value is the number of insns emitted. */
4231
4232 /* ??? Tweak this for thumb2. */
4233 int
4234 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4235 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4236 {
4237 rtx cond;
4238
4239 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4240 cond = COND_EXEC_TEST (PATTERN (insn));
4241 else
4242 cond = NULL_RTX;
4243
4244 if (subtargets || code == SET
4245 || (REG_P (target) && REG_P (source)
4246 && REGNO (target) != REGNO (source)))
4247 {
4248 /* After arm_reorg has been called, we can't fix up expensive
4249 constants by pushing them into memory so we must synthesize
4250 them in-line, regardless of the cost. This is only likely to
4251 be more costly on chips that have load delay slots and we are
4252 compiling without running the scheduler (so no splitting
4253 occurred before the final instruction emission).
4254
4255 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4256 */
4257 if (!cfun->machine->after_arm_reorg
4258 && !cond
4259 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4260 1, 0)
4261 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4262 + (code != SET))))
4263 {
4264 if (code == SET)
4265 {
4266 /* Currently SET is the only monadic value for CODE; all
4267 the rest are dyadic. */
4268 if (TARGET_USE_MOVT)
4269 arm_emit_movpair (target, GEN_INT (val));
4270 else
4271 emit_set_insn (target, GEN_INT (val));
4272
4273 return 1;
4274 }
4275 else
4276 {
4277 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4278
4279 if (TARGET_USE_MOVT)
4280 arm_emit_movpair (temp, GEN_INT (val));
4281 else
4282 emit_set_insn (temp, GEN_INT (val));
4283
4284 /* For MINUS, the value is subtracted from, since we never
4285 have subtraction of a constant. */
4286 if (code == MINUS)
4287 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4288 else
4289 emit_set_insn (target,
4290 gen_rtx_fmt_ee (code, mode, source, temp));
4291 return 2;
4292 }
4293 }
4294 }
4295
4296 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4297 1);
4298 }
4299
4300 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4301 ARM/Thumb-2 immediates and add up to VAL.
4302 The function return value gives the number of insns required. */
4303 static int
4304 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4305 struct four_ints *return_sequence)
4306 {
4307 int best_consecutive_zeros = 0;
4308 int i;
4309 int best_start = 0;
4310 int insns1, insns2;
4311 struct four_ints tmp_sequence;
4312
4313 /* If we aren't targeting ARM, the best place to start is always at
4314 the bottom, otherwise look more closely. */
4315 if (TARGET_ARM)
4316 {
4317 for (i = 0; i < 32; i += 2)
4318 {
4319 int consecutive_zeros = 0;
4320
4321 if (!(val & (3 << i)))
4322 {
4323 while ((i < 32) && !(val & (3 << i)))
4324 {
4325 consecutive_zeros += 2;
4326 i += 2;
4327 }
4328 if (consecutive_zeros > best_consecutive_zeros)
4329 {
4330 best_consecutive_zeros = consecutive_zeros;
4331 best_start = i - consecutive_zeros;
4332 }
4333 i -= 2;
4334 }
4335 }
4336 }
4337
4338 /* So long as it won't require any more insns to do so, it's
4339 desirable to emit a small constant (in bits 0...9) in the last
4340 insn. This way there is more chance that it can be combined with
4341 a later addressing insn to form a pre-indexed load or store
4342 operation. Consider:
4343
4344 *((volatile int *)0xe0000100) = 1;
4345 *((volatile int *)0xe0000110) = 2;
4346
4347 We want this to wind up as:
4348
4349 mov rA, #0xe0000000
4350 mov rB, #1
4351 str rB, [rA, #0x100]
4352 mov rB, #2
4353 str rB, [rA, #0x110]
4354
4355 rather than having to synthesize both large constants from scratch.
4356
4357 Therefore, we calculate how many insns would be required to emit
4358 the constant starting from `best_start', and also starting from
4359 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4360 yield a shorter sequence, we may as well use zero. */
4361 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4362 if (best_start != 0
4363 && ((HOST_WIDE_INT_1U << best_start) < val))
4364 {
4365 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4366 if (insns2 <= insns1)
4367 {
4368 *return_sequence = tmp_sequence;
4369 insns1 = insns2;
4370 }
4371 }
4372
4373 return insns1;
4374 }
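/* A worked example of the splitting above, schematic only (register
   names are placeholders, and targets with MOVW/MOVT may do better): in
   ARM mode the value 0x12340000 is not a valid immediate, but it can be
   built from two 8-bit rotated immediates,

       mov     rA, #0x12000000    @ 0x12 rotated right by 8
       add     rA, rA, #0x340000  @ 0x34 rotated right by 16

   so the sequence returned here would contain those two values and the
   insn count would be 2 (the parts are disjoint, so add and orr are
   interchangeable).  */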
4375
4376 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4377 static int
4378 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4379 struct four_ints *return_sequence, int i)
4380 {
4381 int remainder = val & 0xffffffff;
4382 int insns = 0;
4383
4384 /* Try and find a way of doing the job in either two or three
4385 instructions.
4386
4387 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4388 location. We start at position I. This may be the MSB, or
4389 optimal_immediate_sequence may have positioned it at the largest block
4390 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4391 wrapping around to the top of the word when we drop off the bottom.
4392 In the worst case this code should produce no more than four insns.
4393
4394 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4395 constants, shifted to any arbitrary location. We should always start
4396 at the MSB. */
4397 do
4398 {
4399 int end;
4400 unsigned int b1, b2, b3, b4;
4401 unsigned HOST_WIDE_INT result;
4402 int loc;
4403
4404 gcc_assert (insns < 4);
4405
4406 if (i <= 0)
4407 i += 32;
4408
4409 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4410 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4411 {
4412 loc = i;
4413 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4414 /* We can use addw/subw for the last 12 bits. */
4415 result = remainder;
4416 else
4417 {
4418 /* Use an 8-bit shifted/rotated immediate. */
4419 end = i - 8;
4420 if (end < 0)
4421 end += 32;
4422 result = remainder & ((0x0ff << end)
4423 | ((i < end) ? (0xff >> (32 - end))
4424 : 0));
4425 i -= 8;
4426 }
4427 }
4428 else
4429 {
4430 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4431 arbitrary shifts. */
4432 i -= TARGET_ARM ? 2 : 1;
4433 continue;
4434 }
4435
4436 /* Next, see if we can do a better job with a thumb2 replicated
4437 constant.
4438
4439 We do it this way around to catch the cases like 0x01F001E0 where
4440 two 8-bit immediates would work, but a replicated constant would
4441 make it worse.
4442
4443 TODO: 16-bit constants that don't clear all the bits, but still win.
4444 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4445 if (TARGET_THUMB2)
4446 {
4447 b1 = (remainder & 0xff000000) >> 24;
4448 b2 = (remainder & 0x00ff0000) >> 16;
4449 b3 = (remainder & 0x0000ff00) >> 8;
4450 b4 = remainder & 0xff;
4451
4452 if (loc > 24)
4453 {
4454 /* The 8-bit immediate already found clears b1 (and maybe b2),
4455 but must leave b3 and b4 alone. */
4456
4457 /* First try to find a 32-bit replicated constant that clears
4458 almost everything. We can assume that we can't do it in one,
4459 or else we wouldn't be here. */
4460 unsigned int tmp = b1 & b2 & b3 & b4;
4461 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4462 + (tmp << 24);
4463 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4464 + (tmp == b3) + (tmp == b4);
4465 if (tmp
4466 && (matching_bytes >= 3
4467 || (matching_bytes == 2
4468 && const_ok_for_op (remainder & ~tmp2, code))))
4469 {
4470 /* At least 3 of the bytes match, and the fourth has at
4471 least as many bits set, or two of the bytes match
4472 and it will only require one more insn to finish. */
4473 result = tmp2;
4474 i = tmp != b1 ? 32
4475 : tmp != b2 ? 24
4476 : tmp != b3 ? 16
4477 : 8;
4478 }
4479
4480 /* Second, try to find a 16-bit replicated constant that can
4481 leave three of the bytes clear. If b2 or b4 is already
4482 zero, then we can. If the 8-bit from above would not
4483 clear b2 anyway, then we still win. */
4484 else if (b1 == b3 && (!b2 || !b4
4485 || (remainder & 0x00ff0000 & ~result)))
4486 {
4487 result = remainder & 0xff00ff00;
4488 i = 24;
4489 }
4490 }
4491 else if (loc > 16)
4492 {
4493 /* The 8-bit immediate already found clears b2 (and maybe b3)
4494 and we don't get here unless b1 is already clear, but it will
4495 leave b4 unchanged. */
4496
4497 /* If we can clear b2 and b4 at once, then we win, since the
4498 8-bits couldn't possibly reach that far. */
4499 if (b2 == b4)
4500 {
4501 result = remainder & 0x00ff00ff;
4502 i = 16;
4503 }
4504 }
4505 }
4506
4507 return_sequence->i[insns++] = result;
4508 remainder &= ~result;
4509
4510 if (code == SET || code == MINUS)
4511 code = PLUS;
4512 }
4513 while (remainder);
4514
4515 return insns;
4516 }
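/* For exposition, the 0x01F001E0 case mentioned in the comments above,
   shown schematically with placeholder registers: in Thumb-2 it splits
   into two plain 8-bit shifted immediates,

       orr     rX, rY, #0x01F00000  @ 0x1F shifted left by 20
       orr     rX, rX, #0x000001E0  @ 0xF0 shifted left by 1

   whereas starting from a replicated 0x00XY00XY-style constant would make
   things worse, which is why the replicated-constant checks above only
   fire when they clear at least as many bytes as the 8-bit immediate.  */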
4517
4518 /* Emit an instruction with the indicated PATTERN. If COND is
4519 non-NULL, conditionalize the execution of the instruction on COND
4520 being true. */
4521
4522 static void
4523 emit_constant_insn (rtx cond, rtx pattern)
4524 {
4525 if (cond)
4526 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4527 emit_insn (pattern);
4528 }
4529
4530 /* As above, but extra parameter GENERATE which, if clear, suppresses
4531 RTL generation. */
4532
4533 static int
4534 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4535 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4536 int subtargets, int generate)
4537 {
4538 int can_invert = 0;
4539 int can_negate = 0;
4540 int final_invert = 0;
4541 int i;
4542 int set_sign_bit_copies = 0;
4543 int clear_sign_bit_copies = 0;
4544 int clear_zero_bit_copies = 0;
4545 int set_zero_bit_copies = 0;
4546 int insns = 0, neg_insns, inv_insns;
4547 unsigned HOST_WIDE_INT temp1, temp2;
4548 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4549 struct four_ints *immediates;
4550 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4551
4552 /* Find out which operations are safe for a given CODE. Also do a quick
4553 check for degenerate cases; these can occur when DImode operations
4554 are split. */
4555 switch (code)
4556 {
4557 case SET:
4558 can_invert = 1;
4559 break;
4560
4561 case PLUS:
4562 can_negate = 1;
4563 break;
4564
4565 case IOR:
4566 if (remainder == 0xffffffff)
4567 {
4568 if (generate)
4569 emit_constant_insn (cond,
4570 gen_rtx_SET (target,
4571 GEN_INT (ARM_SIGN_EXTEND (val))));
4572 return 1;
4573 }
4574
4575 if (remainder == 0)
4576 {
4577 if (reload_completed && rtx_equal_p (target, source))
4578 return 0;
4579
4580 if (generate)
4581 emit_constant_insn (cond, gen_rtx_SET (target, source));
4582 return 1;
4583 }
4584 break;
4585
4586 case AND:
4587 if (remainder == 0)
4588 {
4589 if (generate)
4590 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4591 return 1;
4592 }
4593 if (remainder == 0xffffffff)
4594 {
4595 if (reload_completed && rtx_equal_p (target, source))
4596 return 0;
4597 if (generate)
4598 emit_constant_insn (cond, gen_rtx_SET (target, source));
4599 return 1;
4600 }
4601 can_invert = 1;
4602 break;
4603
4604 case XOR:
4605 if (remainder == 0)
4606 {
4607 if (reload_completed && rtx_equal_p (target, source))
4608 return 0;
4609 if (generate)
4610 emit_constant_insn (cond, gen_rtx_SET (target, source));
4611 return 1;
4612 }
4613
4614 if (remainder == 0xffffffff)
4615 {
4616 if (generate)
4617 emit_constant_insn (cond,
4618 gen_rtx_SET (target,
4619 gen_rtx_NOT (mode, source)));
4620 return 1;
4621 }
4622 final_invert = 1;
4623 break;
4624
4625 case MINUS:
4626 /* We treat MINUS as (val - source), since (source - val) is always
4627 passed as (source + (-val)). */
4628 if (remainder == 0)
4629 {
4630 if (generate)
4631 emit_constant_insn (cond,
4632 gen_rtx_SET (target,
4633 gen_rtx_NEG (mode, source)));
4634 return 1;
4635 }
4636 if (const_ok_for_arm (val))
4637 {
4638 if (generate)
4639 emit_constant_insn (cond,
4640 gen_rtx_SET (target,
4641 gen_rtx_MINUS (mode, GEN_INT (val),
4642 source)));
4643 return 1;
4644 }
4645
4646 break;
4647
4648 default:
4649 gcc_unreachable ();
4650 }
4651
4652 /* If we can do it in one insn get out quickly. */
4653 if (const_ok_for_op (val, code))
4654 {
4655 if (generate)
4656 emit_constant_insn (cond,
4657 gen_rtx_SET (target,
4658 (source
4659 ? gen_rtx_fmt_ee (code, mode, source,
4660 GEN_INT (val))
4661 : GEN_INT (val))));
4662 return 1;
4663 }
4664
4665 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4666 insn. */
4667 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4668 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4669 {
4670 if (generate)
4671 {
4672 if (mode == SImode && i == 16)
4673 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4674 smaller insn. */
4675 emit_constant_insn (cond,
4676 gen_zero_extendhisi2
4677 (target, gen_lowpart (HImode, source)));
4678 else
4679 /* Extz only supports SImode, but we can coerce the operands
4680 into that mode. */
4681 emit_constant_insn (cond,
4682 gen_extzv_t2 (gen_lowpart (SImode, target),
4683 gen_lowpart (SImode, source),
4684 GEN_INT (i), const0_rtx));
4685 }
4686
4687 return 1;
4688 }
4689
4690 /* Calculate a few attributes that may be useful for specific
4691 optimizations. */
4692 /* Count number of leading zeros. */
4693 for (i = 31; i >= 0; i--)
4694 {
4695 if ((remainder & (1 << i)) == 0)
4696 clear_sign_bit_copies++;
4697 else
4698 break;
4699 }
4700
4701 /* Count number of leading 1's. */
4702 for (i = 31; i >= 0; i--)
4703 {
4704 if ((remainder & (1 << i)) != 0)
4705 set_sign_bit_copies++;
4706 else
4707 break;
4708 }
4709
4710 /* Count number of trailing zero's. */
4711 for (i = 0; i <= 31; i++)
4712 {
4713 if ((remainder & (1 << i)) == 0)
4714 clear_zero_bit_copies++;
4715 else
4716 break;
4717 }
4718
4719 /* Count number of trailing 1's. */
4720 for (i = 0; i <= 31; i++)
4721 {
4722 if ((remainder & (1 << i)) != 0)
4723 set_zero_bit_copies++;
4724 else
4725 break;
4726 }
4727
4728 switch (code)
4729 {
4730 case SET:
4731 /* See if we can do this by sign_extending a constant that is known
4732 to be negative. This is a good way of doing it, since the shift
4733 may well merge into a subsequent insn. */
4734 if (set_sign_bit_copies > 1)
4735 {
4736 if (const_ok_for_arm
4737 (temp1 = ARM_SIGN_EXTEND (remainder
4738 << (set_sign_bit_copies - 1))))
4739 {
4740 if (generate)
4741 {
4742 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4743 emit_constant_insn (cond,
4744 gen_rtx_SET (new_src, GEN_INT (temp1)));
4745 emit_constant_insn (cond,
4746 gen_ashrsi3 (target, new_src,
4747 GEN_INT (set_sign_bit_copies - 1)));
4748 }
4749 return 2;
4750 }
4751 /* For an inverted constant, we will need to set the low bits,
4752 these will be shifted out of harm's way. */
4753 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4754 if (const_ok_for_arm (~temp1))
4755 {
4756 if (generate)
4757 {
4758 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4759 emit_constant_insn (cond,
4760 gen_rtx_SET (new_src, GEN_INT (temp1)));
4761 emit_constant_insn (cond,
4762 gen_ashrsi3 (target, new_src,
4763 GEN_INT (set_sign_bit_copies - 1)));
4764 }
4765 return 2;
4766 }
4767 }
4768
4769 /* See if we can calculate the value as the difference between two
4770 valid immediates. */
4771 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4772 {
4773 int topshift = clear_sign_bit_copies & ~1;
4774
4775 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4776 & (0xff000000 >> topshift));
4777
4778 /* If temp1 is zero, then that means the 9 most significant
4779 bits of remainder were 1 and we've caused it to overflow.
4780 When topshift is 0 we don't need to do anything since we
4781 can borrow from 'bit 32'. */
4782 if (temp1 == 0 && topshift != 0)
4783 temp1 = 0x80000000 >> (topshift - 1);
4784
4785 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4786
4787 if (const_ok_for_arm (temp2))
4788 {
4789 if (generate)
4790 {
4791 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4792 emit_constant_insn (cond,
4793 gen_rtx_SET (new_src, GEN_INT (temp1)));
4794 emit_constant_insn (cond,
4795 gen_addsi3 (target, new_src,
4796 GEN_INT (-temp2)));
4797 }
4798
4799 return 2;
4800 }
4801 }
4802
4803 /* See if we can generate this by setting the bottom (or the top)
4804 16 bits, and then shifting these into the other half of the
4805 word. We only look for the simplest cases, to do more would cost
4806 too much. Be careful, however, not to generate this when the
4807 alternative would take fewer insns. */
4808 if (val & 0xffff0000)
4809 {
4810 temp1 = remainder & 0xffff0000;
4811 temp2 = remainder & 0x0000ffff;
4812
4813 /* Overlaps outside this range are best done using other methods. */
4814 for (i = 9; i < 24; i++)
4815 {
4816 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4817 && !const_ok_for_arm (temp2))
4818 {
4819 rtx new_src = (subtargets
4820 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4821 : target);
4822 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4823 source, subtargets, generate);
4824 source = new_src;
4825 if (generate)
4826 emit_constant_insn
4827 (cond,
4828 gen_rtx_SET
4829 (target,
4830 gen_rtx_IOR (mode,
4831 gen_rtx_ASHIFT (mode, source,
4832 GEN_INT (i)),
4833 source)));
4834 return insns + 1;
4835 }
4836 }
4837
4838 /* Don't duplicate cases already considered. */
4839 for (i = 17; i < 24; i++)
4840 {
4841 if (((temp1 | (temp1 >> i)) == remainder)
4842 && !const_ok_for_arm (temp1))
4843 {
4844 rtx new_src = (subtargets
4845 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4846 : target);
4847 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4848 source, subtargets, generate);
4849 source = new_src;
4850 if (generate)
4851 emit_constant_insn
4852 (cond,
4853 gen_rtx_SET (target,
4854 gen_rtx_IOR
4855 (mode,
4856 gen_rtx_LSHIFTRT (mode, source,
4857 GEN_INT (i)),
4858 source)));
4859 return insns + 1;
4860 }
4861 }
4862 }
4863 break;
4864
4865 case IOR:
4866 case XOR:
4867 /* If we have IOR or XOR, and the constant can be loaded in a
4868 single instruction, and we can find a temporary to put it in,
4869 then this can be done in two instructions instead of 3-4. */
4870 if (subtargets
4871 /* TARGET can't be NULL if SUBTARGETS is 0. */
4872 || (reload_completed && !reg_mentioned_p (target, source)))
4873 {
4874 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4875 {
4876 if (generate)
4877 {
4878 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4879
4880 emit_constant_insn (cond,
4881 gen_rtx_SET (sub, GEN_INT (val)));
4882 emit_constant_insn (cond,
4883 gen_rtx_SET (target,
4884 gen_rtx_fmt_ee (code, mode,
4885 source, sub)));
4886 }
4887 return 2;
4888 }
4889 }
4890
4891 if (code == XOR)
4892 break;
4893
4894 /* Convert
4895 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4896 followed by 0s, e.g. 0xfff00000)
4897 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4898
4899 This can be done in 2 instructions by using shifts with mov or mvn.
4900 e.g. for
4901 x = x | 0xfff00000;
4902 we generate.
4903 mvn r0, r0, asl #12
4904 mvn r0, r0, lsr #12 */
4905 if (set_sign_bit_copies > 8
4906 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4907 {
4908 if (generate)
4909 {
4910 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4911 rtx shift = GEN_INT (set_sign_bit_copies);
4912
4913 emit_constant_insn
4914 (cond,
4915 gen_rtx_SET (sub,
4916 gen_rtx_NOT (mode,
4917 gen_rtx_ASHIFT (mode,
4918 source,
4919 shift))));
4920 emit_constant_insn
4921 (cond,
4922 gen_rtx_SET (target,
4923 gen_rtx_NOT (mode,
4924 gen_rtx_LSHIFTRT (mode, sub,
4925 shift))));
4926 }
4927 return 2;
4928 }
4929
4930 /* Convert
4931 x = y | constant (which has set_zero_bit_copies number of trailing ones).
4932 to
4933 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
4934
4935 E.g. for r0 = r0 | 0xfff
4936 mvn r0, r0, lsr #12
4937 mvn r0, r0, asl #12
4938
4939 */
4940 if (set_zero_bit_copies > 8
4941 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
4942 {
4943 if (generate)
4944 {
4945 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4946 rtx shift = GEN_INT (set_zero_bit_copies);
4947
4948 emit_constant_insn
4949 (cond,
4950 gen_rtx_SET (sub,
4951 gen_rtx_NOT (mode,
4952 gen_rtx_LSHIFTRT (mode,
4953 source,
4954 shift))));
4955 emit_constant_insn
4956 (cond,
4957 gen_rtx_SET (target,
4958 gen_rtx_NOT (mode,
4959 gen_rtx_ASHIFT (mode, sub,
4960 shift))));
4961 }
4962 return 2;
4963 }
4964
4965 /* This will never be reached for Thumb2 because orn is a valid
4966 instruction. This is for Thumb1 and the ARM 32 bit cases.
4967
4968 x = y | constant (such that ~constant is a valid constant)
4969 Transform this to
4970 x = ~(~y & ~constant).
4971 */
4972 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
4973 {
4974 if (generate)
4975 {
4976 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4977 emit_constant_insn (cond,
4978 gen_rtx_SET (sub,
4979 gen_rtx_NOT (mode, source)));
4980 source = sub;
4981 if (subtargets)
4982 sub = gen_reg_rtx (mode);
4983 emit_constant_insn (cond,
4984 gen_rtx_SET (sub,
4985 gen_rtx_AND (mode, source,
4986 GEN_INT (temp1))));
4987 emit_constant_insn (cond,
4988 gen_rtx_SET (target,
4989 gen_rtx_NOT (mode, sub)));
4990 }
4991 return 3;
4992 }
4993 break;
4994
4995 case AND:
4996 /* See if two shifts will do 2 or more insn's worth of work. */
4997 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
4998 {
4999 HOST_WIDE_INT shift_mask = ((0xffffffff
5000 << (32 - clear_sign_bit_copies))
5001 & 0xffffffff);
5002
5003 if ((remainder | shift_mask) != 0xffffffff)
5004 {
5005 HOST_WIDE_INT new_val
5006 = ARM_SIGN_EXTEND (remainder | shift_mask);
5007
5008 if (generate)
5009 {
5010 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5011 insns = arm_gen_constant (AND, SImode, cond, new_val,
5012 new_src, source, subtargets, 1);
5013 source = new_src;
5014 }
5015 else
5016 {
5017 rtx targ = subtargets ? NULL_RTX : target;
5018 insns = arm_gen_constant (AND, mode, cond, new_val,
5019 targ, source, subtargets, 0);
5020 }
5021 }
5022
5023 if (generate)
5024 {
5025 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5026 rtx shift = GEN_INT (clear_sign_bit_copies);
5027
5028 emit_insn (gen_ashlsi3 (new_src, source, shift));
5029 emit_insn (gen_lshrsi3 (target, new_src, shift));
5030 }
5031
5032 return insns + 2;
5033 }
5034
5035 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5036 {
5037 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5038
5039 if ((remainder | shift_mask) != 0xffffffff)
5040 {
5041 HOST_WIDE_INT new_val
5042 = ARM_SIGN_EXTEND (remainder | shift_mask);
5043 if (generate)
5044 {
5045 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5046
5047 insns = arm_gen_constant (AND, mode, cond, new_val,
5048 new_src, source, subtargets, 1);
5049 source = new_src;
5050 }
5051 else
5052 {
5053 rtx targ = subtargets ? NULL_RTX : target;
5054
5055 insns = arm_gen_constant (AND, mode, cond, new_val,
5056 targ, source, subtargets, 0);
5057 }
5058 }
5059
5060 if (generate)
5061 {
5062 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5063 rtx shift = GEN_INT (clear_zero_bit_copies);
5064
5065 emit_insn (gen_lshrsi3 (new_src, source, shift));
5066 emit_insn (gen_ashlsi3 (target, new_src, shift));
5067 }
5068
5069 return insns + 2;
5070 }
5071
5072 break;
5073
5074 default:
5075 break;
5076 }
5077
5078 /* Calculate what the instruction sequences would be if we generated it
5079 normally, negated, or inverted. */
5080 if (code == AND)
5081 /* AND cannot be split into multiple insns, so invert and use BIC. */
5082 insns = 99;
5083 else
5084 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5085
5086 if (can_negate)
5087 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5088 &neg_immediates);
5089 else
5090 neg_insns = 99;
5091
5092 if (can_invert || final_invert)
5093 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5094 &inv_immediates);
5095 else
5096 inv_insns = 99;
5097
5098 immediates = &pos_immediates;
5099
5100 /* Is the negated immediate sequence more efficient? */
5101 if (neg_insns < insns && neg_insns <= inv_insns)
5102 {
5103 insns = neg_insns;
5104 immediates = &neg_immediates;
5105 }
5106 else
5107 can_negate = 0;
5108
5109 /* Is the inverted immediate sequence more efficient?
5110 We must allow for an extra NOT instruction for XOR operations, although
5111 there is some chance that the final 'mvn' will get optimized later. */
5112 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5113 {
5114 insns = inv_insns;
5115 immediates = &inv_immediates;
5116 }
5117 else
5118 {
5119 can_invert = 0;
5120 final_invert = 0;
5121 }
5122
5123 /* Now output the chosen sequence as instructions. */
5124 if (generate)
5125 {
5126 for (i = 0; i < insns; i++)
5127 {
5128 rtx new_src, temp1_rtx;
5129
5130 temp1 = immediates->i[i];
5131
5132 if (code == SET || code == MINUS)
5133 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5134 else if ((final_invert || i < (insns - 1)) && subtargets)
5135 new_src = gen_reg_rtx (mode);
5136 else
5137 new_src = target;
5138
5139 if (can_invert)
5140 temp1 = ~temp1;
5141 else if (can_negate)
5142 temp1 = -temp1;
5143
5144 temp1 = trunc_int_for_mode (temp1, mode);
5145 temp1_rtx = GEN_INT (temp1);
5146
5147 if (code == SET)
5148 ;
5149 else if (code == MINUS)
5150 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5151 else
5152 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5153
5154 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5155 source = new_src;
5156
5157 if (code == SET)
5158 {
5159 can_negate = can_invert;
5160 can_invert = 0;
5161 code = PLUS;
5162 }
5163 else if (code == MINUS)
5164 code = PLUS;
5165 }
5166 }
5167
5168 if (final_invert)
5169 {
5170 if (generate)
5171 emit_constant_insn (cond, gen_rtx_SET (target,
5172 gen_rtx_NOT (mode, source)));
5173 insns++;
5174 }
5175
5176 return insns;
5177 }
5178
5179 /* Canonicalize a comparison so that we are more likely to recognize it.
5180 This can be done for a few constant compares, where we can make the
5181 immediate value easier to load. */
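/* For example, (x > 0x3ff) cannot encode 0x3ff as an ARM immediate, but it
can be rewritten as (x >= 0x400), and 0x400 is encodable; the GT/LE and
GTU/LEU cases below make exactly this kind of adjustment. */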
5182
5183 static void
5184 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5185 bool op0_preserve_value)
5186 {
5187 machine_mode mode;
5188 unsigned HOST_WIDE_INT i, maxval;
5189
5190 mode = GET_MODE (*op0);
5191 if (mode == VOIDmode)
5192 mode = GET_MODE (*op1);
5193
5194 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5195
5196 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5197 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5198 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5199 for GTU/LEU in Thumb mode. */
5200 if (mode == DImode)
5201 {
5202
5203 if (*code == GT || *code == LE
5204 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5205 {
5206 /* Missing comparison. First try to use an available
5207 comparison. */
5208 if (CONST_INT_P (*op1))
5209 {
5210 i = INTVAL (*op1);
5211 switch (*code)
5212 {
5213 case GT:
5214 case LE:
5215 if (i != maxval
5216 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5217 {
5218 *op1 = GEN_INT (i + 1);
5219 *code = *code == GT ? GE : LT;
5220 return;
5221 }
5222 break;
5223 case GTU:
5224 case LEU:
5225 if (i != ~((unsigned HOST_WIDE_INT) 0)
5226 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5227 {
5228 *op1 = GEN_INT (i + 1);
5229 *code = *code == GTU ? GEU : LTU;
5230 return;
5231 }
5232 break;
5233 default:
5234 gcc_unreachable ();
5235 }
5236 }
5237
5238 /* If that did not work, reverse the condition. */
5239 if (!op0_preserve_value)
5240 {
5241 std::swap (*op0, *op1);
5242 *code = (int)swap_condition ((enum rtx_code)*code);
5243 }
5244 }
5245 return;
5246 }
5247
5248 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5249 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5250 to facilitate possible combining with a cmp into 'ands'. */
5251 if (mode == SImode
5252 && GET_CODE (*op0) == ZERO_EXTEND
5253 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5254 && GET_MODE (XEXP (*op0, 0)) == QImode
5255 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5256 && subreg_lowpart_p (XEXP (*op0, 0))
5257 && *op1 == const0_rtx)
5258 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5259 GEN_INT (255));
5260
5261 /* Comparisons smaller than DImode. Only adjust comparisons against
5262 an out-of-range constant. */
5263 if (!CONST_INT_P (*op1)
5264 || const_ok_for_arm (INTVAL (*op1))
5265 || const_ok_for_arm (- INTVAL (*op1)))
5266 return;
5267
5268 i = INTVAL (*op1);
5269
5270 switch (*code)
5271 {
5272 case EQ:
5273 case NE:
5274 return;
5275
5276 case GT:
5277 case LE:
5278 if (i != maxval
5279 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5280 {
5281 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5282 *code = *code == GT ? GE : LT;
5283 return;
5284 }
5285 break;
5286
5287 case GE:
5288 case LT:
5289 if (i != ~maxval
5290 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5291 {
5292 *op1 = GEN_INT (i - 1);
5293 *code = *code == GE ? GT : LE;
5294 return;
5295 }
5296 break;
5297
5298 case GTU:
5299 case LEU:
5300 if (i != ~((unsigned HOST_WIDE_INT) 0)
5301 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5302 {
5303 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5304 *code = *code == GTU ? GEU : LTU;
5305 return;
5306 }
5307 break;
5308
5309 case GEU:
5310 case LTU:
5311 if (i != 0
5312 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5313 {
5314 *op1 = GEN_INT (i - 1);
5315 *code = *code == GEU ? GTU : LEU;
5316 return;
5317 }
5318 break;
5319
5320 default:
5321 gcc_unreachable ();
5322 }
5323 }
5324
5325
5326 /* Define how to find the value returned by a function. */
5327
5328 static rtx
5329 arm_function_value(const_tree type, const_tree func,
5330 bool outgoing ATTRIBUTE_UNUSED)
5331 {
5332 machine_mode mode;
5333 int unsignedp ATTRIBUTE_UNUSED;
5334 rtx r ATTRIBUTE_UNUSED;
5335
5336 mode = TYPE_MODE (type);
5337
5338 if (TARGET_AAPCS_BASED)
5339 return aapcs_allocate_return_reg (mode, type, func);
5340
5341 /* Promote integer types. */
5342 if (INTEGRAL_TYPE_P (type))
5343 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5344
5345 /* Promotes small structs returned in a register to full-word size
5346 for big-endian AAPCS. */
5347 if (arm_return_in_msb (type))
5348 {
5349 HOST_WIDE_INT size = int_size_in_bytes (type);
5350 if (size % UNITS_PER_WORD != 0)
5351 {
5352 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5353 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5354 }
5355 }
5356
5357 return arm_libcall_value_1 (mode);
5358 }
5359
5360 /* libcall hashtable helpers. */
5361
5362 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5363 {
5364 static inline hashval_t hash (const rtx_def *);
5365 static inline bool equal (const rtx_def *, const rtx_def *);
5366 static inline void remove (rtx_def *);
5367 };
5368
5369 inline bool
5370 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5371 {
5372 return rtx_equal_p (p1, p2);
5373 }
5374
5375 inline hashval_t
5376 libcall_hasher::hash (const rtx_def *p1)
5377 {
5378 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5379 }
5380
5381 typedef hash_table<libcall_hasher> libcall_table_type;
5382
5383 static void
5384 add_libcall (libcall_table_type *htab, rtx libcall)
5385 {
5386 *htab->find_slot (libcall, INSERT) = libcall;
5387 }
5388
5389 static bool
5390 arm_libcall_uses_aapcs_base (const_rtx libcall)
5391 {
5392 static bool init_done = false;
5393 static libcall_table_type *libcall_htab = NULL;
5394
5395 if (!init_done)
5396 {
5397 init_done = true;
5398
5399 libcall_htab = new libcall_table_type (31);
5400 add_libcall (libcall_htab,
5401 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5402 add_libcall (libcall_htab,
5403 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5404 add_libcall (libcall_htab,
5405 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5406 add_libcall (libcall_htab,
5407 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5408
5409 add_libcall (libcall_htab,
5410 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5411 add_libcall (libcall_htab,
5412 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5413 add_libcall (libcall_htab,
5414 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5415 add_libcall (libcall_htab,
5416 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5417
5418 add_libcall (libcall_htab,
5419 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5420 add_libcall (libcall_htab,
5421 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5422 add_libcall (libcall_htab,
5423 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5424 add_libcall (libcall_htab,
5425 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5426 add_libcall (libcall_htab,
5427 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5428 add_libcall (libcall_htab,
5429 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5430 add_libcall (libcall_htab,
5431 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5432 add_libcall (libcall_htab,
5433 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5434
5435 /* Values from double-precision helper functions are returned in core
5436 registers if the selected core only supports single-precision
5437 arithmetic, even if we are using the hard-float ABI. The same is
5438 true for single-precision helpers, but we will never be using the
5439 hard-float ABI on a CPU which doesn't support single-precision
5440 operations in hardware. */
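/* For example, on such a core the DFmode add below becomes a library
call (__aeabi_dadd on EABI targets) whose result is returned in
r0/r1 rather than in a VFP register pair. */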
5441 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5442 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5443 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5444 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5445 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5446 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5447 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5448 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5449 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5450 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5451 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5452 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5453 SFmode));
5454 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5455 DFmode));
5456 add_libcall (libcall_htab,
5457 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5458 }
5459
5460 return libcall && libcall_htab->find (libcall) != NULL;
5461 }
5462
5463 static rtx
5464 arm_libcall_value_1 (machine_mode mode)
5465 {
5466 if (TARGET_AAPCS_BASED)
5467 return aapcs_libcall_value (mode);
5468 else if (TARGET_IWMMXT_ABI
5469 && arm_vector_mode_supported_p (mode))
5470 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5471 else
5472 return gen_rtx_REG (mode, ARG_REGISTER (1));
5473 }
5474
5475 /* Define how to find the value returned by a library function
5476 assuming the value has mode MODE. */
5477
5478 static rtx
5479 arm_libcall_value (machine_mode mode, const_rtx libcall)
5480 {
5481 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5482 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5483 {
5484 /* The following libcalls return their result in integer registers,
5485 even though they return a floating point value. */
5486 if (arm_libcall_uses_aapcs_base (libcall))
5487 return gen_rtx_REG (mode, ARG_REGISTER(1));
5488
5489 }
5490
5491 return arm_libcall_value_1 (mode);
5492 }
5493
5494 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5495
5496 static bool
5497 arm_function_value_regno_p (const unsigned int regno)
5498 {
5499 if (regno == ARG_REGISTER (1)
5500 || (TARGET_32BIT
5501 && TARGET_AAPCS_BASED
5502 && TARGET_HARD_FLOAT
5503 && regno == FIRST_VFP_REGNUM)
5504 || (TARGET_IWMMXT_ABI
5505 && regno == FIRST_IWMMXT_REGNUM))
5506 return true;
5507
5508 return false;
5509 }
5510
5511 /* Determine the amount of memory needed to store the possible return
5512 registers of an untyped call. */
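/* The base 16 bytes covers r0-r3; the additions below presumably correspond
to d0-d3 (32 bytes) under a hard-float ABI and to one 64-bit iWMMXt
register (8 bytes). */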
5513 int
5514 arm_apply_result_size (void)
5515 {
5516 int size = 16;
5517
5518 if (TARGET_32BIT)
5519 {
5520 if (TARGET_HARD_FLOAT_ABI)
5521 size += 32;
5522 if (TARGET_IWMMXT_ABI)
5523 size += 8;
5524 }
5525
5526 return size;
5527 }
5528
5529 /* Decide whether TYPE should be returned in memory (true)
5530 or in a register (false). FNTYPE is the type of the function making
5531 the call. */
5532 static bool
5533 arm_return_in_memory (const_tree type, const_tree fntype)
5534 {
5535 HOST_WIDE_INT size;
5536
5537 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5538
5539 if (TARGET_AAPCS_BASED)
5540 {
5541 /* Simple, non-aggregate types (i.e. not including vectors and
5542 complex) are always returned in a register (or registers).
5543 We don't care about which register here, so we can short-cut
5544 some of the detail. */
5545 if (!AGGREGATE_TYPE_P (type)
5546 && TREE_CODE (type) != VECTOR_TYPE
5547 && TREE_CODE (type) != COMPLEX_TYPE)
5548 return false;
5549
5550 /* Any return value that is no larger than one word can be
5551 returned in r0. */
5552 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5553 return false;
5554
5555 /* Check any available co-processors to see if they accept the
5556 type as a register candidate (VFP, for example, can return
5557 some aggregates in consecutive registers). These aren't
5558 available if the call is variadic. */
5559 if (aapcs_select_return_coproc (type, fntype) >= 0)
5560 return false;
5561
5562 /* Vector values should be returned using ARM registers, not
5563 memory (unless they're over 16 bytes, which will break since
5564 we only have four call-clobbered registers to play with). */
5565 if (TREE_CODE (type) == VECTOR_TYPE)
5566 return (size < 0 || size > (4 * UNITS_PER_WORD));
5567
5568 /* The rest go in memory. */
5569 return true;
5570 }
5571
5572 if (TREE_CODE (type) == VECTOR_TYPE)
5573 return (size < 0 || size > (4 * UNITS_PER_WORD));
5574
5575 if (!AGGREGATE_TYPE_P (type) &&
5576 (TREE_CODE (type) != VECTOR_TYPE))
5577 /* All simple types are returned in registers. */
5578 return false;
5579
5580 if (arm_abi != ARM_ABI_APCS)
5581 {
5582 /* ATPCS and later return aggregate types in memory only if they are
5583 larger than a word (or are variable size). */
5584 return (size < 0 || size > UNITS_PER_WORD);
5585 }
5586
5587 /* For the arm-wince targets we choose to be compatible with Microsoft's
5588 ARM and Thumb compilers, which always return aggregates in memory. */
5589 #ifndef ARM_WINCE
5590 /* All structures/unions bigger than one word are returned in memory.
5591 Also catch the case where int_size_in_bytes returns -1. In this case
5592 the aggregate is either huge or of variable size, and in either case
5593 we will want to return it via memory and not in a register. */
5594 if (size < 0 || size > UNITS_PER_WORD)
5595 return true;
5596
5597 if (TREE_CODE (type) == RECORD_TYPE)
5598 {
5599 tree field;
5600
5601 /* For a struct the APCS says that we only return in a register
5602 if the type is 'integer like' and every addressable element
5603 has an offset of zero. For practical purposes this means
5604 that the structure can have at most one non bit-field element
5605 and that this element must be the first one in the structure. */
5606
5607 /* Find the first field, ignoring non FIELD_DECL things which will
5608 have been created by C++. */
5609 for (field = TYPE_FIELDS (type);
5610 field && TREE_CODE (field) != FIELD_DECL;
5611 field = DECL_CHAIN (field))
5612 continue;
5613
5614 if (field == NULL)
5615 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5616
5617 /* Check that the first field is valid for returning in a register. */
5618
5619 /* ... Floats are not allowed */
5620 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5621 return true;
5622
5623 /* ... Aggregates that are not themselves valid for returning in
5624 a register are not allowed. */
5625 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5626 return true;
5627
5628 /* Now check the remaining fields, if any. Only bitfields are allowed,
5629 since they are not addressable. */
5630 for (field = DECL_CHAIN (field);
5631 field;
5632 field = DECL_CHAIN (field))
5633 {
5634 if (TREE_CODE (field) != FIELD_DECL)
5635 continue;
5636
5637 if (!DECL_BIT_FIELD_TYPE (field))
5638 return true;
5639 }
5640
5641 return false;
5642 }
5643
5644 if (TREE_CODE (type) == UNION_TYPE)
5645 {
5646 tree field;
5647
5648 /* Unions can be returned in registers if every element is
5649 integral, or can be returned in an integer register. */
5650 for (field = TYPE_FIELDS (type);
5651 field;
5652 field = DECL_CHAIN (field))
5653 {
5654 if (TREE_CODE (field) != FIELD_DECL)
5655 continue;
5656
5657 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5658 return true;
5659
5660 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5661 return true;
5662 }
5663
5664 return false;
5665 }
5666 #endif /* not ARM_WINCE */
5667
5668 /* Return all other types in memory. */
5669 return true;
5670 }
5671
5672 const struct pcs_attribute_arg
5673 {
5674 const char *arg;
5675 enum arm_pcs value;
5676 } pcs_attribute_args[] =
5677 {
5678 {"aapcs", ARM_PCS_AAPCS},
5679 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5680 #if 0
5681 /* We could recognize these, but changes would be needed elsewhere
5682 * to implement them. */
5683 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5684 {"atpcs", ARM_PCS_ATPCS},
5685 {"apcs", ARM_PCS_APCS},
5686 #endif
5687 {NULL, ARM_PCS_UNKNOWN}
5688 };
5689
5690 static enum arm_pcs
5691 arm_pcs_from_attribute (tree attr)
5692 {
5693 const struct pcs_attribute_arg *ptr;
5694 const char *arg;
5695
5696 /* Get the value of the argument. */
5697 if (TREE_VALUE (attr) == NULL_TREE
5698 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5699 return ARM_PCS_UNKNOWN;
5700
5701 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5702
5703 /* Check it against the list of known arguments. */
5704 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5705 if (streq (arg, ptr->arg))
5706 return ptr->value;
5707
5708 /* An unrecognized PCS variant name. */
5709 return ARM_PCS_UNKNOWN;
5710 }
5711
5712 /* Get the PCS variant to use for this call. TYPE is the function's type
5713 specification, DECL is the specific declaration. DECL may be null if
5714 the call could be indirect or if this is a library call. */
5715 static enum arm_pcs
5716 arm_get_pcs_model (const_tree type, const_tree decl)
5717 {
5718 bool user_convention = false;
5719 enum arm_pcs user_pcs = arm_pcs_default;
5720 tree attr;
5721
5722 gcc_assert (type);
5723
5724 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5725 if (attr)
5726 {
5727 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5728 user_convention = true;
5729 }
5730
5731 if (TARGET_AAPCS_BASED)
5732 {
5733 /* Detect varargs functions. These always use the base rules
5734 (no argument is ever a candidate for a co-processor
5735 register). */
5736 bool base_rules = stdarg_p (type);
5737
5738 if (user_convention)
5739 {
5740 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5741 sorry ("non-AAPCS derived PCS variant");
5742 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5743 error ("variadic functions must use the base AAPCS variant");
5744 }
5745
5746 if (base_rules)
5747 return ARM_PCS_AAPCS;
5748 else if (user_convention)
5749 return user_pcs;
5750 else if (decl && flag_unit_at_a_time)
5751 {
5752 /* Local functions never leak outside this compilation unit,
5753 so we are free to use whatever conventions are
5754 appropriate. */
5755 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5756 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5757 if (i && i->local)
5758 return ARM_PCS_AAPCS_LOCAL;
5759 }
5760 }
5761 else if (user_convention && user_pcs != arm_pcs_default)
5762 sorry ("PCS variant");
5763
5764 /* For everything else we use the target's default. */
5765 return arm_pcs_default;
5766 }
5767
5768
5769 static void
5770 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5771 const_tree fntype ATTRIBUTE_UNUSED,
5772 rtx libcall ATTRIBUTE_UNUSED,
5773 const_tree fndecl ATTRIBUTE_UNUSED)
5774 {
5775 /* Record the unallocated VFP registers. */
5776 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5777 pcum->aapcs_vfp_reg_alloc = 0;
5778 }
5779
5780 /* Walk down the type tree of TYPE counting consecutive base elements.
5781 If *MODEP is VOIDmode, then set it to the first valid floating point
5782 type. If a non-floating point type is found, or if a floating point
5783 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5784 otherwise return the count in the sub-tree. */
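/* As an illustration, struct { float x, y, z, w; } yields a count of 4 with
*MODEP set to SFmode (a candidate homogeneous aggregate), whereas
struct { double d; float f; } yields -1 because the base types differ. */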
5785 static int
5786 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5787 {
5788 machine_mode mode;
5789 HOST_WIDE_INT size;
5790
5791 switch (TREE_CODE (type))
5792 {
5793 case REAL_TYPE:
5794 mode = TYPE_MODE (type);
5795 if (mode != DFmode && mode != SFmode && mode != HFmode)
5796 return -1;
5797
5798 if (*modep == VOIDmode)
5799 *modep = mode;
5800
5801 if (*modep == mode)
5802 return 1;
5803
5804 break;
5805
5806 case COMPLEX_TYPE:
5807 mode = TYPE_MODE (TREE_TYPE (type));
5808 if (mode != DFmode && mode != SFmode)
5809 return -1;
5810
5811 if (*modep == VOIDmode)
5812 *modep = mode;
5813
5814 if (*modep == mode)
5815 return 2;
5816
5817 break;
5818
5819 case VECTOR_TYPE:
5820 /* Use V2SImode and V4SImode as representatives of all 64-bit
5821 and 128-bit vector types, whether or not those modes are
5822 supported with the present options. */
5823 size = int_size_in_bytes (type);
5824 switch (size)
5825 {
5826 case 8:
5827 mode = V2SImode;
5828 break;
5829 case 16:
5830 mode = V4SImode;
5831 break;
5832 default:
5833 return -1;
5834 }
5835
5836 if (*modep == VOIDmode)
5837 *modep = mode;
5838
5839 /* Vector modes are considered to be opaque: two vectors are
5840 equivalent for the purposes of being homogeneous aggregates
5841 if they are the same size. */
5842 if (*modep == mode)
5843 return 1;
5844
5845 break;
5846
5847 case ARRAY_TYPE:
5848 {
5849 int count;
5850 tree index = TYPE_DOMAIN (type);
5851
5852 /* Can't handle incomplete types nor sizes that are not
5853 fixed. */
5854 if (!COMPLETE_TYPE_P (type)
5855 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5856 return -1;
5857
5858 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5859 if (count == -1
5860 || !index
5861 || !TYPE_MAX_VALUE (index)
5862 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5863 || !TYPE_MIN_VALUE (index)
5864 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5865 || count < 0)
5866 return -1;
5867
5868 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5869 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5870
5871 /* There must be no padding. */
5872 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5873 return -1;
5874
5875 return count;
5876 }
5877
5878 case RECORD_TYPE:
5879 {
5880 int count = 0;
5881 int sub_count;
5882 tree field;
5883
5884 /* Can't handle incomplete types nor sizes that are not
5885 fixed. */
5886 if (!COMPLETE_TYPE_P (type)
5887 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5888 return -1;
5889
5890 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5891 {
5892 if (TREE_CODE (field) != FIELD_DECL)
5893 continue;
5894
5895 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5896 if (sub_count < 0)
5897 return -1;
5898 count += sub_count;
5899 }
5900
5901 /* There must be no padding. */
5902 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5903 return -1;
5904
5905 return count;
5906 }
5907
5908 case UNION_TYPE:
5909 case QUAL_UNION_TYPE:
5910 {
5911 /* These aren't very interesting except in a degenerate case. */
5912 int count = 0;
5913 int sub_count;
5914 tree field;
5915
5916 /* Can't handle incomplete types nor sizes that are not
5917 fixed. */
5918 if (!COMPLETE_TYPE_P (type)
5919 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5920 return -1;
5921
5922 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5923 {
5924 if (TREE_CODE (field) != FIELD_DECL)
5925 continue;
5926
5927 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5928 if (sub_count < 0)
5929 return -1;
5930 count = count > sub_count ? count : sub_count;
5931 }
5932
5933 /* There must be no padding. */
5934 if (wi::ne_p (TYPE_SIZE (type), count * GET_MODE_BITSIZE (*modep)))
5935 return -1;
5936
5937 return count;
5938 }
5939
5940 default:
5941 break;
5942 }
5943
5944 return -1;
5945 }
5946
5947 /* Return true if PCS_VARIANT should use VFP registers. */
5948 static bool
5949 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
5950 {
5951 if (pcs_variant == ARM_PCS_AAPCS_VFP)
5952 {
5953 static bool seen_thumb1_vfp = false;
5954
5955 if (TARGET_THUMB1 && !seen_thumb1_vfp)
5956 {
5957 sorry ("Thumb-1 hard-float VFP ABI");
5958 /* sorry() is not immediately fatal, so only display this once. */
5959 seen_thumb1_vfp = true;
5960 }
5961
5962 return true;
5963 }
5964
5965 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
5966 return false;
5967
5968 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
5969 (TARGET_VFP_DOUBLE || !is_double));
5970 }
5971
5972 /* Return true if an argument whose type is TYPE, or mode is MODE, is
5973 suitable for passing or returning in VFP registers for the PCS
5974 variant selected. If it is, then *BASE_MODE is updated to contain
5975 a machine mode describing each element of the argument's type and
5976 *COUNT to hold the number of such elements. */
5977 static bool
5978 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
5979 machine_mode mode, const_tree type,
5980 machine_mode *base_mode, int *count)
5981 {
5982 machine_mode new_mode = VOIDmode;
5983
5984 /* If we have the type information, prefer that to working things
5985 out from the mode. */
5986 if (type)
5987 {
5988 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
5989
5990 if (ag_count > 0 && ag_count <= 4)
5991 *count = ag_count;
5992 else
5993 return false;
5994 }
5995 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
5996 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
5997 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
5998 {
5999 *count = 1;
6000 new_mode = mode;
6001 }
6002 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6003 {
6004 *count = 2;
6005 new_mode = (mode == DCmode ? DFmode : SFmode);
6006 }
6007 else
6008 return false;
6009
6010
6011 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6012 return false;
6013
6014 *base_mode = new_mode;
6015 return true;
6016 }
6017
6018 static bool
6019 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6020 machine_mode mode, const_tree type)
6021 {
6022 int count ATTRIBUTE_UNUSED;
6023 machine_mode ag_mode ATTRIBUTE_UNUSED;
6024
6025 if (!use_vfp_abi (pcs_variant, false))
6026 return false;
6027 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6028 &ag_mode, &count);
6029 }
6030
6031 static bool
6032 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6033 const_tree type)
6034 {
6035 if (!use_vfp_abi (pcum->pcs_variant, false))
6036 return false;
6037
6038 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6039 &pcum->aapcs_vfp_rmode,
6040 &pcum->aapcs_vfp_rcount);
6041 }
6042
6043 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6044 for the behaviour of this function. */
6045
6046 static bool
6047 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6048 const_tree type ATTRIBUTE_UNUSED)
6049 {
6050 int rmode_size
6051 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6052 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6053 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6054 int regno;
6055
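/* Illustration: for two DFmode values RMODE_SIZE is 8, so SHIFT is 2 and
MASK is 0xf; the loop below then searches for four consecutive free
single-precision registers starting on an even (D-register) boundary. */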
6056 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6057 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6058 {
6059 pcum->aapcs_vfp_reg_alloc = mask << regno;
6060 if (mode == BLKmode
6061 || (mode == TImode && ! TARGET_NEON)
6062 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6063 {
6064 int i;
6065 int rcount = pcum->aapcs_vfp_rcount;
6066 int rshift = shift;
6067 machine_mode rmode = pcum->aapcs_vfp_rmode;
6068 rtx par;
6069 if (!TARGET_NEON)
6070 {
6071 /* Avoid using unsupported vector modes. */
6072 if (rmode == V2SImode)
6073 rmode = DImode;
6074 else if (rmode == V4SImode)
6075 {
6076 rmode = DImode;
6077 rcount *= 2;
6078 rshift /= 2;
6079 }
6080 }
6081 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6082 for (i = 0; i < rcount; i++)
6083 {
6084 rtx tmp = gen_rtx_REG (rmode,
6085 FIRST_VFP_REGNUM + regno + i * rshift);
6086 tmp = gen_rtx_EXPR_LIST
6087 (VOIDmode, tmp,
6088 GEN_INT (i * GET_MODE_SIZE (rmode)));
6089 XVECEXP (par, 0, i) = tmp;
6090 }
6091
6092 pcum->aapcs_reg = par;
6093 }
6094 else
6095 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6096 return true;
6097 }
6098 return false;
6099 }
6100
6101 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6102 comment there for the behaviour of this function. */
6103
6104 static rtx
6105 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6106 machine_mode mode,
6107 const_tree type ATTRIBUTE_UNUSED)
6108 {
6109 if (!use_vfp_abi (pcs_variant, false))
6110 return NULL;
6111
6112 if (mode == BLKmode
6113 || (GET_MODE_CLASS (mode) == MODE_INT
6114 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6115 && !TARGET_NEON))
6116 {
6117 int count;
6118 machine_mode ag_mode;
6119 int i;
6120 rtx par;
6121 int shift;
6122
6123 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6124 &ag_mode, &count);
6125
6126 if (!TARGET_NEON)
6127 {
6128 if (ag_mode == V2SImode)
6129 ag_mode = DImode;
6130 else if (ag_mode == V4SImode)
6131 {
6132 ag_mode = DImode;
6133 count *= 2;
6134 }
6135 }
6136 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6137 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6138 for (i = 0; i < count; i++)
6139 {
6140 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6141 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6142 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6143 XVECEXP (par, 0, i) = tmp;
6144 }
6145
6146 return par;
6147 }
6148
6149 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6150 }
6151
6152 static void
6153 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6154 machine_mode mode ATTRIBUTE_UNUSED,
6155 const_tree type ATTRIBUTE_UNUSED)
6156 {
6157 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6158 pcum->aapcs_vfp_reg_alloc = 0;
6159 return;
6160 }
6161
6162 #define AAPCS_CP(X) \
6163 { \
6164 aapcs_ ## X ## _cum_init, \
6165 aapcs_ ## X ## _is_call_candidate, \
6166 aapcs_ ## X ## _allocate, \
6167 aapcs_ ## X ## _is_return_candidate, \
6168 aapcs_ ## X ## _allocate_return_reg, \
6169 aapcs_ ## X ## _advance \
6170 }
6171
6172 /* Table of co-processors that can be used to pass arguments in
6173 registers. Ideally no argument should be a candidate for more than
6174 one co-processor table entry, but the table is processed in order
6175 and stops after the first match. If that entry then fails to put
6176 the argument into a co-processor register, the argument will go on
6177 the stack. */
6178 static struct
6179 {
6180 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6181 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6182
6183 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6184 BLKmode) is a candidate for this co-processor's registers; this
6185 function should ignore any position-dependent state in
6186 CUMULATIVE_ARGS and only use call-type dependent information. */
6187 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6188
6189 /* Return true if the argument does get a co-processor register; it
6190 should set aapcs_reg to an RTX of the register allocated as is
6191 required for a return from FUNCTION_ARG. */
6192 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6193
6194 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6195 be returned in this co-processor's registers. */
6196 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6197
6198 /* Allocate and return an RTX element to hold the return value of a call. This
6199 routine must not fail and will only be called if is_return_candidate
6200 returned true with the same parameters. */
6201 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6202
6203 /* Finish processing this argument and prepare to start processing
6204 the next one. */
6205 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6206 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6207 {
6208 AAPCS_CP(vfp)
6209 };
6210
6211 #undef AAPCS_CP
6212
6213 static int
6214 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6215 const_tree type)
6216 {
6217 int i;
6218
6219 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6220 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6221 return i;
6222
6223 return -1;
6224 }
6225
6226 static int
6227 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6228 {
6229 /* We aren't passed a decl, so we can't check that a call is local.
6230 However, it isn't clear that that would be a win anyway, since it
6231 might limit some tail-calling opportunities. */
6232 enum arm_pcs pcs_variant;
6233
6234 if (fntype)
6235 {
6236 const_tree fndecl = NULL_TREE;
6237
6238 if (TREE_CODE (fntype) == FUNCTION_DECL)
6239 {
6240 fndecl = fntype;
6241 fntype = TREE_TYPE (fntype);
6242 }
6243
6244 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6245 }
6246 else
6247 pcs_variant = arm_pcs_default;
6248
6249 if (pcs_variant != ARM_PCS_AAPCS)
6250 {
6251 int i;
6252
6253 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6254 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6255 TYPE_MODE (type),
6256 type))
6257 return i;
6258 }
6259 return -1;
6260 }
6261
6262 static rtx
6263 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6264 const_tree fntype)
6265 {
6266 /* We aren't passed a decl, so we can't check that a call is local.
6267 However, it isn't clear that that would be a win anyway, since it
6268 might limit some tail-calling opportunities. */
6269 enum arm_pcs pcs_variant;
6270 int unsignedp ATTRIBUTE_UNUSED;
6271
6272 if (fntype)
6273 {
6274 const_tree fndecl = NULL_TREE;
6275
6276 if (TREE_CODE (fntype) == FUNCTION_DECL)
6277 {
6278 fndecl = fntype;
6279 fntype = TREE_TYPE (fntype);
6280 }
6281
6282 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6283 }
6284 else
6285 pcs_variant = arm_pcs_default;
6286
6287 /* Promote integer types. */
6288 if (type && INTEGRAL_TYPE_P (type))
6289 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6290
6291 if (pcs_variant != ARM_PCS_AAPCS)
6292 {
6293 int i;
6294
6295 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6296 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6297 type))
6298 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6299 mode, type);
6300 }
6301
6302 /* Promotes small structs returned in a register to full-word size
6303 for big-endian AAPCS. */
6304 if (type && arm_return_in_msb (type))
6305 {
6306 HOST_WIDE_INT size = int_size_in_bytes (type);
6307 if (size % UNITS_PER_WORD != 0)
6308 {
6309 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6310 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6311 }
6312 }
6313
6314 return gen_rtx_REG (mode, R0_REGNUM);
6315 }
6316
6317 static rtx
6318 aapcs_libcall_value (machine_mode mode)
6319 {
6320 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6321 && GET_MODE_SIZE (mode) <= 4)
6322 mode = SImode;
6323
6324 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6325 }
6326
6327 /* Lay out a function argument using the AAPCS rules. The rule
6328 numbers referred to here are those in the AAPCS. */
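/* Illustration of rules C3-C5: for a call such as f (int a, long long b),
A is allocated to r0, C3 rounds the core register number up to r2 for the
doubleword-aligned B, which then occupies r2-r3; a further doubleword
argument would go on the stack under C6-C8. */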
6329 static void
6330 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6331 const_tree type, bool named)
6332 {
6333 int nregs, nregs2;
6334 int ncrn;
6335
6336 /* We only need to do this once per argument. */
6337 if (pcum->aapcs_arg_processed)
6338 return;
6339
6340 pcum->aapcs_arg_processed = true;
6341
6342 /* Special case: if named is false then we are handling an incoming
6343 anonymous argument which is on the stack. */
6344 if (!named)
6345 return;
6346
6347 /* Is this a potential co-processor register candidate? */
6348 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6349 {
6350 int slot = aapcs_select_call_coproc (pcum, mode, type);
6351 pcum->aapcs_cprc_slot = slot;
6352
6353 /* We don't have to apply any of the rules from part B of the
6354 preparation phase, these are handled elsewhere in the
6355 compiler. */
6356
6357 if (slot >= 0)
6358 {
6359 /* A Co-processor register candidate goes either in its own
6360 class of registers or on the stack. */
6361 if (!pcum->aapcs_cprc_failed[slot])
6362 {
6363 /* C1.cp - Try to allocate the argument to co-processor
6364 registers. */
6365 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6366 return;
6367
6368 /* C2.cp - Put the argument on the stack and note that we
6369 can't assign any more candidates in this slot. We also
6370 need to note that we have allocated stack space, so that
6371 we won't later try to split a non-cprc candidate between
6372 core registers and the stack. */
6373 pcum->aapcs_cprc_failed[slot] = true;
6374 pcum->can_split = false;
6375 }
6376
6377 /* We didn't get a register, so this argument goes on the
6378 stack. */
6379 gcc_assert (pcum->can_split == false);
6380 return;
6381 }
6382 }
6383
6384 /* C3 - For double-word aligned arguments, round the NCRN up to the
6385 next even number. */
6386 ncrn = pcum->aapcs_ncrn;
6387 if (ncrn & 1)
6388 {
6389 int res = arm_needs_doubleword_align (mode, type);
6390 /* Only warn during RTL expansion of call stmts, otherwise we would
6391 warn e.g. during gimplification even on functions that will be
6392 always inlined, and we'd warn multiple times. Don't warn when
6393 called in expand_function_start either, as we warn instead in
6394 arm_function_arg_boundary in that case. */
6395 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6396 inform (input_location, "parameter passing for argument of type "
6397 "%qT changed in GCC 7.1", type);
6398 else if (res > 0)
6399 ncrn++;
6400 }
6401
6402 nregs = ARM_NUM_REGS2(mode, type);
6403
6404 /* Sigh, this test should really assert that nregs > 0, but a GCC
6405 extension allows empty structs and then gives them zero size; it
6406 then allows such a structure to be passed by value. For some of
6407 the code below we have to pretend that such an argument has
6408 non-zero size so that we 'locate' it correctly either in
6409 registers or on the stack. */
6410 gcc_assert (nregs >= 0);
6411
6412 nregs2 = nregs ? nregs : 1;
6413
6414 /* C4 - Argument fits entirely in core registers. */
6415 if (ncrn + nregs2 <= NUM_ARG_REGS)
6416 {
6417 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6418 pcum->aapcs_next_ncrn = ncrn + nregs;
6419 return;
6420 }
6421
6422 /* C5 - Some core registers left and there are no arguments already
6423 on the stack: split this argument between the remaining core
6424 registers and the stack. */
6425 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6426 {
6427 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6428 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6429 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6430 return;
6431 }
6432
6433 /* C6 - NCRN is set to 4. */
6434 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6435
6436 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6437 return;
6438 }
6439
6440 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6441 for a call to a function whose data type is FNTYPE.
6442 For a library call, FNTYPE is NULL. */
6443 void
6444 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6445 rtx libname,
6446 tree fndecl ATTRIBUTE_UNUSED)
6447 {
6448 /* Long call handling. */
6449 if (fntype)
6450 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6451 else
6452 pcum->pcs_variant = arm_pcs_default;
6453
6454 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6455 {
6456 if (arm_libcall_uses_aapcs_base (libname))
6457 pcum->pcs_variant = ARM_PCS_AAPCS;
6458
6459 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6460 pcum->aapcs_reg = NULL_RTX;
6461 pcum->aapcs_partial = 0;
6462 pcum->aapcs_arg_processed = false;
6463 pcum->aapcs_cprc_slot = -1;
6464 pcum->can_split = true;
6465
6466 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6467 {
6468 int i;
6469
6470 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6471 {
6472 pcum->aapcs_cprc_failed[i] = false;
6473 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6474 }
6475 }
6476 return;
6477 }
6478
6479 /* Legacy ABIs. */
6480
6481 /* On the ARM, the offset starts at 0. */
6482 pcum->nregs = 0;
6483 pcum->iwmmxt_nregs = 0;
6484 pcum->can_split = true;
6485
6486 /* Varargs vectors are treated the same as long long.
6487 named_count avoids having to change the way arm handles 'named' */
6488 pcum->named_count = 0;
6489 pcum->nargs = 0;
6490
6491 if (TARGET_REALLY_IWMMXT && fntype)
6492 {
6493 tree fn_arg;
6494
6495 for (fn_arg = TYPE_ARG_TYPES (fntype);
6496 fn_arg;
6497 fn_arg = TREE_CHAIN (fn_arg))
6498 pcum->named_count += 1;
6499
6500 if (! pcum->named_count)
6501 pcum->named_count = INT_MAX;
6502 }
6503 }
6504
6505 /* Return 1 if double word alignment is required for argument passing.
6506 Return -1 if double word alignment used to be required for argument
6507 passing before PR77728 ABI fix, but is not required anymore.
6508 Return 0 if double word alignment is not required and wasn't required
6509 before either. */
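/* For example, a 64-bit aligned scalar such as long long yields 1, whereas a
record whose only over-aligned member is not a FIELD_DECL (e.g. a static
data member) yields -1 so that -Wpsabi can report the GCC 7.1 change
without actually applying the extra alignment. */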
6510 static int
6511 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6512 {
6513 if (!type)
6514 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6515
6516 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6517 if (!AGGREGATE_TYPE_P (type))
6518 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6519
6520 /* Array types: Use member alignment of element type. */
6521 if (TREE_CODE (type) == ARRAY_TYPE)
6522 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6523
6524 int ret = 0;
6525 /* Record/aggregate types: Use greatest member alignment of any member. */
6526 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6527 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6528 {
6529 if (TREE_CODE (field) == FIELD_DECL)
6530 return 1;
6531 else
6532 /* Before PR77728 fix, we were incorrectly considering also
6533 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6534 Make sure we can warn about that with -Wpsabi. */
6535 ret = -1;
6536 }
6537
6538 return ret;
6539 }
6540
6541
6542 /* Determine where to put an argument to a function.
6543 Value is zero to push the argument on the stack,
6544 or a hard register in which to store the argument.
6545
6546 MODE is the argument's machine mode.
6547 TYPE is the data type of the argument (as a tree).
6548 This is null for libcalls where that information may
6549 not be available.
6550 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6551 the preceding args and about the function being called.
6552 NAMED is nonzero if this argument is a named parameter
6553 (otherwise it is an extra parameter matching an ellipsis).
6554
6555 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6556 other arguments are passed on the stack. If (NAMED == 0) (which happens
6557 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6558 defined), say it is passed in the stack (function_prologue will
6559 indeed make it be passed on the stack if necessary). */
6560
6561 static rtx
6562 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6563 const_tree type, bool named)
6564 {
6565 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6566 int nregs;
6567
6568 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6569 a call insn (op3 of a call_value insn). */
6570 if (mode == VOIDmode)
6571 return const0_rtx;
6572
6573 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6574 {
6575 aapcs_layout_arg (pcum, mode, type, named);
6576 return pcum->aapcs_reg;
6577 }
6578
6579 /* Varargs vectors are treated the same as long long.
6580 named_count avoids having to change the way arm handles 'named' */
6581 if (TARGET_IWMMXT_ABI
6582 && arm_vector_mode_supported_p (mode)
6583 && pcum->named_count > pcum->nargs + 1)
6584 {
6585 if (pcum->iwmmxt_nregs <= 9)
6586 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6587 else
6588 {
6589 pcum->can_split = false;
6590 return NULL_RTX;
6591 }
6592 }
6593
6594 /* Put doubleword aligned quantities in even register pairs. */
6595 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6596 {
6597 int res = arm_needs_doubleword_align (mode, type);
6598 if (res < 0 && warn_psabi)
6599 inform (input_location, "parameter passing for argument of type "
6600 "%qT changed in GCC 7.1", type);
6601 else if (res > 0)
6602 pcum->nregs++;
6603 }
6604
6605 /* Only allow splitting an arg between regs and memory if all preceding
6606 args were allocated to regs. For args passed by reference we only count
6607 the reference pointer. */
6608 if (pcum->can_split)
6609 nregs = 1;
6610 else
6611 nregs = ARM_NUM_REGS2 (mode, type);
6612
6613 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6614 return NULL_RTX;
6615
6616 return gen_rtx_REG (mode, pcum->nregs);
6617 }
6618
6619 static unsigned int
6620 arm_function_arg_boundary (machine_mode mode, const_tree type)
6621 {
6622 if (!ARM_DOUBLEWORD_ALIGN)
6623 return PARM_BOUNDARY;
6624
6625 int res = arm_needs_doubleword_align (mode, type);
6626 if (res < 0 && warn_psabi)
6627 inform (input_location, "parameter passing for argument of type %qT "
6628 "changed in GCC 7.1", type);
6629
6630 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6631 }
6632
6633 static int
6634 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6635 tree type, bool named)
6636 {
6637 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6638 int nregs = pcum->nregs;
6639
6640 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6641 {
6642 aapcs_layout_arg (pcum, mode, type, named);
6643 return pcum->aapcs_partial;
6644 }
6645
6646 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6647 return 0;
6648
6649 if (NUM_ARG_REGS > nregs
6650 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6651 && pcum->can_split)
6652 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6653
6654 return 0;
6655 }
6656
6657 /* Update the data in PCUM to advance over an argument
6658 of mode MODE and data type TYPE.
6659 (TYPE is null for libcalls where that information may not be available.) */
6660
6661 static void
6662 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6663 const_tree type, bool named)
6664 {
6665 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6666
6667 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6668 {
6669 aapcs_layout_arg (pcum, mode, type, named);
6670
6671 if (pcum->aapcs_cprc_slot >= 0)
6672 {
6673 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6674 type);
6675 pcum->aapcs_cprc_slot = -1;
6676 }
6677
6678 /* Generic stuff. */
6679 pcum->aapcs_arg_processed = false;
6680 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6681 pcum->aapcs_reg = NULL_RTX;
6682 pcum->aapcs_partial = 0;
6683 }
6684 else
6685 {
6686 pcum->nargs += 1;
6687 if (arm_vector_mode_supported_p (mode)
6688 && pcum->named_count > pcum->nargs
6689 && TARGET_IWMMXT_ABI)
6690 pcum->iwmmxt_nregs += 1;
6691 else
6692 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6693 }
6694 }
6695
6696 /* Variable sized types are passed by reference. This is a GCC
6697 extension to the ARM ABI. */
6698
6699 static bool
6700 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6701 machine_mode mode ATTRIBUTE_UNUSED,
6702 const_tree type, bool named ATTRIBUTE_UNUSED)
6703 {
6704 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6705 }
6706 \f
6707 /* Encode the current state of the #pragma [no_]long_calls. */
6708 typedef enum
6709 {
6710 OFF, /* No #pragma [no_]long_calls is in effect. */
6711 LONG, /* #pragma long_calls is in effect. */
6712 SHORT /* #pragma no_long_calls is in effect. */
6713 } arm_pragma_enum;
6714
6715 static arm_pragma_enum arm_pragma_long_calls = OFF;
6716
6717 void
6718 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6719 {
6720 arm_pragma_long_calls = LONG;
6721 }
6722
6723 void
6724 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6725 {
6726 arm_pragma_long_calls = SHORT;
6727 }
6728
6729 void
6730 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6731 {
6732 arm_pragma_long_calls = OFF;
6733 }
6734 \f
6735 /* Handle an attribute requiring a FUNCTION_DECL;
6736 arguments as in struct attribute_spec.handler. */
6737 static tree
6738 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6739 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6740 {
6741 if (TREE_CODE (*node) != FUNCTION_DECL)
6742 {
6743 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6744 name);
6745 *no_add_attrs = true;
6746 }
6747
6748 return NULL_TREE;
6749 }
6750
6751 /* Handle an "interrupt" or "isr" attribute;
6752 arguments as in struct attribute_spec.handler. */
6753 static tree
6754 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6755 bool *no_add_attrs)
6756 {
6757 if (DECL_P (*node))
6758 {
6759 if (TREE_CODE (*node) != FUNCTION_DECL)
6760 {
6761 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6762 name);
6763 *no_add_attrs = true;
6764 }
6765 /* FIXME: the argument if any is checked for type attributes;
6766 should it be checked for decl ones? */
6767 }
6768 else
6769 {
6770 if (TREE_CODE (*node) == FUNCTION_TYPE
6771 || TREE_CODE (*node) == METHOD_TYPE)
6772 {
6773 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6774 {
6775 warning (OPT_Wattributes, "%qE attribute ignored",
6776 name);
6777 *no_add_attrs = true;
6778 }
6779 }
6780 else if (TREE_CODE (*node) == POINTER_TYPE
6781 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6782 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6783 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6784 {
6785 *node = build_variant_type_copy (*node);
6786 TREE_TYPE (*node) = build_type_attribute_variant
6787 (TREE_TYPE (*node),
6788 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6789 *no_add_attrs = true;
6790 }
6791 else
6792 {
6793 /* Possibly pass this attribute on from the type to a decl. */
6794 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6795 | (int) ATTR_FLAG_FUNCTION_NEXT
6796 | (int) ATTR_FLAG_ARRAY_NEXT))
6797 {
6798 *no_add_attrs = true;
6799 return tree_cons (name, args, NULL_TREE);
6800 }
6801 else
6802 {
6803 warning (OPT_Wattributes, "%qE attribute ignored",
6804 name);
6805 }
6806 }
6807 }
6808
6809 return NULL_TREE;
6810 }
6811
6812 /* Handle a "pcs" attribute; arguments as in struct
6813 attribute_spec.handler. */
6814 static tree
6815 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6816 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6817 {
6818 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6819 {
6820 warning (OPT_Wattributes, "%qE attribute ignored", name);
6821 *no_add_attrs = true;
6822 }
6823 return NULL_TREE;
6824 }
6825
6826 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6827 /* Handle the "notshared" attribute. This attribute is another way of
6828 requesting hidden visibility. ARM's compiler supports
6829 "__declspec(notshared)"; we support the same thing via an
6830 attribute. */
6831
6832 static tree
6833 arm_handle_notshared_attribute (tree *node,
6834 tree name ATTRIBUTE_UNUSED,
6835 tree args ATTRIBUTE_UNUSED,
6836 int flags ATTRIBUTE_UNUSED,
6837 bool *no_add_attrs)
6838 {
6839 tree decl = TYPE_NAME (*node);
6840
6841 if (decl)
6842 {
6843 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6844 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6845 *no_add_attrs = false;
6846 }
6847 return NULL_TREE;
6848 }
6849 #endif
6850
6851 /* This function returns true if a function with declaration FNDECL and type
6852 FNTYPE uses the stack to pass arguments or to return its result, and false
6853 otherwise. This is used for functions with the attributes
6854 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and this function will issue
6855 diagnostic messages if the stack is used. NAME is the name of the attribute
6856 used. */
6857
6858 static bool
6859 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6860 {
6861 function_args_iterator args_iter;
6862 CUMULATIVE_ARGS args_so_far_v;
6863 cumulative_args_t args_so_far;
6864 bool first_param = true;
6865 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6866
6867 /* Error out if any argument is passed on the stack. */
6868 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6869 args_so_far = pack_cumulative_args (&args_so_far_v);
6870 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6871 {
6872 rtx arg_rtx;
6873 machine_mode arg_mode = TYPE_MODE (arg_type);
6874
6875 prev_arg_type = arg_type;
6876 if (VOID_TYPE_P (arg_type))
6877 continue;
6878
6879 if (!first_param)
6880 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6881 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6882 if (!arg_rtx
6883 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6884 {
6885 error ("%qE attribute not available to functions with arguments "
6886 "passed on the stack", name);
6887 return true;
6888 }
6889 first_param = false;
6890 }
6891
6892 /* Error out for variadic functions since we cannot control how many
6893 arguments will be passed and thus stack could be used. stdarg_p () is not
6894 used for the checking to avoid browsing arguments twice. */
6895 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6896 {
6897 error ("%qE attribute not available to functions with variable number "
6898 "of arguments", name);
6899 return true;
6900 }
6901
6902 /* Error out if return value is passed on the stack. */
6903 ret_type = TREE_TYPE (fntype);
6904 if (arm_return_in_memory (ret_type, fntype))
6905 {
6906 error ("%qE attribute not available to functions that return value on "
6907 "the stack", name);
6908 return true;
6909 }
6910 return false;
6911 }
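/* For instance (illustrative, hypothetical declarations, assumes -mcmse and
   the AAPCS base standard):

     int __attribute__ ((cmse_nonsecure_entry)) f4 (int, int, int, int);
     int __attribute__ ((cmse_nonsecure_entry)) f5 (int, int, int, int, int);

   F4 passes the check above because all arguments and the return value fit
   in r0-r3, whereas F5 is rejected because its fifth integer argument would
   be passed on the stack.  */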
6912
6913 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
6914 function will check whether the attribute is allowed here and will add the
6915 attribute to the function declaration tree or otherwise issue a warning. */
6916
6917 static tree
6918 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
6919 tree /* args */,
6920 int /* flags */,
6921 bool *no_add_attrs)
6922 {
6923 tree fndecl;
6924
6925 if (!use_cmse)
6926 {
6927 *no_add_attrs = true;
6928 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6929 name);
6930 return NULL_TREE;
6931 }
6932
6933 /* Ignore the attribute on anything that is not a function declaration. */
6934 if (TREE_CODE (*node) != FUNCTION_DECL)
6935 {
6936 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6937 name);
6938 *no_add_attrs = true;
6939 return NULL_TREE;
6940 }
6941
6942 fndecl = *node;
6943
6944 /* Warn for static linkage functions. */
6945 if (!TREE_PUBLIC (fndecl))
6946 {
6947 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
6948 "with static linkage", name);
6949 *no_add_attrs = true;
6950 return NULL_TREE;
6951 }
6952
6953 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
6954 TREE_TYPE (fndecl));
6955 return NULL_TREE;
6956 }
6957
6958
6959 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
6960 function will check whether the attribute is allowed here and will add the
6961 attribute to the function type tree or otherwise issue a diagnostic. The
6962 reason we check this at declaration time is to only allow the use of the
6963 attribute with declarations of function pointers and not function
6964 declarations. This function checks NODE is of the expected type and issues
6965 diagnostics otherwise using NAME. If it is not of the expected type
6966 *NO_ADD_ATTRS will be set to true. */
6967
6968 static tree
6969 arm_handle_cmse_nonsecure_call (tree *node, tree name,
6970 tree /* args */,
6971 int /* flags */,
6972 bool *no_add_attrs)
6973 {
6974 tree decl = NULL_TREE, fntype = NULL_TREE;
6975 tree type;
6976
6977 if (!use_cmse)
6978 {
6979 *no_add_attrs = true;
6980 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
6981 name);
6982 return NULL_TREE;
6983 }
6984
6985 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
6986 {
6987 decl = *node;
6988 fntype = TREE_TYPE (decl);
6989 }
6990
6991 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
6992 fntype = TREE_TYPE (fntype);
6993
6994 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
6995 {
6996 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
6997 "function pointer", name);
6998 *no_add_attrs = true;
6999 return NULL_TREE;
7000 }
7001
7002 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7003
7004 if (*no_add_attrs)
7005 return NULL_TREE;
7006
7007 /* Prevent trees being shared among function types with and without
7008 cmse_nonsecure_call attribute. */
7009 type = TREE_TYPE (decl);
7010
7011 type = build_distinct_type_copy (type);
7012 TREE_TYPE (decl) = type;
7013 fntype = type;
7014
7015 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7016 {
7017 type = fntype;
7018 fntype = TREE_TYPE (fntype);
7019 fntype = build_distinct_type_copy (fntype);
7020 TREE_TYPE (type) = fntype;
7021 }
7022
7023 /* Construct a type attribute and add it to the function type. */
7024 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7025 TYPE_ATTRIBUTES (fntype));
7026 TYPE_ATTRIBUTES (fntype) = attrs;
7027 return NULL_TREE;
7028 }
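/* Illustrative use (hypothetical names, requires -mcmse): the attribute must
   reach a function type through a pointer declaration, e.g.

     typedef void __attribute__ ((cmse_nonsecure_call)) nsfunc_t (void);
     nsfunc_t *ns_callback;

   whereas placing it directly on an ordinary function declaration is
   rejected by the check above with a warning.  */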
7029
7030 /* Return 0 if the attributes for two types are incompatible, 1 if they
7031 are compatible, and 2 if they are nearly compatible (which causes a
7032 warning to be generated). */
7033 static int
7034 arm_comp_type_attributes (const_tree type1, const_tree type2)
7035 {
7036 int l1, l2, s1, s2;
7037
7038 /* Check for mismatch of non-default calling convention. */
7039 if (TREE_CODE (type1) != FUNCTION_TYPE)
7040 return 1;
7041
7042 /* Check for mismatched call attributes. */
7043 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7044 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7045 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7046 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7047
7048 /* Only bother to check if an attribute is defined. */
7049 if (l1 | l2 | s1 | s2)
7050 {
7051 /* If one type has an attribute, the other must have the same attribute. */
7052 if ((l1 != l2) || (s1 != s2))
7053 return 0;
7054
7055 /* Disallow mixed attributes. */
7056 if ((l1 & s2) || (l2 & s1))
7057 return 0;
7058 }
7059
7060 /* Check for mismatched ISR attribute. */
7061 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7062 if (! l1)
7063 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7064 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7065 if (! l2)
7066 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7067 if (l1 != l2)
7068 return 0;
7069
7070 l1 = lookup_attribute ("cmse_nonsecure_call",
7071 TYPE_ATTRIBUTES (type1)) != NULL;
7072 l2 = lookup_attribute ("cmse_nonsecure_call",
7073 TYPE_ATTRIBUTES (type2)) != NULL;
7074
7075 if (l1 != l2)
7076 return 0;
7077
7078 return 1;
7079 }
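/* Illustrative consequence (hypothetical typedefs): given

     typedef void lcfn (void) __attribute__ ((long_call));
     typedef void scfn (void) __attribute__ ((short_call));

   an 'lcfn *' and an 'scfn *' are treated as incompatible pointer types,
   since the mismatched call attributes make the check above return 0.  */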
7080
7081 /* Assigns default attributes to newly defined type. This is used to
7082 set short_call/long_call attributes for function types of
7083 functions defined inside corresponding #pragma scopes. */
7084 static void
7085 arm_set_default_type_attributes (tree type)
7086 {
7087 /* Add __attribute__ ((long_call)) to all functions, when
7088 inside #pragma long_calls or __attribute__ ((short_call)),
7089 when inside #pragma no_long_calls. */
7090 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7091 {
7092 tree type_attr_list, attr_name;
7093 type_attr_list = TYPE_ATTRIBUTES (type);
7094
7095 if (arm_pragma_long_calls == LONG)
7096 attr_name = get_identifier ("long_call");
7097 else if (arm_pragma_long_calls == SHORT)
7098 attr_name = get_identifier ("short_call");
7099 else
7100 return;
7101
7102 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7103 TYPE_ATTRIBUTES (type) = type_attr_list;
7104 }
7105 }
7106 \f
7107 /* Return true if DECL is known to be linked into section SECTION. */
7108
7109 static bool
7110 arm_function_in_section_p (tree decl, section *section)
7111 {
7112 /* We can only be certain about the prevailing symbol definition. */
7113 if (!decl_binds_to_current_def_p (decl))
7114 return false;
7115
7116 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7117 if (!DECL_SECTION_NAME (decl))
7118 {
7119 /* Make sure that we will not create a unique section for DECL. */
7120 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7121 return false;
7122 }
7123
7124 return function_section (decl) == section;
7125 }
7126
7127 /* Return nonzero if a 32-bit "long_call" should be generated for
7128 a call from the current function to DECL. We generate a long_call
7129 if the function:
7130
7131 a. has an __attribute__ ((long_call))
7132 or b. is within the scope of a #pragma long_calls
7133 or c. the -mlong-calls command line switch has been specified
7134
7135 However we do not generate a long call if the function:
7136
7137 d. has an __attribute__ ((short_call))
7138 or e. is inside the scope of a #pragma no_long_calls
7139 or f. is defined in the same section as the current function. */
7140
7141 bool
7142 arm_is_long_call_p (tree decl)
7143 {
7144 tree attrs;
7145
7146 if (!decl)
7147 return TARGET_LONG_CALLS;
7148
7149 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7150 if (lookup_attribute ("short_call", attrs))
7151 return false;
7152
7153 /* For "f", be conservative, and only cater for cases in which the
7154 whole of the current function is placed in the same section. */
7155 if (!flag_reorder_blocks_and_partition
7156 && TREE_CODE (decl) == FUNCTION_DECL
7157 && arm_function_in_section_p (decl, current_function_section ()))
7158 return false;
7159
7160 if (lookup_attribute ("long_call", attrs))
7161 return true;
7162
7163 return TARGET_LONG_CALLS;
7164 }
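/* Worked example (illustrative, hypothetical declarations): when compiling
   with -mlong-calls,

     void f (void) __attribute__ ((short_call));
     void g (void);

   a call to F stays a normal BL (rule d above), a call to G in another
   section becomes a long call (rule c), and a call to G defined in the same
   section as the caller is still emitted as a normal call (rule f).  */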
7165
7166 /* Return nonzero if it is ok to make a tail-call to DECL. */
7167 static bool
7168 arm_function_ok_for_sibcall (tree decl, tree exp)
7169 {
7170 unsigned long func_type;
7171
7172 if (cfun->machine->sibcall_blocked)
7173 return false;
7174
7175 /* Never tailcall something if we are generating code for Thumb-1. */
7176 if (TARGET_THUMB1)
7177 return false;
7178
7179 /* The PIC register is live on entry to VxWorks PLT entries, so we
7180 must make the call before restoring the PIC register. */
7181 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7182 return false;
7183
7184 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7185 may be used both as target of the call and base register for restoring
7186 the VFP registers */
7187 if (TARGET_APCS_FRAME && TARGET_ARM
7188 && TARGET_HARD_FLOAT
7189 && decl && arm_is_long_call_p (decl))
7190 return false;
7191
7192 /* If we are interworking and the function is not declared static
7193 then we can't tail-call it unless we know that it exists in this
7194 compilation unit (since it might be a Thumb routine). */
7195 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7196 && !TREE_ASM_WRITTEN (decl))
7197 return false;
7198
7199 func_type = arm_current_func_type ();
7200 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7201 if (IS_INTERRUPT (func_type))
7202 return false;
7203
7204 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7205 generated for entry functions themselves. */
7206 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7207 return false;
7208
7209 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7210 this would complicate matters for later code generation. */
7211 if (TREE_CODE (exp) == CALL_EXPR)
7212 {
7213 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7214 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7215 return false;
7216 }
7217
7218 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7219 {
7220 /* Check that the return value locations are the same. For
7221 example that we aren't returning a value from the sibling in
7222 a VFP register but then need to transfer it to a core
7223 register. */
7224 rtx a, b;
7225 tree decl_or_type = decl;
7226
7227 /* If it is an indirect function pointer, get the function type. */
7228 if (!decl)
7229 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7230
7231 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7232 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7233 cfun->decl, false);
7234 if (!rtx_equal_p (a, b))
7235 return false;
7236 }
7237
7238 /* Never tailcall if function may be called with a misaligned SP. */
7239 if (IS_STACKALIGN (func_type))
7240 return false;
7241
7242 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7243 references should become a NOP. Don't convert such calls into
7244 sibling calls. */
7245 if (TARGET_AAPCS_BASED
7246 && arm_abi == ARM_ABI_AAPCS
7247 && decl
7248 && DECL_WEAK (decl))
7249 return false;
7250
7251 /* We cannot do a tailcall for an indirect call by descriptor if all the
7252 argument registers are used because the only register left to load the
7253 address is IP and it will already contain the static chain. */
7254 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7255 {
7256 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7257 CUMULATIVE_ARGS cum;
7258 cumulative_args_t cum_v;
7259
7260 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7261 cum_v = pack_cumulative_args (&cum);
7262
7263 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7264 {
7265 tree type = TREE_VALUE (t);
7266 if (!VOID_TYPE_P (type))
7267 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7268 }
7269
7270 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7271 return false;
7272 }
7273
7274 /* Everything else is ok. */
7275 return true;
7276 }
7277
7278 \f
7279 /* Addressing mode support functions. */
7280
7281 /* Return nonzero if X is a legitimate immediate operand when compiling
7282 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7283 int
7284 legitimate_pic_operand_p (rtx x)
7285 {
7286 if (GET_CODE (x) == SYMBOL_REF
7287 || (GET_CODE (x) == CONST
7288 && GET_CODE (XEXP (x, 0)) == PLUS
7289 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7290 return 0;
7291
7292 return 1;
7293 }
7294
7295 /* Record that the current function needs a PIC register. Initialize
7296 cfun->machine->pic_reg if we have not already done so. */
7297
7298 static void
7299 require_pic_register (void)
7300 {
7301 /* A lot of the logic here is made obscure by the fact that this
7302 routine gets called as part of the rtx cost estimation process.
7303 We don't want those calls to affect any assumptions about the real
7304 function; and further, we can't call entry_of_function() until we
7305 start the real expansion process. */
7306 if (!crtl->uses_pic_offset_table)
7307 {
7308 gcc_assert (can_create_pseudo_p ());
7309 if (arm_pic_register != INVALID_REGNUM
7310 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7311 {
7312 if (!cfun->machine->pic_reg)
7313 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7314
7315 /* Play games to avoid marking the function as needing pic
7316 if we are being called as part of the cost-estimation
7317 process. */
7318 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7319 crtl->uses_pic_offset_table = 1;
7320 }
7321 else
7322 {
7323 rtx_insn *seq, *insn;
7324
7325 if (!cfun->machine->pic_reg)
7326 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7327
7328 /* Play games to avoid marking the function as needing pic
7329 if we are being called as part of the cost-estimation
7330 process. */
7331 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7332 {
7333 crtl->uses_pic_offset_table = 1;
7334 start_sequence ();
7335
7336 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7337 && arm_pic_register > LAST_LO_REGNUM)
7338 emit_move_insn (cfun->machine->pic_reg,
7339 gen_rtx_REG (Pmode, arm_pic_register));
7340 else
7341 arm_load_pic_register (0UL);
7342
7343 seq = get_insns ();
7344 end_sequence ();
7345
7346 for (insn = seq; insn; insn = NEXT_INSN (insn))
7347 if (INSN_P (insn))
7348 INSN_LOCATION (insn) = prologue_location;
7349
7350 /* We can be called during expansion of PHI nodes, where
7351 we can't yet emit instructions directly in the final
7352 insn stream. Queue the insns on the entry edge, they will
7353 be committed after everything else is expanded. */
7354 insert_insn_on_edge (seq,
7355 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7356 }
7357 }
7358 }
7359 }
7360
7361 rtx
7362 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7363 {
7364 if (GET_CODE (orig) == SYMBOL_REF
7365 || GET_CODE (orig) == LABEL_REF)
7366 {
7367 if (reg == 0)
7368 {
7369 gcc_assert (can_create_pseudo_p ());
7370 reg = gen_reg_rtx (Pmode);
7371 }
7372
7373 /* VxWorks does not impose a fixed gap between segments; the run-time
7374 gap can be different from the object-file gap. We therefore can't
7375 use GOTOFF unless we are absolutely sure that the symbol is in the
7376 same segment as the GOT. Unfortunately, the flexibility of linker
7377 scripts means that we can't be sure of that in general, so assume
7378 that GOTOFF is never valid on VxWorks. */
7379 /* References to weak symbols cannot be resolved locally: they
7380 may be overridden by a non-weak definition at link time. */
7381 rtx_insn *insn;
7382 if ((GET_CODE (orig) == LABEL_REF
7383 || (GET_CODE (orig) == SYMBOL_REF
7384 && SYMBOL_REF_LOCAL_P (orig)
7385 && (SYMBOL_REF_DECL (orig)
7386 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7387 && NEED_GOT_RELOC
7388 && arm_pic_data_is_text_relative)
7389 insn = arm_pic_static_addr (orig, reg);
7390 else
7391 {
7392 rtx pat;
7393 rtx mem;
7394
7395 /* If this function doesn't have a pic register, create one now. */
7396 require_pic_register ();
7397
7398 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7399
7400 /* Make the MEM as close to a constant as possible. */
7401 mem = SET_SRC (pat);
7402 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7403 MEM_READONLY_P (mem) = 1;
7404 MEM_NOTRAP_P (mem) = 1;
7405
7406 insn = emit_insn (pat);
7407 }
7408
7409 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7410 by loop. */
7411 set_unique_reg_note (insn, REG_EQUAL, orig);
7412
7413 return reg;
7414 }
7415 else if (GET_CODE (orig) == CONST)
7416 {
7417 rtx base, offset;
7418
7419 if (GET_CODE (XEXP (orig, 0)) == PLUS
7420 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7421 return orig;
7422
7423 /* Handle the case where we have: const (UNSPEC_TLS). */
7424 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7425 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7426 return orig;
7427
7428 /* Handle the case where we have:
7429 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7430 CONST_INT. */
7431 if (GET_CODE (XEXP (orig, 0)) == PLUS
7432 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7433 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7434 {
7435 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7436 return orig;
7437 }
7438
7439 if (reg == 0)
7440 {
7441 gcc_assert (can_create_pseudo_p ());
7442 reg = gen_reg_rtx (Pmode);
7443 }
7444
7445 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7446
7447 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7448 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7449 base == reg ? 0 : reg);
7450
7451 if (CONST_INT_P (offset))
7452 {
7453 /* The base register doesn't really matter, we only want to
7454 test the index for the appropriate mode. */
7455 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7456 {
7457 gcc_assert (can_create_pseudo_p ());
7458 offset = force_reg (Pmode, offset);
7459 }
7460
7461 if (CONST_INT_P (offset))
7462 return plus_constant (Pmode, base, INTVAL (offset));
7463 }
7464
7465 if (GET_MODE_SIZE (mode) > 4
7466 && (GET_MODE_CLASS (mode) == MODE_INT
7467 || TARGET_SOFT_FLOAT))
7468 {
7469 emit_insn (gen_addsi3 (reg, base, offset));
7470 return reg;
7471 }
7472
7473 return gen_rtx_PLUS (Pmode, base, offset);
7474 }
7475
7476 return orig;
7477 }
7478
7479
7480 /* Find a spare register to use during the prolog of a function. */
7481
7482 static int
7483 thumb_find_work_register (unsigned long pushed_regs_mask)
7484 {
7485 int reg;
7486
7487 /* Check the argument registers first as these are call-used. The
7488 register allocation order means that sometimes r3 might be used
7489 but earlier argument registers might not, so check them all. */
7490 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7491 if (!df_regs_ever_live_p (reg))
7492 return reg;
7493
7494 /* Before going on to check the call-saved registers we can try a couple
7495 more ways of deducing that r3 is available. The first is when we are
7496 pushing anonymous arguments onto the stack and we have less than 4
7497 registers worth of fixed arguments(*). In this case r3 will be part of
7498 the variable argument list and so we can be sure that it will be
7499 pushed right at the start of the function. Hence it will be available
7500 for the rest of the prologue.
7501 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7502 if (cfun->machine->uses_anonymous_args
7503 && crtl->args.pretend_args_size > 0)
7504 return LAST_ARG_REGNUM;
7505
7506 /* The other case is when we have fixed arguments but less than 4 registers
7507 worth. In this case r3 might be used in the body of the function, but
7508 it is not being used to convey an argument into the function. In theory
7509 we could just check crtl->args.size to see how many bytes are
7510 being passed in argument registers, but it seems that it is unreliable.
7511 Sometimes it will have the value 0 when in fact arguments are being
7512 passed. (See testcase execute/20021111-1.c for an example). So we also
7513 check the args_info.nregs field as well. The problem with this field is
7514 that it makes no allowances for arguments that are passed to the
7515 function but which are not used. Hence we could miss an opportunity
7516 when a function has an unused argument in r3. But it is better to be
7517 safe than to be sorry. */
7518 if (! cfun->machine->uses_anonymous_args
7519 && crtl->args.size >= 0
7520 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7521 && (TARGET_AAPCS_BASED
7522 ? crtl->args.info.aapcs_ncrn < 4
7523 : crtl->args.info.nregs < 4))
7524 return LAST_ARG_REGNUM;
7525
7526 /* Otherwise look for a call-saved register that is going to be pushed. */
7527 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7528 if (pushed_regs_mask & (1 << reg))
7529 return reg;
7530
7531 if (TARGET_THUMB2)
7532 {
7533 /* Thumb-2 can use high regs. */
7534 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7535 if (pushed_regs_mask & (1 << reg))
7536 return reg;
7537 }
7538 /* Something went wrong - thumb_compute_save_reg_mask()
7539 should have arranged for a suitable register to be pushed. */
7540 gcc_unreachable ();
7541 }
7542
7543 static GTY(()) int pic_labelno;
7544
7545 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7546 low register. */
7547
7548 void
7549 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7550 {
7551 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7552
7553 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7554 return;
7555
7556 gcc_assert (flag_pic);
7557
7558 pic_reg = cfun->machine->pic_reg;
7559 if (TARGET_VXWORKS_RTP)
7560 {
7561 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7562 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7563 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7564
7565 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7566
7567 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7568 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7569 }
7570 else
7571 {
7572 /* We use an UNSPEC rather than a LABEL_REF because this label
7573 never appears in the code stream. */
7574
7575 labelno = GEN_INT (pic_labelno++);
7576 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7577 l1 = gen_rtx_CONST (VOIDmode, l1);
7578
7579 /* On the ARM the PC register contains 'dot + 8' at the time of the
7580 addition, on the Thumb it is 'dot + 4'. */
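/* Worked example (illustrative): if the UNSPEC_PIC_LABEL point ends up at
   address L and the GOT at address G, the constant built below is
   G - (L + 8) in ARM state; adding the PC, which reads as L + 8 at the
   pic_add insn, yields exactly G.  */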
7581 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7582 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7583 UNSPEC_GOTSYM_OFF);
7584 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7585
7586 if (TARGET_32BIT)
7587 {
7588 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7589 }
7590 else /* TARGET_THUMB1 */
7591 {
7592 if (arm_pic_register != INVALID_REGNUM
7593 && REGNO (pic_reg) > LAST_LO_REGNUM)
7594 {
7595 /* We will have pushed the pic register, so we should always be
7596 able to find a work register. */
7597 pic_tmp = gen_rtx_REG (SImode,
7598 thumb_find_work_register (saved_regs));
7599 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7600 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7601 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7602 }
7603 else if (arm_pic_register != INVALID_REGNUM
7604 && arm_pic_register > LAST_LO_REGNUM
7605 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7606 {
7607 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7608 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7609 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7610 }
7611 else
7612 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7613 }
7614 }
7615
7616 /* Need to emit this whether or not we obey regdecls,
7617 since setjmp/longjmp can cause life info to screw up. */
7618 emit_use (pic_reg);
7619 }
7620
7621 /* Generate code to load the address of a static var when flag_pic is set. */
7622 static rtx_insn *
7623 arm_pic_static_addr (rtx orig, rtx reg)
7624 {
7625 rtx l1, labelno, offset_rtx;
7626
7627 gcc_assert (flag_pic);
7628
7629 /* We use an UNSPEC rather than a LABEL_REF because this label
7630 never appears in the code stream. */
7631 labelno = GEN_INT (pic_labelno++);
7632 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7633 l1 = gen_rtx_CONST (VOIDmode, l1);
7634
7635 /* On the ARM the PC register contains 'dot + 8' at the time of the
7636 addition, on the Thumb it is 'dot + 4'. */
7637 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7638 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7639 UNSPEC_SYMBOL_OFFSET);
7640 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7641
7642 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7643 }
7644
7645 /* Return nonzero if X is valid as an ARM state addressing register. */
7646 static int
7647 arm_address_register_rtx_p (rtx x, int strict_p)
7648 {
7649 int regno;
7650
7651 if (!REG_P (x))
7652 return 0;
7653
7654 regno = REGNO (x);
7655
7656 if (strict_p)
7657 return ARM_REGNO_OK_FOR_BASE_P (regno);
7658
7659 return (regno <= LAST_ARM_REGNUM
7660 || regno >= FIRST_PSEUDO_REGISTER
7661 || regno == FRAME_POINTER_REGNUM
7662 || regno == ARG_POINTER_REGNUM);
7663 }
7664
7665 /* Return TRUE if this rtx is the difference of a symbol and a label,
7666 and will reduce to a PC-relative relocation in the object file.
7667 Expressions like this can be left alone when generating PIC, rather
7668 than forced through the GOT. */
7669 static int
7670 pcrel_constant_p (rtx x)
7671 {
7672 if (GET_CODE (x) == MINUS)
7673 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7674
7675 return FALSE;
7676 }
7677
7678 /* Return true if X will surely end up in an index register after next
7679 splitting pass. */
7680 static bool
7681 will_be_in_index_register (const_rtx x)
7682 {
7683 /* arm.md: calculate_pic_address will split this into a register. */
7684 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7685 }
7686
7687 /* Return nonzero if X is a valid ARM state address operand. */
7688 int
7689 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7690 int strict_p)
7691 {
7692 bool use_ldrd;
7693 enum rtx_code code = GET_CODE (x);
7694
7695 if (arm_address_register_rtx_p (x, strict_p))
7696 return 1;
7697
7698 use_ldrd = (TARGET_LDRD
7699 && (mode == DImode || mode == DFmode));
7700
7701 if (code == POST_INC || code == PRE_DEC
7702 || ((code == PRE_INC || code == POST_DEC)
7703 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7704 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7705
7706 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7707 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7708 && GET_CODE (XEXP (x, 1)) == PLUS
7709 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7710 {
7711 rtx addend = XEXP (XEXP (x, 1), 1);
7712
7713 /* Don't allow ldrd post increment by register because it's hard
7714 to fixup invalid register choices. */
7715 if (use_ldrd
7716 && GET_CODE (x) == POST_MODIFY
7717 && REG_P (addend))
7718 return 0;
7719
7720 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7721 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7722 }
7723
7724 /* After reload constants split into minipools will have addresses
7725 from a LABEL_REF. */
7726 else if (reload_completed
7727 && (code == LABEL_REF
7728 || (code == CONST
7729 && GET_CODE (XEXP (x, 0)) == PLUS
7730 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7731 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7732 return 1;
7733
7734 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7735 return 0;
7736
7737 else if (code == PLUS)
7738 {
7739 rtx xop0 = XEXP (x, 0);
7740 rtx xop1 = XEXP (x, 1);
7741
7742 return ((arm_address_register_rtx_p (xop0, strict_p)
7743 && ((CONST_INT_P (xop1)
7744 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7745 || (!strict_p && will_be_in_index_register (xop1))))
7746 || (arm_address_register_rtx_p (xop1, strict_p)
7747 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7748 }
7749
7750 #if 0
7751 /* Reload currently can't handle MINUS, so disable this for now */
7752 else if (GET_CODE (x) == MINUS)
7753 {
7754 rtx xop0 = XEXP (x, 0);
7755 rtx xop1 = XEXP (x, 1);
7756
7757 return (arm_address_register_rtx_p (xop0, strict_p)
7758 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7759 }
7760 #endif
7761
7762 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7763 && code == SYMBOL_REF
7764 && CONSTANT_POOL_ADDRESS_P (x)
7765 && ! (flag_pic
7766 && symbol_mentioned_p (get_pool_constant (x))
7767 && ! pcrel_constant_p (get_pool_constant (x))))
7768 return 1;
7769
7770 return 0;
7771 }
7772
7773 /* Return true if we can avoid creating a constant pool entry for x. */
7774 static bool
7775 can_avoid_literal_pool_for_label_p (rtx x)
7776 {
7777 /* Normally we can assign constant values to target registers without
7778 the help of the constant pool. But there are cases where we have to use
7779 the constant pool, such as:
7780 1) assigning a label to a register.
7781 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7782
7783 A constant pool access of the form:
7784 (set (reg r0) (mem (symbol_ref (".LC0"))))
7785 will cause a literal pool to be used (later, in function arm_reorg).
7786 So here we mark this form as invalid; the compiler will then adjust it
7787 into:
7788 (set (reg r0) (symbol_ref (".LC0")))
7789 (set (reg r0) (mem (reg r0))).
7790 No extra register is required, and (mem (reg r0)) won't cause the use
7791 of literal pools. */
7792 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7793 && CONSTANT_POOL_ADDRESS_P (x))
7794 return 1;
7795 return 0;
7796 }
7797
7798
7799 /* Return nonzero if X is a valid Thumb-2 address operand. */
7800 static int
7801 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7802 {
7803 bool use_ldrd;
7804 enum rtx_code code = GET_CODE (x);
7805
7806 if (arm_address_register_rtx_p (x, strict_p))
7807 return 1;
7808
7809 use_ldrd = (TARGET_LDRD
7810 && (mode == DImode || mode == DFmode));
7811
7812 if (code == POST_INC || code == PRE_DEC
7813 || ((code == PRE_INC || code == POST_DEC)
7814 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7815 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7816
7817 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7818 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7819 && GET_CODE (XEXP (x, 1)) == PLUS
7820 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7821 {
7822 /* Thumb-2 only has autoincrement by constant. */
7823 rtx addend = XEXP (XEXP (x, 1), 1);
7824 HOST_WIDE_INT offset;
7825
7826 if (!CONST_INT_P (addend))
7827 return 0;
7828
7829 offset = INTVAL(addend);
7830 if (GET_MODE_SIZE (mode) <= 4)
7831 return (offset > -256 && offset < 256);
7832
7833 return (use_ldrd && offset > -1024 && offset < 1024
7834 && (offset & 3) == 0);
7835 }
7836
7837 /* After reload constants split into minipools will have addresses
7838 from a LABEL_REF. */
7839 else if (reload_completed
7840 && (code == LABEL_REF
7841 || (code == CONST
7842 && GET_CODE (XEXP (x, 0)) == PLUS
7843 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7844 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7845 return 1;
7846
7847 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7848 return 0;
7849
7850 else if (code == PLUS)
7851 {
7852 rtx xop0 = XEXP (x, 0);
7853 rtx xop1 = XEXP (x, 1);
7854
7855 return ((arm_address_register_rtx_p (xop0, strict_p)
7856 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7857 || (!strict_p && will_be_in_index_register (xop1))))
7858 || (arm_address_register_rtx_p (xop1, strict_p)
7859 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7860 }
7861
7862 else if (can_avoid_literal_pool_for_label_p (x))
7863 return 0;
7864
7865 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7866 && code == SYMBOL_REF
7867 && CONSTANT_POOL_ADDRESS_P (x)
7868 && ! (flag_pic
7869 && symbol_mentioned_p (get_pool_constant (x))
7870 && ! pcrel_constant_p (get_pool_constant (x))))
7871 return 1;
7872
7873 return 0;
7874 }
7875
7876 /* Return nonzero if INDEX is valid for an address index operand in
7877 ARM state. */
7878 static int
7879 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7880 int strict_p)
7881 {
7882 HOST_WIDE_INT range;
7883 enum rtx_code code = GET_CODE (index);
7884
7885 /* Standard coprocessor addressing modes. */
7886 if (TARGET_HARD_FLOAT
7887 && (mode == SFmode || mode == DFmode))
7888 return (code == CONST_INT && INTVAL (index) < 1024
7889 && INTVAL (index) > -1024
7890 && (INTVAL (index) & 3) == 0);
7891
7892 /* For quad modes, we restrict the constant offset to be slightly less
7893 than what the instruction format permits. We do this because for
7894 quad mode moves, we will actually decompose them into two separate
7895 double-mode reads or writes. INDEX must therefore be a valid
7896 (double-mode) offset and so should INDEX+8. */
7897 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7898 return (code == CONST_INT
7899 && INTVAL (index) < 1016
7900 && INTVAL (index) > -1024
7901 && (INTVAL (index) & 3) == 0);
7902
7903 /* We have no such constraint on double mode offsets, so we permit the
7904 full range of the instruction format. */
7905 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7906 return (code == CONST_INT
7907 && INTVAL (index) < 1024
7908 && INTVAL (index) > -1024
7909 && (INTVAL (index) & 3) == 0);
7910
7911 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7912 return (code == CONST_INT
7913 && INTVAL (index) < 1024
7914 && INTVAL (index) > -1024
7915 && (INTVAL (index) & 3) == 0);
7916
7917 if (arm_address_register_rtx_p (index, strict_p)
7918 && (GET_MODE_SIZE (mode) <= 4))
7919 return 1;
7920
7921 if (mode == DImode || mode == DFmode)
7922 {
7923 if (code == CONST_INT)
7924 {
7925 HOST_WIDE_INT val = INTVAL (index);
7926
7927 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
7928 If vldr is selected it uses arm_coproc_mem_operand. */
7929 if (TARGET_LDRD)
7930 return val > -256 && val < 256;
7931 else
7932 return val > -4096 && val < 4092;
7933 }
7934
7935 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
7936 }
7937
7938 if (GET_MODE_SIZE (mode) <= 4
7939 && ! (arm_arch4
7940 && (mode == HImode
7941 || mode == HFmode
7942 || (mode == QImode && outer == SIGN_EXTEND))))
7943 {
7944 if (code == MULT)
7945 {
7946 rtx xiop0 = XEXP (index, 0);
7947 rtx xiop1 = XEXP (index, 1);
7948
7949 return ((arm_address_register_rtx_p (xiop0, strict_p)
7950 && power_of_two_operand (xiop1, SImode))
7951 || (arm_address_register_rtx_p (xiop1, strict_p)
7952 && power_of_two_operand (xiop0, SImode)));
7953 }
7954 else if (code == LSHIFTRT || code == ASHIFTRT
7955 || code == ASHIFT || code == ROTATERT)
7956 {
7957 rtx op = XEXP (index, 1);
7958
7959 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
7960 && CONST_INT_P (op)
7961 && INTVAL (op) > 0
7962 && INTVAL (op) <= 31);
7963 }
7964 }
7965
7966 /* For ARM v4 we may be doing a sign-extend operation during the
7967 load. */
7968 if (arm_arch4)
7969 {
7970 if (mode == HImode
7971 || mode == HFmode
7972 || (outer == SIGN_EXTEND && mode == QImode))
7973 range = 256;
7974 else
7975 range = 4096;
7976 }
7977 else
7978 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
7979
7980 return (code == CONST_INT
7981 && INTVAL (index) < range
7982 && INTVAL (index) > -range);
7983 }
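/* Illustrative examples for the checks above: in ARM state a word access can
   use a scaled register index such as

     (plus (reg r0) (mult (reg r1) (const_int 4)))     ; i.e. [r0, r1, LSL #2]

   while, from ARMv4 on, an HImode access is limited to a plain register
   index or an immediate offset with |offset| < 256.  */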
7984
7985 /* Return true if OP is a valid index scaling factor for Thumb-2 address
7986 index operand. i.e. 1, 2, 4 or 8. */
7987 static bool
7988 thumb2_index_mul_operand (rtx op)
7989 {
7990 HOST_WIDE_INT val;
7991
7992 if (!CONST_INT_P (op))
7993 return false;
7994
7995 val = INTVAL(op);
7996 return (val == 1 || val == 2 || val == 4 || val == 8);
7997 }
7998
7999 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8000 static int
8001 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8002 {
8003 enum rtx_code code = GET_CODE (index);
8004
8005 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8006 /* Standard coprocessor addressing modes. */
8007 if (TARGET_HARD_FLOAT
8008 && (mode == SFmode || mode == DFmode))
8009 return (code == CONST_INT && INTVAL (index) < 1024
8010 /* Thumb-2 allows only > -256 index range for its core register
8011 load/stores. Since we allow SF/DF in core registers, we have
8012 to use the intersection between -256~4096 (core) and -1024~1024
8013 (coprocessor). */
8014 && INTVAL (index) > -256
8015 && (INTVAL (index) & 3) == 0);
8016
8017 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8018 {
8019 /* For DImode assume values will usually live in core regs
8020 and only allow LDRD addressing modes. */
8021 if (!TARGET_LDRD || mode != DImode)
8022 return (code == CONST_INT
8023 && INTVAL (index) < 1024
8024 && INTVAL (index) > -1024
8025 && (INTVAL (index) & 3) == 0);
8026 }
8027
8028 /* For quad modes, we restrict the constant offset to be slightly less
8029 than what the instruction format permits. We do this because for
8030 quad mode moves, we will actually decompose them into two separate
8031 double-mode reads or writes. INDEX must therefore be a valid
8032 (double-mode) offset and so should INDEX+8. */
8033 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8034 return (code == CONST_INT
8035 && INTVAL (index) < 1016
8036 && INTVAL (index) > -1024
8037 && (INTVAL (index) & 3) == 0);
8038
8039 /* We have no such constraint on double mode offsets, so we permit the
8040 full range of the instruction format. */
8041 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8042 return (code == CONST_INT
8043 && INTVAL (index) < 1024
8044 && INTVAL (index) > -1024
8045 && (INTVAL (index) & 3) == 0);
8046
8047 if (arm_address_register_rtx_p (index, strict_p)
8048 && (GET_MODE_SIZE (mode) <= 4))
8049 return 1;
8050
8051 if (mode == DImode || mode == DFmode)
8052 {
8053 if (code == CONST_INT)
8054 {
8055 HOST_WIDE_INT val = INTVAL (index);
8056 /* Thumb-2 ldrd only has reg+const addressing modes.
8057 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8058 If vldr is selected it uses arm_coproc_mem_operand. */
8059 if (TARGET_LDRD)
8060 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8061 else
8062 return IN_RANGE (val, -255, 4095 - 4);
8063 }
8064 else
8065 return 0;
8066 }
8067
8068 if (code == MULT)
8069 {
8070 rtx xiop0 = XEXP (index, 0);
8071 rtx xiop1 = XEXP (index, 1);
8072
8073 return ((arm_address_register_rtx_p (xiop0, strict_p)
8074 && thumb2_index_mul_operand (xiop1))
8075 || (arm_address_register_rtx_p (xiop1, strict_p)
8076 && thumb2_index_mul_operand (xiop0)));
8077 }
8078 else if (code == ASHIFT)
8079 {
8080 rtx op = XEXP (index, 1);
8081
8082 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8083 && CONST_INT_P (op)
8084 && INTVAL (op) > 0
8085 && INTVAL (op) <= 3);
8086 }
8087
8088 return (code == CONST_INT
8089 && INTVAL (index) < 4096
8090 && INTVAL (index) > -256);
8091 }
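/* Illustrative example: for a Thumb-2 SImode access the address

     (plus (reg r1) (mult (reg r2) (const_int 4)))

   is accepted above (the scaled-register form, scale 1, 2, 4 or 8), whereas
   a scale factor of 16, or a constant offset outside the -255..4095 range,
   is rejected and must be computed into a register first.  */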
8092
8093 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8094 static int
8095 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8096 {
8097 int regno;
8098
8099 if (!REG_P (x))
8100 return 0;
8101
8102 regno = REGNO (x);
8103
8104 if (strict_p)
8105 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8106
8107 return (regno <= LAST_LO_REGNUM
8108 || regno > LAST_VIRTUAL_REGISTER
8109 || regno == FRAME_POINTER_REGNUM
8110 || (GET_MODE_SIZE (mode) >= 4
8111 && (regno == STACK_POINTER_REGNUM
8112 || regno >= FIRST_PSEUDO_REGISTER
8113 || x == hard_frame_pointer_rtx
8114 || x == arg_pointer_rtx)));
8115 }
8116
8117 /* Return nonzero if x is a legitimate index register. This is the case
8118 for any base register that can access a QImode object. */
8119 inline static int
8120 thumb1_index_register_rtx_p (rtx x, int strict_p)
8121 {
8122 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8123 }
8124
8125 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8126
8127 The AP may be eliminated to either the SP or the FP, so we use the
8128 least common denominator, e.g. SImode, and offsets from 0 to 64.
8129
8130 ??? Verify whether the above is the right approach.
8131
8132 ??? Also, the FP may be eliminated to the SP, so perhaps that
8133 needs special handling also.
8134
8135 ??? Look at how the mips16 port solves this problem. It probably uses
8136 better ways to solve some of these problems.
8137
8138 Although it is not incorrect, we don't accept QImode and HImode
8139 addresses based on the frame pointer or arg pointer until the
8140 reload pass starts. This is so that eliminating such addresses
8141 into stack based ones won't produce impossible code. */
8142 int
8143 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8144 {
8145 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8146 return 0;
8147
8148 /* ??? Not clear if this is right. Experiment. */
8149 if (GET_MODE_SIZE (mode) < 4
8150 && !(reload_in_progress || reload_completed)
8151 && (reg_mentioned_p (frame_pointer_rtx, x)
8152 || reg_mentioned_p (arg_pointer_rtx, x)
8153 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8154 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8155 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8156 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8157 return 0;
8158
8159 /* Accept any base register. SP only in SImode or larger. */
8160 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8161 return 1;
8162
8163 /* This is PC relative data before arm_reorg runs. */
8164 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8165 && GET_CODE (x) == SYMBOL_REF
8166 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8167 return 1;
8168
8169 /* This is PC relative data after arm_reorg runs. */
8170 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8171 && reload_completed
8172 && (GET_CODE (x) == LABEL_REF
8173 || (GET_CODE (x) == CONST
8174 && GET_CODE (XEXP (x, 0)) == PLUS
8175 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8176 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8177 return 1;
8178
8179 /* Post-inc indexing only supported for SImode and larger. */
8180 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8181 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8182 return 1;
8183
8184 else if (GET_CODE (x) == PLUS)
8185 {
8186 /* REG+REG address can be any two index registers. */
8187 /* We disallow FRAME+REG addressing since we know that FRAME
8188 will be replaced with STACK, and SP relative addressing only
8189 permits SP+OFFSET. */
8190 if (GET_MODE_SIZE (mode) <= 4
8191 && XEXP (x, 0) != frame_pointer_rtx
8192 && XEXP (x, 1) != frame_pointer_rtx
8193 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8194 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8195 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8196 return 1;
8197
8198 /* REG+const has 5-7 bit offset for non-SP registers. */
8199 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8200 || XEXP (x, 0) == arg_pointer_rtx)
8201 && CONST_INT_P (XEXP (x, 1))
8202 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8203 return 1;
8204
8205 /* REG+const has 10-bit offset for SP, but only SImode and
8206 larger is supported. */
8207 /* ??? Should probably check for DI/DFmode overflow here
8208 just like GO_IF_LEGITIMATE_OFFSET does. */
8209 else if (REG_P (XEXP (x, 0))
8210 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8211 && GET_MODE_SIZE (mode) >= 4
8212 && CONST_INT_P (XEXP (x, 1))
8213 && INTVAL (XEXP (x, 1)) >= 0
8214 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8215 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8216 return 1;
8217
8218 else if (REG_P (XEXP (x, 0))
8219 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8220 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8221 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8222 && REGNO (XEXP (x, 0))
8223 <= LAST_VIRTUAL_POINTER_REGISTER))
8224 && GET_MODE_SIZE (mode) >= 4
8225 && CONST_INT_P (XEXP (x, 1))
8226 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8227 return 1;
8228 }
8229
8230 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8231 && GET_MODE_SIZE (mode) == 4
8232 && GET_CODE (x) == SYMBOL_REF
8233 && CONSTANT_POOL_ADDRESS_P (x)
8234 && ! (flag_pic
8235 && symbol_mentioned_p (get_pool_constant (x))
8236 && ! pcrel_constant_p (get_pool_constant (x))))
8237 return 1;
8238
8239 return 0;
8240 }
8241
8242 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8243 instruction of mode MODE. */
8244 int
8245 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8246 {
8247 switch (GET_MODE_SIZE (mode))
8248 {
8249 case 1:
8250 return val >= 0 && val < 32;
8251
8252 case 2:
8253 return val >= 0 && val < 64 && (val & 1) == 0;
8254
8255 default:
8256 return (val >= 0
8257 && (val + GET_MODE_SIZE (mode)) <= 128
8258 && (val & 3) == 0);
8259 }
8260 }
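/* Worked example (illustrative): for SImode (4 bytes) the valid offsets are
   0, 4, ..., 124; for HImode 0, 2, ..., 62; and for QImode 0..31 -- i.e. the
   5-bit (scaled) immediate offsets of the 16-bit Thumb load/store
   encodings.  */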
8261
8262 bool
8263 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8264 {
8265 if (TARGET_ARM)
8266 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8267 else if (TARGET_THUMB2)
8268 return thumb2_legitimate_address_p (mode, x, strict_p);
8269 else /* if (TARGET_THUMB1) */
8270 return thumb1_legitimate_address_p (mode, x, strict_p);
8271 }
8272
8273 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8274
8275 Given an rtx X being reloaded into a reg required to be
8276 in class CLASS, return the class of reg to actually use.
8277 In general this is just CLASS, but for the Thumb core registers and
8278 immediate constants we prefer a LO_REGS class or a subset. */
8279
8280 static reg_class_t
8281 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8282 {
8283 if (TARGET_32BIT)
8284 return rclass;
8285 else
8286 {
8287 if (rclass == GENERAL_REGS)
8288 return LO_REGS;
8289 else
8290 return rclass;
8291 }
8292 }
8293
8294 /* Build the SYMBOL_REF for __tls_get_addr. */
8295
8296 static GTY(()) rtx tls_get_addr_libfunc;
8297
8298 static rtx
8299 get_tls_get_addr (void)
8300 {
8301 if (!tls_get_addr_libfunc)
8302 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8303 return tls_get_addr_libfunc;
8304 }
8305
8306 rtx
8307 arm_load_tp (rtx target)
8308 {
8309 if (!target)
8310 target = gen_reg_rtx (SImode);
8311
8312 if (TARGET_HARD_TP)
8313 {
8314 /* Can return in any reg. */
8315 emit_insn (gen_load_tp_hard (target));
8316 }
8317 else
8318 {
8319 /* Always returned in r0. Immediately copy the result into a pseudo,
8320 otherwise other uses of r0 (e.g. setting up function arguments) may
8321 clobber the value. */
8322
8323 rtx tmp;
8324
8325 emit_insn (gen_load_tp_soft ());
8326
8327 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8328 emit_move_insn (target, tmp);
8329 }
8330 return target;
8331 }
8332
8333 static rtx
8334 load_tls_operand (rtx x, rtx reg)
8335 {
8336 rtx tmp;
8337
8338 if (reg == NULL_RTX)
8339 reg = gen_reg_rtx (SImode);
8340
8341 tmp = gen_rtx_CONST (SImode, x);
8342
8343 emit_move_insn (reg, tmp);
8344
8345 return reg;
8346 }
8347
8348 static rtx_insn *
8349 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8350 {
8351 rtx label, labelno, sum;
8352
8353 gcc_assert (reloc != TLS_DESCSEQ);
8354 start_sequence ();
8355
8356 labelno = GEN_INT (pic_labelno++);
8357 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8358 label = gen_rtx_CONST (VOIDmode, label);
8359
8360 sum = gen_rtx_UNSPEC (Pmode,
8361 gen_rtvec (4, x, GEN_INT (reloc), label,
8362 GEN_INT (TARGET_ARM ? 8 : 4)),
8363 UNSPEC_TLS);
8364 reg = load_tls_operand (sum, reg);
8365
8366 if (TARGET_ARM)
8367 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8368 else
8369 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8370
8371 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8372 LCT_PURE, /* LCT_CONST? */
8373 Pmode, reg, Pmode);
8374
8375 rtx_insn *insns = get_insns ();
8376 end_sequence ();
8377
8378 return insns;
8379 }
8380
8381 static rtx
8382 arm_tls_descseq_addr (rtx x, rtx reg)
8383 {
8384 rtx labelno = GEN_INT (pic_labelno++);
8385 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8386 rtx sum = gen_rtx_UNSPEC (Pmode,
8387 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8388 gen_rtx_CONST (VOIDmode, label),
8389 GEN_INT (!TARGET_ARM)),
8390 UNSPEC_TLS);
8391 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8392
8393 emit_insn (gen_tlscall (x, labelno));
8394 if (!reg)
8395 reg = gen_reg_rtx (SImode);
8396 else
8397 gcc_assert (REGNO (reg) != R0_REGNUM);
8398
8399 emit_move_insn (reg, reg0);
8400
8401 return reg;
8402 }
8403
8404 rtx
8405 legitimize_tls_address (rtx x, rtx reg)
8406 {
8407 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8408 rtx_insn *insns;
8409 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8410
8411 switch (model)
8412 {
8413 case TLS_MODEL_GLOBAL_DYNAMIC:
8414 if (TARGET_GNU2_TLS)
8415 {
8416 reg = arm_tls_descseq_addr (x, reg);
8417
8418 tp = arm_load_tp (NULL_RTX);
8419
8420 dest = gen_rtx_PLUS (Pmode, tp, reg);
8421 }
8422 else
8423 {
8424 /* Original scheme */
8425 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8426 dest = gen_reg_rtx (Pmode);
8427 emit_libcall_block (insns, dest, ret, x);
8428 }
8429 return dest;
8430
8431 case TLS_MODEL_LOCAL_DYNAMIC:
8432 if (TARGET_GNU2_TLS)
8433 {
8434 reg = arm_tls_descseq_addr (x, reg);
8435
8436 tp = arm_load_tp (NULL_RTX);
8437
8438 dest = gen_rtx_PLUS (Pmode, tp, reg);
8439 }
8440 else
8441 {
8442 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8443
8444 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8445 share the LDM result with other LD model accesses. */
8446 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8447 UNSPEC_TLS);
8448 dest = gen_reg_rtx (Pmode);
8449 emit_libcall_block (insns, dest, ret, eqv);
8450
8451 /* Load the addend. */
8452 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8453 GEN_INT (TLS_LDO32)),
8454 UNSPEC_TLS);
8455 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8456 dest = gen_rtx_PLUS (Pmode, dest, addend);
8457 }
8458 return dest;
8459
8460 case TLS_MODEL_INITIAL_EXEC:
8461 labelno = GEN_INT (pic_labelno++);
8462 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8463 label = gen_rtx_CONST (VOIDmode, label);
8464 sum = gen_rtx_UNSPEC (Pmode,
8465 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8466 GEN_INT (TARGET_ARM ? 8 : 4)),
8467 UNSPEC_TLS);
8468 reg = load_tls_operand (sum, reg);
8469
8470 if (TARGET_ARM)
8471 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8472 else if (TARGET_THUMB2)
8473 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8474 else
8475 {
8476 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8477 emit_move_insn (reg, gen_const_mem (SImode, reg));
8478 }
8479
8480 tp = arm_load_tp (NULL_RTX);
8481
8482 return gen_rtx_PLUS (Pmode, tp, reg);
8483
8484 case TLS_MODEL_LOCAL_EXEC:
8485 tp = arm_load_tp (NULL_RTX);
8486
8487 reg = gen_rtx_UNSPEC (Pmode,
8488 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8489 UNSPEC_TLS);
8490 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8491
8492 return gen_rtx_PLUS (Pmode, tp, reg);
8493
8494 default:
8495 abort ();
8496 }
8497 }
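/* Illustrative summary (not exhaustive; hypothetical variable name): for

     __thread int counter;

   a -fPIC shared-object access typically goes through the global-dynamic
   path above (a __tls_get_addr call, or a TLS descriptor sequence with
   -mtls-dialect=gnu2), while a non-PIC executable can use the local-exec
   path: the thread pointer from arm_load_tp plus a link-time constant
   offset (TLS_LE32).  */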
8498
8499 /* Try machine-dependent ways of modifying an illegitimate address
8500 to be legitimate. If we find one, return the new, valid address. */
8501 rtx
8502 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8503 {
8504 if (arm_tls_referenced_p (x))
8505 {
8506 rtx addend = NULL;
8507
8508 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8509 {
8510 addend = XEXP (XEXP (x, 0), 1);
8511 x = XEXP (XEXP (x, 0), 0);
8512 }
8513
8514 if (GET_CODE (x) != SYMBOL_REF)
8515 return x;
8516
8517 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8518
8519 x = legitimize_tls_address (x, NULL_RTX);
8520
8521 if (addend)
8522 {
8523 x = gen_rtx_PLUS (SImode, x, addend);
8524 orig_x = x;
8525 }
8526 else
8527 return x;
8528 }
8529
8530 if (!TARGET_ARM)
8531 {
8532 /* TODO: legitimize_address for Thumb2. */
8533 if (TARGET_THUMB2)
8534 return x;
8535 return thumb_legitimize_address (x, orig_x, mode);
8536 }
8537
8538 if (GET_CODE (x) == PLUS)
8539 {
8540 rtx xop0 = XEXP (x, 0);
8541 rtx xop1 = XEXP (x, 1);
8542
8543 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8544 xop0 = force_reg (SImode, xop0);
8545
8546 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8547 && !symbol_mentioned_p (xop1))
8548 xop1 = force_reg (SImode, xop1);
8549
8550 if (ARM_BASE_REGISTER_RTX_P (xop0)
8551 && CONST_INT_P (xop1))
8552 {
8553 HOST_WIDE_INT n, low_n;
8554 rtx base_reg, val;
8555 n = INTVAL (xop1);
8556
8557 /* VFP addressing modes actually allow greater offsets, but for
8558 now we just stick with the lowest common denominator. */
8559 if (mode == DImode || mode == DFmode)
8560 {
8561 low_n = n & 0x0f;
8562 n &= ~0x0f;
8563 if (low_n > 4)
8564 {
8565 n += 16;
8566 low_n -= 16;
8567 }
8568 }
8569 else
8570 {
8571 low_n = ((mode) == TImode ? 0
8572 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8573 n -= low_n;
8574 }
8575
8576 base_reg = gen_reg_rtx (SImode);
8577 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8578 emit_move_insn (base_reg, val);
8579 x = plus_constant (Pmode, base_reg, low_n);
8580 }
8581 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8582 x = gen_rtx_PLUS (SImode, xop0, xop1);
8583 }
8584
8585 /* XXX We don't allow MINUS any more -- see comment in
8586 arm_legitimate_address_outer_p (). */
8587 else if (GET_CODE (x) == MINUS)
8588 {
8589 rtx xop0 = XEXP (x, 0);
8590 rtx xop1 = XEXP (x, 1);
8591
8592 if (CONSTANT_P (xop0))
8593 xop0 = force_reg (SImode, xop0);
8594
8595 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8596 xop1 = force_reg (SImode, xop1);
8597
8598 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8599 x = gen_rtx_MINUS (SImode, xop0, xop1);
8600 }
8601
8602 /* Make sure to take full advantage of the pre-indexed addressing mode
8603 with absolute addresses, which often allows the base register to be
8604 factored out across multiple adjacent memory references and might
8605 even allow the minipool to be avoided entirely. */
8606 else if (CONST_INT_P (x) && optimize > 0)
8607 {
8608 unsigned int bits;
8609 HOST_WIDE_INT mask, base, index;
8610 rtx base_reg;
8611
8612 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8613 use an 8-bit index. So let's use a 12-bit index for SImode only and
8614 hope that arm_gen_constant will enable ldrb to use more bits. */
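/* Worked example (illustrative): for an SImode load of address 0x12345678,
   mask is 0xfff, so base = 0x12345000 and index = 0x678; the base is built
   in a register once and the access becomes [base_reg, #0x678], reusable by
   neighbouring references.  */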
8615 bits = (mode == SImode) ? 12 : 8;
8616 mask = (1 << bits) - 1;
8617 base = INTVAL (x) & ~mask;
8618 index = INTVAL (x) & mask;
8619 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8620 {
8621 /* It'll most probably be more efficient to generate the base
8622 with more bits set and use a negative index instead. */
8623 base |= mask;
8624 index -= mask;
8625 }
8626 base_reg = force_reg (SImode, GEN_INT (base));
8627 x = plus_constant (Pmode, base_reg, index);
8628 }
8629
8630 if (flag_pic)
8631 {
8632 /* We need to find and carefully transform any SYMBOL and LABEL
8633 references; so go back to the original address expression. */
8634 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8635
8636 if (new_x != orig_x)
8637 x = new_x;
8638 }
8639
8640 return x;
8641 }
8642
8643
8644 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8645 to be legitimate. If we find one, return the new, valid address. */
8646 rtx
8647 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8648 {
8649 if (GET_CODE (x) == PLUS
8650 && CONST_INT_P (XEXP (x, 1))
8651 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8652 || INTVAL (XEXP (x, 1)) < 0))
8653 {
8654 rtx xop0 = XEXP (x, 0);
8655 rtx xop1 = XEXP (x, 1);
8656 HOST_WIDE_INT offset = INTVAL (xop1);
8657
8658 /* Try and fold the offset into a biasing of the base register and
8659 then offsetting that. Don't do this when optimizing for space
8660 since it can cause too many CSEs. */
8661 if (optimize_size && offset >= 0
8662 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8663 {
8664 HOST_WIDE_INT delta;
8665
8666 if (offset >= 256)
8667 delta = offset - (256 - GET_MODE_SIZE (mode));
8668 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8669 delta = 31 * GET_MODE_SIZE (mode);
8670 else
8671 delta = offset & (~31 * GET_MODE_SIZE (mode));
8672
8673 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8674 NULL_RTX);
8675 x = plus_constant (Pmode, xop0, delta);
8676 }
8677 else if (offset < 0 && offset > -256)
8678 /* Small negative offsets are best done with a subtract before the
8679 dereference; forcing these into a register normally takes two
8680 instructions. */
8681 x = force_operand (x, NULL_RTX);
8682 else
8683 {
8684 /* For the remaining cases, force the constant into a register. */
8685 xop1 = force_reg (SImode, xop1);
8686 x = gen_rtx_PLUS (SImode, xop0, xop1);
8687 }
8688 }
8689 else if (GET_CODE (x) == PLUS
8690 && s_register_operand (XEXP (x, 1), SImode)
8691 && !s_register_operand (XEXP (x, 0), SImode))
8692 {
8693 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8694
8695 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8696 }
8697
8698 if (flag_pic)
8699 {
8700 /* We need to find and carefully transform any SYMBOL and LABEL
8701 references, so go back to the original address expression. */
8702 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8703
8704 if (new_x != orig_x)
8705 x = new_x;
8706 }
8707
8708 return x;
8709 }
8710
8711 /* Return TRUE if X contains any TLS symbol references. */
8712
8713 bool
8714 arm_tls_referenced_p (rtx x)
8715 {
8716 if (! TARGET_HAVE_TLS)
8717 return false;
8718
8719 subrtx_iterator::array_type array;
8720 FOR_EACH_SUBRTX (iter, array, x, ALL)
8721 {
8722 const_rtx x = *iter;
8723 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8724 {
8725 /* ARM currently does not provide relocations to encode TLS variables
8726 into AArch32 instructions, only data, so there is no way to
8727 implement these when the literal pool is disabled. */
8728 if (arm_disable_literal_pool)
8729 sorry ("accessing thread-local storage is not currently supported "
8730 "with -mpure-code or -mslow-flash-data");
8731
8732 return true;
8733 }
8734
8735 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8736 TLS offsets, not real symbol references. */
8737 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8738 iter.skip_subrtxes ();
8739 }
8740 return false;
8741 }
8742
8743 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8744
8745 On the ARM, allow any integer (invalid ones are removed later by insn
8746 patterns), nice doubles, and symbol_refs that refer to the function's
8747 constant pool XXX.
8748
8749 When generating PIC, allow anything. */
8750
8751 static bool
8752 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8753 {
8754 return flag_pic || !label_mentioned_p (x);
8755 }
8756
8757 static bool
8758 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8759 {
8760 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8761 RTXs. These must therefore be allowed for Thumb-1 so that, when run
8762 for ARMv8-M Baseline or later, the result is valid. */
8763 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8764 x = XEXP (x, 0);
8765
8766 return (CONST_INT_P (x)
8767 || CONST_DOUBLE_P (x)
8768 || CONSTANT_ADDRESS_P (x)
8769 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8770 || flag_pic);
8771 }
8772
8773 static bool
8774 arm_legitimate_constant_p (machine_mode mode, rtx x)
8775 {
8776 return (!arm_cannot_force_const_mem (mode, x)
8777 && (TARGET_32BIT
8778 ? arm_legitimate_constant_p_1 (mode, x)
8779 : thumb_legitimate_constant_p (mode, x)));
8780 }
8781
8782 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8783
8784 static bool
8785 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8786 {
8787 rtx base, offset;
8788
8789 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8790 {
8791 split_const (x, &base, &offset);
8792 if (GET_CODE (base) == SYMBOL_REF
8793 && !offset_within_block_p (base, INTVAL (offset)))
8794 return true;
8795 }
8796 return arm_tls_referenced_p (x);
8797 }
8798 \f
8799 #define REG_OR_SUBREG_REG(X) \
8800 (REG_P (X) \
8801 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8802
8803 #define REG_OR_SUBREG_RTX(X) \
8804 (REG_P (X) ? (X) : SUBREG_REG (X))
8805
8806 static inline int
8807 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8808 {
8809 machine_mode mode = GET_MODE (x);
8810 int total, words;
8811
8812 switch (code)
8813 {
8814 case ASHIFT:
8815 case ASHIFTRT:
8816 case LSHIFTRT:
8817 case ROTATERT:
8818 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8819
8820 case PLUS:
8821 case MINUS:
8822 case COMPARE:
8823 case NEG:
8824 case NOT:
8825 return COSTS_N_INSNS (1);
8826
8827 case MULT:
8828 if (arm_arch6m && arm_m_profile_small_mul)
8829 return COSTS_N_INSNS (32);
8830
8831 if (CONST_INT_P (XEXP (x, 1)))
8832 {
8833 int cycles = 0;
8834 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8835
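/* The loop below charges one cycle per two bits of the constant;
for example, 0x55 needs four iterations, giving COSTS_N_INSNS (2) + 4. */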
8836 while (i)
8837 {
8838 i >>= 2;
8839 cycles++;
8840 }
8841 return COSTS_N_INSNS (2) + cycles;
8842 }
8843 return COSTS_N_INSNS (1) + 16;
8844
8845 case SET:
8846 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8847 the mode. */
8848 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8849 return (COSTS_N_INSNS (words)
8850 + 4 * ((MEM_P (SET_SRC (x)))
8851 + MEM_P (SET_DEST (x))));
8852
8853 case CONST_INT:
8854 if (outer == SET)
8855 {
8856 if (UINTVAL (x) < 256
8857 /* 16-bit constant. */
8858 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8859 return 0;
8860 if (thumb_shiftable_const (INTVAL (x)))
8861 return COSTS_N_INSNS (2);
8862 return COSTS_N_INSNS (3);
8863 }
8864 else if ((outer == PLUS || outer == COMPARE)
8865 && INTVAL (x) < 256 && INTVAL (x) > -256)
8866 return 0;
8867 else if ((outer == IOR || outer == XOR || outer == AND)
8868 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8869 return COSTS_N_INSNS (1);
8870 else if (outer == AND)
8871 {
8872 int i;
8873 /* This duplicates the tests in the andsi3 expander. */
8874 for (i = 9; i <= 31; i++)
8875 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8876 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8877 return COSTS_N_INSNS (2);
8878 }
8879 else if (outer == ASHIFT || outer == ASHIFTRT
8880 || outer == LSHIFTRT)
8881 return 0;
8882 return COSTS_N_INSNS (2);
8883
8884 case CONST:
8885 case CONST_DOUBLE:
8886 case LABEL_REF:
8887 case SYMBOL_REF:
8888 return COSTS_N_INSNS (3);
8889
8890 case UDIV:
8891 case UMOD:
8892 case DIV:
8893 case MOD:
8894 return 100;
8895
8896 case TRUNCATE:
8897 return 99;
8898
8899 case AND:
8900 case XOR:
8901 case IOR:
8902 /* XXX guess. */
8903 return 8;
8904
8905 case MEM:
8906 /* XXX another guess. */
8907 /* Memory costs quite a lot for the first word, but subsequent words
8908 load at the equivalent of a single insn each. */
8909 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8910 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8911 ? 4 : 0));
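/* For instance, a DImode load (UNITS_PER_WORD is 4 here) costs
10 + 4 * ((8 - 1) / 4) == 14, plus a further 4 if it comes from the
constant pool. */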
8912
8913 case IF_THEN_ELSE:
8914 /* XXX a guess. */
8915 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
8916 return 14;
8917 return 2;
8918
8919 case SIGN_EXTEND:
8920 case ZERO_EXTEND:
8921 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
8922 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
8923
8924 if (mode == SImode)
8925 return total;
8926
8927 if (arm_arch6)
8928 return total + COSTS_N_INSNS (1);
8929
8930 /* Assume a two-shift sequence. Increase the cost slightly so
8931 we prefer actual shifts over an extend operation. */
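/* The two-shift fallback is, for example, LSL #24 followed by ASR #24
(or LSR #24 for a zero extension) of a QImode value. */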
8932 return total + 1 + COSTS_N_INSNS (2);
8933
8934 default:
8935 return 99;
8936 }
8937 }
8938
8939 /* Estimate the size cost of thumb1 instructions.
8940 For now most of the code is copied from thumb1_rtx_costs; we need more
8941 fine-grained tuning when we have more related test cases. */
8942 static inline int
8943 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8944 {
8945 machine_mode mode = GET_MODE (x);
8946 int words, cost;
8947
8948 switch (code)
8949 {
8950 case ASHIFT:
8951 case ASHIFTRT:
8952 case LSHIFTRT:
8953 case ROTATERT:
8954 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8955
8956 case PLUS:
8957 case MINUS:
8958 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
8959 patterns generated by RTL expansion, especially when expanding
8960 multiplication. */
8961 if ((GET_CODE (XEXP (x, 0)) == MULT
8962 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
8963 || (GET_CODE (XEXP (x, 1)) == MULT
8964 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
8965 return COSTS_N_INSNS (2);
8966 /* Fall through. */
8967 case COMPARE:
8968 case NEG:
8969 case NOT:
8970 return COSTS_N_INSNS (1);
8971
8972 case MULT:
8973 if (CONST_INT_P (XEXP (x, 1)))
8974 {
8975 /* The Thumb1 mul instruction can't operate on a constant; we must
8976 load it into a register first. */
8977 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
8978 /* For targets that have a very small, high-latency multiply unit,
8979 we prefer to synthesize the multiplication with up to 5 instructions,
8980 giving a good balance between size and performance. */
8981 if (arm_arch6m && arm_m_profile_small_mul)
8982 return COSTS_N_INSNS (5);
8983 else
8984 return COSTS_N_INSNS (1) + const_size;
8985 }
8986 return COSTS_N_INSNS (1);
8987
8988 case SET:
8989 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8990 the mode. */
8991 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8992 cost = COSTS_N_INSNS (words);
8993 if (satisfies_constraint_J (SET_SRC (x))
8994 || satisfies_constraint_K (SET_SRC (x))
8995 /* Too big an immediate for a 2-byte mov; MOVT is needed. */
8996 || (CONST_INT_P (SET_SRC (x))
8997 && UINTVAL (SET_SRC (x)) >= 256
8998 && TARGET_HAVE_MOVT
8999 && satisfies_constraint_j (SET_SRC (x)))
9000 /* thumb1_movdi_insn. */
9001 || ((words > 1) && MEM_P (SET_SRC (x))))
9002 cost += COSTS_N_INSNS (1);
9003 return cost;
9004
9005 case CONST_INT:
9006 if (outer == SET)
9007 {
9008 if (UINTVAL (x) < 256)
9009 return COSTS_N_INSNS (1);
9010 /* movw is 4 bytes long. */
9011 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9012 return COSTS_N_INSNS (2);
9013 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9014 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9015 return COSTS_N_INSNS (2);
9016 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9017 if (thumb_shiftable_const (INTVAL (x)))
9018 return COSTS_N_INSNS (2);
9019 return COSTS_N_INSNS (3);
9020 }
9021 else if ((outer == PLUS || outer == COMPARE)
9022 && INTVAL (x) < 256 && INTVAL (x) > -256)
9023 return 0;
9024 else if ((outer == IOR || outer == XOR || outer == AND)
9025 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9026 return COSTS_N_INSNS (1);
9027 else if (outer == AND)
9028 {
9029 int i;
9030 /* This duplicates the tests in the andsi3 expander. */
9031 for (i = 9; i <= 31; i++)
9032 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9033 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9034 return COSTS_N_INSNS (2);
9035 }
9036 else if (outer == ASHIFT || outer == ASHIFTRT
9037 || outer == LSHIFTRT)
9038 return 0;
9039 return COSTS_N_INSNS (2);
9040
9041 case CONST:
9042 case CONST_DOUBLE:
9043 case LABEL_REF:
9044 case SYMBOL_REF:
9045 return COSTS_N_INSNS (3);
9046
9047 case UDIV:
9048 case UMOD:
9049 case DIV:
9050 case MOD:
9051 return 100;
9052
9053 case TRUNCATE:
9054 return 99;
9055
9056 case AND:
9057 case XOR:
9058 case IOR:
9059 return COSTS_N_INSNS (1);
9060
9061 case MEM:
9062 return (COSTS_N_INSNS (1)
9063 + COSTS_N_INSNS (1)
9064 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9065 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9066 ? COSTS_N_INSNS (1) : 0));
9067
9068 case IF_THEN_ELSE:
9069 /* XXX a guess. */
9070 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9071 return 14;
9072 return 2;
9073
9074 case ZERO_EXTEND:
9075 /* XXX still guessing. */
9076 switch (GET_MODE (XEXP (x, 0)))
9077 {
9078 case E_QImode:
9079 return (1 + (mode == DImode ? 4 : 0)
9080 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9081
9082 case E_HImode:
9083 return (4 + (mode == DImode ? 4 : 0)
9084 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9085
9086 case E_SImode:
9087 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9088
9089 default:
9090 return 99;
9091 }
9092
9093 default:
9094 return 99;
9095 }
9096 }
9097
9098 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9099 operand, then return the operand that is being shifted. If the shift
9100 is not by a constant, then set *SHIFT_REG to the shift-amount operand.
9101 Return NULL if OP is not a shifter operand. */
9102 static rtx
9103 shifter_op_p (rtx op, rtx *shift_reg)
9104 {
9105 enum rtx_code code = GET_CODE (op);
9106
9107 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9108 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9109 return XEXP (op, 0);
9110 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9111 return XEXP (op, 0);
9112 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9113 || code == ASHIFTRT)
9114 {
9115 if (!CONST_INT_P (XEXP (op, 1)))
9116 *shift_reg = XEXP (op, 1);
9117 return XEXP (op, 0);
9118 }
9119
9120 return NULL;
9121 }
9122
9123 static bool
9124 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9125 {
9126 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9127 rtx_code code = GET_CODE (x);
9128 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9129
9130 switch (XINT (x, 1))
9131 {
9132 case UNSPEC_UNALIGNED_LOAD:
9133 /* We can only do unaligned loads into the integer unit, and we can't
9134 use LDM or LDRD. */
9135 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9136 if (speed_p)
9137 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9138 + extra_cost->ldst.load_unaligned);
9139
9140 #ifdef NOT_YET
9141 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9142 ADDR_SPACE_GENERIC, speed_p);
9143 #endif
9144 return true;
9145
9146 case UNSPEC_UNALIGNED_STORE:
9147 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9148 if (speed_p)
9149 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9150 + extra_cost->ldst.store_unaligned);
9151
9152 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9153 #ifdef NOT_YET
9154 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9155 ADDR_SPACE_GENERIC, speed_p);
9156 #endif
9157 return true;
9158
9159 case UNSPEC_VRINTZ:
9160 case UNSPEC_VRINTP:
9161 case UNSPEC_VRINTM:
9162 case UNSPEC_VRINTR:
9163 case UNSPEC_VRINTX:
9164 case UNSPEC_VRINTA:
9165 if (speed_p)
9166 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9167
9168 return true;
9169 default:
9170 *cost = COSTS_N_INSNS (2);
9171 break;
9172 }
9173 return true;
9174 }
9175
9176 /* Cost of a libcall. We assume one insn per argument, an amount for the
9177 call (one insn for -Os) and then one for processing the result. */
9178 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
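/* For example, LIBCALL_COST (2) is COSTS_N_INSNS (20) when optimizing
for speed and COSTS_N_INSNS (4) when optimizing for size. */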
9179
9180 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9181 do \
9182 { \
9183 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9184 if (shift_op != NULL \
9185 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9186 { \
9187 if (shift_reg) \
9188 { \
9189 if (speed_p) \
9190 *cost += extra_cost->alu.arith_shift_reg; \
9191 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9192 ASHIFT, 1, speed_p); \
9193 } \
9194 else if (speed_p) \
9195 *cost += extra_cost->alu.arith_shift; \
9196 \
9197 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9198 ASHIFT, 0, speed_p) \
9199 + rtx_cost (XEXP (x, 1 - IDX), \
9200 GET_MODE (shift_op), \
9201 OP, 1, speed_p)); \
9202 return true; \
9203 } \
9204 } \
9205 while (0);
9206
9207 /* RTX costs. Make an estimate of the cost of executing the operation
9208 X, which is contained within an operation with code OUTER_CODE.
9209 SPEED_P indicates whether the cost desired is the performance cost,
9210 or the size cost. The estimate is stored in COST and the return
9211 value is TRUE if the cost calculation is final, or FALSE if the
9212 caller should recurse through the operands of X to add additional
9213 costs.
9214
9215 We currently make no attempt to model the size savings of Thumb-2
9216 16-bit instructions. At the normal points in compilation where
9217 this code is called we have no measure of whether the condition
9218 flags are live or not, and thus no realistic way to determine what
9219 the size will eventually be. */
9220 static bool
9221 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9222 const struct cpu_cost_table *extra_cost,
9223 int *cost, bool speed_p)
9224 {
9225 machine_mode mode = GET_MODE (x);
9226
9227 *cost = COSTS_N_INSNS (1);
9228
9229 if (TARGET_THUMB1)
9230 {
9231 if (speed_p)
9232 *cost = thumb1_rtx_costs (x, code, outer_code);
9233 else
9234 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9235 return true;
9236 }
9237
9238 switch (code)
9239 {
9240 case SET:
9241 *cost = 0;
9242 /* SET RTXs don't have a mode so we get it from the destination. */
9243 mode = GET_MODE (SET_DEST (x));
9244
9245 if (REG_P (SET_SRC (x))
9246 && REG_P (SET_DEST (x)))
9247 {
9248 /* Assume that most copies can be done with a single insn,
9249 unless we don't have HW FP, in which case everything
9250 larger than word mode will require two insns. */
9251 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9252 && GET_MODE_SIZE (mode) > 4)
9253 || mode == DImode)
9254 ? 2 : 1);
9255 /* Conditional register moves can be encoded
9256 in 16 bits in Thumb mode. */
9257 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9258 *cost >>= 1;
9259
9260 return true;
9261 }
9262
9263 if (CONST_INT_P (SET_SRC (x)))
9264 {
9265 /* Handle CONST_INT here, since the value doesn't have a mode
9266 and we would otherwise be unable to work out the true cost. */
9267 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9268 0, speed_p);
9269 outer_code = SET;
9270 /* Slightly lower the cost of setting a core reg to a constant.
9271 This helps break up chains and allows for better scheduling. */
9272 if (REG_P (SET_DEST (x))
9273 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9274 *cost -= 1;
9275 x = SET_SRC (x);
9276 /* Immediate moves with an immediate in the range [0, 255] can be
9277 encoded in 16 bits in Thumb mode. */
9278 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9279 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9280 *cost >>= 1;
9281 goto const_int_cost;
9282 }
9283
9284 return false;
9285
9286 case MEM:
9287 /* A memory access costs 1 insn if the mode is small or the address is
9288 a single register; otherwise it costs one insn per word. */
9289 if (REG_P (XEXP (x, 0)))
9290 *cost = COSTS_N_INSNS (1);
9291 else if (flag_pic
9292 && GET_CODE (XEXP (x, 0)) == PLUS
9293 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9294 /* This will be split into two instructions.
9295 See arm.md:calculate_pic_address. */
9296 *cost = COSTS_N_INSNS (2);
9297 else
9298 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
9299
9300 /* For speed optimizations, add the costs of the address and
9301 accessing memory. */
9302 if (speed_p)
9303 #ifdef NOT_YET
9304 *cost += (extra_cost->ldst.load
9305 + arm_address_cost (XEXP (x, 0), mode,
9306 ADDR_SPACE_GENERIC, speed_p));
9307 #else
9308 *cost += extra_cost->ldst.load;
9309 #endif
9310 return true;
9311
9312 case PARALLEL:
9313 {
9314 /* Calculations of LDM costs are complex. We assume an initial cost
9315 (ldm_1st) which covers loading up to ldm_regs_per_insn_1st
9316 registers; each additional group of ldm_regs_per_insn_subsequent
9317 registers then costs one more insn. The
9318 formula for N regs is thus:
9319
9320 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9321 + ldm_regs_per_insn_subsequent - 1)
9322 / ldm_regs_per_insn_subsequent).
9323
9324 Additional costs may also be added for addressing. A similar
9325 formula is used for STM. */
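/* As a worked example with hypothetical tuning values
ldm_regs_per_insn_1st == 2 and ldm_regs_per_insn_subsequent == 2,
an LDM of 5 registers costs
ldm_1st + COSTS_N_INSNS ((3 + 2 - 1) / 2) == ldm_1st + COSTS_N_INSNS (2). */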
9326
9327 bool is_ldm = load_multiple_operation (x, SImode);
9328 bool is_stm = store_multiple_operation (x, SImode);
9329
9330 if (is_ldm || is_stm)
9331 {
9332 if (speed_p)
9333 {
9334 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9335 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9336 ? extra_cost->ldst.ldm_regs_per_insn_1st
9337 : extra_cost->ldst.stm_regs_per_insn_1st;
9338 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9339 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9340 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9341
9342 *cost += regs_per_insn_1st
9343 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9344 + regs_per_insn_sub - 1)
9345 / regs_per_insn_sub);
9346 return true;
9347 }
9348
9349 }
9350 return false;
9351 }
9352 case DIV:
9353 case UDIV:
9354 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9355 && (mode == SFmode || !TARGET_VFP_SINGLE))
9356 *cost += COSTS_N_INSNS (speed_p
9357 ? extra_cost->fp[mode != SFmode].div : 0);
9358 else if (mode == SImode && TARGET_IDIV)
9359 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9360 else
9361 *cost = LIBCALL_COST (2);
9362
9363 /* Make the cost of sdiv more expensive so that, when both sdiv and udiv
9364 are possible, udiv is preferred. */
9365 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9366 return false; /* All arguments must be in registers. */
9367
9368 case MOD:
9369 /* MOD by a power of 2 can be expanded as:
9370 rsbs r1, r0, #0
9371 and r0, r0, #(n - 1)
9372 and r1, r1, #(n - 1)
9373 rsbpl r0, r1, #0. */
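/* That four-insn sequence is why the code below adds COSTS_N_INSNS (3)
to the base cost of one insn. */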
9374 if (CONST_INT_P (XEXP (x, 1))
9375 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9376 && mode == SImode)
9377 {
9378 *cost += COSTS_N_INSNS (3);
9379
9380 if (speed_p)
9381 *cost += 2 * extra_cost->alu.logical
9382 + extra_cost->alu.arith;
9383 return true;
9384 }
9385
9386 /* Fall-through. */
9387 case UMOD:
9388 /* Make the cost of sdiv more expensive so that, when both sdiv and udiv
9389 are possible, udiv is preferred. */
9390 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9391 return false; /* All arguments must be in registers. */
9392
9393 case ROTATE:
9394 if (mode == SImode && REG_P (XEXP (x, 1)))
9395 {
9396 *cost += (COSTS_N_INSNS (1)
9397 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9398 if (speed_p)
9399 *cost += extra_cost->alu.shift_reg;
9400 return true;
9401 }
9402 /* Fall through */
9403 case ROTATERT:
9404 case ASHIFT:
9405 case LSHIFTRT:
9406 case ASHIFTRT:
9407 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9408 {
9409 *cost += (COSTS_N_INSNS (2)
9410 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9411 if (speed_p)
9412 *cost += 2 * extra_cost->alu.shift;
9413 return true;
9414 }
9415 else if (mode == SImode)
9416 {
9417 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9418 /* Slightly disparage register shifts at -Os, but not by much. */
9419 if (!CONST_INT_P (XEXP (x, 1)))
9420 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9421 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9422 return true;
9423 }
9424 else if (GET_MODE_CLASS (mode) == MODE_INT
9425 && GET_MODE_SIZE (mode) < 4)
9426 {
9427 if (code == ASHIFT)
9428 {
9429 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9430 /* Slightly disparage register shifts at -Os, but not by
9431 much. */
9432 if (!CONST_INT_P (XEXP (x, 1)))
9433 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9434 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9435 }
9436 else if (code == LSHIFTRT || code == ASHIFTRT)
9437 {
9438 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9439 {
9440 /* Can use SBFX/UBFX. */
9441 if (speed_p)
9442 *cost += extra_cost->alu.bfx;
9443 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9444 }
9445 else
9446 {
9447 *cost += COSTS_N_INSNS (1);
9448 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9449 if (speed_p)
9450 {
9451 if (CONST_INT_P (XEXP (x, 1)))
9452 *cost += 2 * extra_cost->alu.shift;
9453 else
9454 *cost += (extra_cost->alu.shift
9455 + extra_cost->alu.shift_reg);
9456 }
9457 else
9458 /* Slightly disparage register shifts. */
9459 *cost += !CONST_INT_P (XEXP (x, 1));
9460 }
9461 }
9462 else /* Rotates. */
9463 {
9464 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9465 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9466 if (speed_p)
9467 {
9468 if (CONST_INT_P (XEXP (x, 1)))
9469 *cost += (2 * extra_cost->alu.shift
9470 + extra_cost->alu.log_shift);
9471 else
9472 *cost += (extra_cost->alu.shift
9473 + extra_cost->alu.shift_reg
9474 + extra_cost->alu.log_shift_reg);
9475 }
9476 }
9477 return true;
9478 }
9479
9480 *cost = LIBCALL_COST (2);
9481 return false;
9482
9483 case BSWAP:
9484 if (arm_arch6)
9485 {
9486 if (mode == SImode)
9487 {
9488 if (speed_p)
9489 *cost += extra_cost->alu.rev;
9490
9491 return false;
9492 }
9493 }
9494 else
9495 {
9496 /* No rev instruction available. Look at arm_legacy_rev
9497 and thumb_legacy_rev for the form of RTL used then. */
9498 if (TARGET_THUMB)
9499 {
9500 *cost += COSTS_N_INSNS (9);
9501
9502 if (speed_p)
9503 {
9504 *cost += 6 * extra_cost->alu.shift;
9505 *cost += 3 * extra_cost->alu.logical;
9506 }
9507 }
9508 else
9509 {
9510 *cost += COSTS_N_INSNS (4);
9511
9512 if (speed_p)
9513 {
9514 *cost += 2 * extra_cost->alu.shift;
9515 *cost += extra_cost->alu.arith_shift;
9516 *cost += 2 * extra_cost->alu.logical;
9517 }
9518 }
9519 return true;
9520 }
9521 return false;
9522
9523 case MINUS:
9524 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9525 && (mode == SFmode || !TARGET_VFP_SINGLE))
9526 {
9527 if (GET_CODE (XEXP (x, 0)) == MULT
9528 || GET_CODE (XEXP (x, 1)) == MULT)
9529 {
9530 rtx mul_op0, mul_op1, sub_op;
9531
9532 if (speed_p)
9533 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9534
9535 if (GET_CODE (XEXP (x, 0)) == MULT)
9536 {
9537 mul_op0 = XEXP (XEXP (x, 0), 0);
9538 mul_op1 = XEXP (XEXP (x, 0), 1);
9539 sub_op = XEXP (x, 1);
9540 }
9541 else
9542 {
9543 mul_op0 = XEXP (XEXP (x, 1), 0);
9544 mul_op1 = XEXP (XEXP (x, 1), 1);
9545 sub_op = XEXP (x, 0);
9546 }
9547
9548 /* The first operand of the multiply may be optionally
9549 negated. */
9550 if (GET_CODE (mul_op0) == NEG)
9551 mul_op0 = XEXP (mul_op0, 0);
9552
9553 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9554 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9555 + rtx_cost (sub_op, mode, code, 0, speed_p));
9556
9557 return true;
9558 }
9559
9560 if (speed_p)
9561 *cost += extra_cost->fp[mode != SFmode].addsub;
9562 return false;
9563 }
9564
9565 if (mode == SImode)
9566 {
9567 rtx shift_by_reg = NULL;
9568 rtx shift_op;
9569 rtx non_shift_op;
9570
9571 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9572 if (shift_op == NULL)
9573 {
9574 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9575 non_shift_op = XEXP (x, 0);
9576 }
9577 else
9578 non_shift_op = XEXP (x, 1);
9579
9580 if (shift_op != NULL)
9581 {
9582 if (shift_by_reg != NULL)
9583 {
9584 if (speed_p)
9585 *cost += extra_cost->alu.arith_shift_reg;
9586 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9587 }
9588 else if (speed_p)
9589 *cost += extra_cost->alu.arith_shift;
9590
9591 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9592 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9593 return true;
9594 }
9595
9596 if (arm_arch_thumb2
9597 && GET_CODE (XEXP (x, 1)) == MULT)
9598 {
9599 /* MLS. */
9600 if (speed_p)
9601 *cost += extra_cost->mult[0].add;
9602 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9603 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9604 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9605 return true;
9606 }
9607
9608 if (CONST_INT_P (XEXP (x, 0)))
9609 {
9610 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9611 INTVAL (XEXP (x, 0)), NULL_RTX,
9612 NULL_RTX, 1, 0);
9613 *cost = COSTS_N_INSNS (insns);
9614 if (speed_p)
9615 *cost += insns * extra_cost->alu.arith;
9616 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9617 return true;
9618 }
9619 else if (speed_p)
9620 *cost += extra_cost->alu.arith;
9621
9622 return false;
9623 }
9624
9625 if (GET_MODE_CLASS (mode) == MODE_INT
9626 && GET_MODE_SIZE (mode) < 4)
9627 {
9628 rtx shift_op, shift_reg;
9629 shift_reg = NULL;
9630
9631 /* We check both sides of the MINUS for shifter operands since,
9632 unlike PLUS, it's not commutative. */
9633
9634 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0)
9635 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1)
9636
9637 /* Slightly disparage, as we might need to widen the result. */
9638 *cost += 1;
9639 if (speed_p)
9640 *cost += extra_cost->alu.arith;
9641
9642 if (CONST_INT_P (XEXP (x, 0)))
9643 {
9644 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9645 return true;
9646 }
9647
9648 return false;
9649 }
9650
9651 if (mode == DImode)
9652 {
9653 *cost += COSTS_N_INSNS (1);
9654
9655 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9656 {
9657 rtx op1 = XEXP (x, 1);
9658
9659 if (speed_p)
9660 *cost += 2 * extra_cost->alu.arith;
9661
9662 if (GET_CODE (op1) == ZERO_EXTEND)
9663 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9664 0, speed_p);
9665 else
9666 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9667 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9668 0, speed_p);
9669 return true;
9670 }
9671 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9672 {
9673 if (speed_p)
9674 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9675 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9676 0, speed_p)
9677 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9678 return true;
9679 }
9680 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9681 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9682 {
9683 if (speed_p)
9684 *cost += (extra_cost->alu.arith
9685 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9686 ? extra_cost->alu.arith
9687 : extra_cost->alu.arith_shift));
9688 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9689 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9690 GET_CODE (XEXP (x, 1)), 0, speed_p));
9691 return true;
9692 }
9693
9694 if (speed_p)
9695 *cost += 2 * extra_cost->alu.arith;
9696 return false;
9697 }
9698
9699 /* Vector mode? */
9700
9701 *cost = LIBCALL_COST (2);
9702 return false;
9703
9704 case PLUS:
9705 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9706 && (mode == SFmode || !TARGET_VFP_SINGLE))
9707 {
9708 if (GET_CODE (XEXP (x, 0)) == MULT)
9709 {
9710 rtx mul_op0, mul_op1, add_op;
9711
9712 if (speed_p)
9713 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9714
9715 mul_op0 = XEXP (XEXP (x, 0), 0);
9716 mul_op1 = XEXP (XEXP (x, 0), 1);
9717 add_op = XEXP (x, 1);
9718
9719 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9720 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9721 + rtx_cost (add_op, mode, code, 0, speed_p));
9722
9723 return true;
9724 }
9725
9726 if (speed_p)
9727 *cost += extra_cost->fp[mode != SFmode].addsub;
9728 return false;
9729 }
9730 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9731 {
9732 *cost = LIBCALL_COST (2);
9733 return false;
9734 }
9735
9736 /* Narrow modes can be synthesized in SImode, but the range
9737 of useful sub-operations is limited. Check for shift operations
9738 on one of the operands. Only left shifts can be used in the
9739 narrow modes. */
9740 if (GET_MODE_CLASS (mode) == MODE_INT
9741 && GET_MODE_SIZE (mode) < 4)
9742 {
9743 rtx shift_op, shift_reg;
9744 shift_reg = NULL;
9745
9746 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0)
9747
9748 if (CONST_INT_P (XEXP (x, 1)))
9749 {
9750 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9751 INTVAL (XEXP (x, 1)), NULL_RTX,
9752 NULL_RTX, 1, 0);
9753 *cost = COSTS_N_INSNS (insns);
9754 if (speed_p)
9755 *cost += insns * extra_cost->alu.arith;
9756 /* Slightly penalize a narrow operation as the result may
9757 need widening. */
9758 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9759 return true;
9760 }
9761
9762 /* Slightly penalize a narrow operation as the result may
9763 need widening. */
9764 *cost += 1;
9765 if (speed_p)
9766 *cost += extra_cost->alu.arith;
9767
9768 return false;
9769 }
9770
9771 if (mode == SImode)
9772 {
9773 rtx shift_op, shift_reg;
9774
9775 if (TARGET_INT_SIMD
9776 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9777 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9778 {
9779 /* UXTA[BH] or SXTA[BH]. */
9780 if (speed_p)
9781 *cost += extra_cost->alu.extend_arith;
9782 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9783 0, speed_p)
9784 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9785 return true;
9786 }
9787
9788 shift_reg = NULL;
9789 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9790 if (shift_op != NULL)
9791 {
9792 if (shift_reg)
9793 {
9794 if (speed_p)
9795 *cost += extra_cost->alu.arith_shift_reg;
9796 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9797 }
9798 else if (speed_p)
9799 *cost += extra_cost->alu.arith_shift;
9800
9801 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9802 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9803 return true;
9804 }
9805 if (GET_CODE (XEXP (x, 0)) == MULT)
9806 {
9807 rtx mul_op = XEXP (x, 0);
9808
9809 if (TARGET_DSP_MULTIPLY
9810 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9811 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9812 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9813 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9814 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9815 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9816 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9817 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9818 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9819 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9820 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9821 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9822 == 16))))))
9823 {
9824 /* SMLA[BT][BT]. */
9825 if (speed_p)
9826 *cost += extra_cost->mult[0].extend_add;
9827 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9828 SIGN_EXTEND, 0, speed_p)
9829 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9830 SIGN_EXTEND, 0, speed_p)
9831 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9832 return true;
9833 }
9834
9835 if (speed_p)
9836 *cost += extra_cost->mult[0].add;
9837 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9838 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9839 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9840 return true;
9841 }
9842 if (CONST_INT_P (XEXP (x, 1)))
9843 {
9844 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9845 INTVAL (XEXP (x, 1)), NULL_RTX,
9846 NULL_RTX, 1, 0);
9847 *cost = COSTS_N_INSNS (insns);
9848 if (speed_p)
9849 *cost += insns * extra_cost->alu.arith;
9850 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9851 return true;
9852 }
9853 else if (speed_p)
9854 *cost += extra_cost->alu.arith;
9855
9856 return false;
9857 }
9858
9859 if (mode == DImode)
9860 {
9861 if (arm_arch3m
9862 && GET_CODE (XEXP (x, 0)) == MULT
9863 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
9864 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
9865 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
9866 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
9867 {
9868 if (speed_p)
9869 *cost += extra_cost->mult[1].extend_add;
9870 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
9871 ZERO_EXTEND, 0, speed_p)
9872 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
9873 ZERO_EXTEND, 0, speed_p)
9874 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9875 return true;
9876 }
9877
9878 *cost += COSTS_N_INSNS (1);
9879
9880 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9881 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9882 {
9883 if (speed_p)
9884 *cost += (extra_cost->alu.arith
9885 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9886 ? extra_cost->alu.arith
9887 : extra_cost->alu.arith_shift));
9888
9889 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9890 0, speed_p)
9891 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9892 return true;
9893 }
9894
9895 if (speed_p)
9896 *cost += 2 * extra_cost->alu.arith;
9897 return false;
9898 }
9899
9900 /* Vector mode? */
9901 *cost = LIBCALL_COST (2);
9902 return false;
9903 case IOR:
9904 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
9905 {
9906 if (speed_p)
9907 *cost += extra_cost->alu.rev;
9908
9909 return true;
9910 }
9911 /* Fall through. */
9912 case AND: case XOR:
9913 if (mode == SImode)
9914 {
9915 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
9916 rtx op0 = XEXP (x, 0);
9917 rtx shift_op, shift_reg;
9918
9919 if (subcode == NOT
9920 && (code == AND
9921 || (code == IOR && TARGET_THUMB2)))
9922 op0 = XEXP (op0, 0);
9923
9924 shift_reg = NULL;
9925 shift_op = shifter_op_p (op0, &shift_reg);
9926 if (shift_op != NULL)
9927 {
9928 if (shift_reg)
9929 {
9930 if (speed_p)
9931 *cost += extra_cost->alu.log_shift_reg;
9932 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9933 }
9934 else if (speed_p)
9935 *cost += extra_cost->alu.log_shift;
9936
9937 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9938 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9939 return true;
9940 }
9941
9942 if (CONST_INT_P (XEXP (x, 1)))
9943 {
9944 int insns = arm_gen_constant (code, SImode, NULL_RTX,
9945 INTVAL (XEXP (x, 1)), NULL_RTX,
9946 NULL_RTX, 1, 0);
9947
9948 *cost = COSTS_N_INSNS (insns);
9949 if (speed_p)
9950 *cost += insns * extra_cost->alu.logical;
9951 *cost += rtx_cost (op0, mode, code, 0, speed_p);
9952 return true;
9953 }
9954
9955 if (speed_p)
9956 *cost += extra_cost->alu.logical;
9957 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
9958 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9959 return true;
9960 }
9961
9962 if (mode == DImode)
9963 {
9964 rtx op0 = XEXP (x, 0);
9965 enum rtx_code subcode = GET_CODE (op0);
9966
9967 *cost += COSTS_N_INSNS (1);
9968
9969 if (subcode == NOT
9970 && (code == AND
9971 || (code == IOR && TARGET_THUMB2)))
9972 op0 = XEXP (op0, 0);
9973
9974 if (GET_CODE (op0) == ZERO_EXTEND)
9975 {
9976 if (speed_p)
9977 *cost += 2 * extra_cost->alu.logical;
9978
9979 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
9980 0, speed_p)
9981 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9982 return true;
9983 }
9984 else if (GET_CODE (op0) == SIGN_EXTEND)
9985 {
9986 if (speed_p)
9987 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
9988
9989 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
9990 0, speed_p)
9991 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
9992 return true;
9993 }
9994
9995 if (speed_p)
9996 *cost += 2 * extra_cost->alu.logical;
9997
9998 return true;
9999 }
10000 /* Vector mode? */
10001
10002 *cost = LIBCALL_COST (2);
10003 return false;
10004
10005 case MULT:
10006 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10007 && (mode == SFmode || !TARGET_VFP_SINGLE))
10008 {
10009 rtx op0 = XEXP (x, 0);
10010
10011 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10012 op0 = XEXP (op0, 0);
10013
10014 if (speed_p)
10015 *cost += extra_cost->fp[mode != SFmode].mult;
10016
10017 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10018 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10019 return true;
10020 }
10021 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10022 {
10023 *cost = LIBCALL_COST (2);
10024 return false;
10025 }
10026
10027 if (mode == SImode)
10028 {
10029 if (TARGET_DSP_MULTIPLY
10030 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10031 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10032 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10033 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10034 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10035 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10036 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10037 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10038 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10039 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10040 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10041 && (INTVAL (XEXP (XEXP (x, 1), 1))
10042 == 16))))))
10043 {
10044 /* SMUL[TB][TB]. */
10045 if (speed_p)
10046 *cost += extra_cost->mult[0].extend;
10047 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10048 SIGN_EXTEND, 0, speed_p);
10049 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10050 SIGN_EXTEND, 1, speed_p);
10051 return true;
10052 }
10053 if (speed_p)
10054 *cost += extra_cost->mult[0].simple;
10055 return false;
10056 }
10057
10058 if (mode == DImode)
10059 {
10060 if (arm_arch3m
10061 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10062 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10063 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10064 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10065 {
10066 if (speed_p)
10067 *cost += extra_cost->mult[1].extend;
10068 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10069 ZERO_EXTEND, 0, speed_p)
10070 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10071 ZERO_EXTEND, 0, speed_p));
10072 return true;
10073 }
10074
10075 *cost = LIBCALL_COST (2);
10076 return false;
10077 }
10078
10079 /* Vector mode? */
10080 *cost = LIBCALL_COST (2);
10081 return false;
10082
10083 case NEG:
10084 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10085 && (mode == SFmode || !TARGET_VFP_SINGLE))
10086 {
10087 if (GET_CODE (XEXP (x, 0)) == MULT)
10088 {
10089 /* VNMUL. */
10090 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10091 return true;
10092 }
10093
10094 if (speed_p)
10095 *cost += extra_cost->fp[mode != SFmode].neg;
10096
10097 return false;
10098 }
10099 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10100 {
10101 *cost = LIBCALL_COST (1);
10102 return false;
10103 }
10104
10105 if (mode == SImode)
10106 {
10107 if (GET_CODE (XEXP (x, 0)) == ABS)
10108 {
10109 *cost += COSTS_N_INSNS (1);
10110 /* Assume the non-flag-changing variant. */
10111 if (speed_p)
10112 *cost += (extra_cost->alu.log_shift
10113 + extra_cost->alu.arith_shift);
10114 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10115 return true;
10116 }
10117
10118 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10119 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10120 {
10121 *cost += COSTS_N_INSNS (1);
10122 /* No extra cost for MOV imm and MVN imm. */
10123 /* If the comparison op is using the flags, there's no further
10124 cost; otherwise we need to add the cost of the comparison. */
10125 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10126 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10127 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10128 {
10129 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10130 *cost += (COSTS_N_INSNS (1)
10131 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10132 0, speed_p)
10133 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10134 1, speed_p));
10135 if (speed_p)
10136 *cost += extra_cost->alu.arith;
10137 }
10138 return true;
10139 }
10140
10141 if (speed_p)
10142 *cost += extra_cost->alu.arith;
10143 return false;
10144 }
10145
10146 if (GET_MODE_CLASS (mode) == MODE_INT
10147 && GET_MODE_SIZE (mode) < 4)
10148 {
10149 /* Slightly disparage, as we might need an extend operation. */
10150 *cost += 1;
10151 if (speed_p)
10152 *cost += extra_cost->alu.arith;
10153 return false;
10154 }
10155
10156 if (mode == DImode)
10157 {
10158 *cost += COSTS_N_INSNS (1);
10159 if (speed_p)
10160 *cost += 2 * extra_cost->alu.arith;
10161 return false;
10162 }
10163
10164 /* Vector mode? */
10165 *cost = LIBCALL_COST (1);
10166 return false;
10167
10168 case NOT:
10169 if (mode == SImode)
10170 {
10171 rtx shift_op;
10172 rtx shift_reg = NULL;
10173
10174 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10175
10176 if (shift_op)
10177 {
10178 if (shift_reg != NULL)
10179 {
10180 if (speed_p)
10181 *cost += extra_cost->alu.log_shift_reg;
10182 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10183 }
10184 else if (speed_p)
10185 *cost += extra_cost->alu.log_shift;
10186 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10187 return true;
10188 }
10189
10190 if (speed_p)
10191 *cost += extra_cost->alu.logical;
10192 return false;
10193 }
10194 if (mode == DImode)
10195 {
10196 *cost += COSTS_N_INSNS (1);
10197 return false;
10198 }
10199
10200 /* Vector mode? */
10201
10202 *cost += LIBCALL_COST (1);
10203 return false;
10204
10205 case IF_THEN_ELSE:
10206 {
10207 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10208 {
10209 *cost += COSTS_N_INSNS (3);
10210 return true;
10211 }
10212 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10213 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10214
10215 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10216 /* Assume that if one arm of the if_then_else is a register,
10217 that it will be tied with the result and eliminate the
10218 conditional insn. */
10219 if (REG_P (XEXP (x, 1)))
10220 *cost += op2cost;
10221 else if (REG_P (XEXP (x, 2)))
10222 *cost += op1cost;
10223 else
10224 {
10225 if (speed_p)
10226 {
10227 if (extra_cost->alu.non_exec_costs_exec)
10228 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10229 else
10230 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10231 }
10232 else
10233 *cost += op1cost + op2cost;
10234 }
10235 }
10236 return true;
10237
10238 case COMPARE:
10239 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10240 *cost = 0;
10241 else
10242 {
10243 machine_mode op0mode;
10244 /* We'll mostly assume that the cost of a compare is the cost of the
10245 LHS. However, there are some notable exceptions. */
10246
10247 /* Floating point compares are never done as side-effects. */
10248 op0mode = GET_MODE (XEXP (x, 0));
10249 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10250 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10251 {
10252 if (speed_p)
10253 *cost += extra_cost->fp[op0mode != SFmode].compare;
10254
10255 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10256 {
10257 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10258 return true;
10259 }
10260
10261 return false;
10262 }
10263 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10264 {
10265 *cost = LIBCALL_COST (2);
10266 return false;
10267 }
10268
10269 /* DImode compares normally take two insns. */
10270 if (op0mode == DImode)
10271 {
10272 *cost += COSTS_N_INSNS (1);
10273 if (speed_p)
10274 *cost += 2 * extra_cost->alu.arith;
10275 return false;
10276 }
10277
10278 if (op0mode == SImode)
10279 {
10280 rtx shift_op;
10281 rtx shift_reg;
10282
10283 if (XEXP (x, 1) == const0_rtx
10284 && !(REG_P (XEXP (x, 0))
10285 || (GET_CODE (XEXP (x, 0)) == SUBREG
10286 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10287 {
10288 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10289
10290 /* Multiply operations that set the flags are often
10291 significantly more expensive. */
10292 if (speed_p
10293 && GET_CODE (XEXP (x, 0)) == MULT
10294 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10295 *cost += extra_cost->mult[0].flag_setting;
10296
10297 if (speed_p
10298 && GET_CODE (XEXP (x, 0)) == PLUS
10299 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10300 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10301 0), 1), mode))
10302 *cost += extra_cost->mult[0].flag_setting;
10303 return true;
10304 }
10305
10306 shift_reg = NULL;
10307 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10308 if (shift_op != NULL)
10309 {
10310 if (shift_reg != NULL)
10311 {
10312 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10313 1, speed_p);
10314 if (speed_p)
10315 *cost += extra_cost->alu.arith_shift_reg;
10316 }
10317 else if (speed_p)
10318 *cost += extra_cost->alu.arith_shift;
10319 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10320 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10321 return true;
10322 }
10323
10324 if (speed_p)
10325 *cost += extra_cost->alu.arith;
10326 if (CONST_INT_P (XEXP (x, 1))
10327 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10328 {
10329 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10330 return true;
10331 }
10332 return false;
10333 }
10334
10335 /* Vector mode? */
10336
10337 *cost = LIBCALL_COST (2);
10338 return false;
10339 }
10340 return true;
10341
10342 case EQ:
10343 case NE:
10344 case LT:
10345 case LE:
10346 case GT:
10347 case GE:
10348 case LTU:
10349 case LEU:
10350 case GEU:
10351 case GTU:
10352 case ORDERED:
10353 case UNORDERED:
10354 case UNEQ:
10355 case UNLE:
10356 case UNLT:
10357 case UNGE:
10358 case UNGT:
10359 case LTGT:
10360 if (outer_code == SET)
10361 {
10362 /* Is it a store-flag operation? */
10363 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10364 && XEXP (x, 1) == const0_rtx)
10365 {
10366 /* Thumb also needs an IT insn. */
10367 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10368 return true;
10369 }
10370 if (XEXP (x, 1) == const0_rtx)
10371 {
10372 switch (code)
10373 {
10374 case LT:
10375 /* LSR Rd, Rn, #31. */
10376 if (speed_p)
10377 *cost += extra_cost->alu.shift;
10378 break;
10379
10380 case EQ:
10381 /* RSBS T1, Rn, #0
10382 ADC Rd, Rn, T1. */
10383
10384 case NE:
10385 /* SUBS T1, Rn, #1
10386 SBC Rd, Rn, T1. */
10387 *cost += COSTS_N_INSNS (1);
10388 break;
10389
10390 case LE:
10391 /* RSBS T1, Rn, Rn, LSR #31
10392 ADC Rd, Rn, T1. */
10393 *cost += COSTS_N_INSNS (1);
10394 if (speed_p)
10395 *cost += extra_cost->alu.arith_shift;
10396 break;
10397
10398 case GT:
10399 /* RSB Rd, Rn, Rn, ASR #1
10400 LSR Rd, Rd, #31. */
10401 *cost += COSTS_N_INSNS (1);
10402 if (speed_p)
10403 *cost += (extra_cost->alu.arith_shift
10404 + extra_cost->alu.shift);
10405 break;
10406
10407 case GE:
10408 /* ASR Rd, Rn, #31
10409 ADD Rd, Rn, #1. */
10410 *cost += COSTS_N_INSNS (1);
10411 if (speed_p)
10412 *cost += extra_cost->alu.shift;
10413 break;
10414
10415 default:
10416 /* Remaining cases are either meaningless or would take
10417 three insns anyway. */
10418 *cost = COSTS_N_INSNS (3);
10419 break;
10420 }
10421 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10422 return true;
10423 }
10424 else
10425 {
10426 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10427 if (CONST_INT_P (XEXP (x, 1))
10428 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10429 {
10430 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10431 return true;
10432 }
10433
10434 return false;
10435 }
10436 }
10437 /* Not directly inside a set. If it involves the condition code
10438 register, it must be the condition for a branch, cond_exec or
10439 I_T_E operation. Since the comparison is performed elsewhere,
10440 this is just the control part, which has no additional
10441 cost. */
10442 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10443 && XEXP (x, 1) == const0_rtx)
10444 {
10445 *cost = 0;
10446 return true;
10447 }
10448 return false;
10449
10450 case ABS:
10451 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10452 && (mode == SFmode || !TARGET_VFP_SINGLE))
10453 {
10454 if (speed_p)
10455 *cost += extra_cost->fp[mode != SFmode].neg;
10456
10457 return false;
10458 }
10459 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10460 {
10461 *cost = LIBCALL_COST (1);
10462 return false;
10463 }
10464
10465 if (mode == SImode)
10466 {
10467 if (speed_p)
10468 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10469 return false;
10470 }
10471 /* Vector mode? */
10472 *cost = LIBCALL_COST (1);
10473 return false;
10474
10475 case SIGN_EXTEND:
10476 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10477 && MEM_P (XEXP (x, 0)))
10478 {
10479 if (mode == DImode)
10480 *cost += COSTS_N_INSNS (1);
10481
10482 if (!speed_p)
10483 return true;
10484
10485 if (GET_MODE (XEXP (x, 0)) == SImode)
10486 *cost += extra_cost->ldst.load;
10487 else
10488 *cost += extra_cost->ldst.load_sign_extend;
10489
10490 if (mode == DImode)
10491 *cost += extra_cost->alu.shift;
10492
10493 return true;
10494 }
10495
10496 /* Widening from less than 32-bits requires an extend operation. */
10497 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10498 {
10499 /* We have SXTB/SXTH. */
10500 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10501 if (speed_p)
10502 *cost += extra_cost->alu.extend;
10503 }
10504 else if (GET_MODE (XEXP (x, 0)) != SImode)
10505 {
10506 /* Needs two shifts. */
10507 *cost += COSTS_N_INSNS (1);
10508 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10509 if (speed_p)
10510 *cost += 2 * extra_cost->alu.shift;
10511 }
10512
10513 /* Widening beyond 32-bits requires one more insn. */
10514 if (mode == DImode)
10515 {
10516 *cost += COSTS_N_INSNS (1);
10517 if (speed_p)
10518 *cost += extra_cost->alu.shift;
10519 }
10520
10521 return true;
10522
10523 case ZERO_EXTEND:
10524 if ((arm_arch4
10525 || GET_MODE (XEXP (x, 0)) == SImode
10526 || GET_MODE (XEXP (x, 0)) == QImode)
10527 && MEM_P (XEXP (x, 0)))
10528 {
10529 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10530
10531 if (mode == DImode)
10532 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10533
10534 return true;
10535 }
10536
10537 /* Widening from less than 32-bits requires an extend operation. */
10538 if (GET_MODE (XEXP (x, 0)) == QImode)
10539 {
10540 /* UXTB can be a shorter instruction in Thumb2, but it might
10541 be slower than the AND Rd, Rn, #255 alternative. When
10542 optimizing for speed it should never be slower to use
10543 AND, and we don't really model 16-bit vs 32-bit insns
10544 here. */
10545 if (speed_p)
10546 *cost += extra_cost->alu.logical;
10547 }
10548 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10549 {
10550 /* We have UXTB/UXTH. */
10551 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10552 if (speed_p)
10553 *cost += extra_cost->alu.extend;
10554 }
10555 else if (GET_MODE (XEXP (x, 0)) != SImode)
10556 {
10557 /* Needs two shifts. It's marginally preferable to use
10558 shifts rather than two BIC instructions as the second
10559 shift may merge with a subsequent insn as a shifter
10560 op. */
10561 *cost = COSTS_N_INSNS (2);
10562 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10563 if (speed_p)
10564 *cost += 2 * extra_cost->alu.shift;
10565 }
10566
10567 /* Widening beyond 32-bits requires one more insn. */
10568 if (mode == DImode)
10569 {
10570 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10571 }
10572
10573 return true;
10574
10575 case CONST_INT:
10576 *cost = 0;
10577 /* CONST_INT has no mode, so we cannot tell for sure how many
10578 insns are really going to be needed. The best we can do is
10579 look at the value passed. If it fits in SImode, then assume
10580 that's the mode it will be used for. Otherwise assume it
10581 will be used in DImode. */
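/* For instance, assuming a 64-bit HOST_WIDE_INT, the value 0x100000000
does not fit in SImode and is therefore costed below as two SImode
constants: a low half of 0 and a high half of 1. */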
10582 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10583 mode = SImode;
10584 else
10585 mode = DImode;
10586
10587 /* Avoid blowing up in arm_gen_constant (). */
10588 if (!(outer_code == PLUS
10589 || outer_code == AND
10590 || outer_code == IOR
10591 || outer_code == XOR
10592 || outer_code == MINUS))
10593 outer_code = SET;
10594
10595 const_int_cost:
10596 if (mode == SImode)
10597 {
10598 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10599 INTVAL (x), NULL, NULL,
10600 0, 0));
10601 /* Extra costs? */
10602 }
10603 else
10604 {
10605 *cost += COSTS_N_INSNS (arm_gen_constant
10606 (outer_code, SImode, NULL,
10607 trunc_int_for_mode (INTVAL (x), SImode),
10608 NULL, NULL, 0, 0)
10609 + arm_gen_constant (outer_code, SImode, NULL,
10610 INTVAL (x) >> 32, NULL,
10611 NULL, 0, 0));
10612 /* Extra costs? */
10613 }
10614
10615 return true;
10616
10617 case CONST:
10618 case LABEL_REF:
10619 case SYMBOL_REF:
10620 if (speed_p)
10621 {
10622 if (arm_arch_thumb2 && !flag_pic)
10623 *cost += COSTS_N_INSNS (1);
10624 else
10625 *cost += extra_cost->ldst.load;
10626 }
10627 else
10628 *cost += COSTS_N_INSNS (1);
10629
10630 if (flag_pic)
10631 {
10632 *cost += COSTS_N_INSNS (1);
10633 if (speed_p)
10634 *cost += extra_cost->alu.arith;
10635 }
10636
10637 return true;
10638
10639 case CONST_FIXED:
10640 *cost = COSTS_N_INSNS (4);
10641 /* Fixme. */
10642 return true;
10643
10644 case CONST_DOUBLE:
10645 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10646 && (mode == SFmode || !TARGET_VFP_SINGLE))
10647 {
10648 if (vfp3_const_double_rtx (x))
10649 {
10650 if (speed_p)
10651 *cost += extra_cost->fp[mode == DFmode].fpconst;
10652 return true;
10653 }
10654
10655 if (speed_p)
10656 {
10657 if (mode == DFmode)
10658 *cost += extra_cost->ldst.loadd;
10659 else
10660 *cost += extra_cost->ldst.loadf;
10661 }
10662 else
10663 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10664
10665 return true;
10666 }
10667 *cost = COSTS_N_INSNS (4);
10668 return true;
10669
10670 case CONST_VECTOR:
10671 /* Fixme. */
10672 if (TARGET_NEON
10673 && TARGET_HARD_FLOAT
10674 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10675 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10676 *cost = COSTS_N_INSNS (1);
10677 else
10678 *cost = COSTS_N_INSNS (4);
10679 return true;
10680
10681 case HIGH:
10682 case LO_SUM:
10683 /* When optimizing for size, we prefer constant pool entries to
10684 MOVW/MOVT pairs, so bump the cost of these slightly. */
10685 if (!speed_p)
10686 *cost += 1;
10687 return true;
10688
10689 case CLZ:
10690 if (speed_p)
10691 *cost += extra_cost->alu.clz;
10692 return false;
10693
10694 case SMIN:
10695 if (XEXP (x, 1) == const0_rtx)
10696 {
10697 if (speed_p)
10698 *cost += extra_cost->alu.log_shift;
10699 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10700 return true;
10701 }
10702 /* Fall through. */
10703 case SMAX:
10704 case UMIN:
10705 case UMAX:
10706 *cost += COSTS_N_INSNS (1);
10707 return false;
10708
10709 case TRUNCATE:
10710 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10711 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10712 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10713 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10714 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10715 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10716 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10717 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10718 == ZERO_EXTEND))))
10719 {
10720 if (speed_p)
10721 *cost += extra_cost->mult[1].extend;
10722 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10723 ZERO_EXTEND, 0, speed_p)
10724 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10725 ZERO_EXTEND, 0, speed_p));
10726 return true;
10727 }
10728 *cost = LIBCALL_COST (1);
10729 return false;
10730
10731 case UNSPEC_VOLATILE:
10732 case UNSPEC:
10733 return arm_unspec_cost (x, outer_code, speed_p, cost);
10734
10735 case PC:
10736 /* Reading the PC is like reading any other register. Writing it
10737 is more expensive, but we take that into account elsewhere. */
10738 *cost = 0;
10739 return true;
10740
10741 case ZERO_EXTRACT:
10742 /* TODO: Simple zero_extract of bottom bits using AND. */
10743 /* Fall through. */
10744 case SIGN_EXTRACT:
10745 if (arm_arch6
10746 && mode == SImode
10747 && CONST_INT_P (XEXP (x, 1))
10748 && CONST_INT_P (XEXP (x, 2)))
10749 {
10750 if (speed_p)
10751 *cost += extra_cost->alu.bfx;
10752 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10753 return true;
10754 }
10755 /* Without UBFX/SBFX, need to resort to shift operations. */
10756 *cost += COSTS_N_INSNS (1);
10757 if (speed_p)
10758 *cost += 2 * extra_cost->alu.shift;
10759 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10760 return true;
10761
10762 case FLOAT_EXTEND:
10763 if (TARGET_HARD_FLOAT)
10764 {
10765 if (speed_p)
10766 *cost += extra_cost->fp[mode == DFmode].widen;
10767 if (!TARGET_VFP5
10768 && GET_MODE (XEXP (x, 0)) == HFmode)
10769 {
10770 /* Pre v8, widening HF->DF is a two-step process, first
10771 widening to SFmode. */
10772 *cost += COSTS_N_INSNS (1);
10773 if (speed_p)
10774 *cost += extra_cost->fp[0].widen;
10775 }
10776 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10777 return true;
10778 }
10779
10780 *cost = LIBCALL_COST (1);
10781 return false;
10782
10783 case FLOAT_TRUNCATE:
10784 if (TARGET_HARD_FLOAT)
10785 {
10786 if (speed_p)
10787 *cost += extra_cost->fp[mode == DFmode].narrow;
10788 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10789 return true;
10790 /* Vector modes? */
10791 }
10792 *cost = LIBCALL_COST (1);
10793 return false;
10794
10795 case FMA:
10796 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10797 {
10798 rtx op0 = XEXP (x, 0);
10799 rtx op1 = XEXP (x, 1);
10800 rtx op2 = XEXP (x, 2);
10801
10802
10803 /* vfms or vfnma. */
10804 if (GET_CODE (op0) == NEG)
10805 op0 = XEXP (op0, 0);
10806
10807 /* vfnms or vfnma. */
10808 if (GET_CODE (op2) == NEG)
10809 op2 = XEXP (op2, 0);
10810
10811 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10812 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10813 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10814
10815 if (speed_p)
10816 *cost += extra_cost->fp[mode == DFmode].fma;
10817
10818 return true;
10819 }
10820
10821 *cost = LIBCALL_COST (3);
10822 return false;
10823
10824 case FIX:
10825 case UNSIGNED_FIX:
10826 if (TARGET_HARD_FLOAT)
10827 {
10828 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10829 a vcvt fixed-point conversion. */
10830 if (code == FIX && mode == SImode
10831 && GET_CODE (XEXP (x, 0)) == FIX
10832 && GET_MODE (XEXP (x, 0)) == SFmode
10833 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10834 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10835 > 0)
10836 {
10837 if (speed_p)
10838 *cost += extra_cost->fp[0].toint;
10839
10840 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10841 code, 0, speed_p);
10842 return true;
10843 }
10844
10845 if (GET_MODE_CLASS (mode) == MODE_INT)
10846 {
10847 mode = GET_MODE (XEXP (x, 0));
10848 if (speed_p)
10849 *cost += extra_cost->fp[mode == DFmode].toint;
10850 /* Strip off the 'cost' of rounding towards zero. */
10851 if (GET_CODE (XEXP (x, 0)) == FIX)
10852 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10853 0, speed_p);
10854 else
10855 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10856 /* ??? Increase the cost to deal with transferring from
10857 FP -> CORE registers? */
10858 return true;
10859 }
10860 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
10861 && TARGET_VFP5)
10862 {
10863 if (speed_p)
10864 *cost += extra_cost->fp[mode == DFmode].roundint;
10865 return false;
10866 }
10867 /* Vector costs? */
10868 }
10869 *cost = LIBCALL_COST (1);
10870 return false;
10871
10872 case FLOAT:
10873 case UNSIGNED_FLOAT:
10874 if (TARGET_HARD_FLOAT)
10875 {
10876 /* ??? Increase the cost to deal with transferring from CORE
10877 -> FP registers? */
10878 if (speed_p)
10879 *cost += extra_cost->fp[mode == DFmode].fromint;
10880 return false;
10881 }
10882 *cost = LIBCALL_COST (1);
10883 return false;
10884
10885 case CALL:
10886 return true;
10887
10888 case ASM_OPERANDS:
10889 {
10890 /* Just a guess. Guess number of instructions in the asm
10891 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
10892 though (see PR60663). */
10893 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
10894 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
10895
10896 *cost = COSTS_N_INSNS (asm_length + num_operands);
10897 return true;
10898 }
10899 default:
10900 if (mode != VOIDmode)
10901 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
10902 else
10903 *cost = COSTS_N_INSNS (4); /* Who knows? */
10904 return false;
10905 }
10906 }
10907
10908 #undef HANDLE_NARROW_SHIFT_ARITH
10909
10910 /* RTX costs entry point. */
10911
10912 static bool
10913 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
10914 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
10915 {
10916 bool result;
10917 int code = GET_CODE (x);
10918 gcc_assert (current_tune->insn_extra_cost);
10919
10920 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
10921 (enum rtx_code) outer_code,
10922 current_tune->insn_extra_cost,
10923 total, speed);
10924
10925 if (dump_file && (dump_flags & TDF_DETAILS))
10926 {
10927 print_rtl_single (dump_file, x);
10928 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
10929 *total, result ? "final" : "partial");
10930 }
10931 return result;
10932 }
10933
10934 /* All address computations that can be done are free, but rtx cost returns
10935 the same for practically all of them. So we weight the different types
10936 of address here in the order (most pref first):
10937 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
10938 static inline int
10939 arm_arm_address_cost (rtx x)
10940 {
10941 enum rtx_code c = GET_CODE (x);
10942
10943 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
10944 return 0;
10945 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
10946 return 10;
10947
10948 if (c == PLUS)
10949 {
10950 if (CONST_INT_P (XEXP (x, 1)))
10951 return 2;
10952
10953 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
10954 return 3;
10955
10956 return 4;
10957 }
10958
10959 return 6;
10960 }
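/* Illustrative examples, derived directly from the cases above (they are
   not part of the original comment): a POST_INC address such as [r0], #4
   costs 0; [r0, #8] costs 2; a register plus a scaled register, e.g.
   (plus r0 (mult r1 4)), costs 3; (plus r0 r1) costs 4; a bare register
   costs 6; and a MEM, LABEL_REF or SYMBOL_REF address costs 10.  */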
10961
10962 static inline int
10963 arm_thumb_address_cost (rtx x)
10964 {
10965 enum rtx_code c = GET_CODE (x);
10966
10967 if (c == REG)
10968 return 1;
10969 if (c == PLUS
10970 && REG_P (XEXP (x, 0))
10971 && CONST_INT_P (XEXP (x, 1)))
10972 return 1;
10973
10974 return 2;
10975 }
10976
10977 static int
10978 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
10979 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
10980 {
10981 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
10982 }
10983
10984 /* Adjust cost hook for XScale. */
10985 static bool
10986 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
10987 int * cost)
10988 {
10989 /* Some true dependencies can have a higher cost depending
10990 on precisely how certain input operands are used. */
10991 if (dep_type == 0
10992 && recog_memoized (insn) >= 0
10993 && recog_memoized (dep) >= 0)
10994 {
10995 int shift_opnum = get_attr_shift (insn);
10996 enum attr_type attr_type = get_attr_type (dep);
10997
10998 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
10999 operand for INSN. If we have a shifted input operand and the
11000 instruction we depend on is another ALU instruction, then we may
11001 have to account for an additional stall. */
11002 if (shift_opnum != 0
11003 && (attr_type == TYPE_ALU_SHIFT_IMM
11004 || attr_type == TYPE_ALUS_SHIFT_IMM
11005 || attr_type == TYPE_LOGIC_SHIFT_IMM
11006 || attr_type == TYPE_LOGICS_SHIFT_IMM
11007 || attr_type == TYPE_ALU_SHIFT_REG
11008 || attr_type == TYPE_ALUS_SHIFT_REG
11009 || attr_type == TYPE_LOGIC_SHIFT_REG
11010 || attr_type == TYPE_LOGICS_SHIFT_REG
11011 || attr_type == TYPE_MOV_SHIFT
11012 || attr_type == TYPE_MVN_SHIFT
11013 || attr_type == TYPE_MOV_SHIFT_REG
11014 || attr_type == TYPE_MVN_SHIFT_REG))
11015 {
11016 rtx shifted_operand;
11017 int opno;
11018
11019 /* Get the shifted operand. */
11020 extract_insn (insn);
11021 shifted_operand = recog_data.operand[shift_opnum];
11022
11023 /* Iterate over all the operands in DEP. If we write an operand
11024 that overlaps with SHIFTED_OPERAND, then we have to increase the
11025 cost of this dependency. */
11026 extract_insn (dep);
11027 preprocess_constraints (dep);
11028 for (opno = 0; opno < recog_data.n_operands; opno++)
11029 {
11030 /* We can ignore strict inputs. */
11031 if (recog_data.operand_type[opno] == OP_IN)
11032 continue;
11033
11034 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11035 shifted_operand))
11036 {
11037 *cost = 2;
11038 return false;
11039 }
11040 }
11041 }
11042 }
11043 return true;
11044 }
11045
11046 /* Adjust cost hook for Cortex A9. */
11047 static bool
11048 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11049 int * cost)
11050 {
11051 switch (dep_type)
11052 {
11053 case REG_DEP_ANTI:
11054 *cost = 0;
11055 return false;
11056
11057 case REG_DEP_TRUE:
11058 case REG_DEP_OUTPUT:
11059 if (recog_memoized (insn) >= 0
11060 && recog_memoized (dep) >= 0)
11061 {
11062 if (GET_CODE (PATTERN (insn)) == SET)
11063 {
11064 if (GET_MODE_CLASS
11065 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11066 || GET_MODE_CLASS
11067 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11068 {
11069 enum attr_type attr_type_insn = get_attr_type (insn);
11070 enum attr_type attr_type_dep = get_attr_type (dep);
11071
11072 /* By default all dependencies of the form
11073 s0 = s0 <op> s1
11074 s0 = s0 <op> s2
11075 have an extra latency of 1 cycle because
11076 of the input and output dependency in this
11077 case. However, this gets modeled as a true
11078 dependency and hence all these checks. */
11079 if (REG_P (SET_DEST (PATTERN (insn)))
11080 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11081 {
11082 /* FMACS is a special case where the dependent
11083 instruction can be issued 3 cycles before
11084 the normal latency in case of an output
11085 dependency. */
11086 if ((attr_type_insn == TYPE_FMACS
11087 || attr_type_insn == TYPE_FMACD)
11088 && (attr_type_dep == TYPE_FMACS
11089 || attr_type_dep == TYPE_FMACD))
11090 {
11091 if (dep_type == REG_DEP_OUTPUT)
11092 *cost = insn_default_latency (dep) - 3;
11093 else
11094 *cost = insn_default_latency (dep);
11095 return false;
11096 }
11097 else
11098 {
11099 if (dep_type == REG_DEP_OUTPUT)
11100 *cost = insn_default_latency (dep) + 1;
11101 else
11102 *cost = insn_default_latency (dep);
11103 }
11104 return false;
11105 }
11106 }
11107 }
11108 }
11109 break;
11110
11111 default:
11112 gcc_unreachable ();
11113 }
11114
11115 return true;
11116 }
11117
11118 /* Adjust cost hook for FA726TE. */
11119 static bool
11120 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11121 int * cost)
11122 {
11123 /* For FA726TE, a true dependency on CPSR (i.e. set cond followed by predicated)
11124 has a penalty of 3. */
11125 if (dep_type == REG_DEP_TRUE
11126 && recog_memoized (insn) >= 0
11127 && recog_memoized (dep) >= 0
11128 && get_attr_conds (dep) == CONDS_SET)
11129 {
11130 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11131 if (get_attr_conds (insn) == CONDS_USE
11132 && get_attr_type (insn) != TYPE_BRANCH)
11133 {
11134 *cost = 3;
11135 return false;
11136 }
11137
11138 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11139 || get_attr_conds (insn) == CONDS_USE)
11140 {
11141 *cost = 0;
11142 return false;
11143 }
11144 }
11145
11146 return true;
11147 }
11148
11149 /* Implement TARGET_REGISTER_MOVE_COST.
11150
11151 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11152 one that is typically more expensive than a single memory access. We set
11153 the cost to less than two memory accesses so that floating
11154 point to integer conversion does not go through memory. */
11155
11156 int
11157 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11158 reg_class_t from, reg_class_t to)
11159 {
11160 if (TARGET_32BIT)
11161 {
11162 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11163 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11164 return 15;
11165 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11166 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11167 return 4;
11168 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11169 return 20;
11170 else
11171 return 2;
11172 }
11173 else
11174 {
11175 if (from == HI_REGS || to == HI_REGS)
11176 return 4;
11177 else
11178 return 2;
11179 }
11180 }
11181
11182 /* Implement TARGET_MEMORY_MOVE_COST. */
11183
11184 int
11185 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11186 bool in ATTRIBUTE_UNUSED)
11187 {
11188 if (TARGET_32BIT)
11189 return 10;
11190 else
11191 {
11192 if (GET_MODE_SIZE (mode) < 4)
11193 return 8;
11194 else
11195 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11196 }
11197 }
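/* For illustration (not in the original source, read off the code above):
   on a 32-bit (ARM/Thumb-2) target every memory move costs 10.  On Thumb-1,
   a QImode or HImode move costs 8, while an SImode move costs 8 to or from
   LO_REGS and 16 for any other register class.  */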
11198
11199 /* Vectorizer cost model implementation. */
11200
11201 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11202 static int
11203 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11204 tree vectype,
11205 int misalign ATTRIBUTE_UNUSED)
11206 {
11207 unsigned elements;
11208
11209 switch (type_of_cost)
11210 {
11211 case scalar_stmt:
11212 return current_tune->vec_costs->scalar_stmt_cost;
11213
11214 case scalar_load:
11215 return current_tune->vec_costs->scalar_load_cost;
11216
11217 case scalar_store:
11218 return current_tune->vec_costs->scalar_store_cost;
11219
11220 case vector_stmt:
11221 return current_tune->vec_costs->vec_stmt_cost;
11222
11223 case vector_load:
11224 return current_tune->vec_costs->vec_align_load_cost;
11225
11226 case vector_store:
11227 return current_tune->vec_costs->vec_store_cost;
11228
11229 case vec_to_scalar:
11230 return current_tune->vec_costs->vec_to_scalar_cost;
11231
11232 case scalar_to_vec:
11233 return current_tune->vec_costs->scalar_to_vec_cost;
11234
11235 case unaligned_load:
11236 return current_tune->vec_costs->vec_unalign_load_cost;
11237
11238 case unaligned_store:
11239 return current_tune->vec_costs->vec_unalign_store_cost;
11240
11241 case cond_branch_taken:
11242 return current_tune->vec_costs->cond_taken_branch_cost;
11243
11244 case cond_branch_not_taken:
11245 return current_tune->vec_costs->cond_not_taken_branch_cost;
11246
11247 case vec_perm:
11248 case vec_promote_demote:
11249 return current_tune->vec_costs->vec_stmt_cost;
11250
11251 case vec_construct:
11252 elements = TYPE_VECTOR_SUBPARTS (vectype);
11253 return elements / 2 + 1;
11254
11255 default:
11256 gcc_unreachable ();
11257 }
11258 }
11259
11260 /* Implement targetm.vectorize.add_stmt_cost. */
11261
11262 static unsigned
11263 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11264 struct _stmt_vec_info *stmt_info, int misalign,
11265 enum vect_cost_model_location where)
11266 {
11267 unsigned *cost = (unsigned *) data;
11268 unsigned retval = 0;
11269
11270 if (flag_vect_cost_model)
11271 {
11272 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11273 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11274
11275 /* Statements in an inner loop relative to the loop being
11276 vectorized are weighted more heavily. The value here is
11277 arbitrary and could potentially be improved with analysis. */
11278 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11279 count *= 50; /* FIXME. */
11280
11281 retval = (unsigned) (count * stmt_cost);
11282 cost[where] += retval;
11283 }
11284
11285 return retval;
11286 }
11287
11288 /* Return true if and only if this insn can dual-issue only as older. */
11289 static bool
11290 cortexa7_older_only (rtx_insn *insn)
11291 {
11292 if (recog_memoized (insn) < 0)
11293 return false;
11294
11295 switch (get_attr_type (insn))
11296 {
11297 case TYPE_ALU_DSP_REG:
11298 case TYPE_ALU_SREG:
11299 case TYPE_ALUS_SREG:
11300 case TYPE_LOGIC_REG:
11301 case TYPE_LOGICS_REG:
11302 case TYPE_ADC_REG:
11303 case TYPE_ADCS_REG:
11304 case TYPE_ADR:
11305 case TYPE_BFM:
11306 case TYPE_REV:
11307 case TYPE_MVN_REG:
11308 case TYPE_SHIFT_IMM:
11309 case TYPE_SHIFT_REG:
11310 case TYPE_LOAD_BYTE:
11311 case TYPE_LOAD1:
11312 case TYPE_STORE1:
11313 case TYPE_FFARITHS:
11314 case TYPE_FADDS:
11315 case TYPE_FFARITHD:
11316 case TYPE_FADDD:
11317 case TYPE_FMOV:
11318 case TYPE_F_CVT:
11319 case TYPE_FCMPS:
11320 case TYPE_FCMPD:
11321 case TYPE_FCONSTS:
11322 case TYPE_FCONSTD:
11323 case TYPE_FMULS:
11324 case TYPE_FMACS:
11325 case TYPE_FMULD:
11326 case TYPE_FMACD:
11327 case TYPE_FDIVS:
11328 case TYPE_FDIVD:
11329 case TYPE_F_MRC:
11330 case TYPE_F_MRRC:
11331 case TYPE_F_FLAG:
11332 case TYPE_F_LOADS:
11333 case TYPE_F_STORES:
11334 return true;
11335 default:
11336 return false;
11337 }
11338 }
11339
11340 /* Return true if and only if this insn can dual-issue as younger. */
11341 static bool
11342 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11343 {
11344 if (recog_memoized (insn) < 0)
11345 {
11346 if (verbose > 5)
11347 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11348 return false;
11349 }
11350
11351 switch (get_attr_type (insn))
11352 {
11353 case TYPE_ALU_IMM:
11354 case TYPE_ALUS_IMM:
11355 case TYPE_LOGIC_IMM:
11356 case TYPE_LOGICS_IMM:
11357 case TYPE_EXTEND:
11358 case TYPE_MVN_IMM:
11359 case TYPE_MOV_IMM:
11360 case TYPE_MOV_REG:
11361 case TYPE_MOV_SHIFT:
11362 case TYPE_MOV_SHIFT_REG:
11363 case TYPE_BRANCH:
11364 case TYPE_CALL:
11365 return true;
11366 default:
11367 return false;
11368 }
11369 }
11370
11371
11372 /* Look for an instruction that can dual issue only as an older
11373 instruction, and move it in front of any instructions that can
11374 dual-issue as younger, while preserving the relative order of all
11375 other instructions in the ready list. This is a heuristic to help
11376 dual-issue in later cycles, by postponing issue of more flexible
11377 instructions. This heuristic may affect dual issue opportunities
11378 in the current cycle. */
11379 static void
11380 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11381 int *n_readyp, int clock)
11382 {
11383 int i;
11384 int first_older_only = -1, first_younger = -1;
11385
11386 if (verbose > 5)
11387 fprintf (file,
11388 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11389 clock,
11390 *n_readyp);
11391
11392 /* Traverse the ready list from the head (the instruction to issue
11393 first), looking for the first instruction that can issue as
11394 younger and the first instruction that can dual-issue only as
11395 older. */
11396 for (i = *n_readyp - 1; i >= 0; i--)
11397 {
11398 rtx_insn *insn = ready[i];
11399 if (cortexa7_older_only (insn))
11400 {
11401 first_older_only = i;
11402 if (verbose > 5)
11403 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11404 break;
11405 }
11406 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11407 first_younger = i;
11408 }
11409
11410 /* Nothing to reorder because either no younger insn was found, or an
11411 insn that can dual-issue only as older appears before any insn that
11412 can dual-issue as younger. */
11413 if (first_younger == -1)
11414 {
11415 if (verbose > 5)
11416 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11417 return;
11418 }
11419
11420 /* Nothing to reorder because no older-only insn in the ready list. */
11421 if (first_older_only == -1)
11422 {
11423 if (verbose > 5)
11424 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11425 return;
11426 }
11427
11428 /* Move first_older_only insn before first_younger. */
11429 if (verbose > 5)
11430 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11431 INSN_UID(ready [first_older_only]),
11432 INSN_UID(ready [first_younger]));
11433 rtx_insn *first_older_only_insn = ready [first_older_only];
11434 for (i = first_older_only; i < first_younger; i++)
11435 {
11436 ready[i] = ready[i+1];
11437 }
11438
11439 ready[i] = first_older_only_insn;
11440 return;
11441 }
11442
11443 /* Implement TARGET_SCHED_REORDER. */
11444 static int
11445 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11446 int clock)
11447 {
11448 switch (arm_tune)
11449 {
11450 case TARGET_CPU_cortexa7:
11451 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11452 break;
11453 default:
11454 /* Do nothing for other cores. */
11455 break;
11456 }
11457
11458 return arm_issue_rate ();
11459 }
11460
11461 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11462 It corrects the value of COST based on the relationship between
11463 INSN and DEP of dependence kind DEP_TYPE. It returns the new
11464 value. There is a per-core adjust_cost hook to adjust scheduler costs
11465 and the per-core hook can choose to completely override the generic
11466 adjust_cost function. Only put bits of code into arm_adjust_cost that
11467 are common across all cores. */
11468 static int
11469 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11470 unsigned int)
11471 {
11472 rtx i_pat, d_pat;
11473
11474 /* When generating Thumb-1 code, we want to place flag-setting operations
11475 close to a conditional branch which depends on them, so that we can
11476 omit the comparison. */
11477 if (TARGET_THUMB1
11478 && dep_type == 0
11479 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11480 && recog_memoized (dep) >= 0
11481 && get_attr_conds (dep) == CONDS_SET)
11482 return 0;
11483
11484 if (current_tune->sched_adjust_cost != NULL)
11485 {
11486 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11487 return cost;
11488 }
11489
11490 /* XXX Is this strictly true? */
11491 if (dep_type == REG_DEP_ANTI
11492 || dep_type == REG_DEP_OUTPUT)
11493 return 0;
11494
11495 /* Call insns don't incur a stall, even if they follow a load. */
11496 if (dep_type == 0
11497 && CALL_P (insn))
11498 return 1;
11499
11500 if ((i_pat = single_set (insn)) != NULL
11501 && MEM_P (SET_SRC (i_pat))
11502 && (d_pat = single_set (dep)) != NULL
11503 && MEM_P (SET_DEST (d_pat)))
11504 {
11505 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11506 /* This is a load after a store; there is no conflict if the load reads
11507 from a cached area. Assume that loads from the stack, and from the
11508 constant pool are cached, and that others will miss. This is a
11509 hack. */
11510
11511 if ((GET_CODE (src_mem) == SYMBOL_REF
11512 && CONSTANT_POOL_ADDRESS_P (src_mem))
11513 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11514 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11515 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11516 return 1;
11517 }
11518
11519 return cost;
11520 }
11521
11522 int
11523 arm_max_conditional_execute (void)
11524 {
11525 return max_insns_skipped;
11526 }
11527
11528 static int
11529 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11530 {
11531 if (TARGET_32BIT)
11532 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11533 else
11534 return (optimize > 0) ? 2 : 0;
11535 }
11536
11537 static int
11538 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11539 {
11540 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11541 }
11542
11543 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11544 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11545 sequences of non-executed instructions in IT blocks probably take the same
11546 amount of time as executed instructions (and the IT instruction itself takes
11547 space in icache). This function was experimentally determined to give good
11548 results on a popular embedded benchmark. */
11549
11550 static int
11551 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11552 {
11553 return (TARGET_32BIT && speed_p) ? 1
11554 : arm_default_branch_cost (speed_p, predictable_p);
11555 }
11556
11557 static int
11558 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11559 {
11560 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11561 }
11562
11563 static bool fp_consts_inited = false;
11564
11565 static REAL_VALUE_TYPE value_fp0;
11566
11567 static void
11568 init_fp_table (void)
11569 {
11570 REAL_VALUE_TYPE r;
11571
11572 r = REAL_VALUE_ATOF ("0", DFmode);
11573 value_fp0 = r;
11574 fp_consts_inited = true;
11575 }
11576
11577 /* Return TRUE if rtx X is a valid immediate FP constant. */
11578 int
11579 arm_const_double_rtx (rtx x)
11580 {
11581 const REAL_VALUE_TYPE *r;
11582
11583 if (!fp_consts_inited)
11584 init_fp_table ();
11585
11586 r = CONST_DOUBLE_REAL_VALUE (x);
11587 if (REAL_VALUE_MINUS_ZERO (*r))
11588 return 0;
11589
11590 if (real_equal (r, &value_fp0))
11591 return 1;
11592
11593 return 0;
11594 }
11595
11596 /* VFPv3 has a fairly wide range of representable immediates, formed from
11597 "quarter-precision" floating-point values. These can be evaluated using this
11598 formula (with ^ for exponentiation):
11599
11600 (-1)^s * n * 2^-r
11601
11602 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11603 16 <= n <= 31 and 0 <= r <= 7.
11604
11605 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11606
11607 - A (most-significant) is the sign bit.
11608 - BCD are the exponent (encoded as r XOR 3).
11609 - EFGH are the mantissa (encoded as n - 16).
11610 */
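/* A worked example, added for illustration and derived purely from the
   formula above: 1.0 = 16 * 2^-4, so s = 0, n = 16 and r = 4.  The encoded
   index is then A = 0, BCD = 4 XOR 3 = 0b111, EFGH = 16 - 16 = 0b0000,
   i.e. 0x70, which is the value vfp3_const_double_index below computes
   from (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16).  */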
11611
11612 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11613 fconst[sd] instruction, or -1 if X isn't suitable. */
11614 static int
11615 vfp3_const_double_index (rtx x)
11616 {
11617 REAL_VALUE_TYPE r, m;
11618 int sign, exponent;
11619 unsigned HOST_WIDE_INT mantissa, mant_hi;
11620 unsigned HOST_WIDE_INT mask;
11621 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11622 bool fail;
11623
11624 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11625 return -1;
11626
11627 r = *CONST_DOUBLE_REAL_VALUE (x);
11628
11629 /* We can't represent these things, so detect them first. */
11630 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11631 return -1;
11632
11633 /* Extract sign, exponent and mantissa. */
11634 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11635 r = real_value_abs (&r);
11636 exponent = REAL_EXP (&r);
11637 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11638 highest (sign) bit, with a fixed binary point at bit point_pos.
11639 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11640 bits for the mantissa, this may fail (low bits would be lost). */
11641 real_ldexp (&m, &r, point_pos - exponent);
11642 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11643 mantissa = w.elt (0);
11644 mant_hi = w.elt (1);
11645
11646 /* If there are bits set in the low part of the mantissa, we can't
11647 represent this value. */
11648 if (mantissa != 0)
11649 return -1;
11650
11651 /* Now make it so that mantissa contains the most-significant bits, and move
11652 the point_pos to indicate that the least-significant bits have been
11653 discarded. */
11654 point_pos -= HOST_BITS_PER_WIDE_INT;
11655 mantissa = mant_hi;
11656
11657 /* We can permit four significant bits of mantissa only, plus a high bit
11658 which is always 1. */
11659 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11660 if ((mantissa & mask) != 0)
11661 return -1;
11662
11663 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11664 mantissa >>= point_pos - 5;
11665
11666 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11667 floating-point immediate zero with Neon using an integer-zero load, but
11668 that case is handled elsewhere.) */
11669 if (mantissa == 0)
11670 return -1;
11671
11672 gcc_assert (mantissa >= 16 && mantissa <= 31);
11673
11674 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11675 normalized significands are in the range [1, 2). (Our mantissa is shifted
11676 left 4 places at this point relative to normalized IEEE754 values). GCC
11677 internally uses [0.5, 1) (see real.c), so the exponent returned from
11678 REAL_EXP must be altered. */
11679 exponent = 5 - exponent;
11680
11681 if (exponent < 0 || exponent > 7)
11682 return -1;
11683
11684 /* Sign, mantissa and exponent are now in the correct form to plug into the
11685 formula described in the comment above. */
11686 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11687 }
11688
11689 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11690 int
11691 vfp3_const_double_rtx (rtx x)
11692 {
11693 if (!TARGET_VFP3)
11694 return 0;
11695
11696 return vfp3_const_double_index (x) != -1;
11697 }
11698
11699 /* Recognize immediates which can be used in various Neon instructions. Legal
11700 immediates are described by the following table (for VMVN variants, the
11701 bitwise inverse of the constant shown is recognized. In either case, VMOV
11702 is output and the correct instruction to use for a given constant is chosen
11703 by the assembler). The constant shown is replicated across all elements of
11704 the destination vector.
11705
11706 insn elems variant constant (binary)
11707 ---- ----- ------- -----------------
11708 vmov i32 0 00000000 00000000 00000000 abcdefgh
11709 vmov i32 1 00000000 00000000 abcdefgh 00000000
11710 vmov i32 2 00000000 abcdefgh 00000000 00000000
11711 vmov i32 3 abcdefgh 00000000 00000000 00000000
11712 vmov i16 4 00000000 abcdefgh
11713 vmov i16 5 abcdefgh 00000000
11714 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11715 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11716 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11717 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11718 vmvn i16 10 00000000 abcdefgh
11719 vmvn i16 11 abcdefgh 00000000
11720 vmov i32 12 00000000 00000000 abcdefgh 11111111
11721 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11722 vmov i32 14 00000000 abcdefgh 11111111 11111111
11723 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11724 vmov i8 16 abcdefgh
11725 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11726 eeeeeeee ffffffff gggggggg hhhhhhhh
11727 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11728 vmov f32 19 00000000 00000000 00000000 00000000
11729
11730 For case 18, B = !b. Representable values are exactly those accepted by
11731 vfp3_const_double_index, but are output as floating-point numbers rather
11732 than indices.
11733
11734 For case 19, we will change it to vmov.i32 when assembling.
11735
11736 Variants 0-5 (inclusive) may also be used as immediates for the second
11737 operand of VORR/VBIC instructions.
11738
11739 The INVERSE argument causes the bitwise inverse of the given operand to be
11740 recognized instead (used for recognizing legal immediates for the VAND/VORN
11741 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11742 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11743 output, rather than the real insns vbic/vorr).
11744
11745 INVERSE makes no difference to the recognition of float vectors.
11746
11747 The return value is the variant of immediate as shown in the above table, or
11748 -1 if the given value doesn't match any of the listed patterns.
11749 */
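/* An illustrative example (not part of the original table): the V4SImode
   constant with every element equal to 0x00ab0000 matches variant 2 above
   with abcdefgh = 0xab and an element width of 32, so it can be moved with
   a single vmov.i32; its bitwise inverse matches the corresponding vmvn
   form, variant 8.  */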
11750 static int
11751 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11752 rtx *modconst, int *elementwidth)
11753 {
11754 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11755 matches = 1; \
11756 for (i = 0; i < idx; i += (STRIDE)) \
11757 if (!(TEST)) \
11758 matches = 0; \
11759 if (matches) \
11760 { \
11761 immtype = (CLASS); \
11762 elsize = (ELSIZE); \
11763 break; \
11764 }
11765
11766 unsigned int i, elsize = 0, idx = 0, n_elts;
11767 unsigned int innersize;
11768 unsigned char bytes[16];
11769 int immtype = -1, matches;
11770 unsigned int invmask = inverse ? 0xff : 0;
11771 bool vector = GET_CODE (op) == CONST_VECTOR;
11772
11773 if (vector)
11774 n_elts = CONST_VECTOR_NUNITS (op);
11775 else
11776 {
11777 n_elts = 1;
11778 if (mode == VOIDmode)
11779 mode = DImode;
11780 }
11781
11782 innersize = GET_MODE_UNIT_SIZE (mode);
11783
11784 /* Vectors of float constants. */
11785 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11786 {
11787 rtx el0 = CONST_VECTOR_ELT (op, 0);
11788
11789 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11790 return -1;
11791
11792 /* FP16 vectors cannot be represented. */
11793 if (GET_MODE_INNER (mode) == HFmode)
11794 return -1;
11795
11796 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11797 are distinct in this context. */
11798 if (!const_vec_duplicate_p (op))
11799 return -1;
11800
11801 if (modconst)
11802 *modconst = CONST_VECTOR_ELT (op, 0);
11803
11804 if (elementwidth)
11805 *elementwidth = 0;
11806
11807 if (el0 == CONST0_RTX (GET_MODE (el0)))
11808 return 19;
11809 else
11810 return 18;
11811 }
11812
11813 /* The tricks done in the code below apply for little-endian vector layout.
11814 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11815 FIXME: Implement logic for big-endian vectors. */
11816 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11817 return -1;
11818
11819 /* Splat vector constant out into a byte vector. */
11820 for (i = 0; i < n_elts; i++)
11821 {
11822 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11823 unsigned HOST_WIDE_INT elpart;
11824
11825 gcc_assert (CONST_INT_P (el));
11826 elpart = INTVAL (el);
11827
11828 for (unsigned int byte = 0; byte < innersize; byte++)
11829 {
11830 bytes[idx++] = (elpart & 0xff) ^ invmask;
11831 elpart >>= BITS_PER_UNIT;
11832 }
11833 }
11834
11835 /* Sanity check. */
11836 gcc_assert (idx == GET_MODE_SIZE (mode));
11837
11838 do
11839 {
11840 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11841 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11842
11843 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11844 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11845
11846 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11847 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11848
11849 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11850 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11851
11852 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11853
11854 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
11855
11856 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
11857 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11858
11859 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11860 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11861
11862 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
11863 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11864
11865 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
11866 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
11867
11868 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
11869
11870 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
11871
11872 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
11873 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11874
11875 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11876 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
11877
11878 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
11879 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11880
11881 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
11882 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
11883
11884 CHECK (1, 8, 16, bytes[i] == bytes[0]);
11885
11886 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
11887 && bytes[i] == bytes[(i + 8) % idx]);
11888 }
11889 while (0);
11890
11891 if (immtype == -1)
11892 return -1;
11893
11894 if (elementwidth)
11895 *elementwidth = elsize;
11896
11897 if (modconst)
11898 {
11899 unsigned HOST_WIDE_INT imm = 0;
11900
11901 /* Un-invert bytes of recognized vector, if necessary. */
11902 if (invmask != 0)
11903 for (i = 0; i < idx; i++)
11904 bytes[i] ^= invmask;
11905
11906 if (immtype == 17)
11907 {
11908 /* FIXME: Broken on 32-bit H_W_I hosts. */
11909 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
11910
11911 for (i = 0; i < 8; i++)
11912 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
11913 << (i * BITS_PER_UNIT);
11914
11915 *modconst = GEN_INT (imm);
11916 }
11917 else
11918 {
11919 unsigned HOST_WIDE_INT imm = 0;
11920
11921 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
11922 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
11923
11924 *modconst = GEN_INT (imm);
11925 }
11926 }
11927
11928 return immtype;
11929 #undef CHECK
11930 }
11931
11932 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
11933 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
11934 float elements), and a modified constant (whatever should be output for a
11935 VMOV) in *MODCONST. */
11936
11937 int
11938 neon_immediate_valid_for_move (rtx op, machine_mode mode,
11939 rtx *modconst, int *elementwidth)
11940 {
11941 rtx tmpconst;
11942 int tmpwidth;
11943 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
11944
11945 if (retval == -1)
11946 return 0;
11947
11948 if (modconst)
11949 *modconst = tmpconst;
11950
11951 if (elementwidth)
11952 *elementwidth = tmpwidth;
11953
11954 return 1;
11955 }
11956
11957 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
11958 the immediate is valid, write a constant suitable for using as an operand
11959 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
11960 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
11961
11962 int
11963 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
11964 rtx *modconst, int *elementwidth)
11965 {
11966 rtx tmpconst;
11967 int tmpwidth;
11968 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
11969
11970 if (retval < 0 || retval > 5)
11971 return 0;
11972
11973 if (modconst)
11974 *modconst = tmpconst;
11975
11976 if (elementwidth)
11977 *elementwidth = tmpwidth;
11978
11979 return 1;
11980 }
11981
11982 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
11983 the immediate is valid, write a constant suitable for using as an operand
11984 to VSHR/VSHL to *MODCONST and the corresponding element width to
11985 *ELEMENTWIDTH. ISLEFTSHIFT says whether this is a left or a right shift,
11986 because the two have different limitations. */
11987
11988 int
11989 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
11990 rtx *modconst, int *elementwidth,
11991 bool isleftshift)
11992 {
11993 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
11994 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
11995 unsigned HOST_WIDE_INT last_elt = 0;
11996 unsigned HOST_WIDE_INT maxshift;
11997
11998 /* Check that all elements of the vector constant are the same shift amount. */
11999 for (i = 0; i < n_elts; i++)
12000 {
12001 rtx el = CONST_VECTOR_ELT (op, i);
12002 unsigned HOST_WIDE_INT elpart;
12003
12004 if (CONST_INT_P (el))
12005 elpart = INTVAL (el);
12006 else if (CONST_DOUBLE_P (el))
12007 return 0;
12008 else
12009 gcc_unreachable ();
12010
12011 if (i != 0 && elpart != last_elt)
12012 return 0;
12013
12014 last_elt = elpart;
12015 }
12016
12017 /* Shift less than element size. */
12018 maxshift = innersize * 8;
12019
12020 if (isleftshift)
12021 {
12022 /* Left shift immediate value can be from 0 to <size>-1. */
12023 if (last_elt >= maxshift)
12024 return 0;
12025 }
12026 else
12027 {
12028 /* Right shift immediate value can be from 1 to <size>. */
12029 if (last_elt == 0 || last_elt > maxshift)
12030 return 0;
12031 }
12032
12033 if (elementwidth)
12034 *elementwidth = innersize * 8;
12035
12036 if (modconst)
12037 *modconst = CONST_VECTOR_ELT (op, 0);
12038
12039 return 1;
12040 }
12041
12042 /* Return a string suitable for output of Neon immediate logic operation
12043 MNEM. */
12044
12045 char *
12046 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12047 int inverse, int quad)
12048 {
12049 int width, is_valid;
12050 static char templ[40];
12051
12052 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12053
12054 gcc_assert (is_valid != 0);
12055
12056 if (quad)
12057 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12058 else
12059 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12060
12061 return templ;
12062 }
12063
12064 /* Return a string suitable for output of Neon immediate shift operation
12065 (VSHR or VSHL) MNEM. */
12066
12067 char *
12068 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12069 machine_mode mode, int quad,
12070 bool isleftshift)
12071 {
12072 int width, is_valid;
12073 static char templ[40];
12074
12075 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12076 gcc_assert (is_valid != 0);
12077
12078 if (quad)
12079 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12080 else
12081 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12082
12083 return templ;
12084 }
12085
12086 /* Output a sequence of pairwise operations to implement a reduction.
12087 NOTE: We do "too much work" here, because pairwise operations work on two
12088 registers-worth of operands in one go. Unfortunately, I don't think we can
12089 exploit those extra calculations to do the full operation in fewer steps.
12090 Although all vector elements of the result but the first are ignored, we
12091 actually calculate the same result in each of the elements. An alternative
12092 such as initially loading a vector with zero to use as each of the second
12093 operands would use up an additional register and take an extra instruction,
12094 for no particular gain. */
12095
12096 void
12097 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12098 rtx (*reduc) (rtx, rtx, rtx))
12099 {
12100 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12101 rtx tmpsum = op1;
12102
12103 for (i = parts / 2; i >= 1; i /= 2)
12104 {
12105 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12106 emit_insn (reduc (dest, tmpsum, tmpsum));
12107 tmpsum = dest;
12108 }
12109 }
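/* For example (added for clarity, following the loop above): for a
   four-element mode such as V4SImode, PARTS is 4, so two pairwise
   operations are emitted: the first into a fresh scratch register and
   the second (i == 1) directly into OP0.  A two-element mode such as
   V2SImode needs only the single i == 1 step.  */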
12110
12111 /* If VALS is a vector constant that can be loaded into a register
12112 using VDUP, generate instructions to do so and return an RTX to
12113 assign to the register. Otherwise return NULL_RTX. */
12114
12115 static rtx
12116 neon_vdup_constant (rtx vals)
12117 {
12118 machine_mode mode = GET_MODE (vals);
12119 machine_mode inner_mode = GET_MODE_INNER (mode);
12120 rtx x;
12121
12122 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12123 return NULL_RTX;
12124
12125 if (!const_vec_duplicate_p (vals, &x))
12126 /* The elements are not all the same. We could handle repeating
12127 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12128 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12129 vdup.i16). */
12130 return NULL_RTX;
12131
12132 /* We can load this constant by using VDUP and a constant in a
12133 single ARM register. This will be cheaper than a vector
12134 load. */
12135
12136 x = copy_to_mode_reg (inner_mode, x);
12137 return gen_rtx_VEC_DUPLICATE (mode, x);
12138 }
12139
12140 /* Generate code to load VALS, which is a PARALLEL containing only
12141 constants (for vec_init) or CONST_VECTOR, efficiently into a
12142 register. Returns an RTX to copy into the register, or NULL_RTX
12143 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12144
12145 rtx
12146 neon_make_constant (rtx vals)
12147 {
12148 machine_mode mode = GET_MODE (vals);
12149 rtx target;
12150 rtx const_vec = NULL_RTX;
12151 int n_elts = GET_MODE_NUNITS (mode);
12152 int n_const = 0;
12153 int i;
12154
12155 if (GET_CODE (vals) == CONST_VECTOR)
12156 const_vec = vals;
12157 else if (GET_CODE (vals) == PARALLEL)
12158 {
12159 /* A CONST_VECTOR must contain only CONST_INTs and
12160 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12161 Only store valid constants in a CONST_VECTOR. */
12162 for (i = 0; i < n_elts; ++i)
12163 {
12164 rtx x = XVECEXP (vals, 0, i);
12165 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12166 n_const++;
12167 }
12168 if (n_const == n_elts)
12169 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12170 }
12171 else
12172 gcc_unreachable ();
12173
12174 if (const_vec != NULL
12175 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12176 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12177 return const_vec;
12178 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12179 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12180 pipeline cycle; creating the constant takes one or two ARM
12181 pipeline cycles. */
12182 return target;
12183 else if (const_vec != NULL_RTX)
12184 /* Load from constant pool. On Cortex-A8 this takes two cycles
12185 (for either double or quad vectors). We can not take advantage
12186 of single-cycle VLD1 because we need a PC-relative addressing
12187 mode. */
12188 return const_vec;
12189 else
12190 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12191 We can not construct an initializer. */
12192 return NULL_RTX;
12193 }
12194
12195 /* Initialize vector TARGET to VALS. */
12196
12197 void
12198 neon_expand_vector_init (rtx target, rtx vals)
12199 {
12200 machine_mode mode = GET_MODE (target);
12201 machine_mode inner_mode = GET_MODE_INNER (mode);
12202 int n_elts = GET_MODE_NUNITS (mode);
12203 int n_var = 0, one_var = -1;
12204 bool all_same = true;
12205 rtx x, mem;
12206 int i;
12207
12208 for (i = 0; i < n_elts; ++i)
12209 {
12210 x = XVECEXP (vals, 0, i);
12211 if (!CONSTANT_P (x))
12212 ++n_var, one_var = i;
12213
12214 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12215 all_same = false;
12216 }
12217
12218 if (n_var == 0)
12219 {
12220 rtx constant = neon_make_constant (vals);
12221 if (constant != NULL_RTX)
12222 {
12223 emit_move_insn (target, constant);
12224 return;
12225 }
12226 }
12227
12228 /* Splat a single non-constant element if we can. */
12229 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12230 {
12231 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12232 emit_insn (gen_rtx_SET (target, gen_rtx_VEC_DUPLICATE (mode, x)));
12233 return;
12234 }
12235
12236 /* One field is non-constant. Load constant then overwrite varying
12237 field. This is more efficient than using the stack. */
12238 if (n_var == 1)
12239 {
12240 rtx copy = copy_rtx (vals);
12241 rtx index = GEN_INT (one_var);
12242
12243 /* Load constant part of vector, substitute neighboring value for
12244 varying element. */
12245 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12246 neon_expand_vector_init (target, copy);
12247
12248 /* Insert variable. */
12249 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12250 switch (mode)
12251 {
12252 case E_V8QImode:
12253 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12254 break;
12255 case E_V16QImode:
12256 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12257 break;
12258 case E_V4HImode:
12259 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12260 break;
12261 case E_V8HImode:
12262 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12263 break;
12264 case E_V2SImode:
12265 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12266 break;
12267 case E_V4SImode:
12268 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12269 break;
12270 case E_V2SFmode:
12271 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12272 break;
12273 case E_V4SFmode:
12274 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12275 break;
12276 case E_V2DImode:
12277 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12278 break;
12279 default:
12280 gcc_unreachable ();
12281 }
12282 return;
12283 }
12284
12285 /* Construct the vector in memory one field at a time
12286 and load the whole vector. */
12287 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12288 for (i = 0; i < n_elts; i++)
12289 emit_move_insn (adjust_address_nv (mem, inner_mode,
12290 i * GET_MODE_SIZE (inner_mode)),
12291 XVECEXP (vals, 0, i));
12292 emit_move_insn (target, mem);
12293 }
12294
12295 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12296 an error if it doesn't. EXP indicates the source location, which includes the
12297 inlining history for intrinsics. */
12298
12299 static void
12300 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12301 const_tree exp, const char *desc)
12302 {
12303 HOST_WIDE_INT lane;
12304
12305 gcc_assert (CONST_INT_P (operand));
12306
12307 lane = INTVAL (operand);
12308
12309 if (lane < low || lane >= high)
12310 {
12311 if (exp)
12312 error ("%K%s %wd out of range %wd - %wd",
12313 exp, desc, lane, low, high - 1);
12314 else
12315 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12316 }
12317 }
12318
12319 /* Bounds-check lanes. */
12320
12321 void
12322 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12323 const_tree exp)
12324 {
12325 bounds_check (operand, low, high, exp, "lane");
12326 }
12327
12328 /* Bounds-check constants. */
12329
12330 void
12331 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12332 {
12333 bounds_check (operand, low, high, NULL_TREE, "constant");
12334 }
12335
12336 HOST_WIDE_INT
12337 neon_element_bits (machine_mode mode)
12338 {
12339 return GET_MODE_UNIT_BITSIZE (mode);
12340 }
12341
12342 \f
12343 /* Predicates for `match_operand' and `match_operator'. */
12344
12345 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12346 WB is true if full writeback address modes are allowed and is false
12347 if limited writeback address modes (POST_INC and PRE_DEC) are
12348 allowed. */
12349
12350 int
12351 arm_coproc_mem_operand (rtx op, bool wb)
12352 {
12353 rtx ind;
12354
12355 /* Reject eliminable registers. */
12356 if (! (reload_in_progress || reload_completed || lra_in_progress)
12357 && ( reg_mentioned_p (frame_pointer_rtx, op)
12358 || reg_mentioned_p (arg_pointer_rtx, op)
12359 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12360 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12361 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12362 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12363 return FALSE;
12364
12365 /* Constants are converted into offsets from labels. */
12366 if (!MEM_P (op))
12367 return FALSE;
12368
12369 ind = XEXP (op, 0);
12370
12371 if (reload_completed
12372 && (GET_CODE (ind) == LABEL_REF
12373 || (GET_CODE (ind) == CONST
12374 && GET_CODE (XEXP (ind, 0)) == PLUS
12375 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12376 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12377 return TRUE;
12378
12379 /* Match: (mem (reg)). */
12380 if (REG_P (ind))
12381 return arm_address_register_rtx_p (ind, 0);
12382
12383 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12384 acceptable in any case (subject to verification by
12385 arm_address_register_rtx_p). We need WB to be true to accept
12386 PRE_INC and POST_DEC. */
12387 if (GET_CODE (ind) == POST_INC
12388 || GET_CODE (ind) == PRE_DEC
12389 || (wb
12390 && (GET_CODE (ind) == PRE_INC
12391 || GET_CODE (ind) == POST_DEC)))
12392 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12393
12394 if (wb
12395 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12396 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12397 && GET_CODE (XEXP (ind, 1)) == PLUS
12398 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12399 ind = XEXP (ind, 1);
12400
12401 /* Match:
12402 (plus (reg)
12403 (const)). */
12404 if (GET_CODE (ind) == PLUS
12405 && REG_P (XEXP (ind, 0))
12406 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12407 && CONST_INT_P (XEXP (ind, 1))
12408 && INTVAL (XEXP (ind, 1)) > -1024
12409 && INTVAL (XEXP (ind, 1)) < 1024
12410 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12411 return TRUE;
12412
12413 return FALSE;
12414 }
12415
12416 /* Return TRUE if OP is a memory operand which we can load or store a vector
12417 to/from. TYPE is one of the following values:
12418 0 - Vector load/store (vldr)
12419 1 - Core registers (ldm)
12420 2 - Element/structure loads (vld1)
12421 */
12422 int
12423 neon_vector_mem_operand (rtx op, int type, bool strict)
12424 {
12425 rtx ind;
12426
12427 /* Reject eliminable registers. */
12428 if (strict && ! (reload_in_progress || reload_completed)
12429 && (reg_mentioned_p (frame_pointer_rtx, op)
12430 || reg_mentioned_p (arg_pointer_rtx, op)
12431 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12432 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12433 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12434 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12435 return FALSE;
12436
12437 /* Constants are converted into offsets from labels. */
12438 if (!MEM_P (op))
12439 return FALSE;
12440
12441 ind = XEXP (op, 0);
12442
12443 if (reload_completed
12444 && (GET_CODE (ind) == LABEL_REF
12445 || (GET_CODE (ind) == CONST
12446 && GET_CODE (XEXP (ind, 0)) == PLUS
12447 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12448 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12449 return TRUE;
12450
12451 /* Match: (mem (reg)). */
12452 if (REG_P (ind))
12453 return arm_address_register_rtx_p (ind, 0);
12454
12455 /* Allow post-increment with Neon registers. */
12456 if ((type != 1 && GET_CODE (ind) == POST_INC)
12457 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12458 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12459
12460 /* Allow post-increment by register for VLDn */
12461 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12462 && GET_CODE (XEXP (ind, 1)) == PLUS
12463 && REG_P (XEXP (XEXP (ind, 1), 1)))
12464 return true;
12465
12466 /* Match:
12467 (plus (reg)
12468 (const)). */
12469 if (type == 0
12470 && GET_CODE (ind) == PLUS
12471 && REG_P (XEXP (ind, 0))
12472 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12473 && CONST_INT_P (XEXP (ind, 1))
12474 && INTVAL (XEXP (ind, 1)) > -1024
12475 /* For quad modes, we restrict the constant offset to be slightly less
12476 than what the instruction format permits. We have no such constraint
12477 on double mode offsets. (This must match arm_legitimate_index_p.) */
12478 && (INTVAL (XEXP (ind, 1))
12479 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12480 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12481 return TRUE;
12482
12483 return FALSE;
12484 }
12485
12486 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12487 type. */
12488 int
12489 neon_struct_mem_operand (rtx op)
12490 {
12491 rtx ind;
12492
12493 /* Reject eliminable registers. */
12494 if (! (reload_in_progress || reload_completed)
12495 && ( reg_mentioned_p (frame_pointer_rtx, op)
12496 || reg_mentioned_p (arg_pointer_rtx, op)
12497 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12498 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12499 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12500 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12501 return FALSE;
12502
12503 /* Constants are converted into offsets from labels. */
12504 if (!MEM_P (op))
12505 return FALSE;
12506
12507 ind = XEXP (op, 0);
12508
12509 if (reload_completed
12510 && (GET_CODE (ind) == LABEL_REF
12511 || (GET_CODE (ind) == CONST
12512 && GET_CODE (XEXP (ind, 0)) == PLUS
12513 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12514 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12515 return TRUE;
12516
12517 /* Match: (mem (reg)). */
12518 if (REG_P (ind))
12519 return arm_address_register_rtx_p (ind, 0);
12520
12521 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12522 if (GET_CODE (ind) == POST_INC
12523 || GET_CODE (ind) == PRE_DEC)
12524 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12525
12526 return FALSE;
12527 }
12528
12529 /* Return true if X is a register that will be eliminated later on. */
12530 int
12531 arm_eliminable_register (rtx x)
12532 {
12533 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12534 || REGNO (x) == ARG_POINTER_REGNUM
12535 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12536 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12537 }
12538
12539 /* Return GENERAL_REGS if a scratch register is required to reload x to/from
12540 coprocessor registers. Otherwise return NO_REGS. */
12541
12542 enum reg_class
12543 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12544 {
12545 if (mode == HFmode)
12546 {
12547 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12548 return GENERAL_REGS;
12549 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12550 return NO_REGS;
12551 return GENERAL_REGS;
12552 }
12553
12554 /* The neon move patterns handle all legitimate vector and struct
12555 addresses. */
12556 if (TARGET_NEON
12557 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12558 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12559 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12560 || VALID_NEON_STRUCT_MODE (mode)))
12561 return NO_REGS;
12562
12563 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12564 return NO_REGS;
12565
12566 return GENERAL_REGS;
12567 }
12568
12569 /* Values which must be returned in the most-significant end of the return
12570 register. */
12571
12572 static bool
12573 arm_return_in_msb (const_tree valtype)
12574 {
12575 return (TARGET_AAPCS_BASED
12576 && BYTES_BIG_ENDIAN
12577 && (AGGREGATE_TYPE_P (valtype)
12578 || TREE_CODE (valtype) == COMPLEX_TYPE
12579 || FIXED_POINT_TYPE_P (valtype)));
12580 }
12581
12582 /* Return TRUE if X references a SYMBOL_REF. */
12583 int
12584 symbol_mentioned_p (rtx x)
12585 {
12586 const char * fmt;
12587 int i;
12588
12589 if (GET_CODE (x) == SYMBOL_REF)
12590 return 1;
12591
12592 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12593 are constant offsets, not symbols. */
12594 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12595 return 0;
12596
12597 fmt = GET_RTX_FORMAT (GET_CODE (x));
12598
12599 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12600 {
12601 if (fmt[i] == 'E')
12602 {
12603 int j;
12604
12605 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12606 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12607 return 1;
12608 }
12609 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12610 return 1;
12611 }
12612
12613 return 0;
12614 }
12615
12616 /* Return TRUE if X references a LABEL_REF. */
12617 int
12618 label_mentioned_p (rtx x)
12619 {
12620 const char * fmt;
12621 int i;
12622
12623 if (GET_CODE (x) == LABEL_REF)
12624 return 1;
12625
12626 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12627 instruction, but they are constant offsets, not symbols. */
12628 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12629 return 0;
12630
12631 fmt = GET_RTX_FORMAT (GET_CODE (x));
12632 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12633 {
12634 if (fmt[i] == 'E')
12635 {
12636 int j;
12637
12638 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12639 if (label_mentioned_p (XVECEXP (x, i, j)))
12640 return 1;
12641 }
12642 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12643 return 1;
12644 }
12645
12646 return 0;
12647 }
12648
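/* Return TRUE if X is an UNSPEC_TLS reference, possibly wrapped in a CONST. */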
12649 int
12650 tls_mentioned_p (rtx x)
12651 {
12652 switch (GET_CODE (x))
12653 {
12654 case CONST:
12655 return tls_mentioned_p (XEXP (x, 0));
12656
12657 case UNSPEC:
12658 if (XINT (x, 1) == UNSPEC_TLS)
12659 return 1;
12660
12661 /* Fall through. */
12662 default:
12663 return 0;
12664 }
12665 }
12666
12667 /* Must not copy any rtx that uses a pc-relative address.
12668 Also, disallow copying of load-exclusive instructions that
12669 may appear after splitting of compare-and-swap-style operations
12670 so as to prevent those loops from being transformed away from their
12671 canonical forms (see PR 69904). */
12672
12673 static bool
12674 arm_cannot_copy_insn_p (rtx_insn *insn)
12675 {
12676 /* The tls call insn cannot be copied, as it is paired with a data
12677 word. */
12678 if (recog_memoized (insn) == CODE_FOR_tlscall)
12679 return true;
12680
12681 subrtx_iterator::array_type array;
12682 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12683 {
12684 const_rtx x = *iter;
12685 if (GET_CODE (x) == UNSPEC
12686 && (XINT (x, 1) == UNSPEC_PIC_BASE
12687 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12688 return true;
12689 }
12690
12691 rtx set = single_set (insn);
12692 if (set)
12693 {
12694 rtx src = SET_SRC (set);
12695 if (GET_CODE (src) == ZERO_EXTEND)
12696 src = XEXP (src, 0);
12697
12698 /* Catch the load-exclusive and load-acquire operations. */
12699 if (GET_CODE (src) == UNSPEC_VOLATILE
12700 && (XINT (src, 1) == VUNSPEC_LL
12701 || XINT (src, 1) == VUNSPEC_LAX))
12702 return true;
12703 }
12704 return false;
12705 }
12706
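/* Return the RTL comparison code corresponding to the min/max operation X,
e.g. GE for SMAX. */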
12707 enum rtx_code
12708 minmax_code (rtx x)
12709 {
12710 enum rtx_code code = GET_CODE (x);
12711
12712 switch (code)
12713 {
12714 case SMAX:
12715 return GE;
12716 case SMIN:
12717 return LE;
12718 case UMIN:
12719 return LEU;
12720 case UMAX:
12721 return GEU;
12722 default:
12723 gcc_unreachable ();
12724 }
12725 }
12726
12727 /* Match pair of min/max operators that can be implemented via usat/ssat. */
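/* For example (illustrative): bounds [0, 255] match with *MASK == 8 and
*SIGNED_SAT == false (an 8-bit usat range), while bounds [-128, 127] match
with *MASK == 8 and *SIGNED_SAT == true (an 8-bit ssat range). */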
12728
12729 bool
12730 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12731 int *mask, bool *signed_sat)
12732 {
12733 /* The high bound must be a power of two minus one. */
12734 int log = exact_log2 (INTVAL (hi_bound) + 1);
12735 if (log == -1)
12736 return false;
12737
12738 /* The low bound is either zero (for usat) or one less than the
12739 negation of the high bound (for ssat). */
12740 if (INTVAL (lo_bound) == 0)
12741 {
12742 if (mask)
12743 *mask = log;
12744 if (signed_sat)
12745 *signed_sat = false;
12746
12747 return true;
12748 }
12749
12750 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12751 {
12752 if (mask)
12753 *mask = log + 1;
12754 if (signed_sat)
12755 *signed_sat = true;
12756
12757 return true;
12758 }
12759
12760 return false;
12761 }
12762
12763 /* Return 1 if memory locations are adjacent. */
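/* Roughly speaking, (mem (reg r3)) and (mem (plus (reg r3) (const_int 4)))
are adjacent, subject to the extra restrictions checked below (no volatile
references, no eliminable base register, and on load-delay-slot targets
only when optimizing for size). */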
12764 int
12765 adjacent_mem_locations (rtx a, rtx b)
12766 {
12767 /* We don't guarantee to preserve the order of these memory refs. */
12768 if (volatile_refs_p (a) || volatile_refs_p (b))
12769 return 0;
12770
12771 if ((REG_P (XEXP (a, 0))
12772 || (GET_CODE (XEXP (a, 0)) == PLUS
12773 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12774 && (REG_P (XEXP (b, 0))
12775 || (GET_CODE (XEXP (b, 0)) == PLUS
12776 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12777 {
12778 HOST_WIDE_INT val0 = 0, val1 = 0;
12779 rtx reg0, reg1;
12780 int val_diff;
12781
12782 if (GET_CODE (XEXP (a, 0)) == PLUS)
12783 {
12784 reg0 = XEXP (XEXP (a, 0), 0);
12785 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12786 }
12787 else
12788 reg0 = XEXP (a, 0);
12789
12790 if (GET_CODE (XEXP (b, 0)) == PLUS)
12791 {
12792 reg1 = XEXP (XEXP (b, 0), 0);
12793 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12794 }
12795 else
12796 reg1 = XEXP (b, 0);
12797
12798 /* Don't accept any offset that will require multiple
12799 instructions to handle, since this would cause the
12800 arith_adjacentmem pattern to output an overlong sequence. */
12801 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12802 return 0;
12803
12804 /* Don't allow an eliminable register: register elimination can make
12805 the offset too large. */
12806 if (arm_eliminable_register (reg0))
12807 return 0;
12808
12809 val_diff = val1 - val0;
12810
12811 if (arm_ld_sched)
12812 {
12813 /* If the target has load delay slots, then there's no benefit
12814 to using an ldm instruction unless the offset is zero and
12815 we are optimizing for size. */
12816 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12817 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12818 && (val_diff == 4 || val_diff == -4));
12819 }
12820
12821 return ((REGNO (reg0) == REGNO (reg1))
12822 && (val_diff == 4 || val_diff == -4));
12823 }
12824
12825 return 0;
12826 }
12827
12828 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12829 for load operations, false for store operations. CONSECUTIVE is true
12830 if the register numbers in the operation must be consecutive in the register
12831 bank. RETURN_PC is true if the value is to be loaded into the PC.
12832 The pattern we are trying to match for load is:
12833 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12834 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12835 :
12836 :
12837 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12838 ]
12839 where
12840 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12841 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12842 3. If consecutive is TRUE, then for kth register being loaded,
12843 REGNO (R_dk) = REGNO (R_d0) + k.
12844 The pattern for store is similar. */
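/* Purely for illustration, a two-register SImode load with base write-back
would be matched in the form:
[(SET (Rb) (PLUS (Rb) (const_int 8)))
(SET (R_d0) (MEM (Rb)))
(SET (R_d1) (MEM (PLUS (Rb) (const_int 4))))
]
which corresponds to something like "ldmia Rb!, {R_d0, R_d1}". */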
12845 bool
12846 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12847 bool consecutive, bool return_pc)
12848 {
12849 HOST_WIDE_INT count = XVECLEN (op, 0);
12850 rtx reg, mem, addr;
12851 unsigned regno;
12852 unsigned first_regno;
12853 HOST_WIDE_INT i = 1, base = 0, offset = 0;
12854 rtx elt;
12855 bool addr_reg_in_reglist = false;
12856 bool update = false;
12857 int reg_increment;
12858 int offset_adj;
12859 int regs_per_val;
12860
12861 /* If not in SImode, then registers must be consecutive
12862 (e.g., VLDM instructions for DFmode). */
12863 gcc_assert ((mode == SImode) || consecutive);
12864 /* Setting return_pc for stores is illegal. */
12865 gcc_assert (!return_pc || load);
12866
12867 /* Set up the increments and the regs per val based on the mode. */
12868 reg_increment = GET_MODE_SIZE (mode);
12869 regs_per_val = reg_increment / 4;
12870 offset_adj = return_pc ? 1 : 0;
12871
12872 if (count <= 1
12873 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
12874 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
12875 return false;
12876
12877 /* Check if this is a write-back. */
12878 elt = XVECEXP (op, 0, offset_adj);
12879 if (GET_CODE (SET_SRC (elt)) == PLUS)
12880 {
12881 i++;
12882 base = 1;
12883 update = true;
12884
12885 /* The offset adjustment must be the number of registers being
12886 popped times the size of a single register. */
12887 if (!REG_P (SET_DEST (elt))
12888 || !REG_P (XEXP (SET_SRC (elt), 0))
12889 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
12890 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
12891 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
12892 ((count - 1 - offset_adj) * reg_increment))
12893 return false;
12894 }
12895
12896 i = i + offset_adj;
12897 base = base + offset_adj;
12898 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
12899 success depends on the type: VLDM can do just one reg,
12900 LDM must do at least two. */
12901 if ((count <= i) && (mode == SImode))
12902 return false;
12903
12904 elt = XVECEXP (op, 0, i - 1);
12905 if (GET_CODE (elt) != SET)
12906 return false;
12907
12908 if (load)
12909 {
12910 reg = SET_DEST (elt);
12911 mem = SET_SRC (elt);
12912 }
12913 else
12914 {
12915 reg = SET_SRC (elt);
12916 mem = SET_DEST (elt);
12917 }
12918
12919 if (!REG_P (reg) || !MEM_P (mem))
12920 return false;
12921
12922 regno = REGNO (reg);
12923 first_regno = regno;
12924 addr = XEXP (mem, 0);
12925 if (GET_CODE (addr) == PLUS)
12926 {
12927 if (!CONST_INT_P (XEXP (addr, 1)))
12928 return false;
12929
12930 offset = INTVAL (XEXP (addr, 1));
12931 addr = XEXP (addr, 0);
12932 }
12933
12934 if (!REG_P (addr))
12935 return false;
12936
12937 /* Don't allow SP to be loaded unless it is also the base register. It
12938 guarantees that SP is reset correctly when an LDM instruction
12939 is interrupted. Otherwise, we might end up with a corrupt stack. */
12940 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12941 return false;
12942
12943 for (; i < count; i++)
12944 {
12945 elt = XVECEXP (op, 0, i);
12946 if (GET_CODE (elt) != SET)
12947 return false;
12948
12949 if (load)
12950 {
12951 reg = SET_DEST (elt);
12952 mem = SET_SRC (elt);
12953 }
12954 else
12955 {
12956 reg = SET_SRC (elt);
12957 mem = SET_DEST (elt);
12958 }
12959
12960 if (!REG_P (reg)
12961 || GET_MODE (reg) != mode
12962 || REGNO (reg) <= regno
12963 || (consecutive
12964 && (REGNO (reg) !=
12965 (unsigned int) (first_regno + regs_per_val * (i - base))))
12966 /* Don't allow SP to be loaded unless it is also the base register. It
12967 guarantees that SP is reset correctly when an LDM instruction
12968 is interrupted. Otherwise, we might end up with a corrupt stack. */
12969 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
12970 || !MEM_P (mem)
12971 || GET_MODE (mem) != mode
12972 || ((GET_CODE (XEXP (mem, 0)) != PLUS
12973 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
12974 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
12975 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
12976 offset + (i - base) * reg_increment))
12977 && (!REG_P (XEXP (mem, 0))
12978 || offset + (i - base) * reg_increment != 0)))
12979 return false;
12980
12981 regno = REGNO (reg);
12982 if (regno == REGNO (addr))
12983 addr_reg_in_reglist = true;
12984 }
12985
12986 if (load)
12987 {
12988 if (update && addr_reg_in_reglist)
12989 return false;
12990
12991 /* For Thumb-1, the address register is always modified - either by write-back
12992 or by an explicit load. If the pattern does not describe an update,
12993 then the address register must be in the list of loaded registers. */
12994 if (TARGET_THUMB1)
12995 return update || addr_reg_in_reglist;
12996 }
12997
12998 return true;
12999 }
13000
13001 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13002 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13003 instruction. ADD_OFFSET is nonzero if the base address register needs
13004 to be modified with an add instruction before we can use it. */
13005
13006 static bool
13007 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13008 int nops, HOST_WIDE_INT add_offset)
13009 {
13010 /* For the ARM8, ARM9 and StrongARM, 2 ldr instructions are faster than an ldm
13011 if the offset isn't small enough. The reason 2 ldrs are faster
13012 is because these ARMs are able to do more than one cache access
13013 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13014 whilst the ARM8 has a double bandwidth cache. This means that
13015 these cores can do both an instruction fetch and a data fetch in
13016 a single cycle, so the trick of calculating the address into a
13017 scratch register (one of the result regs) and then doing a load
13018 multiple actually becomes slower (and no smaller in code size).
13019 That is the transformation
13020
13021 ldr rd1, [rbase + offset]
13022 ldr rd2, [rbase + offset + 4]
13023
13024 to
13025
13026 add rd1, rbase, offset
13027 ldmia rd1, {rd1, rd2}
13028
13029 produces worse code -- '3 cycles + any stalls on rd2' instead of
13030 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13031 access per cycle, the first sequence could never complete in less
13032 than 6 cycles, whereas the ldm sequence would only take 5 and
13033 would make better use of sequential accesses if not hitting the
13034 cache.
13035
13036 We cheat here and test 'arm_ld_sched' which we currently know to
13037 only be true for the ARM8, ARM9 and StrongARM. If this ever
13038 changes, then the test below needs to be reworked. */
13039 if (nops == 2 && arm_ld_sched && add_offset != 0)
13040 return false;
13041
13042 /* XScale has load-store double instructions, but they have stricter
13043 alignment requirements than load-store multiple, so we cannot
13044 use them.
13045
13046 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13047 the pipeline until completion.
13048
13049 NREGS CYCLES
13050 1 3
13051 2 4
13052 3 5
13053 4 6
13054
13055 An ldr instruction takes 1-3 cycles, but does not block the
13056 pipeline.
13057
13058 NREGS CYCLES
13059 1 1-3
13060 2 2-6
13061 3 3-9
13062 4 4-12
13063
13064 Best case ldr will always win. However, the more ldr instructions
13065 we issue, the less likely we are to be able to schedule them well.
13066 Using ldr instructions also increases code size.
13067
13068 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13069 for counts of 3 or 4 regs. */
13070 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13071 return false;
13072 return true;
13073 }
13074
13075 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13076 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13077 an array ORDER which describes the sequence to use when accessing the
13078 offsets that produces an ascending order. In this sequence, each
13079 offset must be larger by exactly 4 than the previous one. ORDER[0]
13080 must have been filled in with the lowest offset by the caller.
13081 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13082 we use to verify that ORDER produces an ascending order of registers.
13083 Return true if it was possible to construct such an order, false if
13084 not. */
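/* A small worked example (illustrative only): with UNSORTED_OFFSETS
{8, 0, 4} and ORDER[0] == 1, the loop below produces ORDER == {1, 2, 0};
with UNSORTED_OFFSETS {0, 8, 12} it fails, because no offset is exactly
4 greater than 0. */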
13085
13086 static bool
13087 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13088 int *unsorted_regs)
13089 {
13090 int i;
13091 for (i = 1; i < nops; i++)
13092 {
13093 int j;
13094
13095 order[i] = order[i - 1];
13096 for (j = 0; j < nops; j++)
13097 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13098 {
13099 /* We must find exactly one offset that is higher than the
13100 previous one by 4. */
13101 if (order[i] != order[i - 1])
13102 return false;
13103 order[i] = j;
13104 }
13105 if (order[i] == order[i - 1])
13106 return false;
13107 /* The register numbers must be ascending. */
13108 if (unsorted_regs != NULL
13109 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13110 return false;
13111 }
13112 return true;
13113 }
13114
13115 /* Used to determine in a peephole whether a sequence of load
13116 instructions can be changed into a load-multiple instruction.
13117 NOPS is the number of separate load instructions we are examining. The
13118 first NOPS entries in OPERANDS are the destination registers, the
13119 next NOPS entries are memory operands. If this function is
13120 successful, *BASE is set to the common base register of the memory
13121 accesses; *LOAD_OFFSET is set to the first memory location's offset
13122 from that base register.
13123 REGS is an array filled in with the destination register numbers.
13124 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13125 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13126 the sequence of registers in REGS matches the loads from ascending memory
13127 locations, and the function verifies that the register numbers are
13128 themselves ascending. If CHECK_REGS is false, the register numbers
13129 are stored in the order they are found in the operands. */
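/* For instance (illustrative only), the two-load sequence
ldr r4, [r0]
ldr r5, [r0, #4]
would typically yield case 1 (ldmia) with REGS == {4, 5}, *BASE == 0 (r0)
and *LOAD_OFFSET == 0, although the profitability check below may still
reject it on some tunings. */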
13130 static int
13131 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13132 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13133 {
13134 int unsorted_regs[MAX_LDM_STM_OPS];
13135 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13136 int order[MAX_LDM_STM_OPS];
13137 rtx base_reg_rtx = NULL;
13138 int base_reg = -1;
13139 int i, ldm_case;
13140
13141 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13142 easily extended if required. */
13143 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13144
13145 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13146
13147 /* Loop over the operands and check that the memory references are
13148 suitable (i.e. immediate offsets from the same base register). At
13149 the same time, extract the target register, and the memory
13150 offsets. */
13151 for (i = 0; i < nops; i++)
13152 {
13153 rtx reg;
13154 rtx offset;
13155
13156 /* Convert a subreg of a mem into the mem itself. */
13157 if (GET_CODE (operands[nops + i]) == SUBREG)
13158 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13159
13160 gcc_assert (MEM_P (operands[nops + i]));
13161
13162 /* Don't reorder volatile memory references; it doesn't seem worth
13163 looking for the case where the order is ok anyway. */
13164 if (MEM_VOLATILE_P (operands[nops + i]))
13165 return 0;
13166
13167 offset = const0_rtx;
13168
13169 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13170 || (GET_CODE (reg) == SUBREG
13171 && REG_P (reg = SUBREG_REG (reg))))
13172 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13173 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13174 || (GET_CODE (reg) == SUBREG
13175 && REG_P (reg = SUBREG_REG (reg))))
13176 && (CONST_INT_P (offset
13177 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13178 {
13179 if (i == 0)
13180 {
13181 base_reg = REGNO (reg);
13182 base_reg_rtx = reg;
13183 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13184 return 0;
13185 }
13186 else if (base_reg != (int) REGNO (reg))
13187 /* Not addressed from the same base register. */
13188 return 0;
13189
13190 unsorted_regs[i] = (REG_P (operands[i])
13191 ? REGNO (operands[i])
13192 : REGNO (SUBREG_REG (operands[i])));
13193
13194 /* If it isn't an integer register, or if it overwrites the
13195 base register but isn't the last insn in the list, then
13196 we can't do this. */
13197 if (unsorted_regs[i] < 0
13198 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13199 || unsorted_regs[i] > 14
13200 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13201 return 0;
13202
13203 /* Don't allow SP to be loaded unless it is also the base
13204 register. It guarantees that SP is reset correctly when
13205 an LDM instruction is interrupted. Otherwise, we might
13206 end up with a corrupt stack. */
13207 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13208 return 0;
13209
13210 unsorted_offsets[i] = INTVAL (offset);
13211 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13212 order[0] = i;
13213 }
13214 else
13215 /* Not a suitable memory address. */
13216 return 0;
13217 }
13218
13219 /* All the useful information has now been extracted from the
13220 operands into unsorted_regs and unsorted_offsets; additionally,
13221 order[0] has been set to the lowest offset in the list. Sort
13222 the offsets into order, verifying that they are adjacent, and
13223 check that the register numbers are ascending. */
13224 if (!compute_offset_order (nops, unsorted_offsets, order,
13225 check_regs ? unsorted_regs : NULL))
13226 return 0;
13227
13228 if (saved_order)
13229 memcpy (saved_order, order, sizeof order);
13230
13231 if (base)
13232 {
13233 *base = base_reg;
13234
13235 for (i = 0; i < nops; i++)
13236 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13237
13238 *load_offset = unsorted_offsets[order[0]];
13239 }
13240
13241 if (TARGET_THUMB1
13242 && !peep2_reg_dead_p (nops, base_reg_rtx))
13243 return 0;
13244
13245 if (unsorted_offsets[order[0]] == 0)
13246 ldm_case = 1; /* ldmia */
13247 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13248 ldm_case = 2; /* ldmib */
13249 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13250 ldm_case = 3; /* ldmda */
13251 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13252 ldm_case = 4; /* ldmdb */
13253 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13254 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13255 ldm_case = 5;
13256 else
13257 return 0;
13258
13259 if (!multiple_operation_profitable_p (false, nops,
13260 ldm_case == 5
13261 ? unsorted_offsets[order[0]] : 0))
13262 return 0;
13263
13264 return ldm_case;
13265 }
13266
13267 /* Used to determine in a peephole whether a sequence of store instructions can
13268 be changed into a store-multiple instruction.
13269 NOPS is the number of separate store instructions we are examining.
13270 NOPS_TOTAL is the total number of instructions recognized by the peephole
13271 pattern.
13272 The first NOPS entries in OPERANDS are the source registers, the next
13273 NOPS entries are memory operands. If this function is successful, *BASE is
13274 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13275 to the first memory location's offset from that base register. REGS is an
13276 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13277 likewise filled with the corresponding rtx's.
13278 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13279 numbers to an ascending order of stores.
13280 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13281 from ascending memory locations, and the function verifies that the register
13282 numbers are themselves ascending. If CHECK_REGS is false, the register
13283 numbers are stored in the order they are found in the operands. */
13284 static int
13285 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13286 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13287 HOST_WIDE_INT *load_offset, bool check_regs)
13288 {
13289 int unsorted_regs[MAX_LDM_STM_OPS];
13290 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13291 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13292 int order[MAX_LDM_STM_OPS];
13293 int base_reg = -1;
13294 rtx base_reg_rtx = NULL;
13295 int i, stm_case;
13296
13297 /* Write-back of the base register is currently only supported for Thumb-1. */
13298 int base_writeback = TARGET_THUMB1;
13299
13300 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13301 easily extended if required. */
13302 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13303
13304 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13305
13306 /* Loop over the operands and check that the memory references are
13307 suitable (i.e. immediate offsets from the same base register). At
13308 the same time, extract the target register, and the memory
13309 offsets. */
13310 for (i = 0; i < nops; i++)
13311 {
13312 rtx reg;
13313 rtx offset;
13314
13315 /* Convert a subreg of a mem into the mem itself. */
13316 if (GET_CODE (operands[nops + i]) == SUBREG)
13317 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13318
13319 gcc_assert (MEM_P (operands[nops + i]));
13320
13321 /* Don't reorder volatile memory references; it doesn't seem worth
13322 looking for the case where the order is ok anyway. */
13323 if (MEM_VOLATILE_P (operands[nops + i]))
13324 return 0;
13325
13326 offset = const0_rtx;
13327
13328 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13329 || (GET_CODE (reg) == SUBREG
13330 && REG_P (reg = SUBREG_REG (reg))))
13331 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13332 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13333 || (GET_CODE (reg) == SUBREG
13334 && REG_P (reg = SUBREG_REG (reg))))
13335 && (CONST_INT_P (offset
13336 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13337 {
13338 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13339 ? operands[i] : SUBREG_REG (operands[i]));
13340 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13341
13342 if (i == 0)
13343 {
13344 base_reg = REGNO (reg);
13345 base_reg_rtx = reg;
13346 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13347 return 0;
13348 }
13349 else if (base_reg != (int) REGNO (reg))
13350 /* Not addressed from the same base register. */
13351 return 0;
13352
13353 /* If it isn't an integer register, then we can't do this. */
13354 if (unsorted_regs[i] < 0
13355 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13356 /* The effects are unpredictable if the base register is
13357 both updated and stored. */
13358 || (base_writeback && unsorted_regs[i] == base_reg)
13359 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13360 || unsorted_regs[i] > 14)
13361 return 0;
13362
13363 unsorted_offsets[i] = INTVAL (offset);
13364 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13365 order[0] = i;
13366 }
13367 else
13368 /* Not a suitable memory address. */
13369 return 0;
13370 }
13371
13372 /* All the useful information has now been extracted from the
13373 operands into unsorted_regs and unsorted_offsets; additionally,
13374 order[0] has been set to the lowest offset in the list. Sort
13375 the offsets into order, verifying that they are adjacent, and
13376 check that the register numbers are ascending. */
13377 if (!compute_offset_order (nops, unsorted_offsets, order,
13378 check_regs ? unsorted_regs : NULL))
13379 return 0;
13380
13381 if (saved_order)
13382 memcpy (saved_order, order, sizeof order);
13383
13384 if (base)
13385 {
13386 *base = base_reg;
13387
13388 for (i = 0; i < nops; i++)
13389 {
13390 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13391 if (reg_rtxs)
13392 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13393 }
13394
13395 *load_offset = unsorted_offsets[order[0]];
13396 }
13397
13398 if (TARGET_THUMB1
13399 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13400 return 0;
13401
13402 if (unsorted_offsets[order[0]] == 0)
13403 stm_case = 1; /* stmia */
13404 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13405 stm_case = 2; /* stmib */
13406 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13407 stm_case = 3; /* stmda */
13408 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13409 stm_case = 4; /* stmdb */
13410 else
13411 return 0;
13412
13413 if (!multiple_operation_profitable_p (false, nops, 0))
13414 return 0;
13415
13416 return stm_case;
13417 }
13418 \f
13419 /* Routines for use in generating RTL. */
13420
13421 /* Generate a load-multiple instruction. COUNT is the number of loads in
13422 the instruction; REGS and MEMS are arrays containing the operands.
13423 BASEREG is the base register to be used in addressing the memory operands.
13424 WBACK_OFFSET is nonzero if the instruction should update the base
13425 register. */
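/* A sketch of the RTL produced for COUNT == 2 with a WBACK_OFFSET of 8,
assuming the multiple-operation form is profitable:
(parallel [(set (basereg) (plus (basereg) (const_int 8)))
(set (reg:SI regs[0]) mems[0])
(set (reg:SI regs[1]) mems[1])]) */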
13426
13427 static rtx
13428 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13429 HOST_WIDE_INT wback_offset)
13430 {
13431 int i = 0, j;
13432 rtx result;
13433
13434 if (!multiple_operation_profitable_p (false, count, 0))
13435 {
13436 rtx seq;
13437
13438 start_sequence ();
13439
13440 for (i = 0; i < count; i++)
13441 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13442
13443 if (wback_offset != 0)
13444 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13445
13446 seq = get_insns ();
13447 end_sequence ();
13448
13449 return seq;
13450 }
13451
13452 result = gen_rtx_PARALLEL (VOIDmode,
13453 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13454 if (wback_offset != 0)
13455 {
13456 XVECEXP (result, 0, 0)
13457 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13458 i = 1;
13459 count++;
13460 }
13461
13462 for (j = 0; i < count; i++, j++)
13463 XVECEXP (result, 0, i)
13464 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13465
13466 return result;
13467 }
13468
13469 /* Generate a store-multiple instruction. COUNT is the number of stores in
13470 the instruction; REGS and MEMS are arrays containing the operands.
13471 BASEREG is the base register to be used in addressing the memory operands.
13472 WBACK_OFFSET is nonzero if the instruction should update the base
13473 register. */
13474
13475 static rtx
13476 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13477 HOST_WIDE_INT wback_offset)
13478 {
13479 int i = 0, j;
13480 rtx result;
13481
13482 if (GET_CODE (basereg) == PLUS)
13483 basereg = XEXP (basereg, 0);
13484
13485 if (!multiple_operation_profitable_p (false, count, 0))
13486 {
13487 rtx seq;
13488
13489 start_sequence ();
13490
13491 for (i = 0; i < count; i++)
13492 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13493
13494 if (wback_offset != 0)
13495 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13496
13497 seq = get_insns ();
13498 end_sequence ();
13499
13500 return seq;
13501 }
13502
13503 result = gen_rtx_PARALLEL (VOIDmode,
13504 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13505 if (wback_offset != 0)
13506 {
13507 XVECEXP (result, 0, 0)
13508 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13509 i = 1;
13510 count++;
13511 }
13512
13513 for (j = 0; i < count; i++, j++)
13514 XVECEXP (result, 0, i)
13515 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13516
13517 return result;
13518 }
13519
13520 /* Generate either a load-multiple or a store-multiple instruction. This
13521 function can be used in situations where we can start with a single MEM
13522 rtx and adjust its address upwards.
13523 COUNT is the number of operations in the instruction, not counting a
13524 possible update of the base register. REGS is an array containing the
13525 register operands.
13526 BASEREG is the base register to be used in addressing the memory operands,
13527 which are constructed from BASEMEM.
13528 WRITE_BACK specifies whether the generated instruction should include an
13529 update of the base register.
13530 OFFSETP is used to pass an offset to and from this function; this offset
13531 is not used when constructing the address (instead BASEMEM should have an
13532 appropriate offset in its address), it is used only for setting
13533 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13534
13535 static rtx
13536 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13537 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13538 {
13539 rtx mems[MAX_LDM_STM_OPS];
13540 HOST_WIDE_INT offset = *offsetp;
13541 int i;
13542
13543 gcc_assert (count <= MAX_LDM_STM_OPS);
13544
13545 if (GET_CODE (basereg) == PLUS)
13546 basereg = XEXP (basereg, 0);
13547
13548 for (i = 0; i < count; i++)
13549 {
13550 rtx addr = plus_constant (Pmode, basereg, i * 4);
13551 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13552 offset += 4;
13553 }
13554
13555 if (write_back)
13556 *offsetp = offset;
13557
13558 if (is_load)
13559 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13560 write_back ? 4 * count : 0);
13561 else
13562 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13563 write_back ? 4 * count : 0);
13564 }
13565
13566 rtx
13567 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13568 rtx basemem, HOST_WIDE_INT *offsetp)
13569 {
13570 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13571 offsetp);
13572 }
13573
13574 rtx
13575 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13576 rtx basemem, HOST_WIDE_INT *offsetp)
13577 {
13578 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13579 offsetp);
13580 }
13581
13582 /* Called from a peephole2 expander to turn a sequence of loads into an
13583 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13584 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13585 is true if we can reorder the registers because they are subsequently used
13586 commutatively.
13587 Returns true iff we could generate a new instruction. */
13588
13589 bool
13590 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13591 {
13592 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13593 rtx mems[MAX_LDM_STM_OPS];
13594 int i, j, base_reg;
13595 rtx base_reg_rtx;
13596 HOST_WIDE_INT offset;
13597 int write_back = FALSE;
13598 int ldm_case;
13599 rtx addr;
13600
13601 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13602 &base_reg, &offset, !sort_regs);
13603
13604 if (ldm_case == 0)
13605 return false;
13606
13607 if (sort_regs)
13608 for (i = 0; i < nops - 1; i++)
13609 for (j = i + 1; j < nops; j++)
13610 if (regs[i] > regs[j])
13611 {
13612 int t = regs[i];
13613 regs[i] = regs[j];
13614 regs[j] = t;
13615 }
13616 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13617
13618 if (TARGET_THUMB1)
13619 {
13620 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13621 gcc_assert (ldm_case == 1 || ldm_case == 5);
13622 write_back = TRUE;
13623 }
13624
13625 if (ldm_case == 5)
13626 {
13627 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13628 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13629 offset = 0;
13630 if (!TARGET_THUMB1)
13631 base_reg_rtx = newbase;
13632 }
13633
13634 for (i = 0; i < nops; i++)
13635 {
13636 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13637 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13638 SImode, addr, 0);
13639 }
13640 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13641 write_back ? offset + i * 4 : 0));
13642 return true;
13643 }
13644
13645 /* Called from a peephole2 expander to turn a sequence of stores into an
13646 STM instruction. OPERANDS are the operands found by the peephole matcher;
13647 NOPS indicates how many separate stores we are trying to combine.
13648 Returns true iff we could generate a new instruction. */
13649
13650 bool
13651 gen_stm_seq (rtx *operands, int nops)
13652 {
13653 int i;
13654 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13655 rtx mems[MAX_LDM_STM_OPS];
13656 int base_reg;
13657 rtx base_reg_rtx;
13658 HOST_WIDE_INT offset;
13659 int write_back = FALSE;
13660 int stm_case;
13661 rtx addr;
13662 bool base_reg_dies;
13663
13664 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13665 mem_order, &base_reg, &offset, true);
13666
13667 if (stm_case == 0)
13668 return false;
13669
13670 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13671
13672 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13673 if (TARGET_THUMB1)
13674 {
13675 gcc_assert (base_reg_dies);
13676 write_back = TRUE;
13677 }
13678
13679 if (stm_case == 5)
13680 {
13681 gcc_assert (base_reg_dies);
13682 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13683 offset = 0;
13684 }
13685
13686 addr = plus_constant (Pmode, base_reg_rtx, offset);
13687
13688 for (i = 0; i < nops; i++)
13689 {
13690 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13691 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13692 SImode, addr, 0);
13693 }
13694 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13695 write_back ? offset + i * 4 : 0));
13696 return true;
13697 }
13698
13699 /* Called from a peephole2 expander to turn a sequence of stores that are
13700 preceded by constant loads into an STM instruction. OPERANDS are the
13701 operands found by the peephole matcher; NOPS indicates how many
13702 separate stores we are trying to combine; there are 2 * NOPS
13703 instructions in the peephole.
13704 Returns true iff we could generate a new instruction. */
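/* Illustrative example only: a sequence along the lines of
mov r4, #1
mov r5, #2
str r4, [r0]
str r5, [r0, #4]
can be turned into the constant loads followed by a single
"stmia r0, {r4, r5}" (with write-back of the base on Thumb-1). */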
13705
13706 bool
13707 gen_const_stm_seq (rtx *operands, int nops)
13708 {
13709 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13710 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13711 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13712 rtx mems[MAX_LDM_STM_OPS];
13713 int base_reg;
13714 rtx base_reg_rtx;
13715 HOST_WIDE_INT offset;
13716 int write_back = FALSE;
13717 int stm_case;
13718 rtx addr;
13719 bool base_reg_dies;
13720 int i, j;
13721 HARD_REG_SET allocated;
13722
13723 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13724 mem_order, &base_reg, &offset, false);
13725
13726 if (stm_case == 0)
13727 return false;
13728
13729 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13730
13731 /* If the same register is used more than once, try to find a free
13732 register. */
13733 CLEAR_HARD_REG_SET (allocated);
13734 for (i = 0; i < nops; i++)
13735 {
13736 for (j = i + 1; j < nops; j++)
13737 if (regs[i] == regs[j])
13738 {
13739 rtx t = peep2_find_free_register (0, nops * 2,
13740 TARGET_THUMB1 ? "l" : "r",
13741 SImode, &allocated);
13742 if (t == NULL_RTX)
13743 return false;
13744 reg_rtxs[i] = t;
13745 regs[i] = REGNO (t);
13746 }
13747 }
13748
13749 /* Compute an ordering that maps the register numbers to an ascending
13750 sequence. */
13751 reg_order[0] = 0;
13752 for (i = 0; i < nops; i++)
13753 if (regs[i] < regs[reg_order[0]])
13754 reg_order[0] = i;
13755
13756 for (i = 1; i < nops; i++)
13757 {
13758 int this_order = reg_order[i - 1];
13759 for (j = 0; j < nops; j++)
13760 if (regs[j] > regs[reg_order[i - 1]]
13761 && (this_order == reg_order[i - 1]
13762 || regs[j] < regs[this_order]))
13763 this_order = j;
13764 reg_order[i] = this_order;
13765 }
13766
13767 /* Ensure that registers that must be live after the instruction end
13768 up with the correct value. */
13769 for (i = 0; i < nops; i++)
13770 {
13771 int this_order = reg_order[i];
13772 if ((this_order != mem_order[i]
13773 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13774 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13775 return false;
13776 }
13777
13778 /* Load the constants. */
13779 for (i = 0; i < nops; i++)
13780 {
13781 rtx op = operands[2 * nops + mem_order[i]];
13782 sorted_regs[i] = regs[reg_order[i]];
13783 emit_move_insn (reg_rtxs[reg_order[i]], op);
13784 }
13785
13786 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13787
13788 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13789 if (TARGET_THUMB1)
13790 {
13791 gcc_assert (base_reg_dies);
13792 write_back = TRUE;
13793 }
13794
13795 if (stm_case == 5)
13796 {
13797 gcc_assert (base_reg_dies);
13798 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13799 offset = 0;
13800 }
13801
13802 addr = plus_constant (Pmode, base_reg_rtx, offset);
13803
13804 for (i = 0; i < nops; i++)
13805 {
13806 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13807 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13808 SImode, addr, 0);
13809 }
13810 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13811 write_back ? offset + i * 4 : 0));
13812 return true;
13813 }
13814
13815 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13816 unaligned copies on processors which support unaligned semantics for those
13817 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13818 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13819 An interleave factor of 1 (the minimum) will perform no interleaving.
13820 Load/store multiple are used for aligned addresses where possible. */
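/* As a rough example, LENGTH == 11 with INTERLEAVE_FACTOR == 2 and neither
side word-aligned expands to two unaligned word loads followed by two
unaligned word stores (one 8-byte block), then a halfword copy and a
final byte copy. */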
13821
13822 static void
13823 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13824 HOST_WIDE_INT length,
13825 unsigned int interleave_factor)
13826 {
13827 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13828 int *regnos = XALLOCAVEC (int, interleave_factor);
13829 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13830 HOST_WIDE_INT i, j;
13831 HOST_WIDE_INT remaining = length, words;
13832 rtx halfword_tmp = NULL, byte_tmp = NULL;
13833 rtx dst, src;
13834 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13835 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13836 HOST_WIDE_INT srcoffset, dstoffset;
13837 HOST_WIDE_INT src_autoinc, dst_autoinc;
13838 rtx mem, addr;
13839
13840 gcc_assert (1 <= interleave_factor && interleave_factor <= 4);
13841
13842 /* Use hard registers if we have aligned source or destination so we can use
13843 load/store multiple with contiguous registers. */
13844 if (dst_aligned || src_aligned)
13845 for (i = 0; i < interleave_factor; i++)
13846 regs[i] = gen_rtx_REG (SImode, i);
13847 else
13848 for (i = 0; i < interleave_factor; i++)
13849 regs[i] = gen_reg_rtx (SImode);
13850
13851 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13852 src = copy_addr_to_reg (XEXP (srcbase, 0));
13853
13854 srcoffset = dstoffset = 0;
13855
13856 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
13857 For copying the last bytes we want to subtract this offset again. */
13858 src_autoinc = dst_autoinc = 0;
13859
13860 for (i = 0; i < interleave_factor; i++)
13861 regnos[i] = i;
13862
13863 /* Copy BLOCK_SIZE_BYTES chunks. */
13864
13865 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
13866 {
13867 /* Load words. */
13868 if (src_aligned && interleave_factor > 1)
13869 {
13870 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
13871 TRUE, srcbase, &srcoffset));
13872 src_autoinc += UNITS_PER_WORD * interleave_factor;
13873 }
13874 else
13875 {
13876 for (j = 0; j < interleave_factor; j++)
13877 {
13878 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
13879 - src_autoinc));
13880 mem = adjust_automodify_address (srcbase, SImode, addr,
13881 srcoffset + j * UNITS_PER_WORD);
13882 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13883 }
13884 srcoffset += block_size_bytes;
13885 }
13886
13887 /* Store words. */
13888 if (dst_aligned && interleave_factor > 1)
13889 {
13890 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
13891 TRUE, dstbase, &dstoffset));
13892 dst_autoinc += UNITS_PER_WORD * interleave_factor;
13893 }
13894 else
13895 {
13896 for (j = 0; j < interleave_factor; j++)
13897 {
13898 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
13899 - dst_autoinc));
13900 mem = adjust_automodify_address (dstbase, SImode, addr,
13901 dstoffset + j * UNITS_PER_WORD);
13902 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13903 }
13904 dstoffset += block_size_bytes;
13905 }
13906
13907 remaining -= block_size_bytes;
13908 }
13909
13910 /* Copy any whole words left (note these aren't interleaved with any
13911 subsequent halfword/byte load/stores in the interests of simplicity). */
13912
13913 words = remaining / UNITS_PER_WORD;
13914
13915 gcc_assert (words < interleave_factor);
13916
13917 if (src_aligned && words > 1)
13918 {
13919 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
13920 &srcoffset));
13921 src_autoinc += UNITS_PER_WORD * words;
13922 }
13923 else
13924 {
13925 for (j = 0; j < words; j++)
13926 {
13927 addr = plus_constant (Pmode, src,
13928 srcoffset + j * UNITS_PER_WORD - src_autoinc);
13929 mem = adjust_automodify_address (srcbase, SImode, addr,
13930 srcoffset + j * UNITS_PER_WORD);
13931 if (src_aligned)
13932 emit_move_insn (regs[j], mem);
13933 else
13934 emit_insn (gen_unaligned_loadsi (regs[j], mem));
13935 }
13936 srcoffset += words * UNITS_PER_WORD;
13937 }
13938
13939 if (dst_aligned && words > 1)
13940 {
13941 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
13942 &dstoffset));
13943 dst_autoinc += words * UNITS_PER_WORD;
13944 }
13945 else
13946 {
13947 for (j = 0; j < words; j++)
13948 {
13949 addr = plus_constant (Pmode, dst,
13950 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
13951 mem = adjust_automodify_address (dstbase, SImode, addr,
13952 dstoffset + j * UNITS_PER_WORD);
13953 if (dst_aligned)
13954 emit_move_insn (mem, regs[j]);
13955 else
13956 emit_insn (gen_unaligned_storesi (mem, regs[j]));
13957 }
13958 dstoffset += words * UNITS_PER_WORD;
13959 }
13960
13961 remaining -= words * UNITS_PER_WORD;
13962
13963 gcc_assert (remaining < 4);
13964
13965 /* Copy a halfword if necessary. */
13966
13967 if (remaining >= 2)
13968 {
13969 halfword_tmp = gen_reg_rtx (SImode);
13970
13971 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
13972 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
13973 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
13974
13975 /* Either write out immediately, or delay until we've loaded the last
13976 byte, depending on interleave factor. */
13977 if (interleave_factor == 1)
13978 {
13979 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
13980 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
13981 emit_insn (gen_unaligned_storehi (mem,
13982 gen_lowpart (HImode, halfword_tmp)));
13983 halfword_tmp = NULL;
13984 dstoffset += 2;
13985 }
13986
13987 remaining -= 2;
13988 srcoffset += 2;
13989 }
13990
13991 gcc_assert (remaining < 2);
13992
13993 /* Copy last byte. */
13994
13995 if ((remaining & 1) != 0)
13996 {
13997 byte_tmp = gen_reg_rtx (SImode);
13998
13999 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14000 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14001 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14002
14003 if (interleave_factor == 1)
14004 {
14005 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14006 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14007 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14008 byte_tmp = NULL;
14009 dstoffset++;
14010 }
14011
14012 remaining--;
14013 srcoffset++;
14014 }
14015
14016 /* Store last halfword if we haven't done so already. */
14017
14018 if (halfword_tmp)
14019 {
14020 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14021 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14022 emit_insn (gen_unaligned_storehi (mem,
14023 gen_lowpart (HImode, halfword_tmp)));
14024 dstoffset += 2;
14025 }
14026
14027 /* Likewise for last byte. */
14028
14029 if (byte_tmp)
14030 {
14031 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14032 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14033 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14034 dstoffset++;
14035 }
14036
14037 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14038 }
14039
14040 /* From mips_adjust_block_mem:
14041
14042 Helper function for doing a loop-based block operation on memory
14043 reference MEM. Each iteration of the loop will operate on LENGTH
14044 bytes of MEM.
14045
14046 Create a new base register for use within the loop and point it to
14047 the start of MEM. Create a new memory reference that uses this
14048 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14049
14050 static void
14051 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14052 rtx *loop_mem)
14053 {
14054 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14055
14056 /* Although the new mem does not refer to a known location,
14057 it does keep up to LENGTH bytes of alignment. */
14058 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14059 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14060 }
14061
14062 /* From mips_block_move_loop:
14063
14064 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14065 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14066 the memory regions do not overlap. */
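/* The generated structure is roughly:
src_reg = &src; dest_reg = &dest; final_src = src_reg + (LENGTH - leftover);
loop: copy BYTES_PER_ITER bytes;
src_reg += BYTES_PER_ITER; dest_reg += BYTES_PER_ITER;
if (src_reg != final_src) goto loop;
and any leftover bytes are then copied with a straight-line sequence. */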
14067
14068 static void
14069 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14070 unsigned int interleave_factor,
14071 HOST_WIDE_INT bytes_per_iter)
14072 {
14073 rtx src_reg, dest_reg, final_src, test;
14074 HOST_WIDE_INT leftover;
14075
14076 leftover = length % bytes_per_iter;
14077 length -= leftover;
14078
14079 /* Create registers and memory references for use within the loop. */
14080 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14081 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14082
14083 /* Calculate the value that SRC_REG should have after the last iteration of
14084 the loop. */
14085 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14086 0, 0, OPTAB_WIDEN);
14087
14088 /* Emit the start of the loop. */
14089 rtx_code_label *label = gen_label_rtx ();
14090 emit_label (label);
14091
14092 /* Emit the loop body. */
14093 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14094 interleave_factor);
14095
14096 /* Move on to the next block. */
14097 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14098 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14099
14100 /* Emit the loop condition. */
14101 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14102 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14103
14104 /* Mop up any left-over bytes. */
14105 if (leftover)
14106 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14107 }
14108
14109 /* Emit a block move when either the source or destination is unaligned (not
14110 aligned to a four-byte boundary). This may need further tuning depending on
14111 core type, optimize_size setting, etc. */
14112
14113 static int
14114 arm_movmemqi_unaligned (rtx *operands)
14115 {
14116 HOST_WIDE_INT length = INTVAL (operands[2]);
14117
14118 if (optimize_size)
14119 {
14120 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14121 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14122 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14123 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14124 or dst_aligned though: allow more interleaving in those cases since the
14125 resulting code can be smaller. */
14126 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14127 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14128
14129 if (length > 12)
14130 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14131 interleave_factor, bytes_per_iter);
14132 else
14133 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14134 interleave_factor);
14135 }
14136 else
14137 {
14138 /* Note that the loop created by arm_block_move_unaligned_loop may be
14139 subject to loop unrolling, which makes tuning this condition a little
14140 redundant. */
14141 if (length > 32)
14142 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14143 else
14144 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14145 }
14146
14147 return 1;
14148 }
14149
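/* Expand a block copy of a constant size of at most 64 bytes (operands as
for the movmemqi pattern). Returns 1 if the copy was expanded inline,
0 if the caller should fall back to the generic code. */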
14150 int
14151 arm_gen_movmemqi (rtx *operands)
14152 {
14153 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14154 HOST_WIDE_INT srcoffset, dstoffset;
14155 rtx src, dst, srcbase, dstbase;
14156 rtx part_bytes_reg = NULL;
14157 rtx mem;
14158
14159 if (!CONST_INT_P (operands[2])
14160 || !CONST_INT_P (operands[3])
14161 || INTVAL (operands[2]) > 64)
14162 return 0;
14163
14164 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14165 return arm_movmemqi_unaligned (operands);
14166
14167 if (INTVAL (operands[3]) & 3)
14168 return 0;
14169
14170 dstbase = operands[0];
14171 srcbase = operands[1];
14172
14173 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14174 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14175
14176 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14177 out_words_to_go = INTVAL (operands[2]) / 4;
14178 last_bytes = INTVAL (operands[2]) & 3;
14179 dstoffset = srcoffset = 0;
14180
14181 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14182 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14183
14184 while (in_words_to_go >= 2)
14185 {
14186 if (in_words_to_go > 4)
14187 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14188 TRUE, srcbase, &srcoffset));
14189 else
14190 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14191 src, FALSE, srcbase,
14192 &srcoffset));
14193
14194 if (out_words_to_go)
14195 {
14196 if (out_words_to_go > 4)
14197 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14198 TRUE, dstbase, &dstoffset));
14199 else if (out_words_to_go != 1)
14200 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14201 out_words_to_go, dst,
14202 (last_bytes == 0
14203 ? FALSE : TRUE),
14204 dstbase, &dstoffset));
14205 else
14206 {
14207 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14208 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14209 if (last_bytes != 0)
14210 {
14211 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14212 dstoffset += 4;
14213 }
14214 }
14215 }
14216
14217 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14218 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14219 }
14220
14221 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14222 if (out_words_to_go)
14223 {
14224 rtx sreg;
14225
14226 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14227 sreg = copy_to_reg (mem);
14228
14229 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14230 emit_move_insn (mem, sreg);
14231 in_words_to_go--;
14232
14233 gcc_assert (!in_words_to_go); /* Sanity check */
14234 }
14235
14236 if (in_words_to_go)
14237 {
14238 gcc_assert (in_words_to_go > 0);
14239
14240 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14241 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14242 }
14243
14244 gcc_assert (!last_bytes || part_bytes_reg);
14245
14246 if (BYTES_BIG_ENDIAN && last_bytes)
14247 {
14248 rtx tmp = gen_reg_rtx (SImode);
14249
14250 /* The bytes we want are in the top end of the word. */
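/* For example, with LAST_BYTES == 2 the two wanted bytes sit in bits 31:16
   of the loaded word; shifting right by 16 brings them to the bottom, so
   the low byte (the second source byte) is stored at DST + 1, the register
   is shifted right another 8 bits, and the first source byte goes to DST. */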
14251 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14252 GEN_INT (8 * (4 - last_bytes))));
14253 part_bytes_reg = tmp;
14254
14255 while (last_bytes)
14256 {
14257 mem = adjust_automodify_address (dstbase, QImode,
14258 plus_constant (Pmode, dst,
14259 last_bytes - 1),
14260 dstoffset + last_bytes - 1);
14261 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14262
14263 if (--last_bytes)
14264 {
14265 tmp = gen_reg_rtx (SImode);
14266 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14267 part_bytes_reg = tmp;
14268 }
14269 }
14270
14271 }
14272 else
14273 {
14274 if (last_bytes > 1)
14275 {
14276 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14277 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14278 last_bytes -= 2;
14279 if (last_bytes)
14280 {
14281 rtx tmp = gen_reg_rtx (SImode);
14282 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14283 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14284 part_bytes_reg = tmp;
14285 dstoffset += 2;
14286 }
14287 }
14288
14289 if (last_bytes)
14290 {
14291 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14292 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14293 }
14294 }
14295
14296 return 1;
14297 }
14298
14299 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14300 by mode size. */
14301 inline static rtx
14302 next_consecutive_mem (rtx mem)
14303 {
14304 machine_mode mode = GET_MODE (mem);
14305 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14306 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14307
14308 return adjust_automodify_address (mem, mode, addr, offset);
14309 }
14310
14311 /* Copy using LDRD/STRD instructions whenever possible.
14312 Returns true upon success. */
14313 bool
14314 gen_movmem_ldrd_strd (rtx *operands)
14315 {
14316 unsigned HOST_WIDE_INT len;
14317 HOST_WIDE_INT align;
14318 rtx src, dst, base;
14319 rtx reg0;
14320 bool src_aligned, dst_aligned;
14321 bool src_volatile, dst_volatile;
14322
14323 gcc_assert (CONST_INT_P (operands[2]));
14324 gcc_assert (CONST_INT_P (operands[3]));
14325
14326 len = UINTVAL (operands[2]);
14327 if (len > 64)
14328 return false;
14329
14330 /* Maximum alignment we can assume for both src and dst buffers. */
14331 align = INTVAL (operands[3]);
14332
14333 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14334 return false;
14335
14336 /* Place src and dst addresses in registers
14337 and update the corresponding mem rtx. */
14338 dst = operands[0];
14339 dst_volatile = MEM_VOLATILE_P (dst);
14340 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14341 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14342 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14343
14344 src = operands[1];
14345 src_volatile = MEM_VOLATILE_P (src);
14346 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14347 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14348 src = adjust_automodify_address (src, VOIDmode, base, 0);
14349
14350 if (!unaligned_access && !(src_aligned && dst_aligned))
14351 return false;
14352
14353 if (src_volatile || dst_volatile)
14354 return false;
14355
14356 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14357 if (!(dst_aligned || src_aligned))
14358 return arm_gen_movmemqi (operands);
14359
14360 /* If either src or dst is unaligned, we'll access it as pairs
14361 of unaligned SImode accesses. Otherwise we can generate DImode
14362 ldrd/strd instructions. */
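/* A rough sketch of one 8-byte iteration when only the destination is
   word-aligned (register names are illustrative only):
	ldr	rlo, [src]	@ unaligned word load
	ldr	rhi, [src, #4]	@ unaligned word load
	strd	rlo, rhi, [dst]
   When both sides are aligned the two loads become a single ldrd. */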
14363 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14364 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14365
14366 while (len >= 8)
14367 {
14368 len -= 8;
14369 reg0 = gen_reg_rtx (DImode);
14370 rtx low_reg = NULL_RTX;
14371 rtx hi_reg = NULL_RTX;
14372
14373 if (!src_aligned || !dst_aligned)
14374 {
14375 low_reg = gen_lowpart (SImode, reg0);
14376 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14377 }
14378 if (src_aligned)
14379 emit_move_insn (reg0, src);
14380 else
14381 {
14382 emit_insn (gen_unaligned_loadsi (low_reg, src));
14383 src = next_consecutive_mem (src);
14384 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14385 }
14386
14387 if (dst_aligned)
14388 emit_move_insn (dst, reg0);
14389 else
14390 {
14391 emit_insn (gen_unaligned_storesi (dst, low_reg));
14392 dst = next_consecutive_mem (dst);
14393 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14394 }
14395
14396 src = next_consecutive_mem (src);
14397 dst = next_consecutive_mem (dst);
14398 }
14399
14400 gcc_assert (len < 8);
14401 if (len >= 4)
14402 {
14403 /* More than a word but less than a double-word to copy. Copy a word. */
14404 reg0 = gen_reg_rtx (SImode);
14405 src = adjust_address (src, SImode, 0);
14406 dst = adjust_address (dst, SImode, 0);
14407 if (src_aligned)
14408 emit_move_insn (reg0, src);
14409 else
14410 emit_insn (gen_unaligned_loadsi (reg0, src));
14411
14412 if (dst_aligned)
14413 emit_move_insn (dst, reg0);
14414 else
14415 emit_insn (gen_unaligned_storesi (dst, reg0));
14416
14417 src = next_consecutive_mem (src);
14418 dst = next_consecutive_mem (dst);
14419 len -= 4;
14420 }
14421
14422 if (len == 0)
14423 return true;
14424
14425 /* Copy the remaining bytes. */
14426 if (len >= 2)
14427 {
14428 dst = adjust_address (dst, HImode, 0);
14429 src = adjust_address (src, HImode, 0);
14430 reg0 = gen_reg_rtx (SImode);
14431 if (src_aligned)
14432 emit_insn (gen_zero_extendhisi2 (reg0, src));
14433 else
14434 emit_insn (gen_unaligned_loadhiu (reg0, src));
14435
14436 if (dst_aligned)
14437 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14438 else
14439 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14440
14441 src = next_consecutive_mem (src);
14442 dst = next_consecutive_mem (dst);
14443 if (len == 2)
14444 return true;
14445 }
14446
14447 dst = adjust_address (dst, QImode, 0);
14448 src = adjust_address (src, QImode, 0);
14449 reg0 = gen_reg_rtx (QImode);
14450 emit_move_insn (reg0, src);
14451 emit_move_insn (dst, reg0);
14452 return true;
14453 }
14454
14455 /* Select a dominance comparison mode if possible for a test of the general
14456 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14457 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14458 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14459 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14460 In all cases OP will be either EQ or NE, but we don't need to know which
14461 here. If we are unable to support a dominance comparison we return
14462 CC mode. This will then fail to match for the RTL expressions that
14463 generate this call. */
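/* For example (illustrative), a test such as
     (ne (ior (eq x y) (geu a b)) (const_int 0))
   with COND_OR == DOM_CC_X_OR_Y yields CC_DGEUmode, since EQ dominates GEU. */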
14464 machine_mode
14465 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14466 {
14467 enum rtx_code cond1, cond2;
14468 int swapped = 0;
14469
14470 /* Currently we will probably get the wrong result if the individual
14471 comparisons are not simple. This also ensures that it is safe to
14472 reverse a comparison if necessary. */
14473 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14474 != CCmode)
14475 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14476 != CCmode))
14477 return CCmode;
14478
14479 /* The if_then_else variant of this tests the second condition if the
14480 first passes, but is true if the first fails. Reverse the first
14481 condition to get a true "inclusive-or" expression. */
14482 if (cond_or == DOM_CC_NX_OR_Y)
14483 cond1 = reverse_condition (cond1);
14484
14485 /* If the comparisons are not equal, and one doesn't dominate the other,
14486 then we can't do this. */
14487 if (cond1 != cond2
14488 && !comparison_dominates_p (cond1, cond2)
14489 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14490 return CCmode;
14491
14492 if (swapped)
14493 std::swap (cond1, cond2);
14494
14495 switch (cond1)
14496 {
14497 case EQ:
14498 if (cond_or == DOM_CC_X_AND_Y)
14499 return CC_DEQmode;
14500
14501 switch (cond2)
14502 {
14503 case EQ: return CC_DEQmode;
14504 case LE: return CC_DLEmode;
14505 case LEU: return CC_DLEUmode;
14506 case GE: return CC_DGEmode;
14507 case GEU: return CC_DGEUmode;
14508 default: gcc_unreachable ();
14509 }
14510
14511 case LT:
14512 if (cond_or == DOM_CC_X_AND_Y)
14513 return CC_DLTmode;
14514
14515 switch (cond2)
14516 {
14517 case LT:
14518 return CC_DLTmode;
14519 case LE:
14520 return CC_DLEmode;
14521 case NE:
14522 return CC_DNEmode;
14523 default:
14524 gcc_unreachable ();
14525 }
14526
14527 case GT:
14528 if (cond_or == DOM_CC_X_AND_Y)
14529 return CC_DGTmode;
14530
14531 switch (cond2)
14532 {
14533 case GT:
14534 return CC_DGTmode;
14535 case GE:
14536 return CC_DGEmode;
14537 case NE:
14538 return CC_DNEmode;
14539 default:
14540 gcc_unreachable ();
14541 }
14542
14543 case LTU:
14544 if (cond_or == DOM_CC_X_AND_Y)
14545 return CC_DLTUmode;
14546
14547 switch (cond2)
14548 {
14549 case LTU:
14550 return CC_DLTUmode;
14551 case LEU:
14552 return CC_DLEUmode;
14553 case NE:
14554 return CC_DNEmode;
14555 default:
14556 gcc_unreachable ();
14557 }
14558
14559 case GTU:
14560 if (cond_or == DOM_CC_X_AND_Y)
14561 return CC_DGTUmode;
14562
14563 switch (cond2)
14564 {
14565 case GTU:
14566 return CC_DGTUmode;
14567 case GEU:
14568 return CC_DGEUmode;
14569 case NE:
14570 return CC_DNEmode;
14571 default:
14572 gcc_unreachable ();
14573 }
14574
14575 /* The remaining cases only occur when both comparisons are the
14576 same. */
14577 case NE:
14578 gcc_assert (cond1 == cond2);
14579 return CC_DNEmode;
14580
14581 case LE:
14582 gcc_assert (cond1 == cond2);
14583 return CC_DLEmode;
14584
14585 case GE:
14586 gcc_assert (cond1 == cond2);
14587 return CC_DGEmode;
14588
14589 case LEU:
14590 gcc_assert (cond1 == cond2);
14591 return CC_DLEUmode;
14592
14593 case GEU:
14594 gcc_assert (cond1 == cond2);
14595 return CC_DGEUmode;
14596
14597 default:
14598 gcc_unreachable ();
14599 }
14600 }
14601
14602 machine_mode
14603 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14604 {
14605 /* All floating point compares return CCFP if it is an equality
14606 comparison, and CCFPE otherwise. */
14607 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14608 {
14609 switch (op)
14610 {
14611 case EQ:
14612 case NE:
14613 case UNORDERED:
14614 case ORDERED:
14615 case UNLT:
14616 case UNLE:
14617 case UNGT:
14618 case UNGE:
14619 case UNEQ:
14620 case LTGT:
14621 return CCFPmode;
14622
14623 case LT:
14624 case LE:
14625 case GT:
14626 case GE:
14627 return CCFPEmode;
14628
14629 default:
14630 gcc_unreachable ();
14631 }
14632 }
14633
14634 /* A compare with a shifted operand. Because of canonicalization, the
14635 comparison will have to be swapped when we emit the assembler. */
14636 if (GET_MODE (y) == SImode
14637 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14638 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14639 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14640 || GET_CODE (x) == ROTATERT))
14641 return CC_SWPmode;
14642
14643 /* This operation is performed swapped, but since we only rely on the Z
14644 flag we don't need an additional mode. */
14645 if (GET_MODE (y) == SImode
14646 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14647 && GET_CODE (x) == NEG
14648 && (op == EQ || op == NE))
14649 return CC_Zmode;
14650
14651 /* This is a special case that is used by combine to allow a
14652 comparison of a shifted byte load to be split into a zero-extend
14653 followed by a comparison of the shifted integer (only valid for
14654 equalities and unsigned inequalities). */
14655 if (GET_MODE (x) == SImode
14656 && GET_CODE (x) == ASHIFT
14657 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14658 && GET_CODE (XEXP (x, 0)) == SUBREG
14659 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14660 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14661 && (op == EQ || op == NE
14662 || op == GEU || op == GTU || op == LTU || op == LEU)
14663 && CONST_INT_P (y))
14664 return CC_Zmode;
14665
14666 /* A construct for a conditional compare: if the false arm contains
14667 0, then both conditions must be true; otherwise either condition
14668 must be true. Not all conditions are possible, so CCmode is
14669 returned if it can't be done. */
14670 if (GET_CODE (x) == IF_THEN_ELSE
14671 && (XEXP (x, 2) == const0_rtx
14672 || XEXP (x, 2) == const1_rtx)
14673 && COMPARISON_P (XEXP (x, 0))
14674 && COMPARISON_P (XEXP (x, 1)))
14675 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14676 INTVAL (XEXP (x, 2)));
14677
14678 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14679 if (GET_CODE (x) == AND
14680 && (op == EQ || op == NE)
14681 && COMPARISON_P (XEXP (x, 0))
14682 && COMPARISON_P (XEXP (x, 1)))
14683 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14684 DOM_CC_X_AND_Y);
14685
14686 if (GET_CODE (x) == IOR
14687 && (op == EQ || op == NE)
14688 && COMPARISON_P (XEXP (x, 0))
14689 && COMPARISON_P (XEXP (x, 1)))
14690 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14691 DOM_CC_X_OR_Y);
14692
14693 /* An operation (on Thumb) where we want to test for a single bit.
14694 This is done by shifting that bit up into the top bit of a
14695 scratch register; we can then branch on the sign bit. */
14696 if (TARGET_THUMB1
14697 && GET_MODE (x) == SImode
14698 && (op == EQ || op == NE)
14699 && GET_CODE (x) == ZERO_EXTRACT
14700 && XEXP (x, 1) == const1_rtx)
14701 return CC_Nmode;
14702
14703 /* An operation that sets the condition codes as a side effect; the
14704 V flag is not set correctly, so we can only use comparisons where
14705 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14706 instead.) */
14707 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14708 if (GET_MODE (x) == SImode
14709 && y == const0_rtx
14710 && (op == EQ || op == NE || op == LT || op == GE)
14711 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14712 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14713 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14714 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14715 || GET_CODE (x) == LSHIFTRT
14716 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14717 || GET_CODE (x) == ROTATERT
14718 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14719 return CC_NOOVmode;
14720
14721 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14722 return CC_Zmode;
14723
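/* For example, an unsigned overflow check of the form (ltu (plus x y) x)
   only needs the carry flag, so the test below maps it to CC_Cmode. */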
14724 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14725 && GET_CODE (x) == PLUS
14726 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14727 return CC_Cmode;
14728
14729 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14730 {
14731 switch (op)
14732 {
14733 case EQ:
14734 case NE:
14735 /* A DImode comparison against zero can be implemented by
14736 or'ing the two halves together. */
14737 if (y == const0_rtx)
14738 return CC_Zmode;
14739
14740 /* We can do an equality test in three Thumb instructions. */
14741 if (!TARGET_32BIT)
14742 return CC_Zmode;
14743
14744 /* FALLTHROUGH */
14745
14746 case LTU:
14747 case LEU:
14748 case GTU:
14749 case GEU:
14750 /* DImode unsigned comparisons can be implemented by cmp +
14751 cmpeq without a scratch register. Not worth doing in
14752 Thumb-2. */
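/* For example (sketch only): comparing the pairs xhi:xlo and yhi:ylo
   becomes roughly
	cmp	xhi, yhi
	cmpeq	xlo, ylo
   followed by a branch on the appropriate unsigned condition. */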
14753 if (TARGET_32BIT)
14754 return CC_CZmode;
14755
14756 /* FALLTHROUGH */
14757
14758 case LT:
14759 case LE:
14760 case GT:
14761 case GE:
14762 /* DImode signed and unsigned comparisons can be implemented
14763 by cmp + sbcs with a scratch register, but that does not
14764 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14765 gcc_assert (op != EQ && op != NE);
14766 return CC_NCVmode;
14767
14768 default:
14769 gcc_unreachable ();
14770 }
14771 }
14772
14773 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14774 return GET_MODE (x);
14775
14776 return CCmode;
14777 }
14778
14779 /* X and Y are two things to compare using CODE. Emit the compare insn and
14780 return the rtx for register 0 in the proper mode. FP means this is a
14781 floating point compare: I don't think that it is needed on the arm. */
14782 rtx
14783 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14784 {
14785 machine_mode mode;
14786 rtx cc_reg;
14787 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14788
14789 /* We might have X as a constant, Y as a register because of the predicates
14790 used for cmpdi. If so, force X to a register here. */
14791 if (dimode_comparison && !REG_P (x))
14792 x = force_reg (DImode, x);
14793
14794 mode = SELECT_CC_MODE (code, x, y);
14795 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14796
14797 if (dimode_comparison
14798 && mode != CC_CZmode)
14799 {
14800 rtx clobber, set;
14801
14802 /* To compare two non-zero values for equality, XOR them and
14803 then compare against zero. Not used for ARM mode; there
14804 CC_CZmode is cheaper. */
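/* Roughly: two EORs of the word halves followed by an ORRS of the
   results, so that the Z flag ends up set iff the two values are equal. */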
14805 if (mode == CC_Zmode && y != const0_rtx)
14806 {
14807 gcc_assert (!reload_completed);
14808 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14809 y = const0_rtx;
14810 }
14811
14812 /* A scratch register is required. */
14813 if (reload_completed)
14814 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14815 else
14816 scratch = gen_rtx_SCRATCH (SImode);
14817
14818 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14819 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14820 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14821 }
14822 else
14823 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14824
14825 return cc_reg;
14826 }
14827
14828 /* Generate a sequence of insns that computes the correct return
14829 address mask for the physical architecture that the program is
14830 running on. */
14831 rtx
14832 arm_gen_return_addr_mask (void)
14833 {
14834 rtx reg = gen_reg_rtx (Pmode);
14835
14836 emit_insn (gen_return_addr_mask (reg));
14837 return reg;
14838 }
14839
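/* Handle loading a half-word from memory during reload by synthesizing it as
   two zero-extended byte loads combined with a shift and an OR (the byte
   order depends on endianness). A little-endian sketch, with SCRATCH taken
   from the DImode register in operands[2]:
	ldrb	scratch, [base, #offset]
	ldrb	dest, [base, #offset + 1]
	orr	dest, scratch, dest, lsl #8  */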
14840 void
14841 arm_reload_in_hi (rtx *operands)
14842 {
14843 rtx ref = operands[1];
14844 rtx base, scratch;
14845 HOST_WIDE_INT offset = 0;
14846
14847 if (GET_CODE (ref) == SUBREG)
14848 {
14849 offset = SUBREG_BYTE (ref);
14850 ref = SUBREG_REG (ref);
14851 }
14852
14853 if (REG_P (ref))
14854 {
14855 /* We have a pseudo which has been spilt onto the stack; there
14856 are two cases here: the first where there is a simple
14857 stack-slot replacement and a second where the stack-slot is
14858 out of range, or is used as a subreg. */
14859 if (reg_equiv_mem (REGNO (ref)))
14860 {
14861 ref = reg_equiv_mem (REGNO (ref));
14862 base = find_replacement (&XEXP (ref, 0));
14863 }
14864 else
14865 /* The slot is out of range, or was dressed up in a SUBREG. */
14866 base = reg_equiv_address (REGNO (ref));
14867
14868 /* PR 62554: If there is no equivalent memory location then just move
14869 the value as an SImode register move. This happens when the target
14870 architecture variant does not have an HImode register move. */
14871 if (base == NULL)
14872 {
14873 gcc_assert (REG_P (operands[0]));
14874 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
14875 gen_rtx_SUBREG (SImode, ref, 0)));
14876 return;
14877 }
14878 }
14879 else
14880 base = find_replacement (&XEXP (ref, 0));
14881
14882 /* Handle the case where the address is too complex to be offset by 1. */
14883 if (GET_CODE (base) == MINUS
14884 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
14885 {
14886 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14887
14888 emit_set_insn (base_plus, base);
14889 base = base_plus;
14890 }
14891 else if (GET_CODE (base) == PLUS)
14892 {
14893 /* The addend must be CONST_INT, or we would have dealt with it above. */
14894 HOST_WIDE_INT hi, lo;
14895
14896 offset += INTVAL (XEXP (base, 1));
14897 base = XEXP (base, 0);
14898
14899 /* Rework the address into a legal sequence of insns. */
14900 /* Valid range for lo is -4095 -> 4095 */
14901 lo = (offset >= 0
14902 ? (offset & 0xfff)
14903 : -((-offset) & 0xfff));
14904
14905 /* Corner case, if lo is the max offset then we would be out of range
14906 once we have added the additional 1 below, so bump the msb into the
14907 pre-loading insn(s). */
14908 if (lo == 4095)
14909 lo &= 0x7ff;
14910
14911 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
14912 ^ (HOST_WIDE_INT) 0x80000000)
14913 - (HOST_WIDE_INT) 0x80000000);
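/* For example: offset 0x1234 splits into hi = 0x1000 and lo = 0x234;
   offset 4095 would put the byte at offset + 1 out of range, so lo is
   reduced to 2047 and hi absorbs the remaining 2048. */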
14914
14915 gcc_assert (hi + lo == offset);
14916
14917 if (hi != 0)
14918 {
14919 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14920
14921 /* Get the base address; addsi3 knows how to handle constants
14922 that require more than one insn. */
14923 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
14924 base = base_plus;
14925 offset = lo;
14926 }
14927 }
14928
14929 /* Operands[2] may overlap operands[0] (though it won't overlap
14930 operands[1]), that's why we asked for a DImode reg -- so we can
14931 use the half that does not overlap. */
14932 if (REGNO (operands[2]) == REGNO (operands[0]))
14933 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
14934 else
14935 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
14936
14937 emit_insn (gen_zero_extendqisi2 (scratch,
14938 gen_rtx_MEM (QImode,
14939 plus_constant (Pmode, base,
14940 offset))));
14941 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
14942 gen_rtx_MEM (QImode,
14943 plus_constant (Pmode, base,
14944 offset + 1))));
14945 if (!BYTES_BIG_ENDIAN)
14946 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14947 gen_rtx_IOR (SImode,
14948 gen_rtx_ASHIFT
14949 (SImode,
14950 gen_rtx_SUBREG (SImode, operands[0], 0),
14951 GEN_INT (8)),
14952 scratch));
14953 else
14954 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
14955 gen_rtx_IOR (SImode,
14956 gen_rtx_ASHIFT (SImode, scratch,
14957 GEN_INT (8)),
14958 gen_rtx_SUBREG (SImode, operands[0], 0)));
14959 }
14960
14961 /* Handle storing a half-word to memory during reload by synthesizing as two
14962 byte stores. Take care not to clobber the input values until after we
14963 have moved them somewhere safe. This code assumes that if the DImode
14964 scratch in operands[2] overlaps either the input value or output address
14965 in some way, then that value must die in this insn (we absolutely need
14966 two scratch registers for some corner cases). */
14967 void
14968 arm_reload_out_hi (rtx *operands)
14969 {
14970 rtx ref = operands[0];
14971 rtx outval = operands[1];
14972 rtx base, scratch;
14973 HOST_WIDE_INT offset = 0;
14974
14975 if (GET_CODE (ref) == SUBREG)
14976 {
14977 offset = SUBREG_BYTE (ref);
14978 ref = SUBREG_REG (ref);
14979 }
14980
14981 if (REG_P (ref))
14982 {
14983 /* We have a pseudo which has been spilt onto the stack; there
14984 are two cases here: the first where there is a simple
14985 stack-slot replacement and a second where the stack-slot is
14986 out of range, or is used as a subreg. */
14987 if (reg_equiv_mem (REGNO (ref)))
14988 {
14989 ref = reg_equiv_mem (REGNO (ref));
14990 base = find_replacement (&XEXP (ref, 0));
14991 }
14992 else
14993 /* The slot is out of range, or was dressed up in a SUBREG. */
14994 base = reg_equiv_address (REGNO (ref));
14995
14996 /* PR 62254: If there is no equivalent memory location then just move
14997 the value as an SImode register move. This happens when the target
14998 architecture variant does not have an HImode register move. */
14999 if (base == NULL)
15000 {
15001 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15002
15003 if (REG_P (outval))
15004 {
15005 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15006 gen_rtx_SUBREG (SImode, outval, 0)));
15007 }
15008 else /* SUBREG_P (outval) */
15009 {
15010 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15011 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15012 SUBREG_REG (outval)));
15013 else
15014 /* FIXME: Handle other cases ? */
15015 gcc_unreachable ();
15016 }
15017 return;
15018 }
15019 }
15020 else
15021 base = find_replacement (&XEXP (ref, 0));
15022
15023 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15024
15025 /* Handle the case where the address is too complex to be offset by 1. */
15026 if (GET_CODE (base) == MINUS
15027 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15028 {
15029 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15030
15031 /* Be careful not to destroy OUTVAL. */
15032 if (reg_overlap_mentioned_p (base_plus, outval))
15033 {
15034 /* Updating base_plus might destroy outval, see if we can
15035 swap the scratch and base_plus. */
15036 if (!reg_overlap_mentioned_p (scratch, outval))
15037 std::swap (scratch, base_plus);
15038 else
15039 {
15040 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15041
15042 /* Be conservative and copy OUTVAL into the scratch now,
15043 this should only be necessary if outval is a subreg
15044 of something larger than a word. */
15045 /* XXX Might this clobber base? I can't see how it can,
15046 since scratch is known to overlap with OUTVAL, and
15047 must be wider than a word. */
15048 emit_insn (gen_movhi (scratch_hi, outval));
15049 outval = scratch_hi;
15050 }
15051 }
15052
15053 emit_set_insn (base_plus, base);
15054 base = base_plus;
15055 }
15056 else if (GET_CODE (base) == PLUS)
15057 {
15058 /* The addend must be CONST_INT, or we would have dealt with it above. */
15059 HOST_WIDE_INT hi, lo;
15060
15061 offset += INTVAL (XEXP (base, 1));
15062 base = XEXP (base, 0);
15063
15064 /* Rework the address into a legal sequence of insns. */
15065 /* Valid range for lo is -4095 -> 4095 */
15066 lo = (offset >= 0
15067 ? (offset & 0xfff)
15068 : -((-offset) & 0xfff));
15069
15070 /* Corner case, if lo is the max offset then we would be out of range
15071 once we have added the additional 1 below, so bump the msb into the
15072 pre-loading insn(s). */
15073 if (lo == 4095)
15074 lo &= 0x7ff;
15075
15076 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15077 ^ (HOST_WIDE_INT) 0x80000000)
15078 - (HOST_WIDE_INT) 0x80000000);
15079
15080 gcc_assert (hi + lo == offset);
15081
15082 if (hi != 0)
15083 {
15084 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15085
15086 /* Be careful not to destroy OUTVAL. */
15087 if (reg_overlap_mentioned_p (base_plus, outval))
15088 {
15089 /* Updating base_plus might destroy outval, see if we
15090 can swap the scratch and base_plus. */
15091 if (!reg_overlap_mentioned_p (scratch, outval))
15092 std::swap (scratch, base_plus);
15093 else
15094 {
15095 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15096
15097 /* Be conservative and copy outval into scratch now,
15098 this should only be necessary if outval is a
15099 subreg of something larger than a word. */
15100 /* XXX Might this clobber base? I can't see how it
15101 can, since scratch is known to overlap with
15102 outval. */
15103 emit_insn (gen_movhi (scratch_hi, outval));
15104 outval = scratch_hi;
15105 }
15106 }
15107
15108 /* Get the base address; addsi3 knows how to handle constants
15109 that require more than one insn. */
15110 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15111 base = base_plus;
15112 offset = lo;
15113 }
15114 }
15115
15116 if (BYTES_BIG_ENDIAN)
15117 {
15118 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15119 plus_constant (Pmode, base,
15120 offset + 1)),
15121 gen_lowpart (QImode, outval)));
15122 emit_insn (gen_lshrsi3 (scratch,
15123 gen_rtx_SUBREG (SImode, outval, 0),
15124 GEN_INT (8)));
15125 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15126 offset)),
15127 gen_lowpart (QImode, scratch)));
15128 }
15129 else
15130 {
15131 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15132 offset)),
15133 gen_lowpart (QImode, outval)));
15134 emit_insn (gen_lshrsi3 (scratch,
15135 gen_rtx_SUBREG (SImode, outval, 0),
15136 GEN_INT (8)));
15137 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15138 plus_constant (Pmode, base,
15139 offset + 1)),
15140 gen_lowpart (QImode, scratch)));
15141 }
15142 }
15143
15144 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15145 (padded to the size of a word) should be passed in a register. */
15146
15147 static bool
15148 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15149 {
15150 if (TARGET_AAPCS_BASED)
15151 return must_pass_in_stack_var_size (mode, type);
15152 else
15153 return must_pass_in_stack_var_size_or_pad (mode, type);
15154 }
15155
15156
15157 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15158 byte of a stack argument has useful data. For legacy APCS ABIs we use
15159 the default. For AAPCS based ABIs small aggregate types are placed
15160 at the lowest memory address. */
15161
15162 static pad_direction
15163 arm_function_arg_padding (machine_mode mode, const_tree type)
15164 {
15165 if (!TARGET_AAPCS_BASED)
15166 return default_function_arg_padding (mode, type);
15167
15168 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15169 return PAD_DOWNWARD;
15170
15171 return PAD_UPWARD;
15172 }
15173
15174
15175 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15176 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15177 register has useful data, and return the opposite if the most
15178 significant byte does. */
15179
15180 bool
15181 arm_pad_reg_upward (machine_mode mode,
15182 tree type, int first ATTRIBUTE_UNUSED)
15183 {
15184 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15185 {
15186 /* For AAPCS, small aggregates, small fixed-point types,
15187 and small complex types are always padded upwards. */
15188 if (type)
15189 {
15190 if ((AGGREGATE_TYPE_P (type)
15191 || TREE_CODE (type) == COMPLEX_TYPE
15192 || FIXED_POINT_TYPE_P (type))
15193 && int_size_in_bytes (type) <= 4)
15194 return true;
15195 }
15196 else
15197 {
15198 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15199 && GET_MODE_SIZE (mode) <= 4)
15200 return true;
15201 }
15202 }
15203
15204 /* Otherwise, use default padding. */
15205 return !BYTES_BIG_ENDIAN;
15206 }
15207
15208 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15209 assuming that the address in the base register is word aligned. */
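/* For example: an offset of 1020 is accepted in Thumb-2 but rejected in ARM
   state (limit 255); 248 is accepted in both; 2 is accepted only in ARM
   state, since Thumb-2 requires a multiple of 4. */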
15210 bool
15211 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15212 {
15213 HOST_WIDE_INT max_offset;
15214
15215 /* Offset must be a multiple of 4 in Thumb mode. */
15216 if (TARGET_THUMB2 && ((offset & 3) != 0))
15217 return false;
15218
15219 if (TARGET_THUMB2)
15220 max_offset = 1020;
15221 else if (TARGET_ARM)
15222 max_offset = 255;
15223 else
15224 return false;
15225
15226 return ((offset <= max_offset) && (offset >= -max_offset));
15227 }
15228
15229 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15230 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15231 Assumes that the address in the base register RN is word aligned. Pattern
15232 guarantees that both memory accesses use the same base register,
15233 that the offsets are constants within range, and that the gap between them is 4.
15234 If reload is complete, check that the registers are legal. WBACK indicates whether
15235 address is updated. LOAD indicates whether memory access is load or store. */
15236 bool
15237 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15238 bool wback, bool load)
15239 {
15240 unsigned int t, t2, n;
15241
15242 if (!reload_completed)
15243 return true;
15244
15245 if (!offset_ok_for_ldrd_strd (offset))
15246 return false;
15247
15248 t = REGNO (rt);
15249 t2 = REGNO (rt2);
15250 n = REGNO (rn);
15251
15252 if ((TARGET_THUMB2)
15253 && ((wback && (n == t || n == t2))
15254 || (t == SP_REGNUM)
15255 || (t == PC_REGNUM)
15256 || (t2 == SP_REGNUM)
15257 || (t2 == PC_REGNUM)
15258 || (!load && (n == PC_REGNUM))
15259 || (load && (t == t2))
15260 /* Triggers Cortex-M3 LDRD errata. */
15261 || (!wback && load && fix_cm3_ldrd && (n == t))))
15262 return false;
15263
15264 if ((TARGET_ARM)
15265 && ((wback && (n == t || n == t2))
15266 || (t2 == PC_REGNUM)
15267 || (t % 2 != 0) /* First destination register is not even. */
15268 || (t2 != t + 1)
15269 /* PC can be used as base register (for offset addressing only),
15270 but it is deprecated. */
15271 || (n == PC_REGNUM)))
15272 return false;
15273
15274 return true;
15275 }
15276
15277 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15278 operand MEM's address contains an immediate offset from the base
15279 register and has no side effects, in which case it sets BASE and
15280 OFFSET accordingly. */
15281 static bool
15282 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset)
15283 {
15284 rtx addr;
15285
15286 gcc_assert (base != NULL && offset != NULL);
15287
15288 /* TODO: Handle more general memory operand patterns, such as
15289 PRE_DEC and PRE_INC. */
15290
15291 if (side_effects_p (mem))
15292 return false;
15293
15294 /* Can't deal with subregs. */
15295 if (GET_CODE (mem) == SUBREG)
15296 return false;
15297
15298 gcc_assert (MEM_P (mem));
15299
15300 *offset = const0_rtx;
15301
15302 addr = XEXP (mem, 0);
15303
15304 /* If addr isn't valid for DImode, then we can't handle it. */
15305 if (!arm_legitimate_address_p (DImode, addr,
15306 reload_in_progress || reload_completed))
15307 return false;
15308
15309 if (REG_P (addr))
15310 {
15311 *base = addr;
15312 return true;
15313 }
15314 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15315 {
15316 *base = XEXP (addr, 0);
15317 *offset = XEXP (addr, 1);
15318 return (REG_P (*base) && CONST_INT_P (*offset));
15319 }
15320
15321 return false;
15322 }
15323
15324 /* Called from a peephole2 to replace two word-size accesses with a
15325 single LDRD/STRD instruction. Returns true iff we can generate a
15326 new instruction sequence. That is, both accesses use the same base
15327 register and the gap between constant offsets is 4. This function
15328 may reorder its operands to match ldrd/strd RTL templates.
15329 OPERANDS are the operands found by the peephole matcher;
15330 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15331 corresponding memory operands. LOAD indicates whether the access
15332 is a load or a store. CONST_STORE indicates a store of constant
15333 integer values held in OPERANDS[4,5], and assumes that the pattern
15334 is four insns long, for the purpose of checking dead registers.
15335 COMMUTE indicates that register operands may be reordered. */
15336 bool
15337 gen_operands_ldrd_strd (rtx *operands, bool load,
15338 bool const_store, bool commute)
15339 {
15340 int nops = 2;
15341 HOST_WIDE_INT offsets[2], offset;
15342 rtx base = NULL_RTX;
15343 rtx cur_base, cur_offset, tmp;
15344 int i, gap;
15345 HARD_REG_SET regset;
15346
15347 gcc_assert (!const_store || !load);
15348 /* Check that the memory references are immediate offsets from the
15349 same base register. Extract the base register, the destination
15350 registers, and the corresponding memory offsets. */
15351 for (i = 0; i < nops; i++)
15352 {
15353 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset))
15354 return false;
15355
15356 if (i == 0)
15357 base = cur_base;
15358 else if (REGNO (base) != REGNO (cur_base))
15359 return false;
15360
15361 offsets[i] = INTVAL (cur_offset);
15362 if (GET_CODE (operands[i]) == SUBREG)
15363 {
15364 tmp = SUBREG_REG (operands[i]);
15365 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15366 operands[i] = tmp;
15367 }
15368 }
15369
15370 /* Make sure there is no dependency between the individual loads. */
15371 if (load && REGNO (operands[0]) == REGNO (base))
15372 return false; /* RAW */
15373
15374 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15375 return false; /* WAW */
15376
15377 /* If the same input register is used in both stores
15378 when storing different constants, try to find a free register.
15379 For example, the code
15380 mov r0, 0
15381 str r0, [r2]
15382 mov r0, 1
15383 str r0, [r2, #4]
15384 can be transformed into
15385 mov r1, 0
15386 mov r0, 1
15387 strd r1, r0, [r2]
15388 in Thumb mode assuming that r1 is free.
15389 For ARM mode do the same but only if the starting register
15390 can be made to be even. */
15391 if (const_store
15392 && REGNO (operands[0]) == REGNO (operands[1])
15393 && INTVAL (operands[4]) != INTVAL (operands[5]))
15394 {
15395 if (TARGET_THUMB2)
15396 {
15397 CLEAR_HARD_REG_SET (regset);
15398 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15399 if (tmp == NULL_RTX)
15400 return false;
15401
15402 /* Use the new register in the first load to ensure that
15403 if the original input register is not dead after peephole,
15404 then it will have the correct constant value. */
15405 operands[0] = tmp;
15406 }
15407 else if (TARGET_ARM)
15408 {
15409 int regno = REGNO (operands[0]);
15410 if (!peep2_reg_dead_p (4, operands[0]))
15411 {
15412 /* When the input register is even and is not dead after the
15413 pattern, it has to hold the second constant but we cannot
15414 form a legal STRD in ARM mode with this register as the second
15415 register. */
15416 if (regno % 2 == 0)
15417 return false;
15418
15419 /* Is regno-1 free? */
15420 SET_HARD_REG_SET (regset);
15421 CLEAR_HARD_REG_BIT(regset, regno - 1);
15422 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15423 if (tmp == NULL_RTX)
15424 return false;
15425
15426 operands[0] = tmp;
15427 }
15428 else
15429 {
15430 /* Find a DImode register. */
15431 CLEAR_HARD_REG_SET (regset);
15432 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15433 if (tmp != NULL_RTX)
15434 {
15435 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15436 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15437 }
15438 else
15439 {
15440 /* Can we use the input register to form a DI register? */
15441 SET_HARD_REG_SET (regset);
15442 CLEAR_HARD_REG_BIT(regset,
15443 regno % 2 == 0 ? regno + 1 : regno - 1);
15444 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15445 if (tmp == NULL_RTX)
15446 return false;
15447 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15448 }
15449 }
15450
15451 gcc_assert (operands[0] != NULL_RTX);
15452 gcc_assert (operands[1] != NULL_RTX);
15453 gcc_assert (REGNO (operands[0]) % 2 == 0);
15454 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15455 }
15456 }
15457
15458 /* Make sure the instructions are ordered with lower memory access first. */
15459 if (offsets[0] > offsets[1])
15460 {
15461 gap = offsets[0] - offsets[1];
15462 offset = offsets[1];
15463
15464 /* Swap the instructions such that lower memory is accessed first. */
15465 std::swap (operands[0], operands[1]);
15466 std::swap (operands[2], operands[3]);
15467 if (const_store)
15468 std::swap (operands[4], operands[5]);
15469 }
15470 else
15471 {
15472 gap = offsets[1] - offsets[0];
15473 offset = offsets[0];
15474 }
15475
15476 /* Make sure accesses are to consecutive memory locations. */
15477 if (gap != 4)
15478 return false;
15479
15480 /* Make sure we generate legal instructions. */
15481 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15482 false, load))
15483 return true;
15484
15485 /* In Thumb state, where registers are almost unconstrained, there
15486 is little hope of fixing it. */
15487 if (TARGET_THUMB2)
15488 return false;
15489
15490 if (load && commute)
15491 {
15492 /* Try reordering registers. */
15493 std::swap (operands[0], operands[1]);
15494 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15495 false, load))
15496 return true;
15497 }
15498
15499 if (const_store)
15500 {
15501 /* If input registers are dead after this pattern, they can be
15502 reordered or replaced by other registers that are free in the
15503 current pattern. */
15504 if (!peep2_reg_dead_p (4, operands[0])
15505 || !peep2_reg_dead_p (4, operands[1]))
15506 return false;
15507
15508 /* Try to reorder the input registers. */
15509 /* For example, the code
15510 mov r0, 0
15511 mov r1, 1
15512 str r1, [r2]
15513 str r0, [r2, #4]
15514 can be transformed into
15515 mov r1, 0
15516 mov r0, 1
15517 strd r0, [r2]
15518 */
15519 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15520 false, false))
15521 {
15522 std::swap (operands[0], operands[1]);
15523 return true;
15524 }
15525
15526 /* Try to find a free DI register. */
15527 CLEAR_HARD_REG_SET (regset);
15528 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15529 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15530 while (true)
15531 {
15532 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15533 if (tmp == NULL_RTX)
15534 return false;
15535
15536 /* DREG must be an even-numbered register in DImode.
15537 Split it into SI registers. */
15538 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15539 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15540 gcc_assert (operands[0] != NULL_RTX);
15541 gcc_assert (operands[1] != NULL_RTX);
15542 gcc_assert (REGNO (operands[0]) % 2 == 0);
15543 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15544
15545 return (operands_ok_ldrd_strd (operands[0], operands[1],
15546 base, offset,
15547 false, load));
15548 }
15549 }
15550
15551 return false;
15552 }
15553
15554
15555
15556 \f
15557 /* Print a symbolic form of X to the debug file, F. */
15558 static void
15559 arm_print_value (FILE *f, rtx x)
15560 {
15561 switch (GET_CODE (x))
15562 {
15563 case CONST_INT:
15564 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15565 return;
15566
15567 case CONST_DOUBLE:
15568 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15569 return;
15570
15571 case CONST_VECTOR:
15572 {
15573 int i;
15574
15575 fprintf (f, "<");
15576 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15577 {
15578 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15579 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15580 fputc (',', f);
15581 }
15582 fprintf (f, ">");
15583 }
15584 return;
15585
15586 case CONST_STRING:
15587 fprintf (f, "\"%s\"", XSTR (x, 0));
15588 return;
15589
15590 case SYMBOL_REF:
15591 fprintf (f, "`%s'", XSTR (x, 0));
15592 return;
15593
15594 case LABEL_REF:
15595 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15596 return;
15597
15598 case CONST:
15599 arm_print_value (f, XEXP (x, 0));
15600 return;
15601
15602 case PLUS:
15603 arm_print_value (f, XEXP (x, 0));
15604 fprintf (f, "+");
15605 arm_print_value (f, XEXP (x, 1));
15606 return;
15607
15608 case PC:
15609 fprintf (f, "pc");
15610 return;
15611
15612 default:
15613 fprintf (f, "????");
15614 return;
15615 }
15616 }
15617 \f
15618 /* Routines for manipulation of the constant pool. */
15619
15620 /* Arm instructions cannot load a large constant directly into a
15621 register; they have to come from a pc relative load. The constant
15622 must therefore be placed in the addressable range of the pc
15623 relative load. Depending on the precise pc relative load
15624 instruction the range is somewhere between 256 bytes and 4k. This
15625 means that we often have to dump a constant inside a function, and
15626 generate code to branch around it.
15627
15628 It is important to minimize this, since the branches will slow
15629 things down and make the code larger.
15630
15631 Normally we can hide the table after an existing unconditional
15632 branch so that there is no interruption of the flow, but in the
15633 worst case the code looks like this:
15634
15635 ldr rn, L1
15636 ...
15637 b L2
15638 align
15639 L1: .long value
15640 L2:
15641 ...
15642
15643 ldr rn, L3
15644 ...
15645 b L4
15646 align
15647 L3: .long value
15648 L4:
15649 ...
15650
15651 We fix this by performing a scan after scheduling, which notices
15652 which instructions need to have their operands fetched from the
15653 constant table and builds the table.
15654
15655 The algorithm starts by building a table of all the constants that
15656 need fixing up and all the natural barriers in the function (places
15657 where a constant table can be dropped without breaking the flow).
15658 For each fixup we note how far the pc-relative replacement will be
15659 able to reach and the offset of the instruction into the function.
15660
15661 Having built the table we then group the fixes together to form
15662 tables that are as large as possible (subject to addressing
15663 constraints) and emit each table of constants after the last
15664 barrier that is within range of all the instructions in the group.
15665 If a group does not contain a barrier, then we forcibly create one
15666 by inserting a jump instruction into the flow. Once the table has
15667 been inserted, the insns are then modified to reference the
15668 relevant entry in the pool.
15669
15670 Possible enhancements to the algorithm (not implemented) are:
15671
15672 1) For some processors and object formats, there may be benefit in
15673 aligning the pools to the start of cache lines; this alignment
15674 would need to be taken into account when calculating addressability
15675 of a pool. */
15676
15677 /* These typedefs are located at the start of this file, so that
15678 they can be used in the prototypes there. This comment is to
15679 remind readers of that fact so that the following structures
15680 can be understood more easily.
15681
15682 typedef struct minipool_node Mnode;
15683 typedef struct minipool_fixup Mfix; */
15684
15685 struct minipool_node
15686 {
15687 /* Doubly linked chain of entries. */
15688 Mnode * next;
15689 Mnode * prev;
15690 /* The maximum offset into the code at which this entry can be placed. While
15691 pushing fixes for forward references, all entries are sorted in order
15692 of increasing max_address. */
15693 HOST_WIDE_INT max_address;
15694 /* Similarly for an entry inserted for a backwards ref. */
15695 HOST_WIDE_INT min_address;
15696 /* The number of fixes referencing this entry. This can become zero
15697 if we "unpush" an entry. In this case we ignore the entry when we
15698 come to emit the code. */
15699 int refcount;
15700 /* The offset from the start of the minipool. */
15701 HOST_WIDE_INT offset;
15702 /* The value in the table. */
15703 rtx value;
15704 /* The mode of value. */
15705 machine_mode mode;
15706 /* The size of the value. With iWMMXt enabled
15707 sizes > 4 also imply an alignment of 8 bytes. */
15708 int fix_size;
15709 };
15710
15711 struct minipool_fixup
15712 {
15713 Mfix * next;
15714 rtx_insn * insn;
15715 HOST_WIDE_INT address;
15716 rtx * loc;
15717 machine_mode mode;
15718 int fix_size;
15719 rtx value;
15720 Mnode * minipool;
15721 HOST_WIDE_INT forwards;
15722 HOST_WIDE_INT backwards;
15723 };
15724
15725 /* Fixes less than a word need padding out to a word boundary. */
15726 #define MINIPOOL_FIX_SIZE(mode) \
15727 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
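/* For example, an HImode fix still occupies 4 bytes in the pool, while a
   DImode fix occupies 8. */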
15728
15729 static Mnode * minipool_vector_head;
15730 static Mnode * minipool_vector_tail;
15731 static rtx_code_label *minipool_vector_label;
15732 static int minipool_pad;
15733
15734 /* The linked list of all minipool fixes required for this function. */
15735 Mfix * minipool_fix_head;
15736 Mfix * minipool_fix_tail;
15737 /* The fix entry for the current minipool, once it has been placed. */
15738 Mfix * minipool_barrier;
15739
15740 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15741 #define JUMP_TABLES_IN_TEXT_SECTION 0
15742 #endif
15743
15744 static HOST_WIDE_INT
15745 get_jump_table_size (rtx_jump_table_data *insn)
15746 {
15747 /* ADDR_VECs only take room if read-only data goes into the text
15748 section. */
15749 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15750 {
15751 rtx body = PATTERN (insn);
15752 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15753 HOST_WIDE_INT size;
15754 HOST_WIDE_INT modesize;
15755
15756 modesize = GET_MODE_SIZE (GET_MODE (body));
15757 size = modesize * XVECLEN (body, elt);
15758 switch (modesize)
15759 {
15760 case 1:
15761 /* Round up size of TBB table to a halfword boundary. */
15762 size = (size + 1) & ~HOST_WIDE_INT_1;
15763 break;
15764 case 2:
15765 /* No padding necessary for TBH. */
15766 break;
15767 case 4:
15768 /* Add two bytes for alignment on Thumb. */
15769 if (TARGET_THUMB)
15770 size += 2;
15771 break;
15772 default:
15773 gcc_unreachable ();
15774 }
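/* For example: a 9-entry TBB table (QImode) is 9 bytes, rounded up to 10;
   a 9-entry TBH table (HImode) is 18 bytes; a 9-entry word table is 36
   bytes, plus 2 on Thumb for alignment. */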
15775 return size;
15776 }
15777
15778 return 0;
15779 }
15780
15781 /* Return the maximum amount of padding that will be inserted before
15782 label LABEL. */
15783
15784 static HOST_WIDE_INT
15785 get_label_padding (rtx label)
15786 {
15787 HOST_WIDE_INT align, min_insn_size;
15788
15789 align = 1 << label_to_alignment (label);
15790 min_insn_size = TARGET_THUMB ? 2 : 4;
15791 return align > min_insn_size ? align - min_insn_size : 0;
15792 }
15793
15794 /* Move a minipool fix MP from its current location to before MAX_MP.
15795 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15796 constraints may need updating. */
15797 static Mnode *
15798 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15799 HOST_WIDE_INT max_address)
15800 {
15801 /* The code below assumes these are different. */
15802 gcc_assert (mp != max_mp);
15803
15804 if (max_mp == NULL)
15805 {
15806 if (max_address < mp->max_address)
15807 mp->max_address = max_address;
15808 }
15809 else
15810 {
15811 if (max_address > max_mp->max_address - mp->fix_size)
15812 mp->max_address = max_mp->max_address - mp->fix_size;
15813 else
15814 mp->max_address = max_address;
15815
15816 /* Unlink MP from its current position. Since max_mp is non-null,
15817 mp->prev must be non-null. */
15818 mp->prev->next = mp->next;
15819 if (mp->next != NULL)
15820 mp->next->prev = mp->prev;
15821 else
15822 minipool_vector_tail = mp->prev;
15823
15824 /* Re-insert it before MAX_MP. */
15825 mp->next = max_mp;
15826 mp->prev = max_mp->prev;
15827 max_mp->prev = mp;
15828
15829 if (mp->prev != NULL)
15830 mp->prev->next = mp;
15831 else
15832 minipool_vector_head = mp;
15833 }
15834
15835 /* Save the new entry. */
15836 max_mp = mp;
15837
15838 /* Scan over the preceding entries and adjust their addresses as
15839 required. */
15840 while (mp->prev != NULL
15841 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15842 {
15843 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15844 mp = mp->prev;
15845 }
15846
15847 return max_mp;
15848 }
15849
15850 /* Add a constant to the minipool for a forward reference. Returns the
15851 node added or NULL if the constant will not fit in this pool. */
15852 static Mnode *
15853 add_minipool_forward_ref (Mfix *fix)
15854 {
15855 /* If set, max_mp is the first pool_entry that has a lower
15856 constraint than the one we are trying to add. */
15857 Mnode * max_mp = NULL;
15858 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
15859 Mnode * mp;
15860
15861 /* If the minipool starts before the end of FIX->INSN then this FIX
15862 can not be placed into the current pool. Furthermore, adding the
15863 new constant pool entry may cause the pool to start FIX_SIZE bytes
15864 earlier. */
15865 if (minipool_vector_head &&
15866 (fix->address + get_attr_length (fix->insn)
15867 >= minipool_vector_head->max_address - fix->fix_size))
15868 return NULL;
15869
15870 /* Scan the pool to see if a constant with the same value has
15871 already been added. While we are doing this, also note the
15872 location where we must insert the constant if it doesn't already
15873 exist. */
15874 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
15875 {
15876 if (GET_CODE (fix->value) == GET_CODE (mp->value)
15877 && fix->mode == mp->mode
15878 && (!LABEL_P (fix->value)
15879 || (CODE_LABEL_NUMBER (fix->value)
15880 == CODE_LABEL_NUMBER (mp->value)))
15881 && rtx_equal_p (fix->value, mp->value))
15882 {
15883 /* More than one fix references this entry. */
15884 mp->refcount++;
15885 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
15886 }
15887
15888 /* Note the insertion point if necessary. */
15889 if (max_mp == NULL
15890 && mp->max_address > max_address)
15891 max_mp = mp;
15892
15893 /* If we are inserting an 8-byte aligned quantity and
15894 we have not already found an insertion point, then
15895 make sure that all such 8-byte aligned quantities are
15896 placed at the start of the pool. */
15897 if (ARM_DOUBLEWORD_ALIGN
15898 && max_mp == NULL
15899 && fix->fix_size >= 8
15900 && mp->fix_size < 8)
15901 {
15902 max_mp = mp;
15903 max_address = mp->max_address;
15904 }
15905 }
15906
15907 /* The value is not currently in the minipool, so we need to create
15908 a new entry for it. If MAX_MP is NULL, the entry will be put on
15909 the end of the list since the placement is less constrained than
15910 any existing entry. Otherwise, we insert the new fix before
15911 MAX_MP and, if necessary, adjust the constraints on the other
15912 entries. */
15913 mp = XNEW (Mnode);
15914 mp->fix_size = fix->fix_size;
15915 mp->mode = fix->mode;
15916 mp->value = fix->value;
15917 mp->refcount = 1;
15918 /* Not yet required for a backwards ref. */
15919 mp->min_address = -65536;
15920
15921 if (max_mp == NULL)
15922 {
15923 mp->max_address = max_address;
15924 mp->next = NULL;
15925 mp->prev = minipool_vector_tail;
15926
15927 if (mp->prev == NULL)
15928 {
15929 minipool_vector_head = mp;
15930 minipool_vector_label = gen_label_rtx ();
15931 }
15932 else
15933 mp->prev->next = mp;
15934
15935 minipool_vector_tail = mp;
15936 }
15937 else
15938 {
15939 if (max_address > max_mp->max_address - mp->fix_size)
15940 mp->max_address = max_mp->max_address - mp->fix_size;
15941 else
15942 mp->max_address = max_address;
15943
15944 mp->next = max_mp;
15945 mp->prev = max_mp->prev;
15946 max_mp->prev = mp;
15947 if (mp->prev != NULL)
15948 mp->prev->next = mp;
15949 else
15950 minipool_vector_head = mp;
15951 }
15952
15953 /* Save the new entry. */
15954 max_mp = mp;
15955
15956 /* Scan over the preceding entries and adjust their addresses as
15957 required. */
15958 while (mp->prev != NULL
15959 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
15960 {
15961 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
15962 mp = mp->prev;
15963 }
15964
15965 return max_mp;
15966 }
15967
15968 static Mnode *
15969 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
15970 HOST_WIDE_INT min_address)
15971 {
15972 HOST_WIDE_INT offset;
15973
15974 /* The code below assumes these are different. */
15975 gcc_assert (mp != min_mp);
15976
15977 if (min_mp == NULL)
15978 {
15979 if (min_address > mp->min_address)
15980 mp->min_address = min_address;
15981 }
15982 else
15983 {
15984 /* We will adjust this below if it is too loose. */
15985 mp->min_address = min_address;
15986
15987 /* Unlink MP from its current position. Since min_mp is non-null,
15988 mp->next must be non-null. */
15989 mp->next->prev = mp->prev;
15990 if (mp->prev != NULL)
15991 mp->prev->next = mp->next;
15992 else
15993 minipool_vector_head = mp->next;
15994
15995 /* Reinsert it after MIN_MP. */
15996 mp->prev = min_mp;
15997 mp->next = min_mp->next;
15998 min_mp->next = mp;
15999 if (mp->next != NULL)
16000 mp->next->prev = mp;
16001 else
16002 minipool_vector_tail = mp;
16003 }
16004
16005 min_mp = mp;
16006
16007 offset = 0;
16008 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16009 {
16010 mp->offset = offset;
16011 if (mp->refcount > 0)
16012 offset += mp->fix_size;
16013
16014 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16015 mp->next->min_address = mp->min_address + mp->fix_size;
16016 }
16017
16018 return min_mp;
16019 }
16020
16021 /* Add a constant to the minipool for a backward reference. Returns the
16022 node added or NULL if the constant will not fit in this pool.
16023
16024 Note that the code for insertion for a backwards reference can be
16025 somewhat confusing because the calculated offsets for each fix do
16026 not take into account the size of the pool (which is still under
16027 construction). */
16028 static Mnode *
16029 add_minipool_backward_ref (Mfix *fix)
16030 {
16031 /* If set, min_mp is the last pool_entry that has a lower constraint
16032 than the one we are trying to add. */
16033 Mnode *min_mp = NULL;
16034 /* This can be negative, since it is only a constraint. */
16035 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16036 Mnode *mp;
16037
16038 /* If we can't reach the current pool from this insn, or if we can't
16039 insert this entry at the end of the pool without pushing other
16040 fixes out of range, then we don't try. This ensures that we
16041 can't fail later on. */
16042 if (min_address >= minipool_barrier->address
16043 || (minipool_vector_tail->min_address + fix->fix_size
16044 >= minipool_barrier->address))
16045 return NULL;
16046
16047 /* Scan the pool to see if a constant with the same value has
16048 already been added. While we are doing this, also note the
16049 location where we must insert the constant if it doesn't already
16050 exist. */
16051 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16052 {
16053 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16054 && fix->mode == mp->mode
16055 && (!LABEL_P (fix->value)
16056 || (CODE_LABEL_NUMBER (fix->value)
16057 == CODE_LABEL_NUMBER (mp->value)))
16058 && rtx_equal_p (fix->value, mp->value)
16059 /* Check that there is enough slack to move this entry to the
16060 end of the table (this is conservative). */
16061 && (mp->max_address
16062 > (minipool_barrier->address
16063 + minipool_vector_tail->offset
16064 + minipool_vector_tail->fix_size)))
16065 {
16066 mp->refcount++;
16067 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16068 }
16069
16070 if (min_mp != NULL)
16071 mp->min_address += fix->fix_size;
16072 else
16073 {
16074 /* Note the insertion point if necessary. */
16075 if (mp->min_address < min_address)
16076 {
16077 /* For now, we do not allow the insertion of 8-byte alignment
16078 requiring nodes anywhere but at the start of the pool. */
16079 if (ARM_DOUBLEWORD_ALIGN
16080 && fix->fix_size >= 8 && mp->fix_size < 8)
16081 return NULL;
16082 else
16083 min_mp = mp;
16084 }
16085 else if (mp->max_address
16086 < minipool_barrier->address + mp->offset + fix->fix_size)
16087 {
16088 /* Inserting before this entry would push the fix beyond
16089 its maximum address (which can happen if we have
16090 re-located a forwards fix); force the new fix to come
16091 after it. */
16092 if (ARM_DOUBLEWORD_ALIGN
16093 && fix->fix_size >= 8 && mp->fix_size < 8)
16094 return NULL;
16095 else
16096 {
16097 min_mp = mp;
16098 min_address = mp->min_address + fix->fix_size;
16099 }
16100 }
16101 /* Do not insert a non-8-byte aligned quantity before 8-byte
16102 aligned quantities. */
16103 else if (ARM_DOUBLEWORD_ALIGN
16104 && fix->fix_size < 8
16105 && mp->fix_size >= 8)
16106 {
16107 min_mp = mp;
16108 min_address = mp->min_address + fix->fix_size;
16109 }
16110 }
16111 }
16112
16113 /* We need to create a new entry. */
16114 mp = XNEW (Mnode);
16115 mp->fix_size = fix->fix_size;
16116 mp->mode = fix->mode;
16117 mp->value = fix->value;
16118 mp->refcount = 1;
16119 mp->max_address = minipool_barrier->address + 65536;
16120
16121 mp->min_address = min_address;
16122
16123 if (min_mp == NULL)
16124 {
16125 mp->prev = NULL;
16126 mp->next = minipool_vector_head;
16127
16128 if (mp->next == NULL)
16129 {
16130 minipool_vector_tail = mp;
16131 minipool_vector_label = gen_label_rtx ();
16132 }
16133 else
16134 mp->next->prev = mp;
16135
16136 minipool_vector_head = mp;
16137 }
16138 else
16139 {
16140 mp->next = min_mp->next;
16141 mp->prev = min_mp;
16142 min_mp->next = mp;
16143
16144 if (mp->next != NULL)
16145 mp->next->prev = mp;
16146 else
16147 minipool_vector_tail = mp;
16148 }
16149
16150 /* Save the new entry. */
16151 min_mp = mp;
16152
16153 if (mp->prev)
16154 mp = mp->prev;
16155 else
16156 mp->offset = 0;
16157
16158 /* Scan over the following entries and adjust their offsets. */
16159 while (mp->next != NULL)
16160 {
16161 if (mp->next->min_address < mp->min_address + mp->fix_size)
16162 mp->next->min_address = mp->min_address + mp->fix_size;
16163
16164 if (mp->refcount)
16165 mp->next->offset = mp->offset + mp->fix_size;
16166 else
16167 mp->next->offset = mp->offset;
16168
16169 mp = mp->next;
16170 }
16171
16172 return min_mp;
16173 }
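
/* A compiled-out, stand-alone restatement of the early-out test above, with
   illustrative parameter names: an entry can only join this pool if the
   referencing insn can still reach it (its minimum address lies below the
   barrier) and appending it at the end of the pool would not push the
   current tail past the barrier.  */
#if 0
static int
backward_entry_can_join_pool_p (long fix_address, long backward_range,
				long fix_size, long barrier_address,
				long tail_min_address)
{
  long min_address = fix_address - backward_range;

  if (min_address >= barrier_address
      || tail_min_address + fix_size >= barrier_address)
    return 0;		/* The constant cannot go in this pool.  */
  return 1;
}
#endif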
16174
16175 static void
16176 assign_minipool_offsets (Mfix *barrier)
16177 {
16178 HOST_WIDE_INT offset = 0;
16179 Mnode *mp;
16180
16181 minipool_barrier = barrier;
16182
16183 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16184 {
16185 mp->offset = offset;
16186
16187 if (mp->refcount > 0)
16188 offset += mp->fix_size;
16189 }
16190 }
16191
16192 /* Output the literal table. */
16193 static void
16194 dump_minipool (rtx_insn *scan)
16195 {
16196 Mnode * mp;
16197 Mnode * nmp;
16198 int align64 = 0;
16199
16200 if (ARM_DOUBLEWORD_ALIGN)
16201 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16202 if (mp->refcount > 0 && mp->fix_size >= 8)
16203 {
16204 align64 = 1;
16205 break;
16206 }
16207
16208 if (dump_file)
16209 fprintf (dump_file,
16210 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16211 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16212
16213 scan = emit_label_after (gen_label_rtx (), scan);
16214 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16215 scan = emit_label_after (minipool_vector_label, scan);
16216
16217 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16218 {
16219 if (mp->refcount > 0)
16220 {
16221 if (dump_file)
16222 {
16223 fprintf (dump_file,
16224 ";; Offset %u, min %ld, max %ld ",
16225 (unsigned) mp->offset, (unsigned long) mp->min_address,
16226 (unsigned long) mp->max_address);
16227 arm_print_value (dump_file, mp->value);
16228 fputc ('\n', dump_file);
16229 }
16230
16231 rtx val = copy_rtx (mp->value);
16232
16233 switch (GET_MODE_SIZE (mp->mode))
16234 {
16235 #ifdef HAVE_consttable_1
16236 case 1:
16237 scan = emit_insn_after (gen_consttable_1 (val), scan);
16238 break;
16239
16240 #endif
16241 #ifdef HAVE_consttable_2
16242 case 2:
16243 scan = emit_insn_after (gen_consttable_2 (val), scan);
16244 break;
16245
16246 #endif
16247 #ifdef HAVE_consttable_4
16248 case 4:
16249 scan = emit_insn_after (gen_consttable_4 (val), scan);
16250 break;
16251
16252 #endif
16253 #ifdef HAVE_consttable_8
16254 case 8:
16255 scan = emit_insn_after (gen_consttable_8 (val), scan);
16256 break;
16257
16258 #endif
16259 #ifdef HAVE_consttable_16
16260 case 16:
16261 scan = emit_insn_after (gen_consttable_16 (val), scan);
16262 break;
16263
16264 #endif
16265 default:
16266 gcc_unreachable ();
16267 }
16268 }
16269
16270 nmp = mp->next;
16271 free (mp);
16272 }
16273
16274 minipool_vector_head = minipool_vector_tail = NULL;
16275 scan = emit_insn_after (gen_consttable_end (), scan);
16276 scan = emit_barrier_after (scan);
16277 }
16278
16279 /* Return the cost of forcibly inserting a barrier after INSN. */
16280 static int
16281 arm_barrier_cost (rtx_insn *insn)
16282 {
16283 /* Basing the location of the pool on the loop depth is preferable,
16284 but at the moment, the basic block information seems to be
16285 corrupt by this stage of the compilation. */
16286 int base_cost = 50;
16287 rtx_insn *next = next_nonnote_insn (insn);
16288
16289 if (next != NULL && LABEL_P (next))
16290 base_cost -= 20;
16291
16292 switch (GET_CODE (insn))
16293 {
16294 case CODE_LABEL:
16295 /* It will always be better to place the table before the label, rather
16296 than after it. */
16297 return 50;
16298
16299 case INSN:
16300 case CALL_INSN:
16301 return base_cost;
16302
16303 case JUMP_INSN:
16304 return base_cost - 10;
16305
16306 default:
16307 return base_cost + 10;
16308 }
16309 }
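
/* For example, with the weights above a JUMP_INSN that is immediately
   followed by a CODE_LABEL costs 50 - 20 - 10 = 20, making the gap after an
   unconditional branch and before a label a preferred place to park the
   pool, while splitting the stream at a CODE_LABEL itself always costs 50;
   lower costs are preferred by create_fix_barrier below.  */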
16310
16311 /* Find the best place in the insn stream in the range
16312 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16313 Create the barrier by inserting a jump and add a new fix entry for
16314 it. */
16315 static Mfix *
16316 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16317 {
16318 HOST_WIDE_INT count = 0;
16319 rtx_barrier *barrier;
16320 rtx_insn *from = fix->insn;
16321 /* The instruction after which we will insert the jump. */
16322 rtx_insn *selected = NULL;
16323 int selected_cost;
16324 /* The address at which the jump instruction will be placed. */
16325 HOST_WIDE_INT selected_address;
16326 Mfix * new_fix;
16327 HOST_WIDE_INT max_count = max_address - fix->address;
16328 rtx_code_label *label = gen_label_rtx ();
16329
16330 selected_cost = arm_barrier_cost (from);
16331 selected_address = fix->address;
16332
16333 while (from && count < max_count)
16334 {
16335 rtx_jump_table_data *tmp;
16336 int new_cost;
16337
16338 /* This code shouldn't have been called if there was a natural barrier
16339 within range. */
16340 gcc_assert (!BARRIER_P (from));
16341
16342 /* Count the length of this insn. This must stay in sync with the
16343 code that pushes minipool fixes. */
16344 if (LABEL_P (from))
16345 count += get_label_padding (from);
16346 else
16347 count += get_attr_length (from);
16348
16349 /* If there is a jump table, add its length. */
16350 if (tablejump_p (from, NULL, &tmp))
16351 {
16352 count += get_jump_table_size (tmp);
16353
16354 /* Jump tables aren't in a basic block, so base the cost on
16355 the dispatch insn. If we select this location, we will
16356 still put the pool after the table. */
16357 new_cost = arm_barrier_cost (from);
16358
16359 if (count < max_count
16360 && (!selected || new_cost <= selected_cost))
16361 {
16362 selected = tmp;
16363 selected_cost = new_cost;
16364 selected_address = fix->address + count;
16365 }
16366
16367 /* Continue after the dispatch table. */
16368 from = NEXT_INSN (tmp);
16369 continue;
16370 }
16371
16372 new_cost = arm_barrier_cost (from);
16373
16374 if (count < max_count
16375 && (!selected || new_cost <= selected_cost))
16376 {
16377 selected = from;
16378 selected_cost = new_cost;
16379 selected_address = fix->address + count;
16380 }
16381
16382 from = NEXT_INSN (from);
16383 }
16384
16385 /* Make sure that we found a place to insert the jump. */
16386 gcc_assert (selected);
16387
16388 /* Make sure we do not split a call and its corresponding
16389 CALL_ARG_LOCATION note. */
16390 if (CALL_P (selected))
16391 {
16392 rtx_insn *next = NEXT_INSN (selected);
16393 if (next && NOTE_P (next)
16394 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16395 selected = next;
16396 }
16397
16398 /* Create a new JUMP_INSN that branches around a barrier. */
16399 from = emit_jump_insn_after (gen_jump (label), selected);
16400 JUMP_LABEL (from) = label;
16401 barrier = emit_barrier_after (from);
16402 emit_label_after (label, barrier);
16403
16404 /* Create a minipool barrier entry for the new barrier. */
16405 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16406 new_fix->insn = barrier;
16407 new_fix->address = selected_address;
16408 new_fix->next = fix->next;
16409 fix->next = new_fix;
16410
16411 return new_fix;
16412 }
16413
16414 /* Record that there is a natural barrier in the insn stream at
16415 ADDRESS. */
16416 static void
16417 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16418 {
16419 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16420
16421 fix->insn = insn;
16422 fix->address = address;
16423
16424 fix->next = NULL;
16425 if (minipool_fix_head != NULL)
16426 minipool_fix_tail->next = fix;
16427 else
16428 minipool_fix_head = fix;
16429
16430 minipool_fix_tail = fix;
16431 }
16432
16433 /* Record INSN, which will need fixing up to load a value from the
16434 minipool. ADDRESS is the offset of the insn from the start of the
16435 function; LOC is a pointer to the part of the insn which requires
16436 fixing; VALUE is the constant that must be loaded, which is of type
16437 MODE. */
16438 static void
16439 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16440 machine_mode mode, rtx value)
16441 {
16442 gcc_assert (!arm_disable_literal_pool);
16443 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16444
16445 fix->insn = insn;
16446 fix->address = address;
16447 fix->loc = loc;
16448 fix->mode = mode;
16449 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16450 fix->value = value;
16451 fix->forwards = get_attr_pool_range (insn);
16452 fix->backwards = get_attr_neg_pool_range (insn);
16453 fix->minipool = NULL;
16454
16455 /* If an insn doesn't have a range defined for it, then it isn't
16456 expecting to be reworked by this code. Better to stop now than
16457 to generate duff assembly code. */
16458 gcc_assert (fix->forwards || fix->backwards);
16459
16460 /* If an entry requires 8-byte alignment then assume all constant pools
16461 require 4 bytes of padding. Trying to do this later on a per-pool
16462 basis is awkward because existing pool entries have to be modified. */
16463 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16464 minipool_pad = 4;
16465
16466 if (dump_file)
16467 {
16468 fprintf (dump_file,
16469 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16470 GET_MODE_NAME (mode),
16471 INSN_UID (insn), (unsigned long) address,
16472 -1 * (long)fix->backwards, (long)fix->forwards);
16473 arm_print_value (dump_file, fix->value);
16474 fprintf (dump_file, "\n");
16475 }
16476
16477 /* Add it to the chain of fixes. */
16478 fix->next = NULL;
16479
16480 if (minipool_fix_head != NULL)
16481 minipool_fix_tail->next = fix;
16482 else
16483 minipool_fix_head = fix;
16484
16485 minipool_fix_tail = fix;
16486 }
16487
16488 /* Return the maximum allowed cost, in insns, of synthesizing a 64-bit
16489 constant inline; synthesis sequences costing more than this are not
16490 considered worthwhile. */
16491 int
16492 arm_max_const_double_inline_cost ()
16493 {
16494 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16495 }
16496
16497 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16498 Returns the number of insns needed, or 99 if we don't know how to
16499 do it. */
16500 int
16501 arm_const_double_inline_cost (rtx val)
16502 {
16503 rtx lowpart, highpart;
16504 machine_mode mode;
16505
16506 mode = GET_MODE (val);
16507
16508 if (mode == VOIDmode)
16509 mode = DImode;
16510
16511 gcc_assert (GET_MODE_SIZE (mode) == 8);
16512
16513 lowpart = gen_lowpart (SImode, val);
16514 highpart = gen_highpart_mode (SImode, mode, val);
16515
16516 gcc_assert (CONST_INT_P (lowpart));
16517 gcc_assert (CONST_INT_P (highpart));
16518
16519 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16520 NULL_RTX, NULL_RTX, 0, 0)
16521 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16522 NULL_RTX, NULL_RTX, 0, 0));
16523 }
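
/* Compiled-out sketch of the structure of the cost computation above: split
   the 64-bit value into its two 32-bit halves and sum the per-half synthesis
   costs.  count_synthesis_insns stands in for arm_gen_constant's counting
   mode and is only a declaration here.  */
#if 0
#include <stdint.h>

static int count_synthesis_insns (uint32_t value);	/* stand-in */

static int
const_double_inline_cost_sketch (uint64_t value)
{
  uint32_t lowpart = (uint32_t) value;
  uint32_t highpart = (uint32_t) (value >> 32);

  return count_synthesis_insns (lowpart) + count_synthesis_insns (highpart);
}
#endif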
16524
16525 /* Cost of loading a SImode constant. */
16526 static inline int
16527 arm_const_inline_cost (enum rtx_code code, rtx val)
16528 {
16529 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16530 NULL_RTX, NULL_RTX, 1, 0);
16531 }
16532
16533 /* Return true if it is worthwhile to split a 64-bit constant into two
16534 32-bit operations. This is the case if optimizing for size, or
16535 if we have load delay slots, or if one 32-bit part can be done with
16536 a single data operation. */
16537 bool
16538 arm_const_double_by_parts (rtx val)
16539 {
16540 machine_mode mode = GET_MODE (val);
16541 rtx part;
16542
16543 if (optimize_size || arm_ld_sched)
16544 return true;
16545
16546 if (mode == VOIDmode)
16547 mode = DImode;
16548
16549 part = gen_highpart_mode (SImode, mode, val);
16550
16551 gcc_assert (CONST_INT_P (part));
16552
16553 if (const_ok_for_arm (INTVAL (part))
16554 || const_ok_for_arm (~INTVAL (part)))
16555 return true;
16556
16557 part = gen_lowpart (SImode, val);
16558
16559 gcc_assert (CONST_INT_P (part));
16560
16561 if (const_ok_for_arm (INTVAL (part))
16562 || const_ok_for_arm (~INTVAL (part)))
16563 return true;
16564
16565 return false;
16566 }
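
/* The const_ok_for_arm tests above ask whether one 32-bit half can be an
   immediate operand of a single data-processing instruction.  Below is a
   compiled-out, simplified model of just the classic A32 rule (an 8-bit
   value rotated right by an even amount); the real predicate also covers
   other cases, so treat this only as an illustration.  */
#if 0
#include <stdint.h>

static int
classic_a32_immediate_p (uint32_t x)
{
  for (unsigned int rot = 0; rot < 32; rot += 2)
    {
      /* Rotating X left by ROT undoes a rotate-right-by-ROT encoding.  */
      uint32_t undone = rot ? (x << rot) | (x >> (32 - rot)) : x;
      if (undone <= 0xff)
	return 1;
    }
  return 0;
}
#endif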
16567
16568 /* Return true if it is possible to inline both the high and low parts
16569 of a 64-bit constant into 32-bit data processing instructions. */
16570 bool
16571 arm_const_double_by_immediates (rtx val)
16572 {
16573 machine_mode mode = GET_MODE (val);
16574 rtx part;
16575
16576 if (mode == VOIDmode)
16577 mode = DImode;
16578
16579 part = gen_highpart_mode (SImode, mode, val);
16580
16581 gcc_assert (CONST_INT_P (part));
16582
16583 if (!const_ok_for_arm (INTVAL (part)))
16584 return false;
16585
16586 part = gen_lowpart (SImode, val);
16587
16588 gcc_assert (CONST_INT_P (part));
16589
16590 if (!const_ok_for_arm (INTVAL (part)))
16591 return false;
16592
16593 return true;
16594 }
16595
16596 /* Scan INSN and note any of its operands that need fixing.
16597 If DO_PUSHES is false we do not actually push any of the fixups
16598 needed. */
16599 static void
16600 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16601 {
16602 int opno;
16603
16604 extract_constrain_insn (insn);
16605
16606 if (recog_data.n_alternatives == 0)
16607 return;
16608
16609 /* Fill in recog_op_alt with information about the constraints of
16610 this insn. */
16611 preprocess_constraints (insn);
16612
16613 const operand_alternative *op_alt = which_op_alt ();
16614 for (opno = 0; opno < recog_data.n_operands; opno++)
16615 {
16616 /* Things we need to fix can only occur in inputs. */
16617 if (recog_data.operand_type[opno] != OP_IN)
16618 continue;
16619
16620 /* If this alternative is a memory reference, then any mention
16621 of constants in this alternative is really to fool reload
16622 into allowing us to accept one there. We need to fix them up
16623 now so that we output the right code. */
16624 if (op_alt[opno].memory_ok)
16625 {
16626 rtx op = recog_data.operand[opno];
16627
16628 if (CONSTANT_P (op))
16629 {
16630 if (do_pushes)
16631 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16632 recog_data.operand_mode[opno], op);
16633 }
16634 else if (MEM_P (op)
16635 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16636 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16637 {
16638 if (do_pushes)
16639 {
16640 rtx cop = avoid_constant_pool_reference (op);
16641
16642 /* Casting the address of something to a mode narrower
16643 than a word can cause avoid_constant_pool_reference()
16644 to return the pool reference itself. That's no good to
16645 us here. Let's just hope that we can use the
16646 constant pool value directly. */
16647 if (op == cop)
16648 cop = get_pool_constant (XEXP (op, 0));
16649
16650 push_minipool_fix (insn, address,
16651 recog_data.operand_loc[opno],
16652 recog_data.operand_mode[opno], cop);
16653 }
16654
16655 }
16656 }
16657 }
16658
16659 return;
16660 }
16661
16662 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16663 and unions in the context of ARMv8-M Security Extensions. It is used as a
16664 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16665 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
16666 or four masks, depending on whether it is being computed for a
16667 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16668 respectively. The tree for the type of the argument or a field within an
16669 argument is passed in ARG_TYPE, the current register this argument or field
16670 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16671 argument or field starts at is passed in STARTING_BIT and the last used bit
16672 is kept in LAST_USED_BIT which is also updated accordingly. */
16673
16674 static unsigned HOST_WIDE_INT
16675 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16676 uint32_t * padding_bits_to_clear,
16677 unsigned starting_bit, int * last_used_bit)
16678
16679 {
16680 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16681
16682 if (TREE_CODE (arg_type) == RECORD_TYPE)
16683 {
16684 unsigned current_bit = starting_bit;
16685 tree field;
16686 long int offset, size;
16687
16688
16689 field = TYPE_FIELDS (arg_type);
16690 while (field)
16691 {
16692 /* The offset within a structure is always an offset from
16693 the start of that structure. Make sure we take that into account
16694 in the calculation of the register-based offset that we use here. */
16695 offset = starting_bit;
16696 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16697 offset %= 32;
16698
16699 /* This is the actual size of the field, for bitfields this is the
16700 bitfield width and not the container size. */
16701 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16702
16703 if (*last_used_bit != offset)
16704 {
16705 if (offset < *last_used_bit)
16706 {
16707 /* This field's offset is before the 'last_used_bit', that
16708 means this field goes on the next register. So we need to
16709 pad the rest of the current register and increase the
16710 register number. */
16711 uint32_t mask;
16712 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16713 mask++;
16714
16715 padding_bits_to_clear[*regno] |= mask;
16716 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16717 (*regno)++;
16718 }
16719 else
16720 {
16721 /* Otherwise we pad the bits between the last field's end and
16722 the start of the new field. */
16723 uint32_t mask;
16724
16725 mask = ((uint32_t)-1) >> (32 - offset);
16726 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16727 padding_bits_to_clear[*regno] |= mask;
16728 }
16729 current_bit = offset;
16730 }
16731
16732 /* Calculate further padding bits for inner structs/unions too. */
16733 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16734 {
16735 *last_used_bit = current_bit;
16736 not_to_clear_reg_mask
16737 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16738 padding_bits_to_clear, offset,
16739 last_used_bit);
16740 }
16741 else
16742 {
16743 /* Update 'current_bit' with this field's size. If the
16744 'current_bit' lies in a subsequent register, update 'regno' and
16745 reset 'current_bit' to point to the current bit in that new
16746 register. */
16747 current_bit += size;
16748 while (current_bit >= 32)
16749 {
16750 current_bit -= 32;
16751 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16752 (*regno)++;
16753 }
16754 *last_used_bit = current_bit;
16755 }
16756
16757 field = TREE_CHAIN (field);
16758 }
16759 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16760 }
16761 else if (TREE_CODE (arg_type) == UNION_TYPE)
16762 {
16763 tree field, field_t;
16764 int i, regno_t, field_size;
16765 int max_reg = -1;
16766 int max_bit = -1;
16767 uint32_t mask;
16768 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16769 = {-1, -1, -1, -1};
16770
16771 /* To compute the padding bits in a union we only consider bits as
16772 padding bits if they are always either a padding bit or fall outside a
16773 field's size for all fields in the union. */
16774 field = TYPE_FIELDS (arg_type);
16775 while (field)
16776 {
16777 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16778 = {0U, 0U, 0U, 0U};
16779 int last_used_bit_t = *last_used_bit;
16780 regno_t = *regno;
16781 field_t = TREE_TYPE (field);
16782
16783 /* If the field's type is either a record or a union make sure to
16784 compute their padding bits too. */
16785 if (RECORD_OR_UNION_TYPE_P (field_t))
16786 not_to_clear_reg_mask
16787 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16788 &padding_bits_to_clear_t[0],
16789 starting_bit, &last_used_bit_t);
16790 else
16791 {
16792 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16793 regno_t = (field_size / 32) + *regno;
16794 last_used_bit_t = (starting_bit + field_size) % 32;
16795 }
16796
16797 for (i = *regno; i < regno_t; i++)
16798 {
16799 /* For all but the last register used by this field only keep the
16800 padding bits that were padding bits in this field. */
16801 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16802 }
16803
16804 /* For the last register, keep all padding bits that were padding
16805 bits in this field and any padding bits that are still valid
16806 as padding bits but fall outside of this field's size. */
16807 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16808 padding_bits_to_clear_res[regno_t]
16809 &= padding_bits_to_clear_t[regno_t] | mask;
16810
16811 /* Update the maximum size of the fields in terms of registers used
16812 ('max_reg') and the 'last_used_bit' in said register. */
16813 if (max_reg < regno_t)
16814 {
16815 max_reg = regno_t;
16816 max_bit = last_used_bit_t;
16817 }
16818 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16819 max_bit = last_used_bit_t;
16820
16821 field = TREE_CHAIN (field);
16822 }
16823
16824 /* Update the current padding_bits_to_clear using the intersection of the
16825 padding bits of all the fields. */
16826 for (i = *regno; i < max_reg; i++)
16827 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16828
16829 /* Do not keep trailing padding bits, we do not know yet whether this
16830 is the end of the argument. */
16831 mask = ((uint32_t) 1 << max_bit) - 1;
16832 padding_bits_to_clear[max_reg]
16833 |= padding_bits_to_clear_res[max_reg] & mask;
16834
16835 *regno = max_reg;
16836 *last_used_bit = max_bit;
16837 }
16838 else
16839 /* This function should only be used for structs and unions. */
16840 gcc_unreachable ();
16841
16842 return not_to_clear_reg_mask;
16843 }
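
/* The mask arithmetic above recurs throughout this function: "all bits from
   bit B upwards" is formed as (uint32_t) -1 - (1 << B) + 1 and "all bits
   below bit B" as (1 << B) - 1.  The compiled-out fragment below merely
   spells out those identities for 0 <= B < 32.  */
#if 0
#include <assert.h>
#include <stdint.h>

static void
padding_mask_identities (unsigned int b)	/* Requires 0 <= b < 32.  */
{
  uint32_t upper = ((uint32_t) -1) - ((uint32_t) 1 << b) + 1;
  uint32_t lower = ((uint32_t) 1 << b) - 1;

  /* UPPER selects bits b..31, LOWER selects bits 0..b-1.  */
  assert (upper == ~lower);
  assert ((upper | lower) == (uint32_t) -1 && (upper & lower) == 0);
}
#endif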
16844
16845 /* In the context of ARMv8-M Security Extensions, this function is used for both
16846 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
16847 registers are used when returning or passing arguments, which is then
16848 returned as a mask. It will also compute a mask to indicate padding/unused
16849 bits for each of these registers, and passes this through the
16850 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
16851 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
16852 the starting register used to pass this argument or return value is passed
16853 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
16854 for struct and union types. */
16855
16856 static unsigned HOST_WIDE_INT
16857 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
16858 uint32_t * padding_bits_to_clear)
16859
16860 {
16861 int last_used_bit = 0;
16862 unsigned HOST_WIDE_INT not_to_clear_mask;
16863
16864 if (RECORD_OR_UNION_TYPE_P (arg_type))
16865 {
16866 not_to_clear_mask
16867 = comp_not_to_clear_mask_str_un (arg_type, &regno,
16868 padding_bits_to_clear, 0,
16869 &last_used_bit);
16870
16871
16872 /* If the 'last_used_bit' is not zero, that means we are still using a
16873 part of the last 'regno'. In such cases we must clear the trailing
16874 bits. Otherwise we are not using regno and we should mark it to be
16875 cleared. */
16876 if (last_used_bit != 0)
16877 padding_bits_to_clear[regno]
16878 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
16879 else
16880 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
16881 }
16882 else
16883 {
16884 not_to_clear_mask = 0;
16885 /* We are not dealing with structs or unions, so these arguments may be
16886 passed in floating-point registers too. In some cases a BLKmode is
16887 used when returning or passing arguments in multiple VFP registers. */
16888 if (GET_MODE (arg_rtx) == BLKmode)
16889 {
16890 int i, arg_regs;
16891 rtx reg;
16892
16893 /* This should really only occur when dealing with the hard-float
16894 ABI. */
16895 gcc_assert (TARGET_HARD_FLOAT_ABI);
16896
16897 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
16898 {
16899 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
16900 gcc_assert (REG_P (reg));
16901
16902 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
16903
16904 /* If we are dealing with DF mode, make sure we don't
16905 clear either of the registers it addresses. */
16906 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
16907 if (arg_regs > 1)
16908 {
16909 unsigned HOST_WIDE_INT mask;
16910 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
16911 mask -= HOST_WIDE_INT_1U << REGNO (reg);
16912 not_to_clear_mask |= mask;
16913 }
16914 }
16915 }
16916 else
16917 {
16918 /* Otherwise we can rely on the MODE to determine how many registers
16919 are being used by this argument. */
16920 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
16921 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16922 if (arg_regs > 1)
16923 {
16924 unsigned HOST_WIDE_INT
16925 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
16926 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
16927 not_to_clear_mask |= mask;
16928 }
16929 }
16930 }
16931
16932 return not_to_clear_mask;
16933 }
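
/* The same idea is applied at the register level above: the bits for
   registers REGNO .. REGNO + NREGS - 1 of the not-to-clear mask are
   (1 << (REGNO + NREGS)) - (1 << REGNO).  Compiled-out sketch with an
   illustrative helper name.  */
#if 0
static unsigned long long
regs_used_mask (unsigned int regno, unsigned int nregs)
{
  /* Assumes REGNO + NREGS < 64 so the shifts are well defined.  */
  return (1ULL << (regno + nregs)) - (1ULL << regno);
}
#endif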
16934
16935 /* Clears caller saved registers not used to pass arguments before a
16936 cmse_nonsecure_call. Saving, clearing and restoring of callee saved
16937 registers is done in __gnu_cmse_nonsecure_call libcall.
16938 See libgcc/config/arm/cmse_nonsecure_call.S. */
16939
16940 static void
16941 cmse_nonsecure_call_clear_caller_saved (void)
16942 {
16943 basic_block bb;
16944
16945 FOR_EACH_BB_FN (bb, cfun)
16946 {
16947 rtx_insn *insn;
16948
16949 FOR_BB_INSNS (bb, insn)
16950 {
16951 uint64_t to_clear_mask, float_mask;
16952 rtx_insn *seq;
16953 rtx pat, call, unspec, reg, cleared_reg, tmp;
16954 unsigned int regno, maxregno;
16955 rtx address;
16956 CUMULATIVE_ARGS args_so_far_v;
16957 cumulative_args_t args_so_far;
16958 tree arg_type, fntype;
16959 bool using_r4, first_param = true;
16960 function_args_iterator args_iter;
16961 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
16962 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear[0];
16963
16964 if (!NONDEBUG_INSN_P (insn))
16965 continue;
16966
16967 if (!CALL_P (insn))
16968 continue;
16969
16970 pat = PATTERN (insn);
16971 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
16972 call = XVECEXP (pat, 0, 0);
16973
16974 /* Get the real call RTX if the insn sets a value, i.e. returns. */
16975 if (GET_CODE (call) == SET)
16976 call = SET_SRC (call);
16977
16978 /* Check if it is a cmse_nonsecure_call. */
16979 unspec = XEXP (call, 0);
16980 if (GET_CODE (unspec) != UNSPEC
16981 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
16982 continue;
16983
16984 /* Determine the caller-saved registers we need to clear. */
16985 to_clear_mask = (1LL << (NUM_ARG_REGS)) - 1;
16986 maxregno = NUM_ARG_REGS - 1;
16987 /* Only look at the caller-saved floating-point registers in case of
16988 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
16989 lazy stores and loads, which clear both caller- and callee-saved
16990 registers. */
16991 if (TARGET_HARD_FLOAT_ABI)
16992 {
16993 float_mask = (1LL << (D7_VFP_REGNUM + 1)) - 1;
16994 float_mask &= ~((1LL << FIRST_VFP_REGNUM) - 1);
16995 to_clear_mask |= float_mask;
16996 maxregno = D7_VFP_REGNUM;
16997 }
16998
16999 /* Make sure the register used to hold the function address is not
17000 cleared. */
17001 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17002 gcc_assert (MEM_P (address));
17003 gcc_assert (REG_P (XEXP (address, 0)));
17004 to_clear_mask &= ~(1LL << REGNO (XEXP (address, 0)));
17005
17006 /* Set basic block of call insn so that df rescan is performed on
17007 insns inserted here. */
17008 set_block_for_insn (insn, bb);
17009 df_set_flags (DF_DEFER_INSN_RESCAN);
17010 start_sequence ();
17011
17012 /* Make sure the scheduler doesn't schedule other insns beyond
17013 here. */
17014 emit_insn (gen_blockage ());
17015
17016 /* Walk through all arguments and clear registers
17017 appropriately. */
17018 fntype = TREE_TYPE (MEM_EXPR (address));
17019 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17020 NULL_TREE);
17021 args_so_far = pack_cumulative_args (&args_so_far_v);
17022 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17023 {
17024 rtx arg_rtx;
17025 machine_mode arg_mode = TYPE_MODE (arg_type);
17026
17027 if (VOID_TYPE_P (arg_type))
17028 continue;
17029
17030 if (!first_param)
17031 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17032 true);
17033
17034 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17035 true);
17036 gcc_assert (REG_P (arg_rtx));
17037 to_clear_mask
17038 &= ~compute_not_to_clear_mask (arg_type, arg_rtx,
17039 REGNO (arg_rtx),
17040 padding_bits_to_clear_ptr);
17041
17042 first_param = false;
17043 }
17044
17045 /* Clear padding bits where needed. */
17046 cleared_reg = XEXP (address, 0);
17047 reg = gen_rtx_REG (SImode, IP_REGNUM);
17048 using_r4 = false;
17049 for (regno = R0_REGNUM; regno < NUM_ARG_REGS; regno++)
17050 {
17051 if (padding_bits_to_clear[regno] == 0)
17052 continue;
17053
17054 /* If this is a Thumb-1 target, copy the address of the function
17055 we are calling from 'r4' into 'ip' such that we can use r4 to
17056 clear the unused bits in the arguments. */
17057 if (TARGET_THUMB1 && !using_r4)
17058 {
17059 using_r4 = true;
17060 reg = cleared_reg;
17061 emit_move_insn (gen_rtx_REG (SImode, IP_REGNUM),
17062 reg);
17063 }
17064
17065 tmp = GEN_INT ((((~padding_bits_to_clear[regno]) << 16u) >> 16u));
17066 emit_move_insn (reg, tmp);
17067 /* Also fill the top half of the negated
17068 padding_bits_to_clear. */
17069 if (((~padding_bits_to_clear[regno]) >> 16) > 0)
17070 {
17071 tmp = GEN_INT ((~padding_bits_to_clear[regno]) >> 16);
17072 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg,
17073 GEN_INT (16),
17074 GEN_INT (16)),
17075 tmp));
17076 }
17077
17078 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, regno),
17079 gen_rtx_REG (SImode, regno),
17080 reg));
17081
17082 }
17083 if (using_r4)
17084 emit_move_insn (cleared_reg,
17085 gen_rtx_REG (SImode, IP_REGNUM));
17086
17087 /* We use right shift and left shift to clear the LSB of the address
17088 we jump to instead of using bic, to avoid having to use an extra
17089 register on Thumb-1. */
17090 tmp = gen_rtx_LSHIFTRT (SImode, cleared_reg, const1_rtx);
17091 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17092 tmp = gen_rtx_ASHIFT (SImode, cleared_reg, const1_rtx);
17093 emit_insn (gen_rtx_SET (cleared_reg, tmp));
17094
17095 /* Clear all registers that could leak before doing a non-secure
17096 call. */
17097 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17098 {
17099 if (!(to_clear_mask & (1LL << regno)))
17100 continue;
17101
17102 /* If regno is an even vfp register and its successor is also to
17103 be cleared, use vmov. */
17104 if (IS_VFP_REGNUM (regno))
17105 {
17106 if (TARGET_VFP_DOUBLE
17107 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17108 && to_clear_mask & (1LL << (regno + 1)))
17109 emit_move_insn (gen_rtx_REG (DFmode, regno++),
17110 CONST0_RTX (DFmode));
17111 else
17112 emit_move_insn (gen_rtx_REG (SFmode, regno),
17113 CONST0_RTX (SFmode));
17114 }
17115 else
17116 emit_move_insn (gen_rtx_REG (SImode, regno), cleared_reg);
17117 }
17118
17119 seq = get_insns ();
17120 end_sequence ();
17121 emit_insn_before (seq, insn);
17122
17123 }
17124 }
17125 }
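
/* The shift pair emitted above is just a scratch-register-free way of
   clearing the least significant bit of the address we are about to jump
   to; a compiled-out restatement of the arithmetic:  */
#if 0
#include <stdint.h>

static uint32_t
clear_lsb_without_scratch (uint32_t addr)
{
  addr >>= 1;	/* lsrs addr, addr, #1 */
  addr <<= 1;	/* lsls addr, addr, #1 */
  return addr;
}
#endif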
17126
17127 /* Rewrite move insn into subtract of 0 if the condition codes will
17128 be useful in the next conditional jump insn. */
17129
17130 static void
17131 thumb1_reorg (void)
17132 {
17133 basic_block bb;
17134
17135 FOR_EACH_BB_FN (bb, cfun)
17136 {
17137 rtx dest, src;
17138 rtx cmp, op0, op1, set = NULL;
17139 rtx_insn *prev, *insn = BB_END (bb);
17140 bool insn_clobbered = false;
17141
17142 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17143 insn = PREV_INSN (insn);
17144
17145 /* Find the last cbranchsi4_insn in basic block BB. */
17146 if (insn == BB_HEAD (bb)
17147 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17148 continue;
17149
17150 /* Get the register with which we are comparing. */
17151 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17152 op0 = XEXP (cmp, 0);
17153 op1 = XEXP (cmp, 1);
17154
17155 /* Check that comparison is against ZERO. */
17156 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17157 continue;
17158
17159 /* Find the first flag setting insn before INSN in basic block BB. */
17160 gcc_assert (insn != BB_HEAD (bb));
17161 for (prev = PREV_INSN (insn);
17162 (!insn_clobbered
17163 && prev != BB_HEAD (bb)
17164 && (NOTE_P (prev)
17165 || DEBUG_INSN_P (prev)
17166 || ((set = single_set (prev)) != NULL
17167 && get_attr_conds (prev) == CONDS_NOCOND)));
17168 prev = PREV_INSN (prev))
17169 {
17170 if (reg_set_p (op0, prev))
17171 insn_clobbered = true;
17172 }
17173
17174 /* Skip if op0 is clobbered by an insn other than prev. */
17175 if (insn_clobbered)
17176 continue;
17177
17178 if (!set)
17179 continue;
17180
17181 dest = SET_DEST (set);
17182 src = SET_SRC (set);
17183 if (!low_register_operand (dest, SImode)
17184 || !low_register_operand (src, SImode))
17185 continue;
17186
17187 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17188 in INSN. Both src and dest of the move insn are checked. */
17189 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17190 {
17191 dest = copy_rtx (dest);
17192 src = copy_rtx (src);
17193 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17194 PATTERN (prev) = gen_rtx_SET (dest, src);
17195 INSN_CODE (prev) = -1;
17196 /* Set test register in INSN to dest. */
17197 XEXP (cmp, 0) = copy_rtx (dest);
17198 INSN_CODE (insn) = -1;
17199 }
17200 }
17201 }
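
/* Concretely, when the last conditional branch in a block compares a low
   register against zero and that register was just copied by a simple move,
   a sequence along the lines of

	mov	r1, r0
	...
	cmp	r0, #0
	beq	.L1

   has its move rewritten as the flag-setting

	subs	r1, r0, #0

   and the comparison redirected to r1, so that the condition codes set by
   the subtract can be used by the following conditional jump.  */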
17202
17203 /* Convert instructions to their cc-clobbering variant if possible, since
17204 that allows us to use smaller encodings. */
17205
17206 static void
17207 thumb2_reorg (void)
17208 {
17209 basic_block bb;
17210 regset_head live;
17211
17212 INIT_REG_SET (&live);
17213
17214 /* We are freeing block_for_insn in the toplev to keep compatibility
17215 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17216 compute_bb_for_insn ();
17217 df_analyze ();
17218
17219 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17220
17221 FOR_EACH_BB_FN (bb, cfun)
17222 {
17223 if ((current_tune->disparage_flag_setting_t16_encodings
17224 == tune_params::DISPARAGE_FLAGS_ALL)
17225 && optimize_bb_for_speed_p (bb))
17226 continue;
17227
17228 rtx_insn *insn;
17229 Convert_Action action = SKIP;
17230 Convert_Action action_for_partial_flag_setting
17231 = ((current_tune->disparage_flag_setting_t16_encodings
17232 != tune_params::DISPARAGE_FLAGS_NEITHER)
17233 && optimize_bb_for_speed_p (bb))
17234 ? SKIP : CONV;
17235
17236 COPY_REG_SET (&live, DF_LR_OUT (bb));
17237 df_simulate_initialize_backwards (bb, &live);
17238 FOR_BB_INSNS_REVERSE (bb, insn)
17239 {
17240 if (NONJUMP_INSN_P (insn)
17241 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17242 && GET_CODE (PATTERN (insn)) == SET)
17243 {
17244 action = SKIP;
17245 rtx pat = PATTERN (insn);
17246 rtx dst = XEXP (pat, 0);
17247 rtx src = XEXP (pat, 1);
17248 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17249
17250 if (UNARY_P (src) || BINARY_P (src))
17251 op0 = XEXP (src, 0);
17252
17253 if (BINARY_P (src))
17254 op1 = XEXP (src, 1);
17255
17256 if (low_register_operand (dst, SImode))
17257 {
17258 switch (GET_CODE (src))
17259 {
17260 case PLUS:
17261 /* Adding two registers and storing the result
17262 in the first source is already a 16-bit
17263 operation. */
17264 if (rtx_equal_p (dst, op0)
17265 && register_operand (op1, SImode))
17266 break;
17267
17268 if (low_register_operand (op0, SImode))
17269 {
17270 /* ADDS <Rd>,<Rn>,<Rm> */
17271 if (low_register_operand (op1, SImode))
17272 action = CONV;
17273 /* ADDS <Rdn>,#<imm8> */
17274 /* SUBS <Rdn>,#<imm8> */
17275 else if (rtx_equal_p (dst, op0)
17276 && CONST_INT_P (op1)
17277 && IN_RANGE (INTVAL (op1), -255, 255))
17278 action = CONV;
17279 /* ADDS <Rd>,<Rn>,#<imm3> */
17280 /* SUBS <Rd>,<Rn>,#<imm3> */
17281 else if (CONST_INT_P (op1)
17282 && IN_RANGE (INTVAL (op1), -7, 7))
17283 action = CONV;
17284 }
17285 /* ADCS <Rd>, <Rn> */
17286 else if (GET_CODE (XEXP (src, 0)) == PLUS
17287 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17288 && low_register_operand (XEXP (XEXP (src, 0), 1),
17289 SImode)
17290 && COMPARISON_P (op1)
17291 && cc_register (XEXP (op1, 0), VOIDmode)
17292 && maybe_get_arm_condition_code (op1) == ARM_CS
17293 && XEXP (op1, 1) == const0_rtx)
17294 action = CONV;
17295 break;
17296
17297 case MINUS:
17298 /* RSBS <Rd>,<Rn>,#0
17299 Not handled here: see NEG below. */
17300 /* SUBS <Rd>,<Rn>,#<imm3>
17301 SUBS <Rdn>,#<imm8>
17302 Not handled here: see PLUS above. */
17303 /* SUBS <Rd>,<Rn>,<Rm> */
17304 if (low_register_operand (op0, SImode)
17305 && low_register_operand (op1, SImode))
17306 action = CONV;
17307 break;
17308
17309 case MULT:
17310 /* MULS <Rdm>,<Rn>,<Rdm>
17311 As an exception to the rule, this is only used
17312 when optimizing for size since MULS is slow on all
17313 known implementations. We do not even want to use
17314 MULS in cold code, if optimizing for speed, so we
17315 test the global flag here. */
17316 if (!optimize_size)
17317 break;
17318 /* Fall through. */
17319 case AND:
17320 case IOR:
17321 case XOR:
17322 /* ANDS <Rdn>,<Rm> */
17323 if (rtx_equal_p (dst, op0)
17324 && low_register_operand (op1, SImode))
17325 action = action_for_partial_flag_setting;
17326 else if (rtx_equal_p (dst, op1)
17327 && low_register_operand (op0, SImode))
17328 action = action_for_partial_flag_setting == SKIP
17329 ? SKIP : SWAP_CONV;
17330 break;
17331
17332 case ASHIFTRT:
17333 case ASHIFT:
17334 case LSHIFTRT:
17335 /* ASRS <Rdn>,<Rm> */
17336 /* LSRS <Rdn>,<Rm> */
17337 /* LSLS <Rdn>,<Rm> */
17338 if (rtx_equal_p (dst, op0)
17339 && low_register_operand (op1, SImode))
17340 action = action_for_partial_flag_setting;
17341 /* ASRS <Rd>,<Rm>,#<imm5> */
17342 /* LSRS <Rd>,<Rm>,#<imm5> */
17343 /* LSLS <Rd>,<Rm>,#<imm5> */
17344 else if (low_register_operand (op0, SImode)
17345 && CONST_INT_P (op1)
17346 && IN_RANGE (INTVAL (op1), 0, 31))
17347 action = action_for_partial_flag_setting;
17348 break;
17349
17350 case ROTATERT:
17351 /* RORS <Rdn>,<Rm> */
17352 if (rtx_equal_p (dst, op0)
17353 && low_register_operand (op1, SImode))
17354 action = action_for_partial_flag_setting;
17355 break;
17356
17357 case NOT:
17358 /* MVNS <Rd>,<Rm> */
17359 if (low_register_operand (op0, SImode))
17360 action = action_for_partial_flag_setting;
17361 break;
17362
17363 case NEG:
17364 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17365 if (low_register_operand (op0, SImode))
17366 action = CONV;
17367 break;
17368
17369 case CONST_INT:
17370 /* MOVS <Rd>,#<imm8> */
17371 if (CONST_INT_P (src)
17372 && IN_RANGE (INTVAL (src), 0, 255))
17373 action = action_for_partial_flag_setting;
17374 break;
17375
17376 case REG:
17377 /* MOVS and MOV<c> with registers have different
17378 encodings, so are not relevant here. */
17379 break;
17380
17381 default:
17382 break;
17383 }
17384 }
17385
17386 if (action != SKIP)
17387 {
17388 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17389 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17390 rtvec vec;
17391
17392 if (action == SWAP_CONV)
17393 {
17394 src = copy_rtx (src);
17395 XEXP (src, 0) = op1;
17396 XEXP (src, 1) = op0;
17397 pat = gen_rtx_SET (dst, src);
17398 vec = gen_rtvec (2, pat, clobber);
17399 }
17400 else /* action == CONV */
17401 vec = gen_rtvec (2, pat, clobber);
17402
17403 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17404 INSN_CODE (insn) = -1;
17405 }
17406 }
17407
17408 if (NONDEBUG_INSN_P (insn))
17409 df_simulate_one_insn_backwards (bb, insn, &live);
17410 }
17411 }
17412
17413 CLEAR_REG_SET (&live);
17414 }
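
/* For example, when the condition codes are dead at that point, a plain

	add	r0, r1, r2

   on low registers gains a CC clobber and becomes

	adds	r0, r1, r2

   which has a 16-bit encoding (the ADDS <Rd>,<Rn>,<Rm> case above).  */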
17415
17416 /* GCC puts the pool in the wrong place for ARM, since we can only
17417 load addresses a limited distance around the pc. We do some
17418 special munging to move the constant pool values to the correct
17419 point in the code. */
17420 static void
17421 arm_reorg (void)
17422 {
17423 rtx_insn *insn;
17424 HOST_WIDE_INT address = 0;
17425 Mfix * fix;
17426
17427 if (use_cmse)
17428 cmse_nonsecure_call_clear_caller_saved ();
17429 if (TARGET_THUMB1)
17430 thumb1_reorg ();
17431 else if (TARGET_THUMB2)
17432 thumb2_reorg ();
17433
17434 /* Ensure all insns that must be split have been split at this point.
17435 Otherwise, the pool placement code below may compute incorrect
17436 insn lengths. Note that when optimizing, all insns have already
17437 been split at this point. */
17438 if (!optimize)
17439 split_all_insns_noflow ();
17440
17441 /* Make sure we do not attempt to create a literal pool even though it should
17442 no longer be necessary to create any. */
17443 if (arm_disable_literal_pool)
17444 return;
17445
17446 minipool_fix_head = minipool_fix_tail = NULL;
17447
17448 /* The first insn must always be a note, or the code below won't
17449 scan it properly. */
17450 insn = get_insns ();
17451 gcc_assert (NOTE_P (insn));
17452 minipool_pad = 0;
17453
17454 /* Scan all the insns and record the operands that will need fixing. */
17455 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17456 {
17457 if (BARRIER_P (insn))
17458 push_minipool_barrier (insn, address);
17459 else if (INSN_P (insn))
17460 {
17461 rtx_jump_table_data *table;
17462
17463 note_invalid_constants (insn, address, true);
17464 address += get_attr_length (insn);
17465
17466 /* If the insn is a vector jump, add the size of the table
17467 and skip the table. */
17468 if (tablejump_p (insn, NULL, &table))
17469 {
17470 address += get_jump_table_size (table);
17471 insn = table;
17472 }
17473 }
17474 else if (LABEL_P (insn))
17475 /* Add the worst-case padding due to alignment. We don't add
17476 the _current_ padding because the minipool insertions
17477 themselves might change it. */
17478 address += get_label_padding (insn);
17479 }
17480
17481 fix = minipool_fix_head;
17482
17483 /* Now scan the fixups and perform the required changes. */
17484 while (fix)
17485 {
17486 Mfix * ftmp;
17487 Mfix * fdel;
17488 Mfix * last_added_fix;
17489 Mfix * last_barrier = NULL;
17490 Mfix * this_fix;
17491
17492 /* Skip any further barriers before the next fix. */
17493 while (fix && BARRIER_P (fix->insn))
17494 fix = fix->next;
17495
17496 /* No more fixes. */
17497 if (fix == NULL)
17498 break;
17499
17500 last_added_fix = NULL;
17501
17502 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17503 {
17504 if (BARRIER_P (ftmp->insn))
17505 {
17506 if (ftmp->address >= minipool_vector_head->max_address)
17507 break;
17508
17509 last_barrier = ftmp;
17510 }
17511 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17512 break;
17513
17514 last_added_fix = ftmp; /* Keep track of the last fix added. */
17515 }
17516
17517 /* If we found a barrier, drop back to that; any fixes that we
17518 could have reached but come after the barrier will now go in
17519 the next mini-pool. */
17520 if (last_barrier != NULL)
17521 {
17522 /* Reduce the refcount for those fixes that won't go into this
17523 pool after all. */
17524 for (fdel = last_barrier->next;
17525 fdel && fdel != ftmp;
17526 fdel = fdel->next)
17527 {
17528 fdel->minipool->refcount--;
17529 fdel->minipool = NULL;
17530 }
17531
17532 ftmp = last_barrier;
17533 }
17534 else
17535 {
17536 /* ftmp is the first fix that we can't fit into this pool and
17537 there are no natural barriers that we could use. Insert a
17538 new barrier in the code somewhere between the previous
17539 fix and this one, and arrange to jump around it. */
17540 HOST_WIDE_INT max_address;
17541
17542 /* The last item on the list of fixes must be a barrier, so
17543 we can never run off the end of the list of fixes without
17544 last_barrier being set. */
17545 gcc_assert (ftmp);
17546
17547 max_address = minipool_vector_head->max_address;
17548 /* Check that there isn't another fix that is in range that
17549 we couldn't fit into this pool because the pool was
17550 already too large: we need to put the pool before such an
17551 instruction. The pool itself may come just after the
17552 fix because create_fix_barrier also allows space for a
17553 jump instruction. */
17554 if (ftmp->address < max_address)
17555 max_address = ftmp->address + 1;
17556
17557 last_barrier = create_fix_barrier (last_added_fix, max_address);
17558 }
17559
17560 assign_minipool_offsets (last_barrier);
17561
17562 while (ftmp)
17563 {
17564 if (!BARRIER_P (ftmp->insn)
17565 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17566 == NULL))
17567 break;
17568
17569 ftmp = ftmp->next;
17570 }
17571
17572 /* Scan over the fixes we have identified for this pool, fixing them
17573 up and adding the constants to the pool itself. */
17574 for (this_fix = fix; this_fix && ftmp != this_fix;
17575 this_fix = this_fix->next)
17576 if (!BARRIER_P (this_fix->insn))
17577 {
17578 rtx addr
17579 = plus_constant (Pmode,
17580 gen_rtx_LABEL_REF (VOIDmode,
17581 minipool_vector_label),
17582 this_fix->minipool->offset);
17583 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17584 }
17585
17586 dump_minipool (last_barrier->insn);
17587 fix = ftmp;
17588 }
17589
17590 /* From now on we must synthesize any constants that we can't handle
17591 directly. This can happen if the RTL gets split during final
17592 instruction generation. */
17593 cfun->machine->after_arm_reorg = 1;
17594
17595 /* Free the minipool memory. */
17596 obstack_free (&minipool_obstack, minipool_startobj);
17597 }
17598 \f
17599 /* Routines to output assembly language. */
17600
17601 /* Return string representation of passed in real value. */
17602 static const char *
17603 fp_const_from_val (REAL_VALUE_TYPE *r)
17604 {
17605 if (!fp_consts_inited)
17606 init_fp_table ();
17607
17608 gcc_assert (real_equal (r, &value_fp0));
17609 return "0";
17610 }
17611
17612 /* OPERANDS[0] is the entire list of insns that constitute pop,
17613 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17614 is in the list, UPDATE is true iff the list contains explicit
17615 update of base register. */
17616 void
17617 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17618 bool update)
17619 {
17620 int i;
17621 char pattern[100];
17622 int offset;
17623 const char *conditional;
17624 int num_saves = XVECLEN (operands[0], 0);
17625 unsigned int regno;
17626 unsigned int regno_base = REGNO (operands[1]);
17627 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17628
17629 offset = 0;
17630 offset += update ? 1 : 0;
17631 offset += return_pc ? 1 : 0;
17632
17633 /* Is the base register in the list? */
17634 for (i = offset; i < num_saves; i++)
17635 {
17636 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17637 /* If SP is in the list, then the base register must be SP. */
17638 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17639 /* If base register is in the list, there must be no explicit update. */
17640 if (regno == regno_base)
17641 gcc_assert (!update);
17642 }
17643
17644 conditional = reverse ? "%?%D0" : "%?%d0";
17645 /* Can't use POP if returning from an interrupt. */
17646 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17647 sprintf (pattern, "pop%s\t{", conditional);
17648 else
17649 {
17650 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17651 It's just a convention; their semantics are identical. */
17652 if (regno_base == SP_REGNUM)
17653 sprintf (pattern, "ldmfd%s\t", conditional);
17654 else if (update)
17655 sprintf (pattern, "ldmia%s\t", conditional);
17656 else
17657 sprintf (pattern, "ldm%s\t", conditional);
17658
17659 strcat (pattern, reg_names[regno_base]);
17660 if (update)
17661 strcat (pattern, "!, {");
17662 else
17663 strcat (pattern, ", {");
17664 }
17665
17666 /* Output the first destination register. */
17667 strcat (pattern,
17668 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17669
17670 /* Output the rest of the destination registers. */
17671 for (i = offset + 1; i < num_saves; i++)
17672 {
17673 strcat (pattern, ", ");
17674 strcat (pattern,
17675 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17676 }
17677
17678 strcat (pattern, "}");
17679
17680 if (interrupt_p && return_pc)
17681 strcat (pattern, "^");
17682
17683 output_asm_insn (pattern, &cond);
17684 }
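
/* The patterns built above produce assembly along the lines of

	pop	{r4, r5, pc}
	ldmfd	sp!, {r4, r5, pc}^
	ldmia	r3!, {r4, r5}
	ldm	r3, {r4, r5}

   where the trailing '^' appears only when returning from an interrupt
   with the PC in the list.  */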
17685
17686
17687 /* Output the assembly for a store multiple. */
17688
17689 const char *
17690 vfp_output_vstmd (rtx * operands)
17691 {
17692 char pattern[100];
17693 int p;
17694 int base;
17695 int i;
17696 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17697 ? XEXP (operands[0], 0)
17698 : XEXP (XEXP (operands[0], 0), 0);
17699 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17700
17701 if (push_p)
17702 strcpy (pattern, "vpush%?.64\t{%P1");
17703 else
17704 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17705
17706 p = strlen (pattern);
17707
17708 gcc_assert (REG_P (operands[1]));
17709
17710 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17711 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17712 {
17713 p += sprintf (&pattern[p], ", d%d", base + i);
17714 }
17715 strcpy (&pattern[p], "}");
17716
17717 output_asm_insn (pattern, operands);
17718 return "";
17719 }
17720
17721
17722 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17723 number of bytes pushed. */
17724
17725 static int
17726 vfp_emit_fstmd (int base_reg, int count)
17727 {
17728 rtx par;
17729 rtx dwarf;
17730 rtx tmp, reg;
17731 int i;
17732
17733 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
17734 register pairs are stored by a store multiple insn. We avoid this
17735 by pushing an extra pair. */
17736 if (count == 2 && !arm_arch6)
17737 {
17738 if (base_reg == LAST_VFP_REGNUM - 3)
17739 base_reg -= 2;
17740 count++;
17741 }
17742
17743 /* FSTMD may not store more than 16 doubleword registers at once. Split
17744 larger stores into multiple parts (up to a maximum of two, in
17745 practice). */
17746 if (count > 16)
17747 {
17748 int saved;
17749 /* NOTE: base_reg is an internal register number, so each D register
17750 counts as 2. */
17751 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17752 saved += vfp_emit_fstmd (base_reg, 16);
17753 return saved;
17754 }
17755
17756 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17757 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17758
17759 reg = gen_rtx_REG (DFmode, base_reg);
17760 base_reg += 2;
17761
17762 XVECEXP (par, 0, 0)
17763 = gen_rtx_SET (gen_frame_mem
17764 (BLKmode,
17765 gen_rtx_PRE_MODIFY (Pmode,
17766 stack_pointer_rtx,
17767 plus_constant
17768 (Pmode, stack_pointer_rtx,
17769 - (count * 8)))
17770 ),
17771 gen_rtx_UNSPEC (BLKmode,
17772 gen_rtvec (1, reg),
17773 UNSPEC_PUSH_MULT));
17774
17775 tmp = gen_rtx_SET (stack_pointer_rtx,
17776 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
17777 RTX_FRAME_RELATED_P (tmp) = 1;
17778 XVECEXP (dwarf, 0, 0) = tmp;
17779
17780 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
17781 RTX_FRAME_RELATED_P (tmp) = 1;
17782 XVECEXP (dwarf, 0, 1) = tmp;
17783
17784 for (i = 1; i < count; i++)
17785 {
17786 reg = gen_rtx_REG (DFmode, base_reg);
17787 base_reg += 2;
17788 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
17789
17790 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
17791 plus_constant (Pmode,
17792 stack_pointer_rtx,
17793 i * 8)),
17794 reg);
17795 RTX_FRAME_RELATED_P (tmp) = 1;
17796 XVECEXP (dwarf, 0, i + 1) = tmp;
17797 }
17798
17799 par = emit_insn (par);
17800 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
17801 RTX_FRAME_RELATED_P (par) = 1;
17802
17803 return count * 8;
17804 }
17805
17806 /* Return true if -mcmse has been passed and the function pointed to by 'addr'
17807 has the cmse_nonsecure_call attribute; return false otherwise. */
17808
17809 bool
17810 detect_cmse_nonsecure_call (tree addr)
17811 {
17812 if (!addr)
17813 return FALSE;
17814
17815 tree fntype = TREE_TYPE (addr);
17816 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
17817 TYPE_ATTRIBUTES (fntype)))
17818 return TRUE;
17819 return FALSE;
17820 }
17821
17822
17823 /* Emit a call instruction with pattern PAT. ADDR is the address of
17824 the call target. */
17825
17826 void
17827 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
17828 {
17829 rtx insn;
17830
17831 insn = emit_call_insn (pat);
17832
17833 /* The PIC register is live on entry to VxWorks PIC PLT entries.
17834 If the call might use such an entry, add a use of the PIC register
17835 to the instruction's CALL_INSN_FUNCTION_USAGE. */
17836 if (TARGET_VXWORKS_RTP
17837 && flag_pic
17838 && !sibcall
17839 && GET_CODE (addr) == SYMBOL_REF
17840 && (SYMBOL_REF_DECL (addr)
17841 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
17842 : !SYMBOL_REF_LOCAL_P (addr)))
17843 {
17844 require_pic_register ();
17845 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
17846 }
17847
17848 if (TARGET_AAPCS_BASED)
17849 {
17850 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
17851 linker. We need to add an IP clobber to allow setting
17852 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
17853 is not needed since it's a fixed register. */
17854 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
17855 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
17856 }
17857 }
17858
17859 /* Output a 'call' insn. */
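/* For example (illustration only): on a pre-ARMv5 target with
   interworking, a call through r2 is emitted as

	mov	lr, pc
	bx	r2

   (or "mov pc, r2" without interworking or ARMv4T), and a call through
   lr is first redirected via ip.  */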
17860 const char *
17861 output_call (rtx *operands)
17862 {
17863 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
17864
17865 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
17866 if (REGNO (operands[0]) == LR_REGNUM)
17867 {
17868 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
17869 output_asm_insn ("mov%?\t%0, %|lr", operands);
17870 }
17871
17872 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
17873
17874 if (TARGET_INTERWORK || arm_arch4t)
17875 output_asm_insn ("bx%?\t%0", operands);
17876 else
17877 output_asm_insn ("mov%?\t%|pc, %0", operands);
17878
17879 return "";
17880 }
17881
17882 /* Output a move of a long double from ARM registers to ARM registers.
17883 OPERANDS[0] is the destination.
17884 OPERANDS[1] is the source. */
17885 const char *
17886 output_mov_long_double_arm_from_arm (rtx *operands)
17887 {
17888 /* We have to be careful here because the two might overlap. */
17889 int dest_start = REGNO (operands[0]);
17890 int src_start = REGNO (operands[1]);
17891 rtx ops[2];
17892 int i;
17893
17894 if (dest_start < src_start)
17895 {
17896 for (i = 0; i < 3; i++)
17897 {
17898 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17899 ops[1] = gen_rtx_REG (SImode, src_start + i);
17900 output_asm_insn ("mov%?\t%0, %1", ops);
17901 }
17902 }
17903 else
17904 {
17905 for (i = 2; i >= 0; i--)
17906 {
17907 ops[0] = gen_rtx_REG (SImode, dest_start + i);
17908 ops[1] = gen_rtx_REG (SImode, src_start + i);
17909 output_asm_insn ("mov%?\t%0, %1", ops);
17910 }
17911 }
17912
17913 return "";
17914 }
17915
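/* Load the value SRC into DEST using a pair of sets: for a constant, a
   set of the low 16 bits followed, when the upper half is non-zero, by an
   insertion of the upper 16 bits (typically assembled as movw/movt);
   otherwise a HIGH/LO_SUM pair.  For example (illustration only),
   SRC = 0x12345678 is emitted as a set of DEST to 0x5678 followed by a
   zero_extract set of the top half to 0x1234, with a REG_EQUAL note
   recording the full value.  */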
17916 void
17917 arm_emit_movpair (rtx dest, rtx src)
17918 {
17919 /* If the src is an immediate, simplify it. */
17920 if (CONST_INT_P (src))
17921 {
17922 HOST_WIDE_INT val = INTVAL (src);
17923 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
17924 if ((val >> 16) & 0x0000ffff)
17925 {
17926 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
17927 GEN_INT (16)),
17928 GEN_INT ((val >> 16) & 0x0000ffff));
17929 rtx_insn *insn = get_last_insn ();
17930 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17931 }
17932 return;
17933 }
17934 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
17935 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
17936 rtx_insn *insn = get_last_insn ();
17937 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
17938 }
17939
17940 /* Output a move between double words. It must be REG<-MEM
17941 or MEM<-REG. */
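/* For example (illustration only): a register-addressed DImode load is
   emitted as "ldrd r0, [r4]" when LDRD is available, or as
   "ldmia r4, {r0, r1}" otherwise.  */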
17942 const char *
17943 output_move_double (rtx *operands, bool emit, int *count)
17944 {
17945 enum rtx_code code0 = GET_CODE (operands[0]);
17946 enum rtx_code code1 = GET_CODE (operands[1]);
17947 rtx otherops[3];
17948 if (count)
17949 *count = 1;
17950
17951 /* The only case when this might happen is when
17952 you are looking at the length of a DImode instruction
17953 that has an invalid constant in it. */
17954 if (code0 == REG && code1 != MEM)
17955 {
17956 gcc_assert (!emit);
17957 *count = 2;
17958 return "";
17959 }
17960
17961 if (code0 == REG)
17962 {
17963 unsigned int reg0 = REGNO (operands[0]);
17964
17965 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
17966
17967 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
17968
17969 switch (GET_CODE (XEXP (operands[1], 0)))
17970 {
17971 case REG:
17972
17973 if (emit)
17974 {
17975 if (TARGET_LDRD
17976 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
17977 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
17978 else
17979 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
17980 }
17981 break;
17982
17983 case PRE_INC:
17984 gcc_assert (TARGET_LDRD);
17985 if (emit)
17986 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
17987 break;
17988
17989 case PRE_DEC:
17990 if (emit)
17991 {
17992 if (TARGET_LDRD)
17993 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
17994 else
17995 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
17996 }
17997 break;
17998
17999 case POST_INC:
18000 if (emit)
18001 {
18002 if (TARGET_LDRD)
18003 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18004 else
18005 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18006 }
18007 break;
18008
18009 case POST_DEC:
18010 gcc_assert (TARGET_LDRD);
18011 if (emit)
18012 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18013 break;
18014
18015 case PRE_MODIFY:
18016 case POST_MODIFY:
18017 /* Autoincrement addressing modes should never have overlapping
18018 base and destination registers, and overlapping index registers
18019 are already prohibited, so this doesn't need to worry about
18020 fix_cm3_ldrd. */
18021 otherops[0] = operands[0];
18022 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18023 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18024
18025 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18026 {
18027 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18028 {
18029 /* Registers overlap so split out the increment. */
18030 if (emit)
18031 {
18032 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18033 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18034 }
18035 if (count)
18036 *count = 2;
18037 }
18038 else
18039 {
18040 /* Use a single insn if we can.
18041 FIXME: IWMMXT allows offsets larger than ldrd can
18042 handle, fix these up with a pair of ldr. */
18043 if (TARGET_THUMB2
18044 || !CONST_INT_P (otherops[2])
18045 || (INTVAL (otherops[2]) > -256
18046 && INTVAL (otherops[2]) < 256))
18047 {
18048 if (emit)
18049 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18050 }
18051 else
18052 {
18053 if (emit)
18054 {
18055 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18056 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18057 }
18058 if (count)
18059 *count = 2;
18060
18061 }
18062 }
18063 }
18064 else
18065 {
18066 /* Use a single insn if we can.
18067 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18068 fix these up with a pair of ldr. */
18069 if (TARGET_THUMB2
18070 || !CONST_INT_P (otherops[2])
18071 || (INTVAL (otherops[2]) > -256
18072 && INTVAL (otherops[2]) < 256))
18073 {
18074 if (emit)
18075 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18076 }
18077 else
18078 {
18079 if (emit)
18080 {
18081 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18082 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18083 }
18084 if (count)
18085 *count = 2;
18086 }
18087 }
18088 break;
18089
18090 case LABEL_REF:
18091 case CONST:
18092 /* We might be able to use ldrd %0, %1 here. However the range is
18093 different to ldr/adr, and it is broken on some ARMv7-M
18094 implementations. */
18095 /* Use the second register of the pair to avoid problematic
18096 overlap. */
18097 otherops[1] = operands[1];
18098 if (emit)
18099 output_asm_insn ("adr%?\t%0, %1", otherops);
18100 operands[1] = otherops[0];
18101 if (emit)
18102 {
18103 if (TARGET_LDRD)
18104 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18105 else
18106 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18107 }
18108
18109 if (count)
18110 *count = 2;
18111 break;
18112
18113 /* ??? This needs checking for thumb2. */
18114 default:
18115 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18116 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18117 {
18118 otherops[0] = operands[0];
18119 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18120 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18121
18122 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18123 {
18124 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18125 {
18126 switch ((int) INTVAL (otherops[2]))
18127 {
18128 case -8:
18129 if (emit)
18130 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18131 return "";
18132 case -4:
18133 if (TARGET_THUMB2)
18134 break;
18135 if (emit)
18136 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18137 return "";
18138 case 4:
18139 if (TARGET_THUMB2)
18140 break;
18141 if (emit)
18142 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18143 return "";
18144 }
18145 }
18146 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18147 operands[1] = otherops[0];
18148 if (TARGET_LDRD
18149 && (REG_P (otherops[2])
18150 || TARGET_THUMB2
18151 || (CONST_INT_P (otherops[2])
18152 && INTVAL (otherops[2]) > -256
18153 && INTVAL (otherops[2]) < 256)))
18154 {
18155 if (reg_overlap_mentioned_p (operands[0],
18156 otherops[2]))
18157 {
18158 /* Swap base and index registers over to
18159 avoid a conflict. */
18160 std::swap (otherops[1], otherops[2]);
18161 }
18162 /* If both registers conflict, it will usually
18163 have been fixed by a splitter. */
18164 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18165 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18166 {
18167 if (emit)
18168 {
18169 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18170 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18171 }
18172 if (count)
18173 *count = 2;
18174 }
18175 else
18176 {
18177 otherops[0] = operands[0];
18178 if (emit)
18179 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18180 }
18181 return "";
18182 }
18183
18184 if (CONST_INT_P (otherops[2]))
18185 {
18186 if (emit)
18187 {
18188 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18189 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18190 else
18191 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18192 }
18193 }
18194 else
18195 {
18196 if (emit)
18197 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18198 }
18199 }
18200 else
18201 {
18202 if (emit)
18203 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18204 }
18205
18206 if (count)
18207 *count = 2;
18208
18209 if (TARGET_LDRD)
18210 return "ldrd%?\t%0, [%1]";
18211
18212 return "ldmia%?\t%1, %M0";
18213 }
18214 else
18215 {
18216 otherops[1] = adjust_address (operands[1], SImode, 4);
18217 /* Take care of overlapping base/data reg. */
18218 if (reg_mentioned_p (operands[0], operands[1]))
18219 {
18220 if (emit)
18221 {
18222 output_asm_insn ("ldr%?\t%0, %1", otherops);
18223 output_asm_insn ("ldr%?\t%0, %1", operands);
18224 }
18225 if (count)
18226 *count = 2;
18227
18228 }
18229 else
18230 {
18231 if (emit)
18232 {
18233 output_asm_insn ("ldr%?\t%0, %1", operands);
18234 output_asm_insn ("ldr%?\t%0, %1", otherops);
18235 }
18236 if (count)
18237 *count = 2;
18238 }
18239 }
18240 }
18241 }
18242 else
18243 {
18244 /* Constraints should ensure this. */
18245 gcc_assert (code0 == MEM && code1 == REG);
18246 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18247 || (TARGET_ARM && TARGET_LDRD));
18248
18249 switch (GET_CODE (XEXP (operands[0], 0)))
18250 {
18251 case REG:
18252 if (emit)
18253 {
18254 if (TARGET_LDRD)
18255 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18256 else
18257 output_asm_insn ("stm%?\t%m0, %M1", operands);
18258 }
18259 break;
18260
18261 case PRE_INC:
18262 gcc_assert (TARGET_LDRD);
18263 if (emit)
18264 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18265 break;
18266
18267 case PRE_DEC:
18268 if (emit)
18269 {
18270 if (TARGET_LDRD)
18271 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18272 else
18273 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18274 }
18275 break;
18276
18277 case POST_INC:
18278 if (emit)
18279 {
18280 if (TARGET_LDRD)
18281 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18282 else
18283 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18284 }
18285 break;
18286
18287 case POST_DEC:
18288 gcc_assert (TARGET_LDRD);
18289 if (emit)
18290 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18291 break;
18292
18293 case PRE_MODIFY:
18294 case POST_MODIFY:
18295 otherops[0] = operands[1];
18296 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18297 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18298
18299 /* IWMMXT allows offsets larger than ldrd can handle,
18300 fix these up with a pair of ldr. */
18301 if (!TARGET_THUMB2
18302 && CONST_INT_P (otherops[2])
18303 && (INTVAL(otherops[2]) <= -256
18304 || INTVAL(otherops[2]) >= 256))
18305 {
18306 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18307 {
18308 if (emit)
18309 {
18310 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18311 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18312 }
18313 if (count)
18314 *count = 2;
18315 }
18316 else
18317 {
18318 if (emit)
18319 {
18320 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18321 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18322 }
18323 if (count)
18324 *count = 2;
18325 }
18326 }
18327 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18328 {
18329 if (emit)
18330 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18331 }
18332 else
18333 {
18334 if (emit)
18335 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18336 }
18337 break;
18338
18339 case PLUS:
18340 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18341 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18342 {
18343 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18344 {
18345 case -8:
18346 if (emit)
18347 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18348 return "";
18349
18350 case -4:
18351 if (TARGET_THUMB2)
18352 break;
18353 if (emit)
18354 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18355 return "";
18356
18357 case 4:
18358 if (TARGET_THUMB2)
18359 break;
18360 if (emit)
18361 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18362 return "";
18363 }
18364 }
18365 if (TARGET_LDRD
18366 && (REG_P (otherops[2])
18367 || TARGET_THUMB2
18368 || (CONST_INT_P (otherops[2])
18369 && INTVAL (otherops[2]) > -256
18370 && INTVAL (otherops[2]) < 256)))
18371 {
18372 otherops[0] = operands[1];
18373 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18374 if (emit)
18375 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18376 return "";
18377 }
18378 /* Fall through */
18379
18380 default:
18381 otherops[0] = adjust_address (operands[0], SImode, 4);
18382 otherops[1] = operands[1];
18383 if (emit)
18384 {
18385 output_asm_insn ("str%?\t%1, %0", operands);
18386 output_asm_insn ("str%?\t%H1, %0", otherops);
18387 }
18388 if (count)
18389 *count = 2;
18390 }
18391 }
18392
18393 return "";
18394 }
18395
18396 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18397 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18398
18399 const char *
18400 output_move_quad (rtx *operands)
18401 {
18402 if (REG_P (operands[0]))
18403 {
18404 /* Load, or reg->reg move. */
18405
18406 if (MEM_P (operands[1]))
18407 {
18408 switch (GET_CODE (XEXP (operands[1], 0)))
18409 {
18410 case REG:
18411 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18412 break;
18413
18414 case LABEL_REF:
18415 case CONST:
18416 output_asm_insn ("adr%?\t%0, %1", operands);
18417 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18418 break;
18419
18420 default:
18421 gcc_unreachable ();
18422 }
18423 }
18424 else
18425 {
18426 rtx ops[2];
18427 int dest, src, i;
18428
18429 gcc_assert (REG_P (operands[1]));
18430
18431 dest = REGNO (operands[0]);
18432 src = REGNO (operands[1]);
18433
18434 /* This seems pretty dumb, but hopefully GCC won't try to do it
18435 very often. */
18436 if (dest < src)
18437 for (i = 0; i < 4; i++)
18438 {
18439 ops[0] = gen_rtx_REG (SImode, dest + i);
18440 ops[1] = gen_rtx_REG (SImode, src + i);
18441 output_asm_insn ("mov%?\t%0, %1", ops);
18442 }
18443 else
18444 for (i = 3; i >= 0; i--)
18445 {
18446 ops[0] = gen_rtx_REG (SImode, dest + i);
18447 ops[1] = gen_rtx_REG (SImode, src + i);
18448 output_asm_insn ("mov%?\t%0, %1", ops);
18449 }
18450 }
18451 }
18452 else
18453 {
18454 gcc_assert (MEM_P (operands[0]));
18455 gcc_assert (REG_P (operands[1]));
18456 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18457
18458 switch (GET_CODE (XEXP (operands[0], 0)))
18459 {
18460 case REG:
18461 output_asm_insn ("stm%?\t%m0, %M1", operands);
18462 break;
18463
18464 default:
18465 gcc_unreachable ();
18466 }
18467 }
18468
18469 return "";
18470 }
18471
18472 /* Output a VFP load or store instruction. */
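/* For example (illustration only): a double-precision load from the
   stack is emitted as "vldr.64 d8, [sp, #8]", while a post-incremented
   single-precision store becomes "vstmia.32 r4!, {s0}".  */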
18473
18474 const char *
18475 output_move_vfp (rtx *operands)
18476 {
18477 rtx reg, mem, addr, ops[2];
18478 int load = REG_P (operands[0]);
18479 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18480 int sp = (!TARGET_VFP_FP16INST
18481 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18482 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18483 const char *templ;
18484 char buff[50];
18485 machine_mode mode;
18486
18487 reg = operands[!load];
18488 mem = operands[load];
18489
18490 mode = GET_MODE (reg);
18491
18492 gcc_assert (REG_P (reg));
18493 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18494 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18495 || mode == SFmode
18496 || mode == DFmode
18497 || mode == HImode
18498 || mode == SImode
18499 || mode == DImode
18500 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18501 gcc_assert (MEM_P (mem));
18502
18503 addr = XEXP (mem, 0);
18504
18505 switch (GET_CODE (addr))
18506 {
18507 case PRE_DEC:
18508 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18509 ops[0] = XEXP (addr, 0);
18510 ops[1] = reg;
18511 break;
18512
18513 case POST_INC:
18514 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18515 ops[0] = XEXP (addr, 0);
18516 ops[1] = reg;
18517 break;
18518
18519 default:
18520 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18521 ops[0] = reg;
18522 ops[1] = mem;
18523 break;
18524 }
18525
18526 sprintf (buff, templ,
18527 load ? "ld" : "st",
18528 dp ? "64" : sp ? "32" : "16",
18529 dp ? "P" : "",
18530 integer_p ? "\t%@ int" : "");
18531 output_asm_insn (buff, ops);
18532
18533 return "";
18534 }
18535
18536 /* Output a Neon double-word or quad-word load or store, or a load
18537 or store for larger structure modes.
18538
18539 WARNING: The ordering of elements is weird in big-endian mode,
18540 because the EABI requires that vectors stored in memory appear
18541 as though they were stored by a VSTM instruction.
18542 GCC RTL defines element ordering based on in-memory order.
18543 This can be different from the architectural ordering of elements
18544 within a NEON register. The intrinsics defined in arm_neon.h use the
18545 NEON register element ordering, not the GCC RTL element ordering.
18546
18547 For example, the in-memory ordering of a big-endian quadword
18548 vector with 16-bit elements when stored from register pair {d0,d1}
18549 will be (lowest address first, d0[N] is NEON register element N):
18550
18551 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18552
18553 When necessary, quadword registers (dN, dN+1) are moved to ARM
18554 registers starting at rN, in the order:
18555
18556 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18557
18558 So that STM/LDM can be used on vectors in ARM registers, and the
18559 same memory layout will result as if VSTM/VLDM were used.
18560
18561 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18562 possible, which allows use of appropriate alignment tags.
18563 Note that the choice of "64" is independent of the actual vector
18564 element size; this size simply ensures that the behavior is
18565 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18566
18567 Due to limitations of those instructions, use of VST1.64/VLD1.64
18568 is not possible if:
18569 - the address contains PRE_DEC, or
18570 - the mode refers to more than 4 double-word registers
18571
18572 In those cases, it would be possible to replace VSTM/VLDM by a
18573 sequence of instructions; this is not currently implemented since
18574 this is not certain to actually improve performance. */
18575
18576 const char *
18577 output_move_neon (rtx *operands)
18578 {
18579 rtx reg, mem, addr, ops[2];
18580 int regno, nregs, load = REG_P (operands[0]);
18581 const char *templ;
18582 char buff[50];
18583 machine_mode mode;
18584
18585 reg = operands[!load];
18586 mem = operands[load];
18587
18588 mode = GET_MODE (reg);
18589
18590 gcc_assert (REG_P (reg));
18591 regno = REGNO (reg);
18592 nregs = HARD_REGNO_NREGS (regno, mode) / 2;
18593 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18594 || NEON_REGNO_OK_FOR_QUAD (regno));
18595 gcc_assert (VALID_NEON_DREG_MODE (mode)
18596 || VALID_NEON_QREG_MODE (mode)
18597 || VALID_NEON_STRUCT_MODE (mode));
18598 gcc_assert (MEM_P (mem));
18599
18600 addr = XEXP (mem, 0);
18601
18602 /* Strip off const from addresses like (const (plus (...))). */
18603 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18604 addr = XEXP (addr, 0);
18605
18606 switch (GET_CODE (addr))
18607 {
18608 case POST_INC:
18609 /* We have to use vldm / vstm for too-large modes. */
18610 if (nregs > 4)
18611 {
18612 templ = "v%smia%%?\t%%0!, %%h1";
18613 ops[0] = XEXP (addr, 0);
18614 }
18615 else
18616 {
18617 templ = "v%s1.64\t%%h1, %%A0";
18618 ops[0] = mem;
18619 }
18620 ops[1] = reg;
18621 break;
18622
18623 case PRE_DEC:
18624 /* We have to use vldm / vstm in this case, since there is no
18625 pre-decrement form of the vld1 / vst1 instructions. */
18626 templ = "v%smdb%%?\t%%0!, %%h1";
18627 ops[0] = XEXP (addr, 0);
18628 ops[1] = reg;
18629 break;
18630
18631 case POST_MODIFY:
18632 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18633 gcc_unreachable ();
18634
18635 case REG:
18636 /* We have to use vldm / vstm for too-large modes. */
18637 if (nregs > 1)
18638 {
18639 if (nregs > 4)
18640 templ = "v%smia%%?\t%%m0, %%h1";
18641 else
18642 templ = "v%s1.64\t%%h1, %%A0";
18643
18644 ops[0] = mem;
18645 ops[1] = reg;
18646 break;
18647 }
18648 /* Fall through. */
18649 case LABEL_REF:
18650 case PLUS:
18651 {
18652 int i;
18653 int overlap = -1;
18654 for (i = 0; i < nregs; i++)
18655 {
18656 /* We're only using DImode here because it's a convenient size. */
18657 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18658 ops[1] = adjust_address (mem, DImode, 8 * i);
18659 if (reg_overlap_mentioned_p (ops[0], mem))
18660 {
18661 gcc_assert (overlap == -1);
18662 overlap = i;
18663 }
18664 else
18665 {
18666 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18667 output_asm_insn (buff, ops);
18668 }
18669 }
18670 if (overlap != -1)
18671 {
18672 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18673 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18674 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18675 output_asm_insn (buff, ops);
18676 }
18677
18678 return "";
18679 }
18680
18681 default:
18682 gcc_unreachable ();
18683 }
18684
18685 sprintf (buff, templ, load ? "ld" : "st");
18686 output_asm_insn (buff, ops);
18687
18688 return "";
18689 }
18690
18691 /* Compute and return the length of neon_mov<mode>, where <mode> is
18692 one of VSTRUCT modes: EI, OI, CI or XI. */
18693 int
18694 arm_attr_length_move_neon (rtx_insn *insn)
18695 {
18696 rtx reg, mem, addr;
18697 int load;
18698 machine_mode mode;
18699
18700 extract_insn_cached (insn);
18701
18702 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18703 {
18704 mode = GET_MODE (recog_data.operand[0]);
18705 switch (mode)
18706 {
18707 case E_EImode:
18708 case E_OImode:
18709 return 8;
18710 case E_CImode:
18711 return 12;
18712 case E_XImode:
18713 return 16;
18714 default:
18715 gcc_unreachable ();
18716 }
18717 }
18718
18719 load = REG_P (recog_data.operand[0]);
18720 reg = recog_data.operand[!load];
18721 mem = recog_data.operand[load];
18722
18723 gcc_assert (MEM_P (mem));
18724
18725 mode = GET_MODE (reg);
18726 addr = XEXP (mem, 0);
18727
18728 /* Strip off const from addresses like (const (plus (...))). */
18729 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18730 addr = XEXP (addr, 0);
18731
18732 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18733 {
18734 int insns = HARD_REGNO_NREGS (REGNO (reg), mode) / 2;
18735 return insns * 4;
18736 }
18737 else
18738 return 4;
18739 }
18740
18741 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18742 return zero. */
18743
18744 int
18745 arm_address_offset_is_imm (rtx_insn *insn)
18746 {
18747 rtx mem, addr;
18748
18749 extract_insn_cached (insn);
18750
18751 if (REG_P (recog_data.operand[0]))
18752 return 0;
18753
18754 mem = recog_data.operand[0];
18755
18756 gcc_assert (MEM_P (mem));
18757
18758 addr = XEXP (mem, 0);
18759
18760 if (REG_P (addr)
18761 || (GET_CODE (addr) == PLUS
18762 && REG_P (XEXP (addr, 0))
18763 && CONST_INT_P (XEXP (addr, 1))))
18764 return 1;
18765 else
18766 return 0;
18767 }
18768
18769 /* Output an ADD r, s, #n where n may be too big for one instruction.
18770 If adding zero to one register, output nothing. */
18771 const char *
18772 output_add_immediate (rtx *operands)
18773 {
18774 HOST_WIDE_INT n = INTVAL (operands[2]);
18775
18776 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
18777 {
18778 if (n < 0)
18779 output_multi_immediate (operands,
18780 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
18781 -n);
18782 else
18783 output_multi_immediate (operands,
18784 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
18785 n);
18786 }
18787
18788 return "";
18789 }
18790
18791 /* Output a multiple immediate operation.
18792 OPERANDS is the vector of operands referred to in the output patterns.
18793 INSTR1 is the output pattern to use for the first constant.
18794 INSTR2 is the output pattern to use for subsequent constants.
18795 IMMED_OP is the index of the constant slot in OPERANDS.
18796 N is the constant value. */
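/* For example (illustration only): called from output_add_immediate with
   N = 0xf00f, the loop below picks out the chunks 0x000f and 0xf000 and
   emits

	add	r0, r1, #15
	add	r0, r0, #61440

   each chunk being an 8-bit value at an even bit position, i.e. a valid
   ARM immediate.  */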
18797 static const char *
18798 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
18799 int immed_op, HOST_WIDE_INT n)
18800 {
18801 #if HOST_BITS_PER_WIDE_INT > 32
18802 n &= 0xffffffff;
18803 #endif
18804
18805 if (n == 0)
18806 {
18807 /* Quick and easy output. */
18808 operands[immed_op] = const0_rtx;
18809 output_asm_insn (instr1, operands);
18810 }
18811 else
18812 {
18813 int i;
18814 const char * instr = instr1;
18815
18816 /* Note that n is never zero here (which would give no output). */
18817 for (i = 0; i < 32; i += 2)
18818 {
18819 if (n & (3 << i))
18820 {
18821 operands[immed_op] = GEN_INT (n & (255 << i));
18822 output_asm_insn (instr, operands);
18823 instr = instr2;
18824 i += 6;
18825 }
18826 }
18827 }
18828
18829 return "";
18830 }
18831
18832 /* Return the name of a shifter operation. */
18833 static const char *
18834 arm_shift_nmem(enum rtx_code code)
18835 {
18836 switch (code)
18837 {
18838 case ASHIFT:
18839 return ARM_LSL_NAME;
18840
18841 case ASHIFTRT:
18842 return "asr";
18843
18844 case LSHIFTRT:
18845 return "lsr";
18846
18847 case ROTATERT:
18848 return "ror";
18849
18850 default:
18851 abort();
18852 }
18853 }
18854
18855 /* Return the appropriate ARM instruction for the operation code.
18856 The returned result should not be overwritten. OP is the rtx of the
18857 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
18858 was shifted. */
18859 const char *
18860 arithmetic_instr (rtx op, int shift_first_arg)
18861 {
18862 switch (GET_CODE (op))
18863 {
18864 case PLUS:
18865 return "add";
18866
18867 case MINUS:
18868 return shift_first_arg ? "rsb" : "sub";
18869
18870 case IOR:
18871 return "orr";
18872
18873 case XOR:
18874 return "eor";
18875
18876 case AND:
18877 return "and";
18878
18879 case ASHIFT:
18880 case ASHIFTRT:
18881 case LSHIFTRT:
18882 case ROTATERT:
18883 return arm_shift_nmem(GET_CODE(op));
18884
18885 default:
18886 gcc_unreachable ();
18887 }
18888 }
18889
18890 /* Ensure valid constant shifts and return the appropriate shift mnemonic
18891 for the operation code. The returned result should not be overwritten.
18892 OP is the rtx code of the shift.
18893 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
18894 constant shift amount otherwise. */
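/* For example (illustration only): (mult x 8) yields "lsl" with *AMOUNTP
   set to 3, and (rotate x 10) is canonicalized to "ror" with *AMOUNTP set
   to 22, since a left rotate by N is a right rotate by 32 - N.  */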
18895 static const char *
18896 shift_op (rtx op, HOST_WIDE_INT *amountp)
18897 {
18898 const char * mnem;
18899 enum rtx_code code = GET_CODE (op);
18900
18901 switch (code)
18902 {
18903 case ROTATE:
18904 if (!CONST_INT_P (XEXP (op, 1)))
18905 {
18906 output_operand_lossage ("invalid shift operand");
18907 return NULL;
18908 }
18909
18910 code = ROTATERT;
18911 *amountp = 32 - INTVAL (XEXP (op, 1));
18912 mnem = "ror";
18913 break;
18914
18915 case ASHIFT:
18916 case ASHIFTRT:
18917 case LSHIFTRT:
18918 case ROTATERT:
18919 mnem = arm_shift_nmem(code);
18920 if (CONST_INT_P (XEXP (op, 1)))
18921 {
18922 *amountp = INTVAL (XEXP (op, 1));
18923 }
18924 else if (REG_P (XEXP (op, 1)))
18925 {
18926 *amountp = -1;
18927 return mnem;
18928 }
18929 else
18930 {
18931 output_operand_lossage ("invalid shift operand");
18932 return NULL;
18933 }
18934 break;
18935
18936 case MULT:
18937 /* We never have to worry about the amount being other than a
18938 power of 2, since this case can never be reloaded from a reg. */
18939 if (!CONST_INT_P (XEXP (op, 1)))
18940 {
18941 output_operand_lossage ("invalid shift operand");
18942 return NULL;
18943 }
18944
18945 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
18946
18947 /* Amount must be a power of two. */
18948 if (*amountp & (*amountp - 1))
18949 {
18950 output_operand_lossage ("invalid shift operand");
18951 return NULL;
18952 }
18953
18954 *amountp = exact_log2 (*amountp);
18955 gcc_assert (IN_RANGE (*amountp, 0, 31));
18956 return ARM_LSL_NAME;
18957
18958 default:
18959 output_operand_lossage ("invalid shift operand");
18960 return NULL;
18961 }
18962
18963 /* This is not 100% correct, but follows from the desire to merge
18964 multiplication by a power of 2 with the recognizer for a
18965 shift. >=32 is not a valid shift for "lsl", so we must try and
18966 output a shift that produces the correct arithmetical result.
18967 Using lsr #32 is identical except for the fact that the carry bit
18968 is not set correctly if we set the flags; but we never use the
18969 carry bit from such an operation, so we can ignore that. */
18970 if (code == ROTATERT)
18971 /* Rotate is just modulo 32. */
18972 *amountp &= 31;
18973 else if (*amountp != (*amountp & 31))
18974 {
18975 if (code == ASHIFT)
18976 mnem = "lsr";
18977 *amountp = 32;
18978 }
18979
18980 /* Shifts of 0 are no-ops. */
18981 if (*amountp == 0)
18982 return NULL;
18983
18984 return mnem;
18985 }
18986
18987 /* Output a .ascii pseudo-op, keeping track of lengths. This is
18988 because /bin/as is horribly restrictive. The judgement about
18989 whether or not each character is 'printable' (and can be output as
18990 is) or not (and must be printed with an octal escape) must be made
18991 with reference to the *host* character set -- the situation is
18992 similar to that discussed in the comments above pp_c_char in
18993 c-pretty-print.c. */
18994
18995 #define MAX_ASCII_LEN 51
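/* For example (illustration only): the bytes 'O', 'K', '"', '\n' are
   emitted as

	.ascii	"OK\"\012"

   and a fresh .ascii directive is started whenever MAX_ASCII_LEN output
   characters have been produced.  */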
18996
18997 void
18998 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
18999 {
19000 int i;
19001 int len_so_far = 0;
19002
19003 fputs ("\t.ascii\t\"", stream);
19004
19005 for (i = 0; i < len; i++)
19006 {
19007 int c = p[i];
19008
19009 if (len_so_far >= MAX_ASCII_LEN)
19010 {
19011 fputs ("\"\n\t.ascii\t\"", stream);
19012 len_so_far = 0;
19013 }
19014
19015 if (ISPRINT (c))
19016 {
19017 if (c == '\\' || c == '\"')
19018 {
19019 putc ('\\', stream);
19020 len_so_far++;
19021 }
19022 putc (c, stream);
19023 len_so_far++;
19024 }
19025 else
19026 {
19027 fprintf (stream, "\\%03o", c);
19028 len_so_far += 4;
19029 }
19030 }
19031
19032 fputs ("\"\n", stream);
19033 }
19034 \f
19035 /* Whether a register is callee saved or not. This is necessary because high
19036 registers are marked as caller saved when optimizing for size on Thumb-1
19037 targets, despite being callee saved, in order to avoid using them. */
19038 #define callee_saved_reg_p(reg) \
19039 (!call_used_regs[reg] \
19040 || (TARGET_THUMB1 && optimize_size \
19041 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19042
19043 /* Compute the register save mask for registers 0 through 12
19044 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19045
19046 static unsigned long
19047 arm_compute_save_reg0_reg12_mask (void)
19048 {
19049 unsigned long func_type = arm_current_func_type ();
19050 unsigned long save_reg_mask = 0;
19051 unsigned int reg;
19052
19053 if (IS_INTERRUPT (func_type))
19054 {
19055 unsigned int max_reg;
19056 /* Interrupt functions must not corrupt any registers,
19057 even call clobbered ones. If this is a leaf function
19058 we can just examine the registers used by the RTL, but
19059 otherwise we have to assume that whatever function is
19060 called might clobber anything, and so we have to save
19061 all the call-clobbered registers as well. */
19062 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19063 /* FIQ handlers have registers r8 - r12 banked, so
19064 we only need to check r0 - r7. Normal ISRs only
19065 bank r14 and r15, so we must check up to r12.
19066 r13 is the stack pointer which is always preserved,
19067 so we do not need to consider it here. */
19068 max_reg = 7;
19069 else
19070 max_reg = 12;
19071
19072 for (reg = 0; reg <= max_reg; reg++)
19073 if (df_regs_ever_live_p (reg)
19074 || (! crtl->is_leaf && call_used_regs[reg]))
19075 save_reg_mask |= (1 << reg);
19076
19077 /* Also save the pic base register if necessary. */
19078 if (flag_pic
19079 && !TARGET_SINGLE_PIC_BASE
19080 && arm_pic_register != INVALID_REGNUM
19081 && crtl->uses_pic_offset_table)
19082 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19083 }
19084 else if (IS_VOLATILE(func_type))
19085 {
19086 /* For noreturn functions we historically omitted register saves
19087 altogether. However this really messes up debugging. As a
19088 compromise save just the frame pointers. Combined with the link
19089 register saved elsewhere this should be sufficient to get
19090 a backtrace. */
19091 if (frame_pointer_needed)
19092 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19093 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19094 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19095 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19096 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19097 }
19098 else
19099 {
19100 /* In the normal case we only need to save those registers
19101 which are call saved and which are used by this function. */
19102 for (reg = 0; reg <= 11; reg++)
19103 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19104 save_reg_mask |= (1 << reg);
19105
19106 /* Handle the frame pointer as a special case. */
19107 if (frame_pointer_needed)
19108 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19109
19110 /* If we aren't loading the PIC register,
19111 don't stack it even though it may be live. */
19112 if (flag_pic
19113 && !TARGET_SINGLE_PIC_BASE
19114 && arm_pic_register != INVALID_REGNUM
19115 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19116 || crtl->uses_pic_offset_table))
19117 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19118
19119 /* The prologue will copy SP into R0, so save it. */
19120 if (IS_STACKALIGN (func_type))
19121 save_reg_mask |= 1;
19122 }
19123
19124 /* Save registers so the exception handler can modify them. */
19125 if (crtl->calls_eh_return)
19126 {
19127 unsigned int i;
19128
19129 for (i = 0; ; i++)
19130 {
19131 reg = EH_RETURN_DATA_REGNO (i);
19132 if (reg == INVALID_REGNUM)
19133 break;
19134 save_reg_mask |= 1 << reg;
19135 }
19136 }
19137
19138 return save_reg_mask;
19139 }
19140
19141 /* Return true if r3 is live at the start of the function. */
19142
19143 static bool
19144 arm_r3_live_at_start_p (void)
19145 {
19146 /* Just look at cfg info, which is still close enough to correct at this
19147 point. This gives false positives for broken functions that might use
19148 uninitialized data that happens to be allocated in r3, but who cares? */
19149 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19150 }
19151
19152 /* Compute the number of bytes used to store the static chain register on the
19153 stack, above the stack frame. We need to know this accurately to get the
19154 alignment of the rest of the stack frame correct. */
19155
19156 static int
19157 arm_compute_static_chain_stack_bytes (void)
19158 {
19159 /* See the defining assertion in arm_expand_prologue. */
19160 if (IS_NESTED (arm_current_func_type ())
19161 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19162 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19163 && !df_regs_ever_live_p (LR_REGNUM)))
19164 && arm_r3_live_at_start_p ()
19165 && crtl->args.pretend_args_size == 0)
19166 return 4;
19167
19168 return 0;
19169 }
19170
19171 /* Compute a bit mask of which core registers need to be
19172 saved on the stack for the current function.
19173 This is used by arm_compute_frame_layout, which may add extra registers. */
19174
19175 static unsigned long
19176 arm_compute_save_core_reg_mask (void)
19177 {
19178 unsigned int save_reg_mask = 0;
19179 unsigned long func_type = arm_current_func_type ();
19180 unsigned int reg;
19181
19182 if (IS_NAKED (func_type))
19183 /* This should never really happen. */
19184 return 0;
19185
19186 /* If we are creating a stack frame, then we must save the frame pointer,
19187 IP (which will hold the old stack pointer), LR and the PC. */
19188 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19189 save_reg_mask |=
19190 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19191 | (1 << IP_REGNUM)
19192 | (1 << LR_REGNUM)
19193 | (1 << PC_REGNUM);
19194
19195 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19196
19197 /* Decide if we need to save the link register.
19198 Interrupt routines have their own banked link register,
19199 so they never need to save it.
19200 Otherwise if we do not use the link register we do not need to save
19201 it. If we are pushing other registers onto the stack however, we
19202 can save an instruction in the epilogue by pushing the link register
19203 now and then popping it back into the PC. This incurs extra memory
19204 accesses though, so we only do it when optimizing for size, and only
19205 if we know that we will not need a fancy return sequence. */
19206 if (df_regs_ever_live_p (LR_REGNUM)
19207 || (save_reg_mask
19208 && optimize_size
19209 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19210 && !crtl->tail_call_emit
19211 && !crtl->calls_eh_return))
19212 save_reg_mask |= 1 << LR_REGNUM;
19213
19214 if (cfun->machine->lr_save_eliminated)
19215 save_reg_mask &= ~ (1 << LR_REGNUM);
19216
19217 if (TARGET_REALLY_IWMMXT
19218 && ((bit_count (save_reg_mask)
19219 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19220 arm_compute_static_chain_stack_bytes())
19221 ) % 2) != 0)
19222 {
19223 /* The total number of registers that are going to be pushed
19224 onto the stack is odd. We need to ensure that the stack
19225 is 64-bit aligned before we start to save iWMMXt registers,
19226 and also before we start to create locals. (A local variable
19227 might be a double or long long which we will load/store using
19228 an iWMMXt instruction). Therefore we need to push another
19229 ARM register, so that the stack will be 64-bit aligned. We
19230 try to avoid using the arg registers (r0 -r3) as they might be
19231 used to pass values in a tail call. */
19232 for (reg = 4; reg <= 12; reg++)
19233 if ((save_reg_mask & (1 << reg)) == 0)
19234 break;
19235
19236 if (reg <= 12)
19237 save_reg_mask |= (1 << reg);
19238 else
19239 {
19240 cfun->machine->sibcall_blocked = 1;
19241 save_reg_mask |= (1 << 3);
19242 }
19243 }
19244
19245 /* We may need to push an additional register for use initializing the
19246 PIC base register. */
19247 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19248 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19249 {
19250 reg = thumb_find_work_register (1 << 4);
19251 if (!call_used_regs[reg])
19252 save_reg_mask |= (1 << reg);
19253 }
19254
19255 return save_reg_mask;
19256 }
19257
19258 /* Compute a bit mask of which core registers need to be
19259 saved on the stack for the current function. */
19260 static unsigned long
19261 thumb1_compute_save_core_reg_mask (void)
19262 {
19263 unsigned long mask;
19264 unsigned reg;
19265
19266 mask = 0;
19267 for (reg = 0; reg < 12; reg ++)
19268 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19269 mask |= 1 << reg;
19270
19271 /* Handle the frame pointer as a special case. */
19272 if (frame_pointer_needed)
19273 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19274
19275 if (flag_pic
19276 && !TARGET_SINGLE_PIC_BASE
19277 && arm_pic_register != INVALID_REGNUM
19278 && crtl->uses_pic_offset_table)
19279 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19280
19281 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19282 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19283 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19284
19285 /* LR will also be pushed if any lo regs are pushed. */
19286 if (mask & 0xff || thumb_force_lr_save ())
19287 mask |= (1 << LR_REGNUM);
19288
19289 /* Make sure we have a low work register if we need one.
19290 We will need one if we are going to push a high register,
19291 but we are not currently intending to push a low register. */
19292 if ((mask & 0xff) == 0
19293 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19294 {
19295 /* Use thumb_find_work_register to choose which register
19296 we will use. If the register is live then we will
19297 have to push it. Use LAST_LO_REGNUM as our fallback
19298 choice for the register to select. */
19299 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19300 /* Make sure the register returned by thumb_find_work_register is
19301 not part of the return value. */
19302 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19303 reg = LAST_LO_REGNUM;
19304
19305 if (callee_saved_reg_p (reg))
19306 mask |= 1 << reg;
19307 }
19308
19309 /* The 504 below is 8 bytes less than 512 because there are two possible
19310 alignment words. We can't tell here if they will be present or not so we
19311 have to play it safe and assume that they are. */
19312 if ((CALLER_INTERWORKING_SLOT_SIZE +
19313 ROUND_UP_WORD (get_frame_size ()) +
19314 crtl->outgoing_args_size) >= 504)
19315 {
19316 /* This is the same as the code in thumb1_expand_prologue() which
19317 determines which register to use for stack decrement. */
19318 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19319 if (mask & (1 << reg))
19320 break;
19321
19322 if (reg > LAST_LO_REGNUM)
19323 {
19324 /* Make sure we have a register available for stack decrement. */
19325 mask |= 1 << LAST_LO_REGNUM;
19326 }
19327 }
19328
19329 return mask;
19330 }
19331
19332
19333 /* Return the number of bytes required to save VFP registers. */
19334 static int
19335 arm_get_vfp_saved_size (void)
19336 {
19337 unsigned int regno;
19338 int count;
19339 int saved;
19340
19341 saved = 0;
19342 /* Space for saved VFP registers. */
19343 if (TARGET_HARD_FLOAT)
19344 {
19345 count = 0;
19346 for (regno = FIRST_VFP_REGNUM;
19347 regno < LAST_VFP_REGNUM;
19348 regno += 2)
19349 {
19350 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19351 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19352 {
19353 if (count > 0)
19354 {
19355 /* Work around the ARM10 VFPr1 bug. */
19356 if (count == 2 && !arm_arch6)
19357 count++;
19358 saved += count * 8;
19359 }
19360 count = 0;
19361 }
19362 else
19363 count++;
19364 }
19365 if (count > 0)
19366 {
19367 if (count == 2 && !arm_arch6)
19368 count++;
19369 saved += count * 8;
19370 }
19371 }
19372 return saved;
19373 }
19374
19375
19376 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19377 everything bar the final return instruction. If simple_return is true,
19378 then do not output epilogue, because it has already been emitted in RTL. */
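/* For example (illustration only): a function that saved {r4, r5, lr} and
   has no special exit requirements returns with a single
   "pop {r4, r5, pc}", whereas a leaf function with nothing saved returns
   with "bx lr" (or "mov pc, lr" on targets without BX).  */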
19379 const char *
19380 output_return_instruction (rtx operand, bool really_return, bool reverse,
19381 bool simple_return)
19382 {
19383 char conditional[10];
19384 char instr[100];
19385 unsigned reg;
19386 unsigned long live_regs_mask;
19387 unsigned long func_type;
19388 arm_stack_offsets *offsets;
19389
19390 func_type = arm_current_func_type ();
19391
19392 if (IS_NAKED (func_type))
19393 return "";
19394
19395 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19396 {
19397 /* If this function was declared non-returning, and we have
19398 found a tail call, then we have to trust that the called
19399 function won't return. */
19400 if (really_return)
19401 {
19402 rtx ops[2];
19403
19404 /* Otherwise, trap an attempted return by aborting. */
19405 ops[0] = operand;
19406 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19407 : "abort");
19408 assemble_external_libcall (ops[1]);
19409 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19410 }
19411
19412 return "";
19413 }
19414
19415 gcc_assert (!cfun->calls_alloca || really_return);
19416
19417 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19418
19419 cfun->machine->return_used_this_function = 1;
19420
19421 offsets = arm_get_frame_offsets ();
19422 live_regs_mask = offsets->saved_regs_mask;
19423
19424 if (!simple_return && live_regs_mask)
19425 {
19426 const char * return_reg;
19427
19428 /* If we do not have any special requirements for function exit
19429 (e.g. interworking) then we can load the return address
19430 directly into the PC. Otherwise we must load it into LR. */
19431 if (really_return
19432 && !IS_CMSE_ENTRY (func_type)
19433 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19434 return_reg = reg_names[PC_REGNUM];
19435 else
19436 return_reg = reg_names[LR_REGNUM];
19437
19438 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19439 {
19440 /* There are three possible reasons for the IP register
19441 being saved: 1) a stack frame was created, in which case
19442 IP contains the old stack pointer, or 2) an ISR routine
19443 corrupted it, or 3) it was saved to align the stack on
19444 iWMMXt. In case 1, restore IP into SP, otherwise just
19445 restore IP. */
19446 if (frame_pointer_needed)
19447 {
19448 live_regs_mask &= ~ (1 << IP_REGNUM);
19449 live_regs_mask |= (1 << SP_REGNUM);
19450 }
19451 else
19452 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19453 }
19454
19455 /* On some ARM architectures it is faster to use LDR rather than
19456 LDM to load a single register. On other architectures, the
19457 cost is the same. In 26 bit mode, or for exception handlers,
19458 we have to use LDM to load the PC so that the CPSR is also
19459 restored. */
19460 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19461 if (live_regs_mask == (1U << reg))
19462 break;
19463
19464 if (reg <= LAST_ARM_REGNUM
19465 && (reg != LR_REGNUM
19466 || ! really_return
19467 || ! IS_INTERRUPT (func_type)))
19468 {
19469 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19470 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19471 }
19472 else
19473 {
19474 char *p;
19475 int first = 1;
19476
19477 /* Generate the load multiple instruction to restore the
19478 registers. Note we can get here, even if
19479 frame_pointer_needed is true, but only if sp already
19480 points to the base of the saved core registers. */
19481 if (live_regs_mask & (1 << SP_REGNUM))
19482 {
19483 unsigned HOST_WIDE_INT stack_adjust;
19484
19485 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19486 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19487
19488 if (stack_adjust && arm_arch5 && TARGET_ARM)
19489 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19490 else
19491 {
19492 /* If we can't use ldmib (SA110 bug),
19493 then try to pop r3 instead. */
19494 if (stack_adjust)
19495 live_regs_mask |= 1 << 3;
19496
19497 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19498 }
19499 }
19500 /* For interrupt returns we have to use an LDM rather than
19501 a POP so that we can use the exception return variant. */
19502 else if (IS_INTERRUPT (func_type))
19503 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19504 else
19505 sprintf (instr, "pop%s\t{", conditional);
19506
19507 p = instr + strlen (instr);
19508
19509 for (reg = 0; reg <= SP_REGNUM; reg++)
19510 if (live_regs_mask & (1 << reg))
19511 {
19512 int l = strlen (reg_names[reg]);
19513
19514 if (first)
19515 first = 0;
19516 else
19517 {
19518 memcpy (p, ", ", 2);
19519 p += 2;
19520 }
19521
19522 memcpy (p, "%|", 2);
19523 memcpy (p + 2, reg_names[reg], l);
19524 p += l + 2;
19525 }
19526
19527 if (live_regs_mask & (1 << LR_REGNUM))
19528 {
19529 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19530 /* If returning from an interrupt, restore the CPSR. */
19531 if (IS_INTERRUPT (func_type))
19532 strcat (p, "^");
19533 }
19534 else
19535 strcpy (p, "}");
19536 }
19537
19538 output_asm_insn (instr, & operand);
19539
19540 /* See if we need to generate an extra instruction to
19541 perform the actual function return. */
19542 if (really_return
19543 && func_type != ARM_FT_INTERWORKED
19544 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19545 {
19546 /* The return has already been handled
19547 by loading the LR into the PC. */
19548 return "";
19549 }
19550 }
19551
19552 if (really_return)
19553 {
19554 switch ((int) ARM_FUNC_TYPE (func_type))
19555 {
19556 case ARM_FT_ISR:
19557 case ARM_FT_FIQ:
19558 /* ??? This is wrong for unified assembly syntax. */
19559 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19560 break;
19561
19562 case ARM_FT_INTERWORKED:
19563 gcc_assert (arm_arch5 || arm_arch4t);
19564 sprintf (instr, "bx%s\t%%|lr", conditional);
19565 break;
19566
19567 case ARM_FT_EXCEPTION:
19568 /* ??? This is wrong for unified assembly syntax. */
19569 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19570 break;
19571
19572 default:
19573 if (IS_CMSE_ENTRY (func_type))
19574 {
19575 /* Check if we have to clear the 'GE bits', which are only used if
19576 parallel add and subtraction instructions are available. */
19577 if (TARGET_INT_SIMD)
19578 snprintf (instr, sizeof (instr),
19579 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19580 else
19581 snprintf (instr, sizeof (instr),
19582 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19583
19584 output_asm_insn (instr, & operand);
19585 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19586 {
19587 /* Clear the cumulative exception-status bits (0-4,7) and the
19588 condition code bits (28-31) of the FPSCR. We need to
19589 remember to clear the first scratch register used (IP) and
19590 save and restore the second (r4). */
19591 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19592 output_asm_insn (instr, & operand);
19593 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19594 output_asm_insn (instr, & operand);
19595 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19596 output_asm_insn (instr, & operand);
19597 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19598 output_asm_insn (instr, & operand);
19599 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19600 output_asm_insn (instr, & operand);
19601 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19602 output_asm_insn (instr, & operand);
19603 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19604 output_asm_insn (instr, & operand);
19605 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19606 output_asm_insn (instr, & operand);
19607 }
19608 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19609 }
19610 /* Use bx if it's available. */
19611 else if (arm_arch5 || arm_arch4t)
19612 sprintf (instr, "bx%s\t%%|lr", conditional);
19613 else
19614 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19615 break;
19616 }
19617
19618 output_asm_insn (instr, & operand);
19619 }
19620
19621 return "";
19622 }
19623
19624 /* Output in FILE asm statements needed to declare the NAME of the function
19625 defined by its DECL node. */
19626
19627 void
19628 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19629 {
19630 size_t cmse_name_len;
19631 char *cmse_name = 0;
19632 char cmse_prefix[] = "__acle_se_";
19633
19634 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19635 extra function label for each function with the 'cmse_nonsecure_entry'
19636 attribute. This extra function label should be prepended with
19637 '__acle_se_', telling the linker that it needs to create secure gateway
19638 veneers for this function. */
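/* For example (illustration only): for a cmse_nonsecure_entry function
   "foo", an extra global, function-typed label "__acle_se_foo" is
   emitted alongside "foo" at the same address, so that the linker can
   create the secure gateway veneer for it.  */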
19639 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19640 DECL_ATTRIBUTES (decl)))
19641 {
19642 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19643 cmse_name = XALLOCAVEC (char, cmse_name_len);
19644 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19645 targetm.asm_out.globalize_label (file, cmse_name);
19646
19647 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19648 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19649 }
19650
19651 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19652 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19653 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19654 ASM_OUTPUT_LABEL (file, name);
19655
19656 if (cmse_name)
19657 ASM_OUTPUT_LABEL (file, cmse_name);
19658
19659 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19660 }
19661
19662 /* Write the function name into the code section, directly preceding
19663 the function prologue.
19664
19665 Code will be output similar to this:
19666 t0
19667 .ascii "arm_poke_function_name", 0
19668 .align
19669 t1
19670 .word 0xff000000 + (t1 - t0)
19671 arm_poke_function_name
19672 mov ip, sp
19673 stmfd sp!, {fp, ip, lr, pc}
19674 sub fp, ip, #4
19675
19676 When performing a stack backtrace, code can inspect the value
19677 of 'pc' stored at 'fp' + 0. If the trace function then looks
19678 at location pc - 12 and the top 8 bits are set, then we know
19679 that there is a function name embedded immediately preceding this
19680 location and has length ((pc[-3]) & 0xff000000).
19681
19682 We assume that pc is declared as a pointer to an unsigned long.
19683
19684 It is of no benefit to output the function name if we are assembling
19685 a leaf function. These function types will not contain a stack
19686 backtrace structure, therefore it is not possible to determine the
19687 function name. */
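/* A sketch (illustration only, not part of GCC) of how a backtrace
   routine could recover the embedded name, assuming PC holds the saved
   'pc' value described above:

     unsigned long marker = ((unsigned long *) pc)[-3];
     if ((marker & 0xff000000) == 0xff000000)
       {
	 unsigned long len = marker & 0x00ffffff;
	 const char *name = (const char *) pc - 12 - len;
       }
*/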
19688 void
19689 arm_poke_function_name (FILE *stream, const char *name)
19690 {
19691 unsigned long alignlength;
19692 unsigned long length;
19693 rtx x;
19694
19695 length = strlen (name) + 1;
19696 alignlength = ROUND_UP_WORD (length);
19697
19698 ASM_OUTPUT_ASCII (stream, name, length);
19699 ASM_OUTPUT_ALIGN (stream, 2);
19700 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19701 assemble_aligned_integer (UNITS_PER_WORD, x);
19702 }
19703
19704 /* Place some comments into the assembler stream
19705 describing the current function. */
19706 static void
19707 arm_output_function_prologue (FILE *f)
19708 {
19709 unsigned long func_type;
19710
19711 /* Sanity check. */
19712 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19713
19714 func_type = arm_current_func_type ();
19715
19716 switch ((int) ARM_FUNC_TYPE (func_type))
19717 {
19718 default:
19719 case ARM_FT_NORMAL:
19720 break;
19721 case ARM_FT_INTERWORKED:
19722 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19723 break;
19724 case ARM_FT_ISR:
19725 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19726 break;
19727 case ARM_FT_FIQ:
19728 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19729 break;
19730 case ARM_FT_EXCEPTION:
19731 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19732 break;
19733 }
19734
19735 if (IS_NAKED (func_type))
19736 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19737
19738 if (IS_VOLATILE (func_type))
19739 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19740
19741 if (IS_NESTED (func_type))
19742 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19743 if (IS_STACKALIGN (func_type))
19744 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19745 if (IS_CMSE_ENTRY (func_type))
19746 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19747
19748 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19749 crtl->args.size,
19750 crtl->args.pretend_args_size,
19751 (HOST_WIDE_INT) get_frame_size ());
19752
19753 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
19754 frame_pointer_needed,
19755 cfun->machine->uses_anonymous_args);
19756
19757 if (cfun->machine->lr_save_eliminated)
19758 asm_fprintf (f, "\t%@ link register save eliminated.\n");
19759
19760 if (crtl->calls_eh_return)
19761 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
19762
19763 }
19764
19765 static void
19766 arm_output_function_epilogue (FILE *)
19767 {
19768 arm_stack_offsets *offsets;
19769
19770 if (TARGET_THUMB1)
19771 {
19772 int regno;
19773
19774 /* Emit any call-via-reg trampolines that are needed for v4t support
19775 of call_reg and call_value_reg type insns. */
19776 for (regno = 0; regno < LR_REGNUM; regno++)
19777 {
19778 rtx label = cfun->machine->call_via[regno];
19779
19780 if (label != NULL)
19781 {
19782 switch_to_section (function_section (current_function_decl));
19783 targetm.asm_out.internal_label (asm_out_file, "L",
19784 CODE_LABEL_NUMBER (label));
19785 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
19786 }
19787 }
19788
19789 /* ??? Probably not safe to set this here, since it assumes that a
19790 function will be emitted as assembly immediately after we generate
19791 RTL for it. This does not happen for inline functions. */
19792 cfun->machine->return_used_this_function = 0;
19793 }
19794 else /* TARGET_32BIT */
19795 {
19796 /* We need to take into account any stack-frame rounding. */
19797 offsets = arm_get_frame_offsets ();
19798
19799 gcc_assert (!use_return_insn (FALSE, NULL)
19800 || (cfun->machine->return_used_this_function != 0)
19801 || offsets->saved_regs == offsets->outgoing_args
19802 || frame_pointer_needed);
19803 }
19804 }
19805
19806 /* Generate and emit a sequence of insns equivalent to PUSH, but using
19807 STR and STRD. If an even number of registers is being pushed, an
19808 STRD pattern is created for each register pair. If an
19809 odd number of registers is pushed, emit an initial STR followed by
19810 as many STRD instructions as are needed. This works best when the
19811 stack is initially 64-bit aligned (the normal case), since it
19812 ensures that each STRD is also 64-bit aligned. */
19813 static void
19814 thumb2_emit_strd_push (unsigned long saved_regs_mask)
19815 {
19816 int num_regs = 0;
19817 int i;
19818 int regno;
19819 rtx par = NULL_RTX;
19820 rtx dwarf = NULL_RTX;
19821 rtx tmp;
19822 bool first = true;
19823
19824 num_regs = bit_count (saved_regs_mask);
19825
19826 /* Must be at least one register to save, and can't save SP or PC. */
19827 gcc_assert (num_regs > 0 && num_regs <= 14);
19828 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19829 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19830
19831 /* Create sequence for DWARF info. All the frame-related data for
19832 debugging is held in this wrapper. */
19833 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19834
19835 /* Describe the stack adjustment. */
19836 tmp = gen_rtx_SET (stack_pointer_rtx,
19837 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19838 RTX_FRAME_RELATED_P (tmp) = 1;
19839 XVECEXP (dwarf, 0, 0) = tmp;
19840
19841 /* Find the first register. */
19842 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
19843 ;
19844
19845 i = 0;
19846
19847 /* If there's an odd number of registers to push, start off by
19848 pushing a single register. This ensures that subsequent strd
19849 operations are dword aligned (assuming that SP was originally
19850 64-bit aligned). */
19851 if ((num_regs & 1) != 0)
19852 {
19853 rtx reg, mem, insn;
19854
19855 reg = gen_rtx_REG (SImode, regno);
19856 if (num_regs == 1)
19857 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
19858 stack_pointer_rtx));
19859 else
19860 mem = gen_frame_mem (Pmode,
19861 gen_rtx_PRE_MODIFY
19862 (Pmode, stack_pointer_rtx,
19863 plus_constant (Pmode, stack_pointer_rtx,
19864 -4 * num_regs)));
19865
19866 tmp = gen_rtx_SET (mem, reg);
19867 RTX_FRAME_RELATED_P (tmp) = 1;
19868 insn = emit_insn (tmp);
19869 RTX_FRAME_RELATED_P (insn) = 1;
19870 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19871 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
19872 RTX_FRAME_RELATED_P (tmp) = 1;
19873 i++;
19874 regno++;
19875 XVECEXP (dwarf, 0, i) = tmp;
19876 first = false;
19877 }
19878
19879 while (i < num_regs)
19880 if (saved_regs_mask & (1 << regno))
19881 {
19882 rtx reg1, reg2, mem1, mem2;
19883 rtx tmp0, tmp1, tmp2;
19884 int regno2;
19885
19886 /* Find the register to pair with this one. */
19887 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
19888 regno2++)
19889 ;
19890
19891 reg1 = gen_rtx_REG (SImode, regno);
19892 reg2 = gen_rtx_REG (SImode, regno2);
19893
19894 if (first)
19895 {
19896 rtx insn;
19897
19898 first = false;
19899 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19900 stack_pointer_rtx,
19901 -4 * num_regs));
19902 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19903 stack_pointer_rtx,
19904 -4 * (num_regs - 1)));
19905 tmp0 = gen_rtx_SET (stack_pointer_rtx,
19906 plus_constant (Pmode, stack_pointer_rtx,
19907 -4 * (num_regs)));
19908 tmp1 = gen_rtx_SET (mem1, reg1);
19909 tmp2 = gen_rtx_SET (mem2, reg2);
19910 RTX_FRAME_RELATED_P (tmp0) = 1;
19911 RTX_FRAME_RELATED_P (tmp1) = 1;
19912 RTX_FRAME_RELATED_P (tmp2) = 1;
19913 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
19914 XVECEXP (par, 0, 0) = tmp0;
19915 XVECEXP (par, 0, 1) = tmp1;
19916 XVECEXP (par, 0, 2) = tmp2;
19917 insn = emit_insn (par);
19918 RTX_FRAME_RELATED_P (insn) = 1;
19919 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
19920 }
19921 else
19922 {
19923 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
19924 stack_pointer_rtx,
19925 4 * i));
19926 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
19927 stack_pointer_rtx,
19928 4 * (i + 1)));
19929 tmp1 = gen_rtx_SET (mem1, reg1);
19930 tmp2 = gen_rtx_SET (mem2, reg2);
19931 RTX_FRAME_RELATED_P (tmp1) = 1;
19932 RTX_FRAME_RELATED_P (tmp2) = 1;
19933 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
19934 XVECEXP (par, 0, 0) = tmp1;
19935 XVECEXP (par, 0, 1) = tmp2;
19936 emit_insn (par);
19937 }
19938
19939 /* Create unwind information. This is an approximation. */
19940 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
19941 plus_constant (Pmode,
19942 stack_pointer_rtx,
19943 4 * i)),
19944 reg1);
19945 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
19946 plus_constant (Pmode,
19947 stack_pointer_rtx,
19948 4 * (i + 1))),
19949 reg2);
19950
19951 RTX_FRAME_RELATED_P (tmp1) = 1;
19952 RTX_FRAME_RELATED_P (tmp2) = 1;
19953 XVECEXP (dwarf, 0, i + 1) = tmp1;
19954 XVECEXP (dwarf, 0, i + 2) = tmp2;
19955 i += 2;
19956 regno = regno2 + 1;
19957 }
19958 else
19959 regno++;
19960
19961 return;
19962 }
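/* Rough illustration of the code above (the exact forms depend on the
   strd/str patterns that end up being matched): for a mask containing
   {r4, r5, r6, r7} the emitted sequence resembles

	strd	r4, r5, [sp, #-16]!
	strd	r6, r7, [sp, #8]

   while for {r4, r5, r6} (odd count) it resembles

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]
*/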
19963
19964 /* STRD in ARM mode requires consecutive registers. This function emits STRD
19965 whenever possible, otherwise it emits single-word stores. The first store
19966 also allocates stack space for all saved registers, using writeback with
19967 post-addressing mode. All other stores use offset addressing. If no STRD
19968 can be emitted, this function emits a sequence of single-word stores,
19969 and not an STM as before, because single-word stores provide more
19970 scheduling freedom and can be turned into an STM by peephole optimizations. */
19971 static void
19972 arm_emit_strd_push (unsigned long saved_regs_mask)
19973 {
19974 int num_regs = 0;
19975 int i, j, dwarf_index = 0;
19976 int offset = 0;
19977 rtx dwarf = NULL_RTX;
19978 rtx insn = NULL_RTX;
19979 rtx tmp, mem;
19980
19981 /* TODO: More efficient code can be emitted by changing the
19982 layout, e.g., first push all pairs that can use STRD to keep the
19983 stack aligned, and then push all other registers. */
19984 for (i = 0; i <= LAST_ARM_REGNUM; i++)
19985 if (saved_regs_mask & (1 << i))
19986 num_regs++;
19987
19988 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
19989 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
19990 gcc_assert (num_regs > 0);
19991
19992 /* Create sequence for DWARF info. */
19993 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
19994
19995 /* For dwarf info, we generate explicit stack update. */
19996 tmp = gen_rtx_SET (stack_pointer_rtx,
19997 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
19998 RTX_FRAME_RELATED_P (tmp) = 1;
19999 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20000
20001 /* Save registers. */
20002 offset = -4 * num_regs;
20003 j = 0;
20004 while (j <= LAST_ARM_REGNUM)
20005 if (saved_regs_mask & (1 << j))
20006 {
20007 if ((j % 2 == 0)
20008 && (saved_regs_mask & (1 << (j + 1))))
20009 {
20010 /* The current register and the next register form a register pair
20011 for which STRD can be generated. */
20012 if (offset < 0)
20013 {
20014 /* Allocate stack space for all saved registers. */
20015 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20016 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20017 mem = gen_frame_mem (DImode, tmp);
20018 offset = 0;
20019 }
20020 else if (offset > 0)
20021 mem = gen_frame_mem (DImode,
20022 plus_constant (Pmode,
20023 stack_pointer_rtx,
20024 offset));
20025 else
20026 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20027
20028 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20029 RTX_FRAME_RELATED_P (tmp) = 1;
20030 tmp = emit_insn (tmp);
20031
20032 /* Record the first store insn. */
20033 if (dwarf_index == 1)
20034 insn = tmp;
20035
20036 /* Generate dwarf info. */
20037 mem = gen_frame_mem (SImode,
20038 plus_constant (Pmode,
20039 stack_pointer_rtx,
20040 offset));
20041 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20042 RTX_FRAME_RELATED_P (tmp) = 1;
20043 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20044
20045 mem = gen_frame_mem (SImode,
20046 plus_constant (Pmode,
20047 stack_pointer_rtx,
20048 offset + 4));
20049 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20050 RTX_FRAME_RELATED_P (tmp) = 1;
20051 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20052
20053 offset += 8;
20054 j += 2;
20055 }
20056 else
20057 {
20058 /* Emit a single word store. */
20059 if (offset < 0)
20060 {
20061 /* Allocate stack space for all saved registers. */
20062 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20063 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20064 mem = gen_frame_mem (SImode, tmp);
20065 offset = 0;
20066 }
20067 else if (offset > 0)
20068 mem = gen_frame_mem (SImode,
20069 plus_constant (Pmode,
20070 stack_pointer_rtx,
20071 offset));
20072 else
20073 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20074
20075 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20076 RTX_FRAME_RELATED_P (tmp) = 1;
20077 tmp = emit_insn (tmp);
20078
20079 /* Record the first store insn. */
20080 if (dwarf_index == 1)
20081 insn = tmp;
20082
20083 /* Generate dwarf info. */
20084 mem = gen_frame_mem (SImode,
20085 plus_constant (Pmode,
20086 stack_pointer_rtx,
20087 offset));
20088 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20089 RTX_FRAME_RELATED_P (tmp) = 1;
20090 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20091
20092 offset += 4;
20093 j += 1;
20094 }
20095 }
20096 else
20097 j++;
20098
20099 /* Attach dwarf info to the first insn we generate. */
20100 gcc_assert (insn != NULL_RTX);
20101 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20102 RTX_FRAME_RELATED_P (insn) = 1;
20103 }
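/* Rough illustration only: for a mask of {r4, r5, r7} the code above
   emits something along the lines of

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]

   i.e. the first store allocates the whole 12-byte area with writeback
   and the remaining stores use plain offset addressing.  */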
20104
20105 /* Generate and emit an insn that we will recognize as a push_multi.
20106 Unfortunately, since this insn does not reflect very well the actual
20107 semantics of the operation, we need to annotate the insn for the benefit
20108 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20109 MASK for registers that should be annotated for DWARF2 frame unwind
20110 information. */
20111 static rtx
20112 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20113 {
20114 int num_regs = 0;
20115 int num_dwarf_regs = 0;
20116 int i, j;
20117 rtx par;
20118 rtx dwarf;
20119 int dwarf_par_index;
20120 rtx tmp, reg;
20121
20122 /* We don't record the PC in the dwarf frame information. */
20123 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20124
20125 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20126 {
20127 if (mask & (1 << i))
20128 num_regs++;
20129 if (dwarf_regs_mask & (1 << i))
20130 num_dwarf_regs++;
20131 }
20132
20133 gcc_assert (num_regs && num_regs <= 16);
20134 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20135
20136 /* For the body of the insn we are going to generate an UNSPEC in
20137 parallel with several USEs. This allows the insn to be recognized
20138 by the push_multi pattern in the arm.md file.
20139
20140 The body of the insn looks something like this:
20141
20142 (parallel [
20143 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20144 (const_int:SI <num>)))
20145 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20146 (use (reg:SI XX))
20147 (use (reg:SI YY))
20148 ...
20149 ])
20150
20151 For the frame note however, we try to be more explicit and actually
20152 show each register being stored into the stack frame, plus a (single)
20153 decrement of the stack pointer. We do it this way in order to be
20154 friendly to the stack unwinding code, which only wants to see a single
20155 stack decrement per instruction. The RTL we generate for the note looks
20156 something like this:
20157
20158 (sequence [
20159 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20160 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20161 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20162 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20163 ...
20164 ])
20165
20166 FIXME: In an ideal world the PRE_MODIFY would not exist and
20167 instead we'd have a parallel expression detailing all
20168 the stores to the various memory addresses so that debug
20169 information is more up-to-date. Remember however while writing
20170 this to take care of the constraints with the push instruction.
20171
20172 Note also that this has to be taken care of for the VFP registers.
20173
20174 For more see PR43399. */
20175
20176 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20177 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20178 dwarf_par_index = 1;
20179
20180 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20181 {
20182 if (mask & (1 << i))
20183 {
20184 reg = gen_rtx_REG (SImode, i);
20185
20186 XVECEXP (par, 0, 0)
20187 = gen_rtx_SET (gen_frame_mem
20188 (BLKmode,
20189 gen_rtx_PRE_MODIFY (Pmode,
20190 stack_pointer_rtx,
20191 plus_constant
20192 (Pmode, stack_pointer_rtx,
20193 -4 * num_regs))
20194 ),
20195 gen_rtx_UNSPEC (BLKmode,
20196 gen_rtvec (1, reg),
20197 UNSPEC_PUSH_MULT));
20198
20199 if (dwarf_regs_mask & (1 << i))
20200 {
20201 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20202 reg);
20203 RTX_FRAME_RELATED_P (tmp) = 1;
20204 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20205 }
20206
20207 break;
20208 }
20209 }
20210
20211 for (j = 1, i++; j < num_regs; i++)
20212 {
20213 if (mask & (1 << i))
20214 {
20215 reg = gen_rtx_REG (SImode, i);
20216
20217 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20218
20219 if (dwarf_regs_mask & (1 << i))
20220 {
20221 tmp
20222 = gen_rtx_SET (gen_frame_mem
20223 (SImode,
20224 plus_constant (Pmode, stack_pointer_rtx,
20225 4 * j)),
20226 reg);
20227 RTX_FRAME_RELATED_P (tmp) = 1;
20228 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20229 }
20230
20231 j++;
20232 }
20233 }
20234
20235 par = emit_insn (par);
20236
20237 tmp = gen_rtx_SET (stack_pointer_rtx,
20238 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20239 RTX_FRAME_RELATED_P (tmp) = 1;
20240 XVECEXP (dwarf, 0, 0) = tmp;
20241
20242 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20243
20244 return par;
20245 }
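/* The insn built above is ultimately matched by the push_multi pattern,
   so for a mask of, say, {r4, r5, lr} the output is normally a single
   "push {r4, r5, lr}" (or "stmfd sp!, {r4, r5, lr}"), while the attached
   note describes the equivalent sequence of individual stores for the
   unwinder.  This is only a summary; see the pattern in arm.md for the
   exact output.  */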
20246
20247 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20248 SIZE is the offset to be adjusted.
20249 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20250 static void
20251 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20252 {
20253 rtx dwarf;
20254
20255 RTX_FRAME_RELATED_P (insn) = 1;
20256 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20257 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20258 }
20259
20260 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20261 SAVED_REGS_MASK shows which registers need to be restored.
20262
20263 Unfortunately, since this insn does not reflect very well the actual
20264 semantics of the operation, we need to annotate the insn for the benefit
20265 of DWARF2 frame unwind information. */
20266 static void
20267 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20268 {
20269 int num_regs = 0;
20270 int i, j;
20271 rtx par;
20272 rtx dwarf = NULL_RTX;
20273 rtx tmp, reg;
20274 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20275 int offset_adj;
20276 int emit_update;
20277
20278 offset_adj = return_in_pc ? 1 : 0;
20279 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20280 if (saved_regs_mask & (1 << i))
20281 num_regs++;
20282
20283 gcc_assert (num_regs && num_regs <= 16);
20284
20285 /* If SP is in the reglist, then we don't emit the SP update insn. */
20286 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20287
20288 /* The parallel needs to hold num_regs SETs
20289 and one SET for the stack update. */
20290 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20291
20292 if (return_in_pc)
20293 XVECEXP (par, 0, 0) = ret_rtx;
20294
20295 if (emit_update)
20296 {
20297 /* Increment the stack pointer, based on there being
20298 num_regs 4-byte registers to restore. */
20299 tmp = gen_rtx_SET (stack_pointer_rtx,
20300 plus_constant (Pmode,
20301 stack_pointer_rtx,
20302 4 * num_regs));
20303 RTX_FRAME_RELATED_P (tmp) = 1;
20304 XVECEXP (par, 0, offset_adj) = tmp;
20305 }
20306
20307 /* Now restore every reg, which may include PC. */
20308 for (j = 0, i = 0; j < num_regs; i++)
20309 if (saved_regs_mask & (1 << i))
20310 {
20311 reg = gen_rtx_REG (SImode, i);
20312 if ((num_regs == 1) && emit_update && !return_in_pc)
20313 {
20314 /* Emit single load with writeback. */
20315 tmp = gen_frame_mem (SImode,
20316 gen_rtx_POST_INC (Pmode,
20317 stack_pointer_rtx));
20318 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20319 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20320 return;
20321 }
20322
20323 tmp = gen_rtx_SET (reg,
20324 gen_frame_mem
20325 (SImode,
20326 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20327 RTX_FRAME_RELATED_P (tmp) = 1;
20328 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20329
20330 /* We need to maintain a sequence for DWARF info too. As dwarf info
20331 should not have PC, skip PC. */
20332 if (i != PC_REGNUM)
20333 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20334
20335 j++;
20336 }
20337
20338 if (return_in_pc)
20339 par = emit_jump_insn (par);
20340 else
20341 par = emit_insn (par);
20342
20343 REG_NOTES (par) = dwarf;
20344 if (!return_in_pc)
20345 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20346 stack_pointer_rtx, stack_pointer_rtx);
20347 }
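/* Illustration only: for a mask of {r4, r5, pc} the parallel built above
   is normally matched as a single "pop {r4, r5, pc}" (with the load into
   PC doubling as the return), and the REG_CFA_RESTORE notes cover r4 and
   r5 but deliberately not PC.  */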
20348
20349 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20350 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20351
20352 Unfortunately, since this insn does not reflect very well the actual
20353 semantics of the operation, we need to annotate the insn for the benefit
20354 of DWARF2 frame unwind information. */
20355 static void
20356 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20357 {
20358 int i, j;
20359 rtx par;
20360 rtx dwarf = NULL_RTX;
20361 rtx tmp, reg;
20362
20363 gcc_assert (num_regs && num_regs <= 32);
20364
20365 /* Workaround ARM10 VFPr1 bug. */
20366 if (num_regs == 2 && !arm_arch6)
20367 {
20368 if (first_reg == 15)
20369 first_reg--;
20370
20371 num_regs++;
20372 }
20373
20374 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20375 there could be up to 32 D-registers to restore.
20376 If there are more than 16 D-registers, make two recursive calls,
20377 each of which emits one pop_multi instruction. */
20378 if (num_regs > 16)
20379 {
20380 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20381 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20382 return;
20383 }
20384
20385 /* The parallel needs to hold num_regs SETs
20386 and one SET for the stack update. */
20387 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20388
20389 /* Increment the stack pointer, based on there being
20390 num_regs 8-byte registers to restore. */
20391 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20392 RTX_FRAME_RELATED_P (tmp) = 1;
20393 XVECEXP (par, 0, 0) = tmp;
20394
20395 /* Now show every reg that will be restored, using a SET for each. */
20396 for (j = 0, i=first_reg; j < num_regs; i += 2)
20397 {
20398 reg = gen_rtx_REG (DFmode, i);
20399
20400 tmp = gen_rtx_SET (reg,
20401 gen_frame_mem
20402 (DFmode,
20403 plus_constant (Pmode, base_reg, 8 * j)));
20404 RTX_FRAME_RELATED_P (tmp) = 1;
20405 XVECEXP (par, 0, j + 1) = tmp;
20406
20407 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20408
20409 j++;
20410 }
20411
20412 par = emit_insn (par);
20413 REG_NOTES (par) = dwarf;
20414
20415 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
20416 if (REGNO (base_reg) == IP_REGNUM)
20417 {
20418 RTX_FRAME_RELATED_P (par) = 1;
20419 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20420 }
20421 else
20422 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20423 base_reg, base_reg);
20424 }
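/* Illustration only: a pop of four D-registers starting at d8, with the
   stack pointer as BASE_REG, is normally matched to something like
   "vldm sp!, {d8-d11}" (fldmfdd in older syntax), after which the base
   register has been advanced by 8 * num_regs = 32 bytes, matching the
   CFA note added above.  */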
20425
20426 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20427 even number of registers is being popped, LDRD patterns are created for
20428 all register pairs. If an odd number of registers is popped, the last
20429 register is loaded using an LDR pattern. */
20430 static void
20431 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20432 {
20433 int num_regs = 0;
20434 int i, j;
20435 rtx par = NULL_RTX;
20436 rtx dwarf = NULL_RTX;
20437 rtx tmp, reg, tmp1;
20438 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20439
20440 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20441 if (saved_regs_mask & (1 << i))
20442 num_regs++;
20443
20444 gcc_assert (num_regs && num_regs <= 16);
20445
20446 /* We cannot generate an LDRD for PC, so reduce the count if PC is to be
20447 popped. If num_regs was even it now becomes odd, and the leftover register
20448 is popped together with PC by a multi-register pop. If num_regs was odd it
20449 becomes even, and an LDR with return can be generated for PC. */
20450 if (return_in_pc)
20451 num_regs--;
20452
20453 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20454
20455 /* Var j iterates over all the registers in saved_regs_mask. Var i gives
20456 the index of the saved register in the stack frame. A PARALLEL RTX for
20457 each register pair is created here, so that the LDRD pattern can be
20458 matched. As PC is always the last register to be popped, and we have
20459 already decremented num_regs if PC is present, we don't have to worry
20460 about PC in this loop. */
20461 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20462 if (saved_regs_mask & (1 << j))
20463 {
20464 /* Create RTX for memory load. */
20465 reg = gen_rtx_REG (SImode, j);
20466 tmp = gen_rtx_SET (reg,
20467 gen_frame_mem (SImode,
20468 plus_constant (Pmode,
20469 stack_pointer_rtx, 4 * i)));
20470 RTX_FRAME_RELATED_P (tmp) = 1;
20471
20472 if (i % 2 == 0)
20473 {
20474 /* When saved-register index (i) is even, the RTX to be emitted is
20475 yet to be created. Hence create it first. The LDRD pattern we
20476 are generating is :
20477 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20478 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20479 where target registers need not be consecutive. */
20480 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20481 dwarf = NULL_RTX;
20482 }
20483
20484 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20485 added as 0th element and if i is odd, reg_i is added as 1st element
20486 of LDRD pattern shown above. */
20487 XVECEXP (par, 0, (i % 2)) = tmp;
20488 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20489
20490 if ((i % 2) == 1)
20491 {
20492 /* When saved-register index (i) is odd, RTXs for both the registers
20493 to be loaded are generated in above given LDRD pattern, and the
20494 pattern can be emitted now. */
20495 par = emit_insn (par);
20496 REG_NOTES (par) = dwarf;
20497 RTX_FRAME_RELATED_P (par) = 1;
20498 }
20499
20500 i++;
20501 }
20502
20503 /* If the number of registers popped is odd and return_in_pc is false, or
20504 the number of registers is even and return_in_pc is true, the last
20505 register is popped using LDR. It can be PC as well. Hence, adjust the
20506 stack first and then emit the LDR with post-increment. */
20507
20508 /* Increment the stack pointer, based on there being
20509 num_regs 4-byte registers to restore. */
20510 tmp = gen_rtx_SET (stack_pointer_rtx,
20511 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20512 RTX_FRAME_RELATED_P (tmp) = 1;
20513 tmp = emit_insn (tmp);
20514 if (!return_in_pc)
20515 {
20516 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20517 stack_pointer_rtx, stack_pointer_rtx);
20518 }
20519
20520 dwarf = NULL_RTX;
20521
20522 if (((num_regs % 2) == 1 && !return_in_pc)
20523 || ((num_regs % 2) == 0 && return_in_pc))
20524 {
20525 /* Scan for the single register to be popped. Skip until the saved
20526 register is found. */
20527 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20528
20529 /* Gen LDR with post increment here. */
20530 tmp1 = gen_rtx_MEM (SImode,
20531 gen_rtx_POST_INC (SImode,
20532 stack_pointer_rtx));
20533 set_mem_alias_set (tmp1, get_frame_alias_set ());
20534
20535 reg = gen_rtx_REG (SImode, j);
20536 tmp = gen_rtx_SET (reg, tmp1);
20537 RTX_FRAME_RELATED_P (tmp) = 1;
20538 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20539
20540 if (return_in_pc)
20541 {
20542 /* If return_in_pc, j must be PC_REGNUM. */
20543 gcc_assert (j == PC_REGNUM);
20544 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20545 XVECEXP (par, 0, 0) = ret_rtx;
20546 XVECEXP (par, 0, 1) = tmp;
20547 par = emit_jump_insn (par);
20548 }
20549 else
20550 {
20551 par = emit_insn (tmp);
20552 REG_NOTES (par) = dwarf;
20553 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20554 stack_pointer_rtx, stack_pointer_rtx);
20555 }
20556
20557 }
20558 else if ((num_regs % 2) == 1 && return_in_pc)
20559 {
20560 /* There are two registers left to be popped. Generate the
20561 pop_multiple_with_stack_update_and_return pattern to pop into PC. */
20562 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20563 }
20564
20565 return;
20566 }
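/* Illustration only: for a mask of {r4, r5, r6} (odd count, no PC) the
   function above emits roughly

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	ldr	r6, [sp], #4

   with the exact mnemonics depending on the ldrd/ldr patterns matched.  */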
20567
20568 /* LDRD in ARM mode needs consecutive registers as operands. This function
20569 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20570 offset addressing and then generates one separate stack update. This provides
20571 more scheduling freedom, compared to writeback on every load. However,
20572 if the function returns using load into PC directly
20573 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20574 before the last load. TODO: Add a peephole optimization to recognize
20575 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20576 peephole optimization to merge the load at stack-offset zero
20577 with the stack update instruction using load with writeback
20578 in post-index addressing mode. */
20579 static void
20580 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20581 {
20582 int j = 0;
20583 int offset = 0;
20584 rtx par = NULL_RTX;
20585 rtx dwarf = NULL_RTX;
20586 rtx tmp, mem;
20587
20588 /* Restore saved registers. */
20589 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20590 j = 0;
20591 while (j <= LAST_ARM_REGNUM)
20592 if (saved_regs_mask & (1 << j))
20593 {
20594 if ((j % 2) == 0
20595 && (saved_regs_mask & (1 << (j + 1)))
20596 && (j + 1) != PC_REGNUM)
20597 {
20598 /* Current register and next register form register pair for which
20599 LDRD can be generated. PC is always the last register popped, and
20600 we handle it separately. */
20601 if (offset > 0)
20602 mem = gen_frame_mem (DImode,
20603 plus_constant (Pmode,
20604 stack_pointer_rtx,
20605 offset));
20606 else
20607 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20608
20609 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20610 tmp = emit_insn (tmp);
20611 RTX_FRAME_RELATED_P (tmp) = 1;
20612
20613 /* Generate dwarf info. */
20614
20615 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20616 gen_rtx_REG (SImode, j),
20617 NULL_RTX);
20618 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20619 gen_rtx_REG (SImode, j + 1),
20620 dwarf);
20621
20622 REG_NOTES (tmp) = dwarf;
20623
20624 offset += 8;
20625 j += 2;
20626 }
20627 else if (j != PC_REGNUM)
20628 {
20629 /* Emit a single word load. */
20630 if (offset > 0)
20631 mem = gen_frame_mem (SImode,
20632 plus_constant (Pmode,
20633 stack_pointer_rtx,
20634 offset));
20635 else
20636 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20637
20638 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20639 tmp = emit_insn (tmp);
20640 RTX_FRAME_RELATED_P (tmp) = 1;
20641
20642 /* Generate dwarf info. */
20643 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20644 gen_rtx_REG (SImode, j),
20645 NULL_RTX);
20646
20647 offset += 4;
20648 j += 1;
20649 }
20650 else /* j == PC_REGNUM */
20651 j++;
20652 }
20653 else
20654 j++;
20655
20656 /* Update the stack. */
20657 if (offset > 0)
20658 {
20659 tmp = gen_rtx_SET (stack_pointer_rtx,
20660 plus_constant (Pmode,
20661 stack_pointer_rtx,
20662 offset));
20663 tmp = emit_insn (tmp);
20664 arm_add_cfa_adjust_cfa_note (tmp, offset,
20665 stack_pointer_rtx, stack_pointer_rtx);
20666 offset = 0;
20667 }
20668
20669 if (saved_regs_mask & (1 << PC_REGNUM))
20670 {
20671 /* Only PC is to be popped. */
20672 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20673 XVECEXP (par, 0, 0) = ret_rtx;
20674 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20675 gen_frame_mem (SImode,
20676 gen_rtx_POST_INC (SImode,
20677 stack_pointer_rtx)));
20678 RTX_FRAME_RELATED_P (tmp) = 1;
20679 XVECEXP (par, 0, 1) = tmp;
20680 par = emit_jump_insn (par);
20681
20682 /* Generate dwarf info. */
20683 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20684 gen_rtx_REG (SImode, PC_REGNUM),
20685 NULL_RTX);
20686 REG_NOTES (par) = dwarf;
20687 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20688 stack_pointer_rtx, stack_pointer_rtx);
20689 }
20690 }
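/* Illustration only: for a mask of {r4, r5, r6, pc} the sequence emitted
   above is roughly

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12
	ldr	pc, [sp], #4

   where the final load into PC also performs the return.  */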
20691
20692 /* Calculate the size of the return value that is passed in registers. */
20693 static unsigned
20694 arm_size_return_regs (void)
20695 {
20696 machine_mode mode;
20697
20698 if (crtl->return_rtx != 0)
20699 mode = GET_MODE (crtl->return_rtx);
20700 else
20701 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20702
20703 return GET_MODE_SIZE (mode);
20704 }
20705
20706 /* Return true if the current function needs to save/restore LR. */
20707 static bool
20708 thumb_force_lr_save (void)
20709 {
20710 return !cfun->machine->lr_save_eliminated
20711 && (!crtl->is_leaf
20712 || thumb_far_jump_used_p ()
20713 || df_regs_ever_live_p (LR_REGNUM));
20714 }
20715
20716 /* We do not know whether r3 will be available, because an
20717 indirect tail call is happening in this
20718 particular case. */
20719 static bool
20720 is_indirect_tailcall_p (rtx call)
20721 {
20722 rtx pat = PATTERN (call);
20723
20724 /* Indirect tail call. */
20725 pat = XVECEXP (pat, 0, 0);
20726 if (GET_CODE (pat) == SET)
20727 pat = SET_SRC (pat);
20728
20729 pat = XEXP (XEXP (pat, 0), 0);
20730 return REG_P (pat);
20731 }
20732
20733 /* Return true if r3 is used by any of the tail call insns in the
20734 current function. */
20735 static bool
20736 any_sibcall_could_use_r3 (void)
20737 {
20738 edge_iterator ei;
20739 edge e;
20740
20741 if (!crtl->tail_call_emit)
20742 return false;
20743 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20744 if (e->flags & EDGE_SIBCALL)
20745 {
20746 rtx_insn *call = BB_END (e->src);
20747 if (!CALL_P (call))
20748 call = prev_nonnote_nondebug_insn (call);
20749 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20750 if (find_regno_fusage (call, USE, 3)
20751 || is_indirect_tailcall_p (call))
20752 return true;
20753 }
20754 return false;
20755 }
20756
20757
20758 /* Compute the distance from register FROM to register TO.
20759 These can be the arg pointer (26), the soft frame pointer (25),
20760 the stack pointer (13) or the hard frame pointer (11).
20761 In Thumb mode r7 is used as the hard frame pointer, if needed.
20762 Typical stack layout looks like this:
20763
20764 old stack pointer -> | |
20765 ----
20766 | | \
20767 | | saved arguments for
20768 | | vararg functions
20769 | | /
20770 --
20771 hard FP & arg pointer -> | | \
20772 | | stack
20773 | | frame
20774 | | /
20775 --
20776 | | \
20777 | | call saved
20778 | | registers
20779 soft frame pointer -> | | /
20780 --
20781 | | \
20782 | | local
20783 | | variables
20784 locals base pointer -> | | /
20785 --
20786 | | \
20787 | | outgoing
20788 | | arguments
20789 current stack pointer -> | | /
20790 --
20791
20792 For a given function some or all of these stack components
20793 may not be needed, giving rise to the possibility of
20794 eliminating some of the registers.
20795
20796 The values returned by this function must reflect the behavior
20797 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
20798
20799 The sign of the number returned reflects the direction of stack
20800 growth, so the values are positive for all eliminations except
20801 from the soft frame pointer to the hard frame pointer.
20802
20803 SFP may point just inside the local variables block to ensure correct
20804 alignment. */
20805
20806
20807 /* Return cached stack offsets. */
20808
20809 static arm_stack_offsets *
20810 arm_get_frame_offsets (void)
20811 {
20812 struct arm_stack_offsets *offsets;
20813
20814 offsets = &cfun->machine->stack_offsets;
20815
20816 return offsets;
20817 }
20818
20819
20820 /* Calculate stack offsets. These are used to calculate register elimination
20821 offsets and in prologue/epilogue code. Also calculates which registers
20822 should be saved. */
20823
20824 static void
20825 arm_compute_frame_layout (void)
20826 {
20827 struct arm_stack_offsets *offsets;
20828 unsigned long func_type;
20829 int saved;
20830 int core_saved;
20831 HOST_WIDE_INT frame_size;
20832 int i;
20833
20834 offsets = &cfun->machine->stack_offsets;
20835
20836 /* Initially this is the size of the local variables. It will be translated
20837 into an offset once we have determined the size of the preceding data. */
20838 frame_size = ROUND_UP_WORD (get_frame_size ());
20839
20840 /* Space for variadic functions. */
20841 offsets->saved_args = crtl->args.pretend_args_size;
20842
20843 /* In Thumb mode this is incorrect, but never used. */
20844 offsets->frame
20845 = (offsets->saved_args
20846 + arm_compute_static_chain_stack_bytes ()
20847 + (frame_pointer_needed ? 4 : 0));
20848
20849 if (TARGET_32BIT)
20850 {
20851 unsigned int regno;
20852
20853 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
20854 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20855 saved = core_saved;
20856
20857 /* We know that SP will be doubleword aligned on entry, and we must
20858 preserve that condition at any subroutine call. We also require the
20859 soft frame pointer to be doubleword aligned. */
20860
20861 if (TARGET_REALLY_IWMMXT)
20862 {
20863 /* Check for the call-saved iWMMXt registers. */
20864 for (regno = FIRST_IWMMXT_REGNUM;
20865 regno <= LAST_IWMMXT_REGNUM;
20866 regno++)
20867 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
20868 saved += 8;
20869 }
20870
20871 func_type = arm_current_func_type ();
20872 /* Space for saved VFP registers. */
20873 if (! IS_VOLATILE (func_type)
20874 && TARGET_HARD_FLOAT)
20875 saved += arm_get_vfp_saved_size ();
20876 }
20877 else /* TARGET_THUMB1 */
20878 {
20879 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
20880 core_saved = bit_count (offsets->saved_regs_mask) * 4;
20881 saved = core_saved;
20882 if (TARGET_BACKTRACE)
20883 saved += 16;
20884 }
20885
20886 /* Saved registers include the stack frame. */
20887 offsets->saved_regs
20888 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
20889 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
20890
20891 /* A leaf function does not need any stack alignment if it has nothing
20892 on the stack. */
20893 if (crtl->is_leaf && frame_size == 0
20894 /* However if it calls alloca(), we have a dynamically allocated
20895 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
20896 && ! cfun->calls_alloca)
20897 {
20898 offsets->outgoing_args = offsets->soft_frame;
20899 offsets->locals_base = offsets->soft_frame;
20900 return;
20901 }
20902
20903 /* Ensure SFP has the correct alignment. */
20904 if (ARM_DOUBLEWORD_ALIGN
20905 && (offsets->soft_frame & 7))
20906 {
20907 offsets->soft_frame += 4;
20908 /* Try to align stack by pushing an extra reg. Don't bother doing this
20909 when there is a stack frame as the alignment will be rolled into
20910 the normal stack adjustment. */
20911 if (frame_size + crtl->outgoing_args_size == 0)
20912 {
20913 int reg = -1;
20914
20915 /* Register r3 is caller-saved. Normally it does not need to be
20916 saved on entry by the prologue. However if we choose to save
20917 it for padding then we may confuse the compiler into thinking
20918 a prologue sequence is required when in fact it is not. This
20919 will occur when shrink-wrapping if r3 is used as a scratch
20920 register and there are no other callee-saved writes.
20921
20922 This situation can be avoided when other callee-saved registers
20923 are available and r3 is not mandatory if we choose a callee-saved
20924 register for padding. */
20925 bool prefer_callee_reg_p = false;
20926
20927 /* If it is safe to use r3, then do so. This sometimes
20928 generates better code on Thumb-2 by avoiding the need to
20929 use 32-bit push/pop instructions. */
20930 if (! any_sibcall_could_use_r3 ()
20931 && arm_size_return_regs () <= 12
20932 && (offsets->saved_regs_mask & (1 << 3)) == 0
20933 && (TARGET_THUMB2
20934 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
20935 {
20936 reg = 3;
20937 if (!TARGET_THUMB2)
20938 prefer_callee_reg_p = true;
20939 }
20940 if (reg == -1
20941 || prefer_callee_reg_p)
20942 {
20943 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
20944 {
20945 /* Avoid fixed registers; they may be changed at
20946 arbitrary times so it's unsafe to restore them
20947 during the epilogue. */
20948 if (!fixed_regs[i]
20949 && (offsets->saved_regs_mask & (1 << i)) == 0)
20950 {
20951 reg = i;
20952 break;
20953 }
20954 }
20955 }
20956
20957 if (reg != -1)
20958 {
20959 offsets->saved_regs += 4;
20960 offsets->saved_regs_mask |= (1 << reg);
20961 }
20962 }
20963 }
20964
20965 offsets->locals_base = offsets->soft_frame + frame_size;
20966 offsets->outgoing_args = (offsets->locals_base
20967 + crtl->outgoing_args_size);
20968
20969 if (ARM_DOUBLEWORD_ALIGN)
20970 {
20971 /* Ensure SP remains doubleword aligned. */
20972 if (offsets->outgoing_args & 7)
20973 offsets->outgoing_args += 4;
20974 gcc_assert (!(offsets->outgoing_args & 7));
20975 }
20976 }
20977
20978
20979 /* Calculate the relative offsets for the different stack pointers. Positive
20980 offsets are in the direction of stack growth. */
20981
20982 HOST_WIDE_INT
20983 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
20984 {
20985 arm_stack_offsets *offsets;
20986
20987 offsets = arm_get_frame_offsets ();
20988
20989 /* OK, now we have enough information to compute the distances.
20990 There must be an entry in these switch tables for each pair
20991 of registers in ELIMINABLE_REGS, even if some of the entries
20992 seem to be redundant or useless. */
20993 switch (from)
20994 {
20995 case ARG_POINTER_REGNUM:
20996 switch (to)
20997 {
20998 case THUMB_HARD_FRAME_POINTER_REGNUM:
20999 return 0;
21000
21001 case FRAME_POINTER_REGNUM:
21002 /* This is the reverse of the soft frame pointer
21003 to hard frame pointer elimination below. */
21004 return offsets->soft_frame - offsets->saved_args;
21005
21006 case ARM_HARD_FRAME_POINTER_REGNUM:
21007 /* This is only non-zero in the case where the static chain register
21008 is stored above the frame. */
21009 return offsets->frame - offsets->saved_args - 4;
21010
21011 case STACK_POINTER_REGNUM:
21012 /* If nothing has been pushed on the stack at all
21013 then this will return -4. This *is* correct! */
21014 return offsets->outgoing_args - (offsets->saved_args + 4);
21015
21016 default:
21017 gcc_unreachable ();
21018 }
21019 gcc_unreachable ();
21020
21021 case FRAME_POINTER_REGNUM:
21022 switch (to)
21023 {
21024 case THUMB_HARD_FRAME_POINTER_REGNUM:
21025 return 0;
21026
21027 case ARM_HARD_FRAME_POINTER_REGNUM:
21028 /* The hard frame pointer points to the top entry in the
21029 stack frame. The soft frame pointer to the bottom entry
21030 in the stack frame. If there is no stack frame at all,
21031 then they are identical. */
21032
21033 return offsets->frame - offsets->soft_frame;
21034
21035 case STACK_POINTER_REGNUM:
21036 return offsets->outgoing_args - offsets->soft_frame;
21037
21038 default:
21039 gcc_unreachable ();
21040 }
21041 gcc_unreachable ();
21042
21043 default:
21044 /* You cannot eliminate from the stack pointer.
21045 In theory you could eliminate from the hard frame
21046 pointer to the stack pointer, but this will never
21047 happen, since if a stack frame is not needed the
21048 hard frame pointer will never be used. */
21049 gcc_unreachable ();
21050 }
21051 }
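/* Worked example, with illustrative figures only (assuming no pretend
   args, no static chain slot, a zero caller-interworking slot and no
   extra alignment padding): an ARM-mode function that saves
   {r4, r5, r6, lr} (16 bytes), has 8 bytes of locals and no outgoing
   arguments gets saved_args = 0, saved_regs = 16, soft_frame = 16,
   locals_base = 24 and outgoing_args = 24.  The switch above then yields

     ARG_POINTER   -> FRAME_POINTER : 16 - 0  = 16
     ARG_POINTER   -> STACK_POINTER : 24 - 4  = 20
     FRAME_POINTER -> STACK_POINTER : 24 - 16 = 8
*/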
21052
21053 /* Given FROM and TO register numbers, say whether this elimination is
21054 allowed. Frame pointer elimination is automatically handled.
21055
21056 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21057 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21058 pointer, we must eliminate FRAME_POINTER_REGNUM into
21059 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21060 ARG_POINTER_REGNUM. */
21061
21062 bool
21063 arm_can_eliminate (const int from, const int to)
21064 {
21065 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21066 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21067 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21068 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21069 true);
21070 }
21071
21072 /* Emit RTL to save coprocessor registers on function entry. Returns the
21073 number of bytes pushed. */
21074
21075 static int
21076 arm_save_coproc_regs(void)
21077 {
21078 int saved_size = 0;
21079 unsigned reg;
21080 unsigned start_reg;
21081 rtx insn;
21082
21083 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21084 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21085 {
21086 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21087 insn = gen_rtx_MEM (V2SImode, insn);
21088 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21089 RTX_FRAME_RELATED_P (insn) = 1;
21090 saved_size += 8;
21091 }
21092
21093 if (TARGET_HARD_FLOAT)
21094 {
21095 start_reg = FIRST_VFP_REGNUM;
21096
21097 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21098 {
21099 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21100 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21101 {
21102 if (start_reg != reg)
21103 saved_size += vfp_emit_fstmd (start_reg,
21104 (reg - start_reg) / 2);
21105 start_reg = reg + 2;
21106 }
21107 }
21108 if (start_reg != reg)
21109 saved_size += vfp_emit_fstmd (start_reg,
21110 (reg - start_reg) / 2);
21111 }
21112 return saved_size;
21113 }
21114
21115
21116 /* Set the Thumb frame pointer from the stack pointer. */
21117
21118 static void
21119 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21120 {
21121 HOST_WIDE_INT amount;
21122 rtx insn, dwarf;
21123
21124 amount = offsets->outgoing_args - offsets->locals_base;
21125 if (amount < 1024)
21126 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21127 stack_pointer_rtx, GEN_INT (amount)));
21128 else
21129 {
21130 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21131 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21132 expects the first two operands to be the same. */
21133 if (TARGET_THUMB2)
21134 {
21135 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21136 stack_pointer_rtx,
21137 hard_frame_pointer_rtx));
21138 }
21139 else
21140 {
21141 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21142 hard_frame_pointer_rtx,
21143 stack_pointer_rtx));
21144 }
21145 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21146 plus_constant (Pmode, stack_pointer_rtx, amount));
21147 RTX_FRAME_RELATED_P (dwarf) = 1;
21148 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21149 }
21150
21151 RTX_FRAME_RELATED_P (insn) = 1;
21152 }
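/* Illustration only: if the hard frame pointer is r7 and AMOUNT is 16,
   the fast path above emits something like "add r7, sp, #16".  For a
   larger amount the constant is first moved into the frame pointer
   register and sp is then added to it, with the REG_FRAME_RELATED_EXPR
   note describing the combined effect as a single sp-relative set.  */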
21153
21154 struct scratch_reg {
21155 rtx reg;
21156 bool saved;
21157 };
21158
21159 /* Return a short-lived scratch register for use as a 2nd scratch register on
21160 function entry after the registers are saved in the prologue. This register
21161 must be released by means of release_scratch_register_on_entry. IP is not
21162 considered since it is always used as the 1st scratch register if available.
21163
21164 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21165 mask of live registers. */
21166
21167 static void
21168 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21169 unsigned long live_regs)
21170 {
21171 int regno = -1;
21172
21173 sr->saved = false;
21174
21175 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21176 regno = LR_REGNUM;
21177 else
21178 {
21179 unsigned int i;
21180
21181 for (i = 4; i < 11; i++)
21182 if (regno1 != i && (live_regs & (1 << i)) != 0)
21183 {
21184 regno = i;
21185 break;
21186 }
21187
21188 if (regno < 0)
21189 {
21190 /* If IP is used as the 1st scratch register for a nested function,
21191 then either r3 wasn't available or it is used to preserve IP. */
21192 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21193 regno1 = 3;
21194 regno = (regno1 == 3 ? 2 : 3);
21195 sr->saved
21196 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21197 regno);
21198 }
21199 }
21200
21201 sr->reg = gen_rtx_REG (SImode, regno);
21202 if (sr->saved)
21203 {
21204 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21205 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21206 rtx x = gen_rtx_SET (stack_pointer_rtx,
21207 plus_constant (Pmode, stack_pointer_rtx, -4));
21208 RTX_FRAME_RELATED_P (insn) = 1;
21209 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21210 }
21211 }
21212
21213 /* Release a scratch register obtained from the preceding function. */
21214
21215 static void
21216 release_scratch_register_on_entry (struct scratch_reg *sr)
21217 {
21218 if (sr->saved)
21219 {
21220 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21221 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21222 rtx x = gen_rtx_SET (stack_pointer_rtx,
21223 plus_constant (Pmode, stack_pointer_rtx, 4));
21224 RTX_FRAME_RELATED_P (insn) = 1;
21225 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21226 }
21227 }
21228
21229 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21230
21231 #if PROBE_INTERVAL > 4096
21232 #error Cannot use indexed addressing mode for stack probing
21233 #endif
21234
21235 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21236 inclusive. These are offsets from the current stack pointer. REGNO1
21237 is the index number of the 1st scratch register and LIVE_REGS is the
21238 mask of live registers. */
21239
21240 static void
21241 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21242 unsigned int regno1, unsigned long live_regs)
21243 {
21244 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21245
21246 /* See if we have a constant small number of probes to generate. If so,
21247 that's the easy case. */
21248 if (size <= PROBE_INTERVAL)
21249 {
21250 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21251 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21252 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21253 }
21254
21255 /* The run-time loop is made up of 10 insns in the generic case while the
21256 compile-time loop is made up of 4 + 2*(n-2) insns for n intervals. */
21257 else if (size <= 5 * PROBE_INTERVAL)
21258 {
21259 HOST_WIDE_INT i, rem;
21260
21261 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21262 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21263 emit_stack_probe (reg1);
21264
21265 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21266 it exceeds SIZE. If only two probes are needed, this will not
21267 generate any code. Then probe at FIRST + SIZE. */
21268 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21269 {
21270 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21271 emit_stack_probe (reg1);
21272 }
21273
21274 rem = size - (i - PROBE_INTERVAL);
21275 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21276 {
21277 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21278 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21279 }
21280 else
21281 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21282 }
21283
21284 /* Otherwise, do the same as above, but in a loop. Note that we must be
21285 extra careful with variables wrapping around because we might be at
21286 the very top (or the very bottom) of the address space and we have
21287 to be able to handle this case properly; in particular, we use an
21288 equality test for the loop condition. */
21289 else
21290 {
21291 HOST_WIDE_INT rounded_size;
21292 struct scratch_reg sr;
21293
21294 get_scratch_register_on_entry (&sr, regno1, live_regs);
21295
21296 emit_move_insn (reg1, GEN_INT (first));
21297
21298
21299 /* Step 1: round SIZE to the previous multiple of the interval. */
21300
21301 rounded_size = size & -PROBE_INTERVAL;
21302 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21303
21304
21305 /* Step 2: compute initial and final value of the loop counter. */
21306
21307 /* TEST_ADDR = SP + FIRST. */
21308 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21309
21310 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21311 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21312
21313
21314 /* Step 3: the loop
21315
21316 do
21317 {
21318 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21319 probe at TEST_ADDR
21320 }
21321 while (TEST_ADDR != LAST_ADDR)
21322
21323 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21324 until it is equal to ROUNDED_SIZE. */
21325
21326 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21327
21328
21329 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21330 that SIZE is equal to ROUNDED_SIZE. */
21331
21332 if (size != rounded_size)
21333 {
21334 HOST_WIDE_INT rem = size - rounded_size;
21335
21336 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21337 {
21338 emit_set_insn (sr.reg,
21339 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21340 emit_stack_probe (plus_constant (Pmode, sr.reg,
21341 PROBE_INTERVAL - rem));
21342 }
21343 else
21344 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21345 }
21346
21347 release_scratch_register_on_entry (&sr);
21348 }
21349
21350 /* Make sure nothing is scheduled before we are done. */
21351 emit_insn (gen_blockage ());
21352 }
21353
21354 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21355 absolute addresses. */
21356
21357 const char *
21358 output_probe_stack_range (rtx reg1, rtx reg2)
21359 {
21360 static int labelno = 0;
21361 char loop_lab[32];
21362 rtx xops[2];
21363
21364 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21365
21366 /* Loop. */
21367 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21368
21369 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21370 xops[0] = reg1;
21371 xops[1] = GEN_INT (PROBE_INTERVAL);
21372 output_asm_insn ("sub\t%0, %0, %1", xops);
21373
21374 /* Probe at TEST_ADDR. */
21375 output_asm_insn ("str\tr0, [%0, #0]", xops);
21376
21377 /* Test if TEST_ADDR == LAST_ADDR. */
21378 xops[1] = reg2;
21379 output_asm_insn ("cmp\t%0, %1", xops);
21380
21381 /* Branch. */
21382 fputs ("\tbne\t", asm_out_file);
21383 assemble_name_raw (asm_out_file, loop_lab);
21384 fputc ('\n', asm_out_file);
21385
21386 return "";
21387 }
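/* With PROBE_INTERVAL at its usual value of 4096 and scratch registers
   r4/r5 (register numbers and label spelling are illustrative), the loop
   emitted above looks like:

   .LPSRL0:
	sub	r4, r4, #4096
	str	r0, [r4, #0]
	cmp	r4, r5
	bne	.LPSRL0
*/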
21388
21389 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21390 function. */
21391 void
21392 arm_expand_prologue (void)
21393 {
21394 rtx amount;
21395 rtx insn;
21396 rtx ip_rtx;
21397 unsigned long live_regs_mask;
21398 unsigned long func_type;
21399 int fp_offset = 0;
21400 int saved_pretend_args = 0;
21401 int saved_regs = 0;
21402 unsigned HOST_WIDE_INT args_to_push;
21403 HOST_WIDE_INT size;
21404 arm_stack_offsets *offsets;
21405 bool clobber_ip;
21406
21407 func_type = arm_current_func_type ();
21408
21409 /* Naked functions don't have prologues. */
21410 if (IS_NAKED (func_type))
21411 {
21412 if (flag_stack_usage_info)
21413 current_function_static_stack_size = 0;
21414 return;
21415 }
21416
21417 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21418 args_to_push = crtl->args.pretend_args_size;
21419
21420 /* Compute which registers we will have to save onto the stack. */
21421 offsets = arm_get_frame_offsets ();
21422 live_regs_mask = offsets->saved_regs_mask;
21423
21424 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21425
21426 if (IS_STACKALIGN (func_type))
21427 {
21428 rtx r0, r1;
21429
21430 /* Handle a word-aligned stack pointer. We generate the following:
21431
21432 mov r0, sp
21433 bic r1, r0, #7
21434 mov sp, r1
21435 <save and restore r0 in normal prologue/epilogue>
21436 mov sp, r0
21437 bx lr
21438
21439 The unwinder doesn't need to know about the stack realignment.
21440 Just tell it we saved SP in r0. */
21441 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21442
21443 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21444 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21445
21446 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21447 RTX_FRAME_RELATED_P (insn) = 1;
21448 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21449
21450 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21451
21452 /* ??? The CFA changes here, which may cause GDB to conclude that it
21453 has entered a different function. That said, the unwind info is
21454 correct, individually, before and after this instruction because
21455 we've described the save of SP, which will override the default
21456 handling of SP as restoring from the CFA. */
21457 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21458 }
21459
21460 /* The static chain register is the same as the IP register. If it is
21461 clobbered when creating the frame, we need to save and restore it. */
21462 clobber_ip = IS_NESTED (func_type)
21463 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21464 || (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21465 && !df_regs_ever_live_p (LR_REGNUM)
21466 && arm_r3_live_at_start_p ()));
21467
21468 /* Find somewhere to store IP whilst the frame is being created.
21469 We try the following places in order:
21470
21471 1. The last argument register r3 if it is available.
21472 2. A slot on the stack above the frame if there are no
21473 arguments to push onto the stack.
21474 3. Register r3 again, after pushing the argument registers
21475 onto the stack, if this is a varargs function.
21476 4. The last slot on the stack created for the arguments to
21477 push, if this isn't a varargs function.
21478
21479 Note - we only need to tell the dwarf2 backend about the SP
21480 adjustment in the second variant; the static chain register
21481 doesn't need to be unwound, as it doesn't contain a value
21482 inherited from the caller. */
21483 if (clobber_ip)
21484 {
21485 if (!arm_r3_live_at_start_p ())
21486 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21487 else if (args_to_push == 0)
21488 {
21489 rtx addr, dwarf;
21490
21491 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21492 saved_regs += 4;
21493
21494 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21495 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21496 fp_offset = 4;
21497
21498 /* Just tell the dwarf backend that we adjusted SP. */
21499 dwarf = gen_rtx_SET (stack_pointer_rtx,
21500 plus_constant (Pmode, stack_pointer_rtx,
21501 -fp_offset));
21502 RTX_FRAME_RELATED_P (insn) = 1;
21503 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21504 }
21505 else
21506 {
21507 /* Store the args on the stack. */
21508 if (cfun->machine->uses_anonymous_args)
21509 {
21510 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21511 (0xf0 >> (args_to_push / 4)) & 0xf);
21512 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21513 saved_pretend_args = 1;
21514 }
21515 else
21516 {
21517 rtx addr, dwarf;
21518
21519 if (args_to_push == 4)
21520 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21521 else
21522 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21523 plus_constant (Pmode,
21524 stack_pointer_rtx,
21525 -args_to_push));
21526
21527 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21528
21529 /* Just tell the dwarf backend that we adjusted SP. */
21530 dwarf = gen_rtx_SET (stack_pointer_rtx,
21531 plus_constant (Pmode, stack_pointer_rtx,
21532 -args_to_push));
21533 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21534 }
21535
21536 RTX_FRAME_RELATED_P (insn) = 1;
21537 fp_offset = args_to_push;
21538 args_to_push = 0;
21539 }
21540 }
21541
21542 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21543 {
21544 if (IS_INTERRUPT (func_type))
21545 {
21546 /* Interrupt functions must not corrupt any registers.
21547 Creating a frame pointer however, corrupts the IP
21548 register, so we must push it first. */
21549 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21550
21551 /* Do not set RTX_FRAME_RELATED_P on this insn.
21552 The dwarf stack unwinding code only wants to see one
21553 stack decrement per function, and this is not it. If
21554 this instruction is labeled as being part of the frame
21555 creation sequence then dwarf2out_frame_debug_expr will
21556 die when it encounters the assignment of IP to FP
21557 later on, since the use of SP here establishes SP as
21558 the CFA register and not IP.
21559
21560 Anyway this instruction is not really part of the stack
21561 frame creation although it is part of the prologue. */
21562 }
21563
21564 insn = emit_set_insn (ip_rtx,
21565 plus_constant (Pmode, stack_pointer_rtx,
21566 fp_offset));
21567 RTX_FRAME_RELATED_P (insn) = 1;
21568 }
21569
21570 if (args_to_push)
21571 {
21572 /* Push the argument registers, or reserve space for them. */
21573 if (cfun->machine->uses_anonymous_args)
21574 insn = emit_multi_reg_push
21575 ((0xf0 >> (args_to_push / 4)) & 0xf,
21576 (0xf0 >> (args_to_push / 4)) & 0xf);
21577 else
21578 insn = emit_insn
21579 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21580 GEN_INT (- args_to_push)));
21581 RTX_FRAME_RELATED_P (insn) = 1;
21582 }
21583
21584 /* If this is an interrupt service routine, and the link register
21585 is going to be pushed, and we are not generating the extra push
21586 of IP (needed when a frame pointer is required under the APCS
21587 frame layout), then subtracting four from LR now will mean that
21588 the function return can be done with a single instruction. */
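/* (For an IRQ or FIQ the return address is LR minus four, so
   pre-adjusting LR here lets the epilogue pop the saved value
   straight into PC instead of needing a separate SUBS PC, LR, #4.) */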
21589 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21590 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21591 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21592 && TARGET_ARM)
21593 {
21594 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21595
21596 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21597 }
21598
21599 if (live_regs_mask)
21600 {
21601 unsigned long dwarf_regs_mask = live_regs_mask;
21602
21603 saved_regs += bit_count (live_regs_mask) * 4;
21604 if (optimize_size && !frame_pointer_needed
21605 && saved_regs == offsets->saved_regs - offsets->saved_args)
21606 {
21607 /* If no coprocessor registers are being pushed and we don't have
21608 to worry about a frame pointer then push extra registers to
21609 create the stack frame. This is done in a way that does not
21610 alter the frame layout, so is independent of the epilogue. */
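/* For example, a function needing 8 bytes of such space with r0 and r1
   both dead can simply add r0 and r1 to the register push, avoiding a
   separate "sub sp, sp, #8". */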
21611 int n;
21612 int frame;
21613 n = 0;
21614 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21615 n++;
21616 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21617 if (frame && n * 4 >= frame)
21618 {
21619 n = frame / 4;
21620 live_regs_mask |= (1 << n) - 1;
21621 saved_regs += frame;
21622 }
21623 }
21624
21625 if (TARGET_LDRD
21626 && current_tune->prefer_ldrd_strd
21627 && !optimize_function_for_size_p (cfun))
21628 {
21629 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21630 if (TARGET_THUMB2)
21631 thumb2_emit_strd_push (live_regs_mask);
21632 else if (TARGET_ARM
21633 && !TARGET_APCS_FRAME
21634 && !IS_INTERRUPT (func_type))
21635 arm_emit_strd_push (live_regs_mask);
21636 else
21637 {
21638 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21639 RTX_FRAME_RELATED_P (insn) = 1;
21640 }
21641 }
21642 else
21643 {
21644 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21645 RTX_FRAME_RELATED_P (insn) = 1;
21646 }
21647 }
21648
21649 if (! IS_VOLATILE (func_type))
21650 saved_regs += arm_save_coproc_regs ();
21651
21652 if (frame_pointer_needed && TARGET_ARM)
21653 {
21654 /* Create the new frame pointer. */
21655 if (TARGET_APCS_FRAME)
21656 {
21657 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21658 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21659 RTX_FRAME_RELATED_P (insn) = 1;
21660 }
21661 else
21662 {
21663 insn = GEN_INT (saved_regs - (4 + fp_offset));
21664 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21665 stack_pointer_rtx, insn));
21666 RTX_FRAME_RELATED_P (insn) = 1;
21667 }
21668 }
21669
21670 size = offsets->outgoing_args - offsets->saved_args;
21671 if (flag_stack_usage_info)
21672 current_function_static_stack_size = size;
21673
21674 /* If this isn't an interrupt service routine and we have a frame, then do
21675 stack checking. We use IP as the first scratch register, except for the
21676 non-APCS nested functions if LR or r3 is available (see clobber_ip). */
21677 if (!IS_INTERRUPT (func_type)
21678 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK)
21679 {
21680 unsigned int regno;
21681
21682 if (!IS_NESTED (func_type) || clobber_ip)
21683 regno = IP_REGNUM;
21684 else if (df_regs_ever_live_p (LR_REGNUM))
21685 regno = LR_REGNUM;
21686 else
21687 regno = 3;
21688
21689 if (crtl->is_leaf && !cfun->calls_alloca)
21690 {
21691 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
21692 arm_emit_probe_stack_range (STACK_CHECK_PROTECT,
21693 size - STACK_CHECK_PROTECT,
21694 regno, live_regs_mask);
21695 }
21696 else if (size > 0)
21697 arm_emit_probe_stack_range (STACK_CHECK_PROTECT, size,
21698 regno, live_regs_mask);
21699 }
21700
21701 /* Recover the static chain register. */
21702 if (clobber_ip)
21703 {
21704 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21705 insn = gen_rtx_REG (SImode, 3);
21706 else
21707 {
21708 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21709 insn = gen_frame_mem (SImode, insn);
21710 }
21711 emit_set_insn (ip_rtx, insn);
21712 emit_insn (gen_force_register_use (ip_rtx));
21713 }
21714
21715 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21716 {
21717 /* This add can produce multiple insns for a large constant, so we
21718 need to get tricky. */
21719 rtx_insn *last = get_last_insn ();
21720
21721 amount = GEN_INT (offsets->saved_args + saved_regs
21722 - offsets->outgoing_args);
21723
21724 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21725 amount));
21726 do
21727 {
21728 last = last ? NEXT_INSN (last) : get_insns ();
21729 RTX_FRAME_RELATED_P (last) = 1;
21730 }
21731 while (last != insn);
21732
21733 /* If the frame pointer is needed, emit a special barrier that
21734 will prevent the scheduler from moving stores to the frame
21735 before the stack adjustment. */
21736 if (frame_pointer_needed)
21737 emit_insn (gen_stack_tie (stack_pointer_rtx,
21738 hard_frame_pointer_rtx));
21739 }
21740
21741
21742 if (frame_pointer_needed && TARGET_THUMB2)
21743 thumb_set_frame_pointer (offsets);
21744
21745 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21746 {
21747 unsigned long mask;
21748
21749 mask = live_regs_mask;
21750 mask &= THUMB2_WORK_REGS;
21751 if (!IS_NESTED (func_type))
21752 mask |= (1 << IP_REGNUM);
21753 arm_load_pic_register (mask);
21754 }
21755
21756 /* If we are profiling, make sure no instructions are scheduled before
21757 the call to mcount. Similarly if the user has requested no
21758 scheduling in the prolog. Similarly if we want non-call exceptions
21759 using the EABI unwinder, to prevent faulting instructions from being
21760 swapped with a stack adjustment. */
21761 if (crtl->profile || !TARGET_SCHED_PROLOG
21762 || (arm_except_unwind_info (&global_options) == UI_TARGET
21763 && cfun->can_throw_non_call_exceptions))
21764 emit_insn (gen_blockage ());
21765
21766 /* If the link register is being kept alive, with the return address in it,
21767 then make sure that it does not get reused by the ce2 pass. */
21768 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
21769 cfun->machine->lr_save_eliminated = 1;
21770 }
21771 \f
21772 /* Print condition code to STREAM. Helper function for arm_print_operand. */
21773 static void
21774 arm_print_condition (FILE *stream)
21775 {
21776 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
21777 {
21778 /* Branch conversion is not implemented for Thumb-2. */
21779 if (TARGET_THUMB)
21780 {
21781 output_operand_lossage ("predicated Thumb instruction");
21782 return;
21783 }
21784 if (current_insn_predicate != NULL)
21785 {
21786 output_operand_lossage
21787 ("predicated instruction in conditional sequence");
21788 return;
21789 }
21790
21791 fputs (arm_condition_codes[arm_current_cc], stream);
21792 }
21793 else if (current_insn_predicate)
21794 {
21795 enum arm_cond_code code;
21796
21797 if (TARGET_THUMB1)
21798 {
21799 output_operand_lossage ("predicated Thumb instruction");
21800 return;
21801 }
21802
21803 code = get_arm_condition_code (current_insn_predicate);
21804 fputs (arm_condition_codes[code], stream);
21805 }
21806 }
21807
21808
21809 /* Globally reserved letters: acln
21810 Punctuation letters currently used: @_|?().!#
21811 Lower case letters currently used: bcdefhimpqtvwxyz
21812 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
21813 Letters previously used, but now deprecated/obsolete: sVWXYZ.
21814
21815 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
21816
21817 If CODE is 'd', then the X is a condition operand and the instruction
21818 should only be executed if the condition is true.
21819 If CODE is 'D', then the X is a condition operand and the instruction
21820 should only be executed if the condition is false: however, if the mode
21821 of the comparison is CCFPEmode, then always execute the instruction -- we
21822 do this because in these circumstances !GE does not necessarily imply LT;
21823 in these cases the instruction pattern will take care to make sure that
21824 an instruction containing %d will follow, thereby undoing the effects of
21825 doing this instruction unconditionally.
21826 If CODE is 'N' then X is a floating point operand that must be negated
21827 before output.
21828 If CODE is 'B' then output a bitwise inverted value of X (a const int).
21829 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
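/* For example, %M applied to a DImode value held in r4 prints "{r4-r5}",
   and %B applied to the constant 0 prints -1. */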
21830 static void
21831 arm_print_operand (FILE *stream, rtx x, int code)
21832 {
21833 switch (code)
21834 {
21835 case '@':
21836 fputs (ASM_COMMENT_START, stream);
21837 return;
21838
21839 case '_':
21840 fputs (user_label_prefix, stream);
21841 return;
21842
21843 case '|':
21844 fputs (REGISTER_PREFIX, stream);
21845 return;
21846
21847 case '?':
21848 arm_print_condition (stream);
21849 return;
21850
21851 case '.':
21852 /* The current condition code for a condition code setting instruction.
21853 Preceded by 's' in unified syntax, otherwise followed by 's'. */
21854 fputc ('s', stream);
21855 arm_print_condition (stream);
21856 return;
21857
21858 case '!':
21859 /* If the instruction is conditionally executed then print
21860 the current condition code, otherwise print 's'. */
21861 gcc_assert (TARGET_THUMB2);
21862 if (current_insn_predicate)
21863 arm_print_condition (stream);
21864 else
21865 fputc ('s', stream);
21866 break;
21867
21868 /* %# is a "break" sequence. It doesn't output anything, but is used to
21869 separate e.g. operand numbers from following text, if that text consists
21870 of further digits which we don't want to be part of the operand
21871 number. */
21872 case '#':
21873 return;
21874
21875 case 'N':
21876 {
21877 REAL_VALUE_TYPE r;
21878 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
21879 fprintf (stream, "%s", fp_const_from_val (&r));
21880 }
21881 return;
21882
21883 /* An integer or symbol address without a preceding # sign. */
21884 case 'c':
21885 switch (GET_CODE (x))
21886 {
21887 case CONST_INT:
21888 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
21889 break;
21890
21891 case SYMBOL_REF:
21892 output_addr_const (stream, x);
21893 break;
21894
21895 case CONST:
21896 if (GET_CODE (XEXP (x, 0)) == PLUS
21897 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
21898 {
21899 output_addr_const (stream, x);
21900 break;
21901 }
21902 /* Fall through. */
21903
21904 default:
21905 output_operand_lossage ("Unsupported operand for code '%c'", code);
21906 }
21907 return;
21908
21909 /* An integer that we want to print in HEX. */
21910 case 'x':
21911 switch (GET_CODE (x))
21912 {
21913 case CONST_INT:
21914 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
21915 break;
21916
21917 default:
21918 output_operand_lossage ("Unsupported operand for code '%c'", code);
21919 }
21920 return;
21921
21922 case 'B':
21923 if (CONST_INT_P (x))
21924 {
21925 HOST_WIDE_INT val;
21926 val = ARM_SIGN_EXTEND (~INTVAL (x));
21927 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
21928 }
21929 else
21930 {
21931 putc ('~', stream);
21932 output_addr_const (stream, x);
21933 }
21934 return;
21935
21936 case 'b':
21937 /* Print the log2 of a CONST_INT. */
21938 {
21939 HOST_WIDE_INT val;
21940
21941 if (!CONST_INT_P (x)
21942 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
21943 output_operand_lossage ("Unsupported operand for code '%c'", code);
21944 else
21945 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21946 }
21947 return;
21948
21949 case 'L':
21950 /* The low 16 bits of an immediate constant. */
21951 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x) & 0xffff);
21952 return;
21953
21954 case 'i':
21955 fprintf (stream, "%s", arithmetic_instr (x, 1));
21956 return;
21957
21958 case 'I':
21959 fprintf (stream, "%s", arithmetic_instr (x, 0));
21960 return;
21961
21962 case 'S':
21963 {
21964 HOST_WIDE_INT val;
21965 const char *shift;
21966
21967 shift = shift_op (x, &val);
21968
21969 if (shift)
21970 {
21971 fprintf (stream, ", %s ", shift);
21972 if (val == -1)
21973 arm_print_operand (stream, XEXP (x, 1), 0);
21974 else
21975 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
21976 }
21977 }
21978 return;
21979
21980 /* An explanation of the 'Q', 'R' and 'H' register operands:
21981
21982 In a pair of registers containing a DI or DF value the 'Q'
21983 operand returns the register number of the register containing
21984 the least significant part of the value. The 'R' operand returns
21985 the register number of the register containing the most
21986 significant part of the value.
21987
21988 The 'H' operand returns the higher of the two register numbers.
21989 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
21990 same as the 'Q' operand, since the most significant part of the
21991 value is held in the lower number register. The reverse is true
21992 on systems where WORDS_BIG_ENDIAN is false.
21993
21994 The purpose of these operands is to distinguish between cases
21995 where the endian-ness of the values is important (for example
21996 when they are added together), and cases where the endian-ness
21997 is irrelevant, but the order of register operations is important.
21998 For example when loading a value from memory into a register
21999 pair, the endian-ness does not matter. Provided that the value
22000 from the lower memory address is put into the lower numbered
22001 register, and the value from the higher address is put into the
22002 higher numbered register, the load will work regardless of whether
22003 the value being loaded is big-wordian or little-wordian. The
22004 order of the two register loads can matter however, if the address
22005 of the memory location is actually held in one of the registers
22006 being overwritten by the load.
22007
22008 The 'Q' and 'R' operand codes are also available for 64-bit
22009 constants. */
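/* For example, with a DImode value held in {r2, r3} on a little-endian
   target %Q prints r2, %R prints r3 and %H prints r3; when
   WORDS_BIG_ENDIAN is true %Q prints r3 and %R prints r2, while %H
   still prints r3. */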
22010 case 'Q':
22011 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22012 {
22013 rtx part = gen_lowpart (SImode, x);
22014 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22015 return;
22016 }
22017
22018 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22019 {
22020 output_operand_lossage ("invalid operand for code '%c'", code);
22021 return;
22022 }
22023
22024 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22025 return;
22026
22027 case 'R':
22028 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22029 {
22030 machine_mode mode = GET_MODE (x);
22031 rtx part;
22032
22033 if (mode == VOIDmode)
22034 mode = DImode;
22035 part = gen_highpart_mode (SImode, mode, x);
22036 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22037 return;
22038 }
22039
22040 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22041 {
22042 output_operand_lossage ("invalid operand for code '%c'", code);
22043 return;
22044 }
22045
22046 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22047 return;
22048
22049 case 'H':
22050 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22051 {
22052 output_operand_lossage ("invalid operand for code '%c'", code);
22053 return;
22054 }
22055
22056 asm_fprintf (stream, "%r", REGNO (x) + 1);
22057 return;
22058
22059 case 'J':
22060 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22061 {
22062 output_operand_lossage ("invalid operand for code '%c'", code);
22063 return;
22064 }
22065
22066 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22067 return;
22068
22069 case 'K':
22070 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22071 {
22072 output_operand_lossage ("invalid operand for code '%c'", code);
22073 return;
22074 }
22075
22076 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22077 return;
22078
22079 case 'm':
22080 asm_fprintf (stream, "%r",
22081 REG_P (XEXP (x, 0))
22082 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22083 return;
22084
22085 case 'M':
22086 asm_fprintf (stream, "{%r-%r}",
22087 REGNO (x),
22088 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22089 return;
22090
22091 /* Like 'M', but writing doubleword vector registers, for use by Neon
22092 insns. */
22093 case 'h':
22094 {
22095 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22096 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22097 if (numregs == 1)
22098 asm_fprintf (stream, "{d%d}", regno);
22099 else
22100 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22101 }
22102 return;
22103
22104 case 'd':
22105 /* CONST_TRUE_RTX means always -- that's the default. */
22106 if (x == const_true_rtx)
22107 return;
22108
22109 if (!COMPARISON_P (x))
22110 {
22111 output_operand_lossage ("invalid operand for code '%c'", code);
22112 return;
22113 }
22114
22115 fputs (arm_condition_codes[get_arm_condition_code (x)],
22116 stream);
22117 return;
22118
22119 case 'D':
22120 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22121 want to do that. */
22122 if (x == const_true_rtx)
22123 {
22124 output_operand_lossage ("instruction never executed");
22125 return;
22126 }
22127 if (!COMPARISON_P (x))
22128 {
22129 output_operand_lossage ("invalid operand for code '%c'", code);
22130 return;
22131 }
22132
22133 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22134 (get_arm_condition_code (x))],
22135 stream);
22136 return;
22137
22138 case 's':
22139 case 'V':
22140 case 'W':
22141 case 'X':
22142 case 'Y':
22143 case 'Z':
22144 /* Former Maverick support, removed after GCC-4.7. */
22145 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22146 return;
22147
22148 case 'U':
22149 if (!REG_P (x)
22150 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22151 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22152 /* Bad value for wCG register number. */
22153 {
22154 output_operand_lossage ("invalid operand for code '%c'", code);
22155 return;
22156 }
22157
22158 else
22159 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22160 return;
22161
22162 /* Print an iWMMXt control register name. */
22163 case 'w':
22164 if (!CONST_INT_P (x)
22165 || INTVAL (x) < 0
22166 || INTVAL (x) >= 16)
22167 /* Bad value for wC register number. */
22168 {
22169 output_operand_lossage ("invalid operand for code '%c'", code);
22170 return;
22171 }
22172
22173 else
22174 {
22175 static const char * wc_reg_names [16] =
22176 {
22177 "wCID", "wCon", "wCSSF", "wCASF",
22178 "wC4", "wC5", "wC6", "wC7",
22179 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22180 "wC12", "wC13", "wC14", "wC15"
22181 };
22182
22183 fputs (wc_reg_names [INTVAL (x)], stream);
22184 }
22185 return;
22186
22187 /* Print the high single-precision register of a VFP double-precision
22188 register. */
22189 case 'p':
22190 {
22191 machine_mode mode = GET_MODE (x);
22192 int regno;
22193
22194 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22195 {
22196 output_operand_lossage ("invalid operand for code '%c'", code);
22197 return;
22198 }
22199
22200 regno = REGNO (x);
22201 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22202 {
22203 output_operand_lossage ("invalid operand for code '%c'", code);
22204 return;
22205 }
22206
22207 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22208 }
22209 return;
22210
22211 /* Print a VFP/Neon double precision or quad precision register name. */
22212 case 'P':
22213 case 'q':
22214 {
22215 machine_mode mode = GET_MODE (x);
22216 int is_quad = (code == 'q');
22217 int regno;
22218
22219 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22220 {
22221 output_operand_lossage ("invalid operand for code '%c'", code);
22222 return;
22223 }
22224
22225 if (!REG_P (x)
22226 || !IS_VFP_REGNUM (REGNO (x)))
22227 {
22228 output_operand_lossage ("invalid operand for code '%c'", code);
22229 return;
22230 }
22231
22232 regno = REGNO (x);
22233 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22234 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22235 {
22236 output_operand_lossage ("invalid operand for code '%c'", code);
22237 return;
22238 }
22239
22240 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22241 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22242 }
22243 return;
22244
22245 /* These two codes print the low/high doubleword register of a Neon quad
22246 register, respectively. For pair-structure types, can also print
22247 low/high quadword registers. */
22248 case 'e':
22249 case 'f':
22250 {
22251 machine_mode mode = GET_MODE (x);
22252 int regno;
22253
22254 if ((GET_MODE_SIZE (mode) != 16
22255 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22256 {
22257 output_operand_lossage ("invalid operand for code '%c'", code);
22258 return;
22259 }
22260
22261 regno = REGNO (x);
22262 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22263 {
22264 output_operand_lossage ("invalid operand for code '%c'", code);
22265 return;
22266 }
22267
22268 if (GET_MODE_SIZE (mode) == 16)
22269 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22270 + (code == 'f' ? 1 : 0));
22271 else
22272 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22273 + (code == 'f' ? 1 : 0));
22274 }
22275 return;
22276
22277 /* Print a VFPv3 floating-point constant, represented as an integer
22278 index. */
22279 case 'G':
22280 {
22281 int index = vfp3_const_double_index (x);
22282 gcc_assert (index != -1);
22283 fprintf (stream, "%d", index);
22284 }
22285 return;
22286
22287 /* Print bits representing opcode features for Neon.
22288
22289 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22290 and polynomials as unsigned.
22291
22292 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22293
22294 Bit 2 is 1 for rounding functions, 0 otherwise. */
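/* For example, a bits value of 2 (unsigned polynomial) prints 'p' for
   %T, 'p' for %F and 'u' for %t, while a value of 3 (float) prints 'f'
   for all three. */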
22295
22296 /* Identify the type as 's', 'u', 'p' or 'f'. */
22297 case 'T':
22298 {
22299 HOST_WIDE_INT bits = INTVAL (x);
22300 fputc ("uspf"[bits & 3], stream);
22301 }
22302 return;
22303
22304 /* Likewise, but signed and unsigned integers are both 'i'. */
22305 case 'F':
22306 {
22307 HOST_WIDE_INT bits = INTVAL (x);
22308 fputc ("iipf"[bits & 3], stream);
22309 }
22310 return;
22311
22312 /* As for 'T', but emit 'u' instead of 'p'. */
22313 case 't':
22314 {
22315 HOST_WIDE_INT bits = INTVAL (x);
22316 fputc ("usuf"[bits & 3], stream);
22317 }
22318 return;
22319
22320 /* Bit 2: rounding (vs none). */
22321 case 'O':
22322 {
22323 HOST_WIDE_INT bits = INTVAL (x);
22324 fputs ((bits & 4) != 0 ? "r" : "", stream);
22325 }
22326 return;
22327
22328 /* Memory operand for vld1/vst1 instruction. */
22329 case 'A':
22330 {
22331 rtx addr;
22332 bool postinc = false;
22333 rtx postinc_reg = NULL;
22334 unsigned align, memsize, align_bits;
22335
22336 gcc_assert (MEM_P (x));
22337 addr = XEXP (x, 0);
22338 if (GET_CODE (addr) == POST_INC)
22339 {
22340 postinc = true;
22341 addr = XEXP (addr, 0);
22342 }
22343 if (GET_CODE (addr) == POST_MODIFY)
22344 {
22345 postinc_reg = XEXP (XEXP (addr, 1), 1);
22346 addr = XEXP (addr, 0);
22347 }
22348 asm_fprintf (stream, "[%r", REGNO (addr));
22349
22350 /* We know the alignment of this access, so we can emit a hint in the
22351 instruction (for some alignments) as an aid to the memory subsystem
22352 of the target. */
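/* For example, a 16-byte access known to be 16-byte aligned is given a
   ":128" hint, printing something like "[r0:128]". */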
22353 align = MEM_ALIGN (x) >> 3;
22354 memsize = MEM_SIZE (x);
22355
22356 /* Only certain alignment specifiers are supported by the hardware. */
22357 if (memsize == 32 && (align % 32) == 0)
22358 align_bits = 256;
22359 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22360 align_bits = 128;
22361 else if (memsize >= 8 && (align % 8) == 0)
22362 align_bits = 64;
22363 else
22364 align_bits = 0;
22365
22366 if (align_bits != 0)
22367 asm_fprintf (stream, ":%d", align_bits);
22368
22369 asm_fprintf (stream, "]");
22370
22371 if (postinc)
22372 fputs ("!", stream);
22373 if (postinc_reg)
22374 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22375 }
22376 return;
22377
22378 case 'C':
22379 {
22380 rtx addr;
22381
22382 gcc_assert (MEM_P (x));
22383 addr = XEXP (x, 0);
22384 gcc_assert (REG_P (addr));
22385 asm_fprintf (stream, "[%r]", REGNO (addr));
22386 }
22387 return;
22388
22389 /* Translate an S register number into a D register number and element index. */
22390 case 'y':
22391 {
22392 machine_mode mode = GET_MODE (x);
22393 int regno;
22394
22395 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22396 {
22397 output_operand_lossage ("invalid operand for code '%c'", code);
22398 return;
22399 }
22400
22401 regno = REGNO (x);
22402 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22403 {
22404 output_operand_lossage ("invalid operand for code '%c'", code);
22405 return;
22406 }
22407
22408 regno = regno - FIRST_VFP_REGNUM;
22409 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22410 }
22411 return;
22412
22413 case 'v':
22414 gcc_assert (CONST_DOUBLE_P (x));
22415 int result;
22416 result = vfp3_const_double_for_fract_bits (x);
22417 if (result == 0)
22418 result = vfp3_const_double_for_bits (x);
22419 fprintf (stream, "#%d", result);
22420 return;
22421
22422 /* Register specifier for vld1.16/vst1.16. Translate the S register
22423 number into a D register number and element index. */
22424 case 'z':
22425 {
22426 machine_mode mode = GET_MODE (x);
22427 int regno;
22428
22429 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22430 {
22431 output_operand_lossage ("invalid operand for code '%c'", code);
22432 return;
22433 }
22434
22435 regno = REGNO (x);
22436 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22437 {
22438 output_operand_lossage ("invalid operand for code '%c'", code);
22439 return;
22440 }
22441
22442 regno = regno - FIRST_VFP_REGNUM;
22443 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22444 }
22445 return;
22446
22447 default:
22448 if (x == 0)
22449 {
22450 output_operand_lossage ("missing operand");
22451 return;
22452 }
22453
22454 switch (GET_CODE (x))
22455 {
22456 case REG:
22457 asm_fprintf (stream, "%r", REGNO (x));
22458 break;
22459
22460 case MEM:
22461 output_address (GET_MODE (x), XEXP (x, 0));
22462 break;
22463
22464 case CONST_DOUBLE:
22465 {
22466 char fpstr[20];
22467 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22468 sizeof (fpstr), 0, 1);
22469 fprintf (stream, "#%s", fpstr);
22470 }
22471 break;
22472
22473 default:
22474 gcc_assert (GET_CODE (x) != NEG);
22475 fputc ('#', stream);
22476 if (GET_CODE (x) == HIGH)
22477 {
22478 fputs (":lower16:", stream);
22479 x = XEXP (x, 0);
22480 }
22481
22482 output_addr_const (stream, x);
22483 break;
22484 }
22485 }
22486 }
22487 \f
22488 /* Target hook for printing a memory address. */
22489 static void
22490 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22491 {
22492 if (TARGET_32BIT)
22493 {
22494 int is_minus = GET_CODE (x) == MINUS;
22495
22496 if (REG_P (x))
22497 asm_fprintf (stream, "[%r]", REGNO (x));
22498 else if (GET_CODE (x) == PLUS || is_minus)
22499 {
22500 rtx base = XEXP (x, 0);
22501 rtx index = XEXP (x, 1);
22502 HOST_WIDE_INT offset = 0;
22503 if (!REG_P (base)
22504 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22505 {
22506 /* Ensure that BASE is a register (one of them must be).
22507 Also ensure that SP is not used as an index
22508 register. */
22509 std::swap (base, index);
22510 }
22511 switch (GET_CODE (index))
22512 {
22513 case CONST_INT:
22514 offset = INTVAL (index);
22515 if (is_minus)
22516 offset = -offset;
22517 asm_fprintf (stream, "[%r, #%wd]",
22518 REGNO (base), offset);
22519 break;
22520
22521 case REG:
22522 asm_fprintf (stream, "[%r, %s%r]",
22523 REGNO (base), is_minus ? "-" : "",
22524 REGNO (index));
22525 break;
22526
22527 case MULT:
22528 case ASHIFTRT:
22529 case LSHIFTRT:
22530 case ASHIFT:
22531 case ROTATERT:
22532 {
22533 asm_fprintf (stream, "[%r, %s%r",
22534 REGNO (base), is_minus ? "-" : "",
22535 REGNO (XEXP (index, 0)));
22536 arm_print_operand (stream, index, 'S');
22537 fputs ("]", stream);
22538 break;
22539 }
22540
22541 default:
22542 gcc_unreachable ();
22543 }
22544 }
22545 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22546 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22547 {
22548 gcc_assert (REG_P (XEXP (x, 0)));
22549
22550 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22551 asm_fprintf (stream, "[%r, #%s%d]!",
22552 REGNO (XEXP (x, 0)),
22553 GET_CODE (x) == PRE_DEC ? "-" : "",
22554 GET_MODE_SIZE (mode));
22555 else
22556 asm_fprintf (stream, "[%r], #%s%d",
22557 REGNO (XEXP (x, 0)),
22558 GET_CODE (x) == POST_DEC ? "-" : "",
22559 GET_MODE_SIZE (mode));
22560 }
22561 else if (GET_CODE (x) == PRE_MODIFY)
22562 {
22563 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22564 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22565 asm_fprintf (stream, "#%wd]!",
22566 INTVAL (XEXP (XEXP (x, 1), 1)));
22567 else
22568 asm_fprintf (stream, "%r]!",
22569 REGNO (XEXP (XEXP (x, 1), 1)));
22570 }
22571 else if (GET_CODE (x) == POST_MODIFY)
22572 {
22573 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22574 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22575 asm_fprintf (stream, "#%wd",
22576 INTVAL (XEXP (XEXP (x, 1), 1)));
22577 else
22578 asm_fprintf (stream, "%r",
22579 REGNO (XEXP (XEXP (x, 1), 1)));
22580 }
22581 else output_addr_const (stream, x);
22582 }
22583 else
22584 {
22585 if (REG_P (x))
22586 asm_fprintf (stream, "[%r]", REGNO (x));
22587 else if (GET_CODE (x) == POST_INC)
22588 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22589 else if (GET_CODE (x) == PLUS)
22590 {
22591 gcc_assert (REG_P (XEXP (x, 0)));
22592 if (CONST_INT_P (XEXP (x, 1)))
22593 asm_fprintf (stream, "[%r, #%wd]",
22594 REGNO (XEXP (x, 0)),
22595 INTVAL (XEXP (x, 1)));
22596 else
22597 asm_fprintf (stream, "[%r, %r]",
22598 REGNO (XEXP (x, 0)),
22599 REGNO (XEXP (x, 1)));
22600 }
22601 else
22602 output_addr_const (stream, x);
22603 }
22604 }
22605 \f
22606 /* Target hook for indicating whether a punctuation character for
22607 TARGET_PRINT_OPERAND is valid. */
22608 static bool
22609 arm_print_operand_punct_valid_p (unsigned char code)
22610 {
22611 return (code == '@' || code == '|' || code == '.'
22612 || code == '(' || code == ')' || code == '#'
22613 || (TARGET_32BIT && (code == '?'))
22614 || (TARGET_THUMB2 && (code == '!'))
22615 || (TARGET_THUMB && (code == '_')));
22616 }
22617 \f
22618 /* Target hook for assembling integer objects. The ARM version needs to
22619 handle word-sized values specially. */
22620 static bool
22621 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22622 {
22623 machine_mode mode;
22624
22625 if (size == UNITS_PER_WORD && aligned_p)
22626 {
22627 fputs ("\t.word\t", asm_out_file);
22628 output_addr_const (asm_out_file, x);
22629
22630 /* Mark symbols as position independent. We only do this in the
22631 .text segment, not in the .data segment. */
22632 if (NEED_GOT_RELOC && flag_pic && making_const_table
22633 && (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22634 {
22635 /* See legitimize_pic_address for an explanation of the
22636 TARGET_VXWORKS_RTP check. */
22637 /* References to weak symbols cannot be resolved locally:
22638 they may be overridden by a non-weak definition at link
22639 time. */
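/* For example, this emits something like ".word sym(GOT)" for a global
   or weak symbol, and ".word sym(GOTOFF)" when the reference is known
   to be local and text-relative. */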
22640 if (!arm_pic_data_is_text_relative
22641 || (GET_CODE (x) == SYMBOL_REF
22642 && (!SYMBOL_REF_LOCAL_P (x)
22643 || (SYMBOL_REF_DECL (x)
22644 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22645 fputs ("(GOT)", asm_out_file);
22646 else
22647 fputs ("(GOTOFF)", asm_out_file);
22648 }
22649 fputc ('\n', asm_out_file);
22650 return true;
22651 }
22652
22653 mode = GET_MODE (x);
22654
22655 if (arm_vector_mode_supported_p (mode))
22656 {
22657 int i, units;
22658
22659 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22660
22661 units = CONST_VECTOR_NUNITS (x);
22662 size = GET_MODE_UNIT_SIZE (mode);
22663
22664 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22665 for (i = 0; i < units; i++)
22666 {
22667 rtx elt = CONST_VECTOR_ELT (x, i);
22668 assemble_integer
22669 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22670 }
22671 else
22672 for (i = 0; i < units; i++)
22673 {
22674 rtx elt = CONST_VECTOR_ELT (x, i);
22675 assemble_real
22676 (*CONST_DOUBLE_REAL_VALUE (elt),
22677 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22678 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22679 }
22680
22681 return true;
22682 }
22683
22684 return default_assemble_integer (x, size, aligned_p);
22685 }
22686
22687 static void
22688 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22689 {
22690 section *s;
22691
22692 if (!TARGET_AAPCS_BASED)
22693 {
22694 (is_ctor ?
22695 default_named_section_asm_out_constructor
22696 : default_named_section_asm_out_destructor) (symbol, priority);
22697 return;
22698 }
22699
22700 /* Put these in the .init_array section, using a special relocation. */
22701 if (priority != DEFAULT_INIT_PRIORITY)
22702 {
22703 char buf[18];
22704 sprintf (buf, "%s.%.5u",
22705 is_ctor ? ".init_array" : ".fini_array",
22706 priority);
22707 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22708 }
22709 else if (is_ctor)
22710 s = ctors_section;
22711 else
22712 s = dtors_section;
22713
22714 switch_to_section (s);
22715 assemble_align (POINTER_SIZE);
22716 fputs ("\t.word\t", asm_out_file);
22717 output_addr_const (asm_out_file, symbol);
22718 fputs ("(target1)\n", asm_out_file);
22719 }
22720
22721 /* Add a function to the list of static constructors. */
22722
22723 static void
22724 arm_elf_asm_constructor (rtx symbol, int priority)
22725 {
22726 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22727 }
22728
22729 /* Add a function to the list of static destructors. */
22730
22731 static void
22732 arm_elf_asm_destructor (rtx symbol, int priority)
22733 {
22734 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22735 }
22736 \f
22737 /* A finite state machine takes care of noticing whether or not instructions
22738 can be conditionally executed, and thus decrease execution time and code
22739 size by deleting branch instructions. The fsm is controlled by
22740 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22741
22742 /* The states of the fsm controlling condition codes are:
22743 0: normal, do nothing special
22744 1: make ASM_OUTPUT_OPCODE not output this instruction
22745 2: make ASM_OUTPUT_OPCODE not output this instruction
22746 3: make instructions conditional
22747 4: make instructions conditional
22748
22749 State transitions (state->state by whom under condition):
22750 0 -> 1 final_prescan_insn if the `target' is a label
22751 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
22752 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
22753 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
22754 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
22755 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
22756 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
22757 (the target insn is arm_target_insn).
22758
22759 If the jump clobbers the conditions then we use states 2 and 4.
22760
22761 A similar thing can be done with conditional return insns.
22762
22763 XXX In case the `target' is an unconditional branch, this conditionalising
22764 of the instructions always reduces code size, but not always execution
22765 time. But then, I want to reduce the code size to somewhere near what
22766 /bin/cc produces. */
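/* For example, a conditional branch around a single instruction,

	bne	.L1
	add	r0, r0, #1
   .L1:

   is rewritten by this fsm as the single predicated instruction

	addeq	r0, r0, #1

   The branch is suppressed (state 0 -> 1 -> 3) and the skipped
   instruction is given the inverse of the branch condition until the
   target label is reached (state 3 -> 0). */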
22767
22768 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
22769 instructions. When a COND_EXEC instruction is seen the subsequent
22770 instructions are scanned so that multiple conditional instructions can be
22771 combined into a single IT block. arm_condexec_count and arm_condexec_mask
22772 specify the length and true/false mask for the IT block. These will be
22773 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
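/* For example, with arm_current_cc == ARM_EQ, arm_condexec_masklen == 3
   and arm_condexec_mask == 0x5, thumb2_asm_output_opcode below prints
   "itet eq": the first and third instructions of the block execute if
   EQ and the second if NE. */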
22774
22775 /* Returns the index of the ARM condition code string in
22776 `arm_condition_codes', or ARM_NV if the comparison is invalid.
22777 COMPARISON should be an rtx like `(eq (...) (...))'. */
22778
22779 enum arm_cond_code
22780 maybe_get_arm_condition_code (rtx comparison)
22781 {
22782 machine_mode mode = GET_MODE (XEXP (comparison, 0));
22783 enum arm_cond_code code;
22784 enum rtx_code comp_code = GET_CODE (comparison);
22785
22786 if (GET_MODE_CLASS (mode) != MODE_CC)
22787 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
22788 XEXP (comparison, 1));
22789
22790 switch (mode)
22791 {
22792 case E_CC_DNEmode: code = ARM_NE; goto dominance;
22793 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
22794 case E_CC_DGEmode: code = ARM_GE; goto dominance;
22795 case E_CC_DGTmode: code = ARM_GT; goto dominance;
22796 case E_CC_DLEmode: code = ARM_LE; goto dominance;
22797 case E_CC_DLTmode: code = ARM_LT; goto dominance;
22798 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
22799 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
22800 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
22801 case E_CC_DLTUmode: code = ARM_CC;
22802
22803 dominance:
22804 if (comp_code == EQ)
22805 return ARM_INVERSE_CONDITION_CODE (code);
22806 if (comp_code == NE)
22807 return code;
22808 return ARM_NV;
22809
22810 case E_CC_NOOVmode:
22811 switch (comp_code)
22812 {
22813 case NE: return ARM_NE;
22814 case EQ: return ARM_EQ;
22815 case GE: return ARM_PL;
22816 case LT: return ARM_MI;
22817 default: return ARM_NV;
22818 }
22819
22820 case E_CC_Zmode:
22821 switch (comp_code)
22822 {
22823 case NE: return ARM_NE;
22824 case EQ: return ARM_EQ;
22825 default: return ARM_NV;
22826 }
22827
22828 case E_CC_Nmode:
22829 switch (comp_code)
22830 {
22831 case NE: return ARM_MI;
22832 case EQ: return ARM_PL;
22833 default: return ARM_NV;
22834 }
22835
22836 case E_CCFPEmode:
22837 case E_CCFPmode:
22838 /* We can handle all cases except UNEQ and LTGT. */
22839 switch (comp_code)
22840 {
22841 case GE: return ARM_GE;
22842 case GT: return ARM_GT;
22843 case LE: return ARM_LS;
22844 case LT: return ARM_MI;
22845 case NE: return ARM_NE;
22846 case EQ: return ARM_EQ;
22847 case ORDERED: return ARM_VC;
22848 case UNORDERED: return ARM_VS;
22849 case UNLT: return ARM_LT;
22850 case UNLE: return ARM_LE;
22851 case UNGT: return ARM_HI;
22852 case UNGE: return ARM_PL;
22853 /* UNEQ and LTGT do not have a representation. */
22854 case UNEQ: /* Fall through. */
22855 case LTGT: /* Fall through. */
22856 default: return ARM_NV;
22857 }
22858
22859 case E_CC_SWPmode:
22860 switch (comp_code)
22861 {
22862 case NE: return ARM_NE;
22863 case EQ: return ARM_EQ;
22864 case GE: return ARM_LE;
22865 case GT: return ARM_LT;
22866 case LE: return ARM_GE;
22867 case LT: return ARM_GT;
22868 case GEU: return ARM_LS;
22869 case GTU: return ARM_CC;
22870 case LEU: return ARM_CS;
22871 case LTU: return ARM_HI;
22872 default: return ARM_NV;
22873 }
22874
22875 case E_CC_Cmode:
22876 switch (comp_code)
22877 {
22878 case LTU: return ARM_CS;
22879 case GEU: return ARM_CC;
22880 case NE: return ARM_CS;
22881 case EQ: return ARM_CC;
22882 default: return ARM_NV;
22883 }
22884
22885 case E_CC_CZmode:
22886 switch (comp_code)
22887 {
22888 case NE: return ARM_NE;
22889 case EQ: return ARM_EQ;
22890 case GEU: return ARM_CS;
22891 case GTU: return ARM_HI;
22892 case LEU: return ARM_LS;
22893 case LTU: return ARM_CC;
22894 default: return ARM_NV;
22895 }
22896
22897 case E_CC_NCVmode:
22898 switch (comp_code)
22899 {
22900 case GE: return ARM_GE;
22901 case LT: return ARM_LT;
22902 case GEU: return ARM_CS;
22903 case LTU: return ARM_CC;
22904 default: return ARM_NV;
22905 }
22906
22907 case E_CC_Vmode:
22908 switch (comp_code)
22909 {
22910 case NE: return ARM_VS;
22911 case EQ: return ARM_VC;
22912 default: return ARM_NV;
22913 }
22914
22915 case E_CCmode:
22916 switch (comp_code)
22917 {
22918 case NE: return ARM_NE;
22919 case EQ: return ARM_EQ;
22920 case GE: return ARM_GE;
22921 case GT: return ARM_GT;
22922 case LE: return ARM_LE;
22923 case LT: return ARM_LT;
22924 case GEU: return ARM_CS;
22925 case GTU: return ARM_HI;
22926 case LEU: return ARM_LS;
22927 case LTU: return ARM_CC;
22928 default: return ARM_NV;
22929 }
22930
22931 default: gcc_unreachable ();
22932 }
22933 }
22934
22935 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
22936 static enum arm_cond_code
22937 get_arm_condition_code (rtx comparison)
22938 {
22939 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
22940 gcc_assert (code != ARM_NV);
22941 return code;
22942 }
22943
22944 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
22945 code registers when not targeting Thumb1. The VFP condition register
22946 only exists when generating hard-float code. */
22947 static bool
22948 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
22949 {
22950 if (!TARGET_32BIT)
22951 return false;
22952
22953 *p1 = CC_REGNUM;
22954 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
22955 return true;
22956 }
22957
22958 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
22959 instructions. */
22960 void
22961 thumb2_final_prescan_insn (rtx_insn *insn)
22962 {
22963 rtx_insn *first_insn = insn;
22964 rtx body = PATTERN (insn);
22965 rtx predicate;
22966 enum arm_cond_code code;
22967 int n;
22968 int mask;
22969 int max;
22970
22971 /* max_insns_skipped in the tune was already taken into account in the
22972 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
22973 just emit the IT blocks as best we can. It does not make sense to split
22974 the IT blocks. */
22975 max = MAX_INSN_PER_IT_BLOCK;
22976
22977 /* Remove the previous insn from the count of insns to be output. */
22978 if (arm_condexec_count)
22979 arm_condexec_count--;
22980
22981 /* Nothing to do if we are already inside a conditional block. */
22982 if (arm_condexec_count)
22983 return;
22984
22985 if (GET_CODE (body) != COND_EXEC)
22986 return;
22987
22988 /* Conditional jumps are implemented directly. */
22989 if (JUMP_P (insn))
22990 return;
22991
22992 predicate = COND_EXEC_TEST (body);
22993 arm_current_cc = get_arm_condition_code (predicate);
22994
22995 n = get_attr_ce_count (insn);
22996 arm_condexec_count = 1;
22997 arm_condexec_mask = (1 << n) - 1;
22998 arm_condexec_masklen = n;
22999 /* See if subsequent instructions can be combined into the same block. */
23000 for (;;)
23001 {
23002 insn = next_nonnote_insn (insn);
23003
23004 /* Jumping into the middle of an IT block is illegal, so a label or
23005 barrier terminates the block. */
23006 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23007 break;
23008
23009 body = PATTERN (insn);
23010 /* USE and CLOBBER aren't really insns, so just skip them. */
23011 if (GET_CODE (body) == USE
23012 || GET_CODE (body) == CLOBBER)
23013 continue;
23014
23015 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23016 if (GET_CODE (body) != COND_EXEC)
23017 break;
23018 /* Maximum number of conditionally executed instructions in a block. */
23019 n = get_attr_ce_count (insn);
23020 if (arm_condexec_masklen + n > max)
23021 break;
23022
23023 predicate = COND_EXEC_TEST (body);
23024 code = get_arm_condition_code (predicate);
23025 mask = (1 << n) - 1;
23026 if (arm_current_cc == code)
23027 arm_condexec_mask |= (mask << arm_condexec_masklen);
23028 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23029 break;
23030
23031 arm_condexec_count++;
23032 arm_condexec_masklen += n;
23033
23034 /* A jump must be the last instruction in a conditional block. */
23035 if (JUMP_P (insn))
23036 break;
23037 }
23038 /* Restore recog_data (getting the attributes of other insns can
23039 destroy this array, but final.c assumes that it remains intact
23040 across this call). */
23041 extract_constrain_insn_cached (first_insn);
23042 }
23043
23044 void
23045 arm_final_prescan_insn (rtx_insn *insn)
23046 {
23047 /* BODY will hold the body of INSN. */
23048 rtx body = PATTERN (insn);
23049
23050 /* This will be 1 if we are trying to repeat the trick, and things need
23051 to be reversed if it appears to fail. */
23052 int reverse = 0;
23053
23054 /* If we start with a return insn, we only succeed if we find another one. */
23055 int seeking_return = 0;
23056 enum rtx_code return_code = UNKNOWN;
23057
23058 /* START_INSN will hold the insn from where we start looking. This is the
23059 first insn after the following code_label if REVERSE is true. */
23060 rtx_insn *start_insn = insn;
23061
23062 /* If in state 4, check if the target branch is reached, in order to
23063 change back to state 0. */
23064 if (arm_ccfsm_state == 4)
23065 {
23066 if (insn == arm_target_insn)
23067 {
23068 arm_target_insn = NULL;
23069 arm_ccfsm_state = 0;
23070 }
23071 return;
23072 }
23073
23074 /* If in state 3, it is possible to repeat the trick, if this insn is an
23075 unconditional branch to a label, and immediately following this branch
23076 is the previous target label which is only used once, and the label this
23077 branch jumps to is not too far off. */
23078 if (arm_ccfsm_state == 3)
23079 {
23080 if (simplejump_p (insn))
23081 {
23082 start_insn = next_nonnote_insn (start_insn);
23083 if (BARRIER_P (start_insn))
23084 {
23085 /* XXX Isn't this always a barrier? */
23086 start_insn = next_nonnote_insn (start_insn);
23087 }
23088 if (LABEL_P (start_insn)
23089 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23090 && LABEL_NUSES (start_insn) == 1)
23091 reverse = TRUE;
23092 else
23093 return;
23094 }
23095 else if (ANY_RETURN_P (body))
23096 {
23097 start_insn = next_nonnote_insn (start_insn);
23098 if (BARRIER_P (start_insn))
23099 start_insn = next_nonnote_insn (start_insn);
23100 if (LABEL_P (start_insn)
23101 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23102 && LABEL_NUSES (start_insn) == 1)
23103 {
23104 reverse = TRUE;
23105 seeking_return = 1;
23106 return_code = GET_CODE (body);
23107 }
23108 else
23109 return;
23110 }
23111 else
23112 return;
23113 }
23114
23115 gcc_assert (!arm_ccfsm_state || reverse);
23116 if (!JUMP_P (insn))
23117 return;
23118
23119 /* This jump might be paralleled with a clobber of the condition codes;
23120 the jump should always come first. */
23121 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23122 body = XVECEXP (body, 0, 0);
23123
23124 if (reverse
23125 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23126 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23127 {
23128 int insns_skipped;
23129 int fail = FALSE, succeed = FALSE;
23130 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23131 int then_not_else = TRUE;
23132 rtx_insn *this_insn = start_insn;
23133 rtx label = 0;
23134
23135 /* Register the insn jumped to. */
23136 if (reverse)
23137 {
23138 if (!seeking_return)
23139 label = XEXP (SET_SRC (body), 0);
23140 }
23141 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23142 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23143 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23144 {
23145 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23146 then_not_else = FALSE;
23147 }
23148 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23149 {
23150 seeking_return = 1;
23151 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23152 }
23153 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23154 {
23155 seeking_return = 1;
23156 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23157 then_not_else = FALSE;
23158 }
23159 else
23160 gcc_unreachable ();
23161
23162 /* See how many insns this branch skips, and what kind of insns. If all
23163 insns are okay, and the label or unconditional branch to the same
23164 label is not too far away, succeed. */
23165 for (insns_skipped = 0;
23166 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23167 {
23168 rtx scanbody;
23169
23170 this_insn = next_nonnote_insn (this_insn);
23171 if (!this_insn)
23172 break;
23173
23174 switch (GET_CODE (this_insn))
23175 {
23176 case CODE_LABEL:
23177 /* Succeed if it is the target label, otherwise fail since
23178 control falls in from somewhere else. */
23179 if (this_insn == label)
23180 {
23181 arm_ccfsm_state = 1;
23182 succeed = TRUE;
23183 }
23184 else
23185 fail = TRUE;
23186 break;
23187
23188 case BARRIER:
23189 /* Succeed if the following insn is the target label.
23190 Otherwise fail.
23191 If return insns are used then the last insn in a function
23192 will be a barrier. */
23193 this_insn = next_nonnote_insn (this_insn);
23194 if (this_insn && this_insn == label)
23195 {
23196 arm_ccfsm_state = 1;
23197 succeed = TRUE;
23198 }
23199 else
23200 fail = TRUE;
23201 break;
23202
23203 case CALL_INSN:
23204 /* The AAPCS says that conditional calls should not be
23205 used since they make interworking inefficient (the
23206 linker can't transform BL<cond> into BLX). That's
23207 only a problem if the machine has BLX. */
23208 if (arm_arch5)
23209 {
23210 fail = TRUE;
23211 break;
23212 }
23213
23214 /* Succeed if the following insn is the target label, or
23215 if the following two insns are a barrier and the
23216 target label. */
23217 this_insn = next_nonnote_insn (this_insn);
23218 if (this_insn && BARRIER_P (this_insn))
23219 this_insn = next_nonnote_insn (this_insn);
23220
23221 if (this_insn && this_insn == label
23222 && insns_skipped < max_insns_skipped)
23223 {
23224 arm_ccfsm_state = 1;
23225 succeed = TRUE;
23226 }
23227 else
23228 fail = TRUE;
23229 break;
23230
23231 case JUMP_INSN:
23232 /* If this is an unconditional branch to the same label, succeed.
23233 If it is to another label, do nothing. If it is conditional,
23234 fail. */
23235 /* XXX Probably, the tests for SET and the PC are
23236 unnecessary. */
23237
23238 scanbody = PATTERN (this_insn);
23239 if (GET_CODE (scanbody) == SET
23240 && GET_CODE (SET_DEST (scanbody)) == PC)
23241 {
23242 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23243 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23244 {
23245 arm_ccfsm_state = 2;
23246 succeed = TRUE;
23247 }
23248 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23249 fail = TRUE;
23250 }
23251 /* Fail if a conditional return is undesirable (e.g. on a
23252 StrongARM), but still allow this if optimizing for size. */
23253 else if (GET_CODE (scanbody) == return_code
23254 && !use_return_insn (TRUE, NULL)
23255 && !optimize_size)
23256 fail = TRUE;
23257 else if (GET_CODE (scanbody) == return_code)
23258 {
23259 arm_ccfsm_state = 2;
23260 succeed = TRUE;
23261 }
23262 else if (GET_CODE (scanbody) == PARALLEL)
23263 {
23264 switch (get_attr_conds (this_insn))
23265 {
23266 case CONDS_NOCOND:
23267 break;
23268 default:
23269 fail = TRUE;
23270 break;
23271 }
23272 }
23273 else
23274 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23275
23276 break;
23277
23278 case INSN:
23279 /* Instructions using or affecting the condition codes make it
23280 fail. */
23281 scanbody = PATTERN (this_insn);
23282 if (!(GET_CODE (scanbody) == SET
23283 || GET_CODE (scanbody) == PARALLEL)
23284 || get_attr_conds (this_insn) != CONDS_NOCOND)
23285 fail = TRUE;
23286 break;
23287
23288 default:
23289 break;
23290 }
23291 }
23292 if (succeed)
23293 {
23294 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23295 arm_target_label = CODE_LABEL_NUMBER (label);
23296 else
23297 {
23298 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23299
23300 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23301 {
23302 this_insn = next_nonnote_insn (this_insn);
23303 gcc_assert (!this_insn
23304 || (!BARRIER_P (this_insn)
23305 && !LABEL_P (this_insn)));
23306 }
23307 if (!this_insn)
23308 {
23309 /* Oh, dear! We ran off the end; give up. */
23310 extract_constrain_insn_cached (insn);
23311 arm_ccfsm_state = 0;
23312 arm_target_insn = NULL;
23313 return;
23314 }
23315 arm_target_insn = this_insn;
23316 }
23317
23318 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23319 what it was. */
23320 if (!reverse)
23321 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23322
23323 if (reverse || then_not_else)
23324 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23325 }
23326
23327 /* Restore recog_data (getting the attributes of other insns can
23328 destroy this array, but final.c assumes that it remains intact
23329 across this call). */
23330 extract_constrain_insn_cached (insn);
23331 }
23332 }
23333
23334 /* Output IT instructions. */
23335 void
23336 thumb2_asm_output_opcode (FILE * stream)
23337 {
23338 char buff[5];
23339 int n;
23340
23341 if (arm_condexec_mask)
23342 {
23343 for (n = 0; n < arm_condexec_masklen; n++)
23344 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23345 buff[n] = 0;
23346 asm_fprintf (stream, "i%s\t%s\n\t", buff,
23347 arm_condition_codes[arm_current_cc]);
23348 arm_condexec_mask = 0;
23349 }
23350 }
23351
23352 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23353 static bool
23354 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23355 {
23356 if (GET_MODE_CLASS (mode) == MODE_CC)
23357 return (regno == CC_REGNUM
23358 || (TARGET_HARD_FLOAT
23359 && regno == VFPCC_REGNUM));
23360
23361 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23362 return false;
23363
23364 if (TARGET_THUMB1)
23365 /* For the Thumb we only allow values bigger than SImode in
23366 registers 0 - 6, so that there is always a second low
23367 register available to hold the upper part of the value.
23368 We probably ought to ensure that the register is the
23369 start of an even-numbered register pair. */
23370 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23371
23372 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23373 {
23374 if (mode == SFmode || mode == SImode)
23375 return VFP_REGNO_OK_FOR_SINGLE (regno);
23376
23377 if (mode == DFmode)
23378 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23379
23380 if (mode == HFmode)
23381 return VFP_REGNO_OK_FOR_SINGLE (regno);
23382
23383 /* VFP registers can hold HImode values. */
23384 if (mode == HImode)
23385 return VFP_REGNO_OK_FOR_SINGLE (regno);
23386
23387 if (TARGET_NEON)
23388 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23389 || (VALID_NEON_QREG_MODE (mode)
23390 && NEON_REGNO_OK_FOR_QUAD (regno))
23391 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23392 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23393 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23394 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23395 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23396
23397 return false;
23398 }
23399
23400 if (TARGET_REALLY_IWMMXT)
23401 {
23402 if (IS_IWMMXT_GR_REGNUM (regno))
23403 return mode == SImode;
23404
23405 if (IS_IWMMXT_REGNUM (regno))
23406 return VALID_IWMMXT_REG_MODE (mode);
23407 }
23408
23409 /* We allow almost any value to be stored in the general registers.
23410 Restrict doubleword quantities to even register pairs in ARM state
23411 so that we can use ldrd. Do not allow very large Neon structure
23412 opaque modes in general registers; they would use too many. */
23413 if (regno <= LAST_ARM_REGNUM)
23414 {
23415 if (ARM_NUM_REGS (mode) > 4)
23416 return false;
23417
23418 if (TARGET_THUMB2)
23419 return true;
23420
23421 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23422 }
23423
23424 if (regno == FRAME_POINTER_REGNUM
23425 || regno == ARG_POINTER_REGNUM)
23426 /* We only allow integers in the fake hard registers. */
23427 return GET_MODE_CLASS (mode) == MODE_INT;
23428
23429 return false;
23430 }
23431
23432 /* Implement TARGET_MODES_TIEABLE_P. */
23433
23434 static bool
23435 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23436 {
23437 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23438 return true;
23439
23440 /* We specifically want to allow elements of "structure" modes to
23441 be tieable to the structure. This more general condition allows
23442 other rarer situations too. */
23443 if (TARGET_NEON
23444 && (VALID_NEON_DREG_MODE (mode1)
23445 || VALID_NEON_QREG_MODE (mode1)
23446 || VALID_NEON_STRUCT_MODE (mode1))
23447 && (VALID_NEON_DREG_MODE (mode2)
23448 || VALID_NEON_QREG_MODE (mode2)
23449 || VALID_NEON_STRUCT_MODE (mode2)))
23450 return true;
23451
23452 return false;
23453 }
23454
23455 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23456 not used in arm mode. */
23457
23458 enum reg_class
23459 arm_regno_class (int regno)
23460 {
23461 if (regno == PC_REGNUM)
23462 return NO_REGS;
23463
23464 if (TARGET_THUMB1)
23465 {
23466 if (regno == STACK_POINTER_REGNUM)
23467 return STACK_REG;
23468 if (regno == CC_REGNUM)
23469 return CC_REG;
23470 if (regno < 8)
23471 return LO_REGS;
23472 return HI_REGS;
23473 }
23474
23475 if (TARGET_THUMB2 && regno < 8)
23476 return LO_REGS;
23477
23478 if ( regno <= LAST_ARM_REGNUM
23479 || regno == FRAME_POINTER_REGNUM
23480 || regno == ARG_POINTER_REGNUM)
23481 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23482
23483 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23484 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23485
23486 if (IS_VFP_REGNUM (regno))
23487 {
23488 if (regno <= D7_VFP_REGNUM)
23489 return VFP_D0_D7_REGS;
23490 else if (regno <= LAST_LO_VFP_REGNUM)
23491 return VFP_LO_REGS;
23492 else
23493 return VFP_HI_REGS;
23494 }
23495
23496 if (IS_IWMMXT_REGNUM (regno))
23497 return IWMMXT_REGS;
23498
23499 if (IS_IWMMXT_GR_REGNUM (regno))
23500 return IWMMXT_GR_REGS;
23501
23502 return NO_REGS;
23503 }
23504
23505 /* Handle a special case when computing the offset
23506 of an argument from the frame pointer. */
23507 int
23508 arm_debugger_arg_offset (int value, rtx addr)
23509 {
23510 rtx_insn *insn;
23511
23512 /* We are only interested if dbxout_parms() failed to compute the offset. */
23513 if (value != 0)
23514 return 0;
23515
23516 /* We can only cope with the case where the address is held in a register. */
23517 if (!REG_P (addr))
23518 return 0;
23519
23520 /* If we are using the frame pointer to point at the argument, then
23521 an offset of 0 is correct. */
23522 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23523 return 0;
23524
23525 /* If we are using the stack pointer to point at the
23526 argument, then an offset of 0 is correct. */
23527 /* ??? Check this is consistent with thumb2 frame layout. */
23528 if ((TARGET_THUMB || !frame_pointer_needed)
23529 && REGNO (addr) == SP_REGNUM)
23530 return 0;
23531
23532 /* Oh dear. The argument is pointed to by a register rather
23533 than being held in a register, or being stored at a known
23534 offset from the frame pointer. Since GDB only understands
23535 those two kinds of argument we must translate the address
23536 held in the register into an offset from the frame pointer.
23537 We do this by searching through the insns for the function
23538 looking to see where this register gets its value. If the
23539 register is initialized from the frame pointer plus an offset
23540 then we are in luck and we can continue, otherwise we give up.
23541
23542 This code is exercised by producing debugging information
23543 for a function with arguments like this:
23544
23545 double func (double a, double b, int c, double d) {return d;}
23546
23547 Without this code the stab for parameter 'd' will be set to
23548 an offset of 0 from the frame pointer, rather than 8. */
23549
23550 /* The if() statement says:
23551
23552 If the insn is a normal instruction
23553 and if the insn is setting the value in a register
23554 and if the register being set is the register holding the address of the argument
23555 and if the address is computed by an addition
23556 that involves adding to a register
23557 which is the frame pointer
23558 a constant integer
23559
23560 then... */
23561
23562 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23563 {
23564 if ( NONJUMP_INSN_P (insn)
23565 && GET_CODE (PATTERN (insn)) == SET
23566 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23567 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23568 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23569 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23570 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23571 )
23572 {
23573 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23574
23575 break;
23576 }
23577 }
23578
23579 if (value == 0)
23580 {
23581 debug_rtx (addr);
23582 warning (0, "unable to compute real location of stacked parameter");
23583 value = 8; /* XXX magic hack */
23584 }
23585
23586 return value;
23587 }
23588 \f
23589 /* Implement TARGET_PROMOTED_TYPE. */
23590
23591 static tree
23592 arm_promoted_type (const_tree t)
23593 {
23594 if (SCALAR_FLOAT_TYPE_P (t)
23595 && TYPE_PRECISION (t) == 16
23596 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23597 return float_type_node;
23598 return NULL_TREE;
23599 }
23600
23601 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23602 This simply adds HFmode as a supported mode; even though we don't
23603 implement arithmetic on this type directly, it's supported by
23604 optabs conversions, much the way the double-word arithmetic is
23605 special-cased in the default hook. */
23606
23607 static bool
23608 arm_scalar_mode_supported_p (scalar_mode mode)
23609 {
23610 if (mode == HFmode)
23611 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23612 else if (ALL_FIXED_POINT_MODE_P (mode))
23613 return true;
23614 else
23615 return default_scalar_mode_supported_p (mode);
23616 }
23617
23618 /* Set the value of FLT_EVAL_METHOD.
23619 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23620
23621 0: evaluate all operations and constants, whose semantic type has at
23622 most the range and precision of type float, to the range and
23623 precision of float; evaluate all other operations and constants to
23624 the range and precision of the semantic type;
23625
23626 N, where _FloatN is a supported interchange floating type
23627 evaluate all operations and constants, whose semantic type has at
23628 most the range and precision of _FloatN type, to the range and
23629 precision of the _FloatN type; evaluate all other operations and
23630 constants to the range and precision of the semantic type;
23631
23632 If we have the ARMv8.2-A extensions then we support _Float16 in native
23633 precision, so we should set this to 16. Otherwise, we support the type,
23634 but want to evaluate expressions in float precision, so set this to
23635 0. */
23636
23637 static enum flt_eval_method
23638 arm_excess_precision (enum excess_precision_type type)
23639 {
23640 switch (type)
23641 {
23642 case EXCESS_PRECISION_TYPE_FAST:
23643 case EXCESS_PRECISION_TYPE_STANDARD:
23644 /* We can calculate either in 16-bit range and precision or
23645 32-bit range and precision. Make that decision based on whether
23646 we have native support for the ARMv8.2-A 16-bit floating-point
23647 instructions or not. */
23648 return (TARGET_VFP_FP16INST
23649 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23650 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23651 case EXCESS_PRECISION_TYPE_IMPLICIT:
23652 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23653 default:
23654 gcc_unreachable ();
23655 }
23656 return FLT_EVAL_METHOD_UNPREDICTABLE;
23657 }
23658
23659
23660 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23661 _Float16 if we are using anything other than ieee format for 16-bit
23662 floating point. Otherwise, punt to the default implementation. */
23663 static opt_scalar_float_mode
23664 arm_floatn_mode (int n, bool extended)
23665 {
23666 if (!extended && n == 16)
23667 {
23668 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23669 return HFmode;
23670 return opt_scalar_float_mode ();
23671 }
23672
23673 return default_floatn_mode (n, extended);
23674 }
23675
23676
23677 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23678 not to early-clobber SRC registers in the process.
23679
23680 We assume that the operands described by SRC and DEST represent a
23681 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23682 number of components into which the copy has been decomposed. */
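/* For example, when the destination overlaps the source and does not start
at a lower register number, the component moves are listed in reverse
order so that no source register is clobbered before it is read. */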
23683 void
23684 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23685 {
23686 unsigned int i;
23687
23688 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23689 || REGNO (operands[0]) < REGNO (operands[1]))
23690 {
23691 for (i = 0; i < count; i++)
23692 {
23693 operands[2 * i] = dest[i];
23694 operands[2 * i + 1] = src[i];
23695 }
23696 }
23697 else
23698 {
23699 for (i = 0; i < count; i++)
23700 {
23701 operands[2 * i] = dest[count - i - 1];
23702 operands[2 * i + 1] = src[count - i - 1];
23703 }
23704 }
23705 }
23706
23707 /* Split operands into moves from op[1] + op[2] into op[0]. */
23708
23709 void
23710 neon_split_vcombine (rtx operands[3])
23711 {
23712 unsigned int dest = REGNO (operands[0]);
23713 unsigned int src1 = REGNO (operands[1]);
23714 unsigned int src2 = REGNO (operands[2]);
23715 machine_mode halfmode = GET_MODE (operands[1]);
23716 unsigned int halfregs = HARD_REGNO_NREGS (src1, halfmode);
23717 rtx destlo, desthi;
23718
23719 if (src1 == dest && src2 == dest + halfregs)
23720 {
23721 /* No-op move. Can't split to nothing; emit something. */
23722 emit_note (NOTE_INSN_DELETED);
23723 return;
23724 }
23725
23726 /* Preserve register attributes for variable tracking. */
23727 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23728 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23729 GET_MODE_SIZE (halfmode));
23730
23731 /* Special case of reversed high/low parts. Use VSWP. */
23732 if (src2 == dest && src1 == dest + halfregs)
23733 {
23734 rtx x = gen_rtx_SET (destlo, operands[1]);
23735 rtx y = gen_rtx_SET (desthi, operands[2]);
23736 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
23737 return;
23738 }
23739
23740 if (!reg_overlap_mentioned_p (operands[2], destlo))
23741 {
23742 /* Try to avoid unnecessary moves if part of the result
23743 is in the right place already. */
23744 if (src1 != dest)
23745 emit_move_insn (destlo, operands[1]);
23746 if (src2 != dest + halfregs)
23747 emit_move_insn (desthi, operands[2]);
23748 }
23749 else
23750 {
23751 if (src2 != dest + halfregs)
23752 emit_move_insn (desthi, operands[2]);
23753 if (src1 != dest)
23754 emit_move_insn (destlo, operands[1]);
23755 }
23756 }
23757 \f
23758 /* Return the number (counting from 0) of
23759 the least significant set bit in MASK. */
23760
23761 inline static int
23762 number_of_first_bit_set (unsigned mask)
23763 {
23764 return ctz_hwi (mask);
23765 }
23766
23767 /* Like emit_multi_reg_push, but allowing for a different set of
23768 registers to be described as saved. MASK is the set of registers
23769 to be saved; REAL_REGS is the set of registers to be described as
23770 saved. If REAL_REGS is 0, only describe the stack adjustment. */
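/* For example, with MASK covering r4, r5 and lr, the emitted parallel
represents a push of those three registers with SP pre-decremented by 12,
while REAL_REGS only controls which saves the attached unwind note
describes. */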
23771
23772 static rtx_insn *
23773 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
23774 {
23775 unsigned long regno;
23776 rtx par[10], tmp, reg;
23777 rtx_insn *insn;
23778 int i, j;
23779
23780 /* Build the parallel of the registers actually being stored. */
23781 for (i = 0; mask; ++i, mask &= mask - 1)
23782 {
23783 regno = ctz_hwi (mask);
23784 reg = gen_rtx_REG (SImode, regno);
23785
23786 if (i == 0)
23787 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
23788 else
23789 tmp = gen_rtx_USE (VOIDmode, reg);
23790
23791 par[i] = tmp;
23792 }
23793
23794 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23795 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
23796 tmp = gen_frame_mem (BLKmode, tmp);
23797 tmp = gen_rtx_SET (tmp, par[0]);
23798 par[0] = tmp;
23799
23800 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
23801 insn = emit_insn (tmp);
23802
23803 /* Always build the stack adjustment note for unwind info. */
23804 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
23805 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
23806 par[0] = tmp;
23807
23808 /* Build the parallel of the registers recorded as saved for unwind. */
23809 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
23810 {
23811 regno = ctz_hwi (real_regs);
23812 reg = gen_rtx_REG (SImode, regno);
23813
23814 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
23815 tmp = gen_frame_mem (SImode, tmp);
23816 tmp = gen_rtx_SET (tmp, reg);
23817 RTX_FRAME_RELATED_P (tmp) = 1;
23818 par[j + 1] = tmp;
23819 }
23820
23821 if (j == 0)
23822 tmp = par[0];
23823 else
23824 {
23825 RTX_FRAME_RELATED_P (par[0]) = 1;
23826 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
23827 }
23828
23829 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
23830
23831 return insn;
23832 }
23833
23834 /* Emit assembly code to pop the registers in MASK from the stack.
23835 F is the assembly file. */
23836 static void
23837 thumb_pop (FILE *f, unsigned long mask)
23838 {
23839 int regno;
23840 int lo_mask = mask & 0xFF;
23841
23842 gcc_assert (mask);
23843
23844 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
23845 {
23846 /* Special case. Do not generate a POP PC statement here, do it in
23847 thumb_exit() */
23848 thumb_exit (f, -1);
23849 return;
23850 }
23851
23852 fprintf (f, "\tpop\t{");
23853
23854 /* Look at the low registers first. */
23855 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
23856 {
23857 if (lo_mask & 1)
23858 {
23859 asm_fprintf (f, "%r", regno);
23860
23861 if ((lo_mask & ~1) != 0)
23862 fprintf (f, ", ");
23863 }
23864 }
23865
23866 if (mask & (1 << PC_REGNUM))
23867 {
23868 /* Catch popping the PC. */
23869 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
23870 || IS_CMSE_ENTRY (arm_current_func_type ()))
23871 {
23872 /* The PC is never popped directly; instead
23873 it is popped into r3 and then BX is used. */
23874 fprintf (f, "}\n");
23875
23876 thumb_exit (f, -1);
23877
23878 return;
23879 }
23880 else
23881 {
23882 if (mask & 0xFF)
23883 fprintf (f, ", ");
23884
23885 asm_fprintf (f, "%r", PC_REGNUM);
23886 }
23887 }
23888
23889 fprintf (f, "}\n");
23890 }
23891
23892 /* Generate code to return from a thumb function.
23893 If 'reg_containing_return_addr' is -1, then the return address is
23894 actually on the stack, at the stack pointer. */
23895 static void
23896 thumb_exit (FILE *f, int reg_containing_return_addr)
23897 {
23898 unsigned regs_available_for_popping;
23899 unsigned regs_to_pop;
23900 int pops_needed;
23901 unsigned available;
23902 unsigned required;
23903 machine_mode mode;
23904 int size;
23905 int restore_a4 = FALSE;
23906
23907 /* Compute the registers we need to pop. */
23908 regs_to_pop = 0;
23909 pops_needed = 0;
23910
23911 if (reg_containing_return_addr == -1)
23912 {
23913 regs_to_pop |= 1 << LR_REGNUM;
23914 ++pops_needed;
23915 }
23916
23917 if (TARGET_BACKTRACE)
23918 {
23919 /* Restore the (ARM) frame pointer and stack pointer. */
23920 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
23921 pops_needed += 2;
23922 }
23923
23924 /* If there is nothing to pop then just emit the BX instruction and
23925 return. */
23926 if (pops_needed == 0)
23927 {
23928 if (crtl->calls_eh_return)
23929 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
23930
23931 if (IS_CMSE_ENTRY (arm_current_func_type ()))
23932 {
23933 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
23934 reg_containing_return_addr);
23935 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
23936 }
23937 else
23938 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
23939 return;
23940 }
23941 /* Otherwise if we are not supporting interworking and we have not created
23942 a backtrace structure and the function was not entered in ARM mode then
23943 just pop the return address straight into the PC. */
23944 else if (!TARGET_INTERWORK
23945 && !TARGET_BACKTRACE
23946 && !is_called_in_ARM_mode (current_function_decl)
23947 && !crtl->calls_eh_return
23948 && !IS_CMSE_ENTRY (arm_current_func_type ()))
23949 {
23950 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
23951 return;
23952 }
23953
23954 /* Find out how many of the (return) argument registers we can corrupt. */
23955 regs_available_for_popping = 0;
23956
23957 /* If returning via __builtin_eh_return, the bottom three registers
23958 all contain information needed for the return. */
23959 if (crtl->calls_eh_return)
23960 size = 12;
23961 else
23962 {
23963 /* Deduce the registers used from the function's
23964 return value where possible. This is more reliable than examining
23965 df_regs_ever_live_p () because that will be set if the register is
23966 ever used in the function, not just if the register is used
23967 to hold a return value. */
23968
23969 if (crtl->return_rtx != 0)
23970 mode = GET_MODE (crtl->return_rtx);
23971 else
23972 mode = DECL_MODE (DECL_RESULT (current_function_decl));
23973
23974 size = GET_MODE_SIZE (mode);
23975
23976 if (size == 0)
23977 {
23978 /* In a void function we can use any argument register.
23979 In a function that returns a structure on the stack
23980 we can use the second and third argument registers. */
23981 if (mode == VOIDmode)
23982 regs_available_for_popping =
23983 (1 << ARG_REGISTER (1))
23984 | (1 << ARG_REGISTER (2))
23985 | (1 << ARG_REGISTER (3));
23986 else
23987 regs_available_for_popping =
23988 (1 << ARG_REGISTER (2))
23989 | (1 << ARG_REGISTER (3));
23990 }
23991 else if (size <= 4)
23992 regs_available_for_popping =
23993 (1 << ARG_REGISTER (2))
23994 | (1 << ARG_REGISTER (3));
23995 else if (size <= 8)
23996 regs_available_for_popping =
23997 (1 << ARG_REGISTER (3));
23998 }
23999
24000 /* Match registers to be popped with registers into which we pop them. */
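/* Each loop iteration clears the lowest set bit of both masks, so, for
example, two available registers against three registers still to pop
leaves pops_needed at 1. */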
24001 for (available = regs_available_for_popping,
24002 required = regs_to_pop;
24003 required != 0 && available != 0;
24004 available &= ~(available & - available),
24005 required &= ~(required & - required))
24006 -- pops_needed;
24007
24008 /* If we have any popping registers left over, remove them. */
24009 if (available > 0)
24010 regs_available_for_popping &= ~available;
24011
24012 /* Otherwise if we need another popping register we can use
24013 the fourth argument register. */
24014 else if (pops_needed)
24015 {
24016 /* If we have not found any free argument registers and
24017 reg a4 contains the return address, we must move it. */
24018 if (regs_available_for_popping == 0
24019 && reg_containing_return_addr == LAST_ARG_REGNUM)
24020 {
24021 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24022 reg_containing_return_addr = LR_REGNUM;
24023 }
24024 else if (size > 12)
24025 {
24026 /* Register a4 is being used to hold part of the return value,
24027 but we have dire need of a free, low register. */
24028 restore_a4 = TRUE;
24029
24030 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
24031 }
24032
24033 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24034 {
24035 /* The fourth argument register is available. */
24036 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24037
24038 --pops_needed;
24039 }
24040 }
24041
24042 /* Pop as many registers as we can. */
24043 thumb_pop (f, regs_available_for_popping);
24044
24045 /* Process the registers we popped. */
24046 if (reg_containing_return_addr == -1)
24047 {
24048 /* The return address was popped into the lowest numbered register. */
24049 regs_to_pop &= ~(1 << LR_REGNUM);
24050
24051 reg_containing_return_addr =
24052 number_of_first_bit_set (regs_available_for_popping);
24053
24054 /* Remove this register from the mask of available registers, so that
24055 the return address will not be corrupted by further pops. */
24056 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24057 }
24058
24059 /* If we popped other registers then handle them here. */
24060 if (regs_available_for_popping)
24061 {
24062 int frame_pointer;
24063
24064 /* Work out which register currently contains the frame pointer. */
24065 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24066
24067 /* Move it into the correct place. */
24068 asm_fprintf (f, "\tmov\t%r, %r\n",
24069 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24070
24071 /* (Temporarily) remove it from the mask of popped registers. */
24072 regs_available_for_popping &= ~(1 << frame_pointer);
24073 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24074
24075 if (regs_available_for_popping)
24076 {
24077 int stack_pointer;
24078
24079 /* We popped the stack pointer as well,
24080 find the register that contains it. */
24081 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24082
24083 /* Move it into the stack register. */
24084 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24085
24086 /* At this point we have popped all necessary registers, so
24087 do not worry about restoring regs_available_for_popping
24088 to its correct value:
24089
24090 assert (pops_needed == 0)
24091 assert (regs_available_for_popping == (1 << frame_pointer))
24092 assert (regs_to_pop == (1 << STACK_POINTER)) */
24093 }
24094 else
24095 {
24096 /* Since we have just moved the popped value into the frame
24097 pointer, the popping register is available for reuse, and
24098 we know that we still have the stack pointer left to pop. */
24099 regs_available_for_popping |= (1 << frame_pointer);
24100 }
24101 }
24102
24103 /* If we still have registers left on the stack, but we no longer have
24104 any registers into which we can pop them, then we must move the return
24105 address into the link register and make available the register that
24106 contained it. */
24107 if (regs_available_for_popping == 0 && pops_needed > 0)
24108 {
24109 regs_available_for_popping |= 1 << reg_containing_return_addr;
24110
24111 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24112 reg_containing_return_addr);
24113
24114 reg_containing_return_addr = LR_REGNUM;
24115 }
24116
24117 /* If we have registers left on the stack then pop some more.
24118 We know that at most we will want to pop FP and SP. */
24119 if (pops_needed > 0)
24120 {
24121 int popped_into;
24122 int move_to;
24123
24124 thumb_pop (f, regs_available_for_popping);
24125
24126 /* We have popped either FP or SP.
24127 Move whichever one it is into the correct register. */
24128 popped_into = number_of_first_bit_set (regs_available_for_popping);
24129 move_to = number_of_first_bit_set (regs_to_pop);
24130
24131 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24132 --pops_needed;
24133 }
24134
24135 /* If we still have not popped everything then we must have only
24136 had one register available to us and we are now popping the SP. */
24137 if (pops_needed > 0)
24138 {
24139 int popped_into;
24140
24141 thumb_pop (f, regs_available_for_popping);
24142
24143 popped_into = number_of_first_bit_set (regs_available_for_popping);
24144
24145 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24146 /*
24147 assert (regs_to_pop == (1 << STACK_POINTER))
24148 assert (pops_needed == 1)
24149 */
24150 }
24151
24152 /* If necessary restore the a4 register. */
24153 if (restore_a4)
24154 {
24155 if (reg_containing_return_addr != LR_REGNUM)
24156 {
24157 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24158 reg_containing_return_addr = LR_REGNUM;
24159 }
24160
24161 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24162 }
24163
24164 if (crtl->calls_eh_return)
24165 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24166
24167 /* Return to caller. */
24168 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24169 {
24170 /* This is for the cases where LR is not being used to contain the return
24171 address. It may therefore contain information that we might not want
24172 to leak, hence it must be cleared. The value in R0 will never be a
24173 secret at this point, so it is safe to use it, see the clearing code
24174 in 'cmse_nonsecure_entry_clear_before_return'. */
24175 if (reg_containing_return_addr != LR_REGNUM)
24176 asm_fprintf (f, "\tmov\tlr, r0\n");
24177
24178 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24179 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24180 }
24181 else
24182 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24183 }
24184 \f
24185 /* Scan INSN just before assembler is output for it.
24186 For Thumb-1, we track the status of the condition codes; this
24187 information is used in the cbranchsi4_insn pattern. */
24188 void
24189 thumb1_final_prescan_insn (rtx_insn *insn)
24190 {
24191 if (flag_print_asm_name)
24192 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24193 INSN_ADDRESSES (INSN_UID (insn)));
24194 /* Don't overwrite the previous setter when we get to a cbranch. */
24195 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24196 {
24197 enum attr_conds conds;
24198
24199 if (cfun->machine->thumb1_cc_insn)
24200 {
24201 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24202 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24203 CC_STATUS_INIT;
24204 }
24205 conds = get_attr_conds (insn);
24206 if (conds == CONDS_SET)
24207 {
24208 rtx set = single_set (insn);
24209 cfun->machine->thumb1_cc_insn = insn;
24210 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24211 cfun->machine->thumb1_cc_op1 = const0_rtx;
24212 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24213 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24214 {
24215 rtx src1 = XEXP (SET_SRC (set), 1);
24216 if (src1 == const0_rtx)
24217 cfun->machine->thumb1_cc_mode = CCmode;
24218 }
24219 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24220 {
24221 /* Record the src register operand instead of dest because
24222 cprop_hardreg pass propagates src. */
24223 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24224 }
24225 }
24226 else if (conds != CONDS_NOCOND)
24227 cfun->machine->thumb1_cc_insn = NULL_RTX;
24228 }
24229
24230 /* Check if an unexpected far jump is used. */
24231 if (cfun->machine->lr_save_eliminated
24232 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24233 internal_error("Unexpected thumb1 far jump");
24234 }
24235
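/* Return nonzero if all the set bits of VAL (taken as a 32-bit value) fit
within an 8-bit field shifted left by 0 to 24 bits; for example
0x00ff0000 (0xff << 16) qualifies, while 0x101 does not. */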
24236 int
24237 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24238 {
24239 unsigned HOST_WIDE_INT mask = 0xff;
24240 int i;
24241
24242 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24243 if (val == 0) /* XXX */
24244 return 0;
24245
24246 for (i = 0; i < 25; i++)
24247 if ((val & (mask << i)) == val)
24248 return 1;
24249
24250 return 0;
24251 }
24252
24253 /* Returns nonzero if the current function contains,
24254 or might contain, a far jump. */
24255 static int
24256 thumb_far_jump_used_p (void)
24257 {
24258 rtx_insn *insn;
24259 bool far_jump = false;
24260 unsigned int func_size = 0;
24261
24262 /* If we have already decided that far jumps may be used,
24263 do not bother checking again, and always return true even if
24264 it turns out that they are not being used. Once we have made
24265 the decision that far jumps are present (and hence that the link
24266 register will be pushed onto the stack) we cannot go back on it. */
24267 if (cfun->machine->far_jump_used)
24268 return 1;
24269
24270 /* If this function is not being called from the prologue/epilogue
24271 generation code then it must be being called from the
24272 INITIAL_ELIMINATION_OFFSET macro. */
24273 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24274 {
24275 /* In this case we know that we are being asked about the elimination
24276 of the arg pointer register. If that register is not being used,
24277 then there are no arguments on the stack, and we do not have to
24278 worry that a far jump might force the prologue to push the link
24279 register, changing the stack offsets. In this case we can just
24280 return false, since the presence of far jumps in the function will
24281 not affect stack offsets.
24282
24283 If the arg pointer is live (or if it was live, but has now been
24284 eliminated and so set to dead) then we do have to test to see if
24285 the function might contain a far jump. This test can lead to some
24286 false negatives, since before reload is completed the length of
24287 branch instructions is not known, so GCC defaults to returning their
24288 longest length, which in turn sets the far jump attribute to true.
24289
24290 A false negative will not result in bad code being generated, but it
24291 will result in a needless push and pop of the link register. We
24292 hope that this does not occur too often.
24293
24294 If we need doubleword stack alignment this could affect the other
24295 elimination offsets so we can't risk getting it wrong. */
24296 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24297 cfun->machine->arg_pointer_live = 1;
24298 else if (!cfun->machine->arg_pointer_live)
24299 return 0;
24300 }
24301
24302 /* We should not change far_jump_used during or after reload, as there is
24303 no chance to change stack frame layout. */
24304 if (reload_in_progress || reload_completed)
24305 return 0;
24306
24307 /* Check to see if the function contains a branch
24308 insn with the far jump attribute set. */
24309 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24310 {
24311 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24312 {
24313 far_jump = true;
24314 }
24315 func_size += get_attr_length (insn);
24316 }
24317
24318 /* The far_jump attribute will always be true for thumb1 before the
24319 shorten_branch pass, so checking the far_jump attribute before
24320 shorten_branch isn't very useful.
24321
24322 The following heuristic tries to estimate more accurately whether a
24323 far jump may finally be used. The heuristic is very conservative, as
24324 there is no chance to roll back a decision not to use far jumps.
24325
24326 The Thumb1 long branch offset range is -2048 to 2046. The worst case
24327 is that each 2-byte insn is associated with a 4-byte constant pool
24328 entry. Using function size 2048/3 as the threshold is conservative enough. */
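/* For example, a func_size of 700 gives 700 * 3 = 2100 >= 2048 and so is
assumed to need far jumps, whereas 600 (600 * 3 = 1800) is not. */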
24329 if (far_jump)
24330 {
24331 if ((func_size * 3) >= 2048)
24332 {
24333 /* Record the fact that we have decided that
24334 the function does use far jumps. */
24335 cfun->machine->far_jump_used = 1;
24336 return 1;
24337 }
24338 }
24339
24340 return 0;
24341 }
24342
24343 /* Return nonzero if FUNC must be entered in ARM mode. */
24344 static bool
24345 is_called_in_ARM_mode (tree func)
24346 {
24347 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24348
24349 /* Ignore the problem of functions whose address is taken. */
24350 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24351 return true;
24352
24353 #ifdef ARM_PE
24354 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24355 #else
24356 return false;
24357 #endif
24358 }
24359
24360 /* Given the stack offsets and register mask in OFFSETS, decide how
24361 many additional registers to push instead of subtracting a constant
24362 from SP. For epilogues the principle is the same except we use pop.
24363 FOR_PROLOGUE indicates which we're generating. */
24364 static int
24365 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24366 {
24367 HOST_WIDE_INT amount;
24368 unsigned long live_regs_mask = offsets->saved_regs_mask;
24369 /* Extract a mask of the ones we can give to the Thumb's push/pop
24370 instruction. */
24371 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24372 /* Then count how many other high registers will need to be pushed. */
24373 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24374 int n_free, reg_base, size;
24375
24376 if (!for_prologue && frame_pointer_needed)
24377 amount = offsets->locals_base - offsets->saved_regs;
24378 else
24379 amount = offsets->outgoing_args - offsets->saved_regs;
24380
24381 /* If the stack frame size is 512 exactly, we can save one load
24382 instruction, which should make this a win even when optimizing
24383 for speed. */
24384 if (!optimize_size && amount != 512)
24385 return 0;
24386
24387 /* Can't do this if there are high registers to push. */
24388 if (high_regs_pushed != 0)
24389 return 0;
24390
24391 /* Shouldn't do it in the prologue if no registers would normally
24392 be pushed at all. In the epilogue, also allow it if we'll have
24393 a pop insn for the PC. */
24394 if (l_mask == 0
24395 && (for_prologue
24396 || TARGET_BACKTRACE
24397 || (live_regs_mask & 1 << LR_REGNUM) == 0
24398 || TARGET_INTERWORK
24399 || crtl->args.pretend_args_size != 0))
24400 return 0;
24401
24402 /* Don't do this if thumb_expand_prologue wants to emit instructions
24403 between the push and the stack frame allocation. */
24404 if (for_prologue
24405 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24406 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24407 return 0;
24408
24409 reg_base = 0;
24410 n_free = 0;
24411 if (!for_prologue)
24412 {
24413 size = arm_size_return_regs ();
24414 reg_base = ARM_NUM_INTS (size);
24415 live_regs_mask >>= reg_base;
24416 }
24417
24418 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24419 && (for_prologue || call_used_regs[reg_base + n_free]))
24420 {
24421 live_regs_mask >>= 1;
24422 n_free++;
24423 }
24424
24425 if (n_free == 0)
24426 return 0;
24427 gcc_assert (amount / 4 * 4 == amount);
24428
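/* For example, with amount == 512 and one free register, a single extra
push brings the remaining adjustment down to 508, which fits one Thumb-1
SP subtract, so we return (512 - 508) / 4 == 1. */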
24429 if (amount >= 512 && (amount - n_free * 4) < 512)
24430 return (amount - 508) / 4;
24431 if (amount <= n_free * 4)
24432 return amount / 4;
24433 return 0;
24434 }
24435
24436 /* The bits which aren't usefully expanded as rtl. */
24437 const char *
24438 thumb1_unexpanded_epilogue (void)
24439 {
24440 arm_stack_offsets *offsets;
24441 int regno;
24442 unsigned long live_regs_mask = 0;
24443 int high_regs_pushed = 0;
24444 int extra_pop;
24445 int had_to_push_lr;
24446 int size;
24447
24448 if (cfun->machine->return_used_this_function != 0)
24449 return "";
24450
24451 if (IS_NAKED (arm_current_func_type ()))
24452 return "";
24453
24454 offsets = arm_get_frame_offsets ();
24455 live_regs_mask = offsets->saved_regs_mask;
24456 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24457
24458 /* Deduce the registers used from the function's return value where possible.
24459 This is more reliable than examining df_regs_ever_live_p () because that
24460 will be set if the register is ever used in the function, not just if
24461 the register is used to hold a return value. */
24462 size = arm_size_return_regs ();
24463
24464 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24465 if (extra_pop > 0)
24466 {
24467 unsigned long extra_mask = (1 << extra_pop) - 1;
24468 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24469 }
24470
24471 /* The prolog may have pushed some high registers to use as
24472 work registers. e.g. the testsuite file:
24473 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24474 compiles to produce:
24475 push {r4, r5, r6, r7, lr}
24476 mov r7, r9
24477 mov r6, r8
24478 push {r6, r7}
24479 as part of the prolog. We have to undo that pushing here. */
24480
24481 if (high_regs_pushed)
24482 {
24483 unsigned long mask = live_regs_mask & 0xff;
24484 int next_hi_reg;
24485
24486 /* The available low registers depend on the size of the value we are
24487 returning. */
24488 if (size <= 12)
24489 mask |= 1 << 3;
24490 if (size <= 8)
24491 mask |= 1 << 2;
24492
24493 if (mask == 0)
24494 /* Oh dear! We have no low registers into which we can pop
24495 high registers! */
24496 internal_error
24497 ("no low registers available for popping high registers");
24498
24499 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24500 if (live_regs_mask & (1 << next_hi_reg))
24501 break;
24502
24503 while (high_regs_pushed)
24504 {
24505 /* Find lo register(s) into which the high register(s) can
24506 be popped. */
24507 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24508 {
24509 if (mask & (1 << regno))
24510 high_regs_pushed--;
24511 if (high_regs_pushed == 0)
24512 break;
24513 }
24514
24515 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24516
24517 /* Pop the values into the low register(s). */
24518 thumb_pop (asm_out_file, mask);
24519
24520 /* Move the value(s) into the high registers. */
24521 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24522 {
24523 if (mask & (1 << regno))
24524 {
24525 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24526 regno);
24527
24528 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24529 if (live_regs_mask & (1 << next_hi_reg))
24530 break;
24531 }
24532 }
24533 }
24534 live_regs_mask &= ~0x0f00;
24535 }
24536
24537 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24538 live_regs_mask &= 0xff;
24539
24540 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24541 {
24542 /* Pop the return address into the PC. */
24543 if (had_to_push_lr)
24544 live_regs_mask |= 1 << PC_REGNUM;
24545
24546 /* Either no argument registers were pushed or a backtrace
24547 structure was created which includes an adjusted stack
24548 pointer, so just pop everything. */
24549 if (live_regs_mask)
24550 thumb_pop (asm_out_file, live_regs_mask);
24551
24552 /* We have either just popped the return address into the
24553 PC or it was kept in LR for the entire function.
24554 Note that thumb_pop has already called thumb_exit if the
24555 PC was in the list. */
24556 if (!had_to_push_lr)
24557 thumb_exit (asm_out_file, LR_REGNUM);
24558 }
24559 else
24560 {
24561 /* Pop everything but the return address. */
24562 if (live_regs_mask)
24563 thumb_pop (asm_out_file, live_regs_mask);
24564
24565 if (had_to_push_lr)
24566 {
24567 if (size > 12)
24568 {
24569 /* We have no free low regs, so save one. */
24570 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24571 LAST_ARG_REGNUM);
24572 }
24573
24574 /* Get the return address into a temporary register. */
24575 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24576
24577 if (size > 12)
24578 {
24579 /* Move the return address to lr. */
24580 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24581 LAST_ARG_REGNUM);
24582 /* Restore the low register. */
24583 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24584 IP_REGNUM);
24585 regno = LR_REGNUM;
24586 }
24587 else
24588 regno = LAST_ARG_REGNUM;
24589 }
24590 else
24591 regno = LR_REGNUM;
24592
24593 /* Remove the argument registers that were pushed onto the stack. */
24594 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24595 SP_REGNUM, SP_REGNUM,
24596 crtl->args.pretend_args_size);
24597
24598 thumb_exit (asm_out_file, regno);
24599 }
24600
24601 return "";
24602 }
24603
24604 /* Functions to save and restore machine-specific function data. */
24605 static struct machine_function *
24606 arm_init_machine_status (void)
24607 {
24608 struct machine_function *machine;
24609 machine = ggc_cleared_alloc<machine_function> ();
24610
24611 #if ARM_FT_UNKNOWN != 0
24612 machine->func_type = ARM_FT_UNKNOWN;
24613 #endif
24614 return machine;
24615 }
24616
24617 /* Return an RTX indicating where the return address to the
24618 calling function can be found. */
24619 rtx
24620 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24621 {
24622 if (count != 0)
24623 return NULL_RTX;
24624
24625 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24626 }
24627
24628 /* Do anything needed before RTL is emitted for each function. */
24629 void
24630 arm_init_expanders (void)
24631 {
24632 /* Arrange to initialize and mark the machine per-function status. */
24633 init_machine_status = arm_init_machine_status;
24634
24635 /* This is to stop the combine pass optimizing away the alignment
24636 adjustment of va_arg. */
24637 /* ??? It is claimed that this should not be necessary. */
24638 if (cfun)
24639 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24640 }
24641
24642 /* Return true if FUNC is compiled for a different mode (ARM vs. Thumb). */
24643
24644 bool
24645 arm_change_mode_p (tree func)
24646 {
24647 if (TREE_CODE (func) != FUNCTION_DECL)
24648 return false;
24649
24650 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24651
24652 if (!callee_tree)
24653 callee_tree = target_option_default_node;
24654
24655 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24656 int flags = callee_opts->x_target_flags;
24657
24658 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24659 }
24660
24661 /* Like arm_compute_initial_elimination_offset. Simpler because there
24662 isn't an ABI specified frame pointer for Thumb. Instead, we set it
24663 to point at the base of the local variables after static stack
24664 space for a function has been allocated. */
24665
24666 HOST_WIDE_INT
24667 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24668 {
24669 arm_stack_offsets *offsets;
24670
24671 offsets = arm_get_frame_offsets ();
24672
24673 switch (from)
24674 {
24675 case ARG_POINTER_REGNUM:
24676 switch (to)
24677 {
24678 case STACK_POINTER_REGNUM:
24679 return offsets->outgoing_args - offsets->saved_args;
24680
24681 case FRAME_POINTER_REGNUM:
24682 return offsets->soft_frame - offsets->saved_args;
24683
24684 case ARM_HARD_FRAME_POINTER_REGNUM:
24685 return offsets->saved_regs - offsets->saved_args;
24686
24687 case THUMB_HARD_FRAME_POINTER_REGNUM:
24688 return offsets->locals_base - offsets->saved_args;
24689
24690 default:
24691 gcc_unreachable ();
24692 }
24693 break;
24694
24695 case FRAME_POINTER_REGNUM:
24696 switch (to)
24697 {
24698 case STACK_POINTER_REGNUM:
24699 return offsets->outgoing_args - offsets->soft_frame;
24700
24701 case ARM_HARD_FRAME_POINTER_REGNUM:
24702 return offsets->saved_regs - offsets->soft_frame;
24703
24704 case THUMB_HARD_FRAME_POINTER_REGNUM:
24705 return offsets->locals_base - offsets->soft_frame;
24706
24707 default:
24708 gcc_unreachable ();
24709 }
24710 break;
24711
24712 default:
24713 gcc_unreachable ();
24714 }
24715 }
24716
24717 /* Generate the function's prologue. */
24718
24719 void
24720 thumb1_expand_prologue (void)
24721 {
24722 rtx_insn *insn;
24723
24724 HOST_WIDE_INT amount;
24725 HOST_WIDE_INT size;
24726 arm_stack_offsets *offsets;
24727 unsigned long func_type;
24728 int regno;
24729 unsigned long live_regs_mask;
24730 unsigned long l_mask;
24731 unsigned high_regs_pushed = 0;
24732 bool lr_needs_saving;
24733
24734 func_type = arm_current_func_type ();
24735
24736 /* Naked functions don't have prologues. */
24737 if (IS_NAKED (func_type))
24738 {
24739 if (flag_stack_usage_info)
24740 current_function_static_stack_size = 0;
24741 return;
24742 }
24743
24744 if (IS_INTERRUPT (func_type))
24745 {
24746 error ("interrupt Service Routines cannot be coded in Thumb mode");
24747 return;
24748 }
24749
24750 if (is_called_in_ARM_mode (current_function_decl))
24751 emit_insn (gen_prologue_thumb1_interwork ());
24752
24753 offsets = arm_get_frame_offsets ();
24754 live_regs_mask = offsets->saved_regs_mask;
24755 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
24756
24757 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
24758 l_mask = live_regs_mask & 0x40ff;
24759 /* Then count how many other high registers will need to be pushed. */
24760 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24761
24762 if (crtl->args.pretend_args_size)
24763 {
24764 rtx x = GEN_INT (-crtl->args.pretend_args_size);
24765
24766 if (cfun->machine->uses_anonymous_args)
24767 {
24768 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
24769 unsigned long mask;
24770
24771 mask = 1ul << (LAST_ARG_REGNUM + 1);
24772 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
24773
24774 insn = thumb1_emit_multi_reg_push (mask, 0);
24775 }
24776 else
24777 {
24778 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24779 stack_pointer_rtx, x));
24780 }
24781 RTX_FRAME_RELATED_P (insn) = 1;
24782 }
24783
24784 if (TARGET_BACKTRACE)
24785 {
24786 HOST_WIDE_INT offset = 0;
24787 unsigned work_register;
24788 rtx work_reg, x, arm_hfp_rtx;
24789
24790 /* We have been asked to create a stack backtrace structure.
24791 The code looks like this:
24792
24793 0 .align 2
24794 0 func:
24795 0 sub SP, #16 Reserve space for 4 registers.
24796 2 push {R7} Push low registers.
24797 4 add R7, SP, #20 Get the stack pointer before the push.
24798 6 str R7, [SP, #8] Store the stack pointer
24799 (before reserving the space).
24800 8 mov R7, PC Get hold of the start of this code + 12.
24801 10 str R7, [SP, #16] Store it.
24802 12 mov R7, FP Get hold of the current frame pointer.
24803 14 str R7, [SP, #4] Store it.
24804 16 mov R7, LR Get hold of the current return address.
24805 18 str R7, [SP, #12] Store it.
24806 20 add R7, SP, #16 Point at the start of the
24807 backtrace structure.
24808 22 mov FP, R7 Put this value into the frame pointer. */
24809
24810 work_register = thumb_find_work_register (live_regs_mask);
24811 work_reg = gen_rtx_REG (SImode, work_register);
24812 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
24813
24814 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
24815 stack_pointer_rtx, GEN_INT (-16)));
24816 RTX_FRAME_RELATED_P (insn) = 1;
24817
24818 if (l_mask)
24819 {
24820 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
24821 RTX_FRAME_RELATED_P (insn) = 1;
24822 lr_needs_saving = false;
24823
24824 offset = bit_count (l_mask) * UNITS_PER_WORD;
24825 }
24826
24827 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
24828 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24829
24830 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
24831 x = gen_frame_mem (SImode, x);
24832 emit_move_insn (x, work_reg);
24833
24834 /* Make sure that the instruction fetching the PC is in the right place
24835 to calculate "start of backtrace creation code + 12". */
24836 /* ??? The stores using the common WORK_REG ought to be enough to
24837 prevent the scheduler from doing anything weird. Failing that
24838 we could always move all of the following into an UNSPEC_VOLATILE. */
24839 if (l_mask)
24840 {
24841 x = gen_rtx_REG (SImode, PC_REGNUM);
24842 emit_move_insn (work_reg, x);
24843
24844 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24845 x = gen_frame_mem (SImode, x);
24846 emit_move_insn (x, work_reg);
24847
24848 emit_move_insn (work_reg, arm_hfp_rtx);
24849
24850 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24851 x = gen_frame_mem (SImode, x);
24852 emit_move_insn (x, work_reg);
24853 }
24854 else
24855 {
24856 emit_move_insn (work_reg, arm_hfp_rtx);
24857
24858 x = plus_constant (Pmode, stack_pointer_rtx, offset);
24859 x = gen_frame_mem (SImode, x);
24860 emit_move_insn (x, work_reg);
24861
24862 x = gen_rtx_REG (SImode, PC_REGNUM);
24863 emit_move_insn (work_reg, x);
24864
24865 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
24866 x = gen_frame_mem (SImode, x);
24867 emit_move_insn (x, work_reg);
24868 }
24869
24870 x = gen_rtx_REG (SImode, LR_REGNUM);
24871 emit_move_insn (work_reg, x);
24872
24873 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
24874 x = gen_frame_mem (SImode, x);
24875 emit_move_insn (x, work_reg);
24876
24877 x = GEN_INT (offset + 12);
24878 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
24879
24880 emit_move_insn (arm_hfp_rtx, work_reg);
24881 }
24882 /* Optimization: If we are not pushing any low registers but we are going
24883 to push some high registers then delay our first push. This will just
24884 be a push of LR and we can combine it with the push of the first high
24885 register. */
24886 else if ((l_mask & 0xff) != 0
24887 || (high_regs_pushed == 0 && lr_needs_saving))
24888 {
24889 unsigned long mask = l_mask;
24890 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
24891 insn = thumb1_emit_multi_reg_push (mask, mask);
24892 RTX_FRAME_RELATED_P (insn) = 1;
24893 lr_needs_saving = false;
24894 }
24895
24896 if (high_regs_pushed)
24897 {
24898 unsigned pushable_regs;
24899 unsigned next_hi_reg;
24900 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
24901 : crtl->args.info.nregs;
24902 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
24903
24904 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24905 if (live_regs_mask & (1 << next_hi_reg))
24906 break;
24907
24908 /* Here we need to mask out registers used for passing arguments,
24909 even if they could be pushed. This avoids using them to stash the
24910 high registers, which would clobber the arguments they hold. */
24911 pushable_regs = l_mask & (~arg_regs_mask);
24912 if (lr_needs_saving)
24913 pushable_regs &= ~(1 << LR_REGNUM);
24914
24915 if (pushable_regs == 0)
24916 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
24917
24918 while (high_regs_pushed > 0)
24919 {
24920 unsigned long real_regs_mask = 0;
24921 unsigned long push_mask = 0;
24922
24923 for (regno = LR_REGNUM; regno >= 0; regno --)
24924 {
24925 if (pushable_regs & (1 << regno))
24926 {
24927 emit_move_insn (gen_rtx_REG (SImode, regno),
24928 gen_rtx_REG (SImode, next_hi_reg));
24929
24930 high_regs_pushed --;
24931 real_regs_mask |= (1 << next_hi_reg);
24932 push_mask |= (1 << regno);
24933
24934 if (high_regs_pushed)
24935 {
24936 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
24937 next_hi_reg --)
24938 if (live_regs_mask & (1 << next_hi_reg))
24939 break;
24940 }
24941 else
24942 break;
24943 }
24944 }
24945
24946 /* If we had to find a work register and we have not yet
24947 saved the LR then add it to the list of regs to push. */
24948 if (lr_needs_saving)
24949 {
24950 push_mask |= 1 << LR_REGNUM;
24951 real_regs_mask |= 1 << LR_REGNUM;
24952 lr_needs_saving = false;
24953 }
24954
24955 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
24956 RTX_FRAME_RELATED_P (insn) = 1;
24957 }
24958 }
24959
24960 /* Load the pic register before setting the frame pointer,
24961 so we can use r7 as a temporary work register. */
24962 if (flag_pic && arm_pic_register != INVALID_REGNUM)
24963 arm_load_pic_register (live_regs_mask);
24964
24965 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
24966 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
24967 stack_pointer_rtx);
24968
24969 size = offsets->outgoing_args - offsets->saved_args;
24970 if (flag_stack_usage_info)
24971 current_function_static_stack_size = size;
24972
24973 /* If we have a frame, then do stack checking. FIXME: not implemented. */
24974 if (flag_stack_check == STATIC_BUILTIN_STACK_CHECK && size)
24975 sorry ("-fstack-check=specific for Thumb-1");
24976
24977 amount = offsets->outgoing_args - offsets->saved_regs;
24978 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
24979 if (amount)
24980 {
24981 if (amount < 512)
24982 {
24983 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
24984 GEN_INT (- amount)));
24985 RTX_FRAME_RELATED_P (insn) = 1;
24986 }
24987 else
24988 {
24989 rtx reg, dwarf;
24990
24991 /* The stack decrement is too big for an immediate value in a single
24992 insn. In theory we could issue multiple subtracts, but after
24993 three of them it becomes more space efficient to place the full
24994 value in the constant pool and load into a register. (Also the
24995 ARM debugger really likes to see only one stack decrement per
24996 function). So instead we look for a scratch register into which
24997 we can load the decrement, and then we subtract this from the
24998 stack pointer. Unfortunately on the thumb the only available
24999 scratch registers are the argument registers, and we cannot use
25000 these as they may hold arguments to the function. Instead we
25001 attempt to locate a call preserved register which is used by this
25002 function. If we can find one, then we know that it will have
25003 been pushed at the start of the prologue and so we can corrupt
25004 it now. */
25005 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25006 if (live_regs_mask & (1 << regno))
25007 break;
25008
25009 gcc_assert(regno <= LAST_LO_REGNUM);
25010
25011 reg = gen_rtx_REG (SImode, regno);
25012
25013 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25014
25015 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25016 stack_pointer_rtx, reg));
25017
25018 dwarf = gen_rtx_SET (stack_pointer_rtx,
25019 plus_constant (Pmode, stack_pointer_rtx,
25020 -amount));
25021 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25022 RTX_FRAME_RELATED_P (insn) = 1;
25023 }
25024 }
25025
25026 if (frame_pointer_needed)
25027 thumb_set_frame_pointer (offsets);
25028
25029 /* If we are profiling, make sure no instructions are scheduled before
25030 the call to mcount. Similarly if the user has requested no
25031 scheduling in the prolog. Similarly if we want non-call exceptions
25032 using the EABI unwinder, to prevent faulting instructions from being
25033 swapped with a stack adjustment. */
25034 if (crtl->profile || !TARGET_SCHED_PROLOG
25035 || (arm_except_unwind_info (&global_options) == UI_TARGET
25036 && cfun->can_throw_non_call_exceptions))
25037 emit_insn (gen_blockage ());
25038
25039 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25040 if (live_regs_mask & 0xff)
25041 cfun->machine->lr_save_eliminated = 0;
25042 }
25043
25044 /* Clear caller saved registers not used to pass return values and leaked
25045 condition flags before exiting a cmse_nonsecure_entry function. */
25046
25047 void
25048 cmse_nonsecure_entry_clear_before_return (void)
25049 {
25050 uint64_t to_clear_mask[2];
25051 uint32_t padding_bits_to_clear = 0;
25052 uint32_t * padding_bits_to_clear_ptr = &padding_bits_to_clear;
25053 int regno, maxregno = IP_REGNUM;
25054 tree result_type;
25055 rtx result_rtl;
25056
25057 to_clear_mask[0] = (1ULL << (NUM_ARG_REGS)) - 1;
25058 to_clear_mask[0] |= (1ULL << IP_REGNUM);
25059
25060 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25061 registers. We also check that TARGET_HARD_FLOAT and !TARGET_THUMB1 hold
25062 to make sure the instructions used to clear them are present. */
25063 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
25064 {
25065 uint64_t float_mask = (1ULL << (D7_VFP_REGNUM + 1)) - 1;
25066 maxregno = LAST_VFP_REGNUM;
25067
25068 float_mask &= ~((1ULL << FIRST_VFP_REGNUM) - 1);
25069 to_clear_mask[0] |= float_mask;
25070
25071 float_mask = (1ULL << (maxregno - 63)) - 1;
25072 to_clear_mask[1] = float_mask;
25073
25074 /* Make sure we don't clear the two scratch registers used to clear the
25075 relevant FPSCR bits in output_return_instruction. */
25076 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25077 to_clear_mask[0] &= ~(1ULL << IP_REGNUM);
25078 emit_use (gen_rtx_REG (SImode, 4));
25079 to_clear_mask[0] &= ~(1ULL << 4);
25080 }
25081
25082 /* If the user has defined registers to be caller saved, these are no longer
25083 restored by the function before returning and must thus be cleared for
25084 security purposes. */
25085 for (regno = NUM_ARG_REGS; regno < LAST_VFP_REGNUM; regno++)
25086 {
25087 /* We do not touch registers that can be used to pass arguments as per
25088 the AAPCS, since these should never be made callee-saved by user
25089 options. */
25090 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25091 continue;
25092 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25093 continue;
25094 if (call_used_regs[regno])
25095 to_clear_mask[regno / 64] |= (1ULL << (regno % 64));
25096 }
25097
25098 /* Make sure we do not clear the registers the result is returned in. */
25099 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25100 if (!VOID_TYPE_P (result_type))
25101 {
25102 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25103
25104 /* No need to check that we return in registers, because we don't
25105 support returning on the stack yet. */
25106 to_clear_mask[0]
25107 &= ~compute_not_to_clear_mask (result_type, result_rtl, 0,
25108 padding_bits_to_clear_ptr);
25109 }
25110
25111 if (padding_bits_to_clear != 0)
25112 {
25113 rtx reg_rtx;
25114 /* Padding bits to clear is not 0, so we know we are returning a
25115 composite type, which only uses r0. Make sure that r1-r3 are
25116 cleared too; we will use r1 as a scratch register. */
25117 gcc_assert ((to_clear_mask[0] & 0xe) == 0xe);
25118
25119 reg_rtx = gen_rtx_REG (SImode, R1_REGNUM);
25120
25121 /* Fill the lower half of the negated padding_bits_to_clear. */
25122 emit_move_insn (reg_rtx,
25123 GEN_INT ((((~padding_bits_to_clear) << 16u) >> 16u)));
25124
25125 /* Also fill the top half of the negated padding_bits_to_clear. */
25126 if (((~padding_bits_to_clear) >> 16) > 0)
25127 emit_insn (gen_rtx_SET (gen_rtx_ZERO_EXTRACT (SImode, reg_rtx,
25128 GEN_INT (16),
25129 GEN_INT (16)),
25130 GEN_INT ((~padding_bits_to_clear) >> 16)));
25131
25132 emit_insn (gen_andsi3 (gen_rtx_REG (SImode, R0_REGNUM),
25133 gen_rtx_REG (SImode, R0_REGNUM),
25134 reg_rtx));
25135 }
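  /* Worked example (an illustrative assumption, not taken from a specific
     test case): if padding_bits_to_clear were 0x0000ff00, the sequence
     above would build ~0x0000ff00 = 0xffff00ff in r1 (low half via a move,
     high half via a 16-bit insert) and then emit
         and r0, r0, r1
     so that only the padding bits of the composite return value in r0 are
     zeroed while its data bits are preserved.  */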
25136
25137 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25138 {
25139 if (!(to_clear_mask[regno / 64] & (1ULL << (regno % 64))))
25140 continue;
25141
25142 if (IS_VFP_REGNUM (regno))
25143 {
25144 /* If regno is an even vfp register and its successor is also to
25145 be cleared, use vmov. */
25146 if (TARGET_VFP_DOUBLE
25147 && VFP_REGNO_OK_FOR_DOUBLE (regno)
25148 && to_clear_mask[regno / 64] & (1ULL << ((regno % 64) + 1)))
25149 {
25150 emit_move_insn (gen_rtx_REG (DFmode, regno),
25151 CONST1_RTX (DFmode));
25152 emit_use (gen_rtx_REG (DFmode, regno));
25153 regno++;
25154 }
25155 else
25156 {
25157 emit_move_insn (gen_rtx_REG (SFmode, regno),
25158 CONST1_RTX (SFmode));
25159 emit_use (gen_rtx_REG (SFmode, regno));
25160 }
25161 }
25162 else
25163 {
25164 if (TARGET_THUMB1)
25165 {
25166 if (regno == R0_REGNUM)
25167 emit_move_insn (gen_rtx_REG (SImode, regno),
25168 const0_rtx);
25169 else
25170 /* R0 has either been cleared before, see code above, or it
25171 holds a return value, either way it is not secret
25172 information. */
25173 emit_move_insn (gen_rtx_REG (SImode, regno),
25174 gen_rtx_REG (SImode, R0_REGNUM));
25175 emit_use (gen_rtx_REG (SImode, regno));
25176 }
25177 else
25178 {
25179 emit_move_insn (gen_rtx_REG (SImode, regno),
25180 gen_rtx_REG (SImode, LR_REGNUM));
25181 emit_use (gen_rtx_REG (SImode, regno));
25182 }
25183 }
25184 }
25185 }
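/* Sketch of the effect of the loop above (an illustration, not verbatim
   compiler output): for a cmse_nonsecure_entry function returning void on a
   hard-float Thumb-2 target, the cleared core registers are typically
   overwritten from LR (which holds no secret data) and each pair of cleared
   VFP registers is overwritten with 1.0, roughly
       mov r0, lr
       mov r1, lr
       ...
       vmov.f64 d0, #1.0
       ...
   before the return sequence also scrubs the relevant FPSCR bits.  */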
25186
25187 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
25188 single POP instruction can be generated. LR should be replaced by PC. All
25189 the checks required are already done by USE_RETURN_INSN (). Hence,
25190 all we really need to check here is whether a single register or
25191 multiple registers are to be restored. */
25192 void
25193 thumb2_expand_return (bool simple_return)
25194 {
25195 int i, num_regs;
25196 unsigned long saved_regs_mask;
25197 arm_stack_offsets *offsets;
25198
25199 offsets = arm_get_frame_offsets ();
25200 saved_regs_mask = offsets->saved_regs_mask;
25201
25202 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25203 if (saved_regs_mask & (1 << i))
25204 num_regs++;
25205
25206 if (!simple_return && saved_regs_mask)
25207 {
25208 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25209 functions, or adapt the code to handle it according to the ACLE. This path
25210 should not be reachable for cmse_nonsecure_entry functions, but we assert
25211 it for now to ensure that future code changes do not silently
25212 change this behavior. */
25213 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25214 if (num_regs == 1)
25215 {
25216 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25217 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25218 rtx addr = gen_rtx_MEM (SImode,
25219 gen_rtx_POST_INC (SImode,
25220 stack_pointer_rtx));
25221 set_mem_alias_set (addr, get_frame_alias_set ());
25222 XVECEXP (par, 0, 0) = ret_rtx;
25223 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25224 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25225 emit_jump_insn (par);
25226 }
25227 else
25228 {
25229 saved_regs_mask &= ~ (1 << LR_REGNUM);
25230 saved_regs_mask |= (1 << PC_REGNUM);
25231 arm_emit_multi_reg_pop (saved_regs_mask);
25232 }
25233 }
25234 else
25235 {
25236 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25237 cmse_nonsecure_entry_clear_before_return ();
25238 emit_jump_insn (simple_return_rtx);
25239 }
25240 }
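/* For illustration (assumed typical output, not taken from a dump): when
   only one register was saved, the PARALLEL built above assembles to a
   single post-indexed load into the PC, e.g.
       ldr pc, [sp], #4
   whereas the multi-register path produces an ordinary
       pop {r4, r5, ..., pc}
   with LR's stack slot loaded into the PC.  */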
25241
25242 void
25243 thumb1_expand_epilogue (void)
25244 {
25245 HOST_WIDE_INT amount;
25246 arm_stack_offsets *offsets;
25247 int regno;
25248
25249 /* Naked functions don't have prologues. */
25250 if (IS_NAKED (arm_current_func_type ()))
25251 return;
25252
25253 offsets = arm_get_frame_offsets ();
25254 amount = offsets->outgoing_args - offsets->saved_regs;
25255
25256 if (frame_pointer_needed)
25257 {
25258 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25259 amount = offsets->locals_base - offsets->saved_regs;
25260 }
25261 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25262
25263 gcc_assert (amount >= 0);
25264 if (amount)
25265 {
25266 emit_insn (gen_blockage ());
25267
25268 if (amount < 512)
25269 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25270 GEN_INT (amount)));
25271 else
25272 {
25273 /* r3 is always free in the epilogue. */
25274 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25275
25276 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25277 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25278 }
25279 }
25280
25281 /* Emit a USE (stack_pointer_rtx), so that
25282 the stack adjustment will not be deleted. */
25283 emit_insn (gen_force_register_use (stack_pointer_rtx));
25284
25285 if (crtl->profile || !TARGET_SCHED_PROLOG)
25286 emit_insn (gen_blockage ());
25287
25288 /* Emit a clobber for each register that will be restored in the epilogue,
25289 so that flow2 will get register lifetimes correct. */
25290 for (regno = 0; regno < 13; regno++)
25291 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25292 emit_clobber (gen_rtx_REG (SImode, regno));
25293
25294 if (! df_regs_ever_live_p (LR_REGNUM))
25295 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25296
25297 /* Clear all caller-saved regs that are not used to return. */
25298 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25299 cmse_nonsecure_entry_clear_before_return ();
25300 }
25301
25302 /* Epilogue code for APCS frame. */
25303 static void
25304 arm_expand_epilogue_apcs_frame (bool really_return)
25305 {
25306 unsigned long func_type;
25307 unsigned long saved_regs_mask;
25308 int num_regs = 0;
25309 int i;
25310 int floats_from_frame = 0;
25311 arm_stack_offsets *offsets;
25312
25313 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25314 func_type = arm_current_func_type ();
25315
25316 /* Get frame offsets for ARM. */
25317 offsets = arm_get_frame_offsets ();
25318 saved_regs_mask = offsets->saved_regs_mask;
25319
25320 /* Find the offset of the floating-point save area in the frame. */
25321 floats_from_frame
25322 = (offsets->saved_args
25323 + arm_compute_static_chain_stack_bytes ()
25324 - offsets->frame);
25325
25326 /* Compute how many core registers are saved and how far away the floats are. */
25327 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25328 if (saved_regs_mask & (1 << i))
25329 {
25330 num_regs++;
25331 floats_from_frame += 4;
25332 }
25333
25334 if (TARGET_HARD_FLOAT)
25335 {
25336 int start_reg;
25337 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25338
25339 /* The offset is from IP_REGNUM. */
25340 int saved_size = arm_get_vfp_saved_size ();
25341 if (saved_size > 0)
25342 {
25343 rtx_insn *insn;
25344 floats_from_frame += saved_size;
25345 insn = emit_insn (gen_addsi3 (ip_rtx,
25346 hard_frame_pointer_rtx,
25347 GEN_INT (-floats_from_frame)));
25348 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25349 ip_rtx, hard_frame_pointer_rtx);
25350 }
25351
25352 /* Generate VFP register multi-pop. */
25353 start_reg = FIRST_VFP_REGNUM;
25354
25355 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25356 /* Look for a case where a reg does not need restoring. */
25357 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25358 && (!df_regs_ever_live_p (i + 1)
25359 || call_used_regs[i + 1]))
25360 {
25361 if (start_reg != i)
25362 arm_emit_vfp_multi_reg_pop (start_reg,
25363 (i - start_reg) / 2,
25364 gen_rtx_REG (SImode,
25365 IP_REGNUM));
25366 start_reg = i + 2;
25367 }
25368
25369 /* Restore the remaining regs that we have discovered (or possibly
25370 even all of them, if the conditional in the for loop never
25371 fired). */
25372 if (start_reg != i)
25373 arm_emit_vfp_multi_reg_pop (start_reg,
25374 (i - start_reg) / 2,
25375 gen_rtx_REG (SImode, IP_REGNUM));
25376 }
25377
25378 if (TARGET_IWMMXT)
25379 {
25380 /* The frame pointer is guaranteed to be non-double-word aligned, as
25381 it is set to the double-word-aligned old stack pointer minus 4. */
25382 rtx_insn *insn;
25383 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25384
25385 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25386 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25387 {
25388 rtx addr = gen_frame_mem (V2SImode,
25389 plus_constant (Pmode, hard_frame_pointer_rtx,
25390 - lrm_count * 4));
25391 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25392 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25393 gen_rtx_REG (V2SImode, i),
25394 NULL_RTX);
25395 lrm_count += 2;
25396 }
25397 }
25398
25399 /* saved_regs_mask should contain IP, which holds the old stack pointer
25400 saved at the time the frame was created. Since SP and IP are adjacent
25401 registers, we can restore that value directly into SP. */
25402 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25403 saved_regs_mask &= ~(1 << IP_REGNUM);
25404 saved_regs_mask |= (1 << SP_REGNUM);
25405
25406 /* There are two registers left in saved_regs_mask - LR and PC. We
25407 only need to restore LR (the return address), but to
25408 save time we can load it directly into PC, unless we need a
25409 special function exit sequence, or we are not really returning. */
25410 if (really_return
25411 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25412 && !crtl->calls_eh_return)
25413 /* Delete LR from the register mask, so that the saved LR on
25414 the stack is loaded into the PC instead. */
25415 saved_regs_mask &= ~(1 << LR_REGNUM);
25416 else
25417 saved_regs_mask &= ~(1 << PC_REGNUM);
25418
25419 num_regs = bit_count (saved_regs_mask);
25420 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25421 {
25422 rtx_insn *insn;
25423 emit_insn (gen_blockage ());
25424 /* Unwind the stack to just below the saved registers. */
25425 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25426 hard_frame_pointer_rtx,
25427 GEN_INT (- 4 * num_regs)));
25428
25429 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25430 stack_pointer_rtx, hard_frame_pointer_rtx);
25431 }
25432
25433 arm_emit_multi_reg_pop (saved_regs_mask);
25434
25435 if (IS_INTERRUPT (func_type))
25436 {
25437 /* Interrupt handlers will have pushed the
25438 IP onto the stack, so restore it now. */
25439 rtx_insn *insn;
25440 rtx addr = gen_rtx_MEM (SImode,
25441 gen_rtx_POST_INC (SImode,
25442 stack_pointer_rtx));
25443 set_mem_alias_set (addr, get_frame_alias_set ());
25444 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25445 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25446 gen_rtx_REG (SImode, IP_REGNUM),
25447 NULL_RTX);
25448 }
25449
25450 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25451 return;
25452
25453 if (crtl->calls_eh_return)
25454 emit_insn (gen_addsi3 (stack_pointer_rtx,
25455 stack_pointer_rtx,
25456 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25457
25458 if (IS_STACKALIGN (func_type))
25459 /* Restore the original stack pointer. Before prologue, the stack was
25460 realigned and the original stack pointer saved in r0. For details,
25461 see comment in arm_expand_prologue. */
25462 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25463
25464 emit_jump_insn (simple_return_rtx);
25465 }
25466
25467 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25468 function is not a sibcall. */
25469 void
25470 arm_expand_epilogue (bool really_return)
25471 {
25472 unsigned long func_type;
25473 unsigned long saved_regs_mask;
25474 int num_regs = 0;
25475 int i;
25476 int amount;
25477 arm_stack_offsets *offsets;
25478
25479 func_type = arm_current_func_type ();
25480
25481 /* Naked functions don't have an epilogue. Hence, generate the return pattern
25482 and let output_return_instruction take care of any instruction emission. */
25483 if (IS_NAKED (func_type)
25484 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25485 {
25486 if (really_return)
25487 emit_jump_insn (simple_return_rtx);
25488 return;
25489 }
25490
25491 /* If we are throwing an exception, then we really must be doing a
25492 return, so we can't tail-call. */
25493 gcc_assert (!crtl->calls_eh_return || really_return);
25494
25495 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25496 {
25497 arm_expand_epilogue_apcs_frame (really_return);
25498 return;
25499 }
25500
25501 /* Get frame offsets for ARM. */
25502 offsets = arm_get_frame_offsets ();
25503 saved_regs_mask = offsets->saved_regs_mask;
25504 num_regs = bit_count (saved_regs_mask);
25505
25506 if (frame_pointer_needed)
25507 {
25508 rtx_insn *insn;
25509 /* Restore stack pointer if necessary. */
25510 if (TARGET_ARM)
25511 {
25512 /* In ARM mode, the frame pointer points to the first saved register.
25513 Restore the stack pointer to point to the last saved register. */
25514 amount = offsets->frame - offsets->saved_regs;
25515
25516 /* Force out any pending memory operations that reference stacked data
25517 before stack de-allocation occurs. */
25518 emit_insn (gen_blockage ());
25519 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25520 hard_frame_pointer_rtx,
25521 GEN_INT (amount)));
25522 arm_add_cfa_adjust_cfa_note (insn, amount,
25523 stack_pointer_rtx,
25524 hard_frame_pointer_rtx);
25525
25526 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25527 deleted. */
25528 emit_insn (gen_force_register_use (stack_pointer_rtx));
25529 }
25530 else
25531 {
25532 /* In Thumb-2 mode, the frame pointer points to the last saved
25533 register. */
25534 amount = offsets->locals_base - offsets->saved_regs;
25535 if (amount)
25536 {
25537 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25538 hard_frame_pointer_rtx,
25539 GEN_INT (amount)));
25540 arm_add_cfa_adjust_cfa_note (insn, amount,
25541 hard_frame_pointer_rtx,
25542 hard_frame_pointer_rtx);
25543 }
25544
25545 /* Force out any pending memory operations that reference stacked data
25546 before stack de-allocation occurs. */
25547 emit_insn (gen_blockage ());
25548 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25549 hard_frame_pointer_rtx));
25550 arm_add_cfa_adjust_cfa_note (insn, 0,
25551 stack_pointer_rtx,
25552 hard_frame_pointer_rtx);
25553 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25554 deleted. */
25555 emit_insn (gen_force_register_use (stack_pointer_rtx));
25556 }
25557 }
25558 else
25559 {
25560 /* Pop off outgoing args and local frame to adjust stack pointer to
25561 last saved register. */
25562 amount = offsets->outgoing_args - offsets->saved_regs;
25563 if (amount)
25564 {
25565 rtx_insn *tmp;
25566 /* Force out any pending memory operations that reference stacked data
25567 before stack de-allocation occurs. */
25568 emit_insn (gen_blockage ());
25569 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25570 stack_pointer_rtx,
25571 GEN_INT (amount)));
25572 arm_add_cfa_adjust_cfa_note (tmp, amount,
25573 stack_pointer_rtx, stack_pointer_rtx);
25574 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25575 not deleted. */
25576 emit_insn (gen_force_register_use (stack_pointer_rtx));
25577 }
25578 }
25579
25580 if (TARGET_HARD_FLOAT)
25581 {
25582 /* Generate VFP register multi-pop. */
25583 int end_reg = LAST_VFP_REGNUM + 1;
25584
25585 /* Scan the registers in reverse order. We need to match
25586 any groupings made in the prologue and generate matching
25587 vldm operations. The need to match groups is because,
25588 unlike pop, vldm can only do consecutive regs. */
25589 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25590 /* Look for a case where a reg does not need restoring. */
25591 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25592 && (!df_regs_ever_live_p (i + 1)
25593 || call_used_regs[i + 1]))
25594 {
25595 /* Restore the regs discovered so far (from reg+2 to
25596 end_reg). */
25597 if (end_reg > i + 2)
25598 arm_emit_vfp_multi_reg_pop (i + 2,
25599 (end_reg - (i + 2)) / 2,
25600 stack_pointer_rtx);
25601 end_reg = i;
25602 }
25603
25604 /* Restore the remaining regs that we have discovered (or possibly
25605 even all of them, if the conditional in the for loop never
25606 fired). */
25607 if (end_reg > i + 2)
25608 arm_emit_vfp_multi_reg_pop (i + 2,
25609 (end_reg - (i + 2)) / 2,
25610 stack_pointer_rtx);
25611 }
25612
25613 if (TARGET_IWMMXT)
25614 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25615 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25616 {
25617 rtx_insn *insn;
25618 rtx addr = gen_rtx_MEM (V2SImode,
25619 gen_rtx_POST_INC (SImode,
25620 stack_pointer_rtx));
25621 set_mem_alias_set (addr, get_frame_alias_set ());
25622 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25623 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25624 gen_rtx_REG (V2SImode, i),
25625 NULL_RTX);
25626 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25627 stack_pointer_rtx, stack_pointer_rtx);
25628 }
25629
25630 if (saved_regs_mask)
25631 {
25632 rtx insn;
25633 bool return_in_pc = false;
25634
25635 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25636 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25637 && !IS_CMSE_ENTRY (func_type)
25638 && !IS_STACKALIGN (func_type)
25639 && really_return
25640 && crtl->args.pretend_args_size == 0
25641 && saved_regs_mask & (1 << LR_REGNUM)
25642 && !crtl->calls_eh_return)
25643 {
25644 saved_regs_mask &= ~(1 << LR_REGNUM);
25645 saved_regs_mask |= (1 << PC_REGNUM);
25646 return_in_pc = true;
25647 }
25648
25649 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25650 {
25651 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25652 if (saved_regs_mask & (1 << i))
25653 {
25654 rtx addr = gen_rtx_MEM (SImode,
25655 gen_rtx_POST_INC (SImode,
25656 stack_pointer_rtx));
25657 set_mem_alias_set (addr, get_frame_alias_set ());
25658
25659 if (i == PC_REGNUM)
25660 {
25661 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25662 XVECEXP (insn, 0, 0) = ret_rtx;
25663 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25664 addr);
25665 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25666 insn = emit_jump_insn (insn);
25667 }
25668 else
25669 {
25670 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25671 addr));
25672 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25673 gen_rtx_REG (SImode, i),
25674 NULL_RTX);
25675 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25676 stack_pointer_rtx,
25677 stack_pointer_rtx);
25678 }
25679 }
25680 }
25681 else
25682 {
25683 if (TARGET_LDRD
25684 && current_tune->prefer_ldrd_strd
25685 && !optimize_function_for_size_p (cfun))
25686 {
25687 if (TARGET_THUMB2)
25688 thumb2_emit_ldrd_pop (saved_regs_mask);
25689 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25690 arm_emit_ldrd_pop (saved_regs_mask);
25691 else
25692 arm_emit_multi_reg_pop (saved_regs_mask);
25693 }
25694 else
25695 arm_emit_multi_reg_pop (saved_regs_mask);
25696 }
25697
25698 if (return_in_pc)
25699 return;
25700 }
25701
25702 amount
25703 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25704 if (amount)
25705 {
25706 int i, j;
25707 rtx dwarf = NULL_RTX;
25708 rtx_insn *tmp =
25709 emit_insn (gen_addsi3 (stack_pointer_rtx,
25710 stack_pointer_rtx,
25711 GEN_INT (amount)));
25712
25713 RTX_FRAME_RELATED_P (tmp) = 1;
25714
25715 if (cfun->machine->uses_anonymous_args)
25716 {
25717 /* Restore pretend args. See arm_expand_prologue for how the
25718 pretend args are saved on the stack. */
25719 int num_regs = crtl->args.pretend_args_size / 4;
25720 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25721 for (j = 0, i = 0; j < num_regs; i++)
25722 if (saved_regs_mask & (1 << i))
25723 {
25724 rtx reg = gen_rtx_REG (SImode, i);
25725 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25726 j++;
25727 }
25728 REG_NOTES (tmp) = dwarf;
25729 }
25730 arm_add_cfa_adjust_cfa_note (tmp, amount,
25731 stack_pointer_rtx, stack_pointer_rtx);
25732 }
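  /* Worked example of the pretend-args mask above (illustrative only):
     with crtl->args.pretend_args_size == 8, num_regs is 2 and
     (0xf0 >> 2) & 0xf == 0xc, i.e. r2 and r3, which are exactly the
     anonymous argument registers the prologue pushed below the saved
     registers.  */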
25733
25734 /* Clear all caller-saved regs that are not used to return. */
25735 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25736 {
25737 /* CMSE_ENTRY always returns. */
25738 gcc_assert (really_return);
25739 cmse_nonsecure_entry_clear_before_return ();
25740 }
25741
25742 if (!really_return)
25743 return;
25744
25745 if (crtl->calls_eh_return)
25746 emit_insn (gen_addsi3 (stack_pointer_rtx,
25747 stack_pointer_rtx,
25748 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25749
25750 if (IS_STACKALIGN (func_type))
25751 /* Restore the original stack pointer. Before prologue, the stack was
25752 realigned and the original stack pointer saved in r0. For details,
25753 see comment in arm_expand_prologue. */
25754 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25755
25756 emit_jump_insn (simple_return_rtx);
25757 }
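/* Example epilogue shapes produced by arm_expand_epilogue (illustrative
   assumptions about typical code, not verbatim compiler output): a simple
   ARM-mode function that saved r4-r7 and LR and needs no special exit
   sequence restores everything and returns with
       pop {r4, r5, r6, r7, pc}
   while a function that cannot return via the PC (e.g. an interworked
   return) instead pops into LR and falls through to the final
   simple_return pattern.  */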
25758
25759 /* Implementation of insn prologue_thumb1_interwork. This is the first
25760 "instruction" of a function called in ARM mode. Switch to Thumb mode. */
25761
25762 const char *
25763 thumb1_output_interwork (void)
25764 {
25765 const char * name;
25766 FILE *f = asm_out_file;
25767
25768 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25769 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25770 == SYMBOL_REF);
25771 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25772
25773 /* Generate code sequence to switch us into Thumb mode. */
25774 /* The .code 32 directive has already been emitted by
25775 ASM_DECLARE_FUNCTION_NAME. */
25776 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25777 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
25778
25779 /* Generate a label, so that the debugger will notice the
25780 change in instruction sets. This label is also used by
25781 the assembler to bypass the ARM code when this function
25782 is called from a Thumb encoded function elsewhere in the
25783 same file. Hence the definition of STUB_NAME here must
25784 agree with the definition in gas/config/tc-arm.c. */
25785
25786 #define STUB_NAME ".real_start_of"
25787
25788 fprintf (f, "\t.code\t16\n");
25789 #ifdef ARM_PE
25790 if (arm_dllexport_name_p (name))
25791 name = arm_strip_name_encoding (name);
25792 #endif
25793 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
25794 fprintf (f, "\t.thumb_func\n");
25795 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
25796
25797 return "";
25798 }
25799
25800 /* Handle the case of a double word load into a low register from
25801 a computed memory address. The computed address may involve a
25802 register which is overwritten by the load. */
25803 const char *
25804 thumb_load_double_from_address (rtx *operands)
25805 {
25806 rtx addr;
25807 rtx base;
25808 rtx offset;
25809 rtx arg1;
25810 rtx arg2;
25811
25812 gcc_assert (REG_P (operands[0]));
25813 gcc_assert (MEM_P (operands[1]));
25814
25815 /* Get the memory address. */
25816 addr = XEXP (operands[1], 0);
25817
25818 /* Work out how the memory address is computed. */
25819 switch (GET_CODE (addr))
25820 {
25821 case REG:
25822 operands[2] = adjust_address (operands[1], SImode, 4);
25823
25824 if (REGNO (operands[0]) == REGNO (addr))
25825 {
25826 output_asm_insn ("ldr\t%H0, %2", operands);
25827 output_asm_insn ("ldr\t%0, %1", operands);
25828 }
25829 else
25830 {
25831 output_asm_insn ("ldr\t%0, %1", operands);
25832 output_asm_insn ("ldr\t%H0, %2", operands);
25833 }
25834 break;
25835
25836 case CONST:
25837 /* Compute <address> + 4 for the high order load. */
25838 operands[2] = adjust_address (operands[1], SImode, 4);
25839
25840 output_asm_insn ("ldr\t%0, %1", operands);
25841 output_asm_insn ("ldr\t%H0, %2", operands);
25842 break;
25843
25844 case PLUS:
25845 arg1 = XEXP (addr, 0);
25846 arg2 = XEXP (addr, 1);
25847
25848 if (CONSTANT_P (arg1))
25849 base = arg2, offset = arg1;
25850 else
25851 base = arg1, offset = arg2;
25852
25853 gcc_assert (REG_P (base));
25854
25855 /* Catch the case of <address> = <reg> + <reg> */
25856 if (REG_P (offset))
25857 {
25858 int reg_offset = REGNO (offset);
25859 int reg_base = REGNO (base);
25860 int reg_dest = REGNO (operands[0]);
25861
25862 /* Add the base and offset registers together into the
25863 higher destination register. */
25864 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
25865 reg_dest + 1, reg_base, reg_offset);
25866
25867 /* Load the lower destination register from the address in
25868 the higher destination register. */
25869 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
25870 reg_dest, reg_dest + 1);
25871
25872 /* Load the higher destination register from its own address
25873 plus 4. */
25874 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
25875 reg_dest + 1, reg_dest + 1);
25876 }
25877 else
25878 {
25879 /* Compute <address> + 4 for the high order load. */
25880 operands[2] = adjust_address (operands[1], SImode, 4);
25881
25882 /* If the computed address is held in the low order register
25883 then load the high order register first, otherwise always
25884 load the low order register first. */
25885 if (REGNO (operands[0]) == REGNO (base))
25886 {
25887 output_asm_insn ("ldr\t%H0, %2", operands);
25888 output_asm_insn ("ldr\t%0, %1", operands);
25889 }
25890 else
25891 {
25892 output_asm_insn ("ldr\t%0, %1", operands);
25893 output_asm_insn ("ldr\t%H0, %2", operands);
25894 }
25895 }
25896 break;
25897
25898 case LABEL_REF:
25899 /* With no registers to worry about we can just load the value
25900 directly. */
25901 operands[2] = adjust_address (operands[1], SImode, 4);
25902
25903 output_asm_insn ("ldr\t%H0, %2", operands);
25904 output_asm_insn ("ldr\t%0, %1", operands);
25905 break;
25906
25907 default:
25908 gcc_unreachable ();
25909 }
25910
25911 return "";
25912 }
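/* Example of the overlap handling above (an illustration; the register
   numbers are assumed): for a doubleword load where operands[0] is r2 and
   the address is held in r2 itself, the high word is loaded first so the
   base is not clobbered prematurely:
       ldr r3, [r2, #4]
       ldr r2, [r2]
   If the destination and base differ, the low word is loaded first.  */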
25913
25914 const char *
25915 thumb_output_move_mem_multiple (int n, rtx *operands)
25916 {
25917 switch (n)
25918 {
25919 case 2:
25920 if (REGNO (operands[4]) > REGNO (operands[5]))
25921 std::swap (operands[4], operands[5]);
25922
25923 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
25924 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
25925 break;
25926
25927 case 3:
25928 if (REGNO (operands[4]) > REGNO (operands[5]))
25929 std::swap (operands[4], operands[5]);
25930 if (REGNO (operands[5]) > REGNO (operands[6]))
25931 std::swap (operands[5], operands[6]);
25932 if (REGNO (operands[4]) > REGNO (operands[5]))
25933 std::swap (operands[4], operands[5]);
25934
25935 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
25936 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
25937 break;
25938
25939 default:
25940 gcc_unreachable ();
25941 }
25942
25943 return "";
25944 }
25945
25946 /* Output a call-via instruction for Thumb state. */
25947 const char *
25948 thumb_call_via_reg (rtx reg)
25949 {
25950 int regno = REGNO (reg);
25951 rtx *labelp;
25952
25953 gcc_assert (regno < LR_REGNUM);
25954
25955 /* If we are in the normal text section we can use a single instance
25956 per compilation unit. If we are doing function sections, then we need
25957 an entry per section, since we can't rely on reachability. */
25958 if (in_section == text_section)
25959 {
25960 thumb_call_reg_needed = 1;
25961
25962 if (thumb_call_via_label[regno] == NULL)
25963 thumb_call_via_label[regno] = gen_label_rtx ();
25964 labelp = thumb_call_via_label + regno;
25965 }
25966 else
25967 {
25968 if (cfun->machine->call_via[regno] == NULL)
25969 cfun->machine->call_via[regno] = gen_label_rtx ();
25970 labelp = cfun->machine->call_via + regno;
25971 }
25972
25973 output_asm_insn ("bl\t%a0", labelp);
25974 return "";
25975 }
25976
25977 /* Routines for generating rtl. */
25978 void
25979 thumb_expand_movmemqi (rtx *operands)
25980 {
25981 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
25982 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
25983 HOST_WIDE_INT len = INTVAL (operands[2]);
25984 HOST_WIDE_INT offset = 0;
25985
25986 while (len >= 12)
25987 {
25988 emit_insn (gen_movmem12b (out, in, out, in));
25989 len -= 12;
25990 }
25991
25992 if (len >= 8)
25993 {
25994 emit_insn (gen_movmem8b (out, in, out, in));
25995 len -= 8;
25996 }
25997
25998 if (len >= 4)
25999 {
26000 rtx reg = gen_reg_rtx (SImode);
26001 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26002 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26003 len -= 4;
26004 offset += 4;
26005 }
26006
26007 if (len >= 2)
26008 {
26009 rtx reg = gen_reg_rtx (HImode);
26010 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26011 plus_constant (Pmode, in,
26012 offset))));
26013 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26014 offset)),
26015 reg));
26016 len -= 2;
26017 offset += 2;
26018 }
26019
26020 if (len)
26021 {
26022 rtx reg = gen_reg_rtx (QImode);
26023 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26024 plus_constant (Pmode, in,
26025 offset))));
26026 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26027 offset)),
26028 reg));
26029 }
26030 }
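/* A minimal sketch of the chunking strategy above, written as standalone C
   for exposition only (the helper name is hypothetical and the block is
   excluded from the build).  For len == 23 it accounts for a 12-byte block
   move, an 8-byte block move, a halfword and a byte.  */
#if 0
static void
example_copy_plan (int len)
{
  while (len >= 12)
    len -= 12;          /* one ldmia/stmia of three words */
  if (len >= 8)
    len -= 8;           /* one ldmia/stmia of two words */
  if (len >= 4)
    len -= 4;           /* one word load/store */
  if (len >= 2)
    len -= 2;           /* one halfword load/store */
  /* Any remaining byte is copied with a byte load/store.  */
}
#endif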
26031
26032 void
26033 thumb_reload_out_hi (rtx *operands)
26034 {
26035 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26036 }
26037
26038 /* Return the length of a function name prefix
26039 that starts with the character C. */
26040 static int
26041 arm_get_strip_length (int c)
26042 {
26043 switch (c)
26044 {
26045 ARM_NAME_ENCODING_LENGTHS
26046 default: return 0;
26047 }
26048 }
26049
26050 /* Return a pointer to a function's name with any
26051 and all prefix encodings stripped from it. */
26052 const char *
26053 arm_strip_name_encoding (const char *name)
26054 {
26055 int skip;
26056
26057 while ((skip = arm_get_strip_length (* name)))
26058 name += skip;
26059
26060 return name;
26061 }
26062
26063 /* If there is a '*' anywhere in the name's prefix, then
26064 emit the stripped name verbatim, otherwise prepend an
26065 underscore if leading underscores are being used. */
26066 void
26067 arm_asm_output_labelref (FILE *stream, const char *name)
26068 {
26069 int skip;
26070 int verbatim = 0;
26071
26072 while ((skip = arm_get_strip_length (* name)))
26073 {
26074 verbatim |= (*name == '*');
26075 name += skip;
26076 }
26077
26078 if (verbatim)
26079 fputs (name, stream);
26080 else
26081 asm_fprintf (stream, "%U%s", name);
26082 }
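/* Illustrative behaviour of the two routines above (assuming '*' is one of
   the prefixes handled by ARM_NAME_ENCODING_LENGTHS, as the verbatim check
   above implies): arm_strip_name_encoding ("*foo") returns "foo", and
   arm_asm_output_labelref emits "foo" verbatim rather than prepending the
   user-label prefix.  */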
26083
26084 /* This function is used to emit an EABI tag and its associated value.
26085 We emit the numerical value of the tag in case the assembler does not
26086 support textual tags (e.g. gas prior to 2.20). If requested, we include
26087 the tag name in a comment so that anyone reading the assembler output
26088 will know which tag is being set.
26089
26090 This function is not static because arm-c.c needs it too. */
26091
26092 void
26093 arm_emit_eabi_attribute (const char *name, int num, int val)
26094 {
26095 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26096 if (flag_verbose_asm || flag_debug_asm)
26097 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26098 asm_fprintf (asm_out_file, "\n");
26099 }
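/* Example (typical output, shown for illustration): with -fverbose-asm,
       arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
   emits something like
       .eabi_attribute 28, 1   @ Tag_ABI_VFP_args
   and without verbose asm only the numeric form is printed.  */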
26100
26101 /* This function is used to print CPU tuning information as a comment
26102 in the assembler file. Pointers are not printed for now. */
26103
26104 void
26105 arm_print_tune_info (void)
26106 {
26107 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26108 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26109 current_tune->constant_limit);
26110 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26111 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26112 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26113 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26114 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26115 "prefetch.l1_cache_size:\t%d\n",
26116 current_tune->prefetch.l1_cache_size);
26117 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26118 "prefetch.l1_cache_line_size:\t%d\n",
26119 current_tune->prefetch.l1_cache_line_size);
26120 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26121 "prefer_constant_pool:\t%d\n",
26122 (int) current_tune->prefer_constant_pool);
26123 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26124 "branch_cost:\t(s:speed, p:predictable)\n");
26125 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26126 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26127 current_tune->branch_cost (false, false));
26128 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26129 current_tune->branch_cost (false, true));
26130 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26131 current_tune->branch_cost (true, false));
26132 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26133 current_tune->branch_cost (true, true));
26134 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26135 "prefer_ldrd_strd:\t%d\n",
26136 (int) current_tune->prefer_ldrd_strd);
26137 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26138 "logical_op_non_short_circuit:\t[%d,%d]\n",
26139 (int) current_tune->logical_op_non_short_circuit_thumb,
26140 (int) current_tune->logical_op_non_short_circuit_arm);
26141 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26142 "prefer_neon_for_64bits:\t%d\n",
26143 (int) current_tune->prefer_neon_for_64bits);
26144 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26145 "disparage_flag_setting_t16_encodings:\t%d\n",
26146 (int) current_tune->disparage_flag_setting_t16_encodings);
26147 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26148 "string_ops_prefer_neon:\t%d\n",
26149 (int) current_tune->string_ops_prefer_neon);
26150 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26151 "max_insns_inline_memset:\t%d\n",
26152 current_tune->max_insns_inline_memset);
26153 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26154 current_tune->fusible_ops);
26155 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26156 (int) current_tune->sched_autopref);
26157 }
26158
26159 /* Print .arch and .arch_extension directives corresponding to the
26160 current architecture configuration. */
26161 static void
26162 arm_print_asm_arch_directives ()
26163 {
26164 const arch_option *arch
26165 = arm_parse_arch_option_name (all_architectures, "-march",
26166 arm_active_target.arch_name);
26167 auto_sbitmap opt_bits (isa_num_bits);
26168
26169 gcc_assert (arch);
26170
26171 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26172 if (!arch->common.extensions)
26173 return;
26174
26175 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26176 opt->name != NULL;
26177 opt++)
26178 {
26179 if (!opt->remove)
26180 {
26181 arm_initialize_isa (opt_bits, opt->isa_bits);
26182
26183 /* If every feature bit of this option is set in the target
26184 ISA specification, print out the option name. However,
26185 don't print anything if all the bits are part of the
26186 FPU specification. */
26187 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26188 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26189 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26190 }
26191 }
26192 }
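/* For example (an assumed command line, shown for illustration), compiling
   with -march=armv8-a+crc would typically emit
       .arch armv8-a
       .arch_extension crc
   while extensions whose feature bits only describe the FPU are left to the
   .fpu directive and are not printed here.  */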
26193
26194 static void
26195 arm_file_start (void)
26196 {
26197 int val;
26198
26199 if (TARGET_BPABI)
26200 {
26201 /* We don't have a specified CPU. Use the architecture to
26202 generate the tags.
26203
26204 Note: it might be better to do this unconditionally, then the
26205 assembler would not need to know about all new CPU names as
26206 they are added. */
26207 if (!arm_active_target.core_name)
26208 {
26209 /* armv7ve doesn't support any extensions. */
26210 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26211 {
26212 /* Keep backward compatibility for assemblers
26213 which don't support armv7ve. */
26214 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26215 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26216 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26217 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26218 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26219 }
26220 else
26221 arm_print_asm_arch_directives ();
26222 }
26223 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26224 asm_fprintf (asm_out_file, "\t.arch %s\n",
26225 arm_active_target.core_name + 8);
26226 else
26227 {
26228 const char* truncated_name
26229 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26230 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26231 }
26232
26233 if (print_tune_info)
26234 arm_print_tune_info ();
26235
26236 if (! TARGET_SOFT_FLOAT)
26237 {
26238 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26239 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26240
26241 if (TARGET_HARD_FLOAT_ABI)
26242 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26243 }
26244
26245 /* Some of these attributes only apply when the corresponding features
26246 are used. However we don't have any easy way of figuring this out.
26247 Conservatively record the setting that would have been used. */
26248
26249 if (flag_rounding_math)
26250 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26251
26252 if (!flag_unsafe_math_optimizations)
26253 {
26254 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26255 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26256 }
26257 if (flag_signaling_nans)
26258 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26259
26260 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26261 flag_finite_math_only ? 1 : 3);
26262
26263 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26264 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26265 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26266 flag_short_enums ? 1 : 2);
26267
26268 /* Tag_ABI_optimization_goals. */
26269 if (optimize_size)
26270 val = 4;
26271 else if (optimize >= 2)
26272 val = 2;
26273 else if (optimize)
26274 val = 1;
26275 else
26276 val = 6;
26277 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26278
26279 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26280 unaligned_access);
26281
26282 if (arm_fp16_format)
26283 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26284 (int) arm_fp16_format);
26285
26286 if (arm_lang_output_object_attributes_hook)
26287 arm_lang_output_object_attributes_hook();
26288 }
26289
26290 default_file_start ();
26291 }
26292
26293 static void
26294 arm_file_end (void)
26295 {
26296 int regno;
26297
26298 if (NEED_INDICATE_EXEC_STACK)
26299 /* Add .note.GNU-stack. */
26300 file_end_indicate_exec_stack ();
26301
26302 if (! thumb_call_reg_needed)
26303 return;
26304
26305 switch_to_section (text_section);
26306 asm_fprintf (asm_out_file, "\t.code 16\n");
26307 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26308
26309 for (regno = 0; regno < LR_REGNUM; regno++)
26310 {
26311 rtx label = thumb_call_via_label[regno];
26312
26313 if (label != 0)
26314 {
26315 targetm.asm_out.internal_label (asm_out_file, "L",
26316 CODE_LABEL_NUMBER (label));
26317 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26318 }
26319 }
26320 }
26321
26322 #ifndef ARM_PE
26323 /* Symbols in the text segment can be accessed without indirecting via the
26324 constant pool; it may take an extra binary operation, but this is still
26325 faster than indirecting via memory. Don't do this when not optimizing,
26326 since we won't be calculating all of the offsets necessary to do this
26327 simplification. */
26328
26329 static void
26330 arm_encode_section_info (tree decl, rtx rtl, int first)
26331 {
26332 if (optimize > 0 && TREE_CONSTANT (decl))
26333 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26334
26335 default_encode_section_info (decl, rtl, first);
26336 }
26337 #endif /* !ARM_PE */
26338
26339 static void
26340 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26341 {
26342 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26343 && !strcmp (prefix, "L"))
26344 {
26345 arm_ccfsm_state = 0;
26346 arm_target_insn = NULL;
26347 }
26348 default_internal_label (stream, prefix, labelno);
26349 }
26350
26351 /* Output code to add DELTA to the first argument, and then jump
26352 to FUNCTION. Used for C++ multiple inheritance. */
26353
26354 static void
26355 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26356 HOST_WIDE_INT, tree function)
26357 {
26358 static int thunk_label = 0;
26359 char label[256];
26360 char labelpc[256];
26361 int mi_delta = delta;
26362 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26363 int shift = 0;
26364 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26365 ? 1 : 0);
26366 if (mi_delta < 0)
26367 mi_delta = - mi_delta;
26368
26369 final_start_function (emit_barrier (), file, 1);
26370
26371 if (TARGET_THUMB1)
26372 {
26373 int labelno = thunk_label++;
26374 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26375 /* Thunks are entered in ARM mode when available. */
26376 if (TARGET_THUMB1_ONLY)
26377 {
26378 /* push r3 so we can use it as a temporary. */
26379 /* TODO: Omit this save if r3 is not used. */
26380 fputs ("\tpush {r3}\n", file);
26381 fputs ("\tldr\tr3, ", file);
26382 }
26383 else
26384 {
26385 fputs ("\tldr\tr12, ", file);
26386 }
26387 assemble_name (file, label);
26388 fputc ('\n', file);
26389 if (flag_pic)
26390 {
26391 /* If we are generating PIC, the ldr instruction below loads
26392 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26393 the address of the add + 8, so we have:
26394
26395 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26396 = target + 1.
26397
26398 Note that we have "+ 1" because some versions of GNU ld
26399 don't set the low bit of the result for R_ARM_REL32
26400 relocations against thumb function symbols.
26401 On ARMv6M this is +4, not +8. */
26402 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26403 assemble_name (file, labelpc);
26404 fputs (":\n", file);
26405 if (TARGET_THUMB1_ONLY)
26406 {
26407 /* This is 2 insns after the start of the thunk, so we know it
26408 is 4-byte aligned. */
26409 fputs ("\tadd\tr3, pc, r3\n", file);
26410 fputs ("\tmov r12, r3\n", file);
26411 }
26412 else
26413 fputs ("\tadd\tr12, pc, r12\n", file);
26414 }
26415 else if (TARGET_THUMB1_ONLY)
26416 fputs ("\tmov r12, r3\n", file);
26417 }
26418 if (TARGET_THUMB1_ONLY)
26419 {
26420 if (mi_delta > 255)
26421 {
26422 fputs ("\tldr\tr3, ", file);
26423 assemble_name (file, label);
26424 fputs ("+4\n", file);
26425 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26426 mi_op, this_regno, this_regno);
26427 }
26428 else if (mi_delta != 0)
26429 {
26430 /* Thumb1 unified syntax requires an 's' suffix in the instruction name
26431 when one of the operands is an immediate. */
26432 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26433 mi_op, this_regno, this_regno,
26434 mi_delta);
26435 }
26436 }
26437 else
26438 {
26439 /* TODO: Use movw/movt for large constants when available. */
26440 while (mi_delta != 0)
26441 {
26442 if ((mi_delta & (3 << shift)) == 0)
26443 shift += 2;
26444 else
26445 {
26446 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26447 mi_op, this_regno, this_regno,
26448 mi_delta & (0xff << shift));
26449 mi_delta &= ~(0xff << shift);
26450 shift += 8;
26451 }
26452 }
26453 }
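    /* Worked example of the loop above (the numbers are chosen purely for
       illustration): for mi_delta == 0x1234 with THIS in r0 it emits
           add r0, r0, #564     @ 0x234, an 8-bit field at an even shift
           add r0, r0, #4096    @ 0x1000
       i.e. the delta is applied as a series of shifter-immediate
       constants.  */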
26454 if (TARGET_THUMB1)
26455 {
26456 if (TARGET_THUMB1_ONLY)
26457 fputs ("\tpop\t{r3}\n", file);
26458
26459 fprintf (file, "\tbx\tr12\n");
26460 ASM_OUTPUT_ALIGN (file, 2);
26461 assemble_name (file, label);
26462 fputs (":\n", file);
26463 if (flag_pic)
26464 {
26465 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26466 rtx tem = XEXP (DECL_RTL (function), 0);
26467 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26468 pipeline offset is four rather than eight. Adjust the offset
26469 accordingly. */
26470 tem = plus_constant (GET_MODE (tem), tem,
26471 TARGET_THUMB1_ONLY ? -3 : -7);
26472 tem = gen_rtx_MINUS (GET_MODE (tem),
26473 tem,
26474 gen_rtx_SYMBOL_REF (Pmode,
26475 ggc_strdup (labelpc)));
26476 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26477 }
26478 else
26479 /* Output ".word .LTHUNKn". */
26480 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26481
26482 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26483 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26484 }
26485 else
26486 {
26487 fputs ("\tb\t", file);
26488 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26489 if (NEED_PLT_RELOC)
26490 fputs ("(PLT)", file);
26491 fputc ('\n', file);
26492 }
26493
26494 final_end_function ();
26495 }
26496
26497 /* MI thunk handling for TARGET_32BIT. */
26498
26499 static void
26500 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26501 HOST_WIDE_INT vcall_offset, tree function)
26502 {
26503 /* On ARM, this_regno is R0 or R1 depending on
26504 whether the function returns an aggregate or not. */
26506 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26507 function)
26508 ? R1_REGNUM : R0_REGNUM);
26509
26510 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26511 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26512 reload_completed = 1;
26513 emit_note (NOTE_INSN_PROLOGUE_END);
26514
26515 /* Add DELTA to THIS_RTX. */
26516 if (delta != 0)
26517 arm_split_constant (PLUS, Pmode, NULL_RTX,
26518 delta, this_rtx, this_rtx, false);
26519
26520 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26521 if (vcall_offset != 0)
26522 {
26523 /* Load *THIS_RTX. */
26524 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26525 /* Compute *THIS_RTX + VCALL_OFFSET. */
26526 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26527 false);
26528 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26529 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26530 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26531 }
26532
26533 /* Generate a tail call to the target function. */
26534 if (!TREE_USED (function))
26535 {
26536 assemble_external (function);
26537 TREE_USED (function) = 1;
26538 }
26539 rtx funexp = XEXP (DECL_RTL (function), 0);
26540 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26541 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26542 SIBLING_CALL_P (insn) = 1;
26543
26544 insn = get_insns ();
26545 shorten_branches (insn);
26546 final_start_function (insn, file, 1);
26547 final (insn, file, 1);
26548 final_end_function ();
26549
26550 /* Stop pretending this is a post-reload pass. */
26551 reload_completed = 0;
26552 }
26553
26554 /* Output code to add DELTA to the first argument, and then jump
26555 to FUNCTION. Used for C++ multiple inheritance. */
26556
26557 static void
26558 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26559 HOST_WIDE_INT vcall_offset, tree function)
26560 {
26561 if (TARGET_32BIT)
26562 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26563 else
26564 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26565 }
26566
26567 int
26568 arm_emit_vector_const (FILE *file, rtx x)
26569 {
26570 int i;
26571 const char * pattern;
26572
26573 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26574
26575 switch (GET_MODE (x))
26576 {
26577 case E_V2SImode: pattern = "%08x"; break;
26578 case E_V4HImode: pattern = "%04x"; break;
26579 case E_V8QImode: pattern = "%02x"; break;
26580 default: gcc_unreachable ();
26581 }
26582
26583 fprintf (file, "0x");
26584 for (i = CONST_VECTOR_NUNITS (x); i--;)
26585 {
26586 rtx element;
26587
26588 element = CONST_VECTOR_ELT (x, i);
26589 fprintf (file, pattern, INTVAL (element));
26590 }
26591
26592 return 1;
26593 }
26594
26595 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
26596 HFmode constant pool entries are actually loaded with ldr. */
26597 void
26598 arm_emit_fp16_const (rtx c)
26599 {
26600 long bits;
26601
26602 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26603 if (WORDS_BIG_ENDIAN)
26604 assemble_zeros (2);
26605 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26606 if (!WORDS_BIG_ENDIAN)
26607 assemble_zeros (2);
26608 }
26609
26610 const char *
26611 arm_output_load_gr (rtx *operands)
26612 {
26613 rtx reg;
26614 rtx offset;
26615 rtx wcgr;
26616 rtx sum;
26617
26618 if (!MEM_P (operands [1])
26619 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26620 || !REG_P (reg = XEXP (sum, 0))
26621 || !CONST_INT_P (offset = XEXP (sum, 1))
26622 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26623 return "wldrw%?\t%0, %1";
26624
26625 /* Fix up an out-of-range load of a GR register. */
26626 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26627 wcgr = operands[0];
26628 operands[0] = reg;
26629 output_asm_insn ("ldr%?\t%0, %1", operands);
26630
26631 operands[0] = wcgr;
26632 operands[1] = reg;
26633 output_asm_insn ("tmcr%?\t%0, %1", operands);
26634 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26635
26636 return "";
26637 }
26638
26639 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26640
26641 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26642 named arg and all anonymous args onto the stack.
26643 XXX I know the prologue shouldn't be pushing registers, but it is faster
26644 that way. */
26645
26646 static void
26647 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26648 machine_mode mode,
26649 tree type,
26650 int *pretend_size,
26651 int second_time ATTRIBUTE_UNUSED)
26652 {
26653 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26654 int nregs;
26655
26656 cfun->machine->uses_anonymous_args = 1;
26657 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26658 {
26659 nregs = pcum->aapcs_ncrn;
26660 if (nregs & 1)
26661 {
26662 int res = arm_needs_doubleword_align (mode, type);
26663 if (res < 0 && warn_psabi)
26664 inform (input_location, "parameter passing for argument of "
26665 "type %qT changed in GCC 7.1", type);
26666 else if (res > 0)
26667 nregs++;
26668 }
26669 }
26670 else
26671 nregs = pcum->nregs;
26672
26673 if (nregs < NUM_ARG_REGS)
26674 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26675 }
26676
26677 /* We can't rely on the caller doing the proper promotion when
26678 using APCS or ATPCS. */
26679
26680 static bool
26681 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26682 {
26683 return !TARGET_AAPCS_BASED;
26684 }
26685
26686 static machine_mode
26687 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26688 machine_mode mode,
26689 int *punsignedp ATTRIBUTE_UNUSED,
26690 const_tree fntype ATTRIBUTE_UNUSED,
26691 int for_return ATTRIBUTE_UNUSED)
26692 {
26693 if (GET_MODE_CLASS (mode) == MODE_INT
26694 && GET_MODE_SIZE (mode) < 4)
26695 return SImode;
26696
26697 return mode;
26698 }
26699
26700
26701 static bool
26702 arm_default_short_enums (void)
26703 {
26704 return ARM_DEFAULT_SHORT_ENUMS;
26705 }
26706
26707
26708 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26709
26710 static bool
26711 arm_align_anon_bitfield (void)
26712 {
26713 return TARGET_AAPCS_BASED;
26714 }
26715
26716
26717 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26718
26719 static tree
26720 arm_cxx_guard_type (void)
26721 {
26722 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26723 }
26724
26725
26726 /* The EABI says test the least significant bit of a guard variable. */
26727
26728 static bool
26729 arm_cxx_guard_mask_bit (void)
26730 {
26731 return TARGET_AAPCS_BASED;
26732 }
26733
26734
26735 /* The EABI specifies that all array cookies are 8 bytes long. */
26736
26737 static tree
26738 arm_get_cookie_size (tree type)
26739 {
26740 tree size;
26741
26742 if (!TARGET_AAPCS_BASED)
26743 return default_cxx_get_cookie_size (type);
26744
26745 size = build_int_cst (sizetype, 8);
26746 return size;
26747 }
26748
26749
26750 /* The EABI says that array cookies should also contain the element size. */
26751
26752 static bool
26753 arm_cookie_has_size (void)
26754 {
26755 return TARGET_AAPCS_BASED;
26756 }
26757
26758
26759 /* The EABI says constructors and destructors should return a pointer to
26760 the object constructed/destroyed. */
26761
26762 static bool
26763 arm_cxx_cdtor_returns_this (void)
26764 {
26765 return TARGET_AAPCS_BASED;
26766 }
26767
26768 /* The EABI says that an inline function may never be the key
26769 method. */
26770
26771 static bool
26772 arm_cxx_key_method_may_be_inline (void)
26773 {
26774 return !TARGET_AAPCS_BASED;
26775 }
26776
26777 static void
26778 arm_cxx_determine_class_data_visibility (tree decl)
26779 {
26780 if (!TARGET_AAPCS_BASED
26781 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
26782 return;
26783
26784 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
26785 is exported. However, on systems without dynamic vague linkage,
26786 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
26787 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
26788 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
26789 else
26790 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
26791 DECL_VISIBILITY_SPECIFIED (decl) = 1;
26792 }
26793
26794 static bool
26795 arm_cxx_class_data_always_comdat (void)
26796 {
26797 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
26798 vague linkage if the class has no key function. */
26799 return !TARGET_AAPCS_BASED;
26800 }
26801
26802
26803 /* The EABI says __aeabi_atexit should be used to register static
26804 destructors. */
26805
26806 static bool
26807 arm_cxx_use_aeabi_atexit (void)
26808 {
26809 return TARGET_AAPCS_BASED;
26810 }
26811
26812
26813 void
26814 arm_set_return_address (rtx source, rtx scratch)
26815 {
26816 arm_stack_offsets *offsets;
26817 HOST_WIDE_INT delta;
26818 rtx addr;
26819 unsigned long saved_regs;
26820
26821 offsets = arm_get_frame_offsets ();
26822 saved_regs = offsets->saved_regs_mask;
26823
26824 if ((saved_regs & (1 << LR_REGNUM)) == 0)
26825 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26826 else
26827 {
26828 if (frame_pointer_needed)
26829 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
26830 else
26831 {
26832 /* LR will be the first saved register. */
26833 delta = offsets->outgoing_args - (offsets->frame + 4);
26834
26835
26836 if (delta >= 4096)
26837 {
26838 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
26839 GEN_INT (delta & ~4095)));
26840 addr = scratch;
26841 delta &= 4095;
26842 }
26843 else
26844 addr = stack_pointer_rtx;
26845
26846 addr = plus_constant (Pmode, addr, delta);
26847 }
26848 /* The store needs to be marked as frame related in order to prevent
26849 DSE from deleting it as dead if it is based on fp. */
26850 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26851 RTX_FRAME_RELATED_P (insn) = 1;
26852 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26853 }
26854 }
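/* Worked example of the large-offset case above (the values are assumed for
   illustration): if the saved LR lies 5000 bytes above the stack pointer,
   delta & ~4095 == 4096 is added into SCRATCH first and the store then uses
   the remaining offset of 904, e.g.
       add r3, sp, #4096
       str r0, [r3, #904]
   assuming SOURCE is in r0 and SCRATCH is r3.  */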
26855
26856
26857 void
26858 thumb_set_return_address (rtx source, rtx scratch)
26859 {
26860 arm_stack_offsets *offsets;
26861 HOST_WIDE_INT delta;
26862 HOST_WIDE_INT limit;
26863 int reg;
26864 rtx addr;
26865 unsigned long mask;
26866
26867 emit_use (source);
26868
26869 offsets = arm_get_frame_offsets ();
26870 mask = offsets->saved_regs_mask;
26871 if (mask & (1 << LR_REGNUM))
26872 {
26873 limit = 1024;
26874 /* Find the saved regs. */
26875 if (frame_pointer_needed)
26876 {
26877 delta = offsets->soft_frame - offsets->saved_args;
26878 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
26879 if (TARGET_THUMB1)
26880 limit = 128;
26881 }
26882 else
26883 {
26884 delta = offsets->outgoing_args - offsets->saved_args;
26885 reg = SP_REGNUM;
26886 }
26887 /* Allow for the stack frame. */
26888 if (TARGET_THUMB1 && TARGET_BACKTRACE)
26889 delta -= 16;
26890 /* The link register is always the first saved register. */
26891 delta -= 4;
26892
26893 /* Construct the address. */
26894 addr = gen_rtx_REG (SImode, reg);
26895 if (delta > limit)
26896 {
26897 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
26898 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
26899 addr = scratch;
26900 }
26901 else
26902 addr = plus_constant (Pmode, addr, delta);
26903
26904 /* The store needs to be marked as frame related in order to prevent
26905 DSE from deleting it as dead if it is based on fp. */
26906 rtx insn = emit_move_insn (gen_frame_mem (Pmode, addr), source);
26907 RTX_FRAME_RELATED_P (insn) = 1;
26908 add_reg_note (insn, REG_CFA_RESTORE, gen_rtx_REG (Pmode, LR_REGNUM));
26909 }
26910 else
26911 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
26912 }
26913
26914 /* Implements target hook vector_mode_supported_p. */
26915 bool
26916 arm_vector_mode_supported_p (machine_mode mode)
26917 {
26918 /* Neon also supports V2SImode, etc. listed in the clause below. */
26919 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
26920 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
26921 || mode == V2DImode || mode == V8HFmode))
26922 return true;
26923
26924 if ((TARGET_NEON || TARGET_IWMMXT)
26925 && ((mode == V2SImode)
26926 || (mode == V4HImode)
26927 || (mode == V8QImode)))
26928 return true;
26929
26930 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
26931 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
26932 || mode == V2HAmode))
26933 return true;
26934
26935 return false;
26936 }
26937
26938 /* Implements target hook array_mode_supported_p. */
26939
26940 static bool
26941 arm_array_mode_supported_p (machine_mode mode,
26942 unsigned HOST_WIDE_INT nelems)
26943 {
26944 if (TARGET_NEON
26945 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
26946 && (nelems >= 2 && nelems <= 4))
26947 return true;
26948
26949 return false;
26950 }
26951
26952 /* Use the option -mvectorize-with-neon-double to override the use of quadword
26953 registers when autovectorizing for Neon, at least until multiple vector
26954 widths are supported properly by the middle-end. */
26955
26956 static machine_mode
26957 arm_preferred_simd_mode (scalar_mode mode)
26958 {
26959 if (TARGET_NEON)
26960 switch (mode)
26961 {
26962 case E_SFmode:
26963 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
26964 case E_SImode:
26965 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
26966 case E_HImode:
26967 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
26968 case E_QImode:
26969 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
26970 case E_DImode:
26971 if (!TARGET_NEON_VECTORIZE_DOUBLE)
26972 return V2DImode;
26973 break;
26974
26975 default:;
26976 }
26977
26978 if (TARGET_REALLY_IWMMXT)
26979 switch (mode)
26980 {
26981 case E_SImode:
26982 return V2SImode;
26983 case E_HImode:
26984 return V4HImode;
26985 case E_QImode:
26986 return V8QImode;
26987
26988 default:;
26989 }
26990
26991 return word_mode;
26992 }
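
/* For example (illustrative): when vectorizing SImode data with Neon
   enabled, arm_preferred_simd_mode prefers V4SImode (a 128-bit Q
   register) unless -mvectorize-with-neon-double is given, in which case
   V2SImode (a 64-bit D register) is chosen; iWMMXt targets get V2SImode
   and everything else falls back to word_mode.  */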
26993
26994 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
26995
26996 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
26997 using r0-r4 for function arguments, r7 for the stack frame and don't have
26998 enough left over to do doubleword arithmetic. For Thumb-2 all the
26999 potentially problematic instructions accept high registers so this is not
27000 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27001 that require many low registers. */
27002 static bool
27003 arm_class_likely_spilled_p (reg_class_t rclass)
27004 {
27005 if ((TARGET_THUMB1 && rclass == LO_REGS)
27006 || rclass == CC_REG)
27007 return true;
27008
27009 return false;
27010 }
27011
27012 /* Implements target hook small_register_classes_for_mode_p. */
27013 bool
27014 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27015 {
27016 return TARGET_THUMB1;
27017 }
27018
27019 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27020 ARM insns and therefore guarantee that the shift count is modulo 256.
27021 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27022 guarantee no particular behavior for out-of-range counts. */
27023
27024 static unsigned HOST_WIDE_INT
27025 arm_shift_truncation_mask (machine_mode mode)
27026 {
27027 return mode == SImode ? 255 : 0;
27028 }
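
/* Worked example (illustrative): a register-specified ARM shift such as
   "mov r0, r1, lsl r2" only uses the bottom byte of r2, so a count of 260
   behaves like a count of 4; hence arm_shift_truncation_mask returns 255
   for SImode.  DImode shifts go through the library routines or optabs
   mentioned above, so no mask (0) is advertised for them.  */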
27029
27030
27031 /* Map internal gcc register numbers to DWARF2 register numbers. */
27032
27033 unsigned int
27034 arm_dbx_register_number (unsigned int regno)
27035 {
27036 if (regno < 16)
27037 return regno;
27038
27039 if (IS_VFP_REGNUM (regno))
27040 {
27041 /* See comment in arm_dwarf_register_span. */
27042 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27043 return 64 + regno - FIRST_VFP_REGNUM;
27044 else
27045 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27046 }
27047
27048 if (IS_IWMMXT_GR_REGNUM (regno))
27049 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27050
27051 if (IS_IWMMXT_REGNUM (regno))
27052 return 112 + regno - FIRST_IWMMXT_REGNUM;
27053
27054 return DWARF_FRAME_REGISTERS;
27055 }
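
/* Worked example (illustrative): arm_dbx_register_number leaves the core
   registers r0-r15 with their own numbers; a VFP register with a
   single-precision view such as s0 gets the legacy DWARF number 64 + 0,
   while a D-only register such as d16 falls into the 256-287 range
   (256 + 16 = 272).  The iWMMXt control registers (wCGR) start at 104
   and the wR0-wR15 data registers at 112.  */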
27056
27057 /* DWARF models VFPv3 registers as 32 64-bit registers.
27058 GCC models them as 64 32-bit registers, so we need to describe this to
27059 the DWARF generation code. Other registers can use the default. */
27060 static rtx
27061 arm_dwarf_register_span (rtx rtl)
27062 {
27063 machine_mode mode;
27064 unsigned regno;
27065 rtx parts[16];
27066 int nregs;
27067 int i;
27068
27069 regno = REGNO (rtl);
27070 if (!IS_VFP_REGNUM (regno))
27071 return NULL_RTX;
27072
27073 /* XXX FIXME: The EABI defines two VFP register ranges:
27074 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27075 256-287: D0-D31
27076 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27077 corresponding D register. Until GDB supports this, we shall use the
27078 legacy encodings. We also use these encodings for D0-D15 for
27079 compatibility with older debuggers. */
27080 mode = GET_MODE (rtl);
27081 if (GET_MODE_SIZE (mode) < 8)
27082 return NULL_RTX;
27083
27084 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27085 {
27086 nregs = GET_MODE_SIZE (mode) / 4;
27087 for (i = 0; i < nregs; i += 2)
27088 if (TARGET_BIG_END)
27089 {
27090 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27091 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27092 }
27093 else
27094 {
27095 parts[i] = gen_rtx_REG (SImode, regno + i);
27096 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27097 }
27098 }
27099 else
27100 {
27101 nregs = GET_MODE_SIZE (mode) / 8;
27102 for (i = 0; i < nregs; i++)
27103 parts[i] = gen_rtx_REG (DImode, regno + i);
27104 }
27105
27106 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27107 }
27108
27109 #if ARM_UNWIND_INFO
27110 /* Emit unwind directives for a store-multiple instruction or stack pointer
27111 push during alignment.
27112 These should only ever be generated by the function prologue code, so
27113 expect them to have a particular form.
27114 The store-multiple instruction sometimes pushes pc as the last register,
27115 although it should not be tracked in the unwind information, or for -Os it
27116 sometimes pushes some dummy registers before the first register that needs
27117 to be tracked in the unwind information; such dummy registers are there just
27118 to avoid a separate stack adjustment, and will not be restored in the
27119 epilogue. */
27120
27121 static void
27122 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27123 {
27124 int i;
27125 HOST_WIDE_INT offset;
27126 HOST_WIDE_INT nregs;
27127 int reg_size;
27128 unsigned reg;
27129 unsigned lastreg;
27130 unsigned padfirst = 0, padlast = 0;
27131 rtx e;
27132
27133 e = XVECEXP (p, 0, 0);
27134 gcc_assert (GET_CODE (e) == SET);
27135
27136 /* First insn will adjust the stack pointer. */
27137 gcc_assert (GET_CODE (e) == SET
27138 && REG_P (SET_DEST (e))
27139 && REGNO (SET_DEST (e)) == SP_REGNUM
27140 && GET_CODE (SET_SRC (e)) == PLUS);
27141
27142 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27143 nregs = XVECLEN (p, 0) - 1;
27144 gcc_assert (nregs);
27145
27146 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27147 if (reg < 16)
27148 {
27149 /* For -Os dummy registers can be pushed at the beginning to
27150 avoid separate stack pointer adjustment. */
27151 e = XVECEXP (p, 0, 1);
27152 e = XEXP (SET_DEST (e), 0);
27153 if (GET_CODE (e) == PLUS)
27154 padfirst = INTVAL (XEXP (e, 1));
27155 gcc_assert (padfirst == 0 || optimize_size);
27156 /* The function prologue may also push pc, but not annotate it as it is
27157 never restored. We turn this into a stack pointer adjustment. */
27158 e = XVECEXP (p, 0, nregs);
27159 e = XEXP (SET_DEST (e), 0);
27160 if (GET_CODE (e) == PLUS)
27161 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27162 else
27163 padlast = offset - 4;
27164 gcc_assert (padlast == 0 || padlast == 4);
27165 if (padlast == 4)
27166 fprintf (asm_out_file, "\t.pad #4\n");
27167 reg_size = 4;
27168 fprintf (asm_out_file, "\t.save {");
27169 }
27170 else if (IS_VFP_REGNUM (reg))
27171 {
27172 reg_size = 8;
27173 fprintf (asm_out_file, "\t.vsave {");
27174 }
27175 else
27176 /* Unknown register type. */
27177 gcc_unreachable ();
27178
27179 /* If the stack increment doesn't match the size of the saved registers,
27180 something has gone horribly wrong. */
27181 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27182
27183 offset = padfirst;
27184 lastreg = 0;
27185 /* The remaining insns will describe the stores. */
27186 for (i = 1; i <= nregs; i++)
27187 {
27188 /* Expect (set (mem <addr>) (reg)).
27189 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27190 e = XVECEXP (p, 0, i);
27191 gcc_assert (GET_CODE (e) == SET
27192 && MEM_P (SET_DEST (e))
27193 && REG_P (SET_SRC (e)));
27194
27195 reg = REGNO (SET_SRC (e));
27196 gcc_assert (reg >= lastreg);
27197
27198 if (i != 1)
27199 fprintf (asm_out_file, ", ");
27200 /* We can't use %r for vfp because we need to use the
27201 double precision register names. */
27202 if (IS_VFP_REGNUM (reg))
27203 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27204 else
27205 asm_fprintf (asm_out_file, "%r", reg);
27206
27207 if (flag_checking)
27208 {
27209 /* Check that the addresses are consecutive. */
27210 e = XEXP (SET_DEST (e), 0);
27211 if (GET_CODE (e) == PLUS)
27212 gcc_assert (REG_P (XEXP (e, 0))
27213 && REGNO (XEXP (e, 0)) == SP_REGNUM
27214 && CONST_INT_P (XEXP (e, 1))
27215 && offset == INTVAL (XEXP (e, 1)));
27216 else
27217 gcc_assert (i == 1
27218 && REG_P (e)
27219 && REGNO (e) == SP_REGNUM);
27220 offset += reg_size;
27221 }
27222 }
27223 fprintf (asm_out_file, "}\n");
27224 if (padfirst)
27225 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27226 }
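
/* As an illustration, a prologue store-multiple such as
     push {r4, r5, lr}
   is described by ".save {r4, r5, lr}", a VFP save by ".vsave {d8, d9}",
   and a trailing push of pc (which is never restored) is turned into an
   extra ".pad #4" stack adjustment.  */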
27227
27228 /* Emit unwind directives for a SET. */
27229
27230 static void
27231 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27232 {
27233 rtx e0;
27234 rtx e1;
27235 unsigned reg;
27236
27237 e0 = XEXP (p, 0);
27238 e1 = XEXP (p, 1);
27239 switch (GET_CODE (e0))
27240 {
27241 case MEM:
27242 /* Pushing a single register. */
27243 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27244 || !REG_P (XEXP (XEXP (e0, 0), 0))
27245 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27246 abort ();
27247
27248 asm_fprintf (asm_out_file, "\t.save ");
27249 if (IS_VFP_REGNUM (REGNO (e1)))
27250 asm_fprintf(asm_out_file, "{d%d}\n",
27251 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27252 else
27253 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27254 break;
27255
27256 case REG:
27257 if (REGNO (e0) == SP_REGNUM)
27258 {
27259 /* A stack increment. */
27260 if (GET_CODE (e1) != PLUS
27261 || !REG_P (XEXP (e1, 0))
27262 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27263 || !CONST_INT_P (XEXP (e1, 1)))
27264 abort ();
27265
27266 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27267 -INTVAL (XEXP (e1, 1)));
27268 }
27269 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27270 {
27271 HOST_WIDE_INT offset;
27272
27273 if (GET_CODE (e1) == PLUS)
27274 {
27275 if (!REG_P (XEXP (e1, 0))
27276 || !CONST_INT_P (XEXP (e1, 1)))
27277 abort ();
27278 reg = REGNO (XEXP (e1, 0));
27279 offset = INTVAL (XEXP (e1, 1));
27280 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27281 HARD_FRAME_POINTER_REGNUM, reg,
27282 offset);
27283 }
27284 else if (REG_P (e1))
27285 {
27286 reg = REGNO (e1);
27287 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27288 HARD_FRAME_POINTER_REGNUM, reg);
27289 }
27290 else
27291 abort ();
27292 }
27293 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27294 {
27295 /* Move from sp to reg. */
27296 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27297 }
27298 else if (GET_CODE (e1) == PLUS
27299 && REG_P (XEXP (e1, 0))
27300 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27301 && CONST_INT_P (XEXP (e1, 1)))
27302 {
27303 /* Set reg to offset from sp. */
27304 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27305 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27306 }
27307 else
27308 abort ();
27309 break;
27310
27311 default:
27312 abort ();
27313 }
27314 }
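
/* Typical mappings handled here (illustrative):
     (set (mem (pre_dec sp)) (reg r4))        ->  .save {r4}
     (set sp (plus sp (const_int -16)))       ->  .pad #16
     (set fp (plus sp (const_int 8)))         ->  .setfp fp, sp, #8  */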
27315
27316
27317 /* Emit unwind directives for the given insn. */
27318
27319 static void
27320 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27321 {
27322 rtx note, pat;
27323 bool handled_one = false;
27324
27325 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27326 return;
27327
27328 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27329 && (TREE_NOTHROW (current_function_decl)
27330 || crtl->all_throwers_are_sibcalls))
27331 return;
27332
27333 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27334 return;
27335
27336 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27337 {
27338 switch (REG_NOTE_KIND (note))
27339 {
27340 case REG_FRAME_RELATED_EXPR:
27341 pat = XEXP (note, 0);
27342 goto found;
27343
27344 case REG_CFA_REGISTER:
27345 pat = XEXP (note, 0);
27346 if (pat == NULL)
27347 {
27348 pat = PATTERN (insn);
27349 if (GET_CODE (pat) == PARALLEL)
27350 pat = XVECEXP (pat, 0, 0);
27351 }
27352
27353 /* Only emitted for IS_STACKALIGN re-alignment. */
27354 {
27355 rtx dest, src;
27356 unsigned reg;
27357
27358 src = SET_SRC (pat);
27359 dest = SET_DEST (pat);
27360
27361 gcc_assert (src == stack_pointer_rtx);
27362 reg = REGNO (dest);
27363 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27364 reg + 0x90, reg);
27365 }
27366 handled_one = true;
27367 break;
27368
27369 /* The INSN is generated in the epilogue.  It is set as RTX_FRAME_RELATED_P
27370 to get correct DWARF information for shrink-wrapping.  We should not
27371 emit unwind information for it because these notes are used either for
27372 pretend arguments or to adjust sp and restore registers from the
27373 stack. */
27374 case REG_CFA_DEF_CFA:
27375 case REG_CFA_ADJUST_CFA:
27376 case REG_CFA_RESTORE:
27377 return;
27378
27379 case REG_CFA_EXPRESSION:
27380 case REG_CFA_OFFSET:
27381 /* ??? Only handling here what we actually emit. */
27382 gcc_unreachable ();
27383
27384 default:
27385 break;
27386 }
27387 }
27388 if (handled_one)
27389 return;
27390 pat = PATTERN (insn);
27391 found:
27392
27393 switch (GET_CODE (pat))
27394 {
27395 case SET:
27396 arm_unwind_emit_set (asm_out_file, pat);
27397 break;
27398
27399 case SEQUENCE:
27400 /* Store multiple. */
27401 arm_unwind_emit_sequence (asm_out_file, pat);
27402 break;
27403
27404 default:
27405 abort();
27406 }
27407 }
27408
27409
27410 /* Output a reference from a function exception table to the type_info
27411 object X. The EABI specifies that the symbol should be relocated by
27412 an R_ARM_TARGET2 relocation. */
27413
27414 static bool
27415 arm_output_ttype (rtx x)
27416 {
27417 fputs ("\t.word\t", asm_out_file);
27418 output_addr_const (asm_out_file, x);
27419 /* Use special relocations for symbol references. */
27420 if (!CONST_INT_P (x))
27421 fputs ("(TARGET2)", asm_out_file);
27422 fputc ('\n', asm_out_file);
27423
27424 return TRUE;
27425 }
27426
27427 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27428
27429 static void
27430 arm_asm_emit_except_personality (rtx personality)
27431 {
27432 fputs ("\t.personality\t", asm_out_file);
27433 output_addr_const (asm_out_file, personality);
27434 fputc ('\n', asm_out_file);
27435 }
27436 #endif /* ARM_UNWIND_INFO */
27437
27438 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27439
27440 static void
27441 arm_asm_init_sections (void)
27442 {
27443 #if ARM_UNWIND_INFO
27444 exception_section = get_unnamed_section (0, output_section_asm_op,
27445 "\t.handlerdata");
27446 #endif /* ARM_UNWIND_INFO */
27447
27448 #ifdef OBJECT_FORMAT_ELF
27449 if (target_pure_code)
27450 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27451 #endif
27452 }
27453
27454 /* Output unwind directives for the start/end of a function. */
27455
27456 void
27457 arm_output_fn_unwind (FILE * f, bool prologue)
27458 {
27459 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27460 return;
27461
27462 if (prologue)
27463 fputs ("\t.fnstart\n", f);
27464 else
27465 {
27466 /* If this function will never be unwound, then mark it as such.
27467 The same condition is used in arm_unwind_emit to suppress
27468 the frame annotations. */
27469 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27470 && (TREE_NOTHROW (current_function_decl)
27471 || crtl->all_throwers_are_sibcalls))
27472 fputs("\t.cantunwind\n", f);
27473
27474 fputs ("\t.fnend\n", f);
27475 }
27476 }
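
/* The resulting bracketing is, for example:
     .fnstart
     ...              @ body, with .save/.pad/.setfp from arm_unwind_emit
     .cantunwind      @ only if the function can never be unwound
     .fnend  */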
27477
27478 static bool
27479 arm_emit_tls_decoration (FILE *fp, rtx x)
27480 {
27481 enum tls_reloc reloc;
27482 rtx val;
27483
27484 val = XVECEXP (x, 0, 0);
27485 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27486
27487 output_addr_const (fp, val);
27488
27489 switch (reloc)
27490 {
27491 case TLS_GD32:
27492 fputs ("(tlsgd)", fp);
27493 break;
27494 case TLS_LDM32:
27495 fputs ("(tlsldm)", fp);
27496 break;
27497 case TLS_LDO32:
27498 fputs ("(tlsldo)", fp);
27499 break;
27500 case TLS_IE32:
27501 fputs ("(gottpoff)", fp);
27502 break;
27503 case TLS_LE32:
27504 fputs ("(tpoff)", fp);
27505 break;
27506 case TLS_DESCSEQ:
27507 fputs ("(tlsdesc)", fp);
27508 break;
27509 default:
27510 gcc_unreachable ();
27511 }
27512
27513 switch (reloc)
27514 {
27515 case TLS_GD32:
27516 case TLS_LDM32:
27517 case TLS_IE32:
27518 case TLS_DESCSEQ:
27519 fputs (" + (. - ", fp);
27520 output_addr_const (fp, XVECEXP (x, 0, 2));
27521 /* For DESCSEQ the third operand encodes thumbness, and is added.  */
27522 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27523 output_addr_const (fp, XVECEXP (x, 0, 3));
27524 fputc (')', fp);
27525 break;
27526 default:
27527 break;
27528 }
27529
27530 return TRUE;
27531 }
27532
27533 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27534
27535 static void
27536 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27537 {
27538 gcc_assert (size == 4);
27539 fputs ("\t.word\t", file);
27540 output_addr_const (file, x);
27541 fputs ("(tlsldo)", file);
27542 }
27543
27544 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27545
27546 static bool
27547 arm_output_addr_const_extra (FILE *fp, rtx x)
27548 {
27549 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27550 return arm_emit_tls_decoration (fp, x);
27551 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27552 {
27553 char label[256];
27554 int labelno = INTVAL (XVECEXP (x, 0, 0));
27555
27556 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27557 assemble_name_raw (fp, label);
27558
27559 return TRUE;
27560 }
27561 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27562 {
27563 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27564 if (GOT_PCREL)
27565 fputs ("+.", fp);
27566 fputs ("-(", fp);
27567 output_addr_const (fp, XVECEXP (x, 0, 0));
27568 fputc (')', fp);
27569 return TRUE;
27570 }
27571 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27572 {
27573 output_addr_const (fp, XVECEXP (x, 0, 0));
27574 if (GOT_PCREL)
27575 fputs ("+.", fp);
27576 fputs ("-(", fp);
27577 output_addr_const (fp, XVECEXP (x, 0, 1));
27578 fputc (')', fp);
27579 return TRUE;
27580 }
27581 else if (GET_CODE (x) == CONST_VECTOR)
27582 return arm_emit_vector_const (fp, x);
27583
27584 return FALSE;
27585 }
27586
27587 /* Output assembly for a shift instruction.
27588 SET_FLAGS determines how the instruction modifies the condition codes.
27589 0 - Do not set condition codes.
27590 1 - Set condition codes.
27591 2 - Use smallest instruction. */
27592 const char *
27593 arm_output_shift(rtx * operands, int set_flags)
27594 {
27595 char pattern[100];
27596 static const char flag_chars[3] = {'?', '.', '!'};
27597 const char *shift;
27598 HOST_WIDE_INT val;
27599 char c;
27600
27601 c = flag_chars[set_flags];
27602 shift = shift_op(operands[3], &val);
27603 if (shift)
27604 {
27605 if (val != -1)
27606 operands[2] = GEN_INT(val);
27607 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27608 }
27609 else
27610 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27611
27612 output_asm_insn (pattern, operands);
27613 return "";
27614 }
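
/* Example (illustrative): for an arithmetic right shift by the constant 3
   with SET_FLAGS == 1, the template becomes "asr%.\t%0, %1, %2", which is
   printed as e.g. "asrs r0, r1, #3"; with SET_FLAGS == 0 the "%?"
   modifier allows the insn to be conditionally executed instead.  */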
27615
27616 /* Output assembly for a WMMX immediate shift instruction. */
27617 const char *
27618 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27619 {
27620 int shift = INTVAL (operands[2]);
27621 char templ[50];
27622 machine_mode opmode = GET_MODE (operands[0]);
27623
27624 gcc_assert (shift >= 0);
27625
27626 /* Handle an out-of-range shift value: > 63 (for the D qualifier),
27627 > 31 (for the W qualifier) or > 15 (for the H qualifier). */
27628 if (((opmode == V4HImode) && (shift > 15))
27629 || ((opmode == V2SImode) && (shift > 31))
27630 || ((opmode == DImode) && (shift > 63)))
27631 {
27632 if (wror_or_wsra)
27633 {
27634 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27635 output_asm_insn (templ, operands);
27636 if (opmode == DImode)
27637 {
27638 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27639 output_asm_insn (templ, operands);
27640 }
27641 }
27642 else
27643 {
27644 /* The destination register will contain all zeros. */
27645 sprintf (templ, "wzero\t%%0");
27646 output_asm_insn (templ, operands);
27647 }
27648 return "";
27649 }
27650
27651 if ((opmode == DImode) && (shift > 32))
27652 {
27653 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27654 output_asm_insn (templ, operands);
27655 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27656 output_asm_insn (templ, operands);
27657 }
27658 else
27659 {
27660 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27661 output_asm_insn (templ, operands);
27662 }
27663 return "";
27664 }
27665
27666 /* Output assembly for a WMMX tinsr instruction. */
27667 const char *
27668 arm_output_iwmmxt_tinsr (rtx *operands)
27669 {
27670 int mask = INTVAL (operands[3]);
27671 int i;
27672 char templ[50];
27673 int units = mode_nunits[GET_MODE (operands[0])];
27674 gcc_assert ((mask & (mask - 1)) == 0);
27675 for (i = 0; i < units; ++i)
27676 {
27677 if ((mask & 0x01) == 1)
27678 {
27679 break;
27680 }
27681 mask >>= 1;
27682 }
27683 gcc_assert (i < units);
27684 {
27685 switch (GET_MODE (operands[0]))
27686 {
27687 case E_V8QImode:
27688 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27689 break;
27690 case E_V4HImode:
27691 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27692 break;
27693 case E_V2SImode:
27694 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27695 break;
27696 default:
27697 gcc_unreachable ();
27698 break;
27699 }
27700 output_asm_insn (templ, operands);
27701 }
27702 return "";
27703 }
27704
27705 /* Output a Thumb-1 casesi dispatch sequence. */
27706 const char *
27707 thumb1_output_casesi (rtx *operands)
27708 {
27709 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27710
27711 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27712
27713 switch (GET_MODE(diff_vec))
27714 {
27715 case E_QImode:
27716 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27717 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27718 case E_HImode:
27719 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27720 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27721 case E_SImode:
27722 return "bl\t%___gnu_thumb1_case_si";
27723 default:
27724 gcc_unreachable ();
27725 }
27726 }
27727
27728 /* Output a Thumb-2 casesi instruction. */
27729 const char *
27730 thumb2_output_casesi (rtx *operands)
27731 {
27732 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27733
27734 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27735
27736 output_asm_insn ("cmp\t%0, %1", operands);
27737 output_asm_insn ("bhi\t%l3", operands);
27738 switch (GET_MODE(diff_vec))
27739 {
27740 case E_QImode:
27741 return "tbb\t[%|pc, %0]";
27742 case E_HImode:
27743 return "tbh\t[%|pc, %0, lsl #1]";
27744 case E_SImode:
27745 if (flag_pic)
27746 {
27747 output_asm_insn ("adr\t%4, %l2", operands);
27748 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27749 output_asm_insn ("add\t%4, %4, %5", operands);
27750 return "bx\t%4";
27751 }
27752 else
27753 {
27754 output_asm_insn ("adr\t%4, %l2", operands);
27755 return "ldr\t%|pc, [%4, %0, lsl #2]";
27756 }
27757 default:
27758 gcc_unreachable ();
27759 }
27760 }
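
/* For a QImode dispatch table the emitted sequence is roughly:
     cmp   r0, r1         @ index against the table bound
     bhi   .Ldefault
     tbb   [pc, r0]
   HImode uses "tbh [pc, r0, lsl #1]", and SImode falls back to an
   adr/ldr sequence (adr/ldr/add/bx when generating PIC).  */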
27761
27762 /* Implement TARGET_SCHED_ISSUE_RATE.  Look up the issue rate in the
27763 per-core tuning structs. */
27764 static int
27765 arm_issue_rate (void)
27766 {
27767 return current_tune->issue_rate;
27768 }
27769
27770 /* Return how many instructions the scheduler should look ahead to choose
27771 the best one. */
27772 static int
27773 arm_first_cycle_multipass_dfa_lookahead (void)
27774 {
27775 int issue_rate = arm_issue_rate ();
27776
27777 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
27778 }
27779
27780 /* Enable modeling of L2 auto-prefetcher. */
27781 static int
27782 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
27783 {
27784 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
27785 }
27786
27787 const char *
27788 arm_mangle_type (const_tree type)
27789 {
27790 /* The ARM ABI documents (10th October 2008) say that "__va_list"
27791 has to be mangled as if it is in the "std" namespace. */
27792 if (TARGET_AAPCS_BASED
27793 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
27794 return "St9__va_list";
27795
27796 /* Half-precision float. */
27797 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
27798 return "Dh";
27799
27800 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
27801 builtin type. */
27802 if (TYPE_NAME (type) != NULL)
27803 return arm_mangle_builtin_type (type);
27804
27805 /* Use the default mangling. */
27806 return NULL;
27807 }
27808
27809 /* Order of allocation of core registers for Thumb: this allocation is
27810 written over the corresponding initial entries of the array
27811 initialized with REG_ALLOC_ORDER. We allocate all low registers
27812 first. Saving and restoring a low register is usually cheaper than
27813 using a call-clobbered high register. */
27814
27815 static const int thumb_core_reg_alloc_order[] =
27816 {
27817 3, 2, 1, 0, 4, 5, 6, 7,
27818 12, 14, 8, 9, 10, 11
27819 };
27820
27821 /* Adjust register allocation order when compiling for Thumb. */
27822
27823 void
27824 arm_order_regs_for_local_alloc (void)
27825 {
27826 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
27827 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
27828 if (TARGET_THUMB)
27829 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
27830 sizeof (thumb_core_reg_alloc_order));
27831 }
27832
27833 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
27834
27835 bool
27836 arm_frame_pointer_required (void)
27837 {
27838 if (SUBTARGET_FRAME_POINTER_REQUIRED)
27839 return true;
27840
27841 /* If the function receives nonlocal gotos, it needs to save the frame
27842 pointer in the nonlocal_goto_save_area object. */
27843 if (cfun->has_nonlocal_label)
27844 return true;
27845
27846 /* The frame pointer is required for non-leaf APCS frames. */
27847 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
27848 return true;
27849
27850 /* If we are probing the stack in the prologue, we will have a faulting
27851 instruction prior to the stack adjustment and this requires a frame
27852 pointer if we want to catch the exception using the EABI unwinder. */
27853 if (!IS_INTERRUPT (arm_current_func_type ())
27854 && flag_stack_check == STATIC_BUILTIN_STACK_CHECK
27855 && arm_except_unwind_info (&global_options) == UI_TARGET
27856 && cfun->can_throw_non_call_exceptions)
27857 {
27858 HOST_WIDE_INT size = get_frame_size ();
27859
27860 /* That's irrelevant if there is no stack adjustment. */
27861 if (size <= 0)
27862 return false;
27863
27864 /* That's relevant only if there is a stack probe. */
27865 if (crtl->is_leaf && !cfun->calls_alloca)
27866 {
27867 /* We don't have the final size of the frame so adjust. */
27868 size += 32 * UNITS_PER_WORD;
27869 if (size > PROBE_INTERVAL && size > STACK_CHECK_PROTECT)
27870 return true;
27871 }
27872 else
27873 return true;
27874 }
27875
27876 return false;
27877 }
27878
27879 /* Only Thumb-1 lacks support for conditional execution, so return true
27880 if the target is not Thumb-1. */
27881 static bool
27882 arm_have_conditional_execution (void)
27883 {
27884 return !TARGET_THUMB1;
27885 }
27886
27887 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
27888 static HOST_WIDE_INT
27889 arm_vector_alignment (const_tree type)
27890 {
27891 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
27892
27893 if (TARGET_AAPCS_BASED)
27894 align = MIN (align, 64);
27895
27896 return align;
27897 }
27898
27899 static unsigned int
27900 arm_autovectorize_vector_sizes (void)
27901 {
27902 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
27903 }
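
/* Returning 16 | 8 lets the auto-vectorizer try both 128-bit and 64-bit
   vector sizes; with -mvectorize-with-neon-double the value 0 restricts
   it to the single preferred SIMD mode chosen above.  */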
27904
27905 static bool
27906 arm_vector_alignment_reachable (const_tree type, bool is_packed)
27907 {
27908 /* Vectors which aren't in packed structures will not be less aligned than
27909 the natural alignment of their element type, so this is safe. */
27910 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27911 return !is_packed;
27912
27913 return default_builtin_vector_alignment_reachable (type, is_packed);
27914 }
27915
27916 static bool
27917 arm_builtin_support_vector_misalignment (machine_mode mode,
27918 const_tree type, int misalignment,
27919 bool is_packed)
27920 {
27921 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
27922 {
27923 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
27924
27925 if (is_packed)
27926 return align == 1;
27927
27928 /* If the misalignment is unknown, we should be able to handle the access
27929 so long as it is not to a member of a packed data structure. */
27930 if (misalignment == -1)
27931 return true;
27932
27933 /* Return true if the misalignment is a multiple of the natural alignment
27934 of the vector's element type. This is probably always going to be
27935 true in practice, since we've already established that this isn't a
27936 packed access. */
27937 return ((misalignment % align) == 0);
27938 }
27939
27940 return default_builtin_support_vector_misalignment (mode, type, misalignment,
27941 is_packed);
27942 }
27943
27944 static void
27945 arm_conditional_register_usage (void)
27946 {
27947 int regno;
27948
27949 if (TARGET_THUMB1 && optimize_size)
27950 {
27951 /* When optimizing for size on Thumb-1, it's better not
27952 to use the HI regs, because of the overhead of
27953 stacking them. */
27954 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
27955 fixed_regs[regno] = call_used_regs[regno] = 1;
27956 }
27957
27958 /* The link register can be clobbered by any branch insn,
27959 but we have no way to track that at present, so mark
27960 it as unavailable. */
27961 if (TARGET_THUMB1)
27962 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
27963
27964 if (TARGET_32BIT && TARGET_HARD_FLOAT)
27965 {
27966 /* VFPv3 registers are disabled when earlier VFP
27967 versions are selected due to the definition of
27968 LAST_VFP_REGNUM. */
27969 for (regno = FIRST_VFP_REGNUM;
27970 regno <= LAST_VFP_REGNUM; ++ regno)
27971 {
27972 fixed_regs[regno] = 0;
27973 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
27974 || regno >= FIRST_VFP_REGNUM + 32;
27975 }
27976 }
27977
27978 if (TARGET_REALLY_IWMMXT)
27979 {
27980 regno = FIRST_IWMMXT_GR_REGNUM;
27981 /* The 2002/10/09 revision of the XScale ABI has wCG0
27982 and wCG1 as call-preserved registers. The 2002/11/21
27983 revision changed this so that all wCG registers are
27984 scratch registers. */
27985 for (regno = FIRST_IWMMXT_GR_REGNUM;
27986 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
27987 fixed_regs[regno] = 0;
27988 /* The XScale ABI has wR0 - wR9 as scratch registers,
27989 the rest as call-preserved registers. */
27990 for (regno = FIRST_IWMMXT_REGNUM;
27991 regno <= LAST_IWMMXT_REGNUM; ++ regno)
27992 {
27993 fixed_regs[regno] = 0;
27994 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
27995 }
27996 }
27997
27998 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
27999 {
28000 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28001 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28002 }
28003 else if (TARGET_APCS_STACK)
28004 {
28005 fixed_regs[10] = 1;
28006 call_used_regs[10] = 1;
28007 }
28008 /* -mcaller-super-interworking reserves r11 for calls to
28009 _interwork_r11_call_via_rN(). Making the register global
28010 is an easy way of ensuring that it remains valid for all
28011 calls. */
28012 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28013 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28014 {
28015 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28016 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28017 if (TARGET_CALLER_INTERWORKING)
28018 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28019 }
28020 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28021 }
28022
28023 static reg_class_t
28024 arm_preferred_rename_class (reg_class_t rclass)
28025 {
28026 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28027 using GENERAL_REGS.  During the register rename pass we therefore prefer
28028 LO_REGS, which can reduce code size. */
28029 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28030 return LO_REGS;
28031 else
28032 return NO_REGS;
28033 }
28034
28035 /* Compute the attribute "length" of insn "*push_multi".
28036 So this function MUST be kept in sync with that insn pattern. */
28037 int
28038 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28039 {
28040 int i, regno, hi_reg;
28041 int num_saves = XVECLEN (parallel_op, 0);
28042
28043 /* ARM mode. */
28044 if (TARGET_ARM)
28045 return 4;
28046 /* Thumb1 mode. */
28047 if (TARGET_THUMB1)
28048 return 2;
28049
28050 /* Thumb2 mode. */
28051 regno = REGNO (first_op);
28052 /* For PUSH/STM in Thumb-2 mode, we can use a 16-bit encoding if the register
28053 list fits in 8 bits.  Normally this means all registers in the list must be
28054 LO_REGS, that is (R0-R7).  If any HI_REGS are used, then we must use a 32-bit
28055 encoding.  The one exception is PUSH, where LR in HI_REGS can still be used
28056 with a 16-bit encoding. */
28057 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28058 for (i = 1; i < num_saves && !hi_reg; i++)
28059 {
28060 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28061 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28062 }
28063
28064 if (!hi_reg)
28065 return 2;
28066 return 4;
28067 }
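
/* Examples (illustrative): in Thumb-2 mode "push {r0-r3, lr}" can use the
   16-bit encoding (length 2), because LR is the one high register allowed
   there, whereas "push {r4, r8}" needs the 32-bit encoding (length 4)
   since r8 is a high register other than LR.  */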
28068
28069 /* Compute the attribute "length" of an insn.  Currently, this function is used
28070 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28071 "*pop_multiple_with_writeback_and_return".  OPERANDS is the toplevel PARALLEL
28072 rtx, RETURN_PC is true if OPERANDS contains a return insn, and WRITE_BACK_P is
28073 true if OPERANDS contains an insn which explicitly updates the base register. */
28074
28075 int
28076 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28077 {
28078 /* ARM mode. */
28079 if (TARGET_ARM)
28080 return 4;
28081 /* Thumb1 mode. */
28082 if (TARGET_THUMB1)
28083 return 2;
28084
28085 rtx parallel_op = operands[0];
28086 /* Index of the last element of the PARALLEL. */
28087 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28088 /* Initialize to the number of the base register. */
28089 unsigned regno = REGNO (operands[1]);
28090 /* Skip return and write back pattern.
28091 We only need register pop pattern for later analysis. */
28092 unsigned first_indx = 0;
28093 first_indx += return_pc ? 1 : 0;
28094 first_indx += write_back_p ? 1 : 0;
28095
28096 /* A pop operation can be done through LDM or POP.  If the base register is SP
28097 and write back is used, then LDM is an alias of POP. */
28098 bool pop_p = (regno == SP_REGNUM && write_back_p);
28099 bool ldm_p = !pop_p;
28100
28101 /* Check base register for LDM. */
28102 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28103 return 4;
28104
28105 /* Check each register in the list. */
28106 for (; indx >= first_indx; indx--)
28107 {
28108 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28109 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28110 comment in arm_attr_length_push_multi. */
28111 if (REGNO_REG_CLASS (regno) == HI_REGS
28112 && (regno != PC_REGNUM || ldm_p))
28113 return 4;
28114 }
28115
28116 return 2;
28117 }
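
/* Examples (illustrative): "pop {r4, pc}" (base SP with write back) can
   use the 16-bit encoding (length 2) since PC is allowed in a 16-bit POP,
   whereas an LDM whose base register, or any register in its list, is a
   high register needs the 32-bit encoding (length 4).  */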
28118
28119 /* Compute the number of instructions emitted by output_move_double. */
28120 int
28121 arm_count_output_move_double_insns (rtx *operands)
28122 {
28123 int count;
28124 rtx ops[2];
28125 /* output_move_double may modify the operands array, so call it
28126 here on a copy of the array. */
28127 ops[0] = operands[0];
28128 ops[1] = operands[1];
28129 output_move_double (ops, false, &count);
28130 return count;
28131 }
28132
28133 int
28134 vfp3_const_double_for_fract_bits (rtx operand)
28135 {
28136 REAL_VALUE_TYPE r0;
28137
28138 if (!CONST_DOUBLE_P (operand))
28139 return 0;
28140
28141 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28142 if (exact_real_inverse (DFmode, &r0)
28143 && !REAL_VALUE_NEGATIVE (r0))
28144 {
28145 if (exact_real_truncate (DFmode, &r0))
28146 {
28147 HOST_WIDE_INT value = real_to_integer (&r0);
28148 value = value & 0xffffffff;
28149 if ((value != 0) && ( (value & (value - 1)) == 0))
28150 {
28151 int ret = exact_log2 (value);
28152 gcc_assert (IN_RANGE (ret, 0, 31));
28153 return ret;
28154 }
28155 }
28156 }
28157 return 0;
28158 }
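
/* Worked example (illustrative): for the constant 0.125 the exact
   reciprocal is 8.0, which truncates exactly to the integer 8, a power of
   two, so vfp3_const_double_for_fract_bits returns log2 (8) = 3, the
   number of fractional bits for a fixed-point vcvt.  */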
28159
28160 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28161 log2 is in [1, 32], return that log2. Otherwise return -1.
28162 This is used in the patterns for vcvt.s32.f32 floating-point to
28163 fixed-point conversions. */
28164
28165 int
28166 vfp3_const_double_for_bits (rtx x)
28167 {
28168 const REAL_VALUE_TYPE *r;
28169
28170 if (!CONST_DOUBLE_P (x))
28171 return -1;
28172
28173 r = CONST_DOUBLE_REAL_VALUE (x);
28174
28175 if (REAL_VALUE_NEGATIVE (*r)
28176 || REAL_VALUE_ISNAN (*r)
28177 || REAL_VALUE_ISINF (*r)
28178 || !real_isinteger (r, SFmode))
28179 return -1;
28180
28181 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28182
28183 /* The exact_log2 above will have returned -1 if this is
28184 not an exact log2. */
28185 if (!IN_RANGE (hwint, 1, 32))
28186 return -1;
28187
28188 return hwint;
28189 }
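
/* Worked example (illustrative): 65536.0 is a non-negative integer and an
   exact power of two, so vfp3_const_double_for_bits returns
   log2 (65536) = 16; 3.0 (not a power of two) and 2^33 (log2 outside
   [1, 32]) both return -1.  */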
28190
28191 \f
28192 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28193
28194 static void
28195 arm_pre_atomic_barrier (enum memmodel model)
28196 {
28197 if (need_atomic_barrier_p (model, true))
28198 emit_insn (gen_memory_barrier ());
28199 }
28200
28201 static void
28202 arm_post_atomic_barrier (enum memmodel model)
28203 {
28204 if (need_atomic_barrier_p (model, false))
28205 emit_insn (gen_memory_barrier ());
28206 }
28207
28208 /* Emit the load-exclusive and store-exclusive instructions.
28209 Use acquire and release versions if necessary. */
28210
28211 static void
28212 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28213 {
28214 rtx (*gen) (rtx, rtx);
28215
28216 if (acq)
28217 {
28218 switch (mode)
28219 {
28220 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28221 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28222 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28223 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28224 default:
28225 gcc_unreachable ();
28226 }
28227 }
28228 else
28229 {
28230 switch (mode)
28231 {
28232 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28233 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28234 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28235 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28236 default:
28237 gcc_unreachable ();
28238 }
28239 }
28240
28241 emit_insn (gen (rval, mem));
28242 }
28243
28244 static void
28245 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28246 rtx mem, bool rel)
28247 {
28248 rtx (*gen) (rtx, rtx, rtx);
28249
28250 if (rel)
28251 {
28252 switch (mode)
28253 {
28254 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28255 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28256 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28257 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28258 default:
28259 gcc_unreachable ();
28260 }
28261 }
28262 else
28263 {
28264 switch (mode)
28265 {
28266 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28267 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28268 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28269 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28270 default:
28271 gcc_unreachable ();
28272 }
28273 }
28274
28275 emit_insn (gen (bval, rval, mem));
28276 }
28277
28278 /* Mark the previous jump instruction as unlikely. */
28279
28280 static void
28281 emit_unlikely_jump (rtx insn)
28282 {
28283 rtx_insn *jump = emit_jump_insn (insn);
28284 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28285 }
28286
28287 /* Expand a compare and swap pattern. */
28288
28289 void
28290 arm_expand_compare_and_swap (rtx operands[])
28291 {
28292 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28293 machine_mode mode;
28294 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28295
28296 bval = operands[0];
28297 rval = operands[1];
28298 mem = operands[2];
28299 oldval = operands[3];
28300 newval = operands[4];
28301 is_weak = operands[5];
28302 mod_s = operands[6];
28303 mod_f = operands[7];
28304 mode = GET_MODE (mem);
28305
28306 /* Normally the succ memory model must be stronger than fail, but in the
28307 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28308 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28309
28310 if (TARGET_HAVE_LDACQ
28311 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28312 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28313 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28314
28315 switch (mode)
28316 {
28317 case E_QImode:
28318 case E_HImode:
28319 /* For narrow modes, we're going to perform the comparison in SImode,
28320 so do the zero-extension now. */
28321 rval = gen_reg_rtx (SImode);
28322 oldval = convert_modes (SImode, mode, oldval, true);
28323 /* FALLTHRU */
28324
28325 case E_SImode:
28326 /* Force the value into a register if needed. We waited until after
28327 the zero-extension above to do this properly. */
28328 if (!arm_add_operand (oldval, SImode))
28329 oldval = force_reg (SImode, oldval);
28330 break;
28331
28332 case E_DImode:
28333 if (!cmpdi_operand (oldval, mode))
28334 oldval = force_reg (mode, oldval);
28335 break;
28336
28337 default:
28338 gcc_unreachable ();
28339 }
28340
28341 if (TARGET_THUMB1)
28342 {
28343 switch (mode)
28344 {
28345 case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28346 case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28347 case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28348 case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28349 default:
28350 gcc_unreachable ();
28351 }
28352 }
28353 else
28354 {
28355 switch (mode)
28356 {
28357 case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28358 case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28359 case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28360 case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28361 default:
28362 gcc_unreachable ();
28363 }
28364 }
28365
28366 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28367 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28368
28369 if (mode == QImode || mode == HImode)
28370 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28371
28372 /* In all cases, we arrange for success to be signaled by Z set.
28373 This arrangement allows for the boolean result to be used directly
28374 in a subsequent branch, post optimization. For Thumb-1 targets, the
28375 boolean negation of the result is also stored in bval because the Thumb-1
28376 backend lacks dependency tracking for the CC flag, since flag-setting is
28377 not represented at the RTL level. */
28378 if (TARGET_THUMB1)
28379 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28380 else
28381 {
28382 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28383 emit_insn (gen_rtx_SET (bval, x));
28384 }
28385 }
28386
28387 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28388 another memory store between the load-exclusive and store-exclusive can
28389 reset the monitor from Exclusive to Open state. This means we must wait
28390 until after reload to split the pattern, lest we get a register spill in
28391 the middle of the atomic sequence. Success of the compare and swap is
28392 indicated by the Z flag set for 32-bit targets and by neg_bval being zero
28393 for Thumb-1 targets (i.e. the negation of the boolean value returned by
28394 the atomic_compare_and_swap<mode> standard pattern in operand 0). */
28395
28396 void
28397 arm_split_compare_and_swap (rtx operands[])
28398 {
28399 rtx rval, mem, oldval, newval, neg_bval;
28400 machine_mode mode;
28401 enum memmodel mod_s, mod_f;
28402 bool is_weak;
28403 rtx_code_label *label1, *label2;
28404 rtx x, cond;
28405
28406 rval = operands[1];
28407 mem = operands[2];
28408 oldval = operands[3];
28409 newval = operands[4];
28410 is_weak = (operands[5] != const0_rtx);
28411 mod_s = memmodel_from_int (INTVAL (operands[6]));
28412 mod_f = memmodel_from_int (INTVAL (operands[7]));
28413 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28414 mode = GET_MODE (mem);
28415
28416 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28417
28418 bool use_acquire = TARGET_HAVE_LDACQ
28419 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28420 || is_mm_release (mod_s));
28421
28422 bool use_release = TARGET_HAVE_LDACQ
28423 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28424 || is_mm_acquire (mod_s));
28425
28426 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28427 a full barrier is emitted after the store-release. */
28428 if (is_armv8_sync)
28429 use_acquire = false;
28430
28431 /* Checks whether a barrier is needed and emits one accordingly. */
28432 if (!(use_acquire || use_release))
28433 arm_pre_atomic_barrier (mod_s);
28434
28435 label1 = NULL;
28436 if (!is_weak)
28437 {
28438 label1 = gen_label_rtx ();
28439 emit_label (label1);
28440 }
28441 label2 = gen_label_rtx ();
28442
28443 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28444
28445 /* Z is set to 0 for 32bit targets (resp. rval set to 1) if oldval != rval,
28446 as required to communicate with arm_expand_compare_and_swap. */
28447 if (TARGET_32BIT)
28448 {
28449 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28450 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28451 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28452 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28453 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28454 }
28455 else
28456 {
28457 emit_move_insn (neg_bval, const1_rtx);
28458 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28459 if (thumb1_cmpneg_operand (oldval, SImode))
28460 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28461 label2, cond));
28462 else
28463 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28464 }
28465
28466 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28467
28468 /* Weak or strong, we want EQ to be true for success, so that we
28469 match the flags that we got from the compare above. */
28470 if (TARGET_32BIT)
28471 {
28472 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28473 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28474 emit_insn (gen_rtx_SET (cond, x));
28475 }
28476
28477 if (!is_weak)
28478 {
28479 /* Z is set to boolean value of !neg_bval, as required to communicate
28480 with arm_expand_compare_and_swap. */
28481 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28482 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28483 }
28484
28485 if (!is_mm_relaxed (mod_f))
28486 emit_label (label2);
28487
28488 /* Checks whether a barrier is needed and emits one accordingly. */
28489 if (is_armv8_sync
28490 || !(use_acquire || use_release))
28491 arm_post_atomic_barrier (mod_s);
28492
28493 if (is_mm_relaxed (mod_f))
28494 emit_label (label2);
28495 }
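
/* For reference, the strong SImode expansion is roughly as follows
   (illustrative; register names are placeholders and barriers and the
   acquire/release variants are omitted):
     .Lretry:
       ldrex   rval, [mem]
       cmp     rval, oldval
       bne     .Ldone
       strex   tmp, newval, [mem]
       cmp     tmp, #0
       bne     .Lretry
     .Ldone:
   Success is then visible to the caller as the Z flag being set.  */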
28496
28497 /* Split an atomic operation pattern. Operation is given by CODE and is one
28498 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28499 operation). Operation is performed on the content at MEM and on VALUE
28500 following the memory model MODEL_RTX. The content at MEM before and after
28501 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28502 success of the operation is returned in COND. Using a scratch register or
28503 an operand register for these determines what result is returned for that
28504 pattern. */
28505
28506 void
28507 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28508 rtx value, rtx model_rtx, rtx cond)
28509 {
28510 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28511 machine_mode mode = GET_MODE (mem);
28512 machine_mode wmode = (mode == DImode ? DImode : SImode);
28513 rtx_code_label *label;
28514 bool all_low_regs, bind_old_new;
28515 rtx x;
28516
28517 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28518
28519 bool use_acquire = TARGET_HAVE_LDACQ
28520 && !(is_mm_relaxed (model) || is_mm_consume (model)
28521 || is_mm_release (model));
28522
28523 bool use_release = TARGET_HAVE_LDACQ
28524 && !(is_mm_relaxed (model) || is_mm_consume (model)
28525 || is_mm_acquire (model));
28526
28527 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28528 a full barrier is emitted after the store-release. */
28529 if (is_armv8_sync)
28530 use_acquire = false;
28531
28532 /* Checks whether a barrier is needed and emits one accordingly. */
28533 if (!(use_acquire || use_release))
28534 arm_pre_atomic_barrier (model);
28535
28536 label = gen_label_rtx ();
28537 emit_label (label);
28538
28539 if (new_out)
28540 new_out = gen_lowpart (wmode, new_out);
28541 if (old_out)
28542 old_out = gen_lowpart (wmode, old_out);
28543 else
28544 old_out = new_out;
28545 value = simplify_gen_subreg (wmode, value, mode, 0);
28546
28547 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28548
28549 /* Does the operation require destination and first operand to use the same
28550 register? This is decided by register constraints of relevant insn
28551 patterns in thumb1.md. */
28552 gcc_assert (!new_out || REG_P (new_out));
28553 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28554 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28555 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28556 bind_old_new =
28557 (TARGET_THUMB1
28558 && code != SET
28559 && code != MINUS
28560 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28561
28562 /* We want to return the old value while putting the result of the operation
28563 in the same register as the old value so copy the old value over to the
28564 destination register and use that register for the operation. */
28565 if (old_out && bind_old_new)
28566 {
28567 emit_move_insn (new_out, old_out);
28568 old_out = new_out;
28569 }
28570
28571 switch (code)
28572 {
28573 case SET:
28574 new_out = value;
28575 break;
28576
28577 case NOT:
28578 x = gen_rtx_AND (wmode, old_out, value);
28579 emit_insn (gen_rtx_SET (new_out, x));
28580 x = gen_rtx_NOT (wmode, new_out);
28581 emit_insn (gen_rtx_SET (new_out, x));
28582 break;
28583
28584 case MINUS:
28585 if (CONST_INT_P (value))
28586 {
28587 value = GEN_INT (-INTVAL (value));
28588 code = PLUS;
28589 }
28590 /* FALLTHRU */
28591
28592 case PLUS:
28593 if (mode == DImode)
28594 {
28595 /* DImode plus/minus need to clobber flags. */
28596 /* The adddi3 and subdi3 patterns are incorrectly written so that
28597 they require matching operands, even when we could easily support
28598 three operands. Thankfully, this can be fixed up post-splitting,
28599 as the individual add+adc patterns do accept three operands and
28600 post-reload cprop can make these moves go away. */
28601 emit_move_insn (new_out, old_out);
28602 if (code == PLUS)
28603 x = gen_adddi3 (new_out, new_out, value);
28604 else
28605 x = gen_subdi3 (new_out, new_out, value);
28606 emit_insn (x);
28607 break;
28608 }
28609 /* FALLTHRU */
28610
28611 default:
28612 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28613 emit_insn (gen_rtx_SET (new_out, x));
28614 break;
28615 }
28616
28617 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28618 use_release);
28619
28620 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28621 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28622
28623 /* Checks whether a barrier is needed and emits one accordingly. */
28624 if (is_armv8_sync
28625 || !(use_acquire || use_release))
28626 arm_post_atomic_barrier (model);
28627 }
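
/* For reference, an SImode atomic add expands to roughly the following
   loop (illustrative; register names are placeholders and barriers are
   omitted):
     .Lretry:
       ldrex   old, [mem]
       add     new, old, value
       strex   cond, new, [mem]
       cmp     cond, #0
       bne     .Lretry
   OLD_OUT then holds the value seen before the operation and NEW_OUT the
   value stored by it.  */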
28628 \f
28629 #define MAX_VECT_LEN 16
28630
28631 struct expand_vec_perm_d
28632 {
28633 rtx target, op0, op1;
28634 unsigned char perm[MAX_VECT_LEN];
28635 machine_mode vmode;
28636 unsigned char nelt;
28637 bool one_vector_p;
28638 bool testing_p;
28639 };
28640
28641 /* Generate a variable permutation. */
28642
28643 static void
28644 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28645 {
28646 machine_mode vmode = GET_MODE (target);
28647 bool one_vector_p = rtx_equal_p (op0, op1);
28648
28649 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28650 gcc_checking_assert (GET_MODE (op0) == vmode);
28651 gcc_checking_assert (GET_MODE (op1) == vmode);
28652 gcc_checking_assert (GET_MODE (sel) == vmode);
28653 gcc_checking_assert (TARGET_NEON);
28654
28655 if (one_vector_p)
28656 {
28657 if (vmode == V8QImode)
28658 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28659 else
28660 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28661 }
28662 else
28663 {
28664 rtx pair;
28665
28666 if (vmode == V8QImode)
28667 {
28668 pair = gen_reg_rtx (V16QImode);
28669 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28670 pair = gen_lowpart (TImode, pair);
28671 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28672 }
28673 else
28674 {
28675 pair = gen_reg_rtx (OImode);
28676 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28677 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28678 }
28679 }
28680 }
28681
28682 void
28683 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28684 {
28685 machine_mode vmode = GET_MODE (target);
28686 unsigned int i, nelt = GET_MODE_NUNITS (vmode);
28687 bool one_vector_p = rtx_equal_p (op0, op1);
28688 rtx rmask[MAX_VECT_LEN], mask;
28689
28690 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28691 numbering of elements for big-endian, we must reverse the order. */
28692 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28693
28694 /* The VTBL instruction does not use a modulo index, so we must take care
28695 of that ourselves. */
28696 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28697 for (i = 0; i < nelt; ++i)
28698 rmask[i] = mask;
28699 mask = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rmask));
28700 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28701
28702 arm_expand_vec_perm_1 (target, op0, op1, sel);
28703 }
28704
28705 /* Map lane ordering between architectural lane order, and GCC lane order,
28706 taking into account ABI. See comment above output_move_neon for details. */
28707
28708 static int
28709 neon_endian_lane_map (machine_mode mode, int lane)
28710 {
28711 if (BYTES_BIG_ENDIAN)
28712 {
28713 int nelems = GET_MODE_NUNITS (mode);
28714 /* Reverse lane order. */
28715 lane = (nelems - 1 - lane);
28716 /* Reverse D register order, to match ABI. */
28717 if (GET_MODE_SIZE (mode) == 16)
28718 lane = lane ^ (nelems / 2);
28719 }
28720 return lane;
28721 }
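
/* Worked example (V4SImode chosen purely for illustration): on a
   big-endian target the mapping above first reverses the lane order
   (lane -> 3 - lane) and then, because the mode is 16 bytes wide,
   XORs with nelems / 2 = 2 to swap the two D registers, giving
   0 -> 1, 1 -> 0, 2 -> 3 and 3 -> 2.  */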
28722
28723 /* Some permutations index into pairs of vectors; this is a helper function
28724 to map indexes into those pairs of vectors. */
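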
28725
28726 static int
28727 neon_pair_endian_lane_map (machine_mode mode, int lane)
28728 {
28729 int nelem = GET_MODE_NUNITS (mode);
28730 if (BYTES_BIG_ENDIAN)
28731 lane =
28732 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28733 return lane;
28734 }
28735
28736 /* Generate or test for an insn that supports a constant permutation. */
28737
28738 /* Recognize patterns for the VUZP insns. */
28739
28740 static bool
28741 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28742 {
28743 unsigned int i, odd, mask, nelt = d->nelt;
28744 rtx out0, out1, in0, in1;
28745 rtx (*gen)(rtx, rtx, rtx, rtx);
28746 int first_elem;
28747 int swap_nelt;
28748
28749 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28750 return false;
28751
28752 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28753 big-endian pattern on 64-bit vectors, so we correct for that. */
28754 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28755 && GET_MODE_SIZE (d->vmode) == 8 ? d->nelt : 0;
28756
28757 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28758
28759 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28760 odd = 0;
28761 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28762 odd = 1;
28763 else
28764 return false;
28765 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28766
28767 for (i = 0; i < nelt; i++)
28768 {
28769 unsigned elt =
28770 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28771 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28772 return false;
28773 }
28774
28775 /* Success! */
28776 if (d->testing_p)
28777 return true;
28778
28779 switch (d->vmode)
28780 {
28781 case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
28782 case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
28783 case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
28784 case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
28785 case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
28786 case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
28787 case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
28788 case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
28789 case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
28790 case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
28791 default:
28792 gcc_unreachable ();
28793 }
28794
28795 in0 = d->op0;
28796 in1 = d->op1;
28797 if (swap_nelt != 0)
28798 std::swap (in0, in1);
28799
28800 out0 = d->target;
28801 out1 = gen_reg_rtx (d->vmode);
28802 if (odd)
28803 std::swap (out0, out1);
28804
28805 emit_insn (gen (out0, in0, in1, out1));
28806 return true;
28807 }
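
/* Example of a selector accepted above (little-endian, two V8QImode
   operands, so nelt = 8 and mask = 15): the even-element selector
   { 0, 2, 4, 6, 8, 10, 12, 14 } gives odd = 0 and the odd-element
   selector { 1, 3, 5, 7, 9, 11, 13, 15 } gives odd = 1; both are
   implemented with a single VUZP of OP0 and OP1, with the unwanted
   half of the result going to a scratch register.  */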
28808
28809 /* Recognize patterns for the VZIP insns. */
28810
28811 static bool
28812 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
28813 {
28814 unsigned int i, high, mask, nelt = d->nelt;
28815 rtx out0, out1, in0, in1;
28816 rtx (*gen)(rtx, rtx, rtx, rtx);
28817 int first_elem;
28818 bool is_swapped;
28819
28820 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28821 return false;
28822
28823 is_swapped = BYTES_BIG_ENDIAN;
28824
28825 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
28826
28827 high = nelt / 2;
28828 if (first_elem == neon_endian_lane_map (d->vmode, high))
28829 ;
28830 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
28831 high = 0;
28832 else
28833 return false;
28834 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28835
28836 for (i = 0; i < nelt / 2; i++)
28837 {
28838 unsigned elt =
28839 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
28840 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
28841 != elt)
28842 return false;
28843 elt =
28844 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
28845 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
28846 != elt)
28847 return false;
28848 }
28849
28850 /* Success! */
28851 if (d->testing_p)
28852 return true;
28853
28854 switch (d->vmode)
28855 {
28856 case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
28857 case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
28858 case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
28859 case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
28860 case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
28861 case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
28862 case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
28863 case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
28864 case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
28865 case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
28866 default:
28867 gcc_unreachable ();
28868 }
28869
28870 in0 = d->op0;
28871 in1 = d->op1;
28872 if (is_swapped)
28873 std::swap (in0, in1);
28874
28875 out0 = d->target;
28876 out1 = gen_reg_rtx (d->vmode);
28877 if (high)
28878 std::swap (out0, out1);
28879
28880 emit_insn (gen (out0, in0, in1, out1));
28881 return true;
28882 }
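
/* Example of a selector accepted above (little-endian, two V8QImode
   operands): { 0, 8, 1, 9, 2, 10, 3, 11 } interleaves the low halves
   of OP0 and OP1 (high = 0), while { 4, 12, 5, 13, 6, 14, 7, 15 }
   interleaves the high halves (high = nelt / 2); both map onto a
   single VZIP, with the unwanted half of the result discarded into
   a scratch register.  */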
28883
28884 /* Recognize patterns for the VREV insns. */
28885
28886 static bool
28887 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
28888 {
28889 unsigned int i, j, diff, nelt = d->nelt;
28890 rtx (*gen)(rtx, rtx);
28891
28892 if (!d->one_vector_p)
28893 return false;
28894
28895 diff = d->perm[0];
28896 switch (diff)
28897 {
28898 case 7:
28899 switch (d->vmode)
28900 {
28901 case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
28902 case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
28903 default:
28904 return false;
28905 }
28906 break;
28907 case 3:
28908 switch (d->vmode)
28909 {
28910 case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
28911 case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
28912 case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
28913 case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
28914 case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
28915 case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
28916 default:
28917 return false;
28918 }
28919 break;
28920 case 1:
28921 switch (d->vmode)
28922 {
28923 case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
28924 case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
28925 case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
28926 case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
28927 case E_V4SImode: gen = gen_neon_vrev64v4si; break;
28928 case E_V2SImode: gen = gen_neon_vrev64v2si; break;
28929 case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
28930 case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
28931 default:
28932 return false;
28933 }
28934 break;
28935 default:
28936 return false;
28937 }
28938
28939 for (i = 0; i < nelt ; i += diff + 1)
28940 for (j = 0; j <= diff; j += 1)
28941 {
28942 /* This is guaranteed to be true as the value of diff
28943 is 7, 3 or 1 and we should have enough elements in the
28944 queue to generate this. Getting a vector mask with a
28945 value of diff other than these values implies that
28946 something is wrong by the time we get here. */
28947 gcc_assert (i + j < nelt);
28948 if (d->perm[i + j] != i + diff - j)
28949 return false;
28950 }
28951
28952 /* Success! */
28953 if (d->testing_p)
28954 return true;
28955
28956 emit_insn (gen (d->target, d->op0));
28957 return true;
28958 }
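
/* Examples of selectors accepted above (one-vector case only):
   for V8QImode, { 3, 2, 1, 0, 7, 6, 5, 4 } has diff = 3 and becomes
   a VREV32.8 (reverse the bytes within each 32-bit group), while
   { 7, 6, 5, 4, 3, 2, 1, 0 } has diff = 7 and becomes a VREV64.8.  */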
28959
28960 /* Recognize patterns for the VTRN insns. */
28961
28962 static bool
28963 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
28964 {
28965 unsigned int i, odd, mask, nelt = d->nelt;
28966 rtx out0, out1, in0, in1;
28967 rtx (*gen)(rtx, rtx, rtx, rtx);
28968
28969 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28970 return false;
28971
28972 /* Note that these are little-endian tests. Adjust for big-endian later. */
28973 if (d->perm[0] == 0)
28974 odd = 0;
28975 else if (d->perm[0] == 1)
28976 odd = 1;
28977 else
28978 return false;
28979 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28980
28981 for (i = 0; i < nelt; i += 2)
28982 {
28983 if (d->perm[i] != i + odd)
28984 return false;
28985 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
28986 return false;
28987 }
28988
28989 /* Success! */
28990 if (d->testing_p)
28991 return true;
28992
28993 switch (d->vmode)
28994 {
28995 case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
28996 case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
28997 case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
28998 case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
28999 case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
29000 case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
29001 case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29002 case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29003 case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29004 case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29005 default:
29006 gcc_unreachable ();
29007 }
29008
29009 in0 = d->op0;
29010 in1 = d->op1;
29011 if (BYTES_BIG_ENDIAN)
29012 {
29013 std::swap (in0, in1);
29014 odd = !odd;
29015 }
29016
29017 out0 = d->target;
29018 out1 = gen_reg_rtx (d->vmode);
29019 if (odd)
29020 std::swap (out0, out1);
29021
29022 emit_insn (gen (out0, in0, in1, out1));
29023 return true;
29024 }
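
/* Example of a selector accepted above (little-endian, two V8QImode
   operands): { 0, 8, 2, 10, 4, 12, 6, 14 } gives odd = 0 and selects
   lanes op0[0], op1[0], op0[2], op1[2], ..., i.e. one output of a
   VTRN.8 that transposes OP0 and OP1 viewed as 2x2 element blocks;
   { 1, 9, 3, 11, 5, 13, 7, 15 } selects the other output.  */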
29025
29026 /* Recognize patterns for the VEXT insns. */
29027
29028 static bool
29029 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29030 {
29031 unsigned int i, nelt = d->nelt;
29032 rtx (*gen) (rtx, rtx, rtx, rtx);
29033 rtx offset;
29034
29035 unsigned int location;
29036
29037 unsigned int next = d->perm[0] + 1;
29038
29039 /* TODO: Handle GCC's numbering of elements for big-endian. */
29040 if (BYTES_BIG_ENDIAN)
29041 return false;
29042
29043 /* Check if the extracted indexes are increasing by one. */
29044 for (i = 1; i < nelt; next++, i++)
29045 {
29046 /* If we hit the most significant element of the 2nd vector in
29047 the previous iteration, no need to test further. */
29048 if (next == 2 * nelt)
29049 return false;
29050
29051 /* If we are operating on only one vector, it could be a
29052 rotation. If there are only two elements of size < 64, let
29053 arm_evpc_neon_vrev catch it. */
29054 if (d->one_vector_p && (next == nelt))
29055 {
29056 if ((nelt == 2) && (d->vmode != V2DImode))
29057 return false;
29058 else
29059 next = 0;
29060 }
29061
29062 if (d->perm[i] != next)
29063 return false;
29064 }
29065
29066 location = d->perm[0];
29067
29068 switch (d->vmode)
29069 {
29070 case E_V16QImode: gen = gen_neon_vextv16qi; break;
29071 case E_V8QImode: gen = gen_neon_vextv8qi; break;
29072 case E_V4HImode: gen = gen_neon_vextv4hi; break;
29073 case E_V8HImode: gen = gen_neon_vextv8hi; break;
29074 case E_V2SImode: gen = gen_neon_vextv2si; break;
29075 case E_V4SImode: gen = gen_neon_vextv4si; break;
29076 case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29077 case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29078 case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29079 case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29080 case E_V2DImode: gen = gen_neon_vextv2di; break;
29081 default:
29082 return false;
29083 }
29084
29085 /* Success! */
29086 if (d->testing_p)
29087 return true;
29088
29089 offset = GEN_INT (location);
29090 emit_insn (gen (d->target, d->op0, d->op1, offset));
29091 return true;
29092 }
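
/* Example of a selector accepted above (little-endian, two V8QImode
   operands): { 3, 4, 5, 6, 7, 8, 9, 10 } is a run of consecutive
   indexes starting at 3, so location = 3 and the permutation is a
   single VEXT.8 extracting bytes 3..10 of the OP0:OP1 concatenation.  */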
29093
29094 /* The NEON VTBL instruction is a fully variable permutation that's even
29095 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29096 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29097 can do slightly better by expanding this as a constant where we don't
29098 have to apply a mask. */
29099
29100 static bool
29101 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29102 {
29103 rtx rperm[MAX_VECT_LEN], sel;
29104 machine_mode vmode = d->vmode;
29105 unsigned int i, nelt = d->nelt;
29106
29107 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29108 numbering of elements for big-endian, we must reverse the order. */
29109 if (BYTES_BIG_ENDIAN)
29110 return false;
29111
29112 if (d->testing_p)
29113 return true;
29114
29115 /* Generic code will try constant permutation twice. Once with the
29116 original mode and again with the elements lowered to QImode.
29117 So wait and don't do the selector expansion ourselves. */
29118 if (vmode != V8QImode && vmode != V16QImode)
29119 return false;
29120
29121 for (i = 0; i < nelt; ++i)
29122 rperm[i] = GEN_INT (d->perm[i]);
29123 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29124 sel = force_reg (vmode, sel);
29125
29126 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29127 return true;
29128 }
29129
29130 static bool
29131 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29132 {
29133 /* Check if the input mask matches vext before reordering the
29134 operands. */
29135 if (TARGET_NEON)
29136 if (arm_evpc_neon_vext (d))
29137 return true;
29138
29139 /* The pattern matching functions above are written to look for a small
29140 number to begin the sequence (0, 1, N/2). If we begin with an index
29141 from the second operand, we can swap the operands. */
29142 if (d->perm[0] >= d->nelt)
29143 {
29144 unsigned i, nelt = d->nelt;
29145
29146 for (i = 0; i < nelt; ++i)
29147 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
29148
29149 std::swap (d->op0, d->op1);
29150 }
29151
29152 if (TARGET_NEON)
29153 {
29154 if (arm_evpc_neon_vuzp (d))
29155 return true;
29156 if (arm_evpc_neon_vzip (d))
29157 return true;
29158 if (arm_evpc_neon_vrev (d))
29159 return true;
29160 if (arm_evpc_neon_vtrn (d))
29161 return true;
29162 return arm_evpc_neon_vtbl (d);
29163 }
29164 return false;
29165 }
29166
29167 /* Expand a vec_perm_const pattern. */
29168
29169 bool
29170 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29171 {
29172 struct expand_vec_perm_d d;
29173 int i, nelt, which;
29174
29175 d.target = target;
29176 d.op0 = op0;
29177 d.op1 = op1;
29178
29179 d.vmode = GET_MODE (target);
29180 gcc_assert (VECTOR_MODE_P (d.vmode));
29181 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29182 d.testing_p = false;
29183
29184 for (i = which = 0; i < nelt; ++i)
29185 {
29186 rtx e = XVECEXP (sel, 0, i);
29187 int ei = INTVAL (e) & (2 * nelt - 1);
29188 which |= (ei < nelt ? 1 : 2);
29189 d.perm[i] = ei;
29190 }
29191
29192 switch (which)
29193 {
29194 default:
29195 gcc_unreachable();
29196
29197 case 3:
29198 d.one_vector_p = false;
29199 if (!rtx_equal_p (op0, op1))
29200 break;
29201
29202 /* The elements of PERM do not suggest that only the first operand
29203 is used, but both operands are identical. Allow easier matching
29204 of the permutation by folding the permutation into the single
29205 input vector. */
29206 /* FALLTHRU */
29207 case 2:
29208 for (i = 0; i < nelt; ++i)
29209 d.perm[i] &= nelt - 1;
29210 d.op0 = op1;
29211 d.one_vector_p = true;
29212 break;
29213
29214 case 1:
29215 d.op1 = op0;
29216 d.one_vector_p = true;
29217 break;
29218 }
29219
29220 return arm_expand_vec_perm_const_1 (&d);
29221 }
29222
29223 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29224
29225 static bool
29226 arm_vectorize_vec_perm_const_ok (machine_mode vmode,
29227 const unsigned char *sel)
29228 {
29229 struct expand_vec_perm_d d;
29230 unsigned int i, nelt, which;
29231 bool ret;
29232
29233 d.vmode = vmode;
29234 d.nelt = nelt = GET_MODE_NUNITS (d.vmode);
29235 d.testing_p = true;
29236 memcpy (d.perm, sel, nelt);
29237
29238 /* Categorize the set of elements in the selector. */
29239 for (i = which = 0; i < nelt; ++i)
29240 {
29241 unsigned char e = d.perm[i];
29242 gcc_assert (e < 2 * nelt);
29243 which |= (e < nelt ? 1 : 2);
29244 }
29245
29246 /* If all elements come from the second vector, fold them into the first. */
29247 if (which == 2)
29248 for (i = 0; i < nelt; ++i)
29249 d.perm[i] -= nelt;
29250
29251 /* Check whether the mask can be applied to the vector type. */
29252 d.one_vector_p = (which != 3);
29253
29254 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29255 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29256 if (!d.one_vector_p)
29257 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29258
29259 start_sequence ();
29260 ret = arm_expand_vec_perm_const_1 (&d);
29261 end_sequence ();
29262
29263 return ret;
29264 }
29265
29266 bool
29267 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29268 {
29269 /* If we are soft float and we do not have ldrd
29270 then all auto increment forms are ok. */
29271 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29272 return true;
29273
29274 switch (code)
29275 {
29276 /* Post-increment and pre-decrement are supported for all
29277 instruction forms except for vector forms. */
29278 case ARM_POST_INC:
29279 case ARM_PRE_DEC:
29280 if (VECTOR_MODE_P (mode))
29281 {
29282 if (code != ARM_PRE_DEC)
29283 return true;
29284 else
29285 return false;
29286 }
29287
29288 return true;
29289
29290 case ARM_POST_DEC:
29291 case ARM_PRE_INC:
29292 /* Without LDRD and mode size greater than
29293 word size, there is no point in auto-incrementing
29294 because ldm and stm will not have these forms. */
29295 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29296 return false;
29297
29298 /* Vector and floating point modes do not support
29299 these auto increment forms. */
29300 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29301 return false;
29302
29303 return true;
29304
29305 default:
29306 return false;
29307
29308 }
29309
29310 return false;
29311 }
29312
29313 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
29314 on ARM, since we know that shifts by negative amounts are no-ops.
29315 Additionally, the default expansion code is not available or suitable
29316 for post-reload insn splits (this can occur when the register allocator
29317 chooses not to do a shift in NEON).
29318
29319 This function is used in both initial expand and post-reload splits, and
29320 handles all kinds of 64-bit shifts.
29321
29322 Input requirements:
29323 - It is safe for the input and output to be the same register, but
29324 early-clobber rules apply for the shift amount and scratch registers.
29325 - Shift by register requires both scratch registers. In all other cases
29326 the scratch registers may be NULL.
29327 - Ashiftrt by a register also clobbers the CC register. */
29328 void
29329 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29330 rtx amount, rtx scratch1, rtx scratch2)
29331 {
29332 rtx out_high = gen_highpart (SImode, out);
29333 rtx out_low = gen_lowpart (SImode, out);
29334 rtx in_high = gen_highpart (SImode, in);
29335 rtx in_low = gen_lowpart (SImode, in);
29336
29337 /* Terminology:
29338 in = the register pair containing the input value.
29339 out = the destination register pair.
29340 up = the high- or low-part of each pair.
29341 down = the opposite part to "up".
29342 In a shift, we can consider bits to shift from "up"-stream to
29343 "down"-stream, so in a left-shift "up" is the low-part and "down"
29344 is the high-part of each register pair. */
29345
29346 rtx out_up = code == ASHIFT ? out_low : out_high;
29347 rtx out_down = code == ASHIFT ? out_high : out_low;
29348 rtx in_up = code == ASHIFT ? in_low : in_high;
29349 rtx in_down = code == ASHIFT ? in_high : in_low;
29350
29351 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29352 gcc_assert (out
29353 && (REG_P (out) || GET_CODE (out) == SUBREG)
29354 && GET_MODE (out) == DImode);
29355 gcc_assert (in
29356 && (REG_P (in) || GET_CODE (in) == SUBREG)
29357 && GET_MODE (in) == DImode);
29358 gcc_assert (amount
29359 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29360 && GET_MODE (amount) == SImode)
29361 || CONST_INT_P (amount)));
29362 gcc_assert (scratch1 == NULL
29363 || (GET_CODE (scratch1) == SCRATCH)
29364 || (GET_MODE (scratch1) == SImode
29365 && REG_P (scratch1)));
29366 gcc_assert (scratch2 == NULL
29367 || (GET_CODE (scratch2) == SCRATCH)
29368 || (GET_MODE (scratch2) == SImode
29369 && REG_P (scratch2)));
29370 gcc_assert (!REG_P (out) || !REG_P (amount)
29371 || !HARD_REGISTER_P (out)
29372 || (REGNO (out) != REGNO (amount)
29373 && REGNO (out) + 1 != REGNO (amount)));
29374
29375 /* Macros to make following code more readable. */
29376 #define SUB_32(DEST,SRC) \
29377 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29378 #define RSB_32(DEST,SRC) \
29379 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29380 #define SUB_S_32(DEST,SRC) \
29381 gen_addsi3_compare0 ((DEST), (SRC), \
29382 GEN_INT (-32))
29383 #define SET(DEST,SRC) \
29384 gen_rtx_SET ((DEST), (SRC))
29385 #define SHIFT(CODE,SRC,AMOUNT) \
29386 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29387 #define LSHIFT(CODE,SRC,AMOUNT) \
29388 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29389 SImode, (SRC), (AMOUNT))
29390 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29391 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29392 SImode, (SRC), (AMOUNT))
29393 #define ORR(A,B) \
29394 gen_rtx_IOR (SImode, (A), (B))
29395 #define BRANCH(COND,LABEL) \
29396 gen_arm_cond_branch ((LABEL), \
29397 gen_rtx_ ## COND (CCmode, cc_reg, \
29398 const0_rtx), \
29399 cc_reg)
29400
29401 /* Shifts by register and shifts by constant are handled separately. */
29402 if (CONST_INT_P (amount))
29403 {
29404 /* We have a shift-by-constant. */
29405
29406 /* First, handle out-of-range shift amounts.
29407 In both cases we try to match the result an ARM instruction in a
29408 shift-by-register would give. This helps reduce execution
29409 differences between optimization levels, but it won't stop other
29410 parts of the compiler doing different things. This is "undefined"
29411 behavior, in any case. */
29412 if (INTVAL (amount) <= 0)
29413 emit_insn (gen_movdi (out, in));
29414 else if (INTVAL (amount) >= 64)
29415 {
29416 if (code == ASHIFTRT)
29417 {
29418 rtx const31_rtx = GEN_INT (31);
29419 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29420 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29421 }
29422 else
29423 emit_insn (gen_movdi (out, const0_rtx));
29424 }
29425
29426 /* Now handle valid shifts. */
29427 else if (INTVAL (amount) < 32)
29428 {
29429 /* Shifts by a constant less than 32. */
29430 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29431
29432 /* Clearing the out register in DImode first avoids lots
29433 of spilling and results in less stack usage.
29434 Later this redundant insn is completely removed.
29435 Do that only if "in" and "out" are different registers. */
29436 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29437 emit_insn (SET (out, const0_rtx));
29438 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29439 emit_insn (SET (out_down,
29440 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29441 out_down)));
29442 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29443 }
29444 else
29445 {
29446 /* Shifts by a constant greater than 31. */
29447 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29448
29449 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29450 emit_insn (SET (out, const0_rtx));
29451 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29452 if (code == ASHIFTRT)
29453 emit_insn (gen_ashrsi3 (out_up, in_up,
29454 GEN_INT (31)));
29455 else
29456 emit_insn (SET (out_up, const0_rtx));
29457 }
29458 }
29459 else
29460 {
29461 /* We have a shift-by-register. */
29462 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29463
29464 /* This alternative requires the scratch registers. */
29465 gcc_assert (scratch1 && REG_P (scratch1));
29466 gcc_assert (scratch2 && REG_P (scratch2));
29467
29468 /* We will need the values "amount-32" and "32-amount" later.
29469 Swapping them around now allows the later code to be more general. */
29470 switch (code)
29471 {
29472 case ASHIFT:
29473 emit_insn (SUB_32 (scratch1, amount));
29474 emit_insn (RSB_32 (scratch2, amount));
29475 break;
29476 case ASHIFTRT:
29477 emit_insn (RSB_32 (scratch1, amount));
29478 /* Also set CC = amount > 32. */
29479 emit_insn (SUB_S_32 (scratch2, amount));
29480 break;
29481 case LSHIFTRT:
29482 emit_insn (RSB_32 (scratch1, amount));
29483 emit_insn (SUB_32 (scratch2, amount));
29484 break;
29485 default:
29486 gcc_unreachable ();
29487 }
29488
29489 /* Emit code like this:
29490
29491 arithmetic-left:
29492 out_down = in_down << amount;
29493 out_down = (in_up << (amount - 32)) | out_down;
29494 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29495 out_up = in_up << amount;
29496
29497 arithmetic-right:
29498 out_down = in_down >> amount;
29499 out_down = (in_up << (32 - amount)) | out_down;
29500 if (amount < 32)
29501 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29502 out_up = in_up << amount;
29503
29504 logical-right:
29505 out_down = in_down >> amount;
29506 out_down = (in_up << (32 - amount)) | out_down;
29507 if (amount < 32)
29508 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29509 out_up = in_up << amount;
29510
29511 The ARM and Thumb2 variants are the same but implemented slightly
29512 differently. If this were only called during expand we could just
29513 use the Thumb2 case and let combine do the right thing, but this
29514 can also be called from post-reload splitters. */
29515
29516 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29517
29518 if (!TARGET_THUMB2)
29519 {
29520 /* Emit code for ARM mode. */
29521 emit_insn (SET (out_down,
29522 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29523 if (code == ASHIFTRT)
29524 {
29525 rtx_code_label *done_label = gen_label_rtx ();
29526 emit_jump_insn (BRANCH (LT, done_label));
29527 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29528 out_down)));
29529 emit_label (done_label);
29530 }
29531 else
29532 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29533 out_down)));
29534 }
29535 else
29536 {
29537 /* Emit code for Thumb2 mode.
29538 Thumb2 can't do shift and or in one insn. */
29539 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29540 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29541
29542 if (code == ASHIFTRT)
29543 {
29544 rtx_code_label *done_label = gen_label_rtx ();
29545 emit_jump_insn (BRANCH (LT, done_label));
29546 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29547 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29548 emit_label (done_label);
29549 }
29550 else
29551 {
29552 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29553 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29554 }
29555 }
29556
29557 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29558 }
29559
29560 #undef SUB_32
29561 #undef RSB_32
29562 #undef SUB_S_32
29563 #undef SET
29564 #undef SHIFT
29565 #undef LSHIFT
29566 #undef REV_LSHIFT
29567 #undef ORR
29568 #undef BRANCH
29569 }
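
/* Worked example of the shift-by-constant path above (values chosen
   for illustration): a 64-bit left shift by k with 0 < k < 32 is
   emitted as
       out_high = in_high << k;
       out_high |= (unsigned) in_low >> (32 - k);
       out_low = in_low << k;
   while an arithmetic right shift by 64 or more replicates the sign,
   setting both halves to in_high >> 31.  */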
29570
29571 /* Returns true if the pattern is a valid symbolic address, which is either a
29572 symbol_ref or (symbol_ref + addend).
29573
29574 According to the ARM ELF ABI, the initial addend of REL-type relocations
29575 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29576 literal field of the instruction as a 16-bit signed value in the range
29577 -32768 <= A < 32768. */
29578
29579 bool
29580 arm_valid_symbolic_address_p (rtx addr)
29581 {
29582 rtx xop0, xop1 = NULL_RTX;
29583 rtx tmp = addr;
29584
29585 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29586 return true;
29587
29588 /* (const (plus: symbol_ref const_int)) */
29589 if (GET_CODE (addr) == CONST)
29590 tmp = XEXP (addr, 0);
29591
29592 if (GET_CODE (tmp) == PLUS)
29593 {
29594 xop0 = XEXP (tmp, 0);
29595 xop1 = XEXP (tmp, 1);
29596
29597 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29598 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29599 }
29600
29601 return false;
29602 }
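
/* For example, (symbol_ref "sym") and
   (const (plus (symbol_ref "sym") (const_int 4))) are both accepted,
   whereas an addend of 32768 is rejected because it cannot be
   represented in the signed 16-bit literal field described above.  */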
29603
29604 /* Returns true if COMPARISON is a valid comparison operation and puts
29605 the operands into a form that is valid. */
29606 bool
29607 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29608 {
29609 enum rtx_code code = GET_CODE (*comparison);
29610 int code_int;
29611 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29612 ? GET_MODE (*op2) : GET_MODE (*op1);
29613
29614 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29615
29616 if (code == UNEQ || code == LTGT)
29617 return false;
29618
29619 code_int = (int)code;
29620 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29621 PUT_CODE (*comparison, (enum rtx_code)code_int);
29622
29623 switch (mode)
29624 {
29625 case E_SImode:
29626 if (!arm_add_operand (*op1, mode))
29627 *op1 = force_reg (mode, *op1);
29628 if (!arm_add_operand (*op2, mode))
29629 *op2 = force_reg (mode, *op2);
29630 return true;
29631
29632 case E_DImode:
29633 if (!cmpdi_operand (*op1, mode))
29634 *op1 = force_reg (mode, *op1);
29635 if (!cmpdi_operand (*op2, mode))
29636 *op2 = force_reg (mode, *op2);
29637 return true;
29638
29639 case E_HFmode:
29640 if (!TARGET_VFP_FP16INST)
29641 break;
29642 /* FP16 comparisons are done in SF mode. */
29643 mode = SFmode;
29644 *op1 = convert_to_mode (mode, *op1, 1);
29645 *op2 = convert_to_mode (mode, *op2, 1);
29646 /* Fall through. */
29647 case E_SFmode:
29648 case E_DFmode:
29649 if (!vfp_compare_operand (*op1, mode))
29650 *op1 = force_reg (mode, *op1);
29651 if (!vfp_compare_operand (*op2, mode))
29652 *op2 = force_reg (mode, *op2);
29653 return true;
29654 default:
29655 break;
29656 }
29657
29658 return false;
29659
29660 }
29661
29662 /* Maximum number of instructions to set block of memory. */
29663 static int
29664 arm_block_set_max_insns (void)
29665 {
29666 if (optimize_function_for_size_p (cfun))
29667 return 4;
29668 else
29669 return current_tune->max_insns_inline_memset;
29670 }
29671
29672 /* Return TRUE if it's profitable to set block of memory for
29673 non-vectorized case. VAL is the value to set the memory
29674 with. LENGTH is the number of bytes to set. ALIGN is the
29675 alignment of the destination memory in bytes. UNALIGNED_P
29676 is TRUE if we can only set the memory with instructions
29677 meeting alignment requirements. USE_STRD_P is TRUE if we
29678 can use strd to set the memory. */
29679 static bool
29680 arm_block_set_non_vect_profit_p (rtx val,
29681 unsigned HOST_WIDE_INT length,
29682 unsigned HOST_WIDE_INT align,
29683 bool unaligned_p, bool use_strd_p)
29684 {
29685 int num = 0;
29686 /* For a leftover of 0-7 bytes, we can set the memory block using
29687 strb/strh/str with the minimum number of instructions. */
29688 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29689
29690 if (unaligned_p)
29691 {
29692 num = arm_const_inline_cost (SET, val);
29693 num += length / align + length % align;
29694 }
29695 else if (use_strd_p)
29696 {
29697 num = arm_const_double_inline_cost (val);
29698 num += (length >> 3) + leftover[length & 7];
29699 }
29700 else
29701 {
29702 num = arm_const_inline_cost (SET, val);
29703 num += (length >> 2) + leftover[length & 3];
29704 }
29705
29706 /* We may be able to combine last pair STRH/STRB into a single STR
29707 by shifting one byte back. */
29708 if (unaligned_access && length > 3 && (length & 3) == 3)
29709 num--;
29710
29711 return (num <= arm_block_set_max_insns ());
29712 }
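
/* Worked example (numbers chosen for illustration): for LENGTH = 15,
   ALIGN = 4, UNALIGNED_P and USE_STRD_P both false, the estimate is
   the cost of loading the constant plus (15 >> 2) = 3 word stores and
   leftover[3] = 2 stores for the trailing bytes; with unaligned access
   enabled the final STRH/STRB pair counts as a single STR, and the
   block is expanded inline only when the total does not exceed
   arm_block_set_max_insns ().  */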
29713
29714 /* Return TRUE if it's profitable to set block of memory for
29715 vectorized case. LENGTH is the number of bytes to set.
29716 ALIGN is the alignment of destination memory in bytes.
29717 MODE is the vector mode used to set the memory. */
29718 static bool
29719 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29720 unsigned HOST_WIDE_INT align,
29721 machine_mode mode)
29722 {
29723 int num;
29724 bool unaligned_p = ((align & 3) != 0);
29725 unsigned int nelt = GET_MODE_NUNITS (mode);
29726
29727 /* Instruction loading constant value. */
29728 num = 1;
29729 /* Instructions storing the memory. */
29730 num += (length + nelt - 1) / nelt;
29731 /* Instructions adjusting the address expression. We only need to
29732 adjust the address expression if it's 4-byte aligned and the
29733 leftover bytes can only be stored by a misaligned store instruction. */
29734 if (!unaligned_p && (length & 3) != 0)
29735 num++;
29736
29737 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29738 if (!unaligned_p && mode == V16QImode)
29739 num--;
29740
29741 return (num <= arm_block_set_max_insns ());
29742 }
29743
29744 /* Set a block of memory using vectorization instructions for the
29745 unaligned case. We fill the first LENGTH bytes of the memory
29746 area starting from DSTBASE with byte constant VALUE. ALIGN is
29747 the alignment requirement of memory. Return TRUE if succeeded. */
29748 static bool
29749 arm_block_set_unaligned_vect (rtx dstbase,
29750 unsigned HOST_WIDE_INT length,
29751 unsigned HOST_WIDE_INT value,
29752 unsigned HOST_WIDE_INT align)
29753 {
29754 unsigned int i, j, nelt_v16, nelt_v8, nelt_mode;
29755 rtx dst, mem;
29756 rtx val_elt, val_vec, reg;
29757 rtx rval[MAX_VECT_LEN];
29758 rtx (*gen_func) (rtx, rtx);
29759 machine_mode mode;
29760 unsigned HOST_WIDE_INT v = value;
29761 unsigned int offset = 0;
29762 gcc_assert ((align & 0x3) != 0);
29763 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29764 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29765 if (length >= nelt_v16)
29766 {
29767 mode = V16QImode;
29768 gen_func = gen_movmisalignv16qi;
29769 }
29770 else
29771 {
29772 mode = V8QImode;
29773 gen_func = gen_movmisalignv8qi;
29774 }
29775 nelt_mode = GET_MODE_NUNITS (mode);
29776 gcc_assert (length >= nelt_mode);
29777 /* Skip if it isn't profitable. */
29778 if (!arm_block_set_vect_profit_p (length, align, mode))
29779 return false;
29780
29781 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29782 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29783
29784 v = sext_hwi (v, BITS_PER_WORD);
29785 val_elt = GEN_INT (v);
29786 for (j = 0; j < nelt_mode; j++)
29787 rval[j] = val_elt;
29788
29789 reg = gen_reg_rtx (mode);
29790 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29791 /* Emit instruction loading the constant value. */
29792 emit_move_insn (reg, val_vec);
29793
29794 /* Handle nelt_mode bytes in a vector. */
29795 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29796 {
29797 emit_insn ((*gen_func) (mem, reg));
29798 if (i + 2 * nelt_mode <= length)
29799 {
29800 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29801 offset += nelt_mode;
29802 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29803 }
29804 }
29805
29806 /* If at least nelt_v8 bytes are left over, we must be in
29807 V16QI mode. */
29808 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29809
29810 /* Handle (8, 16) bytes leftover. */
29811 if (i + nelt_v8 < length)
29812 {
29813 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29814 offset += length - i;
29815 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29816
29817 /* We are shifting bytes back, set the alignment accordingly. */
29818 if ((length & 1) != 0 && align >= 2)
29819 set_mem_align (mem, BITS_PER_UNIT);
29820
29821 emit_insn (gen_movmisalignv16qi (mem, reg));
29822 }
29823 /* Handle (0, 8] bytes leftover. */
29824 else if (i < length && i + nelt_v8 >= length)
29825 {
29826 if (mode == V16QImode)
29827 reg = gen_lowpart (V8QImode, reg);
29828
29829 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
29830 + (nelt_mode - nelt_v8))));
29831 offset += (length - i) + (nelt_mode - nelt_v8);
29832 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
29833
29834 /* We are shifting bytes back, set the alignment accordingly. */
29835 if ((length & 1) != 0 && align >= 2)
29836 set_mem_align (mem, BITS_PER_UNIT);
29837
29838 emit_insn (gen_movmisalignv8qi (mem, reg));
29839 }
29840
29841 return true;
29842 }
29843
29844 /* Set a block of memory using vectorization instructions for the
29845 aligned case. We fill the first LENGTH bytes of the memory area
29846 starting from DSTBASE with byte constant VALUE. ALIGN is the
29847 alignment requirement of memory. Return TRUE if succeeded. */
29848 static bool
29849 arm_block_set_aligned_vect (rtx dstbase,
29850 unsigned HOST_WIDE_INT length,
29851 unsigned HOST_WIDE_INT value,
29852 unsigned HOST_WIDE_INT align)
29853 {
29854 unsigned int i, j, nelt_v8, nelt_v16, nelt_mode;
29855 rtx dst, addr, mem;
29856 rtx val_elt, val_vec, reg;
29857 rtx rval[MAX_VECT_LEN];
29858 machine_mode mode;
29859 unsigned HOST_WIDE_INT v = value;
29860 unsigned int offset = 0;
29861
29862 gcc_assert ((align & 0x3) == 0);
29863 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29864 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29865 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
29866 mode = V16QImode;
29867 else
29868 mode = V8QImode;
29869
29870 nelt_mode = GET_MODE_NUNITS (mode);
29871 gcc_assert (length >= nelt_mode);
29872 /* Skip if it isn't profitable. */
29873 if (!arm_block_set_vect_profit_p (length, align, mode))
29874 return false;
29875
29876 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29877
29878 v = sext_hwi (v, BITS_PER_WORD);
29879 val_elt = GEN_INT (v);
29880 for (j = 0; j < nelt_mode; j++)
29881 rval[j] = val_elt;
29882
29883 reg = gen_reg_rtx (mode);
29884 val_vec = gen_rtx_CONST_VECTOR (mode, gen_rtvec_v (nelt_mode, rval));
29885 /* Emit instruction loading the constant value. */
29886 emit_move_insn (reg, val_vec);
29887
29888 i = 0;
29889 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
29890 if (mode == V16QImode)
29891 {
29892 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29893 emit_insn (gen_movmisalignv16qi (mem, reg));
29894 i += nelt_mode;
29895 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
29896 if (i + nelt_v8 < length && i + nelt_v16 > length)
29897 {
29898 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29899 offset += length - nelt_mode;
29900 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29901 /* We are shifting bytes back, set the alignment accordingly. */
29902 if ((length & 0x3) == 0)
29903 set_mem_align (mem, BITS_PER_UNIT * 4);
29904 else if ((length & 0x1) == 0)
29905 set_mem_align (mem, BITS_PER_UNIT * 2);
29906 else
29907 set_mem_align (mem, BITS_PER_UNIT);
29908
29909 emit_insn (gen_movmisalignv16qi (mem, reg));
29910 return true;
29911 }
29912 /* Fall through for bytes leftover. */
29913 mode = V8QImode;
29914 nelt_mode = GET_MODE_NUNITS (mode);
29915 reg = gen_lowpart (V8QImode, reg);
29916 }
29917
29918 /* Handle 8 bytes in a vector. */
29919 for (; (i + nelt_mode <= length); i += nelt_mode)
29920 {
29921 addr = plus_constant (Pmode, dst, i);
29922 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
29923 emit_move_insn (mem, reg);
29924 }
29925
29926 /* Handle single word leftover by shifting 4 bytes back. We can
29927 use aligned access for this case. */
29928 if (i + UNITS_PER_WORD == length)
29929 {
29930 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
29931 offset += i - UNITS_PER_WORD;
29932 mem = adjust_automodify_address (dstbase, mode, addr, offset);
29933 /* We are shifting 4 bytes back, set the alignment accordingly. */
29934 if (align > UNITS_PER_WORD)
29935 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
29936
29937 emit_move_insn (mem, reg);
29938 }
29939 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
29940 We have to use unaligned access for this case. */
29941 else if (i < length)
29942 {
29943 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
29944 offset += length - nelt_mode;
29945 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29946 /* We are shifting bytes back, set the alignment accordingly. */
29947 if ((length & 1) == 0)
29948 set_mem_align (mem, BITS_PER_UNIT * 2);
29949 else
29950 set_mem_align (mem, BITS_PER_UNIT);
29951
29952 emit_insn (gen_movmisalignv8qi (mem, reg));
29953 }
29954
29955 return true;
29956 }
29957
29958 /* Set a block of memory using plain strh/strb instructions, only
29959 using instructions allowed by ALIGN on the processor. We fill the
29960 first LENGTH bytes of the memory area starting from DSTBASE
29961 with byte constant VALUE. ALIGN is the alignment requirement
29962 of memory. */
29963 static bool
29964 arm_block_set_unaligned_non_vect (rtx dstbase,
29965 unsigned HOST_WIDE_INT length,
29966 unsigned HOST_WIDE_INT value,
29967 unsigned HOST_WIDE_INT align)
29968 {
29969 unsigned int i;
29970 rtx dst, addr, mem;
29971 rtx val_exp, val_reg, reg;
29972 machine_mode mode;
29973 HOST_WIDE_INT v = value;
29974
29975 gcc_assert (align == 1 || align == 2);
29976
29977 if (align == 2)
29978 v |= (value << BITS_PER_UNIT);
29979
29980 v = sext_hwi (v, BITS_PER_WORD);
29981 val_exp = GEN_INT (v);
29982 /* Skip if it isn't profitable. */
29983 if (!arm_block_set_non_vect_profit_p (val_exp, length,
29984 align, true, false))
29985 return false;
29986
29987 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29988 mode = (align == 2 ? HImode : QImode);
29989 val_reg = force_reg (SImode, val_exp);
29990 reg = gen_lowpart (mode, val_reg);
29991
29992 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
29993 {
29994 addr = plus_constant (Pmode, dst, i);
29995 mem = adjust_automodify_address (dstbase, mode, addr, i);
29996 emit_move_insn (mem, reg);
29997 }
29998
29999 /* Handle single byte leftover. */
30000 if (i + 1 == length)
30001 {
30002 reg = gen_lowpart (QImode, val_reg);
30003 addr = plus_constant (Pmode, dst, i);
30004 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30005 emit_move_insn (mem, reg);
30006 i++;
30007 }
30008
30009 gcc_assert (i == length);
30010 return true;
30011 }
30012
30013 /* Set a block of memory using plain strd/str/strh/strb instructions,
30014 to permit unaligned copies on processors which support unaligned
30015 semantics for those instructions. We fill the first LENGTH bytes
30016 of the memory area starting from DSTBASE with byte constant VALUE.
30017 ALIGN is the alignment requirement of memory. */
30018 static bool
30019 arm_block_set_aligned_non_vect (rtx dstbase,
30020 unsigned HOST_WIDE_INT length,
30021 unsigned HOST_WIDE_INT value,
30022 unsigned HOST_WIDE_INT align)
30023 {
30024 unsigned int i;
30025 rtx dst, addr, mem;
30026 rtx val_exp, val_reg, reg;
30027 unsigned HOST_WIDE_INT v;
30028 bool use_strd_p;
30029
30030 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30031 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30032
30033 v = (value | (value << 8) | (value << 16) | (value << 24));
30034 if (length < UNITS_PER_WORD)
30035 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30036
30037 if (use_strd_p)
30038 v |= (v << BITS_PER_WORD);
30039 else
30040 v = sext_hwi (v, BITS_PER_WORD);
30041
30042 val_exp = GEN_INT (v);
30043 /* Skip if it isn't profitable. */
30044 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30045 align, false, use_strd_p))
30046 {
30047 if (!use_strd_p)
30048 return false;
30049
30050 /* Try without strd. */
30051 v = (v >> BITS_PER_WORD);
30052 v = sext_hwi (v, BITS_PER_WORD);
30053 val_exp = GEN_INT (v);
30054 use_strd_p = false;
30055 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30056 align, false, use_strd_p))
30057 return false;
30058 }
30059
30060 i = 0;
30061 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30062 /* Handle double words using strd if possible. */
30063 if (use_strd_p)
30064 {
30065 val_reg = force_reg (DImode, val_exp);
30066 reg = val_reg;
30067 for (; (i + 8 <= length); i += 8)
30068 {
30069 addr = plus_constant (Pmode, dst, i);
30070 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30071 emit_move_insn (mem, reg);
30072 }
30073 }
30074 else
30075 val_reg = force_reg (SImode, val_exp);
30076
30077 /* Handle words. */
30078 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30079 for (; (i + 4 <= length); i += 4)
30080 {
30081 addr = plus_constant (Pmode, dst, i);
30082 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30083 if ((align & 3) == 0)
30084 emit_move_insn (mem, reg);
30085 else
30086 emit_insn (gen_unaligned_storesi (mem, reg));
30087 }
30088
30089 /* Merge last pair of STRH and STRB into a STR if possible. */
30090 if (unaligned_access && i > 0 && (i + 3) == length)
30091 {
30092 addr = plus_constant (Pmode, dst, i - 1);
30093 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30094 /* We are shifting one byte back, set the alignment accordingly. */
30095 if ((align & 1) == 0)
30096 set_mem_align (mem, BITS_PER_UNIT);
30097
30098 /* Most likely this is an unaligned access, and we can't tell at
30099 compilation time. */
30100 emit_insn (gen_unaligned_storesi (mem, reg));
30101 return true;
30102 }
30103
30104 /* Handle half word leftover. */
30105 if (i + 2 <= length)
30106 {
30107 reg = gen_lowpart (HImode, val_reg);
30108 addr = plus_constant (Pmode, dst, i);
30109 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30110 if ((align & 1) == 0)
30111 emit_move_insn (mem, reg);
30112 else
30113 emit_insn (gen_unaligned_storehi (mem, reg));
30114
30115 i += 2;
30116 }
30117
30118 /* Handle single byte leftover. */
30119 if (i + 1 == length)
30120 {
30121 reg = gen_lowpart (QImode, val_reg);
30122 addr = plus_constant (Pmode, dst, i);
30123 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30124 emit_move_insn (mem, reg);
30125 }
30126
30127 return true;
30128 }
30129
30130 /* Set a block of memory using vectorization instructions for both
30131 aligned and unaligned cases. We fill the first LENGTH bytes of
30132 the memory area starting from DSTBASE with byte constant VALUE.
30133 ALIGN is the alignment requirement of memory. */
30134 static bool
30135 arm_block_set_vect (rtx dstbase,
30136 unsigned HOST_WIDE_INT length,
30137 unsigned HOST_WIDE_INT value,
30138 unsigned HOST_WIDE_INT align)
30139 {
30140 /* Check whether we need to use unaligned store instruction. */
30141 if (((align & 3) != 0 || (length & 3) != 0)
30142 /* Check whether unaligned store instruction is available. */
30143 && (!unaligned_access || BYTES_BIG_ENDIAN))
30144 return false;
30145
30146 if ((align & 3) == 0)
30147 return arm_block_set_aligned_vect (dstbase, length, value, align);
30148 else
30149 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30150 }
30151
30152 /* Expand string store operation. Firstly we try to do that by using
30153 vectorization instructions, then try with ARM unaligned access and
30154 double-word store if profitable. OPERANDS[0] is the destination,
30155 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30156 initialize the memory, OPERANDS[3] is the known alignment of the
30157 destination. */
30158 bool
30159 arm_gen_setmem (rtx *operands)
30160 {
30161 rtx dstbase = operands[0];
30162 unsigned HOST_WIDE_INT length;
30163 unsigned HOST_WIDE_INT value;
30164 unsigned HOST_WIDE_INT align;
30165
30166 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30167 return false;
30168
30169 length = UINTVAL (operands[1]);
30170 if (length > 64)
30171 return false;
30172
30173 value = (UINTVAL (operands[2]) & 0xFF);
30174 align = UINTVAL (operands[3]);
30175 if (TARGET_NEON && length >= 8
30176 && current_tune->string_ops_prefer_neon
30177 && arm_block_set_vect (dstbase, length, value, align))
30178 return true;
30179
30180 if (!unaligned_access && (align & 3) != 0)
30181 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30182
30183 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30184 }
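
/* Sketch of a typical expansion (assuming a little-endian target with
   NEON, unaligned access and string_ops_prefer_neon): a 16-byte
   memset of value 0xAB on a 4-byte-aligned destination arrives here
   with OPERANDS[1] = 16, OPERANDS[2] = 0xAB and OPERANDS[3] = 4, and
   is handled by arm_block_set_aligned_vect as one V16QImode constant
   load plus a single misaligned vector store; lengths above 64 bytes
   always return false so that expansion is left to the generic code.  */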
30185
30186
30187 static bool
30188 arm_macro_fusion_p (void)
30189 {
30190 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30191 }
30192
30193 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30194 for MOVW / MOVT macro fusion. */
30195
30196 static bool
30197 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30198 {
30199 /* We are trying to fuse
30200 movw imm / movt imm
30201 instructions as a group that gets scheduled together. */
30202
30203 rtx set_dest = SET_DEST (curr_set);
30204
30205 if (GET_MODE (set_dest) != SImode)
30206 return false;
30207
30208 /* We are trying to match:
30209 prev (movw) == (set (reg r0) (const_int imm16))
30210 curr (movt) == (set (zero_extract (reg r0)
30211 (const_int 16)
30212 (const_int 16))
30213 (const_int imm16_1))
30214 or
30215 prev (movw) == (set (reg r1)
30216 (high (symbol_ref ("SYM"))))
30217 curr (movt) == (set (reg r0)
30218 (lo_sum (reg r1)
30219 (symbol_ref ("SYM")))) */
30220
30221 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30222 {
30223 if (CONST_INT_P (SET_SRC (curr_set))
30224 && CONST_INT_P (SET_SRC (prev_set))
30225 && REG_P (XEXP (set_dest, 0))
30226 && REG_P (SET_DEST (prev_set))
30227 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30228 return true;
30229
30230 }
30231 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30232 && REG_P (SET_DEST (curr_set))
30233 && REG_P (SET_DEST (prev_set))
30234 && GET_CODE (SET_SRC (prev_set)) == HIGH
30235 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30236 return true;
30237
30238 return false;
30239 }
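
/* For example, the pair
       movw r0, #:lower16:sym
       movt r0, #:upper16:sym
   matches the second (HIGH / LO_SUM) form above, and an immediate
   pair such as movw r0, #0x1234 / movt r0, #0x5678 matches the
   ZERO_EXTRACT form; both are kept together by the scheduler when
   FUSE_MOVW_MOVT is enabled.  */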
30240
30241 static bool
30242 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30243 {
30244 rtx prev_set = single_set (prev);
30245 rtx curr_set = single_set (curr);
30246
30247 if (!prev_set
30248 || !curr_set)
30249 return false;
30250
30251 if (any_condjump_p (curr))
30252 return false;
30253
30254 if (!arm_macro_fusion_p ())
30255 return false;
30256
30257 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30258 && aarch_crypto_can_dual_issue (prev, curr))
30259 return true;
30260
30261 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30262 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30263 return true;
30264
30265 return false;
30266 }
30267
30268 /* Return true iff the instruction fusion described by OP is enabled. */
30269 bool
30270 arm_fusion_enabled_p (tune_params::fuse_ops op)
30271 {
30272 return current_tune->fusible_ops & op;
30273 }
30274
30275 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30276 scheduled for speculative execution. Reject the long-running division
30277 and square-root instructions. */
30278
30279 static bool
30280 arm_sched_can_speculate_insn (rtx_insn *insn)
30281 {
30282 switch (get_attr_type (insn))
30283 {
30284 case TYPE_SDIV:
30285 case TYPE_UDIV:
30286 case TYPE_FDIVS:
30287 case TYPE_FDIVD:
30288 case TYPE_FSQRTS:
30289 case TYPE_FSQRTD:
30290 case TYPE_NEON_FP_SQRT_S:
30291 case TYPE_NEON_FP_SQRT_D:
30292 case TYPE_NEON_FP_SQRT_S_Q:
30293 case TYPE_NEON_FP_SQRT_D_Q:
30294 case TYPE_NEON_FP_DIV_S:
30295 case TYPE_NEON_FP_DIV_D:
30296 case TYPE_NEON_FP_DIV_S_Q:
30297 case TYPE_NEON_FP_DIV_D_Q:
30298 return false;
30299 default:
30300 return true;
30301 }
30302 }
30303
30304 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30305
30306 static unsigned HOST_WIDE_INT
30307 arm_asan_shadow_offset (void)
30308 {
30309 return HOST_WIDE_INT_1U << 29;
30310 }
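
/* With the default shadow scale of 3 this means the shadow byte for
   address A lives at (A >> 3) + 0x20000000.  */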
30311
30312
30313 /* This is a temporary fix for PR60655. Ideally we need
30314 to handle most of these cases in the generic part but
30315 currently we reject minus (..) (sym_ref). We try to
30316 ameliorate the case with minus (sym_ref1) (sym_ref2)
30317 where they are in the same section. */
30318
30319 static bool
30320 arm_const_not_ok_for_debug_p (rtx p)
30321 {
30322 tree decl_op0 = NULL;
30323 tree decl_op1 = NULL;
30324
30325 if (GET_CODE (p) == MINUS)
30326 {
30327 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30328 {
30329 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30330 if (decl_op1
30331 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30332 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30333 {
30334 if ((VAR_P (decl_op1)
30335 || TREE_CODE (decl_op1) == CONST_DECL)
30336 && (VAR_P (decl_op0)
30337 || TREE_CODE (decl_op0) == CONST_DECL))
30338 return (get_variable_section (decl_op1, false)
30339 != get_variable_section (decl_op0, false));
30340
30341 if (TREE_CODE (decl_op1) == LABEL_DECL
30342 && TREE_CODE (decl_op0) == LABEL_DECL)
30343 return (DECL_CONTEXT (decl_op1)
30344 != DECL_CONTEXT (decl_op0));
30345 }
30346
30347 return true;
30348 }
30349 }
30350
30351 return false;
30352 }
30353
30354 /* Return TRUE if X is a reference to a value in a constant pool. */
30355 extern bool
30356 arm_is_constant_pool_ref (rtx x)
30357 {
30358 return (MEM_P (x)
30359 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30360 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30361 }
30362
30363 /* Remember the last target of arm_set_current_function. */
30364 static GTY(()) tree arm_previous_fndecl;
30365
30366 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30367
30368 void
30369 save_restore_target_globals (tree new_tree)
30370 {
30371 /* If we have a previous state, use it. */
30372 if (TREE_TARGET_GLOBALS (new_tree))
30373 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30374 else if (new_tree == target_option_default_node)
30375 restore_target_globals (&default_target_globals);
30376 else
30377 {
30378 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30379 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30380 }
30381
30382 arm_option_params_internal ();
30383 }
30384
30385 /* Invalidate arm_previous_fndecl. */
30386
30387 void
30388 arm_reset_previous_fndecl (void)
30389 {
30390 arm_previous_fndecl = NULL_TREE;
30391 }
30392
30393 /* Establish appropriate back-end context for processing the function
30394 FNDECL. The argument might be NULL to indicate processing at top
30395 level, outside of any function scope. */
30396
30397 static void
30398 arm_set_current_function (tree fndecl)
30399 {
30400 if (!fndecl || fndecl == arm_previous_fndecl)
30401 return;
30402
30403 tree old_tree = (arm_previous_fndecl
30404 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30405 : NULL_TREE);
30406
30407 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30408
30409 /* If current function has no attributes but previous one did,
30410 use the default node. */
30411 if (! new_tree && old_tree)
30412 new_tree = target_option_default_node;
30413
30414 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop to
30415 the default have been handled by save_restore_target_globals from
30416 arm_pragma_target_parse. */
30417 if (old_tree == new_tree)
30418 return;
30419
30420 arm_previous_fndecl = fndecl;
30421
30422 /* First set the target options. */
30423 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30424
30425 save_restore_target_globals (new_tree);
30426 }
30427
30428 /* Implement TARGET_OPTION_PRINT. */
30429
30430 static void
30431 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30432 {
30433 int flags = ptr->x_target_flags;
30434 const char *fpu_name;
30435
30436 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30437 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30438
30439 fprintf (file, "%*sselected isa %s\n", indent, "",
30440 TARGET_THUMB2_P (flags) ? "thumb2" :
30441 TARGET_THUMB_P (flags) ? "thumb1" :
30442 "arm");
30443
30444 if (ptr->x_arm_arch_string)
30445 fprintf (file, "%*sselected architecture %s\n", indent, "",
30446 ptr->x_arm_arch_string);
30447
30448 if (ptr->x_arm_cpu_string)
30449 fprintf (file, "%*sselected CPU %s\n", indent, "",
30450 ptr->x_arm_cpu_string);
30451
30452 if (ptr->x_arm_tune_string)
30453 fprintf (file, "%*sselected tune %s\n", indent, "",
30454 ptr->x_arm_tune_string);
30455
30456 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30457 }
30458
30459 /* Hook to determine if one function can safely inline another. */
30460
30461 static bool
30462 arm_can_inline_p (tree caller, tree callee)
30463 {
30464 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30465 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30466 bool can_inline = true;
30467
30468 struct cl_target_option *caller_opts
30469 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30470 : target_option_default_node);
30471
30472 struct cl_target_option *callee_opts
30473 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30474 : target_option_default_node);
30475
30476 if (callee_opts == caller_opts)
30477 return true;
30478
30479 /* Callee's ISA features should be a subset of the caller's. */
30480 struct arm_build_target caller_target;
30481 struct arm_build_target callee_target;
30482 caller_target.isa = sbitmap_alloc (isa_num_bits);
30483 callee_target.isa = sbitmap_alloc (isa_num_bits);
30484
30485 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30486 false);
30487 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30488 false);
30489 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30490 can_inline = false;
30491
30492 sbitmap_free (caller_target.isa);
30493 sbitmap_free (callee_target.isa);
30494
30495 /* It is OK to inline between different modes.
30496 A function with mode-specific instructions, e.g. using inline asm,
30497 must be explicitly protected with noinline. */
30498 return can_inline;
30499 }
30500
30501 /* Hook to fix function's alignment affected by target attribute. */
30502
30503 static void
30504 arm_relayout_function (tree fndecl)
30505 {
30506 if (DECL_USER_ALIGN (fndecl))
30507 return;
30508
30509 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30510
30511 if (!callee_tree)
30512 callee_tree = target_option_default_node;
30513
30514 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30515 SET_DECL_ALIGN
30516 (fndecl,
30517 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30518 }
30519
30520 /* Inner function to process the attribute ((target (...))): take an argument
30521 and set the current options from it. If the argument is a list, recursively
30522 process each element of the list. */
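/* For example (illustrative declaration), something like
     int foo (void) __attribute__ ((target ("thumb,fpu=vfpv4")));
   reaches this function with ARGS holding the string "thumb,fpu=vfpv4",
   which is then split on commas below.  */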
30523
30524 static bool
30525 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30526 {
30527 if (TREE_CODE (args) == TREE_LIST)
30528 {
30529 bool ret = true;
30530
30531 for (; args; args = TREE_CHAIN (args))
30532 if (TREE_VALUE (args)
30533 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30534 ret = false;
30535 return ret;
30536 }
30537
30538 else if (TREE_CODE (args) != STRING_CST)
30539 {
30540 error ("attribute %<target%> argument not a string");
30541 return false;
30542 }
30543
30544 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30545 char *q;
30546
30547 while ((q = strtok (argstr, ",")) != NULL)
30548 {
30549 while (ISSPACE (*q)) ++q;
30550
30551 argstr = NULL;
30552 if (!strncmp (q, "thumb", 5))
30553 opts->x_target_flags |= MASK_THUMB;
30554
30555 else if (!strncmp (q, "arm", 3))
30556 opts->x_target_flags &= ~MASK_THUMB;
30557
30558 else if (!strncmp (q, "fpu=", 4))
30559 {
30560 int fpu_index;
30561 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30562 &fpu_index, CL_TARGET))
30563 {
30564 error ("invalid fpu for attribute(target(\"%s\"))", q);
30565 return false;
30566 }
30567 if (fpu_index == TARGET_FPU_auto)
30568 {
30569 /* This doesn't really make sense until we support
30570 general dynamic selection of the architecture and all
30571 sub-features. */
30572 sorry ("auto fpu selection not currently permitted here");
30573 return false;
30574 }
30575 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30576 }
30577 else
30578 {
30579 error ("attribute(target(\"%s\")) is unknown", q);
30580 return false;
30581 }
30582 }
30583
30584 return true;
30585 }
30586
30587 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30588
30589 tree
30590 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30591 struct gcc_options *opts_set)
30592 {
30593 struct cl_target_option cl_opts;
30594
30595 if (!arm_valid_target_attribute_rec (args, opts))
30596 return NULL_TREE;
30597
30598 cl_target_option_save (&cl_opts, opts);
30599 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30600 arm_option_check_internal (opts);
30601 /* Do any overrides, such as global options arch=xxx. */
30602 arm_option_override_internal (opts, opts_set);
30603
30604 return build_target_option_node (opts);
30605 }
30606
30607 static void
30608 add_attribute (const char * mode, tree *attributes)
30609 {
30610 size_t len = strlen (mode);
30611 tree value = build_string (len, mode);
30612
30613 TREE_TYPE (value) = build_array_type (char_type_node,
30614 build_index_type (size_int (len)));
30615
30616 *attributes = tree_cons (get_identifier ("target"),
30617 build_tree_list (NULL_TREE, value),
30618 *attributes);
30619 }
30620
30621 /* For testing. Insert thumb or arm modes alternately on functions. */
30622
30623 static void
30624 arm_insert_attributes (tree fndecl, tree * attributes)
30625 {
30626 const char *mode;
30627
30628 if (! TARGET_FLIP_THUMB)
30629 return;
30630
30631 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30632 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30633 return;
30634
30635 /* Nested definitions must inherit mode. */
30636 if (current_function_decl)
30637 {
30638 mode = TARGET_THUMB ? "thumb" : "arm";
30639 add_attribute (mode, attributes);
30640 return;
30641 }
30642
30643 /* If there is already a setting don't change it. */
30644 if (lookup_attribute ("target", *attributes) != NULL)
30645 return;
30646
30647 mode = thumb_flipper ? "thumb" : "arm";
30648 add_attribute (mode, attributes);
30649
30650 thumb_flipper = !thumb_flipper;
30651 }
30652
30653 /* Hook to validate attribute((target("string"))). */
30654
30655 static bool
30656 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30657 tree args, int ARG_UNUSED (flags))
30658 {
30659 bool ret = true;
30660 struct gcc_options func_options;
30661 tree cur_tree, new_optimize;
30662 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30663
30664 /* Get the optimization options of the current function. */
30665 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30666
30667 /* If the function changed the optimization levels as well as setting target
30668 options, start with the optimizations specified. */
30669 if (!func_optimize)
30670 func_optimize = optimization_default_node;
30671
30672 /* Init func_options. */
30673 memset (&func_options, 0, sizeof (func_options));
30674 init_options_struct (&func_options, NULL);
30675 lang_hooks.init_options_struct (&func_options);
30676
30677 /* Initialize func_options to the defaults. */
30678 cl_optimization_restore (&func_options,
30679 TREE_OPTIMIZATION (func_optimize));
30680
30681 cl_target_option_restore (&func_options,
30682 TREE_TARGET_OPTION (target_option_default_node));
30683
30684 /* Set func_options flags with new target mode. */
30685 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30686 &global_options_set);
30687
30688 if (cur_tree == NULL_TREE)
30689 ret = false;
30690
30691 new_optimize = build_optimization_node (&func_options);
30692
30693 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30694
30695 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30696
30697 finalize_options_struct (&func_options);
30698
30699 return ret;
30700 }
30701
30702 /* Match an ISA feature bitmap to a named FPU. We always use the
30703 first entry that exactly matches the feature set, so that we
30704 effectively canonicalize the FPU name for the assembler. */
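/* For instance (illustrative), an ISA whose FPU-related bits exactly match
   the all_fpus entry for vfpv3-d16 is reported as "vfpv3-d16", and an ISA
   with no FPU bits at all is reported as "softvfp".  */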
30705 static const char*
30706 arm_identify_fpu_from_isa (sbitmap isa)
30707 {
30708 auto_sbitmap fpubits (isa_num_bits);
30709 auto_sbitmap cand_fpubits (isa_num_bits);
30710
30711 bitmap_and (fpubits, isa, isa_all_fpubits);
30712
30713 /* If there are no ISA feature bits relating to the FPU, we must be
30714 doing soft-float. */
30715 if (bitmap_empty_p (fpubits))
30716 return "softvfp";
30717
30718 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30719 {
30720 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30721 if (bitmap_equal_p (fpubits, cand_fpubits))
30722 return all_fpus[i].name;
30723 }
30724 /* We must find an entry, or things have gone wrong. */
30725 gcc_unreachable ();
30726 }
30727
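/* Output the assembler directives that introduce the function NAME/DECL on
   STREAM: the unified-syntax marker, the ARM/Thumb mode directives and the
   .fpu directive.  For example (hypothetical target settings), a Thumb-2
   function built with a VFPv4 FPU would be preceded by something like
       .syntax unified
       .thumb
       .thumb_func
       .fpu vfpv4  */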
30728 void
30729 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30730 {
30731
30732 fprintf (stream, "\t.syntax unified\n");
30733
30734 if (TARGET_THUMB)
30735 {
30736 if (is_called_in_ARM_mode (decl)
30737 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30738 && cfun->is_thunk))
30739 fprintf (stream, "\t.code 32\n");
30740 else if (TARGET_THUMB1)
30741 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30742 else
30743 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30744 }
30745 else
30746 fprintf (stream, "\t.arm\n");
30747
30748 asm_fprintf (asm_out_file, "\t.fpu %s\n",
30749 (TARGET_SOFT_FLOAT
30750 ? "softvfp"
30751 : arm_identify_fpu_from_isa (arm_active_target.isa)));
30752
30753 if (TARGET_POKE_FUNCTION_NAME)
30754 arm_poke_function_name (stream, (const char *) name);
30755 }
30756
30757 /* If MEM has an address of the form [base+offset], extract the two
30758 parts into BASE and OFFSET and return true; otherwise clear BASE
30759 and OFFSET and return false. */
30760
30761 static bool
30762 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
30763 {
30764 rtx addr;
30765
30766 gcc_assert (MEM_P (mem));
30767
30768 addr = XEXP (mem, 0);
30769
30770 /* Strip off const from addresses like (const (addr)). */
30771 if (GET_CODE (addr) == CONST)
30772 addr = XEXP (addr, 0);
30773
30774 if (GET_CODE (addr) == REG)
30775 {
30776 *base = addr;
30777 *offset = const0_rtx;
30778 return true;
30779 }
30780
30781 if (GET_CODE (addr) == PLUS
30782 && GET_CODE (XEXP (addr, 0)) == REG
30783 && CONST_INT_P (XEXP (addr, 1)))
30784 {
30785 *base = XEXP (addr, 0);
30786 *offset = XEXP (addr, 1);
30787 return true;
30788 }
30789
30790 *base = NULL_RTX;
30791 *offset = NULL_RTX;
30792
30793 return false;
30794 }
30795
30796 /* If INSN is a load or store whose address has the form [base+offset],
30797 extract the two parts into BASE and OFFSET and set IS_LOAD to TRUE
30798 if it is a load. Return TRUE if INSN is such an instruction,
30799 otherwise return FALSE. */
30800
30801 static bool
30802 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
30803 {
30804 rtx x, dest, src;
30805
30806 gcc_assert (INSN_P (insn));
30807 x = PATTERN (insn);
30808 if (GET_CODE (x) != SET)
30809 return false;
30810
30811 src = SET_SRC (x);
30812 dest = SET_DEST (x);
30813 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
30814 {
30815 *is_load = false;
30816 extract_base_offset_in_addr (dest, base, offset);
30817 }
30818 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
30819 {
30820 *is_load = true;
30821 extract_base_offset_in_addr (src, base, offset);
30822 }
30823 else
30824 return false;
30825
30826 return (*base != NULL_RTX && *offset != NULL_RTX);
30827 }
30828
30829 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
30830
30831 Currently we only support fusing ldr and str instructions, so FUSION_PRI
30832 and PRI are only calculated for these instructions. For all other
30833 instructions, FUSION_PRI and PRI are simply set to MAX_PRI. In the future,
30834 fusion of other kinds of instructions can be supported by returning different priorities.
30835
30836 It's important that irrelevant instructions get the largest FUSION_PRI. */
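/* As a worked example of the computation below (illustrative register and
   offset values): with MAX_PRI == 100, a load from [r1, #4] gets
   FUSION_PRI == 98 and PRI == 49 - (1 << 20) - 4, while a load from
   [r1, #8] gets FUSION_PRI == 98 and PRI == 49 - (1 << 20) - 8, so the two
   share a fusion priority and are ordered by base register and offset.  */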
30837
30838 static void
30839 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
30840 int *fusion_pri, int *pri)
30841 {
30842 int tmp, off_val;
30843 bool is_load;
30844 rtx base, offset;
30845
30846 gcc_assert (INSN_P (insn));
30847
30848 tmp = max_pri - 1;
30849 if (!fusion_load_store (insn, &base, &offset, &is_load))
30850 {
30851 *pri = tmp;
30852 *fusion_pri = tmp;
30853 return;
30854 }
30855
30856 /* Load goes first. */
30857 if (is_load)
30858 *fusion_pri = tmp - 1;
30859 else
30860 *fusion_pri = tmp - 2;
30861
30862 tmp /= 2;
30863
30864 /* INSN with smaller base register goes first. */
30865 tmp -= ((REGNO (base) & 0xff) << 20);
30866
30867 /* INSN with smaller offset goes first. */
30868 off_val = (int)(INTVAL (offset));
30869 if (off_val >= 0)
30870 tmp -= (off_val & 0xfffff);
30871 else
30872 tmp += ((- off_val) & 0xfffff);
30873
30874 *pri = tmp;
30875 return;
30876 }
30877
30878
30879 /* Construct and return a PARALLEL RTX vector with elements numbering the
30880 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
30881 the vector - from the perspective of the architecture. This does not
30882 line up with GCC's perspective on lane numbers, so we end up with
30883 different masks depending on our target endian-ness. The diagram
30884 below may help. We must draw the distinction when building masks
30885 which select one half of the vector. An instruction selecting
30886 architectural low-lanes for a big-endian target, must be described using
30887 a mask selecting GCC high-lanes.
30888
30889 Big-Endian Little-Endian
30890
30891 GCC 0 1 2 3 3 2 1 0
30892 | x | x | x | x | | x | x | x | x |
30893 Architecture 3 2 1 0 3 2 1 0
30894
30895 Low Mask: { 2, 3 } { 0, 1 }
30896 High Mask: { 0, 1 } { 2, 3 }
30897 */
30898
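/* A concrete instance (little-endian, V4SImode, HIGH == true): the code
   below produces (parallel [(const_int 2) (const_int 3)]), i.e. the GCC
   high lanes, which the table above shows are also the architectural high
   lanes on a little-endian target.  */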
30899 rtx
30900 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
30901 {
30902 int nunits = GET_MODE_NUNITS (mode);
30903 rtvec v = rtvec_alloc (nunits / 2);
30904 int high_base = nunits / 2;
30905 int low_base = 0;
30906 int base;
30907 rtx t1;
30908 int i;
30909
30910 if (BYTES_BIG_ENDIAN)
30911 base = high ? low_base : high_base;
30912 else
30913 base = high ? high_base : low_base;
30914
30915 for (i = 0; i < nunits / 2; i++)
30916 RTVEC_ELT (v, i) = GEN_INT (base + i);
30917
30918 t1 = gen_rtx_PARALLEL (mode, v);
30919 return t1;
30920 }
30921
30922 /* Check OP for validity as a PARALLEL RTX vector with elements
30923 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
30924 from the perspective of the architecture. See the diagram above
30925 arm_simd_vect_par_cnst_half for more details. */
30926
30927 bool
30928 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
30929 bool high)
30930 {
30931 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
30932 HOST_WIDE_INT count_op = XVECLEN (op, 0);
30933 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
30934 int i = 0;
30935
30936 if (!VECTOR_MODE_P (mode))
30937 return false;
30938
30939 if (count_op != count_ideal)
30940 return false;
30941
30942 for (i = 0; i < count_ideal; i++)
30943 {
30944 rtx elt_op = XVECEXP (op, 0, i);
30945 rtx elt_ideal = XVECEXP (ideal, 0, i);
30946
30947 if (!CONST_INT_P (elt_op)
30948 || INTVAL (elt_ideal) != INTVAL (elt_op))
30949 return false;
30950 }
30951 return true;
30952 }
30953
30954 /* Can output mi_thunk for all cases except for non-zero vcall_offset
30955 in Thumb1. */
30956 static bool
30957 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
30958 const_tree)
30959 {
30960 /* For now, we punt and do not handle this for TARGET_THUMB1. */
30961 if (vcall_offset && TARGET_THUMB1)
30962 return false;
30963
30964 /* Otherwise ok. */
30965 return true;
30966 }
30967
30968 /* Generate RTL for a conditional branch with rtx comparison CODE in
30969 mode CC_MODE. The destination of the unlikely conditional branch
30970 is LABEL_REF. */
30971
30972 void
30973 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
30974 rtx label_ref)
30975 {
30976 rtx x;
30977 x = gen_rtx_fmt_ee (code, VOIDmode,
30978 gen_rtx_REG (cc_mode, CC_REGNUM),
30979 const0_rtx);
30980
30981 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
30982 gen_rtx_LABEL_REF (VOIDmode, label_ref),
30983 pc_rtx);
30984 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
30985 }
30986
30987 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
30988
30989 For pure-code sections there is no letter code for this attribute, so
30990 output all the section flags numerically when this is needed. */
30991
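/* The numeric values used below correspond to the standard ELF section
   flags: 0x1 SHF_WRITE, 0x2 SHF_ALLOC, 0x4 SHF_EXECINSTR, 0x10 SHF_MERGE,
   0x20 SHF_STRINGS, 0x200 SHF_GROUP, 0x400 SHF_TLS, 0x80000000 SHF_EXCLUDE,
   while 0x20000000 is the ARM-specific SHF_ARM_PURECODE flag.  */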
30992 static bool
30993 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
30994 {
30995
30996 if (flags & SECTION_ARM_PURECODE)
30997 {
30998 *num = 0x20000000;
30999
31000 if (!(flags & SECTION_DEBUG))
31001 *num |= 0x2;
31002 if (flags & SECTION_EXCLUDE)
31003 *num |= 0x80000000;
31004 if (flags & SECTION_WRITE)
31005 *num |= 0x1;
31006 if (flags & SECTION_CODE)
31007 *num |= 0x4;
31008 if (flags & SECTION_MERGE)
31009 *num |= 0x10;
31010 if (flags & SECTION_STRINGS)
31011 *num |= 0x20;
31012 if (flags & SECTION_TLS)
31013 *num |= 0x400;
31014 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31015 *num |= 0x200;
31016
31017 return true;
31018 }
31019
31020 return false;
31021 }
31022
31023 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31024
31025 If pure-code is passed as an option, make sure all functions are in
31026 sections that have the SHF_ARM_PURECODE attribute. */
31027
31028 static section *
31029 arm_function_section (tree decl, enum node_frequency freq,
31030 bool startup, bool exit)
31031 {
31032 const char * section_name;
31033 section * sec;
31034
31035 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31036 return default_function_section (decl, freq, startup, exit);
31037
31038 if (!target_pure_code)
31039 return default_function_section (decl, freq, startup, exit);
31040
31041
31042 section_name = DECL_SECTION_NAME (decl);
31043
31044 /* If a function is not in a named section then it falls under the 'default'
31045 text section, also known as '.text'. We can preserve previous behavior as
31046 the default text section already has the SHF_ARM_PURECODE section
31047 attribute. */
31048 if (!section_name)
31049 {
31050 section *default_sec = default_function_section (decl, freq, startup,
31051 exit);
31052
31053 /* If default_sec is not null, then it must be a special section such as
31054 .text.startup. We set the pure-code attribute and return the
31055 same section to preserve existing behavior. */
31056 if (default_sec)
31057 default_sec->common.flags |= SECTION_ARM_PURECODE;
31058 return default_sec;
31059 }
31060
31061 /* Otherwise look whether a section has already been created with
31062 'section_name'. */
31063 sec = get_named_section (decl, section_name, 0);
31064 if (!sec)
31065 /* If that is not the case, passing NULL as the section's name to
31066 'get_named_section' will create a section with the declaration's
31067 section name. */
31068 sec = get_named_section (decl, NULL, 0);
31069
31070 /* Set the SHF_ARM_PURECODE attribute. */
31071 sec->common.flags |= SECTION_ARM_PURECODE;
31072
31073 return sec;
31074 }
31075
31076 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
31077
31078 If DECL is a function declaration and pure-code is passed as an option
31079 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
31080 section's name and RELOC indicates whether the declaration's initializer may
31081 contain runtime relocations. */
31082
31083 static unsigned int
31084 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31085 {
31086 unsigned int flags = default_section_type_flags (decl, name, reloc);
31087
31088 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31089 flags |= SECTION_ARM_PURECODE;
31090
31091 return flags;
31092 }
31093
31094 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31095
31096 static void
31097 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31098 rtx op0, rtx op1,
31099 rtx *quot_p, rtx *rem_p)
31100 {
31101 if (mode == SImode)
31102 gcc_assert (!TARGET_IDIV);
31103
31104 scalar_int_mode libval_mode
31105 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31106
31107 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31108 libval_mode,
31109 op0, GET_MODE (op0),
31110 op1, GET_MODE (op1));
31111
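/* Per the AEABI, the divmod helpers return quotient and remainder packed
   into a value of twice the operand width (for __aeabi_idivmod, quotient
   in r0 and remainder in r1); the subregs below split that value back
   into its two halves.  */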
31112 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31113 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31114 GET_MODE_SIZE (mode));
31115
31116 gcc_assert (quotient);
31117 gcc_assert (remainder);
31118
31119 *quot_p = quotient;
31120 *rem_p = remainder;
31121 }
31122
31123 /* This function checks for the availability of the coprocessor builtin passed
31124 in BUILTIN for the current target. Returns true if it is available and
31125 false otherwise. If a BUILTIN is passed for which this function has not
31126 been implemented, it will cause an internal compiler error (gcc_unreachable). */
31127
31128 bool
31129 arm_coproc_builtin_available (enum unspecv builtin)
31130 {
31131 /* None of these builtins are available in Thumb mode if the target only
31132 supports Thumb-1. */
31133 if (TARGET_THUMB1)
31134 return false;
31135
31136 switch (builtin)
31137 {
31138 case VUNSPEC_CDP:
31139 case VUNSPEC_LDC:
31140 case VUNSPEC_LDCL:
31141 case VUNSPEC_STC:
31142 case VUNSPEC_STCL:
31143 case VUNSPEC_MCR:
31144 case VUNSPEC_MRC:
31145 if (arm_arch4)
31146 return true;
31147 break;
31148 case VUNSPEC_CDP2:
31149 case VUNSPEC_LDC2:
31150 case VUNSPEC_LDC2L:
31151 case VUNSPEC_STC2:
31152 case VUNSPEC_STC2L:
31153 case VUNSPEC_MCR2:
31154 case VUNSPEC_MRC2:
31155 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31156 ARMv8-{A,M}. */
31157 if (arm_arch5)
31158 return true;
31159 break;
31160 case VUNSPEC_MCRR:
31161 case VUNSPEC_MRRC:
31162 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31163 ARMv8-{A,M}. */
31164 if (arm_arch6 || arm_arch5te)
31165 return true;
31166 break;
31167 case VUNSPEC_MCRR2:
31168 case VUNSPEC_MRRC2:
31169 if (arm_arch6)
31170 return true;
31171 break;
31172 default:
31173 gcc_unreachable ();
31174 }
31175 return false;
31176 }
31177
31178 /* This function returns true if OP is a valid memory operand for the ldc and
31179 stc coprocessor instructions and false otherwise. */
31180
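/* For example (illustrative operands), a plain register address such as
   [r0], a register plus immediate such as [r2, #-8], and the pre/post
   increment and decrement forms are accepted below, whereas [r1, #2] is
   rejected because its offset is not a multiple of 4 and [r1, #2048]
   because its offset lies outside [-1020, 1020].  */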
31181 bool
31182 arm_coproc_ldc_stc_legitimate_address (rtx op)
31183 {
31184 HOST_WIDE_INT range;
31185 /* Has to be a memory operand. */
31186 if (!MEM_P (op))
31187 return false;
31188
31189 op = XEXP (op, 0);
31190
31191 /* We accept registers. */
31192 if (REG_P (op))
31193 return true;
31194
31195 switch (GET_CODE (op))
31196 {
31197 case PLUS:
31198 {
31199 /* Or registers with an offset. */
31200 if (!REG_P (XEXP (op, 0)))
31201 return false;
31202
31203 op = XEXP (op, 1);
31204
31205 /* The offset must be an immediate though. */
31206 if (!CONST_INT_P (op))
31207 return false;
31208
31209 range = INTVAL (op);
31210
31211 /* Within the range of [-1020,1020]. */
31212 if (!IN_RANGE (range, -1020, 1020))
31213 return false;
31214
31215 /* And a multiple of 4. */
31216 return (range % 4) == 0;
31217 }
31218 case PRE_INC:
31219 case POST_INC:
31220 case PRE_DEC:
31221 case POST_DEC:
31222 return REG_P (XEXP (op, 0));
31223 default:
31224 gcc_unreachable ();
31225 }
31226 return false;
31227 }
31228
31229 #if CHECKING_P
31230 namespace selftest {
31231
31232 /* Scan the static data tables generated by parsecpu.awk looking for
31233 potential issues with the data. We primarily check for
31234 inconsistencies in the option extensions at present (extensions
31235 that duplicate others but aren't marked as aliases). Furthermore,
31236 for correct canonicalization later options must never be a subset
31237 of an earlier option. Any extension should also only specify other
31238 feature bits and never an architecture bit. The architecture is inferred
31239 from the declaration of the extension. */
31240 static void
31241 arm_test_cpu_arch_data (void)
31242 {
31243 const arch_option *arch;
31244 const cpu_option *cpu;
31245 auto_sbitmap target_isa (isa_num_bits);
31246 auto_sbitmap isa1 (isa_num_bits);
31247 auto_sbitmap isa2 (isa_num_bits);
31248
31249 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31250 {
31251 const cpu_arch_extension *ext1, *ext2;
31252
31253 if (arch->common.extensions == NULL)
31254 continue;
31255
31256 arm_initialize_isa (target_isa, arch->common.isa_bits);
31257
31258 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31259 {
31260 if (ext1->alias)
31261 continue;
31262
31263 arm_initialize_isa (isa1, ext1->isa_bits);
31264 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31265 {
31266 if (ext2->alias || ext1->remove != ext2->remove)
31267 continue;
31268
31269 arm_initialize_isa (isa2, ext2->isa_bits);
31270 /* If the option is a subset of the parent option, it doesn't
31271 add anything and so isn't useful. */
31272 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31273
31274 /* If the extension specifies any architectural bits then
31275 disallow it. Extensions should only specify feature bits. */
31276 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31277 }
31278 }
31279 }
31280
31281 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31282 {
31283 const cpu_arch_extension *ext1, *ext2;
31284
31285 if (cpu->common.extensions == NULL)
31286 continue;
31287
31288 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31289
31290 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31291 {
31292 if (ext1->alias)
31293 continue;
31294
31295 arm_initialize_isa (isa1, ext1->isa_bits);
31296 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31297 {
31298 if (ext2->alias || ext1->remove != ext2->remove)
31299 continue;
31300
31301 arm_initialize_isa (isa2, ext2->isa_bits);
31302 /* If the option is a subset of the parent option, it doesn't
31303 add anything and so isn't useful. */
31304 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31305
31306 /* If the extension specifies any architectural bits then
31307 disallow it. Extensions should only specify feature bits. */
31308 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31309 }
31310 }
31311 }
31312 }
31313
31314 static void
31315 arm_run_selftests (void)
31316 {
31317 arm_test_cpu_arch_data ();
31318 }
31319 } /* Namespace selftest. */
31320
31321 #undef TARGET_RUN_TARGET_SELFTESTS
31322 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31323 #endif /* CHECKING_P */
31324
31325 struct gcc_target targetm = TARGET_INITIALIZER;
31326
31327 #include "gt-arm.h"