1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2017 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "df.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "attribs.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "cgraph.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "output.h"
51 #include "insn-attr.h"
52 #include "flags.h"
53 #include "reload.h"
54 #include "explow.h"
55 #include "expr.h"
56 #include "cfgrtl.h"
57 #include "sched-int.h"
58 #include "common/common-target.h"
59 #include "langhooks.h"
60 #include "intl.h"
61 #include "libfuncs.h"
62 #include "params.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "selftest.h"
73
74 /* This file should be included last. */
75 #include "target-def.h"
76
77 /* Forward definitions of types. */
78 typedef struct minipool_node Mnode;
79 typedef struct minipool_fixup Mfix;
80
81 void (*arm_lang_output_object_attributes_hook)(void);
82
83 struct four_ints
84 {
85 int i[4];
86 };
87
88 /* Forward function declarations. */
89 static bool arm_const_not_ok_for_debug_p (rtx);
90 static int arm_needs_doubleword_align (machine_mode, const_tree);
91 static int arm_compute_static_chain_stack_bytes (void);
92 static arm_stack_offsets *arm_get_frame_offsets (void);
93 static void arm_compute_frame_layout (void);
94 static void arm_add_gc_roots (void);
95 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
96 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
97 static unsigned bit_count (unsigned long);
98 static unsigned bitmap_popcount (const sbitmap);
99 static int arm_address_register_rtx_p (rtx, int);
100 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
101 static bool is_called_in_ARM_mode (tree);
102 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
103 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
104 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
105 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
106 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
107 inline static int thumb1_index_register_rtx_p (rtx, int);
108 static int thumb_far_jump_used_p (void);
109 static bool thumb_force_lr_save (void);
110 static unsigned arm_size_return_regs (void);
111 static bool arm_assemble_integer (rtx, unsigned int, int);
112 static void arm_print_operand (FILE *, rtx, int);
113 static void arm_print_operand_address (FILE *, machine_mode, rtx);
114 static bool arm_print_operand_punct_valid_p (unsigned char code);
115 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
116 static arm_cc get_arm_condition_code (rtx);
117 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
118 static const char *output_multi_immediate (rtx *, const char *, const char *,
119 int, HOST_WIDE_INT);
120 static const char *shift_op (rtx, HOST_WIDE_INT *);
121 static struct machine_function *arm_init_machine_status (void);
122 static void thumb_exit (FILE *, int);
123 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
124 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
125 static Mnode *add_minipool_forward_ref (Mfix *);
126 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
127 static Mnode *add_minipool_backward_ref (Mfix *);
128 static void assign_minipool_offsets (Mfix *);
129 static void arm_print_value (FILE *, rtx);
130 static void dump_minipool (rtx_insn *);
131 static int arm_barrier_cost (rtx_insn *);
132 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
133 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
134 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
135 machine_mode, rtx);
136 static void arm_reorg (void);
137 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
138 static unsigned long arm_compute_save_reg0_reg12_mask (void);
139 static unsigned long arm_compute_save_core_reg_mask (void);
140 static unsigned long arm_isr_value (tree);
141 static unsigned long arm_compute_func_type (void);
142 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
143 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
144 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
145 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
146 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
147 #endif
148 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
149 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
150 static void arm_output_function_epilogue (FILE *);
151 static void arm_output_function_prologue (FILE *);
152 static int arm_comp_type_attributes (const_tree, const_tree);
153 static void arm_set_default_type_attributes (tree);
154 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
155 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
156 static int optimal_immediate_sequence (enum rtx_code code,
157 unsigned HOST_WIDE_INT val,
158 struct four_ints *return_sequence);
159 static int optimal_immediate_sequence_1 (enum rtx_code code,
160 unsigned HOST_WIDE_INT val,
161 struct four_ints *return_sequence,
162 int i);
163 static int arm_get_strip_length (int);
164 static bool arm_function_ok_for_sibcall (tree, tree);
165 static machine_mode arm_promote_function_mode (const_tree,
166 machine_mode, int *,
167 const_tree, int);
168 static bool arm_return_in_memory (const_tree, const_tree);
169 static rtx arm_function_value (const_tree, const_tree, bool);
170 static rtx arm_libcall_value_1 (machine_mode);
171 static rtx arm_libcall_value (machine_mode, const_rtx);
172 static bool arm_function_value_regno_p (const unsigned int);
173 static void arm_internal_label (FILE *, const char *, unsigned long);
174 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
175 tree);
176 static bool arm_have_conditional_execution (void);
177 static bool arm_cannot_force_const_mem (machine_mode, rtx);
178 static bool arm_legitimate_constant_p (machine_mode, rtx);
179 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
180 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
181 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
182 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
183 static void emit_constant_insn (rtx cond, rtx pattern);
184 static rtx_insn *emit_set_insn (rtx, rtx);
185 static rtx emit_multi_reg_push (unsigned long, unsigned long);
186 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
187 tree, bool);
188 static rtx arm_function_arg (cumulative_args_t, machine_mode,
189 const_tree, bool);
190 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
191 const_tree, bool);
192 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
193 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
194 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
195 const_tree);
196 static rtx aapcs_libcall_value (machine_mode);
197 static int aapcs_select_return_coproc (const_tree, const_tree);
198
199 #ifdef OBJECT_FORMAT_ELF
200 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
201 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
202 #endif
203 #ifndef ARM_PE
204 static void arm_encode_section_info (tree, rtx, int);
205 #endif
206
207 static void arm_file_end (void);
208 static void arm_file_start (void);
209 static void arm_insert_attributes (tree, tree *);
210
211 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
212 tree, int *, int);
213 static bool arm_pass_by_reference (cumulative_args_t,
214 machine_mode, const_tree, bool);
215 static bool arm_promote_prototypes (const_tree);
216 static bool arm_default_short_enums (void);
217 static bool arm_align_anon_bitfield (void);
218 static bool arm_return_in_msb (const_tree);
219 static bool arm_must_pass_in_stack (machine_mode, const_tree);
220 static bool arm_return_in_memory (const_tree, const_tree);
221 #if ARM_UNWIND_INFO
222 static void arm_unwind_emit (FILE *, rtx_insn *);
223 static bool arm_output_ttype (rtx);
224 static void arm_asm_emit_except_personality (rtx);
225 #endif
226 static void arm_asm_init_sections (void);
227 static rtx arm_dwarf_register_span (rtx);
228
229 static tree arm_cxx_guard_type (void);
230 static bool arm_cxx_guard_mask_bit (void);
231 static tree arm_get_cookie_size (tree);
232 static bool arm_cookie_has_size (void);
233 static bool arm_cxx_cdtor_returns_this (void);
234 static bool arm_cxx_key_method_may_be_inline (void);
235 static void arm_cxx_determine_class_data_visibility (tree);
236 static bool arm_cxx_class_data_always_comdat (void);
237 static bool arm_cxx_use_aeabi_atexit (void);
238 static void arm_init_libfuncs (void);
239 static tree arm_build_builtin_va_list (void);
240 static void arm_expand_builtin_va_start (tree, rtx);
241 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
242 static void arm_option_override (void);
243 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
244 static void arm_option_restore (struct gcc_options *,
245 struct cl_target_option *);
246 static void arm_override_options_after_change (void);
247 static void arm_option_print (FILE *, int, struct cl_target_option *);
248 static void arm_set_current_function (tree);
249 static bool arm_can_inline_p (tree, tree);
250 static void arm_relayout_function (tree);
251 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
252 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
253 static bool arm_sched_can_speculate_insn (rtx_insn *);
254 static bool arm_macro_fusion_p (void);
255 static bool arm_cannot_copy_insn_p (rtx_insn *);
256 static int arm_issue_rate (void);
257 static int arm_first_cycle_multipass_dfa_lookahead (void);
258 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
259 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
260 static bool arm_output_addr_const_extra (FILE *, rtx);
261 static bool arm_allocate_stack_slots_for_args (void);
262 static bool arm_warn_func_return (tree);
263 static tree arm_promoted_type (const_tree t);
264 static bool arm_scalar_mode_supported_p (scalar_mode);
265 static bool arm_frame_pointer_required (void);
266 static bool arm_can_eliminate (const int, const int);
267 static void arm_asm_trampoline_template (FILE *);
268 static void arm_trampoline_init (rtx, tree, rtx);
269 static rtx arm_trampoline_adjust_address (rtx);
270 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
271 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
272 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
273 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
274 static bool arm_array_mode_supported_p (machine_mode,
275 unsigned HOST_WIDE_INT);
276 static machine_mode arm_preferred_simd_mode (scalar_mode);
277 static bool arm_class_likely_spilled_p (reg_class_t);
278 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
279 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
280 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
281 const_tree type,
282 int misalignment,
283 bool is_packed);
284 static void arm_conditional_register_usage (void);
285 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
286 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
287 static unsigned int arm_autovectorize_vector_sizes (void);
288 static int arm_default_branch_cost (bool, bool);
289 static int arm_cortex_a5_branch_cost (bool, bool);
290 static int arm_cortex_m_branch_cost (bool, bool);
291 static int arm_cortex_m7_branch_cost (bool, bool);
292
293 static bool arm_vectorize_vec_perm_const_ok (machine_mode, vec_perm_indices);
294
295 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
296
297 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
298 tree vectype,
299 int misalign ATTRIBUTE_UNUSED);
300 static unsigned arm_add_stmt_cost (void *data, int count,
301 enum vect_cost_for_stmt kind,
302 struct _stmt_vec_info *stmt_info,
303 int misalign,
304 enum vect_cost_model_location where);
305
306 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
307 bool op0_preserve_value);
308 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
309
310 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
311 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
312 const_tree);
313 static section *arm_function_section (tree, enum node_frequency, bool, bool);
314 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
315 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
316 int reloc);
317 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
318 static opt_scalar_float_mode arm_floatn_mode (int, bool);
319 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
320 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
321 static bool arm_modes_tieable_p (machine_mode, machine_mode);
322 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
323 \f
324 /* Table of machine attributes. */
325 static const struct attribute_spec arm_attribute_table[] =
326 {
327 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
328 affects_type_identity, handler, exclude } */
329 /* Function calls made to this symbol must be done indirectly, because
330 it may lie outside of the 26 bit addressing range of a normal function
331 call. */
332 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
333 /* Whereas these functions are always known to reside within the 26 bit
334 addressing range. */
335 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
336 /* Specify the procedure call conventions for a function. */
337 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
338 NULL },
339 /* Interrupt Service Routines have special prologue and epilogue requirements. */
340 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
341 NULL },
342 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
343 NULL },
344 { "naked", 0, 0, true, false, false, false,
345 arm_handle_fndecl_attribute, NULL },
346 #ifdef ARM_PE
347 /* ARM/PE has three new attributes:
348 interfacearm - ?
349 dllexport - for exporting a function/variable that will live in a dll
350 dllimport - for importing a function/variable from a dll
351
352 Microsoft allows multiple declspecs in one __declspec, separating
353 them with spaces. We do NOT support this. Instead, use __declspec
354 multiple times.
355 */
356 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
357 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
358 { "interfacearm", 0, 0, true, false, false, false,
359 arm_handle_fndecl_attribute, NULL },
360 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
361 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
362 NULL },
363 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
364 NULL },
365 { "notshared", 0, 0, false, true, false, false,
366 arm_handle_notshared_attribute, NULL },
367 #endif
368 /* ARMv8-M Security Extensions support. */
369 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
370 arm_handle_cmse_nonsecure_entry, NULL },
371 { "cmse_nonsecure_call", 0, 0, true, false, false, true,
372 arm_handle_cmse_nonsecure_call, NULL },
373 { NULL, 0, 0, false, false, false, false, NULL, NULL }
374 };
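/* Illustrative note, not part of the upstream table: at the source level
   the attributes registered above would typically be applied roughly as

     void far_away (void) __attribute__ ((long_call));
     void fiq_handler (void) __attribute__ ((interrupt ("FIQ")));
     int gateway (int) __attribute__ ((cmse_nonsecure_entry));

   The function names here are hypothetical; the handler functions listed
   in the table validate such uses and attach the attribute to the decl or
   type as appropriate.  */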
375 \f
376 /* Initialize the GCC target structure. */
377 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
378 #undef TARGET_MERGE_DECL_ATTRIBUTES
379 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
380 #endif
381
382 #undef TARGET_LEGITIMIZE_ADDRESS
383 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
384
385 #undef TARGET_ATTRIBUTE_TABLE
386 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
387
388 #undef TARGET_INSERT_ATTRIBUTES
389 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
390
391 #undef TARGET_ASM_FILE_START
392 #define TARGET_ASM_FILE_START arm_file_start
393 #undef TARGET_ASM_FILE_END
394 #define TARGET_ASM_FILE_END arm_file_end
395
396 #undef TARGET_ASM_ALIGNED_SI_OP
397 #define TARGET_ASM_ALIGNED_SI_OP NULL
398 #undef TARGET_ASM_INTEGER
399 #define TARGET_ASM_INTEGER arm_assemble_integer
400
401 #undef TARGET_PRINT_OPERAND
402 #define TARGET_PRINT_OPERAND arm_print_operand
403 #undef TARGET_PRINT_OPERAND_ADDRESS
404 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
405 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
406 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
407
408 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
409 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
410
411 #undef TARGET_ASM_FUNCTION_PROLOGUE
412 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
413
414 #undef TARGET_ASM_FUNCTION_EPILOGUE
415 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
416
417 #undef TARGET_CAN_INLINE_P
418 #define TARGET_CAN_INLINE_P arm_can_inline_p
419
420 #undef TARGET_RELAYOUT_FUNCTION
421 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
422
423 #undef TARGET_OPTION_OVERRIDE
424 #define TARGET_OPTION_OVERRIDE arm_option_override
425
426 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
427 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
428
429 #undef TARGET_OPTION_SAVE
430 #define TARGET_OPTION_SAVE arm_option_save
431
432 #undef TARGET_OPTION_RESTORE
433 #define TARGET_OPTION_RESTORE arm_option_restore
434
435 #undef TARGET_OPTION_PRINT
436 #define TARGET_OPTION_PRINT arm_option_print
437
438 #undef TARGET_COMP_TYPE_ATTRIBUTES
439 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
440
441 #undef TARGET_SCHED_CAN_SPECULATE_INSN
442 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
443
444 #undef TARGET_SCHED_MACRO_FUSION_P
445 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
446
447 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
448 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
449
450 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
451 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
452
453 #undef TARGET_SCHED_ADJUST_COST
454 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
455
456 #undef TARGET_SET_CURRENT_FUNCTION
457 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
458
459 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
460 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
461
462 #undef TARGET_SCHED_REORDER
463 #define TARGET_SCHED_REORDER arm_sched_reorder
464
465 #undef TARGET_REGISTER_MOVE_COST
466 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
467
468 #undef TARGET_MEMORY_MOVE_COST
469 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
470
471 #undef TARGET_ENCODE_SECTION_INFO
472 #ifdef ARM_PE
473 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
474 #else
475 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
476 #endif
477
478 #undef TARGET_STRIP_NAME_ENCODING
479 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
480
481 #undef TARGET_ASM_INTERNAL_LABEL
482 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
483
484 #undef TARGET_FLOATN_MODE
485 #define TARGET_FLOATN_MODE arm_floatn_mode
486
487 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
488 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
489
490 #undef TARGET_FUNCTION_VALUE
491 #define TARGET_FUNCTION_VALUE arm_function_value
492
493 #undef TARGET_LIBCALL_VALUE
494 #define TARGET_LIBCALL_VALUE arm_libcall_value
495
496 #undef TARGET_FUNCTION_VALUE_REGNO_P
497 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
498
499 #undef TARGET_ASM_OUTPUT_MI_THUNK
500 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
501 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
502 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
503
504 #undef TARGET_RTX_COSTS
505 #define TARGET_RTX_COSTS arm_rtx_costs
506 #undef TARGET_ADDRESS_COST
507 #define TARGET_ADDRESS_COST arm_address_cost
508
509 #undef TARGET_SHIFT_TRUNCATION_MASK
510 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
511 #undef TARGET_VECTOR_MODE_SUPPORTED_P
512 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
513 #undef TARGET_ARRAY_MODE_SUPPORTED_P
514 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
515 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
516 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
517 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
518 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
519 arm_autovectorize_vector_sizes
520
521 #undef TARGET_MACHINE_DEPENDENT_REORG
522 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
523
524 #undef TARGET_INIT_BUILTINS
525 #define TARGET_INIT_BUILTINS arm_init_builtins
526 #undef TARGET_EXPAND_BUILTIN
527 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
528 #undef TARGET_BUILTIN_DECL
529 #define TARGET_BUILTIN_DECL arm_builtin_decl
530
531 #undef TARGET_INIT_LIBFUNCS
532 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
533
534 #undef TARGET_PROMOTE_FUNCTION_MODE
535 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
536 #undef TARGET_PROMOTE_PROTOTYPES
537 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
538 #undef TARGET_PASS_BY_REFERENCE
539 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
540 #undef TARGET_ARG_PARTIAL_BYTES
541 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
542 #undef TARGET_FUNCTION_ARG
543 #define TARGET_FUNCTION_ARG arm_function_arg
544 #undef TARGET_FUNCTION_ARG_ADVANCE
545 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
546 #undef TARGET_FUNCTION_ARG_PADDING
547 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
548 #undef TARGET_FUNCTION_ARG_BOUNDARY
549 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
550
551 #undef TARGET_SETUP_INCOMING_VARARGS
552 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
553
554 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
555 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
556
557 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
558 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
559 #undef TARGET_TRAMPOLINE_INIT
560 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
561 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
562 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
563
564 #undef TARGET_WARN_FUNC_RETURN
565 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
566
567 #undef TARGET_DEFAULT_SHORT_ENUMS
568 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
569
570 #undef TARGET_ALIGN_ANON_BITFIELD
571 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
572
573 #undef TARGET_NARROW_VOLATILE_BITFIELD
574 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
575
576 #undef TARGET_CXX_GUARD_TYPE
577 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
578
579 #undef TARGET_CXX_GUARD_MASK_BIT
580 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
581
582 #undef TARGET_CXX_GET_COOKIE_SIZE
583 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
584
585 #undef TARGET_CXX_COOKIE_HAS_SIZE
586 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
587
588 #undef TARGET_CXX_CDTOR_RETURNS_THIS
589 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
590
591 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
592 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
593
594 #undef TARGET_CXX_USE_AEABI_ATEXIT
595 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
596
597 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
598 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
599 arm_cxx_determine_class_data_visibility
600
601 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
602 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
603
604 #undef TARGET_RETURN_IN_MSB
605 #define TARGET_RETURN_IN_MSB arm_return_in_msb
606
607 #undef TARGET_RETURN_IN_MEMORY
608 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
609
610 #undef TARGET_MUST_PASS_IN_STACK
611 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
612
613 #if ARM_UNWIND_INFO
614 #undef TARGET_ASM_UNWIND_EMIT
615 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
616
617 /* EABI unwinding tables use a different format for the typeinfo tables. */
618 #undef TARGET_ASM_TTYPE
619 #define TARGET_ASM_TTYPE arm_output_ttype
620
621 #undef TARGET_ARM_EABI_UNWINDER
622 #define TARGET_ARM_EABI_UNWINDER true
623
624 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
625 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
626
627 #endif /* ARM_UNWIND_INFO */
628
629 #undef TARGET_ASM_INIT_SECTIONS
630 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
631
632 #undef TARGET_DWARF_REGISTER_SPAN
633 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
634
635 #undef TARGET_CANNOT_COPY_INSN_P
636 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
637
638 #ifdef HAVE_AS_TLS
639 #undef TARGET_HAVE_TLS
640 #define TARGET_HAVE_TLS true
641 #endif
642
643 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
644 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
645
646 #undef TARGET_LEGITIMATE_CONSTANT_P
647 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
648
649 #undef TARGET_CANNOT_FORCE_CONST_MEM
650 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
651
652 #undef TARGET_MAX_ANCHOR_OFFSET
653 #define TARGET_MAX_ANCHOR_OFFSET 4095
654
655 /* The minimum is set such that the total size of the block
656 for a particular anchor is -4088 + 1 + 4095 bytes, which is
657 divisible by eight, ensuring natural spacing of anchors. */
658 #undef TARGET_MIN_ANCHOR_OFFSET
659 #define TARGET_MIN_ANCHOR_OFFSET -4088
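/* Explanatory note, not from the upstream source: the block size referred
   to above is 4088 + 1 + 4095 = 8184 bytes, and 8184 = 8 * 1023, which is
   the divisibility by eight the comment relies on.  */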
660
661 #undef TARGET_SCHED_ISSUE_RATE
662 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
663
664 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
665 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
666 arm_first_cycle_multipass_dfa_lookahead
667
668 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
669 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
670 arm_first_cycle_multipass_dfa_lookahead_guard
671
672 #undef TARGET_MANGLE_TYPE
673 #define TARGET_MANGLE_TYPE arm_mangle_type
674
675 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
676 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
677
678 #undef TARGET_BUILD_BUILTIN_VA_LIST
679 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
680 #undef TARGET_EXPAND_BUILTIN_VA_START
681 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
682 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
683 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
684
685 #ifdef HAVE_AS_TLS
686 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
687 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
688 #endif
689
690 #undef TARGET_LEGITIMATE_ADDRESS_P
691 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
692
693 #undef TARGET_PREFERRED_RELOAD_CLASS
694 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
695
696 #undef TARGET_PROMOTED_TYPE
697 #define TARGET_PROMOTED_TYPE arm_promoted_type
698
699 #undef TARGET_SCALAR_MODE_SUPPORTED_P
700 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
701
702 #undef TARGET_COMPUTE_FRAME_LAYOUT
703 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
704
705 #undef TARGET_FRAME_POINTER_REQUIRED
706 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
707
708 #undef TARGET_CAN_ELIMINATE
709 #define TARGET_CAN_ELIMINATE arm_can_eliminate
710
711 #undef TARGET_CONDITIONAL_REGISTER_USAGE
712 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
713
714 #undef TARGET_CLASS_LIKELY_SPILLED_P
715 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
716
717 #undef TARGET_VECTORIZE_BUILTINS
718 #define TARGET_VECTORIZE_BUILTINS
719
720 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
721 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
722 arm_builtin_vectorized_function
723
724 #undef TARGET_VECTOR_ALIGNMENT
725 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
726
727 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
728 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
729 arm_vector_alignment_reachable
730
731 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
732 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
733 arm_builtin_support_vector_misalignment
734
735 #undef TARGET_PREFERRED_RENAME_CLASS
736 #define TARGET_PREFERRED_RENAME_CLASS \
737 arm_preferred_rename_class
738
739 #undef TARGET_VECTORIZE_VEC_PERM_CONST_OK
740 #define TARGET_VECTORIZE_VEC_PERM_CONST_OK \
741 arm_vectorize_vec_perm_const_ok
742
743 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
744 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
745 arm_builtin_vectorization_cost
746 #undef TARGET_VECTORIZE_ADD_STMT_COST
747 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
748
749 #undef TARGET_CANONICALIZE_COMPARISON
750 #define TARGET_CANONICALIZE_COMPARISON \
751 arm_canonicalize_comparison
752
753 #undef TARGET_ASAN_SHADOW_OFFSET
754 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
755
756 #undef MAX_INSN_PER_IT_BLOCK
757 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
758
759 #undef TARGET_CAN_USE_DOLOOP_P
760 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
761
762 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
763 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
764
765 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
766 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
767
768 #undef TARGET_SCHED_FUSION_PRIORITY
769 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
770
771 #undef TARGET_ASM_FUNCTION_SECTION
772 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
773
774 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
775 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
776
777 #undef TARGET_SECTION_TYPE_FLAGS
778 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
779
780 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
781 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
782
783 #undef TARGET_C_EXCESS_PRECISION
784 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
785
786 /* Although the architecture reserves bits 0 and 1, only the former is
787 used for ARM/Thumb ISA selection in v7 and earlier versions. */
788 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
789 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
790
791 #undef TARGET_FIXED_CONDITION_CODE_REGS
792 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
793
794 #undef TARGET_HARD_REGNO_NREGS
795 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
796 #undef TARGET_HARD_REGNO_MODE_OK
797 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
798
799 #undef TARGET_MODES_TIEABLE_P
800 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
801
802 #undef TARGET_CAN_CHANGE_MODE_CLASS
803 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
804
805 #undef TARGET_CONSTANT_ALIGNMENT
806 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
807 \f
808 /* Obstack for minipool constant handling. */
809 static struct obstack minipool_obstack;
810 static char * minipool_startobj;
811
812 /* The maximum number of insns skipped which
813 will be conditionalised if possible. */
814 static int max_insns_skipped = 5;
815
816 extern FILE * asm_out_file;
817
818 /* True if we are currently building a constant table. */
819 int making_const_table;
820
821 /* The processor for which instructions should be scheduled. */
822 enum processor_type arm_tune = TARGET_CPU_arm_none;
823
824 /* The current tuning set. */
825 const struct tune_params *current_tune;
826
827 /* Which floating point hardware to schedule for. */
828 int arm_fpu_attr;
829
830 /* Used for Thumb call_via trampolines. */
831 rtx thumb_call_via_label[14];
832 static int thumb_call_reg_needed;
833
834 /* The bits in this mask specify which instruction scheduling options should
835 be used. */
836 unsigned int tune_flags = 0;
837
838 /* The highest ARM architecture version supported by the
839 target. */
840 enum base_architecture arm_base_arch = BASE_ARCH_0;
841
842 /* Active target architecture and tuning. */
843
844 struct arm_build_target arm_active_target;
845
846 /* The following are used in the arm.md file as equivalents to bits
847 in the above two flag variables. */
848
849 /* Nonzero if this chip supports the ARM Architecture 3M extensions. */
850 int arm_arch3m = 0;
851
852 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
853 int arm_arch4 = 0;
854
855 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
856 int arm_arch4t = 0;
857
858 /* Nonzero if this chip supports the ARM Architecture 5 extensions. */
859 int arm_arch5 = 0;
860
861 /* Nonzero if this chip supports the ARM Architecture 5E extensions. */
862 int arm_arch5e = 0;
863
864 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
865 int arm_arch5te = 0;
866
867 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
868 int arm_arch6 = 0;
869
870 /* Nonzero if this chip supports the ARM 6K extensions. */
871 int arm_arch6k = 0;
872
873 /* Nonzero if this chip supports the ARM 6KZ extensions. */
874 int arm_arch6kz = 0;
875
876 /* Nonzero if instructions present in ARMv6-M can be used. */
877 int arm_arch6m = 0;
878
879 /* Nonzero if this chip supports the ARM 7 extensions. */
880 int arm_arch7 = 0;
881
882 /* Nonzero if this chip supports the Large Physical Address Extension. */
883 int arm_arch_lpae = 0;
884
885 /* Nonzero if instructions not present in the 'M' profile can be used. */
886 int arm_arch_notm = 0;
887
888 /* Nonzero if instructions present in ARMv7E-M can be used. */
889 int arm_arch7em = 0;
890
891 /* Nonzero if instructions present in ARMv8 can be used. */
892 int arm_arch8 = 0;
893
894 /* Nonzero if this chip supports the ARMv8.1 extensions. */
895 int arm_arch8_1 = 0;
896
897 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
898 int arm_arch8_2 = 0;
899
900 /* Nonzero if this chip supports the FP16 instructions extension of ARM
901 Architecture 8.2. */
902 int arm_fp16_inst = 0;
903
904 /* Nonzero if this chip can benefit from load scheduling. */
905 int arm_ld_sched = 0;
906
907 /* Nonzero if this chip is a StrongARM. */
908 int arm_tune_strongarm = 0;
909
910 /* Nonzero if this chip supports Intel Wireless MMX technology. */
911 int arm_arch_iwmmxt = 0;
912
913 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
914 int arm_arch_iwmmxt2 = 0;
915
916 /* Nonzero if this chip is an XScale. */
917 int arm_arch_xscale = 0;
918
919 /* Nonzero if tuning for XScale */
920 int arm_tune_xscale = 0;
921
922 /* Nonzero if we want to tune for stores that access the write-buffer.
923 This typically means an ARM6 or ARM7 with MMU or MPU. */
924 int arm_tune_wbuf = 0;
925
926 /* Nonzero if tuning for Cortex-A9. */
927 int arm_tune_cortex_a9 = 0;
928
929 /* Nonzero if we should define __THUMB_INTERWORK__ in the
930 preprocessor.
931 XXX This is a bit of a hack; it's intended to help work around
932 problems in GLD, which doesn't understand that armv5t code is
933 interworking clean. */
934 int arm_cpp_interwork = 0;
935
936 /* Nonzero if chip supports Thumb 1. */
937 int arm_arch_thumb1;
938
939 /* Nonzero if chip supports Thumb 2. */
940 int arm_arch_thumb2;
941
942 /* Nonzero if chip supports integer division instruction. */
943 int arm_arch_arm_hwdiv;
944 int arm_arch_thumb_hwdiv;
945
946 /* Nonzero if chip disallows volatile memory access in IT block. */
947 int arm_arch_no_volatile_ce;
948
949 /* Nonzero if we should use Neon to handle 64-bit operations rather
950 than core registers. */
951 int prefer_neon_for_64bits = 0;
952
953 /* Nonzero if we shouldn't use literal pools. */
954 bool arm_disable_literal_pool = false;
955
956 /* The register number to be used for the PIC offset register. */
957 unsigned arm_pic_register = INVALID_REGNUM;
958
959 enum arm_pcs arm_pcs_default;
960
961 /* For an explanation of these variables, see final_prescan_insn below. */
962 int arm_ccfsm_state;
963 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
964 enum arm_cond_code arm_current_cc;
965
966 rtx arm_target_insn;
967 int arm_target_label;
968 /* The number of conditionally executed insns, including the current insn. */
969 int arm_condexec_count = 0;
970 /* A bitmask specifying the patterns for the IT block.
971 Zero means do not output an IT block before this insn. */
972 int arm_condexec_mask = 0;
973 /* The number of bits used in arm_condexec_mask. */
974 int arm_condexec_masklen = 0;
975
976 /* Nonzero if chip supports the ARMv8 CRC instructions. */
977 int arm_arch_crc = 0;
978
979 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
980 int arm_arch_dotprod = 0;
981
982 /* Nonzero if chip supports the ARMv8-M security extensions. */
983 int arm_arch_cmse = 0;
984
985 /* Nonzero if the core has a very small, high-latency multiply unit. */
986 int arm_m_profile_small_mul = 0;
987
988 /* The condition codes of the ARM, and the inverse function. */
989 static const char * const arm_condition_codes[] =
990 {
991 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
992 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
993 };
994
995 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
996 int arm_regs_in_sequence[] =
997 {
998 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
999 };
1000
1001 #define ARM_LSL_NAME "lsl"
1002 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1003
1004 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1005 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1006 | (1 << PIC_OFFSET_TABLE_REGNUM)))
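/* Explanatory note, not from the upstream source: the 0xff mask limits the
   candidate work registers to the low registers r0-r7; the Thumb frame
   pointer and PIC register bits are then cleared explicitly, while the SP
   and PC bits already lie outside the low-register range.  */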
1007 \f
1008 /* Initialization code. */
1009
1010 struct cpu_tune
1011 {
1012 enum processor_type scheduler;
1013 unsigned int tune_flags;
1014 const struct tune_params *tune;
1015 };
1016
1017 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1018 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1019 { \
1020 num_slots, \
1021 l1_size, \
1022 l1_line_size \
1023 }
1024
1025 /* arm generic vectorizer costs. */
1026 static const
1027 struct cpu_vec_costs arm_default_vec_cost = {
1028 1, /* scalar_stmt_cost. */
1029 1, /* scalar load_cost. */
1030 1, /* scalar_store_cost. */
1031 1, /* vec_stmt_cost. */
1032 1, /* vec_to_scalar_cost. */
1033 1, /* scalar_to_vec_cost. */
1034 1, /* vec_align_load_cost. */
1035 1, /* vec_unalign_load_cost. */
1036 1, /* vec_unalign_store_cost. */
1037 1, /* vec_store_cost. */
1038 3, /* cond_taken_branch_cost. */
1039 1, /* cond_not_taken_branch_cost. */
1040 };
1041
1042 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1043 #include "aarch-cost-tables.h"
1044
1045
1046
1047 const struct cpu_cost_table cortexa9_extra_costs =
1048 {
1049 /* ALU */
1050 {
1051 0, /* arith. */
1052 0, /* logical. */
1053 0, /* shift. */
1054 COSTS_N_INSNS (1), /* shift_reg. */
1055 COSTS_N_INSNS (1), /* arith_shift. */
1056 COSTS_N_INSNS (2), /* arith_shift_reg. */
1057 0, /* log_shift. */
1058 COSTS_N_INSNS (1), /* log_shift_reg. */
1059 COSTS_N_INSNS (1), /* extend. */
1060 COSTS_N_INSNS (2), /* extend_arith. */
1061 COSTS_N_INSNS (1), /* bfi. */
1062 COSTS_N_INSNS (1), /* bfx. */
1063 0, /* clz. */
1064 0, /* rev. */
1065 0, /* non_exec. */
1066 true /* non_exec_costs_exec. */
1067 },
1068 {
1069 /* MULT SImode */
1070 {
1071 COSTS_N_INSNS (3), /* simple. */
1072 COSTS_N_INSNS (3), /* flag_setting. */
1073 COSTS_N_INSNS (2), /* extend. */
1074 COSTS_N_INSNS (3), /* add. */
1075 COSTS_N_INSNS (2), /* extend_add. */
1076 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1077 },
1078 /* MULT DImode */
1079 {
1080 0, /* simple (N/A). */
1081 0, /* flag_setting (N/A). */
1082 COSTS_N_INSNS (4), /* extend. */
1083 0, /* add (N/A). */
1084 COSTS_N_INSNS (4), /* extend_add. */
1085 0 /* idiv (N/A). */
1086 }
1087 },
1088 /* LD/ST */
1089 {
1090 COSTS_N_INSNS (2), /* load. */
1091 COSTS_N_INSNS (2), /* load_sign_extend. */
1092 COSTS_N_INSNS (2), /* ldrd. */
1093 COSTS_N_INSNS (2), /* ldm_1st. */
1094 1, /* ldm_regs_per_insn_1st. */
1095 2, /* ldm_regs_per_insn_subsequent. */
1096 COSTS_N_INSNS (5), /* loadf. */
1097 COSTS_N_INSNS (5), /* loadd. */
1098 COSTS_N_INSNS (1), /* load_unaligned. */
1099 COSTS_N_INSNS (2), /* store. */
1100 COSTS_N_INSNS (2), /* strd. */
1101 COSTS_N_INSNS (2), /* stm_1st. */
1102 1, /* stm_regs_per_insn_1st. */
1103 2, /* stm_regs_per_insn_subsequent. */
1104 COSTS_N_INSNS (1), /* storef. */
1105 COSTS_N_INSNS (1), /* stored. */
1106 COSTS_N_INSNS (1), /* store_unaligned. */
1107 COSTS_N_INSNS (1), /* loadv. */
1108 COSTS_N_INSNS (1) /* storev. */
1109 },
1110 {
1111 /* FP SFmode */
1112 {
1113 COSTS_N_INSNS (14), /* div. */
1114 COSTS_N_INSNS (4), /* mult. */
1115 COSTS_N_INSNS (7), /* mult_addsub. */
1116 COSTS_N_INSNS (30), /* fma. */
1117 COSTS_N_INSNS (3), /* addsub. */
1118 COSTS_N_INSNS (1), /* fpconst. */
1119 COSTS_N_INSNS (1), /* neg. */
1120 COSTS_N_INSNS (3), /* compare. */
1121 COSTS_N_INSNS (3), /* widen. */
1122 COSTS_N_INSNS (3), /* narrow. */
1123 COSTS_N_INSNS (3), /* toint. */
1124 COSTS_N_INSNS (3), /* fromint. */
1125 COSTS_N_INSNS (3) /* roundint. */
1126 },
1127 /* FP DFmode */
1128 {
1129 COSTS_N_INSNS (24), /* div. */
1130 COSTS_N_INSNS (5), /* mult. */
1131 COSTS_N_INSNS (8), /* mult_addsub. */
1132 COSTS_N_INSNS (30), /* fma. */
1133 COSTS_N_INSNS (3), /* addsub. */
1134 COSTS_N_INSNS (1), /* fpconst. */
1135 COSTS_N_INSNS (1), /* neg. */
1136 COSTS_N_INSNS (3), /* compare. */
1137 COSTS_N_INSNS (3), /* widen. */
1138 COSTS_N_INSNS (3), /* narrow. */
1139 COSTS_N_INSNS (3), /* toint. */
1140 COSTS_N_INSNS (3), /* fromint. */
1141 COSTS_N_INSNS (3) /* roundint. */
1142 }
1143 },
1144 /* Vector */
1145 {
1146 COSTS_N_INSNS (1) /* alu. */
1147 }
1148 };
1149
1150 const struct cpu_cost_table cortexa8_extra_costs =
1151 {
1152 /* ALU */
1153 {
1154 0, /* arith. */
1155 0, /* logical. */
1156 COSTS_N_INSNS (1), /* shift. */
1157 0, /* shift_reg. */
1158 COSTS_N_INSNS (1), /* arith_shift. */
1159 0, /* arith_shift_reg. */
1160 COSTS_N_INSNS (1), /* log_shift. */
1161 0, /* log_shift_reg. */
1162 0, /* extend. */
1163 0, /* extend_arith. */
1164 0, /* bfi. */
1165 0, /* bfx. */
1166 0, /* clz. */
1167 0, /* rev. */
1168 0, /* non_exec. */
1169 true /* non_exec_costs_exec. */
1170 },
1171 {
1172 /* MULT SImode */
1173 {
1174 COSTS_N_INSNS (1), /* simple. */
1175 COSTS_N_INSNS (1), /* flag_setting. */
1176 COSTS_N_INSNS (1), /* extend. */
1177 COSTS_N_INSNS (1), /* add. */
1178 COSTS_N_INSNS (1), /* extend_add. */
1179 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1180 },
1181 /* MULT DImode */
1182 {
1183 0, /* simple (N/A). */
1184 0, /* flag_setting (N/A). */
1185 COSTS_N_INSNS (2), /* extend. */
1186 0, /* add (N/A). */
1187 COSTS_N_INSNS (2), /* extend_add. */
1188 0 /* idiv (N/A). */
1189 }
1190 },
1191 /* LD/ST */
1192 {
1193 COSTS_N_INSNS (1), /* load. */
1194 COSTS_N_INSNS (1), /* load_sign_extend. */
1195 COSTS_N_INSNS (1), /* ldrd. */
1196 COSTS_N_INSNS (1), /* ldm_1st. */
1197 1, /* ldm_regs_per_insn_1st. */
1198 2, /* ldm_regs_per_insn_subsequent. */
1199 COSTS_N_INSNS (1), /* loadf. */
1200 COSTS_N_INSNS (1), /* loadd. */
1201 COSTS_N_INSNS (1), /* load_unaligned. */
1202 COSTS_N_INSNS (1), /* store. */
1203 COSTS_N_INSNS (1), /* strd. */
1204 COSTS_N_INSNS (1), /* stm_1st. */
1205 1, /* stm_regs_per_insn_1st. */
1206 2, /* stm_regs_per_insn_subsequent. */
1207 COSTS_N_INSNS (1), /* storef. */
1208 COSTS_N_INSNS (1), /* stored. */
1209 COSTS_N_INSNS (1), /* store_unaligned. */
1210 COSTS_N_INSNS (1), /* loadv. */
1211 COSTS_N_INSNS (1) /* storev. */
1212 },
1213 {
1214 /* FP SFmode */
1215 {
1216 COSTS_N_INSNS (36), /* div. */
1217 COSTS_N_INSNS (11), /* mult. */
1218 COSTS_N_INSNS (20), /* mult_addsub. */
1219 COSTS_N_INSNS (30), /* fma. */
1220 COSTS_N_INSNS (9), /* addsub. */
1221 COSTS_N_INSNS (3), /* fpconst. */
1222 COSTS_N_INSNS (3), /* neg. */
1223 COSTS_N_INSNS (6), /* compare. */
1224 COSTS_N_INSNS (4), /* widen. */
1225 COSTS_N_INSNS (4), /* narrow. */
1226 COSTS_N_INSNS (8), /* toint. */
1227 COSTS_N_INSNS (8), /* fromint. */
1228 COSTS_N_INSNS (8) /* roundint. */
1229 },
1230 /* FP DFmode */
1231 {
1232 COSTS_N_INSNS (64), /* div. */
1233 COSTS_N_INSNS (16), /* mult. */
1234 COSTS_N_INSNS (25), /* mult_addsub. */
1235 COSTS_N_INSNS (30), /* fma. */
1236 COSTS_N_INSNS (9), /* addsub. */
1237 COSTS_N_INSNS (3), /* fpconst. */
1238 COSTS_N_INSNS (3), /* neg. */
1239 COSTS_N_INSNS (6), /* compare. */
1240 COSTS_N_INSNS (6), /* widen. */
1241 COSTS_N_INSNS (6), /* narrow. */
1242 COSTS_N_INSNS (8), /* toint. */
1243 COSTS_N_INSNS (8), /* fromint. */
1244 COSTS_N_INSNS (8) /* roundint. */
1245 }
1246 },
1247 /* Vector */
1248 {
1249 COSTS_N_INSNS (1) /* alu. */
1250 }
1251 };
1252
1253 const struct cpu_cost_table cortexa5_extra_costs =
1254 {
1255 /* ALU */
1256 {
1257 0, /* arith. */
1258 0, /* logical. */
1259 COSTS_N_INSNS (1), /* shift. */
1260 COSTS_N_INSNS (1), /* shift_reg. */
1261 COSTS_N_INSNS (1), /* arith_shift. */
1262 COSTS_N_INSNS (1), /* arith_shift_reg. */
1263 COSTS_N_INSNS (1), /* log_shift. */
1264 COSTS_N_INSNS (1), /* log_shift_reg. */
1265 COSTS_N_INSNS (1), /* extend. */
1266 COSTS_N_INSNS (1), /* extend_arith. */
1267 COSTS_N_INSNS (1), /* bfi. */
1268 COSTS_N_INSNS (1), /* bfx. */
1269 COSTS_N_INSNS (1), /* clz. */
1270 COSTS_N_INSNS (1), /* rev. */
1271 0, /* non_exec. */
1272 true /* non_exec_costs_exec. */
1273 },
1274
1275 {
1276 /* MULT SImode */
1277 {
1278 0, /* simple. */
1279 COSTS_N_INSNS (1), /* flag_setting. */
1280 COSTS_N_INSNS (1), /* extend. */
1281 COSTS_N_INSNS (1), /* add. */
1282 COSTS_N_INSNS (1), /* extend_add. */
1283 COSTS_N_INSNS (7) /* idiv. */
1284 },
1285 /* MULT DImode */
1286 {
1287 0, /* simple (N/A). */
1288 0, /* flag_setting (N/A). */
1289 COSTS_N_INSNS (1), /* extend. */
1290 0, /* add. */
1291 COSTS_N_INSNS (2), /* extend_add. */
1292 0 /* idiv (N/A). */
1293 }
1294 },
1295 /* LD/ST */
1296 {
1297 COSTS_N_INSNS (1), /* load. */
1298 COSTS_N_INSNS (1), /* load_sign_extend. */
1299 COSTS_N_INSNS (6), /* ldrd. */
1300 COSTS_N_INSNS (1), /* ldm_1st. */
1301 1, /* ldm_regs_per_insn_1st. */
1302 2, /* ldm_regs_per_insn_subsequent. */
1303 COSTS_N_INSNS (2), /* loadf. */
1304 COSTS_N_INSNS (4), /* loadd. */
1305 COSTS_N_INSNS (1), /* load_unaligned. */
1306 COSTS_N_INSNS (1), /* store. */
1307 COSTS_N_INSNS (3), /* strd. */
1308 COSTS_N_INSNS (1), /* stm_1st. */
1309 1, /* stm_regs_per_insn_1st. */
1310 2, /* stm_regs_per_insn_subsequent. */
1311 COSTS_N_INSNS (2), /* storef. */
1312 COSTS_N_INSNS (2), /* stored. */
1313 COSTS_N_INSNS (1), /* store_unaligned. */
1314 COSTS_N_INSNS (1), /* loadv. */
1315 COSTS_N_INSNS (1) /* storev. */
1316 },
1317 {
1318 /* FP SFmode */
1319 {
1320 COSTS_N_INSNS (15), /* div. */
1321 COSTS_N_INSNS (3), /* mult. */
1322 COSTS_N_INSNS (7), /* mult_addsub. */
1323 COSTS_N_INSNS (7), /* fma. */
1324 COSTS_N_INSNS (3), /* addsub. */
1325 COSTS_N_INSNS (3), /* fpconst. */
1326 COSTS_N_INSNS (3), /* neg. */
1327 COSTS_N_INSNS (3), /* compare. */
1328 COSTS_N_INSNS (3), /* widen. */
1329 COSTS_N_INSNS (3), /* narrow. */
1330 COSTS_N_INSNS (3), /* toint. */
1331 COSTS_N_INSNS (3), /* fromint. */
1332 COSTS_N_INSNS (3) /* roundint. */
1333 },
1334 /* FP DFmode */
1335 {
1336 COSTS_N_INSNS (30), /* div. */
1337 COSTS_N_INSNS (6), /* mult. */
1338 COSTS_N_INSNS (10), /* mult_addsub. */
1339 COSTS_N_INSNS (7), /* fma. */
1340 COSTS_N_INSNS (3), /* addsub. */
1341 COSTS_N_INSNS (3), /* fpconst. */
1342 COSTS_N_INSNS (3), /* neg. */
1343 COSTS_N_INSNS (3), /* compare. */
1344 COSTS_N_INSNS (3), /* widen. */
1345 COSTS_N_INSNS (3), /* narrow. */
1346 COSTS_N_INSNS (3), /* toint. */
1347 COSTS_N_INSNS (3), /* fromint. */
1348 COSTS_N_INSNS (3) /* roundint. */
1349 }
1350 },
1351 /* Vector */
1352 {
1353 COSTS_N_INSNS (1) /* alu. */
1354 }
1355 };
1356
1357
1358 const struct cpu_cost_table cortexa7_extra_costs =
1359 {
1360 /* ALU */
1361 {
1362 0, /* arith. */
1363 0, /* logical. */
1364 COSTS_N_INSNS (1), /* shift. */
1365 COSTS_N_INSNS (1), /* shift_reg. */
1366 COSTS_N_INSNS (1), /* arith_shift. */
1367 COSTS_N_INSNS (1), /* arith_shift_reg. */
1368 COSTS_N_INSNS (1), /* log_shift. */
1369 COSTS_N_INSNS (1), /* log_shift_reg. */
1370 COSTS_N_INSNS (1), /* extend. */
1371 COSTS_N_INSNS (1), /* extend_arith. */
1372 COSTS_N_INSNS (1), /* bfi. */
1373 COSTS_N_INSNS (1), /* bfx. */
1374 COSTS_N_INSNS (1), /* clz. */
1375 COSTS_N_INSNS (1), /* rev. */
1376 0, /* non_exec. */
1377 true /* non_exec_costs_exec. */
1378 },
1379
1380 {
1381 /* MULT SImode */
1382 {
1383 0, /* simple. */
1384 COSTS_N_INSNS (1), /* flag_setting. */
1385 COSTS_N_INSNS (1), /* extend. */
1386 COSTS_N_INSNS (1), /* add. */
1387 COSTS_N_INSNS (1), /* extend_add. */
1388 COSTS_N_INSNS (7) /* idiv. */
1389 },
1390 /* MULT DImode */
1391 {
1392 0, /* simple (N/A). */
1393 0, /* flag_setting (N/A). */
1394 COSTS_N_INSNS (1), /* extend. */
1395 0, /* add. */
1396 COSTS_N_INSNS (2), /* extend_add. */
1397 0 /* idiv (N/A). */
1398 }
1399 },
1400 /* LD/ST */
1401 {
1402 COSTS_N_INSNS (1), /* load. */
1403 COSTS_N_INSNS (1), /* load_sign_extend. */
1404 COSTS_N_INSNS (3), /* ldrd. */
1405 COSTS_N_INSNS (1), /* ldm_1st. */
1406 1, /* ldm_regs_per_insn_1st. */
1407 2, /* ldm_regs_per_insn_subsequent. */
1408 COSTS_N_INSNS (2), /* loadf. */
1409 COSTS_N_INSNS (2), /* loadd. */
1410 COSTS_N_INSNS (1), /* load_unaligned. */
1411 COSTS_N_INSNS (1), /* store. */
1412 COSTS_N_INSNS (3), /* strd. */
1413 COSTS_N_INSNS (1), /* stm_1st. */
1414 1, /* stm_regs_per_insn_1st. */
1415 2, /* stm_regs_per_insn_subsequent. */
1416 COSTS_N_INSNS (2), /* storef. */
1417 COSTS_N_INSNS (2), /* stored. */
1418 COSTS_N_INSNS (1), /* store_unaligned. */
1419 COSTS_N_INSNS (1), /* loadv. */
1420 COSTS_N_INSNS (1) /* storev. */
1421 },
1422 {
1423 /* FP SFmode */
1424 {
1425 COSTS_N_INSNS (15), /* div. */
1426 COSTS_N_INSNS (3), /* mult. */
1427 COSTS_N_INSNS (7), /* mult_addsub. */
1428 COSTS_N_INSNS (7), /* fma. */
1429 COSTS_N_INSNS (3), /* addsub. */
1430 COSTS_N_INSNS (3), /* fpconst. */
1431 COSTS_N_INSNS (3), /* neg. */
1432 COSTS_N_INSNS (3), /* compare. */
1433 COSTS_N_INSNS (3), /* widen. */
1434 COSTS_N_INSNS (3), /* narrow. */
1435 COSTS_N_INSNS (3), /* toint. */
1436 COSTS_N_INSNS (3), /* fromint. */
1437 COSTS_N_INSNS (3) /* roundint. */
1438 },
1439 /* FP DFmode */
1440 {
1441 COSTS_N_INSNS (30), /* div. */
1442 COSTS_N_INSNS (6), /* mult. */
1443 COSTS_N_INSNS (10), /* mult_addsub. */
1444 COSTS_N_INSNS (7), /* fma. */
1445 COSTS_N_INSNS (3), /* addsub. */
1446 COSTS_N_INSNS (3), /* fpconst. */
1447 COSTS_N_INSNS (3), /* neg. */
1448 COSTS_N_INSNS (3), /* compare. */
1449 COSTS_N_INSNS (3), /* widen. */
1450 COSTS_N_INSNS (3), /* narrow. */
1451 COSTS_N_INSNS (3), /* toint. */
1452 COSTS_N_INSNS (3), /* fromint. */
1453 COSTS_N_INSNS (3) /* roundint. */
1454 }
1455 },
1456 /* Vector */
1457 {
1458 COSTS_N_INSNS (1) /* alu. */
1459 }
1460 };
1461
1462 const struct cpu_cost_table cortexa12_extra_costs =
1463 {
1464 /* ALU */
1465 {
1466 0, /* arith. */
1467 0, /* logical. */
1468 0, /* shift. */
1469 COSTS_N_INSNS (1), /* shift_reg. */
1470 COSTS_N_INSNS (1), /* arith_shift. */
1471 COSTS_N_INSNS (1), /* arith_shift_reg. */
1472 COSTS_N_INSNS (1), /* log_shift. */
1473 COSTS_N_INSNS (1), /* log_shift_reg. */
1474 0, /* extend. */
1475 COSTS_N_INSNS (1), /* extend_arith. */
1476 0, /* bfi. */
1477 COSTS_N_INSNS (1), /* bfx. */
1478 COSTS_N_INSNS (1), /* clz. */
1479 COSTS_N_INSNS (1), /* rev. */
1480 0, /* non_exec. */
1481 true /* non_exec_costs_exec. */
1482 },
1483 /* MULT SImode */
1484 {
1485 {
1486 COSTS_N_INSNS (2), /* simple. */
1487 COSTS_N_INSNS (3), /* flag_setting. */
1488 COSTS_N_INSNS (2), /* extend. */
1489 COSTS_N_INSNS (3), /* add. */
1490 COSTS_N_INSNS (2), /* extend_add. */
1491 COSTS_N_INSNS (18) /* idiv. */
1492 },
1493 /* MULT DImode */
1494 {
1495 0, /* simple (N/A). */
1496 0, /* flag_setting (N/A). */
1497 COSTS_N_INSNS (3), /* extend. */
1498 0, /* add (N/A). */
1499 COSTS_N_INSNS (3), /* extend_add. */
1500 0 /* idiv (N/A). */
1501 }
1502 },
1503 /* LD/ST */
1504 {
1505 COSTS_N_INSNS (3), /* load. */
1506 COSTS_N_INSNS (3), /* load_sign_extend. */
1507 COSTS_N_INSNS (3), /* ldrd. */
1508 COSTS_N_INSNS (3), /* ldm_1st. */
1509 1, /* ldm_regs_per_insn_1st. */
1510 2, /* ldm_regs_per_insn_subsequent. */
1511 COSTS_N_INSNS (3), /* loadf. */
1512 COSTS_N_INSNS (3), /* loadd. */
1513 0, /* load_unaligned. */
1514 0, /* store. */
1515 0, /* strd. */
1516 0, /* stm_1st. */
1517 1, /* stm_regs_per_insn_1st. */
1518 2, /* stm_regs_per_insn_subsequent. */
1519 COSTS_N_INSNS (2), /* storef. */
1520 COSTS_N_INSNS (2), /* stored. */
1521 0, /* store_unaligned. */
1522 COSTS_N_INSNS (1), /* loadv. */
1523 COSTS_N_INSNS (1) /* storev. */
1524 },
1525 {
1526 /* FP SFmode */
1527 {
1528 COSTS_N_INSNS (17), /* div. */
1529 COSTS_N_INSNS (4), /* mult. */
1530 COSTS_N_INSNS (8), /* mult_addsub. */
1531 COSTS_N_INSNS (8), /* fma. */
1532 COSTS_N_INSNS (4), /* addsub. */
1533 COSTS_N_INSNS (2), /* fpconst. */
1534 COSTS_N_INSNS (2), /* neg. */
1535 COSTS_N_INSNS (2), /* compare. */
1536 COSTS_N_INSNS (4), /* widen. */
1537 COSTS_N_INSNS (4), /* narrow. */
1538 COSTS_N_INSNS (4), /* toint. */
1539 COSTS_N_INSNS (4), /* fromint. */
1540 COSTS_N_INSNS (4) /* roundint. */
1541 },
1542 /* FP DFmode */
1543 {
1544 COSTS_N_INSNS (31), /* div. */
1545 COSTS_N_INSNS (4), /* mult. */
1546 COSTS_N_INSNS (8), /* mult_addsub. */
1547 COSTS_N_INSNS (8), /* fma. */
1548 COSTS_N_INSNS (4), /* addsub. */
1549 COSTS_N_INSNS (2), /* fpconst. */
1550 COSTS_N_INSNS (2), /* neg. */
1551 COSTS_N_INSNS (2), /* compare. */
1552 COSTS_N_INSNS (4), /* widen. */
1553 COSTS_N_INSNS (4), /* narrow. */
1554 COSTS_N_INSNS (4), /* toint. */
1555 COSTS_N_INSNS (4), /* fromint. */
1556 COSTS_N_INSNS (4) /* roundint. */
1557 }
1558 },
1559 /* Vector */
1560 {
1561 COSTS_N_INSNS (1) /* alu. */
1562 }
1563 };
1564
1565 const struct cpu_cost_table cortexa15_extra_costs =
1566 {
1567 /* ALU */
1568 {
1569 0, /* arith. */
1570 0, /* logical. */
1571 0, /* shift. */
1572 0, /* shift_reg. */
1573 COSTS_N_INSNS (1), /* arith_shift. */
1574 COSTS_N_INSNS (1), /* arith_shift_reg. */
1575 COSTS_N_INSNS (1), /* log_shift. */
1576 COSTS_N_INSNS (1), /* log_shift_reg. */
1577 0, /* extend. */
1578 COSTS_N_INSNS (1), /* extend_arith. */
1579 COSTS_N_INSNS (1), /* bfi. */
1580 0, /* bfx. */
1581 0, /* clz. */
1582 0, /* rev. */
1583 0, /* non_exec. */
1584 true /* non_exec_costs_exec. */
1585 },
1586 /* MULT SImode */
1587 {
1588 {
1589 COSTS_N_INSNS (2), /* simple. */
1590 COSTS_N_INSNS (3), /* flag_setting. */
1591 COSTS_N_INSNS (2), /* extend. */
1592 COSTS_N_INSNS (2), /* add. */
1593 COSTS_N_INSNS (2), /* extend_add. */
1594 COSTS_N_INSNS (18) /* idiv. */
1595 },
1596 /* MULT DImode */
1597 {
1598 0, /* simple (N/A). */
1599 0, /* flag_setting (N/A). */
1600 COSTS_N_INSNS (3), /* extend. */
1601 0, /* add (N/A). */
1602 COSTS_N_INSNS (3), /* extend_add. */
1603 0 /* idiv (N/A). */
1604 }
1605 },
1606 /* LD/ST */
1607 {
1608 COSTS_N_INSNS (3), /* load. */
1609 COSTS_N_INSNS (3), /* load_sign_extend. */
1610 COSTS_N_INSNS (3), /* ldrd. */
1611 COSTS_N_INSNS (4), /* ldm_1st. */
1612 1, /* ldm_regs_per_insn_1st. */
1613 2, /* ldm_regs_per_insn_subsequent. */
1614 COSTS_N_INSNS (4), /* loadf. */
1615 COSTS_N_INSNS (4), /* loadd. */
1616 0, /* load_unaligned. */
1617 0, /* store. */
1618 0, /* strd. */
1619 COSTS_N_INSNS (1), /* stm_1st. */
1620 1, /* stm_regs_per_insn_1st. */
1621 2, /* stm_regs_per_insn_subsequent. */
1622 0, /* storef. */
1623 0, /* stored. */
1624 0, /* store_unaligned. */
1625 COSTS_N_INSNS (1), /* loadv. */
1626 COSTS_N_INSNS (1) /* storev. */
1627 },
1628 {
1629 /* FP SFmode */
1630 {
1631 COSTS_N_INSNS (17), /* div. */
1632 COSTS_N_INSNS (4), /* mult. */
1633 COSTS_N_INSNS (8), /* mult_addsub. */
1634 COSTS_N_INSNS (8), /* fma. */
1635 COSTS_N_INSNS (4), /* addsub. */
1636 COSTS_N_INSNS (2), /* fpconst. */
1637 COSTS_N_INSNS (2), /* neg. */
1638 COSTS_N_INSNS (5), /* compare. */
1639 COSTS_N_INSNS (4), /* widen. */
1640 COSTS_N_INSNS (4), /* narrow. */
1641 COSTS_N_INSNS (4), /* toint. */
1642 COSTS_N_INSNS (4), /* fromint. */
1643 COSTS_N_INSNS (4) /* roundint. */
1644 },
1645 /* FP DFmode */
1646 {
1647 COSTS_N_INSNS (31), /* div. */
1648 COSTS_N_INSNS (4), /* mult. */
1649 COSTS_N_INSNS (8), /* mult_addsub. */
1650 COSTS_N_INSNS (8), /* fma. */
1651 COSTS_N_INSNS (4), /* addsub. */
1652 COSTS_N_INSNS (2), /* fpconst. */
1653 COSTS_N_INSNS (2), /* neg. */
1654 COSTS_N_INSNS (2), /* compare. */
1655 COSTS_N_INSNS (4), /* widen. */
1656 COSTS_N_INSNS (4), /* narrow. */
1657 COSTS_N_INSNS (4), /* toint. */
1658 COSTS_N_INSNS (4), /* fromint. */
1659 COSTS_N_INSNS (4) /* roundint. */
1660 }
1661 },
1662 /* Vector */
1663 {
1664 COSTS_N_INSNS (1) /* alu. */
1665 }
1666 };
1667
1668 const struct cpu_cost_table v7m_extra_costs =
1669 {
1670 /* ALU */
1671 {
1672 0, /* arith. */
1673 0, /* logical. */
1674 0, /* shift. */
1675 0, /* shift_reg. */
1676 0, /* arith_shift. */
1677 COSTS_N_INSNS (1), /* arith_shift_reg. */
1678 0, /* log_shift. */
1679 COSTS_N_INSNS (1), /* log_shift_reg. */
1680 0, /* extend. */
1681 COSTS_N_INSNS (1), /* extend_arith. */
1682 0, /* bfi. */
1683 0, /* bfx. */
1684 0, /* clz. */
1685 0, /* rev. */
1686 COSTS_N_INSNS (1), /* non_exec. */
1687 false /* non_exec_costs_exec. */
1688 },
1689 {
1690 /* MULT SImode */
1691 {
1692 COSTS_N_INSNS (1), /* simple. */
1693 COSTS_N_INSNS (1), /* flag_setting. */
1694 COSTS_N_INSNS (2), /* extend. */
1695 COSTS_N_INSNS (1), /* add. */
1696 COSTS_N_INSNS (3), /* extend_add. */
1697 COSTS_N_INSNS (8) /* idiv. */
1698 },
1699 /* MULT DImode */
1700 {
1701 0, /* simple (N/A). */
1702 0, /* flag_setting (N/A). */
1703 COSTS_N_INSNS (2), /* extend. */
1704 0, /* add (N/A). */
1705 COSTS_N_INSNS (3), /* extend_add. */
1706 0 /* idiv (N/A). */
1707 }
1708 },
1709 /* LD/ST */
1710 {
1711 COSTS_N_INSNS (2), /* load. */
1712 0, /* load_sign_extend. */
1713 COSTS_N_INSNS (3), /* ldrd. */
1714 COSTS_N_INSNS (2), /* ldm_1st. */
1715 1, /* ldm_regs_per_insn_1st. */
1716 1, /* ldm_regs_per_insn_subsequent. */
1717 COSTS_N_INSNS (2), /* loadf. */
1718 COSTS_N_INSNS (3), /* loadd. */
1719 COSTS_N_INSNS (1), /* load_unaligned. */
1720 COSTS_N_INSNS (2), /* store. */
1721 COSTS_N_INSNS (3), /* strd. */
1722 COSTS_N_INSNS (2), /* stm_1st. */
1723 1, /* stm_regs_per_insn_1st. */
1724 1, /* stm_regs_per_insn_subsequent. */
1725 COSTS_N_INSNS (2), /* storef. */
1726 COSTS_N_INSNS (3), /* stored. */
1727 COSTS_N_INSNS (1), /* store_unaligned. */
1728 COSTS_N_INSNS (1), /* loadv. */
1729 COSTS_N_INSNS (1) /* storev. */
1730 },
1731 {
1732 /* FP SFmode */
1733 {
1734 COSTS_N_INSNS (7), /* div. */
1735 COSTS_N_INSNS (2), /* mult. */
1736 COSTS_N_INSNS (5), /* mult_addsub. */
1737 COSTS_N_INSNS (3), /* fma. */
1738 COSTS_N_INSNS (1), /* addsub. */
1739 0, /* fpconst. */
1740 0, /* neg. */
1741 0, /* compare. */
1742 0, /* widen. */
1743 0, /* narrow. */
1744 0, /* toint. */
1745 0, /* fromint. */
1746 0 /* roundint. */
1747 },
1748 /* FP DFmode */
1749 {
1750 COSTS_N_INSNS (15), /* div. */
1751 COSTS_N_INSNS (5), /* mult. */
1752 COSTS_N_INSNS (7), /* mult_addsub. */
1753 COSTS_N_INSNS (7), /* fma. */
1754 COSTS_N_INSNS (3), /* addsub. */
1755 0, /* fpconst. */
1756 0, /* neg. */
1757 0, /* compare. */
1758 0, /* widen. */
1759 0, /* narrow. */
1760 0, /* toint. */
1761 0, /* fromint. */
1762 0 /* roundint. */
1763 }
1764 },
1765 /* Vector */
1766 {
1767 COSTS_N_INSNS (1) /* alu. */
1768 }
1769 };
1770
1771 const struct addr_mode_cost_table generic_addr_mode_costs =
1772 {
1773 /* int. */
1774 {
1775 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1776 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1777 COSTS_N_INSNS (0) /* AMO_WB. */
1778 },
1779 /* float. */
1780 {
1781 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1782 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1783 COSTS_N_INSNS (0) /* AMO_WB. */
1784 },
1785 /* vector. */
1786 {
1787 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1788 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1789 COSTS_N_INSNS (0) /* AMO_WB. */
1790 }
1791 };
1792
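/* Illustrative note (not part of the original source): the entries in the
   cost tables above are *extra* costs, expressed via COSTS_N_INSNS, that the
   rtx-cost hooks add on top of a baseline instruction cost.  Reading the
   cortexa15 table above, for example, a simple SImode multiply carries an
   extra COSTS_N_INSNS (2), while a plain ALU "arith" operation carries 0,
   i.e. no penalty beyond the baseline.  */
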
1793 const struct tune_params arm_slowmul_tune =
1794 {
1795 &generic_extra_costs, /* Insn extra costs. */
1796 &generic_addr_mode_costs, /* Addressing mode costs. */
1797 NULL, /* Sched adj cost. */
1798 arm_default_branch_cost,
1799 &arm_default_vec_cost,
1800 3, /* Constant limit. */
1801 5, /* Max cond insns. */
1802 8, /* Memset max inline. */
1803 1, /* Issue rate. */
1804 ARM_PREFETCH_NOT_BENEFICIAL,
1805 tune_params::PREF_CONST_POOL_TRUE,
1806 tune_params::PREF_LDRD_FALSE,
1807 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1808 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1809 tune_params::DISPARAGE_FLAGS_NEITHER,
1810 tune_params::PREF_NEON_64_FALSE,
1811 tune_params::PREF_NEON_STRINGOPS_FALSE,
1812 tune_params::FUSE_NOTHING,
1813 tune_params::SCHED_AUTOPREF_OFF
1814 };
1815
1816 const struct tune_params arm_fastmul_tune =
1817 {
1818 &generic_extra_costs, /* Insn extra costs. */
1819 &generic_addr_mode_costs, /* Addressing mode costs. */
1820 NULL, /* Sched adj cost. */
1821 arm_default_branch_cost,
1822 &arm_default_vec_cost,
1823 1, /* Constant limit. */
1824 5, /* Max cond insns. */
1825 8, /* Memset max inline. */
1826 1, /* Issue rate. */
1827 ARM_PREFETCH_NOT_BENEFICIAL,
1828 tune_params::PREF_CONST_POOL_TRUE,
1829 tune_params::PREF_LDRD_FALSE,
1830 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1831 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1832 tune_params::DISPARAGE_FLAGS_NEITHER,
1833 tune_params::PREF_NEON_64_FALSE,
1834 tune_params::PREF_NEON_STRINGOPS_FALSE,
1835 tune_params::FUSE_NOTHING,
1836 tune_params::SCHED_AUTOPREF_OFF
1837 };
1838
1839 /* StrongARM has early execution of branches, so a sequence that is worth
1840 skipping is shorter. Set max_insns_skipped to a lower value. */
1841
1842 const struct tune_params arm_strongarm_tune =
1843 {
1844 &generic_extra_costs, /* Insn extra costs. */
1845 &generic_addr_mode_costs, /* Addressing mode costs. */
1846 NULL, /* Sched adj cost. */
1847 arm_default_branch_cost,
1848 &arm_default_vec_cost,
1849 1, /* Constant limit. */
1850 3, /* Max cond insns. */
1851 8, /* Memset max inline. */
1852 1, /* Issue rate. */
1853 ARM_PREFETCH_NOT_BENEFICIAL,
1854 tune_params::PREF_CONST_POOL_TRUE,
1855 tune_params::PREF_LDRD_FALSE,
1856 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1857 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1858 tune_params::DISPARAGE_FLAGS_NEITHER,
1859 tune_params::PREF_NEON_64_FALSE,
1860 tune_params::PREF_NEON_STRINGOPS_FALSE,
1861 tune_params::FUSE_NOTHING,
1862 tune_params::SCHED_AUTOPREF_OFF
1863 };
1864
1865 const struct tune_params arm_xscale_tune =
1866 {
1867 &generic_extra_costs, /* Insn extra costs. */
1868 &generic_addr_mode_costs, /* Addressing mode costs. */
1869 xscale_sched_adjust_cost,
1870 arm_default_branch_cost,
1871 &arm_default_vec_cost,
1872 2, /* Constant limit. */
1873 3, /* Max cond insns. */
1874 8, /* Memset max inline. */
1875 1, /* Issue rate. */
1876 ARM_PREFETCH_NOT_BENEFICIAL,
1877 tune_params::PREF_CONST_POOL_TRUE,
1878 tune_params::PREF_LDRD_FALSE,
1879 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1880 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1881 tune_params::DISPARAGE_FLAGS_NEITHER,
1882 tune_params::PREF_NEON_64_FALSE,
1883 tune_params::PREF_NEON_STRINGOPS_FALSE,
1884 tune_params::FUSE_NOTHING,
1885 tune_params::SCHED_AUTOPREF_OFF
1886 };
1887
1888 const struct tune_params arm_9e_tune =
1889 {
1890 &generic_extra_costs, /* Insn extra costs. */
1891 &generic_addr_mode_costs, /* Addressing mode costs. */
1892 NULL, /* Sched adj cost. */
1893 arm_default_branch_cost,
1894 &arm_default_vec_cost,
1895 1, /* Constant limit. */
1896 5, /* Max cond insns. */
1897 8, /* Memset max inline. */
1898 1, /* Issue rate. */
1899 ARM_PREFETCH_NOT_BENEFICIAL,
1900 tune_params::PREF_CONST_POOL_TRUE,
1901 tune_params::PREF_LDRD_FALSE,
1902 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1903 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1904 tune_params::DISPARAGE_FLAGS_NEITHER,
1905 tune_params::PREF_NEON_64_FALSE,
1906 tune_params::PREF_NEON_STRINGOPS_FALSE,
1907 tune_params::FUSE_NOTHING,
1908 tune_params::SCHED_AUTOPREF_OFF
1909 };
1910
1911 const struct tune_params arm_marvell_pj4_tune =
1912 {
1913 &generic_extra_costs, /* Insn extra costs. */
1914 &generic_addr_mode_costs, /* Addressing mode costs. */
1915 NULL, /* Sched adj cost. */
1916 arm_default_branch_cost,
1917 &arm_default_vec_cost,
1918 1, /* Constant limit. */
1919 5, /* Max cond insns. */
1920 8, /* Memset max inline. */
1921 2, /* Issue rate. */
1922 ARM_PREFETCH_NOT_BENEFICIAL,
1923 tune_params::PREF_CONST_POOL_TRUE,
1924 tune_params::PREF_LDRD_FALSE,
1925 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1926 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1927 tune_params::DISPARAGE_FLAGS_NEITHER,
1928 tune_params::PREF_NEON_64_FALSE,
1929 tune_params::PREF_NEON_STRINGOPS_FALSE,
1930 tune_params::FUSE_NOTHING,
1931 tune_params::SCHED_AUTOPREF_OFF
1932 };
1933
1934 const struct tune_params arm_v6t2_tune =
1935 {
1936 &generic_extra_costs, /* Insn extra costs. */
1937 &generic_addr_mode_costs, /* Addressing mode costs. */
1938 NULL, /* Sched adj cost. */
1939 arm_default_branch_cost,
1940 &arm_default_vec_cost,
1941 1, /* Constant limit. */
1942 5, /* Max cond insns. */
1943 8, /* Memset max inline. */
1944 1, /* Issue rate. */
1945 ARM_PREFETCH_NOT_BENEFICIAL,
1946 tune_params::PREF_CONST_POOL_FALSE,
1947 tune_params::PREF_LDRD_FALSE,
1948 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1949 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1950 tune_params::DISPARAGE_FLAGS_NEITHER,
1951 tune_params::PREF_NEON_64_FALSE,
1952 tune_params::PREF_NEON_STRINGOPS_FALSE,
1953 tune_params::FUSE_NOTHING,
1954 tune_params::SCHED_AUTOPREF_OFF
1955 };
1956
1957
1958 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1959 const struct tune_params arm_cortex_tune =
1960 {
1961 &generic_extra_costs,
1962 &generic_addr_mode_costs, /* Addressing mode costs. */
1963 NULL, /* Sched adj cost. */
1964 arm_default_branch_cost,
1965 &arm_default_vec_cost,
1966 1, /* Constant limit. */
1967 5, /* Max cond insns. */
1968 8, /* Memset max inline. */
1969 2, /* Issue rate. */
1970 ARM_PREFETCH_NOT_BENEFICIAL,
1971 tune_params::PREF_CONST_POOL_FALSE,
1972 tune_params::PREF_LDRD_FALSE,
1973 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1974 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1975 tune_params::DISPARAGE_FLAGS_NEITHER,
1976 tune_params::PREF_NEON_64_FALSE,
1977 tune_params::PREF_NEON_STRINGOPS_FALSE,
1978 tune_params::FUSE_NOTHING,
1979 tune_params::SCHED_AUTOPREF_OFF
1980 };
1981
1982 const struct tune_params arm_cortex_a8_tune =
1983 {
1984 &cortexa8_extra_costs,
1985 &generic_addr_mode_costs, /* Addressing mode costs. */
1986 NULL, /* Sched adj cost. */
1987 arm_default_branch_cost,
1988 &arm_default_vec_cost,
1989 1, /* Constant limit. */
1990 5, /* Max cond insns. */
1991 8, /* Memset max inline. */
1992 2, /* Issue rate. */
1993 ARM_PREFETCH_NOT_BENEFICIAL,
1994 tune_params::PREF_CONST_POOL_FALSE,
1995 tune_params::PREF_LDRD_FALSE,
1996 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1997 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1998 tune_params::DISPARAGE_FLAGS_NEITHER,
1999 tune_params::PREF_NEON_64_FALSE,
2000 tune_params::PREF_NEON_STRINGOPS_TRUE,
2001 tune_params::FUSE_NOTHING,
2002 tune_params::SCHED_AUTOPREF_OFF
2003 };
2004
2005 const struct tune_params arm_cortex_a7_tune =
2006 {
2007 &cortexa7_extra_costs,
2008 &generic_addr_mode_costs, /* Addressing mode costs. */
2009 NULL, /* Sched adj cost. */
2010 arm_default_branch_cost,
2011 &arm_default_vec_cost,
2012 1, /* Constant limit. */
2013 5, /* Max cond insns. */
2014 8, /* Memset max inline. */
2015 2, /* Issue rate. */
2016 ARM_PREFETCH_NOT_BENEFICIAL,
2017 tune_params::PREF_CONST_POOL_FALSE,
2018 tune_params::PREF_LDRD_FALSE,
2019 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2020 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2021 tune_params::DISPARAGE_FLAGS_NEITHER,
2022 tune_params::PREF_NEON_64_FALSE,
2023 tune_params::PREF_NEON_STRINGOPS_TRUE,
2024 tune_params::FUSE_NOTHING,
2025 tune_params::SCHED_AUTOPREF_OFF
2026 };
2027
2028 const struct tune_params arm_cortex_a15_tune =
2029 {
2030 &cortexa15_extra_costs,
2031 &generic_addr_mode_costs, /* Addressing mode costs. */
2032 NULL, /* Sched adj cost. */
2033 arm_default_branch_cost,
2034 &arm_default_vec_cost,
2035 1, /* Constant limit. */
2036 2, /* Max cond insns. */
2037 8, /* Memset max inline. */
2038 3, /* Issue rate. */
2039 ARM_PREFETCH_NOT_BENEFICIAL,
2040 tune_params::PREF_CONST_POOL_FALSE,
2041 tune_params::PREF_LDRD_TRUE,
2042 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2043 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2044 tune_params::DISPARAGE_FLAGS_ALL,
2045 tune_params::PREF_NEON_64_FALSE,
2046 tune_params::PREF_NEON_STRINGOPS_TRUE,
2047 tune_params::FUSE_NOTHING,
2048 tune_params::SCHED_AUTOPREF_FULL
2049 };
2050
2051 const struct tune_params arm_cortex_a35_tune =
2052 {
2053 &cortexa53_extra_costs,
2054 &generic_addr_mode_costs, /* Addressing mode costs. */
2055 NULL, /* Sched adj cost. */
2056 arm_default_branch_cost,
2057 &arm_default_vec_cost,
2058 1, /* Constant limit. */
2059 5, /* Max cond insns. */
2060 8, /* Memset max inline. */
2061 1, /* Issue rate. */
2062 ARM_PREFETCH_NOT_BENEFICIAL,
2063 tune_params::PREF_CONST_POOL_FALSE,
2064 tune_params::PREF_LDRD_FALSE,
2065 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2066 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2067 tune_params::DISPARAGE_FLAGS_NEITHER,
2068 tune_params::PREF_NEON_64_FALSE,
2069 tune_params::PREF_NEON_STRINGOPS_TRUE,
2070 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2071 tune_params::SCHED_AUTOPREF_OFF
2072 };
2073
2074 const struct tune_params arm_cortex_a53_tune =
2075 {
2076 &cortexa53_extra_costs,
2077 &generic_addr_mode_costs, /* Addressing mode costs. */
2078 NULL, /* Sched adj cost. */
2079 arm_default_branch_cost,
2080 &arm_default_vec_cost,
2081 1, /* Constant limit. */
2082 5, /* Max cond insns. */
2083 8, /* Memset max inline. */
2084 2, /* Issue rate. */
2085 ARM_PREFETCH_NOT_BENEFICIAL,
2086 tune_params::PREF_CONST_POOL_FALSE,
2087 tune_params::PREF_LDRD_FALSE,
2088 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2089 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2090 tune_params::DISPARAGE_FLAGS_NEITHER,
2091 tune_params::PREF_NEON_64_FALSE,
2092 tune_params::PREF_NEON_STRINGOPS_TRUE,
2093 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2094 tune_params::SCHED_AUTOPREF_OFF
2095 };
2096
2097 const struct tune_params arm_cortex_a57_tune =
2098 {
2099 &cortexa57_extra_costs,
2100   &generic_addr_mode_costs,		/* Addressing mode costs.  */
2101 NULL, /* Sched adj cost. */
2102 arm_default_branch_cost,
2103 &arm_default_vec_cost,
2104 1, /* Constant limit. */
2105 2, /* Max cond insns. */
2106 8, /* Memset max inline. */
2107 3, /* Issue rate. */
2108 ARM_PREFETCH_NOT_BENEFICIAL,
2109 tune_params::PREF_CONST_POOL_FALSE,
2110 tune_params::PREF_LDRD_TRUE,
2111 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2112 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2113 tune_params::DISPARAGE_FLAGS_ALL,
2114 tune_params::PREF_NEON_64_FALSE,
2115 tune_params::PREF_NEON_STRINGOPS_TRUE,
2116 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2117 tune_params::SCHED_AUTOPREF_FULL
2118 };
2119
2120 const struct tune_params arm_exynosm1_tune =
2121 {
2122 &exynosm1_extra_costs,
2123 &generic_addr_mode_costs, /* Addressing mode costs. */
2124 NULL, /* Sched adj cost. */
2125 arm_default_branch_cost,
2126 &arm_default_vec_cost,
2127 1, /* Constant limit. */
2128 2, /* Max cond insns. */
2129 8, /* Memset max inline. */
2130 3, /* Issue rate. */
2131 ARM_PREFETCH_NOT_BENEFICIAL,
2132 tune_params::PREF_CONST_POOL_FALSE,
2133 tune_params::PREF_LDRD_TRUE,
2134 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2135 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2136 tune_params::DISPARAGE_FLAGS_ALL,
2137 tune_params::PREF_NEON_64_FALSE,
2138 tune_params::PREF_NEON_STRINGOPS_TRUE,
2139 tune_params::FUSE_NOTHING,
2140 tune_params::SCHED_AUTOPREF_OFF
2141 };
2142
2143 const struct tune_params arm_xgene1_tune =
2144 {
2145 &xgene1_extra_costs,
2146 &generic_addr_mode_costs, /* Addressing mode costs. */
2147 NULL, /* Sched adj cost. */
2148 arm_default_branch_cost,
2149 &arm_default_vec_cost,
2150 1, /* Constant limit. */
2151 2, /* Max cond insns. */
2152 32, /* Memset max inline. */
2153 4, /* Issue rate. */
2154 ARM_PREFETCH_NOT_BENEFICIAL,
2155 tune_params::PREF_CONST_POOL_FALSE,
2156 tune_params::PREF_LDRD_TRUE,
2157 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2158 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2159 tune_params::DISPARAGE_FLAGS_ALL,
2160 tune_params::PREF_NEON_64_FALSE,
2161 tune_params::PREF_NEON_STRINGOPS_FALSE,
2162 tune_params::FUSE_NOTHING,
2163 tune_params::SCHED_AUTOPREF_OFF
2164 };
2165
2166 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2167 less appealing. Set max_insns_skipped to a low value. */
2168
2169 const struct tune_params arm_cortex_a5_tune =
2170 {
2171 &cortexa5_extra_costs,
2172 &generic_addr_mode_costs, /* Addressing mode costs. */
2173 NULL, /* Sched adj cost. */
2174 arm_cortex_a5_branch_cost,
2175 &arm_default_vec_cost,
2176 1, /* Constant limit. */
2177 1, /* Max cond insns. */
2178 8, /* Memset max inline. */
2179 2, /* Issue rate. */
2180 ARM_PREFETCH_NOT_BENEFICIAL,
2181 tune_params::PREF_CONST_POOL_FALSE,
2182 tune_params::PREF_LDRD_FALSE,
2183 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2184 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2185 tune_params::DISPARAGE_FLAGS_NEITHER,
2186 tune_params::PREF_NEON_64_FALSE,
2187 tune_params::PREF_NEON_STRINGOPS_TRUE,
2188 tune_params::FUSE_NOTHING,
2189 tune_params::SCHED_AUTOPREF_OFF
2190 };
2191
2192 const struct tune_params arm_cortex_a9_tune =
2193 {
2194 &cortexa9_extra_costs,
2195 &generic_addr_mode_costs, /* Addressing mode costs. */
2196 cortex_a9_sched_adjust_cost,
2197 arm_default_branch_cost,
2198 &arm_default_vec_cost,
2199 1, /* Constant limit. */
2200 5, /* Max cond insns. */
2201 8, /* Memset max inline. */
2202 2, /* Issue rate. */
2203 ARM_PREFETCH_BENEFICIAL(4,32,32),
2204 tune_params::PREF_CONST_POOL_FALSE,
2205 tune_params::PREF_LDRD_FALSE,
2206 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2207 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2208 tune_params::DISPARAGE_FLAGS_NEITHER,
2209 tune_params::PREF_NEON_64_FALSE,
2210 tune_params::PREF_NEON_STRINGOPS_FALSE,
2211 tune_params::FUSE_NOTHING,
2212 tune_params::SCHED_AUTOPREF_OFF
2213 };
2214
2215 const struct tune_params arm_cortex_a12_tune =
2216 {
2217 &cortexa12_extra_costs,
2218 &generic_addr_mode_costs, /* Addressing mode costs. */
2219 NULL, /* Sched adj cost. */
2220 arm_default_branch_cost,
2221 &arm_default_vec_cost, /* Vectorizer costs. */
2222 1, /* Constant limit. */
2223 2, /* Max cond insns. */
2224 8, /* Memset max inline. */
2225 2, /* Issue rate. */
2226 ARM_PREFETCH_NOT_BENEFICIAL,
2227 tune_params::PREF_CONST_POOL_FALSE,
2228 tune_params::PREF_LDRD_TRUE,
2229 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2230 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2231 tune_params::DISPARAGE_FLAGS_ALL,
2232 tune_params::PREF_NEON_64_FALSE,
2233 tune_params::PREF_NEON_STRINGOPS_TRUE,
2234 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2235 tune_params::SCHED_AUTOPREF_OFF
2236 };
2237
2238 const struct tune_params arm_cortex_a73_tune =
2239 {
2240 &cortexa57_extra_costs,
2241 &generic_addr_mode_costs, /* Addressing mode costs. */
2242 NULL, /* Sched adj cost. */
2243 arm_default_branch_cost,
2244 &arm_default_vec_cost, /* Vectorizer costs. */
2245 1, /* Constant limit. */
2246 2, /* Max cond insns. */
2247 8, /* Memset max inline. */
2248 2, /* Issue rate. */
2249 ARM_PREFETCH_NOT_BENEFICIAL,
2250 tune_params::PREF_CONST_POOL_FALSE,
2251 tune_params::PREF_LDRD_TRUE,
2252 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2253 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2254 tune_params::DISPARAGE_FLAGS_ALL,
2255 tune_params::PREF_NEON_64_FALSE,
2256 tune_params::PREF_NEON_STRINGOPS_TRUE,
2257 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2258 tune_params::SCHED_AUTOPREF_FULL
2259 };
2260
2261 /* armv7m tuning.  On Cortex-M4 cores, for example, MOVW/MOVT each take a
2262    single cycle to execute, so materialising a constant that way costs two
2263    cycles.  An LDR from the constant pool likewise takes two cycles, but
2264    mildly increases pipelining opportunity (consecutive loads/stores can be
2265    pipelined together, saving one cycle), and may also improve icache
2266    utilisation.  Hence we prefer the constant pool for such processors.  */
2267
2268 const struct tune_params arm_v7m_tune =
2269 {
2270 &v7m_extra_costs,
2271 &generic_addr_mode_costs, /* Addressing mode costs. */
2272 NULL, /* Sched adj cost. */
2273 arm_cortex_m_branch_cost,
2274 &arm_default_vec_cost,
2275 1, /* Constant limit. */
2276 2, /* Max cond insns. */
2277 8, /* Memset max inline. */
2278 1, /* Issue rate. */
2279 ARM_PREFETCH_NOT_BENEFICIAL,
2280 tune_params::PREF_CONST_POOL_TRUE,
2281 tune_params::PREF_LDRD_FALSE,
2282 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2283 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2284 tune_params::DISPARAGE_FLAGS_NEITHER,
2285 tune_params::PREF_NEON_64_FALSE,
2286 tune_params::PREF_NEON_STRINGOPS_FALSE,
2287 tune_params::FUSE_NOTHING,
2288 tune_params::SCHED_AUTOPREF_OFF
2289 };
2290
2291 /* Cortex-M7 tuning. */
2292
2293 const struct tune_params arm_cortex_m7_tune =
2294 {
2295 &v7m_extra_costs,
2296 &generic_addr_mode_costs, /* Addressing mode costs. */
2297 NULL, /* Sched adj cost. */
2298 arm_cortex_m7_branch_cost,
2299 &arm_default_vec_cost,
2300 0, /* Constant limit. */
2301 1, /* Max cond insns. */
2302 8, /* Memset max inline. */
2303 2, /* Issue rate. */
2304 ARM_PREFETCH_NOT_BENEFICIAL,
2305 tune_params::PREF_CONST_POOL_TRUE,
2306 tune_params::PREF_LDRD_FALSE,
2307 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2308 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2309 tune_params::DISPARAGE_FLAGS_NEITHER,
2310 tune_params::PREF_NEON_64_FALSE,
2311 tune_params::PREF_NEON_STRINGOPS_FALSE,
2312 tune_params::FUSE_NOTHING,
2313 tune_params::SCHED_AUTOPREF_OFF
2314 };
2315
2316 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2317 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2318 cortex-m23. */
2319 const struct tune_params arm_v6m_tune =
2320 {
2321 &generic_extra_costs, /* Insn extra costs. */
2322 &generic_addr_mode_costs, /* Addressing mode costs. */
2323 NULL, /* Sched adj cost. */
2324 arm_default_branch_cost,
2325 &arm_default_vec_cost, /* Vectorizer costs. */
2326 1, /* Constant limit. */
2327 5, /* Max cond insns. */
2328 8, /* Memset max inline. */
2329 1, /* Issue rate. */
2330 ARM_PREFETCH_NOT_BENEFICIAL,
2331 tune_params::PREF_CONST_POOL_FALSE,
2332 tune_params::PREF_LDRD_FALSE,
2333 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2334 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2335 tune_params::DISPARAGE_FLAGS_NEITHER,
2336 tune_params::PREF_NEON_64_FALSE,
2337 tune_params::PREF_NEON_STRINGOPS_FALSE,
2338 tune_params::FUSE_NOTHING,
2339 tune_params::SCHED_AUTOPREF_OFF
2340 };
2341
2342 const struct tune_params arm_fa726te_tune =
2343 {
2344 &generic_extra_costs, /* Insn extra costs. */
2345 &generic_addr_mode_costs, /* Addressing mode costs. */
2346 fa726te_sched_adjust_cost,
2347 arm_default_branch_cost,
2348 &arm_default_vec_cost,
2349 1, /* Constant limit. */
2350 5, /* Max cond insns. */
2351 8, /* Memset max inline. */
2352 2, /* Issue rate. */
2353 ARM_PREFETCH_NOT_BENEFICIAL,
2354 tune_params::PREF_CONST_POOL_TRUE,
2355 tune_params::PREF_LDRD_FALSE,
2356 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2357 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2358 tune_params::DISPARAGE_FLAGS_NEITHER,
2359 tune_params::PREF_NEON_64_FALSE,
2360 tune_params::PREF_NEON_STRINGOPS_FALSE,
2361 tune_params::FUSE_NOTHING,
2362 tune_params::SCHED_AUTOPREF_OFF
2363 };
2364
2365 /* Auto-generated CPU, FPU and architecture tables. */
2366 #include "arm-cpu-data.h"
2367
2368 /* The name of the preprocessor macro to define for this architecture. PROFILE
2369    is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2370 is thus chosen to be big enough to hold the longest architecture name. */
2371
2372 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2373
2374 /* Supported TLS relocations. */
2375
2376 enum tls_reloc {
2377 TLS_GD32,
2378 TLS_LDM32,
2379 TLS_LDO32,
2380 TLS_IE32,
2381 TLS_LE32,
2382 TLS_DESCSEQ /* GNU scheme */
2383 };
2384
2385 /* The maximum number of insns to be used when loading a constant. */
2386 inline static int
2387 arm_constant_limit (bool size_p)
2388 {
2389 return size_p ? 1 : current_tune->constant_limit;
2390 }
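
/* Illustrative note (not part of the original source): with SIZE_P true the
   limit is always a single instruction; otherwise it comes from the active
   tuning table, e.g. arm_slowmul_tune above sets constant_limit to 3 while
   most of the Cortex tunings use 1.  A minimal sketch of a call site:

     int max_insns = arm_constant_limit (optimize_size != 0);  */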
2391
2392 /* Emit an insn that's a simple single-set. Both the operands must be known
2393 to be valid. */
2394 inline static rtx_insn *
2395 emit_set_insn (rtx x, rtx y)
2396 {
2397 return emit_insn (gen_rtx_SET (x, y));
2398 }
2399
2400 /* Return the number of bits set in VALUE. */
2401 static unsigned
2402 bit_count (unsigned long value)
2403 {
2404 unsigned long count = 0;
2405
2406 while (value)
2407 {
2408 count++;
2409 value &= value - 1; /* Clear the least-significant set bit. */
2410 }
2411
2412 return count;
2413 }
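
/* Worked example (illustrative, not part of the original source):
   bit_count (0x28) iterates twice, since 0x28 & 0x27 == 0x20 and
   0x20 & 0x1f == 0, and so returns 2.  Each "value &= value - 1" step
   clears exactly one set bit (Kernighan's trick), so the loop runs once
   per set bit rather than once per bit position.  */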
2414
2415 /* Return the number of bits set in BMAP. */
2416 static unsigned
2417 bitmap_popcount (const sbitmap bmap)
2418 {
2419 unsigned int count = 0;
2420 unsigned int n = 0;
2421 sbitmap_iterator sbi;
2422
2423 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2424 count++;
2425 return count;
2426 }
2427
2428 typedef struct
2429 {
2430 machine_mode mode;
2431 const char *name;
2432 } arm_fixed_mode_set;
2433
2434 /* A small helper for setting fixed-point optab libfuncs.  */
2435
2436 static void
2437 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2438 const char *funcname, const char *modename,
2439 int num_suffix)
2440 {
2441 char buffer[50];
2442
2443 if (num_suffix == 0)
2444 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2445 else
2446 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2447
2448 set_optab_libfunc (optable, mode, buffer);
2449 }
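
/* Example (illustrative, not part of the original source): the call
   arm_set_fixed_optab_libfunc (ssadd_optab, E_SAmode, "ssadd", "sa", 3)
   made from arm_init_libfuncs below registers the libcall name
   "__gnu_ssaddsa3", i.e. "__gnu_" + funcname + modename + num_suffix;
   with num_suffix == 0 the trailing digit is omitted.  */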
2450
2451 static void
2452 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2453 machine_mode from, const char *funcname,
2454 const char *toname, const char *fromname)
2455 {
2456 char buffer[50];
2457 const char *maybe_suffix_2 = "";
2458
2459 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2460 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2461 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2462 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2463 maybe_suffix_2 = "2";
2464
2465 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2466 maybe_suffix_2);
2467
2468 set_conv_libfunc (optable, to, from, buffer);
2469 }
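
/* Example (illustrative, not part of the original source): a conversion from
   QQmode to HQmode keeps the same signedness and both are fract modes, so
   fract_optab gets the name "__gnu_fractqqhq2" ("__gnu_" + funcname +
   fromname + toname + "2"), whereas a conversion from QQmode to SImode is
   not fixed-point-to-fixed-point and so gets plain "__gnu_fractqqsi".  */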
2470
2471 /* Set up library functions unique to ARM. */
2472
2473 static void
2474 arm_init_libfuncs (void)
2475 {
2476 /* For Linux, we have access to kernel support for atomic operations. */
2477 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2478 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2479
2480 /* There are no special library functions unless we are using the
2481 ARM BPABI. */
2482 if (!TARGET_BPABI)
2483 return;
2484
2485 /* The functions below are described in Section 4 of the "Run-Time
2486 ABI for the ARM architecture", Version 1.0. */
2487
2488 /* Double-precision floating-point arithmetic. Table 2. */
2489 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2490 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2491 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2492 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2493 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2494
2495 /* Double-precision comparisons. Table 3. */
2496 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2497 set_optab_libfunc (ne_optab, DFmode, NULL);
2498 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2499 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2500 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2501 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2502 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2503
2504 /* Single-precision floating-point arithmetic. Table 4. */
2505 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2506 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2507 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2508 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2509 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2510
2511 /* Single-precision comparisons. Table 5. */
2512 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2513 set_optab_libfunc (ne_optab, SFmode, NULL);
2514 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2515 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2516 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2517 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2518 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2519
2520 /* Floating-point to integer conversions. Table 6. */
2521 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2522 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2523 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2524 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2525 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2526 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2527 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2528 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2529
2530 /* Conversions between floating types. Table 7. */
2531 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2532 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2533
2534 /* Integer to floating-point conversions. Table 8. */
2535 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2536 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2537 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2538 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2539 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2540 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2541 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2542 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2543
2544 /* Long long. Table 9. */
2545 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2546 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2547 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2548 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2549 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2550 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2551 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2552 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2553
2554 /* Integer (32/32->32) division. \S 4.3.1. */
2555 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2556 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2557
2558 /* The divmod functions are designed so that they can be used for
2559 plain division, even though they return both the quotient and the
2560 remainder. The quotient is returned in the usual location (i.e.,
2561 r0 for SImode, {r0, r1} for DImode), just as would be expected
2562 for an ordinary division routine. Because the AAPCS calling
2563 conventions specify that all of { r0, r1, r2, r3 } are
2564     call-clobbered registers, there is no need to tell the compiler
2565 explicitly that those registers are clobbered by these
2566 routines. */
2567 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2568 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2569
2570 /* For SImode division the ABI provides div-without-mod routines,
2571 which are faster. */
2572 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2573 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2574
2575 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2576 divmod libcalls instead. */
2577 set_optab_libfunc (smod_optab, DImode, NULL);
2578 set_optab_libfunc (umod_optab, DImode, NULL);
2579 set_optab_libfunc (smod_optab, SImode, NULL);
2580 set_optab_libfunc (umod_optab, SImode, NULL);
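
  /* Illustrative sketch (not part of the original source; the register
     details are per the run-time ABI, not this file): with the settings
     above, a signed SImode "a % b" has no dedicated mod libcall and is
     expanded via __aeabi_idivmod, which returns the quotient in r0 and the
     remainder in r1; the remainder is then simply taken from r1.  A plain
     "a / b" instead uses the faster __aeabi_idiv registered above.  */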
2581
2582 /* Half-precision float operations. The compiler handles all operations
2583      with NULL libfuncs by converting to SFmode.  */
2584 switch (arm_fp16_format)
2585 {
2586 case ARM_FP16_FORMAT_IEEE:
2587 case ARM_FP16_FORMAT_ALTERNATIVE:
2588
2589 /* Conversions. */
2590 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2591 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2592 ? "__gnu_f2h_ieee"
2593 : "__gnu_f2h_alternative"));
2594 set_conv_libfunc (sext_optab, SFmode, HFmode,
2595 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2596 ? "__gnu_h2f_ieee"
2597 : "__gnu_h2f_alternative"));
2598
2599 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2600 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2601 ? "__gnu_d2h_ieee"
2602 : "__gnu_d2h_alternative"));
2603
2604 /* Arithmetic. */
2605 set_optab_libfunc (add_optab, HFmode, NULL);
2606 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2607 set_optab_libfunc (smul_optab, HFmode, NULL);
2608 set_optab_libfunc (neg_optab, HFmode, NULL);
2609 set_optab_libfunc (sub_optab, HFmode, NULL);
2610
2611 /* Comparisons. */
2612 set_optab_libfunc (eq_optab, HFmode, NULL);
2613 set_optab_libfunc (ne_optab, HFmode, NULL);
2614 set_optab_libfunc (lt_optab, HFmode, NULL);
2615 set_optab_libfunc (le_optab, HFmode, NULL);
2616 set_optab_libfunc (ge_optab, HFmode, NULL);
2617 set_optab_libfunc (gt_optab, HFmode, NULL);
2618 set_optab_libfunc (unord_optab, HFmode, NULL);
2619 break;
2620
2621 default:
2622 break;
2623 }
2624
2625 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2626 {
2627 const arm_fixed_mode_set fixed_arith_modes[] =
2628 {
2629 { E_QQmode, "qq" },
2630 { E_UQQmode, "uqq" },
2631 { E_HQmode, "hq" },
2632 { E_UHQmode, "uhq" },
2633 { E_SQmode, "sq" },
2634 { E_USQmode, "usq" },
2635 { E_DQmode, "dq" },
2636 { E_UDQmode, "udq" },
2637 { E_TQmode, "tq" },
2638 { E_UTQmode, "utq" },
2639 { E_HAmode, "ha" },
2640 { E_UHAmode, "uha" },
2641 { E_SAmode, "sa" },
2642 { E_USAmode, "usa" },
2643 { E_DAmode, "da" },
2644 { E_UDAmode, "uda" },
2645 { E_TAmode, "ta" },
2646 { E_UTAmode, "uta" }
2647 };
2648 const arm_fixed_mode_set fixed_conv_modes[] =
2649 {
2650 { E_QQmode, "qq" },
2651 { E_UQQmode, "uqq" },
2652 { E_HQmode, "hq" },
2653 { E_UHQmode, "uhq" },
2654 { E_SQmode, "sq" },
2655 { E_USQmode, "usq" },
2656 { E_DQmode, "dq" },
2657 { E_UDQmode, "udq" },
2658 { E_TQmode, "tq" },
2659 { E_UTQmode, "utq" },
2660 { E_HAmode, "ha" },
2661 { E_UHAmode, "uha" },
2662 { E_SAmode, "sa" },
2663 { E_USAmode, "usa" },
2664 { E_DAmode, "da" },
2665 { E_UDAmode, "uda" },
2666 { E_TAmode, "ta" },
2667 { E_UTAmode, "uta" },
2668 { E_QImode, "qi" },
2669 { E_HImode, "hi" },
2670 { E_SImode, "si" },
2671 { E_DImode, "di" },
2672 { E_TImode, "ti" },
2673 { E_SFmode, "sf" },
2674 { E_DFmode, "df" }
2675 };
2676 unsigned int i, j;
2677
2678 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2679 {
2680 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2681 "add", fixed_arith_modes[i].name, 3);
2682 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2683 "ssadd", fixed_arith_modes[i].name, 3);
2684 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2685 "usadd", fixed_arith_modes[i].name, 3);
2686 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2687 "sub", fixed_arith_modes[i].name, 3);
2688 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2689 "sssub", fixed_arith_modes[i].name, 3);
2690 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2691 "ussub", fixed_arith_modes[i].name, 3);
2692 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2693 "mul", fixed_arith_modes[i].name, 3);
2694 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2695 "ssmul", fixed_arith_modes[i].name, 3);
2696 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2697 "usmul", fixed_arith_modes[i].name, 3);
2698 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2699 "div", fixed_arith_modes[i].name, 3);
2700 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2701 "udiv", fixed_arith_modes[i].name, 3);
2702 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2703 "ssdiv", fixed_arith_modes[i].name, 3);
2704 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2705 "usdiv", fixed_arith_modes[i].name, 3);
2706 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2707 "neg", fixed_arith_modes[i].name, 2);
2708 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2709 "ssneg", fixed_arith_modes[i].name, 2);
2710 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2711 "usneg", fixed_arith_modes[i].name, 2);
2712 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2713 "ashl", fixed_arith_modes[i].name, 3);
2714 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2715 "ashr", fixed_arith_modes[i].name, 3);
2716 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2717 "lshr", fixed_arith_modes[i].name, 3);
2718 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2719 "ssashl", fixed_arith_modes[i].name, 3);
2720 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2721 "usashl", fixed_arith_modes[i].name, 3);
2722 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2723 "cmp", fixed_arith_modes[i].name, 2);
2724 }
2725
2726 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2727 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2728 {
2729 if (i == j
2730 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2731 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2732 continue;
2733
2734 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2735 fixed_conv_modes[j].mode, "fract",
2736 fixed_conv_modes[i].name,
2737 fixed_conv_modes[j].name);
2738 arm_set_fixed_conv_libfunc (satfract_optab,
2739 fixed_conv_modes[i].mode,
2740 fixed_conv_modes[j].mode, "satfract",
2741 fixed_conv_modes[i].name,
2742 fixed_conv_modes[j].name);
2743 arm_set_fixed_conv_libfunc (fractuns_optab,
2744 fixed_conv_modes[i].mode,
2745 fixed_conv_modes[j].mode, "fractuns",
2746 fixed_conv_modes[i].name,
2747 fixed_conv_modes[j].name);
2748 arm_set_fixed_conv_libfunc (satfractuns_optab,
2749 fixed_conv_modes[i].mode,
2750 fixed_conv_modes[j].mode, "satfractuns",
2751 fixed_conv_modes[i].name,
2752 fixed_conv_modes[j].name);
2753 }
2754 }
2755
2756 if (TARGET_AAPCS_BASED)
2757 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2758 }
2759
2760 /* On AAPCS systems, this is the "struct __va_list". */
2761 static GTY(()) tree va_list_type;
2762
2763 /* Return the type to use as __builtin_va_list. */
2764 static tree
2765 arm_build_builtin_va_list (void)
2766 {
2767 tree va_list_name;
2768 tree ap_field;
2769
2770 if (!TARGET_AAPCS_BASED)
2771 return std_build_builtin_va_list ();
2772
2773 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2774 defined as:
2775
2776 struct __va_list
2777 {
2778 void *__ap;
2779 };
2780
2781 The C Library ABI further reinforces this definition in \S
2782 4.1.
2783
2784 We must follow this definition exactly. The structure tag
2785 name is visible in C++ mangled names, and thus forms a part
2786 of the ABI. The field name may be used by people who
2787 #include <stdarg.h>. */
2788 /* Create the type. */
2789 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2790 /* Give it the required name. */
2791 va_list_name = build_decl (BUILTINS_LOCATION,
2792 TYPE_DECL,
2793 get_identifier ("__va_list"),
2794 va_list_type);
2795 DECL_ARTIFICIAL (va_list_name) = 1;
2796 TYPE_NAME (va_list_type) = va_list_name;
2797 TYPE_STUB_DECL (va_list_type) = va_list_name;
2798 /* Create the __ap field. */
2799 ap_field = build_decl (BUILTINS_LOCATION,
2800 FIELD_DECL,
2801 get_identifier ("__ap"),
2802 ptr_type_node);
2803 DECL_ARTIFICIAL (ap_field) = 1;
2804 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2805 TYPE_FIELDS (va_list_type) = ap_field;
2806 /* Compute its layout. */
2807 layout_type (va_list_type);
2808
2809 return va_list_type;
2810 }
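
/* Illustrative note (assumption based on the ARM C++ ABI, not taken from
   this file): because the "__va_list" tag is ABI-visible, a C++ declaration
   such as

     void f (va_list);   // mangles to _Z1fSt9__va_list

   encodes the tag as if the struct lived in namespace std, which is why the
   type built above must use exactly this name.  */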
2811
2812 /* Return an expression of type "void *" pointing to the next
2813 available argument in a variable-argument list. VALIST is the
2814 user-level va_list object, of type __builtin_va_list. */
2815 static tree
2816 arm_extract_valist_ptr (tree valist)
2817 {
2818 if (TREE_TYPE (valist) == error_mark_node)
2819 return error_mark_node;
2820
2821 /* On an AAPCS target, the pointer is stored within "struct
2822 va_list". */
2823 if (TARGET_AAPCS_BASED)
2824 {
2825 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2826 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2827 valist, ap_field, NULL_TREE);
2828 }
2829
2830 return valist;
2831 }
2832
2833 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2834 static void
2835 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2836 {
2837 valist = arm_extract_valist_ptr (valist);
2838 std_expand_builtin_va_start (valist, nextarg);
2839 }
2840
2841 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2842 static tree
2843 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2844 gimple_seq *post_p)
2845 {
2846 valist = arm_extract_valist_ptr (valist);
2847 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2848 }
2849
2850 /* Check any incompatible options that the user has specified. */
2851 static void
2852 arm_option_check_internal (struct gcc_options *opts)
2853 {
2854 int flags = opts->x_target_flags;
2855
2856 /* iWMMXt and NEON are incompatible. */
2857 if (TARGET_IWMMXT
2858 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2859 error ("iWMMXt and NEON are incompatible");
2860
2861 /* Make sure that the processor choice does not conflict with any of the
2862 other command line choices. */
2863 if (TARGET_ARM_P (flags)
2864 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2865 error ("target CPU does not support ARM mode");
2866
2867 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2868 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2869 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2870
2871 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2872 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2873
2874 /* If this target is normally configured to use APCS frames, warn if they
2875 are turned off and debugging is turned on. */
2876 if (TARGET_ARM_P (flags)
2877 && write_symbols != NO_DEBUG
2878 && !TARGET_APCS_FRAME
2879 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2880 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2881
2882 /* iWMMXt unsupported under Thumb mode. */
2883 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2884 error ("iWMMXt unsupported under Thumb mode");
2885
2886 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2887 error ("can not use -mtp=cp15 with 16-bit Thumb");
2888
2889 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2890 {
2891 error ("RTP PIC is incompatible with Thumb");
2892 flag_pic = 0;
2893 }
2894
2895 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2896 with MOVT. */
2897 if ((target_pure_code || target_slow_flash_data)
2898 && (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON))
2899 {
2900 const char *flag = (target_pure_code ? "-mpure-code" :
2901 "-mslow-flash-data");
2902 error ("%s only supports non-pic code on M-profile targets with the "
2903 "MOVT instruction", flag);
2904 }
2905
2906 }
2907
2908 /* Recompute the global settings depending on target attribute options. */
2909
2910 static void
2911 arm_option_params_internal (void)
2912 {
2913 /* If we are not using the default (ARM mode) section anchor offset
2914 ranges, then set the correct ranges now. */
2915 if (TARGET_THUMB1)
2916 {
2917 /* Thumb-1 LDR instructions cannot have negative offsets.
2918 Permissible positive offset ranges are 5-bit (for byte loads),
2919 6-bit (for halfword loads), or 7-bit (for word loads).
2920 Empirical results suggest a 7-bit anchor range gives the best
2921 overall code size. */
2922 targetm.min_anchor_offset = 0;
2923 targetm.max_anchor_offset = 127;
2924 }
2925 else if (TARGET_THUMB2)
2926 {
2927       /* The minimum is set such that the total size of the block for a
2928 	 particular anchor is 248 + 1 + 4095 = 4344 bytes, which is divisible
2929 	 by eight (4344 = 8 * 543), ensuring natural spacing of anchors.  */
2930 targetm.min_anchor_offset = -248;
2931 targetm.max_anchor_offset = 4095;
2932 }
2933 else
2934 {
2935 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2936 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2937 }
2938
2939 /* Increase the number of conditional instructions with -Os. */
2940 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2941
2942 /* For THUMB2, we limit the conditional sequence to one IT block. */
2943 if (TARGET_THUMB2)
2944 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
2945 }
2946
2947 /* True if -mflip-thumb should next add an attribute for the default
2948 mode, false if it should next add an attribute for the opposite mode. */
2949 static GTY(()) bool thumb_flipper;
2950
2951 /* Options after initial target override. */
2952 static GTY(()) tree init_optimize;
2953
2954 static void
2955 arm_override_options_after_change_1 (struct gcc_options *opts)
2956 {
2957 if (opts->x_align_functions <= 0)
2958 opts->x_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2959 && opts->x_optimize_size ? 2 : 4;
2960 }
2961
2962 /* Implement targetm.override_options_after_change. */
2963
2964 static void
2965 arm_override_options_after_change (void)
2966 {
2967 arm_configure_build_target (&arm_active_target,
2968 TREE_TARGET_OPTION (target_option_default_node),
2969 &global_options_set, false);
2970
2971 arm_override_options_after_change_1 (&global_options);
2972 }
2973
2974 /* Implement TARGET_OPTION_SAVE. */
2975 static void
2976 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2977 {
2978 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2979 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2980 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2981 }
2982
2983 /* Implement TARGET_OPTION_RESTORE. */
2984 static void
2985 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2986 {
2987 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2988 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2989 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2990 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2991 false);
2992 }
2993
2994 /* Reset options between modes that the user has specified. */
2995 static void
2996 arm_option_override_internal (struct gcc_options *opts,
2997 struct gcc_options *opts_set)
2998 {
2999 arm_override_options_after_change_1 (opts);
3000
3001 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3002 {
3003 /* The default is to enable interworking, so this warning message would
3004 	 be confusing to users who have just compiled with, e.g., -march=armv3.  */
3005 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3006 opts->x_target_flags &= ~MASK_INTERWORK;
3007 }
3008
3009 if (TARGET_THUMB_P (opts->x_target_flags)
3010 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3011 {
3012 warning (0, "target CPU does not support THUMB instructions");
3013 opts->x_target_flags &= ~MASK_THUMB;
3014 }
3015
3016 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3017 {
3018 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3019 opts->x_target_flags &= ~MASK_APCS_FRAME;
3020 }
3021
3022 /* Callee super interworking implies thumb interworking. Adding
3023 this to the flags here simplifies the logic elsewhere. */
3024 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3025 opts->x_target_flags |= MASK_INTERWORK;
3026
3027   /* We need to remember the initial values so that combinations of options
3028      like -mflip-thumb -mthumb -fno-schedule-insns work for any attribute.  */
3029 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3030
3031 if (! opts_set->x_arm_restrict_it)
3032 opts->x_arm_restrict_it = arm_arch8;
3033
3034   /* Restricted IT does not apply in ARM state or on M-profile targets.  */
3035 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3036 opts->x_arm_restrict_it = 0;
3037
3038 /* Enable -munaligned-access by default for
3039      - all ARMv6 architecture-based processors when compiling for a 32-bit
3040        ISA, i.e. Thumb-2 and ARM state only.
3041      - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3042      - ARMv8 architecture-based processors.
3043
3044 Disable -munaligned-access by default for
3045 - all pre-ARMv6 architecture-based processors
3046 - ARMv6-M architecture-based processors
3047 - ARMv8-M Baseline processors. */
3048
3049 if (! opts_set->x_unaligned_access)
3050 {
3051 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3052 && arm_arch6 && (arm_arch_notm || arm_arch7));
3053 }
3054 else if (opts->x_unaligned_access == 1
3055 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3056 {
3057 warning (0, "target CPU does not support unaligned accesses");
3058 opts->x_unaligned_access = 0;
3059 }
3060
3061 /* Don't warn since it's on by default in -O2. */
3062 if (TARGET_THUMB1_P (opts->x_target_flags))
3063 opts->x_flag_schedule_insns = 0;
3064 else
3065 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3066
3067 /* Disable shrink-wrap when optimizing function for size, since it tends to
3068 generate additional returns. */
3069 if (optimize_function_for_size_p (cfun)
3070 && TARGET_THUMB2_P (opts->x_target_flags))
3071 opts->x_flag_shrink_wrap = false;
3072 else
3073 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3074
3075 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3076 - epilogue_insns - does not accurately model the corresponding insns
3077 emitted in the asm file. In particular, see the comment in thumb_exit
3078 'Find out how many of the (return) argument registers we can corrupt'.
3079 As a consequence, the epilogue may clobber registers without fipa-ra
3080 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3081 TODO: Accurately model clobbers for epilogue_insns and reenable
3082 fipa-ra. */
3083 if (TARGET_THUMB1_P (opts->x_target_flags))
3084 opts->x_flag_ipa_ra = 0;
3085 else
3086 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3087
3088 /* Thumb2 inline assembly code should always use unified syntax.
3089 This will apply to ARM and Thumb1 eventually. */
3090 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3091
3092 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3093 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3094 #endif
3095 }
3096
3097 static sbitmap isa_all_fpubits;
3098 static sbitmap isa_quirkbits;
3099
3100 /* Configure a build target TARGET from the user-specified options OPTS and
3101 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3102 architecture have been specified, but the two are not identical. */
3103 void
3104 arm_configure_build_target (struct arm_build_target *target,
3105 struct cl_target_option *opts,
3106 struct gcc_options *opts_set,
3107 bool warn_compatible)
3108 {
3109 const cpu_option *arm_selected_tune = NULL;
3110 const arch_option *arm_selected_arch = NULL;
3111 const cpu_option *arm_selected_cpu = NULL;
3112 const arm_fpu_desc *arm_selected_fpu = NULL;
3113 const char *tune_opts = NULL;
3114 const char *arch_opts = NULL;
3115 const char *cpu_opts = NULL;
3116
3117 bitmap_clear (target->isa);
3118 target->core_name = NULL;
3119 target->arch_name = NULL;
3120
3121 if (opts_set->x_arm_arch_string)
3122 {
3123 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3124 "-march",
3125 opts->x_arm_arch_string);
3126 arch_opts = strchr (opts->x_arm_arch_string, '+');
3127 }
3128
3129 if (opts_set->x_arm_cpu_string)
3130 {
3131 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3132 opts->x_arm_cpu_string);
3133 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3134 arm_selected_tune = arm_selected_cpu;
3135 /* If taking the tuning from -mcpu, we don't need to rescan the
3136 options for tuning. */
3137 }
3138
3139 if (opts_set->x_arm_tune_string)
3140 {
3141 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3142 opts->x_arm_tune_string);
3143 tune_opts = strchr (opts->x_arm_tune_string, '+');
3144 }
3145
3146 if (arm_selected_arch)
3147 {
3148 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3149 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3150 arch_opts);
3151
3152 if (arm_selected_cpu)
3153 {
3154 auto_sbitmap cpu_isa (isa_num_bits);
3155 auto_sbitmap isa_delta (isa_num_bits);
3156
3157 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3158 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3159 cpu_opts);
3160 bitmap_xor (isa_delta, cpu_isa, target->isa);
3161 /* Ignore any bits that are quirk bits. */
3162 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3163 /* Ignore (for now) any bits that might be set by -mfpu. */
3164 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3165
3166 if (!bitmap_empty_p (isa_delta))
3167 {
3168 if (warn_compatible)
3169 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3170 arm_selected_cpu->common.name,
3171 arm_selected_arch->common.name);
3172 /* -march wins for code generation.
3173 -mcpu wins for default tuning. */
3174 if (!arm_selected_tune)
3175 arm_selected_tune = arm_selected_cpu;
3176
3177 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3178 target->arch_name = arm_selected_arch->common.name;
3179 }
3180 else
3181 {
3182 /* Architecture and CPU are essentially the same.
3183 Prefer the CPU setting. */
3184 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3185 target->core_name = arm_selected_cpu->common.name;
3186 /* Copy the CPU's capabilities, so that we inherit the
3187 appropriate extensions and quirks. */
3188 bitmap_copy (target->isa, cpu_isa);
3189 }
3190 }
3191 else
3192 {
3193 /* Pick a CPU based on the architecture. */
3194 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3195 target->arch_name = arm_selected_arch->common.name;
3196 /* Note: target->core_name is left unset in this path. */
3197 }
3198 }
3199 else if (arm_selected_cpu)
3200 {
3201 target->core_name = arm_selected_cpu->common.name;
3202 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3203 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3204 cpu_opts);
3205 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3206 }
3207 /* If the user did not specify a processor or architecture, choose
3208 one for them. */
3209 else
3210 {
3211 const cpu_option *sel;
3212 auto_sbitmap sought_isa (isa_num_bits);
3213 bitmap_clear (sought_isa);
3214 auto_sbitmap default_isa (isa_num_bits);
3215
3216 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3217 TARGET_CPU_DEFAULT);
3218 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3219 gcc_assert (arm_selected_cpu->common.name);
3220
3221 /* RWE: All of the selection logic below (to the end of this
3222 'if' clause) looks somewhat suspect. It appears to be mostly
3223 there to support forcing thumb support when the default CPU
3224 does not have thumb (somewhat dubious in terms of what the
3225 user might be expecting). I think it should be removed once
3226 support for the pre-thumb era cores is removed. */
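/* Illustrative scenario (an assumption about how this path is reached):
   a toolchain whose default CPU lacks Thumb, invoked with -mthumb or
   -mthumb-interwork, falls into the search loops below, which look for
   the smallest superset of the default CPU's features that also provides
   the requested bits.  */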
3227 sel = arm_selected_cpu;
3228 arm_initialize_isa (default_isa, sel->common.isa_bits);
3229 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3230 cpu_opts);
3231
3232 /* Now check to see if the user has specified any command line
3233 switches that require certain abilities from the cpu. */
3234
3235 if (TARGET_INTERWORK || TARGET_THUMB)
3236 {
3237 bitmap_set_bit (sought_isa, isa_bit_thumb);
3238 bitmap_set_bit (sought_isa, isa_bit_mode32);
3239
3240 /* There are no ARM processors that support both APCS-26 and
3241 interworking. Therefore we forcibly remove MODE26 from
3242 the isa features here (if it was set), so that the
3243 search below will always be able to find a compatible
3244 processor. */
3245 bitmap_clear_bit (default_isa, isa_bit_mode26);
3246 }
3247
3248 /* If there are such requirements and the default CPU does not
3249 satisfy them, we need to run over the complete list of
3250 cores looking for one that is satisfactory. */
3251 if (!bitmap_empty_p (sought_isa)
3252 && !bitmap_subset_p (sought_isa, default_isa))
3253 {
3254 auto_sbitmap candidate_isa (isa_num_bits);
3255 /* We're only interested in a CPU with at least the
3256 capabilities of the default CPU and the required
3257 additional features. */
3258 bitmap_ior (default_isa, default_isa, sought_isa);
3259
3260 /* Try to locate a CPU type that supports all of the abilities
3261 of the default CPU, plus the extra abilities requested by
3262 the user. */
3263 for (sel = all_cores; sel->common.name != NULL; sel++)
3264 {
3265 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3266 /* An exact match? */
3267 if (bitmap_equal_p (default_isa, candidate_isa))
3268 break;
3269 }
3270
3271 if (sel->common.name == NULL)
3272 {
3273 unsigned current_bit_count = isa_num_bits;
3274 const cpu_option *best_fit = NULL;
3275
3276 /* Ideally we would like to issue an error message here
3277 saying that it was not possible to find a CPU compatible
3278 with the default CPU, but which also supports the command
3279 line options specified by the programmer, and so they
3280 ought to use the -mcpu=<name> command line option to
3281 override the default CPU type.
3282
3283 If we cannot find a CPU that has exactly the
3284 characteristics of the default CPU and the given
3285 command line options we scan the array again looking
3286 for a best match. The best match must have at least
3287 the capabilities of the perfect match. */
3288 for (sel = all_cores; sel->common.name != NULL; sel++)
3289 {
3290 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3291
3292 if (bitmap_subset_p (default_isa, candidate_isa))
3293 {
3294 unsigned count;
3295
3296 bitmap_and_compl (candidate_isa, candidate_isa,
3297 default_isa);
3298 count = bitmap_popcount (candidate_isa);
3299
3300 if (count < current_bit_count)
3301 {
3302 best_fit = sel;
3303 current_bit_count = count;
3304 }
3305 }
3306 }
3307
3308 gcc_assert (best_fit);
3309 sel = best_fit;
3310 }
3311 arm_selected_cpu = sel;
3312 }
3313
3314 /* Now we know the CPU, we can finally initialize the target
3315 structure. */
3316 target->core_name = arm_selected_cpu->common.name;
3317 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3318 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3319 cpu_opts);
3320 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3321 }
3322
3323 gcc_assert (arm_selected_cpu);
3324 gcc_assert (arm_selected_arch);
3325
3326 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3327 {
3328 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3329 auto_sbitmap fpu_bits (isa_num_bits);
3330
3331 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3332 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3333 bitmap_ior (target->isa, target->isa, fpu_bits);
3334 }
3335
3336 if (!arm_selected_tune)
3337 arm_selected_tune = arm_selected_cpu;
3338 else /* Validate the features passed to -mtune. */
3339 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3340
3341 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3342
3343 /* Finish initializing the target structure. */
3344 target->arch_pp_name = arm_selected_arch->arch;
3345 target->base_arch = arm_selected_arch->base_arch;
3346 target->profile = arm_selected_arch->profile;
3347
3348 target->tune_flags = tune_data->tune_flags;
3349 target->tune = tune_data->tune;
3350 target->tune_core = tune_data->scheduler;
3351 arm_option_reconfigure_globals ();
3352 }
3353
3354 /* Fix up any incompatible options that the user has specified. */
3355 static void
3356 arm_option_override (void)
3357 {
3358 static const enum isa_feature fpu_bitlist[]
3359 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3360 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3361 cl_target_option opts;
3362
3363 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3364 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3365
3366 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3367 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3368
3369 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3370
3371 if (!global_options_set.x_arm_fpu_index)
3372 {
3373 bool ok;
3374 int fpu_index;
3375
3376 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3377 CL_TARGET);
3378 gcc_assert (ok);
3379 arm_fpu_index = (enum fpu_type) fpu_index;
3380 }
3381
3382 cl_target_option_save (&opts, &global_options);
3383 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3384 true);
3385
3386 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3387 SUBTARGET_OVERRIDE_OPTIONS;
3388 #endif
3389
3390 /* Initialize boolean versions of the architectural flags, for use
3391 in the arm.md file and for enabling feature flags. */
3392 arm_option_reconfigure_globals ();
3393
3394 arm_tune = arm_active_target.tune_core;
3395 tune_flags = arm_active_target.tune_flags;
3396 current_tune = arm_active_target.tune;
3397
3398 /* TBD: Dwarf info for apcs frame is not handled yet. */
3399 if (TARGET_APCS_FRAME)
3400 flag_shrink_wrap = false;
3401
3402 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3403 {
3404 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3405 target_flags |= MASK_APCS_FRAME;
3406 }
3407
3408 if (TARGET_POKE_FUNCTION_NAME)
3409 target_flags |= MASK_APCS_FRAME;
3410
3411 if (TARGET_APCS_REENT && flag_pic)
3412 error ("-fpic and -mapcs-reent are incompatible");
3413
3414 if (TARGET_APCS_REENT)
3415 warning (0, "APCS reentrant code not supported. Ignored");
3416
3417 /* Set up some tuning parameters. */
3418 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3419 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3420 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3421 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3422 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3423 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3424
3425 /* For arm2/3 there is no need to do any scheduling if we are doing
3426 software floating-point. */
3427 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3428 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3429
3430 /* Override the default structure alignment for AAPCS ABI. */
3431 if (!global_options_set.x_arm_structure_size_boundary)
3432 {
3433 if (TARGET_AAPCS_BASED)
3434 arm_structure_size_boundary = 8;
3435 }
3436 else
3437 {
3438 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3439
3440 if (arm_structure_size_boundary != 8
3441 && arm_structure_size_boundary != 32
3442 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3443 {
3444 if (ARM_DOUBLEWORD_ALIGN)
3445 warning (0,
3446 "structure size boundary can only be set to 8, 32 or 64");
3447 else
3448 warning (0, "structure size boundary can only be set to 8 or 32");
3449 arm_structure_size_boundary
3450 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3451 }
3452 }
3453
3454 if (TARGET_VXWORKS_RTP)
3455 {
3456 if (!global_options_set.x_arm_pic_data_is_text_relative)
3457 arm_pic_data_is_text_relative = 0;
3458 }
3459 else if (flag_pic
3460 && !arm_pic_data_is_text_relative
3461 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3462 /* When text & data segments don't have a fixed displacement, the
3463 intended use is with a single, read only, pic base register.
3464 Unless the user explicitly requested not to do that, set
3465 it. */
3466 target_flags |= MASK_SINGLE_PIC_BASE;
3467
3468 /* If stack checking is disabled, we can use r10 as the PIC register,
3469 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3470 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3471 {
3472 if (TARGET_VXWORKS_RTP)
3473 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3474 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3475 }
3476
3477 if (flag_pic && TARGET_VXWORKS_RTP)
3478 arm_pic_register = 9;
3479
3480 if (arm_pic_register_string != NULL)
3481 {
3482 int pic_register = decode_reg_name (arm_pic_register_string);
3483
3484 if (!flag_pic)
3485 warning (0, "-mpic-register= is useless without -fpic");
3486
3487 /* Prevent the user from choosing an obviously stupid PIC register. */
3488 else if (pic_register < 0 || call_used_regs[pic_register]
3489 || pic_register == HARD_FRAME_POINTER_REGNUM
3490 || pic_register == STACK_POINTER_REGNUM
3491 || pic_register >= PC_REGNUM
3492 || (TARGET_VXWORKS_RTP
3493 && (unsigned int) pic_register != arm_pic_register))
3494 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3495 else
3496 arm_pic_register = pic_register;
3497 }
3498
3499 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3500 if (fix_cm3_ldrd == 2)
3501 {
3502 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3503 fix_cm3_ldrd = 1;
3504 else
3505 fix_cm3_ldrd = 0;
3506 }
3507
3508 /* Hot/Cold partitioning is not currently supported, since we can't
3509 handle literal pool placement in that case. */
3510 if (flag_reorder_blocks_and_partition)
3511 {
3512 inform (input_location,
3513 "-freorder-blocks-and-partition not supported on this architecture");
3514 flag_reorder_blocks_and_partition = 0;
3515 flag_reorder_blocks = 1;
3516 }
3517
3518 if (flag_pic)
3519 /* Hoisting PIC address calculations more aggressively provides a small,
3520 but measurable, size reduction for PIC code. Therefore, we decrease
3521 the bar for unrestricted expression hoisting to the cost of PIC address
3522 calculation, which is 2 instructions. */
3523 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3524 global_options.x_param_values,
3525 global_options_set.x_param_values);
3526
3527 /* ARM EABI defaults to strict volatile bitfields. */
3528 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3529 && abi_version_at_least(2))
3530 flag_strict_volatile_bitfields = 1;
3531
3532 /* Enable sw prefetching at -O3 for CPUs that have prefetch, and we
3533 have deemed it beneficial (signified by setting
3534 prefetch.num_slots to 1 or more). */
3535 if (flag_prefetch_loop_arrays < 0
3536 && HAVE_prefetch
3537 && optimize >= 3
3538 && current_tune->prefetch.num_slots > 0)
3539 flag_prefetch_loop_arrays = 1;
3540
3541 /* Set up parameters to be used in prefetching algorithm. Do not
3542 override the defaults unless we are tuning for a core we have
3543 researched values for. */
3544 if (current_tune->prefetch.num_slots > 0)
3545 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3546 current_tune->prefetch.num_slots,
3547 global_options.x_param_values,
3548 global_options_set.x_param_values);
3549 if (current_tune->prefetch.l1_cache_line_size >= 0)
3550 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3551 current_tune->prefetch.l1_cache_line_size,
3552 global_options.x_param_values,
3553 global_options_set.x_param_values);
3554 if (current_tune->prefetch.l1_cache_size >= 0)
3555 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3556 current_tune->prefetch.l1_cache_size,
3557 global_options.x_param_values,
3558 global_options_set.x_param_values);
3559
3560 /* Use Neon to perform 64-bit operations rather than core
3561 registers. */
3562 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3563 if (use_neon_for_64bits == 1)
3564 prefer_neon_for_64bits = true;
3565
3566 /* Use the alternative scheduling-pressure algorithm by default. */
3567 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3568 global_options.x_param_values,
3569 global_options_set.x_param_values);
3570
3571 /* Look through ready list and all of queue for instructions
3572 relevant for L2 auto-prefetcher. */
3573 int param_sched_autopref_queue_depth;
3574
3575 switch (current_tune->sched_autopref)
3576 {
3577 case tune_params::SCHED_AUTOPREF_OFF:
3578 param_sched_autopref_queue_depth = -1;
3579 break;
3580
3581 case tune_params::SCHED_AUTOPREF_RANK:
3582 param_sched_autopref_queue_depth = 0;
3583 break;
3584
3585 case tune_params::SCHED_AUTOPREF_FULL:
3586 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3587 break;
3588
3589 default:
3590 gcc_unreachable ();
3591 }
3592
3593 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3594 param_sched_autopref_queue_depth,
3595 global_options.x_param_values,
3596 global_options_set.x_param_values);
3597
3598 /* Currently, for slow flash data, we just disable literal pools. We also
3599 disable them for pure-code. */
3600 if (target_slow_flash_data || target_pure_code)
3601 arm_disable_literal_pool = true;
3602
3603 /* Disable scheduling fusion by default if the target is not an armv7
3604 processor or does not prefer ldrd/strd. */
3605 if (flag_schedule_fusion == 2
3606 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3607 flag_schedule_fusion = 0;
3608
3609 /* Need to remember initial options before they are overridden. */
3610 init_optimize = build_optimization_node (&global_options);
3611
3612 arm_options_perform_arch_sanity_checks ();
3613 arm_option_override_internal (&global_options, &global_options_set);
3614 arm_option_check_internal (&global_options);
3615 arm_option_params_internal ();
3616
3617 /* Create the default target_options structure. */
3618 target_option_default_node = target_option_current_node
3619 = build_target_option_node (&global_options);
3620
3621 /* Register global variables with the garbage collector. */
3622 arm_add_gc_roots ();
3623
3624 /* Init initial mode for testing. */
3625 thumb_flipper = TARGET_THUMB;
3626 }
3627
3628
3629 /* Reconfigure global status flags from the active_target.isa. */
3630 void
3631 arm_option_reconfigure_globals (void)
3632 {
3633 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3634 arm_base_arch = arm_active_target.base_arch;
3635
3636 /* Initialize boolean versions of the architectural flags, for use
3637 in the arm.md file. */
3638 arm_arch3m = bitmap_bit_p (arm_active_target.isa, isa_bit_armv3m);
3639 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3640 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3641 arm_arch5 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5);
3642 arm_arch5e = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5e);
3643 arm_arch5te = arm_arch5e
3644 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3645 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3646 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3647 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3648 arm_arch6m = arm_arch6 && !arm_arch_notm;
3649 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3650 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3651 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3652 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3653 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3654 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3655 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3656 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3657 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3658 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3659 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3660 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3661 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3662 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3663 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3664 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3665 if (arm_fp16_inst)
3666 {
3667 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3668 error ("selected fp16 options are incompatible");
3669 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3670 }
3671
3672 /* And finally, set up some quirks. */
3673 arm_arch_no_volatile_ce
3674 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3675 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3676 isa_bit_quirk_armv6kz);
3677
3678 /* Use the cp15 method if it is available. */
3679 if (target_thread_pointer == TP_AUTO)
3680 {
3681 if (arm_arch6k && !TARGET_THUMB1)
3682 target_thread_pointer = TP_CP15;
3683 else
3684 target_thread_pointer = TP_SOFT;
3685 }
3686 }
3687
3688 /* Perform some validation between the desired architecture and the rest of the
3689 options. */
3690 void
3691 arm_options_perform_arch_sanity_checks (void)
3692 {
3693 /* V5 code we generate is completely interworking capable, so we turn off
3694 TARGET_INTERWORK here to avoid many tests later on. */
3695
3696 /* XXX However, we must pass the right pre-processor defines to CPP
3697 or GLD can get confused. This is a hack. */
3698 if (TARGET_INTERWORK)
3699 arm_cpp_interwork = 1;
3700
3701 if (arm_arch5)
3702 target_flags &= ~MASK_INTERWORK;
3703
3704 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3705 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3706
3707 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3708 error ("iwmmxt abi requires an iwmmxt capable cpu");
3709
3710 /* BPABI targets use linker tricks to allow interworking on cores
3711 without thumb support. */
3712 if (TARGET_INTERWORK
3713 && !TARGET_BPABI
3714 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3715 {
3716 warning (0, "target CPU does not support interworking" );
3717 target_flags &= ~MASK_INTERWORK;
3718 }
3719
3720 /* If soft-float is specified then don't use FPU. */
3721 if (TARGET_SOFT_FLOAT)
3722 arm_fpu_attr = FPU_NONE;
3723 else
3724 arm_fpu_attr = FPU_VFP;
3725
3726 if (TARGET_AAPCS_BASED)
3727 {
3728 if (TARGET_CALLER_INTERWORKING)
3729 error ("AAPCS does not support -mcaller-super-interworking");
3730 else
3731 if (TARGET_CALLEE_INTERWORKING)
3732 error ("AAPCS does not support -mcallee-super-interworking");
3733 }
3734
3735 /* __fp16 support currently assumes the core has ldrh. */
3736 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3737 sorry ("__fp16 and no ldrh");
3738
3739 if (use_cmse && !arm_arch_cmse)
3740 error ("target CPU does not support ARMv8-M Security Extensions");
3741
3742 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3743 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3744 if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3745 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3746
3747
3748 if (TARGET_AAPCS_BASED)
3749 {
3750 if (arm_abi == ARM_ABI_IWMMXT)
3751 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3752 else if (TARGET_HARD_FLOAT_ABI)
3753 {
3754 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3755 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3756 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3757 }
3758 else
3759 arm_pcs_default = ARM_PCS_AAPCS;
3760 }
3761 else
3762 {
3763 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3764 sorry ("-mfloat-abi=hard and VFP");
3765
3766 if (arm_abi == ARM_ABI_APCS)
3767 arm_pcs_default = ARM_PCS_APCS;
3768 else
3769 arm_pcs_default = ARM_PCS_ATPCS;
3770 }
3771 }
3772
3773 static void
3774 arm_add_gc_roots (void)
3775 {
3776 gcc_obstack_init(&minipool_obstack);
3777 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3778 }
3779 \f
3780 /* A table of known ARM exception types.
3781 For use with the interrupt function attribute. */
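/* A minimal usage sketch (hypothetical user code, not part of GCC):

     void handler (void) __attribute__ ((interrupt ("IRQ")));

   The string argument is matched against this table by arm_isr_value
   below.  */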
3782
3783 typedef struct
3784 {
3785 const char *const arg;
3786 const unsigned long return_value;
3787 }
3788 isr_attribute_arg;
3789
3790 static const isr_attribute_arg isr_attribute_args [] =
3791 {
3792 { "IRQ", ARM_FT_ISR },
3793 { "irq", ARM_FT_ISR },
3794 { "FIQ", ARM_FT_FIQ },
3795 { "fiq", ARM_FT_FIQ },
3796 { "ABORT", ARM_FT_ISR },
3797 { "abort", ARM_FT_ISR },
3798 { "ABORT", ARM_FT_ISR },
3799 { "abort", ARM_FT_ISR },
3800 { "UNDEF", ARM_FT_EXCEPTION },
3801 { "undef", ARM_FT_EXCEPTION },
3802 { "SWI", ARM_FT_EXCEPTION },
3803 { "swi", ARM_FT_EXCEPTION },
3804 { NULL, ARM_FT_NORMAL }
3805 };
3806
3807 /* Returns the (interrupt) function type of the current
3808 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3809
3810 static unsigned long
3811 arm_isr_value (tree argument)
3812 {
3813 const isr_attribute_arg * ptr;
3814 const char * arg;
3815
3816 if (!arm_arch_notm)
3817 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3818
3819 /* No argument - default to IRQ. */
3820 if (argument == NULL_TREE)
3821 return ARM_FT_ISR;
3822
3823 /* Get the value of the argument. */
3824 if (TREE_VALUE (argument) == NULL_TREE
3825 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3826 return ARM_FT_UNKNOWN;
3827
3828 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3829
3830 /* Check it against the list of known arguments. */
3831 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3832 if (streq (arg, ptr->arg))
3833 return ptr->return_value;
3834
3835 /* An unrecognized interrupt type. */
3836 return ARM_FT_UNKNOWN;
3837 }
3838
3839 /* Computes the type of the current function. */
3840
3841 static unsigned long
3842 arm_compute_func_type (void)
3843 {
3844 unsigned long type = ARM_FT_UNKNOWN;
3845 tree a;
3846 tree attr;
3847
3848 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3849
3850 /* Decide if the current function is volatile. Such functions
3851 never return, and many memory cycles can be saved by not storing
3852 register values that will never be needed again. This optimization
3853 was added to speed up context switching in a kernel application. */
3854 if (optimize > 0
3855 && (TREE_NOTHROW (current_function_decl)
3856 || !(flag_unwind_tables
3857 || (flag_exceptions
3858 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3859 && TREE_THIS_VOLATILE (current_function_decl))
3860 type |= ARM_FT_VOLATILE;
3861
3862 if (cfun->static_chain_decl != NULL)
3863 type |= ARM_FT_NESTED;
3864
3865 attr = DECL_ATTRIBUTES (current_function_decl);
3866
3867 a = lookup_attribute ("naked", attr);
3868 if (a != NULL_TREE)
3869 type |= ARM_FT_NAKED;
3870
3871 a = lookup_attribute ("isr", attr);
3872 if (a == NULL_TREE)
3873 a = lookup_attribute ("interrupt", attr);
3874
3875 if (a == NULL_TREE)
3876 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3877 else
3878 type |= arm_isr_value (TREE_VALUE (a));
3879
3880 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3881 type |= ARM_FT_CMSE_ENTRY;
3882
3883 return type;
3884 }
3885
3886 /* Returns the type of the current function. */
3887
3888 unsigned long
3889 arm_current_func_type (void)
3890 {
3891 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3892 cfun->machine->func_type = arm_compute_func_type ();
3893
3894 return cfun->machine->func_type;
3895 }
3896
3897 bool
3898 arm_allocate_stack_slots_for_args (void)
3899 {
3900 /* Naked functions should not allocate stack slots for arguments. */
3901 return !IS_NAKED (arm_current_func_type ());
3902 }
3903
3904 static bool
3905 arm_warn_func_return (tree decl)
3906 {
3907 /* Naked functions are implemented entirely in assembly, including the
3908 return sequence, so suppress warnings about this. */
3909 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3910 }
3911
3912 \f
3913 /* Output assembler code for a block containing the constant parts
3914 of a trampoline, leaving space for the variable parts.
3915
3916 On the ARM, (if r8 is the static chain regnum, and remembering that
3917 referencing pc adds an offset of 8) the trampoline looks like:
3918 ldr r8, [pc, #0]
3919 ldr pc, [pc]
3920 .word static chain value
3921 .word function's address
3922 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
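/* For reference (derived from arm_trampoline_init below): the two .word
   slots sit at offsets 8 and 12 in the 32-bit (ARM and Thumb-2)
   templates, and at offsets 12 and 16 in the Thumb-1 template.  */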
3923
3924 static void
3925 arm_asm_trampoline_template (FILE *f)
3926 {
3927 fprintf (f, "\t.syntax unified\n");
3928
3929 if (TARGET_ARM)
3930 {
3931 fprintf (f, "\t.arm\n");
3932 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3933 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3934 }
3935 else if (TARGET_THUMB2)
3936 {
3937 fprintf (f, "\t.thumb\n");
3938 /* The Thumb-2 trampoline is similar to the arm implementation.
3939 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3940 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3941 STATIC_CHAIN_REGNUM, PC_REGNUM);
3942 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3943 }
3944 else
3945 {
3946 ASM_OUTPUT_ALIGN (f, 2);
3947 fprintf (f, "\t.code\t16\n");
3948 fprintf (f, ".Ltrampoline_start:\n");
3949 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3950 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3951 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3952 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3953 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3954 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3955 }
3956 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3957 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3958 }
3959
3960 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3961
3962 static void
3963 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3964 {
3965 rtx fnaddr, mem, a_tramp;
3966
3967 emit_block_move (m_tramp, assemble_trampoline_template (),
3968 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3969
3970 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3971 emit_move_insn (mem, chain_value);
3972
3973 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3974 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3975 emit_move_insn (mem, fnaddr);
3976
3977 a_tramp = XEXP (m_tramp, 0);
3978 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3979 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3980 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3981 }
3982
3983 /* Thumb trampolines should be entered in thumb mode, so set
3984 the bottom bit of the address. */
3985
3986 static rtx
3987 arm_trampoline_adjust_address (rtx addr)
3988 {
3989 if (TARGET_THUMB)
3990 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3991 NULL, 0, OPTAB_LIB_WIDEN);
3992 return addr;
3993 }
3994 \f
3995 /* Return 1 if it is possible to return using a single instruction.
3996 If SIBLING is non-null, this is a test for a return before a sibling
3997 call. SIBLING is the call insn, so we can examine its register usage. */
3998
3999 int
4000 use_return_insn (int iscond, rtx sibling)
4001 {
4002 int regno;
4003 unsigned int func_type;
4004 unsigned long saved_int_regs;
4005 unsigned HOST_WIDE_INT stack_adjust;
4006 arm_stack_offsets *offsets;
4007
4008 /* Never use a return instruction before reload has run. */
4009 if (!reload_completed)
4010 return 0;
4011
4012 func_type = arm_current_func_type ();
4013
4014 /* Naked, volatile and stack alignment functions need special
4015 consideration. */
4016 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4017 return 0;
4018
4019 /* So do interrupt functions that use the frame pointer and Thumb
4020 interrupt functions. */
4021 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4022 return 0;
4023
4024 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4025 && !optimize_function_for_size_p (cfun))
4026 return 0;
4027
4028 offsets = arm_get_frame_offsets ();
4029 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4030
4031 /* As do variadic functions. */
4032 if (crtl->args.pretend_args_size
4033 || cfun->machine->uses_anonymous_args
4034 /* Or if the function calls __builtin_eh_return () */
4035 || crtl->calls_eh_return
4036 /* Or if the function calls alloca */
4037 || cfun->calls_alloca
4038 /* Or if there is a stack adjustment. However, if the stack pointer
4039 is saved on the stack, we can use a pre-incrementing stack load. */
4040 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4041 && stack_adjust == 4))
4042 /* Or if the static chain register was saved above the frame, under the
4043 assumption that the stack pointer isn't saved on the stack. */
4044 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4045 && arm_compute_static_chain_stack_bytes() != 0))
4046 return 0;
4047
4048 saved_int_regs = offsets->saved_regs_mask;
4049
4050 /* Unfortunately, the insn
4051
4052 ldmib sp, {..., sp, ...}
4053
4054 triggers a bug on most SA-110 based devices, such that the stack
4055 pointer won't be correctly restored if the instruction takes a
4056 page fault. We work around this problem by popping r3 along with
4057 the other registers, since that is never slower than executing
4058 another instruction.
4059
4060 We test for !arm_arch5 here, because code for any architecture
4061 less than this could potentially be run on one of the buggy
4062 chips. */
4063 if (stack_adjust == 4 && !arm_arch5 && TARGET_ARM)
4064 {
4065 /* Validate that r3 is a call-clobbered register (always true in
4066 the default abi) ... */
4067 if (!call_used_regs[3])
4068 return 0;
4069
4070 /* ... that it isn't being used for a return value ... */
4071 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4072 return 0;
4073
4074 /* ... or for a tail-call argument ... */
4075 if (sibling)
4076 {
4077 gcc_assert (CALL_P (sibling));
4078
4079 if (find_regno_fusage (sibling, USE, 3))
4080 return 0;
4081 }
4082
4083 /* ... and that there are no call-saved registers in r0-r2
4084 (always true in the default ABI). */
4085 if (saved_int_regs & 0x7)
4086 return 0;
4087 }
4088
4089 /* Can't be done if interworking with Thumb, and any registers have been
4090 stacked. */
4091 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4092 return 0;
4093
4094 /* On StrongARM, conditional returns are expensive if they aren't
4095 taken and multiple registers have been stacked. */
4096 if (iscond && arm_tune_strongarm)
4097 {
4098 /* Conditional return when just the LR is stored is a simple
4099 conditional-load instruction, that's not expensive. */
4100 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4101 return 0;
4102
4103 if (flag_pic
4104 && arm_pic_register != INVALID_REGNUM
4105 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4106 return 0;
4107 }
4108
4109 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4110 several instructions if anything needs to be popped. */
4111 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4112 return 0;
4113
4114 /* If there are saved registers but the LR isn't saved, then we need
4115 two instructions for the return. */
4116 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4117 return 0;
4118
4119 /* Can't be done if any of the VFP regs are pushed,
4120 since this also requires an insn. */
4121 if (TARGET_HARD_FLOAT)
4122 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4123 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4124 return 0;
4125
4126 if (TARGET_REALLY_IWMMXT)
4127 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4128 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4129 return 0;
4130
4131 return 1;
4132 }
4133
4134 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4135 shrink-wrapping if possible. This is the case if we need to emit a
4136 prologue, which we can test by looking at the offsets. */
4137 bool
4138 use_simple_return_p (void)
4139 {
4140 arm_stack_offsets *offsets;
4141
4142 /* Note this function can be called before or after reload. */
4143 if (!reload_completed)
4144 arm_compute_frame_layout ();
4145
4146 offsets = arm_get_frame_offsets ();
4147 return offsets->outgoing_args != 0;
4148 }
4149
4150 /* Return TRUE if int I is a valid immediate ARM constant. */
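/* Worked examples for ARM mode (illustrative only): 0x000000ff and
   0x00ff0000 are valid, each being an 8-bit value rotated by an even
   amount; 0xf000000f is valid because the rotation may wrap around bit
   31; 0x00000101 and 0xff0000ff are not, since their set bits do not
   fit in any such rotated 8-bit window.  */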
4151
4152 int
4153 const_ok_for_arm (HOST_WIDE_INT i)
4154 {
4155 int lowbit;
4156
4157 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4158 be all zero, or all one. */
4159 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4160 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4161 != ((~(unsigned HOST_WIDE_INT) 0)
4162 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4163 return FALSE;
4164
4165 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4166
4167 /* Fast return for 0 and small values. We must do this for zero, since
4168 the code below can't handle that one case. */
4169 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4170 return TRUE;
4171
4172 /* Get the number of trailing zeros. */
4173 lowbit = ffs((int) i) - 1;
4174
4175 /* Only even shifts are allowed in ARM mode so round down to the
4176 nearest even number. */
4177 if (TARGET_ARM)
4178 lowbit &= ~1;
4179
4180 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4181 return TRUE;
4182
4183 if (TARGET_ARM)
4184 {
4185 /* Allow rotated constants in ARM mode. */
4186 if (lowbit <= 4
4187 && ((i & ~0xc000003f) == 0
4188 || (i & ~0xf000000f) == 0
4189 || (i & ~0xfc000003) == 0))
4190 return TRUE;
4191 }
4192 else if (TARGET_THUMB2)
4193 {
4194 HOST_WIDE_INT v;
4195
4196 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4197 v = i & 0xff;
4198 v |= v << 16;
4199 if (i == v || i == (v | (v << 8)))
4200 return TRUE;
4201
4202 /* Allow repeated pattern 0xXY00XY00. */
4203 v = i & 0xff00;
4204 v |= v << 16;
4205 if (i == v)
4206 return TRUE;
4207 }
4208 else if (TARGET_HAVE_MOVT)
4209 {
4210 /* Thumb-1 Targets with MOVT. */
4211 if (i > 0xffff)
4212 return FALSE;
4213 else
4214 return TRUE;
4215 }
4216
4217 return FALSE;
4218 }
4219
4220 /* Return true if I is a valid constant for the operation CODE. */
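/* Illustrative cases (not exhaustive): for CODE == SET, 0xffffff00 is
   accepted because its complement 0x000000ff can be loaded with MVN;
   for CODE == PLUS, -2 is accepted because the addition can be recast
   as subtracting 2.  */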
4221 int
4222 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4223 {
4224 if (const_ok_for_arm (i))
4225 return 1;
4226
4227 switch (code)
4228 {
4229 case SET:
4230 /* See if we can use movw. */
4231 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4232 return 1;
4233 else
4234 /* Otherwise, try mvn. */
4235 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4236
4237 case PLUS:
4238 /* See if we can use addw or subw. */
4239 if (TARGET_THUMB2
4240 && ((i & 0xfffff000) == 0
4241 || ((-i) & 0xfffff000) == 0))
4242 return 1;
4243 /* Fall through. */
4244 case COMPARE:
4245 case EQ:
4246 case NE:
4247 case GT:
4248 case LE:
4249 case LT:
4250 case GE:
4251 case GEU:
4252 case LTU:
4253 case GTU:
4254 case LEU:
4255 case UNORDERED:
4256 case ORDERED:
4257 case UNEQ:
4258 case UNGE:
4259 case UNLT:
4260 case UNGT:
4261 case UNLE:
4262 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4263
4264 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4265 case XOR:
4266 return 0;
4267
4268 case IOR:
4269 if (TARGET_THUMB2)
4270 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4271 return 0;
4272
4273 case AND:
4274 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4275
4276 default:
4277 gcc_unreachable ();
4278 }
4279 }
4280
4281 /* Return true if I is a valid di mode constant for the operation CODE. */
4282 int
4283 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4284 {
4285 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4286 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4287 rtx hi = GEN_INT (hi_val);
4288 rtx lo = GEN_INT (lo_val);
4289
4290 if (TARGET_THUMB1)
4291 return 0;
4292
4293 switch (code)
4294 {
4295 case AND:
4296 case IOR:
4297 case XOR:
4298 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4299 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4300 case PLUS:
4301 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4302
4303 default:
4304 return 0;
4305 }
4306 }
4307
4308 /* Emit a sequence of insns to handle a large constant.
4309 CODE is the code of the operation required, it can be any of SET, PLUS,
4310 IOR, AND, XOR, MINUS;
4311 MODE is the mode in which the operation is being performed;
4312 VAL is the integer to operate on;
4313 SOURCE is the other operand (a register, or a null-pointer for SET);
4314 SUBTARGETS means it is safe to create scratch registers if that will
4315 either produce a simpler sequence, or we will want to cse the values.
4316 Return value is the number of insns emitted. */
4317
4318 /* ??? Tweak this for thumb2. */
4319 int
4320 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4321 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4322 {
4323 rtx cond;
4324
4325 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4326 cond = COND_EXEC_TEST (PATTERN (insn));
4327 else
4328 cond = NULL_RTX;
4329
4330 if (subtargets || code == SET
4331 || (REG_P (target) && REG_P (source)
4332 && REGNO (target) != REGNO (source)))
4333 {
4334 /* After arm_reorg has been called, we can't fix up expensive
4335 constants by pushing them into memory so we must synthesize
4336 them in-line, regardless of the cost. This is only likely to
4337 be more costly on chips that have load delay slots and we are
4338 compiling without running the scheduler (so no splitting
4339 occurred before the final instruction emission).
4340
4341 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4342 */
4343 if (!cfun->machine->after_arm_reorg
4344 && !cond
4345 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4346 1, 0)
4347 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4348 + (code != SET))))
4349 {
4350 if (code == SET)
4351 {
4352 /* Currently SET is the only monadic value for CODE; all
4353 the rest are dyadic. */
4354 if (TARGET_USE_MOVT)
4355 arm_emit_movpair (target, GEN_INT (val));
4356 else
4357 emit_set_insn (target, GEN_INT (val));
4358
4359 return 1;
4360 }
4361 else
4362 {
4363 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4364
4365 if (TARGET_USE_MOVT)
4366 arm_emit_movpair (temp, GEN_INT (val));
4367 else
4368 emit_set_insn (temp, GEN_INT (val));
4369
4370 /* For MINUS, the value is the minuend (VAL - source), since we
4371 never have subtraction of a constant. */
4372 if (code == MINUS)
4373 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4374 else
4375 emit_set_insn (target,
4376 gen_rtx_fmt_ee (code, mode, source, temp));
4377 return 2;
4378 }
4379 }
4380 }
4381
4382 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4383 1);
4384 }
4385
4386 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4387 ARM/THUMB2 immediates and add up to VAL.
4388 The function return value gives the number of insns required. */
4389 static int
4390 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4391 struct four_ints *return_sequence)
4392 {
4393 int best_consecutive_zeros = 0;
4394 int i;
4395 int best_start = 0;
4396 int insns1, insns2;
4397 struct four_ints tmp_sequence;
4398
4399 /* If we aren't targeting ARM, the best place to start is always at
4400 the bottom; otherwise look more closely. */
4401 if (TARGET_ARM)
4402 {
4403 for (i = 0; i < 32; i += 2)
4404 {
4405 int consecutive_zeros = 0;
4406
4407 if (!(val & (3 << i)))
4408 {
4409 while ((i < 32) && !(val & (3 << i)))
4410 {
4411 consecutive_zeros += 2;
4412 i += 2;
4413 }
4414 if (consecutive_zeros > best_consecutive_zeros)
4415 {
4416 best_consecutive_zeros = consecutive_zeros;
4417 best_start = i - consecutive_zeros;
4418 }
4419 i -= 2;
4420 }
4421 }
4422 }
4423
4424 /* So long as it won't require any more insns to do so, it's
4425 desirable to emit a small constant (in bits 0...9) in the last
4426 insn. This way there is more chance that it can be combined with
4427 a later addressing insn to form a pre-indexed load or store
4428 operation. Consider:
4429
4430 *((volatile int *)0xe0000100) = 1;
4431 *((volatile int *)0xe0000110) = 2;
4432
4433 We want this to wind up as:
4434
4435 mov rA, #0xe0000000
4436 mov rB, #1
4437 str rB, [rA, #0x100]
4438 mov rB, #2
4439 str rB, [rA, #0x110]
4440
4441 rather than having to synthesize both large constants from scratch.
4442
4443 Therefore, we calculate how many insns would be required to emit
4444 the constant starting from `best_start', and also starting from
4445 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4446 yield a shorter sequence, we may as well use zero. */
4447 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4448 if (best_start != 0
4449 && ((HOST_WIDE_INT_1U << best_start) < val))
4450 {
4451 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4452 if (insns2 <= insns1)
4453 {
4454 *return_sequence = tmp_sequence;
4455 insns1 = insns2;
4456 }
4457 }
4458
4459 return insns1;
4460 }
4461
4462 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4463 static int
4464 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4465 struct four_ints *return_sequence, int i)
4466 {
4467 int remainder = val & 0xffffffff;
4468 int insns = 0;
4469
4470 /* Try and find a way of doing the job in either two or three
4471 instructions.
4472
4473 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4474 location. We start at position I. This may be the MSB, or
4475 optimal_immediate_sequence may have positioned it at the largest block
4476 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4477 wrapping around to the top of the word when we drop off the bottom.
4478 In the worst case this code should produce no more than four insns.
4479
4480 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4481 constants, shifted to any arbitrary location. We should always start
4482 at the MSB. */
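/* A small worked example (illustrative, ARM mode): val = 0x00ff0ff0 is
   not a single rotated 8-bit immediate, but it splits into 0x00ff0000
   and 0x00000ff0, each of which is, so a two-instruction sequence such
   as mov/orr suffices.  */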
4483 do
4484 {
4485 int end;
4486 unsigned int b1, b2, b3, b4;
4487 unsigned HOST_WIDE_INT result;
4488 int loc;
4489
4490 gcc_assert (insns < 4);
4491
4492 if (i <= 0)
4493 i += 32;
4494
4495 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4496 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4497 {
4498 loc = i;
4499 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4500 /* We can use addw/subw for the last 12 bits. */
4501 result = remainder;
4502 else
4503 {
4504 /* Use an 8-bit shifted/rotated immediate. */
4505 end = i - 8;
4506 if (end < 0)
4507 end += 32;
4508 result = remainder & ((0x0ff << end)
4509 | ((i < end) ? (0xff >> (32 - end))
4510 : 0));
4511 i -= 8;
4512 }
4513 }
4514 else
4515 {
4516 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4517 arbitrary shifts. */
4518 i -= TARGET_ARM ? 2 : 1;
4519 continue;
4520 }
4521
4522 /* Next, see if we can do a better job with a thumb2 replicated
4523 constant.
4524
4525 We do it this way around to catch the cases like 0x01F001E0 where
4526 two 8-bit immediates would work, but a replicated constant would
4527 make it worse.
4528
4529 TODO: 16-bit constants that don't clear all the bits, but still win.
4530 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4531 if (TARGET_THUMB2)
4532 {
4533 b1 = (remainder & 0xff000000) >> 24;
4534 b2 = (remainder & 0x00ff0000) >> 16;
4535 b3 = (remainder & 0x0000ff00) >> 8;
4536 b4 = remainder & 0xff;
4537
4538 if (loc > 24)
4539 {
4540 /* The 8-bit immediate already found clears b1 (and maybe b2),
4541 but must leave b3 and b4 alone. */
4542
4543 /* First try to find a 32-bit replicated constant that clears
4544 almost everything. We can assume that we can't do it in one,
4545 or else we wouldn't be here. */
4546 unsigned int tmp = b1 & b2 & b3 & b4;
4547 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4548 + (tmp << 24);
4549 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4550 + (tmp == b3) + (tmp == b4);
4551 if (tmp
4552 && (matching_bytes >= 3
4553 || (matching_bytes == 2
4554 && const_ok_for_op (remainder & ~tmp2, code))))
4555 {
4556 /* At least 3 of the bytes match, and the fourth has at
4557 least as many bits set, or two of the bytes match
4558 and it will only require one more insn to finish. */
4559 result = tmp2;
4560 i = tmp != b1 ? 32
4561 : tmp != b2 ? 24
4562 : tmp != b3 ? 16
4563 : 8;
4564 }
4565
4566 /* Second, try to find a 16-bit replicated constant that can
4567 leave three of the bytes clear. If b2 or b4 is already
4568 zero, then we can. If the 8-bit from above would not
4569 clear b2 anyway, then we still win. */
4570 else if (b1 == b3 && (!b2 || !b4
4571 || (remainder & 0x00ff0000 & ~result)))
4572 {
4573 result = remainder & 0xff00ff00;
4574 i = 24;
4575 }
4576 }
4577 else if (loc > 16)
4578 {
4579 /* The 8-bit immediate already found clears b2 (and maybe b3)
4580 and we don't get here unless b1 is already clear, but it will
4581 leave b4 unchanged. */
4582
4583 /* If we can clear b2 and b4 at once, then we win, since the
4584 8-bits couldn't possibly reach that far. */
4585 if (b2 == b4)
4586 {
4587 result = remainder & 0x00ff00ff;
4588 i = 16;
4589 }
4590 }
4591 }
4592
4593 return_sequence->i[insns++] = result;
4594 remainder &= ~result;
4595
4596 if (code == SET || code == MINUS)
4597 code = PLUS;
4598 }
4599 while (remainder);
4600
4601 return insns;
4602 }
4603
4604 /* Emit an instruction with the indicated PATTERN. If COND is
4605 non-NULL, conditionalize the execution of the instruction on COND
4606 being true. */
4607
4608 static void
4609 emit_constant_insn (rtx cond, rtx pattern)
4610 {
4611 if (cond)
4612 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4613 emit_insn (pattern);
4614 }
4615
4616 /* As above, but extra parameter GENERATE which, if clear, suppresses
4617 RTL generation. */
4618
4619 static int
4620 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4621 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4622 int subtargets, int generate)
4623 {
4624 int can_invert = 0;
4625 int can_negate = 0;
4626 int final_invert = 0;
4627 int i;
4628 int set_sign_bit_copies = 0;
4629 int clear_sign_bit_copies = 0;
4630 int clear_zero_bit_copies = 0;
4631 int set_zero_bit_copies = 0;
4632 int insns = 0, neg_insns, inv_insns;
4633 unsigned HOST_WIDE_INT temp1, temp2;
4634 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4635 struct four_ints *immediates;
4636 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4637
4638 /* Find out which operations are safe for a given CODE. Also do a quick
4639 check for degenerate cases; these can occur when DImode operations
4640 are split. */
4641 switch (code)
4642 {
4643 case SET:
4644 can_invert = 1;
4645 break;
4646
4647 case PLUS:
4648 can_negate = 1;
4649 break;
4650
4651 case IOR:
4652 if (remainder == 0xffffffff)
4653 {
4654 if (generate)
4655 emit_constant_insn (cond,
4656 gen_rtx_SET (target,
4657 GEN_INT (ARM_SIGN_EXTEND (val))));
4658 return 1;
4659 }
4660
4661 if (remainder == 0)
4662 {
4663 if (reload_completed && rtx_equal_p (target, source))
4664 return 0;
4665
4666 if (generate)
4667 emit_constant_insn (cond, gen_rtx_SET (target, source));
4668 return 1;
4669 }
4670 break;
4671
4672 case AND:
4673 if (remainder == 0)
4674 {
4675 if (generate)
4676 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4677 return 1;
4678 }
4679 if (remainder == 0xffffffff)
4680 {
4681 if (reload_completed && rtx_equal_p (target, source))
4682 return 0;
4683 if (generate)
4684 emit_constant_insn (cond, gen_rtx_SET (target, source));
4685 return 1;
4686 }
4687 can_invert = 1;
4688 break;
4689
4690 case XOR:
4691 if (remainder == 0)
4692 {
4693 if (reload_completed && rtx_equal_p (target, source))
4694 return 0;
4695 if (generate)
4696 emit_constant_insn (cond, gen_rtx_SET (target, source));
4697 return 1;
4698 }
4699
4700 if (remainder == 0xffffffff)
4701 {
4702 if (generate)
4703 emit_constant_insn (cond,
4704 gen_rtx_SET (target,
4705 gen_rtx_NOT (mode, source)));
4706 return 1;
4707 }
4708 final_invert = 1;
4709 break;
4710
4711 case MINUS:
4712 /* We treat MINUS as (val - source), since (source - val) is always
4713 passed as (source + (-val)). */
4714 if (remainder == 0)
4715 {
4716 if (generate)
4717 emit_constant_insn (cond,
4718 gen_rtx_SET (target,
4719 gen_rtx_NEG (mode, source)));
4720 return 1;
4721 }
4722 if (const_ok_for_arm (val))
4723 {
4724 if (generate)
4725 emit_constant_insn (cond,
4726 gen_rtx_SET (target,
4727 gen_rtx_MINUS (mode, GEN_INT (val),
4728 source)));
4729 return 1;
4730 }
4731
4732 break;
4733
4734 default:
4735 gcc_unreachable ();
4736 }
4737
4738 /* If we can do it in one insn get out quickly. */
4739 if (const_ok_for_op (val, code))
4740 {
4741 if (generate)
4742 emit_constant_insn (cond,
4743 gen_rtx_SET (target,
4744 (source
4745 ? gen_rtx_fmt_ee (code, mode, source,
4746 GEN_INT (val))
4747 : GEN_INT (val))));
4748 return 1;
4749 }
4750
4751 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4752 insn. */
4753 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4754 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4755 {
4756 if (generate)
4757 {
4758 if (mode == SImode && i == 16)
4759 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4760 smaller insn. */
4761 emit_constant_insn (cond,
4762 gen_zero_extendhisi2
4763 (target, gen_lowpart (HImode, source)));
4764 else
4765 /* Extz only supports SImode, but we can coerce the operands
4766 into that mode. */
4767 emit_constant_insn (cond,
4768 gen_extzv_t2 (gen_lowpart (SImode, target),
4769 gen_lowpart (SImode, source),
4770 GEN_INT (i), const0_rtx));
4771 }
4772
4773 return 1;
4774 }
4775
4776 /* Calculate a few attributes that may be useful for specific
4777 optimizations. */
4778 /* Count number of leading zeros. */
4779 for (i = 31; i >= 0; i--)
4780 {
4781 if ((remainder & (1 << i)) == 0)
4782 clear_sign_bit_copies++;
4783 else
4784 break;
4785 }
4786
4787 /* Count number of leading 1's. */
4788 for (i = 31; i >= 0; i--)
4789 {
4790 if ((remainder & (1 << i)) != 0)
4791 set_sign_bit_copies++;
4792 else
4793 break;
4794 }
4795
4796 /* Count number of trailing zeros. */
4797 for (i = 0; i <= 31; i++)
4798 {
4799 if ((remainder & (1 << i)) == 0)
4800 clear_zero_bit_copies++;
4801 else
4802 break;
4803 }
4804
4805 /* Count number of trailing 1's. */
4806 for (i = 0; i <= 31; i++)
4807 {
4808 if ((remainder & (1 << i)) != 0)
4809 set_zero_bit_copies++;
4810 else
4811 break;
4812 }
4813
4814 switch (code)
4815 {
4816 case SET:
4817 /* See if we can do this by sign_extending a constant that is known
4818 to be negative. This is a good way of doing it, since the shift
4819 may well merge into a subsequent insn. */
4820 if (set_sign_bit_copies > 1)
4821 {
4822 if (const_ok_for_arm
4823 (temp1 = ARM_SIGN_EXTEND (remainder
4824 << (set_sign_bit_copies - 1))))
4825 {
4826 if (generate)
4827 {
4828 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4829 emit_constant_insn (cond,
4830 gen_rtx_SET (new_src, GEN_INT (temp1)));
4831 emit_constant_insn (cond,
4832 gen_ashrsi3 (target, new_src,
4833 GEN_INT (set_sign_bit_copies - 1)));
4834 }
4835 return 2;
4836 }
4837 /* For an inverted constant, we will need to set the low bits,
4838 these will be shifted out of harm's way. */
4839 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4840 if (const_ok_for_arm (~temp1))
4841 {
4842 if (generate)
4843 {
4844 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4845 emit_constant_insn (cond,
4846 gen_rtx_SET (new_src, GEN_INT (temp1)));
4847 emit_constant_insn (cond,
4848 gen_ashrsi3 (target, new_src,
4849 GEN_INT (set_sign_bit_copies - 1)));
4850 }
4851 return 2;
4852 }
4853 }
4854
4855 /* See if we can calculate the value as the difference between two
4856 valid immediates. */
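/* Worked example (illustrative, assuming a core without MOVT):
   remainder = 0x00ffff00 gives topshift = 8; temp1 rounds up to
   0x01000000 via the overflow case below and temp2 becomes 0x100, both
   valid immediates, so the value is built as 0x01000000 - 0x100
   (a mov followed by a sub).  */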
4857 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4858 {
4859 int topshift = clear_sign_bit_copies & ~1;
4860
4861 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4862 & (0xff000000 >> topshift));
4863
4864 /* If temp1 is zero, then that means the 9 most significant
4865 bits of remainder were 1 and we've caused it to overflow.
4866 When topshift is 0 we don't need to do anything since we
4867 can borrow from 'bit 32'. */
4868 if (temp1 == 0 && topshift != 0)
4869 temp1 = 0x80000000 >> (topshift - 1);
4870
4871 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4872
4873 if (const_ok_for_arm (temp2))
4874 {
4875 if (generate)
4876 {
4877 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4878 emit_constant_insn (cond,
4879 gen_rtx_SET (new_src, GEN_INT (temp1)));
4880 emit_constant_insn (cond,
4881 gen_addsi3 (target, new_src,
4882 GEN_INT (-temp2)));
4883 }
4884
4885 return 2;
4886 }
4887 }
4888
4889 /* See if we can generate this by setting the bottom (or the top)
4890 16 bits, and then shifting these into the other half of the
4891 word. We only look for the simplest cases, to do more would cost
4892 too much. Be careful, however, not to generate this when the
4893 alternative would take fewer insns. */
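/* Worked example (illustrative): remainder = 0x01230123 has
   temp2 = 0x123, which is not a valid immediate on its own, so it is
   synthesized first and then ORred with itself shifted left by 16 to
   form the full constant.  */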
4894 if (val & 0xffff0000)
4895 {
4896 temp1 = remainder & 0xffff0000;
4897 temp2 = remainder & 0x0000ffff;
4898
4899 /* Overlaps outside this range are best done using other methods. */
4900 for (i = 9; i < 24; i++)
4901 {
4902 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4903 && !const_ok_for_arm (temp2))
4904 {
4905 rtx new_src = (subtargets
4906 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4907 : target);
4908 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4909 source, subtargets, generate);
4910 source = new_src;
4911 if (generate)
4912 emit_constant_insn
4913 (cond,
4914 gen_rtx_SET
4915 (target,
4916 gen_rtx_IOR (mode,
4917 gen_rtx_ASHIFT (mode, source,
4918 GEN_INT (i)),
4919 source)));
4920 return insns + 1;
4921 }
4922 }
4923
4924 /* Don't duplicate cases already considered. */
4925 for (i = 17; i < 24; i++)
4926 {
4927 if (((temp1 | (temp1 >> i)) == remainder)
4928 && !const_ok_for_arm (temp1))
4929 {
4930 rtx new_src = (subtargets
4931 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4932 : target);
4933 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4934 source, subtargets, generate);
4935 source = new_src;
4936 if (generate)
4937 emit_constant_insn
4938 (cond,
4939 gen_rtx_SET (target,
4940 gen_rtx_IOR
4941 (mode,
4942 gen_rtx_LSHIFTRT (mode, source,
4943 GEN_INT (i)),
4944 source)));
4945 return insns + 1;
4946 }
4947 }
4948 }
4949 break;
4950
4951 case IOR:
4952 case XOR:
4953 /* If we have IOR or XOR, and the constant can be loaded in a
4954 single instruction, and we can find a temporary to put it in,
4955 then this can be done in two instructions instead of 3-4. */
4956 if (subtargets
4957 /* TARGET can't be NULL if SUBTARGETS is 0.  */
4958 || (reload_completed && !reg_mentioned_p (target, source)))
4959 {
4960 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4961 {
4962 if (generate)
4963 {
4964 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4965
4966 emit_constant_insn (cond,
4967 gen_rtx_SET (sub, GEN_INT (val)));
4968 emit_constant_insn (cond,
4969 gen_rtx_SET (target,
4970 gen_rtx_fmt_ee (code, mode,
4971 source, sub)));
4972 }
4973 return 2;
4974 }
4975 }
4976
4977 if (code == XOR)
4978 break;
4979
4980 /* Convert.
4981 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4982 with the remaining bits 0, e.g. 0xfff00000)
4983 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4984
4985 This can be done in 2 instructions by using shifts with mov or mvn.
4986 e.g. for
4987 x = x | 0xfff00000;
4988 we generate.
4989 mvn r0, r0, asl #12
4990 mvn r0, r0, lsr #12 */
4991 if (set_sign_bit_copies > 8
4992 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4993 {
4994 if (generate)
4995 {
4996 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4997 rtx shift = GEN_INT (set_sign_bit_copies);
4998
4999 emit_constant_insn
5000 (cond,
5001 gen_rtx_SET (sub,
5002 gen_rtx_NOT (mode,
5003 gen_rtx_ASHIFT (mode,
5004 source,
5005 shift))));
5006 emit_constant_insn
5007 (cond,
5008 gen_rtx_SET (target,
5009 gen_rtx_NOT (mode,
5010 gen_rtx_LSHIFTRT (mode, sub,
5011 shift))));
5012 }
5013 return 2;
5014 }
5015
5016 /* Convert
5017 x = y | constant (which has set_zero_bit_copies number of trailing ones).
5018 to
5019 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5020
5021 E.g. for r0 = r0 | 0xfff we generate
5022 mvn r0, r0, lsr #12
5023 mvn r0, r0, asl #12
5024
5025 */
5026 if (set_zero_bit_copies > 8
5027 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5028 {
5029 if (generate)
5030 {
5031 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5032 rtx shift = GEN_INT (set_zero_bit_copies);
5033
5034 emit_constant_insn
5035 (cond,
5036 gen_rtx_SET (sub,
5037 gen_rtx_NOT (mode,
5038 gen_rtx_LSHIFTRT (mode,
5039 source,
5040 shift))));
5041 emit_constant_insn
5042 (cond,
5043 gen_rtx_SET (target,
5044 gen_rtx_NOT (mode,
5045 gen_rtx_ASHIFT (mode, sub,
5046 shift))));
5047 }
5048 return 2;
5049 }
5050
5051 /* This will never be reached for Thumb2 because orn is a valid
5052 instruction. This is for Thumb1 and the ARM 32 bit cases.
5053
5054 x = y | constant (such that ~constant is a valid constant)
5055 Transform this to
5056 x = ~(~y & ~constant).
5057 */
5058 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5059 {
5060 if (generate)
5061 {
5062 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5063 emit_constant_insn (cond,
5064 gen_rtx_SET (sub,
5065 gen_rtx_NOT (mode, source)));
5066 source = sub;
5067 if (subtargets)
5068 sub = gen_reg_rtx (mode);
5069 emit_constant_insn (cond,
5070 gen_rtx_SET (sub,
5071 gen_rtx_AND (mode, source,
5072 GEN_INT (temp1))));
5073 emit_constant_insn (cond,
5074 gen_rtx_SET (target,
5075 gen_rtx_NOT (mode, sub)));
5076 }
5077 return 3;
5078 }
5079 break;
5080
5081 case AND:
5082 /* See if two shifts will do 2 or more insns' worth of work. */
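/* Editorial illustration (hypothetical input): for a mask such as
   0x0000ff0f (16 leading zero bits, low half not a valid immediate) the
   code below first clears the low bits that the shifts will not remove
   (via a recursive AND with remainder | shift_mask) and then clears the
   top half with an lsl #16 / lsr #16 pair.  */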
5083 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5084 {
5085 HOST_WIDE_INT shift_mask = ((0xffffffff
5086 << (32 - clear_sign_bit_copies))
5087 & 0xffffffff);
5088
5089 if ((remainder | shift_mask) != 0xffffffff)
5090 {
5091 HOST_WIDE_INT new_val
5092 = ARM_SIGN_EXTEND (remainder | shift_mask);
5093
5094 if (generate)
5095 {
5096 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5097 insns = arm_gen_constant (AND, SImode, cond, new_val,
5098 new_src, source, subtargets, 1);
5099 source = new_src;
5100 }
5101 else
5102 {
5103 rtx targ = subtargets ? NULL_RTX : target;
5104 insns = arm_gen_constant (AND, mode, cond, new_val,
5105 targ, source, subtargets, 0);
5106 }
5107 }
5108
5109 if (generate)
5110 {
5111 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5112 rtx shift = GEN_INT (clear_sign_bit_copies);
5113
5114 emit_insn (gen_ashlsi3 (new_src, source, shift));
5115 emit_insn (gen_lshrsi3 (target, new_src, shift));
5116 }
5117
5118 return insns + 2;
5119 }
5120
5121 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5122 {
5123 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5124
5125 if ((remainder | shift_mask) != 0xffffffff)
5126 {
5127 HOST_WIDE_INT new_val
5128 = ARM_SIGN_EXTEND (remainder | shift_mask);
5129 if (generate)
5130 {
5131 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5132
5133 insns = arm_gen_constant (AND, mode, cond, new_val,
5134 new_src, source, subtargets, 1);
5135 source = new_src;
5136 }
5137 else
5138 {
5139 rtx targ = subtargets ? NULL_RTX : target;
5140
5141 insns = arm_gen_constant (AND, mode, cond, new_val,
5142 targ, source, subtargets, 0);
5143 }
5144 }
5145
5146 if (generate)
5147 {
5148 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5149 rtx shift = GEN_INT (clear_zero_bit_copies);
5150
5151 emit_insn (gen_lshrsi3 (new_src, source, shift));
5152 emit_insn (gen_ashlsi3 (target, new_src, shift));
5153 }
5154
5155 return insns + 2;
5156 }
5157
5158 break;
5159
5160 default:
5161 break;
5162 }
5163
5164 /* Calculate what the instruction sequences would be if we generated it
5165 normally, negated, or inverted. */
5166 if (code == AND)
5167 /* AND cannot be split into multiple insns, so invert and use BIC. */
5168 insns = 99;
5169 else
5170 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5171
5172 if (can_negate)
5173 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5174 &neg_immediates);
5175 else
5176 neg_insns = 99;
5177
5178 if (can_invert || final_invert)
5179 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5180 &inv_immediates);
5181 else
5182 inv_insns = 99;
5183
5184 immediates = &pos_immediates;
5185
5186 /* Is the negated immediate sequence more efficient? */
5187 if (neg_insns < insns && neg_insns <= inv_insns)
5188 {
5189 insns = neg_insns;
5190 immediates = &neg_immediates;
5191 }
5192 else
5193 can_negate = 0;
5194
5195 /* Is the inverted immediate sequence more efficient?
5196 We must allow for an extra NOT instruction for XOR operations, although
5197 there is some chance that the final 'mvn' will get optimized later. */
5198 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5199 {
5200 insns = inv_insns;
5201 immediates = &inv_immediates;
5202 }
5203 else
5204 {
5205 can_invert = 0;
5206 final_invert = 0;
5207 }
5208
5209 /* Now output the chosen sequence as instructions. */
5210 if (generate)
5211 {
5212 for (i = 0; i < insns; i++)
5213 {
5214 rtx new_src, temp1_rtx;
5215
5216 temp1 = immediates->i[i];
5217
5218 if (code == SET || code == MINUS)
5219 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5220 else if ((final_invert || i < (insns - 1)) && subtargets)
5221 new_src = gen_reg_rtx (mode);
5222 else
5223 new_src = target;
5224
5225 if (can_invert)
5226 temp1 = ~temp1;
5227 else if (can_negate)
5228 temp1 = -temp1;
5229
5230 temp1 = trunc_int_for_mode (temp1, mode);
5231 temp1_rtx = GEN_INT (temp1);
5232
5233 if (code == SET)
5234 ;
5235 else if (code == MINUS)
5236 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5237 else
5238 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5239
5240 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5241 source = new_src;
5242
5243 if (code == SET)
5244 {
5245 can_negate = can_invert;
5246 can_invert = 0;
5247 code = PLUS;
5248 }
5249 else if (code == MINUS)
5250 code = PLUS;
5251 }
5252 }
5253
5254 if (final_invert)
5255 {
5256 if (generate)
5257 emit_constant_insn (cond, gen_rtx_SET (target,
5258 gen_rtx_NOT (mode, source)));
5259 insns++;
5260 }
5261
5262 return insns;
5263 }
5264
5265 /* Canonicalize a comparison so that we are more likely to recognize it.
5266 This can be done for a few constant compares, where we can make the
5267 immediate value easier to load. */
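/* Editorial illustration: 0xfff is not a valid ARM immediate but 0x1000
   is, so a comparison such as (GT reg (const_int 0xfff)) is rewritten
   below as (GE reg (const_int 0x1000)), avoiding an extra constant
   load.  */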
5268
5269 static void
5270 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5271 bool op0_preserve_value)
5272 {
5273 machine_mode mode;
5274 unsigned HOST_WIDE_INT i, maxval;
5275
5276 mode = GET_MODE (*op0);
5277 if (mode == VOIDmode)
5278 mode = GET_MODE (*op1);
5279
5280 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5281
5282 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5283 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5284 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5285 for GTU/LEU in Thumb mode. */
5286 if (mode == DImode)
5287 {
5288
5289 if (*code == GT || *code == LE
5290 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5291 {
5292 /* Missing comparison. First try to use an available
5293 comparison. */
5294 if (CONST_INT_P (*op1))
5295 {
5296 i = INTVAL (*op1);
5297 switch (*code)
5298 {
5299 case GT:
5300 case LE:
5301 if (i != maxval
5302 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5303 {
5304 *op1 = GEN_INT (i + 1);
5305 *code = *code == GT ? GE : LT;
5306 return;
5307 }
5308 break;
5309 case GTU:
5310 case LEU:
5311 if (i != ~((unsigned HOST_WIDE_INT) 0)
5312 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5313 {
5314 *op1 = GEN_INT (i + 1);
5315 *code = *code == GTU ? GEU : LTU;
5316 return;
5317 }
5318 break;
5319 default:
5320 gcc_unreachable ();
5321 }
5322 }
5323
5324 /* If that did not work, reverse the condition. */
5325 if (!op0_preserve_value)
5326 {
5327 std::swap (*op0, *op1);
5328 *code = (int)swap_condition ((enum rtx_code)*code);
5329 }
5330 }
5331 return;
5332 }
5333
5334 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5335 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5336 to facilitate possible combining with a cmp into 'ands'. */
5337 if (mode == SImode
5338 && GET_CODE (*op0) == ZERO_EXTEND
5339 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5340 && GET_MODE (XEXP (*op0, 0)) == QImode
5341 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5342 && subreg_lowpart_p (XEXP (*op0, 0))
5343 && *op1 == const0_rtx)
5344 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5345 GEN_INT (255));
5346
5347 /* Comparisons smaller than DImode. Only adjust comparisons against
5348 an out-of-range constant. */
5349 if (!CONST_INT_P (*op1)
5350 || const_ok_for_arm (INTVAL (*op1))
5351 || const_ok_for_arm (- INTVAL (*op1)))
5352 return;
5353
5354 i = INTVAL (*op1);
5355
5356 switch (*code)
5357 {
5358 case EQ:
5359 case NE:
5360 return;
5361
5362 case GT:
5363 case LE:
5364 if (i != maxval
5365 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5366 {
5367 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5368 *code = *code == GT ? GE : LT;
5369 return;
5370 }
5371 break;
5372
5373 case GE:
5374 case LT:
5375 if (i != ~maxval
5376 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5377 {
5378 *op1 = GEN_INT (i - 1);
5379 *code = *code == GE ? GT : LE;
5380 return;
5381 }
5382 break;
5383
5384 case GTU:
5385 case LEU:
5386 if (i != ~((unsigned HOST_WIDE_INT) 0)
5387 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5388 {
5389 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5390 *code = *code == GTU ? GEU : LTU;
5391 return;
5392 }
5393 break;
5394
5395 case GEU:
5396 case LTU:
5397 if (i != 0
5398 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5399 {
5400 *op1 = GEN_INT (i - 1);
5401 *code = *code == GEU ? GTU : LEU;
5402 return;
5403 }
5404 break;
5405
5406 default:
5407 gcc_unreachable ();
5408 }
5409 }
5410
5411
5412 /* Define how to find the value returned by a function. */
5413
5414 static rtx
5415 arm_function_value(const_tree type, const_tree func,
5416 bool outgoing ATTRIBUTE_UNUSED)
5417 {
5418 machine_mode mode;
5419 int unsignedp ATTRIBUTE_UNUSED;
5420 rtx r ATTRIBUTE_UNUSED;
5421
5422 mode = TYPE_MODE (type);
5423
5424 if (TARGET_AAPCS_BASED)
5425 return aapcs_allocate_return_reg (mode, type, func);
5426
5427 /* Promote integer types. */
5428 if (INTEGRAL_TYPE_P (type))
5429 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5430
5431 /* Promote small structs returned in a register to full-word size
5432 for big-endian AAPCS. */
5433 if (arm_return_in_msb (type))
5434 {
5435 HOST_WIDE_INT size = int_size_in_bytes (type);
5436 if (size % UNITS_PER_WORD != 0)
5437 {
5438 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5439 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5440 }
5441 }
5442
5443 return arm_libcall_value_1 (mode);
5444 }
5445
5446 /* libcall hashtable helpers. */
5447
5448 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5449 {
5450 static inline hashval_t hash (const rtx_def *);
5451 static inline bool equal (const rtx_def *, const rtx_def *);
5452 static inline void remove (rtx_def *);
5453 };
5454
5455 inline bool
5456 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5457 {
5458 return rtx_equal_p (p1, p2);
5459 }
5460
5461 inline hashval_t
5462 libcall_hasher::hash (const rtx_def *p1)
5463 {
5464 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5465 }
5466
5467 typedef hash_table<libcall_hasher> libcall_table_type;
5468
5469 static void
5470 add_libcall (libcall_table_type *htab, rtx libcall)
5471 {
5472 *htab->find_slot (libcall, INSERT) = libcall;
5473 }
5474
5475 static bool
5476 arm_libcall_uses_aapcs_base (const_rtx libcall)
5477 {
5478 static bool init_done = false;
5479 static libcall_table_type *libcall_htab = NULL;
5480
5481 if (!init_done)
5482 {
5483 init_done = true;
5484
5485 libcall_htab = new libcall_table_type (31);
5486 add_libcall (libcall_htab,
5487 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5488 add_libcall (libcall_htab,
5489 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5490 add_libcall (libcall_htab,
5491 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5492 add_libcall (libcall_htab,
5493 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5494
5495 add_libcall (libcall_htab,
5496 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5497 add_libcall (libcall_htab,
5498 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5499 add_libcall (libcall_htab,
5500 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5501 add_libcall (libcall_htab,
5502 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5503
5504 add_libcall (libcall_htab,
5505 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5506 add_libcall (libcall_htab,
5507 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5508 add_libcall (libcall_htab,
5509 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5510 add_libcall (libcall_htab,
5511 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5512 add_libcall (libcall_htab,
5513 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5514 add_libcall (libcall_htab,
5515 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5516 add_libcall (libcall_htab,
5517 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5518 add_libcall (libcall_htab,
5519 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5520
5521 /* Values from double-precision helper functions are returned in core
5522 registers if the selected core only supports single-precision
5523 arithmetic, even if we are using the hard-float ABI. The same is
5524 true for single-precision helpers, but we will never be using the
5525 hard-float ABI on a CPU which doesn't support single-precision
5526 operations in hardware. */
5527 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5528 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5529 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5530 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5531 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5532 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5533 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5534 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5535 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5536 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5537 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5538 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5539 SFmode));
5540 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5541 DFmode));
5542 add_libcall (libcall_htab,
5543 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5544 }
5545
5546 return libcall && libcall_htab->find (libcall) != NULL;
5547 }
5548
5549 static rtx
5550 arm_libcall_value_1 (machine_mode mode)
5551 {
5552 if (TARGET_AAPCS_BASED)
5553 return aapcs_libcall_value (mode);
5554 else if (TARGET_IWMMXT_ABI
5555 && arm_vector_mode_supported_p (mode))
5556 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5557 else
5558 return gen_rtx_REG (mode, ARG_REGISTER (1));
5559 }
5560
5561 /* Define how to find the value returned by a library function
5562 assuming the value has mode MODE. */
5563
5564 static rtx
5565 arm_libcall_value (machine_mode mode, const_rtx libcall)
5566 {
5567 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5568 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5569 {
5570 /* The following libcalls return their result in integer registers,
5571 even though they return a floating point value. */
5572 if (arm_libcall_uses_aapcs_base (libcall))
5573 return gen_rtx_REG (mode, ARG_REGISTER(1));
5574
5575 }
5576
5577 return arm_libcall_value_1 (mode);
5578 }
5579
5580 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5581
5582 static bool
5583 arm_function_value_regno_p (const unsigned int regno)
5584 {
5585 if (regno == ARG_REGISTER (1)
5586 || (TARGET_32BIT
5587 && TARGET_AAPCS_BASED
5588 && TARGET_HARD_FLOAT
5589 && regno == FIRST_VFP_REGNUM)
5590 || (TARGET_IWMMXT_ABI
5591 && regno == FIRST_IWMMXT_REGNUM))
5592 return true;
5593
5594 return false;
5595 }
5596
5597 /* Determine the amount of memory needed to store the possible return
5598 registers of an untyped call. */
5599 int
5600 arm_apply_result_size (void)
5601 {
5602 int size = 16;
5603
5604 if (TARGET_32BIT)
5605 {
5606 if (TARGET_HARD_FLOAT_ABI)
5607 size += 32;
5608 if (TARGET_IWMMXT_ABI)
5609 size += 8;
5610 }
5611
5612 return size;
5613 }
5614
5615 /* Decide whether TYPE should be returned in memory (true)
5616 or in a register (false). FNTYPE is the type of the function making
5617 the call. */
5618 static bool
5619 arm_return_in_memory (const_tree type, const_tree fntype)
5620 {
5621 HOST_WIDE_INT size;
5622
5623 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5624
5625 if (TARGET_AAPCS_BASED)
5626 {
5627 /* Simple, non-aggregate types (i.e. not including vectors and
5628 complex) are always returned in a register (or registers).
5629 We don't care about which register here, so we can short-cut
5630 some of the detail. */
5631 if (!AGGREGATE_TYPE_P (type)
5632 && TREE_CODE (type) != VECTOR_TYPE
5633 && TREE_CODE (type) != COMPLEX_TYPE)
5634 return false;
5635
5636 /* Any return value that is no larger than one word can be
5637 returned in r0. */
5638 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5639 return false;
5640
5641 /* Check any available co-processors to see if they accept the
5642 type as a register candidate (VFP, for example, can return
5643 some aggregates in consecutive registers). These aren't
5644 available if the call is variadic. */
5645 if (aapcs_select_return_coproc (type, fntype) >= 0)
5646 return false;
5647
5648 /* Vector values should be returned using ARM registers, not
5649 memory (unless they're over 16 bytes, which will break since
5650 we only have four call-clobbered registers to play with). */
5651 if (TREE_CODE (type) == VECTOR_TYPE)
5652 return (size < 0 || size > (4 * UNITS_PER_WORD));
5653
5654 /* The rest go in memory. */
5655 return true;
5656 }
5657
5658 if (TREE_CODE (type) == VECTOR_TYPE)
5659 return (size < 0 || size > (4 * UNITS_PER_WORD));
5660
5661 if (!AGGREGATE_TYPE_P (type)
5662 && TREE_CODE (type) != VECTOR_TYPE)
5663 /* All simple types are returned in registers. */
5664 return false;
5665
5666 if (arm_abi != ARM_ABI_APCS)
5667 {
5668 /* ATPCS and later return aggregate types in memory only if they are
5669 larger than a word (or are variable size). */
5670 return (size < 0 || size > UNITS_PER_WORD);
5671 }
5672
5673 /* For the arm-wince targets we choose to be compatible with Microsoft's
5674 ARM and Thumb compilers, which always return aggregates in memory. */
5675 #ifndef ARM_WINCE
5676 /* All structures/unions bigger than one word are returned in memory.
5677 Also catch the case where int_size_in_bytes returns -1. In this case
5678 the aggregate is either huge or of variable size, and in either case
5679 we will want to return it via memory and not in a register. */
5680 if (size < 0 || size > UNITS_PER_WORD)
5681 return true;
5682
5683 if (TREE_CODE (type) == RECORD_TYPE)
5684 {
5685 tree field;
5686
5687 /* For a struct the APCS says that we only return in a register
5688 if the type is 'integer like' and every addressable element
5689 has an offset of zero. For practical purposes this means
5690 that the structure can have at most one non bit-field element
5691 and that this element must be the first one in the structure. */
5692
5693 /* Find the first field, ignoring non FIELD_DECL things which will
5694 have been created by C++. */
5695 for (field = TYPE_FIELDS (type);
5696 field && TREE_CODE (field) != FIELD_DECL;
5697 field = DECL_CHAIN (field))
5698 continue;
5699
5700 if (field == NULL)
5701 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5702
5703 /* Check that the first field is valid for returning in a register. */
5704
5705 /* ... Floats are not allowed.  */
5706 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5707 return true;
5708
5709 /* ... Aggregates that are not themselves valid for returning in
5710 a register are not allowed. */
5711 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5712 return true;
5713
5714 /* Now check the remaining fields, if any. Only bitfields are allowed,
5715 since they are not addressable. */
5716 for (field = DECL_CHAIN (field);
5717 field;
5718 field = DECL_CHAIN (field))
5719 {
5720 if (TREE_CODE (field) != FIELD_DECL)
5721 continue;
5722
5723 if (!DECL_BIT_FIELD_TYPE (field))
5724 return true;
5725 }
5726
5727 return false;
5728 }
5729
5730 if (TREE_CODE (type) == UNION_TYPE)
5731 {
5732 tree field;
5733
5734 /* Unions can be returned in registers if every element is
5735 integral, or can be returned in an integer register. */
5736 for (field = TYPE_FIELDS (type);
5737 field;
5738 field = DECL_CHAIN (field))
5739 {
5740 if (TREE_CODE (field) != FIELD_DECL)
5741 continue;
5742
5743 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5744 return true;
5745
5746 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5747 return true;
5748 }
5749
5750 return false;
5751 }
5752 #endif /* not ARM_WINCE */
5753
5754 /* Return all other types in memory. */
5755 return true;
5756 }
5757
5758 const struct pcs_attribute_arg
5759 {
5760 const char *arg;
5761 enum arm_pcs value;
5762 } pcs_attribute_args[] =
5763 {
5764 {"aapcs", ARM_PCS_AAPCS},
5765 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5766 #if 0
5767 /* We could recognize these, but changes would be needed elsewhere
5768 * to implement them. */
5769 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5770 {"atpcs", ARM_PCS_ATPCS},
5771 {"apcs", ARM_PCS_APCS},
5772 #endif
5773 {NULL, ARM_PCS_UNKNOWN}
5774 };
5775
5776 static enum arm_pcs
5777 arm_pcs_from_attribute (tree attr)
5778 {
5779 const struct pcs_attribute_arg *ptr;
5780 const char *arg;
5781
5782 /* Get the value of the argument. */
5783 if (TREE_VALUE (attr) == NULL_TREE
5784 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5785 return ARM_PCS_UNKNOWN;
5786
5787 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5788
5789 /* Check it against the list of known arguments. */
5790 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5791 if (streq (arg, ptr->arg))
5792 return ptr->value;
5793
5794 /* An unrecognized PCS name. */
5795 return ARM_PCS_UNKNOWN;
5796 }
5797
5798 /* Get the PCS variant to use for this call. TYPE is the function's type
5799 specification, DECL is the specific declaration. DECL may be null if
5800 the call could be indirect or if this is a library call. */
5801 static enum arm_pcs
5802 arm_get_pcs_model (const_tree type, const_tree decl)
5803 {
5804 bool user_convention = false;
5805 enum arm_pcs user_pcs = arm_pcs_default;
5806 tree attr;
5807
5808 gcc_assert (type);
5809
5810 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5811 if (attr)
5812 {
5813 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5814 user_convention = true;
5815 }
5816
5817 if (TARGET_AAPCS_BASED)
5818 {
5819 /* Detect varargs functions. These always use the base rules
5820 (no argument is ever a candidate for a co-processor
5821 register). */
5822 bool base_rules = stdarg_p (type);
5823
5824 if (user_convention)
5825 {
5826 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5827 sorry ("non-AAPCS derived PCS variant");
5828 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5829 error ("variadic functions must use the base AAPCS variant");
5830 }
5831
5832 if (base_rules)
5833 return ARM_PCS_AAPCS;
5834 else if (user_convention)
5835 return user_pcs;
5836 else if (decl && flag_unit_at_a_time)
5837 {
5838 /* Local functions never leak outside this compilation unit,
5839 so we are free to use whatever conventions are
5840 appropriate. */
5841 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5842 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5843 if (i && i->local)
5844 return ARM_PCS_AAPCS_LOCAL;
5845 }
5846 }
5847 else if (user_convention && user_pcs != arm_pcs_default)
5848 sorry ("PCS variant");
5849
5850 /* For everything else we use the target's default. */
5851 return arm_pcs_default;
5852 }
5853
5854
5855 static void
5856 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5857 const_tree fntype ATTRIBUTE_UNUSED,
5858 rtx libcall ATTRIBUTE_UNUSED,
5859 const_tree fndecl ATTRIBUTE_UNUSED)
5860 {
5861 /* Record the unallocated VFP registers. */
5862 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5863 pcum->aapcs_vfp_reg_alloc = 0;
5864 }
5865
5866 /* Walk down the type tree of TYPE counting consecutive base elements.
5867 If *MODEP is VOIDmode, then set it to the first valid floating point
5868 type. If a non-floating point type is found, or if a floating point
5869 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5870 otherwise return the count in the sub-tree. */
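/* Editorial illustration: for struct { double x; double y[2]; } the walk
   returns 3 with *MODEP set to DFmode (a homogeneous aggregate of three
   doubles); a struct mixing float and double members returns -1.  */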
5871 static int
5872 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5873 {
5874 machine_mode mode;
5875 HOST_WIDE_INT size;
5876
5877 switch (TREE_CODE (type))
5878 {
5879 case REAL_TYPE:
5880 mode = TYPE_MODE (type);
5881 if (mode != DFmode && mode != SFmode && mode != HFmode)
5882 return -1;
5883
5884 if (*modep == VOIDmode)
5885 *modep = mode;
5886
5887 if (*modep == mode)
5888 return 1;
5889
5890 break;
5891
5892 case COMPLEX_TYPE:
5893 mode = TYPE_MODE (TREE_TYPE (type));
5894 if (mode != DFmode && mode != SFmode)
5895 return -1;
5896
5897 if (*modep == VOIDmode)
5898 *modep = mode;
5899
5900 if (*modep == mode)
5901 return 2;
5902
5903 break;
5904
5905 case VECTOR_TYPE:
5906 /* Use V2SImode and V4SImode as representatives of all 64-bit
5907 and 128-bit vector types, whether or not those modes are
5908 supported with the present options. */
5909 size = int_size_in_bytes (type);
5910 switch (size)
5911 {
5912 case 8:
5913 mode = V2SImode;
5914 break;
5915 case 16:
5916 mode = V4SImode;
5917 break;
5918 default:
5919 return -1;
5920 }
5921
5922 if (*modep == VOIDmode)
5923 *modep = mode;
5924
5925 /* Vector modes are considered to be opaque: two vectors are
5926 equivalent for the purposes of being homogeneous aggregates
5927 if they are the same size. */
5928 if (*modep == mode)
5929 return 1;
5930
5931 break;
5932
5933 case ARRAY_TYPE:
5934 {
5935 int count;
5936 tree index = TYPE_DOMAIN (type);
5937
5938 /* Can't handle incomplete types nor sizes that are not
5939 fixed. */
5940 if (!COMPLETE_TYPE_P (type)
5941 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5942 return -1;
5943
5944 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5945 if (count == -1
5946 || !index
5947 || !TYPE_MAX_VALUE (index)
5948 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5949 || !TYPE_MIN_VALUE (index)
5950 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5951 || count < 0)
5952 return -1;
5953
5954 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5955 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5956
5957 /* There must be no padding. */
5958 if (wi::to_wide (TYPE_SIZE (type))
5959 != count * GET_MODE_BITSIZE (*modep))
5960 return -1;
5961
5962 return count;
5963 }
5964
5965 case RECORD_TYPE:
5966 {
5967 int count = 0;
5968 int sub_count;
5969 tree field;
5970
5971 /* Can't handle incomplete types nor sizes that are not
5972 fixed. */
5973 if (!COMPLETE_TYPE_P (type)
5974 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5975 return -1;
5976
5977 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5978 {
5979 if (TREE_CODE (field) != FIELD_DECL)
5980 continue;
5981
5982 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5983 if (sub_count < 0)
5984 return -1;
5985 count += sub_count;
5986 }
5987
5988 /* There must be no padding. */
5989 if (wi::to_wide (TYPE_SIZE (type))
5990 != count * GET_MODE_BITSIZE (*modep))
5991 return -1;
5992
5993 return count;
5994 }
5995
5996 case UNION_TYPE:
5997 case QUAL_UNION_TYPE:
5998 {
5999 /* These aren't very interesting except in a degenerate case. */
6000 int count = 0;
6001 int sub_count;
6002 tree field;
6003
6004 /* Can't handle incomplete types nor sizes that are not
6005 fixed. */
6006 if (!COMPLETE_TYPE_P (type)
6007 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6008 return -1;
6009
6010 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6011 {
6012 if (TREE_CODE (field) != FIELD_DECL)
6013 continue;
6014
6015 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6016 if (sub_count < 0)
6017 return -1;
6018 count = count > sub_count ? count : sub_count;
6019 }
6020
6021 /* There must be no padding. */
6022 if (wi::to_wide (TYPE_SIZE (type))
6023 != count * GET_MODE_BITSIZE (*modep))
6024 return -1;
6025
6026 return count;
6027 }
6028
6029 default:
6030 break;
6031 }
6032
6033 return -1;
6034 }
6035
6036 /* Return true if PCS_VARIANT should use VFP registers. */
6037 static bool
6038 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6039 {
6040 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6041 {
6042 static bool seen_thumb1_vfp = false;
6043
6044 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6045 {
6046 sorry ("Thumb-1 hard-float VFP ABI");
6047 /* sorry() is not immediately fatal, so only display this once. */
6048 seen_thumb1_vfp = true;
6049 }
6050
6051 return true;
6052 }
6053
6054 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6055 return false;
6056
6057 return (TARGET_32BIT && TARGET_HARD_FLOAT
6058 && (TARGET_VFP_DOUBLE || !is_double));
6059 }
6060
6061 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6062 suitable for passing or returning in VFP registers for the PCS
6063 variant selected. If it is, then *BASE_MODE is updated to contain
6064 a machine mode describing each element of the argument's type and
6065 *COUNT to hold the number of such elements. */
6066 static bool
6067 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6068 machine_mode mode, const_tree type,
6069 machine_mode *base_mode, int *count)
6070 {
6071 machine_mode new_mode = VOIDmode;
6072
6073 /* If we have the type information, prefer that to working things
6074 out from the mode. */
6075 if (type)
6076 {
6077 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6078
6079 if (ag_count > 0 && ag_count <= 4)
6080 *count = ag_count;
6081 else
6082 return false;
6083 }
6084 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6085 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6086 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6087 {
6088 *count = 1;
6089 new_mode = mode;
6090 }
6091 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6092 {
6093 *count = 2;
6094 new_mode = (mode == DCmode ? DFmode : SFmode);
6095 }
6096 else
6097 return false;
6098
6099
6100 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6101 return false;
6102
6103 *base_mode = new_mode;
6104 return true;
6105 }
6106
6107 static bool
6108 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6109 machine_mode mode, const_tree type)
6110 {
6111 int count ATTRIBUTE_UNUSED;
6112 machine_mode ag_mode ATTRIBUTE_UNUSED;
6113
6114 if (!use_vfp_abi (pcs_variant, false))
6115 return false;
6116 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6117 &ag_mode, &count);
6118 }
6119
6120 static bool
6121 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6122 const_tree type)
6123 {
6124 if (!use_vfp_abi (pcum->pcs_variant, false))
6125 return false;
6126
6127 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6128 &pcum->aapcs_vfp_rmode,
6129 &pcum->aapcs_vfp_rcount);
6130 }
6131
6132 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6133 for the behaviour of this function. */
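/* Editorial sketch of the allocation below: for a candidate recorded as
   two DFmode elements (e.g. a struct of two doubles), SHIFT is 2 and
   MASK is 0xf, so the loop scans s0, s2, s4, ... for four consecutive
   free single-precision registers, i.e. two consecutive
   double-precision registers such as d0/d1 (s0-s3).  */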
6134
6135 static bool
6136 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6137 const_tree type ATTRIBUTE_UNUSED)
6138 {
6139 int rmode_size
6140 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6141 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6142 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6143 int regno;
6144
6145 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6146 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6147 {
6148 pcum->aapcs_vfp_reg_alloc = mask << regno;
6149 if (mode == BLKmode
6150 || (mode == TImode && ! TARGET_NEON)
6151 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6152 {
6153 int i;
6154 int rcount = pcum->aapcs_vfp_rcount;
6155 int rshift = shift;
6156 machine_mode rmode = pcum->aapcs_vfp_rmode;
6157 rtx par;
6158 if (!TARGET_NEON)
6159 {
6160 /* Avoid using unsupported vector modes. */
6161 if (rmode == V2SImode)
6162 rmode = DImode;
6163 else if (rmode == V4SImode)
6164 {
6165 rmode = DImode;
6166 rcount *= 2;
6167 rshift /= 2;
6168 }
6169 }
6170 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6171 for (i = 0; i < rcount; i++)
6172 {
6173 rtx tmp = gen_rtx_REG (rmode,
6174 FIRST_VFP_REGNUM + regno + i * rshift);
6175 tmp = gen_rtx_EXPR_LIST
6176 (VOIDmode, tmp,
6177 GEN_INT (i * GET_MODE_SIZE (rmode)));
6178 XVECEXP (par, 0, i) = tmp;
6179 }
6180
6181 pcum->aapcs_reg = par;
6182 }
6183 else
6184 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6185 return true;
6186 }
6187 return false;
6188 }
6189
6190 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6191 comment there for the behaviour of this function. */
6192
6193 static rtx
6194 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6195 machine_mode mode,
6196 const_tree type ATTRIBUTE_UNUSED)
6197 {
6198 if (!use_vfp_abi (pcs_variant, false))
6199 return NULL;
6200
6201 if (mode == BLKmode
6202 || (GET_MODE_CLASS (mode) == MODE_INT
6203 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6204 && !TARGET_NEON))
6205 {
6206 int count;
6207 machine_mode ag_mode;
6208 int i;
6209 rtx par;
6210 int shift;
6211
6212 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6213 &ag_mode, &count);
6214
6215 if (!TARGET_NEON)
6216 {
6217 if (ag_mode == V2SImode)
6218 ag_mode = DImode;
6219 else if (ag_mode == V4SImode)
6220 {
6221 ag_mode = DImode;
6222 count *= 2;
6223 }
6224 }
6225 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6226 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6227 for (i = 0; i < count; i++)
6228 {
6229 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6230 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6231 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6232 XVECEXP (par, 0, i) = tmp;
6233 }
6234
6235 return par;
6236 }
6237
6238 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6239 }
6240
6241 static void
6242 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6243 machine_mode mode ATTRIBUTE_UNUSED,
6244 const_tree type ATTRIBUTE_UNUSED)
6245 {
6246 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6247 pcum->aapcs_vfp_reg_alloc = 0;
6248 return;
6249 }
6250
6251 #define AAPCS_CP(X) \
6252 { \
6253 aapcs_ ## X ## _cum_init, \
6254 aapcs_ ## X ## _is_call_candidate, \
6255 aapcs_ ## X ## _allocate, \
6256 aapcs_ ## X ## _is_return_candidate, \
6257 aapcs_ ## X ## _allocate_return_reg, \
6258 aapcs_ ## X ## _advance \
6259 }
6260
6261 /* Table of co-processors that can be used to pass arguments in
6262 registers. Ideally no argument should be a candidate for more than
6263 one co-processor table entry, but the table is processed in order
6264 and stops after the first match. If that entry then fails to put
6265 the argument into a co-processor register, the argument will go on
6266 the stack. */
6267 static struct
6268 {
6269 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6270 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6271
6272 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6273 BLKmode) is a candidate for this co-processor's registers; this
6274 function should ignore any position-dependent state in
6275 CUMULATIVE_ARGS and only use call-type dependent information. */
6276 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6277
6278 /* Return true if the argument does get a co-processor register; it
6279 should set aapcs_reg to an RTX of the register allocated as is
6280 required for a return from FUNCTION_ARG. */
6281 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6282
6283 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6284 be returned in this co-processor's registers. */
6285 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6286
6287 /* Allocate and return an RTX element to hold the return type of a call. This
6288 routine must not fail and will only be called if is_return_candidate
6289 returned true with the same parameters. */
6290 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6291
6292 /* Finish processing this argument and prepare to start processing
6293 the next one. */
6294 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6295 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6296 {
6297 AAPCS_CP(vfp)
6298 };
6299
6300 #undef AAPCS_CP
6301
6302 static int
6303 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6304 const_tree type)
6305 {
6306 int i;
6307
6308 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6309 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6310 return i;
6311
6312 return -1;
6313 }
6314
6315 static int
6316 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6317 {
6318 /* We aren't passed a decl, so we can't check that a call is local.
6319 However, it isn't clear that that would be a win anyway, since it
6320 might limit some tail-calling opportunities. */
6321 enum arm_pcs pcs_variant;
6322
6323 if (fntype)
6324 {
6325 const_tree fndecl = NULL_TREE;
6326
6327 if (TREE_CODE (fntype) == FUNCTION_DECL)
6328 {
6329 fndecl = fntype;
6330 fntype = TREE_TYPE (fntype);
6331 }
6332
6333 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6334 }
6335 else
6336 pcs_variant = arm_pcs_default;
6337
6338 if (pcs_variant != ARM_PCS_AAPCS)
6339 {
6340 int i;
6341
6342 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6343 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6344 TYPE_MODE (type),
6345 type))
6346 return i;
6347 }
6348 return -1;
6349 }
6350
6351 static rtx
6352 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6353 const_tree fntype)
6354 {
6355 /* We aren't passed a decl, so we can't check that a call is local.
6356 However, it isn't clear that that would be a win anyway, since it
6357 might limit some tail-calling opportunities. */
6358 enum arm_pcs pcs_variant;
6359 int unsignedp ATTRIBUTE_UNUSED;
6360
6361 if (fntype)
6362 {
6363 const_tree fndecl = NULL_TREE;
6364
6365 if (TREE_CODE (fntype) == FUNCTION_DECL)
6366 {
6367 fndecl = fntype;
6368 fntype = TREE_TYPE (fntype);
6369 }
6370
6371 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6372 }
6373 else
6374 pcs_variant = arm_pcs_default;
6375
6376 /* Promote integer types. */
6377 if (type && INTEGRAL_TYPE_P (type))
6378 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6379
6380 if (pcs_variant != ARM_PCS_AAPCS)
6381 {
6382 int i;
6383
6384 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6385 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6386 type))
6387 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6388 mode, type);
6389 }
6390
6391 /* Promote small structs returned in a register to full-word size
6392 for big-endian AAPCS. */
6393 if (type && arm_return_in_msb (type))
6394 {
6395 HOST_WIDE_INT size = int_size_in_bytes (type);
6396 if (size % UNITS_PER_WORD != 0)
6397 {
6398 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6399 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6400 }
6401 }
6402
6403 return gen_rtx_REG (mode, R0_REGNUM);
6404 }
6405
6406 static rtx
6407 aapcs_libcall_value (machine_mode mode)
6408 {
6409 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6410 && GET_MODE_SIZE (mode) <= 4)
6411 mode = SImode;
6412
6413 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6414 }
6415
6416 /* Lay out a function argument using the AAPCS rules. The rule
6417 numbers referred to here are those in the AAPCS. */
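/* Editorial illustration of the core-register rules below: with
   NCRN == 3 and splitting still allowed, a 16-byte struct of ints falls
   under rule C5: the first 4 bytes go in r3 (aapcs_partial == 4) and the
   remaining 12 bytes go on the stack.  A DImode argument arriving with
   NCRN == 1 is instead rounded up to r2 by rule C3 and then fits in
   r2/r3 under rule C4.  */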
6418 static void
6419 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6420 const_tree type, bool named)
6421 {
6422 int nregs, nregs2;
6423 int ncrn;
6424
6425 /* We only need to do this once per argument. */
6426 if (pcum->aapcs_arg_processed)
6427 return;
6428
6429 pcum->aapcs_arg_processed = true;
6430
6431 /* Special case: if named is false then we are handling an incoming
6432 anonymous argument which is on the stack. */
6433 if (!named)
6434 return;
6435
6436 /* Is this a potential co-processor register candidate? */
6437 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6438 {
6439 int slot = aapcs_select_call_coproc (pcum, mode, type);
6440 pcum->aapcs_cprc_slot = slot;
6441
6442 /* We don't have to apply any of the rules from part B of the
6443 preparation phase, these are handled elsewhere in the
6444 compiler. */
6445
6446 if (slot >= 0)
6447 {
6448 /* A Co-processor register candidate goes either in its own
6449 class of registers or on the stack. */
6450 if (!pcum->aapcs_cprc_failed[slot])
6451 {
6452 /* C1.cp - Try to allocate the argument to co-processor
6453 registers. */
6454 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6455 return;
6456
6457 /* C2.cp - Put the argument on the stack and note that we
6458 can't assign any more candidates in this slot. We also
6459 need to note that we have allocated stack space, so that
6460 we won't later try to split a non-cprc candidate between
6461 core registers and the stack. */
6462 pcum->aapcs_cprc_failed[slot] = true;
6463 pcum->can_split = false;
6464 }
6465
6466 /* We didn't get a register, so this argument goes on the
6467 stack. */
6468 gcc_assert (pcum->can_split == false);
6469 return;
6470 }
6471 }
6472
6473 /* C3 - For double-word aligned arguments, round the NCRN up to the
6474 next even number. */
6475 ncrn = pcum->aapcs_ncrn;
6476 if (ncrn & 1)
6477 {
6478 int res = arm_needs_doubleword_align (mode, type);
6479 /* Only warn during RTL expansion of call stmts, otherwise we would
6480 warn e.g. during gimplification even on functions that will always
6481 be inlined, and we'd warn multiple times. Don't warn when
6482 called in expand_function_start either, as we warn instead in
6483 arm_function_arg_boundary in that case. */
6484 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6485 inform (input_location, "parameter passing for argument of type "
6486 "%qT changed in GCC 7.1", type);
6487 else if (res > 0)
6488 ncrn++;
6489 }
6490
6491 nregs = ARM_NUM_REGS2 (mode, type);
6492
6493 /* Sigh, this test should really assert that nregs > 0, but a GCC
6494 extension allows empty structs and then gives them empty size; it
6495 then allows such a structure to be passed by value. For some of
6496 the code below we have to pretend that such an argument has
6497 non-zero size so that we 'locate' it correctly either in
6498 registers or on the stack. */
6499 gcc_assert (nregs >= 0);
6500
6501 nregs2 = nregs ? nregs : 1;
6502
6503 /* C4 - Argument fits entirely in core registers. */
6504 if (ncrn + nregs2 <= NUM_ARG_REGS)
6505 {
6506 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6507 pcum->aapcs_next_ncrn = ncrn + nregs;
6508 return;
6509 }
6510
6511 /* C5 - Some core registers left and there are no arguments already
6512 on the stack: split this argument between the remaining core
6513 registers and the stack. */
6514 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6515 {
6516 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6517 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6518 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6519 return;
6520 }
6521
6522 /* C6 - NCRN is set to 4. */
6523 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6524
6525 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6526 return;
6527 }
6528
6529 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6530 for a call to a function whose data type is FNTYPE.
6531 For a library call, FNTYPE is NULL. */
6532 void
6533 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6534 rtx libname,
6535 tree fndecl ATTRIBUTE_UNUSED)
6536 {
6537 /* Long call handling. */
6538 if (fntype)
6539 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6540 else
6541 pcum->pcs_variant = arm_pcs_default;
6542
6543 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6544 {
6545 if (arm_libcall_uses_aapcs_base (libname))
6546 pcum->pcs_variant = ARM_PCS_AAPCS;
6547
6548 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6549 pcum->aapcs_reg = NULL_RTX;
6550 pcum->aapcs_partial = 0;
6551 pcum->aapcs_arg_processed = false;
6552 pcum->aapcs_cprc_slot = -1;
6553 pcum->can_split = true;
6554
6555 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6556 {
6557 int i;
6558
6559 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6560 {
6561 pcum->aapcs_cprc_failed[i] = false;
6562 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6563 }
6564 }
6565 return;
6566 }
6567
6568 /* Legacy ABIs */
6569
6570 /* On the ARM, the offset starts at 0. */
6571 pcum->nregs = 0;
6572 pcum->iwmmxt_nregs = 0;
6573 pcum->can_split = true;
6574
6575 /* Varargs vectors are treated the same as long long.
6576 named_count avoids having to change the way arm handles 'named' */
6577 pcum->named_count = 0;
6578 pcum->nargs = 0;
6579
6580 if (TARGET_REALLY_IWMMXT && fntype)
6581 {
6582 tree fn_arg;
6583
6584 for (fn_arg = TYPE_ARG_TYPES (fntype);
6585 fn_arg;
6586 fn_arg = TREE_CHAIN (fn_arg))
6587 pcum->named_count += 1;
6588
6589 if (! pcum->named_count)
6590 pcum->named_count = INT_MAX;
6591 }
6592 }
6593
6594 /* Return 1 if double word alignment is required for argument passing.
6595 Return -1 if double word alignment used to be required for argument
6596 passing before PR77728 ABI fix, but is not required anymore.
6597 Return 0 if double word alignment is not required and wasn't required
6598 before either. */
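/* Editorial illustration: a struct containing a 64-bit member (e.g. a
   long long field) has a FIELD_DECL whose DECL_ALIGN exceeds
   PARM_BOUNDARY, so the function returns 1; a plain int argument
   returns 0.  */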
6599 static int
6600 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6601 {
6602 if (!type)
6603 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6604
6605 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6606 if (!AGGREGATE_TYPE_P (type))
6607 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6608
6609 /* Array types: Use member alignment of element type. */
6610 if (TREE_CODE (type) == ARRAY_TYPE)
6611 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6612
6613 int ret = 0;
6614 /* Record/aggregate types: Use greatest member alignment of any member. */
6615 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6616 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6617 {
6618 if (TREE_CODE (field) == FIELD_DECL)
6619 return 1;
6620 else
6621 /* Before PR77728 fix, we were incorrectly considering also
6622 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6623 Make sure we can warn about that with -Wpsabi. */
6624 ret = -1;
6625 }
6626
6627 return ret;
6628 }
6629
6630
6631 /* Determine where to put an argument to a function.
6632 Value is zero to push the argument on the stack,
6633 or a hard register in which to store the argument.
6634
6635 MODE is the argument's machine mode.
6636 TYPE is the data type of the argument (as a tree).
6637 This is null for libcalls where that information may
6638 not be available.
6639 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6640 the preceding args and about the function being called.
6641 NAMED is nonzero if this argument is a named parameter
6642 (otherwise it is an extra parameter matching an ellipsis).
6643
6644 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6645 other arguments are passed on the stack. If (NAMED == 0) (which happens
6646 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6647 defined), say it is passed in the stack (function_prologue will
6648 indeed make it pass in the stack if necessary). */
6649
6650 static rtx
6651 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6652 const_tree type, bool named)
6653 {
6654 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6655 int nregs;
6656
6657 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6658 a call insn (op3 of a call_value insn). */
6659 if (mode == VOIDmode)
6660 return const0_rtx;
6661
6662 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6663 {
6664 aapcs_layout_arg (pcum, mode, type, named);
6665 return pcum->aapcs_reg;
6666 }
6667
6668 /* Varargs vectors are treated the same as long long.
6669 named_count avoids having to change the way arm handles 'named' */
6670 if (TARGET_IWMMXT_ABI
6671 && arm_vector_mode_supported_p (mode)
6672 && pcum->named_count > pcum->nargs + 1)
6673 {
6674 if (pcum->iwmmxt_nregs <= 9)
6675 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6676 else
6677 {
6678 pcum->can_split = false;
6679 return NULL_RTX;
6680 }
6681 }
6682
6683 /* Put doubleword aligned quantities in even register pairs. */
6684 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6685 {
6686 int res = arm_needs_doubleword_align (mode, type);
6687 if (res < 0 && warn_psabi)
6688 inform (input_location, "parameter passing for argument of type "
6689 "%qT changed in GCC 7.1", type);
6690 else if (res > 0)
6691 pcum->nregs++;
6692 }
6693
6694 /* Only allow splitting an arg between regs and memory if all preceding
6695 args were allocated to regs. For args passed by reference we only count
6696 the reference pointer. */
6697 if (pcum->can_split)
6698 nregs = 1;
6699 else
6700 nregs = ARM_NUM_REGS2 (mode, type);
6701
6702 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6703 return NULL_RTX;
6704
6705 return gen_rtx_REG (mode, pcum->nregs);
6706 }
6707
6708 static unsigned int
6709 arm_function_arg_boundary (machine_mode mode, const_tree type)
6710 {
6711 if (!ARM_DOUBLEWORD_ALIGN)
6712 return PARM_BOUNDARY;
6713
6714 int res = arm_needs_doubleword_align (mode, type);
6715 if (res < 0 && warn_psabi)
6716 inform (input_location, "parameter passing for argument of type %qT "
6717 "changed in GCC 7.1", type);
6718
6719 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6720 }
6721
6722 static int
6723 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6724 tree type, bool named)
6725 {
6726 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6727 int nregs = pcum->nregs;
6728
6729 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6730 {
6731 aapcs_layout_arg (pcum, mode, type, named);
6732 return pcum->aapcs_partial;
6733 }
6734
6735 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6736 return 0;
6737
6738 if (NUM_ARG_REGS > nregs
6739 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6740 && pcum->can_split)
6741 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6742
6743 return 0;
6744 }
6745
6746 /* Update the data in PCUM to advance over an argument
6747 of mode MODE and data type TYPE.
6748 (TYPE is null for libcalls where that information may not be available.) */
6749
6750 static void
6751 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6752 const_tree type, bool named)
6753 {
6754 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6755
6756 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6757 {
6758 aapcs_layout_arg (pcum, mode, type, named);
6759
6760 if (pcum->aapcs_cprc_slot >= 0)
6761 {
6762 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6763 type);
6764 pcum->aapcs_cprc_slot = -1;
6765 }
6766
6767 /* Generic stuff. */
6768 pcum->aapcs_arg_processed = false;
6769 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6770 pcum->aapcs_reg = NULL_RTX;
6771 pcum->aapcs_partial = 0;
6772 }
6773 else
6774 {
6775 pcum->nargs += 1;
6776 if (arm_vector_mode_supported_p (mode)
6777 && pcum->named_count > pcum->nargs
6778 && TARGET_IWMMXT_ABI)
6779 pcum->iwmmxt_nregs += 1;
6780 else
6781 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6782 }
6783 }
6784
6785 /* Variable sized types are passed by reference. This is a GCC
6786 extension to the ARM ABI. */
6787
6788 static bool
6789 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6790 machine_mode mode ATTRIBUTE_UNUSED,
6791 const_tree type, bool named ATTRIBUTE_UNUSED)
6792 {
6793 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6794 }
6795 \f
6796 /* Encode the current state of the #pragma [no_]long_calls. */
6797 typedef enum
6798 {
6799 OFF, /* No #pragma [no_]long_calls is in effect. */
6800 LONG, /* #pragma long_calls is in effect. */
6801 SHORT /* #pragma no_long_calls is in effect. */
6802 } arm_pragma_enum;
6803
6804 static arm_pragma_enum arm_pragma_long_calls = OFF;
6805
6806 void
6807 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6808 {
6809 arm_pragma_long_calls = LONG;
6810 }
6811
6812 void
6813 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6814 {
6815 arm_pragma_long_calls = SHORT;
6816 }
6817
6818 void
6819 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6820 {
6821 arm_pragma_long_calls = OFF;
6822 }
6823 \f
6824 /* Handle an attribute requiring a FUNCTION_DECL;
6825 arguments as in struct attribute_spec.handler. */
6826 static tree
6827 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6828 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6829 {
6830 if (TREE_CODE (*node) != FUNCTION_DECL)
6831 {
6832 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6833 name);
6834 *no_add_attrs = true;
6835 }
6836
6837 return NULL_TREE;
6838 }
6839
6840 /* Handle an "interrupt" or "isr" attribute;
6841 arguments as in struct attribute_spec.handler. */
6842 static tree
6843 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6844 bool *no_add_attrs)
6845 {
6846 if (DECL_P (*node))
6847 {
6848 if (TREE_CODE (*node) != FUNCTION_DECL)
6849 {
6850 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6851 name);
6852 *no_add_attrs = true;
6853 }
6854 /* FIXME: the argument if any is checked for type attributes;
6855 should it be checked for decl ones? */
6856 }
6857 else
6858 {
6859 if (TREE_CODE (*node) == FUNCTION_TYPE
6860 || TREE_CODE (*node) == METHOD_TYPE)
6861 {
6862 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6863 {
6864 warning (OPT_Wattributes, "%qE attribute ignored",
6865 name);
6866 *no_add_attrs = true;
6867 }
6868 }
6869 else if (TREE_CODE (*node) == POINTER_TYPE
6870 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6871 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6872 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6873 {
6874 *node = build_variant_type_copy (*node);
6875 TREE_TYPE (*node) = build_type_attribute_variant
6876 (TREE_TYPE (*node),
6877 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6878 *no_add_attrs = true;
6879 }
6880 else
6881 {
6882 /* Possibly pass this attribute on from the type to a decl. */
6883 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6884 | (int) ATTR_FLAG_FUNCTION_NEXT
6885 | (int) ATTR_FLAG_ARRAY_NEXT))
6886 {
6887 *no_add_attrs = true;
6888 return tree_cons (name, args, NULL_TREE);
6889 }
6890 else
6891 {
6892 warning (OPT_Wattributes, "%qE attribute ignored",
6893 name);
6894 }
6895 }
6896 }
6897
6898 return NULL_TREE;
6899 }
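
/* Illustrative example (an assumption, not from the original sources):
   the "interrupt"/"isr" attribute handled above is normally written on a
   function declaration, optionally with a string naming the interrupt
   kind; a hypothetical handler:

     void my_handler (void) __attribute__ ((interrupt ("IRQ")));

   An unrecognized argument makes arm_isr_value return ARM_FT_UNKNOWN and
   the attribute is ignored with a warning.  */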
6900
6901 /* Handle a "pcs" attribute; arguments as in struct
6902 attribute_spec.handler. */
6903 static tree
6904 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6905 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6906 {
6907 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6908 {
6909 warning (OPT_Wattributes, "%qE attribute ignored", name);
6910 *no_add_attrs = true;
6911 }
6912 return NULL_TREE;
6913 }
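
/* Illustrative example (an assumption, not from the original sources):
   the "pcs" attribute selects the procedure call standard for a function
   type, e.g. on a hypothetical declaration:

     double f2d (float) __attribute__ ((pcs ("aapcs")));

   Strings that arm_pcs_from_attribute does not recognize trigger the
   warning above.  */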
6914
6915 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6916 /* Handle the "notshared" attribute. This attribute is another way of
6917 requesting hidden visibility. ARM's compiler supports
6918 "__declspec(notshared)"; we support the same thing via an
6919 attribute. */
6920
6921 static tree
6922 arm_handle_notshared_attribute (tree *node,
6923 tree name ATTRIBUTE_UNUSED,
6924 tree args ATTRIBUTE_UNUSED,
6925 int flags ATTRIBUTE_UNUSED,
6926 bool *no_add_attrs)
6927 {
6928 tree decl = TYPE_NAME (*node);
6929
6930 if (decl)
6931 {
6932 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6933 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6934 *no_add_attrs = false;
6935 }
6936 return NULL_TREE;
6937 }
6938 #endif
6939
6940 /* This function returns true if a function with declaration FNDECL and type
6941 FNTYPE uses the stack to pass arguments or return values, and false
6942 otherwise. It is used for functions with the attribute
6943 'cmse_nonsecure_call' or 'cmse_nonsecure_entry' and issues diagnostic
6944 messages if the stack is used. NAME is the name of the attribute being
6945 checked. */
6946
6947 static bool
6948 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6949 {
6950 function_args_iterator args_iter;
6951 CUMULATIVE_ARGS args_so_far_v;
6952 cumulative_args_t args_so_far;
6953 bool first_param = true;
6954 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6955
6956 /* Error out if any argument is passed on the stack. */
6957 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6958 args_so_far = pack_cumulative_args (&args_so_far_v);
6959 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6960 {
6961 rtx arg_rtx;
6962 machine_mode arg_mode = TYPE_MODE (arg_type);
6963
6964 prev_arg_type = arg_type;
6965 if (VOID_TYPE_P (arg_type))
6966 continue;
6967
6968 if (!first_param)
6969 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6970 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6971 if (!arg_rtx
6972 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6973 {
6974 error ("%qE attribute not available to functions with arguments "
6975 "passed on the stack", name);
6976 return true;
6977 }
6978 first_param = false;
6979 }
6980
6981 /* Error out for variadic functions since we cannot control how many
6982 arguments will be passed and thus the stack could be used. stdarg_p () is
6983 not used for this check, to avoid walking the argument list twice. */
6984 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6985 {
6986 error ("%qE attribute not available to functions with variable number "
6987 "of arguments", name);
6988 return true;
6989 }
6990
6991 /* Error out if return value is passed on the stack. */
6992 ret_type = TREE_TYPE (fntype);
6993 if (arm_return_in_memory (ret_type, fntype))
6994 {
6995 error ("%qE attribute not available to functions that return value on "
6996 "the stack", name);
6997 return true;
6998 }
6999 return false;
7000 }
7001
7002 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7003 function will check whether the attribute is allowed here and will add the
7004 attribute to the function declaration tree or otherwise issue a warning. */
7005
7006 static tree
7007 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7008 tree /* args */,
7009 int /* flags */,
7010 bool *no_add_attrs)
7011 {
7012 tree fndecl;
7013
7014 if (!use_cmse)
7015 {
7016 *no_add_attrs = true;
7017 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
7018 name);
7019 return NULL_TREE;
7020 }
7021
7022 /* Ignore attribute for function types. */
7023 if (TREE_CODE (*node) != FUNCTION_DECL)
7024 {
7025 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7026 name);
7027 *no_add_attrs = true;
7028 return NULL_TREE;
7029 }
7030
7031 fndecl = *node;
7032
7033 /* Warn for static linkage functions. */
7034 if (!TREE_PUBLIC (fndecl))
7035 {
7036 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7037 "with static linkage", name);
7038 *no_add_attrs = true;
7039 return NULL_TREE;
7040 }
7041
7042 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7043 TREE_TYPE (fndecl));
7044 return NULL_TREE;
7045 }
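
/* Illustrative example (an assumption, not from the original sources):
   with -mcmse, a secure entry point is declared roughly as below; the
   handler above drops the attribute if any argument or the return value
   would have to go on the stack.

     int __attribute__ ((cmse_nonsecure_entry)) entry_fn (int a, int b);
*/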
7046
7047
7048 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7049 function will check whether the attribute is allowed here and will add the
7050 attribute to the function type tree or otherwise issue a diagnostic. The
7051 reason we check this at declaration time is to only allow the use of the
7052 attribute with declarations of function pointers and not function
7053 declarations. This function checks NODE is of the expected type and issues
7054 diagnostics otherwise using NAME. If it is not of the expected type
7055 *NO_ADD_ATTRS will be set to true. */
7056
7057 static tree
7058 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7059 tree /* args */,
7060 int /* flags */,
7061 bool *no_add_attrs)
7062 {
7063 tree decl = NULL_TREE, fntype = NULL_TREE;
7064 tree type;
7065
7066 if (!use_cmse)
7067 {
7068 *no_add_attrs = true;
7069 warning (OPT_Wattributes, "%qE attribute ignored without -mcmse option",
7070 name);
7071 return NULL_TREE;
7072 }
7073
7074 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7075 {
7076 decl = *node;
7077 fntype = TREE_TYPE (decl);
7078 }
7079
7080 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7081 fntype = TREE_TYPE (fntype);
7082
7083 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7084 {
7085 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7086 "function pointer", name);
7087 *no_add_attrs = true;
7088 return NULL_TREE;
7089 }
7090
7091 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7092
7093 if (*no_add_attrs)
7094 return NULL_TREE;
7095
7096 /* Prevent trees from being shared among function types with and without
7097 the cmse_nonsecure_call attribute. */
7098 type = TREE_TYPE (decl);
7099
7100 type = build_distinct_type_copy (type);
7101 TREE_TYPE (decl) = type;
7102 fntype = type;
7103
7104 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7105 {
7106 type = fntype;
7107 fntype = TREE_TYPE (fntype);
7108 fntype = build_distinct_type_copy (fntype);
7109 TREE_TYPE (type) = fntype;
7110 }
7111
7112 /* Construct a type attribute and add it to the function type. */
7113 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7114 TYPE_ATTRIBUTES (fntype));
7115 TYPE_ATTRIBUTES (fntype) = attrs;
7116 return NULL_TREE;
7117 }
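
/* Illustrative example (an assumption, not from the original sources):
   the attribute is accepted on declarations whose type is (a pointer to)
   a function type, e.g. a hypothetical non-secure callback:

     void (*ns_callback) (int) __attribute__ ((cmse_nonsecure_call));
*/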
7118
7119 /* Return 0 if the attributes for two types are incompatible, 1 if they
7120 are compatible, and 2 if they are nearly compatible (which causes a
7121 warning to be generated). */
7122 static int
7123 arm_comp_type_attributes (const_tree type1, const_tree type2)
7124 {
7125 int l1, l2, s1, s2;
7126
7127 /* Check for mismatch of non-default calling convention. */
7128 if (TREE_CODE (type1) != FUNCTION_TYPE)
7129 return 1;
7130
7131 /* Check for mismatched call attributes. */
7132 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7133 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7134 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7135 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7136
7137 /* Only bother to check if an attribute is defined. */
7138 if (l1 | l2 | s1 | s2)
7139 {
7140 /* If one type has an attribute, the other must have the same attribute. */
7141 if ((l1 != l2) || (s1 != s2))
7142 return 0;
7143
7144 /* Disallow mixed attributes. */
7145 if ((l1 & s2) || (l2 & s1))
7146 return 0;
7147 }
7148
7149 /* Check for mismatched ISR attribute. */
7150 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7151 if (! l1)
7152 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7153 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7154 if (! l2)
7155 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7156 if (l1 != l2)
7157 return 0;
7158
7159 l1 = lookup_attribute ("cmse_nonsecure_call",
7160 TYPE_ATTRIBUTES (type1)) != NULL;
7161 l2 = lookup_attribute ("cmse_nonsecure_call",
7162 TYPE_ATTRIBUTES (type2)) != NULL;
7163
7164 if (l1 != l2)
7165 return 0;
7166
7167 return 1;
7168 }
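
/* Illustrative example (an assumption, not from the original sources):
   two function types such as

     void (*a) (void) __attribute__ ((long_call));
     void (*b) (void) __attribute__ ((short_call));

   compare as incompatible (return value 0) because their call attributes
   differ.  */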
7169
7170 /* Assign default attributes to a newly defined type. This is used to
7171 set short_call/long_call attributes for function types of
7172 functions defined inside corresponding #pragma scopes. */
7173 static void
7174 arm_set_default_type_attributes (tree type)
7175 {
7176 /* Add __attribute__ ((long_call)) to all functions when inside
7177 #pragma long_calls, or __attribute__ ((short_call)) when inside
7178 #pragma no_long_calls. */
7179 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7180 {
7181 tree type_attr_list, attr_name;
7182 type_attr_list = TYPE_ATTRIBUTES (type);
7183
7184 if (arm_pragma_long_calls == LONG)
7185 attr_name = get_identifier ("long_call");
7186 else if (arm_pragma_long_calls == SHORT)
7187 attr_name = get_identifier ("short_call");
7188 else
7189 return;
7190
7191 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7192 TYPE_ATTRIBUTES (type) = type_attr_list;
7193 }
7194 }
7195 \f
7196 /* Return true if DECL is known to be linked into section SECTION. */
7197
7198 static bool
7199 arm_function_in_section_p (tree decl, section *section)
7200 {
7201 /* We can only be certain about the prevailing symbol definition. */
7202 if (!decl_binds_to_current_def_p (decl))
7203 return false;
7204
7205 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7206 if (!DECL_SECTION_NAME (decl))
7207 {
7208 /* Make sure that we will not create a unique section for DECL. */
7209 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7210 return false;
7211 }
7212
7213 return function_section (decl) == section;
7214 }
7215
7216 /* Return nonzero if a 32-bit "long_call" should be generated for
7217 a call from the current function to DECL. We generate a long_call
7218 if the function:
7219
7220 a. has an __attribute__ ((long_call))
7221 or b. is within the scope of a #pragma long_calls
7222 or c. the -mlong-calls command line switch has been specified
7223
7224 However we do not generate a long call if the function:
7225
7226 d. has an __attribute__ ((short_call))
7227 or e. is inside the scope of a #pragma no_long_calls
7228 or f. is defined in the same section as the current function. */
7229
7230 bool
7231 arm_is_long_call_p (tree decl)
7232 {
7233 tree attrs;
7234
7235 if (!decl)
7236 return TARGET_LONG_CALLS;
7237
7238 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7239 if (lookup_attribute ("short_call", attrs))
7240 return false;
7241
7242 /* For "f", be conservative, and only cater for cases in which the
7243 whole of the current function is placed in the same section. */
7244 if (!flag_reorder_blocks_and_partition
7245 && TREE_CODE (decl) == FUNCTION_DECL
7246 && arm_function_in_section_p (decl, current_function_section ()))
7247 return false;
7248
7249 if (lookup_attribute ("long_call", attrs))
7250 return true;
7251
7252 return TARGET_LONG_CALLS;
7253 }
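
/* Illustrative example (an assumption, not from the original sources):
   even with -mlong-calls, a declaration such as

     extern void lib_fn (void) __attribute__ ((short_call));

   is still called with a plain BL, because the "short_call" check above
   takes precedence over the command-line default.  */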
7254
7255 /* Return nonzero if it is ok to make a tail-call to DECL. */
7256 static bool
7257 arm_function_ok_for_sibcall (tree decl, tree exp)
7258 {
7259 unsigned long func_type;
7260
7261 if (cfun->machine->sibcall_blocked)
7262 return false;
7263
7264 /* Never tailcall something if we are generating code for Thumb-1. */
7265 if (TARGET_THUMB1)
7266 return false;
7267
7268 /* The PIC register is live on entry to VxWorks PLT entries, so we
7269 must make the call before restoring the PIC register. */
7270 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7271 return false;
7272
7273 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7274 may be used both as the target of the call and as the base register for
7275 restoring the VFP registers. */
7276 if (TARGET_APCS_FRAME && TARGET_ARM
7277 && TARGET_HARD_FLOAT
7278 && decl && arm_is_long_call_p (decl))
7279 return false;
7280
7281 /* If we are interworking and the function is not declared static
7282 then we can't tail-call it unless we know that it exists in this
7283 compilation unit (since it might be a Thumb routine). */
7284 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7285 && !TREE_ASM_WRITTEN (decl))
7286 return false;
7287
7288 func_type = arm_current_func_type ();
7289 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7290 if (IS_INTERRUPT (func_type))
7291 return false;
7292
7293 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7294 generated for entry functions themselves. */
7295 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7296 return false;
7297
7298 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7299 this would complicate matters for later code generation. */
7300 if (TREE_CODE (exp) == CALL_EXPR)
7301 {
7302 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7303 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7304 return false;
7305 }
7306
7307 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7308 {
7309 /* Check that the return value locations are the same. For
7310 example that we aren't returning a value from the sibling in
7311 a VFP register but then need to transfer it to a core
7312 register. */
7313 rtx a, b;
7314 tree decl_or_type = decl;
7315
7316 /* If it is an indirect function pointer, get the function type. */
7317 if (!decl)
7318 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7319
7320 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7321 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7322 cfun->decl, false);
7323 if (!rtx_equal_p (a, b))
7324 return false;
7325 }
7326
7327 /* Never tailcall if function may be called with a misaligned SP. */
7328 if (IS_STACKALIGN (func_type))
7329 return false;
7330
7331 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7332 references should become a NOP. Don't convert such calls into
7333 sibling calls. */
7334 if (TARGET_AAPCS_BASED
7335 && arm_abi == ARM_ABI_AAPCS
7336 && decl
7337 && DECL_WEAK (decl))
7338 return false;
7339
7340 /* We cannot do a tailcall for an indirect call by descriptor if all the
7341 argument registers are used because the only register left to load the
7342 address is IP and it will already contain the static chain. */
7343 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7344 {
7345 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7346 CUMULATIVE_ARGS cum;
7347 cumulative_args_t cum_v;
7348
7349 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7350 cum_v = pack_cumulative_args (&cum);
7351
7352 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7353 {
7354 tree type = TREE_VALUE (t);
7355 if (!VOID_TYPE_P (type))
7356 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7357 }
7358
7359 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7360 return false;
7361 }
7362
7363 /* Everything else is ok. */
7364 return true;
7365 }
7366
7367 \f
7368 /* Addressing mode support functions. */
7369
7370 /* Return nonzero if X is a legitimate immediate operand when compiling
7371 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7372 int
7373 legitimate_pic_operand_p (rtx x)
7374 {
7375 if (GET_CODE (x) == SYMBOL_REF
7376 || (GET_CODE (x) == CONST
7377 && GET_CODE (XEXP (x, 0)) == PLUS
7378 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7379 return 0;
7380
7381 return 1;
7382 }
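
/* Descriptive note (an editor's addition): for instance, (const_int 42)
   is a legitimate PIC immediate, whereas (symbol_ref "x") or
   (const (plus (symbol_ref "x") (const_int 4))) is not and must instead
   go through legitimize_pic_address below.  */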
7383
7384 /* Record that the current function needs a PIC register. Initialize
7385 cfun->machine->pic_reg if we have not already done so. */
7386
7387 static void
7388 require_pic_register (void)
7389 {
7390 /* A lot of the logic here is made obscure by the fact that this
7391 routine gets called as part of the rtx cost estimation process.
7392 We don't want those calls to affect any assumptions about the real
7393 function; and further, we can't call entry_of_function() until we
7394 start the real expansion process. */
7395 if (!crtl->uses_pic_offset_table)
7396 {
7397 gcc_assert (can_create_pseudo_p ());
7398 if (arm_pic_register != INVALID_REGNUM
7399 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7400 {
7401 if (!cfun->machine->pic_reg)
7402 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7403
7404 /* Play games to avoid marking the function as needing pic
7405 if we are being called as part of the cost-estimation
7406 process. */
7407 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7408 crtl->uses_pic_offset_table = 1;
7409 }
7410 else
7411 {
7412 rtx_insn *seq, *insn;
7413
7414 if (!cfun->machine->pic_reg)
7415 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7416
7417 /* Play games to avoid marking the function as needing pic
7418 if we are being called as part of the cost-estimation
7419 process. */
7420 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7421 {
7422 crtl->uses_pic_offset_table = 1;
7423 start_sequence ();
7424
7425 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7426 && arm_pic_register > LAST_LO_REGNUM)
7427 emit_move_insn (cfun->machine->pic_reg,
7428 gen_rtx_REG (Pmode, arm_pic_register));
7429 else
7430 arm_load_pic_register (0UL);
7431
7432 seq = get_insns ();
7433 end_sequence ();
7434
7435 for (insn = seq; insn; insn = NEXT_INSN (insn))
7436 if (INSN_P (insn))
7437 INSN_LOCATION (insn) = prologue_location;
7438
7439 /* We can be called during expansion of PHI nodes, where
7440 we can't yet emit instructions directly in the final
7441 insn stream. Queue the insns on the entry edge; they will
7442 be committed after everything else is expanded. */
7443 insert_insn_on_edge (seq,
7444 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7445 }
7446 }
7447 }
7448 }
7449
7450 rtx
7451 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7452 {
7453 if (GET_CODE (orig) == SYMBOL_REF
7454 || GET_CODE (orig) == LABEL_REF)
7455 {
7456 if (reg == 0)
7457 {
7458 gcc_assert (can_create_pseudo_p ());
7459 reg = gen_reg_rtx (Pmode);
7460 }
7461
7462 /* VxWorks does not impose a fixed gap between segments; the run-time
7463 gap can be different from the object-file gap. We therefore can't
7464 use GOTOFF unless we are absolutely sure that the symbol is in the
7465 same segment as the GOT. Unfortunately, the flexibility of linker
7466 scripts means that we can't be sure of that in general, so assume
7467 that GOTOFF is never valid on VxWorks. */
7468 /* References to weak symbols cannot be resolved locally: they
7469 may be overridden by a non-weak definition at link time. */
7470 rtx_insn *insn;
7471 if ((GET_CODE (orig) == LABEL_REF
7472 || (GET_CODE (orig) == SYMBOL_REF
7473 && SYMBOL_REF_LOCAL_P (orig)
7474 && (SYMBOL_REF_DECL (orig)
7475 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7476 && NEED_GOT_RELOC
7477 && arm_pic_data_is_text_relative)
7478 insn = arm_pic_static_addr (orig, reg);
7479 else
7480 {
7481 rtx pat;
7482 rtx mem;
7483
7484 /* If this function doesn't have a pic register, create one now. */
7485 require_pic_register ();
7486
7487 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7488
7489 /* Make the MEM as close to a constant as possible. */
7490 mem = SET_SRC (pat);
7491 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7492 MEM_READONLY_P (mem) = 1;
7493 MEM_NOTRAP_P (mem) = 1;
7494
7495 insn = emit_insn (pat);
7496 }
7497
7498 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7499 by the loop optimizer. */
7500 set_unique_reg_note (insn, REG_EQUAL, orig);
7501
7502 return reg;
7503 }
7504 else if (GET_CODE (orig) == CONST)
7505 {
7506 rtx base, offset;
7507
7508 if (GET_CODE (XEXP (orig, 0)) == PLUS
7509 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7510 return orig;
7511
7512 /* Handle the case where we have: const (UNSPEC_TLS). */
7513 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7514 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7515 return orig;
7516
7517 /* Handle the case where we have:
7518 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7519 CONST_INT. */
7520 if (GET_CODE (XEXP (orig, 0)) == PLUS
7521 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7522 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7523 {
7524 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7525 return orig;
7526 }
7527
7528 if (reg == 0)
7529 {
7530 gcc_assert (can_create_pseudo_p ());
7531 reg = gen_reg_rtx (Pmode);
7532 }
7533
7534 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7535
7536 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7537 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7538 base == reg ? 0 : reg);
7539
7540 if (CONST_INT_P (offset))
7541 {
7542 /* The base register doesn't really matter; we only want to
7543 test the index for the appropriate mode. */
7544 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7545 {
7546 gcc_assert (can_create_pseudo_p ());
7547 offset = force_reg (Pmode, offset);
7548 }
7549
7550 if (CONST_INT_P (offset))
7551 return plus_constant (Pmode, base, INTVAL (offset));
7552 }
7553
7554 if (GET_MODE_SIZE (mode) > 4
7555 && (GET_MODE_CLASS (mode) == MODE_INT
7556 || TARGET_SOFT_FLOAT))
7557 {
7558 emit_insn (gen_addsi3 (reg, base, offset));
7559 return reg;
7560 }
7561
7562 return gen_rtx_PLUS (Pmode, base, offset);
7563 }
7564
7565 return orig;
7566 }
7567
7568
7569 /* Find a spare register to use during the prolog of a function. */
7570
7571 static int
7572 thumb_find_work_register (unsigned long pushed_regs_mask)
7573 {
7574 int reg;
7575
7576 /* Check the argument registers first as these are call-used. The
7577 register allocation order means that sometimes r3 might be used
7578 but earlier argument registers might not, so check them all. */
7579 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7580 if (!df_regs_ever_live_p (reg))
7581 return reg;
7582
7583 /* Before going on to check the call-saved registers we can try a couple
7584 more ways of deducing that r3 is available. The first is when we are
7585 pushing anonymous arguments onto the stack and we have less than 4
7586 registers' worth of fixed arguments (*). In this case r3 will be part of
7587 the variable argument list and so we can be sure that it will be
7588 pushed right at the start of the function. Hence it will be available
7589 for the rest of the prologue.
7590 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7591 if (cfun->machine->uses_anonymous_args
7592 && crtl->args.pretend_args_size > 0)
7593 return LAST_ARG_REGNUM;
7594
7595 /* The other case is when we have fixed arguments but fewer than 4 registers'
7596 worth. In this case r3 might be used in the body of the function, but
7597 it is not being used to convey an argument into the function. In theory
7598 we could just check crtl->args.size to see how many bytes are
7599 being passed in argument registers, but it seems that it is unreliable.
7600 Sometimes it will have the value 0 when in fact arguments are being
7601 passed. (See testcase execute/20021111-1.c for an example). So we also
7602 check the args_info.nregs field. The problem with this field is
7603 that it makes no allowances for arguments that are passed to the
7604 function but which are not used. Hence we could miss an opportunity
7605 when a function has an unused argument in r3. But it is better to be
7606 safe than sorry. */
7607 if (! cfun->machine->uses_anonymous_args
7608 && crtl->args.size >= 0
7609 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7610 && (TARGET_AAPCS_BASED
7611 ? crtl->args.info.aapcs_ncrn < 4
7612 : crtl->args.info.nregs < 4))
7613 return LAST_ARG_REGNUM;
7614
7615 /* Otherwise look for a call-saved register that is going to be pushed. */
7616 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7617 if (pushed_regs_mask & (1 << reg))
7618 return reg;
7619
7620 if (TARGET_THUMB2)
7621 {
7622 /* Thumb-2 can use high regs. */
7623 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7624 if (pushed_regs_mask & (1 << reg))
7625 return reg;
7626 }
7627 /* Something went wrong - thumb_compute_save_reg_mask()
7628 should have arranged for a suitable register to be pushed. */
7629 gcc_unreachable ();
7630 }
7631
7632 static GTY(()) int pic_labelno;
7633
7634 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7635 low register. */
7636
7637 void
7638 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7639 {
7640 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7641
7642 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7643 return;
7644
7645 gcc_assert (flag_pic);
7646
7647 pic_reg = cfun->machine->pic_reg;
7648 if (TARGET_VXWORKS_RTP)
7649 {
7650 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7651 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7652 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7653
7654 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7655
7656 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7657 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7658 }
7659 else
7660 {
7661 /* We use an UNSPEC rather than a LABEL_REF because this label
7662 never appears in the code stream. */
7663
7664 labelno = GEN_INT (pic_labelno++);
7665 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7666 l1 = gen_rtx_CONST (VOIDmode, l1);
7667
7668 /* On the ARM the PC register contains 'dot + 8' at the time of the
7669 addition, on the Thumb it is 'dot + 4'. */
7670 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7671 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7672 UNSPEC_GOTSYM_OFF);
7673 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7674
7675 if (TARGET_32BIT)
7676 {
7677 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7678 }
7679 else /* TARGET_THUMB1 */
7680 {
7681 if (arm_pic_register != INVALID_REGNUM
7682 && REGNO (pic_reg) > LAST_LO_REGNUM)
7683 {
7684 /* We will have pushed the pic register, so we should always be
7685 able to find a work register. */
7686 pic_tmp = gen_rtx_REG (SImode,
7687 thumb_find_work_register (saved_regs));
7688 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7689 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7690 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7691 }
7692 else if (arm_pic_register != INVALID_REGNUM
7693 && arm_pic_register > LAST_LO_REGNUM
7694 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7695 {
7696 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7697 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7698 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7699 }
7700 else
7701 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7702 }
7703 }
7704
7705 /* Need to emit this whether or not we obey regdecls,
7706 since setjmp/longjmp can cause life info to screw up. */
7707 emit_use (pic_reg);
7708 }
7709
7710 /* Generate code to load the address of a static var when flag_pic is set. */
7711 static rtx_insn *
7712 arm_pic_static_addr (rtx orig, rtx reg)
7713 {
7714 rtx l1, labelno, offset_rtx;
7715
7716 gcc_assert (flag_pic);
7717
7718 /* We use an UNSPEC rather than a LABEL_REF because this label
7719 never appears in the code stream. */
7720 labelno = GEN_INT (pic_labelno++);
7721 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7722 l1 = gen_rtx_CONST (VOIDmode, l1);
7723
7724 /* On the ARM the PC register contains 'dot + 8' at the time of the
7725 addition, on the Thumb it is 'dot + 4'. */
7726 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7727 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7728 UNSPEC_SYMBOL_OFFSET);
7729 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7730
7731 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7732 }
7733
7734 /* Return nonzero if X is valid as an ARM state addressing register. */
7735 static int
7736 arm_address_register_rtx_p (rtx x, int strict_p)
7737 {
7738 int regno;
7739
7740 if (!REG_P (x))
7741 return 0;
7742
7743 regno = REGNO (x);
7744
7745 if (strict_p)
7746 return ARM_REGNO_OK_FOR_BASE_P (regno);
7747
7748 return (regno <= LAST_ARM_REGNUM
7749 || regno >= FIRST_PSEUDO_REGISTER
7750 || regno == FRAME_POINTER_REGNUM
7751 || regno == ARG_POINTER_REGNUM);
7752 }
7753
7754 /* Return TRUE if this rtx is the difference of a symbol and a label,
7755 and will reduce to a PC-relative relocation in the object file.
7756 Expressions like this can be left alone when generating PIC, rather
7757 than forced through the GOT. */
7758 static int
7759 pcrel_constant_p (rtx x)
7760 {
7761 if (GET_CODE (x) == MINUS)
7762 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7763
7764 return FALSE;
7765 }
7766
7767 /* Return true if X will surely end up in an index register after next
7768 splitting pass. */
7769 static bool
7770 will_be_in_index_register (const_rtx x)
7771 {
7772 /* arm.md: calculate_pic_address will split this into a register. */
7773 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7774 }
7775
7776 /* Return nonzero if X is a valid ARM state address operand. */
7777 int
7778 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7779 int strict_p)
7780 {
7781 bool use_ldrd;
7782 enum rtx_code code = GET_CODE (x);
7783
7784 if (arm_address_register_rtx_p (x, strict_p))
7785 return 1;
7786
7787 use_ldrd = (TARGET_LDRD
7788 && (mode == DImode || mode == DFmode));
7789
7790 if (code == POST_INC || code == PRE_DEC
7791 || ((code == PRE_INC || code == POST_DEC)
7792 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7793 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7794
7795 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7796 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7797 && GET_CODE (XEXP (x, 1)) == PLUS
7798 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7799 {
7800 rtx addend = XEXP (XEXP (x, 1), 1);
7801
7802 /* Don't allow ldrd post increment by register because it's hard
7803 to fixup invalid register choices. */
7804 if (use_ldrd
7805 && GET_CODE (x) == POST_MODIFY
7806 && REG_P (addend))
7807 return 0;
7808
7809 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7810 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7811 }
7812
7813 /* After reload constants split into minipools will have addresses
7814 from a LABEL_REF. */
7815 else if (reload_completed
7816 && (code == LABEL_REF
7817 || (code == CONST
7818 && GET_CODE (XEXP (x, 0)) == PLUS
7819 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7820 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7821 return 1;
7822
7823 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7824 return 0;
7825
7826 else if (code == PLUS)
7827 {
7828 rtx xop0 = XEXP (x, 0);
7829 rtx xop1 = XEXP (x, 1);
7830
7831 return ((arm_address_register_rtx_p (xop0, strict_p)
7832 && ((CONST_INT_P (xop1)
7833 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7834 || (!strict_p && will_be_in_index_register (xop1))))
7835 || (arm_address_register_rtx_p (xop1, strict_p)
7836 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7837 }
7838
7839 #if 0
7840 /* Reload currently can't handle MINUS, so disable this for now */
7841 else if (GET_CODE (x) == MINUS)
7842 {
7843 rtx xop0 = XEXP (x, 0);
7844 rtx xop1 = XEXP (x, 1);
7845
7846 return (arm_address_register_rtx_p (xop0, strict_p)
7847 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7848 }
7849 #endif
7850
7851 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7852 && code == SYMBOL_REF
7853 && CONSTANT_POOL_ADDRESS_P (x)
7854 && ! (flag_pic
7855 && symbol_mentioned_p (get_pool_constant (x))
7856 && ! pcrel_constant_p (get_pool_constant (x))))
7857 return 1;
7858
7859 return 0;
7860 }
7861
7862 /* Return true if we can avoid creating a constant pool entry for x. */
7863 static bool
7864 can_avoid_literal_pool_for_label_p (rtx x)
7865 {
7866 /* Normally we can assign constant values to target registers without
7867 the help of the constant pool. But there are cases where we have to use
7868 the constant pool, for example when we:
7869 1) assign a label to a register;
7870 2) sign-extend an 8-bit value to 32 bits and then assign it to a register.
7871
7872 A constant pool access of the form:
7873 (set (reg r0) (mem (symbol_ref (".LC0"))))
7874 will cause the use of the literal pool (later, in function arm_reorg).
7875 So here we mark such a form as invalid, and the compiler
7876 will adjust it into:
7877 (set (reg r0) (symbol_ref (".LC0")))
7878 (set (reg r0) (mem (reg r0))).
7879 No extra register is required, and (mem (reg r0)) won't cause the use
7880 of literal pools. */
7881 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7882 && CONSTANT_POOL_ADDRESS_P (x))
7883 return 1;
7884 return 0;
7885 }
7886
7887
7888 /* Return nonzero if X is a valid Thumb-2 address operand. */
7889 static int
7890 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7891 {
7892 bool use_ldrd;
7893 enum rtx_code code = GET_CODE (x);
7894
7895 if (arm_address_register_rtx_p (x, strict_p))
7896 return 1;
7897
7898 use_ldrd = (TARGET_LDRD
7899 && (mode == DImode || mode == DFmode));
7900
7901 if (code == POST_INC || code == PRE_DEC
7902 || ((code == PRE_INC || code == POST_DEC)
7903 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7904 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7905
7906 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7907 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7908 && GET_CODE (XEXP (x, 1)) == PLUS
7909 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7910 {
7911 /* Thumb-2 only has autoincrement by constant. */
7912 rtx addend = XEXP (XEXP (x, 1), 1);
7913 HOST_WIDE_INT offset;
7914
7915 if (!CONST_INT_P (addend))
7916 return 0;
7917
7918 offset = INTVAL(addend);
7919 if (GET_MODE_SIZE (mode) <= 4)
7920 return (offset > -256 && offset < 256);
7921
7922 return (use_ldrd && offset > -1024 && offset < 1024
7923 && (offset & 3) == 0);
7924 }
7925
7926 /* After reload constants split into minipools will have addresses
7927 from a LABEL_REF. */
7928 else if (reload_completed
7929 && (code == LABEL_REF
7930 || (code == CONST
7931 && GET_CODE (XEXP (x, 0)) == PLUS
7932 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7933 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7934 return 1;
7935
7936 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7937 return 0;
7938
7939 else if (code == PLUS)
7940 {
7941 rtx xop0 = XEXP (x, 0);
7942 rtx xop1 = XEXP (x, 1);
7943
7944 return ((arm_address_register_rtx_p (xop0, strict_p)
7945 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7946 || (!strict_p && will_be_in_index_register (xop1))))
7947 || (arm_address_register_rtx_p (xop1, strict_p)
7948 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7949 }
7950
7951 else if (can_avoid_literal_pool_for_label_p (x))
7952 return 0;
7953
7954 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7955 && code == SYMBOL_REF
7956 && CONSTANT_POOL_ADDRESS_P (x)
7957 && ! (flag_pic
7958 && symbol_mentioned_p (get_pool_constant (x))
7959 && ! pcrel_constant_p (get_pool_constant (x))))
7960 return 1;
7961
7962 return 0;
7963 }
7964
7965 /* Return nonzero if INDEX is valid for an address index operand in
7966 ARM state. */
7967 static int
7968 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7969 int strict_p)
7970 {
7971 HOST_WIDE_INT range;
7972 enum rtx_code code = GET_CODE (index);
7973
7974 /* Standard coprocessor addressing modes. */
7975 if (TARGET_HARD_FLOAT
7976 && (mode == SFmode || mode == DFmode))
7977 return (code == CONST_INT && INTVAL (index) < 1024
7978 && INTVAL (index) > -1024
7979 && (INTVAL (index) & 3) == 0);
7980
7981 /* For quad modes, we restrict the constant offset to be slightly less
7982 than what the instruction format permits. We do this because for
7983 quad mode moves, we will actually decompose them into two separate
7984 double-mode reads or writes. INDEX must therefore be a valid
7985 (double-mode) offset and so should INDEX+8. */
7986 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7987 return (code == CONST_INT
7988 && INTVAL (index) < 1016
7989 && INTVAL (index) > -1024
7990 && (INTVAL (index) & 3) == 0);
7991
7992 /* We have no such constraint on double mode offsets, so we permit the
7993 full range of the instruction format. */
7994 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7995 return (code == CONST_INT
7996 && INTVAL (index) < 1024
7997 && INTVAL (index) > -1024
7998 && (INTVAL (index) & 3) == 0);
7999
8000 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8001 return (code == CONST_INT
8002 && INTVAL (index) < 1024
8003 && INTVAL (index) > -1024
8004 && (INTVAL (index) & 3) == 0);
8005
8006 if (arm_address_register_rtx_p (index, strict_p)
8007 && (GET_MODE_SIZE (mode) <= 4))
8008 return 1;
8009
8010 if (mode == DImode || mode == DFmode)
8011 {
8012 if (code == CONST_INT)
8013 {
8014 HOST_WIDE_INT val = INTVAL (index);
8015
8016 /* Assume we emit ldrd, or 2x ldr if !TARGET_LDRD.
8017 If vldr is selected it uses arm_coproc_mem_operand. */
8018 if (TARGET_LDRD)
8019 return val > -256 && val < 256;
8020 else
8021 return val > -4096 && val < 4092;
8022 }
8023
8024 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8025 }
8026
8027 if (GET_MODE_SIZE (mode) <= 4
8028 && ! (arm_arch4
8029 && (mode == HImode
8030 || mode == HFmode
8031 || (mode == QImode && outer == SIGN_EXTEND))))
8032 {
8033 if (code == MULT)
8034 {
8035 rtx xiop0 = XEXP (index, 0);
8036 rtx xiop1 = XEXP (index, 1);
8037
8038 return ((arm_address_register_rtx_p (xiop0, strict_p)
8039 && power_of_two_operand (xiop1, SImode))
8040 || (arm_address_register_rtx_p (xiop1, strict_p)
8041 && power_of_two_operand (xiop0, SImode)));
8042 }
8043 else if (code == LSHIFTRT || code == ASHIFTRT
8044 || code == ASHIFT || code == ROTATERT)
8045 {
8046 rtx op = XEXP (index, 1);
8047
8048 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8049 && CONST_INT_P (op)
8050 && INTVAL (op) > 0
8051 && INTVAL (op) <= 31);
8052 }
8053 }
8054
8055 /* For ARM v4 we may be doing a sign-extend operation during the
8056 load. */
8057 if (arm_arch4)
8058 {
8059 if (mode == HImode
8060 || mode == HFmode
8061 || (outer == SIGN_EXTEND && mode == QImode))
8062 range = 256;
8063 else
8064 range = 4096;
8065 }
8066 else
8067 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8068
8069 return (code == CONST_INT
8070 && INTVAL (index) < range
8071 && INTVAL (index) > -range);
8072 }
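
/* Descriptive note (an editor's addition): for SImode the checks above
   accept, for example, [rN, #4095], [rN, #-4095], [rN, rM] and scaled
   forms such as [rN, rM, lsl #2], while DImode/DFmode with LDRD is
   limited to [rN, #imm] with -255 <= imm <= 255.  */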
8073
8074 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8075 index operand, i.e. 1, 2, 4 or 8. */
8076 static bool
8077 thumb2_index_mul_operand (rtx op)
8078 {
8079 HOST_WIDE_INT val;
8080
8081 if (!CONST_INT_P (op))
8082 return false;
8083
8084 val = INTVAL(op);
8085 return (val == 1 || val == 2 || val == 4 || val == 8);
8086 }
8087
8088 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8089 static int
8090 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8091 {
8092 enum rtx_code code = GET_CODE (index);
8093
8094 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8095 /* Standard coprocessor addressing modes. */
8096 if (TARGET_HARD_FLOAT
8097 && (mode == SFmode || mode == DFmode))
8098 return (code == CONST_INT && INTVAL (index) < 1024
8099 /* Thumb-2 allows only > -256 index range for its core register
8100 load/stores. Since we allow SF/DF in core registers, we have
8101 to use the intersection between -256~4096 (core) and -1024~1024
8102 (coprocessor). */
8103 && INTVAL (index) > -256
8104 && (INTVAL (index) & 3) == 0);
8105
8106 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8107 {
8108 /* For DImode assume values will usually live in core regs
8109 and only allow LDRD addressing modes. */
8110 if (!TARGET_LDRD || mode != DImode)
8111 return (code == CONST_INT
8112 && INTVAL (index) < 1024
8113 && INTVAL (index) > -1024
8114 && (INTVAL (index) & 3) == 0);
8115 }
8116
8117 /* For quad modes, we restrict the constant offset to be slightly less
8118 than what the instruction format permits. We do this because for
8119 quad mode moves, we will actually decompose them into two separate
8120 double-mode reads or writes. INDEX must therefore be a valid
8121 (double-mode) offset and so should INDEX+8. */
8122 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8123 return (code == CONST_INT
8124 && INTVAL (index) < 1016
8125 && INTVAL (index) > -1024
8126 && (INTVAL (index) & 3) == 0);
8127
8128 /* We have no such constraint on double mode offsets, so we permit the
8129 full range of the instruction format. */
8130 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8131 return (code == CONST_INT
8132 && INTVAL (index) < 1024
8133 && INTVAL (index) > -1024
8134 && (INTVAL (index) & 3) == 0);
8135
8136 if (arm_address_register_rtx_p (index, strict_p)
8137 && (GET_MODE_SIZE (mode) <= 4))
8138 return 1;
8139
8140 if (mode == DImode || mode == DFmode)
8141 {
8142 if (code == CONST_INT)
8143 {
8144 HOST_WIDE_INT val = INTVAL (index);
8145 /* Thumb-2 ldrd only has reg+const addressing modes.
8146 Assume we emit ldrd, or 2x ldr if !TARGET_LDRD.
8147 If vldr is selected it uses arm_coproc_mem_operand. */
8148 if (TARGET_LDRD)
8149 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8150 else
8151 return IN_RANGE (val, -255, 4095 - 4);
8152 }
8153 else
8154 return 0;
8155 }
8156
8157 if (code == MULT)
8158 {
8159 rtx xiop0 = XEXP (index, 0);
8160 rtx xiop1 = XEXP (index, 1);
8161
8162 return ((arm_address_register_rtx_p (xiop0, strict_p)
8163 && thumb2_index_mul_operand (xiop1))
8164 || (arm_address_register_rtx_p (xiop1, strict_p)
8165 && thumb2_index_mul_operand (xiop0)));
8166 }
8167 else if (code == ASHIFT)
8168 {
8169 rtx op = XEXP (index, 1);
8170
8171 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8172 && CONST_INT_P (op)
8173 && INTVAL (op) > 0
8174 && INTVAL (op) <= 3);
8175 }
8176
8177 return (code == CONST_INT
8178 && INTVAL (index) < 4096
8179 && INTVAL (index) > -256);
8180 }
8181
8182 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8183 static int
8184 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8185 {
8186 int regno;
8187
8188 if (!REG_P (x))
8189 return 0;
8190
8191 regno = REGNO (x);
8192
8193 if (strict_p)
8194 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8195
8196 return (regno <= LAST_LO_REGNUM
8197 || regno > LAST_VIRTUAL_REGISTER
8198 || regno == FRAME_POINTER_REGNUM
8199 || (GET_MODE_SIZE (mode) >= 4
8200 && (regno == STACK_POINTER_REGNUM
8201 || regno >= FIRST_PSEUDO_REGISTER
8202 || x == hard_frame_pointer_rtx
8203 || x == arg_pointer_rtx)));
8204 }
8205
8206 /* Return nonzero if x is a legitimate index register. This is the case
8207 for any base register that can access a QImode object. */
8208 inline static int
8209 thumb1_index_register_rtx_p (rtx x, int strict_p)
8210 {
8211 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8212 }
8213
8214 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8215
8216 The AP may be eliminated to either the SP or the FP, so we use the
8217 least common denominator, e.g. SImode, and offsets from 0 to 64.
8218
8219 ??? Verify whether the above is the right approach.
8220
8221 ??? Also, the FP may be eliminated to the SP, so perhaps that
8222 needs special handling also.
8223
8224 ??? Look at how the mips16 port solves this problem. It probably uses
8225 better ways to solve some of these problems.
8226
8227 Although it is not incorrect, we don't accept QImode and HImode
8228 addresses based on the frame pointer or arg pointer until the
8229 reload pass starts. This is so that eliminating such addresses
8230 into stack based ones won't produce impossible code. */
8231 int
8232 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8233 {
8234 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8235 return 0;
8236
8237 /* ??? Not clear if this is right. Experiment. */
8238 if (GET_MODE_SIZE (mode) < 4
8239 && !(reload_in_progress || reload_completed)
8240 && (reg_mentioned_p (frame_pointer_rtx, x)
8241 || reg_mentioned_p (arg_pointer_rtx, x)
8242 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8243 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8244 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8245 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8246 return 0;
8247
8248 /* Accept any base register. SP only in SImode or larger. */
8249 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8250 return 1;
8251
8252 /* This is PC relative data before arm_reorg runs. */
8253 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8254 && GET_CODE (x) == SYMBOL_REF
8255 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8256 return 1;
8257
8258 /* This is PC relative data after arm_reorg runs. */
8259 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8260 && reload_completed
8261 && (GET_CODE (x) == LABEL_REF
8262 || (GET_CODE (x) == CONST
8263 && GET_CODE (XEXP (x, 0)) == PLUS
8264 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8265 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8266 return 1;
8267
8268 /* Post-inc indexing only supported for SImode and larger. */
8269 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8270 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8271 return 1;
8272
8273 else if (GET_CODE (x) == PLUS)
8274 {
8275 /* REG+REG address can be any two index registers. */
8276 /* We disallow FRAME+REG addressing since we know that FRAME
8277 will be replaced with STACK, and SP relative addressing only
8278 permits SP+OFFSET. */
8279 if (GET_MODE_SIZE (mode) <= 4
8280 && XEXP (x, 0) != frame_pointer_rtx
8281 && XEXP (x, 1) != frame_pointer_rtx
8282 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8283 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8284 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8285 return 1;
8286
8287 /* REG+const has 5-7 bit offset for non-SP registers. */
8288 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8289 || XEXP (x, 0) == arg_pointer_rtx)
8290 && CONST_INT_P (XEXP (x, 1))
8291 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8292 return 1;
8293
8294 /* REG+const has 10-bit offset for SP, but only SImode and
8295 larger is supported. */
8296 /* ??? Should probably check for DI/DFmode overflow here
8297 just like GO_IF_LEGITIMATE_OFFSET does. */
8298 else if (REG_P (XEXP (x, 0))
8299 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8300 && GET_MODE_SIZE (mode) >= 4
8301 && CONST_INT_P (XEXP (x, 1))
8302 && INTVAL (XEXP (x, 1)) >= 0
8303 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8304 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8305 return 1;
8306
8307 else if (REG_P (XEXP (x, 0))
8308 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8309 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8310 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8311 && REGNO (XEXP (x, 0))
8312 <= LAST_VIRTUAL_POINTER_REGISTER))
8313 && GET_MODE_SIZE (mode) >= 4
8314 && CONST_INT_P (XEXP (x, 1))
8315 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8316 return 1;
8317 }
8318
8319 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8320 && GET_MODE_SIZE (mode) == 4
8321 && GET_CODE (x) == SYMBOL_REF
8322 && CONSTANT_POOL_ADDRESS_P (x)
8323 && ! (flag_pic
8324 && symbol_mentioned_p (get_pool_constant (x))
8325 && ! pcrel_constant_p (get_pool_constant (x))))
8326 return 1;
8327
8328 return 0;
8329 }
8330
8331 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8332 instruction of mode MODE. */
8333 int
8334 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8335 {
8336 switch (GET_MODE_SIZE (mode))
8337 {
8338 case 1:
8339 return val >= 0 && val < 32;
8340
8341 case 2:
8342 return val >= 0 && val < 64 && (val & 1) == 0;
8343
8344 default:
8345 return (val >= 0
8346 && (val + GET_MODE_SIZE (mode)) <= 128
8347 && (val & 3) == 0);
8348 }
8349 }
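
/* Descriptive note (an editor's addition): these ranges match the Thumb-1
   immediate-offset load/store encodings, e.g. 0..31 for byte accesses,
   0..62 in steps of 2 for halfwords, and 0..124 in steps of 4 for a mode
   of size 4.  */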
8350
8351 bool
8352 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8353 {
8354 if (TARGET_ARM)
8355 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8356 else if (TARGET_THUMB2)
8357 return thumb2_legitimate_address_p (mode, x, strict_p);
8358 else /* if (TARGET_THUMB1) */
8359 return thumb1_legitimate_address_p (mode, x, strict_p);
8360 }
8361
8362 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8363
8364 Given an rtx X being reloaded into a reg required to be
8365 in class CLASS, return the class of reg to actually use.
8366 In general this is just CLASS, but for the Thumb core registers and
8367 immediate constants we prefer a LO_REGS class or a subset. */
8368
8369 static reg_class_t
8370 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8371 {
8372 if (TARGET_32BIT)
8373 return rclass;
8374 else
8375 {
8376 if (rclass == GENERAL_REGS)
8377 return LO_REGS;
8378 else
8379 return rclass;
8380 }
8381 }
8382
8383 /* Build the SYMBOL_REF for __tls_get_addr. */
8384
8385 static GTY(()) rtx tls_get_addr_libfunc;
8386
8387 static rtx
8388 get_tls_get_addr (void)
8389 {
8390 if (!tls_get_addr_libfunc)
8391 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8392 return tls_get_addr_libfunc;
8393 }
8394
8395 rtx
8396 arm_load_tp (rtx target)
8397 {
8398 if (!target)
8399 target = gen_reg_rtx (SImode);
8400
8401 if (TARGET_HARD_TP)
8402 {
8403 /* Can return in any reg. */
8404 emit_insn (gen_load_tp_hard (target));
8405 }
8406 else
8407 {
8408 /* Always returned in r0. Immediately copy the result into a pseudo,
8409 otherwise other uses of r0 (e.g. setting up function arguments) may
8410 clobber the value. */
8411
8412 rtx tmp;
8413
8414 emit_insn (gen_load_tp_soft ());
8415
8416 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8417 emit_move_insn (target, tmp);
8418 }
8419 return target;
8420 }
8421
8422 static rtx
8423 load_tls_operand (rtx x, rtx reg)
8424 {
8425 rtx tmp;
8426
8427 if (reg == NULL_RTX)
8428 reg = gen_reg_rtx (SImode);
8429
8430 tmp = gen_rtx_CONST (SImode, x);
8431
8432 emit_move_insn (reg, tmp);
8433
8434 return reg;
8435 }
8436
8437 static rtx_insn *
8438 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8439 {
8440 rtx label, labelno, sum;
8441
8442 gcc_assert (reloc != TLS_DESCSEQ);
8443 start_sequence ();
8444
8445 labelno = GEN_INT (pic_labelno++);
8446 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8447 label = gen_rtx_CONST (VOIDmode, label);
8448
8449 sum = gen_rtx_UNSPEC (Pmode,
8450 gen_rtvec (4, x, GEN_INT (reloc), label,
8451 GEN_INT (TARGET_ARM ? 8 : 4)),
8452 UNSPEC_TLS);
8453 reg = load_tls_operand (sum, reg);
8454
8455 if (TARGET_ARM)
8456 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8457 else
8458 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8459
8460 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8461 LCT_PURE, /* LCT_CONST? */
8462 Pmode, reg, Pmode);
8463
8464 rtx_insn *insns = get_insns ();
8465 end_sequence ();
8466
8467 return insns;
8468 }
8469
8470 static rtx
8471 arm_tls_descseq_addr (rtx x, rtx reg)
8472 {
8473 rtx labelno = GEN_INT (pic_labelno++);
8474 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8475 rtx sum = gen_rtx_UNSPEC (Pmode,
8476 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8477 gen_rtx_CONST (VOIDmode, label),
8478 GEN_INT (!TARGET_ARM)),
8479 UNSPEC_TLS);
8480 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8481
8482 emit_insn (gen_tlscall (x, labelno));
8483 if (!reg)
8484 reg = gen_reg_rtx (SImode);
8485 else
8486 gcc_assert (REGNO (reg) != R0_REGNUM);
8487
8488 emit_move_insn (reg, reg0);
8489
8490 return reg;
8491 }
8492
8493 rtx
8494 legitimize_tls_address (rtx x, rtx reg)
8495 {
8496 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8497 rtx_insn *insns;
8498 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8499
8500 switch (model)
8501 {
8502 case TLS_MODEL_GLOBAL_DYNAMIC:
8503 if (TARGET_GNU2_TLS)
8504 {
8505 reg = arm_tls_descseq_addr (x, reg);
8506
8507 tp = arm_load_tp (NULL_RTX);
8508
8509 dest = gen_rtx_PLUS (Pmode, tp, reg);
8510 }
8511 else
8512 {
8513 /* Original scheme */
8514 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8515 dest = gen_reg_rtx (Pmode);
8516 emit_libcall_block (insns, dest, ret, x);
8517 }
8518 return dest;
8519
8520 case TLS_MODEL_LOCAL_DYNAMIC:
8521 if (TARGET_GNU2_TLS)
8522 {
8523 reg = arm_tls_descseq_addr (x, reg);
8524
8525 tp = arm_load_tp (NULL_RTX);
8526
8527 dest = gen_rtx_PLUS (Pmode, tp, reg);
8528 }
8529 else
8530 {
8531 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8532
8533 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8534 share the LDM result with other LD model accesses. */
8535 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8536 UNSPEC_TLS);
8537 dest = gen_reg_rtx (Pmode);
8538 emit_libcall_block (insns, dest, ret, eqv);
8539
8540 /* Load the addend. */
8541 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8542 GEN_INT (TLS_LDO32)),
8543 UNSPEC_TLS);
8544 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8545 dest = gen_rtx_PLUS (Pmode, dest, addend);
8546 }
8547 return dest;
8548
8549 case TLS_MODEL_INITIAL_EXEC:
8550 labelno = GEN_INT (pic_labelno++);
8551 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8552 label = gen_rtx_CONST (VOIDmode, label);
8553 sum = gen_rtx_UNSPEC (Pmode,
8554 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8555 GEN_INT (TARGET_ARM ? 8 : 4)),
8556 UNSPEC_TLS);
8557 reg = load_tls_operand (sum, reg);
8558
8559 if (TARGET_ARM)
8560 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8561 else if (TARGET_THUMB2)
8562 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8563 else
8564 {
8565 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8566 emit_move_insn (reg, gen_const_mem (SImode, reg));
8567 }
8568
8569 tp = arm_load_tp (NULL_RTX);
8570
8571 return gen_rtx_PLUS (Pmode, tp, reg);
8572
8573 case TLS_MODEL_LOCAL_EXEC:
8574 tp = arm_load_tp (NULL_RTX);
8575
8576 reg = gen_rtx_UNSPEC (Pmode,
8577 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8578 UNSPEC_TLS);
8579 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8580
8581 return gen_rtx_PLUS (Pmode, tp, reg);
8582
8583 default:
8584 abort ();
8585 }
8586 }
8587
8588 /* Try machine-dependent ways of modifying an illegitimate address
8589 to be legitimate. If we find one, return the new, valid address. */
8590 rtx
8591 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8592 {
8593 if (arm_tls_referenced_p (x))
8594 {
8595 rtx addend = NULL;
8596
8597 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8598 {
8599 addend = XEXP (XEXP (x, 0), 1);
8600 x = XEXP (XEXP (x, 0), 0);
8601 }
8602
8603 if (GET_CODE (x) != SYMBOL_REF)
8604 return x;
8605
8606 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8607
8608 x = legitimize_tls_address (x, NULL_RTX);
8609
8610 if (addend)
8611 {
8612 x = gen_rtx_PLUS (SImode, x, addend);
8613 orig_x = x;
8614 }
8615 else
8616 return x;
8617 }
8618
8619 if (!TARGET_ARM)
8620 {
8621 /* TODO: legitimize_address for Thumb2. */
8622 if (TARGET_THUMB2)
8623 return x;
8624 return thumb_legitimize_address (x, orig_x, mode);
8625 }
8626
8627 if (GET_CODE (x) == PLUS)
8628 {
8629 rtx xop0 = XEXP (x, 0);
8630 rtx xop1 = XEXP (x, 1);
8631
8632 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8633 xop0 = force_reg (SImode, xop0);
8634
8635 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8636 && !symbol_mentioned_p (xop1))
8637 xop1 = force_reg (SImode, xop1);
8638
8639 if (ARM_BASE_REGISTER_RTX_P (xop0)
8640 && CONST_INT_P (xop1))
8641 {
8642 HOST_WIDE_INT n, low_n;
8643 rtx base_reg, val;
8644 n = INTVAL (xop1);
8645
8646 /* VFP addressing modes actually allow greater offsets, but for
8647 now we just stick with the lowest common denominator. */
8648 if (mode == DImode || mode == DFmode)
8649 {
8650 low_n = n & 0x0f;
8651 n &= ~0x0f;
8652 if (low_n > 4)
8653 {
8654 n += 16;
8655 low_n -= 16;
8656 }
8657 }
8658 else
8659 {
8660 low_n = ((mode) == TImode ? 0
8661 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8662 n -= low_n;
8663 }
8664
8665 base_reg = gen_reg_rtx (SImode);
8666 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8667 emit_move_insn (base_reg, val);
8668 x = plus_constant (Pmode, base_reg, low_n);
8669 }
8670 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8671 x = gen_rtx_PLUS (SImode, xop0, xop1);
8672 }
8673
8674 /* XXX We don't allow MINUS any more -- see comment in
8675 arm_legitimate_address_outer_p (). */
8676 else if (GET_CODE (x) == MINUS)
8677 {
8678 rtx xop0 = XEXP (x, 0);
8679 rtx xop1 = XEXP (x, 1);
8680
8681 if (CONSTANT_P (xop0))
8682 xop0 = force_reg (SImode, xop0);
8683
8684 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8685 xop1 = force_reg (SImode, xop1);
8686
8687 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8688 x = gen_rtx_MINUS (SImode, xop0, xop1);
8689 }
8690
8691 /* Make sure to take full advantage of the pre-indexed addressing mode
8692 with absolute addresses, which often allows the base register to be
8693 shared between multiple adjacent memory references, and might even
8694 allow the minipool to be avoided entirely. */
8695 else if (CONST_INT_P (x) && optimize > 0)
8696 {
8697 unsigned int bits;
8698 HOST_WIDE_INT mask, base, index;
8699 rtx base_reg;
8700
8701 /* ldr and ldrb can use a 12-bit index; ldrsb and the rest can only
8702 use an 8-bit index.  So let's use a 12-bit index for SImode only and
8703 hope that arm_gen_constant will enable ldrb to use more bits. */
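/* Illustrative example: for an SImode access to the absolute address
   0x1234, mask is 0xfff, so base becomes 0x1000 and index 0x234; only
   the base is forced into a register and the index is folded into the
   addressing mode.  */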
8704 bits = (mode == SImode) ? 12 : 8;
8705 mask = (1 << bits) - 1;
8706 base = INTVAL (x) & ~mask;
8707 index = INTVAL (x) & mask;
8708 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8709 {
8710 /* It'll most probably be more efficient to generate the base
8711 with more bits set and use a negative index instead. */
8712 base |= mask;
8713 index -= mask;
8714 }
8715 base_reg = force_reg (SImode, GEN_INT (base));
8716 x = plus_constant (Pmode, base_reg, index);
8717 }
8718
8719 if (flag_pic)
8720 {
8721 /* We need to find and carefully transform any SYMBOL and LABEL
8722 references, so go back to the original address expression. */
8723 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8724
8725 if (new_x != orig_x)
8726 x = new_x;
8727 }
8728
8729 return x;
8730 }
8731
8732
8733 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8734 to be legitimate. If we find one, return the new, valid address. */
8735 rtx
8736 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8737 {
8738 if (GET_CODE (x) == PLUS
8739 && CONST_INT_P (XEXP (x, 1))
8740 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8741 || INTVAL (XEXP (x, 1)) < 0))
8742 {
8743 rtx xop0 = XEXP (x, 0);
8744 rtx xop1 = XEXP (x, 1);
8745 HOST_WIDE_INT offset = INTVAL (xop1);
8746
8747 /* Try to fold the offset into a bias of the base register and
8748 then offset from that.  Don't do this when optimizing for space,
8749 since it can cause too many CSEs. */
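/* Worked example (illustrative, under optimize_size): for an SImode
   access at base + 300, delta becomes 300 - 252 = 48, so the base is
   biased by 252 and the remaining offset of 48 fits the Thumb-1 5-bit
   scaled immediate range (0..124).  */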
8750 if (optimize_size && offset >= 0
8751 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8752 {
8753 HOST_WIDE_INT delta;
8754
8755 if (offset >= 256)
8756 delta = offset - (256 - GET_MODE_SIZE (mode));
8757 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8758 delta = 31 * GET_MODE_SIZE (mode);
8759 else
8760 delta = offset & (~31 * GET_MODE_SIZE (mode));
8761
8762 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8763 NULL_RTX);
8764 x = plus_constant (Pmode, xop0, delta);
8765 }
8766 else if (offset < 0 && offset > -256)
8767 /* Small negative offsets are best done with a subtract before the
8768 dereference; forcing these into a register normally takes two
8769 instructions. */
8770 x = force_operand (x, NULL_RTX);
8771 else
8772 {
8773 /* For the remaining cases, force the constant into a register. */
8774 xop1 = force_reg (SImode, xop1);
8775 x = gen_rtx_PLUS (SImode, xop0, xop1);
8776 }
8777 }
8778 else if (GET_CODE (x) == PLUS
8779 && s_register_operand (XEXP (x, 1), SImode)
8780 && !s_register_operand (XEXP (x, 0), SImode))
8781 {
8782 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8783
8784 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8785 }
8786
8787 if (flag_pic)
8788 {
8789 /* We need to find and carefully transform any SYMBOL and LABEL
8790 references, so go back to the original address expression. */
8791 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8792
8793 if (new_x != orig_x)
8794 x = new_x;
8795 }
8796
8797 return x;
8798 }
8799
8800 /* Return TRUE if X contains any TLS symbol references. */
8801
8802 bool
8803 arm_tls_referenced_p (rtx x)
8804 {
8805 if (! TARGET_HAVE_TLS)
8806 return false;
8807
8808 subrtx_iterator::array_type array;
8809 FOR_EACH_SUBRTX (iter, array, x, ALL)
8810 {
8811 const_rtx x = *iter;
8812 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8813 {
8814 /* ARM currently does not provide relocations to encode TLS variables
8815 into AArch32 instructions, only data, so there is currently no way
8816 to implement these if a literal pool is disabled. */
8817 if (arm_disable_literal_pool)
8818 sorry ("accessing thread-local storage is not currently supported "
8819 "with -mpure-code or -mslow-flash-data");
8820
8821 return true;
8822 }
8823
8824 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8825 TLS offsets, not real symbol references. */
8826 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8827 iter.skip_subrtxes ();
8828 }
8829 return false;
8830 }
8831
8832 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8833
8834 On the ARM, allow any integer (invalid ones are removed later by insn
8835 patterns), nice doubles and symbol_refs which refer to the function's
8836 constant pool XXX.
8837
8838 When generating PIC, allow anything. */
8839
8840 static bool
8841 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8842 {
8843 return flag_pic || !label_mentioned_p (x);
8844 }
8845
8846 static bool
8847 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8848 {
8849 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8850 RTXs.  These RTXs must therefore be allowed for Thumb-1 so that the result
8851 is valid when compiling for ARMv8-M Baseline or later. */
8852 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8853 x = XEXP (x, 0);
8854
8855 return (CONST_INT_P (x)
8856 || CONST_DOUBLE_P (x)
8857 || CONSTANT_ADDRESS_P (x)
8858 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8859 || flag_pic);
8860 }
8861
8862 static bool
8863 arm_legitimate_constant_p (machine_mode mode, rtx x)
8864 {
8865 return (!arm_cannot_force_const_mem (mode, x)
8866 && (TARGET_32BIT
8867 ? arm_legitimate_constant_p_1 (mode, x)
8868 : thumb_legitimate_constant_p (mode, x)));
8869 }
8870
8871 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8872
8873 static bool
8874 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8875 {
8876 rtx base, offset;
8877
8878 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8879 {
8880 split_const (x, &base, &offset);
8881 if (GET_CODE (base) == SYMBOL_REF
8882 && !offset_within_block_p (base, INTVAL (offset)))
8883 return true;
8884 }
8885 return arm_tls_referenced_p (x);
8886 }
8887 \f
8888 #define REG_OR_SUBREG_REG(X) \
8889 (REG_P (X) \
8890 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8891
8892 #define REG_OR_SUBREG_RTX(X) \
8893 (REG_P (X) ? (X) : SUBREG_REG (X))
8894
8895 static inline int
8896 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8897 {
8898 machine_mode mode = GET_MODE (x);
8899 int total, words;
8900
8901 switch (code)
8902 {
8903 case ASHIFT:
8904 case ASHIFTRT:
8905 case LSHIFTRT:
8906 case ROTATERT:
8907 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8908
8909 case PLUS:
8910 case MINUS:
8911 case COMPARE:
8912 case NEG:
8913 case NOT:
8914 return COSTS_N_INSNS (1);
8915
8916 case MULT:
8917 if (arm_arch6m && arm_m_profile_small_mul)
8918 return COSTS_N_INSNS (32);
8919
8920 if (CONST_INT_P (XEXP (x, 1)))
8921 {
8922 int cycles = 0;
8923 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8924
8925 while (i)
8926 {
8927 i >>= 2;
8928 cycles++;
8929 }
8930 return COSTS_N_INSNS (2) + cycles;
8931 }
8932 return COSTS_N_INSNS (1) + 16;
8933
8934 case SET:
8935 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8936 the mode. */
8937 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8938 return (COSTS_N_INSNS (words)
8939 + 4 * ((MEM_P (SET_SRC (x)))
8940 + MEM_P (SET_DEST (x))));
8941
8942 case CONST_INT:
8943 if (outer == SET)
8944 {
8945 if (UINTVAL (x) < 256
8946 /* 16-bit constant. */
8947 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8948 return 0;
8949 if (thumb_shiftable_const (INTVAL (x)))
8950 return COSTS_N_INSNS (2);
8951 return COSTS_N_INSNS (3);
8952 }
8953 else if ((outer == PLUS || outer == COMPARE)
8954 && INTVAL (x) < 256 && INTVAL (x) > -256)
8955 return 0;
8956 else if ((outer == IOR || outer == XOR || outer == AND)
8957 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8958 return COSTS_N_INSNS (1);
8959 else if (outer == AND)
8960 {
8961 int i;
8962 /* This duplicates the tests in the andsi3 expander. */
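/* Such a mask, of the form (1 << i) - 1 or its complement, can
   typically be applied with a shift-left/shift-right pair, hence the
   two-insn cost below.  */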
8963 for (i = 9; i <= 31; i++)
8964 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8965 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8966 return COSTS_N_INSNS (2);
8967 }
8968 else if (outer == ASHIFT || outer == ASHIFTRT
8969 || outer == LSHIFTRT)
8970 return 0;
8971 return COSTS_N_INSNS (2);
8972
8973 case CONST:
8974 case CONST_DOUBLE:
8975 case LABEL_REF:
8976 case SYMBOL_REF:
8977 return COSTS_N_INSNS (3);
8978
8979 case UDIV:
8980 case UMOD:
8981 case DIV:
8982 case MOD:
8983 return 100;
8984
8985 case TRUNCATE:
8986 return 99;
8987
8988 case AND:
8989 case XOR:
8990 case IOR:
8991 /* XXX guess. */
8992 return 8;
8993
8994 case MEM:
8995 /* XXX another guess. */
8996 /* Memory costs quite a lot for the first word, but subsequent words
8997 load at the equivalent of a single insn each. */
8998 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8999 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9000 ? 4 : 0));
9001
9002 case IF_THEN_ELSE:
9003 /* XXX a guess. */
9004 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9005 return 14;
9006 return 2;
9007
9008 case SIGN_EXTEND:
9009 case ZERO_EXTEND:
9010 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9011 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9012
9013 if (mode == SImode)
9014 return total;
9015
9016 if (arm_arch6)
9017 return total + COSTS_N_INSNS (1);
9018
9019 /* Assume a two-shift sequence. Increase the cost slightly so
9020 we prefer actual shifts over an extend operation. */
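/* For example, without sxtb a QImode sign extension is typically done
   as lsls rD, rS, #24 followed by asrs rD, rD, #24.  */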
9021 return total + 1 + COSTS_N_INSNS (2);
9022
9023 default:
9024 return 99;
9025 }
9026 }
9027
9028 /* Estimate the size cost of Thumb-1 instructions.
9029 For now most of the code is copied from thumb1_rtx_costs; we need
9030 finer-grained tuning when we have more related test cases. */
9031 static inline int
9032 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9033 {
9034 machine_mode mode = GET_MODE (x);
9035 int words, cost;
9036
9037 switch (code)
9038 {
9039 case ASHIFT:
9040 case ASHIFTRT:
9041 case LSHIFTRT:
9042 case ROTATERT:
9043 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9044
9045 case PLUS:
9046 case MINUS:
9047 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
9048 patterns defined by RTL expansion, especially for the expansion of
9049 multiplication. */
9050 if ((GET_CODE (XEXP (x, 0)) == MULT
9051 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9052 || (GET_CODE (XEXP (x, 1)) == MULT
9053 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9054 return COSTS_N_INSNS (2);
9055 /* Fall through. */
9056 case COMPARE:
9057 case NEG:
9058 case NOT:
9059 return COSTS_N_INSNS (1);
9060
9061 case MULT:
9062 if (CONST_INT_P (XEXP (x, 1)))
9063 {
9064 /* The Thumb-1 mul instruction can't operate on a constant; we must load it
9065 into a register first. */
9066 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9067 /* For targets that have a small, high-latency multiply unit, we prefer
9068 to synthesize the multiply with up to 5 instructions, giving a good
9069 balance between size and performance. */
9070 if (arm_arch6m && arm_m_profile_small_mul)
9071 return COSTS_N_INSNS (5);
9072 else
9073 return COSTS_N_INSNS (1) + const_size;
9074 }
9075 return COSTS_N_INSNS (1);
9076
9077 case SET:
9078 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9079 the mode. */
9080 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9081 cost = COSTS_N_INSNS (words);
9082 if (satisfies_constraint_J (SET_SRC (x))
9083 || satisfies_constraint_K (SET_SRC (x))
9084 /* Too big an immediate for a 2-byte mov, using MOVT. */
9085 || (CONST_INT_P (SET_SRC (x))
9086 && UINTVAL (SET_SRC (x)) >= 256
9087 && TARGET_HAVE_MOVT
9088 && satisfies_constraint_j (SET_SRC (x)))
9089 /* thumb1_movdi_insn. */
9090 || ((words > 1) && MEM_P (SET_SRC (x))))
9091 cost += COSTS_N_INSNS (1);
9092 return cost;
9093
9094 case CONST_INT:
9095 if (outer == SET)
9096 {
9097 if (UINTVAL (x) < 256)
9098 return COSTS_N_INSNS (1);
9099 /* movw is 4 bytes long. */
9100 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9101 return COSTS_N_INSNS (2);
9102 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9103 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9104 return COSTS_N_INSNS (2);
9105 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9106 if (thumb_shiftable_const (INTVAL (x)))
9107 return COSTS_N_INSNS (2);
9108 return COSTS_N_INSNS (3);
9109 }
9110 else if ((outer == PLUS || outer == COMPARE)
9111 && INTVAL (x) < 256 && INTVAL (x) > -256)
9112 return 0;
9113 else if ((outer == IOR || outer == XOR || outer == AND)
9114 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9115 return COSTS_N_INSNS (1);
9116 else if (outer == AND)
9117 {
9118 int i;
9119 /* This duplicates the tests in the andsi3 expander. */
9120 for (i = 9; i <= 31; i++)
9121 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9122 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9123 return COSTS_N_INSNS (2);
9124 }
9125 else if (outer == ASHIFT || outer == ASHIFTRT
9126 || outer == LSHIFTRT)
9127 return 0;
9128 return COSTS_N_INSNS (2);
9129
9130 case CONST:
9131 case CONST_DOUBLE:
9132 case LABEL_REF:
9133 case SYMBOL_REF:
9134 return COSTS_N_INSNS (3);
9135
9136 case UDIV:
9137 case UMOD:
9138 case DIV:
9139 case MOD:
9140 return 100;
9141
9142 case TRUNCATE:
9143 return 99;
9144
9145 case AND:
9146 case XOR:
9147 case IOR:
9148 return COSTS_N_INSNS (1);
9149
9150 case MEM:
9151 return (COSTS_N_INSNS (1)
9152 + COSTS_N_INSNS (1)
9153 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9154 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9155 ? COSTS_N_INSNS (1) : 0));
9156
9157 case IF_THEN_ELSE:
9158 /* XXX a guess. */
9159 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9160 return 14;
9161 return 2;
9162
9163 case ZERO_EXTEND:
9164 /* XXX still guessing. */
9165 switch (GET_MODE (XEXP (x, 0)))
9166 {
9167 case E_QImode:
9168 return (1 + (mode == DImode ? 4 : 0)
9169 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9170
9171 case E_HImode:
9172 return (4 + (mode == DImode ? 4 : 0)
9173 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9174
9175 case E_SImode:
9176 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9177
9178 default:
9179 return 99;
9180 }
9181
9182 default:
9183 return 99;
9184 }
9185 }
9186
9187 /* Helper function for arm_rtx_costs.  If the operand is a valid shift
9188 operand, then return the operand that is being shifted.  If the shift
9189 is not by a constant amount, then set *SHIFT_REG to point to the operand
9190 holding the shift amount.  Return NULL if OP is not a shifter operand. */
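/* For example, (mult (reg X) (const_int 4)) is a left shift by 2 in
   disguise, so (reg X) is returned; for (ashift (reg X) (reg Y)),
   (reg X) is returned and *SHIFT_REG is set to (reg Y).  */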
9191 static rtx
9192 shifter_op_p (rtx op, rtx *shift_reg)
9193 {
9194 enum rtx_code code = GET_CODE (op);
9195
9196 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9197 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9198 return XEXP (op, 0);
9199 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9200 return XEXP (op, 0);
9201 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9202 || code == ASHIFTRT)
9203 {
9204 if (!CONST_INT_P (XEXP (op, 1)))
9205 *shift_reg = XEXP (op, 1);
9206 return XEXP (op, 0);
9207 }
9208
9209 return NULL;
9210 }
9211
9212 static bool
9213 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9214 {
9215 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9216 rtx_code code = GET_CODE (x);
9217 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9218
9219 switch (XINT (x, 1))
9220 {
9221 case UNSPEC_UNALIGNED_LOAD:
9222 /* We can only do unaligned loads into the integer unit, and we can't
9223 use LDM or LDRD. */
9224 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9225 if (speed_p)
9226 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9227 + extra_cost->ldst.load_unaligned);
9228
9229 #ifdef NOT_YET
9230 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9231 ADDR_SPACE_GENERIC, speed_p);
9232 #endif
9233 return true;
9234
9235 case UNSPEC_UNALIGNED_STORE:
9236 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9237 if (speed_p)
9238 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9239 + extra_cost->ldst.store_unaligned);
9240
9241 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9242 #ifdef NOT_YET
9243 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9244 ADDR_SPACE_GENERIC, speed_p);
9245 #endif
9246 return true;
9247
9248 case UNSPEC_VRINTZ:
9249 case UNSPEC_VRINTP:
9250 case UNSPEC_VRINTM:
9251 case UNSPEC_VRINTR:
9252 case UNSPEC_VRINTX:
9253 case UNSPEC_VRINTA:
9254 if (speed_p)
9255 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9256
9257 return true;
9258 default:
9259 *cost = COSTS_N_INSNS (2);
9260 break;
9261 }
9262 return true;
9263 }
9264
9265 /* Cost of a libcall. We assume one insn per argument, an amount for the
9266 call (one insn for -Os) and then one for processing the result. */
9267 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
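/* For example, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when
   optimizing for speed and to COSTS_N_INSNS (4) when optimizing for
   size.  */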
9268
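/* If operand IDX of X is a left-shift shifter operand, add the cost of
   an OP with a shifted operand (plus the shift amount when it is held
   in a register) and return true from the enclosing cost function.
   Used by the narrow-mode PLUS and MINUS cases below.  */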
9269 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9270 do \
9271 { \
9272 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9273 if (shift_op != NULL \
9274 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9275 { \
9276 if (shift_reg) \
9277 { \
9278 if (speed_p) \
9279 *cost += extra_cost->alu.arith_shift_reg; \
9280 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9281 ASHIFT, 1, speed_p); \
9282 } \
9283 else if (speed_p) \
9284 *cost += extra_cost->alu.arith_shift; \
9285 \
9286 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9287 ASHIFT, 0, speed_p) \
9288 + rtx_cost (XEXP (x, 1 - IDX), \
9289 GET_MODE (shift_op), \
9290 OP, 1, speed_p)); \
9291 return true; \
9292 } \
9293 } \
9294 while (0)
9295
9296 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
9297 considering the costs of the addressing mode and memory access
9298 separately. */
9299 static bool
9300 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
9301 int *cost, bool speed_p)
9302 {
9303 machine_mode mode = GET_MODE (x);
9304
9305 *cost = COSTS_N_INSNS (1);
9306
9307 if (flag_pic
9308 && GET_CODE (XEXP (x, 0)) == PLUS
9309 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9310 /* This will be split into two instructions. Add the cost of the
9311 additional instruction here. The cost of the memory access is computed
9312 below. See arm.md:calculate_pic_address. */
9313 *cost += COSTS_N_INSNS (1);
9314
9315 /* Calculate cost of the addressing mode. */
9316 if (speed_p)
9317 {
9318 arm_addr_mode_op op_type;
9319 switch (GET_CODE (XEXP (x, 0)))
9320 {
9321 default:
9322 case REG:
9323 op_type = AMO_DEFAULT;
9324 break;
9325 case MINUS:
9326 /* MINUS does not appear in RTL, but the architecture supports it,
9327 so handle this case defensively. */
9328 /* fall through */
9329 case PLUS:
9330 op_type = AMO_NO_WB;
9331 break;
9332 case PRE_INC:
9333 case PRE_DEC:
9334 case POST_INC:
9335 case POST_DEC:
9336 case PRE_MODIFY:
9337 case POST_MODIFY:
9338 op_type = AMO_WB;
9339 break;
9340 }
9341
9342 if (VECTOR_MODE_P (mode))
9343 *cost += current_tune->addr_mode_costs->vector[op_type];
9344 else if (FLOAT_MODE_P (mode))
9345 *cost += current_tune->addr_mode_costs->fp[op_type];
9346 else
9347 *cost += current_tune->addr_mode_costs->integer[op_type];
9348 }
9349
9350 /* Calculate cost of memory access. */
9351 if (speed_p)
9352 {
9353 if (FLOAT_MODE_P (mode))
9354 {
9355 if (GET_MODE_SIZE (mode) == 8)
9356 *cost += extra_cost->ldst.loadd;
9357 else
9358 *cost += extra_cost->ldst.loadf;
9359 }
9360 else if (VECTOR_MODE_P (mode))
9361 *cost += extra_cost->ldst.loadv;
9362 else
9363 {
9364 /* Integer modes */
9365 if (GET_MODE_SIZE (mode) == 8)
9366 *cost += extra_cost->ldst.ldrd;
9367 else
9368 *cost += extra_cost->ldst.load;
9369 }
9370 }
9371
9372 return true;
9373 }
9374
9375 /* RTX costs. Make an estimate of the cost of executing the operation
9376 X, which is contained within an operation with code OUTER_CODE.
9377 SPEED_P indicates whether the cost desired is the performance cost,
9378 or the size cost. The estimate is stored in COST and the return
9379 value is TRUE if the cost calculation is final, or FALSE if the
9380 caller should recurse through the operands of X to add additional
9381 costs.
9382
9383 We currently make no attempt to model the size savings of Thumb-2
9384 16-bit instructions. At the normal points in compilation where
9385 this code is called we have no measure of whether the condition
9386 flags are live or not, and thus no realistic way to determine what
9387 the size will eventually be. */
9388 static bool
9389 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9390 const struct cpu_cost_table *extra_cost,
9391 int *cost, bool speed_p)
9392 {
9393 machine_mode mode = GET_MODE (x);
9394
9395 *cost = COSTS_N_INSNS (1);
9396
9397 if (TARGET_THUMB1)
9398 {
9399 if (speed_p)
9400 *cost = thumb1_rtx_costs (x, code, outer_code);
9401 else
9402 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9403 return true;
9404 }
9405
9406 switch (code)
9407 {
9408 case SET:
9409 *cost = 0;
9410 /* SET RTXs don't have a mode so we get it from the destination. */
9411 mode = GET_MODE (SET_DEST (x));
9412
9413 if (REG_P (SET_SRC (x))
9414 && REG_P (SET_DEST (x)))
9415 {
9416 /* Assume that most copies can be done with a single insn,
9417 unless we don't have HW FP, in which case everything
9418 larger than word mode will require two insns. */
9419 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9420 && GET_MODE_SIZE (mode) > 4)
9421 || mode == DImode)
9422 ? 2 : 1);
9423 /* Conditional register moves can be encoded
9424 in 16 bits in Thumb mode. */
9425 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9426 *cost >>= 1;
9427
9428 return true;
9429 }
9430
9431 if (CONST_INT_P (SET_SRC (x)))
9432 {
9433 /* Handle CONST_INT here, since the value doesn't have a mode
9434 and we would otherwise be unable to work out the true cost. */
9435 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9436 0, speed_p);
9437 outer_code = SET;
9438 /* Slightly lower the cost of setting a core reg to a constant.
9439 This helps break up chains and allows for better scheduling. */
9440 if (REG_P (SET_DEST (x))
9441 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9442 *cost -= 1;
9443 x = SET_SRC (x);
9444 /* Immediate moves with an immediate in the range [0, 255] can be
9445 encoded in 16 bits in Thumb mode. */
9446 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9447 && INTVAL (x) >= 0 && INTVAL (x) <=255)
9448 *cost >>= 1;
9449 goto const_int_cost;
9450 }
9451
9452 return false;
9453
9454 case MEM:
9455 return arm_mem_costs (x, extra_cost, cost, speed_p);
9456
9457 case PARALLEL:
9458 {
9459 /* Calculations of LDM costs are complex. We assume an initial cost
9460 (ldm_1st) which will load the number of registers mentioned in
9461 ldm_regs_per_insn_1st registers; then each additional
9462 ldm_regs_per_insn_subsequent registers cost one more insn. The
9463 formula for N regs is thus:
9464
9465 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9466 + ldm_regs_per_insn_subsequent - 1)
9467 / ldm_regs_per_insn_subsequent).
9468
9469 Additional costs may also be added for addressing. A similar
9470 formula is used for STM. */
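/* For instance, with ldm_regs_per_insn_1st == 2 and
   ldm_regs_per_insn_subsequent == 2 (illustrative values), loading 5
   registers costs ldm_1st + COSTS_N_INSNS ((3 + 1) / 2), i.e. two
   additional insns.  */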
9471
9472 bool is_ldm = load_multiple_operation (x, SImode);
9473 bool is_stm = store_multiple_operation (x, SImode);
9474
9475 if (is_ldm || is_stm)
9476 {
9477 if (speed_p)
9478 {
9479 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9480 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9481 ? extra_cost->ldst.ldm_regs_per_insn_1st
9482 : extra_cost->ldst.stm_regs_per_insn_1st;
9483 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9484 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9485 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9486
9487 *cost += regs_per_insn_1st
9488 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9489 + regs_per_insn_sub - 1)
9490 / regs_per_insn_sub);
9491 return true;
9492 }
9493
9494 }
9495 return false;
9496 }
9497 case DIV:
9498 case UDIV:
9499 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9500 && (mode == SFmode || !TARGET_VFP_SINGLE))
9501 *cost += COSTS_N_INSNS (speed_p
9502 ? extra_cost->fp[mode != SFmode].div : 0);
9503 else if (mode == SImode && TARGET_IDIV)
9504 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9505 else
9506 *cost = LIBCALL_COST (2);
9507
9508 /* Make the cost of sdiv more expensive so that when both sdiv and udiv are
9509 possible, udiv is preferred. */
9510 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9511 return false; /* All arguments must be in registers. */
9512
9513 case MOD:
9514 /* MOD by a power of 2 can be expanded as:
9515 rsbs r1, r0, #0
9516 and r0, r0, #(n - 1)
9517 and r1, r1, #(n - 1)
9518 rsbpl r0, r1, #0. */
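/* E.g. for x % 4, n - 1 is 3, and the whole sequence is four
   branchless insns.  */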
9519 if (CONST_INT_P (XEXP (x, 1))
9520 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9521 && mode == SImode)
9522 {
9523 *cost += COSTS_N_INSNS (3);
9524
9525 if (speed_p)
9526 *cost += 2 * extra_cost->alu.logical
9527 + extra_cost->alu.arith;
9528 return true;
9529 }
9530
9531 /* Fall-through. */
9532 case UMOD:
9533 /* Make the cost of sdiv more expensive so that when both sdiv and udiv are
9534 possible, udiv is preferred. */
9535 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9536 return false; /* All arguments must be in registers. */
9537
9538 case ROTATE:
9539 if (mode == SImode && REG_P (XEXP (x, 1)))
9540 {
9541 *cost += (COSTS_N_INSNS (1)
9542 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9543 if (speed_p)
9544 *cost += extra_cost->alu.shift_reg;
9545 return true;
9546 }
9547 /* Fall through */
9548 case ROTATERT:
9549 case ASHIFT:
9550 case LSHIFTRT:
9551 case ASHIFTRT:
9552 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9553 {
9554 *cost += (COSTS_N_INSNS (2)
9555 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9556 if (speed_p)
9557 *cost += 2 * extra_cost->alu.shift;
9558 /* Slightly disparage a left shift by 1 so that we prefer adddi3. */
9559 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
9560 *cost += 1;
9561 return true;
9562 }
9563 else if (mode == SImode)
9564 {
9565 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9566 /* Slightly disparage register shifts at -Os, but not by much. */
9567 if (!CONST_INT_P (XEXP (x, 1)))
9568 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9569 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9570 return true;
9571 }
9572 else if (GET_MODE_CLASS (mode) == MODE_INT
9573 && GET_MODE_SIZE (mode) < 4)
9574 {
9575 if (code == ASHIFT)
9576 {
9577 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9578 /* Slightly disparage register shifts at -Os, but not by
9579 much. */
9580 if (!CONST_INT_P (XEXP (x, 1)))
9581 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9582 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9583 }
9584 else if (code == LSHIFTRT || code == ASHIFTRT)
9585 {
9586 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9587 {
9588 /* Can use SBFX/UBFX. */
9589 if (speed_p)
9590 *cost += extra_cost->alu.bfx;
9591 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9592 }
9593 else
9594 {
9595 *cost += COSTS_N_INSNS (1);
9596 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9597 if (speed_p)
9598 {
9599 if (CONST_INT_P (XEXP (x, 1)))
9600 *cost += 2 * extra_cost->alu.shift;
9601 else
9602 *cost += (extra_cost->alu.shift
9603 + extra_cost->alu.shift_reg);
9604 }
9605 else
9606 /* Slightly disparage register shifts. */
9607 *cost += !CONST_INT_P (XEXP (x, 1));
9608 }
9609 }
9610 else /* Rotates. */
9611 {
9612 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9613 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9614 if (speed_p)
9615 {
9616 if (CONST_INT_P (XEXP (x, 1)))
9617 *cost += (2 * extra_cost->alu.shift
9618 + extra_cost->alu.log_shift);
9619 else
9620 *cost += (extra_cost->alu.shift
9621 + extra_cost->alu.shift_reg
9622 + extra_cost->alu.log_shift_reg);
9623 }
9624 }
9625 return true;
9626 }
9627
9628 *cost = LIBCALL_COST (2);
9629 return false;
9630
9631 case BSWAP:
9632 if (arm_arch6)
9633 {
9634 if (mode == SImode)
9635 {
9636 if (speed_p)
9637 *cost += extra_cost->alu.rev;
9638
9639 return false;
9640 }
9641 }
9642 else
9643 {
9644 /* No rev instruction available. Look at arm_legacy_rev
9645 and thumb_legacy_rev for the form of RTL used then. */
9646 if (TARGET_THUMB)
9647 {
9648 *cost += COSTS_N_INSNS (9);
9649
9650 if (speed_p)
9651 {
9652 *cost += 6 * extra_cost->alu.shift;
9653 *cost += 3 * extra_cost->alu.logical;
9654 }
9655 }
9656 else
9657 {
9658 *cost += COSTS_N_INSNS (4);
9659
9660 if (speed_p)
9661 {
9662 *cost += 2 * extra_cost->alu.shift;
9663 *cost += extra_cost->alu.arith_shift;
9664 *cost += 2 * extra_cost->alu.logical;
9665 }
9666 }
9667 return true;
9668 }
9669 return false;
9670
9671 case MINUS:
9672 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9673 && (mode == SFmode || !TARGET_VFP_SINGLE))
9674 {
9675 if (GET_CODE (XEXP (x, 0)) == MULT
9676 || GET_CODE (XEXP (x, 1)) == MULT)
9677 {
9678 rtx mul_op0, mul_op1, sub_op;
9679
9680 if (speed_p)
9681 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9682
9683 if (GET_CODE (XEXP (x, 0)) == MULT)
9684 {
9685 mul_op0 = XEXP (XEXP (x, 0), 0);
9686 mul_op1 = XEXP (XEXP (x, 0), 1);
9687 sub_op = XEXP (x, 1);
9688 }
9689 else
9690 {
9691 mul_op0 = XEXP (XEXP (x, 1), 0);
9692 mul_op1 = XEXP (XEXP (x, 1), 1);
9693 sub_op = XEXP (x, 0);
9694 }
9695
9696 /* The first operand of the multiply may be optionally
9697 negated. */
9698 if (GET_CODE (mul_op0) == NEG)
9699 mul_op0 = XEXP (mul_op0, 0);
9700
9701 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9702 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9703 + rtx_cost (sub_op, mode, code, 0, speed_p));
9704
9705 return true;
9706 }
9707
9708 if (speed_p)
9709 *cost += extra_cost->fp[mode != SFmode].addsub;
9710 return false;
9711 }
9712
9713 if (mode == SImode)
9714 {
9715 rtx shift_by_reg = NULL;
9716 rtx shift_op;
9717 rtx non_shift_op;
9718
9719 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9720 if (shift_op == NULL)
9721 {
9722 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9723 non_shift_op = XEXP (x, 0);
9724 }
9725 else
9726 non_shift_op = XEXP (x, 1);
9727
9728 if (shift_op != NULL)
9729 {
9730 if (shift_by_reg != NULL)
9731 {
9732 if (speed_p)
9733 *cost += extra_cost->alu.arith_shift_reg;
9734 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9735 }
9736 else if (speed_p)
9737 *cost += extra_cost->alu.arith_shift;
9738
9739 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9740 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9741 return true;
9742 }
9743
9744 if (arm_arch_thumb2
9745 && GET_CODE (XEXP (x, 1)) == MULT)
9746 {
9747 /* MLS. */
9748 if (speed_p)
9749 *cost += extra_cost->mult[0].add;
9750 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9751 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9752 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9753 return true;
9754 }
9755
9756 if (CONST_INT_P (XEXP (x, 0)))
9757 {
9758 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9759 INTVAL (XEXP (x, 0)), NULL_RTX,
9760 NULL_RTX, 1, 0);
9761 *cost = COSTS_N_INSNS (insns);
9762 if (speed_p)
9763 *cost += insns * extra_cost->alu.arith;
9764 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9765 return true;
9766 }
9767 else if (speed_p)
9768 *cost += extra_cost->alu.arith;
9769
9770 return false;
9771 }
9772
9773 if (GET_MODE_CLASS (mode) == MODE_INT
9774 && GET_MODE_SIZE (mode) < 4)
9775 {
9776 rtx shift_op, shift_reg;
9777 shift_reg = NULL;
9778
9779 /* We check both sides of the MINUS for shifter operands since,
9780 unlike PLUS, it's not commutative. */
9781
9782 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
9783 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
9784
9785 /* Slightly disparage, as we might need to widen the result. */
9786 *cost += 1;
9787 if (speed_p)
9788 *cost += extra_cost->alu.arith;
9789
9790 if (CONST_INT_P (XEXP (x, 0)))
9791 {
9792 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9793 return true;
9794 }
9795
9796 return false;
9797 }
9798
9799 if (mode == DImode)
9800 {
9801 *cost += COSTS_N_INSNS (1);
9802
9803 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9804 {
9805 rtx op1 = XEXP (x, 1);
9806
9807 if (speed_p)
9808 *cost += 2 * extra_cost->alu.arith;
9809
9810 if (GET_CODE (op1) == ZERO_EXTEND)
9811 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9812 0, speed_p);
9813 else
9814 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9815 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9816 0, speed_p);
9817 return true;
9818 }
9819 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9820 {
9821 if (speed_p)
9822 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9823 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9824 0, speed_p)
9825 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9826 return true;
9827 }
9828 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9829 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9830 {
9831 if (speed_p)
9832 *cost += (extra_cost->alu.arith
9833 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9834 ? extra_cost->alu.arith
9835 : extra_cost->alu.arith_shift));
9836 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9837 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9838 GET_CODE (XEXP (x, 1)), 0, speed_p));
9839 return true;
9840 }
9841
9842 if (speed_p)
9843 *cost += 2 * extra_cost->alu.arith;
9844 return false;
9845 }
9846
9847 /* Vector mode? */
9848
9849 *cost = LIBCALL_COST (2);
9850 return false;
9851
9852 case PLUS:
9853 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9854 && (mode == SFmode || !TARGET_VFP_SINGLE))
9855 {
9856 if (GET_CODE (XEXP (x, 0)) == MULT)
9857 {
9858 rtx mul_op0, mul_op1, add_op;
9859
9860 if (speed_p)
9861 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9862
9863 mul_op0 = XEXP (XEXP (x, 0), 0);
9864 mul_op1 = XEXP (XEXP (x, 0), 1);
9865 add_op = XEXP (x, 1);
9866
9867 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9868 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9869 + rtx_cost (add_op, mode, code, 0, speed_p));
9870
9871 return true;
9872 }
9873
9874 if (speed_p)
9875 *cost += extra_cost->fp[mode != SFmode].addsub;
9876 return false;
9877 }
9878 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9879 {
9880 *cost = LIBCALL_COST (2);
9881 return false;
9882 }
9883
9884 /* Narrow modes can be synthesized in SImode, but the range
9885 of useful sub-operations is limited. Check for shift operations
9886 on one of the operands. Only left shifts can be used in the
9887 narrow modes. */
9888 if (GET_MODE_CLASS (mode) == MODE_INT
9889 && GET_MODE_SIZE (mode) < 4)
9890 {
9891 rtx shift_op, shift_reg;
9892 shift_reg = NULL;
9893
9894 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
9895
9896 if (CONST_INT_P (XEXP (x, 1)))
9897 {
9898 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9899 INTVAL (XEXP (x, 1)), NULL_RTX,
9900 NULL_RTX, 1, 0);
9901 *cost = COSTS_N_INSNS (insns);
9902 if (speed_p)
9903 *cost += insns * extra_cost->alu.arith;
9904 /* Slightly penalize a narrow operation as the result may
9905 need widening. */
9906 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9907 return true;
9908 }
9909
9910 /* Slightly penalize a narrow operation as the result may
9911 need widening. */
9912 *cost += 1;
9913 if (speed_p)
9914 *cost += extra_cost->alu.arith;
9915
9916 return false;
9917 }
9918
9919 if (mode == SImode)
9920 {
9921 rtx shift_op, shift_reg;
9922
9923 if (TARGET_INT_SIMD
9924 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9925 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9926 {
9927 /* UXTA[BH] or SXTA[BH]. */
9928 if (speed_p)
9929 *cost += extra_cost->alu.extend_arith;
9930 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9931 0, speed_p)
9932 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9933 return true;
9934 }
9935
9936 shift_reg = NULL;
9937 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9938 if (shift_op != NULL)
9939 {
9940 if (shift_reg)
9941 {
9942 if (speed_p)
9943 *cost += extra_cost->alu.arith_shift_reg;
9944 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9945 }
9946 else if (speed_p)
9947 *cost += extra_cost->alu.arith_shift;
9948
9949 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9950 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9951 return true;
9952 }
9953 if (GET_CODE (XEXP (x, 0)) == MULT)
9954 {
9955 rtx mul_op = XEXP (x, 0);
9956
9957 if (TARGET_DSP_MULTIPLY
9958 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9959 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9960 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9961 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9962 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9963 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9964 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9965 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9966 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9967 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9968 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9969 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9970 == 16))))))
9971 {
9972 /* SMLA[BT][BT]. */
9973 if (speed_p)
9974 *cost += extra_cost->mult[0].extend_add;
9975 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9976 SIGN_EXTEND, 0, speed_p)
9977 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9978 SIGN_EXTEND, 0, speed_p)
9979 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9980 return true;
9981 }
9982
9983 if (speed_p)
9984 *cost += extra_cost->mult[0].add;
9985 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9986 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9987 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9988 return true;
9989 }
9990 if (CONST_INT_P (XEXP (x, 1)))
9991 {
9992 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9993 INTVAL (XEXP (x, 1)), NULL_RTX,
9994 NULL_RTX, 1, 0);
9995 *cost = COSTS_N_INSNS (insns);
9996 if (speed_p)
9997 *cost += insns * extra_cost->alu.arith;
9998 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9999 return true;
10000 }
10001 else if (speed_p)
10002 *cost += extra_cost->alu.arith;
10003
10004 return false;
10005 }
10006
10007 if (mode == DImode)
10008 {
10009 if (arm_arch3m
10010 && GET_CODE (XEXP (x, 0)) == MULT
10011 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10012 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10013 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10014 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10015 {
10016 if (speed_p)
10017 *cost += extra_cost->mult[1].extend_add;
10018 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10019 ZERO_EXTEND, 0, speed_p)
10020 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10021 ZERO_EXTEND, 0, speed_p)
10022 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10023 return true;
10024 }
10025
10026 *cost += COSTS_N_INSNS (1);
10027
10028 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10029 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10030 {
10031 if (speed_p)
10032 *cost += (extra_cost->alu.arith
10033 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10034 ? extra_cost->alu.arith
10035 : extra_cost->alu.arith_shift));
10036
10037 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10038 0, speed_p)
10039 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10040 return true;
10041 }
10042
10043 if (speed_p)
10044 *cost += 2 * extra_cost->alu.arith;
10045 return false;
10046 }
10047
10048 /* Vector mode? */
10049 *cost = LIBCALL_COST (2);
10050 return false;
10051 case IOR:
10052 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10053 {
10054 if (speed_p)
10055 *cost += extra_cost->alu.rev;
10056
10057 return true;
10058 }
10059 /* Fall through. */
10060 case AND: case XOR:
10061 if (mode == SImode)
10062 {
10063 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10064 rtx op0 = XEXP (x, 0);
10065 rtx shift_op, shift_reg;
10066
10067 if (subcode == NOT
10068 && (code == AND
10069 || (code == IOR && TARGET_THUMB2)))
10070 op0 = XEXP (op0, 0);
10071
10072 shift_reg = NULL;
10073 shift_op = shifter_op_p (op0, &shift_reg);
10074 if (shift_op != NULL)
10075 {
10076 if (shift_reg)
10077 {
10078 if (speed_p)
10079 *cost += extra_cost->alu.log_shift_reg;
10080 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10081 }
10082 else if (speed_p)
10083 *cost += extra_cost->alu.log_shift;
10084
10085 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10086 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10087 return true;
10088 }
10089
10090 if (CONST_INT_P (XEXP (x, 1)))
10091 {
10092 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10093 INTVAL (XEXP (x, 1)), NULL_RTX,
10094 NULL_RTX, 1, 0);
10095
10096 *cost = COSTS_N_INSNS (insns);
10097 if (speed_p)
10098 *cost += insns * extra_cost->alu.logical;
10099 *cost += rtx_cost (op0, mode, code, 0, speed_p);
10100 return true;
10101 }
10102
10103 if (speed_p)
10104 *cost += extra_cost->alu.logical;
10105 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10106 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10107 return true;
10108 }
10109
10110 if (mode == DImode)
10111 {
10112 rtx op0 = XEXP (x, 0);
10113 enum rtx_code subcode = GET_CODE (op0);
10114
10115 *cost += COSTS_N_INSNS (1);
10116
10117 if (subcode == NOT
10118 && (code == AND
10119 || (code == IOR && TARGET_THUMB2)))
10120 op0 = XEXP (op0, 0);
10121
10122 if (GET_CODE (op0) == ZERO_EXTEND)
10123 {
10124 if (speed_p)
10125 *cost += 2 * extra_cost->alu.logical;
10126
10127 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10128 0, speed_p)
10129 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10130 return true;
10131 }
10132 else if (GET_CODE (op0) == SIGN_EXTEND)
10133 {
10134 if (speed_p)
10135 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10136
10137 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10138 0, speed_p)
10139 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10140 return true;
10141 }
10142
10143 if (speed_p)
10144 *cost += 2 * extra_cost->alu.logical;
10145
10146 return true;
10147 }
10148 /* Vector mode? */
10149
10150 *cost = LIBCALL_COST (2);
10151 return false;
10152
10153 case MULT:
10154 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10155 && (mode == SFmode || !TARGET_VFP_SINGLE))
10156 {
10157 rtx op0 = XEXP (x, 0);
10158
10159 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10160 op0 = XEXP (op0, 0);
10161
10162 if (speed_p)
10163 *cost += extra_cost->fp[mode != SFmode].mult;
10164
10165 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10166 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10167 return true;
10168 }
10169 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10170 {
10171 *cost = LIBCALL_COST (2);
10172 return false;
10173 }
10174
10175 if (mode == SImode)
10176 {
10177 if (TARGET_DSP_MULTIPLY
10178 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10179 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10180 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10181 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10182 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10183 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10184 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10185 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10186 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10187 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10188 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10189 && (INTVAL (XEXP (XEXP (x, 1), 1))
10190 == 16))))))
10191 {
10192 /* SMUL[TB][TB]. */
10193 if (speed_p)
10194 *cost += extra_cost->mult[0].extend;
10195 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10196 SIGN_EXTEND, 0, speed_p);
10197 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10198 SIGN_EXTEND, 1, speed_p);
10199 return true;
10200 }
10201 if (speed_p)
10202 *cost += extra_cost->mult[0].simple;
10203 return false;
10204 }
10205
10206 if (mode == DImode)
10207 {
10208 if (arm_arch3m
10209 && ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10210 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10211 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10212 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)))
10213 {
10214 if (speed_p)
10215 *cost += extra_cost->mult[1].extend;
10216 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10217 ZERO_EXTEND, 0, speed_p)
10218 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10219 ZERO_EXTEND, 0, speed_p));
10220 return true;
10221 }
10222
10223 *cost = LIBCALL_COST (2);
10224 return false;
10225 }
10226
10227 /* Vector mode? */
10228 *cost = LIBCALL_COST (2);
10229 return false;
10230
10231 case NEG:
10232 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10233 && (mode == SFmode || !TARGET_VFP_SINGLE))
10234 {
10235 if (GET_CODE (XEXP (x, 0)) == MULT)
10236 {
10237 /* VNMUL. */
10238 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10239 return true;
10240 }
10241
10242 if (speed_p)
10243 *cost += extra_cost->fp[mode != SFmode].neg;
10244
10245 return false;
10246 }
10247 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10248 {
10249 *cost = LIBCALL_COST (1);
10250 return false;
10251 }
10252
10253 if (mode == SImode)
10254 {
10255 if (GET_CODE (XEXP (x, 0)) == ABS)
10256 {
10257 *cost += COSTS_N_INSNS (1);
10258 /* Assume the non-flag-changing variant. */
10259 if (speed_p)
10260 *cost += (extra_cost->alu.log_shift
10261 + extra_cost->alu.arith_shift);
10262 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10263 return true;
10264 }
10265
10266 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10267 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10268 {
10269 *cost += COSTS_N_INSNS (1);
10270 /* No extra cost for MOV imm and MVN imm. */
10271 /* If the comparison op is using the flags, there's no further
10272 cost; otherwise we need to add the cost of the comparison. */
10273 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10274 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10275 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10276 {
10277 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10278 *cost += (COSTS_N_INSNS (1)
10279 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10280 0, speed_p)
10281 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10282 1, speed_p));
10283 if (speed_p)
10284 *cost += extra_cost->alu.arith;
10285 }
10286 return true;
10287 }
10288
10289 if (speed_p)
10290 *cost += extra_cost->alu.arith;
10291 return false;
10292 }
10293
10294 if (GET_MODE_CLASS (mode) == MODE_INT
10295 && GET_MODE_SIZE (mode) < 4)
10296 {
10297 /* Slightly disparage, as we might need an extend operation. */
10298 *cost += 1;
10299 if (speed_p)
10300 *cost += extra_cost->alu.arith;
10301 return false;
10302 }
10303
10304 if (mode == DImode)
10305 {
10306 *cost += COSTS_N_INSNS (1);
10307 if (speed_p)
10308 *cost += 2 * extra_cost->alu.arith;
10309 return false;
10310 }
10311
10312 /* Vector mode? */
10313 *cost = LIBCALL_COST (1);
10314 return false;
10315
10316 case NOT:
10317 if (mode == SImode)
10318 {
10319 rtx shift_op;
10320 rtx shift_reg = NULL;
10321
10322 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10323
10324 if (shift_op)
10325 {
10326 if (shift_reg != NULL)
10327 {
10328 if (speed_p)
10329 *cost += extra_cost->alu.log_shift_reg;
10330 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10331 }
10332 else if (speed_p)
10333 *cost += extra_cost->alu.log_shift;
10334 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10335 return true;
10336 }
10337
10338 if (speed_p)
10339 *cost += extra_cost->alu.logical;
10340 return false;
10341 }
10342 if (mode == DImode)
10343 {
10344 *cost += COSTS_N_INSNS (1);
10345 return false;
10346 }
10347
10348 /* Vector mode? */
10349
10350 *cost += LIBCALL_COST (1);
10351 return false;
10352
10353 case IF_THEN_ELSE:
10354 {
10355 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10356 {
10357 *cost += COSTS_N_INSNS (3);
10358 return true;
10359 }
10360 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10361 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10362
10363 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10364 /* Assume that if one arm of the if_then_else is a register,
10365 that it will be tied with the result and eliminate the
10366 conditional insn. */
10367 if (REG_P (XEXP (x, 1)))
10368 *cost += op2cost;
10369 else if (REG_P (XEXP (x, 2)))
10370 *cost += op1cost;
10371 else
10372 {
10373 if (speed_p)
10374 {
10375 if (extra_cost->alu.non_exec_costs_exec)
10376 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10377 else
10378 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10379 }
10380 else
10381 *cost += op1cost + op2cost;
10382 }
10383 }
10384 return true;
10385
10386 case COMPARE:
10387 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10388 *cost = 0;
10389 else
10390 {
10391 machine_mode op0mode;
10392 /* We'll mostly assume that the cost of a compare is the cost of the
10393 LHS. However, there are some notable exceptions. */
10394
10395 /* Floating point compares are never done as side-effects. */
10396 op0mode = GET_MODE (XEXP (x, 0));
10397 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10398 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10399 {
10400 if (speed_p)
10401 *cost += extra_cost->fp[op0mode != SFmode].compare;
10402
10403 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10404 {
10405 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10406 return true;
10407 }
10408
10409 return false;
10410 }
10411 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10412 {
10413 *cost = LIBCALL_COST (2);
10414 return false;
10415 }
10416
10417 /* DImode compares normally take two insns. */
10418 if (op0mode == DImode)
10419 {
10420 *cost += COSTS_N_INSNS (1);
10421 if (speed_p)
10422 *cost += 2 * extra_cost->alu.arith;
10423 return false;
10424 }
10425
10426 if (op0mode == SImode)
10427 {
10428 rtx shift_op;
10429 rtx shift_reg;
10430
10431 if (XEXP (x, 1) == const0_rtx
10432 && !(REG_P (XEXP (x, 0))
10433 || (GET_CODE (XEXP (x, 0)) == SUBREG
10434 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10435 {
10436 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10437
10438 /* Multiply operations that set the flags are often
10439 significantly more expensive. */
10440 if (speed_p
10441 && GET_CODE (XEXP (x, 0)) == MULT
10442 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10443 *cost += extra_cost->mult[0].flag_setting;
10444
10445 if (speed_p
10446 && GET_CODE (XEXP (x, 0)) == PLUS
10447 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10448 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10449 0), 1), mode))
10450 *cost += extra_cost->mult[0].flag_setting;
10451 return true;
10452 }
10453
10454 shift_reg = NULL;
10455 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10456 if (shift_op != NULL)
10457 {
10458 if (shift_reg != NULL)
10459 {
10460 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10461 1, speed_p);
10462 if (speed_p)
10463 *cost += extra_cost->alu.arith_shift_reg;
10464 }
10465 else if (speed_p)
10466 *cost += extra_cost->alu.arith_shift;
10467 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10468 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10469 return true;
10470 }
10471
10472 if (speed_p)
10473 *cost += extra_cost->alu.arith;
10474 if (CONST_INT_P (XEXP (x, 1))
10475 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10476 {
10477 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10478 return true;
10479 }
10480 return false;
10481 }
10482
10483 /* Vector mode? */
10484
10485 *cost = LIBCALL_COST (2);
10486 return false;
10487 }
10488 return true;
10489
10490 case EQ:
10491 case NE:
10492 case LT:
10493 case LE:
10494 case GT:
10495 case GE:
10496 case LTU:
10497 case LEU:
10498 case GEU:
10499 case GTU:
10500 case ORDERED:
10501 case UNORDERED:
10502 case UNEQ:
10503 case UNLE:
10504 case UNLT:
10505 case UNGE:
10506 case UNGT:
10507 case LTGT:
10508 if (outer_code == SET)
10509 {
10510 /* Is it a store-flag operation? */
10511 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10512 && XEXP (x, 1) == const0_rtx)
10513 {
10514 /* Thumb also needs an IT insn. */
10515 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10516 return true;
10517 }
10518 if (XEXP (x, 1) == const0_rtx)
10519 {
10520 switch (code)
10521 {
10522 case LT:
10523 /* LSR Rd, Rn, #31. */
10524 if (speed_p)
10525 *cost += extra_cost->alu.shift;
10526 break;
10527
10528 case EQ:
10529 /* RSBS T1, Rn, #0
10530 ADC Rd, Rn, T1. */
10531
10532 case NE:
10533 /* SUBS T1, Rn, #1
10534 SBC Rd, Rn, T1. */
10535 *cost += COSTS_N_INSNS (1);
10536 break;
10537
10538 case LE:
10539 /* RSBS T1, Rn, Rn, LSR #31
10540 ADC Rd, Rn, T1. */
10541 *cost += COSTS_N_INSNS (1);
10542 if (speed_p)
10543 *cost += extra_cost->alu.arith_shift;
10544 break;
10545
10546 case GT:
10547 /* RSB Rd, Rn, Rn, ASR #1
10548 LSR Rd, Rd, #31. */
10549 *cost += COSTS_N_INSNS (1);
10550 if (speed_p)
10551 *cost += (extra_cost->alu.arith_shift
10552 + extra_cost->alu.shift);
10553 break;
10554
10555 case GE:
10556 /* ASR Rd, Rn, #31
10557 ADD Rd, Rn, #1. */
10558 *cost += COSTS_N_INSNS (1);
10559 if (speed_p)
10560 *cost += extra_cost->alu.shift;
10561 break;
10562
10563 default:
10564 /* Remaining cases are either meaningless or would take
10565 three insns anyway. */
10566 *cost = COSTS_N_INSNS (3);
10567 break;
10568 }
10569 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10570 return true;
10571 }
10572 else
10573 {
10574 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10575 if (CONST_INT_P (XEXP (x, 1))
10576 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10577 {
10578 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10579 return true;
10580 }
10581
10582 return false;
10583 }
10584 }
10585 /* Not directly inside a set.  If it involves the condition code
10586 register it must be the condition for a branch, cond_exec or
10587 if_then_else operation.  Since the comparison is performed elsewhere,
10588 this is just the control part, which has no additional
10589 cost. */
10590 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10591 && XEXP (x, 1) == const0_rtx)
10592 {
10593 *cost = 0;
10594 return true;
10595 }
10596 return false;
10597
10598 case ABS:
10599 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10600 && (mode == SFmode || !TARGET_VFP_SINGLE))
10601 {
10602 if (speed_p)
10603 *cost += extra_cost->fp[mode != SFmode].neg;
10604
10605 return false;
10606 }
10607 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10608 {
10609 *cost = LIBCALL_COST (1);
10610 return false;
10611 }
10612
10613 if (mode == SImode)
10614 {
10615 if (speed_p)
10616 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10617 return false;
10618 }
10619 /* Vector mode? */
10620 *cost = LIBCALL_COST (1);
10621 return false;
10622
10623 case SIGN_EXTEND:
10624 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10625 && MEM_P (XEXP (x, 0)))
10626 {
10627 if (mode == DImode)
10628 *cost += COSTS_N_INSNS (1);
10629
10630 if (!speed_p)
10631 return true;
10632
10633 if (GET_MODE (XEXP (x, 0)) == SImode)
10634 *cost += extra_cost->ldst.load;
10635 else
10636 *cost += extra_cost->ldst.load_sign_extend;
10637
10638 if (mode == DImode)
10639 *cost += extra_cost->alu.shift;
10640
10641 return true;
10642 }
10643
10644 /* Widening from less than 32-bits requires an extend operation. */
10645 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10646 {
10647 /* We have SXTB/SXTH. */
10648 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10649 if (speed_p)
10650 *cost += extra_cost->alu.extend;
10651 }
10652 else if (GET_MODE (XEXP (x, 0)) != SImode)
10653 {
10654 /* Needs two shifts. */
10655 *cost += COSTS_N_INSNS (1);
10656 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10657 if (speed_p)
10658 *cost += 2 * extra_cost->alu.shift;
10659 }
10660
10661 /* Widening beyond 32 bits requires one more insn. */
10662 if (mode == DImode)
10663 {
10664 *cost += COSTS_N_INSNS (1);
10665 if (speed_p)
10666 *cost += extra_cost->alu.shift;
10667 }
10668
10669 return true;
10670
10671 case ZERO_EXTEND:
10672 if ((arm_arch4
10673 || GET_MODE (XEXP (x, 0)) == SImode
10674 || GET_MODE (XEXP (x, 0)) == QImode)
10675 && MEM_P (XEXP (x, 0)))
10676 {
10677 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10678
10679 if (mode == DImode)
10680 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10681
10682 return true;
10683 }
10684
10685 /* Widening from less than 32 bits requires an extend operation. */
10686 if (GET_MODE (XEXP (x, 0)) == QImode)
10687 {
10688 /* UXTB can be a shorter instruction in Thumb2, but it might
10689 be slower than the AND Rd, Rn, #255 alternative. When
10690 optimizing for speed it should never be slower to use
10691 AND, and we don't really model 16-bit vs 32-bit insns
10692 here. */
10693 if (speed_p)
10694 *cost += extra_cost->alu.logical;
10695 }
10696 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10697 {
10698 /* We have UXTB/UXTH. */
10699 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10700 if (speed_p)
10701 *cost += extra_cost->alu.extend;
10702 }
10703 else if (GET_MODE (XEXP (x, 0)) != SImode)
10704 {
10705 /* Needs two shifts. It's marginally preferable to use
10706 shifts rather than two BIC instructions as the second
10707 shift may merge with a subsequent insn as a shifter
10708 op. */
10709 *cost = COSTS_N_INSNS (2);
10710 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10711 if (speed_p)
10712 *cost += 2 * extra_cost->alu.shift;
10713 }
10714
10715 /* Widening beyond 32 bits requires one more insn. */
10716 if (mode == DImode)
10717 {
10718 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10719 }
10720
10721 return true;
10722
10723 case CONST_INT:
10724 *cost = 0;
10725 /* CONST_INT has no mode, so we cannot tell for sure how many
10726 insns are really going to be needed. The best we can do is
10727 look at the value passed. If it fits in SImode, then assume
10728 that's the mode it will be used for. Otherwise assume it
10729 will be used in DImode. */
10730 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10731 mode = SImode;
10732 else
10733 mode = DImode;
10734
10735 /* Avoid blowing up in arm_gen_constant (). */
10736 if (!(outer_code == PLUS
10737 || outer_code == AND
10738 || outer_code == IOR
10739 || outer_code == XOR
10740 || outer_code == MINUS))
10741 outer_code = SET;
10742
10743 const_int_cost:
10744 if (mode == SImode)
10745 {
10746 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10747 INTVAL (x), NULL, NULL,
10748 0, 0));
10749 /* Extra costs? */
10750 }
10751 else
10752 {
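          /* A DImode (or wider) constant: cost the low and high 32-bit
             halves as two separate SImode constants.  */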
10753 *cost += COSTS_N_INSNS (arm_gen_constant
10754 (outer_code, SImode, NULL,
10755 trunc_int_for_mode (INTVAL (x), SImode),
10756 NULL, NULL, 0, 0)
10757 + arm_gen_constant (outer_code, SImode, NULL,
10758 INTVAL (x) >> 32, NULL,
10759 NULL, 0, 0));
10760 /* Extra costs? */
10761 }
10762
10763 return true;
10764
10765 case CONST:
10766 case LABEL_REF:
10767 case SYMBOL_REF:
10768 if (speed_p)
10769 {
10770 if (arm_arch_thumb2 && !flag_pic)
10771 *cost += COSTS_N_INSNS (1);
10772 else
10773 *cost += extra_cost->ldst.load;
10774 }
10775 else
10776 *cost += COSTS_N_INSNS (1);
10777
10778 if (flag_pic)
10779 {
10780 *cost += COSTS_N_INSNS (1);
10781 if (speed_p)
10782 *cost += extra_cost->alu.arith;
10783 }
10784
10785 return true;
10786
10787 case CONST_FIXED:
10788 *cost = COSTS_N_INSNS (4);
10789 /* Fixme. */
10790 return true;
10791
10792 case CONST_DOUBLE:
10793 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10794 && (mode == SFmode || !TARGET_VFP_SINGLE))
10795 {
10796 if (vfp3_const_double_rtx (x))
10797 {
10798 if (speed_p)
10799 *cost += extra_cost->fp[mode == DFmode].fpconst;
10800 return true;
10801 }
10802
10803 if (speed_p)
10804 {
10805 if (mode == DFmode)
10806 *cost += extra_cost->ldst.loadd;
10807 else
10808 *cost += extra_cost->ldst.loadf;
10809 }
10810 else
10811 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10812
10813 return true;
10814 }
10815 *cost = COSTS_N_INSNS (4);
10816 return true;
10817
10818 case CONST_VECTOR:
10819 /* Fixme. */
10820 if (TARGET_NEON
10821 && TARGET_HARD_FLOAT
10822 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10823 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10824 *cost = COSTS_N_INSNS (1);
10825 else
10826 *cost = COSTS_N_INSNS (4);
10827 return true;
10828
10829 case HIGH:
10830 case LO_SUM:
10831 /* When optimizing for size, we prefer constant pool entries to
10832 MOVW/MOVT pairs, so bump the cost of these slightly. */
10833 if (!speed_p)
10834 *cost += 1;
10835 return true;
10836
10837 case CLZ:
10838 if (speed_p)
10839 *cost += extra_cost->alu.clz;
10840 return false;
10841
10842 case SMIN:
10843 if (XEXP (x, 1) == const0_rtx)
10844 {
10845 if (speed_p)
10846 *cost += extra_cost->alu.log_shift;
10847 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10848 return true;
10849 }
10850 /* Fall through. */
10851 case SMAX:
10852 case UMIN:
10853 case UMAX:
10854 *cost += COSTS_N_INSNS (1);
10855 return false;
10856
10857 case TRUNCATE:
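      /* The condition below matches the high half of a widening 32x32->64
         multiply: a wider MULT of two SIGN_EXTENDed (or two ZERO_EXTENDed)
         operands, arithmetic-shifted right by 32 and truncated back, which
         is what SMULL/UMULL produce.  Cost it as a single extending
         multiply plus the cost of the two multiply operands.  */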
10858 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10859 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10860 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10861 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10862 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10863 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10864 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10865 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10866 == ZERO_EXTEND))))
10867 {
10868 if (speed_p)
10869 *cost += extra_cost->mult[1].extend;
10870 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10871 ZERO_EXTEND, 0, speed_p)
10872 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10873 ZERO_EXTEND, 0, speed_p));
10874 return true;
10875 }
10876 *cost = LIBCALL_COST (1);
10877 return false;
10878
10879 case UNSPEC_VOLATILE:
10880 case UNSPEC:
10881 return arm_unspec_cost (x, outer_code, speed_p, cost);
10882
10883 case PC:
10884 /* Reading the PC is like reading any other register. Writing it
10885 is more expensive, but we take that into account elsewhere. */
10886 *cost = 0;
10887 return true;
10888
10889 case ZERO_EXTRACT:
10890 /* TODO: Simple zero_extract of bottom bits using AND. */
10891 /* Fall through. */
10892 case SIGN_EXTRACT:
10893 if (arm_arch6
10894 && mode == SImode
10895 && CONST_INT_P (XEXP (x, 1))
10896 && CONST_INT_P (XEXP (x, 2)))
10897 {
10898 if (speed_p)
10899 *cost += extra_cost->alu.bfx;
10900 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10901 return true;
10902 }
10903 /* Without UBFX/SBFX, need to resort to shift operations. */
10904 *cost += COSTS_N_INSNS (1);
10905 if (speed_p)
10906 *cost += 2 * extra_cost->alu.shift;
10907 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10908 return true;
10909
10910 case FLOAT_EXTEND:
10911 if (TARGET_HARD_FLOAT)
10912 {
10913 if (speed_p)
10914 *cost += extra_cost->fp[mode == DFmode].widen;
10915 if (!TARGET_VFP5
10916 && GET_MODE (XEXP (x, 0)) == HFmode)
10917 {
10918 /* Pre v8, widening HF->DF is a two-step process, first
10919 widening to SFmode. */
10920 *cost += COSTS_N_INSNS (1);
10921 if (speed_p)
10922 *cost += extra_cost->fp[0].widen;
10923 }
10924 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10925 return true;
10926 }
10927
10928 *cost = LIBCALL_COST (1);
10929 return false;
10930
10931 case FLOAT_TRUNCATE:
10932 if (TARGET_HARD_FLOAT)
10933 {
10934 if (speed_p)
10935 *cost += extra_cost->fp[mode == DFmode].narrow;
10936 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10937 return true;
10938 /* Vector modes? */
10939 }
10940 *cost = LIBCALL_COST (1);
10941 return false;
10942
10943 case FMA:
10944 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10945 {
10946 rtx op0 = XEXP (x, 0);
10947 rtx op1 = XEXP (x, 1);
10948 rtx op2 = XEXP (x, 2);
10949
10950
10951 /* vfms or vfnma. */
10952 if (GET_CODE (op0) == NEG)
10953 op0 = XEXP (op0, 0);
10954
10955 /* vfnms or vfnma. */
10956 if (GET_CODE (op2) == NEG)
10957 op2 = XEXP (op2, 0);
10958
10959 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10960 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10961 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10962
10963 if (speed_p)
10964 *cost += extra_cost->fp[mode == DFmode].fma;
10965
10966 return true;
10967 }
10968
10969 *cost = LIBCALL_COST (3);
10970 return false;
10971
10972 case FIX:
10973 case UNSIGNED_FIX:
10974 if (TARGET_HARD_FLOAT)
10975 {
10976 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10977 a vcvt fixed-point conversion. */
10978 if (code == FIX && mode == SImode
10979 && GET_CODE (XEXP (x, 0)) == FIX
10980 && GET_MODE (XEXP (x, 0)) == SFmode
10981 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10982 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10983 > 0)
10984 {
10985 if (speed_p)
10986 *cost += extra_cost->fp[0].toint;
10987
10988 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10989 code, 0, speed_p);
10990 return true;
10991 }
10992
10993 if (GET_MODE_CLASS (mode) == MODE_INT)
10994 {
10995 mode = GET_MODE (XEXP (x, 0));
10996 if (speed_p)
10997 *cost += extra_cost->fp[mode == DFmode].toint;
10998 /* Strip off the 'cost' of rounding towards zero. */
10999 if (GET_CODE (XEXP (x, 0)) == FIX)
11000 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11001 0, speed_p);
11002 else
11003 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11004 /* ??? Increase the cost to deal with transferring from
11005 FP -> CORE registers? */
11006 return true;
11007 }
11008 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11009 && TARGET_VFP5)
11010 {
11011 if (speed_p)
11012 *cost += extra_cost->fp[mode == DFmode].roundint;
11013 return false;
11014 }
11015 /* Vector costs? */
11016 }
11017 *cost = LIBCALL_COST (1);
11018 return false;
11019
11020 case FLOAT:
11021 case UNSIGNED_FLOAT:
11022 if (TARGET_HARD_FLOAT)
11023 {
11024 /* ??? Increase the cost to deal with transferring from CORE
11025 -> FP registers? */
11026 if (speed_p)
11027 *cost += extra_cost->fp[mode == DFmode].fromint;
11028 return false;
11029 }
11030 *cost = LIBCALL_COST (1);
11031 return false;
11032
11033 case CALL:
11034 return true;
11035
11036 case ASM_OPERANDS:
11037 {
11038 /* Just a guess. Guess number of instructions in the asm
11039 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11040 though (see PR60663). */
11041 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11042 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11043
11044 *cost = COSTS_N_INSNS (asm_length + num_operands);
11045 return true;
11046 }
11047 default:
11048 if (mode != VOIDmode)
11049 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11050 else
11051 *cost = COSTS_N_INSNS (4); /* Who knows? */
11052 return false;
11053 }
11054 }
11055
11056 #undef HANDLE_NARROW_SHIFT_ARITH
11057
11058 /* RTX costs entry point. */
11059
11060 static bool
11061 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11062 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11063 {
11064 bool result;
11065 int code = GET_CODE (x);
11066 gcc_assert (current_tune->insn_extra_cost);
11067
11068 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
11069 (enum rtx_code) outer_code,
11070 current_tune->insn_extra_cost,
11071 total, speed);
11072
11073 if (dump_file && arm_verbose_cost)
11074 {
11075 print_rtl_single (dump_file, x);
11076 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11077 *total, result ? "final" : "partial");
11078 }
11079 return result;
11080 }
11081
11082 /* All address computations that can be done are free, but rtx cost returns
11083 the same for practically all of them. So we weight the different types
11084 of address here in the order (most pref first):
11085 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
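/* For illustration of the weights assigned below: a PRE/POST_INC/DEC address
   costs 0, a register plus constant (e.g. [r0, #8]) costs 2, a register plus
   a shifted or other arithmetic term (e.g. [r0, r1, lsl #2]) costs 3, a
   register plus register (e.g. [r0, r1]) costs 4, any other single term such
   as a plain register costs 6, and a LABEL_REF, SYMBOL_REF or (rare)
   memory-indirect address costs 10.  */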
11086 static inline int
11087 arm_arm_address_cost (rtx x)
11088 {
11089 enum rtx_code c = GET_CODE (x);
11090
11091 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11092 return 0;
11093 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11094 return 10;
11095
11096 if (c == PLUS)
11097 {
11098 if (CONST_INT_P (XEXP (x, 1)))
11099 return 2;
11100
11101 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11102 return 3;
11103
11104 return 4;
11105 }
11106
11107 return 6;
11108 }
11109
11110 static inline int
11111 arm_thumb_address_cost (rtx x)
11112 {
11113 enum rtx_code c = GET_CODE (x);
11114
11115 if (c == REG)
11116 return 1;
11117 if (c == PLUS
11118 && REG_P (XEXP (x, 0))
11119 && CONST_INT_P (XEXP (x, 1)))
11120 return 1;
11121
11122 return 2;
11123 }
11124
11125 static int
11126 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11127 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11128 {
11129 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11130 }
11131
11132 /* Adjust cost hook for XScale. */
11133 static bool
11134 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11135 int * cost)
11136 {
11137 /* Some true dependencies can have a higher cost depending
11138 on precisely how certain input operands are used. */
11139 if (dep_type == 0
11140 && recog_memoized (insn) >= 0
11141 && recog_memoized (dep) >= 0)
11142 {
11143 int shift_opnum = get_attr_shift (insn);
11144 enum attr_type attr_type = get_attr_type (dep);
11145
11146 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11147 operand for INSN. If we have a shifted input operand and the
11148 instruction we depend on is another ALU instruction, then we may
11149 have to account for an additional stall. */
11150 if (shift_opnum != 0
11151 && (attr_type == TYPE_ALU_SHIFT_IMM
11152 || attr_type == TYPE_ALUS_SHIFT_IMM
11153 || attr_type == TYPE_LOGIC_SHIFT_IMM
11154 || attr_type == TYPE_LOGICS_SHIFT_IMM
11155 || attr_type == TYPE_ALU_SHIFT_REG
11156 || attr_type == TYPE_ALUS_SHIFT_REG
11157 || attr_type == TYPE_LOGIC_SHIFT_REG
11158 || attr_type == TYPE_LOGICS_SHIFT_REG
11159 || attr_type == TYPE_MOV_SHIFT
11160 || attr_type == TYPE_MVN_SHIFT
11161 || attr_type == TYPE_MOV_SHIFT_REG
11162 || attr_type == TYPE_MVN_SHIFT_REG))
11163 {
11164 rtx shifted_operand;
11165 int opno;
11166
11167 /* Get the shifted operand. */
11168 extract_insn (insn);
11169 shifted_operand = recog_data.operand[shift_opnum];
11170
11171 /* Iterate over all the operands in DEP. If we write an operand
11172 that overlaps with SHIFTED_OPERAND, then we have to increase the
11173 cost of this dependency. */
11174 extract_insn (dep);
11175 preprocess_constraints (dep);
11176 for (opno = 0; opno < recog_data.n_operands; opno++)
11177 {
11178 /* We can ignore strict inputs. */
11179 if (recog_data.operand_type[opno] == OP_IN)
11180 continue;
11181
11182 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11183 shifted_operand))
11184 {
11185 *cost = 2;
11186 return false;
11187 }
11188 }
11189 }
11190 }
11191 return true;
11192 }
11193
11194 /* Adjust cost hook for Cortex A9. */
11195 static bool
11196 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11197 int * cost)
11198 {
11199 switch (dep_type)
11200 {
11201 case REG_DEP_ANTI:
11202 *cost = 0;
11203 return false;
11204
11205 case REG_DEP_TRUE:
11206 case REG_DEP_OUTPUT:
11207 if (recog_memoized (insn) >= 0
11208 && recog_memoized (dep) >= 0)
11209 {
11210 if (GET_CODE (PATTERN (insn)) == SET)
11211 {
11212 if (GET_MODE_CLASS
11213 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11214 || GET_MODE_CLASS
11215 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11216 {
11217 enum attr_type attr_type_insn = get_attr_type (insn);
11218 enum attr_type attr_type_dep = get_attr_type (dep);
11219
11220 /* By default all dependencies of the form
11221 s0 = s0 <op> s1
11222 s0 = s0 <op> s2
11223 have an extra latency of 1 cycle because
11224 of the input and output dependency in this
11225 case. However, this gets modeled as a true
11226 dependency and hence all these checks. */
11227 if (REG_P (SET_DEST (PATTERN (insn)))
11228 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11229 {
11230 /* FMACS is a special case where the dependent
11231 instruction can be issued 3 cycles before
11232 the normal latency in case of an output
11233 dependency. */
11234 if ((attr_type_insn == TYPE_FMACS
11235 || attr_type_insn == TYPE_FMACD)
11236 && (attr_type_dep == TYPE_FMACS
11237 || attr_type_dep == TYPE_FMACD))
11238 {
11239 if (dep_type == REG_DEP_OUTPUT)
11240 *cost = insn_default_latency (dep) - 3;
11241 else
11242 *cost = insn_default_latency (dep);
11243 return false;
11244 }
11245 else
11246 {
11247 if (dep_type == REG_DEP_OUTPUT)
11248 *cost = insn_default_latency (dep) + 1;
11249 else
11250 *cost = insn_default_latency (dep);
11251 }
11252 return false;
11253 }
11254 }
11255 }
11256 }
11257 break;
11258
11259 default:
11260 gcc_unreachable ();
11261 }
11262
11263 return true;
11264 }
11265
11266 /* Adjust cost hook for FA726TE. */
11267 static bool
11268 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11269 int * cost)
11270 {
11271 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting insn followed
11272 by a predicated one) has a penalty of 3. */
11273 if (dep_type == REG_DEP_TRUE
11274 && recog_memoized (insn) >= 0
11275 && recog_memoized (dep) >= 0
11276 && get_attr_conds (dep) == CONDS_SET)
11277 {
11278 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11279 if (get_attr_conds (insn) == CONDS_USE
11280 && get_attr_type (insn) != TYPE_BRANCH)
11281 {
11282 *cost = 3;
11283 return false;
11284 }
11285
11286 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11287 || get_attr_conds (insn) == CONDS_USE)
11288 {
11289 *cost = 0;
11290 return false;
11291 }
11292 }
11293
11294 return true;
11295 }
11296
11297 /* Implement TARGET_REGISTER_MOVE_COST.
11298
11299 A move between VFP_REGS and GENERAL_REGS is a single insn, but it is
11300 typically more expensive than a single memory access. We set
11301 the cost to less than two memory accesses so that floating
11302 point to integer conversion does not go through memory. */
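/* With the current numbers this works out as follows: a VFP<->core move
   below returns 15, while arm_memory_move_cost returns 10 for 32-bit
   targets, so one such move (15) stays cheaper than a store plus a load
   (20).  */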
11303
11304 int
11305 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11306 reg_class_t from, reg_class_t to)
11307 {
11308 if (TARGET_32BIT)
11309 {
11310 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11311 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11312 return 15;
11313 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11314 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11315 return 4;
11316 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11317 return 20;
11318 else
11319 return 2;
11320 }
11321 else
11322 {
11323 if (from == HI_REGS || to == HI_REGS)
11324 return 4;
11325 else
11326 return 2;
11327 }
11328 }
11329
11330 /* Implement TARGET_MEMORY_MOVE_COST. */
11331
11332 int
11333 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11334 bool in ATTRIBUTE_UNUSED)
11335 {
11336 if (TARGET_32BIT)
11337 return 10;
11338 else
11339 {
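      /* Thumb-1: scale the cost with the size of the access.  For example,
         an SImode value costs 8 when the LO_REGS class can be used and 16
         otherwise; sub-word accesses cost a flat 8.  */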
11340 if (GET_MODE_SIZE (mode) < 4)
11341 return 8;
11342 else
11343 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11344 }
11345 }
11346
11347 /* Vectorizer cost model implementation. */
11348
11349 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11350 static int
11351 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11352 tree vectype,
11353 int misalign ATTRIBUTE_UNUSED)
11354 {
11355 unsigned elements;
11356
11357 switch (type_of_cost)
11358 {
11359 case scalar_stmt:
11360 return current_tune->vec_costs->scalar_stmt_cost;
11361
11362 case scalar_load:
11363 return current_tune->vec_costs->scalar_load_cost;
11364
11365 case scalar_store:
11366 return current_tune->vec_costs->scalar_store_cost;
11367
11368 case vector_stmt:
11369 return current_tune->vec_costs->vec_stmt_cost;
11370
11371 case vector_load:
11372 return current_tune->vec_costs->vec_align_load_cost;
11373
11374 case vector_store:
11375 return current_tune->vec_costs->vec_store_cost;
11376
11377 case vec_to_scalar:
11378 return current_tune->vec_costs->vec_to_scalar_cost;
11379
11380 case scalar_to_vec:
11381 return current_tune->vec_costs->scalar_to_vec_cost;
11382
11383 case unaligned_load:
11384 case vector_gather_load:
11385 return current_tune->vec_costs->vec_unalign_load_cost;
11386
11387 case unaligned_store:
11388 case vector_scatter_store:
11389 return current_tune->vec_costs->vec_unalign_store_cost;
11390
11391 case cond_branch_taken:
11392 return current_tune->vec_costs->cond_taken_branch_cost;
11393
11394 case cond_branch_not_taken:
11395 return current_tune->vec_costs->cond_not_taken_branch_cost;
11396
11397 case vec_perm:
11398 case vec_promote_demote:
11399 return current_tune->vec_costs->vec_stmt_cost;
11400
11401 case vec_construct:
11402 elements = TYPE_VECTOR_SUBPARTS (vectype);
11403 return elements / 2 + 1;
11404
11405 default:
11406 gcc_unreachable ();
11407 }
11408 }
11409
11410 /* Implement targetm.vectorize.add_stmt_cost. */
11411
11412 static unsigned
11413 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11414 struct _stmt_vec_info *stmt_info, int misalign,
11415 enum vect_cost_model_location where)
11416 {
11417 unsigned *cost = (unsigned *) data;
11418 unsigned retval = 0;
11419
11420 if (flag_vect_cost_model)
11421 {
11422 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11423 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11424
11425 /* Statements in an inner loop relative to the loop being
11426 vectorized are weighted more heavily. The value here is
11427 arbitrary and could potentially be improved with analysis. */
11428 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11429 count *= 50; /* FIXME. */
11430
11431 retval = (unsigned) (count * stmt_cost);
11432 cost[where] += retval;
11433 }
11434
11435 return retval;
11436 }
11437
11438 /* Return true if and only if this insn can dual-issue only as older. */
11439 static bool
11440 cortexa7_older_only (rtx_insn *insn)
11441 {
11442 if (recog_memoized (insn) < 0)
11443 return false;
11444
11445 switch (get_attr_type (insn))
11446 {
11447 case TYPE_ALU_DSP_REG:
11448 case TYPE_ALU_SREG:
11449 case TYPE_ALUS_SREG:
11450 case TYPE_LOGIC_REG:
11451 case TYPE_LOGICS_REG:
11452 case TYPE_ADC_REG:
11453 case TYPE_ADCS_REG:
11454 case TYPE_ADR:
11455 case TYPE_BFM:
11456 case TYPE_REV:
11457 case TYPE_MVN_REG:
11458 case TYPE_SHIFT_IMM:
11459 case TYPE_SHIFT_REG:
11460 case TYPE_LOAD_BYTE:
11461 case TYPE_LOAD_4:
11462 case TYPE_STORE_4:
11463 case TYPE_FFARITHS:
11464 case TYPE_FADDS:
11465 case TYPE_FFARITHD:
11466 case TYPE_FADDD:
11467 case TYPE_FMOV:
11468 case TYPE_F_CVT:
11469 case TYPE_FCMPS:
11470 case TYPE_FCMPD:
11471 case TYPE_FCONSTS:
11472 case TYPE_FCONSTD:
11473 case TYPE_FMULS:
11474 case TYPE_FMACS:
11475 case TYPE_FMULD:
11476 case TYPE_FMACD:
11477 case TYPE_FDIVS:
11478 case TYPE_FDIVD:
11479 case TYPE_F_MRC:
11480 case TYPE_F_MRRC:
11481 case TYPE_F_FLAG:
11482 case TYPE_F_LOADS:
11483 case TYPE_F_STORES:
11484 return true;
11485 default:
11486 return false;
11487 }
11488 }
11489
11490 /* Return true if and only if this insn can dual-issue as younger. */
11491 static bool
11492 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11493 {
11494 if (recog_memoized (insn) < 0)
11495 {
11496 if (verbose > 5)
11497 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11498 return false;
11499 }
11500
11501 switch (get_attr_type (insn))
11502 {
11503 case TYPE_ALU_IMM:
11504 case TYPE_ALUS_IMM:
11505 case TYPE_LOGIC_IMM:
11506 case TYPE_LOGICS_IMM:
11507 case TYPE_EXTEND:
11508 case TYPE_MVN_IMM:
11509 case TYPE_MOV_IMM:
11510 case TYPE_MOV_REG:
11511 case TYPE_MOV_SHIFT:
11512 case TYPE_MOV_SHIFT_REG:
11513 case TYPE_BRANCH:
11514 case TYPE_CALL:
11515 return true;
11516 default:
11517 return false;
11518 }
11519 }
11520
11521
11522 /* Look for an instruction that can dual issue only as an older
11523 instruction, and move it in front of any instructions that can
11524 dual-issue as younger, while preserving the relative order of all
11525 other instructions in the ready list. This is a heuristic to help
11526 dual-issue in later cycles, by postponing issue of more flexible
11527 instructions. This heuristic may affect dual issue opportunities
11528 in the current cycle. */
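/* For example, if the ready list in issue order is { Y, A, O }, where Y can
   dual-issue as younger, A is neither, and O can dual-issue only as older,
   the code below rotates the list to { O, Y, A } so that O issues first.  */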
11529 static void
11530 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11531 int *n_readyp, int clock)
11532 {
11533 int i;
11534 int first_older_only = -1, first_younger = -1;
11535
11536 if (verbose > 5)
11537 fprintf (file,
11538 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11539 clock,
11540 *n_readyp);
11541
11542 /* Traverse the ready list from the head (the instruction to issue
11543 first), looking for the first instruction that can issue as
11544 younger and the first instruction that can dual-issue only as
11545 older. */
11546 for (i = *n_readyp - 1; i >= 0; i--)
11547 {
11548 rtx_insn *insn = ready[i];
11549 if (cortexa7_older_only (insn))
11550 {
11551 first_older_only = i;
11552 if (verbose > 5)
11553 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11554 break;
11555 }
11556 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11557 first_younger = i;
11558 }
11559
11560 /* Nothing to reorder because either no younger insn was found, or an
11561 insn that can dual-issue only as older appears before any insn that
11562 can dual-issue as younger. */
11563 if (first_younger == -1)
11564 {
11565 if (verbose > 5)
11566 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11567 return;
11568 }
11569
11570 /* Nothing to reorder because no older-only insn in the ready list. */
11571 if (first_older_only == -1)
11572 {
11573 if (verbose > 5)
11574 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11575 return;
11576 }
11577
11578 /* Move first_older_only insn before first_younger. */
11579 if (verbose > 5)
11580 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11581 INSN_UID (ready[first_older_only]),
11582 INSN_UID (ready[first_younger]));
11583 rtx_insn *first_older_only_insn = ready[first_older_only];
11584 for (i = first_older_only; i < first_younger; i++)
11585 {
11586 ready[i] = ready[i+1];
11587 }
11588
11589 ready[i] = first_older_only_insn;
11590 return;
11591 }
11592
11593 /* Implement TARGET_SCHED_REORDER. */
11594 static int
11595 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11596 int clock)
11597 {
11598 switch (arm_tune)
11599 {
11600 case TARGET_CPU_cortexa7:
11601 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11602 break;
11603 default:
11604 /* Do nothing for other cores. */
11605 break;
11606 }
11607
11608 return arm_issue_rate ();
11609 }
11610
11611 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11612 It corrects the value of COST based on the relationship between
11613 INSN and DEP and the dependency type DEP_TYPE. It returns the new
11614 value. There is a per-core adjust_cost hook to adjust scheduler costs
11615 and the per-core hook can choose to completely override the generic
11616 adjust_cost function. Only put bits of code into arm_adjust_cost that
11617 are common across all cores. */
11618 static int
11619 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11620 unsigned int)
11621 {
11622 rtx i_pat, d_pat;
11623
11624 /* When generating Thumb-1 code, we want to place flag-setting operations
11625 close to a conditional branch which depends on them, so that we can
11626 omit the comparison. */
11627 if (TARGET_THUMB1
11628 && dep_type == 0
11629 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11630 && recog_memoized (dep) >= 0
11631 && get_attr_conds (dep) == CONDS_SET)
11632 return 0;
11633
11634 if (current_tune->sched_adjust_cost != NULL)
11635 {
11636 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11637 return cost;
11638 }
11639
11640 /* XXX Is this strictly true? */
11641 if (dep_type == REG_DEP_ANTI
11642 || dep_type == REG_DEP_OUTPUT)
11643 return 0;
11644
11645 /* Call insns don't incur a stall, even if they follow a load. */
11646 if (dep_type == 0
11647 && CALL_P (insn))
11648 return 1;
11649
11650 if ((i_pat = single_set (insn)) != NULL
11651 && MEM_P (SET_SRC (i_pat))
11652 && (d_pat = single_set (dep)) != NULL
11653 && MEM_P (SET_DEST (d_pat)))
11654 {
11655 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11656 /* This is a load after a store; there is no conflict if the load reads
11657 from a cached area. Assume that loads from the stack and from the
11658 constant pool are cached, and that others will miss. This is a
11659 hack. */
11660
11661 if ((GET_CODE (src_mem) == SYMBOL_REF
11662 && CONSTANT_POOL_ADDRESS_P (src_mem))
11663 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11664 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11665 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11666 return 1;
11667 }
11668
11669 return cost;
11670 }
11671
11672 int
11673 arm_max_conditional_execute (void)
11674 {
11675 return max_insns_skipped;
11676 }
11677
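/* Default branch-cost hook: branches cost 4 on 32-bit targets, except 1
   when optimizing Thumb-2 for size; on Thumb-1 they cost 2 when optimizing
   and 0 otherwise.  */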
11678 static int
11679 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11680 {
11681 if (TARGET_32BIT)
11682 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11683 else
11684 return (optimize > 0) ? 2 : 0;
11685 }
11686
11687 static int
11688 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11689 {
11690 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11691 }
11692
11693 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11694 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11695 sequences of non-executed instructions in IT blocks probably take the same
11696 amount of time as executed instructions (and the IT instruction itself takes
11697 space in icache). This function was experimentally determined to give good
11698 results on a popular embedded benchmark. */
11699
11700 static int
11701 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11702 {
11703 return (TARGET_32BIT && speed_p) ? 1
11704 : arm_default_branch_cost (speed_p, predictable_p);
11705 }
11706
11707 static int
11708 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11709 {
11710 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11711 }
11712
11713 static bool fp_consts_inited = false;
11714
11715 static REAL_VALUE_TYPE value_fp0;
11716
11717 static void
11718 init_fp_table (void)
11719 {
11720 REAL_VALUE_TYPE r;
11721
11722 r = REAL_VALUE_ATOF ("0", DFmode);
11723 value_fp0 = r;
11724 fp_consts_inited = true;
11725 }
11726
11727 /* Return TRUE if rtx X is a valid immediate FP constant. */
11728 int
11729 arm_const_double_rtx (rtx x)
11730 {
11731 const REAL_VALUE_TYPE *r;
11732
11733 if (!fp_consts_inited)
11734 init_fp_table ();
11735
11736 r = CONST_DOUBLE_REAL_VALUE (x);
11737 if (REAL_VALUE_MINUS_ZERO (*r))
11738 return 0;
11739
11740 if (real_equal (r, &value_fp0))
11741 return 1;
11742
11743 return 0;
11744 }
11745
11746 /* VFPv3 has a fairly wide range of representable immediates, formed from
11747 "quarter-precision" floating-point values. These can be evaluated using this
11748 formula (with ^ for exponentiation):
11749
11750 -1^s * n * 2^-r
11751
11752 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11753 16 <= n <= 31 and 0 <= r <= 7.
11754
11755 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11756
11757 - A (most-significant) is the sign bit.
11758 - BCD are the exponent (encoded as r XOR 3).
11759 - EFGH are the mantissa (encoded as n - 16).
11760 */
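/* For example, 1.0 is representable as n = 16, r = 4 (16 * 2^-4), and is
   encoded as ABCDEFGH = 0 111 0000, i.e. index 0x70: the sign bit A is 0,
   BCD = 4 XOR 3 = 7, and EFGH = 16 - 16 = 0.  */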
11761
11762 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11763 fconst[sd] instruction, or -1 if X isn't suitable. */
11764 static int
11765 vfp3_const_double_index (rtx x)
11766 {
11767 REAL_VALUE_TYPE r, m;
11768 int sign, exponent;
11769 unsigned HOST_WIDE_INT mantissa, mant_hi;
11770 unsigned HOST_WIDE_INT mask;
11771 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11772 bool fail;
11773
11774 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11775 return -1;
11776
11777 r = *CONST_DOUBLE_REAL_VALUE (x);
11778
11779 /* We can't represent these things, so detect them first. */
11780 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11781 return -1;
11782
11783 /* Extract sign, exponent and mantissa. */
11784 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11785 r = real_value_abs (&r);
11786 exponent = REAL_EXP (&r);
11787 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11788 highest (sign) bit, with a fixed binary point at bit point_pos.
11789 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11790 bits for the mantissa, this may fail (low bits would be lost). */
11791 real_ldexp (&m, &r, point_pos - exponent);
11792 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11793 mantissa = w.elt (0);
11794 mant_hi = w.elt (1);
11795
11796 /* If there are bits set in the low part of the mantissa, we can't
11797 represent this value. */
11798 if (mantissa != 0)
11799 return -1;
11800
11801 /* Now make it so that mantissa contains the most-significant bits, and move
11802 the point_pos to indicate that the least-significant bits have been
11803 discarded. */
11804 point_pos -= HOST_BITS_PER_WIDE_INT;
11805 mantissa = mant_hi;
11806
11807 /* We can permit four significant bits of mantissa only, plus a high bit
11808 which is always 1. */
11809 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11810 if ((mantissa & mask) != 0)
11811 return -1;
11812
11813 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11814 mantissa >>= point_pos - 5;
11815
11816 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11817 floating-point immediate zero with Neon using an integer-zero load, but
11818 that case is handled elsewhere.) */
11819 if (mantissa == 0)
11820 return -1;
11821
11822 gcc_assert (mantissa >= 16 && mantissa <= 31);
11823
11824 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11825 normalized significands are in the range [1, 2). (Our mantissa is shifted
11826 left 4 places at this point relative to normalized IEEE754 values). GCC
11827 internally uses [0.5, 1) (see real.c), so the exponent returned from
11828 REAL_EXP must be altered. */
11829 exponent = 5 - exponent;
11830
11831 if (exponent < 0 || exponent > 7)
11832 return -1;
11833
11834 /* Sign, mantissa and exponent are now in the correct form to plug into the
11835 formula described in the comment above. */
11836 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11837 }
11838
11839 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11840 int
11841 vfp3_const_double_rtx (rtx x)
11842 {
11843 if (!TARGET_VFP3)
11844 return 0;
11845
11846 return vfp3_const_double_index (x) != -1;
11847 }
11848
11849 /* Recognize immediates which can be used in various Neon instructions. Legal
11850 immediates are described by the following table (for VMVN variants, the
11851 bitwise inverse of the constant shown is recognized. In either case, VMOV
11852 is output and the correct instruction to use for a given constant is chosen
11853 by the assembler). The constant shown is replicated across all elements of
11854 the destination vector.
11855
11856 insn elems variant constant (binary)
11857 ---- ----- ------- -----------------
11858 vmov i32 0 00000000 00000000 00000000 abcdefgh
11859 vmov i32 1 00000000 00000000 abcdefgh 00000000
11860 vmov i32 2 00000000 abcdefgh 00000000 00000000
11861 vmov i32 3 abcdefgh 00000000 00000000 00000000
11862 vmov i16 4 00000000 abcdefgh
11863 vmov i16 5 abcdefgh 00000000
11864 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11865 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11866 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11867 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11868 vmvn i16 10 00000000 abcdefgh
11869 vmvn i16 11 abcdefgh 00000000
11870 vmov i32 12 00000000 00000000 abcdefgh 11111111
11871 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11872 vmov i32 14 00000000 abcdefgh 11111111 11111111
11873 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11874 vmov i8 16 abcdefgh
11875 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11876 eeeeeeee ffffffff gggggggg hhhhhhhh
11877 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11878 vmov f32 19 00000000 00000000 00000000 00000000
11879
11880 For case 18, B = !b. Representable values are exactly those accepted by
11881 vfp3_const_double_index, but are output as floating-point numbers rather
11882 than indices.
11883
11884 For case 19, we will change it to vmov.i32 when assembling.
11885
11886 Variants 0-5 (inclusive) may also be used as immediates for the second
11887 operand of VORR/VBIC instructions.
11888
11889 The INVERSE argument causes the bitwise inverse of the given operand to be
11890 recognized instead (used for recognizing legal immediates for the VAND/VORN
11891 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11892 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11893 output, rather than the real insns vbic/vorr).
11894
11895 INVERSE makes no difference to the recognition of float vectors.
11896
11897 The return value is the variant of immediate as shown in the above table, or
11898 -1 if the given value doesn't match any of the listed patterns.
11899 */
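/* For example (little-endian), a V2SImode constant with both elements equal
   to 0x0000004b expands to the byte sequence 4b 00 00 00 4b 00 00 00, which
   matches variant 0 above with an element size of 32; *MODCONST is then set
   to 0x4b and *ELEMENTWIDTH to 32.  */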
11900 static int
11901 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11902 rtx *modconst, int *elementwidth)
11903 {
11904 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11905 matches = 1; \
11906 for (i = 0; i < idx; i += (STRIDE)) \
11907 if (!(TEST)) \
11908 matches = 0; \
11909 if (matches) \
11910 { \
11911 immtype = (CLASS); \
11912 elsize = (ELSIZE); \
11913 break; \
11914 }
11915
11916 unsigned int i, elsize = 0, idx = 0, n_elts;
11917 unsigned int innersize;
11918 unsigned char bytes[16];
11919 int immtype = -1, matches;
11920 unsigned int invmask = inverse ? 0xff : 0;
11921 bool vector = GET_CODE (op) == CONST_VECTOR;
11922
11923 if (vector)
11924 n_elts = CONST_VECTOR_NUNITS (op);
11925 else
11926 {
11927 n_elts = 1;
11928 if (mode == VOIDmode)
11929 mode = DImode;
11930 }
11931
11932 innersize = GET_MODE_UNIT_SIZE (mode);
11933
11934 /* Vectors of float constants. */
11935 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11936 {
11937 rtx el0 = CONST_VECTOR_ELT (op, 0);
11938
11939 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11940 return -1;
11941
11942 /* FP16 vectors cannot be represented. */
11943 if (GET_MODE_INNER (mode) == HFmode)
11944 return -1;
11945
11946 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11947 are distinct in this context. */
11948 if (!const_vec_duplicate_p (op))
11949 return -1;
11950
11951 if (modconst)
11952 *modconst = CONST_VECTOR_ELT (op, 0);
11953
11954 if (elementwidth)
11955 *elementwidth = 0;
11956
11957 if (el0 == CONST0_RTX (GET_MODE (el0)))
11958 return 19;
11959 else
11960 return 18;
11961 }
11962
11963 /* The tricks done in the code below apply for little-endian vector layout.
11964 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11965 FIXME: Implement logic for big-endian vectors. */
11966 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11967 return -1;
11968
11969 /* Splat vector constant out into a byte vector. */
11970 for (i = 0; i < n_elts; i++)
11971 {
11972 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11973 unsigned HOST_WIDE_INT elpart;
11974
11975 gcc_assert (CONST_INT_P (el));
11976 elpart = INTVAL (el);
11977
11978 for (unsigned int byte = 0; byte < innersize; byte++)
11979 {
11980 bytes[idx++] = (elpart & 0xff) ^ invmask;
11981 elpart >>= BITS_PER_UNIT;
11982 }
11983 }
11984
11985 /* Sanity check. */
11986 gcc_assert (idx == GET_MODE_SIZE (mode));
11987
11988 do
11989 {
11990 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11991 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11992
11993 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11994 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11995
11996 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11997 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11998
11999 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12000 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12001
12002 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12003
12004 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12005
12006 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12007 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12008
12009 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12010 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12011
12012 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12013 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12014
12015 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12016 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12017
12018 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12019
12020 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12021
12022 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12023 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12024
12025 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12026 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12027
12028 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12029 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12030
12031 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12032 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12033
12034 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12035
12036 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12037 && bytes[i] == bytes[(i + 8) % idx]);
12038 }
12039 while (0);
12040
12041 if (immtype == -1)
12042 return -1;
12043
12044 if (elementwidth)
12045 *elementwidth = elsize;
12046
12047 if (modconst)
12048 {
12049 unsigned HOST_WIDE_INT imm = 0;
12050
12051 /* Un-invert bytes of recognized vector, if necessary. */
12052 if (invmask != 0)
12053 for (i = 0; i < idx; i++)
12054 bytes[i] ^= invmask;
12055
12056 if (immtype == 17)
12057 {
12058 /* FIXME: Broken on 32-bit H_W_I hosts. */
12059 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12060
12061 for (i = 0; i < 8; i++)
12062 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12063 << (i * BITS_PER_UNIT);
12064
12065 *modconst = GEN_INT (imm);
12066 }
12067 else
12068 {
12069 unsigned HOST_WIDE_INT imm = 0;
12070
12071 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12072 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12073
12074 *modconst = GEN_INT (imm);
12075 }
12076 }
12077
12078 return immtype;
12079 #undef CHECK
12080 }
12081
12082 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12083 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12084 float elements), and a modified constant (whatever should be output for a
12085 VMOV) in *MODCONST. */
12086
12087 int
12088 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12089 rtx *modconst, int *elementwidth)
12090 {
12091 rtx tmpconst;
12092 int tmpwidth;
12093 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12094
12095 if (retval == -1)
12096 return 0;
12097
12098 if (modconst)
12099 *modconst = tmpconst;
12100
12101 if (elementwidth)
12102 *elementwidth = tmpwidth;
12103
12104 return 1;
12105 }
12106
12107 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12108 the immediate is valid, write a constant suitable for using as an operand
12109 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12110 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12111
12112 int
12113 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12114 rtx *modconst, int *elementwidth)
12115 {
12116 rtx tmpconst;
12117 int tmpwidth;
12118 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12119
12120 if (retval < 0 || retval > 5)
12121 return 0;
12122
12123 if (modconst)
12124 *modconst = tmpconst;
12125
12126 if (elementwidth)
12127 *elementwidth = tmpwidth;
12128
12129 return 1;
12130 }
12131
12132 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12133 the immediate is valid, write a constant suitable for using as an operand
12134 to VSHR/VSHL to *MODCONST and the corresponding element width to
12135 *ELEMENTWIDTH. ISLEFTSHIFT indicates whether this is a left or a right
12136 shift, because the two have different range limitations. */
12137
12138 int
12139 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12140 rtx *modconst, int *elementwidth,
12141 bool isleftshift)
12142 {
12143 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12144 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12145 unsigned HOST_WIDE_INT last_elt = 0;
12146 unsigned HOST_WIDE_INT maxshift;
12147
12148 /* Extract the element values; all elements must hold the same shift amount. */
12149 for (i = 0; i < n_elts; i++)
12150 {
12151 rtx el = CONST_VECTOR_ELT (op, i);
12152 unsigned HOST_WIDE_INT elpart;
12153
12154 if (CONST_INT_P (el))
12155 elpart = INTVAL (el);
12156 else if (CONST_DOUBLE_P (el))
12157 return 0;
12158 else
12159 gcc_unreachable ();
12160
12161 if (i != 0 && elpart != last_elt)
12162 return 0;
12163
12164 last_elt = elpart;
12165 }
12166
12167 /* Shift less than element size. */
12168 maxshift = innersize * 8;
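  /* For example, with 8-bit elements maxshift is 8: a left-shift immediate
     must be in the range 0..7, a right-shift immediate in the range 1..8.  */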
12169
12170 if (isleftshift)
12171 {
12172 /* Left shift immediate value can be from 0 to <size>-1. */
12173 if (last_elt >= maxshift)
12174 return 0;
12175 }
12176 else
12177 {
12178 /* Right shift immediate value can be from 1 to <size>. */
12179 if (last_elt == 0 || last_elt > maxshift)
12180 return 0;
12181 }
12182
12183 if (elementwidth)
12184 *elementwidth = innersize * 8;
12185
12186 if (modconst)
12187 *modconst = CONST_VECTOR_ELT (op, 0);
12188
12189 return 1;
12190 }
12191
12192 /* Return a string suitable for output of Neon immediate logic operation
12193 MNEM. */
12194
12195 char *
12196 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12197 int inverse, int quad)
12198 {
12199 int width, is_valid;
12200 static char templ[40];
12201
12202 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12203
12204 gcc_assert (is_valid != 0);
12205
12206 if (quad)
12207 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12208 else
12209 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12210
12211 return templ;
12212 }
12213
12214 /* Return a string suitable for output of Neon immediate shift operation
12215 (VSHR or VSHL) MNEM. */
12216
12217 char *
12218 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12219 machine_mode mode, int quad,
12220 bool isleftshift)
12221 {
12222 int width, is_valid;
12223 static char templ[40];
12224
12225 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12226 gcc_assert (is_valid != 0);
12227
12228 if (quad)
12229 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12230 else
12231 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12232
12233 return templ;
12234 }
12235
12236 /* Output a sequence of pairwise operations to implement a reduction.
12237 NOTE: We do "too much work" here, because pairwise operations work on two
12238 registers-worth of operands in one go. Unfortunately it does not seem
12239 possible to exploit those extra calculations to do the full operation in fewer steps.
12240 Although all vector elements of the result but the first are ignored, we
12241 actually calculate the same result in each of the elements. An alternative
12242 such as initially loading a vector with zero to use as each of the second
12243 operands would use up an additional register and take an extra instruction,
12244 for no particular gain. */
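/* For example, an add reduction of a V4SImode vector takes two steps below:
   the first pairwise operation leaves each pair-sum duplicated across the
   lanes, and the second combines them so that every lane of OP0 holds the
   full reduction, of which only the first lane is subsequently used.  */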
12245
12246 void
12247 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12248 rtx (*reduc) (rtx, rtx, rtx))
12249 {
12250 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12251 rtx tmpsum = op1;
12252
12253 for (i = parts / 2; i >= 1; i /= 2)
12254 {
12255 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12256 emit_insn (reduc (dest, tmpsum, tmpsum));
12257 tmpsum = dest;
12258 }
12259 }
12260
12261 /* If VALS is a vector constant that can be loaded into a register
12262 using VDUP, generate instructions to do so and return an RTX to
12263 assign to the register. Otherwise return NULL_RTX. */
12264
12265 static rtx
12266 neon_vdup_constant (rtx vals)
12267 {
12268 machine_mode mode = GET_MODE (vals);
12269 machine_mode inner_mode = GET_MODE_INNER (mode);
12270 rtx x;
12271
12272 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12273 return NULL_RTX;
12274
12275 if (!const_vec_duplicate_p (vals, &x))
12276 /* The elements are not all the same. We could handle repeating
12277 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12278 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12279 vdup.i16). */
12280 return NULL_RTX;
12281
12282 /* We can load this constant by using VDUP and a constant in a
12283 single ARM register. This will be cheaper than a vector
12284 load. */
12285
12286 x = copy_to_mode_reg (inner_mode, x);
12287 return gen_vec_duplicate (mode, x);
12288 }
12289
12290 /* Generate code to load VALS, which is a PARALLEL containing only
12291 constants (for vec_init) or CONST_VECTOR, efficiently into a
12292 register. Returns an RTX to copy into the register, or NULL_RTX
12293 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12294
12295 rtx
12296 neon_make_constant (rtx vals)
12297 {
12298 machine_mode mode = GET_MODE (vals);
12299 rtx target;
12300 rtx const_vec = NULL_RTX;
12301 int n_elts = GET_MODE_NUNITS (mode);
12302 int n_const = 0;
12303 int i;
12304
12305 if (GET_CODE (vals) == CONST_VECTOR)
12306 const_vec = vals;
12307 else if (GET_CODE (vals) == PARALLEL)
12308 {
12309 /* A CONST_VECTOR must contain only CONST_INTs and
12310 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12311 Only store valid constants in a CONST_VECTOR. */
12312 for (i = 0; i < n_elts; ++i)
12313 {
12314 rtx x = XVECEXP (vals, 0, i);
12315 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12316 n_const++;
12317 }
12318 if (n_const == n_elts)
12319 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12320 }
12321 else
12322 gcc_unreachable ();
12323
12324 if (const_vec != NULL
12325 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12326 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12327 return const_vec;
12328 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12329 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12330 pipeline cycle; creating the constant takes one or two ARM
12331 pipeline cycles. */
12332 return target;
12333 else if (const_vec != NULL_RTX)
12334 /* Load from constant pool. On Cortex-A8 this takes two cycles
12335 (for either double or quad vectors). We can not take advantage
12336 of single-cycle VLD1 because we need a PC-relative addressing
12337 mode. */
12338 return const_vec;
12339 else
12340 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12341 We can not construct an initializer. */
12342 return NULL_RTX;
12343 }
12344
12345 /* Initialize vector TARGET to VALS. */
12346
12347 void
12348 neon_expand_vector_init (rtx target, rtx vals)
12349 {
12350 machine_mode mode = GET_MODE (target);
12351 machine_mode inner_mode = GET_MODE_INNER (mode);
12352 int n_elts = GET_MODE_NUNITS (mode);
12353 int n_var = 0, one_var = -1;
12354 bool all_same = true;
12355 rtx x, mem;
12356 int i;
12357
12358 for (i = 0; i < n_elts; ++i)
12359 {
12360 x = XVECEXP (vals, 0, i);
12361 if (!CONSTANT_P (x))
12362 ++n_var, one_var = i;
12363
12364 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12365 all_same = false;
12366 }
12367
12368 if (n_var == 0)
12369 {
12370 rtx constant = neon_make_constant (vals);
12371 if (constant != NULL_RTX)
12372 {
12373 emit_move_insn (target, constant);
12374 return;
12375 }
12376 }
12377
12378 /* Splat a single non-constant element if we can. */
12379 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12380 {
12381 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12382 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
12383 return;
12384 }
12385
12386 /* One field is non-constant. Load constant then overwrite varying
12387 field. This is more efficient than using the stack. */
12388 if (n_var == 1)
12389 {
12390 rtx copy = copy_rtx (vals);
12391 rtx index = GEN_INT (one_var);
12392
12393 /* Load constant part of vector, substitute neighboring value for
12394 varying element. */
12395 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12396 neon_expand_vector_init (target, copy);
12397
12398 /* Insert variable. */
12399 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12400 switch (mode)
12401 {
12402 case E_V8QImode:
12403 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12404 break;
12405 case E_V16QImode:
12406 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12407 break;
12408 case E_V4HImode:
12409 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12410 break;
12411 case E_V8HImode:
12412 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12413 break;
12414 case E_V2SImode:
12415 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12416 break;
12417 case E_V4SImode:
12418 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12419 break;
12420 case E_V2SFmode:
12421 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12422 break;
12423 case E_V4SFmode:
12424 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12425 break;
12426 case E_V2DImode:
12427 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12428 break;
12429 default:
12430 gcc_unreachable ();
12431 }
12432 return;
12433 }
12434
12435 /* Construct the vector in memory one field at a time
12436 and load the whole vector. */
12437 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12438 for (i = 0; i < n_elts; i++)
12439 emit_move_insn (adjust_address_nv (mem, inner_mode,
12440 i * GET_MODE_SIZE (inner_mode)),
12441 XVECEXP (vals, 0, i));
12442 emit_move_insn (target, mem);
12443 }
12444
12445 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Issue
12446 an error mentioning DESC if it doesn't. EXP indicates the source location,
12447 which includes the inlining history for intrinsics. */
12448
12449 static void
12450 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12451 const_tree exp, const char *desc)
12452 {
12453 HOST_WIDE_INT lane;
12454
12455 gcc_assert (CONST_INT_P (operand));
12456
12457 lane = INTVAL (operand);
12458
12459 if (lane < low || lane >= high)
12460 {
12461 if (exp)
12462 error ("%K%s %wd out of range %wd - %wd",
12463 exp, desc, lane, low, high - 1);
12464 else
12465 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12466 }
12467 }
12468
12469 /* Bounds-check lanes. */
12470
12471 void
12472 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12473 const_tree exp)
12474 {
12475 bounds_check (operand, low, high, exp, "lane");
12476 }
12477
12478 /* Bounds-check constants. */
12479
12480 void
12481 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12482 {
12483 bounds_check (operand, low, high, NULL_TREE, "constant");
12484 }
12485
12486 HOST_WIDE_INT
12487 neon_element_bits (machine_mode mode)
12488 {
12489 return GET_MODE_UNIT_BITSIZE (mode);
12490 }
12491
12492 \f
12493 /* Predicates for `match_operand' and `match_operator'. */
12494
12495 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12496 WB is true if full writeback address modes are allowed and is false
12497 if limited writeback address modes (POST_INC and PRE_DEC) are
12498 allowed. */
12499
12500 int
12501 arm_coproc_mem_operand (rtx op, bool wb)
12502 {
12503 rtx ind;
12504
12505 /* Reject eliminable registers. */
12506 if (! (reload_in_progress || reload_completed || lra_in_progress)
12507 && ( reg_mentioned_p (frame_pointer_rtx, op)
12508 || reg_mentioned_p (arg_pointer_rtx, op)
12509 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12510 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12511 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12512 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12513 return FALSE;
12514
12515 /* Constants are converted into offsets from labels. */
12516 if (!MEM_P (op))
12517 return FALSE;
12518
12519 ind = XEXP (op, 0);
12520
12521 if (reload_completed
12522 && (GET_CODE (ind) == LABEL_REF
12523 || (GET_CODE (ind) == CONST
12524 && GET_CODE (XEXP (ind, 0)) == PLUS
12525 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12526 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12527 return TRUE;
12528
12529 /* Match: (mem (reg)). */
12530 if (REG_P (ind))
12531 return arm_address_register_rtx_p (ind, 0);
12532
12533 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12534 acceptable in any case (subject to verification by
12535 arm_address_register_rtx_p). We need WB to be true to accept
12536 PRE_INC and POST_DEC. */
12537 if (GET_CODE (ind) == POST_INC
12538 || GET_CODE (ind) == PRE_DEC
12539 || (wb
12540 && (GET_CODE (ind) == PRE_INC
12541 || GET_CODE (ind) == POST_DEC)))
12542 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12543
12544 if (wb
12545 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12546 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12547 && GET_CODE (XEXP (ind, 1)) == PLUS
12548 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12549 ind = XEXP (ind, 1);
12550
12551 /* Match:
12552 (plus (reg)
12553 (const)). */
12554 if (GET_CODE (ind) == PLUS
12555 && REG_P (XEXP (ind, 0))
12556 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12557 && CONST_INT_P (XEXP (ind, 1))
12558 && INTVAL (XEXP (ind, 1)) > -1024
12559 && INTVAL (XEXP (ind, 1)) < 1024
12560 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12561 return TRUE;
12562
12563 return FALSE;
12564 }
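/* For illustration (assumed examples of the forms accepted above):

     vldr  d0, [r1]            @ (mem (reg r1))
     vldr  d0, [r1, #1016]     @ (mem (plus (reg r1) (const_int 1016)))

   i.e. a plain register base, the limited auto-increment forms
   (POST_INC/PRE_DEC always, PRE_INC/POST_DEC and the MODIFY forms only
   when WB is true), and reg+imm with the immediate a multiple of 4 in the
   range -1020..1020, matching the VLDR/VSTR offset encoding.  */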
12565
12566 /* Return TRUE if OP is a memory operand which we can load or store a vector
12567 to/from. TYPE is one of the following values:
12568 0 - Vector load/store (vldr)
12569 1 - Core registers (ldm)
12570 2 - Element/structure loads (vld1)
12571 */
12572 int
12573 neon_vector_mem_operand (rtx op, int type, bool strict)
12574 {
12575 rtx ind;
12576
12577 /* Reject eliminable registers. */
12578 if (strict && ! (reload_in_progress || reload_completed)
12579 && (reg_mentioned_p (frame_pointer_rtx, op)
12580 || reg_mentioned_p (arg_pointer_rtx, op)
12581 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12582 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12583 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12584 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12585 return FALSE;
12586
12587 /* Constants are converted into offsets from labels. */
12588 if (!MEM_P (op))
12589 return FALSE;
12590
12591 ind = XEXP (op, 0);
12592
12593 if (reload_completed
12594 && (GET_CODE (ind) == LABEL_REF
12595 || (GET_CODE (ind) == CONST
12596 && GET_CODE (XEXP (ind, 0)) == PLUS
12597 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12598 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12599 return TRUE;
12600
12601 /* Match: (mem (reg)). */
12602 if (REG_P (ind))
12603 return arm_address_register_rtx_p (ind, 0);
12604
12605 /* Allow post-increment with Neon registers. */
12606 if ((type != 1 && GET_CODE (ind) == POST_INC)
12607 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12608 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12609
12610 /* Allow post-increment by register for VLDn. */
12611 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12612 && GET_CODE (XEXP (ind, 1)) == PLUS
12613 && REG_P (XEXP (XEXP (ind, 1), 1)))
12614 return true;
12615
12616 /* Match:
12617 (plus (reg)
12618 (const)). */
12619 if (type == 0
12620 && GET_CODE (ind) == PLUS
12621 && REG_P (XEXP (ind, 0))
12622 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12623 && CONST_INT_P (XEXP (ind, 1))
12624 && INTVAL (XEXP (ind, 1)) > -1024
12625 /* For quad modes, we restrict the constant offset to be slightly less
12626 than what the instruction format permits. We have no such constraint
12627 on double mode offsets. (This must match arm_legitimate_index_p.) */
12628 && (INTVAL (XEXP (ind, 1))
12629 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12630 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12631 return TRUE;
12632
12633 return FALSE;
12634 }
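/* Illustrative summary (assumed) of the address forms accepted per TYPE:

     TYPE 0 (vldr):  [rN], [rN, #imm] with imm a multiple of 4, POST_INC,
                     PRE_DEC
     TYPE 1 (ldm):   [rN] only
     TYPE 2 (vld1):  [rN], POST_INC, or POST_MODIFY by a register
                     ("[rN], rM")

   The quad-register offset limit of 1016 (rather than 1024) presumably
   keeps the second D-register half of the access, at offset + 8, within
   the +/-1020 range; see arm_legitimate_index_p.  */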
12635
12636 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12637 type. */
12638 int
12639 neon_struct_mem_operand (rtx op)
12640 {
12641 rtx ind;
12642
12643 /* Reject eliminable registers. */
12644 if (! (reload_in_progress || reload_completed)
12645 && ( reg_mentioned_p (frame_pointer_rtx, op)
12646 || reg_mentioned_p (arg_pointer_rtx, op)
12647 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12648 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12649 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12650 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12651 return FALSE;
12652
12653 /* Constants are converted into offsets from labels. */
12654 if (!MEM_P (op))
12655 return FALSE;
12656
12657 ind = XEXP (op, 0);
12658
12659 if (reload_completed
12660 && (GET_CODE (ind) == LABEL_REF
12661 || (GET_CODE (ind) == CONST
12662 && GET_CODE (XEXP (ind, 0)) == PLUS
12663 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12664 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12665 return TRUE;
12666
12667 /* Match: (mem (reg)). */
12668 if (REG_P (ind))
12669 return arm_address_register_rtx_p (ind, 0);
12670
12671 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12672 if (GET_CODE (ind) == POST_INC
12673 || GET_CODE (ind) == PRE_DEC)
12674 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12675
12676 return FALSE;
12677 }
12678
12679 /* Return true if X is a register that will be eliminated later on. */
12680 int
12681 arm_eliminable_register (rtx x)
12682 {
12683 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12684 || REGNO (x) == ARG_POINTER_REGNUM
12685 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12686 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12687 }
12688
12689 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12690 coprocessor registers. Otherwise return NO_REGS. */
12691
12692 enum reg_class
12693 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12694 {
12695 if (mode == HFmode)
12696 {
12697 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12698 return GENERAL_REGS;
12699 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12700 return NO_REGS;
12701 return GENERAL_REGS;
12702 }
12703
12704 /* The neon move patterns handle all legitimate vector and struct
12705 addresses. */
12706 if (TARGET_NEON
12707 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12708 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12709 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12710 || VALID_NEON_STRUCT_MODE (mode)))
12711 return NO_REGS;
12712
12713 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12714 return NO_REGS;
12715
12716 return GENERAL_REGS;
12717 }
12718
12719 /* Values which must be returned in the most-significant end of the return
12720 register. */
12721
12722 static bool
12723 arm_return_in_msb (const_tree valtype)
12724 {
12725 return (TARGET_AAPCS_BASED
12726 && BYTES_BIG_ENDIAN
12727 && (AGGREGATE_TYPE_P (valtype)
12728 || TREE_CODE (valtype) == COMPLEX_TYPE
12729 || FIXED_POINT_TYPE_P (valtype)));
12730 }
12731
12732 /* Return TRUE if X references a SYMBOL_REF. */
12733 int
12734 symbol_mentioned_p (rtx x)
12735 {
12736 const char * fmt;
12737 int i;
12738
12739 if (GET_CODE (x) == SYMBOL_REF)
12740 return 1;
12741
12742 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12743 are constant offsets, not symbols. */
12744 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12745 return 0;
12746
12747 fmt = GET_RTX_FORMAT (GET_CODE (x));
12748
12749 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12750 {
12751 if (fmt[i] == 'E')
12752 {
12753 int j;
12754
12755 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12756 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12757 return 1;
12758 }
12759 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12760 return 1;
12761 }
12762
12763 return 0;
12764 }
12765
12766 /* Return TRUE if X references a LABEL_REF. */
12767 int
12768 label_mentioned_p (rtx x)
12769 {
12770 const char * fmt;
12771 int i;
12772
12773 if (GET_CODE (x) == LABEL_REF)
12774 return 1;
12775
12776 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12777 instruction, but they are constant offsets, not symbols. */
12778 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12779 return 0;
12780
12781 fmt = GET_RTX_FORMAT (GET_CODE (x));
12782 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12783 {
12784 if (fmt[i] == 'E')
12785 {
12786 int j;
12787
12788 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12789 if (label_mentioned_p (XVECEXP (x, i, j)))
12790 return 1;
12791 }
12792 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12793 return 1;
12794 }
12795
12796 return 0;
12797 }
12798
12799 int
12800 tls_mentioned_p (rtx x)
12801 {
12802 switch (GET_CODE (x))
12803 {
12804 case CONST:
12805 return tls_mentioned_p (XEXP (x, 0));
12806
12807 case UNSPEC:
12808 if (XINT (x, 1) == UNSPEC_TLS)
12809 return 1;
12810
12811 /* Fall through. */
12812 default:
12813 return 0;
12814 }
12815 }
12816
12817 /* Must not copy any rtx that uses a pc-relative address.
12818 Also, disallow copying of load-exclusive instructions that
12819 may appear after splitting of compare-and-swap-style operations
12820 so as to prevent those loops from being transformed away from their
12821 canonical forms (see PR 69904). */
12822
12823 static bool
12824 arm_cannot_copy_insn_p (rtx_insn *insn)
12825 {
12826 /* The tls call insn cannot be copied, as it is paired with a data
12827 word. */
12828 if (recog_memoized (insn) == CODE_FOR_tlscall)
12829 return true;
12830
12831 subrtx_iterator::array_type array;
12832 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12833 {
12834 const_rtx x = *iter;
12835 if (GET_CODE (x) == UNSPEC
12836 && (XINT (x, 1) == UNSPEC_PIC_BASE
12837 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12838 return true;
12839 }
12840
12841 rtx set = single_set (insn);
12842 if (set)
12843 {
12844 rtx src = SET_SRC (set);
12845 if (GET_CODE (src) == ZERO_EXTEND)
12846 src = XEXP (src, 0);
12847
12848 /* Catch the load-exclusive and load-acquire operations. */
12849 if (GET_CODE (src) == UNSPEC_VOLATILE
12850 && (XINT (src, 1) == VUNSPEC_LL
12851 || XINT (src, 1) == VUNSPEC_LAX))
12852 return true;
12853 }
12854 return false;
12855 }
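/* Background illustration (assumed): a C11 atomic operation such as

     #include <stdatomic.h>
     void f (atomic_int *p) { atomic_fetch_add (p, 1); }

   is implemented on ARM with an LDREX/STREX retry loop once the
   compare-and-swap style patterns have been split.  Copying the
   load-exclusive (VUNSPEC_LL) half of such a loop could separate it from
   its matching store-exclusive and destroy the loop's canonical form,
   which is what the check above prevents (see PR 69904).  */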
12856
12857 enum rtx_code
12858 minmax_code (rtx x)
12859 {
12860 enum rtx_code code = GET_CODE (x);
12861
12862 switch (code)
12863 {
12864 case SMAX:
12865 return GE;
12866 case SMIN:
12867 return LE;
12868 case UMIN:
12869 return LEU;
12870 case UMAX:
12871 return GEU;
12872 default:
12873 gcc_unreachable ();
12874 }
12875 }
12876
12877 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12878
12879 bool
12880 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12881 int *mask, bool *signed_sat)
12882 {
12883 /* The high bound must be a power of two minus one. */
12884 int log = exact_log2 (INTVAL (hi_bound) + 1);
12885 if (log == -1)
12886 return false;
12887
12888 /* The low bound is either zero (for usat) or one less than the
12889 negation of the high bound (for ssat). */
12890 if (INTVAL (lo_bound) == 0)
12891 {
12892 if (mask)
12893 *mask = log;
12894 if (signed_sat)
12895 *signed_sat = false;
12896
12897 return true;
12898 }
12899
12900 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12901 {
12902 if (mask)
12903 *mask = log + 1;
12904 if (signed_sat)
12905 *signed_sat = true;
12906
12907 return true;
12908 }
12909
12910 return false;
12911 }
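/* Worked example (illustrative): a clamp to the signed 8-bit range,

     r = x < -128 ? -128 : x > 127 ? 127 : x;

   is typically presented to this function as a min/max pair with
   HI_BOUND == 127 and LO_BOUND == -128, so log == 7, the signed case
   matches and *MASK becomes 8, i.e. an "ssat rD, #8, rN" instruction.
   With LO_BOUND == 0 and HI_BOUND == 255 the unsigned case likewise gives
   *MASK == 8 for "usat rD, #8, rN".  */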
12912
12913 /* Return 1 if memory locations are adjacent. */
12914 int
12915 adjacent_mem_locations (rtx a, rtx b)
12916 {
12917 /* We don't guarantee to preserve the order of these memory refs. */
12918 if (volatile_refs_p (a) || volatile_refs_p (b))
12919 return 0;
12920
12921 if ((REG_P (XEXP (a, 0))
12922 || (GET_CODE (XEXP (a, 0)) == PLUS
12923 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12924 && (REG_P (XEXP (b, 0))
12925 || (GET_CODE (XEXP (b, 0)) == PLUS
12926 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12927 {
12928 HOST_WIDE_INT val0 = 0, val1 = 0;
12929 rtx reg0, reg1;
12930 int val_diff;
12931
12932 if (GET_CODE (XEXP (a, 0)) == PLUS)
12933 {
12934 reg0 = XEXP (XEXP (a, 0), 0);
12935 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12936 }
12937 else
12938 reg0 = XEXP (a, 0);
12939
12940 if (GET_CODE (XEXP (b, 0)) == PLUS)
12941 {
12942 reg1 = XEXP (XEXP (b, 0), 0);
12943 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12944 }
12945 else
12946 reg1 = XEXP (b, 0);
12947
12948 /* Don't accept any offset that will require multiple
12949 instructions to handle, since this would cause the
12950 arith_adjacentmem pattern to output an overlong sequence. */
12951 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12952 return 0;
12953
12954 /* Don't allow an eliminable register: register elimination can make
12955 the offset too large. */
12956 if (arm_eliminable_register (reg0))
12957 return 0;
12958
12959 val_diff = val1 - val0;
12960
12961 if (arm_ld_sched)
12962 {
12963 /* If the target has load delay slots, then there's no benefit
12964 to using an ldm instruction unless the offset is zero and
12965 we are optimizing for size. */
12966 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12967 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12968 && (val_diff == 4 || val_diff == -4));
12969 }
12970
12971 return ((REGNO (reg0) == REGNO (reg1))
12972 && (val_diff == 4 || val_diff == -4));
12973 }
12974
12975 return 0;
12976 }
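/* For illustration (assumed): the pair of accesses

     ldr r4, [r6, #8]
     ldr r5, [r6, #12]

   is adjacent in the sense used here: same base register, offsets
   differing by exactly 4 and neither offset needing multiple instructions
   to form.  On a target with load delay slots (arm_ld_sched) the result is
   further restricted to the small-offset, optimize-for-size case described
   in the code above.  */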
12977
12978 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12979 for load operations, false for store operations. CONSECUTIVE is true
12980 if the register numbers in the operation must be consecutive in the register
12981 bank. RETURN_PC is true if the value is to be loaded into the PC.
12982 The pattern we are trying to match for load is:
12983 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12984 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12985 :
12986 :
12987 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12988 ]
12989 where
12990 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12991 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12992 3. If consecutive is TRUE, then for kth register being loaded,
12993 REGNO (R_dk) = REGNO (R_d0) + k.
12994 The pattern for store is similar. */
12995 bool
12996 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12997 bool consecutive, bool return_pc)
12998 {
12999 HOST_WIDE_INT count = XVECLEN (op, 0);
13000 rtx reg, mem, addr;
13001 unsigned regno;
13002 unsigned first_regno;
13003 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13004 rtx elt;
13005 bool addr_reg_in_reglist = false;
13006 bool update = false;
13007 int reg_increment;
13008 int offset_adj;
13009 int regs_per_val;
13010
13011 /* If not in SImode, then registers must be consecutive
13012 (e.g., VLDM instructions for DFmode). */
13013 gcc_assert ((mode == SImode) || consecutive);
13014 /* Setting return_pc for stores is illegal. */
13015 gcc_assert (!return_pc || load);
13016
13017 /* Set up the increments and the regs per val based on the mode. */
13018 reg_increment = GET_MODE_SIZE (mode);
13019 regs_per_val = reg_increment / 4;
13020 offset_adj = return_pc ? 1 : 0;
13021
13022 if (count <= 1
13023 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13024 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13025 return false;
13026
13027 /* Check if this is a write-back. */
13028 elt = XVECEXP (op, 0, offset_adj);
13029 if (GET_CODE (SET_SRC (elt)) == PLUS)
13030 {
13031 i++;
13032 base = 1;
13033 update = true;
13034
13035 /* The offset adjustment must be the number of registers being
13036 popped times the size of a single register. */
13037 if (!REG_P (SET_DEST (elt))
13038 || !REG_P (XEXP (SET_SRC (elt), 0))
13039 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13040 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13041 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13042 ((count - 1 - offset_adj) * reg_increment))
13043 return false;
13044 }
13045
13046 i = i + offset_adj;
13047 base = base + offset_adj;
13048 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13049 success depends on the type: VLDM can do just one reg,
13050 LDM must do at least two. */
13051 if ((count <= i) && (mode == SImode))
13052 return false;
13053
13054 elt = XVECEXP (op, 0, i - 1);
13055 if (GET_CODE (elt) != SET)
13056 return false;
13057
13058 if (load)
13059 {
13060 reg = SET_DEST (elt);
13061 mem = SET_SRC (elt);
13062 }
13063 else
13064 {
13065 reg = SET_SRC (elt);
13066 mem = SET_DEST (elt);
13067 }
13068
13069 if (!REG_P (reg) || !MEM_P (mem))
13070 return false;
13071
13072 regno = REGNO (reg);
13073 first_regno = regno;
13074 addr = XEXP (mem, 0);
13075 if (GET_CODE (addr) == PLUS)
13076 {
13077 if (!CONST_INT_P (XEXP (addr, 1)))
13078 return false;
13079
13080 offset = INTVAL (XEXP (addr, 1));
13081 addr = XEXP (addr, 0);
13082 }
13083
13084 if (!REG_P (addr))
13085 return false;
13086
13087 /* Don't allow SP to be loaded unless it is also the base register. It
13088 guarantees that SP is reset correctly when an LDM instruction
13089 is interrupted. Otherwise, we might end up with a corrupt stack. */
13090 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13091 return false;
13092
13093 for (; i < count; i++)
13094 {
13095 elt = XVECEXP (op, 0, i);
13096 if (GET_CODE (elt) != SET)
13097 return false;
13098
13099 if (load)
13100 {
13101 reg = SET_DEST (elt);
13102 mem = SET_SRC (elt);
13103 }
13104 else
13105 {
13106 reg = SET_SRC (elt);
13107 mem = SET_DEST (elt);
13108 }
13109
13110 if (!REG_P (reg)
13111 || GET_MODE (reg) != mode
13112 || REGNO (reg) <= regno
13113 || (consecutive
13114 && (REGNO (reg) !=
13115 (unsigned int) (first_regno + regs_per_val * (i - base))))
13116 /* Don't allow SP to be loaded unless it is also the base register. It
13117 guarantees that SP is reset correctly when an LDM instruction
13118 is interrupted. Otherwise, we might end up with a corrupt stack. */
13119 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13120 || !MEM_P (mem)
13121 || GET_MODE (mem) != mode
13122 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13123 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13124 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13125 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13126 offset + (i - base) * reg_increment))
13127 && (!REG_P (XEXP (mem, 0))
13128 || offset + (i - base) * reg_increment != 0)))
13129 return false;
13130
13131 regno = REGNO (reg);
13132 if (regno == REGNO (addr))
13133 addr_reg_in_reglist = true;
13134 }
13135
13136 if (load)
13137 {
13138 if (update && addr_reg_in_reglist)
13139 return false;
13140
13141 /* For Thumb-1, the address register is always modified, either by write-back
13142 or by explicit load. If the pattern does not describe an update,
13143 then the address register must be in the list of loaded registers. */
13144 if (TARGET_THUMB1)
13145 return update || addr_reg_in_reglist;
13146 }
13147
13148 return true;
13149 }
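/* Schematic example (assumed, simplified RTL): a two-register pop with
   write-back of the stack pointer would be presented roughly as

     (parallel
       [(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 8)))
        (set (reg:SI r4) (mem:SI (reg:SI sp)))
        (set (reg:SI r5) (mem:SI (plus:SI (reg:SI sp) (const_int 4))))])

   The first element is recognized as the update (8 == 2 * reg_increment
   for SImode) and the remaining elements must load ascending register
   numbers from offsets that ascend by reg_increment, as checked in the
   loop above.  */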
13150
13151 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13152 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13153 instruction. ADD_OFFSET is nonzero if the base address register needs
13154 to be modified with an add instruction before we can use it. */
13155
13156 static bool
13157 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13158 int nops, HOST_WIDE_INT add_offset)
13159 {
13160 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13161 if the offset isn't small enough. The reason 2 ldrs are faster
13162 is because these ARMs are able to do more than one cache access
13163 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13164 whilst the ARM8 has a double bandwidth cache. This means that
13165 these cores can do both an instruction fetch and a data fetch in
13166 a single cycle, so the trick of calculating the address into a
13167 scratch register (one of the result regs) and then doing a load
13168 multiple actually becomes slower (and no smaller in code size).
13169 That is the transformation
13170
13171 ldr rd1, [rbase + offset]
13172 ldr rd2, [rbase + offset + 4]
13173
13174 to
13175
13176 add rd1, rbase, offset
13177 ldmia rd1, {rd1, rd2}
13178
13179 produces worse code -- '3 cycles + any stalls on rd2' instead of
13180 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13181 access per cycle, the first sequence could never complete in less
13182 than 6 cycles, whereas the ldm sequence would only take 5 and
13183 would make better use of sequential accesses if not hitting the
13184 cache.
13185
13186 We cheat here and test 'arm_ld_sched' which we currently know to
13187 only be true for the ARM8, ARM9 and StrongARM. If this ever
13188 changes, then the test below needs to be reworked. */
13189 if (nops == 2 && arm_ld_sched && add_offset != 0)
13190 return false;
13191
13192 /* XScale has load-store double instructions, but they have stricter
13193 alignment requirements than load-store multiple, so we cannot
13194 use them.
13195
13196 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13197 the pipeline until completion.
13198
13199 NREGS CYCLES
13200 1 3
13201 2 4
13202 3 5
13203 4 6
13204
13205 An ldr instruction takes 1-3 cycles, but does not block the
13206 pipeline.
13207
13208 NREGS CYCLES
13209 1 1-3
13210 2 2-6
13211 3 3-9
13212 4 4-12
13213
13214 Best case ldr will always win. However, the more ldr instructions
13215 we issue, the less likely we are to be able to schedule them well.
13216 Using ldr instructions also increases code size.
13217
13218 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13219 for counts of 3 or 4 regs. */
13220 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13221 return false;
13222 return true;
13223 }
13224
13225 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13226 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13227 an array ORDER which describes the sequence to use when accessing the
13228 offsets that produces an ascending order. In this sequence, each
13229 offset must be larger by exactly 4 than the previous one. ORDER[0]
13230 must have been filled in with the lowest offset by the caller.
13231 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13232 we use to verify that ORDER produces an ascending order of registers.
13233 Return true if it was possible to construct such an order, false if
13234 not. */
13235
13236 static bool
13237 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13238 int *unsorted_regs)
13239 {
13240 int i;
13241 for (i = 1; i < nops; i++)
13242 {
13243 int j;
13244
13245 order[i] = order[i - 1];
13246 for (j = 0; j < nops; j++)
13247 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13248 {
13249 /* We must find exactly one offset that is higher than the
13250 previous one by 4. */
13251 if (order[i] != order[i - 1])
13252 return false;
13253 order[i] = j;
13254 }
13255 if (order[i] == order[i - 1])
13256 return false;
13257 /* The register numbers must be ascending. */
13258 if (unsorted_regs != NULL
13259 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13260 return false;
13261 }
13262 return true;
13263 }
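/* Worked example (illustrative): with UNSORTED_OFFSETS = {8, 0, 4, 12} the
   caller presets ORDER[0] = 1 (the index of offset 0); the loop then finds
   offset 4 at index 2, offset 8 at index 0 and offset 12 at index 3,
   giving ORDER = {1, 2, 0, 3}.  If some step does not find exactly one
   offset that is larger by 4, or the corresponding register numbers are
   not strictly ascending, the function returns false.  */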
13264
13265 /* Used to determine in a peephole whether a sequence of load
13266 instructions can be changed into a load-multiple instruction.
13267 NOPS is the number of separate load instructions we are examining. The
13268 first NOPS entries in OPERANDS are the destination registers, the
13269 next NOPS entries are memory operands. If this function is
13270 successful, *BASE is set to the common base register of the memory
13271 accesses; *LOAD_OFFSET is set to the first memory location's offset
13272 from that base register.
13273 REGS is an array filled in with the destination register numbers.
13274 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13275 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13276 the sequence of registers in REGS matches the loads from ascending memory
13277 locations, and the function verifies that the register numbers are
13278 themselves ascending. If CHECK_REGS is false, the register numbers
13279 are stored in the order they are found in the operands. */
13280 static int
13281 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13282 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13283 {
13284 int unsorted_regs[MAX_LDM_STM_OPS];
13285 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13286 int order[MAX_LDM_STM_OPS];
13287 rtx base_reg_rtx = NULL;
13288 int base_reg = -1;
13289 int i, ldm_case;
13290
13291 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13292 easily extended if required. */
13293 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13294
13295 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13296
13297 /* Loop over the operands and check that the memory references are
13298 suitable (i.e. immediate offsets from the same base register). At
13299 the same time, extract the target register, and the memory
13300 offsets. */
13301 for (i = 0; i < nops; i++)
13302 {
13303 rtx reg;
13304 rtx offset;
13305
13306 /* Convert a subreg of a mem into the mem itself. */
13307 if (GET_CODE (operands[nops + i]) == SUBREG)
13308 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13309
13310 gcc_assert (MEM_P (operands[nops + i]));
13311
13312 /* Don't reorder volatile memory references; it doesn't seem worth
13313 looking for the case where the order is ok anyway. */
13314 if (MEM_VOLATILE_P (operands[nops + i]))
13315 return 0;
13316
13317 offset = const0_rtx;
13318
13319 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13320 || (GET_CODE (reg) == SUBREG
13321 && REG_P (reg = SUBREG_REG (reg))))
13322 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13323 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13324 || (GET_CODE (reg) == SUBREG
13325 && REG_P (reg = SUBREG_REG (reg))))
13326 && (CONST_INT_P (offset
13327 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13328 {
13329 if (i == 0)
13330 {
13331 base_reg = REGNO (reg);
13332 base_reg_rtx = reg;
13333 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13334 return 0;
13335 }
13336 else if (base_reg != (int) REGNO (reg))
13337 /* Not addressed from the same base register. */
13338 return 0;
13339
13340 unsorted_regs[i] = (REG_P (operands[i])
13341 ? REGNO (operands[i])
13342 : REGNO (SUBREG_REG (operands[i])));
13343
13344 /* If it isn't an integer register, or if it overwrites the
13345 base register but isn't the last insn in the list, then
13346 we can't do this. */
13347 if (unsorted_regs[i] < 0
13348 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13349 || unsorted_regs[i] > 14
13350 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13351 return 0;
13352
13353 /* Don't allow SP to be loaded unless it is also the base
13354 register. It guarantees that SP is reset correctly when
13355 an LDM instruction is interrupted. Otherwise, we might
13356 end up with a corrupt stack. */
13357 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13358 return 0;
13359
13360 unsorted_offsets[i] = INTVAL (offset);
13361 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13362 order[0] = i;
13363 }
13364 else
13365 /* Not a suitable memory address. */
13366 return 0;
13367 }
13368
13369 /* All the useful information has now been extracted from the
13370 operands into unsorted_regs and unsorted_offsets; additionally,
13371 order[0] has been set to the lowest offset in the list. Sort
13372 the offsets into order, verifying that they are adjacent, and
13373 check that the register numbers are ascending. */
13374 if (!compute_offset_order (nops, unsorted_offsets, order,
13375 check_regs ? unsorted_regs : NULL))
13376 return 0;
13377
13378 if (saved_order)
13379 memcpy (saved_order, order, sizeof order);
13380
13381 if (base)
13382 {
13383 *base = base_reg;
13384
13385 for (i = 0; i < nops; i++)
13386 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13387
13388 *load_offset = unsorted_offsets[order[0]];
13389 }
13390
13391 if (TARGET_THUMB1
13392 && !peep2_reg_dead_p (nops, base_reg_rtx))
13393 return 0;
13394
13395 if (unsorted_offsets[order[0]] == 0)
13396 ldm_case = 1; /* ldmia */
13397 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13398 ldm_case = 2; /* ldmib */
13399 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13400 ldm_case = 3; /* ldmda */
13401 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13402 ldm_case = 4; /* ldmdb */
13403 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13404 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13405 ldm_case = 5;
13406 else
13407 return 0;
13408
13409 if (!multiple_operation_profitable_p (false, nops,
13410 ldm_case == 5
13411 ? unsorted_offsets[order[0]] : 0))
13412 return 0;
13413
13414 return ldm_case;
13415 }
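/* Illustrative peephole input (assumed):

     ldr r4, [r6]
     ldr r5, [r6, #4]

   yields offsets {0, 4} from the common base r6 with ascending register
   numbers, so, when profitable for the tuning target, this returns
   ldm_case 1 and the pair can be rewritten as "ldmia r6, {r4, r5}".
   Offsets starting at 4, or ending at 0 or -4, select the ldmib, ldmda and
   ldmdb cases instead, and case 5 covers a base that first needs an
   explicit add of the common offset.  */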
13416
13417 /* Used to determine in a peephole whether a sequence of store instructions can
13418 be changed into a store-multiple instruction.
13419 NOPS is the number of separate store instructions we are examining.
13420 NOPS_TOTAL is the total number of instructions recognized by the peephole
13421 pattern.
13422 The first NOPS entries in OPERANDS are the source registers, the next
13423 NOPS entries are memory operands. If this function is successful, *BASE is
13424 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13425 to the first memory location's offset from that base register. REGS is an
13426 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13427 likewise filled with the corresponding rtx's.
13428 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13429 numbers to an ascending order of stores.
13430 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13431 from ascending memory locations, and the function verifies that the register
13432 numbers are themselves ascending. If CHECK_REGS is false, the register
13433 numbers are stored in the order they are found in the operands. */
13434 static int
13435 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13436 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13437 HOST_WIDE_INT *load_offset, bool check_regs)
13438 {
13439 int unsorted_regs[MAX_LDM_STM_OPS];
13440 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13441 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13442 int order[MAX_LDM_STM_OPS];
13443 int base_reg = -1;
13444 rtx base_reg_rtx = NULL;
13445 int i, stm_case;
13446
13447 /* Write back of base register is currently only supported for Thumb 1. */
13448 int base_writeback = TARGET_THUMB1;
13449
13450 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13451 easily extended if required. */
13452 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13453
13454 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13455
13456 /* Loop over the operands and check that the memory references are
13457 suitable (i.e. immediate offsets from the same base register). At
13458 the same time, extract the target register, and the memory
13459 offsets. */
13460 for (i = 0; i < nops; i++)
13461 {
13462 rtx reg;
13463 rtx offset;
13464
13465 /* Convert a subreg of a mem into the mem itself. */
13466 if (GET_CODE (operands[nops + i]) == SUBREG)
13467 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13468
13469 gcc_assert (MEM_P (operands[nops + i]));
13470
13471 /* Don't reorder volatile memory references; it doesn't seem worth
13472 looking for the case where the order is ok anyway. */
13473 if (MEM_VOLATILE_P (operands[nops + i]))
13474 return 0;
13475
13476 offset = const0_rtx;
13477
13478 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13479 || (GET_CODE (reg) == SUBREG
13480 && REG_P (reg = SUBREG_REG (reg))))
13481 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13482 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13483 || (GET_CODE (reg) == SUBREG
13484 && REG_P (reg = SUBREG_REG (reg))))
13485 && (CONST_INT_P (offset
13486 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13487 {
13488 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13489 ? operands[i] : SUBREG_REG (operands[i]));
13490 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13491
13492 if (i == 0)
13493 {
13494 base_reg = REGNO (reg);
13495 base_reg_rtx = reg;
13496 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13497 return 0;
13498 }
13499 else if (base_reg != (int) REGNO (reg))
13500 /* Not addressed from the same base register. */
13501 return 0;
13502
13503 /* If it isn't an integer register, then we can't do this. */
13504 if (unsorted_regs[i] < 0
13505 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13506 /* The effects are unpredictable if the base register is
13507 both updated and stored. */
13508 || (base_writeback && unsorted_regs[i] == base_reg)
13509 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13510 || unsorted_regs[i] > 14)
13511 return 0;
13512
13513 unsorted_offsets[i] = INTVAL (offset);
13514 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13515 order[0] = i;
13516 }
13517 else
13518 /* Not a suitable memory address. */
13519 return 0;
13520 }
13521
13522 /* All the useful information has now been extracted from the
13523 operands into unsorted_regs and unsorted_offsets; additionally,
13524 order[0] has been set to the lowest offset in the list. Sort
13525 the offsets into order, verifying that they are adjacent, and
13526 check that the register numbers are ascending. */
13527 if (!compute_offset_order (nops, unsorted_offsets, order,
13528 check_regs ? unsorted_regs : NULL))
13529 return 0;
13530
13531 if (saved_order)
13532 memcpy (saved_order, order, sizeof order);
13533
13534 if (base)
13535 {
13536 *base = base_reg;
13537
13538 for (i = 0; i < nops; i++)
13539 {
13540 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13541 if (reg_rtxs)
13542 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13543 }
13544
13545 *load_offset = unsorted_offsets[order[0]];
13546 }
13547
13548 if (TARGET_THUMB1
13549 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13550 return 0;
13551
13552 if (unsorted_offsets[order[0]] == 0)
13553 stm_case = 1; /* stmia */
13554 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13555 stm_case = 2; /* stmib */
13556 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13557 stm_case = 3; /* stmda */
13558 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13559 stm_case = 4; /* stmdb */
13560 else
13561 return 0;
13562
13563 if (!multiple_operation_profitable_p (false, nops, 0))
13564 return 0;
13565
13566 return stm_case;
13567 }
13568 \f
13569 /* Routines for use in generating RTL. */
13570
13571 /* Generate a load-multiple instruction. COUNT is the number of loads in
13572 the instruction; REGS and MEMS are arrays containing the operands.
13573 BASEREG is the base register to be used in addressing the memory operands.
13574 WBACK_OFFSET is nonzero if the instruction should update the base
13575 register. */
13576
13577 static rtx
13578 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13579 HOST_WIDE_INT wback_offset)
13580 {
13581 int i = 0, j;
13582 rtx result;
13583
13584 if (!multiple_operation_profitable_p (false, count, 0))
13585 {
13586 rtx seq;
13587
13588 start_sequence ();
13589
13590 for (i = 0; i < count; i++)
13591 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13592
13593 if (wback_offset != 0)
13594 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13595
13596 seq = get_insns ();
13597 end_sequence ();
13598
13599 return seq;
13600 }
13601
13602 result = gen_rtx_PARALLEL (VOIDmode,
13603 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13604 if (wback_offset != 0)
13605 {
13606 XVECEXP (result, 0, 0)
13607 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13608 i = 1;
13609 count++;
13610 }
13611
13612 for (j = 0; i < count; i++, j++)
13613 XVECEXP (result, 0, i)
13614 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13615
13616 return result;
13617 }
13618
13619 /* Generate a store-multiple instruction. COUNT is the number of stores in
13620 the instruction; REGS and MEMS are arrays containing the operands.
13621 BASEREG is the base register to be used in addressing the memory operands.
13622 WBACK_OFFSET is nonzero if the instruction should update the base
13623 register. */
13624
13625 static rtx
13626 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13627 HOST_WIDE_INT wback_offset)
13628 {
13629 int i = 0, j;
13630 rtx result;
13631
13632 if (GET_CODE (basereg) == PLUS)
13633 basereg = XEXP (basereg, 0);
13634
13635 if (!multiple_operation_profitable_p (false, count, 0))
13636 {
13637 rtx seq;
13638
13639 start_sequence ();
13640
13641 for (i = 0; i < count; i++)
13642 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13643
13644 if (wback_offset != 0)
13645 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13646
13647 seq = get_insns ();
13648 end_sequence ();
13649
13650 return seq;
13651 }
13652
13653 result = gen_rtx_PARALLEL (VOIDmode,
13654 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13655 if (wback_offset != 0)
13656 {
13657 XVECEXP (result, 0, 0)
13658 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13659 i = 1;
13660 count++;
13661 }
13662
13663 for (j = 0; i < count; i++, j++)
13664 XVECEXP (result, 0, i)
13665 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13666
13667 return result;
13668 }
13669
13670 /* Generate either a load-multiple or a store-multiple instruction. This
13671 function can be used in situations where we can start with a single MEM
13672 rtx and adjust its address upwards.
13673 COUNT is the number of operations in the instruction, not counting a
13674 possible update of the base register. REGS is an array containing the
13675 register operands.
13676 BASEREG is the base register to be used in addressing the memory operands,
13677 which are constructed from BASEMEM.
13678 WRITE_BACK specifies whether the generated instruction should include an
13679 update of the base register.
13680 OFFSETP is used to pass an offset to and from this function; this offset
13681 is not used when constructing the address (instead BASEMEM should have an
13682 appropriate offset in its address); it is used only for setting
13683 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13684
13685 static rtx
13686 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13687 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13688 {
13689 rtx mems[MAX_LDM_STM_OPS];
13690 HOST_WIDE_INT offset = *offsetp;
13691 int i;
13692
13693 gcc_assert (count <= MAX_LDM_STM_OPS);
13694
13695 if (GET_CODE (basereg) == PLUS)
13696 basereg = XEXP (basereg, 0);
13697
13698 for (i = 0; i < count; i++)
13699 {
13700 rtx addr = plus_constant (Pmode, basereg, i * 4);
13701 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13702 offset += 4;
13703 }
13704
13705 if (write_back)
13706 *offsetp = offset;
13707
13708 if (is_load)
13709 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13710 write_back ? 4 * count : 0);
13711 else
13712 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13713 write_back ? 4 * count : 0);
13714 }
13715
13716 rtx
13717 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13718 rtx basemem, HOST_WIDE_INT *offsetp)
13719 {
13720 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13721 offsetp);
13722 }
13723
13724 rtx
13725 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13726 rtx basemem, HOST_WIDE_INT *offsetp)
13727 {
13728 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13729 offsetp);
13730 }
13731
13732 /* Called from a peephole2 expander to turn a sequence of loads into an
13733 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13734 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13735 is true if we can reorder the registers because they are used commutatively
13736 subsequently.
13737 Returns true iff we could generate a new instruction. */
13738
13739 bool
13740 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13741 {
13742 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13743 rtx mems[MAX_LDM_STM_OPS];
13744 int i, j, base_reg;
13745 rtx base_reg_rtx;
13746 HOST_WIDE_INT offset;
13747 int write_back = FALSE;
13748 int ldm_case;
13749 rtx addr;
13750
13751 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13752 &base_reg, &offset, !sort_regs);
13753
13754 if (ldm_case == 0)
13755 return false;
13756
13757 if (sort_regs)
13758 for (i = 0; i < nops - 1; i++)
13759 for (j = i + 1; j < nops; j++)
13760 if (regs[i] > regs[j])
13761 {
13762 int t = regs[i];
13763 regs[i] = regs[j];
13764 regs[j] = t;
13765 }
13766 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13767
13768 if (TARGET_THUMB1)
13769 {
13770 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13771 gcc_assert (ldm_case == 1 || ldm_case == 5);
13772 write_back = TRUE;
13773 }
13774
13775 if (ldm_case == 5)
13776 {
13777 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13778 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13779 offset = 0;
13780 if (!TARGET_THUMB1)
13781 base_reg_rtx = newbase;
13782 }
13783
13784 for (i = 0; i < nops; i++)
13785 {
13786 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13787 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13788 SImode, addr, 0);
13789 }
13790 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13791 write_back ? offset + i * 4 : 0));
13792 return true;
13793 }
13794
13795 /* Called from a peephole2 expander to turn a sequence of stores into an
13796 STM instruction. OPERANDS are the operands found by the peephole matcher;
13797 NOPS indicates how many separate stores we are trying to combine.
13798 Returns true iff we could generate a new instruction. */
13799
13800 bool
13801 gen_stm_seq (rtx *operands, int nops)
13802 {
13803 int i;
13804 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13805 rtx mems[MAX_LDM_STM_OPS];
13806 int base_reg;
13807 rtx base_reg_rtx;
13808 HOST_WIDE_INT offset;
13809 int write_back = FALSE;
13810 int stm_case;
13811 rtx addr;
13812 bool base_reg_dies;
13813
13814 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13815 mem_order, &base_reg, &offset, true);
13816
13817 if (stm_case == 0)
13818 return false;
13819
13820 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13821
13822 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13823 if (TARGET_THUMB1)
13824 {
13825 gcc_assert (base_reg_dies);
13826 write_back = TRUE;
13827 }
13828
13829 if (stm_case == 5)
13830 {
13831 gcc_assert (base_reg_dies);
13832 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13833 offset = 0;
13834 }
13835
13836 addr = plus_constant (Pmode, base_reg_rtx, offset);
13837
13838 for (i = 0; i < nops; i++)
13839 {
13840 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13841 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13842 SImode, addr, 0);
13843 }
13844 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13845 write_back ? offset + i * 4 : 0));
13846 return true;
13847 }
13848
13849 /* Called from a peephole2 expander to turn a sequence of stores that are
13850 preceded by constant loads into an STM instruction. OPERANDS are the
13851 operands found by the peephole matcher; NOPS indicates how many
13852 separate stores we are trying to combine; there are 2 * NOPS
13853 instructions in the peephole.
13854 Returns true iff we could generate a new instruction. */
13855
13856 bool
13857 gen_const_stm_seq (rtx *operands, int nops)
13858 {
13859 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13860 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13861 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13862 rtx mems[MAX_LDM_STM_OPS];
13863 int base_reg;
13864 rtx base_reg_rtx;
13865 HOST_WIDE_INT offset;
13866 int write_back = FALSE;
13867 int stm_case;
13868 rtx addr;
13869 bool base_reg_dies;
13870 int i, j;
13871 HARD_REG_SET allocated;
13872
13873 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13874 mem_order, &base_reg, &offset, false);
13875
13876 if (stm_case == 0)
13877 return false;
13878
13879 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13880
13881 /* If the same register is used more than once, try to find a free
13882 register. */
13883 CLEAR_HARD_REG_SET (allocated);
13884 for (i = 0; i < nops; i++)
13885 {
13886 for (j = i + 1; j < nops; j++)
13887 if (regs[i] == regs[j])
13888 {
13889 rtx t = peep2_find_free_register (0, nops * 2,
13890 TARGET_THUMB1 ? "l" : "r",
13891 SImode, &allocated);
13892 if (t == NULL_RTX)
13893 return false;
13894 reg_rtxs[i] = t;
13895 regs[i] = REGNO (t);
13896 }
13897 }
13898
13899 /* Compute an ordering that maps the register numbers to an ascending
13900 sequence. */
13901 reg_order[0] = 0;
13902 for (i = 0; i < nops; i++)
13903 if (regs[i] < regs[reg_order[0]])
13904 reg_order[0] = i;
13905
13906 for (i = 1; i < nops; i++)
13907 {
13908 int this_order = reg_order[i - 1];
13909 for (j = 0; j < nops; j++)
13910 if (regs[j] > regs[reg_order[i - 1]]
13911 && (this_order == reg_order[i - 1]
13912 || regs[j] < regs[this_order]))
13913 this_order = j;
13914 reg_order[i] = this_order;
13915 }
13916
13917 /* Ensure that registers that must be live after the instruction end
13918 up with the correct value. */
13919 for (i = 0; i < nops; i++)
13920 {
13921 int this_order = reg_order[i];
13922 if ((this_order != mem_order[i]
13923 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13924 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13925 return false;
13926 }
13927
13928 /* Load the constants. */
13929 for (i = 0; i < nops; i++)
13930 {
13931 rtx op = operands[2 * nops + mem_order[i]];
13932 sorted_regs[i] = regs[reg_order[i]];
13933 emit_move_insn (reg_rtxs[reg_order[i]], op);
13934 }
13935
13936 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13937
13938 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13939 if (TARGET_THUMB1)
13940 {
13941 gcc_assert (base_reg_dies);
13942 write_back = TRUE;
13943 }
13944
13945 if (stm_case == 5)
13946 {
13947 gcc_assert (base_reg_dies);
13948 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13949 offset = 0;
13950 }
13951
13952 addr = plus_constant (Pmode, base_reg_rtx, offset);
13953
13954 for (i = 0; i < nops; i++)
13955 {
13956 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13957 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13958 SImode, addr, 0);
13959 }
13960 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13961 write_back ? offset + i * 4 : 0));
13962 return true;
13963 }
13964
13965 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13966 unaligned copies on processors which support unaligned semantics for those
13967 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13968 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13969 An interleave factor of 1 (the minimum) will perform no interleaving.
13970 Load/store multiple are used for aligned addresses where possible. */
13971
13972 static void
13973 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13974 HOST_WIDE_INT length,
13975 unsigned int interleave_factor)
13976 {
13977 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13978 int *regnos = XALLOCAVEC (int, interleave_factor);
13979 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13980 HOST_WIDE_INT i, j;
13981 HOST_WIDE_INT remaining = length, words;
13982 rtx halfword_tmp = NULL, byte_tmp = NULL;
13983 rtx dst, src;
13984 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13985 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13986 HOST_WIDE_INT srcoffset, dstoffset;
13987 HOST_WIDE_INT src_autoinc, dst_autoinc;
13988 rtx mem, addr;
13989
13990 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
13991
13992 /* Use hard registers if we have aligned source or destination so we can use
13993 load/store multiple with contiguous registers. */
13994 if (dst_aligned || src_aligned)
13995 for (i = 0; i < interleave_factor; i++)
13996 regs[i] = gen_rtx_REG (SImode, i);
13997 else
13998 for (i = 0; i < interleave_factor; i++)
13999 regs[i] = gen_reg_rtx (SImode);
14000
14001 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14002 src = copy_addr_to_reg (XEXP (srcbase, 0));
14003
14004 srcoffset = dstoffset = 0;
14005
14006 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14007 For copying the last bytes we want to subtract this offset again. */
14008 src_autoinc = dst_autoinc = 0;
14009
14010 for (i = 0; i < interleave_factor; i++)
14011 regnos[i] = i;
14012
14013 /* Copy BLOCK_SIZE_BYTES chunks. */
14014
14015 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14016 {
14017 /* Load words. */
14018 if (src_aligned && interleave_factor > 1)
14019 {
14020 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14021 TRUE, srcbase, &srcoffset));
14022 src_autoinc += UNITS_PER_WORD * interleave_factor;
14023 }
14024 else
14025 {
14026 for (j = 0; j < interleave_factor; j++)
14027 {
14028 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14029 - src_autoinc));
14030 mem = adjust_automodify_address (srcbase, SImode, addr,
14031 srcoffset + j * UNITS_PER_WORD);
14032 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14033 }
14034 srcoffset += block_size_bytes;
14035 }
14036
14037 /* Store words. */
14038 if (dst_aligned && interleave_factor > 1)
14039 {
14040 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14041 TRUE, dstbase, &dstoffset));
14042 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14043 }
14044 else
14045 {
14046 for (j = 0; j < interleave_factor; j++)
14047 {
14048 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14049 - dst_autoinc));
14050 mem = adjust_automodify_address (dstbase, SImode, addr,
14051 dstoffset + j * UNITS_PER_WORD);
14052 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14053 }
14054 dstoffset += block_size_bytes;
14055 }
14056
14057 remaining -= block_size_bytes;
14058 }
14059
14060 /* Copy any whole words left (note these aren't interleaved with any
14061 subsequent halfword/byte load/stores in the interests of simplicity). */
14062
14063 words = remaining / UNITS_PER_WORD;
14064
14065 gcc_assert (words < interleave_factor);
14066
14067 if (src_aligned && words > 1)
14068 {
14069 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14070 &srcoffset));
14071 src_autoinc += UNITS_PER_WORD * words;
14072 }
14073 else
14074 {
14075 for (j = 0; j < words; j++)
14076 {
14077 addr = plus_constant (Pmode, src,
14078 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14079 mem = adjust_automodify_address (srcbase, SImode, addr,
14080 srcoffset + j * UNITS_PER_WORD);
14081 if (src_aligned)
14082 emit_move_insn (regs[j], mem);
14083 else
14084 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14085 }
14086 srcoffset += words * UNITS_PER_WORD;
14087 }
14088
14089 if (dst_aligned && words > 1)
14090 {
14091 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14092 &dstoffset));
14093 dst_autoinc += words * UNITS_PER_WORD;
14094 }
14095 else
14096 {
14097 for (j = 0; j < words; j++)
14098 {
14099 addr = plus_constant (Pmode, dst,
14100 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14101 mem = adjust_automodify_address (dstbase, SImode, addr,
14102 dstoffset + j * UNITS_PER_WORD);
14103 if (dst_aligned)
14104 emit_move_insn (mem, regs[j]);
14105 else
14106 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14107 }
14108 dstoffset += words * UNITS_PER_WORD;
14109 }
14110
14111 remaining -= words * UNITS_PER_WORD;
14112
14113 gcc_assert (remaining < 4);
14114
14115 /* Copy a halfword if necessary. */
14116
14117 if (remaining >= 2)
14118 {
14119 halfword_tmp = gen_reg_rtx (SImode);
14120
14121 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14122 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14123 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14124
14125 /* Either write out immediately, or delay until we've loaded the last
14126 byte, depending on interleave factor. */
14127 if (interleave_factor == 1)
14128 {
14129 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14130 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14131 emit_insn (gen_unaligned_storehi (mem,
14132 gen_lowpart (HImode, halfword_tmp)));
14133 halfword_tmp = NULL;
14134 dstoffset += 2;
14135 }
14136
14137 remaining -= 2;
14138 srcoffset += 2;
14139 }
14140
14141 gcc_assert (remaining < 2);
14142
14143 /* Copy last byte. */
14144
14145 if ((remaining & 1) != 0)
14146 {
14147 byte_tmp = gen_reg_rtx (SImode);
14148
14149 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14150 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14151 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14152
14153 if (interleave_factor == 1)
14154 {
14155 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14156 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14157 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14158 byte_tmp = NULL;
14159 dstoffset++;
14160 }
14161
14162 remaining--;
14163 srcoffset++;
14164 }
14165
14166 /* Store last halfword if we haven't done so already. */
14167
14168 if (halfword_tmp)
14169 {
14170 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14171 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14172 emit_insn (gen_unaligned_storehi (mem,
14173 gen_lowpart (HImode, halfword_tmp)));
14174 dstoffset += 2;
14175 }
14176
14177 /* Likewise for last byte. */
14178
14179 if (byte_tmp)
14180 {
14181 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14182 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14183 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14184 dstoffset++;
14185 }
14186
14187 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14188 }
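/* Worked example (illustrative): copying LENGTH == 23 bytes with
   INTERLEAVE_FACTOR == 2 proceeds as two 8-byte blocks (two word loads
   followed by two word stores each), then one leftover word, then a
   halfword and finally a single byte: 16 + 4 + 2 + 1 == 23.  The 8-byte
   blocks use ldm/stm on hard registers r0-r1 when the corresponding side
   is word-aligned; otherwise unaligned ldr/str on pseudo registers are
   emitted.  */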
14189
14190 /* From mips_adjust_block_mem:
14191
14192 Helper function for doing a loop-based block operation on memory
14193 reference MEM. Each iteration of the loop will operate on LENGTH
14194 bytes of MEM.
14195
14196 Create a new base register for use within the loop and point it to
14197 the start of MEM. Create a new memory reference that uses this
14198 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14199
14200 static void
14201 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14202 rtx *loop_mem)
14203 {
14204 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14205
14206 /* Although the new mem does not refer to a known location,
14207 it does keep up to LENGTH bytes of alignment. */
14208 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14209 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14210 }
14211
14212 /* From mips_block_move_loop:
14213
14214 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14215 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14216 the memory regions do not overlap. */
14217
14218 static void
14219 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14220 unsigned int interleave_factor,
14221 HOST_WIDE_INT bytes_per_iter)
14222 {
14223 rtx src_reg, dest_reg, final_src, test;
14224 HOST_WIDE_INT leftover;
14225
14226 leftover = length % bytes_per_iter;
14227 length -= leftover;
14228
14229 /* Create registers and memory references for use within the loop. */
14230 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14231 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14232
14233 /* Calculate the value that SRC_REG should have after the last iteration of
14234 the loop. */
14235 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14236 0, 0, OPTAB_WIDEN);
14237
14238 /* Emit the start of the loop. */
14239 rtx_code_label *label = gen_label_rtx ();
14240 emit_label (label);
14241
14242 /* Emit the loop body. */
14243 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14244 interleave_factor);
14245
14246 /* Move on to the next block. */
14247 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14248 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14249
14250 /* Emit the loop condition. */
14251 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14252 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14253
14254 /* Mop up any left-over bytes. */
14255 if (leftover)
14256 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14257 }
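
/* As a rough illustration (values assumed, not emitted literally), a call
   with LENGTH == 40 and BYTES_PER_ITER == 16 expands to something like:

       rS = &src;  rD = &dst;  rE = rS + 32;
     L:
       <16-byte straight copy from rS to rD>
       rS += 16;  rD += 16;
       if (rS != rE) goto L;
       <8-byte straight copy for the leftover bytes>  */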
14258
14259 /* Emit a block move when either the source or destination is unaligned (not
14260 aligned to a four-byte boundary). This may need further tuning depending on
14261 core type, optimize_size setting, etc. */
14262
14263 static int
14264 arm_movmemqi_unaligned (rtx *operands)
14265 {
14266 HOST_WIDE_INT length = INTVAL (operands[2]);
14267
14268 if (optimize_size)
14269 {
14270 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14271 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14272 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14273 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14274 or dst_aligned though: allow more interleaving in those cases since the
14275 resulting code can be smaller. */
14276 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14277 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14278
14279 if (length > 12)
14280 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14281 interleave_factor, bytes_per_iter);
14282 else
14283 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14284 interleave_factor);
14285 }
14286 else
14287 {
14288 /* Note that the loop created by arm_block_move_unaligned_loop may be
14289 subject to loop unrolling, which makes tuning this condition a little
14290 redundant. */
14291 if (length > 32)
14292 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14293 else
14294 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14295 }
14296
14297 return 1;
14298 }
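
/* Illustrative examples of the thresholds above (sizes assumed): at -Os a
   24-byte copy with one side word-aligned takes the loop with 8 bytes per
   iteration (three iterations, no leftover), whereas when optimizing for
   speed the same 24-byte copy stays below the 32-byte cutoff and is emitted
   as straight-line code with interleave factor 4.  */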
14299
14300 int
14301 arm_gen_movmemqi (rtx *operands)
14302 {
14303 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14304 HOST_WIDE_INT srcoffset, dstoffset;
14305 rtx src, dst, srcbase, dstbase;
14306 rtx part_bytes_reg = NULL;
14307 rtx mem;
14308
14309 if (!CONST_INT_P (operands[2])
14310 || !CONST_INT_P (operands[3])
14311 || INTVAL (operands[2]) > 64)
14312 return 0;
14313
14314 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14315 return arm_movmemqi_unaligned (operands);
14316
14317 if (INTVAL (operands[3]) & 3)
14318 return 0;
14319
14320 dstbase = operands[0];
14321 srcbase = operands[1];
14322
14323 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14324 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14325
14326 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14327 out_words_to_go = INTVAL (operands[2]) / 4;
14328 last_bytes = INTVAL (operands[2]) & 3;
14329 dstoffset = srcoffset = 0;
14330
14331 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14332 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14333
14334 while (in_words_to_go >= 2)
14335 {
14336 if (in_words_to_go > 4)
14337 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14338 TRUE, srcbase, &srcoffset));
14339 else
14340 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14341 src, FALSE, srcbase,
14342 &srcoffset));
14343
14344 if (out_words_to_go)
14345 {
14346 if (out_words_to_go > 4)
14347 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14348 TRUE, dstbase, &dstoffset));
14349 else if (out_words_to_go != 1)
14350 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14351 out_words_to_go, dst,
14352 (last_bytes == 0
14353 ? FALSE : TRUE),
14354 dstbase, &dstoffset));
14355 else
14356 {
14357 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14358 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14359 if (last_bytes != 0)
14360 {
14361 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14362 dstoffset += 4;
14363 }
14364 }
14365 }
14366
14367 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14368 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14369 }
14370
14371 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14372 if (out_words_to_go)
14373 {
14374 rtx sreg;
14375
14376 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14377 sreg = copy_to_reg (mem);
14378
14379 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14380 emit_move_insn (mem, sreg);
14381 in_words_to_go--;
14382
14383 gcc_assert (!in_words_to_go); /* Sanity check */
14384 }
14385
14386 if (in_words_to_go)
14387 {
14388 gcc_assert (in_words_to_go > 0);
14389
14390 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14391 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14392 }
14393
14394 gcc_assert (!last_bytes || part_bytes_reg);
14395
14396 if (BYTES_BIG_ENDIAN && last_bytes)
14397 {
14398 rtx tmp = gen_reg_rtx (SImode);
14399
14400 /* The bytes we want are in the top end of the word. */
14401 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14402 GEN_INT (8 * (4 - last_bytes))));
14403 part_bytes_reg = tmp;
14404
14405 while (last_bytes)
14406 {
14407 mem = adjust_automodify_address (dstbase, QImode,
14408 plus_constant (Pmode, dst,
14409 last_bytes - 1),
14410 dstoffset + last_bytes - 1);
14411 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14412
14413 if (--last_bytes)
14414 {
14415 tmp = gen_reg_rtx (SImode);
14416 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14417 part_bytes_reg = tmp;
14418 }
14419 }
14420
14421 }
14422 else
14423 {
14424 if (last_bytes > 1)
14425 {
14426 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14427 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14428 last_bytes -= 2;
14429 if (last_bytes)
14430 {
14431 rtx tmp = gen_reg_rtx (SImode);
14432 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14433 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14434 part_bytes_reg = tmp;
14435 dstoffset += 2;
14436 }
14437 }
14438
14439 if (last_bytes)
14440 {
14441 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14442 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14443 }
14444 }
14445
14446 return 1;
14447 }
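
/* As an illustration (not a guaranteed sequence), a 14-byte copy with both
   addresses word-aligned expands roughly to, on little-endian:

       ldmia   rS, {r0, r1, r2, r3}   @ the last word is loaded whole
       stmia   rD!, {r0, r1, r2}
       strh    r3, [rD]               @ only its low halfword is stored

   i.e. three whole words plus a trailing halfword.  */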
14448
14449 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14450 by mode size. */
14451 inline static rtx
14452 next_consecutive_mem (rtx mem)
14453 {
14454 machine_mode mode = GET_MODE (mem);
14455 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14456 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14457
14458 return adjust_automodify_address (mem, mode, addr, offset);
14459 }
14460
14461 /* Copy using LDRD/STRD instructions whenever possible.
14462 Returns true upon success. */
14463 bool
14464 gen_movmem_ldrd_strd (rtx *operands)
14465 {
14466 unsigned HOST_WIDE_INT len;
14467 HOST_WIDE_INT align;
14468 rtx src, dst, base;
14469 rtx reg0;
14470 bool src_aligned, dst_aligned;
14471 bool src_volatile, dst_volatile;
14472
14473 gcc_assert (CONST_INT_P (operands[2]));
14474 gcc_assert (CONST_INT_P (operands[3]));
14475
14476 len = UINTVAL (operands[2]);
14477 if (len > 64)
14478 return false;
14479
14480 /* Maximum alignment we can assume for both src and dst buffers. */
14481 align = INTVAL (operands[3]);
14482
14483 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14484 return false;
14485
14486 /* Place src and dst addresses in registers
14487 and update the corresponding mem rtx. */
14488 dst = operands[0];
14489 dst_volatile = MEM_VOLATILE_P (dst);
14490 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14491 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14492 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14493
14494 src = operands[1];
14495 src_volatile = MEM_VOLATILE_P (src);
14496 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14497 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14498 src = adjust_automodify_address (src, VOIDmode, base, 0);
14499
14500 if (!unaligned_access && !(src_aligned && dst_aligned))
14501 return false;
14502
14503 if (src_volatile || dst_volatile)
14504 return false;
14505
14506 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14507 if (!(dst_aligned || src_aligned))
14508 return arm_gen_movmemqi (operands);
14509
14510 /* If either the src or dst is unaligned we'll be accessing it as pairs
14511 of unaligned SImode accesses. Otherwise we can generate DImode
14512 ldrd/strd instructions. */
14513 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14514 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14515
14516 while (len >= 8)
14517 {
14518 len -= 8;
14519 reg0 = gen_reg_rtx (DImode);
14520 rtx low_reg = NULL_RTX;
14521 rtx hi_reg = NULL_RTX;
14522
14523 if (!src_aligned || !dst_aligned)
14524 {
14525 low_reg = gen_lowpart (SImode, reg0);
14526 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14527 }
14528 if (src_aligned)
14529 emit_move_insn (reg0, src);
14530 else
14531 {
14532 emit_insn (gen_unaligned_loadsi (low_reg, src));
14533 src = next_consecutive_mem (src);
14534 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14535 }
14536
14537 if (dst_aligned)
14538 emit_move_insn (dst, reg0);
14539 else
14540 {
14541 emit_insn (gen_unaligned_storesi (dst, low_reg));
14542 dst = next_consecutive_mem (dst);
14543 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14544 }
14545
14546 src = next_consecutive_mem (src);
14547 dst = next_consecutive_mem (dst);
14548 }
14549
14550 gcc_assert (len < 8);
14551 if (len >= 4)
14552 {
14553 /* More than a word but less than a double-word to copy. Copy a word. */
14554 reg0 = gen_reg_rtx (SImode);
14555 src = adjust_address (src, SImode, 0);
14556 dst = adjust_address (dst, SImode, 0);
14557 if (src_aligned)
14558 emit_move_insn (reg0, src);
14559 else
14560 emit_insn (gen_unaligned_loadsi (reg0, src));
14561
14562 if (dst_aligned)
14563 emit_move_insn (dst, reg0);
14564 else
14565 emit_insn (gen_unaligned_storesi (dst, reg0));
14566
14567 src = next_consecutive_mem (src);
14568 dst = next_consecutive_mem (dst);
14569 len -= 4;
14570 }
14571
14572 if (len == 0)
14573 return true;
14574
14575 /* Copy the remaining bytes. */
14576 if (len >= 2)
14577 {
14578 dst = adjust_address (dst, HImode, 0);
14579 src = adjust_address (src, HImode, 0);
14580 reg0 = gen_reg_rtx (SImode);
14581 if (src_aligned)
14582 emit_insn (gen_zero_extendhisi2 (reg0, src));
14583 else
14584 emit_insn (gen_unaligned_loadhiu (reg0, src));
14585
14586 if (dst_aligned)
14587 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14588 else
14589 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14590
14591 src = next_consecutive_mem (src);
14592 dst = next_consecutive_mem (dst);
14593 if (len == 2)
14594 return true;
14595 }
14596
14597 dst = adjust_address (dst, QImode, 0);
14598 src = adjust_address (src, QImode, 0);
14599 reg0 = gen_reg_rtx (QImode);
14600 emit_move_insn (reg0, src);
14601 emit_move_insn (dst, reg0);
14602 return true;
14603 }
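
/* Example of the expansion above (operands assumed): copying 17 bytes when
   both source and destination are word-aligned typically yields two
   LDRD/STRD pairs for the first 16 bytes followed by a single LDRB/STRB for
   the last byte.  If only one side is aligned, the aligned side still uses
   DImode moves (typically LDRD or STRD) while the unaligned side is
   accessed as pairs of unaligned SImode loads or stores.  */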
14604
14605 /* Select a dominance comparison mode if possible for a test of the general
14606 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14607 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14608 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14609 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14610 In all cases OP will be either EQ or NE, but we don't need to know which
14611 here. If we are unable to support a dominance comparison we return
14612 CC mode. This will then fail to match for the RTL expressions that
14613 generate this call. */
14614 machine_mode
14615 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14616 {
14617 enum rtx_code cond1, cond2;
14618 int swapped = 0;
14619
14620 /* Currently we will probably get the wrong result if the individual
14621 comparisons are not simple. This also ensures that it is safe to
14622 reverse a comparison if necessary. */
14623 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14624 != CCmode)
14625 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14626 != CCmode))
14627 return CCmode;
14628
14629 /* The if_then_else variant of this tests the second condition if the
14630 first passes, but is true if the first fails. Reverse the first
14631 condition to get a true "inclusive-or" expression. */
14632 if (cond_or == DOM_CC_NX_OR_Y)
14633 cond1 = reverse_condition (cond1);
14634
14635 /* If the comparisons are not equal, and one doesn't dominate the other,
14636 then we can't do this. */
14637 if (cond1 != cond2
14638 && !comparison_dominates_p (cond1, cond2)
14639 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14640 return CCmode;
14641
14642 if (swapped)
14643 std::swap (cond1, cond2);
14644
14645 switch (cond1)
14646 {
14647 case EQ:
14648 if (cond_or == DOM_CC_X_AND_Y)
14649 return CC_DEQmode;
14650
14651 switch (cond2)
14652 {
14653 case EQ: return CC_DEQmode;
14654 case LE: return CC_DLEmode;
14655 case LEU: return CC_DLEUmode;
14656 case GE: return CC_DGEmode;
14657 case GEU: return CC_DGEUmode;
14658 default: gcc_unreachable ();
14659 }
14660
14661 case LT:
14662 if (cond_or == DOM_CC_X_AND_Y)
14663 return CC_DLTmode;
14664
14665 switch (cond2)
14666 {
14667 case LT:
14668 return CC_DLTmode;
14669 case LE:
14670 return CC_DLEmode;
14671 case NE:
14672 return CC_DNEmode;
14673 default:
14674 gcc_unreachable ();
14675 }
14676
14677 case GT:
14678 if (cond_or == DOM_CC_X_AND_Y)
14679 return CC_DGTmode;
14680
14681 switch (cond2)
14682 {
14683 case GT:
14684 return CC_DGTmode;
14685 case GE:
14686 return CC_DGEmode;
14687 case NE:
14688 return CC_DNEmode;
14689 default:
14690 gcc_unreachable ();
14691 }
14692
14693 case LTU:
14694 if (cond_or == DOM_CC_X_AND_Y)
14695 return CC_DLTUmode;
14696
14697 switch (cond2)
14698 {
14699 case LTU:
14700 return CC_DLTUmode;
14701 case LEU:
14702 return CC_DLEUmode;
14703 case NE:
14704 return CC_DNEmode;
14705 default:
14706 gcc_unreachable ();
14707 }
14708
14709 case GTU:
14710 if (cond_or == DOM_CC_X_AND_Y)
14711 return CC_DGTUmode;
14712
14713 switch (cond2)
14714 {
14715 case GTU:
14716 return CC_DGTUmode;
14717 case GEU:
14718 return CC_DGEUmode;
14719 case NE:
14720 return CC_DNEmode;
14721 default:
14722 gcc_unreachable ();
14723 }
14724
14725 /* The remaining cases only occur when both comparisons are the
14726 same. */
14727 case NE:
14728 gcc_assert (cond1 == cond2);
14729 return CC_DNEmode;
14730
14731 case LE:
14732 gcc_assert (cond1 == cond2);
14733 return CC_DLEmode;
14734
14735 case GE:
14736 gcc_assert (cond1 == cond2);
14737 return CC_DGEmode;
14738
14739 case LEU:
14740 gcc_assert (cond1 == cond2);
14741 return CC_DLEUmode;
14742
14743 case GEU:
14744 gcc_assert (cond1 == cond2);
14745 return CC_DGEUmode;
14746
14747 default:
14748 gcc_unreachable ();
14749 }
14750 }
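
/* Example (for illustration only): for a test such as
   (ne (ior (gt a b) (ge c d)) (const_int 0)) we get cond1 == GT and
   cond2 == GE with COND_OR == DOM_CC_X_OR_Y; GT dominates GE, so the
   function returns CC_DGEmode.  If neither condition dominated the other,
   say (ltu ...) combined with (gt ...), CCmode would be returned and the
   calling pattern would simply fail to match.  */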
14751
14752 machine_mode
14753 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14754 {
14755 /* All floating point compares return CCFP if it is an equality
14756 comparison, and CCFPE otherwise. */
14757 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14758 {
14759 switch (op)
14760 {
14761 case EQ:
14762 case NE:
14763 case UNORDERED:
14764 case ORDERED:
14765 case UNLT:
14766 case UNLE:
14767 case UNGT:
14768 case UNGE:
14769 case UNEQ:
14770 case LTGT:
14771 return CCFPmode;
14772
14773 case LT:
14774 case LE:
14775 case GT:
14776 case GE:
14777 return CCFPEmode;
14778
14779 default:
14780 gcc_unreachable ();
14781 }
14782 }
14783
14784 /* A compare with a shifted operand. Because of canonicalization, the
14785 comparison will have to be swapped when we emit the assembler. */
14786 if (GET_MODE (y) == SImode
14787 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14788 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14789 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14790 || GET_CODE (x) == ROTATERT))
14791 return CC_SWPmode;
14792
14793 /* This operation is performed swapped, but since we only rely on the Z
14794 flag we don't need an additional mode. */
14795 if (GET_MODE (y) == SImode
14796 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14797 && GET_CODE (x) == NEG
14798 && (op == EQ || op == NE))
14799 return CC_Zmode;
14800
14801 /* This is a special case that is used by combine to allow a
14802 comparison of a shifted byte load to be split into a zero-extend
14803 followed by a comparison of the shifted integer (only valid for
14804 equalities and unsigned inequalities). */
14805 if (GET_MODE (x) == SImode
14806 && GET_CODE (x) == ASHIFT
14807 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14808 && GET_CODE (XEXP (x, 0)) == SUBREG
14809 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14810 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14811 && (op == EQ || op == NE
14812 || op == GEU || op == GTU || op == LTU || op == LEU)
14813 && CONST_INT_P (y))
14814 return CC_Zmode;
14815
14816 /* A construct for a conditional compare, if the false arm contains
14817 0, then both conditions must be true, otherwise either condition
14818 must be true. Not all conditions are possible, so CCmode is
14819 returned if it can't be done. */
14820 if (GET_CODE (x) == IF_THEN_ELSE
14821 && (XEXP (x, 2) == const0_rtx
14822 || XEXP (x, 2) == const1_rtx)
14823 && COMPARISON_P (XEXP (x, 0))
14824 && COMPARISON_P (XEXP (x, 1)))
14825 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14826 INTVAL (XEXP (x, 2)));
14827
14828 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14829 if (GET_CODE (x) == AND
14830 && (op == EQ || op == NE)
14831 && COMPARISON_P (XEXP (x, 0))
14832 && COMPARISON_P (XEXP (x, 1)))
14833 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14834 DOM_CC_X_AND_Y);
14835
14836 if (GET_CODE (x) == IOR
14837 && (op == EQ || op == NE)
14838 && COMPARISON_P (XEXP (x, 0))
14839 && COMPARISON_P (XEXP (x, 1)))
14840 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14841 DOM_CC_X_OR_Y);
14842
14843 /* An operation (on Thumb) where we want to test for a single bit.
14844 This is done by shifting that bit up into the top bit of a
14845 scratch register; we can then branch on the sign bit. */
14846 if (TARGET_THUMB1
14847 && GET_MODE (x) == SImode
14848 && (op == EQ || op == NE)
14849 && GET_CODE (x) == ZERO_EXTRACT
14850 && XEXP (x, 1) == const1_rtx)
14851 return CC_Nmode;
14852
14853 /* An operation that sets the condition codes as a side-effect, the
14854 V flag is not set correctly, so we can only use comparisons where
14855 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14856 instead.) */
14857 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14858 if (GET_MODE (x) == SImode
14859 && y == const0_rtx
14860 && (op == EQ || op == NE || op == LT || op == GE)
14861 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14862 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14863 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14864 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14865 || GET_CODE (x) == LSHIFTRT
14866 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14867 || GET_CODE (x) == ROTATERT
14868 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14869 return CC_NOOVmode;
14870
14871 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14872 return CC_Zmode;
14873
14874 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14875 && GET_CODE (x) == PLUS
14876 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14877 return CC_Cmode;
14878
14879 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14880 {
14881 switch (op)
14882 {
14883 case EQ:
14884 case NE:
14885 /* A DImode comparison against zero can be implemented by
14886 or'ing the two halves together. */
14887 if (y == const0_rtx)
14888 return CC_Zmode;
14889
14890 /* We can do an equality test in three Thumb instructions. */
14891 if (!TARGET_32BIT)
14892 return CC_Zmode;
14893
14894 /* FALLTHROUGH */
14895
14896 case LTU:
14897 case LEU:
14898 case GTU:
14899 case GEU:
14900 /* DImode unsigned comparisons can be implemented by cmp +
14901 cmpeq without a scratch register. Not worth doing in
14902 Thumb-2. */
14903 if (TARGET_32BIT)
14904 return CC_CZmode;
14905
14906 /* FALLTHROUGH */
14907
14908 case LT:
14909 case LE:
14910 case GT:
14911 case GE:
14912 /* DImode signed and unsigned comparisons can be implemented
14913 by cmp + sbcs with a scratch register, but that does not
14914 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14915 gcc_assert (op != EQ && op != NE);
14916 return CC_NCVmode;
14917
14918 default:
14919 gcc_unreachable ();
14920 }
14921 }
14922
14923 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14924 return GET_MODE (x);
14925
14926 return CCmode;
14927 }
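
/* A few illustrative mappings (operand names assumed):
     op LTU, x (plus r0 r1), y r1              -> CC_Cmode   (carry test)
     op EQ,  x (neg r0), y r1                  -> CC_Zmode   (only Z used)
     op GE,  x (ashift r0 (const_int 2)), y r1 -> CC_SWPmode (emitted swapped)
     op EQ,  x (reg:DI d0), y (const_int 0)    -> CC_Zmode   (orr the halves)  */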
14928
14929 /* X and Y are two things to compare using CODE. Emit the compare insn and
14930 return the rtx for the CC register in the proper mode. SCRATCH is an
14931 SImode register that may be used as a scratch for DImode comparisons. */
14932 rtx
14933 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14934 {
14935 machine_mode mode;
14936 rtx cc_reg;
14937 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14938
14939 /* We might have X as a constant, Y as a register because of the predicates
14940 used for cmpdi. If so, force X to a register here. */
14941 if (dimode_comparison && !REG_P (x))
14942 x = force_reg (DImode, x);
14943
14944 mode = SELECT_CC_MODE (code, x, y);
14945 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14946
14947 if (dimode_comparison
14948 && mode != CC_CZmode)
14949 {
14950 rtx clobber, set;
14951
14952 /* To compare two non-zero values for equality, XOR them and
14953 then compare against zero. Not used for ARM mode; there
14954 CC_CZmode is cheaper. */
14955 if (mode == CC_Zmode && y != const0_rtx)
14956 {
14957 gcc_assert (!reload_completed);
14958 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14959 y = const0_rtx;
14960 }
14961
14962 /* A scratch register is required. */
14963 if (reload_completed)
14964 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14965 else
14966 scratch = gen_rtx_SCRATCH (SImode);
14967
14968 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14969 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14970 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14971 }
14972 else
14973 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14974
14975 return cc_reg;
14976 }
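
/* For illustration: a DImode signed comparison selects CC_NCVmode above and
   is therefore emitted as a PARALLEL of the COMPARE and a SImode scratch
   CLOBBER (the later cmp/sbcs split needs the scratch), whereas a DImode
   unsigned comparison on a 32-bit target selects CC_CZmode and is emitted
   as a plain COMPARE with no scratch.  */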
14977
14978 /* Generate a sequence of insns that will generate the correct return
14979 address mask depending on the physical architecture that the program
14980 is running on. */
14981 rtx
14982 arm_gen_return_addr_mask (void)
14983 {
14984 rtx reg = gen_reg_rtx (Pmode);
14985
14986 emit_insn (gen_return_addr_mask (reg));
14987 return reg;
14988 }
14989
14990 void
14991 arm_reload_in_hi (rtx *operands)
14992 {
14993 rtx ref = operands[1];
14994 rtx base, scratch;
14995 HOST_WIDE_INT offset = 0;
14996
14997 if (GET_CODE (ref) == SUBREG)
14998 {
14999 offset = SUBREG_BYTE (ref);
15000 ref = SUBREG_REG (ref);
15001 }
15002
15003 if (REG_P (ref))
15004 {
15005 /* We have a pseudo which has been spilt onto the stack; there
15006 are two cases here: the first where there is a simple
15007 stack-slot replacement and a second where the stack-slot is
15008 out of range, or is used as a subreg. */
15009 if (reg_equiv_mem (REGNO (ref)))
15010 {
15011 ref = reg_equiv_mem (REGNO (ref));
15012 base = find_replacement (&XEXP (ref, 0));
15013 }
15014 else
15015 /* The slot is out of range, or was dressed up in a SUBREG. */
15016 base = reg_equiv_address (REGNO (ref));
15017
15018 /* PR 62554: If there is no equivalent memory location then just move
15019 the value as an SImode register move. This happens when the target
15020 architecture variant does not have an HImode register move. */
15021 if (base == NULL)
15022 {
15023 gcc_assert (REG_P (operands[0]));
15024 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
15025 gen_rtx_SUBREG (SImode, ref, 0)));
15026 return;
15027 }
15028 }
15029 else
15030 base = find_replacement (&XEXP (ref, 0));
15031
15032 /* Handle the case where the address is too complex to be offset by 1. */
15033 if (GET_CODE (base) == MINUS
15034 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15035 {
15036 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15037
15038 emit_set_insn (base_plus, base);
15039 base = base_plus;
15040 }
15041 else if (GET_CODE (base) == PLUS)
15042 {
15043 /* The addend must be CONST_INT, or we would have dealt with it above. */
15044 HOST_WIDE_INT hi, lo;
15045
15046 offset += INTVAL (XEXP (base, 1));
15047 base = XEXP (base, 0);
15048
15049 /* Rework the address into a legal sequence of insns. */
15050 /* Valid range for lo is -4095 -> 4095 */
15051 lo = (offset >= 0
15052 ? (offset & 0xfff)
15053 : -((-offset) & 0xfff));
15054
15055 /* Corner case, if lo is the max offset then we would be out of range
15056 once we have added the additional 1 below, so bump the msb into the
15057 pre-loading insn(s). */
15058 if (lo == 4095)
15059 lo &= 0x7ff;
15060
15061 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15062 ^ (HOST_WIDE_INT) 0x80000000)
15063 - (HOST_WIDE_INT) 0x80000000);
15064
15065 gcc_assert (hi + lo == offset);
15066
15067 if (hi != 0)
15068 {
15069 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15070
15071 /* Get the base address; addsi3 knows how to handle constants
15072 that require more than one insn. */
15073 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15074 base = base_plus;
15075 offset = lo;
15076 }
15077 }
15078
15079 /* Operands[2] may overlap operands[0] (though it won't overlap
15080 operands[1]), that's why we asked for a DImode reg -- so we can
15081 use the bit that does not overlap. */
15082 if (REGNO (operands[2]) == REGNO (operands[0]))
15083 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15084 else
15085 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15086
15087 emit_insn (gen_zero_extendqisi2 (scratch,
15088 gen_rtx_MEM (QImode,
15089 plus_constant (Pmode, base,
15090 offset))));
15091 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15092 gen_rtx_MEM (QImode,
15093 plus_constant (Pmode, base,
15094 offset + 1))));
15095 if (!BYTES_BIG_ENDIAN)
15096 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15097 gen_rtx_IOR (SImode,
15098 gen_rtx_ASHIFT
15099 (SImode,
15100 gen_rtx_SUBREG (SImode, operands[0], 0),
15101 GEN_INT (8)),
15102 scratch));
15103 else
15104 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15105 gen_rtx_IOR (SImode,
15106 gen_rtx_ASHIFT (SImode, scratch,
15107 GEN_INT (8)),
15108 gen_rtx_SUBREG (SImode, operands[0], 0)));
15109 }
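
/* Worked examples of the offset legalization above (repeated in
   arm_reload_out_hi below), purely for illustration:
     offset 0x1234 -> lo = 0x234, hi = 0x1000 (hi added to the base first)
     offset 4095   -> lo = 0x7ff, hi = 0x800  (lo trimmed so lo + 1 stays
                                               inside the +/-4095 range)
     offset -5     -> lo = -5,    hi = 0      (no extra add needed)  */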
15110
15111 /* Handle storing a half-word to memory during reload by synthesizing as two
15112 byte stores. Take care not to clobber the input values until after we
15113 have moved them somewhere safe. This code assumes that if the DImode
15114 scratch in operands[2] overlaps either the input value or output address
15115 in some way, then that value must die in this insn (we absolutely need
15116 two scratch registers for some corner cases). */
15117 void
15118 arm_reload_out_hi (rtx *operands)
15119 {
15120 rtx ref = operands[0];
15121 rtx outval = operands[1];
15122 rtx base, scratch;
15123 HOST_WIDE_INT offset = 0;
15124
15125 if (GET_CODE (ref) == SUBREG)
15126 {
15127 offset = SUBREG_BYTE (ref);
15128 ref = SUBREG_REG (ref);
15129 }
15130
15131 if (REG_P (ref))
15132 {
15133 /* We have a pseudo which has been spilt onto the stack; there
15134 are two cases here: the first where there is a simple
15135 stack-slot replacement and a second where the stack-slot is
15136 out of range, or is used as a subreg. */
15137 if (reg_equiv_mem (REGNO (ref)))
15138 {
15139 ref = reg_equiv_mem (REGNO (ref));
15140 base = find_replacement (&XEXP (ref, 0));
15141 }
15142 else
15143 /* The slot is out of range, or was dressed up in a SUBREG. */
15144 base = reg_equiv_address (REGNO (ref));
15145
15146 /* PR 62254: If there is no equivalent memory location then just move
15147 the value as an SImode register move. This happens when the target
15148 architecture variant does not have an HImode register move. */
15149 if (base == NULL)
15150 {
15151 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15152
15153 if (REG_P (outval))
15154 {
15155 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15156 gen_rtx_SUBREG (SImode, outval, 0)));
15157 }
15158 else /* SUBREG_P (outval) */
15159 {
15160 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15161 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15162 SUBREG_REG (outval)));
15163 else
15164 /* FIXME: Handle other cases ? */
15165 gcc_unreachable ();
15166 }
15167 return;
15168 }
15169 }
15170 else
15171 base = find_replacement (&XEXP (ref, 0));
15172
15173 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15174
15175 /* Handle the case where the address is too complex to be offset by 1. */
15176 if (GET_CODE (base) == MINUS
15177 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15178 {
15179 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15180
15181 /* Be careful not to destroy OUTVAL. */
15182 if (reg_overlap_mentioned_p (base_plus, outval))
15183 {
15184 /* Updating base_plus might destroy outval, see if we can
15185 swap the scratch and base_plus. */
15186 if (!reg_overlap_mentioned_p (scratch, outval))
15187 std::swap (scratch, base_plus);
15188 else
15189 {
15190 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15191
15192 /* Be conservative and copy OUTVAL into the scratch now,
15193 this should only be necessary if outval is a subreg
15194 of something larger than a word. */
15195 /* XXX Might this clobber base? I can't see how it can,
15196 since scratch is known to overlap with OUTVAL, and
15197 must be wider than a word. */
15198 emit_insn (gen_movhi (scratch_hi, outval));
15199 outval = scratch_hi;
15200 }
15201 }
15202
15203 emit_set_insn (base_plus, base);
15204 base = base_plus;
15205 }
15206 else if (GET_CODE (base) == PLUS)
15207 {
15208 /* The addend must be CONST_INT, or we would have dealt with it above. */
15209 HOST_WIDE_INT hi, lo;
15210
15211 offset += INTVAL (XEXP (base, 1));
15212 base = XEXP (base, 0);
15213
15214 /* Rework the address into a legal sequence of insns. */
15215 /* Valid range for lo is -4095 -> 4095 */
15216 lo = (offset >= 0
15217 ? (offset & 0xfff)
15218 : -((-offset) & 0xfff));
15219
15220 /* Corner case, if lo is the max offset then we would be out of range
15221 once we have added the additional 1 below, so bump the msb into the
15222 pre-loading insn(s). */
15223 if (lo == 4095)
15224 lo &= 0x7ff;
15225
15226 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15227 ^ (HOST_WIDE_INT) 0x80000000)
15228 - (HOST_WIDE_INT) 0x80000000);
15229
15230 gcc_assert (hi + lo == offset);
15231
15232 if (hi != 0)
15233 {
15234 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15235
15236 /* Be careful not to destroy OUTVAL. */
15237 if (reg_overlap_mentioned_p (base_plus, outval))
15238 {
15239 /* Updating base_plus might destroy outval, see if we
15240 can swap the scratch and base_plus. */
15241 if (!reg_overlap_mentioned_p (scratch, outval))
15242 std::swap (scratch, base_plus);
15243 else
15244 {
15245 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15246
15247 /* Be conservative and copy outval into scratch now,
15248 this should only be necessary if outval is a
15249 subreg of something larger than a word. */
15250 /* XXX Might this clobber base? I can't see how it
15251 can, since scratch is known to overlap with
15252 outval. */
15253 emit_insn (gen_movhi (scratch_hi, outval));
15254 outval = scratch_hi;
15255 }
15256 }
15257
15258 /* Get the base address; addsi3 knows how to handle constants
15259 that require more than one insn. */
15260 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15261 base = base_plus;
15262 offset = lo;
15263 }
15264 }
15265
15266 if (BYTES_BIG_ENDIAN)
15267 {
15268 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15269 plus_constant (Pmode, base,
15270 offset + 1)),
15271 gen_lowpart (QImode, outval)));
15272 emit_insn (gen_lshrsi3 (scratch,
15273 gen_rtx_SUBREG (SImode, outval, 0),
15274 GEN_INT (8)));
15275 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15276 offset)),
15277 gen_lowpart (QImode, scratch)));
15278 }
15279 else
15280 {
15281 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15282 offset)),
15283 gen_lowpart (QImode, outval)));
15284 emit_insn (gen_lshrsi3 (scratch,
15285 gen_rtx_SUBREG (SImode, outval, 0),
15286 GEN_INT (8)));
15287 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15288 plus_constant (Pmode, base,
15289 offset + 1)),
15290 gen_lowpart (QImode, scratch)));
15291 }
15292 }
15293
15294 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15295 (padded to the size of a word) should be passed in a register. */
15296
15297 static bool
15298 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15299 {
15300 if (TARGET_AAPCS_BASED)
15301 return must_pass_in_stack_var_size (mode, type);
15302 else
15303 return must_pass_in_stack_var_size_or_pad (mode, type);
15304 }
15305
15306
15307 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15308 byte of a stack argument has useful data. For legacy APCS ABIs we use
15309 the default. For AAPCS based ABIs small aggregate types are placed
15310 in the lowest memory address. */
15311
15312 static pad_direction
15313 arm_function_arg_padding (machine_mode mode, const_tree type)
15314 {
15315 if (!TARGET_AAPCS_BASED)
15316 return default_function_arg_padding (mode, type);
15317
15318 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15319 return PAD_DOWNWARD;
15320
15321 return PAD_UPWARD;
15322 }
15323
15324
15325 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15326 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15327 register has useful data, and return the opposite if the most
15328 significant byte does. */
15329
15330 bool
15331 arm_pad_reg_upward (machine_mode mode,
15332 tree type, int first ATTRIBUTE_UNUSED)
15333 {
15334 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15335 {
15336 /* For AAPCS, small aggregates, small fixed-point types,
15337 and small complex types are always padded upwards. */
15338 if (type)
15339 {
15340 if ((AGGREGATE_TYPE_P (type)
15341 || TREE_CODE (type) == COMPLEX_TYPE
15342 || FIXED_POINT_TYPE_P (type))
15343 && int_size_in_bytes (type) <= 4)
15344 return true;
15345 }
15346 else
15347 {
15348 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15349 && GET_MODE_SIZE (mode) <= 4)
15350 return true;
15351 }
15352 }
15353
15354 /* Otherwise, use default padding. */
15355 return !BYTES_BIG_ENDIAN;
15356 }
15357
15358 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15359 assuming that the address in the base register is word aligned. */
15360 bool
15361 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15362 {
15363 HOST_WIDE_INT max_offset;
15364
15365 /* Offset must be a multiple of 4 in Thumb mode. */
15366 if (TARGET_THUMB2 && ((offset & 3) != 0))
15367 return false;
15368
15369 if (TARGET_THUMB2)
15370 max_offset = 1020;
15371 else if (TARGET_ARM)
15372 max_offset = 255;
15373 else
15374 return false;
15375
15376 return ((offset <= max_offset) && (offset >= -max_offset));
15377 }
15378
15379 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15380 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15381 Assumes that the address in the base register RN is word aligned. Pattern
15382 guarantees that both memory accesses use the same base register,
15383 the offsets are constants within the range, and the gap between the offsets is 4.
15384 If reload is complete then check that the registers are legal. WBACK indicates whether
15385 address is updated. LOAD indicates whether memory access is load or store. */
15386 bool
15387 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15388 bool wback, bool load)
15389 {
15390 unsigned int t, t2, n;
15391
15392 if (!reload_completed)
15393 return true;
15394
15395 if (!offset_ok_for_ldrd_strd (offset))
15396 return false;
15397
15398 t = REGNO (rt);
15399 t2 = REGNO (rt2);
15400 n = REGNO (rn);
15401
15402 if ((TARGET_THUMB2)
15403 && ((wback && (n == t || n == t2))
15404 || (t == SP_REGNUM)
15405 || (t == PC_REGNUM)
15406 || (t2 == SP_REGNUM)
15407 || (t2 == PC_REGNUM)
15408 || (!load && (n == PC_REGNUM))
15409 || (load && (t == t2))
15410 /* Triggers Cortex-M3 LDRD errata. */
15411 || (!wback && load && fix_cm3_ldrd && (n == t))))
15412 return false;
15413
15414 if ((TARGET_ARM)
15415 && ((wback && (n == t || n == t2))
15416 || (t2 == PC_REGNUM)
15417 || (t % 2 != 0) /* First destination register is not even. */
15418 || (t2 != t + 1)
15419 /* PC can be used as a base register (for offset addressing only),
15420 but it is deprecated. */
15421 || (n == PC_REGNUM)))
15422 return false;
15423
15424 return true;
15425 }
15426
15427 /* Return true if a 64-bit access with alignment ALIGN and with a
15428 constant offset OFFSET from the base pointer is permitted on this
15429 architecture. */
15430 static bool
15431 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
15432 {
15433 return (unaligned_access
15434 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
15435 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
15436 }
15437
15438 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15439 operand MEM's address contains an immediate offset from the base
15440 register and has no side effects, in which case it sets BASE,
15441 OFFSET and ALIGN accordingly. */
15442 static bool
15443 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
15444 {
15445 rtx addr;
15446
15447 gcc_assert (base != NULL && offset != NULL);
15448
15449 /* TODO: Handle more general memory operand patterns, such as
15450 PRE_DEC and PRE_INC. */
15451
15452 if (side_effects_p (mem))
15453 return false;
15454
15455 /* Can't deal with subregs. */
15456 if (GET_CODE (mem) == SUBREG)
15457 return false;
15458
15459 gcc_assert (MEM_P (mem));
15460
15461 *offset = const0_rtx;
15462 *align = MEM_ALIGN (mem);
15463
15464 addr = XEXP (mem, 0);
15465
15466 /* If addr isn't valid for DImode, then we can't handle it. */
15467 if (!arm_legitimate_address_p (DImode, addr,
15468 reload_in_progress || reload_completed))
15469 return false;
15470
15471 if (REG_P (addr))
15472 {
15473 *base = addr;
15474 return true;
15475 }
15476 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15477 {
15478 *base = XEXP (addr, 0);
15479 *offset = XEXP (addr, 1);
15480 return (REG_P (*base) && CONST_INT_P (*offset));
15481 }
15482
15483 return false;
15484 }
15485
15486 /* Called from a peephole2 to replace two word-size accesses with a
15487 single LDRD/STRD instruction. Returns true iff we can generate a
15488 new instruction sequence. That is, both accesses use the same base
15489 register and the gap between constant offsets is 4. This function
15490 may reorder its operands to match ldrd/strd RTL templates.
15491 OPERANDS are the operands found by the peephole matcher;
15492 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15493 corresponding memory operands. LOAD indicates whether the access
15494 is load or store. CONST_STORE indicates a store of constant
15495 integer values held in OPERANDS[4,5] and assumes that the pattern
15496 is 4 insns long, for the purpose of checking dead registers.
15497 COMMUTE indicates that register operands may be reordered. */
15498 bool
15499 gen_operands_ldrd_strd (rtx *operands, bool load,
15500 bool const_store, bool commute)
15501 {
15502 int nops = 2;
15503 HOST_WIDE_INT offsets[2], offset, align[2];
15504 rtx base = NULL_RTX;
15505 rtx cur_base, cur_offset, tmp;
15506 int i, gap;
15507 HARD_REG_SET regset;
15508
15509 gcc_assert (!const_store || !load);
15510 /* Check that the memory references are immediate offsets from the
15511 same base register. Extract the base register, the destination
15512 registers, and the corresponding memory offsets. */
15513 for (i = 0; i < nops; i++)
15514 {
15515 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
15516 &align[i]))
15517 return false;
15518
15519 if (i == 0)
15520 base = cur_base;
15521 else if (REGNO (base) != REGNO (cur_base))
15522 return false;
15523
15524 offsets[i] = INTVAL (cur_offset);
15525 if (GET_CODE (operands[i]) == SUBREG)
15526 {
15527 tmp = SUBREG_REG (operands[i]);
15528 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15529 operands[i] = tmp;
15530 }
15531 }
15532
15533 /* Make sure there is no dependency between the individual loads. */
15534 if (load && REGNO (operands[0]) == REGNO (base))
15535 return false; /* RAW */
15536
15537 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15538 return false; /* WAW */
15539
15540 /* If the same input register is used in both stores
15541 when storing different constants, try to find a free register.
15542 For example, the code
15543 mov r0, 0
15544 str r0, [r2]
15545 mov r0, 1
15546 str r0, [r2, #4]
15547 can be transformed into
15548 mov r1, 0
15549 mov r0, 1
15550 strd r1, r0, [r2]
15551 in Thumb mode assuming that r1 is free.
15552 For ARM mode do the same but only if the starting register
15553 can be made to be even. */
15554 if (const_store
15555 && REGNO (operands[0]) == REGNO (operands[1])
15556 && INTVAL (operands[4]) != INTVAL (operands[5]))
15557 {
15558 if (TARGET_THUMB2)
15559 {
15560 CLEAR_HARD_REG_SET (regset);
15561 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15562 if (tmp == NULL_RTX)
15563 return false;
15564
15565 /* Use the new register in the first load to ensure that
15566 if the original input register is not dead after peephole,
15567 then it will have the correct constant value. */
15568 operands[0] = tmp;
15569 }
15570 else if (TARGET_ARM)
15571 {
15572 int regno = REGNO (operands[0]);
15573 if (!peep2_reg_dead_p (4, operands[0]))
15574 {
15575 /* When the input register is even and is not dead after the
15576 pattern, it has to hold the second constant but we cannot
15577 form a legal STRD in ARM mode with this register as the second
15578 register. */
15579 if (regno % 2 == 0)
15580 return false;
15581
15582 /* Is regno-1 free? */
15583 SET_HARD_REG_SET (regset);
15584 CLEAR_HARD_REG_BIT(regset, regno - 1);
15585 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15586 if (tmp == NULL_RTX)
15587 return false;
15588
15589 operands[0] = tmp;
15590 }
15591 else
15592 {
15593 /* Find a DImode register. */
15594 CLEAR_HARD_REG_SET (regset);
15595 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15596 if (tmp != NULL_RTX)
15597 {
15598 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15599 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15600 }
15601 else
15602 {
15603 /* Can we use the input register to form a DI register? */
15604 SET_HARD_REG_SET (regset);
15605 CLEAR_HARD_REG_BIT(regset,
15606 regno % 2 == 0 ? regno + 1 : regno - 1);
15607 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15608 if (tmp == NULL_RTX)
15609 return false;
15610 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15611 }
15612 }
15613
15614 gcc_assert (operands[0] != NULL_RTX);
15615 gcc_assert (operands[1] != NULL_RTX);
15616 gcc_assert (REGNO (operands[0]) % 2 == 0);
15617 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15618 }
15619 }
15620
15621 /* Make sure the instructions are ordered with lower memory access first. */
15622 if (offsets[0] > offsets[1])
15623 {
15624 gap = offsets[0] - offsets[1];
15625 offset = offsets[1];
15626
15627 /* Swap the instructions such that lower memory is accessed first. */
15628 std::swap (operands[0], operands[1]);
15629 std::swap (operands[2], operands[3]);
15630 std::swap (align[0], align[1]);
15631 if (const_store)
15632 std::swap (operands[4], operands[5]);
15633 }
15634 else
15635 {
15636 gap = offsets[1] - offsets[0];
15637 offset = offsets[0];
15638 }
15639
15640 /* Make sure accesses are to consecutive memory locations. */
15641 if (gap != 4)
15642 return false;
15643
15644 if (!align_ok_ldrd_strd (align[0], offset))
15645 return false;
15646
15647 /* Make sure we generate legal instructions. */
15648 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15649 false, load))
15650 return true;
15651
15652 /* In Thumb state, where registers are almost unconstrained, there
15653 is little hope to fix it. */
15654 if (TARGET_THUMB2)
15655 return false;
15656
15657 if (load && commute)
15658 {
15659 /* Try reordering registers. */
15660 std::swap (operands[0], operands[1]);
15661 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15662 false, load))
15663 return true;
15664 }
15665
15666 if (const_store)
15667 {
15668 /* If input registers are dead after this pattern, they can be
15669 reordered or replaced by other registers that are free in the
15670 current pattern. */
15671 if (!peep2_reg_dead_p (4, operands[0])
15672 || !peep2_reg_dead_p (4, operands[1]))
15673 return false;
15674
15675 /* Try to reorder the input registers. */
15676 /* For example, the code
15677 mov r0, 0
15678 mov r1, 1
15679 str r1, [r2]
15680 str r0, [r2, #4]
15681 can be transformed into
15682 mov r1, 0
15683 mov r0, 1
15684 strd r0, r1, [r2]
15685 */
15686 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15687 false, false))
15688 {
15689 std::swap (operands[0], operands[1]);
15690 return true;
15691 }
15692
15693 /* Try to find a free DI register. */
15694 CLEAR_HARD_REG_SET (regset);
15695 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15696 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15697 while (true)
15698 {
15699 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15700 if (tmp == NULL_RTX)
15701 return false;
15702
15703 /* DREG must be an even-numbered register in DImode.
15704 Split it into SI registers. */
15705 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15706 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15707 gcc_assert (operands[0] != NULL_RTX);
15708 gcc_assert (operands[1] != NULL_RTX);
15709 gcc_assert (REGNO (operands[0]) % 2 == 0);
15710 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15711
15712 return (operands_ok_ldrd_strd (operands[0], operands[1],
15713 base, offset,
15714 false, load));
15715 }
15716 }
15717
15718 return false;
15719 }
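
/* Example of the transformation driven by this function (register numbers
   assumed): the peephole can turn

       ldr  r1, [r4, #4]
       ldr  r0, [r4]

   into

       ldrd r0, r1, [r4]

   after sorting the accesses by offset; the same pair is rejected if, for
   instance, the first destination is the base register itself (a RAW
   dependency) or, in ARM state, if the registers cannot be made an
   even/odd pair.  */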
15720
15721
15722
15723 \f
15724 /* Print a symbolic form of X to the debug file, F. */
15725 static void
15726 arm_print_value (FILE *f, rtx x)
15727 {
15728 switch (GET_CODE (x))
15729 {
15730 case CONST_INT:
15731 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15732 return;
15733
15734 case CONST_DOUBLE:
15735 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15736 return;
15737
15738 case CONST_VECTOR:
15739 {
15740 int i;
15741
15742 fprintf (f, "<");
15743 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15744 {
15745 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15746 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15747 fputc (',', f);
15748 }
15749 fprintf (f, ">");
15750 }
15751 return;
15752
15753 case CONST_STRING:
15754 fprintf (f, "\"%s\"", XSTR (x, 0));
15755 return;
15756
15757 case SYMBOL_REF:
15758 fprintf (f, "`%s'", XSTR (x, 0));
15759 return;
15760
15761 case LABEL_REF:
15762 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15763 return;
15764
15765 case CONST:
15766 arm_print_value (f, XEXP (x, 0));
15767 return;
15768
15769 case PLUS:
15770 arm_print_value (f, XEXP (x, 0));
15771 fprintf (f, "+");
15772 arm_print_value (f, XEXP (x, 1));
15773 return;
15774
15775 case PC:
15776 fprintf (f, "pc");
15777 return;
15778
15779 default:
15780 fprintf (f, "????");
15781 return;
15782 }
15783 }
15784 \f
15785 /* Routines for manipulation of the constant pool. */
15786
15787 /* Arm instructions cannot load a large constant directly into a
15788 register; they have to come from a pc relative load. The constant
15789 must therefore be placed in the addressable range of the pc
15790 relative load. Depending on the precise pc relative load
15791 instruction the range is somewhere between 256 bytes and 4k. This
15792 means that we often have to dump a constant inside a function, and
15793 generate code to branch around it.
15794
15795 It is important to minimize this, since the branches will slow
15796 things down and make the code larger.
15797
15798 Normally we can hide the table after an existing unconditional
15799 branch so that there is no interruption of the flow, but in the
15800 worst case the code looks like this:
15801
15802 ldr rn, L1
15803 ...
15804 b L2
15805 align
15806 L1: .long value
15807 L2:
15808 ...
15809
15810 ldr rn, L3
15811 ...
15812 b L4
15813 align
15814 L3: .long value
15815 L4:
15816 ...
15817
15818 We fix this by performing a scan after scheduling, which notices
15819 which instructions need to have their operands fetched from the
15820 constant table and builds the table.
15821
15822 The algorithm starts by building a table of all the constants that
15823 need fixing up and all the natural barriers in the function (places
15824 where a constant table can be dropped without breaking the flow).
15825 For each fixup we note how far the pc-relative replacement will be
15826 able to reach and the offset of the instruction into the function.
15827
15828 Having built the table we then group the fixes together to form
15829 tables that are as large as possible (subject to addressing
15830 constraints) and emit each table of constants after the last
15831 barrier that is within range of all the instructions in the group.
15832 If a group does not contain a barrier, then we forcibly create one
15833 by inserting a jump instruction into the flow. Once the table has
15834 been inserted, the insns are then modified to reference the
15835 relevant entry in the pool.
15836
15837 Possible enhancements to the algorithm (not implemented) are:
15838
15839 1) For some processors and object formats, there may be benefit in
15840 aligning the pools to the start of cache lines; this alignment
15841 would need to be taken into account when calculating addressability
15842 of a pool. */
15843
15844 /* These typedefs are located at the start of this file, so that
15845 they can be used in the prototypes there. This comment is to
15846 remind readers of that fact so that the following structures
15847 can be understood more easily.
15848
15849 typedef struct minipool_node Mnode;
15850 typedef struct minipool_fixup Mfix; */
15851
15852 struct minipool_node
15853 {
15854 /* Doubly linked chain of entries. */
15855 Mnode * next;
15856 Mnode * prev;
15857 /* The maximum offset into the code at which this entry can be placed. While
15858 pushing fixes for forward references, all entries are sorted in order
15859 of increasing max_address. */
15860 HOST_WIDE_INT max_address;
15861 /* Similarly for an entry inserted for a backwards ref. */
15862 HOST_WIDE_INT min_address;
15863 /* The number of fixes referencing this entry. This can become zero
15864 if we "unpush" an entry. In this case we ignore the entry when we
15865 come to emit the code. */
15866 int refcount;
15867 /* The offset from the start of the minipool. */
15868 HOST_WIDE_INT offset;
15869 /* The value in the table. */
15870 rtx value;
15871 /* The mode of value. */
15872 machine_mode mode;
15873 /* The size of the value. With iWMMXt enabled
15874 sizes > 4 also imply an alignment of 8 bytes. */
15875 int fix_size;
15876 };
15877
15878 struct minipool_fixup
15879 {
15880 Mfix * next;
15881 rtx_insn * insn;
15882 HOST_WIDE_INT address;
15883 rtx * loc;
15884 machine_mode mode;
15885 int fix_size;
15886 rtx value;
15887 Mnode * minipool;
15888 HOST_WIDE_INT forwards;
15889 HOST_WIDE_INT backwards;
15890 };
15891
15892 /* Fixes less than a word need padding out to a word boundary. */
15893 #define MINIPOOL_FIX_SIZE(mode) \
15894 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
15895
15896 static Mnode * minipool_vector_head;
15897 static Mnode * minipool_vector_tail;
15898 static rtx_code_label *minipool_vector_label;
15899 static int minipool_pad;
15900
15901 /* The linked list of all minipool fixes required for this function. */
15902 Mfix * minipool_fix_head;
15903 Mfix * minipool_fix_tail;
15904 /* The fix entry for the current minipool, once it has been placed. */
15905 Mfix * minipool_barrier;
15906
15907 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15908 #define JUMP_TABLES_IN_TEXT_SECTION 0
15909 #endif
15910
15911 static HOST_WIDE_INT
15912 get_jump_table_size (rtx_jump_table_data *insn)
15913 {
15914 /* ADDR_VECs only take room if read-only data goes into the text
15915 section. */
15916 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15917 {
15918 rtx body = PATTERN (insn);
15919 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15920 HOST_WIDE_INT size;
15921 HOST_WIDE_INT modesize;
15922
15923 modesize = GET_MODE_SIZE (GET_MODE (body));
15924 size = modesize * XVECLEN (body, elt);
15925 switch (modesize)
15926 {
15927 case 1:
15928 /* Round up size of TBB table to a halfword boundary. */
15929 size = (size + 1) & ~HOST_WIDE_INT_1;
15930 break;
15931 case 2:
15932 /* No padding necessary for TBH. */
15933 break;
15934 case 4:
15935 /* Add two bytes for alignment on Thumb. */
15936 if (TARGET_THUMB)
15937 size += 2;
15938 break;
15939 default:
15940 gcc_unreachable ();
15941 }
15942 return size;
15943 }
15944
15945 return 0;
15946 }
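
/* Worked examples (assuming jump tables are placed in the text section):
   a TBB-style ADDR_DIFF_VEC with 5 QImode entries occupies 5 bytes, rounded
   up to 6 for halfword alignment; a Thumb ADDR_VEC of 3 SImode entries
   occupies 3 * 4 + 2 = 14 bytes, the extra 2 covering alignment.  */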
15947
15948 /* Return the maximum amount of padding that will be inserted before
15949 label LABEL. */
15950
15951 static HOST_WIDE_INT
15952 get_label_padding (rtx label)
15953 {
15954 HOST_WIDE_INT align, min_insn_size;
15955
15956 align = 1 << label_to_alignment (label);
15957 min_insn_size = TARGET_THUMB ? 2 : 4;
15958 return align > min_insn_size ? align - min_insn_size : 0;
15959 }
15960
15961 /* Move a minipool fix MP from its current location to before MAX_MP.
15962 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15963 constraints may need updating. */
15964 static Mnode *
15965 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15966 HOST_WIDE_INT max_address)
15967 {
15968 /* The code below assumes these are different. */
15969 gcc_assert (mp != max_mp);
15970
15971 if (max_mp == NULL)
15972 {
15973 if (max_address < mp->max_address)
15974 mp->max_address = max_address;
15975 }
15976 else
15977 {
15978 if (max_address > max_mp->max_address - mp->fix_size)
15979 mp->max_address = max_mp->max_address - mp->fix_size;
15980 else
15981 mp->max_address = max_address;
15982
15983 /* Unlink MP from its current position. Since max_mp is non-null,
15984 mp->prev must be non-null. */
15985 mp->prev->next = mp->next;
15986 if (mp->next != NULL)
15987 mp->next->prev = mp->prev;
15988 else
15989 minipool_vector_tail = mp->prev;
15990
15991 /* Re-insert it before MAX_MP. */
15992 mp->next = max_mp;
15993 mp->prev = max_mp->prev;
15994 max_mp->prev = mp;
15995
15996 if (mp->prev != NULL)
15997 mp->prev->next = mp;
15998 else
15999 minipool_vector_head = mp;
16000 }
16001
16002 /* Save the new entry. */
16003 max_mp = mp;
16004
16005 /* Scan over the preceding entries and adjust their addresses as
16006 required. */
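/* Each entry must be emitted before the one that follows it, so an entry's
   max_address can never exceed the following entry's max_address minus the
   earlier entry's own size; walk backwards tightening the constraints
   until they are all consistent.  */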
16007 while (mp->prev != NULL
16008 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16009 {
16010 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16011 mp = mp->prev;
16012 }
16013
16014 return max_mp;
16015 }
16016
16017 /* Add a constant to the minipool for a forward reference. Returns the
16018 node added or NULL if the constant will not fit in this pool. */
16019 static Mnode *
16020 add_minipool_forward_ref (Mfix *fix)
16021 {
16022 /* If set, max_mp is the first pool_entry that has a lower
16023 constraint than the one we are trying to add. */
16024 Mnode * max_mp = NULL;
16025 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16026 Mnode * mp;
16027
16028 /* If the minipool starts before the end of FIX->INSN then this FIX
16029 can not be placed into the current pool. Furthermore, adding the
16030 new constant pool entry may cause the pool to start FIX_SIZE bytes
16031 earlier. */
16032 if (minipool_vector_head &&
16033 (fix->address + get_attr_length (fix->insn)
16034 >= minipool_vector_head->max_address - fix->fix_size))
16035 return NULL;
16036
16037 /* Scan the pool to see if a constant with the same value has
16038 already been added. While we are doing this, also note the
16039 location where we must insert the constant if it doesn't already
16040 exist. */
16041 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16042 {
16043 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16044 && fix->mode == mp->mode
16045 && (!LABEL_P (fix->value)
16046 || (CODE_LABEL_NUMBER (fix->value)
16047 == CODE_LABEL_NUMBER (mp->value)))
16048 && rtx_equal_p (fix->value, mp->value))
16049 {
16050 /* More than one fix references this entry. */
16051 mp->refcount++;
16052 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16053 }
16054
16055 /* Note the insertion point if necessary. */
16056 if (max_mp == NULL
16057 && mp->max_address > max_address)
16058 max_mp = mp;
16059
16060 /* If we are inserting an 8-byte aligned quantity and
16061 we have not already found an insertion point, then
16062 make sure that all such 8-byte aligned quantities are
16063 placed at the start of the pool. */
16064 if (ARM_DOUBLEWORD_ALIGN
16065 && max_mp == NULL
16066 && fix->fix_size >= 8
16067 && mp->fix_size < 8)
16068 {
16069 max_mp = mp;
16070 max_address = mp->max_address;
16071 }
16072 }
16073
16074 /* The value is not currently in the minipool, so we need to create
16075 a new entry for it. If MAX_MP is NULL, the entry will be put on
16076 the end of the list since the placement is less constrained than
16077 any existing entry. Otherwise, we insert the new fix before
16078 MAX_MP and, if necessary, adjust the constraints on the other
16079 entries. */
16080 mp = XNEW (Mnode);
16081 mp->fix_size = fix->fix_size;
16082 mp->mode = fix->mode;
16083 mp->value = fix->value;
16084 mp->refcount = 1;
16085 /* Not yet required for a backwards ref. */
16086 mp->min_address = -65536;
16087
16088 if (max_mp == NULL)
16089 {
16090 mp->max_address = max_address;
16091 mp->next = NULL;
16092 mp->prev = minipool_vector_tail;
16093
16094 if (mp->prev == NULL)
16095 {
16096 minipool_vector_head = mp;
16097 minipool_vector_label = gen_label_rtx ();
16098 }
16099 else
16100 mp->prev->next = mp;
16101
16102 minipool_vector_tail = mp;
16103 }
16104 else
16105 {
16106 if (max_address > max_mp->max_address - mp->fix_size)
16107 mp->max_address = max_mp->max_address - mp->fix_size;
16108 else
16109 mp->max_address = max_address;
16110
16111 mp->next = max_mp;
16112 mp->prev = max_mp->prev;
16113 max_mp->prev = mp;
16114 if (mp->prev != NULL)
16115 mp->prev->next = mp;
16116 else
16117 minipool_vector_head = mp;
16118 }
16119
16120 /* Save the new entry. */
16121 max_mp = mp;
16122
16123 /* Scan over the preceding entries and adjust their addresses as
16124 required. */
16125 while (mp->prev != NULL
16126 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16127 {
16128 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16129 mp = mp->prev;
16130 }
16131
16132 return max_mp;
16133 }
16134
16135 static Mnode *
16136 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16137 HOST_WIDE_INT min_address)
16138 {
16139 HOST_WIDE_INT offset;
16140
16141 /* The code below assumes these are different. */
16142 gcc_assert (mp != min_mp);
16143
16144 if (min_mp == NULL)
16145 {
16146 if (min_address > mp->min_address)
16147 mp->min_address = min_address;
16148 }
16149 else
16150 {
16151 /* We will adjust this below if it is too loose. */
16152 mp->min_address = min_address;
16153
16154 /* Unlink MP from its current position. Since min_mp is non-null,
16155 mp->next must be non-null. */
16156 mp->next->prev = mp->prev;
16157 if (mp->prev != NULL)
16158 mp->prev->next = mp->next;
16159 else
16160 minipool_vector_head = mp->next;
16161
16162 /* Reinsert it after MIN_MP. */
16163 mp->prev = min_mp;
16164 mp->next = min_mp->next;
16165 min_mp->next = mp;
16166 if (mp->next != NULL)
16167 mp->next->prev = mp;
16168 else
16169 minipool_vector_tail = mp;
16170 }
16171
16172 min_mp = mp;
16173
16174 offset = 0;
16175 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16176 {
16177 mp->offset = offset;
16178 if (mp->refcount > 0)
16179 offset += mp->fix_size;
16180
16181 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16182 mp->next->min_address = mp->min_address + mp->fix_size;
16183 }
16184
16185 return min_mp;
16186 }
16187
16188 /* Add a constant to the minipool for a backward reference. Returns the
16189 node added or NULL if the constant will not fit in this pool.
16190
16191 Note that the insertion code for a backwards reference can be
16192 somewhat confusing because the calculated offsets for each fix do
16193 not take into account the size of the pool (which is still under
16194 construction). */
16195 static Mnode *
16196 add_minipool_backward_ref (Mfix *fix)
16197 {
16198 /* If set, min_mp is the last pool_entry that has a lower constraint
16199 than the one we are trying to add. */
16200 Mnode *min_mp = NULL;
16201 /* This can be negative, since it is only a constraint. */
16202 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16203 Mnode *mp;
16204
16205 /* If we can't reach the current pool from this insn, or if we can't
16206 insert this entry at the end of the pool without pushing other
16207 fixes out of range, then we don't try. This ensures that we
16208 can't fail later on. */
16209 if (min_address >= minipool_barrier->address
16210 || (minipool_vector_tail->min_address + fix->fix_size
16211 >= minipool_barrier->address))
16212 return NULL;
16213
16214 /* Scan the pool to see if a constant with the same value has
16215 already been added. While we are doing this, also note the
16216 location where we must insert the constant if it doesn't already
16217 exist. */
16218 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16219 {
16220 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16221 && fix->mode == mp->mode
16222 && (!LABEL_P (fix->value)
16223 || (CODE_LABEL_NUMBER (fix->value)
16224 == CODE_LABEL_NUMBER (mp->value)))
16225 && rtx_equal_p (fix->value, mp->value)
16226 /* Check that there is enough slack to move this entry to the
16227 end of the table (this is conservative). */
16228 && (mp->max_address
16229 > (minipool_barrier->address
16230 + minipool_vector_tail->offset
16231 + minipool_vector_tail->fix_size)))
16232 {
16233 mp->refcount++;
16234 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16235 }
16236
16237 if (min_mp != NULL)
16238 mp->min_address += fix->fix_size;
16239 else
16240 {
16241 /* Note the insertion point if necessary. */
16242 if (mp->min_address < min_address)
16243 {
16244 /* For now, we do not allow the insertion of 8-byte alignment
16245 requiring nodes anywhere but at the start of the pool. */
16246 if (ARM_DOUBLEWORD_ALIGN
16247 && fix->fix_size >= 8 && mp->fix_size < 8)
16248 return NULL;
16249 else
16250 min_mp = mp;
16251 }
16252 else if (mp->max_address
16253 < minipool_barrier->address + mp->offset + fix->fix_size)
16254 {
16255 /* Inserting before this entry would push the fix beyond
16256 its maximum address (which can happen if we have
16257 re-located a forwards fix); force the new fix to come
16258 after it. */
16259 if (ARM_DOUBLEWORD_ALIGN
16260 && fix->fix_size >= 8 && mp->fix_size < 8)
16261 return NULL;
16262 else
16263 {
16264 min_mp = mp;
16265 min_address = mp->min_address + fix->fix_size;
16266 }
16267 }
16268 /* Do not insert a non-8-byte aligned quantity before 8-byte
16269 aligned quantities. */
16270 else if (ARM_DOUBLEWORD_ALIGN
16271 && fix->fix_size < 8
16272 && mp->fix_size >= 8)
16273 {
16274 min_mp = mp;
16275 min_address = mp->min_address + fix->fix_size;
16276 }
16277 }
16278 }
16279
16280 /* We need to create a new entry. */
16281 mp = XNEW (Mnode);
16282 mp->fix_size = fix->fix_size;
16283 mp->mode = fix->mode;
16284 mp->value = fix->value;
16285 mp->refcount = 1;
16286 mp->max_address = minipool_barrier->address + 65536;
16287
16288 mp->min_address = min_address;
16289
16290 if (min_mp == NULL)
16291 {
16292 mp->prev = NULL;
16293 mp->next = minipool_vector_head;
16294
16295 if (mp->next == NULL)
16296 {
16297 minipool_vector_tail = mp;
16298 minipool_vector_label = gen_label_rtx ();
16299 }
16300 else
16301 mp->next->prev = mp;
16302
16303 minipool_vector_head = mp;
16304 }
16305 else
16306 {
16307 mp->next = min_mp->next;
16308 mp->prev = min_mp;
16309 min_mp->next = mp;
16310
16311 if (mp->next != NULL)
16312 mp->next->prev = mp;
16313 else
16314 minipool_vector_tail = mp;
16315 }
16316
16317 /* Save the new entry. */
16318 min_mp = mp;
16319
16320 if (mp->prev)
16321 mp = mp->prev;
16322 else
16323 mp->offset = 0;
16324
16325 /* Scan over the following entries and adjust their offsets. */
16326 while (mp->next != NULL)
16327 {
16328 if (mp->next->min_address < mp->min_address + mp->fix_size)
16329 mp->next->min_address = mp->min_address + mp->fix_size;
16330
16331 if (mp->refcount)
16332 mp->next->offset = mp->offset + mp->fix_size;
16333 else
16334 mp->next->offset = mp->offset;
16335
16336 mp = mp->next;
16337 }
16338
16339 return min_mp;
16340 }
16341
16342 static void
16343 assign_minipool_offsets (Mfix *barrier)
16344 {
16345 HOST_WIDE_INT offset = 0;
16346 Mnode *mp;
16347
16348 minipool_barrier = barrier;
16349
16350 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16351 {
16352 mp->offset = offset;
16353
16354 if (mp->refcount > 0)
16355 offset += mp->fix_size;
16356 }
16357 }
16358
16359 /* Output the literal table. */
16360 static void
16361 dump_minipool (rtx_insn *scan)
16362 {
16363 Mnode * mp;
16364 Mnode * nmp;
16365 int align64 = 0;
16366
16367 if (ARM_DOUBLEWORD_ALIGN)
16368 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16369 if (mp->refcount > 0 && mp->fix_size >= 8)
16370 {
16371 align64 = 1;
16372 break;
16373 }
16374
16375 if (dump_file)
16376 fprintf (dump_file,
16377 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16378 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16379
16380 scan = emit_label_after (gen_label_rtx (), scan);
16381 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16382 scan = emit_label_after (minipool_vector_label, scan);
16383
16384 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16385 {
16386 if (mp->refcount > 0)
16387 {
16388 if (dump_file)
16389 {
16390 fprintf (dump_file,
16391 ";; Offset %u, min %ld, max %ld ",
16392 (unsigned) mp->offset, (unsigned long) mp->min_address,
16393 (unsigned long) mp->max_address);
16394 arm_print_value (dump_file, mp->value);
16395 fputc ('\n', dump_file);
16396 }
16397
16398 rtx val = copy_rtx (mp->value);
16399
16400 switch (GET_MODE_SIZE (mp->mode))
16401 {
16402 #ifdef HAVE_consttable_1
16403 case 1:
16404 scan = emit_insn_after (gen_consttable_1 (val), scan);
16405 break;
16406
16407 #endif
16408 #ifdef HAVE_consttable_2
16409 case 2:
16410 scan = emit_insn_after (gen_consttable_2 (val), scan);
16411 break;
16412
16413 #endif
16414 #ifdef HAVE_consttable_4
16415 case 4:
16416 scan = emit_insn_after (gen_consttable_4 (val), scan);
16417 break;
16418
16419 #endif
16420 #ifdef HAVE_consttable_8
16421 case 8:
16422 scan = emit_insn_after (gen_consttable_8 (val), scan);
16423 break;
16424
16425 #endif
16426 #ifdef HAVE_consttable_16
16427 case 16:
16428 scan = emit_insn_after (gen_consttable_16 (val), scan);
16429 break;
16430
16431 #endif
16432 default:
16433 gcc_unreachable ();
16434 }
16435 }
16436
16437 nmp = mp->next;
16438 free (mp);
16439 }
16440
16441 minipool_vector_head = minipool_vector_tail = NULL;
16442 scan = emit_insn_after (gen_consttable_end (), scan);
16443 scan = emit_barrier_after (scan);
16444 }
16445
16446 /* Return the cost of forcibly inserting a barrier after INSN. */
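/* Lower values denote better places; create_fix_barrier keeps the position
   with the lowest cost seen so far within the allowed range.  */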
16447 static int
16448 arm_barrier_cost (rtx_insn *insn)
16449 {
16450 /* Basing the location of the pool on the loop depth is preferable,
16451 but at the moment, the basic block information seems to be
16452 corrupt by this stage of the compilation. */
16453 int base_cost = 50;
16454 rtx_insn *next = next_nonnote_insn (insn);
16455
16456 if (next != NULL && LABEL_P (next))
16457 base_cost -= 20;
16458
16459 switch (GET_CODE (insn))
16460 {
16461 case CODE_LABEL:
16462 /* It will always be better to place the table before the label, rather
16463 than after it. */
16464 return 50;
16465
16466 case INSN:
16467 case CALL_INSN:
16468 return base_cost;
16469
16470 case JUMP_INSN:
16471 return base_cost - 10;
16472
16473 default:
16474 return base_cost + 10;
16475 }
16476 }
16477
16478 /* Find the best place in the insn stream in the range
16479 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16480 Create the barrier by inserting a jump and add a new fix entry for
16481 it. */
16482 static Mfix *
16483 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16484 {
16485 HOST_WIDE_INT count = 0;
16486 rtx_barrier *barrier;
16487 rtx_insn *from = fix->insn;
16488 /* The instruction after which we will insert the jump. */
16489 rtx_insn *selected = NULL;
16490 int selected_cost;
16491 /* The address at which the jump instruction will be placed. */
16492 HOST_WIDE_INT selected_address;
16493 Mfix * new_fix;
16494 HOST_WIDE_INT max_count = max_address - fix->address;
16495 rtx_code_label *label = gen_label_rtx ();
16496
16497 selected_cost = arm_barrier_cost (from);
16498 selected_address = fix->address;
16499
16500 while (from && count < max_count)
16501 {
16502 rtx_jump_table_data *tmp;
16503 int new_cost;
16504
16505 /* This code shouldn't have been called if there was a natural barrier
16506 within range. */
16507 gcc_assert (!BARRIER_P (from));
16508
16509 /* Count the length of this insn. This must stay in sync with the
16510 code that pushes minipool fixes. */
16511 if (LABEL_P (from))
16512 count += get_label_padding (from);
16513 else
16514 count += get_attr_length (from);
16515
16516 /* If there is a jump table, add its length. */
16517 if (tablejump_p (from, NULL, &tmp))
16518 {
16519 count += get_jump_table_size (tmp);
16520
16521 /* Jump tables aren't in a basic block, so base the cost on
16522 the dispatch insn. If we select this location, we will
16523 still put the pool after the table. */
16524 new_cost = arm_barrier_cost (from);
16525
16526 if (count < max_count
16527 && (!selected || new_cost <= selected_cost))
16528 {
16529 selected = tmp;
16530 selected_cost = new_cost;
16531 selected_address = fix->address + count;
16532 }
16533
16534 /* Continue after the dispatch table. */
16535 from = NEXT_INSN (tmp);
16536 continue;
16537 }
16538
16539 new_cost = arm_barrier_cost (from);
16540
16541 if (count < max_count
16542 && (!selected || new_cost <= selected_cost))
16543 {
16544 selected = from;
16545 selected_cost = new_cost;
16546 selected_address = fix->address + count;
16547 }
16548
16549 from = NEXT_INSN (from);
16550 }
16551
16552 /* Make sure that we found a place to insert the jump. */
16553 gcc_assert (selected);
16554
16555 /* Make sure we do not split a call and its corresponding
16556 CALL_ARG_LOCATION note. */
16557 if (CALL_P (selected))
16558 {
16559 rtx_insn *next = NEXT_INSN (selected);
16560 if (next && NOTE_P (next)
16561 && NOTE_KIND (next) == NOTE_INSN_CALL_ARG_LOCATION)
16562 selected = next;
16563 }
16564
16565 /* Create a new JUMP_INSN that branches around a barrier. */
16566 from = emit_jump_insn_after (gen_jump (label), selected);
16567 JUMP_LABEL (from) = label;
16568 barrier = emit_barrier_after (from);
16569 emit_label_after (label, barrier);
16570
16571 /* Create a minipool barrier entry for the new barrier. */
16572 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16573 new_fix->insn = barrier;
16574 new_fix->address = selected_address;
16575 new_fix->next = fix->next;
16576 fix->next = new_fix;
16577
16578 return new_fix;
16579 }
16580
16581 /* Record that there is a natural barrier in the insn stream at
16582 ADDRESS. */
16583 static void
16584 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16585 {
16586 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16587
16588 fix->insn = insn;
16589 fix->address = address;
16590
16591 fix->next = NULL;
16592 if (minipool_fix_head != NULL)
16593 minipool_fix_tail->next = fix;
16594 else
16595 minipool_fix_head = fix;
16596
16597 minipool_fix_tail = fix;
16598 }
16599
16600 /* Record INSN, which will need fixing up to load a value from the
16601 minipool. ADDRESS is the offset of the insn since the start of the
16602 function; LOC is a pointer to the part of the insn which requires
16603 fixing; VALUE is the constant that must be loaded, which is of type
16604 MODE. */
16605 static void
16606 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16607 machine_mode mode, rtx value)
16608 {
16609 gcc_assert (!arm_disable_literal_pool);
16610 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16611
16612 fix->insn = insn;
16613 fix->address = address;
16614 fix->loc = loc;
16615 fix->mode = mode;
16616 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16617 fix->value = value;
16618 fix->forwards = get_attr_pool_range (insn);
16619 fix->backwards = get_attr_neg_pool_range (insn);
16620 fix->minipool = NULL;
16621
16622 /* If an insn doesn't have a range defined for it, then it isn't
16623 expecting to be reworked by this code. Better to stop now than
16624 to generate duff assembly code. */
16625 gcc_assert (fix->forwards || fix->backwards);
16626
16627 /* If an entry requires 8-byte alignment then assume all constant pools
16628 require 4 bytes of padding. Trying to do this later on a per-pool
16629 basis is awkward because existing pool entries have to be modified. */
16630 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16631 minipool_pad = 4;
16632
16633 if (dump_file)
16634 {
16635 fprintf (dump_file,
16636 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16637 GET_MODE_NAME (mode),
16638 INSN_UID (insn), (unsigned long) address,
16639 -1 * (long)fix->backwards, (long)fix->forwards);
16640 arm_print_value (dump_file, fix->value);
16641 fprintf (dump_file, "\n");
16642 }
16643
16644 /* Add it to the chain of fixes. */
16645 fix->next = NULL;
16646
16647 if (minipool_fix_head != NULL)
16648 minipool_fix_tail->next = fix;
16649 else
16650 minipool_fix_head = fix;
16651
16652 minipool_fix_tail = fix;
16653 }
16654
16655 /* Return the maximum allowed cost, in insns, of synthesizing a 64-bit
16656 constant inline; constants whose arm_const_double_inline_cost exceeds
16657 this value are not built inline. */
16658 int
16659 arm_max_const_double_inline_cost ()
16660 {
16661 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16662 }
16663
16664 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16665 Returns the number of insns needed, or 99 if we don't know how to
16666 do it. */
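/* For example (illustrative constant): for the DImode value
   0x0000000100000005 the low part 5 and the high part 1 are both valid
   single-instruction immediates, so the cost computed below is 1 + 1 == 2,
   which is within arm_max_const_double_inline_cost in either case.  */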
16667 int
16668 arm_const_double_inline_cost (rtx val)
16669 {
16670 rtx lowpart, highpart;
16671 machine_mode mode;
16672
16673 mode = GET_MODE (val);
16674
16675 if (mode == VOIDmode)
16676 mode = DImode;
16677
16678 gcc_assert (GET_MODE_SIZE (mode) == 8);
16679
16680 lowpart = gen_lowpart (SImode, val);
16681 highpart = gen_highpart_mode (SImode, mode, val);
16682
16683 gcc_assert (CONST_INT_P (lowpart));
16684 gcc_assert (CONST_INT_P (highpart));
16685
16686 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16687 NULL_RTX, NULL_RTX, 0, 0)
16688 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16689 NULL_RTX, NULL_RTX, 0, 0));
16690 }
16691
16692 /* Cost of loading a SImode constant. */
16693 static inline int
16694 arm_const_inline_cost (enum rtx_code code, rtx val)
16695 {
16696 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16697 NULL_RTX, NULL_RTX, 1, 0);
16698 }
16699
16700 /* Return true if it is worthwhile to split a 64-bit constant into two
16701 32-bit operations. This is the case if optimizing for size, or
16702 if we have load delay slots, or if one 32-bit part can be done with
16703 a single data operation. */
16704 bool
16705 arm_const_double_by_parts (rtx val)
16706 {
16707 machine_mode mode = GET_MODE (val);
16708 rtx part;
16709
16710 if (optimize_size || arm_ld_sched)
16711 return true;
16712
16713 if (mode == VOIDmode)
16714 mode = DImode;
16715
16716 part = gen_highpart_mode (SImode, mode, val);
16717
16718 gcc_assert (CONST_INT_P (part));
16719
16720 if (const_ok_for_arm (INTVAL (part))
16721 || const_ok_for_arm (~INTVAL (part)))
16722 return true;
16723
16724 part = gen_lowpart (SImode, val);
16725
16726 gcc_assert (CONST_INT_P (part));
16727
16728 if (const_ok_for_arm (INTVAL (part))
16729 || const_ok_for_arm (~INTVAL (part)))
16730 return true;
16731
16732 return false;
16733 }
16734
16735 /* Return true if it is possible to inline both the high and low parts
16736 of a 64-bit constant into 32-bit data processing instructions. */
16737 bool
16738 arm_const_double_by_immediates (rtx val)
16739 {
16740 machine_mode mode = GET_MODE (val);
16741 rtx part;
16742
16743 if (mode == VOIDmode)
16744 mode = DImode;
16745
16746 part = gen_highpart_mode (SImode, mode, val);
16747
16748 gcc_assert (CONST_INT_P (part));
16749
16750 if (!const_ok_for_arm (INTVAL (part)))
16751 return false;
16752
16753 part = gen_lowpart (SImode, val);
16754
16755 gcc_assert (CONST_INT_P (part));
16756
16757 if (!const_ok_for_arm (INTVAL (part)))
16758 return false;
16759
16760 return true;
16761 }
16762
16763 /* Scan INSN and note any of its operands that need fixing.
16764 If DO_PUSHES is false we do not actually push any of the fixups
16765 needed. */
16766 static void
16767 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16768 {
16769 int opno;
16770
16771 extract_constrain_insn (insn);
16772
16773 if (recog_data.n_alternatives == 0)
16774 return;
16775
16776 /* Fill in recog_op_alt with information about the constraints of
16777 this insn. */
16778 preprocess_constraints (insn);
16779
16780 const operand_alternative *op_alt = which_op_alt ();
16781 for (opno = 0; opno < recog_data.n_operands; opno++)
16782 {
16783 /* Things we need to fix can only occur in inputs. */
16784 if (recog_data.operand_type[opno] != OP_IN)
16785 continue;
16786
16787 /* If this alternative is a memory reference, then any mention
16788 of constants in this alternative is really to fool reload
16789 into allowing us to accept one there. We need to fix them up
16790 now so that we output the right code. */
16791 if (op_alt[opno].memory_ok)
16792 {
16793 rtx op = recog_data.operand[opno];
16794
16795 if (CONSTANT_P (op))
16796 {
16797 if (do_pushes)
16798 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16799 recog_data.operand_mode[opno], op);
16800 }
16801 else if (MEM_P (op)
16802 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16803 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16804 {
16805 if (do_pushes)
16806 {
16807 rtx cop = avoid_constant_pool_reference (op);
16808
16809 /* Casting the address of something to a mode narrower
16810 than a word can cause avoid_constant_pool_reference()
16811 to return the pool reference itself. That's no good to
16812 us here. Let's just hope that we can use the
16813 constant pool value directly. */
16814 if (op == cop)
16815 cop = get_pool_constant (XEXP (op, 0));
16816
16817 push_minipool_fix (insn, address,
16818 recog_data.operand_loc[opno],
16819 recog_data.operand_mode[opno], cop);
16820 }
16821
16822 }
16823 }
16824 }
16825
16826 return;
16827 }
16828
16829 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16830 and unions in the context of ARMv8-M Security Extensions. It is used as a
16831 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16832 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
16833 or four masks, depending on whether it is being computed for a
16834 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16835 respectively. The tree for the type of the argument or a field within an
16836 argument is passed in ARG_TYPE; the current register this argument or field
16837 starts in is kept in the pointer REGNO and updated accordingly; the bit this
16838 argument or field starts at is passed in STARTING_BIT; and the last used bit
16839 is kept in LAST_USED_BIT, which is also updated accordingly. */
16840
16841 static unsigned HOST_WIDE_INT
16842 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16843 uint32_t * padding_bits_to_clear,
16844 unsigned starting_bit, int * last_used_bit)
16845
16846 {
16847 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16848
16849 if (TREE_CODE (arg_type) == RECORD_TYPE)
16850 {
16851 unsigned current_bit = starting_bit;
16852 tree field;
16853 long int offset, size;
16854
16855
16856 field = TYPE_FIELDS (arg_type);
16857 while (field)
16858 {
16859 /* The offset within a structure is always an offset from
16860 the start of that structure. Make sure we take that into account
16861 in the calculation of the register-based offset used here. */
16862 offset = starting_bit;
16863 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16864 offset %= 32;
16865
16866 /* This is the actual size of the field; for bitfields this is the
16867 bitfield width and not the container size. */
16868 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16869
16870 if (*last_used_bit != offset)
16871 {
16872 if (offset < *last_used_bit)
16873 {
16874 /* This field's offset is before the 'last_used_bit', that
16875 means this field goes on the next register. So we need to
16876 pad the rest of the current register and increase the
16877 register number. */
16878 uint32_t mask;
16879 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16880 mask++;
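/* E.g. with *last_used_bit == 8 this computes
   0xffffffff - 0x100 + 1 == 0xffffff00, i.e. bits 8..31 of the current
   register are recorded as padding.  */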
16881
16882 padding_bits_to_clear[*regno] |= mask;
16883 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16884 (*regno)++;
16885 }
16886 else
16887 {
16888 /* Otherwise we pad the bits between the last field's end and
16889 the start of the new field. */
16890 uint32_t mask;
16891
16892 mask = ((uint32_t)-1) >> (32 - offset);
16893 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
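/* E.g. with *last_used_bit == 8 and offset == 24 this computes
   0x00ffffff - 0xff == 0x00ffff00, marking bits 8..23 (the gap between
   the previous field and this one) as padding.  */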
16894 padding_bits_to_clear[*regno] |= mask;
16895 }
16896 current_bit = offset;
16897 }
16898
16899 /* Calculate further padding bits for inner structs/unions too. */
16900 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16901 {
16902 *last_used_bit = current_bit;
16903 not_to_clear_reg_mask
16904 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16905 padding_bits_to_clear, offset,
16906 last_used_bit);
16907 }
16908 else
16909 {
16910 /* Update 'current_bit' with this field's size. If the
16911 'current_bit' lies in a subsequent register, update 'regno' and
16912 reset 'current_bit' to point to the current bit in that new
16913 register. */
16914 current_bit += size;
16915 while (current_bit >= 32)
16916 {
16917 current_bit-=32;
16918 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16919 (*regno)++;
16920 }
16921 *last_used_bit = current_bit;
16922 }
16923
16924 field = TREE_CHAIN (field);
16925 }
16926 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16927 }
16928 else if (TREE_CODE (arg_type) == UNION_TYPE)
16929 {
16930 tree field, field_t;
16931 int i, regno_t, field_size;
16932 int max_reg = -1;
16933 int max_bit = -1;
16934 uint32_t mask;
16935 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16936 = {-1, -1, -1, -1};
16937
16938 /* To compute the padding bits in a union we only consider bits as
16939 padding bits if, for every field in the union, they are either padding
16940 bits or fall outside that field's size. */
16941 field = TYPE_FIELDS (arg_type);
16942 while (field)
16943 {
16944 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16945 = {0U, 0U, 0U, 0U};
16946 int last_used_bit_t = *last_used_bit;
16947 regno_t = *regno;
16948 field_t = TREE_TYPE (field);
16949
16950 /* If the field's type is either a record or a union make sure to
16951 compute their padding bits too. */
16952 if (RECORD_OR_UNION_TYPE_P (field_t))
16953 not_to_clear_reg_mask
16954 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16955 &padding_bits_to_clear_t[0],
16956 starting_bit, &last_used_bit_t);
16957 else
16958 {
16959 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16960 regno_t = (field_size / 32) + *regno;
16961 last_used_bit_t = (starting_bit + field_size) % 32;
16962 }
16963
16964 for (i = *regno; i < regno_t; i++)
16965 {
16966 /* For all but the last register used by this field only keep the
16967 padding bits that were padding bits in this field. */
16968 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16969 }
16970
16971 /* For the last register, keep all padding bits that were padding
16972 bits in this field and any padding bits that are still valid
16973 as padding bits but fall outside of this field's size. */
16974 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16975 padding_bits_to_clear_res[regno_t]
16976 &= padding_bits_to_clear_t[regno_t] | mask;
16977
16978 /* Update the maximum size of the fields in terms of registers used
16979 ('max_reg') and the 'last_used_bit' in said register. */
16980 if (max_reg < regno_t)
16981 {
16982 max_reg = regno_t;
16983 max_bit = last_used_bit_t;
16984 }
16985 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16986 max_bit = last_used_bit_t;
16987
16988 field = TREE_CHAIN (field);
16989 }
16990
16991 /* Update the current padding_bits_to_clear using the intersection of the
16992 padding bits of all the fields. */
16993 for (i=*regno; i < max_reg; i++)
16994 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16995
16996 /* Do not keep trailing padding bits, we do not know yet whether this
16997 is the end of the argument. */
16998 mask = ((uint32_t) 1 << max_bit) - 1;
16999 padding_bits_to_clear[max_reg]
17000 |= padding_bits_to_clear_res[max_reg] & mask;
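/* E.g. max_bit == 12 gives mask == 0x00000fff, so only padding bits below
   bit 12 are kept here; higher bits are left undecided because a later
   field or argument may still occupy them.  */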
17001
17002 *regno = max_reg;
17003 *last_used_bit = max_bit;
17004 }
17005 else
17006 /* This function should only be used for structs and unions. */
17007 gcc_unreachable ();
17008
17009 return not_to_clear_reg_mask;
17010 }
17011
17012 /* In the context of ARMv8-M Security Extensions, this function is used for both
17013 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
17014 registers are used when returning or passing arguments, which is then
17015 returned as a mask. It will also compute a mask to indicate padding/unused
17016 bits for each of these registers, and passes this through the
17017 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
17018 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
17019 the starting register used to pass this argument or return value is passed
17020 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
17021 for struct and union types. */
17022
17023 static unsigned HOST_WIDE_INT
17024 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
17025 uint32_t * padding_bits_to_clear)
17026
17027 {
17028 int last_used_bit = 0;
17029 unsigned HOST_WIDE_INT not_to_clear_mask;
17030
17031 if (RECORD_OR_UNION_TYPE_P (arg_type))
17032 {
17033 not_to_clear_mask
17034 = comp_not_to_clear_mask_str_un (arg_type, &regno,
17035 padding_bits_to_clear, 0,
17036 &last_used_bit);
17037
17038
17039 /* If the 'last_used_bit' is not zero, that means we are still using a
17040 part of the last 'regno'. In such cases we must clear the trailing
17041 bits. Otherwise we are not using regno and we should mark it as to
17042 clear. */
17043 if (last_used_bit != 0)
17044 padding_bits_to_clear[regno]
17045 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
17046 else
17047 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
17048 }
17049 else
17050 {
17051 not_to_clear_mask = 0;
17052 /* We are not dealing with structs or unions, so these arguments may be
17053 passed in floating-point registers too. In some cases BLKmode is
17054 used when returning or passing arguments in multiple VFP registers. */
17055 if (GET_MODE (arg_rtx) == BLKmode)
17056 {
17057 int i, arg_regs;
17058 rtx reg;
17059
17060 /* This should really only occur when dealing with the hard-float
17061 ABI. */
17062 gcc_assert (TARGET_HARD_FLOAT_ABI);
17063
17064 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
17065 {
17066 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
17067 gcc_assert (REG_P (reg));
17068
17069 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
17070
17071 /* If we are dealing with DF mode, make sure we don't
17072 clear either of the registers it addresses. */
17073 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
17074 if (arg_regs > 1)
17075 {
17076 unsigned HOST_WIDE_INT mask;
17077 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
17078 mask -= HOST_WIDE_INT_1U << REGNO (reg);
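/* The two lines above build a mask of bits REGNO (reg) ..
   REGNO (reg) + arg_regs - 1; e.g. for a two-register DFmode value this
   is 0x3 << REGNO (reg), so neither half of the register pair gets
   cleared.  */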
17079 not_to_clear_mask |= mask;
17080 }
17081 }
17082 }
17083 else
17084 {
17085 /* Otherwise we can rely on the MODE to determine how many registers
17086 are being used by this argument. */
17087 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
17088 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17089 if (arg_regs > 1)
17090 {
17091 unsigned HOST_WIDE_INT
17092 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
17093 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17094 not_to_clear_mask |= mask;
17095 }
17096 }
17097 }
17098
17099 return not_to_clear_mask;
17100 }
17101
17102 /* Clear registers holding secrets before doing a cmse_nonsecure_call or
17103 returning from a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates
17104 which registers are to be fully cleared, using the value in CLEARING_REG
17105 if more efficient. The PADDING_BITS_TO_CLEAR array of PADDING_BITS_LEN
17106 entries gives the bits that need to be cleared in caller-saved core
17107 registers, with SCRATCH_REG used as a scratch register for that clearing.
17108
17109 NOTE: one of the three following conditions must hold:
17110 - SCRATCH_REG is a low register
17111 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
17112 in TO_CLEAR_BITMAP)
17113 - CLEARING_REG is a low register. */
17114
17115 static void
17116 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
17117 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
17118 {
17119 bool saved_clearing = false;
17120 rtx saved_clearing_reg = NULL_RTX;
17121 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
17122
17123 gcc_assert (arm_arch_cmse);
17124
17125 if (!bitmap_empty_p (to_clear_bitmap))
17126 {
17127 minregno = bitmap_first_set_bit (to_clear_bitmap);
17128 maxregno = bitmap_last_set_bit (to_clear_bitmap);
17129 }
17130 clearing_regno = REGNO (clearing_reg);
17131
17132 /* Clear padding bits. */
17133 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
17134 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
17135 {
17136 uint64_t mask;
17137 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
17138
17139 if (padding_bits_to_clear[i] == 0)
17140 continue;
17141
17142 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
17143 CLEARING_REG as scratch. */
17144 if (TARGET_THUMB1
17145 && REGNO (scratch_reg) > LAST_LO_REGNUM)
17146 {
17147 /* clearing_reg is not to be cleared, copy its value into scratch_reg
17148 such that we can use clearing_reg to clear the unused bits in the
17149 arguments. */
17150 if ((clearing_regno > maxregno
17151 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
17152 && !saved_clearing)
17153 {
17154 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
17155 emit_move_insn (scratch_reg, clearing_reg);
17156 saved_clearing = true;
17157 saved_clearing_reg = scratch_reg;
17158 }
17159 scratch_reg = clearing_reg;
17160 }
17161
17162 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
17163 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
17164 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
17165
17166 /* Fill the top half of the negated padding_bits_to_clear[i]. */
17167 mask = (~padding_bits_to_clear[i]) >> 16;
17168 rtx16 = gen_int_mode (16, SImode);
17169 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
17170 if (mask)
17171 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
17172
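/* SCRATCH_REG now holds the full 32-bit value ~padding_bits_to_clear[i]
   (low half from the move above, high half from the ZERO_EXTRACT store,
   which is skipped when it would be zero), so the AND below clears exactly
   the padding bits of this argument register while leaving the bits that
   carry the argument untouched.  */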
17173 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
17174 }
17175 if (saved_clearing)
17176 emit_move_insn (clearing_reg, saved_clearing_reg);
17177
17178
17179 /* Clear full registers. */
17180
17181 /* If not marked for clearing, clearing_reg already does not contain
17182 any secret. */
17183 if (clearing_regno <= maxregno
17184 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
17185 {
17186 emit_move_insn (clearing_reg, const0_rtx);
17187 emit_use (clearing_reg);
17188 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
17189 }
17190
17191 for (regno = minregno; regno <= maxregno; regno++)
17192 {
17193 if (!bitmap_bit_p (to_clear_bitmap, regno))
17194 continue;
17195
17196 if (IS_VFP_REGNUM (regno))
17197 {
17198 /* If regno is an even vfp register and its successor is also to
17199 be cleared, use vmov. */
17200 if (TARGET_VFP_DOUBLE
17201 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17202 && bitmap_bit_p (to_clear_bitmap, regno + 1))
17203 {
17204 emit_move_insn (gen_rtx_REG (DFmode, regno),
17205 CONST1_RTX (DFmode));
17206 emit_use (gen_rtx_REG (DFmode, regno));
17207 regno++;
17208 }
17209 else
17210 {
17211 emit_move_insn (gen_rtx_REG (SFmode, regno),
17212 CONST1_RTX (SFmode));
17213 emit_use (gen_rtx_REG (SFmode, regno));
17214 }
17215 }
17216 else
17217 {
17218 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
17219 emit_use (gen_rtx_REG (SImode, regno));
17220 }
17221 }
17222 }
17223
17224 /* Clear caller-saved registers not used to pass arguments before a
17225 cmse_nonsecure_call. Saving, clearing and restoring of callee-saved
17226 registers is done in the __gnu_cmse_nonsecure_call libcall.
17227 See libgcc/config/arm/cmse_nonsecure_call.S. */
17228
17229 static void
17230 cmse_nonsecure_call_clear_caller_saved (void)
17231 {
17232 basic_block bb;
17233
17234 FOR_EACH_BB_FN (bb, cfun)
17235 {
17236 rtx_insn *insn;
17237
17238 FOR_BB_INSNS (bb, insn)
17239 {
17240 unsigned address_regnum, regno, maxregno =
17241 TARGET_HARD_FLOAT_ABI ? D7_VFP_REGNUM : NUM_ARG_REGS - 1;
17242 auto_sbitmap to_clear_bitmap (maxregno + 1);
17243 rtx_insn *seq;
17244 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
17245 rtx address;
17246 CUMULATIVE_ARGS args_so_far_v;
17247 cumulative_args_t args_so_far;
17248 tree arg_type, fntype;
17249 bool first_param = true;
17250 function_args_iterator args_iter;
17251 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
17252
17253 if (!NONDEBUG_INSN_P (insn))
17254 continue;
17255
17256 if (!CALL_P (insn))
17257 continue;
17258
17259 pat = PATTERN (insn);
17260 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
17261 call = XVECEXP (pat, 0, 0);
17262
17263 /* Get the real call RTX if the insn sets a value, i.e. returns. */
17264 if (GET_CODE (call) == SET)
17265 call = SET_SRC (call);
17266
17267 /* Check if it is a cmse_nonsecure_call. */
17268 unspec = XEXP (call, 0);
17269 if (GET_CODE (unspec) != UNSPEC
17270 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
17271 continue;
17272
17273 /* Determine the caller-saved registers we need to clear. */
17274 bitmap_clear (to_clear_bitmap);
17275 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
17276
17277 /* Only look at the caller-saved floating point registers in case of
17278 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
17279 lazy store and loads which clear both caller- and callee-saved
17280 registers. */
17281 if (TARGET_HARD_FLOAT_ABI)
17282 {
17283 auto_sbitmap float_bitmap (maxregno + 1);
17284
17285 bitmap_clear (float_bitmap);
17286 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
17287 D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1);
17288 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
17289 }
17290
17291 /* Make sure the register used to hold the function address is not
17292 cleared. */
17293 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17294 gcc_assert (MEM_P (address));
17295 gcc_assert (REG_P (XEXP (address, 0)));
17296 address_regnum = REGNO (XEXP (address, 0));
17297 if (address_regnum < R0_REGNUM + NUM_ARG_REGS)
17298 bitmap_clear_bit (to_clear_bitmap, address_regnum);
17299
17300 /* Set basic block of call insn so that df rescan is performed on
17301 insns inserted here. */
17302 set_block_for_insn (insn, bb);
17303 df_set_flags (DF_DEFER_INSN_RESCAN);
17304 start_sequence ();
17305
17306 /* Make sure the scheduler doesn't schedule other insns beyond
17307 here. */
17308 emit_insn (gen_blockage ());
17309
17310 /* Walk through all arguments and clear registers appropriately. */
17312 fntype = TREE_TYPE (MEM_EXPR (address));
17313 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17314 NULL_TREE);
17315 args_so_far = pack_cumulative_args (&args_so_far_v);
17316 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17317 {
17318 rtx arg_rtx;
17319 uint64_t to_clear_args_mask;
17320 machine_mode arg_mode = TYPE_MODE (arg_type);
17321
17322 if (VOID_TYPE_P (arg_type))
17323 continue;
17324
17325 if (!first_param)
17326 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17327 true);
17328
17329 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17330 true);
17331 gcc_assert (REG_P (arg_rtx));
17332 to_clear_args_mask
17333 = compute_not_to_clear_mask (arg_type, arg_rtx,
17334 REGNO (arg_rtx),
17335 &padding_bits_to_clear[0]);
17336 if (to_clear_args_mask)
17337 {
17338 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17339 {
17340 if (to_clear_args_mask & (1ULL << regno))
17341 bitmap_clear_bit (to_clear_bitmap, regno);
17342 }
17343 }
17344
17345 first_param = false;
17346 }
17347
17348 /* We use right shift and left shift to clear the LSB of the address
17349 we jump to instead of using bic, to avoid having to use an extra
17350 register on Thumb-1. */
17351 clearing_reg = XEXP (address, 0);
17352 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
17353 emit_insn (gen_rtx_SET (clearing_reg, shift));
17354 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
17355 emit_insn (gen_rtx_SET (clearing_reg, shift));
17356
17357 /* Clear caller-saved registers that leak before doing a non-secure
17358 call. */
17359 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
17360 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
17361 NUM_ARG_REGS, ip_reg, clearing_reg);
17362
17363 seq = get_insns ();
17364 end_sequence ();
17365 emit_insn_before (seq, insn);
17366 }
17367 }
17368 }
17369
17370 /* Rewrite a move insn into a subtract of 0 if the condition codes will
17371 be useful in the next conditional jump insn. */
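/* For example (illustrative registers): when a low-register copy
   r1 := r0 is followed in the same basic block by "cmp r1, #0" and a
   conditional branch, the copy is rewritten as "subs r1, r0, #0".  The
   SUBS sets the condition codes itself, the idea being that the separate
   compare against zero then becomes redundant when the branch is output.  */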
17372
17373 static void
17374 thumb1_reorg (void)
17375 {
17376 basic_block bb;
17377
17378 FOR_EACH_BB_FN (bb, cfun)
17379 {
17380 rtx dest, src;
17381 rtx cmp, op0, op1, set = NULL;
17382 rtx_insn *prev, *insn = BB_END (bb);
17383 bool insn_clobbered = false;
17384
17385 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17386 insn = PREV_INSN (insn);
17387
17388 /* Find the last cbranchsi4_insn in basic block BB. */
17389 if (insn == BB_HEAD (bb)
17390 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17391 continue;
17392
17393 /* Get the register with which we are comparing. */
17394 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17395 op0 = XEXP (cmp, 0);
17396 op1 = XEXP (cmp, 1);
17397
17398 /* Check that comparison is against ZERO. */
17399 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17400 continue;
17401
17402 /* Find the first flag setting insn before INSN in basic block BB. */
17403 gcc_assert (insn != BB_HEAD (bb));
17404 for (prev = PREV_INSN (insn);
17405 (!insn_clobbered
17406 && prev != BB_HEAD (bb)
17407 && (NOTE_P (prev)
17408 || DEBUG_INSN_P (prev)
17409 || ((set = single_set (prev)) != NULL
17410 && get_attr_conds (prev) == CONDS_NOCOND)));
17411 prev = PREV_INSN (prev))
17412 {
17413 if (reg_set_p (op0, prev))
17414 insn_clobbered = true;
17415 }
17416
17417 /* Skip if op0 is clobbered by insn other than prev. */
17418 if (insn_clobbered)
17419 continue;
17420
17421 if (!set)
17422 continue;
17423
17424 dest = SET_DEST (set);
17425 src = SET_SRC (set);
17426 if (!low_register_operand (dest, SImode)
17427 || !low_register_operand (src, SImode))
17428 continue;
17429
17430 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17431 in INSN. Both src and dest of the move insn are checked. */
17432 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17433 {
17434 dest = copy_rtx (dest);
17435 src = copy_rtx (src);
17436 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17437 PATTERN (prev) = gen_rtx_SET (dest, src);
17438 INSN_CODE (prev) = -1;
17439 /* Set test register in INSN to dest. */
17440 XEXP (cmp, 0) = copy_rtx (dest);
17441 INSN_CODE (insn) = -1;
17442 }
17443 }
17444 }
17445
17446 /* Convert instructions to their cc-clobbering variant if possible, since
17447 that allows us to use smaller encodings. */
17448
17449 static void
17450 thumb2_reorg (void)
17451 {
17452 basic_block bb;
17453 regset_head live;
17454
17455 INIT_REG_SET (&live);
17456
17457 /* We are freeing block_for_insn in the toplev to keep compatibility
17458 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17459 compute_bb_for_insn ();
17460 df_analyze ();
17461
17462 enum Convert_Action {SKIP, CONV, SWAP_CONV};
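/* SKIP leaves the insn alone; CONV rewrites it in place to also clobber
   the condition codes so that the shorter 16-bit flag-setting encoding
   (ADDS, ANDS, LSLS, ...) can be used; SWAP_CONV additionally swaps the
   two (commutative) source operands first so that the destination matches
   one of them, as the two-operand 16-bit forms require.  */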
17463
17464 FOR_EACH_BB_FN (bb, cfun)
17465 {
17466 if ((current_tune->disparage_flag_setting_t16_encodings
17467 == tune_params::DISPARAGE_FLAGS_ALL)
17468 && optimize_bb_for_speed_p (bb))
17469 continue;
17470
17471 rtx_insn *insn;
17472 Convert_Action action = SKIP;
17473 Convert_Action action_for_partial_flag_setting
17474 = ((current_tune->disparage_flag_setting_t16_encodings
17475 != tune_params::DISPARAGE_FLAGS_NEITHER)
17476 && optimize_bb_for_speed_p (bb))
17477 ? SKIP : CONV;
17478
17479 COPY_REG_SET (&live, DF_LR_OUT (bb));
17480 df_simulate_initialize_backwards (bb, &live);
17481 FOR_BB_INSNS_REVERSE (bb, insn)
17482 {
17483 if (NONJUMP_INSN_P (insn)
17484 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17485 && GET_CODE (PATTERN (insn)) == SET)
17486 {
17487 action = SKIP;
17488 rtx pat = PATTERN (insn);
17489 rtx dst = XEXP (pat, 0);
17490 rtx src = XEXP (pat, 1);
17491 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17492
17493 if (UNARY_P (src) || BINARY_P (src))
17494 op0 = XEXP (src, 0);
17495
17496 if (BINARY_P (src))
17497 op1 = XEXP (src, 1);
17498
17499 if (low_register_operand (dst, SImode))
17500 {
17501 switch (GET_CODE (src))
17502 {
17503 case PLUS:
17504 /* Adding two registers and storing the result
17505 in the first source is already a 16-bit
17506 operation. */
17507 if (rtx_equal_p (dst, op0)
17508 && register_operand (op1, SImode))
17509 break;
17510
17511 if (low_register_operand (op0, SImode))
17512 {
17513 /* ADDS <Rd>,<Rn>,<Rm> */
17514 if (low_register_operand (op1, SImode))
17515 action = CONV;
17516 /* ADDS <Rdn>,#<imm8> */
17517 /* SUBS <Rdn>,#<imm8> */
17518 else if (rtx_equal_p (dst, op0)
17519 && CONST_INT_P (op1)
17520 && IN_RANGE (INTVAL (op1), -255, 255))
17521 action = CONV;
17522 /* ADDS <Rd>,<Rn>,#<imm3> */
17523 /* SUBS <Rd>,<Rn>,#<imm3> */
17524 else if (CONST_INT_P (op1)
17525 && IN_RANGE (INTVAL (op1), -7, 7))
17526 action = CONV;
17527 }
17528 /* ADCS <Rd>, <Rn> */
17529 else if (GET_CODE (XEXP (src, 0)) == PLUS
17530 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17531 && low_register_operand (XEXP (XEXP (src, 0), 1),
17532 SImode)
17533 && COMPARISON_P (op1)
17534 && cc_register (XEXP (op1, 0), VOIDmode)
17535 && maybe_get_arm_condition_code (op1) == ARM_CS
17536 && XEXP (op1, 1) == const0_rtx)
17537 action = CONV;
17538 break;
17539
17540 case MINUS:
17541 /* RSBS <Rd>,<Rn>,#0
17542 Not handled here: see NEG below. */
17543 /* SUBS <Rd>,<Rn>,#<imm3>
17544 SUBS <Rdn>,#<imm8>
17545 Not handled here: see PLUS above. */
17546 /* SUBS <Rd>,<Rn>,<Rm> */
17547 if (low_register_operand (op0, SImode)
17548 && low_register_operand (op1, SImode))
17549 action = CONV;
17550 break;
17551
17552 case MULT:
17553 /* MULS <Rdm>,<Rn>,<Rdm>
17554 As an exception to the rule, this is only used
17555 when optimizing for size since MULS is slow on all
17556 known implementations. We do not even want to use
17557 MULS in cold code, if optimizing for speed, so we
17558 test the global flag here. */
17559 if (!optimize_size)
17560 break;
17561 /* Fall through. */
17562 case AND:
17563 case IOR:
17564 case XOR:
17565 /* ANDS <Rdn>,<Rm> */
17566 if (rtx_equal_p (dst, op0)
17567 && low_register_operand (op1, SImode))
17568 action = action_for_partial_flag_setting;
17569 else if (rtx_equal_p (dst, op1)
17570 && low_register_operand (op0, SImode))
17571 action = action_for_partial_flag_setting == SKIP
17572 ? SKIP : SWAP_CONV;
17573 break;
17574
17575 case ASHIFTRT:
17576 case ASHIFT:
17577 case LSHIFTRT:
17578 /* ASRS <Rdn>,<Rm> */
17579 /* LSRS <Rdn>,<Rm> */
17580 /* LSLS <Rdn>,<Rm> */
17581 if (rtx_equal_p (dst, op0)
17582 && low_register_operand (op1, SImode))
17583 action = action_for_partial_flag_setting;
17584 /* ASRS <Rd>,<Rm>,#<imm5> */
17585 /* LSRS <Rd>,<Rm>,#<imm5> */
17586 /* LSLS <Rd>,<Rm>,#<imm5> */
17587 else if (low_register_operand (op0, SImode)
17588 && CONST_INT_P (op1)
17589 && IN_RANGE (INTVAL (op1), 0, 31))
17590 action = action_for_partial_flag_setting;
17591 break;
17592
17593 case ROTATERT:
17594 /* RORS <Rdn>,<Rm> */
17595 if (rtx_equal_p (dst, op0)
17596 && low_register_operand (op1, SImode))
17597 action = action_for_partial_flag_setting;
17598 break;
17599
17600 case NOT:
17601 /* MVNS <Rd>,<Rm> */
17602 if (low_register_operand (op0, SImode))
17603 action = action_for_partial_flag_setting;
17604 break;
17605
17606 case NEG:
17607 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17608 if (low_register_operand (op0, SImode))
17609 action = CONV;
17610 break;
17611
17612 case CONST_INT:
17613 /* MOVS <Rd>,#<imm8> */
17614 if (CONST_INT_P (src)
17615 && IN_RANGE (INTVAL (src), 0, 255))
17616 action = action_for_partial_flag_setting;
17617 break;
17618
17619 case REG:
17620 /* MOVS and MOV<c> with registers have different
17621 encodings, so are not relevant here. */
17622 break;
17623
17624 default:
17625 break;
17626 }
17627 }
17628
17629 if (action != SKIP)
17630 {
17631 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17632 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17633 rtvec vec;
17634
17635 if (action == SWAP_CONV)
17636 {
17637 src = copy_rtx (src);
17638 XEXP (src, 0) = op1;
17639 XEXP (src, 1) = op0;
17640 pat = gen_rtx_SET (dst, src);
17641 vec = gen_rtvec (2, pat, clobber);
17642 }
17643 else /* action == CONV */
17644 vec = gen_rtvec (2, pat, clobber);
17645
17646 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17647 INSN_CODE (insn) = -1;
17648 }
17649 }
17650
17651 if (NONDEBUG_INSN_P (insn))
17652 df_simulate_one_insn_backwards (bb, insn, &live);
17653 }
17654 }
17655
17656 CLEAR_REG_SET (&live);
17657 }
17658
17659 /* GCC puts the pool in the wrong place for ARM, since we can only
17660 load addresses a limited distance around the pc. We do some
17661 special munging to move the constant pool values to the correct
17662 point in the code. */
17663 static void
17664 arm_reorg (void)
17665 {
17666 rtx_insn *insn;
17667 HOST_WIDE_INT address = 0;
17668 Mfix * fix;
17669
17670 if (use_cmse)
17671 cmse_nonsecure_call_clear_caller_saved ();
17672 if (TARGET_THUMB1)
17673 thumb1_reorg ();
17674 else if (TARGET_THUMB2)
17675 thumb2_reorg ();
17676
17677 /* Ensure all insns that must be split have been split at this point.
17678 Otherwise, the pool placement code below may compute incorrect
17679 insn lengths. Note that when optimizing, all insns have already
17680 been split at this point. */
17681 if (!optimize)
17682 split_all_insns_noflow ();
17683
17684 /* Make sure we do not attempt to create a literal pool even though it should
17685 no longer be necessary to create any. */
17686 if (arm_disable_literal_pool)
17687 return;
17688
17689 minipool_fix_head = minipool_fix_tail = NULL;
17690
17691 /* The first insn must always be a note, or the code below won't
17692 scan it properly. */
17693 insn = get_insns ();
17694 gcc_assert (NOTE_P (insn));
17695 minipool_pad = 0;
17696
17697 /* Scan all the insns and record the operands that will need fixing. */
17698 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17699 {
17700 if (BARRIER_P (insn))
17701 push_minipool_barrier (insn, address);
17702 else if (INSN_P (insn))
17703 {
17704 rtx_jump_table_data *table;
17705
17706 note_invalid_constants (insn, address, true);
17707 address += get_attr_length (insn);
17708
17709 /* If the insn is a vector jump, add the size of the table
17710 and skip the table. */
17711 if (tablejump_p (insn, NULL, &table))
17712 {
17713 address += get_jump_table_size (table);
17714 insn = table;
17715 }
17716 }
17717 else if (LABEL_P (insn))
17718 /* Add the worst-case padding due to alignment. We don't add
17719 the _current_ padding because the minipool insertions
17720 themselves might change it. */
17721 address += get_label_padding (insn);
17722 }
17723
17724 fix = minipool_fix_head;
17725
17726 /* Now scan the fixups and perform the required changes. */
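/* The loop below works pool by pool: gather forward references until one
   no longer fits (or a barrier beyond the pool's reach is seen), place the
   pool at the last natural barrier found or create one with
   create_fix_barrier, assign offsets, opportunistically add the following
   fixes as backward references to the same pool, and finally rewrite each
   fix to load from the pool and emit the pool itself after the barrier.  */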
17727 while (fix)
17728 {
17729 Mfix * ftmp;
17730 Mfix * fdel;
17731 Mfix * last_added_fix;
17732 Mfix * last_barrier = NULL;
17733 Mfix * this_fix;
17734
17735 /* Skip any further barriers before the next fix. */
17736 while (fix && BARRIER_P (fix->insn))
17737 fix = fix->next;
17738
17739 /* No more fixes. */
17740 if (fix == NULL)
17741 break;
17742
17743 last_added_fix = NULL;
17744
17745 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17746 {
17747 if (BARRIER_P (ftmp->insn))
17748 {
17749 if (ftmp->address >= minipool_vector_head->max_address)
17750 break;
17751
17752 last_barrier = ftmp;
17753 }
17754 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17755 break;
17756
17757 last_added_fix = ftmp; /* Keep track of the last fix added. */
17758 }
17759
17760 /* If we found a barrier, drop back to that; any fixes that we
17761 could have reached but come after the barrier will now go in
17762 the next mini-pool. */
17763 if (last_barrier != NULL)
17764 {
17765 /* Reduce the refcount for those fixes that won't go into this
17766 pool after all. */
17767 for (fdel = last_barrier->next;
17768 fdel && fdel != ftmp;
17769 fdel = fdel->next)
17770 {
17771 fdel->minipool->refcount--;
17772 fdel->minipool = NULL;
17773 }
17774
17775 ftmp = last_barrier;
17776 }
17777 else
17778 {
17779 /* ftmp is the first fix that we can't fit into this pool and
17780 there are no natural barriers that we could use. Insert a
17781 new barrier in the code somewhere between the previous
17782 fix and this one, and arrange to jump around it. */
17783 HOST_WIDE_INT max_address;
17784
17785 /* The last item on the list of fixes must be a barrier, so
17786 we can never run off the end of the list of fixes without
17787 last_barrier being set. */
17788 gcc_assert (ftmp);
17789
17790 max_address = minipool_vector_head->max_address;
17791 /* Check that there isn't another fix that is in range that
17792 we couldn't fit into this pool because the pool was
17793 already too large: we need to put the pool before such an
17794 instruction. The pool itself may come just after the
17795 fix because create_fix_barrier also allows space for a
17796 jump instruction. */
17797 if (ftmp->address < max_address)
17798 max_address = ftmp->address + 1;
17799
17800 last_barrier = create_fix_barrier (last_added_fix, max_address);
17801 }
17802
17803 assign_minipool_offsets (last_barrier);
17804
17805 while (ftmp)
17806 {
17807 if (!BARRIER_P (ftmp->insn)
17808 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17809 == NULL))
17810 break;
17811
17812 ftmp = ftmp->next;
17813 }
17814
17815 /* Scan over the fixes we have identified for this pool, fixing them
17816 up and adding the constants to the pool itself. */
17817 for (this_fix = fix; this_fix && ftmp != this_fix;
17818 this_fix = this_fix->next)
17819 if (!BARRIER_P (this_fix->insn))
17820 {
17821 rtx addr
17822 = plus_constant (Pmode,
17823 gen_rtx_LABEL_REF (VOIDmode,
17824 minipool_vector_label),
17825 this_fix->minipool->offset);
17826 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17827 }
17828
17829 dump_minipool (last_barrier->insn);
17830 fix = ftmp;
17831 }
17832
17833 /* From now on we must synthesize any constants that we can't handle
17834 directly. This can happen if the RTL gets split during final
17835 instruction generation. */
17836 cfun->machine->after_arm_reorg = 1;
17837
17838 /* Free the minipool memory. */
17839 obstack_free (&minipool_obstack, minipool_startobj);
17840 }
17841 \f
17842 /* Routines to output assembly language. */
17843
17844 /* Return string representation of passed in real value. */
17845 static const char *
17846 fp_const_from_val (REAL_VALUE_TYPE *r)
17847 {
17848 if (!fp_consts_inited)
17849 init_fp_table ();
17850
17851 gcc_assert (real_equal (r, &value_fp0));
17852 return "0";
17853 }
17854
17855 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17856 OPERANDS[1] is the base register, RETURN_PC is true iff the return
17857 insn is in the list, and UPDATE is true iff the list contains an
17858 explicit update of the base register. */
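/* As a rough sketch of the output (conditions from the %? / %d0 directives
   omitted): popping {r4, r5, r6} with an SP base and writeback comes out as
   "pop {r4, r5, r6}", while the same list with a non-SP base r7 and
   writeback comes out as "ldmia r7!, {r4, r5, r6}".  */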
17859 void
17860 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17861 bool update)
17862 {
17863 int i;
17864 char pattern[100];
17865 int offset;
17866 const char *conditional;
17867 int num_saves = XVECLEN (operands[0], 0);
17868 unsigned int regno;
17869 unsigned int regno_base = REGNO (operands[1]);
17870 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17871
17872 offset = 0;
17873 offset += update ? 1 : 0;
17874 offset += return_pc ? 1 : 0;
17875
17876 /* Is the base register in the list? */
17877 for (i = offset; i < num_saves; i++)
17878 {
17879 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17880 /* If SP is in the list, then the base register must be SP. */
17881 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17882 /* If base register is in the list, there must be no explicit update. */
17883 if (regno == regno_base)
17884 gcc_assert (!update);
17885 }
17886
17887 conditional = reverse ? "%?%D0" : "%?%d0";
17888 /* Can't use POP if returning from an interrupt. */
17889 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17890 sprintf (pattern, "pop%s\t{", conditional);
17891 else
17892 {
17893 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17894 It's just a convention; their semantics are identical. */
17895 if (regno_base == SP_REGNUM)
17896 sprintf (pattern, "ldmfd%s\t", conditional);
17897 else if (update)
17898 sprintf (pattern, "ldmia%s\t", conditional);
17899 else
17900 sprintf (pattern, "ldm%s\t", conditional);
17901
17902 strcat (pattern, reg_names[regno_base]);
17903 if (update)
17904 strcat (pattern, "!, {");
17905 else
17906 strcat (pattern, ", {");
17907 }
17908
17909 /* Output the first destination register. */
17910 strcat (pattern,
17911 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17912
17913 /* Output the rest of the destination registers. */
17914 for (i = offset + 1; i < num_saves; i++)
17915 {
17916 strcat (pattern, ", ");
17917 strcat (pattern,
17918 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17919 }
17920
17921 strcat (pattern, "}");
17922
17923 if (interrupt_p && return_pc)
17924 strcat (pattern, "^");
17925
17926 output_asm_insn (pattern, &cond);
17927 }
17928
17929
17930 /* Output the assembly for a store multiple. */
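/* A sketch of the output, assuming three consecutive double registers
   starting at d8: a store through the stack pointer is emitted as
   "vpush.64 {d8, d9, d10}", while a store through another base register rN
   with writeback is emitted as "vstmdb.64 rN!, {d8, d9, d10}".  */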
17931
17932 const char *
17933 vfp_output_vstmd (rtx * operands)
17934 {
17935 char pattern[100];
17936 int p;
17937 int base;
17938 int i;
17939 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17940 ? XEXP (operands[0], 0)
17941 : XEXP (XEXP (operands[0], 0), 0);
17942 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17943
17944 if (push_p)
17945 strcpy (pattern, "vpush%?.64\t{%P1");
17946 else
17947 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17948
17949 p = strlen (pattern);
17950
17951 gcc_assert (REG_P (operands[1]));
17952
17953 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17954 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17955 {
17956 p += sprintf (&pattern[p], ", d%d", base + i);
17957 }
17958 strcpy (&pattern[p], "}");
17959
17960 output_asm_insn (pattern, operands);
17961 return "";
17962 }
17963
17964
17965 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
17966 number of bytes pushed. */
17967
17968 static int
17969 vfp_emit_fstmd (int base_reg, int count)
17970 {
17971 rtx par;
17972 rtx dwarf;
17973 rtx tmp, reg;
17974 int i;
17975
17976 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17977 register pairs are stored by a store multiple insn. We avoid this
17978 by pushing an extra pair. */
17979 if (count == 2 && !arm_arch6)
17980 {
17981 if (base_reg == LAST_VFP_REGNUM - 3)
17982 base_reg -= 2;
17983 count++;
17984 }
17985
17986 /* FSTMD may not store more than 16 doubleword registers at once. Split
17987 larger stores into multiple parts (up to a maximum of two, in
17988 practice). */
17989 if (count > 16)
17990 {
17991 int saved;
17992 /* NOTE: base_reg is an internal register number, so each D register
17993 counts as 2. */
17994 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17995 saved += vfp_emit_fstmd (base_reg, 16);
17996 return saved;
17997 }
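/* Illustrative example of the split above: a request to store 20 double
   registers is emitted as one FSTMD of the top 4 registers followed by one
   FSTMD of the remaining 16, since base_reg + 32 names the D register 16
   above base_reg in the internal numbering.  */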
17998
17999 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
18000 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
18001
18002 reg = gen_rtx_REG (DFmode, base_reg);
18003 base_reg += 2;
18004
18005 XVECEXP (par, 0, 0)
18006 = gen_rtx_SET (gen_frame_mem
18007 (BLKmode,
18008 gen_rtx_PRE_MODIFY (Pmode,
18009 stack_pointer_rtx,
18010 plus_constant
18011 (Pmode, stack_pointer_rtx,
18012 - (count * 8)))
18013 ),
18014 gen_rtx_UNSPEC (BLKmode,
18015 gen_rtvec (1, reg),
18016 UNSPEC_PUSH_MULT));
18017
18018 tmp = gen_rtx_SET (stack_pointer_rtx,
18019 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
18020 RTX_FRAME_RELATED_P (tmp) = 1;
18021 XVECEXP (dwarf, 0, 0) = tmp;
18022
18023 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
18024 RTX_FRAME_RELATED_P (tmp) = 1;
18025 XVECEXP (dwarf, 0, 1) = tmp;
18026
18027 for (i = 1; i < count; i++)
18028 {
18029 reg = gen_rtx_REG (DFmode, base_reg);
18030 base_reg += 2;
18031 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
18032
18033 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
18034 plus_constant (Pmode,
18035 stack_pointer_rtx,
18036 i * 8)),
18037 reg);
18038 RTX_FRAME_RELATED_P (tmp) = 1;
18039 XVECEXP (dwarf, 0, i + 1) = tmp;
18040 }
18041
18042 par = emit_insn (par);
18043 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
18044 RTX_FRAME_RELATED_P (par) = 1;
18045
18046 return count * 8;
18047 }
18048
18049 /* Return TRUE if -mcmse has been passed and the function pointed to by ADDR
18050 has the cmse_nonsecure_call attribute; return FALSE otherwise. */
18051
18052 bool
18053 detect_cmse_nonsecure_call (tree addr)
18054 {
18055 if (!addr)
18056 return FALSE;
18057
18058 tree fntype = TREE_TYPE (addr);
18059 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
18060 TYPE_ATTRIBUTES (fntype)))
18061 return TRUE;
18062 return FALSE;
18063 }
18064
18065
18066 /* Emit a call instruction with pattern PAT. ADDR is the address of
18067 the call target. */
18068
18069 void
18070 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
18071 {
18072 rtx insn;
18073
18074 insn = emit_call_insn (pat);
18075
18076 /* The PIC register is live on entry to VxWorks PIC PLT entries.
18077 If the call might use such an entry, add a use of the PIC register
18078 to the instruction's CALL_INSN_FUNCTION_USAGE. */
18079 if (TARGET_VXWORKS_RTP
18080 && flag_pic
18081 && !sibcall
18082 && GET_CODE (addr) == SYMBOL_REF
18083 && (SYMBOL_REF_DECL (addr)
18084 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
18085 : !SYMBOL_REF_LOCAL_P (addr)))
18086 {
18087 require_pic_register ();
18088 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
18089 }
18090
18091 if (TARGET_AAPCS_BASED)
18092 {
18093 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
18094 linker. We need to add an IP clobber to allow setting
18095 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
18096 is not needed since it's a fixed register. */
18097 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
18098 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
18099 }
18100 }
18101
18102 /* Output a 'call' insn. */
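/* A sketch of the pre-ARMv5 sequence this emits for an indirect call
   through, say, r2: "mov lr, pc" followed by "bx r2" on interworking or
   ARMv4T targets, and by "mov pc, r2" otherwise; a call through lr is
   first moved into ip.  */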
18103 const char *
18104 output_call (rtx *operands)
18105 {
18106 gcc_assert (!arm_arch5); /* Patterns should call blx <reg> directly. */
18107
18108 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
18109 if (REGNO (operands[0]) == LR_REGNUM)
18110 {
18111 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
18112 output_asm_insn ("mov%?\t%0, %|lr", operands);
18113 }
18114
18115 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
18116
18117 if (TARGET_INTERWORK || arm_arch4t)
18118 output_asm_insn ("bx%?\t%0", operands);
18119 else
18120 output_asm_insn ("mov%?\t%|pc, %0", operands);
18121
18122 return "";
18123 }
18124
18125 /* Output a move of a long double from ARM registers to ARM registers.
18126 OPERANDS[0] is the destination.
18127 OPERANDS[1] is the source. */
18128 const char *
18129 output_mov_long_double_arm_from_arm (rtx *operands)
18130 {
18131 /* We have to be careful here because the two might overlap. */
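/* For example, copying {r2, r3, r4} into {r3, r4, r5} must move the
   highest word first (and the opposite direction copies lowest-first) so
   that no source register is overwritten before it has been read.  */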
18132 int dest_start = REGNO (operands[0]);
18133 int src_start = REGNO (operands[1]);
18134 rtx ops[2];
18135 int i;
18136
18137 if (dest_start < src_start)
18138 {
18139 for (i = 0; i < 3; i++)
18140 {
18141 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18142 ops[1] = gen_rtx_REG (SImode, src_start + i);
18143 output_asm_insn ("mov%?\t%0, %1", ops);
18144 }
18145 }
18146 else
18147 {
18148 for (i = 2; i >= 0; i--)
18149 {
18150 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18151 ops[1] = gen_rtx_REG (SImode, src_start + i);
18152 output_asm_insn ("mov%?\t%0, %1", ops);
18153 }
18154 }
18155
18156 return "";
18157 }
18158
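/* Emit the (at most two) SETs needed to load SRC into DEST.  A constant
   source is split into its low and high 16-bit halves, roughly a movw/movt
   style sequence: for example, loading 0x12345678 first sets the low half
   to 0x5678 and then writes 0x1234 into bits 16-31 through a ZERO_EXTRACT.
   Other sources use a HIGH/LO_SUM pair; when two sets are emitted, a
   REG_EQUAL note recording the full source is attached to the last one.  */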
18159 void
18160 arm_emit_movpair (rtx dest, rtx src)
18161 {
18162 /* If the src is an immediate, simplify it. */
18163 if (CONST_INT_P (src))
18164 {
18165 HOST_WIDE_INT val = INTVAL (src);
18166 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
18167 if ((val >> 16) & 0x0000ffff)
18168 {
18169 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
18170 GEN_INT (16)),
18171 GEN_INT ((val >> 16) & 0x0000ffff));
18172 rtx_insn *insn = get_last_insn ();
18173 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18174 }
18175 return;
18176 }
18177 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
18178 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
18179 rtx_insn *insn = get_last_insn ();
18180 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18181 }
18182
18183 /* Output a move between double words. It must be REG<-MEM
18184 or MEM<-REG. */
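/* A sketch of typical output for the REG<-MEM direction with a plain
   register address: "ldrd r0, [r2]" when LDRD is usable, or an "ldmia" of
   the two destination registers otherwise; the cases below handle
   auto-increment addresses, label references and reg+offset sums.  */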
18185 const char *
18186 output_move_double (rtx *operands, bool emit, int *count)
18187 {
18188 enum rtx_code code0 = GET_CODE (operands[0]);
18189 enum rtx_code code1 = GET_CODE (operands[1]);
18190 rtx otherops[3];
18191 if (count)
18192 *count = 1;
18193
18194 /* The only case when this might happen is when
18195 you are looking at the length of a DImode instruction
18196 that has an invalid constant in it. */
18197 if (code0 == REG && code1 != MEM)
18198 {
18199 gcc_assert (!emit);
18200 *count = 2;
18201 return "";
18202 }
18203
18204 if (code0 == REG)
18205 {
18206 unsigned int reg0 = REGNO (operands[0]);
18207
18208 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18209
18210 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18211
18212 switch (GET_CODE (XEXP (operands[1], 0)))
18213 {
18214 case REG:
18215
18216 if (emit)
18217 {
18218 if (TARGET_LDRD
18219 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18220 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
18221 else
18222 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18223 }
18224 break;
18225
18226 case PRE_INC:
18227 gcc_assert (TARGET_LDRD);
18228 if (emit)
18229 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18230 break;
18231
18232 case PRE_DEC:
18233 if (emit)
18234 {
18235 if (TARGET_LDRD)
18236 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18237 else
18238 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18239 }
18240 break;
18241
18242 case POST_INC:
18243 if (emit)
18244 {
18245 if (TARGET_LDRD)
18246 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18247 else
18248 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18249 }
18250 break;
18251
18252 case POST_DEC:
18253 gcc_assert (TARGET_LDRD);
18254 if (emit)
18255 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18256 break;
18257
18258 case PRE_MODIFY:
18259 case POST_MODIFY:
18260 /* Autoincrement addressing modes should never have overlapping
18261 base and destination registers, and overlapping index registers
18262 are already prohibited, so this doesn't need to worry about
18263 fix_cm3_ldrd. */
18264 otherops[0] = operands[0];
18265 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18266 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18267
18268 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18269 {
18270 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18271 {
18272 /* Registers overlap so split out the increment. */
18273 if (emit)
18274 {
18275 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18276 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18277 }
18278 if (count)
18279 *count = 2;
18280 }
18281 else
18282 {
18283 /* Use a single insn if we can.
18284 FIXME: IWMMXT allows offsets larger than ldrd can
18285 handle; fix these up with a pair of ldr. */
18286 if (TARGET_THUMB2
18287 || !CONST_INT_P (otherops[2])
18288 || (INTVAL (otherops[2]) > -256
18289 && INTVAL (otherops[2]) < 256))
18290 {
18291 if (emit)
18292 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18293 }
18294 else
18295 {
18296 if (emit)
18297 {
18298 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18299 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18300 }
18301 if (count)
18302 *count = 2;
18303
18304 }
18305 }
18306 }
18307 else
18308 {
18309 /* Use a single insn if we can.
18310 FIXME: IWMMXT allows offsets larger than ldrd can handle;
18311 fix these up with a pair of ldr. */
18312 if (TARGET_THUMB2
18313 || !CONST_INT_P (otherops[2])
18314 || (INTVAL (otherops[2]) > -256
18315 && INTVAL (otherops[2]) < 256))
18316 {
18317 if (emit)
18318 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18319 }
18320 else
18321 {
18322 if (emit)
18323 {
18324 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18325 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18326 }
18327 if (count)
18328 *count = 2;
18329 }
18330 }
18331 break;
18332
18333 case LABEL_REF:
18334 case CONST:
18335 /* We might be able to use ldrd %0, %1 here. However the range is
18336 different to ldr/adr, and it is broken on some ARMv7-M
18337 implementations. */
18338 /* Use the second register of the pair to avoid problematic
18339 overlap. */
18340 otherops[1] = operands[1];
18341 if (emit)
18342 output_asm_insn ("adr%?\t%0, %1", otherops);
18343 operands[1] = otherops[0];
18344 if (emit)
18345 {
18346 if (TARGET_LDRD)
18347 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18348 else
18349 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18350 }
18351
18352 if (count)
18353 *count = 2;
18354 break;
18355
18356 /* ??? This needs checking for thumb2. */
18357 default:
18358 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18359 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18360 {
18361 otherops[0] = operands[0];
18362 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18363 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18364
18365 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18366 {
18367 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18368 {
18369 switch ((int) INTVAL (otherops[2]))
18370 {
18371 case -8:
18372 if (emit)
18373 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18374 return "";
18375 case -4:
18376 if (TARGET_THUMB2)
18377 break;
18378 if (emit)
18379 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18380 return "";
18381 case 4:
18382 if (TARGET_THUMB2)
18383 break;
18384 if (emit)
18385 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18386 return "";
18387 }
18388 }
18389 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18390 operands[1] = otherops[0];
18391 if (TARGET_LDRD
18392 && (REG_P (otherops[2])
18393 || TARGET_THUMB2
18394 || (CONST_INT_P (otherops[2])
18395 && INTVAL (otherops[2]) > -256
18396 && INTVAL (otherops[2]) < 256)))
18397 {
18398 if (reg_overlap_mentioned_p (operands[0],
18399 otherops[2]))
18400 {
18401 /* Swap base and index registers over to
18402 avoid a conflict. */
18403 std::swap (otherops[1], otherops[2]);
18404 }
18405 /* If both registers conflict, it will usually
18406 have been fixed by a splitter. */
18407 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18408 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18409 {
18410 if (emit)
18411 {
18412 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18413 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18414 }
18415 if (count)
18416 *count = 2;
18417 }
18418 else
18419 {
18420 otherops[0] = operands[0];
18421 if (emit)
18422 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18423 }
18424 return "";
18425 }
18426
18427 if (CONST_INT_P (otherops[2]))
18428 {
18429 if (emit)
18430 {
18431 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18432 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18433 else
18434 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18435 }
18436 }
18437 else
18438 {
18439 if (emit)
18440 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18441 }
18442 }
18443 else
18444 {
18445 if (emit)
18446 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18447 }
18448
18449 if (count)
18450 *count = 2;
18451
18452 if (TARGET_LDRD)
18453 return "ldrd%?\t%0, [%1]";
18454
18455 return "ldmia%?\t%1, %M0";
18456 }
18457 else
18458 {
18459 otherops[1] = adjust_address (operands[1], SImode, 4);
18460 /* Take care of overlapping base/data reg. */
18461 if (reg_mentioned_p (operands[0], operands[1]))
18462 {
18463 if (emit)
18464 {
18465 output_asm_insn ("ldr%?\t%0, %1", otherops);
18466 output_asm_insn ("ldr%?\t%0, %1", operands);
18467 }
18468 if (count)
18469 *count = 2;
18470
18471 }
18472 else
18473 {
18474 if (emit)
18475 {
18476 output_asm_insn ("ldr%?\t%0, %1", operands);
18477 output_asm_insn ("ldr%?\t%0, %1", otherops);
18478 }
18479 if (count)
18480 *count = 2;
18481 }
18482 }
18483 }
18484 }
18485 else
18486 {
18487 /* Constraints should ensure this. */
18488 gcc_assert (code0 == MEM && code1 == REG);
18489 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18490 || (TARGET_ARM && TARGET_LDRD));
18491
18492 switch (GET_CODE (XEXP (operands[0], 0)))
18493 {
18494 case REG:
18495 if (emit)
18496 {
18497 if (TARGET_LDRD)
18498 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18499 else
18500 output_asm_insn ("stm%?\t%m0, %M1", operands);
18501 }
18502 break;
18503
18504 case PRE_INC:
18505 gcc_assert (TARGET_LDRD);
18506 if (emit)
18507 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18508 break;
18509
18510 case PRE_DEC:
18511 if (emit)
18512 {
18513 if (TARGET_LDRD)
18514 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18515 else
18516 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18517 }
18518 break;
18519
18520 case POST_INC:
18521 if (emit)
18522 {
18523 if (TARGET_LDRD)
18524 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18525 else
18526 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18527 }
18528 break;
18529
18530 case POST_DEC:
18531 gcc_assert (TARGET_LDRD);
18532 if (emit)
18533 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18534 break;
18535
18536 case PRE_MODIFY:
18537 case POST_MODIFY:
18538 otherops[0] = operands[1];
18539 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18540 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18541
18542 /* IWMMXT allows offsets larger than ldrd can handle;
18543 fix these up with a pair of ldr. */
18544 if (!TARGET_THUMB2
18545 && CONST_INT_P (otherops[2])
18546 && (INTVAL(otherops[2]) <= -256
18547 || INTVAL(otherops[2]) >= 256))
18548 {
18549 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18550 {
18551 if (emit)
18552 {
18553 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18554 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18555 }
18556 if (count)
18557 *count = 2;
18558 }
18559 else
18560 {
18561 if (emit)
18562 {
18563 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18564 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18565 }
18566 if (count)
18567 *count = 2;
18568 }
18569 }
18570 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18571 {
18572 if (emit)
18573 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18574 }
18575 else
18576 {
18577 if (emit)
18578 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18579 }
18580 break;
18581
18582 case PLUS:
18583 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18584 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18585 {
18586 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18587 {
18588 case -8:
18589 if (emit)
18590 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18591 return "";
18592
18593 case -4:
18594 if (TARGET_THUMB2)
18595 break;
18596 if (emit)
18597 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18598 return "";
18599
18600 case 4:
18601 if (TARGET_THUMB2)
18602 break;
18603 if (emit)
18604 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18605 return "";
18606 }
18607 }
18608 if (TARGET_LDRD
18609 && (REG_P (otherops[2])
18610 || TARGET_THUMB2
18611 || (CONST_INT_P (otherops[2])
18612 && INTVAL (otherops[2]) > -256
18613 && INTVAL (otherops[2]) < 256)))
18614 {
18615 otherops[0] = operands[1];
18616 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18617 if (emit)
18618 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18619 return "";
18620 }
18621 /* Fall through */
18622
18623 default:
18624 otherops[0] = adjust_address (operands[0], SImode, 4);
18625 otherops[1] = operands[1];
18626 if (emit)
18627 {
18628 output_asm_insn ("str%?\t%1, %0", operands);
18629 output_asm_insn ("str%?\t%H1, %0", otherops);
18630 }
18631 if (count)
18632 *count = 2;
18633 }
18634 }
18635
18636 return "";
18637 }
18638
18639 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18640 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18641
18642 const char *
18643 output_move_quad (rtx *operands)
18644 {
18645 if (REG_P (operands[0]))
18646 {
18647 /* Load, or reg->reg move. */
18648
18649 if (MEM_P (operands[1]))
18650 {
18651 switch (GET_CODE (XEXP (operands[1], 0)))
18652 {
18653 case REG:
18654 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18655 break;
18656
18657 case LABEL_REF:
18658 case CONST:
18659 output_asm_insn ("adr%?\t%0, %1", operands);
18660 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18661 break;
18662
18663 default:
18664 gcc_unreachable ();
18665 }
18666 }
18667 else
18668 {
18669 rtx ops[2];
18670 int dest, src, i;
18671
18672 gcc_assert (REG_P (operands[1]));
18673
18674 dest = REGNO (operands[0]);
18675 src = REGNO (operands[1]);
18676
18677 /* This seems pretty dumb, but hopefully GCC won't try to do it
18678 very often. */
18679 if (dest < src)
18680 for (i = 0; i < 4; i++)
18681 {
18682 ops[0] = gen_rtx_REG (SImode, dest + i);
18683 ops[1] = gen_rtx_REG (SImode, src + i);
18684 output_asm_insn ("mov%?\t%0, %1", ops);
18685 }
18686 else
18687 for (i = 3; i >= 0; i--)
18688 {
18689 ops[0] = gen_rtx_REG (SImode, dest + i);
18690 ops[1] = gen_rtx_REG (SImode, src + i);
18691 output_asm_insn ("mov%?\t%0, %1", ops);
18692 }
18693 }
18694 }
18695 else
18696 {
18697 gcc_assert (MEM_P (operands[0]));
18698 gcc_assert (REG_P (operands[1]));
18699 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18700
18701 switch (GET_CODE (XEXP (operands[0], 0)))
18702 {
18703 case REG:
18704 output_asm_insn ("stm%?\t%m0, %M1", operands);
18705 break;
18706
18707 default:
18708 gcc_unreachable ();
18709 }
18710 }
18711
18712 return "";
18713 }
18714
18715 /* Output a VFP load or store instruction. */
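/* For instance (a sketch): a DFmode load from a plain register address is
   emitted as "vldr.64 dN, [rM]", a single-precision store as
   "vstr.32 sN, [rM]", and the PRE_DEC / POST_INC cases use the
   vstmdb / vldmia forms with writeback on the base register.  */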
18716
18717 const char *
18718 output_move_vfp (rtx *operands)
18719 {
18720 rtx reg, mem, addr, ops[2];
18721 int load = REG_P (operands[0]);
18722 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18723 int sp = (!TARGET_VFP_FP16INST
18724 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18725 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18726 const char *templ;
18727 char buff[50];
18728 machine_mode mode;
18729
18730 reg = operands[!load];
18731 mem = operands[load];
18732
18733 mode = GET_MODE (reg);
18734
18735 gcc_assert (REG_P (reg));
18736 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18737 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18738 || mode == SFmode
18739 || mode == DFmode
18740 || mode == HImode
18741 || mode == SImode
18742 || mode == DImode
18743 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18744 gcc_assert (MEM_P (mem));
18745
18746 addr = XEXP (mem, 0);
18747
18748 switch (GET_CODE (addr))
18749 {
18750 case PRE_DEC:
18751 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18752 ops[0] = XEXP (addr, 0);
18753 ops[1] = reg;
18754 break;
18755
18756 case POST_INC:
18757 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18758 ops[0] = XEXP (addr, 0);
18759 ops[1] = reg;
18760 break;
18761
18762 default:
18763 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18764 ops[0] = reg;
18765 ops[1] = mem;
18766 break;
18767 }
18768
18769 sprintf (buff, templ,
18770 load ? "ld" : "st",
18771 dp ? "64" : sp ? "32" : "16",
18772 dp ? "P" : "",
18773 integer_p ? "\t%@ int" : "");
18774 output_asm_insn (buff, ops);
18775
18776 return "";
18777 }
18778
18779 /* Output a Neon double-word or quad-word load or store, or a load
18780 or store for larger structure modes.
18781
18782 WARNING: The ordering of elements is weird in big-endian mode,
18783 because the EABI requires that vectors stored in memory appear
18784 as though they were stored by a VSTM instruction.
18785 GCC RTL defines element ordering based on in-memory order.
18786 This can be different from the architectural ordering of elements
18787 within a NEON register. The intrinsics defined in arm_neon.h use the
18788 NEON register element ordering, not the GCC RTL element ordering.
18789
18790 For example, the in-memory ordering of a big-endian quadword
18791 vector with 16-bit elements when stored from register pair {d0,d1}
18792 will be (lowest address first, d0[N] is NEON register element N):
18793
18794 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18795
18796 When necessary, quadword registers (dN, dN+1) are moved to ARM
18797 registers starting at rN, in the order:
18798
18799 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18800
18801 This is so that STM/LDM can be used on vectors in ARM registers, and
18802 the same memory layout will result as if VSTM/VLDM were used.
18803
18804 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18805 possible, which allows use of appropriate alignment tags.
18806 Note that the choice of "64" is independent of the actual vector
18807 element size; this size simply ensures that the behavior is
18808 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18809
18810 Due to limitations of those instructions, use of VST1.64/VLD1.64
18811 is not possible if:
18812 - the address contains PRE_DEC, or
18813 - the mode refers to more than 4 double-word registers
18814
18815 In those cases, it would be possible to replace VSTM/VLDM by a
18816 sequence of instructions; this is not currently implemented since
18817 this is not certain to actually improve performance. */
18818
18819 const char *
18820 output_move_neon (rtx *operands)
18821 {
18822 rtx reg, mem, addr, ops[2];
18823 int regno, nregs, load = REG_P (operands[0]);
18824 const char *templ;
18825 char buff[50];
18826 machine_mode mode;
18827
18828 reg = operands[!load];
18829 mem = operands[load];
18830
18831 mode = GET_MODE (reg);
18832
18833 gcc_assert (REG_P (reg));
18834 regno = REGNO (reg);
18835 nregs = REG_NREGS (reg) / 2;
18836 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18837 || NEON_REGNO_OK_FOR_QUAD (regno));
18838 gcc_assert (VALID_NEON_DREG_MODE (mode)
18839 || VALID_NEON_QREG_MODE (mode)
18840 || VALID_NEON_STRUCT_MODE (mode));
18841 gcc_assert (MEM_P (mem));
18842
18843 addr = XEXP (mem, 0);
18844
18845 /* Strip off const from addresses like (const (plus (...))). */
18846 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18847 addr = XEXP (addr, 0);
18848
18849 switch (GET_CODE (addr))
18850 {
18851 case POST_INC:
18852 /* We have to use vldm / vstm for too-large modes. */
18853 if (nregs > 4)
18854 {
18855 templ = "v%smia%%?\t%%0!, %%h1";
18856 ops[0] = XEXP (addr, 0);
18857 }
18858 else
18859 {
18860 templ = "v%s1.64\t%%h1, %%A0";
18861 ops[0] = mem;
18862 }
18863 ops[1] = reg;
18864 break;
18865
18866 case PRE_DEC:
18867 /* We have to use vldm / vstm in this case, since there is no
18868 pre-decrement form of the vld1 / vst1 instructions. */
18869 templ = "v%smdb%%?\t%%0!, %%h1";
18870 ops[0] = XEXP (addr, 0);
18871 ops[1] = reg;
18872 break;
18873
18874 case POST_MODIFY:
18875 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18876 gcc_unreachable ();
18877
18878 case REG:
18879 /* We have to use vldm / vstm for too-large modes. */
18880 if (nregs > 1)
18881 {
18882 if (nregs > 4)
18883 templ = "v%smia%%?\t%%m0, %%h1";
18884 else
18885 templ = "v%s1.64\t%%h1, %%A0";
18886
18887 ops[0] = mem;
18888 ops[1] = reg;
18889 break;
18890 }
18891 /* Fall through. */
18892 case LABEL_REF:
18893 case PLUS:
18894 {
18895 int i;
18896 int overlap = -1;
18897 for (i = 0; i < nregs; i++)
18898 {
18899 /* We're only using DImode here because it's a convenient size. */
18900 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18901 ops[1] = adjust_address (mem, DImode, 8 * i);
18902 if (reg_overlap_mentioned_p (ops[0], mem))
18903 {
18904 gcc_assert (overlap == -1);
18905 overlap = i;
18906 }
18907 else
18908 {
18909 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18910 output_asm_insn (buff, ops);
18911 }
18912 }
18913 if (overlap != -1)
18914 {
18915 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18916 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18917 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18918 output_asm_insn (buff, ops);
18919 }
18920
18921 return "";
18922 }
18923
18924 default:
18925 gcc_unreachable ();
18926 }
18927
18928 sprintf (buff, templ, load ? "ld" : "st");
18929 output_asm_insn (buff, ops);
18930
18931 return "";
18932 }
18933
18934 /* Compute and return the length of neon_mov<mode>, where <mode> is
18935 one of VSTRUCT modes: EI, OI, CI or XI. */
18936 int
18937 arm_attr_length_move_neon (rtx_insn *insn)
18938 {
18939 rtx reg, mem, addr;
18940 int load;
18941 machine_mode mode;
18942
18943 extract_insn_cached (insn);
18944
18945 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18946 {
18947 mode = GET_MODE (recog_data.operand[0]);
18948 switch (mode)
18949 {
18950 case E_EImode:
18951 case E_OImode:
18952 return 8;
18953 case E_CImode:
18954 return 12;
18955 case E_XImode:
18956 return 16;
18957 default:
18958 gcc_unreachable ();
18959 }
18960 }
18961
18962 load = REG_P (recog_data.operand[0]);
18963 reg = recog_data.operand[!load];
18964 mem = recog_data.operand[load];
18965
18966 gcc_assert (MEM_P (mem));
18967
18968 addr = XEXP (mem, 0);
18969
18970 /* Strip off const from addresses like (const (plus (...))). */
18971 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18972 addr = XEXP (addr, 0);
18973
18974 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18975 {
18976 int insns = REG_NREGS (reg) / 2;
18977 return insns * 4;
18978 }
18979 else
18980 return 4;
18981 }
18982
18983 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18984 return zero. */
18985
18986 int
18987 arm_address_offset_is_imm (rtx_insn *insn)
18988 {
18989 rtx mem, addr;
18990
18991 extract_insn_cached (insn);
18992
18993 if (REG_P (recog_data.operand[0]))
18994 return 0;
18995
18996 mem = recog_data.operand[0];
18997
18998 gcc_assert (MEM_P (mem));
18999
19000 addr = XEXP (mem, 0);
19001
19002 if (REG_P (addr)
19003 || (GET_CODE (addr) == PLUS
19004 && REG_P (XEXP (addr, 0))
19005 && CONST_INT_P (XEXP (addr, 1))))
19006 return 1;
19007 else
19008 return 0;
19009 }
19010
19011 /* Output an ADD r, s, #n where n may be too big for one instruction.
19012 If adding zero and the destination register is the same as the source, output nothing. */
19013 const char *
19014 output_add_immediate (rtx *operands)
19015 {
19016 HOST_WIDE_INT n = INTVAL (operands[2]);
19017
19018 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
19019 {
19020 if (n < 0)
19021 output_multi_immediate (operands,
19022 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
19023 -n);
19024 else
19025 output_multi_immediate (operands,
19026 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
19027 n);
19028 }
19029
19030 return "";
19031 }
19032
19033 /* Output a multiple immediate operation.
19034 OPERANDS is the vector of operands referred to in the output patterns.
19035 INSTR1 is the output pattern to use for the first constant.
19036 INSTR2 is the output pattern to use for subsequent constants.
19037 IMMED_OP is the index of the constant slot in OPERANDS.
19038 N is the constant value. */
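/* A worked example of the splitting loop below: for N = 0x10004 the first
   chunk emitted is 4 using INSTR1 and the second is 0x10000 using INSTR2,
   so output_add_immediate would produce something like
   "add r0, r1, #4" followed by "add r0, r0, #65536".  */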
19039 static const char *
19040 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
19041 int immed_op, HOST_WIDE_INT n)
19042 {
19043 #if HOST_BITS_PER_WIDE_INT > 32
19044 n &= 0xffffffff;
19045 #endif
19046
19047 if (n == 0)
19048 {
19049 /* Quick and easy output. */
19050 operands[immed_op] = const0_rtx;
19051 output_asm_insn (instr1, operands);
19052 }
19053 else
19054 {
19055 int i;
19056 const char * instr = instr1;
19057
19058 /* Note that n is never zero here (which would give no output). */
19059 for (i = 0; i < 32; i += 2)
19060 {
19061 if (n & (3 << i))
19062 {
19063 operands[immed_op] = GEN_INT (n & (255 << i));
19064 output_asm_insn (instr, operands);
19065 instr = instr2;
19066 i += 6;
19067 }
19068 }
19069 }
19070
19071 return "";
19072 }
19073
19074 /* Return the name of a shifter operation. */
19075 static const char *
19076 arm_shift_nmem(enum rtx_code code)
19077 {
19078 switch (code)
19079 {
19080 case ASHIFT:
19081 return ARM_LSL_NAME;
19082
19083 case ASHIFTRT:
19084 return "asr";
19085
19086 case LSHIFTRT:
19087 return "lsr";
19088
19089 case ROTATERT:
19090 return "ror";
19091
19092 default:
19093 abort();
19094 }
19095 }
19096
19097 /* Return the appropriate ARM instruction for the operation code.
19098 The returned result should not be overwritten. OP is the rtx of the
19099 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
19100 was shifted. */
19101 const char *
19102 arithmetic_instr (rtx op, int shift_first_arg)
19103 {
19104 switch (GET_CODE (op))
19105 {
19106 case PLUS:
19107 return "add";
19108
19109 case MINUS:
19110 return shift_first_arg ? "rsb" : "sub";
19111
19112 case IOR:
19113 return "orr";
19114
19115 case XOR:
19116 return "eor";
19117
19118 case AND:
19119 return "and";
19120
19121 case ASHIFT:
19122 case ASHIFTRT:
19123 case LSHIFTRT:
19124 case ROTATERT:
19125 return arm_shift_nmem(GET_CODE(op));
19126
19127 default:
19128 gcc_unreachable ();
19129 }
19130 }
19131
19132 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19133 for the operation code. The returned result should not be overwritten.
19134 OP is the rtx of the shift.
19135 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
19136 constant shift amount otherwise. */
19137 static const char *
19138 shift_op (rtx op, HOST_WIDE_INT *amountp)
19139 {
19140 const char * mnem;
19141 enum rtx_code code = GET_CODE (op);
19142
19143 switch (code)
19144 {
19145 case ROTATE:
19146 if (!CONST_INT_P (XEXP (op, 1)))
19147 {
19148 output_operand_lossage ("invalid shift operand");
19149 return NULL;
19150 }
19151
19152 code = ROTATERT;
19153 *amountp = 32 - INTVAL (XEXP (op, 1));
19154 mnem = "ror";
19155 break;
19156
19157 case ASHIFT:
19158 case ASHIFTRT:
19159 case LSHIFTRT:
19160 case ROTATERT:
19161 mnem = arm_shift_nmem(code);
19162 if (CONST_INT_P (XEXP (op, 1)))
19163 {
19164 *amountp = INTVAL (XEXP (op, 1));
19165 }
19166 else if (REG_P (XEXP (op, 1)))
19167 {
19168 *amountp = -1;
19169 return mnem;
19170 }
19171 else
19172 {
19173 output_operand_lossage ("invalid shift operand");
19174 return NULL;
19175 }
19176 break;
19177
19178 case MULT:
19179 /* We never have to worry about the amount being other than a
19180 power of 2, since this case can never be reloaded from a reg. */
19181 if (!CONST_INT_P (XEXP (op, 1)))
19182 {
19183 output_operand_lossage ("invalid shift operand");
19184 return NULL;
19185 }
19186
19187 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
19188
19189 /* Amount must be a power of two. */
19190 if (*amountp & (*amountp - 1))
19191 {
19192 output_operand_lossage ("invalid shift operand");
19193 return NULL;
19194 }
19195
19196 *amountp = exact_log2 (*amountp);
19197 gcc_assert (IN_RANGE (*amountp, 0, 31));
19198 return ARM_LSL_NAME;
19199
19200 default:
19201 output_operand_lossage ("invalid shift operand");
19202 return NULL;
19203 }
19204
19205 /* This is not 100% correct, but follows from the desire to merge
19206 multiplication by a power of 2 with the recognizer for a
19207 shift. >=32 is not a valid shift for "lsl", so we must try and
19208 output a shift that produces the correct arithmetical result.
19209 Using lsr #32 is identical except for the fact that the carry bit
19210 is not set correctly if we set the flags; but we never use the
19211 carry bit from such an operation, so we can ignore that. */
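/* For example, should (ashift x 40) reach this point, it is output as
   "lsr" with an amount of 32, which yields zero, matching the arithmetic
   result of shifting a 32-bit value left by 40.  */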
19212 if (code == ROTATERT)
19213 /* Rotate is just modulo 32. */
19214 *amountp &= 31;
19215 else if (*amountp != (*amountp & 31))
19216 {
19217 if (code == ASHIFT)
19218 mnem = "lsr";
19219 *amountp = 32;
19220 }
19221
19222 /* Shifts of 0 are no-ops. */
19223 if (*amountp == 0)
19224 return NULL;
19225
19226 return mnem;
19227 }
19228
19229 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19230 because /bin/as is horribly restrictive. The judgement about
19231 whether or not each character is 'printable' (and can be output as
19232 is) or not (and must be printed with an octal escape) must be made
19233 with reference to the *host* character set -- the situation is
19234 similar to that discussed in the comments above pp_c_char in
19235 c-pretty-print.c. */
19236
19237 #define MAX_ASCII_LEN 51
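/* Illustrative example of the escaping below: the bytes 'a', '"', 0x07 are
   emitted as .ascii "a\"\007", and a fresh .ascii directive is started once
   roughly MAX_ASCII_LEN characters have been written.  */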
19238
19239 void
19240 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19241 {
19242 int i;
19243 int len_so_far = 0;
19244
19245 fputs ("\t.ascii\t\"", stream);
19246
19247 for (i = 0; i < len; i++)
19248 {
19249 int c = p[i];
19250
19251 if (len_so_far >= MAX_ASCII_LEN)
19252 {
19253 fputs ("\"\n\t.ascii\t\"", stream);
19254 len_so_far = 0;
19255 }
19256
19257 if (ISPRINT (c))
19258 {
19259 if (c == '\\' || c == '\"')
19260 {
19261 putc ('\\', stream);
19262 len_so_far++;
19263 }
19264 putc (c, stream);
19265 len_so_far++;
19266 }
19267 else
19268 {
19269 fprintf (stream, "\\%03o", c);
19270 len_so_far += 4;
19271 }
19272 }
19273
19274 fputs ("\"\n", stream);
19275 }
19276 \f
19277 /* Whether a register is callee saved or not. This is necessary because, on
19278 Thumb-1 targets, high registers are marked as caller saved when optimizing
19279 for size (to discourage their use) even though they are really callee saved. */
19280 #define callee_saved_reg_p(reg) \
19281 (!call_used_regs[reg] \
19282 || (TARGET_THUMB1 && optimize_size \
19283 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19284
19285 /* Compute the register save mask for registers 0 through 12
19286 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19287
19288 static unsigned long
19289 arm_compute_save_reg0_reg12_mask (void)
19290 {
19291 unsigned long func_type = arm_current_func_type ();
19292 unsigned long save_reg_mask = 0;
19293 unsigned int reg;
19294
19295 if (IS_INTERRUPT (func_type))
19296 {
19297 unsigned int max_reg;
19298 /* Interrupt functions must not corrupt any registers,
19299 even call clobbered ones. If this is a leaf function
19300 we can just examine the registers used by the RTL, but
19301 otherwise we have to assume that whatever function is
19302 called might clobber anything, and so we have to save
19303 all the call-clobbered registers as well. */
19304 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19305 /* FIQ handlers have registers r8 - r12 banked, so
19306 we only need to check r0 - r7. Normal ISRs only
19307 bank r14 and r15, so we must check up to r12.
19308 r13 is the stack pointer which is always preserved,
19309 so we do not need to consider it here. */
19310 max_reg = 7;
19311 else
19312 max_reg = 12;
19313
19314 for (reg = 0; reg <= max_reg; reg++)
19315 if (df_regs_ever_live_p (reg)
19316 || (! crtl->is_leaf && call_used_regs[reg]))
19317 save_reg_mask |= (1 << reg);
19318
19319 /* Also save the pic base register if necessary. */
19320 if (flag_pic
19321 && !TARGET_SINGLE_PIC_BASE
19322 && arm_pic_register != INVALID_REGNUM
19323 && crtl->uses_pic_offset_table)
19324 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19325 }
19326 else if (IS_VOLATILE(func_type))
19327 {
19328 /* For noreturn functions we historically omitted register saves
19329 altogether. However this really messes up debugging. As a
19330 compromise save just the frame pointers. Combined with the link
19331 register saved elsewhere this should be sufficient to get
19332 a backtrace. */
19333 if (frame_pointer_needed)
19334 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19335 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19336 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19337 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19338 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19339 }
19340 else
19341 {
19342 /* In the normal case we only need to save those registers
19343 which are call saved and which are used by this function. */
19344 for (reg = 0; reg <= 11; reg++)
19345 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19346 save_reg_mask |= (1 << reg);
19347
19348 /* Handle the frame pointer as a special case. */
19349 if (frame_pointer_needed)
19350 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19351
19352 /* If we aren't loading the PIC register,
19353 don't stack it even though it may be live. */
19354 if (flag_pic
19355 && !TARGET_SINGLE_PIC_BASE
19356 && arm_pic_register != INVALID_REGNUM
19357 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19358 || crtl->uses_pic_offset_table))
19359 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19360
19361 /* The prologue will copy SP into R0, so save it. */
19362 if (IS_STACKALIGN (func_type))
19363 save_reg_mask |= 1;
19364 }
19365
19366 /* Save registers so the exception handler can modify them. */
19367 if (crtl->calls_eh_return)
19368 {
19369 unsigned int i;
19370
19371 for (i = 0; ; i++)
19372 {
19373 reg = EH_RETURN_DATA_REGNO (i);
19374 if (reg == INVALID_REGNUM)
19375 break;
19376 save_reg_mask |= 1 << reg;
19377 }
19378 }
19379
19380 return save_reg_mask;
19381 }
19382
19383 /* Return true if r3 is live at the start of the function. */
19384
19385 static bool
19386 arm_r3_live_at_start_p (void)
19387 {
19388 /* Just look at cfg info, which is still close enough to correct at this
19389 point. This gives false positives for broken functions that might use
19390 uninitialized data that happens to be allocated in r3, but who cares? */
19391 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19392 }
19393
19394 /* Compute the number of bytes used to store the static chain register on the
19395 stack, above the stack frame. We need to know this accurately to get the
19396 alignment of the rest of the stack frame correct. */
19397
19398 static int
19399 arm_compute_static_chain_stack_bytes (void)
19400 {
19401 /* See the defining assertion in arm_expand_prologue. */
19402 if (IS_NESTED (arm_current_func_type ())
19403 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19404 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19405 || flag_stack_clash_protection)
19406 && !df_regs_ever_live_p (LR_REGNUM)))
19407 && arm_r3_live_at_start_p ()
19408 && crtl->args.pretend_args_size == 0)
19409 return 4;
19410
19411 return 0;
19412 }
19413
19414 /* Compute a bit mask of which core registers need to be
19415 saved on the stack for the current function.
19416 This is used by arm_compute_frame_layout, which may add extra registers. */
19417
19418 static unsigned long
19419 arm_compute_save_core_reg_mask (void)
19420 {
19421 unsigned int save_reg_mask = 0;
19422 unsigned long func_type = arm_current_func_type ();
19423 unsigned int reg;
19424
19425 if (IS_NAKED (func_type))
19426 /* This should never really happen. */
19427 return 0;
19428
19429 /* If we are creating a stack frame, then we must save the frame pointer,
19430 IP (which will hold the old stack pointer), LR and the PC. */
19431 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19432 save_reg_mask |=
19433 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19434 | (1 << IP_REGNUM)
19435 | (1 << LR_REGNUM)
19436 | (1 << PC_REGNUM);
19437
19438 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19439
19440 /* Decide if we need to save the link register.
19441 Interrupt routines have their own banked link register,
19442 so they never need to save it.
19443 Otherwise if we do not use the link register we do not need to save
19444 it. If we are pushing other registers onto the stack however, we
19445 can save an instruction in the epilogue by pushing the link register
19446 now and then popping it back into the PC. This incurs extra memory
19447 accesses though, so we only do it when optimizing for size, and only
19448 if we know that we will not need a fancy return sequence. */
19449 if (df_regs_ever_live_p (LR_REGNUM)
19450 || (save_reg_mask
19451 && optimize_size
19452 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19453 && !crtl->tail_call_emit
19454 && !crtl->calls_eh_return))
19455 save_reg_mask |= 1 << LR_REGNUM;
19456
19457 if (cfun->machine->lr_save_eliminated)
19458 save_reg_mask &= ~ (1 << LR_REGNUM);
19459
19460 if (TARGET_REALLY_IWMMXT
19461 && ((bit_count (save_reg_mask)
19462 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19463 arm_compute_static_chain_stack_bytes())
19464 ) % 2) != 0)
19465 {
19466 /* The total number of registers that are going to be pushed
19467 onto the stack is odd. We need to ensure that the stack
19468 is 64-bit aligned before we start to save iWMMXt registers,
19469 and also before we start to create locals. (A local variable
19470 might be a double or long long which we will load/store using
19471 an iWMMXt instruction). Therefore we need to push another
19472 ARM register, so that the stack will be 64-bit aligned. We
19473 try to avoid using the arg registers (r0 - r3) as they might be
19474 used to pass values in a tail call. */
19475 for (reg = 4; reg <= 12; reg++)
19476 if ((save_reg_mask & (1 << reg)) == 0)
19477 break;
19478
19479 if (reg <= 12)
19480 save_reg_mask |= (1 << reg);
19481 else
19482 {
19483 cfun->machine->sibcall_blocked = 1;
19484 save_reg_mask |= (1 << 3);
19485 }
19486 }
19487
19488 /* We may need to push an additional register for use initializing the
19489 PIC base register. */
19490 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19491 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19492 {
19493 reg = thumb_find_work_register (1 << 4);
19494 if (!call_used_regs[reg])
19495 save_reg_mask |= (1 << reg);
19496 }
19497
19498 return save_reg_mask;
19499 }
19500
19501 /* Compute a bit mask of which core registers need to be
19502 saved on the stack for the current function. */
19503 static unsigned long
19504 thumb1_compute_save_core_reg_mask (void)
19505 {
19506 unsigned long mask;
19507 unsigned reg;
19508
19509 mask = 0;
19510 for (reg = 0; reg < 12; reg ++)
19511 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19512 mask |= 1 << reg;
19513
19514 /* Handle the frame pointer as a special case. */
19515 if (frame_pointer_needed)
19516 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19517
19518 if (flag_pic
19519 && !TARGET_SINGLE_PIC_BASE
19520 && arm_pic_register != INVALID_REGNUM
19521 && crtl->uses_pic_offset_table)
19522 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19523
19524 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19525 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19526 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19527
19528 /* LR will also be pushed if any lo regs are pushed. */
19529 if (mask & 0xff || thumb_force_lr_save ())
19530 mask |= (1 << LR_REGNUM);
19531
19532 /* Make sure we have a low work register if we need one.
19533 We will need one if we are going to push a high register,
19534 but we are not currently intending to push a low register. */
19535 if ((mask & 0xff) == 0
19536 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19537 {
19538 /* Use thumb_find_work_register to choose which register
19539 we will use. If the register is live then we will
19540 have to push it. Use LAST_LO_REGNUM as our fallback
19541 choice for the register to select. */
19542 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19543 /* Make sure the register returned by thumb_find_work_register is
19544 not part of the return value. */
19545 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19546 reg = LAST_LO_REGNUM;
19547
19548 if (callee_saved_reg_p (reg))
19549 mask |= 1 << reg;
19550 }
19551
19552 /* The 504 below is 8 bytes less than 512 because there are two possible
19553 alignment words. We can't tell here if they will be present or not so we
19554 have to play it safe and assume that they are. */
19555 if ((CALLER_INTERWORKING_SLOT_SIZE +
19556 ROUND_UP_WORD (get_frame_size ()) +
19557 crtl->outgoing_args_size) >= 504)
19558 {
19559 /* This is the same as the code in thumb1_expand_prologue() which
19560 determines which register to use for stack decrement. */
19561 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19562 if (mask & (1 << reg))
19563 break;
19564
19565 if (reg > LAST_LO_REGNUM)
19566 {
19567 /* Make sure we have a register available for stack decrement. */
19568 mask |= 1 << LAST_LO_REGNUM;
19569 }
19570 }
19571
19572 return mask;
19573 }
19574
19575
19576 /* Return the number of bytes required to save VFP registers. */
19577 static int
19578 arm_get_vfp_saved_size (void)
19579 {
19580 unsigned int regno;
19581 int count;
19582 int saved;
19583
19584 saved = 0;
19585 /* Space for saved VFP registers. */
19586 if (TARGET_HARD_FLOAT)
19587 {
19588 count = 0;
19589 for (regno = FIRST_VFP_REGNUM;
19590 regno < LAST_VFP_REGNUM;
19591 regno += 2)
19592 {
19593 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19594 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19595 {
19596 if (count > 0)
19597 {
19598 /* Workaround ARM10 VFPr1 bug. */
19599 if (count == 2 && !arm_arch6)
19600 count++;
19601 saved += count * 8;
19602 }
19603 count = 0;
19604 }
19605 else
19606 count++;
19607 }
19608 if (count > 0)
19609 {
19610 if (count == 2 && !arm_arch6)
19611 count++;
19612 saved += count * 8;
19613 }
19614 }
19615 return saved;
19616 }
19617
19618
19619 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19620 everything bar the final return instruction. If SIMPLE_RETURN is true,
19621 then do not output the epilogue, because it has already been emitted in RTL.
19622
19623 Note: do not forget to update length attribute of corresponding insn pattern
19624 when changing assembly output (eg. length attribute of
19625 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
19626 register clearing sequences). */
19627 const char *
19628 output_return_instruction (rtx operand, bool really_return, bool reverse,
19629 bool simple_return)
19630 {
19631 char conditional[10];
19632 char instr[100];
19633 unsigned reg;
19634 unsigned long live_regs_mask;
19635 unsigned long func_type;
19636 arm_stack_offsets *offsets;
19637
19638 func_type = arm_current_func_type ();
19639
19640 if (IS_NAKED (func_type))
19641 return "";
19642
19643 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19644 {
19645 /* If this function was declared non-returning, and we have
19646 found a tail call, then we have to trust that the called
19647 function won't return. */
19648 if (really_return)
19649 {
19650 rtx ops[2];
19651
19652 /* Otherwise, trap an attempted return by aborting. */
19653 ops[0] = operand;
19654 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19655 : "abort");
19656 assemble_external_libcall (ops[1]);
19657 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19658 }
19659
19660 return "";
19661 }
19662
19663 gcc_assert (!cfun->calls_alloca || really_return);
19664
19665 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19666
19667 cfun->machine->return_used_this_function = 1;
19668
19669 offsets = arm_get_frame_offsets ();
19670 live_regs_mask = offsets->saved_regs_mask;
19671
19672 if (!simple_return && live_regs_mask)
19673 {
19674 const char * return_reg;
19675
19676 /* If we do not have any special requirements for function exit
19677 (e.g. interworking) then we can load the return address
19678 directly into the PC. Otherwise we must load it into LR. */
19679 if (really_return
19680 && !IS_CMSE_ENTRY (func_type)
19681 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19682 return_reg = reg_names[PC_REGNUM];
19683 else
19684 return_reg = reg_names[LR_REGNUM];
19685
19686 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19687 {
19688 /* There are three possible reasons for the IP register
19689 being saved. 1) a stack frame was created, in which case
19690 IP contains the old stack pointer, or 2) an ISR routine
19691 corrupted it, or 3) it was saved to align the stack on
19692 iWMMXt. In case 1, restore IP into SP, otherwise just
19693 restore IP. */
19694 if (frame_pointer_needed)
19695 {
19696 live_regs_mask &= ~ (1 << IP_REGNUM);
19697 live_regs_mask |= (1 << SP_REGNUM);
19698 }
19699 else
19700 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19701 }
19702
19703 /* On some ARM architectures it is faster to use LDR rather than
19704 LDM to load a single register. On other architectures, the
19705 cost is the same. In 26 bit mode, or for exception handlers,
19706 we have to use LDM to load the PC so that the CPSR is also
19707 restored. */
19708 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19709 if (live_regs_mask == (1U << reg))
19710 break;
19711
19712 if (reg <= LAST_ARM_REGNUM
19713 && (reg != LR_REGNUM
19714 || ! really_return
19715 || ! IS_INTERRUPT (func_type)))
19716 {
19717 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19718 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19719 }
19720 else
19721 {
19722 char *p;
19723 int first = 1;
19724
19725 /* Generate the load multiple instruction to restore the
19726 registers. Note we can get here, even if
19727 frame_pointer_needed is true, but only if sp already
19728 points to the base of the saved core registers. */
19729 if (live_regs_mask & (1 << SP_REGNUM))
19730 {
19731 unsigned HOST_WIDE_INT stack_adjust;
19732
19733 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19734 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19735
19736 if (stack_adjust && arm_arch5 && TARGET_ARM)
19737 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19738 else
19739 {
19740 /* If we can't use ldmib (SA110 bug),
19741 then try to pop r3 instead. */
19742 if (stack_adjust)
19743 live_regs_mask |= 1 << 3;
19744
19745 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19746 }
19747 }
19748 /* For interrupt returns we have to use an LDM rather than
19749 a POP so that we can use the exception return variant. */
19750 else if (IS_INTERRUPT (func_type))
19751 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19752 else
19753 sprintf (instr, "pop%s\t{", conditional);
19754
19755 p = instr + strlen (instr);
19756
19757 for (reg = 0; reg <= SP_REGNUM; reg++)
19758 if (live_regs_mask & (1 << reg))
19759 {
19760 int l = strlen (reg_names[reg]);
19761
19762 if (first)
19763 first = 0;
19764 else
19765 {
19766 memcpy (p, ", ", 2);
19767 p += 2;
19768 }
19769
19770 memcpy (p, "%|", 2);
19771 memcpy (p + 2, reg_names[reg], l);
19772 p += l + 2;
19773 }
19774
19775 if (live_regs_mask & (1 << LR_REGNUM))
19776 {
19777 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19778 /* If returning from an interrupt, restore the CPSR. */
19779 if (IS_INTERRUPT (func_type))
19780 strcat (p, "^");
19781 }
19782 else
19783 strcpy (p, "}");
19784 }
19785
19786 output_asm_insn (instr, & operand);
19787
19788 /* See if we need to generate an extra instruction to
19789 perform the actual function return. */
19790 if (really_return
19791 && func_type != ARM_FT_INTERWORKED
19792 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19793 {
19794 /* The return has already been handled
19795 by loading the LR into the PC. */
19796 return "";
19797 }
19798 }
19799
19800 if (really_return)
19801 {
19802 switch ((int) ARM_FUNC_TYPE (func_type))
19803 {
19804 case ARM_FT_ISR:
19805 case ARM_FT_FIQ:
19806 /* ??? This is wrong for unified assembly syntax. */
19807 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19808 break;
19809
19810 case ARM_FT_INTERWORKED:
19811 gcc_assert (arm_arch5 || arm_arch4t);
19812 sprintf (instr, "bx%s\t%%|lr", conditional);
19813 break;
19814
19815 case ARM_FT_EXCEPTION:
19816 /* ??? This is wrong for unified assembly syntax. */
19817 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19818 break;
19819
19820 default:
19821 if (IS_CMSE_ENTRY (func_type))
19822 {
 19823 /* Check if we have to clear the 'GE bits', which are only used if
 19824 the parallel add and subtract instructions are available. */
19825 if (TARGET_INT_SIMD)
19826 snprintf (instr, sizeof (instr),
19827 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19828 else
19829 snprintf (instr, sizeof (instr),
19830 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19831
19832 output_asm_insn (instr, & operand);
19833 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19834 {
19835 /* Clear the cumulative exception-status bits (0-4,7) and the
19836 condition code bits (28-31) of the FPSCR. We need to
19837 remember to clear the first scratch register used (IP) and
19838 save and restore the second (r4). */
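	      /* For reference (illustrative note): the movw/movt pair below
		 builds 0x0FFFFF60 in r4 (65376 == 0xFF60, 4095 == 0x0FFF), so
		 the AND keeps every FPSCR bit except 0-4, 7 and 28-31.  */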
19839 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19840 output_asm_insn (instr, & operand);
19841 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19842 output_asm_insn (instr, & operand);
19843 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19844 output_asm_insn (instr, & operand);
19845 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19846 output_asm_insn (instr, & operand);
19847 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19848 output_asm_insn (instr, & operand);
19849 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19850 output_asm_insn (instr, & operand);
19851 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19852 output_asm_insn (instr, & operand);
19853 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19854 output_asm_insn (instr, & operand);
19855 }
19856 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19857 }
19858 /* Use bx if it's available. */
19859 else if (arm_arch5 || arm_arch4t)
19860 sprintf (instr, "bx%s\t%%|lr", conditional);
19861 else
19862 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19863 break;
19864 }
19865
19866 output_asm_insn (instr, & operand);
19867 }
19868
19869 return "";
19870 }
19871
19872 /* Output in FILE asm statements needed to declare the NAME of the function
19873 defined by its DECL node. */
19874
19875 void
19876 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19877 {
19878 size_t cmse_name_len;
19879 char *cmse_name = 0;
19880 char cmse_prefix[] = "__acle_se_";
19881
19882 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19883 extra function label for each function with the 'cmse_nonsecure_entry'
19884 attribute. This extra function label should be prepended with
19885 '__acle_se_', telling the linker that it needs to create secure gateway
19886 veneers for this function. */
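  /* For a function foo carrying the attribute, the output is roughly
     (illustrative sketch; exact directives depend on the target macros):

	.globl	__acle_se_foo
	.type	__acle_se_foo, %function
	.type	foo, %function
     foo:
     __acle_se_foo:  */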
19887 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19888 DECL_ATTRIBUTES (decl)))
19889 {
19890 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19891 cmse_name = XALLOCAVEC (char, cmse_name_len);
19892 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19893 targetm.asm_out.globalize_label (file, cmse_name);
19894
19895 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19896 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19897 }
19898
19899 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19900 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19901 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19902 ASM_OUTPUT_LABEL (file, name);
19903
19904 if (cmse_name)
19905 ASM_OUTPUT_LABEL (file, cmse_name);
19906
19907 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19908 }
19909
19910 /* Write the function name into the code section, directly preceding
19911 the function prologue.
19912
19913 Code will be output similar to this:
19914 t0
19915 .ascii "arm_poke_function_name", 0
19916 .align
19917 t1
19918 .word 0xff000000 + (t1 - t0)
19919 arm_poke_function_name
19920 mov ip, sp
19921 stmfd sp!, {fp, ip, lr, pc}
19922 sub fp, ip, #4
19923
19924 When performing a stack backtrace, code can inspect the value
19925 of 'pc' stored at 'fp' + 0. If the trace function then looks
19926 at location pc - 12 and the top 8 bits are set, then we know
19927 that there is a function name embedded immediately preceding this
 19928 location, and that it has length ((pc[-3]) & 0xff000000).
19929
19930 We assume that pc is declared as a pointer to an unsigned long.
19931
19932 It is of no benefit to output the function name if we are assembling
19933 a leaf function. These function types will not contain a stack
19934 backtrace structure, therefore it is not possible to determine the
19935 function name. */
19936 void
19937 arm_poke_function_name (FILE *stream, const char *name)
19938 {
19939 unsigned long alignlength;
19940 unsigned long length;
19941 rtx x;
19942
19943 length = strlen (name) + 1;
19944 alignlength = ROUND_UP_WORD (length);
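  /* Illustrative example: for NAME "foo", LENGTH is 4 (three characters plus
     the terminating NUL), ALIGNLENGTH rounds up to 4, and the marker word
     emitted below is 0xff000004.  */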
19945
19946 ASM_OUTPUT_ASCII (stream, name, length);
19947 ASM_OUTPUT_ALIGN (stream, 2);
19948 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19949 assemble_aligned_integer (UNITS_PER_WORD, x);
19950 }
19951
19952 /* Place some comments into the assembler stream
19953 describing the current function. */
19954 static void
19955 arm_output_function_prologue (FILE *f)
19956 {
19957 unsigned long func_type;
19958
19959 /* Sanity check. */
19960 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19961
19962 func_type = arm_current_func_type ();
19963
19964 switch ((int) ARM_FUNC_TYPE (func_type))
19965 {
19966 default:
19967 case ARM_FT_NORMAL:
19968 break;
19969 case ARM_FT_INTERWORKED:
19970 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19971 break;
19972 case ARM_FT_ISR:
19973 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19974 break;
19975 case ARM_FT_FIQ:
19976 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19977 break;
19978 case ARM_FT_EXCEPTION:
19979 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19980 break;
19981 }
19982
19983 if (IS_NAKED (func_type))
19984 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19985
19986 if (IS_VOLATILE (func_type))
19987 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19988
19989 if (IS_NESTED (func_type))
19990 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19991 if (IS_STACKALIGN (func_type))
19992 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19993 if (IS_CMSE_ENTRY (func_type))
19994 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19995
19996 asm_fprintf (f, "\t%@ args = %d, pretend = %d, frame = %wd\n",
19997 crtl->args.size,
19998 crtl->args.pretend_args_size,
19999 (HOST_WIDE_INT) get_frame_size ());
20000
20001 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
20002 frame_pointer_needed,
20003 cfun->machine->uses_anonymous_args);
20004
20005 if (cfun->machine->lr_save_eliminated)
20006 asm_fprintf (f, "\t%@ link register save eliminated.\n");
20007
20008 if (crtl->calls_eh_return)
20009 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
20010
20011 }
20012
20013 static void
20014 arm_output_function_epilogue (FILE *)
20015 {
20016 arm_stack_offsets *offsets;
20017
20018 if (TARGET_THUMB1)
20019 {
20020 int regno;
20021
20022 /* Emit any call-via-reg trampolines that are needed for v4t support
20023 of call_reg and call_value_reg type insns. */
20024 for (regno = 0; regno < LR_REGNUM; regno++)
20025 {
20026 rtx label = cfun->machine->call_via[regno];
20027
20028 if (label != NULL)
20029 {
20030 switch_to_section (function_section (current_function_decl));
20031 targetm.asm_out.internal_label (asm_out_file, "L",
20032 CODE_LABEL_NUMBER (label));
20033 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20034 }
20035 }
20036
20037 /* ??? Probably not safe to set this here, since it assumes that a
20038 function will be emitted as assembly immediately after we generate
20039 RTL for it. This does not happen for inline functions. */
20040 cfun->machine->return_used_this_function = 0;
20041 }
20042 else /* TARGET_32BIT */
20043 {
20044 /* We need to take into account any stack-frame rounding. */
20045 offsets = arm_get_frame_offsets ();
20046
20047 gcc_assert (!use_return_insn (FALSE, NULL)
20048 || (cfun->machine->return_used_this_function != 0)
20049 || offsets->saved_regs == offsets->outgoing_args
20050 || frame_pointer_needed);
20051 }
20052 }
20053
20054 /* Generate and emit a sequence of insns equivalent to PUSH, but using
 20055 STR and STRD. If an even number of registers is being pushed, an
 20056 STRD pattern is created for each register pair. If an
 20057 odd number of registers is pushed, emit an initial STR followed by
20058 as many STRD instructions as are needed. This works best when the
20059 stack is initially 64-bit aligned (the normal case), since it
20060 ensures that each STRD is also 64-bit aligned. */
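/* Illustrative example: with SAVED_REGS_MASK covering {r4, r5, r6} the
   expected sequence is roughly

	str	r4, [sp, #-12]!
	strd	r5, r6, [sp, #4]

   i.e. the initial STR allocates the whole 12-byte block with writeback and
   the remaining pair is stored at a doubleword-aligned offset.  */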
20061 static void
20062 thumb2_emit_strd_push (unsigned long saved_regs_mask)
20063 {
20064 int num_regs = 0;
20065 int i;
20066 int regno;
20067 rtx par = NULL_RTX;
20068 rtx dwarf = NULL_RTX;
20069 rtx tmp;
20070 bool first = true;
20071
20072 num_regs = bit_count (saved_regs_mask);
20073
20074 /* Must be at least one register to save, and can't save SP or PC. */
20075 gcc_assert (num_regs > 0 && num_regs <= 14);
20076 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20077 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20078
20079 /* Create sequence for DWARF info. All the frame-related data for
20080 debugging is held in this wrapper. */
20081 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20082
20083 /* Describe the stack adjustment. */
20084 tmp = gen_rtx_SET (stack_pointer_rtx,
20085 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20086 RTX_FRAME_RELATED_P (tmp) = 1;
20087 XVECEXP (dwarf, 0, 0) = tmp;
20088
20089 /* Find the first register. */
20090 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
20091 ;
20092
20093 i = 0;
20094
 20095 /* If there's an odd number of registers to push, start off by
20096 pushing a single register. This ensures that subsequent strd
20097 operations are dword aligned (assuming that SP was originally
20098 64-bit aligned). */
20099 if ((num_regs & 1) != 0)
20100 {
20101 rtx reg, mem, insn;
20102
20103 reg = gen_rtx_REG (SImode, regno);
20104 if (num_regs == 1)
20105 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
20106 stack_pointer_rtx));
20107 else
20108 mem = gen_frame_mem (Pmode,
20109 gen_rtx_PRE_MODIFY
20110 (Pmode, stack_pointer_rtx,
20111 plus_constant (Pmode, stack_pointer_rtx,
20112 -4 * num_regs)));
20113
20114 tmp = gen_rtx_SET (mem, reg);
20115 RTX_FRAME_RELATED_P (tmp) = 1;
20116 insn = emit_insn (tmp);
20117 RTX_FRAME_RELATED_P (insn) = 1;
20118 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20119 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
20120 RTX_FRAME_RELATED_P (tmp) = 1;
20121 i++;
20122 regno++;
20123 XVECEXP (dwarf, 0, i) = tmp;
20124 first = false;
20125 }
20126
20127 while (i < num_regs)
20128 if (saved_regs_mask & (1 << regno))
20129 {
20130 rtx reg1, reg2, mem1, mem2;
20131 rtx tmp0, tmp1, tmp2;
20132 int regno2;
20133
20134 /* Find the register to pair with this one. */
20135 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
20136 regno2++)
20137 ;
20138
20139 reg1 = gen_rtx_REG (SImode, regno);
20140 reg2 = gen_rtx_REG (SImode, regno2);
20141
20142 if (first)
20143 {
20144 rtx insn;
20145
20146 first = false;
20147 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20148 stack_pointer_rtx,
20149 -4 * num_regs));
20150 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20151 stack_pointer_rtx,
20152 -4 * (num_regs - 1)));
20153 tmp0 = gen_rtx_SET (stack_pointer_rtx,
20154 plus_constant (Pmode, stack_pointer_rtx,
20155 -4 * (num_regs)));
20156 tmp1 = gen_rtx_SET (mem1, reg1);
20157 tmp2 = gen_rtx_SET (mem2, reg2);
20158 RTX_FRAME_RELATED_P (tmp0) = 1;
20159 RTX_FRAME_RELATED_P (tmp1) = 1;
20160 RTX_FRAME_RELATED_P (tmp2) = 1;
20161 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
20162 XVECEXP (par, 0, 0) = tmp0;
20163 XVECEXP (par, 0, 1) = tmp1;
20164 XVECEXP (par, 0, 2) = tmp2;
20165 insn = emit_insn (par);
20166 RTX_FRAME_RELATED_P (insn) = 1;
20167 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20168 }
20169 else
20170 {
20171 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20172 stack_pointer_rtx,
20173 4 * i));
20174 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20175 stack_pointer_rtx,
20176 4 * (i + 1)));
20177 tmp1 = gen_rtx_SET (mem1, reg1);
20178 tmp2 = gen_rtx_SET (mem2, reg2);
20179 RTX_FRAME_RELATED_P (tmp1) = 1;
20180 RTX_FRAME_RELATED_P (tmp2) = 1;
20181 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20182 XVECEXP (par, 0, 0) = tmp1;
20183 XVECEXP (par, 0, 1) = tmp2;
20184 emit_insn (par);
20185 }
20186
20187 /* Create unwind information. This is an approximation. */
20188 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
20189 plus_constant (Pmode,
20190 stack_pointer_rtx,
20191 4 * i)),
20192 reg1);
20193 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
20194 plus_constant (Pmode,
20195 stack_pointer_rtx,
20196 4 * (i + 1))),
20197 reg2);
20198
20199 RTX_FRAME_RELATED_P (tmp1) = 1;
20200 RTX_FRAME_RELATED_P (tmp2) = 1;
20201 XVECEXP (dwarf, 0, i + 1) = tmp1;
20202 XVECEXP (dwarf, 0, i + 2) = tmp2;
20203 i += 2;
20204 regno = regno2 + 1;
20205 }
20206 else
20207 regno++;
20208
20209 return;
20210 }
20211
20212 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20213 whenever possible, otherwise it emits single-word stores. The first store
 20214 also allocates stack space for all saved registers, using pre-indexed
 20215 addressing with writeback. All other stores use offset addressing. If no STRD
20216 can be emitted, this function emits a sequence of single-word stores,
 20217 and not an STM as before, because single-word stores give the scheduler more
 20218 freedom and can be turned into an STM by peephole optimizations. */
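/* Illustrative example: with SAVED_REGS_MASK covering {r4, r5, r7} the
   expected sequence is roughly

	strd	r4, r5, [sp, #-12]!
	str	r7, [sp, #8]

   i.e. the first store allocates the whole block with writeback and the
   unpaired register is stored with plain offset addressing.  */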
20219 static void
20220 arm_emit_strd_push (unsigned long saved_regs_mask)
20221 {
20222 int num_regs = 0;
20223 int i, j, dwarf_index = 0;
20224 int offset = 0;
20225 rtx dwarf = NULL_RTX;
20226 rtx insn = NULL_RTX;
20227 rtx tmp, mem;
20228
 20229 /* TODO: More efficient code could be emitted by changing the
20230 layout, e.g., first push all pairs that can use STRD to keep the
20231 stack aligned, and then push all other registers. */
20232 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20233 if (saved_regs_mask & (1 << i))
20234 num_regs++;
20235
20236 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20237 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20238 gcc_assert (num_regs > 0);
20239
20240 /* Create sequence for DWARF info. */
20241 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20242
20243 /* For dwarf info, we generate explicit stack update. */
20244 tmp = gen_rtx_SET (stack_pointer_rtx,
20245 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20246 RTX_FRAME_RELATED_P (tmp) = 1;
20247 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20248
20249 /* Save registers. */
20250 offset = - 4 * num_regs;
20251 j = 0;
20252 while (j <= LAST_ARM_REGNUM)
20253 if (saved_regs_mask & (1 << j))
20254 {
20255 if ((j % 2 == 0)
20256 && (saved_regs_mask & (1 << (j + 1))))
20257 {
 20258 /* The current register and the next register form a register pair
 20259 for which STRD can be generated. */
20260 if (offset < 0)
20261 {
20262 /* Allocate stack space for all saved registers. */
20263 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20264 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20265 mem = gen_frame_mem (DImode, tmp);
20266 offset = 0;
20267 }
20268 else if (offset > 0)
20269 mem = gen_frame_mem (DImode,
20270 plus_constant (Pmode,
20271 stack_pointer_rtx,
20272 offset));
20273 else
20274 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20275
20276 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20277 RTX_FRAME_RELATED_P (tmp) = 1;
20278 tmp = emit_insn (tmp);
20279
20280 /* Record the first store insn. */
20281 if (dwarf_index == 1)
20282 insn = tmp;
20283
20284 /* Generate dwarf info. */
20285 mem = gen_frame_mem (SImode,
20286 plus_constant (Pmode,
20287 stack_pointer_rtx,
20288 offset));
20289 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20290 RTX_FRAME_RELATED_P (tmp) = 1;
20291 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20292
20293 mem = gen_frame_mem (SImode,
20294 plus_constant (Pmode,
20295 stack_pointer_rtx,
20296 offset + 4));
20297 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20298 RTX_FRAME_RELATED_P (tmp) = 1;
20299 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20300
20301 offset += 8;
20302 j += 2;
20303 }
20304 else
20305 {
20306 /* Emit a single word store. */
20307 if (offset < 0)
20308 {
20309 /* Allocate stack space for all saved registers. */
20310 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20311 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20312 mem = gen_frame_mem (SImode, tmp);
20313 offset = 0;
20314 }
20315 else if (offset > 0)
20316 mem = gen_frame_mem (SImode,
20317 plus_constant (Pmode,
20318 stack_pointer_rtx,
20319 offset));
20320 else
20321 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20322
20323 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20324 RTX_FRAME_RELATED_P (tmp) = 1;
20325 tmp = emit_insn (tmp);
20326
20327 /* Record the first store insn. */
20328 if (dwarf_index == 1)
20329 insn = tmp;
20330
20331 /* Generate dwarf info. */
20332 mem = gen_frame_mem (SImode,
20333 plus_constant(Pmode,
20334 stack_pointer_rtx,
20335 offset));
20336 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20337 RTX_FRAME_RELATED_P (tmp) = 1;
20338 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20339
20340 offset += 4;
20341 j += 1;
20342 }
20343 }
20344 else
20345 j++;
20346
20347 /* Attach dwarf info to the first insn we generate. */
20348 gcc_assert (insn != NULL_RTX);
20349 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20350 RTX_FRAME_RELATED_P (insn) = 1;
20351 }
20352
20353 /* Generate and emit an insn that we will recognize as a push_multi.
20354 Unfortunately, since this insn does not reflect very well the actual
20355 semantics of the operation, we need to annotate the insn for the benefit
20356 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20357 MASK for registers that should be annotated for DWARF2 frame unwind
20358 information. */
20359 static rtx
20360 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20361 {
20362 int num_regs = 0;
20363 int num_dwarf_regs = 0;
20364 int i, j;
20365 rtx par;
20366 rtx dwarf;
20367 int dwarf_par_index;
20368 rtx tmp, reg;
20369
20370 /* We don't record the PC in the dwarf frame information. */
20371 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20372
20373 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20374 {
20375 if (mask & (1 << i))
20376 num_regs++;
20377 if (dwarf_regs_mask & (1 << i))
20378 num_dwarf_regs++;
20379 }
20380
20381 gcc_assert (num_regs && num_regs <= 16);
20382 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20383
20384 /* For the body of the insn we are going to generate an UNSPEC in
20385 parallel with several USEs. This allows the insn to be recognized
20386 by the push_multi pattern in the arm.md file.
20387
20388 The body of the insn looks something like this:
20389
20390 (parallel [
20391 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20392 (const_int:SI <num>)))
20393 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20394 (use (reg:SI XX))
20395 (use (reg:SI YY))
20396 ...
20397 ])
20398
20399 For the frame note however, we try to be more explicit and actually
20400 show each register being stored into the stack frame, plus a (single)
20401 decrement of the stack pointer. We do it this way in order to be
20402 friendly to the stack unwinding code, which only wants to see a single
20403 stack decrement per instruction. The RTL we generate for the note looks
20404 something like this:
20405
20406 (sequence [
20407 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20408 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20409 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20410 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20411 ...
20412 ])
20413
20414 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20415 instead we'd have a parallel expression detailing all
20416 the stores to the various memory addresses so that debug
20417 information is more up-to-date. Remember however while writing
20418 this to take care of the constraints with the push instruction.
20419
20420 Note also that this has to be taken care of for the VFP registers.
20421
20422 For more see PR43399. */
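  /* As a concrete (illustrative) instance, for MASK == DWARF_REGS_MASK
     covering {r4, r5, lr} the note built below is

       (sequence [
	  (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -12)))
	  (set (mem:SI (reg:SI sp)) (reg:SI r4))
	  (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI r5))
	  (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI lr))
       ])

     and the insn itself is typically assembled as push {r4, r5, lr}
     (stmfd sp!, {r4, r5, lr} in ARM state).  */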
20423
20424 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20425 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20426 dwarf_par_index = 1;
20427
20428 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20429 {
20430 if (mask & (1 << i))
20431 {
20432 reg = gen_rtx_REG (SImode, i);
20433
20434 XVECEXP (par, 0, 0)
20435 = gen_rtx_SET (gen_frame_mem
20436 (BLKmode,
20437 gen_rtx_PRE_MODIFY (Pmode,
20438 stack_pointer_rtx,
20439 plus_constant
20440 (Pmode, stack_pointer_rtx,
20441 -4 * num_regs))
20442 ),
20443 gen_rtx_UNSPEC (BLKmode,
20444 gen_rtvec (1, reg),
20445 UNSPEC_PUSH_MULT));
20446
20447 if (dwarf_regs_mask & (1 << i))
20448 {
20449 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20450 reg);
20451 RTX_FRAME_RELATED_P (tmp) = 1;
20452 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20453 }
20454
20455 break;
20456 }
20457 }
20458
20459 for (j = 1, i++; j < num_regs; i++)
20460 {
20461 if (mask & (1 << i))
20462 {
20463 reg = gen_rtx_REG (SImode, i);
20464
20465 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20466
20467 if (dwarf_regs_mask & (1 << i))
20468 {
20469 tmp
20470 = gen_rtx_SET (gen_frame_mem
20471 (SImode,
20472 plus_constant (Pmode, stack_pointer_rtx,
20473 4 * j)),
20474 reg);
20475 RTX_FRAME_RELATED_P (tmp) = 1;
20476 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20477 }
20478
20479 j++;
20480 }
20481 }
20482
20483 par = emit_insn (par);
20484
20485 tmp = gen_rtx_SET (stack_pointer_rtx,
20486 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20487 RTX_FRAME_RELATED_P (tmp) = 1;
20488 XVECEXP (dwarf, 0, 0) = tmp;
20489
20490 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20491
20492 return par;
20493 }
20494
20495 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20496 SIZE is the offset to be adjusted.
20497 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20498 static void
20499 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20500 {
20501 rtx dwarf;
20502
20503 RTX_FRAME_RELATED_P (insn) = 1;
20504 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20505 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20506 }
20507
20508 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20509 SAVED_REGS_MASK shows which registers need to be restored.
20510
20511 Unfortunately, since this insn does not reflect very well the actual
20512 semantics of the operation, we need to annotate the insn for the benefit
20513 of DWARF2 frame unwind information. */
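/* Illustrative example: with SAVED_REGS_MASK covering {r4, r5, pc} the
   PARALLEL built below holds a return, the SP adjustment by 12 and the three
   register loads, and is typically assembled as

	pop	{r4, r5, pc}

   (ldmfd sp!, {r4, r5, pc} in ARM state).  */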
20514 static void
20515 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20516 {
20517 int num_regs = 0;
20518 int i, j;
20519 rtx par;
20520 rtx dwarf = NULL_RTX;
20521 rtx tmp, reg;
20522 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20523 int offset_adj;
20524 int emit_update;
20525
20526 offset_adj = return_in_pc ? 1 : 0;
20527 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20528 if (saved_regs_mask & (1 << i))
20529 num_regs++;
20530
20531 gcc_assert (num_regs && num_regs <= 16);
20532
 20533 /* If SP is in the reglist, then we don't emit an SP update insn. */
20534 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20535
20536 /* The parallel needs to hold num_regs SETs
20537 and one SET for the stack update. */
20538 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20539
20540 if (return_in_pc)
20541 XVECEXP (par, 0, 0) = ret_rtx;
20542
20543 if (emit_update)
20544 {
20545 /* Increment the stack pointer, based on there being
20546 num_regs 4-byte registers to restore. */
20547 tmp = gen_rtx_SET (stack_pointer_rtx,
20548 plus_constant (Pmode,
20549 stack_pointer_rtx,
20550 4 * num_regs));
20551 RTX_FRAME_RELATED_P (tmp) = 1;
20552 XVECEXP (par, 0, offset_adj) = tmp;
20553 }
20554
20555 /* Now restore every reg, which may include PC. */
20556 for (j = 0, i = 0; j < num_regs; i++)
20557 if (saved_regs_mask & (1 << i))
20558 {
20559 reg = gen_rtx_REG (SImode, i);
20560 if ((num_regs == 1) && emit_update && !return_in_pc)
20561 {
20562 /* Emit single load with writeback. */
20563 tmp = gen_frame_mem (SImode,
20564 gen_rtx_POST_INC (Pmode,
20565 stack_pointer_rtx));
20566 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20567 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20568 return;
20569 }
20570
20571 tmp = gen_rtx_SET (reg,
20572 gen_frame_mem
20573 (SImode,
20574 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20575 RTX_FRAME_RELATED_P (tmp) = 1;
20576 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20577
20578 /* We need to maintain a sequence for DWARF info too. As dwarf info
20579 should not have PC, skip PC. */
20580 if (i != PC_REGNUM)
20581 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20582
20583 j++;
20584 }
20585
20586 if (return_in_pc)
20587 par = emit_jump_insn (par);
20588 else
20589 par = emit_insn (par);
20590
20591 REG_NOTES (par) = dwarf;
20592 if (!return_in_pc)
20593 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20594 stack_pointer_rtx, stack_pointer_rtx);
20595 }
20596
20597 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20598 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20599
20600 Unfortunately, since this insn does not reflect very well the actual
20601 semantics of the operation, we need to annotate the insn for the benefit
20602 of DWARF2 frame unwind information. */
20603 static void
20604 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20605 {
20606 int i, j;
20607 rtx par;
20608 rtx dwarf = NULL_RTX;
20609 rtx tmp, reg;
20610
20611 gcc_assert (num_regs && num_regs <= 32);
20612
20613 /* Workaround ARM10 VFPr1 bug. */
20614 if (num_regs == 2 && !arm_arch6)
20615 {
20616 if (first_reg == 15)
20617 first_reg--;
20618
20619 num_regs++;
20620 }
20621
20622 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20623 there could be up to 32 D-registers to restore.
20624 If there are more than 16 D-registers, make two recursive calls,
20625 each of which emits one pop_multi instruction. */
20626 if (num_regs > 16)
20627 {
20628 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20629 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20630 return;
20631 }
20632
20633 /* The parallel needs to hold num_regs SETs
20634 and one SET for the stack update. */
20635 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20636
20637 /* Increment the stack pointer, based on there being
20638 num_regs 8-byte registers to restore. */
20639 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20640 RTX_FRAME_RELATED_P (tmp) = 1;
20641 XVECEXP (par, 0, 0) = tmp;
20642
20643 /* Now show every reg that will be restored, using a SET for each. */
20644 for (j = 0, i=first_reg; j < num_regs; i += 2)
20645 {
20646 reg = gen_rtx_REG (DFmode, i);
20647
20648 tmp = gen_rtx_SET (reg,
20649 gen_frame_mem
20650 (DFmode,
20651 plus_constant (Pmode, base_reg, 8 * j)));
20652 RTX_FRAME_RELATED_P (tmp) = 1;
20653 XVECEXP (par, 0, j + 1) = tmp;
20654
20655 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20656
20657 j++;
20658 }
20659
20660 par = emit_insn (par);
20661 REG_NOTES (par) = dwarf;
20662
 20663 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
20664 if (REGNO (base_reg) == IP_REGNUM)
20665 {
20666 RTX_FRAME_RELATED_P (par) = 1;
20667 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20668 }
20669 else
20670 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20671 base_reg, base_reg);
20672 }
20673
 20674 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
 20675 even number of registers is being popped, LDRD patterns are created for
 20676 all register pairs. If an odd number of registers is popped, the last
 20677 register is loaded using an LDR pattern. */
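/* Illustrative example: with SAVED_REGS_MASK covering {r4, r5, r6, pc} the
   expected epilogue sequence is roughly

	ldrd	r4, r5, [sp]
	add	sp, sp, #8
	pop	{r6, pc}

   i.e. the paired registers are loaded with LDRD at fixed offsets, the stack
   is adjusted, and the leftover register is popped together with PC.  */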
20678 static void
20679 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20680 {
20681 int num_regs = 0;
20682 int i, j;
20683 rtx par = NULL_RTX;
20684 rtx dwarf = NULL_RTX;
20685 rtx tmp, reg, tmp1;
20686 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20687
20688 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20689 if (saved_regs_mask & (1 << i))
20690 num_regs++;
20691
20692 gcc_assert (num_regs && num_regs <= 16);
20693
20694 /* We cannot generate ldrd for PC. Hence, reduce the count if PC is
20695 to be popped. So, if num_regs is even, now it will become odd,
20696 and we can generate pop with PC. If num_regs is odd, it will be
20697 even now, and ldr with return can be generated for PC. */
20698 if (return_in_pc)
20699 num_regs--;
20700
20701 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20702
 20703 /* Var j iterates over all the registers to gather all the registers in
 20704 saved_regs_mask. Var i gives the index of saved registers in the stack frame.
 20705 A PARALLEL RTX of a register pair is created here, so that the pattern for
 20706 LDRD can be matched. As PC is always the last register to be popped, and
 20707 we have already decremented num_regs if PC is in the mask, we don't have to
 20708 worry about PC in this loop. */
20709 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20710 if (saved_regs_mask & (1 << j))
20711 {
20712 /* Create RTX for memory load. */
20713 reg = gen_rtx_REG (SImode, j);
20714 tmp = gen_rtx_SET (reg,
20715 gen_frame_mem (SImode,
20716 plus_constant (Pmode,
20717 stack_pointer_rtx, 4 * i)));
20718 RTX_FRAME_RELATED_P (tmp) = 1;
20719
20720 if (i % 2 == 0)
20721 {
20722 /* When saved-register index (i) is even, the RTX to be emitted is
20723 yet to be created. Hence create it first. The LDRD pattern we
20724 are generating is :
20725 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20726 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20727 where target registers need not be consecutive. */
20728 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20729 dwarf = NULL_RTX;
20730 }
20731
 20732 /* The ith register is added to the PARALLEL RTX. If i is even, reg_i is
 20733 added as the 0th element, and if i is odd, reg_i is added as the 1st element
 20734 of the LDRD pattern shown above. */
20735 XVECEXP (par, 0, (i % 2)) = tmp;
20736 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20737
20738 if ((i % 2) == 1)
20739 {
20740 /* When saved-register index (i) is odd, RTXs for both the registers
20741 to be loaded are generated in above given LDRD pattern, and the
20742 pattern can be emitted now. */
20743 par = emit_insn (par);
20744 REG_NOTES (par) = dwarf;
20745 RTX_FRAME_RELATED_P (par) = 1;
20746 }
20747
20748 i++;
20749 }
20750
 20751 /* If the number of registers popped is odd and return_in_pc is false, or the
 20752 number of registers is even and return_in_pc is true, the last register is
 20753 popped using LDR. It can be PC as well. Hence, adjust the stack first and
 20754 then load it with a post-increment LDR. */
20755
20756 /* Increment the stack pointer, based on there being
20757 num_regs 4-byte registers to restore. */
20758 tmp = gen_rtx_SET (stack_pointer_rtx,
20759 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20760 RTX_FRAME_RELATED_P (tmp) = 1;
20761 tmp = emit_insn (tmp);
20762 if (!return_in_pc)
20763 {
20764 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20765 stack_pointer_rtx, stack_pointer_rtx);
20766 }
20767
20768 dwarf = NULL_RTX;
20769
20770 if (((num_regs % 2) == 1 && !return_in_pc)
20771 || ((num_regs % 2) == 0 && return_in_pc))
20772 {
20773 /* Scan for the single register to be popped. Skip until the saved
20774 register is found. */
20775 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20776
20777 /* Gen LDR with post increment here. */
20778 tmp1 = gen_rtx_MEM (SImode,
20779 gen_rtx_POST_INC (SImode,
20780 stack_pointer_rtx));
20781 set_mem_alias_set (tmp1, get_frame_alias_set ());
20782
20783 reg = gen_rtx_REG (SImode, j);
20784 tmp = gen_rtx_SET (reg, tmp1);
20785 RTX_FRAME_RELATED_P (tmp) = 1;
20786 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20787
20788 if (return_in_pc)
20789 {
20790 /* If return_in_pc, j must be PC_REGNUM. */
20791 gcc_assert (j == PC_REGNUM);
20792 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20793 XVECEXP (par, 0, 0) = ret_rtx;
20794 XVECEXP (par, 0, 1) = tmp;
20795 par = emit_jump_insn (par);
20796 }
20797 else
20798 {
20799 par = emit_insn (tmp);
20800 REG_NOTES (par) = dwarf;
20801 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20802 stack_pointer_rtx, stack_pointer_rtx);
20803 }
20804
20805 }
20806 else if ((num_regs % 2) == 1 && return_in_pc)
20807 {
20808 /* There are 2 registers to be popped. So, generate the pattern
20809 pop_multiple_with_stack_update_and_return to pop in PC. */
20810 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20811 }
20812
20813 return;
20814 }
20815
20816 /* LDRD in ARM mode needs consecutive registers as operands. This function
20817 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
 20818 offset addressing and then generates one separate stack update. This provides
20819 more scheduling freedom, compared to writeback on every load. However,
20820 if the function returns using load into PC directly
20821 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20822 before the last load. TODO: Add a peephole optimization to recognize
20823 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20824 peephole optimization to merge the load at stack-offset zero
20825 with the stack update instruction using load with writeback
20826 in post-index addressing mode. */
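/* Illustrative example: with SAVED_REGS_MASK covering {r4, r5, r6} the
   expected sequence is roughly

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12

   i.e. all loads use offset addressing and a single SP adjustment follows.  */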
20827 static void
20828 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20829 {
20830 int j = 0;
20831 int offset = 0;
20832 rtx par = NULL_RTX;
20833 rtx dwarf = NULL_RTX;
20834 rtx tmp, mem;
20835
20836 /* Restore saved registers. */
20837 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20838 j = 0;
20839 while (j <= LAST_ARM_REGNUM)
20840 if (saved_regs_mask & (1 << j))
20841 {
20842 if ((j % 2) == 0
20843 && (saved_regs_mask & (1 << (j + 1)))
20844 && (j + 1) != PC_REGNUM)
20845 {
20846 /* Current register and next register form register pair for which
20847 LDRD can be generated. PC is always the last register popped, and
20848 we handle it separately. */
20849 if (offset > 0)
20850 mem = gen_frame_mem (DImode,
20851 plus_constant (Pmode,
20852 stack_pointer_rtx,
20853 offset));
20854 else
20855 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20856
20857 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20858 tmp = emit_insn (tmp);
20859 RTX_FRAME_RELATED_P (tmp) = 1;
20860
20861 /* Generate dwarf info. */
20862
20863 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20864 gen_rtx_REG (SImode, j),
20865 NULL_RTX);
20866 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20867 gen_rtx_REG (SImode, j + 1),
20868 dwarf);
20869
20870 REG_NOTES (tmp) = dwarf;
20871
20872 offset += 8;
20873 j += 2;
20874 }
20875 else if (j != PC_REGNUM)
20876 {
20877 /* Emit a single word load. */
20878 if (offset > 0)
20879 mem = gen_frame_mem (SImode,
20880 plus_constant (Pmode,
20881 stack_pointer_rtx,
20882 offset));
20883 else
20884 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20885
20886 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20887 tmp = emit_insn (tmp);
20888 RTX_FRAME_RELATED_P (tmp) = 1;
20889
20890 /* Generate dwarf info. */
20891 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20892 gen_rtx_REG (SImode, j),
20893 NULL_RTX);
20894
20895 offset += 4;
20896 j += 1;
20897 }
20898 else /* j == PC_REGNUM */
20899 j++;
20900 }
20901 else
20902 j++;
20903
20904 /* Update the stack. */
20905 if (offset > 0)
20906 {
20907 tmp = gen_rtx_SET (stack_pointer_rtx,
20908 plus_constant (Pmode,
20909 stack_pointer_rtx,
20910 offset));
20911 tmp = emit_insn (tmp);
20912 arm_add_cfa_adjust_cfa_note (tmp, offset,
20913 stack_pointer_rtx, stack_pointer_rtx);
20914 offset = 0;
20915 }
20916
20917 if (saved_regs_mask & (1 << PC_REGNUM))
20918 {
20919 /* Only PC is to be popped. */
20920 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20921 XVECEXP (par, 0, 0) = ret_rtx;
20922 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20923 gen_frame_mem (SImode,
20924 gen_rtx_POST_INC (SImode,
20925 stack_pointer_rtx)));
20926 RTX_FRAME_RELATED_P (tmp) = 1;
20927 XVECEXP (par, 0, 1) = tmp;
20928 par = emit_jump_insn (par);
20929
20930 /* Generate dwarf info. */
20931 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20932 gen_rtx_REG (SImode, PC_REGNUM),
20933 NULL_RTX);
20934 REG_NOTES (par) = dwarf;
20935 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20936 stack_pointer_rtx, stack_pointer_rtx);
20937 }
20938 }
20939
20940 /* Calculate the size of the return value that is passed in registers. */
20941 static unsigned
20942 arm_size_return_regs (void)
20943 {
20944 machine_mode mode;
20945
20946 if (crtl->return_rtx != 0)
20947 mode = GET_MODE (crtl->return_rtx);
20948 else
20949 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20950
20951 return GET_MODE_SIZE (mode);
20952 }
20953
20954 /* Return true if the current function needs to save/restore LR. */
20955 static bool
20956 thumb_force_lr_save (void)
20957 {
20958 return !cfun->machine->lr_save_eliminated
20959 && (!crtl->is_leaf
20960 || thumb_far_jump_used_p ()
20961 || df_regs_ever_live_p (LR_REGNUM));
20962 }
20963
 20964 /* We do not know whether r3 will be available, because
 20965 an indirect tailcall is happening in this
 20966 particular case. */
20967 static bool
20968 is_indirect_tailcall_p (rtx call)
20969 {
20970 rtx pat = PATTERN (call);
20971
20972 /* Indirect tail call. */
20973 pat = XVECEXP (pat, 0, 0);
20974 if (GET_CODE (pat) == SET)
20975 pat = SET_SRC (pat);
20976
20977 pat = XEXP (XEXP (pat, 0), 0);
20978 return REG_P (pat);
20979 }
20980
20981 /* Return true if r3 is used by any of the tail call insns in the
20982 current function. */
20983 static bool
20984 any_sibcall_could_use_r3 (void)
20985 {
20986 edge_iterator ei;
20987 edge e;
20988
20989 if (!crtl->tail_call_emit)
20990 return false;
20991 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20992 if (e->flags & EDGE_SIBCALL)
20993 {
20994 rtx_insn *call = BB_END (e->src);
20995 if (!CALL_P (call))
20996 call = prev_nonnote_nondebug_insn (call);
20997 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20998 if (find_regno_fusage (call, USE, 3)
20999 || is_indirect_tailcall_p (call))
21000 return true;
21001 }
21002 return false;
21003 }
21004
21005
21006 /* Compute the distance from register FROM to register TO.
21007 These can be the arg pointer (26), the soft frame pointer (25),
21008 the stack pointer (13) or the hard frame pointer (11).
21009 In thumb mode r7 is used as the soft frame pointer, if needed.
21010 Typical stack layout looks like this:
21011
 21012       old stack pointer -> |    |
 21013                             ----
 21014                            |    | \
 21015                            |    |   saved arguments for
 21016                            |    |   vararg functions
 21017                            |    | /
 21018                              --
 21019   hard FP & arg pointer -> |    | \
 21020                            |    |   stack
 21021                            |    |   frame
 21022                            |    | /
 21023                              --
 21024                            |    | \
 21025                            |    |   call saved
 21026                            |    |   registers
 21027      soft frame pointer -> |    | /
 21028                              --
 21029                            |    | \
 21030                            |    |   local
 21031                            |    |   variables
 21032     locals base pointer -> |    | /
 21033                              --
 21034                            |    | \
 21035                            |    |   outgoing
 21036                            |    |   arguments
 21037   current stack pointer -> |    | /
 21038                              --
21039
21040 For a given function some or all of these stack components
21041 may not be needed, giving rise to the possibility of
21042 eliminating some of the registers.
21043
21044 The values returned by this function must reflect the behavior
21045 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
21046
21047 The sign of the number returned reflects the direction of stack
21048 growth, so the values are positive for all eliminations except
21049 from the soft frame pointer to the hard frame pointer.
21050
21051 SFP may point just inside the local variables block to ensure correct
21052 alignment. */
21053
21054
21055 /* Return cached stack offsets. */
21056
21057 static arm_stack_offsets *
21058 arm_get_frame_offsets (void)
21059 {
21060 struct arm_stack_offsets *offsets;
21061
21062 offsets = &cfun->machine->stack_offsets;
21063
21064 return offsets;
21065 }
21066
21067
21068 /* Calculate stack offsets. These are used to calculate register elimination
21069 offsets and in prologue/epilogue code. Also calculates which registers
21070 should be saved. */
21071
21072 static void
21073 arm_compute_frame_layout (void)
21074 {
21075 struct arm_stack_offsets *offsets;
21076 unsigned long func_type;
21077 int saved;
21078 int core_saved;
21079 HOST_WIDE_INT frame_size;
21080 int i;
21081
21082 offsets = &cfun->machine->stack_offsets;
21083
 21084 /* Initially this is the size of the local variables. It will be translated
 21085 into an offset once we have determined the size of the preceding data. */
21086 frame_size = ROUND_UP_WORD (get_frame_size ());
21087
21088 /* Space for variadic functions. */
21089 offsets->saved_args = crtl->args.pretend_args_size;
21090
21091 /* In Thumb mode this is incorrect, but never used. */
21092 offsets->frame
21093 = (offsets->saved_args
21094 + arm_compute_static_chain_stack_bytes ()
21095 + (frame_pointer_needed ? 4 : 0));
21096
21097 if (TARGET_32BIT)
21098 {
21099 unsigned int regno;
21100
21101 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
21102 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21103 saved = core_saved;
21104
21105 /* We know that SP will be doubleword aligned on entry, and we must
21106 preserve that condition at any subroutine call. We also require the
21107 soft frame pointer to be doubleword aligned. */
21108
21109 if (TARGET_REALLY_IWMMXT)
21110 {
21111 /* Check for the call-saved iWMMXt registers. */
21112 for (regno = FIRST_IWMMXT_REGNUM;
21113 regno <= LAST_IWMMXT_REGNUM;
21114 regno++)
21115 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
21116 saved += 8;
21117 }
21118
21119 func_type = arm_current_func_type ();
21120 /* Space for saved VFP registers. */
21121 if (! IS_VOLATILE (func_type)
21122 && TARGET_HARD_FLOAT)
21123 saved += arm_get_vfp_saved_size ();
21124 }
21125 else /* TARGET_THUMB1 */
21126 {
21127 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
21128 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21129 saved = core_saved;
21130 if (TARGET_BACKTRACE)
21131 saved += 16;
21132 }
21133
21134 /* Saved registers include the stack frame. */
21135 offsets->saved_regs
21136 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
21137 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
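  /* Illustrative example (assuming no pretend args, no static chain slot, no
     VFP or iWMMXt saves and CALLER_INTERWORKING_SLOT_SIZE == 0): a
     frame-pointer-using function that saves {r4, r5, fp, lr} gets saved_args
     == 0, frame == 4, saved_regs == 16 and soft_frame == 16; with 8 bytes of
     locals and no outgoing arguments, locals_base and outgoing_args computed
     below both end up as 24.  */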
21138
21139 /* A leaf function does not need any stack alignment if it has nothing
21140 on the stack. */
21141 if (crtl->is_leaf && frame_size == 0
21142 /* However if it calls alloca(), we have a dynamically allocated
21143 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
21144 && ! cfun->calls_alloca)
21145 {
21146 offsets->outgoing_args = offsets->soft_frame;
21147 offsets->locals_base = offsets->soft_frame;
21148 return;
21149 }
21150
21151 /* Ensure SFP has the correct alignment. */
21152 if (ARM_DOUBLEWORD_ALIGN
21153 && (offsets->soft_frame & 7))
21154 {
21155 offsets->soft_frame += 4;
21156 /* Try to align stack by pushing an extra reg. Don't bother doing this
21157 when there is a stack frame as the alignment will be rolled into
21158 the normal stack adjustment. */
21159 if (frame_size + crtl->outgoing_args_size == 0)
21160 {
21161 int reg = -1;
21162
21163 /* Register r3 is caller-saved. Normally it does not need to be
21164 saved on entry by the prologue. However if we choose to save
21165 it for padding then we may confuse the compiler into thinking
21166 a prologue sequence is required when in fact it is not. This
21167 will occur when shrink-wrapping if r3 is used as a scratch
21168 register and there are no other callee-saved writes.
21169
 21170 This situation can be avoided when other callee-saved registers are
 21171 available, since r3 is not mandatory: we can choose a callee-saved
 21172 register for the padding instead. */
21173 bool prefer_callee_reg_p = false;
21174
21175 /* If it is safe to use r3, then do so. This sometimes
21176 generates better code on Thumb-2 by avoiding the need to
21177 use 32-bit push/pop instructions. */
21178 if (! any_sibcall_could_use_r3 ()
21179 && arm_size_return_regs () <= 12
21180 && (offsets->saved_regs_mask & (1 << 3)) == 0
21181 && (TARGET_THUMB2
21182 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
21183 {
21184 reg = 3;
21185 if (!TARGET_THUMB2)
21186 prefer_callee_reg_p = true;
21187 }
21188 if (reg == -1
21189 || prefer_callee_reg_p)
21190 {
21191 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
21192 {
21193 /* Avoid fixed registers; they may be changed at
21194 arbitrary times so it's unsafe to restore them
21195 during the epilogue. */
21196 if (!fixed_regs[i]
21197 && (offsets->saved_regs_mask & (1 << i)) == 0)
21198 {
21199 reg = i;
21200 break;
21201 }
21202 }
21203 }
21204
21205 if (reg != -1)
21206 {
21207 offsets->saved_regs += 4;
21208 offsets->saved_regs_mask |= (1 << reg);
21209 }
21210 }
21211 }
21212
21213 offsets->locals_base = offsets->soft_frame + frame_size;
21214 offsets->outgoing_args = (offsets->locals_base
21215 + crtl->outgoing_args_size);
21216
21217 if (ARM_DOUBLEWORD_ALIGN)
21218 {
21219 /* Ensure SP remains doubleword aligned. */
21220 if (offsets->outgoing_args & 7)
21221 offsets->outgoing_args += 4;
21222 gcc_assert (!(offsets->outgoing_args & 7));
21223 }
21224 }
21225
21226
21227 /* Calculate the relative offsets for the different stack pointers. Positive
21228 offsets are in the direction of stack growth. */
21229
21230 HOST_WIDE_INT
21231 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21232 {
21233 arm_stack_offsets *offsets;
21234
21235 offsets = arm_get_frame_offsets ();
21236
21237 /* OK, now we have enough information to compute the distances.
21238 There must be an entry in these switch tables for each pair
21239 of registers in ELIMINABLE_REGS, even if some of the entries
21240 seem to be redundant or useless. */
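  /* Continuing the illustrative example from arm_compute_frame_layout
     (saved_args == 0, frame == 4, soft_frame == 16, outgoing_args == 24):
     eliminating ARG_POINTER into STACK_POINTER yields 24 - (0 + 4) == 20,
     while eliminating FRAME_POINTER into ARM_HARD_FRAME_POINTER yields
     4 - 16 == -12, the one negative direction noted earlier.  */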
21241 switch (from)
21242 {
21243 case ARG_POINTER_REGNUM:
21244 switch (to)
21245 {
21246 case THUMB_HARD_FRAME_POINTER_REGNUM:
21247 return 0;
21248
21249 case FRAME_POINTER_REGNUM:
21250 /* This is the reverse of the soft frame pointer
21251 to hard frame pointer elimination below. */
21252 return offsets->soft_frame - offsets->saved_args;
21253
21254 case ARM_HARD_FRAME_POINTER_REGNUM:
21255 /* This is only non-zero in the case where the static chain register
21256 is stored above the frame. */
21257 return offsets->frame - offsets->saved_args - 4;
21258
21259 case STACK_POINTER_REGNUM:
21260 /* If nothing has been pushed on the stack at all
21261 then this will return -4. This *is* correct! */
21262 return offsets->outgoing_args - (offsets->saved_args + 4);
21263
21264 default:
21265 gcc_unreachable ();
21266 }
21267 gcc_unreachable ();
21268
21269 case FRAME_POINTER_REGNUM:
21270 switch (to)
21271 {
21272 case THUMB_HARD_FRAME_POINTER_REGNUM:
21273 return 0;
21274
21275 case ARM_HARD_FRAME_POINTER_REGNUM:
21276 /* The hard frame pointer points to the top entry in the
 21277 stack frame. The soft frame pointer points to the bottom entry
21278 in the stack frame. If there is no stack frame at all,
21279 then they are identical. */
21280
21281 return offsets->frame - offsets->soft_frame;
21282
21283 case STACK_POINTER_REGNUM:
21284 return offsets->outgoing_args - offsets->soft_frame;
21285
21286 default:
21287 gcc_unreachable ();
21288 }
21289 gcc_unreachable ();
21290
21291 default:
21292 /* You cannot eliminate from the stack pointer.
21293 In theory you could eliminate from the hard frame
21294 pointer to the stack pointer, but this will never
21295 happen, since if a stack frame is not needed the
21296 hard frame pointer will never be used. */
21297 gcc_unreachable ();
21298 }
21299 }
21300
21301 /* Given FROM and TO register numbers, say whether this elimination is
21302 allowed. Frame pointer elimination is automatically handled.
21303
21304 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21305 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21306 pointer, we must eliminate FRAME_POINTER_REGNUM into
21307 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21308 ARG_POINTER_REGNUM. */
21309
21310 bool
21311 arm_can_eliminate (const int from, const int to)
21312 {
21313 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21314 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21315 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21316 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21317 true);
21318 }
21319
21320 /* Emit RTL to save coprocessor registers on function entry. Returns the
21321 number of bytes pushed. */
21322
21323 static int
21324 arm_save_coproc_regs(void)
21325 {
21326 int saved_size = 0;
21327 unsigned reg;
21328 unsigned start_reg;
21329 rtx insn;
21330
21331 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21332 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21333 {
21334 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21335 insn = gen_rtx_MEM (V2SImode, insn);
21336 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21337 RTX_FRAME_RELATED_P (insn) = 1;
21338 saved_size += 8;
21339 }
21340
21341 if (TARGET_HARD_FLOAT)
21342 {
21343 start_reg = FIRST_VFP_REGNUM;
21344
21345 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21346 {
21347 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21348 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21349 {
21350 if (start_reg != reg)
21351 saved_size += vfp_emit_fstmd (start_reg,
21352 (reg - start_reg) / 2);
21353 start_reg = reg + 2;
21354 }
21355 }
21356 if (start_reg != reg)
21357 saved_size += vfp_emit_fstmd (start_reg,
21358 (reg - start_reg) / 2);
21359 }
21360 return saved_size;
21361 }
21362
21363
21364 /* Set the Thumb frame pointer from the stack pointer. */
21365
21366 static void
21367 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21368 {
21369 HOST_WIDE_INT amount;
21370 rtx insn, dwarf;
21371
21372 amount = offsets->outgoing_args - offsets->locals_base;
21373 if (amount < 1024)
21374 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21375 stack_pointer_rtx, GEN_INT (amount)));
21376 else
21377 {
21378 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21379 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21380 expects the first two operands to be the same. */
21381 if (TARGET_THUMB2)
21382 {
21383 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21384 stack_pointer_rtx,
21385 hard_frame_pointer_rtx));
21386 }
21387 else
21388 {
21389 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21390 hard_frame_pointer_rtx,
21391 stack_pointer_rtx));
21392 }
21393 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21394 plus_constant (Pmode, stack_pointer_rtx, amount));
21395 RTX_FRAME_RELATED_P (dwarf) = 1;
21396 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21397 }
21398
21399 RTX_FRAME_RELATED_P (insn) = 1;
21400 }
21401
21402 struct scratch_reg {
21403 rtx reg;
21404 bool saved;
21405 };
21406
21407 /* Return a short-lived scratch register for use as a 2nd scratch register on
21408 function entry after the registers are saved in the prologue. This register
21409 must be released by means of release_scratch_register_on_entry. IP is not
21410 considered since it is always used as the 1st scratch register if available.
21411
21412 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21413 mask of live registers. */
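/* Descriptive note on the selection below: LR is preferred if it is in
   LIVE_REGS and is not REGNO1; otherwise the first register in r4-r10 that is
   in LIVE_REGS and is not REGNO1 is used; failing that, r2 or r3 is chosen,
   and is saved and restored around the probe sequence if it is live on entry
   to the function.  */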
21414
21415 static void
21416 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21417 unsigned long live_regs)
21418 {
21419 int regno = -1;
21420
21421 sr->saved = false;
21422
21423 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21424 regno = LR_REGNUM;
21425 else
21426 {
21427 unsigned int i;
21428
21429 for (i = 4; i < 11; i++)
21430 if (regno1 != i && (live_regs & (1 << i)) != 0)
21431 {
21432 regno = i;
21433 break;
21434 }
21435
21436 if (regno < 0)
21437 {
21438 /* If IP is used as the 1st scratch register for a nested function,
 21439 then either r3 wasn't available or it is used to preserve IP. */
21440 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21441 regno1 = 3;
21442 regno = (regno1 == 3 ? 2 : 3);
21443 sr->saved
21444 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21445 regno);
21446 }
21447 }
21448
21449 sr->reg = gen_rtx_REG (SImode, regno);
21450 if (sr->saved)
21451 {
21452 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21453 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21454 rtx x = gen_rtx_SET (stack_pointer_rtx,
21455 plus_constant (Pmode, stack_pointer_rtx, -4));
21456 RTX_FRAME_RELATED_P (insn) = 1;
21457 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21458 }
21459 }
21460
21461 /* Release a scratch register obtained from the preceding function. */
21462
21463 static void
21464 release_scratch_register_on_entry (struct scratch_reg *sr)
21465 {
21466 if (sr->saved)
21467 {
21468 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21469 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21470 rtx x = gen_rtx_SET (stack_pointer_rtx,
21471 plus_constant (Pmode, stack_pointer_rtx, 4));
21472 RTX_FRAME_RELATED_P (insn) = 1;
21473 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21474 }
21475 }
21476
21477 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21478
21479 #if PROBE_INTERVAL > 4096
21480 #error Cannot use indexed addressing mode for stack probing
21481 #endif
21482
21483 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21484 inclusive. These are offsets from the current stack pointer. REGNO1
21485 is the index number of the 1st scratch register and LIVE_REGS is the
21486 mask of live registers. */
21487
21488 static void
21489 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21490 unsigned int regno1, unsigned long live_regs)
21491 {
21492 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21493
21494 /* See if we have a constant small number of probes to generate. If so,
21495 that's the easy case. */
21496 if (size <= PROBE_INTERVAL)
21497 {
21498 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21499 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21500 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21501 }
21502
21503 /* The run-time loop is made up of 10 insns in the generic case while the
21504 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
21505 else if (size <= 5 * PROBE_INTERVAL)
21506 {
21507 HOST_WIDE_INT i, rem;
21508
21509 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21510 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21511 emit_stack_probe (reg1);
21512
21513 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21514 it exceeds SIZE. If only two probes are needed, this will not
21515 generate any code. Then probe at FIRST + SIZE. */
21516 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21517 {
21518 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21519 emit_stack_probe (reg1);
21520 }
21521
21522 rem = size - (i - PROBE_INTERVAL);
21523 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21524 {
21525 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21526 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21527 }
21528 else
21529 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21530 }
21531
21532 /* Otherwise, do the same as above, but in a loop. Note that we must be
21533 extra careful with variables wrapping around because we might be at
21534 the very top (or the very bottom) of the address space and we have
21535 to be able to handle this case properly; in particular, we use an
21536 equality test for the loop condition. */
21537 else
21538 {
21539 HOST_WIDE_INT rounded_size;
21540 struct scratch_reg sr;
21541
21542 get_scratch_register_on_entry (&sr, regno1, live_regs);
21543
21544 emit_move_insn (reg1, GEN_INT (first));
21545
21546
21547 /* Step 1: round SIZE to the previous multiple of the interval. */
21548
21549 rounded_size = size & -PROBE_INTERVAL;
21550 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21551
21552
21553 /* Step 2: compute initial and final value of the loop counter. */
21554
21555 /* TEST_ADDR = SP + FIRST. */
21556 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21557
21558 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21559 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21560
21561
21562 /* Step 3: the loop
21563
21564 do
21565 {
21566 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21567 probe at TEST_ADDR
21568 }
21569 while (TEST_ADDR != LAST_ADDR)
21570
21571 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21572 until it is equal to ROUNDED_SIZE. */
21573
21574 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21575
21576
21577 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21578 that SIZE is equal to ROUNDED_SIZE. */
21579
21580 if (size != rounded_size)
21581 {
21582 HOST_WIDE_INT rem = size - rounded_size;
21583
21584 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21585 {
21586 emit_set_insn (sr.reg,
21587 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21588 emit_stack_probe (plus_constant (Pmode, sr.reg,
21589 PROBE_INTERVAL - rem));
21590 }
21591 else
21592 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21593 }
21594
21595 release_scratch_register_on_entry (&sr);
21596 }
21597
21598 /* Make sure nothing is scheduled before we are done. */
21599 emit_insn (gen_blockage ());
21600 }
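
/* For example, with PROBE_INTERVAL == 4096, FIRST == 4096 and SIZE == 9000,
   the second case above emits probes at SP - 8192, SP - 12288 and finally
   SP - 13096 (i.e. SP - FIRST - SIZE), using only reg1 and small constant
   adjustments; only the looping case needs the extra scratch register.  */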
21601
21602 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21603 absolute addresses. */
21604
21605 const char *
21606 output_probe_stack_range (rtx reg1, rtx reg2)
21607 {
21608 static int labelno = 0;
21609 char loop_lab[32];
21610 rtx xops[2];
21611
21612 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21613
21614 /* Loop. */
21615 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21616
21617 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21618 xops[0] = reg1;
21619 xops[1] = GEN_INT (PROBE_INTERVAL);
21620 output_asm_insn ("sub\t%0, %0, %1", xops);
21621
21622 /* Probe at TEST_ADDR. */
21623 output_asm_insn ("str\tr0, [%0, #0]", xops);
21624
21625 /* Test if TEST_ADDR == LAST_ADDR. */
21626 xops[1] = reg2;
21627 output_asm_insn ("cmp\t%0, %1", xops);
21628
21629 /* Branch. */
21630 fputs ("\tbne\t", asm_out_file);
21631 assemble_name_raw (asm_out_file, loop_lab);
21632 fputc ('\n', asm_out_file);
21633
21634 return "";
21635 }
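
/* The loop emitted above looks something like this (assuming reg1 is r4,
   reg2 is r5 and PROBE_INTERVAL is 4096):

	.LPSRL0:
		sub	r4, r4, #4096
		str	r0, [r4, #0]
		cmp	r4, r5
		bne	.LPSRL0

   r0 is only used as a store source; its value does not matter.  */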
21636
21637 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21638 function. */
21639 void
21640 arm_expand_prologue (void)
21641 {
21642 rtx amount;
21643 rtx insn;
21644 rtx ip_rtx;
21645 unsigned long live_regs_mask;
21646 unsigned long func_type;
21647 int fp_offset = 0;
21648 int saved_pretend_args = 0;
21649 int saved_regs = 0;
21650 unsigned HOST_WIDE_INT args_to_push;
21651 HOST_WIDE_INT size;
21652 arm_stack_offsets *offsets;
21653 bool clobber_ip;
21654
21655 func_type = arm_current_func_type ();
21656
21657 /* Naked functions don't have prologues. */
21658 if (IS_NAKED (func_type))
21659 {
21660 if (flag_stack_usage_info)
21661 current_function_static_stack_size = 0;
21662 return;
21663 }
21664
21665 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21666 args_to_push = crtl->args.pretend_args_size;
21667
21668 /* Compute which registers we will have to save onto the stack. */
21669 offsets = arm_get_frame_offsets ();
21670 live_regs_mask = offsets->saved_regs_mask;
21671
21672 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21673
21674 if (IS_STACKALIGN (func_type))
21675 {
21676 rtx r0, r1;
21677
21678 /* Handle a word-aligned stack pointer. We generate the following:
21679
21680 mov r0, sp
21681 bic r1, r0, #7
21682 mov sp, r1
21683 <save and restore r0 in normal prologue/epilogue>
21684 mov sp, r0
21685 bx lr
21686
21687 The unwinder doesn't need to know about the stack realignment.
21688 Just tell it we saved SP in r0. */
21689 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21690
21691 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21692 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21693
21694 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21695 RTX_FRAME_RELATED_P (insn) = 1;
21696 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21697
21698 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21699
21700 /* ??? The CFA changes here, which may cause GDB to conclude that it
21701 has entered a different function. That said, the unwind info is
21702 correct, individually, before and after this instruction because
21703 we've described the save of SP, which will override the default
21704 handling of SP as restoring from the CFA. */
21705 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21706 }
21707
21708 /* The static chain register is the same as the IP register. If it is
21709 clobbered when creating the frame, we need to save and restore it. */
21710 clobber_ip = IS_NESTED (func_type)
21711 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21712 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21713 || flag_stack_clash_protection)
21714 && !df_regs_ever_live_p (LR_REGNUM)
21715 && arm_r3_live_at_start_p ()));
21716
21717 /* Find somewhere to store IP whilst the frame is being created.
21718 We try the following places in order:
21719
21720 1. The last argument register r3 if it is available.
21721 2. A slot on the stack above the frame if there are no
21722 arguments to push onto the stack.
21723 3. Register r3 again, after pushing the argument registers
21724 onto the stack, if this is a varargs function.
21725 4. The last slot on the stack created for the arguments to
21726 push, if this isn't a varargs function.
21727
21728 Note - we only need to tell the dwarf2 backend about the SP
21729 adjustment in the second variant; the static chain register
21730 doesn't need to be unwound, as it doesn't contain a value
21731 inherited from the caller. */
21732 if (clobber_ip)
21733 {
21734 if (!arm_r3_live_at_start_p ())
21735 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21736 else if (args_to_push == 0)
21737 {
21738 rtx addr, dwarf;
21739
21740 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21741 saved_regs += 4;
21742
21743 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21744 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21745 fp_offset = 4;
21746
21747 /* Just tell the dwarf backend that we adjusted SP. */
21748 dwarf = gen_rtx_SET (stack_pointer_rtx,
21749 plus_constant (Pmode, stack_pointer_rtx,
21750 -fp_offset));
21751 RTX_FRAME_RELATED_P (insn) = 1;
21752 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21753 }
21754 else
21755 {
21756 /* Store the args on the stack. */
21757 if (cfun->machine->uses_anonymous_args)
21758 {
21759 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21760 (0xf0 >> (args_to_push / 4)) & 0xf);
21761 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21762 saved_pretend_args = 1;
21763 }
21764 else
21765 {
21766 rtx addr, dwarf;
21767
21768 if (args_to_push == 4)
21769 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21770 else
21771 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21772 plus_constant (Pmode,
21773 stack_pointer_rtx,
21774 -args_to_push));
21775
21776 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21777
21778 /* Just tell the dwarf backend that we adjusted SP. */
21779 dwarf = gen_rtx_SET (stack_pointer_rtx,
21780 plus_constant (Pmode, stack_pointer_rtx,
21781 -args_to_push));
21782 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21783 }
21784
21785 RTX_FRAME_RELATED_P (insn) = 1;
21786 fp_offset = args_to_push;
21787 args_to_push = 0;
21788 }
21789 }
21790
21791 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21792 {
21793 if (IS_INTERRUPT (func_type))
21794 {
21795 /* Interrupt functions must not corrupt any registers.
21796 Creating a frame pointer, however, corrupts the IP
21797 register, so we must push it first. */
21798 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21799
21800 /* Do not set RTX_FRAME_RELATED_P on this insn.
21801 The dwarf stack unwinding code only wants to see one
21802 stack decrement per function, and this is not it. If
21803 this instruction is labeled as being part of the frame
21804 creation sequence then dwarf2out_frame_debug_expr will
21805 die when it encounters the assignment of IP to FP
21806 later on, since the use of SP here establishes SP as
21807 the CFA register and not IP.
21808
21809 Anyway this instruction is not really part of the stack
21810 frame creation although it is part of the prologue. */
21811 }
21812
21813 insn = emit_set_insn (ip_rtx,
21814 plus_constant (Pmode, stack_pointer_rtx,
21815 fp_offset));
21816 RTX_FRAME_RELATED_P (insn) = 1;
21817 }
21818
21819 if (args_to_push)
21820 {
21821 /* Push the argument registers, or reserve space for them. */
21822 if (cfun->machine->uses_anonymous_args)
21823 insn = emit_multi_reg_push
21824 ((0xf0 >> (args_to_push / 4)) & 0xf,
21825 (0xf0 >> (args_to_push / 4)) & 0xf);
21826 else
21827 insn = emit_insn
21828 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21829 GEN_INT (- args_to_push)));
21830 RTX_FRAME_RELATED_P (insn) = 1;
21831 }
21832
21833 /* If this is an interrupt service routine, and the link register
21834 is going to be pushed, and we're not generating an extra
21835 push of IP (needed when a frame pointer is needed and the frame layout is APCS),
21836 subtracting four from LR now will mean that the function return
21837 can be done with a single instruction. */
21838 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21839 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21840 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21841 && TARGET_ARM)
21842 {
21843 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21844
21845 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21846 }
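
/* E.g. instead of ending the handler with "ldmfd sp!, {..., lr}" followed by
   "subs pc, lr, #4", the epilogue can then pop the pre-adjusted value
   straight into the program counter with a single "ldmfd sp!, {..., pc}^".  */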
21847
21848 if (live_regs_mask)
21849 {
21850 unsigned long dwarf_regs_mask = live_regs_mask;
21851
21852 saved_regs += bit_count (live_regs_mask) * 4;
21853 if (optimize_size && !frame_pointer_needed
21854 && saved_regs == offsets->saved_regs - offsets->saved_args)
21855 {
21856 /* If no coprocessor registers are being pushed and we don't have
21857 to worry about a frame pointer then push extra registers to
21858 create the stack frame. This is done in a way that does not
21859 alter the frame layout, so it is independent of the epilogue. */
21860 int n;
21861 int frame;
21862 n = 0;
21863 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21864 n++;
21865 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21866 if (frame && n * 4 >= frame)
21867 {
21868 n = frame / 4;
21869 live_regs_mask |= (1 << n) - 1;
21870 saved_regs += frame;
21871 }
21872 }
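
/* For example, if the lowest live core register is r4 (n == 4) and 8 bytes
   of frame are needed, the mask gains r0 and r1: pushing two extra (dead)
   registers allocates the frame as part of the existing push and saves a
   separate "sub sp, sp, #8".  */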
21873
21874 if (TARGET_LDRD
21875 && current_tune->prefer_ldrd_strd
21876 && !optimize_function_for_size_p (cfun))
21877 {
21878 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21879 if (TARGET_THUMB2)
21880 thumb2_emit_strd_push (live_regs_mask);
21881 else if (TARGET_ARM
21882 && !TARGET_APCS_FRAME
21883 && !IS_INTERRUPT (func_type))
21884 arm_emit_strd_push (live_regs_mask);
21885 else
21886 {
21887 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21888 RTX_FRAME_RELATED_P (insn) = 1;
21889 }
21890 }
21891 else
21892 {
21893 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21894 RTX_FRAME_RELATED_P (insn) = 1;
21895 }
21896 }
21897
21898 if (! IS_VOLATILE (func_type))
21899 saved_regs += arm_save_coproc_regs ();
21900
21901 if (frame_pointer_needed && TARGET_ARM)
21902 {
21903 /* Create the new frame pointer. */
21904 if (TARGET_APCS_FRAME)
21905 {
21906 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21907 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21908 RTX_FRAME_RELATED_P (insn) = 1;
21909 }
21910 else
21911 {
21912 insn = GEN_INT (saved_regs - (4 + fp_offset));
21913 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21914 stack_pointer_rtx, insn));
21915 RTX_FRAME_RELATED_P (insn) = 1;
21916 }
21917 }
21918
21919 size = offsets->outgoing_args - offsets->saved_args;
21920 if (flag_stack_usage_info)
21921 current_function_static_stack_size = size;
21922
21923 /* If this isn't an interrupt service routine and we have a frame, then do
21924 stack checking. We use IP as the first scratch register, except for the
21925 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21926 if (!IS_INTERRUPT (func_type)
21927 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21928 || flag_stack_clash_protection))
21929 {
21930 unsigned int regno;
21931
21932 if (!IS_NESTED (func_type) || clobber_ip)
21933 regno = IP_REGNUM;
21934 else if (df_regs_ever_live_p (LR_REGNUM))
21935 regno = LR_REGNUM;
21936 else
21937 regno = 3;
21938
21939 if (crtl->is_leaf && !cfun->calls_alloca)
21940 {
21941 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
21942 arm_emit_probe_stack_range (get_stack_check_protect (),
21943 size - get_stack_check_protect (),
21944 regno, live_regs_mask);
21945 }
21946 else if (size > 0)
21947 arm_emit_probe_stack_range (get_stack_check_protect (), size,
21948 regno, live_regs_mask);
21949 }
21950
21951 /* Recover the static chain register. */
21952 if (clobber_ip)
21953 {
21954 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21955 insn = gen_rtx_REG (SImode, 3);
21956 else
21957 {
21958 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21959 insn = gen_frame_mem (SImode, insn);
21960 }
21961 emit_set_insn (ip_rtx, insn);
21962 emit_insn (gen_force_register_use (ip_rtx));
21963 }
21964
21965 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21966 {
21967 /* This add can produce multiple insns for a large constant, so we
21968 need to get tricky. */
21969 rtx_insn *last = get_last_insn ();
21970
21971 amount = GEN_INT (offsets->saved_args + saved_regs
21972 - offsets->outgoing_args);
21973
21974 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21975 amount));
21976 do
21977 {
21978 last = last ? NEXT_INSN (last) : get_insns ();
21979 RTX_FRAME_RELATED_P (last) = 1;
21980 }
21981 while (last != insn);
21982
21983 /* If the frame pointer is needed, emit a special barrier that
21984 will prevent the scheduler from moving stores to the frame
21985 before the stack adjustment. */
21986 if (frame_pointer_needed)
21987 emit_insn (gen_stack_tie (stack_pointer_rtx,
21988 hard_frame_pointer_rtx));
21989 }
21990
21991
21992 if (frame_pointer_needed && TARGET_THUMB2)
21993 thumb_set_frame_pointer (offsets);
21994
21995 if (flag_pic && arm_pic_register != INVALID_REGNUM)
21996 {
21997 unsigned long mask;
21998
21999 mask = live_regs_mask;
22000 mask &= THUMB2_WORK_REGS;
22001 if (!IS_NESTED (func_type))
22002 mask |= (1 << IP_REGNUM);
22003 arm_load_pic_register (mask);
22004 }
22005
22006 /* If we are profiling, make sure no instructions are scheduled before
22007 the call to mcount. Likewise if the user has requested no
22008 scheduling in the prologue, and likewise if we want non-call exceptions
22009 using the EABI unwinder, to prevent faulting instructions from being
22010 swapped with a stack adjustment. */
22011 if (crtl->profile || !TARGET_SCHED_PROLOG
22012 || (arm_except_unwind_info (&global_options) == UI_TARGET
22013 && cfun->can_throw_non_call_exceptions))
22014 emit_insn (gen_blockage ());
22015
22016 /* If the link register is being kept alive, with the return address in it,
22017 then make sure that it does not get reused by the ce2 pass. */
22018 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
22019 cfun->machine->lr_save_eliminated = 1;
22020 }
22021 \f
22022 /* Print condition code to STREAM. Helper function for arm_print_operand. */
22023 static void
22024 arm_print_condition (FILE *stream)
22025 {
22026 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
22027 {
22028 /* Branch conversion is not implemented for Thumb-2. */
22029 if (TARGET_THUMB)
22030 {
22031 output_operand_lossage ("predicated Thumb instruction");
22032 return;
22033 }
22034 if (current_insn_predicate != NULL)
22035 {
22036 output_operand_lossage
22037 ("predicated instruction in conditional sequence");
22038 return;
22039 }
22040
22041 fputs (arm_condition_codes[arm_current_cc], stream);
22042 }
22043 else if (current_insn_predicate)
22044 {
22045 enum arm_cond_code code;
22046
22047 if (TARGET_THUMB1)
22048 {
22049 output_operand_lossage ("predicated Thumb instruction");
22050 return;
22051 }
22052
22053 code = get_arm_condition_code (current_insn_predicate);
22054 fputs (arm_condition_codes[code], stream);
22055 }
22056 }
22057
22058
22059 /* Globally reserved letters: acln
22060 Punctuation letters currently used: @_|?().!#
22061 Lower case letters currently used: bcdefhimpqtvwxyz
22062 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
22063 Letters previously used, but now deprecated/obsolete: sVWXYZ.
22064
22065 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
22066
22067 If CODE is 'd', then X is a condition operand and the instruction
22068 should only be executed if the condition is true.
22069 If CODE is 'D', then X is a condition operand and the instruction
22070 should only be executed if the condition is false: however, if the mode
22071 of the comparison is CCFPEmode, then always execute the instruction -- we
22072 do this because in these circumstances !GE does not necessarily imply LT;
22073 in these cases the instruction pattern will take care to make sure that
22074 an instruction containing %d will follow, thereby undoing the effects of
22075 doing this instruction unconditionally.
22076 If CODE is 'N' then X is a floating point operand that must be negated
22077 before output.
22078 If CODE is 'B' then output a bitwise inverted value of X (a const int).
22079 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
22080 static void
22081 arm_print_operand (FILE *stream, rtx x, int code)
22082 {
22083 switch (code)
22084 {
22085 case '@':
22086 fputs (ASM_COMMENT_START, stream);
22087 return;
22088
22089 case '_':
22090 fputs (user_label_prefix, stream);
22091 return;
22092
22093 case '|':
22094 fputs (REGISTER_PREFIX, stream);
22095 return;
22096
22097 case '?':
22098 arm_print_condition (stream);
22099 return;
22100
22101 case '.':
22102 /* The current condition code for a condition code setting instruction.
22103 Preceded by 's' in unified syntax, otherwise followed by 's'. */
22104 fputc('s', stream);
22105 arm_print_condition (stream);
22106 return;
22107
22108 case '!':
22109 /* If the instruction is conditionally executed then print
22110 the current condition code, otherwise print 's'. */
22111 gcc_assert (TARGET_THUMB2);
22112 if (current_insn_predicate)
22113 arm_print_condition (stream);
22114 else
22115 fputc('s', stream);
22116 break;
22117
22118 /* %# is a "break" sequence. It doesn't output anything, but is used to
22119 separate e.g. operand numbers from following text, if that text consists
22120 of further digits which we don't want to be part of the operand
22121 number. */
22122 case '#':
22123 return;
22124
22125 case 'N':
22126 {
22127 REAL_VALUE_TYPE r;
22128 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
22129 fprintf (stream, "%s", fp_const_from_val (&r));
22130 }
22131 return;
22132
22133 /* An integer or symbol address without a preceding # sign. */
22134 case 'c':
22135 switch (GET_CODE (x))
22136 {
22137 case CONST_INT:
22138 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
22139 break;
22140
22141 case SYMBOL_REF:
22142 output_addr_const (stream, x);
22143 break;
22144
22145 case CONST:
22146 if (GET_CODE (XEXP (x, 0)) == PLUS
22147 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
22148 {
22149 output_addr_const (stream, x);
22150 break;
22151 }
22152 /* Fall through. */
22153
22154 default:
22155 output_operand_lossage ("Unsupported operand for code '%c'", code);
22156 }
22157 return;
22158
22159 /* An integer that we want to print in HEX. */
22160 case 'x':
22161 switch (GET_CODE (x))
22162 {
22163 case CONST_INT:
22164 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
22165 break;
22166
22167 default:
22168 output_operand_lossage ("Unsupported operand for code '%c'", code);
22169 }
22170 return;
22171
22172 case 'B':
22173 if (CONST_INT_P (x))
22174 {
22175 HOST_WIDE_INT val;
22176 val = ARM_SIGN_EXTEND (~INTVAL (x));
22177 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
22178 }
22179 else
22180 {
22181 putc ('~', stream);
22182 output_addr_const (stream, x);
22183 }
22184 return;
22185
22186 case 'b':
22187 /* Print the log2 of a CONST_INT. */
22188 {
22189 HOST_WIDE_INT val;
22190
22191 if (!CONST_INT_P (x)
22192 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
22193 output_operand_lossage ("Unsupported operand for code '%c'", code);
22194 else
22195 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22196 }
22197 return;
22198
22199 case 'L':
22200 /* The low 16 bits of an immediate constant. */
22201 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
22202 return;
22203
22204 case 'i':
22205 fprintf (stream, "%s", arithmetic_instr (x, 1));
22206 return;
22207
22208 case 'I':
22209 fprintf (stream, "%s", arithmetic_instr (x, 0));
22210 return;
22211
22212 case 'S':
22213 {
22214 HOST_WIDE_INT val;
22215 const char *shift;
22216
22217 shift = shift_op (x, &val);
22218
22219 if (shift)
22220 {
22221 fprintf (stream, ", %s ", shift);
22222 if (val == -1)
22223 arm_print_operand (stream, XEXP (x, 1), 0);
22224 else
22225 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22226 }
22227 }
22228 return;
22229
22230 /* An explanation of the 'Q', 'R' and 'H' register operands:
22231
22232 In a pair of registers containing a DI or DF value the 'Q'
22233 operand returns the register number of the register containing
22234 the least significant part of the value. The 'R' operand returns
22235 the register number of the register containing the most
22236 significant part of the value.
22237
22238 The 'H' operand returns the higher of the two register numbers.
22239 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22240 same as the 'Q' operand, since the most significant part of the
22241 value is held in the lower-numbered register. The reverse is true
22242 on systems where WORDS_BIG_ENDIAN is false.
22243
22244 The purpose of these operands is to distinguish between cases
22245 where the endian-ness of the values is important (for example
22246 when they are added together), and cases where the endian-ness
22247 is irrelevant, but the order of register operations is important.
22248 For example when loading a value from memory into a register
22249 pair, the endian-ness does not matter. Provided that the value
22250 from the lower memory address is put into the lower numbered
22251 register, and the value from the higher address is put into the
22252 higher numbered register, the load will work regardless of whether
22253 the value being loaded is big-wordian or little-wordian. The
22254 order of the two register loads can matter however, if the address
22255 of the memory location is actually held in one of the registers
22256 being overwritten by the load.
22257
22258 The 'Q' and 'R' constraints are also available for 64-bit
22259 constants. */
22260 case 'Q':
22261 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22262 {
22263 rtx part = gen_lowpart (SImode, x);
22264 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22265 return;
22266 }
22267
22268 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22269 {
22270 output_operand_lossage ("invalid operand for code '%c'", code);
22271 return;
22272 }
22273
22274 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22275 return;
22276
22277 case 'R':
22278 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22279 {
22280 machine_mode mode = GET_MODE (x);
22281 rtx part;
22282
22283 if (mode == VOIDmode)
22284 mode = DImode;
22285 part = gen_highpart_mode (SImode, mode, x);
22286 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22287 return;
22288 }
22289
22290 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22291 {
22292 output_operand_lossage ("invalid operand for code '%c'", code);
22293 return;
22294 }
22295
22296 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22297 return;
22298
22299 case 'H':
22300 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22301 {
22302 output_operand_lossage ("invalid operand for code '%c'", code);
22303 return;
22304 }
22305
22306 asm_fprintf (stream, "%r", REGNO (x) + 1);
22307 return;
22308
22309 case 'J':
22310 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22311 {
22312 output_operand_lossage ("invalid operand for code '%c'", code);
22313 return;
22314 }
22315
22316 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22317 return;
22318
22319 case 'K':
22320 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22321 {
22322 output_operand_lossage ("invalid operand for code '%c'", code);
22323 return;
22324 }
22325
22326 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22327 return;
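
/* Example for a little-endian DImode value held in {r0, r1} (and, for 'J'
   and 'K', a larger value continuing into r2 and r3): %Q prints r0 (least
   significant word), %R prints r1 (most significant word), %H prints r1,
   %J prints r2 and %K prints r3. With WORDS_BIG_ENDIAN the word selections
   of Q/R and J/K are swapped, while %H is unchanged.  */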
22328
22329 case 'm':
22330 asm_fprintf (stream, "%r",
22331 REG_P (XEXP (x, 0))
22332 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22333 return;
22334
22335 case 'M':
22336 asm_fprintf (stream, "{%r-%r}",
22337 REGNO (x),
22338 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22339 return;
22340
22341 /* Like 'M', but writing doubleword vector registers, for use by Neon
22342 insns. */
22343 case 'h':
22344 {
22345 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22346 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22347 if (numregs == 1)
22348 asm_fprintf (stream, "{d%d}", regno);
22349 else
22350 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22351 }
22352 return;
22353
22354 case 'd':
22355 /* CONST_TRUE_RTX means always -- that's the default. */
22356 if (x == const_true_rtx)
22357 return;
22358
22359 if (!COMPARISON_P (x))
22360 {
22361 output_operand_lossage ("invalid operand for code '%c'", code);
22362 return;
22363 }
22364
22365 fputs (arm_condition_codes[get_arm_condition_code (x)],
22366 stream);
22367 return;
22368
22369 case 'D':
22370 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22371 want to do that. */
22372 if (x == const_true_rtx)
22373 {
22374 output_operand_lossage ("instruction never executed");
22375 return;
22376 }
22377 if (!COMPARISON_P (x))
22378 {
22379 output_operand_lossage ("invalid operand for code '%c'", code);
22380 return;
22381 }
22382
22383 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22384 (get_arm_condition_code (x))],
22385 stream);
22386 return;
22387
22388 case 's':
22389 case 'V':
22390 case 'W':
22391 case 'X':
22392 case 'Y':
22393 case 'Z':
22394 /* Former Maverick support, removed after GCC-4.7. */
22395 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22396 return;
22397
22398 case 'U':
22399 if (!REG_P (x)
22400 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22401 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22402 /* Bad value for wCG register number. */
22403 {
22404 output_operand_lossage ("invalid operand for code '%c'", code);
22405 return;
22406 }
22407
22408 else
22409 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22410 return;
22411
22412 /* Print an iWMMXt control register name. */
22413 case 'w':
22414 if (!CONST_INT_P (x)
22415 || INTVAL (x) < 0
22416 || INTVAL (x) >= 16)
22417 /* Bad value for wC register number. */
22418 {
22419 output_operand_lossage ("invalid operand for code '%c'", code);
22420 return;
22421 }
22422
22423 else
22424 {
22425 static const char * wc_reg_names [16] =
22426 {
22427 "wCID", "wCon", "wCSSF", "wCASF",
22428 "wC4", "wC5", "wC6", "wC7",
22429 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22430 "wC12", "wC13", "wC14", "wC15"
22431 };
22432
22433 fputs (wc_reg_names [INTVAL (x)], stream);
22434 }
22435 return;
22436
22437 /* Print the high single-precision register of a VFP double-precision
22438 register. */
22439 case 'p':
22440 {
22441 machine_mode mode = GET_MODE (x);
22442 int regno;
22443
22444 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22445 {
22446 output_operand_lossage ("invalid operand for code '%c'", code);
22447 return;
22448 }
22449
22450 regno = REGNO (x);
22451 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22452 {
22453 output_operand_lossage ("invalid operand for code '%c'", code);
22454 return;
22455 }
22456
22457 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22458 }
22459 return;
22460
22461 /* Print a VFP/Neon double precision or quad precision register name. */
22462 case 'P':
22463 case 'q':
22464 {
22465 machine_mode mode = GET_MODE (x);
22466 int is_quad = (code == 'q');
22467 int regno;
22468
22469 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22470 {
22471 output_operand_lossage ("invalid operand for code '%c'", code);
22472 return;
22473 }
22474
22475 if (!REG_P (x)
22476 || !IS_VFP_REGNUM (REGNO (x)))
22477 {
22478 output_operand_lossage ("invalid operand for code '%c'", code);
22479 return;
22480 }
22481
22482 regno = REGNO (x);
22483 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22484 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22485 {
22486 output_operand_lossage ("invalid operand for code '%c'", code);
22487 return;
22488 }
22489
22490 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22491 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22492 }
22493 return;
22494
22495 /* These two codes print the low/high doubleword register of a Neon quad
22496 register, respectively. For pair-structure types, can also print
22497 low/high quadword registers. */
22498 case 'e':
22499 case 'f':
22500 {
22501 machine_mode mode = GET_MODE (x);
22502 int regno;
22503
22504 if ((GET_MODE_SIZE (mode) != 16
22505 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22506 {
22507 output_operand_lossage ("invalid operand for code '%c'", code);
22508 return;
22509 }
22510
22511 regno = REGNO (x);
22512 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22513 {
22514 output_operand_lossage ("invalid operand for code '%c'", code);
22515 return;
22516 }
22517
22518 if (GET_MODE_SIZE (mode) == 16)
22519 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22520 + (code == 'f' ? 1 : 0));
22521 else
22522 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22523 + (code == 'f' ? 1 : 0));
22524 }
22525 return;
22526
22527 /* Print a VFPv3 floating-point constant, represented as an integer
22528 index. */
22529 case 'G':
22530 {
22531 int index = vfp3_const_double_index (x);
22532 gcc_assert (index != -1);
22533 fprintf (stream, "%d", index);
22534 }
22535 return;
22536
22537 /* Print bits representing opcode features for Neon.
22538
22539 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22540 and polynomials as unsigned.
22541
22542 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22543
22544 Bit 2 is 1 for rounding functions, 0 otherwise. */
22545
22546 /* Identify the type as 's', 'u', 'p' or 'f'. */
22547 case 'T':
22548 {
22549 HOST_WIDE_INT bits = INTVAL (x);
22550 fputc ("uspf"[bits & 3], stream);
22551 }
22552 return;
22553
22554 /* Likewise, but signed and unsigned integers are both 'i'. */
22555 case 'F':
22556 {
22557 HOST_WIDE_INT bits = INTVAL (x);
22558 fputc ("iipf"[bits & 3], stream);
22559 }
22560 return;
22561
22562 /* As for 'T', but emit 'u' instead of 'p'. */
22563 case 't':
22564 {
22565 HOST_WIDE_INT bits = INTVAL (x);
22566 fputc ("usuf"[bits & 3], stream);
22567 }
22568 return;
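
/* Worked example for the three codes above: a bits value of 2 (unsigned
   polynomial) prints 'p' for %T, 'p' for %F and 'u' for %t; a value of 1
   (signed integer) prints 's', 'i' and 's'; a value of 3 (float) prints
   'f' for all three.  */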
22569
22570 /* Bit 2: rounding (vs none). */
22571 case 'O':
22572 {
22573 HOST_WIDE_INT bits = INTVAL (x);
22574 fputs ((bits & 4) != 0 ? "r" : "", stream);
22575 }
22576 return;
22577
22578 /* Memory operand for vld1/vst1 instruction. */
22579 case 'A':
22580 {
22581 rtx addr;
22582 bool postinc = FALSE;
22583 rtx postinc_reg = NULL;
22584 unsigned align, memsize, align_bits;
22585
22586 gcc_assert (MEM_P (x));
22587 addr = XEXP (x, 0);
22588 if (GET_CODE (addr) == POST_INC)
22589 {
22590 postinc = 1;
22591 addr = XEXP (addr, 0);
22592 }
22593 if (GET_CODE (addr) == POST_MODIFY)
22594 {
22595 postinc_reg = XEXP( XEXP (addr, 1), 1);
22596 addr = XEXP (addr, 0);
22597 }
22598 asm_fprintf (stream, "[%r", REGNO (addr));
22599
22600 /* We know the alignment of this access, so we can emit a hint in the
22601 instruction (for some alignments) as an aid to the memory subsystem
22602 of the target. */
22603 align = MEM_ALIGN (x) >> 3;
22604 memsize = MEM_SIZE (x);
22605
22606 /* Only certain alignment specifiers are supported by the hardware. */
22607 if (memsize == 32 && (align % 32) == 0)
22608 align_bits = 256;
22609 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22610 align_bits = 128;
22611 else if (memsize >= 8 && (align % 8) == 0)
22612 align_bits = 64;
22613 else
22614 align_bits = 0;
22615
22616 if (align_bits != 0)
22617 asm_fprintf (stream, ":%d", align_bits);
22618
22619 asm_fprintf (stream, "]");
22620
22621 if (postinc)
22622 fputs("!", stream);
22623 if (postinc_reg)
22624 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22625 }
22626 return;
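
/* E.g. a 16-byte access through a post-incremented pointer in r0 that is
   known to be 16-byte aligned is printed as "[r0:128]!"; if no alignment
   hint applies, the operand degenerates to plain "[r0]".  */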
22627
22628 case 'C':
22629 {
22630 rtx addr;
22631
22632 gcc_assert (MEM_P (x));
22633 addr = XEXP (x, 0);
22634 gcc_assert (REG_P (addr));
22635 asm_fprintf (stream, "[%r]", REGNO (addr));
22636 }
22637 return;
22638
22639 /* Translate an S register number into a D register number and element index. */
22640 case 'y':
22641 {
22642 machine_mode mode = GET_MODE (x);
22643 int regno;
22644
22645 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22646 {
22647 output_operand_lossage ("invalid operand for code '%c'", code);
22648 return;
22649 }
22650
22651 regno = REGNO (x);
22652 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22653 {
22654 output_operand_lossage ("invalid operand for code '%c'", code);
22655 return;
22656 }
22657
22658 regno = regno - FIRST_VFP_REGNUM;
22659 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22660 }
22661 return;
22662
22663 case 'v':
22664 gcc_assert (CONST_DOUBLE_P (x));
22665 int result;
22666 result = vfp3_const_double_for_fract_bits (x);
22667 if (result == 0)
22668 result = vfp3_const_double_for_bits (x);
22669 fprintf (stream, "#%d", result);
22670 return;
22671
22672 /* Register specifier for vld1.16/vst1.16. Translate the S register
22673 number into a D register number and element index. */
22674 case 'z':
22675 {
22676 machine_mode mode = GET_MODE (x);
22677 int regno;
22678
22679 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22680 {
22681 output_operand_lossage ("invalid operand for code '%c'", code);
22682 return;
22683 }
22684
22685 regno = REGNO (x);
22686 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22687 {
22688 output_operand_lossage ("invalid operand for code '%c'", code);
22689 return;
22690 }
22691
22692 regno = regno - FIRST_VFP_REGNUM;
22693 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22694 }
22695 return;
22696
22697 default:
22698 if (x == 0)
22699 {
22700 output_operand_lossage ("missing operand");
22701 return;
22702 }
22703
22704 switch (GET_CODE (x))
22705 {
22706 case REG:
22707 asm_fprintf (stream, "%r", REGNO (x));
22708 break;
22709
22710 case MEM:
22711 output_address (GET_MODE (x), XEXP (x, 0));
22712 break;
22713
22714 case CONST_DOUBLE:
22715 {
22716 char fpstr[20];
22717 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22718 sizeof (fpstr), 0, 1);
22719 fprintf (stream, "#%s", fpstr);
22720 }
22721 break;
22722
22723 default:
22724 gcc_assert (GET_CODE (x) != NEG);
22725 fputc ('#', stream);
22726 if (GET_CODE (x) == HIGH)
22727 {
22728 fputs (":lower16:", stream);
22729 x = XEXP (x, 0);
22730 }
22731
22732 output_addr_const (stream, x);
22733 break;
22734 }
22735 }
22736 }
22737 \f
22738 /* Target hook for printing a memory address. */
22739 static void
22740 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22741 {
22742 if (TARGET_32BIT)
22743 {
22744 int is_minus = GET_CODE (x) == MINUS;
22745
22746 if (REG_P (x))
22747 asm_fprintf (stream, "[%r]", REGNO (x));
22748 else if (GET_CODE (x) == PLUS || is_minus)
22749 {
22750 rtx base = XEXP (x, 0);
22751 rtx index = XEXP (x, 1);
22752 HOST_WIDE_INT offset = 0;
22753 if (!REG_P (base)
22754 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22755 {
22756 /* Ensure that BASE is a register
22757 (one of them must be). Also ensure that
22758 SP is not used as an index register. */
22759 std::swap (base, index);
22760 }
22761 switch (GET_CODE (index))
22762 {
22763 case CONST_INT:
22764 offset = INTVAL (index);
22765 if (is_minus)
22766 offset = -offset;
22767 asm_fprintf (stream, "[%r, #%wd]",
22768 REGNO (base), offset);
22769 break;
22770
22771 case REG:
22772 asm_fprintf (stream, "[%r, %s%r]",
22773 REGNO (base), is_minus ? "-" : "",
22774 REGNO (index));
22775 break;
22776
22777 case MULT:
22778 case ASHIFTRT:
22779 case LSHIFTRT:
22780 case ASHIFT:
22781 case ROTATERT:
22782 {
22783 asm_fprintf (stream, "[%r, %s%r",
22784 REGNO (base), is_minus ? "-" : "",
22785 REGNO (XEXP (index, 0)));
22786 arm_print_operand (stream, index, 'S');
22787 fputs ("]", stream);
22788 break;
22789 }
22790
22791 default:
22792 gcc_unreachable ();
22793 }
22794 }
22795 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22796 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22797 {
22798 gcc_assert (REG_P (XEXP (x, 0)));
22799
22800 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22801 asm_fprintf (stream, "[%r, #%s%d]!",
22802 REGNO (XEXP (x, 0)),
22803 GET_CODE (x) == PRE_DEC ? "-" : "",
22804 GET_MODE_SIZE (mode));
22805 else
22806 asm_fprintf (stream, "[%r], #%s%d",
22807 REGNO (XEXP (x, 0)),
22808 GET_CODE (x) == POST_DEC ? "-" : "",
22809 GET_MODE_SIZE (mode));
22810 }
22811 else if (GET_CODE (x) == PRE_MODIFY)
22812 {
22813 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22814 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22815 asm_fprintf (stream, "#%wd]!",
22816 INTVAL (XEXP (XEXP (x, 1), 1)));
22817 else
22818 asm_fprintf (stream, "%r]!",
22819 REGNO (XEXP (XEXP (x, 1), 1)));
22820 }
22821 else if (GET_CODE (x) == POST_MODIFY)
22822 {
22823 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22824 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22825 asm_fprintf (stream, "#%wd",
22826 INTVAL (XEXP (XEXP (x, 1), 1)));
22827 else
22828 asm_fprintf (stream, "%r",
22829 REGNO (XEXP (XEXP (x, 1), 1)));
22830 }
22831 else output_addr_const (stream, x);
22832 }
22833 else
22834 {
22835 if (REG_P (x))
22836 asm_fprintf (stream, "[%r]", REGNO (x));
22837 else if (GET_CODE (x) == POST_INC)
22838 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22839 else if (GET_CODE (x) == PLUS)
22840 {
22841 gcc_assert (REG_P (XEXP (x, 0)));
22842 if (CONST_INT_P (XEXP (x, 1)))
22843 asm_fprintf (stream, "[%r, #%wd]",
22844 REGNO (XEXP (x, 0)),
22845 INTVAL (XEXP (x, 1)));
22846 else
22847 asm_fprintf (stream, "[%r, %r]",
22848 REGNO (XEXP (x, 0)),
22849 REGNO (XEXP (x, 1)));
22850 }
22851 else
22852 output_addr_const (stream, x);
22853 }
22854 }
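
/* Typical 32-bit outputs of the hook above: "[r0]", "[r0, #-4]",
   "[r0, r1, lsl #2]" for shifted index addressing, "[r0, #8]!" for the
   pre-indexed writeback forms and "[r0], #8" for the post-indexed forms.  */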
22855 \f
22856 /* Target hook for indicating whether a punctuation character for
22857 TARGET_PRINT_OPERAND is valid. */
22858 static bool
22859 arm_print_operand_punct_valid_p (unsigned char code)
22860 {
22861 return (code == '@' || code == '|' || code == '.'
22862 || code == '(' || code == ')' || code == '#'
22863 || (TARGET_32BIT && (code == '?'))
22864 || (TARGET_THUMB2 && (code == '!'))
22865 || (TARGET_THUMB && (code == '_')));
22866 }
22867 \f
22868 /* Target hook for assembling integer objects. The ARM version needs to
22869 handle word-sized values specially. */
22870 static bool
22871 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22872 {
22873 machine_mode mode;
22874
22875 if (size == UNITS_PER_WORD && aligned_p)
22876 {
22877 fputs ("\t.word\t", asm_out_file);
22878 output_addr_const (asm_out_file, x);
22879
22880 /* Mark symbols as position independent. We only do this in the
22881 .text segment, not in the .data segment. */
22882 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22883 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22884 {
22885 /* See legitimize_pic_address for an explanation of the
22886 TARGET_VXWORKS_RTP check. */
22887 /* References to weak symbols cannot be resolved locally:
22888 they may be overridden by a non-weak definition at link
22889 time. */
22890 if (!arm_pic_data_is_text_relative
22891 || (GET_CODE (x) == SYMBOL_REF
22892 && (!SYMBOL_REF_LOCAL_P (x)
22893 || (SYMBOL_REF_DECL (x)
22894 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22895 fputs ("(GOT)", asm_out_file);
22896 else
22897 fputs ("(GOTOFF)", asm_out_file);
22898 }
22899 fputc ('\n', asm_out_file);
22900 return true;
22901 }
22902
22903 mode = GET_MODE (x);
22904
22905 if (arm_vector_mode_supported_p (mode))
22906 {
22907 int i, units;
22908
22909 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22910
22911 units = CONST_VECTOR_NUNITS (x);
22912 size = GET_MODE_UNIT_SIZE (mode);
22913
22914 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22915 for (i = 0; i < units; i++)
22916 {
22917 rtx elt = CONST_VECTOR_ELT (x, i);
22918 assemble_integer
22919 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22920 }
22921 else
22922 for (i = 0; i < units; i++)
22923 {
22924 rtx elt = CONST_VECTOR_ELT (x, i);
22925 assemble_real
22926 (*CONST_DOUBLE_REAL_VALUE (elt),
22927 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22928 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22929 }
22930
22931 return true;
22932 }
22933
22934 return default_assemble_integer (x, size, aligned_p);
22935 }
22936
22937 static void
22938 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22939 {
22940 section *s;
22941
22942 if (!TARGET_AAPCS_BASED)
22943 {
22944 (is_ctor ?
22945 default_named_section_asm_out_constructor
22946 : default_named_section_asm_out_destructor) (symbol, priority);
22947 return;
22948 }
22949
22950 /* Put these in the .init_array section, using a special relocation. */
22951 if (priority != DEFAULT_INIT_PRIORITY)
22952 {
22953 char buf[18];
22954 sprintf (buf, "%s.%.5u",
22955 is_ctor ? ".init_array" : ".fini_array",
22956 priority);
22957 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22958 }
22959 else if (is_ctor)
22960 s = ctors_section;
22961 else
22962 s = dtors_section;
22963
22964 switch_to_section (s);
22965 assemble_align (POINTER_SIZE);
22966 fputs ("\t.word\t", asm_out_file);
22967 output_addr_const (asm_out_file, symbol);
22968 fputs ("(target1)\n", asm_out_file);
22969 }
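
/* For a constructor with priority 65 this emits a ".init_array.00065"
   section containing a "(target1)" relocated word; the numbered sections
   are sorted by the linker so the entries run in priority order.  */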
22970
22971 /* Add a function to the list of static constructors. */
22972
22973 static void
22974 arm_elf_asm_constructor (rtx symbol, int priority)
22975 {
22976 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22977 }
22978
22979 /* Add a function to the list of static destructors. */
22980
22981 static void
22982 arm_elf_asm_destructor (rtx symbol, int priority)
22983 {
22984 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22985 }
22986 \f
22987 /* A finite state machine takes care of noticing whether or not instructions
22988 can be conditionally executed, and thus decrease execution time and code
22989 size by deleting branch instructions. The fsm is controlled by
22990 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22991
22992 /* The states of the fsm controlling condition codes are:
22993 0: normal, do nothing special
22994 1: make ASM_OUTPUT_OPCODE not output this instruction
22995 2: make ASM_OUTPUT_OPCODE not output this instruction
22996 3: make instructions conditional
22997 4: make instructions conditional
22998
22999 State transitions (state->state by whom under condition):
23000 0 -> 1 final_prescan_insn if the `target' is a label
23001 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
23002 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
23003 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
23004 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
23005 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
23006 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
23007 (the target insn is arm_target_insn).
23008
23009 If the jump clobbers the conditions then we use states 2 and 4.
23010
23011 A similar thing can be done with conditional return insns.
23012
23013 XXX In case the `target' is an unconditional branch, this conditionalising
23014 of the instructions always reduces code size, but not always execution
23015 time. But then, I want to reduce the code size to somewhere near what
23016 /bin/cc produces. */
23017
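/* A simple example of the transformation this fsm enables:

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
   .L1:

   becomes

	cmp	r0, #0
	addne	r1, r1, #1

   provided the skipped instructions can all be conditionalised and their
   number does not exceed max_insns_skipped.  */
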
23018 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
23019 instructions. When a COND_EXEC instruction is seen the subsequent
23020 instructions are scanned so that multiple conditional instructions can be
23021 combined into a single IT block. arm_condexec_count and arm_condexec_mask
23022 specify the length and true/false mask for the IT block. These will be
23023 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
23024
23025 /* Returns the index of the ARM condition code string in
23026 `arm_condition_codes', or ARM_NV if the comparison is invalid.
23027 COMPARISON should be an rtx like `(eq (...) (...))'. */
23028
23029 enum arm_cond_code
23030 maybe_get_arm_condition_code (rtx comparison)
23031 {
23032 machine_mode mode = GET_MODE (XEXP (comparison, 0));
23033 enum arm_cond_code code;
23034 enum rtx_code comp_code = GET_CODE (comparison);
23035
23036 if (GET_MODE_CLASS (mode) != MODE_CC)
23037 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
23038 XEXP (comparison, 1));
23039
23040 switch (mode)
23041 {
23042 case E_CC_DNEmode: code = ARM_NE; goto dominance;
23043 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
23044 case E_CC_DGEmode: code = ARM_GE; goto dominance;
23045 case E_CC_DGTmode: code = ARM_GT; goto dominance;
23046 case E_CC_DLEmode: code = ARM_LE; goto dominance;
23047 case E_CC_DLTmode: code = ARM_LT; goto dominance;
23048 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
23049 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
23050 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
23051 case E_CC_DLTUmode: code = ARM_CC;
23052
23053 dominance:
23054 if (comp_code == EQ)
23055 return ARM_INVERSE_CONDITION_CODE (code);
23056 if (comp_code == NE)
23057 return code;
23058 return ARM_NV;
23059
23060 case E_CC_NOOVmode:
23061 switch (comp_code)
23062 {
23063 case NE: return ARM_NE;
23064 case EQ: return ARM_EQ;
23065 case GE: return ARM_PL;
23066 case LT: return ARM_MI;
23067 default: return ARM_NV;
23068 }
23069
23070 case E_CC_Zmode:
23071 switch (comp_code)
23072 {
23073 case NE: return ARM_NE;
23074 case EQ: return ARM_EQ;
23075 default: return ARM_NV;
23076 }
23077
23078 case E_CC_Nmode:
23079 switch (comp_code)
23080 {
23081 case NE: return ARM_MI;
23082 case EQ: return ARM_PL;
23083 default: return ARM_NV;
23084 }
23085
23086 case E_CCFPEmode:
23087 case E_CCFPmode:
23088 /* We can handle all cases except UNEQ and LTGT. */
23089 switch (comp_code)
23090 {
23091 case GE: return ARM_GE;
23092 case GT: return ARM_GT;
23093 case LE: return ARM_LS;
23094 case LT: return ARM_MI;
23095 case NE: return ARM_NE;
23096 case EQ: return ARM_EQ;
23097 case ORDERED: return ARM_VC;
23098 case UNORDERED: return ARM_VS;
23099 case UNLT: return ARM_LT;
23100 case UNLE: return ARM_LE;
23101 case UNGT: return ARM_HI;
23102 case UNGE: return ARM_PL;
23103 /* UNEQ and LTGT do not have a representation. */
23104 case UNEQ: /* Fall through. */
23105 case LTGT: /* Fall through. */
23106 default: return ARM_NV;
23107 }
23108
23109 case E_CC_SWPmode:
23110 switch (comp_code)
23111 {
23112 case NE: return ARM_NE;
23113 case EQ: return ARM_EQ;
23114 case GE: return ARM_LE;
23115 case GT: return ARM_LT;
23116 case LE: return ARM_GE;
23117 case LT: return ARM_GT;
23118 case GEU: return ARM_LS;
23119 case GTU: return ARM_CC;
23120 case LEU: return ARM_CS;
23121 case LTU: return ARM_HI;
23122 default: return ARM_NV;
23123 }
23124
23125 case E_CC_Cmode:
23126 switch (comp_code)
23127 {
23128 case LTU: return ARM_CS;
23129 case GEU: return ARM_CC;
23130 case NE: return ARM_CS;
23131 case EQ: return ARM_CC;
23132 default: return ARM_NV;
23133 }
23134
23135 case E_CC_CZmode:
23136 switch (comp_code)
23137 {
23138 case NE: return ARM_NE;
23139 case EQ: return ARM_EQ;
23140 case GEU: return ARM_CS;
23141 case GTU: return ARM_HI;
23142 case LEU: return ARM_LS;
23143 case LTU: return ARM_CC;
23144 default: return ARM_NV;
23145 }
23146
23147 case E_CC_NCVmode:
23148 switch (comp_code)
23149 {
23150 case GE: return ARM_GE;
23151 case LT: return ARM_LT;
23152 case GEU: return ARM_CS;
23153 case LTU: return ARM_CC;
23154 default: return ARM_NV;
23155 }
23156
23157 case E_CC_Vmode:
23158 switch (comp_code)
23159 {
23160 case NE: return ARM_VS;
23161 case EQ: return ARM_VC;
23162 default: return ARM_NV;
23163 }
23164
23165 case E_CCmode:
23166 switch (comp_code)
23167 {
23168 case NE: return ARM_NE;
23169 case EQ: return ARM_EQ;
23170 case GE: return ARM_GE;
23171 case GT: return ARM_GT;
23172 case LE: return ARM_LE;
23173 case LT: return ARM_LT;
23174 case GEU: return ARM_CS;
23175 case GTU: return ARM_HI;
23176 case LEU: return ARM_LS;
23177 case LTU: return ARM_CC;
23178 default: return ARM_NV;
23179 }
23180
23181 default: gcc_unreachable ();
23182 }
23183 }
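
/* For example, a (ge (reg:CC_NOOV ...) (const_int 0)) comparison maps to
   ARM_PL rather than ARM_GE, because the overflow flag is not valid in
   CC_NOOVmode; similarly CC_SWPmode returns the swapped condition
   (GT -> LT and so on) because the comparison operands were exchanged.  */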
23184
23185 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
23186 static enum arm_cond_code
23187 get_arm_condition_code (rtx comparison)
23188 {
23189 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
23190 gcc_assert (code != ARM_NV);
23191 return code;
23192 }
23193
23194 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
23195 code registers when not targeting Thumb1. The VFP condition register
23196 only exists when generating hard-float code. */
23197 static bool
23198 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
23199 {
23200 if (!TARGET_32BIT)
23201 return false;
23202
23203 *p1 = CC_REGNUM;
23204 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
23205 return true;
23206 }
23207
23208 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23209 instructions. */
23210 void
23211 thumb2_final_prescan_insn (rtx_insn *insn)
23212 {
23213 rtx_insn *first_insn = insn;
23214 rtx body = PATTERN (insn);
23215 rtx predicate;
23216 enum arm_cond_code code;
23217 int n;
23218 int mask;
23219 int max;
23220
23221 /* max_insns_skipped in the tune was already taken into account in the
23222 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
23223 just emit the IT blocks as large as we can; it does not make sense to split
23224 the IT blocks. */
23225 max = MAX_INSN_PER_IT_BLOCK;
23226
23227 /* Remove the previous insn from the count of insns to be output. */
23228 if (arm_condexec_count)
23229 arm_condexec_count--;
23230
23231 /* Nothing to do if we are already inside a conditional block. */
23232 if (arm_condexec_count)
23233 return;
23234
23235 if (GET_CODE (body) != COND_EXEC)
23236 return;
23237
23238 /* Conditional jumps are implemented directly. */
23239 if (JUMP_P (insn))
23240 return;
23241
23242 predicate = COND_EXEC_TEST (body);
23243 arm_current_cc = get_arm_condition_code (predicate);
23244
23245 n = get_attr_ce_count (insn);
23246 arm_condexec_count = 1;
23247 arm_condexec_mask = (1 << n) - 1;
23248 arm_condexec_masklen = n;
23249 /* See if subsequent instructions can be combined into the same block. */
23250 for (;;)
23251 {
23252 insn = next_nonnote_insn (insn);
23253
23254 /* Jumping into the middle of an IT block is illegal, so a label or
23255 barrier terminates the block. */
23256 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23257 break;
23258
23259 body = PATTERN (insn);
23260 /* USE and CLOBBER aren't really insns, so just skip them. */
23261 if (GET_CODE (body) == USE
23262 || GET_CODE (body) == CLOBBER)
23263 continue;
23264
23265 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23266 if (GET_CODE (body) != COND_EXEC)
23267 break;
23268 /* Maximum number of conditionally executed instructions in a block. */
23269 n = get_attr_ce_count (insn);
23270 if (arm_condexec_masklen + n > max)
23271 break;
23272
23273 predicate = COND_EXEC_TEST (body);
23274 code = get_arm_condition_code (predicate);
23275 mask = (1 << n) - 1;
23276 if (arm_current_cc == code)
23277 arm_condexec_mask |= (mask << arm_condexec_masklen);
23278 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23279 break;
23280
23281 arm_condexec_count++;
23282 arm_condexec_masklen += n;
23283
23284 /* A jump must be the last instruction in a conditional block. */
23285 if (JUMP_P (insn))
23286 break;
23287 }
23288 /* Restore recog_data (getting the attributes of other insns can
23289 destroy this array, but final.c assumes that it remains intact
23290 across this call). */
23291 extract_constrain_insn_cached (first_insn);
23292 }
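
/* Example of the mask bookkeeping above: four single conditional insns with
   conditions EQ, NE, EQ, EQ give arm_condexec_count == 4,
   arm_condexec_masklen == 4 and arm_condexec_mask == 0b1101 (bit 0 is the
   first insn), i.e. a then/else/then/then block for condition EQ.  */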
23293
23294 void
23295 arm_final_prescan_insn (rtx_insn *insn)
23296 {
23297 /* BODY will hold the body of INSN. */
23298 rtx body = PATTERN (insn);
23299
23300 /* This will be 1 if trying to repeat the trick, and things need to be
23301 reversed if it appears to fail. */
23302 int reverse = 0;
23303
23304 /* If we start with a return insn, we only succeed if we find another one. */
23305 int seeking_return = 0;
23306 enum rtx_code return_code = UNKNOWN;
23307
23308 /* START_INSN will hold the insn from where we start looking. This is the
23309 first insn after the following code_label if REVERSE is true. */
23310 rtx_insn *start_insn = insn;
23311
23312 /* If in state 4, check if the target branch is reached, in order to
23313 change back to state 0. */
23314 if (arm_ccfsm_state == 4)
23315 {
23316 if (insn == arm_target_insn)
23317 {
23318 arm_target_insn = NULL;
23319 arm_ccfsm_state = 0;
23320 }
23321 return;
23322 }
23323
23324 /* If in state 3, it is possible to repeat the trick, if this insn is an
23325 unconditional branch to a label, and immediately following this branch
23326 is the previous target label which is only used once, and the label this
23327 branch jumps to is not too far off. */
23328 if (arm_ccfsm_state == 3)
23329 {
23330 if (simplejump_p (insn))
23331 {
23332 start_insn = next_nonnote_insn (start_insn);
23333 if (BARRIER_P (start_insn))
23334 {
23335 /* XXX Isn't this always a barrier? */
23336 start_insn = next_nonnote_insn (start_insn);
23337 }
23338 if (LABEL_P (start_insn)
23339 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23340 && LABEL_NUSES (start_insn) == 1)
23341 reverse = TRUE;
23342 else
23343 return;
23344 }
23345 else if (ANY_RETURN_P (body))
23346 {
23347 start_insn = next_nonnote_insn (start_insn);
23348 if (BARRIER_P (start_insn))
23349 start_insn = next_nonnote_insn (start_insn);
23350 if (LABEL_P (start_insn)
23351 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23352 && LABEL_NUSES (start_insn) == 1)
23353 {
23354 reverse = TRUE;
23355 seeking_return = 1;
23356 return_code = GET_CODE (body);
23357 }
23358 else
23359 return;
23360 }
23361 else
23362 return;
23363 }
23364
23365 gcc_assert (!arm_ccfsm_state || reverse);
23366 if (!JUMP_P (insn))
23367 return;
23368
23369 /* This jump might be paralleled with a clobber of the condition codes;
23370 the jump should always come first. */
23371 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23372 body = XVECEXP (body, 0, 0);
23373
23374 if (reverse
23375 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23376 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23377 {
23378 int insns_skipped;
23379 int fail = FALSE, succeed = FALSE;
23380 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23381 int then_not_else = TRUE;
23382 rtx_insn *this_insn = start_insn;
23383 rtx label = 0;
23384
23385 /* Register the insn jumped to. */
23386 if (reverse)
23387 {
23388 if (!seeking_return)
23389 label = XEXP (SET_SRC (body), 0);
23390 }
23391 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23392 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23393 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23394 {
23395 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23396 then_not_else = FALSE;
23397 }
23398 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23399 {
23400 seeking_return = 1;
23401 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23402 }
23403 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23404 {
23405 seeking_return = 1;
23406 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23407 then_not_else = FALSE;
23408 }
23409 else
23410 gcc_unreachable ();
23411
23412 /* See how many insns this branch skips, and what kind of insns. If all
23413 insns are okay, and the label or unconditional branch to the same
23414 label is not too far away, succeed. */
23415 for (insns_skipped = 0;
23416 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23417 {
23418 rtx scanbody;
23419
23420 this_insn = next_nonnote_insn (this_insn);
23421 if (!this_insn)
23422 break;
23423
23424 switch (GET_CODE (this_insn))
23425 {
23426 case CODE_LABEL:
23427 /* Succeed if it is the target label, otherwise fail since
23428 control falls in from somewhere else. */
23429 if (this_insn == label)
23430 {
23431 arm_ccfsm_state = 1;
23432 succeed = TRUE;
23433 }
23434 else
23435 fail = TRUE;
23436 break;
23437
23438 case BARRIER:
23439 /* Succeed if the following insn is the target label.
23440 Otherwise fail.
23441 If return insns are used then the last insn in a function
23442 will be a barrier. */
23443 this_insn = next_nonnote_insn (this_insn);
23444 if (this_insn && this_insn == label)
23445 {
23446 arm_ccfsm_state = 1;
23447 succeed = TRUE;
23448 }
23449 else
23450 fail = TRUE;
23451 break;
23452
23453 case CALL_INSN:
23454 /* The AAPCS says that conditional calls should not be
23455 used since they make interworking inefficient (the
23456 linker can't transform BL<cond> into BLX). That's
23457 only a problem if the machine has BLX. */
23458 if (arm_arch5)
23459 {
23460 fail = TRUE;
23461 break;
23462 }
23463
23464 /* Succeed if the following insn is the target label, or
23465 if the following two insns are a barrier and the
23466 target label. */
23467 this_insn = next_nonnote_insn (this_insn);
23468 if (this_insn && BARRIER_P (this_insn))
23469 this_insn = next_nonnote_insn (this_insn);
23470
23471 if (this_insn && this_insn == label
23472 && insns_skipped < max_insns_skipped)
23473 {
23474 arm_ccfsm_state = 1;
23475 succeed = TRUE;
23476 }
23477 else
23478 fail = TRUE;
23479 break;
23480
23481 case JUMP_INSN:
23482 /* If this is an unconditional branch to the same label, succeed.
23483 If it is to another label, do nothing. If it is conditional,
23484 fail. */
23485 /* XXX Probably, the tests for SET and the PC are
23486 unnecessary. */
23487
23488 scanbody = PATTERN (this_insn);
23489 if (GET_CODE (scanbody) == SET
23490 && GET_CODE (SET_DEST (scanbody)) == PC)
23491 {
23492 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23493 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23494 {
23495 arm_ccfsm_state = 2;
23496 succeed = TRUE;
23497 }
23498 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23499 fail = TRUE;
23500 }
23501 /* Fail if a conditional return is undesirable (e.g. on a
23502 StrongARM), but still allow this if optimizing for size. */
23503 else if (GET_CODE (scanbody) == return_code
23504 && !use_return_insn (TRUE, NULL)
23505 && !optimize_size)
23506 fail = TRUE;
23507 else if (GET_CODE (scanbody) == return_code)
23508 {
23509 arm_ccfsm_state = 2;
23510 succeed = TRUE;
23511 }
23512 else if (GET_CODE (scanbody) == PARALLEL)
23513 {
23514 switch (get_attr_conds (this_insn))
23515 {
23516 case CONDS_NOCOND:
23517 break;
23518 default:
23519 fail = TRUE;
23520 break;
23521 }
23522 }
23523 else
23524 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23525
23526 break;
23527
23528 case INSN:
23529 /* Instructions using or affecting the condition codes make it
23530 fail. */
23531 scanbody = PATTERN (this_insn);
23532 if (!(GET_CODE (scanbody) == SET
23533 || GET_CODE (scanbody) == PARALLEL)
23534 || get_attr_conds (this_insn) != CONDS_NOCOND)
23535 fail = TRUE;
23536 break;
23537
23538 default:
23539 break;
23540 }
23541 }
23542 if (succeed)
23543 {
23544 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23545 arm_target_label = CODE_LABEL_NUMBER (label);
23546 else
23547 {
23548 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23549
23550 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23551 {
23552 this_insn = next_nonnote_insn (this_insn);
23553 gcc_assert (!this_insn
23554 || (!BARRIER_P (this_insn)
23555 && !LABEL_P (this_insn)));
23556 }
23557 if (!this_insn)
23558 {
23559 /* Oh, dear! We ran off the end; give up. */
23560 extract_constrain_insn_cached (insn);
23561 arm_ccfsm_state = 0;
23562 arm_target_insn = NULL;
23563 return;
23564 }
23565 arm_target_insn = this_insn;
23566 }
23567
23568 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23569 what it was. */
23570 if (!reverse)
23571 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23572
23573 if (reverse || then_not_else)
23574 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23575 }
23576
23577 /* Restore recog_data (getting the attributes of other insns can
23578 destroy this array, but final.c assumes that it remains intact
23579 across this call). */
23580 extract_constrain_insn_cached (insn);
23581 }
23582 }
23583
23584 /* Output IT instructions. */
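/* An illustrative example of the output below (assuming each insn in
   the block has a ce_count of 1): for a three-insn block in which the
   second insn uses the inverse of arm_current_cc == EQ,
   arm_condexec_mask has bits 0 and 2 set, BUFF becomes "tet", and the
   function prints "itet" followed by "eq", i.e. the IT instruction
   that opens the block.  */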
23585 void
23586 thumb2_asm_output_opcode (FILE * stream)
23587 {
23588 char buff[5];
23589 int n;
23590
23591 if (arm_condexec_mask)
23592 {
23593 for (n = 0; n < arm_condexec_masklen; n++)
23594 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23595 buff[n] = 0;
23596 asm_fprintf (stream, "i%s\t%s\n\t", buff,
23597 arm_condition_codes[arm_current_cc]);
23598 arm_condexec_mask = 0;
23599 }
23600 }
23601
23602 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
23603 UNITS_PER_WORD bytes wide. */
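/* For example, ARM_NUM_REGS (DImode) == 2, so a DImode value in the
   core registers occupies a register pair, whereas the special
   registers caught by the early return below (e.g. the iWMMXt
   registers) always hold a value in a single register regardless of
   its mode.  */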
23604 static unsigned int
23605 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
23606 {
23607 if (TARGET_32BIT
23608 && regno > PC_REGNUM
23609 && regno != FRAME_POINTER_REGNUM
23610 && regno != ARG_POINTER_REGNUM
23611 && !IS_VFP_REGNUM (regno))
23612 return 1;
23613
23614 return ARM_NUM_REGS (mode);
23615 }
23616
23617 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23618 static bool
23619 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23620 {
23621 if (GET_MODE_CLASS (mode) == MODE_CC)
23622 return (regno == CC_REGNUM
23623 || (TARGET_HARD_FLOAT
23624 && regno == VFPCC_REGNUM));
23625
23626 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23627 return false;
23628
23629 if (TARGET_THUMB1)
23630 /* For the Thumb we only allow values bigger than SImode in
23631 registers 0 - 6, so that there is always a second low
23632 register available to hold the upper part of the value.
23633 We probably ought to ensure that the register is the
23634 start of an even-numbered register pair. */
23635 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23636
23637 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23638 {
23639 if (mode == SFmode || mode == SImode)
23640 return VFP_REGNO_OK_FOR_SINGLE (regno);
23641
23642 if (mode == DFmode)
23643 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23644
23645 if (mode == HFmode)
23646 return VFP_REGNO_OK_FOR_SINGLE (regno);
23647
23648 /* VFP registers can hold HImode values. */
23649 if (mode == HImode)
23650 return VFP_REGNO_OK_FOR_SINGLE (regno);
23651
23652 if (TARGET_NEON)
23653 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23654 || (VALID_NEON_QREG_MODE (mode)
23655 && NEON_REGNO_OK_FOR_QUAD (regno))
23656 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23657 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23658 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23659 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23660 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23661
23662 return false;
23663 }
23664
23665 if (TARGET_REALLY_IWMMXT)
23666 {
23667 if (IS_IWMMXT_GR_REGNUM (regno))
23668 return mode == SImode;
23669
23670 if (IS_IWMMXT_REGNUM (regno))
23671 return VALID_IWMMXT_REG_MODE (mode);
23672 }
23673
23674 /* We allow almost any value to be stored in the general registers.
23675 Restrict doubleword quantities to even register pairs in ARM state
23676 so that we can use ldrd. Do not allow very large Neon structure
23677 opaque modes in general registers; they would use too many. */
23678 if (regno <= LAST_ARM_REGNUM)
23679 {
23680 if (ARM_NUM_REGS (mode) > 4)
23681 return false;
23682
23683 if (TARGET_THUMB2)
23684 return true;
23685
23686 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23687 }
23688
23689 if (regno == FRAME_POINTER_REGNUM
23690 || regno == ARG_POINTER_REGNUM)
23691 /* We only allow integers in the fake hard registers. */
23692 return GET_MODE_CLASS (mode) == MODE_INT;
23693
23694 return false;
23695 }
23696
23697 /* Implement TARGET_MODES_TIEABLE_P. */
23698
23699 static bool
23700 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23701 {
23702 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23703 return true;
23704
23705 /* We specifically want to allow elements of "structure" modes to
23706 be tieable to the structure. This more general condition allows
23707 other rarer situations too. */
23708 if (TARGET_NEON
23709 && (VALID_NEON_DREG_MODE (mode1)
23710 || VALID_NEON_QREG_MODE (mode1)
23711 || VALID_NEON_STRUCT_MODE (mode1))
23712 && (VALID_NEON_DREG_MODE (mode2)
23713 || VALID_NEON_QREG_MODE (mode2)
23714 || VALID_NEON_STRUCT_MODE (mode2)))
23715 return true;
23716
23717 return false;
23718 }
23719
23720 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23721 not used in arm mode. */
23722
23723 enum reg_class
23724 arm_regno_class (int regno)
23725 {
23726 if (regno == PC_REGNUM)
23727 return NO_REGS;
23728
23729 if (TARGET_THUMB1)
23730 {
23731 if (regno == STACK_POINTER_REGNUM)
23732 return STACK_REG;
23733 if (regno == CC_REGNUM)
23734 return CC_REG;
23735 if (regno < 8)
23736 return LO_REGS;
23737 return HI_REGS;
23738 }
23739
23740 if (TARGET_THUMB2 && regno < 8)
23741 return LO_REGS;
23742
23743 if ( regno <= LAST_ARM_REGNUM
23744 || regno == FRAME_POINTER_REGNUM
23745 || regno == ARG_POINTER_REGNUM)
23746 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23747
23748 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23749 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23750
23751 if (IS_VFP_REGNUM (regno))
23752 {
23753 if (regno <= D7_VFP_REGNUM)
23754 return VFP_D0_D7_REGS;
23755 else if (regno <= LAST_LO_VFP_REGNUM)
23756 return VFP_LO_REGS;
23757 else
23758 return VFP_HI_REGS;
23759 }
23760
23761 if (IS_IWMMXT_REGNUM (regno))
23762 return IWMMXT_REGS;
23763
23764 if (IS_IWMMXT_GR_REGNUM (regno))
23765 return IWMMXT_GR_REGS;
23766
23767 return NO_REGS;
23768 }
23769
23770 /* Handle a special case when computing the offset
23771 of an argument from the frame pointer. */
23772 int
23773 arm_debugger_arg_offset (int value, rtx addr)
23774 {
23775 rtx_insn *insn;
23776
23777 /* We are only interested if dbxout_parms() failed to compute the offset. */
23778 if (value != 0)
23779 return 0;
23780
23781 /* We can only cope with the case where the address is held in a register. */
23782 if (!REG_P (addr))
23783 return 0;
23784
23785 /* If we are using the frame pointer to point at the argument, then
23786 an offset of 0 is correct. */
23787 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23788 return 0;
23789
23790 /* If we are using the stack pointer to point at the
23791 argument, then an offset of 0 is correct. */
23792 /* ??? Check this is consistent with thumb2 frame layout. */
23793 if ((TARGET_THUMB || !frame_pointer_needed)
23794 && REGNO (addr) == SP_REGNUM)
23795 return 0;
23796
23797 /* Oh dear. The argument is pointed to by a register rather
23798 than being held in a register, or being stored at a known
23799 offset from the frame pointer. Since GDB only understands
23800 those two kinds of argument we must translate the address
23801 held in the register into an offset from the frame pointer.
23802 We do this by searching through the insns for the function
23803 looking to see where this register gets its value. If the
23804 register is initialized from the frame pointer plus an offset
23805 then we are in luck and we can continue, otherwise we give up.
23806
23807 This code is exercised by producing debugging information
23808 for a function with arguments like this:
23809
23810 double func (double a, double b, int c, double d) {return d;}
23811
23812 Without this code the stab for parameter 'd' will be set to
23813 an offset of 0 from the frame pointer, rather than 8. */
23814
23815 /* The if() statement says:
23816
23817 If the insn is a normal instruction
23818 and if the insn is setting the value in a register
23819 and if the register being set is the register holding the address of the argument
23820 and if the address is computed by an addition
23821 that involves adding a constant integer
23822 to a register
23823 which is the frame pointer
23824
23825 then... */
23826
23827 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23828 {
23829 if ( NONJUMP_INSN_P (insn)
23830 && GET_CODE (PATTERN (insn)) == SET
23831 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23832 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23833 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23834 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23835 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23836 )
23837 {
23838 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23839
23840 break;
23841 }
23842 }
23843
23844 if (value == 0)
23845 {
23846 debug_rtx (addr);
23847 warning (0, "unable to compute real location of stacked parameter");
23848 value = 8; /* XXX magic hack */
23849 }
23850
23851 return value;
23852 }
23853 \f
23854 /* Implement TARGET_PROMOTED_TYPE. */
23855
23856 static tree
23857 arm_promoted_type (const_tree t)
23858 {
23859 if (SCALAR_FLOAT_TYPE_P (t)
23860 && TYPE_PRECISION (t) == 16
23861 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23862 return float_type_node;
23863 return NULL_TREE;
23864 }
23865
23866 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23867 This simply adds HFmode as a supported mode; even though we don't
23868 implement arithmetic on this type directly, it's supported by
23869 optabs conversions, much the way the double-word arithmetic is
23870 special-cased in the default hook. */
23871
23872 static bool
23873 arm_scalar_mode_supported_p (scalar_mode mode)
23874 {
23875 if (mode == HFmode)
23876 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23877 else if (ALL_FIXED_POINT_MODE_P (mode))
23878 return true;
23879 else
23880 return default_scalar_mode_supported_p (mode);
23881 }
23882
23883 /* Set the value of FLT_EVAL_METHOD.
23884 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23885
23886 0: evaluate all operations and constants, whose semantic type has at
23887 most the range and precision of type float, to the range and
23888 precision of float; evaluate all other operations and constants to
23889 the range and precision of the semantic type;
23890
23891 N, where _FloatN is a supported interchange floating type:
23892 evaluate all operations and constants, whose semantic type has at
23893 most the range and precision of _FloatN type, to the range and
23894 precision of the _FloatN type; evaluate all other operations and
23895 constants to the range and precision of the semantic type;
23896
23897 If we have the ARMv8.2-A extensions then we support _Float16 in native
23898 precision, so we should set this to 16. Otherwise, we support the type,
23899 but want to evaluate expressions in float precision, so set this to
23900 0. */
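/* Concretely (an illustrative example rather than an exhaustive rule):
   with the ARMv8.2-A FP16 extension enabled (e.g. via
   -march=armv8.2-a+fp16), an expression such as a * b + c on _Float16
   operands can be evaluated directly in half precision
   (FLT_EVAL_METHOD == 16); without it, the operands are evaluated to
   float precision first (FLT_EVAL_METHOD == 0).  */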
23901
23902 static enum flt_eval_method
23903 arm_excess_precision (enum excess_precision_type type)
23904 {
23905 switch (type)
23906 {
23907 case EXCESS_PRECISION_TYPE_FAST:
23908 case EXCESS_PRECISION_TYPE_STANDARD:
23909 /* We can calculate either in 16-bit range and precision or
23910 32-bit range and precision. Make that decision based on whether
23911 we have native support for the ARMv8.2-A 16-bit floating-point
23912 instructions or not. */
23913 return (TARGET_VFP_FP16INST
23914 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23915 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23916 case EXCESS_PRECISION_TYPE_IMPLICIT:
23917 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23918 default:
23919 gcc_unreachable ();
23920 }
23921 return FLT_EVAL_METHOD_UNPREDICTABLE;
23922 }
23923
23924
23925 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23926 _Float16 if we are using anything other than ieee format for 16-bit
23927 floating point. Otherwise, punt to the default implementation. */
23928 static opt_scalar_float_mode
23929 arm_floatn_mode (int n, bool extended)
23930 {
23931 if (!extended && n == 16)
23932 {
23933 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23934 return HFmode;
23935 return opt_scalar_float_mode ();
23936 }
23937
23938 return default_floatn_mode (n, extended);
23939 }
23940
23941
23942 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23943 not to early-clobber SRC registers in the process.
23944
23945 We assume that the operands described by SRC and DEST represent a
23946 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23947 number of components into which the copy has been decomposed. */
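/* For example, copying {d0, d1, d2} to {d1, d2, d3} must be emitted as
   d2->d3, d1->d2, d0->d1, i.e. in reverse order, so that no source
   register is overwritten before it has been read; the REGNO
   comparison below arranges exactly that.  */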
23948 void
23949 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23950 {
23951 unsigned int i;
23952
23953 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23954 || REGNO (operands[0]) < REGNO (operands[1]))
23955 {
23956 for (i = 0; i < count; i++)
23957 {
23958 operands[2 * i] = dest[i];
23959 operands[2 * i + 1] = src[i];
23960 }
23961 }
23962 else
23963 {
23964 for (i = 0; i < count; i++)
23965 {
23966 operands[2 * i] = dest[count - i - 1];
23967 operands[2 * i + 1] = src[count - i - 1];
23968 }
23969 }
23970 }
23971
23972 /* Split operands into moves from op[1] + op[2] into op[0]. */
23973
23974 void
23975 neon_split_vcombine (rtx operands[3])
23976 {
23977 unsigned int dest = REGNO (operands[0]);
23978 unsigned int src1 = REGNO (operands[1]);
23979 unsigned int src2 = REGNO (operands[2]);
23980 machine_mode halfmode = GET_MODE (operands[1]);
23981 unsigned int halfregs = REG_NREGS (operands[1]);
23982 rtx destlo, desthi;
23983
23984 if (src1 == dest && src2 == dest + halfregs)
23985 {
23986 /* No-op move. Can't split to nothing; emit something. */
23987 emit_note (NOTE_INSN_DELETED);
23988 return;
23989 }
23990
23991 /* Preserve register attributes for variable tracking. */
23992 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23993 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
23994 GET_MODE_SIZE (halfmode));
23995
23996 /* Special case of reversed high/low parts. Use VSWP. */
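/* For example (illustrative): combining d1 and d0, in that order, into
   q0 (which overlaps d0/d1) hits this case; the two SETs are emitted
   as one parallel, which can then be matched as a single VSWP.  */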
23997 if (src2 == dest && src1 == dest + halfregs)
23998 {
23999 rtx x = gen_rtx_SET (destlo, operands[1]);
24000 rtx y = gen_rtx_SET (desthi, operands[2]);
24001 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
24002 return;
24003 }
24004
24005 if (!reg_overlap_mentioned_p (operands[2], destlo))
24006 {
24007 /* Try to avoid unnecessary moves if part of the result
24008 is in the right place already. */
24009 if (src1 != dest)
24010 emit_move_insn (destlo, operands[1]);
24011 if (src2 != dest + halfregs)
24012 emit_move_insn (desthi, operands[2]);
24013 }
24014 else
24015 {
24016 if (src2 != dest + halfregs)
24017 emit_move_insn (desthi, operands[2]);
24018 if (src1 != dest)
24019 emit_move_insn (destlo, operands[1]);
24020 }
24021 }
24022 \f
24023 /* Return the number (counting from 0) of
24024 the least significant set bit in MASK. */
24025
24026 inline static int
24027 number_of_first_bit_set (unsigned mask)
24028 {
24029 return ctz_hwi (mask);
24030 }
24031
24032 /* Like emit_multi_reg_push, but allowing for a different set of
24033 registers to be described as saved. MASK is the set of registers
24034 to be saved; REAL_REGS is the set of registers to be described as
24035 saved. If REAL_REGS is 0, only describe the stack adjustment. */
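/* A rough sketch of the RTL generated for MASK covering {r4, r5}:

     (parallel
       [(set (mem:BLK (pre_modify (reg sp) (plus (reg sp) (const_int -8))))
	     (unspec:BLK [(reg:SI 4)] UNSPEC_PUSH_MULT))
	(use (reg:SI 5))])

   together with a REG_FRAME_RELATED_EXPR note describing the stack
   adjustment and the individual stores of REAL_REGS for the unwinder.  */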
24036
24037 static rtx_insn *
24038 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
24039 {
24040 unsigned long regno;
24041 rtx par[10], tmp, reg;
24042 rtx_insn *insn;
24043 int i, j;
24044
24045 /* Build the parallel of the registers actually being stored. */
24046 for (i = 0; mask; ++i, mask &= mask - 1)
24047 {
24048 regno = ctz_hwi (mask);
24049 reg = gen_rtx_REG (SImode, regno);
24050
24051 if (i == 0)
24052 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
24053 else
24054 tmp = gen_rtx_USE (VOIDmode, reg);
24055
24056 par[i] = tmp;
24057 }
24058
24059 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24060 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
24061 tmp = gen_frame_mem (BLKmode, tmp);
24062 tmp = gen_rtx_SET (tmp, par[0]);
24063 par[0] = tmp;
24064
24065 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
24066 insn = emit_insn (tmp);
24067
24068 /* Always build the stack adjustment note for unwind info. */
24069 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24070 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
24071 par[0] = tmp;
24072
24073 /* Build the parallel of the registers recorded as saved for unwind. */
24074 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
24075 {
24076 regno = ctz_hwi (real_regs);
24077 reg = gen_rtx_REG (SImode, regno);
24078
24079 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
24080 tmp = gen_frame_mem (SImode, tmp);
24081 tmp = gen_rtx_SET (tmp, reg);
24082 RTX_FRAME_RELATED_P (tmp) = 1;
24083 par[j + 1] = tmp;
24084 }
24085
24086 if (j == 0)
24087 tmp = par[0];
24088 else
24089 {
24090 RTX_FRAME_RELATED_P (par[0]) = 1;
24091 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
24092 }
24093
24094 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
24095
24096 return insn;
24097 }
24098
24099 /* Emit code to pop registers from the stack. F is the
24100 assembly file. MASK is the registers to pop. */
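/* For example, MASK == 0x8011 (r0, r4 and the PC) normally emits
   "pop {r0, r4, pc}"; when interworking, backtracing, EH return or a
   CMSE entry function is involved, the PC is instead handled via
   thumb_exit, as the code below arranges.  */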
24101 static void
24102 thumb_pop (FILE *f, unsigned long mask)
24103 {
24104 int regno;
24105 int lo_mask = mask & 0xFF;
24106
24107 gcc_assert (mask);
24108
24109 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
24110 {
24111 /* Special case. Do not generate a POP PC statement here; do it in
24112 thumb_exit (). */
24113 thumb_exit (f, -1);
24114 return;
24115 }
24116
24117 fprintf (f, "\tpop\t{");
24118
24119 /* Look at the low registers first. */
24120 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
24121 {
24122 if (lo_mask & 1)
24123 {
24124 asm_fprintf (f, "%r", regno);
24125
24126 if ((lo_mask & ~1) != 0)
24127 fprintf (f, ", ");
24128 }
24129 }
24130
24131 if (mask & (1 << PC_REGNUM))
24132 {
24133 /* Catch popping the PC. */
24134 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
24135 || IS_CMSE_ENTRY (arm_current_func_type ()))
24136 {
24137 /* The PC is never popped directly; instead
24138 it is popped into r3 and then BX is used. */
24139 fprintf (f, "}\n");
24140
24141 thumb_exit (f, -1);
24142
24143 return;
24144 }
24145 else
24146 {
24147 if (mask & 0xFF)
24148 fprintf (f, ", ");
24149
24150 asm_fprintf (f, "%r", PC_REGNUM);
24151 }
24152 }
24153
24154 fprintf (f, "}\n");
24155 }
24156
24157 /* Generate code to return from a thumb function.
24158 If 'reg_containing_return_addr' is -1, then the return address is
24159 actually on the stack, at the stack pointer.
24160
24161 Note: do not forget to update length attribute of corresponding insn pattern
24162 when changing assembly output (e.g. length attribute of epilogue_insns when
24163 updating Armv8-M Baseline Security Extensions register clearing
24164 sequences). */
24165 static void
24166 thumb_exit (FILE *f, int reg_containing_return_addr)
24167 {
24168 unsigned regs_available_for_popping;
24169 unsigned regs_to_pop;
24170 int pops_needed;
24171 unsigned available;
24172 unsigned required;
24173 machine_mode mode;
24174 int size;
24175 int restore_a4 = FALSE;
24176
24177 /* Compute the registers we need to pop. */
24178 regs_to_pop = 0;
24179 pops_needed = 0;
24180
24181 if (reg_containing_return_addr == -1)
24182 {
24183 regs_to_pop |= 1 << LR_REGNUM;
24184 ++pops_needed;
24185 }
24186
24187 if (TARGET_BACKTRACE)
24188 {
24189 /* Restore the (ARM) frame pointer and stack pointer. */
24190 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
24191 pops_needed += 2;
24192 }
24193
24194 /* If there is nothing to pop then just emit the BX instruction and
24195 return. */
24196 if (pops_needed == 0)
24197 {
24198 if (crtl->calls_eh_return)
24199 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24200
24201 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24202 {
24203 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
24204 reg_containing_return_addr);
24205 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24206 }
24207 else
24208 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24209 return;
24210 }
24211 /* Otherwise if we are not supporting interworking and we have not created
24212 a backtrace structure and the function was not entered in ARM mode then
24213 just pop the return address straight into the PC. */
24214 else if (!TARGET_INTERWORK
24215 && !TARGET_BACKTRACE
24216 && !is_called_in_ARM_mode (current_function_decl)
24217 && !crtl->calls_eh_return
24218 && !IS_CMSE_ENTRY (arm_current_func_type ()))
24219 {
24220 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
24221 return;
24222 }
24223
24224 /* Find out how many of the (return) argument registers we can corrupt. */
24225 regs_available_for_popping = 0;
24226
24227 /* If returning via __builtin_eh_return, the bottom three registers
24228 all contain information needed for the return. */
24229 if (crtl->calls_eh_return)
24230 size = 12;
24231 else
24232 {
24233 /* Deduce the registers used from the function's
24234 return value. This is more reliable than examining
24235 df_regs_ever_live_p () because that will be set if the register is
24236 ever used in the function, not just if the register is used
24237 to hold a return value. */
24238
24239 if (crtl->return_rtx != 0)
24240 mode = GET_MODE (crtl->return_rtx);
24241 else
24242 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24243
24244 size = GET_MODE_SIZE (mode);
24245
24246 if (size == 0)
24247 {
24248 /* In a void function we can use any argument register.
24249 In a function that returns a structure on the stack
24250 we can use the second and third argument registers. */
24251 if (mode == VOIDmode)
24252 regs_available_for_popping =
24253 (1 << ARG_REGISTER (1))
24254 | (1 << ARG_REGISTER (2))
24255 | (1 << ARG_REGISTER (3));
24256 else
24257 regs_available_for_popping =
24258 (1 << ARG_REGISTER (2))
24259 | (1 << ARG_REGISTER (3));
24260 }
24261 else if (size <= 4)
24262 regs_available_for_popping =
24263 (1 << ARG_REGISTER (2))
24264 | (1 << ARG_REGISTER (3));
24265 else if (size <= 8)
24266 regs_available_for_popping =
24267 (1 << ARG_REGISTER (3));
24268 }
24269
24270 /* Match registers to be popped with registers into which we pop them. */
24271 for (available = regs_available_for_popping,
24272 required = regs_to_pop;
24273 required != 0 && available != 0;
24274 available &= ~(available & - available),
24275 required &= ~(required & - required))
24276 -- pops_needed;
24277
24278 /* If we have any popping registers left over, remove them. */
24279 if (available > 0)
24280 regs_available_for_popping &= ~available;
24281
24282 /* Otherwise if we need another popping register we can use
24283 the fourth argument register. */
24284 else if (pops_needed)
24285 {
24286 /* If we have not found any free argument registers and
24287 reg a4 contains the return address, we must move it. */
24288 if (regs_available_for_popping == 0
24289 && reg_containing_return_addr == LAST_ARG_REGNUM)
24290 {
24291 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24292 reg_containing_return_addr = LR_REGNUM;
24293 }
24294 else if (size > 12)
24295 {
24296 /* Register a4 is being used to hold part of the return value,
24297 but we have dire need of a free, low register. */
24298 restore_a4 = TRUE;
24299
24300 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
24301 }
24302
24303 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24304 {
24305 /* The fourth argument register is available. */
24306 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24307
24308 --pops_needed;
24309 }
24310 }
24311
24312 /* Pop as many registers as we can. */
24313 thumb_pop (f, regs_available_for_popping);
24314
24315 /* Process the registers we popped. */
24316 if (reg_containing_return_addr == -1)
24317 {
24318 /* The return address was popped into the lowest numbered register. */
24319 regs_to_pop &= ~(1 << LR_REGNUM);
24320
24321 reg_containing_return_addr =
24322 number_of_first_bit_set (regs_available_for_popping);
24323
24324 /* Remove this register from the mask of available registers, so that
24325 the return address will not be corrupted by further pops. */
24326 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24327 }
24328
24329 /* If we popped other registers then handle them here. */
24330 if (regs_available_for_popping)
24331 {
24332 int frame_pointer;
24333
24334 /* Work out which register currently contains the frame pointer. */
24335 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24336
24337 /* Move it into the correct place. */
24338 asm_fprintf (f, "\tmov\t%r, %r\n",
24339 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24340
24341 /* (Temporarily) remove it from the mask of popped registers. */
24342 regs_available_for_popping &= ~(1 << frame_pointer);
24343 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24344
24345 if (regs_available_for_popping)
24346 {
24347 int stack_pointer;
24348
24349 /* We popped the stack pointer as well,
24350 find the register that contains it. */
24351 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24352
24353 /* Move it into the stack register. */
24354 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24355
24356 /* At this point we have popped all necessary registers, so
24357 do not worry about restoring regs_available_for_popping
24358 to its correct value:
24359
24360 assert (pops_needed == 0)
24361 assert (regs_available_for_popping == (1 << frame_pointer))
24362 assert (regs_to_pop == (1 << STACK_POINTER)) */
24363 }
24364 else
24365 {
24366 /* Since we have just moved the popped value into the frame
24367 pointer, the popping register is available for reuse, and
24368 we know that we still have the stack pointer left to pop. */
24369 regs_available_for_popping |= (1 << frame_pointer);
24370 }
24371 }
24372
24373 /* If we still have registers left on the stack, but we no longer have
24374 any registers into which we can pop them, then we must move the return
24375 address into the link register and make available the register that
24376 contained it. */
24377 if (regs_available_for_popping == 0 && pops_needed > 0)
24378 {
24379 regs_available_for_popping |= 1 << reg_containing_return_addr;
24380
24381 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24382 reg_containing_return_addr);
24383
24384 reg_containing_return_addr = LR_REGNUM;
24385 }
24386
24387 /* If we have registers left on the stack then pop some more.
24388 We know that at most we will want to pop FP and SP. */
24389 if (pops_needed > 0)
24390 {
24391 int popped_into;
24392 int move_to;
24393
24394 thumb_pop (f, regs_available_for_popping);
24395
24396 /* We have popped either FP or SP.
24397 Move whichever one it is into the correct register. */
24398 popped_into = number_of_first_bit_set (regs_available_for_popping);
24399 move_to = number_of_first_bit_set (regs_to_pop);
24400
24401 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24402 --pops_needed;
24403 }
24404
24405 /* If we still have not popped everything then we must have only
24406 had one register available to us and we are now popping the SP. */
24407 if (pops_needed > 0)
24408 {
24409 int popped_into;
24410
24411 thumb_pop (f, regs_available_for_popping);
24412
24413 popped_into = number_of_first_bit_set (regs_available_for_popping);
24414
24415 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24416 /*
24417 assert (regs_to_pop == (1 << STACK_POINTER))
24418 assert (pops_needed == 1)
24419 */
24420 }
24421
24422 /* If necessary restore the a4 register. */
24423 if (restore_a4)
24424 {
24425 if (reg_containing_return_addr != LR_REGNUM)
24426 {
24427 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24428 reg_containing_return_addr = LR_REGNUM;
24429 }
24430
24431 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24432 }
24433
24434 if (crtl->calls_eh_return)
24435 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24436
24437 /* Return to caller. */
24438 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24439 {
24440 /* This is for the cases where LR is not being used to contain the return
24441 address. It may therefore contain information that we might not want
24442 to leak, hence it must be cleared. The value in R0 will never be a
24443 secret at this point, so it is safe to use it, see the clearing code
24444 in 'cmse_nonsecure_entry_clear_before_return'. */
24445 if (reg_containing_return_addr != LR_REGNUM)
24446 asm_fprintf (f, "\tmov\tlr, r0\n");
24447
24448 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24449 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24450 }
24451 else
24452 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24453 }
24454 \f
24455 /* Scan INSN just before assembler is output for it.
24456 For Thumb-1, we track the status of the condition codes; this
24457 information is used in the cbranchsi4_insn pattern. */
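/* A sketch of the condition-code tracking done below: after an insn
   such as "adds r3, r3, r2" (whose conds attribute is SET), the flags
   describe r3 compared with zero, so thumb1_cc_op0/op1/mode are
   recorded as (r3, 0, CC_NOOVmode); a later cbranchsi4_insn testing r3
   against zero can then reuse the flags instead of emitting a fresh
   compare.  */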
24458 void
24459 thumb1_final_prescan_insn (rtx_insn *insn)
24460 {
24461 if (flag_print_asm_name)
24462 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24463 INSN_ADDRESSES (INSN_UID (insn)));
24464 /* Don't overwrite the previous setter when we get to a cbranch. */
24465 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24466 {
24467 enum attr_conds conds;
24468
24469 if (cfun->machine->thumb1_cc_insn)
24470 {
24471 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24472 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24473 CC_STATUS_INIT;
24474 }
24475 conds = get_attr_conds (insn);
24476 if (conds == CONDS_SET)
24477 {
24478 rtx set = single_set (insn);
24479 cfun->machine->thumb1_cc_insn = insn;
24480 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24481 cfun->machine->thumb1_cc_op1 = const0_rtx;
24482 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24483 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24484 {
24485 rtx src1 = XEXP (SET_SRC (set), 1);
24486 if (src1 == const0_rtx)
24487 cfun->machine->thumb1_cc_mode = CCmode;
24488 }
24489 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24490 {
24491 /* Record the src register operand instead of dest because
24492 cprop_hardreg pass propagates src. */
24493 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24494 }
24495 }
24496 else if (conds != CONDS_NOCOND)
24497 cfun->machine->thumb1_cc_insn = NULL_RTX;
24498 }
24499
24500 /* Check if an unexpected far jump is used. */
24501 if (cfun->machine->lr_save_eliminated
24502 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24503 internal_error ("unexpected thumb1 far jump");
24504 }
24505
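/* Return nonzero if VAL, viewed as a 32-bit value, is an 8-bit
   constant shifted left by 0 to 24 bits, i.e. all of its set bits fit
   within 8 contiguous bit positions.  For example 0x1FE (0xFF << 1)
   and 0xFF0000 qualify, while 0x101 does not; such constants can be
   synthesised with a move of the 8-bit value followed by a left
   shift.  */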
24506 int
24507 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24508 {
24509 unsigned HOST_WIDE_INT mask = 0xff;
24510 int i;
24511
24512 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24513 if (val == 0) /* XXX */
24514 return 0;
24515
24516 for (i = 0; i < 25; i++)
24517 if ((val & (mask << i)) == val)
24518 return 1;
24519
24520 return 0;
24521 }
24522
24523 /* Returns nonzero if the current function contains,
24524 or might contain, a far jump. */
24525 static int
24526 thumb_far_jump_used_p (void)
24527 {
24528 rtx_insn *insn;
24529 bool far_jump = false;
24530 unsigned int func_size = 0;
24531
24532 /* If we have already decided that far jumps may be used,
24533 do not bother checking again, and always return true even if
24534 it turns out that they are not being used. Once we have made
24535 the decision that far jumps are present (and that hence the link
24536 register will be pushed onto the stack) we cannot go back on it. */
24537 if (cfun->machine->far_jump_used)
24538 return 1;
24539
24540 /* If this function is not being called from the prologue/epilogue
24541 generation code then it must be being called from the
24542 INITIAL_ELIMINATION_OFFSET macro. */
24543 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24544 {
24545 /* In this case we know that we are being asked about the elimination
24546 of the arg pointer register. If that register is not being used,
24547 then there are no arguments on the stack, and we do not have to
24548 worry that a far jump might force the prologue to push the link
24549 register, changing the stack offsets. In this case we can just
24550 return false, since the presence of far jumps in the function will
24551 not affect stack offsets.
24552
24553 If the arg pointer is live (or if it was live, but has now been
24554 eliminated and so set to dead) then we do have to test to see if
24555 the function might contain a far jump. This test can lead to some
24556 false negatives, since before reload is completed the length of
24557 branch instructions is not known, so gcc defaults to returning their
24558 longest length, which in turn sets the far jump attribute to true.
24559
24560 A false negative will not result in bad code being generated, but it
24561 will result in a needless push and pop of the link register. We
24562 hope that this does not occur too often.
24563
24564 If we need doubleword stack alignment this could affect the other
24565 elimination offsets so we can't risk getting it wrong. */
24566 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24567 cfun->machine->arg_pointer_live = 1;
24568 else if (!cfun->machine->arg_pointer_live)
24569 return 0;
24570 }
24571
24572 /* We should not change far_jump_used during or after reload, as there is
24573 no chance to change stack frame layout. */
24574 if (reload_in_progress || reload_completed)
24575 return 0;
24576
24577 /* Check to see if the function contains a branch
24578 insn with the far jump attribute set. */
24579 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24580 {
24581 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24582 {
24583 far_jump = true;
24584 }
24585 func_size += get_attr_length (insn);
24586 }
24587
24588 /* The far_jump attribute will always be true for thumb1 before the
24589 shorten_branch pass, so checking that attribute before
24590 shorten_branch isn't very useful.
24591
24592 The following heuristic tries to estimate more accurately whether a far
24593 jump will actually be used. The heuristic is very conservative, as there
24594 is no chance to roll back a decision not to use far jumps.
24595
24596 The Thumb1 long branch offset range is -2048 to 2046. The worst case is
24597 that each 2-byte insn is associated with a 4-byte constant pool entry.
24598 Using a function size of 2048/3 as the threshold is conservative enough. */
24599 if (far_jump)
24600 {
24601 if ((func_size * 3) >= 2048)
24602 {
24603 /* Record the fact that we have decided that
24604 the function does use far jumps. */
24605 cfun->machine->far_jump_used = 1;
24606 return 1;
24607 }
24608 }
24609
24610 return 0;
24611 }
24612
24613 /* Return nonzero if FUNC must be entered in ARM mode. */
24614 static bool
24615 is_called_in_ARM_mode (tree func)
24616 {
24617 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24618
24619 /* Ignore the problem about functions whose address is taken. */
24620 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24621 return true;
24622
24623 #ifdef ARM_PE
24624 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24625 #else
24626 return false;
24627 #endif
24628 }
24629
24630 /* Given the stack offsets and register mask in OFFSETS, decide how
24631 many additional registers to push instead of subtracting a constant
24632 from SP. For epilogues the principle is the same except we use pop.
24633 FOR_PROLOGUE indicates which we're generating. */
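/* A concrete example of the win this is looking for: the Thumb-1
   "sub sp, #imm" encoding only reaches 508 (a 7-bit value scaled by 4),
   so a 512-byte frame would otherwise need the adjustment loaded into a
   register first.  Pushing one extra (otherwise free) register lowers
   SP by 4 as a side effect, leaving 508 bytes that a single SUB can
   handle; the (amount - 508) / 4 computation below covers this case.  */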
24634 static int
24635 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24636 {
24637 HOST_WIDE_INT amount;
24638 unsigned long live_regs_mask = offsets->saved_regs_mask;
24639 /* Extract a mask of the ones we can give to the Thumb's push/pop
24640 instruction. */
24641 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24642 /* Then count how many other high registers will need to be pushed. */
24643 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24644 int n_free, reg_base, size;
24645
24646 if (!for_prologue && frame_pointer_needed)
24647 amount = offsets->locals_base - offsets->saved_regs;
24648 else
24649 amount = offsets->outgoing_args - offsets->saved_regs;
24650
24651 /* If the stack frame size is 512 exactly, we can save one load
24652 instruction, which should make this a win even when optimizing
24653 for speed. */
24654 if (!optimize_size && amount != 512)
24655 return 0;
24656
24657 /* Can't do this if there are high registers to push. */
24658 if (high_regs_pushed != 0)
24659 return 0;
24660
24661 /* Shouldn't do it in the prologue if no registers would normally
24662 be pushed at all. In the epilogue, also allow it if we'll have
24663 a pop insn for the PC. */
24664 if (l_mask == 0
24665 && (for_prologue
24666 || TARGET_BACKTRACE
24667 || (live_regs_mask & 1 << LR_REGNUM) == 0
24668 || TARGET_INTERWORK
24669 || crtl->args.pretend_args_size != 0))
24670 return 0;
24671
24672 /* Don't do this if thumb_expand_prologue wants to emit instructions
24673 between the push and the stack frame allocation. */
24674 if (for_prologue
24675 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24676 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24677 return 0;
24678
24679 reg_base = 0;
24680 n_free = 0;
24681 if (!for_prologue)
24682 {
24683 size = arm_size_return_regs ();
24684 reg_base = ARM_NUM_INTS (size);
24685 live_regs_mask >>= reg_base;
24686 }
24687
24688 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24689 && (for_prologue || call_used_regs[reg_base + n_free]))
24690 {
24691 live_regs_mask >>= 1;
24692 n_free++;
24693 }
24694
24695 if (n_free == 0)
24696 return 0;
24697 gcc_assert (amount / 4 * 4 == amount);
24698
24699 if (amount >= 512 && (amount - n_free * 4) < 512)
24700 return (amount - 508) / 4;
24701 if (amount <= n_free * 4)
24702 return amount / 4;
24703 return 0;
24704 }
24705
24706 /* The bits which aren't usefully expanded as rtl. */
24707 const char *
24708 thumb1_unexpanded_epilogue (void)
24709 {
24710 arm_stack_offsets *offsets;
24711 int regno;
24712 unsigned long live_regs_mask = 0;
24713 int high_regs_pushed = 0;
24714 int extra_pop;
24715 int had_to_push_lr;
24716 int size;
24717
24718 if (cfun->machine->return_used_this_function != 0)
24719 return "";
24720
24721 if (IS_NAKED (arm_current_func_type ()))
24722 return "";
24723
24724 offsets = arm_get_frame_offsets ();
24725 live_regs_mask = offsets->saved_regs_mask;
24726 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24727
24728 /* Deduce the registers used from the function's return value.
24729 This is more reliable than examining df_regs_ever_live_p () because that
24730 will be set if the register is ever used in the function, not just if
24731 the register is used to hold a return value. */
24732 size = arm_size_return_regs ();
24733
24734 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24735 if (extra_pop > 0)
24736 {
24737 unsigned long extra_mask = (1 << extra_pop) - 1;
24738 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24739 }
24740
24741 /* The prologue may have pushed some high registers to use as
24742 work registers. E.g. the testsuite file:
24743 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24744 compiles to produce:
24745 push {r4, r5, r6, r7, lr}
24746 mov r7, r9
24747 mov r6, r8
24748 push {r6, r7}
24749 as part of the prologue. We have to undo that pushing here. */
24750
24751 if (high_regs_pushed)
24752 {
24753 unsigned long mask = live_regs_mask & 0xff;
24754 int next_hi_reg;
24755
24756 /* The available low registers depend on the size of the value we are
24757 returning. */
24758 if (size <= 12)
24759 mask |= 1 << 3;
24760 if (size <= 8)
24761 mask |= 1 << 2;
24762
24763 if (mask == 0)
24764 /* Oh dear! We have no low registers into which we can pop
24765 high registers! */
24766 internal_error
24767 ("no low registers available for popping high registers");
24768
24769 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24770 if (live_regs_mask & (1 << next_hi_reg))
24771 break;
24772
24773 while (high_regs_pushed)
24774 {
24775 /* Find lo register(s) into which the high register(s) can
24776 be popped. */
24777 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24778 {
24779 if (mask & (1 << regno))
24780 high_regs_pushed--;
24781 if (high_regs_pushed == 0)
24782 break;
24783 }
24784
24785 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24786
24787 /* Pop the values into the low register(s). */
24788 thumb_pop (asm_out_file, mask);
24789
24790 /* Move the value(s) into the high registers. */
24791 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24792 {
24793 if (mask & (1 << regno))
24794 {
24795 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24796 regno);
24797
24798 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24799 if (live_regs_mask & (1 << next_hi_reg))
24800 break;
24801 }
24802 }
24803 }
24804 live_regs_mask &= ~0x0f00;
24805 }
24806
24807 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24808 live_regs_mask &= 0xff;
24809
24810 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24811 {
24812 /* Pop the return address into the PC. */
24813 if (had_to_push_lr)
24814 live_regs_mask |= 1 << PC_REGNUM;
24815
24816 /* Either no argument registers were pushed or a backtrace
24817 structure was created which includes an adjusted stack
24818 pointer, so just pop everything. */
24819 if (live_regs_mask)
24820 thumb_pop (asm_out_file, live_regs_mask);
24821
24822 /* We have either just popped the return address into the
24823 PC or it was kept in LR for the entire function.
24824 Note that thumb_pop has already called thumb_exit if the
24825 PC was in the list. */
24826 if (!had_to_push_lr)
24827 thumb_exit (asm_out_file, LR_REGNUM);
24828 }
24829 else
24830 {
24831 /* Pop everything but the return address. */
24832 if (live_regs_mask)
24833 thumb_pop (asm_out_file, live_regs_mask);
24834
24835 if (had_to_push_lr)
24836 {
24837 if (size > 12)
24838 {
24839 /* We have no free low regs, so save one. */
24840 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24841 LAST_ARG_REGNUM);
24842 }
24843
24844 /* Get the return address into a temporary register. */
24845 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24846
24847 if (size > 12)
24848 {
24849 /* Move the return address to lr. */
24850 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24851 LAST_ARG_REGNUM);
24852 /* Restore the low register. */
24853 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24854 IP_REGNUM);
24855 regno = LR_REGNUM;
24856 }
24857 else
24858 regno = LAST_ARG_REGNUM;
24859 }
24860 else
24861 regno = LR_REGNUM;
24862
24863 /* Remove the argument registers that were pushed onto the stack. */
24864 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24865 SP_REGNUM, SP_REGNUM,
24866 crtl->args.pretend_args_size);
24867
24868 thumb_exit (asm_out_file, regno);
24869 }
24870
24871 return "";
24872 }
24873
24874 /* Functions to save and restore machine-specific function data. */
24875 static struct machine_function *
24876 arm_init_machine_status (void)
24877 {
24878 struct machine_function *machine;
24879 machine = ggc_cleared_alloc<machine_function> ();
24880
24881 #if ARM_FT_UNKNOWN != 0
24882 machine->func_type = ARM_FT_UNKNOWN;
24883 #endif
24884 return machine;
24885 }
24886
24887 /* Return an RTX indicating where the return address to the
24888 calling function can be found. */
24889 rtx
24890 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24891 {
24892 if (count != 0)
24893 return NULL_RTX;
24894
24895 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24896 }
24897
24898 /* Do anything needed before RTL is emitted for each function. */
24899 void
24900 arm_init_expanders (void)
24901 {
24902 /* Arrange to initialize and mark the machine per-function status. */
24903 init_machine_status = arm_init_machine_status;
24904
24905 /* This is to stop the combine pass optimizing away the alignment
24906 adjustment of va_arg. */
24907 /* ??? It is claimed that this should not be necessary. */
24908 if (cfun)
24909 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24910 }
24911
24912 /* Return true if FUNC is compiled in a different (ARM/Thumb) mode from the one currently in effect. */
24913
24914 bool
24915 arm_change_mode_p (tree func)
24916 {
24917 if (TREE_CODE (func) != FUNCTION_DECL)
24918 return false;
24919
24920 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24921
24922 if (!callee_tree)
24923 callee_tree = target_option_default_node;
24924
24925 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24926 int flags = callee_opts->x_target_flags;
24927
24928 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24929 }
24930
24931 /* Like arm_compute_initial_elimination_offset. Simpler because there
24932 isn't an ABI-specified frame pointer for Thumb. Instead, we set it
24933 to point at the base of the local variables after static stack
24934 space for a function has been allocated. */
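/* Each eliminable register corresponds to one field of
   arm_stack_offsets (ARG_POINTER -> saved_args, FRAME_POINTER ->
   soft_frame, ARM_HARD_FRAME_POINTER -> saved_regs,
   THUMB_HARD_FRAME_POINTER -> locals_base, STACK_POINTER ->
   outgoing_args), so each offset returned below is simply the
   difference between the two corresponding fields.  */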
24935
24936 HOST_WIDE_INT
24937 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24938 {
24939 arm_stack_offsets *offsets;
24940
24941 offsets = arm_get_frame_offsets ();
24942
24943 switch (from)
24944 {
24945 case ARG_POINTER_REGNUM:
24946 switch (to)
24947 {
24948 case STACK_POINTER_REGNUM:
24949 return offsets->outgoing_args - offsets->saved_args;
24950
24951 case FRAME_POINTER_REGNUM:
24952 return offsets->soft_frame - offsets->saved_args;
24953
24954 case ARM_HARD_FRAME_POINTER_REGNUM:
24955 return offsets->saved_regs - offsets->saved_args;
24956
24957 case THUMB_HARD_FRAME_POINTER_REGNUM:
24958 return offsets->locals_base - offsets->saved_args;
24959
24960 default:
24961 gcc_unreachable ();
24962 }
24963 break;
24964
24965 case FRAME_POINTER_REGNUM:
24966 switch (to)
24967 {
24968 case STACK_POINTER_REGNUM:
24969 return offsets->outgoing_args - offsets->soft_frame;
24970
24971 case ARM_HARD_FRAME_POINTER_REGNUM:
24972 return offsets->saved_regs - offsets->soft_frame;
24973
24974 case THUMB_HARD_FRAME_POINTER_REGNUM:
24975 return offsets->locals_base - offsets->soft_frame;
24976
24977 default:
24978 gcc_unreachable ();
24979 }
24980 break;
24981
24982 default:
24983 gcc_unreachable ();
24984 }
24985 }
24986
24987 /* Generate the function's prologue. */
24988
24989 void
24990 thumb1_expand_prologue (void)
24991 {
24992 rtx_insn *insn;
24993
24994 HOST_WIDE_INT amount;
24995 HOST_WIDE_INT size;
24996 arm_stack_offsets *offsets;
24997 unsigned long func_type;
24998 int regno;
24999 unsigned long live_regs_mask;
25000 unsigned long l_mask;
25001 unsigned high_regs_pushed = 0;
25002 bool lr_needs_saving;
25003
25004 func_type = arm_current_func_type ();
25005
25006 /* Naked functions don't have prologues. */
25007 if (IS_NAKED (func_type))
25008 {
25009 if (flag_stack_usage_info)
25010 current_function_static_stack_size = 0;
25011 return;
25012 }
25013
25014 if (IS_INTERRUPT (func_type))
25015 {
25016 error ("interrupt service routines cannot be coded in Thumb mode");
25017 return;
25018 }
25019
25020 if (is_called_in_ARM_mode (current_function_decl))
25021 emit_insn (gen_prologue_thumb1_interwork ());
25022
25023 offsets = arm_get_frame_offsets ();
25024 live_regs_mask = offsets->saved_regs_mask;
25025 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
25026
25027 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
25028 l_mask = live_regs_mask & 0x40ff;
25029 /* Then count how many other high registers will need to be pushed. */
25030 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25031
25032 if (crtl->args.pretend_args_size)
25033 {
25034 rtx x = GEN_INT (-crtl->args.pretend_args_size);
25035
25036 if (cfun->machine->uses_anonymous_args)
25037 {
25038 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
25039 unsigned long mask;
25040
25041 mask = 1ul << (LAST_ARG_REGNUM + 1);
25042 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
25043
25044 insn = thumb1_emit_multi_reg_push (mask, 0);
25045 }
25046 else
25047 {
25048 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25049 stack_pointer_rtx, x));
25050 }
25051 RTX_FRAME_RELATED_P (insn) = 1;
25052 }
25053
25054 if (TARGET_BACKTRACE)
25055 {
25056 HOST_WIDE_INT offset = 0;
25057 unsigned work_register;
25058 rtx work_reg, x, arm_hfp_rtx;
25059
25060 /* We have been asked to create a stack backtrace structure.
25061 The code looks like this:
25062
25063 0 .align 2
25064 0 func:
25065 0 sub SP, #16 Reserve space for 4 registers.
25066 2 push {R7} Push low registers.
25067 4 add R7, SP, #20 Get the stack pointer before the push.
25068 6 str R7, [SP, #8] Store the stack pointer
25069 (before reserving the space).
25070 8 mov R7, PC Get hold of the start of this code + 12.
25071 10 str R7, [SP, #16] Store it.
25072 12 mov R7, FP Get hold of the current frame pointer.
25073 14 str R7, [SP, #4] Store it.
25074 16 mov R7, LR Get hold of the current return address.
25075 18 str R7, [SP, #12] Store it.
25076 20 add R7, SP, #16 Point at the start of the
25077 backtrace structure.
25078 22 mov FP, R7 Put this value into the frame pointer. */
25079
25080 work_register = thumb_find_work_register (live_regs_mask);
25081 work_reg = gen_rtx_REG (SImode, work_register);
25082 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
25083
25084 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25085 stack_pointer_rtx, GEN_INT (-16)));
25086 RTX_FRAME_RELATED_P (insn) = 1;
25087
25088 if (l_mask)
25089 {
25090 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
25091 RTX_FRAME_RELATED_P (insn) = 1;
25092 lr_needs_saving = false;
25093
25094 offset = bit_count (l_mask) * UNITS_PER_WORD;
25095 }
25096
25097 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
25098 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25099
25100 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
25101 x = gen_frame_mem (SImode, x);
25102 emit_move_insn (x, work_reg);
25103
25104 /* Make sure that the instruction fetching the PC is in the right place
25105 to calculate "start of backtrace creation code + 12". */
25106 /* ??? The stores using the common WORK_REG ought to be enough to
25107 prevent the scheduler from doing anything weird. Failing that
25108 we could always move all of the following into an UNSPEC_VOLATILE. */
25109 if (l_mask)
25110 {
25111 x = gen_rtx_REG (SImode, PC_REGNUM);
25112 emit_move_insn (work_reg, x);
25113
25114 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25115 x = gen_frame_mem (SImode, x);
25116 emit_move_insn (x, work_reg);
25117
25118 emit_move_insn (work_reg, arm_hfp_rtx);
25119
25120 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25121 x = gen_frame_mem (SImode, x);
25122 emit_move_insn (x, work_reg);
25123 }
25124 else
25125 {
25126 emit_move_insn (work_reg, arm_hfp_rtx);
25127
25128 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25129 x = gen_frame_mem (SImode, x);
25130 emit_move_insn (x, work_reg);
25131
25132 x = gen_rtx_REG (SImode, PC_REGNUM);
25133 emit_move_insn (work_reg, x);
25134
25135 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25136 x = gen_frame_mem (SImode, x);
25137 emit_move_insn (x, work_reg);
25138 }
25139
25140 x = gen_rtx_REG (SImode, LR_REGNUM);
25141 emit_move_insn (work_reg, x);
25142
25143 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
25144 x = gen_frame_mem (SImode, x);
25145 emit_move_insn (x, work_reg);
25146
25147 x = GEN_INT (offset + 12);
25148 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25149
25150 emit_move_insn (arm_hfp_rtx, work_reg);
25151 }
25152 /* Optimization: If we are not pushing any low registers but we are going
25153 to push some high registers then delay our first push. This will just
25154 be a push of LR and we can combine it with the push of the first high
25155 register. */
25156 else if ((l_mask & 0xff) != 0
25157 || (high_regs_pushed == 0 && lr_needs_saving))
25158 {
25159 unsigned long mask = l_mask;
25160 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
25161 insn = thumb1_emit_multi_reg_push (mask, mask);
25162 RTX_FRAME_RELATED_P (insn) = 1;
25163 lr_needs_saving = false;
25164 }
25165
25166 if (high_regs_pushed)
25167 {
25168 unsigned pushable_regs;
25169 unsigned next_hi_reg;
25170 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
25171 : crtl->args.info.nregs;
25172 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
25173
25174 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
25175 if (live_regs_mask & (1 << next_hi_reg))
25176 break;
25177
25178 /* Here we need to mask out registers used for passing arguments, even
25179 if they could otherwise be pushed. Using them to stash the high
25180 registers could clobber argument values that are still live. */
25181 pushable_regs = l_mask & (~arg_regs_mask);
25182 if (lr_needs_saving)
25183 pushable_regs &= ~(1 << LR_REGNUM);
25184
25185 if (pushable_regs == 0)
25186 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
25187
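/* Copy as many live high registers as will fit into the available low
   registers, push that batch, and repeat until every live high register
   has been saved.  */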
25188 while (high_regs_pushed > 0)
25189 {
25190 unsigned long real_regs_mask = 0;
25191 unsigned long push_mask = 0;
25192
25193 for (regno = LR_REGNUM; regno >= 0; regno --)
25194 {
25195 if (pushable_regs & (1 << regno))
25196 {
25197 emit_move_insn (gen_rtx_REG (SImode, regno),
25198 gen_rtx_REG (SImode, next_hi_reg));
25199
25200 high_regs_pushed --;
25201 real_regs_mask |= (1 << next_hi_reg);
25202 push_mask |= (1 << regno);
25203
25204 if (high_regs_pushed)
25205 {
25206 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
25207 next_hi_reg --)
25208 if (live_regs_mask & (1 << next_hi_reg))
25209 break;
25210 }
25211 else
25212 break;
25213 }
25214 }
25215
25216 /* If we had to find a work register and we have not yet
25217 saved the LR then add it to the list of regs to push. */
25218 if (lr_needs_saving)
25219 {
25220 push_mask |= 1 << LR_REGNUM;
25221 real_regs_mask |= 1 << LR_REGNUM;
25222 lr_needs_saving = false;
25223 }
25224
25225 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
25226 RTX_FRAME_RELATED_P (insn) = 1;
25227 }
25228 }
25229
25230 /* Load the pic register before setting the frame pointer,
25231 so we can use r7 as a temporary work register. */
25232 if (flag_pic && arm_pic_register != INVALID_REGNUM)
25233 arm_load_pic_register (live_regs_mask);
25234
25235 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
25236 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
25237 stack_pointer_rtx);
25238
25239 size = offsets->outgoing_args - offsets->saved_args;
25240 if (flag_stack_usage_info)
25241 current_function_static_stack_size = size;
25242
25243 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25244 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
25245 || flag_stack_clash_protection)
25246 && size)
25247 sorry ("-fstack-check=specific for Thumb-1");
25248
25249 amount = offsets->outgoing_args - offsets->saved_regs;
25250 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25251 if (amount)
25252 {
25253 if (amount < 512)
25254 {
25255 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25256 GEN_INT (- amount)));
25257 RTX_FRAME_RELATED_P (insn) = 1;
25258 }
25259 else
25260 {
25261 rtx reg, dwarf;
25262
25263 /* The stack decrement is too big for an immediate value in a single
25264 insn. In theory we could issue multiple subtracts, but after
25265 three of them it becomes more space efficient to place the full
25266 value in the constant pool and load into a register. (Also the
25267 ARM debugger really likes to see only one stack decrement per
25268 function). So instead we look for a scratch register into which
25269 we can load the decrement, and then we subtract this from the
25270 stack pointer. Unfortunately on the thumb the only available
25271 scratch registers are the argument registers, and we cannot use
25272 these as they may hold arguments to the function. Instead we
25273 attempt to locate a call preserved register which is used by this
25274 function. If we can find one, then we know that it will have
25275 been pushed at the start of the prologue and so we can corrupt
25276 it now. */
25277 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25278 if (live_regs_mask & (1 << regno))
25279 break;
25280
25281 gcc_assert (regno <= LAST_LO_REGNUM);
25282
25283 reg = gen_rtx_REG (SImode, regno);
25284
25285 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25286
25287 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25288 stack_pointer_rtx, reg));
25289
25290 dwarf = gen_rtx_SET (stack_pointer_rtx,
25291 plus_constant (Pmode, stack_pointer_rtx,
25292 -amount));
25293 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25294 RTX_FRAME_RELATED_P (insn) = 1;
25295 }
25296 }
25297
25298 if (frame_pointer_needed)
25299 thumb_set_frame_pointer (offsets);
25300
25301 /* If we are profiling, make sure no instructions are scheduled before
25302 the call to mcount. Similarly if the user has requested no
25303 scheduling in the prologue. Similarly if we want non-call exceptions
25304 using the EABI unwinder, to prevent faulting instructions from being
25305 swapped with a stack adjustment. */
25306 if (crtl->profile || !TARGET_SCHED_PROLOG
25307 || (arm_except_unwind_info (&global_options) == UI_TARGET
25308 && cfun->can_throw_non_call_exceptions))
25309 emit_insn (gen_blockage ());
25310
25311 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25312 if (live_regs_mask & 0xff)
25313 cfun->machine->lr_save_eliminated = 0;
25314 }
25315
25316 /* Clear caller saved registers not used to pass return values and leaked
25317 condition flags before exiting a cmse_nonsecure_entry function. */
25318
25319 void
25320 cmse_nonsecure_entry_clear_before_return (void)
25321 {
25322 int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
25323 uint32_t padding_bits_to_clear = 0;
25324 auto_sbitmap to_clear_bitmap (maxregno + 1);
25325 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
25326 tree result_type;
25327
25328 bitmap_clear (to_clear_bitmap);
25329 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
25330 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
25331
25332 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25333 registers. */
25334 if (TARGET_HARD_FLOAT)
25335 {
25336 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
25337
25338 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
25339
25340 /* Make sure we don't clear the two scratch registers used to clear the
25341 relevant FPSCR bits in output_return_instruction. */
25342 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25343 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
25344 emit_use (gen_rtx_REG (SImode, 4));
25345 bitmap_clear_bit (to_clear_bitmap, 4);
25346 }
25347
25348 /* If the user has defined registers to be caller saved, these are no longer
25349 restored by the function before returning and must thus be cleared for
25350 security purposes. */
25351 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
25352 {
25353 /* We do not touch registers that can be used to pass arguments as per
25354 the AAPCS, since these should never be made callee-saved by user
25355 options. */
25356 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25357 continue;
25358 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25359 continue;
25360 if (call_used_regs[regno])
25361 bitmap_set_bit (to_clear_bitmap, regno);
25362 }
25363
25364 /* Make sure we do not clear the registers used to return the result. */
25365 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25366 if (!VOID_TYPE_P (result_type))
25367 {
25368 uint64_t to_clear_return_mask;
25369 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25370
25371 /* No need to check that we return in registers, because we don't
25372 support returning on stack yet. */
25373 gcc_assert (REG_P (result_rtl));
25374 to_clear_return_mask
25375 = compute_not_to_clear_mask (result_type, result_rtl, 0,
25376 &padding_bits_to_clear);
25377 if (to_clear_return_mask)
25378 {
25379 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
25380 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25381 {
25382 if (to_clear_return_mask & (1ULL << regno))
25383 bitmap_clear_bit (to_clear_bitmap, regno);
25384 }
25385 }
25386 }
25387
25388 if (padding_bits_to_clear != 0)
25389 {
25390 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
25391 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
25392
25393 /* Padding_bits_to_clear is not 0 so we know we are dealing with
25394 returning a composite type, which only uses r0. Let's make sure that
25395 r1-r3 are cleared too. */
25396 bitmap_clear (to_clear_arg_regs_bitmap);
25397 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
25398 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
25399 }
25400
25401 /* Clear full registers that leak before returning. */
25402 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
25403 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
25404 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
25405 clearing_reg);
25406 }
25407
25408 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
25409 single POP instruction can be generated. LR should be replaced by PC. All
25410 the checks required are already done by USE_RETURN_INSN (). Hence, all
25411 we really need to check here is whether a single register or multiple
25412 registers are to be popped. */
25413 void
25414 thumb2_expand_return (bool simple_return)
25415 {
25416 int i, num_regs;
25417 unsigned long saved_regs_mask;
25418 arm_stack_offsets *offsets;
25419
25420 offsets = arm_get_frame_offsets ();
25421 saved_regs_mask = offsets->saved_regs_mask;
25422
25423 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25424 if (saved_regs_mask & (1 << i))
25425 num_regs++;
25426
25427 if (!simple_return && saved_regs_mask)
25428 {
25429 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25430 functions or adapt code to handle according to ACLE. This path should
25431 not be reachable for cmse_nonsecure_entry functions though we prefer
25432 to assert it for now to ensure that future code changes do not silently
25433 change this behavior. */
25434 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25435 if (num_regs == 1)
25436 {
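/* Build a PARALLEL combining the return with a post-increment load of PC
   from the stack; this is matched as a single "pop {pc}" by the
   *pop_multiple_with_stack_update_and_return pattern.  */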
25437 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25438 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25439 rtx addr = gen_rtx_MEM (SImode,
25440 gen_rtx_POST_INC (SImode,
25441 stack_pointer_rtx));
25442 set_mem_alias_set (addr, get_frame_alias_set ());
25443 XVECEXP (par, 0, 0) = ret_rtx;
25444 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25445 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25446 emit_jump_insn (par);
25447 }
25448 else
25449 {
25450 saved_regs_mask &= ~ (1 << LR_REGNUM);
25451 saved_regs_mask |= (1 << PC_REGNUM);
25452 arm_emit_multi_reg_pop (saved_regs_mask);
25453 }
25454 }
25455 else
25456 {
25457 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25458 cmse_nonsecure_entry_clear_before_return ();
25459 emit_jump_insn (simple_return_rtx);
25460 }
25461 }
25462
25463 void
25464 thumb1_expand_epilogue (void)
25465 {
25466 HOST_WIDE_INT amount;
25467 arm_stack_offsets *offsets;
25468 int regno;
25469
25470 /* Naked functions don't have prologues. */
25471 if (IS_NAKED (arm_current_func_type ()))
25472 return;
25473
25474 offsets = arm_get_frame_offsets ();
25475 amount = offsets->outgoing_args - offsets->saved_regs;
25476
25477 if (frame_pointer_needed)
25478 {
25479 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25480 amount = offsets->locals_base - offsets->saved_regs;
25481 }
25482 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25483
25484 gcc_assert (amount >= 0);
25485 if (amount)
25486 {
25487 emit_insn (gen_blockage ());
25488
25489 if (amount < 512)
25490 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25491 GEN_INT (amount)));
25492 else
25493 {
25494 /* r3 is always free in the epilogue. */
25495 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25496
25497 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25498 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25499 }
25500 }
25501
25502 /* Emit a USE (stack_pointer_rtx), so that
25503 the stack adjustment will not be deleted. */
25504 emit_insn (gen_force_register_use (stack_pointer_rtx));
25505
25506 if (crtl->profile || !TARGET_SCHED_PROLOG)
25507 emit_insn (gen_blockage ());
25508
25509 /* Emit a clobber for each register that will be restored in the epilogue,
25510 so that flow2 will get register lifetimes correct. */
25511 for (regno = 0; regno < 13; regno++)
25512 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25513 emit_clobber (gen_rtx_REG (SImode, regno));
25514
25515 if (! df_regs_ever_live_p (LR_REGNUM))
25516 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25517
25518 /* Clear all caller-saved regs that are not used to return. */
25519 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25520 cmse_nonsecure_entry_clear_before_return ();
25521 }
25522
25523 /* Epilogue code for APCS frame. */
25524 static void
25525 arm_expand_epilogue_apcs_frame (bool really_return)
25526 {
25527 unsigned long func_type;
25528 unsigned long saved_regs_mask;
25529 int num_regs = 0;
25530 int i;
25531 int floats_from_frame = 0;
25532 arm_stack_offsets *offsets;
25533
25534 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25535 func_type = arm_current_func_type ();
25536
25537 /* Get frame offsets for ARM. */
25538 offsets = arm_get_frame_offsets ();
25539 saved_regs_mask = offsets->saved_regs_mask;
25540
25541 /* Find the offset of the floating-point save area in the frame. */
25542 floats_from_frame
25543 = (offsets->saved_args
25544 + arm_compute_static_chain_stack_bytes ()
25545 - offsets->frame);
25546
25547 /* Compute how many core registers are saved and how far away the floats are. */
25548 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25549 if (saved_regs_mask & (1 << i))
25550 {
25551 num_regs++;
25552 floats_from_frame += 4;
25553 }
25554
25555 if (TARGET_HARD_FLOAT)
25556 {
25557 int start_reg;
25558 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25559
25560 /* The offset is from IP_REGNUM. */
25561 int saved_size = arm_get_vfp_saved_size ();
25562 if (saved_size > 0)
25563 {
25564 rtx_insn *insn;
25565 floats_from_frame += saved_size;
25566 insn = emit_insn (gen_addsi3 (ip_rtx,
25567 hard_frame_pointer_rtx,
25568 GEN_INT (-floats_from_frame)));
25569 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25570 ip_rtx, hard_frame_pointer_rtx);
25571 }
25572
25573 /* Generate VFP register multi-pop. */
25574 start_reg = FIRST_VFP_REGNUM;
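/* START_REG tracks the first register of the current run of consecutive
   registers that still need restoring; each gap in the set of live
   registers flushes the run with a single vldm.  */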
25575
25576 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25577 /* Look for a case where a reg does not need restoring. */
25578 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25579 && (!df_regs_ever_live_p (i + 1)
25580 || call_used_regs[i + 1]))
25581 {
25582 if (start_reg != i)
25583 arm_emit_vfp_multi_reg_pop (start_reg,
25584 (i - start_reg) / 2,
25585 gen_rtx_REG (SImode,
25586 IP_REGNUM));
25587 start_reg = i + 2;
25588 }
25589
25590 /* Restore the remaining regs that we have discovered (or possibly
25591 even all of them, if the conditional in the for loop never
25592 fired). */
25593 if (start_reg != i)
25594 arm_emit_vfp_multi_reg_pop (start_reg,
25595 (i - start_reg) / 2,
25596 gen_rtx_REG (SImode, IP_REGNUM));
25597 }
25598
25599 if (TARGET_IWMMXT)
25600 {
25601 /* The frame pointer is guaranteed to be non-double-word aligned, as
25602 it is set to double-word-aligned old_stack_pointer - 4. */
25603 rtx_insn *insn;
25604 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25605
25606 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25607 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25608 {
25609 rtx addr = gen_frame_mem (V2SImode,
25610 plus_constant (Pmode, hard_frame_pointer_rtx,
25611 - lrm_count * 4));
25612 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25613 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25614 gen_rtx_REG (V2SImode, i),
25615 NULL_RTX);
25616 lrm_count += 2;
25617 }
25618 }
25619
25620 /* saved_regs_mask should contain IP, which holds the old stack pointer
25621 from the time the activation record was created. Since SP and IP are
25622 adjacent registers, we can restore the value directly into SP. */
25623 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25624 saved_regs_mask &= ~(1 << IP_REGNUM);
25625 saved_regs_mask |= (1 << SP_REGNUM);
25626
25627 /* There are two registers left in saved_regs_mask - LR and PC. We
25628 only need to restore LR (the return address), but to
25629 save time we can load it directly into PC, unless we need a
25630 special function exit sequence, or we are not really returning. */
25631 if (really_return
25632 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25633 && !crtl->calls_eh_return)
25634 /* Delete LR from the register mask, so that LR on
25635 the stack is loaded into the PC in the register mask. */
25636 saved_regs_mask &= ~(1 << LR_REGNUM);
25637 else
25638 saved_regs_mask &= ~(1 << PC_REGNUM);
25639
25640 num_regs = bit_count (saved_regs_mask);
25641 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25642 {
25643 rtx_insn *insn;
25644 emit_insn (gen_blockage ());
25645 /* Unwind the stack to just below the saved registers. */
25646 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25647 hard_frame_pointer_rtx,
25648 GEN_INT (- 4 * num_regs)));
25649
25650 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25651 stack_pointer_rtx, hard_frame_pointer_rtx);
25652 }
25653
25654 arm_emit_multi_reg_pop (saved_regs_mask);
25655
25656 if (IS_INTERRUPT (func_type))
25657 {
25658 /* Interrupt handlers will have pushed the
25659 IP onto the stack, so restore it now. */
25660 rtx_insn *insn;
25661 rtx addr = gen_rtx_MEM (SImode,
25662 gen_rtx_POST_INC (SImode,
25663 stack_pointer_rtx));
25664 set_mem_alias_set (addr, get_frame_alias_set ());
25665 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25666 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25667 gen_rtx_REG (SImode, IP_REGNUM),
25668 NULL_RTX);
25669 }
25670
25671 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25672 return;
25673
25674 if (crtl->calls_eh_return)
25675 emit_insn (gen_addsi3 (stack_pointer_rtx,
25676 stack_pointer_rtx,
25677 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25678
25679 if (IS_STACKALIGN (func_type))
25680 /* Restore the original stack pointer. Before prologue, the stack was
25681 realigned and the original stack pointer saved in r0. For details,
25682 see comment in arm_expand_prologue. */
25683 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25684
25685 emit_jump_insn (simple_return_rtx);
25686 }
25687
25688 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25689 function is not a sibcall. */
25690 void
25691 arm_expand_epilogue (bool really_return)
25692 {
25693 unsigned long func_type;
25694 unsigned long saved_regs_mask;
25695 int num_regs = 0;
25696 int i;
25697 int amount;
25698 arm_stack_offsets *offsets;
25699
25700 func_type = arm_current_func_type ();
25701
25702 /* Naked functions don't have an epilogue. Hence, generate a return pattern
25703 and let output_return_instruction take care of any instruction emission. */
25704 if (IS_NAKED (func_type)
25705 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25706 {
25707 if (really_return)
25708 emit_jump_insn (simple_return_rtx);
25709 return;
25710 }
25711
25712 /* If we are throwing an exception, then we really must be doing a
25713 return, so we can't tail-call. */
25714 gcc_assert (!crtl->calls_eh_return || really_return);
25715
25716 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25717 {
25718 arm_expand_epilogue_apcs_frame (really_return);
25719 return;
25720 }
25721
25722 /* Get frame offsets for ARM. */
25723 offsets = arm_get_frame_offsets ();
25724 saved_regs_mask = offsets->saved_regs_mask;
25725 num_regs = bit_count (saved_regs_mask);
25726
25727 if (frame_pointer_needed)
25728 {
25729 rtx_insn *insn;
25730 /* Restore stack pointer if necessary. */
25731 if (TARGET_ARM)
25732 {
25733 /* In ARM mode, the frame pointer points to the first saved register.
25734 Restore the stack pointer to the last saved register. */
25735 amount = offsets->frame - offsets->saved_regs;
25736
25737 /* Force out any pending memory operations that reference stacked data
25738 before stack de-allocation occurs. */
25739 emit_insn (gen_blockage ());
25740 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25741 hard_frame_pointer_rtx,
25742 GEN_INT (amount)));
25743 arm_add_cfa_adjust_cfa_note (insn, amount,
25744 stack_pointer_rtx,
25745 hard_frame_pointer_rtx);
25746
25747 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25748 deleted. */
25749 emit_insn (gen_force_register_use (stack_pointer_rtx));
25750 }
25751 else
25752 {
25753 /* In Thumb-2 mode, the frame pointer points to the last saved
25754 register. */
25755 amount = offsets->locals_base - offsets->saved_regs;
25756 if (amount)
25757 {
25758 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25759 hard_frame_pointer_rtx,
25760 GEN_INT (amount)));
25761 arm_add_cfa_adjust_cfa_note (insn, amount,
25762 hard_frame_pointer_rtx,
25763 hard_frame_pointer_rtx);
25764 }
25765
25766 /* Force out any pending memory operations that reference stacked data
25767 before stack de-allocation occurs. */
25768 emit_insn (gen_blockage ());
25769 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25770 hard_frame_pointer_rtx));
25771 arm_add_cfa_adjust_cfa_note (insn, 0,
25772 stack_pointer_rtx,
25773 hard_frame_pointer_rtx);
25774 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25775 deleted. */
25776 emit_insn (gen_force_register_use (stack_pointer_rtx));
25777 }
25778 }
25779 else
25780 {
25781 /* Pop off outgoing args and local frame to adjust stack pointer to
25782 last saved register. */
25783 amount = offsets->outgoing_args - offsets->saved_regs;
25784 if (amount)
25785 {
25786 rtx_insn *tmp;
25787 /* Force out any pending memory operations that reference stacked data
25788 before stack de-allocation occurs. */
25789 emit_insn (gen_blockage ());
25790 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25791 stack_pointer_rtx,
25792 GEN_INT (amount)));
25793 arm_add_cfa_adjust_cfa_note (tmp, amount,
25794 stack_pointer_rtx, stack_pointer_rtx);
25795 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25796 not deleted. */
25797 emit_insn (gen_force_register_use (stack_pointer_rtx));
25798 }
25799 }
25800
25801 if (TARGET_HARD_FLOAT)
25802 {
25803 /* Generate VFP register multi-pop. */
25804 int end_reg = LAST_VFP_REGNUM + 1;
25805
25806 /* Scan the registers in reverse order. We need to match
25807 any groupings made in the prologue and generate matching
25808 vldm operations. The need to match groups is because,
25809 unlike pop, vldm can only do consecutive regs. */
25810 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25811 /* Look for a case where a reg does not need restoring. */
25812 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25813 && (!df_regs_ever_live_p (i + 1)
25814 || call_used_regs[i + 1]))
25815 {
25816 /* Restore the regs discovered so far (from reg+2 to
25817 end_reg). */
25818 if (end_reg > i + 2)
25819 arm_emit_vfp_multi_reg_pop (i + 2,
25820 (end_reg - (i + 2)) / 2,
25821 stack_pointer_rtx);
25822 end_reg = i;
25823 }
25824
25825 /* Restore the remaining regs that we have discovered (or possibly
25826 even all of them, if the conditional in the for loop never
25827 fired). */
25828 if (end_reg > i + 2)
25829 arm_emit_vfp_multi_reg_pop (i + 2,
25830 (end_reg - (i + 2)) / 2,
25831 stack_pointer_rtx);
25832 }
25833
25834 if (TARGET_IWMMXT)
25835 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25836 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25837 {
25838 rtx_insn *insn;
25839 rtx addr = gen_rtx_MEM (V2SImode,
25840 gen_rtx_POST_INC (SImode,
25841 stack_pointer_rtx));
25842 set_mem_alias_set (addr, get_frame_alias_set ());
25843 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25844 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25845 gen_rtx_REG (V2SImode, i),
25846 NULL_RTX);
25847 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25848 stack_pointer_rtx, stack_pointer_rtx);
25849 }
25850
25851 if (saved_regs_mask)
25852 {
25853 rtx insn;
25854 bool return_in_pc = false;
25855
25856 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25857 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25858 && !IS_CMSE_ENTRY (func_type)
25859 && !IS_STACKALIGN (func_type)
25860 && really_return
25861 && crtl->args.pretend_args_size == 0
25862 && saved_regs_mask & (1 << LR_REGNUM)
25863 && !crtl->calls_eh_return)
25864 {
25865 saved_regs_mask &= ~(1 << LR_REGNUM);
25866 saved_regs_mask |= (1 << PC_REGNUM);
25867 return_in_pc = true;
25868 }
25869
25870 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25871 {
25872 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25873 if (saved_regs_mask & (1 << i))
25874 {
25875 rtx addr = gen_rtx_MEM (SImode,
25876 gen_rtx_POST_INC (SImode,
25877 stack_pointer_rtx));
25878 set_mem_alias_set (addr, get_frame_alias_set ());
25879
25880 if (i == PC_REGNUM)
25881 {
25882 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25883 XVECEXP (insn, 0, 0) = ret_rtx;
25884 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25885 addr);
25886 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25887 insn = emit_jump_insn (insn);
25888 }
25889 else
25890 {
25891 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25892 addr));
25893 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25894 gen_rtx_REG (SImode, i),
25895 NULL_RTX);
25896 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25897 stack_pointer_rtx,
25898 stack_pointer_rtx);
25899 }
25900 }
25901 }
25902 else
25903 {
25904 if (TARGET_LDRD
25905 && current_tune->prefer_ldrd_strd
25906 && !optimize_function_for_size_p (cfun))
25907 {
25908 if (TARGET_THUMB2)
25909 thumb2_emit_ldrd_pop (saved_regs_mask);
25910 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25911 arm_emit_ldrd_pop (saved_regs_mask);
25912 else
25913 arm_emit_multi_reg_pop (saved_regs_mask);
25914 }
25915 else
25916 arm_emit_multi_reg_pop (saved_regs_mask);
25917 }
25918
25919 if (return_in_pc)
25920 return;
25921 }
25922
25923 amount
25924 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25925 if (amount)
25926 {
25927 int i, j;
25928 rtx dwarf = NULL_RTX;
25929 rtx_insn *tmp =
25930 emit_insn (gen_addsi3 (stack_pointer_rtx,
25931 stack_pointer_rtx,
25932 GEN_INT (amount)));
25933
25934 RTX_FRAME_RELATED_P (tmp) = 1;
25935
25936 if (cfun->machine->uses_anonymous_args)
25937 {
25938 /* Restore pretend args. Refer to arm_expand_prologue for how the
25939 pretend args are saved on the stack. */
25940 int num_regs = crtl->args.pretend_args_size / 4;
25941 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
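/* E.g. for two pretend words this is (0xf0 >> 2) & 0xf = 0xc, i.e.
   {r2, r3}: the highest NUM_REGS argument registers, which is what the
   prologue pushed.  */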
25942 for (j = 0, i = 0; j < num_regs; i++)
25943 if (saved_regs_mask & (1 << i))
25944 {
25945 rtx reg = gen_rtx_REG (SImode, i);
25946 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25947 j++;
25948 }
25949 REG_NOTES (tmp) = dwarf;
25950 }
25951 arm_add_cfa_adjust_cfa_note (tmp, amount,
25952 stack_pointer_rtx, stack_pointer_rtx);
25953 }
25954
25955 /* Clear all caller-saved regs that are not used to return. */
25956 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25957 {
25958 /* CMSE_ENTRY always returns. */
25959 gcc_assert (really_return);
25960 cmse_nonsecure_entry_clear_before_return ();
25961 }
25962
25963 if (!really_return)
25964 return;
25965
25966 if (crtl->calls_eh_return)
25967 emit_insn (gen_addsi3 (stack_pointer_rtx,
25968 stack_pointer_rtx,
25969 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25970
25971 if (IS_STACKALIGN (func_type))
25972 /* Restore the original stack pointer. Before prologue, the stack was
25973 realigned and the original stack pointer saved in r0. For details,
25974 see comment in arm_expand_prologue. */
25975 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25976
25977 emit_jump_insn (simple_return_rtx);
25978 }
25979
25980 /* Implementation of insn prologue_thumb1_interwork. This is the first
25981 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25982
25983 const char *
25984 thumb1_output_interwork (void)
25985 {
25986 const char * name;
25987 FILE *f = asm_out_file;
25988
25989 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25990 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25991 == SYMBOL_REF);
25992 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
25993
25994 /* Generate code sequence to switch us into Thumb mode. */
25995 /* The .code 32 directive has already been emitted by
25996 ASM_DECLARE_FUNCTION_NAME. */
25997 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
25998 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
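/* Bit 0 of the branch target selects the instruction set, so the
   "orr ip, pc, #1" / "bx ip" pair resumes execution at the following
   instruction in Thumb state.  */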
25999
26000 /* Generate a label, so that the debugger will notice the
26001 change in instruction sets. This label is also used by
26002 the assembler to bypass the ARM code when this function
26003 is called from a Thumb encoded function elsewhere in the
26004 same file. Hence the definition of STUB_NAME here must
26005 agree with the definition in gas/config/tc-arm.c. */
26006
26007 #define STUB_NAME ".real_start_of"
26008
26009 fprintf (f, "\t.code\t16\n");
26010 #ifdef ARM_PE
26011 if (arm_dllexport_name_p (name))
26012 name = arm_strip_name_encoding (name);
26013 #endif
26014 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
26015 fprintf (f, "\t.thumb_func\n");
26016 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
26017
26018 return "";
26019 }
26020
26021 /* Handle the case of a double word load into a low register from
26022 a computed memory address. The computed address may involve a
26023 register which is overwritten by the load. */
26024 const char *
26025 thumb_load_double_from_address (rtx *operands)
26026 {
26027 rtx addr;
26028 rtx base;
26029 rtx offset;
26030 rtx arg1;
26031 rtx arg2;
26032
26033 gcc_assert (REG_P (operands[0]));
26034 gcc_assert (MEM_P (operands[1]));
26035
26036 /* Get the memory address. */
26037 addr = XEXP (operands[1], 0);
26038
26039 /* Work out how the memory address is computed. */
26040 switch (GET_CODE (addr))
26041 {
26042 case REG:
26043 operands[2] = adjust_address (operands[1], SImode, 4);
26044
26045 if (REGNO (operands[0]) == REGNO (addr))
26046 {
26047 output_asm_insn ("ldr\t%H0, %2", operands);
26048 output_asm_insn ("ldr\t%0, %1", operands);
26049 }
26050 else
26051 {
26052 output_asm_insn ("ldr\t%0, %1", operands);
26053 output_asm_insn ("ldr\t%H0, %2", operands);
26054 }
26055 break;
26056
26057 case CONST:
26058 /* Compute <address> + 4 for the high order load. */
26059 operands[2] = adjust_address (operands[1], SImode, 4);
26060
26061 output_asm_insn ("ldr\t%0, %1", operands);
26062 output_asm_insn ("ldr\t%H0, %2", operands);
26063 break;
26064
26065 case PLUS:
26066 arg1 = XEXP (addr, 0);
26067 arg2 = XEXP (addr, 1);
26068
26069 if (CONSTANT_P (arg1))
26070 base = arg2, offset = arg1;
26071 else
26072 base = arg1, offset = arg2;
26073
26074 gcc_assert (REG_P (base));
26075
26076 /* Catch the case of <address> = <reg> + <reg> */
26077 if (REG_P (offset))
26078 {
26079 int reg_offset = REGNO (offset);
26080 int reg_base = REGNO (base);
26081 int reg_dest = REGNO (operands[0]);
26082
26083 /* Add the base and offset registers together into the
26084 higher destination register. */
26085 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
26086 reg_dest + 1, reg_base, reg_offset);
26087
26088 /* Load the lower destination register from the address in
26089 the higher destination register. */
26090 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
26091 reg_dest, reg_dest + 1);
26092
26093 /* Load the higher destination register from its own address
26094 plus 4. */
26095 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
26096 reg_dest + 1, reg_dest + 1);
26097 }
26098 else
26099 {
26100 /* Compute <address> + 4 for the high order load. */
26101 operands[2] = adjust_address (operands[1], SImode, 4);
26102
26103 /* If the computed address is held in the low order register
26104 then load the high order register first, otherwise always
26105 load the low order register first. */
26106 if (REGNO (operands[0]) == REGNO (base))
26107 {
26108 output_asm_insn ("ldr\t%H0, %2", operands);
26109 output_asm_insn ("ldr\t%0, %1", operands);
26110 }
26111 else
26112 {
26113 output_asm_insn ("ldr\t%0, %1", operands);
26114 output_asm_insn ("ldr\t%H0, %2", operands);
26115 }
26116 }
26117 break;
26118
26119 case LABEL_REF:
26120 /* With no registers to worry about we can just load the value
26121 directly. */
26122 operands[2] = adjust_address (operands[1], SImode, 4);
26123
26124 output_asm_insn ("ldr\t%H0, %2", operands);
26125 output_asm_insn ("ldr\t%0, %1", operands);
26126 break;
26127
26128 default:
26129 gcc_unreachable ();
26130 }
26131
26132 return "";
26133 }
26134
26135 const char *
26136 thumb_output_move_mem_multiple (int n, rtx *operands)
26137 {
26138 switch (n)
26139 {
26140 case 2:
26141 if (REGNO (operands[4]) > REGNO (operands[5]))
26142 std::swap (operands[4], operands[5]);
26143
26144 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
26145 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
26146 break;
26147
26148 case 3:
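/* Sort the three scratch registers into ascending order with a
   three-exchange network: ldmia/stmia require their register lists in
   increasing register number.  */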
26149 if (REGNO (operands[4]) > REGNO (operands[5]))
26150 std::swap (operands[4], operands[5]);
26151 if (REGNO (operands[5]) > REGNO (operands[6]))
26152 std::swap (operands[5], operands[6]);
26153 if (REGNO (operands[4]) > REGNO (operands[5]))
26154 std::swap (operands[4], operands[5]);
26155
26156 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
26157 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
26158 break;
26159
26160 default:
26161 gcc_unreachable ();
26162 }
26163
26164 return "";
26165 }
26166
26167 /* Output a call-via instruction for thumb state. */
26168 const char *
26169 thumb_call_via_reg (rtx reg)
26170 {
26171 int regno = REGNO (reg);
26172 rtx *labelp;
26173
26174 gcc_assert (regno < LR_REGNUM);
26175
26176 /* If we are in the normal text section we can use a single instance
26177 per compilation unit. If we are doing function sections, then we need
26178 an entry per section, since we can't rely on reachability. */
26179 if (in_section == text_section)
26180 {
26181 thumb_call_reg_needed = 1;
26182
26183 if (thumb_call_via_label[regno] == NULL)
26184 thumb_call_via_label[regno] = gen_label_rtx ();
26185 labelp = thumb_call_via_label + regno;
26186 }
26187 else
26188 {
26189 if (cfun->machine->call_via[regno] == NULL)
26190 cfun->machine->call_via[regno] = gen_label_rtx ();
26191 labelp = cfun->machine->call_via + regno;
26192 }
26193
26194 output_asm_insn ("bl\t%a0", labelp);
26195 return "";
26196 }
26197
26198 /* Routines for generating rtl. */
26199 void
26200 thumb_expand_movmemqi (rtx *operands)
26201 {
26202 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26203 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26204 HOST_WIDE_INT len = INTVAL (operands[2]);
26205 HOST_WIDE_INT offset = 0;
26206
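/* Copy 12- and 8-byte chunks first with the multi-register move patterns,
   then mop up the remaining bytes (at most 7) with word, halfword and
   byte moves at increasing offsets.  */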
26207 while (len >= 12)
26208 {
26209 emit_insn (gen_movmem12b (out, in, out, in));
26210 len -= 12;
26211 }
26212
26213 if (len >= 8)
26214 {
26215 emit_insn (gen_movmem8b (out, in, out, in));
26216 len -= 8;
26217 }
26218
26219 if (len >= 4)
26220 {
26221 rtx reg = gen_reg_rtx (SImode);
26222 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26223 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26224 len -= 4;
26225 offset += 4;
26226 }
26227
26228 if (len >= 2)
26229 {
26230 rtx reg = gen_reg_rtx (HImode);
26231 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26232 plus_constant (Pmode, in,
26233 offset))));
26234 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26235 offset)),
26236 reg));
26237 len -= 2;
26238 offset += 2;
26239 }
26240
26241 if (len)
26242 {
26243 rtx reg = gen_reg_rtx (QImode);
26244 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26245 plus_constant (Pmode, in,
26246 offset))));
26247 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26248 offset)),
26249 reg));
26250 }
26251 }
26252
26253 void
26254 thumb_reload_out_hi (rtx *operands)
26255 {
26256 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26257 }
26258
26259 /* Return the length of a function name prefix
26260 that starts with the character 'c'. */
26261 static int
26262 arm_get_strip_length (int c)
26263 {
26264 switch (c)
26265 {
26266 ARM_NAME_ENCODING_LENGTHS
26267 default: return 0;
26268 }
26269 }
26270
26271 /* Return a pointer to a function's name with any
26272 and all prefix encodings stripped from it. */
26273 const char *
26274 arm_strip_name_encoding (const char *name)
26275 {
26276 int skip;
26277
26278 while ((skip = arm_get_strip_length (* name)))
26279 name += skip;
26280
26281 return name;
26282 }
26283
26284 /* If there is a '*' anywhere in the name's prefix, then
26285 emit the stripped name verbatim, otherwise prepend an
26286 underscore if leading underscores are being used. */
26287 void
26288 arm_asm_output_labelref (FILE *stream, const char *name)
26289 {
26290 int skip;
26291 int verbatim = 0;
26292
26293 while ((skip = arm_get_strip_length (* name)))
26294 {
26295 verbatim |= (*name == '*');
26296 name += skip;
26297 }
26298
26299 if (verbatim)
26300 fputs (name, stream);
26301 else
26302 asm_fprintf (stream, "%U%s", name);
26303 }
26304
26305 /* This function is used to emit an EABI tag and its associated value.
26306 We emit the numerical value of the tag in case the assembler does not
26307 support textual tags (e.g. gas prior to 2.20). If requested we include
26308 the tag name in a comment so that anyone reading the assembler output
26309 will know which tag is being set.
26310
26311 This function is not static because arm-c.c needs it too. */
26312
26313 void
26314 arm_emit_eabi_attribute (const char *name, int num, int val)
26315 {
26316 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26317 if (flag_verbose_asm || flag_debug_asm)
26318 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26319 asm_fprintf (asm_out_file, "\n");
26320 }
26321
26322 /* This function is used to print CPU tuning information as comment
26323 in assembler file. Pointers are not printed for now. */
26324
26325 void
26326 arm_print_tune_info (void)
26327 {
26328 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26329 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26330 current_tune->constant_limit);
26331 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26332 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26333 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26334 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26335 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26336 "prefetch.l1_cache_size:\t%d\n",
26337 current_tune->prefetch.l1_cache_size);
26338 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26339 "prefetch.l1_cache_line_size:\t%d\n",
26340 current_tune->prefetch.l1_cache_line_size);
26341 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26342 "prefer_constant_pool:\t%d\n",
26343 (int) current_tune->prefer_constant_pool);
26344 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26345 "branch_cost:\t(s:speed, p:predictable)\n");
26346 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26347 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26348 current_tune->branch_cost (false, false));
26349 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26350 current_tune->branch_cost (false, true));
26351 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26352 current_tune->branch_cost (true, false));
26353 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26354 current_tune->branch_cost (true, true));
26355 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26356 "prefer_ldrd_strd:\t%d\n",
26357 (int) current_tune->prefer_ldrd_strd);
26358 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26359 "logical_op_non_short_circuit:\t[%d,%d]\n",
26360 (int) current_tune->logical_op_non_short_circuit_thumb,
26361 (int) current_tune->logical_op_non_short_circuit_arm);
26362 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26363 "prefer_neon_for_64bits:\t%d\n",
26364 (int) current_tune->prefer_neon_for_64bits);
26365 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26366 "disparage_flag_setting_t16_encodings:\t%d\n",
26367 (int) current_tune->disparage_flag_setting_t16_encodings);
26368 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26369 "string_ops_prefer_neon:\t%d\n",
26370 (int) current_tune->string_ops_prefer_neon);
26371 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26372 "max_insns_inline_memset:\t%d\n",
26373 current_tune->max_insns_inline_memset);
26374 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26375 current_tune->fusible_ops);
26376 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26377 (int) current_tune->sched_autopref);
26378 }
26379
26380 /* Print .arch and .arch_extension directives corresponding to the
26381 current architecture configuration. */
26382 static void
26383 arm_print_asm_arch_directives ()
26384 {
26385 const arch_option *arch
26386 = arm_parse_arch_option_name (all_architectures, "-march",
26387 arm_active_target.arch_name);
26388 auto_sbitmap opt_bits (isa_num_bits);
26389
26390 gcc_assert (arch);
26391
26392 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26393 if (!arch->common.extensions)
26394 return;
26395
26396 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26397 opt->name != NULL;
26398 opt++)
26399 {
26400 if (!opt->remove)
26401 {
26402 arm_initialize_isa (opt_bits, opt->isa_bits);
26403
26404 /* If every feature bit of this option is set in the target
26405 ISA specification, print out the option name. However,
26406 don't print anything if all the bits are part of the
26407 FPU specification. */
26408 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26409 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26410 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26411 }
26412 }
26413 }
26414
26415 static void
26416 arm_file_start (void)
26417 {
26418 int val;
26419
26420 if (TARGET_BPABI)
26421 {
26422 /* We don't have a specified CPU. Use the architecture to
26423 generate the tags.
26424
26425 Note: it might be better to do this unconditionally, then the
26426 assembler would not need to know about all new CPU names as
26427 they are added. */
26428 if (!arm_active_target.core_name)
26429 {
26430 /* armv7ve doesn't support any extensions. */
26431 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26432 {
26433 /* Keep backward compatibility for assemblers
26434 which don't support armv7ve. */
26435 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26436 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26437 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26438 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26439 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26440 }
26441 else
26442 arm_print_asm_arch_directives ();
26443 }
26444 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26445 asm_fprintf (asm_out_file, "\t.arch %s\n",
26446 arm_active_target.core_name + 8);
26447 else
26448 {
26449 const char* truncated_name
26450 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26451 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26452 }
26453
26454 if (print_tune_info)
26455 arm_print_tune_info ();
26456
26457 if (! TARGET_SOFT_FLOAT)
26458 {
26459 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26460 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26461
26462 if (TARGET_HARD_FLOAT_ABI)
26463 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26464 }
26465
26466 /* Some of these attributes only apply when the corresponding features
26467 are used. However we don't have any easy way of figuring this out.
26468 Conservatively record the setting that would have been used. */
26469
26470 if (flag_rounding_math)
26471 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26472
26473 if (!flag_unsafe_math_optimizations)
26474 {
26475 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26476 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26477 }
26478 if (flag_signaling_nans)
26479 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26480
26481 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26482 flag_finite_math_only ? 1 : 3);
26483
26484 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26485 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26486 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26487 flag_short_enums ? 1 : 2);
26488
26489 /* Tag_ABI_optimization_goals. */
26490 if (optimize_size)
26491 val = 4;
26492 else if (optimize >= 2)
26493 val = 2;
26494 else if (optimize)
26495 val = 1;
26496 else
26497 val = 6;
26498 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26499
26500 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26501 unaligned_access);
26502
26503 if (arm_fp16_format)
26504 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26505 (int) arm_fp16_format);
26506
26507 if (arm_lang_output_object_attributes_hook)
26508 arm_lang_output_object_attributes_hook();
26509 }
26510
26511 default_file_start ();
26512 }
26513
26514 static void
26515 arm_file_end (void)
26516 {
26517 int regno;
26518
26519 if (NEED_INDICATE_EXEC_STACK)
26520 /* Add .note.GNU-stack. */
26521 file_end_indicate_exec_stack ();
26522
26523 if (! thumb_call_reg_needed)
26524 return;
26525
26526 switch_to_section (text_section);
26527 asm_fprintf (asm_out_file, "\t.code 16\n");
26528 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26529
26530 for (regno = 0; regno < LR_REGNUM; regno++)
26531 {
26532 rtx label = thumb_call_via_label[regno];
26533
26534 if (label != 0)
26535 {
26536 targetm.asm_out.internal_label (asm_out_file, "L",
26537 CODE_LABEL_NUMBER (label));
26538 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26539 }
26540 }
26541 }
26542
26543 #ifndef ARM_PE
26544 /* Symbols in the text segment can be accessed without indirecting via the
26545 constant pool; it may take an extra binary operation, but this is still
26546 faster than indirecting via memory. Don't do this when not optimizing,
26547 since we won't be calculating al of the offsets necessary to do this
26548 simplification. */
26549
26550 static void
26551 arm_encode_section_info (tree decl, rtx rtl, int first)
26552 {
26553 if (optimize > 0 && TREE_CONSTANT (decl))
26554 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26555
26556 default_encode_section_info (decl, rtl, first);
26557 }
26558 #endif /* !ARM_PE */
26559
26560 static void
26561 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26562 {
26563 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26564 && !strcmp (prefix, "L"))
26565 {
26566 arm_ccfsm_state = 0;
26567 arm_target_insn = NULL;
26568 }
26569 default_internal_label (stream, prefix, labelno);
26570 }
26571
26572 /* Output code to add DELTA to the first argument, and then jump
26573 to FUNCTION. Used for C++ multiple inheritance. */
26574
26575 static void
26576 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26577 HOST_WIDE_INT, tree function)
26578 {
26579 static int thunk_label = 0;
26580 char label[256];
26581 char labelpc[256];
26582 int mi_delta = delta;
26583 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26584 int shift = 0;
26585 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26586 ? 1 : 0);
26587 if (mi_delta < 0)
26588 mi_delta = - mi_delta;
26589
26590 final_start_function (emit_barrier (), file, 1);
26591
26592 if (TARGET_THUMB1)
26593 {
26594 int labelno = thunk_label++;
26595 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26596 /* Thunks are entered in ARM mode when available. */
26597 if (TARGET_THUMB1_ONLY)
26598 {
26599 /* push r3 so we can use it as a temporary. */
26600 /* TODO: Omit this save if r3 is not used. */
26601 fputs ("\tpush {r3}\n", file);
26602 fputs ("\tldr\tr3, ", file);
26603 }
26604 else
26605 {
26606 fputs ("\tldr\tr12, ", file);
26607 }
26608 assemble_name (file, label);
26609 fputc ('\n', file);
26610 if (flag_pic)
26611 {
26612 /* If we are generating PIC, the ldr instruction below loads
26613 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26614 the address of the add + 8, so we have:
26615
26616 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26617 = target + 1.
26618
26619 Note that we have "+ 1" because some versions of GNU ld
26620 don't set the low bit of the result for R_ARM_REL32
26621 relocations against thumb function symbols.
26622 On ARMv6M this is +4, not +8. */
26623 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26624 assemble_name (file, labelpc);
26625 fputs (":\n", file);
26626 if (TARGET_THUMB1_ONLY)
26627 {
26628 /* This is 2 insns after the start of the thunk, so we know it
26629 is 4-byte aligned. */
26630 fputs ("\tadd\tr3, pc, r3\n", file);
26631 fputs ("\tmov r12, r3\n", file);
26632 }
26633 else
26634 fputs ("\tadd\tr12, pc, r12\n", file);
26635 }
26636 else if (TARGET_THUMB1_ONLY)
26637 fputs ("\tmov r12, r3\n", file);
26638 }
26639 if (TARGET_THUMB1_ONLY)
26640 {
26641 if (mi_delta > 255)
26642 {
26643 fputs ("\tldr\tr3, ", file);
26644 assemble_name (file, label);
26645 fputs ("+4\n", file);
26646 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26647 mi_op, this_regno, this_regno);
26648 }
26649 else if (mi_delta != 0)
26650 {
26651 /* Thumb1 unified syntax requires s suffix in instruction name when
26652 one of the operands is immediate. */
26653 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26654 mi_op, this_regno, this_regno,
26655 mi_delta);
26656 }
26657 }
26658 else
26659 {
26660 /* TODO: Use movw/movt for large constants when available. */
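/* Emit the delta as a sequence of add/sub instructions whose immediates
   are 8-bit chunks at even bit positions, matching the ARM data-processing
   immediate encoding (an 8-bit value rotated by an even amount).  */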
26661 while (mi_delta != 0)
26662 {
26663 if ((mi_delta & (3 << shift)) == 0)
26664 shift += 2;
26665 else
26666 {
26667 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26668 mi_op, this_regno, this_regno,
26669 mi_delta & (0xff << shift));
26670 mi_delta &= ~(0xff << shift);
26671 shift += 8;
26672 }
26673 }
26674 }
26675 if (TARGET_THUMB1)
26676 {
26677 if (TARGET_THUMB1_ONLY)
26678 fputs ("\tpop\t{r3}\n", file);
26679
26680 fprintf (file, "\tbx\tr12\n");
26681 ASM_OUTPUT_ALIGN (file, 2);
26682 assemble_name (file, label);
26683 fputs (":\n", file);
26684 if (flag_pic)
26685 {
26686 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26687 rtx tem = XEXP (DECL_RTL (function), 0);
26688 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26689 pipeline offset is four rather than eight. Adjust the offset
26690 accordingly. */
26691 tem = plus_constant (GET_MODE (tem), tem,
26692 TARGET_THUMB1_ONLY ? -3 : -7);
26693 tem = gen_rtx_MINUS (GET_MODE (tem),
26694 tem,
26695 gen_rtx_SYMBOL_REF (Pmode,
26696 ggc_strdup (labelpc)));
26697 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26698 }
26699 else
26700 /* Output ".word .LTHUNKn". */
26701 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26702
26703 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26704 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26705 }
26706 else
26707 {
26708 fputs ("\tb\t", file);
26709 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26710 if (NEED_PLT_RELOC)
26711 fputs ("(PLT)", file);
26712 fputc ('\n', file);
26713 }
26714
26715 final_end_function ();
26716 }
26717
26718 /* MI thunk handling for TARGET_32BIT. */
26719
26720 static void
26721 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26722 HOST_WIDE_INT vcall_offset, tree function)
26723 {
26724 /* On ARM, this_regno is R0 or R1 depending on
26725 whether the function returns an aggregate or not.
26726 */
26727 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26728 function)
26729 ? R1_REGNUM : R0_REGNUM);
26730
26731 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26732 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26733 reload_completed = 1;
26734 emit_note (NOTE_INSN_PROLOGUE_END);
26735
26736 /* Add DELTA to THIS_RTX. */
26737 if (delta != 0)
26738 arm_split_constant (PLUS, Pmode, NULL_RTX,
26739 delta, this_rtx, this_rtx, false);
26740
26741 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26742 if (vcall_offset != 0)
26743 {
26744 /* Load *THIS_RTX. */
26745 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26746 /* Compute *THIS_RTX + VCALL_OFFSET. */
26747 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26748 false);
26749 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26750 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26751 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26752 }
26753
26754 /* Generate a tail call to the target function. */
26755 if (!TREE_USED (function))
26756 {
26757 assemble_external (function);
26758 TREE_USED (function) = 1;
26759 }
26760 rtx funexp = XEXP (DECL_RTL (function), 0);
26761 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26762 rtx_insn * insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26763 SIBLING_CALL_P (insn) = 1;
26764
26765 insn = get_insns ();
26766 shorten_branches (insn);
26767 final_start_function (insn, file, 1);
26768 final (insn, file, 1);
26769 final_end_function ();
26770
26771 /* Stop pretending this is a post-reload pass. */
26772 reload_completed = 0;
26773 }
26774
26775 /* Output code to add DELTA to the first argument, and then jump
26776 to FUNCTION. Used for C++ multiple inheritance. */
26777
26778 static void
26779 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26780 HOST_WIDE_INT vcall_offset, tree function)
26781 {
26782 if (TARGET_32BIT)
26783 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26784 else
26785 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26786 }
26787
26788 int
26789 arm_emit_vector_const (FILE *file, rtx x)
26790 {
26791 int i;
26792 const char * pattern;
26793
26794 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26795
26796 switch (GET_MODE (x))
26797 {
26798 case E_V2SImode: pattern = "%08x"; break;
26799 case E_V4HImode: pattern = "%04x"; break;
26800 case E_V8QImode: pattern = "%02x"; break;
26801 default: gcc_unreachable ();
26802 }
26803
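/* Emit the vector as a single hexadecimal literal, most significant
   element first, using a field width that matches the element size.  */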
26804 fprintf (file, "0x");
26805 for (i = CONST_VECTOR_NUNITS (x); i--;)
26806 {
26807 rtx element;
26808
26809 element = CONST_VECTOR_ELT (x, i);
26810 fprintf (file, pattern, INTVAL (element));
26811 }
26812
26813 return 1;
26814 }
26815
26816 /* Emit an fp16 constant appropriately padded to occupy a 4-byte word.
26817 HFmode constant pool entries are actually loaded with ldr. */
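/* For instance, the HFmode constant 1.0 (bit pattern 0x3c00) would be
   emitted as the 2-byte value 0x3c00 followed by 2 bytes of zero padding
   on a little-endian word order, and as the padding followed by the
   value when WORDS_BIG_ENDIAN. */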
26818 void
26819 arm_emit_fp16_const (rtx c)
26820 {
26821 long bits;
26822
26823 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26824 if (WORDS_BIG_ENDIAN)
26825 assemble_zeros (2);
26826 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26827 if (!WORDS_BIG_ENDIAN)
26828 assemble_zeros (2);
26829 }
26830
26831 const char *
26832 arm_output_load_gr (rtx *operands)
26833 {
26834 rtx reg;
26835 rtx offset;
26836 rtx wcgr;
26837 rtx sum;
26838
26839 if (!MEM_P (operands [1])
26840 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26841 || !REG_P (reg = XEXP (sum, 0))
26842 || !CONST_INT_P (offset = XEXP (sum, 1))
26843 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26844 return "wldrw%?\t%0, %1";
26845
26846 /* Fix up an out-of-range load of a GR register. */
26847 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26848 wcgr = operands[0];
26849 operands[0] = reg;
26850 output_asm_insn ("ldr%?\t%0, %1", operands);
26851
26852 operands[0] = wcgr;
26853 operands[1] = reg;
26854 output_asm_insn ("tmcr%?\t%0, %1", operands);
26855 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26856
26857 return "";
26858 }
26859
26860 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26861
26862 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26863 named arg and all anonymous args onto the stack.
26864 XXX I know the prologue shouldn't be pushing registers, but it is faster
26865 that way. */
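/* As a rough example: for a variadic function whose named arguments use
   r0 and r1, nregs is 2, so *pretend_size becomes
   (NUM_ARG_REGS - 2) * UNITS_PER_WORD == 8 bytes on ARM (r0-r3 being the
   argument registers) and the prologue pushes r2 and r3. */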
26866
26867 static void
26868 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26869 machine_mode mode,
26870 tree type,
26871 int *pretend_size,
26872 int second_time ATTRIBUTE_UNUSED)
26873 {
26874 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26875 int nregs;
26876
26877 cfun->machine->uses_anonymous_args = 1;
26878 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26879 {
26880 nregs = pcum->aapcs_ncrn;
26881 if (nregs & 1)
26882 {
26883 int res = arm_needs_doubleword_align (mode, type);
26884 if (res < 0 && warn_psabi)
26885 inform (input_location, "parameter passing for argument of "
26886 "type %qT changed in GCC 7.1", type);
26887 else if (res > 0)
26888 nregs++;
26889 }
26890 }
26891 else
26892 nregs = pcum->nregs;
26893
26894 if (nregs < NUM_ARG_REGS)
26895 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26896 }
26897
26898 /* We can't rely on the caller doing the proper promotion when
26899 using APCS or ATPCS. */
26900
26901 static bool
26902 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26903 {
26904 return !TARGET_AAPCS_BASED;
26905 }
26906
26907 static machine_mode
26908 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26909 machine_mode mode,
26910 int *punsignedp ATTRIBUTE_UNUSED,
26911 const_tree fntype ATTRIBUTE_UNUSED,
26912 int for_return ATTRIBUTE_UNUSED)
26913 {
26914 if (GET_MODE_CLASS (mode) == MODE_INT
26915 && GET_MODE_SIZE (mode) < 4)
26916 return SImode;
26917
26918 return mode;
26919 }
26920
26921
26922 static bool
26923 arm_default_short_enums (void)
26924 {
26925 return ARM_DEFAULT_SHORT_ENUMS;
26926 }
26927
26928
26929 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26930
26931 static bool
26932 arm_align_anon_bitfield (void)
26933 {
26934 return TARGET_AAPCS_BASED;
26935 }
26936
26937
26938 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26939
26940 static tree
26941 arm_cxx_guard_type (void)
26942 {
26943 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26944 }
26945
26946
26947 /* The EABI says test the least significant bit of a guard variable. */
26948
26949 static bool
26950 arm_cxx_guard_mask_bit (void)
26951 {
26952 return TARGET_AAPCS_BASED;
26953 }
26954
26955
26956 /* The EABI specifies that all array cookies are 8 bytes long. */
26957
26958 static tree
26959 arm_get_cookie_size (tree type)
26960 {
26961 tree size;
26962
26963 if (!TARGET_AAPCS_BASED)
26964 return default_cxx_get_cookie_size (type);
26965
26966 size = build_int_cst (sizetype, 8);
26967 return size;
26968 }
26969
26970
26971 /* The EABI says that array cookies should also contain the element size. */
26972
26973 static bool
26974 arm_cookie_has_size (void)
26975 {
26976 return TARGET_AAPCS_BASED;
26977 }
26978
26979
26980 /* The EABI says constructors and destructors should return a pointer to
26981 the object constructed/destroyed. */
26982
26983 static bool
26984 arm_cxx_cdtor_returns_this (void)
26985 {
26986 return TARGET_AAPCS_BASED;
26987 }
26988
26989 /* The EABI says that an inline function may never be the key
26990 method. */
26991
26992 static bool
26993 arm_cxx_key_method_may_be_inline (void)
26994 {
26995 return !TARGET_AAPCS_BASED;
26996 }
26997
26998 static void
26999 arm_cxx_determine_class_data_visibility (tree decl)
27000 {
27001 if (!TARGET_AAPCS_BASED
27002 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
27003 return;
27004
27005 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
27006 is exported. However, on systems without dynamic vague linkage,
27007 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
27008 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
27009 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
27010 else
27011 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
27012 DECL_VISIBILITY_SPECIFIED (decl) = 1;
27013 }
27014
27015 static bool
27016 arm_cxx_class_data_always_comdat (void)
27017 {
27018 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
27019 vague linkage if the class has no key function. */
27020 return !TARGET_AAPCS_BASED;
27021 }
27022
27023
27024 /* The EABI says __aeabi_atexit should be used to register static
27025 destructors. */
27026
27027 static bool
27028 arm_cxx_use_aeabi_atexit (void)
27029 {
27030 return TARGET_AAPCS_BASED;
27031 }
27032
27033
27034 void
27035 arm_set_return_address (rtx source, rtx scratch)
27036 {
27037 arm_stack_offsets *offsets;
27038 HOST_WIDE_INT delta;
27039 rtx addr, mem;
27040 unsigned long saved_regs;
27041
27042 offsets = arm_get_frame_offsets ();
27043 saved_regs = offsets->saved_regs_mask;
27044
27045 if ((saved_regs & (1 << LR_REGNUM)) == 0)
27046 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27047 else
27048 {
27049 if (frame_pointer_needed)
27050 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
27051 else
27052 {
27053 /* LR will be the first saved register. */
27054 delta = offsets->outgoing_args - (offsets->frame + 4);
27055
27056
27057 if (delta >= 4096)
27058 {
27059 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
27060 GEN_INT (delta & ~4095)));
27061 addr = scratch;
27062 delta &= 4095;
27063 }
27064 else
27065 addr = stack_pointer_rtx;
27066
27067 addr = plus_constant (Pmode, addr, delta);
27068 }
27069
27070 /* The store needs to be marked to prevent DSE from deleting
27071 it as dead if it is based on fp. */
27072 mem = gen_frame_mem (Pmode, addr);
27073 MEM_VOLATILE_P (mem) = true;
27074 emit_move_insn (mem, source);
27075 }
27076 }
27077
27078
27079 void
27080 thumb_set_return_address (rtx source, rtx scratch)
27081 {
27082 arm_stack_offsets *offsets;
27083 HOST_WIDE_INT delta;
27084 HOST_WIDE_INT limit;
27085 int reg;
27086 rtx addr, mem;
27087 unsigned long mask;
27088
27089 emit_use (source);
27090
27091 offsets = arm_get_frame_offsets ();
27092 mask = offsets->saved_regs_mask;
27093 if (mask & (1 << LR_REGNUM))
27094 {
27095 limit = 1024;
27096 /* Find the saved regs. */
27097 if (frame_pointer_needed)
27098 {
27099 delta = offsets->soft_frame - offsets->saved_args;
27100 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
27101 if (TARGET_THUMB1)
27102 limit = 128;
27103 }
27104 else
27105 {
27106 delta = offsets->outgoing_args - offsets->saved_args;
27107 reg = SP_REGNUM;
27108 }
27109 /* Allow for the stack frame. */
27110 if (TARGET_THUMB1 && TARGET_BACKTRACE)
27111 delta -= 16;
27112 /* The link register is always the first saved register. */
27113 delta -= 4;
27114
27115 /* Construct the address. */
27116 addr = gen_rtx_REG (SImode, reg);
27117 if (delta > limit)
27118 {
27119 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
27120 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
27121 addr = scratch;
27122 }
27123 else
27124 addr = plus_constant (Pmode, addr, delta);
27125
27126 /* The store needs to be marked to prevent DSE from deleting
27127 it as dead if it is based on fp. */
27128 mem = gen_frame_mem (Pmode, addr);
27129 MEM_VOLATILE_P (mem) = true;
27130 emit_move_insn (mem, source);
27131 }
27132 else
27133 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27134 }
27135
27136 /* Implements target hook vector_mode_supported_p. */
27137 bool
27138 arm_vector_mode_supported_p (machine_mode mode)
27139 {
27140 /* Neon also supports V2SImode, etc. listed in the clause below. */
27141 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
27142 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
27143 || mode == V2DImode || mode == V8HFmode))
27144 return true;
27145
27146 if ((TARGET_NEON || TARGET_IWMMXT)
27147 && ((mode == V2SImode)
27148 || (mode == V4HImode)
27149 || (mode == V8QImode)))
27150 return true;
27151
27152 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
27153 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
27154 || mode == V2HAmode))
27155 return true;
27156
27157 return false;
27158 }
27159
27160 /* Implements target hook array_mode_supported_p. */
27161
27162 static bool
27163 arm_array_mode_supported_p (machine_mode mode,
27164 unsigned HOST_WIDE_INT nelems)
27165 {
27166 if (TARGET_NEON
27167 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
27168 && (nelems >= 2 && nelems <= 4))
27169 return true;
27170
27171 return false;
27172 }
27173
27174 /* Use the option -mvectorize-with-neon-double to override the use of quadword
27175 registers when autovectorizing for Neon, at least until multiple vector
27176 widths are supported properly by the middle-end. */
27177
27178 static machine_mode
27179 arm_preferred_simd_mode (scalar_mode mode)
27180 {
27181 if (TARGET_NEON)
27182 switch (mode)
27183 {
27184 case E_SFmode:
27185 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27186 case E_SImode:
27187 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27188 case E_HImode:
27189 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27190 case E_QImode:
27191 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27192 case E_DImode:
27193 if (!TARGET_NEON_VECTORIZE_DOUBLE)
27194 return V2DImode;
27195 break;
27196
27197 default:;
27198 }
27199
27200 if (TARGET_REALLY_IWMMXT)
27201 switch (mode)
27202 {
27203 case E_SImode:
27204 return V2SImode;
27205 case E_HImode:
27206 return V4HImode;
27207 case E_QImode:
27208 return V8QImode;
27209
27210 default:;
27211 }
27212
27213 return word_mode;
27214 }
27215
27216 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27217
27218 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27219 using r0-r4 for function arguments and r7 for the stack frame, leaving too
27220 little left over to do doubleword arithmetic. For Thumb-2 all the
27221 potentially problematic instructions accept high registers so this is not
27222 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27223 that require many low registers. */
27224 static bool
27225 arm_class_likely_spilled_p (reg_class_t rclass)
27226 {
27227 if ((TARGET_THUMB1 && rclass == LO_REGS)
27228 || rclass == CC_REG)
27229 return true;
27230
27231 return false;
27232 }
27233
27234 /* Implements target hook small_register_classes_for_mode_p. */
27235 bool
27236 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27237 {
27238 return TARGET_THUMB1;
27239 }
27240
27241 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27242 ARM insns and therefore guarantee that the shift count is modulo 256.
27243 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27244 guarantee no particular behavior for out-of-range counts. */
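/* As a concrete illustration: with the SImode mask of 255, a variable
   shift count of 257 behaves like a shift by 257 & 255 == 1, whereas the
   zero mask for DImode tells the middle-end to assume nothing about
   out-of-range counts. */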
27245
27246 static unsigned HOST_WIDE_INT
27247 arm_shift_truncation_mask (machine_mode mode)
27248 {
27249 return mode == SImode ? 255 : 0;
27250 }
27251
27252
27253 /* Map internal gcc register numbers to DWARF2 register numbers. */
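/* A few illustrative mappings: core register r11 stays 11, s0 becomes
   DWARF register 64, d16 becomes 256 + 16 == 272, and anything we cannot
   describe falls back to DWARF_FRAME_REGISTERS. */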
27254
27255 unsigned int
27256 arm_dbx_register_number (unsigned int regno)
27257 {
27258 if (regno < 16)
27259 return regno;
27260
27261 if (IS_VFP_REGNUM (regno))
27262 {
27263 /* See comment in arm_dwarf_register_span. */
27264 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27265 return 64 + regno - FIRST_VFP_REGNUM;
27266 else
27267 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27268 }
27269
27270 if (IS_IWMMXT_GR_REGNUM (regno))
27271 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27272
27273 if (IS_IWMMXT_REGNUM (regno))
27274 return 112 + regno - FIRST_IWMMXT_REGNUM;
27275
27276 return DWARF_FRAME_REGISTERS;
27277 }
27278
27279 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27280 GCC models them as 64 32-bit registers, so we need to describe this to
27281 the DWARF generation code. Other registers can use the default. */
27282 static rtx
27283 arm_dwarf_register_span (rtx rtl)
27284 {
27285 machine_mode mode;
27286 unsigned regno;
27287 rtx parts[16];
27288 int nregs;
27289 int i;
27290
27291 regno = REGNO (rtl);
27292 if (!IS_VFP_REGNUM (regno))
27293 return NULL_RTX;
27294
27295 /* XXX FIXME: The EABI defines two VFP register ranges:
27296 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27297 256-287: D0-D31
27298 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27299 corresponding D register. Until GDB supports this, we shall use the
27300 legacy encodings. We also use these encodings for D0-D15 for
27301 compatibility with older debuggers. */
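/* Roughly: a DFmode value in d5 is modelled by GCC as the pair s10/s11,
   so the span built below describes it as two SImode pieces, which
   arm_dbx_register_number then maps to DWARF registers 74 and 75. */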
27302 mode = GET_MODE (rtl);
27303 if (GET_MODE_SIZE (mode) < 8)
27304 return NULL_RTX;
27305
27306 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27307 {
27308 nregs = GET_MODE_SIZE (mode) / 4;
27309 for (i = 0; i < nregs; i += 2)
27310 if (TARGET_BIG_END)
27311 {
27312 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27313 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27314 }
27315 else
27316 {
27317 parts[i] = gen_rtx_REG (SImode, regno + i);
27318 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27319 }
27320 }
27321 else
27322 {
27323 nregs = GET_MODE_SIZE (mode) / 8;
27324 for (i = 0; i < nregs; i++)
27325 parts[i] = gen_rtx_REG (DImode, regno + i);
27326 }
27327
27328 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27329 }
27330
27331 #if ARM_UNWIND_INFO
27332 /* Emit unwind directives for a store-multiple instruction or stack pointer
27333 push during alignment.
27334 These should only ever be generated by the function prologue code, so
27335 expect them to have a particular form.
27336 The store-multiple instruction sometimes pushes pc as the last register,
27337 although it should not be tracked into unwind information, or for -Os
27338 sometimes pushes some dummy registers before the first register that needs
27339 to be tracked in unwind information; such dummy registers are there just
27340 to avoid separate stack adjustment, and will not be restored in the
27341 epilogue. */
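/* For example, a prologue "push {r4, r5, lr}" is annotated as
   ".save {r4, r5, lr}", while a "vpush {d8, d9}" becomes
   ".vsave {d8, d9}". */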
27342
27343 static void
27344 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27345 {
27346 int i;
27347 HOST_WIDE_INT offset;
27348 HOST_WIDE_INT nregs;
27349 int reg_size;
27350 unsigned reg;
27351 unsigned lastreg;
27352 unsigned padfirst = 0, padlast = 0;
27353 rtx e;
27354
27355 e = XVECEXP (p, 0, 0);
27356 gcc_assert (GET_CODE (e) == SET);
27357
27358 /* First insn will adjust the stack pointer. */
27359 gcc_assert (GET_CODE (e) == SET
27360 && REG_P (SET_DEST (e))
27361 && REGNO (SET_DEST (e)) == SP_REGNUM
27362 && GET_CODE (SET_SRC (e)) == PLUS);
27363
27364 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27365 nregs = XVECLEN (p, 0) - 1;
27366 gcc_assert (nregs);
27367
27368 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27369 if (reg < 16)
27370 {
27371 /* For -Os dummy registers can be pushed at the beginning to
27372 avoid separate stack pointer adjustment. */
27373 e = XVECEXP (p, 0, 1);
27374 e = XEXP (SET_DEST (e), 0);
27375 if (GET_CODE (e) == PLUS)
27376 padfirst = INTVAL (XEXP (e, 1));
27377 gcc_assert (padfirst == 0 || optimize_size);
27378 /* The function prologue may also push pc, but not annotate it as it is
27379 never restored. We turn this into a stack pointer adjustment. */
27380 e = XVECEXP (p, 0, nregs);
27381 e = XEXP (SET_DEST (e), 0);
27382 if (GET_CODE (e) == PLUS)
27383 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27384 else
27385 padlast = offset - 4;
27386 gcc_assert (padlast == 0 || padlast == 4);
27387 if (padlast == 4)
27388 fprintf (asm_out_file, "\t.pad #4\n");
27389 reg_size = 4;
27390 fprintf (asm_out_file, "\t.save {");
27391 }
27392 else if (IS_VFP_REGNUM (reg))
27393 {
27394 reg_size = 8;
27395 fprintf (asm_out_file, "\t.vsave {");
27396 }
27397 else
27398 /* Unknown register type. */
27399 gcc_unreachable ();
27400
27401 /* If the stack increment doesn't match the size of the saved registers,
27402 something has gone horribly wrong. */
27403 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27404
27405 offset = padfirst;
27406 lastreg = 0;
27407 /* The remaining insns will describe the stores. */
27408 for (i = 1; i <= nregs; i++)
27409 {
27410 /* Expect (set (mem <addr>) (reg)).
27411 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27412 e = XVECEXP (p, 0, i);
27413 gcc_assert (GET_CODE (e) == SET
27414 && MEM_P (SET_DEST (e))
27415 && REG_P (SET_SRC (e)));
27416
27417 reg = REGNO (SET_SRC (e));
27418 gcc_assert (reg >= lastreg);
27419
27420 if (i != 1)
27421 fprintf (asm_out_file, ", ");
27422 /* We can't use %r for vfp because we need to use the
27423 double precision register names. */
27424 if (IS_VFP_REGNUM (reg))
27425 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27426 else
27427 asm_fprintf (asm_out_file, "%r", reg);
27428
27429 if (flag_checking)
27430 {
27431 /* Check that the addresses are consecutive. */
27432 e = XEXP (SET_DEST (e), 0);
27433 if (GET_CODE (e) == PLUS)
27434 gcc_assert (REG_P (XEXP (e, 0))
27435 && REGNO (XEXP (e, 0)) == SP_REGNUM
27436 && CONST_INT_P (XEXP (e, 1))
27437 && offset == INTVAL (XEXP (e, 1)));
27438 else
27439 gcc_assert (i == 1
27440 && REG_P (e)
27441 && REGNO (e) == SP_REGNUM);
27442 offset += reg_size;
27443 }
27444 }
27445 fprintf (asm_out_file, "}\n");
27446 if (padfirst)
27447 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27448 }
27449
27450 /* Emit unwind directives for a SET. */
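/* For instance, a single-register push such as "push {r4}" results in
   ".save {r4}", and a stack adjustment "sub sp, sp, #16" results in
   ".pad #16". */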
27451
27452 static void
27453 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27454 {
27455 rtx e0;
27456 rtx e1;
27457 unsigned reg;
27458
27459 e0 = XEXP (p, 0);
27460 e1 = XEXP (p, 1);
27461 switch (GET_CODE (e0))
27462 {
27463 case MEM:
27464 /* Pushing a single register. */
27465 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27466 || !REG_P (XEXP (XEXP (e0, 0), 0))
27467 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27468 abort ();
27469
27470 asm_fprintf (asm_out_file, "\t.save ");
27471 if (IS_VFP_REGNUM (REGNO (e1)))
27472 asm_fprintf(asm_out_file, "{d%d}\n",
27473 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27474 else
27475 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27476 break;
27477
27478 case REG:
27479 if (REGNO (e0) == SP_REGNUM)
27480 {
27481 /* A stack increment. */
27482 if (GET_CODE (e1) != PLUS
27483 || !REG_P (XEXP (e1, 0))
27484 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27485 || !CONST_INT_P (XEXP (e1, 1)))
27486 abort ();
27487
27488 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27489 -INTVAL (XEXP (e1, 1)));
27490 }
27491 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27492 {
27493 HOST_WIDE_INT offset;
27494
27495 if (GET_CODE (e1) == PLUS)
27496 {
27497 if (!REG_P (XEXP (e1, 0))
27498 || !CONST_INT_P (XEXP (e1, 1)))
27499 abort ();
27500 reg = REGNO (XEXP (e1, 0));
27501 offset = INTVAL (XEXP (e1, 1));
27502 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27503 HARD_FRAME_POINTER_REGNUM, reg,
27504 offset);
27505 }
27506 else if (REG_P (e1))
27507 {
27508 reg = REGNO (e1);
27509 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27510 HARD_FRAME_POINTER_REGNUM, reg);
27511 }
27512 else
27513 abort ();
27514 }
27515 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27516 {
27517 /* Move from sp to reg. */
27518 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27519 }
27520 else if (GET_CODE (e1) == PLUS
27521 && REG_P (XEXP (e1, 0))
27522 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27523 && CONST_INT_P (XEXP (e1, 1)))
27524 {
27525 /* Set reg to offset from sp. */
27526 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27527 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27528 }
27529 else
27530 abort ();
27531 break;
27532
27533 default:
27534 abort ();
27535 }
27536 }
27537
27538
27539 /* Emit unwind directives for the given insn. */
27540
27541 static void
27542 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27543 {
27544 rtx note, pat;
27545 bool handled_one = false;
27546
27547 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27548 return;
27549
27550 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27551 && (TREE_NOTHROW (current_function_decl)
27552 || crtl->all_throwers_are_sibcalls))
27553 return;
27554
27555 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27556 return;
27557
27558 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27559 {
27560 switch (REG_NOTE_KIND (note))
27561 {
27562 case REG_FRAME_RELATED_EXPR:
27563 pat = XEXP (note, 0);
27564 goto found;
27565
27566 case REG_CFA_REGISTER:
27567 pat = XEXP (note, 0);
27568 if (pat == NULL)
27569 {
27570 pat = PATTERN (insn);
27571 if (GET_CODE (pat) == PARALLEL)
27572 pat = XVECEXP (pat, 0, 0);
27573 }
27574
27575 /* Only emitted for IS_STACKALIGN re-alignment. */
27576 {
27577 rtx dest, src;
27578 unsigned reg;
27579
27580 src = SET_SRC (pat);
27581 dest = SET_DEST (pat);
27582
27583 gcc_assert (src == stack_pointer_rtx);
27584 reg = REGNO (dest);
27585 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27586 reg + 0x90, reg);
27587 }
27588 handled_one = true;
27589 break;
27590
27591 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27592 to get correct DWARF information for shrink-wrapping. We should not
27593 emit unwind information for it because such notes are used either for
27594 pretend arguments or to adjust sp and restore registers from the
27595 stack. */
27596 case REG_CFA_DEF_CFA:
27597 case REG_CFA_ADJUST_CFA:
27598 case REG_CFA_RESTORE:
27599 return;
27600
27601 case REG_CFA_EXPRESSION:
27602 case REG_CFA_OFFSET:
27603 /* ??? Only handling here what we actually emit. */
27604 gcc_unreachable ();
27605
27606 default:
27607 break;
27608 }
27609 }
27610 if (handled_one)
27611 return;
27612 pat = PATTERN (insn);
27613 found:
27614
27615 switch (GET_CODE (pat))
27616 {
27617 case SET:
27618 arm_unwind_emit_set (asm_out_file, pat);
27619 break;
27620
27621 case SEQUENCE:
27622 /* Store multiple. */
27623 arm_unwind_emit_sequence (asm_out_file, pat);
27624 break;
27625
27626 default:
27627 abort();
27628 }
27629 }
27630
27631
27632 /* Output a reference from a function exception table to the type_info
27633 object X. The EABI specifies that the symbol should be relocated by
27634 an R_ARM_TARGET2 relocation. */
27635
27636 static bool
27637 arm_output_ttype (rtx x)
27638 {
27639 fputs ("\t.word\t", asm_out_file);
27640 output_addr_const (asm_out_file, x);
27641 /* Use special relocations for symbol references. */
27642 if (!CONST_INT_P (x))
27643 fputs ("(TARGET2)", asm_out_file);
27644 fputc ('\n', asm_out_file);
27645
27646 return TRUE;
27647 }
27648
27649 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27650
27651 static void
27652 arm_asm_emit_except_personality (rtx personality)
27653 {
27654 fputs ("\t.personality\t", asm_out_file);
27655 output_addr_const (asm_out_file, personality);
27656 fputc ('\n', asm_out_file);
27657 }
27658 #endif /* ARM_UNWIND_INFO */
27659
27660 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27661
27662 static void
27663 arm_asm_init_sections (void)
27664 {
27665 #if ARM_UNWIND_INFO
27666 exception_section = get_unnamed_section (0, output_section_asm_op,
27667 "\t.handlerdata");
27668 #endif /* ARM_UNWIND_INFO */
27669
27670 #ifdef OBJECT_FORMAT_ELF
27671 if (target_pure_code)
27672 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27673 #endif
27674 }
27675
27676 /* Output unwind directives for the start/end of a function. */
27677
27678 void
27679 arm_output_fn_unwind (FILE * f, bool prologue)
27680 {
27681 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27682 return;
27683
27684 if (prologue)
27685 fputs ("\t.fnstart\n", f);
27686 else
27687 {
27688 /* If this function will never be unwound, then mark it as such.
27689 The same condition is used in arm_unwind_emit to suppress
27690 the frame annotations. */
27691 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27692 && (TREE_NOTHROW (current_function_decl)
27693 || crtl->all_throwers_are_sibcalls))
27694 fputs("\t.cantunwind\n", f);
27695
27696 fputs ("\t.fnend\n", f);
27697 }
27698 }
27699
27700 static bool
27701 arm_emit_tls_decoration (FILE *fp, rtx x)
27702 {
27703 enum tls_reloc reloc;
27704 rtx val;
27705
27706 val = XVECEXP (x, 0, 0);
27707 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27708
27709 output_addr_const (fp, val);
27710
27711 switch (reloc)
27712 {
27713 case TLS_GD32:
27714 fputs ("(tlsgd)", fp);
27715 break;
27716 case TLS_LDM32:
27717 fputs ("(tlsldm)", fp);
27718 break;
27719 case TLS_LDO32:
27720 fputs ("(tlsldo)", fp);
27721 break;
27722 case TLS_IE32:
27723 fputs ("(gottpoff)", fp);
27724 break;
27725 case TLS_LE32:
27726 fputs ("(tpoff)", fp);
27727 break;
27728 case TLS_DESCSEQ:
27729 fputs ("(tlsdesc)", fp);
27730 break;
27731 default:
27732 gcc_unreachable ();
27733 }
27734
27735 switch (reloc)
27736 {
27737 case TLS_GD32:
27738 case TLS_LDM32:
27739 case TLS_IE32:
27740 case TLS_DESCSEQ:
27741 fputs (" + (. - ", fp);
27742 output_addr_const (fp, XVECEXP (x, 0, 2));
27743 /* For DESCSEQ the 3rd operand encodes thumbness, and is added. */
27744 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27745 output_addr_const (fp, XVECEXP (x, 0, 3));
27746 fputc (')', fp);
27747 break;
27748 default:
27749 break;
27750 }
27751
27752 return TRUE;
27753 }
27754
27755 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27756
27757 static void
27758 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27759 {
27760 gcc_assert (size == 4);
27761 fputs ("\t.word\t", file);
27762 output_addr_const (file, x);
27763 fputs ("(tlsldo)", file);
27764 }
27765
27766 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27767
27768 static bool
27769 arm_output_addr_const_extra (FILE *fp, rtx x)
27770 {
27771 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27772 return arm_emit_tls_decoration (fp, x);
27773 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27774 {
27775 char label[256];
27776 int labelno = INTVAL (XVECEXP (x, 0, 0));
27777
27778 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27779 assemble_name_raw (fp, label);
27780
27781 return TRUE;
27782 }
27783 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27784 {
27785 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27786 if (GOT_PCREL)
27787 fputs ("+.", fp);
27788 fputs ("-(", fp);
27789 output_addr_const (fp, XVECEXP (x, 0, 0));
27790 fputc (')', fp);
27791 return TRUE;
27792 }
27793 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27794 {
27795 output_addr_const (fp, XVECEXP (x, 0, 0));
27796 if (GOT_PCREL)
27797 fputs ("+.", fp);
27798 fputs ("-(", fp);
27799 output_addr_const (fp, XVECEXP (x, 0, 1));
27800 fputc (')', fp);
27801 return TRUE;
27802 }
27803 else if (GET_CODE (x) == CONST_VECTOR)
27804 return arm_emit_vector_const (fp, x);
27805
27806 return FALSE;
27807 }
27808
27809 /* Output assembly for a shift instruction.
27810 SET_FLAGS determines how the instruction modifies the condition codes.
27811 0 - Do not set condition codes.
27812 1 - Set condition codes.
27813 2 - Use smallest instruction. */
27814 const char *
27815 arm_output_shift(rtx * operands, int set_flags)
27816 {
27817 char pattern[100];
27818 static const char flag_chars[3] = {'?', '.', '!'};
27819 const char *shift;
27820 HOST_WIDE_INT val;
27821 char c;
27822
27823 c = flag_chars[set_flags];
27824 shift = shift_op(operands[3], &val);
27825 if (shift)
27826 {
27827 if (val != -1)
27828 operands[2] = GEN_INT(val);
27829 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27830 }
27831 else
27832 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27833
27834 output_asm_insn (pattern, operands);
27835 return "";
27836 }
27837
27838 /* Output assembly for a WMMX immediate shift instruction. */
27839 const char *
27840 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27841 {
27842 int shift = INTVAL (operands[2]);
27843 char templ[50];
27844 machine_mode opmode = GET_MODE (operands[0]);
27845
27846 gcc_assert (shift >= 0);
27847
27848 /* If the shift value is larger than what the register versions allow
27849 (> 63 for the D qualifier, > 31 for W, or > 15 for H), handle it specially. */
27850 if (((opmode == V4HImode) && (shift > 15))
27851 || ((opmode == V2SImode) && (shift > 31))
27852 || ((opmode == DImode) && (shift > 63)))
27853 {
27854 if (wror_or_wsra)
27855 {
27856 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27857 output_asm_insn (templ, operands);
27858 if (opmode == DImode)
27859 {
27860 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27861 output_asm_insn (templ, operands);
27862 }
27863 }
27864 else
27865 {
27866 /* The destination register will contain all zeros. */
27867 sprintf (templ, "wzero\t%%0");
27868 output_asm_insn (templ, operands);
27869 }
27870 return "";
27871 }
27872
27873 if ((opmode == DImode) && (shift > 32))
27874 {
27875 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27876 output_asm_insn (templ, operands);
27877 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27878 output_asm_insn (templ, operands);
27879 }
27880 else
27881 {
27882 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27883 output_asm_insn (templ, operands);
27884 }
27885 return "";
27886 }
27887
27888 /* Output assembly for a WMMX tinsr instruction. */
27889 const char *
27890 arm_output_iwmmxt_tinsr (rtx *operands)
27891 {
27892 int mask = INTVAL (operands[3]);
27893 int i;
27894 char templ[50];
27895 int units = mode_nunits[GET_MODE (operands[0])];
27896 gcc_assert ((mask & (mask - 1)) == 0);
27897 for (i = 0; i < units; ++i)
27898 {
27899 if ((mask & 0x01) == 1)
27900 {
27901 break;
27902 }
27903 mask >>= 1;
27904 }
27905 gcc_assert (i < units);
27906 {
27907 switch (GET_MODE (operands[0]))
27908 {
27909 case E_V8QImode:
27910 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27911 break;
27912 case E_V4HImode:
27913 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27914 break;
27915 case E_V2SImode:
27916 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27917 break;
27918 default:
27919 gcc_unreachable ();
27920 break;
27921 }
27922 output_asm_insn (templ, operands);
27923 }
27924 return "";
27925 }
27926
27927 /* Output a Thumb-1 casesi dispatch sequence. */
27928 const char *
27929 thumb1_output_casesi (rtx *operands)
27930 {
27931 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27932
27933 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27934
27935 switch (GET_MODE(diff_vec))
27936 {
27937 case E_QImode:
27938 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27939 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27940 case E_HImode:
27941 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27942 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27943 case E_SImode:
27944 return "bl\t%___gnu_thumb1_case_si";
27945 default:
27946 gcc_unreachable ();
27947 }
27948 }
27949
27950 /* Output a Thumb-2 casesi instruction. */
27951 const char *
27952 thumb2_output_casesi (rtx *operands)
27953 {
27954 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27955
27956 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27957
27958 output_asm_insn ("cmp\t%0, %1", operands);
27959 output_asm_insn ("bhi\t%l3", operands);
27960 switch (GET_MODE(diff_vec))
27961 {
27962 case E_QImode:
27963 return "tbb\t[%|pc, %0]";
27964 case E_HImode:
27965 return "tbh\t[%|pc, %0, lsl #1]";
27966 case E_SImode:
27967 if (flag_pic)
27968 {
27969 output_asm_insn ("adr\t%4, %l2", operands);
27970 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
27971 output_asm_insn ("add\t%4, %4, %5", operands);
27972 return "bx\t%4";
27973 }
27974 else
27975 {
27976 output_asm_insn ("adr\t%4, %l2", operands);
27977 return "ldr\t%|pc, [%4, %0, lsl #2]";
27978 }
27979 default:
27980 gcc_unreachable ();
27981 }
27982 }
27983
27984 /* Implement TARGET_SCHED_ISSUE_RATE. Look up the issue rate in the
27985 per-core tuning structs. */
27986 static int
27987 arm_issue_rate (void)
27988 {
27989 return current_tune->issue_rate;
27990 }
27991
27992 /* Return how many instructions the scheduler should look ahead to choose
27993 the best one. */
27994 static int
27995 arm_first_cycle_multipass_dfa_lookahead (void)
27996 {
27997 int issue_rate = arm_issue_rate ();
27998
27999 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
28000 }
28001
28002 /* Enable modeling of L2 auto-prefetcher. */
28003 static int
28004 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
28005 {
28006 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
28007 }
28008
28009 const char *
28010 arm_mangle_type (const_tree type)
28011 {
28012 /* The ARM ABI documents (10th October 2008) say that "__va_list"
28013 has to be mangled as if it is in the "std" namespace. */
28014 if (TARGET_AAPCS_BASED
28015 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
28016 return "St9__va_list";
28017
28018 /* Half-precision float. */
28019 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
28020 return "Dh";
28021
28022 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
28023 builtin type. */
28024 if (TYPE_NAME (type) != NULL)
28025 return arm_mangle_builtin_type (type);
28026
28027 /* Use the default mangling. */
28028 return NULL;
28029 }
28030
28031 /* Order of allocation of core registers for Thumb: this allocation is
28032 written over the corresponding initial entries of the array
28033 initialized with REG_ALLOC_ORDER. We allocate all low registers
28034 first. Saving and restoring a low register is usually cheaper than
28035 using a call-clobbered high register. */
28036
28037 static const int thumb_core_reg_alloc_order[] =
28038 {
28039 3, 2, 1, 0, 4, 5, 6, 7,
28040 12, 14, 8, 9, 10, 11
28041 };
28042
28043 /* Adjust register allocation order when compiling for Thumb. */
28044
28045 void
28046 arm_order_regs_for_local_alloc (void)
28047 {
28048 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
28049 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
28050 if (TARGET_THUMB)
28051 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
28052 sizeof (thumb_core_reg_alloc_order));
28053 }
28054
28055 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
28056
28057 bool
28058 arm_frame_pointer_required (void)
28059 {
28060 if (SUBTARGET_FRAME_POINTER_REQUIRED)
28061 return true;
28062
28063 /* If the function receives nonlocal gotos, it needs to save the frame
28064 pointer in the nonlocal_goto_save_area object. */
28065 if (cfun->has_nonlocal_label)
28066 return true;
28067
28068 /* The frame pointer is required for non-leaf APCS frames. */
28069 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
28070 return true;
28071
28072 /* If we are probing the stack in the prologue, we will have a faulting
28073 instruction prior to the stack adjustment and this requires a frame
28074 pointer if we want to catch the exception using the EABI unwinder. */
28075 if (!IS_INTERRUPT (arm_current_func_type ())
28076 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
28077 || flag_stack_clash_protection)
28078 && arm_except_unwind_info (&global_options) == UI_TARGET
28079 && cfun->can_throw_non_call_exceptions)
28080 {
28081 HOST_WIDE_INT size = get_frame_size ();
28082
28083 /* That's irrelevant if there is no stack adjustment. */
28084 if (size <= 0)
28085 return false;
28086
28087 /* That's relevant only if there is a stack probe. */
28088 if (crtl->is_leaf && !cfun->calls_alloca)
28089 {
28090 /* We don't have the final size of the frame so adjust. */
28091 size += 32 * UNITS_PER_WORD;
28092 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
28093 return true;
28094 }
28095 else
28096 return true;
28097 }
28098
28099 return false;
28100 }
28101
28102 /* Thumb-1 is the only target that cannot support conditional execution,
28103 so return true if the target is not Thumb-1. */
28104 static bool
28105 arm_have_conditional_execution (void)
28106 {
28107 return !TARGET_THUMB1;
28108 }
28109
28110 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
28111 static HOST_WIDE_INT
28112 arm_vector_alignment (const_tree type)
28113 {
28114 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
28115
28116 if (TARGET_AAPCS_BASED)
28117 align = MIN (align, 64);
28118
28119 return align;
28120 }
28121
28122 static unsigned int
28123 arm_autovectorize_vector_sizes (void)
28124 {
28125 return TARGET_NEON_VECTORIZE_DOUBLE ? 0 : (16 | 8);
28126 }
28127
28128 static bool
28129 arm_vector_alignment_reachable (const_tree type, bool is_packed)
28130 {
28131 /* Vectors which aren't in packed structures will not be less aligned than
28132 the natural alignment of their element type, so this is safe. */
28133 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28134 return !is_packed;
28135
28136 return default_builtin_vector_alignment_reachable (type, is_packed);
28137 }
28138
28139 static bool
28140 arm_builtin_support_vector_misalignment (machine_mode mode,
28141 const_tree type, int misalignment,
28142 bool is_packed)
28143 {
28144 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28145 {
28146 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
28147
28148 if (is_packed)
28149 return align == 1;
28150
28151 /* If the misalignment is unknown, we should be able to handle the access
28152 so long as it is not to a member of a packed data structure. */
28153 if (misalignment == -1)
28154 return true;
28155
28156 /* Return true if the misalignment is a multiple of the natural alignment
28157 of the vector's element type. This is probably always going to be
28158 true in practice, since we've already established that this isn't a
28159 packed access. */
28160 return ((misalignment % align) == 0);
28161 }
28162
28163 return default_builtin_support_vector_misalignment (mode, type, misalignment,
28164 is_packed);
28165 }
28166
28167 static void
28168 arm_conditional_register_usage (void)
28169 {
28170 int regno;
28171
28172 if (TARGET_THUMB1 && optimize_size)
28173 {
28174 /* When optimizing for size on Thumb-1, it's better not
28175 to use the HI regs, because of the overhead of
28176 stacking them. */
28177 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
28178 fixed_regs[regno] = call_used_regs[regno] = 1;
28179 }
28180
28181 /* The link register can be clobbered by any branch insn,
28182 but we have no way to track that at present, so mark
28183 it as unavailable. */
28184 if (TARGET_THUMB1)
28185 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28186
28187 if (TARGET_32BIT && TARGET_HARD_FLOAT)
28188 {
28189 /* VFPv3 registers are disabled when earlier VFP
28190 versions are selected due to the definition of
28191 LAST_VFP_REGNUM. */
28192 for (regno = FIRST_VFP_REGNUM;
28193 regno <= LAST_VFP_REGNUM; ++ regno)
28194 {
28195 fixed_regs[regno] = 0;
28196 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28197 || regno >= FIRST_VFP_REGNUM + 32;
28198 }
28199 }
28200
28201 if (TARGET_REALLY_IWMMXT)
28202 {
28203 regno = FIRST_IWMMXT_GR_REGNUM;
28204 /* The 2002/10/09 revision of the XScale ABI has wCG0
28205 and wCG1 as call-preserved registers. The 2002/11/21
28206 revision changed this so that all wCG registers are
28207 scratch registers. */
28208 for (regno = FIRST_IWMMXT_GR_REGNUM;
28209 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28210 fixed_regs[regno] = 0;
28211 /* The XScale ABI has wR0 - wR9 as scratch registers,
28212 the rest as call-preserved registers. */
28213 for (regno = FIRST_IWMMXT_REGNUM;
28214 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28215 {
28216 fixed_regs[regno] = 0;
28217 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28218 }
28219 }
28220
28221 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28222 {
28223 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28224 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28225 }
28226 else if (TARGET_APCS_STACK)
28227 {
28228 fixed_regs[10] = 1;
28229 call_used_regs[10] = 1;
28230 }
28231 /* -mcaller-super-interworking reserves r11 for calls to
28232 _interwork_r11_call_via_rN(). Making the register global
28233 is an easy way of ensuring that it remains valid for all
28234 calls. */
28235 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28236 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28237 {
28238 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28239 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28240 if (TARGET_CALLER_INTERWORKING)
28241 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28242 }
28243 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28244 }
28245
28246 static reg_class_t
28247 arm_preferred_rename_class (reg_class_t rclass)
28248 {
28249 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28250 using GENERAL_REGS. During the register rename pass, we prefer LO_REGS,
28251 so that code size can be reduced. */
28252 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28253 return LO_REGS;
28254 else
28255 return NO_REGS;
28256 }
28257
28258 /* Compute the attribute "length" of insn "*push_multi".
28259 So this function MUST be kept in sync with that insn pattern. */
28260 int
28261 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28262 {
28263 int i, regno, hi_reg;
28264 int num_saves = XVECLEN (parallel_op, 0);
28265
28266 /* ARM mode. */
28267 if (TARGET_ARM)
28268 return 4;
28269 /* Thumb1 mode. */
28270 if (TARGET_THUMB1)
28271 return 2;
28272
28273 /* Thumb2 mode. */
28274 regno = REGNO (first_op);
28275 /* For PUSH/STM under Thumb-2 mode, we can use 16-bit encodings if the
28276 register list fits in 8 bits. Normally this means all registers in the
28277 list must be LO_REGS, that is (R0-R7). If any HI_REGS are used, then we
28278 must use 32-bit encodings. The one exception is PUSH: LR, although in
28279 HI_REGS, can still be used with the 16-bit encoding. */
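/* For instance, "push {r0-r7, lr}" can use the 16-bit encoding and so has
   length 2, while "push {r0, r8}" requires the 32-bit encoding and has
   length 4. */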
28280 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28281 for (i = 1; i < num_saves && !hi_reg; i++)
28282 {
28283 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28284 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28285 }
28286
28287 if (!hi_reg)
28288 return 2;
28289 return 4;
28290 }
28291
28292 /* Compute the attribute "length" of insn. Currently, this function is used
28293 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28294 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28295 rtx, RETURN_PC is true if OPERANDS contains a return insn, and WRITE_BACK_P is
28296 true if OPERANDS contains an insn that explicitly updates the base register. */
28297
28298 int
28299 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28300 {
28301 /* ARM mode. */
28302 if (TARGET_ARM)
28303 return 4;
28304 /* Thumb1 mode. */
28305 if (TARGET_THUMB1)
28306 return 2;
28307
28308 rtx parallel_op = operands[0];
28309 /* Initialize to the index of the last element of the PARALLEL. */
28310 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28311 /* Initialize to the base register number. */
28312 unsigned regno = REGNO (operands[1]);
28313 /* Skip the return and write-back patterns; only the register pop
28314 patterns are needed for the analysis below. */
28315 unsigned first_indx = 0;
28316 first_indx += return_pc ? 1 : 0;
28317 first_indx += write_back_p ? 1 : 0;
28318
28319 /* A pop operation can be done through LDM or POP. If the base register is SP
28320 and write-back is used, then an LDM is an alias of POP. */
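/* E.g. "ldmia sp!, {r4, r5, pc}" is the same operation as
   "pop {r4, r5, pc}" and can use the 16-bit encoding, whereas an LDM
   whose base register is a high register always needs the 32-bit
   encoding. */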
28321 bool pop_p = (regno == SP_REGNUM && write_back_p);
28322 bool ldm_p = !pop_p;
28323
28324 /* Check base register for LDM. */
28325 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28326 return 4;
28327
28328 /* Check each register in the list. */
28329 for (; indx >= first_indx; indx--)
28330 {
28331 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28332 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28333 comment in arm_attr_length_push_multi. */
28334 if (REGNO_REG_CLASS (regno) == HI_REGS
28335 && (regno != PC_REGNUM || ldm_p))
28336 return 4;
28337 }
28338
28339 return 2;
28340 }
28341
28342 /* Compute the number of instructions emitted by output_move_double. */
28343 int
28344 arm_count_output_move_double_insns (rtx *operands)
28345 {
28346 int count;
28347 rtx ops[2];
28348 /* output_move_double may modify the operands array, so call it
28349 here on a copy of the array. */
28350 ops[0] = operands[0];
28351 ops[1] = operands[1];
28352 output_move_double (ops, false, &count);
28353 return count;
28354 }
28355
28356 int
28357 vfp3_const_double_for_fract_bits (rtx operand)
28358 {
28359 REAL_VALUE_TYPE r0;
28360
28361 if (!CONST_DOUBLE_P (operand))
28362 return 0;
28363
28364 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28365 if (exact_real_inverse (DFmode, &r0)
28366 && !REAL_VALUE_NEGATIVE (r0))
28367 {
28368 if (exact_real_truncate (DFmode, &r0))
28369 {
28370 HOST_WIDE_INT value = real_to_integer (&r0);
28371 value = value & 0xffffffff;
28372 if ((value != 0) && ( (value & (value - 1)) == 0))
28373 {
28374 int ret = exact_log2 (value);
28375 gcc_assert (IN_RANGE (ret, 0, 31));
28376 return ret;
28377 }
28378 }
28379 }
28380 return 0;
28381 }
28382
28383 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28384 log2 is in [1, 32], return that log2. Otherwise return -1.
28385 This is used in the patterns for vcvt.s32.f32 floating-point to
28386 fixed-point conversions. */
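/* A couple of worked values: 8.0 yields 3 and 2.0 yields 1, while 3.0
   (not a power of 2) and 1.0 (log2 of 0, outside [1, 32]) yield -1. */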
28387
28388 int
28389 vfp3_const_double_for_bits (rtx x)
28390 {
28391 const REAL_VALUE_TYPE *r;
28392
28393 if (!CONST_DOUBLE_P (x))
28394 return -1;
28395
28396 r = CONST_DOUBLE_REAL_VALUE (x);
28397
28398 if (REAL_VALUE_NEGATIVE (*r)
28399 || REAL_VALUE_ISNAN (*r)
28400 || REAL_VALUE_ISINF (*r)
28401 || !real_isinteger (r, SFmode))
28402 return -1;
28403
28404 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28405
28406 /* The exact_log2 above will have returned -1 if this is
28407 not an exact log2. */
28408 if (!IN_RANGE (hwint, 1, 32))
28409 return -1;
28410
28411 return hwint;
28412 }
28413
28414 \f
28415 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28416
28417 static void
28418 arm_pre_atomic_barrier (enum memmodel model)
28419 {
28420 if (need_atomic_barrier_p (model, true))
28421 emit_insn (gen_memory_barrier ());
28422 }
28423
28424 static void
28425 arm_post_atomic_barrier (enum memmodel model)
28426 {
28427 if (need_atomic_barrier_p (model, false))
28428 emit_insn (gen_memory_barrier ());
28429 }
28430
28431 /* Emit the load-exclusive and store-exclusive instructions.
28432 Use acquire and release versions if necessary. */
28433
28434 static void
28435 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28436 {
28437 rtx (*gen) (rtx, rtx);
28438
28439 if (acq)
28440 {
28441 switch (mode)
28442 {
28443 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28444 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28445 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28446 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28447 default:
28448 gcc_unreachable ();
28449 }
28450 }
28451 else
28452 {
28453 switch (mode)
28454 {
28455 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28456 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28457 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28458 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28459 default:
28460 gcc_unreachable ();
28461 }
28462 }
28463
28464 emit_insn (gen (rval, mem));
28465 }
28466
28467 static void
28468 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28469 rtx mem, bool rel)
28470 {
28471 rtx (*gen) (rtx, rtx, rtx);
28472
28473 if (rel)
28474 {
28475 switch (mode)
28476 {
28477 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28478 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28479 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28480 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28481 default:
28482 gcc_unreachable ();
28483 }
28484 }
28485 else
28486 {
28487 switch (mode)
28488 {
28489 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28490 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28491 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28492 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28493 default:
28494 gcc_unreachable ();
28495 }
28496 }
28497
28498 emit_insn (gen (bval, rval, mem));
28499 }
28500
28501 /* Emit INSN as a jump and mark it as unlikely to be taken. */
28502
28503 static void
28504 emit_unlikely_jump (rtx insn)
28505 {
28506 rtx_insn *jump = emit_jump_insn (insn);
28507 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28508 }
28509
28510 /* Expand a compare and swap pattern. */
28511
28512 void
28513 arm_expand_compare_and_swap (rtx operands[])
28514 {
28515 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28516 machine_mode mode;
28517 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28518
28519 bval = operands[0];
28520 rval = operands[1];
28521 mem = operands[2];
28522 oldval = operands[3];
28523 newval = operands[4];
28524 is_weak = operands[5];
28525 mod_s = operands[6];
28526 mod_f = operands[7];
28527 mode = GET_MODE (mem);
28528
28529 /* Normally the succ memory model must be stronger than fail, but in the
28530 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28531 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28532
28533 if (TARGET_HAVE_LDACQ
28534 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28535 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28536 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28537
28538 switch (mode)
28539 {
28540 case E_QImode:
28541 case E_HImode:
28542 /* For narrow modes, we're going to perform the comparison in SImode,
28543 so do the zero-extension now. */
28544 rval = gen_reg_rtx (SImode);
28545 oldval = convert_modes (SImode, mode, oldval, true);
28546 /* FALLTHRU */
28547
28548 case E_SImode:
28549 /* Force the value into a register if needed. We waited until after
28550 the zero-extension above to do this properly. */
28551 if (!arm_add_operand (oldval, SImode))
28552 oldval = force_reg (SImode, oldval);
28553 break;
28554
28555 case E_DImode:
28556 if (!cmpdi_operand (oldval, mode))
28557 oldval = force_reg (mode, oldval);
28558 break;
28559
28560 default:
28561 gcc_unreachable ();
28562 }
28563
28564 if (TARGET_THUMB1)
28565 {
28566 switch (mode)
28567 {
28568 case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28569 case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28570 case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28571 case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28572 default:
28573 gcc_unreachable ();
28574 }
28575 }
28576 else
28577 {
28578 switch (mode)
28579 {
28580 case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28581 case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28582 case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28583 case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28584 default:
28585 gcc_unreachable ();
28586 }
28587 }
28588
28589 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28590 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28591
28592 if (mode == QImode || mode == HImode)
28593 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28594
28595 /* In all cases, we arrange for success to be signaled by Z set.
28596 This arrangement allows for the boolean result to be used directly
28597 in a subsequent branch, post optimization. For Thumb-1 targets, the
28598 boolean negation of the result is also stored in bval because the Thumb-1
28599 backend lacks dependency tracking for the CC flag, as flag-setting is not
28600 represented at the RTL level. */
28601 if (TARGET_THUMB1)
28602 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28603 else
28604 {
28605 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28606 emit_insn (gen_rtx_SET (bval, x));
28607 }
28608 }
28609
28610 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28611 another memory store between the load-exclusive and store-exclusive can
28612 reset the monitor from Exclusive to Open state. This means we must wait
28613 until after reload to split the pattern, lest we get a register spill in
28614 the middle of the atomic sequence. Success of the compare and swap is
28615 indicated by the Z flag being set for 32-bit targets and by neg_bval being
28616 zero for Thumb-1 targets (i.e. the negation of the boolean value returned by the
28617 atomic_compare_and_swapmode standard pattern in operand 0). */
28618
28619 void
28620 arm_split_compare_and_swap (rtx operands[])
28621 {
28622 rtx rval, mem, oldval, newval, neg_bval;
28623 machine_mode mode;
28624 enum memmodel mod_s, mod_f;
28625 bool is_weak;
28626 rtx_code_label *label1, *label2;
28627 rtx x, cond;
28628
28629 rval = operands[1];
28630 mem = operands[2];
28631 oldval = operands[3];
28632 newval = operands[4];
28633 is_weak = (operands[5] != const0_rtx);
28634 mod_s = memmodel_from_int (INTVAL (operands[6]));
28635 mod_f = memmodel_from_int (INTVAL (operands[7]));
28636 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28637 mode = GET_MODE (mem);
28638
28639 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28640
28641 bool use_acquire = TARGET_HAVE_LDACQ
28642 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28643 || is_mm_release (mod_s));
28644
28645 bool use_release = TARGET_HAVE_LDACQ
28646 && !(is_mm_relaxed (mod_s) || is_mm_consume (mod_s)
28647 || is_mm_acquire (mod_s));
28648
28649 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28650 a full barrier is emitted after the store-release. */
28651 if (is_armv8_sync)
28652 use_acquire = false;
28653
28654 /* Checks whether a barrier is needed and emits one accordingly. */
28655 if (!(use_acquire || use_release))
28656 arm_pre_atomic_barrier (mod_s);
28657
28658 label1 = NULL;
28659 if (!is_weak)
28660 {
28661 label1 = gen_label_rtx ();
28662 emit_label (label1);
28663 }
28664 label2 = gen_label_rtx ();
28665
28666 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28667
28668 /* Z is set to 0 for 32-bit targets (resp. neg_bval set to 1) if oldval != rval,
28669 as required to communicate with arm_expand_compare_and_swap. */
28670 if (TARGET_32BIT)
28671 {
28672 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28673 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28674 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28675 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28676 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28677 }
28678 else
28679 {
28680 emit_move_insn (neg_bval, const1_rtx);
28681 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28682 if (thumb1_cmpneg_operand (oldval, SImode))
28683 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28684 label2, cond));
28685 else
28686 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28687 }
28688
28689 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28690
28691 /* Weak or strong, we want EQ to be true for success, so that we
28692 match the flags that we got from the compare above. */
28693 if (TARGET_32BIT)
28694 {
28695 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28696 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28697 emit_insn (gen_rtx_SET (cond, x));
28698 }
28699
28700 if (!is_weak)
28701 {
28702 /* Z is set to boolean value of !neg_bval, as required to communicate
28703 with arm_expand_compare_and_swap. */
28704 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28705 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28706 }
28707
28708 if (!is_mm_relaxed (mod_f))
28709 emit_label (label2);
28710
28711 /* Checks whether a barrier is needed and emits one accordingly. */
28712 if (is_armv8_sync
28713 || !(use_acquire || use_release))
28714 arm_post_atomic_barrier (mod_s);
28715
28716 if (is_mm_relaxed (mod_f))
28717 emit_label (label2);
28718 }
28719
28720 /* Split an atomic operation pattern. Operation is given by CODE and is one
28721 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28722 operation). Operation is performed on the content at MEM and on VALUE
28723 following the memory model MODEL_RTX. The content at MEM before and after
28724 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28725 success of the operation is returned in COND. Using a scratch register or
28726 an operand register for these determines what result is returned for that
28727 pattern. */
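/* For illustration (a sketch, not guaranteed output of this splitter): a call
   such as __atomic_fetch_add (p, 1, __ATOMIC_RELAXED) on an int *P is
   normally split into a loop of the shape

     1:  ldrex   r3, [r0]
         add     r2, r3, #1
         strex   r1, r2, [r0]
         cmp     r1, #0
         bne     1b

   with the old value left in r3 (OLD_OUT) and the new value in r2 (NEW_OUT);
   the register names are purely illustrative.  */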
28728
28729 void
28730 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28731 rtx value, rtx model_rtx, rtx cond)
28732 {
28733 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28734 machine_mode mode = GET_MODE (mem);
28735 machine_mode wmode = (mode == DImode ? DImode : SImode);
28736 rtx_code_label *label;
28737 bool all_low_regs, bind_old_new;
28738 rtx x;
28739
28740 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28741
28742 bool use_acquire = TARGET_HAVE_LDACQ
28743 && !(is_mm_relaxed (model) || is_mm_consume (model)
28744 || is_mm_release (model));
28745
28746 bool use_release = TARGET_HAVE_LDACQ
28747 && !(is_mm_relaxed (model) || is_mm_consume (model)
28748 || is_mm_acquire (model));
28749
28750 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28751 a full barrier is emitted after the store-release. */
28752 if (is_armv8_sync)
28753 use_acquire = false;
28754
28755 /* Checks whether a barrier is needed and emits one accordingly. */
28756 if (!(use_acquire || use_release))
28757 arm_pre_atomic_barrier (model);
28758
28759 label = gen_label_rtx ();
28760 emit_label (label);
28761
28762 if (new_out)
28763 new_out = gen_lowpart (wmode, new_out);
28764 if (old_out)
28765 old_out = gen_lowpart (wmode, old_out);
28766 else
28767 old_out = new_out;
28768 value = simplify_gen_subreg (wmode, value, mode, 0);
28769
28770 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28771
28772 /* Does the operation require destination and first operand to use the same
28773 register? This is decided by register constraints of relevant insn
28774 patterns in thumb1.md. */
28775 gcc_assert (!new_out || REG_P (new_out));
28776 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28777 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28778 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28779 bind_old_new =
28780 (TARGET_THUMB1
28781 && code != SET
28782 && code != MINUS
28783 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28784
28785 /* We want to return the old value while putting the result of the operation
28786 in the same register as the old value so copy the old value over to the
28787 destination register and use that register for the operation. */
28788 if (old_out && bind_old_new)
28789 {
28790 emit_move_insn (new_out, old_out);
28791 old_out = new_out;
28792 }
28793
28794 switch (code)
28795 {
28796 case SET:
28797 new_out = value;
28798 break;
28799
28800 case NOT:
28801 x = gen_rtx_AND (wmode, old_out, value);
28802 emit_insn (gen_rtx_SET (new_out, x));
28803 x = gen_rtx_NOT (wmode, new_out);
28804 emit_insn (gen_rtx_SET (new_out, x));
28805 break;
28806
28807 case MINUS:
28808 if (CONST_INT_P (value))
28809 {
28810 value = GEN_INT (-INTVAL (value));
28811 code = PLUS;
28812 }
28813 /* FALLTHRU */
28814
28815 case PLUS:
28816 if (mode == DImode)
28817 {
28818 /* DImode plus/minus need to clobber flags. */
28819 /* The adddi3 and subdi3 patterns are incorrectly written so that
28820 they require matching operands, even when we could easily support
28821 three operands. Thankfully, this can be fixed up post-splitting,
28822 as the individual add+adc patterns do accept three operands and
28823 post-reload cprop can make these moves go away. */
28824 emit_move_insn (new_out, old_out);
28825 if (code == PLUS)
28826 x = gen_adddi3 (new_out, new_out, value);
28827 else
28828 x = gen_subdi3 (new_out, new_out, value);
28829 emit_insn (x);
28830 break;
28831 }
28832 /* FALLTHRU */
28833
28834 default:
28835 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28836 emit_insn (gen_rtx_SET (new_out, x));
28837 break;
28838 }
28839
28840 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28841 use_release);
28842
28843 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28844 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28845
28846 /* Checks whether a barrier is needed and emits one accordingly. */
28847 if (is_armv8_sync
28848 || !(use_acquire || use_release))
28849 arm_post_atomic_barrier (model);
28850 }
28851 \f
28852 #define MAX_VECT_LEN 16
28853
28854 struct expand_vec_perm_d
28855 {
28856 rtx target, op0, op1;
28857 auto_vec_perm_indices perm;
28858 machine_mode vmode;
28859 bool one_vector_p;
28860 bool testing_p;
28861 };
28862
28863 /* Generate a variable permutation. */
28864
28865 static void
28866 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28867 {
28868 machine_mode vmode = GET_MODE (target);
28869 bool one_vector_p = rtx_equal_p (op0, op1);
28870
28871 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28872 gcc_checking_assert (GET_MODE (op0) == vmode);
28873 gcc_checking_assert (GET_MODE (op1) == vmode);
28874 gcc_checking_assert (GET_MODE (sel) == vmode);
28875 gcc_checking_assert (TARGET_NEON);
28876
28877 if (one_vector_p)
28878 {
28879 if (vmode == V8QImode)
28880 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28881 else
28882 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28883 }
28884 else
28885 {
28886 rtx pair;
28887
28888 if (vmode == V8QImode)
28889 {
28890 pair = gen_reg_rtx (V16QImode);
28891 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28892 pair = gen_lowpart (TImode, pair);
28893 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28894 }
28895 else
28896 {
28897 pair = gen_reg_rtx (OImode);
28898 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28899 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28900 }
28901 }
28902 }
28903
28904 void
28905 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28906 {
28907 machine_mode vmode = GET_MODE (target);
28908 unsigned int nelt = GET_MODE_NUNITS (vmode);
28909 bool one_vector_p = rtx_equal_p (op0, op1);
28910 rtx mask;
28911
28912 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28913 numbering of elements for big-endian, we must reverse the order. */
28914 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28915
28916 /* The VTBL instruction does not use a modulo index, so we must take care
28917 of that ourselves. */
28918 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28919 mask = gen_const_vec_duplicate (vmode, mask);
28920 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28921
28922 arm_expand_vec_perm_1 (target, op0, op1, sel);
28923 }
28924
28925 /* Map lane ordering between architectural lane order and GCC lane order,
28926 taking into account the ABI. See comment above output_move_neon for details. */
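/* For example, on a big-endian target neon_endian_lane_map (V4SImode, 0)
   is 1: the lane is first reversed (3 - 0 = 3) and then, because V4SImode
   is a 16-byte vector, XORed with nelems / 2 (3 ^ 2 = 1).  On little-endian
   targets the mapping is the identity.  */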
28927
28928 static int
28929 neon_endian_lane_map (machine_mode mode, int lane)
28930 {
28931 if (BYTES_BIG_ENDIAN)
28932 {
28933 int nelems = GET_MODE_NUNITS (mode);
28934 /* Reverse lane order. */
28935 lane = (nelems - 1 - lane);
28936 /* Reverse D register order, to match ABI. */
28937 if (GET_MODE_SIZE (mode) == 16)
28938 lane = lane ^ (nelems / 2);
28939 }
28940 return lane;
28941 }
28942
28943 /* Some permutations index into pairs of vectors; this is a helper function
28944 to map indexes into those pairs of vectors. */
28945
28946 static int
28947 neon_pair_endian_lane_map (machine_mode mode, int lane)
28948 {
28949 int nelem = GET_MODE_NUNITS (mode);
28950 if (BYTES_BIG_ENDIAN)
28951 lane =
28952 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28953 return lane;
28954 }
28955
28956 /* Generate or test for an insn that supports a constant permutation. */
28957
28958 /* Recognize patterns for the VUZP insns. */
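/* For example, for two V8QImode inputs the even-lane selector
   { 0, 2, 4, 6, 8, 10, 12, 14 } (or the odd-lane one starting at 1)
   matches a single VUZP on a little-endian target.  */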
28959
28960 static bool
28961 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28962 {
28963 unsigned int i, odd, mask, nelt = d->perm.length ();
28964 rtx out0, out1, in0, in1;
28965 rtx (*gen)(rtx, rtx, rtx, rtx);
28966 int first_elem;
28967 int swap_nelt;
28968
28969 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28970 return false;
28971
28972 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28973 big-endian pattern on 64-bit vectors, so we correct for that. */
28974 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
28975 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
28976
28977 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
28978
28979 if (first_elem == neon_endian_lane_map (d->vmode, 0))
28980 odd = 0;
28981 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
28982 odd = 1;
28983 else
28984 return false;
28985 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
28986
28987 for (i = 0; i < nelt; i++)
28988 {
28989 unsigned elt =
28990 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
28991 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
28992 return false;
28993 }
28994
28995 /* Success! */
28996 if (d->testing_p)
28997 return true;
28998
28999 switch (d->vmode)
29000 {
29001 case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
29002 case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
29003 case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
29004 case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
29005 case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
29006 case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
29007 case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
29008 case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
29009 case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
29010 case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
29011 default:
29012 gcc_unreachable ();
29013 }
29014
29015 in0 = d->op0;
29016 in1 = d->op1;
29017 if (swap_nelt != 0)
29018 std::swap (in0, in1);
29019
29020 out0 = d->target;
29021 out1 = gen_reg_rtx (d->vmode);
29022 if (odd)
29023 std::swap (out0, out1);
29024
29025 emit_insn (gen (out0, in0, in1, out1));
29026 return true;
29027 }
29028
29029 /* Recognize patterns for the VZIP insns. */
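/* For example, for two V8QImode inputs the "zip low" selector
   { 0, 8, 1, 9, 2, 10, 3, 11 } matches a single VZIP on a little-endian
   target; { 4, 12, 5, 13, 6, 14, 7, 15 } is the corresponding "zip high"
   form.  */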
29030
29031 static bool
29032 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
29033 {
29034 unsigned int i, high, mask, nelt = d->perm.length ();
29035 rtx out0, out1, in0, in1;
29036 rtx (*gen)(rtx, rtx, rtx, rtx);
29037 int first_elem;
29038 bool is_swapped;
29039
29040 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29041 return false;
29042
29043 is_swapped = BYTES_BIG_ENDIAN;
29044
29045 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
29046
29047 high = nelt / 2;
29048 if (first_elem == neon_endian_lane_map (d->vmode, high))
29049 ;
29050 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
29051 high = 0;
29052 else
29053 return false;
29054 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29055
29056 for (i = 0; i < nelt / 2; i++)
29057 {
29058 unsigned elt =
29059 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
29060 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
29061 != elt)
29062 return false;
29063 elt =
29064 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
29065 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
29066 != elt)
29067 return false;
29068 }
29069
29070 /* Success! */
29071 if (d->testing_p)
29072 return true;
29073
29074 switch (d->vmode)
29075 {
29076 case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
29077 case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
29078 case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
29079 case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
29080 case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
29081 case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
29082 case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
29083 case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
29084 case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
29085 case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
29086 default:
29087 gcc_unreachable ();
29088 }
29089
29090 in0 = d->op0;
29091 in1 = d->op1;
29092 if (is_swapped)
29093 std::swap (in0, in1);
29094
29095 out0 = d->target;
29096 out1 = gen_reg_rtx (d->vmode);
29097 if (high)
29098 std::swap (out0, out1);
29099
29100 emit_insn (gen (out0, in0, in1, out1));
29101 return true;
29102 }
29103
29104 /* Recognize patterns for the VREV insns. */
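/* For example, on a single V8QImode input the selector
   { 3, 2, 1, 0, 7, 6, 5, 4 } (diff == 3) matches VREV32.8 and
   { 7, 6, 5, 4, 3, 2, 1, 0 } (diff == 7) matches VREV64.8.  */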
29105
29106 static bool
29107 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
29108 {
29109 unsigned int i, j, diff, nelt = d->perm.length ();
29110 rtx (*gen)(rtx, rtx);
29111
29112 if (!d->one_vector_p)
29113 return false;
29114
29115 diff = d->perm[0];
29116 switch (diff)
29117 {
29118 case 7:
29119 switch (d->vmode)
29120 {
29121 case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
29122 case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
29123 default:
29124 return false;
29125 }
29126 break;
29127 case 3:
29128 switch (d->vmode)
29129 {
29130 case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
29131 case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
29132 case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
29133 case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
29134 case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
29135 case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
29136 default:
29137 return false;
29138 }
29139 break;
29140 case 1:
29141 switch (d->vmode)
29142 {
29143 case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
29144 case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
29145 case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
29146 case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
29147 case E_V4SImode: gen = gen_neon_vrev64v4si; break;
29148 case E_V2SImode: gen = gen_neon_vrev64v2si; break;
29149 case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
29150 case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
29151 default:
29152 return false;
29153 }
29154 break;
29155 default:
29156 return false;
29157 }
29158
29159 for (i = 0; i < nelt ; i += diff + 1)
29160 for (j = 0; j <= diff; j += 1)
29161 {
29162 /* This is guaranteed to be true, as diff is 7, 3 or 1 and
29163 there are always enough elements in the permutation to
29164 satisfy each access. Getting a vector mask with a value
29165 of diff other than these implies that something went
29166 wrong before we got here. */
29167 gcc_assert (i + j < nelt);
29168 if (d->perm[i + j] != i + diff - j)
29169 return false;
29170 }
29171
29172 /* Success! */
29173 if (d->testing_p)
29174 return true;
29175
29176 emit_insn (gen (d->target, d->op0));
29177 return true;
29178 }
29179
29180 /* Recognize patterns for the VTRN insns. */
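/* For example, for two V4SImode inputs the selector { 0, 4, 2, 6 }
   (or { 1, 5, 3, 7 }) matches a single VTRN on a little-endian target.  */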
29181
29182 static bool
29183 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29184 {
29185 unsigned int i, odd, mask, nelt = d->perm.length ();
29186 rtx out0, out1, in0, in1;
29187 rtx (*gen)(rtx, rtx, rtx, rtx);
29188
29189 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29190 return false;
29191
29192 /* Note that these are little-endian tests. Adjust for big-endian later. */
29193 if (d->perm[0] == 0)
29194 odd = 0;
29195 else if (d->perm[0] == 1)
29196 odd = 1;
29197 else
29198 return false;
29199 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29200
29201 for (i = 0; i < nelt; i += 2)
29202 {
29203 if (d->perm[i] != i + odd)
29204 return false;
29205 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29206 return false;
29207 }
29208
29209 /* Success! */
29210 if (d->testing_p)
29211 return true;
29212
29213 switch (d->vmode)
29214 {
29215 case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
29216 case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
29217 case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
29218 case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
29219 case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
29220 case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
29221 case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29222 case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29223 case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29224 case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29225 default:
29226 gcc_unreachable ();
29227 }
29228
29229 in0 = d->op0;
29230 in1 = d->op1;
29231 if (BYTES_BIG_ENDIAN)
29232 {
29233 std::swap (in0, in1);
29234 odd = !odd;
29235 }
29236
29237 out0 = d->target;
29238 out1 = gen_reg_rtx (d->vmode);
29239 if (odd)
29240 std::swap (out0, out1);
29241
29242 emit_insn (gen (out0, in0, in1, out1));
29243 return true;
29244 }
29245
29246 /* Recognize patterns for the VEXT insns. */
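/* For example, for two V8QImode inputs the selector
   { 3, 4, 5, 6, 7, 8, 9, 10 }, whose indexes increase by one starting
   from 3, matches VEXT with an offset of 3.  */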
29247
29248 static bool
29249 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29250 {
29251 unsigned int i, nelt = d->perm.length ();
29252 rtx (*gen) (rtx, rtx, rtx, rtx);
29253 rtx offset;
29254
29255 unsigned int location;
29256
29257 unsigned int next = d->perm[0] + 1;
29258
29259 /* TODO: Handle GCC's numbering of elements for big-endian. */
29260 if (BYTES_BIG_ENDIAN)
29261 return false;
29262
29263 /* Check if the extracted indexes are increasing by one. */
29264 for (i = 1; i < nelt; next++, i++)
29265 {
29266 /* If we hit the most significant element of the 2nd vector in
29267 the previous iteration, no need to test further. */
29268 if (next == 2 * nelt)
29269 return false;
29270
29271 /* If we are operating on only one vector: it could be a
29272 rotation. If there are only two elements of size < 64, let
29273 arm_evpc_neon_vrev catch it. */
29274 if (d->one_vector_p && (next == nelt))
29275 {
29276 if ((nelt == 2) && (d->vmode != V2DImode))
29277 return false;
29278 else
29279 next = 0;
29280 }
29281
29282 if (d->perm[i] != next)
29283 return false;
29284 }
29285
29286 location = d->perm[0];
29287
29288 switch (d->vmode)
29289 {
29290 case E_V16QImode: gen = gen_neon_vextv16qi; break;
29291 case E_V8QImode: gen = gen_neon_vextv8qi; break;
29292 case E_V4HImode: gen = gen_neon_vextv4hi; break;
29293 case E_V8HImode: gen = gen_neon_vextv8hi; break;
29294 case E_V2SImode: gen = gen_neon_vextv2si; break;
29295 case E_V4SImode: gen = gen_neon_vextv4si; break;
29296 case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29297 case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29298 case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29299 case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29300 case E_V2DImode: gen = gen_neon_vextv2di; break;
29301 default:
29302 return false;
29303 }
29304
29305 /* Success! */
29306 if (d->testing_p)
29307 return true;
29308
29309 offset = GEN_INT (location);
29310 emit_insn (gen (d->target, d->op0, d->op1, offset));
29311 return true;
29312 }
29313
29314 /* The NEON VTBL instruction is a fully variable permutation that's even
29315 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29316 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29317 can do slightly better by expanding this as a constant where we don't
29318 have to apply a mask. */
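/* For example, a V8QImode broadcast of lane 3, i.e. the selector
   { 3, 3, 3, 3, 3, 3, 3, 3 }, is handled here by loading the selector
   into a register and issuing a single VTBL; no masking is needed
   because the constant indexes are already in range.  */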
29319
29320 static bool
29321 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29322 {
29323 rtx rperm[MAX_VECT_LEN], sel;
29324 machine_mode vmode = d->vmode;
29325 unsigned int i, nelt = d->perm.length ();
29326
29327 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29328 numbering of elements for big-endian, we must reverse the order. */
29329 if (BYTES_BIG_ENDIAN)
29330 return false;
29331
29332 if (d->testing_p)
29333 return true;
29334
29335 /* Generic code will try constant permutation twice: once with the
29336 original mode and again with the elements lowered to QImode.
29337 So wait and don't do the selector expansion ourselves. */
29338 if (vmode != V8QImode && vmode != V16QImode)
29339 return false;
29340
29341 for (i = 0; i < nelt; ++i)
29342 rperm[i] = GEN_INT (d->perm[i]);
29343 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29344 sel = force_reg (vmode, sel);
29345
29346 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29347 return true;
29348 }
29349
29350 static bool
29351 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29352 {
29353 /* Check if the input mask matches vext before reordering the
29354 operands. */
29355 if (TARGET_NEON)
29356 if (arm_evpc_neon_vext (d))
29357 return true;
29358
29359 /* The pattern matching functions above are written to look for a small
29360 number to begin the sequence (0, 1, N/2). If we begin with an index
29361 from the second operand, we can swap the operands. */
29362 unsigned int nelt = d->perm.length ();
29363 if (d->perm[0] >= nelt)
29364 {
29365 for (unsigned int i = 0; i < nelt; ++i)
29366 d->perm[i] = (d->perm[i] + nelt) & (2 * nelt - 1);
29367
29368 std::swap (d->op0, d->op1);
29369 }
29370
29371 if (TARGET_NEON)
29372 {
29373 if (arm_evpc_neon_vuzp (d))
29374 return true;
29375 if (arm_evpc_neon_vzip (d))
29376 return true;
29377 if (arm_evpc_neon_vrev (d))
29378 return true;
29379 if (arm_evpc_neon_vtrn (d))
29380 return true;
29381 return arm_evpc_neon_vtbl (d);
29382 }
29383 return false;
29384 }
29385
29386 /* Expand a vec_perm_const pattern. */
29387
29388 bool
29389 arm_expand_vec_perm_const (rtx target, rtx op0, rtx op1, rtx sel)
29390 {
29391 struct expand_vec_perm_d d;
29392 int i, nelt, which;
29393
29394 d.target = target;
29395 d.op0 = op0;
29396 d.op1 = op1;
29397
29398 d.vmode = GET_MODE (target);
29399 gcc_assert (VECTOR_MODE_P (d.vmode));
29400 d.testing_p = false;
29401
29402 nelt = GET_MODE_NUNITS (d.vmode);
29403 d.perm.reserve (nelt);
29404 for (i = which = 0; i < nelt; ++i)
29405 {
29406 rtx e = XVECEXP (sel, 0, i);
29407 int ei = INTVAL (e) & (2 * nelt - 1);
29408 which |= (ei < nelt ? 1 : 2);
29409 d.perm.quick_push (ei);
29410 }
29411
29412 switch (which)
29413 {
29414 default:
29415 gcc_unreachable ();
29416
29417 case 3:
29418 d.one_vector_p = false;
29419 if (!rtx_equal_p (op0, op1))
29420 break;
29421
29422 /* The elements of PERM do not suggest that only the first operand
29423 is used, but both operands are identical. Allow easier matching
29424 of the permutation by folding the permutation into the single
29425 input vector. */
29426 /* FALLTHRU */
29427 case 2:
29428 for (i = 0; i < nelt; ++i)
29429 d.perm[i] &= nelt - 1;
29430 d.op0 = op1;
29431 d.one_vector_p = true;
29432 break;
29433
29434 case 1:
29435 d.op1 = op0;
29436 d.one_vector_p = true;
29437 break;
29438 }
29439
29440 return arm_expand_vec_perm_const_1 (&d);
29441 }
29442
29443 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST_OK. */
29444
29445 static bool
29446 arm_vectorize_vec_perm_const_ok (machine_mode vmode, vec_perm_indices sel)
29447 {
29448 struct expand_vec_perm_d d;
29449 unsigned int i, nelt, which;
29450 bool ret;
29451
29452 d.vmode = vmode;
29453 d.testing_p = true;
29454 d.perm.safe_splice (sel);
29455
29456 /* Categorize the set of elements in the selector. */
29457 nelt = GET_MODE_NUNITS (d.vmode);
29458 for (i = which = 0; i < nelt; ++i)
29459 {
29460 unsigned int e = d.perm[i];
29461 gcc_assert (e < 2 * nelt);
29462 which |= (e < nelt ? 1 : 2);
29463 }
29464
29465 /* If all elements are from the second vector, fold them onto the first. */
29466 if (which == 2)
29467 for (i = 0; i < nelt; ++i)
29468 d.perm[i] -= nelt;
29469
29470 /* Check whether the mask can be applied to the vector type. */
29471 d.one_vector_p = (which != 3);
29472
29473 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29474 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29475 if (!d.one_vector_p)
29476 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29477
29478 start_sequence ();
29479 ret = arm_expand_vec_perm_const_1 (&d);
29480 end_sequence ();
29481
29482 return ret;
29483 }
29484
29485 bool
29486 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29487 {
29488 /* If we are soft float and either have ldrd or the mode
29489 fits in a single word, then all auto increment forms are ok. */
29490 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29491 return true;
29492
29493 switch (code)
29494 {
29495 /* Post-increment and pre-decrement are supported for all
29496 instruction forms except for vector forms. */
29497 case ARM_POST_INC:
29498 case ARM_PRE_DEC:
29499 if (VECTOR_MODE_P (mode))
29500 {
29501 if (code != ARM_PRE_DEC)
29502 return true;
29503 else
29504 return false;
29505 }
29506
29507 return true;
29508
29509 case ARM_POST_DEC:
29510 case ARM_PRE_INC:
29511 /* Without LDRD and mode size greater than
29512 word size, there is no point in auto-incrementing
29513 because ldm and stm will not have these forms. */
29514 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29515 return false;
29516
29517 /* Vector and floating point modes do not support
29518 these auto increment forms. */
29519 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29520 return false;
29521
29522 return true;
29523
29524 default:
29525 return false;
29526
29527 }
29528
29529 return false;
29530 }
29531
29532 /* The default expansion of general 64-bit shifts in core-regs is suboptimal,
29533 on ARM, since we know that shifts by negative amounts are no-ops.
29534 Additionally, the default expansion code is not available or suitable
29535 for post-reload insn splits (this can occur when the register allocator
29536 chooses not to do a shift in NEON).
29537
29538 This function is used in both initial expand and post-reload splits, and
29539 handles all kinds of 64-bit shifts.
29540
29541 Input requirements:
29542 - It is safe for the input and output to be the same register, but
29543 early-clobber rules apply for the shift amount and scratch registers.
29544 - Shift by register requires both scratch registers. In all other cases
29545 the scratch registers may be NULL.
29546 - Ashiftrt by a register also clobbers the CC register. */
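/* For illustration (a sketch of the constant case handled below, not
   generated output): a 64-bit left shift by a constant n with 0 < n < 32
   decomposes into the SImode operations

     hi_out = (hi_in << n) | ((unsigned) lo_in >> (32 - n));
     lo_out = lo_in << n;

   while a shift by 32 <= n < 64 needs only a single shift of the low
   (resp. high) input word plus a zero or sign fill of the other half.  */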
29547 void
29548 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29549 rtx amount, rtx scratch1, rtx scratch2)
29550 {
29551 rtx out_high = gen_highpart (SImode, out);
29552 rtx out_low = gen_lowpart (SImode, out);
29553 rtx in_high = gen_highpart (SImode, in);
29554 rtx in_low = gen_lowpart (SImode, in);
29555
29556 /* Terminology:
29557 in = the register pair containing the input value.
29558 out = the destination register pair.
29559 up = the high- or low-part of each pair.
29560 down = the opposite part to "up".
29561 In a shift, we can consider bits to shift from "up"-stream to
29562 "down"-stream, so in a left-shift "up" is the low-part and "down"
29563 is the high-part of each register pair. */
29564
29565 rtx out_up = code == ASHIFT ? out_low : out_high;
29566 rtx out_down = code == ASHIFT ? out_high : out_low;
29567 rtx in_up = code == ASHIFT ? in_low : in_high;
29568 rtx in_down = code == ASHIFT ? in_high : in_low;
29569
29570 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29571 gcc_assert (out
29572 && (REG_P (out) || GET_CODE (out) == SUBREG)
29573 && GET_MODE (out) == DImode);
29574 gcc_assert (in
29575 && (REG_P (in) || GET_CODE (in) == SUBREG)
29576 && GET_MODE (in) == DImode);
29577 gcc_assert (amount
29578 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29579 && GET_MODE (amount) == SImode)
29580 || CONST_INT_P (amount)));
29581 gcc_assert (scratch1 == NULL
29582 || (GET_CODE (scratch1) == SCRATCH)
29583 || (GET_MODE (scratch1) == SImode
29584 && REG_P (scratch1)));
29585 gcc_assert (scratch2 == NULL
29586 || (GET_CODE (scratch2) == SCRATCH)
29587 || (GET_MODE (scratch2) == SImode
29588 && REG_P (scratch2)));
29589 gcc_assert (!REG_P (out) || !REG_P (amount)
29590 || !HARD_REGISTER_P (out)
29591 || (REGNO (out) != REGNO (amount)
29592 && REGNO (out) + 1 != REGNO (amount)));
29593
29594 /* Macros to make following code more readable. */
29595 #define SUB_32(DEST,SRC) \
29596 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29597 #define RSB_32(DEST,SRC) \
29598 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29599 #define SUB_S_32(DEST,SRC) \
29600 gen_addsi3_compare0 ((DEST), (SRC), \
29601 GEN_INT (-32))
29602 #define SET(DEST,SRC) \
29603 gen_rtx_SET ((DEST), (SRC))
29604 #define SHIFT(CODE,SRC,AMOUNT) \
29605 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29606 #define LSHIFT(CODE,SRC,AMOUNT) \
29607 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29608 SImode, (SRC), (AMOUNT))
29609 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29610 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29611 SImode, (SRC), (AMOUNT))
29612 #define ORR(A,B) \
29613 gen_rtx_IOR (SImode, (A), (B))
29614 #define BRANCH(COND,LABEL) \
29615 gen_arm_cond_branch ((LABEL), \
29616 gen_rtx_ ## COND (CCmode, cc_reg, \
29617 const0_rtx), \
29618 cc_reg)
29619
29620 /* Shifts by register and shifts by constant are handled separately. */
29621 if (CONST_INT_P (amount))
29622 {
29623 /* We have a shift-by-constant. */
29624
29625 /* First, handle out-of-range shift amounts.
29626 In both cases we try to match the result that an ARM instruction in a
29627 shift-by-register would give. This helps reduce execution
29628 differences between optimization levels, but it won't stop other
29629 parts of the compiler doing different things. This is "undefined
29630 behavior", in any case. */
29631 if (INTVAL (amount) <= 0)
29632 emit_insn (gen_movdi (out, in));
29633 else if (INTVAL (amount) >= 64)
29634 {
29635 if (code == ASHIFTRT)
29636 {
29637 rtx const31_rtx = GEN_INT (31);
29638 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29639 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29640 }
29641 else
29642 emit_insn (gen_movdi (out, const0_rtx));
29643 }
29644
29645 /* Now handle valid shifts. */
29646 else if (INTVAL (amount) < 32)
29647 {
29648 /* Shifts by a constant less than 32. */
29649 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29650
29651 /* Clearing the out register in DImode first avoids lots
29652 of spilling and results in less stack usage.
29653 Later this redundant insn is completely removed.
29654 Do that only if "in" and "out" are different registers. */
29655 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29656 emit_insn (SET (out, const0_rtx));
29657 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29658 emit_insn (SET (out_down,
29659 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29660 out_down)));
29661 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29662 }
29663 else
29664 {
29665 /* Shifts by a constant greater than 31. */
29666 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29667
29668 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29669 emit_insn (SET (out, const0_rtx));
29670 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29671 if (code == ASHIFTRT)
29672 emit_insn (gen_ashrsi3 (out_up, in_up,
29673 GEN_INT (31)));
29674 else
29675 emit_insn (SET (out_up, const0_rtx));
29676 }
29677 }
29678 else
29679 {
29680 /* We have a shift-by-register. */
29681 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29682
29683 /* This alternative requires the scratch registers. */
29684 gcc_assert (scratch1 && REG_P (scratch1));
29685 gcc_assert (scratch2 && REG_P (scratch2));
29686
29687 /* We will need the values "amount-32" and "32-amount" later.
29688 Swapping them around now allows the later code to be more general. */
29689 switch (code)
29690 {
29691 case ASHIFT:
29692 emit_insn (SUB_32 (scratch1, amount));
29693 emit_insn (RSB_32 (scratch2, amount));
29694 break;
29695 case ASHIFTRT:
29696 emit_insn (RSB_32 (scratch1, amount));
29697 /* Also set CC so the branch below can test amount < 32. */
29698 emit_insn (SUB_S_32 (scratch2, amount));
29699 break;
29700 case LSHIFTRT:
29701 emit_insn (RSB_32 (scratch1, amount));
29702 emit_insn (SUB_32 (scratch2, amount));
29703 break;
29704 default:
29705 gcc_unreachable ();
29706 }
29707
29708 /* Emit code like this:
29709
29710 arithmetic-left:
29711 out_down = in_down << amount;
29712 out_down = (in_up << (amount - 32)) | out_down;
29713 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29714 out_up = in_up << amount;
29715
29716 arithmetic-right:
29717 out_down = in_down >> amount;
29718 out_down = (in_up << (32 - amount)) | out_down;
29719 if (amount >= 32)
29720 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29721 out_up = in_up >> amount;
29722
29723 logical-right:
29724 out_down = in_down >> amount;
29725 out_down = (in_up << (32 - amount)) | out_down;
29726 if (amount >= 32)
29727 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29728 out_up = in_up >> amount;
29729
29730 The ARM and Thumb2 variants are the same but implemented slightly
29731 differently. If this were only called during expand we could just
29732 use the Thumb2 case and let combine do the right thing, but this
29733 can also be called from post-reload splitters. */
29734
29735 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29736
29737 if (!TARGET_THUMB2)
29738 {
29739 /* Emit code for ARM mode. */
29740 emit_insn (SET (out_down,
29741 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29742 if (code == ASHIFTRT)
29743 {
29744 rtx_code_label *done_label = gen_label_rtx ();
29745 emit_jump_insn (BRANCH (LT, done_label));
29746 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29747 out_down)));
29748 emit_label (done_label);
29749 }
29750 else
29751 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29752 out_down)));
29753 }
29754 else
29755 {
29756 /* Emit code for Thumb2 mode.
29757 Thumb2 can't do shift and or in one insn. */
29758 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29759 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29760
29761 if (code == ASHIFTRT)
29762 {
29763 rtx_code_label *done_label = gen_label_rtx ();
29764 emit_jump_insn (BRANCH (LT, done_label));
29765 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29766 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29767 emit_label (done_label);
29768 }
29769 else
29770 {
29771 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29772 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29773 }
29774 }
29775
29776 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29777 }
29778
29779 #undef SUB_32
29780 #undef RSB_32
29781 #undef SUB_S_32
29782 #undef SET
29783 #undef SHIFT
29784 #undef LSHIFT
29785 #undef REV_LSHIFT
29786 #undef ORR
29787 #undef BRANCH
29788 }
29789
29790 /* Returns true if the pattern is a valid symbolic address, which is either a
29791 symbol_ref or (symbol_ref + addend).
29792
29793 According to the ARM ELF ABI, the initial addend of REL-type relocations
29794 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29795 literal field of the instruction as a 16-bit signed value in the range
29796 -32768 <= A < 32768. */
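/* For example, both (symbol_ref "foo") and
   (const (plus (symbol_ref "foo") (const_int 4))) are accepted, whereas
   (const (plus (symbol_ref "foo") (const_int 0x12345))) is rejected
   because the addend does not fit the signed 16-bit range; "foo" is of
   course just an illustrative name.  */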
29797
29798 bool
29799 arm_valid_symbolic_address_p (rtx addr)
29800 {
29801 rtx xop0, xop1 = NULL_RTX;
29802 rtx tmp = addr;
29803
29804 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29805 return true;
29806
29807 /* (const (plus: symbol_ref const_int)) */
29808 if (GET_CODE (addr) == CONST)
29809 tmp = XEXP (addr, 0);
29810
29811 if (GET_CODE (tmp) == PLUS)
29812 {
29813 xop0 = XEXP (tmp, 0);
29814 xop1 = XEXP (tmp, 1);
29815
29816 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29817 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29818 }
29819
29820 return false;
29821 }
29822
29823 /* Returns true if *COMPARISON is a valid comparison operation, and
29824 puts the operands into a form that is valid for it. */
29825 bool
29826 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29827 {
29828 enum rtx_code code = GET_CODE (*comparison);
29829 int code_int;
29830 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29831 ? GET_MODE (*op2) : GET_MODE (*op1);
29832
29833 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29834
29835 if (code == UNEQ || code == LTGT)
29836 return false;
29837
29838 code_int = (int)code;
29839 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29840 PUT_CODE (*comparison, (enum rtx_code)code_int);
29841
29842 switch (mode)
29843 {
29844 case E_SImode:
29845 if (!arm_add_operand (*op1, mode))
29846 *op1 = force_reg (mode, *op1);
29847 if (!arm_add_operand (*op2, mode))
29848 *op2 = force_reg (mode, *op2);
29849 return true;
29850
29851 case E_DImode:
29852 if (!cmpdi_operand (*op1, mode))
29853 *op1 = force_reg (mode, *op1);
29854 if (!cmpdi_operand (*op2, mode))
29855 *op2 = force_reg (mode, *op2);
29856 return true;
29857
29858 case E_HFmode:
29859 if (!TARGET_VFP_FP16INST)
29860 break;
29861 /* FP16 comparisons are done in SF mode. */
29862 mode = SFmode;
29863 *op1 = convert_to_mode (mode, *op1, 1);
29864 *op2 = convert_to_mode (mode, *op2, 1);
29865 /* Fall through. */
29866 case E_SFmode:
29867 case E_DFmode:
29868 if (!vfp_compare_operand (*op1, mode))
29869 *op1 = force_reg (mode, *op1);
29870 if (!vfp_compare_operand (*op2, mode))
29871 *op2 = force_reg (mode, *op2);
29872 return true;
29873 default:
29874 break;
29875 }
29876
29877 return false;
29878
29879 }
29880
29881 /* Maximum number of instructions to set a block of memory. */
29882 static int
29883 arm_block_set_max_insns (void)
29884 {
29885 if (optimize_function_for_size_p (cfun))
29886 return 4;
29887 else
29888 return current_tune->max_insns_inline_memset;
29889 }
29890
29891 /* Return TRUE if it's profitable to set a block of memory for the
29892 non-vectorized case. VAL is the value to set the memory
29893 with. LENGTH is the number of bytes to set. ALIGN is the
29894 alignment of the destination memory in bytes. UNALIGNED_P
29895 is TRUE if we can only set the memory with instructions
29896 meeting alignment requirements. USE_STRD_P is TRUE if we
29897 can use strd to set the memory. */
29898 static bool
29899 arm_block_set_non_vect_profit_p (rtx val,
29900 unsigned HOST_WIDE_INT length,
29901 unsigned HOST_WIDE_INT align,
29902 bool unaligned_p, bool use_strd_p)
29903 {
29904 int num = 0;
29905 /* For a leftover of 0-7 bytes, we can set the memory block using
29906 strb/strh/str with the minimum number of instructions. */
29907 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29908
29909 if (unaligned_p)
29910 {
29911 num = arm_const_inline_cost (SET, val);
29912 num += length / align + length % align;
29913 }
29914 else if (use_strd_p)
29915 {
29916 num = arm_const_double_inline_cost (val);
29917 num += (length >> 3) + leftover[length & 7];
29918 }
29919 else
29920 {
29921 num = arm_const_inline_cost (SET, val);
29922 num += (length >> 2) + leftover[length & 3];
29923 }
29924
29925 /* We may be able to combine last pair STRH/STRB into a single STR
29926 by shifting one byte back. */
29927 if (unaligned_access && length > 3 && (length & 3) == 3)
29928 num--;
29929
29930 return (num <= arm_block_set_max_insns ());
29931 }
29932
29933 /* Return TRUE if it's profitable to set a block of memory for the
29934 vectorized case. LENGTH is the number of bytes to set.
29935 ALIGN is the alignment of destination memory in bytes.
29936 MODE is the vector mode used to set the memory. */
29937 static bool
29938 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29939 unsigned HOST_WIDE_INT align,
29940 machine_mode mode)
29941 {
29942 int num;
29943 bool unaligned_p = ((align & 3) != 0);
29944 unsigned int nelt = GET_MODE_NUNITS (mode);
29945
29946 /* Instruction loading constant value. */
29947 num = 1;
29948 /* Instructions storing the memory. */
29949 num += (length + nelt - 1) / nelt;
29950 /* Instructions adjusting the address expression. We only need to adjust
29951 the address expression if the destination is 4-byte aligned and the
29952 leftover bytes can only be stored by a misaligned store instruction. */
29953 if (!unaligned_p && (length & 3) != 0)
29954 num++;
29955
29956 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29957 if (!unaligned_p && mode == V16QImode)
29958 num--;
29959
29960 return (num <= arm_block_set_max_insns ());
29961 }
29962
29963 /* Set a block of memory using vectorization instructions for the
29964 unaligned case. We fill the first LENGTH bytes of the memory
29965 area starting from DSTBASE with byte constant VALUE. ALIGN is
29966 the alignment requirement of memory. Return TRUE if succeeded. */
29967 static bool
29968 arm_block_set_unaligned_vect (rtx dstbase,
29969 unsigned HOST_WIDE_INT length,
29970 unsigned HOST_WIDE_INT value,
29971 unsigned HOST_WIDE_INT align)
29972 {
29973 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
29974 rtx dst, mem;
29975 rtx val_vec, reg;
29976 rtx (*gen_func) (rtx, rtx);
29977 machine_mode mode;
29978 unsigned HOST_WIDE_INT v = value;
29979 unsigned int offset = 0;
29980 gcc_assert ((align & 0x3) != 0);
29981 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29982 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29983 if (length >= nelt_v16)
29984 {
29985 mode = V16QImode;
29986 gen_func = gen_movmisalignv16qi;
29987 }
29988 else
29989 {
29990 mode = V8QImode;
29991 gen_func = gen_movmisalignv8qi;
29992 }
29993 nelt_mode = GET_MODE_NUNITS (mode);
29994 gcc_assert (length >= nelt_mode);
29995 /* Skip if it isn't profitable. */
29996 if (!arm_block_set_vect_profit_p (length, align, mode))
29997 return false;
29998
29999 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30000 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30001
30002 v = sext_hwi (v, BITS_PER_WORD);
30003
30004 reg = gen_reg_rtx (mode);
30005 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
30006 /* Emit instruction loading the constant value. */
30007 emit_move_insn (reg, val_vec);
30008
30009 /* Handle nelt_mode bytes in a vector. */
30010 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
30011 {
30012 emit_insn ((*gen_func) (mem, reg));
30013 if (i + 2 * nelt_mode <= length)
30014 {
30015 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
30016 offset += nelt_mode;
30017 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30018 }
30019 }
30020
30021 /* If at least nelt_v8 bytes are left over, we must be in
30022 V16QI mode. */
30023 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
30024
30025 /* Handle (8, 16) bytes leftover. */
30026 if (i + nelt_v8 < length)
30027 {
30028 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
30029 offset += length - i;
30030 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30031
30032 /* We are shifting bytes back, set the alignment accordingly. */
30033 if ((length & 1) != 0 && align >= 2)
30034 set_mem_align (mem, BITS_PER_UNIT);
30035
30036 emit_insn (gen_movmisalignv16qi (mem, reg));
30037 }
30038 /* Handle (0, 8] bytes leftover. */
30039 else if (i < length && i + nelt_v8 >= length)
30040 {
30041 if (mode == V16QImode)
30042 reg = gen_lowpart (V8QImode, reg);
30043
30044 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
30045 + (nelt_mode - nelt_v8))));
30046 offset += (length - i) + (nelt_mode - nelt_v8);
30047 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
30048
30049 /* We are shifting bytes back, set the alignment accordingly. */
30050 if ((length & 1) != 0 && align >= 2)
30051 set_mem_align (mem, BITS_PER_UNIT);
30052
30053 emit_insn (gen_movmisalignv8qi (mem, reg));
30054 }
30055
30056 return true;
30057 }
30058
30059 /* Set a block of memory using vectorization instructions for the
30060 aligned case. We fill the first LENGTH bytes of the memory area
30061 starting from DSTBASE with byte constant VALUE. ALIGN is the
30062 alignment requirement of memory. Return TRUE if succeeded. */
30063 static bool
30064 arm_block_set_aligned_vect (rtx dstbase,
30065 unsigned HOST_WIDE_INT length,
30066 unsigned HOST_WIDE_INT value,
30067 unsigned HOST_WIDE_INT align)
30068 {
30069 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
30070 rtx dst, addr, mem;
30071 rtx val_vec, reg;
30072 machine_mode mode;
30073 unsigned HOST_WIDE_INT v = value;
30074 unsigned int offset = 0;
30075
30076 gcc_assert ((align & 0x3) == 0);
30077 nelt_v8 = GET_MODE_NUNITS (V8QImode);
30078 nelt_v16 = GET_MODE_NUNITS (V16QImode);
30079 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
30080 mode = V16QImode;
30081 else
30082 mode = V8QImode;
30083
30084 nelt_mode = GET_MODE_NUNITS (mode);
30085 gcc_assert (length >= nelt_mode);
30086 /* Skip if it isn't profitable. */
30087 if (!arm_block_set_vect_profit_p (length, align, mode))
30088 return false;
30089
30090 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30091
30092 v = sext_hwi (v, BITS_PER_WORD);
30093
30094 reg = gen_reg_rtx (mode);
30095 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
30096 /* Emit instruction loading the constant value. */
30097 emit_move_insn (reg, val_vec);
30098
30099 i = 0;
30100 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
30101 if (mode == V16QImode)
30102 {
30103 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30104 emit_insn (gen_movmisalignv16qi (mem, reg));
30105 i += nelt_mode;
30106 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
30107 if (i + nelt_v8 < length && i + nelt_v16 > length)
30108 {
30109 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30110 offset += length - nelt_mode;
30111 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30112 /* We are shifting bytes back, set the alignment accordingly. */
30113 if ((length & 0x3) == 0)
30114 set_mem_align (mem, BITS_PER_UNIT * 4);
30115 else if ((length & 0x1) == 0)
30116 set_mem_align (mem, BITS_PER_UNIT * 2);
30117 else
30118 set_mem_align (mem, BITS_PER_UNIT);
30119
30120 emit_insn (gen_movmisalignv16qi (mem, reg));
30121 return true;
30122 }
30123 /* Fall through for bytes leftover. */
30124 mode = V8QImode;
30125 nelt_mode = GET_MODE_NUNITS (mode);
30126 reg = gen_lowpart (V8QImode, reg);
30127 }
30128
30129 /* Handle 8 bytes in a vector. */
30130 for (; (i + nelt_mode <= length); i += nelt_mode)
30131 {
30132 addr = plus_constant (Pmode, dst, i);
30133 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
30134 emit_move_insn (mem, reg);
30135 }
30136
30137 /* Handle single word leftover by shifting 4 bytes back. We can
30138 use aligned access for this case. */
30139 if (i + UNITS_PER_WORD == length)
30140 {
30141 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
30142 offset += i - UNITS_PER_WORD;
30143 mem = adjust_automodify_address (dstbase, mode, addr, offset);
30144 /* We are shifting 4 bytes back, set the alignment accordingly. */
30145 if (align > UNITS_PER_WORD)
30146 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
30147
30148 emit_move_insn (mem, reg);
30149 }
30150 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30151 We have to use unaligned access for this case. */
30152 else if (i < length)
30153 {
30154 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30155 offset += length - nelt_mode;
30156 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30157 /* We are shifting bytes back, set the alignment accordingly. */
30158 if ((length & 1) == 0)
30159 set_mem_align (mem, BITS_PER_UNIT * 2);
30160 else
30161 set_mem_align (mem, BITS_PER_UNIT);
30162
30163 emit_insn (gen_movmisalignv8qi (mem, reg));
30164 }
30165
30166 return true;
30167 }
30168
30169 /* Set a block of memory using plain strh/strb instructions, only
30170 using instructions allowed by ALIGN on the processor. We fill the
30171 first LENGTH bytes of the memory area starting from DSTBASE
30172 with byte constant VALUE. ALIGN is the alignment requirement
30173 of memory. */
30174 static bool
30175 arm_block_set_unaligned_non_vect (rtx dstbase,
30176 unsigned HOST_WIDE_INT length,
30177 unsigned HOST_WIDE_INT value,
30178 unsigned HOST_WIDE_INT align)
30179 {
30180 unsigned int i;
30181 rtx dst, addr, mem;
30182 rtx val_exp, val_reg, reg;
30183 machine_mode mode;
30184 HOST_WIDE_INT v = value;
30185
30186 gcc_assert (align == 1 || align == 2);
30187
30188 if (align == 2)
30189 v |= (value << BITS_PER_UNIT);
30190
30191 v = sext_hwi (v, BITS_PER_WORD);
30192 val_exp = GEN_INT (v);
30193 /* Skip if it isn't profitable. */
30194 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30195 align, true, false))
30196 return false;
30197
30198 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30199 mode = (align == 2 ? HImode : QImode);
30200 val_reg = force_reg (SImode, val_exp);
30201 reg = gen_lowpart (mode, val_reg);
30202
30203 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30204 {
30205 addr = plus_constant (Pmode, dst, i);
30206 mem = adjust_automodify_address (dstbase, mode, addr, i);
30207 emit_move_insn (mem, reg);
30208 }
30209
30210 /* Handle single byte leftover. */
30211 if (i + 1 == length)
30212 {
30213 reg = gen_lowpart (QImode, val_reg);
30214 addr = plus_constant (Pmode, dst, i);
30215 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30216 emit_move_insn (mem, reg);
30217 i++;
30218 }
30219
30220 gcc_assert (i == length);
30221 return true;
30222 }
30223
30224 /* Set a block of memory using plain strd/str/strh/strb instructions,
30225 to permit unaligned stores on processors which support unaligned
30226 semantics for those instructions. We fill the first LENGTH bytes
30227 of the memory area starting from DSTBASE with byte constant VALUE.
30228 ALIGN is the alignment requirement of memory. */
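/* For illustration (values chosen only as an example): setting 25 bytes
   to 0xAB on a word-aligned destination replicates the byte into
   0xABABABAB (and into the 64-bit 0xABABABABABABABAB when strd is used),
   stores double-words or words for the bulk, and finishes the tail with
   strh/strb or a single overlapping unaligned str, as the code below
   decides.  */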
30229 static bool
30230 arm_block_set_aligned_non_vect (rtx dstbase,
30231 unsigned HOST_WIDE_INT length,
30232 unsigned HOST_WIDE_INT value,
30233 unsigned HOST_WIDE_INT align)
30234 {
30235 unsigned int i;
30236 rtx dst, addr, mem;
30237 rtx val_exp, val_reg, reg;
30238 unsigned HOST_WIDE_INT v;
30239 bool use_strd_p;
30240
30241 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30242 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30243
30244 v = (value | (value << 8) | (value << 16) | (value << 24));
30245 if (length < UNITS_PER_WORD)
30246 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30247
30248 if (use_strd_p)
30249 v |= (v << BITS_PER_WORD);
30250 else
30251 v = sext_hwi (v, BITS_PER_WORD);
30252
30253 val_exp = GEN_INT (v);
30254 /* Skip if it isn't profitable. */
30255 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30256 align, false, use_strd_p))
30257 {
30258 if (!use_strd_p)
30259 return false;
30260
30261 /* Try without strd. */
30262 v = (v >> BITS_PER_WORD);
30263 v = sext_hwi (v, BITS_PER_WORD);
30264 val_exp = GEN_INT (v);
30265 use_strd_p = false;
30266 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30267 align, false, use_strd_p))
30268 return false;
30269 }
30270
30271 i = 0;
30272 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30273 /* Handle double words using strd if possible. */
30274 if (use_strd_p)
30275 {
30276 val_reg = force_reg (DImode, val_exp);
30277 reg = val_reg;
30278 for (; (i + 8 <= length); i += 8)
30279 {
30280 addr = plus_constant (Pmode, dst, i);
30281 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30282 emit_move_insn (mem, reg);
30283 }
30284 }
30285 else
30286 val_reg = force_reg (SImode, val_exp);
30287
30288 /* Handle words. */
30289 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30290 for (; (i + 4 <= length); i += 4)
30291 {
30292 addr = plus_constant (Pmode, dst, i);
30293 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30294 if ((align & 3) == 0)
30295 emit_move_insn (mem, reg);
30296 else
30297 emit_insn (gen_unaligned_storesi (mem, reg));
30298 }
30299
30300 /* Merge last pair of STRH and STRB into a STR if possible. */
30301 if (unaligned_access && i > 0 && (i + 3) == length)
30302 {
30303 addr = plus_constant (Pmode, dst, i - 1);
30304 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30305 /* We are shifting one byte back, set the alignment accordingly. */
30306 if ((align & 1) == 0)
30307 set_mem_align (mem, BITS_PER_UNIT);
30308
30309 /* Most likely this is an unaligned access, and we can't tell at
30310 compilation time. */
30311 emit_insn (gen_unaligned_storesi (mem, reg));
30312 return true;
30313 }
30314
30315 /* Handle half word leftover. */
30316 if (i + 2 <= length)
30317 {
30318 reg = gen_lowpart (HImode, val_reg);
30319 addr = plus_constant (Pmode, dst, i);
30320 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30321 if ((align & 1) == 0)
30322 emit_move_insn (mem, reg);
30323 else
30324 emit_insn (gen_unaligned_storehi (mem, reg));
30325
30326 i += 2;
30327 }
30328
30329 /* Handle single byte leftover. */
30330 if (i + 1 == length)
30331 {
30332 reg = gen_lowpart (QImode, val_reg);
30333 addr = plus_constant (Pmode, dst, i);
30334 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30335 emit_move_insn (mem, reg);
30336 }
30337
30338 return true;
30339 }
30340
30341 /* Set a block of memory using vectorization instructions for both
30342 aligned and unaligned cases. We fill the first LENGTH bytes of
30343 the memory area starting from DSTBASE with byte constant VALUE.
30344 ALIGN is the alignment requirement of memory. */
30345 static bool
30346 arm_block_set_vect (rtx dstbase,
30347 unsigned HOST_WIDE_INT length,
30348 unsigned HOST_WIDE_INT value,
30349 unsigned HOST_WIDE_INT align)
30350 {
30351 /* Check whether we need to use unaligned store instruction. */
30352 if (((align & 3) != 0 || (length & 3) != 0)
30353 /* Check whether unaligned store instruction is available. */
30354 && (!unaligned_access || BYTES_BIG_ENDIAN))
30355 return false;
30356
30357 if ((align & 3) == 0)
30358 return arm_block_set_aligned_vect (dstbase, length, value, align);
30359 else
30360 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30361 }
30362
30363 /* Expand a string store (memory set) operation. First we try to do it
30364 using vectorization instructions, then with ARM unaligned access and
30365 double-word stores if profitable. OPERANDS[0] is the destination,
30366 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30367 initialize the memory, OPERANDS[3] is the known alignment of the
30368 destination. */
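/* For illustration (not a guarantee of the chosen sequence): a call such
   as memset (buf, 0, 32) with a suitably aligned BUF on a NEON target
   whose tuning prefers string ops can be expanded inline here, whereas
   any length above 64 bytes makes this expander give up and leaves the
   job to the generic code.  */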
30369 bool
30370 arm_gen_setmem (rtx *operands)
30371 {
30372 rtx dstbase = operands[0];
30373 unsigned HOST_WIDE_INT length;
30374 unsigned HOST_WIDE_INT value;
30375 unsigned HOST_WIDE_INT align;
30376
30377 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30378 return false;
30379
30380 length = UINTVAL (operands[1]);
30381 if (length > 64)
30382 return false;
30383
30384 value = (UINTVAL (operands[2]) & 0xFF);
30385 align = UINTVAL (operands[3]);
30386 if (TARGET_NEON && length >= 8
30387 && current_tune->string_ops_prefer_neon
30388 && arm_block_set_vect (dstbase, length, value, align))
30389 return true;
30390
30391 if (!unaligned_access && (align & 3) != 0)
30392 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30393
30394 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30395 }
30396
30397
30398 static bool
30399 arm_macro_fusion_p (void)
30400 {
30401 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30402 }
30403
30404 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30405 for MOVW / MOVT macro fusion. */
30406
30407 static bool
30408 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30409 {
30410 /* We are trying to fuse
30411 movw imm / movt imm
30412 instructions as a group that gets scheduled together. */
30413
30414 rtx set_dest = SET_DEST (curr_set);
30415
30416 if (GET_MODE (set_dest) != SImode)
30417 return false;
30418
30419 /* We are trying to match:
30420 prev (movw) == (set (reg r0) (const_int imm16))
30421 curr (movt) == (set (zero_extract (reg r0)
30422 (const_int 16)
30423 (const_int 16))
30424 (const_int imm16_1))
30425 or
30426 prev (movw) == (set (reg r1)
30427 (high (symbol_ref ("SYM"))))
30428 curr (movt) == (set (reg r0)
30429 (lo_sum (reg r1)
30430 (symbol_ref ("SYM")))) */
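/* At the assembly level the fused pair is typically of the form
     movw  rX, #:lower16:SYM    (or  movw  rX, #imm16)
     movt  rX, #:upper16:SYM    (or  movt  rX, #imm16_1)
   which suitable cores can issue or schedule as one unit; the checks
   below only establish that the two sets form such a pair.  */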
30431
30432 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30433 {
30434 if (CONST_INT_P (SET_SRC (curr_set))
30435 && CONST_INT_P (SET_SRC (prev_set))
30436 && REG_P (XEXP (set_dest, 0))
30437 && REG_P (SET_DEST (prev_set))
30438 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30439 return true;
30440
30441 }
30442 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30443 && REG_P (SET_DEST (curr_set))
30444 && REG_P (SET_DEST (prev_set))
30445 && GET_CODE (SET_SRC (prev_set)) == HIGH
30446 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30447 return true;
30448
30449 return false;
30450 }
30451
30452 static bool
30453 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30454 {
30455 rtx prev_set = single_set (prev);
30456 rtx curr_set = single_set (curr);
30457
30458 if (!prev_set
30459 || !curr_set)
30460 return false;
30461
30462 if (any_condjump_p (curr))
30463 return false;
30464
30465 if (!arm_macro_fusion_p ())
30466 return false;
30467
30468 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30469 && aarch_crypto_can_dual_issue (prev, curr))
30470 return true;
30471
30472 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30473 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30474 return true;
30475
30476 return false;
30477 }
30478
30479 /* Return true iff the instruction fusion described by OP is enabled. */
30480 bool
30481 arm_fusion_enabled_p (tune_params::fuse_ops op)
30482 {
30483 return current_tune->fusible_ops & op;
30484 }
30485
30486 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30487 scheduled for speculative execution. Reject the long-running division
30488 and square-root instructions. */
30489
30490 static bool
30491 arm_sched_can_speculate_insn (rtx_insn *insn)
30492 {
30493 switch (get_attr_type (insn))
30494 {
30495 case TYPE_SDIV:
30496 case TYPE_UDIV:
30497 case TYPE_FDIVS:
30498 case TYPE_FDIVD:
30499 case TYPE_FSQRTS:
30500 case TYPE_FSQRTD:
30501 case TYPE_NEON_FP_SQRT_S:
30502 case TYPE_NEON_FP_SQRT_D:
30503 case TYPE_NEON_FP_SQRT_S_Q:
30504 case TYPE_NEON_FP_SQRT_D_Q:
30505 case TYPE_NEON_FP_DIV_S:
30506 case TYPE_NEON_FP_DIV_D:
30507 case TYPE_NEON_FP_DIV_S_Q:
30508 case TYPE_NEON_FP_DIV_D_Q:
30509 return false;
30510 default:
30511 return true;
30512 }
30513 }
30514
30515 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30516
30517 static unsigned HOST_WIDE_INT
30518 arm_asan_shadow_offset (void)
30519 {
30520 return HOST_WIDE_INT_1U << 29;
30521 }
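/* With this offset and GCC's default shadow scale of 3, AddressSanitizer
   maps an address ADDR to shadow memory at roughly
   (ADDR >> 3) + 0x20000000; the value returned above is 1 << 29.  */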
30522
30523
30524 /* This is a temporary fix for PR60655. Ideally we need
30525 to handle most of these cases in the generic part but
30526 currently we reject minus (..) (sym_ref). We try to
30527 ameliorate the case with minus (sym_ref1) (sym_ref2)
30528 where they are in the same section. */
30529
30530 static bool
30531 arm_const_not_ok_for_debug_p (rtx p)
30532 {
30533 tree decl_op0 = NULL;
30534 tree decl_op1 = NULL;
30535
30536 if (GET_CODE (p) == UNSPEC)
30537 return true;
30538 if (GET_CODE (p) == MINUS)
30539 {
30540 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30541 {
30542 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30543 if (decl_op1
30544 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30545 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30546 {
30547 if ((VAR_P (decl_op1)
30548 || TREE_CODE (decl_op1) == CONST_DECL)
30549 && (VAR_P (decl_op0)
30550 || TREE_CODE (decl_op0) == CONST_DECL))
30551 return (get_variable_section (decl_op1, false)
30552 != get_variable_section (decl_op0, false));
30553
30554 if (TREE_CODE (decl_op1) == LABEL_DECL
30555 && TREE_CODE (decl_op0) == LABEL_DECL)
30556 return (DECL_CONTEXT (decl_op1)
30557 != DECL_CONTEXT (decl_op0));
30558 }
30559
30560 return true;
30561 }
30562 }
30563
30564 return false;
30565 }
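/* For example, (minus (symbol_ref "a") (symbol_ref "b")) is acceptable
   for debug output when both decls are variables placed in the same
   section, but is rejected when they end up in different sections or
   when either symbol has no associated decl.  */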
30566
30567 /* Return TRUE if X is a reference to a value in a constant pool.  */
30568 extern bool
30569 arm_is_constant_pool_ref (rtx x)
30570 {
30571 return (MEM_P (x)
30572 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30573 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30574 }
30575
30576 /* Remember the last target of arm_set_current_function. */
30577 static GTY(()) tree arm_previous_fndecl;
30578
30579 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30580
30581 void
30582 save_restore_target_globals (tree new_tree)
30583 {
30584 /* If we have a previous state, use it. */
30585 if (TREE_TARGET_GLOBALS (new_tree))
30586 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30587 else if (new_tree == target_option_default_node)
30588 restore_target_globals (&default_target_globals);
30589 else
30590 {
30591 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30592 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30593 }
30594
30595 arm_option_params_internal ();
30596 }
30597
30598 /* Invalidate arm_previous_fndecl. */
30599
30600 void
30601 arm_reset_previous_fndecl (void)
30602 {
30603 arm_previous_fndecl = NULL_TREE;
30604 }
30605
30606 /* Establish appropriate back-end context for processing the function
30607 FNDECL. The argument might be NULL to indicate processing at top
30608 level, outside of any function scope. */
30609
30610 static void
30611 arm_set_current_function (tree fndecl)
30612 {
30613 if (!fndecl || fndecl == arm_previous_fndecl)
30614 return;
30615
30616 tree old_tree = (arm_previous_fndecl
30617 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30618 : NULL_TREE);
30619
30620 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30621
30622 /* If current function has no attributes but previous one did,
30623 use the default node. */
30624 if (! new_tree && old_tree)
30625 new_tree = target_option_default_node;
30626
30627 /* If there is nothing to do, return.  #pragma GCC reset or #pragma GCC pop to
30628 the default have been handled by save_restore_target_globals from
30629 arm_pragma_target_parse. */
30630 if (old_tree == new_tree)
30631 return;
30632
30633 arm_previous_fndecl = fndecl;
30634
30635 /* First set the target options. */
30636 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30637
30638 save_restore_target_globals (new_tree);
30639 }
30640
30641 /* Implement TARGET_OPTION_PRINT. */
30642
30643 static void
30644 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30645 {
30646 int flags = ptr->x_target_flags;
30647 const char *fpu_name;
30648
30649 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30650 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30651
30652 fprintf (file, "%*sselected isa %s\n", indent, "",
30653 TARGET_THUMB2_P (flags) ? "thumb2" :
30654 TARGET_THUMB_P (flags) ? "thumb1" :
30655 "arm");
30656
30657 if (ptr->x_arm_arch_string)
30658 fprintf (file, "%*sselected architecture %s\n", indent, "",
30659 ptr->x_arm_arch_string);
30660
30661 if (ptr->x_arm_cpu_string)
30662 fprintf (file, "%*sselected CPU %s\n", indent, "",
30663 ptr->x_arm_cpu_string);
30664
30665 if (ptr->x_arm_tune_string)
30666 fprintf (file, "%*sselected tune %s\n", indent, "",
30667 ptr->x_arm_tune_string);
30668
30669 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30670 }
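/* When a saved set of target options is dumped, the output has the form
   (values are examples only):
       selected isa thumb2
       selected architecture armv7-a
       selected fpu neon  */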
30671
30672 /* Hook to determine if one function can safely inline another. */
30673
30674 static bool
30675 arm_can_inline_p (tree caller, tree callee)
30676 {
30677 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30678 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30679 bool can_inline = true;
30680
30681 struct cl_target_option *caller_opts
30682 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30683 : target_option_default_node);
30684
30685 struct cl_target_option *callee_opts
30686 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30687 : target_option_default_node);
30688
30689 if (callee_opts == caller_opts)
30690 return true;
30691
30692 /* Callee's ISA features should be a subset of the caller's. */
30693 struct arm_build_target caller_target;
30694 struct arm_build_target callee_target;
30695 caller_target.isa = sbitmap_alloc (isa_num_bits);
30696 callee_target.isa = sbitmap_alloc (isa_num_bits);
30697
30698 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30699 false);
30700 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30701 false);
30702 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30703 can_inline = false;
30704
30705 sbitmap_free (caller_target.isa);
30706 sbitmap_free (callee_target.isa);
30707
30708 /* OK to inline between different modes.
30709 Functions with mode-specific instructions, e.g. using asm,
30710 must be explicitly protected with noinline. */
30711 return can_inline;
30712 }
30713
30714 /* Hook to fix function's alignment affected by target attribute. */
30715
30716 static void
30717 arm_relayout_function (tree fndecl)
30718 {
30719 if (DECL_USER_ALIGN (fndecl))
30720 return;
30721
30722 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30723
30724 if (!callee_tree)
30725 callee_tree = target_option_default_node;
30726
30727 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30728 SET_DECL_ALIGN
30729 (fndecl,
30730 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30731 }
30732
30733 /* Inner function to process the attribute ((target (...))): take an argument
30734 and set the current options from it.  If we have a list, recursively
30735 process each element of the list. */
30736
30737 static bool
30738 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30739 {
30740 if (TREE_CODE (args) == TREE_LIST)
30741 {
30742 bool ret = true;
30743
30744 for (; args; args = TREE_CHAIN (args))
30745 if (TREE_VALUE (args)
30746 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30747 ret = false;
30748 return ret;
30749 }
30750
30751 else if (TREE_CODE (args) != STRING_CST)
30752 {
30753 error ("attribute %<target%> argument not a string");
30754 return false;
30755 }
30756
30757 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30758 char *q;
30759
30760 while ((q = strtok (argstr, ",")) != NULL)
30761 {
30762 while (ISSPACE (*q)) ++q;
30763
30764 argstr = NULL;
30765 if (!strncmp (q, "thumb", 5))
30766 opts->x_target_flags |= MASK_THUMB;
30767
30768 else if (!strncmp (q, "arm", 3))
30769 opts->x_target_flags &= ~MASK_THUMB;
30770
30771 else if (!strncmp (q, "fpu=", 4))
30772 {
30773 int fpu_index;
30774 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30775 &fpu_index, CL_TARGET))
30776 {
30777 error ("invalid fpu for target attribute or pragma %qs", q);
30778 return false;
30779 }
30780 if (fpu_index == TARGET_FPU_auto)
30781 {
30782 /* This doesn't really make sense until we support
30783 general dynamic selection of the architecture and all
30784 sub-features. */
30785 sorry ("auto fpu selection not currently permitted here");
30786 return false;
30787 }
30788 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30789 }
30790 else if (!strncmp (q, "arch=", 5))
30791 {
30792 char* arch = q+5;
30793 const arch_option *arm_selected_arch
30794 = arm_parse_arch_option_name (all_architectures, "arch", arch);
30795
30796 if (!arm_selected_arch)
30797 {
30798 error ("invalid architecture for target attribute or pragma %qs",
30799 q);
30800 return false;
30801 }
30802
30803 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
30804 }
30805 else if (q[0] == '+')
30806 {
30807 opts->x_arm_arch_string
30808 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
30809 }
30810 else
30811 {
30812 error ("unknown target attribute or pragma %qs", q);
30813 return false;
30814 }
30815 }
30816
30817 return true;
30818 }
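/* The strings handled above come from source-level annotations such as
     __attribute__ ((target ("thumb")))
     __attribute__ ((target ("arch=armv7-a,fpu=neon")))
   or from #pragma GCC target; each comma-separated token is processed by
   one iteration of the strtok loop.  */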
30819
30820 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30821
30822 tree
30823 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30824 struct gcc_options *opts_set)
30825 {
30826 struct cl_target_option cl_opts;
30827
30828 if (!arm_valid_target_attribute_rec (args, opts))
30829 return NULL_TREE;
30830
30831 cl_target_option_save (&cl_opts, opts);
30832 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30833 arm_option_check_internal (opts);
30834 /* Do any overrides, such as global options arch=xxx.
30835 We do this since arm_active_target was overridden. */
30836 arm_option_reconfigure_globals ();
30837 arm_options_perform_arch_sanity_checks ();
30838 arm_option_override_internal (opts, opts_set);
30839
30840 return build_target_option_node (opts);
30841 }
30842
30843 static void
30844 add_attribute (const char * mode, tree *attributes)
30845 {
30846 size_t len = strlen (mode);
30847 tree value = build_string (len, mode);
30848
30849 TREE_TYPE (value) = build_array_type (char_type_node,
30850 build_index_type (size_int (len)));
30851
30852 *attributes = tree_cons (get_identifier ("target"),
30853 build_tree_list (NULL_TREE, value),
30854 *attributes);
30855 }
30856
30857 /* For testing.  Insert thumb or arm modes alternately on functions. */
30858
30859 static void
30860 arm_insert_attributes (tree fndecl, tree * attributes)
30861 {
30862 const char *mode;
30863
30864 if (! TARGET_FLIP_THUMB)
30865 return;
30866
30867 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30868 || DECL_BUILT_IN (fndecl) || DECL_ARTIFICIAL (fndecl))
30869 return;
30870
30871 /* Nested definitions must inherit mode. */
30872 if (current_function_decl)
30873 {
30874 mode = TARGET_THUMB ? "thumb" : "arm";
30875 add_attribute (mode, attributes);
30876 return;
30877 }
30878
30879 /* If there is already a setting don't change it. */
30880 if (lookup_attribute ("target", *attributes) != NULL)
30881 return;
30882
30883 mode = thumb_flipper ? "thumb" : "arm";
30884 add_attribute (mode, attributes);
30885
30886 thumb_flipper = !thumb_flipper;
30887 }
30888
30889 /* Hook to validate attribute((target("string"))). */
30890
30891 static bool
30892 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30893 tree args, int ARG_UNUSED (flags))
30894 {
30895 bool ret = true;
30896 struct gcc_options func_options;
30897 tree cur_tree, new_optimize;
30898 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30899
30900 /* Get the optimization options of the current function. */
30901 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30902
30903 /* If the function changed the optimization levels as well as setting target
30904 options, start with the optimizations specified. */
30905 if (!func_optimize)
30906 func_optimize = optimization_default_node;
30907
30908 /* Init func_options. */
30909 memset (&func_options, 0, sizeof (func_options));
30910 init_options_struct (&func_options, NULL);
30911 lang_hooks.init_options_struct (&func_options);
30912
30913 /* Initialize func_options to the defaults. */
30914 cl_optimization_restore (&func_options,
30915 TREE_OPTIMIZATION (func_optimize));
30916
30917 cl_target_option_restore (&func_options,
30918 TREE_TARGET_OPTION (target_option_default_node));
30919
30920 /* Set func_options flags with new target mode. */
30921 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30922 &global_options_set);
30923
30924 if (cur_tree == NULL_TREE)
30925 ret = false;
30926
30927 new_optimize = build_optimization_node (&func_options);
30928
30929 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30930
30931 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30932
30933 finalize_options_struct (&func_options);
30934
30935 return ret;
30936 }
30937
30938 /* Match an ISA feature bitmap to a named FPU. We always use the
30939 first entry that exactly matches the feature set, so that we
30940 effectively canonicalize the FPU name for the assembler. */
30941 static const char*
30942 arm_identify_fpu_from_isa (sbitmap isa)
30943 {
30944 auto_sbitmap fpubits (isa_num_bits);
30945 auto_sbitmap cand_fpubits (isa_num_bits);
30946
30947 bitmap_and (fpubits, isa, isa_all_fpubits);
30948
30949 /* If there are no ISA feature bits relating to the FPU, we must be
30950 doing soft-float. */
30951 if (bitmap_empty_p (fpubits))
30952 return "softvfp";
30953
30954 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30955 {
30956 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30957 if (bitmap_equal_p (fpubits, cand_fpubits))
30958 return all_fpus[i].name;
30959 }
30960 /* We must find an entry, or things have gone wrong. */
30961 gcc_unreachable ();
30962 }
30963
30964 /* The last .arch and .fpu assembly strings that we printed. */
30965 static std::string arm_last_printed_arch_string;
30966 static std::string arm_last_printed_fpu_string;
30967
30968 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
30969 by the function DECL. */
30970 void
30971 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30972 {
30973 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
30974
30975 struct cl_target_option *targ_options;
30976 if (target_parts)
30977 targ_options = TREE_TARGET_OPTION (target_parts);
30978 else
30979 targ_options = TREE_TARGET_OPTION (target_option_current_node);
30980 gcc_assert (targ_options);
30981
30982 /* Only update the assembler .arch string if it is distinct from the last
30983 such string we printed. */
30984 std::string arch_to_print = targ_options->x_arm_arch_string;
30985 if (arch_to_print != arm_last_printed_arch_string)
30986 {
30987 std::string arch_name
30988 = arch_to_print.substr (0, arch_to_print.find ("+"));
30989 asm_fprintf (asm_out_file, "\t.arch %s\n", arch_name.c_str ());
30990 const arch_option *arch
30991 = arm_parse_arch_option_name (all_architectures, "-march",
30992 targ_options->x_arm_arch_string);
30993 auto_sbitmap opt_bits (isa_num_bits);
30994
30995 gcc_assert (arch);
30996 if (arch->common.extensions)
30997 {
30998 for (const struct cpu_arch_extension *opt = arch->common.extensions;
30999 opt->name != NULL;
31000 opt++)
31001 {
31002 if (!opt->remove)
31003 {
31004 arm_initialize_isa (opt_bits, opt->isa_bits);
31005 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
31006 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
31007 asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
31008 opt->name);
31009 }
31010 }
31011 }
31012
31013 arm_last_printed_arch_string = arch_to_print;
31014 }
31015
31016 fprintf (stream, "\t.syntax unified\n");
31017
31018 if (TARGET_THUMB)
31019 {
31020 if (is_called_in_ARM_mode (decl)
31021 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
31022 && cfun->is_thunk))
31023 fprintf (stream, "\t.code 32\n");
31024 else if (TARGET_THUMB1)
31025 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
31026 else
31027 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
31028 }
31029 else
31030 fprintf (stream, "\t.arm\n");
31031
31032 std::string fpu_to_print
31033 = TARGET_SOFT_FLOAT
31034 ? "softvfp" : arm_identify_fpu_from_isa (arm_active_target.isa);
31035
31036 if (fpu_to_print != arm_last_printed_fpu_string)
31037 {
31038 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_to_print.c_str ());
31039 arm_last_printed_fpu_string = fpu_to_print;
31040 }
31041
31042 if (TARGET_POKE_FUNCTION_NAME)
31043 arm_poke_function_name (stream, (const char *) name);
31044 }
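/* For a Thumb-2 function compiled with, say, -march=armv7-a -mfpu=neon,
   the directives emitted here resemble:
       .arch armv7-a
       .syntax unified
       .thumb
       .thumb_func
       .fpu neon
   (The exact output depends on the active target and on what was already
   printed for the previous function.)  */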
31045
31046 /* If MEM is in the form of [base+offset], extract the two parts
31047 of the address and store them in BASE and OFFSET; otherwise return false
31048 after clearing BASE and OFFSET. */
31049
31050 static bool
31051 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
31052 {
31053 rtx addr;
31054
31055 gcc_assert (MEM_P (mem));
31056
31057 addr = XEXP (mem, 0);
31058
31059 /* Strip off const from addresses like (const (addr)). */
31060 if (GET_CODE (addr) == CONST)
31061 addr = XEXP (addr, 0);
31062
31063 if (GET_CODE (addr) == REG)
31064 {
31065 *base = addr;
31066 *offset = const0_rtx;
31067 return true;
31068 }
31069
31070 if (GET_CODE (addr) == PLUS
31071 && GET_CODE (XEXP (addr, 0)) == REG
31072 && CONST_INT_P (XEXP (addr, 1)))
31073 {
31074 *base = XEXP (addr, 0);
31075 *offset = XEXP (addr, 1);
31076 return true;
31077 }
31078
31079 *base = NULL_RTX;
31080 *offset = NULL_RTX;
31081
31082 return false;
31083 }
31084
31085 /* If INSN is a load or store whose address has the form [base+offset],
31086 extract the two parts and store them in BASE and OFFSET.  IS_LOAD is set
31087 to TRUE if it's a load. Return TRUE if INSN is such an instruction,
31088 otherwise return FALSE. */
31089
31090 static bool
31091 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
31092 {
31093 rtx x, dest, src;
31094
31095 gcc_assert (INSN_P (insn));
31096 x = PATTERN (insn);
31097 if (GET_CODE (x) != SET)
31098 return false;
31099
31100 src = SET_SRC (x);
31101 dest = SET_DEST (x);
31102 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
31103 {
31104 *is_load = false;
31105 extract_base_offset_in_addr (dest, base, offset);
31106 }
31107 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
31108 {
31109 *is_load = true;
31110 extract_base_offset_in_addr (src, base, offset);
31111 }
31112 else
31113 return false;
31114
31115 return (*base != NULL_RTX && *offset != NULL_RTX);
31116 }
31117
31118 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
31119
31120 Currently we only support fusing ldr and str instructions, so FUSION_PRI
31121 and PRI are only calculated for these instructions.  For other instructions,
31122 FUSION_PRI and PRI are simply set to MAX_PRI.  In the future, other kinds of
31123 instruction fusion can be supported by returning different priorities.
31124
31125 It's important that irrelevant instructions get the largest FUSION_PRI. */
31126
31127 static void
31128 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
31129 int *fusion_pri, int *pri)
31130 {
31131 int tmp, off_val;
31132 bool is_load;
31133 rtx base, offset;
31134
31135 gcc_assert (INSN_P (insn));
31136
31137 tmp = max_pri - 1;
31138 if (!fusion_load_store (insn, &base, &offset, &is_load))
31139 {
31140 *pri = tmp;
31141 *fusion_pri = tmp;
31142 return;
31143 }
31144
31145 /* Load goes first. */
31146 if (is_load)
31147 *fusion_pri = tmp - 1;
31148 else
31149 *fusion_pri = tmp - 2;
31150
31151 tmp /= 2;
31152
31153 /* INSN with smaller base register goes first. */
31154 tmp -= ((REGNO (base) & 0xff) << 20);
31155
31156 /* INSN with smaller offset goes first. */
31157 off_val = (int)(INTVAL (offset));
31158 if (off_val >= 0)
31159 tmp -= (off_val & 0xfffff);
31160 else
31161 tmp += ((- off_val) & 0xfffff);
31162
31163 *pri = tmp;
31164 return;
31165 }
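/* Worked example: for a fusible load from [rN, #8] the code above yields
   *fusion_pri = max_pri - 2 and
   *pri = (max_pri - 1) / 2 - ((N & 0xff) << 20) - 8,
   so loads go before stores, and smaller base register numbers and
   smaller offsets come earlier within each group.  */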
31166
31167
31168 /* Construct and return a PARALLEL RTX vector with elements numbering the
31169 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
31170 the vector - from the perspective of the architecture. This does not
31171 line up with GCC's perspective on lane numbers, so we end up with
31172 different masks depending on our target endian-ness. The diagram
31173 below may help. We must draw the distinction when building masks
31174 which select one half of the vector. An instruction selecting
31175 architectural low-lanes for a big-endian target, must be described using
31176 a mask selecting GCC high-lanes.
31177
31178 Big-Endian Little-Endian
31179
31180 GCC 0 1 2 3 3 2 1 0
31181 | x | x | x | x | | x | x | x | x |
31182 Architecture 3 2 1 0 3 2 1 0
31183
31184 Low Mask: { 2, 3 } { 0, 1 }
31185 High Mask: { 0, 1 } { 2, 3 }
31186 */
31187
31188 rtx
31189 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
31190 {
31191 int nunits = GET_MODE_NUNITS (mode);
31192 rtvec v = rtvec_alloc (nunits / 2);
31193 int high_base = nunits / 2;
31194 int low_base = 0;
31195 int base;
31196 rtx t1;
31197 int i;
31198
31199 if (BYTES_BIG_ENDIAN)
31200 base = high ? low_base : high_base;
31201 else
31202 base = high ? high_base : low_base;
31203
31204 for (i = 0; i < nunits / 2; i++)
31205 RTVEC_ELT (v, i) = GEN_INT (base + i);
31206
31207 t1 = gen_rtx_PARALLEL (mode, v);
31208 return t1;
31209 }
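/* For example, for V4SImode with HIGH == true this returns
   (parallel [(const_int 2) (const_int 3)]) on little-endian and
   (parallel [(const_int 0) (const_int 1)]) on big-endian, matching the
   mask table in the comment above.  */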
31210
31211 /* Check OP for validity as a PARALLEL RTX vector with elements
31212 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
31213 from the perspective of the architecture. See the diagram above
31214 arm_simd_vect_par_cnst_half for more details. */
31215
31216 bool
31217 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
31218 bool high)
31219 {
31220 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
31221 HOST_WIDE_INT count_op = XVECLEN (op, 0);
31222 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
31223 int i = 0;
31224
31225 if (!VECTOR_MODE_P (mode))
31226 return false;
31227
31228 if (count_op != count_ideal)
31229 return false;
31230
31231 for (i = 0; i < count_ideal; i++)
31232 {
31233 rtx elt_op = XVECEXP (op, 0, i);
31234 rtx elt_ideal = XVECEXP (ideal, 0, i);
31235
31236 if (!CONST_INT_P (elt_op)
31237 || INTVAL (elt_ideal) != INTVAL (elt_op))
31238 return false;
31239 }
31240 return true;
31241 }
31242
31243 /* Can output mi_thunk for all cases except for non-zero vcall_offset
31244 in Thumb1. */
31245 static bool
31246 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
31247 const_tree)
31248 {
31249 /* For now, we punt and do not handle this for TARGET_THUMB1. */
31250 if (vcall_offset && TARGET_THUMB1)
31251 return false;
31252
31253 /* Otherwise ok. */
31254 return true;
31255 }
31256
31257 /* Generate RTL for a conditional branch with rtx comparison CODE in
31258 mode CC_MODE. The destination of the unlikely conditional branch
31259 is LABEL_REF. */
31260
31261 void
31262 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
31263 rtx label_ref)
31264 {
31265 rtx x;
31266 x = gen_rtx_fmt_ee (code, VOIDmode,
31267 gen_rtx_REG (cc_mode, CC_REGNUM),
31268 const0_rtx);
31269
31270 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31271 gen_rtx_LABEL_REF (VOIDmode, label_ref),
31272 pc_rtx);
31273 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31274 }
31275
31276 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31277
31278 For pure-code sections there is no letter code for this attribute, so
31279 output all the section flags numerically when this is needed. */
31280
31281 static bool
31282 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31283 {
31284
31285 if (flags & SECTION_ARM_PURECODE)
31286 {
31287 *num = 0x20000000;
31288
31289 if (!(flags & SECTION_DEBUG))
31290 *num |= 0x2;
31291 if (flags & SECTION_EXCLUDE)
31292 *num |= 0x80000000;
31293 if (flags & SECTION_WRITE)
31294 *num |= 0x1;
31295 if (flags & SECTION_CODE)
31296 *num |= 0x4;
31297 if (flags & SECTION_MERGE)
31298 *num |= 0x10;
31299 if (flags & SECTION_STRINGS)
31300 *num |= 0x20;
31301 if (flags & SECTION_TLS)
31302 *num |= 0x400;
31303 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31304 *num |= 0x200;
31305
31306 return true;
31307 }
31308
31309 return false;
31310 }
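/* For instance, an allocated, executable pure-code text section (no
   write, debug, TLS or group flags) gets
   0x20000000 | 0x2 | 0x4 == 0x20000006, i.e. SHF_ARM_PURECODE together
   with SHF_ALLOC and SHF_EXECINSTR.  */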
31311
31312 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31313
31314 If pure-code is passed as an option, make sure all functions are in
31315 sections that have the SHF_ARM_PURECODE attribute. */
31316
31317 static section *
31318 arm_function_section (tree decl, enum node_frequency freq,
31319 bool startup, bool exit)
31320 {
31321 const char * section_name;
31322 section * sec;
31323
31324 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31325 return default_function_section (decl, freq, startup, exit);
31326
31327 if (!target_pure_code)
31328 return default_function_section (decl, freq, startup, exit);
31329
31330
31331 section_name = DECL_SECTION_NAME (decl);
31332
31333 /* If a function is not in a named section then it falls under the 'default'
31334 text section, also known as '.text'. We can preserve previous behavior as
31335 the default text section already has the SHF_ARM_PURECODE section
31336 attribute. */
31337 if (!section_name)
31338 {
31339 section *default_sec = default_function_section (decl, freq, startup,
31340 exit);
31341
31342 /* If default_sec is not null, then it must be a special section like for
31343 example .text.startup. We set the pure-code attribute and return the
31344 same section to preserve existing behavior. */
31345 if (default_sec)
31346 default_sec->common.flags |= SECTION_ARM_PURECODE;
31347 return default_sec;
31348 }
31349
31350 /* Otherwise look whether a section has already been created with
31351 'section_name'. */
31352 sec = get_named_section (decl, section_name, 0);
31353 if (!sec)
31354 /* If that is not the case passing NULL as the section's name to
31355 'get_named_section' will create a section with the declaration's
31356 section name. */
31357 sec = get_named_section (decl, NULL, 0);
31358
31359 /* Set the SHF_ARM_PURECODE attribute. */
31360 sec->common.flags |= SECTION_ARM_PURECODE;
31361
31362 return sec;
31363 }
31364
31365 /* Implements the TARGET_SECTION_FLAGS hook.
31366
31367 If DECL is a function declaration and pure-code is passed as an option
31368 then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
31369 section's name and RELOC indicates whether the declaration's initializer may
31370 contain runtime relocations. */
31371
31372 static unsigned int
31373 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31374 {
31375 unsigned int flags = default_section_type_flags (decl, name, reloc);
31376
31377 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31378 flags |= SECTION_ARM_PURECODE;
31379
31380 return flags;
31381 }
31382
31383 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31384
31385 static void
31386 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31387 rtx op0, rtx op1,
31388 rtx *quot_p, rtx *rem_p)
31389 {
31390 if (mode == SImode)
31391 gcc_assert (!TARGET_IDIV);
31392
31393 scalar_int_mode libval_mode
31394 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31395
31396 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31397 libval_mode,
31398 op0, GET_MODE (op0),
31399 op1, GET_MODE (op1));
31400
31401 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31402 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31403 GET_MODE_SIZE (mode));
31404
31405 gcc_assert (quotient);
31406 gcc_assert (remainder);
31407
31408 *quot_p = quotient;
31409 *rem_p = remainder;
31410 }
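/* For SImode operands the libcall value comes back in DImode: per the
   run-time ABI the quotient and remainder are returned as a pair, and
   the two subregs above pick them out at byte offsets 0 and
   GET_MODE_SIZE (mode) respectively.  */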
31411
31412 /* This function checks for the availability of the coprocessor builtin passed
31413 in BUILTIN for the current target. Returns true if it is available and
31414 false otherwise.  If a BUILTIN is passed for which this function has not
31415 been implemented, it will abort via gcc_unreachable. */
31416
31417 bool
31418 arm_coproc_builtin_available (enum unspecv builtin)
31419 {
31420 /* None of these builtins are available in Thumb mode if the target only
31421 supports Thumb-1. */
31422 if (TARGET_THUMB1)
31423 return false;
31424
31425 switch (builtin)
31426 {
31427 case VUNSPEC_CDP:
31428 case VUNSPEC_LDC:
31429 case VUNSPEC_LDCL:
31430 case VUNSPEC_STC:
31431 case VUNSPEC_STCL:
31432 case VUNSPEC_MCR:
31433 case VUNSPEC_MRC:
31434 if (arm_arch4)
31435 return true;
31436 break;
31437 case VUNSPEC_CDP2:
31438 case VUNSPEC_LDC2:
31439 case VUNSPEC_LDC2L:
31440 case VUNSPEC_STC2:
31441 case VUNSPEC_STC2L:
31442 case VUNSPEC_MCR2:
31443 case VUNSPEC_MRC2:
31444 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31445 ARMv8-{A,M}. */
31446 if (arm_arch5)
31447 return true;
31448 break;
31449 case VUNSPEC_MCRR:
31450 case VUNSPEC_MRRC:
31451 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31452 ARMv8-{A,M}. */
31453 if (arm_arch6 || arm_arch5te)
31454 return true;
31455 break;
31456 case VUNSPEC_MCRR2:
31457 case VUNSPEC_MRRC2:
31458 if (arm_arch6)
31459 return true;
31460 break;
31461 default:
31462 gcc_unreachable ();
31463 }
31464 return false;
31465 }
31466
31467 /* This function returns true if OP is a valid memory operand for the ldc and
31468 stc coprocessor instructions and false otherwise. */
31469
31470 bool
31471 arm_coproc_ldc_stc_legitimate_address (rtx op)
31472 {
31473 HOST_WIDE_INT range;
31474 /* Has to be a memory operand. */
31475 if (!MEM_P (op))
31476 return false;
31477
31478 op = XEXP (op, 0);
31479
31480 /* We accept registers. */
31481 if (REG_P (op))
31482 return true;
31483
31484 switch (GET_CODE (op))
31485 {
31486 case PLUS:
31487 {
31488 /* Or registers with an offset. */
31489 if (!REG_P (XEXP (op, 0)))
31490 return false;
31491
31492 op = XEXP (op, 1);
31493
31494 /* The offset must be an immediate though. */
31495 if (!CONST_INT_P (op))
31496 return false;
31497
31498 range = INTVAL (op);
31499
31500 /* Within the range of [-1020,1020]. */
31501 if (!IN_RANGE (range, -1020, 1020))
31502 return false;
31503
31504 /* And a multiple of 4. */
31505 return (range % 4) == 0;
31506 }
31507 case PRE_INC:
31508 case POST_INC:
31509 case PRE_DEC:
31510 case POST_DEC:
31511 return REG_P (XEXP (op, 0));
31512 default:
31513 gcc_unreachable ();
31514 }
31515 return false;
31516 }
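/* Accepted addresses therefore include [rN] and [rN, #OFF] with OFF in
   the range [-1020, 1020] and a multiple of 4, as well as pre/post
   increment or decrement of a base register.  */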
31517
31518 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31519
31520 In VFPv1, VFP registers could only be accessed in the mode they were
31521 set, so subregs would be invalid there. However, we don't support
31522 VFPv1 at the moment, and the restriction was lifted in VFPv2.
31523
31524 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31525 VFP registers in little-endian order. We can't describe that accurately to
31526 GCC, so avoid taking subregs of such values.
31527
31528 The only exception is going from a 128-bit to a 64-bit type. In that
31529 case the data layout happens to be consistent for big-endian, so we
31530 explicitly allow that case. */
31531
31532 static bool
31533 arm_can_change_mode_class (machine_mode from, machine_mode to,
31534 reg_class_t rclass)
31535 {
31536 if (TARGET_BIG_END
31537 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
31538 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
31539 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
31540 && reg_classes_intersect_p (VFP_REGS, rclass))
31541 return false;
31542 return true;
31543 }
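/* For example, on a big-endian target taking an SImode subreg of a
   DFmode value that may live in a VFP register is rejected, while the
   128-bit to 64-bit case (e.g. V2DImode to DImode) is still allowed.  */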
31544
31545 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
31546 strcpy from constants will be faster. */
31547
31548 static HOST_WIDE_INT
31549 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
31550 {
31551 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
31552 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
31553 return MAX (align, BITS_PER_WORD * factor);
31554 return align;
31555 }
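/* For example, when not optimizing for size a string constant is given
   at least 32-bit alignment, or 64-bit alignment when tuning for XScale
   in ARM mode, so that word loads can be used to copy it.  */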
31556
31557 #if CHECKING_P
31558 namespace selftest {
31559
31560 /* Scan the static data tables generated by parsecpu.awk looking for
31561 potential issues with the data. We primarily check for
31562 inconsistencies in the option extensions at present (extensions
31563 that duplicate others but aren't marked as aliases). Furthermore,
31564 for correct canonicalization, later options must never be a subset
31565 of an earlier option. Any extension should also only specify other
31566 feature bits and never an architecture bit. The architecture is inferred
31567 from the declaration of the extension. */
31568 static void
31569 arm_test_cpu_arch_data (void)
31570 {
31571 const arch_option *arch;
31572 const cpu_option *cpu;
31573 auto_sbitmap target_isa (isa_num_bits);
31574 auto_sbitmap isa1 (isa_num_bits);
31575 auto_sbitmap isa2 (isa_num_bits);
31576
31577 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31578 {
31579 const cpu_arch_extension *ext1, *ext2;
31580
31581 if (arch->common.extensions == NULL)
31582 continue;
31583
31584 arm_initialize_isa (target_isa, arch->common.isa_bits);
31585
31586 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31587 {
31588 if (ext1->alias)
31589 continue;
31590
31591 arm_initialize_isa (isa1, ext1->isa_bits);
31592 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31593 {
31594 if (ext2->alias || ext1->remove != ext2->remove)
31595 continue;
31596
31597 arm_initialize_isa (isa2, ext2->isa_bits);
31598 /* If the option is a subset of the parent option, it doesn't
31599 add anything and so isn't useful. */
31600 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31601
31602 /* If the extension specifies any architectural bits then
31603 disallow it. Extensions should only specify feature bits. */
31604 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31605 }
31606 }
31607 }
31608
31609 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31610 {
31611 const cpu_arch_extension *ext1, *ext2;
31612
31613 if (cpu->common.extensions == NULL)
31614 continue;
31615
31616 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31617
31618 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31619 {
31620 if (ext1->alias)
31621 continue;
31622
31623 arm_initialize_isa (isa1, ext1->isa_bits);
31624 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31625 {
31626 if (ext2->alias || ext1->remove != ext2->remove)
31627 continue;
31628
31629 arm_initialize_isa (isa2, ext2->isa_bits);
31630 /* If the option is a subset of the parent option, it doesn't
31631 add anything and so isn't useful. */
31632 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31633
31634 /* If the extension specifies any architectural bits then
31635 disallow it. Extensions should only specify feature bits. */
31636 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31637 }
31638 }
31639 }
31640 }
31641
31642 /* Scan the static data tables generated by parsecpu.awk looking for
31643 potential issues with the data. Here we check for consistency between the
31644 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
31645 a feature bit that is not defined by any FPU flag. */
31646 static void
31647 arm_test_fpu_data (void)
31648 {
31649 auto_sbitmap isa_all_fpubits (isa_num_bits);
31650 auto_sbitmap fpubits (isa_num_bits);
31651 auto_sbitmap tmpset (isa_num_bits);
31652
31653 static const enum isa_feature fpu_bitlist[]
31654 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
31655 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
31656
31657 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31658 {
31659 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
31660 bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
31661 bitmap_clear (isa_all_fpubits);
31662 bitmap_copy (isa_all_fpubits, tmpset);
31663 }
31664
31665 if (!bitmap_empty_p (isa_all_fpubits))
31666 {
31667 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
31668 " group that are not defined by any FPU.\n"
31669 " Check your arm-cpus.in.\n");
31670 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
31671 }
31672 }
31673
31674 static void
31675 arm_run_selftests (void)
31676 {
31677 arm_test_cpu_arch_data ();
31678 arm_test_fpu_data ();
31679 }
31680 } /* Namespace selftest. */
31681
31682 #undef TARGET_RUN_TARGET_SELFTESTS
31683 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31684 #endif /* CHECKING_P */
31685
31686 struct gcc_target targetm = TARGET_INITIALIZER;
31687
31688 #include "gt-arm.h"