1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2018 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "df.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "attribs.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "cgraph.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "output.h"
51 #include "insn-attr.h"
52 #include "flags.h"
53 #include "reload.h"
54 #include "explow.h"
55 #include "expr.h"
56 #include "cfgrtl.h"
57 #include "sched-int.h"
58 #include "common/common-target.h"
59 #include "langhooks.h"
60 #include "intl.h"
61 #include "libfuncs.h"
62 #include "params.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "selftest.h"
73
74 /* This file should be included last. */
75 #include "target-def.h"
76
77 /* Forward definitions of types. */
78 typedef struct minipool_node Mnode;
79 typedef struct minipool_fixup Mfix;
80
81 /* The last .arch and .fpu assembly strings that we printed. */
82 static std::string arm_last_printed_arch_string;
83 static std::string arm_last_printed_fpu_string;
84
85 void (*arm_lang_output_object_attributes_hook)(void);
86
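/* Holds up to four immediate values; optimal_immediate_sequence below
   fills one of these in to record the constants used when synthesizing
   an immediate.  */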
87 struct four_ints
88 {
89 int i[4];
90 };
91
92 /* Forward function declarations. */
93 static bool arm_const_not_ok_for_debug_p (rtx);
94 static int arm_needs_doubleword_align (machine_mode, const_tree);
95 static int arm_compute_static_chain_stack_bytes (void);
96 static arm_stack_offsets *arm_get_frame_offsets (void);
97 static void arm_compute_frame_layout (void);
98 static void arm_add_gc_roots (void);
99 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
100 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
101 static unsigned bit_count (unsigned long);
102 static unsigned bitmap_popcount (const sbitmap);
103 static int arm_address_register_rtx_p (rtx, int);
104 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
105 static bool is_called_in_ARM_mode (tree);
106 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
107 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
108 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
109 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
110 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
111 inline static int thumb1_index_register_rtx_p (rtx, int);
112 static int thumb_far_jump_used_p (void);
113 static bool thumb_force_lr_save (void);
114 static unsigned arm_size_return_regs (void);
115 static bool arm_assemble_integer (rtx, unsigned int, int);
116 static void arm_print_operand (FILE *, rtx, int);
117 static void arm_print_operand_address (FILE *, machine_mode, rtx);
118 static bool arm_print_operand_punct_valid_p (unsigned char code);
119 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
120 static arm_cc get_arm_condition_code (rtx);
121 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
122 static const char *output_multi_immediate (rtx *, const char *, const char *,
123 int, HOST_WIDE_INT);
124 static const char *shift_op (rtx, HOST_WIDE_INT *);
125 static struct machine_function *arm_init_machine_status (void);
126 static void thumb_exit (FILE *, int);
127 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
128 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
129 static Mnode *add_minipool_forward_ref (Mfix *);
130 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
131 static Mnode *add_minipool_backward_ref (Mfix *);
132 static void assign_minipool_offsets (Mfix *);
133 static void arm_print_value (FILE *, rtx);
134 static void dump_minipool (rtx_insn *);
135 static int arm_barrier_cost (rtx_insn *);
136 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
137 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
138 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
139 machine_mode, rtx);
140 static void arm_reorg (void);
141 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
142 static unsigned long arm_compute_save_reg0_reg12_mask (void);
143 static unsigned long arm_compute_save_core_reg_mask (void);
144 static unsigned long arm_isr_value (tree);
145 static unsigned long arm_compute_func_type (void);
146 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
147 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
148 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
149 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
150 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
151 #endif
152 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
153 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
154 static void arm_output_function_epilogue (FILE *);
155 static void arm_output_function_prologue (FILE *);
156 static int arm_comp_type_attributes (const_tree, const_tree);
157 static void arm_set_default_type_attributes (tree);
158 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
159 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
160 static int optimal_immediate_sequence (enum rtx_code code,
161 unsigned HOST_WIDE_INT val,
162 struct four_ints *return_sequence);
163 static int optimal_immediate_sequence_1 (enum rtx_code code,
164 unsigned HOST_WIDE_INT val,
165 struct four_ints *return_sequence,
166 int i);
167 static int arm_get_strip_length (int);
168 static bool arm_function_ok_for_sibcall (tree, tree);
169 static machine_mode arm_promote_function_mode (const_tree,
170 machine_mode, int *,
171 const_tree, int);
172 static bool arm_return_in_memory (const_tree, const_tree);
173 static rtx arm_function_value (const_tree, const_tree, bool);
174 static rtx arm_libcall_value_1 (machine_mode);
175 static rtx arm_libcall_value (machine_mode, const_rtx);
176 static bool arm_function_value_regno_p (const unsigned int);
177 static void arm_internal_label (FILE *, const char *, unsigned long);
178 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
179 tree);
180 static bool arm_have_conditional_execution (void);
181 static bool arm_cannot_force_const_mem (machine_mode, rtx);
182 static bool arm_legitimate_constant_p (machine_mode, rtx);
183 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
184 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
185 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
186 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
187 static void emit_constant_insn (rtx cond, rtx pattern);
188 static rtx_insn *emit_set_insn (rtx, rtx);
189 static rtx emit_multi_reg_push (unsigned long, unsigned long);
190 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
191 tree, bool);
192 static rtx arm_function_arg (cumulative_args_t, machine_mode,
193 const_tree, bool);
194 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
195 const_tree, bool);
196 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
197 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
198 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
199 const_tree);
200 static rtx aapcs_libcall_value (machine_mode);
201 static int aapcs_select_return_coproc (const_tree, const_tree);
202
203 #ifdef OBJECT_FORMAT_ELF
204 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
205 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
206 #endif
207 #ifndef ARM_PE
208 static void arm_encode_section_info (tree, rtx, int);
209 #endif
210
211 static void arm_file_end (void);
212 static void arm_file_start (void);
213 static void arm_insert_attributes (tree, tree *);
214
215 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
216 tree, int *, int);
217 static bool arm_pass_by_reference (cumulative_args_t,
218 machine_mode, const_tree, bool);
219 static bool arm_promote_prototypes (const_tree);
220 static bool arm_default_short_enums (void);
221 static bool arm_align_anon_bitfield (void);
222 static bool arm_return_in_msb (const_tree);
223 static bool arm_must_pass_in_stack (machine_mode, const_tree);
224 static bool arm_return_in_memory (const_tree, const_tree);
225 #if ARM_UNWIND_INFO
226 static void arm_unwind_emit (FILE *, rtx_insn *);
227 static bool arm_output_ttype (rtx);
228 static void arm_asm_emit_except_personality (rtx);
229 #endif
230 static void arm_asm_init_sections (void);
231 static rtx arm_dwarf_register_span (rtx);
232
233 static tree arm_cxx_guard_type (void);
234 static bool arm_cxx_guard_mask_bit (void);
235 static tree arm_get_cookie_size (tree);
236 static bool arm_cookie_has_size (void);
237 static bool arm_cxx_cdtor_returns_this (void);
238 static bool arm_cxx_key_method_may_be_inline (void);
239 static void arm_cxx_determine_class_data_visibility (tree);
240 static bool arm_cxx_class_data_always_comdat (void);
241 static bool arm_cxx_use_aeabi_atexit (void);
242 static void arm_init_libfuncs (void);
243 static tree arm_build_builtin_va_list (void);
244 static void arm_expand_builtin_va_start (tree, rtx);
245 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
246 static void arm_option_override (void);
247 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
248 static void arm_option_restore (struct gcc_options *,
249 struct cl_target_option *);
250 static void arm_override_options_after_change (void);
251 static void arm_option_print (FILE *, int, struct cl_target_option *);
252 static void arm_set_current_function (tree);
253 static bool arm_can_inline_p (tree, tree);
254 static void arm_relayout_function (tree);
255 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
256 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
257 static bool arm_sched_can_speculate_insn (rtx_insn *);
258 static bool arm_macro_fusion_p (void);
259 static bool arm_cannot_copy_insn_p (rtx_insn *);
260 static int arm_issue_rate (void);
261 static int arm_first_cycle_multipass_dfa_lookahead (void);
262 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
263 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
264 static bool arm_output_addr_const_extra (FILE *, rtx);
265 static bool arm_allocate_stack_slots_for_args (void);
266 static bool arm_warn_func_return (tree);
267 static tree arm_promoted_type (const_tree t);
268 static bool arm_scalar_mode_supported_p (scalar_mode);
269 static bool arm_frame_pointer_required (void);
270 static bool arm_can_eliminate (const int, const int);
271 static void arm_asm_trampoline_template (FILE *);
272 static void arm_trampoline_init (rtx, tree, rtx);
273 static rtx arm_trampoline_adjust_address (rtx);
274 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
275 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
276 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
277 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
278 static bool arm_array_mode_supported_p (machine_mode,
279 unsigned HOST_WIDE_INT);
280 static machine_mode arm_preferred_simd_mode (scalar_mode);
281 static bool arm_class_likely_spilled_p (reg_class_t);
282 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
283 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
284 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
285 const_tree type,
286 int misalignment,
287 bool is_packed);
288 static void arm_conditional_register_usage (void);
289 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
290 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
291 static void arm_autovectorize_vector_sizes (vector_sizes *);
292 static int arm_default_branch_cost (bool, bool);
293 static int arm_cortex_a5_branch_cost (bool, bool);
294 static int arm_cortex_m_branch_cost (bool, bool);
295 static int arm_cortex_m7_branch_cost (bool, bool);
296
297 static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
298 const vec_perm_indices &);
299
300 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
301
302 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
303 tree vectype,
304 int misalign ATTRIBUTE_UNUSED);
305 static unsigned arm_add_stmt_cost (void *data, int count,
306 enum vect_cost_for_stmt kind,
307 struct _stmt_vec_info *stmt_info,
308 int misalign,
309 enum vect_cost_model_location where);
310
311 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
312 bool op0_preserve_value);
313 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
314
315 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
316 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
317 const_tree);
318 static section *arm_function_section (tree, enum node_frequency, bool, bool);
319 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
320 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
321 int reloc);
322 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
323 static opt_scalar_float_mode arm_floatn_mode (int, bool);
324 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
325 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
326 static bool arm_modes_tieable_p (machine_mode, machine_mode);
327 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
328 \f
329 /* Table of machine attributes. */
330 static const struct attribute_spec arm_attribute_table[] =
331 {
332 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
333 affects_type_identity, handler, exclude } */
334 /* Function calls made to this symbol must be done indirectly, because
335 it may lie outside of the 26 bit addressing range of a normal function
336 call. */
337 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
338 /* Whereas these functions are always known to reside within the 26 bit
339 addressing range. */
340 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
341 /* Specify the procedure call conventions for a function. */
342 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
343 NULL },
344 /* Interrupt Service Routines have special prologue and epilogue requirements. */
345 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
346 NULL },
347 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
348 NULL },
349 { "naked", 0, 0, true, false, false, false,
350 arm_handle_fndecl_attribute, NULL },
351 #ifdef ARM_PE
352 /* ARM/PE has three new attributes:
353 interfacearm - ?
354 dllexport - for exporting a function/variable that will live in a dll
355 dllimport - for importing a function/variable from a dll
356
357 Microsoft allows multiple declspecs in one __declspec, separating
358 them with spaces. We do NOT support this. Instead, use __declspec
359 multiple times.
360 */
361 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
362 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
363 { "interfacearm", 0, 0, true, false, false, false,
364 arm_handle_fndecl_attribute, NULL },
365 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
366 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
367 NULL },
368 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
369 NULL },
370 { "notshared", 0, 0, false, true, false, false,
371 arm_handle_notshared_attribute, NULL },
372 #endif
373 /* ARMv8-M Security Extensions support. */
374 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
375 arm_handle_cmse_nonsecure_entry, NULL },
376 { "cmse_nonsecure_call", 0, 0, true, false, false, true,
377 arm_handle_cmse_nonsecure_call, NULL },
378 { NULL, 0, 0, false, false, false, false, NULL, NULL }
379 };
380 \f
381 /* Initialize the GCC target structure. */
382 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
383 #undef TARGET_MERGE_DECL_ATTRIBUTES
384 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
385 #endif
386
387 #undef TARGET_LEGITIMIZE_ADDRESS
388 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
389
390 #undef TARGET_ATTRIBUTE_TABLE
391 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
392
393 #undef TARGET_INSERT_ATTRIBUTES
394 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
395
396 #undef TARGET_ASM_FILE_START
397 #define TARGET_ASM_FILE_START arm_file_start
398 #undef TARGET_ASM_FILE_END
399 #define TARGET_ASM_FILE_END arm_file_end
400
401 #undef TARGET_ASM_ALIGNED_SI_OP
402 #define TARGET_ASM_ALIGNED_SI_OP NULL
403 #undef TARGET_ASM_INTEGER
404 #define TARGET_ASM_INTEGER arm_assemble_integer
405
406 #undef TARGET_PRINT_OPERAND
407 #define TARGET_PRINT_OPERAND arm_print_operand
408 #undef TARGET_PRINT_OPERAND_ADDRESS
409 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
410 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
411 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
412
413 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
414 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
415
416 #undef TARGET_ASM_FUNCTION_PROLOGUE
417 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
418
419 #undef TARGET_ASM_FUNCTION_EPILOGUE
420 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
421
422 #undef TARGET_CAN_INLINE_P
423 #define TARGET_CAN_INLINE_P arm_can_inline_p
424
425 #undef TARGET_RELAYOUT_FUNCTION
426 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
427
428 #undef TARGET_OPTION_OVERRIDE
429 #define TARGET_OPTION_OVERRIDE arm_option_override
430
431 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
432 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
433
434 #undef TARGET_OPTION_SAVE
435 #define TARGET_OPTION_SAVE arm_option_save
436
437 #undef TARGET_OPTION_RESTORE
438 #define TARGET_OPTION_RESTORE arm_option_restore
439
440 #undef TARGET_OPTION_PRINT
441 #define TARGET_OPTION_PRINT arm_option_print
442
443 #undef TARGET_COMP_TYPE_ATTRIBUTES
444 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
445
446 #undef TARGET_SCHED_CAN_SPECULATE_INSN
447 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
448
449 #undef TARGET_SCHED_MACRO_FUSION_P
450 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
451
452 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
453 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
454
455 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
456 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
457
458 #undef TARGET_SCHED_ADJUST_COST
459 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
460
461 #undef TARGET_SET_CURRENT_FUNCTION
462 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
463
464 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
465 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
466
467 #undef TARGET_SCHED_REORDER
468 #define TARGET_SCHED_REORDER arm_sched_reorder
469
470 #undef TARGET_REGISTER_MOVE_COST
471 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
472
473 #undef TARGET_MEMORY_MOVE_COST
474 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
475
476 #undef TARGET_ENCODE_SECTION_INFO
477 #ifdef ARM_PE
478 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
479 #else
480 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
481 #endif
482
483 #undef TARGET_STRIP_NAME_ENCODING
484 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
485
486 #undef TARGET_ASM_INTERNAL_LABEL
487 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
488
489 #undef TARGET_FLOATN_MODE
490 #define TARGET_FLOATN_MODE arm_floatn_mode
491
492 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
493 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
494
495 #undef TARGET_FUNCTION_VALUE
496 #define TARGET_FUNCTION_VALUE arm_function_value
497
498 #undef TARGET_LIBCALL_VALUE
499 #define TARGET_LIBCALL_VALUE arm_libcall_value
500
501 #undef TARGET_FUNCTION_VALUE_REGNO_P
502 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
503
504 #undef TARGET_ASM_OUTPUT_MI_THUNK
505 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
506 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
507 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
508
509 #undef TARGET_RTX_COSTS
510 #define TARGET_RTX_COSTS arm_rtx_costs
511 #undef TARGET_ADDRESS_COST
512 #define TARGET_ADDRESS_COST arm_address_cost
513
514 #undef TARGET_SHIFT_TRUNCATION_MASK
515 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
516 #undef TARGET_VECTOR_MODE_SUPPORTED_P
517 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
518 #undef TARGET_ARRAY_MODE_SUPPORTED_P
519 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
520 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
521 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
522 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
523 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
524 arm_autovectorize_vector_sizes
525
526 #undef TARGET_MACHINE_DEPENDENT_REORG
527 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
528
529 #undef TARGET_INIT_BUILTINS
530 #define TARGET_INIT_BUILTINS arm_init_builtins
531 #undef TARGET_EXPAND_BUILTIN
532 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
533 #undef TARGET_BUILTIN_DECL
534 #define TARGET_BUILTIN_DECL arm_builtin_decl
535
536 #undef TARGET_INIT_LIBFUNCS
537 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
538
539 #undef TARGET_PROMOTE_FUNCTION_MODE
540 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
541 #undef TARGET_PROMOTE_PROTOTYPES
542 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
543 #undef TARGET_PASS_BY_REFERENCE
544 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
545 #undef TARGET_ARG_PARTIAL_BYTES
546 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
547 #undef TARGET_FUNCTION_ARG
548 #define TARGET_FUNCTION_ARG arm_function_arg
549 #undef TARGET_FUNCTION_ARG_ADVANCE
550 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
551 #undef TARGET_FUNCTION_ARG_PADDING
552 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
553 #undef TARGET_FUNCTION_ARG_BOUNDARY
554 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
555
556 #undef TARGET_SETUP_INCOMING_VARARGS
557 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
558
559 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
560 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
561
562 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
563 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
564 #undef TARGET_TRAMPOLINE_INIT
565 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
566 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
567 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
568
569 #undef TARGET_WARN_FUNC_RETURN
570 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
571
572 #undef TARGET_DEFAULT_SHORT_ENUMS
573 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
574
575 #undef TARGET_ALIGN_ANON_BITFIELD
576 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
577
578 #undef TARGET_NARROW_VOLATILE_BITFIELD
579 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
580
581 #undef TARGET_CXX_GUARD_TYPE
582 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
583
584 #undef TARGET_CXX_GUARD_MASK_BIT
585 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
586
587 #undef TARGET_CXX_GET_COOKIE_SIZE
588 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
589
590 #undef TARGET_CXX_COOKIE_HAS_SIZE
591 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
592
593 #undef TARGET_CXX_CDTOR_RETURNS_THIS
594 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
595
596 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
597 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
598
599 #undef TARGET_CXX_USE_AEABI_ATEXIT
600 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
601
602 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
603 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
604 arm_cxx_determine_class_data_visibility
605
606 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
607 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
608
609 #undef TARGET_RETURN_IN_MSB
610 #define TARGET_RETURN_IN_MSB arm_return_in_msb
611
612 #undef TARGET_RETURN_IN_MEMORY
613 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
614
615 #undef TARGET_MUST_PASS_IN_STACK
616 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
617
618 #if ARM_UNWIND_INFO
619 #undef TARGET_ASM_UNWIND_EMIT
620 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
621
622 /* EABI unwinding tables use a different format for the typeinfo tables. */
623 #undef TARGET_ASM_TTYPE
624 #define TARGET_ASM_TTYPE arm_output_ttype
625
626 #undef TARGET_ARM_EABI_UNWINDER
627 #define TARGET_ARM_EABI_UNWINDER true
628
629 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
630 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
631
632 #endif /* ARM_UNWIND_INFO */
633
634 #undef TARGET_ASM_INIT_SECTIONS
635 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
636
637 #undef TARGET_DWARF_REGISTER_SPAN
638 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
639
640 #undef TARGET_CANNOT_COPY_INSN_P
641 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
642
643 #ifdef HAVE_AS_TLS
644 #undef TARGET_HAVE_TLS
645 #define TARGET_HAVE_TLS true
646 #endif
647
648 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
649 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
650
651 #undef TARGET_LEGITIMATE_CONSTANT_P
652 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
653
654 #undef TARGET_CANNOT_FORCE_CONST_MEM
655 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
656
657 #undef TARGET_MAX_ANCHOR_OFFSET
658 #define TARGET_MAX_ANCHOR_OFFSET 4095
659
660 /* The minimum is set such that the total size of the block
661 for a particular anchor is -4088 + 1 + 4095 bytes, which is
662 divisible by eight, ensuring natural spacing of anchors. */
663 #undef TARGET_MIN_ANCHOR_OFFSET
664 #define TARGET_MIN_ANCHOR_OFFSET -4088
665
666 #undef TARGET_SCHED_ISSUE_RATE
667 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
668
669 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
670 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
671 arm_first_cycle_multipass_dfa_lookahead
672
673 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
674 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
675 arm_first_cycle_multipass_dfa_lookahead_guard
676
677 #undef TARGET_MANGLE_TYPE
678 #define TARGET_MANGLE_TYPE arm_mangle_type
679
680 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
681 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
682
683 #undef TARGET_BUILD_BUILTIN_VA_LIST
684 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
685 #undef TARGET_EXPAND_BUILTIN_VA_START
686 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
687 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
688 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
689
690 #ifdef HAVE_AS_TLS
691 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
692 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
693 #endif
694
695 #undef TARGET_LEGITIMATE_ADDRESS_P
696 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
697
698 #undef TARGET_PREFERRED_RELOAD_CLASS
699 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
700
701 #undef TARGET_PROMOTED_TYPE
702 #define TARGET_PROMOTED_TYPE arm_promoted_type
703
704 #undef TARGET_SCALAR_MODE_SUPPORTED_P
705 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
706
707 #undef TARGET_COMPUTE_FRAME_LAYOUT
708 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
709
710 #undef TARGET_FRAME_POINTER_REQUIRED
711 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
712
713 #undef TARGET_CAN_ELIMINATE
714 #define TARGET_CAN_ELIMINATE arm_can_eliminate
715
716 #undef TARGET_CONDITIONAL_REGISTER_USAGE
717 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
718
719 #undef TARGET_CLASS_LIKELY_SPILLED_P
720 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
721
722 #undef TARGET_VECTORIZE_BUILTINS
723 #define TARGET_VECTORIZE_BUILTINS
724
725 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
726 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
727 arm_builtin_vectorized_function
728
729 #undef TARGET_VECTOR_ALIGNMENT
730 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
731
732 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
733 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
734 arm_vector_alignment_reachable
735
736 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
737 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
738 arm_builtin_support_vector_misalignment
739
740 #undef TARGET_PREFERRED_RENAME_CLASS
741 #define TARGET_PREFERRED_RENAME_CLASS \
742 arm_preferred_rename_class
743
744 #undef TARGET_VECTORIZE_VEC_PERM_CONST
745 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
746
747 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
748 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
749 arm_builtin_vectorization_cost
750 #undef TARGET_VECTORIZE_ADD_STMT_COST
751 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
752
753 #undef TARGET_CANONICALIZE_COMPARISON
754 #define TARGET_CANONICALIZE_COMPARISON \
755 arm_canonicalize_comparison
756
757 #undef TARGET_ASAN_SHADOW_OFFSET
758 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
759
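/* The maximum number of insns that may be conditionalised by a single IT
   block: one when -mrestrict-it (arm_restrict_it) is in effect, otherwise
   four.  */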
760 #undef MAX_INSN_PER_IT_BLOCK
761 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
762
763 #undef TARGET_CAN_USE_DOLOOP_P
764 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
765
766 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
767 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
768
769 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
770 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
771
772 #undef TARGET_SCHED_FUSION_PRIORITY
773 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
774
775 #undef TARGET_ASM_FUNCTION_SECTION
776 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
777
778 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
779 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
780
781 #undef TARGET_SECTION_TYPE_FLAGS
782 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
783
784 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
785 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
786
787 #undef TARGET_C_EXCESS_PRECISION
788 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
789
790 /* Although the architecture reserves bits 0 and 1, only the former is
791 used for ARM/Thumb ISA selection in v7 and earlier versions. */
792 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
793 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
794
795 #undef TARGET_FIXED_CONDITION_CODE_REGS
796 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
797
798 #undef TARGET_HARD_REGNO_NREGS
799 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
800 #undef TARGET_HARD_REGNO_MODE_OK
801 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
802
803 #undef TARGET_MODES_TIEABLE_P
804 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
805
806 #undef TARGET_CAN_CHANGE_MODE_CLASS
807 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
808
809 #undef TARGET_CONSTANT_ALIGNMENT
810 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
811 \f
812 /* Obstack for minipool constant handling. */
813 static struct obstack minipool_obstack;
814 static char * minipool_startobj;
815
816 /* The maximum number of insns skipped which
817 will be conditionalised if possible. */
818 static int max_insns_skipped = 5;
819
820 extern FILE * asm_out_file;
821
822 /* True if we are currently building a constant table. */
823 int making_const_table;
824
825 /* The processor for which instructions should be scheduled. */
826 enum processor_type arm_tune = TARGET_CPU_arm_none;
827
828 /* The current tuning set. */
829 const struct tune_params *current_tune;
830
831 /* Which floating point hardware to schedule for. */
832 int arm_fpu_attr;
833
834 /* Used for Thumb call_via trampolines. */
835 rtx thumb_call_via_label[14];
836 static int thumb_call_reg_needed;
837
838 /* The bits in this mask specify which instruction scheduling options should
839 be used. */
840 unsigned int tune_flags = 0;
841
842 /* The highest ARM architecture version supported by the
843 target. */
844 enum base_architecture arm_base_arch = BASE_ARCH_0;
845
846 /* Active target architecture and tuning. */
847
848 struct arm_build_target arm_active_target;
849
850 /* The following are used in the arm.md file as equivalents to bits
851 in the above two flag variables. */
852
853 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
854 int arm_arch4 = 0;
855
856 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
857 int arm_arch4t = 0;
858
859 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
860 int arm_arch5t = 0;
861
862 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
863 int arm_arch5te = 0;
864
865 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
866 int arm_arch6 = 0;
867
868 /* Nonzero if this chip supports the ARM 6K extensions. */
869 int arm_arch6k = 0;
870
871 /* Nonzero if this chip supports the ARM 6KZ extensions. */
872 int arm_arch6kz = 0;
873
874 /* Nonzero if instructions present in ARMv6-M can be used. */
875 int arm_arch6m = 0;
876
877 /* Nonzero if this chip supports the ARM 7 extensions. */
878 int arm_arch7 = 0;
879
880 /* Nonzero if this chip supports the Large Physical Address Extension. */
881 int arm_arch_lpae = 0;
882
883 /* Nonzero if instructions not present in the 'M' profile can be used. */
884 int arm_arch_notm = 0;
885
886 /* Nonzero if instructions present in ARMv7E-M can be used. */
887 int arm_arch7em = 0;
888
889 /* Nonzero if instructions present in ARMv8 can be used. */
890 int arm_arch8 = 0;
891
892 /* Nonzero if this chip supports the ARMv8.1 extensions. */
893 int arm_arch8_1 = 0;
894
895 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
896 int arm_arch8_2 = 0;
897
898 /* Nonzero if this chip supports the FP16 instructions extension of ARM
899 Architecture 8.2. */
900 int arm_fp16_inst = 0;
901
902 /* Nonzero if this chip can benefit from load scheduling. */
903 int arm_ld_sched = 0;
904
905 /* Nonzero if this chip is a StrongARM. */
906 int arm_tune_strongarm = 0;
907
908 /* Nonzero if this chip supports Intel Wireless MMX technology. */
909 int arm_arch_iwmmxt = 0;
910
911 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
912 int arm_arch_iwmmxt2 = 0;
913
914 /* Nonzero if this chip is an XScale. */
915 int arm_arch_xscale = 0;
916
917 /* Nonzero if tuning for XScale.  */
918 int arm_tune_xscale = 0;
919
920 /* Nonzero if we want to tune for stores that access the write-buffer.
921 This typically means an ARM6 or ARM7 with MMU or MPU. */
922 int arm_tune_wbuf = 0;
923
924 /* Nonzero if tuning for Cortex-A9. */
925 int arm_tune_cortex_a9 = 0;
926
927 /* Nonzero if we should define __THUMB_INTERWORK__ in the
928 preprocessor.
929 XXX This is a bit of a hack, it's intended to help work around
930 problems in GLD which doesn't understand that armv5t code is
931 interworking clean. */
932 int arm_cpp_interwork = 0;
933
934 /* Nonzero if chip supports Thumb 1. */
935 int arm_arch_thumb1;
936
937 /* Nonzero if chip supports Thumb 2. */
938 int arm_arch_thumb2;
939
940 /* Nonzero if chip supports integer division instruction. */
941 int arm_arch_arm_hwdiv;
942 int arm_arch_thumb_hwdiv;
943
944 /* Nonzero if chip disallows volatile memory access in IT block. */
945 int arm_arch_no_volatile_ce;
946
947 /* Nonzero if we should use Neon to handle 64-bit operations rather
948 than core registers. */
949 int prefer_neon_for_64bits = 0;
950
951 /* Nonzero if we shouldn't use literal pools. */
952 bool arm_disable_literal_pool = false;
953
954 /* The register number to be used for the PIC offset register. */
955 unsigned arm_pic_register = INVALID_REGNUM;
956
957 enum arm_pcs arm_pcs_default;
958
959 /* For an explanation of these variables, see final_prescan_insn below. */
960 int arm_ccfsm_state;
961 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
962 enum arm_cond_code arm_current_cc;
963
964 rtx arm_target_insn;
965 int arm_target_label;
966 /* The number of conditionally executed insns, including the current insn. */
967 int arm_condexec_count = 0;
968 /* A bitmask specifying the patterns for the IT block.
969 Zero means do not output an IT block before this insn. */
970 int arm_condexec_mask = 0;
971 /* The number of bits used in arm_condexec_mask. */
972 int arm_condexec_masklen = 0;
973
974 /* Nonzero if chip supports the ARMv8 CRC instructions. */
975 int arm_arch_crc = 0;
976
977 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
978 int arm_arch_dotprod = 0;
979
980 /* Nonzero if chip supports the ARMv8-M security extensions. */
981 int arm_arch_cmse = 0;
982
983 /* Nonzero if the core has a very small, high-latency, multiply unit. */
984 int arm_m_profile_small_mul = 0;
985
986 /* The condition codes of the ARM, and the inverse function. */
987 static const char * const arm_condition_codes[] =
988 {
989 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
990 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
991 };
992
993 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
994 int arm_regs_in_sequence[] =
995 {
996 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
997 };
998
999 #define ARM_LSL_NAME "lsl"
1000 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1001
1002 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1003 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1004 | (1 << PIC_OFFSET_TABLE_REGNUM)))
1005 \f
1006 /* Initialization code. */
1007
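/* Tuning information for one CPU: the scheduling model, the tuning flag
   bits and the detailed tuning parameters to use.  */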
1008 struct cpu_tune
1009 {
1010 enum processor_type scheduler;
1011 unsigned int tune_flags;
1012 const struct tune_params *tune;
1013 };
1014
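/* Prefetch tuning triples in the order { num_slots, l1_size, l1_line_size };
   ARM_PREFETCH_NOT_BENEFICIAL supplies zero slots and -1 for the unknown
   cache sizes.  */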
1015 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1016 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1017 { \
1018 num_slots, \
1019 l1_size, \
1020 l1_line_size \
1021 }
1022
1023 /* arm generic vectorizer costs. */
1024 static const
1025 struct cpu_vec_costs arm_default_vec_cost = {
1026 1, /* scalar_stmt_cost. */
1027 1, /* scalar load_cost. */
1028 1, /* scalar_store_cost. */
1029 1, /* vec_stmt_cost. */
1030 1, /* vec_to_scalar_cost. */
1031 1, /* scalar_to_vec_cost. */
1032 1, /* vec_align_load_cost. */
1033 1, /* vec_unalign_load_cost. */
1034 1, /* vec_unalign_store_cost. */
1035 1, /* vec_store_cost. */
1036 3, /* cond_taken_branch_cost. */
1037 1, /* cond_not_taken_branch_cost. */
1038 };
1039
1040 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1041 #include "aarch-cost-tables.h"
1042
1043
1044
1045 const struct cpu_cost_table cortexa9_extra_costs =
1046 {
1047 /* ALU */
1048 {
1049 0, /* arith. */
1050 0, /* logical. */
1051 0, /* shift. */
1052 COSTS_N_INSNS (1), /* shift_reg. */
1053 COSTS_N_INSNS (1), /* arith_shift. */
1054 COSTS_N_INSNS (2), /* arith_shift_reg. */
1055 0, /* log_shift. */
1056 COSTS_N_INSNS (1), /* log_shift_reg. */
1057 COSTS_N_INSNS (1), /* extend. */
1058 COSTS_N_INSNS (2), /* extend_arith. */
1059 COSTS_N_INSNS (1), /* bfi. */
1060 COSTS_N_INSNS (1), /* bfx. */
1061 0, /* clz. */
1062 0, /* rev. */
1063 0, /* non_exec. */
1064 true /* non_exec_costs_exec. */
1065 },
1066 {
1067 /* MULT SImode */
1068 {
1069 COSTS_N_INSNS (3), /* simple. */
1070 COSTS_N_INSNS (3), /* flag_setting. */
1071 COSTS_N_INSNS (2), /* extend. */
1072 COSTS_N_INSNS (3), /* add. */
1073 COSTS_N_INSNS (2), /* extend_add. */
1074 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1075 },
1076 /* MULT DImode */
1077 {
1078 0, /* simple (N/A). */
1079 0, /* flag_setting (N/A). */
1080 COSTS_N_INSNS (4), /* extend. */
1081 0, /* add (N/A). */
1082 COSTS_N_INSNS (4), /* extend_add. */
1083 0 /* idiv (N/A). */
1084 }
1085 },
1086 /* LD/ST */
1087 {
1088 COSTS_N_INSNS (2), /* load. */
1089 COSTS_N_INSNS (2), /* load_sign_extend. */
1090 COSTS_N_INSNS (2), /* ldrd. */
1091 COSTS_N_INSNS (2), /* ldm_1st. */
1092 1, /* ldm_regs_per_insn_1st. */
1093 2, /* ldm_regs_per_insn_subsequent. */
1094 COSTS_N_INSNS (5), /* loadf. */
1095 COSTS_N_INSNS (5), /* loadd. */
1096 COSTS_N_INSNS (1), /* load_unaligned. */
1097 COSTS_N_INSNS (2), /* store. */
1098 COSTS_N_INSNS (2), /* strd. */
1099 COSTS_N_INSNS (2), /* stm_1st. */
1100 1, /* stm_regs_per_insn_1st. */
1101 2, /* stm_regs_per_insn_subsequent. */
1102 COSTS_N_INSNS (1), /* storef. */
1103 COSTS_N_INSNS (1), /* stored. */
1104 COSTS_N_INSNS (1), /* store_unaligned. */
1105 COSTS_N_INSNS (1), /* loadv. */
1106 COSTS_N_INSNS (1) /* storev. */
1107 },
1108 {
1109 /* FP SFmode */
1110 {
1111 COSTS_N_INSNS (14), /* div. */
1112 COSTS_N_INSNS (4), /* mult. */
1113 COSTS_N_INSNS (7), /* mult_addsub. */
1114 COSTS_N_INSNS (30), /* fma. */
1115 COSTS_N_INSNS (3), /* addsub. */
1116 COSTS_N_INSNS (1), /* fpconst. */
1117 COSTS_N_INSNS (1), /* neg. */
1118 COSTS_N_INSNS (3), /* compare. */
1119 COSTS_N_INSNS (3), /* widen. */
1120 COSTS_N_INSNS (3), /* narrow. */
1121 COSTS_N_INSNS (3), /* toint. */
1122 COSTS_N_INSNS (3), /* fromint. */
1123 COSTS_N_INSNS (3) /* roundint. */
1124 },
1125 /* FP DFmode */
1126 {
1127 COSTS_N_INSNS (24), /* div. */
1128 COSTS_N_INSNS (5), /* mult. */
1129 COSTS_N_INSNS (8), /* mult_addsub. */
1130 COSTS_N_INSNS (30), /* fma. */
1131 COSTS_N_INSNS (3), /* addsub. */
1132 COSTS_N_INSNS (1), /* fpconst. */
1133 COSTS_N_INSNS (1), /* neg. */
1134 COSTS_N_INSNS (3), /* compare. */
1135 COSTS_N_INSNS (3), /* widen. */
1136 COSTS_N_INSNS (3), /* narrow. */
1137 COSTS_N_INSNS (3), /* toint. */
1138 COSTS_N_INSNS (3), /* fromint. */
1139 COSTS_N_INSNS (3) /* roundint. */
1140 }
1141 },
1142 /* Vector */
1143 {
1144 COSTS_N_INSNS (1) /* alu. */
1145 }
1146 };
1147
1148 const struct cpu_cost_table cortexa8_extra_costs =
1149 {
1150 /* ALU */
1151 {
1152 0, /* arith. */
1153 0, /* logical. */
1154 COSTS_N_INSNS (1), /* shift. */
1155 0, /* shift_reg. */
1156 COSTS_N_INSNS (1), /* arith_shift. */
1157 0, /* arith_shift_reg. */
1158 COSTS_N_INSNS (1), /* log_shift. */
1159 0, /* log_shift_reg. */
1160 0, /* extend. */
1161 0, /* extend_arith. */
1162 0, /* bfi. */
1163 0, /* bfx. */
1164 0, /* clz. */
1165 0, /* rev. */
1166 0, /* non_exec. */
1167 true /* non_exec_costs_exec. */
1168 },
1169 {
1170 /* MULT SImode */
1171 {
1172 COSTS_N_INSNS (1), /* simple. */
1173 COSTS_N_INSNS (1), /* flag_setting. */
1174 COSTS_N_INSNS (1), /* extend. */
1175 COSTS_N_INSNS (1), /* add. */
1176 COSTS_N_INSNS (1), /* extend_add. */
1177 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1178 },
1179 /* MULT DImode */
1180 {
1181 0, /* simple (N/A). */
1182 0, /* flag_setting (N/A). */
1183 COSTS_N_INSNS (2), /* extend. */
1184 0, /* add (N/A). */
1185 COSTS_N_INSNS (2), /* extend_add. */
1186 0 /* idiv (N/A). */
1187 }
1188 },
1189 /* LD/ST */
1190 {
1191 COSTS_N_INSNS (1), /* load. */
1192 COSTS_N_INSNS (1), /* load_sign_extend. */
1193 COSTS_N_INSNS (1), /* ldrd. */
1194 COSTS_N_INSNS (1), /* ldm_1st. */
1195 1, /* ldm_regs_per_insn_1st. */
1196 2, /* ldm_regs_per_insn_subsequent. */
1197 COSTS_N_INSNS (1), /* loadf. */
1198 COSTS_N_INSNS (1), /* loadd. */
1199 COSTS_N_INSNS (1), /* load_unaligned. */
1200 COSTS_N_INSNS (1), /* store. */
1201 COSTS_N_INSNS (1), /* strd. */
1202 COSTS_N_INSNS (1), /* stm_1st. */
1203 1, /* stm_regs_per_insn_1st. */
1204 2, /* stm_regs_per_insn_subsequent. */
1205 COSTS_N_INSNS (1), /* storef. */
1206 COSTS_N_INSNS (1), /* stored. */
1207 COSTS_N_INSNS (1), /* store_unaligned. */
1208 COSTS_N_INSNS (1), /* loadv. */
1209 COSTS_N_INSNS (1) /* storev. */
1210 },
1211 {
1212 /* FP SFmode */
1213 {
1214 COSTS_N_INSNS (36), /* div. */
1215 COSTS_N_INSNS (11), /* mult. */
1216 COSTS_N_INSNS (20), /* mult_addsub. */
1217 COSTS_N_INSNS (30), /* fma. */
1218 COSTS_N_INSNS (9), /* addsub. */
1219 COSTS_N_INSNS (3), /* fpconst. */
1220 COSTS_N_INSNS (3), /* neg. */
1221 COSTS_N_INSNS (6), /* compare. */
1222 COSTS_N_INSNS (4), /* widen. */
1223 COSTS_N_INSNS (4), /* narrow. */
1224 COSTS_N_INSNS (8), /* toint. */
1225 COSTS_N_INSNS (8), /* fromint. */
1226 COSTS_N_INSNS (8) /* roundint. */
1227 },
1228 /* FP DFmode */
1229 {
1230 COSTS_N_INSNS (64), /* div. */
1231 COSTS_N_INSNS (16), /* mult. */
1232 COSTS_N_INSNS (25), /* mult_addsub. */
1233 COSTS_N_INSNS (30), /* fma. */
1234 COSTS_N_INSNS (9), /* addsub. */
1235 COSTS_N_INSNS (3), /* fpconst. */
1236 COSTS_N_INSNS (3), /* neg. */
1237 COSTS_N_INSNS (6), /* compare. */
1238 COSTS_N_INSNS (6), /* widen. */
1239 COSTS_N_INSNS (6), /* narrow. */
1240 COSTS_N_INSNS (8), /* toint. */
1241 COSTS_N_INSNS (8), /* fromint. */
1242 COSTS_N_INSNS (8) /* roundint. */
1243 }
1244 },
1245 /* Vector */
1246 {
1247 COSTS_N_INSNS (1) /* alu. */
1248 }
1249 };
1250
1251 const struct cpu_cost_table cortexa5_extra_costs =
1252 {
1253 /* ALU */
1254 {
1255 0, /* arith. */
1256 0, /* logical. */
1257 COSTS_N_INSNS (1), /* shift. */
1258 COSTS_N_INSNS (1), /* shift_reg. */
1259 COSTS_N_INSNS (1), /* arith_shift. */
1260 COSTS_N_INSNS (1), /* arith_shift_reg. */
1261 COSTS_N_INSNS (1), /* log_shift. */
1262 COSTS_N_INSNS (1), /* log_shift_reg. */
1263 COSTS_N_INSNS (1), /* extend. */
1264 COSTS_N_INSNS (1), /* extend_arith. */
1265 COSTS_N_INSNS (1), /* bfi. */
1266 COSTS_N_INSNS (1), /* bfx. */
1267 COSTS_N_INSNS (1), /* clz. */
1268 COSTS_N_INSNS (1), /* rev. */
1269 0, /* non_exec. */
1270 true /* non_exec_costs_exec. */
1271 },
1272
1273 {
1274 /* MULT SImode */
1275 {
1276 0, /* simple. */
1277 COSTS_N_INSNS (1), /* flag_setting. */
1278 COSTS_N_INSNS (1), /* extend. */
1279 COSTS_N_INSNS (1), /* add. */
1280 COSTS_N_INSNS (1), /* extend_add. */
1281 COSTS_N_INSNS (7) /* idiv. */
1282 },
1283 /* MULT DImode */
1284 {
1285 0, /* simple (N/A). */
1286 0, /* flag_setting (N/A). */
1287 COSTS_N_INSNS (1), /* extend. */
1288 0, /* add. */
1289 COSTS_N_INSNS (2), /* extend_add. */
1290 0 /* idiv (N/A). */
1291 }
1292 },
1293 /* LD/ST */
1294 {
1295 COSTS_N_INSNS (1), /* load. */
1296 COSTS_N_INSNS (1), /* load_sign_extend. */
1297 COSTS_N_INSNS (6), /* ldrd. */
1298 COSTS_N_INSNS (1), /* ldm_1st. */
1299 1, /* ldm_regs_per_insn_1st. */
1300 2, /* ldm_regs_per_insn_subsequent. */
1301 COSTS_N_INSNS (2), /* loadf. */
1302 COSTS_N_INSNS (4), /* loadd. */
1303 COSTS_N_INSNS (1), /* load_unaligned. */
1304 COSTS_N_INSNS (1), /* store. */
1305 COSTS_N_INSNS (3), /* strd. */
1306 COSTS_N_INSNS (1), /* stm_1st. */
1307 1, /* stm_regs_per_insn_1st. */
1308 2, /* stm_regs_per_insn_subsequent. */
1309 COSTS_N_INSNS (2), /* storef. */
1310 COSTS_N_INSNS (2), /* stored. */
1311 COSTS_N_INSNS (1), /* store_unaligned. */
1312 COSTS_N_INSNS (1), /* loadv. */
1313 COSTS_N_INSNS (1) /* storev. */
1314 },
1315 {
1316 /* FP SFmode */
1317 {
1318 COSTS_N_INSNS (15), /* div. */
1319 COSTS_N_INSNS (3), /* mult. */
1320 COSTS_N_INSNS (7), /* mult_addsub. */
1321 COSTS_N_INSNS (7), /* fma. */
1322 COSTS_N_INSNS (3), /* addsub. */
1323 COSTS_N_INSNS (3), /* fpconst. */
1324 COSTS_N_INSNS (3), /* neg. */
1325 COSTS_N_INSNS (3), /* compare. */
1326 COSTS_N_INSNS (3), /* widen. */
1327 COSTS_N_INSNS (3), /* narrow. */
1328 COSTS_N_INSNS (3), /* toint. */
1329 COSTS_N_INSNS (3), /* fromint. */
1330 COSTS_N_INSNS (3) /* roundint. */
1331 },
1332 /* FP DFmode */
1333 {
1334 COSTS_N_INSNS (30), /* div. */
1335 COSTS_N_INSNS (6), /* mult. */
1336 COSTS_N_INSNS (10), /* mult_addsub. */
1337 COSTS_N_INSNS (7), /* fma. */
1338 COSTS_N_INSNS (3), /* addsub. */
1339 COSTS_N_INSNS (3), /* fpconst. */
1340 COSTS_N_INSNS (3), /* neg. */
1341 COSTS_N_INSNS (3), /* compare. */
1342 COSTS_N_INSNS (3), /* widen. */
1343 COSTS_N_INSNS (3), /* narrow. */
1344 COSTS_N_INSNS (3), /* toint. */
1345 COSTS_N_INSNS (3), /* fromint. */
1346 COSTS_N_INSNS (3) /* roundint. */
1347 }
1348 },
1349 /* Vector */
1350 {
1351 COSTS_N_INSNS (1) /* alu. */
1352 }
1353 };
1354
1355
1356 const struct cpu_cost_table cortexa7_extra_costs =
1357 {
1358 /* ALU */
1359 {
1360 0, /* arith. */
1361 0, /* logical. */
1362 COSTS_N_INSNS (1), /* shift. */
1363 COSTS_N_INSNS (1), /* shift_reg. */
1364 COSTS_N_INSNS (1), /* arith_shift. */
1365 COSTS_N_INSNS (1), /* arith_shift_reg. */
1366 COSTS_N_INSNS (1), /* log_shift. */
1367 COSTS_N_INSNS (1), /* log_shift_reg. */
1368 COSTS_N_INSNS (1), /* extend. */
1369 COSTS_N_INSNS (1), /* extend_arith. */
1370 COSTS_N_INSNS (1), /* bfi. */
1371 COSTS_N_INSNS (1), /* bfx. */
1372 COSTS_N_INSNS (1), /* clz. */
1373 COSTS_N_INSNS (1), /* rev. */
1374 0, /* non_exec. */
1375 true /* non_exec_costs_exec. */
1376 },
1377
1378 {
1379 /* MULT SImode */
1380 {
1381 0, /* simple. */
1382 COSTS_N_INSNS (1), /* flag_setting. */
1383 COSTS_N_INSNS (1), /* extend. */
1384 COSTS_N_INSNS (1), /* add. */
1385 COSTS_N_INSNS (1), /* extend_add. */
1386 COSTS_N_INSNS (7) /* idiv. */
1387 },
1388 /* MULT DImode */
1389 {
1390 0, /* simple (N/A). */
1391 0, /* flag_setting (N/A). */
1392 COSTS_N_INSNS (1), /* extend. */
1393 0, /* add. */
1394 COSTS_N_INSNS (2), /* extend_add. */
1395 0 /* idiv (N/A). */
1396 }
1397 },
1398 /* LD/ST */
1399 {
1400 COSTS_N_INSNS (1), /* load. */
1401 COSTS_N_INSNS (1), /* load_sign_extend. */
1402 COSTS_N_INSNS (3), /* ldrd. */
1403 COSTS_N_INSNS (1), /* ldm_1st. */
1404 1, /* ldm_regs_per_insn_1st. */
1405 2, /* ldm_regs_per_insn_subsequent. */
1406 COSTS_N_INSNS (2), /* loadf. */
1407 COSTS_N_INSNS (2), /* loadd. */
1408 COSTS_N_INSNS (1), /* load_unaligned. */
1409 COSTS_N_INSNS (1), /* store. */
1410 COSTS_N_INSNS (3), /* strd. */
1411 COSTS_N_INSNS (1), /* stm_1st. */
1412 1, /* stm_regs_per_insn_1st. */
1413 2, /* stm_regs_per_insn_subsequent. */
1414 COSTS_N_INSNS (2), /* storef. */
1415 COSTS_N_INSNS (2), /* stored. */
1416 COSTS_N_INSNS (1), /* store_unaligned. */
1417 COSTS_N_INSNS (1), /* loadv. */
1418 COSTS_N_INSNS (1) /* storev. */
1419 },
1420 {
1421 /* FP SFmode */
1422 {
1423 COSTS_N_INSNS (15), /* div. */
1424 COSTS_N_INSNS (3), /* mult. */
1425 COSTS_N_INSNS (7), /* mult_addsub. */
1426 COSTS_N_INSNS (7), /* fma. */
1427 COSTS_N_INSNS (3), /* addsub. */
1428 COSTS_N_INSNS (3), /* fpconst. */
1429 COSTS_N_INSNS (3), /* neg. */
1430 COSTS_N_INSNS (3), /* compare. */
1431 COSTS_N_INSNS (3), /* widen. */
1432 COSTS_N_INSNS (3), /* narrow. */
1433 COSTS_N_INSNS (3), /* toint. */
1434 COSTS_N_INSNS (3), /* fromint. */
1435 COSTS_N_INSNS (3) /* roundint. */
1436 },
1437 /* FP DFmode */
1438 {
1439 COSTS_N_INSNS (30), /* div. */
1440 COSTS_N_INSNS (6), /* mult. */
1441 COSTS_N_INSNS (10), /* mult_addsub. */
1442 COSTS_N_INSNS (7), /* fma. */
1443 COSTS_N_INSNS (3), /* addsub. */
1444 COSTS_N_INSNS (3), /* fpconst. */
1445 COSTS_N_INSNS (3), /* neg. */
1446 COSTS_N_INSNS (3), /* compare. */
1447 COSTS_N_INSNS (3), /* widen. */
1448 COSTS_N_INSNS (3), /* narrow. */
1449 COSTS_N_INSNS (3), /* toint. */
1450 COSTS_N_INSNS (3), /* fromint. */
1451 COSTS_N_INSNS (3) /* roundint. */
1452 }
1453 },
1454 /* Vector */
1455 {
1456 COSTS_N_INSNS (1) /* alu. */
1457 }
1458 };
1459
1460 const struct cpu_cost_table cortexa12_extra_costs =
1461 {
1462 /* ALU */
1463 {
1464 0, /* arith. */
1465 0, /* logical. */
1466 0, /* shift. */
1467 COSTS_N_INSNS (1), /* shift_reg. */
1468 COSTS_N_INSNS (1), /* arith_shift. */
1469 COSTS_N_INSNS (1), /* arith_shift_reg. */
1470 COSTS_N_INSNS (1), /* log_shift. */
1471 COSTS_N_INSNS (1), /* log_shift_reg. */
1472 0, /* extend. */
1473 COSTS_N_INSNS (1), /* extend_arith. */
1474 0, /* bfi. */
1475 COSTS_N_INSNS (1), /* bfx. */
1476 COSTS_N_INSNS (1), /* clz. */
1477 COSTS_N_INSNS (1), /* rev. */
1478 0, /* non_exec. */
1479 true /* non_exec_costs_exec. */
1480 },
1481 /* MULT SImode */
1482 {
1483 {
1484 COSTS_N_INSNS (2), /* simple. */
1485 COSTS_N_INSNS (3), /* flag_setting. */
1486 COSTS_N_INSNS (2), /* extend. */
1487 COSTS_N_INSNS (3), /* add. */
1488 COSTS_N_INSNS (2), /* extend_add. */
1489 COSTS_N_INSNS (18) /* idiv. */
1490 },
1491 /* MULT DImode */
1492 {
1493 0, /* simple (N/A). */
1494 0, /* flag_setting (N/A). */
1495 COSTS_N_INSNS (3), /* extend. */
1496 0, /* add (N/A). */
1497 COSTS_N_INSNS (3), /* extend_add. */
1498 0 /* idiv (N/A). */
1499 }
1500 },
1501 /* LD/ST */
1502 {
1503 COSTS_N_INSNS (3), /* load. */
1504 COSTS_N_INSNS (3), /* load_sign_extend. */
1505 COSTS_N_INSNS (3), /* ldrd. */
1506 COSTS_N_INSNS (3), /* ldm_1st. */
1507 1, /* ldm_regs_per_insn_1st. */
1508 2, /* ldm_regs_per_insn_subsequent. */
1509 COSTS_N_INSNS (3), /* loadf. */
1510 COSTS_N_INSNS (3), /* loadd. */
1511 0, /* load_unaligned. */
1512 0, /* store. */
1513 0, /* strd. */
1514 0, /* stm_1st. */
1515 1, /* stm_regs_per_insn_1st. */
1516 2, /* stm_regs_per_insn_subsequent. */
1517 COSTS_N_INSNS (2), /* storef. */
1518 COSTS_N_INSNS (2), /* stored. */
1519 0, /* store_unaligned. */
1520 COSTS_N_INSNS (1), /* loadv. */
1521 COSTS_N_INSNS (1) /* storev. */
1522 },
1523 {
1524 /* FP SFmode */
1525 {
1526 COSTS_N_INSNS (17), /* div. */
1527 COSTS_N_INSNS (4), /* mult. */
1528 COSTS_N_INSNS (8), /* mult_addsub. */
1529 COSTS_N_INSNS (8), /* fma. */
1530 COSTS_N_INSNS (4), /* addsub. */
1531 COSTS_N_INSNS (2), /* fpconst. */
1532 COSTS_N_INSNS (2), /* neg. */
1533 COSTS_N_INSNS (2), /* compare. */
1534 COSTS_N_INSNS (4), /* widen. */
1535 COSTS_N_INSNS (4), /* narrow. */
1536 COSTS_N_INSNS (4), /* toint. */
1537 COSTS_N_INSNS (4), /* fromint. */
1538 COSTS_N_INSNS (4) /* roundint. */
1539 },
1540 /* FP DFmode */
1541 {
1542 COSTS_N_INSNS (31), /* div. */
1543 COSTS_N_INSNS (4), /* mult. */
1544 COSTS_N_INSNS (8), /* mult_addsub. */
1545 COSTS_N_INSNS (8), /* fma. */
1546 COSTS_N_INSNS (4), /* addsub. */
1547 COSTS_N_INSNS (2), /* fpconst. */
1548 COSTS_N_INSNS (2), /* neg. */
1549 COSTS_N_INSNS (2), /* compare. */
1550 COSTS_N_INSNS (4), /* widen. */
1551 COSTS_N_INSNS (4), /* narrow. */
1552 COSTS_N_INSNS (4), /* toint. */
1553 COSTS_N_INSNS (4), /* fromint. */
1554 COSTS_N_INSNS (4) /* roundint. */
1555 }
1556 },
1557 /* Vector */
1558 {
1559 COSTS_N_INSNS (1) /* alu. */
1560 }
1561 };
1562
1563 const struct cpu_cost_table cortexa15_extra_costs =
1564 {
1565 /* ALU */
1566 {
1567 0, /* arith. */
1568 0, /* logical. */
1569 0, /* shift. */
1570 0, /* shift_reg. */
1571 COSTS_N_INSNS (1), /* arith_shift. */
1572 COSTS_N_INSNS (1), /* arith_shift_reg. */
1573 COSTS_N_INSNS (1), /* log_shift. */
1574 COSTS_N_INSNS (1), /* log_shift_reg. */
1575 0, /* extend. */
1576 COSTS_N_INSNS (1), /* extend_arith. */
1577 COSTS_N_INSNS (1), /* bfi. */
1578 0, /* bfx. */
1579 0, /* clz. */
1580 0, /* rev. */
1581 0, /* non_exec. */
1582 true /* non_exec_costs_exec. */
1583 },
1584 /* MULT SImode */
1585 {
1586 {
1587 COSTS_N_INSNS (2), /* simple. */
1588 COSTS_N_INSNS (3), /* flag_setting. */
1589 COSTS_N_INSNS (2), /* extend. */
1590 COSTS_N_INSNS (2), /* add. */
1591 COSTS_N_INSNS (2), /* extend_add. */
1592 COSTS_N_INSNS (18) /* idiv. */
1593 },
1594 /* MULT DImode */
1595 {
1596 0, /* simple (N/A). */
1597 0, /* flag_setting (N/A). */
1598 COSTS_N_INSNS (3), /* extend. */
1599 0, /* add (N/A). */
1600 COSTS_N_INSNS (3), /* extend_add. */
1601 0 /* idiv (N/A). */
1602 }
1603 },
1604 /* LD/ST */
1605 {
1606 COSTS_N_INSNS (3), /* load. */
1607 COSTS_N_INSNS (3), /* load_sign_extend. */
1608 COSTS_N_INSNS (3), /* ldrd. */
1609 COSTS_N_INSNS (4), /* ldm_1st. */
1610 1, /* ldm_regs_per_insn_1st. */
1611 2, /* ldm_regs_per_insn_subsequent. */
1612 COSTS_N_INSNS (4), /* loadf. */
1613 COSTS_N_INSNS (4), /* loadd. */
1614 0, /* load_unaligned. */
1615 0, /* store. */
1616 0, /* strd. */
1617 COSTS_N_INSNS (1), /* stm_1st. */
1618 1, /* stm_regs_per_insn_1st. */
1619 2, /* stm_regs_per_insn_subsequent. */
1620 0, /* storef. */
1621 0, /* stored. */
1622 0, /* store_unaligned. */
1623 COSTS_N_INSNS (1), /* loadv. */
1624 COSTS_N_INSNS (1) /* storev. */
1625 },
1626 {
1627 /* FP SFmode */
1628 {
1629 COSTS_N_INSNS (17), /* div. */
1630 COSTS_N_INSNS (4), /* mult. */
1631 COSTS_N_INSNS (8), /* mult_addsub. */
1632 COSTS_N_INSNS (8), /* fma. */
1633 COSTS_N_INSNS (4), /* addsub. */
1634 COSTS_N_INSNS (2), /* fpconst. */
1635 COSTS_N_INSNS (2), /* neg. */
1636 COSTS_N_INSNS (5), /* compare. */
1637 COSTS_N_INSNS (4), /* widen. */
1638 COSTS_N_INSNS (4), /* narrow. */
1639 COSTS_N_INSNS (4), /* toint. */
1640 COSTS_N_INSNS (4), /* fromint. */
1641 COSTS_N_INSNS (4) /* roundint. */
1642 },
1643 /* FP DFmode */
1644 {
1645 COSTS_N_INSNS (31), /* div. */
1646 COSTS_N_INSNS (4), /* mult. */
1647 COSTS_N_INSNS (8), /* mult_addsub. */
1648 COSTS_N_INSNS (8), /* fma. */
1649 COSTS_N_INSNS (4), /* addsub. */
1650 COSTS_N_INSNS (2), /* fpconst. */
1651 COSTS_N_INSNS (2), /* neg. */
1652 COSTS_N_INSNS (2), /* compare. */
1653 COSTS_N_INSNS (4), /* widen. */
1654 COSTS_N_INSNS (4), /* narrow. */
1655 COSTS_N_INSNS (4), /* toint. */
1656 COSTS_N_INSNS (4), /* fromint. */
1657 COSTS_N_INSNS (4) /* roundint. */
1658 }
1659 },
1660 /* Vector */
1661 {
1662 COSTS_N_INSNS (1) /* alu. */
1663 }
1664 };
1665
1666 const struct cpu_cost_table v7m_extra_costs =
1667 {
1668 /* ALU */
1669 {
1670 0, /* arith. */
1671 0, /* logical. */
1672 0, /* shift. */
1673 0, /* shift_reg. */
1674 0, /* arith_shift. */
1675 COSTS_N_INSNS (1), /* arith_shift_reg. */
1676 0, /* log_shift. */
1677 COSTS_N_INSNS (1), /* log_shift_reg. */
1678 0, /* extend. */
1679 COSTS_N_INSNS (1), /* extend_arith. */
1680 0, /* bfi. */
1681 0, /* bfx. */
1682 0, /* clz. */
1683 0, /* rev. */
1684 COSTS_N_INSNS (1), /* non_exec. */
1685 false /* non_exec_costs_exec. */
1686 },
1687 {
1688 /* MULT SImode */
1689 {
1690 COSTS_N_INSNS (1), /* simple. */
1691 COSTS_N_INSNS (1), /* flag_setting. */
1692 COSTS_N_INSNS (2), /* extend. */
1693 COSTS_N_INSNS (1), /* add. */
1694 COSTS_N_INSNS (3), /* extend_add. */
1695 COSTS_N_INSNS (8) /* idiv. */
1696 },
1697 /* MULT DImode */
1698 {
1699 0, /* simple (N/A). */
1700 0, /* flag_setting (N/A). */
1701 COSTS_N_INSNS (2), /* extend. */
1702 0, /* add (N/A). */
1703 COSTS_N_INSNS (3), /* extend_add. */
1704 0 /* idiv (N/A). */
1705 }
1706 },
1707 /* LD/ST */
1708 {
1709 COSTS_N_INSNS (2), /* load. */
1710 0, /* load_sign_extend. */
1711 COSTS_N_INSNS (3), /* ldrd. */
1712 COSTS_N_INSNS (2), /* ldm_1st. */
1713 1, /* ldm_regs_per_insn_1st. */
1714 1, /* ldm_regs_per_insn_subsequent. */
1715 COSTS_N_INSNS (2), /* loadf. */
1716 COSTS_N_INSNS (3), /* loadd. */
1717 COSTS_N_INSNS (1), /* load_unaligned. */
1718 COSTS_N_INSNS (2), /* store. */
1719 COSTS_N_INSNS (3), /* strd. */
1720 COSTS_N_INSNS (2), /* stm_1st. */
1721 1, /* stm_regs_per_insn_1st. */
1722 1, /* stm_regs_per_insn_subsequent. */
1723 COSTS_N_INSNS (2), /* storef. */
1724 COSTS_N_INSNS (3), /* stored. */
1725 COSTS_N_INSNS (1), /* store_unaligned. */
1726 COSTS_N_INSNS (1), /* loadv. */
1727 COSTS_N_INSNS (1) /* storev. */
1728 },
1729 {
1730 /* FP SFmode */
1731 {
1732 COSTS_N_INSNS (7), /* div. */
1733 COSTS_N_INSNS (2), /* mult. */
1734 COSTS_N_INSNS (5), /* mult_addsub. */
1735 COSTS_N_INSNS (3), /* fma. */
1736 COSTS_N_INSNS (1), /* addsub. */
1737 0, /* fpconst. */
1738 0, /* neg. */
1739 0, /* compare. */
1740 0, /* widen. */
1741 0, /* narrow. */
1742 0, /* toint. */
1743 0, /* fromint. */
1744 0 /* roundint. */
1745 },
1746 /* FP DFmode */
1747 {
1748 COSTS_N_INSNS (15), /* div. */
1749 COSTS_N_INSNS (5), /* mult. */
1750 COSTS_N_INSNS (7), /* mult_addsub. */
1751 COSTS_N_INSNS (7), /* fma. */
1752 COSTS_N_INSNS (3), /* addsub. */
1753 0, /* fpconst. */
1754 0, /* neg. */
1755 0, /* compare. */
1756 0, /* widen. */
1757 0, /* narrow. */
1758 0, /* toint. */
1759 0, /* fromint. */
1760 0 /* roundint. */
1761 }
1762 },
1763 /* Vector */
1764 {
1765 COSTS_N_INSNS (1) /* alu. */
1766 }
1767 };
1768
1769 const struct addr_mode_cost_table generic_addr_mode_costs =
1770 {
1771 /* int. */
1772 {
1773 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1774 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1775 COSTS_N_INSNS (0) /* AMO_WB. */
1776 },
1777 /* float. */
1778 {
1779 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1780 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1781 COSTS_N_INSNS (0) /* AMO_WB. */
1782 },
1783 /* vector. */
1784 {
1785 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1786 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1787 COSTS_N_INSNS (0) /* AMO_WB. */
1788 }
1789 };
1790
1791 const struct tune_params arm_slowmul_tune =
1792 {
1793 &generic_extra_costs, /* Insn extra costs. */
1794 &generic_addr_mode_costs, /* Addressing mode costs. */
1795 NULL, /* Sched adj cost. */
1796 arm_default_branch_cost,
1797 &arm_default_vec_cost,
1798 3, /* Constant limit. */
1799 5, /* Max cond insns. */
1800 8, /* Memset max inline. */
1801 1, /* Issue rate. */
1802 ARM_PREFETCH_NOT_BENEFICIAL,
1803 tune_params::PREF_CONST_POOL_TRUE,
1804 tune_params::PREF_LDRD_FALSE,
1805 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1806 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1807 tune_params::DISPARAGE_FLAGS_NEITHER,
1808 tune_params::PREF_NEON_64_FALSE,
1809 tune_params::PREF_NEON_STRINGOPS_FALSE,
1810 tune_params::FUSE_NOTHING,
1811 tune_params::SCHED_AUTOPREF_OFF
1812 };
1813
1814 const struct tune_params arm_fastmul_tune =
1815 {
1816 &generic_extra_costs, /* Insn extra costs. */
1817 &generic_addr_mode_costs, /* Addressing mode costs. */
1818 NULL, /* Sched adj cost. */
1819 arm_default_branch_cost,
1820 &arm_default_vec_cost,
1821 1, /* Constant limit. */
1822 5, /* Max cond insns. */
1823 8, /* Memset max inline. */
1824 1, /* Issue rate. */
1825 ARM_PREFETCH_NOT_BENEFICIAL,
1826 tune_params::PREF_CONST_POOL_TRUE,
1827 tune_params::PREF_LDRD_FALSE,
1828 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1829 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1830 tune_params::DISPARAGE_FLAGS_NEITHER,
1831 tune_params::PREF_NEON_64_FALSE,
1832 tune_params::PREF_NEON_STRINGOPS_FALSE,
1833 tune_params::FUSE_NOTHING,
1834 tune_params::SCHED_AUTOPREF_OFF
1835 };
1836
1837 /* StrongARM has early execution of branches, so a sequence that is worth
1838 skipping is shorter. Set max_insns_skipped to a lower value. */
1839
1840 const struct tune_params arm_strongarm_tune =
1841 {
1842 &generic_extra_costs, /* Insn extra costs. */
1843 &generic_addr_mode_costs, /* Addressing mode costs. */
1844 NULL, /* Sched adj cost. */
1845 arm_default_branch_cost,
1846 &arm_default_vec_cost,
1847 1, /* Constant limit. */
1848 3, /* Max cond insns. */
1849 8, /* Memset max inline. */
1850 1, /* Issue rate. */
1851 ARM_PREFETCH_NOT_BENEFICIAL,
1852 tune_params::PREF_CONST_POOL_TRUE,
1853 tune_params::PREF_LDRD_FALSE,
1854 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1855 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1856 tune_params::DISPARAGE_FLAGS_NEITHER,
1857 tune_params::PREF_NEON_64_FALSE,
1858 tune_params::PREF_NEON_STRINGOPS_FALSE,
1859 tune_params::FUSE_NOTHING,
1860 tune_params::SCHED_AUTOPREF_OFF
1861 };
1862
1863 const struct tune_params arm_xscale_tune =
1864 {
1865 &generic_extra_costs, /* Insn extra costs. */
1866 &generic_addr_mode_costs, /* Addressing mode costs. */
1867 xscale_sched_adjust_cost,
1868 arm_default_branch_cost,
1869 &arm_default_vec_cost,
1870 2, /* Constant limit. */
1871 3, /* Max cond insns. */
1872 8, /* Memset max inline. */
1873 1, /* Issue rate. */
1874 ARM_PREFETCH_NOT_BENEFICIAL,
1875 tune_params::PREF_CONST_POOL_TRUE,
1876 tune_params::PREF_LDRD_FALSE,
1877 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1878 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1879 tune_params::DISPARAGE_FLAGS_NEITHER,
1880 tune_params::PREF_NEON_64_FALSE,
1881 tune_params::PREF_NEON_STRINGOPS_FALSE,
1882 tune_params::FUSE_NOTHING,
1883 tune_params::SCHED_AUTOPREF_OFF
1884 };
1885
1886 const struct tune_params arm_9e_tune =
1887 {
1888 &generic_extra_costs, /* Insn extra costs. */
1889 &generic_addr_mode_costs, /* Addressing mode costs. */
1890 NULL, /* Sched adj cost. */
1891 arm_default_branch_cost,
1892 &arm_default_vec_cost,
1893 1, /* Constant limit. */
1894 5, /* Max cond insns. */
1895 8, /* Memset max inline. */
1896 1, /* Issue rate. */
1897 ARM_PREFETCH_NOT_BENEFICIAL,
1898 tune_params::PREF_CONST_POOL_TRUE,
1899 tune_params::PREF_LDRD_FALSE,
1900 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1901 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1902 tune_params::DISPARAGE_FLAGS_NEITHER,
1903 tune_params::PREF_NEON_64_FALSE,
1904 tune_params::PREF_NEON_STRINGOPS_FALSE,
1905 tune_params::FUSE_NOTHING,
1906 tune_params::SCHED_AUTOPREF_OFF
1907 };
1908
1909 const struct tune_params arm_marvell_pj4_tune =
1910 {
1911 &generic_extra_costs, /* Insn extra costs. */
1912 &generic_addr_mode_costs, /* Addressing mode costs. */
1913 NULL, /* Sched adj cost. */
1914 arm_default_branch_cost,
1915 &arm_default_vec_cost,
1916 1, /* Constant limit. */
1917 5, /* Max cond insns. */
1918 8, /* Memset max inline. */
1919 2, /* Issue rate. */
1920 ARM_PREFETCH_NOT_BENEFICIAL,
1921 tune_params::PREF_CONST_POOL_TRUE,
1922 tune_params::PREF_LDRD_FALSE,
1923 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1924 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1925 tune_params::DISPARAGE_FLAGS_NEITHER,
1926 tune_params::PREF_NEON_64_FALSE,
1927 tune_params::PREF_NEON_STRINGOPS_FALSE,
1928 tune_params::FUSE_NOTHING,
1929 tune_params::SCHED_AUTOPREF_OFF
1930 };
1931
1932 const struct tune_params arm_v6t2_tune =
1933 {
1934 &generic_extra_costs, /* Insn extra costs. */
1935 &generic_addr_mode_costs, /* Addressing mode costs. */
1936 NULL, /* Sched adj cost. */
1937 arm_default_branch_cost,
1938 &arm_default_vec_cost,
1939 1, /* Constant limit. */
1940 5, /* Max cond insns. */
1941 8, /* Memset max inline. */
1942 1, /* Issue rate. */
1943 ARM_PREFETCH_NOT_BENEFICIAL,
1944 tune_params::PREF_CONST_POOL_FALSE,
1945 tune_params::PREF_LDRD_FALSE,
1946 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1947 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1948 tune_params::DISPARAGE_FLAGS_NEITHER,
1949 tune_params::PREF_NEON_64_FALSE,
1950 tune_params::PREF_NEON_STRINGOPS_FALSE,
1951 tune_params::FUSE_NOTHING,
1952 tune_params::SCHED_AUTOPREF_OFF
1953 };
1954
1955
1956 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1957 const struct tune_params arm_cortex_tune =
1958 {
1959 &generic_extra_costs,
1960 &generic_addr_mode_costs, /* Addressing mode costs. */
1961 NULL, /* Sched adj cost. */
1962 arm_default_branch_cost,
1963 &arm_default_vec_cost,
1964 1, /* Constant limit. */
1965 5, /* Max cond insns. */
1966 8, /* Memset max inline. */
1967 2, /* Issue rate. */
1968 ARM_PREFETCH_NOT_BENEFICIAL,
1969 tune_params::PREF_CONST_POOL_FALSE,
1970 tune_params::PREF_LDRD_FALSE,
1971 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1972 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1973 tune_params::DISPARAGE_FLAGS_NEITHER,
1974 tune_params::PREF_NEON_64_FALSE,
1975 tune_params::PREF_NEON_STRINGOPS_FALSE,
1976 tune_params::FUSE_NOTHING,
1977 tune_params::SCHED_AUTOPREF_OFF
1978 };
1979
1980 const struct tune_params arm_cortex_a8_tune =
1981 {
1982 &cortexa8_extra_costs,
1983 &generic_addr_mode_costs, /* Addressing mode costs. */
1984 NULL, /* Sched adj cost. */
1985 arm_default_branch_cost,
1986 &arm_default_vec_cost,
1987 1, /* Constant limit. */
1988 5, /* Max cond insns. */
1989 8, /* Memset max inline. */
1990 2, /* Issue rate. */
1991 ARM_PREFETCH_NOT_BENEFICIAL,
1992 tune_params::PREF_CONST_POOL_FALSE,
1993 tune_params::PREF_LDRD_FALSE,
1994 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1995 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1996 tune_params::DISPARAGE_FLAGS_NEITHER,
1997 tune_params::PREF_NEON_64_FALSE,
1998 tune_params::PREF_NEON_STRINGOPS_TRUE,
1999 tune_params::FUSE_NOTHING,
2000 tune_params::SCHED_AUTOPREF_OFF
2001 };
2002
2003 const struct tune_params arm_cortex_a7_tune =
2004 {
2005 &cortexa7_extra_costs,
2006 &generic_addr_mode_costs, /* Addressing mode costs. */
2007 NULL, /* Sched adj cost. */
2008 arm_default_branch_cost,
2009 &arm_default_vec_cost,
2010 1, /* Constant limit. */
2011 5, /* Max cond insns. */
2012 8, /* Memset max inline. */
2013 2, /* Issue rate. */
2014 ARM_PREFETCH_NOT_BENEFICIAL,
2015 tune_params::PREF_CONST_POOL_FALSE,
2016 tune_params::PREF_LDRD_FALSE,
2017 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2018 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2019 tune_params::DISPARAGE_FLAGS_NEITHER,
2020 tune_params::PREF_NEON_64_FALSE,
2021 tune_params::PREF_NEON_STRINGOPS_TRUE,
2022 tune_params::FUSE_NOTHING,
2023 tune_params::SCHED_AUTOPREF_OFF
2024 };
2025
2026 const struct tune_params arm_cortex_a15_tune =
2027 {
2028 &cortexa15_extra_costs,
2029 &generic_addr_mode_costs, /* Addressing mode costs. */
2030 NULL, /* Sched adj cost. */
2031 arm_default_branch_cost,
2032 &arm_default_vec_cost,
2033 1, /* Constant limit. */
2034 2, /* Max cond insns. */
2035 8, /* Memset max inline. */
2036 3, /* Issue rate. */
2037 ARM_PREFETCH_NOT_BENEFICIAL,
2038 tune_params::PREF_CONST_POOL_FALSE,
2039 tune_params::PREF_LDRD_TRUE,
2040 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2041 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2042 tune_params::DISPARAGE_FLAGS_ALL,
2043 tune_params::PREF_NEON_64_FALSE,
2044 tune_params::PREF_NEON_STRINGOPS_TRUE,
2045 tune_params::FUSE_NOTHING,
2046 tune_params::SCHED_AUTOPREF_FULL
2047 };
2048
2049 const struct tune_params arm_cortex_a35_tune =
2050 {
2051 &cortexa53_extra_costs,
2052 &generic_addr_mode_costs, /* Addressing mode costs. */
2053 NULL, /* Sched adj cost. */
2054 arm_default_branch_cost,
2055 &arm_default_vec_cost,
2056 1, /* Constant limit. */
2057 5, /* Max cond insns. */
2058 8, /* Memset max inline. */
2059 1, /* Issue rate. */
2060 ARM_PREFETCH_NOT_BENEFICIAL,
2061 tune_params::PREF_CONST_POOL_FALSE,
2062 tune_params::PREF_LDRD_FALSE,
2063 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2064 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2065 tune_params::DISPARAGE_FLAGS_NEITHER,
2066 tune_params::PREF_NEON_64_FALSE,
2067 tune_params::PREF_NEON_STRINGOPS_TRUE,
2068 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2069 tune_params::SCHED_AUTOPREF_OFF
2070 };
2071
2072 const struct tune_params arm_cortex_a53_tune =
2073 {
2074 &cortexa53_extra_costs,
2075 &generic_addr_mode_costs, /* Addressing mode costs. */
2076 NULL, /* Sched adj cost. */
2077 arm_default_branch_cost,
2078 &arm_default_vec_cost,
2079 1, /* Constant limit. */
2080 5, /* Max cond insns. */
2081 8, /* Memset max inline. */
2082 2, /* Issue rate. */
2083 ARM_PREFETCH_NOT_BENEFICIAL,
2084 tune_params::PREF_CONST_POOL_FALSE,
2085 tune_params::PREF_LDRD_FALSE,
2086 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2087 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2088 tune_params::DISPARAGE_FLAGS_NEITHER,
2089 tune_params::PREF_NEON_64_FALSE,
2090 tune_params::PREF_NEON_STRINGOPS_TRUE,
2091 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2092 tune_params::SCHED_AUTOPREF_OFF
2093 };
2094
2095 const struct tune_params arm_cortex_a57_tune =
2096 {
2097 &cortexa57_extra_costs,
2098 &generic_addr_mode_costs, /* addressing mode costs */
2099 NULL, /* Sched adj cost. */
2100 arm_default_branch_cost,
2101 &arm_default_vec_cost,
2102 1, /* Constant limit. */
2103 2, /* Max cond insns. */
2104 8, /* Memset max inline. */
2105 3, /* Issue rate. */
2106 ARM_PREFETCH_NOT_BENEFICIAL,
2107 tune_params::PREF_CONST_POOL_FALSE,
2108 tune_params::PREF_LDRD_TRUE,
2109 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2110 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2111 tune_params::DISPARAGE_FLAGS_ALL,
2112 tune_params::PREF_NEON_64_FALSE,
2113 tune_params::PREF_NEON_STRINGOPS_TRUE,
2114 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2115 tune_params::SCHED_AUTOPREF_FULL
2116 };
2117
2118 const struct tune_params arm_exynosm1_tune =
2119 {
2120 &exynosm1_extra_costs,
2121 &generic_addr_mode_costs, /* Addressing mode costs. */
2122 NULL, /* Sched adj cost. */
2123 arm_default_branch_cost,
2124 &arm_default_vec_cost,
2125 1, /* Constant limit. */
2126 2, /* Max cond insns. */
2127 8, /* Memset max inline. */
2128 3, /* Issue rate. */
2129 ARM_PREFETCH_NOT_BENEFICIAL,
2130 tune_params::PREF_CONST_POOL_FALSE,
2131 tune_params::PREF_LDRD_TRUE,
2132 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2133 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2134 tune_params::DISPARAGE_FLAGS_ALL,
2135 tune_params::PREF_NEON_64_FALSE,
2136 tune_params::PREF_NEON_STRINGOPS_TRUE,
2137 tune_params::FUSE_NOTHING,
2138 tune_params::SCHED_AUTOPREF_OFF
2139 };
2140
2141 const struct tune_params arm_xgene1_tune =
2142 {
2143 &xgene1_extra_costs,
2144 &generic_addr_mode_costs, /* Addressing mode costs. */
2145 NULL, /* Sched adj cost. */
2146 arm_default_branch_cost,
2147 &arm_default_vec_cost,
2148 1, /* Constant limit. */
2149 2, /* Max cond insns. */
2150 32, /* Memset max inline. */
2151 4, /* Issue rate. */
2152 ARM_PREFETCH_NOT_BENEFICIAL,
2153 tune_params::PREF_CONST_POOL_FALSE,
2154 tune_params::PREF_LDRD_TRUE,
2155 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2156 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2157 tune_params::DISPARAGE_FLAGS_ALL,
2158 tune_params::PREF_NEON_64_FALSE,
2159 tune_params::PREF_NEON_STRINGOPS_FALSE,
2160 tune_params::FUSE_NOTHING,
2161 tune_params::SCHED_AUTOPREF_OFF
2162 };
2163
2164 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2165 less appealing. Set max_insns_skipped to a low value. */
2166
2167 const struct tune_params arm_cortex_a5_tune =
2168 {
2169 &cortexa5_extra_costs,
2170 &generic_addr_mode_costs, /* Addressing mode costs. */
2171 NULL, /* Sched adj cost. */
2172 arm_cortex_a5_branch_cost,
2173 &arm_default_vec_cost,
2174 1, /* Constant limit. */
2175 1, /* Max cond insns. */
2176 8, /* Memset max inline. */
2177 2, /* Issue rate. */
2178 ARM_PREFETCH_NOT_BENEFICIAL,
2179 tune_params::PREF_CONST_POOL_FALSE,
2180 tune_params::PREF_LDRD_FALSE,
2181 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2182 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2183 tune_params::DISPARAGE_FLAGS_NEITHER,
2184 tune_params::PREF_NEON_64_FALSE,
2185 tune_params::PREF_NEON_STRINGOPS_TRUE,
2186 tune_params::FUSE_NOTHING,
2187 tune_params::SCHED_AUTOPREF_OFF
2188 };
2189
2190 const struct tune_params arm_cortex_a9_tune =
2191 {
2192 &cortexa9_extra_costs,
2193 &generic_addr_mode_costs, /* Addressing mode costs. */
2194 cortex_a9_sched_adjust_cost,
2195 arm_default_branch_cost,
2196 &arm_default_vec_cost,
2197 1, /* Constant limit. */
2198 5, /* Max cond insns. */
2199 8, /* Memset max inline. */
2200 2, /* Issue rate. */
2201 ARM_PREFETCH_BENEFICIAL(4,32,32),
2202 tune_params::PREF_CONST_POOL_FALSE,
2203 tune_params::PREF_LDRD_FALSE,
2204 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2205 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2206 tune_params::DISPARAGE_FLAGS_NEITHER,
2207 tune_params::PREF_NEON_64_FALSE,
2208 tune_params::PREF_NEON_STRINGOPS_FALSE,
2209 tune_params::FUSE_NOTHING,
2210 tune_params::SCHED_AUTOPREF_OFF
2211 };
2212
2213 const struct tune_params arm_cortex_a12_tune =
2214 {
2215 &cortexa12_extra_costs,
2216 &generic_addr_mode_costs, /* Addressing mode costs. */
2217 NULL, /* Sched adj cost. */
2218 arm_default_branch_cost,
2219 &arm_default_vec_cost, /* Vectorizer costs. */
2220 1, /* Constant limit. */
2221 2, /* Max cond insns. */
2222 8, /* Memset max inline. */
2223 2, /* Issue rate. */
2224 ARM_PREFETCH_NOT_BENEFICIAL,
2225 tune_params::PREF_CONST_POOL_FALSE,
2226 tune_params::PREF_LDRD_TRUE,
2227 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2228 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2229 tune_params::DISPARAGE_FLAGS_ALL,
2230 tune_params::PREF_NEON_64_FALSE,
2231 tune_params::PREF_NEON_STRINGOPS_TRUE,
2232 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2233 tune_params::SCHED_AUTOPREF_OFF
2234 };
2235
2236 const struct tune_params arm_cortex_a73_tune =
2237 {
2238 &cortexa57_extra_costs,
2239 &generic_addr_mode_costs, /* Addressing mode costs. */
2240 NULL, /* Sched adj cost. */
2241 arm_default_branch_cost,
2242 &arm_default_vec_cost, /* Vectorizer costs. */
2243 1, /* Constant limit. */
2244 2, /* Max cond insns. */
2245 8, /* Memset max inline. */
2246 2, /* Issue rate. */
2247 ARM_PREFETCH_NOT_BENEFICIAL,
2248 tune_params::PREF_CONST_POOL_FALSE,
2249 tune_params::PREF_LDRD_TRUE,
2250 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2251 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2252 tune_params::DISPARAGE_FLAGS_ALL,
2253 tune_params::PREF_NEON_64_FALSE,
2254 tune_params::PREF_NEON_STRINGOPS_TRUE,
2255 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2256 tune_params::SCHED_AUTOPREF_FULL
2257 };
2258
2259 /* armv7m tuning.  On Cortex-M4 cores, for example, MOVW and MOVT each take a
2260 single cycle, so a MOVW/MOVT pair costs two cycles.  An LDR from the constant pool also takes two cycles
2261 to execute, but mildly increases pipelining opportunity (consecutive
2262 loads/stores can be pipelined together, saving one cycle), and may also
2263 improve icache utilisation. Hence we prefer the constant pool for such
2264 processors. */
2265
2266 const struct tune_params arm_v7m_tune =
2267 {
2268 &v7m_extra_costs,
2269 &generic_addr_mode_costs, /* Addressing mode costs. */
2270 NULL, /* Sched adj cost. */
2271 arm_cortex_m_branch_cost,
2272 &arm_default_vec_cost,
2273 1, /* Constant limit. */
2274 2, /* Max cond insns. */
2275 8, /* Memset max inline. */
2276 1, /* Issue rate. */
2277 ARM_PREFETCH_NOT_BENEFICIAL,
2278 tune_params::PREF_CONST_POOL_TRUE,
2279 tune_params::PREF_LDRD_FALSE,
2280 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2281 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2282 tune_params::DISPARAGE_FLAGS_NEITHER,
2283 tune_params::PREF_NEON_64_FALSE,
2284 tune_params::PREF_NEON_STRINGOPS_FALSE,
2285 tune_params::FUSE_NOTHING,
2286 tune_params::SCHED_AUTOPREF_OFF
2287 };
2288
2289 /* Cortex-M7 tuning. */
2290
2291 const struct tune_params arm_cortex_m7_tune =
2292 {
2293 &v7m_extra_costs,
2294 &generic_addr_mode_costs, /* Addressing mode costs. */
2295 NULL, /* Sched adj cost. */
2296 arm_cortex_m7_branch_cost,
2297 &arm_default_vec_cost,
2298 0, /* Constant limit. */
2299 1, /* Max cond insns. */
2300 8, /* Memset max inline. */
2301 2, /* Issue rate. */
2302 ARM_PREFETCH_NOT_BENEFICIAL,
2303 tune_params::PREF_CONST_POOL_TRUE,
2304 tune_params::PREF_LDRD_FALSE,
2305 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2306 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2307 tune_params::DISPARAGE_FLAGS_NEITHER,
2308 tune_params::PREF_NEON_64_FALSE,
2309 tune_params::PREF_NEON_STRINGOPS_FALSE,
2310 tune_params::FUSE_NOTHING,
2311 tune_params::SCHED_AUTOPREF_OFF
2312 };
2313
2314 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2315 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2316 cortex-m23. */
2317 const struct tune_params arm_v6m_tune =
2318 {
2319 &generic_extra_costs, /* Insn extra costs. */
2320 &generic_addr_mode_costs, /* Addressing mode costs. */
2321 NULL, /* Sched adj cost. */
2322 arm_default_branch_cost,
2323 &arm_default_vec_cost, /* Vectorizer costs. */
2324 1, /* Constant limit. */
2325 5, /* Max cond insns. */
2326 8, /* Memset max inline. */
2327 1, /* Issue rate. */
2328 ARM_PREFETCH_NOT_BENEFICIAL,
2329 tune_params::PREF_CONST_POOL_FALSE,
2330 tune_params::PREF_LDRD_FALSE,
2331 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2332 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2333 tune_params::DISPARAGE_FLAGS_NEITHER,
2334 tune_params::PREF_NEON_64_FALSE,
2335 tune_params::PREF_NEON_STRINGOPS_FALSE,
2336 tune_params::FUSE_NOTHING,
2337 tune_params::SCHED_AUTOPREF_OFF
2338 };
2339
2340 const struct tune_params arm_fa726te_tune =
2341 {
2342 &generic_extra_costs, /* Insn extra costs. */
2343 &generic_addr_mode_costs, /* Addressing mode costs. */
2344 fa726te_sched_adjust_cost,
2345 arm_default_branch_cost,
2346 &arm_default_vec_cost,
2347 1, /* Constant limit. */
2348 5, /* Max cond insns. */
2349 8, /* Memset max inline. */
2350 2, /* Issue rate. */
2351 ARM_PREFETCH_NOT_BENEFICIAL,
2352 tune_params::PREF_CONST_POOL_TRUE,
2353 tune_params::PREF_LDRD_FALSE,
2354 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2355 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2356 tune_params::DISPARAGE_FLAGS_NEITHER,
2357 tune_params::PREF_NEON_64_FALSE,
2358 tune_params::PREF_NEON_STRINGOPS_FALSE,
2359 tune_params::FUSE_NOTHING,
2360 tune_params::SCHED_AUTOPREF_OFF
2361 };
2362
2363 /* Auto-generated CPU, FPU and architecture tables. */
2364 #include "arm-cpu-data.h"
2365
2366 /* The name of the preprocessor macro to define for this architecture. PROFILE
2367 is replaced by the architecture name (e.g. 8A) in arm_option_override () and
2368 is thus chosen to be big enough to hold the longest architecture name. */
2369
2370 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2371
2372 /* Supported TLS relocations. */
2373
2374 enum tls_reloc {
2375 TLS_GD32,
2376 TLS_LDM32,
2377 TLS_LDO32,
2378 TLS_IE32,
2379 TLS_LE32,
2380 TLS_DESCSEQ /* GNU scheme */
2381 };
2382
2383 /* The maximum number of insns to be used when loading a constant. */
2384 inline static int
2385 arm_constant_limit (bool size_p)
2386 {
2387 return size_p ? 1 : current_tune->constant_limit;
2388 }
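/* For example, with size_p set (optimizing for size) constant synthesis is
   capped at a single instruction, while otherwise the limit comes from the
   tuning tables above (constant_limit is 3 for arm_slowmul_tune and 1 for
   most of the Cortex tunings).  */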
2389
2390 /* Emit an insn that's a simple single-set. Both the operands must be known
2391 to be valid. */
2392 inline static rtx_insn *
2393 emit_set_insn (rtx x, rtx y)
2394 {
2395 return emit_insn (gen_rtx_SET (x, y));
2396 }
2397
2398 /* Return the number of bits set in VALUE. */
2399 static unsigned
2400 bit_count (unsigned long value)
2401 {
2402 unsigned long count = 0;
2403
2404 while (value)
2405 {
2406 count++;
2407 value &= value - 1; /* Clear the least-significant set bit. */
2408 }
2409
2410 return count;
2411 }
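/* A worked example of the loop above: for value = 0x29 (binary 101001),
   "value &= value - 1" clears bit 0, then bit 3, then bit 5, so the loop
   runs exactly once per set bit and bit_count returns 3.  */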
2412
2413 /* Return the number of bits set in BMAP. */
2414 static unsigned
2415 bitmap_popcount (const sbitmap bmap)
2416 {
2417 unsigned int count = 0;
2418 unsigned int n = 0;
2419 sbitmap_iterator sbi;
2420
2421 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2422 count++;
2423 return count;
2424 }
2425
2426 typedef struct
2427 {
2428 machine_mode mode;
2429 const char *name;
2430 } arm_fixed_mode_set;
2431
2432 /* A small helper for setting fixed-point library libfuncs. */
2433
2434 static void
2435 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2436 const char *funcname, const char *modename,
2437 int num_suffix)
2438 {
2439 char buffer[50];
2440
2441 if (num_suffix == 0)
2442 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2443 else
2444 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2445
2446 set_optab_libfunc (optable, mode, buffer);
2447 }
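/* For instance, a call such as
     arm_set_fixed_optab_libfunc (add_optab, E_SQmode, "add", "sq", 3);
   builds the name "__gnu_addsq3" and registers it as the SQmode addition
   libcall, matching the __gnu_-prefixed naming used for the fixed-point
   helpers set up below.  */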
2448
2449 static void
2450 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2451 machine_mode from, const char *funcname,
2452 const char *toname, const char *fromname)
2453 {
2454 char buffer[50];
2455 const char *maybe_suffix_2 = "";
2456
2457 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2458 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2459 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2460 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2461 maybe_suffix_2 = "2";
2462
2463 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2464 maybe_suffix_2);
2465
2466 set_conv_libfunc (optable, to, from, buffer);
2467 }
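/* Two illustrative cases: a signed fract-to-fract conversion such as
   SQmode -> DQmode keeps the same signedness and remains a fract mode, so
   it gets the "2" suffix and the name "__gnu_fractsqdq2"; a
   fixed-point-to-integer conversion such as SQmode -> SImode does not, and
   is simply "__gnu_fractsqsi".  */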
2468
2469 static GTY(()) rtx speculation_barrier_libfunc;
2470
2471 /* Set up library functions unique to ARM. */
2472 static void
2473 arm_init_libfuncs (void)
2474 {
2475 /* For Linux, we have access to kernel support for atomic operations. */
2476 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2477 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2478
2479 /* There are no special library functions unless we are using the
2480 ARM BPABI. */
2481 if (!TARGET_BPABI)
2482 return;
2483
2484 /* The functions below are described in Section 4 of the "Run-Time
2485 ABI for the ARM architecture", Version 1.0. */
2486
2487 /* Double-precision floating-point arithmetic. Table 2. */
2488 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2489 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2490 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2491 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2492 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2493
2494 /* Double-precision comparisons. Table 3. */
2495 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2496 set_optab_libfunc (ne_optab, DFmode, NULL);
2497 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2498 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2499 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2500 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2501 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2502
2503 /* Single-precision floating-point arithmetic. Table 4. */
2504 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2505 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2506 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2507 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2508 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2509
2510 /* Single-precision comparisons. Table 5. */
2511 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2512 set_optab_libfunc (ne_optab, SFmode, NULL);
2513 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2514 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2515 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2516 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2517 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2518
2519 /* Floating-point to integer conversions. Table 6. */
2520 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2521 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2522 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2523 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2524 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2525 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2526 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2527 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2528
2529 /* Conversions between floating types. Table 7. */
2530 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2531 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2532
2533 /* Integer to floating-point conversions. Table 8. */
2534 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2535 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2536 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2537 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2538 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2539 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2540 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2541 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2542
2543 /* Long long. Table 9. */
2544 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2545 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2546 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2547 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2548 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2549 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2550 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2551 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2552
2553 /* Integer (32/32->32) division. \S 4.3.1. */
2554 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2555 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2556
2557 /* The divmod functions are designed so that they can be used for
2558 plain division, even though they return both the quotient and the
2559 remainder. The quotient is returned in the usual location (i.e.,
2560 r0 for SImode, {r0, r1} for DImode), just as would be expected
2561 for an ordinary division routine. Because the AAPCS calling
2562 conventions specify that all of { r0, r1, r2, r3 } are
2563 call-clobbered registers, there is no need to tell the compiler
2564 explicitly that those registers are clobbered by these
2565 routines. */
2566 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2567 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
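/* As an illustration: a plain 64-bit "a / b" can simply call
   __aeabi_ldivmod and use the quotient in {r0, r1}; the remainder, which
   the run-time ABI returns in {r2, r3}, is ignored, and no clobber list is
   needed for the reason given above.  */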
2568
2569 /* For SImode division the ABI provides div-without-mod routines,
2570 which are faster. */
2571 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2572 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2573
2574 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2575 divmod libcalls instead. */
2576 set_optab_libfunc (smod_optab, DImode, NULL);
2577 set_optab_libfunc (umod_optab, DImode, NULL);
2578 set_optab_libfunc (smod_optab, SImode, NULL);
2579 set_optab_libfunc (umod_optab, SImode, NULL);
2580
2581 /* Half-precision float operations. The compiler handles all operations
2582 with NULL libfuncs by converting to SFmode. */
2583 switch (arm_fp16_format)
2584 {
2585 case ARM_FP16_FORMAT_IEEE:
2586 case ARM_FP16_FORMAT_ALTERNATIVE:
2587
2588 /* Conversions. */
2589 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2590 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2591 ? "__gnu_f2h_ieee"
2592 : "__gnu_f2h_alternative"));
2593 set_conv_libfunc (sext_optab, SFmode, HFmode,
2594 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2595 ? "__gnu_h2f_ieee"
2596 : "__gnu_h2f_alternative"));
2597
2598 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2599 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2600 ? "__gnu_d2h_ieee"
2601 : "__gnu_d2h_alternative"));
2602
2603 /* Arithmetic. */
2604 set_optab_libfunc (add_optab, HFmode, NULL);
2605 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2606 set_optab_libfunc (smul_optab, HFmode, NULL);
2607 set_optab_libfunc (neg_optab, HFmode, NULL);
2608 set_optab_libfunc (sub_optab, HFmode, NULL);
2609
2610 /* Comparisons. */
2611 set_optab_libfunc (eq_optab, HFmode, NULL);
2612 set_optab_libfunc (ne_optab, HFmode, NULL);
2613 set_optab_libfunc (lt_optab, HFmode, NULL);
2614 set_optab_libfunc (le_optab, HFmode, NULL);
2615 set_optab_libfunc (ge_optab, HFmode, NULL);
2616 set_optab_libfunc (gt_optab, HFmode, NULL);
2617 set_optab_libfunc (unord_optab, HFmode, NULL);
2618 break;
2619
2620 default:
2621 break;
2622 }
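/* With -mfp16-format=ieee, for example, an HFmode addition whose optab has
   been cleared above is carried out by widening both operands to SFmode
   (via __gnu_h2f_ieee, or a hardware conversion where available), adding
   in SFmode, and narrowing the result back with __gnu_f2h_ieee.  */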
2623
2624 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2625 {
2626 const arm_fixed_mode_set fixed_arith_modes[] =
2627 {
2628 { E_QQmode, "qq" },
2629 { E_UQQmode, "uqq" },
2630 { E_HQmode, "hq" },
2631 { E_UHQmode, "uhq" },
2632 { E_SQmode, "sq" },
2633 { E_USQmode, "usq" },
2634 { E_DQmode, "dq" },
2635 { E_UDQmode, "udq" },
2636 { E_TQmode, "tq" },
2637 { E_UTQmode, "utq" },
2638 { E_HAmode, "ha" },
2639 { E_UHAmode, "uha" },
2640 { E_SAmode, "sa" },
2641 { E_USAmode, "usa" },
2642 { E_DAmode, "da" },
2643 { E_UDAmode, "uda" },
2644 { E_TAmode, "ta" },
2645 { E_UTAmode, "uta" }
2646 };
2647 const arm_fixed_mode_set fixed_conv_modes[] =
2648 {
2649 { E_QQmode, "qq" },
2650 { E_UQQmode, "uqq" },
2651 { E_HQmode, "hq" },
2652 { E_UHQmode, "uhq" },
2653 { E_SQmode, "sq" },
2654 { E_USQmode, "usq" },
2655 { E_DQmode, "dq" },
2656 { E_UDQmode, "udq" },
2657 { E_TQmode, "tq" },
2658 { E_UTQmode, "utq" },
2659 { E_HAmode, "ha" },
2660 { E_UHAmode, "uha" },
2661 { E_SAmode, "sa" },
2662 { E_USAmode, "usa" },
2663 { E_DAmode, "da" },
2664 { E_UDAmode, "uda" },
2665 { E_TAmode, "ta" },
2666 { E_UTAmode, "uta" },
2667 { E_QImode, "qi" },
2668 { E_HImode, "hi" },
2669 { E_SImode, "si" },
2670 { E_DImode, "di" },
2671 { E_TImode, "ti" },
2672 { E_SFmode, "sf" },
2673 { E_DFmode, "df" }
2674 };
2675 unsigned int i, j;
2676
2677 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2678 {
2679 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2680 "add", fixed_arith_modes[i].name, 3);
2681 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2682 "ssadd", fixed_arith_modes[i].name, 3);
2683 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2684 "usadd", fixed_arith_modes[i].name, 3);
2685 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2686 "sub", fixed_arith_modes[i].name, 3);
2687 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2688 "sssub", fixed_arith_modes[i].name, 3);
2689 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2690 "ussub", fixed_arith_modes[i].name, 3);
2691 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2692 "mul", fixed_arith_modes[i].name, 3);
2693 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2694 "ssmul", fixed_arith_modes[i].name, 3);
2695 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2696 "usmul", fixed_arith_modes[i].name, 3);
2697 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2698 "div", fixed_arith_modes[i].name, 3);
2699 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2700 "udiv", fixed_arith_modes[i].name, 3);
2701 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2702 "ssdiv", fixed_arith_modes[i].name, 3);
2703 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2704 "usdiv", fixed_arith_modes[i].name, 3);
2705 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2706 "neg", fixed_arith_modes[i].name, 2);
2707 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2708 "ssneg", fixed_arith_modes[i].name, 2);
2709 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2710 "usneg", fixed_arith_modes[i].name, 2);
2711 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2712 "ashl", fixed_arith_modes[i].name, 3);
2713 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2714 "ashr", fixed_arith_modes[i].name, 3);
2715 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2716 "lshr", fixed_arith_modes[i].name, 3);
2717 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2718 "ssashl", fixed_arith_modes[i].name, 3);
2719 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2720 "usashl", fixed_arith_modes[i].name, 3);
2721 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2722 "cmp", fixed_arith_modes[i].name, 2);
2723 }
2724
2725 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2726 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2727 {
2728 if (i == j
2729 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2730 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2731 continue;
2732
2733 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2734 fixed_conv_modes[j].mode, "fract",
2735 fixed_conv_modes[i].name,
2736 fixed_conv_modes[j].name);
2737 arm_set_fixed_conv_libfunc (satfract_optab,
2738 fixed_conv_modes[i].mode,
2739 fixed_conv_modes[j].mode, "satfract",
2740 fixed_conv_modes[i].name,
2741 fixed_conv_modes[j].name);
2742 arm_set_fixed_conv_libfunc (fractuns_optab,
2743 fixed_conv_modes[i].mode,
2744 fixed_conv_modes[j].mode, "fractuns",
2745 fixed_conv_modes[i].name,
2746 fixed_conv_modes[j].name);
2747 arm_set_fixed_conv_libfunc (satfractuns_optab,
2748 fixed_conv_modes[i].mode,
2749 fixed_conv_modes[j].mode, "satfractuns",
2750 fixed_conv_modes[i].name,
2751 fixed_conv_modes[j].name);
2752 }
2753 }
2754
2755 if (TARGET_AAPCS_BASED)
2756 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2757
2758 speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2759 }
2760
2761 /* On AAPCS systems, this is the "struct __va_list". */
2762 static GTY(()) tree va_list_type;
2763
2764 /* Return the type to use as __builtin_va_list. */
2765 static tree
2766 arm_build_builtin_va_list (void)
2767 {
2768 tree va_list_name;
2769 tree ap_field;
2770
2771 if (!TARGET_AAPCS_BASED)
2772 return std_build_builtin_va_list ();
2773
2774 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2775 defined as:
2776
2777 struct __va_list
2778 {
2779 void *__ap;
2780 };
2781
2782 The C Library ABI further reinforces this definition in \S
2783 4.1.
2784
2785 We must follow this definition exactly. The structure tag
2786 name is visible in C++ mangled names, and thus forms a part
2787 of the ABI. The field name may be used by people who
2788 #include <stdarg.h>. */
2789 /* Create the type. */
2790 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2791 /* Give it the required name. */
2792 va_list_name = build_decl (BUILTINS_LOCATION,
2793 TYPE_DECL,
2794 get_identifier ("__va_list"),
2795 va_list_type);
2796 DECL_ARTIFICIAL (va_list_name) = 1;
2797 TYPE_NAME (va_list_type) = va_list_name;
2798 TYPE_STUB_DECL (va_list_type) = va_list_name;
2799 /* Create the __ap field. */
2800 ap_field = build_decl (BUILTINS_LOCATION,
2801 FIELD_DECL,
2802 get_identifier ("__ap"),
2803 ptr_type_node);
2804 DECL_ARTIFICIAL (ap_field) = 1;
2805 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2806 TYPE_FIELDS (va_list_type) = ap_field;
2807 /* Compute its layout. */
2808 layout_type (va_list_type);
2809
2810 return va_list_type;
2811 }
2812
2813 /* Return an expression of type "void *" pointing to the next
2814 available argument in a variable-argument list. VALIST is the
2815 user-level va_list object, of type __builtin_va_list. */
2816 static tree
2817 arm_extract_valist_ptr (tree valist)
2818 {
2819 if (TREE_TYPE (valist) == error_mark_node)
2820 return error_mark_node;
2821
2822 /* On an AAPCS target, the pointer is stored within "struct
2823 va_list". */
2824 if (TARGET_AAPCS_BASED)
2825 {
2826 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2827 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2828 valist, ap_field, NULL_TREE);
2829 }
2830
2831 return valist;
2832 }
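/* In other words, on an AAPCS target a user-level "va_list ap" is rewritten
   here to the expression ap.__ap, so the generic
   std_expand_builtin_va_start and std_gimplify_va_arg_expr routines used
   below only ever see a plain "void *" cursor.  */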
2833
2834 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2835 static void
2836 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2837 {
2838 valist = arm_extract_valist_ptr (valist);
2839 std_expand_builtin_va_start (valist, nextarg);
2840 }
2841
2842 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2843 static tree
2844 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2845 gimple_seq *post_p)
2846 {
2847 valist = arm_extract_valist_ptr (valist);
2848 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2849 }
2850
2851 /* Check any incompatible options that the user has specified. */
2852 static void
2853 arm_option_check_internal (struct gcc_options *opts)
2854 {
2855 int flags = opts->x_target_flags;
2856
2857 /* iWMMXt and NEON are incompatible. */
2858 if (TARGET_IWMMXT
2859 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2860 error ("iWMMXt and NEON are incompatible");
2861
2862 /* Make sure that the processor choice does not conflict with any of the
2863 other command line choices. */
2864 if (TARGET_ARM_P (flags)
2865 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2866 error ("target CPU does not support ARM mode");
2867
2868 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2869 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2870 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2871
2872 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2873 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2874
2875 /* If this target is normally configured to use APCS frames, warn if they
2876 are turned off and debugging is turned on. */
2877 if (TARGET_ARM_P (flags)
2878 && write_symbols != NO_DEBUG
2879 && !TARGET_APCS_FRAME
2880 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2881 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2882
2883 /* iWMMXt unsupported under Thumb mode. */
2884 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2885 error ("iWMMXt unsupported under Thumb mode");
2886
2887 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2888 error ("can not use -mtp=cp15 with 16-bit Thumb");
2889
2890 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2891 {
2892 error ("RTP PIC is incompatible with Thumb");
2893 flag_pic = 0;
2894 }
2895
2896 if (target_pure_code || target_slow_flash_data)
2897 {
2898 const char *flag = (target_pure_code ? "-mpure-code" :
2899 "-mslow-flash-data");
2900
2901 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2902 with MOVT. */
2903 if (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON)
2904 error ("%s only supports non-pic code on M-profile targets with the "
2905 "MOVT instruction", flag);
2906
2907 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
2908 -mword-relocations forbids relocation of MOVT/MOVW. */
2909 if (target_word_relocations)
2910 error ("%s incompatible with -mword-relocations", flag);
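/* For example, an M-profile build that enables both -mslow-flash-data and
   -mword-relocations has no remaining way to materialise a symbolic
   address (the literal pool is forbidden by the former, MOVW/MOVT
   relocations by the latter), so the combination is rejected up front.  */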
2911 }
2912 }
2913
2914 /* Recompute the global settings depending on target attribute options. */
2915
2916 static void
2917 arm_option_params_internal (void)
2918 {
2919 /* If we are not using the default (ARM mode) section anchor offset
2920 ranges, then set the correct ranges now. */
2921 if (TARGET_THUMB1)
2922 {
2923 /* Thumb-1 LDR instructions cannot have negative offsets.
2924 Permissible positive offset ranges are 5-bit (for byte loads),
2925 6-bit (for halfword loads), or 7-bit (for word loads).
2926 Empirical results suggest a 7-bit anchor range gives the best
2927 overall code size. */
2928 targetm.min_anchor_offset = 0;
2929 targetm.max_anchor_offset = 127;
2930 }
2931 else if (TARGET_THUMB2)
2932 {
2933 /* The minimum is set such that the total size of the block
2934 for a particular anchor is 248 + 1 + 4095 = 4344 bytes, which is
2935 divisible by eight, ensuring natural spacing of anchors. */
2936 targetm.min_anchor_offset = -248;
2937 targetm.max_anchor_offset = 4095;
2938 }
2939 else
2940 {
2941 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2942 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2943 }
2944
2945 /* Increase the number of conditional instructions with -Os. */
2946 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2947
2948 /* For THUMB2, we limit the conditional sequence to one IT block. */
2949 if (TARGET_THUMB2)
2950 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
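/* An IT block can cover at most four conditionally executed instructions,
   so this keeps any conditionalised sequence within a single IT block.  */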
2951 }
2952
2953 /* True if -mflip-thumb should next add an attribute for the default
2954 mode, false if it should next add an attribute for the opposite mode. */
2955 static GTY(()) bool thumb_flipper;
2956
2957 /* Options after initial target override. */
2958 static GTY(()) tree init_optimize;
2959
2960 static void
2961 arm_override_options_after_change_1 (struct gcc_options *opts)
2962 {
2963 /* -falign-functions without argument: supply one. */
2964 if (opts->x_flag_align_functions && !opts->x_str_align_functions)
2965 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2966 && opts->x_optimize_size ? "2" : "4";
2967 }
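/* In effect, functions keep the default "4" (4-byte) alignment, and only a
   Thumb compilation that optimizes for size relaxes this to "2".  */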
2968
2969 /* Implement targetm.override_options_after_change. */
2970
2971 static void
2972 arm_override_options_after_change (void)
2973 {
2974 arm_configure_build_target (&arm_active_target,
2975 TREE_TARGET_OPTION (target_option_default_node),
2976 &global_options_set, false);
2977
2978 arm_override_options_after_change_1 (&global_options);
2979 }
2980
2981 /* Implement TARGET_OPTION_SAVE. */
2982 static void
2983 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2984 {
2985 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2986 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2987 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2988 }
2989
2990 /* Implement TARGET_OPTION_RESTORE. */
2991 static void
2992 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2993 {
2994 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2995 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2996 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2997 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2998 false);
2999 }
3000
3001 /* Reset options between modes that the user has specified. */
3002 static void
3003 arm_option_override_internal (struct gcc_options *opts,
3004 struct gcc_options *opts_set)
3005 {
3006 arm_override_options_after_change_1 (opts);
3007
3008 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3009 {
3010 /* The default is to enable interworking, so this warning message would
3011 be confusing to users who have just compiled with
3012 e.g. -march=armv4. */
3013 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3014 opts->x_target_flags &= ~MASK_INTERWORK;
3015 }
3016
3017 if (TARGET_THUMB_P (opts->x_target_flags)
3018 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3019 {
3020 warning (0, "target CPU does not support THUMB instructions");
3021 opts->x_target_flags &= ~MASK_THUMB;
3022 }
3023
3024 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3025 {
3026 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3027 opts->x_target_flags &= ~MASK_APCS_FRAME;
3028 }
3029
3030 /* Callee super interworking implies thumb interworking. Adding
3031 this to the flags here simplifies the logic elsewhere. */
3032 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3033 opts->x_target_flags |= MASK_INTERWORK;
3034
3035 /* Need to remember initial values so combinations of options like
3036 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3037 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3038
3039 if (! opts_set->x_arm_restrict_it)
3040 opts->x_arm_restrict_it = arm_arch8;
3041
3042 /* ARM execution state and M profile don't have [restrict] IT. */
3043 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3044 opts->x_arm_restrict_it = 0;
3045
3046 /* Enable -munaligned-access by default for
3047 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3048 i.e. Thumb2 and ARM state only.
3049 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3050 - ARMv8 architecture-based processors.
3051
3052 Disable -munaligned-access by default for
3053 - all pre-ARMv6 architecture-based processors
3054 - ARMv6-M architecture-based processors
3055 - ARMv8-M Baseline processors. */
3056
3057 if (! opts_set->x_unaligned_access)
3058 {
3059 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3060 && arm_arch6 && (arm_arch_notm || arm_arch7));
3061 }
3062 else if (opts->x_unaligned_access == 1
3063 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3064 {
3065 warning (0, "target CPU does not support unaligned accesses");
3066 opts->x_unaligned_access = 0;
3067 }
3068
3069 /* Don't warn since it's on by default in -O2. */
3070 if (TARGET_THUMB1_P (opts->x_target_flags))
3071 opts->x_flag_schedule_insns = 0;
3072 else
3073 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3074
3075 /* Disable shrink-wrap when optimizing function for size, since it tends to
3076 generate additional returns. */
3077 if (optimize_function_for_size_p (cfun)
3078 && TARGET_THUMB2_P (opts->x_target_flags))
3079 opts->x_flag_shrink_wrap = false;
3080 else
3081 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3082
3083 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3084 - epilogue_insns - does not accurately model the corresponding insns
3085 emitted in the asm file. In particular, see the comment in thumb_exit
3086 'Find out how many of the (return) argument registers we can corrupt'.
3087 As a consequence, the epilogue may clobber registers without fipa-ra
3088 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3089 TODO: Accurately model clobbers for epilogue_insns and reenable
3090 fipa-ra. */
3091 if (TARGET_THUMB1_P (opts->x_target_flags))
3092 opts->x_flag_ipa_ra = 0;
3093 else
3094 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3095
3096 /* Thumb2 inline assembly code should always use unified syntax.
3097 This will apply to ARM and Thumb1 eventually. */
3098 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3099
3100 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3101 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3102 #endif
3103 }
3104
3105 static sbitmap isa_all_fpubits;
3106 static sbitmap isa_quirkbits;
3107
3108 /* Configure a build target TARGET from the user-specified options OPTS and
3109 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3110 architecture have been specified, but the two are not identical. */
3111 void
3112 arm_configure_build_target (struct arm_build_target *target,
3113 struct cl_target_option *opts,
3114 struct gcc_options *opts_set,
3115 bool warn_compatible)
3116 {
3117 const cpu_option *arm_selected_tune = NULL;
3118 const arch_option *arm_selected_arch = NULL;
3119 const cpu_option *arm_selected_cpu = NULL;
3120 const arm_fpu_desc *arm_selected_fpu = NULL;
3121 const char *tune_opts = NULL;
3122 const char *arch_opts = NULL;
3123 const char *cpu_opts = NULL;
3124
3125 bitmap_clear (target->isa);
3126 target->core_name = NULL;
3127 target->arch_name = NULL;
3128
3129 if (opts_set->x_arm_arch_string)
3130 {
3131 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3132 "-march",
3133 opts->x_arm_arch_string);
3134 arch_opts = strchr (opts->x_arm_arch_string, '+');
3135 }
3136
3137 if (opts_set->x_arm_cpu_string)
3138 {
3139 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3140 opts->x_arm_cpu_string);
3141 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3142 arm_selected_tune = arm_selected_cpu;
3143 /* If taking the tuning from -mcpu, we don't need to rescan the
3144 options for tuning. */
3145 }
3146
3147 if (opts_set->x_arm_tune_string)
3148 {
3149 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3150 opts->x_arm_tune_string);
3151 tune_opts = strchr (opts->x_arm_tune_string, '+');
3152 }
3153
3154 if (arm_selected_arch)
3155 {
3156 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3157 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3158 arch_opts);
3159
3160 if (arm_selected_cpu)
3161 {
3162 auto_sbitmap cpu_isa (isa_num_bits);
3163 auto_sbitmap isa_delta (isa_num_bits);
3164
3165 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3166 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3167 cpu_opts);
3168 bitmap_xor (isa_delta, cpu_isa, target->isa);
3169 /* Ignore any bits that are quirk bits. */
3170 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3171 /* Ignore (for now) any bits that might be set by -mfpu. */
3172 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3173
3174 if (!bitmap_empty_p (isa_delta))
3175 {
3176 if (warn_compatible)
3177 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3178 arm_selected_cpu->common.name,
3179 arm_selected_arch->common.name);
3180 /* -march wins for code generation.
3181 -mcpu wins for default tuning. */
3182 if (!arm_selected_tune)
3183 arm_selected_tune = arm_selected_cpu;
3184
3185 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3186 target->arch_name = arm_selected_arch->common.name;
3187 }
3188 else
3189 {
3190 /* Architecture and CPU are essentially the same.
3191 Prefer the CPU setting. */
3192 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3193 target->core_name = arm_selected_cpu->common.name;
3194 /* Copy the CPU's capabilities, so that we inherit the
3195 appropriate extensions and quirks. */
3196 bitmap_copy (target->isa, cpu_isa);
3197 }
3198 }
3199 else
3200 {
3201 /* Pick a CPU based on the architecture. */
3202 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3203 target->arch_name = arm_selected_arch->common.name;
3204 /* Note: target->core_name is left unset in this path. */
3205 }
3206 }
3207 else if (arm_selected_cpu)
3208 {
3209 target->core_name = arm_selected_cpu->common.name;
3210 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3211 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3212 cpu_opts);
3213 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3214 }
3215 /* If the user did not specify a processor or architecture, choose
3216 one for them. */
3217 else
3218 {
3219 const cpu_option *sel;
3220 auto_sbitmap sought_isa (isa_num_bits);
3221 bitmap_clear (sought_isa);
3222 auto_sbitmap default_isa (isa_num_bits);
3223
3224 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3225 TARGET_CPU_DEFAULT);
3226 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3227 gcc_assert (arm_selected_cpu->common.name);
3228
3229 /* RWE: All of the selection logic below (to the end of this
3230 'if' clause) looks somewhat suspect. It appears to be mostly
3231 there to support forcing thumb support when the default CPU
3232 does not have thumb (somewhat dubious in terms of what the
3233 user might be expecting). I think it should be removed once
3234 support for the pre-thumb era cores is removed. */
3235 sel = arm_selected_cpu;
3236 arm_initialize_isa (default_isa, sel->common.isa_bits);
3237 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3238 cpu_opts);
3239
3240 /* Now check to see if the user has specified any command line
3241 switches that require certain abilities from the cpu. */
3242
3243 if (TARGET_INTERWORK || TARGET_THUMB)
3244 bitmap_set_bit (sought_isa, isa_bit_thumb);
3245
3246 /* If there are such requirements and the default CPU does not
3247 satisfy them, we need to run over the complete list of
3248 cores looking for one that is satisfactory. */
3249 if (!bitmap_empty_p (sought_isa)
3250 && !bitmap_subset_p (sought_isa, default_isa))
3251 {
3252 auto_sbitmap candidate_isa (isa_num_bits);
3253 /* We're only interested in a CPU with at least the
3254 capabilities of the default CPU and the required
3255 additional features. */
3256 bitmap_ior (default_isa, default_isa, sought_isa);
3257
3258 /* Try to locate a CPU type that supports all of the abilities
3259 of the default CPU, plus the extra abilities requested by
3260 the user. */
3261 for (sel = all_cores; sel->common.name != NULL; sel++)
3262 {
3263 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3264 /* An exact match? */
3265 if (bitmap_equal_p (default_isa, candidate_isa))
3266 break;
3267 }
3268
3269 if (sel->common.name == NULL)
3270 {
3271 unsigned current_bit_count = isa_num_bits;
3272 const cpu_option *best_fit = NULL;
3273
3274 /* Ideally we would like to issue an error message here
3275 saying that it was not possible to find a CPU compatible
3276 with the default CPU, but which also supports the command
3277 line options specified by the programmer, and so they
3278 ought to use the -mcpu=<name> command line option to
3279 override the default CPU type.
3280
3281 If we cannot find a CPU that has exactly the
3282 characteristics of the default CPU and the given
3283 command line options we scan the array again looking
3284 for a best match. The best match must have at least
3285 the capabilities of the perfect match. */
3286 for (sel = all_cores; sel->common.name != NULL; sel++)
3287 {
3288 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3289
3290 if (bitmap_subset_p (default_isa, candidate_isa))
3291 {
3292 unsigned count;
3293
3294 bitmap_and_compl (candidate_isa, candidate_isa,
3295 default_isa);
3296 count = bitmap_popcount (candidate_isa);
3297
3298 if (count < current_bit_count)
3299 {
3300 best_fit = sel;
3301 current_bit_count = count;
3302 }
3303 }
3304 }
3305 gcc_assert (best_fit);
3306 sel = best_fit;
3307 }
3308 
3309 arm_selected_cpu = sel;
3310 }
3311
3312 /* Now we know the CPU, we can finally initialize the target
3313 structure. */
3314 target->core_name = arm_selected_cpu->common.name;
3315 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3316 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3317 cpu_opts);
3318 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3319 }
3320
3321 gcc_assert (arm_selected_cpu);
3322 gcc_assert (arm_selected_arch);
3323
3324 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3325 {
3326 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3327 auto_sbitmap fpu_bits (isa_num_bits);
3328
3329 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3330 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3331 bitmap_ior (target->isa, target->isa, fpu_bits);
3332 }
3333
3334 if (!arm_selected_tune)
3335 arm_selected_tune = arm_selected_cpu;
3336 else /* Validate the features passed to -mtune. */
3337 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3338
3339 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3340
3341 /* Finish initializing the target structure. */
3342 target->arch_pp_name = arm_selected_arch->arch;
3343 target->base_arch = arm_selected_arch->base_arch;
3344 target->profile = arm_selected_arch->profile;
3345
3346 target->tune_flags = tune_data->tune_flags;
3347 target->tune = tune_data->tune;
3348 target->tune_core = tune_data->scheduler;
3349 arm_option_reconfigure_globals ();
3350 }
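/* Illustrative example (hypothetical command line, for exposition only):
   given, say, "-march=armv7-m -mcpu=cortex-a8", the ISA delta between the
   CPU and the architecture is non-empty, so the code above warns that the
   two switches conflict, keeps armv7-m for code generation and uses
   cortex-a8 only for the default tuning, as described in the comments
   above.  */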
3351
3352 /* Fix up any incompatible options that the user has specified. */
3353 static void
3354 arm_option_override (void)
3355 {
3356 static const enum isa_feature fpu_bitlist[]
3357 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3358 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3359 cl_target_option opts;
3360
3361 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3362 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3363
3364 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3365 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3366
3367 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3368
3369 if (!global_options_set.x_arm_fpu_index)
3370 {
3371 bool ok;
3372 int fpu_index;
3373
3374 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3375 CL_TARGET);
3376 gcc_assert (ok);
3377 arm_fpu_index = (enum fpu_type) fpu_index;
3378 }
3379
3380 cl_target_option_save (&opts, &global_options);
3381 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3382 true);
3383
3384 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3385 SUBTARGET_OVERRIDE_OPTIONS;
3386 #endif
3387
3388 /* Initialize boolean versions of the architectural flags, for use
3389 in the arm.md file and for enabling feature flags. */
3390 arm_option_reconfigure_globals ();
3391
3392 arm_tune = arm_active_target.tune_core;
3393 tune_flags = arm_active_target.tune_flags;
3394 current_tune = arm_active_target.tune;
3395
3396 /* TBD: Dwarf info for apcs frame is not handled yet. */
3397 if (TARGET_APCS_FRAME)
3398 flag_shrink_wrap = false;
3399
3400 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3401 {
3402 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3403 target_flags |= MASK_APCS_FRAME;
3404 }
3405
3406 if (TARGET_POKE_FUNCTION_NAME)
3407 target_flags |= MASK_APCS_FRAME;
3408
3409 if (TARGET_APCS_REENT && flag_pic)
3410 error ("-fpic and -mapcs-reent are incompatible");
3411
3412 if (TARGET_APCS_REENT)
3413 warning (0, "APCS reentrant code not supported. Ignored");
3414
3415 /* Set up some tuning parameters. */
3416 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3417 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3418 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3419 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3420 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3421 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3422
3423 /* For arm2/3 there is no need to do any scheduling if we are doing
3424 software floating-point. */
3425 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3426 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3427
3428 /* Override the default structure alignment for AAPCS ABI. */
3429 if (!global_options_set.x_arm_structure_size_boundary)
3430 {
3431 if (TARGET_AAPCS_BASED)
3432 arm_structure_size_boundary = 8;
3433 }
3434 else
3435 {
3436 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3437
3438 if (arm_structure_size_boundary != 8
3439 && arm_structure_size_boundary != 32
3440 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3441 {
3442 if (ARM_DOUBLEWORD_ALIGN)
3443 warning (0,
3444 "structure size boundary can only be set to 8, 32 or 64");
3445 else
3446 warning (0, "structure size boundary can only be set to 8 or 32");
3447 arm_structure_size_boundary
3448 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3449 }
3450 }
3451
3452 if (TARGET_VXWORKS_RTP)
3453 {
3454 if (!global_options_set.x_arm_pic_data_is_text_relative)
3455 arm_pic_data_is_text_relative = 0;
3456 }
3457 else if (flag_pic
3458 && !arm_pic_data_is_text_relative
3459 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3460 /* When text & data segments don't have a fixed displacement, the
3461 intended use is with a single, read only, pic base register.
3462 Unless the user explicitly requested not to do that, set
3463 it. */
3464 target_flags |= MASK_SINGLE_PIC_BASE;
3465
3466 /* If stack checking is disabled, we can use r10 as the PIC register,
3467 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3468 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3469 {
3470 if (TARGET_VXWORKS_RTP)
3471 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3472 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3473 }
3474
3475 if (flag_pic && TARGET_VXWORKS_RTP)
3476 arm_pic_register = 9;
3477
3478 if (arm_pic_register_string != NULL)
3479 {
3480 int pic_register = decode_reg_name (arm_pic_register_string);
3481
3482 if (!flag_pic)
3483 warning (0, "-mpic-register= is useless without -fpic");
3484
3485 /* Prevent the user from choosing an obviously stupid PIC register. */
3486 else if (pic_register < 0 || call_used_regs[pic_register]
3487 || pic_register == HARD_FRAME_POINTER_REGNUM
3488 || pic_register == STACK_POINTER_REGNUM
3489 || pic_register >= PC_REGNUM
3490 || (TARGET_VXWORKS_RTP
3491 && (unsigned int) pic_register != arm_pic_register))
3492 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3493 else
3494 arm_pic_register = pic_register;
3495 }
3496
3497 if (flag_pic)
3498 target_word_relocations = 1;
3499
3500 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3501 if (fix_cm3_ldrd == 2)
3502 {
3503 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3504 fix_cm3_ldrd = 1;
3505 else
3506 fix_cm3_ldrd = 0;
3507 }
3508
3509 /* Hot/Cold partitioning is not currently supported, since we can't
3510 handle literal pool placement in that case. */
3511 if (flag_reorder_blocks_and_partition)
3512 {
3513 inform (input_location,
3514 "-freorder-blocks-and-partition not supported on this architecture");
3515 flag_reorder_blocks_and_partition = 0;
3516 flag_reorder_blocks = 1;
3517 }
3518
3519 if (flag_pic)
3520 /* Hoisting PIC address calculations more aggressively provides a small,
3521 but measurable, size reduction for PIC code. Therefore, we decrease
3522 the bar for unrestricted expression hoisting to the cost of PIC address
3523 calculation, which is 2 instructions. */
3524 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3525 global_options.x_param_values,
3526 global_options_set.x_param_values);
3527
3528 /* ARM EABI defaults to strict volatile bitfields. */
3529 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3530 && abi_version_at_least(2))
3531 flag_strict_volatile_bitfields = 1;
3532
3533 /* Enable software prefetching at -O3 for CPUs that have prefetch, and
3534 where we have deemed it beneficial (signified by setting
3535 prefetch.num_slots to 1 or more). */
3536 if (flag_prefetch_loop_arrays < 0
3537 && HAVE_prefetch
3538 && optimize >= 3
3539 && current_tune->prefetch.num_slots > 0)
3540 flag_prefetch_loop_arrays = 1;
3541
3542 /* Set up parameters to be used in prefetching algorithm. Do not
3543 override the defaults unless we are tuning for a core we have
3544 researched values for. */
3545 if (current_tune->prefetch.num_slots > 0)
3546 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3547 current_tune->prefetch.num_slots,
3548 global_options.x_param_values,
3549 global_options_set.x_param_values);
3550 if (current_tune->prefetch.l1_cache_line_size >= 0)
3551 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3552 current_tune->prefetch.l1_cache_line_size,
3553 global_options.x_param_values,
3554 global_options_set.x_param_values);
3555 if (current_tune->prefetch.l1_cache_size >= 0)
3556 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3557 current_tune->prefetch.l1_cache_size,
3558 global_options.x_param_values,
3559 global_options_set.x_param_values);
3560
3561 /* Use Neon to perform 64-bit operations rather than core
3562 registers. */
3563 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3564 if (use_neon_for_64bits == 1)
3565 prefer_neon_for_64bits = true;
3566
3567 /* Use the alternative scheduling-pressure algorithm by default. */
3568 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3569 global_options.x_param_values,
3570 global_options_set.x_param_values);
3571
3572 /* Look through ready list and all of queue for instructions
3573 relevant for L2 auto-prefetcher. */
3574 int param_sched_autopref_queue_depth;
3575
3576 switch (current_tune->sched_autopref)
3577 {
3578 case tune_params::SCHED_AUTOPREF_OFF:
3579 param_sched_autopref_queue_depth = -1;
3580 break;
3581
3582 case tune_params::SCHED_AUTOPREF_RANK:
3583 param_sched_autopref_queue_depth = 0;
3584 break;
3585
3586 case tune_params::SCHED_AUTOPREF_FULL:
3587 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3588 break;
3589
3590 default:
3591 gcc_unreachable ();
3592 }
3593
3594 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3595 param_sched_autopref_queue_depth,
3596 global_options.x_param_values,
3597 global_options_set.x_param_values);
3598
3599 /* Currently, for slow flash data, we just disable literal pools. We also
3600 disable them for pure-code. */
3601 if (target_slow_flash_data || target_pure_code)
3602 arm_disable_literal_pool = true;
3603
3604 /* Disable scheduling fusion by default if the target is not an armv7
3605 processor or does not prefer ldrd/strd. */
3606 if (flag_schedule_fusion == 2
3607 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3608 flag_schedule_fusion = 0;
3609
3610 /* Need to remember the initial options before they are overridden. */
3611 init_optimize = build_optimization_node (&global_options);
3612
3613 arm_options_perform_arch_sanity_checks ();
3614 arm_option_override_internal (&global_options, &global_options_set);
3615 arm_option_check_internal (&global_options);
3616 arm_option_params_internal ();
3617
3618 /* Create the default target_options structure. */
3619 target_option_default_node = target_option_current_node
3620 = build_target_option_node (&global_options);
3621
3622 /* Register global variables with the garbage collector. */
3623 arm_add_gc_roots ();
3624
3625 /* Init initial mode for testing. */
3626 thumb_flipper = TARGET_THUMB;
3627 }
3628
3629
3630 /* Reconfigure global status flags from the active_target.isa. */
3631 void
3632 arm_option_reconfigure_globals (void)
3633 {
3634 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3635 arm_base_arch = arm_active_target.base_arch;
3636
3637 /* Initialize boolean versions of the architectural flags, for use
3638 in the arm.md file. */
3639 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3640 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3641 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3642 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3643 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3644 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3645 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3646 arm_arch6m = arm_arch6 && !arm_arch_notm;
3647 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3648 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3649 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3650 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3651 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3652 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3653 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3654 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3655 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3656 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3657 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3658 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3659 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3660 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3661 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3662 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3663 if (arm_fp16_inst)
3664 {
3665 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3666 error ("selected fp16 options are incompatible");
3667 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3668 }
3669
3670 /* And finally, set up some quirks. */
3671 arm_arch_no_volatile_ce
3672 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3673 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3674 isa_bit_quirk_armv6kz);
3675
3676 /* Use the cp15 method if it is available. */
3677 if (target_thread_pointer == TP_AUTO)
3678 {
3679 if (arm_arch6k && !TARGET_THUMB1)
3680 target_thread_pointer = TP_CP15;
3681 else
3682 target_thread_pointer = TP_SOFT;
3683 }
3684 }
3685
3686 /* Perform some validation between the desired architecture and the rest of the
3687 options. */
3688 void
3689 arm_options_perform_arch_sanity_checks (void)
3690 {
3691 /* V5T code we generate is completely interworking capable, so we turn off
3692 TARGET_INTERWORK here to avoid many tests later on. */
3693
3694 /* XXX However, we must pass the right pre-processor defines to CPP
3695 or GLD can get confused. This is a hack. */
3696 if (TARGET_INTERWORK)
3697 arm_cpp_interwork = 1;
3698
3699 if (arm_arch5t)
3700 target_flags &= ~MASK_INTERWORK;
3701
3702 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3703 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3704
3705 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3706 error ("iwmmxt abi requires an iwmmxt capable cpu");
3707
3708 /* BPABI targets use linker tricks to allow interworking on cores
3709 without thumb support. */
3710 if (TARGET_INTERWORK
3711 && !TARGET_BPABI
3712 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3713 {
3714 warning (0, "target CPU does not support interworking");
3715 target_flags &= ~MASK_INTERWORK;
3716 }
3717
3718 /* If soft-float is specified then don't use FPU. */
3719 if (TARGET_SOFT_FLOAT)
3720 arm_fpu_attr = FPU_NONE;
3721 else
3722 arm_fpu_attr = FPU_VFP;
3723
3724 if (TARGET_AAPCS_BASED)
3725 {
3726 if (TARGET_CALLER_INTERWORKING)
3727 error ("AAPCS does not support -mcaller-super-interworking");
3728 else
3729 if (TARGET_CALLEE_INTERWORKING)
3730 error ("AAPCS does not support -mcallee-super-interworking");
3731 }
3732
3733 /* __fp16 support currently assumes the core has ldrh. */
3734 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3735 sorry ("__fp16 and no ldrh");
3736
3737 if (use_cmse && !arm_arch_cmse)
3738 error ("target CPU does not support ARMv8-M Security Extensions");
3739
3740 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3741 and ARMv8-M Baseline and Mainline do not allow such configuration. */
3742 if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3743 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3744
3745
3746 if (TARGET_AAPCS_BASED)
3747 {
3748 if (arm_abi == ARM_ABI_IWMMXT)
3749 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3750 else if (TARGET_HARD_FLOAT_ABI)
3751 {
3752 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3753 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3754 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3755 }
3756 else
3757 arm_pcs_default = ARM_PCS_AAPCS;
3758 }
3759 else
3760 {
3761 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3762 sorry ("-mfloat-abi=hard and VFP");
3763
3764 if (arm_abi == ARM_ABI_APCS)
3765 arm_pcs_default = ARM_PCS_APCS;
3766 else
3767 arm_pcs_default = ARM_PCS_ATPCS;
3768 }
3769 }
3770
3771 static void
3772 arm_add_gc_roots (void)
3773 {
3774 gcc_obstack_init(&minipool_obstack);
3775 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3776 }
3777 \f
3778 /* A table of known ARM exception types.
3779 For use with the interrupt function attribute. */
3780
3781 typedef struct
3782 {
3783 const char *const arg;
3784 const unsigned long return_value;
3785 }
3786 isr_attribute_arg;
3787
3788 static const isr_attribute_arg isr_attribute_args [] =
3789 {
3790 { "IRQ", ARM_FT_ISR },
3791 { "irq", ARM_FT_ISR },
3792 { "FIQ", ARM_FT_FIQ },
3793 { "fiq", ARM_FT_FIQ },
3794 { "ABORT", ARM_FT_ISR },
3795 { "abort", ARM_FT_ISR },
3796 { "ABORT", ARM_FT_ISR },
3797 { "abort", ARM_FT_ISR },
3798 { "UNDEF", ARM_FT_EXCEPTION },
3799 { "undef", ARM_FT_EXCEPTION },
3800 { "SWI", ARM_FT_EXCEPTION },
3801 { "swi", ARM_FT_EXCEPTION },
3802 { NULL, ARM_FT_NORMAL }
3803 };
3804
3805 /* Returns the (interrupt) function type of the current
3806 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3807
3808 static unsigned long
3809 arm_isr_value (tree argument)
3810 {
3811 const isr_attribute_arg * ptr;
3812 const char * arg;
3813
3814 if (!arm_arch_notm)
3815 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3816
3817 /* No argument - default to IRQ. */
3818 if (argument == NULL_TREE)
3819 return ARM_FT_ISR;
3820
3821 /* Get the value of the argument. */
3822 if (TREE_VALUE (argument) == NULL_TREE
3823 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3824 return ARM_FT_UNKNOWN;
3825
3826 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3827
3828 /* Check it against the list of known arguments. */
3829 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3830 if (streq (arg, ptr->arg))
3831 return ptr->return_value;
3832
3833 /* An unrecognized interrupt type. */
3834 return ARM_FT_UNKNOWN;
3835 }
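/* Illustrative example (hypothetical user code): the table above is matched
   against the string argument of the "isr"/"interrupt" function attribute,
   e.g.

     void uart_handler (void) __attribute__ ((interrupt ("IRQ")));

   for which arm_isr_value returns ARM_FT_ISR.  */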
3836
3837 /* Computes the type of the current function. */
3838
3839 static unsigned long
3840 arm_compute_func_type (void)
3841 {
3842 unsigned long type = ARM_FT_UNKNOWN;
3843 tree a;
3844 tree attr;
3845
3846 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3847
3848 /* Decide if the current function is volatile. Such functions
3849 never return, and many memory cycles can be saved by not storing
3850 register values that will never be needed again. This optimization
3851 was added to speed up context switching in a kernel application. */
3852 if (optimize > 0
3853 && (TREE_NOTHROW (current_function_decl)
3854 || !(flag_unwind_tables
3855 || (flag_exceptions
3856 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3857 && TREE_THIS_VOLATILE (current_function_decl))
3858 type |= ARM_FT_VOLATILE;
3859
3860 if (cfun->static_chain_decl != NULL)
3861 type |= ARM_FT_NESTED;
3862
3863 attr = DECL_ATTRIBUTES (current_function_decl);
3864
3865 a = lookup_attribute ("naked", attr);
3866 if (a != NULL_TREE)
3867 type |= ARM_FT_NAKED;
3868
3869 a = lookup_attribute ("isr", attr);
3870 if (a == NULL_TREE)
3871 a = lookup_attribute ("interrupt", attr);
3872
3873 if (a == NULL_TREE)
3874 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3875 else
3876 type |= arm_isr_value (TREE_VALUE (a));
3877
3878 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3879 type |= ARM_FT_CMSE_ENTRY;
3880
3881 return type;
3882 }
3883
3884 /* Returns the type of the current function. */
3885
3886 unsigned long
3887 arm_current_func_type (void)
3888 {
3889 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3890 cfun->machine->func_type = arm_compute_func_type ();
3891
3892 return cfun->machine->func_type;
3893 }
3894
3895 bool
3896 arm_allocate_stack_slots_for_args (void)
3897 {
3898 /* Naked functions should not allocate stack slots for arguments. */
3899 return !IS_NAKED (arm_current_func_type ());
3900 }
3901
3902 static bool
3903 arm_warn_func_return (tree decl)
3904 {
3905 /* Naked functions are implemented entirely in assembly, including the
3906 return sequence, so suppress warnings about this. */
3907 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3908 }
3909
3910 \f
3911 /* Output assembler code for a block containing the constant parts
3912 of a trampoline, leaving space for the variable parts.
3913
3914 On the ARM, (if r8 is the static chain regnum, and remembering that
3915 referencing pc adds an offset of 8) the trampoline looks like:
3916 ldr r8, [pc, #0]
3917 ldr pc, [pc]
3918 .word static chain value
3919 .word function's address
3920 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3921
3922 static void
3923 arm_asm_trampoline_template (FILE *f)
3924 {
3925 fprintf (f, "\t.syntax unified\n");
3926
3927 if (TARGET_ARM)
3928 {
3929 fprintf (f, "\t.arm\n");
3930 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3931 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3932 }
3933 else if (TARGET_THUMB2)
3934 {
3935 fprintf (f, "\t.thumb\n");
3936 /* The Thumb-2 trampoline is similar to the arm implementation.
3937 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3938 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3939 STATIC_CHAIN_REGNUM, PC_REGNUM);
3940 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3941 }
3942 else
3943 {
3944 ASM_OUTPUT_ALIGN (f, 2);
3945 fprintf (f, "\t.code\t16\n");
3946 fprintf (f, ".Ltrampoline_start:\n");
3947 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3948 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3949 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3950 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3951 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3952 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3953 }
3954 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3955 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3956 }
3957
3958 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3959
3960 static void
3961 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3962 {
3963 rtx fnaddr, mem, a_tramp;
3964
3965 emit_block_move (m_tramp, assemble_trampoline_template (),
3966 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3967
3968 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3969 emit_move_insn (mem, chain_value);
3970
3971 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3972 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3973 emit_move_insn (mem, fnaddr);
3974
3975 a_tramp = XEXP (m_tramp, 0);
3976 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3977 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3978 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3979 }
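/* Illustrative layout (for exposition only; see the template above): for the
   32-bit (ARM/Thumb-2) templates the trampoline is

     offset  0:  ldr  <static chain reg>, [pc, ...]
     offset  4:  ldr  pc, [pc, ...]
     offset  8:  .word <static chain value>
     offset 12:  .word <function address>

   which is why arm_trampoline_init above stores the chain value at offset 8
   and the function address at offset 12 when TARGET_32BIT.  */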
3980
3981 /* Thumb trampolines should be entered in thumb mode, so set
3982 the bottom bit of the address. */
3983
3984 static rtx
3985 arm_trampoline_adjust_address (rtx addr)
3986 {
3987 if (TARGET_THUMB)
3988 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3989 NULL, 0, OPTAB_LIB_WIDEN);
3990 return addr;
3991 }
3992 \f
3993 /* Return 1 if it is possible to return using a single instruction.
3994 If SIBLING is non-null, this is a test for a return before a sibling
3995 call. SIBLING is the call insn, so we can examine its register usage. */
3996
3997 int
3998 use_return_insn (int iscond, rtx sibling)
3999 {
4000 int regno;
4001 unsigned int func_type;
4002 unsigned long saved_int_regs;
4003 unsigned HOST_WIDE_INT stack_adjust;
4004 arm_stack_offsets *offsets;
4005
4006 /* Never use a return instruction before reload has run. */
4007 if (!reload_completed)
4008 return 0;
4009
4010 func_type = arm_current_func_type ();
4011
4012 /* Naked, volatile and stack alignment functions need special
4013 consideration. */
4014 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4015 return 0;
4016
4017 /* So do interrupt functions that use the frame pointer and Thumb
4018 interrupt functions. */
4019 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4020 return 0;
4021
4022 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4023 && !optimize_function_for_size_p (cfun))
4024 return 0;
4025
4026 offsets = arm_get_frame_offsets ();
4027 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4028
4029 /* As do variadic functions. */
4030 if (crtl->args.pretend_args_size
4031 || cfun->machine->uses_anonymous_args
4032 /* Or if the function calls __builtin_eh_return () */
4033 || crtl->calls_eh_return
4034 /* Or if the function calls alloca */
4035 || cfun->calls_alloca
4036 /* Or if there is a stack adjustment. However, if the stack pointer
4037 is saved on the stack, we can use a pre-incrementing stack load. */
4038 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4039 && stack_adjust == 4))
4040 /* Or if the static chain register was saved above the frame, under the
4041 assumption that the stack pointer isn't saved on the stack. */
4042 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4043 && arm_compute_static_chain_stack_bytes() != 0))
4044 return 0;
4045
4046 saved_int_regs = offsets->saved_regs_mask;
4047
4048 /* Unfortunately, the insn
4049
4050 ldmib sp, {..., sp, ...}
4051
4052 triggers a bug on most SA-110 based devices, such that the stack
4053 pointer won't be correctly restored if the instruction takes a
4054 page fault. We work around this problem by popping r3 along with
4055 the other registers, since that is never slower than executing
4056 another instruction.
4057
4058 We test for !arm_arch5t here, because code for any architecture
4059 less than this could potentially be run on one of the buggy
4060 chips. */
4061 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4062 {
4063 /* Validate that r3 is a call-clobbered register (always true in
4064 the default abi) ... */
4065 if (!call_used_regs[3])
4066 return 0;
4067
4068 /* ... that it isn't being used for a return value ... */
4069 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4070 return 0;
4071
4072 /* ... or for a tail-call argument ... */
4073 if (sibling)
4074 {
4075 gcc_assert (CALL_P (sibling));
4076
4077 if (find_regno_fusage (sibling, USE, 3))
4078 return 0;
4079 }
4080
4081 /* ... and that there are no call-saved registers in r0-r2
4082 (always true in the default ABI). */
4083 if (saved_int_regs & 0x7)
4084 return 0;
4085 }
4086
4087 /* Can't be done if interworking with Thumb, and any registers have been
4088 stacked. */
4089 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4090 return 0;
4091
4092 /* On StrongARM, conditional returns are expensive if they aren't
4093 taken and multiple registers have been stacked. */
4094 if (iscond && arm_tune_strongarm)
4095 {
4096 /* Conditional return when just the LR is stored is a simple
4097 conditional-load instruction, that's not expensive. */
4098 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4099 return 0;
4100
4101 if (flag_pic
4102 && arm_pic_register != INVALID_REGNUM
4103 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4104 return 0;
4105 }
4106
4107 /* ARMv8-M non-secure entry functions need to use bxns to return and thus
4108 need several instructions if anything needs to be popped. */
4109 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4110 return 0;
4111
4112 /* If there are saved registers but the LR isn't saved, then we need
4113 two instructions for the return. */
4114 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4115 return 0;
4116
4117 /* Can't be done if any of the VFP regs are pushed,
4118 since this also requires an insn. */
4119 if (TARGET_HARD_FLOAT)
4120 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4121 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4122 return 0;
4123
4124 if (TARGET_REALLY_IWMMXT)
4125 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4126 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4127 return 0;
4128
4129 return 1;
4130 }
4131
4132 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4133 shrink-wrapping if possible. This is the case if we need to emit a
4134 prologue, which we can test by looking at the offsets. */
4135 bool
4136 use_simple_return_p (void)
4137 {
4138 arm_stack_offsets *offsets;
4139
4140 /* Note this function can be called before or after reload. */
4141 if (!reload_completed)
4142 arm_compute_frame_layout ();
4143
4144 offsets = arm_get_frame_offsets ();
4145 return offsets->outgoing_args != 0;
4146 }
4147
4148 /* Return TRUE if int I is a valid immediate ARM constant. */
4149
4150 int
4151 const_ok_for_arm (HOST_WIDE_INT i)
4152 {
4153 int lowbit;
4154
4155 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4156 be all zero, or all one. */
4157 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4158 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4159 != ((~(unsigned HOST_WIDE_INT) 0)
4160 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4161 return FALSE;
4162
4163 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4164
4165 /* Fast return for 0 and small values. We must do this for zero, since
4166 the code below can't handle that one case. */
4167 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4168 return TRUE;
4169
4170 /* Get the number of trailing zeros. */
4171 lowbit = ffs((int) i) - 1;
4172
4173 /* Only even shifts are allowed in ARM mode so round down to the
4174 nearest even number. */
4175 if (TARGET_ARM)
4176 lowbit &= ~1;
4177
4178 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4179 return TRUE;
4180
4181 if (TARGET_ARM)
4182 {
4183 /* Allow rotated constants in ARM mode. */
4184 if (lowbit <= 4
4185 && ((i & ~0xc000003f) == 0
4186 || (i & ~0xf000000f) == 0
4187 || (i & ~0xfc000003) == 0))
4188 return TRUE;
4189 }
4190 else if (TARGET_THUMB2)
4191 {
4192 HOST_WIDE_INT v;
4193
4194 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4195 v = i & 0xff;
4196 v |= v << 16;
4197 if (i == v || i == (v | (v << 8)))
4198 return TRUE;
4199
4200 /* Allow repeated pattern 0xXY00XY00. */
4201 v = i & 0xff00;
4202 v |= v << 16;
4203 if (i == v)
4204 return TRUE;
4205 }
4206 else if (TARGET_HAVE_MOVT)
4207 {
4208 /* Thumb-1 Targets with MOVT. */
4209 if (i > 0xffff)
4210 return FALSE;
4211 else
4212 return TRUE;
4213 }
4214
4215 return FALSE;
4216 }
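/* Illustrative worked example (values chosen for exposition only): an
   ARM-mode data-processing immediate is an 8-bit value rotated right by an
   even amount.  0x00AB0000 is accepted above: lowbit is 16 and
   0x00AB0000 & ~(0xff << 16) == 0.  0x00000102 is rejected in ARM mode:
   lowbit rounds down to 0, 0x102 & ~0xff == 0x100, and none of the
   wrap-around masks (0xc000003f, 0xf000000f, 0xfc000003) cover it; Thumb-2,
   which allows odd rotations, does accept it.  */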
4217
4218 /* Return true if I is a valid constant for the operation CODE. */
4219 int
4220 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4221 {
4222 if (const_ok_for_arm (i))
4223 return 1;
4224
4225 switch (code)
4226 {
4227 case SET:
4228 /* See if we can use movw. */
4229 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4230 return 1;
4231 else
4232 /* Otherwise, try mvn. */
4233 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4234
4235 case PLUS:
4236 /* See if we can use addw or subw. */
4237 if (TARGET_THUMB2
4238 && ((i & 0xfffff000) == 0
4239 || ((-i) & 0xfffff000) == 0))
4240 return 1;
4241 /* Fall through. */
4242 case COMPARE:
4243 case EQ:
4244 case NE:
4245 case GT:
4246 case LE:
4247 case LT:
4248 case GE:
4249 case GEU:
4250 case LTU:
4251 case GTU:
4252 case LEU:
4253 case UNORDERED:
4254 case ORDERED:
4255 case UNEQ:
4256 case UNGE:
4257 case UNLT:
4258 case UNGT:
4259 case UNLE:
4260 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4261
4262 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4263 case XOR:
4264 return 0;
4265
4266 case IOR:
4267 if (TARGET_THUMB2)
4268 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4269 return 0;
4270
4271 case AND:
4272 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4273
4274 default:
4275 gcc_unreachable ();
4276 }
4277 }
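/* Illustrative worked example (for exposition only): const_ok_for_op (-1,
   COMPARE) is true even though -1 is not itself a valid immediate, because
   const_ok_for_arm (1) holds and the comparison can use the negated
   constant (a cmp with #-1 can become cmn with #1).  Likewise a SET may be
   handled with mvn when the bitwise complement of the value is a valid
   immediate.  */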
4278
4279 /* Return true if I is a valid di mode constant for the operation CODE. */
4280 int
4281 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4282 {
4283 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4284 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4285 rtx hi = GEN_INT (hi_val);
4286 rtx lo = GEN_INT (lo_val);
4287
4288 if (TARGET_THUMB1)
4289 return 0;
4290
4291 switch (code)
4292 {
4293 case AND:
4294 case IOR:
4295 case XOR:
4296 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4297 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4298 case PLUS:
4299 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4300
4301 default:
4302 return 0;
4303 }
4304 }
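/* Illustrative worked example (for exposition only): for the DImode
   constant 0x00000000ffffffff with CODE == AND, hi_val is 0 (a valid
   immediate) and lo_val is 0xffffffff (the "word is unchanged" case), so
   the constant is accepted and the operation can later be split into two
   SImode operations.  */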
4305
4306 /* Emit a sequence of insns to handle a large constant.
4307 CODE is the code of the operation required, it can be any of SET, PLUS,
4308 IOR, AND, XOR, MINUS;
4309 MODE is the mode in which the operation is being performed;
4310 VAL is the integer to operate on;
4311 SOURCE is the other operand (a register, or a null-pointer for SET);
4312 SUBTARGETS means it is safe to create scratch registers if that will
4313 either produce a simpler sequence, or we will want to cse the values.
4314 Return value is the number of insns emitted. */
4315
4316 /* ??? Tweak this for thumb2. */
4317 int
4318 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4319 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4320 {
4321 rtx cond;
4322
4323 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4324 cond = COND_EXEC_TEST (PATTERN (insn));
4325 else
4326 cond = NULL_RTX;
4327
4328 if (subtargets || code == SET
4329 || (REG_P (target) && REG_P (source)
4330 && REGNO (target) != REGNO (source)))
4331 {
4332 /* After arm_reorg has been called, we can't fix up expensive
4333 constants by pushing them into memory so we must synthesize
4334 them in-line, regardless of the cost. This is only likely to
4335 be more costly on chips that have load delay slots and we are
4336 compiling without running the scheduler (so no splitting
4337 occurred before the final instruction emission).
4338
4339 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4340 */
4341 if (!cfun->machine->after_arm_reorg
4342 && !cond
4343 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4344 1, 0)
4345 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4346 + (code != SET))))
4347 {
4348 if (code == SET)
4349 {
4350 /* Currently SET is the only monadic value for CODE, all
4351 the rest are dyadic. */
4352 if (TARGET_USE_MOVT)
4353 arm_emit_movpair (target, GEN_INT (val));
4354 else
4355 emit_set_insn (target, GEN_INT (val));
4356
4357 return 1;
4358 }
4359 else
4360 {
4361 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4362
4363 if (TARGET_USE_MOVT)
4364 arm_emit_movpair (temp, GEN_INT (val));
4365 else
4366 emit_set_insn (temp, GEN_INT (val));
4367
4368 /* For MINUS, the value is subtracted from, since we never
4369 have subtraction of a constant. */
4370 if (code == MINUS)
4371 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4372 else
4373 emit_set_insn (target,
4374 gen_rtx_fmt_ee (code, mode, source, temp));
4375 return 2;
4376 }
4377 }
4378 }
4379
4380 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4381 1);
4382 }
4383
4384 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4385 ARM/THUMB2 immediates and add up to VAL.
4386 The function return value gives the number of insns required. */
4387 static int
4388 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4389 struct four_ints *return_sequence)
4390 {
4391 int best_consecutive_zeros = 0;
4392 int i;
4393 int best_start = 0;
4394 int insns1, insns2;
4395 struct four_ints tmp_sequence;
4396
4397 /* If we aren't targeting ARM, the best place to start is always at
4398 the bottom, otherwise look more closely. */
4399 if (TARGET_ARM)
4400 {
4401 for (i = 0; i < 32; i += 2)
4402 {
4403 int consecutive_zeros = 0;
4404
4405 if (!(val & (3 << i)))
4406 {
4407 while ((i < 32) && !(val & (3 << i)))
4408 {
4409 consecutive_zeros += 2;
4410 i += 2;
4411 }
4412 if (consecutive_zeros > best_consecutive_zeros)
4413 {
4414 best_consecutive_zeros = consecutive_zeros;
4415 best_start = i - consecutive_zeros;
4416 }
4417 i -= 2;
4418 }
4419 }
4420 }
4421
4422 /* So long as it won't require any more insns to do so, it's
4423 desirable to emit a small constant (in bits 0...9) in the last
4424 insn. This way there is more chance that it can be combined with
4425 a later addressing insn to form a pre-indexed load or store
4426 operation. Consider:
4427
4428 *((volatile int *)0xe0000100) = 1;
4429 *((volatile int *)0xe0000110) = 2;
4430
4431 We want this to wind up as:
4432
4433 mov rA, #0xe0000000
4434 mov rB, #1
4435 str rB, [rA, #0x100]
4436 mov rB, #2
4437 str rB, [rA, #0x110]
4438
4439 rather than having to synthesize both large constants from scratch.
4440
4441 Therefore, we calculate how many insns would be required to emit
4442 the constant starting from `best_start', and also starting from
4443 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4444 yield a shorter sequence, we may as well use zero. */
4445 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4446 if (best_start != 0
4447 && ((HOST_WIDE_INT_1U << best_start) < val))
4448 {
4449 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4450 if (insns2 <= insns1)
4451 {
4452 *return_sequence = tmp_sequence;
4453 insns1 = insns2;
4454 }
4455 }
4456
4457 return insns1;
4458 }
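/* Illustrative worked example (register name and exact insn choice are for
   exposition only): in ARM mode 0x12340000 is not a single rotated
   immediate, but it splits into 0x12000000 + 0x00340000, each of which is,
   so for a SET the sequence found here leads to something like
     mov  rD, #0x12000000
     orr  rD, rD, #0x00340000   (or an equivalent add)
   rather than a load from the constant pool.  */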
4459
4460 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4461 static int
4462 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4463 struct four_ints *return_sequence, int i)
4464 {
4465 int remainder = val & 0xffffffff;
4466 int insns = 0;
4467
4468 /* Try and find a way of doing the job in either two or three
4469 instructions.
4470
4471 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4472 location. We start at position I. This may be the MSB, or
4473 optimal_immediate_sequence may have positioned it at the largest block
4474 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4475 wrapping around to the top of the word when we drop off the bottom.
4476 In the worst case this code should produce no more than four insns.
4477
4478 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4479 constants, shifted to any arbitrary location. We should always start
4480 at the MSB. */
4481 do
4482 {
4483 int end;
4484 unsigned int b1, b2, b3, b4;
4485 unsigned HOST_WIDE_INT result;
4486 int loc;
4487
4488 gcc_assert (insns < 4);
4489
4490 if (i <= 0)
4491 i += 32;
4492
4493 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4494 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4495 {
4496 loc = i;
4497 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4498 /* We can use addw/subw for the last 12 bits. */
4499 result = remainder;
4500 else
4501 {
4502 /* Use an 8-bit shifted/rotated immediate. */
4503 end = i - 8;
4504 if (end < 0)
4505 end += 32;
4506 result = remainder & ((0x0ff << end)
4507 | ((i < end) ? (0xff >> (32 - end))
4508 : 0));
4509 i -= 8;
4510 }
4511 }
4512 else
4513 {
4514 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4515 arbitrary shifts. */
4516 i -= TARGET_ARM ? 2 : 1;
4517 continue;
4518 }
4519
4520 /* Next, see if we can do a better job with a thumb2 replicated
4521 constant.
4522
4523 We do it this way around to catch the cases like 0x01F001E0 where
4524 two 8-bit immediates would work, but a replicated constant would
4525 make it worse.
4526
4527 TODO: 16-bit constants that don't clear all the bits, but still win.
4528 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4529 if (TARGET_THUMB2)
4530 {
4531 b1 = (remainder & 0xff000000) >> 24;
4532 b2 = (remainder & 0x00ff0000) >> 16;
4533 b3 = (remainder & 0x0000ff00) >> 8;
4534 b4 = remainder & 0xff;
4535
4536 if (loc > 24)
4537 {
4538 /* The 8-bit immediate already found clears b1 (and maybe b2),
4539 but must leave b3 and b4 alone. */
4540
4541 /* First try to find a 32-bit replicated constant that clears
4542 almost everything. We can assume that we can't do it in one,
4543 or else we wouldn't be here. */
4544 unsigned int tmp = b1 & b2 & b3 & b4;
4545 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4546 + (tmp << 24);
4547 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4548 + (tmp == b3) + (tmp == b4);
4549 if (tmp
4550 && (matching_bytes >= 3
4551 || (matching_bytes == 2
4552 && const_ok_for_op (remainder & ~tmp2, code))))
4553 {
4554 /* At least 3 of the bytes match, and the fourth has at
4555 least as many bits set, or two of the bytes match
4556 and it will only require one more insn to finish. */
4557 result = tmp2;
4558 i = tmp != b1 ? 32
4559 : tmp != b2 ? 24
4560 : tmp != b3 ? 16
4561 : 8;
4562 }
4563
4564 /* Second, try to find a 16-bit replicated constant that can
4565 leave three of the bytes clear. If b2 or b4 is already
4566 zero, then we can. If the 8-bit from above would not
4567 clear b2 anyway, then we still win. */
4568 else if (b1 == b3 && (!b2 || !b4
4569 || (remainder & 0x00ff0000 & ~result)))
4570 {
4571 result = remainder & 0xff00ff00;
4572 i = 24;
4573 }
4574 }
4575 else if (loc > 16)
4576 {
4577 /* The 8-bit immediate already found clears b2 (and maybe b3)
4578 and we don't get here unless b1 is already clear, but it will
4579 leave b4 unchanged. */
4580
4581 /* If we can clear b2 and b4 at once, then we win, since the
4582 8-bits couldn't possibly reach that far. */
4583 if (b2 == b4)
4584 {
4585 result = remainder & 0x00ff00ff;
4586 i = 16;
4587 }
4588 }
4589 }
4590
4591 return_sequence->i[insns++] = result;
4592 remainder &= ~result;
4593
4594 if (code == SET || code == MINUS)
4595 code = PLUS;
4596 }
4597 while (remainder);
4598
4599 return insns;
4600 }
4601
4602 /* Emit an instruction with the indicated PATTERN. If COND is
4603 non-NULL, conditionalize the execution of the instruction on COND
4604 being true. */
4605
4606 static void
4607 emit_constant_insn (rtx cond, rtx pattern)
4608 {
4609 if (cond)
4610 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4611 emit_insn (pattern);
4612 }
4613
4614 /* As above, but extra parameter GENERATE which, if clear, suppresses
4615 RTL generation. */
4616
4617 static int
4618 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4619 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4620 int subtargets, int generate)
4621 {
4622 int can_invert = 0;
4623 int can_negate = 0;
4624 int final_invert = 0;
4625 int i;
4626 int set_sign_bit_copies = 0;
4627 int clear_sign_bit_copies = 0;
4628 int clear_zero_bit_copies = 0;
4629 int set_zero_bit_copies = 0;
4630 int insns = 0, neg_insns, inv_insns;
4631 unsigned HOST_WIDE_INT temp1, temp2;
4632 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4633 struct four_ints *immediates;
4634 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4635
4636 /* Find out which operations are safe for a given CODE. Also do a quick
4637 check for degenerate cases; these can occur when DImode operations
4638 are split. */
4639 switch (code)
4640 {
4641 case SET:
4642 can_invert = 1;
4643 break;
4644
4645 case PLUS:
4646 can_negate = 1;
4647 break;
4648
4649 case IOR:
4650 if (remainder == 0xffffffff)
4651 {
4652 if (generate)
4653 emit_constant_insn (cond,
4654 gen_rtx_SET (target,
4655 GEN_INT (ARM_SIGN_EXTEND (val))));
4656 return 1;
4657 }
4658
4659 if (remainder == 0)
4660 {
4661 if (reload_completed && rtx_equal_p (target, source))
4662 return 0;
4663
4664 if (generate)
4665 emit_constant_insn (cond, gen_rtx_SET (target, source));
4666 return 1;
4667 }
4668 break;
4669
4670 case AND:
4671 if (remainder == 0)
4672 {
4673 if (generate)
4674 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4675 return 1;
4676 }
4677 if (remainder == 0xffffffff)
4678 {
4679 if (reload_completed && rtx_equal_p (target, source))
4680 return 0;
4681 if (generate)
4682 emit_constant_insn (cond, gen_rtx_SET (target, source));
4683 return 1;
4684 }
4685 can_invert = 1;
4686 break;
4687
4688 case XOR:
4689 if (remainder == 0)
4690 {
4691 if (reload_completed && rtx_equal_p (target, source))
4692 return 0;
4693 if (generate)
4694 emit_constant_insn (cond, gen_rtx_SET (target, source));
4695 return 1;
4696 }
4697
4698 if (remainder == 0xffffffff)
4699 {
4700 if (generate)
4701 emit_constant_insn (cond,
4702 gen_rtx_SET (target,
4703 gen_rtx_NOT (mode, source)));
4704 return 1;
4705 }
4706 final_invert = 1;
4707 break;
4708
4709 case MINUS:
4710 /* We treat MINUS as (val - source), since (source - val) is always
4711 passed as (source + (-val)). */
4712 if (remainder == 0)
4713 {
4714 if (generate)
4715 emit_constant_insn (cond,
4716 gen_rtx_SET (target,
4717 gen_rtx_NEG (mode, source)));
4718 return 1;
4719 }
4720 if (const_ok_for_arm (val))
4721 {
4722 if (generate)
4723 emit_constant_insn (cond,
4724 gen_rtx_SET (target,
4725 gen_rtx_MINUS (mode, GEN_INT (val),
4726 source)));
4727 return 1;
4728 }
4729
4730 break;
4731
4732 default:
4733 gcc_unreachable ();
4734 }
4735
4736 /* If we can do it in one insn get out quickly. */
4737 if (const_ok_for_op (val, code))
4738 {
4739 if (generate)
4740 emit_constant_insn (cond,
4741 gen_rtx_SET (target,
4742 (source
4743 ? gen_rtx_fmt_ee (code, mode, source,
4744 GEN_INT (val))
4745 : GEN_INT (val))));
4746 return 1;
4747 }
4748
4749 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4750 insn. */
4751 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4752 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4753 {
4754 if (generate)
4755 {
4756 if (mode == SImode && i == 16)
4757 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4758 smaller insn. */
4759 emit_constant_insn (cond,
4760 gen_zero_extendhisi2
4761 (target, gen_lowpart (HImode, source)));
4762 else
4763 /* Extz only supports SImode, but we can coerce the operands
4764 into that mode. */
4765 emit_constant_insn (cond,
4766 gen_extzv_t2 (gen_lowpart (SImode, target),
4767 gen_lowpart (SImode, source),
4768 GEN_INT (i), const0_rtx));
4769 }
4770
4771 return 1;
4772 }
4773
4774 /* Calculate a few attributes that may be useful for specific
4775 optimizations. */
4776 /* Count number of leading zeros. */
4777 for (i = 31; i >= 0; i--)
4778 {
4779 if ((remainder & (1 << i)) == 0)
4780 clear_sign_bit_copies++;
4781 else
4782 break;
4783 }
4784
4785 /* Count number of leading 1's. */
4786 for (i = 31; i >= 0; i--)
4787 {
4788 if ((remainder & (1 << i)) != 0)
4789 set_sign_bit_copies++;
4790 else
4791 break;
4792 }
4793
4794 /* Count number of trailing zero's. */
4795 for (i = 0; i <= 31; i++)
4796 {
4797 if ((remainder & (1 << i)) == 0)
4798 clear_zero_bit_copies++;
4799 else
4800 break;
4801 }
4802
4803 /* Count number of trailing 1's. */
4804 for (i = 0; i <= 31; i++)
4805 {
4806 if ((remainder & (1 << i)) != 0)
4807 set_zero_bit_copies++;
4808 else
4809 break;
4810 }
4811
4812 switch (code)
4813 {
4814 case SET:
4815 /* See if we can do this by sign_extending a constant that is known
4816 to be negative. This is a good way of doing it, since the shift
4817 may well merge into a subsequent insn. */
4818 if (set_sign_bit_copies > 1)
4819 {
4820 if (const_ok_for_arm
4821 (temp1 = ARM_SIGN_EXTEND (remainder
4822 << (set_sign_bit_copies - 1))))
4823 {
4824 if (generate)
4825 {
4826 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4827 emit_constant_insn (cond,
4828 gen_rtx_SET (new_src, GEN_INT (temp1)));
4829 emit_constant_insn (cond,
4830 gen_ashrsi3 (target, new_src,
4831 GEN_INT (set_sign_bit_copies - 1)));
4832 }
4833 return 2;
4834 }
4835 /* For an inverted constant, we will need to set the low bits,
4836 these will be shifted out of harm's way. */
4837 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4838 if (const_ok_for_arm (~temp1))
4839 {
4840 if (generate)
4841 {
4842 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4843 emit_constant_insn (cond,
4844 gen_rtx_SET (new_src, GEN_INT (temp1)));
4845 emit_constant_insn (cond,
4846 gen_ashrsi3 (target, new_src,
4847 GEN_INT (set_sign_bit_copies - 1)));
4848 }
4849 return 2;
4850 }
4851 }
4852
4853 /* See if we can calculate the value as the difference between two
4854 valid immediates. */
4855 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4856 {
4857 int topshift = clear_sign_bit_copies & ~1;
4858
4859 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4860 & (0xff000000 >> topshift));
4861
4862 /* If temp1 is zero, then that means the 9 most significant
4863 bits of remainder were 1 and we've caused it to overflow.
4864 When topshift is 0 we don't need to do anything since we
4865 can borrow from 'bit 32'. */
4866 if (temp1 == 0 && topshift != 0)
4867 temp1 = 0x80000000 >> (topshift - 1);
4868
4869 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4870
4871 if (const_ok_for_arm (temp2))
4872 {
4873 if (generate)
4874 {
4875 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4876 emit_constant_insn (cond,
4877 gen_rtx_SET (new_src, GEN_INT (temp1)));
4878 emit_constant_insn (cond,
4879 gen_addsi3 (target, new_src,
4880 GEN_INT (-temp2)));
4881 }
4882
4883 return 2;
4884 }
4885 }
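/* Illustrative worked example (register name for exposition only): for
   val == 0x00fffff0, clear_sign_bit_copies is 8 and clear_zero_bit_copies
   is 4, so the block above computes temp1 == 0x01000000 and temp2 == 0x10,
   both valid immediates, giving
     mov  rD, #0x01000000
     add  rD, rD, #-16          (i.e. sub rD, rD, #16)
   instead of a longer bitwise synthesis.  */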
4886
4887 /* See if we can generate this by setting the bottom (or the top)
4888 16 bits, and then shifting these into the other half of the
4889 word. We only look for the simplest cases, to do more would cost
4890 too much. Be careful, however, not to generate this when the
4891 alternative would take fewer insns. */
4892 if (val & 0xffff0000)
4893 {
4894 temp1 = remainder & 0xffff0000;
4895 temp2 = remainder & 0x0000ffff;
4896
4897 /* Overlaps outside this range are best done using other methods. */
4898 for (i = 9; i < 24; i++)
4899 {
4900 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4901 && !const_ok_for_arm (temp2))
4902 {
4903 rtx new_src = (subtargets
4904 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4905 : target);
4906 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4907 source, subtargets, generate);
4908 source = new_src;
4909 if (generate)
4910 emit_constant_insn
4911 (cond,
4912 gen_rtx_SET
4913 (target,
4914 gen_rtx_IOR (mode,
4915 gen_rtx_ASHIFT (mode, source,
4916 GEN_INT (i)),
4917 source)));
4918 return insns + 1;
4919 }
4920 }
4921
4922 /* Don't duplicate cases already considered. */
4923 for (i = 17; i < 24; i++)
4924 {
4925 if (((temp1 | (temp1 >> i)) == remainder)
4926 && !const_ok_for_arm (temp1))
4927 {
4928 rtx new_src = (subtargets
4929 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4930 : target);
4931 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4932 source, subtargets, generate);
4933 source = new_src;
4934 if (generate)
4935 emit_constant_insn
4936 (cond,
4937 gen_rtx_SET (target,
4938 gen_rtx_IOR
4939 (mode,
4940 gen_rtx_LSHIFTRT (mode, source,
4941 GEN_INT (i)),
4942 source)));
4943 return insns + 1;
4944 }
4945 }
4946 }
4947 break;
4948
4949 case IOR:
4950 case XOR:
4951 /* If we have IOR or XOR, and the constant can be loaded in a
4952 single instruction, and we can find a temporary to put it in,
4953 then this can be done in two instructions instead of 3-4. */
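      /* E.g. for code == IOR and val == 0xffffff00: ~val == 0xff is a valid
	 immediate, so the move pattern can load val with a single MVN and we
	 finish with one ORR:
	     mvn  rT, #0xff
	     orr  rD, rS, rT  */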
4954 if (subtargets
4955 /* TARGET can't be NULL if SUBTARGETS is 0. */
4956 || (reload_completed && !reg_mentioned_p (target, source)))
4957 {
4958 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4959 {
4960 if (generate)
4961 {
4962 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4963
4964 emit_constant_insn (cond,
4965 gen_rtx_SET (sub, GEN_INT (val)));
4966 emit_constant_insn (cond,
4967 gen_rtx_SET (target,
4968 gen_rtx_fmt_ee (code, mode,
4969 source, sub)));
4970 }
4971 return 2;
4972 }
4973 }
4974
4975 if (code == XOR)
4976 break;
4977
4978 /* Convert
4979 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4980 followed by 0s, e.g. 0xfff00000)
4981 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4982
4983 This can be done in 2 instructions by using shifts with mov or mvn.
4984 e.g. for
4985 x = x | 0xfff00000;
4986 we generate:
4987 mvn r0, r0, asl #12
4988 mvn r0, r0, lsr #12 */
4989 if (set_sign_bit_copies > 8
4990 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4991 {
4992 if (generate)
4993 {
4994 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4995 rtx shift = GEN_INT (set_sign_bit_copies);
4996
4997 emit_constant_insn
4998 (cond,
4999 gen_rtx_SET (sub,
5000 gen_rtx_NOT (mode,
5001 gen_rtx_ASHIFT (mode,
5002 source,
5003 shift))));
5004 emit_constant_insn
5005 (cond,
5006 gen_rtx_SET (target,
5007 gen_rtx_NOT (mode,
5008 gen_rtx_LSHIFTRT (mode, sub,
5009 shift))));
5010 }
5011 return 2;
5012 }
5013
5014 /* Convert
5015 x = y | constant (which has set_zero_bit_copies trailing ones)
5016 to
5017 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5018
5019 E.g. for r0 = r0 | 0xfff we generate
5020 mvn r0, r0, lsr #12
5021 mvn r0, r0, asl #12
5022
5023 */
5024 if (set_zero_bit_copies > 8
5025 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5026 {
5027 if (generate)
5028 {
5029 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5030 rtx shift = GEN_INT (set_zero_bit_copies);
5031
5032 emit_constant_insn
5033 (cond,
5034 gen_rtx_SET (sub,
5035 gen_rtx_NOT (mode,
5036 gen_rtx_LSHIFTRT (mode,
5037 source,
5038 shift))));
5039 emit_constant_insn
5040 (cond,
5041 gen_rtx_SET (target,
5042 gen_rtx_NOT (mode,
5043 gen_rtx_ASHIFT (mode, sub,
5044 shift))));
5045 }
5046 return 2;
5047 }
5048
5049 /* This will never be reached for Thumb2 because orn is a valid
5050 instruction. This is for Thumb1 and the ARM 32 bit cases.
5051
5052 x = y | constant (such that ~constant is a valid constant)
5053 Transform this to
5054 x = ~(~y & ~constant).
5055 */
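      /* E.g. target = y | 0xffff00ff when no scratch register is available:
	 ~0xffff00ff == 0xff00 is a valid immediate, so we emit
	     mvn  rD, rY
	     and  rD, rD, #0xff00
	     mvn  rD, rD  */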
5056 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5057 {
5058 if (generate)
5059 {
5060 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5061 emit_constant_insn (cond,
5062 gen_rtx_SET (sub,
5063 gen_rtx_NOT (mode, source)));
5064 source = sub;
5065 if (subtargets)
5066 sub = gen_reg_rtx (mode);
5067 emit_constant_insn (cond,
5068 gen_rtx_SET (sub,
5069 gen_rtx_AND (mode, source,
5070 GEN_INT (temp1))));
5071 emit_constant_insn (cond,
5072 gen_rtx_SET (target,
5073 gen_rtx_NOT (mode, sub)));
5074 }
5075 return 3;
5076 }
5077 break;
5078
5079 case AND:
5080 /* See if two shifts will do 2 or more insns' worth of work. */
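      /* E.g. x & 0xffff: neither 0xffff nor its inverse 0xffff0000 is a valid
	 immediate, but the same result is obtained by shifting left by 16 and
	 then logical-right by 16.  */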
5081 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5082 {
5083 HOST_WIDE_INT shift_mask = ((0xffffffff
5084 << (32 - clear_sign_bit_copies))
5085 & 0xffffffff);
5086
5087 if ((remainder | shift_mask) != 0xffffffff)
5088 {
5089 HOST_WIDE_INT new_val
5090 = ARM_SIGN_EXTEND (remainder | shift_mask);
5091
5092 if (generate)
5093 {
5094 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5095 insns = arm_gen_constant (AND, SImode, cond, new_val,
5096 new_src, source, subtargets, 1);
5097 source = new_src;
5098 }
5099 else
5100 {
5101 rtx targ = subtargets ? NULL_RTX : target;
5102 insns = arm_gen_constant (AND, mode, cond, new_val,
5103 targ, source, subtargets, 0);
5104 }
5105 }
5106
5107 if (generate)
5108 {
5109 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5110 rtx shift = GEN_INT (clear_sign_bit_copies);
5111
5112 emit_insn (gen_ashlsi3 (new_src, source, shift));
5113 emit_insn (gen_lshrsi3 (target, new_src, shift));
5114 }
5115
5116 return insns + 2;
5117 }
5118
5119 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5120 {
5121 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5122
5123 if ((remainder | shift_mask) != 0xffffffff)
5124 {
5125 HOST_WIDE_INT new_val
5126 = ARM_SIGN_EXTEND (remainder | shift_mask);
5127 if (generate)
5128 {
5129 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5130
5131 insns = arm_gen_constant (AND, mode, cond, new_val,
5132 new_src, source, subtargets, 1);
5133 source = new_src;
5134 }
5135 else
5136 {
5137 rtx targ = subtargets ? NULL_RTX : target;
5138
5139 insns = arm_gen_constant (AND, mode, cond, new_val,
5140 targ, source, subtargets, 0);
5141 }
5142 }
5143
5144 if (generate)
5145 {
5146 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5147 rtx shift = GEN_INT (clear_zero_bit_copies);
5148
5149 emit_insn (gen_lshrsi3 (new_src, source, shift));
5150 emit_insn (gen_ashlsi3 (target, new_src, shift));
5151 }
5152
5153 return insns + 2;
5154 }
5155
5156 break;
5157
5158 default:
5159 break;
5160 }
5161
5162 /* Calculate what the instruction sequences would be if we generated it
5163 normally, negated, or inverted. */
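  /* E.g. for code == AND and val == 0xffffff00: val itself is not a valid
     immediate, but the inverted value 0xff is, so the BIC approach mentioned
     just below costs a single instruction.  */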
5164 if (code == AND)
5165 /* AND cannot be split into multiple insns, so invert and use BIC. */
5166 insns = 99;
5167 else
5168 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5169
5170 if (can_negate)
5171 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5172 &neg_immediates);
5173 else
5174 neg_insns = 99;
5175
5176 if (can_invert || final_invert)
5177 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5178 &inv_immediates);
5179 else
5180 inv_insns = 99;
5181
5182 immediates = &pos_immediates;
5183
5184 /* Is the negated immediate sequence more efficient? */
5185 if (neg_insns < insns && neg_insns <= inv_insns)
5186 {
5187 insns = neg_insns;
5188 immediates = &neg_immediates;
5189 }
5190 else
5191 can_negate = 0;
5192
5193 /* Is the inverted immediate sequence more efficient?
5194 We must allow for an extra NOT instruction for XOR operations, although
5195 there is some chance that the final 'mvn' will get optimized later. */
5196 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5197 {
5198 insns = inv_insns;
5199 immediates = &inv_immediates;
5200 }
5201 else
5202 {
5203 can_invert = 0;
5204 final_invert = 0;
5205 }
5206
5207 /* Now output the chosen sequence as instructions. */
5208 if (generate)
5209 {
5210 for (i = 0; i < insns; i++)
5211 {
5212 rtx new_src, temp1_rtx;
5213
5214 temp1 = immediates->i[i];
5215
5216 if (code == SET || code == MINUS)
5217 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5218 else if ((final_invert || i < (insns - 1)) && subtargets)
5219 new_src = gen_reg_rtx (mode);
5220 else
5221 new_src = target;
5222
5223 if (can_invert)
5224 temp1 = ~temp1;
5225 else if (can_negate)
5226 temp1 = -temp1;
5227
5228 temp1 = trunc_int_for_mode (temp1, mode);
5229 temp1_rtx = GEN_INT (temp1);
5230
5231 if (code == SET)
5232 ;
5233 else if (code == MINUS)
5234 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5235 else
5236 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5237
5238 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5239 source = new_src;
5240
5241 if (code == SET)
5242 {
5243 can_negate = can_invert;
5244 can_invert = 0;
5245 code = PLUS;
5246 }
5247 else if (code == MINUS)
5248 code = PLUS;
5249 }
5250 }
5251
5252 if (final_invert)
5253 {
5254 if (generate)
5255 emit_constant_insn (cond, gen_rtx_SET (target,
5256 gen_rtx_NOT (mode, source)));
5257 insns++;
5258 }
5259
5260 return insns;
5261 }
5262
5263 /* Canonicalize a comparison so that we are more likely to recognize it.
5264 This can be done for a few constant compares, where we can make the
5265 immediate value easier to load. */
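/* For example, (x > 0xffffff) is rewritten as (x >= 0x1000000): neither
   0xffffff nor -0xffffff is a valid immediate, but 0x1000000 is.  */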
5266
5267 static void
5268 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5269 bool op0_preserve_value)
5270 {
5271 machine_mode mode;
5272 unsigned HOST_WIDE_INT i, maxval;
5273
5274 mode = GET_MODE (*op0);
5275 if (mode == VOIDmode)
5276 mode = GET_MODE (*op1);
5277
5278 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5279
5280 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5281 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5282 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5283 for GTU/LEU in Thumb mode. */
5284 if (mode == DImode)
5285 {
5286
5287 if (*code == GT || *code == LE
5288 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5289 {
5290 /* Missing comparison. First try to use an available
5291 comparison. */
5292 if (CONST_INT_P (*op1))
5293 {
5294 i = INTVAL (*op1);
5295 switch (*code)
5296 {
5297 case GT:
5298 case LE:
5299 if (i != maxval
5300 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5301 {
5302 *op1 = GEN_INT (i + 1);
5303 *code = *code == GT ? GE : LT;
5304 return;
5305 }
5306 break;
5307 case GTU:
5308 case LEU:
5309 if (i != ~((unsigned HOST_WIDE_INT) 0)
5310 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5311 {
5312 *op1 = GEN_INT (i + 1);
5313 *code = *code == GTU ? GEU : LTU;
5314 return;
5315 }
5316 break;
5317 default:
5318 gcc_unreachable ();
5319 }
5320 }
5321
5322 /* If that did not work, reverse the condition. */
5323 if (!op0_preserve_value)
5324 {
5325 std::swap (*op0, *op1);
5326 *code = (int)swap_condition ((enum rtx_code)*code);
5327 }
5328 }
5329 return;
5330 }
5331
5332 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5333 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5334 to facilitate possible combining with a cmp into 'ands'. */
5335 if (mode == SImode
5336 && GET_CODE (*op0) == ZERO_EXTEND
5337 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5338 && GET_MODE (XEXP (*op0, 0)) == QImode
5339 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5340 && subreg_lowpart_p (XEXP (*op0, 0))
5341 && *op1 == const0_rtx)
5342 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5343 GEN_INT (255));
5344
5345 /* Comparisons smaller than DImode. Only adjust comparisons against
5346 an out-of-range constant. */
5347 if (!CONST_INT_P (*op1)
5348 || const_ok_for_arm (INTVAL (*op1))
5349 || const_ok_for_arm (- INTVAL (*op1)))
5350 return;
5351
5352 i = INTVAL (*op1);
5353
5354 switch (*code)
5355 {
5356 case EQ:
5357 case NE:
5358 return;
5359
5360 case GT:
5361 case LE:
5362 if (i != maxval
5363 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5364 {
5365 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5366 *code = *code == GT ? GE : LT;
5367 return;
5368 }
5369 break;
5370
5371 case GE:
5372 case LT:
5373 if (i != ~maxval
5374 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5375 {
5376 *op1 = GEN_INT (i - 1);
5377 *code = *code == GE ? GT : LE;
5378 return;
5379 }
5380 break;
5381
5382 case GTU:
5383 case LEU:
5384 if (i != ~((unsigned HOST_WIDE_INT) 0)
5385 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5386 {
5387 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5388 *code = *code == GTU ? GEU : LTU;
5389 return;
5390 }
5391 break;
5392
5393 case GEU:
5394 case LTU:
5395 if (i != 0
5396 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5397 {
5398 *op1 = GEN_INT (i - 1);
5399 *code = *code == GEU ? GTU : LEU;
5400 return;
5401 }
5402 break;
5403
5404 default:
5405 gcc_unreachable ();
5406 }
5407 }
5408
5409
5410 /* Define how to find the value returned by a function. */
5411
5412 static rtx
5413 arm_function_value(const_tree type, const_tree func,
5414 bool outgoing ATTRIBUTE_UNUSED)
5415 {
5416 machine_mode mode;
5417 int unsignedp ATTRIBUTE_UNUSED;
5418 rtx r ATTRIBUTE_UNUSED;
5419
5420 mode = TYPE_MODE (type);
5421
5422 if (TARGET_AAPCS_BASED)
5423 return aapcs_allocate_return_reg (mode, type, func);
5424
5425 /* Promote integer types. */
5426 if (INTEGRAL_TYPE_P (type))
5427 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5428
5429 /* Promotes small structs returned in a register to full-word size
5430 for big-endian AAPCS. */
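  /* E.g. a 3-byte struct has its size rounded up to 4 bytes and is returned
     in SImode, so that on big-endian targets the value occupies the most
     significant bytes of r0.  */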
5431 if (arm_return_in_msb (type))
5432 {
5433 HOST_WIDE_INT size = int_size_in_bytes (type);
5434 if (size % UNITS_PER_WORD != 0)
5435 {
5436 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5437 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5438 }
5439 }
5440
5441 return arm_libcall_value_1 (mode);
5442 }
5443
5444 /* libcall hashtable helpers. */
5445
5446 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5447 {
5448 static inline hashval_t hash (const rtx_def *);
5449 static inline bool equal (const rtx_def *, const rtx_def *);
5450 static inline void remove (rtx_def *);
5451 };
5452
5453 inline bool
5454 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5455 {
5456 return rtx_equal_p (p1, p2);
5457 }
5458
5459 inline hashval_t
5460 libcall_hasher::hash (const rtx_def *p1)
5461 {
5462 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5463 }
5464
5465 typedef hash_table<libcall_hasher> libcall_table_type;
5466
5467 static void
5468 add_libcall (libcall_table_type *htab, rtx libcall)
5469 {
5470 *htab->find_slot (libcall, INSERT) = libcall;
5471 }
5472
5473 static bool
5474 arm_libcall_uses_aapcs_base (const_rtx libcall)
5475 {
5476 static bool init_done = false;
5477 static libcall_table_type *libcall_htab = NULL;
5478
5479 if (!init_done)
5480 {
5481 init_done = true;
5482
5483 libcall_htab = new libcall_table_type (31);
5484 add_libcall (libcall_htab,
5485 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5486 add_libcall (libcall_htab,
5487 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5488 add_libcall (libcall_htab,
5489 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5490 add_libcall (libcall_htab,
5491 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5492
5493 add_libcall (libcall_htab,
5494 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5495 add_libcall (libcall_htab,
5496 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5497 add_libcall (libcall_htab,
5498 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5499 add_libcall (libcall_htab,
5500 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5501
5502 add_libcall (libcall_htab,
5503 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5504 add_libcall (libcall_htab,
5505 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5506 add_libcall (libcall_htab,
5507 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5508 add_libcall (libcall_htab,
5509 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5510 add_libcall (libcall_htab,
5511 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5512 add_libcall (libcall_htab,
5513 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5514 add_libcall (libcall_htab,
5515 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5516 add_libcall (libcall_htab,
5517 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5518
5519 /* Values from double-precision helper functions are returned in core
5520 registers if the selected core only supports single-precision
5521 arithmetic, even if we are using the hard-float ABI. The same is
5522 true for single-precision helpers, but we will never be using the
5523 hard-float ABI on a CPU which doesn't support single-precision
5524 operations in hardware. */
5525 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5526 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5527 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5528 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5529 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5530 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5531 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5532 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5533 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5534 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5535 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5536 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5537 SFmode));
5538 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5539 DFmode));
5540 add_libcall (libcall_htab,
5541 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5542 }
5543
5544 return libcall && libcall_htab->find (libcall) != NULL;
5545 }
5546
5547 static rtx
5548 arm_libcall_value_1 (machine_mode mode)
5549 {
5550 if (TARGET_AAPCS_BASED)
5551 return aapcs_libcall_value (mode);
5552 else if (TARGET_IWMMXT_ABI
5553 && arm_vector_mode_supported_p (mode))
5554 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5555 else
5556 return gen_rtx_REG (mode, ARG_REGISTER (1));
5557 }
5558
5559 /* Define how to find the value returned by a library function
5560 assuming the value has mode MODE. */
5561
5562 static rtx
5563 arm_libcall_value (machine_mode mode, const_rtx libcall)
5564 {
5565 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5566 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5567 {
5568 /* The following libcalls return their result in integer registers,
5569 even though they return a floating point value. */
5570 if (arm_libcall_uses_aapcs_base (libcall))
5571 return gen_rtx_REG (mode, ARG_REGISTER(1));
5572
5573 }
5574
5575 return arm_libcall_value_1 (mode);
5576 }
5577
5578 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5579
5580 static bool
5581 arm_function_value_regno_p (const unsigned int regno)
5582 {
5583 if (regno == ARG_REGISTER (1)
5584 || (TARGET_32BIT
5585 && TARGET_AAPCS_BASED
5586 && TARGET_HARD_FLOAT
5587 && regno == FIRST_VFP_REGNUM)
5588 || (TARGET_IWMMXT_ABI
5589 && regno == FIRST_IWMMXT_REGNUM))
5590 return true;
5591
5592 return false;
5593 }
5594
5595 /* Determine the amount of memory needed to store the possible return
5596 registers of an untyped call. */
5597 int
5598 arm_apply_result_size (void)
5599 {
5600 int size = 16;
5601
5602 if (TARGET_32BIT)
5603 {
5604 if (TARGET_HARD_FLOAT_ABI)
5605 size += 32;
5606 if (TARGET_IWMMXT_ABI)
5607 size += 8;
5608 }
5609
5610 return size;
5611 }
5612
5613 /* Decide whether TYPE should be returned in memory (true)
5614 or in a register (false). FNTYPE is the type of the function making
5615 the call. */
5616 static bool
5617 arm_return_in_memory (const_tree type, const_tree fntype)
5618 {
5619 HOST_WIDE_INT size;
5620
5621 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5622
5623 if (TARGET_AAPCS_BASED)
5624 {
5625 /* Simple, non-aggregate types (i.e. not including vectors and
5626 complex) are always returned in a register (or registers).
5627 We don't care about which register here, so we can short-cut
5628 some of the detail. */
5629 if (!AGGREGATE_TYPE_P (type)
5630 && TREE_CODE (type) != VECTOR_TYPE
5631 && TREE_CODE (type) != COMPLEX_TYPE)
5632 return false;
5633
5634 /* Any return value that is no larger than one word can be
5635 returned in r0. */
5636 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5637 return false;
5638
5639 /* Check any available co-processors to see if they accept the
5640 type as a register candidate (VFP, for example, can return
5641 some aggregates in consecutive registers). These aren't
5642 available if the call is variadic. */
5643 if (aapcs_select_return_coproc (type, fntype) >= 0)
5644 return false;
5645
5646 /* Vector values should be returned using ARM registers, not
5647 memory (unless they're over 16 bytes, which will break since
5648 we only have four call-clobbered registers to play with). */
5649 if (TREE_CODE (type) == VECTOR_TYPE)
5650 return (size < 0 || size > (4 * UNITS_PER_WORD));
5651
5652 /* The rest go in memory. */
5653 return true;
5654 }
5655
5656 if (TREE_CODE (type) == VECTOR_TYPE)
5657 return (size < 0 || size > (4 * UNITS_PER_WORD));
5658
5659 if (!AGGREGATE_TYPE_P (type) &&
5660 (TREE_CODE (type) != VECTOR_TYPE))
5661 /* All simple types are returned in registers. */
5662 return false;
5663
5664 if (arm_abi != ARM_ABI_APCS)
5665 {
5666 /* ATPCS and later return aggregate types in memory only if they are
5667 larger than a word (or are variable size). */
5668 return (size < 0 || size > UNITS_PER_WORD);
5669 }
5670
5671 /* For the arm-wince targets we choose to be compatible with Microsoft's
5672 ARM and Thumb compilers, which always return aggregates in memory. */
5673 #ifndef ARM_WINCE
5674 /* All structures/unions bigger than one word are returned in memory.
5675 Also catch the case where int_size_in_bytes returns -1. In this case
5676 the aggregate is either huge or of variable size, and in either case
5677 we will want to return it via memory and not in a register. */
5678 if (size < 0 || size > UNITS_PER_WORD)
5679 return true;
5680
5681 if (TREE_CODE (type) == RECORD_TYPE)
5682 {
5683 tree field;
5684
5685 /* For a struct the APCS says that we only return in a register
5686 if the type is 'integer like' and every addressable element
5687 has an offset of zero. For practical purposes this means
5688 that the structure can have at most one non bit-field element
5689 and that this element must be the first one in the structure. */
5690
5691 /* Find the first field, ignoring non FIELD_DECL things which will
5692 have been created by C++. */
5693 for (field = TYPE_FIELDS (type);
5694 field && TREE_CODE (field) != FIELD_DECL;
5695 field = DECL_CHAIN (field))
5696 continue;
5697
5698 if (field == NULL)
5699 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5700
5701 /* Check that the first field is valid for returning in a register. */
5702
5703 /* ... Floats are not allowed */
5704 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5705 return true;
5706
5707 /* ... Aggregates that are not themselves valid for returning in
5708 a register are not allowed. */
5709 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5710 return true;
5711
5712 /* Now check the remaining fields, if any. Only bitfields are allowed,
5713 since they are not addressable. */
5714 for (field = DECL_CHAIN (field);
5715 field;
5716 field = DECL_CHAIN (field))
5717 {
5718 if (TREE_CODE (field) != FIELD_DECL)
5719 continue;
5720
5721 if (!DECL_BIT_FIELD_TYPE (field))
5722 return true;
5723 }
5724
5725 return false;
5726 }
5727
5728 if (TREE_CODE (type) == UNION_TYPE)
5729 {
5730 tree field;
5731
5732 /* Unions can be returned in registers if every element is
5733 integral, or can be returned in an integer register. */
5734 for (field = TYPE_FIELDS (type);
5735 field;
5736 field = DECL_CHAIN (field))
5737 {
5738 if (TREE_CODE (field) != FIELD_DECL)
5739 continue;
5740
5741 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5742 return true;
5743
5744 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5745 return true;
5746 }
5747
5748 return false;
5749 }
5750 #endif /* not ARM_WINCE */
5751
5752 /* Return all other types in memory. */
5753 return true;
5754 }
5755
5756 const struct pcs_attribute_arg
5757 {
5758 const char *arg;
5759 enum arm_pcs value;
5760 } pcs_attribute_args[] =
5761 {
5762 {"aapcs", ARM_PCS_AAPCS},
5763 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5764 #if 0
5765 /* We could recognize these, but changes would be needed elsewhere
5766 * to implement them. */
5767 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5768 {"atpcs", ARM_PCS_ATPCS},
5769 {"apcs", ARM_PCS_APCS},
5770 #endif
5771 {NULL, ARM_PCS_UNKNOWN}
5772 };
5773
5774 static enum arm_pcs
5775 arm_pcs_from_attribute (tree attr)
5776 {
5777 const struct pcs_attribute_arg *ptr;
5778 const char *arg;
5779
5780 /* Get the value of the argument. */
5781 if (TREE_VALUE (attr) == NULL_TREE
5782 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5783 return ARM_PCS_UNKNOWN;
5784
5785 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5786
5787 /* Check it against the list of known arguments. */
5788 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5789 if (streq (arg, ptr->arg))
5790 return ptr->value;
5791
5792 /* An unrecognized PCS variant. */
5793 return ARM_PCS_UNKNOWN;
5794 }
5795
5796 /* Get the PCS variant to use for this call. TYPE is the function's type
5797 specification, DECL is the specific declaration. DECL may be null if
5798 the call could be indirect or if this is a library call. */
5799 static enum arm_pcs
5800 arm_get_pcs_model (const_tree type, const_tree decl)
5801 {
5802 bool user_convention = false;
5803 enum arm_pcs user_pcs = arm_pcs_default;
5804 tree attr;
5805
5806 gcc_assert (type);
5807
5808 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5809 if (attr)
5810 {
5811 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5812 user_convention = true;
5813 }
5814
5815 if (TARGET_AAPCS_BASED)
5816 {
5817 /* Detect varargs functions. These always use the base rules
5818 (no argument is ever a candidate for a co-processor
5819 register). */
5820 bool base_rules = stdarg_p (type);
5821
5822 if (user_convention)
5823 {
5824 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5825 sorry ("non-AAPCS derived PCS variant");
5826 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5827 error ("variadic functions must use the base AAPCS variant");
5828 }
5829
5830 if (base_rules)
5831 return ARM_PCS_AAPCS;
5832 else if (user_convention)
5833 return user_pcs;
5834 else if (decl && flag_unit_at_a_time)
5835 {
5836 /* Local functions never leak outside this compilation unit,
5837 so we are free to use whatever conventions are
5838 appropriate. */
5839 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5840 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5841 if (i && i->local)
5842 return ARM_PCS_AAPCS_LOCAL;
5843 }
5844 }
5845 else if (user_convention && user_pcs != arm_pcs_default)
5846 sorry ("PCS variant");
5847
5848 /* For everything else we use the target's default. */
5849 return arm_pcs_default;
5850 }
5851
5852
5853 static void
5854 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5855 const_tree fntype ATTRIBUTE_UNUSED,
5856 rtx libcall ATTRIBUTE_UNUSED,
5857 const_tree fndecl ATTRIBUTE_UNUSED)
5858 {
5859 /* Record the unallocated VFP registers. */
5860 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5861 pcum->aapcs_vfp_reg_alloc = 0;
5862 }
5863
5864 /* Walk down the type tree of TYPE counting consecutive base elements.
5865 If *MODEP is VOIDmode, then set it to the first valid floating point
5866 type. If a non-floating point type is found, or if a floating point
5867 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5868 otherwise return the count in the sub-tree. */
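/* For example, struct { double x[2]; double y; } gives a count of 3 with
   *MODEP == DFmode (a homogeneous aggregate of three doubles), whereas
   struct { float f; double d; } gives -1 because the element modes differ.  */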
5869 static int
5870 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5871 {
5872 machine_mode mode;
5873 HOST_WIDE_INT size;
5874
5875 switch (TREE_CODE (type))
5876 {
5877 case REAL_TYPE:
5878 mode = TYPE_MODE (type);
5879 if (mode != DFmode && mode != SFmode && mode != HFmode)
5880 return -1;
5881
5882 if (*modep == VOIDmode)
5883 *modep = mode;
5884
5885 if (*modep == mode)
5886 return 1;
5887
5888 break;
5889
5890 case COMPLEX_TYPE:
5891 mode = TYPE_MODE (TREE_TYPE (type));
5892 if (mode != DFmode && mode != SFmode)
5893 return -1;
5894
5895 if (*modep == VOIDmode)
5896 *modep = mode;
5897
5898 if (*modep == mode)
5899 return 2;
5900
5901 break;
5902
5903 case VECTOR_TYPE:
5904 /* Use V2SImode and V4SImode as representatives of all 64-bit
5905 and 128-bit vector types, whether or not those modes are
5906 supported with the present options. */
5907 size = int_size_in_bytes (type);
5908 switch (size)
5909 {
5910 case 8:
5911 mode = V2SImode;
5912 break;
5913 case 16:
5914 mode = V4SImode;
5915 break;
5916 default:
5917 return -1;
5918 }
5919
5920 if (*modep == VOIDmode)
5921 *modep = mode;
5922
5923 /* Vector modes are considered to be opaque: two vectors are
5924 equivalent for the purposes of being homogeneous aggregates
5925 if they are the same size. */
5926 if (*modep == mode)
5927 return 1;
5928
5929 break;
5930
5931 case ARRAY_TYPE:
5932 {
5933 int count;
5934 tree index = TYPE_DOMAIN (type);
5935
5936 /* Can't handle incomplete types nor sizes that are not
5937 fixed. */
5938 if (!COMPLETE_TYPE_P (type)
5939 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5940 return -1;
5941
5942 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5943 if (count == -1
5944 || !index
5945 || !TYPE_MAX_VALUE (index)
5946 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5947 || !TYPE_MIN_VALUE (index)
5948 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5949 || count < 0)
5950 return -1;
5951
5952 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5953 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5954
5955 /* There must be no padding. */
5956 if (wi::to_wide (TYPE_SIZE (type))
5957 != count * GET_MODE_BITSIZE (*modep))
5958 return -1;
5959
5960 return count;
5961 }
5962
5963 case RECORD_TYPE:
5964 {
5965 int count = 0;
5966 int sub_count;
5967 tree field;
5968
5969 /* Can't handle incomplete types nor sizes that are not
5970 fixed. */
5971 if (!COMPLETE_TYPE_P (type)
5972 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5973 return -1;
5974
5975 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5976 {
5977 if (TREE_CODE (field) != FIELD_DECL)
5978 continue;
5979
5980 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5981 if (sub_count < 0)
5982 return -1;
5983 count += sub_count;
5984 }
5985
5986 /* There must be no padding. */
5987 if (wi::to_wide (TYPE_SIZE (type))
5988 != count * GET_MODE_BITSIZE (*modep))
5989 return -1;
5990
5991 return count;
5992 }
5993
5994 case UNION_TYPE:
5995 case QUAL_UNION_TYPE:
5996 {
5997 /* These aren't very interesting except in a degenerate case. */
5998 int count = 0;
5999 int sub_count;
6000 tree field;
6001
6002 /* Can't handle incomplete types nor sizes that are not
6003 fixed. */
6004 if (!COMPLETE_TYPE_P (type)
6005 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6006 return -1;
6007
6008 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6009 {
6010 if (TREE_CODE (field) != FIELD_DECL)
6011 continue;
6012
6013 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6014 if (sub_count < 0)
6015 return -1;
6016 count = count > sub_count ? count : sub_count;
6017 }
6018
6019 /* There must be no padding. */
6020 if (wi::to_wide (TYPE_SIZE (type))
6021 != count * GET_MODE_BITSIZE (*modep))
6022 return -1;
6023
6024 return count;
6025 }
6026
6027 default:
6028 break;
6029 }
6030
6031 return -1;
6032 }
6033
6034 /* Return true if PCS_VARIANT should use VFP registers. */
6035 static bool
6036 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6037 {
6038 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6039 {
6040 static bool seen_thumb1_vfp = false;
6041
6042 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6043 {
6044 sorry ("Thumb-1 hard-float VFP ABI");
6045 /* sorry() is not immediately fatal, so only display this once. */
6046 seen_thumb1_vfp = true;
6047 }
6048
6049 return true;
6050 }
6051
6052 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6053 return false;
6054
6055 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
6056 (TARGET_VFP_DOUBLE || !is_double));
6057 }
6058
6059 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6060 suitable for passing or returning in VFP registers for the PCS
6061 variant selected. If it is, then *BASE_MODE is updated to contain
6062 a machine mode describing each element of the argument's type and
6063 *COUNT to hold the number of such elements. */
6064 static bool
6065 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6066 machine_mode mode, const_tree type,
6067 machine_mode *base_mode, int *count)
6068 {
6069 machine_mode new_mode = VOIDmode;
6070
6071 /* If we have the type information, prefer that to working things
6072 out from the mode. */
6073 if (type)
6074 {
6075 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6076
6077 if (ag_count > 0 && ag_count <= 4)
6078 *count = ag_count;
6079 else
6080 return false;
6081 }
6082 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6083 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6084 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6085 {
6086 *count = 1;
6087 new_mode = mode;
6088 }
6089 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6090 {
6091 *count = 2;
6092 new_mode = (mode == DCmode ? DFmode : SFmode);
6093 }
6094 else
6095 return false;
6096
6097
6098 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6099 return false;
6100
6101 *base_mode = new_mode;
6102 return true;
6103 }
6104
6105 static bool
6106 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6107 machine_mode mode, const_tree type)
6108 {
6109 int count ATTRIBUTE_UNUSED;
6110 machine_mode ag_mode ATTRIBUTE_UNUSED;
6111
6112 if (!use_vfp_abi (pcs_variant, false))
6113 return false;
6114 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6115 &ag_mode, &count);
6116 }
6117
6118 static bool
6119 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6120 const_tree type)
6121 {
6122 if (!use_vfp_abi (pcum->pcs_variant, false))
6123 return false;
6124
6125 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6126 &pcum->aapcs_vfp_rmode,
6127 &pcum->aapcs_vfp_rcount);
6128 }
6129
6130 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6131 for the behaviour of this function. */
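/* For example, for a homogeneous aggregate of two doubles we have
   aapcs_vfp_rmode == DFmode and aapcs_vfp_rcount == 2, so SHIFT == 2 and
   MASK == 0xf: the loop below searches for four consecutive free
   single-precision registers, stepping by two so that double-precision
   values stay properly aligned.  */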
6132
6133 static bool
6134 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6135 const_tree type ATTRIBUTE_UNUSED)
6136 {
6137 int rmode_size
6138 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6139 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6140 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6141 int regno;
6142
6143 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6144 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6145 {
6146 pcum->aapcs_vfp_reg_alloc = mask << regno;
6147 if (mode == BLKmode
6148 || (mode == TImode && ! TARGET_NEON)
6149 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6150 {
6151 int i;
6152 int rcount = pcum->aapcs_vfp_rcount;
6153 int rshift = shift;
6154 machine_mode rmode = pcum->aapcs_vfp_rmode;
6155 rtx par;
6156 if (!TARGET_NEON)
6157 {
6158 /* Avoid using unsupported vector modes. */
6159 if (rmode == V2SImode)
6160 rmode = DImode;
6161 else if (rmode == V4SImode)
6162 {
6163 rmode = DImode;
6164 rcount *= 2;
6165 rshift /= 2;
6166 }
6167 }
6168 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6169 for (i = 0; i < rcount; i++)
6170 {
6171 rtx tmp = gen_rtx_REG (rmode,
6172 FIRST_VFP_REGNUM + regno + i * rshift);
6173 tmp = gen_rtx_EXPR_LIST
6174 (VOIDmode, tmp,
6175 GEN_INT (i * GET_MODE_SIZE (rmode)));
6176 XVECEXP (par, 0, i) = tmp;
6177 }
6178
6179 pcum->aapcs_reg = par;
6180 }
6181 else
6182 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6183 return true;
6184 }
6185 return false;
6186 }
6187
6188 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6189 comment there for the behaviour of this function. */
6190
6191 static rtx
6192 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant ATTRIBUTE_UNUSED,
6193 machine_mode mode,
6194 const_tree type ATTRIBUTE_UNUSED)
6195 {
6196 if (!use_vfp_abi (pcs_variant, false))
6197 return NULL;
6198
6199 if (mode == BLKmode
6200 || (GET_MODE_CLASS (mode) == MODE_INT
6201 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6202 && !TARGET_NEON))
6203 {
6204 int count;
6205 machine_mode ag_mode;
6206 int i;
6207 rtx par;
6208 int shift;
6209
6210 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6211 &ag_mode, &count);
6212
6213 if (!TARGET_NEON)
6214 {
6215 if (ag_mode == V2SImode)
6216 ag_mode = DImode;
6217 else if (ag_mode == V4SImode)
6218 {
6219 ag_mode = DImode;
6220 count *= 2;
6221 }
6222 }
6223 shift = GET_MODE_SIZE(ag_mode) / GET_MODE_SIZE(SFmode);
6224 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6225 for (i = 0; i < count; i++)
6226 {
6227 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6228 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6229 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6230 XVECEXP (par, 0, i) = tmp;
6231 }
6232
6233 return par;
6234 }
6235
6236 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6237 }
6238
6239 static void
6240 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6241 machine_mode mode ATTRIBUTE_UNUSED,
6242 const_tree type ATTRIBUTE_UNUSED)
6243 {
6244 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6245 pcum->aapcs_vfp_reg_alloc = 0;
6246 return;
6247 }
6248
6249 #define AAPCS_CP(X) \
6250 { \
6251 aapcs_ ## X ## _cum_init, \
6252 aapcs_ ## X ## _is_call_candidate, \
6253 aapcs_ ## X ## _allocate, \
6254 aapcs_ ## X ## _is_return_candidate, \
6255 aapcs_ ## X ## _allocate_return_reg, \
6256 aapcs_ ## X ## _advance \
6257 }
6258
6259 /* Table of co-processors that can be used to pass arguments in
6260 registers. Ideally no argument should be a candidate for more than
6261 one co-processor table entry, but the table is processed in order
6262 and stops after the first match. If that entry then fails to put
6263 the argument into a co-processor register, the argument will go on
6264 the stack. */
6265 static struct
6266 {
6267 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6268 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6269
6270 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6271 BLKmode) is a candidate for this co-processor's registers; this
6272 function should ignore any position-dependent state in
6273 CUMULATIVE_ARGS and only use call-type dependent information. */
6274 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6275
6276 /* Return true if the argument does get a co-processor register; it
6277 should set aapcs_reg to an RTX of the register allocated as is
6278 required for a return from FUNCTION_ARG. */
6279 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6280
6281 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6282 be returned in this co-processor's registers. */
6283 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6284
6285 /* Allocate and return an RTX element to hold the return type of a call. This
6286 routine must not fail and will only be called if is_return_candidate
6287 returned true with the same parameters. */
6288 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6289
6290 /* Finish processing this argument and prepare to start processing
6291 the next one. */
6292 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6293 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6294 {
6295 AAPCS_CP(vfp)
6296 };
6297
6298 #undef AAPCS_CP
6299
6300 static int
6301 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6302 const_tree type)
6303 {
6304 int i;
6305
6306 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6307 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6308 return i;
6309
6310 return -1;
6311 }
6312
6313 static int
6314 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6315 {
6316 /* We aren't passed a decl, so we can't check that a call is local.
6317 However, it isn't clear that that would be a win anyway, since it
6318 might limit some tail-calling opportunities. */
6319 enum arm_pcs pcs_variant;
6320
6321 if (fntype)
6322 {
6323 const_tree fndecl = NULL_TREE;
6324
6325 if (TREE_CODE (fntype) == FUNCTION_DECL)
6326 {
6327 fndecl = fntype;
6328 fntype = TREE_TYPE (fntype);
6329 }
6330
6331 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6332 }
6333 else
6334 pcs_variant = arm_pcs_default;
6335
6336 if (pcs_variant != ARM_PCS_AAPCS)
6337 {
6338 int i;
6339
6340 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6341 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6342 TYPE_MODE (type),
6343 type))
6344 return i;
6345 }
6346 return -1;
6347 }
6348
6349 static rtx
6350 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6351 const_tree fntype)
6352 {
6353 /* We aren't passed a decl, so we can't check that a call is local.
6354 However, it isn't clear that that would be a win anyway, since it
6355 might limit some tail-calling opportunities. */
6356 enum arm_pcs pcs_variant;
6357 int unsignedp ATTRIBUTE_UNUSED;
6358
6359 if (fntype)
6360 {
6361 const_tree fndecl = NULL_TREE;
6362
6363 if (TREE_CODE (fntype) == FUNCTION_DECL)
6364 {
6365 fndecl = fntype;
6366 fntype = TREE_TYPE (fntype);
6367 }
6368
6369 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6370 }
6371 else
6372 pcs_variant = arm_pcs_default;
6373
6374 /* Promote integer types. */
6375 if (type && INTEGRAL_TYPE_P (type))
6376 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6377
6378 if (pcs_variant != ARM_PCS_AAPCS)
6379 {
6380 int i;
6381
6382 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6383 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6384 type))
6385 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6386 mode, type);
6387 }
6388
6389 /* Promotes small structs returned in a register to full-word size
6390 for big-endian AAPCS. */
6391 if (type && arm_return_in_msb (type))
6392 {
6393 HOST_WIDE_INT size = int_size_in_bytes (type);
6394 if (size % UNITS_PER_WORD != 0)
6395 {
6396 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6397 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6398 }
6399 }
6400
6401 return gen_rtx_REG (mode, R0_REGNUM);
6402 }
6403
6404 static rtx
6405 aapcs_libcall_value (machine_mode mode)
6406 {
6407 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6408 && GET_MODE_SIZE (mode) <= 4)
6409 mode = SImode;
6410
6411 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6412 }
6413
6414 /* Lay out a function argument using the AAPCS rules. The rule
6415 numbers referred to here are those in the AAPCS. */
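/* For example, under the base (integer) variant a call f (int, double, int)
   is laid out as follows: the first int goes in r0; rule C3 rounds the NCRN
   up to 2 for the doubleword-aligned double, which then takes r2/r3 (C4);
   the final int no longer fits in core registers and goes on the stack
   (C6/C7).  This assumes the double is not a co-processor (VFP) candidate,
   as is the case for a variadic function.  */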
6416 static void
6417 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6418 const_tree type, bool named)
6419 {
6420 int nregs, nregs2;
6421 int ncrn;
6422
6423 /* We only need to do this once per argument. */
6424 if (pcum->aapcs_arg_processed)
6425 return;
6426
6427 pcum->aapcs_arg_processed = true;
6428
6429 /* Special case: if named is false then we are handling an incoming
6430 anonymous argument which is on the stack. */
6431 if (!named)
6432 return;
6433
6434 /* Is this a potential co-processor register candidate? */
6435 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6436 {
6437 int slot = aapcs_select_call_coproc (pcum, mode, type);
6438 pcum->aapcs_cprc_slot = slot;
6439
6440 /* We don't have to apply any of the rules from part B of the
6441 preparation phase, these are handled elsewhere in the
6442 compiler. */
6443
6444 if (slot >= 0)
6445 {
6446 /* A Co-processor register candidate goes either in its own
6447 class of registers or on the stack. */
6448 if (!pcum->aapcs_cprc_failed[slot])
6449 {
6450 /* C1.cp - Try to allocate the argument to co-processor
6451 registers. */
6452 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6453 return;
6454
6455 /* C2.cp - Put the argument on the stack and note that we
6456 can't assign any more candidates in this slot. We also
6457 need to note that we have allocated stack space, so that
6458 we won't later try to split a non-cprc candidate between
6459 core registers and the stack. */
6460 pcum->aapcs_cprc_failed[slot] = true;
6461 pcum->can_split = false;
6462 }
6463
6464 /* We didn't get a register, so this argument goes on the
6465 stack. */
6466 gcc_assert (pcum->can_split == false);
6467 return;
6468 }
6469 }
6470
6471 /* C3 - For double-word aligned arguments, round the NCRN up to the
6472 next even number. */
6473 ncrn = pcum->aapcs_ncrn;
6474 if (ncrn & 1)
6475 {
6476 int res = arm_needs_doubleword_align (mode, type);
6477 /* Only warn during RTL expansion of call stmts, otherwise we would
6478 warn e.g. during gimplification even on functions that will be
6479 always inlined, and we'd warn multiple times. Don't warn when
6480 called in expand_function_start either, as we warn instead in
6481 arm_function_arg_boundary in that case. */
6482 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6483 inform (input_location, "parameter passing for argument of type "
6484 "%qT changed in GCC 7.1", type);
6485 else if (res > 0)
6486 ncrn++;
6487 }
6488
6489 nregs = ARM_NUM_REGS2(mode, type);
6490
6491 /* Sigh, this test should really assert that nregs > 0, but a GCC
6492 extension allows empty structs and then gives them empty size; it
6493 then allows such a structure to be passed by value. For some of
6494 the code below we have to pretend that such an argument has
6495 non-zero size so that we 'locate' it correctly either in
6496 registers or on the stack. */
6497 gcc_assert (nregs >= 0);
6498
6499 nregs2 = nregs ? nregs : 1;
6500
6501 /* C4 - Argument fits entirely in core registers. */
6502 if (ncrn + nregs2 <= NUM_ARG_REGS)
6503 {
6504 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6505 pcum->aapcs_next_ncrn = ncrn + nregs;
6506 return;
6507 }
6508
6509 /* C5 - Some core registers left and there are no arguments already
6510 on the stack: split this argument between the remaining core
6511 registers and the stack. */
6512 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6513 {
6514 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6515 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6516 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6517 return;
6518 }
6519
6520 /* C6 - NCRN is set to 4. */
6521 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6522
6523 /* C7,C8 - argument goes on the stack. We have nothing to do here. */
6524 return;
6525 }
6526
6527 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6528 for a call to a function whose data type is FNTYPE.
6529 For a library call, FNTYPE is NULL. */
6530 void
6531 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6532 rtx libname,
6533 tree fndecl ATTRIBUTE_UNUSED)
6534 {
6535 /* Long call handling. */
6536 if (fntype)
6537 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6538 else
6539 pcum->pcs_variant = arm_pcs_default;
6540
6541 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6542 {
6543 if (arm_libcall_uses_aapcs_base (libname))
6544 pcum->pcs_variant = ARM_PCS_AAPCS;
6545
6546 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6547 pcum->aapcs_reg = NULL_RTX;
6548 pcum->aapcs_partial = 0;
6549 pcum->aapcs_arg_processed = false;
6550 pcum->aapcs_cprc_slot = -1;
6551 pcum->can_split = true;
6552
6553 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6554 {
6555 int i;
6556
6557 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6558 {
6559 pcum->aapcs_cprc_failed[i] = false;
6560 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6561 }
6562 }
6563 return;
6564 }
6565
6566 /* Legacy ABIs */
6567
6568 /* On the ARM, the offset starts at 0. */
6569 pcum->nregs = 0;
6570 pcum->iwmmxt_nregs = 0;
6571 pcum->can_split = true;
6572
6573 /* Varargs vectors are treated the same as long long.
6574 named_count avoids having to change the way arm handles 'named' */
6575 pcum->named_count = 0;
6576 pcum->nargs = 0;
6577
6578 if (TARGET_REALLY_IWMMXT && fntype)
6579 {
6580 tree fn_arg;
6581
6582 for (fn_arg = TYPE_ARG_TYPES (fntype);
6583 fn_arg;
6584 fn_arg = TREE_CHAIN (fn_arg))
6585 pcum->named_count += 1;
6586
6587 if (! pcum->named_count)
6588 pcum->named_count = INT_MAX;
6589 }
6590 }
6591
6592 /* Return 1 if double word alignment is required for argument passing.
6593 Return -1 if double word alignment used to be required for argument
6594 passing before PR77728 ABI fix, but is not required anymore.
6595 Return 0 if double word alignment is not required and wasn't required
6596 before either. */
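/* For example, struct { long long x; } yields 1 and struct { int a; int b; }
   yields 0.  A type whose only over-aligned entry in TYPE_FIELDS is not a
   FIELD_DECL (e.g. a C++ static data member of type long long) yields -1:
   it was treated as doubleword-aligned before the PR77728 fix but no longer
   is, so -Wpsabi can warn about the change.  */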
6597 static int
6598 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6599 {
6600 if (!type)
6601 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6602
6603 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6604 if (!AGGREGATE_TYPE_P (type))
6605 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6606
6607 /* Array types: Use member alignment of element type. */
6608 if (TREE_CODE (type) == ARRAY_TYPE)
6609 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6610
6611 int ret = 0;
6612 /* Record/aggregate types: Use greatest member alignment of any member. */
6613 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6614 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6615 {
6616 if (TREE_CODE (field) == FIELD_DECL)
6617 return 1;
6618 else
6619 /* Before PR77728 fix, we were incorrectly considering also
6620 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6621 Make sure we can warn about that with -Wpsabi. */
6622 ret = -1;
6623 }
6624
6625 return ret;
6626 }
6627
6628
6629 /* Determine where to put an argument to a function.
6630 Value is zero to push the argument on the stack,
6631 or a hard register in which to store the argument.
6632
6633 MODE is the argument's machine mode.
6634 TYPE is the data type of the argument (as a tree).
6635 This is null for libcalls where that information may
6636 not be available.
6637 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6638 the preceding args and about the function being called.
6639 NAMED is nonzero if this argument is a named parameter
6640 (otherwise it is an extra parameter matching an ellipsis).
6641
6642 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6643 other arguments are passed on the stack. If (NAMED == 0) (which happens
6644 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6645 defined), say it is passed on the stack (function_prologue will
6646 indeed make it pass on the stack if necessary). */
6647
6648 static rtx
6649 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6650 const_tree type, bool named)
6651 {
6652 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6653 int nregs;
6654
6655 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6656 a call insn (op3 of a call_value insn). */
6657 if (mode == VOIDmode)
6658 return const0_rtx;
6659
6660 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6661 {
6662 aapcs_layout_arg (pcum, mode, type, named);
6663 return pcum->aapcs_reg;
6664 }
6665
6666 /* Varargs vectors are treated the same as long long.
6667 named_count avoids having to change the way arm handles 'named' */
6668 if (TARGET_IWMMXT_ABI
6669 && arm_vector_mode_supported_p (mode)
6670 && pcum->named_count > pcum->nargs + 1)
6671 {
6672 if (pcum->iwmmxt_nregs <= 9)
6673 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6674 else
6675 {
6676 pcum->can_split = false;
6677 return NULL_RTX;
6678 }
6679 }
6680
6681 /* Put doubleword aligned quantities in even register pairs. */
6682 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6683 {
6684 int res = arm_needs_doubleword_align (mode, type);
6685 if (res < 0 && warn_psabi)
6686 inform (input_location, "parameter passing for argument of type "
6687 "%qT changed in GCC 7.1", type);
6688 else if (res > 0)
6689 pcum->nregs++;
6690 }
6691
6692 /* Only allow splitting an arg between regs and memory if all preceding
6693 args were allocated to regs. For args passed by reference we only count
6694 the reference pointer. */
6695 if (pcum->can_split)
6696 nregs = 1;
6697 else
6698 nregs = ARM_NUM_REGS2 (mode, type);
6699
6700 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6701 return NULL_RTX;
6702
6703 return gen_rtx_REG (mode, pcum->nregs);
6704 }
6705
6706 static unsigned int
6707 arm_function_arg_boundary (machine_mode mode, const_tree type)
6708 {
6709 if (!ARM_DOUBLEWORD_ALIGN)
6710 return PARM_BOUNDARY;
6711
6712 int res = arm_needs_doubleword_align (mode, type);
6713 if (res < 0 && warn_psabi)
6714 inform (input_location, "parameter passing for argument of type %qT "
6715 "changed in GCC 7.1", type);
6716
6717 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6718 }
6719
6720 static int
6721 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6722 tree type, bool named)
6723 {
6724 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6725 int nregs = pcum->nregs;
6726
6727 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6728 {
6729 aapcs_layout_arg (pcum, mode, type, named);
6730 return pcum->aapcs_partial;
6731 }
6732
6733 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6734 return 0;
6735
6736 if (NUM_ARG_REGS > nregs
6737 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6738 && pcum->can_split)
6739 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6740
6741 return 0;
6742 }
6743
6744 /* Update the data in PCUM to advance over an argument
6745 of mode MODE and data type TYPE.
6746 (TYPE is null for libcalls where that information may not be available.) */
6747
6748 static void
6749 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6750 const_tree type, bool named)
6751 {
6752 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6753
6754 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6755 {
6756 aapcs_layout_arg (pcum, mode, type, named);
6757
6758 if (pcum->aapcs_cprc_slot >= 0)
6759 {
6760 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6761 type);
6762 pcum->aapcs_cprc_slot = -1;
6763 }
6764
6765 /* Generic stuff. */
6766 pcum->aapcs_arg_processed = false;
6767 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6768 pcum->aapcs_reg = NULL_RTX;
6769 pcum->aapcs_partial = 0;
6770 }
6771 else
6772 {
6773 pcum->nargs += 1;
6774 if (arm_vector_mode_supported_p (mode)
6775 && pcum->named_count > pcum->nargs
6776 && TARGET_IWMMXT_ABI)
6777 pcum->iwmmxt_nregs += 1;
6778 else
6779 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6780 }
6781 }
6782
6783 /* Variable sized types are passed by reference. This is a GCC
6784 extension to the ARM ABI. */
6785
6786 static bool
6787 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6788 machine_mode mode ATTRIBUTE_UNUSED,
6789 const_tree type, bool named ATTRIBUTE_UNUSED)
6790 {
6791 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6792 }
6793 \f
6794 /* Encode the current state of the #pragma [no_]long_calls. */
6795 typedef enum
6796 {
6797 OFF, /* No #pragma [no_]long_calls is in effect. */
6798 LONG, /* #pragma long_calls is in effect. */
6799 SHORT /* #pragma no_long_calls is in effect. */
6800 } arm_pragma_enum;
6801
6802 static arm_pragma_enum arm_pragma_long_calls = OFF;
6803
6804 void
6805 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6806 {
6807 arm_pragma_long_calls = LONG;
6808 }
6809
6810 void
6811 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6812 {
6813 arm_pragma_long_calls = SHORT;
6814 }
6815
6816 void
6817 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6818 {
6819 arm_pragma_long_calls = OFF;
6820 }
6821 \f
6822 /* Handle an attribute requiring a FUNCTION_DECL;
6823 arguments as in struct attribute_spec.handler. */
6824 static tree
6825 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6826 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6827 {
6828 if (TREE_CODE (*node) != FUNCTION_DECL)
6829 {
6830 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6831 name);
6832 *no_add_attrs = true;
6833 }
6834
6835 return NULL_TREE;
6836 }
6837
6838 /* Handle an "interrupt" or "isr" attribute;
6839 arguments as in struct attribute_spec.handler. */
6840 static tree
6841 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6842 bool *no_add_attrs)
6843 {
6844 if (DECL_P (*node))
6845 {
6846 if (TREE_CODE (*node) != FUNCTION_DECL)
6847 {
6848 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6849 name);
6850 *no_add_attrs = true;
6851 }
6852 /* FIXME: the argument if any is checked for type attributes;
6853 should it be checked for decl ones? */
6854 }
6855 else
6856 {
6857 if (TREE_CODE (*node) == FUNCTION_TYPE
6858 || TREE_CODE (*node) == METHOD_TYPE)
6859 {
6860 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6861 {
6862 warning (OPT_Wattributes, "%qE attribute ignored",
6863 name);
6864 *no_add_attrs = true;
6865 }
6866 }
6867 else if (TREE_CODE (*node) == POINTER_TYPE
6868 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6869 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6870 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6871 {
6872 *node = build_variant_type_copy (*node);
6873 TREE_TYPE (*node) = build_type_attribute_variant
6874 (TREE_TYPE (*node),
6875 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6876 *no_add_attrs = true;
6877 }
6878 else
6879 {
6880 /* Possibly pass this attribute on from the type to a decl. */
6881 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6882 | (int) ATTR_FLAG_FUNCTION_NEXT
6883 | (int) ATTR_FLAG_ARRAY_NEXT))
6884 {
6885 *no_add_attrs = true;
6886 return tree_cons (name, args, NULL_TREE);
6887 }
6888 else
6889 {
6890 warning (OPT_Wattributes, "%qE attribute ignored",
6891 name);
6892 }
6893 }
6894 }
6895
6896 return NULL_TREE;
6897 }
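/* Illustrative only: a handler declared as
     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));
   reaches this handler with ARGS holding the string "IRQ"; an argument
   that arm_isr_value does not recognise yields ARM_FT_UNKNOWN and the
   attribute is dropped with a warning, as coded above.  */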
6898
6899 /* Handle a "pcs" attribute; arguments as in struct
6900 attribute_spec.handler. */
6901 static tree
6902 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6903 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6904 {
6905 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6906 {
6907 warning (OPT_Wattributes, "%qE attribute ignored", name);
6908 *no_add_attrs = true;
6909 }
6910 return NULL_TREE;
6911 }
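/* Illustrative only:
     double f (double) __attribute__ ((pcs ("aapcs")));
   the argument string is validated by arm_pcs_from_attribute; anything
   it does not recognise gives ARM_PCS_UNKNOWN and the attribute is
   ignored with a warning, as coded above.  */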
6912
6913 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6914 /* Handle the "notshared" attribute. This attribute is another way of
6915 requesting hidden visibility. ARM's compiler supports
6916 "__declspec(notshared)"; we support the same thing via an
6917 attribute. */
6918
6919 static tree
6920 arm_handle_notshared_attribute (tree *node,
6921 tree name ATTRIBUTE_UNUSED,
6922 tree args ATTRIBUTE_UNUSED,
6923 int flags ATTRIBUTE_UNUSED,
6924 bool *no_add_attrs)
6925 {
6926 tree decl = TYPE_NAME (*node);
6927
6928 if (decl)
6929 {
6930 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6931 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6932 *no_add_attrs = false;
6933 }
6934 return NULL_TREE;
6935 }
6936 #endif
6937
6938 /* This function returns true if a function with declaration FNDECL and type
6939 FNTYPE uses the stack to pass arguments or return variables and false
6940 otherwise. This is used for functions with the attributes
6941 'cmse_nonsecure_call' or 'cmse_nonsecure_entry'; this function will issue
6942 diagnostic messages if the stack is used. NAME is the name of the attribute
6943 used. */
6944
6945 static bool
6946 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6947 {
6948 function_args_iterator args_iter;
6949 CUMULATIVE_ARGS args_so_far_v;
6950 cumulative_args_t args_so_far;
6951 bool first_param = true;
6952 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6953
6954 /* Error out if any argument is passed on the stack. */
6955 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6956 args_so_far = pack_cumulative_args (&args_so_far_v);
6957 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6958 {
6959 rtx arg_rtx;
6960 machine_mode arg_mode = TYPE_MODE (arg_type);
6961
6962 prev_arg_type = arg_type;
6963 if (VOID_TYPE_P (arg_type))
6964 continue;
6965
6966 if (!first_param)
6967 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6968 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6969 if (!arg_rtx
6970 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6971 {
6972 error ("%qE attribute not available to functions with arguments "
6973 "passed on the stack", name);
6974 return true;
6975 }
6976 first_param = false;
6977 }
6978
6979 /* Error out for variadic functions since we cannot control how many
6980 arguments will be passed and thus the stack could be used. stdarg_p () is not
6981 used for this check, to avoid walking the argument list twice.
6982 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6983 {
6984 error ("%qE attribute not available to functions with variable number "
6985 "of arguments", name);
6986 return true;
6987 }
6988
6989 /* Error out if return value is passed on the stack. */
6990 ret_type = TREE_TYPE (fntype);
6991 if (arm_return_in_memory (ret_type, fntype))
6992 {
6993 error ("%qE attribute not available to functions that return value on "
6994 "the stack", name);
6995 return true;
6996 }
6997 return false;
6998 }
6999
7000 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7001 function will check whether the attribute is allowed here and will add the
7002 attribute to the function declaration tree or otherwise issue a warning. */
7003
7004 static tree
7005 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7006 tree /* args */,
7007 int /* flags */,
7008 bool *no_add_attrs)
7009 {
7010 tree fndecl;
7011
7012 if (!use_cmse)
7013 {
7014 *no_add_attrs = true;
7015 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> option",
7016 name);
7017 return NULL_TREE;
7018 }
7019
7020 /* Ignore attribute for function types. */
7021 if (TREE_CODE (*node) != FUNCTION_DECL)
7022 {
7023 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7024 name);
7025 *no_add_attrs = true;
7026 return NULL_TREE;
7027 }
7028
7029 fndecl = *node;
7030
7031 /* Warn for static linkage functions. */
7032 if (!TREE_PUBLIC (fndecl))
7033 {
7034 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7035 "with static linkage", name);
7036 *no_add_attrs = true;
7037 return NULL_TREE;
7038 }
7039
7040 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7041 TREE_TYPE (fndecl));
7042 return NULL_TREE;
7043 }
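/* Illustrative only (requires -mcmse):
     int __attribute__ ((cmse_nonsecure_entry)) get_status (void);
   declares an entry function callable from the non-secure state; the
   checks above reject it if any argument or the return value would be
   passed on the stack.  */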
7044
7045
7046 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7047 function will check whether the attribute is allowed here and will add the
7048 attribute to the function type tree or otherwise issue a diagnostic. The
7049 reason we check this at declaration time is to only allow the use of the
7050 attribute with declarations of function pointers and not function
7051 declarations. This function checks NODE is of the expected type and issues
7052 diagnostics otherwise using NAME. If it is not of the expected type
7053 *NO_ADD_ATTRS will be set to true. */
7054
7055 static tree
7056 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7057 tree /* args */,
7058 int /* flags */,
7059 bool *no_add_attrs)
7060 {
7061 tree decl = NULL_TREE, fntype = NULL_TREE;
7062 tree type;
7063
7064 if (!use_cmse)
7065 {
7066 *no_add_attrs = true;
7067 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> option",
7068 name);
7069 return NULL_TREE;
7070 }
7071
7072 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7073 {
7074 decl = *node;
7075 fntype = TREE_TYPE (decl);
7076 }
7077
7078 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7079 fntype = TREE_TYPE (fntype);
7080
7081 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7082 {
7083 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7084 "function pointer", name);
7085 *no_add_attrs = true;
7086 return NULL_TREE;
7087 }
7088
7089 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7090
7091 if (*no_add_attrs)
7092 return NULL_TREE;
7093
7094 /* Prevent trees being shared among function types with and without
7095 cmse_nonsecure_call attribute. */
7096 type = TREE_TYPE (decl);
7097
7098 type = build_distinct_type_copy (type);
7099 TREE_TYPE (decl) = type;
7100 fntype = type;
7101
7102 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7103 {
7104 type = fntype;
7105 fntype = TREE_TYPE (fntype);
7106 fntype = build_distinct_type_copy (fntype);
7107 TREE_TYPE (type) = fntype;
7108 }
7109
7110 /* Construct a type attribute and add it to the function type. */
7111 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7112 TYPE_ATTRIBUTES (fntype));
7113 TYPE_ATTRIBUTES (fntype) = attrs;
7114 return NULL_TREE;
7115 }
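/* Illustrative only (requires -mcmse):
     typedef void (*ns_fn_t) (void) __attribute__ ((cmse_nonsecure_call));
   the attribute attaches to the function type behind the pointer, so
   calls made through an ns_fn_t value use the non-secure call sequence;
   applying it to an ordinary function declaration is rejected above.  */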
7116
7117 /* Return 0 if the attributes for two types are incompatible, 1 if they
7118 are compatible, and 2 if they are nearly compatible (which causes a
7119 warning to be generated). */
7120 static int
7121 arm_comp_type_attributes (const_tree type1, const_tree type2)
7122 {
7123 int l1, l2, s1, s2;
7124
7125 /* Check for mismatch of non-default calling convention. */
7126 if (TREE_CODE (type1) != FUNCTION_TYPE)
7127 return 1;
7128
7129 /* Check for mismatched call attributes. */
7130 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7131 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7132 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7133 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7134
7135 /* Only bother to check if an attribute is defined. */
7136 if (l1 | l2 | s1 | s2)
7137 {
7138 /* If one type has an attribute, the other must have the same attribute. */
7139 if ((l1 != l2) || (s1 != s2))
7140 return 0;
7141
7142 /* Disallow mixed attributes. */
7143 if ((l1 & s2) || (l2 & s1))
7144 return 0;
7145 }
7146
7147 /* Check for mismatched ISR attribute. */
7148 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7149 if (! l1)
7150 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7151 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7152 if (! l2)
7153 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7154 if (l1 != l2)
7155 return 0;
7156
7157 l1 = lookup_attribute ("cmse_nonsecure_call",
7158 TYPE_ATTRIBUTES (type1)) != NULL;
7159 l2 = lookup_attribute ("cmse_nonsecure_call",
7160 TYPE_ATTRIBUTES (type2)) != NULL;
7161
7162 if (l1 != l2)
7163 return 0;
7164
7165 return 1;
7166 }
7167
7168 /* Assigns default attributes to newly defined type. This is used to
7169 set short_call/long_call attributes for function types of
7170 functions defined inside corresponding #pragma scopes. */
7171 static void
7172 arm_set_default_type_attributes (tree type)
7173 {
7174 /* Add __attribute__ ((long_call)) to all functions, when
7175 inside #pragma long_calls or __attribute__ ((short_call)),
7176 when inside #pragma no_long_calls. */
7177 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7178 {
7179 tree type_attr_list, attr_name;
7180 type_attr_list = TYPE_ATTRIBUTES (type);
7181
7182 if (arm_pragma_long_calls == LONG)
7183 attr_name = get_identifier ("long_call");
7184 else if (arm_pragma_long_calls == SHORT)
7185 attr_name = get_identifier ("short_call");
7186 else
7187 return;
7188
7189 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7190 TYPE_ATTRIBUTES (type) = type_attr_list;
7191 }
7192 }
7193 \f
7194 /* Return true if DECL is known to be linked into section SECTION. */
7195
7196 static bool
7197 arm_function_in_section_p (tree decl, section *section)
7198 {
7199 /* We can only be certain about the prevailing symbol definition. */
7200 if (!decl_binds_to_current_def_p (decl))
7201 return false;
7202
7203 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7204 if (!DECL_SECTION_NAME (decl))
7205 {
7206 /* Make sure that we will not create a unique section for DECL. */
7207 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7208 return false;
7209 }
7210
7211 return function_section (decl) == section;
7212 }
7213
7214 /* Return nonzero if a 32-bit "long_call" should be generated for
7215 a call from the current function to DECL. We generate a long_call
7216 if the function:
7217
7218 a. has an __attribute__ ((long_call))
7219 or b. is within the scope of a #pragma long_calls
7220 or c. the -mlong-calls command line switch has been specified
7221
7222 However we do not generate a long call if the function:
7223
7224 d. has an __attribute__ ((short_call))
7225 or e. is inside the scope of a #pragma no_long_calls
7226 or f. is defined in the same section as the current function. */
7227
7228 bool
7229 arm_is_long_call_p (tree decl)
7230 {
7231 tree attrs;
7232
7233 if (!decl)
7234 return TARGET_LONG_CALLS;
7235
7236 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7237 if (lookup_attribute ("short_call", attrs))
7238 return false;
7239
7240 /* For "f", be conservative, and only cater for cases in which the
7241 whole of the current function is placed in the same section. */
7242 if (!flag_reorder_blocks_and_partition
7243 && TREE_CODE (decl) == FUNCTION_DECL
7244 && arm_function_in_section_p (decl, current_function_section ()))
7245 return false;
7246
7247 if (lookup_attribute ("long_call", attrs))
7248 return true;
7249
7250 return TARGET_LONG_CALLS;
7251 }
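/* Illustrative only: with -mlong-calls on the command line,
     extern void lib_fn (void);
     void local_fn (void) __attribute__ ((short_call));
   calls to lib_fn use the long-call sequence while calls to local_fn
   keep the ordinary BL, following rules a-f above.  */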
7252
7253 /* Return nonzero if it is ok to make a tail-call to DECL. */
7254 static bool
7255 arm_function_ok_for_sibcall (tree decl, tree exp)
7256 {
7257 unsigned long func_type;
7258
7259 if (cfun->machine->sibcall_blocked)
7260 return false;
7261
7262 /* Never tailcall something if we are generating code for Thumb-1. */
7263 if (TARGET_THUMB1)
7264 return false;
7265
7266 /* The PIC register is live on entry to VxWorks PLT entries, so we
7267 must make the call before restoring the PIC register. */
7268 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7269 return false;
7270
7271 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7272 may be used both as the target of the call and as the base register for
7273 restoring the VFP registers. */
7274 if (TARGET_APCS_FRAME && TARGET_ARM
7275 && TARGET_HARD_FLOAT
7276 && decl && arm_is_long_call_p (decl))
7277 return false;
7278
7279 /* If we are interworking and the function is not declared static
7280 then we can't tail-call it unless we know that it exists in this
7281 compilation unit (since it might be a Thumb routine). */
7282 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7283 && !TREE_ASM_WRITTEN (decl))
7284 return false;
7285
7286 func_type = arm_current_func_type ();
7287 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7288 if (IS_INTERRUPT (func_type))
7289 return false;
7290
7291 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7292 generated for entry functions themselves. */
7293 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7294 return false;
7295
7296 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7297 this would complicate matters for later code generation. */
7298 if (TREE_CODE (exp) == CALL_EXPR)
7299 {
7300 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7301 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7302 return false;
7303 }
7304
7305 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7306 {
7307 /* Check that the return value locations are the same. For
7308 example that we aren't returning a value from the sibling in
7309 a VFP register but then need to transfer it to a core
7310 register. */
7311 rtx a, b;
7312 tree decl_or_type = decl;
7313
7314 /* If it is an indirect function pointer, get the function type. */
7315 if (!decl)
7316 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7317
7318 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7319 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7320 cfun->decl, false);
7321 if (!rtx_equal_p (a, b))
7322 return false;
7323 }
7324
7325 /* Never tailcall if function may be called with a misaligned SP. */
7326 if (IS_STACKALIGN (func_type))
7327 return false;
7328
7329 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7330 references should become a NOP. Don't convert such calls into
7331 sibling calls. */
7332 if (TARGET_AAPCS_BASED
7333 && arm_abi == ARM_ABI_AAPCS
7334 && decl
7335 && DECL_WEAK (decl))
7336 return false;
7337
7338 /* We cannot do a tailcall for an indirect call by descriptor if all the
7339 argument registers are used because the only register left to load the
7340 address is IP and it will already contain the static chain. */
7341 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7342 {
7343 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7344 CUMULATIVE_ARGS cum;
7345 cumulative_args_t cum_v;
7346
7347 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7348 cum_v = pack_cumulative_args (&cum);
7349
7350 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7351 {
7352 tree type = TREE_VALUE (t);
7353 if (!VOID_TYPE_P (type))
7354 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7355 }
7356
7357 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7358 return false;
7359 }
7360
7361 /* Everything else is ok. */
7362 return true;
7363 }
7364
7365 \f
7366 /* Addressing mode support functions. */
7367
7368 /* Return nonzero if X is a legitimate immediate operand when compiling
7369 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7370 int
7371 legitimate_pic_operand_p (rtx x)
7372 {
7373 if (GET_CODE (x) == SYMBOL_REF
7374 || (GET_CODE (x) == CONST
7375 && GET_CODE (XEXP (x, 0)) == PLUS
7376 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7377 return 0;
7378
7379 return 1;
7380 }
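/* For instance (illustrative), when compiling with -fpic the address
   constant &global_var, or &global_var + 4 wrapped in a CONST PLUS, is
   rejected here and must be legitimized through the GOT, whereas plain
   integer constants remain legitimate immediates.  */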
7381
7382 /* Record that the current function needs a PIC register. Initialize
7383 cfun->machine->pic_reg if we have not already done so. */
7384
7385 static void
7386 require_pic_register (void)
7387 {
7388 /* A lot of the logic here is made obscure by the fact that this
7389 routine gets called as part of the rtx cost estimation process.
7390 We don't want those calls to affect any assumptions about the real
7391 function; and further, we can't call entry_of_function() until we
7392 start the real expansion process. */
7393 if (!crtl->uses_pic_offset_table)
7394 {
7395 gcc_assert (can_create_pseudo_p ());
7396 if (arm_pic_register != INVALID_REGNUM
7397 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7398 {
7399 if (!cfun->machine->pic_reg)
7400 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7401
7402 /* Play games to avoid marking the function as needing pic
7403 if we are being called as part of the cost-estimation
7404 process. */
7405 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7406 crtl->uses_pic_offset_table = 1;
7407 }
7408 else
7409 {
7410 rtx_insn *seq, *insn;
7411
7412 if (!cfun->machine->pic_reg)
7413 cfun->machine->pic_reg = gen_reg_rtx (Pmode);
7414
7415 /* Play games to avoid marking the function as needing pic
7416 if we are being called as part of the cost-estimation
7417 process. */
7418 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7419 {
7420 crtl->uses_pic_offset_table = 1;
7421 start_sequence ();
7422
7423 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7424 && arm_pic_register > LAST_LO_REGNUM)
7425 emit_move_insn (cfun->machine->pic_reg,
7426 gen_rtx_REG (Pmode, arm_pic_register));
7427 else
7428 arm_load_pic_register (0UL);
7429
7430 seq = get_insns ();
7431 end_sequence ();
7432
7433 for (insn = seq; insn; insn = NEXT_INSN (insn))
7434 if (INSN_P (insn))
7435 INSN_LOCATION (insn) = prologue_location;
7436
7437 /* We can be called during expansion of PHI nodes, where
7438 we can't yet emit instructions directly in the final
7439 insn stream. Queue the insns on the entry edge, they will
7440 be committed after everything else is expanded. */
7441 insert_insn_on_edge (seq,
7442 single_succ_edge (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7443 }
7444 }
7445 }
7446 }
7447
7448 rtx
7449 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg)
7450 {
7451 if (GET_CODE (orig) == SYMBOL_REF
7452 || GET_CODE (orig) == LABEL_REF)
7453 {
7454 if (reg == 0)
7455 {
7456 gcc_assert (can_create_pseudo_p ());
7457 reg = gen_reg_rtx (Pmode);
7458 }
7459
7460 /* VxWorks does not impose a fixed gap between segments; the run-time
7461 gap can be different from the object-file gap. We therefore can't
7462 use GOTOFF unless we are absolutely sure that the symbol is in the
7463 same segment as the GOT. Unfortunately, the flexibility of linker
7464 scripts means that we can't be sure of that in general, so assume
7465 that GOTOFF is never valid on VxWorks. */
7466 /* References to weak symbols cannot be resolved locally: they
7467 may be overridden by a non-weak definition at link time. */
7468 rtx_insn *insn;
7469 if ((GET_CODE (orig) == LABEL_REF
7470 || (GET_CODE (orig) == SYMBOL_REF
7471 && SYMBOL_REF_LOCAL_P (orig)
7472 && (SYMBOL_REF_DECL (orig)
7473 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7474 && NEED_GOT_RELOC
7475 && arm_pic_data_is_text_relative)
7476 insn = arm_pic_static_addr (orig, reg);
7477 else
7478 {
7479 rtx pat;
7480 rtx mem;
7481
7482 /* If this function doesn't have a pic register, create one now. */
7483 require_pic_register ();
7484
7485 pat = gen_calculate_pic_address (reg, cfun->machine->pic_reg, orig);
7486
7487 /* Make the MEM as close to a constant as possible. */
7488 mem = SET_SRC (pat);
7489 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7490 MEM_READONLY_P (mem) = 1;
7491 MEM_NOTRAP_P (mem) = 1;
7492
7493 insn = emit_insn (pat);
7494 }
7495
7496 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7497 by the loop optimizer. */
7498 set_unique_reg_note (insn, REG_EQUAL, orig);
7499
7500 return reg;
7501 }
7502 else if (GET_CODE (orig) == CONST)
7503 {
7504 rtx base, offset;
7505
7506 if (GET_CODE (XEXP (orig, 0)) == PLUS
7507 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7508 return orig;
7509
7510 /* Handle the case where we have: const (UNSPEC_TLS). */
7511 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7512 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7513 return orig;
7514
7515 /* Handle the case where we have:
7516 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7517 CONST_INT. */
7518 if (GET_CODE (XEXP (orig, 0)) == PLUS
7519 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7520 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7521 {
7522 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7523 return orig;
7524 }
7525
7526 if (reg == 0)
7527 {
7528 gcc_assert (can_create_pseudo_p ());
7529 reg = gen_reg_rtx (Pmode);
7530 }
7531
7532 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7533
7534 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg);
7535 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7536 base == reg ? 0 : reg);
7537
7538 if (CONST_INT_P (offset))
7539 {
7540 /* The base register doesn't really matter, we only want to
7541 test the index for the appropriate mode. */
7542 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7543 {
7544 gcc_assert (can_create_pseudo_p ());
7545 offset = force_reg (Pmode, offset);
7546 }
7547
7548 if (CONST_INT_P (offset))
7549 return plus_constant (Pmode, base, INTVAL (offset));
7550 }
7551
7552 if (GET_MODE_SIZE (mode) > 4
7553 && (GET_MODE_CLASS (mode) == MODE_INT
7554 || TARGET_SOFT_FLOAT))
7555 {
7556 emit_insn (gen_addsi3 (reg, base, offset));
7557 return reg;
7558 }
7559
7560 return gen_rtx_PLUS (Pmode, base, offset);
7561 }
7562
7563 return orig;
7564 }
7565
7566
7567 /* Find a spare register to use during the prolog of a function. */
7568
7569 static int
7570 thumb_find_work_register (unsigned long pushed_regs_mask)
7571 {
7572 int reg;
7573
7574 /* Check the argument registers first as these are call-used. The
7575 register allocation order means that sometimes r3 might be used
7576 but earlier argument registers might not, so check them all. */
7577 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7578 if (!df_regs_ever_live_p (reg))
7579 return reg;
7580
7581 /* Before going on to check the call-saved registers we can try a couple
7582 more ways of deducing that r3 is available. The first is when we are
7583 pushing anonymous arguments onto the stack and we have fewer than 4
7584 registers' worth of fixed arguments (*). In this case r3 will be part of
7585 the variable argument list and so we can be sure that it will be
7586 pushed right at the start of the function. Hence it will be available
7587 for the rest of the prologue.
7588 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7589 if (cfun->machine->uses_anonymous_args
7590 && crtl->args.pretend_args_size > 0)
7591 return LAST_ARG_REGNUM;
7592
7593 /* The other case is when we have fixed arguments but fewer than 4 registers'
7594 worth. In this case r3 might be used in the body of the function, but
7595 it is not being used to convey an argument into the function. In theory
7596 we could just check crtl->args.size to see how many bytes are
7597 being passed in argument registers, but it seems that it is unreliable.
7598 Sometimes it will have the value 0 when in fact arguments are being
7599 passed. (See testcase execute/20021111-1.c for an example). So we also
7600 check the args_info.nregs field as well. The problem with this field is
7601 that it makes no allowances for arguments that are passed to the
7602 function but which are not used. Hence we could miss an opportunity
7603 when a function has an unused argument in r3. But it is better to be
7604 safe than to be sorry. */
7605 if (! cfun->machine->uses_anonymous_args
7606 && crtl->args.size >= 0
7607 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7608 && (TARGET_AAPCS_BASED
7609 ? crtl->args.info.aapcs_ncrn < 4
7610 : crtl->args.info.nregs < 4))
7611 return LAST_ARG_REGNUM;
7612
7613 /* Otherwise look for a call-saved register that is going to be pushed. */
7614 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7615 if (pushed_regs_mask & (1 << reg))
7616 return reg;
7617
7618 if (TARGET_THUMB2)
7619 {
7620 /* Thumb-2 can use high regs. */
7621 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7622 if (pushed_regs_mask & (1 << reg))
7623 return reg;
7624 }
7625 /* Something went wrong - thumb_compute_save_reg_mask()
7626 should have arranged for a suitable register to be pushed. */
7627 gcc_unreachable ();
7628 }
7629
7630 static GTY(()) int pic_labelno;
7631
7632 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7633 low register. */
7634
7635 void
7636 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED)
7637 {
7638 rtx l1, labelno, pic_tmp, pic_rtx, pic_reg;
7639
7640 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7641 return;
7642
7643 gcc_assert (flag_pic);
7644
7645 pic_reg = cfun->machine->pic_reg;
7646 if (TARGET_VXWORKS_RTP)
7647 {
7648 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7649 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7650 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7651
7652 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7653
7654 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7655 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7656 }
7657 else
7658 {
7659 /* We use an UNSPEC rather than a LABEL_REF because this label
7660 never appears in the code stream. */
7661
7662 labelno = GEN_INT (pic_labelno++);
7663 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7664 l1 = gen_rtx_CONST (VOIDmode, l1);
7665
7666 /* On the ARM the PC register contains 'dot + 8' at the time of the
7667 addition, on the Thumb it is 'dot + 4'. */
7668 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7669 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7670 UNSPEC_GOTSYM_OFF);
7671 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7672
7673 if (TARGET_32BIT)
7674 {
7675 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7676 }
7677 else /* TARGET_THUMB1 */
7678 {
7679 if (arm_pic_register != INVALID_REGNUM
7680 && REGNO (pic_reg) > LAST_LO_REGNUM)
7681 {
7682 /* We will have pushed the pic register, so we should always be
7683 able to find a work register. */
7684 pic_tmp = gen_rtx_REG (SImode,
7685 thumb_find_work_register (saved_regs));
7686 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7687 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7688 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7689 }
7690 else if (arm_pic_register != INVALID_REGNUM
7691 && arm_pic_register > LAST_LO_REGNUM
7692 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7693 {
7694 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7695 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7696 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7697 }
7698 else
7699 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7700 }
7701 }
7702
7703 /* Need to emit this whether or not we obey regdecls,
7704 since setjmp/longjmp can cause life info to screw up. */
7705 emit_use (pic_reg);
7706 }
7707
7708 /* Generate code to load the address of a static var when flag_pic is set. */
7709 static rtx_insn *
7710 arm_pic_static_addr (rtx orig, rtx reg)
7711 {
7712 rtx l1, labelno, offset_rtx;
7713
7714 gcc_assert (flag_pic);
7715
7716 /* We use an UNSPEC rather than a LABEL_REF because this label
7717 never appears in the code stream. */
7718 labelno = GEN_INT (pic_labelno++);
7719 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7720 l1 = gen_rtx_CONST (VOIDmode, l1);
7721
7722 /* On the ARM the PC register contains 'dot + 8' at the time of the
7723 addition, on the Thumb it is 'dot + 4'. */
7724 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7725 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7726 UNSPEC_SYMBOL_OFFSET);
7727 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7728
7729 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7730 }
7731
7732 /* Return nonzero if X is valid as an ARM state addressing register. */
7733 static int
7734 arm_address_register_rtx_p (rtx x, int strict_p)
7735 {
7736 int regno;
7737
7738 if (!REG_P (x))
7739 return 0;
7740
7741 regno = REGNO (x);
7742
7743 if (strict_p)
7744 return ARM_REGNO_OK_FOR_BASE_P (regno);
7745
7746 return (regno <= LAST_ARM_REGNUM
7747 || regno >= FIRST_PSEUDO_REGISTER
7748 || regno == FRAME_POINTER_REGNUM
7749 || regno == ARG_POINTER_REGNUM);
7750 }
7751
7752 /* Return TRUE if this rtx is the difference of a symbol and a label,
7753 and will reduce to a PC-relative relocation in the object file.
7754 Expressions like this can be left alone when generating PIC, rather
7755 than forced through the GOT. */
7756 static int
7757 pcrel_constant_p (rtx x)
7758 {
7759 if (GET_CODE (x) == MINUS)
7760 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7761
7762 return FALSE;
7763 }
7764
7765 /* Return true if X will surely end up in an index register after next
7766 splitting pass. */
7767 static bool
7768 will_be_in_index_register (const_rtx x)
7769 {
7770 /* arm.md: calculate_pic_address will split this into a register. */
7771 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7772 }
7773
7774 /* Return nonzero if X is a valid ARM state address operand. */
7775 int
7776 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7777 int strict_p)
7778 {
7779 bool use_ldrd;
7780 enum rtx_code code = GET_CODE (x);
7781
7782 if (arm_address_register_rtx_p (x, strict_p))
7783 return 1;
7784
7785 use_ldrd = (TARGET_LDRD
7786 && (mode == DImode || mode == DFmode));
7787
7788 if (code == POST_INC || code == PRE_DEC
7789 || ((code == PRE_INC || code == POST_DEC)
7790 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7791 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7792
7793 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7794 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7795 && GET_CODE (XEXP (x, 1)) == PLUS
7796 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7797 {
7798 rtx addend = XEXP (XEXP (x, 1), 1);
7799
7800 /* Don't allow ldrd post increment by register because it's hard
7801 to fixup invalid register choices. */
7802 if (use_ldrd
7803 && GET_CODE (x) == POST_MODIFY
7804 && REG_P (addend))
7805 return 0;
7806
7807 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7808 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7809 }
7810
7811 /* After reload constants split into minipools will have addresses
7812 from a LABEL_REF. */
7813 else if (reload_completed
7814 && (code == LABEL_REF
7815 || (code == CONST
7816 && GET_CODE (XEXP (x, 0)) == PLUS
7817 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7818 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7819 return 1;
7820
7821 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7822 return 0;
7823
7824 else if (code == PLUS)
7825 {
7826 rtx xop0 = XEXP (x, 0);
7827 rtx xop1 = XEXP (x, 1);
7828
7829 return ((arm_address_register_rtx_p (xop0, strict_p)
7830 && ((CONST_INT_P (xop1)
7831 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7832 || (!strict_p && will_be_in_index_register (xop1))))
7833 || (arm_address_register_rtx_p (xop1, strict_p)
7834 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7835 }
7836
7837 #if 0
7838 /* Reload currently can't handle MINUS, so disable this for now */
7839 else if (GET_CODE (x) == MINUS)
7840 {
7841 rtx xop0 = XEXP (x, 0);
7842 rtx xop1 = XEXP (x, 1);
7843
7844 return (arm_address_register_rtx_p (xop0, strict_p)
7845 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7846 }
7847 #endif
7848
7849 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7850 && code == SYMBOL_REF
7851 && CONSTANT_POOL_ADDRESS_P (x)
7852 && ! (flag_pic
7853 && symbol_mentioned_p (get_pool_constant (x))
7854 && ! pcrel_constant_p (get_pool_constant (x))))
7855 return 1;
7856
7857 return 0;
7858 }
7859
7860 /* Return true if we can avoid creating a constant pool entry for x. */
7861 static bool
7862 can_avoid_literal_pool_for_label_p (rtx x)
7863 {
7864 /* Normally we can assign constant values to target registers without
7865 the help of a constant pool. But there are cases where we have to use a
7866 constant pool, for example:
7867 1) assigning a label to a register.
7868 2) sign-extending an 8-bit value to 32 bits and assigning it to a register.
7869
7870 Constant pool access in format:
7871 (set (reg r0) (mem (symbol_ref (".LC0"))))
7872 will cause the use of literal pool (later in function arm_reorg).
7873 So here we mark such format as an invalid format, then the compiler
7874 will adjust it into:
7875 (set (reg r0) (symbol_ref (".LC0")))
7876 (set (reg r0) (mem (reg r0))).
7877 No extra register is required, and (mem (reg r0)) won't cause the use
7878 of literal pools. */
7879 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7880 && CONSTANT_POOL_ADDRESS_P (x))
7881 return true;
7882 return false;
7883 }
7884
7885
7886 /* Return nonzero if X is a valid Thumb-2 address operand. */
7887 static int
7888 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7889 {
7890 bool use_ldrd;
7891 enum rtx_code code = GET_CODE (x);
7892
7893 if (arm_address_register_rtx_p (x, strict_p))
7894 return 1;
7895
7896 use_ldrd = (TARGET_LDRD
7897 && (mode == DImode || mode == DFmode));
7898
7899 if (code == POST_INC || code == PRE_DEC
7900 || ((code == PRE_INC || code == POST_DEC)
7901 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7902 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7903
7904 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7905 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7906 && GET_CODE (XEXP (x, 1)) == PLUS
7907 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7908 {
7909 /* Thumb-2 only has autoincrement by constant. */
7910 rtx addend = XEXP (XEXP (x, 1), 1);
7911 HOST_WIDE_INT offset;
7912
7913 if (!CONST_INT_P (addend))
7914 return 0;
7915
7916 offset = INTVAL (addend);
7917 if (GET_MODE_SIZE (mode) <= 4)
7918 return (offset > -256 && offset < 256);
7919
7920 return (use_ldrd && offset > -1024 && offset < 1024
7921 && (offset & 3) == 0);
7922 }
7923
7924 /* After reload constants split into minipools will have addresses
7925 from a LABEL_REF. */
7926 else if (reload_completed
7927 && (code == LABEL_REF
7928 || (code == CONST
7929 && GET_CODE (XEXP (x, 0)) == PLUS
7930 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7931 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7932 return 1;
7933
7934 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7935 return 0;
7936
7937 else if (code == PLUS)
7938 {
7939 rtx xop0 = XEXP (x, 0);
7940 rtx xop1 = XEXP (x, 1);
7941
7942 return ((arm_address_register_rtx_p (xop0, strict_p)
7943 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7944 || (!strict_p && will_be_in_index_register (xop1))))
7945 || (arm_address_register_rtx_p (xop1, strict_p)
7946 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7947 }
7948
7949 else if (can_avoid_literal_pool_for_label_p (x))
7950 return 0;
7951
7952 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7953 && code == SYMBOL_REF
7954 && CONSTANT_POOL_ADDRESS_P (x)
7955 && ! (flag_pic
7956 && symbol_mentioned_p (get_pool_constant (x))
7957 && ! pcrel_constant_p (get_pool_constant (x))))
7958 return 1;
7959
7960 return 0;
7961 }
7962
7963 /* Return nonzero if INDEX is valid for an address index operand in
7964 ARM state. */
7965 static int
7966 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
7967 int strict_p)
7968 {
7969 HOST_WIDE_INT range;
7970 enum rtx_code code = GET_CODE (index);
7971
7972 /* Standard coprocessor addressing modes. */
7973 if (TARGET_HARD_FLOAT
7974 && (mode == SFmode || mode == DFmode))
7975 return (code == CONST_INT && INTVAL (index) < 1024
7976 && INTVAL (index) > -1024
7977 && (INTVAL (index) & 3) == 0);
7978
7979 /* For quad modes, we restrict the constant offset to be slightly less
7980 than what the instruction format permits. We do this because for
7981 quad mode moves, we will actually decompose them into two separate
7982 double-mode reads or writes. INDEX must therefore be a valid
7983 (double-mode) offset and so should INDEX+8. */
7984 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
7985 return (code == CONST_INT
7986 && INTVAL (index) < 1016
7987 && INTVAL (index) > -1024
7988 && (INTVAL (index) & 3) == 0);
7989
7990 /* We have no such constraint on double mode offsets, so we permit the
7991 full range of the instruction format. */
7992 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
7993 return (code == CONST_INT
7994 && INTVAL (index) < 1024
7995 && INTVAL (index) > -1024
7996 && (INTVAL (index) & 3) == 0);
7997
7998 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
7999 return (code == CONST_INT
8000 && INTVAL (index) < 1024
8001 && INTVAL (index) > -1024
8002 && (INTVAL (index) & 3) == 0);
8003
8004 if (arm_address_register_rtx_p (index, strict_p)
8005 && (GET_MODE_SIZE (mode) <= 4))
8006 return 1;
8007
8008 if (mode == DImode || mode == DFmode)
8009 {
8010 if (code == CONST_INT)
8011 {
8012 HOST_WIDE_INT val = INTVAL (index);
8013
8014 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8015 If vldr is selected it uses arm_coproc_mem_operand. */
8016 if (TARGET_LDRD)
8017 return val > -256 && val < 256;
8018 else
8019 return val > -4096 && val < 4092;
8020 }
8021
8022 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8023 }
8024
8025 if (GET_MODE_SIZE (mode) <= 4
8026 && ! (arm_arch4
8027 && (mode == HImode
8028 || mode == HFmode
8029 || (mode == QImode && outer == SIGN_EXTEND))))
8030 {
8031 if (code == MULT)
8032 {
8033 rtx xiop0 = XEXP (index, 0);
8034 rtx xiop1 = XEXP (index, 1);
8035
8036 return ((arm_address_register_rtx_p (xiop0, strict_p)
8037 && power_of_two_operand (xiop1, SImode))
8038 || (arm_address_register_rtx_p (xiop1, strict_p)
8039 && power_of_two_operand (xiop0, SImode)));
8040 }
8041 else if (code == LSHIFTRT || code == ASHIFTRT
8042 || code == ASHIFT || code == ROTATERT)
8043 {
8044 rtx op = XEXP (index, 1);
8045
8046 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8047 && CONST_INT_P (op)
8048 && INTVAL (op) > 0
8049 && INTVAL (op) <= 31);
8050 }
8051 }
8052
8053 /* For ARM v4 we may be doing a sign-extend operation during the
8054 load. */
8055 if (arm_arch4)
8056 {
8057 if (mode == HImode
8058 || mode == HFmode
8059 || (outer == SIGN_EXTEND && mode == QImode))
8060 range = 256;
8061 else
8062 range = 4096;
8063 }
8064 else
8065 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8066
8067 return (code == CONST_INT
8068 && INTVAL (index) < range
8069 && INTVAL (index) > -range);
8070 }
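/* Illustrative examples of indices accepted above in ARM state:
     ldr   r0, [r1, #4092]        @ word load, 12-bit immediate range
     ldrh  r0, [r1, #254]         @ halfword, 8-bit range on ARMv4 and later
     ldr   r0, [r1, r2, lsl #3]   @ scaled register index
     vldr  d0, [r1, #1020]        @ coprocessor: 8-bit range scaled by 4
   (sketch only; the exact limits are the ones encoded above.)  */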
8071
8072 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8073 index operand. i.e. 1, 2, 4 or 8. */
8074 static bool
8075 thumb2_index_mul_operand (rtx op)
8076 {
8077 HOST_WIDE_INT val;
8078
8079 if (!CONST_INT_P (op))
8080 return false;
8081
8082 val = INTVAL (op);
8083 return (val == 1 || val == 2 || val == 4 || val == 8);
8084 }
8085
8086 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8087 static int
8088 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8089 {
8090 enum rtx_code code = GET_CODE (index);
8091
8092 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8093 /* Standard coprocessor addressing modes. */
8094 if (TARGET_HARD_FLOAT
8095 && (mode == SFmode || mode == DFmode))
8096 return (code == CONST_INT && INTVAL (index) < 1024
8097 /* Thumb-2 allows only > -256 index range for its core register
8098 load/stores. Since we allow SF/DF in core registers, we have
8099 to use the intersection between -256~4096 (core) and -1024~1024
8100 (coprocessor). */
8101 && INTVAL (index) > -256
8102 && (INTVAL (index) & 3) == 0);
8103
8104 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8105 {
8106 /* For DImode assume values will usually live in core regs
8107 and only allow LDRD addressing modes. */
8108 if (!TARGET_LDRD || mode != DImode)
8109 return (code == CONST_INT
8110 && INTVAL (index) < 1024
8111 && INTVAL (index) > -1024
8112 && (INTVAL (index) & 3) == 0);
8113 }
8114
8115 /* For quad modes, we restrict the constant offset to be slightly less
8116 than what the instruction format permits. We do this because for
8117 quad mode moves, we will actually decompose them into two separate
8118 double-mode reads or writes. INDEX must therefore be a valid
8119 (double-mode) offset and so should INDEX+8. */
8120 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8121 return (code == CONST_INT
8122 && INTVAL (index) < 1016
8123 && INTVAL (index) > -1024
8124 && (INTVAL (index) & 3) == 0);
8125
8126 /* We have no such constraint on double mode offsets, so we permit the
8127 full range of the instruction format. */
8128 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8129 return (code == CONST_INT
8130 && INTVAL (index) < 1024
8131 && INTVAL (index) > -1024
8132 && (INTVAL (index) & 3) == 0);
8133
8134 if (arm_address_register_rtx_p (index, strict_p)
8135 && (GET_MODE_SIZE (mode) <= 4))
8136 return 1;
8137
8138 if (mode == DImode || mode == DFmode)
8139 {
8140 if (code == CONST_INT)
8141 {
8142 HOST_WIDE_INT val = INTVAL (index);
8143 /* Thumb-2 ldrd only has reg+const addressing modes.
8144 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8145 If vldr is selected it uses arm_coproc_mem_operand. */
8146 if (TARGET_LDRD)
8147 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8148 else
8149 return IN_RANGE (val, -255, 4095 - 4);
8150 }
8151 else
8152 return 0;
8153 }
8154
8155 if (code == MULT)
8156 {
8157 rtx xiop0 = XEXP (index, 0);
8158 rtx xiop1 = XEXP (index, 1);
8159
8160 return ((arm_address_register_rtx_p (xiop0, strict_p)
8161 && thumb2_index_mul_operand (xiop1))
8162 || (arm_address_register_rtx_p (xiop1, strict_p)
8163 && thumb2_index_mul_operand (xiop0)));
8164 }
8165 else if (code == ASHIFT)
8166 {
8167 rtx op = XEXP (index, 1);
8168
8169 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8170 && CONST_INT_P (op)
8171 && INTVAL (op) > 0
8172 && INTVAL (op) <= 3);
8173 }
8174
8175 return (code == CONST_INT
8176 && INTVAL (index) < 4096
8177 && INTVAL (index) > -256);
8178 }
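/* Illustrative examples of indices accepted above in Thumb-2 state:
     ldr   r0, [r1, #4095]        @ positive 12-bit immediate
     ldr   r0, [r1, #-255]        @ negative offsets limited to 8 bits
     ldr   r0, [r1, r2, lsl #2]   @ shift amounts of at most 3
     ldrd  r0, r1, [r2, #1020]    @ ldrd: +/-1020, word-aligned
   (sketch only; the exact limits are the ones encoded above.)  */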
8179
8180 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8181 static int
8182 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8183 {
8184 int regno;
8185
8186 if (!REG_P (x))
8187 return 0;
8188
8189 regno = REGNO (x);
8190
8191 if (strict_p)
8192 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8193
8194 return (regno <= LAST_LO_REGNUM
8195 || regno > LAST_VIRTUAL_REGISTER
8196 || regno == FRAME_POINTER_REGNUM
8197 || (GET_MODE_SIZE (mode) >= 4
8198 && (regno == STACK_POINTER_REGNUM
8199 || regno >= FIRST_PSEUDO_REGISTER
8200 || x == hard_frame_pointer_rtx
8201 || x == arg_pointer_rtx)));
8202 }
8203
8204 /* Return nonzero if x is a legitimate index register. This is the case
8205 for any base register that can access a QImode object. */
8206 inline static int
8207 thumb1_index_register_rtx_p (rtx x, int strict_p)
8208 {
8209 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8210 }
8211
8212 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8213
8214 The AP may be eliminated to either the SP or the FP, so we use the
8215 least common denominator, e.g. SImode, and offsets from 0 to 64.
8216
8217 ??? Verify whether the above is the right approach.
8218
8219 ??? Also, the FP may be eliminated to the SP, so perhaps that
8220 needs special handling also.
8221
8222 ??? Look at how the mips16 port solves this problem. It probably uses
8223 better ways to solve some of these problems.
8224
8225 Although it is not incorrect, we don't accept QImode and HImode
8226 addresses based on the frame pointer or arg pointer until the
8227 reload pass starts. This is so that eliminating such addresses
8228 into stack based ones won't produce impossible code. */
8229 int
8230 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8231 {
8232 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8233 return 0;
8234
8235 /* ??? Not clear if this is right. Experiment. */
8236 if (GET_MODE_SIZE (mode) < 4
8237 && !(reload_in_progress || reload_completed)
8238 && (reg_mentioned_p (frame_pointer_rtx, x)
8239 || reg_mentioned_p (arg_pointer_rtx, x)
8240 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8241 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8242 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8243 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8244 return 0;
8245
8246 /* Accept any base register. SP only in SImode or larger. */
8247 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8248 return 1;
8249
8250 /* This is PC relative data before arm_reorg runs. */
8251 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8252 && GET_CODE (x) == SYMBOL_REF
8253 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8254 return 1;
8255
8256 /* This is PC relative data after arm_reorg runs. */
8257 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8258 && reload_completed
8259 && (GET_CODE (x) == LABEL_REF
8260 || (GET_CODE (x) == CONST
8261 && GET_CODE (XEXP (x, 0)) == PLUS
8262 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8263 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8264 return 1;
8265
8266 /* Post-inc indexing only supported for SImode and larger. */
8267 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8268 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8269 return 1;
8270
8271 else if (GET_CODE (x) == PLUS)
8272 {
8273 /* REG+REG address can be any two index registers. */
8274 /* We disallow FRAME+REG addressing since we know that FRAME
8275 will be replaced with STACK, and SP relative addressing only
8276 permits SP+OFFSET. */
8277 if (GET_MODE_SIZE (mode) <= 4
8278 && XEXP (x, 0) != frame_pointer_rtx
8279 && XEXP (x, 1) != frame_pointer_rtx
8280 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8281 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8282 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8283 return 1;
8284
8285 /* REG+const has 5-7 bit offset for non-SP registers. */
8286 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8287 || XEXP (x, 0) == arg_pointer_rtx)
8288 && CONST_INT_P (XEXP (x, 1))
8289 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8290 return 1;
8291
8292 /* REG+const has 10-bit offset for SP, but only SImode and
8293 larger are supported. */
8294 /* ??? Should probably check for DI/DFmode overflow here
8295 just like GO_IF_LEGITIMATE_OFFSET does. */
8296 else if (REG_P (XEXP (x, 0))
8297 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8298 && GET_MODE_SIZE (mode) >= 4
8299 && CONST_INT_P (XEXP (x, 1))
8300 && INTVAL (XEXP (x, 1)) >= 0
8301 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8302 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8303 return 1;
8304
8305 else if (REG_P (XEXP (x, 0))
8306 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8307 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8308 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8309 && REGNO (XEXP (x, 0))
8310 <= LAST_VIRTUAL_POINTER_REGISTER))
8311 && GET_MODE_SIZE (mode) >= 4
8312 && CONST_INT_P (XEXP (x, 1))
8313 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8314 return 1;
8315 }
8316
8317 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8318 && GET_MODE_SIZE (mode) == 4
8319 && GET_CODE (x) == SYMBOL_REF
8320 && CONSTANT_POOL_ADDRESS_P (x)
8321 && ! (flag_pic
8322 && symbol_mentioned_p (get_pool_constant (x))
8323 && ! pcrel_constant_p (get_pool_constant (x))))
8324 return 1;
8325
8326 return 0;
8327 }
8328
8329 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8330 instruction of mode MODE. */
8331 int
8332 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8333 {
8334 switch (GET_MODE_SIZE (mode))
8335 {
8336 case 1:
8337 return val >= 0 && val < 32;
8338
8339 case 2:
8340 return val >= 0 && val < 64 && (val & 1) == 0;
8341
8342 default:
8343 return (val >= 0
8344 && (val + GET_MODE_SIZE (mode)) <= 128
8345 && (val & 3) == 0);
8346 }
8347 }
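/* Illustrative only: in Thumb-1 state this accepts, for example,
     ldrb r0, [r1, #31]     (byte,      offsets 0-31)
     ldrh r0, [r1, #62]     (halfword,  even offsets 0-62)
     ldr  r0, [r1, #124]    (word,      word-aligned offsets 0-124)
   matching the scaled 5-bit immediate fields of the Thumb-1 encodings.  */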
8348
8349 bool
8350 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8351 {
8352 if (TARGET_ARM)
8353 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8354 else if (TARGET_THUMB2)
8355 return thumb2_legitimate_address_p (mode, x, strict_p);
8356 else /* if (TARGET_THUMB1) */
8357 return thumb1_legitimate_address_p (mode, x, strict_p);
8358 }
8359
8360 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8361
8362 Given an rtx X being reloaded into a reg required to be
8363 in class CLASS, return the class of reg to actually use.
8364 In general this is just CLASS, but for the Thumb core registers and
8365 immediate constants we prefer a LO_REGS class or a subset. */
8366
8367 static reg_class_t
8368 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8369 {
8370 if (TARGET_32BIT)
8371 return rclass;
8372 else
8373 {
8374 if (rclass == GENERAL_REGS)
8375 return LO_REGS;
8376 else
8377 return rclass;
8378 }
8379 }
8380
8381 /* Build the SYMBOL_REF for __tls_get_addr. */
8382
8383 static GTY(()) rtx tls_get_addr_libfunc;
8384
8385 static rtx
8386 get_tls_get_addr (void)
8387 {
8388 if (!tls_get_addr_libfunc)
8389 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8390 return tls_get_addr_libfunc;
8391 }
8392
8393 rtx
8394 arm_load_tp (rtx target)
8395 {
8396 if (!target)
8397 target = gen_reg_rtx (SImode);
8398
8399 if (TARGET_HARD_TP)
8400 {
8401 /* Can return in any reg. */
8402 emit_insn (gen_load_tp_hard (target));
8403 }
8404 else
8405 {
8406 /* Always returned in r0. Immediately copy the result into a pseudo,
8407 otherwise other uses of r0 (e.g. setting up function arguments) may
8408 clobber the value. */
8409
8410 rtx tmp;
8411
8412 emit_insn (gen_load_tp_soft ());
8413
8414 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8415 emit_move_insn (target, tmp);
8416 }
8417 return target;
8418 }
8419
8420 static rtx
8421 load_tls_operand (rtx x, rtx reg)
8422 {
8423 rtx tmp;
8424
8425 if (reg == NULL_RTX)
8426 reg = gen_reg_rtx (SImode);
8427
8428 tmp = gen_rtx_CONST (SImode, x);
8429
8430 emit_move_insn (reg, tmp);
8431
8432 return reg;
8433 }
8434
8435 static rtx_insn *
8436 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8437 {
8438 rtx label, labelno, sum;
8439
8440 gcc_assert (reloc != TLS_DESCSEQ);
8441 start_sequence ();
8442
8443 labelno = GEN_INT (pic_labelno++);
8444 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8445 label = gen_rtx_CONST (VOIDmode, label);
8446
8447 sum = gen_rtx_UNSPEC (Pmode,
8448 gen_rtvec (4, x, GEN_INT (reloc), label,
8449 GEN_INT (TARGET_ARM ? 8 : 4)),
8450 UNSPEC_TLS);
8451 reg = load_tls_operand (sum, reg);
8452
8453 if (TARGET_ARM)
8454 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8455 else
8456 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8457
8458 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8459 LCT_PURE, /* LCT_CONST? */
8460 Pmode, reg, Pmode);
8461
8462 rtx_insn *insns = get_insns ();
8463 end_sequence ();
8464
8465 return insns;
8466 }
8467
8468 static rtx
8469 arm_tls_descseq_addr (rtx x, rtx reg)
8470 {
8471 rtx labelno = GEN_INT (pic_labelno++);
8472 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8473 rtx sum = gen_rtx_UNSPEC (Pmode,
8474 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8475 gen_rtx_CONST (VOIDmode, label),
8476 GEN_INT (!TARGET_ARM)),
8477 UNSPEC_TLS);
8478 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8479
8480 emit_insn (gen_tlscall (x, labelno));
8481 if (!reg)
8482 reg = gen_reg_rtx (SImode);
8483 else
8484 gcc_assert (REGNO (reg) != R0_REGNUM);
8485
8486 emit_move_insn (reg, reg0);
8487
8488 return reg;
8489 }
8490
8491 rtx
8492 legitimize_tls_address (rtx x, rtx reg)
8493 {
8494 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8495 rtx_insn *insns;
8496 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8497
8498 switch (model)
8499 {
8500 case TLS_MODEL_GLOBAL_DYNAMIC:
8501 if (TARGET_GNU2_TLS)
8502 {
8503 reg = arm_tls_descseq_addr (x, reg);
8504
8505 tp = arm_load_tp (NULL_RTX);
8506
8507 dest = gen_rtx_PLUS (Pmode, tp, reg);
8508 }
8509 else
8510 {
8511 /* Original scheme */
8512 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8513 dest = gen_reg_rtx (Pmode);
8514 emit_libcall_block (insns, dest, ret, x);
8515 }
8516 return dest;
8517
8518 case TLS_MODEL_LOCAL_DYNAMIC:
8519 if (TARGET_GNU2_TLS)
8520 {
8521 reg = arm_tls_descseq_addr (x, reg);
8522
8523 tp = arm_load_tp (NULL_RTX);
8524
8525 dest = gen_rtx_PLUS (Pmode, tp, reg);
8526 }
8527 else
8528 {
8529 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8530
8531 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8532 share the LDM result with other LD model accesses. */
8533 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8534 UNSPEC_TLS);
8535 dest = gen_reg_rtx (Pmode);
8536 emit_libcall_block (insns, dest, ret, eqv);
8537
8538 /* Load the addend. */
8539 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8540 GEN_INT (TLS_LDO32)),
8541 UNSPEC_TLS);
8542 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8543 dest = gen_rtx_PLUS (Pmode, dest, addend);
8544 }
8545 return dest;
8546
8547 case TLS_MODEL_INITIAL_EXEC:
8548 labelno = GEN_INT (pic_labelno++);
8549 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8550 label = gen_rtx_CONST (VOIDmode, label);
8551 sum = gen_rtx_UNSPEC (Pmode,
8552 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8553 GEN_INT (TARGET_ARM ? 8 : 4)),
8554 UNSPEC_TLS);
8555 reg = load_tls_operand (sum, reg);
8556
8557 if (TARGET_ARM)
8558 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8559 else if (TARGET_THUMB2)
8560 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8561 else
8562 {
8563 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8564 emit_move_insn (reg, gen_const_mem (SImode, reg));
8565 }
8566
8567 tp = arm_load_tp (NULL_RTX);
8568
8569 return gen_rtx_PLUS (Pmode, tp, reg);
8570
8571 case TLS_MODEL_LOCAL_EXEC:
8572 tp = arm_load_tp (NULL_RTX);
8573
8574 reg = gen_rtx_UNSPEC (Pmode,
8575 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8576 UNSPEC_TLS);
8577 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8578
8579 return gen_rtx_PLUS (Pmode, tp, reg);
8580
8581 default:
8582 abort ();
8583 }
8584 }
8585
8586 /* Try machine-dependent ways of modifying an illegitimate address
8587 to be legitimate. If we find one, return the new, valid address. */
8588 rtx
8589 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8590 {
8591 if (arm_tls_referenced_p (x))
8592 {
8593 rtx addend = NULL;
8594
8595 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8596 {
8597 addend = XEXP (XEXP (x, 0), 1);
8598 x = XEXP (XEXP (x, 0), 0);
8599 }
8600
8601 if (GET_CODE (x) != SYMBOL_REF)
8602 return x;
8603
8604 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8605
8606 x = legitimize_tls_address (x, NULL_RTX);
8607
8608 if (addend)
8609 {
8610 x = gen_rtx_PLUS (SImode, x, addend);
8611 orig_x = x;
8612 }
8613 else
8614 return x;
8615 }
8616
8617 if (!TARGET_ARM)
8618 {
8619 /* TODO: legitimize_address for Thumb2. */
8620 if (TARGET_THUMB2)
8621 return x;
8622 return thumb_legitimize_address (x, orig_x, mode);
8623 }
8624
8625 if (GET_CODE (x) == PLUS)
8626 {
8627 rtx xop0 = XEXP (x, 0);
8628 rtx xop1 = XEXP (x, 1);
8629
8630 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8631 xop0 = force_reg (SImode, xop0);
8632
8633 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8634 && !symbol_mentioned_p (xop1))
8635 xop1 = force_reg (SImode, xop1);
8636
8637 if (ARM_BASE_REGISTER_RTX_P (xop0)
8638 && CONST_INT_P (xop1))
8639 {
8640 HOST_WIDE_INT n, low_n;
8641 rtx base_reg, val;
8642 n = INTVAL (xop1);
8643
8644 /* VFP addressing modes actually allow greater offsets, but for
8645 now we just stick with the lowest common denominator. */
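/* Illustrative note (not in the original source): for a DImode access at
   xop0 + 0x107 this splits the offset as low_n = 7, n = 0x100; since
   low_n > 4 we rebalance to n = 0x110 and low_n = -9, so the final
   address becomes (xop0 + 0x110) - 9, leaving only the small part in
   the load/store offset.  */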
8646 if (mode == DImode || mode == DFmode)
8647 {
8648 low_n = n & 0x0f;
8649 n &= ~0x0f;
8650 if (low_n > 4)
8651 {
8652 n += 16;
8653 low_n -= 16;
8654 }
8655 }
8656 else
8657 {
8658 low_n = ((mode) == TImode ? 0
8659 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8660 n -= low_n;
8661 }
8662
8663 base_reg = gen_reg_rtx (SImode);
8664 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8665 emit_move_insn (base_reg, val);
8666 x = plus_constant (Pmode, base_reg, low_n);
8667 }
8668 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8669 x = gen_rtx_PLUS (SImode, xop0, xop1);
8670 }
8671
8672 /* XXX We don't allow MINUS any more -- see comment in
8673 arm_legitimate_address_outer_p (). */
8674 else if (GET_CODE (x) == MINUS)
8675 {
8676 rtx xop0 = XEXP (x, 0);
8677 rtx xop1 = XEXP (x, 1);
8678
8679 if (CONSTANT_P (xop0))
8680 xop0 = force_reg (SImode, xop0);
8681
8682 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8683 xop1 = force_reg (SImode, xop1);
8684
8685 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8686 x = gen_rtx_MINUS (SImode, xop0, xop1);
8687 }
8688
8689 /* Make sure to take full advantage of the pre-indexed addressing mode
8690 with absolute addresses which often allows for the base register to
8691 be factorized for multiple adjacent memory references, and it might
8692 even allow for the minipool to be avoided entirely. */
8693 else if (CONST_INT_P (x) && optimize > 0)
8694 {
8695 unsigned int bits;
8696 HOST_WIDE_INT mask, base, index;
8697 rtx base_reg;
8698
8699 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8700 use an 8-bit index. So let's use a 12-bit index for SImode only and
8701 hope that arm_gen_constant will enable ldrb to use more bits. */
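/* Worked example (illustrative, not from the original source): for an
   SImode access to the absolute address 0x12345 we get bits = 12,
   mask = 0xfff, base = 0x12000 and index = 0x345; bit_count (0x12000)
   is 2, which does not exceed (32 - 12) / 2, so the result is simply
   (plus (reg holding 0x12000) 0x345).  */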
8702 bits = (mode == SImode) ? 12 : 8;
8703 mask = (1 << bits) - 1;
8704 base = INTVAL (x) & ~mask;
8705 index = INTVAL (x) & mask;
8706 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8707 {
8708 /* It'll most probably be more efficient to generate the base
8709 with more bits set and use a negative index instead. */
8710 base |= mask;
8711 index -= mask;
8712 }
8713 base_reg = force_reg (SImode, GEN_INT (base));
8714 x = plus_constant (Pmode, base_reg, index);
8715 }
8716
8717 if (flag_pic)
8718 {
8719 /* We need to find and carefully transform any SYMBOL and LABEL
8720 references; so go back to the original address expression. */
8721 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8722
8723 if (new_x != orig_x)
8724 x = new_x;
8725 }
8726
8727 return x;
8728 }
8729
8730
8731 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8732 to be legitimate. If we find one, return the new, valid address. */
8733 rtx
8734 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8735 {
8736 if (GET_CODE (x) == PLUS
8737 && CONST_INT_P (XEXP (x, 1))
8738 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8739 || INTVAL (XEXP (x, 1)) < 0))
8740 {
8741 rtx xop0 = XEXP (x, 0);
8742 rtx xop1 = XEXP (x, 1);
8743 HOST_WIDE_INT offset = INTVAL (xop1);
8744
8745 /* Try and fold the offset into a biasing of the base register and
8746 then offsetting that. Don't do this when optimizing for space
8747 since it can cause too many CSEs. */
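/* Illustrative trace (not from the original source): for an SImode
   access at xop0 + 300 when optimizing for size, 300 >= 256 gives
   delta = 300 - (256 - 4) = 48; the base is biased by 252 and the
   access then uses the remaining offset of 48.  */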
8748 if (optimize_size && offset >= 0
8749 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8750 {
8751 HOST_WIDE_INT delta;
8752
8753 if (offset >= 256)
8754 delta = offset - (256 - GET_MODE_SIZE (mode));
8755 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8756 delta = 31 * GET_MODE_SIZE (mode);
8757 else
8758 delta = offset & (~31 * GET_MODE_SIZE (mode));
8759
8760 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8761 NULL_RTX);
8762 x = plus_constant (Pmode, xop0, delta);
8763 }
8764 else if (offset < 0 && offset > -256)
8765 /* Small negative offsets are best done with a subtract before the
8766 dereference; forcing these into a register normally takes two
8767 instructions. */
8768 x = force_operand (x, NULL_RTX);
8769 else
8770 {
8771 /* For the remaining cases, force the constant into a register. */
8772 xop1 = force_reg (SImode, xop1);
8773 x = gen_rtx_PLUS (SImode, xop0, xop1);
8774 }
8775 }
8776 else if (GET_CODE (x) == PLUS
8777 && s_register_operand (XEXP (x, 1), SImode)
8778 && !s_register_operand (XEXP (x, 0), SImode))
8779 {
8780 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8781
8782 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8783 }
8784
8785 if (flag_pic)
8786 {
8787 /* We need to find and carefully transform any SYMBOL and LABEL
8788 references; so go back to the original address expression. */
8789 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX);
8790
8791 if (new_x != orig_x)
8792 x = new_x;
8793 }
8794
8795 return x;
8796 }
8797
8798 /* Return TRUE if X contains any TLS symbol references. */
8799
8800 bool
8801 arm_tls_referenced_p (rtx x)
8802 {
8803 if (! TARGET_HAVE_TLS)
8804 return false;
8805
8806 subrtx_iterator::array_type array;
8807 FOR_EACH_SUBRTX (iter, array, x, ALL)
8808 {
8809 const_rtx x = *iter;
8810 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8811 {
8812 /* ARM currently does not provide relocations to encode TLS variables
8813 into AArch32 instructions, only into data, so there is currently
8814 no way to implement these if a literal pool is disabled. */
8815 if (arm_disable_literal_pool)
8816 sorry ("accessing thread-local storage is not currently supported "
8817 "with -mpure-code or -mslow-flash-data");
8818
8819 return true;
8820 }
8821
8822 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8823 TLS offsets, not real symbol references. */
8824 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8825 iter.skip_subrtxes ();
8826 }
8827 return false;
8828 }
8829
8830 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8831
8832 On the ARM, allow any integer (invalid ones are removed later by insn
8833 patterns), nice doubles and symbol_refs which refer to the function's
8834 constant pool XXX.
8835
8836 When generating pic allow anything. */
8837
8838 static bool
8839 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8840 {
8841 return flag_pic || !label_mentioned_p (x);
8842 }
8843
8844 static bool
8845 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8846 {
8847 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8848 RTXs. These RTXs must therefore be allowed for Thumb-1 so that, when run
8849 for ARMv8-M Baseline or later, the result is valid. */
8850 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8851 x = XEXP (x, 0);
8852
8853 return (CONST_INT_P (x)
8854 || CONST_DOUBLE_P (x)
8855 || CONSTANT_ADDRESS_P (x)
8856 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8857 || flag_pic);
8858 }
8859
8860 static bool
8861 arm_legitimate_constant_p (machine_mode mode, rtx x)
8862 {
8863 return (!arm_cannot_force_const_mem (mode, x)
8864 && (TARGET_32BIT
8865 ? arm_legitimate_constant_p_1 (mode, x)
8866 : thumb_legitimate_constant_p (mode, x)));
8867 }
8868
8869 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8870
8871 static bool
8872 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8873 {
8874 rtx base, offset;
8875
8876 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8877 {
8878 split_const (x, &base, &offset);
8879 if (GET_CODE (base) == SYMBOL_REF
8880 && !offset_within_block_p (base, INTVAL (offset)))
8881 return true;
8882 }
8883 return arm_tls_referenced_p (x);
8884 }
8885 \f
8886 #define REG_OR_SUBREG_REG(X) \
8887 (REG_P (X) \
8888 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8889
8890 #define REG_OR_SUBREG_RTX(X) \
8891 (REG_P (X) ? (X) : SUBREG_REG (X))
8892
8893 static inline int
8894 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8895 {
8896 machine_mode mode = GET_MODE (x);
8897 int total, words;
8898
8899 switch (code)
8900 {
8901 case ASHIFT:
8902 case ASHIFTRT:
8903 case LSHIFTRT:
8904 case ROTATERT:
8905 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8906
8907 case PLUS:
8908 case MINUS:
8909 case COMPARE:
8910 case NEG:
8911 case NOT:
8912 return COSTS_N_INSNS (1);
8913
8914 case MULT:
8915 if (arm_arch6m && arm_m_profile_small_mul)
8916 return COSTS_N_INSNS (32);
8917
8918 if (CONST_INT_P (XEXP (x, 1)))
8919 {
8920 int cycles = 0;
8921 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8922
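/* Illustrative note (not in the original source): this loop charges
   roughly one unit of cost per two bits of the constant multiplier,
   e.g. a 16-bit constant gives 8 iterations, for a total of
   COSTS_N_INSNS (2) + 8.  */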
8923 while (i)
8924 {
8925 i >>= 2;
8926 cycles++;
8927 }
8928 return COSTS_N_INSNS (2) + cycles;
8929 }
8930 return COSTS_N_INSNS (1) + 16;
8931
8932 case SET:
8933 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8934 the mode. */
8935 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8936 return (COSTS_N_INSNS (words)
8937 + 4 * ((MEM_P (SET_SRC (x)))
8938 + MEM_P (SET_DEST (x))));
8939
8940 case CONST_INT:
8941 if (outer == SET)
8942 {
8943 if (UINTVAL (x) < 256
8944 /* 16-bit constant. */
8945 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8946 return 0;
8947 if (thumb_shiftable_const (INTVAL (x)))
8948 return COSTS_N_INSNS (2);
8949 return COSTS_N_INSNS (3);
8950 }
8951 else if ((outer == PLUS || outer == COMPARE)
8952 && INTVAL (x) < 256 && INTVAL (x) > -256)
8953 return 0;
8954 else if ((outer == IOR || outer == XOR || outer == AND)
8955 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8956 return COSTS_N_INSNS (1);
8957 else if (outer == AND)
8958 {
8959 int i;
8960 /* This duplicates the tests in the andsi3 expander. */
8961 for (i = 9; i <= 31; i++)
8962 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
8963 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
8964 return COSTS_N_INSNS (2);
8965 }
8966 else if (outer == ASHIFT || outer == ASHIFTRT
8967 || outer == LSHIFTRT)
8968 return 0;
8969 return COSTS_N_INSNS (2);
8970
8971 case CONST:
8972 case CONST_DOUBLE:
8973 case LABEL_REF:
8974 case SYMBOL_REF:
8975 return COSTS_N_INSNS (3);
8976
8977 case UDIV:
8978 case UMOD:
8979 case DIV:
8980 case MOD:
8981 return 100;
8982
8983 case TRUNCATE:
8984 return 99;
8985
8986 case AND:
8987 case XOR:
8988 case IOR:
8989 /* XXX guess. */
8990 return 8;
8991
8992 case MEM:
8993 /* XXX another guess. */
8994 /* Memory costs quite a lot for the first word, but subsequent words
8995 load at the equivalent of a single insn each. */
8996 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
8997 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
8998 ? 4 : 0));
8999
9000 case IF_THEN_ELSE:
9001 /* XXX a guess. */
9002 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9003 return 14;
9004 return 2;
9005
9006 case SIGN_EXTEND:
9007 case ZERO_EXTEND:
9008 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9009 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9010
9011 if (mode == SImode)
9012 return total;
9013
9014 if (arm_arch6)
9015 return total + COSTS_N_INSNS (1);
9016
9017 /* Assume a two-shift sequence. Increase the cost slightly so
9018 we prefer actual shifts over an extend operation. */
9019 return total + 1 + COSTS_N_INSNS (2);
9020
9021 default:
9022 return 99;
9023 }
9024 }
9025
9026 /* Estimates the size cost of thumb1 instructions.
9027 For now most of the code is copied from thumb1_rtx_costs. We need more
9028 fine-grained tuning when we have more related test cases. */
9029 static inline int
9030 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9031 {
9032 machine_mode mode = GET_MODE (x);
9033 int words, cost;
9034
9035 switch (code)
9036 {
9037 case ASHIFT:
9038 case ASHIFTRT:
9039 case LSHIFTRT:
9040 case ROTATERT:
9041 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9042
9043 case PLUS:
9044 case MINUS:
9045 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
9046 patterns defined by RTL expansion, especially for the expansion of
9047 multiplication. */
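/* For example (illustrative, not from the original source),
   (plus (mult X (const_int 4)) Y) needs a separate shift and add on
   Thumb-1, hence the COSTS_N_INSNS (2) below.  */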
9048 if ((GET_CODE (XEXP (x, 0)) == MULT
9049 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9050 || (GET_CODE (XEXP (x, 1)) == MULT
9051 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9052 return COSTS_N_INSNS (2);
9053 /* Fall through. */
9054 case COMPARE:
9055 case NEG:
9056 case NOT:
9057 return COSTS_N_INSNS (1);
9058
9059 case MULT:
9060 if (CONST_INT_P (XEXP (x, 1)))
9061 {
9062 /* The Thumb-1 mul instruction can't operate on a constant; we must
9063 load it into a register first. */
9064 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9065 /* For targets that have a very small and high-latency multiply
9066 unit, we prefer to synthesize the multiply with up to 5 instructions,
9067 giving a good balance between size and performance. */
9068 if (arm_arch6m && arm_m_profile_small_mul)
9069 return COSTS_N_INSNS (5);
9070 else
9071 return COSTS_N_INSNS (1) + const_size;
9072 }
9073 return COSTS_N_INSNS (1);
9074
9075 case SET:
9076 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9077 the mode. */
9078 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9079 cost = COSTS_N_INSNS (words);
9080 if (satisfies_constraint_J (SET_SRC (x))
9081 || satisfies_constraint_K (SET_SRC (x))
9082 /* Too big an immediate for a 2-byte mov, using MOVT. */
9083 || (CONST_INT_P (SET_SRC (x))
9084 && UINTVAL (SET_SRC (x)) >= 256
9085 && TARGET_HAVE_MOVT
9086 && satisfies_constraint_j (SET_SRC (x)))
9087 /* thumb1_movdi_insn. */
9088 || ((words > 1) && MEM_P (SET_SRC (x))))
9089 cost += COSTS_N_INSNS (1);
9090 return cost;
9091
9092 case CONST_INT:
9093 if (outer == SET)
9094 {
9095 if (UINTVAL (x) < 256)
9096 return COSTS_N_INSNS (1);
9097 /* movw is 4 bytes long. */
9098 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9099 return COSTS_N_INSNS (2);
9100 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9101 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9102 return COSTS_N_INSNS (2);
9103 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9104 if (thumb_shiftable_const (INTVAL (x)))
9105 return COSTS_N_INSNS (2);
9106 return COSTS_N_INSNS (3);
9107 }
9108 else if ((outer == PLUS || outer == COMPARE)
9109 && INTVAL (x) < 256 && INTVAL (x) > -256)
9110 return 0;
9111 else if ((outer == IOR || outer == XOR || outer == AND)
9112 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9113 return COSTS_N_INSNS (1);
9114 else if (outer == AND)
9115 {
9116 int i;
9117 /* This duplicates the tests in the andsi3 expander. */
9118 for (i = 9; i <= 31; i++)
9119 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9120 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9121 return COSTS_N_INSNS (2);
9122 }
9123 else if (outer == ASHIFT || outer == ASHIFTRT
9124 || outer == LSHIFTRT)
9125 return 0;
9126 return COSTS_N_INSNS (2);
9127
9128 case CONST:
9129 case CONST_DOUBLE:
9130 case LABEL_REF:
9131 case SYMBOL_REF:
9132 return COSTS_N_INSNS (3);
9133
9134 case UDIV:
9135 case UMOD:
9136 case DIV:
9137 case MOD:
9138 return 100;
9139
9140 case TRUNCATE:
9141 return 99;
9142
9143 case AND:
9144 case XOR:
9145 case IOR:
9146 return COSTS_N_INSNS (1);
9147
9148 case MEM:
9149 return (COSTS_N_INSNS (1)
9150 + COSTS_N_INSNS (1)
9151 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9152 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9153 ? COSTS_N_INSNS (1) : 0));
9154
9155 case IF_THEN_ELSE:
9156 /* XXX a guess. */
9157 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9158 return 14;
9159 return 2;
9160
9161 case ZERO_EXTEND:
9162 /* XXX still guessing. */
9163 switch (GET_MODE (XEXP (x, 0)))
9164 {
9165 case E_QImode:
9166 return (1 + (mode == DImode ? 4 : 0)
9167 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9168
9169 case E_HImode:
9170 return (4 + (mode == DImode ? 4 : 0)
9171 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9172
9173 case E_SImode:
9174 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9175
9176 default:
9177 return 99;
9178 }
9179
9180 default:
9181 return 99;
9182 }
9183 }
9184
9185 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9186 operand, then return the operand that is being shifted. If the shift
9187 is not by a constant, then set SHIFT_REG to point to the operand.
9188 Return NULL if OP is not a shifter operand. */
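/* Illustrative examples (not from the original source):
   (mult X (const_int 8)) returns X (a left shift by 3);
   (ashift X (reg Y)) returns X and sets *SHIFT_REG to Y;
   (plus X Y) returns NULL.  */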
9189 static rtx
9190 shifter_op_p (rtx op, rtx *shift_reg)
9191 {
9192 enum rtx_code code = GET_CODE (op);
9193
9194 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9195 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9196 return XEXP (op, 0);
9197 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9198 return XEXP (op, 0);
9199 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9200 || code == ASHIFTRT)
9201 {
9202 if (!CONST_INT_P (XEXP (op, 1)))
9203 *shift_reg = XEXP (op, 1);
9204 return XEXP (op, 0);
9205 }
9206
9207 return NULL;
9208 }
9209
9210 static bool
9211 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9212 {
9213 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9214 rtx_code code = GET_CODE (x);
9215 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9216
9217 switch (XINT (x, 1))
9218 {
9219 case UNSPEC_UNALIGNED_LOAD:
9220 /* We can only do unaligned loads into the integer unit, and we can't
9221 use LDM or LDRD. */
9222 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9223 if (speed_p)
9224 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9225 + extra_cost->ldst.load_unaligned);
9226
9227 #ifdef NOT_YET
9228 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9229 ADDR_SPACE_GENERIC, speed_p);
9230 #endif
9231 return true;
9232
9233 case UNSPEC_UNALIGNED_STORE:
9234 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9235 if (speed_p)
9236 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9237 + extra_cost->ldst.store_unaligned);
9238
9239 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9240 #ifdef NOT_YET
9241 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9242 ADDR_SPACE_GENERIC, speed_p);
9243 #endif
9244 return true;
9245
9246 case UNSPEC_VRINTZ:
9247 case UNSPEC_VRINTP:
9248 case UNSPEC_VRINTM:
9249 case UNSPEC_VRINTR:
9250 case UNSPEC_VRINTX:
9251 case UNSPEC_VRINTA:
9252 if (speed_p)
9253 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9254
9255 return true;
9256 default:
9257 *cost = COSTS_N_INSNS (2);
9258 break;
9259 }
9260 return true;
9261 }
9262
9263 /* Cost of a libcall. We assume one insn per argument, an amount for the
9264 call (one insn for -Os) and then one for processing the result. */
9265 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
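/* Illustrative note (not in the original source): LIBCALL_COST (2)
   therefore evaluates to COSTS_N_INSNS (20) when optimizing for speed
   and COSTS_N_INSNS (4) when optimizing for size.  */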
9266
9267 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9268 do \
9269 { \
9270 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9271 if (shift_op != NULL \
9272 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9273 { \
9274 if (shift_reg) \
9275 { \
9276 if (speed_p) \
9277 *cost += extra_cost->alu.arith_shift_reg; \
9278 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9279 ASHIFT, 1, speed_p); \
9280 } \
9281 else if (speed_p) \
9282 *cost += extra_cost->alu.arith_shift; \
9283 \
9284 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9285 ASHIFT, 0, speed_p) \
9286 + rtx_cost (XEXP (x, 1 - IDX), \
9287 GET_MODE (shift_op), \
9288 OP, 1, speed_p)); \
9289 return true; \
9290 } \
9291 } \
9292 while (0)
9293
9294 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
9295 considering the costs of the addressing mode and memory access
9296 separately. */
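/* For instance (illustrative, not from the original source), a DFmode
   load through a POST_MODIFY address when optimizing for speed costs
   COSTS_N_INSNS (1) + addr_mode_costs->fp[AMO_WB]
   + extra_cost->ldst.loadd.  */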
9297 static bool
9298 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
9299 int *cost, bool speed_p)
9300 {
9301 machine_mode mode = GET_MODE (x);
9302
9303 *cost = COSTS_N_INSNS (1);
9304
9305 if (flag_pic
9306 && GET_CODE (XEXP (x, 0)) == PLUS
9307 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9308 /* This will be split into two instructions. Add the cost of the
9309 additional instruction here. The cost of the memory access is computed
9310 below. See arm.md:calculate_pic_address. */
9311 *cost += COSTS_N_INSNS (1);
9312
9313 /* Calculate cost of the addressing mode. */
9314 if (speed_p)
9315 {
9316 arm_addr_mode_op op_type;
9317 switch (GET_CODE (XEXP (x, 0)))
9318 {
9319 default:
9320 case REG:
9321 op_type = AMO_DEFAULT;
9322 break;
9323 case MINUS:
9324 /* MINUS does not appear in RTL, but the architecture supports it,
9325 so handle this case defensively. */
9326 /* fall through */
9327 case PLUS:
9328 op_type = AMO_NO_WB;
9329 break;
9330 case PRE_INC:
9331 case PRE_DEC:
9332 case POST_INC:
9333 case POST_DEC:
9334 case PRE_MODIFY:
9335 case POST_MODIFY:
9336 op_type = AMO_WB;
9337 break;
9338 }
9339
9340 if (VECTOR_MODE_P (mode))
9341 *cost += current_tune->addr_mode_costs->vector[op_type];
9342 else if (FLOAT_MODE_P (mode))
9343 *cost += current_tune->addr_mode_costs->fp[op_type];
9344 else
9345 *cost += current_tune->addr_mode_costs->integer[op_type];
9346 }
9347
9348 /* Calculate cost of memory access. */
9349 if (speed_p)
9350 {
9351 if (FLOAT_MODE_P (mode))
9352 {
9353 if (GET_MODE_SIZE (mode) == 8)
9354 *cost += extra_cost->ldst.loadd;
9355 else
9356 *cost += extra_cost->ldst.loadf;
9357 }
9358 else if (VECTOR_MODE_P (mode))
9359 *cost += extra_cost->ldst.loadv;
9360 else
9361 {
9362 /* Integer modes */
9363 if (GET_MODE_SIZE (mode) == 8)
9364 *cost += extra_cost->ldst.ldrd;
9365 else
9366 *cost += extra_cost->ldst.load;
9367 }
9368 }
9369
9370 return true;
9371 }
9372
9373 /* RTX costs. Make an estimate of the cost of executing the operation
9374 X, which is contained within an operation with code OUTER_CODE.
9375 SPEED_P indicates whether the cost desired is the performance cost,
9376 or the size cost. The estimate is stored in COST and the return
9377 value is TRUE if the cost calculation is final, or FALSE if the
9378 caller should recurse through the operands of X to add additional
9379 costs.
9380
9381 We currently make no attempt to model the size savings of Thumb-2
9382 16-bit instructions. At the normal points in compilation where
9383 this code is called we have no measure of whether the condition
9384 flags are live or not, and thus no realistic way to determine what
9385 the size will eventually be. */
9386 static bool
9387 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9388 const struct cpu_cost_table *extra_cost,
9389 int *cost, bool speed_p)
9390 {
9391 machine_mode mode = GET_MODE (x);
9392
9393 *cost = COSTS_N_INSNS (1);
9394
9395 if (TARGET_THUMB1)
9396 {
9397 if (speed_p)
9398 *cost = thumb1_rtx_costs (x, code, outer_code);
9399 else
9400 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9401 return true;
9402 }
9403
9404 switch (code)
9405 {
9406 case SET:
9407 *cost = 0;
9408 /* SET RTXs don't have a mode so we get it from the destination. */
9409 mode = GET_MODE (SET_DEST (x));
9410
9411 if (REG_P (SET_SRC (x))
9412 && REG_P (SET_DEST (x)))
9413 {
9414 /* Assume that most copies can be done with a single insn,
9415 unless we don't have HW FP, in which case everything
9416 larger than word mode will require two insns. */
9417 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9418 && GET_MODE_SIZE (mode) > 4)
9419 || mode == DImode)
9420 ? 2 : 1);
9421 /* Conditional register moves can be encoded
9422 in 16 bits in Thumb mode. */
9423 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9424 *cost >>= 1;
9425
9426 return true;
9427 }
9428
9429 if (CONST_INT_P (SET_SRC (x)))
9430 {
9431 /* Handle CONST_INT here, since the value doesn't have a mode
9432 and we would otherwise be unable to work out the true cost. */
9433 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9434 0, speed_p);
9435 outer_code = SET;
9436 /* Slightly lower the cost of setting a core reg to a constant.
9437 This helps break up chains and allows for better scheduling. */
9438 if (REG_P (SET_DEST (x))
9439 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9440 *cost -= 1;
9441 x = SET_SRC (x);
9442 /* Immediate moves with an immediate in the range [0, 255] can be
9443 encoded in 16 bits in Thumb mode. */
9444 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9445 && INTVAL (x) >= 0 && INTVAL (x) <=255)
9446 *cost >>= 1;
9447 goto const_int_cost;
9448 }
9449
9450 return false;
9451
9452 case MEM:
9453 return arm_mem_costs (x, extra_cost, cost, speed_p);
9454
9455 case PARALLEL:
9456 {
9457 /* Calculations of LDM costs are complex. We assume an initial cost
9458 (ldm_1st) which will load the number of registers mentioned in
9459 ldm_regs_per_insn_1st registers; then each additional
9460 ldm_regs_per_insn_subsequent registers cost one more insn. The
9461 formula for N regs is thus:
9462
9463 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9464 + ldm_regs_per_insn_subsequent - 1)
9465 / ldm_regs_per_insn_subsequent).
9466
9467 Additional costs may also be added for addressing. A similar
9468 formula is used for STM. */
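/* Worked example (illustrative numbers, not from the original source):
   with ldm_regs_per_insn_1st = 2, ldm_regs_per_insn_subsequent = 2 and
   N = 5 registers, the extra cost is
   ldm_1st + COSTS_N_INSNS ((3 + 2 - 1) / 2) = ldm_1st + COSTS_N_INSNS (2).  */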
9469
9470 bool is_ldm = load_multiple_operation (x, SImode);
9471 bool is_stm = store_multiple_operation (x, SImode);
9472
9473 if (is_ldm || is_stm)
9474 {
9475 if (speed_p)
9476 {
9477 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9478 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9479 ? extra_cost->ldst.ldm_regs_per_insn_1st
9480 : extra_cost->ldst.stm_regs_per_insn_1st;
9481 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9482 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9483 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9484
9485 *cost += regs_per_insn_1st
9486 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9487 + regs_per_insn_sub - 1)
9488 / regs_per_insn_sub);
9489 return true;
9490 }
9491
9492 }
9493 return false;
9494 }
9495 case DIV:
9496 case UDIV:
9497 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9498 && (mode == SFmode || !TARGET_VFP_SINGLE))
9499 *cost += COSTS_N_INSNS (speed_p
9500 ? extra_cost->fp[mode != SFmode].div : 0);
9501 else if (mode == SImode && TARGET_IDIV)
9502 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9503 else
9504 *cost = LIBCALL_COST (2);
9505
9506 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9507 are possible, udiv is preferred. */
9508 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9509 return false; /* All arguments must be in registers. */
9510
9511 case MOD:
9512 /* MOD by a power of 2 can be expanded as:
9513 rsbs r1, r0, #0
9514 and r0, r0, #(n - 1)
9515 and r1, r1, #(n - 1)
9516 rsbpl r0, r1, #0. */
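/* Illustrative note (not in the original source): that is a four-insn
   sequence, hence the COSTS_N_INSNS (3) added below on top of the base
   cost of one insn, plus two logical ops and one arithmetic op in the
   speed cost.  */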
9517 if (CONST_INT_P (XEXP (x, 1))
9518 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9519 && mode == SImode)
9520 {
9521 *cost += COSTS_N_INSNS (3);
9522
9523 if (speed_p)
9524 *cost += 2 * extra_cost->alu.logical
9525 + extra_cost->alu.arith;
9526 return true;
9527 }
9528
9529 /* Fall-through. */
9530 case UMOD:
9531 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9532 are possible, udiv is preferred. */
9533 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9534 return false; /* All arguments must be in registers. */
9535
9536 case ROTATE:
9537 if (mode == SImode && REG_P (XEXP (x, 1)))
9538 {
9539 *cost += (COSTS_N_INSNS (1)
9540 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9541 if (speed_p)
9542 *cost += extra_cost->alu.shift_reg;
9543 return true;
9544 }
9545 /* Fall through */
9546 case ROTATERT:
9547 case ASHIFT:
9548 case LSHIFTRT:
9549 case ASHIFTRT:
9550 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9551 {
9552 *cost += (COSTS_N_INSNS (2)
9553 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9554 if (speed_p)
9555 *cost += 2 * extra_cost->alu.shift;
9556 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
9557 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
9558 *cost += 1;
9559 return true;
9560 }
9561 else if (mode == SImode)
9562 {
9563 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9564 /* Slightly disparage register shifts at -Os, but not by much. */
9565 if (!CONST_INT_P (XEXP (x, 1)))
9566 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9567 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9568 return true;
9569 }
9570 else if (GET_MODE_CLASS (mode) == MODE_INT
9571 && GET_MODE_SIZE (mode) < 4)
9572 {
9573 if (code == ASHIFT)
9574 {
9575 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9576 /* Slightly disparage register shifts at -Os, but not by
9577 much. */
9578 if (!CONST_INT_P (XEXP (x, 1)))
9579 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9580 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9581 }
9582 else if (code == LSHIFTRT || code == ASHIFTRT)
9583 {
9584 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9585 {
9586 /* Can use SBFX/UBFX. */
9587 if (speed_p)
9588 *cost += extra_cost->alu.bfx;
9589 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9590 }
9591 else
9592 {
9593 *cost += COSTS_N_INSNS (1);
9594 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9595 if (speed_p)
9596 {
9597 if (CONST_INT_P (XEXP (x, 1)))
9598 *cost += 2 * extra_cost->alu.shift;
9599 else
9600 *cost += (extra_cost->alu.shift
9601 + extra_cost->alu.shift_reg);
9602 }
9603 else
9604 /* Slightly disparage register shifts. */
9605 *cost += !CONST_INT_P (XEXP (x, 1));
9606 }
9607 }
9608 else /* Rotates. */
9609 {
9610 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9611 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9612 if (speed_p)
9613 {
9614 if (CONST_INT_P (XEXP (x, 1)))
9615 *cost += (2 * extra_cost->alu.shift
9616 + extra_cost->alu.log_shift);
9617 else
9618 *cost += (extra_cost->alu.shift
9619 + extra_cost->alu.shift_reg
9620 + extra_cost->alu.log_shift_reg);
9621 }
9622 }
9623 return true;
9624 }
9625
9626 *cost = LIBCALL_COST (2);
9627 return false;
9628
9629 case BSWAP:
9630 if (arm_arch6)
9631 {
9632 if (mode == SImode)
9633 {
9634 if (speed_p)
9635 *cost += extra_cost->alu.rev;
9636
9637 return false;
9638 }
9639 }
9640 else
9641 {
9642 /* No rev instruction available. Look at arm_legacy_rev
9643 and thumb_legacy_rev for the form of RTL used then. */
9644 if (TARGET_THUMB)
9645 {
9646 *cost += COSTS_N_INSNS (9);
9647
9648 if (speed_p)
9649 {
9650 *cost += 6 * extra_cost->alu.shift;
9651 *cost += 3 * extra_cost->alu.logical;
9652 }
9653 }
9654 else
9655 {
9656 *cost += COSTS_N_INSNS (4);
9657
9658 if (speed_p)
9659 {
9660 *cost += 2 * extra_cost->alu.shift;
9661 *cost += extra_cost->alu.arith_shift;
9662 *cost += 2 * extra_cost->alu.logical;
9663 }
9664 }
9665 return true;
9666 }
9667 return false;
9668
9669 case MINUS:
9670 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9671 && (mode == SFmode || !TARGET_VFP_SINGLE))
9672 {
9673 if (GET_CODE (XEXP (x, 0)) == MULT
9674 || GET_CODE (XEXP (x, 1)) == MULT)
9675 {
9676 rtx mul_op0, mul_op1, sub_op;
9677
9678 if (speed_p)
9679 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9680
9681 if (GET_CODE (XEXP (x, 0)) == MULT)
9682 {
9683 mul_op0 = XEXP (XEXP (x, 0), 0);
9684 mul_op1 = XEXP (XEXP (x, 0), 1);
9685 sub_op = XEXP (x, 1);
9686 }
9687 else
9688 {
9689 mul_op0 = XEXP (XEXP (x, 1), 0);
9690 mul_op1 = XEXP (XEXP (x, 1), 1);
9691 sub_op = XEXP (x, 0);
9692 }
9693
9694 /* The first operand of the multiply may be optionally
9695 negated. */
9696 if (GET_CODE (mul_op0) == NEG)
9697 mul_op0 = XEXP (mul_op0, 0);
9698
9699 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9700 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9701 + rtx_cost (sub_op, mode, code, 0, speed_p));
9702
9703 return true;
9704 }
9705
9706 if (speed_p)
9707 *cost += extra_cost->fp[mode != SFmode].addsub;
9708 return false;
9709 }
9710
9711 if (mode == SImode)
9712 {
9713 rtx shift_by_reg = NULL;
9714 rtx shift_op;
9715 rtx non_shift_op;
9716
9717 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9718 if (shift_op == NULL)
9719 {
9720 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9721 non_shift_op = XEXP (x, 0);
9722 }
9723 else
9724 non_shift_op = XEXP (x, 1);
9725
9726 if (shift_op != NULL)
9727 {
9728 if (shift_by_reg != NULL)
9729 {
9730 if (speed_p)
9731 *cost += extra_cost->alu.arith_shift_reg;
9732 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9733 }
9734 else if (speed_p)
9735 *cost += extra_cost->alu.arith_shift;
9736
9737 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9738 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9739 return true;
9740 }
9741
9742 if (arm_arch_thumb2
9743 && GET_CODE (XEXP (x, 1)) == MULT)
9744 {
9745 /* MLS. */
9746 if (speed_p)
9747 *cost += extra_cost->mult[0].add;
9748 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9749 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9750 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9751 return true;
9752 }
9753
9754 if (CONST_INT_P (XEXP (x, 0)))
9755 {
9756 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9757 INTVAL (XEXP (x, 0)), NULL_RTX,
9758 NULL_RTX, 1, 0);
9759 *cost = COSTS_N_INSNS (insns);
9760 if (speed_p)
9761 *cost += insns * extra_cost->alu.arith;
9762 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9763 return true;
9764 }
9765 else if (speed_p)
9766 *cost += extra_cost->alu.arith;
9767
9768 return false;
9769 }
9770
9771 if (GET_MODE_CLASS (mode) == MODE_INT
9772 && GET_MODE_SIZE (mode) < 4)
9773 {
9774 rtx shift_op, shift_reg;
9775 shift_reg = NULL;
9776
9777 /* We check both sides of the MINUS for shifter operands since,
9778 unlike PLUS, it's not commutative. */
9779
9780 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
9781 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
9782
9783 /* Slightly disparage, as we might need to widen the result. */
9784 *cost += 1;
9785 if (speed_p)
9786 *cost += extra_cost->alu.arith;
9787
9788 if (CONST_INT_P (XEXP (x, 0)))
9789 {
9790 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9791 return true;
9792 }
9793
9794 return false;
9795 }
9796
9797 if (mode == DImode)
9798 {
9799 *cost += COSTS_N_INSNS (1);
9800
9801 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9802 {
9803 rtx op1 = XEXP (x, 1);
9804
9805 if (speed_p)
9806 *cost += 2 * extra_cost->alu.arith;
9807
9808 if (GET_CODE (op1) == ZERO_EXTEND)
9809 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9810 0, speed_p);
9811 else
9812 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9813 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9814 0, speed_p);
9815 return true;
9816 }
9817 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9818 {
9819 if (speed_p)
9820 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9821 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9822 0, speed_p)
9823 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9824 return true;
9825 }
9826 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9827 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9828 {
9829 if (speed_p)
9830 *cost += (extra_cost->alu.arith
9831 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9832 ? extra_cost->alu.arith
9833 : extra_cost->alu.arith_shift));
9834 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9835 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9836 GET_CODE (XEXP (x, 1)), 0, speed_p));
9837 return true;
9838 }
9839
9840 if (speed_p)
9841 *cost += 2 * extra_cost->alu.arith;
9842 return false;
9843 }
9844
9845 /* Vector mode? */
9846
9847 *cost = LIBCALL_COST (2);
9848 return false;
9849
9850 case PLUS:
9851 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9852 && (mode == SFmode || !TARGET_VFP_SINGLE))
9853 {
9854 if (GET_CODE (XEXP (x, 0)) == MULT)
9855 {
9856 rtx mul_op0, mul_op1, add_op;
9857
9858 if (speed_p)
9859 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9860
9861 mul_op0 = XEXP (XEXP (x, 0), 0);
9862 mul_op1 = XEXP (XEXP (x, 0), 1);
9863 add_op = XEXP (x, 1);
9864
9865 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9866 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9867 + rtx_cost (add_op, mode, code, 0, speed_p));
9868
9869 return true;
9870 }
9871
9872 if (speed_p)
9873 *cost += extra_cost->fp[mode != SFmode].addsub;
9874 return false;
9875 }
9876 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9877 {
9878 *cost = LIBCALL_COST (2);
9879 return false;
9880 }
9881
9882 /* Narrow modes can be synthesized in SImode, but the range
9883 of useful sub-operations is limited. Check for shift operations
9884 on one of the operands. Only left shifts can be used in the
9885 narrow modes. */
9886 if (GET_MODE_CLASS (mode) == MODE_INT
9887 && GET_MODE_SIZE (mode) < 4)
9888 {
9889 rtx shift_op, shift_reg;
9890 shift_reg = NULL;
9891
9892 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
9893
9894 if (CONST_INT_P (XEXP (x, 1)))
9895 {
9896 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9897 INTVAL (XEXP (x, 1)), NULL_RTX,
9898 NULL_RTX, 1, 0);
9899 *cost = COSTS_N_INSNS (insns);
9900 if (speed_p)
9901 *cost += insns * extra_cost->alu.arith;
9902 /* Slightly penalize a narrow operation as the result may
9903 need widening. */
9904 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9905 return true;
9906 }
9907
9908 /* Slightly penalize a narrow operation as the result may
9909 need widening. */
9910 *cost += 1;
9911 if (speed_p)
9912 *cost += extra_cost->alu.arith;
9913
9914 return false;
9915 }
9916
9917 if (mode == SImode)
9918 {
9919 rtx shift_op, shift_reg;
9920
9921 if (TARGET_INT_SIMD
9922 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9923 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9924 {
9925 /* UXTA[BH] or SXTA[BH]. */
9926 if (speed_p)
9927 *cost += extra_cost->alu.extend_arith;
9928 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9929 0, speed_p)
9930 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9931 return true;
9932 }
9933
9934 shift_reg = NULL;
9935 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9936 if (shift_op != NULL)
9937 {
9938 if (shift_reg)
9939 {
9940 if (speed_p)
9941 *cost += extra_cost->alu.arith_shift_reg;
9942 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9943 }
9944 else if (speed_p)
9945 *cost += extra_cost->alu.arith_shift;
9946
9947 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9948 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9949 return true;
9950 }
9951 if (GET_CODE (XEXP (x, 0)) == MULT)
9952 {
9953 rtx mul_op = XEXP (x, 0);
9954
9955 if (TARGET_DSP_MULTIPLY
9956 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9957 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9958 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9959 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9960 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
9961 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
9962 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
9963 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
9964 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9965 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
9966 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
9967 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
9968 == 16))))))
9969 {
9970 /* SMLA[BT][BT]. */
9971 if (speed_p)
9972 *cost += extra_cost->mult[0].extend_add;
9973 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
9974 SIGN_EXTEND, 0, speed_p)
9975 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
9976 SIGN_EXTEND, 0, speed_p)
9977 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9978 return true;
9979 }
9980
9981 if (speed_p)
9982 *cost += extra_cost->mult[0].add;
9983 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
9984 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
9985 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9986 return true;
9987 }
9988 if (CONST_INT_P (XEXP (x, 1)))
9989 {
9990 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9991 INTVAL (XEXP (x, 1)), NULL_RTX,
9992 NULL_RTX, 1, 0);
9993 *cost = COSTS_N_INSNS (insns);
9994 if (speed_p)
9995 *cost += insns * extra_cost->alu.arith;
9996 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9997 return true;
9998 }
9999 else if (speed_p)
10000 *cost += extra_cost->alu.arith;
10001
10002 return false;
10003 }
10004
10005 if (mode == DImode)
10006 {
10007 if (GET_CODE (XEXP (x, 0)) == MULT
10008 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10009 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10010 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10011 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10012 {
10013 if (speed_p)
10014 *cost += extra_cost->mult[1].extend_add;
10015 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10016 ZERO_EXTEND, 0, speed_p)
10017 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10018 ZERO_EXTEND, 0, speed_p)
10019 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10020 return true;
10021 }
10022
10023 *cost += COSTS_N_INSNS (1);
10024
10025 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10026 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10027 {
10028 if (speed_p)
10029 *cost += (extra_cost->alu.arith
10030 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10031 ? extra_cost->alu.arith
10032 : extra_cost->alu.arith_shift));
10033
10034 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10035 0, speed_p)
10036 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10037 return true;
10038 }
10039
10040 if (speed_p)
10041 *cost += 2 * extra_cost->alu.arith;
10042 return false;
10043 }
10044
10045 /* Vector mode? */
10046 *cost = LIBCALL_COST (2);
10047 return false;
10048 case IOR:
10049 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10050 {
10051 if (speed_p)
10052 *cost += extra_cost->alu.rev;
10053
10054 return true;
10055 }
10056 /* Fall through. */
10057 case AND: case XOR:
10058 if (mode == SImode)
10059 {
10060 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10061 rtx op0 = XEXP (x, 0);
10062 rtx shift_op, shift_reg;
10063
10064 if (subcode == NOT
10065 && (code == AND
10066 || (code == IOR && TARGET_THUMB2)))
10067 op0 = XEXP (op0, 0);
10068
10069 shift_reg = NULL;
10070 shift_op = shifter_op_p (op0, &shift_reg);
10071 if (shift_op != NULL)
10072 {
10073 if (shift_reg)
10074 {
10075 if (speed_p)
10076 *cost += extra_cost->alu.log_shift_reg;
10077 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10078 }
10079 else if (speed_p)
10080 *cost += extra_cost->alu.log_shift;
10081
10082 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10083 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10084 return true;
10085 }
10086
10087 if (CONST_INT_P (XEXP (x, 1)))
10088 {
10089 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10090 INTVAL (XEXP (x, 1)), NULL_RTX,
10091 NULL_RTX, 1, 0);
10092
10093 *cost = COSTS_N_INSNS (insns);
10094 if (speed_p)
10095 *cost += insns * extra_cost->alu.logical;
10096 *cost += rtx_cost (op0, mode, code, 0, speed_p);
10097 return true;
10098 }
10099
10100 if (speed_p)
10101 *cost += extra_cost->alu.logical;
10102 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10103 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10104 return true;
10105 }
10106
10107 if (mode == DImode)
10108 {
10109 rtx op0 = XEXP (x, 0);
10110 enum rtx_code subcode = GET_CODE (op0);
10111
10112 *cost += COSTS_N_INSNS (1);
10113
10114 if (subcode == NOT
10115 && (code == AND
10116 || (code == IOR && TARGET_THUMB2)))
10117 op0 = XEXP (op0, 0);
10118
10119 if (GET_CODE (op0) == ZERO_EXTEND)
10120 {
10121 if (speed_p)
10122 *cost += 2 * extra_cost->alu.logical;
10123
10124 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10125 0, speed_p)
10126 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10127 return true;
10128 }
10129 else if (GET_CODE (op0) == SIGN_EXTEND)
10130 {
10131 if (speed_p)
10132 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10133
10134 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10135 0, speed_p)
10136 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10137 return true;
10138 }
10139
10140 if (speed_p)
10141 *cost += 2 * extra_cost->alu.logical;
10142
10143 return true;
10144 }
10145 /* Vector mode? */
10146
10147 *cost = LIBCALL_COST (2);
10148 return false;
10149
10150 case MULT:
10151 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10152 && (mode == SFmode || !TARGET_VFP_SINGLE))
10153 {
10154 rtx op0 = XEXP (x, 0);
10155
10156 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10157 op0 = XEXP (op0, 0);
10158
10159 if (speed_p)
10160 *cost += extra_cost->fp[mode != SFmode].mult;
10161
10162 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10163 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10164 return true;
10165 }
10166 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10167 {
10168 *cost = LIBCALL_COST (2);
10169 return false;
10170 }
10171
10172 if (mode == SImode)
10173 {
10174 if (TARGET_DSP_MULTIPLY
10175 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10176 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10177 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10178 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10179 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10180 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10181 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10182 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10183 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10184 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10185 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10186 && (INTVAL (XEXP (XEXP (x, 1), 1))
10187 == 16))))))
10188 {
10189 /* SMUL[TB][TB]. */
10190 if (speed_p)
10191 *cost += extra_cost->mult[0].extend;
10192 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10193 SIGN_EXTEND, 0, speed_p);
10194 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10195 SIGN_EXTEND, 1, speed_p);
10196 return true;
10197 }
10198 if (speed_p)
10199 *cost += extra_cost->mult[0].simple;
10200 return false;
10201 }
10202
10203 if (mode == DImode)
10204 {
10205 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10206 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10207 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10208 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
10209 {
10210 if (speed_p)
10211 *cost += extra_cost->mult[1].extend;
10212 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10213 ZERO_EXTEND, 0, speed_p)
10214 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10215 ZERO_EXTEND, 0, speed_p));
10216 return true;
10217 }
10218
10219 *cost = LIBCALL_COST (2);
10220 return false;
10221 }
10222
10223 /* Vector mode? */
10224 *cost = LIBCALL_COST (2);
10225 return false;
10226
10227 case NEG:
10228 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10229 && (mode == SFmode || !TARGET_VFP_SINGLE))
10230 {
10231 if (GET_CODE (XEXP (x, 0)) == MULT)
10232 {
10233 /* VNMUL. */
10234 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10235 return true;
10236 }
10237
10238 if (speed_p)
10239 *cost += extra_cost->fp[mode != SFmode].neg;
10240
10241 return false;
10242 }
10243 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10244 {
10245 *cost = LIBCALL_COST (1);
10246 return false;
10247 }
10248
10249 if (mode == SImode)
10250 {
10251 if (GET_CODE (XEXP (x, 0)) == ABS)
10252 {
10253 *cost += COSTS_N_INSNS (1);
10254 /* Assume the non-flag-changing variant. */
10255 if (speed_p)
10256 *cost += (extra_cost->alu.log_shift
10257 + extra_cost->alu.arith_shift);
10258 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10259 return true;
10260 }
10261
10262 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10263 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10264 {
10265 *cost += COSTS_N_INSNS (1);
10266 /* No extra cost for MOV imm and MVN imm. */
10267 /* If the comparison op is using the flags, there's no further
10268 cost; otherwise we need to add the cost of the comparison. */
10269 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10270 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10271 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10272 {
10273 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10274 *cost += (COSTS_N_INSNS (1)
10275 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10276 0, speed_p)
10277 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10278 1, speed_p));
10279 if (speed_p)
10280 *cost += extra_cost->alu.arith;
10281 }
10282 return true;
10283 }
10284
10285 if (speed_p)
10286 *cost += extra_cost->alu.arith;
10287 return false;
10288 }
10289
10290 if (GET_MODE_CLASS (mode) == MODE_INT
10291 && GET_MODE_SIZE (mode) < 4)
10292 {
10293 /* Slightly disparage, as we might need an extend operation. */
10294 *cost += 1;
10295 if (speed_p)
10296 *cost += extra_cost->alu.arith;
10297 return false;
10298 }
10299
10300 if (mode == DImode)
10301 {
10302 *cost += COSTS_N_INSNS (1);
10303 if (speed_p)
10304 *cost += 2 * extra_cost->alu.arith;
10305 return false;
10306 }
10307
10308 /* Vector mode? */
10309 *cost = LIBCALL_COST (1);
10310 return false;
10311
10312 case NOT:
10313 if (mode == SImode)
10314 {
10315 rtx shift_op;
10316 rtx shift_reg = NULL;
10317
10318 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10319
10320 if (shift_op)
10321 {
10322 if (shift_reg != NULL)
10323 {
10324 if (speed_p)
10325 *cost += extra_cost->alu.log_shift_reg;
10326 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10327 }
10328 else if (speed_p)
10329 *cost += extra_cost->alu.log_shift;
10330 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10331 return true;
10332 }
10333
10334 if (speed_p)
10335 *cost += extra_cost->alu.logical;
10336 return false;
10337 }
10338 if (mode == DImode)
10339 {
10340 *cost += COSTS_N_INSNS (1);
10341 return false;
10342 }
10343
10344 /* Vector mode? */
10345
10346 *cost += LIBCALL_COST (1);
10347 return false;
10348
10349 case IF_THEN_ELSE:
10350 {
10351 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10352 {
10353 *cost += COSTS_N_INSNS (3);
10354 return true;
10355 }
10356 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10357 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10358
10359 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10360 /* Assume that if one arm of the if_then_else is a register,
10361 it will be tied with the result, eliminating the
10362 conditional insn. */
10363 if (REG_P (XEXP (x, 1)))
10364 *cost += op2cost;
10365 else if (REG_P (XEXP (x, 2)))
10366 *cost += op1cost;
10367 else
10368 {
10369 if (speed_p)
10370 {
10371 if (extra_cost->alu.non_exec_costs_exec)
10372 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10373 else
10374 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10375 }
10376 else
10377 *cost += op1cost + op2cost;
10378 }
10379 }
10380 return true;
10381
10382 case COMPARE:
10383 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10384 *cost = 0;
10385 else
10386 {
10387 machine_mode op0mode;
10388 /* We'll mostly assume that the cost of a compare is the cost of the
10389 LHS. However, there are some notable exceptions. */
10390
10391 /* Floating point compares are never done as side-effects. */
10392 op0mode = GET_MODE (XEXP (x, 0));
10393 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10394 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10395 {
10396 if (speed_p)
10397 *cost += extra_cost->fp[op0mode != SFmode].compare;
10398
10399 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10400 {
10401 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10402 return true;
10403 }
10404
10405 return false;
10406 }
10407 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10408 {
10409 *cost = LIBCALL_COST (2);
10410 return false;
10411 }
10412
10413 /* DImode compares normally take two insns. */
10414 if (op0mode == DImode)
10415 {
10416 *cost += COSTS_N_INSNS (1);
10417 if (speed_p)
10418 *cost += 2 * extra_cost->alu.arith;
10419 return false;
10420 }
10421
10422 if (op0mode == SImode)
10423 {
10424 rtx shift_op;
10425 rtx shift_reg;
10426
10427 if (XEXP (x, 1) == const0_rtx
10428 && !(REG_P (XEXP (x, 0))
10429 || (GET_CODE (XEXP (x, 0)) == SUBREG
10430 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10431 {
10432 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10433
10434 /* Multiply operations that set the flags are often
10435 significantly more expensive. */
10436 if (speed_p
10437 && GET_CODE (XEXP (x, 0)) == MULT
10438 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10439 *cost += extra_cost->mult[0].flag_setting;
10440
10441 if (speed_p
10442 && GET_CODE (XEXP (x, 0)) == PLUS
10443 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10444 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10445 0), 1), mode))
10446 *cost += extra_cost->mult[0].flag_setting;
10447 return true;
10448 }
10449
10450 shift_reg = NULL;
10451 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10452 if (shift_op != NULL)
10453 {
10454 if (shift_reg != NULL)
10455 {
10456 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10457 1, speed_p);
10458 if (speed_p)
10459 *cost += extra_cost->alu.arith_shift_reg;
10460 }
10461 else if (speed_p)
10462 *cost += extra_cost->alu.arith_shift;
10463 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10464 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10465 return true;
10466 }
10467
10468 if (speed_p)
10469 *cost += extra_cost->alu.arith;
10470 if (CONST_INT_P (XEXP (x, 1))
10471 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10472 {
10473 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10474 return true;
10475 }
10476 return false;
10477 }
10478
10479 /* Vector mode? */
10480
10481 *cost = LIBCALL_COST (2);
10482 return false;
10483 }
10484 return true;
10485
10486 case EQ:
10487 case NE:
10488 case LT:
10489 case LE:
10490 case GT:
10491 case GE:
10492 case LTU:
10493 case LEU:
10494 case GEU:
10495 case GTU:
10496 case ORDERED:
10497 case UNORDERED:
10498 case UNEQ:
10499 case UNLE:
10500 case UNLT:
10501 case UNGE:
10502 case UNGT:
10503 case LTGT:
10504 if (outer_code == SET)
10505 {
10506 /* Is it a store-flag operation? */
10507 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10508 && XEXP (x, 1) == const0_rtx)
10509 {
10510 /* Thumb also needs an IT insn. */
10511 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10512 return true;
10513 }
10514 if (XEXP (x, 1) == const0_rtx)
10515 {
10516 switch (code)
10517 {
10518 case LT:
10519 /* LSR Rd, Rn, #31. */
10520 if (speed_p)
10521 *cost += extra_cost->alu.shift;
10522 break;
10523
10524 case EQ:
10525 /* RSBS T1, Rn, #0
10526 ADC Rd, Rn, T1. */
10527
10528 case NE:
10529 /* SUBS T1, Rn, #1
10530 SBC Rd, Rn, T1. */
10531 *cost += COSTS_N_INSNS (1);
10532 break;
10533
10534 case LE:
10535 /* RSBS T1, Rn, Rn, LSR #31
10536 ADC Rd, Rn, T1. */
10537 *cost += COSTS_N_INSNS (1);
10538 if (speed_p)
10539 *cost += extra_cost->alu.arith_shift;
10540 break;
10541
10542 case GT:
10543 /* RSB Rd, Rn, Rn, ASR #1
10544 LSR Rd, Rd, #31. */
10545 *cost += COSTS_N_INSNS (1);
10546 if (speed_p)
10547 *cost += (extra_cost->alu.arith_shift
10548 + extra_cost->alu.shift);
10549 break;
10550
10551 case GE:
10552 /* ASR Rd, Rn, #31
10553 ADD Rd, Rn, #1. */
10554 *cost += COSTS_N_INSNS (1);
10555 if (speed_p)
10556 *cost += extra_cost->alu.shift;
10557 break;
10558
10559 default:
10560 /* Remaining cases are either meaningless or would take
10561 three insns anyway. */
10562 *cost = COSTS_N_INSNS (3);
10563 break;
10564 }
10565 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10566 return true;
10567 }
10568 else
10569 {
10570 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10571 if (CONST_INT_P (XEXP (x, 1))
10572 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10573 {
10574 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10575 return true;
10576 }
10577
10578 return false;
10579 }
10580 }
10581 /* Not directly inside a set. If it involves the condition code
10582 register it must be the condition for a branch, cond_exec or
10583 I_T_E operation. Since the comparison is performed elsewhere
10584 this is just the control part which has no additional
10585 cost. */
10586 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10587 && XEXP (x, 1) == const0_rtx)
10588 {
10589 *cost = 0;
10590 return true;
10591 }
10592 return false;
10593
10594 case ABS:
10595 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10596 && (mode == SFmode || !TARGET_VFP_SINGLE))
10597 {
10598 if (speed_p)
10599 *cost += extra_cost->fp[mode != SFmode].neg;
10600
10601 return false;
10602 }
10603 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10604 {
10605 *cost = LIBCALL_COST (1);
10606 return false;
10607 }
10608
10609 if (mode == SImode)
10610 {
10611 if (speed_p)
10612 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10613 return false;
10614 }
10615 /* Vector mode? */
10616 *cost = LIBCALL_COST (1);
10617 return false;
10618
10619 case SIGN_EXTEND:
10620 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10621 && MEM_P (XEXP (x, 0)))
10622 {
10623 if (mode == DImode)
10624 *cost += COSTS_N_INSNS (1);
10625
10626 if (!speed_p)
10627 return true;
10628
10629 if (GET_MODE (XEXP (x, 0)) == SImode)
10630 *cost += extra_cost->ldst.load;
10631 else
10632 *cost += extra_cost->ldst.load_sign_extend;
10633
10634 if (mode == DImode)
10635 *cost += extra_cost->alu.shift;
10636
10637 return true;
10638 }
10639
10640 /* Widening from less than 32-bits requires an extend operation. */
10641 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10642 {
10643 /* We have SXTB/SXTH. */
10644 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10645 if (speed_p)
10646 *cost += extra_cost->alu.extend;
10647 }
10648 else if (GET_MODE (XEXP (x, 0)) != SImode)
10649 {
10650 /* Needs two shifts. */
10651 *cost += COSTS_N_INSNS (1);
10652 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10653 if (speed_p)
10654 *cost += 2 * extra_cost->alu.shift;
10655 }
10656
10657 /* Widening beyond 32-bits requires one more insn. */
10658 if (mode == DImode)
10659 {
10660 *cost += COSTS_N_INSNS (1);
10661 if (speed_p)
10662 *cost += extra_cost->alu.shift;
10663 }
10664
10665 return true;
10666
10667 case ZERO_EXTEND:
10668 if ((arm_arch4
10669 || GET_MODE (XEXP (x, 0)) == SImode
10670 || GET_MODE (XEXP (x, 0)) == QImode)
10671 && MEM_P (XEXP (x, 0)))
10672 {
10673 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10674
10675 if (mode == DImode)
10676 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10677
10678 return true;
10679 }
10680
10681 /* Widening from less than 32-bits requires an extend operation. */
10682 if (GET_MODE (XEXP (x, 0)) == QImode)
10683 {
10684 /* UXTB can be a shorter instruction in Thumb2, but it might
10685 be slower than the AND Rd, Rn, #255 alternative. When
10686 optimizing for speed it should never be slower to use
10687 AND, and we don't really model 16-bit vs 32-bit insns
10688 here. */
10689 if (speed_p)
10690 *cost += extra_cost->alu.logical;
10691 }
10692 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10693 {
10694 /* We have UXTB/UXTH. */
10695 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10696 if (speed_p)
10697 *cost += extra_cost->alu.extend;
10698 }
10699 else if (GET_MODE (XEXP (x, 0)) != SImode)
10700 {
10701 /* Needs two shifts. It's marginally preferable to use
10702 shifts rather than two BIC instructions as the second
10703 shift may merge with a subsequent insn as a shifter
10704 op. */
10705 *cost = COSTS_N_INSNS (2);
10706 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10707 if (speed_p)
10708 *cost += 2 * extra_cost->alu.shift;
10709 }
10710
10711 /* Widening beyond 32-bits requires one more insn. */
10712 if (mode == DImode)
10713 {
10714 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10715 }
10716
10717 return true;
10718
10719 case CONST_INT:
10720 *cost = 0;
10721 /* CONST_INT has no mode, so we cannot tell for sure how many
10722 insns are really going to be needed. The best we can do is
10723 look at the value passed. If it fits in SImode, then assume
10724 that's the mode it will be used for. Otherwise assume it
10725 will be used in DImode. */
10726 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10727 mode = SImode;
10728 else
10729 mode = DImode;
10730
10731 /* Avoid blowing up in arm_gen_constant (). */
10732 if (!(outer_code == PLUS
10733 || outer_code == AND
10734 || outer_code == IOR
10735 || outer_code == XOR
10736 || outer_code == MINUS))
10737 outer_code = SET;
10738
10739 const_int_cost:
10740 if (mode == SImode)
10741 {
10742 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10743 INTVAL (x), NULL, NULL,
10744 0, 0));
10745 /* Extra costs? */
10746 }
10747 else
10748 {
10749 *cost += COSTS_N_INSNS (arm_gen_constant
10750 (outer_code, SImode, NULL,
10751 trunc_int_for_mode (INTVAL (x), SImode),
10752 NULL, NULL, 0, 0)
10753 + arm_gen_constant (outer_code, SImode, NULL,
10754 INTVAL (x) >> 32, NULL,
10755 NULL, 0, 0));
10756 /* Extra costs? */
10757 }
10758
10759 return true;
10760
10761 case CONST:
10762 case LABEL_REF:
10763 case SYMBOL_REF:
10764 if (speed_p)
10765 {
10766 if (arm_arch_thumb2 && !flag_pic)
10767 *cost += COSTS_N_INSNS (1);
10768 else
10769 *cost += extra_cost->ldst.load;
10770 }
10771 else
10772 *cost += COSTS_N_INSNS (1);
10773
10774 if (flag_pic)
10775 {
10776 *cost += COSTS_N_INSNS (1);
10777 if (speed_p)
10778 *cost += extra_cost->alu.arith;
10779 }
10780
10781 return true;
10782
10783 case CONST_FIXED:
10784 *cost = COSTS_N_INSNS (4);
10785 /* Fixme. */
10786 return true;
10787
10788 case CONST_DOUBLE:
10789 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10790 && (mode == SFmode || !TARGET_VFP_SINGLE))
10791 {
10792 if (vfp3_const_double_rtx (x))
10793 {
10794 if (speed_p)
10795 *cost += extra_cost->fp[mode == DFmode].fpconst;
10796 return true;
10797 }
10798
10799 if (speed_p)
10800 {
10801 if (mode == DFmode)
10802 *cost += extra_cost->ldst.loadd;
10803 else
10804 *cost += extra_cost->ldst.loadf;
10805 }
10806 else
10807 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10808
10809 return true;
10810 }
10811 *cost = COSTS_N_INSNS (4);
10812 return true;
10813
10814 case CONST_VECTOR:
10815 /* Fixme. */
10816 if (TARGET_NEON
10817 && TARGET_HARD_FLOAT
10818 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10819 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10820 *cost = COSTS_N_INSNS (1);
10821 else
10822 *cost = COSTS_N_INSNS (4);
10823 return true;
10824
10825 case HIGH:
10826 case LO_SUM:
10827 /* When optimizing for size, we prefer constant pool entries to
10828 MOVW/MOVT pairs, so bump the cost of these slightly. */
10829 if (!speed_p)
10830 *cost += 1;
10831 return true;
10832
10833 case CLZ:
10834 if (speed_p)
10835 *cost += extra_cost->alu.clz;
10836 return false;
10837
10838 case SMIN:
10839 if (XEXP (x, 1) == const0_rtx)
10840 {
10841 if (speed_p)
10842 *cost += extra_cost->alu.log_shift;
10843 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10844 return true;
10845 }
10846 /* Fall through. */
10847 case SMAX:
10848 case UMIN:
10849 case UMAX:
10850 *cost += COSTS_N_INSNS (1);
10851 return false;
10852
10853 case TRUNCATE:
10854 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10855 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10856 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10857 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10858 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10859 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10860 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10861 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10862 == ZERO_EXTEND))))
10863 {
10864 if (speed_p)
10865 *cost += extra_cost->mult[1].extend;
10866 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10867 ZERO_EXTEND, 0, speed_p)
10868 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10869 ZERO_EXTEND, 0, speed_p));
10870 return true;
10871 }
10872 *cost = LIBCALL_COST (1);
10873 return false;
10874
10875 case UNSPEC_VOLATILE:
10876 case UNSPEC:
10877 return arm_unspec_cost (x, outer_code, speed_p, cost);
10878
10879 case PC:
10880 /* Reading the PC is like reading any other register. Writing it
10881 is more expensive, but we take that into account elsewhere. */
10882 *cost = 0;
10883 return true;
10884
10885 case ZERO_EXTRACT:
10886 /* TODO: Simple zero_extract of bottom bits using AND. */
10887 /* Fall through. */
10888 case SIGN_EXTRACT:
10889 if (arm_arch6
10890 && mode == SImode
10891 && CONST_INT_P (XEXP (x, 1))
10892 && CONST_INT_P (XEXP (x, 2)))
10893 {
10894 if (speed_p)
10895 *cost += extra_cost->alu.bfx;
10896 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10897 return true;
10898 }
10899 /* Without UBFX/SBFX, need to resort to shift operations. */
10900 *cost += COSTS_N_INSNS (1);
10901 if (speed_p)
10902 *cost += 2 * extra_cost->alu.shift;
10903 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10904 return true;
10905
10906 case FLOAT_EXTEND:
10907 if (TARGET_HARD_FLOAT)
10908 {
10909 if (speed_p)
10910 *cost += extra_cost->fp[mode == DFmode].widen;
10911 if (!TARGET_VFP5
10912 && GET_MODE (XEXP (x, 0)) == HFmode)
10913 {
10914 /* Pre v8, widening HF->DF is a two-step process, first
10915 widening to SFmode. */
10916 *cost += COSTS_N_INSNS (1);
10917 if (speed_p)
10918 *cost += extra_cost->fp[0].widen;
10919 }
10920 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10921 return true;
10922 }
10923
10924 *cost = LIBCALL_COST (1);
10925 return false;
10926
10927 case FLOAT_TRUNCATE:
10928 if (TARGET_HARD_FLOAT)
10929 {
10930 if (speed_p)
10931 *cost += extra_cost->fp[mode == DFmode].narrow;
10932 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10933 return true;
10934 /* Vector modes? */
10935 }
10936 *cost = LIBCALL_COST (1);
10937 return false;
10938
10939 case FMA:
10940 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10941 {
10942 rtx op0 = XEXP (x, 0);
10943 rtx op1 = XEXP (x, 1);
10944 rtx op2 = XEXP (x, 2);
10945
10946
10947 /* vfms or vfnma. */
10948 if (GET_CODE (op0) == NEG)
10949 op0 = XEXP (op0, 0);
10950
10951 /* vfnms or vfnma. */
10952 if (GET_CODE (op2) == NEG)
10953 op2 = XEXP (op2, 0);
10954
10955 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10956 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10957 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10958
10959 if (speed_p)
10960 	    *cost += extra_cost->fp[mode == DFmode].fma;
10961
10962 return true;
10963 }
10964
10965 *cost = LIBCALL_COST (3);
10966 return false;
10967
10968 case FIX:
10969 case UNSIGNED_FIX:
10970 if (TARGET_HARD_FLOAT)
10971 {
10972 /* The *combine_vcvtf2i reduces a vmul+vcvt into
10973 a vcvt fixed-point conversion. */
10974 if (code == FIX && mode == SImode
10975 && GET_CODE (XEXP (x, 0)) == FIX
10976 && GET_MODE (XEXP (x, 0)) == SFmode
10977 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10978 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
10979 > 0)
10980 {
10981 if (speed_p)
10982 *cost += extra_cost->fp[0].toint;
10983
10984 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10985 code, 0, speed_p);
10986 return true;
10987 }
10988
10989 if (GET_MODE_CLASS (mode) == MODE_INT)
10990 {
10991 mode = GET_MODE (XEXP (x, 0));
10992 if (speed_p)
10993 *cost += extra_cost->fp[mode == DFmode].toint;
10994 	  /* Strip off the 'cost' of rounding towards zero.  */
10995 if (GET_CODE (XEXP (x, 0)) == FIX)
10996 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
10997 0, speed_p);
10998 else
10999 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11000 /* ??? Increase the cost to deal with transferring from
11001 FP -> CORE registers? */
11002 return true;
11003 }
11004 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11005 && TARGET_VFP5)
11006 {
11007 if (speed_p)
11008 *cost += extra_cost->fp[mode == DFmode].roundint;
11009 return false;
11010 }
11011 /* Vector costs? */
11012 }
11013 *cost = LIBCALL_COST (1);
11014 return false;
11015
11016 case FLOAT:
11017 case UNSIGNED_FLOAT:
11018 if (TARGET_HARD_FLOAT)
11019 {
11020 /* ??? Increase the cost to deal with transferring from CORE
11021 -> FP registers? */
11022 if (speed_p)
11023 *cost += extra_cost->fp[mode == DFmode].fromint;
11024 return false;
11025 }
11026 *cost = LIBCALL_COST (1);
11027 return false;
11028
11029 case CALL:
11030 return true;
11031
11032 case ASM_OPERANDS:
11033 {
11034 /* Just a guess. Guess number of instructions in the asm
11035 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11036 though (see PR60663). */
11037 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11038 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11039
11040 *cost = COSTS_N_INSNS (asm_length + num_operands);
11041 return true;
11042 }
11043 default:
11044 if (mode != VOIDmode)
11045 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11046 else
11047 *cost = COSTS_N_INSNS (4); /* Who knows? */
11048 return false;
11049 }
11050 }
11051
11052 #undef HANDLE_NARROW_SHIFT_ARITH
11053
11054 /* RTX costs entry point. */
11055
11056 static bool
11057 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11058 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11059 {
11060 bool result;
11061 int code = GET_CODE (x);
11062 gcc_assert (current_tune->insn_extra_cost);
11063
11064 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
11065 (enum rtx_code) outer_code,
11066 current_tune->insn_extra_cost,
11067 total, speed);
11068
11069 if (dump_file && arm_verbose_cost)
11070 {
11071 print_rtl_single (dump_file, x);
11072 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11073 *total, result ? "final" : "partial");
11074 }
11075 return result;
11076 }
11077
11078 /* All address computations that can be done are free, but rtx cost returns
11079 the same for practically all of them. So we weight the different types
11080 of address here in the order (most pref first):
11081 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
11082 static inline int
11083 arm_arm_address_cost (rtx x)
11084 {
11085 enum rtx_code c = GET_CODE (x);
11086
11087 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11088 return 0;
11089 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11090 return 10;
11091
11092 if (c == PLUS)
11093 {
11094 if (CONST_INT_P (XEXP (x, 1)))
11095 return 2;
11096
11097 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11098 return 3;
11099
11100 return 4;
11101 }
11102
11103 return 6;
11104 }
11105
11106 static inline int
11107 arm_thumb_address_cost (rtx x)
11108 {
11109 enum rtx_code c = GET_CODE (x);
11110
11111 if (c == REG)
11112 return 1;
11113 if (c == PLUS
11114 && REG_P (XEXP (x, 0))
11115 && CONST_INT_P (XEXP (x, 1)))
11116 return 1;
11117
11118 return 2;
11119 }
11120
11121 static int
11122 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11123 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11124 {
11125 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11126 }
11127
11128 /* Adjust cost hook for XScale. */
11129 static bool
11130 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11131 int * cost)
11132 {
11133 /* Some true dependencies can have a higher cost depending
11134 on precisely how certain input operands are used. */
11135 if (dep_type == 0
11136 && recog_memoized (insn) >= 0
11137 && recog_memoized (dep) >= 0)
11138 {
11139 int shift_opnum = get_attr_shift (insn);
11140 enum attr_type attr_type = get_attr_type (dep);
11141
11142 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11143 operand for INSN. If we have a shifted input operand and the
11144 instruction we depend on is another ALU instruction, then we may
11145 have to account for an additional stall. */
11146 if (shift_opnum != 0
11147 && (attr_type == TYPE_ALU_SHIFT_IMM
11148 || attr_type == TYPE_ALUS_SHIFT_IMM
11149 || attr_type == TYPE_LOGIC_SHIFT_IMM
11150 || attr_type == TYPE_LOGICS_SHIFT_IMM
11151 || attr_type == TYPE_ALU_SHIFT_REG
11152 || attr_type == TYPE_ALUS_SHIFT_REG
11153 || attr_type == TYPE_LOGIC_SHIFT_REG
11154 || attr_type == TYPE_LOGICS_SHIFT_REG
11155 || attr_type == TYPE_MOV_SHIFT
11156 || attr_type == TYPE_MVN_SHIFT
11157 || attr_type == TYPE_MOV_SHIFT_REG
11158 || attr_type == TYPE_MVN_SHIFT_REG))
11159 {
11160 rtx shifted_operand;
11161 int opno;
11162
11163 /* Get the shifted operand. */
11164 extract_insn (insn);
11165 shifted_operand = recog_data.operand[shift_opnum];
11166
11167 /* Iterate over all the operands in DEP. If we write an operand
11168 	     that overlaps with SHIFTED_OPERAND, then we have to increase the
11169 cost of this dependency. */
11170 extract_insn (dep);
11171 preprocess_constraints (dep);
11172 for (opno = 0; opno < recog_data.n_operands; opno++)
11173 {
11174 /* We can ignore strict inputs. */
11175 if (recog_data.operand_type[opno] == OP_IN)
11176 continue;
11177
11178 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11179 shifted_operand))
11180 {
11181 *cost = 2;
11182 return false;
11183 }
11184 }
11185 }
11186 }
11187 return true;
11188 }
11189
11190 /* Adjust cost hook for Cortex A9. */
11191 static bool
11192 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11193 int * cost)
11194 {
11195 switch (dep_type)
11196 {
11197 case REG_DEP_ANTI:
11198 *cost = 0;
11199 return false;
11200
11201 case REG_DEP_TRUE:
11202 case REG_DEP_OUTPUT:
11203 if (recog_memoized (insn) >= 0
11204 && recog_memoized (dep) >= 0)
11205 {
11206 if (GET_CODE (PATTERN (insn)) == SET)
11207 {
11208 if (GET_MODE_CLASS
11209 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11210 || GET_MODE_CLASS
11211 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11212 {
11213 enum attr_type attr_type_insn = get_attr_type (insn);
11214 enum attr_type attr_type_dep = get_attr_type (dep);
11215
11216 /* By default all dependencies of the form
11217 s0 = s0 <op> s1
11218 s0 = s0 <op> s2
11219 have an extra latency of 1 cycle because
11220 of the input and output dependency in this
11221 		 case.  However this gets modeled as a true
11222 dependency and hence all these checks. */
11223 if (REG_P (SET_DEST (PATTERN (insn)))
11224 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11225 {
11226 /* FMACS is a special case where the dependent
11227 instruction can be issued 3 cycles before
11228 the normal latency in case of an output
11229 dependency. */
11230 if ((attr_type_insn == TYPE_FMACS
11231 || attr_type_insn == TYPE_FMACD)
11232 && (attr_type_dep == TYPE_FMACS
11233 || attr_type_dep == TYPE_FMACD))
11234 {
11235 if (dep_type == REG_DEP_OUTPUT)
11236 *cost = insn_default_latency (dep) - 3;
11237 else
11238 *cost = insn_default_latency (dep);
11239 return false;
11240 }
11241 else
11242 {
11243 if (dep_type == REG_DEP_OUTPUT)
11244 *cost = insn_default_latency (dep) + 1;
11245 else
11246 *cost = insn_default_latency (dep);
11247 }
11248 return false;
11249 }
11250 }
11251 }
11252 }
11253 break;
11254
11255 default:
11256 gcc_unreachable ();
11257 }
11258
11259 return true;
11260 }
11261
11262 /* Adjust cost hook for FA726TE. */
11263 static bool
11264 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11265 int * cost)
11266 {
11267   /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
11268      followed by a predicated one) has a penalty of 3.  */
11269 if (dep_type == REG_DEP_TRUE
11270 && recog_memoized (insn) >= 0
11271 && recog_memoized (dep) >= 0
11272 && get_attr_conds (dep) == CONDS_SET)
11273 {
11274 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11275 if (get_attr_conds (insn) == CONDS_USE
11276 && get_attr_type (insn) != TYPE_BRANCH)
11277 {
11278 *cost = 3;
11279 return false;
11280 }
11281
11282 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11283 || get_attr_conds (insn) == CONDS_USE)
11284 {
11285 *cost = 0;
11286 return false;
11287 }
11288 }
11289
11290 return true;
11291 }
11292
11293 /* Implement TARGET_REGISTER_MOVE_COST.
11294
11295    Moves between VFP_REGS and GENERAL_REGS are a single insn, but such a
11296    move is typically more expensive than a single memory access.  We set
11297 the cost to less than two memory accesses so that floating
11298 point to integer conversion does not go through memory. */
11299
11300 int
11301 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11302 reg_class_t from, reg_class_t to)
11303 {
11304 if (TARGET_32BIT)
11305 {
11306 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11307 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11308 return 15;
11309 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11310 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11311 return 4;
11312 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11313 return 20;
11314 else
11315 return 2;
11316 }
11317 else
11318 {
11319 if (from == HI_REGS || to == HI_REGS)
11320 return 4;
11321 else
11322 return 2;
11323 }
11324 }
11325
11326 /* Implement TARGET_MEMORY_MOVE_COST. */
11327
11328 int
11329 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11330 bool in ATTRIBUTE_UNUSED)
11331 {
11332 if (TARGET_32BIT)
11333 return 10;
11334 else
11335 {
11336 if (GET_MODE_SIZE (mode) < 4)
11337 return 8;
11338 else
11339 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11340 }
11341 }
11342
11343 /* Vectorizer cost model implementation. */
11344
11345 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11346 static int
11347 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11348 tree vectype,
11349 int misalign ATTRIBUTE_UNUSED)
11350 {
11351 unsigned elements;
11352
11353 switch (type_of_cost)
11354 {
11355 case scalar_stmt:
11356 return current_tune->vec_costs->scalar_stmt_cost;
11357
11358 case scalar_load:
11359 return current_tune->vec_costs->scalar_load_cost;
11360
11361 case scalar_store:
11362 return current_tune->vec_costs->scalar_store_cost;
11363
11364 case vector_stmt:
11365 return current_tune->vec_costs->vec_stmt_cost;
11366
11367 case vector_load:
11368 return current_tune->vec_costs->vec_align_load_cost;
11369
11370 case vector_store:
11371 return current_tune->vec_costs->vec_store_cost;
11372
11373 case vec_to_scalar:
11374 return current_tune->vec_costs->vec_to_scalar_cost;
11375
11376 case scalar_to_vec:
11377 return current_tune->vec_costs->scalar_to_vec_cost;
11378
11379 case unaligned_load:
11380 case vector_gather_load:
11381 return current_tune->vec_costs->vec_unalign_load_cost;
11382
11383 case unaligned_store:
11384 case vector_scatter_store:
11385 return current_tune->vec_costs->vec_unalign_store_cost;
11386
11387 case cond_branch_taken:
11388 return current_tune->vec_costs->cond_taken_branch_cost;
11389
11390 case cond_branch_not_taken:
11391 return current_tune->vec_costs->cond_not_taken_branch_cost;
11392
11393 case vec_perm:
11394 case vec_promote_demote:
11395 return current_tune->vec_costs->vec_stmt_cost;
11396
11397 case vec_construct:
11398 elements = TYPE_VECTOR_SUBPARTS (vectype);
11399 return elements / 2 + 1;
11400
11401 default:
11402 gcc_unreachable ();
11403 }
11404 }
11405
11406 /* Implement targetm.vectorize.add_stmt_cost. */
11407
11408 static unsigned
11409 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11410 struct _stmt_vec_info *stmt_info, int misalign,
11411 enum vect_cost_model_location where)
11412 {
11413 unsigned *cost = (unsigned *) data;
11414 unsigned retval = 0;
11415
11416 if (flag_vect_cost_model)
11417 {
11418 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11419 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11420
11421 /* Statements in an inner loop relative to the loop being
11422 vectorized are weighted more heavily. The value here is
11423 arbitrary and could potentially be improved with analysis. */
11424 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11425 count *= 50; /* FIXME. */
11426
11427 retval = (unsigned) (count * stmt_cost);
11428 cost[where] += retval;
11429 }
11430
11431 return retval;
11432 }
11433
11434 /* Return true if and only if this insn can dual-issue only as older. */
11435 static bool
11436 cortexa7_older_only (rtx_insn *insn)
11437 {
11438 if (recog_memoized (insn) < 0)
11439 return false;
11440
11441 switch (get_attr_type (insn))
11442 {
11443 case TYPE_ALU_DSP_REG:
11444 case TYPE_ALU_SREG:
11445 case TYPE_ALUS_SREG:
11446 case TYPE_LOGIC_REG:
11447 case TYPE_LOGICS_REG:
11448 case TYPE_ADC_REG:
11449 case TYPE_ADCS_REG:
11450 case TYPE_ADR:
11451 case TYPE_BFM:
11452 case TYPE_REV:
11453 case TYPE_MVN_REG:
11454 case TYPE_SHIFT_IMM:
11455 case TYPE_SHIFT_REG:
11456 case TYPE_LOAD_BYTE:
11457 case TYPE_LOAD_4:
11458 case TYPE_STORE_4:
11459 case TYPE_FFARITHS:
11460 case TYPE_FADDS:
11461 case TYPE_FFARITHD:
11462 case TYPE_FADDD:
11463 case TYPE_FMOV:
11464 case TYPE_F_CVT:
11465 case TYPE_FCMPS:
11466 case TYPE_FCMPD:
11467 case TYPE_FCONSTS:
11468 case TYPE_FCONSTD:
11469 case TYPE_FMULS:
11470 case TYPE_FMACS:
11471 case TYPE_FMULD:
11472 case TYPE_FMACD:
11473 case TYPE_FDIVS:
11474 case TYPE_FDIVD:
11475 case TYPE_F_MRC:
11476 case TYPE_F_MRRC:
11477 case TYPE_F_FLAG:
11478 case TYPE_F_LOADS:
11479 case TYPE_F_STORES:
11480 return true;
11481 default:
11482 return false;
11483 }
11484 }
11485
11486 /* Return true if and only if this insn can dual-issue as younger. */
11487 static bool
11488 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11489 {
11490 if (recog_memoized (insn) < 0)
11491 {
11492 if (verbose > 5)
11493 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11494 return false;
11495 }
11496
11497 switch (get_attr_type (insn))
11498 {
11499 case TYPE_ALU_IMM:
11500 case TYPE_ALUS_IMM:
11501 case TYPE_LOGIC_IMM:
11502 case TYPE_LOGICS_IMM:
11503 case TYPE_EXTEND:
11504 case TYPE_MVN_IMM:
11505 case TYPE_MOV_IMM:
11506 case TYPE_MOV_REG:
11507 case TYPE_MOV_SHIFT:
11508 case TYPE_MOV_SHIFT_REG:
11509 case TYPE_BRANCH:
11510 case TYPE_CALL:
11511 return true;
11512 default:
11513 return false;
11514 }
11515 }
11516
11517
11518 /* Look for an instruction that can dual issue only as an older
11519 instruction, and move it in front of any instructions that can
11520 dual-issue as younger, while preserving the relative order of all
11521    other instructions in the ready list.  This is a heuristic to help
11522 dual-issue in later cycles, by postponing issue of more flexible
11523 instructions. This heuristic may affect dual issue opportunities
11524 in the current cycle. */
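/* Illustrative walk-through (the next insn to issue is kept at the *end* of
   the ready array): with ready[] = { O, X, Y }, where Y is at the head, Y can
   dual-issue as younger and O only as older, the loop below finds
   first_younger = 2 and first_older_only = 0, and the array is rotated to
   { X, Y, O }: O now issues first while X and Y keep their relative order.  */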
11525 static void
11526 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11527 int *n_readyp, int clock)
11528 {
11529 int i;
11530 int first_older_only = -1, first_younger = -1;
11531
11532 if (verbose > 5)
11533 fprintf (file,
11534 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11535 clock,
11536 *n_readyp);
11537
11538 /* Traverse the ready list from the head (the instruction to issue
11539      first), looking for the first instruction that can issue as
11540 younger and the first instruction that can dual-issue only as
11541 older. */
11542 for (i = *n_readyp - 1; i >= 0; i--)
11543 {
11544 rtx_insn *insn = ready[i];
11545 if (cortexa7_older_only (insn))
11546 {
11547 first_older_only = i;
11548 if (verbose > 5)
11549 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11550 break;
11551 }
11552 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11553 first_younger = i;
11554 }
11555
11556   /* Nothing to reorder: either no younger insn was found, or an insn that
11557      can dual-issue only as older already appears before any insn that can
11558      dual-issue as younger.  */
11559 if (first_younger == -1)
11560 {
11561 if (verbose > 5)
11562 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11563 return;
11564 }
11565
11566 /* Nothing to reorder because no older-only insn in the ready list. */
11567 if (first_older_only == -1)
11568 {
11569 if (verbose > 5)
11570 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11571 return;
11572 }
11573
11574 /* Move first_older_only insn before first_younger. */
11575 if (verbose > 5)
11576 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11577 INSN_UID(ready [first_older_only]),
11578 INSN_UID(ready [first_younger]));
11579 rtx_insn *first_older_only_insn = ready [first_older_only];
11580 for (i = first_older_only; i < first_younger; i++)
11581 {
11582 ready[i] = ready[i+1];
11583 }
11584
11585 ready[i] = first_older_only_insn;
11586 return;
11587 }
11588
11589 /* Implement TARGET_SCHED_REORDER. */
11590 static int
11591 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11592 int clock)
11593 {
11594 switch (arm_tune)
11595 {
11596 case TARGET_CPU_cortexa7:
11597 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11598 break;
11599 default:
11600 /* Do nothing for other cores. */
11601 break;
11602 }
11603
11604 return arm_issue_rate ();
11605 }
11606
11607 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11608 It corrects the value of COST based on the relationship between
11609    INSN and DEP and on the dependence type DEP_TYPE.  It returns the new
11610 value. There is a per-core adjust_cost hook to adjust scheduler costs
11611 and the per-core hook can choose to completely override the generic
11612 adjust_cost function. Only put bits of code into arm_adjust_cost that
11613 are common across all cores. */
11614 static int
11615 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11616 unsigned int)
11617 {
11618 rtx i_pat, d_pat;
11619
11620 /* When generating Thumb-1 code, we want to place flag-setting operations
11621 close to a conditional branch which depends on them, so that we can
11622 omit the comparison. */
11623 if (TARGET_THUMB1
11624 && dep_type == 0
11625 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11626 && recog_memoized (dep) >= 0
11627 && get_attr_conds (dep) == CONDS_SET)
11628 return 0;
11629
11630 if (current_tune->sched_adjust_cost != NULL)
11631 {
11632 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11633 return cost;
11634 }
11635
11636 /* XXX Is this strictly true? */
11637 if (dep_type == REG_DEP_ANTI
11638 || dep_type == REG_DEP_OUTPUT)
11639 return 0;
11640
11641 /* Call insns don't incur a stall, even if they follow a load. */
11642 if (dep_type == 0
11643 && CALL_P (insn))
11644 return 1;
11645
11646 if ((i_pat = single_set (insn)) != NULL
11647 && MEM_P (SET_SRC (i_pat))
11648 && (d_pat = single_set (dep)) != NULL
11649 && MEM_P (SET_DEST (d_pat)))
11650 {
11651 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11652       /* This is a load after a store; there is no conflict if the load reads
11653 	 from a cached area.  Assume that loads from the stack and from the
11654 	 constant pool are cached, and that others will miss.  This is a
11655 	 hack.  */
11656
11657 if ((GET_CODE (src_mem) == SYMBOL_REF
11658 && CONSTANT_POOL_ADDRESS_P (src_mem))
11659 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11660 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11661 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11662 return 1;
11663 }
11664
11665 return cost;
11666 }
11667
11668 int
11669 arm_max_conditional_execute (void)
11670 {
11671 return max_insns_skipped;
11672 }
11673
11674 static int
11675 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11676 {
11677 if (TARGET_32BIT)
11678 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11679 else
11680 return (optimize > 0) ? 2 : 0;
11681 }
11682
11683 static int
11684 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11685 {
11686 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11687 }
11688
11689 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11690 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11691 sequences of non-executed instructions in IT blocks probably take the same
11692 amount of time as executed instructions (and the IT instruction itself takes
11693 space in icache). This function was experimentally determined to give good
11694 results on a popular embedded benchmark. */
11695
11696 static int
11697 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11698 {
11699 return (TARGET_32BIT && speed_p) ? 1
11700 : arm_default_branch_cost (speed_p, predictable_p);
11701 }
11702
11703 static int
11704 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11705 {
11706 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11707 }
11708
11709 static bool fp_consts_inited = false;
11710
11711 static REAL_VALUE_TYPE value_fp0;
11712
11713 static void
11714 init_fp_table (void)
11715 {
11716 REAL_VALUE_TYPE r;
11717
11718 r = REAL_VALUE_ATOF ("0", DFmode);
11719 value_fp0 = r;
11720 fp_consts_inited = true;
11721 }
11722
11723 /* Return TRUE if rtx X is a valid immediate FP constant. */
11724 int
11725 arm_const_double_rtx (rtx x)
11726 {
11727 const REAL_VALUE_TYPE *r;
11728
11729 if (!fp_consts_inited)
11730 init_fp_table ();
11731
11732 r = CONST_DOUBLE_REAL_VALUE (x);
11733 if (REAL_VALUE_MINUS_ZERO (*r))
11734 return 0;
11735
11736 if (real_equal (r, &value_fp0))
11737 return 1;
11738
11739 return 0;
11740 }
11741
11742 /* VFPv3 has a fairly wide range of representable immediates, formed from
11743 "quarter-precision" floating-point values. These can be evaluated using this
11744 formula (with ^ for exponentiation):
11745
11746 -1^s * n * 2^-r
11747
11748 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11749 16 <= n <= 31 and 0 <= r <= 7.
11750
11751 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11752
11753 - A (most-significant) is the sign bit.
11754 - BCD are the exponent (encoded as r XOR 3).
11755 - EFGH are the mantissa (encoded as n - 16).
11756 */
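/* A worked example of the encoding above (illustrative only, not used by the
   compiler): 1.0 = 16 * 2^-4, i.e. s = 0, n = 16, r = 4, so A = 0,
   BCD = 4 XOR 3 = 0b111 and EFGH = 16 - 16 = 0b0000, giving the 8-bit index
   0b01110000 = 0x70.  The sketch below (a hypothetical helper, named only for
   illustration) packs s, n and r in the same way.  */
static inline unsigned
arm_vfp3_quarter_precision_index_example (int s, int n, int r)
{
  /* Assumes 0 <= s <= 1, 16 <= n <= 31 and 0 <= r <= 7, as described above.  */
  return (s << 7) | ((r ^ 3) << 4) | (n - 16);
}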
11757
11758 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11759 fconst[sd] instruction, or -1 if X isn't suitable. */
11760 static int
11761 vfp3_const_double_index (rtx x)
11762 {
11763 REAL_VALUE_TYPE r, m;
11764 int sign, exponent;
11765 unsigned HOST_WIDE_INT mantissa, mant_hi;
11766 unsigned HOST_WIDE_INT mask;
11767 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11768 bool fail;
11769
11770 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11771 return -1;
11772
11773 r = *CONST_DOUBLE_REAL_VALUE (x);
11774
11775 /* We can't represent these things, so detect them first. */
11776 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11777 return -1;
11778
11779 /* Extract sign, exponent and mantissa. */
11780 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11781 r = real_value_abs (&r);
11782 exponent = REAL_EXP (&r);
11783 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11784 highest (sign) bit, with a fixed binary point at bit point_pos.
11785 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11786 bits for the mantissa, this may fail (low bits would be lost). */
11787 real_ldexp (&m, &r, point_pos - exponent);
11788 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11789 mantissa = w.elt (0);
11790 mant_hi = w.elt (1);
11791
11792 /* If there are bits set in the low part of the mantissa, we can't
11793 represent this value. */
11794 if (mantissa != 0)
11795 return -1;
11796
11797 /* Now make it so that mantissa contains the most-significant bits, and move
11798 the point_pos to indicate that the least-significant bits have been
11799 discarded. */
11800 point_pos -= HOST_BITS_PER_WIDE_INT;
11801 mantissa = mant_hi;
11802
11803 /* We can permit four significant bits of mantissa only, plus a high bit
11804 which is always 1. */
11805 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11806 if ((mantissa & mask) != 0)
11807 return -1;
11808
11809 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11810 mantissa >>= point_pos - 5;
11811
11812 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11813 floating-point immediate zero with Neon using an integer-zero load, but
11814 that case is handled elsewhere.) */
11815 if (mantissa == 0)
11816 return -1;
11817
11818 gcc_assert (mantissa >= 16 && mantissa <= 31);
11819
11820 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11821 normalized significands are in the range [1, 2). (Our mantissa is shifted
11822 left 4 places at this point relative to normalized IEEE754 values). GCC
11823 internally uses [0.5, 1) (see real.c), so the exponent returned from
11824 REAL_EXP must be altered. */
11825 exponent = 5 - exponent;
11826
11827 if (exponent < 0 || exponent > 7)
11828 return -1;
11829
11830 /* Sign, mantissa and exponent are now in the correct form to plug into the
11831 formula described in the comment above. */
11832 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11833 }
11834
11835 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11836 int
11837 vfp3_const_double_rtx (rtx x)
11838 {
11839 if (!TARGET_VFP3)
11840 return 0;
11841
11842 return vfp3_const_double_index (x) != -1;
11843 }
11844
11845 /* Recognize immediates which can be used in various Neon instructions. Legal
11846 immediates are described by the following table (for VMVN variants, the
11847 bitwise inverse of the constant shown is recognized. In either case, VMOV
11848 is output and the correct instruction to use for a given constant is chosen
11849 by the assembler). The constant shown is replicated across all elements of
11850 the destination vector.
11851
11852 insn elems variant constant (binary)
11853 ---- ----- ------- -----------------
11854 vmov i32 0 00000000 00000000 00000000 abcdefgh
11855 vmov i32 1 00000000 00000000 abcdefgh 00000000
11856 vmov i32 2 00000000 abcdefgh 00000000 00000000
11857 vmov i32 3 abcdefgh 00000000 00000000 00000000
11858 vmov i16 4 00000000 abcdefgh
11859 vmov i16 5 abcdefgh 00000000
11860 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11861 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11862 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11863 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11864 vmvn i16 10 00000000 abcdefgh
11865 vmvn i16 11 abcdefgh 00000000
11866 vmov i32 12 00000000 00000000 abcdefgh 11111111
11867 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11868 vmov i32 14 00000000 abcdefgh 11111111 11111111
11869 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11870 vmov i8 16 abcdefgh
11871 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11872 eeeeeeee ffffffff gggggggg hhhhhhhh
11873 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11874 vmov f32 19 00000000 00000000 00000000 00000000
11875
11876 For case 18, B = !b. Representable values are exactly those accepted by
11877 vfp3_const_double_index, but are output as floating-point numbers rather
11878 than indices.
11879
11880 For case 19, we will change it to vmov.i32 when assembling.
11881
11882 Variants 0-5 (inclusive) may also be used as immediates for the second
11883 operand of VORR/VBIC instructions.
11884
11885 The INVERSE argument causes the bitwise inverse of the given operand to be
11886 recognized instead (used for recognizing legal immediates for the VAND/VORN
11887 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11888 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11889 output, rather than the real insns vbic/vorr).
11890
11891 INVERSE makes no difference to the recognition of float vectors.
11892
11893 The return value is the variant of immediate as shown in the above table, or
11894 -1 if the given value doesn't match any of the listed patterns.
11895 */
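/* A concrete illustration of the table above: the V4SImode constant with
   every element equal to 0x0000ab00 matches variant 1 (abcdefgh = 0xab,
   element width 32) and is output as a single vmov.i32, while its bitwise
   inverse, 0xffff54ff replicated, matches variant 7 and is output as
   vmvn.i32.  */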
11896 static int
11897 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11898 rtx *modconst, int *elementwidth)
11899 {
11900 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11901 matches = 1; \
11902 for (i = 0; i < idx; i += (STRIDE)) \
11903 if (!(TEST)) \
11904 matches = 0; \
11905 if (matches) \
11906 { \
11907 immtype = (CLASS); \
11908 elsize = (ELSIZE); \
11909 break; \
11910 }
11911
11912 unsigned int i, elsize = 0, idx = 0, n_elts;
11913 unsigned int innersize;
11914 unsigned char bytes[16];
11915 int immtype = -1, matches;
11916 unsigned int invmask = inverse ? 0xff : 0;
11917 bool vector = GET_CODE (op) == CONST_VECTOR;
11918
11919 if (vector)
11920 n_elts = CONST_VECTOR_NUNITS (op);
11921 else
11922 {
11923 n_elts = 1;
11924 if (mode == VOIDmode)
11925 mode = DImode;
11926 }
11927
11928 innersize = GET_MODE_UNIT_SIZE (mode);
11929
11930 /* Vectors of float constants. */
11931 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11932 {
11933 rtx el0 = CONST_VECTOR_ELT (op, 0);
11934
11935 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11936 return -1;
11937
11938 /* FP16 vectors cannot be represented. */
11939 if (GET_MODE_INNER (mode) == HFmode)
11940 return -1;
11941
11942 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11943 are distinct in this context. */
11944 if (!const_vec_duplicate_p (op))
11945 return -1;
11946
11947 if (modconst)
11948 *modconst = CONST_VECTOR_ELT (op, 0);
11949
11950 if (elementwidth)
11951 *elementwidth = 0;
11952
11953 if (el0 == CONST0_RTX (GET_MODE (el0)))
11954 return 19;
11955 else
11956 return 18;
11957 }
11958
11959 /* The tricks done in the code below apply for little-endian vector layout.
11960 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
11961 FIXME: Implement logic for big-endian vectors. */
11962 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
11963 return -1;
11964
11965 /* Splat vector constant out into a byte vector. */
11966 for (i = 0; i < n_elts; i++)
11967 {
11968 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
11969 unsigned HOST_WIDE_INT elpart;
11970
11971 gcc_assert (CONST_INT_P (el));
11972 elpart = INTVAL (el);
11973
11974 for (unsigned int byte = 0; byte < innersize; byte++)
11975 {
11976 bytes[idx++] = (elpart & 0xff) ^ invmask;
11977 elpart >>= BITS_PER_UNIT;
11978 }
11979 }
11980
11981 /* Sanity check. */
11982 gcc_assert (idx == GET_MODE_SIZE (mode));
11983
11984 do
11985 {
11986 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
11987 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11988
11989 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
11990 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
11991
11992 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
11993 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
11994
11995 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
11996 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
11997
11998 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
11999
12000 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12001
12002 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12003 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12004
12005 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12006 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12007
12008 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12009 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12010
12011 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12012 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12013
12014 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12015
12016 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12017
12018 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12019 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12020
12021 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12022 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12023
12024 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12025 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12026
12027 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12028 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12029
12030 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12031
12032 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12033 && bytes[i] == bytes[(i + 8) % idx]);
12034 }
12035 while (0);
12036
12037 if (immtype == -1)
12038 return -1;
12039
12040 if (elementwidth)
12041 *elementwidth = elsize;
12042
12043 if (modconst)
12044 {
12045 unsigned HOST_WIDE_INT imm = 0;
12046
12047 /* Un-invert bytes of recognized vector, if necessary. */
12048 if (invmask != 0)
12049 for (i = 0; i < idx; i++)
12050 bytes[i] ^= invmask;
12051
12052 if (immtype == 17)
12053 {
12054 /* FIXME: Broken on 32-bit H_W_I hosts. */
12055 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12056
12057 for (i = 0; i < 8; i++)
12058 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12059 << (i * BITS_PER_UNIT);
12060
12061 *modconst = GEN_INT (imm);
12062 }
12063 else
12064 {
12065 unsigned HOST_WIDE_INT imm = 0;
12066
12067 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12068 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12069
12070 *modconst = GEN_INT (imm);
12071 }
12072 }
12073
12074 return immtype;
12075 #undef CHECK
12076 }
12077
12078 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12079 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12080 float elements), and a modified constant (whatever should be output for a
12081 VMOV) in *MODCONST. */
12082
12083 int
12084 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12085 rtx *modconst, int *elementwidth)
12086 {
12087 rtx tmpconst;
12088 int tmpwidth;
12089 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12090
12091 if (retval == -1)
12092 return 0;
12093
12094 if (modconst)
12095 *modconst = tmpconst;
12096
12097 if (elementwidth)
12098 *elementwidth = tmpwidth;
12099
12100 return 1;
12101 }
12102
12103 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12104 the immediate is valid, write a constant suitable for using as an operand
12105 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12106 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12107
12108 int
12109 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12110 rtx *modconst, int *elementwidth)
12111 {
12112 rtx tmpconst;
12113 int tmpwidth;
12114 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12115
12116 if (retval < 0 || retval > 5)
12117 return 0;
12118
12119 if (modconst)
12120 *modconst = tmpconst;
12121
12122 if (elementwidth)
12123 *elementwidth = tmpwidth;
12124
12125 return 1;
12126 }
12127
12128 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12129 the immediate is valid, write a constant suitable for using as an operand
12130 to VSHR/VSHL to *MODCONST and the corresponding element width to
12131    *ELEMENTWIDTH.  ISLEFTSHIFT says whether this is a left or a right shift,
12132    since the two have different immediate-range limitations.  */
12133
12134 int
12135 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12136 rtx *modconst, int *elementwidth,
12137 bool isleftshift)
12138 {
12139 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12140 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12141 unsigned HOST_WIDE_INT last_elt = 0;
12142 unsigned HOST_WIDE_INT maxshift;
12143
12144 /* Split vector constant out into a byte vector. */
12145 for (i = 0; i < n_elts; i++)
12146 {
12147 rtx el = CONST_VECTOR_ELT (op, i);
12148 unsigned HOST_WIDE_INT elpart;
12149
12150 if (CONST_INT_P (el))
12151 elpart = INTVAL (el);
12152 else if (CONST_DOUBLE_P (el))
12153 return 0;
12154 else
12155 gcc_unreachable ();
12156
12157 if (i != 0 && elpart != last_elt)
12158 return 0;
12159
12160 last_elt = elpart;
12161 }
12162
12163 /* Shift less than element size. */
12164 maxshift = innersize * 8;
12165
12166 if (isleftshift)
12167 {
12168 /* Left shift immediate value can be from 0 to <size>-1. */
12169 if (last_elt >= maxshift)
12170 return 0;
12171 }
12172 else
12173 {
12174 /* Right shift immediate value can be from 1 to <size>. */
12175 if (last_elt == 0 || last_elt > maxshift)
12176 return 0;
12177 }
12178
12179 if (elementwidth)
12180 *elementwidth = innersize * 8;
12181
12182 if (modconst)
12183 *modconst = CONST_VECTOR_ELT (op, 0);
12184
12185 return 1;
12186 }
12187
12188 /* Return a string suitable for output of Neon immediate logic operation
12189 MNEM. */
12190
12191 char *
12192 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12193 int inverse, int quad)
12194 {
12195 int width, is_valid;
12196 static char templ[40];
12197
12198 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12199
12200 gcc_assert (is_valid != 0);
12201
12202 if (quad)
12203 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12204 else
12205 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12206
12207 return templ;
12208 }
12209
12210 /* Return a string suitable for output of Neon immediate shift operation
12211 (VSHR or VSHL) MNEM. */
12212
12213 char *
12214 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12215 machine_mode mode, int quad,
12216 bool isleftshift)
12217 {
12218 int width, is_valid;
12219 static char templ[40];
12220
12221 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12222 gcc_assert (is_valid != 0);
12223
12224 if (quad)
12225 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12226 else
12227 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12228
12229 return templ;
12230 }
12231
12232 /* Output a sequence of pairwise operations to implement a reduction.
12233 NOTE: We do "too much work" here, because pairwise operations work on two
12234    registers-worth of operands in one go.  Unfortunately we don't think those
12235    extra calculations can be exploited to do the full operation in fewer steps.
12236 Although all vector elements of the result but the first are ignored, we
12237 actually calculate the same result in each of the elements. An alternative
12238 such as initially loading a vector with zero to use as each of the second
12239 operands would use up an additional register and take an extra instruction,
12240 for no particular gain. */
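/* Worked example, assuming REDUC expands to a pairwise add such as vpadd.i16
   and MODE is V4HImode (so PARTS is 4): starting from { a, b, c, d }, the
   first step yields { a+b, c+d, a+b, c+d } and the second, written to OP0,
   yields { a+b+c+d, a+b+c+d, a+b+c+d, a+b+c+d }; only element 0 is wanted,
   but each element ends up holding the full reduction.  */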
12241
12242 void
12243 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12244 rtx (*reduc) (rtx, rtx, rtx))
12245 {
12246 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12247 rtx tmpsum = op1;
12248
12249 for (i = parts / 2; i >= 1; i /= 2)
12250 {
12251 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12252 emit_insn (reduc (dest, tmpsum, tmpsum));
12253 tmpsum = dest;
12254 }
12255 }
12256
12257 /* If VALS is a vector constant that can be loaded into a register
12258 using VDUP, generate instructions to do so and return an RTX to
12259 assign to the register. Otherwise return NULL_RTX. */
12260
12261 static rtx
12262 neon_vdup_constant (rtx vals)
12263 {
12264 machine_mode mode = GET_MODE (vals);
12265 machine_mode inner_mode = GET_MODE_INNER (mode);
12266 rtx x;
12267
12268 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12269 return NULL_RTX;
12270
12271 if (!const_vec_duplicate_p (vals, &x))
12272 /* The elements are not all the same. We could handle repeating
12273 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12274 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12275 vdup.i16). */
12276 return NULL_RTX;
12277
12278 /* We can load this constant by using VDUP and a constant in a
12279 single ARM register. This will be cheaper than a vector
12280 load. */
12281
12282 x = copy_to_mode_reg (inner_mode, x);
12283 return gen_vec_duplicate (mode, x);
12284 }
12285
12286 /* Generate code to load VALS, which is a PARALLEL containing only
12287 constants (for vec_init) or CONST_VECTOR, efficiently into a
12288 register. Returns an RTX to copy into the register, or NULL_RTX
12289 for a PARALLEL that can not be converted into a CONST_VECTOR. */
12290
12291 rtx
12292 neon_make_constant (rtx vals)
12293 {
12294 machine_mode mode = GET_MODE (vals);
12295 rtx target;
12296 rtx const_vec = NULL_RTX;
12297 int n_elts = GET_MODE_NUNITS (mode);
12298 int n_const = 0;
12299 int i;
12300
12301 if (GET_CODE (vals) == CONST_VECTOR)
12302 const_vec = vals;
12303 else if (GET_CODE (vals) == PARALLEL)
12304 {
12305 /* A CONST_VECTOR must contain only CONST_INTs and
12306 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12307 Only store valid constants in a CONST_VECTOR. */
12308 for (i = 0; i < n_elts; ++i)
12309 {
12310 rtx x = XVECEXP (vals, 0, i);
12311 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12312 n_const++;
12313 }
12314 if (n_const == n_elts)
12315 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12316 }
12317 else
12318 gcc_unreachable ();
12319
12320 if (const_vec != NULL
12321 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12322 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12323 return const_vec;
12324 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12325 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12326 pipeline cycle; creating the constant takes one or two ARM
12327 pipeline cycles. */
12328 return target;
12329 else if (const_vec != NULL_RTX)
12330 /* Load from constant pool. On Cortex-A8 this takes two cycles
12331 (for either double or quad vectors). We can not take advantage
12332 of single-cycle VLD1 because we need a PC-relative addressing
12333 mode. */
12334 return const_vec;
12335 else
12336 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12337 We can not construct an initializer. */
12338 return NULL_RTX;
12339 }
12340
12341 /* Initialize vector TARGET to VALS. */
12342
12343 void
12344 neon_expand_vector_init (rtx target, rtx vals)
12345 {
12346 machine_mode mode = GET_MODE (target);
12347 machine_mode inner_mode = GET_MODE_INNER (mode);
12348 int n_elts = GET_MODE_NUNITS (mode);
12349 int n_var = 0, one_var = -1;
12350 bool all_same = true;
12351 rtx x, mem;
12352 int i;
12353
12354 for (i = 0; i < n_elts; ++i)
12355 {
12356 x = XVECEXP (vals, 0, i);
12357 if (!CONSTANT_P (x))
12358 ++n_var, one_var = i;
12359
12360 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12361 all_same = false;
12362 }
12363
12364 if (n_var == 0)
12365 {
12366 rtx constant = neon_make_constant (vals);
12367 if (constant != NULL_RTX)
12368 {
12369 emit_move_insn (target, constant);
12370 return;
12371 }
12372 }
12373
12374 /* Splat a single non-constant element if we can. */
12375 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12376 {
12377 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12378 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
12379 return;
12380 }
12381
12382 /* One field is non-constant. Load constant then overwrite varying
12383 field. This is more efficient than using the stack. */
12384 if (n_var == 1)
12385 {
12386 rtx copy = copy_rtx (vals);
12387 rtx index = GEN_INT (one_var);
12388
12389 /* Load constant part of vector, substitute neighboring value for
12390 varying element. */
12391 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12392 neon_expand_vector_init (target, copy);
12393
12394 /* Insert variable. */
12395 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12396 switch (mode)
12397 {
12398 case E_V8QImode:
12399 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12400 break;
12401 case E_V16QImode:
12402 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12403 break;
12404 case E_V4HImode:
12405 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12406 break;
12407 case E_V8HImode:
12408 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12409 break;
12410 case E_V2SImode:
12411 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12412 break;
12413 case E_V4SImode:
12414 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12415 break;
12416 case E_V2SFmode:
12417 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12418 break;
12419 case E_V4SFmode:
12420 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12421 break;
12422 case E_V2DImode:
12423 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12424 break;
12425 default:
12426 gcc_unreachable ();
12427 }
12428 return;
12429 }
12430
12431 /* Construct the vector in memory one field at a time
12432 and load the whole vector. */
12433 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12434 for (i = 0; i < n_elts; i++)
12435 emit_move_insn (adjust_address_nv (mem, inner_mode,
12436 i * GET_MODE_SIZE (inner_mode)),
12437 XVECEXP (vals, 0, i));
12438 emit_move_insn (target, mem);
12439 }
12440
12441 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Report
12442    an error, using DESC to describe the operand, if it doesn't.  EXP indicates the source location, which includes the
12443 inlining history for intrinsics. */
12444
12445 static void
12446 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12447 const_tree exp, const char *desc)
12448 {
12449 HOST_WIDE_INT lane;
12450
12451 gcc_assert (CONST_INT_P (operand));
12452
12453 lane = INTVAL (operand);
12454
12455 if (lane < low || lane >= high)
12456 {
12457 if (exp)
12458 error ("%K%s %wd out of range %wd - %wd",
12459 exp, desc, lane, low, high - 1);
12460 else
12461 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12462 }
12463 }
12464
12465 /* Bounds-check lanes. */
12466
12467 void
12468 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12469 const_tree exp)
12470 {
12471 bounds_check (operand, low, high, exp, "lane");
12472 }
12473
12474 /* Bounds-check constants. */
12475
12476 void
12477 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12478 {
12479 bounds_check (operand, low, high, NULL_TREE, "constant");
12480 }
12481
12482 HOST_WIDE_INT
12483 neon_element_bits (machine_mode mode)
12484 {
12485 return GET_MODE_UNIT_BITSIZE (mode);
12486 }
12487
12488 \f
12489 /* Predicates for `match_operand' and `match_operator'. */
12490
12491 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12492 WB is true if full writeback address modes are allowed and is false
12493 if limited writeback address modes (POST_INC and PRE_DEC) are
12494 allowed. */
12495
12496 int
12497 arm_coproc_mem_operand (rtx op, bool wb)
12498 {
12499 rtx ind;
12500
12501 /* Reject eliminable registers. */
12502 if (! (reload_in_progress || reload_completed || lra_in_progress)
12503 && ( reg_mentioned_p (frame_pointer_rtx, op)
12504 || reg_mentioned_p (arg_pointer_rtx, op)
12505 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12506 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12507 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12508 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12509 return FALSE;
12510
12511 /* Constants are converted into offsets from labels. */
12512 if (!MEM_P (op))
12513 return FALSE;
12514
12515 ind = XEXP (op, 0);
12516
12517 if (reload_completed
12518 && (GET_CODE (ind) == LABEL_REF
12519 || (GET_CODE (ind) == CONST
12520 && GET_CODE (XEXP (ind, 0)) == PLUS
12521 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12522 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12523 return TRUE;
12524
12525 /* Match: (mem (reg)). */
12526 if (REG_P (ind))
12527 return arm_address_register_rtx_p (ind, 0);
12528
12529 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12530 acceptable in any case (subject to verification by
12531 arm_address_register_rtx_p). We need WB to be true to accept
12532 PRE_INC and POST_DEC. */
12533 if (GET_CODE (ind) == POST_INC
12534 || GET_CODE (ind) == PRE_DEC
12535 || (wb
12536 && (GET_CODE (ind) == PRE_INC
12537 || GET_CODE (ind) == POST_DEC)))
12538 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12539
12540 if (wb
12541 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12542 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12543 && GET_CODE (XEXP (ind, 1)) == PLUS
12544 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12545 ind = XEXP (ind, 1);
12546
12547 /* Match:
12548 (plus (reg)
12549 (const)). */
12550 if (GET_CODE (ind) == PLUS
12551 && REG_P (XEXP (ind, 0))
12552 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12553 && CONST_INT_P (XEXP (ind, 1))
12554 && INTVAL (XEXP (ind, 1)) > -1024
12555 && INTVAL (XEXP (ind, 1)) < 1024
12556 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12557 return TRUE;
12558
12559 return FALSE;
12560 }
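/* Illustrative addresses (editorial examples, not exhaustive):
     (mem (reg r4))                          -- accepted
     (mem (plus (reg r4) (const_int 8)))     -- accepted: word-aligned, |8| < 1024
     (mem (plus (reg r4) (const_int 1024)))  -- rejected: offset out of range
     (mem (plus (reg r4) (const_int 6)))     -- rejected: not a multiple of 4
     (mem (post_inc (reg r4)))               -- accepted regardless of WB
     (mem (pre_inc (reg r4)))                -- accepted only when WB is true.  */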
12561
12562 /* Return TRUE if OP is a memory operand which we can load or store a vector
12563 to/from. TYPE is one of the following values:
12564 0 - Vector load/store (vldr)
12565 1 - Core registers (ldm)
12566 2 - Element/structure loads (vld1)
12567 */
12568 int
12569 neon_vector_mem_operand (rtx op, int type, bool strict)
12570 {
12571 rtx ind;
12572
12573 /* Reject eliminable registers. */
12574 if (strict && ! (reload_in_progress || reload_completed)
12575 && (reg_mentioned_p (frame_pointer_rtx, op)
12576 || reg_mentioned_p (arg_pointer_rtx, op)
12577 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12578 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12579 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12580 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12581 return FALSE;
12582
12583 /* Constants are converted into offsets from labels. */
12584 if (!MEM_P (op))
12585 return FALSE;
12586
12587 ind = XEXP (op, 0);
12588
12589 if (reload_completed
12590 && (GET_CODE (ind) == LABEL_REF
12591 || (GET_CODE (ind) == CONST
12592 && GET_CODE (XEXP (ind, 0)) == PLUS
12593 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12594 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12595 return TRUE;
12596
12597 /* Match: (mem (reg)). */
12598 if (REG_P (ind))
12599 return arm_address_register_rtx_p (ind, 0);
12600
12601 /* Allow post-increment with Neon registers. */
12602 if ((type != 1 && GET_CODE (ind) == POST_INC)
12603 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12604 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12605
12606 /* Allow post-increment by register for VLDn.  */
12607 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12608 && GET_CODE (XEXP (ind, 1)) == PLUS
12609 && REG_P (XEXP (XEXP (ind, 1), 1)))
12610 return true;
12611
12612 /* Match:
12613 (plus (reg)
12614 (const)). */
12615 if (type == 0
12616 && GET_CODE (ind) == PLUS
12617 && REG_P (XEXP (ind, 0))
12618 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12619 && CONST_INT_P (XEXP (ind, 1))
12620 && INTVAL (XEXP (ind, 1)) > -1024
12621 /* For quad modes, we restrict the constant offset to be slightly less
12622 than what the instruction format permits. We have no such constraint
12623 on double mode offsets. (This must match arm_legitimate_index_p.) */
12624 && (INTVAL (XEXP (ind, 1))
12625 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12626 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12627 return TRUE;
12628
12629 return FALSE;
12630 }
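/* Rough summary by TYPE (an editorial sketch of the cases above):
     TYPE 0 (vldr): (reg), POST_INC, PRE_DEC, or reg plus a word-aligned
       constant offset in the range accepted above (slightly narrower for
       quad modes);
     TYPE 1 (ldm):  plain (reg) only;
     TYPE 2 (vld1): (reg), POST_INC, or POST_MODIFY by a register.  */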
12631
12632 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12633 type. */
12634 int
12635 neon_struct_mem_operand (rtx op)
12636 {
12637 rtx ind;
12638
12639 /* Reject eliminable registers. */
12640 if (! (reload_in_progress || reload_completed)
12641 && ( reg_mentioned_p (frame_pointer_rtx, op)
12642 || reg_mentioned_p (arg_pointer_rtx, op)
12643 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12644 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12645 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12646 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12647 return FALSE;
12648
12649 /* Constants are converted into offsets from labels. */
12650 if (!MEM_P (op))
12651 return FALSE;
12652
12653 ind = XEXP (op, 0);
12654
12655 if (reload_completed
12656 && (GET_CODE (ind) == LABEL_REF
12657 || (GET_CODE (ind) == CONST
12658 && GET_CODE (XEXP (ind, 0)) == PLUS
12659 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12660 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12661 return TRUE;
12662
12663 /* Match: (mem (reg)). */
12664 if (REG_P (ind))
12665 return arm_address_register_rtx_p (ind, 0);
12666
12667 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12668 if (GET_CODE (ind) == POST_INC
12669 || GET_CODE (ind) == PRE_DEC)
12670 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12671
12672 return FALSE;
12673 }
12674
12675 /* Return true if X is a register that will be eliminated later on. */
12676 int
12677 arm_eliminable_register (rtx x)
12678 {
12679 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12680 || REGNO (x) == ARG_POINTER_REGNUM
12681 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12682 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12683 }
12684
12685 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12686 coprocessor registers. Otherwise return NO_REGS. */
12687
12688 enum reg_class
12689 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12690 {
12691 if (mode == HFmode)
12692 {
12693 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12694 return GENERAL_REGS;
12695 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12696 return NO_REGS;
12697 return GENERAL_REGS;
12698 }
12699
12700 /* The neon move patterns handle all legitimate vector and struct
12701 addresses. */
12702 if (TARGET_NEON
12703 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12704 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12705 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12706 || VALID_NEON_STRUCT_MODE (mode)))
12707 return NO_REGS;
12708
12709 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12710 return NO_REGS;
12711
12712 return GENERAL_REGS;
12713 }
12714
12715 /* Values which must be returned in the most-significant end of the return
12716 register. */
12717
12718 static bool
12719 arm_return_in_msb (const_tree valtype)
12720 {
12721 return (TARGET_AAPCS_BASED
12722 && BYTES_BIG_ENDIAN
12723 && (AGGREGATE_TYPE_P (valtype)
12724 || TREE_CODE (valtype) == COMPLEX_TYPE
12725 || FIXED_POINT_TYPE_P (valtype)));
12726 }
12727
12728 /* Return TRUE if X references a SYMBOL_REF. */
12729 int
12730 symbol_mentioned_p (rtx x)
12731 {
12732 const char * fmt;
12733 int i;
12734
12735 if (GET_CODE (x) == SYMBOL_REF)
12736 return 1;
12737
12738 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12739 are constant offsets, not symbols. */
12740 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12741 return 0;
12742
12743 fmt = GET_RTX_FORMAT (GET_CODE (x));
12744
12745 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12746 {
12747 if (fmt[i] == 'E')
12748 {
12749 int j;
12750
12751 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12752 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12753 return 1;
12754 }
12755 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12756 return 1;
12757 }
12758
12759 return 0;
12760 }
12761
12762 /* Return TRUE if X references a LABEL_REF. */
12763 int
12764 label_mentioned_p (rtx x)
12765 {
12766 const char * fmt;
12767 int i;
12768
12769 if (GET_CODE (x) == LABEL_REF)
12770 return 1;
12771
12772 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12773 instruction, but they are constant offsets, not symbols. */
12774 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12775 return 0;
12776
12777 fmt = GET_RTX_FORMAT (GET_CODE (x));
12778 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12779 {
12780 if (fmt[i] == 'E')
12781 {
12782 int j;
12783
12784 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12785 if (label_mentioned_p (XVECEXP (x, i, j)))
12786 return 1;
12787 }
12788 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12789 return 1;
12790 }
12791
12792 return 0;
12793 }
12794
12795 int
12796 tls_mentioned_p (rtx x)
12797 {
12798 switch (GET_CODE (x))
12799 {
12800 case CONST:
12801 return tls_mentioned_p (XEXP (x, 0));
12802
12803 case UNSPEC:
12804 if (XINT (x, 1) == UNSPEC_TLS)
12805 return 1;
12806
12807 /* Fall through. */
12808 default:
12809 return 0;
12810 }
12811 }
12812
12813 /* Must not copy any rtx that uses a pc-relative address.
12814 Also, disallow copying of load-exclusive instructions that
12815 may appear after splitting of compare-and-swap-style operations
12816 so as to prevent those loops from being transformed away from their
12817 canonical forms (see PR 69904). */
12818
12819 static bool
12820 arm_cannot_copy_insn_p (rtx_insn *insn)
12821 {
12822 /* The tls call insn cannot be copied, as it is paired with a data
12823 word. */
12824 if (recog_memoized (insn) == CODE_FOR_tlscall)
12825 return true;
12826
12827 subrtx_iterator::array_type array;
12828 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12829 {
12830 const_rtx x = *iter;
12831 if (GET_CODE (x) == UNSPEC
12832 && (XINT (x, 1) == UNSPEC_PIC_BASE
12833 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12834 return true;
12835 }
12836
12837 rtx set = single_set (insn);
12838 if (set)
12839 {
12840 rtx src = SET_SRC (set);
12841 if (GET_CODE (src) == ZERO_EXTEND)
12842 src = XEXP (src, 0);
12843
12844 /* Catch the load-exclusive and load-acquire operations. */
12845 if (GET_CODE (src) == UNSPEC_VOLATILE
12846 && (XINT (src, 1) == VUNSPEC_LL
12847 || XINT (src, 1) == VUNSPEC_LAX))
12848 return true;
12849 }
12850 return false;
12851 }
12852
12853 enum rtx_code
12854 minmax_code (rtx x)
12855 {
12856 enum rtx_code code = GET_CODE (x);
12857
12858 switch (code)
12859 {
12860 case SMAX:
12861 return GE;
12862 case SMIN:
12863 return LE;
12864 case UMIN:
12865 return LEU;
12866 case UMAX:
12867 return GEU;
12868 default:
12869 gcc_unreachable ();
12870 }
12871 }
12872
12873 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12874
12875 bool
12876 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12877 int *mask, bool *signed_sat)
12878 {
12879 /* The high bound must be a power of two minus one. */
12880 int log = exact_log2 (INTVAL (hi_bound) + 1);
12881 if (log == -1)
12882 return false;
12883
12884 /* The low bound is either zero (for usat) or one less than the
12885 negation of the high bound (for ssat). */
12886 if (INTVAL (lo_bound) == 0)
12887 {
12888 if (mask)
12889 *mask = log;
12890 if (signed_sat)
12891 *signed_sat = false;
12892
12893 return true;
12894 }
12895
12896 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12897 {
12898 if (mask)
12899 *mask = log + 1;
12900 if (signed_sat)
12901 *signed_sat = true;
12902
12903 return true;
12904 }
12905
12906 return false;
12907 }
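/* Worked examples (editorial): clamping to [0, 255] has HI_BOUND 255, so
   log = 8 and LO_BOUND 0 selects an unsigned saturation, usat #8.
   Clamping to [-256, 255] also gives log = 8, and LO_BOUND -256 equals
   -HI_BOUND - 1, selecting a signed saturation with *MASK = 9, i.e.
   ssat #9, whose range is [-2^8, 2^8 - 1].  */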
12908
12909 /* Return 1 if memory locations are adjacent. */
12910 int
12911 adjacent_mem_locations (rtx a, rtx b)
12912 {
12913 /* We don't guarantee to preserve the order of these memory refs. */
12914 if (volatile_refs_p (a) || volatile_refs_p (b))
12915 return 0;
12916
12917 if ((REG_P (XEXP (a, 0))
12918 || (GET_CODE (XEXP (a, 0)) == PLUS
12919 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12920 && (REG_P (XEXP (b, 0))
12921 || (GET_CODE (XEXP (b, 0)) == PLUS
12922 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12923 {
12924 HOST_WIDE_INT val0 = 0, val1 = 0;
12925 rtx reg0, reg1;
12926 int val_diff;
12927
12928 if (GET_CODE (XEXP (a, 0)) == PLUS)
12929 {
12930 reg0 = XEXP (XEXP (a, 0), 0);
12931 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12932 }
12933 else
12934 reg0 = XEXP (a, 0);
12935
12936 if (GET_CODE (XEXP (b, 0)) == PLUS)
12937 {
12938 reg1 = XEXP (XEXP (b, 0), 0);
12939 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12940 }
12941 else
12942 reg1 = XEXP (b, 0);
12943
12944 /* Don't accept any offset that will require multiple
12945 instructions to handle, since this would cause the
12946 arith_adjacentmem pattern to output an overlong sequence. */
12947 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12948 return 0;
12949
12950 /* Don't allow an eliminable register: register elimination can make
12951 the offset too large. */
12952 if (arm_eliminable_register (reg0))
12953 return 0;
12954
12955 val_diff = val1 - val0;
12956
12957 if (arm_ld_sched)
12958 {
12959 /* If the target has load delay slots, then there's no benefit
12960 to using an ldm instruction unless the offset is zero and
12961 we are optimizing for size. */
12962 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
12963 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
12964 && (val_diff == 4 || val_diff == -4));
12965 }
12966
12967 return ((REGNO (reg0) == REGNO (reg1))
12968 && (val_diff == 4 || val_diff == -4));
12969 }
12970
12971 return 0;
12972 }
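/* Example (editorial): (mem (reg r3)) and (mem (plus (reg r3) (const_int 4)))
   are adjacent (same base register, offsets differing by 4).  On cores with
   load delay slots (arm_ld_sched) the pair is only reported as adjacent when
   optimizing for size and one of the two offsets is 0 or 4, per the test
   above.  */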
12973
12974 /* Return true if OP is a valid load or store multiple operation. LOAD is true
12975 for load operations, false for store operations. CONSECUTIVE is true
12976 if the register numbers in the operation must be consecutive in the register
12977 bank. RETURN_PC is true if the value is to be loaded into the PC.
12978 The pattern we are trying to match for load is:
12979 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
12980 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
12981 :
12982 :
12983 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
12984 ]
12985 where
12986 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
12987 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
12988 3. If consecutive is TRUE, then for kth register being loaded,
12989 REGNO (R_dk) = REGNO (R_d0) + k.
12990 The pattern for store is similar. */
12991 bool
12992 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
12993 bool consecutive, bool return_pc)
12994 {
12995 HOST_WIDE_INT count = XVECLEN (op, 0);
12996 rtx reg, mem, addr;
12997 unsigned regno;
12998 unsigned first_regno;
12999 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13000 rtx elt;
13001 bool addr_reg_in_reglist = false;
13002 bool update = false;
13003 int reg_increment;
13004 int offset_adj;
13005 int regs_per_val;
13006
13007 /* If not in SImode, then registers must be consecutive
13008 (e.g., VLDM instructions for DFmode). */
13009 gcc_assert ((mode == SImode) || consecutive);
13010 /* Setting return_pc for stores is illegal. */
13011 gcc_assert (!return_pc || load);
13012
13013 /* Set up the increments and the regs per val based on the mode. */
13014 reg_increment = GET_MODE_SIZE (mode);
13015 regs_per_val = reg_increment / 4;
13016 offset_adj = return_pc ? 1 : 0;
13017
13018 if (count <= 1
13019 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13020 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13021 return false;
13022
13023 /* Check if this is a write-back. */
13024 elt = XVECEXP (op, 0, offset_adj);
13025 if (GET_CODE (SET_SRC (elt)) == PLUS)
13026 {
13027 i++;
13028 base = 1;
13029 update = true;
13030
13031 /* The offset adjustment must be the number of registers being
13032 popped times the size of a single register. */
13033 if (!REG_P (SET_DEST (elt))
13034 || !REG_P (XEXP (SET_SRC (elt), 0))
13035 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13036 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13037 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13038 ((count - 1 - offset_adj) * reg_increment))
13039 return false;
13040 }
13041
13042 i = i + offset_adj;
13043 base = base + offset_adj;
13044 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13045 success depends on the type: VLDM can do just one reg,
13046 LDM must do at least two. */
13047 if ((count <= i) && (mode == SImode))
13048 return false;
13049
13050 elt = XVECEXP (op, 0, i - 1);
13051 if (GET_CODE (elt) != SET)
13052 return false;
13053
13054 if (load)
13055 {
13056 reg = SET_DEST (elt);
13057 mem = SET_SRC (elt);
13058 }
13059 else
13060 {
13061 reg = SET_SRC (elt);
13062 mem = SET_DEST (elt);
13063 }
13064
13065 if (!REG_P (reg) || !MEM_P (mem))
13066 return false;
13067
13068 regno = REGNO (reg);
13069 first_regno = regno;
13070 addr = XEXP (mem, 0);
13071 if (GET_CODE (addr) == PLUS)
13072 {
13073 if (!CONST_INT_P (XEXP (addr, 1)))
13074 return false;
13075
13076 offset = INTVAL (XEXP (addr, 1));
13077 addr = XEXP (addr, 0);
13078 }
13079
13080 if (!REG_P (addr))
13081 return false;
13082
13083 /* Don't allow SP to be loaded unless it is also the base register. It
13084 guarantees that SP is reset correctly when an LDM instruction
13085 is interrupted. Otherwise, we might end up with a corrupt stack. */
13086 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13087 return false;
13088
13089 for (; i < count; i++)
13090 {
13091 elt = XVECEXP (op, 0, i);
13092 if (GET_CODE (elt) != SET)
13093 return false;
13094
13095 if (load)
13096 {
13097 reg = SET_DEST (elt);
13098 mem = SET_SRC (elt);
13099 }
13100 else
13101 {
13102 reg = SET_SRC (elt);
13103 mem = SET_DEST (elt);
13104 }
13105
13106 if (!REG_P (reg)
13107 || GET_MODE (reg) != mode
13108 || REGNO (reg) <= regno
13109 || (consecutive
13110 && (REGNO (reg) !=
13111 (unsigned int) (first_regno + regs_per_val * (i - base))))
13112 /* Don't allow SP to be loaded unless it is also the base register. It
13113 guarantees that SP is reset correctly when an LDM instruction
13114 is interrupted. Otherwise, we might end up with a corrupt stack. */
13115 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13116 || !MEM_P (mem)
13117 || GET_MODE (mem) != mode
13118 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13119 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13120 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13121 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13122 offset + (i - base) * reg_increment))
13123 && (!REG_P (XEXP (mem, 0))
13124 || offset + (i - base) * reg_increment != 0)))
13125 return false;
13126
13127 regno = REGNO (reg);
13128 if (regno == REGNO (addr))
13129 addr_reg_in_reglist = true;
13130 }
13131
13132 if (load)
13133 {
13134 if (update && addr_reg_in_reglist)
13135 return false;
13136
13137 /* For Thumb-1, the address register is always modified, either by write-back
13138 or by an explicit load. If the pattern does not describe an update,
13139 then the address register must be in the list of loaded registers. */
13140 if (TARGET_THUMB1)
13141 return update || addr_reg_in_reglist;
13142 }
13143
13144 return true;
13145 }
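/* Illustrative match (editorial): a "pop {r4, r5}" style load with write-back
   would be presented as
     (parallel [(set (reg sp) (plus (reg sp) (const_int 8)))
                (set (reg r4) (mem (reg sp)))
                (set (reg r5) (mem (plus (reg sp) (const_int 4))))])
   Here the write-back adjustment is 2 registers * 4 bytes, the register
   numbers ascend, and each memory offset grows by the register size, so the
   function returns true for SImode with LOAD set.  */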
13146
13147 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13148 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13149 instruction. ADD_OFFSET is nonzero if the base address register needs
13150 to be modified with an add instruction before we can use it. */
13151
13152 static bool
13153 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13154 int nops, HOST_WIDE_INT add_offset)
13155 {
13156 /* For the ARM8, ARM9 and StrongARM, 2 ldr instructions are faster than an ldm
13157 if the offset isn't small enough. The reason 2 ldrs are faster
13158 is because these ARMs are able to do more than one cache access
13159 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13160 whilst the ARM8 has a double bandwidth cache. This means that
13161 these cores can do both an instruction fetch and a data fetch in
13162 a single cycle, so the trick of calculating the address into a
13163 scratch register (one of the result regs) and then doing a load
13164 multiple actually becomes slower (and no smaller in code size).
13165 That is the transformation
13166
13167 ldr rd1, [rbase + offset]
13168 ldr rd2, [rbase + offset + 4]
13169
13170 to
13171
13172 add rd1, rbase, offset
13173 ldmia rd1, {rd1, rd2}
13174
13175 produces worse code -- '3 cycles + any stalls on rd2' instead of
13176 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13177 access per cycle, the first sequence could never complete in less
13178 than 6 cycles, whereas the ldm sequence would only take 5 and
13179 would make better use of sequential accesses if not hitting the
13180 cache.
13181
13182 We cheat here and test 'arm_ld_sched' which we currently know to
13183 only be true for the ARM8, ARM9 and StrongARM. If this ever
13184 changes, then the test below needs to be reworked. */
13185 if (nops == 2 && arm_ld_sched && add_offset != 0)
13186 return false;
13187
13188 /* XScale has load-store double instructions, but they have stricter
13189 alignment requirements than load-store multiple, so we cannot
13190 use them.
13191
13192 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13193 the pipeline until completion.
13194
13195 NREGS CYCLES
13196 1 3
13197 2 4
13198 3 5
13199 4 6
13200
13201 An ldr instruction takes 1-3 cycles, but does not block the
13202 pipeline.
13203
13204 NREGS CYCLES
13205 1 1-3
13206 2 2-6
13207 3 3-9
13208 4 4-12
13209
13210 Best case ldr will always win. However, the more ldr instructions
13211 we issue, the less likely we are to be able to schedule them well.
13212 Using ldr instructions also increases code size.
13213
13214 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13215 for counts of 3 or 4 regs. */
13216 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13217 return false;
13218 return true;
13219 }
13220
13221 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13222 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13223 an array ORDER which describes the sequence to use when accessing the
13224 offsets that produces an ascending order. In this sequence, each
13225 offset must be larger by exactly 4 than the previous one. ORDER[0]
13226 must have been filled in with the lowest offset by the caller.
13227 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13228 we use to verify that ORDER produces an ascending order of registers.
13229 Return true if it was possible to construct such an order, false if
13230 not. */
13231
13232 static bool
13233 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13234 int *unsorted_regs)
13235 {
13236 int i;
13237 for (i = 1; i < nops; i++)
13238 {
13239 int j;
13240
13241 order[i] = order[i - 1];
13242 for (j = 0; j < nops; j++)
13243 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13244 {
13245 /* We must find exactly one offset that is higher than the
13246 previous one by 4. */
13247 if (order[i] != order[i - 1])
13248 return false;
13249 order[i] = j;
13250 }
13251 if (order[i] == order[i - 1])
13252 return false;
13253 /* The register numbers must be ascending. */
13254 if (unsorted_regs != NULL
13255 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13256 return false;
13257 }
13258 return true;
13259 }
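/* Worked example (editorial): with UNSORTED_OFFSETS = {8, 0, 4, 12} the
   caller presets ORDER[0] = 1 (the index of offset 0); the loop then
   produces ORDER = {1, 2, 0, 3}, i.e. offsets 0, 4, 8, 12.  If
   UNSORTED_REGS were {3, 1, 2, 4}, the registers visited in that order are
   1, 2, 3, 4, which is ascending, so the function returns true.  */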
13260
13261 /* Used to determine in a peephole whether a sequence of load
13262 instructions can be changed into a load-multiple instruction.
13263 NOPS is the number of separate load instructions we are examining. The
13264 first NOPS entries in OPERANDS are the destination registers, the
13265 next NOPS entries are memory operands. If this function is
13266 successful, *BASE is set to the common base register of the memory
13267 accesses; *LOAD_OFFSET is set to the first memory location's offset
13268 from that base register.
13269 REGS is an array filled in with the destination register numbers.
13270 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13271 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13272 the sequence of registers in REGS matches the loads from ascending memory
13273 locations, and the function verifies that the register numbers are
13274 themselves ascending. If CHECK_REGS is false, the register numbers
13275 are stored in the order they are found in the operands. */
13276 static int
13277 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13278 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13279 {
13280 int unsorted_regs[MAX_LDM_STM_OPS];
13281 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13282 int order[MAX_LDM_STM_OPS];
13283 rtx base_reg_rtx = NULL;
13284 int base_reg = -1;
13285 int i, ldm_case;
13286
13287 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13288 easily extended if required. */
13289 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13290
13291 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13292
13293 /* Loop over the operands and check that the memory references are
13294 suitable (i.e. immediate offsets from the same base register). At
13295 the same time, extract the target register, and the memory
13296 offsets. */
13297 for (i = 0; i < nops; i++)
13298 {
13299 rtx reg;
13300 rtx offset;
13301
13302 /* Convert a subreg of a mem into the mem itself. */
13303 if (GET_CODE (operands[nops + i]) == SUBREG)
13304 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13305
13306 gcc_assert (MEM_P (operands[nops + i]));
13307
13308 /* Don't reorder volatile memory references; it doesn't seem worth
13309 looking for the case where the order is ok anyway. */
13310 if (MEM_VOLATILE_P (operands[nops + i]))
13311 return 0;
13312
13313 offset = const0_rtx;
13314
13315 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13316 || (GET_CODE (reg) == SUBREG
13317 && REG_P (reg = SUBREG_REG (reg))))
13318 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13319 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13320 || (GET_CODE (reg) == SUBREG
13321 && REG_P (reg = SUBREG_REG (reg))))
13322 && (CONST_INT_P (offset
13323 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13324 {
13325 if (i == 0)
13326 {
13327 base_reg = REGNO (reg);
13328 base_reg_rtx = reg;
13329 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13330 return 0;
13331 }
13332 else if (base_reg != (int) REGNO (reg))
13333 /* Not addressed from the same base register. */
13334 return 0;
13335
13336 unsorted_regs[i] = (REG_P (operands[i])
13337 ? REGNO (operands[i])
13338 : REGNO (SUBREG_REG (operands[i])));
13339
13340 /* If it isn't an integer register, or if it overwrites the
13341 base register but isn't the last insn in the list, then
13342 we can't do this. */
13343 if (unsorted_regs[i] < 0
13344 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13345 || unsorted_regs[i] > 14
13346 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13347 return 0;
13348
13349 /* Don't allow SP to be loaded unless it is also the base
13350 register. It guarantees that SP is reset correctly when
13351 an LDM instruction is interrupted. Otherwise, we might
13352 end up with a corrupt stack. */
13353 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13354 return 0;
13355
13356 unsorted_offsets[i] = INTVAL (offset);
13357 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13358 order[0] = i;
13359 }
13360 else
13361 /* Not a suitable memory address. */
13362 return 0;
13363 }
13364
13365 /* All the useful information has now been extracted from the
13366 operands into unsorted_regs and unsorted_offsets; additionally,
13367 order[0] has been set to the lowest offset in the list. Sort
13368 the offsets into order, verifying that they are adjacent, and
13369 check that the register numbers are ascending. */
13370 if (!compute_offset_order (nops, unsorted_offsets, order,
13371 check_regs ? unsorted_regs : NULL))
13372 return 0;
13373
13374 if (saved_order)
13375 memcpy (saved_order, order, sizeof order);
13376
13377 if (base)
13378 {
13379 *base = base_reg;
13380
13381 for (i = 0; i < nops; i++)
13382 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13383
13384 *load_offset = unsorted_offsets[order[0]];
13385 }
13386
13387 if (TARGET_THUMB1
13388 && !peep2_reg_dead_p (nops, base_reg_rtx))
13389 return 0;
13390
13391 if (unsorted_offsets[order[0]] == 0)
13392 ldm_case = 1; /* ldmia */
13393 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13394 ldm_case = 2; /* ldmib */
13395 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13396 ldm_case = 3; /* ldmda */
13397 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13398 ldm_case = 4; /* ldmdb */
13399 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13400 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13401 ldm_case = 5;
13402 else
13403 return 0;
13404
13405 if (!multiple_operation_profitable_p (false, nops,
13406 ldm_case == 5
13407 ? unsorted_offsets[order[0]] : 0))
13408 return 0;
13409
13410 return ldm_case;
13411 }
13412
13413 /* Used to determine in a peephole whether a sequence of store instructions can
13414 be changed into a store-multiple instruction.
13415 NOPS is the number of separate store instructions we are examining.
13416 NOPS_TOTAL is the total number of instructions recognized by the peephole
13417 pattern.
13418 The first NOPS entries in OPERANDS are the source registers, the next
13419 NOPS entries are memory operands. If this function is successful, *BASE is
13420 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13421 to the first memory location's offset from that base register. REGS is an
13422 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13423 likewise filled with the corresponding rtx's.
13424 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13425 numbers to an ascending order of stores.
13426 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13427 from ascending memory locations, and the function verifies that the register
13428 numbers are themselves ascending. If CHECK_REGS is false, the register
13429 numbers are stored in the order they are found in the operands. */
13430 static int
13431 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13432 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13433 HOST_WIDE_INT *load_offset, bool check_regs)
13434 {
13435 int unsorted_regs[MAX_LDM_STM_OPS];
13436 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13437 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13438 int order[MAX_LDM_STM_OPS];
13439 int base_reg = -1;
13440 rtx base_reg_rtx = NULL;
13441 int i, stm_case;
13442
13443 /* Write back of base register is currently only supported for Thumb 1. */
13444 int base_writeback = TARGET_THUMB1;
13445
13446 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13447 easily extended if required. */
13448 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13449
13450 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13451
13452 /* Loop over the operands and check that the memory references are
13453 suitable (i.e. immediate offsets from the same base register). At
13454 the same time, extract the target register, and the memory
13455 offsets. */
13456 for (i = 0; i < nops; i++)
13457 {
13458 rtx reg;
13459 rtx offset;
13460
13461 /* Convert a subreg of a mem into the mem itself. */
13462 if (GET_CODE (operands[nops + i]) == SUBREG)
13463 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13464
13465 gcc_assert (MEM_P (operands[nops + i]));
13466
13467 /* Don't reorder volatile memory references; it doesn't seem worth
13468 looking for the case where the order is ok anyway. */
13469 if (MEM_VOLATILE_P (operands[nops + i]))
13470 return 0;
13471
13472 offset = const0_rtx;
13473
13474 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13475 || (GET_CODE (reg) == SUBREG
13476 && REG_P (reg = SUBREG_REG (reg))))
13477 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13478 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13479 || (GET_CODE (reg) == SUBREG
13480 && REG_P (reg = SUBREG_REG (reg))))
13481 && (CONST_INT_P (offset
13482 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13483 {
13484 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13485 ? operands[i] : SUBREG_REG (operands[i]));
13486 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13487
13488 if (i == 0)
13489 {
13490 base_reg = REGNO (reg);
13491 base_reg_rtx = reg;
13492 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13493 return 0;
13494 }
13495 else if (base_reg != (int) REGNO (reg))
13496 /* Not addressed from the same base register. */
13497 return 0;
13498
13499 /* If it isn't an integer register, then we can't do this. */
13500 if (unsorted_regs[i] < 0
13501 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13502 /* The effects are unpredictable if the base register is
13503 both updated and stored. */
13504 || (base_writeback && unsorted_regs[i] == base_reg)
13505 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13506 || unsorted_regs[i] > 14)
13507 return 0;
13508
13509 unsorted_offsets[i] = INTVAL (offset);
13510 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13511 order[0] = i;
13512 }
13513 else
13514 /* Not a suitable memory address. */
13515 return 0;
13516 }
13517
13518 /* All the useful information has now been extracted from the
13519 operands into unsorted_regs and unsorted_offsets; additionally,
13520 order[0] has been set to the lowest offset in the list. Sort
13521 the offsets into order, verifying that they are adjacent, and
13522 check that the register numbers are ascending. */
13523 if (!compute_offset_order (nops, unsorted_offsets, order,
13524 check_regs ? unsorted_regs : NULL))
13525 return 0;
13526
13527 if (saved_order)
13528 memcpy (saved_order, order, sizeof order);
13529
13530 if (base)
13531 {
13532 *base = base_reg;
13533
13534 for (i = 0; i < nops; i++)
13535 {
13536 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13537 if (reg_rtxs)
13538 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13539 }
13540
13541 *load_offset = unsorted_offsets[order[0]];
13542 }
13543
13544 if (TARGET_THUMB1
13545 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13546 return 0;
13547
13548 if (unsorted_offsets[order[0]] == 0)
13549 stm_case = 1; /* stmia */
13550 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13551 stm_case = 2; /* stmib */
13552 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13553 stm_case = 3; /* stmda */
13554 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13555 stm_case = 4; /* stmdb */
13556 else
13557 return 0;
13558
13559 if (!multiple_operation_profitable_p (false, nops, 0))
13560 return 0;
13561
13562 return stm_case;
13563 }
13564 \f
13565 /* Routines for use in generating RTL. */
13566
13567 /* Generate a load-multiple instruction. COUNT is the number of loads in
13568 the instruction; REGS and MEMS are arrays containing the operands.
13569 BASEREG is the base register to be used in addressing the memory operands.
13570 WBACK_OFFSET is nonzero if the instruction should update the base
13571 register. */
13572
13573 static rtx
13574 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13575 HOST_WIDE_INT wback_offset)
13576 {
13577 int i = 0, j;
13578 rtx result;
13579
13580 if (!multiple_operation_profitable_p (false, count, 0))
13581 {
13582 rtx seq;
13583
13584 start_sequence ();
13585
13586 for (i = 0; i < count; i++)
13587 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13588
13589 if (wback_offset != 0)
13590 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13591
13592 seq = get_insns ();
13593 end_sequence ();
13594
13595 return seq;
13596 }
13597
13598 result = gen_rtx_PARALLEL (VOIDmode,
13599 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13600 if (wback_offset != 0)
13601 {
13602 XVECEXP (result, 0, 0)
13603 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13604 i = 1;
13605 count++;
13606 }
13607
13608 for (j = 0; i < count; i++, j++)
13609 XVECEXP (result, 0, i)
13610 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13611
13612 return result;
13613 }
13614
13615 /* Generate a store-multiple instruction. COUNT is the number of stores in
13616 the instruction; REGS and MEMS are arrays containing the operands.
13617 BASEREG is the base register to be used in addressing the memory operands.
13618 WBACK_OFFSET is nonzero if the instruction should update the base
13619 register. */
13620
13621 static rtx
13622 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13623 HOST_WIDE_INT wback_offset)
13624 {
13625 int i = 0, j;
13626 rtx result;
13627
13628 if (GET_CODE (basereg) == PLUS)
13629 basereg = XEXP (basereg, 0);
13630
13631 if (!multiple_operation_profitable_p (false, count, 0))
13632 {
13633 rtx seq;
13634
13635 start_sequence ();
13636
13637 for (i = 0; i < count; i++)
13638 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13639
13640 if (wback_offset != 0)
13641 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13642
13643 seq = get_insns ();
13644 end_sequence ();
13645
13646 return seq;
13647 }
13648
13649 result = gen_rtx_PARALLEL (VOIDmode,
13650 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13651 if (wback_offset != 0)
13652 {
13653 XVECEXP (result, 0, 0)
13654 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13655 i = 1;
13656 count++;
13657 }
13658
13659 for (j = 0; i < count; i++, j++)
13660 XVECEXP (result, 0, i)
13661 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13662
13663 return result;
13664 }
13665
13666 /* Generate either a load-multiple or a store-multiple instruction. This
13667 function can be used in situations where we can start with a single MEM
13668 rtx and adjust its address upwards.
13669 COUNT is the number of operations in the instruction, not counting a
13670 possible update of the base register. REGS is an array containing the
13671 register operands.
13672 BASEREG is the base register to be used in addressing the memory operands,
13673 which are constructed from BASEMEM.
13674 WRITE_BACK specifies whether the generated instruction should include an
13675 update of the base register.
13676 OFFSETP is used to pass an offset to and from this function; this offset
13677 is not used when constructing the address (instead BASEMEM should have an
13678 appropriate offset in its address), it is used only for setting
13679 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13680
13681 static rtx
13682 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13683 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13684 {
13685 rtx mems[MAX_LDM_STM_OPS];
13686 HOST_WIDE_INT offset = *offsetp;
13687 int i;
13688
13689 gcc_assert (count <= MAX_LDM_STM_OPS);
13690
13691 if (GET_CODE (basereg) == PLUS)
13692 basereg = XEXP (basereg, 0);
13693
13694 for (i = 0; i < count; i++)
13695 {
13696 rtx addr = plus_constant (Pmode, basereg, i * 4);
13697 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13698 offset += 4;
13699 }
13700
13701 if (write_back)
13702 *offsetp = offset;
13703
13704 if (is_load)
13705 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13706 write_back ? 4 * count : 0);
13707 else
13708 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13709 write_back ? 4 * count : 0);
13710 }
13711
13712 rtx
13713 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13714 rtx basemem, HOST_WIDE_INT *offsetp)
13715 {
13716 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13717 offsetp);
13718 }
13719
13720 rtx
13721 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13722 rtx basemem, HOST_WIDE_INT *offsetp)
13723 {
13724 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13725 offsetp);
13726 }
13727
13728 /* Called from a peephole2 expander to turn a sequence of loads into an
13729 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13730 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13731 is true if we can reorder the registers because they are used commutatively
13732 subsequently.
13733 Returns true iff we could generate a new instruction. */
13734
13735 bool
13736 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13737 {
13738 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13739 rtx mems[MAX_LDM_STM_OPS];
13740 int i, j, base_reg;
13741 rtx base_reg_rtx;
13742 HOST_WIDE_INT offset;
13743 int write_back = FALSE;
13744 int ldm_case;
13745 rtx addr;
13746
13747 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13748 &base_reg, &offset, !sort_regs);
13749
13750 if (ldm_case == 0)
13751 return false;
13752
13753 if (sort_regs)
13754 for (i = 0; i < nops - 1; i++)
13755 for (j = i + 1; j < nops; j++)
13756 if (regs[i] > regs[j])
13757 {
13758 int t = regs[i];
13759 regs[i] = regs[j];
13760 regs[j] = t;
13761 }
13762 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13763
13764 if (TARGET_THUMB1)
13765 {
13766 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13767 gcc_assert (ldm_case == 1 || ldm_case == 5);
13768 write_back = TRUE;
13769 }
13770
13771 if (ldm_case == 5)
13772 {
13773 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13774 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13775 offset = 0;
13776 if (!TARGET_THUMB1)
13777 base_reg_rtx = newbase;
13778 }
13779
13780 for (i = 0; i < nops; i++)
13781 {
13782 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13783 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13784 SImode, addr, 0);
13785 }
13786 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13787 write_back ? offset + i * 4 : 0));
13788 return true;
13789 }
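/* Rough before/after picture (editorial): a peephole over
     ldr r0, [r3]
     ldr r1, [r3, #4]
   yields ldm_case 1 and emits "ldmia r3, {r0, r1}", assuming the
   two-register transfer is considered profitable for the core.  For Thumb-1
   the base register must be dead afterwards and write-back is forced,
   giving "ldmia r3!, {r0, r1}" instead.  */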
13790
13791 /* Called from a peephole2 expander to turn a sequence of stores into an
13792 STM instruction. OPERANDS are the operands found by the peephole matcher;
13793 NOPS indicates how many separate stores we are trying to combine.
13794 Returns true iff we could generate a new instruction. */
13795
13796 bool
13797 gen_stm_seq (rtx *operands, int nops)
13798 {
13799 int i;
13800 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13801 rtx mems[MAX_LDM_STM_OPS];
13802 int base_reg;
13803 rtx base_reg_rtx;
13804 HOST_WIDE_INT offset;
13805 int write_back = FALSE;
13806 int stm_case;
13807 rtx addr;
13808 bool base_reg_dies;
13809
13810 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13811 mem_order, &base_reg, &offset, true);
13812
13813 if (stm_case == 0)
13814 return false;
13815
13816 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13817
13818 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13819 if (TARGET_THUMB1)
13820 {
13821 gcc_assert (base_reg_dies);
13822 write_back = TRUE;
13823 }
13824
13825 if (stm_case == 5)
13826 {
13827 gcc_assert (base_reg_dies);
13828 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13829 offset = 0;
13830 }
13831
13832 addr = plus_constant (Pmode, base_reg_rtx, offset);
13833
13834 for (i = 0; i < nops; i++)
13835 {
13836 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13837 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13838 SImode, addr, 0);
13839 }
13840 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13841 write_back ? offset + i * 4 : 0));
13842 return true;
13843 }
13844
13845 /* Called from a peephole2 expander to turn a sequence of stores that are
13846 preceded by constant loads into an STM instruction. OPERANDS are the
13847 operands found by the peephole matcher; NOPS indicates how many
13848 separate stores we are trying to combine; there are 2 * NOPS
13849 instructions in the peephole.
13850 Returns true iff we could generate a new instruction. */
13851
13852 bool
13853 gen_const_stm_seq (rtx *operands, int nops)
13854 {
13855 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13856 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13857 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13858 rtx mems[MAX_LDM_STM_OPS];
13859 int base_reg;
13860 rtx base_reg_rtx;
13861 HOST_WIDE_INT offset;
13862 int write_back = FALSE;
13863 int stm_case;
13864 rtx addr;
13865 bool base_reg_dies;
13866 int i, j;
13867 HARD_REG_SET allocated;
13868
13869 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13870 mem_order, &base_reg, &offset, false);
13871
13872 if (stm_case == 0)
13873 return false;
13874
13875 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13876
13877 /* If the same register is used more than once, try to find a free
13878 register. */
13879 CLEAR_HARD_REG_SET (allocated);
13880 for (i = 0; i < nops; i++)
13881 {
13882 for (j = i + 1; j < nops; j++)
13883 if (regs[i] == regs[j])
13884 {
13885 rtx t = peep2_find_free_register (0, nops * 2,
13886 TARGET_THUMB1 ? "l" : "r",
13887 SImode, &allocated);
13888 if (t == NULL_RTX)
13889 return false;
13890 reg_rtxs[i] = t;
13891 regs[i] = REGNO (t);
13892 }
13893 }
13894
13895 /* Compute an ordering that maps the register numbers to an ascending
13896 sequence. */
13897 reg_order[0] = 0;
13898 for (i = 0; i < nops; i++)
13899 if (regs[i] < regs[reg_order[0]])
13900 reg_order[0] = i;
13901
13902 for (i = 1; i < nops; i++)
13903 {
13904 int this_order = reg_order[i - 1];
13905 for (j = 0; j < nops; j++)
13906 if (regs[j] > regs[reg_order[i - 1]]
13907 && (this_order == reg_order[i - 1]
13908 || regs[j] < regs[this_order]))
13909 this_order = j;
13910 reg_order[i] = this_order;
13911 }
13912
13913 /* Ensure that registers that must be live after the instruction end
13914 up with the correct value. */
13915 for (i = 0; i < nops; i++)
13916 {
13917 int this_order = reg_order[i];
13918 if ((this_order != mem_order[i]
13919 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13920 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13921 return false;
13922 }
13923
13924 /* Load the constants. */
13925 for (i = 0; i < nops; i++)
13926 {
13927 rtx op = operands[2 * nops + mem_order[i]];
13928 sorted_regs[i] = regs[reg_order[i]];
13929 emit_move_insn (reg_rtxs[reg_order[i]], op);
13930 }
13931
13932 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13933
13934 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13935 if (TARGET_THUMB1)
13936 {
13937 gcc_assert (base_reg_dies);
13938 write_back = TRUE;
13939 }
13940
13941 if (stm_case == 5)
13942 {
13943 gcc_assert (base_reg_dies);
13944 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13945 offset = 0;
13946 }
13947
13948 addr = plus_constant (Pmode, base_reg_rtx, offset);
13949
13950 for (i = 0; i < nops; i++)
13951 {
13952 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13953 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13954 SImode, addr, 0);
13955 }
13956 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13957 write_back ? offset + i * 4 : 0));
13958 return true;
13959 }
13960
13961 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
13962 unaligned copies on processors which support unaligned semantics for those
13963 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
13964 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
13965 An interleave factor of 1 (the minimum) will perform no interleaving.
13966 Load/store multiple are used for aligned addresses where possible. */
13967
13968 static void
13969 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
13970 HOST_WIDE_INT length,
13971 unsigned int interleave_factor)
13972 {
13973 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
13974 int *regnos = XALLOCAVEC (int, interleave_factor);
13975 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
13976 HOST_WIDE_INT i, j;
13977 HOST_WIDE_INT remaining = length, words;
13978 rtx halfword_tmp = NULL, byte_tmp = NULL;
13979 rtx dst, src;
13980 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
13981 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
13982 HOST_WIDE_INT srcoffset, dstoffset;
13983 HOST_WIDE_INT src_autoinc, dst_autoinc;
13984 rtx mem, addr;
13985
13986 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
13987
13988 /* Use hard registers if we have aligned source or destination so we can use
13989 load/store multiple with contiguous registers. */
13990 if (dst_aligned || src_aligned)
13991 for (i = 0; i < interleave_factor; i++)
13992 regs[i] = gen_rtx_REG (SImode, i);
13993 else
13994 for (i = 0; i < interleave_factor; i++)
13995 regs[i] = gen_reg_rtx (SImode);
13996
13997 dst = copy_addr_to_reg (XEXP (dstbase, 0));
13998 src = copy_addr_to_reg (XEXP (srcbase, 0));
13999
14000 srcoffset = dstoffset = 0;
14001
14002 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14003 For copying the last bytes we want to subtract this offset again. */
14004 src_autoinc = dst_autoinc = 0;
14005
14006 for (i = 0; i < interleave_factor; i++)
14007 regnos[i] = i;
14008
14009 /* Copy BLOCK_SIZE_BYTES chunks. */
14010
14011 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14012 {
14013 /* Load words. */
14014 if (src_aligned && interleave_factor > 1)
14015 {
14016 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14017 TRUE, srcbase, &srcoffset));
14018 src_autoinc += UNITS_PER_WORD * interleave_factor;
14019 }
14020 else
14021 {
14022 for (j = 0; j < interleave_factor; j++)
14023 {
14024 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14025 - src_autoinc));
14026 mem = adjust_automodify_address (srcbase, SImode, addr,
14027 srcoffset + j * UNITS_PER_WORD);
14028 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14029 }
14030 srcoffset += block_size_bytes;
14031 }
14032
14033 /* Store words. */
14034 if (dst_aligned && interleave_factor > 1)
14035 {
14036 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14037 TRUE, dstbase, &dstoffset));
14038 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14039 }
14040 else
14041 {
14042 for (j = 0; j < interleave_factor; j++)
14043 {
14044 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14045 - dst_autoinc));
14046 mem = adjust_automodify_address (dstbase, SImode, addr,
14047 dstoffset + j * UNITS_PER_WORD);
14048 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14049 }
14050 dstoffset += block_size_bytes;
14051 }
14052
14053 remaining -= block_size_bytes;
14054 }
14055
14056 /* Copy any whole words left (note these aren't interleaved with any
14057 subsequent halfword/byte load/stores in the interests of simplicity). */
14058
14059 words = remaining / UNITS_PER_WORD;
14060
14061 gcc_assert (words < interleave_factor);
14062
14063 if (src_aligned && words > 1)
14064 {
14065 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14066 &srcoffset));
14067 src_autoinc += UNITS_PER_WORD * words;
14068 }
14069 else
14070 {
14071 for (j = 0; j < words; j++)
14072 {
14073 addr = plus_constant (Pmode, src,
14074 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14075 mem = adjust_automodify_address (srcbase, SImode, addr,
14076 srcoffset + j * UNITS_PER_WORD);
14077 if (src_aligned)
14078 emit_move_insn (regs[j], mem);
14079 else
14080 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14081 }
14082 srcoffset += words * UNITS_PER_WORD;
14083 }
14084
14085 if (dst_aligned && words > 1)
14086 {
14087 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14088 &dstoffset));
14089 dst_autoinc += words * UNITS_PER_WORD;
14090 }
14091 else
14092 {
14093 for (j = 0; j < words; j++)
14094 {
14095 addr = plus_constant (Pmode, dst,
14096 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14097 mem = adjust_automodify_address (dstbase, SImode, addr,
14098 dstoffset + j * UNITS_PER_WORD);
14099 if (dst_aligned)
14100 emit_move_insn (mem, regs[j]);
14101 else
14102 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14103 }
14104 dstoffset += words * UNITS_PER_WORD;
14105 }
14106
14107 remaining -= words * UNITS_PER_WORD;
14108
14109 gcc_assert (remaining < 4);
14110
14111 /* Copy a halfword if necessary. */
14112
14113 if (remaining >= 2)
14114 {
14115 halfword_tmp = gen_reg_rtx (SImode);
14116
14117 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14118 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14119 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14120
14121 /* Either write out immediately, or delay until we've loaded the last
14122 byte, depending on interleave factor. */
14123 if (interleave_factor == 1)
14124 {
14125 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14126 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14127 emit_insn (gen_unaligned_storehi (mem,
14128 gen_lowpart (HImode, halfword_tmp)));
14129 halfword_tmp = NULL;
14130 dstoffset += 2;
14131 }
14132
14133 remaining -= 2;
14134 srcoffset += 2;
14135 }
14136
14137 gcc_assert (remaining < 2);
14138
14139 /* Copy last byte. */
14140
14141 if ((remaining & 1) != 0)
14142 {
14143 byte_tmp = gen_reg_rtx (SImode);
14144
14145 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14146 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14147 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14148
14149 if (interleave_factor == 1)
14150 {
14151 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14152 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14153 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14154 byte_tmp = NULL;
14155 dstoffset++;
14156 }
14157
14158 remaining--;
14159 srcoffset++;
14160 }
14161
14162 /* Store last halfword if we haven't done so already. */
14163
14164 if (halfword_tmp)
14165 {
14166 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14167 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14168 emit_insn (gen_unaligned_storehi (mem,
14169 gen_lowpart (HImode, halfword_tmp)));
14170 dstoffset += 2;
14171 }
14172
14173 /* Likewise for last byte. */
14174
14175 if (byte_tmp)
14176 {
14177 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14178 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14179 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14180 dstoffset++;
14181 }
14182
14183 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14184 }
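/* Worked example (editorial): copying 11 bytes with INTERLEAVE_FACTOR 1 and
   both sides unaligned uses two unaligned ldr/str pairs for the first 8
   bytes, then one ldrh/strh for the next 2 bytes and one ldrb/strb for the
   final byte, leaving SRCOFFSET == DSTOFFSET == 11 as asserted above.  */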
14185
14186 /* From mips_adjust_block_mem:
14187
14188 Helper function for doing a loop-based block operation on memory
14189 reference MEM. Each iteration of the loop will operate on LENGTH
14190 bytes of MEM.
14191
14192 Create a new base register for use within the loop and point it to
14193 the start of MEM. Create a new memory reference that uses this
14194 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14195
14196 static void
14197 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14198 rtx *loop_mem)
14199 {
14200 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14201
14202 /* Although the new mem does not refer to a known location,
14203 it does keep up to LENGTH bytes of alignment. */
14204 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14205 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14206 }
14207
14208 /* From mips_block_move_loop:
14209
14210 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14211 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14212 the memory regions do not overlap. */
14213
14214 static void
14215 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14216 unsigned int interleave_factor,
14217 HOST_WIDE_INT bytes_per_iter)
14218 {
14219 rtx src_reg, dest_reg, final_src, test;
14220 HOST_WIDE_INT leftover;
14221
14222 leftover = length % bytes_per_iter;
14223 length -= leftover;
14224
14225 /* Create registers and memory references for use within the loop. */
14226 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14227 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14228
14229 /* Calculate the value that SRC_REG should have after the last iteration of
14230 the loop. */
14231 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14232 0, 0, OPTAB_WIDEN);
14233
14234 /* Emit the start of the loop. */
14235 rtx_code_label *label = gen_label_rtx ();
14236 emit_label (label);
14237
14238 /* Emit the loop body. */
14239 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14240 interleave_factor);
14241
14242 /* Move on to the next block. */
14243 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14244 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14245
14246 /* Emit the loop condition. */
14247 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14248 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14249
14250 /* Mop up any left-over bytes. */
14251 if (leftover)
14252 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14253 }
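/* For example (purely illustrative): with LENGTH == 40 and
   BYTES_PER_ITER == 16, the loop above copies 32 bytes in two iterations
   and the final arm_block_move_unaligned_straight call mops up the
   remaining 8 bytes (LEFTOVER == 40 % 16).  */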
14254
14255 /* Emit a block move when either the source or destination is unaligned (not
14256 aligned to a four-byte boundary). This may need further tuning depending on
14257 core type, optimize_size setting, etc. */
14258
14259 static int
14260 arm_movmemqi_unaligned (rtx *operands)
14261 {
14262 HOST_WIDE_INT length = INTVAL (operands[2]);
14263
14264 if (optimize_size)
14265 {
14266 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14267 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14268 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14269 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14270 or dst_aligned though: allow more interleaving in those cases since the
14271 resulting code can be smaller. */
14272 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14273 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14274
14275 if (length > 12)
14276 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14277 interleave_factor, bytes_per_iter);
14278 else
14279 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14280 interleave_factor);
14281 }
14282 else
14283 {
14284 /* Note that the loop created by arm_block_move_unaligned_loop may be
14285 subject to loop unrolling, which makes tuning this condition a little
14286 redundant. */
14287 if (length > 32)
14288 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14289 else
14290 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14291 }
14292
14293 return 1;
14294 }
14295
14296 int
14297 arm_gen_movmemqi (rtx *operands)
14298 {
14299 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14300 HOST_WIDE_INT srcoffset, dstoffset;
14301 rtx src, dst, srcbase, dstbase;
14302 rtx part_bytes_reg = NULL;
14303 rtx mem;
14304
14305 if (!CONST_INT_P (operands[2])
14306 || !CONST_INT_P (operands[3])
14307 || INTVAL (operands[2]) > 64)
14308 return 0;
14309
14310 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14311 return arm_movmemqi_unaligned (operands);
14312
14313 if (INTVAL (operands[3]) & 3)
14314 return 0;
14315
14316 dstbase = operands[0];
14317 srcbase = operands[1];
14318
14319 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14320 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14321
14322 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14323 out_words_to_go = INTVAL (operands[2]) / 4;
14324 last_bytes = INTVAL (operands[2]) & 3;
14325 dstoffset = srcoffset = 0;
14326
14327 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14328 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14329
14330 while (in_words_to_go >= 2)
14331 {
14332 if (in_words_to_go > 4)
14333 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14334 TRUE, srcbase, &srcoffset));
14335 else
14336 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14337 src, FALSE, srcbase,
14338 &srcoffset));
14339
14340 if (out_words_to_go)
14341 {
14342 if (out_words_to_go > 4)
14343 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14344 TRUE, dstbase, &dstoffset));
14345 else if (out_words_to_go != 1)
14346 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14347 out_words_to_go, dst,
14348 (last_bytes == 0
14349 ? FALSE : TRUE),
14350 dstbase, &dstoffset));
14351 else
14352 {
14353 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14354 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14355 if (last_bytes != 0)
14356 {
14357 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14358 dstoffset += 4;
14359 }
14360 }
14361 }
14362
14363 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14364 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14365 }
14366
14367 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14368 if (out_words_to_go)
14369 {
14370 rtx sreg;
14371
14372 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14373 sreg = copy_to_reg (mem);
14374
14375 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14376 emit_move_insn (mem, sreg);
14377 in_words_to_go--;
14378
14379 gcc_assert (!in_words_to_go); /* Sanity check */
14380 }
14381
14382 if (in_words_to_go)
14383 {
14384 gcc_assert (in_words_to_go > 0);
14385
14386 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14387 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14388 }
14389
14390 gcc_assert (!last_bytes || part_bytes_reg);
14391
14392 if (BYTES_BIG_ENDIAN && last_bytes)
14393 {
14394 rtx tmp = gen_reg_rtx (SImode);
14395
14396 /* The bytes we want are in the top end of the word. */
14397 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14398 GEN_INT (8 * (4 - last_bytes))));
14399 part_bytes_reg = tmp;
14400
14401 while (last_bytes)
14402 {
14403 mem = adjust_automodify_address (dstbase, QImode,
14404 plus_constant (Pmode, dst,
14405 last_bytes - 1),
14406 dstoffset + last_bytes - 1);
14407 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14408
14409 if (--last_bytes)
14410 {
14411 tmp = gen_reg_rtx (SImode);
14412 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14413 part_bytes_reg = tmp;
14414 }
14415 }
14416
14417 }
14418 else
14419 {
14420 if (last_bytes > 1)
14421 {
14422 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14423 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14424 last_bytes -= 2;
14425 if (last_bytes)
14426 {
14427 rtx tmp = gen_reg_rtx (SImode);
14428 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14429 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14430 part_bytes_reg = tmp;
14431 dstoffset += 2;
14432 }
14433 }
14434
14435 if (last_bytes)
14436 {
14437 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14438 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14439 }
14440 }
14441
14442 return 1;
14443 }
14444
14445 /* Helper for gen_movmem_ldrd_strd. Increase the address of the memory rtx
14446 MEM by the size of its mode. */
14447 inline static rtx
14448 next_consecutive_mem (rtx mem)
14449 {
14450 machine_mode mode = GET_MODE (mem);
14451 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14452 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14453
14454 return adjust_automodify_address (mem, mode, addr, offset);
14455 }
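/* E.g. for an SImode MEM the result refers to the word 4 bytes further on
   (address and recorded offset both advanced by 4); for a DImode MEM the
   step is 8 bytes.  */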
14456
14457 /* Copy using LDRD/STRD instructions whenever possible.
14458 Returns true upon success. */
14459 bool
14460 gen_movmem_ldrd_strd (rtx *operands)
14461 {
14462 unsigned HOST_WIDE_INT len;
14463 HOST_WIDE_INT align;
14464 rtx src, dst, base;
14465 rtx reg0;
14466 bool src_aligned, dst_aligned;
14467 bool src_volatile, dst_volatile;
14468
14469 gcc_assert (CONST_INT_P (operands[2]));
14470 gcc_assert (CONST_INT_P (operands[3]));
14471
14472 len = UINTVAL (operands[2]);
14473 if (len > 64)
14474 return false;
14475
14476 /* Maximum alignment we can assume for both src and dst buffers. */
14477 align = INTVAL (operands[3]);
14478
14479 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14480 return false;
14481
14482 /* Place src and dst addresses in registers
14483 and update the corresponding mem rtx. */
14484 dst = operands[0];
14485 dst_volatile = MEM_VOLATILE_P (dst);
14486 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14487 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14488 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14489
14490 src = operands[1];
14491 src_volatile = MEM_VOLATILE_P (src);
14492 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14493 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14494 src = adjust_automodify_address (src, VOIDmode, base, 0);
14495
14496 if (!unaligned_access && !(src_aligned && dst_aligned))
14497 return false;
14498
14499 if (src_volatile || dst_volatile)
14500 return false;
14501
14502 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14503 if (!(dst_aligned || src_aligned))
14504 return arm_gen_movmemqi (operands);
14505
14506 /* If either src or dst is unaligned we'll be accessing it as pairs
14507 of unaligned SImode accesses. Otherwise we can generate DImode
14508 ldrd/strd instructions. */
14509 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14510 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14511
14512 while (len >= 8)
14513 {
14514 len -= 8;
14515 reg0 = gen_reg_rtx (DImode);
14516 rtx low_reg = NULL_RTX;
14517 rtx hi_reg = NULL_RTX;
14518
14519 if (!src_aligned || !dst_aligned)
14520 {
14521 low_reg = gen_lowpart (SImode, reg0);
14522 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14523 }
14524 if (src_aligned)
14525 emit_move_insn (reg0, src);
14526 else
14527 {
14528 emit_insn (gen_unaligned_loadsi (low_reg, src));
14529 src = next_consecutive_mem (src);
14530 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14531 }
14532
14533 if (dst_aligned)
14534 emit_move_insn (dst, reg0);
14535 else
14536 {
14537 emit_insn (gen_unaligned_storesi (dst, low_reg));
14538 dst = next_consecutive_mem (dst);
14539 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14540 }
14541
14542 src = next_consecutive_mem (src);
14543 dst = next_consecutive_mem (dst);
14544 }
14545
14546 gcc_assert (len < 8);
14547 if (len >= 4)
14548 {
14549 /* More than a word but less than a double-word to copy. Copy a word. */
14550 reg0 = gen_reg_rtx (SImode);
14551 src = adjust_address (src, SImode, 0);
14552 dst = adjust_address (dst, SImode, 0);
14553 if (src_aligned)
14554 emit_move_insn (reg0, src);
14555 else
14556 emit_insn (gen_unaligned_loadsi (reg0, src));
14557
14558 if (dst_aligned)
14559 emit_move_insn (dst, reg0);
14560 else
14561 emit_insn (gen_unaligned_storesi (dst, reg0));
14562
14563 src = next_consecutive_mem (src);
14564 dst = next_consecutive_mem (dst);
14565 len -= 4;
14566 }
14567
14568 if (len == 0)
14569 return true;
14570
14571 /* Copy the remaining bytes. */
14572 if (len >= 2)
14573 {
14574 dst = adjust_address (dst, HImode, 0);
14575 src = adjust_address (src, HImode, 0);
14576 reg0 = gen_reg_rtx (SImode);
14577 if (src_aligned)
14578 emit_insn (gen_zero_extendhisi2 (reg0, src));
14579 else
14580 emit_insn (gen_unaligned_loadhiu (reg0, src));
14581
14582 if (dst_aligned)
14583 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14584 else
14585 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14586
14587 src = next_consecutive_mem (src);
14588 dst = next_consecutive_mem (dst);
14589 if (len == 2)
14590 return true;
14591 }
14592
14593 dst = adjust_address (dst, QImode, 0);
14594 src = adjust_address (src, QImode, 0);
14595 reg0 = gen_reg_rtx (QImode);
14596 emit_move_insn (reg0, src);
14597 emit_move_insn (dst, reg0);
14598 return true;
14599 }
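/* As a rough sketch (register numbers are hypothetical), a 16-byte copy
   with both operands word-aligned becomes two DImode moves, which the
   movdi patterns can emit as LDRD/STRD pairs:

	ldrd	r4, r5, [r1]
	strd	r4, r5, [r0]
	ldrd	r4, r5, [r1, #8]
	strd	r4, r5, [r0, #8]

   When only one side is aligned, that side keeps the plain DImode move
   while the other side uses pairs of unaligned LDR/STR, as in the loop
   above.  */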
14600
14601 /* Select a dominance comparison mode if possible for a test of the general
14602 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14603 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14604 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14605 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14606 In all cases OP will be either EQ or NE, but we don't need to know which
14607 here. If we are unable to support a dominance comparison we return
14608 CCmode. This will then fail to match for the RTL expressions that
14609 generate this call. */
14610 machine_mode
14611 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14612 {
14613 enum rtx_code cond1, cond2;
14614 int swapped = 0;
14615
14616 /* Currently we will probably get the wrong result if the individual
14617 comparisons are not simple. This also ensures that it is safe to
14618 reverse a comparison if necessary. */
14619 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14620 != CCmode)
14621 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14622 != CCmode))
14623 return CCmode;
14624
14625 /* The if_then_else variant of this tests the second condition if the
14626 first passes, but is true if the first fails. Reverse the first
14627 condition to get a true "inclusive-or" expression. */
14628 if (cond_or == DOM_CC_NX_OR_Y)
14629 cond1 = reverse_condition (cond1);
14630
14631 /* If the comparisons are not equal, and one doesn't dominate the other,
14632 then we can't do this. */
14633 if (cond1 != cond2
14634 && !comparison_dominates_p (cond1, cond2)
14635 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14636 return CCmode;
14637
14638 if (swapped)
14639 std::swap (cond1, cond2);
14640
14641 switch (cond1)
14642 {
14643 case EQ:
14644 if (cond_or == DOM_CC_X_AND_Y)
14645 return CC_DEQmode;
14646
14647 switch (cond2)
14648 {
14649 case EQ: return CC_DEQmode;
14650 case LE: return CC_DLEmode;
14651 case LEU: return CC_DLEUmode;
14652 case GE: return CC_DGEmode;
14653 case GEU: return CC_DGEUmode;
14654 default: gcc_unreachable ();
14655 }
14656
14657 case LT:
14658 if (cond_or == DOM_CC_X_AND_Y)
14659 return CC_DLTmode;
14660
14661 switch (cond2)
14662 {
14663 case LT:
14664 return CC_DLTmode;
14665 case LE:
14666 return CC_DLEmode;
14667 case NE:
14668 return CC_DNEmode;
14669 default:
14670 gcc_unreachable ();
14671 }
14672
14673 case GT:
14674 if (cond_or == DOM_CC_X_AND_Y)
14675 return CC_DGTmode;
14676
14677 switch (cond2)
14678 {
14679 case GT:
14680 return CC_DGTmode;
14681 case GE:
14682 return CC_DGEmode;
14683 case NE:
14684 return CC_DNEmode;
14685 default:
14686 gcc_unreachable ();
14687 }
14688
14689 case LTU:
14690 if (cond_or == DOM_CC_X_AND_Y)
14691 return CC_DLTUmode;
14692
14693 switch (cond2)
14694 {
14695 case LTU:
14696 return CC_DLTUmode;
14697 case LEU:
14698 return CC_DLEUmode;
14699 case NE:
14700 return CC_DNEmode;
14701 default:
14702 gcc_unreachable ();
14703 }
14704
14705 case GTU:
14706 if (cond_or == DOM_CC_X_AND_Y)
14707 return CC_DGTUmode;
14708
14709 switch (cond2)
14710 {
14711 case GTU:
14712 return CC_DGTUmode;
14713 case GEU:
14714 return CC_DGEUmode;
14715 case NE:
14716 return CC_DNEmode;
14717 default:
14718 gcc_unreachable ();
14719 }
14720
14721 /* The remaining cases only occur when both comparisons are the
14722 same. */
14723 case NE:
14724 gcc_assert (cond1 == cond2);
14725 return CC_DNEmode;
14726
14727 case LE:
14728 gcc_assert (cond1 == cond2);
14729 return CC_DLEmode;
14730
14731 case GE:
14732 gcc_assert (cond1 == cond2);
14733 return CC_DGEmode;
14734
14735 case LEU:
14736 gcc_assert (cond1 == cond2);
14737 return CC_DLEUmode;
14738
14739 case GEU:
14740 gcc_assert (cond1 == cond2);
14741 return CC_DGEUmode;
14742
14743 default:
14744 gcc_unreachable ();
14745 }
14746 }
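/* For illustration: with X an EQ comparison, Y an LE comparison and
   COND_OR == DOM_CC_X_OR_Y (i.e. a test like (a == b) || (c <= d)),
   EQ being true implies LE is true, so the pair is accepted and the
   EQ/LE case above returns CC_DLEmode.  A pair such as LT and GT, where
   neither condition implies the other, falls back to CCmode.  */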
14747
14748 machine_mode
14749 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14750 {
14751 /* All floating point compares return CCFP if it is an equality
14752 comparison, and CCFPE otherwise. */
14753 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14754 {
14755 switch (op)
14756 {
14757 case EQ:
14758 case NE:
14759 case UNORDERED:
14760 case ORDERED:
14761 case UNLT:
14762 case UNLE:
14763 case UNGT:
14764 case UNGE:
14765 case UNEQ:
14766 case LTGT:
14767 return CCFPmode;
14768
14769 case LT:
14770 case LE:
14771 case GT:
14772 case GE:
14773 return CCFPEmode;
14774
14775 default:
14776 gcc_unreachable ();
14777 }
14778 }
14779
14780 /* A compare with a shifted operand. Because of canonicalization, the
14781 comparison will have to be swapped when we emit the assembler. */
14782 if (GET_MODE (y) == SImode
14783 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14784 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14785 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14786 || GET_CODE (x) == ROTATERT))
14787 return CC_SWPmode;
14788
14789 /* This operation is performed swapped, but since we only rely on the Z
14790 flag we don't need an additional mode. */
14791 if (GET_MODE (y) == SImode
14792 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14793 && GET_CODE (x) == NEG
14794 && (op == EQ || op == NE))
14795 return CC_Zmode;
14796
14797 /* This is a special case that is used by combine to allow a
14798 comparison of a shifted byte load to be split into a zero-extend
14799 followed by a comparison of the shifted integer (only valid for
14800 equalities and unsigned inequalities). */
14801 if (GET_MODE (x) == SImode
14802 && GET_CODE (x) == ASHIFT
14803 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14804 && GET_CODE (XEXP (x, 0)) == SUBREG
14805 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14806 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14807 && (op == EQ || op == NE
14808 || op == GEU || op == GTU || op == LTU || op == LEU)
14809 && CONST_INT_P (y))
14810 return CC_Zmode;
14811
14812 /* A construct for a conditional compare, if the false arm contains
14813 0, then both conditions must be true, otherwise either condition
14814 must be true. Not all conditions are possible, so CCmode is
14815 returned if it can't be done. */
14816 if (GET_CODE (x) == IF_THEN_ELSE
14817 && (XEXP (x, 2) == const0_rtx
14818 || XEXP (x, 2) == const1_rtx)
14819 && COMPARISON_P (XEXP (x, 0))
14820 && COMPARISON_P (XEXP (x, 1)))
14821 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14822 INTVAL (XEXP (x, 2)));
14823
14824 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14825 if (GET_CODE (x) == AND
14826 && (op == EQ || op == NE)
14827 && COMPARISON_P (XEXP (x, 0))
14828 && COMPARISON_P (XEXP (x, 1)))
14829 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14830 DOM_CC_X_AND_Y);
14831
14832 if (GET_CODE (x) == IOR
14833 && (op == EQ || op == NE)
14834 && COMPARISON_P (XEXP (x, 0))
14835 && COMPARISON_P (XEXP (x, 1)))
14836 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14837 DOM_CC_X_OR_Y);
14838
14839 /* An operation (on Thumb) where we want to test for a single bit.
14840 This is done by shifting that bit up into the top bit of a
14841 scratch register; we can then branch on the sign bit. */
14842 if (TARGET_THUMB1
14843 && GET_MODE (x) == SImode
14844 && (op == EQ || op == NE)
14845 && GET_CODE (x) == ZERO_EXTRACT
14846 && XEXP (x, 1) == const1_rtx)
14847 return CC_Nmode;
14848
14849 /* An operation that sets the condition codes as a side-effect, the
14850 V flag is not set correctly, so we can only use comparisons where
14851 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14852 instead.) */
14853 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14854 if (GET_MODE (x) == SImode
14855 && y == const0_rtx
14856 && (op == EQ || op == NE || op == LT || op == GE)
14857 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14858 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14859 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14860 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14861 || GET_CODE (x) == LSHIFTRT
14862 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14863 || GET_CODE (x) == ROTATERT
14864 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14865 return CC_NOOVmode;
14866
14867 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14868 return CC_Zmode;
14869
14870 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14871 && GET_CODE (x) == PLUS
14872 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14873 return CC_Cmode;
14874
14875 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14876 {
14877 switch (op)
14878 {
14879 case EQ:
14880 case NE:
14881 /* A DImode comparison against zero can be implemented by
14882 or'ing the two halves together. */
14883 if (y == const0_rtx)
14884 return CC_Zmode;
14885
14886 /* We can do an equality test in three Thumb instructions. */
14887 if (!TARGET_32BIT)
14888 return CC_Zmode;
14889
14890 /* FALLTHROUGH */
14891
14892 case LTU:
14893 case LEU:
14894 case GTU:
14895 case GEU:
14896 /* DImode unsigned comparisons can be implemented by cmp +
14897 cmpeq without a scratch register. Not worth doing in
14898 Thumb-2. */
14899 if (TARGET_32BIT)
14900 return CC_CZmode;
14901
14902 /* FALLTHROUGH */
14903
14904 case LT:
14905 case LE:
14906 case GT:
14907 case GE:
14908 /* DImode signed and unsigned comparisons can be implemented
14909 by cmp + sbcs with a scratch register, but that does not
14910 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14911 gcc_assert (op != EQ && op != NE);
14912 return CC_NCVmode;
14913
14914 default:
14915 gcc_unreachable ();
14916 }
14917 }
14918
14919 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14920 return GET_MODE (x);
14921
14922 return CCmode;
14923 }
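/* Two common examples of the selection above: an SImode test of
   (a + b) <u a (an unsigned carry/overflow check) returns CC_Cmode, and a
   DImode equality test against zero returns CC_Zmode, since the two
   halves can simply be ORed together before the compare.  */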
14924
14925 /* X and Y are two things to compare using CODE. Emit the compare insn and
14926 return the rtx for register 0 in the proper mode. FP means this is a
14927 floating point compare: I don't think that it is needed on the arm. */
14928 rtx
14929 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14930 {
14931 machine_mode mode;
14932 rtx cc_reg;
14933 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14934
14935 /* We might have X as a constant, Y as a register because of the predicates
14936 used for cmpdi. If so, force X to a register here. */
14937 if (dimode_comparison && !REG_P (x))
14938 x = force_reg (DImode, x);
14939
14940 mode = SELECT_CC_MODE (code, x, y);
14941 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14942
14943 if (dimode_comparison
14944 && mode != CC_CZmode)
14945 {
14946 rtx clobber, set;
14947
14948 /* To compare two non-zero values for equality, XOR them and
14949 then compare against zero. Not used for ARM mode; there
14950 CC_CZmode is cheaper. */
14951 if (mode == CC_Zmode && y != const0_rtx)
14952 {
14953 gcc_assert (!reload_completed);
14954 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14955 y = const0_rtx;
14956 }
14957
14958 /* A scratch register is required. */
14959 if (reload_completed)
14960 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
14961 else
14962 scratch = gen_rtx_SCRATCH (SImode);
14963
14964 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
14965 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
14966 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
14967 }
14968 else
14969 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
14970
14971 return cc_reg;
14972 }
14973
14974 /* Generate a sequence of insns that will generate the correct return
14975 address mask depending on the physical architecture that the program
14976 is running on. */
14977 rtx
14978 arm_gen_return_addr_mask (void)
14979 {
14980 rtx reg = gen_reg_rtx (Pmode);
14981
14982 emit_insn (gen_return_addr_mask (reg));
14983 return reg;
14984 }
14985
14986 void
14987 arm_reload_in_hi (rtx *operands)
14988 {
14989 rtx ref = operands[1];
14990 rtx base, scratch;
14991 HOST_WIDE_INT offset = 0;
14992
14993 if (GET_CODE (ref) == SUBREG)
14994 {
14995 offset = SUBREG_BYTE (ref);
14996 ref = SUBREG_REG (ref);
14997 }
14998
14999 if (REG_P (ref))
15000 {
15001 /* We have a pseudo which has been spilt onto the stack; there
15002 are two cases here: the first where there is a simple
15003 stack-slot replacement and a second where the stack-slot is
15004 out of range, or is used as a subreg. */
15005 if (reg_equiv_mem (REGNO (ref)))
15006 {
15007 ref = reg_equiv_mem (REGNO (ref));
15008 base = find_replacement (&XEXP (ref, 0));
15009 }
15010 else
15011 /* The slot is out of range, or was dressed up in a SUBREG. */
15012 base = reg_equiv_address (REGNO (ref));
15013
15014 /* PR 62554: If there is no equivalent memory location then just move
15015 the value as an SImode register move. This happens when the target
15016 architecture variant does not have an HImode register move. */
15017 if (base == NULL)
15018 {
15019 gcc_assert (REG_P (operands[0]));
15020 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
15021 gen_rtx_SUBREG (SImode, ref, 0)));
15022 return;
15023 }
15024 }
15025 else
15026 base = find_replacement (&XEXP (ref, 0));
15027
15028 /* Handle the case where the address is too complex to be offset by 1. */
15029 if (GET_CODE (base) == MINUS
15030 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15031 {
15032 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15033
15034 emit_set_insn (base_plus, base);
15035 base = base_plus;
15036 }
15037 else if (GET_CODE (base) == PLUS)
15038 {
15039 /* The addend must be CONST_INT, or we would have dealt with it above. */
15040 HOST_WIDE_INT hi, lo;
15041
15042 offset += INTVAL (XEXP (base, 1));
15043 base = XEXP (base, 0);
15044
15045 /* Rework the address into a legal sequence of insns. */
15046 /* Valid range for lo is -4095 -> 4095 */
15047 lo = (offset >= 0
15048 ? (offset & 0xfff)
15049 : -((-offset) & 0xfff));
15050
15051 /* Corner case, if lo is the max offset then we would be out of range
15052 once we have added the additional 1 below, so bump the msb into the
15053 pre-loading insn(s). */
15054 if (lo == 4095)
15055 lo &= 0x7ff;
15056
15057 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15058 ^ (HOST_WIDE_INT) 0x80000000)
15059 - (HOST_WIDE_INT) 0x80000000);
15060
15061 gcc_assert (hi + lo == offset);
15062
15063 if (hi != 0)
15064 {
15065 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15066
15067 /* Get the base address; addsi3 knows how to handle constants
15068 that require more than one insn. */
15069 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15070 base = base_plus;
15071 offset = lo;
15072 }
15073 }
15074
15075 /* Operands[2] may overlap operands[0] (though it won't overlap
15076 operands[1]); that's why we asked for a DImode reg -- so we can
15077 use the half that does not overlap. */
15078 if (REGNO (operands[2]) == REGNO (operands[0]))
15079 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15080 else
15081 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15082
15083 emit_insn (gen_zero_extendqisi2 (scratch,
15084 gen_rtx_MEM (QImode,
15085 plus_constant (Pmode, base,
15086 offset))));
15087 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15088 gen_rtx_MEM (QImode,
15089 plus_constant (Pmode, base,
15090 offset + 1))));
15091 if (!BYTES_BIG_ENDIAN)
15092 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15093 gen_rtx_IOR (SImode,
15094 gen_rtx_ASHIFT
15095 (SImode,
15096 gen_rtx_SUBREG (SImode, operands[0], 0),
15097 GEN_INT (8)),
15098 scratch));
15099 else
15100 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15101 gen_rtx_IOR (SImode,
15102 gen_rtx_ASHIFT (SImode, scratch,
15103 GEN_INT (8)),
15104 gen_rtx_SUBREG (SImode, operands[0], 0)));
15105 }
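/* A worked example of the hi/lo offset splitting used above (and again in
   arm_reload_out_hi below): for offset 0x1234, lo = 0x234 and hi = 0x1000,
   so the base is first advanced by 0x1000 and the two byte accesses then
   use offsets 0x234 and 0x235.  For the corner case offset == 4095, lo is
   reduced to 0x7ff so that lo + 1 stays in range, giving hi == 0x800.  */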
15106
15107 /* Handle storing a half-word to memory during reload by synthesizing as two
15108 byte stores. Take care not to clobber the input values until after we
15109 have moved them somewhere safe. This code assumes that if the DImode
15110 scratch in operands[2] overlaps either the input value or output address
15111 in some way, then that value must die in this insn (we absolutely need
15112 two scratch registers for some corner cases). */
15113 void
15114 arm_reload_out_hi (rtx *operands)
15115 {
15116 rtx ref = operands[0];
15117 rtx outval = operands[1];
15118 rtx base, scratch;
15119 HOST_WIDE_INT offset = 0;
15120
15121 if (GET_CODE (ref) == SUBREG)
15122 {
15123 offset = SUBREG_BYTE (ref);
15124 ref = SUBREG_REG (ref);
15125 }
15126
15127 if (REG_P (ref))
15128 {
15129 /* We have a pseudo which has been spilt onto the stack; there
15130 are two cases here: the first where there is a simple
15131 stack-slot replacement and a second where the stack-slot is
15132 out of range, or is used as a subreg. */
15133 if (reg_equiv_mem (REGNO (ref)))
15134 {
15135 ref = reg_equiv_mem (REGNO (ref));
15136 base = find_replacement (&XEXP (ref, 0));
15137 }
15138 else
15139 /* The slot is out of range, or was dressed up in a SUBREG. */
15140 base = reg_equiv_address (REGNO (ref));
15141
15142 /* PR 62254: If there is no equivalent memory location then just move
15143 the value as an SImode register move. This happens when the target
15144 architecture variant does not have an HImode register move. */
15145 if (base == NULL)
15146 {
15147 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15148
15149 if (REG_P (outval))
15150 {
15151 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15152 gen_rtx_SUBREG (SImode, outval, 0)));
15153 }
15154 else /* SUBREG_P (outval) */
15155 {
15156 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15157 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15158 SUBREG_REG (outval)));
15159 else
15160 /* FIXME: Handle other cases ? */
15161 gcc_unreachable ();
15162 }
15163 return;
15164 }
15165 }
15166 else
15167 base = find_replacement (&XEXP (ref, 0));
15168
15169 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15170
15171 /* Handle the case where the address is too complex to be offset by 1. */
15172 if (GET_CODE (base) == MINUS
15173 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15174 {
15175 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15176
15177 /* Be careful not to destroy OUTVAL. */
15178 if (reg_overlap_mentioned_p (base_plus, outval))
15179 {
15180 /* Updating base_plus might destroy outval, see if we can
15181 swap the scratch and base_plus. */
15182 if (!reg_overlap_mentioned_p (scratch, outval))
15183 std::swap (scratch, base_plus);
15184 else
15185 {
15186 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15187
15188 /* Be conservative and copy OUTVAL into the scratch now,
15189 this should only be necessary if outval is a subreg
15190 of something larger than a word. */
15191 /* XXX Might this clobber base? I can't see how it can,
15192 since scratch is known to overlap with OUTVAL, and
15193 must be wider than a word. */
15194 emit_insn (gen_movhi (scratch_hi, outval));
15195 outval = scratch_hi;
15196 }
15197 }
15198
15199 emit_set_insn (base_plus, base);
15200 base = base_plus;
15201 }
15202 else if (GET_CODE (base) == PLUS)
15203 {
15204 /* The addend must be CONST_INT, or we would have dealt with it above. */
15205 HOST_WIDE_INT hi, lo;
15206
15207 offset += INTVAL (XEXP (base, 1));
15208 base = XEXP (base, 0);
15209
15210 /* Rework the address into a legal sequence of insns. */
15211 /* Valid range for lo is -4095 -> 4095 */
15212 lo = (offset >= 0
15213 ? (offset & 0xfff)
15214 : -((-offset) & 0xfff));
15215
15216 /* Corner case, if lo is the max offset then we would be out of range
15217 once we have added the additional 1 below, so bump the msb into the
15218 pre-loading insn(s). */
15219 if (lo == 4095)
15220 lo &= 0x7ff;
15221
15222 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15223 ^ (HOST_WIDE_INT) 0x80000000)
15224 - (HOST_WIDE_INT) 0x80000000);
15225
15226 gcc_assert (hi + lo == offset);
15227
15228 if (hi != 0)
15229 {
15230 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15231
15232 /* Be careful not to destroy OUTVAL. */
15233 if (reg_overlap_mentioned_p (base_plus, outval))
15234 {
15235 /* Updating base_plus might destroy outval, see if we
15236 can swap the scratch and base_plus. */
15237 if (!reg_overlap_mentioned_p (scratch, outval))
15238 std::swap (scratch, base_plus);
15239 else
15240 {
15241 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15242
15243 /* Be conservative and copy outval into scratch now,
15244 this should only be necessary if outval is a
15245 subreg of something larger than a word. */
15246 /* XXX Might this clobber base? I can't see how it
15247 can, since scratch is known to overlap with
15248 outval. */
15249 emit_insn (gen_movhi (scratch_hi, outval));
15250 outval = scratch_hi;
15251 }
15252 }
15253
15254 /* Get the base address; addsi3 knows how to handle constants
15255 that require more than one insn. */
15256 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15257 base = base_plus;
15258 offset = lo;
15259 }
15260 }
15261
15262 if (BYTES_BIG_ENDIAN)
15263 {
15264 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15265 plus_constant (Pmode, base,
15266 offset + 1)),
15267 gen_lowpart (QImode, outval)));
15268 emit_insn (gen_lshrsi3 (scratch,
15269 gen_rtx_SUBREG (SImode, outval, 0),
15270 GEN_INT (8)));
15271 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15272 offset)),
15273 gen_lowpart (QImode, scratch)));
15274 }
15275 else
15276 {
15277 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15278 offset)),
15279 gen_lowpart (QImode, outval)));
15280 emit_insn (gen_lshrsi3 (scratch,
15281 gen_rtx_SUBREG (SImode, outval, 0),
15282 GEN_INT (8)));
15283 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15284 plus_constant (Pmode, base,
15285 offset + 1)),
15286 gen_lowpart (QImode, scratch)));
15287 }
15288 }
15289
15290 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15291 (padded to the size of a word) should be passed in a register. */
15292
15293 static bool
15294 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15295 {
15296 if (TARGET_AAPCS_BASED)
15297 return must_pass_in_stack_var_size (mode, type);
15298 else
15299 return must_pass_in_stack_var_size_or_pad (mode, type);
15300 }
15301
15302
15303 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15304 byte of a stack argument has useful data. For legacy APCS ABIs we use
15305 the default. For AAPCS based ABIs small aggregate types are placed
15306 in the lowest memory address. */
15307
15308 static pad_direction
15309 arm_function_arg_padding (machine_mode mode, const_tree type)
15310 {
15311 if (!TARGET_AAPCS_BASED)
15312 return default_function_arg_padding (mode, type);
15313
15314 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15315 return PAD_DOWNWARD;
15316
15317 return PAD_UPWARD;
15318 }
15319
15320
15321 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15322 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15323 register has useful data, and return the opposite if the most
15324 significant byte does. */
15325
15326 bool
15327 arm_pad_reg_upward (machine_mode mode,
15328 tree type, int first ATTRIBUTE_UNUSED)
15329 {
15330 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15331 {
15332 /* For AAPCS, small aggregates, small fixed-point types,
15333 and small complex types are always padded upwards. */
15334 if (type)
15335 {
15336 if ((AGGREGATE_TYPE_P (type)
15337 || TREE_CODE (type) == COMPLEX_TYPE
15338 || FIXED_POINT_TYPE_P (type))
15339 && int_size_in_bytes (type) <= 4)
15340 return true;
15341 }
15342 else
15343 {
15344 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15345 && GET_MODE_SIZE (mode) <= 4)
15346 return true;
15347 }
15348 }
15349
15350 /* Otherwise, use default padding. */
15351 return !BYTES_BIG_ENDIAN;
15352 }
15353
15354 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15355 assuming that the address in the base register is word aligned. */
15356 bool
15357 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15358 {
15359 HOST_WIDE_INT max_offset;
15360
15361 /* Offset must be a multiple of 4 in Thumb-2 mode. */
15362 if (TARGET_THUMB2 && ((offset & 3) != 0))
15363 return false;
15364
15365 if (TARGET_THUMB2)
15366 max_offset = 1020;
15367 else if (TARGET_ARM)
15368 max_offset = 255;
15369 else
15370 return false;
15371
15372 return ((offset <= max_offset) && (offset >= -max_offset));
15373 }
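/* For example: offset 256 is rejected in ARM state (the limit is +/-255
   for LDRD/STRD) but accepted in Thumb-2 state (a multiple of 4 within
   +/-1020), while offset 250 is accepted in ARM state but rejected in
   Thumb-2 state because it is not a multiple of 4.  */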
15374
15375 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15376 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15377 Assumes that the address in the base register RN is word aligned. The
15378 patterns guarantee that both memory accesses use the same base register,
15379 that the offsets are constants within range, and that the gap between them is 4.
15380 If reload is complete then also check that the registers are legal. WBACK
15381 indicates whether the address is updated. LOAD indicates whether the access is a load or a store. */
15382 bool
15383 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15384 bool wback, bool load)
15385 {
15386 unsigned int t, t2, n;
15387
15388 if (!reload_completed)
15389 return true;
15390
15391 if (!offset_ok_for_ldrd_strd (offset))
15392 return false;
15393
15394 t = REGNO (rt);
15395 t2 = REGNO (rt2);
15396 n = REGNO (rn);
15397
15398 if ((TARGET_THUMB2)
15399 && ((wback && (n == t || n == t2))
15400 || (t == SP_REGNUM)
15401 || (t == PC_REGNUM)
15402 || (t2 == SP_REGNUM)
15403 || (t2 == PC_REGNUM)
15404 || (!load && (n == PC_REGNUM))
15405 || (load && (t == t2))
15406 /* Triggers Cortex-M3 LDRD errata. */
15407 || (!wback && load && fix_cm3_ldrd && (n == t))))
15408 return false;
15409
15410 if ((TARGET_ARM)
15411 && ((wback && (n == t || n == t2))
15412 || (t2 == PC_REGNUM)
15413 || (t % 2 != 0) /* First destination register is not even. */
15414 || (t2 != t + 1)
15415 /* PC can be used as the base register (for offset addressing only),
15416 but it is deprecated. */
15417 || (n == PC_REGNUM)))
15418 return false;
15419
15420 return true;
15421 }
15422
15423 /* Return true if a 64-bit access with alignment ALIGN and with a
15424 constant offset OFFSET from the base pointer is permitted on this
15425 architecture. */
15426 static bool
15427 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
15428 {
15429 return (unaligned_access
15430 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
15431 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
15432 }
15433
15434 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15435 operand MEM's address contains an immediate offset from the base
15436 register and has no side effects, in which case it sets BASE,
15437 OFFSET and ALIGN accordingly. */
15438 static bool
15439 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
15440 {
15441 rtx addr;
15442
15443 gcc_assert (base != NULL && offset != NULL);
15444
15445 /* TODO: Handle more general memory operand patterns, such as
15446 PRE_DEC and PRE_INC. */
15447
15448 if (side_effects_p (mem))
15449 return false;
15450
15451 /* Can't deal with subregs. */
15452 if (GET_CODE (mem) == SUBREG)
15453 return false;
15454
15455 gcc_assert (MEM_P (mem));
15456
15457 *offset = const0_rtx;
15458 *align = MEM_ALIGN (mem);
15459
15460 addr = XEXP (mem, 0);
15461
15462 /* If addr isn't valid for DImode, then we can't handle it. */
15463 if (!arm_legitimate_address_p (DImode, addr,
15464 reload_in_progress || reload_completed))
15465 return false;
15466
15467 if (REG_P (addr))
15468 {
15469 *base = addr;
15470 return true;
15471 }
15472 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15473 {
15474 *base = XEXP (addr, 0);
15475 *offset = XEXP (addr, 1);
15476 return (REG_P (*base) && CONST_INT_P (*offset));
15477 }
15478
15479 return false;
15480 }
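/* E.g. for a MEM whose address is (plus (reg r4) (const_int 8)), this
   sets *BASE to r4, *OFFSET to (const_int 8) and *ALIGN to the MEM's
   recorded alignment; a plain (reg r4) address gives an offset of 0.  */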
15481
15482 /* Called from a peephole2 to replace two word-size accesses with a
15483 single LDRD/STRD instruction. Returns true iff we can generate a
15484 new instruction sequence. That is, both accesses use the same base
15485 register and the gap between constant offsets is 4. This function
15486 may reorder its operands to match ldrd/strd RTL templates.
15487 OPERANDS are the operands found by the peephole matcher;
15488 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15489 corresponding memory operands. LOAD indicates whether the access
15490 is a load or a store. CONST_STORE indicates a store of constant
15491 integer values held in OPERANDS[4,5] and assumes that the pattern
15492 is four insns long, for the purpose of checking dead registers.
15493 COMMUTE indicates that register operands may be reordered. */
15494 bool
15495 gen_operands_ldrd_strd (rtx *operands, bool load,
15496 bool const_store, bool commute)
15497 {
15498 int nops = 2;
15499 HOST_WIDE_INT offsets[2], offset, align[2];
15500 rtx base = NULL_RTX;
15501 rtx cur_base, cur_offset, tmp;
15502 int i, gap;
15503 HARD_REG_SET regset;
15504
15505 gcc_assert (!const_store || !load);
15506 /* Check that the memory references are immediate offsets from the
15507 same base register. Extract the base register, the destination
15508 registers, and the corresponding memory offsets. */
15509 for (i = 0; i < nops; i++)
15510 {
15511 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
15512 &align[i]))
15513 return false;
15514
15515 if (i == 0)
15516 base = cur_base;
15517 else if (REGNO (base) != REGNO (cur_base))
15518 return false;
15519
15520 offsets[i] = INTVAL (cur_offset);
15521 if (GET_CODE (operands[i]) == SUBREG)
15522 {
15523 tmp = SUBREG_REG (operands[i]);
15524 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15525 operands[i] = tmp;
15526 }
15527 }
15528
15529 /* Make sure there is no dependency between the individual loads. */
15530 if (load && REGNO (operands[0]) == REGNO (base))
15531 return false; /* RAW */
15532
15533 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15534 return false; /* WAW */
15535
15536 /* If the same input register is used in both stores
15537 when storing different constants, try to find a free register.
15538 For example, the code
15539 mov r0, 0
15540 str r0, [r2]
15541 mov r0, 1
15542 str r0, [r2, #4]
15543 can be transformed into
15544 mov r1, 0
15545 mov r0, 1
15546 strd r1, r0, [r2]
15547 in Thumb mode assuming that r1 is free.
15548 For ARM mode do the same but only if the starting register
15549 can be made to be even. */
15550 if (const_store
15551 && REGNO (operands[0]) == REGNO (operands[1])
15552 && INTVAL (operands[4]) != INTVAL (operands[5]))
15553 {
15554 if (TARGET_THUMB2)
15555 {
15556 CLEAR_HARD_REG_SET (regset);
15557 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15558 if (tmp == NULL_RTX)
15559 return false;
15560
15561 /* Use the new register in the first load to ensure that
15562 if the original input register is not dead after peephole,
15563 then it will have the correct constant value. */
15564 operands[0] = tmp;
15565 }
15566 else if (TARGET_ARM)
15567 {
15568 int regno = REGNO (operands[0]);
15569 if (!peep2_reg_dead_p (4, operands[0]))
15570 {
15571 /* When the input register is even and is not dead after the
15572 pattern, it has to hold the second constant but we cannot
15573 form a legal STRD in ARM mode with this register as the second
15574 register. */
15575 if (regno % 2 == 0)
15576 return false;
15577
15578 /* Is regno-1 free? */
15579 SET_HARD_REG_SET (regset);
15580 CLEAR_HARD_REG_BIT(regset, regno - 1);
15581 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15582 if (tmp == NULL_RTX)
15583 return false;
15584
15585 operands[0] = tmp;
15586 }
15587 else
15588 {
15589 /* Find a DImode register. */
15590 CLEAR_HARD_REG_SET (regset);
15591 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15592 if (tmp != NULL_RTX)
15593 {
15594 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15595 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15596 }
15597 else
15598 {
15599 /* Can we use the input register to form a DI register? */
15600 SET_HARD_REG_SET (regset);
15601 CLEAR_HARD_REG_BIT(regset,
15602 regno % 2 == 0 ? regno + 1 : regno - 1);
15603 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15604 if (tmp == NULL_RTX)
15605 return false;
15606 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15607 }
15608 }
15609
15610 gcc_assert (operands[0] != NULL_RTX);
15611 gcc_assert (operands[1] != NULL_RTX);
15612 gcc_assert (REGNO (operands[0]) % 2 == 0);
15613 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15614 }
15615 }
15616
15617 /* Make sure the instructions are ordered with lower memory access first. */
15618 if (offsets[0] > offsets[1])
15619 {
15620 gap = offsets[0] - offsets[1];
15621 offset = offsets[1];
15622
15623 /* Swap the instructions such that lower memory is accessed first. */
15624 std::swap (operands[0], operands[1]);
15625 std::swap (operands[2], operands[3]);
15626 std::swap (align[0], align[1]);
15627 if (const_store)
15628 std::swap (operands[4], operands[5]);
15629 }
15630 else
15631 {
15632 gap = offsets[1] - offsets[0];
15633 offset = offsets[0];
15634 }
15635
15636 /* Make sure accesses are to consecutive memory locations. */
15637 if (gap != 4)
15638 return false;
15639
15640 if (!align_ok_ldrd_strd (align[0], offset))
15641 return false;
15642
15643 /* Make sure we generate legal instructions. */
15644 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15645 false, load))
15646 return true;
15647
15648 /* In Thumb state, where registers are almost unconstrained, there
15649 is little hope of fixing it. */
15650 if (TARGET_THUMB2)
15651 return false;
15652
15653 if (load && commute)
15654 {
15655 /* Try reordering registers. */
15656 std::swap (operands[0], operands[1]);
15657 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15658 false, load))
15659 return true;
15660 }
15661
15662 if (const_store)
15663 {
15664 /* If input registers are dead after this pattern, they can be
15665 reordered or replaced by other registers that are free in the
15666 current pattern. */
15667 if (!peep2_reg_dead_p (4, operands[0])
15668 || !peep2_reg_dead_p (4, operands[1]))
15669 return false;
15670
15671 /* Try to reorder the input registers. */
15672 /* For example, the code
15673 mov r0, 0
15674 mov r1, 1
15675 str r1, [r2]
15676 str r0, [r2, #4]
15677 can be transformed into
15678 mov r1, 0
15679 mov r0, 1
15680 strd r0, [r2]
15681 */
15682 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15683 false, false))
15684 {
15685 std::swap (operands[0], operands[1]);
15686 return true;
15687 }
15688
15689 /* Try to find a free DI register. */
15690 CLEAR_HARD_REG_SET (regset);
15691 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15692 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15693 while (true)
15694 {
15695 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15696 if (tmp == NULL_RTX)
15697 return false;
15698
15699 /* DREG must be an even-numbered register in DImode.
15700 Split it into SI registers. */
15701 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15702 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15703 gcc_assert (operands[0] != NULL_RTX);
15704 gcc_assert (operands[1] != NULL_RTX);
15705 gcc_assert (REGNO (operands[0]) % 2 == 0);
15706 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15707
15708 return (operands_ok_ldrd_strd (operands[0], operands[1],
15709 base, offset,
15710 false, load));
15711 }
15712 }
15713
15714 return false;
15715 }
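/* The peephole transformation this enables is, roughly (hypothetical
   registers):

	ldr	r0, [r2]
	ldr	r1, [r2, #4]
   =>
	ldrd	r0, r1, [r2]

   subject to the offset, alignment and register-numbering checks above
   (in ARM state the first register must be even and the second must be
   the next register).  */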
15716
15717
15718
15719 \f
15720 /* Print a symbolic form of X to the debug file, F. */
15721 static void
15722 arm_print_value (FILE *f, rtx x)
15723 {
15724 switch (GET_CODE (x))
15725 {
15726 case CONST_INT:
15727 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15728 return;
15729
15730 case CONST_DOUBLE:
15731 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15732 return;
15733
15734 case CONST_VECTOR:
15735 {
15736 int i;
15737
15738 fprintf (f, "<");
15739 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15740 {
15741 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15742 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15743 fputc (',', f);
15744 }
15745 fprintf (f, ">");
15746 }
15747 return;
15748
15749 case CONST_STRING:
15750 fprintf (f, "\"%s\"", XSTR (x, 0));
15751 return;
15752
15753 case SYMBOL_REF:
15754 fprintf (f, "`%s'", XSTR (x, 0));
15755 return;
15756
15757 case LABEL_REF:
15758 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15759 return;
15760
15761 case CONST:
15762 arm_print_value (f, XEXP (x, 0));
15763 return;
15764
15765 case PLUS:
15766 arm_print_value (f, XEXP (x, 0));
15767 fprintf (f, "+");
15768 arm_print_value (f, XEXP (x, 1));
15769 return;
15770
15771 case PC:
15772 fprintf (f, "pc");
15773 return;
15774
15775 default:
15776 fprintf (f, "????");
15777 return;
15778 }
15779 }
15780 \f
15781 /* Routines for manipulation of the constant pool. */
15782
15783 /* Arm instructions cannot load a large constant directly into a
15784 register; they have to come from a pc relative load. The constant
15785 must therefore be placed in the addressable range of the pc
15786 relative load. Depending on the precise pc relative load
15787 instruction the range is somewhere between 256 bytes and 4k. This
15788 means that we often have to dump a constant inside a function, and
15789 generate code to branch around it.
15790
15791 It is important to minimize this, since the branches will slow
15792 things down and make the code larger.
15793
15794 Normally we can hide the table after an existing unconditional
15795 branch so that there is no interruption of the flow, but in the
15796 worst case the code looks like this:
15797
15798 ldr rn, L1
15799 ...
15800 b L2
15801 align
15802 L1: .long value
15803 L2:
15804 ...
15805
15806 ldr rn, L3
15807 ...
15808 b L4
15809 align
15810 L3: .long value
15811 L4:
15812 ...
15813
15814 We fix this by performing a scan after scheduling, which notices
15815 which instructions need to have their operands fetched from the
15816 constant table and builds the table.
15817
15818 The algorithm starts by building a table of all the constants that
15819 need fixing up and all the natural barriers in the function (places
15820 where a constant table can be dropped without breaking the flow).
15821 For each fixup we note how far the pc-relative replacement will be
15822 able to reach and the offset of the instruction into the function.
15823
15824 Having built the table we then group the fixes together to form
15825 tables that are as large as possible (subject to addressing
15826 constraints) and emit each table of constants after the last
15827 barrier that is within range of all the instructions in the group.
15828 If a group does not contain a barrier, then we forcibly create one
15829 by inserting a jump instruction into the flow. Once the table has
15830 been inserted, the insns are then modified to reference the
15831 relevant entry in the pool.
15832
15833 Possible enhancements to the algorithm (not implemented) are:
15834
15835 1) For some processors and object formats, there may be benefit in
15836 aligning the pools to the start of cache lines; this alignment
15837 would need to be taken into account when calculating addressability
15838 of a pool. */
15839
15840 /* These typedefs are located at the start of this file, so that
15841 they can be used in the prototypes there. This comment is to
15842 remind readers of that fact so that the following structures
15843 can be understood more easily.
15844
15845 typedef struct minipool_node Mnode;
15846 typedef struct minipool_fixup Mfix; */
15847
15848 struct minipool_node
15849 {
15850 /* Doubly linked chain of entries. */
15851 Mnode * next;
15852 Mnode * prev;
15853 /* The maximum offset into the code at which this entry can be placed. While
15854 pushing fixes for forward references, all entries are sorted in order
15855 of increasing max_address. */
15856 HOST_WIDE_INT max_address;
15857 /* Similarly for an entry inserted for a backwards ref. */
15858 HOST_WIDE_INT min_address;
15859 /* The number of fixes referencing this entry. This can become zero
15860 if we "unpush" an entry. In this case we ignore the entry when we
15861 come to emit the code. */
15862 int refcount;
15863 /* The offset from the start of the minipool. */
15864 HOST_WIDE_INT offset;
15865 /* The value in the table. */
15866 rtx value;
15867 /* The mode of value. */
15868 machine_mode mode;
15869 /* The size of the value. With iWMMXt enabled,
15870 sizes > 4 also imply an alignment of 8 bytes. */
15871 int fix_size;
15872 };
15873
15874 struct minipool_fixup
15875 {
15876 Mfix * next;
15877 rtx_insn * insn;
15878 HOST_WIDE_INT address;
15879 rtx * loc;
15880 machine_mode mode;
15881 int fix_size;
15882 rtx value;
15883 Mnode * minipool;
15884 HOST_WIDE_INT forwards;
15885 HOST_WIDE_INT backwards;
15886 };
15887
15888 /* Fixes less than a word need padding out to a word boundary. */
15889 #define MINIPOOL_FIX_SIZE(mode) \
15890 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
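/* E.g. MINIPOOL_FIX_SIZE (HImode) is 4 and MINIPOOL_FIX_SIZE (DImode)
   is 8: entries smaller than a word still occupy a full word.  */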
15891
15892 static Mnode * minipool_vector_head;
15893 static Mnode * minipool_vector_tail;
15894 static rtx_code_label *minipool_vector_label;
15895 static int minipool_pad;
15896
15897 /* The linked list of all minipool fixes required for this function. */
15898 Mfix * minipool_fix_head;
15899 Mfix * minipool_fix_tail;
15900 /* The fix entry for the current minipool, once it has been placed. */
15901 Mfix * minipool_barrier;
15902
15903 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15904 #define JUMP_TABLES_IN_TEXT_SECTION 0
15905 #endif
15906
15907 static HOST_WIDE_INT
15908 get_jump_table_size (rtx_jump_table_data *insn)
15909 {
15910 /* ADDR_VECs only take room if read-only data goes into the text
15911 section. */
15912 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15913 {
15914 rtx body = PATTERN (insn);
15915 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15916 HOST_WIDE_INT size;
15917 HOST_WIDE_INT modesize;
15918
15919 modesize = GET_MODE_SIZE (GET_MODE (body));
15920 size = modesize * XVECLEN (body, elt);
15921 switch (modesize)
15922 {
15923 case 1:
15924 /* Round up size of TBB table to a halfword boundary. */
15925 size = (size + 1) & ~HOST_WIDE_INT_1;
15926 break;
15927 case 2:
15928 /* No padding necessary for TBH. */
15929 break;
15930 case 4:
15931 /* Add two bytes for alignment on Thumb. */
15932 if (TARGET_THUMB)
15933 size += 2;
15934 break;
15935 default:
15936 gcc_unreachable ();
15937 }
15938 return size;
15939 }
15940
15941 return 0;
15942 }
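/* For example, a TBB-style table (modesize 1) with seven entries is 7
   bytes of data, rounded up to 8 so the following code stays halfword
   aligned; a 4-byte-entry table with three entries on Thumb is
   3 * 4 + 2 = 14 bytes, the extra 2 allowing for alignment padding.  */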
15943
15944 /* Return the maximum amount of padding that will be inserted before
15945 label LABEL. */
15946
15947 static HOST_WIDE_INT
15948 get_label_padding (rtx label)
15949 {
15950 HOST_WIDE_INT align, min_insn_size;
15951
15952 align = 1 << label_to_alignment (label).levels[0].log;
15953 min_insn_size = TARGET_THUMB ? 2 : 4;
15954 return align > min_insn_size ? align - min_insn_size : 0;
15955 }
15956
15957 /* Move a minipool fix MP from its current location to before MAX_MP.
15958 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
15959 constraints may need updating. */
15960 static Mnode *
15961 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
15962 HOST_WIDE_INT max_address)
15963 {
15964 /* The code below assumes these are different. */
15965 gcc_assert (mp != max_mp);
15966
15967 if (max_mp == NULL)
15968 {
15969 if (max_address < mp->max_address)
15970 mp->max_address = max_address;
15971 }
15972 else
15973 {
15974 if (max_address > max_mp->max_address - mp->fix_size)
15975 mp->max_address = max_mp->max_address - mp->fix_size;
15976 else
15977 mp->max_address = max_address;
15978
15979 /* Unlink MP from its current position. Since max_mp is non-null,
15980 mp->prev must be non-null. */
15981 mp->prev->next = mp->next;
15982 if (mp->next != NULL)
15983 mp->next->prev = mp->prev;
15984 else
15985 minipool_vector_tail = mp->prev;
15986
15987 /* Re-insert it before MAX_MP. */
15988 mp->next = max_mp;
15989 mp->prev = max_mp->prev;
15990 max_mp->prev = mp;
15991
15992 if (mp->prev != NULL)
15993 mp->prev->next = mp;
15994 else
15995 minipool_vector_head = mp;
15996 }
15997
15998 /* Save the new entry. */
15999 max_mp = mp;
16000
16001 /* Scan over the preceding entries and adjust their addresses as
16002 required. */
16003 while (mp->prev != NULL
16004 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16005 {
16006 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16007 mp = mp->prev;
16008 }
16009
16010 return max_mp;
16011 }
16012
16013 /* Add a constant to the minipool for a forward reference. Returns the
16014 node added or NULL if the constant will not fit in this pool. */
16015 static Mnode *
16016 add_minipool_forward_ref (Mfix *fix)
16017 {
16018 /* If set, max_mp is the first pool_entry that has a lower
16019 constraint than the one we are trying to add. */
16020 Mnode * max_mp = NULL;
16021 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16022 Mnode * mp;
16023
16024 /* If the minipool starts before the end of FIX->INSN then this FIX
16025 cannot be placed into the current pool. Furthermore, adding the
16026 new constant pool entry may cause the pool to start FIX_SIZE bytes
16027 earlier. */
16028 if (minipool_vector_head
16029 && (fix->address + get_attr_length (fix->insn)
16030 >= minipool_vector_head->max_address - fix->fix_size))
16031 return NULL;
16032
16033 /* Scan the pool to see if a constant with the same value has
16034 already been added. While we are doing this, also note the
16035 location where we must insert the constant if it doesn't already
16036 exist. */
16037 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16038 {
16039 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16040 && fix->mode == mp->mode
16041 && (!LABEL_P (fix->value)
16042 || (CODE_LABEL_NUMBER (fix->value)
16043 == CODE_LABEL_NUMBER (mp->value)))
16044 && rtx_equal_p (fix->value, mp->value))
16045 {
16046 /* More than one fix references this entry. */
16047 mp->refcount++;
16048 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16049 }
16050
16051 /* Note the insertion point if necessary. */
16052 if (max_mp == NULL
16053 && mp->max_address > max_address)
16054 max_mp = mp;
16055
16056 /* If we are inserting an 8-byte aligned quantity and
16057 we have not already found an insertion point, then
16058 make sure that all such 8-byte aligned quantities are
16059 placed at the start of the pool. */
16060 if (ARM_DOUBLEWORD_ALIGN
16061 && max_mp == NULL
16062 && fix->fix_size >= 8
16063 && mp->fix_size < 8)
16064 {
16065 max_mp = mp;
16066 max_address = mp->max_address;
16067 }
16068 }
16069
16070 /* The value is not currently in the minipool, so we need to create
16071 a new entry for it. If MAX_MP is NULL, the entry will be put on
16072 the end of the list since the placement is less constrained than
16073 any existing entry. Otherwise, we insert the new fix before
16074 MAX_MP and, if necessary, adjust the constraints on the other
16075 entries. */
16076 mp = XNEW (Mnode);
16077 mp->fix_size = fix->fix_size;
16078 mp->mode = fix->mode;
16079 mp->value = fix->value;
16080 mp->refcount = 1;
16081 /* Not yet required for a backwards ref. */
16082 mp->min_address = -65536;
16083
16084 if (max_mp == NULL)
16085 {
16086 mp->max_address = max_address;
16087 mp->next = NULL;
16088 mp->prev = minipool_vector_tail;
16089
16090 if (mp->prev == NULL)
16091 {
16092 minipool_vector_head = mp;
16093 minipool_vector_label = gen_label_rtx ();
16094 }
16095 else
16096 mp->prev->next = mp;
16097
16098 minipool_vector_tail = mp;
16099 }
16100 else
16101 {
16102 if (max_address > max_mp->max_address - mp->fix_size)
16103 mp->max_address = max_mp->max_address - mp->fix_size;
16104 else
16105 mp->max_address = max_address;
16106
16107 mp->next = max_mp;
16108 mp->prev = max_mp->prev;
16109 max_mp->prev = mp;
16110 if (mp->prev != NULL)
16111 mp->prev->next = mp;
16112 else
16113 minipool_vector_head = mp;
16114 }
16115
16116 /* Save the new entry. */
16117 max_mp = mp;
16118
16119 /* Scan over the preceding entries and adjust their addresses as
16120 required. */
16121 while (mp->prev != NULL
16122 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16123 {
16124 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16125 mp = mp->prev;
16126 }
16127
16128 return max_mp;
16129 }
16130
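/* Move a minipool fix MP from its current location to after MIN_MP.
   If MIN_MP is NULL, then MP doesn't need moving, but the addressing
   constraints may need updating.  Returns the new value for MIN_MP. */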
16131 static Mnode *
16132 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16133 HOST_WIDE_INT min_address)
16134 {
16135 HOST_WIDE_INT offset;
16136
16137 /* The code below assumes these are different. */
16138 gcc_assert (mp != min_mp);
16139
16140 if (min_mp == NULL)
16141 {
16142 if (min_address > mp->min_address)
16143 mp->min_address = min_address;
16144 }
16145 else
16146 {
16147 /* We will adjust this below if it is too loose. */
16148 mp->min_address = min_address;
16149
16150 /* Unlink MP from its current position. Since min_mp is non-null,
16151 mp->next must be non-null. */
16152 mp->next->prev = mp->prev;
16153 if (mp->prev != NULL)
16154 mp->prev->next = mp->next;
16155 else
16156 minipool_vector_head = mp->next;
16157
16158 /* Reinsert it after MIN_MP. */
16159 mp->prev = min_mp;
16160 mp->next = min_mp->next;
16161 min_mp->next = mp;
16162 if (mp->next != NULL)
16163 mp->next->prev = mp;
16164 else
16165 minipool_vector_tail = mp;
16166 }
16167
16168 min_mp = mp;
16169
16170 offset = 0;
16171 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16172 {
16173 mp->offset = offset;
16174 if (mp->refcount > 0)
16175 offset += mp->fix_size;
16176
16177 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16178 mp->next->min_address = mp->min_address + mp->fix_size;
16179 }
16180
16181 return min_mp;
16182 }
16183
16184 /* Add a constant to the minipool for a backward reference. Returns the
16185 node added or NULL if the constant will not fit in this pool.
16186
16187 Note that the insertion code for a backwards reference can be
16188 somewhat confusing because the calculated offsets for each fix do
16189 not take into account the size of the pool (which is still under
16190 construction). */
16191 static Mnode *
16192 add_minipool_backward_ref (Mfix *fix)
16193 {
16194 /* If set, min_mp is the last pool_entry that has a lower constraint
16195 than the one we are trying to add. */
16196 Mnode *min_mp = NULL;
16197 /* This can be negative, since it is only a constraint. */
16198 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16199 Mnode *mp;
16200
16201 /* If we can't reach the current pool from this insn, or if we can't
16202 insert this entry at the end of the pool without pushing other
16203 fixes out of range, then we don't try. This ensures that we
16204 can't fail later on. */
16205 if (min_address >= minipool_barrier->address
16206 || (minipool_vector_tail->min_address + fix->fix_size
16207 >= minipool_barrier->address))
16208 return NULL;
16209
16210 /* Scan the pool to see if a constant with the same value has
16211 already been added. While we are doing this, also note the
16212 location where we must insert the constant if it doesn't already
16213 exist. */
16214 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16215 {
16216 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16217 && fix->mode == mp->mode
16218 && (!LABEL_P (fix->value)
16219 || (CODE_LABEL_NUMBER (fix->value)
16220 == CODE_LABEL_NUMBER (mp->value)))
16221 && rtx_equal_p (fix->value, mp->value)
16222 /* Check that there is enough slack to move this entry to the
16223 end of the table (this is conservative). */
16224 && (mp->max_address
16225 > (minipool_barrier->address
16226 + minipool_vector_tail->offset
16227 + minipool_vector_tail->fix_size)))
16228 {
16229 mp->refcount++;
16230 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16231 }
16232
16233 if (min_mp != NULL)
16234 mp->min_address += fix->fix_size;
16235 else
16236 {
16237 /* Note the insertion point if necessary. */
16238 if (mp->min_address < min_address)
16239 {
16240 /* For now, we do not allow the insertion of nodes requiring
16241 8-byte alignment anywhere but at the start of the pool. */
16242 if (ARM_DOUBLEWORD_ALIGN
16243 && fix->fix_size >= 8 && mp->fix_size < 8)
16244 return NULL;
16245 else
16246 min_mp = mp;
16247 }
16248 else if (mp->max_address
16249 < minipool_barrier->address + mp->offset + fix->fix_size)
16250 {
16251 /* Inserting before this entry would push the fix beyond
16252 its maximum address (which can happen if we have
16253 re-located a forwards fix); force the new fix to come
16254 after it. */
16255 if (ARM_DOUBLEWORD_ALIGN
16256 && fix->fix_size >= 8 && mp->fix_size < 8)
16257 return NULL;
16258 else
16259 {
16260 min_mp = mp;
16261 min_address = mp->min_address + fix->fix_size;
16262 }
16263 }
16264 /* Do not insert a non-8-byte aligned quantity before 8-byte
16265 aligned quantities. */
16266 else if (ARM_DOUBLEWORD_ALIGN
16267 && fix->fix_size < 8
16268 && mp->fix_size >= 8)
16269 {
16270 min_mp = mp;
16271 min_address = mp->min_address + fix->fix_size;
16272 }
16273 }
16274 }
16275
16276 /* We need to create a new entry. */
16277 mp = XNEW (Mnode);
16278 mp->fix_size = fix->fix_size;
16279 mp->mode = fix->mode;
16280 mp->value = fix->value;
16281 mp->refcount = 1;
16282 mp->max_address = minipool_barrier->address + 65536;
16283
16284 mp->min_address = min_address;
16285
16286 if (min_mp == NULL)
16287 {
16288 mp->prev = NULL;
16289 mp->next = minipool_vector_head;
16290
16291 if (mp->next == NULL)
16292 {
16293 minipool_vector_tail = mp;
16294 minipool_vector_label = gen_label_rtx ();
16295 }
16296 else
16297 mp->next->prev = mp;
16298
16299 minipool_vector_head = mp;
16300 }
16301 else
16302 {
16303 mp->next = min_mp->next;
16304 mp->prev = min_mp;
16305 min_mp->next = mp;
16306
16307 if (mp->next != NULL)
16308 mp->next->prev = mp;
16309 else
16310 minipool_vector_tail = mp;
16311 }
16312
16313 /* Save the new entry. */
16314 min_mp = mp;
16315
16316 if (mp->prev)
16317 mp = mp->prev;
16318 else
16319 mp->offset = 0;
16320
16321 /* Scan over the following entries and adjust their offsets. */
16322 while (mp->next != NULL)
16323 {
16324 if (mp->next->min_address < mp->min_address + mp->fix_size)
16325 mp->next->min_address = mp->min_address + mp->fix_size;
16326
16327 if (mp->refcount)
16328 mp->next->offset = mp->offset + mp->fix_size;
16329 else
16330 mp->next->offset = mp->offset;
16331
16332 mp = mp->next;
16333 }
16334
16335 return min_mp;
16336 }
16337
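/* Record BARRIER as the barrier after which the current minipool will
   be placed, and assign each pool entry its offset within the pool;
   only entries that are still referenced consume space. */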
16338 static void
16339 assign_minipool_offsets (Mfix *barrier)
16340 {
16341 HOST_WIDE_INT offset = 0;
16342 Mnode *mp;
16343
16344 minipool_barrier = barrier;
16345
16346 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16347 {
16348 mp->offset = offset;
16349
16350 if (mp->refcount > 0)
16351 offset += mp->fix_size;
16352 }
16353 }
16354
16355 /* Output the literal table (the current minipool) after insn SCAN. */
16356 static void
16357 dump_minipool (rtx_insn *scan)
16358 {
16359 Mnode * mp;
16360 Mnode * nmp;
16361 int align64 = 0;
16362
16363 if (ARM_DOUBLEWORD_ALIGN)
16364 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16365 if (mp->refcount > 0 && mp->fix_size >= 8)
16366 {
16367 align64 = 1;
16368 break;
16369 }
16370
16371 if (dump_file)
16372 fprintf (dump_file,
16373 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16374 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16375
16376 scan = emit_label_after (gen_label_rtx (), scan);
16377 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16378 scan = emit_label_after (minipool_vector_label, scan);
16379
16380 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16381 {
16382 if (mp->refcount > 0)
16383 {
16384 if (dump_file)
16385 {
16386 fprintf (dump_file,
16387 ";; Offset %u, min %ld, max %ld ",
16388 (unsigned) mp->offset, (unsigned long) mp->min_address,
16389 (unsigned long) mp->max_address);
16390 arm_print_value (dump_file, mp->value);
16391 fputc ('\n', dump_file);
16392 }
16393
16394 rtx val = copy_rtx (mp->value);
16395
16396 switch (GET_MODE_SIZE (mp->mode))
16397 {
16398 #ifdef HAVE_consttable_1
16399 case 1:
16400 scan = emit_insn_after (gen_consttable_1 (val), scan);
16401 break;
16402
16403 #endif
16404 #ifdef HAVE_consttable_2
16405 case 2:
16406 scan = emit_insn_after (gen_consttable_2 (val), scan);
16407 break;
16408
16409 #endif
16410 #ifdef HAVE_consttable_4
16411 case 4:
16412 scan = emit_insn_after (gen_consttable_4 (val), scan);
16413 break;
16414
16415 #endif
16416 #ifdef HAVE_consttable_8
16417 case 8:
16418 scan = emit_insn_after (gen_consttable_8 (val), scan);
16419 break;
16420
16421 #endif
16422 #ifdef HAVE_consttable_16
16423 case 16:
16424 scan = emit_insn_after (gen_consttable_16 (val), scan);
16425 break;
16426
16427 #endif
16428 default:
16429 gcc_unreachable ();
16430 }
16431 }
16432
16433 nmp = mp->next;
16434 free (mp);
16435 }
16436
16437 minipool_vector_head = minipool_vector_tail = NULL;
16438 scan = emit_insn_after (gen_consttable_end (), scan);
16439 scan = emit_barrier_after (scan);
16440 }
16441
16442 /* Return the cost of forcibly inserting a barrier after INSN. */
16443 static int
16444 arm_barrier_cost (rtx_insn *insn)
16445 {
16446 /* Basing the location of the pool on the loop depth is preferable,
16447 but at the moment, the basic block information seems to be
16448 corrupted by this stage of the compilation. */
16449 int base_cost = 50;
16450 rtx_insn *next = next_nonnote_insn (insn);
16451
16452 if (next != NULL && LABEL_P (next))
16453 base_cost -= 20;
16454
16455 switch (GET_CODE (insn))
16456 {
16457 case CODE_LABEL:
16458 /* It will always be better to place the table before the label, rather
16459 than after it. */
16460 return 50;
16461
16462 case INSN:
16463 case CALL_INSN:
16464 return base_cost;
16465
16466 case JUMP_INSN:
16467 return base_cost - 10;
16468
16469 default:
16470 return base_cost + 10;
16471 }
16472 }
16473
16474 /* Find the best place in the insn stream in the range
16475 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16476 Create the barrier by inserting a jump and add a new fix entry for
16477 it. */
16478 static Mfix *
16479 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16480 {
16481 HOST_WIDE_INT count = 0;
16482 rtx_barrier *barrier;
16483 rtx_insn *from = fix->insn;
16484 /* The instruction after which we will insert the jump. */
16485 rtx_insn *selected = NULL;
16486 int selected_cost;
16487 /* The address at which the jump instruction will be placed. */
16488 HOST_WIDE_INT selected_address;
16489 Mfix * new_fix;
16490 HOST_WIDE_INT max_count = max_address - fix->address;
16491 rtx_code_label *label = gen_label_rtx ();
16492
16493 selected_cost = arm_barrier_cost (from);
16494 selected_address = fix->address;
16495
16496 while (from && count < max_count)
16497 {
16498 rtx_jump_table_data *tmp;
16499 int new_cost;
16500
16501 /* This code shouldn't have been called if there was a natural barrier
16502 within range. */
16503 gcc_assert (!BARRIER_P (from));
16504
16505 /* Count the length of this insn. This must stay in sync with the
16506 code that pushes minipool fixes. */
16507 if (LABEL_P (from))
16508 count += get_label_padding (from);
16509 else
16510 count += get_attr_length (from);
16511
16512 /* If there is a jump table, add its length. */
16513 if (tablejump_p (from, NULL, &tmp))
16514 {
16515 count += get_jump_table_size (tmp);
16516
16517 /* Jump tables aren't in a basic block, so base the cost on
16518 the dispatch insn. If we select this location, we will
16519 still put the pool after the table. */
16520 new_cost = arm_barrier_cost (from);
16521
16522 if (count < max_count
16523 && (!selected || new_cost <= selected_cost))
16524 {
16525 selected = tmp;
16526 selected_cost = new_cost;
16527 selected_address = fix->address + count;
16528 }
16529
16530 /* Continue after the dispatch table. */
16531 from = NEXT_INSN (tmp);
16532 continue;
16533 }
16534
16535 new_cost = arm_barrier_cost (from);
16536
16537 if (count < max_count
16538 && (!selected || new_cost <= selected_cost))
16539 {
16540 selected = from;
16541 selected_cost = new_cost;
16542 selected_address = fix->address + count;
16543 }
16544
16545 from = NEXT_INSN (from);
16546 }
16547
16548 /* Make sure that we found a place to insert the jump. */
16549 gcc_assert (selected);
16550
16551 /* Create a new JUMP_INSN that branches around a barrier. */
16552 from = emit_jump_insn_after (gen_jump (label), selected);
16553 JUMP_LABEL (from) = label;
16554 barrier = emit_barrier_after (from);
16555 emit_label_after (label, barrier);
16556
16557 /* Create a minipool barrier entry for the new barrier. */
16558 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16559 new_fix->insn = barrier;
16560 new_fix->address = selected_address;
16561 new_fix->next = fix->next;
16562 fix->next = new_fix;
16563
16564 return new_fix;
16565 }
16566
16567 /* Record that there is a natural barrier in the insn stream at
16568 ADDRESS. */
16569 static void
16570 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16571 {
16572 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16573
16574 fix->insn = insn;
16575 fix->address = address;
16576
16577 fix->next = NULL;
16578 if (minipool_fix_head != NULL)
16579 minipool_fix_tail->next = fix;
16580 else
16581 minipool_fix_head = fix;
16582
16583 minipool_fix_tail = fix;
16584 }
16585
16586 /* Record INSN, which will need fixing up to load a value from the
16587 minipool. ADDRESS is the offset of the insn since the start of the
16588 function; LOC is a pointer to the part of the insn which requires
16589 fixing; VALUE is the constant that must be loaded, which is of type
16590 MODE. */
16591 static void
16592 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16593 machine_mode mode, rtx value)
16594 {
16595 gcc_assert (!arm_disable_literal_pool);
16596 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16597
16598 fix->insn = insn;
16599 fix->address = address;
16600 fix->loc = loc;
16601 fix->mode = mode;
16602 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16603 fix->value = value;
16604 fix->forwards = get_attr_pool_range (insn);
16605 fix->backwards = get_attr_neg_pool_range (insn);
16606 fix->minipool = NULL;
16607
16608 /* If an insn doesn't have a range defined for it, then it isn't
16609 expecting to be reworked by this code. Better to stop now than
16610 to generate duff assembly code. */
16611 gcc_assert (fix->forwards || fix->backwards);
16612
16613 /* If an entry requires 8-byte alignment then assume all constant pools
16614 require 4 bytes of padding. Trying to do this later on a per-pool
16615 basis is awkward because existing pool entries have to be modified. */
16616 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16617 minipool_pad = 4;
16618
16619 if (dump_file)
16620 {
16621 fprintf (dump_file,
16622 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16623 GET_MODE_NAME (mode),
16624 INSN_UID (insn), (unsigned long) address,
16625 -1 * (long)fix->backwards, (long)fix->forwards);
16626 arm_print_value (dump_file, fix->value);
16627 fprintf (dump_file, "\n");
16628 }
16629
16630 /* Add it to the chain of fixes. */
16631 fix->next = NULL;
16632
16633 if (minipool_fix_head != NULL)
16634 minipool_fix_tail->next = fix;
16635 else
16636 minipool_fix_head = fix;
16637
16638 minipool_fix_tail = fix;
16639 }
16640
16641 /* Return the maximum allowed cost of synthesizing a 64-bit constant
16642 inline, i.e. the largest number of insns we are prepared to spend
16643 synthesizing it. */
16644 int
16645 arm_max_const_double_inline_cost ()
16646 {
16647 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16648 }
16649
16650 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16651 Returns the number of insns needed, or 99 if we don't know how to
16652 do it. */
16653 int
16654 arm_const_double_inline_cost (rtx val)
16655 {
16656 rtx lowpart, highpart;
16657 machine_mode mode;
16658
16659 mode = GET_MODE (val);
16660
16661 if (mode == VOIDmode)
16662 mode = DImode;
16663
16664 gcc_assert (GET_MODE_SIZE (mode) == 8);
16665
16666 lowpart = gen_lowpart (SImode, val);
16667 highpart = gen_highpart_mode (SImode, mode, val);
16668
16669 gcc_assert (CONST_INT_P (lowpart));
16670 gcc_assert (CONST_INT_P (highpart));
16671
16672 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16673 NULL_RTX, NULL_RTX, 0, 0)
16674 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16675 NULL_RTX, NULL_RTX, 0, 0));
16676 }
16677
16678 /* Return the cost, in insns, of synthesizing the SImode constant VAL for operation CODE. */
16679 static inline int
16680 arm_const_inline_cost (enum rtx_code code, rtx val)
16681 {
16682 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16683 NULL_RTX, NULL_RTX, 1, 0);
16684 }
16685
16686 /* Return true if it is worthwhile to split a 64-bit constant into two
16687 32-bit operations. This is the case if optimizing for size, or
16688 if we have load delay slots, or if one 32-bit part can be done with
16689 a single data operation. */
16690 bool
16691 arm_const_double_by_parts (rtx val)
16692 {
16693 machine_mode mode = GET_MODE (val);
16694 rtx part;
16695
16696 if (optimize_size || arm_ld_sched)
16697 return true;
16698
16699 if (mode == VOIDmode)
16700 mode = DImode;
16701
16702 part = gen_highpart_mode (SImode, mode, val);
16703
16704 gcc_assert (CONST_INT_P (part));
16705
16706 if (const_ok_for_arm (INTVAL (part))
16707 || const_ok_for_arm (~INTVAL (part)))
16708 return true;
16709
16710 part = gen_lowpart (SImode, val);
16711
16712 gcc_assert (CONST_INT_P (part));
16713
16714 if (const_ok_for_arm (INTVAL (part))
16715 || const_ok_for_arm (~INTVAL (part)))
16716 return true;
16717
16718 return false;
16719 }
16720
16721 /* Return true if it is possible to inline both the high and low parts
16722 of a 64-bit constant into 32-bit data processing instructions. */
16723 bool
16724 arm_const_double_by_immediates (rtx val)
16725 {
16726 machine_mode mode = GET_MODE (val);
16727 rtx part;
16728
16729 if (mode == VOIDmode)
16730 mode = DImode;
16731
16732 part = gen_highpart_mode (SImode, mode, val);
16733
16734 gcc_assert (CONST_INT_P (part));
16735
16736 if (!const_ok_for_arm (INTVAL (part)))
16737 return false;
16738
16739 part = gen_lowpart (SImode, val);
16740
16741 gcc_assert (CONST_INT_P (part));
16742
16743 if (!const_ok_for_arm (INTVAL (part)))
16744 return false;
16745
16746 return true;
16747 }
16748
16749 /* Scan INSN and note any of its operands that need fixing.
16750 If DO_PUSHES is false we do not actually push any of the fixups
16751 needed. */
16752 static void
16753 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16754 {
16755 int opno;
16756
16757 extract_constrain_insn (insn);
16758
16759 if (recog_data.n_alternatives == 0)
16760 return;
16761
16762 /* Fill in recog_op_alt with information about the constraints of
16763 this insn. */
16764 preprocess_constraints (insn);
16765
16766 const operand_alternative *op_alt = which_op_alt ();
16767 for (opno = 0; opno < recog_data.n_operands; opno++)
16768 {
16769 /* Things we need to fix can only occur in inputs. */
16770 if (recog_data.operand_type[opno] != OP_IN)
16771 continue;
16772
16773 /* If this alternative is a memory reference, then any mention
16774 of constants in this alternative is really to fool reload
16775 into allowing us to accept one there. We need to fix them up
16776 now so that we output the right code. */
16777 if (op_alt[opno].memory_ok)
16778 {
16779 rtx op = recog_data.operand[opno];
16780
16781 if (CONSTANT_P (op))
16782 {
16783 if (do_pushes)
16784 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16785 recog_data.operand_mode[opno], op);
16786 }
16787 else if (MEM_P (op)
16788 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16789 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16790 {
16791 if (do_pushes)
16792 {
16793 rtx cop = avoid_constant_pool_reference (op);
16794
16795 /* Casting the address of something to a mode narrower
16796 than a word can cause avoid_constant_pool_reference()
16797 to return the pool reference itself. That's no good to
16798 us here. Let's just hope that we can use the
16799 constant pool value directly. */
16800 if (op == cop)
16801 cop = get_pool_constant (XEXP (op, 0));
16802
16803 push_minipool_fix (insn, address,
16804 recog_data.operand_loc[opno],
16805 recog_data.operand_mode[opno], cop);
16806 }
16807
16808 }
16809 }
16810 }
16811
16812 return;
16813 }
16814
16815 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16816 and unions in the context of ARMv8-M Security Extensions. It is used as a
16817 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16818 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
16819 or four masks, depending on whether it is being computed for a
16820 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16821 respectively. The tree for the type of the argument or a field within an
16822 argument is passed in ARG_TYPE, the current register this argument or field
16823 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16824 argument or field starts at is passed in STARTING_BIT and the last used bit
16825 is kept in LAST_USED_BIT which is also updated accordingly. */
16826
16827 static unsigned HOST_WIDE_INT
16828 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16829 uint32_t * padding_bits_to_clear,
16830 unsigned starting_bit, int * last_used_bit)
16831
16832 {
16833 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16834
16835 if (TREE_CODE (arg_type) == RECORD_TYPE)
16836 {
16837 unsigned current_bit = starting_bit;
16838 tree field;
16839 long int offset, size;
16840
16841
16842 field = TYPE_FIELDS (arg_type);
16843 while (field)
16844 {
16845 /* The offset within a structure is always an offset from
16846 the start of that structure. Make sure we take that into account
16847 in the calculation of the register-based offset that we use here. */
16848 offset = starting_bit;
16849 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16850 offset %= 32;
16851
16852 /* This is the actual size of the field, for bitfields this is the
16853 bitfield width and not the container size. */
16854 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16855
16856 if (*last_used_bit != offset)
16857 {
16858 if (offset < *last_used_bit)
16859 {
16860 /* This field's offset is before the 'last_used_bit', which
16861 means this field goes in the next register. So we need to
16862 pad the rest of the current register and increase the
16863 register number. */
16864 uint32_t mask;
16865 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16866 mask++;
16867
16868 padding_bits_to_clear[*regno] |= mask;
16869 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16870 (*regno)++;
16871 }
16872 else
16873 {
16874 /* Otherwise we pad the bits between the last field's end and
16875 the start of the new field. */
16876 uint32_t mask;
16877
16878 mask = ((uint32_t)-1) >> (32 - offset);
16879 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16880 padding_bits_to_clear[*regno] |= mask;
16881 }
16882 current_bit = offset;
16883 }
16884
16885 /* Calculate further padding bits for inner structs/unions too. */
16886 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16887 {
16888 *last_used_bit = current_bit;
16889 not_to_clear_reg_mask
16890 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16891 padding_bits_to_clear, offset,
16892 last_used_bit);
16893 }
16894 else
16895 {
16896 /* Update 'current_bit' with this field's size. If the
16897 'current_bit' lies in a subsequent register, update 'regno' and
16898 reset 'current_bit' to point to the current bit in that new
16899 register. */
16900 current_bit += size;
16901 while (current_bit >= 32)
16902 {
16903 current_bit -= 32;
16904 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16905 (*regno)++;
16906 }
16907 *last_used_bit = current_bit;
16908 }
16909
16910 field = TREE_CHAIN (field);
16911 }
16912 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16913 }
16914 else if (TREE_CODE (arg_type) == UNION_TYPE)
16915 {
16916 tree field, field_t;
16917 int i, regno_t, field_size;
16918 int max_reg = -1;
16919 int max_bit = -1;
16920 uint32_t mask;
16921 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16922 = {-1, -1, -1, -1};
16923
16924 /* To compute the padding bits in a union we only consider bits as
16925 padding bits if they are always either a padding bit or fall outside a
16926 field's size for all fields in the union. */
16927 field = TYPE_FIELDS (arg_type);
16928 while (field)
16929 {
16930 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16931 = {0U, 0U, 0U, 0U};
16932 int last_used_bit_t = *last_used_bit;
16933 regno_t = *regno;
16934 field_t = TREE_TYPE (field);
16935
16936 /* If the field's type is either a record or a union make sure to
16937 compute their padding bits too. */
16938 if (RECORD_OR_UNION_TYPE_P (field_t))
16939 not_to_clear_reg_mask
16940 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16941 &padding_bits_to_clear_t[0],
16942 starting_bit, &last_used_bit_t);
16943 else
16944 {
16945 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16946 regno_t = (field_size / 32) + *regno;
16947 last_used_bit_t = (starting_bit + field_size) % 32;
16948 }
16949
16950 for (i = *regno; i < regno_t; i++)
16951 {
16952 /* For all but the last register used by this field only keep the
16953 padding bits that were padding bits in this field. */
16954 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16955 }
16956
16957 /* For the last register, keep all padding bits that were padding
16958 bits in this field and any padding bits that are still valid
16959 as padding bits but fall outside of this field's size. */
16960 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
16961 padding_bits_to_clear_res[regno_t]
16962 &= padding_bits_to_clear_t[regno_t] | mask;
16963
16964 /* Update the maximum size of the fields in terms of registers used
16965 ('max_reg') and the 'last_used_bit' in said register. */
16966 if (max_reg < regno_t)
16967 {
16968 max_reg = regno_t;
16969 max_bit = last_used_bit_t;
16970 }
16971 else if (max_reg == regno_t && max_bit < last_used_bit_t)
16972 max_bit = last_used_bit_t;
16973
16974 field = TREE_CHAIN (field);
16975 }
16976
16977 /* Update the current padding_bits_to_clear using the intersection of the
16978 padding bits of all the fields. */
16979 for (i = *regno; i < max_reg; i++)
16980 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
16981
16982 /* Do not keep trailing padding bits, we do not know yet whether this
16983 is the end of the argument. */
16984 mask = ((uint32_t) 1 << max_bit) - 1;
16985 padding_bits_to_clear[max_reg]
16986 |= padding_bits_to_clear_res[max_reg] & mask;
16987
16988 *regno = max_reg;
16989 *last_used_bit = max_bit;
16990 }
16991 else
16992 /* This function should only be used for structs and unions. */
16993 gcc_unreachable ();
16994
16995 return not_to_clear_reg_mask;
16996 }
16997
16998 /* In the context of ARMv8-M Security Extensions, this function is used for both
16999 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
17000 registers are used when returning or passing arguments, which is then
17001 returned as a mask. It will also compute a mask to indicate padding/unused
17002 bits for each of these registers, and passes this through the
17003 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
17004 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
17005 the starting register used to pass this argument or return value is passed
17006 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
17007 for struct and union types. */
17008
17009 static unsigned HOST_WIDE_INT
17010 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
17011 uint32_t * padding_bits_to_clear)
17012
17013 {
17014 int last_used_bit = 0;
17015 unsigned HOST_WIDE_INT not_to_clear_mask;
17016
17017 if (RECORD_OR_UNION_TYPE_P (arg_type))
17018 {
17019 not_to_clear_mask
17020 = comp_not_to_clear_mask_str_un (arg_type, &regno,
17021 padding_bits_to_clear, 0,
17022 &last_used_bit);
17023
17024
17025 /* If the 'last_used_bit' is not zero, that means we are still using a
17026 part of the last 'regno'. In such cases we must clear the trailing
17027 bits. Otherwise we are not using REGNO and should mark it as to be
17028 cleared. */
17029 if (last_used_bit != 0)
17030 padding_bits_to_clear[regno]
17031 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
17032 else
17033 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
17034 }
17035 else
17036 {
17037 not_to_clear_mask = 0;
17038 /* We are not dealing with structs nor unions. So these arguments may be
17039 passed in floating point registers too. In some cases a BLKmode is
17040 used when returning or passing arguments in multiple VFP registers. */
17041 if (GET_MODE (arg_rtx) == BLKmode)
17042 {
17043 int i, arg_regs;
17044 rtx reg;
17045
17046 /* This should really only occur when dealing with the hard-float
17047 ABI. */
17048 gcc_assert (TARGET_HARD_FLOAT_ABI);
17049
17050 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
17051 {
17052 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
17053 gcc_assert (REG_P (reg));
17054
17055 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
17056
17057 /* If we are dealing with DF mode, make sure we don't
17058 clear either of the registers it addresses. */
17059 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
17060 if (arg_regs > 1)
17061 {
17062 unsigned HOST_WIDE_INT mask;
17063 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
17064 mask -= HOST_WIDE_INT_1U << REGNO (reg);
17065 not_to_clear_mask |= mask;
17066 }
17067 }
17068 }
17069 else
17070 {
17071 /* Otherwise we can rely on the MODE to determine how many registers
17072 are being used by this argument. */
17073 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
17074 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17075 if (arg_regs > 1)
17076 {
17077 unsigned HOST_WIDE_INT
17078 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
17079 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17080 not_to_clear_mask |= mask;
17081 }
17082 }
17083 }
17084
17085 return not_to_clear_mask;
17086 }
17087
17088 /* Clear any secret values from registers before doing a cmse_nonsecure_call or
17089 returning from a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates which
17090 registers are to be fully cleared, using the value in register CLEARING_REG if
17091 that is more efficient. The PADDING_BITS_LEN-entry array PADDING_BITS_TO_CLEAR
17092 gives the bits that need to be cleared in caller-saved core registers, with
17093 SCRATCH_REG used as a scratch register for that clearing.
17094
17095 NOTE: one of three following assertions must hold:
17096 - SCRATCH_REG is a low register
17097 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
17098 in TO_CLEAR_BITMAP)
17099 - CLEARING_REG is a low register. */
17100
17101 static void
17102 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
17103 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
17104 {
17105 bool saved_clearing = false;
17106 rtx saved_clearing_reg = NULL_RTX;
17107 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
17108
17109 gcc_assert (arm_arch_cmse);
17110
17111 if (!bitmap_empty_p (to_clear_bitmap))
17112 {
17113 minregno = bitmap_first_set_bit (to_clear_bitmap);
17114 maxregno = bitmap_last_set_bit (to_clear_bitmap);
17115 }
17116 clearing_regno = REGNO (clearing_reg);
17117
17118 /* Clear padding bits. */
17119 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
17120 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
17121 {
17122 uint64_t mask;
17123 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
17124
17125 if (padding_bits_to_clear[i] == 0)
17126 continue;
17127
17128 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
17129 CLEARING_REG as scratch. */
17130 if (TARGET_THUMB1
17131 && REGNO (scratch_reg) > LAST_LO_REGNUM)
17132 {
17133 /* clearing_reg is not to be cleared, copy its value into scratch_reg
17134 such that we can use clearing_reg to clear the unused bits in the
17135 arguments. */
17136 if ((clearing_regno > maxregno
17137 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
17138 && !saved_clearing)
17139 {
17140 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
17141 emit_move_insn (scratch_reg, clearing_reg);
17142 saved_clearing = true;
17143 saved_clearing_reg = scratch_reg;
17144 }
17145 scratch_reg = clearing_reg;
17146 }
17147
17148 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
17149 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
17150 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
17151
17152 /* Fill the top half of the negated padding_bits_to_clear[i]. */
17153 mask = (~padding_bits_to_clear[i]) >> 16;
17154 rtx16 = gen_int_mode (16, SImode);
17155 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
17156 if (mask)
17157 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
17158
17159 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
17160 }
17161 if (saved_clearing)
17162 emit_move_insn (clearing_reg, saved_clearing_reg);
17163
17164
17165 /* Clear full registers. */
17166
17167 /* If not marked for clearing, clearing_reg already does not contain
17168 any secret. */
17169 if (clearing_regno <= maxregno
17170 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
17171 {
17172 emit_move_insn (clearing_reg, const0_rtx);
17173 emit_use (clearing_reg);
17174 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
17175 }
17176
17177 for (regno = minregno; regno <= maxregno; regno++)
17178 {
17179 if (!bitmap_bit_p (to_clear_bitmap, regno))
17180 continue;
17181
17182 if (IS_VFP_REGNUM (regno))
17183 {
17184 /* If regno is an even vfp register and its successor is also to
17185 be cleared, use vmov. */
17186 if (TARGET_VFP_DOUBLE
17187 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17188 && bitmap_bit_p (to_clear_bitmap, regno + 1))
17189 {
17190 emit_move_insn (gen_rtx_REG (DFmode, regno),
17191 CONST1_RTX (DFmode));
17192 emit_use (gen_rtx_REG (DFmode, regno));
17193 regno++;
17194 }
17195 else
17196 {
17197 emit_move_insn (gen_rtx_REG (SFmode, regno),
17198 CONST1_RTX (SFmode));
17199 emit_use (gen_rtx_REG (SFmode, regno));
17200 }
17201 }
17202 else
17203 {
17204 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
17205 emit_use (gen_rtx_REG (SImode, regno));
17206 }
17207 }
17208 }
17209
17210 /* Clears caller-saved registers not used to pass arguments before a
17211 cmse_nonsecure_call. Saving, clearing and restoring of callee-saved
17212 registers is done in the __gnu_cmse_nonsecure_call libcall.
17213 See libgcc/config/arm/cmse_nonsecure_call.S. */
17214
17215 static void
17216 cmse_nonsecure_call_clear_caller_saved (void)
17217 {
17218 basic_block bb;
17219
17220 FOR_EACH_BB_FN (bb, cfun)
17221 {
17222 rtx_insn *insn;
17223
17224 FOR_BB_INSNS (bb, insn)
17225 {
17226 unsigned address_regnum, regno, maxregno =
17227 TARGET_HARD_FLOAT_ABI ? D7_VFP_REGNUM : NUM_ARG_REGS - 1;
17228 auto_sbitmap to_clear_bitmap (maxregno + 1);
17229 rtx_insn *seq;
17230 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
17231 rtx address;
17232 CUMULATIVE_ARGS args_so_far_v;
17233 cumulative_args_t args_so_far;
17234 tree arg_type, fntype;
17235 bool first_param = true;
17236 function_args_iterator args_iter;
17237 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
17238
17239 if (!NONDEBUG_INSN_P (insn))
17240 continue;
17241
17242 if (!CALL_P (insn))
17243 continue;
17244
17245 pat = PATTERN (insn);
17246 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
17247 call = XVECEXP (pat, 0, 0);
17248
17249 /* Get the real call RTX if the insn sets a value, ie. returns. */
17250 if (GET_CODE (call) == SET)
17251 call = SET_SRC (call);
17252
17253 /* Check if it is a cmse_nonsecure_call. */
17254 unspec = XEXP (call, 0);
17255 if (GET_CODE (unspec) != UNSPEC
17256 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
17257 continue;
17258
17259 /* Determine the caller-saved registers we need to clear. */
17260 bitmap_clear (to_clear_bitmap);
17261 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
17262
17263 /* Only look at the caller-saved floating point registers in case of
17264 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
17265 lazy store and loads which clear both caller- and callee-saved
17266 registers. */
17267 if (TARGET_HARD_FLOAT_ABI)
17268 {
17269 auto_sbitmap float_bitmap (maxregno + 1);
17270
17271 bitmap_clear (float_bitmap);
17272 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
17273 D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1);
17274 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
17275 }
17276
17277 /* Make sure the register used to hold the function address is not
17278 cleared. */
17279 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17280 gcc_assert (MEM_P (address));
17281 gcc_assert (REG_P (XEXP (address, 0)));
17282 address_regnum = REGNO (XEXP (address, 0));
17283 if (address_regnum < R0_REGNUM + NUM_ARG_REGS)
17284 bitmap_clear_bit (to_clear_bitmap, address_regnum);
17285
17286 /* Set basic block of call insn so that df rescan is performed on
17287 insns inserted here. */
17288 set_block_for_insn (insn, bb);
17289 df_set_flags (DF_DEFER_INSN_RESCAN);
17290 start_sequence ();
17291
17292 /* Make sure the scheduler doesn't schedule other insns beyond
17293 here. */
17294 emit_insn (gen_blockage ());
17295
17296 /* Walk through all arguments and clear registers appropriately. */
17297
17298 fntype = TREE_TYPE (MEM_EXPR (address));
17299 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17300 NULL_TREE);
17301 args_so_far = pack_cumulative_args (&args_so_far_v);
17302 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17303 {
17304 rtx arg_rtx;
17305 uint64_t to_clear_args_mask;
17306 machine_mode arg_mode = TYPE_MODE (arg_type);
17307
17308 if (VOID_TYPE_P (arg_type))
17309 continue;
17310
17311 if (!first_param)
17312 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17313 true);
17314
17315 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17316 true);
17317 gcc_assert (REG_P (arg_rtx));
17318 to_clear_args_mask
17319 = compute_not_to_clear_mask (arg_type, arg_rtx,
17320 REGNO (arg_rtx),
17321 &padding_bits_to_clear[0]);
17322 if (to_clear_args_mask)
17323 {
17324 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17325 {
17326 if (to_clear_args_mask & (1ULL << regno))
17327 bitmap_clear_bit (to_clear_bitmap, regno);
17328 }
17329 }
17330
17331 first_param = false;
17332 }
17333
17334 /* We use right shift and left shift to clear the LSB of the address
17335 we jump to instead of using bic, to avoid having to use an extra
17336 register on Thumb-1. */
17337 clearing_reg = XEXP (address, 0);
17338 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
17339 emit_insn (gen_rtx_SET (clearing_reg, shift));
17340 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
17341 emit_insn (gen_rtx_SET (clearing_reg, shift));
17342
17343 /* Clear any caller-saved registers that might leak secret values
17344 before doing the non-secure call. */
17345 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
17346 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
17347 NUM_ARG_REGS, ip_reg, clearing_reg);
17348
17349 seq = get_insns ();
17350 end_sequence ();
17351 emit_insn_before (seq, insn);
17352 }
17353 }
17354 }
17355
17356 /* Rewrite move insn into subtract of 0 if the condition codes will
17357 be useful in the next conditional jump insn. */
17358
17359 static void
17360 thumb1_reorg (void)
17361 {
17362 basic_block bb;
17363
17364 FOR_EACH_BB_FN (bb, cfun)
17365 {
17366 rtx dest, src;
17367 rtx cmp, op0, op1, set = NULL;
17368 rtx_insn *prev, *insn = BB_END (bb);
17369 bool insn_clobbered = false;
17370
17371 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17372 insn = PREV_INSN (insn);
17373
17374 /* Find the last cbranchsi4_insn in basic block BB. */
17375 if (insn == BB_HEAD (bb)
17376 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17377 continue;
17378
17379 /* Get the register with which we are comparing. */
17380 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17381 op0 = XEXP (cmp, 0);
17382 op1 = XEXP (cmp, 1);
17383
17384 /* Check that comparison is against ZERO. */
17385 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17386 continue;
17387
17388 /* Find the first flag setting insn before INSN in basic block BB. */
17389 gcc_assert (insn != BB_HEAD (bb));
17390 for (prev = PREV_INSN (insn);
17391 (!insn_clobbered
17392 && prev != BB_HEAD (bb)
17393 && (NOTE_P (prev)
17394 || DEBUG_INSN_P (prev)
17395 || ((set = single_set (prev)) != NULL
17396 && get_attr_conds (prev) == CONDS_NOCOND)));
17397 prev = PREV_INSN (prev))
17398 {
17399 if (reg_set_p (op0, prev))
17400 insn_clobbered = true;
17401 }
17402
17403 /* Skip if OP0 is clobbered by an insn other than PREV. */
17404 if (insn_clobbered)
17405 continue;
17406
17407 if (!set)
17408 continue;
17409
17410 dest = SET_DEST (set);
17411 src = SET_SRC (set);
17412 if (!low_register_operand (dest, SImode)
17413 || !low_register_operand (src, SImode))
17414 continue;
17415
17416 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17417 in INSN. Both src and dest of the move insn are checked. */
17418 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17419 {
17420 dest = copy_rtx (dest);
17421 src = copy_rtx (src);
17422 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17423 PATTERN (prev) = gen_rtx_SET (dest, src);
17424 INSN_CODE (prev) = -1;
17425 /* Set test register in INSN to dest. */
17426 XEXP (cmp, 0) = copy_rtx (dest);
17427 INSN_CODE (insn) = -1;
17428 }
17429 }
17430 }
17431
17432 /* Convert instructions to their cc-clobbering variant if possible, since
17433 that allows us to use smaller encodings. */
17434
17435 static void
17436 thumb2_reorg (void)
17437 {
17438 basic_block bb;
17439 regset_head live;
17440
17441 INIT_REG_SET (&live);
17442
17443 /* We are freeing block_for_insn in the toplev to keep compatibility
17444 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17445 compute_bb_for_insn ();
17446 df_analyze ();
17447
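/* What to do with a candidate insn: leave it unchanged (SKIP), add a
   clobber of the condition codes so that the 16-bit flag-setting
   encoding can be used (CONV), or swap the commutative source operands
   first and then add the clobber (SWAP_CONV). */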
17448 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17449
17450 FOR_EACH_BB_FN (bb, cfun)
17451 {
17452 if ((current_tune->disparage_flag_setting_t16_encodings
17453 == tune_params::DISPARAGE_FLAGS_ALL)
17454 && optimize_bb_for_speed_p (bb))
17455 continue;
17456
17457 rtx_insn *insn;
17458 Convert_Action action = SKIP;
17459 Convert_Action action_for_partial_flag_setting
17460 = ((current_tune->disparage_flag_setting_t16_encodings
17461 != tune_params::DISPARAGE_FLAGS_NEITHER)
17462 && optimize_bb_for_speed_p (bb))
17463 ? SKIP : CONV;
17464
17465 COPY_REG_SET (&live, DF_LR_OUT (bb));
17466 df_simulate_initialize_backwards (bb, &live);
17467 FOR_BB_INSNS_REVERSE (bb, insn)
17468 {
17469 if (NONJUMP_INSN_P (insn)
17470 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17471 && GET_CODE (PATTERN (insn)) == SET)
17472 {
17473 action = SKIP;
17474 rtx pat = PATTERN (insn);
17475 rtx dst = XEXP (pat, 0);
17476 rtx src = XEXP (pat, 1);
17477 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17478
17479 if (UNARY_P (src) || BINARY_P (src))
17480 op0 = XEXP (src, 0);
17481
17482 if (BINARY_P (src))
17483 op1 = XEXP (src, 1);
17484
17485 if (low_register_operand (dst, SImode))
17486 {
17487 switch (GET_CODE (src))
17488 {
17489 case PLUS:
17490 /* Adding two registers and storing the result
17491 in the first source is already a 16-bit
17492 operation. */
17493 if (rtx_equal_p (dst, op0)
17494 && register_operand (op1, SImode))
17495 break;
17496
17497 if (low_register_operand (op0, SImode))
17498 {
17499 /* ADDS <Rd>,<Rn>,<Rm> */
17500 if (low_register_operand (op1, SImode))
17501 action = CONV;
17502 /* ADDS <Rdn>,#<imm8> */
17503 /* SUBS <Rdn>,#<imm8> */
17504 else if (rtx_equal_p (dst, op0)
17505 && CONST_INT_P (op1)
17506 && IN_RANGE (INTVAL (op1), -255, 255))
17507 action = CONV;
17508 /* ADDS <Rd>,<Rn>,#<imm3> */
17509 /* SUBS <Rd>,<Rn>,#<imm3> */
17510 else if (CONST_INT_P (op1)
17511 && IN_RANGE (INTVAL (op1), -7, 7))
17512 action = CONV;
17513 }
17514 /* ADCS <Rd>, <Rn> */
17515 else if (GET_CODE (XEXP (src, 0)) == PLUS
17516 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17517 && low_register_operand (XEXP (XEXP (src, 0), 1),
17518 SImode)
17519 && COMPARISON_P (op1)
17520 && cc_register (XEXP (op1, 0), VOIDmode)
17521 && maybe_get_arm_condition_code (op1) == ARM_CS
17522 && XEXP (op1, 1) == const0_rtx)
17523 action = CONV;
17524 break;
17525
17526 case MINUS:
17527 /* RSBS <Rd>,<Rn>,#0
17528 Not handled here: see NEG below. */
17529 /* SUBS <Rd>,<Rn>,#<imm3>
17530 SUBS <Rdn>,#<imm8>
17531 Not handled here: see PLUS above. */
17532 /* SUBS <Rd>,<Rn>,<Rm> */
17533 if (low_register_operand (op0, SImode)
17534 && low_register_operand (op1, SImode))
17535 action = CONV;
17536 break;
17537
17538 case MULT:
17539 /* MULS <Rdm>,<Rn>,<Rdm>
17540 As an exception to the rule, this is only used
17541 when optimizing for size since MULS is slow on all
17542 known implementations. We do not even want to use
17543 MULS in cold code, if optimizing for speed, so we
17544 test the global flag here. */
17545 if (!optimize_size)
17546 break;
17547 /* Fall through. */
17548 case AND:
17549 case IOR:
17550 case XOR:
17551 /* ANDS <Rdn>,<Rm> */
17552 if (rtx_equal_p (dst, op0)
17553 && low_register_operand (op1, SImode))
17554 action = action_for_partial_flag_setting;
17555 else if (rtx_equal_p (dst, op1)
17556 && low_register_operand (op0, SImode))
17557 action = action_for_partial_flag_setting == SKIP
17558 ? SKIP : SWAP_CONV;
17559 break;
17560
17561 case ASHIFTRT:
17562 case ASHIFT:
17563 case LSHIFTRT:
17564 /* ASRS <Rdn>,<Rm> */
17565 /* LSRS <Rdn>,<Rm> */
17566 /* LSLS <Rdn>,<Rm> */
17567 if (rtx_equal_p (dst, op0)
17568 && low_register_operand (op1, SImode))
17569 action = action_for_partial_flag_setting;
17570 /* ASRS <Rd>,<Rm>,#<imm5> */
17571 /* LSRS <Rd>,<Rm>,#<imm5> */
17572 /* LSLS <Rd>,<Rm>,#<imm5> */
17573 else if (low_register_operand (op0, SImode)
17574 && CONST_INT_P (op1)
17575 && IN_RANGE (INTVAL (op1), 0, 31))
17576 action = action_for_partial_flag_setting;
17577 break;
17578
17579 case ROTATERT:
17580 /* RORS <Rdn>,<Rm> */
17581 if (rtx_equal_p (dst, op0)
17582 && low_register_operand (op1, SImode))
17583 action = action_for_partial_flag_setting;
17584 break;
17585
17586 case NOT:
17587 /* MVNS <Rd>,<Rm> */
17588 if (low_register_operand (op0, SImode))
17589 action = action_for_partial_flag_setting;
17590 break;
17591
17592 case NEG:
17593 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17594 if (low_register_operand (op0, SImode))
17595 action = CONV;
17596 break;
17597
17598 case CONST_INT:
17599 /* MOVS <Rd>,#<imm8> */
17600 if (CONST_INT_P (src)
17601 && IN_RANGE (INTVAL (src), 0, 255))
17602 action = action_for_partial_flag_setting;
17603 break;
17604
17605 case REG:
17606 /* MOVS and MOV<c> with registers have different
17607 encodings, so are not relevant here. */
17608 break;
17609
17610 default:
17611 break;
17612 }
17613 }
17614
17615 if (action != SKIP)
17616 {
17617 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17618 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17619 rtvec vec;
17620
17621 if (action == SWAP_CONV)
17622 {
17623 src = copy_rtx (src);
17624 XEXP (src, 0) = op1;
17625 XEXP (src, 1) = op0;
17626 pat = gen_rtx_SET (dst, src);
17627 vec = gen_rtvec (2, pat, clobber);
17628 }
17629 else /* action == CONV */
17630 vec = gen_rtvec (2, pat, clobber);
17631
17632 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17633 INSN_CODE (insn) = -1;
17634 }
17635 }
17636
17637 if (NONDEBUG_INSN_P (insn))
17638 df_simulate_one_insn_backwards (bb, insn, &live);
17639 }
17640 }
17641
17642 CLEAR_REG_SET (&live);
17643 }
17644
17645 /* GCC puts the pool in the wrong place for ARM, since we can only
17646 load addresses a limited distance around the pc. We do some
17647 special munging to move the constant pool values to the correct
17648 point in the code. */
17649 static void
17650 arm_reorg (void)
17651 {
17652 rtx_insn *insn;
17653 HOST_WIDE_INT address = 0;
17654 Mfix * fix;
17655
17656 if (use_cmse)
17657 cmse_nonsecure_call_clear_caller_saved ();
17658
17659 /* We cannot run the Thumb passes for thunks because there is no CFG. */
17660 if (cfun->is_thunk)
17661 ;
17662 else if (TARGET_THUMB1)
17663 thumb1_reorg ();
17664 else if (TARGET_THUMB2)
17665 thumb2_reorg ();
17666
17667 /* Ensure all insns that must be split have been split at this point.
17668 Otherwise, the pool placement code below may compute incorrect
17669 insn lengths. Note that when optimizing, all insns have already
17670 been split at this point. */
17671 if (!optimize)
17672 split_all_insns_noflow ();
17673
17674 /* Make sure we do not attempt to create a literal pool even though it should
17675 no longer be necessary to create any. */
17676 if (arm_disable_literal_pool)
17677 return;
17678
17679 minipool_fix_head = minipool_fix_tail = NULL;
17680
17681 /* The first insn must always be a note, or the code below won't
17682 scan it properly. */
17683 insn = get_insns ();
17684 gcc_assert (NOTE_P (insn));
17685 minipool_pad = 0;
17686
17687 /* Scan all the insns and record the operands that will need fixing. */
17688 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17689 {
17690 if (BARRIER_P (insn))
17691 push_minipool_barrier (insn, address);
17692 else if (INSN_P (insn))
17693 {
17694 rtx_jump_table_data *table;
17695
17696 note_invalid_constants (insn, address, true);
17697 address += get_attr_length (insn);
17698
17699 /* If the insn is a vector jump, add the size of the table
17700 and skip the table. */
17701 if (tablejump_p (insn, NULL, &table))
17702 {
17703 address += get_jump_table_size (table);
17704 insn = table;
17705 }
17706 }
17707 else if (LABEL_P (insn))
17708 /* Add the worst-case padding due to alignment. We don't add
17709 the _current_ padding because the minipool insertions
17710 themselves might change it. */
17711 address += get_label_padding (insn);
17712 }
17713
17714 fix = minipool_fix_head;
17715
17716 /* Now scan the fixups and perform the required changes. */
17717 while (fix)
17718 {
17719 Mfix * ftmp;
17720 Mfix * fdel;
17721 Mfix * last_added_fix;
17722 Mfix * last_barrier = NULL;
17723 Mfix * this_fix;
17724
17725 /* Skip any further barriers before the next fix. */
17726 while (fix && BARRIER_P (fix->insn))
17727 fix = fix->next;
17728
17729 /* No more fixes. */
17730 if (fix == NULL)
17731 break;
17732
17733 last_added_fix = NULL;
17734
17735 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17736 {
17737 if (BARRIER_P (ftmp->insn))
17738 {
17739 if (ftmp->address >= minipool_vector_head->max_address)
17740 break;
17741
17742 last_barrier = ftmp;
17743 }
17744 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17745 break;
17746
17747 last_added_fix = ftmp; /* Keep track of the last fix added. */
17748 }
17749
17750 /* If we found a barrier, drop back to that; any fixes that we
17751 could have reached but come after the barrier will now go in
17752 the next mini-pool. */
17753 if (last_barrier != NULL)
17754 {
17755 /* Reduce the refcount for those fixes that won't go into this
17756 pool after all. */
17757 for (fdel = last_barrier->next;
17758 fdel && fdel != ftmp;
17759 fdel = fdel->next)
17760 {
17761 fdel->minipool->refcount--;
17762 fdel->minipool = NULL;
17763 }
17764
17765 ftmp = last_barrier;
17766 }
17767 else
17768 {
17769 /* FTMP is the first fix that we can't fit into this pool and
17770 there are no natural barriers that we could use. Insert a
17771 new barrier in the code somewhere between the previous
17772 fix and this one, and arrange to jump around it. */
17773 HOST_WIDE_INT max_address;
17774
17775 /* The last item on the list of fixes must be a barrier, so
17776 we can never run off the end of the list of fixes without
17777 last_barrier being set. */
17778 gcc_assert (ftmp);
17779
17780 max_address = minipool_vector_head->max_address;
17781 /* Check that there isn't another fix that is in range that
17782 we couldn't fit into this pool because the pool was
17783 already too large: we need to put the pool before such an
17784 instruction. The pool itself may come just after the
17785 fix because create_fix_barrier also allows space for a
17786 jump instruction. */
17787 if (ftmp->address < max_address)
17788 max_address = ftmp->address + 1;
17789
17790 last_barrier = create_fix_barrier (last_added_fix, max_address);
17791 }
17792
17793 assign_minipool_offsets (last_barrier);
17794
17795 while (ftmp)
17796 {
17797 if (!BARRIER_P (ftmp->insn)
17798 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17799 == NULL))
17800 break;
17801
17802 ftmp = ftmp->next;
17803 }
17804
17805 /* Scan over the fixes we have identified for this pool, fixing them
17806 up and adding the constants to the pool itself. */
17807 for (this_fix = fix; this_fix && ftmp != this_fix;
17808 this_fix = this_fix->next)
17809 if (!BARRIER_P (this_fix->insn))
17810 {
17811 rtx addr
17812 = plus_constant (Pmode,
17813 gen_rtx_LABEL_REF (VOIDmode,
17814 minipool_vector_label),
17815 this_fix->minipool->offset);
17816 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17817 }
17818
17819 dump_minipool (last_barrier->insn);
17820 fix = ftmp;
17821 }
17822
17823 /* From now on we must synthesize any constants that we can't handle
17824 directly. This can happen if the RTL gets split during final
17825 instruction generation. */
17826 cfun->machine->after_arm_reorg = 1;
17827
17828 /* Free the minipool memory. */
17829 obstack_free (&minipool_obstack, minipool_startobj);
17830 }
17831 \f
17832 /* Routines to output assembly language. */
17833
17834 /* Return the string representation of the passed-in real value.  */
17835 static const char *
17836 fp_const_from_val (REAL_VALUE_TYPE *r)
17837 {
17838 if (!fp_consts_inited)
17839 init_fp_table ();
17840
17841 gcc_assert (real_equal (r, &value_fp0));
17842 return "0";
17843 }
17844
17845 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17846    OPERANDS[1] is the base register, RETURN_PC is true iff the return
17847    insn is in the list, and UPDATE is true iff the list contains an
17848    explicit update of the base register.  */
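/* As an illustrative example (register choice is arbitrary), a
   conditional pop of r4 and r5 with an SP update is emitted as
   "pop {r4, r5}", whereas a return from an interrupt handler uses the
   LDM form with the exception-return marker, e.g. "ldmfd sp!, {r4, pc}^".  */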
17849 void
17850 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17851 bool update)
17852 {
17853 int i;
17854 char pattern[100];
17855 int offset;
17856 const char *conditional;
17857 int num_saves = XVECLEN (operands[0], 0);
17858 unsigned int regno;
17859 unsigned int regno_base = REGNO (operands[1]);
17860 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17861
17862 offset = 0;
17863 offset += update ? 1 : 0;
17864 offset += return_pc ? 1 : 0;
17865
17866 /* Is the base register in the list? */
17867 for (i = offset; i < num_saves; i++)
17868 {
17869 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17870 /* If SP is in the list, then the base register must be SP. */
17871 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17872 /* If base register is in the list, there must be no explicit update. */
17873 if (regno == regno_base)
17874 gcc_assert (!update);
17875 }
17876
17877 conditional = reverse ? "%?%D0" : "%?%d0";
17878 /* Can't use POP if returning from an interrupt. */
17879 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17880 sprintf (pattern, "pop%s\t{", conditional);
17881 else
17882 {
17883 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17884	 It's just a convention; their semantics are identical.  */
17885 if (regno_base == SP_REGNUM)
17886 sprintf (pattern, "ldmfd%s\t", conditional);
17887 else if (update)
17888 sprintf (pattern, "ldmia%s\t", conditional);
17889 else
17890 sprintf (pattern, "ldm%s\t", conditional);
17891
17892 strcat (pattern, reg_names[regno_base]);
17893 if (update)
17894 strcat (pattern, "!, {");
17895 else
17896 strcat (pattern, ", {");
17897 }
17898
17899 /* Output the first destination register. */
17900 strcat (pattern,
17901 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17902
17903 /* Output the rest of the destination registers. */
17904 for (i = offset + 1; i < num_saves; i++)
17905 {
17906 strcat (pattern, ", ");
17907 strcat (pattern,
17908 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17909 }
17910
17911 strcat (pattern, "}");
17912
17913 if (interrupt_p && return_pc)
17914 strcat (pattern, "^");
17915
17916 output_asm_insn (pattern, &cond);
17917 }
17918
17919
17920 /* Output the assembly for a store multiple. */
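/* For instance (register numbers are illustrative), pushing two D
   registers onto the stack produces "vpush.64 {d8, d9}", while the same
   store through another base register produces "vstmdb.64 r5!, {d8, d9}".  */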
17921
17922 const char *
17923 vfp_output_vstmd (rtx * operands)
17924 {
17925 char pattern[100];
17926 int p;
17927 int base;
17928 int i;
17929 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17930 ? XEXP (operands[0], 0)
17931 : XEXP (XEXP (operands[0], 0), 0);
17932 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17933
17934 if (push_p)
17935 strcpy (pattern, "vpush%?.64\t{%P1");
17936 else
17937 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17938
17939 p = strlen (pattern);
17940
17941 gcc_assert (REG_P (operands[1]));
17942
17943 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17944 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17945 {
17946 p += sprintf (&pattern[p], ", d%d", base + i);
17947 }
17948 strcpy (&pattern[p], "}");
17949
17950 output_asm_insn (pattern, operands);
17951 return "";
17952 }
17953
17954
17955 /* Emit RTL to save block of VFP register pairs to the stack. Returns the
17956 number of bytes pushed. */
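/* As a sketch of the generated RTL (illustrative only): for a block
   starting at d8 with COUNT == 2 this emits the equivalent of
   "vstmdb sp!, {d8, d9}", i.e. a 16-byte pre-decrement of the stack
   pointer, and attaches a REG_FRAME_RELATED_EXPR note describing the SP
   adjustment and each individual D-register store so that the unwind
   information stays correct.  (On pre-ARMv6 cores the workaround below
   pushes an extra pair.)  */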
17957
17958 static int
17959 vfp_emit_fstmd (int base_reg, int count)
17960 {
17961 rtx par;
17962 rtx dwarf;
17963 rtx tmp, reg;
17964 int i;
17965
17966 /* Workaround ARM10 VFPr1 bug. Data corruption can occur when exactly two
17967 register pairs are stored by a store multiple insn. We avoid this
17968 by pushing an extra pair. */
17969 if (count == 2 && !arm_arch6)
17970 {
17971 if (base_reg == LAST_VFP_REGNUM - 3)
17972 base_reg -= 2;
17973 count++;
17974 }
17975
17976 /* FSTMD may not store more than 16 doubleword registers at once. Split
17977 larger stores into multiple parts (up to a maximum of two, in
17978 practice). */
17979 if (count > 16)
17980 {
17981 int saved;
17982 /* NOTE: base_reg is an internal register number, so each D register
17983 counts as 2. */
17984 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
17985 saved += vfp_emit_fstmd (base_reg, 16);
17986 return saved;
17987 }
17988
17989 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
17990 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
17991
17992 reg = gen_rtx_REG (DFmode, base_reg);
17993 base_reg += 2;
17994
17995 XVECEXP (par, 0, 0)
17996 = gen_rtx_SET (gen_frame_mem
17997 (BLKmode,
17998 gen_rtx_PRE_MODIFY (Pmode,
17999 stack_pointer_rtx,
18000 plus_constant
18001 (Pmode, stack_pointer_rtx,
18002 - (count * 8)))
18003 ),
18004 gen_rtx_UNSPEC (BLKmode,
18005 gen_rtvec (1, reg),
18006 UNSPEC_PUSH_MULT));
18007
18008 tmp = gen_rtx_SET (stack_pointer_rtx,
18009 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
18010 RTX_FRAME_RELATED_P (tmp) = 1;
18011 XVECEXP (dwarf, 0, 0) = tmp;
18012
18013 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
18014 RTX_FRAME_RELATED_P (tmp) = 1;
18015 XVECEXP (dwarf, 0, 1) = tmp;
18016
18017 for (i = 1; i < count; i++)
18018 {
18019 reg = gen_rtx_REG (DFmode, base_reg);
18020 base_reg += 2;
18021 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
18022
18023 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
18024 plus_constant (Pmode,
18025 stack_pointer_rtx,
18026 i * 8)),
18027 reg);
18028 RTX_FRAME_RELATED_P (tmp) = 1;
18029 XVECEXP (dwarf, 0, i + 1) = tmp;
18030 }
18031
18032 par = emit_insn (par);
18033 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
18034 RTX_FRAME_RELATED_P (par) = 1;
18035
18036 return count * 8;
18037 }
18038
18039 /* Return TRUE if -mcmse has been passed and the function pointed to by
18040    ADDR has the cmse_nonsecure_call attribute; return FALSE otherwise.  */
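/* For example (the declaration is illustrative), with -mcmse a call
   through a function pointer whose type carries the attribute, e.g.

     typedef void __attribute__ ((cmse_nonsecure_call)) nsfunc (void);

   is recognized here so that the caller can emit the non-secure call
   sequence.  */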
18041
18042 bool
18043 detect_cmse_nonsecure_call (tree addr)
18044 {
18045 if (!addr)
18046 return FALSE;
18047
18048 tree fntype = TREE_TYPE (addr);
18049 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
18050 TYPE_ATTRIBUTES (fntype)))
18051 return TRUE;
18052 return FALSE;
18053 }
18054
18055
18056 /* Emit a call instruction with pattern PAT. ADDR is the address of
18057 the call target. */
18058
18059 void
18060 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
18061 {
18062 rtx insn;
18063
18064 insn = emit_call_insn (pat);
18065
18066 /* The PIC register is live on entry to VxWorks PIC PLT entries.
18067 If the call might use such an entry, add a use of the PIC register
18068 to the instruction's CALL_INSN_FUNCTION_USAGE. */
18069 if (TARGET_VXWORKS_RTP
18070 && flag_pic
18071 && !sibcall
18072 && GET_CODE (addr) == SYMBOL_REF
18073 && (SYMBOL_REF_DECL (addr)
18074 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
18075 : !SYMBOL_REF_LOCAL_P (addr)))
18076 {
18077 require_pic_register ();
18078 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
18079 }
18080
18081 if (TARGET_AAPCS_BASED)
18082 {
18083 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
18084 linker. We need to add an IP clobber to allow setting
18085 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
18086 is not needed since it's a fixed register. */
18087 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
18088 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
18089 }
18090 }
18091
18092 /* Output a 'call' insn. */
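/* For example (the register is illustrative), a call through r2 on a
   target without BLX is emitted as "mov lr, pc" followed by "bx r2", or
   by "mov pc, r2" when neither interworking nor ARMv4T is available.  */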
18093 const char *
18094 output_call (rtx *operands)
18095 {
18096 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
18097
18098 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
18099 if (REGNO (operands[0]) == LR_REGNUM)
18100 {
18101 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
18102 output_asm_insn ("mov%?\t%0, %|lr", operands);
18103 }
18104
18105 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
18106
18107 if (TARGET_INTERWORK || arm_arch4t)
18108 output_asm_insn ("bx%?\t%0", operands);
18109 else
18110 output_asm_insn ("mov%?\t%|pc, %0", operands);
18111
18112 return "";
18113 }
18114
18115 /* Output a move of a long double from ARM registers to ARM registers.
18116    OPERANDS[0] is the destination.
18117    OPERANDS[1] is the source.  */
18118 const char *
18119 output_mov_long_double_arm_from_arm (rtx *operands)
18120 {
18121 /* We have to be careful here because the two might overlap. */
18122 int dest_start = REGNO (operands[0]);
18123 int src_start = REGNO (operands[1]);
18124 rtx ops[2];
18125 int i;
18126
18127 if (dest_start < src_start)
18128 {
18129 for (i = 0; i < 3; i++)
18130 {
18131 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18132 ops[1] = gen_rtx_REG (SImode, src_start + i);
18133 output_asm_insn ("mov%?\t%0, %1", ops);
18134 }
18135 }
18136 else
18137 {
18138 for (i = 2; i >= 0; i--)
18139 {
18140 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18141 ops[1] = gen_rtx_REG (SImode, src_start + i);
18142 output_asm_insn ("mov%?\t%0, %1", ops);
18143 }
18144 }
18145
18146 return "";
18147 }
18148
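/* Load SRC into DEST in two halves, typically as a movw/movt pair: for a
   constant the low 16 bits are set first and the high 16 bits, when
   nonzero, are then inserted with a ZERO_EXTRACT; for anything else a
   HIGH/LO_SUM pair is emitted.  A REG_EQUAL note records the full value
   where needed.  */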
18149 void
18150 arm_emit_movpair (rtx dest, rtx src)
18151 {
18152 /* If the src is an immediate, simplify it. */
18153 if (CONST_INT_P (src))
18154 {
18155 HOST_WIDE_INT val = INTVAL (src);
18156 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
18157 if ((val >> 16) & 0x0000ffff)
18158 {
18159 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
18160 GEN_INT (16)),
18161 GEN_INT ((val >> 16) & 0x0000ffff));
18162 rtx_insn *insn = get_last_insn ();
18163 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18164 }
18165 return;
18166 }
18167 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
18168 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
18169 rtx_insn *insn = get_last_insn ();
18170 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18171 }
18172
18173 /* Output a move between double words. It must be REG<-MEM
18174 or MEM<-REG. */
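/* For instance (registers are illustrative), loading a DImode value from
   [r2] into r0/r1 is emitted as "ldrd r0, [r2]" when LDRD is available,
   and as an "ldmia" of the register pair otherwise; stores use the
   corresponding "strd"/"stm" forms.  */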
18175 const char *
18176 output_move_double (rtx *operands, bool emit, int *count)
18177 {
18178 enum rtx_code code0 = GET_CODE (operands[0]);
18179 enum rtx_code code1 = GET_CODE (operands[1]);
18180 rtx otherops[3];
18181 if (count)
18182 *count = 1;
18183
18184 /* The only case when this might happen is when
18185 you are looking at the length of a DImode instruction
18186 that has an invalid constant in it. */
18187 if (code0 == REG && code1 != MEM)
18188 {
18189 gcc_assert (!emit);
18190 *count = 2;
18191 return "";
18192 }
18193
18194 if (code0 == REG)
18195 {
18196 unsigned int reg0 = REGNO (operands[0]);
18197
18198 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18199
18200 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18201
18202 switch (GET_CODE (XEXP (operands[1], 0)))
18203 {
18204 case REG:
18205
18206 if (emit)
18207 {
18208 if (TARGET_LDRD
18209 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18210 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
18211 else
18212 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18213 }
18214 break;
18215
18216 case PRE_INC:
18217 gcc_assert (TARGET_LDRD);
18218 if (emit)
18219 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18220 break;
18221
18222 case PRE_DEC:
18223 if (emit)
18224 {
18225 if (TARGET_LDRD)
18226 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18227 else
18228 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18229 }
18230 break;
18231
18232 case POST_INC:
18233 if (emit)
18234 {
18235 if (TARGET_LDRD)
18236 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18237 else
18238 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18239 }
18240 break;
18241
18242 case POST_DEC:
18243 gcc_assert (TARGET_LDRD);
18244 if (emit)
18245 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18246 break;
18247
18248 case PRE_MODIFY:
18249 case POST_MODIFY:
18250	  /* Autoincrement addressing modes should never have overlapping
18251 base and destination registers, and overlapping index registers
18252 are already prohibited, so this doesn't need to worry about
18253 fix_cm3_ldrd. */
18254 otherops[0] = operands[0];
18255 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18256 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18257
18258 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18259 {
18260 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18261 {
18262 /* Registers overlap so split out the increment. */
18263 if (emit)
18264 {
18265 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18266 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18267 }
18268 if (count)
18269 *count = 2;
18270 }
18271 else
18272 {
18273 /* Use a single insn if we can.
18274 FIXME: IWMMXT allows offsets larger than ldrd can
18275 handle, fix these up with a pair of ldr. */
18276 if (TARGET_THUMB2
18277 || !CONST_INT_P (otherops[2])
18278 || (INTVAL (otherops[2]) > -256
18279 && INTVAL (otherops[2]) < 256))
18280 {
18281 if (emit)
18282 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18283 }
18284 else
18285 {
18286 if (emit)
18287 {
18288 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18289 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18290 }
18291 if (count)
18292 *count = 2;
18293
18294 }
18295 }
18296 }
18297 else
18298 {
18299 /* Use a single insn if we can.
18300 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18301 fix these up with a pair of ldr. */
18302 if (TARGET_THUMB2
18303 || !CONST_INT_P (otherops[2])
18304 || (INTVAL (otherops[2]) > -256
18305 && INTVAL (otherops[2]) < 256))
18306 {
18307 if (emit)
18308 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18309 }
18310 else
18311 {
18312 if (emit)
18313 {
18314 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18315 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18316 }
18317 if (count)
18318 *count = 2;
18319 }
18320 }
18321 break;
18322
18323 case LABEL_REF:
18324 case CONST:
18325 /* We might be able to use ldrd %0, %1 here. However the range is
18326 different to ldr/adr, and it is broken on some ARMv7-M
18327 implementations. */
18328 /* Use the second register of the pair to avoid problematic
18329 overlap. */
18330 otherops[1] = operands[1];
18331 if (emit)
18332 output_asm_insn ("adr%?\t%0, %1", otherops);
18333 operands[1] = otherops[0];
18334 if (emit)
18335 {
18336 if (TARGET_LDRD)
18337 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18338 else
18339 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18340 }
18341
18342 if (count)
18343 *count = 2;
18344 break;
18345
18346 /* ??? This needs checking for thumb2. */
18347 default:
18348 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18349 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18350 {
18351 otherops[0] = operands[0];
18352 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18353 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18354
18355 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18356 {
18357 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18358 {
18359 switch ((int) INTVAL (otherops[2]))
18360 {
18361 case -8:
18362 if (emit)
18363 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18364 return "";
18365 case -4:
18366 if (TARGET_THUMB2)
18367 break;
18368 if (emit)
18369 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18370 return "";
18371 case 4:
18372 if (TARGET_THUMB2)
18373 break;
18374 if (emit)
18375 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18376 return "";
18377 }
18378 }
18379 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18380 operands[1] = otherops[0];
18381 if (TARGET_LDRD
18382 && (REG_P (otherops[2])
18383 || TARGET_THUMB2
18384 || (CONST_INT_P (otherops[2])
18385 && INTVAL (otherops[2]) > -256
18386 && INTVAL (otherops[2]) < 256)))
18387 {
18388 if (reg_overlap_mentioned_p (operands[0],
18389 otherops[2]))
18390 {
18391 /* Swap base and index registers over to
18392 avoid a conflict. */
18393 std::swap (otherops[1], otherops[2]);
18394 }
18395 /* If both registers conflict, it will usually
18396 have been fixed by a splitter. */
18397 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18398 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18399 {
18400 if (emit)
18401 {
18402 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18403 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18404 }
18405 if (count)
18406 *count = 2;
18407 }
18408 else
18409 {
18410 otherops[0] = operands[0];
18411 if (emit)
18412 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18413 }
18414 return "";
18415 }
18416
18417 if (CONST_INT_P (otherops[2]))
18418 {
18419 if (emit)
18420 {
18421 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18422 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18423 else
18424 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18425 }
18426 }
18427 else
18428 {
18429 if (emit)
18430 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18431 }
18432 }
18433 else
18434 {
18435 if (emit)
18436 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18437 }
18438
18439 if (count)
18440 *count = 2;
18441
18442 if (TARGET_LDRD)
18443 return "ldrd%?\t%0, [%1]";
18444
18445 return "ldmia%?\t%1, %M0";
18446 }
18447 else
18448 {
18449 otherops[1] = adjust_address (operands[1], SImode, 4);
18450 /* Take care of overlapping base/data reg. */
18451 if (reg_mentioned_p (operands[0], operands[1]))
18452 {
18453 if (emit)
18454 {
18455 output_asm_insn ("ldr%?\t%0, %1", otherops);
18456 output_asm_insn ("ldr%?\t%0, %1", operands);
18457 }
18458 if (count)
18459 *count = 2;
18460
18461 }
18462 else
18463 {
18464 if (emit)
18465 {
18466 output_asm_insn ("ldr%?\t%0, %1", operands);
18467 output_asm_insn ("ldr%?\t%0, %1", otherops);
18468 }
18469 if (count)
18470 *count = 2;
18471 }
18472 }
18473 }
18474 }
18475 else
18476 {
18477 /* Constraints should ensure this. */
18478 gcc_assert (code0 == MEM && code1 == REG);
18479 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18480 || (TARGET_ARM && TARGET_LDRD));
18481
18482 /* For TARGET_ARM the first source register of an STRD
18483 must be even. This is usually the case for double-word
18484 values but user assembly constraints can force an odd
18485 starting register. */
18486 bool allow_strd = TARGET_LDRD
18487 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
18488 switch (GET_CODE (XEXP (operands[0], 0)))
18489 {
18490 case REG:
18491 if (emit)
18492 {
18493 if (allow_strd)
18494 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18495 else
18496 output_asm_insn ("stm%?\t%m0, %M1", operands);
18497 }
18498 break;
18499
18500 case PRE_INC:
18501 gcc_assert (allow_strd);
18502 if (emit)
18503 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18504 break;
18505
18506 case PRE_DEC:
18507 if (emit)
18508 {
18509 if (allow_strd)
18510 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18511 else
18512 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18513 }
18514 break;
18515
18516 case POST_INC:
18517 if (emit)
18518 {
18519 if (allow_strd)
18520 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18521 else
18522 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18523 }
18524 break;
18525
18526 case POST_DEC:
18527 gcc_assert (allow_strd);
18528 if (emit)
18529 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18530 break;
18531
18532 case PRE_MODIFY:
18533 case POST_MODIFY:
18534 otherops[0] = operands[1];
18535 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18536 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18537
18538 /* IWMMXT allows offsets larger than strd can handle,
18539 fix these up with a pair of str. */
18540 if (!TARGET_THUMB2
18541 && CONST_INT_P (otherops[2])
18542 && (INTVAL(otherops[2]) <= -256
18543 || INTVAL(otherops[2]) >= 256))
18544 {
18545 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18546 {
18547 if (emit)
18548 {
18549 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18550 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18551 }
18552 if (count)
18553 *count = 2;
18554 }
18555 else
18556 {
18557 if (emit)
18558 {
18559 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18560 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18561 }
18562 if (count)
18563 *count = 2;
18564 }
18565 }
18566 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18567 {
18568 if (emit)
18569 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18570 }
18571 else
18572 {
18573 if (emit)
18574 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18575 }
18576 break;
18577
18578 case PLUS:
18579 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18580 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18581 {
18582 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18583 {
18584 case -8:
18585 if (emit)
18586 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18587 return "";
18588
18589 case -4:
18590 if (TARGET_THUMB2)
18591 break;
18592 if (emit)
18593 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18594 return "";
18595
18596 case 4:
18597 if (TARGET_THUMB2)
18598 break;
18599 if (emit)
18600 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18601 return "";
18602 }
18603 }
18604 if (allow_strd
18605 && (REG_P (otherops[2])
18606 || TARGET_THUMB2
18607 || (CONST_INT_P (otherops[2])
18608 && INTVAL (otherops[2]) > -256
18609 && INTVAL (otherops[2]) < 256)))
18610 {
18611 otherops[0] = operands[1];
18612 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18613 if (emit)
18614 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18615 return "";
18616 }
18617 /* Fall through */
18618
18619 default:
18620 otherops[0] = adjust_address (operands[0], SImode, 4);
18621 otherops[1] = operands[1];
18622 if (emit)
18623 {
18624 output_asm_insn ("str%?\t%1, %0", operands);
18625 output_asm_insn ("str%?\t%H1, %0", otherops);
18626 }
18627 if (count)
18628 *count = 2;
18629 }
18630 }
18631
18632 return "";
18633 }
18634
18635 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18636 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
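/* For example (registers are illustrative), a quad-word load from [r4]
   is emitted as "ldmia r4, {r0-r3}", the corresponding store as
   "stm r4, {r0-r3}", and a register-to-register move as four "mov"
   instructions ordered so that overlapping source and destination
   ranges are handled correctly.  */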
18637
18638 const char *
18639 output_move_quad (rtx *operands)
18640 {
18641 if (REG_P (operands[0]))
18642 {
18643 /* Load, or reg->reg move. */
18644
18645 if (MEM_P (operands[1]))
18646 {
18647 switch (GET_CODE (XEXP (operands[1], 0)))
18648 {
18649 case REG:
18650 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18651 break;
18652
18653 case LABEL_REF:
18654 case CONST:
18655 output_asm_insn ("adr%?\t%0, %1", operands);
18656 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18657 break;
18658
18659 default:
18660 gcc_unreachable ();
18661 }
18662 }
18663 else
18664 {
18665 rtx ops[2];
18666 int dest, src, i;
18667
18668 gcc_assert (REG_P (operands[1]));
18669
18670 dest = REGNO (operands[0]);
18671 src = REGNO (operands[1]);
18672
18673 /* This seems pretty dumb, but hopefully GCC won't try to do it
18674 very often. */
18675 if (dest < src)
18676 for (i = 0; i < 4; i++)
18677 {
18678 ops[0] = gen_rtx_REG (SImode, dest + i);
18679 ops[1] = gen_rtx_REG (SImode, src + i);
18680 output_asm_insn ("mov%?\t%0, %1", ops);
18681 }
18682 else
18683 for (i = 3; i >= 0; i--)
18684 {
18685 ops[0] = gen_rtx_REG (SImode, dest + i);
18686 ops[1] = gen_rtx_REG (SImode, src + i);
18687 output_asm_insn ("mov%?\t%0, %1", ops);
18688 }
18689 }
18690 }
18691 else
18692 {
18693 gcc_assert (MEM_P (operands[0]));
18694 gcc_assert (REG_P (operands[1]));
18695 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18696
18697 switch (GET_CODE (XEXP (operands[0], 0)))
18698 {
18699 case REG:
18700 output_asm_insn ("stm%?\t%m0, %M1", operands);
18701 break;
18702
18703 default:
18704 gcc_unreachable ();
18705 }
18706 }
18707
18708 return "";
18709 }
18710
18711 /* Output a VFP load or store instruction. */
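/* For instance (operands are illustrative), a DFmode load is emitted as
   "vldr.64 d0, [r1]", an SFmode store as "vstr.32 s0, [r1]", and a
   post-increment load as "vldmia.64 r1!, {d0}".  */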
18712
18713 const char *
18714 output_move_vfp (rtx *operands)
18715 {
18716 rtx reg, mem, addr, ops[2];
18717 int load = REG_P (operands[0]);
18718 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18719 int sp = (!TARGET_VFP_FP16INST
18720 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18721 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18722 const char *templ;
18723 char buff[50];
18724 machine_mode mode;
18725
18726 reg = operands[!load];
18727 mem = operands[load];
18728
18729 mode = GET_MODE (reg);
18730
18731 gcc_assert (REG_P (reg));
18732 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18733 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18734 || mode == SFmode
18735 || mode == DFmode
18736 || mode == HImode
18737 || mode == SImode
18738 || mode == DImode
18739 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18740 gcc_assert (MEM_P (mem));
18741
18742 addr = XEXP (mem, 0);
18743
18744 switch (GET_CODE (addr))
18745 {
18746 case PRE_DEC:
18747 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18748 ops[0] = XEXP (addr, 0);
18749 ops[1] = reg;
18750 break;
18751
18752 case POST_INC:
18753 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18754 ops[0] = XEXP (addr, 0);
18755 ops[1] = reg;
18756 break;
18757
18758 default:
18759 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18760 ops[0] = reg;
18761 ops[1] = mem;
18762 break;
18763 }
18764
18765 sprintf (buff, templ,
18766 load ? "ld" : "st",
18767 dp ? "64" : sp ? "32" : "16",
18768 dp ? "P" : "",
18769 integer_p ? "\t%@ int" : "");
18770 output_asm_insn (buff, ops);
18771
18772 return "";
18773 }
18774
18775 /* Output a Neon double-word or quad-word load or store, or a load
18776 or store for larger structure modes.
18777
18778 WARNING: The ordering of elements is weird in big-endian mode,
18779    because the EABI requires that vectors stored in memory appear
18780    as though they were stored by a VSTM instruction.
18781 GCC RTL defines element ordering based on in-memory order.
18782 This can be different from the architectural ordering of elements
18783 within a NEON register. The intrinsics defined in arm_neon.h use the
18784 NEON register element ordering, not the GCC RTL element ordering.
18785
18786    For example, the in-memory ordering of a big-endian quadword
18787 vector with 16-bit elements when stored from register pair {d0,d1}
18788 will be (lowest address first, d0[N] is NEON register element N):
18789
18790 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18791
18792 When necessary, quadword registers (dN, dN+1) are moved to ARM
18793 registers from rN in the order:
18794
18795 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18796
18797 So that STM/LDM can be used on vectors in ARM registers, and the
18798 same memory layout will result as if VSTM/VLDM were used.
18799
18800 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18801 possible, which allows use of appropriate alignment tags.
18802 Note that the choice of "64" is independent of the actual vector
18803 element size; this size simply ensures that the behavior is
18804 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18805
18806 Due to limitations of those instructions, use of VST1.64/VLD1.64
18807 is not possible if:
18808 - the address contains PRE_DEC, or
18809 - the mode refers to more than 4 double-word registers
18810
18811 In those cases, it would be possible to replace VSTM/VLDM by a
18812 sequence of instructions; this is not currently implemented since
18813 this is not certain to actually improve performance. */
18814
18815 const char *
18816 output_move_neon (rtx *operands)
18817 {
18818 rtx reg, mem, addr, ops[2];
18819 int regno, nregs, load = REG_P (operands[0]);
18820 const char *templ;
18821 char buff[50];
18822 machine_mode mode;
18823
18824 reg = operands[!load];
18825 mem = operands[load];
18826
18827 mode = GET_MODE (reg);
18828
18829 gcc_assert (REG_P (reg));
18830 regno = REGNO (reg);
18831 nregs = REG_NREGS (reg) / 2;
18832 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18833 || NEON_REGNO_OK_FOR_QUAD (regno));
18834 gcc_assert (VALID_NEON_DREG_MODE (mode)
18835 || VALID_NEON_QREG_MODE (mode)
18836 || VALID_NEON_STRUCT_MODE (mode));
18837 gcc_assert (MEM_P (mem));
18838
18839 addr = XEXP (mem, 0);
18840
18841 /* Strip off const from addresses like (const (plus (...))). */
18842 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18843 addr = XEXP (addr, 0);
18844
18845 switch (GET_CODE (addr))
18846 {
18847 case POST_INC:
18848 /* We have to use vldm / vstm for too-large modes. */
18849 if (nregs > 4)
18850 {
18851 templ = "v%smia%%?\t%%0!, %%h1";
18852 ops[0] = XEXP (addr, 0);
18853 }
18854 else
18855 {
18856 templ = "v%s1.64\t%%h1, %%A0";
18857 ops[0] = mem;
18858 }
18859 ops[1] = reg;
18860 break;
18861
18862 case PRE_DEC:
18863 /* We have to use vldm / vstm in this case, since there is no
18864 pre-decrement form of the vld1 / vst1 instructions. */
18865 templ = "v%smdb%%?\t%%0!, %%h1";
18866 ops[0] = XEXP (addr, 0);
18867 ops[1] = reg;
18868 break;
18869
18870 case POST_MODIFY:
18871 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18872 gcc_unreachable ();
18873
18874 case REG:
18875 /* We have to use vldm / vstm for too-large modes. */
18876 if (nregs > 1)
18877 {
18878 if (nregs > 4)
18879 templ = "v%smia%%?\t%%m0, %%h1";
18880 else
18881 templ = "v%s1.64\t%%h1, %%A0";
18882
18883 ops[0] = mem;
18884 ops[1] = reg;
18885 break;
18886 }
18887 /* Fall through. */
18888 case LABEL_REF:
18889 case PLUS:
18890 {
18891 int i;
18892 int overlap = -1;
18893 for (i = 0; i < nregs; i++)
18894 {
18895 /* We're only using DImode here because it's a convenient size. */
18896 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18897 ops[1] = adjust_address (mem, DImode, 8 * i);
18898 if (reg_overlap_mentioned_p (ops[0], mem))
18899 {
18900 gcc_assert (overlap == -1);
18901 overlap = i;
18902 }
18903 else
18904 {
18905 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18906 output_asm_insn (buff, ops);
18907 }
18908 }
18909 if (overlap != -1)
18910 {
18911 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18912 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18913 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18914 output_asm_insn (buff, ops);
18915 }
18916
18917 return "";
18918 }
18919
18920 default:
18921 gcc_unreachable ();
18922 }
18923
18924 sprintf (buff, templ, load ? "ld" : "st");
18925 output_asm_insn (buff, ops);
18926
18927 return "";
18928 }
18929
18930 /* Compute and return the length of neon_mov<mode>, where <mode> is
18931 one of VSTRUCT modes: EI, OI, CI or XI. */
18932 int
18933 arm_attr_length_move_neon (rtx_insn *insn)
18934 {
18935 rtx reg, mem, addr;
18936 int load;
18937 machine_mode mode;
18938
18939 extract_insn_cached (insn);
18940
18941 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18942 {
18943 mode = GET_MODE (recog_data.operand[0]);
18944 switch (mode)
18945 {
18946 case E_EImode:
18947 case E_OImode:
18948 return 8;
18949 case E_CImode:
18950 return 12;
18951 case E_XImode:
18952 return 16;
18953 default:
18954 gcc_unreachable ();
18955 }
18956 }
18957
18958 load = REG_P (recog_data.operand[0]);
18959 reg = recog_data.operand[!load];
18960 mem = recog_data.operand[load];
18961
18962 gcc_assert (MEM_P (mem));
18963
18964 addr = XEXP (mem, 0);
18965
18966 /* Strip off const from addresses like (const (plus (...))). */
18967 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18968 addr = XEXP (addr, 0);
18969
18970 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
18971 {
18972 int insns = REG_NREGS (reg) / 2;
18973 return insns * 4;
18974 }
18975 else
18976 return 4;
18977 }
18978
18979 /* Return nonzero if the offset in the address is an immediate. Otherwise,
18980 return zero. */
18981
18982 int
18983 arm_address_offset_is_imm (rtx_insn *insn)
18984 {
18985 rtx mem, addr;
18986
18987 extract_insn_cached (insn);
18988
18989 if (REG_P (recog_data.operand[0]))
18990 return 0;
18991
18992 mem = recog_data.operand[0];
18993
18994 gcc_assert (MEM_P (mem));
18995
18996 addr = XEXP (mem, 0);
18997
18998 if (REG_P (addr)
18999 || (GET_CODE (addr) == PLUS
19000 && REG_P (XEXP (addr, 0))
19001 && CONST_INT_P (XEXP (addr, 1))))
19002 return 1;
19003 else
19004 return 0;
19005 }
19006
19007 /* Output an ADD r, s, #n where n may be too big for one instruction.
19008 If adding zero to one register, output nothing. */
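/* For example (registers are illustrative), an addition of #0x10004,
   which is not a valid ARM immediate, is split by
   output_multi_immediate below into "add r0, r1, #4" followed by
   "add r0, r0, #65536".  */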
19009 const char *
19010 output_add_immediate (rtx *operands)
19011 {
19012 HOST_WIDE_INT n = INTVAL (operands[2]);
19013
19014 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
19015 {
19016 if (n < 0)
19017 output_multi_immediate (operands,
19018 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
19019 -n);
19020 else
19021 output_multi_immediate (operands,
19022 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
19023 n);
19024 }
19025
19026 return "";
19027 }
19028
19029 /* Output a multiple immediate operation.
19030 OPERANDS is the vector of operands referred to in the output patterns.
19031 INSTR1 is the output pattern to use for the first constant.
19032 INSTR2 is the output pattern to use for subsequent constants.
19033 IMMED_OP is the index of the constant slot in OPERANDS.
19034 N is the constant value. */
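/* The loop below scans the constant two bits at a time; whenever it
   finds a set bit it peels off the 8-bit chunk starting at that even
   bit position, so every immediate it emits is a valid ARM rotated
   immediate.  */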
19035 static const char *
19036 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
19037 int immed_op, HOST_WIDE_INT n)
19038 {
19039 #if HOST_BITS_PER_WIDE_INT > 32
19040 n &= 0xffffffff;
19041 #endif
19042
19043 if (n == 0)
19044 {
19045 /* Quick and easy output. */
19046 operands[immed_op] = const0_rtx;
19047 output_asm_insn (instr1, operands);
19048 }
19049 else
19050 {
19051 int i;
19052 const char * instr = instr1;
19053
19054 /* Note that n is never zero here (which would give no output). */
19055 for (i = 0; i < 32; i += 2)
19056 {
19057 if (n & (3 << i))
19058 {
19059 operands[immed_op] = GEN_INT (n & (255 << i));
19060 output_asm_insn (instr, operands);
19061 instr = instr2;
19062 i += 6;
19063 }
19064 }
19065 }
19066
19067 return "";
19068 }
19069
19070 /* Return the name of a shifter operation. */
19071 static const char *
19072 arm_shift_nmem(enum rtx_code code)
19073 {
19074 switch (code)
19075 {
19076 case ASHIFT:
19077 return ARM_LSL_NAME;
19078
19079 case ASHIFTRT:
19080 return "asr";
19081
19082 case LSHIFTRT:
19083 return "lsr";
19084
19085 case ROTATERT:
19086 return "ror";
19087
19088 default:
19089 abort();
19090 }
19091 }
19092
19093 /* Return the appropriate ARM instruction for the operation code.
19094 The returned result should not be overwritten. OP is the rtx of the
19095 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
19096 was shifted. */
19097 const char *
19098 arithmetic_instr (rtx op, int shift_first_arg)
19099 {
19100 switch (GET_CODE (op))
19101 {
19102 case PLUS:
19103 return "add";
19104
19105 case MINUS:
19106 return shift_first_arg ? "rsb" : "sub";
19107
19108 case IOR:
19109 return "orr";
19110
19111 case XOR:
19112 return "eor";
19113
19114 case AND:
19115 return "and";
19116
19117 case ASHIFT:
19118 case ASHIFTRT:
19119 case LSHIFTRT:
19120 case ROTATERT:
19121 return arm_shift_nmem(GET_CODE(op));
19122
19123 default:
19124 gcc_unreachable ();
19125 }
19126 }
19127
19128 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19129 for the operation code. The returned result should not be overwritten.
19130    OP is the rtx of the shift.
19131    On exit, *AMOUNTP will be -1 if the shift is by a register, or the
19132    constant shift amount otherwise.  */
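/* For example, (rotate:SI (reg) (const_int 8)) returns "ror" with
   *AMOUNTP set to 24, while (mult:SI (reg) (const_int 8)) returns
   ARM_LSL_NAME with *AMOUNTP set to 3.  */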
19133 static const char *
19134 shift_op (rtx op, HOST_WIDE_INT *amountp)
19135 {
19136 const char * mnem;
19137 enum rtx_code code = GET_CODE (op);
19138
19139 switch (code)
19140 {
19141 case ROTATE:
19142 if (!CONST_INT_P (XEXP (op, 1)))
19143 {
19144 output_operand_lossage ("invalid shift operand");
19145 return NULL;
19146 }
19147
19148 code = ROTATERT;
19149 *amountp = 32 - INTVAL (XEXP (op, 1));
19150 mnem = "ror";
19151 break;
19152
19153 case ASHIFT:
19154 case ASHIFTRT:
19155 case LSHIFTRT:
19156 case ROTATERT:
19157 mnem = arm_shift_nmem(code);
19158 if (CONST_INT_P (XEXP (op, 1)))
19159 {
19160 *amountp = INTVAL (XEXP (op, 1));
19161 }
19162 else if (REG_P (XEXP (op, 1)))
19163 {
19164 *amountp = -1;
19165 return mnem;
19166 }
19167 else
19168 {
19169 output_operand_lossage ("invalid shift operand");
19170 return NULL;
19171 }
19172 break;
19173
19174 case MULT:
19175 /* We never have to worry about the amount being other than a
19176 power of 2, since this case can never be reloaded from a reg. */
19177 if (!CONST_INT_P (XEXP (op, 1)))
19178 {
19179 output_operand_lossage ("invalid shift operand");
19180 return NULL;
19181 }
19182
19183 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
19184
19185 /* Amount must be a power of two. */
19186 if (*amountp & (*amountp - 1))
19187 {
19188 output_operand_lossage ("invalid shift operand");
19189 return NULL;
19190 }
19191
19192 *amountp = exact_log2 (*amountp);
19193 gcc_assert (IN_RANGE (*amountp, 0, 31));
19194 return ARM_LSL_NAME;
19195
19196 default:
19197 output_operand_lossage ("invalid shift operand");
19198 return NULL;
19199 }
19200
19201 /* This is not 100% correct, but follows from the desire to merge
19202 multiplication by a power of 2 with the recognizer for a
19203    shift. >=32 is not a valid shift for "lsl", so we must try to
19204    output a shift that produces the correct arithmetical result.
19205 Using lsr #32 is identical except for the fact that the carry bit
19206 is not set correctly if we set the flags; but we never use the
19207 carry bit from such an operation, so we can ignore that. */
19208 if (code == ROTATERT)
19209 /* Rotate is just modulo 32. */
19210 *amountp &= 31;
19211 else if (*amountp != (*amountp & 31))
19212 {
19213 if (code == ASHIFT)
19214 mnem = "lsr";
19215 *amountp = 32;
19216 }
19217
19218 /* Shifts of 0 are no-ops. */
19219 if (*amountp == 0)
19220 return NULL;
19221
19222 return mnem;
19223 }
19224
19225 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19226 because /bin/as is horribly restrictive. The judgement about
19227 whether or not each character is 'printable' (and can be output as
19228 is) or not (and must be printed with an octal escape) must be made
19229 with reference to the *host* character set -- the situation is
19230 similar to that discussed in the comments above pp_c_char in
19231 c-pretty-print.c. */
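/* For instance, the three input bytes 'A', '"' and '\n' are emitted as

	.ascii	"A\"\012"

   with the quote backslash-escaped and the newline printed as an octal
   escape.  */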
19232
19233 #define MAX_ASCII_LEN 51
19234
19235 void
19236 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19237 {
19238 int i;
19239 int len_so_far = 0;
19240
19241 fputs ("\t.ascii\t\"", stream);
19242
19243 for (i = 0; i < len; i++)
19244 {
19245 int c = p[i];
19246
19247 if (len_so_far >= MAX_ASCII_LEN)
19248 {
19249 fputs ("\"\n\t.ascii\t\"", stream);
19250 len_so_far = 0;
19251 }
19252
19253 if (ISPRINT (c))
19254 {
19255 if (c == '\\' || c == '\"')
19256 {
19257 putc ('\\', stream);
19258 len_so_far++;
19259 }
19260 putc (c, stream);
19261 len_so_far++;
19262 }
19263 else
19264 {
19265 fprintf (stream, "\\%03o", c);
19266 len_so_far += 4;
19267 }
19268 }
19269
19270 fputs ("\"\n", stream);
19271 }
19272 \f
19273 /* Whether a register is callee saved or not.  This is necessary because,
19274    when optimizing for size on Thumb-1 targets, high registers are marked
19275    caller saved to avoid using them even though they are callee saved.  */
19276 #define callee_saved_reg_p(reg) \
19277 (!call_used_regs[reg] \
19278 || (TARGET_THUMB1 && optimize_size \
19279 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19280
19281 /* Compute the register save mask for registers 0 through 12
19282 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19283
19284 static unsigned long
19285 arm_compute_save_reg0_reg12_mask (void)
19286 {
19287 unsigned long func_type = arm_current_func_type ();
19288 unsigned long save_reg_mask = 0;
19289 unsigned int reg;
19290
19291 if (IS_INTERRUPT (func_type))
19292 {
19293 unsigned int max_reg;
19294 /* Interrupt functions must not corrupt any registers,
19295 even call clobbered ones. If this is a leaf function
19296 we can just examine the registers used by the RTL, but
19297 otherwise we have to assume that whatever function is
19298 called might clobber anything, and so we have to save
19299 all the call-clobbered registers as well. */
19300 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19301 /* FIQ handlers have registers r8 - r12 banked, so
19302	   we only need to check r0 - r7.  Normal ISRs only
19303 bank r14 and r15, so we must check up to r12.
19304 r13 is the stack pointer which is always preserved,
19305 so we do not need to consider it here. */
19306 max_reg = 7;
19307 else
19308 max_reg = 12;
19309
19310 for (reg = 0; reg <= max_reg; reg++)
19311 if (df_regs_ever_live_p (reg)
19312 || (! crtl->is_leaf && call_used_regs[reg]))
19313 save_reg_mask |= (1 << reg);
19314
19315 /* Also save the pic base register if necessary. */
19316 if (flag_pic
19317 && !TARGET_SINGLE_PIC_BASE
19318 && arm_pic_register != INVALID_REGNUM
19319 && crtl->uses_pic_offset_table)
19320 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19321 }
19322 else if (IS_VOLATILE(func_type))
19323 {
19324 /* For noreturn functions we historically omitted register saves
19325 altogether. However this really messes up debugging. As a
19326 compromise save just the frame pointers. Combined with the link
19327 register saved elsewhere this should be sufficient to get
19328 a backtrace. */
19329 if (frame_pointer_needed)
19330 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19331 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19332 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19333 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19334 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19335 }
19336 else
19337 {
19338 /* In the normal case we only need to save those registers
19339 which are call saved and which are used by this function. */
19340 for (reg = 0; reg <= 11; reg++)
19341 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19342 save_reg_mask |= (1 << reg);
19343
19344 /* Handle the frame pointer as a special case. */
19345 if (frame_pointer_needed)
19346 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19347
19348 /* If we aren't loading the PIC register,
19349 don't stack it even though it may be live. */
19350 if (flag_pic
19351 && !TARGET_SINGLE_PIC_BASE
19352 && arm_pic_register != INVALID_REGNUM
19353 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19354 || crtl->uses_pic_offset_table))
19355 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19356
19357 /* The prologue will copy SP into R0, so save it. */
19358 if (IS_STACKALIGN (func_type))
19359 save_reg_mask |= 1;
19360 }
19361
19362 /* Save registers so the exception handler can modify them. */
19363 if (crtl->calls_eh_return)
19364 {
19365 unsigned int i;
19366
19367 for (i = 0; ; i++)
19368 {
19369 reg = EH_RETURN_DATA_REGNO (i);
19370 if (reg == INVALID_REGNUM)
19371 break;
19372 save_reg_mask |= 1 << reg;
19373 }
19374 }
19375
19376 return save_reg_mask;
19377 }
19378
19379 /* Return true if r3 is live at the start of the function. */
19380
19381 static bool
19382 arm_r3_live_at_start_p (void)
19383 {
19384 /* Just look at cfg info, which is still close enough to correct at this
19385 point. This gives false positives for broken functions that might use
19386 uninitialized data that happens to be allocated in r3, but who cares? */
19387 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19388 }
19389
19390 /* Compute the number of bytes used to store the static chain register on the
19391 stack, above the stack frame. We need to know this accurately to get the
19392 alignment of the rest of the stack frame correct. */
19393
19394 static int
19395 arm_compute_static_chain_stack_bytes (void)
19396 {
19397 /* Once the value is updated from the init value of -1, do not
19398 re-compute. */
19399 if (cfun->machine->static_chain_stack_bytes != -1)
19400 return cfun->machine->static_chain_stack_bytes;
19401
19402 /* See the defining assertion in arm_expand_prologue. */
19403 if (IS_NESTED (arm_current_func_type ())
19404 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19405 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19406 || flag_stack_clash_protection)
19407 && !df_regs_ever_live_p (LR_REGNUM)))
19408 && arm_r3_live_at_start_p ()
19409 && crtl->args.pretend_args_size == 0)
19410 return 4;
19411
19412 return 0;
19413 }
19414
19415 /* Compute a bit mask of which core registers need to be
19416 saved on the stack for the current function.
19417 This is used by arm_compute_frame_layout, which may add extra registers. */
19418
19419 static unsigned long
19420 arm_compute_save_core_reg_mask (void)
19421 {
19422 unsigned int save_reg_mask = 0;
19423 unsigned long func_type = arm_current_func_type ();
19424 unsigned int reg;
19425
19426 if (IS_NAKED (func_type))
19427 /* This should never really happen. */
19428 return 0;
19429
19430 /* If we are creating a stack frame, then we must save the frame pointer,
19431 IP (which will hold the old stack pointer), LR and the PC. */
19432 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19433 save_reg_mask |=
19434 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19435 | (1 << IP_REGNUM)
19436 | (1 << LR_REGNUM)
19437 | (1 << PC_REGNUM);
19438
19439 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19440
19441 /* Decide if we need to save the link register.
19442 Interrupt routines have their own banked link register,
19443 so they never need to save it.
19444 Otherwise if we do not use the link register we do not need to save
19445 it. If we are pushing other registers onto the stack however, we
19446 can save an instruction in the epilogue by pushing the link register
19447 now and then popping it back into the PC. This incurs extra memory
19448 accesses though, so we only do it when optimizing for size, and only
19449 if we know that we will not need a fancy return sequence. */
19450 if (df_regs_ever_live_p (LR_REGNUM)
19451 || (save_reg_mask
19452 && optimize_size
19453 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19454 && !crtl->tail_call_emit
19455 && !crtl->calls_eh_return))
19456 save_reg_mask |= 1 << LR_REGNUM;
19457
19458 if (cfun->machine->lr_save_eliminated)
19459 save_reg_mask &= ~ (1 << LR_REGNUM);
19460
19461 if (TARGET_REALLY_IWMMXT
19462 && ((bit_count (save_reg_mask)
19463 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19464 arm_compute_static_chain_stack_bytes())
19465 ) % 2) != 0)
19466 {
19467 /* The total number of registers that are going to be pushed
19468 onto the stack is odd. We need to ensure that the stack
19469 is 64-bit aligned before we start to save iWMMXt registers,
19470 and also before we start to create locals. (A local variable
19471 might be a double or long long which we will load/store using
19472 an iWMMXt instruction). Therefore we need to push another
19473 ARM register, so that the stack will be 64-bit aligned. We
19474	 try to avoid using the arg registers (r0 - r3) as they might be
19475 used to pass values in a tail call. */
19476 for (reg = 4; reg <= 12; reg++)
19477 if ((save_reg_mask & (1 << reg)) == 0)
19478 break;
19479
19480 if (reg <= 12)
19481 save_reg_mask |= (1 << reg);
19482 else
19483 {
19484 cfun->machine->sibcall_blocked = 1;
19485 save_reg_mask |= (1 << 3);
19486 }
19487 }
19488
19489 /* We may need to push an additional register for use initializing the
19490 PIC base register. */
19491 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19492 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19493 {
19494 reg = thumb_find_work_register (1 << 4);
19495 if (!call_used_regs[reg])
19496 save_reg_mask |= (1 << reg);
19497 }
19498
19499 return save_reg_mask;
19500 }
19501
19502 /* Compute a bit mask of which core registers need to be
19503 saved on the stack for the current function. */
19504 static unsigned long
19505 thumb1_compute_save_core_reg_mask (void)
19506 {
19507 unsigned long mask;
19508 unsigned reg;
19509
19510 mask = 0;
19511 for (reg = 0; reg < 12; reg ++)
19512 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19513 mask |= 1 << reg;
19514
19515 /* Handle the frame pointer as a special case. */
19516 if (frame_pointer_needed)
19517 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19518
19519 if (flag_pic
19520 && !TARGET_SINGLE_PIC_BASE
19521 && arm_pic_register != INVALID_REGNUM
19522 && crtl->uses_pic_offset_table)
19523 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19524
19525 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19526 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19527 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19528
19529 /* LR will also be pushed if any lo regs are pushed. */
19530 if (mask & 0xff || thumb_force_lr_save ())
19531 mask |= (1 << LR_REGNUM);
19532
19533 /* Make sure we have a low work register if we need one.
19534 We will need one if we are going to push a high register,
19535 but we are not currently intending to push a low register. */
19536 if ((mask & 0xff) == 0
19537 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19538 {
19539 /* Use thumb_find_work_register to choose which register
19540 we will use. If the register is live then we will
19541 have to push it. Use LAST_LO_REGNUM as our fallback
19542 choice for the register to select. */
19543 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19544 /* Make sure the register returned by thumb_find_work_register is
19545 not part of the return value. */
19546 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19547 reg = LAST_LO_REGNUM;
19548
19549 if (callee_saved_reg_p (reg))
19550 mask |= 1 << reg;
19551 }
19552
19553 /* The 504 below is 8 bytes less than 512 because there are two possible
19554 alignment words. We can't tell here if they will be present or not so we
19555 have to play it safe and assume that they are. */
19556 if ((CALLER_INTERWORKING_SLOT_SIZE +
19557 ROUND_UP_WORD (get_frame_size ()) +
19558 crtl->outgoing_args_size) >= 504)
19559 {
19560 /* This is the same as the code in thumb1_expand_prologue() which
19561 determines which register to use for stack decrement. */
19562 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19563 if (mask & (1 << reg))
19564 break;
19565
19566 if (reg > LAST_LO_REGNUM)
19567 {
19568 /* Make sure we have a register available for stack decrement. */
19569 mask |= 1 << LAST_LO_REGNUM;
19570 }
19571 }
19572
19573 return mask;
19574 }
19575
19576
19577 /* Return the number of bytes required to save VFP registers. */
19578 static int
19579 arm_get_vfp_saved_size (void)
19580 {
19581 unsigned int regno;
19582 int count;
19583 int saved;
19584
19585 saved = 0;
19586 /* Space for saved VFP registers. */
19587 if (TARGET_HARD_FLOAT)
19588 {
19589 count = 0;
19590 for (regno = FIRST_VFP_REGNUM;
19591 regno < LAST_VFP_REGNUM;
19592 regno += 2)
19593 {
19594 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19595 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19596 {
19597 if (count > 0)
19598 {
19599 /* Workaround ARM10 VFPr1 bug. */
19600 if (count == 2 && !arm_arch6)
19601 count++;
19602 saved += count * 8;
19603 }
19604 count = 0;
19605 }
19606 else
19607 count++;
19608 }
19609 if (count > 0)
19610 {
19611 if (count == 2 && !arm_arch6)
19612 count++;
19613 saved += count * 8;
19614 }
19615 }
19616 return saved;
19617 }
19618
19619
19620 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19621    everything bar the final return instruction.  If SIMPLE_RETURN is true,
19622    then do not output the epilogue, because it has already been emitted in RTL.
19623
19624    Note: do not forget to update the length attribute of the corresponding
19625    insn pattern when changing the assembly output (e.g. the length attribute of
19626 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
19627 register clearing sequences). */
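/* As an illustration (register sets vary per function), a normal
   function that saved {r4, lr} returns with "pop {r4, pc}", an
   interworked return is "bx lr", and an ISR return restores the CPSR
   with an exception-return form such as "subs pc, lr, #4".  */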
19628 const char *
19629 output_return_instruction (rtx operand, bool really_return, bool reverse,
19630 bool simple_return)
19631 {
19632 char conditional[10];
19633 char instr[100];
19634 unsigned reg;
19635 unsigned long live_regs_mask;
19636 unsigned long func_type;
19637 arm_stack_offsets *offsets;
19638
19639 func_type = arm_current_func_type ();
19640
19641 if (IS_NAKED (func_type))
19642 return "";
19643
19644 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19645 {
19646 /* If this function was declared non-returning, and we have
19647 found a tail call, then we have to trust that the called
19648 function won't return. */
19649 if (really_return)
19650 {
19651 rtx ops[2];
19652
19653 /* Otherwise, trap an attempted return by aborting. */
19654 ops[0] = operand;
19655 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19656 : "abort");
19657 assemble_external_libcall (ops[1]);
19658 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19659 }
19660
19661 return "";
19662 }
19663
19664 gcc_assert (!cfun->calls_alloca || really_return);
19665
19666 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19667
19668 cfun->machine->return_used_this_function = 1;
19669
19670 offsets = arm_get_frame_offsets ();
19671 live_regs_mask = offsets->saved_regs_mask;
19672
19673 if (!simple_return && live_regs_mask)
19674 {
19675 const char * return_reg;
19676
19677 /* If we do not have any special requirements for function exit
19678 (e.g. interworking) then we can load the return address
19679 directly into the PC. Otherwise we must load it into LR. */
19680 if (really_return
19681 && !IS_CMSE_ENTRY (func_type)
19682 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19683 return_reg = reg_names[PC_REGNUM];
19684 else
19685 return_reg = reg_names[LR_REGNUM];
19686
19687 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19688 {
19689	  /* There are three possible reasons for the IP register
19690	     being saved: 1) a stack frame was created, in which case
19691	     IP contains the old stack pointer; 2) an ISR routine
19692	     corrupted it; or 3) it was saved to align the stack on
19693	     iWMMXt.  In case 1, restore IP into SP; otherwise just
19694	     restore IP.  */
19695 if (frame_pointer_needed)
19696 {
19697 live_regs_mask &= ~ (1 << IP_REGNUM);
19698 live_regs_mask |= (1 << SP_REGNUM);
19699 }
19700 else
19701 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19702 }
19703
19704 /* On some ARM architectures it is faster to use LDR rather than
19705 LDM to load a single register. On other architectures, the
19706 cost is the same. In 26 bit mode, or for exception handlers,
19707 we have to use LDM to load the PC so that the CPSR is also
19708 restored. */
19709 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19710 if (live_regs_mask == (1U << reg))
19711 break;
19712
19713 if (reg <= LAST_ARM_REGNUM
19714 && (reg != LR_REGNUM
19715 || ! really_return
19716 || ! IS_INTERRUPT (func_type)))
19717 {
19718 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19719 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19720 }
19721 else
19722 {
19723 char *p;
19724 int first = 1;
19725
19726 /* Generate the load multiple instruction to restore the
19727 registers. Note we can get here, even if
19728 frame_pointer_needed is true, but only if sp already
19729 points to the base of the saved core registers. */
19730 if (live_regs_mask & (1 << SP_REGNUM))
19731 {
19732 unsigned HOST_WIDE_INT stack_adjust;
19733
19734 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19735 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19736
19737 if (stack_adjust && arm_arch5t && TARGET_ARM)
19738 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19739 else
19740 {
19741 /* If we can't use ldmib (SA110 bug),
19742 then try to pop r3 instead. */
19743 if (stack_adjust)
19744 live_regs_mask |= 1 << 3;
19745
19746 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19747 }
19748 }
19749 /* For interrupt returns we have to use an LDM rather than
19750 a POP so that we can use the exception return variant. */
19751 else if (IS_INTERRUPT (func_type))
19752 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19753 else
19754 sprintf (instr, "pop%s\t{", conditional);
19755
19756 p = instr + strlen (instr);
19757
19758 for (reg = 0; reg <= SP_REGNUM; reg++)
19759 if (live_regs_mask & (1 << reg))
19760 {
19761 int l = strlen (reg_names[reg]);
19762
19763 if (first)
19764 first = 0;
19765 else
19766 {
19767 memcpy (p, ", ", 2);
19768 p += 2;
19769 }
19770
19771 memcpy (p, "%|", 2);
19772 memcpy (p + 2, reg_names[reg], l);
19773 p += l + 2;
19774 }
19775
19776 if (live_regs_mask & (1 << LR_REGNUM))
19777 {
19778 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19779 /* If returning from an interrupt, restore the CPSR. */
19780 if (IS_INTERRUPT (func_type))
19781 strcat (p, "^");
19782 }
19783 else
19784 strcpy (p, "}");
19785 }
19786
19787 output_asm_insn (instr, & operand);
19788
19789 /* See if we need to generate an extra instruction to
19790 perform the actual function return. */
19791 if (really_return
19792 && func_type != ARM_FT_INTERWORKED
19793 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19794 {
19795 /* The return has already been handled
19796 by loading the LR into the PC. */
19797 return "";
19798 }
19799 }
19800
19801 if (really_return)
19802 {
19803 switch ((int) ARM_FUNC_TYPE (func_type))
19804 {
19805 case ARM_FT_ISR:
19806 case ARM_FT_FIQ:
19807 /* ??? This is wrong for unified assembly syntax. */
19808 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19809 break;
19810
19811 case ARM_FT_INTERWORKED:
19812 gcc_assert (arm_arch5t || arm_arch4t);
19813 sprintf (instr, "bx%s\t%%|lr", conditional);
19814 break;
19815
19816 case ARM_FT_EXCEPTION:
19817 /* ??? This is wrong for unified assembly syntax. */
19818 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19819 break;
19820
19821 default:
19822 if (IS_CMSE_ENTRY (func_type))
19823 {
19824 /* Check if we have to clear the 'GE bits', which are only used if
19825 parallel add and subtract instructions are available. */
19826 if (TARGET_INT_SIMD)
19827 snprintf (instr, sizeof (instr),
19828 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19829 else
19830 snprintf (instr, sizeof (instr),
19831 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19832
19833 output_asm_insn (instr, & operand);
19834 if (TARGET_HARD_FLOAT && !TARGET_THUMB1)
19835 {
19836 /* Clear the cumulative exception-status bits (0-4,7) and the
19837 condition code bits (28-31) of the FPSCR. We need to
19838 remember to clear the first scratch register used (IP) and
19839 save and restore the second (r4). */
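		  /* Note on the constants below: the movw/movt pair builds the
		     mask 0x0FFFFF60 in r4 (65376 == 0xFF60, 4095 == 0x0FFF),
		     which when ANDed into the FPSCR clears exactly bits 0-4, 7
		     and 28-31 while preserving everything else.  */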
19840 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19841 output_asm_insn (instr, & operand);
19842 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19843 output_asm_insn (instr, & operand);
19844 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19845 output_asm_insn (instr, & operand);
19846 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19847 output_asm_insn (instr, & operand);
19848 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19849 output_asm_insn (instr, & operand);
19850 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19851 output_asm_insn (instr, & operand);
19852 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19853 output_asm_insn (instr, & operand);
19854 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19855 output_asm_insn (instr, & operand);
19856 }
19857 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19858 }
19859 /* Use bx if it's available. */
19860 else if (arm_arch5t || arm_arch4t)
19861 sprintf (instr, "bx%s\t%%|lr", conditional);
19862 else
19863 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19864 break;
19865 }
19866
19867 output_asm_insn (instr, & operand);
19868 }
19869
19870 return "";
19871 }
19872
19873 /* Output in FILE asm statements needed to declare the NAME of the function
19874 defined by its DECL node. */
19875
19876 void
19877 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19878 {
19879 size_t cmse_name_len;
19880 char *cmse_name = 0;
19881 char cmse_prefix[] = "__acle_se_";
19882
19883 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19884 extra function label for each function with the 'cmse_nonsecure_entry'
19885 attribute. This extra function label should be prepended with
19886 '__acle_se_', telling the linker that it needs to create secure gateway
19887 veneers for this function. */
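     /* As an illustration, for a hypothetical non-secure entry function 'foo'
	on an ELF target this results in output along the lines of:

	     .global __acle_se_foo
	     .type   __acle_se_foo, %function
	     .type   foo, %function
	 foo:
	 __acle_se_foo:

	(the exact directives depend on ARM_DECLARE_FUNCTION_NAME and the
	target's type/label macros).  */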
19888 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19889 DECL_ATTRIBUTES (decl)))
19890 {
19891 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19892 cmse_name = XALLOCAVEC (char, cmse_name_len);
19893 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19894 targetm.asm_out.globalize_label (file, cmse_name);
19895
19896 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19897 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19898 }
19899
19900 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19901 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19902 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19903 ASM_OUTPUT_LABEL (file, name);
19904
19905 if (cmse_name)
19906 ASM_OUTPUT_LABEL (file, cmse_name);
19907
19908 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19909 }
19910
19911 /* Write the function name into the code section, directly preceding
19912 the function prologue.
19913
19914 Code will be output similar to this:
19915 t0
19916 .ascii "arm_poke_function_name", 0
19917 .align
19918 t1
19919 .word 0xff000000 + (t1 - t0)
19920 arm_poke_function_name
19921 mov ip, sp
19922 stmfd sp!, {fp, ip, lr, pc}
19923 sub fp, ip, #4
19924
19925 When performing a stack backtrace, code can inspect the value
19926 of 'pc' stored at 'fp' + 0. If the trace function then looks
19927 at location pc - 12 and the top 8 bits are set, then we know
19928 that there is a function name embedded immediately preceding this
19929 location, whose length is given by ((pc[-3]) & ~0xff000000).
19930
19931 We assume that pc is declared as a pointer to an unsigned long.
19932
19933 It is of no benefit to output the function name if we are assembling
19934 a leaf function. These function types will not contain a stack
19935 backtrace structure, therefore it is not possible to determine the
19936 function name. */
19937 void
19938 arm_poke_function_name (FILE *stream, const char *name)
19939 {
19940 unsigned long alignlength;
19941 unsigned long length;
19942 rtx x;
19943
19944 length = strlen (name) + 1;
19945 alignlength = ROUND_UP_WORD (length);
19946
19947 ASM_OUTPUT_ASCII (stream, name, length);
19948 ASM_OUTPUT_ALIGN (stream, 2);
19949 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19950 assemble_aligned_integer (UNITS_PER_WORD, x);
19951 }
19952
19953 /* Place some comments into the assembler stream
19954 describing the current function. */
19955 static void
19956 arm_output_function_prologue (FILE *f)
19957 {
19958 unsigned long func_type;
19959
19960 /* Sanity check. */
19961 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
19962
19963 func_type = arm_current_func_type ();
19964
19965 switch ((int) ARM_FUNC_TYPE (func_type))
19966 {
19967 default:
19968 case ARM_FT_NORMAL:
19969 break;
19970 case ARM_FT_INTERWORKED:
19971 asm_fprintf (f, "\t%@ Function supports interworking.\n");
19972 break;
19973 case ARM_FT_ISR:
19974 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
19975 break;
19976 case ARM_FT_FIQ:
19977 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
19978 break;
19979 case ARM_FT_EXCEPTION:
19980 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
19981 break;
19982 }
19983
19984 if (IS_NAKED (func_type))
19985 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
19986
19987 if (IS_VOLATILE (func_type))
19988 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
19989
19990 if (IS_NESTED (func_type))
19991 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
19992 if (IS_STACKALIGN (func_type))
19993 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
19994 if (IS_CMSE_ENTRY (func_type))
19995 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
19996
19997 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
19998 (HOST_WIDE_INT) crtl->args.size,
19999 crtl->args.pretend_args_size,
20000 (HOST_WIDE_INT) get_frame_size ());
20001
20002 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
20003 frame_pointer_needed,
20004 cfun->machine->uses_anonymous_args);
20005
20006 if (cfun->machine->lr_save_eliminated)
20007 asm_fprintf (f, "\t%@ link register save eliminated.\n");
20008
20009 if (crtl->calls_eh_return)
20010 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
20011
20012 }
20013
20014 static void
20015 arm_output_function_epilogue (FILE *)
20016 {
20017 arm_stack_offsets *offsets;
20018
20019 if (TARGET_THUMB1)
20020 {
20021 int regno;
20022
20023 /* Emit any call-via-reg trampolines that are needed for v4t support
20024 of call_reg and call_value_reg type insns. */
20025 for (regno = 0; regno < LR_REGNUM; regno++)
20026 {
20027 rtx label = cfun->machine->call_via[regno];
20028
20029 if (label != NULL)
20030 {
20031 switch_to_section (function_section (current_function_decl));
20032 targetm.asm_out.internal_label (asm_out_file, "L",
20033 CODE_LABEL_NUMBER (label));
20034 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20035 }
20036 }
20037
20038 /* ??? Probably not safe to set this here, since it assumes that a
20039 function will be emitted as assembly immediately after we generate
20040 RTL for it. This does not happen for inline functions. */
20041 cfun->machine->return_used_this_function = 0;
20042 }
20043 else /* TARGET_32BIT */
20044 {
20045 /* We need to take into account any stack-frame rounding. */
20046 offsets = arm_get_frame_offsets ();
20047
20048 gcc_assert (!use_return_insn (FALSE, NULL)
20049 || (cfun->machine->return_used_this_function != 0)
20050 || offsets->saved_regs == offsets->outgoing_args
20051 || frame_pointer_needed);
20052 }
20053 }
20054
20055 /* Generate and emit a sequence of insns equivalent to PUSH, but using
20056 STR and STRD. If an even number of registers is being pushed, one
20057 STRD pattern is created for each register pair. If an odd number of
20058 registers is pushed, emit an initial STR followed by as many STRD
20059 instructions as are needed. This works best when the
20060 stack is initially 64-bit aligned (the normal case), since it
20061 ensures that each STRD is also 64-bit aligned. */
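/* As an illustrative sketch (not taken verbatim from this function's output),
   pushing { r4, r5, r6, r7, lr } -- five registers -- would be emitted
   roughly as:

	str	r4, [sp, #-20]!
	strd	r5, r6, [sp, #4]
	strd	r7, lr, [sp, #12]

   i.e. one STR with writeback that also allocates the whole 20-byte area,
   followed by doubleword-aligned STRDs at fixed offsets.  */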
20062 static void
20063 thumb2_emit_strd_push (unsigned long saved_regs_mask)
20064 {
20065 int num_regs = 0;
20066 int i;
20067 int regno;
20068 rtx par = NULL_RTX;
20069 rtx dwarf = NULL_RTX;
20070 rtx tmp;
20071 bool first = true;
20072
20073 num_regs = bit_count (saved_regs_mask);
20074
20075 /* Must be at least one register to save, and can't save SP or PC. */
20076 gcc_assert (num_regs > 0 && num_regs <= 14);
20077 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20078 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20079
20080 /* Create sequence for DWARF info. All the frame-related data for
20081 debugging is held in this wrapper. */
20082 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20083
20084 /* Describe the stack adjustment. */
20085 tmp = gen_rtx_SET (stack_pointer_rtx,
20086 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20087 RTX_FRAME_RELATED_P (tmp) = 1;
20088 XVECEXP (dwarf, 0, 0) = tmp;
20089
20090 /* Find the first register. */
20091 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
20092 ;
20093
20094 i = 0;
20095
20096 /* If there's an odd number of registers to push, start off by
20097 pushing a single register. This ensures that subsequent strd
20098 operations are dword aligned (assuming that SP was originally
20099 64-bit aligned). */
20100 if ((num_regs & 1) != 0)
20101 {
20102 rtx reg, mem, insn;
20103
20104 reg = gen_rtx_REG (SImode, regno);
20105 if (num_regs == 1)
20106 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
20107 stack_pointer_rtx));
20108 else
20109 mem = gen_frame_mem (Pmode,
20110 gen_rtx_PRE_MODIFY
20111 (Pmode, stack_pointer_rtx,
20112 plus_constant (Pmode, stack_pointer_rtx,
20113 -4 * num_regs)));
20114
20115 tmp = gen_rtx_SET (mem, reg);
20116 RTX_FRAME_RELATED_P (tmp) = 1;
20117 insn = emit_insn (tmp);
20118 RTX_FRAME_RELATED_P (insn) = 1;
20119 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20120 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
20121 RTX_FRAME_RELATED_P (tmp) = 1;
20122 i++;
20123 regno++;
20124 XVECEXP (dwarf, 0, i) = tmp;
20125 first = false;
20126 }
20127
20128 while (i < num_regs)
20129 if (saved_regs_mask & (1 << regno))
20130 {
20131 rtx reg1, reg2, mem1, mem2;
20132 rtx tmp0, tmp1, tmp2;
20133 int regno2;
20134
20135 /* Find the register to pair with this one. */
20136 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
20137 regno2++)
20138 ;
20139
20140 reg1 = gen_rtx_REG (SImode, regno);
20141 reg2 = gen_rtx_REG (SImode, regno2);
20142
20143 if (first)
20144 {
20145 rtx insn;
20146
20147 first = false;
20148 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20149 stack_pointer_rtx,
20150 -4 * num_regs));
20151 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20152 stack_pointer_rtx,
20153 -4 * (num_regs - 1)));
20154 tmp0 = gen_rtx_SET (stack_pointer_rtx,
20155 plus_constant (Pmode, stack_pointer_rtx,
20156 -4 * (num_regs)));
20157 tmp1 = gen_rtx_SET (mem1, reg1);
20158 tmp2 = gen_rtx_SET (mem2, reg2);
20159 RTX_FRAME_RELATED_P (tmp0) = 1;
20160 RTX_FRAME_RELATED_P (tmp1) = 1;
20161 RTX_FRAME_RELATED_P (tmp2) = 1;
20162 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
20163 XVECEXP (par, 0, 0) = tmp0;
20164 XVECEXP (par, 0, 1) = tmp1;
20165 XVECEXP (par, 0, 2) = tmp2;
20166 insn = emit_insn (par);
20167 RTX_FRAME_RELATED_P (insn) = 1;
20168 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20169 }
20170 else
20171 {
20172 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20173 stack_pointer_rtx,
20174 4 * i));
20175 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20176 stack_pointer_rtx,
20177 4 * (i + 1)));
20178 tmp1 = gen_rtx_SET (mem1, reg1);
20179 tmp2 = gen_rtx_SET (mem2, reg2);
20180 RTX_FRAME_RELATED_P (tmp1) = 1;
20181 RTX_FRAME_RELATED_P (tmp2) = 1;
20182 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20183 XVECEXP (par, 0, 0) = tmp1;
20184 XVECEXP (par, 0, 1) = tmp2;
20185 emit_insn (par);
20186 }
20187
20188 /* Create unwind information. This is an approximation. */
20189 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
20190 plus_constant (Pmode,
20191 stack_pointer_rtx,
20192 4 * i)),
20193 reg1);
20194 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
20195 plus_constant (Pmode,
20196 stack_pointer_rtx,
20197 4 * (i + 1))),
20198 reg2);
20199
20200 RTX_FRAME_RELATED_P (tmp1) = 1;
20201 RTX_FRAME_RELATED_P (tmp2) = 1;
20202 XVECEXP (dwarf, 0, i + 1) = tmp1;
20203 XVECEXP (dwarf, 0, i + 2) = tmp2;
20204 i += 2;
20205 regno = regno2 + 1;
20206 }
20207 else
20208 regno++;
20209
20210 return;
20211 }
20212
20213 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20214 whenever possible, otherwise it emits single-word stores. The first store
20215 also allocates stack space for all saved registers, using pre-indexed
20216 addressing with writeback. All other stores use offset addressing. If no
20217 STRD can be emitted, this function emits a sequence of single-word stores,
20218 and not an STM as before, because single-word stores give the scheduler
20219 more freedom and can be turned into an STM by peephole optimizations. */
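/* Illustrative sketch only: for { r4, r5, r7, lr } this would emit
   approximately

	strd	r4, r5, [sp, #-16]!
	str	r7, [sp, #8]
	str	lr, [sp, #12]

   where the first (pre-indexed, writeback) store allocates the full
   16 bytes and the remaining stores use plain offset addressing.  */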
20220 static void
20221 arm_emit_strd_push (unsigned long saved_regs_mask)
20222 {
20223 int num_regs = 0;
20224 int i, j, dwarf_index = 0;
20225 int offset = 0;
20226 rtx dwarf = NULL_RTX;
20227 rtx insn = NULL_RTX;
20228 rtx tmp, mem;
20229
20230 /* TODO: More efficient code could be emitted by changing the
20231 layout, e.g. by first pushing all the pairs that can use STRD to
20232 keep the stack aligned, and then pushing all the other registers. */
20233 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20234 if (saved_regs_mask & (1 << i))
20235 num_regs++;
20236
20237 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20238 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20239 gcc_assert (num_regs > 0);
20240
20241 /* Create sequence for DWARF info. */
20242 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20243
20244 /* For dwarf info, we generate explicit stack update. */
20245 tmp = gen_rtx_SET (stack_pointer_rtx,
20246 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20247 RTX_FRAME_RELATED_P (tmp) = 1;
20248 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20249
20250 /* Save registers. */
20251 offset = - 4 * num_regs;
20252 j = 0;
20253 while (j <= LAST_ARM_REGNUM)
20254 if (saved_regs_mask & (1 << j))
20255 {
20256 if ((j % 2 == 0)
20257 && (saved_regs_mask & (1 << (j + 1))))
20258 {
20259 /* The current register and the next register form a register pair
20260 for which STRD can be generated. */
20261 if (offset < 0)
20262 {
20263 /* Allocate stack space for all saved registers. */
20264 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20265 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20266 mem = gen_frame_mem (DImode, tmp);
20267 offset = 0;
20268 }
20269 else if (offset > 0)
20270 mem = gen_frame_mem (DImode,
20271 plus_constant (Pmode,
20272 stack_pointer_rtx,
20273 offset));
20274 else
20275 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20276
20277 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20278 RTX_FRAME_RELATED_P (tmp) = 1;
20279 tmp = emit_insn (tmp);
20280
20281 /* Record the first store insn. */
20282 if (dwarf_index == 1)
20283 insn = tmp;
20284
20285 /* Generate dwarf info. */
20286 mem = gen_frame_mem (SImode,
20287 plus_constant (Pmode,
20288 stack_pointer_rtx,
20289 offset));
20290 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20291 RTX_FRAME_RELATED_P (tmp) = 1;
20292 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20293
20294 mem = gen_frame_mem (SImode,
20295 plus_constant (Pmode,
20296 stack_pointer_rtx,
20297 offset + 4));
20298 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20299 RTX_FRAME_RELATED_P (tmp) = 1;
20300 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20301
20302 offset += 8;
20303 j += 2;
20304 }
20305 else
20306 {
20307 /* Emit a single word store. */
20308 if (offset < 0)
20309 {
20310 /* Allocate stack space for all saved registers. */
20311 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20312 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20313 mem = gen_frame_mem (SImode, tmp);
20314 offset = 0;
20315 }
20316 else if (offset > 0)
20317 mem = gen_frame_mem (SImode,
20318 plus_constant (Pmode,
20319 stack_pointer_rtx,
20320 offset));
20321 else
20322 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20323
20324 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20325 RTX_FRAME_RELATED_P (tmp) = 1;
20326 tmp = emit_insn (tmp);
20327
20328 /* Record the first store insn. */
20329 if (dwarf_index == 1)
20330 insn = tmp;
20331
20332 /* Generate dwarf info. */
20333 mem = gen_frame_mem (SImode,
20334 plus_constant (Pmode,
20335 stack_pointer_rtx,
20336 offset));
20337 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20338 RTX_FRAME_RELATED_P (tmp) = 1;
20339 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20340
20341 offset += 4;
20342 j += 1;
20343 }
20344 }
20345 else
20346 j++;
20347
20348 /* Attach dwarf info to the first insn we generate. */
20349 gcc_assert (insn != NULL_RTX);
20350 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20351 RTX_FRAME_RELATED_P (insn) = 1;
20352 }
20353
20354 /* Generate and emit an insn that we will recognize as a push_multi.
20355 Unfortunately, since this insn does not reflect very well the actual
20356 semantics of the operation, we need to annotate the insn for the benefit
20357 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20358 MASK for registers that should be annotated for DWARF2 frame unwind
20359 information. */
20360 static rtx
20361 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20362 {
20363 int num_regs = 0;
20364 int num_dwarf_regs = 0;
20365 int i, j;
20366 rtx par;
20367 rtx dwarf;
20368 int dwarf_par_index;
20369 rtx tmp, reg;
20370
20371 /* We don't record the PC in the dwarf frame information. */
20372 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20373
20374 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20375 {
20376 if (mask & (1 << i))
20377 num_regs++;
20378 if (dwarf_regs_mask & (1 << i))
20379 num_dwarf_regs++;
20380 }
20381
20382 gcc_assert (num_regs && num_regs <= 16);
20383 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20384
20385 /* For the body of the insn we are going to generate an UNSPEC in
20386 parallel with several USEs. This allows the insn to be recognized
20387 by the push_multi pattern in the arm.md file.
20388
20389 The body of the insn looks something like this:
20390
20391 (parallel [
20392 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20393 (const_int:SI <num>)))
20394 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20395 (use (reg:SI XX))
20396 (use (reg:SI YY))
20397 ...
20398 ])
20399
20400 For the frame note however, we try to be more explicit and actually
20401 show each register being stored into the stack frame, plus a (single)
20402 decrement of the stack pointer. We do it this way in order to be
20403 friendly to the stack unwinding code, which only wants to see a single
20404 stack decrement per instruction. The RTL we generate for the note looks
20405 something like this:
20406
20407 (sequence [
20408 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20409 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20410 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20411 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20412 ...
20413 ])
20414
20415 FIXME:: In an ideal world the PRE_MODIFY would not exist and
20416 instead we'd have a parallel expression detailing all
20417 the stores to the various memory addresses so that debug
20418 information is more up-to-date. Remember however while writing
20419 this to take care of the constraints with the push instruction.
20420
20421 Note also that this has to be taken care of for the VFP registers.
20422
20423 For more see PR43399. */
20424
20425 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20426 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20427 dwarf_par_index = 1;
20428
20429 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20430 {
20431 if (mask & (1 << i))
20432 {
20433 reg = gen_rtx_REG (SImode, i);
20434
20435 XVECEXP (par, 0, 0)
20436 = gen_rtx_SET (gen_frame_mem
20437 (BLKmode,
20438 gen_rtx_PRE_MODIFY (Pmode,
20439 stack_pointer_rtx,
20440 plus_constant
20441 (Pmode, stack_pointer_rtx,
20442 -4 * num_regs))
20443 ),
20444 gen_rtx_UNSPEC (BLKmode,
20445 gen_rtvec (1, reg),
20446 UNSPEC_PUSH_MULT));
20447
20448 if (dwarf_regs_mask & (1 << i))
20449 {
20450 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20451 reg);
20452 RTX_FRAME_RELATED_P (tmp) = 1;
20453 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20454 }
20455
20456 break;
20457 }
20458 }
20459
20460 for (j = 1, i++; j < num_regs; i++)
20461 {
20462 if (mask & (1 << i))
20463 {
20464 reg = gen_rtx_REG (SImode, i);
20465
20466 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20467
20468 if (dwarf_regs_mask & (1 << i))
20469 {
20470 tmp
20471 = gen_rtx_SET (gen_frame_mem
20472 (SImode,
20473 plus_constant (Pmode, stack_pointer_rtx,
20474 4 * j)),
20475 reg);
20476 RTX_FRAME_RELATED_P (tmp) = 1;
20477 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20478 }
20479
20480 j++;
20481 }
20482 }
20483
20484 par = emit_insn (par);
20485
20486 tmp = gen_rtx_SET (stack_pointer_rtx,
20487 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20488 RTX_FRAME_RELATED_P (tmp) = 1;
20489 XVECEXP (dwarf, 0, 0) = tmp;
20490
20491 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20492
20493 return par;
20494 }
20495
20496 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20497 SIZE is the offset to be adjusted.
20498 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20499 static void
20500 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20501 {
20502 rtx dwarf;
20503
20504 RTX_FRAME_RELATED_P (insn) = 1;
20505 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20506 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20507 }
20508
20509 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20510 SAVED_REGS_MASK shows which registers need to be restored.
20511
20512 Unfortunately, since this insn does not reflect very well the actual
20513 semantics of the operation, we need to annotate the insn for the benefit
20514 of DWARF2 frame unwind information. */
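/* For example, popping { r4, r5, pc } produces, roughly, a parallel of the
   form (mirroring the push example earlier in this file):

     (parallel [
	(return)
	(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 12)))
	(set (reg:SI r4) (mem:SI (reg:SI sp)))
	(set (reg:SI r5) (mem:SI (plus:SI (reg:SI sp) (const_int 4))))
	(set (reg:SI pc) (mem:SI (plus:SI (reg:SI sp) (const_int 8))))
      ])  */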
20515 static void
20516 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20517 {
20518 int num_regs = 0;
20519 int i, j;
20520 rtx par;
20521 rtx dwarf = NULL_RTX;
20522 rtx tmp, reg;
20523 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20524 int offset_adj;
20525 int emit_update;
20526
20527 offset_adj = return_in_pc ? 1 : 0;
20528 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20529 if (saved_regs_mask & (1 << i))
20530 num_regs++;
20531
20532 gcc_assert (num_regs && num_regs <= 16);
20533
20534 /* If SP is in reglist, then we don't emit SP update insn. */
20535 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20536
20537 /* The parallel needs to hold num_regs SETs
20538 and one SET for the stack update. */
20539 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20540
20541 if (return_in_pc)
20542 XVECEXP (par, 0, 0) = ret_rtx;
20543
20544 if (emit_update)
20545 {
20546 /* Increment the stack pointer, based on there being
20547 num_regs 4-byte registers to restore. */
20548 tmp = gen_rtx_SET (stack_pointer_rtx,
20549 plus_constant (Pmode,
20550 stack_pointer_rtx,
20551 4 * num_regs));
20552 RTX_FRAME_RELATED_P (tmp) = 1;
20553 XVECEXP (par, 0, offset_adj) = tmp;
20554 }
20555
20556 /* Now restore every reg, which may include PC. */
20557 for (j = 0, i = 0; j < num_regs; i++)
20558 if (saved_regs_mask & (1 << i))
20559 {
20560 reg = gen_rtx_REG (SImode, i);
20561 if ((num_regs == 1) && emit_update && !return_in_pc)
20562 {
20563 /* Emit single load with writeback. */
20564 tmp = gen_frame_mem (SImode,
20565 gen_rtx_POST_INC (Pmode,
20566 stack_pointer_rtx));
20567 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20568 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20569 return;
20570 }
20571
20572 tmp = gen_rtx_SET (reg,
20573 gen_frame_mem
20574 (SImode,
20575 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20576 RTX_FRAME_RELATED_P (tmp) = 1;
20577 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20578
20579 /* We need to maintain a sequence for DWARF info too. As dwarf info
20580 should not have PC, skip PC. */
20581 if (i != PC_REGNUM)
20582 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20583
20584 j++;
20585 }
20586
20587 if (return_in_pc)
20588 par = emit_jump_insn (par);
20589 else
20590 par = emit_insn (par);
20591
20592 REG_NOTES (par) = dwarf;
20593 if (!return_in_pc)
20594 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20595 stack_pointer_rtx, stack_pointer_rtx);
20596 }
20597
20598 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20599 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20600
20601 Unfortunately, since this insn does not reflect very well the actual
20602 semantics of the operation, we need to annotate the insn for the benefit
20603 of DWARF2 frame unwind information. */
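/* As a sketch, popping two D-registers starting at d8 with BASE_REG == sp
   yields roughly:

     (parallel [
	(set (reg:SI sp) (plus:SI (reg:SI sp) (const_int 16)))
	(set (reg:DF d8) (mem:DF (reg:SI sp)))
	(set (reg:DF d9) (mem:DF (plus:SI (reg:SI sp) (const_int 8))))
      ])  */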
20604 static void
20605 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20606 {
20607 int i, j;
20608 rtx par;
20609 rtx dwarf = NULL_RTX;
20610 rtx tmp, reg;
20611
20612 gcc_assert (num_regs && num_regs <= 32);
20613
20614 /* Workaround ARM10 VFPr1 bug. */
20615 if (num_regs == 2 && !arm_arch6)
20616 {
20617 if (first_reg == 15)
20618 first_reg--;
20619
20620 num_regs++;
20621 }
20622
20623 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20624 there could be up to 32 D-registers to restore.
20625 If there are more than 16 D-registers, make two recursive calls,
20626 each of which emits one pop_multi instruction. */
20627 if (num_regs > 16)
20628 {
20629 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20630 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20631 return;
20632 }
20633
20634 /* The parallel needs to hold num_regs SETs
20635 and one SET for the stack update. */
20636 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20637
20638 /* Increment the stack pointer, based on there being
20639 num_regs 8-byte registers to restore. */
20640 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20641 RTX_FRAME_RELATED_P (tmp) = 1;
20642 XVECEXP (par, 0, 0) = tmp;
20643
20644 /* Now show every reg that will be restored, using a SET for each. */
20645 for (j = 0, i=first_reg; j < num_regs; i += 2)
20646 {
20647 reg = gen_rtx_REG (DFmode, i);
20648
20649 tmp = gen_rtx_SET (reg,
20650 gen_frame_mem
20651 (DFmode,
20652 plus_constant (Pmode, base_reg, 8 * j)));
20653 RTX_FRAME_RELATED_P (tmp) = 1;
20654 XVECEXP (par, 0, j + 1) = tmp;
20655
20656 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20657
20658 j++;
20659 }
20660
20661 par = emit_insn (par);
20662 REG_NOTES (par) = dwarf;
20663
20664 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
20665 if (REGNO (base_reg) == IP_REGNUM)
20666 {
20667 RTX_FRAME_RELATED_P (par) = 1;
20668 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20669 }
20670 else
20671 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20672 base_reg, base_reg);
20673 }
20674
20675 /* Generate and emit a pattern that will be recognized as an LDRD pattern.
20676 If an even number of registers is being popped, multiple LDRD patterns are
20677 created for all register pairs. If an odd number of registers is popped,
20678 the last register is loaded using an LDR pattern. */
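/* Illustrative sketch: popping { r4, r5, r6, r7, pc } would be emitted
   roughly as

	ldrd	r4, r5, [sp]
	ldrd	r6, r7, [sp, #8]
	add	sp, sp, #16
	ldr	pc, [sp], #4

   with the final post-incremented LDR doubling as the function return.  */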
20679 static void
20680 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20681 {
20682 int num_regs = 0;
20683 int i, j;
20684 rtx par = NULL_RTX;
20685 rtx dwarf = NULL_RTX;
20686 rtx tmp, reg, tmp1;
20687 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20688
20689 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20690 if (saved_regs_mask & (1 << i))
20691 num_regs++;
20692
20693 gcc_assert (num_regs && num_regs <= 16);
20694
20695 /* We cannot generate LDRD for PC, so reduce the count if PC is to be
20696 popped. If num_regs was even it now becomes odd, and we can generate
20697 a pop with PC. If num_regs was odd it is now even, and an LDR with
20698 return can be generated for PC. */
20699 if (return_in_pc)
20700 num_regs--;
20701
20702 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20703
20704 /* Var j iterates over all the registers in saved_regs_mask. Var i gives
20705 the index of each saved register in the stack frame. A PARALLEL RTX of
20706 a register pair is created here, so that the pattern for LDRD can be
20707 matched. As PC is always the last register to be popped, and we have
20708 already decremented num_regs if PC is set, we don't have to worry
20709 about PC in this loop. */
20710 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20711 if (saved_regs_mask & (1 << j))
20712 {
20713 /* Create RTX for memory load. */
20714 reg = gen_rtx_REG (SImode, j);
20715 tmp = gen_rtx_SET (reg,
20716 gen_frame_mem (SImode,
20717 plus_constant (Pmode,
20718 stack_pointer_rtx, 4 * i)));
20719 RTX_FRAME_RELATED_P (tmp) = 1;
20720
20721 if (i % 2 == 0)
20722 {
20723 /* When saved-register index (i) is even, the RTX to be emitted is
20724 yet to be created. Hence create it first. The LDRD pattern we
20725 are generating is :
20726 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20727 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20728 where target registers need not be consecutive. */
20729 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20730 dwarf = NULL_RTX;
20731 }
20732
20733 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20734 added as 0th element and if i is odd, reg_i is added as 1st element
20735 of LDRD pattern shown above. */
20736 XVECEXP (par, 0, (i % 2)) = tmp;
20737 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20738
20739 if ((i % 2) == 1)
20740 {
20741 /* When saved-register index (i) is odd, RTXs for both the registers
20742 to be loaded are generated in above given LDRD pattern, and the
20743 pattern can be emitted now. */
20744 par = emit_insn (par);
20745 REG_NOTES (par) = dwarf;
20746 RTX_FRAME_RELATED_P (par) = 1;
20747 }
20748
20749 i++;
20750 }
20751
20752 /* If the number of registers pushed is odd AND return_in_pc is false, OR
20753 the number of registers is even AND return_in_pc is true, the last
20754 register is popped using LDR. It can be PC as well. Hence, adjust the
20755 stack first and then emit the LDR with post-increment. */
20756
20757 /* Increment the stack pointer, based on there being
20758 num_regs 4-byte registers to restore. */
20759 tmp = gen_rtx_SET (stack_pointer_rtx,
20760 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20761 RTX_FRAME_RELATED_P (tmp) = 1;
20762 tmp = emit_insn (tmp);
20763 if (!return_in_pc)
20764 {
20765 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20766 stack_pointer_rtx, stack_pointer_rtx);
20767 }
20768
20769 dwarf = NULL_RTX;
20770
20771 if (((num_regs % 2) == 1 && !return_in_pc)
20772 || ((num_regs % 2) == 0 && return_in_pc))
20773 {
20774 /* Scan for the single register to be popped. Skip until the saved
20775 register is found. */
20776 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20777
20778 /* Gen LDR with post increment here. */
20779 tmp1 = gen_rtx_MEM (SImode,
20780 gen_rtx_POST_INC (SImode,
20781 stack_pointer_rtx));
20782 set_mem_alias_set (tmp1, get_frame_alias_set ());
20783
20784 reg = gen_rtx_REG (SImode, j);
20785 tmp = gen_rtx_SET (reg, tmp1);
20786 RTX_FRAME_RELATED_P (tmp) = 1;
20787 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20788
20789 if (return_in_pc)
20790 {
20791 /* If return_in_pc, j must be PC_REGNUM. */
20792 gcc_assert (j == PC_REGNUM);
20793 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20794 XVECEXP (par, 0, 0) = ret_rtx;
20795 XVECEXP (par, 0, 1) = tmp;
20796 par = emit_jump_insn (par);
20797 }
20798 else
20799 {
20800 par = emit_insn (tmp);
20801 REG_NOTES (par) = dwarf;
20802 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20803 stack_pointer_rtx, stack_pointer_rtx);
20804 }
20805
20806 }
20807 else if ((num_regs % 2) == 1 && return_in_pc)
20808 {
20809 /* There are 2 registers to be popped. So, generate the pattern
20810 pop_multiple_with_stack_update_and_return to pop in PC. */
20811 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20812 }
20813
20814 return;
20815 }
20816
20817 /* LDRD in ARM mode needs consecutive registers as operands. This function
20818 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20819 offset addressing and then generates one separate stack update. This provides
20820 more scheduling freedom, compared to writeback on every load. However,
20821 if the function returns using load into PC directly
20822 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20823 before the last load. TODO: Add a peephole optimization to recognize
20824 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20825 peephole optimization to merge the load at stack-offset zero
20826 with the stack update instruction using load with writeback
20827 in post-index addressing mode. */
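/* Illustrative sketch: popping { r4, r5, r6, pc } would come out roughly as

	ldrd	r4, r5, [sp]
	ldr	r6, [sp, #8]
	add	sp, sp, #12
	ldr	pc, [sp], #4

   i.e. offset-addressed loads, one explicit stack update, and a final
   post-incremented load into PC that performs the return.  */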
20828 static void
20829 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20830 {
20831 int j = 0;
20832 int offset = 0;
20833 rtx par = NULL_RTX;
20834 rtx dwarf = NULL_RTX;
20835 rtx tmp, mem;
20836
20837 /* Restore saved registers. */
20838 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20839 j = 0;
20840 while (j <= LAST_ARM_REGNUM)
20841 if (saved_regs_mask & (1 << j))
20842 {
20843 if ((j % 2) == 0
20844 && (saved_regs_mask & (1 << (j + 1)))
20845 && (j + 1) != PC_REGNUM)
20846 {
20847 /* Current register and next register form register pair for which
20848 LDRD can be generated. PC is always the last register popped, and
20849 we handle it separately. */
20850 if (offset > 0)
20851 mem = gen_frame_mem (DImode,
20852 plus_constant (Pmode,
20853 stack_pointer_rtx,
20854 offset));
20855 else
20856 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20857
20858 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20859 tmp = emit_insn (tmp);
20860 RTX_FRAME_RELATED_P (tmp) = 1;
20861
20862 /* Generate dwarf info. */
20863
20864 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20865 gen_rtx_REG (SImode, j),
20866 NULL_RTX);
20867 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20868 gen_rtx_REG (SImode, j + 1),
20869 dwarf);
20870
20871 REG_NOTES (tmp) = dwarf;
20872
20873 offset += 8;
20874 j += 2;
20875 }
20876 else if (j != PC_REGNUM)
20877 {
20878 /* Emit a single word load. */
20879 if (offset > 0)
20880 mem = gen_frame_mem (SImode,
20881 plus_constant (Pmode,
20882 stack_pointer_rtx,
20883 offset));
20884 else
20885 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20886
20887 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20888 tmp = emit_insn (tmp);
20889 RTX_FRAME_RELATED_P (tmp) = 1;
20890
20891 /* Generate dwarf info. */
20892 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20893 gen_rtx_REG (SImode, j),
20894 NULL_RTX);
20895
20896 offset += 4;
20897 j += 1;
20898 }
20899 else /* j == PC_REGNUM */
20900 j++;
20901 }
20902 else
20903 j++;
20904
20905 /* Update the stack. */
20906 if (offset > 0)
20907 {
20908 tmp = gen_rtx_SET (stack_pointer_rtx,
20909 plus_constant (Pmode,
20910 stack_pointer_rtx,
20911 offset));
20912 tmp = emit_insn (tmp);
20913 arm_add_cfa_adjust_cfa_note (tmp, offset,
20914 stack_pointer_rtx, stack_pointer_rtx);
20915 offset = 0;
20916 }
20917
20918 if (saved_regs_mask & (1 << PC_REGNUM))
20919 {
20920 /* Only PC is to be popped. */
20921 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20922 XVECEXP (par, 0, 0) = ret_rtx;
20923 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20924 gen_frame_mem (SImode,
20925 gen_rtx_POST_INC (SImode,
20926 stack_pointer_rtx)));
20927 RTX_FRAME_RELATED_P (tmp) = 1;
20928 XVECEXP (par, 0, 1) = tmp;
20929 par = emit_jump_insn (par);
20930
20931 /* Generate dwarf info. */
20932 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20933 gen_rtx_REG (SImode, PC_REGNUM),
20934 NULL_RTX);
20935 REG_NOTES (par) = dwarf;
20936 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20937 stack_pointer_rtx, stack_pointer_rtx);
20938 }
20939 }
20940
20941 /* Calculate the size of the return value that is passed in registers. */
20942 static unsigned
20943 arm_size_return_regs (void)
20944 {
20945 machine_mode mode;
20946
20947 if (crtl->return_rtx != 0)
20948 mode = GET_MODE (crtl->return_rtx);
20949 else
20950 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20951
20952 return GET_MODE_SIZE (mode);
20953 }
20954
20955 /* Return true if the current function needs to save/restore LR. */
20956 static bool
20957 thumb_force_lr_save (void)
20958 {
20959 return !cfun->machine->lr_save_eliminated
20960 && (!crtl->is_leaf
20961 || thumb_far_jump_used_p ()
20962 || df_regs_ever_live_p (LR_REGNUM));
20963 }
20964
20965 /* We do not know whether r3 will be available because
20966 an indirect tail call is happening in this
20967 particular case. */
20968 static bool
20969 is_indirect_tailcall_p (rtx call)
20970 {
20971 rtx pat = PATTERN (call);
20972
20973 /* Indirect tail call. */
20974 pat = XVECEXP (pat, 0, 0);
20975 if (GET_CODE (pat) == SET)
20976 pat = SET_SRC (pat);
20977
20978 pat = XEXP (XEXP (pat, 0), 0);
20979 return REG_P (pat);
20980 }
20981
20982 /* Return true if r3 is used by any of the tail call insns in the
20983 current function. */
20984 static bool
20985 any_sibcall_could_use_r3 (void)
20986 {
20987 edge_iterator ei;
20988 edge e;
20989
20990 if (!crtl->tail_call_emit)
20991 return false;
20992 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
20993 if (e->flags & EDGE_SIBCALL)
20994 {
20995 rtx_insn *call = BB_END (e->src);
20996 if (!CALL_P (call))
20997 call = prev_nonnote_nondebug_insn (call);
20998 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
20999 if (find_regno_fusage (call, USE, 3)
21000 || is_indirect_tailcall_p (call))
21001 return true;
21002 }
21003 return false;
21004 }
21005
21006
21007 /* Compute the distance from register FROM to register TO.
21008 These can be the arg pointer (26), the soft frame pointer (25),
21009 the stack pointer (13) or the hard frame pointer (11).
21010 In thumb mode r7 is used as the soft frame pointer, if needed.
21011 Typical stack layout looks like this:
21012
21013 old stack pointer -> | |
21014 ----
21015 | | \
21016 | | saved arguments for
21017 | | vararg functions
21018 | | /
21019 --
21020 hard FP & arg pointer -> | | \
21021 | | stack
21022 | | frame
21023 | | /
21024 --
21025 | | \
21026 | | call saved
21027 | | registers
21028 soft frame pointer -> | | /
21029 --
21030 | | \
21031 | | local
21032 | | variables
21033 locals base pointer -> | | /
21034 --
21035 | | \
21036 | | outgoing
21037 | | arguments
21038 current stack pointer -> | | /
21039 --
21040
21041 For a given function some or all of these stack components
21042 may not be needed, giving rise to the possibility of
21043 eliminating some of the registers.
21044
21045 The values returned by this function must reflect the behavior
21046 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
21047
21048 The sign of the number returned reflects the direction of stack
21049 growth, so the values are positive for all eliminations except
21050 from the soft frame pointer to the hard frame pointer.
21051
21052 SFP may point just inside the local variables block to ensure correct
21053 alignment. */
21054
21055
21056 /* Return cached stack offsets. */
21057
21058 static arm_stack_offsets *
21059 arm_get_frame_offsets (void)
21060 {
21061 struct arm_stack_offsets *offsets;
21062
21063 offsets = &cfun->machine->stack_offsets;
21064
21065 return offsets;
21066 }
21067
21068
21069 /* Calculate stack offsets. These are used to calculate register elimination
21070 offsets and in prologue/epilogue code. Also calculates which registers
21071 should be saved. */
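/* Worked example (assuming ARM_DOUBLEWORD_ALIGN, no static chain, no frame
   pointer and CALLER_INTERWORKING_SLOT_SIZE == 0): a function that saves
   { r4-r7, lr } (20 bytes), has 16 bytes of locals and no outgoing
   arguments ends up with

     saved_args = 0, frame = 0, saved_regs = 20,
     soft_frame = 24 (20 rounded up for doubleword alignment),
     locals_base = 40, outgoing_args = 40.  */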
21072
21073 static void
21074 arm_compute_frame_layout (void)
21075 {
21076 struct arm_stack_offsets *offsets;
21077 unsigned long func_type;
21078 int saved;
21079 int core_saved;
21080 HOST_WIDE_INT frame_size;
21081 int i;
21082
21083 offsets = &cfun->machine->stack_offsets;
21084
21085 /* Initially this is the size of the local variables. It will be translated
21086 into an offset once we have determined the size of preceding data. */
21087 frame_size = ROUND_UP_WORD (get_frame_size ());
21088
21089 /* Space for variadic functions. */
21090 offsets->saved_args = crtl->args.pretend_args_size;
21091
21092 /* In Thumb mode this is incorrect, but never used. */
21093 offsets->frame
21094 = (offsets->saved_args
21095 + arm_compute_static_chain_stack_bytes ()
21096 + (frame_pointer_needed ? 4 : 0));
21097
21098 if (TARGET_32BIT)
21099 {
21100 unsigned int regno;
21101
21102 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
21103 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21104 saved = core_saved;
21105
21106 /* We know that SP will be doubleword aligned on entry, and we must
21107 preserve that condition at any subroutine call. We also require the
21108 soft frame pointer to be doubleword aligned. */
21109
21110 if (TARGET_REALLY_IWMMXT)
21111 {
21112 /* Check for the call-saved iWMMXt registers. */
21113 for (regno = FIRST_IWMMXT_REGNUM;
21114 regno <= LAST_IWMMXT_REGNUM;
21115 regno++)
21116 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
21117 saved += 8;
21118 }
21119
21120 func_type = arm_current_func_type ();
21121 /* Space for saved VFP registers. */
21122 if (! IS_VOLATILE (func_type)
21123 && TARGET_HARD_FLOAT)
21124 saved += arm_get_vfp_saved_size ();
21125 }
21126 else /* TARGET_THUMB1 */
21127 {
21128 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
21129 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21130 saved = core_saved;
21131 if (TARGET_BACKTRACE)
21132 saved += 16;
21133 }
21134
21135 /* Saved registers include the stack frame. */
21136 offsets->saved_regs
21137 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
21138 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
21139
21140 /* A leaf function does not need any stack alignment if it has nothing
21141 on the stack. */
21142 if (crtl->is_leaf && frame_size == 0
21143 /* However if it calls alloca(), we have a dynamically allocated
21144 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
21145 && ! cfun->calls_alloca)
21146 {
21147 offsets->outgoing_args = offsets->soft_frame;
21148 offsets->locals_base = offsets->soft_frame;
21149 return;
21150 }
21151
21152 /* Ensure SFP has the correct alignment. */
21153 if (ARM_DOUBLEWORD_ALIGN
21154 && (offsets->soft_frame & 7))
21155 {
21156 offsets->soft_frame += 4;
21157 /* Try to align stack by pushing an extra reg. Don't bother doing this
21158 when there is a stack frame as the alignment will be rolled into
21159 the normal stack adjustment. */
21160 if (frame_size + crtl->outgoing_args_size == 0)
21161 {
21162 int reg = -1;
21163
21164 /* Register r3 is caller-saved. Normally it does not need to be
21165 saved on entry by the prologue. However if we choose to save
21166 it for padding then we may confuse the compiler into thinking
21167 a prologue sequence is required when in fact it is not. This
21168 will occur when shrink-wrapping if r3 is used as a scratch
21169 register and there are no other callee-saved writes.
21170
21171 This situation can be avoided when other callee-saved registers
21172 are available and r3 is not mandatory if we choose a callee-saved
21173 register for padding. */
21174 bool prefer_callee_reg_p = false;
21175
21176 /* If it is safe to use r3, then do so. This sometimes
21177 generates better code on Thumb-2 by avoiding the need to
21178 use 32-bit push/pop instructions. */
21179 if (! any_sibcall_could_use_r3 ()
21180 && arm_size_return_regs () <= 12
21181 && (offsets->saved_regs_mask & (1 << 3)) == 0
21182 && (TARGET_THUMB2
21183 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
21184 {
21185 reg = 3;
21186 if (!TARGET_THUMB2)
21187 prefer_callee_reg_p = true;
21188 }
21189 if (reg == -1
21190 || prefer_callee_reg_p)
21191 {
21192 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
21193 {
21194 /* Avoid fixed registers; they may be changed at
21195 arbitrary times so it's unsafe to restore them
21196 during the epilogue. */
21197 if (!fixed_regs[i]
21198 && (offsets->saved_regs_mask & (1 << i)) == 0)
21199 {
21200 reg = i;
21201 break;
21202 }
21203 }
21204 }
21205
21206 if (reg != -1)
21207 {
21208 offsets->saved_regs += 4;
21209 offsets->saved_regs_mask |= (1 << reg);
21210 }
21211 }
21212 }
21213
21214 offsets->locals_base = offsets->soft_frame + frame_size;
21215 offsets->outgoing_args = (offsets->locals_base
21216 + crtl->outgoing_args_size);
21217
21218 if (ARM_DOUBLEWORD_ALIGN)
21219 {
21220 /* Ensure SP remains doubleword aligned. */
21221 if (offsets->outgoing_args & 7)
21222 offsets->outgoing_args += 4;
21223 gcc_assert (!(offsets->outgoing_args & 7));
21224 }
21225 }
21226
21227
21228 /* Calculate the relative offsets for the different stack pointers. Positive
21229 offsets are in the direction of stack growth. */
21230
21231 HOST_WIDE_INT
21232 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21233 {
21234 arm_stack_offsets *offsets;
21235
21236 offsets = arm_get_frame_offsets ();
21237
21238 /* OK, now we have enough information to compute the distances.
21239 There must be an entry in these switch tables for each pair
21240 of registers in ELIMINABLE_REGS, even if some of the entries
21241 seem to be redundant or useless. */
21242 switch (from)
21243 {
21244 case ARG_POINTER_REGNUM:
21245 switch (to)
21246 {
21247 case THUMB_HARD_FRAME_POINTER_REGNUM:
21248 return 0;
21249
21250 case FRAME_POINTER_REGNUM:
21251 /* This is the reverse of the soft frame pointer
21252 to hard frame pointer elimination below. */
21253 return offsets->soft_frame - offsets->saved_args;
21254
21255 case ARM_HARD_FRAME_POINTER_REGNUM:
21256 /* This is only non-zero in the case where the static chain register
21257 is stored above the frame. */
21258 return offsets->frame - offsets->saved_args - 4;
21259
21260 case STACK_POINTER_REGNUM:
21261 /* If nothing has been pushed on the stack at all
21262 then this will return -4. This *is* correct! */
21263 return offsets->outgoing_args - (offsets->saved_args + 4);
21264
21265 default:
21266 gcc_unreachable ();
21267 }
21268 gcc_unreachable ();
21269
21270 case FRAME_POINTER_REGNUM:
21271 switch (to)
21272 {
21273 case THUMB_HARD_FRAME_POINTER_REGNUM:
21274 return 0;
21275
21276 case ARM_HARD_FRAME_POINTER_REGNUM:
21277 /* The hard frame pointer points to the top entry in the
21278 stack frame. The soft frame pointer to the bottom entry
21279 in the stack frame. If there is no stack frame at all,
21280 then they are identical. */
21281
21282 return offsets->frame - offsets->soft_frame;
21283
21284 case STACK_POINTER_REGNUM:
21285 return offsets->outgoing_args - offsets->soft_frame;
21286
21287 default:
21288 gcc_unreachable ();
21289 }
21290 gcc_unreachable ();
21291
21292 default:
21293 /* You cannot eliminate from the stack pointer.
21294 In theory you could eliminate from the hard frame
21295 pointer to the stack pointer, but this will never
21296 happen, since if a stack frame is not needed the
21297 hard frame pointer will never be used. */
21298 gcc_unreachable ();
21299 }
21300 }
21301
21302 /* Given FROM and TO register numbers, say whether this elimination is
21303 allowed. Frame pointer elimination is automatically handled.
21304
21305 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21306 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21307 pointer, we must eliminate FRAME_POINTER_REGNUM into
21308 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21309 ARG_POINTER_REGNUM. */
21310
21311 bool
21312 arm_can_eliminate (const int from, const int to)
21313 {
21314 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21315 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21316 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21317 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21318 true);
21319 }
21320
21321 /* Emit RTL to save coprocessor registers on function entry. Returns the
21322 number of bytes pushed. */
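/* For instance, if d8-d11 and d14-d15 are the live call-saved VFP registers,
   the VFP loop below emits two store-multiple groups (one per contiguous
   run of registers) rather than one store per register.  */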
21323
21324 static int
21325 arm_save_coproc_regs(void)
21326 {
21327 int saved_size = 0;
21328 unsigned reg;
21329 unsigned start_reg;
21330 rtx insn;
21331
21332 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21333 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21334 {
21335 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21336 insn = gen_rtx_MEM (V2SImode, insn);
21337 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21338 RTX_FRAME_RELATED_P (insn) = 1;
21339 saved_size += 8;
21340 }
21341
21342 if (TARGET_HARD_FLOAT)
21343 {
21344 start_reg = FIRST_VFP_REGNUM;
21345
21346 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21347 {
21348 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21349 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21350 {
21351 if (start_reg != reg)
21352 saved_size += vfp_emit_fstmd (start_reg,
21353 (reg - start_reg) / 2);
21354 start_reg = reg + 2;
21355 }
21356 }
21357 if (start_reg != reg)
21358 saved_size += vfp_emit_fstmd (start_reg,
21359 (reg - start_reg) / 2);
21360 }
21361 return saved_size;
21362 }
21363
21364
21365 /* Set the Thumb frame pointer from the stack pointer. */
21366
21367 static void
21368 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21369 {
21370 HOST_WIDE_INT amount;
21371 rtx insn, dwarf;
21372
21373 amount = offsets->outgoing_args - offsets->locals_base;
21374 if (amount < 1024)
21375 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21376 stack_pointer_rtx, GEN_INT (amount)));
21377 else
21378 {
21379 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21380 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21381 expects the first two operands to be the same. */
21382 if (TARGET_THUMB2)
21383 {
21384 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21385 stack_pointer_rtx,
21386 hard_frame_pointer_rtx));
21387 }
21388 else
21389 {
21390 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21391 hard_frame_pointer_rtx,
21392 stack_pointer_rtx));
21393 }
21394 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21395 plus_constant (Pmode, stack_pointer_rtx, amount));
21396 RTX_FRAME_RELATED_P (dwarf) = 1;
21397 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21398 }
21399
21400 RTX_FRAME_RELATED_P (insn) = 1;
21401 }
21402
21403 struct scratch_reg {
21404 rtx reg;
21405 bool saved;
21406 };
21407
21408 /* Return a short-lived scratch register for use as a 2nd scratch register on
21409 function entry after the registers are saved in the prologue. This register
21410 must be released by means of release_scratch_register_on_entry. IP is not
21411 considered since it is always used as the 1st scratch register if available.
21412
21413 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21414 mask of live registers. */
21415
21416 static void
21417 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21418 unsigned long live_regs)
21419 {
21420 int regno = -1;
21421
21422 sr->saved = false;
21423
21424 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21425 regno = LR_REGNUM;
21426 else
21427 {
21428 unsigned int i;
21429
21430 for (i = 4; i < 11; i++)
21431 if (regno1 != i && (live_regs & (1 << i)) != 0)
21432 {
21433 regno = i;
21434 break;
21435 }
21436
21437 if (regno < 0)
21438 {
21439 /* If IP is used as the 1st scratch register for a nested function,
21440 then either r3 wasn't available or is used to preserve IP. */
21441 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21442 regno1 = 3;
21443 regno = (regno1 == 3 ? 2 : 3);
21444 sr->saved
21445 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21446 regno);
21447 }
21448 }
21449
21450 sr->reg = gen_rtx_REG (SImode, regno);
21451 if (sr->saved)
21452 {
21453 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21454 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21455 rtx x = gen_rtx_SET (stack_pointer_rtx,
21456 plus_constant (Pmode, stack_pointer_rtx, -4));
21457 RTX_FRAME_RELATED_P (insn) = 1;
21458 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21459 }
21460 }
21461
21462 /* Release a scratch register obtained from the preceding function. */
21463
21464 static void
21465 release_scratch_register_on_entry (struct scratch_reg *sr)
21466 {
21467 if (sr->saved)
21468 {
21469 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21470 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21471 rtx x = gen_rtx_SET (stack_pointer_rtx,
21472 plus_constant (Pmode, stack_pointer_rtx, 4));
21473 RTX_FRAME_RELATED_P (insn) = 1;
21474 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21475 }
21476 }
21477
21478 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21479
21480 #if PROBE_INTERVAL > 4096
21481 #error Cannot use indexed addressing mode for stack probing
21482 #endif
21483
21484 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21485 inclusive. These are offsets from the current stack pointer. REGNO1
21486 is the index number of the 1st scratch register and LIVE_REGS is the
21487 mask of live registers. */
21488
21489 static void
21490 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21491 unsigned int regno1, unsigned long live_regs)
21492 {
21493 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21494
21495 /* See if we have a constant small number of probes to generate. If so,
21496 that's the easy case. */
21497 if (size <= PROBE_INTERVAL)
21498 {
21499 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21500 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21501 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21502 }
21503
21504 /* The run-time loop is made up of 10 insns in the generic case while the
21505      compile-time loop is made up of 4+2*(n-2) insns for n intervals.  */
21506 else if (size <= 5 * PROBE_INTERVAL)
21507 {
21508 HOST_WIDE_INT i, rem;
21509
21510 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21511 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21512 emit_stack_probe (reg1);
21513
21514 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21515 it exceeds SIZE. If only two probes are needed, this will not
21516 generate any code. Then probe at FIRST + SIZE. */
21517 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21518 {
21519 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21520 emit_stack_probe (reg1);
21521 }
21522
21523 rem = size - (i - PROBE_INTERVAL);
21524 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21525 {
21526 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21527 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21528 }
21529 else
21530 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21531 }
21532
21533 /* Otherwise, do the same as above, but in a loop. Note that we must be
21534 extra careful with variables wrapping around because we might be at
21535 the very top (or the very bottom) of the address space and we have
21536 to be able to handle this case properly; in particular, we use an
21537 equality test for the loop condition. */
21538 else
21539 {
21540 HOST_WIDE_INT rounded_size;
21541 struct scratch_reg sr;
21542
21543 get_scratch_register_on_entry (&sr, regno1, live_regs);
21544
21545 emit_move_insn (reg1, GEN_INT (first));
21546
21547
21548 /* Step 1: round SIZE to the previous multiple of the interval. */
21549
21550 rounded_size = size & -PROBE_INTERVAL;
21551 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21552
21553
21554 /* Step 2: compute initial and final value of the loop counter. */
21555
21556 /* TEST_ADDR = SP + FIRST. */
21557 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21558
21559 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21560 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21561
21562
21563 /* Step 3: the loop
21564
21565 do
21566 {
21567 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21568 probe at TEST_ADDR
21569 }
21570 while (TEST_ADDR != LAST_ADDR)
21571
21572 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21573 until it is equal to ROUNDED_SIZE. */
21574
21575 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21576
21577
21578 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21579 that SIZE is equal to ROUNDED_SIZE. */
21580
21581 if (size != rounded_size)
21582 {
21583 HOST_WIDE_INT rem = size - rounded_size;
21584
21585 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21586 {
21587 emit_set_insn (sr.reg,
21588 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21589 emit_stack_probe (plus_constant (Pmode, sr.reg,
21590 PROBE_INTERVAL - rem));
21591 }
21592 else
21593 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21594 }
21595
21596 release_scratch_register_on_entry (&sr);
21597 }
21598
21599 /* Make sure nothing is scheduled before we are done. */
21600 emit_insn (gen_blockage ());
21601 }
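
/* As a worked example of the function above: with FIRST == 4096 and
   SIZE == 2048, and assuming PROBE_INTERVAL is 4096, the first branch is
   taken and a single probe is emitted at SP - (FIRST + SIZE), i.e. 6144
   bytes below the incoming stack pointer.  Larger sizes fall into the
   unrolled case or the generic loop.  */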
21602
21603 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21604 absolute addresses. */
21605
21606 const char *
21607 output_probe_stack_range (rtx reg1, rtx reg2)
21608 {
21609 static int labelno = 0;
21610 char loop_lab[32];
21611 rtx xops[2];
21612
21613 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21614
21615 /* Loop. */
21616 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21617
21618 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21619 xops[0] = reg1;
21620 xops[1] = GEN_INT (PROBE_INTERVAL);
21621 output_asm_insn ("sub\t%0, %0, %1", xops);
21622
21623 /* Probe at TEST_ADDR. */
21624 output_asm_insn ("str\tr0, [%0, #0]", xops);
21625
21626 /* Test if TEST_ADDR == LAST_ADDR. */
21627 xops[1] = reg2;
21628 output_asm_insn ("cmp\t%0, %1", xops);
21629
21630 /* Branch. */
21631 fputs ("\tbne\t", asm_out_file);
21632 assemble_name_raw (asm_out_file, loop_lab);
21633 fputc ('\n', asm_out_file);
21634
21635 return "";
21636 }
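
/* The loop emitted above assembles to roughly the following, assuming
   PROBE_INTERVAL is 4096 and REG1/REG2 happen to be r4/r5 (a sketch, not
   verbatim compiler output):

     .LPSRL0:
	sub	r4, r4, #4096	@ TEST_ADDR -= PROBE_INTERVAL
	str	r0, [r4, #0]	@ probe at TEST_ADDR
	cmp	r4, r5		@ reached LAST_ADDR yet?
	bne	.LPSRL0
*/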
21637
21638 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21639 function. */
21640 void
21641 arm_expand_prologue (void)
21642 {
21643 rtx amount;
21644 rtx insn;
21645 rtx ip_rtx;
21646 unsigned long live_regs_mask;
21647 unsigned long func_type;
21648 int fp_offset = 0;
21649 int saved_pretend_args = 0;
21650 int saved_regs = 0;
21651 unsigned HOST_WIDE_INT args_to_push;
21652 HOST_WIDE_INT size;
21653 arm_stack_offsets *offsets;
21654 bool clobber_ip;
21655
21656 func_type = arm_current_func_type ();
21657
21658 /* Naked functions don't have prologues. */
21659 if (IS_NAKED (func_type))
21660 {
21661 if (flag_stack_usage_info)
21662 current_function_static_stack_size = 0;
21663 return;
21664 }
21665
21666 /* Make a copy of c_f_p_a_s as we may need to modify it locally. */
21667 args_to_push = crtl->args.pretend_args_size;
21668
21669   /* Compute which registers we will have to save onto the stack.  */
21670 offsets = arm_get_frame_offsets ();
21671 live_regs_mask = offsets->saved_regs_mask;
21672
21673 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21674
21675 if (IS_STACKALIGN (func_type))
21676 {
21677 rtx r0, r1;
21678
21679 /* Handle a word-aligned stack pointer. We generate the following:
21680
21681 mov r0, sp
21682 bic r1, r0, #7
21683 mov sp, r1
21684 <save and restore r0 in normal prologue/epilogue>
21685 mov sp, r0
21686 bx lr
21687
21688 The unwinder doesn't need to know about the stack realignment.
21689 Just tell it we saved SP in r0. */
21690 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21691
21692 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21693 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21694
21695 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21696 RTX_FRAME_RELATED_P (insn) = 1;
21697 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21698
21699 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21700
21701 /* ??? The CFA changes here, which may cause GDB to conclude that it
21702 has entered a different function. That said, the unwind info is
21703 correct, individually, before and after this instruction because
21704 we've described the save of SP, which will override the default
21705 handling of SP as restoring from the CFA. */
21706 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21707 }
21708
21709   /* Compute the static_chain_stack_bytes required and store it.  At this
21710      point the value must still be -1, as set by arm_init_machine_status ().  */
21711 cfun->machine->static_chain_stack_bytes
21712 = arm_compute_static_chain_stack_bytes ();
21713
21714 /* The static chain register is the same as the IP register. If it is
21715 clobbered when creating the frame, we need to save and restore it. */
21716 clobber_ip = IS_NESTED (func_type)
21717 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21718 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21719 || flag_stack_clash_protection)
21720 && !df_regs_ever_live_p (LR_REGNUM)
21721 && arm_r3_live_at_start_p ()));
21722
21723 /* Find somewhere to store IP whilst the frame is being created.
21724 We try the following places in order:
21725
21726 1. The last argument register r3 if it is available.
21727 2. A slot on the stack above the frame if there are no
21728 arguments to push onto the stack.
21729 3. Register r3 again, after pushing the argument registers
21730 onto the stack, if this is a varargs function.
21731 4. The last slot on the stack created for the arguments to
21732 push, if this isn't a varargs function.
21733
21734 Note - we only need to tell the dwarf2 backend about the SP
21735 adjustment in the second variant; the static chain register
21736 doesn't need to be unwound, as it doesn't contain a value
21737 inherited from the caller. */
21738 if (clobber_ip)
21739 {
21740 if (!arm_r3_live_at_start_p ())
21741 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21742 else if (args_to_push == 0)
21743 {
21744 rtx addr, dwarf;
21745
21746 gcc_assert(arm_compute_static_chain_stack_bytes() == 4);
21747 saved_regs += 4;
21748
21749 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21750 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21751 fp_offset = 4;
21752
21753 /* Just tell the dwarf backend that we adjusted SP. */
21754 dwarf = gen_rtx_SET (stack_pointer_rtx,
21755 plus_constant (Pmode, stack_pointer_rtx,
21756 -fp_offset));
21757 RTX_FRAME_RELATED_P (insn) = 1;
21758 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21759 }
21760 else
21761 {
21762 /* Store the args on the stack. */
21763 if (cfun->machine->uses_anonymous_args)
21764 {
21765 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21766 (0xf0 >> (args_to_push / 4)) & 0xf);
21767 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21768 saved_pretend_args = 1;
21769 }
21770 else
21771 {
21772 rtx addr, dwarf;
21773
21774 if (args_to_push == 4)
21775 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21776 else
21777 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21778 plus_constant (Pmode,
21779 stack_pointer_rtx,
21780 -args_to_push));
21781
21782 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21783
21784 /* Just tell the dwarf backend that we adjusted SP. */
21785 dwarf = gen_rtx_SET (stack_pointer_rtx,
21786 plus_constant (Pmode, stack_pointer_rtx,
21787 -args_to_push));
21788 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21789 }
21790
21791 RTX_FRAME_RELATED_P (insn) = 1;
21792 fp_offset = args_to_push;
21793 args_to_push = 0;
21794 }
21795 }
21796
21797 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21798 {
21799 if (IS_INTERRUPT (func_type))
21800 {
21801 /* Interrupt functions must not corrupt any registers.
21802 	     Creating a frame pointer, however, corrupts the IP
21803 register, so we must push it first. */
21804 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21805
21806 /* Do not set RTX_FRAME_RELATED_P on this insn.
21807 The dwarf stack unwinding code only wants to see one
21808 stack decrement per function, and this is not it. If
21809 this instruction is labeled as being part of the frame
21810 creation sequence then dwarf2out_frame_debug_expr will
21811 die when it encounters the assignment of IP to FP
21812 later on, since the use of SP here establishes SP as
21813 the CFA register and not IP.
21814
21815 Anyway this instruction is not really part of the stack
21816 frame creation although it is part of the prologue. */
21817 }
21818
21819 insn = emit_set_insn (ip_rtx,
21820 plus_constant (Pmode, stack_pointer_rtx,
21821 fp_offset));
21822 RTX_FRAME_RELATED_P (insn) = 1;
21823 }
21824
21825 if (args_to_push)
21826 {
21827 /* Push the argument registers, or reserve space for them. */
21828 if (cfun->machine->uses_anonymous_args)
21829 insn = emit_multi_reg_push
21830 ((0xf0 >> (args_to_push / 4)) & 0xf,
21831 (0xf0 >> (args_to_push / 4)) & 0xf);
21832 else
21833 insn = emit_insn
21834 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21835 GEN_INT (- args_to_push)));
21836 RTX_FRAME_RELATED_P (insn) = 1;
21837 }
21838
21839 /* If this is an interrupt service routine, and the link register
21840      is going to be pushed, and we are not generating the extra push
21841      of IP (needed when a frame pointer is required and the APCS frame layout is used),
21842 subtracting four from LR now will mean that the function return
21843 can be done with a single instruction. */
21844 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21845 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21846 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21847 && TARGET_ARM)
21848 {
21849 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21850
21851 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21852 }
21853
21854 if (live_regs_mask)
21855 {
21856 unsigned long dwarf_regs_mask = live_regs_mask;
21857
21858 saved_regs += bit_count (live_regs_mask) * 4;
21859 if (optimize_size && !frame_pointer_needed
21860 && saved_regs == offsets->saved_regs - offsets->saved_args)
21861 {
21862 /* If no coprocessor registers are being pushed and we don't have
21863 to worry about a frame pointer then push extra registers to
21864 create the stack frame. This is done in a way that does not
21865 alter the frame layout, so is independent of the epilogue. */
21866 int n;
21867 int frame;
21868 n = 0;
21869 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21870 n++;
21871 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21872 if (frame && n * 4 >= frame)
21873 {
21874 n = frame / 4;
21875 live_regs_mask |= (1 << n) - 1;
21876 saved_regs += frame;
21877 }
21878 }
21879
21880 if (TARGET_LDRD
21881 && current_tune->prefer_ldrd_strd
21882 && !optimize_function_for_size_p (cfun))
21883 {
21884 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21885 if (TARGET_THUMB2)
21886 thumb2_emit_strd_push (live_regs_mask);
21887 else if (TARGET_ARM
21888 && !TARGET_APCS_FRAME
21889 && !IS_INTERRUPT (func_type))
21890 arm_emit_strd_push (live_regs_mask);
21891 else
21892 {
21893 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21894 RTX_FRAME_RELATED_P (insn) = 1;
21895 }
21896 }
21897 else
21898 {
21899 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21900 RTX_FRAME_RELATED_P (insn) = 1;
21901 }
21902 }
21903
21904 if (! IS_VOLATILE (func_type))
21905 saved_regs += arm_save_coproc_regs ();
21906
21907 if (frame_pointer_needed && TARGET_ARM)
21908 {
21909 /* Create the new frame pointer. */
21910 if (TARGET_APCS_FRAME)
21911 {
21912 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21913 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21914 RTX_FRAME_RELATED_P (insn) = 1;
21915 }
21916 else
21917 {
21918 insn = GEN_INT (saved_regs - (4 + fp_offset));
21919 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21920 stack_pointer_rtx, insn));
21921 RTX_FRAME_RELATED_P (insn) = 1;
21922 }
21923 }
21924
21925 size = offsets->outgoing_args - offsets->saved_args;
21926 if (flag_stack_usage_info)
21927 current_function_static_stack_size = size;
21928
21929 /* If this isn't an interrupt service routine and we have a frame, then do
21930 stack checking. We use IP as the first scratch register, except for the
21931 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21932 if (!IS_INTERRUPT (func_type)
21933 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21934 || flag_stack_clash_protection))
21935 {
21936 unsigned int regno;
21937
21938 if (!IS_NESTED (func_type) || clobber_ip)
21939 regno = IP_REGNUM;
21940 else if (df_regs_ever_live_p (LR_REGNUM))
21941 regno = LR_REGNUM;
21942 else
21943 regno = 3;
21944
21945 if (crtl->is_leaf && !cfun->calls_alloca)
21946 {
21947 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
21948 arm_emit_probe_stack_range (get_stack_check_protect (),
21949 size - get_stack_check_protect (),
21950 regno, live_regs_mask);
21951 }
21952 else if (size > 0)
21953 arm_emit_probe_stack_range (get_stack_check_protect (), size,
21954 regno, live_regs_mask);
21955 }
21956
21957 /* Recover the static chain register. */
21958 if (clobber_ip)
21959 {
21960 if (!arm_r3_live_at_start_p () || saved_pretend_args)
21961 insn = gen_rtx_REG (SImode, 3);
21962 else
21963 {
21964 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
21965 insn = gen_frame_mem (SImode, insn);
21966 }
21967 emit_set_insn (ip_rtx, insn);
21968 emit_insn (gen_force_register_use (ip_rtx));
21969 }
21970
21971 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
21972 {
21973 /* This add can produce multiple insns for a large constant, so we
21974 need to get tricky. */
21975 rtx_insn *last = get_last_insn ();
21976
21977 amount = GEN_INT (offsets->saved_args + saved_regs
21978 - offsets->outgoing_args);
21979
21980 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21981 amount));
21982 do
21983 {
21984 last = last ? NEXT_INSN (last) : get_insns ();
21985 RTX_FRAME_RELATED_P (last) = 1;
21986 }
21987 while (last != insn);
21988
21989 /* If the frame pointer is needed, emit a special barrier that
21990 will prevent the scheduler from moving stores to the frame
21991 before the stack adjustment. */
21992 if (frame_pointer_needed)
21993 emit_insn (gen_stack_tie (stack_pointer_rtx,
21994 hard_frame_pointer_rtx));
21995 }
21996
21997
21998 if (frame_pointer_needed && TARGET_THUMB2)
21999 thumb_set_frame_pointer (offsets);
22000
22001 if (flag_pic && arm_pic_register != INVALID_REGNUM)
22002 {
22003 unsigned long mask;
22004
22005 mask = live_regs_mask;
22006 mask &= THUMB2_WORK_REGS;
22007 if (!IS_NESTED (func_type))
22008 mask |= (1 << IP_REGNUM);
22009 arm_load_pic_register (mask);
22010 }
22011
22012 /* If we are profiling, make sure no instructions are scheduled before
22013 the call to mcount. Similarly if the user has requested no
22014      scheduling in the prologue.  Similarly if we want non-call exceptions
22015 using the EABI unwinder, to prevent faulting instructions from being
22016 swapped with a stack adjustment. */
22017 if (crtl->profile || !TARGET_SCHED_PROLOG
22018 || (arm_except_unwind_info (&global_options) == UI_TARGET
22019 && cfun->can_throw_non_call_exceptions))
22020 emit_insn (gen_blockage ());
22021
22022 /* If the link register is being kept alive, with the return address in it,
22023 then make sure that it does not get reused by the ce2 pass. */
22024 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
22025 cfun->machine->lr_save_eliminated = 1;
22026 }
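
/* For reference, a typical ARM-mode APCS-frame prologue produced by
   arm_expand_prologue looks roughly like the following (the register
   list and the size of the local area are illustrative only, and the
   push may equally appear as an stmfd):

     mov	ip, sp
     push	{fp, ip, lr, pc}
     sub	fp, ip, #4
     sub	sp, sp, #<locals>
*/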
22027 \f
22028 /* Print condition code to STREAM. Helper function for arm_print_operand. */
22029 static void
22030 arm_print_condition (FILE *stream)
22031 {
22032 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
22033 {
22034 /* Branch conversion is not implemented for Thumb-2. */
22035 if (TARGET_THUMB)
22036 {
22037 output_operand_lossage ("predicated Thumb instruction");
22038 return;
22039 }
22040 if (current_insn_predicate != NULL)
22041 {
22042 output_operand_lossage
22043 ("predicated instruction in conditional sequence");
22044 return;
22045 }
22046
22047 fputs (arm_condition_codes[arm_current_cc], stream);
22048 }
22049 else if (current_insn_predicate)
22050 {
22051 enum arm_cond_code code;
22052
22053 if (TARGET_THUMB1)
22054 {
22055 output_operand_lossage ("predicated Thumb instruction");
22056 return;
22057 }
22058
22059 code = get_arm_condition_code (current_insn_predicate);
22060 fputs (arm_condition_codes[code], stream);
22061 }
22062 }
22063
22064
22065 /* Globally reserved letters: acln
22066    Punctuation letters currently used: @_|?().!#
22067 Lower case letters currently used: bcdefhimpqtvwxyz
22068 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
22069 Letters previously used, but now deprecated/obsolete: sVWXYZ.
22070
22071 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
22072
22073 If CODE is 'd', then the X is a condition operand and the instruction
22074 should only be executed if the condition is true.
22075    If CODE is 'D', then the X is a condition operand and the instruction
22076 should only be executed if the condition is false: however, if the mode
22077 of the comparison is CCFPEmode, then always execute the instruction -- we
22078 do this because in these circumstances !GE does not necessarily imply LT;
22079 in these cases the instruction pattern will take care to make sure that
22080 an instruction containing %d will follow, thereby undoing the effects of
22081 doing this instruction unconditionally.
22082 If CODE is 'N' then X is a floating point operand that must be negated
22083 before output.
22084 If CODE is 'B' then output a bitwise inverted value of X (a const int).
22085 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
22086 static void
22087 arm_print_operand (FILE *stream, rtx x, int code)
22088 {
22089 switch (code)
22090 {
22091 case '@':
22092 fputs (ASM_COMMENT_START, stream);
22093 return;
22094
22095 case '_':
22096 fputs (user_label_prefix, stream);
22097 return;
22098
22099 case '|':
22100 fputs (REGISTER_PREFIX, stream);
22101 return;
22102
22103 case '?':
22104 arm_print_condition (stream);
22105 return;
22106
22107 case '.':
22108 /* The current condition code for a condition code setting instruction.
22109 Preceded by 's' in unified syntax, otherwise followed by 's'. */
22110 fputc('s', stream);
22111 arm_print_condition (stream);
22112 return;
22113
22114 case '!':
22115 /* If the instruction is conditionally executed then print
22116 the current condition code, otherwise print 's'. */
22117 gcc_assert (TARGET_THUMB2);
22118 if (current_insn_predicate)
22119 arm_print_condition (stream);
22120 else
22121 fputc('s', stream);
22122 break;
22123
22124 /* %# is a "break" sequence. It doesn't output anything, but is used to
22125 separate e.g. operand numbers from following text, if that text consists
22126 of further digits which we don't want to be part of the operand
22127 number. */
22128 case '#':
22129 return;
22130
22131 case 'N':
22132 {
22133 REAL_VALUE_TYPE r;
22134 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
22135 fprintf (stream, "%s", fp_const_from_val (&r));
22136 }
22137 return;
22138
22139 /* An integer or symbol address without a preceding # sign. */
22140 case 'c':
22141 switch (GET_CODE (x))
22142 {
22143 case CONST_INT:
22144 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
22145 break;
22146
22147 case SYMBOL_REF:
22148 output_addr_const (stream, x);
22149 break;
22150
22151 case CONST:
22152 if (GET_CODE (XEXP (x, 0)) == PLUS
22153 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
22154 {
22155 output_addr_const (stream, x);
22156 break;
22157 }
22158 /* Fall through. */
22159
22160 default:
22161 output_operand_lossage ("Unsupported operand for code '%c'", code);
22162 }
22163 return;
22164
22165 /* An integer that we want to print in HEX. */
22166 case 'x':
22167 switch (GET_CODE (x))
22168 {
22169 case CONST_INT:
22170 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
22171 break;
22172
22173 default:
22174 output_operand_lossage ("Unsupported operand for code '%c'", code);
22175 }
22176 return;
22177
22178 case 'B':
22179 if (CONST_INT_P (x))
22180 {
22181 HOST_WIDE_INT val;
22182 val = ARM_SIGN_EXTEND (~INTVAL (x));
22183 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
22184 }
22185 else
22186 {
22187 putc ('~', stream);
22188 output_addr_const (stream, x);
22189 }
22190 return;
22191
22192 case 'b':
22193 /* Print the log2 of a CONST_INT. */
22194 {
22195 HOST_WIDE_INT val;
22196
22197 if (!CONST_INT_P (x)
22198 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
22199 output_operand_lossage ("Unsupported operand for code '%c'", code);
22200 else
22201 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22202 }
22203 return;
22204
22205 case 'L':
22206 /* The low 16 bits of an immediate constant. */
22207 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
22208 return;
22209
22210 case 'i':
22211 fprintf (stream, "%s", arithmetic_instr (x, 1));
22212 return;
22213
22214 case 'I':
22215 fprintf (stream, "%s", arithmetic_instr (x, 0));
22216 return;
22217
22218 case 'S':
22219 {
22220 HOST_WIDE_INT val;
22221 const char *shift;
22222
22223 shift = shift_op (x, &val);
22224
22225 if (shift)
22226 {
22227 fprintf (stream, ", %s ", shift);
22228 if (val == -1)
22229 arm_print_operand (stream, XEXP (x, 1), 0);
22230 else
22231 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22232 }
22233 }
22234 return;
22235
22236 /* An explanation of the 'Q', 'R' and 'H' register operands:
22237
22238 In a pair of registers containing a DI or DF value the 'Q'
22239 operand returns the register number of the register containing
22240 the least significant part of the value. The 'R' operand returns
22241 the register number of the register containing the most
22242 significant part of the value.
22243
22244 The 'H' operand returns the higher of the two register numbers.
22245 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22246 same as the 'Q' operand, since the most significant part of the
22247 value is held in the lower number register. The reverse is true
22248 on systems where WORDS_BIG_ENDIAN is false.
22249
22250 The purpose of these operands is to distinguish between cases
22251 where the endian-ness of the values is important (for example
22252 when they are added together), and cases where the endian-ness
22253 is irrelevant, but the order of register operations is important.
22254 For example when loading a value from memory into a register
22255 pair, the endian-ness does not matter. Provided that the value
22256 from the lower memory address is put into the lower numbered
22257 register, and the value from the higher address is put into the
22258 higher numbered register, the load will work regardless of whether
22259 the value being loaded is big-wordian or little-wordian. The
22260 order of the two register loads can matter however, if the address
22261 of the memory location is actually held in one of the registers
22262 being overwritten by the load.
22263
22264 The 'Q' and 'R' constraints are also available for 64-bit
22265 constants. */
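    /* For example, with a DImode value held in the register pair r2/r3 on
       a little-endian target, "%Q" prints r2 (the least significant half),
       "%R" prints r3 (the most significant half) and "%H" prints r3 (the
       higher-numbered register).  The registers here are illustrative.  */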
22266 case 'Q':
22267 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22268 {
22269 rtx part = gen_lowpart (SImode, x);
22270 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22271 return;
22272 }
22273
22274 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22275 {
22276 output_operand_lossage ("invalid operand for code '%c'", code);
22277 return;
22278 }
22279
22280 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22281 return;
22282
22283 case 'R':
22284 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22285 {
22286 machine_mode mode = GET_MODE (x);
22287 rtx part;
22288
22289 if (mode == VOIDmode)
22290 mode = DImode;
22291 part = gen_highpart_mode (SImode, mode, x);
22292 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22293 return;
22294 }
22295
22296 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22297 {
22298 output_operand_lossage ("invalid operand for code '%c'", code);
22299 return;
22300 }
22301
22302 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22303 return;
22304
22305 case 'H':
22306 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22307 {
22308 output_operand_lossage ("invalid operand for code '%c'", code);
22309 return;
22310 }
22311
22312 asm_fprintf (stream, "%r", REGNO (x) + 1);
22313 return;
22314
22315 case 'J':
22316 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22317 {
22318 output_operand_lossage ("invalid operand for code '%c'", code);
22319 return;
22320 }
22321
22322 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22323 return;
22324
22325 case 'K':
22326 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22327 {
22328 output_operand_lossage ("invalid operand for code '%c'", code);
22329 return;
22330 }
22331
22332 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22333 return;
22334
22335 case 'm':
22336 asm_fprintf (stream, "%r",
22337 REG_P (XEXP (x, 0))
22338 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22339 return;
22340
22341 case 'M':
22342 asm_fprintf (stream, "{%r-%r}",
22343 REGNO (x),
22344 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22345 return;
22346
22347 /* Like 'M', but writing doubleword vector registers, for use by Neon
22348 insns. */
22349 case 'h':
22350 {
22351 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22352 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22353 if (numregs == 1)
22354 asm_fprintf (stream, "{d%d}", regno);
22355 else
22356 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22357 }
22358 return;
22359
22360 case 'd':
22361 /* CONST_TRUE_RTX means always -- that's the default. */
22362 if (x == const_true_rtx)
22363 return;
22364
22365 if (!COMPARISON_P (x))
22366 {
22367 output_operand_lossage ("invalid operand for code '%c'", code);
22368 return;
22369 }
22370
22371 fputs (arm_condition_codes[get_arm_condition_code (x)],
22372 stream);
22373 return;
22374
22375 case 'D':
22376 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22377 want to do that. */
22378 if (x == const_true_rtx)
22379 {
22380 output_operand_lossage ("instruction never executed");
22381 return;
22382 }
22383 if (!COMPARISON_P (x))
22384 {
22385 output_operand_lossage ("invalid operand for code '%c'", code);
22386 return;
22387 }
22388
22389 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22390 (get_arm_condition_code (x))],
22391 stream);
22392 return;
22393
22394 case 's':
22395 case 'V':
22396 case 'W':
22397 case 'X':
22398 case 'Y':
22399 case 'Z':
22400 /* Former Maverick support, removed after GCC-4.7. */
22401 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22402 return;
22403
22404 case 'U':
22405 if (!REG_P (x)
22406 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22407 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22408 /* Bad value for wCG register number. */
22409 {
22410 output_operand_lossage ("invalid operand for code '%c'", code);
22411 return;
22412 }
22413
22414 else
22415 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22416 return;
22417
22418 /* Print an iWMMXt control register name. */
22419 case 'w':
22420 if (!CONST_INT_P (x)
22421 || INTVAL (x) < 0
22422 || INTVAL (x) >= 16)
22423 /* Bad value for wC register number. */
22424 {
22425 output_operand_lossage ("invalid operand for code '%c'", code);
22426 return;
22427 }
22428
22429 else
22430 {
22431 static const char * wc_reg_names [16] =
22432 {
22433 "wCID", "wCon", "wCSSF", "wCASF",
22434 "wC4", "wC5", "wC6", "wC7",
22435 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22436 "wC12", "wC13", "wC14", "wC15"
22437 };
22438
22439 fputs (wc_reg_names [INTVAL (x)], stream);
22440 }
22441 return;
22442
22443 /* Print the high single-precision register of a VFP double-precision
22444 register. */
22445 case 'p':
22446 {
22447 machine_mode mode = GET_MODE (x);
22448 int regno;
22449
22450 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22451 {
22452 output_operand_lossage ("invalid operand for code '%c'", code);
22453 return;
22454 }
22455
22456 regno = REGNO (x);
22457 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22458 {
22459 output_operand_lossage ("invalid operand for code '%c'", code);
22460 return;
22461 }
22462
22463 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22464 }
22465 return;
22466
22467 /* Print a VFP/Neon double precision or quad precision register name. */
22468 case 'P':
22469 case 'q':
22470 {
22471 machine_mode mode = GET_MODE (x);
22472 int is_quad = (code == 'q');
22473 int regno;
22474
22475 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22476 {
22477 output_operand_lossage ("invalid operand for code '%c'", code);
22478 return;
22479 }
22480
22481 if (!REG_P (x)
22482 || !IS_VFP_REGNUM (REGNO (x)))
22483 {
22484 output_operand_lossage ("invalid operand for code '%c'", code);
22485 return;
22486 }
22487
22488 regno = REGNO (x);
22489 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22490 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22491 {
22492 output_operand_lossage ("invalid operand for code '%c'", code);
22493 return;
22494 }
22495
22496 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22497 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22498 }
22499 return;
22500
22501 /* These two codes print the low/high doubleword register of a Neon quad
22502 register, respectively. For pair-structure types, can also print
22503 low/high quadword registers. */
22504 case 'e':
22505 case 'f':
22506 {
22507 machine_mode mode = GET_MODE (x);
22508 int regno;
22509
22510 if ((GET_MODE_SIZE (mode) != 16
22511 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22512 {
22513 output_operand_lossage ("invalid operand for code '%c'", code);
22514 return;
22515 }
22516
22517 regno = REGNO (x);
22518 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22519 {
22520 output_operand_lossage ("invalid operand for code '%c'", code);
22521 return;
22522 }
22523
22524 if (GET_MODE_SIZE (mode) == 16)
22525 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22526 + (code == 'f' ? 1 : 0));
22527 else
22528 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22529 + (code == 'f' ? 1 : 0));
22530 }
22531 return;
22532
22533 /* Print a VFPv3 floating-point constant, represented as an integer
22534 index. */
22535 case 'G':
22536 {
22537 int index = vfp3_const_double_index (x);
22538 gcc_assert (index != -1);
22539 fprintf (stream, "%d", index);
22540 }
22541 return;
22542
22543 /* Print bits representing opcode features for Neon.
22544
22545 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22546 and polynomials as unsigned.
22547
22548 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22549
22550 Bit 2 is 1 for rounding functions, 0 otherwise. */
22551
22552 /* Identify the type as 's', 'u', 'p' or 'f'. */
22553 case 'T':
22554 {
22555 HOST_WIDE_INT bits = INTVAL (x);
22556 fputc ("uspf"[bits & 3], stream);
22557 }
22558 return;
22559
22560 /* Likewise, but signed and unsigned integers are both 'i'. */
22561 case 'F':
22562 {
22563 HOST_WIDE_INT bits = INTVAL (x);
22564 fputc ("iipf"[bits & 3], stream);
22565 }
22566 return;
22567
22568 /* As for 'T', but emit 'u' instead of 'p'. */
22569 case 't':
22570 {
22571 HOST_WIDE_INT bits = INTVAL (x);
22572 fputc ("usuf"[bits & 3], stream);
22573 }
22574 return;
22575
22576 /* Bit 2: rounding (vs none). */
22577 case 'O':
22578 {
22579 HOST_WIDE_INT bits = INTVAL (x);
22580 fputs ((bits & 4) != 0 ? "r" : "", stream);
22581 }
22582 return;
22583
22584 /* Memory operand for vld1/vst1 instruction. */
22585 case 'A':
22586 {
22587 rtx addr;
22588 bool postinc = FALSE;
22589 rtx postinc_reg = NULL;
22590 unsigned align, memsize, align_bits;
22591
22592 gcc_assert (MEM_P (x));
22593 addr = XEXP (x, 0);
22594 if (GET_CODE (addr) == POST_INC)
22595 {
22596 postinc = 1;
22597 addr = XEXP (addr, 0);
22598 }
22599 if (GET_CODE (addr) == POST_MODIFY)
22600 {
22601 postinc_reg = XEXP( XEXP (addr, 1), 1);
22602 addr = XEXP (addr, 0);
22603 }
22604 asm_fprintf (stream, "[%r", REGNO (addr));
22605
22606 /* We know the alignment of this access, so we can emit a hint in the
22607 instruction (for some alignments) as an aid to the memory subsystem
22608 of the target. */
22609 align = MEM_ALIGN (x) >> 3;
22610 memsize = MEM_SIZE (x);
22611
22612 /* Only certain alignment specifiers are supported by the hardware. */
22613 if (memsize == 32 && (align % 32) == 0)
22614 align_bits = 256;
22615 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22616 align_bits = 128;
22617 else if (memsize >= 8 && (align % 8) == 0)
22618 align_bits = 64;
22619 else
22620 align_bits = 0;
22621
22622 if (align_bits != 0)
22623 asm_fprintf (stream, ":%d", align_bits);
22624
22625 asm_fprintf (stream, "]");
22626
22627 if (postinc)
22628 fputs("!", stream);
22629 if (postinc_reg)
22630 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22631 }
22632 return;
22633
22634 case 'C':
22635 {
22636 rtx addr;
22637
22638 gcc_assert (MEM_P (x));
22639 addr = XEXP (x, 0);
22640 gcc_assert (REG_P (addr));
22641 asm_fprintf (stream, "[%r]", REGNO (addr));
22642 }
22643 return;
22644
22645 /* Translate an S register number into a D register number and element index. */
22646 case 'y':
22647 {
22648 machine_mode mode = GET_MODE (x);
22649 int regno;
22650
22651 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22652 {
22653 output_operand_lossage ("invalid operand for code '%c'", code);
22654 return;
22655 }
22656
22657 regno = REGNO (x);
22658 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22659 {
22660 output_operand_lossage ("invalid operand for code '%c'", code);
22661 return;
22662 }
22663
22664 regno = regno - FIRST_VFP_REGNUM;
22665 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22666 }
22667 return;
22668
22669 case 'v':
22670 gcc_assert (CONST_DOUBLE_P (x));
22671 int result;
22672 result = vfp3_const_double_for_fract_bits (x);
22673 if (result == 0)
22674 result = vfp3_const_double_for_bits (x);
22675 fprintf (stream, "#%d", result);
22676 return;
22677
22678 /* Register specifier for vld1.16/vst1.16. Translate the S register
22679 number into a D register number and element index. */
22680 case 'z':
22681 {
22682 machine_mode mode = GET_MODE (x);
22683 int regno;
22684
22685 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22686 {
22687 output_operand_lossage ("invalid operand for code '%c'", code);
22688 return;
22689 }
22690
22691 regno = REGNO (x);
22692 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22693 {
22694 output_operand_lossage ("invalid operand for code '%c'", code);
22695 return;
22696 }
22697
22698 regno = regno - FIRST_VFP_REGNUM;
22699 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22700 }
22701 return;
22702
22703 default:
22704 if (x == 0)
22705 {
22706 output_operand_lossage ("missing operand");
22707 return;
22708 }
22709
22710 switch (GET_CODE (x))
22711 {
22712 case REG:
22713 asm_fprintf (stream, "%r", REGNO (x));
22714 break;
22715
22716 case MEM:
22717 output_address (GET_MODE (x), XEXP (x, 0));
22718 break;
22719
22720 case CONST_DOUBLE:
22721 {
22722 char fpstr[20];
22723 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22724 sizeof (fpstr), 0, 1);
22725 fprintf (stream, "#%s", fpstr);
22726 }
22727 break;
22728
22729 default:
22730 gcc_assert (GET_CODE (x) != NEG);
22731 fputc ('#', stream);
22732 if (GET_CODE (x) == HIGH)
22733 {
22734 fputs (":lower16:", stream);
22735 x = XEXP (x, 0);
22736 }
22737
22738 output_addr_const (stream, x);
22739 break;
22740 }
22741 }
22742 }
22743 \f
22744 /* Target hook for printing a memory address. */
22745 static void
22746 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22747 {
22748 if (TARGET_32BIT)
22749 {
22750 int is_minus = GET_CODE (x) == MINUS;
22751
22752 if (REG_P (x))
22753 asm_fprintf (stream, "[%r]", REGNO (x));
22754 else if (GET_CODE (x) == PLUS || is_minus)
22755 {
22756 rtx base = XEXP (x, 0);
22757 rtx index = XEXP (x, 1);
22758 HOST_WIDE_INT offset = 0;
22759 if (!REG_P (base)
22760 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22761 {
22762 /* Ensure that BASE is a register. */
22763 /* (one of them must be). */
22764 	      /* Also ensure that SP is not used as an index register.  */
22765 std::swap (base, index);
22766 }
22767 switch (GET_CODE (index))
22768 {
22769 case CONST_INT:
22770 offset = INTVAL (index);
22771 if (is_minus)
22772 offset = -offset;
22773 asm_fprintf (stream, "[%r, #%wd]",
22774 REGNO (base), offset);
22775 break;
22776
22777 case REG:
22778 asm_fprintf (stream, "[%r, %s%r]",
22779 REGNO (base), is_minus ? "-" : "",
22780 REGNO (index));
22781 break;
22782
22783 case MULT:
22784 case ASHIFTRT:
22785 case LSHIFTRT:
22786 case ASHIFT:
22787 case ROTATERT:
22788 {
22789 asm_fprintf (stream, "[%r, %s%r",
22790 REGNO (base), is_minus ? "-" : "",
22791 REGNO (XEXP (index, 0)));
22792 arm_print_operand (stream, index, 'S');
22793 fputs ("]", stream);
22794 break;
22795 }
22796
22797 default:
22798 gcc_unreachable ();
22799 }
22800 }
22801 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22802 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22803 {
22804 gcc_assert (REG_P (XEXP (x, 0)));
22805
22806 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22807 asm_fprintf (stream, "[%r, #%s%d]!",
22808 REGNO (XEXP (x, 0)),
22809 GET_CODE (x) == PRE_DEC ? "-" : "",
22810 GET_MODE_SIZE (mode));
22811 else
22812 asm_fprintf (stream, "[%r], #%s%d",
22813 REGNO (XEXP (x, 0)),
22814 GET_CODE (x) == POST_DEC ? "-" : "",
22815 GET_MODE_SIZE (mode));
22816 }
22817 else if (GET_CODE (x) == PRE_MODIFY)
22818 {
22819 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22820 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22821 asm_fprintf (stream, "#%wd]!",
22822 INTVAL (XEXP (XEXP (x, 1), 1)));
22823 else
22824 asm_fprintf (stream, "%r]!",
22825 REGNO (XEXP (XEXP (x, 1), 1)));
22826 }
22827 else if (GET_CODE (x) == POST_MODIFY)
22828 {
22829 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22830 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22831 asm_fprintf (stream, "#%wd",
22832 INTVAL (XEXP (XEXP (x, 1), 1)));
22833 else
22834 asm_fprintf (stream, "%r",
22835 REGNO (XEXP (XEXP (x, 1), 1)));
22836 }
22837 else output_addr_const (stream, x);
22838 }
22839 else
22840 {
22841 if (REG_P (x))
22842 asm_fprintf (stream, "[%r]", REGNO (x));
22843 else if (GET_CODE (x) == POST_INC)
22844 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22845 else if (GET_CODE (x) == PLUS)
22846 {
22847 gcc_assert (REG_P (XEXP (x, 0)));
22848 if (CONST_INT_P (XEXP (x, 1)))
22849 asm_fprintf (stream, "[%r, #%wd]",
22850 REGNO (XEXP (x, 0)),
22851 INTVAL (XEXP (x, 1)));
22852 else
22853 asm_fprintf (stream, "[%r, %r]",
22854 REGNO (XEXP (x, 0)),
22855 REGNO (XEXP (x, 1)));
22856 }
22857 else
22858 output_addr_const (stream, x);
22859 }
22860 }
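
/* Illustrative 32-bit renderings of common address forms, using r0 as a
   stand-in base register and SImode accesses:

     (mem (reg r0))                       ->  [r0]
     (mem (plus (reg r0) (const_int 8)))  ->  [r0, #8]
     (mem (pre_inc (reg r0)))             ->  [r0, #4]!
     (mem (post_dec (reg r0)))            ->  [r0], #-4
*/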
22861 \f
22862 /* Target hook for indicating whether a punctuation character for
22863 TARGET_PRINT_OPERAND is valid. */
22864 static bool
22865 arm_print_operand_punct_valid_p (unsigned char code)
22866 {
22867 return (code == '@' || code == '|' || code == '.'
22868 || code == '(' || code == ')' || code == '#'
22869 || (TARGET_32BIT && (code == '?'))
22870 || (TARGET_THUMB2 && (code == '!'))
22871 || (TARGET_THUMB && (code == '_')));
22872 }
22873 \f
22874 /* Target hook for assembling integer objects. The ARM version needs to
22875 handle word-sized values specially. */
22876 static bool
22877 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22878 {
22879 machine_mode mode;
22880
22881 if (size == UNITS_PER_WORD && aligned_p)
22882 {
22883 fputs ("\t.word\t", asm_out_file);
22884 output_addr_const (asm_out_file, x);
22885
22886 /* Mark symbols as position independent. We only do this in the
22887 .text segment, not in the .data segment. */
22888 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22889 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22890 {
22891 /* See legitimize_pic_address for an explanation of the
22892 TARGET_VXWORKS_RTP check. */
22893 /* References to weak symbols cannot be resolved locally:
22894 they may be overridden by a non-weak definition at link
22895 time. */
22896 if (!arm_pic_data_is_text_relative
22897 || (GET_CODE (x) == SYMBOL_REF
22898 && (!SYMBOL_REF_LOCAL_P (x)
22899 || (SYMBOL_REF_DECL (x)
22900 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22901 fputs ("(GOT)", asm_out_file);
22902 else
22903 fputs ("(GOTOFF)", asm_out_file);
22904 }
22905 fputc ('\n', asm_out_file);
22906 return true;
22907 }
22908
22909 mode = GET_MODE (x);
22910
22911 if (arm_vector_mode_supported_p (mode))
22912 {
22913 int i, units;
22914
22915 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22916
22917 units = CONST_VECTOR_NUNITS (x);
22918 size = GET_MODE_UNIT_SIZE (mode);
22919
22920 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22921 for (i = 0; i < units; i++)
22922 {
22923 rtx elt = CONST_VECTOR_ELT (x, i);
22924 assemble_integer
22925 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22926 }
22927 else
22928 for (i = 0; i < units; i++)
22929 {
22930 rtx elt = CONST_VECTOR_ELT (x, i);
22931 assemble_real
22932 (*CONST_DOUBLE_REAL_VALUE (elt),
22933 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22934 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22935 }
22936
22937 return true;
22938 }
22939
22940 return default_assemble_integer (x, size, aligned_p);
22941 }
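
/* For instance, assuming PIC data is text-relative, a word-sized
   reference to a locally binding symbol in a constant table is emitted
   by the hook above as
	.word	sym(GOTOFF)
   whereas a non-local (or weak) symbol gets
	.word	sym(GOT)
   where "sym" is a placeholder symbol name.  */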
22942
22943 static void
22944 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22945 {
22946 section *s;
22947
22948 if (!TARGET_AAPCS_BASED)
22949 {
22950 (is_ctor ?
22951 default_named_section_asm_out_constructor
22952 : default_named_section_asm_out_destructor) (symbol, priority);
22953 return;
22954 }
22955
22956 /* Put these in the .init_array section, using a special relocation. */
22957 if (priority != DEFAULT_INIT_PRIORITY)
22958 {
22959 char buf[18];
22960 sprintf (buf, "%s.%.5u",
22961 is_ctor ? ".init_array" : ".fini_array",
22962 priority);
22963 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
22964 }
22965 else if (is_ctor)
22966 s = ctors_section;
22967 else
22968 s = dtors_section;
22969
22970 switch_to_section (s);
22971 assemble_align (POINTER_SIZE);
22972 fputs ("\t.word\t", asm_out_file);
22973 output_addr_const (asm_out_file, symbol);
22974 fputs ("(target1)\n", asm_out_file);
22975 }
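
/* E.g. on an AAPCS-based target a constructor registered with priority
   101 is placed in section ".init_array.00101" and emitted as
	.word	fn(target1)
   where "fn" stands in for the constructor's symbol.  */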
22976
22977 /* Add a function to the list of static constructors. */
22978
22979 static void
22980 arm_elf_asm_constructor (rtx symbol, int priority)
22981 {
22982 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
22983 }
22984
22985 /* Add a function to the list of static destructors. */
22986
22987 static void
22988 arm_elf_asm_destructor (rtx symbol, int priority)
22989 {
22990 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
22991 }
22992 \f
22993 /* A finite state machine takes care of noticing whether or not instructions
22994 can be conditionally executed, and thus decrease execution time and code
22995 size by deleting branch instructions. The fsm is controlled by
22996 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
22997
22998 /* The states of the fsm controlling condition codes are:
22999 0: normal, do nothing special
23000 1: make ASM_OUTPUT_OPCODE not output this instruction
23001 2: make ASM_OUTPUT_OPCODE not output this instruction
23002 3: make instructions conditional
23003 4: make instructions conditional
23004
23005 State transitions (state->state by whom under condition):
23006 0 -> 1 final_prescan_insn if the `target' is a label
23007 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
23008 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
23009 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
23010 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
23011 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
23012 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
23013 (the target insn is arm_target_insn).
23014
23015 If the jump clobbers the conditions then we use states 2 and 4.
23016
23017 A similar thing can be done with conditional return insns.
23018
23019 XXX In case the `target' is an unconditional branch, this conditionalising
23020 of the instructions always reduces code size, but not always execution
23021 time. But then, I want to reduce the code size to somewhere near what
23022 /bin/cc produces. */
23023
23024 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
23025 instructions. When a COND_EXEC instruction is seen the subsequent
23026 instructions are scanned so that multiple conditional instructions can be
23027 combined into a single IT block. arm_condexec_count and arm_condexec_mask
23028 specify the length and true/false mask for the IT block. These will be
23029 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
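
/* As an illustration of the branch elimination described above, a
   sequence such as

	cmp	r0, #0
	beq	.L1
	add	r1, r1, #1
     .L1:

   can be emitted in ARM state as

	cmp	r0, #0
	addne	r1, r1, #1

   where the registers, the constant and the label are hypothetical.  */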
23030
23031 /* Returns the index of the ARM condition code string in
23032 `arm_condition_codes', or ARM_NV if the comparison is invalid.
23033 COMPARISON should be an rtx like `(eq (...) (...))'. */
23034
23035 enum arm_cond_code
23036 maybe_get_arm_condition_code (rtx comparison)
23037 {
23038 machine_mode mode = GET_MODE (XEXP (comparison, 0));
23039 enum arm_cond_code code;
23040 enum rtx_code comp_code = GET_CODE (comparison);
23041
23042 if (GET_MODE_CLASS (mode) != MODE_CC)
23043 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
23044 XEXP (comparison, 1));
23045
23046 switch (mode)
23047 {
23048 case E_CC_DNEmode: code = ARM_NE; goto dominance;
23049 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
23050 case E_CC_DGEmode: code = ARM_GE; goto dominance;
23051 case E_CC_DGTmode: code = ARM_GT; goto dominance;
23052 case E_CC_DLEmode: code = ARM_LE; goto dominance;
23053 case E_CC_DLTmode: code = ARM_LT; goto dominance;
23054 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
23055 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
23056 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
23057 case E_CC_DLTUmode: code = ARM_CC;
23058
23059 dominance:
23060 if (comp_code == EQ)
23061 return ARM_INVERSE_CONDITION_CODE (code);
23062 if (comp_code == NE)
23063 return code;
23064 return ARM_NV;
23065
23066 case E_CC_NOOVmode:
23067 switch (comp_code)
23068 {
23069 case NE: return ARM_NE;
23070 case EQ: return ARM_EQ;
23071 case GE: return ARM_PL;
23072 case LT: return ARM_MI;
23073 default: return ARM_NV;
23074 }
23075
23076 case E_CC_Zmode:
23077 switch (comp_code)
23078 {
23079 case NE: return ARM_NE;
23080 case EQ: return ARM_EQ;
23081 default: return ARM_NV;
23082 }
23083
23084 case E_CC_Nmode:
23085 switch (comp_code)
23086 {
23087 case NE: return ARM_MI;
23088 case EQ: return ARM_PL;
23089 default: return ARM_NV;
23090 }
23091
23092 case E_CCFPEmode:
23093 case E_CCFPmode:
23094 /* We can handle all cases except UNEQ and LTGT. */
23095 switch (comp_code)
23096 {
23097 case GE: return ARM_GE;
23098 case GT: return ARM_GT;
23099 case LE: return ARM_LS;
23100 case LT: return ARM_MI;
23101 case NE: return ARM_NE;
23102 case EQ: return ARM_EQ;
23103 case ORDERED: return ARM_VC;
23104 case UNORDERED: return ARM_VS;
23105 case UNLT: return ARM_LT;
23106 case UNLE: return ARM_LE;
23107 case UNGT: return ARM_HI;
23108 case UNGE: return ARM_PL;
23109 /* UNEQ and LTGT do not have a representation. */
23110 case UNEQ: /* Fall through. */
23111 case LTGT: /* Fall through. */
23112 default: return ARM_NV;
23113 }
23114
23115 case E_CC_SWPmode:
23116 switch (comp_code)
23117 {
23118 case NE: return ARM_NE;
23119 case EQ: return ARM_EQ;
23120 case GE: return ARM_LE;
23121 case GT: return ARM_LT;
23122 case LE: return ARM_GE;
23123 case LT: return ARM_GT;
23124 case GEU: return ARM_LS;
23125 case GTU: return ARM_CC;
23126 case LEU: return ARM_CS;
23127 case LTU: return ARM_HI;
23128 default: return ARM_NV;
23129 }
23130
23131 case E_CC_Cmode:
23132 switch (comp_code)
23133 {
23134 case LTU: return ARM_CS;
23135 case GEU: return ARM_CC;
23136 case NE: return ARM_CS;
23137 case EQ: return ARM_CC;
23138 default: return ARM_NV;
23139 }
23140
23141 case E_CC_CZmode:
23142 switch (comp_code)
23143 {
23144 case NE: return ARM_NE;
23145 case EQ: return ARM_EQ;
23146 case GEU: return ARM_CS;
23147 case GTU: return ARM_HI;
23148 case LEU: return ARM_LS;
23149 case LTU: return ARM_CC;
23150 default: return ARM_NV;
23151 }
23152
23153 case E_CC_NCVmode:
23154 switch (comp_code)
23155 {
23156 case GE: return ARM_GE;
23157 case LT: return ARM_LT;
23158 case GEU: return ARM_CS;
23159 case LTU: return ARM_CC;
23160 default: return ARM_NV;
23161 }
23162
23163 case E_CC_Vmode:
23164 switch (comp_code)
23165 {
23166 case NE: return ARM_VS;
23167 case EQ: return ARM_VC;
23168 default: return ARM_NV;
23169 }
23170
23171 case E_CCmode:
23172 switch (comp_code)
23173 {
23174 case NE: return ARM_NE;
23175 case EQ: return ARM_EQ;
23176 case GE: return ARM_GE;
23177 case GT: return ARM_GT;
23178 case LE: return ARM_LE;
23179 case LT: return ARM_LT;
23180 case GEU: return ARM_CS;
23181 case GTU: return ARM_HI;
23182 case LEU: return ARM_LS;
23183 case LTU: return ARM_CC;
23184 default: return ARM_NV;
23185 }
23186
23187 default: gcc_unreachable ();
23188 }
23189 }
23190
23191 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
23192 static enum arm_cond_code
23193 get_arm_condition_code (rtx comparison)
23194 {
23195 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
23196 gcc_assert (code != ARM_NV);
23197 return code;
23198 }
23199
23200 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
23201    code registers when not targeting Thumb-1.  The VFP condition register
23202 only exists when generating hard-float code. */
23203 static bool
23204 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
23205 {
23206 if (!TARGET_32BIT)
23207 return false;
23208
23209 *p1 = CC_REGNUM;
23210 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
23211 return true;
23212 }
23213
23214 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23215 instructions. */
23216 void
23217 thumb2_final_prescan_insn (rtx_insn *insn)
23218 {
23219 rtx_insn *first_insn = insn;
23220 rtx body = PATTERN (insn);
23221 rtx predicate;
23222 enum arm_cond_code code;
23223 int n;
23224 int mask;
23225 int max;
23226
23227 /* max_insns_skipped in the tune was already taken into account in the
23228      cost model of the ifcvt pass when generating COND_EXEC insns.  At this stage
23229      just emit the IT blocks as best we can.  It does not make sense to split
23230 the IT blocks. */
23231 max = MAX_INSN_PER_IT_BLOCK;
23232
23233 /* Remove the previous insn from the count of insns to be output. */
23234 if (arm_condexec_count)
23235 arm_condexec_count--;
23236
23237 /* Nothing to do if we are already inside a conditional block. */
23238 if (arm_condexec_count)
23239 return;
23240
23241 if (GET_CODE (body) != COND_EXEC)
23242 return;
23243
23244 /* Conditional jumps are implemented directly. */
23245 if (JUMP_P (insn))
23246 return;
23247
23248 predicate = COND_EXEC_TEST (body);
23249 arm_current_cc = get_arm_condition_code (predicate);
23250
23251 n = get_attr_ce_count (insn);
23252 arm_condexec_count = 1;
23253 arm_condexec_mask = (1 << n) - 1;
23254 arm_condexec_masklen = n;
23255 /* See if subsequent instructions can be combined into the same block. */
23256 for (;;)
23257 {
23258 insn = next_nonnote_insn (insn);
23259
23260 /* Jumping into the middle of an IT block is illegal, so a label or
23261 barrier terminates the block. */
23262 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23263 break;
23264
23265 body = PATTERN (insn);
23266 /* USE and CLOBBER aren't really insns, so just skip them. */
23267 if (GET_CODE (body) == USE
23268 || GET_CODE (body) == CLOBBER)
23269 continue;
23270
23271 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23272 if (GET_CODE (body) != COND_EXEC)
23273 break;
23274 /* Maximum number of conditionally executed instructions in a block. */
23275 n = get_attr_ce_count (insn);
23276 if (arm_condexec_masklen + n > max)
23277 break;
23278
23279 predicate = COND_EXEC_TEST (body);
23280 code = get_arm_condition_code (predicate);
23281 mask = (1 << n) - 1;
23282 if (arm_current_cc == code)
23283 arm_condexec_mask |= (mask << arm_condexec_masklen);
23284 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23285 break;
23286
23287 arm_condexec_count++;
23288 arm_condexec_masklen += n;
23289
23290 /* A jump must be the last instruction in a conditional block. */
23291 if (JUMP_P (insn))
23292 break;
23293 }
23294 /* Restore recog_data (getting the attributes of other insns can
23295 destroy this array, but final.c assumes that it remains intact
23296 across this call). */
23297 extract_constrain_insn_cached (first_insn);
23298 }
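
/* In Thumb-2 the effect of the scan above is to fold consecutive
   COND_EXEC insns into a single IT block, for example (with hypothetical
   registers and constants):

	ite	eq
	moveq	r0, #1
	movne	r0, #0
*/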
23299
23300 void
23301 arm_final_prescan_insn (rtx_insn *insn)
23302 {
23303 /* BODY will hold the body of INSN. */
23304 rtx body = PATTERN (insn);
23305
23306 /* This will be 1 if trying to repeat the trick, and things need to be
23307 reversed if it appears to fail. */
23308 int reverse = 0;
23309
23310 /* If we start with a return insn, we only succeed if we find another one. */
23311 int seeking_return = 0;
23312 enum rtx_code return_code = UNKNOWN;
23313
23314 /* START_INSN will hold the insn from where we start looking. This is the
23315 first insn after the following code_label if REVERSE is true. */
23316 rtx_insn *start_insn = insn;
23317
23318 /* If in state 4, check if the target branch is reached, in order to
23319 change back to state 0. */
23320 if (arm_ccfsm_state == 4)
23321 {
23322 if (insn == arm_target_insn)
23323 {
23324 arm_target_insn = NULL;
23325 arm_ccfsm_state = 0;
23326 }
23327 return;
23328 }
23329
23330 /* If in state 3, it is possible to repeat the trick, if this insn is an
23331 unconditional branch to a label, and immediately following this branch
23332 is the previous target label which is only used once, and the label this
23333 branch jumps to is not too far off. */
23334 if (arm_ccfsm_state == 3)
23335 {
23336 if (simplejump_p (insn))
23337 {
23338 start_insn = next_nonnote_insn (start_insn);
23339 if (BARRIER_P (start_insn))
23340 {
23341 /* XXX Isn't this always a barrier? */
23342 start_insn = next_nonnote_insn (start_insn);
23343 }
23344 if (LABEL_P (start_insn)
23345 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23346 && LABEL_NUSES (start_insn) == 1)
23347 reverse = TRUE;
23348 else
23349 return;
23350 }
23351 else if (ANY_RETURN_P (body))
23352 {
23353 start_insn = next_nonnote_insn (start_insn);
23354 if (BARRIER_P (start_insn))
23355 start_insn = next_nonnote_insn (start_insn);
23356 if (LABEL_P (start_insn)
23357 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23358 && LABEL_NUSES (start_insn) == 1)
23359 {
23360 reverse = TRUE;
23361 seeking_return = 1;
23362 return_code = GET_CODE (body);
23363 }
23364 else
23365 return;
23366 }
23367 else
23368 return;
23369 }
23370
23371 gcc_assert (!arm_ccfsm_state || reverse);
23372 if (!JUMP_P (insn))
23373 return;
23374
23375 /* This jump might be paralleled with a clobber of the condition codes;
23376 the jump should always come first. */
23377 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23378 body = XVECEXP (body, 0, 0);
23379
23380 if (reverse
23381 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23382 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23383 {
23384 int insns_skipped;
23385 int fail = FALSE, succeed = FALSE;
23386 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23387 int then_not_else = TRUE;
23388 rtx_insn *this_insn = start_insn;
23389 rtx label = 0;
23390
23391 /* Register the insn jumped to. */
23392 if (reverse)
23393 {
23394 if (!seeking_return)
23395 label = XEXP (SET_SRC (body), 0);
23396 }
23397 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23398 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23399 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23400 {
23401 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23402 then_not_else = FALSE;
23403 }
23404 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23405 {
23406 seeking_return = 1;
23407 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23408 }
23409 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23410 {
23411 seeking_return = 1;
23412 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23413 then_not_else = FALSE;
23414 }
23415 else
23416 gcc_unreachable ();
23417
23418 /* See how many insns this branch skips, and what kind of insns. If all
23419 insns are okay, and the label or unconditional branch to the same
23420 label is not too far away, succeed. */
23421 for (insns_skipped = 0;
23422 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23423 {
23424 rtx scanbody;
23425
23426 this_insn = next_nonnote_insn (this_insn);
23427 if (!this_insn)
23428 break;
23429
23430 switch (GET_CODE (this_insn))
23431 {
23432 case CODE_LABEL:
23433 /* Succeed if it is the target label, otherwise fail since
23434 control falls in from somewhere else. */
23435 if (this_insn == label)
23436 {
23437 arm_ccfsm_state = 1;
23438 succeed = TRUE;
23439 }
23440 else
23441 fail = TRUE;
23442 break;
23443
23444 case BARRIER:
23445 /* Succeed if the following insn is the target label.
23446 Otherwise fail.
23447 If return insns are used then the last insn in a function
23448 will be a barrier. */
23449 this_insn = next_nonnote_insn (this_insn);
23450 if (this_insn && this_insn == label)
23451 {
23452 arm_ccfsm_state = 1;
23453 succeed = TRUE;
23454 }
23455 else
23456 fail = TRUE;
23457 break;
23458
23459 case CALL_INSN:
23460 /* The AAPCS says that conditional calls should not be
23461 used since they make interworking inefficient (the
23462 linker can't transform BL<cond> into BLX). That's
23463 only a problem if the machine has BLX. */
23464 if (arm_arch5t)
23465 {
23466 fail = TRUE;
23467 break;
23468 }
23469
23470 /* Succeed if the following insn is the target label, or
23471 if the following two insns are a barrier and the
23472 target label. */
23473 this_insn = next_nonnote_insn (this_insn);
23474 if (this_insn && BARRIER_P (this_insn))
23475 this_insn = next_nonnote_insn (this_insn);
23476
23477 if (this_insn && this_insn == label
23478 && insns_skipped < max_insns_skipped)
23479 {
23480 arm_ccfsm_state = 1;
23481 succeed = TRUE;
23482 }
23483 else
23484 fail = TRUE;
23485 break;
23486
23487 case JUMP_INSN:
23488 /* If this is an unconditional branch to the same label, succeed.
23489 If it is to another label, do nothing. If it is conditional,
23490 fail. */
23491 /* XXX Probably, the tests for SET and the PC are
23492 unnecessary. */
23493
23494 scanbody = PATTERN (this_insn);
23495 if (GET_CODE (scanbody) == SET
23496 && GET_CODE (SET_DEST (scanbody)) == PC)
23497 {
23498 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23499 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23500 {
23501 arm_ccfsm_state = 2;
23502 succeed = TRUE;
23503 }
23504 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23505 fail = TRUE;
23506 }
23507 /* Fail if a conditional return is undesirable (e.g. on a
23508 StrongARM), but still allow this if optimizing for size. */
23509 else if (GET_CODE (scanbody) == return_code
23510 && !use_return_insn (TRUE, NULL)
23511 && !optimize_size)
23512 fail = TRUE;
23513 else if (GET_CODE (scanbody) == return_code)
23514 {
23515 arm_ccfsm_state = 2;
23516 succeed = TRUE;
23517 }
23518 else if (GET_CODE (scanbody) == PARALLEL)
23519 {
23520 switch (get_attr_conds (this_insn))
23521 {
23522 case CONDS_NOCOND:
23523 break;
23524 default:
23525 fail = TRUE;
23526 break;
23527 }
23528 }
23529 else
23530 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23531
23532 break;
23533
23534 case INSN:
23535 /* Instructions using or affecting the condition codes make it
23536 fail. */
23537 scanbody = PATTERN (this_insn);
23538 if (!(GET_CODE (scanbody) == SET
23539 || GET_CODE (scanbody) == PARALLEL)
23540 || get_attr_conds (this_insn) != CONDS_NOCOND)
23541 fail = TRUE;
23542 break;
23543
23544 default:
23545 break;
23546 }
23547 }
23548 if (succeed)
23549 {
23550 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23551 arm_target_label = CODE_LABEL_NUMBER (label);
23552 else
23553 {
23554 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23555
23556 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23557 {
23558 this_insn = next_nonnote_insn (this_insn);
23559 gcc_assert (!this_insn
23560 || (!BARRIER_P (this_insn)
23561 && !LABEL_P (this_insn)));
23562 }
23563 if (!this_insn)
23564 {
23565 /* Oh, dear! We ran off the end; give up. */
23566 extract_constrain_insn_cached (insn);
23567 arm_ccfsm_state = 0;
23568 arm_target_insn = NULL;
23569 return;
23570 }
23571 arm_target_insn = this_insn;
23572 }
23573
23574 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23575 what it was. */
23576 if (!reverse)
23577 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23578
23579 if (reverse || then_not_else)
23580 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23581 }
23582
23583 /* Restore recog_data (getting the attributes of other insns can
23584 destroy this array, but final.c assumes that it remains intact
23585 across this call). */
23586 extract_constrain_insn_cached (insn);
23587 }
23588 }
23589
23590 /* Output IT instructions. */
23591 void
23592 thumb2_asm_output_opcode (FILE * stream)
23593 {
23594 char buff[5];
23595 int n;
23596
23597 if (arm_condexec_mask)
23598 {
23599 for (n = 0; n < arm_condexec_masklen; n++)
23600 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23601 buff[n] = 0;
23602 asm_fprintf (stream, "i%s\t%s\n\t", buff,
23603 arm_condition_codes[arm_current_cc]);
23604 arm_condexec_mask = 0;
23605 }
23606 }
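
/* As an illustrative example of the encoding used above (not taken from
   the original sources): if thumb2_final_prescan_insn grouped three
   single-instruction COND_EXEC patterns whose conditions are EQ, EQ and
   NE (the inverse), then arm_condexec_masklen is 3 and arm_condexec_mask
   is 0b011, so BUFF becomes "tte" and the block is introduced with

	itte	eq

   before the first conditional opcode is printed.  */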
23607
23608 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM, core regs are
23609 UNITS_PER_WORD bytes wide. */
23610 static unsigned int
23611 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
23612 {
23613 if (TARGET_32BIT
23614 && regno > PC_REGNUM
23615 && regno != FRAME_POINTER_REGNUM
23616 && regno != ARG_POINTER_REGNUM
23617 && !IS_VFP_REGNUM (regno))
23618 return 1;
23619
23620 return ARM_NUM_REGS (mode);
23621 }
23622
23623 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23624 static bool
23625 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23626 {
23627 if (GET_MODE_CLASS (mode) == MODE_CC)
23628 return (regno == CC_REGNUM
23629 || (TARGET_HARD_FLOAT
23630 && regno == VFPCC_REGNUM));
23631
23632 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23633 return false;
23634
23635 if (TARGET_THUMB1)
23636 /* For the Thumb we only allow values bigger than SImode in
23637 registers 0 - 6, so that there is always a second low
23638 register available to hold the upper part of the value.
23639 We probably ought to ensure that the register is the
23640 start of an even numbered register pair. */
23641 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23642
23643 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23644 {
23645 if (mode == SFmode || mode == SImode)
23646 return VFP_REGNO_OK_FOR_SINGLE (regno);
23647
23648 if (mode == DFmode)
23649 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23650
23651 if (mode == HFmode)
23652 return VFP_REGNO_OK_FOR_SINGLE (regno);
23653
23654 /* VFP registers can hold HImode values. */
23655 if (mode == HImode)
23656 return VFP_REGNO_OK_FOR_SINGLE (regno);
23657
23658 if (TARGET_NEON)
23659 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23660 || (VALID_NEON_QREG_MODE (mode)
23661 && NEON_REGNO_OK_FOR_QUAD (regno))
23662 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23663 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23664 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23665 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23666 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23667
23668 return false;
23669 }
23670
23671 if (TARGET_REALLY_IWMMXT)
23672 {
23673 if (IS_IWMMXT_GR_REGNUM (regno))
23674 return mode == SImode;
23675
23676 if (IS_IWMMXT_REGNUM (regno))
23677 return VALID_IWMMXT_REG_MODE (mode);
23678 }
23679
23680 /* We allow almost any value to be stored in the general registers.
23681 Restrict doubleword quantities to even register pairs in ARM state
23682 so that we can use ldrd. Do not allow very large Neon structure
23683 opaque modes in general registers; they would use too many. */
23684 if (regno <= LAST_ARM_REGNUM)
23685 {
23686 if (ARM_NUM_REGS (mode) > 4)
23687 return false;
23688
23689 if (TARGET_THUMB2)
23690 return true;
23691
23692 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23693 }
23694
23695 if (regno == FRAME_POINTER_REGNUM
23696 || regno == ARG_POINTER_REGNUM)
23697 /* We only allow integers in the fake hard registers. */
23698 return GET_MODE_CLASS (mode) == MODE_INT;
23699
23700 return false;
23701 }
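
/* For example (an illustrative note, not from the original sources):
   with TARGET_LDRD in ARM state, a DImode value is rejected in an
   odd-numbered core register such as r1 but accepted starting at an
   even register such as r2, so that it can be accessed with a single
   ldrd/strd.  */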
23702
23703 /* Implement TARGET_MODES_TIEABLE_P. */
23704
23705 static bool
23706 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23707 {
23708 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23709 return true;
23710
23711 /* We specifically want to allow elements of "structure" modes to
23712 be tieable to the structure. This more general condition allows
23713 other rarer situations too. */
23714 if (TARGET_NEON
23715 && (VALID_NEON_DREG_MODE (mode1)
23716 || VALID_NEON_QREG_MODE (mode1)
23717 || VALID_NEON_STRUCT_MODE (mode1))
23718 && (VALID_NEON_DREG_MODE (mode2)
23719 || VALID_NEON_QREG_MODE (mode2)
23720 || VALID_NEON_STRUCT_MODE (mode2)))
23721 return true;
23722
23723 return false;
23724 }
23725
23726 /* For efficiency and historical reasons, LO_REGS, HI_REGS and CC_REGS are
23727 not used in ARM mode. */
23728
23729 enum reg_class
23730 arm_regno_class (int regno)
23731 {
23732 if (regno == PC_REGNUM)
23733 return NO_REGS;
23734
23735 if (TARGET_THUMB1)
23736 {
23737 if (regno == STACK_POINTER_REGNUM)
23738 return STACK_REG;
23739 if (regno == CC_REGNUM)
23740 return CC_REG;
23741 if (regno < 8)
23742 return LO_REGS;
23743 return HI_REGS;
23744 }
23745
23746 if (TARGET_THUMB2 && regno < 8)
23747 return LO_REGS;
23748
23749 if (regno <= LAST_ARM_REGNUM
23750 || regno == FRAME_POINTER_REGNUM
23751 || regno == ARG_POINTER_REGNUM)
23752 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23753
23754 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23755 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23756
23757 if (IS_VFP_REGNUM (regno))
23758 {
23759 if (regno <= D7_VFP_REGNUM)
23760 return VFP_D0_D7_REGS;
23761 else if (regno <= LAST_LO_VFP_REGNUM)
23762 return VFP_LO_REGS;
23763 else
23764 return VFP_HI_REGS;
23765 }
23766
23767 if (IS_IWMMXT_REGNUM (regno))
23768 return IWMMXT_REGS;
23769
23770 if (IS_IWMMXT_GR_REGNUM (regno))
23771 return IWMMXT_GR_REGS;
23772
23773 return NO_REGS;
23774 }
23775
23776 /* Handle a special case when computing the offset
23777 of an argument from the frame pointer. */
23778 int
23779 arm_debugger_arg_offset (int value, rtx addr)
23780 {
23781 rtx_insn *insn;
23782
23783 /* We are only interested if dbxout_parms() failed to compute the offset. */
23784 if (value != 0)
23785 return 0;
23786
23787 /* We can only cope with the case where the address is held in a register. */
23788 if (!REG_P (addr))
23789 return 0;
23790
23791 /* If we are using the frame pointer to point at the argument, then
23792 an offset of 0 is correct. */
23793 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23794 return 0;
23795
23796 /* If we are using the stack pointer to point at the
23797 argument, then an offset of 0 is correct. */
23798 /* ??? Check this is consistent with thumb2 frame layout. */
23799 if ((TARGET_THUMB || !frame_pointer_needed)
23800 && REGNO (addr) == SP_REGNUM)
23801 return 0;
23802
23803 /* Oh dear. The argument is pointed to by a register rather
23804 than being held in a register, or being stored at a known
23805 offset from the frame pointer. Since GDB only understands
23806 those two kinds of argument we must translate the address
23807 held in the register into an offset from the frame pointer.
23808 We do this by searching through the insns for the function
23809 looking to see where this register gets its value. If the
23810 register is initialized from the frame pointer plus an offset
23811 then we are in luck and we can continue, otherwise we give up.
23812
23813 This code is exercised by producing debugging information
23814 for a function with arguments like this:
23815
23816 double func (double a, double b, int c, double d) {return d;}
23817
23818 Without this code the stab for parameter 'd' will be set to
23819 an offset of 0 from the frame pointer, rather than 8. */
23820
23821 /* The if() statement says:
23822
23823 If the insn is a normal instruction
23824 and if the insn is setting the value in a register
23825 and if the register being set is the register holding the address of the argument
23826 and if the address is computed by an addition
23827 that involves adding to a register
23828 which is the frame pointer
23829 a constant integer
23830
23831 then... */
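/* In RTL terms this is looking for an insn of the (illustrative) shape

	(set (reg Rn) (plus (reg HARD_FRAME_POINTER_REGNUM) (const_int 8)))

   where Rn is the register holding the argument's address; the
   constant then becomes the value returned below.  */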
23832
23833 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23834 {
23835 if ( NONJUMP_INSN_P (insn)
23836 && GET_CODE (PATTERN (insn)) == SET
23837 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23838 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23839 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23840 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23841 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23842 )
23843 {
23844 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23845
23846 break;
23847 }
23848 }
23849
23850 if (value == 0)
23851 {
23852 debug_rtx (addr);
23853 warning (0, "unable to compute real location of stacked parameter");
23854 value = 8; /* XXX magic hack */
23855 }
23856
23857 return value;
23858 }
23859 \f
23860 /* Implement TARGET_PROMOTED_TYPE. */
23861
23862 static tree
23863 arm_promoted_type (const_tree t)
23864 {
23865 if (SCALAR_FLOAT_TYPE_P (t)
23866 && TYPE_PRECISION (t) == 16
23867 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23868 return float_type_node;
23869 return NULL_TREE;
23870 }
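
/* Consequently (illustrative note): values of the __fp16 type are
   promoted to float for arithmetic, whereas _Float16, which is a
   distinct type, is assumed here to be left alone by this hook.  */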
23871
23872 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23873 This simply adds HFmode as a supported mode; even though we don't
23874 implement arithmetic on this type directly, it's supported by
23875 optabs conversions, much the way the double-word arithmetic is
23876 special-cased in the default hook. */
23877
23878 static bool
23879 arm_scalar_mode_supported_p (scalar_mode mode)
23880 {
23881 if (mode == HFmode)
23882 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23883 else if (ALL_FIXED_POINT_MODE_P (mode))
23884 return true;
23885 else
23886 return default_scalar_mode_supported_p (mode);
23887 }
23888
23889 /* Set the value of FLT_EVAL_METHOD.
23890 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23891
23892 0: evaluate all operations and constants, whose semantic type has at
23893 most the range and precision of type float, to the range and
23894 precision of float; evaluate all other operations and constants to
23895 the range and precision of the semantic type;
23896
23897 N, where _FloatN is a supported interchange floating type:
23898 evaluate all operations and constants, whose semantic type has at
23899 most the range and precision of _FloatN type, to the range and
23900 precision of the _FloatN type; evaluate all other operations and
23901 constants to the range and precision of the semantic type;
23902
23903 If we have the ARMv8.2-A extensions then we support _Float16 in native
23904 precision, so we should set this to 16. Otherwise, we support the type,
23905 but want to evaluate expressions in float precision, so set this to
23906 0. */
23907
23908 static enum flt_eval_method
23909 arm_excess_precision (enum excess_precision_type type)
23910 {
23911 switch (type)
23912 {
23913 case EXCESS_PRECISION_TYPE_FAST:
23914 case EXCESS_PRECISION_TYPE_STANDARD:
23915 /* We can calculate either in 16-bit range and precision or
23916 32-bit range and precision. Make that decision based on whether
23917 we have native support for the ARMv8.2-A 16-bit floating-point
23918 instructions or not. */
23919 return (TARGET_VFP_FP16INST
23920 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23921 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23922 case EXCESS_PRECISION_TYPE_IMPLICIT:
23923 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23924 default:
23925 gcc_unreachable ();
23926 }
23927 return FLT_EVAL_METHOD_UNPREDICTABLE;
23928 }
23929
23930
23931 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23932 _Float16 if we are using anything other than ieee format for 16-bit
23933 floating point. Otherwise, punt to the default implementation. */
23934 static opt_scalar_float_mode
23935 arm_floatn_mode (int n, bool extended)
23936 {
23937 if (!extended && n == 16)
23938 {
23939 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23940 return HFmode;
23941 return opt_scalar_float_mode ();
23942 }
23943
23944 return default_floatn_mode (n, extended);
23945 }
23946
23947
23948 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23949 not to early-clobber SRC registers in the process.
23950
23951 We assume that the operands described by SRC and DEST represent a
23952 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23953 number of components into which the copy has been decomposed. */
23954 void
23955 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23956 {
23957 unsigned int i;
23958
23959 if (!reg_overlap_mentioned_p (operands[0], operands[1])
23960 || REGNO (operands[0]) < REGNO (operands[1]))
23961 {
23962 for (i = 0; i < count; i++)
23963 {
23964 operands[2 * i] = dest[i];
23965 operands[2 * i + 1] = src[i];
23966 }
23967 }
23968 else
23969 {
23970 for (i = 0; i < count; i++)
23971 {
23972 operands[2 * i] = dest[count - i - 1];
23973 operands[2 * i + 1] = src[count - i - 1];
23974 }
23975 }
23976 }
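
/* Worked example (illustrative): when splitting the copy of a quad
   vector into two double-register moves, if the destination starts at
   a higher register number than an overlapping source, the component
   moves are emitted in reverse order so the upper source half is read
   before it can be overwritten.  */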
23977
23978 /* Split operands into moves from op[1] + op[2] into op[0]. */
23979
23980 void
23981 neon_split_vcombine (rtx operands[3])
23982 {
23983 unsigned int dest = REGNO (operands[0]);
23984 unsigned int src1 = REGNO (operands[1]);
23985 unsigned int src2 = REGNO (operands[2]);
23986 machine_mode halfmode = GET_MODE (operands[1]);
23987 unsigned int halfregs = REG_NREGS (operands[1]);
23988 rtx destlo, desthi;
23989
23990 if (src1 == dest && src2 == dest + halfregs)
23991 {
23992 /* No-op move. Can't split to nothing; emit something. */
23993 emit_note (NOTE_INSN_DELETED);
23994 return;
23995 }
23996
23997 /* Preserve register attributes for variable tracking. */
23998 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
23999 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
24000 GET_MODE_SIZE (halfmode));
24001
24002 /* Special case of reversed high/low parts. Use VSWP. */
24003 if (src2 == dest && src1 == dest + halfregs)
24004 {
24005 rtx x = gen_rtx_SET (destlo, operands[1]);
24006 rtx y = gen_rtx_SET (desthi, operands[2]);
24007 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
24008 return;
24009 }
24010
24011 if (!reg_overlap_mentioned_p (operands[2], destlo))
24012 {
24013 /* Try to avoid unnecessary moves if part of the result
24014 is in the right place already. */
24015 if (src1 != dest)
24016 emit_move_insn (destlo, operands[1]);
24017 if (src2 != dest + halfregs)
24018 emit_move_insn (desthi, operands[2]);
24019 }
24020 else
24021 {
24022 if (src2 != dest + halfregs)
24023 emit_move_insn (desthi, operands[2]);
24024 if (src1 != dest)
24025 emit_move_insn (destlo, operands[1]);
24026 }
24027 }
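
/* Example (illustrative): combining d0 and d1 into q0 when the halves
   are already in place emits only a deleted-insn note; if the halves
   are swapped, the single PARALLEL above allows a vswp pattern to be
   used instead of two dependent moves.  */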
24028 \f
24029 /* Return the number (counting from 0) of
24030 the least significant set bit in MASK. */
24031
24032 inline static int
24033 number_of_first_bit_set (unsigned mask)
24034 {
24035 return ctz_hwi (mask);
24036 }
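
/* E.g. number_of_first_bit_set (0x14) is 2, since bit 2 is the lowest
   bit set in 0b10100 (an illustrative value).  */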
24037
24038 /* Like emit_multi_reg_push, but allowing for a different set of
24039 registers to be described as saved. MASK is the set of registers
24040 to be saved; REAL_REGS is the set of registers to be described as
24041 saved. If REAL_REGS is 0, only describe the stack adjustment. */
24042
24043 static rtx_insn *
24044 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
24045 {
24046 unsigned long regno;
24047 rtx par[10], tmp, reg;
24048 rtx_insn *insn;
24049 int i, j;
24050
24051 /* Build the parallel of the registers actually being stored. */
24052 for (i = 0; mask; ++i, mask &= mask - 1)
24053 {
24054 regno = ctz_hwi (mask);
24055 reg = gen_rtx_REG (SImode, regno);
24056
24057 if (i == 0)
24058 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
24059 else
24060 tmp = gen_rtx_USE (VOIDmode, reg);
24061
24062 par[i] = tmp;
24063 }
24064
24065 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24066 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
24067 tmp = gen_frame_mem (BLKmode, tmp);
24068 tmp = gen_rtx_SET (tmp, par[0]);
24069 par[0] = tmp;
24070
24071 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
24072 insn = emit_insn (tmp);
24073
24074 /* Always build the stack adjustment note for unwind info. */
24075 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24076 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
24077 par[0] = tmp;
24078
24079 /* Build the parallel of the registers recorded as saved for unwind. */
24080 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
24081 {
24082 regno = ctz_hwi (real_regs);
24083 reg = gen_rtx_REG (SImode, regno);
24084
24085 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
24086 tmp = gen_frame_mem (SImode, tmp);
24087 tmp = gen_rtx_SET (tmp, reg);
24088 RTX_FRAME_RELATED_P (tmp) = 1;
24089 par[j + 1] = tmp;
24090 }
24091
24092 if (j == 0)
24093 tmp = par[0];
24094 else
24095 {
24096 RTX_FRAME_RELATED_P (par[0]) = 1;
24097 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
24098 }
24099
24100 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
24101
24102 return insn;
24103 }
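
/* Illustrative example (not from the original sources): called with
   MASK covering r4, r5 and lr and REAL_REGS equal to MASK, this emits
   the PARALLEL that prints as

	push	{r4, r5, lr}

   pre-decrementing the stack pointer by 12, and attaches a
   REG_FRAME_RELATED_EXPR note describing the stack adjustment and the
   three saves for the unwinder.  */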
24104
24105 /* Emit code to push or pop registers to or from the stack. F is the
24106 assembly file. MASK is the registers to pop. */
24107 static void
24108 thumb_pop (FILE *f, unsigned long mask)
24109 {
24110 int regno;
24111 int lo_mask = mask & 0xFF;
24112
24113 gcc_assert (mask);
24114
24115 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
24116 {
24117 /* Special case. Do not generate a POP PC statement here; do it in
24118 thumb_exit (). */
24119 thumb_exit (f, -1);
24120 return;
24121 }
24122
24123 fprintf (f, "\tpop\t{");
24124
24125 /* Look at the low registers first. */
24126 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
24127 {
24128 if (lo_mask & 1)
24129 {
24130 asm_fprintf (f, "%r", regno);
24131
24132 if ((lo_mask & ~1) != 0)
24133 fprintf (f, ", ");
24134 }
24135 }
24136
24137 if (mask & (1 << PC_REGNUM))
24138 {
24139 /* Catch popping the PC. */
24140 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
24141 || IS_CMSE_ENTRY (arm_current_func_type ()))
24142 {
24143 /* The PC is never popped directly; instead
24144 it is popped into r3 and then BX is used. */
24145 fprintf (f, "}\n");
24146
24147 thumb_exit (f, -1);
24148
24149 return;
24150 }
24151 else
24152 {
24153 if (mask & 0xFF)
24154 fprintf (f, ", ");
24155
24156 asm_fprintf (f, "%r", PC_REGNUM);
24157 }
24158 }
24159
24160 fprintf (f, "}\n");
24161 }
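
/* For instance (illustrative), MASK = (1 << 4) | (1 << 7) prints

	pop	{r4, r7}

   whereas a mask including the PC on an interworking or CMSE entry
   function (or one using __builtin_eh_return or a backtrace structure)
   closes the pop without the PC and lets thumb_exit handle the
   return.  */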
24162
24163 /* Generate code to return from a thumb function.
24164 If 'reg_containing_return_addr' is -1, then the return address is
24165 actually on the stack, at the stack pointer.
24166
24167 Note: do not forget to update the length attribute of the corresponding insn
24168 pattern when changing assembly output (e.g. the length attribute of epilogue_insns when
24169 updating Armv8-M Baseline Security Extensions register clearing
24170 sequences). */
24171 static void
24172 thumb_exit (FILE *f, int reg_containing_return_addr)
24173 {
24174 unsigned regs_available_for_popping;
24175 unsigned regs_to_pop;
24176 int pops_needed;
24177 unsigned available;
24178 unsigned required;
24179 machine_mode mode;
24180 int size;
24181 int restore_a4 = FALSE;
24182
24183 /* Compute the registers we need to pop. */
24184 regs_to_pop = 0;
24185 pops_needed = 0;
24186
24187 if (reg_containing_return_addr == -1)
24188 {
24189 regs_to_pop |= 1 << LR_REGNUM;
24190 ++pops_needed;
24191 }
24192
24193 if (TARGET_BACKTRACE)
24194 {
24195 /* Restore the (ARM) frame pointer and stack pointer. */
24196 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
24197 pops_needed += 2;
24198 }
24199
24200 /* If there is nothing to pop then just emit the BX instruction and
24201 return. */
24202 if (pops_needed == 0)
24203 {
24204 if (crtl->calls_eh_return)
24205 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24206
24207 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24208 {
24209 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
24210 reg_containing_return_addr);
24211 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24212 }
24213 else
24214 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24215 return;
24216 }
24217 /* Otherwise if we are not supporting interworking and we have not created
24218 a backtrace structure and the function was not entered in ARM mode then
24219 just pop the return address straight into the PC. */
24220 else if (!TARGET_INTERWORK
24221 && !TARGET_BACKTRACE
24222 && !is_called_in_ARM_mode (current_function_decl)
24223 && !crtl->calls_eh_return
24224 && !IS_CMSE_ENTRY (arm_current_func_type ()))
24225 {
24226 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
24227 return;
24228 }
24229
24230 /* Find out how many of the (return) argument registers we can corrupt. */
24231 regs_available_for_popping = 0;
24232
24233 /* If returning via __builtin_eh_return, the bottom three registers
24234 all contain information needed for the return. */
24235 if (crtl->calls_eh_return)
24236 size = 12;
24237 else
24238 {
24239 /* Try to deduce the registers used from the function's
24240 return value. This is more reliable than examining
24241 df_regs_ever_live_p () because that will be set if the register is
24242 ever used in the function, not just if the register is used
24243 to hold a return value. */
24244
24245 if (crtl->return_rtx != 0)
24246 mode = GET_MODE (crtl->return_rtx);
24247 else
24248 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24249
24250 size = GET_MODE_SIZE (mode);
24251
24252 if (size == 0)
24253 {
24254 /* In a void function we can use any argument register.
24255 In a function that returns a structure on the stack
24256 we can use the second and third argument registers. */
24257 if (mode == VOIDmode)
24258 regs_available_for_popping =
24259 (1 << ARG_REGISTER (1))
24260 | (1 << ARG_REGISTER (2))
24261 | (1 << ARG_REGISTER (3));
24262 else
24263 regs_available_for_popping =
24264 (1 << ARG_REGISTER (2))
24265 | (1 << ARG_REGISTER (3));
24266 }
24267 else if (size <= 4)
24268 regs_available_for_popping =
24269 (1 << ARG_REGISTER (2))
24270 | (1 << ARG_REGISTER (3));
24271 else if (size <= 8)
24272 regs_available_for_popping =
24273 (1 << ARG_REGISTER (3));
24274 }
24275
24276 /* Match registers to be popped with registers into which we pop them. */
24277 for (available = regs_available_for_popping,
24278 required = regs_to_pop;
24279 required != 0 && available != 0;
24280 available &= ~(available & - available),
24281 required &= ~(required & - required))
24282 -- pops_needed;
24283
24284 /* If we have any popping registers left over, remove them. */
24285 if (available > 0)
24286 regs_available_for_popping &= ~available;
24287
24288 /* Otherwise if we need another popping register we can use
24289 the fourth argument register. */
24290 else if (pops_needed)
24291 {
24292 /* If we have not found any free argument registers and
24293 reg a4 contains the return address, we must move it. */
24294 if (regs_available_for_popping == 0
24295 && reg_containing_return_addr == LAST_ARG_REGNUM)
24296 {
24297 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24298 reg_containing_return_addr = LR_REGNUM;
24299 }
24300 else if (size > 12)
24301 {
24302 /* Register a4 is being used to hold part of the return value,
24303 but we have dire need of a free, low register. */
24304 restore_a4 = TRUE;
24305
24306 asm_fprintf (f, "\tmov\t%r, %r\n", IP_REGNUM, LAST_ARG_REGNUM);
24307 }
24308
24309 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24310 {
24311 /* The fourth argument register is available. */
24312 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24313
24314 --pops_needed;
24315 }
24316 }
24317
24318 /* Pop as many registers as we can. */
24319 thumb_pop (f, regs_available_for_popping);
24320
24321 /* Process the registers we popped. */
24322 if (reg_containing_return_addr == -1)
24323 {
24324 /* The return address was popped into the lowest numbered register. */
24325 regs_to_pop &= ~(1 << LR_REGNUM);
24326
24327 reg_containing_return_addr =
24328 number_of_first_bit_set (regs_available_for_popping);
24329
24330 /* Remove this register from the mask of available registers, so that
24331 the return address will not be corrupted by further pops. */
24332 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24333 }
24334
24335 /* If we popped other registers then handle them here. */
24336 if (regs_available_for_popping)
24337 {
24338 int frame_pointer;
24339
24340 /* Work out which register currently contains the frame pointer. */
24341 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24342
24343 /* Move it into the correct place. */
24344 asm_fprintf (f, "\tmov\t%r, %r\n",
24345 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24346
24347 /* (Temporarily) remove it from the mask of popped registers. */
24348 regs_available_for_popping &= ~(1 << frame_pointer);
24349 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24350
24351 if (regs_available_for_popping)
24352 {
24353 int stack_pointer;
24354
24355 /* We popped the stack pointer as well,
24356 find the register that contains it. */
24357 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24358
24359 /* Move it into the stack register. */
24360 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24361
24362 /* At this point we have popped all necessary registers, so
24363 do not worry about restoring regs_available_for_popping
24364 to its correct value:
24365
24366 assert (pops_needed == 0)
24367 assert (regs_available_for_popping == (1 << frame_pointer))
24368 assert (regs_to_pop == (1 << STACK_POINTER)) */
24369 }
24370 else
24371 {
24372 /* Since we have just moved the popped value into the frame
24373 pointer, the popping register is available for reuse, and
24374 we know that we still have the stack pointer left to pop. */
24375 regs_available_for_popping |= (1 << frame_pointer);
24376 }
24377 }
24378
24379 /* If we still have registers left on the stack, but we no longer have
24380 any registers into which we can pop them, then we must move the return
24381 address into the link register and make available the register that
24382 contained it. */
24383 if (regs_available_for_popping == 0 && pops_needed > 0)
24384 {
24385 regs_available_for_popping |= 1 << reg_containing_return_addr;
24386
24387 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24388 reg_containing_return_addr);
24389
24390 reg_containing_return_addr = LR_REGNUM;
24391 }
24392
24393 /* If we have registers left on the stack then pop some more.
24394 We know that at most we will want to pop FP and SP. */
24395 if (pops_needed > 0)
24396 {
24397 int popped_into;
24398 int move_to;
24399
24400 thumb_pop (f, regs_available_for_popping);
24401
24402 /* We have popped either FP or SP.
24403 Move whichever one it is into the correct register. */
24404 popped_into = number_of_first_bit_set (regs_available_for_popping);
24405 move_to = number_of_first_bit_set (regs_to_pop);
24406
24407 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24408 --pops_needed;
24409 }
24410
24411 /* If we still have not popped everything then we must have only
24412 had one register available to us and we are now popping the SP. */
24413 if (pops_needed > 0)
24414 {
24415 int popped_into;
24416
24417 thumb_pop (f, regs_available_for_popping);
24418
24419 popped_into = number_of_first_bit_set (regs_available_for_popping);
24420
24421 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24422 /*
24423 assert (regs_to_pop == (1 << STACK_POINTER))
24424 assert (pops_needed == 1)
24425 */
24426 }
24427
24428 /* If necessary restore the a4 register. */
24429 if (restore_a4)
24430 {
24431 if (reg_containing_return_addr != LR_REGNUM)
24432 {
24433 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24434 reg_containing_return_addr = LR_REGNUM;
24435 }
24436
24437 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24438 }
24439
24440 if (crtl->calls_eh_return)
24441 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24442
24443 /* Return to caller. */
24444 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24445 {
24446 /* This is for the cases where LR is not being used to contain the return
24447 address. It may therefore contain information that we might not want
24448 to leak, hence it must be cleared. The value in R0 will never be a
24449 secret at this point, so it is safe to use it, see the clearing code
24450 in 'cmse_nonsecure_entry_clear_before_return'. */
24451 if (reg_containing_return_addr != LR_REGNUM)
24452 asm_fprintf (f, "\tmov\tlr, r0\n");
24453
24454 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24455 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24456 }
24457 else
24458 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24459 }
24460 \f
24461 /* Scan INSN just before assembler is output for it.
24462 For Thumb-1, we track the status of the condition codes; this
24463 information is used in the cbranchsi4_insn pattern. */
24464 void
24465 thumb1_final_prescan_insn (rtx_insn *insn)
24466 {
24467 if (flag_print_asm_name)
24468 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24469 INSN_ADDRESSES (INSN_UID (insn)));
24470 /* Don't overwrite the previous setter when we get to a cbranch. */
24471 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24472 {
24473 enum attr_conds conds;
24474
24475 if (cfun->machine->thumb1_cc_insn)
24476 {
24477 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24478 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24479 CC_STATUS_INIT;
24480 }
24481 conds = get_attr_conds (insn);
24482 if (conds == CONDS_SET)
24483 {
24484 rtx set = single_set (insn);
24485 cfun->machine->thumb1_cc_insn = insn;
24486 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24487 cfun->machine->thumb1_cc_op1 = const0_rtx;
24488 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24489 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24490 {
24491 rtx src1 = XEXP (SET_SRC (set), 1);
24492 if (src1 == const0_rtx)
24493 cfun->machine->thumb1_cc_mode = CCmode;
24494 }
24495 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24496 {
24497 /* Record the src register operand instead of dest because
24498 cprop_hardreg pass propagates src. */
24499 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24500 }
24501 }
24502 else if (conds != CONDS_NOCOND)
24503 cfun->machine->thumb1_cc_insn = NULL_RTX;
24504 }
24505
24506 /* Check if unexpected far jump is used. */
24507 if (cfun->machine->lr_save_eliminated
24508 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24509 internal_error ("Unexpected thumb1 far jump");
24510 }
24511
24512 int
24513 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24514 {
24515 unsigned HOST_WIDE_INT mask = 0xff;
24516 int i;
24517
24518 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24519 if (val == 0) /* XXX */
24520 return 0;
24521
24522 for (i = 0; i < 25; i++)
24523 if ((val & (mask << i)) == val)
24524 return 1;
24525
24526 return 0;
24527 }
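
/* Example (illustrative): 0x1FE00 is 0xFF << 9, so it fits an 8-bit
   value shifted left and the function returns 1; 0x101 spans more than
   eight contiguous bits, so the function returns 0.  */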
24528
24529 /* Returns nonzero if the current function contains,
24530 or might contain a far jump. */
24531 static int
24532 thumb_far_jump_used_p (void)
24533 {
24534 rtx_insn *insn;
24535 bool far_jump = false;
24536 unsigned int func_size = 0;
24537
24538 /* If we have already decided that far jumps may be used,
24539 do not bother checking again, and always return true even if
24540 it turns out that they are not being used. Once we have made
24541 the decision that far jumps are present (and that hence the link
24542 register will be pushed onto the stack) we cannot go back on it. */
24543 if (cfun->machine->far_jump_used)
24544 return 1;
24545
24546 /* If this function is not being called from the prologue/epilogue
24547 generation code then it must be being called from the
24548 INITIAL_ELIMINATION_OFFSET macro. */
24549 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24550 {
24551 /* In this case we know that we are being asked about the elimination
24552 of the arg pointer register. If that register is not being used,
24553 then there are no arguments on the stack, and we do not have to
24554 worry that a far jump might force the prologue to push the link
24555 register, changing the stack offsets. In this case we can just
24556 return false, since the presence of far jumps in the function will
24557 not affect stack offsets.
24558
24559 If the arg pointer is live (or if it was live, but has now been
24560 eliminated and so set to dead) then we do have to test to see if
24561 the function might contain a far jump. This test can lead to some
24562 false negatives, since before reload is completed, the length of
24563 branch instructions is not known, so gcc defaults to returning their
24564 longest length, which in turn sets the far jump attribute to true.
24565
24566 A false negative will not result in bad code being generated, but it
24567 will result in a needless push and pop of the link register. We
24568 hope that this does not occur too often.
24569
24570 If we need doubleword stack alignment this could affect the other
24571 elimination offsets so we can't risk getting it wrong. */
24572 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24573 cfun->machine->arg_pointer_live = 1;
24574 else if (!cfun->machine->arg_pointer_live)
24575 return 0;
24576 }
24577
24578 /* We should not change far_jump_used during or after reload, as there is
24579 no chance to change stack frame layout. */
24580 if (reload_in_progress || reload_completed)
24581 return 0;
24582
24583 /* Check to see if the function contains a branch
24584 insn with the far jump attribute set. */
24585 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24586 {
24587 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24588 {
24589 far_jump = true;
24590 }
24591 func_size += get_attr_length (insn);
24592 }
24593
24594 /* Attribute far_jump will always be true for thumb1 before
24595 the shorten_branch pass. So checking the far_jump attribute before
24596 shorten_branch isn't very useful.
24597
24598 The following heuristic tries to estimate more accurately whether a far
24599 jump may finally be used. The heuristic is very conservative, as there
24600 is no chance to roll back the decision not to use a far jump.
24601
24602 The Thumb1 long branch offset is -2048 to 2046. The worst case is that
24603 each 2-byte insn is associated with a 4-byte constant pool entry. Using
24604 a function size of 2048/3 as the threshold is conservative enough. */
24605 if (far_jump)
24606 {
24607 if ((func_size * 3) >= 2048)
24608 {
24609 /* Record the fact that we have decided that
24610 the function does use far jumps. */
24611 cfun->machine->far_jump_used = 1;
24612 return 1;
24613 }
24614 }
24615
24616 return 0;
24617 }
24618
24619 /* Return nonzero if FUNC must be entered in ARM mode. */
24620 static bool
24621 is_called_in_ARM_mode (tree func)
24622 {
24623 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24624
24625 /* Ignore the problem of functions whose address is taken. */
24626 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24627 return true;
24628
24629 #ifdef ARM_PE
24630 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24631 #else
24632 return false;
24633 #endif
24634 }
24635
24636 /* Given the stack offsets and register mask in OFFSETS, decide how
24637 many additional registers to push instead of subtracting a constant
24638 from SP. For epilogues the principle is the same except we use pop.
24639 FOR_PROLOGUE indicates which we're generating. */
24640 static int
24641 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24642 {
24643 HOST_WIDE_INT amount;
24644 unsigned long live_regs_mask = offsets->saved_regs_mask;
24645 /* Extract a mask of the ones we can give to the Thumb's push/pop
24646 instruction. */
24647 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24648 /* Then count how many other high registers will need to be pushed. */
24649 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24650 int n_free, reg_base, size;
24651
24652 if (!for_prologue && frame_pointer_needed)
24653 amount = offsets->locals_base - offsets->saved_regs;
24654 else
24655 amount = offsets->outgoing_args - offsets->saved_regs;
24656
24657 /* If the stack frame size is 512 exactly, we can save one load
24658 instruction, which should make this a win even when optimizing
24659 for speed. */
24660 if (!optimize_size && amount != 512)
24661 return 0;
24662
24663 /* Can't do this if there are high registers to push. */
24664 if (high_regs_pushed != 0)
24665 return 0;
24666
24667 /* Shouldn't do it in the prologue if no registers would normally
24668 be pushed at all. In the epilogue, also allow it if we'll have
24669 a pop insn for the PC. */
24670 if (l_mask == 0
24671 && (for_prologue
24672 || TARGET_BACKTRACE
24673 || (live_regs_mask & 1 << LR_REGNUM) == 0
24674 || TARGET_INTERWORK
24675 || crtl->args.pretend_args_size != 0))
24676 return 0;
24677
24678 /* Don't do this if thumb_expand_prologue wants to emit instructions
24679 between the push and the stack frame allocation. */
24680 if (for_prologue
24681 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24682 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24683 return 0;
24684
24685 reg_base = 0;
24686 n_free = 0;
24687 if (!for_prologue)
24688 {
24689 size = arm_size_return_regs ();
24690 reg_base = ARM_NUM_INTS (size);
24691 live_regs_mask >>= reg_base;
24692 }
24693
24694 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24695 && (for_prologue || call_used_regs[reg_base + n_free]))
24696 {
24697 live_regs_mask >>= 1;
24698 n_free++;
24699 }
24700
24701 if (n_free == 0)
24702 return 0;
24703 gcc_assert (amount / 4 * 4 == amount);
24704
24705 if (amount >= 512 && (amount - n_free * 4) < 512)
24706 return (amount - 508) / 4;
24707 if (amount <= n_free * 4)
24708 return amount / 4;
24709 return 0;
24710 }
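
/* Worked example (illustrative): with a frame of exactly 512 bytes and
   one suitable free low register, this returns (512 - 508) / 4 == 1,
   so one extra register is pushed (or popped) and the remaining
   508-byte adjustment fits the immediate range of a single Thumb
   SP-adjusting instruction.  */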
24711
24712 /* The bits which aren't usefully expanded as rtl. */
24713 const char *
24714 thumb1_unexpanded_epilogue (void)
24715 {
24716 arm_stack_offsets *offsets;
24717 int regno;
24718 unsigned long live_regs_mask = 0;
24719 int high_regs_pushed = 0;
24720 int extra_pop;
24721 int had_to_push_lr;
24722 int size;
24723
24724 if (cfun->machine->return_used_this_function != 0)
24725 return "";
24726
24727 if (IS_NAKED (arm_current_func_type ()))
24728 return "";
24729
24730 offsets = arm_get_frame_offsets ();
24731 live_regs_mask = offsets->saved_regs_mask;
24732 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24733
24734 /* Try to deduce the registers used from the function's return value.
24735 This is more reliable than examining df_regs_ever_live_p () because that
24736 will be set if the register is ever used in the function, not just if
24737 the register is used to hold a return value. */
24738 size = arm_size_return_regs ();
24739
24740 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24741 if (extra_pop > 0)
24742 {
24743 unsigned long extra_mask = (1 << extra_pop) - 1;
24744 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24745 }
24746
24747 /* The prolog may have pushed some high registers to use as
24748 work registers. e.g. the testsuite file:
24749 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24750 compiles to produce:
24751 push {r4, r5, r6, r7, lr}
24752 mov r7, r9
24753 mov r6, r8
24754 push {r6, r7}
24755 as part of the prolog. We have to undo that pushing here. */
24756
24757 if (high_regs_pushed)
24758 {
24759 unsigned long mask = live_regs_mask & 0xff;
24760 int next_hi_reg;
24761
24762 /* The available low registers depend on the size of the value we are
24763 returning. */
24764 if (size <= 12)
24765 mask |= 1 << 3;
24766 if (size <= 8)
24767 mask |= 1 << 2;
24768
24769 if (mask == 0)
24770 /* Oh dear! We have no low registers into which we can pop
24771 high registers! */
24772 internal_error
24773 ("no low registers available for popping high registers");
24774
24775 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24776 if (live_regs_mask & (1 << next_hi_reg))
24777 break;
24778
24779 while (high_regs_pushed)
24780 {
24781 /* Find lo register(s) into which the high register(s) can
24782 be popped. */
24783 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24784 {
24785 if (mask & (1 << regno))
24786 high_regs_pushed--;
24787 if (high_regs_pushed == 0)
24788 break;
24789 }
24790
24791 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24792
24793 /* Pop the values into the low register(s). */
24794 thumb_pop (asm_out_file, mask);
24795
24796 /* Move the value(s) into the high registers. */
24797 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24798 {
24799 if (mask & (1 << regno))
24800 {
24801 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24802 regno);
24803
24804 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24805 if (live_regs_mask & (1 << next_hi_reg))
24806 break;
24807 }
24808 }
24809 }
24810 live_regs_mask &= ~0x0f00;
24811 }
24812
24813 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24814 live_regs_mask &= 0xff;
24815
24816 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24817 {
24818 /* Pop the return address into the PC. */
24819 if (had_to_push_lr)
24820 live_regs_mask |= 1 << PC_REGNUM;
24821
24822 /* Either no argument registers were pushed or a backtrace
24823 structure was created which includes an adjusted stack
24824 pointer, so just pop everything. */
24825 if (live_regs_mask)
24826 thumb_pop (asm_out_file, live_regs_mask);
24827
24828 /* We have either just popped the return address into the
24829 PC or it was kept in LR for the entire function.
24830 Note that thumb_pop has already called thumb_exit if the
24831 PC was in the list. */
24832 if (!had_to_push_lr)
24833 thumb_exit (asm_out_file, LR_REGNUM);
24834 }
24835 else
24836 {
24837 /* Pop everything but the return address. */
24838 if (live_regs_mask)
24839 thumb_pop (asm_out_file, live_regs_mask);
24840
24841 if (had_to_push_lr)
24842 {
24843 if (size > 12)
24844 {
24845 /* We have no free low regs, so save one. */
24846 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24847 LAST_ARG_REGNUM);
24848 }
24849
24850 /* Get the return address into a temporary register. */
24851 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24852
24853 if (size > 12)
24854 {
24855 /* Move the return address to lr. */
24856 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24857 LAST_ARG_REGNUM);
24858 /* Restore the low register. */
24859 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24860 IP_REGNUM);
24861 regno = LR_REGNUM;
24862 }
24863 else
24864 regno = LAST_ARG_REGNUM;
24865 }
24866 else
24867 regno = LR_REGNUM;
24868
24869 /* Remove the argument registers that were pushed onto the stack. */
24870 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24871 SP_REGNUM, SP_REGNUM,
24872 crtl->args.pretend_args_size);
24873
24874 thumb_exit (asm_out_file, regno);
24875 }
24876
24877 return "";
24878 }
24879
24880 /* Functions to save and restore machine-specific function data. */
24881 static struct machine_function *
24882 arm_init_machine_status (void)
24883 {
24884 struct machine_function *machine;
24885 machine = ggc_cleared_alloc<machine_function> ();
24886
24887 #if ARM_FT_UNKNOWN != 0
24888 machine->func_type = ARM_FT_UNKNOWN;
24889 #endif
24890 machine->static_chain_stack_bytes = -1;
24891 return machine;
24892 }
24893
24894 /* Return an RTX indicating where the return address to the
24895 calling function can be found. */
24896 rtx
24897 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24898 {
24899 if (count != 0)
24900 return NULL_RTX;
24901
24902 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24903 }
24904
24905 /* Do anything needed before RTL is emitted for each function. */
24906 void
24907 arm_init_expanders (void)
24908 {
24909 /* Arrange to initialize and mark the machine per-function status. */
24910 init_machine_status = arm_init_machine_status;
24911
24912 /* This is to stop the combine pass optimizing away the alignment
24913 adjustment of va_arg. */
24914 /* ??? It is claimed that this should not be necessary. */
24915 if (cfun)
24916 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24917 }
24918
24919 /* Check whether FUNC is to be compiled in a different (ARM/Thumb) mode. */
24920
24921 bool
24922 arm_change_mode_p (tree func)
24923 {
24924 if (TREE_CODE (func) != FUNCTION_DECL)
24925 return false;
24926
24927 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24928
24929 if (!callee_tree)
24930 callee_tree = target_option_default_node;
24931
24932 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24933 int flags = callee_opts->x_target_flags;
24934
24935 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24936 }
24937
24938 /* Like arm_compute_initial_elimination_offset. Simpler because there
24939 isn't an ABI-specified frame pointer for Thumb. Instead, we set it
24940 to point at the base of the local variables after static stack
24941 space for a function has been allocated. */
24942
24943 HOST_WIDE_INT
24944 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24945 {
24946 arm_stack_offsets *offsets;
24947
24948 offsets = arm_get_frame_offsets ();
24949
24950 switch (from)
24951 {
24952 case ARG_POINTER_REGNUM:
24953 switch (to)
24954 {
24955 case STACK_POINTER_REGNUM:
24956 return offsets->outgoing_args - offsets->saved_args;
24957
24958 case FRAME_POINTER_REGNUM:
24959 return offsets->soft_frame - offsets->saved_args;
24960
24961 case ARM_HARD_FRAME_POINTER_REGNUM:
24962 return offsets->saved_regs - offsets->saved_args;
24963
24964 case THUMB_HARD_FRAME_POINTER_REGNUM:
24965 return offsets->locals_base - offsets->saved_args;
24966
24967 default:
24968 gcc_unreachable ();
24969 }
24970 break;
24971
24972 case FRAME_POINTER_REGNUM:
24973 switch (to)
24974 {
24975 case STACK_POINTER_REGNUM:
24976 return offsets->outgoing_args - offsets->soft_frame;
24977
24978 case ARM_HARD_FRAME_POINTER_REGNUM:
24979 return offsets->saved_regs - offsets->soft_frame;
24980
24981 case THUMB_HARD_FRAME_POINTER_REGNUM:
24982 return offsets->locals_base - offsets->soft_frame;
24983
24984 default:
24985 gcc_unreachable ();
24986 }
24987 break;
24988
24989 default:
24990 gcc_unreachable ();
24991 }
24992 }
24993
24994 /* Generate the function's prologue. */
24995
24996 void
24997 thumb1_expand_prologue (void)
24998 {
24999 rtx_insn *insn;
25000
25001 HOST_WIDE_INT amount;
25002 HOST_WIDE_INT size;
25003 arm_stack_offsets *offsets;
25004 unsigned long func_type;
25005 int regno;
25006 unsigned long live_regs_mask;
25007 unsigned long l_mask;
25008 unsigned high_regs_pushed = 0;
25009 bool lr_needs_saving;
25010
25011 func_type = arm_current_func_type ();
25012
25013 /* Naked functions don't have prologues. */
25014 if (IS_NAKED (func_type))
25015 {
25016 if (flag_stack_usage_info)
25017 current_function_static_stack_size = 0;
25018 return;
25019 }
25020
25021 if (IS_INTERRUPT (func_type))
25022 {
25023 error ("interrupt Service Routines cannot be coded in Thumb mode");
25024 return;
25025 }
25026
25027 if (is_called_in_ARM_mode (current_function_decl))
25028 emit_insn (gen_prologue_thumb1_interwork ());
25029
25030 offsets = arm_get_frame_offsets ();
25031 live_regs_mask = offsets->saved_regs_mask;
25032 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
25033
25034 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
25035 l_mask = live_regs_mask & 0x40ff;
25036 /* Then count how many other high registers will need to be pushed. */
25037 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25038
25039 if (crtl->args.pretend_args_size)
25040 {
25041 rtx x = GEN_INT (-crtl->args.pretend_args_size);
25042
25043 if (cfun->machine->uses_anonymous_args)
25044 {
25045 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
25046 unsigned long mask;
25047
25048 mask = 1ul << (LAST_ARG_REGNUM + 1);
25049 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
25050
25051 insn = thumb1_emit_multi_reg_push (mask, 0);
25052 }
25053 else
25054 {
25055 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25056 stack_pointer_rtx, x));
25057 }
25058 RTX_FRAME_RELATED_P (insn) = 1;
25059 }
25060
25061 if (TARGET_BACKTRACE)
25062 {
25063 HOST_WIDE_INT offset = 0;
25064 unsigned work_register;
25065 rtx work_reg, x, arm_hfp_rtx;
25066
25067 /* We have been asked to create a stack backtrace structure.
25068 The code looks like this:
25069
25070 0 .align 2
25071 0 func:
25072 0 sub SP, #16 Reserve space for 4 registers.
25073 2 push {R7} Push low registers.
25074 4 add R7, SP, #20 Get the stack pointer before the push.
25075 6 str R7, [SP, #8] Store the stack pointer
25076 (before reserving the space).
25077 8 mov R7, PC Get hold of the start of this code + 12.
25078 10 str R7, [SP, #16] Store it.
25079 12 mov R7, FP Get hold of the current frame pointer.
25080 14 str R7, [SP, #4] Store it.
25081 16 mov R7, LR Get hold of the current return address.
25082 18 str R7, [SP, #12] Store it.
25083 20 add R7, SP, #16 Point at the start of the
25084 backtrace structure.
25085 22 mov FP, R7 Put this value into the frame pointer. */
25086
25087 work_register = thumb_find_work_register (live_regs_mask);
25088 work_reg = gen_rtx_REG (SImode, work_register);
25089 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
25090
25091 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25092 stack_pointer_rtx, GEN_INT (-16)));
25093 RTX_FRAME_RELATED_P (insn) = 1;
25094
25095 if (l_mask)
25096 {
25097 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
25098 RTX_FRAME_RELATED_P (insn) = 1;
25099 lr_needs_saving = false;
25100
25101 offset = bit_count (l_mask) * UNITS_PER_WORD;
25102 }
25103
25104 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
25105 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25106
25107 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
25108 x = gen_frame_mem (SImode, x);
25109 emit_move_insn (x, work_reg);
25110
25111 /* Make sure that the instruction fetching the PC is in the right place
25112 to calculate "start of backtrace creation code + 12". */
25113 /* ??? The stores using the common WORK_REG ought to be enough to
25114 prevent the scheduler from doing anything weird. Failing that
25115 we could always move all of the following into an UNSPEC_VOLATILE. */
25116 if (l_mask)
25117 {
25118 x = gen_rtx_REG (SImode, PC_REGNUM);
25119 emit_move_insn (work_reg, x);
25120
25121 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25122 x = gen_frame_mem (SImode, x);
25123 emit_move_insn (x, work_reg);
25124
25125 emit_move_insn (work_reg, arm_hfp_rtx);
25126
25127 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25128 x = gen_frame_mem (SImode, x);
25129 emit_move_insn (x, work_reg);
25130 }
25131 else
25132 {
25133 emit_move_insn (work_reg, arm_hfp_rtx);
25134
25135 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25136 x = gen_frame_mem (SImode, x);
25137 emit_move_insn (x, work_reg);
25138
25139 x = gen_rtx_REG (SImode, PC_REGNUM);
25140 emit_move_insn (work_reg, x);
25141
25142 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25143 x = gen_frame_mem (SImode, x);
25144 emit_move_insn (x, work_reg);
25145 }
25146
25147 x = gen_rtx_REG (SImode, LR_REGNUM);
25148 emit_move_insn (work_reg, x);
25149
25150 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
25151 x = gen_frame_mem (SImode, x);
25152 emit_move_insn (x, work_reg);
25153
25154 x = GEN_INT (offset + 12);
25155 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25156
25157 emit_move_insn (arm_hfp_rtx, work_reg);
25158 }
25159 /* Optimization: If we are not pushing any low registers but we are going
25160 to push some high registers then delay our first push. This will just
25161 be a push of LR and we can combine it with the push of the first high
25162 register. */
25163 else if ((l_mask & 0xff) != 0
25164 || (high_regs_pushed == 0 && lr_needs_saving))
25165 {
25166 unsigned long mask = l_mask;
25167 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
25168 insn = thumb1_emit_multi_reg_push (mask, mask);
25169 RTX_FRAME_RELATED_P (insn) = 1;
25170 lr_needs_saving = false;
25171 }
25172
25173 if (high_regs_pushed)
25174 {
25175 unsigned pushable_regs;
25176 unsigned next_hi_reg;
25177 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
25178 : crtl->args.info.nregs;
25179 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
25180
25181 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
25182 if (live_regs_mask & (1 << next_hi_reg))
25183 break;
25184
25185 /* Here we need to mask out registers used for passing arguments
25186 even if they can be pushed. This avoids using them to stash the high
25187 registers, since doing so could clobber argument values that are still live. */
25188 pushable_regs = l_mask & (~arg_regs_mask);
25189 if (lr_needs_saving)
25190 pushable_regs &= ~(1 << LR_REGNUM);
25191
25192 if (pushable_regs == 0)
25193 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
25194
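      /* Sketch of the loop below (illustrative): each pass copies as many
	 live high registers (r8-r11) as will fit into the currently
	 pushable low registers, then pushes those low registers in one go.
	 For instance, with only r4/r5 pushable and r8-r10 live, the first
	 pass would move r9/r10 down and push them, and a second pass would
	 handle r8.  */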
25195 while (high_regs_pushed > 0)
25196 {
25197 unsigned long real_regs_mask = 0;
25198 unsigned long push_mask = 0;
25199
25200 for (regno = LR_REGNUM; regno >= 0; regno --)
25201 {
25202 if (pushable_regs & (1 << regno))
25203 {
25204 emit_move_insn (gen_rtx_REG (SImode, regno),
25205 gen_rtx_REG (SImode, next_hi_reg));
25206
25207 high_regs_pushed --;
25208 real_regs_mask |= (1 << next_hi_reg);
25209 push_mask |= (1 << regno);
25210
25211 if (high_regs_pushed)
25212 {
25213 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
25214 next_hi_reg --)
25215 if (live_regs_mask & (1 << next_hi_reg))
25216 break;
25217 }
25218 else
25219 break;
25220 }
25221 }
25222
25223 /* If we had to find a work register and we have not yet
25224 saved the LR then add it to the list of regs to push. */
25225 if (lr_needs_saving)
25226 {
25227 push_mask |= 1 << LR_REGNUM;
25228 real_regs_mask |= 1 << LR_REGNUM;
25229 lr_needs_saving = false;
25230 }
25231
25232 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
25233 RTX_FRAME_RELATED_P (insn) = 1;
25234 }
25235 }
25236
25237 /* Load the pic register before setting the frame pointer,
25238 so we can use r7 as a temporary work register. */
25239 if (flag_pic && arm_pic_register != INVALID_REGNUM)
25240 arm_load_pic_register (live_regs_mask);
25241
25242 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
25243 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
25244 stack_pointer_rtx);
25245
25246 size = offsets->outgoing_args - offsets->saved_args;
25247 if (flag_stack_usage_info)
25248 current_function_static_stack_size = size;
25249
25250 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25251 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
25252 || flag_stack_clash_protection)
25253 && size)
25254 sorry ("-fstack-check=specific for Thumb-1");
25255
25256 amount = offsets->outgoing_args - offsets->saved_regs;
25257 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25258 if (amount)
25259 {
25260 if (amount < 512)
25261 {
25262 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25263 GEN_INT (- amount)));
25264 RTX_FRAME_RELATED_P (insn) = 1;
25265 }
25266 else
25267 {
25268 rtx reg, dwarf;
25269
25270 /* The stack decrement is too big for an immediate value in a single
25271 insn. In theory we could issue multiple subtracts, but after
25272 three of them it becomes more space efficient to place the full
25273 value in the constant pool and load into a register. (Also the
25274 ARM debugger really likes to see only one stack decrement per
25275 function). So instead we look for a scratch register into which
25276 we can load the decrement, and then we subtract this from the
25277 stack pointer. Unfortunately on the thumb the only available
25278 scratch registers are the argument registers, and we cannot use
25279 these as they may hold arguments to the function. Instead we
25280 attempt to locate a call preserved register which is used by this
25281 function. If we can find one, then we know that it will have
25282 been pushed at the start of the prologue and so we can corrupt
25283 it now. */
25284 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25285 if (live_regs_mask & (1 << regno))
25286 break;
25287
25288 gcc_assert (regno <= LAST_LO_REGNUM);
25289
25290 reg = gen_rtx_REG (SImode, regno);
25291
25292 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25293
25294 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25295 stack_pointer_rtx, reg));
25296
25297 dwarf = gen_rtx_SET (stack_pointer_rtx,
25298 plus_constant (Pmode, stack_pointer_rtx,
25299 -amount));
25300 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25301 RTX_FRAME_RELATED_P (insn) = 1;
25302 }
25303 }
25304
25305 if (frame_pointer_needed)
25306 thumb_set_frame_pointer (offsets);
25307
25308 /* If we are profiling, make sure no instructions are scheduled before
25309 the call to mcount. Similarly if the user has requested no
25310 scheduling in the prolog. Similarly if we want non-call exceptions
25311 using the EABI unwinder, to prevent faulting instructions from being
25312 swapped with a stack adjustment. */
25313 if (crtl->profile || !TARGET_SCHED_PROLOG
25314 || (arm_except_unwind_info (&global_options) == UI_TARGET
25315 && cfun->can_throw_non_call_exceptions))
25316 emit_insn (gen_blockage ());
25317
25318 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25319 if (live_regs_mask & 0xff)
25320 cfun->machine->lr_save_eliminated = 0;
25321 }
25322
25323 /* Clear caller saved registers not used to pass return values and leaked
25324 condition flags before exiting a cmse_nonsecure_entry function. */
25325
25326 void
25327 cmse_nonsecure_entry_clear_before_return (void)
25328 {
25329 int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
25330 uint32_t padding_bits_to_clear = 0;
25331 auto_sbitmap to_clear_bitmap (maxregno + 1);
25332 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
25333 tree result_type;
25334
25335 bitmap_clear (to_clear_bitmap);
25336 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
25337 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
25338
25339 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25340 registers. */
25341 if (TARGET_HARD_FLOAT)
25342 {
25343 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
25344
25345 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
25346
25347 /* Make sure we don't clear the two scratch registers used to clear the
25348 relevant FPSCR bits in output_return_instruction. */
25349 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25350 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
25351 emit_use (gen_rtx_REG (SImode, 4));
25352 bitmap_clear_bit (to_clear_bitmap, 4);
25353 }
25354
25355 /* If the user has defined registers to be caller saved, these are no longer
25356 restored by the function before returning and must thus be cleared for
25357 security purposes. */
25358 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
25359 {
25360 /* We do not touch registers that can be used to pass arguments as per
25361 the AAPCS, since these should never be made callee-saved by user
25362 options. */
25363 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25364 continue;
25365 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25366 continue;
25367 if (call_used_regs[regno])
25368 bitmap_set_bit (to_clear_bitmap, regno);
25369 }
25370
25371 /* Make sure we do not clear the registers used to return the result in. */
25372 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25373 if (!VOID_TYPE_P (result_type))
25374 {
25375 uint64_t to_clear_return_mask;
25376 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25377
25378 /* No need to check that we return in registers, because we don't
25379 support returning on stack yet. */
25380 gcc_assert (REG_P (result_rtl));
25381 to_clear_return_mask
25382 = compute_not_to_clear_mask (result_type, result_rtl, 0,
25383 &padding_bits_to_clear);
25384 if (to_clear_return_mask)
25385 {
25386 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
25387 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25388 {
25389 if (to_clear_return_mask & (1ULL << regno))
25390 bitmap_clear_bit (to_clear_bitmap, regno);
25391 }
25392 }
25393 }
25394
25395 if (padding_bits_to_clear != 0)
25396 {
25397 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
25398 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
25399
25400 /* Padding_bits_to_clear is not 0 so we know we are dealing with
25401 returning a composite type, which only uses r0. Let's make sure that
25402 r1-r3 are cleared too. */
25403 bitmap_clear (to_clear_arg_regs_bitmap);
25404 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
25405 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
25406 }
25407
25408 /* Clear full registers that leak before returning. */
25409 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
25410 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
25411 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
25412 clearing_reg);
25413 }
25414
25415 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
25416 single POP instruction can be generated. LR should be replaced by PC. All
25417 the checks required are already done by USE_RETURN_INSN (). Hence, all
25418 we really need to check here is whether a single register or multiple
25419 registers are to be popped. */
25420 void
25421 thumb2_expand_return (bool simple_return)
25422 {
25423 int i, num_regs;
25424 unsigned long saved_regs_mask;
25425 arm_stack_offsets *offsets;
25426
25427 offsets = arm_get_frame_offsets ();
25428 saved_regs_mask = offsets->saved_regs_mask;
25429
25430 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25431 if (saved_regs_mask & (1 << i))
25432 num_regs++;
25433
25434 if (!simple_return && saved_regs_mask)
25435 {
25436 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25437 functions or adapt code to handle according to ACLE. This path should
25438 not be reachable for cmse_nonsecure_entry functions though we prefer
25439 to assert it for now to ensure that future code changes do not silently
25440 change this behavior. */
25441 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
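      /* In the single-register case below, the parallel pairs a return with
	 one post-increment load into PC; in practice this is emitted as a
	 single "pop {pc}".  */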
25442 if (num_regs == 1)
25443 {
25444 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25445 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25446 rtx addr = gen_rtx_MEM (SImode,
25447 gen_rtx_POST_INC (SImode,
25448 stack_pointer_rtx));
25449 set_mem_alias_set (addr, get_frame_alias_set ());
25450 XVECEXP (par, 0, 0) = ret_rtx;
25451 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25452 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25453 emit_jump_insn (par);
25454 }
25455 else
25456 {
25457 saved_regs_mask &= ~ (1 << LR_REGNUM);
25458 saved_regs_mask |= (1 << PC_REGNUM);
25459 arm_emit_multi_reg_pop (saved_regs_mask);
25460 }
25461 }
25462 else
25463 {
25464 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25465 cmse_nonsecure_entry_clear_before_return ();
25466 emit_jump_insn (simple_return_rtx);
25467 }
25468 }
25469
25470 void
25471 thumb1_expand_epilogue (void)
25472 {
25473 HOST_WIDE_INT amount;
25474 arm_stack_offsets *offsets;
25475 int regno;
25476
25477 /* Naked functions don't have epilogues. */
25478 if (IS_NAKED (arm_current_func_type ()))
25479 return;
25480
25481 offsets = arm_get_frame_offsets ();
25482 amount = offsets->outgoing_args - offsets->saved_regs;
25483
25484 if (frame_pointer_needed)
25485 {
25486 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25487 amount = offsets->locals_base - offsets->saved_regs;
25488 }
25489 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25490
25491 gcc_assert (amount >= 0);
25492 if (amount)
25493 {
25494 emit_insn (gen_blockage ());
25495
25496 if (amount < 512)
25497 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25498 GEN_INT (amount)));
25499 else
25500 {
25501 /* r3 is always free in the epilogue. */
25502 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25503
25504 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25505 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25506 }
25507 }
25508
25509 /* Emit a USE (stack_pointer_rtx), so that
25510 the stack adjustment will not be deleted. */
25511 emit_insn (gen_force_register_use (stack_pointer_rtx));
25512
25513 if (crtl->profile || !TARGET_SCHED_PROLOG)
25514 emit_insn (gen_blockage ());
25515
25516 /* Emit a clobber for each insn that will be restored in the epilogue,
25517 so that flow2 will get register lifetimes correct. */
25518 for (regno = 0; regno < 13; regno++)
25519 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25520 emit_clobber (gen_rtx_REG (SImode, regno));
25521
25522 if (! df_regs_ever_live_p (LR_REGNUM))
25523 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25524
25525 /* Clear all caller-saved regs that are not used to return. */
25526 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25527 cmse_nonsecure_entry_clear_before_return ();
25528 }
25529
25530 /* Epilogue code for APCS frame. */
25531 static void
25532 arm_expand_epilogue_apcs_frame (bool really_return)
25533 {
25534 unsigned long func_type;
25535 unsigned long saved_regs_mask;
25536 int num_regs = 0;
25537 int i;
25538 int floats_from_frame = 0;
25539 arm_stack_offsets *offsets;
25540
25541 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25542 func_type = arm_current_func_type ();
25543
25544 /* Get frame offsets for ARM. */
25545 offsets = arm_get_frame_offsets ();
25546 saved_regs_mask = offsets->saved_regs_mask;
25547
25548 /* Find the offset of the floating-point save area in the frame. */
25549 floats_from_frame
25550 = (offsets->saved_args
25551 + arm_compute_static_chain_stack_bytes ()
25552 - offsets->frame);
25553
25554 /* Compute how many core registers are saved and how far away the floats are. */
25555 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25556 if (saved_regs_mask & (1 << i))
25557 {
25558 num_regs++;
25559 floats_from_frame += 4;
25560 }
25561
25562 if (TARGET_HARD_FLOAT)
25563 {
25564 int start_reg;
25565 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25566
25567 /* The offset is from IP_REGNUM. */
25568 int saved_size = arm_get_vfp_saved_size ();
25569 if (saved_size > 0)
25570 {
25571 rtx_insn *insn;
25572 floats_from_frame += saved_size;
25573 insn = emit_insn (gen_addsi3 (ip_rtx,
25574 hard_frame_pointer_rtx,
25575 GEN_INT (-floats_from_frame)));
25576 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25577 ip_rtx, hard_frame_pointer_rtx);
25578 }
25579
25580 /* Generate VFP register multi-pop. */
25581 start_reg = FIRST_VFP_REGNUM;
25582
25583 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25584 /* Look for a case where a reg does not need restoring. */
25585 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25586 && (!df_regs_ever_live_p (i + 1)
25587 || call_used_regs[i + 1]))
25588 {
25589 if (start_reg != i)
25590 arm_emit_vfp_multi_reg_pop (start_reg,
25591 (i - start_reg) / 2,
25592 gen_rtx_REG (SImode,
25593 IP_REGNUM));
25594 start_reg = i + 2;
25595 }
25596
25597 /* Restore the remaining regs that we have discovered (or possibly
25598 even all of them, if the conditional in the for loop never
25599 fired). */
25600 if (start_reg != i)
25601 arm_emit_vfp_multi_reg_pop (start_reg,
25602 (i - start_reg) / 2,
25603 gen_rtx_REG (SImode, IP_REGNUM));
25604 }
25605
25606 if (TARGET_IWMMXT)
25607 {
25608 /* The frame pointer is guaranteed to be non-double-word aligned, as
25609 it is set to double-word-aligned old_stack_pointer - 4. */
25610 rtx_insn *insn;
25611 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25612
25613 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25614 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25615 {
25616 rtx addr = gen_frame_mem (V2SImode,
25617 plus_constant (Pmode, hard_frame_pointer_rtx,
25618 - lrm_count * 4));
25619 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25620 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25621 gen_rtx_REG (V2SImode, i),
25622 NULL_RTX);
25623 lrm_count += 2;
25624 }
25625 }
25626
25627 /* saved_regs_mask should contain IP, which holds the old stack pointer saved
25628 when the frame was created. Since SP and IP are adjacent registers,
25629 we can restore the value directly into SP. */
25630 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25631 saved_regs_mask &= ~(1 << IP_REGNUM);
25632 saved_regs_mask |= (1 << SP_REGNUM);
25633
25634 /* There are two registers left in saved_regs_mask - LR and PC. We
25635 only need to restore LR (the return address), but to
25636 save time we can load it directly into PC, unless we need a
25637 special function exit sequence, or we are not really returning. */
25638 if (really_return
25639 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25640 && !crtl->calls_eh_return)
25641 /* Delete LR from the register mask, so that LR on
25642 the stack is loaded into the PC in the register mask. */
25643 saved_regs_mask &= ~(1 << LR_REGNUM);
25644 else
25645 saved_regs_mask &= ~(1 << PC_REGNUM);
25646
25647 num_regs = bit_count (saved_regs_mask);
25648 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25649 {
25650 rtx_insn *insn;
25651 emit_insn (gen_blockage ());
25652 /* Unwind the stack to just below the saved registers. */
25653 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25654 hard_frame_pointer_rtx,
25655 GEN_INT (- 4 * num_regs)));
25656
25657 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25658 stack_pointer_rtx, hard_frame_pointer_rtx);
25659 }
25660
25661 arm_emit_multi_reg_pop (saved_regs_mask);
25662
25663 if (IS_INTERRUPT (func_type))
25664 {
25665 /* Interrupt handlers will have pushed the
25666 IP onto the stack, so restore it now. */
25667 rtx_insn *insn;
25668 rtx addr = gen_rtx_MEM (SImode,
25669 gen_rtx_POST_INC (SImode,
25670 stack_pointer_rtx));
25671 set_mem_alias_set (addr, get_frame_alias_set ());
25672 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25673 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25674 gen_rtx_REG (SImode, IP_REGNUM),
25675 NULL_RTX);
25676 }
25677
25678 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25679 return;
25680
25681 if (crtl->calls_eh_return)
25682 emit_insn (gen_addsi3 (stack_pointer_rtx,
25683 stack_pointer_rtx,
25684 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25685
25686 if (IS_STACKALIGN (func_type))
25687 /* Restore the original stack pointer. Before prologue, the stack was
25688 realigned and the original stack pointer saved in r0. For details,
25689 see comment in arm_expand_prologue. */
25690 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25691
25692 emit_jump_insn (simple_return_rtx);
25693 }
25694
25695 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25696 function is not a sibcall. */
25697 void
25698 arm_expand_epilogue (bool really_return)
25699 {
25700 unsigned long func_type;
25701 unsigned long saved_regs_mask;
25702 int num_regs = 0;
25703 int i;
25704 int amount;
25705 arm_stack_offsets *offsets;
25706
25707 func_type = arm_current_func_type ();
25708
25709 /* Naked functions don't have an epilogue. Hence, generate the return pattern
25710 and let output_return_instruction take care of any instruction emission. */
25711 if (IS_NAKED (func_type)
25712 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25713 {
25714 if (really_return)
25715 emit_jump_insn (simple_return_rtx);
25716 return;
25717 }
25718
25719 /* If we are throwing an exception, then we really must be doing a
25720 return, so we can't tail-call. */
25721 gcc_assert (!crtl->calls_eh_return || really_return);
25722
25723 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25724 {
25725 arm_expand_epilogue_apcs_frame (really_return);
25726 return;
25727 }
25728
25729 /* Get frame offsets for ARM. */
25730 offsets = arm_get_frame_offsets ();
25731 saved_regs_mask = offsets->saved_regs_mask;
25732 num_regs = bit_count (saved_regs_mask);
25733
25734 if (frame_pointer_needed)
25735 {
25736 rtx_insn *insn;
25737 /* Restore stack pointer if necessary. */
25738 if (TARGET_ARM)
25739 {
25740 /* In ARM mode, frame pointer points to first saved register.
25741 Restore stack pointer to last saved register. */
25742 amount = offsets->frame - offsets->saved_regs;
25743
25744 /* Force out any pending memory operations that reference stacked data
25745 before stack de-allocation occurs. */
25746 emit_insn (gen_blockage ());
25747 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25748 hard_frame_pointer_rtx,
25749 GEN_INT (amount)));
25750 arm_add_cfa_adjust_cfa_note (insn, amount,
25751 stack_pointer_rtx,
25752 hard_frame_pointer_rtx);
25753
25754 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25755 deleted. */
25756 emit_insn (gen_force_register_use (stack_pointer_rtx));
25757 }
25758 else
25759 {
25760 /* In Thumb-2 mode, the frame pointer points to the last saved
25761 register. */
25762 amount = offsets->locals_base - offsets->saved_regs;
25763 if (amount)
25764 {
25765 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25766 hard_frame_pointer_rtx,
25767 GEN_INT (amount)));
25768 arm_add_cfa_adjust_cfa_note (insn, amount,
25769 hard_frame_pointer_rtx,
25770 hard_frame_pointer_rtx);
25771 }
25772
25773 /* Force out any pending memory operations that reference stacked data
25774 before stack de-allocation occurs. */
25775 emit_insn (gen_blockage ());
25776 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25777 hard_frame_pointer_rtx));
25778 arm_add_cfa_adjust_cfa_note (insn, 0,
25779 stack_pointer_rtx,
25780 hard_frame_pointer_rtx);
25781 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25782 deleted. */
25783 emit_insn (gen_force_register_use (stack_pointer_rtx));
25784 }
25785 }
25786 else
25787 {
25788 /* Pop off outgoing args and local frame to adjust stack pointer to
25789 last saved register. */
25790 amount = offsets->outgoing_args - offsets->saved_regs;
25791 if (amount)
25792 {
25793 rtx_insn *tmp;
25794 /* Force out any pending memory operations that reference stacked data
25795 before stack de-allocation occurs. */
25796 emit_insn (gen_blockage ());
25797 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25798 stack_pointer_rtx,
25799 GEN_INT (amount)));
25800 arm_add_cfa_adjust_cfa_note (tmp, amount,
25801 stack_pointer_rtx, stack_pointer_rtx);
25802 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25803 not deleted. */
25804 emit_insn (gen_force_register_use (stack_pointer_rtx));
25805 }
25806 }
25807
25808 if (TARGET_HARD_FLOAT)
25809 {
25810 /* Generate VFP register multi-pop. */
25811 int end_reg = LAST_VFP_REGNUM + 1;
25812
25813 /* Scan the registers in reverse order. We need to match
25814 any groupings made in the prologue and generate matching
25815 vldm operations. Groups must be matched because, unlike pop,
25816 vldm can only handle consecutive regs. */
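      /* For instance (illustrative): if the prologue saved d8-d9 and
	 d11-d12 as two separate groups, this scan finds the gap at d10 and
	 emits two matching vldm pops, one per contiguous group.  */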
25817 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25818 /* Look for a case where a reg does not need restoring. */
25819 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25820 && (!df_regs_ever_live_p (i + 1)
25821 || call_used_regs[i + 1]))
25822 {
25823 /* Restore the regs discovered so far (from reg+2 to
25824 end_reg). */
25825 if (end_reg > i + 2)
25826 arm_emit_vfp_multi_reg_pop (i + 2,
25827 (end_reg - (i + 2)) / 2,
25828 stack_pointer_rtx);
25829 end_reg = i;
25830 }
25831
25832 /* Restore the remaining regs that we have discovered (or possibly
25833 even all of them, if the conditional in the for loop never
25834 fired). */
25835 if (end_reg > i + 2)
25836 arm_emit_vfp_multi_reg_pop (i + 2,
25837 (end_reg - (i + 2)) / 2,
25838 stack_pointer_rtx);
25839 }
25840
25841 if (TARGET_IWMMXT)
25842 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25843 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25844 {
25845 rtx_insn *insn;
25846 rtx addr = gen_rtx_MEM (V2SImode,
25847 gen_rtx_POST_INC (SImode,
25848 stack_pointer_rtx));
25849 set_mem_alias_set (addr, get_frame_alias_set ());
25850 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25851 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25852 gen_rtx_REG (V2SImode, i),
25853 NULL_RTX);
25854 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25855 stack_pointer_rtx, stack_pointer_rtx);
25856 }
25857
25858 if (saved_regs_mask)
25859 {
25860 rtx insn;
25861 bool return_in_pc = false;
25862
25863 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25864 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25865 && !IS_CMSE_ENTRY (func_type)
25866 && !IS_STACKALIGN (func_type)
25867 && really_return
25868 && crtl->args.pretend_args_size == 0
25869 && saved_regs_mask & (1 << LR_REGNUM)
25870 && !crtl->calls_eh_return)
25871 {
25872 saved_regs_mask &= ~(1 << LR_REGNUM);
25873 saved_regs_mask |= (1 << PC_REGNUM);
25874 return_in_pc = true;
25875 }
25876
25877 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25878 {
25879 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25880 if (saved_regs_mask & (1 << i))
25881 {
25882 rtx addr = gen_rtx_MEM (SImode,
25883 gen_rtx_POST_INC (SImode,
25884 stack_pointer_rtx));
25885 set_mem_alias_set (addr, get_frame_alias_set ());
25886
25887 if (i == PC_REGNUM)
25888 {
25889 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25890 XVECEXP (insn, 0, 0) = ret_rtx;
25891 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25892 addr);
25893 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25894 insn = emit_jump_insn (insn);
25895 }
25896 else
25897 {
25898 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25899 addr));
25900 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25901 gen_rtx_REG (SImode, i),
25902 NULL_RTX);
25903 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25904 stack_pointer_rtx,
25905 stack_pointer_rtx);
25906 }
25907 }
25908 }
25909 else
25910 {
25911 if (TARGET_LDRD
25912 && current_tune->prefer_ldrd_strd
25913 && !optimize_function_for_size_p (cfun))
25914 {
25915 if (TARGET_THUMB2)
25916 thumb2_emit_ldrd_pop (saved_regs_mask);
25917 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25918 arm_emit_ldrd_pop (saved_regs_mask);
25919 else
25920 arm_emit_multi_reg_pop (saved_regs_mask);
25921 }
25922 else
25923 arm_emit_multi_reg_pop (saved_regs_mask);
25924 }
25925
25926 if (return_in_pc)
25927 return;
25928 }
25929
25930 amount
25931 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25932 if (amount)
25933 {
25934 int i, j;
25935 rtx dwarf = NULL_RTX;
25936 rtx_insn *tmp =
25937 emit_insn (gen_addsi3 (stack_pointer_rtx,
25938 stack_pointer_rtx,
25939 GEN_INT (amount)));
25940
25941 RTX_FRAME_RELATED_P (tmp) = 1;
25942
25943 if (cfun->machine->uses_anonymous_args)
25944 {
25945 /* Restore pretend args. See arm_expand_prologue for how the pretend
25946 args are saved on the stack. */
25947 int num_regs = crtl->args.pretend_args_size / 4;
25948 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
25949 for (j = 0, i = 0; j < num_regs; i++)
25950 if (saved_regs_mask & (1 << i))
25951 {
25952 rtx reg = gen_rtx_REG (SImode, i);
25953 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25954 j++;
25955 }
25956 REG_NOTES (tmp) = dwarf;
25957 }
25958 arm_add_cfa_adjust_cfa_note (tmp, amount,
25959 stack_pointer_rtx, stack_pointer_rtx);
25960 }
25961
25962 /* Clear all caller-saved regs that are not used to return. */
25963 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25964 {
25965 /* CMSE_ENTRY always returns. */
25966 gcc_assert (really_return);
25967 cmse_nonsecure_entry_clear_before_return ();
25968 }
25969
25970 if (!really_return)
25971 return;
25972
25973 if (crtl->calls_eh_return)
25974 emit_insn (gen_addsi3 (stack_pointer_rtx,
25975 stack_pointer_rtx,
25976 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25977
25978 if (IS_STACKALIGN (func_type))
25979 /* Restore the original stack pointer. Before prologue, the stack was
25980 realigned and the original stack pointer saved in r0. For details,
25981 see comment in arm_expand_prologue. */
25982 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25983
25984 emit_jump_insn (simple_return_rtx);
25985 }
25986
25987 /* Implementation of insn prologue_thumb1_interwork. This is the first
25988 "instruction" of a function called in ARM mode. Swap to thumb mode. */
25989
25990 const char *
25991 thumb1_output_interwork (void)
25992 {
25993 const char * name;
25994 FILE *f = asm_out_file;
25995
25996 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
25997 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
25998 == SYMBOL_REF);
25999 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
26000
26001 /* Generate code sequence to switch us into Thumb mode. */
26002 /* The .code 32 directive has already been emitted by
26003 ASM_DECLARE_FUNCTION_NAME. */
26004 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
26005 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
26006
26007 /* Generate a label, so that the debugger will notice the
26008 change in instruction sets. This label is also used by
26009 the assembler to bypass the ARM code when this function
26010 is called from a Thumb encoded function elsewhere in the
26011 same file. Hence the definition of STUB_NAME here must
26012 agree with the definition in gas/config/tc-arm.c. */
26013
26014 #define STUB_NAME ".real_start_of"
26015
26016 fprintf (f, "\t.code\t16\n");
26017 #ifdef ARM_PE
26018 if (arm_dllexport_name_p (name))
26019 name = arm_strip_name_encoding (name);
26020 #endif
26021 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
26022 fprintf (f, "\t.thumb_func\n");
26023 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
26024
26025 return "";
26026 }
26027
26028 /* Handle the case of a double word load into a low register from
26029 a computed memory address. The computed address may involve a
26030 register which is overwritten by the load. */
26031 const char *
26032 thumb_load_double_from_address (rtx *operands)
26033 {
26034 rtx addr;
26035 rtx base;
26036 rtx offset;
26037 rtx arg1;
26038 rtx arg2;
26039
26040 gcc_assert (REG_P (operands[0]));
26041 gcc_assert (MEM_P (operands[1]));
26042
26043 /* Get the memory address. */
26044 addr = XEXP (operands[1], 0);
26045
26046 /* Work out how the memory address is computed. */
26047 switch (GET_CODE (addr))
26048 {
26049 case REG:
26050 operands[2] = adjust_address (operands[1], SImode, 4);
26051
26052 if (REGNO (operands[0]) == REGNO (addr))
26053 {
26054 output_asm_insn ("ldr\t%H0, %2", operands);
26055 output_asm_insn ("ldr\t%0, %1", operands);
26056 }
26057 else
26058 {
26059 output_asm_insn ("ldr\t%0, %1", operands);
26060 output_asm_insn ("ldr\t%H0, %2", operands);
26061 }
26062 break;
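      /* Example of the overlap case above (illustrative): loading a
	 doubleword at the address in r2 into the pair r2/r3 emits
	 "ldr r3, [r2, #4]" followed by "ldr r2, [r2]", so the address
	 register is not clobbered before its final use.  */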
26063
26064 case CONST:
26065 /* Compute <address> + 4 for the high order load. */
26066 operands[2] = adjust_address (operands[1], SImode, 4);
26067
26068 output_asm_insn ("ldr\t%0, %1", operands);
26069 output_asm_insn ("ldr\t%H0, %2", operands);
26070 break;
26071
26072 case PLUS:
26073 arg1 = XEXP (addr, 0);
26074 arg2 = XEXP (addr, 1);
26075
26076 if (CONSTANT_P (arg1))
26077 base = arg2, offset = arg1;
26078 else
26079 base = arg1, offset = arg2;
26080
26081 gcc_assert (REG_P (base));
26082
26083 /* Catch the case of <address> = <reg> + <reg> */
26084 if (REG_P (offset))
26085 {
26086 int reg_offset = REGNO (offset);
26087 int reg_base = REGNO (base);
26088 int reg_dest = REGNO (operands[0]);
26089
26090 /* Add the base and offset registers together into the
26091 higher destination register. */
26092 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
26093 reg_dest + 1, reg_base, reg_offset);
26094
26095 /* Load the lower destination register from the address in
26096 the higher destination register. */
26097 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
26098 reg_dest, reg_dest + 1);
26099
26100 /* Load the higher destination register from its own address
26101 plus 4. */
26102 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
26103 reg_dest + 1, reg_dest + 1);
26104 }
26105 else
26106 {
26107 /* Compute <address> + 4 for the high order load. */
26108 operands[2] = adjust_address (operands[1], SImode, 4);
26109
26110 /* If the computed address is held in the low order register
26111 then load the high order register first, otherwise always
26112 load the low order register first. */
26113 if (REGNO (operands[0]) == REGNO (base))
26114 {
26115 output_asm_insn ("ldr\t%H0, %2", operands);
26116 output_asm_insn ("ldr\t%0, %1", operands);
26117 }
26118 else
26119 {
26120 output_asm_insn ("ldr\t%0, %1", operands);
26121 output_asm_insn ("ldr\t%H0, %2", operands);
26122 }
26123 }
26124 break;
26125
26126 case LABEL_REF:
26127 /* With no registers to worry about we can just load the value
26128 directly. */
26129 operands[2] = adjust_address (operands[1], SImode, 4);
26130
26131 output_asm_insn ("ldr\t%H0, %2", operands);
26132 output_asm_insn ("ldr\t%0, %1", operands);
26133 break;
26134
26135 default:
26136 gcc_unreachable ();
26137 }
26138
26139 return "";
26140 }
26141
26142 const char *
26143 thumb_output_move_mem_multiple (int n, rtx *operands)
26144 {
26145 switch (n)
26146 {
26147 case 2:
26148 if (REGNO (operands[4]) > REGNO (operands[5]))
26149 std::swap (operands[4], operands[5]);
26150
26151 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
26152 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
26153 break;
26154
26155 case 3:
26156 if (REGNO (operands[4]) > REGNO (operands[5]))
26157 std::swap (operands[4], operands[5]);
26158 if (REGNO (operands[5]) > REGNO (operands[6]))
26159 std::swap (operands[5], operands[6]);
26160 if (REGNO (operands[4]) > REGNO (operands[5]))
26161 std::swap (operands[4], operands[5]);
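      /* The three conditional swaps above act as a small sorting network
	 that orders the three scratch registers ascending, since the
	 ldmia/stmia register lists are expected in increasing register
	 number.  */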
26162
26163 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
26164 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
26165 break;
26166
26167 default:
26168 gcc_unreachable ();
26169 }
26170
26171 return "";
26172 }
26173
26174 /* Output a call-via instruction for thumb state. */
26175 const char *
26176 thumb_call_via_reg (rtx reg)
26177 {
26178 int regno = REGNO (reg);
26179 rtx *labelp;
26180
26181 gcc_assert (regno < LR_REGNUM);
26182
26183 /* If we are in the normal text section we can use a single instance
26184 per compilation unit. If we are doing function sections, then we need
26185 an entry per section, since we can't rely on reachability. */
26186 if (in_section == text_section)
26187 {
26188 thumb_call_reg_needed = 1;
26189
26190 if (thumb_call_via_label[regno] == NULL)
26191 thumb_call_via_label[regno] = gen_label_rtx ();
26192 labelp = thumb_call_via_label + regno;
26193 }
26194 else
26195 {
26196 if (cfun->machine->call_via[regno] == NULL)
26197 cfun->machine->call_via[regno] = gen_label_rtx ();
26198 labelp = cfun->machine->call_via + regno;
26199 }
26200
26201 output_asm_insn ("bl\t%a0", labelp);
26202 return "";
26203 }
26204
26205 /* Routines for generating rtl. */
26206 void
26207 thumb_expand_movmemqi (rtx *operands)
26208 {
26209 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26210 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26211 HOST_WIDE_INT len = INTVAL (operands[2]);
26212 HOST_WIDE_INT offset = 0;
26213
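  /* Sketch of the decomposition below: the length is peeled off in 12- and
     8-byte chunks using the ldm/stm helpers, then by word, halfword and
     byte moves.  For example, a 15-byte copy becomes one 12-byte block,
     one halfword and one byte.  */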
26214 while (len >= 12)
26215 {
26216 emit_insn (gen_movmem12b (out, in, out, in));
26217 len -= 12;
26218 }
26219
26220 if (len >= 8)
26221 {
26222 emit_insn (gen_movmem8b (out, in, out, in));
26223 len -= 8;
26224 }
26225
26226 if (len >= 4)
26227 {
26228 rtx reg = gen_reg_rtx (SImode);
26229 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26230 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26231 len -= 4;
26232 offset += 4;
26233 }
26234
26235 if (len >= 2)
26236 {
26237 rtx reg = gen_reg_rtx (HImode);
26238 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26239 plus_constant (Pmode, in,
26240 offset))));
26241 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26242 offset)),
26243 reg));
26244 len -= 2;
26245 offset += 2;
26246 }
26247
26248 if (len)
26249 {
26250 rtx reg = gen_reg_rtx (QImode);
26251 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26252 plus_constant (Pmode, in,
26253 offset))));
26254 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26255 offset)),
26256 reg));
26257 }
26258 }
26259
26260 void
26261 thumb_reload_out_hi (rtx *operands)
26262 {
26263 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26264 }
26265
26266 /* Return the length of a function name prefix
26267 that starts with the character 'c'. */
26268 static int
26269 arm_get_strip_length (int c)
26270 {
26271 switch (c)
26272 {
26273 ARM_NAME_ENCODING_LENGTHS
26274 default: return 0;
26275 }
26276 }
26277
26278 /* Return a pointer to a function's name with any
26279 and all prefix encodings stripped from it. */
26280 const char *
26281 arm_strip_name_encoding (const char *name)
26282 {
26283 int skip;
26284
26285 while ((skip = arm_get_strip_length (* name)))
26286 name += skip;
26287
26288 return name;
26289 }
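/* For instance, a name encoded as "*foo" (the '*' prefix requests verbatim
   output) is returned here as "foo"; which prefixes are recognised is
   controlled by ARM_NAME_ENCODING_LENGTHS.  */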
26290
26291 /* If there is a '*' anywhere in the name's prefix, then
26292 emit the stripped name verbatim, otherwise prepend an
26293 underscore if leading underscores are being used. */
26294 void
26295 arm_asm_output_labelref (FILE *stream, const char *name)
26296 {
26297 int skip;
26298 int verbatim = 0;
26299
26300 while ((skip = arm_get_strip_length (* name)))
26301 {
26302 verbatim |= (*name == '*');
26303 name += skip;
26304 }
26305
26306 if (verbatim)
26307 fputs (name, stream);
26308 else
26309 asm_fprintf (stream, "%U%s", name);
26310 }
26311
26312 /* This function is used to emit an EABI tag and its associated value.
26313 We emit the numerical value of the tag in case the assembler does not
26314 support textual tags. (E.g. gas prior to 2.20). If requested we include
26315 the tag name in a comment so that anyone reading the assembler output
26316 will know which tag is being set.
26317
26318 This function is not static because arm-c.c needs it too. */
26319
26320 void
26321 arm_emit_eabi_attribute (const char *name, int num, int val)
26322 {
26323 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26324 if (flag_verbose_asm || flag_debug_asm)
26325 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26326 asm_fprintf (asm_out_file, "\n");
26327 }
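/* For example, with -fverbose-asm a call such as
   arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, 2)
   is expected to emit:
	.eabi_attribute 30, 2	@ Tag_ABI_optimization_goals  */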
26328
26329 /* This function is used to print CPU tuning information as comment
26330 in assembler file. Pointers are not printed for now. */
26331
26332 void
26333 arm_print_tune_info (void)
26334 {
26335 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26336 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26337 current_tune->constant_limit);
26338 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26339 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26340 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26341 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26342 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26343 "prefetch.l1_cache_size:\t%d\n",
26344 current_tune->prefetch.l1_cache_size);
26345 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26346 "prefetch.l1_cache_line_size:\t%d\n",
26347 current_tune->prefetch.l1_cache_line_size);
26348 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26349 "prefer_constant_pool:\t%d\n",
26350 (int) current_tune->prefer_constant_pool);
26351 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26352 "branch_cost:\t(s:speed, p:predictable)\n");
26353 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26354 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26355 current_tune->branch_cost (false, false));
26356 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26357 current_tune->branch_cost (false, true));
26358 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26359 current_tune->branch_cost (true, false));
26360 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26361 current_tune->branch_cost (true, true));
26362 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26363 "prefer_ldrd_strd:\t%d\n",
26364 (int) current_tune->prefer_ldrd_strd);
26365 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26366 "logical_op_non_short_circuit:\t[%d,%d]\n",
26367 (int) current_tune->logical_op_non_short_circuit_thumb,
26368 (int) current_tune->logical_op_non_short_circuit_arm);
26369 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26370 "prefer_neon_for_64bits:\t%d\n",
26371 (int) current_tune->prefer_neon_for_64bits);
26372 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26373 "disparage_flag_setting_t16_encodings:\t%d\n",
26374 (int) current_tune->disparage_flag_setting_t16_encodings);
26375 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26376 "string_ops_prefer_neon:\t%d\n",
26377 (int) current_tune->string_ops_prefer_neon);
26378 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26379 "max_insns_inline_memset:\t%d\n",
26380 current_tune->max_insns_inline_memset);
26381 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26382 current_tune->fusible_ops);
26383 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26384 (int) current_tune->sched_autopref);
26385 }
26386
26387 /* Print .arch and .arch_extension directives corresponding to the
26388 current architecture configuration. */
26389 static void
26390 arm_print_asm_arch_directives ()
26391 {
26392 const arch_option *arch
26393 = arm_parse_arch_option_name (all_architectures, "-march",
26394 arm_active_target.arch_name);
26395 auto_sbitmap opt_bits (isa_num_bits);
26396
26397 gcc_assert (arch);
26398
26399 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26400 arm_last_printed_arch_string = arm_active_target.arch_name;
26401 if (!arch->common.extensions)
26402 return;
26403
26404 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26405 opt->name != NULL;
26406 opt++)
26407 {
26408 if (!opt->remove)
26409 {
26410 arm_initialize_isa (opt_bits, opt->isa_bits);
26411
26412 /* If every feature bit of this option is set in the target
26413 ISA specification, print out the option name. However,
26414 don't print anything if all the bits are part of the
26415 FPU specification. */
26416 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26417 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26418 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26419 }
26420 }
26421 }
26422
26423 static void
26424 arm_file_start (void)
26425 {
26426 int val;
26427
26428 if (TARGET_BPABI)
26429 {
26430 /* We don't have a specified CPU. Use the architecture to
26431 generate the tags.
26432
26433 Note: it might be better to do this unconditionally, then the
26434 assembler would not need to know about all new CPU names as
26435 they are added. */
26436 if (!arm_active_target.core_name)
26437 {
26438 /* armv7ve doesn't support any extensions. */
26439 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26440 {
26441 /* Keep backward compatibility for assemblers
26442 which don't support armv7ve. */
26443 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26444 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26445 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26446 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26447 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26448 arm_last_printed_arch_string = "armv7ve";
26449 }
26450 else
26451 arm_print_asm_arch_directives ();
26452 }
26453 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26454 {
26455 asm_fprintf (asm_out_file, "\t.arch %s\n",
26456 arm_active_target.core_name + 8);
26457 arm_last_printed_arch_string = arm_active_target.core_name + 8;
26458 }
26459 else
26460 {
26461 const char* truncated_name
26462 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26463 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26464 }
26465
26466 if (print_tune_info)
26467 arm_print_tune_info ();
26468
26469 if (! TARGET_SOFT_FLOAT)
26470 {
26471 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26472 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26473
26474 if (TARGET_HARD_FLOAT_ABI)
26475 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26476 }
26477
26478 /* Some of these attributes only apply when the corresponding features
26479 are used. However we don't have any easy way of figuring this out.
26480 Conservatively record the setting that would have been used. */
26481
26482 if (flag_rounding_math)
26483 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26484
26485 if (!flag_unsafe_math_optimizations)
26486 {
26487 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26488 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26489 }
26490 if (flag_signaling_nans)
26491 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26492
26493 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26494 flag_finite_math_only ? 1 : 3);
26495
26496 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26497 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26498 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26499 flag_short_enums ? 1 : 2);
26500
26501 /* Tag_ABI_optimization_goals. */
26502 if (optimize_size)
26503 val = 4;
26504 else if (optimize >= 2)
26505 val = 2;
26506 else if (optimize)
26507 val = 1;
26508 else
26509 val = 6;
26510 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26511
26512 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26513 unaligned_access);
26514
26515 if (arm_fp16_format)
26516 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26517 (int) arm_fp16_format);
26518
26519 if (arm_lang_output_object_attributes_hook)
26520 arm_lang_output_object_attributes_hook();
26521 }
26522
26523 default_file_start ();
26524 }
26525
26526 static void
26527 arm_file_end (void)
26528 {
26529 int regno;
26530
26531 if (NEED_INDICATE_EXEC_STACK)
26532 /* Add .note.GNU-stack. */
26533 file_end_indicate_exec_stack ();
26534
26535 if (! thumb_call_reg_needed)
26536 return;
26537
26538 switch_to_section (text_section);
26539 asm_fprintf (asm_out_file, "\t.code 16\n");
26540 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26541
26542 for (regno = 0; regno < LR_REGNUM; regno++)
26543 {
26544 rtx label = thumb_call_via_label[regno];
26545
26546 if (label != 0)
26547 {
26548 targetm.asm_out.internal_label (asm_out_file, "L",
26549 CODE_LABEL_NUMBER (label));
26550 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26551 }
26552 }
26553 }
26554
26555 #ifndef ARM_PE
26556 /* Symbols in the text segment can be accessed without indirecting via the
26557 constant pool; it may take an extra binary operation, but this is still
26558 faster than indirecting via memory. Don't do this when not optimizing,
26559 since we won't be calculating all of the offsets necessary to do this
26560 simplification. */
26561
26562 static void
26563 arm_encode_section_info (tree decl, rtx rtl, int first)
26564 {
26565 if (optimize > 0 && TREE_CONSTANT (decl))
26566 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26567
26568 default_encode_section_info (decl, rtl, first);
26569 }
26570 #endif /* !ARM_PE */
26571
26572 static void
26573 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26574 {
26575 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26576 && !strcmp (prefix, "L"))
26577 {
26578 arm_ccfsm_state = 0;
26579 arm_target_insn = NULL;
26580 }
26581 default_internal_label (stream, prefix, labelno);
26582 }
26583
26584 /* Output code to add DELTA to the first argument, and then jump
26585 to FUNCTION. Used for C++ multiple inheritance. */
26586
26587 static void
26588 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26589 HOST_WIDE_INT, tree function)
26590 {
26591 static int thunk_label = 0;
26592 char label[256];
26593 char labelpc[256];
26594 int mi_delta = delta;
26595 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26596 int shift = 0;
26597 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26598 ? 1 : 0);
26599 if (mi_delta < 0)
26600 mi_delta = - mi_delta;
26601
26602 final_start_function (emit_barrier (), file, 1);
26603
26604 if (TARGET_THUMB1)
26605 {
26606 int labelno = thunk_label++;
26607 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26608 /* Thunks are entered in arm mode when available. */
26609 if (TARGET_THUMB1_ONLY)
26610 {
26611 /* push r3 so we can use it as a temporary. */
26612 /* TODO: Omit this save if r3 is not used. */
26613 fputs ("\tpush {r3}\n", file);
26614 fputs ("\tldr\tr3, ", file);
26615 }
26616 else
26617 {
26618 fputs ("\tldr\tr12, ", file);
26619 }
26620 assemble_name (file, label);
26621 fputc ('\n', file);
26622 if (flag_pic)
26623 {
26624 /* If we are generating PIC, the ldr instruction below loads
26625 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26626 the address of the add + 8, so we have:
26627
26628 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26629 = target + 1.
26630
26631 Note that we have "+ 1" because some versions of GNU ld
26632 don't set the low bit of the result for R_ARM_REL32
26633 relocations against thumb function symbols.
26634 On ARMv6M this is +4, not +8. */
26635 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26636 assemble_name (file, labelpc);
26637 fputs (":\n", file);
26638 if (TARGET_THUMB1_ONLY)
26639 {
26640 /* This is 2 insns after the start of the thunk, so we know it
26641 is 4-byte aligned. */
26642 fputs ("\tadd\tr3, pc, r3\n", file);
26643 fputs ("\tmov r12, r3\n", file);
26644 }
26645 else
26646 fputs ("\tadd\tr12, pc, r12\n", file);
26647 }
26648 else if (TARGET_THUMB1_ONLY)
26649 fputs ("\tmov r12, r3\n", file);
26650 }
26651 if (TARGET_THUMB1_ONLY)
26652 {
26653 if (mi_delta > 255)
26654 {
26655 fputs ("\tldr\tr3, ", file);
26656 assemble_name (file, label);
26657 fputs ("+4\n", file);
26658 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26659 mi_op, this_regno, this_regno);
26660 }
26661 else if (mi_delta != 0)
26662 {
26663 /* Thumb1 unified syntax requires s suffix in instruction name when
26664 one of the operands is immediate. */
26665 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26666 mi_op, this_regno, this_regno,
26667 mi_delta);
26668 }
26669 }
26670 else
26671 {
26672 /* TODO: Use movw/movt for large constants when available. */
26673 while (mi_delta != 0)
26674 {
26675 if ((mi_delta & (3 << shift)) == 0)
26676 shift += 2;
26677 else
26678 {
26679 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26680 mi_op, this_regno, this_regno,
26681 mi_delta & (0xff << shift));
26682 mi_delta &= ~(0xff << shift);
26683 shift += 8;
26684 }
26685 }
26686 }
26687 if (TARGET_THUMB1)
26688 {
26689 if (TARGET_THUMB1_ONLY)
26690 fputs ("\tpop\t{r3}\n", file);
26691
26692 fprintf (file, "\tbx\tr12\n");
26693 ASM_OUTPUT_ALIGN (file, 2);
26694 assemble_name (file, label);
26695 fputs (":\n", file);
26696 if (flag_pic)
26697 {
26698 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26699 rtx tem = XEXP (DECL_RTL (function), 0);
26700 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26701 pipeline offset is four rather than eight. Adjust the offset
26702 accordingly. */
26703 tem = plus_constant (GET_MODE (tem), tem,
26704 TARGET_THUMB1_ONLY ? -3 : -7);
26705 tem = gen_rtx_MINUS (GET_MODE (tem),
26706 tem,
26707 gen_rtx_SYMBOL_REF (Pmode,
26708 ggc_strdup (labelpc)));
26709 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26710 }
26711 else
26712 /* Output ".word .LTHUNKn". */
26713 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26714
26715 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26716 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26717 }
26718 else
26719 {
26720 fputs ("\tb\t", file);
26721 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26722 if (NEED_PLT_RELOC)
26723 fputs ("(PLT)", file);
26724 fputc ('\n', file);
26725 }
26726
26727 final_end_function ();
26728 }
26729
26730 /* MI thunk handling for TARGET_32BIT. */
26731
26732 static void
26733 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26734 HOST_WIDE_INT vcall_offset, tree function)
26735 {
26736 const bool long_call_p = arm_is_long_call_p (function);
26737
26738 /* On ARM, this_regno is R0 or R1 depending on
26739 whether the function returns an aggregate or not.
26740 */
26741 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26742 function)
26743 ? R1_REGNUM : R0_REGNUM);
26744
26745 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26746 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26747 reload_completed = 1;
26748 emit_note (NOTE_INSN_PROLOGUE_END);
26749
26750 /* Add DELTA to THIS_RTX. */
26751 if (delta != 0)
26752 arm_split_constant (PLUS, Pmode, NULL_RTX,
26753 delta, this_rtx, this_rtx, false);
26754
26755 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26756 if (vcall_offset != 0)
26757 {
26758 /* Load *THIS_RTX. */
26759 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26760 /* Compute *THIS_RTX + VCALL_OFFSET. */
26761 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26762 false);
26763 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26764 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26765 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26766 }
26767
26768 /* Generate a tail call to the target function. */
26769 if (!TREE_USED (function))
26770 {
26771 assemble_external (function);
26772 TREE_USED (function) = 1;
26773 }
26774 rtx funexp = XEXP (DECL_RTL (function), 0);
26775 if (long_call_p)
26776 {
26777 emit_move_insn (temp, funexp);
26778 funexp = temp;
26779 }
26780 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26781 rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26782 SIBLING_CALL_P (insn) = 1;
26783 emit_barrier ();
26784
26785 /* Indirect calls require a bit of fixup in PIC mode. */
26786 if (long_call_p)
26787 {
26788 split_all_insns_noflow ();
26789 arm_reorg ();
26790 }
26791
26792 insn = get_insns ();
26793 shorten_branches (insn);
26794 final_start_function (insn, file, 1);
26795 final (insn, file, 1);
26796 final_end_function ();
26797
26798 /* Stop pretending this is a post-reload pass. */
26799 reload_completed = 0;
26800 }
26801
26802 /* Output code to add DELTA to the first argument, and then jump
26803 to FUNCTION. Used for C++ multiple inheritance. */
26804
26805 static void
26806 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26807 HOST_WIDE_INT vcall_offset, tree function)
26808 {
26809 if (TARGET_32BIT)
26810 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26811 else
26812 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26813 }
26814
26815 int
26816 arm_emit_vector_const (FILE *file, rtx x)
26817 {
26818 int i;
26819 const char * pattern;
26820
26821 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26822
26823 switch (GET_MODE (x))
26824 {
26825 case E_V2SImode: pattern = "%08x"; break;
26826 case E_V4HImode: pattern = "%04x"; break;
26827 case E_V8QImode: pattern = "%02x"; break;
26828 default: gcc_unreachable ();
26829 }
26830
26831 fprintf (file, "0x");
26832 for (i = CONST_VECTOR_NUNITS (x); i--;)
26833 {
26834 rtx element;
26835
26836 element = CONST_VECTOR_ELT (x, i);
26837 fprintf (file, pattern, INTVAL (element));
26838 }
26839
26840 return 1;
26841 }
26842
26843 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26844 HFmode constant pool entries are actually loaded with ldr. */
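/* Illustratively, on a little-endian target the 16-bit value is emitted first
   and followed by two bytes of zero padding; when WORDS_BIG_ENDIAN the padding
   comes first instead, so the 32-bit ldr still finds the half-precision bits
   in the expected half of the word.  */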
26845 void
26846 arm_emit_fp16_const (rtx c)
26847 {
26848 long bits;
26849
26850 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26851 if (WORDS_BIG_ENDIAN)
26852 assemble_zeros (2);
26853 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26854 if (!WORDS_BIG_ENDIAN)
26855 assemble_zeros (2);
26856 }
26857
26858 const char *
26859 arm_output_load_gr (rtx *operands)
26860 {
26861 rtx reg;
26862 rtx offset;
26863 rtx wcgr;
26864 rtx sum;
26865
26866 if (!MEM_P (operands [1])
26867 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26868 || !REG_P (reg = XEXP (sum, 0))
26869 || !CONST_INT_P (offset = XEXP (sum, 1))
26870 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26871 return "wldrw%?\t%0, %1";
26872
26873 /* Fix up an out-of-range load of a GR register. */
26874 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26875 wcgr = operands[0];
26876 operands[0] = reg;
26877 output_asm_insn ("ldr%?\t%0, %1", operands);
26878
26879 operands[0] = wcgr;
26880 operands[1] = reg;
26881 output_asm_insn ("tmcr%?\t%0, %1", operands);
26882 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26883
26884 return "";
26885 }
26886
26887 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26888
26889 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26890 named arg and all anonymous args onto the stack.
26891 XXX I know the prologue shouldn't be pushing registers, but it is faster
26892 that way. */
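/* Illustrative example (assuming the AAPCS with r0-r3 as the argument
   registers): for "int f (int fmt, ...)" only r0 holds a named argument, so
   nregs is 1 and *pretend_size becomes (NUM_ARG_REGS - 1) * UNITS_PER_WORD
   = 12, which makes the prologue push r1-r3 just below the stacked
   anonymous arguments.  */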
26893
26894 static void
26895 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26896 machine_mode mode,
26897 tree type,
26898 int *pretend_size,
26899 int second_time ATTRIBUTE_UNUSED)
26900 {
26901 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26902 int nregs;
26903
26904 cfun->machine->uses_anonymous_args = 1;
26905 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26906 {
26907 nregs = pcum->aapcs_ncrn;
26908 if (nregs & 1)
26909 {
26910 int res = arm_needs_doubleword_align (mode, type);
26911 if (res < 0 && warn_psabi)
26912 inform (input_location, "parameter passing for argument of "
26913 "type %qT changed in GCC 7.1", type);
26914 else if (res > 0)
26915 nregs++;
26916 }
26917 }
26918 else
26919 nregs = pcum->nregs;
26920
26921 if (nregs < NUM_ARG_REGS)
26922 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26923 }
26924
26925 /* We can't rely on the caller doing the proper promotion when
26926 using APCS or ATPCS. */
26927
26928 static bool
26929 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26930 {
26931 return !TARGET_AAPCS_BASED;
26932 }
26933
26934 static machine_mode
26935 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26936 machine_mode mode,
26937 int *punsignedp ATTRIBUTE_UNUSED,
26938 const_tree fntype ATTRIBUTE_UNUSED,
26939 int for_return ATTRIBUTE_UNUSED)
26940 {
26941 if (GET_MODE_CLASS (mode) == MODE_INT
26942 && GET_MODE_SIZE (mode) < 4)
26943 return SImode;
26944
26945 return mode;
26946 }
26947
26948
26949 static bool
26950 arm_default_short_enums (void)
26951 {
26952 return ARM_DEFAULT_SHORT_ENUMS;
26953 }
26954
26955
26956 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26957
26958 static bool
26959 arm_align_anon_bitfield (void)
26960 {
26961 return TARGET_AAPCS_BASED;
26962 }
26963
26964
26965 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
26966
26967 static tree
26968 arm_cxx_guard_type (void)
26969 {
26970 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
26971 }
26972
26973
26974 /* The EABI says test the least significant bit of a guard variable. */
26975
26976 static bool
26977 arm_cxx_guard_mask_bit (void)
26978 {
26979 return TARGET_AAPCS_BASED;
26980 }
26981
26982
26983 /* The EABI specifies that all array cookies are 8 bytes long. */
26984
26985 static tree
26986 arm_get_cookie_size (tree type)
26987 {
26988 tree size;
26989
26990 if (!TARGET_AAPCS_BASED)
26991 return default_cxx_get_cookie_size (type);
26992
26993 size = build_int_cst (sizetype, 8);
26994 return size;
26995 }
26996
26997
26998 /* The EABI says that array cookies should also contain the element size. */
26999
27000 static bool
27001 arm_cookie_has_size (void)
27002 {
27003 return TARGET_AAPCS_BASED;
27004 }
27005
27006
27007 /* The EABI says constructors and destructors should return a pointer to
27008 the object constructed/destroyed. */
27009
27010 static bool
27011 arm_cxx_cdtor_returns_this (void)
27012 {
27013 return TARGET_AAPCS_BASED;
27014 }
27015
27016 /* The EABI says that an inline function may never be the key
27017 method. */
27018
27019 static bool
27020 arm_cxx_key_method_may_be_inline (void)
27021 {
27022 return !TARGET_AAPCS_BASED;
27023 }
27024
27025 static void
27026 arm_cxx_determine_class_data_visibility (tree decl)
27027 {
27028 if (!TARGET_AAPCS_BASED
27029 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
27030 return;
27031
27032 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
27033 is exported. However, on systems without dynamic vague linkage,
27034 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
27035 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
27036 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
27037 else
27038 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
27039 DECL_VISIBILITY_SPECIFIED (decl) = 1;
27040 }
27041
27042 static bool
27043 arm_cxx_class_data_always_comdat (void)
27044 {
27045 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
27046 vague linkage if the class has no key function. */
27047 return !TARGET_AAPCS_BASED;
27048 }
27049
27050
27051 /* The EABI says __aeabi_atexit should be used to register static
27052 destructors. */
27053
27054 static bool
27055 arm_cxx_use_aeabi_atexit (void)
27056 {
27057 return TARGET_AAPCS_BASED;
27058 }
27059
27060
27061 void
27062 arm_set_return_address (rtx source, rtx scratch)
27063 {
27064 arm_stack_offsets *offsets;
27065 HOST_WIDE_INT delta;
27066 rtx addr, mem;
27067 unsigned long saved_regs;
27068
27069 offsets = arm_get_frame_offsets ();
27070 saved_regs = offsets->saved_regs_mask;
27071
27072 if ((saved_regs & (1 << LR_REGNUM)) == 0)
27073 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27074 else
27075 {
27076 if (frame_pointer_needed)
27077 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
27078 else
27079 {
27080 /* LR will be the first saved register. */
27081 delta = offsets->outgoing_args - (offsets->frame + 4);
27082
27083
27084 if (delta >= 4096)
27085 {
27086 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
27087 GEN_INT (delta & ~4095)));
27088 addr = scratch;
27089 delta &= 4095;
27090 }
27091 else
27092 addr = stack_pointer_rtx;
27093
27094 addr = plus_constant (Pmode, addr, delta);
27095 }
27096
27097 /* The store needs to be marked to prevent DSE from deleting
27098 it as dead if it is based on fp. */
27099 mem = gen_frame_mem (Pmode, addr);
27100 MEM_VOLATILE_P (mem) = true;
27101 emit_move_insn (mem, source);
27102 }
27103 }
27104
27105
27106 void
27107 thumb_set_return_address (rtx source, rtx scratch)
27108 {
27109 arm_stack_offsets *offsets;
27110 HOST_WIDE_INT delta;
27111 HOST_WIDE_INT limit;
27112 int reg;
27113 rtx addr, mem;
27114 unsigned long mask;
27115
27116 emit_use (source);
27117
27118 offsets = arm_get_frame_offsets ();
27119 mask = offsets->saved_regs_mask;
27120 if (mask & (1 << LR_REGNUM))
27121 {
27122 limit = 1024;
27123 /* Find the saved regs. */
27124 if (frame_pointer_needed)
27125 {
27126 delta = offsets->soft_frame - offsets->saved_args;
27127 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
27128 if (TARGET_THUMB1)
27129 limit = 128;
27130 }
27131 else
27132 {
27133 delta = offsets->outgoing_args - offsets->saved_args;
27134 reg = SP_REGNUM;
27135 }
27136 /* Allow for the stack frame. */
27137 if (TARGET_THUMB1 && TARGET_BACKTRACE)
27138 delta -= 16;
27139 /* The link register is always the first saved register. */
27140 delta -= 4;
27141
27142 /* Construct the address. */
27143 addr = gen_rtx_REG (SImode, reg);
27144 if (delta > limit)
27145 {
27146 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
27147 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
27148 addr = scratch;
27149 }
27150 else
27151 addr = plus_constant (Pmode, addr, delta);
27152
27153 /* The store needs to be marked to prevent DSE from deleting
27154 it as dead if it is based on fp. */
27155 mem = gen_frame_mem (Pmode, addr);
27156 MEM_VOLATILE_P (mem) = true;
27157 emit_move_insn (mem, source);
27158 }
27159 else
27160 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27161 }
27162
27163 /* Implements target hook vector_mode_supported_p. */
27164 bool
27165 arm_vector_mode_supported_p (machine_mode mode)
27166 {
27167 /* Neon also supports V2SImode, etc. listed in the clause below. */
27168 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
27169 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
27170 || mode == V2DImode || mode == V8HFmode))
27171 return true;
27172
27173 if ((TARGET_NEON || TARGET_IWMMXT)
27174 && ((mode == V2SImode)
27175 || (mode == V4HImode)
27176 || (mode == V8QImode)))
27177 return true;
27178
27179 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
27180 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
27181 || mode == V2HAmode))
27182 return true;
27183
27184 return false;
27185 }
27186
27187 /* Implements target hook array_mode_supported_p. */
27188
27189 static bool
27190 arm_array_mode_supported_p (machine_mode mode,
27191 unsigned HOST_WIDE_INT nelems)
27192 {
27193 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
27194 for now, as the lane-swapping logic needs to be extended in the expanders.
27195 See PR target/82518. */
27196 if (TARGET_NEON && !BYTES_BIG_ENDIAN
27197 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
27198 && (nelems >= 2 && nelems <= 4))
27199 return true;
27200
27201 return false;
27202 }
27203
27204 /* Use the option -mvectorize-with-neon-double to override the use of quadword
27205 registers when autovectorizing for Neon, at least until multiple vector
27206 widths are supported properly by the middle-end. */
27207
27208 static machine_mode
27209 arm_preferred_simd_mode (scalar_mode mode)
27210 {
27211 if (TARGET_NEON)
27212 switch (mode)
27213 {
27214 case E_SFmode:
27215 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27216 case E_SImode:
27217 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27218 case E_HImode:
27219 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27220 case E_QImode:
27221 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27222 case E_DImode:
27223 if (!TARGET_NEON_VECTORIZE_DOUBLE)
27224 return V2DImode;
27225 break;
27226
27227 default:;
27228 }
27229
27230 if (TARGET_REALLY_IWMMXT)
27231 switch (mode)
27232 {
27233 case E_SImode:
27234 return V2SImode;
27235 case E_HImode:
27236 return V4HImode;
27237 case E_QImode:
27238 return V8QImode;
27239
27240 default:;
27241 }
27242
27243 return word_mode;
27244 }
27245
27246 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27247
27248 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27249 using r0-r4 for function arguments, r7 for the stack frame and don't have
27250 enough left over to do doubleword arithmetic. For Thumb-2 all the
27251 potentially problematic instructions accept high registers so this is not
27252 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27253 that require many low registers. */
27254 static bool
27255 arm_class_likely_spilled_p (reg_class_t rclass)
27256 {
27257 if ((TARGET_THUMB1 && rclass == LO_REGS)
27258 || rclass == CC_REG)
27259 return true;
27260
27261 return false;
27262 }
27263
27264 /* Implements target hook small_register_classes_for_mode_p. */
27265 bool
27266 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27267 {
27268 return TARGET_THUMB1;
27269 }
27270
27271 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27272 ARM insns and therefore guarantee that the shift count is modulo 256.
27273 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27274 guarantee no particular behavior for out-of-range counts. */
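/* For example, a register-specified SImode shift by 257 behaves like a shift
   by 1 because only the low byte of the count is used (mask 255), whereas
   DImode shifts advertise no mask (0) since no particular behaviour is
   guaranteed for out-of-range counts.  */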
27275
27276 static unsigned HOST_WIDE_INT
27277 arm_shift_truncation_mask (machine_mode mode)
27278 {
27279 return mode == SImode ? 255 : 0;
27280 }
27281
27282
27283 /* Map internal gcc register numbers to DWARF2 register numbers. */
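/* Rough illustration of the mapping below: core registers keep their own
   numbers, s2 (GCC regno FIRST_VFP_REGNUM + 2) maps to DWARF register 66 in
   the legacy single-precision range, and registers only addressable as
   doubles land in the 256 + n range, e.g. d16 becomes 272.  */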
27284
27285 unsigned int
27286 arm_dbx_register_number (unsigned int regno)
27287 {
27288 if (regno < 16)
27289 return regno;
27290
27291 if (IS_VFP_REGNUM (regno))
27292 {
27293 /* See comment in arm_dwarf_register_span. */
27294 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27295 return 64 + regno - FIRST_VFP_REGNUM;
27296 else
27297 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27298 }
27299
27300 if (IS_IWMMXT_GR_REGNUM (regno))
27301 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27302
27303 if (IS_IWMMXT_REGNUM (regno))
27304 return 112 + regno - FIRST_IWMMXT_REGNUM;
27305
27306 return DWARF_FRAME_REGISTERS;
27307 }
27308
27309 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27310 GCC models them as 64 32-bit registers, so we need to describe this to
27311 the DWARF generation code. Other registers can use the default. */
27312 static rtx
27313 arm_dwarf_register_span (rtx rtl)
27314 {
27315 machine_mode mode;
27316 unsigned regno;
27317 rtx parts[16];
27318 int nregs;
27319 int i;
27320
27321 regno = REGNO (rtl);
27322 if (!IS_VFP_REGNUM (regno))
27323 return NULL_RTX;
27324
27325 /* XXX FIXME: The EABI defines two VFP register ranges:
27326 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27327 256-287: D0-D31
27328 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27329 corresponding D register. Until GDB supports this, we shall use the
27330 legacy encodings. We also use these encodings for D0-D15 for
27331 compatibility with older debuggers. */
27332 mode = GET_MODE (rtl);
27333 if (GET_MODE_SIZE (mode) < 8)
27334 return NULL_RTX;
27335
27336 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27337 {
27338 nregs = GET_MODE_SIZE (mode) / 4;
27339 for (i = 0; i < nregs; i += 2)
27340 if (TARGET_BIG_END)
27341 {
27342 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27343 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27344 }
27345 else
27346 {
27347 parts[i] = gen_rtx_REG (SImode, regno + i);
27348 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27349 }
27350 }
27351 else
27352 {
27353 nregs = GET_MODE_SIZE (mode) / 8;
27354 for (i = 0; i < nregs; i++)
27355 parts[i] = gen_rtx_REG (DImode, regno + i);
27356 }
27357
27358 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27359 }
27360
27361 #if ARM_UNWIND_INFO
27362 /* Emit unwind directives for a store-multiple instruction or stack pointer
27363 push during alignment.
27364 These should only ever be generated by the function prologue code, so
27365 expect them to have a particular form.
27366 The store-multiple instruction sometimes pushes pc as the last register,
27367 although it should not be tracked in the unwind information; for -Os it
27368 sometimes pushes some dummy registers before the first register that needs
27369 to be tracked in the unwind information. Such dummy registers are there
27370 just to avoid a separate stack adjustment, and will not be restored in the
27371 epilogue. */
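/* Illustrative examples of the directives emitted below: a prologue
   "push {r4, r5, lr}" becomes ".save {r4, r5, lr}", a "vpush {d8}" becomes
   ".vsave {d8}", and a pushed pc or dummy register that will never be
   restored is described with a ".pad" directive instead.  */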
27372
27373 static void
27374 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27375 {
27376 int i;
27377 HOST_WIDE_INT offset;
27378 HOST_WIDE_INT nregs;
27379 int reg_size;
27380 unsigned reg;
27381 unsigned lastreg;
27382 unsigned padfirst = 0, padlast = 0;
27383 rtx e;
27384
27385 e = XVECEXP (p, 0, 0);
27386 gcc_assert (GET_CODE (e) == SET);
27387
27388 /* First insn will adjust the stack pointer. */
27389 gcc_assert (GET_CODE (e) == SET
27390 && REG_P (SET_DEST (e))
27391 && REGNO (SET_DEST (e)) == SP_REGNUM
27392 && GET_CODE (SET_SRC (e)) == PLUS);
27393
27394 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27395 nregs = XVECLEN (p, 0) - 1;
27396 gcc_assert (nregs);
27397
27398 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27399 if (reg < 16)
27400 {
27401 /* For -Os dummy registers can be pushed at the beginning to
27402 avoid separate stack pointer adjustment. */
27403 e = XVECEXP (p, 0, 1);
27404 e = XEXP (SET_DEST (e), 0);
27405 if (GET_CODE (e) == PLUS)
27406 padfirst = INTVAL (XEXP (e, 1));
27407 gcc_assert (padfirst == 0 || optimize_size);
27408 /* The function prologue may also push pc, but not annotate it as it is
27409 never restored. We turn this into a stack pointer adjustment. */
27410 e = XVECEXP (p, 0, nregs);
27411 e = XEXP (SET_DEST (e), 0);
27412 if (GET_CODE (e) == PLUS)
27413 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27414 else
27415 padlast = offset - 4;
27416 gcc_assert (padlast == 0 || padlast == 4);
27417 if (padlast == 4)
27418 fprintf (asm_out_file, "\t.pad #4\n");
27419 reg_size = 4;
27420 fprintf (asm_out_file, "\t.save {");
27421 }
27422 else if (IS_VFP_REGNUM (reg))
27423 {
27424 reg_size = 8;
27425 fprintf (asm_out_file, "\t.vsave {");
27426 }
27427 else
27428 /* Unknown register type. */
27429 gcc_unreachable ();
27430
27431 /* If the stack increment doesn't match the size of the saved registers,
27432 something has gone horribly wrong. */
27433 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27434
27435 offset = padfirst;
27436 lastreg = 0;
27437 /* The remaining insns will describe the stores. */
27438 for (i = 1; i <= nregs; i++)
27439 {
27440 /* Expect (set (mem <addr>) (reg)).
27441 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27442 e = XVECEXP (p, 0, i);
27443 gcc_assert (GET_CODE (e) == SET
27444 && MEM_P (SET_DEST (e))
27445 && REG_P (SET_SRC (e)));
27446
27447 reg = REGNO (SET_SRC (e));
27448 gcc_assert (reg >= lastreg);
27449
27450 if (i != 1)
27451 fprintf (asm_out_file, ", ");
27452 /* We can't use %r for vfp because we need to use the
27453 double precision register names. */
27454 if (IS_VFP_REGNUM (reg))
27455 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27456 else
27457 asm_fprintf (asm_out_file, "%r", reg);
27458
27459 if (flag_checking)
27460 {
27461 /* Check that the addresses are consecutive. */
27462 e = XEXP (SET_DEST (e), 0);
27463 if (GET_CODE (e) == PLUS)
27464 gcc_assert (REG_P (XEXP (e, 0))
27465 && REGNO (XEXP (e, 0)) == SP_REGNUM
27466 && CONST_INT_P (XEXP (e, 1))
27467 && offset == INTVAL (XEXP (e, 1)));
27468 else
27469 gcc_assert (i == 1
27470 && REG_P (e)
27471 && REGNO (e) == SP_REGNUM);
27472 offset += reg_size;
27473 }
27474 }
27475 fprintf (asm_out_file, "}\n");
27476 if (padfirst)
27477 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27478 }
27479
27480 /* Emit unwind directives for a SET. */
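/* Illustrative cases handled below: a single-register push such as
   (set (mem (pre_dec sp)) (reg r4)) emits ".save {r4}", a stack decrement
   (set sp (plus sp (const_int -16))) emits ".pad #16", and establishing the
   frame pointer from a register plus offset emits a ".setfp" directive.  */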
27481
27482 static void
27483 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27484 {
27485 rtx e0;
27486 rtx e1;
27487 unsigned reg;
27488
27489 e0 = XEXP (p, 0);
27490 e1 = XEXP (p, 1);
27491 switch (GET_CODE (e0))
27492 {
27493 case MEM:
27494 /* Pushing a single register. */
27495 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27496 || !REG_P (XEXP (XEXP (e0, 0), 0))
27497 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27498 abort ();
27499
27500 asm_fprintf (asm_out_file, "\t.save ");
27501 if (IS_VFP_REGNUM (REGNO (e1)))
27502 asm_fprintf(asm_out_file, "{d%d}\n",
27503 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27504 else
27505 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27506 break;
27507
27508 case REG:
27509 if (REGNO (e0) == SP_REGNUM)
27510 {
27511 /* A stack increment. */
27512 if (GET_CODE (e1) != PLUS
27513 || !REG_P (XEXP (e1, 0))
27514 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27515 || !CONST_INT_P (XEXP (e1, 1)))
27516 abort ();
27517
27518 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27519 -INTVAL (XEXP (e1, 1)));
27520 }
27521 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27522 {
27523 HOST_WIDE_INT offset;
27524
27525 if (GET_CODE (e1) == PLUS)
27526 {
27527 if (!REG_P (XEXP (e1, 0))
27528 || !CONST_INT_P (XEXP (e1, 1)))
27529 abort ();
27530 reg = REGNO (XEXP (e1, 0));
27531 offset = INTVAL (XEXP (e1, 1));
27532 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27533 HARD_FRAME_POINTER_REGNUM, reg,
27534 offset);
27535 }
27536 else if (REG_P (e1))
27537 {
27538 reg = REGNO (e1);
27539 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27540 HARD_FRAME_POINTER_REGNUM, reg);
27541 }
27542 else
27543 abort ();
27544 }
27545 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27546 {
27547 /* Move from sp to reg. */
27548 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27549 }
27550 else if (GET_CODE (e1) == PLUS
27551 && REG_P (XEXP (e1, 0))
27552 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27553 && CONST_INT_P (XEXP (e1, 1)))
27554 {
27555 /* Set reg to offset from sp. */
27556 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27557 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27558 }
27559 else
27560 abort ();
27561 break;
27562
27563 default:
27564 abort ();
27565 }
27566 }
27567
27568
27569 /* Emit unwind directives for the given insn. */
27570
27571 static void
27572 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27573 {
27574 rtx note, pat;
27575 bool handled_one = false;
27576
27577 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27578 return;
27579
27580 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27581 && (TREE_NOTHROW (current_function_decl)
27582 || crtl->all_throwers_are_sibcalls))
27583 return;
27584
27585 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27586 return;
27587
27588 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27589 {
27590 switch (REG_NOTE_KIND (note))
27591 {
27592 case REG_FRAME_RELATED_EXPR:
27593 pat = XEXP (note, 0);
27594 goto found;
27595
27596 case REG_CFA_REGISTER:
27597 pat = XEXP (note, 0);
27598 if (pat == NULL)
27599 {
27600 pat = PATTERN (insn);
27601 if (GET_CODE (pat) == PARALLEL)
27602 pat = XVECEXP (pat, 0, 0);
27603 }
27604
27605 /* Only emitted for IS_STACKALIGN re-alignment. */
27606 {
27607 rtx dest, src;
27608 unsigned reg;
27609
27610 src = SET_SRC (pat);
27611 dest = SET_DEST (pat);
27612
27613 gcc_assert (src == stack_pointer_rtx);
27614 reg = REGNO (dest);
27615 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27616 reg + 0x90, reg);
27617 }
27618 handled_one = true;
27619 break;
27620
27621 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27622 to get correct DWARF information for shrink-wrapping. We should not
27623 emit unwind information for it because these notes are used either for
27624 pretend arguments or to adjust sp and restore registers from the
27625 stack. */
27626 case REG_CFA_DEF_CFA:
27627 case REG_CFA_ADJUST_CFA:
27628 case REG_CFA_RESTORE:
27629 return;
27630
27631 case REG_CFA_EXPRESSION:
27632 case REG_CFA_OFFSET:
27633 /* ??? Only handling here what we actually emit. */
27634 gcc_unreachable ();
27635
27636 default:
27637 break;
27638 }
27639 }
27640 if (handled_one)
27641 return;
27642 pat = PATTERN (insn);
27643 found:
27644
27645 switch (GET_CODE (pat))
27646 {
27647 case SET:
27648 arm_unwind_emit_set (asm_out_file, pat);
27649 break;
27650
27651 case SEQUENCE:
27652 /* Store multiple. */
27653 arm_unwind_emit_sequence (asm_out_file, pat);
27654 break;
27655
27656 default:
27657 abort();
27658 }
27659 }
27660
27661
27662 /* Output a reference from a function exception table to the type_info
27663 object X. The EABI specifies that the symbol should be relocated by
27664 an R_ARM_TARGET2 relocation. */
27665
27666 static bool
27667 arm_output_ttype (rtx x)
27668 {
27669 fputs ("\t.word\t", asm_out_file);
27670 output_addr_const (asm_out_file, x);
27671 /* Use special relocations for symbol references. */
27672 if (!CONST_INT_P (x))
27673 fputs ("(TARGET2)", asm_out_file);
27674 fputc ('\n', asm_out_file);
27675
27676 return TRUE;
27677 }
27678
27679 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27680
27681 static void
27682 arm_asm_emit_except_personality (rtx personality)
27683 {
27684 fputs ("\t.personality\t", asm_out_file);
27685 output_addr_const (asm_out_file, personality);
27686 fputc ('\n', asm_out_file);
27687 }
27688 #endif /* ARM_UNWIND_INFO */
27689
27690 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27691
27692 static void
27693 arm_asm_init_sections (void)
27694 {
27695 #if ARM_UNWIND_INFO
27696 exception_section = get_unnamed_section (0, output_section_asm_op,
27697 "\t.handlerdata");
27698 #endif /* ARM_UNWIND_INFO */
27699
27700 #ifdef OBJECT_FORMAT_ELF
27701 if (target_pure_code)
27702 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27703 #endif
27704 }
27705
27706 /* Output unwind directives for the start/end of a function. */
27707
27708 void
27709 arm_output_fn_unwind (FILE * f, bool prologue)
27710 {
27711 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27712 return;
27713
27714 if (prologue)
27715 fputs ("\t.fnstart\n", f);
27716 else
27717 {
27718 /* If this function will never be unwound, then mark it as such.
27719 The same condition is used in arm_unwind_emit to suppress
27720 the frame annotations. */
27721 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27722 && (TREE_NOTHROW (current_function_decl)
27723 || crtl->all_throwers_are_sibcalls))
27724 fputs("\t.cantunwind\n", f);
27725
27726 fputs ("\t.fnend\n", f);
27727 }
27728 }
27729
27730 static bool
27731 arm_emit_tls_decoration (FILE *fp, rtx x)
27732 {
27733 enum tls_reloc reloc;
27734 rtx val;
27735
27736 val = XVECEXP (x, 0, 0);
27737 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27738
27739 output_addr_const (fp, val);
27740
27741 switch (reloc)
27742 {
27743 case TLS_GD32:
27744 fputs ("(tlsgd)", fp);
27745 break;
27746 case TLS_LDM32:
27747 fputs ("(tlsldm)", fp);
27748 break;
27749 case TLS_LDO32:
27750 fputs ("(tlsldo)", fp);
27751 break;
27752 case TLS_IE32:
27753 fputs ("(gottpoff)", fp);
27754 break;
27755 case TLS_LE32:
27756 fputs ("(tpoff)", fp);
27757 break;
27758 case TLS_DESCSEQ:
27759 fputs ("(tlsdesc)", fp);
27760 break;
27761 default:
27762 gcc_unreachable ();
27763 }
27764
27765 switch (reloc)
27766 {
27767 case TLS_GD32:
27768 case TLS_LDM32:
27769 case TLS_IE32:
27770 case TLS_DESCSEQ:
27771 fputs (" + (. - ", fp);
27772 output_addr_const (fp, XVECEXP (x, 0, 2));
27773 /* For DESCSEQ the 3rd operand encodes thumbness, and is added */
27774 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27775 output_addr_const (fp, XVECEXP (x, 0, 3));
27776 fputc (')', fp);
27777 break;
27778 default:
27779 break;
27780 }
27781
27782 return TRUE;
27783 }
27784
27785 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27786
27787 static void
27788 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27789 {
27790 gcc_assert (size == 4);
27791 fputs ("\t.word\t", file);
27792 output_addr_const (file, x);
27793 fputs ("(tlsldo)", file);
27794 }
27795
27796 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27797
27798 static bool
27799 arm_output_addr_const_extra (FILE *fp, rtx x)
27800 {
27801 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27802 return arm_emit_tls_decoration (fp, x);
27803 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27804 {
27805 char label[256];
27806 int labelno = INTVAL (XVECEXP (x, 0, 0));
27807
27808 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27809 assemble_name_raw (fp, label);
27810
27811 return TRUE;
27812 }
27813 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27814 {
27815 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27816 if (GOT_PCREL)
27817 fputs ("+.", fp);
27818 fputs ("-(", fp);
27819 output_addr_const (fp, XVECEXP (x, 0, 0));
27820 fputc (')', fp);
27821 return TRUE;
27822 }
27823 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27824 {
27825 output_addr_const (fp, XVECEXP (x, 0, 0));
27826 if (GOT_PCREL)
27827 fputs ("+.", fp);
27828 fputs ("-(", fp);
27829 output_addr_const (fp, XVECEXP (x, 0, 1));
27830 fputc (')', fp);
27831 return TRUE;
27832 }
27833 else if (GET_CODE (x) == CONST_VECTOR)
27834 return arm_emit_vector_const (fp, x);
27835
27836 return FALSE;
27837 }
27838
27839 /* Output assembly for a shift instruction.
27840 SET_FLAGS determines how the instruction modifies the condition codes.
27841 0 - Do not set condition codes.
27842 1 - Set condition codes.
27843 2 - Use smallest instruction. */
27844 const char *
27845 arm_output_shift(rtx * operands, int set_flags)
27846 {
27847 char pattern[100];
27848 static const char flag_chars[3] = {'?', '.', '!'};
27849 const char *shift;
27850 HOST_WIDE_INT val;
27851 char c;
27852
27853 c = flag_chars[set_flags];
27854 shift = shift_op(operands[3], &val);
27855 if (shift)
27856 {
27857 if (val != -1)
27858 operands[2] = GEN_INT(val);
27859 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27860 }
27861 else
27862 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27863
27864 output_asm_insn (pattern, operands);
27865 return "";
27866 }
27867
27868 /* Output assembly for a WMMX immediate shift instruction. */
27869 const char *
27870 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27871 {
27872 int shift = INTVAL (operands[2]);
27873 char templ[50];
27874 machine_mode opmode = GET_MODE (operands[0]);
27875
27876 gcc_assert (shift >= 0);
27877
27878 /* If the shift value is greater than 63 (for the D qualifier), 31 (for the
27879 W qualifier) or 15 (for the H qualifier), handle the out-of-range shift here. */
27880 if (((opmode == V4HImode) && (shift > 15))
27881 || ((opmode == V2SImode) && (shift > 31))
27882 || ((opmode == DImode) && (shift > 63)))
27883 {
27884 if (wror_or_wsra)
27885 {
27886 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27887 output_asm_insn (templ, operands);
27888 if (opmode == DImode)
27889 {
27890 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27891 output_asm_insn (templ, operands);
27892 }
27893 }
27894 else
27895 {
27896 /* The destination register will contain all zeros. */
27897 sprintf (templ, "wzero\t%%0");
27898 output_asm_insn (templ, operands);
27899 }
27900 return "";
27901 }
27902
27903 if ((opmode == DImode) && (shift > 32))
27904 {
27905 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27906 output_asm_insn (templ, operands);
27907 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27908 output_asm_insn (templ, operands);
27909 }
27910 else
27911 {
27912 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27913 output_asm_insn (templ, operands);
27914 }
27915 return "";
27916 }
27917
27918 /* Output assembly for a WMMX tinsr instruction. */
27919 const char *
27920 arm_output_iwmmxt_tinsr (rtx *operands)
27921 {
27922 int mask = INTVAL (operands[3]);
27923 int i;
27924 char templ[50];
27925 int units = mode_nunits[GET_MODE (operands[0])];
27926 gcc_assert ((mask & (mask - 1)) == 0);
27927 for (i = 0; i < units; ++i)
27928 {
27929 if ((mask & 0x01) == 1)
27930 {
27931 break;
27932 }
27933 mask >>= 1;
27934 }
27935 gcc_assert (i < units);
27936 {
27937 switch (GET_MODE (operands[0]))
27938 {
27939 case E_V8QImode:
27940 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27941 break;
27942 case E_V4HImode:
27943 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27944 break;
27945 case E_V2SImode:
27946 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27947 break;
27948 default:
27949 gcc_unreachable ();
27950 break;
27951 }
27952 output_asm_insn (templ, operands);
27953 }
27954 return "";
27955 }
27956
27957 /* Output a Thumb-1 casesi dispatch sequence. */
27958 const char *
27959 thumb1_output_casesi (rtx *operands)
27960 {
27961 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
27962
27963 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27964
27965 switch (GET_MODE(diff_vec))
27966 {
27967 case E_QImode:
27968 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27969 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
27970 case E_HImode:
27971 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
27972 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
27973 case E_SImode:
27974 return "bl\t%___gnu_thumb1_case_si";
27975 default:
27976 gcc_unreachable ();
27977 }
27978 }
27979
27980 /* Output a Thumb-2 casesi instruction. */
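/* For a QImode dispatch table the emitted sequence is, roughly:
       cmp   <index>, <bound>
       bhi   <default label>
       tbb   [pc, <index>]
   with tbh used for HImode tables and an adr/ldr sequence for SImode.  */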
27981 const char *
27982 thumb2_output_casesi (rtx *operands)
27983 {
27984 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
27985
27986 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
27987
27988 output_asm_insn ("cmp\t%0, %1", operands);
27989 output_asm_insn ("bhi\t%l3", operands);
27990 switch (GET_MODE(diff_vec))
27991 {
27992 case E_QImode:
27993 return "tbb\t[%|pc, %0]";
27994 case E_HImode:
27995 return "tbh\t[%|pc, %0, lsl #1]";
27996 case E_SImode:
27997 if (flag_pic)
27998 {
27999 output_asm_insn ("adr\t%4, %l2", operands);
28000 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
28001 output_asm_insn ("add\t%4, %4, %5", operands);
28002 return "bx\t%4";
28003 }
28004 else
28005 {
28006 output_asm_insn ("adr\t%4, %l2", operands);
28007 return "ldr\t%|pc, [%4, %0, lsl #2]";
28008 }
28009 default:
28010 gcc_unreachable ();
28011 }
28012 }
28013
28014 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
28015 per-core tuning structs. */
28016 static int
28017 arm_issue_rate (void)
28018 {
28019 return current_tune->issue_rate;
28020 }
28021
28022 /* Return how many instructions the scheduler should look ahead to choose
28023 the best one. */
28024 static int
28025 arm_first_cycle_multipass_dfa_lookahead (void)
28026 {
28027 int issue_rate = arm_issue_rate ();
28028
28029 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
28030 }
28031
28032 /* Enable modeling of L2 auto-prefetcher. */
28033 static int
28034 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
28035 {
28036 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
28037 }
28038
28039 const char *
28040 arm_mangle_type (const_tree type)
28041 {
28042 /* The ARM ABI documents (10th October 2008) say that "__va_list"
28043 has to be mangled as if it is in the "std" namespace. */
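/* For example (illustrative), "void f (__builtin_va_list)" is mangled as
   "_Z1fSt9__va_list" under the Itanium C++ ABI rules, exactly as if the
   argument type were std::__va_list.  */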
28044 if (TARGET_AAPCS_BASED
28045 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
28046 return "St9__va_list";
28047
28048 /* Half-precision float. */
28049 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
28050 return "Dh";
28051
28052 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
28053 builtin type. */
28054 if (TYPE_NAME (type) != NULL)
28055 return arm_mangle_builtin_type (type);
28056
28057 /* Use the default mangling. */
28058 return NULL;
28059 }
28060
28061 /* Order of allocation of core registers for Thumb: this allocation is
28062 written over the corresponding initial entries of the array
28063 initialized with REG_ALLOC_ORDER. We allocate all low registers
28064 first. Saving and restoring a low register is usually cheaper than
28065 using a call-clobbered high register. */
28066
28067 static const int thumb_core_reg_alloc_order[] =
28068 {
28069 3, 2, 1, 0, 4, 5, 6, 7,
28070 12, 14, 8, 9, 10, 11
28071 };
28072
28073 /* Adjust register allocation order when compiling for Thumb. */
28074
28075 void
28076 arm_order_regs_for_local_alloc (void)
28077 {
28078 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
28079 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
28080 if (TARGET_THUMB)
28081 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
28082 sizeof (thumb_core_reg_alloc_order));
28083 }
28084
28085 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
28086
28087 bool
28088 arm_frame_pointer_required (void)
28089 {
28090 if (SUBTARGET_FRAME_POINTER_REQUIRED)
28091 return true;
28092
28093 /* If the function receives nonlocal gotos, it needs to save the frame
28094 pointer in the nonlocal_goto_save_area object. */
28095 if (cfun->has_nonlocal_label)
28096 return true;
28097
28098 /* The frame pointer is required for non-leaf APCS frames. */
28099 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
28100 return true;
28101
28102 /* If we are probing the stack in the prologue, we will have a faulting
28103 instruction prior to the stack adjustment and this requires a frame
28104 pointer if we want to catch the exception using the EABI unwinder. */
28105 if (!IS_INTERRUPT (arm_current_func_type ())
28106 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
28107 || flag_stack_clash_protection)
28108 && arm_except_unwind_info (&global_options) == UI_TARGET
28109 && cfun->can_throw_non_call_exceptions)
28110 {
28111 HOST_WIDE_INT size = get_frame_size ();
28112
28113 /* That's irrelevant if there is no stack adjustment. */
28114 if (size <= 0)
28115 return false;
28116
28117 /* That's relevant only if there is a stack probe. */
28118 if (crtl->is_leaf && !cfun->calls_alloca)
28119 {
28120 /* We don't have the final size of the frame so adjust. */
28121 size += 32 * UNITS_PER_WORD;
28122 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
28123 return true;
28124 }
28125 else
28126 return true;
28127 }
28128
28129 return false;
28130 }
28131
28132 /* Only Thumb-1 lacks support for conditional execution, so return true if
28133 the target is not Thumb-1. */
28134 static bool
28135 arm_have_conditional_execution (void)
28136 {
28137 return !TARGET_THUMB1;
28138 }
28139
28140 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
28141 static HOST_WIDE_INT
28142 arm_vector_alignment (const_tree type)
28143 {
28144 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
28145
28146 if (TARGET_AAPCS_BASED)
28147 align = MIN (align, 64);
28148
28149 return align;
28150 }
28151
28152 static void
28153 arm_autovectorize_vector_sizes (vector_sizes *sizes)
28154 {
28155 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28156 {
28157 sizes->safe_push (16);
28158 sizes->safe_push (8);
28159 }
28160 }
28161
28162 static bool
28163 arm_vector_alignment_reachable (const_tree type, bool is_packed)
28164 {
28165 /* Vectors which aren't in packed structures will not be less aligned than
28166 the natural alignment of their element type, so this is safe. */
28167 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28168 return !is_packed;
28169
28170 return default_builtin_vector_alignment_reachable (type, is_packed);
28171 }
28172
28173 static bool
28174 arm_builtin_support_vector_misalignment (machine_mode mode,
28175 const_tree type, int misalignment,
28176 bool is_packed)
28177 {
28178 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28179 {
28180 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
28181
28182 if (is_packed)
28183 return align == 1;
28184
28185 /* If the misalignment is unknown, we should be able to handle the access
28186 so long as it is not to a member of a packed data structure. */
28187 if (misalignment == -1)
28188 return true;
28189
28190 /* Return true if the misalignment is a multiple of the natural alignment
28191 of the vector's element type. This is probably always going to be
28192 true in practice, since we've already established that this isn't a
28193 packed access. */
28194 return ((misalignment % align) == 0);
28195 }
28196
28197 return default_builtin_support_vector_misalignment (mode, type, misalignment,
28198 is_packed);
28199 }
28200
28201 static void
28202 arm_conditional_register_usage (void)
28203 {
28204 int regno;
28205
28206 if (TARGET_THUMB1 && optimize_size)
28207 {
28208 /* When optimizing for size on Thumb-1, it's better not
28209 to use the HI regs, because of the overhead of
28210 stacking them. */
28211 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
28212 fixed_regs[regno] = call_used_regs[regno] = 1;
28213 }
28214
28215 /* The link register can be clobbered by any branch insn,
28216 but we have no way to track that at present, so mark
28217 it as unavailable. */
28218 if (TARGET_THUMB1)
28219 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28220
28221 if (TARGET_32BIT && TARGET_HARD_FLOAT)
28222 {
28223 /* VFPv3 registers are disabled when earlier VFP
28224 versions are selected due to the definition of
28225 LAST_VFP_REGNUM. */
28226 for (regno = FIRST_VFP_REGNUM;
28227 regno <= LAST_VFP_REGNUM; ++ regno)
28228 {
28229 fixed_regs[regno] = 0;
28230 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28231 || regno >= FIRST_VFP_REGNUM + 32;
28232 }
28233 }
28234
28235 if (TARGET_REALLY_IWMMXT)
28236 {
28237 regno = FIRST_IWMMXT_GR_REGNUM;
28238 /* The 2002/10/09 revision of the XScale ABI has wCG0
28239 and wCG1 as call-preserved registers. The 2002/11/21
28240 revision changed this so that all wCG registers are
28241 scratch registers. */
28242 for (regno = FIRST_IWMMXT_GR_REGNUM;
28243 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28244 fixed_regs[regno] = 0;
28245 /* The XScale ABI has wR0 - wR9 as scratch registers,
28246 the rest as call-preserved registers. */
28247 for (regno = FIRST_IWMMXT_REGNUM;
28248 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28249 {
28250 fixed_regs[regno] = 0;
28251 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28252 }
28253 }
28254
28255 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28256 {
28257 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28258 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28259 }
28260 else if (TARGET_APCS_STACK)
28261 {
28262 fixed_regs[10] = 1;
28263 call_used_regs[10] = 1;
28264 }
28265 /* -mcaller-super-interworking reserves r11 for calls to
28266 _interwork_r11_call_via_rN(). Making the register global
28267 is an easy way of ensuring that it remains valid for all
28268 calls. */
28269 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28270 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28271 {
28272 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28273 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28274 if (TARGET_CALLER_INTERWORKING)
28275 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28276 }
28277 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28278 }
28279
28280 static reg_class_t
28281 arm_preferred_rename_class (reg_class_t rclass)
28282 {
28283 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28284 using GENERAL_REGS. During the register rename pass we therefore prefer
28285 LO_REGS, which can reduce code size. */
28286 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28287 return LO_REGS;
28288 else
28289 return NO_REGS;
28290 }
28291
28292 /* Compute the attribute "length" of insn "*push_multi".
28293 So this function MUST be kept in sync with that insn pattern. */
28294 int
28295 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28296 {
28297 int i, regno, hi_reg;
28298 int num_saves = XVECLEN (parallel_op, 0);
28299
28300 /* ARM mode. */
28301 if (TARGET_ARM)
28302 return 4;
28303 /* Thumb1 mode. */
28304 if (TARGET_THUMB1)
28305 return 2;
28306
28307 /* Thumb2 mode. */
28308 regno = REGNO (first_op);
28309 /* For PUSH/STM in Thumb-2 mode, we can use a 16-bit encoding if the register
28310 list fits in 8 bits, i.e. all registers in the list are LO_REGS (R0-R7).
28311 If any HI_REGS register is used, we must use a 32-bit encoding, with one
28312 exception: PUSH also accepts LR (a HI_REGS register) in the 16-bit
28313 encoding. */
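/* For example (illustrative), "push {r0-r7, lr}" still fits the 16-bit
   encoding, while "push {r4, r8}" needs the 32-bit encoding because r8 is a
   high register other than LR.  */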
28314 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28315 for (i = 1; i < num_saves && !hi_reg; i++)
28316 {
28317 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28318 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28319 }
28320
28321 if (!hi_reg)
28322 return 2;
28323 return 4;
28324 }
28325
28326 /* Compute the attribute "length" of an insn. Currently this function is used
28327 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28328 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28329 rtx, RETURN_PC is true if OPERANDS contains a return insn, and WRITE_BACK_P
28330 is true if OPERANDS contains an insn that explicitly updates the base register. */
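/* For example (illustrative), "pop {r4-r7, pc}" can use a 16-bit encoding and
   so has length 2, whereas popping into a high register such as r8, or using
   an LDM whose base register is a high register, requires a 32-bit encoding
   and length 4.  */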
28331
28332 int
28333 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28334 {
28335 /* ARM mode. */
28336 if (TARGET_ARM)
28337 return 4;
28338 /* Thumb1 mode. */
28339 if (TARGET_THUMB1)
28340 return 2;
28341
28342 rtx parallel_op = operands[0];
28343 /* Initialize to the number of elements in the PARALLEL. */
28344 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28345 /* Initialize to the base register's number. */
28346 unsigned regno = REGNO (operands[1]);
28347 /* Skip the return and write-back patterns.
28348 We only need the register pop patterns for the analysis below. */
28349 unsigned first_indx = 0;
28350 first_indx += return_pc ? 1 : 0;
28351 first_indx += write_back_p ? 1 : 0;
28352
28353 /* A pop operation can be done with either LDM or POP. If the base register
28354 is SP and write-back is used, then LDM is an alias of POP. */
28355 bool pop_p = (regno == SP_REGNUM && write_back_p);
28356 bool ldm_p = !pop_p;
28357
28358 /* Check base register for LDM. */
28359 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28360 return 4;
28361
28362 /* Check each register in the list. */
28363 for (; indx >= first_indx; indx--)
28364 {
28365 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28366 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28367 comment in arm_attr_length_push_multi. */
28368 if (REGNO_REG_CLASS (regno) == HI_REGS
28369 && (regno != PC_REGNUM || ldm_p))
28370 return 4;
28371 }
28372
28373 return 2;
28374 }
28375
28376 /* Compute the number of instructions emitted by output_move_double. */
28377 int
28378 arm_count_output_move_double_insns (rtx *operands)
28379 {
28380 int count;
28381 rtx ops[2];
28382 /* output_move_double may modify the operands array, so call it
28383 here on a copy of the array. */
28384 ops[0] = operands[0];
28385 ops[1] = operands[1];
28386 output_move_double (ops, false, &count);
28387 return count;
28388 }
28389
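/* If OPERAND is a positive CONST_DOUBLE whose exact reciprocal is a power of
   two 2**n with n in the range 0..31, return n; otherwise return 0.  For
   example, an operand of 0.25 yields 2.  */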
28390 int
28391 vfp3_const_double_for_fract_bits (rtx operand)
28392 {
28393 REAL_VALUE_TYPE r0;
28394
28395 if (!CONST_DOUBLE_P (operand))
28396 return 0;
28397
28398 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28399 if (exact_real_inverse (DFmode, &r0)
28400 && !REAL_VALUE_NEGATIVE (r0))
28401 {
28402 if (exact_real_truncate (DFmode, &r0))
28403 {
28404 HOST_WIDE_INT value = real_to_integer (&r0);
28405 value = value & 0xffffffff;
28406 if ((value != 0) && ( (value & (value - 1)) == 0))
28407 {
28408 int ret = exact_log2 (value);
28409 gcc_assert (IN_RANGE (ret, 0, 31));
28410 return ret;
28411 }
28412 }
28413 }
28414 return 0;
28415 }
28416
28417 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28418 log2 is in [1, 32], return that log2. Otherwise return -1.
28419 This is used in the patterns for vcvt.s32.f32 floating-point to
28420 fixed-point conversions. */
28421
28422 int
28423 vfp3_const_double_for_bits (rtx x)
28424 {
28425 const REAL_VALUE_TYPE *r;
28426
28427 if (!CONST_DOUBLE_P (x))
28428 return -1;
28429
28430 r = CONST_DOUBLE_REAL_VALUE (x);
28431
28432 if (REAL_VALUE_NEGATIVE (*r)
28433 || REAL_VALUE_ISNAN (*r)
28434 || REAL_VALUE_ISINF (*r)
28435 || !real_isinteger (r, SFmode))
28436 return -1;
28437
28438 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28439
28440 /* The exact_log2 above will have returned -1 if this is
28441 not an exact log2. */
28442 if (!IN_RANGE (hwint, 1, 32))
28443 return -1;
28444
28445 return hwint;
28446 }
28447
28448 \f
28449 /* Emit a memory barrier around an atomic sequence according to MODEL. */
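/* Illustratively, a __ATOMIC_SEQ_CST operation on a target without
   load-acquire/store-release instructions is bracketed by a barrier both
   before and after the exclusive-access sequence, while a relaxed operation
   needs neither; need_atomic_barrier_p makes that decision per model.  */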
28450
28451 static void
28452 arm_pre_atomic_barrier (enum memmodel model)
28453 {
28454 if (need_atomic_barrier_p (model, true))
28455 emit_insn (gen_memory_barrier ());
28456 }
28457
28458 static void
28459 arm_post_atomic_barrier (enum memmodel model)
28460 {
28461 if (need_atomic_barrier_p (model, false))
28462 emit_insn (gen_memory_barrier ());
28463 }
28464
28465 /* Emit the load-exclusive and store-exclusive instructions.
28466 Use acquire and release versions if necessary. */
28467
28468 static void
28469 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28470 {
28471 rtx (*gen) (rtx, rtx);
28472
28473 if (acq)
28474 {
28475 switch (mode)
28476 {
28477 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28478 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28479 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28480 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28481 default:
28482 gcc_unreachable ();
28483 }
28484 }
28485 else
28486 {
28487 switch (mode)
28488 {
28489 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28490 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28491 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28492 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28493 default:
28494 gcc_unreachable ();
28495 }
28496 }
28497
28498 emit_insn (gen (rval, mem));
28499 }
28500
28501 static void
28502 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28503 rtx mem, bool rel)
28504 {
28505 rtx (*gen) (rtx, rtx, rtx);
28506
28507 if (rel)
28508 {
28509 switch (mode)
28510 {
28511 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28512 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28513 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28514 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28515 default:
28516 gcc_unreachable ();
28517 }
28518 }
28519 else
28520 {
28521 switch (mode)
28522 {
28523 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28524 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28525 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28526 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28527 default:
28528 gcc_unreachable ();
28529 }
28530 }
28531
28532 emit_insn (gen (bval, rval, mem));
28533 }
28534
28535 /* Mark the previous jump instruction as unlikely. */
28536
28537 static void
28538 emit_unlikely_jump (rtx insn)
28539 {
28540 rtx_insn *jump = emit_jump_insn (insn);
28541 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28542 }
28543
28544 /* Expand a compare and swap pattern. */
28545
28546 void
28547 arm_expand_compare_and_swap (rtx operands[])
28548 {
28549 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28550 machine_mode mode;
28551 rtx (*gen) (rtx, rtx, rtx, rtx, rtx, rtx, rtx, rtx);
28552
28553 bval = operands[0];
28554 rval = operands[1];
28555 mem = operands[2];
28556 oldval = operands[3];
28557 newval = operands[4];
28558 is_weak = operands[5];
28559 mod_s = operands[6];
28560 mod_f = operands[7];
28561 mode = GET_MODE (mem);
28562
28563 /* Normally the succ memory model must be stronger than fail, but in the
28564 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28565 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28566
28567 if (TARGET_HAVE_LDACQ
28568 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28569 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28570 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28571
28572 switch (mode)
28573 {
28574 case E_QImode:
28575 case E_HImode:
28576 /* For narrow modes, we're going to perform the comparison in SImode,
28577 so do the zero-extension now. */
28578 rval = gen_reg_rtx (SImode);
28579 oldval = convert_modes (SImode, mode, oldval, true);
28580 /* FALLTHRU */
28581
28582 case E_SImode:
28583 /* Force the value into a register if needed. We waited until after
28584 the zero-extension above to do this properly. */
28585 if (!arm_add_operand (oldval, SImode))
28586 oldval = force_reg (SImode, oldval);
28587 break;
28588
28589 case E_DImode:
28590 if (!cmpdi_operand (oldval, mode))
28591 oldval = force_reg (mode, oldval);
28592 break;
28593
28594 default:
28595 gcc_unreachable ();
28596 }
28597
28598 if (TARGET_THUMB1)
28599 {
28600 switch (mode)
28601 {
28602 case E_QImode: gen = gen_atomic_compare_and_swapt1qi_1; break;
28603 case E_HImode: gen = gen_atomic_compare_and_swapt1hi_1; break;
28604 case E_SImode: gen = gen_atomic_compare_and_swapt1si_1; break;
28605 case E_DImode: gen = gen_atomic_compare_and_swapt1di_1; break;
28606 default:
28607 gcc_unreachable ();
28608 }
28609 }
28610 else
28611 {
28612 switch (mode)
28613 {
28614 case E_QImode: gen = gen_atomic_compare_and_swap32qi_1; break;
28615 case E_HImode: gen = gen_atomic_compare_and_swap32hi_1; break;
28616 case E_SImode: gen = gen_atomic_compare_and_swap32si_1; break;
28617 case E_DImode: gen = gen_atomic_compare_and_swap32di_1; break;
28618 default:
28619 gcc_unreachable ();
28620 }
28621 }
28622
28623 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28624 emit_insn (gen (bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f));
28625
28626 if (mode == QImode || mode == HImode)
28627 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28628
28629 /* In all cases, we arrange for success to be signaled by Z set.
28630 This arrangement allows the boolean result to be used directly
28631 in a subsequent branch, post optimization. For Thumb-1 targets, the
28632 boolean negation of the result is also stored in bval because the Thumb-1
28633 backend lacks dependency tracking for the CC flag, as flag-setting is not
28634 represented at the RTL level. */
28635 if (TARGET_THUMB1)
28636 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28637 else
28638 {
28639 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28640 emit_insn (gen_rtx_SET (bval, x));
28641 }
28642 }
28643
28644 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28645 another memory store between the load-exclusive and store-exclusive can
28646 reset the monitor from Exclusive to Open state. This means we must wait
28647 until after reload to split the pattern, lest we get a register spill in
28648 the middle of the atomic sequence. Success of the compare and swap is
28649 indicated by the Z flag being set for 32-bit targets and by neg_bval being
28650 zero for Thumb-1 targets (i.e. the negation of the boolean value returned
28651 by the atomic_compare_and_swap<mode> standard pattern in operand 0). */
28652
28653 void
28654 arm_split_compare_and_swap (rtx operands[])
28655 {
28656 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
28657 machine_mode mode;
28658 enum memmodel mod_s, mod_f;
28659 bool is_weak;
28660 rtx_code_label *label1, *label2;
28661 rtx x, cond;
28662
28663 rval = operands[1];
28664 mem = operands[2];
28665 oldval = operands[3];
28666 newval = operands[4];
28667 is_weak = (operands[5] != const0_rtx);
28668 mod_s_rtx = operands[6];
28669 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
28670 mod_f = memmodel_from_int (INTVAL (operands[7]));
28671 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28672 mode = GET_MODE (mem);
28673
28674 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28675
28676 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
28677 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
28678
28679 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28680 a full barrier is emitted after the store-release. */
28681 if (is_armv8_sync)
28682 use_acquire = false;
28683
28684 /* Checks whether a barrier is needed and emits one accordingly. */
28685 if (!(use_acquire || use_release))
28686 arm_pre_atomic_barrier (mod_s);
28687
28688 label1 = NULL;
28689 if (!is_weak)
28690 {
28691 label1 = gen_label_rtx ();
28692 emit_label (label1);
28693 }
28694 label2 = gen_label_rtx ();
28695
28696 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28697
28698 /* Z is set to 0 for 32-bit targets (resp. neg_bval set to 1 for Thumb-1)
28699 if oldval != rval, as required to communicate with arm_expand_compare_and_swap. */
28700 if (TARGET_32BIT)
28701 {
28702 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28703 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28704 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28705 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28706 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28707 }
28708 else
28709 {
28710 emit_move_insn (neg_bval, const1_rtx);
28711 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28712 if (thumb1_cmpneg_operand (oldval, SImode))
28713 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28714 label2, cond));
28715 else
28716 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28717 }
28718
28719 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28720
28721 /* Weak or strong, we want EQ to be true for success, so that we
28722 match the flags that we got from the compare above. */
28723 if (TARGET_32BIT)
28724 {
28725 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28726 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28727 emit_insn (gen_rtx_SET (cond, x));
28728 }
28729
28730 if (!is_weak)
28731 {
28732 /* Z is set to boolean value of !neg_bval, as required to communicate
28733 with arm_expand_compare_and_swap. */
28734 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28735 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28736 }
28737
28738 if (!is_mm_relaxed (mod_f))
28739 emit_label (label2);
28740
28741 /* Checks whether a barrier is needed and emits one accordingly. */
28742 if (is_armv8_sync
28743 || !(use_acquire || use_release))
28744 arm_post_atomic_barrier (mod_s);
28745
28746 if (is_mm_relaxed (mod_f))
28747 emit_label (label2);
28748 }
28749
28750 /* Split an atomic operation pattern. Operation is given by CODE and is one
28751 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28752 operation). Operation is performed on the content at MEM and on VALUE
28753 following the memory model MODEL_RTX. The content at MEM before and after
28754 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28755 success of the operation is returned in COND. Using a scratch register or
28756 an operand register for these determines what result is returned for that
28757 pattern. */
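/* For illustration only, the split loop for something like a SImode
   __atomic_fetch_add has roughly this shape (same pseudo-code style as
   above):

     loop:
       old_out = load-exclusive (mem);
       new_out = old_out + value;
       cond = store-exclusive (mem, new_out);
       if (cond != 0)
         goto loop;

   with barriers or acquire/release variants chosen from MODEL_RTX.  */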
28758
28759 void
28760 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28761 rtx value, rtx model_rtx, rtx cond)
28762 {
28763 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28764 machine_mode mode = GET_MODE (mem);
28765 machine_mode wmode = (mode == DImode ? DImode : SImode);
28766 rtx_code_label *label;
28767 bool all_low_regs, bind_old_new;
28768 rtx x;
28769
28770 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28771
28772 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
28773 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
28774
28775 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28776 a full barrier is emitted after the store-release. */
28777 if (is_armv8_sync)
28778 use_acquire = false;
28779
28780 /* Checks whether a barrier is needed and emits one accordingly. */
28781 if (!(use_acquire || use_release))
28782 arm_pre_atomic_barrier (model);
28783
28784 label = gen_label_rtx ();
28785 emit_label (label);
28786
28787 if (new_out)
28788 new_out = gen_lowpart (wmode, new_out);
28789 if (old_out)
28790 old_out = gen_lowpart (wmode, old_out);
28791 else
28792 old_out = new_out;
28793 value = simplify_gen_subreg (wmode, value, mode, 0);
28794
28795 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28796
28797 /* Does the operation require the destination and first operand to use the
28798 same register? This is decided by the register constraints of the relevant
28799 insn patterns in thumb1.md. */
28800 gcc_assert (!new_out || REG_P (new_out));
28801 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28802 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28803 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28804 bind_old_new =
28805 (TARGET_THUMB1
28806 && code != SET
28807 && code != MINUS
28808 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28809
28810 /* We want to return the old value while putting the result of the operation
28811 in the same register as the old value, so copy the old value over to the
28812 destination register and use that register for the operation. */
28813 if (old_out && bind_old_new)
28814 {
28815 emit_move_insn (new_out, old_out);
28816 old_out = new_out;
28817 }
28818
28819 switch (code)
28820 {
28821 case SET:
28822 new_out = value;
28823 break;
28824
28825 case NOT:
28826 x = gen_rtx_AND (wmode, old_out, value);
28827 emit_insn (gen_rtx_SET (new_out, x));
28828 x = gen_rtx_NOT (wmode, new_out);
28829 emit_insn (gen_rtx_SET (new_out, x));
28830 break;
28831
28832 case MINUS:
28833 if (CONST_INT_P (value))
28834 {
28835 value = GEN_INT (-INTVAL (value));
28836 code = PLUS;
28837 }
28838 /* FALLTHRU */
28839
28840 case PLUS:
28841 if (mode == DImode)
28842 {
28843 /* DImode plus/minus need to clobber flags. */
28844 /* The adddi3 and subdi3 patterns are incorrectly written so that
28845 they require matching operands, even when we could easily support
28846 three operands. Thankfully, this can be fixed up post-splitting,
28847 as the individual add+adc patterns do accept three operands and
28848 post-reload cprop can make these moves go away. */
28849 emit_move_insn (new_out, old_out);
28850 if (code == PLUS)
28851 x = gen_adddi3 (new_out, new_out, value);
28852 else
28853 x = gen_subdi3 (new_out, new_out, value);
28854 emit_insn (x);
28855 break;
28856 }
28857 /* FALLTHRU */
28858
28859 default:
28860 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28861 emit_insn (gen_rtx_SET (new_out, x));
28862 break;
28863 }
28864
28865 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28866 use_release);
28867
28868 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28869 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28870
28871 /* Checks whether a barrier is needed and emits one accordingly. */
28872 if (is_armv8_sync
28873 || !(use_acquire || use_release))
28874 arm_post_atomic_barrier (model);
28875 }
28876 \f
28877 #define MAX_VECT_LEN 16
28878
28879 struct expand_vec_perm_d
28880 {
28881 rtx target, op0, op1;
28882 vec_perm_indices perm;
28883 machine_mode vmode;
28884 bool one_vector_p;
28885 bool testing_p;
28886 };
28887
28888 /* Generate a variable permutation. */
28889
28890 static void
28891 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28892 {
28893 machine_mode vmode = GET_MODE (target);
28894 bool one_vector_p = rtx_equal_p (op0, op1);
28895
28896 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28897 gcc_checking_assert (GET_MODE (op0) == vmode);
28898 gcc_checking_assert (GET_MODE (op1) == vmode);
28899 gcc_checking_assert (GET_MODE (sel) == vmode);
28900 gcc_checking_assert (TARGET_NEON);
28901
28902 if (one_vector_p)
28903 {
28904 if (vmode == V8QImode)
28905 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28906 else
28907 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28908 }
28909 else
28910 {
28911 rtx pair;
28912
28913 if (vmode == V8QImode)
28914 {
28915 pair = gen_reg_rtx (V16QImode);
28916 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28917 pair = gen_lowpart (TImode, pair);
28918 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28919 }
28920 else
28921 {
28922 pair = gen_reg_rtx (OImode);
28923 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28924 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28925 }
28926 }
28927 }
28928
28929 void
28930 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28931 {
28932 machine_mode vmode = GET_MODE (target);
28933 unsigned int nelt = GET_MODE_NUNITS (vmode);
28934 bool one_vector_p = rtx_equal_p (op0, op1);
28935 rtx mask;
28936
28937 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28938 numbering of elements for big-endian, we must reverse the order. */
28939 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28940
28941 /* The VTBL instruction does not use a modulo index, so we must take care
28942 of that ourselves. */
28943 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28944 mask = gen_const_vec_duplicate (vmode, mask);
28945 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28946
28947 arm_expand_vec_perm_1 (target, op0, op1, sel);
28948 }
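/* For reference, a variable permutation such as the following (using the
   generic vector extensions; the function name is just an example) is
   typically expanded through arm_expand_vec_perm above and ends up as a
   VTBL sequence:

     typedef unsigned char v8qi __attribute__ ((vector_size (8)));

     v8qi
     shuffle_var (v8qi x, v8qi sel)
     {
       return __builtin_shuffle (x, sel);  // selector gets masked with nelt-1
     }
*/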
28949
28950 /* Map lane ordering between the architectural lane order and GCC's lane order,
28951 taking the ABI into account. See the comment above output_move_neon for details. */
28952
28953 static int
28954 neon_endian_lane_map (machine_mode mode, int lane)
28955 {
28956 if (BYTES_BIG_ENDIAN)
28957 {
28958 int nelems = GET_MODE_NUNITS (mode);
28959 /* Reverse lane order. */
28960 lane = (nelems - 1 - lane);
28961 /* Reverse D register order, to match ABI. */
28962 if (GET_MODE_SIZE (mode) == 16)
28963 lane = lane ^ (nelems / 2);
28964 }
28965 return lane;
28966 }
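/* Worked example, for illustration: for V4SImode on a big-endian target the
   mapping above sends lanes {0, 1, 2, 3} to {1, 0, 3, 2} - the lane order is
   reversed and, because the mode is 16 bytes wide, the two D registers are
   swapped back to match the ABI.  The mapping is its own inverse.  */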
28967
28968 /* Some permutations index into pairs of vectors; this is a helper function
28969 to map indexes into those pairs of vectors. */
28970
28971 static int
28972 neon_pair_endian_lane_map (machine_mode mode, int lane)
28973 {
28974 int nelem = GET_MODE_NUNITS (mode);
28975 if (BYTES_BIG_ENDIAN)
28976 lane =
28977 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28978 return lane;
28979 }
28980
28981 /* Generate or test for an insn that supports a constant permutation. */
28982
28983 /* Recognize patterns for the VUZP insns. */
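/* For example (little-endian), a two-operand V4SI permutation selecting
   elements {0, 2, 4, 6} is the "even" unzip and {1, 3, 5, 7} the "odd"
   unzip; both forms are matched below and emitted as a single VUZP.  */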
28984
28985 static bool
28986 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
28987 {
28988 unsigned int i, odd, mask, nelt = d->perm.length ();
28989 rtx out0, out1, in0, in1;
28990 rtx (*gen)(rtx, rtx, rtx, rtx);
28991 int first_elem;
28992 int swap_nelt;
28993
28994 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
28995 return false;
28996
28997 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
28998 big-endian pattern on 64-bit vectors, so we correct for that. */
28999 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
29000 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
29001
29002 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
29003
29004 if (first_elem == neon_endian_lane_map (d->vmode, 0))
29005 odd = 0;
29006 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
29007 odd = 1;
29008 else
29009 return false;
29010 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29011
29012 for (i = 0; i < nelt; i++)
29013 {
29014 unsigned elt =
29015 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
29016 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
29017 return false;
29018 }
29019
29020 /* Success! */
29021 if (d->testing_p)
29022 return true;
29023
29024 switch (d->vmode)
29025 {
29026 case E_V16QImode: gen = gen_neon_vuzpv16qi_internal; break;
29027 case E_V8QImode: gen = gen_neon_vuzpv8qi_internal; break;
29028 case E_V8HImode: gen = gen_neon_vuzpv8hi_internal; break;
29029 case E_V4HImode: gen = gen_neon_vuzpv4hi_internal; break;
29030 case E_V8HFmode: gen = gen_neon_vuzpv8hf_internal; break;
29031 case E_V4HFmode: gen = gen_neon_vuzpv4hf_internal; break;
29032 case E_V4SImode: gen = gen_neon_vuzpv4si_internal; break;
29033 case E_V2SImode: gen = gen_neon_vuzpv2si_internal; break;
29034 case E_V2SFmode: gen = gen_neon_vuzpv2sf_internal; break;
29035 case E_V4SFmode: gen = gen_neon_vuzpv4sf_internal; break;
29036 default:
29037 gcc_unreachable ();
29038 }
29039
29040 in0 = d->op0;
29041 in1 = d->op1;
29042 if (swap_nelt != 0)
29043 std::swap (in0, in1);
29044
29045 out0 = d->target;
29046 out1 = gen_reg_rtx (d->vmode);
29047 if (odd)
29048 std::swap (out0, out1);
29049
29050 emit_insn (gen (out0, in0, in1, out1));
29051 return true;
29052 }
29053
29054 /* Recognize patterns for the VZIP insns. */
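/* For example (little-endian), interleaving the low halves of two V4SI
   operands corresponds to the permutation {0, 4, 1, 5} and the high halves
   to {2, 6, 3, 7}; both forms are matched below as a VZIP.  */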
29055
29056 static bool
29057 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
29058 {
29059 unsigned int i, high, mask, nelt = d->perm.length ();
29060 rtx out0, out1, in0, in1;
29061 rtx (*gen)(rtx, rtx, rtx, rtx);
29062 int first_elem;
29063 bool is_swapped;
29064
29065 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29066 return false;
29067
29068 is_swapped = BYTES_BIG_ENDIAN;
29069
29070 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
29071
29072 high = nelt / 2;
29073 if (first_elem == neon_endian_lane_map (d->vmode, high))
29074 ;
29075 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
29076 high = 0;
29077 else
29078 return false;
29079 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29080
29081 for (i = 0; i < nelt / 2; i++)
29082 {
29083 unsigned elt =
29084 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
29085 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
29086 != elt)
29087 return false;
29088 elt =
29089 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
29090 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
29091 != elt)
29092 return false;
29093 }
29094
29095 /* Success! */
29096 if (d->testing_p)
29097 return true;
29098
29099 switch (d->vmode)
29100 {
29101 case E_V16QImode: gen = gen_neon_vzipv16qi_internal; break;
29102 case E_V8QImode: gen = gen_neon_vzipv8qi_internal; break;
29103 case E_V8HImode: gen = gen_neon_vzipv8hi_internal; break;
29104 case E_V4HImode: gen = gen_neon_vzipv4hi_internal; break;
29105 case E_V8HFmode: gen = gen_neon_vzipv8hf_internal; break;
29106 case E_V4HFmode: gen = gen_neon_vzipv4hf_internal; break;
29107 case E_V4SImode: gen = gen_neon_vzipv4si_internal; break;
29108 case E_V2SImode: gen = gen_neon_vzipv2si_internal; break;
29109 case E_V2SFmode: gen = gen_neon_vzipv2sf_internal; break;
29110 case E_V4SFmode: gen = gen_neon_vzipv4sf_internal; break;
29111 default:
29112 gcc_unreachable ();
29113 }
29114
29115 in0 = d->op0;
29116 in1 = d->op1;
29117 if (is_swapped)
29118 std::swap (in0, in1);
29119
29120 out0 = d->target;
29121 out1 = gen_reg_rtx (d->vmode);
29122 if (high)
29123 std::swap (out0, out1);
29124
29125 emit_insn (gen (out0, in0, in1, out1));
29126 return true;
29127 }
29128
29129 /* Recognize patterns for the VREV insns. */
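/* For example, on V8QImode the permutation {3, 2, 1, 0, 7, 6, 5, 4}
   (diff == 3) reverses the bytes within each 32-bit group and is matched
   below as VREV32.8; diff values of 7 and 1 correspond to VREV64 and
   VREV16 in the same way.  */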
29130
29131 static bool
29132 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
29133 {
29134 unsigned int i, j, diff, nelt = d->perm.length ();
29135 rtx (*gen)(rtx, rtx);
29136
29137 if (!d->one_vector_p)
29138 return false;
29139
29140 diff = d->perm[0];
29141 switch (diff)
29142 {
29143 case 7:
29144 switch (d->vmode)
29145 {
29146 case E_V16QImode: gen = gen_neon_vrev64v16qi; break;
29147 case E_V8QImode: gen = gen_neon_vrev64v8qi; break;
29148 default:
29149 return false;
29150 }
29151 break;
29152 case 3:
29153 switch (d->vmode)
29154 {
29155 case E_V16QImode: gen = gen_neon_vrev32v16qi; break;
29156 case E_V8QImode: gen = gen_neon_vrev32v8qi; break;
29157 case E_V8HImode: gen = gen_neon_vrev64v8hi; break;
29158 case E_V4HImode: gen = gen_neon_vrev64v4hi; break;
29159 case E_V8HFmode: gen = gen_neon_vrev64v8hf; break;
29160 case E_V4HFmode: gen = gen_neon_vrev64v4hf; break;
29161 default:
29162 return false;
29163 }
29164 break;
29165 case 1:
29166 switch (d->vmode)
29167 {
29168 case E_V16QImode: gen = gen_neon_vrev16v16qi; break;
29169 case E_V8QImode: gen = gen_neon_vrev16v8qi; break;
29170 case E_V8HImode: gen = gen_neon_vrev32v8hi; break;
29171 case E_V4HImode: gen = gen_neon_vrev32v4hi; break;
29172 case E_V4SImode: gen = gen_neon_vrev64v4si; break;
29173 case E_V2SImode: gen = gen_neon_vrev64v2si; break;
29174 case E_V4SFmode: gen = gen_neon_vrev64v4sf; break;
29175 case E_V2SFmode: gen = gen_neon_vrev64v2sf; break;
29176 default:
29177 return false;
29178 }
29179 break;
29180 default:
29181 return false;
29182 }
29183
29184 for (i = 0; i < nelt ; i += diff + 1)
29185 for (j = 0; j <= diff; j += 1)
29186 {
29187 /* This is guaranteed to be true, as the value of diff
29188 is 7, 3 or 1 and we should have enough elements in the
29189 queue to generate this. Getting a vector mask with a
29190 value of diff other than these implies that
29191 something went wrong before we got here. */
29192 gcc_assert (i + j < nelt);
29193 if (d->perm[i + j] != i + diff - j)
29194 return false;
29195 }
29196
29197 /* Success! */
29198 if (d->testing_p)
29199 return true;
29200
29201 emit_insn (gen (d->target, d->op0));
29202 return true;
29203 }
29204
29205 /* Recognize patterns for the VTRN insns. */
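/* For example (little-endian), the V4SI permutations {0, 4, 2, 6} and
   {1, 5, 3, 7} are the even and odd halves of a 2x2 transpose of the two
   input vectors, and are matched below as a VTRN.  */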
29206
29207 static bool
29208 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29209 {
29210 unsigned int i, odd, mask, nelt = d->perm.length ();
29211 rtx out0, out1, in0, in1;
29212 rtx (*gen)(rtx, rtx, rtx, rtx);
29213
29214 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29215 return false;
29216
29217 /* Note that these are little-endian tests. Adjust for big-endian later. */
29218 if (d->perm[0] == 0)
29219 odd = 0;
29220 else if (d->perm[0] == 1)
29221 odd = 1;
29222 else
29223 return false;
29224 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29225
29226 for (i = 0; i < nelt; i += 2)
29227 {
29228 if (d->perm[i] != i + odd)
29229 return false;
29230 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29231 return false;
29232 }
29233
29234 /* Success! */
29235 if (d->testing_p)
29236 return true;
29237
29238 switch (d->vmode)
29239 {
29240 case E_V16QImode: gen = gen_neon_vtrnv16qi_internal; break;
29241 case E_V8QImode: gen = gen_neon_vtrnv8qi_internal; break;
29242 case E_V8HImode: gen = gen_neon_vtrnv8hi_internal; break;
29243 case E_V4HImode: gen = gen_neon_vtrnv4hi_internal; break;
29244 case E_V8HFmode: gen = gen_neon_vtrnv8hf_internal; break;
29245 case E_V4HFmode: gen = gen_neon_vtrnv4hf_internal; break;
29246 case E_V4SImode: gen = gen_neon_vtrnv4si_internal; break;
29247 case E_V2SImode: gen = gen_neon_vtrnv2si_internal; break;
29248 case E_V2SFmode: gen = gen_neon_vtrnv2sf_internal; break;
29249 case E_V4SFmode: gen = gen_neon_vtrnv4sf_internal; break;
29250 default:
29251 gcc_unreachable ();
29252 }
29253
29254 in0 = d->op0;
29255 in1 = d->op1;
29256 if (BYTES_BIG_ENDIAN)
29257 {
29258 std::swap (in0, in1);
29259 odd = !odd;
29260 }
29261
29262 out0 = d->target;
29263 out1 = gen_reg_rtx (d->vmode);
29264 if (odd)
29265 std::swap (out0, out1);
29266
29267 emit_insn (gen (out0, in0, in1, out1));
29268 return true;
29269 }
29270
29271 /* Recognize patterns for the VEXT insns. */
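/* For example, the V8QI permutation {3, 4, 5, 6, 7, 8, 9, 10} extracts
   eight consecutive bytes starting at byte 3 of the concatenated operands
   and is matched below as a VEXT with offset #3.  */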
29272
29273 static bool
29274 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29275 {
29276 unsigned int i, nelt = d->perm.length ();
29277 rtx (*gen) (rtx, rtx, rtx, rtx);
29278 rtx offset;
29279
29280 unsigned int location;
29281
29282 unsigned int next = d->perm[0] + 1;
29283
29284 /* TODO: Handle GCC's numbering of elements for big-endian. */
29285 if (BYTES_BIG_ENDIAN)
29286 return false;
29287
29288 /* Check if the extracted indexes are increasing by one. */
29289 for (i = 1; i < nelt; next++, i++)
29290 {
29291 /* If we hit the most significant element of the 2nd vector in
29292 the previous iteration, no need to test further. */
29293 if (next == 2 * nelt)
29294 return false;
29295
29296 /* If we are operating on only one vector: it could be a
29297 rotation. If there are only two elements of size < 64, let
29298 arm_evpc_neon_vrev catch it. */
29299 if (d->one_vector_p && (next == nelt))
29300 {
29301 if ((nelt == 2) && (d->vmode != V2DImode))
29302 return false;
29303 else
29304 next = 0;
29305 }
29306
29307 if (d->perm[i] != next)
29308 return false;
29309 }
29310
29311 location = d->perm[0];
29312
29313 switch (d->vmode)
29314 {
29315 case E_V16QImode: gen = gen_neon_vextv16qi; break;
29316 case E_V8QImode: gen = gen_neon_vextv8qi; break;
29317 case E_V4HImode: gen = gen_neon_vextv4hi; break;
29318 case E_V8HImode: gen = gen_neon_vextv8hi; break;
29319 case E_V2SImode: gen = gen_neon_vextv2si; break;
29320 case E_V4SImode: gen = gen_neon_vextv4si; break;
29321 case E_V4HFmode: gen = gen_neon_vextv4hf; break;
29322 case E_V8HFmode: gen = gen_neon_vextv8hf; break;
29323 case E_V2SFmode: gen = gen_neon_vextv2sf; break;
29324 case E_V4SFmode: gen = gen_neon_vextv4sf; break;
29325 case E_V2DImode: gen = gen_neon_vextv2di; break;
29326 default:
29327 return false;
29328 }
29329
29330 /* Success! */
29331 if (d->testing_p)
29332 return true;
29333
29334 offset = GEN_INT (location);
29335 emit_insn (gen (d->target, d->op0, d->op1, offset));
29336 return true;
29337 }
29338
29339 /* The NEON VTBL instruction is a fully variable permutation that's even
29340 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29341 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29342 can do slightly better by expanding this as a constant where we don't
29343 have to apply a mask. */
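/* For instance, an arbitrary constant V8QI permutation such as
   {0, 7, 1, 6, 2, 5, 3, 4}, which matches none of the structured patterns
   above, is handled here by loading the selector as a constant vector and
   emitting a VTBL.  */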
29344
29345 static bool
29346 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29347 {
29348 rtx rperm[MAX_VECT_LEN], sel;
29349 machine_mode vmode = d->vmode;
29350 unsigned int i, nelt = d->perm.length ();
29351
29352 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29353 numbering of elements for big-endian, we must reverse the order. */
29354 if (BYTES_BIG_ENDIAN)
29355 return false;
29356
29357 if (d->testing_p)
29358 return true;
29359
29360 /* Generic code will try the constant permutation twice: once with the
29361 original mode and again with the elements lowered to QImode.
29362 So wait and don't do the selector expansion ourselves. */
29363 if (vmode != V8QImode && vmode != V16QImode)
29364 return false;
29365
29366 for (i = 0; i < nelt; ++i)
29367 rperm[i] = GEN_INT (d->perm[i]);
29368 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29369 sel = force_reg (vmode, sel);
29370
29371 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29372 return true;
29373 }
29374
29375 static bool
29376 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29377 {
29378 /* Check if the input mask matches vext before reordering the
29379 operands. */
29380 if (TARGET_NEON)
29381 if (arm_evpc_neon_vext (d))
29382 return true;
29383
29384 /* The pattern matching functions above are written to look for a small
29385 number to begin the sequence (0, 1, N/2). If we begin with an index
29386 from the second operand, we can swap the operands. */
29387 unsigned int nelt = d->perm.length ();
29388 if (d->perm[0] >= nelt)
29389 {
29390 d->perm.rotate_inputs (1);
29391 std::swap (d->op0, d->op1);
29392 }
29393
29394 if (TARGET_NEON)
29395 {
29396 if (arm_evpc_neon_vuzp (d))
29397 return true;
29398 if (arm_evpc_neon_vzip (d))
29399 return true;
29400 if (arm_evpc_neon_vrev (d))
29401 return true;
29402 if (arm_evpc_neon_vtrn (d))
29403 return true;
29404 return arm_evpc_neon_vtbl (d);
29405 }
29406 return false;
29407 }
29408
29409 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
29410
29411 static bool
29412 arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
29413 const vec_perm_indices &sel)
29414 {
29415 struct expand_vec_perm_d d;
29416 int i, nelt, which;
29417
29418 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
29419 return false;
29420
29421 d.target = target;
29422 d.op0 = op0;
29423 d.op1 = op1;
29424
29425 d.vmode = vmode;
29426 gcc_assert (VECTOR_MODE_P (d.vmode));
29427 d.testing_p = !target;
29428
29429 nelt = GET_MODE_NUNITS (d.vmode);
29430 for (i = which = 0; i < nelt; ++i)
29431 {
29432 int ei = sel[i] & (2 * nelt - 1);
29433 which |= (ei < nelt ? 1 : 2);
29434 }
29435
29436 switch (which)
29437 {
29438 default:
29439 gcc_unreachable();
29440
29441 case 3:
29442 d.one_vector_p = false;
29443 if (d.testing_p || !rtx_equal_p (op0, op1))
29444 break;
29445
29446 /* The elements of PERM do not suggest that only the first operand
29447 is used, but both operands are identical. Allow easier matching
29448 of the permutation by folding the permutation into the single
29449 input vector. */
29450 /* FALLTHRU */
29451 case 2:
29452 d.op0 = op1;
29453 d.one_vector_p = true;
29454 break;
29455
29456 case 1:
29457 d.op1 = op0;
29458 d.one_vector_p = true;
29459 break;
29460 }
29461
29462 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
29463
29464 if (!d.testing_p)
29465 return arm_expand_vec_perm_const_1 (&d);
29466
29467 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29468 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29469 if (!d.one_vector_p)
29470 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29471
29472 start_sequence ();
29473 bool ret = arm_expand_vec_perm_const_1 (&d);
29474 end_sequence ();
29475
29476 return ret;
29477 }
29478
29479 bool
29480 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29481 {
29482 /* With soft float, all auto-increment forms are OK if we either have LDRD
29483 or the access is no wider than a word. */
29484 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29485 return true;
29486
29487 switch (code)
29488 {
29489 /* Post-increment and pre-decrement are supported for all
29490 instruction forms except for vector forms. */
29491 case ARM_POST_INC:
29492 case ARM_PRE_DEC:
29493 if (VECTOR_MODE_P (mode))
29494 {
29495 if (code != ARM_PRE_DEC)
29496 return true;
29497 else
29498 return false;
29499 }
29500
29501 return true;
29502
29503 case ARM_POST_DEC:
29504 case ARM_PRE_INC:
29505 /* Without LDRD, when the mode size is greater than the
29506 word size there is no point in auto-incrementing
29507 because ldm and stm will not have these forms. */
29508 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29509 return false;
29510
29511 /* Vector and floating point modes do not support
29512 these auto increment forms. */
29513 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29514 return false;
29515
29516 return true;
29517
29518 default:
29519 return false;
29520
29521 }
29522
29523 return false;
29524 }
29525
29526 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
29527 on ARM, since we know that shifts by negative amounts are no-ops.
29528 Additionally, the default expansion code is not available or suitable
29529 for post-reload insn splits (this can occur when the register allocator
29530 chooses not to do a shift in NEON).
29531
29532 This function is used in both initial expand and post-reload splits, and
29533 handles all kinds of 64-bit shifts.
29534
29535 Input requirements:
29536 - It is safe for the input and output to be the same register, but
29537 early-clobber rules apply for the shift amount and scratch registers.
29538 - Shift by register requires both scratch registers. In all other cases
29539 the scratch registers may be NULL.
29540 - Ashiftrt by a register also clobbers the CC register. */
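/* As a rough illustration, for a left shift by a constant N < 32 the code
   below emits (in the pseudo-code style used further down):

     out_high = in_high << N;
     out_high |= (unsigned) in_low >> (32 - N);
     out_low  = in_low << N;

   Constants of 32 or more shift the "up" part straight into the "down"
   part, and out-of-range constants degenerate to a move or to zero
   (or a sign fill for arithmetic right shifts).  */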
29541 void
29542 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29543 rtx amount, rtx scratch1, rtx scratch2)
29544 {
29545 rtx out_high = gen_highpart (SImode, out);
29546 rtx out_low = gen_lowpart (SImode, out);
29547 rtx in_high = gen_highpart (SImode, in);
29548 rtx in_low = gen_lowpart (SImode, in);
29549
29550 /* Terminology:
29551 in = the register pair containing the input value.
29552 out = the destination register pair.
29553 up = the high- or low-part of each pair.
29554 down = the opposite part to "up".
29555 In a shift, we can consider bits to shift from "up"-stream to
29556 "down"-stream, so in a left-shift "up" is the low-part and "down"
29557 is the high-part of each register pair. */
29558
29559 rtx out_up = code == ASHIFT ? out_low : out_high;
29560 rtx out_down = code == ASHIFT ? out_high : out_low;
29561 rtx in_up = code == ASHIFT ? in_low : in_high;
29562 rtx in_down = code == ASHIFT ? in_high : in_low;
29563
29564 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29565 gcc_assert (out
29566 && (REG_P (out) || GET_CODE (out) == SUBREG)
29567 && GET_MODE (out) == DImode);
29568 gcc_assert (in
29569 && (REG_P (in) || GET_CODE (in) == SUBREG)
29570 && GET_MODE (in) == DImode);
29571 gcc_assert (amount
29572 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29573 && GET_MODE (amount) == SImode)
29574 || CONST_INT_P (amount)));
29575 gcc_assert (scratch1 == NULL
29576 || (GET_CODE (scratch1) == SCRATCH)
29577 || (GET_MODE (scratch1) == SImode
29578 && REG_P (scratch1)));
29579 gcc_assert (scratch2 == NULL
29580 || (GET_CODE (scratch2) == SCRATCH)
29581 || (GET_MODE (scratch2) == SImode
29582 && REG_P (scratch2)));
29583 gcc_assert (!REG_P (out) || !REG_P (amount)
29584 || !HARD_REGISTER_P (out)
29585 || (REGNO (out) != REGNO (amount)
29586 && REGNO (out) + 1 != REGNO (amount)));
29587
29588 /* Macros to make following code more readable. */
29589 #define SUB_32(DEST,SRC) \
29590 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29591 #define RSB_32(DEST,SRC) \
29592 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29593 #define SUB_S_32(DEST,SRC) \
29594 gen_addsi3_compare0 ((DEST), (SRC), \
29595 GEN_INT (-32))
29596 #define SET(DEST,SRC) \
29597 gen_rtx_SET ((DEST), (SRC))
29598 #define SHIFT(CODE,SRC,AMOUNT) \
29599 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29600 #define LSHIFT(CODE,SRC,AMOUNT) \
29601 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29602 SImode, (SRC), (AMOUNT))
29603 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29604 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29605 SImode, (SRC), (AMOUNT))
29606 #define ORR(A,B) \
29607 gen_rtx_IOR (SImode, (A), (B))
29608 #define BRANCH(COND,LABEL) \
29609 gen_arm_cond_branch ((LABEL), \
29610 gen_rtx_ ## COND (CCmode, cc_reg, \
29611 const0_rtx), \
29612 cc_reg)
29613
29614 /* Shifts by register and shifts by constant are handled separately. */
29615 if (CONST_INT_P (amount))
29616 {
29617 /* We have a shift-by-constant. */
29618
29619 /* First, handle out-of-range shift amounts.
29620 In both cases we try to match the result that an ARM instruction in a
29621 shift-by-register would give. This helps reduce execution
29622 differences between optimization levels, but it won't stop other
29623 parts of the compiler doing different things. This is "undefined
29624 behavior", in any case. */
29625 if (INTVAL (amount) <= 0)
29626 emit_insn (gen_movdi (out, in));
29627 else if (INTVAL (amount) >= 64)
29628 {
29629 if (code == ASHIFTRT)
29630 {
29631 rtx const31_rtx = GEN_INT (31);
29632 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29633 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29634 }
29635 else
29636 emit_insn (gen_movdi (out, const0_rtx));
29637 }
29638
29639 /* Now handle valid shifts. */
29640 else if (INTVAL (amount) < 32)
29641 {
29642 /* Shifts by a constant less than 32. */
29643 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29644
29645 /* Clearing the out register in DImode first avoids lots
29646 of spilling and results in less stack usage.
29647 Later this redundant insn is completely removed.
29648 Do that only if "in" and "out" are different registers. */
29649 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29650 emit_insn (SET (out, const0_rtx));
29651 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29652 emit_insn (SET (out_down,
29653 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29654 out_down)));
29655 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29656 }
29657 else
29658 {
29659 /* Shifts by a constant greater than 31. */
29660 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29661
29662 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29663 emit_insn (SET (out, const0_rtx));
29664 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29665 if (code == ASHIFTRT)
29666 emit_insn (gen_ashrsi3 (out_up, in_up,
29667 GEN_INT (31)));
29668 else
29669 emit_insn (SET (out_up, const0_rtx));
29670 }
29671 }
29672 else
29673 {
29674 /* We have a shift-by-register. */
29675 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29676
29677 /* This alternative requires the scratch registers. */
29678 gcc_assert (scratch1 && REG_P (scratch1));
29679 gcc_assert (scratch2 && REG_P (scratch2));
29680
29681 /* We will need the values "amount-32" and "32-amount" later.
29682 Swapping them around now allows the later code to be more general. */
29683 switch (code)
29684 {
29685 case ASHIFT:
29686 emit_insn (SUB_32 (scratch1, amount));
29687 emit_insn (RSB_32 (scratch2, amount));
29688 break;
29689 case ASHIFTRT:
29690 emit_insn (RSB_32 (scratch1, amount));
29691 /* Also set CC = amount > 32. */
29692 emit_insn (SUB_S_32 (scratch2, amount));
29693 break;
29694 case LSHIFTRT:
29695 emit_insn (RSB_32 (scratch1, amount));
29696 emit_insn (SUB_32 (scratch2, amount));
29697 break;
29698 default:
29699 gcc_unreachable ();
29700 }
29701
29702 /* Emit code like this:
29703
29704 arithmetic-left:
29705 out_down = in_down << amount;
29706 out_down = (in_up << (amount - 32)) | out_down;
29707 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29708 out_up = in_up << amount;
29709
29710 arithmetic-right:
29711 out_down = in_down >> amount;
29712 out_down = (in_up << (32 - amount)) | out_down;
29713 if (amount < 32)
29714 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29715 out_up = in_up << amount;
29716
29717 logical-right:
29718 out_down = in_down >> amount;
29719 out_down = (in_up << (32 - amount)) | out_down;
29720 if (amount < 32)
29721 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29722 out_up = in_up << amount;
29723
29724 The ARM and Thumb2 variants are the same but implemented slightly
29725 differently. If this were only called during expand we could just
29726 use the Thumb2 case and let combine do the right thing, but this
29727 can also be called from post-reload splitters. */
29728
29729 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29730
29731 if (!TARGET_THUMB2)
29732 {
29733 /* Emit code for ARM mode. */
29734 emit_insn (SET (out_down,
29735 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29736 if (code == ASHIFTRT)
29737 {
29738 rtx_code_label *done_label = gen_label_rtx ();
29739 emit_jump_insn (BRANCH (LT, done_label));
29740 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29741 out_down)));
29742 emit_label (done_label);
29743 }
29744 else
29745 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29746 out_down)));
29747 }
29748 else
29749 {
29750 /* Emit code for Thumb2 mode.
29751 Thumb2 can't do shift and or in one insn. */
29752 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29753 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29754
29755 if (code == ASHIFTRT)
29756 {
29757 rtx_code_label *done_label = gen_label_rtx ();
29758 emit_jump_insn (BRANCH (LT, done_label));
29759 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29760 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29761 emit_label (done_label);
29762 }
29763 else
29764 {
29765 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29766 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29767 }
29768 }
29769
29770 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29771 }
29772
29773 #undef SUB_32
29774 #undef RSB_32
29775 #undef SUB_S_32
29776 #undef SET
29777 #undef SHIFT
29778 #undef LSHIFT
29779 #undef REV_LSHIFT
29780 #undef ORR
29781 #undef BRANCH
29782 }
29783
29784 /* Returns true if the pattern is a valid symbolic address, which is either a
29785 symbol_ref or (symbol_ref + addend).
29786
29787 According to the ARM ELF ABI, the initial addend of REL-type relocations
29788 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29789 literal field of the instruction as a 16-bit signed value in the range
29790 -32768 <= A < 32768. */
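/* For example, (symbol_ref "foo") and (const (plus (symbol_ref "foo")
   (const_int 4))) are accepted below, while an addend of 0x8000 or -0x8001
   is rejected because it cannot be encoded in the 16-bit signed literal
   field.  */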
29791
29792 bool
29793 arm_valid_symbolic_address_p (rtx addr)
29794 {
29795 rtx xop0, xop1 = NULL_RTX;
29796 rtx tmp = addr;
29797
29798 if (target_word_relocations)
29799 return false;
29800
29801 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29802 return true;
29803
29804 /* (const (plus: symbol_ref const_int)) */
29805 if (GET_CODE (addr) == CONST)
29806 tmp = XEXP (addr, 0);
29807
29808 if (GET_CODE (tmp) == PLUS)
29809 {
29810 xop0 = XEXP (tmp, 0);
29811 xop1 = XEXP (tmp, 1);
29812
29813 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29814 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29815 }
29816
29817 return false;
29818 }
29819
29820 /* Return true if *COMPARISON is a valid comparison operation, and put
29821 its operands into a form that is valid for it. */
29822 bool
29823 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29824 {
29825 enum rtx_code code = GET_CODE (*comparison);
29826 int code_int;
29827 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29828 ? GET_MODE (*op2) : GET_MODE (*op1);
29829
29830 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29831
29832 if (code == UNEQ || code == LTGT)
29833 return false;
29834
29835 code_int = (int)code;
29836 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29837 PUT_CODE (*comparison, (enum rtx_code)code_int);
29838
29839 switch (mode)
29840 {
29841 case E_SImode:
29842 if (!arm_add_operand (*op1, mode))
29843 *op1 = force_reg (mode, *op1);
29844 if (!arm_add_operand (*op2, mode))
29845 *op2 = force_reg (mode, *op2);
29846 return true;
29847
29848 case E_DImode:
29849 if (!cmpdi_operand (*op1, mode))
29850 *op1 = force_reg (mode, *op1);
29851 if (!cmpdi_operand (*op2, mode))
29852 *op2 = force_reg (mode, *op2);
29853 return true;
29854
29855 case E_HFmode:
29856 if (!TARGET_VFP_FP16INST)
29857 break;
29858 /* FP16 comparisons are done in SF mode. */
29859 mode = SFmode;
29860 *op1 = convert_to_mode (mode, *op1, 1);
29861 *op2 = convert_to_mode (mode, *op2, 1);
29862 /* Fall through. */
29863 case E_SFmode:
29864 case E_DFmode:
29865 if (!vfp_compare_operand (*op1, mode))
29866 *op1 = force_reg (mode, *op1);
29867 if (!vfp_compare_operand (*op2, mode))
29868 *op2 = force_reg (mode, *op2);
29869 return true;
29870 default:
29871 break;
29872 }
29873
29874 return false;
29875
29876 }
29877
29878 /* Maximum number of instructions to set block of memory. */
29879 static int
29880 arm_block_set_max_insns (void)
29881 {
29882 if (optimize_function_for_size_p (cfun))
29883 return 4;
29884 else
29885 return current_tune->max_insns_inline_memset;
29886 }
29887
29888 /* Return TRUE if it's profitable to set a block of memory in the
29889 non-vectorized case. VAL is the value to set the memory
29890 with. LENGTH is the number of bytes to set. ALIGN is the
29891 alignment of the destination memory in bytes. UNALIGNED_P
29892 is TRUE if we can only set the memory with instructions
29893 meeting alignment requirements. USE_STRD_P is TRUE if we
29894 can use strd to set the memory. */
29895 static bool
29896 arm_block_set_non_vect_profit_p (rtx val,
29897 unsigned HOST_WIDE_INT length,
29898 unsigned HOST_WIDE_INT align,
29899 bool unaligned_p, bool use_strd_p)
29900 {
29901 int num = 0;
29902 /* For leftovers of 0-7 bytes, we can set the memory block using
29903 strb/strh/str with the minimum number of instructions. */
29904 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29905
29906 if (unaligned_p)
29907 {
29908 num = arm_const_inline_cost (SET, val);
29909 num += length / align + length % align;
29910 }
29911 else if (use_strd_p)
29912 {
29913 num = arm_const_double_inline_cost (val);
29914 num += (length >> 3) + leftover[length & 7];
29915 }
29916 else
29917 {
29918 num = arm_const_inline_cost (SET, val);
29919 num += (length >> 2) + leftover[length & 3];
29920 }
29921
29922 /* We may be able to combine last pair STRH/STRB into a single STR
29923 by shifting one byte back. */
29924 if (unaligned_access && length > 3 && (length & 3) == 3)
29925 num--;
29926
29927 return (num <= arm_block_set_max_insns ());
29928 }
29929
29930 /* Return TRUE if it's profitable to set block of memory for
29931 vectorized case. LENGTH is the number of bytes to set.
29932 ALIGN is the alignment of destination memory in bytes.
29933 MODE is the vector mode used to set the memory. */
29934 static bool
29935 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29936 unsigned HOST_WIDE_INT align,
29937 machine_mode mode)
29938 {
29939 int num;
29940 bool unaligned_p = ((align & 3) != 0);
29941 unsigned int nelt = GET_MODE_NUNITS (mode);
29942
29943 /* Instruction loading constant value. */
29944 num = 1;
29945 /* Instructions storing the memory. */
29946 num += (length + nelt - 1) / nelt;
29947 /* Instructions adjusting the address expression. We only need to
29948 adjust the address expression if it's 4-byte aligned and the
29949 leftover bytes can only be stored by a misaligned store instruction. */
29950 if (!unaligned_p && (length & 3) != 0)
29951 num++;
29952
29953 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29954 if (!unaligned_p && mode == V16QImode)
29955 num--;
29956
29957 return (num <= arm_block_set_max_insns ());
29958 }
29959
29960 /* Set a block of memory using vectorization instructions for the
29961 unaligned case. We fill the first LENGTH bytes of the memory
29962 area starting from DSTBASE with byte constant VALUE. ALIGN is
29963 the alignment requirement of memory. Return TRUE if succeeded. */
29964 static bool
29965 arm_block_set_unaligned_vect (rtx dstbase,
29966 unsigned HOST_WIDE_INT length,
29967 unsigned HOST_WIDE_INT value,
29968 unsigned HOST_WIDE_INT align)
29969 {
29970 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
29971 rtx dst, mem;
29972 rtx val_vec, reg;
29973 rtx (*gen_func) (rtx, rtx);
29974 machine_mode mode;
29975 unsigned HOST_WIDE_INT v = value;
29976 unsigned int offset = 0;
29977 gcc_assert ((align & 0x3) != 0);
29978 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29979 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29980 if (length >= nelt_v16)
29981 {
29982 mode = V16QImode;
29983 gen_func = gen_movmisalignv16qi;
29984 }
29985 else
29986 {
29987 mode = V8QImode;
29988 gen_func = gen_movmisalignv8qi;
29989 }
29990 nelt_mode = GET_MODE_NUNITS (mode);
29991 gcc_assert (length >= nelt_mode);
29992 /* Skip if it isn't profitable. */
29993 if (!arm_block_set_vect_profit_p (length, align, mode))
29994 return false;
29995
29996 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29997 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29998
29999 v = sext_hwi (v, BITS_PER_WORD);
30000
30001 reg = gen_reg_rtx (mode);
30002 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
30003 /* Emit instruction loading the constant value. */
30004 emit_move_insn (reg, val_vec);
30005
30006 /* Handle nelt_mode bytes in a vector. */
30007 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
30008 {
30009 emit_insn ((*gen_func) (mem, reg));
30010 if (i + 2 * nelt_mode <= length)
30011 {
30012 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
30013 offset += nelt_mode;
30014 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30015 }
30016 }
30017
30018 /* If there are at least nelt_v8 bytes leftover, we must be in
30019 V16QI mode. */
30020 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
30021
30022 /* Handle (8, 16) bytes leftover. */
30023 if (i + nelt_v8 < length)
30024 {
30025 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
30026 offset += length - i;
30027 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30028
30029 /* We are shifting bytes back, set the alignment accordingly. */
30030 if ((length & 1) != 0 && align >= 2)
30031 set_mem_align (mem, BITS_PER_UNIT);
30032
30033 emit_insn (gen_movmisalignv16qi (mem, reg));
30034 }
30035 /* Handle (0, 8] bytes leftover. */
30036 else if (i < length && i + nelt_v8 >= length)
30037 {
30038 if (mode == V16QImode)
30039 reg = gen_lowpart (V8QImode, reg);
30040
30041 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
30042 + (nelt_mode - nelt_v8))));
30043 offset += (length - i) + (nelt_mode - nelt_v8);
30044 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
30045
30046 /* We are shifting bytes back, set the alignment accordingly. */
30047 if ((length & 1) != 0 && align >= 2)
30048 set_mem_align (mem, BITS_PER_UNIT);
30049
30050 emit_insn (gen_movmisalignv8qi (mem, reg));
30051 }
30052
30053 return true;
30054 }
30055
30056 /* Set a block of memory using vectorization instructions for the
30057 aligned case. We fill the first LENGTH bytes of the memory area
30058 starting from DSTBASE with byte constant VALUE. ALIGN is the
30059 alignment requirement of memory. Return TRUE if succeeded. */
30060 static bool
30061 arm_block_set_aligned_vect (rtx dstbase,
30062 unsigned HOST_WIDE_INT length,
30063 unsigned HOST_WIDE_INT value,
30064 unsigned HOST_WIDE_INT align)
30065 {
30066 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
30067 rtx dst, addr, mem;
30068 rtx val_vec, reg;
30069 machine_mode mode;
30070 unsigned int offset = 0;
30071
30072 gcc_assert ((align & 0x3) == 0);
30073 nelt_v8 = GET_MODE_NUNITS (V8QImode);
30074 nelt_v16 = GET_MODE_NUNITS (V16QImode);
30075 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
30076 mode = V16QImode;
30077 else
30078 mode = V8QImode;
30079
30080 nelt_mode = GET_MODE_NUNITS (mode);
30081 gcc_assert (length >= nelt_mode);
30082 /* Skip if it isn't profitable. */
30083 if (!arm_block_set_vect_profit_p (length, align, mode))
30084 return false;
30085
30086 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30087
30088 reg = gen_reg_rtx (mode);
30089 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
30090 /* Emit instruction loading the constant value. */
30091 emit_move_insn (reg, val_vec);
30092
30093 i = 0;
30094 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
30095 if (mode == V16QImode)
30096 {
30097 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30098 emit_insn (gen_movmisalignv16qi (mem, reg));
30099 i += nelt_mode;
30100 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
30101 if (i + nelt_v8 < length && i + nelt_v16 > length)
30102 {
30103 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30104 offset += length - nelt_mode;
30105 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30106 /* We are shifting bytes back, set the alignment accordingly. */
30107 if ((length & 0x3) == 0)
30108 set_mem_align (mem, BITS_PER_UNIT * 4);
30109 else if ((length & 0x1) == 0)
30110 set_mem_align (mem, BITS_PER_UNIT * 2);
30111 else
30112 set_mem_align (mem, BITS_PER_UNIT);
30113
30114 emit_insn (gen_movmisalignv16qi (mem, reg));
30115 return true;
30116 }
30117 /* Fall through for bytes leftover. */
30118 mode = V8QImode;
30119 nelt_mode = GET_MODE_NUNITS (mode);
30120 reg = gen_lowpart (V8QImode, reg);
30121 }
30122
30123 /* Handle 8 bytes in a vector. */
30124 for (; (i + nelt_mode <= length); i += nelt_mode)
30125 {
30126 addr = plus_constant (Pmode, dst, i);
30127 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
30128 emit_move_insn (mem, reg);
30129 }
30130
30131 /* Handle single word leftover by shifting 4 bytes back. We can
30132 use aligned access for this case. */
30133 if (i + UNITS_PER_WORD == length)
30134 {
30135 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
30136 offset += i - UNITS_PER_WORD;
30137 mem = adjust_automodify_address (dstbase, mode, addr, offset);
30138 /* We are shifting 4 bytes back, set the alignment accordingly. */
30139 if (align > UNITS_PER_WORD)
30140 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
30141
30142 emit_move_insn (mem, reg);
30143 }
30144 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30145 We have to use unaligned access for this case. */
30146 else if (i < length)
30147 {
30148 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30149 offset += length - nelt_mode;
30150 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30151 /* We are shifting bytes back, set the alignment accordingly. */
30152 if ((length & 1) == 0)
30153 set_mem_align (mem, BITS_PER_UNIT * 2);
30154 else
30155 set_mem_align (mem, BITS_PER_UNIT);
30156
30157 emit_insn (gen_movmisalignv8qi (mem, reg));
30158 }
30159
30160 return true;
30161 }
30162
30163 /* Set a block of memory using plain strh/strb instructions, only
30164 using instructions allowed by ALIGN on the processor. We fill the
30165 first LENGTH bytes of the memory area starting from DSTBASE
30166 with byte constant VALUE. ALIGN is the alignment requirement
30167 of memory. */
30168 static bool
30169 arm_block_set_unaligned_non_vect (rtx dstbase,
30170 unsigned HOST_WIDE_INT length,
30171 unsigned HOST_WIDE_INT value,
30172 unsigned HOST_WIDE_INT align)
30173 {
30174 unsigned int i;
30175 rtx dst, addr, mem;
30176 rtx val_exp, val_reg, reg;
30177 machine_mode mode;
30178 HOST_WIDE_INT v = value;
30179
30180 gcc_assert (align == 1 || align == 2);
30181
30182 if (align == 2)
30183 v |= (value << BITS_PER_UNIT);
30184
30185 v = sext_hwi (v, BITS_PER_WORD);
30186 val_exp = GEN_INT (v);
30187 /* Skip if it isn't profitable. */
30188 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30189 align, true, false))
30190 return false;
30191
30192 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30193 mode = (align == 2 ? HImode : QImode);
30194 val_reg = force_reg (SImode, val_exp);
30195 reg = gen_lowpart (mode, val_reg);
30196
30197 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30198 {
30199 addr = plus_constant (Pmode, dst, i);
30200 mem = adjust_automodify_address (dstbase, mode, addr, i);
30201 emit_move_insn (mem, reg);
30202 }
30203
30204 /* Handle single byte leftover. */
30205 if (i + 1 == length)
30206 {
30207 reg = gen_lowpart (QImode, val_reg);
30208 addr = plus_constant (Pmode, dst, i);
30209 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30210 emit_move_insn (mem, reg);
30211 i++;
30212 }
30213
30214 gcc_assert (i == length);
30215 return true;
30216 }
30217
30218 /* Set a block of memory using plain strd/str/strh/strb instructions,
30219 to permit unaligned copies on processors which support unaligned
30220 semantics for those instructions. We fill the first LENGTH bytes
30221 of the memory area starting from DSTBASE with byte constant VALUE.
30222 ALIGN is the alignment requirement of memory. */
30223 static bool
30224 arm_block_set_aligned_non_vect (rtx dstbase,
30225 unsigned HOST_WIDE_INT length,
30226 unsigned HOST_WIDE_INT value,
30227 unsigned HOST_WIDE_INT align)
30228 {
30229 unsigned int i;
30230 rtx dst, addr, mem;
30231 rtx val_exp, val_reg, reg;
30232 unsigned HOST_WIDE_INT v;
30233 bool use_strd_p;
30234
30235 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30236 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30237
30238 v = (value | (value << 8) | (value << 16) | (value << 24));
30239 if (length < UNITS_PER_WORD)
30240 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30241
30242 if (use_strd_p)
30243 v |= (v << BITS_PER_WORD);
30244 else
30245 v = sext_hwi (v, BITS_PER_WORD);
30246
30247 val_exp = GEN_INT (v);
30248 /* Skip if it isn't profitable. */
30249 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30250 align, false, use_strd_p))
30251 {
30252 if (!use_strd_p)
30253 return false;
30254
30255 /* Try without strd. */
30256 v = (v >> BITS_PER_WORD);
30257 v = sext_hwi (v, BITS_PER_WORD);
30258 val_exp = GEN_INT (v);
30259 use_strd_p = false;
30260 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30261 align, false, use_strd_p))
30262 return false;
30263 }
30264
30265 i = 0;
30266 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30267 /* Handle double words using strd if possible. */
30268 if (use_strd_p)
30269 {
30270 val_reg = force_reg (DImode, val_exp);
30271 reg = val_reg;
30272 for (; (i + 8 <= length); i += 8)
30273 {
30274 addr = plus_constant (Pmode, dst, i);
30275 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30276 emit_move_insn (mem, reg);
30277 }
30278 }
30279 else
30280 val_reg = force_reg (SImode, val_exp);
30281
30282 /* Handle words. */
30283 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30284 for (; (i + 4 <= length); i += 4)
30285 {
30286 addr = plus_constant (Pmode, dst, i);
30287 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30288 if ((align & 3) == 0)
30289 emit_move_insn (mem, reg);
30290 else
30291 emit_insn (gen_unaligned_storesi (mem, reg));
30292 }
30293
30294 /* Merge last pair of STRH and STRB into a STR if possible. */
30295 if (unaligned_access && i > 0 && (i + 3) == length)
30296 {
30297 addr = plus_constant (Pmode, dst, i - 1);
30298 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30299 /* We are shifting one byte back, set the alignment accordingly. */
30300 if ((align & 1) == 0)
30301 set_mem_align (mem, BITS_PER_UNIT);
30302
30303 /* Most likely this is an unaligned access, and we can't tell at
30304 compilation time. */
30305 emit_insn (gen_unaligned_storesi (mem, reg));
30306 return true;
30307 }
30308
30309 /* Handle half word leftover. */
30310 if (i + 2 <= length)
30311 {
30312 reg = gen_lowpart (HImode, val_reg);
30313 addr = plus_constant (Pmode, dst, i);
30314 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30315 if ((align & 1) == 0)
30316 emit_move_insn (mem, reg);
30317 else
30318 emit_insn (gen_unaligned_storehi (mem, reg));
30319
30320 i += 2;
30321 }
30322
30323 /* Handle single byte leftover. */
30324 if (i + 1 == length)
30325 {
30326 reg = gen_lowpart (QImode, val_reg);
30327 addr = plus_constant (Pmode, dst, i);
30328 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30329 emit_move_insn (mem, reg);
30330 }
30331
30332 return true;
30333 }
30334
30335 /* Set a block of memory using vectorization instructions for both
30336 aligned and unaligned cases. We fill the first LENGTH bytes of
30337 the memory area starting from DSTBASE with byte constant VALUE.
30338 ALIGN is the alignment requirement of memory. */
30339 static bool
30340 arm_block_set_vect (rtx dstbase,
30341 unsigned HOST_WIDE_INT length,
30342 unsigned HOST_WIDE_INT value,
30343 unsigned HOST_WIDE_INT align)
30344 {
30345 /* Check whether we need to use unaligned store instruction. */
30346 if (((align & 3) != 0 || (length & 3) != 0)
30347 /* Check whether unaligned store instruction is available. */
30348 && (!unaligned_access || BYTES_BIG_ENDIAN))
30349 return false;
30350
30351 if ((align & 3) == 0)
30352 return arm_block_set_aligned_vect (dstbase, length, value, align);
30353 else
30354 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30355 }
30356
30357 /* Expand a string store operation. First we try to do that by using
30358 vectorization instructions, then try with ARM unaligned access and
30359 a double-word store if profitable. OPERANDS[0] is the destination,
30360 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30361 initialize the memory, OPERANDS[3] is the known alignment of the
30362 destination. */
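/* For reference, a call such as the following (illustrative only; the
   function name is arbitrary) is a typical source of the setmem expansion
   handled here, provided the length and value are compile-time constants:

     void
     fill (char *p)
     {
       __builtin_memset (p, 0xab, 15);
     }
*/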
30363 bool
30364 arm_gen_setmem (rtx *operands)
30365 {
30366 rtx dstbase = operands[0];
30367 unsigned HOST_WIDE_INT length;
30368 unsigned HOST_WIDE_INT value;
30369 unsigned HOST_WIDE_INT align;
30370
30371 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30372 return false;
30373
30374 length = UINTVAL (operands[1]);
30375 if (length > 64)
30376 return false;
30377
30378 value = (UINTVAL (operands[2]) & 0xFF);
30379 align = UINTVAL (operands[3]);
30380 if (TARGET_NEON && length >= 8
30381 && current_tune->string_ops_prefer_neon
30382 && arm_block_set_vect (dstbase, length, value, align))
30383 return true;
30384
30385 if (!unaligned_access && (align & 3) != 0)
30386 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30387
30388 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30389 }
30390
30391
30392 static bool
30393 arm_macro_fusion_p (void)
30394 {
30395 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30396 }
30397
30398 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30399 for MOVW / MOVT macro fusion. */
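/* The assembly idiom being fused is the usual two-instruction 32-bit
   immediate or address materialization, e.g.:

     movw r0, #:lower16:sym
     movt r0, #:upper16:sym

   (shown for illustration; the RTL forms actually matched are listed
   below).  */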
30400
30401 static bool
30402 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30403 {
30404 /* We are trying to fuse
30405 movw imm / movt imm
30406 instructions as a group that gets scheduled together. */
30407
30408 rtx set_dest = SET_DEST (curr_set);
30409
30410 if (GET_MODE (set_dest) != SImode)
30411 return false;
30412
30413 /* We are trying to match:
30414 prev (movw) == (set (reg r0) (const_int imm16))
30415 curr (movt) == (set (zero_extract (reg r0)
30416 (const_int 16)
30417 (const_int 16))
30418 (const_int imm16_1))
30419 or
30420 prev (movw) == (set (reg r1)
30421 (high (symbol_ref ("SYM"))))
30422 curr (movt) == (set (reg r0)
30423 (lo_sum (reg r1)
30424 (symbol_ref ("SYM")))) */
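/* As a concrete (illustrative) example, both shapes above describe a
   movw/movt pair in the generated assembly, e.g.
     movw  r0, #:lower16:SYM
     movt  r0, #:upper16:SYM
   for the symbolic case, or the two 16-bit halves of a 32-bit immediate
   for the constant case; keeping the pair back-to-back benefits cores
   whose tuning sets FUSE_MOVW_MOVT.  */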
30425
30426 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30427 {
30428 if (CONST_INT_P (SET_SRC (curr_set))
30429 && CONST_INT_P (SET_SRC (prev_set))
30430 && REG_P (XEXP (set_dest, 0))
30431 && REG_P (SET_DEST (prev_set))
30432 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30433 return true;
30434
30435 }
30436 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30437 && REG_P (SET_DEST (curr_set))
30438 && REG_P (SET_DEST (prev_set))
30439 && GET_CODE (SET_SRC (prev_set)) == HIGH
30440 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30441 return true;
30442
30443 return false;
30444 }
30445
30446 static bool
30447 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30448 {
30449 rtx prev_set = single_set (prev);
30450 rtx curr_set = single_set (curr);
30451
30452 if (!prev_set
30453 || !curr_set)
30454 return false;
30455
30456 if (any_condjump_p (curr))
30457 return false;
30458
30459 if (!arm_macro_fusion_p ())
30460 return false;
30461
30462 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30463 && aarch_crypto_can_dual_issue (prev, curr))
30464 return true;
30465
30466 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30467 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30468 return true;
30469
30470 return false;
30471 }
30472
30473 /* Return true iff the instruction fusion described by OP is enabled. */
30474 bool
30475 arm_fusion_enabled_p (tune_params::fuse_ops op)
30476 {
30477 return current_tune->fusible_ops & op;
30478 }
30479
30480 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30481 scheduled for speculative execution. Reject the long-running division
30482 and square-root instructions. */
30483
30484 static bool
30485 arm_sched_can_speculate_insn (rtx_insn *insn)
30486 {
30487 switch (get_attr_type (insn))
30488 {
30489 case TYPE_SDIV:
30490 case TYPE_UDIV:
30491 case TYPE_FDIVS:
30492 case TYPE_FDIVD:
30493 case TYPE_FSQRTS:
30494 case TYPE_FSQRTD:
30495 case TYPE_NEON_FP_SQRT_S:
30496 case TYPE_NEON_FP_SQRT_D:
30497 case TYPE_NEON_FP_SQRT_S_Q:
30498 case TYPE_NEON_FP_SQRT_D_Q:
30499 case TYPE_NEON_FP_DIV_S:
30500 case TYPE_NEON_FP_DIV_D:
30501 case TYPE_NEON_FP_DIV_S_Q:
30502 case TYPE_NEON_FP_DIV_D_Q:
30503 return false;
30504 default:
30505 return true;
30506 }
30507 }
30508
30509 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30510
30511 static unsigned HOST_WIDE_INT
30512 arm_asan_shadow_offset (void)
30513 {
30514 return HOST_WIDE_INT_1U << 29;
30515 }
30516
30517
30518 /* This is a temporary fix for PR60655. Ideally we should
30519 handle most of these cases in the generic part, but
30520 currently we reject minus (..) (sym_ref). We try to
30521 ameliorate the case of minus (sym_ref1) (sym_ref2)
30522 where both symbols are in the same section. */
30523
30524 static bool
30525 arm_const_not_ok_for_debug_p (rtx p)
30526 {
30527 tree decl_op0 = NULL;
30528 tree decl_op1 = NULL;
30529
30530 if (GET_CODE (p) == UNSPEC)
30531 return true;
30532 if (GET_CODE (p) == MINUS)
30533 {
30534 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30535 {
30536 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30537 if (decl_op1
30538 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30539 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30540 {
30541 if ((VAR_P (decl_op1)
30542 || TREE_CODE (decl_op1) == CONST_DECL)
30543 && (VAR_P (decl_op0)
30544 || TREE_CODE (decl_op0) == CONST_DECL))
30545 return (get_variable_section (decl_op1, false)
30546 != get_variable_section (decl_op0, false));
30547
30548 if (TREE_CODE (decl_op1) == LABEL_DECL
30549 && TREE_CODE (decl_op0) == LABEL_DECL)
30550 return (DECL_CONTEXT (decl_op1)
30551 != DECL_CONTEXT (decl_op0));
30552 }
30553
30554 return true;
30555 }
30556 }
30557
30558 return false;
30559 }
30560
30561 /* Return TRUE if X is a reference to a value in a constant pool. */
30562 extern bool
30563 arm_is_constant_pool_ref (rtx x)
30564 {
30565 return (MEM_P (x)
30566 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30567 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30568 }
30569
30570 /* Remember the last target of arm_set_current_function. */
30571 static GTY(()) tree arm_previous_fndecl;
30572
30573 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30574
30575 void
30576 save_restore_target_globals (tree new_tree)
30577 {
30578 /* If we have a previous state, use it. */
30579 if (TREE_TARGET_GLOBALS (new_tree))
30580 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30581 else if (new_tree == target_option_default_node)
30582 restore_target_globals (&default_target_globals);
30583 else
30584 {
30585 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30586 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30587 }
30588
30589 arm_option_params_internal ();
30590 }
30591
30592 /* Invalidate arm_previous_fndecl. */
30593
30594 void
30595 arm_reset_previous_fndecl (void)
30596 {
30597 arm_previous_fndecl = NULL_TREE;
30598 }
30599
30600 /* Establish appropriate back-end context for processing the function
30601 FNDECL. The argument might be NULL to indicate processing at top
30602 level, outside of any function scope. */
30603
30604 static void
30605 arm_set_current_function (tree fndecl)
30606 {
30607 if (!fndecl || fndecl == arm_previous_fndecl)
30608 return;
30609
30610 tree old_tree = (arm_previous_fndecl
30611 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30612 : NULL_TREE);
30613
30614 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30615
30616 /* If the current function has no attributes but the previous one did,
30617 use the default node. */
30618 if (! new_tree && old_tree)
30619 new_tree = target_option_default_node;
30620
30621 /* If there is nothing to do, return. #pragma GCC reset or #pragma GCC pop
30622 to the default has already been handled by save_restore_target_globals from
30623 arm_pragma_target_parse. */
30624 if (old_tree == new_tree)
30625 return;
30626
30627 arm_previous_fndecl = fndecl;
30628
30629 /* First set the target options. */
30630 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30631
30632 save_restore_target_globals (new_tree);
30633 }
30634
30635 /* Implement TARGET_OPTION_PRINT. */
30636
30637 static void
30638 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30639 {
30640 int flags = ptr->x_target_flags;
30641 const char *fpu_name;
30642
30643 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30644 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30645
30646 fprintf (file, "%*sselected isa %s\n", indent, "",
30647 TARGET_THUMB2_P (flags) ? "thumb2" :
30648 TARGET_THUMB_P (flags) ? "thumb1" :
30649 "arm");
30650
30651 if (ptr->x_arm_arch_string)
30652 fprintf (file, "%*sselected architecture %s\n", indent, "",
30653 ptr->x_arm_arch_string);
30654
30655 if (ptr->x_arm_cpu_string)
30656 fprintf (file, "%*sselected CPU %s\n", indent, "",
30657 ptr->x_arm_cpu_string);
30658
30659 if (ptr->x_arm_tune_string)
30660 fprintf (file, "%*sselected tune %s\n", indent, "",
30661 ptr->x_arm_tune_string);
30662
30663 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30664 }
30665
30666 /* Hook to determine if one function can safely inline another. */
30667
30668 static bool
30669 arm_can_inline_p (tree caller, tree callee)
30670 {
30671 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30672 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30673 bool can_inline = true;
30674
30675 struct cl_target_option *caller_opts
30676 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30677 : target_option_default_node);
30678
30679 struct cl_target_option *callee_opts
30680 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30681 : target_option_default_node);
30682
30683 if (callee_opts == caller_opts)
30684 return true;
30685
30686 /* Callee's ISA features should be a subset of the caller's. */
30687 struct arm_build_target caller_target;
30688 struct arm_build_target callee_target;
30689 caller_target.isa = sbitmap_alloc (isa_num_bits);
30690 callee_target.isa = sbitmap_alloc (isa_num_bits);
30691
30692 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30693 false);
30694 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30695 false);
30696 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30697 can_inline = false;
30698
30699 sbitmap_free (caller_target.isa);
30700 sbitmap_free (callee_target.isa);
30701
30702 /* It is OK to inline between different modes.
30703 A function with mode-specific instructions, e.g. using asm,
30704 must be explicitly protected with noinline. */
30705 return can_inline;
30706 }
30707
30708 /* Hook to fix a function's alignment when it is affected by the target attribute. */
30709
30710 static void
30711 arm_relayout_function (tree fndecl)
30712 {
30713 if (DECL_USER_ALIGN (fndecl))
30714 return;
30715
30716 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30717
30718 if (!callee_tree)
30719 callee_tree = target_option_default_node;
30720
30721 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30722 SET_DECL_ALIGN
30723 (fndecl,
30724 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30725 }
30726
30727 /* Inner function to process attribute ((target (...))): take an argument and
30728 set the current options from that argument. If the argument is a list,
30729 recursively process each element of the list. */
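/* For instance (hypothetical usage; valid names depend on the configured
   target), each of the following is handled one comma-separated token at a
   time by the strtok loop below:
     __attribute__ ((target ("thumb")))
     __attribute__ ((target ("arm,fpu=vfpv3-d16")))
     __attribute__ ((target ("arch=armv7-a+simd")))  */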
30730
30731 static bool
30732 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30733 {
30734 if (TREE_CODE (args) == TREE_LIST)
30735 {
30736 bool ret = true;
30737
30738 for (; args; args = TREE_CHAIN (args))
30739 if (TREE_VALUE (args)
30740 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30741 ret = false;
30742 return ret;
30743 }
30744
30745 else if (TREE_CODE (args) != STRING_CST)
30746 {
30747 error ("attribute %<target%> argument not a string");
30748 return false;
30749 }
30750
30751 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30752 char *q;
30753
30754 while ((q = strtok (argstr, ",")) != NULL)
30755 {
30756 while (ISSPACE (*q)) ++q;
30757
30758 argstr = NULL;
30759 if (!strncmp (q, "thumb", 5))
30760 opts->x_target_flags |= MASK_THUMB;
30761
30762 else if (!strncmp (q, "arm", 3))
30763 opts->x_target_flags &= ~MASK_THUMB;
30764
30765 else if (!strncmp (q, "fpu=", 4))
30766 {
30767 int fpu_index;
30768 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30769 &fpu_index, CL_TARGET))
30770 {
30771 error ("invalid fpu for target attribute or pragma %qs", q);
30772 return false;
30773 }
30774 if (fpu_index == TARGET_FPU_auto)
30775 {
30776 /* This doesn't really make sense until we support
30777 general dynamic selection of the architecture and all
30778 sub-features. */
30779 sorry ("auto fpu selection not currently permitted here");
30780 return false;
30781 }
30782 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30783 }
30784 else if (!strncmp (q, "arch=", 5))
30785 {
30786 char* arch = q+5;
30787 const arch_option *arm_selected_arch
30788 = arm_parse_arch_option_name (all_architectures, "arch", arch);
30789
30790 if (!arm_selected_arch)
30791 {
30792 error ("invalid architecture for target attribute or pragma %qs",
30793 q);
30794 return false;
30795 }
30796
30797 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
30798 }
30799 else if (q[0] == '+')
30800 {
30801 opts->x_arm_arch_string
30802 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
30803 }
30804 else
30805 {
30806 error ("unknown target attribute or pragma %qs", q);
30807 return false;
30808 }
30809 }
30810
30811 return true;
30812 }
30813
30814 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30815
30816 tree
30817 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30818 struct gcc_options *opts_set)
30819 {
30820 struct cl_target_option cl_opts;
30821
30822 if (!arm_valid_target_attribute_rec (args, opts))
30823 return NULL_TREE;
30824
30825 cl_target_option_save (&cl_opts, opts);
30826 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30827 arm_option_check_internal (opts);
30828 /* Do any overrides, such as those coming from a global arch=xxx option.
30829 We do this since arm_active_target was overridden above. */
30830 arm_option_reconfigure_globals ();
30831 arm_options_perform_arch_sanity_checks ();
30832 arm_option_override_internal (opts, opts_set);
30833
30834 return build_target_option_node (opts);
30835 }
30836
30837 static void
30838 add_attribute (const char * mode, tree *attributes)
30839 {
30840 size_t len = strlen (mode);
30841 tree value = build_string (len, mode);
30842
30843 TREE_TYPE (value) = build_array_type (char_type_node,
30844 build_index_type (size_int (len)));
30845
30846 *attributes = tree_cons (get_identifier ("target"),
30847 build_tree_list (NULL_TREE, value),
30848 *attributes);
30849 }
30850
30851 /* For testing only: alternately insert thumb and arm mode attributes on functions. */
30852
30853 static void
30854 arm_insert_attributes (tree fndecl, tree * attributes)
30855 {
30856 const char *mode;
30857
30858 if (! TARGET_FLIP_THUMB)
30859 return;
30860
30861 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30862 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
30863 return;
30864
30865 /* Nested definitions must inherit mode. */
30866 if (current_function_decl)
30867 {
30868 mode = TARGET_THUMB ? "thumb" : "arm";
30869 add_attribute (mode, attributes);
30870 return;
30871 }
30872
30873 /* If there is already a setting don't change it. */
30874 if (lookup_attribute ("target", *attributes) != NULL)
30875 return;
30876
30877 mode = thumb_flipper ? "thumb" : "arm";
30878 add_attribute (mode, attributes);
30879
30880 thumb_flipper = !thumb_flipper;
30881 }
30882
30883 /* Hook to validate attribute((target("string"))). */
30884
30885 static bool
30886 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30887 tree args, int ARG_UNUSED (flags))
30888 {
30889 bool ret = true;
30890 struct gcc_options func_options;
30891 tree cur_tree, new_optimize;
30892 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30893
30894 /* Get the optimization options of the current function. */
30895 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30896
30897 /* If the function changed the optimization levels as well as setting target
30898 options, start with the optimizations specified. */
30899 if (!func_optimize)
30900 func_optimize = optimization_default_node;
30901
30902 /* Init func_options. */
30903 memset (&func_options, 0, sizeof (func_options));
30904 init_options_struct (&func_options, NULL);
30905 lang_hooks.init_options_struct (&func_options);
30906
30907 /* Initialize func_options to the defaults. */
30908 cl_optimization_restore (&func_options,
30909 TREE_OPTIMIZATION (func_optimize));
30910
30911 cl_target_option_restore (&func_options,
30912 TREE_TARGET_OPTION (target_option_default_node));
30913
30914 /* Set func_options flags with new target mode. */
30915 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30916 &global_options_set);
30917
30918 if (cur_tree == NULL_TREE)
30919 ret = false;
30920
30921 new_optimize = build_optimization_node (&func_options);
30922
30923 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30924
30925 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30926
30927 finalize_options_struct (&func_options);
30928
30929 return ret;
30930 }
30931
30932 /* Match an ISA feature bitmap to a named FPU. We always use the
30933 first entry that exactly matches the feature set, so that we
30934 effectively canonicalize the FPU name for the assembler. */
30935 static const char*
30936 arm_identify_fpu_from_isa (sbitmap isa)
30937 {
30938 auto_sbitmap fpubits (isa_num_bits);
30939 auto_sbitmap cand_fpubits (isa_num_bits);
30940
30941 bitmap_and (fpubits, isa, isa_all_fpubits);
30942
30943 /* If there are no ISA feature bits relating to the FPU, we must be
30944 doing soft-float. */
30945 if (bitmap_empty_p (fpubits))
30946 return "softvfp";
30947
30948 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30949 {
30950 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30951 if (bitmap_equal_p (fpubits, cand_fpubits))
30952 return all_fpus[i].name;
30953 }
30954 /* We must find an entry, or things have gone wrong. */
30955 gcc_unreachable ();
30956 }
30957
30958 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
30959 by the function DECL. */
30960 void
30961 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30962 {
30963 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
30964
30965 struct cl_target_option *targ_options;
30966 if (target_parts)
30967 targ_options = TREE_TARGET_OPTION (target_parts);
30968 else
30969 targ_options = TREE_TARGET_OPTION (target_option_current_node);
30970 gcc_assert (targ_options);
30971
30972 /* Only update the assembler .arch string if it is distinct from the last
30973 such string we printed. arch_to_print is set conditionally because
30974 targ_options->x_arm_arch_string can be NULL, which is the case
30975 when cc1 is invoked directly without passing the -march option. */
30976 std::string arch_to_print;
30977 if (targ_options->x_arm_arch_string)
30978 arch_to_print = targ_options->x_arm_arch_string;
30979
30980 if (arch_to_print != arm_last_printed_arch_string)
30981 {
30982 std::string arch_name
30983 = arch_to_print.substr (0, arch_to_print.find ("+"));
30984 asm_fprintf (asm_out_file, "\t.arch %s\n", arch_name.c_str ());
30985 const arch_option *arch
30986 = arm_parse_arch_option_name (all_architectures, "-march",
30987 targ_options->x_arm_arch_string);
30988 auto_sbitmap opt_bits (isa_num_bits);
30989
30990 gcc_assert (arch);
30991 if (arch->common.extensions)
30992 {
30993 for (const struct cpu_arch_extension *opt = arch->common.extensions;
30994 opt->name != NULL;
30995 opt++)
30996 {
30997 if (!opt->remove)
30998 {
30999 arm_initialize_isa (opt_bits, opt->isa_bits);
31000 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
31001 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
31002 asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
31003 opt->name);
31004 }
31005 }
31006 }
31007
31008 arm_last_printed_arch_string = arch_to_print;
31009 }
31010
31011 fprintf (stream, "\t.syntax unified\n");
31012
31013 if (TARGET_THUMB)
31014 {
31015 if (is_called_in_ARM_mode (decl)
31016 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
31017 && cfun->is_thunk))
31018 fprintf (stream, "\t.code 32\n");
31019 else if (TARGET_THUMB1)
31020 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
31021 else
31022 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
31023 }
31024 else
31025 fprintf (stream, "\t.arm\n");
31026
31027 std::string fpu_to_print
31028 = TARGET_SOFT_FLOAT
31029 ? "softvfp" : arm_identify_fpu_from_isa (arm_active_target.isa);
31030
31031 if (fpu_to_print != arm_last_printed_fpu_string)
31032 {
31033 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_to_print.c_str ());
31034 arm_last_printed_fpu_string = fpu_to_print;
31035 }
31036
31037 if (TARGET_POKE_FUNCTION_NAME)
31038 arm_poke_function_name (stream, (const char *) name);
31039 }
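/* Illustrative example of the output produced above (not from the original
   sources): for the first Thumb-2 function in a translation unit built for
   armv7-a with the "neon" FPU, the emitted directives resemble
     .arch armv7-a
     .syntax unified
     .thumb
     .thumb_func
     .fpu neon
   with .arch_extension lines added for enabled extensions that are not
   FPU feature bits.  */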
31040
31041 /* If MEM is in the form of [base+offset], extract the two parts
31042 of the address into BASE and OFFSET, otherwise return false
31043 after clearing BASE and OFFSET. */
31044
31045 static bool
31046 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
31047 {
31048 rtx addr;
31049
31050 gcc_assert (MEM_P (mem));
31051
31052 addr = XEXP (mem, 0);
31053
31054 /* Strip off const from addresses like (const (addr)). */
31055 if (GET_CODE (addr) == CONST)
31056 addr = XEXP (addr, 0);
31057
31058 if (GET_CODE (addr) == REG)
31059 {
31060 *base = addr;
31061 *offset = const0_rtx;
31062 return true;
31063 }
31064
31065 if (GET_CODE (addr) == PLUS
31066 && GET_CODE (XEXP (addr, 0)) == REG
31067 && CONST_INT_P (XEXP (addr, 1)))
31068 {
31069 *base = XEXP (addr, 0);
31070 *offset = XEXP (addr, 1);
31071 return true;
31072 }
31073
31074 *base = NULL_RTX;
31075 *offset = NULL_RTX;
31076
31077 return false;
31078 }
31079
31080 /* If INSN is a load or store whose address is in the form [base+offset],
31081 extract the two parts into BASE and OFFSET. IS_LOAD is set
31082 to TRUE if it is a load. Return TRUE if INSN is such an instruction,
31083 otherwise return FALSE. */
31084
31085 static bool
31086 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
31087 {
31088 rtx x, dest, src;
31089
31090 gcc_assert (INSN_P (insn));
31091 x = PATTERN (insn);
31092 if (GET_CODE (x) != SET)
31093 return false;
31094
31095 src = SET_SRC (x);
31096 dest = SET_DEST (x);
31097 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
31098 {
31099 *is_load = false;
31100 extract_base_offset_in_addr (dest, base, offset);
31101 }
31102 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
31103 {
31104 *is_load = true;
31105 extract_base_offset_in_addr (src, base, offset);
31106 }
31107 else
31108 return false;
31109
31110 return (*base != NULL_RTX && *offset != NULL_RTX);
31111 }
31112
31113 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
31114
31115 Currently we only support fusing ldr and str instructions, so FUSION_PRI
31116 and PRI are only calculated for these instructions. For any other instruction,
31117 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
31118 instruction fusion can be supported by returning different priorities.
31119
31120 It's important that irrelevant instructions get the largest FUSION_PRI. */
31121
31122 static void
31123 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
31124 int *fusion_pri, int *pri)
31125 {
31126 int tmp, off_val;
31127 bool is_load;
31128 rtx base, offset;
31129
31130 gcc_assert (INSN_P (insn));
31131
31132 tmp = max_pri - 1;
31133 if (!fusion_load_store (insn, &base, &offset, &is_load))
31134 {
31135 *pri = tmp;
31136 *fusion_pri = tmp;
31137 return;
31138 }
31139
31140 /* Load goes first. */
31141 if (is_load)
31142 *fusion_pri = tmp - 1;
31143 else
31144 *fusion_pri = tmp - 2;
31145
31146 tmp /= 2;
31147
31148 /* INSN with smaller base register goes first. */
31149 tmp -= ((REGNO (base) & 0xff) << 20);
31150
31151 /* INSN with smaller offset goes first. */
31152 off_val = (int)(INTVAL (offset));
31153 if (off_val >= 0)
31154 tmp -= (off_val & 0xfffff);
31155 else
31156 tmp += ((- off_val) & 0xfffff);
31157
31158 *pri = tmp;
31159 return;
31160 }
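/* Worked example (illustrative): with max_pri == M, the loads
   "ldr r1, [r0, #4]" and "ldr r2, [r0, #8]" both receive
   *fusion_pri == M - 2 (loads sort before stores), while their *pri values
   differ by the offset, so the #4 access sorts before the #8 one.  The
   scheduler therefore tends to keep same-base loads adjacent and in offset
   order, so that later passes can merge them into paired accesses
   (e.g. LDRD/STRD-style sequences).  */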
31161
31162
31163 /* Construct and return a PARALLEL RTX vector with elements numbering the
31164 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
31165 the vector - from the perspective of the architecture. This does not
31166 line up with GCC's perspective on lane numbers, so we end up with
31167 different masks depending on our target endian-ness. The diagram
31168 below may help. We must draw the distinction when building masks
31169 which select one half of the vector. An instruction selecting
31170 architectural low-lanes for a big-endian target must be described using
31171 a mask selecting GCC high-lanes.
31172
31173 Big-Endian Little-Endian
31174
31175 GCC 0 1 2 3 3 2 1 0
31176 | x | x | x | x | | x | x | x | x |
31177 Architecture 3 2 1 0 3 2 1 0
31178
31179 Low Mask: { 2, 3 } { 0, 1 }
31180 High Mask: { 0, 1 } { 2, 3 }
31181 */
31182
31183 rtx
31184 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
31185 {
31186 int nunits = GET_MODE_NUNITS (mode);
31187 rtvec v = rtvec_alloc (nunits / 2);
31188 int high_base = nunits / 2;
31189 int low_base = 0;
31190 int base;
31191 rtx t1;
31192 int i;
31193
31194 if (BYTES_BIG_ENDIAN)
31195 base = high ? low_base : high_base;
31196 else
31197 base = high ? high_base : low_base;
31198
31199 for (i = 0; i < nunits / 2; i++)
31200 RTVEC_ELT (v, i) = GEN_INT (base + i);
31201
31202 t1 = gen_rtx_PARALLEL (mode, v);
31203 return t1;
31204 }
31205
31206 /* Check OP for validity as a PARALLEL RTX vector with elements
31207 numbering the lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE)
31208 half of the vector, from the perspective of the architecture. See the
31209 diagram above arm_simd_vect_par_cnst_half for more details. */
31210
31211 bool
31212 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
31213 bool high)
31214 {
31215 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
31216 HOST_WIDE_INT count_op = XVECLEN (op, 0);
31217 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
31218 int i = 0;
31219
31220 if (!VECTOR_MODE_P (mode))
31221 return false;
31222
31223 if (count_op != count_ideal)
31224 return false;
31225
31226 for (i = 0; i < count_ideal; i++)
31227 {
31228 rtx elt_op = XVECEXP (op, 0, i);
31229 rtx elt_ideal = XVECEXP (ideal, 0, i);
31230
31231 if (!CONST_INT_P (elt_op)
31232 || INTVAL (elt_ideal) != INTVAL (elt_op))
31233 return false;
31234 }
31235 return true;
31236 }
31237
31238 /* We can output an mi_thunk for all cases except for a non-zero vcall_offset
31239 in Thumb-1. */
31240 static bool
31241 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
31242 const_tree)
31243 {
31244 /* For now, we punt and do not handle this for TARGET_THUMB1. */
31245 if (vcall_offset && TARGET_THUMB1)
31246 return false;
31247
31248 /* Otherwise ok. */
31249 return true;
31250 }
31251
31252 /* Generate RTL for a conditional branch with rtx comparison CODE in
31253 mode CC_MODE. The destination of the unlikely conditional branch
31254 is LABEL_REF. */
31255
31256 void
31257 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
31258 rtx label_ref)
31259 {
31260 rtx x;
31261 x = gen_rtx_fmt_ee (code, VOIDmode,
31262 gen_rtx_REG (cc_mode, CC_REGNUM),
31263 const0_rtx);
31264
31265 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31266 gen_rtx_LABEL_REF (VOIDmode, label_ref),
31267 pc_rtx);
31268 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31269 }
31270
31271 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31272
31273 For pure-code sections there is no letter code for this attribute, so
31274 output all the section flags numerically when this is needed. */
31275
31276 static bool
31277 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31278 {
31279
31280 if (flags & SECTION_ARM_PURECODE)
31281 {
31282 *num = 0x20000000;
31283
31284 if (!(flags & SECTION_DEBUG))
31285 *num |= 0x2;
31286 if (flags & SECTION_EXCLUDE)
31287 *num |= 0x80000000;
31288 if (flags & SECTION_WRITE)
31289 *num |= 0x1;
31290 if (flags & SECTION_CODE)
31291 *num |= 0x4;
31292 if (flags & SECTION_MERGE)
31293 *num |= 0x10;
31294 if (flags & SECTION_STRINGS)
31295 *num |= 0x20;
31296 if (flags & SECTION_TLS)
31297 *num |= 0x400;
31298 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31299 *num |= 0x200;
31300
31301 return true;
31302 }
31303
31304 return false;
31305 }
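/* Worked example (illustrative): for an allocatable, executable, non-debug
   pure-code section the computation above yields
   0x20000000 | 0x2 | 0x4 == 0x20000006, i.e. SHF_ARM_PURECODE together with
   the numeric values of SHF_ALLOC and SHF_EXECINSTR.  */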
31306
31307 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31308
31309 If pure-code is passed as an option, make sure all functions are in
31310 sections that have the SHF_ARM_PURECODE attribute. */
31311
31312 static section *
31313 arm_function_section (tree decl, enum node_frequency freq,
31314 bool startup, bool exit)
31315 {
31316 const char * section_name;
31317 section * sec;
31318
31319 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31320 return default_function_section (decl, freq, startup, exit);
31321
31322 if (!target_pure_code)
31323 return default_function_section (decl, freq, startup, exit);
31324
31325
31326 section_name = DECL_SECTION_NAME (decl);
31327
31328 /* If a function is not in a named section then it falls under the 'default'
31329 text section, also known as '.text'. We can preserve previous behavior as
31330 the default text section already has the SHF_ARM_PURECODE section
31331 attribute. */
31332 if (!section_name)
31333 {
31334 section *default_sec = default_function_section (decl, freq, startup,
31335 exit);
31336
31337 /* If default_sec is not null, then it must be a special section like for
31338 example .text.startup. We set the pure-code attribute and return the
31339 same section to preserve existing behavior. */
31340 if (default_sec)
31341 default_sec->common.flags |= SECTION_ARM_PURECODE;
31342 return default_sec;
31343 }
31344
31345 /* Otherwise look whether a section has already been created with
31346 'section_name'. */
31347 sec = get_named_section (decl, section_name, 0);
31348 if (!sec)
31349 /* If that is not the case passing NULL as the section's name to
31350 'get_named_section' will create a section with the declaration's
31351 section name. */
31352 sec = get_named_section (decl, NULL, 0);
31353
31354 /* Set the SHF_ARM_PURECODE attribute. */
31355 sec->common.flags |= SECTION_ARM_PURECODE;
31356
31357 return sec;
31358 }
31359
31360 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
31361
31362 If DECL is a function declaration and pure-code is passed as an option
31363 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
31364 section's name and RELOC indicates whether the declaration's initializer may
31365 contain runtime relocations. */
31366
31367 static unsigned int
31368 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31369 {
31370 unsigned int flags = default_section_type_flags (decl, name, reloc);
31371
31372 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31373 flags |= SECTION_ARM_PURECODE;
31374
31375 return flags;
31376 }
31377
31378 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
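/* Explanatory note (a sketch of what the code below does): the libcall
   returns the quotient and remainder packed into a value twice as wide as
   MODE (e.g. DImode for SImode operands); simplify_gen_subreg then extracts
   the quotient from the first GET_MODE_SIZE (mode) bytes and the remainder
   from the second.  */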
31379
31380 static void
31381 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31382 rtx op0, rtx op1,
31383 rtx *quot_p, rtx *rem_p)
31384 {
31385 if (mode == SImode)
31386 gcc_assert (!TARGET_IDIV);
31387
31388 scalar_int_mode libval_mode
31389 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31390
31391 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31392 libval_mode,
31393 op0, GET_MODE (op0),
31394 op1, GET_MODE (op1));
31395
31396 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31397 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31398 GET_MODE_SIZE (mode));
31399
31400 gcc_assert (quotient);
31401 gcc_assert (remainder);
31402
31403 *quot_p = quotient;
31404 *rem_p = remainder;
31405 }
31406
31407 /* This function checks for the availability of the coprocessor builtin passed
31408 in BUILTIN for the current target. Returns true if it is available and
31409 false otherwise. If a BUILTIN is passed for which this function has not
31410 been implemented it will cause an internal compiler error (gcc_unreachable). */
31411
31412 bool
31413 arm_coproc_builtin_available (enum unspecv builtin)
31414 {
31415 /* None of these builtins are available in Thumb mode if the target only
31416 supports Thumb-1. */
31417 if (TARGET_THUMB1)
31418 return false;
31419
31420 switch (builtin)
31421 {
31422 case VUNSPEC_CDP:
31423 case VUNSPEC_LDC:
31424 case VUNSPEC_LDCL:
31425 case VUNSPEC_STC:
31426 case VUNSPEC_STCL:
31427 case VUNSPEC_MCR:
31428 case VUNSPEC_MRC:
31429 if (arm_arch4)
31430 return true;
31431 break;
31432 case VUNSPEC_CDP2:
31433 case VUNSPEC_LDC2:
31434 case VUNSPEC_LDC2L:
31435 case VUNSPEC_STC2:
31436 case VUNSPEC_STC2L:
31437 case VUNSPEC_MCR2:
31438 case VUNSPEC_MRC2:
31439 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31440 ARMv8-{A,M}. */
31441 if (arm_arch5t)
31442 return true;
31443 break;
31444 case VUNSPEC_MCRR:
31445 case VUNSPEC_MRRC:
31446 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31447 ARMv8-{A,M}. */
31448 if (arm_arch6 || arm_arch5te)
31449 return true;
31450 break;
31451 case VUNSPEC_MCRR2:
31452 case VUNSPEC_MRRC2:
31453 if (arm_arch6)
31454 return true;
31455 break;
31456 default:
31457 gcc_unreachable ();
31458 }
31459 return false;
31460 }
31461
31462 /* This function returns true if OP is a valid memory operand for the ldc and
31463 stc coprocessor instructions and false otherwise. */
31464
31465 bool
31466 arm_coproc_ldc_stc_legitimate_address (rtx op)
31467 {
31468 HOST_WIDE_INT range;
31469 /* Has to be a memory operand. */
31470 if (!MEM_P (op))
31471 return false;
31472
31473 op = XEXP (op, 0);
31474
31475 /* We accept registers. */
31476 if (REG_P (op))
31477 return true;
31478
31479 switch (GET_CODE (op))
31480 {
31481 case PLUS:
31482 {
31483 /* Or registers with an offset. */
31484 if (!REG_P (XEXP (op, 0)))
31485 return false;
31486
31487 op = XEXP (op, 1);
31488
31489 /* The offset must be an immediate though. */
31490 if (!CONST_INT_P (op))
31491 return false;
31492
31493 range = INTVAL (op);
31494
31495 /* Within the range of [-1020,1020]. */
31496 if (!IN_RANGE (range, -1020, 1020))
31497 return false;
31498
31499 /* And a multiple of 4. */
31500 return (range % 4) == 0;
31501 }
31502 case PRE_INC:
31503 case POST_INC:
31504 case PRE_DEC:
31505 case POST_DEC:
31506 return REG_P (XEXP (op, 0));
31507 default:
31508 gcc_unreachable ();
31509 }
31510 return false;
31511 }
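/* For example (illustrative only), the checks above accept addresses such
   as [r0] and [r0, #8] (any immediate offset that is a multiple of 4 in
   the range [-1020, 1020]), as well as plain pre/post increment or
   decrement of a base register, while rejecting e.g. [r0, #2] (not a
   multiple of 4) or [r0, #1024] (out of range).  */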
31512
31513 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31514
31515 In VFPv1, VFP registers could only be accessed in the mode they were
31516 set, so subregs would be invalid there. However, we don't support
31517 VFPv1 at the moment, and the restriction was lifted in VFPv2.
31518
31519 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31520 VFP registers in little-endian order. We can't describe that accurately to
31521 GCC, so avoid taking subregs of such values.
31522
31523 The only exception is going from a 128-bit to a 64-bit type. In that
31524 case the data layout happens to be consistent for big-endian, so we
31525 explicitly allow that case. */
31526
31527 static bool
31528 arm_can_change_mode_class (machine_mode from, machine_mode to,
31529 reg_class_t rclass)
31530 {
31531 if (TARGET_BIG_END
31532 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
31533 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
31534 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
31535 && reg_classes_intersect_p (VFP_REGS, rclass))
31536 return false;
31537 return true;
31538 }
31539
31540 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
31541 strcpy from constants will be faster. */
31542
31543 static HOST_WIDE_INT
31544 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
31545 {
31546 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
31547 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
31548 return MAX (align, BITS_PER_WORD * factor);
31549 return align;
31550 }
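/* For instance (illustrative): when tuning for XScale in ARM mode the
   factor above is 2, so string constants get at least
   2 * BITS_PER_WORD == 64-bit alignment; otherwise they get at least
   BITS_PER_WORD (32-bit) alignment, and when optimizing for size the
   incoming alignment is returned unchanged.  */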
31551
31552 /* Emit a speculation barrier on target architectures that do not have
31553 DSB/ISB directly. Such systems probably don't need a barrier
31554 themselves, but if the code is ever run on a later architecture, it
31555 might become a problem. */
31556 void
31557 arm_emit_speculation_barrier_function ()
31558 {
31559 emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
31560 }
31561
31562 #if CHECKING_P
31563 namespace selftest {
31564
31565 /* Scan the static data tables generated by parsecpu.awk looking for
31566 potential issues with the data. We primarily check for
31567 inconsistencies in the option extensions at present (extensions
31568 that duplicate others but aren't marked as aliases). Furthermore,
31569 for correct canonicalization later options must never be a subset
31570 of an earlier option. Any extension should also only specify other
31571 feature bits and never an architecture bit. The architecture is inferred
31572 from the declaration of the extension. */
31573 static void
31574 arm_test_cpu_arch_data (void)
31575 {
31576 const arch_option *arch;
31577 const cpu_option *cpu;
31578 auto_sbitmap target_isa (isa_num_bits);
31579 auto_sbitmap isa1 (isa_num_bits);
31580 auto_sbitmap isa2 (isa_num_bits);
31581
31582 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31583 {
31584 const cpu_arch_extension *ext1, *ext2;
31585
31586 if (arch->common.extensions == NULL)
31587 continue;
31588
31589 arm_initialize_isa (target_isa, arch->common.isa_bits);
31590
31591 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31592 {
31593 if (ext1->alias)
31594 continue;
31595
31596 arm_initialize_isa (isa1, ext1->isa_bits);
31597 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31598 {
31599 if (ext2->alias || ext1->remove != ext2->remove)
31600 continue;
31601
31602 arm_initialize_isa (isa2, ext2->isa_bits);
31603 /* If the option is a subset of the parent option, it doesn't
31604 add anything and so isn't useful. */
31605 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31606
31607 /* If the extension specifies any architectural bits then
31608 disallow it. Extensions should only specify feature bits. */
31609 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31610 }
31611 }
31612 }
31613
31614 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31615 {
31616 const cpu_arch_extension *ext1, *ext2;
31617
31618 if (cpu->common.extensions == NULL)
31619 continue;
31620
31621 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31622
31623 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31624 {
31625 if (ext1->alias)
31626 continue;
31627
31628 arm_initialize_isa (isa1, ext1->isa_bits);
31629 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31630 {
31631 if (ext2->alias || ext1->remove != ext2->remove)
31632 continue;
31633
31634 arm_initialize_isa (isa2, ext2->isa_bits);
31635 /* If the option is a subset of the parent option, it doesn't
31636 add anything and so isn't useful. */
31637 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31638
31639 /* If the extension specifies any architectural bits then
31640 disallow it. Extensions should only specify feature bits. */
31641 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31642 }
31643 }
31644 }
31645 }
31646
31647 /* Scan the static data tables generated by parsecpu.awk looking for
31648 potential issues with the data. Here we check the consistency of the
31649 FPU feature bits; in particular, we check that ISA_ALL_FPU_INTERNAL does not
31650 contain a feature bit that is not defined by any FPU. */
31651 static void
31652 arm_test_fpu_data (void)
31653 {
31654 auto_sbitmap isa_all_fpubits (isa_num_bits);
31655 auto_sbitmap fpubits (isa_num_bits);
31656 auto_sbitmap tmpset (isa_num_bits);
31657
31658 static const enum isa_feature fpu_bitlist[]
31659 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
31660 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
31661
31662 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31663 {
31664 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
31665 bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
31666 bitmap_clear (isa_all_fpubits);
31667 bitmap_copy (isa_all_fpubits, tmpset);
31668 }
31669
31670 if (!bitmap_empty_p (isa_all_fpubits))
31671 {
31672 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
31673 " group that are not defined by any FPU.\n"
31674 " Check your arm-cpus.in.\n");
31675 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
31676 }
31677 }
31678
31679 static void
31680 arm_run_selftests (void)
31681 {
31682 arm_test_cpu_arch_data ();
31683 arm_test_fpu_data ();
31684 }
31685 } /* Namespace selftest. */
31686
31687 #undef TARGET_RUN_TARGET_SELFTESTS
31688 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31689 #endif /* CHECKING_P */
31690
31691 struct gcc_target targetm = TARGET_INITIALIZER;
31692
31693 #include "gt-arm.h"