1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2019 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "df.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "attribs.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "cgraph.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "output.h"
51 #include "insn-attr.h"
52 #include "flags.h"
53 #include "reload.h"
54 #include "explow.h"
55 #include "expr.h"
56 #include "cfgrtl.h"
57 #include "sched-int.h"
58 #include "common/common-target.h"
59 #include "langhooks.h"
60 #include "intl.h"
61 #include "libfuncs.h"
62 #include "params.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "selftest.h"
73
74 /* This file should be included last. */
75 #include "target-def.h"
76
77 /* Forward definitions of types. */
78 typedef struct minipool_node Mnode;
79 typedef struct minipool_fixup Mfix;
80
81 /* The last .arch and .fpu assembly strings that we printed. */
82 static std::string arm_last_printed_arch_string;
83 static std::string arm_last_printed_fpu_string;
84
85 void (*arm_lang_output_object_attributes_hook)(void);
86
87 struct four_ints
88 {
89 int i[4];
90 };
91
92 /* Forward function declarations. */
93 static bool arm_const_not_ok_for_debug_p (rtx);
94 static int arm_needs_doubleword_align (machine_mode, const_tree);
95 static int arm_compute_static_chain_stack_bytes (void);
96 static arm_stack_offsets *arm_get_frame_offsets (void);
97 static void arm_compute_frame_layout (void);
98 static void arm_add_gc_roots (void);
99 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
100 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
101 static unsigned bit_count (unsigned long);
102 static unsigned bitmap_popcount (const sbitmap);
103 static int arm_address_register_rtx_p (rtx, int);
104 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
105 static bool is_called_in_ARM_mode (tree);
106 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
107 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
108 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
109 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
110 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
111 inline static int thumb1_index_register_rtx_p (rtx, int);
112 static int thumb_far_jump_used_p (void);
113 static bool thumb_force_lr_save (void);
114 static unsigned arm_size_return_regs (void);
115 static bool arm_assemble_integer (rtx, unsigned int, int);
116 static void arm_print_operand (FILE *, rtx, int);
117 static void arm_print_operand_address (FILE *, machine_mode, rtx);
118 static bool arm_print_operand_punct_valid_p (unsigned char code);
119 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
120 static arm_cc get_arm_condition_code (rtx);
121 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
122 static const char *output_multi_immediate (rtx *, const char *, const char *,
123 int, HOST_WIDE_INT);
124 static const char *shift_op (rtx, HOST_WIDE_INT *);
125 static struct machine_function *arm_init_machine_status (void);
126 static void thumb_exit (FILE *, int);
127 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
128 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
129 static Mnode *add_minipool_forward_ref (Mfix *);
130 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
131 static Mnode *add_minipool_backward_ref (Mfix *);
132 static void assign_minipool_offsets (Mfix *);
133 static void arm_print_value (FILE *, rtx);
134 static void dump_minipool (rtx_insn *);
135 static int arm_barrier_cost (rtx_insn *);
136 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
137 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
138 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
139 machine_mode, rtx);
140 static void arm_reorg (void);
141 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
142 static unsigned long arm_compute_save_reg0_reg12_mask (void);
143 static unsigned long arm_compute_save_core_reg_mask (void);
144 static unsigned long arm_isr_value (tree);
145 static unsigned long arm_compute_func_type (void);
146 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
147 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
148 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
149 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
150 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
151 #endif
152 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
153 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
154 static void arm_output_function_epilogue (FILE *);
155 static void arm_output_function_prologue (FILE *);
156 static int arm_comp_type_attributes (const_tree, const_tree);
157 static void arm_set_default_type_attributes (tree);
158 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
159 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
160 static int optimal_immediate_sequence (enum rtx_code code,
161 unsigned HOST_WIDE_INT val,
162 struct four_ints *return_sequence);
163 static int optimal_immediate_sequence_1 (enum rtx_code code,
164 unsigned HOST_WIDE_INT val,
165 struct four_ints *return_sequence,
166 int i);
167 static int arm_get_strip_length (int);
168 static bool arm_function_ok_for_sibcall (tree, tree);
169 static machine_mode arm_promote_function_mode (const_tree,
170 machine_mode, int *,
171 const_tree, int);
172 static bool arm_return_in_memory (const_tree, const_tree);
173 static rtx arm_function_value (const_tree, const_tree, bool);
174 static rtx arm_libcall_value_1 (machine_mode);
175 static rtx arm_libcall_value (machine_mode, const_rtx);
176 static bool arm_function_value_regno_p (const unsigned int);
177 static void arm_internal_label (FILE *, const char *, unsigned long);
178 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
179 tree);
180 static bool arm_have_conditional_execution (void);
181 static bool arm_cannot_force_const_mem (machine_mode, rtx);
182 static bool arm_legitimate_constant_p (machine_mode, rtx);
183 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
184 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
185 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
186 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
187 static void emit_constant_insn (rtx cond, rtx pattern);
188 static rtx_insn *emit_set_insn (rtx, rtx);
189 static rtx emit_multi_reg_push (unsigned long, unsigned long);
190 static int arm_arg_partial_bytes (cumulative_args_t,
191 const function_arg_info &);
192 static rtx arm_function_arg (cumulative_args_t, machine_mode,
193 const_tree, bool);
194 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
195 const_tree, bool);
196 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
197 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
198 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
199 const_tree);
200 static rtx aapcs_libcall_value (machine_mode);
201 static int aapcs_select_return_coproc (const_tree, const_tree);
202
203 #ifdef OBJECT_FORMAT_ELF
204 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
205 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
206 #endif
207 #ifndef ARM_PE
208 static void arm_encode_section_info (tree, rtx, int);
209 #endif
210
211 static void arm_file_end (void);
212 static void arm_file_start (void);
213 static void arm_insert_attributes (tree, tree *);
214
215 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
216 tree, int *, int);
217 static bool arm_pass_by_reference (cumulative_args_t,
218 const function_arg_info &);
219 static bool arm_promote_prototypes (const_tree);
220 static bool arm_default_short_enums (void);
221 static bool arm_align_anon_bitfield (void);
222 static bool arm_return_in_msb (const_tree);
223 static bool arm_must_pass_in_stack (machine_mode, const_tree);
224 static bool arm_return_in_memory (const_tree, const_tree);
225 #if ARM_UNWIND_INFO
226 static void arm_unwind_emit (FILE *, rtx_insn *);
227 static bool arm_output_ttype (rtx);
228 static void arm_asm_emit_except_personality (rtx);
229 #endif
230 static void arm_asm_init_sections (void);
231 static rtx arm_dwarf_register_span (rtx);
232
233 static tree arm_cxx_guard_type (void);
234 static bool arm_cxx_guard_mask_bit (void);
235 static tree arm_get_cookie_size (tree);
236 static bool arm_cookie_has_size (void);
237 static bool arm_cxx_cdtor_returns_this (void);
238 static bool arm_cxx_key_method_may_be_inline (void);
239 static void arm_cxx_determine_class_data_visibility (tree);
240 static bool arm_cxx_class_data_always_comdat (void);
241 static bool arm_cxx_use_aeabi_atexit (void);
242 static void arm_init_libfuncs (void);
243 static tree arm_build_builtin_va_list (void);
244 static void arm_expand_builtin_va_start (tree, rtx);
245 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
246 static void arm_option_override (void);
247 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
248 static void arm_option_restore (struct gcc_options *,
249 struct cl_target_option *);
250 static void arm_override_options_after_change (void);
251 static void arm_option_print (FILE *, int, struct cl_target_option *);
252 static void arm_set_current_function (tree);
253 static bool arm_can_inline_p (tree, tree);
254 static void arm_relayout_function (tree);
255 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
256 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
257 static bool arm_sched_can_speculate_insn (rtx_insn *);
258 static bool arm_macro_fusion_p (void);
259 static bool arm_cannot_copy_insn_p (rtx_insn *);
260 static int arm_issue_rate (void);
261 static int arm_first_cycle_multipass_dfa_lookahead (void);
262 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
263 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
264 static bool arm_output_addr_const_extra (FILE *, rtx);
265 static bool arm_allocate_stack_slots_for_args (void);
266 static bool arm_warn_func_return (tree);
267 static tree arm_promoted_type (const_tree t);
268 static bool arm_scalar_mode_supported_p (scalar_mode);
269 static bool arm_frame_pointer_required (void);
270 static bool arm_can_eliminate (const int, const int);
271 static void arm_asm_trampoline_template (FILE *);
272 static void arm_trampoline_init (rtx, tree, rtx);
273 static rtx arm_trampoline_adjust_address (rtx);
274 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
275 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
276 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
277 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
278 static bool arm_array_mode_supported_p (machine_mode,
279 unsigned HOST_WIDE_INT);
280 static machine_mode arm_preferred_simd_mode (scalar_mode);
281 static bool arm_class_likely_spilled_p (reg_class_t);
282 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
283 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
284 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
285 const_tree type,
286 int misalignment,
287 bool is_packed);
288 static void arm_conditional_register_usage (void);
289 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
290 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
291 static void arm_autovectorize_vector_sizes (vector_sizes *, bool);
292 static int arm_default_branch_cost (bool, bool);
293 static int arm_cortex_a5_branch_cost (bool, bool);
294 static int arm_cortex_m_branch_cost (bool, bool);
295 static int arm_cortex_m7_branch_cost (bool, bool);
296
297 static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
298 const vec_perm_indices &);
299
300 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
301
302 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
303 tree vectype,
304 int misalign ATTRIBUTE_UNUSED);
305 static unsigned arm_add_stmt_cost (void *data, int count,
306 enum vect_cost_for_stmt kind,
307 struct _stmt_vec_info *stmt_info,
308 int misalign,
309 enum vect_cost_model_location where);
310
311 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
312 bool op0_preserve_value);
313 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
314
315 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
316 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
317 const_tree);
318 static section *arm_function_section (tree, enum node_frequency, bool, bool);
319 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
320 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
321 int reloc);
322 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
323 static opt_scalar_float_mode arm_floatn_mode (int, bool);
324 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
325 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
326 static bool arm_modes_tieable_p (machine_mode, machine_mode);
327 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
328 \f
329 /* Table of machine attributes. */
330 static const struct attribute_spec arm_attribute_table[] =
331 {
332 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
333 affects_type_identity, handler, exclude } */
334 /* Function calls made to this symbol must be done indirectly, because
335 it may lie outside of the 26 bit addressing range of a normal function
336 call. */
337 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
338 /* Whereas these functions are always known to reside within the 26 bit
339 addressing range. */
340 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
341 /* Specify the procedure call conventions for a function. */
342 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
343 NULL },
344 /* Interrupt Service Routines have special prologue and epilogue requirements. */
345 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
346 NULL },
347 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
348 NULL },
349 { "naked", 0, 0, true, false, false, false,
350 arm_handle_fndecl_attribute, NULL },
351 #ifdef ARM_PE
352 /* ARM/PE has three new attributes:
353 interfacearm - ?
354 dllexport - for exporting a function/variable that will live in a dll
355 dllimport - for importing a function/variable from a dll
356
357 Microsoft allows multiple declspecs in one __declspec, separating
358 them with spaces. We do NOT support this. Instead, use __declspec
359 multiple times.
360 */
361 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
362 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
363 { "interfacearm", 0, 0, true, false, false, false,
364 arm_handle_fndecl_attribute, NULL },
365 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
366 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
367 NULL },
368 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
369 NULL },
370 { "notshared", 0, 0, false, true, false, false,
371 arm_handle_notshared_attribute, NULL },
372 #endif
373 /* ARMv8-M Security Extensions support. */
374 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
375 arm_handle_cmse_nonsecure_entry, NULL },
376 { "cmse_nonsecure_call", 0, 0, true, false, false, true,
377 arm_handle_cmse_nonsecure_call, NULL },
378 { NULL, 0, 0, false, false, false, false, NULL, NULL }
379 };
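/* Illustrative sketch, not part of the compiler proper: how user source
   might spell the attributes accepted by the table above.  The function
   names below are hypothetical and only demonstrate the syntax.

     void far_away_fn (void) __attribute__ ((long_call));
     void __attribute__ ((isr ("IRQ"))) my_irq_handler (void);
     double __attribute__ ((pcs ("aapcs-vfp"))) vfp_variant_fn (double);
     void __attribute__ ((cmse_nonsecure_entry)) secure_gateway_fn (void);  */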
380 \f
381 /* Initialize the GCC target structure. */
382 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
383 #undef TARGET_MERGE_DECL_ATTRIBUTES
384 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
385 #endif
386
387 #undef TARGET_LEGITIMIZE_ADDRESS
388 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
389
390 #undef TARGET_ATTRIBUTE_TABLE
391 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
392
393 #undef TARGET_INSERT_ATTRIBUTES
394 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
395
396 #undef TARGET_ASM_FILE_START
397 #define TARGET_ASM_FILE_START arm_file_start
398 #undef TARGET_ASM_FILE_END
399 #define TARGET_ASM_FILE_END arm_file_end
400
401 #undef TARGET_ASM_ALIGNED_SI_OP
402 #define TARGET_ASM_ALIGNED_SI_OP NULL
403 #undef TARGET_ASM_INTEGER
404 #define TARGET_ASM_INTEGER arm_assemble_integer
405
406 #undef TARGET_PRINT_OPERAND
407 #define TARGET_PRINT_OPERAND arm_print_operand
408 #undef TARGET_PRINT_OPERAND_ADDRESS
409 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
410 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
411 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
412
413 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
414 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
415
416 #undef TARGET_ASM_FUNCTION_PROLOGUE
417 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
418
419 #undef TARGET_ASM_FUNCTION_EPILOGUE
420 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
421
422 #undef TARGET_CAN_INLINE_P
423 #define TARGET_CAN_INLINE_P arm_can_inline_p
424
425 #undef TARGET_RELAYOUT_FUNCTION
426 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
427
428 #undef TARGET_OPTION_OVERRIDE
429 #define TARGET_OPTION_OVERRIDE arm_option_override
430
431 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
432 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
433
434 #undef TARGET_OPTION_SAVE
435 #define TARGET_OPTION_SAVE arm_option_save
436
437 #undef TARGET_OPTION_RESTORE
438 #define TARGET_OPTION_RESTORE arm_option_restore
439
440 #undef TARGET_OPTION_PRINT
441 #define TARGET_OPTION_PRINT arm_option_print
442
443 #undef TARGET_COMP_TYPE_ATTRIBUTES
444 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
445
446 #undef TARGET_SCHED_CAN_SPECULATE_INSN
447 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
448
449 #undef TARGET_SCHED_MACRO_FUSION_P
450 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
451
452 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
453 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
454
455 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
456 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
457
458 #undef TARGET_SCHED_ADJUST_COST
459 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
460
461 #undef TARGET_SET_CURRENT_FUNCTION
462 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
463
464 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
465 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
466
467 #undef TARGET_SCHED_REORDER
468 #define TARGET_SCHED_REORDER arm_sched_reorder
469
470 #undef TARGET_REGISTER_MOVE_COST
471 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
472
473 #undef TARGET_MEMORY_MOVE_COST
474 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
475
476 #undef TARGET_ENCODE_SECTION_INFO
477 #ifdef ARM_PE
478 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
479 #else
480 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
481 #endif
482
483 #undef TARGET_STRIP_NAME_ENCODING
484 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
485
486 #undef TARGET_ASM_INTERNAL_LABEL
487 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
488
489 #undef TARGET_FLOATN_MODE
490 #define TARGET_FLOATN_MODE arm_floatn_mode
491
492 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
493 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
494
495 #undef TARGET_FUNCTION_VALUE
496 #define TARGET_FUNCTION_VALUE arm_function_value
497
498 #undef TARGET_LIBCALL_VALUE
499 #define TARGET_LIBCALL_VALUE arm_libcall_value
500
501 #undef TARGET_FUNCTION_VALUE_REGNO_P
502 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
503
504 #undef TARGET_ASM_OUTPUT_MI_THUNK
505 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
506 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
507 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
508
509 #undef TARGET_RTX_COSTS
510 #define TARGET_RTX_COSTS arm_rtx_costs
511 #undef TARGET_ADDRESS_COST
512 #define TARGET_ADDRESS_COST arm_address_cost
513
514 #undef TARGET_SHIFT_TRUNCATION_MASK
515 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
516 #undef TARGET_VECTOR_MODE_SUPPORTED_P
517 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
518 #undef TARGET_ARRAY_MODE_SUPPORTED_P
519 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
520 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
521 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
522 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
523 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
524 arm_autovectorize_vector_sizes
525
526 #undef TARGET_MACHINE_DEPENDENT_REORG
527 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
528
529 #undef TARGET_INIT_BUILTINS
530 #define TARGET_INIT_BUILTINS arm_init_builtins
531 #undef TARGET_EXPAND_BUILTIN
532 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
533 #undef TARGET_BUILTIN_DECL
534 #define TARGET_BUILTIN_DECL arm_builtin_decl
535
536 #undef TARGET_INIT_LIBFUNCS
537 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
538
539 #undef TARGET_PROMOTE_FUNCTION_MODE
540 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
541 #undef TARGET_PROMOTE_PROTOTYPES
542 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
543 #undef TARGET_PASS_BY_REFERENCE
544 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
545 #undef TARGET_ARG_PARTIAL_BYTES
546 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
547 #undef TARGET_FUNCTION_ARG
548 #define TARGET_FUNCTION_ARG arm_function_arg
549 #undef TARGET_FUNCTION_ARG_ADVANCE
550 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
551 #undef TARGET_FUNCTION_ARG_PADDING
552 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
553 #undef TARGET_FUNCTION_ARG_BOUNDARY
554 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
555
556 #undef TARGET_SETUP_INCOMING_VARARGS
557 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
558
559 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
560 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
561
562 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
563 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
564 #undef TARGET_TRAMPOLINE_INIT
565 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
566 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
567 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
568
569 #undef TARGET_WARN_FUNC_RETURN
570 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
571
572 #undef TARGET_DEFAULT_SHORT_ENUMS
573 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
574
575 #undef TARGET_ALIGN_ANON_BITFIELD
576 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
577
578 #undef TARGET_NARROW_VOLATILE_BITFIELD
579 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
580
581 #undef TARGET_CXX_GUARD_TYPE
582 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
583
584 #undef TARGET_CXX_GUARD_MASK_BIT
585 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
586
587 #undef TARGET_CXX_GET_COOKIE_SIZE
588 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
589
590 #undef TARGET_CXX_COOKIE_HAS_SIZE
591 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
592
593 #undef TARGET_CXX_CDTOR_RETURNS_THIS
594 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
595
596 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
597 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
598
599 #undef TARGET_CXX_USE_AEABI_ATEXIT
600 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
601
602 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
603 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
604 arm_cxx_determine_class_data_visibility
605
606 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
607 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
608
609 #undef TARGET_RETURN_IN_MSB
610 #define TARGET_RETURN_IN_MSB arm_return_in_msb
611
612 #undef TARGET_RETURN_IN_MEMORY
613 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
614
615 #undef TARGET_MUST_PASS_IN_STACK
616 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
617
618 #if ARM_UNWIND_INFO
619 #undef TARGET_ASM_UNWIND_EMIT
620 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
621
622 /* EABI unwinding tables use a different format for the typeinfo tables. */
623 #undef TARGET_ASM_TTYPE
624 #define TARGET_ASM_TTYPE arm_output_ttype
625
626 #undef TARGET_ARM_EABI_UNWINDER
627 #define TARGET_ARM_EABI_UNWINDER true
628
629 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
630 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
631
632 #endif /* ARM_UNWIND_INFO */
633
634 #undef TARGET_ASM_INIT_SECTIONS
635 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
636
637 #undef TARGET_DWARF_REGISTER_SPAN
638 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
639
640 #undef TARGET_CANNOT_COPY_INSN_P
641 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
642
643 #ifdef HAVE_AS_TLS
644 #undef TARGET_HAVE_TLS
645 #define TARGET_HAVE_TLS true
646 #endif
647
648 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
649 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
650
651 #undef TARGET_LEGITIMATE_CONSTANT_P
652 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
653
654 #undef TARGET_CANNOT_FORCE_CONST_MEM
655 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
656
657 #undef TARGET_MAX_ANCHOR_OFFSET
658 #define TARGET_MAX_ANCHOR_OFFSET 4095
659
660 /* The minimum is set such that the total size of the block
661 for a particular anchor is 4088 + 1 + 4095 = 8184 bytes, which is
662 divisible by eight, ensuring natural spacing of anchors. */
663 #undef TARGET_MIN_ANCHOR_OFFSET
664 #define TARGET_MIN_ANCHOR_OFFSET -4088
665
666 #undef TARGET_SCHED_ISSUE_RATE
667 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
668
669 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
670 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
671 arm_first_cycle_multipass_dfa_lookahead
672
673 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
674 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
675 arm_first_cycle_multipass_dfa_lookahead_guard
676
677 #undef TARGET_MANGLE_TYPE
678 #define TARGET_MANGLE_TYPE arm_mangle_type
679
680 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
681 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
682
683 #undef TARGET_BUILD_BUILTIN_VA_LIST
684 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
685 #undef TARGET_EXPAND_BUILTIN_VA_START
686 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
687 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
688 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
689
690 #ifdef HAVE_AS_TLS
691 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
692 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
693 #endif
694
695 #undef TARGET_LEGITIMATE_ADDRESS_P
696 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
697
698 #undef TARGET_PREFERRED_RELOAD_CLASS
699 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
700
701 #undef TARGET_PROMOTED_TYPE
702 #define TARGET_PROMOTED_TYPE arm_promoted_type
703
704 #undef TARGET_SCALAR_MODE_SUPPORTED_P
705 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
706
707 #undef TARGET_COMPUTE_FRAME_LAYOUT
708 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
709
710 #undef TARGET_FRAME_POINTER_REQUIRED
711 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
712
713 #undef TARGET_CAN_ELIMINATE
714 #define TARGET_CAN_ELIMINATE arm_can_eliminate
715
716 #undef TARGET_CONDITIONAL_REGISTER_USAGE
717 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
718
719 #undef TARGET_CLASS_LIKELY_SPILLED_P
720 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
721
722 #undef TARGET_VECTORIZE_BUILTINS
723 #define TARGET_VECTORIZE_BUILTINS
724
725 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
726 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
727 arm_builtin_vectorized_function
728
729 #undef TARGET_VECTOR_ALIGNMENT
730 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
731
732 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
733 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
734 arm_vector_alignment_reachable
735
736 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
737 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
738 arm_builtin_support_vector_misalignment
739
740 #undef TARGET_PREFERRED_RENAME_CLASS
741 #define TARGET_PREFERRED_RENAME_CLASS \
742 arm_preferred_rename_class
743
744 #undef TARGET_VECTORIZE_VEC_PERM_CONST
745 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
746
747 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
748 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
749 arm_builtin_vectorization_cost
750 #undef TARGET_VECTORIZE_ADD_STMT_COST
751 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
752
753 #undef TARGET_CANONICALIZE_COMPARISON
754 #define TARGET_CANONICALIZE_COMPARISON \
755 arm_canonicalize_comparison
756
757 #undef TARGET_ASAN_SHADOW_OFFSET
758 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
759
760 #undef MAX_INSN_PER_IT_BLOCK
761 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
762
763 #undef TARGET_CAN_USE_DOLOOP_P
764 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
765
766 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
767 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
768
769 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
770 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
771
772 #undef TARGET_SCHED_FUSION_PRIORITY
773 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
774
775 #undef TARGET_ASM_FUNCTION_SECTION
776 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
777
778 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
779 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
780
781 #undef TARGET_SECTION_TYPE_FLAGS
782 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
783
784 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
785 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
786
787 #undef TARGET_C_EXCESS_PRECISION
788 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
789
790 /* Although the architecture reserves bits 0 and 1, only the former is
791 used for ARM/Thumb ISA selection in v7 and earlier versions. */
792 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
793 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
794
795 #undef TARGET_FIXED_CONDITION_CODE_REGS
796 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
797
798 #undef TARGET_HARD_REGNO_NREGS
799 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
800 #undef TARGET_HARD_REGNO_MODE_OK
801 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
802
803 #undef TARGET_MODES_TIEABLE_P
804 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
805
806 #undef TARGET_CAN_CHANGE_MODE_CLASS
807 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
808
809 #undef TARGET_CONSTANT_ALIGNMENT
810 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
811 \f
812 /* Obstack for minipool constant handling. */
813 static struct obstack minipool_obstack;
814 static char * minipool_startobj;
815
816 /* The maximum number of insns skipped which
817 will be conditionalised if possible. */
818 static int max_insns_skipped = 5;
819
820 extern FILE * asm_out_file;
821
822 /* True if we are currently building a constant table. */
823 int making_const_table;
824
825 /* The processor for which instructions should be scheduled. */
826 enum processor_type arm_tune = TARGET_CPU_arm_none;
827
828 /* The current tuning set. */
829 const struct tune_params *current_tune;
830
831 /* Which floating point hardware to schedule for. */
832 int arm_fpu_attr;
833
834 /* Used for Thumb call_via trampolines. */
835 rtx thumb_call_via_label[14];
836 static int thumb_call_reg_needed;
837
838 /* The bits in this mask specify which instruction scheduling options should
839 be used. */
840 unsigned int tune_flags = 0;
841
842 /* The highest ARM architecture version supported by the
843 target. */
844 enum base_architecture arm_base_arch = BASE_ARCH_0;
845
846 /* Active target architecture and tuning. */
847
848 struct arm_build_target arm_active_target;
849
850 /* The following are used in the arm.md file as equivalents to bits
851 in the above two flag variables. */
852
853 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
854 int arm_arch4 = 0;
855
856 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
857 int arm_arch4t = 0;
858
859 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
860 int arm_arch5t = 0;
861
862 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
863 int arm_arch5te = 0;
864
865 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
866 int arm_arch6 = 0;
867
868 /* Nonzero if this chip supports the ARM 6K extensions. */
869 int arm_arch6k = 0;
870
871 /* Nonzero if this chip supports the ARM 6KZ extensions. */
872 int arm_arch6kz = 0;
873
874 /* Nonzero if instructions present in ARMv6-M can be used. */
875 int arm_arch6m = 0;
876
877 /* Nonzero if this chip supports the ARM 7 extensions. */
878 int arm_arch7 = 0;
879
880 /* Nonzero if this chip supports the Large Physical Address Extension. */
881 int arm_arch_lpae = 0;
882
883 /* Nonzero if instructions not present in the 'M' profile can be used. */
884 int arm_arch_notm = 0;
885
886 /* Nonzero if instructions present in ARMv7E-M can be used. */
887 int arm_arch7em = 0;
888
889 /* Nonzero if instructions present in ARMv8 can be used. */
890 int arm_arch8 = 0;
891
892 /* Nonzero if this chip supports the ARMv8.1 extensions. */
893 int arm_arch8_1 = 0;
894
895 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
896 int arm_arch8_2 = 0;
897
898 /* Nonzero if this chip supports the ARM Architecture 8.3 extensions. */
899 int arm_arch8_3 = 0;
900
901 /* Nonzero if this chip supports the ARM Architecture 8.4 extensions. */
902 int arm_arch8_4 = 0;
903
904 /* Nonzero if this chip supports the FP16 instructions extension of ARM
905 Architecture 8.2. */
906 int arm_fp16_inst = 0;
907
908 /* Nonzero if this chip can benefit from load scheduling. */
909 int arm_ld_sched = 0;
910
911 /* Nonzero if this chip is a StrongARM. */
912 int arm_tune_strongarm = 0;
913
914 /* Nonzero if this chip supports Intel Wireless MMX technology. */
915 int arm_arch_iwmmxt = 0;
916
917 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
918 int arm_arch_iwmmxt2 = 0;
919
920 /* Nonzero if this chip is an XScale. */
921 int arm_arch_xscale = 0;
922
923 /* Nonzero if tuning for XScale */
924 int arm_tune_xscale = 0;
925
926 /* Nonzero if we want to tune for stores that access the write-buffer.
927 This typically means an ARM6 or ARM7 with MMU or MPU. */
928 int arm_tune_wbuf = 0;
929
930 /* Nonzero if tuning for Cortex-A9. */
931 int arm_tune_cortex_a9 = 0;
932
933 /* Nonzero if we should define __THUMB_INTERWORK__ in the
934 preprocessor.
935 XXX This is a bit of a hack, it's intended to help work around
936 problems in GLD which doesn't understand that armv5t code is
937 interworking clean. */
938 int arm_cpp_interwork = 0;
939
940 /* Nonzero if chip supports Thumb 1. */
941 int arm_arch_thumb1;
942
943 /* Nonzero if chip supports Thumb 2. */
944 int arm_arch_thumb2;
945
946 /* Nonzero if chip supports integer division instruction. */
947 int arm_arch_arm_hwdiv;
948 int arm_arch_thumb_hwdiv;
949
950 /* Nonzero if chip disallows volatile memory access in IT block. */
951 int arm_arch_no_volatile_ce;
952
953 /* Nonzero if we should use Neon to handle 64-bit operations rather
954 than core registers. */
955 int prefer_neon_for_64bits = 0;
956
957 /* Nonzero if we shouldn't use literal pools. */
958 bool arm_disable_literal_pool = false;
959
960 /* The register number to be used for the PIC offset register. */
961 unsigned arm_pic_register = INVALID_REGNUM;
962
963 enum arm_pcs arm_pcs_default;
964
965 /* For an explanation of these variables, see final_prescan_insn below. */
966 int arm_ccfsm_state;
967 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
968 enum arm_cond_code arm_current_cc;
969
970 rtx arm_target_insn;
971 int arm_target_label;
972 /* The number of conditionally executed insns, including the current insn. */
973 int arm_condexec_count = 0;
974 /* A bitmask specifying the patterns for the IT block.
975 Zero means do not output an IT block before this insn. */
976 int arm_condexec_mask = 0;
977 /* The number of bits used in arm_condexec_mask. */
978 int arm_condexec_masklen = 0;
979
980 /* Nonzero if chip supports the ARMv8 CRC instructions. */
981 int arm_arch_crc = 0;
982
983 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
984 int arm_arch_dotprod = 0;
985
986 /* Nonzero if chip supports the ARMv8-M security extensions. */
987 int arm_arch_cmse = 0;
988
989 /* Nonzero if the core has a very small, high-latency, multiply unit. */
990 int arm_m_profile_small_mul = 0;
991
992 /* The condition codes of the ARM, and the inverse function. */
993 static const char * const arm_condition_codes[] =
994 {
995 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
996 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
997 };
998
999 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
1000 int arm_regs_in_sequence[] =
1001 {
1002 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
1003 };
1004
1005 #define ARM_LSL_NAME "lsl"
1006 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1007
1008 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1009 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1010 | (1 << PIC_OFFSET_TABLE_REGNUM)))
1011 \f
1012 /* Initialization code. */
1013
1014 struct cpu_tune
1015 {
1016 enum processor_type scheduler;
1017 unsigned int tune_flags;
1018 const struct tune_params *tune;
1019 };
1020
1021 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1022 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1023 { \
1024 num_slots, \
1025 l1_size, \
1026 l1_line_size \
1027 }
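/* For illustration only (the operand values here are hypothetical):
   ARM_PREFETCH_BENEFICIAL (4, 32, 64) expands to the initializer
   { 4, 32, 64 }, while ARM_PREFETCH_NOT_BENEFICIAL expands to
   { 0, -1, -1 }, i.e. no prefetch slots and the L1 cache parameters
   left unspecified.  */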
1028
1029 /* arm generic vectorizer costs. */
1030 static const
1031 struct cpu_vec_costs arm_default_vec_cost = {
1032 1, /* scalar_stmt_cost. */
1033 1, /* scalar_load_cost. */
1034 1, /* scalar_store_cost. */
1035 1, /* vec_stmt_cost. */
1036 1, /* vec_to_scalar_cost. */
1037 1, /* scalar_to_vec_cost. */
1038 1, /* vec_align_load_cost. */
1039 1, /* vec_unalign_load_cost. */
1040 1, /* vec_unalign_store_cost. */
1041 1, /* vec_store_cost. */
1042 3, /* cond_taken_branch_cost. */
1043 1, /* cond_not_taken_branch_cost. */
1044 };
1045
1046 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1047 #include "aarch-cost-tables.h"
1048
1049
1050
1051 const struct cpu_cost_table cortexa9_extra_costs =
1052 {
1053 /* ALU */
1054 {
1055 0, /* arith. */
1056 0, /* logical. */
1057 0, /* shift. */
1058 COSTS_N_INSNS (1), /* shift_reg. */
1059 COSTS_N_INSNS (1), /* arith_shift. */
1060 COSTS_N_INSNS (2), /* arith_shift_reg. */
1061 0, /* log_shift. */
1062 COSTS_N_INSNS (1), /* log_shift_reg. */
1063 COSTS_N_INSNS (1), /* extend. */
1064 COSTS_N_INSNS (2), /* extend_arith. */
1065 COSTS_N_INSNS (1), /* bfi. */
1066 COSTS_N_INSNS (1), /* bfx. */
1067 0, /* clz. */
1068 0, /* rev. */
1069 0, /* non_exec. */
1070 true /* non_exec_costs_exec. */
1071 },
1072 {
1073 /* MULT SImode */
1074 {
1075 COSTS_N_INSNS (3), /* simple. */
1076 COSTS_N_INSNS (3), /* flag_setting. */
1077 COSTS_N_INSNS (2), /* extend. */
1078 COSTS_N_INSNS (3), /* add. */
1079 COSTS_N_INSNS (2), /* extend_add. */
1080 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1081 },
1082 /* MULT DImode */
1083 {
1084 0, /* simple (N/A). */
1085 0, /* flag_setting (N/A). */
1086 COSTS_N_INSNS (4), /* extend. */
1087 0, /* add (N/A). */
1088 COSTS_N_INSNS (4), /* extend_add. */
1089 0 /* idiv (N/A). */
1090 }
1091 },
1092 /* LD/ST */
1093 {
1094 COSTS_N_INSNS (2), /* load. */
1095 COSTS_N_INSNS (2), /* load_sign_extend. */
1096 COSTS_N_INSNS (2), /* ldrd. */
1097 COSTS_N_INSNS (2), /* ldm_1st. */
1098 1, /* ldm_regs_per_insn_1st. */
1099 2, /* ldm_regs_per_insn_subsequent. */
1100 COSTS_N_INSNS (5), /* loadf. */
1101 COSTS_N_INSNS (5), /* loadd. */
1102 COSTS_N_INSNS (1), /* load_unaligned. */
1103 COSTS_N_INSNS (2), /* store. */
1104 COSTS_N_INSNS (2), /* strd. */
1105 COSTS_N_INSNS (2), /* stm_1st. */
1106 1, /* stm_regs_per_insn_1st. */
1107 2, /* stm_regs_per_insn_subsequent. */
1108 COSTS_N_INSNS (1), /* storef. */
1109 COSTS_N_INSNS (1), /* stored. */
1110 COSTS_N_INSNS (1), /* store_unaligned. */
1111 COSTS_N_INSNS (1), /* loadv. */
1112 COSTS_N_INSNS (1) /* storev. */
1113 },
1114 {
1115 /* FP SFmode */
1116 {
1117 COSTS_N_INSNS (14), /* div. */
1118 COSTS_N_INSNS (4), /* mult. */
1119 COSTS_N_INSNS (7), /* mult_addsub. */
1120 COSTS_N_INSNS (30), /* fma. */
1121 COSTS_N_INSNS (3), /* addsub. */
1122 COSTS_N_INSNS (1), /* fpconst. */
1123 COSTS_N_INSNS (1), /* neg. */
1124 COSTS_N_INSNS (3), /* compare. */
1125 COSTS_N_INSNS (3), /* widen. */
1126 COSTS_N_INSNS (3), /* narrow. */
1127 COSTS_N_INSNS (3), /* toint. */
1128 COSTS_N_INSNS (3), /* fromint. */
1129 COSTS_N_INSNS (3) /* roundint. */
1130 },
1131 /* FP DFmode */
1132 {
1133 COSTS_N_INSNS (24), /* div. */
1134 COSTS_N_INSNS (5), /* mult. */
1135 COSTS_N_INSNS (8), /* mult_addsub. */
1136 COSTS_N_INSNS (30), /* fma. */
1137 COSTS_N_INSNS (3), /* addsub. */
1138 COSTS_N_INSNS (1), /* fpconst. */
1139 COSTS_N_INSNS (1), /* neg. */
1140 COSTS_N_INSNS (3), /* compare. */
1141 COSTS_N_INSNS (3), /* widen. */
1142 COSTS_N_INSNS (3), /* narrow. */
1143 COSTS_N_INSNS (3), /* toint. */
1144 COSTS_N_INSNS (3), /* fromint. */
1145 COSTS_N_INSNS (3) /* roundint. */
1146 }
1147 },
1148 /* Vector */
1149 {
1150 COSTS_N_INSNS (1) /* alu. */
1151 }
1152 };
1153
1154 const struct cpu_cost_table cortexa8_extra_costs =
1155 {
1156 /* ALU */
1157 {
1158 0, /* arith. */
1159 0, /* logical. */
1160 COSTS_N_INSNS (1), /* shift. */
1161 0, /* shift_reg. */
1162 COSTS_N_INSNS (1), /* arith_shift. */
1163 0, /* arith_shift_reg. */
1164 COSTS_N_INSNS (1), /* log_shift. */
1165 0, /* log_shift_reg. */
1166 0, /* extend. */
1167 0, /* extend_arith. */
1168 0, /* bfi. */
1169 0, /* bfx. */
1170 0, /* clz. */
1171 0, /* rev. */
1172 0, /* non_exec. */
1173 true /* non_exec_costs_exec. */
1174 },
1175 {
1176 /* MULT SImode */
1177 {
1178 COSTS_N_INSNS (1), /* simple. */
1179 COSTS_N_INSNS (1), /* flag_setting. */
1180 COSTS_N_INSNS (1), /* extend. */
1181 COSTS_N_INSNS (1), /* add. */
1182 COSTS_N_INSNS (1), /* extend_add. */
1183 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1184 },
1185 /* MULT DImode */
1186 {
1187 0, /* simple (N/A). */
1188 0, /* flag_setting (N/A). */
1189 COSTS_N_INSNS (2), /* extend. */
1190 0, /* add (N/A). */
1191 COSTS_N_INSNS (2), /* extend_add. */
1192 0 /* idiv (N/A). */
1193 }
1194 },
1195 /* LD/ST */
1196 {
1197 COSTS_N_INSNS (1), /* load. */
1198 COSTS_N_INSNS (1), /* load_sign_extend. */
1199 COSTS_N_INSNS (1), /* ldrd. */
1200 COSTS_N_INSNS (1), /* ldm_1st. */
1201 1, /* ldm_regs_per_insn_1st. */
1202 2, /* ldm_regs_per_insn_subsequent. */
1203 COSTS_N_INSNS (1), /* loadf. */
1204 COSTS_N_INSNS (1), /* loadd. */
1205 COSTS_N_INSNS (1), /* load_unaligned. */
1206 COSTS_N_INSNS (1), /* store. */
1207 COSTS_N_INSNS (1), /* strd. */
1208 COSTS_N_INSNS (1), /* stm_1st. */
1209 1, /* stm_regs_per_insn_1st. */
1210 2, /* stm_regs_per_insn_subsequent. */
1211 COSTS_N_INSNS (1), /* storef. */
1212 COSTS_N_INSNS (1), /* stored. */
1213 COSTS_N_INSNS (1), /* store_unaligned. */
1214 COSTS_N_INSNS (1), /* loadv. */
1215 COSTS_N_INSNS (1) /* storev. */
1216 },
1217 {
1218 /* FP SFmode */
1219 {
1220 COSTS_N_INSNS (36), /* div. */
1221 COSTS_N_INSNS (11), /* mult. */
1222 COSTS_N_INSNS (20), /* mult_addsub. */
1223 COSTS_N_INSNS (30), /* fma. */
1224 COSTS_N_INSNS (9), /* addsub. */
1225 COSTS_N_INSNS (3), /* fpconst. */
1226 COSTS_N_INSNS (3), /* neg. */
1227 COSTS_N_INSNS (6), /* compare. */
1228 COSTS_N_INSNS (4), /* widen. */
1229 COSTS_N_INSNS (4), /* narrow. */
1230 COSTS_N_INSNS (8), /* toint. */
1231 COSTS_N_INSNS (8), /* fromint. */
1232 COSTS_N_INSNS (8) /* roundint. */
1233 },
1234 /* FP DFmode */
1235 {
1236 COSTS_N_INSNS (64), /* div. */
1237 COSTS_N_INSNS (16), /* mult. */
1238 COSTS_N_INSNS (25), /* mult_addsub. */
1239 COSTS_N_INSNS (30), /* fma. */
1240 COSTS_N_INSNS (9), /* addsub. */
1241 COSTS_N_INSNS (3), /* fpconst. */
1242 COSTS_N_INSNS (3), /* neg. */
1243 COSTS_N_INSNS (6), /* compare. */
1244 COSTS_N_INSNS (6), /* widen. */
1245 COSTS_N_INSNS (6), /* narrow. */
1246 COSTS_N_INSNS (8), /* toint. */
1247 COSTS_N_INSNS (8), /* fromint. */
1248 COSTS_N_INSNS (8) /* roundint. */
1249 }
1250 },
1251 /* Vector */
1252 {
1253 COSTS_N_INSNS (1) /* alu. */
1254 }
1255 };
1256
1257 const struct cpu_cost_table cortexa5_extra_costs =
1258 {
1259 /* ALU */
1260 {
1261 0, /* arith. */
1262 0, /* logical. */
1263 COSTS_N_INSNS (1), /* shift. */
1264 COSTS_N_INSNS (1), /* shift_reg. */
1265 COSTS_N_INSNS (1), /* arith_shift. */
1266 COSTS_N_INSNS (1), /* arith_shift_reg. */
1267 COSTS_N_INSNS (1), /* log_shift. */
1268 COSTS_N_INSNS (1), /* log_shift_reg. */
1269 COSTS_N_INSNS (1), /* extend. */
1270 COSTS_N_INSNS (1), /* extend_arith. */
1271 COSTS_N_INSNS (1), /* bfi. */
1272 COSTS_N_INSNS (1), /* bfx. */
1273 COSTS_N_INSNS (1), /* clz. */
1274 COSTS_N_INSNS (1), /* rev. */
1275 0, /* non_exec. */
1276 true /* non_exec_costs_exec. */
1277 },
1278
1279 {
1280 /* MULT SImode */
1281 {
1282 0, /* simple. */
1283 COSTS_N_INSNS (1), /* flag_setting. */
1284 COSTS_N_INSNS (1), /* extend. */
1285 COSTS_N_INSNS (1), /* add. */
1286 COSTS_N_INSNS (1), /* extend_add. */
1287 COSTS_N_INSNS (7) /* idiv. */
1288 },
1289 /* MULT DImode */
1290 {
1291 0, /* simple (N/A). */
1292 0, /* flag_setting (N/A). */
1293 COSTS_N_INSNS (1), /* extend. */
1294 0, /* add. */
1295 COSTS_N_INSNS (2), /* extend_add. */
1296 0 /* idiv (N/A). */
1297 }
1298 },
1299 /* LD/ST */
1300 {
1301 COSTS_N_INSNS (1), /* load. */
1302 COSTS_N_INSNS (1), /* load_sign_extend. */
1303 COSTS_N_INSNS (6), /* ldrd. */
1304 COSTS_N_INSNS (1), /* ldm_1st. */
1305 1, /* ldm_regs_per_insn_1st. */
1306 2, /* ldm_regs_per_insn_subsequent. */
1307 COSTS_N_INSNS (2), /* loadf. */
1308 COSTS_N_INSNS (4), /* loadd. */
1309 COSTS_N_INSNS (1), /* load_unaligned. */
1310 COSTS_N_INSNS (1), /* store. */
1311 COSTS_N_INSNS (3), /* strd. */
1312 COSTS_N_INSNS (1), /* stm_1st. */
1313 1, /* stm_regs_per_insn_1st. */
1314 2, /* stm_regs_per_insn_subsequent. */
1315 COSTS_N_INSNS (2), /* storef. */
1316 COSTS_N_INSNS (2), /* stored. */
1317 COSTS_N_INSNS (1), /* store_unaligned. */
1318 COSTS_N_INSNS (1), /* loadv. */
1319 COSTS_N_INSNS (1) /* storev. */
1320 },
1321 {
1322 /* FP SFmode */
1323 {
1324 COSTS_N_INSNS (15), /* div. */
1325 COSTS_N_INSNS (3), /* mult. */
1326 COSTS_N_INSNS (7), /* mult_addsub. */
1327 COSTS_N_INSNS (7), /* fma. */
1328 COSTS_N_INSNS (3), /* addsub. */
1329 COSTS_N_INSNS (3), /* fpconst. */
1330 COSTS_N_INSNS (3), /* neg. */
1331 COSTS_N_INSNS (3), /* compare. */
1332 COSTS_N_INSNS (3), /* widen. */
1333 COSTS_N_INSNS (3), /* narrow. */
1334 COSTS_N_INSNS (3), /* toint. */
1335 COSTS_N_INSNS (3), /* fromint. */
1336 COSTS_N_INSNS (3) /* roundint. */
1337 },
1338 /* FP DFmode */
1339 {
1340 COSTS_N_INSNS (30), /* div. */
1341 COSTS_N_INSNS (6), /* mult. */
1342 COSTS_N_INSNS (10), /* mult_addsub. */
1343 COSTS_N_INSNS (7), /* fma. */
1344 COSTS_N_INSNS (3), /* addsub. */
1345 COSTS_N_INSNS (3), /* fpconst. */
1346 COSTS_N_INSNS (3), /* neg. */
1347 COSTS_N_INSNS (3), /* compare. */
1348 COSTS_N_INSNS (3), /* widen. */
1349 COSTS_N_INSNS (3), /* narrow. */
1350 COSTS_N_INSNS (3), /* toint. */
1351 COSTS_N_INSNS (3), /* fromint. */
1352 COSTS_N_INSNS (3) /* roundint. */
1353 }
1354 },
1355 /* Vector */
1356 {
1357 COSTS_N_INSNS (1) /* alu. */
1358 }
1359 };
1360
1361
1362 const struct cpu_cost_table cortexa7_extra_costs =
1363 {
1364 /* ALU */
1365 {
1366 0, /* arith. */
1367 0, /* logical. */
1368 COSTS_N_INSNS (1), /* shift. */
1369 COSTS_N_INSNS (1), /* shift_reg. */
1370 COSTS_N_INSNS (1), /* arith_shift. */
1371 COSTS_N_INSNS (1), /* arith_shift_reg. */
1372 COSTS_N_INSNS (1), /* log_shift. */
1373 COSTS_N_INSNS (1), /* log_shift_reg. */
1374 COSTS_N_INSNS (1), /* extend. */
1375 COSTS_N_INSNS (1), /* extend_arith. */
1376 COSTS_N_INSNS (1), /* bfi. */
1377 COSTS_N_INSNS (1), /* bfx. */
1378 COSTS_N_INSNS (1), /* clz. */
1379 COSTS_N_INSNS (1), /* rev. */
1380 0, /* non_exec. */
1381 true /* non_exec_costs_exec. */
1382 },
1383
1384 {
1385 /* MULT SImode */
1386 {
1387 0, /* simple. */
1388 COSTS_N_INSNS (1), /* flag_setting. */
1389 COSTS_N_INSNS (1), /* extend. */
1390 COSTS_N_INSNS (1), /* add. */
1391 COSTS_N_INSNS (1), /* extend_add. */
1392 COSTS_N_INSNS (7) /* idiv. */
1393 },
1394 /* MULT DImode */
1395 {
1396 0, /* simple (N/A). */
1397 0, /* flag_setting (N/A). */
1398 COSTS_N_INSNS (1), /* extend. */
1399 0, /* add. */
1400 COSTS_N_INSNS (2), /* extend_add. */
1401 0 /* idiv (N/A). */
1402 }
1403 },
1404 /* LD/ST */
1405 {
1406 COSTS_N_INSNS (1), /* load. */
1407 COSTS_N_INSNS (1), /* load_sign_extend. */
1408 COSTS_N_INSNS (3), /* ldrd. */
1409 COSTS_N_INSNS (1), /* ldm_1st. */
1410 1, /* ldm_regs_per_insn_1st. */
1411 2, /* ldm_regs_per_insn_subsequent. */
1412 COSTS_N_INSNS (2), /* loadf. */
1413 COSTS_N_INSNS (2), /* loadd. */
1414 COSTS_N_INSNS (1), /* load_unaligned. */
1415 COSTS_N_INSNS (1), /* store. */
1416 COSTS_N_INSNS (3), /* strd. */
1417 COSTS_N_INSNS (1), /* stm_1st. */
1418 1, /* stm_regs_per_insn_1st. */
1419 2, /* stm_regs_per_insn_subsequent. */
1420 COSTS_N_INSNS (2), /* storef. */
1421 COSTS_N_INSNS (2), /* stored. */
1422 COSTS_N_INSNS (1), /* store_unaligned. */
1423 COSTS_N_INSNS (1), /* loadv. */
1424 COSTS_N_INSNS (1) /* storev. */
1425 },
1426 {
1427 /* FP SFmode */
1428 {
1429 COSTS_N_INSNS (15), /* div. */
1430 COSTS_N_INSNS (3), /* mult. */
1431 COSTS_N_INSNS (7), /* mult_addsub. */
1432 COSTS_N_INSNS (7), /* fma. */
1433 COSTS_N_INSNS (3), /* addsub. */
1434 COSTS_N_INSNS (3), /* fpconst. */
1435 COSTS_N_INSNS (3), /* neg. */
1436 COSTS_N_INSNS (3), /* compare. */
1437 COSTS_N_INSNS (3), /* widen. */
1438 COSTS_N_INSNS (3), /* narrow. */
1439 COSTS_N_INSNS (3), /* toint. */
1440 COSTS_N_INSNS (3), /* fromint. */
1441 COSTS_N_INSNS (3) /* roundint. */
1442 },
1443 /* FP DFmode */
1444 {
1445 COSTS_N_INSNS (30), /* div. */
1446 COSTS_N_INSNS (6), /* mult. */
1447 COSTS_N_INSNS (10), /* mult_addsub. */
1448 COSTS_N_INSNS (7), /* fma. */
1449 COSTS_N_INSNS (3), /* addsub. */
1450 COSTS_N_INSNS (3), /* fpconst. */
1451 COSTS_N_INSNS (3), /* neg. */
1452 COSTS_N_INSNS (3), /* compare. */
1453 COSTS_N_INSNS (3), /* widen. */
1454 COSTS_N_INSNS (3), /* narrow. */
1455 COSTS_N_INSNS (3), /* toint. */
1456 COSTS_N_INSNS (3), /* fromint. */
1457 COSTS_N_INSNS (3) /* roundint. */
1458 }
1459 },
1460 /* Vector */
1461 {
1462 COSTS_N_INSNS (1) /* alu. */
1463 }
1464 };
1465
1466 const struct cpu_cost_table cortexa12_extra_costs =
1467 {
1468 /* ALU */
1469 {
1470 0, /* arith. */
1471 0, /* logical. */
1472 0, /* shift. */
1473 COSTS_N_INSNS (1), /* shift_reg. */
1474 COSTS_N_INSNS (1), /* arith_shift. */
1475 COSTS_N_INSNS (1), /* arith_shift_reg. */
1476 COSTS_N_INSNS (1), /* log_shift. */
1477 COSTS_N_INSNS (1), /* log_shift_reg. */
1478 0, /* extend. */
1479 COSTS_N_INSNS (1), /* extend_arith. */
1480 0, /* bfi. */
1481 COSTS_N_INSNS (1), /* bfx. */
1482 COSTS_N_INSNS (1), /* clz. */
1483 COSTS_N_INSNS (1), /* rev. */
1484 0, /* non_exec. */
1485 true /* non_exec_costs_exec. */
1486 },
1487 /* MULT SImode */
1488 {
1489 {
1490 COSTS_N_INSNS (2), /* simple. */
1491 COSTS_N_INSNS (3), /* flag_setting. */
1492 COSTS_N_INSNS (2), /* extend. */
1493 COSTS_N_INSNS (3), /* add. */
1494 COSTS_N_INSNS (2), /* extend_add. */
1495 COSTS_N_INSNS (18) /* idiv. */
1496 },
1497 /* MULT DImode */
1498 {
1499 0, /* simple (N/A). */
1500 0, /* flag_setting (N/A). */
1501 COSTS_N_INSNS (3), /* extend. */
1502 0, /* add (N/A). */
1503 COSTS_N_INSNS (3), /* extend_add. */
1504 0 /* idiv (N/A). */
1505 }
1506 },
1507 /* LD/ST */
1508 {
1509 COSTS_N_INSNS (3), /* load. */
1510 COSTS_N_INSNS (3), /* load_sign_extend. */
1511 COSTS_N_INSNS (3), /* ldrd. */
1512 COSTS_N_INSNS (3), /* ldm_1st. */
1513 1, /* ldm_regs_per_insn_1st. */
1514 2, /* ldm_regs_per_insn_subsequent. */
1515 COSTS_N_INSNS (3), /* loadf. */
1516 COSTS_N_INSNS (3), /* loadd. */
1517 0, /* load_unaligned. */
1518 0, /* store. */
1519 0, /* strd. */
1520 0, /* stm_1st. */
1521 1, /* stm_regs_per_insn_1st. */
1522 2, /* stm_regs_per_insn_subsequent. */
1523 COSTS_N_INSNS (2), /* storef. */
1524 COSTS_N_INSNS (2), /* stored. */
1525 0, /* store_unaligned. */
1526 COSTS_N_INSNS (1), /* loadv. */
1527 COSTS_N_INSNS (1) /* storev. */
1528 },
1529 {
1530 /* FP SFmode */
1531 {
1532 COSTS_N_INSNS (17), /* div. */
1533 COSTS_N_INSNS (4), /* mult. */
1534 COSTS_N_INSNS (8), /* mult_addsub. */
1535 COSTS_N_INSNS (8), /* fma. */
1536 COSTS_N_INSNS (4), /* addsub. */
1537 COSTS_N_INSNS (2), /* fpconst. */
1538 COSTS_N_INSNS (2), /* neg. */
1539 COSTS_N_INSNS (2), /* compare. */
1540 COSTS_N_INSNS (4), /* widen. */
1541 COSTS_N_INSNS (4), /* narrow. */
1542 COSTS_N_INSNS (4), /* toint. */
1543 COSTS_N_INSNS (4), /* fromint. */
1544 COSTS_N_INSNS (4) /* roundint. */
1545 },
1546 /* FP DFmode */
1547 {
1548 COSTS_N_INSNS (31), /* div. */
1549 COSTS_N_INSNS (4), /* mult. */
1550 COSTS_N_INSNS (8), /* mult_addsub. */
1551 COSTS_N_INSNS (8), /* fma. */
1552 COSTS_N_INSNS (4), /* addsub. */
1553 COSTS_N_INSNS (2), /* fpconst. */
1554 COSTS_N_INSNS (2), /* neg. */
1555 COSTS_N_INSNS (2), /* compare. */
1556 COSTS_N_INSNS (4), /* widen. */
1557 COSTS_N_INSNS (4), /* narrow. */
1558 COSTS_N_INSNS (4), /* toint. */
1559 COSTS_N_INSNS (4), /* fromint. */
1560 COSTS_N_INSNS (4) /* roundint. */
1561 }
1562 },
1563 /* Vector */
1564 {
1565 COSTS_N_INSNS (1) /* alu. */
1566 }
1567 };
1568
1569 const struct cpu_cost_table cortexa15_extra_costs =
1570 {
1571 /* ALU */
1572 {
1573 0, /* arith. */
1574 0, /* logical. */
1575 0, /* shift. */
1576 0, /* shift_reg. */
1577 COSTS_N_INSNS (1), /* arith_shift. */
1578 COSTS_N_INSNS (1), /* arith_shift_reg. */
1579 COSTS_N_INSNS (1), /* log_shift. */
1580 COSTS_N_INSNS (1), /* log_shift_reg. */
1581 0, /* extend. */
1582 COSTS_N_INSNS (1), /* extend_arith. */
1583 COSTS_N_INSNS (1), /* bfi. */
1584 0, /* bfx. */
1585 0, /* clz. */
1586 0, /* rev. */
1587 0, /* non_exec. */
1588 true /* non_exec_costs_exec. */
1589 },
1590 /* MULT SImode */
1591 {
1592 {
1593 COSTS_N_INSNS (2), /* simple. */
1594 COSTS_N_INSNS (3), /* flag_setting. */
1595 COSTS_N_INSNS (2), /* extend. */
1596 COSTS_N_INSNS (2), /* add. */
1597 COSTS_N_INSNS (2), /* extend_add. */
1598 COSTS_N_INSNS (18) /* idiv. */
1599 },
1600 /* MULT DImode */
1601 {
1602 0, /* simple (N/A). */
1603 0, /* flag_setting (N/A). */
1604 COSTS_N_INSNS (3), /* extend. */
1605 0, /* add (N/A). */
1606 COSTS_N_INSNS (3), /* extend_add. */
1607 0 /* idiv (N/A). */
1608 }
1609 },
1610 /* LD/ST */
1611 {
1612 COSTS_N_INSNS (3), /* load. */
1613 COSTS_N_INSNS (3), /* load_sign_extend. */
1614 COSTS_N_INSNS (3), /* ldrd. */
1615 COSTS_N_INSNS (4), /* ldm_1st. */
1616 1, /* ldm_regs_per_insn_1st. */
1617 2, /* ldm_regs_per_insn_subsequent. */
1618 COSTS_N_INSNS (4), /* loadf. */
1619 COSTS_N_INSNS (4), /* loadd. */
1620 0, /* load_unaligned. */
1621 0, /* store. */
1622 0, /* strd. */
1623 COSTS_N_INSNS (1), /* stm_1st. */
1624 1, /* stm_regs_per_insn_1st. */
1625 2, /* stm_regs_per_insn_subsequent. */
1626 0, /* storef. */
1627 0, /* stored. */
1628 0, /* store_unaligned. */
1629 COSTS_N_INSNS (1), /* loadv. */
1630 COSTS_N_INSNS (1) /* storev. */
1631 },
1632 {
1633 /* FP SFmode */
1634 {
1635 COSTS_N_INSNS (17), /* div. */
1636 COSTS_N_INSNS (4), /* mult. */
1637 COSTS_N_INSNS (8), /* mult_addsub. */
1638 COSTS_N_INSNS (8), /* fma. */
1639 COSTS_N_INSNS (4), /* addsub. */
1640 COSTS_N_INSNS (2), /* fpconst. */
1641 COSTS_N_INSNS (2), /* neg. */
1642 COSTS_N_INSNS (5), /* compare. */
1643 COSTS_N_INSNS (4), /* widen. */
1644 COSTS_N_INSNS (4), /* narrow. */
1645 COSTS_N_INSNS (4), /* toint. */
1646 COSTS_N_INSNS (4), /* fromint. */
1647 COSTS_N_INSNS (4) /* roundint. */
1648 },
1649 /* FP DFmode */
1650 {
1651 COSTS_N_INSNS (31), /* div. */
1652 COSTS_N_INSNS (4), /* mult. */
1653 COSTS_N_INSNS (8), /* mult_addsub. */
1654 COSTS_N_INSNS (8), /* fma. */
1655 COSTS_N_INSNS (4), /* addsub. */
1656 COSTS_N_INSNS (2), /* fpconst. */
1657 COSTS_N_INSNS (2), /* neg. */
1658 COSTS_N_INSNS (2), /* compare. */
1659 COSTS_N_INSNS (4), /* widen. */
1660 COSTS_N_INSNS (4), /* narrow. */
1661 COSTS_N_INSNS (4), /* toint. */
1662 COSTS_N_INSNS (4), /* fromint. */
1663 COSTS_N_INSNS (4) /* roundint. */
1664 }
1665 },
1666 /* Vector */
1667 {
1668 COSTS_N_INSNS (1) /* alu. */
1669 }
1670 };
1671
1672 const struct cpu_cost_table v7m_extra_costs =
1673 {
1674 /* ALU */
1675 {
1676 0, /* arith. */
1677 0, /* logical. */
1678 0, /* shift. */
1679 0, /* shift_reg. */
1680 0, /* arith_shift. */
1681 COSTS_N_INSNS (1), /* arith_shift_reg. */
1682 0, /* log_shift. */
1683 COSTS_N_INSNS (1), /* log_shift_reg. */
1684 0, /* extend. */
1685 COSTS_N_INSNS (1), /* extend_arith. */
1686 0, /* bfi. */
1687 0, /* bfx. */
1688 0, /* clz. */
1689 0, /* rev. */
1690 COSTS_N_INSNS (1), /* non_exec. */
1691 false /* non_exec_costs_exec. */
1692 },
1693 {
1694 /* MULT SImode */
1695 {
1696 COSTS_N_INSNS (1), /* simple. */
1697 COSTS_N_INSNS (1), /* flag_setting. */
1698 COSTS_N_INSNS (2), /* extend. */
1699 COSTS_N_INSNS (1), /* add. */
1700 COSTS_N_INSNS (3), /* extend_add. */
1701 COSTS_N_INSNS (8) /* idiv. */
1702 },
1703 /* MULT DImode */
1704 {
1705 0, /* simple (N/A). */
1706 0, /* flag_setting (N/A). */
1707 COSTS_N_INSNS (2), /* extend. */
1708 0, /* add (N/A). */
1709 COSTS_N_INSNS (3), /* extend_add. */
1710 0 /* idiv (N/A). */
1711 }
1712 },
1713 /* LD/ST */
1714 {
1715 COSTS_N_INSNS (2), /* load. */
1716 0, /* load_sign_extend. */
1717 COSTS_N_INSNS (3), /* ldrd. */
1718 COSTS_N_INSNS (2), /* ldm_1st. */
1719 1, /* ldm_regs_per_insn_1st. */
1720 1, /* ldm_regs_per_insn_subsequent. */
1721 COSTS_N_INSNS (2), /* loadf. */
1722 COSTS_N_INSNS (3), /* loadd. */
1723 COSTS_N_INSNS (1), /* load_unaligned. */
1724 COSTS_N_INSNS (2), /* store. */
1725 COSTS_N_INSNS (3), /* strd. */
1726 COSTS_N_INSNS (2), /* stm_1st. */
1727 1, /* stm_regs_per_insn_1st. */
1728 1, /* stm_regs_per_insn_subsequent. */
1729 COSTS_N_INSNS (2), /* storef. */
1730 COSTS_N_INSNS (3), /* stored. */
1731 COSTS_N_INSNS (1), /* store_unaligned. */
1732 COSTS_N_INSNS (1), /* loadv. */
1733 COSTS_N_INSNS (1) /* storev. */
1734 },
1735 {
1736 /* FP SFmode */
1737 {
1738 COSTS_N_INSNS (7), /* div. */
1739 COSTS_N_INSNS (2), /* mult. */
1740 COSTS_N_INSNS (5), /* mult_addsub. */
1741 COSTS_N_INSNS (3), /* fma. */
1742 COSTS_N_INSNS (1), /* addsub. */
1743 0, /* fpconst. */
1744 0, /* neg. */
1745 0, /* compare. */
1746 0, /* widen. */
1747 0, /* narrow. */
1748 0, /* toint. */
1749 0, /* fromint. */
1750 0 /* roundint. */
1751 },
1752 /* FP DFmode */
1753 {
1754 COSTS_N_INSNS (15), /* div. */
1755 COSTS_N_INSNS (5), /* mult. */
1756 COSTS_N_INSNS (7), /* mult_addsub. */
1757 COSTS_N_INSNS (7), /* fma. */
1758 COSTS_N_INSNS (3), /* addsub. */
1759 0, /* fpconst. */
1760 0, /* neg. */
1761 0, /* compare. */
1762 0, /* widen. */
1763 0, /* narrow. */
1764 0, /* toint. */
1765 0, /* fromint. */
1766 0 /* roundint. */
1767 }
1768 },
1769 /* Vector */
1770 {
1771 COSTS_N_INSNS (1) /* alu. */
1772 }
1773 };
1774
1775 const struct addr_mode_cost_table generic_addr_mode_costs =
1776 {
1777 /* int. */
1778 {
1779 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1780 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1781 COSTS_N_INSNS (0) /* AMO_WB. */
1782 },
1783 /* float. */
1784 {
1785 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1786 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1787 COSTS_N_INSNS (0) /* AMO_WB. */
1788 },
1789 /* vector. */
1790 {
1791 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1792 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1793 COSTS_N_INSNS (0) /* AMO_WB. */
1794 }
1795 };
1796
1797 const struct tune_params arm_slowmul_tune =
1798 {
1799 &generic_extra_costs, /* Insn extra costs. */
1800 &generic_addr_mode_costs, /* Addressing mode costs. */
1801 NULL, /* Sched adj cost. */
1802 arm_default_branch_cost,
1803 &arm_default_vec_cost,
1804 3, /* Constant limit. */
1805 5, /* Max cond insns. */
1806 8, /* Memset max inline. */
1807 1, /* Issue rate. */
1808 ARM_PREFETCH_NOT_BENEFICIAL,
1809 tune_params::PREF_CONST_POOL_TRUE,
1810 tune_params::PREF_LDRD_FALSE,
1811 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1812 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1813 tune_params::DISPARAGE_FLAGS_NEITHER,
1814 tune_params::PREF_NEON_64_FALSE,
1815 tune_params::PREF_NEON_STRINGOPS_FALSE,
1816 tune_params::FUSE_NOTHING,
1817 tune_params::SCHED_AUTOPREF_OFF
1818 };
1819
1820 const struct tune_params arm_fastmul_tune =
1821 {
1822 &generic_extra_costs, /* Insn extra costs. */
1823 &generic_addr_mode_costs, /* Addressing mode costs. */
1824 NULL, /* Sched adj cost. */
1825 arm_default_branch_cost,
1826 &arm_default_vec_cost,
1827 1, /* Constant limit. */
1828 5, /* Max cond insns. */
1829 8, /* Memset max inline. */
1830 1, /* Issue rate. */
1831 ARM_PREFETCH_NOT_BENEFICIAL,
1832 tune_params::PREF_CONST_POOL_TRUE,
1833 tune_params::PREF_LDRD_FALSE,
1834 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1835 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1836 tune_params::DISPARAGE_FLAGS_NEITHER,
1837 tune_params::PREF_NEON_64_FALSE,
1838 tune_params::PREF_NEON_STRINGOPS_FALSE,
1839 tune_params::FUSE_NOTHING,
1840 tune_params::SCHED_AUTOPREF_OFF
1841 };
1842
1843 /* StrongARM has early execution of branches, so a sequence that is worth
1844 skipping is shorter. Set max_insns_skipped to a lower value. */
1845
1846 const struct tune_params arm_strongarm_tune =
1847 {
1848 &generic_extra_costs, /* Insn extra costs. */
1849 &generic_addr_mode_costs, /* Addressing mode costs. */
1850 NULL, /* Sched adj cost. */
1851 arm_default_branch_cost,
1852 &arm_default_vec_cost,
1853 1, /* Constant limit. */
1854 3, /* Max cond insns. */
1855 8, /* Memset max inline. */
1856 1, /* Issue rate. */
1857 ARM_PREFETCH_NOT_BENEFICIAL,
1858 tune_params::PREF_CONST_POOL_TRUE,
1859 tune_params::PREF_LDRD_FALSE,
1860 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1861 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1862 tune_params::DISPARAGE_FLAGS_NEITHER,
1863 tune_params::PREF_NEON_64_FALSE,
1864 tune_params::PREF_NEON_STRINGOPS_FALSE,
1865 tune_params::FUSE_NOTHING,
1866 tune_params::SCHED_AUTOPREF_OFF
1867 };
1868
1869 const struct tune_params arm_xscale_tune =
1870 {
1871 &generic_extra_costs, /* Insn extra costs. */
1872 &generic_addr_mode_costs, /* Addressing mode costs. */
1873 xscale_sched_adjust_cost,
1874 arm_default_branch_cost,
1875 &arm_default_vec_cost,
1876 2, /* Constant limit. */
1877 3, /* Max cond insns. */
1878 8, /* Memset max inline. */
1879 1, /* Issue rate. */
1880 ARM_PREFETCH_NOT_BENEFICIAL,
1881 tune_params::PREF_CONST_POOL_TRUE,
1882 tune_params::PREF_LDRD_FALSE,
1883 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1884 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1885 tune_params::DISPARAGE_FLAGS_NEITHER,
1886 tune_params::PREF_NEON_64_FALSE,
1887 tune_params::PREF_NEON_STRINGOPS_FALSE,
1888 tune_params::FUSE_NOTHING,
1889 tune_params::SCHED_AUTOPREF_OFF
1890 };
1891
1892 const struct tune_params arm_9e_tune =
1893 {
1894 &generic_extra_costs, /* Insn extra costs. */
1895 &generic_addr_mode_costs, /* Addressing mode costs. */
1896 NULL, /* Sched adj cost. */
1897 arm_default_branch_cost,
1898 &arm_default_vec_cost,
1899 1, /* Constant limit. */
1900 5, /* Max cond insns. */
1901 8, /* Memset max inline. */
1902 1, /* Issue rate. */
1903 ARM_PREFETCH_NOT_BENEFICIAL,
1904 tune_params::PREF_CONST_POOL_TRUE,
1905 tune_params::PREF_LDRD_FALSE,
1906 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1907 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1908 tune_params::DISPARAGE_FLAGS_NEITHER,
1909 tune_params::PREF_NEON_64_FALSE,
1910 tune_params::PREF_NEON_STRINGOPS_FALSE,
1911 tune_params::FUSE_NOTHING,
1912 tune_params::SCHED_AUTOPREF_OFF
1913 };
1914
1915 const struct tune_params arm_marvell_pj4_tune =
1916 {
1917 &generic_extra_costs, /* Insn extra costs. */
1918 &generic_addr_mode_costs, /* Addressing mode costs. */
1919 NULL, /* Sched adj cost. */
1920 arm_default_branch_cost,
1921 &arm_default_vec_cost,
1922 1, /* Constant limit. */
1923 5, /* Max cond insns. */
1924 8, /* Memset max inline. */
1925 2, /* Issue rate. */
1926 ARM_PREFETCH_NOT_BENEFICIAL,
1927 tune_params::PREF_CONST_POOL_TRUE,
1928 tune_params::PREF_LDRD_FALSE,
1929 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1930 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1931 tune_params::DISPARAGE_FLAGS_NEITHER,
1932 tune_params::PREF_NEON_64_FALSE,
1933 tune_params::PREF_NEON_STRINGOPS_FALSE,
1934 tune_params::FUSE_NOTHING,
1935 tune_params::SCHED_AUTOPREF_OFF
1936 };
1937
1938 const struct tune_params arm_v6t2_tune =
1939 {
1940 &generic_extra_costs, /* Insn extra costs. */
1941 &generic_addr_mode_costs, /* Addressing mode costs. */
1942 NULL, /* Sched adj cost. */
1943 arm_default_branch_cost,
1944 &arm_default_vec_cost,
1945 1, /* Constant limit. */
1946 5, /* Max cond insns. */
1947 8, /* Memset max inline. */
1948 1, /* Issue rate. */
1949 ARM_PREFETCH_NOT_BENEFICIAL,
1950 tune_params::PREF_CONST_POOL_FALSE,
1951 tune_params::PREF_LDRD_FALSE,
1952 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1953 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1954 tune_params::DISPARAGE_FLAGS_NEITHER,
1955 tune_params::PREF_NEON_64_FALSE,
1956 tune_params::PREF_NEON_STRINGOPS_FALSE,
1957 tune_params::FUSE_NOTHING,
1958 tune_params::SCHED_AUTOPREF_OFF
1959 };
1960
1961
1962 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1963 const struct tune_params arm_cortex_tune =
1964 {
1965 &generic_extra_costs,
1966 &generic_addr_mode_costs, /* Addressing mode costs. */
1967 NULL, /* Sched adj cost. */
1968 arm_default_branch_cost,
1969 &arm_default_vec_cost,
1970 1, /* Constant limit. */
1971 5, /* Max cond insns. */
1972 8, /* Memset max inline. */
1973 2, /* Issue rate. */
1974 ARM_PREFETCH_NOT_BENEFICIAL,
1975 tune_params::PREF_CONST_POOL_FALSE,
1976 tune_params::PREF_LDRD_FALSE,
1977 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1978 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1979 tune_params::DISPARAGE_FLAGS_NEITHER,
1980 tune_params::PREF_NEON_64_FALSE,
1981 tune_params::PREF_NEON_STRINGOPS_FALSE,
1982 tune_params::FUSE_NOTHING,
1983 tune_params::SCHED_AUTOPREF_OFF
1984 };
1985
1986 const struct tune_params arm_cortex_a8_tune =
1987 {
1988 &cortexa8_extra_costs,
1989 &generic_addr_mode_costs, /* Addressing mode costs. */
1990 NULL, /* Sched adj cost. */
1991 arm_default_branch_cost,
1992 &arm_default_vec_cost,
1993 1, /* Constant limit. */
1994 5, /* Max cond insns. */
1995 8, /* Memset max inline. */
1996 2, /* Issue rate. */
1997 ARM_PREFETCH_NOT_BENEFICIAL,
1998 tune_params::PREF_CONST_POOL_FALSE,
1999 tune_params::PREF_LDRD_FALSE,
2000 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2001 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2002 tune_params::DISPARAGE_FLAGS_NEITHER,
2003 tune_params::PREF_NEON_64_FALSE,
2004 tune_params::PREF_NEON_STRINGOPS_TRUE,
2005 tune_params::FUSE_NOTHING,
2006 tune_params::SCHED_AUTOPREF_OFF
2007 };
2008
2009 const struct tune_params arm_cortex_a7_tune =
2010 {
2011 &cortexa7_extra_costs,
2012 &generic_addr_mode_costs, /* Addressing mode costs. */
2013 NULL, /* Sched adj cost. */
2014 arm_default_branch_cost,
2015 &arm_default_vec_cost,
2016 1, /* Constant limit. */
2017 5, /* Max cond insns. */
2018 8, /* Memset max inline. */
2019 2, /* Issue rate. */
2020 ARM_PREFETCH_NOT_BENEFICIAL,
2021 tune_params::PREF_CONST_POOL_FALSE,
2022 tune_params::PREF_LDRD_FALSE,
2023 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2024 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2025 tune_params::DISPARAGE_FLAGS_NEITHER,
2026 tune_params::PREF_NEON_64_FALSE,
2027 tune_params::PREF_NEON_STRINGOPS_TRUE,
2028 tune_params::FUSE_NOTHING,
2029 tune_params::SCHED_AUTOPREF_OFF
2030 };
2031
2032 const struct tune_params arm_cortex_a15_tune =
2033 {
2034 &cortexa15_extra_costs,
2035 &generic_addr_mode_costs, /* Addressing mode costs. */
2036 NULL, /* Sched adj cost. */
2037 arm_default_branch_cost,
2038 &arm_default_vec_cost,
2039 1, /* Constant limit. */
2040 2, /* Max cond insns. */
2041 8, /* Memset max inline. */
2042 3, /* Issue rate. */
2043 ARM_PREFETCH_NOT_BENEFICIAL,
2044 tune_params::PREF_CONST_POOL_FALSE,
2045 tune_params::PREF_LDRD_TRUE,
2046 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2047 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2048 tune_params::DISPARAGE_FLAGS_ALL,
2049 tune_params::PREF_NEON_64_FALSE,
2050 tune_params::PREF_NEON_STRINGOPS_TRUE,
2051 tune_params::FUSE_NOTHING,
2052 tune_params::SCHED_AUTOPREF_FULL
2053 };
2054
2055 const struct tune_params arm_cortex_a35_tune =
2056 {
2057 &cortexa53_extra_costs,
2058 &generic_addr_mode_costs, /* Addressing mode costs. */
2059 NULL, /* Sched adj cost. */
2060 arm_default_branch_cost,
2061 &arm_default_vec_cost,
2062 1, /* Constant limit. */
2063 5, /* Max cond insns. */
2064 8, /* Memset max inline. */
2065 1, /* Issue rate. */
2066 ARM_PREFETCH_NOT_BENEFICIAL,
2067 tune_params::PREF_CONST_POOL_FALSE,
2068 tune_params::PREF_LDRD_FALSE,
2069 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2070 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2071 tune_params::DISPARAGE_FLAGS_NEITHER,
2072 tune_params::PREF_NEON_64_FALSE,
2073 tune_params::PREF_NEON_STRINGOPS_TRUE,
2074 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2075 tune_params::SCHED_AUTOPREF_OFF
2076 };
2077
2078 const struct tune_params arm_cortex_a53_tune =
2079 {
2080 &cortexa53_extra_costs,
2081 &generic_addr_mode_costs, /* Addressing mode costs. */
2082 NULL, /* Sched adj cost. */
2083 arm_default_branch_cost,
2084 &arm_default_vec_cost,
2085 1, /* Constant limit. */
2086 5, /* Max cond insns. */
2087 8, /* Memset max inline. */
2088 2, /* Issue rate. */
2089 ARM_PREFETCH_NOT_BENEFICIAL,
2090 tune_params::PREF_CONST_POOL_FALSE,
2091 tune_params::PREF_LDRD_FALSE,
2092 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2093 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2094 tune_params::DISPARAGE_FLAGS_NEITHER,
2095 tune_params::PREF_NEON_64_FALSE,
2096 tune_params::PREF_NEON_STRINGOPS_TRUE,
2097 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2098 tune_params::SCHED_AUTOPREF_OFF
2099 };
2100
2101 const struct tune_params arm_cortex_a57_tune =
2102 {
2103 &cortexa57_extra_costs,
2104 &generic_addr_mode_costs, /* addressing mode costs */
2105 NULL, /* Sched adj cost. */
2106 arm_default_branch_cost,
2107 &arm_default_vec_cost,
2108 1, /* Constant limit. */
2109 2, /* Max cond insns. */
2110 8, /* Memset max inline. */
2111 3, /* Issue rate. */
2112 ARM_PREFETCH_NOT_BENEFICIAL,
2113 tune_params::PREF_CONST_POOL_FALSE,
2114 tune_params::PREF_LDRD_TRUE,
2115 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2116 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2117 tune_params::DISPARAGE_FLAGS_ALL,
2118 tune_params::PREF_NEON_64_FALSE,
2119 tune_params::PREF_NEON_STRINGOPS_TRUE,
2120 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2121 tune_params::SCHED_AUTOPREF_FULL
2122 };
2123
2124 const struct tune_params arm_exynosm1_tune =
2125 {
2126 &exynosm1_extra_costs,
2127 &generic_addr_mode_costs, /* Addressing mode costs. */
2128 NULL, /* Sched adj cost. */
2129 arm_default_branch_cost,
2130 &arm_default_vec_cost,
2131 1, /* Constant limit. */
2132 2, /* Max cond insns. */
2133 8, /* Memset max inline. */
2134 3, /* Issue rate. */
2135 ARM_PREFETCH_NOT_BENEFICIAL,
2136 tune_params::PREF_CONST_POOL_FALSE,
2137 tune_params::PREF_LDRD_TRUE,
2138 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2139 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2140 tune_params::DISPARAGE_FLAGS_ALL,
2141 tune_params::PREF_NEON_64_FALSE,
2142 tune_params::PREF_NEON_STRINGOPS_TRUE,
2143 tune_params::FUSE_NOTHING,
2144 tune_params::SCHED_AUTOPREF_OFF
2145 };
2146
2147 const struct tune_params arm_xgene1_tune =
2148 {
2149 &xgene1_extra_costs,
2150 &generic_addr_mode_costs, /* Addressing mode costs. */
2151 NULL, /* Sched adj cost. */
2152 arm_default_branch_cost,
2153 &arm_default_vec_cost,
2154 1, /* Constant limit. */
2155 2, /* Max cond insns. */
2156 32, /* Memset max inline. */
2157 4, /* Issue rate. */
2158 ARM_PREFETCH_NOT_BENEFICIAL,
2159 tune_params::PREF_CONST_POOL_FALSE,
2160 tune_params::PREF_LDRD_TRUE,
2161 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2162 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2163 tune_params::DISPARAGE_FLAGS_ALL,
2164 tune_params::PREF_NEON_64_FALSE,
2165 tune_params::PREF_NEON_STRINGOPS_FALSE,
2166 tune_params::FUSE_NOTHING,
2167 tune_params::SCHED_AUTOPREF_OFF
2168 };
2169
2170 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2171 less appealing. Set max_insns_skipped to a low value. */
2172
2173 const struct tune_params arm_cortex_a5_tune =
2174 {
2175 &cortexa5_extra_costs,
2176 &generic_addr_mode_costs, /* Addressing mode costs. */
2177 NULL, /* Sched adj cost. */
2178 arm_cortex_a5_branch_cost,
2179 &arm_default_vec_cost,
2180 1, /* Constant limit. */
2181 1, /* Max cond insns. */
2182 8, /* Memset max inline. */
2183 2, /* Issue rate. */
2184 ARM_PREFETCH_NOT_BENEFICIAL,
2185 tune_params::PREF_CONST_POOL_FALSE,
2186 tune_params::PREF_LDRD_FALSE,
2187 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2188 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2189 tune_params::DISPARAGE_FLAGS_NEITHER,
2190 tune_params::PREF_NEON_64_FALSE,
2191 tune_params::PREF_NEON_STRINGOPS_TRUE,
2192 tune_params::FUSE_NOTHING,
2193 tune_params::SCHED_AUTOPREF_OFF
2194 };
2195
2196 const struct tune_params arm_cortex_a9_tune =
2197 {
2198 &cortexa9_extra_costs,
2199 &generic_addr_mode_costs, /* Addressing mode costs. */
2200 cortex_a9_sched_adjust_cost,
2201 arm_default_branch_cost,
2202 &arm_default_vec_cost,
2203 1, /* Constant limit. */
2204 5, /* Max cond insns. */
2205 8, /* Memset max inline. */
2206 2, /* Issue rate. */
2207 ARM_PREFETCH_BENEFICIAL(4,32,32),
2208 tune_params::PREF_CONST_POOL_FALSE,
2209 tune_params::PREF_LDRD_FALSE,
2210 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2211 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2212 tune_params::DISPARAGE_FLAGS_NEITHER,
2213 tune_params::PREF_NEON_64_FALSE,
2214 tune_params::PREF_NEON_STRINGOPS_FALSE,
2215 tune_params::FUSE_NOTHING,
2216 tune_params::SCHED_AUTOPREF_OFF
2217 };
2218
2219 const struct tune_params arm_cortex_a12_tune =
2220 {
2221 &cortexa12_extra_costs,
2222 &generic_addr_mode_costs, /* Addressing mode costs. */
2223 NULL, /* Sched adj cost. */
2224 arm_default_branch_cost,
2225 &arm_default_vec_cost, /* Vectorizer costs. */
2226 1, /* Constant limit. */
2227 2, /* Max cond insns. */
2228 8, /* Memset max inline. */
2229 2, /* Issue rate. */
2230 ARM_PREFETCH_NOT_BENEFICIAL,
2231 tune_params::PREF_CONST_POOL_FALSE,
2232 tune_params::PREF_LDRD_TRUE,
2233 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2234 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2235 tune_params::DISPARAGE_FLAGS_ALL,
2236 tune_params::PREF_NEON_64_FALSE,
2237 tune_params::PREF_NEON_STRINGOPS_TRUE,
2238 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2239 tune_params::SCHED_AUTOPREF_OFF
2240 };
2241
2242 const struct tune_params arm_cortex_a73_tune =
2243 {
2244 &cortexa57_extra_costs,
2245 &generic_addr_mode_costs, /* Addressing mode costs. */
2246 NULL, /* Sched adj cost. */
2247 arm_default_branch_cost,
2248 &arm_default_vec_cost, /* Vectorizer costs. */
2249 1, /* Constant limit. */
2250 2, /* Max cond insns. */
2251 8, /* Memset max inline. */
2252 2, /* Issue rate. */
2253 ARM_PREFETCH_NOT_BENEFICIAL,
2254 tune_params::PREF_CONST_POOL_FALSE,
2255 tune_params::PREF_LDRD_TRUE,
2256 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2257 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2258 tune_params::DISPARAGE_FLAGS_ALL,
2259 tune_params::PREF_NEON_64_FALSE,
2260 tune_params::PREF_NEON_STRINGOPS_TRUE,
2261 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2262 tune_params::SCHED_AUTOPREF_FULL
2263 };
2264
2265 /* armv7m tuning. On Cortex-M4 cores for example, MOVW/MOVT take a single
2266 cycle to execute each. An LDR from the constant pool also takes two cycles
2267 to execute, but mildly increases pipelining opportunity (consecutive
2268 loads/stores can be pipelined together, saving one cycle), and may also
2269 improve icache utilisation. Hence we prefer the constant pool for such
2270 processors. */
2271
2272 const struct tune_params arm_v7m_tune =
2273 {
2274 &v7m_extra_costs,
2275 &generic_addr_mode_costs, /* Addressing mode costs. */
2276 NULL, /* Sched adj cost. */
2277 arm_cortex_m_branch_cost,
2278 &arm_default_vec_cost,
2279 1, /* Constant limit. */
2280 2, /* Max cond insns. */
2281 8, /* Memset max inline. */
2282 1, /* Issue rate. */
2283 ARM_PREFETCH_NOT_BENEFICIAL,
2284 tune_params::PREF_CONST_POOL_TRUE,
2285 tune_params::PREF_LDRD_FALSE,
2286 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2287 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2288 tune_params::DISPARAGE_FLAGS_NEITHER,
2289 tune_params::PREF_NEON_64_FALSE,
2290 tune_params::PREF_NEON_STRINGOPS_FALSE,
2291 tune_params::FUSE_NOTHING,
2292 tune_params::SCHED_AUTOPREF_OFF
2293 };
2294
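#if 0
/* Illustrative sketch, not part of the build: the two ways a 32-bit constant
   can be materialised, as discussed in the armv7m tuning comment above.  The
   exact sequence chosen depends on the selected tuning and optimisation
   level; the assembly shown here is only indicative.

     movw    r0, #0x5678         @ two single-cycle MOVW/MOVT instructions
     movt    r0, #0x1234

   versus a constant-pool load:

     ldr     r0, .LC0            @ one load, plus the pooled literal
     ...
   .LC0:
     .word   0x12345678  */
static unsigned int
example_magic_constant (void)
{
  return 0x12345678;
}
#endif
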
2295 /* Cortex-M7 tuning. */
2296
2297 const struct tune_params arm_cortex_m7_tune =
2298 {
2299 &v7m_extra_costs,
2300 &generic_addr_mode_costs, /* Addressing mode costs. */
2301 NULL, /* Sched adj cost. */
2302 arm_cortex_m7_branch_cost,
2303 &arm_default_vec_cost,
2304 0, /* Constant limit. */
2305 1, /* Max cond insns. */
2306 8, /* Memset max inline. */
2307 2, /* Issue rate. */
2308 ARM_PREFETCH_NOT_BENEFICIAL,
2309 tune_params::PREF_CONST_POOL_TRUE,
2310 tune_params::PREF_LDRD_FALSE,
2311 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2312 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2313 tune_params::DISPARAGE_FLAGS_NEITHER,
2314 tune_params::PREF_NEON_64_FALSE,
2315 tune_params::PREF_NEON_STRINGOPS_FALSE,
2316 tune_params::FUSE_NOTHING,
2317 tune_params::SCHED_AUTOPREF_OFF
2318 };
2319
2320 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2321 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2322 cortex-m23. */
2323 const struct tune_params arm_v6m_tune =
2324 {
2325 &generic_extra_costs, /* Insn extra costs. */
2326 &generic_addr_mode_costs, /* Addressing mode costs. */
2327 NULL, /* Sched adj cost. */
2328 arm_default_branch_cost,
2329 &arm_default_vec_cost, /* Vectorizer costs. */
2330 1, /* Constant limit. */
2331 5, /* Max cond insns. */
2332 8, /* Memset max inline. */
2333 1, /* Issue rate. */
2334 ARM_PREFETCH_NOT_BENEFICIAL,
2335 tune_params::PREF_CONST_POOL_FALSE,
2336 tune_params::PREF_LDRD_FALSE,
2337 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2338 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2339 tune_params::DISPARAGE_FLAGS_NEITHER,
2340 tune_params::PREF_NEON_64_FALSE,
2341 tune_params::PREF_NEON_STRINGOPS_FALSE,
2342 tune_params::FUSE_NOTHING,
2343 tune_params::SCHED_AUTOPREF_OFF
2344 };
2345
2346 const struct tune_params arm_fa726te_tune =
2347 {
2348 &generic_extra_costs, /* Insn extra costs. */
2349 &generic_addr_mode_costs, /* Addressing mode costs. */
2350 fa726te_sched_adjust_cost,
2351 arm_default_branch_cost,
2352 &arm_default_vec_cost,
2353 1, /* Constant limit. */
2354 5, /* Max cond insns. */
2355 8, /* Memset max inline. */
2356 2, /* Issue rate. */
2357 ARM_PREFETCH_NOT_BENEFICIAL,
2358 tune_params::PREF_CONST_POOL_TRUE,
2359 tune_params::PREF_LDRD_FALSE,
2360 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2361 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2362 tune_params::DISPARAGE_FLAGS_NEITHER,
2363 tune_params::PREF_NEON_64_FALSE,
2364 tune_params::PREF_NEON_STRINGOPS_FALSE,
2365 tune_params::FUSE_NOTHING,
2366 tune_params::SCHED_AUTOPREF_OFF
2367 };
2368
2369 /* Auto-generated CPU, FPU and architecture tables. */
2370 #include "arm-cpu-data.h"
2371
2372 /* The name of the preprocessor macro to define for this architecture. PROFILE
2373 is replaced by the architecture name (eg. 8A) in arm_option_override () and
2374 is thus chosen to be big enough to hold the longest architecture name. */
2375
2376 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
2377
2378 /* Supported TLS relocations. */
2379
2380 enum tls_reloc {
2381 TLS_GD32,
2382 TLS_LDM32,
2383 TLS_LDO32,
2384 TLS_IE32,
2385 TLS_LE32,
2386 TLS_DESCSEQ /* GNU scheme */
2387 };
2388
2389 /* The maximum number of insns to be used when loading a constant. */
2390 inline static int
2391 arm_constant_limit (bool size_p)
2392 {
2393 return size_p ? 1 : current_tune->constant_limit;
2394 }
2395
2396 /* Emit an insn that's a simple single-set. Both the operands must be known
2397 to be valid. */
2398 inline static rtx_insn *
2399 emit_set_insn (rtx x, rtx y)
2400 {
2401 return emit_insn (gen_rtx_SET (x, y));
2402 }
2403
2404 /* Return the number of bits set in VALUE. */
2405 static unsigned
2406 bit_count (unsigned long value)
2407 {
2408 unsigned long count = 0;
2409
2410 while (value)
2411 {
2412 count++;
2413 value &= value - 1; /* Clear the least-significant set bit. */
2414 }
2415
2416 return count;
2417 }
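
#if 0
/* Illustrative sketch, not part of the build: each iteration of the loop
   above clears the lowest set bit (value &= value - 1), so the result is
   the population count of VALUE.  */
gcc_assert (bit_count (0x0UL) == 0);
gcc_assert (bit_count (0x90001UL) == 3);        /* Bits 0, 16 and 19.  */
gcc_assert (bit_count (0xffffffffUL) == 32);
#endif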
2418
2419 /* Return the number of bits set in BMAP. */
2420 static unsigned
2421 bitmap_popcount (const sbitmap bmap)
2422 {
2423 unsigned int count = 0;
2424 unsigned int n = 0;
2425 sbitmap_iterator sbi;
2426
2427 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2428 count++;
2429 return count;
2430 }
2431
2432 typedef struct
2433 {
2434 machine_mode mode;
2435 const char *name;
2436 } arm_fixed_mode_set;
2437
2438 /* A small helper for setting fixed-point library libfuncs. */
2439
2440 static void
2441 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2442 const char *funcname, const char *modename,
2443 int num_suffix)
2444 {
2445 char buffer[50];
2446
2447 if (num_suffix == 0)
2448 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2449 else
2450 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2451
2452 set_optab_libfunc (optable, mode, buffer);
2453 }
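
#if 0
/* Illustrative sketch, not part of the build: example calls showing the
   names the helper above constructs; the optab/mode pairings mirror the
   loop in arm_init_libfuncs below.  */
arm_set_fixed_optab_libfunc (ssadd_optab, E_SAmode, "ssadd", "sa", 3);
  /* Registers "__gnu_ssaddsa3".  */
arm_set_fixed_optab_libfunc (neg_optab, E_QQmode, "neg", "qq", 2);
  /* Registers "__gnu_negqq2".  */
#endif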
2454
2455 static void
2456 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2457 machine_mode from, const char *funcname,
2458 const char *toname, const char *fromname)
2459 {
2460 char buffer[50];
2461 const char *maybe_suffix_2 = "";
2462
2463 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2464 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2465 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2466 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2467 maybe_suffix_2 = "2";
2468
2469 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2470 maybe_suffix_2);
2471
2472 set_conv_libfunc (optable, to, from, buffer);
2473 }
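
#if 0
/* Illustrative sketch, not part of the build: example calls showing how the
   "2" suffix selection above plays out.  */
arm_set_fixed_conv_libfunc (fract_optab, E_HQmode, E_QQmode, "fract", "hq", "qq");
  /* Both modes are signed fract modes, so the suffix is added:
     "__gnu_fractqqhq2".  */
arm_set_fixed_conv_libfunc (fract_optab, E_SFmode, E_SQmode, "fract", "sf", "sq");
  /* The destination is a float mode, so no suffix: "__gnu_fractsqsf".  */
#endif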
2474
2475 static GTY(()) rtx speculation_barrier_libfunc;
2476
2477 /* Set up library functions unique to ARM. */
2478 static void
2479 arm_init_libfuncs (void)
2480 {
2481 /* For Linux, we have access to kernel support for atomic operations. */
2482 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2483 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2484
2485 /* There are no special library functions unless we are using the
2486 ARM BPABI. */
2487 if (!TARGET_BPABI)
2488 return;
2489
2490 /* The functions below are described in Section 4 of the "Run-Time
2491 ABI for the ARM architecture", Version 1.0. */
2492
2493 /* Double-precision floating-point arithmetic. Table 2. */
2494 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2495 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2496 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2497 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2498 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2499
2500 /* Double-precision comparisons. Table 3. */
2501 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2502 set_optab_libfunc (ne_optab, DFmode, NULL);
2503 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2504 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2505 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2506 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2507 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2508
2509 /* Single-precision floating-point arithmetic. Table 4. */
2510 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2511 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2512 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2513 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2514 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2515
2516 /* Single-precision comparisons. Table 5. */
2517 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2518 set_optab_libfunc (ne_optab, SFmode, NULL);
2519 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2520 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2521 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2522 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2523 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
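
#if 0
  /* Illustrative sketch, not part of the build: with a soft-float ABI the
     user-level code below is expected to expand into calls to the routines
     registered above (__aeabi_dmul followed by __aeabi_dcmplt).  The
     function and its name are purely for illustration.  */
  static int
  example_scaled_less (double x, double scale, double limit)
  {
    return (x * scale) < limit;
  }
#endif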
2524
2525 /* Floating-point to integer conversions. Table 6. */
2526 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2527 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2528 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2529 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2530 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2531 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2532 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2533 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2534
2535 /* Conversions between floating types. Table 7. */
2536 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2537 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2538
2539 /* Integer to floating-point conversions. Table 8. */
2540 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2541 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2542 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2543 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2544 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2545 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2546 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2547 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2548
2549 /* Long long. Table 9. */
2550 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2551 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2552 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2553 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2554 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2555 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2556 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2557 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2558
2559 /* Integer (32/32->32) division. \S 4.3.1. */
2560 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2561 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2562
2563 /* The divmod functions are designed so that they can be used for
2564 plain division, even though they return both the quotient and the
2565 remainder. The quotient is returned in the usual location (i.e.,
2566 r0 for SImode, {r0, r1} for DImode), just as would be expected
2567 for an ordinary division routine. Because the AAPCS calling
2568 conventions specify that all of { r0, r1, r2, r3 } are
2569 call-clobbered registers, there is no need to tell the compiler
2570 explicitly that those registers are clobbered by these
2571 routines. */
2572 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2573 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
2574
2575 /* For SImode division the ABI provides div-without-mod routines,
2576 which are faster. */
2577 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2578 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2579
2580 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2581 divmod libcalls instead. */
2582 set_optab_libfunc (smod_optab, DImode, NULL);
2583 set_optab_libfunc (umod_optab, DImode, NULL);
2584 set_optab_libfunc (smod_optab, SImode, NULL);
2585 set_optab_libfunc (umod_optab, SImode, NULL);
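
#if 0
  /* Illustrative sketch, not part of the build: on an AEABI target without a
     hardware divider, the operations below are expected to become calls to
     the routines registered above.  Function names are for illustration.  */
  static long long
  example_quotient64 (long long n, long long d)
  {
    return n / d;       /* __aeabi_ldivmod; quotient taken from {r0, r1}.  */
  }

  static unsigned int
  example_remainder32 (unsigned int n, unsigned int d)
  {
    return n % d;       /* __aeabi_uidivmod; remainder taken from r1.  */
  }
#endif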
2586
2587 /* Half-precision float operations. The compiler handles all operations
2588 with NULL libfuncs by converting to SFmode.  */
2589 switch (arm_fp16_format)
2590 {
2591 case ARM_FP16_FORMAT_IEEE:
2592 case ARM_FP16_FORMAT_ALTERNATIVE:
2593
2594 /* Conversions. */
2595 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2596 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2597 ? "__gnu_f2h_ieee"
2598 : "__gnu_f2h_alternative"));
2599 set_conv_libfunc (sext_optab, SFmode, HFmode,
2600 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2601 ? "__gnu_h2f_ieee"
2602 : "__gnu_h2f_alternative"));
2603
2604 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2605 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2606 ? "__gnu_d2h_ieee"
2607 : "__gnu_d2h_alternative"));
2608
2609 /* Arithmetic. */
2610 set_optab_libfunc (add_optab, HFmode, NULL);
2611 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2612 set_optab_libfunc (smul_optab, HFmode, NULL);
2613 set_optab_libfunc (neg_optab, HFmode, NULL);
2614 set_optab_libfunc (sub_optab, HFmode, NULL);
2615
2616 /* Comparisons. */
2617 set_optab_libfunc (eq_optab, HFmode, NULL);
2618 set_optab_libfunc (ne_optab, HFmode, NULL);
2619 set_optab_libfunc (lt_optab, HFmode, NULL);
2620 set_optab_libfunc (le_optab, HFmode, NULL);
2621 set_optab_libfunc (ge_optab, HFmode, NULL);
2622 set_optab_libfunc (gt_optab, HFmode, NULL);
2623 set_optab_libfunc (unord_optab, HFmode, NULL);
2624 break;
2625
2626 default:
2627 break;
2628 }
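
#if 0
  /* Illustrative sketch, not part of the build: with -mfp16-format=ieee and
     no half-precision hardware, arithmetic on __fp16 is performed by
     widening to SFmode, so the addition below uses __gnu_h2f_ieee for each
     operand, a single-precision add, then __gnu_f2h_ieee for the result.  */
  static __fp16
  example_half_add (__fp16 a, __fp16 b)
  {
    return a + b;
  }
#endif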
2629
2630 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2631 {
2632 const arm_fixed_mode_set fixed_arith_modes[] =
2633 {
2634 { E_QQmode, "qq" },
2635 { E_UQQmode, "uqq" },
2636 { E_HQmode, "hq" },
2637 { E_UHQmode, "uhq" },
2638 { E_SQmode, "sq" },
2639 { E_USQmode, "usq" },
2640 { E_DQmode, "dq" },
2641 { E_UDQmode, "udq" },
2642 { E_TQmode, "tq" },
2643 { E_UTQmode, "utq" },
2644 { E_HAmode, "ha" },
2645 { E_UHAmode, "uha" },
2646 { E_SAmode, "sa" },
2647 { E_USAmode, "usa" },
2648 { E_DAmode, "da" },
2649 { E_UDAmode, "uda" },
2650 { E_TAmode, "ta" },
2651 { E_UTAmode, "uta" }
2652 };
2653 const arm_fixed_mode_set fixed_conv_modes[] =
2654 {
2655 { E_QQmode, "qq" },
2656 { E_UQQmode, "uqq" },
2657 { E_HQmode, "hq" },
2658 { E_UHQmode, "uhq" },
2659 { E_SQmode, "sq" },
2660 { E_USQmode, "usq" },
2661 { E_DQmode, "dq" },
2662 { E_UDQmode, "udq" },
2663 { E_TQmode, "tq" },
2664 { E_UTQmode, "utq" },
2665 { E_HAmode, "ha" },
2666 { E_UHAmode, "uha" },
2667 { E_SAmode, "sa" },
2668 { E_USAmode, "usa" },
2669 { E_DAmode, "da" },
2670 { E_UDAmode, "uda" },
2671 { E_TAmode, "ta" },
2672 { E_UTAmode, "uta" },
2673 { E_QImode, "qi" },
2674 { E_HImode, "hi" },
2675 { E_SImode, "si" },
2676 { E_DImode, "di" },
2677 { E_TImode, "ti" },
2678 { E_SFmode, "sf" },
2679 { E_DFmode, "df" }
2680 };
2681 unsigned int i, j;
2682
2683 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2684 {
2685 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2686 "add", fixed_arith_modes[i].name, 3);
2687 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2688 "ssadd", fixed_arith_modes[i].name, 3);
2689 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2690 "usadd", fixed_arith_modes[i].name, 3);
2691 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2692 "sub", fixed_arith_modes[i].name, 3);
2693 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2694 "sssub", fixed_arith_modes[i].name, 3);
2695 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2696 "ussub", fixed_arith_modes[i].name, 3);
2697 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2698 "mul", fixed_arith_modes[i].name, 3);
2699 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2700 "ssmul", fixed_arith_modes[i].name, 3);
2701 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2702 "usmul", fixed_arith_modes[i].name, 3);
2703 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2704 "div", fixed_arith_modes[i].name, 3);
2705 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2706 "udiv", fixed_arith_modes[i].name, 3);
2707 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2708 "ssdiv", fixed_arith_modes[i].name, 3);
2709 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2710 "usdiv", fixed_arith_modes[i].name, 3);
2711 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2712 "neg", fixed_arith_modes[i].name, 2);
2713 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2714 "ssneg", fixed_arith_modes[i].name, 2);
2715 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2716 "usneg", fixed_arith_modes[i].name, 2);
2717 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2718 "ashl", fixed_arith_modes[i].name, 3);
2719 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2720 "ashr", fixed_arith_modes[i].name, 3);
2721 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2722 "lshr", fixed_arith_modes[i].name, 3);
2723 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2724 "ssashl", fixed_arith_modes[i].name, 3);
2725 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2726 "usashl", fixed_arith_modes[i].name, 3);
2727 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2728 "cmp", fixed_arith_modes[i].name, 2);
2729 }
2730
2731 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2732 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2733 {
2734 if (i == j
2735 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2736 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2737 continue;
2738
2739 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2740 fixed_conv_modes[j].mode, "fract",
2741 fixed_conv_modes[i].name,
2742 fixed_conv_modes[j].name);
2743 arm_set_fixed_conv_libfunc (satfract_optab,
2744 fixed_conv_modes[i].mode,
2745 fixed_conv_modes[j].mode, "satfract",
2746 fixed_conv_modes[i].name,
2747 fixed_conv_modes[j].name);
2748 arm_set_fixed_conv_libfunc (fractuns_optab,
2749 fixed_conv_modes[i].mode,
2750 fixed_conv_modes[j].mode, "fractuns",
2751 fixed_conv_modes[i].name,
2752 fixed_conv_modes[j].name);
2753 arm_set_fixed_conv_libfunc (satfractuns_optab,
2754 fixed_conv_modes[i].mode,
2755 fixed_conv_modes[j].mode, "satfractuns",
2756 fixed_conv_modes[i].name,
2757 fixed_conv_modes[j].name);
2758 }
2759 }
2760
2761 if (TARGET_AAPCS_BASED)
2762 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2763
2764 speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2765 }
2766
2767 /* On AAPCS systems, this is the "struct __va_list". */
2768 static GTY(()) tree va_list_type;
2769
2770 /* Return the type to use as __builtin_va_list. */
2771 static tree
2772 arm_build_builtin_va_list (void)
2773 {
2774 tree va_list_name;
2775 tree ap_field;
2776
2777 if (!TARGET_AAPCS_BASED)
2778 return std_build_builtin_va_list ();
2779
2780 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2781 defined as:
2782
2783 struct __va_list
2784 {
2785 void *__ap;
2786 };
2787
2788 The C Library ABI further reinforces this definition in \S
2789 4.1.
2790
2791 We must follow this definition exactly. The structure tag
2792 name is visible in C++ mangled names, and thus forms a part
2793 of the ABI. The field name may be used by people who
2794 #include <stdarg.h>. */
2795 /* Create the type. */
2796 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2797 /* Give it the required name. */
2798 va_list_name = build_decl (BUILTINS_LOCATION,
2799 TYPE_DECL,
2800 get_identifier ("__va_list"),
2801 va_list_type);
2802 DECL_ARTIFICIAL (va_list_name) = 1;
2803 TYPE_NAME (va_list_type) = va_list_name;
2804 TYPE_STUB_DECL (va_list_type) = va_list_name;
2805 /* Create the __ap field. */
2806 ap_field = build_decl (BUILTINS_LOCATION,
2807 FIELD_DECL,
2808 get_identifier ("__ap"),
2809 ptr_type_node);
2810 DECL_ARTIFICIAL (ap_field) = 1;
2811 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2812 TYPE_FIELDS (va_list_type) = ap_field;
2813 /* Compute its layout. */
2814 layout_type (va_list_type);
2815
2816 return va_list_type;
2817 }
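
#if 0
/* Illustrative sketch, not part of the build: user code only manipulates the
   __ap field above indirectly, through <stdarg.h>, so the AAPCS layout is
   transparent to ordinary variadic functions such as this one.  */
#include <stdarg.h>

static int
example_sum_ints (int count, ...)
{
  va_list ap;            /* struct __va_list { void *__ap; } on AAPCS.  */
  int total = 0;
  va_start (ap, count);
  for (int i = 0; i < count; i++)
    total += va_arg (ap, int);
  va_end (ap);
  return total;
}
#endif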
2818
2819 /* Return an expression of type "void *" pointing to the next
2820 available argument in a variable-argument list. VALIST is the
2821 user-level va_list object, of type __builtin_va_list. */
2822 static tree
2823 arm_extract_valist_ptr (tree valist)
2824 {
2825 if (TREE_TYPE (valist) == error_mark_node)
2826 return error_mark_node;
2827
2828 /* On an AAPCS target, the pointer is stored within "struct
2829 va_list". */
2830 if (TARGET_AAPCS_BASED)
2831 {
2832 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2833 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2834 valist, ap_field, NULL_TREE);
2835 }
2836
2837 return valist;
2838 }
2839
2840 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2841 static void
2842 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2843 {
2844 valist = arm_extract_valist_ptr (valist);
2845 std_expand_builtin_va_start (valist, nextarg);
2846 }
2847
2848 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2849 static tree
2850 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2851 gimple_seq *post_p)
2852 {
2853 valist = arm_extract_valist_ptr (valist);
2854 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2855 }
2856
2857 /* Check any incompatible options that the user has specified. */
2858 static void
2859 arm_option_check_internal (struct gcc_options *opts)
2860 {
2861 int flags = opts->x_target_flags;
2862
2863 /* iWMMXt and NEON are incompatible. */
2864 if (TARGET_IWMMXT
2865 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2866 error ("iWMMXt and NEON are incompatible");
2867
2868 /* Make sure that the processor choice does not conflict with any of the
2869 other command line choices. */
2870 if (TARGET_ARM_P (flags)
2871 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2872 error ("target CPU does not support ARM mode");
2873
2874 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2875 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2876 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2877
2878 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2879 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2880
2881 /* If this target is normally configured to use APCS frames, warn if they
2882 are turned off and debugging is turned on. */
2883 if (TARGET_ARM_P (flags)
2884 && write_symbols != NO_DEBUG
2885 && !TARGET_APCS_FRAME
2886 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2887 warning (0, "%<-g%> with %<-mno-apcs-frame%> may not give sensible "
2888 "debugging");
2889
2890 /* iWMMXt unsupported under Thumb mode. */
2891 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2892 error ("iWMMXt unsupported under Thumb mode");
2893
2894 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2895 error ("cannot use %<-mtp=cp15%> with 16-bit Thumb");
2896
2897 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2898 {
2899 error ("RTP PIC is incompatible with Thumb");
2900 flag_pic = 0;
2901 }
2902
2903 if (target_pure_code || target_slow_flash_data)
2904 {
2905 const char *flag = (target_pure_code ? "-mpure-code" :
2906 "-mslow-flash-data");
2907
2908 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2909 with MOVT. */
2910 if (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON)
2911 error ("%s only supports non-pic code on M-profile targets with the "
2912 "MOVT instruction", flag);
2913
2914 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
2915 -mword-relocations forbids relocation of MOVT/MOVW. */
2916 if (target_word_relocations)
2917 error ("%s incompatible with %<-mword-relocations%>", flag);
2918 }
2919 }
2920
2921 /* Recompute the global settings depending on target attribute options. */
2922
2923 static void
2924 arm_option_params_internal (void)
2925 {
2926 /* If we are not using the default (ARM mode) section anchor offset
2927 ranges, then set the correct ranges now. */
2928 if (TARGET_THUMB1)
2929 {
2930 /* Thumb-1 LDR instructions cannot have negative offsets.
2931 Permissible positive offset ranges are 5-bit (for byte loads),
2932 6-bit (for halfword loads), or 7-bit (for word loads).
2933 Empirical results suggest a 7-bit anchor range gives the best
2934 overall code size. */
2935 targetm.min_anchor_offset = 0;
2936 targetm.max_anchor_offset = 127;
2937 }
2938 else if (TARGET_THUMB2)
2939 {
2940 /* The minimum is set such that the total size of the block
2941 for a particular anchor is 248 + 1 + 4095 bytes, which is
2942 divisible by eight, ensuring natural spacing of anchors. */
2943 targetm.min_anchor_offset = -248;
2944 targetm.max_anchor_offset = 4095;
2945 }
2946 else
2947 {
2948 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2949 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2950 }
2951
2952 /* Increase the number of conditional instructions with -Os. */
2953 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2954
2955 /* For THUMB2, we limit the conditional sequence to one IT block. */
2956 if (TARGET_THUMB2)
2957 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
2958 }
2959
2960 /* True if -mflip-thumb should next add an attribute for the default
2961 mode, false if it should next add an attribute for the opposite mode. */
2962 static GTY(()) bool thumb_flipper;
2963
2964 /* Options after initial target override. */
2965 static GTY(()) tree init_optimize;
2966
2967 static void
2968 arm_override_options_after_change_1 (struct gcc_options *opts)
2969 {
2970 /* -falign-functions without argument: supply one. */
2971 if (opts->x_flag_align_functions && !opts->x_str_align_functions)
2972 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2973 && opts->x_optimize_size ? "2" : "4";
2974 }
2975
2976 /* Implement targetm.override_options_after_change. */
2977
2978 static void
2979 arm_override_options_after_change (void)
2980 {
2981 arm_configure_build_target (&arm_active_target,
2982 TREE_TARGET_OPTION (target_option_default_node),
2983 &global_options_set, false);
2984
2985 arm_override_options_after_change_1 (&global_options);
2986 }
2987
2988 /* Implement TARGET_OPTION_SAVE. */
2989 static void
2990 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2991 {
2992 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2993 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2994 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2995 }
2996
2997 /* Implement TARGET_OPTION_RESTORE. */
2998 static void
2999 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
3000 {
3001 opts->x_arm_arch_string = ptr->x_arm_arch_string;
3002 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
3003 opts->x_arm_tune_string = ptr->x_arm_tune_string;
3004 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
3005 false);
3006 }
3007
3008 /* Reset options between modes that the user has specified. */
3009 static void
3010 arm_option_override_internal (struct gcc_options *opts,
3011 struct gcc_options *opts_set)
3012 {
3013 arm_override_options_after_change_1 (opts);
3014
3015 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3016 {
3017 /* The default is to enable interworking, so this warning message would
3018 be confusing to users who have just compiled with
3019 eg, -march=armv4. */
3020 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3021 opts->x_target_flags &= ~MASK_INTERWORK;
3022 }
3023
3024 if (TARGET_THUMB_P (opts->x_target_flags)
3025 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3026 {
3027 warning (0, "target CPU does not support THUMB instructions");
3028 opts->x_target_flags &= ~MASK_THUMB;
3029 }
3030
3031 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3032 {
3033 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3034 opts->x_target_flags &= ~MASK_APCS_FRAME;
3035 }
3036
3037 /* Callee super interworking implies thumb interworking. Adding
3038 this to the flags here simplifies the logic elsewhere. */
3039 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3040 opts->x_target_flags |= MASK_INTERWORK;
3041
3042 /* Need to remember initial values so combinations of options like
3043 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3044 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3045
3046 if (! opts_set->x_arm_restrict_it)
3047 opts->x_arm_restrict_it = arm_arch8;
3048
3049 /* ARM execution state and M profile don't have [restrict] IT. */
3050 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3051 opts->x_arm_restrict_it = 0;
3052
3053 /* Enable -munaligned-access by default for
3054 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3055 i.e. Thumb2 and ARM state only.
3056 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3057 - ARMv8 architecture-based processors.
3058
3059 Disable -munaligned-access by default for
3060 - all pre-ARMv6 architecture-based processors
3061 - ARMv6-M architecture-based processors
3062 - ARMv8-M Baseline processors. */
3063
3064 if (! opts_set->x_unaligned_access)
3065 {
3066 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3067 && arm_arch6 && (arm_arch_notm || arm_arch7));
3068 }
3069 else if (opts->x_unaligned_access == 1
3070 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3071 {
3072 warning (0, "target CPU does not support unaligned accesses");
3073 opts->x_unaligned_access = 0;
3074 }
3075
3076 /* Don't warn since it's on by default in -O2. */
3077 if (TARGET_THUMB1_P (opts->x_target_flags))
3078 opts->x_flag_schedule_insns = 0;
3079 else
3080 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3081
3082 /* Disable shrink-wrap when optimizing function for size, since it tends to
3083 generate additional returns. */
3084 if (optimize_function_for_size_p (cfun)
3085 && TARGET_THUMB2_P (opts->x_target_flags))
3086 opts->x_flag_shrink_wrap = false;
3087 else
3088 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3089
3090 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3091 - epilogue_insns - does not accurately model the corresponding insns
3092 emitted in the asm file. In particular, see the comment in thumb_exit
3093 'Find out how many of the (return) argument registers we can corrupt'.
3094 As a consequence, the epilogue may clobber registers without fipa-ra
3095 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3096 TODO: Accurately model clobbers for epilogue_insns and reenable
3097 fipa-ra. */
3098 if (TARGET_THUMB1_P (opts->x_target_flags))
3099 opts->x_flag_ipa_ra = 0;
3100 else
3101 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3102
3103 /* Thumb2 inline assembly code should always use unified syntax.
3104 This will apply to ARM and Thumb1 eventually. */
3105 if (TARGET_THUMB2_P (opts->x_target_flags))
3106 opts->x_inline_asm_unified = true;
3107
3108 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3109 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3110 #endif
3111 }
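
#if 0
/* Illustrative sketch, not part of the build: per-function target attributes
   (and -mflip-thumb) re-enter the option-override path above, which is why
   the initial optimization options are remembered in init_optimize.  */
__attribute__ ((target ("thumb")))
static int example_add_thumb (int a, int b) { return a + b; }

__attribute__ ((target ("arm")))
static int example_add_arm (int a, int b) { return a + b; }
#endif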
3112
3113 static sbitmap isa_all_fpubits;
3114 static sbitmap isa_quirkbits;
3115
3116 /* Configure a build target TARGET from the user-specified options OPTS and
3117 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3118 architecture have been specified, but the two are not identical. */
3119 void
3120 arm_configure_build_target (struct arm_build_target *target,
3121 struct cl_target_option *opts,
3122 struct gcc_options *opts_set,
3123 bool warn_compatible)
3124 {
3125 const cpu_option *arm_selected_tune = NULL;
3126 const arch_option *arm_selected_arch = NULL;
3127 const cpu_option *arm_selected_cpu = NULL;
3128 const arm_fpu_desc *arm_selected_fpu = NULL;
3129 const char *tune_opts = NULL;
3130 const char *arch_opts = NULL;
3131 const char *cpu_opts = NULL;
3132
3133 bitmap_clear (target->isa);
3134 target->core_name = NULL;
3135 target->arch_name = NULL;
3136
3137 if (opts_set->x_arm_arch_string)
3138 {
3139 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3140 "-march",
3141 opts->x_arm_arch_string);
3142 arch_opts = strchr (opts->x_arm_arch_string, '+');
3143 }
3144
3145 if (opts_set->x_arm_cpu_string)
3146 {
3147 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3148 opts->x_arm_cpu_string);
3149 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3150 arm_selected_tune = arm_selected_cpu;
3151 /* If taking the tuning from -mcpu, we don't need to rescan the
3152 options for tuning. */
3153 }
3154
3155 if (opts_set->x_arm_tune_string)
3156 {
3157 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3158 opts->x_arm_tune_string);
3159 tune_opts = strchr (opts->x_arm_tune_string, '+');
3160 }
3161
3162 if (arm_selected_arch)
3163 {
3164 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3165 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3166 arch_opts);
3167
3168 if (arm_selected_cpu)
3169 {
3170 auto_sbitmap cpu_isa (isa_num_bits);
3171 auto_sbitmap isa_delta (isa_num_bits);
3172
3173 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3174 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3175 cpu_opts);
3176 bitmap_xor (isa_delta, cpu_isa, target->isa);
3177 /* Ignore any bits that are quirk bits. */
3178 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3179 /* Ignore (for now) any bits that might be set by -mfpu. */
3180 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3181
3182 if (!bitmap_empty_p (isa_delta))
3183 {
3184 if (warn_compatible)
3185 warning (0, "switch %<-mcpu=%s%> conflicts "
3186 "with %<-march=%s%> switch",
3187 arm_selected_cpu->common.name,
3188 arm_selected_arch->common.name);
3189 /* -march wins for code generation.
3190 -mcpu wins for default tuning. */
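/* For example (illustrative): -mcpu=cortex-m4 together with -march=armv7-a
   leaves a non-empty ISA delta after the quirk and FPU bits are masked out,
   so the warning above is issued, code is generated for armv7-a, and the
   Cortex-M4 tuning is kept unless -mtune says otherwise.  */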
3191 if (!arm_selected_tune)
3192 arm_selected_tune = arm_selected_cpu;
3193
3194 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3195 target->arch_name = arm_selected_arch->common.name;
3196 }
3197 else
3198 {
3199 /* Architecture and CPU are essentially the same.
3200 Prefer the CPU setting. */
3201 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3202 target->core_name = arm_selected_cpu->common.name;
3203 /* Copy the CPU's capabilities, so that we inherit the
3204 appropriate extensions and quirks. */
3205 bitmap_copy (target->isa, cpu_isa);
3206 }
3207 }
3208 else
3209 {
3210 /* Pick a CPU based on the architecture. */
3211 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3212 target->arch_name = arm_selected_arch->common.name;
3213 /* Note: target->core_name is left unset in this path. */
3214 }
3215 }
3216 else if (arm_selected_cpu)
3217 {
3218 target->core_name = arm_selected_cpu->common.name;
3219 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3220 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3221 cpu_opts);
3222 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3223 }
3224 /* If the user did not specify a processor or architecture, choose
3225 one for them. */
3226 else
3227 {
3228 const cpu_option *sel;
3229 auto_sbitmap sought_isa (isa_num_bits);
3230 bitmap_clear (sought_isa);
3231 auto_sbitmap default_isa (isa_num_bits);
3232
3233 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3234 TARGET_CPU_DEFAULT);
3235 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3236 gcc_assert (arm_selected_cpu->common.name);
3237
3238 /* RWE: All of the selection logic below (to the end of this
3239 'if' clause) looks somewhat suspect. It appears to be mostly
3240 there to support forcing thumb support when the default CPU
3241 does not have thumb (somewhat dubious in terms of what the
3242 user might be expecting). I think it should be removed once
3243 support for the pre-thumb era cores is removed. */
3244 sel = arm_selected_cpu;
3245 arm_initialize_isa (default_isa, sel->common.isa_bits);
3246 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3247 cpu_opts);
3248
3249 /* Now check to see if the user has specified any command line
3250 switches that require certain abilities from the cpu. */
3251
3252 if (TARGET_INTERWORK || TARGET_THUMB)
3253 bitmap_set_bit (sought_isa, isa_bit_thumb);
3254
3255 /* If there are such requirements and the default CPU does not
3256 satisfy them, we need to run over the complete list of
3257 cores looking for one that is satisfactory. */
3258 if (!bitmap_empty_p (sought_isa)
3259 && !bitmap_subset_p (sought_isa, default_isa))
3260 {
3261 auto_sbitmap candidate_isa (isa_num_bits);
3262 /* We're only interested in a CPU with at least the
3263 capabilities of the default CPU and the required
3264 additional features. */
3265 bitmap_ior (default_isa, default_isa, sought_isa);
3266
3267 /* Try to locate a CPU type that supports all of the abilities
3268 of the default CPU, plus the extra abilities requested by
3269 the user. */
3270 for (sel = all_cores; sel->common.name != NULL; sel++)
3271 {
3272 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3273 /* An exact match? */
3274 if (bitmap_equal_p (default_isa, candidate_isa))
3275 break;
3276 }
3277
3278 if (sel->common.name == NULL)
3279 {
3280 unsigned current_bit_count = isa_num_bits;
3281 const cpu_option *best_fit = NULL;
3282
3283 /* Ideally we would like to issue an error message here
3284 saying that it was not possible to find a CPU compatible
3285 with the default CPU, but which also supports the command
3286 line options specified by the programmer, and so they
3287 ought to use the -mcpu=<name> command line option to
3288 override the default CPU type.
3289
3290 If we cannot find a CPU that has exactly the
3291 characteristics of the default CPU and the given
3292 command line options we scan the array again looking
3293 for a best match. The best match must have at least
3294 the capabilities of the perfect match. */
3295 for (sel = all_cores; sel->common.name != NULL; sel++)
3296 {
3297 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3298
3299 if (bitmap_subset_p (default_isa, candidate_isa))
3300 {
3301 unsigned count;
3302
3303 bitmap_and_compl (candidate_isa, candidate_isa,
3304 default_isa);
3305 count = bitmap_popcount (candidate_isa);
3306
3307 if (count < current_bit_count)
3308 {
3309 best_fit = sel;
3310 current_bit_count = count;
3311 }
3312 }
3313
3314 gcc_assert (best_fit);
3315 sel = best_fit;
3316 }
3317 }
3318 arm_selected_cpu = sel;
3319 }
3320
3321 /* Now we know the CPU, we can finally initialize the target
3322 structure. */
3323 target->core_name = arm_selected_cpu->common.name;
3324 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3325 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3326 cpu_opts);
3327 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3328 }
3329
3330 gcc_assert (arm_selected_cpu);
3331 gcc_assert (arm_selected_arch);
3332
3333 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3334 {
3335 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3336 auto_sbitmap fpu_bits (isa_num_bits);
3337
3338 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3339 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3340 bitmap_ior (target->isa, target->isa, fpu_bits);
3341 }
3342
3343 if (!arm_selected_tune)
3344 arm_selected_tune = arm_selected_cpu;
3345 else /* Validate the features passed to -mtune. */
3346 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3347
3348 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3349
3350 /* Finish initializing the target structure. */
3351 target->arch_pp_name = arm_selected_arch->arch;
3352 target->base_arch = arm_selected_arch->base_arch;
3353 target->profile = arm_selected_arch->profile;
3354
3355 target->tune_flags = tune_data->tune_flags;
3356 target->tune = tune_data->tune;
3357 target->tune_core = tune_data->scheduler;
3358 arm_option_reconfigure_globals ();
3359 }
3360
3361 /* Fix up any incompatible options that the user has specified. */
3362 static void
3363 arm_option_override (void)
3364 {
3365 static const enum isa_feature fpu_bitlist[]
3366 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3367 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3368 cl_target_option opts;
3369
3370 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3371 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3372
3373 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3374 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3375
3376 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3377
3378 if (!global_options_set.x_arm_fpu_index)
3379 {
3380 bool ok;
3381 int fpu_index;
3382
3383 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3384 CL_TARGET);
3385 gcc_assert (ok);
3386 arm_fpu_index = (enum fpu_type) fpu_index;
3387 }
3388
3389 cl_target_option_save (&opts, &global_options);
3390 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3391 true);
3392
3393 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3394 SUBTARGET_OVERRIDE_OPTIONS;
3395 #endif
3396
3397 /* Initialize boolean versions of the architectural flags, for use
3398 in the arm.md file and for enabling feature flags. */
3399 arm_option_reconfigure_globals ();
3400
3401 arm_tune = arm_active_target.tune_core;
3402 tune_flags = arm_active_target.tune_flags;
3403 current_tune = arm_active_target.tune;
3404
3405 /* TBD: Dwarf info for apcs frame is not handled yet. */
3406 if (TARGET_APCS_FRAME)
3407 flag_shrink_wrap = false;
3408
3409 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3410 {
3411 warning (0, "%<-mapcs-stack-check%> incompatible with "
3412 "%<-mno-apcs-frame%>");
3413 target_flags |= MASK_APCS_FRAME;
3414 }
3415
3416 if (TARGET_POKE_FUNCTION_NAME)
3417 target_flags |= MASK_APCS_FRAME;
3418
3419 if (TARGET_APCS_REENT && flag_pic)
3420 error ("%<-fpic%> and %<-mapcs-reent%> are incompatible");
3421
3422 if (TARGET_APCS_REENT)
3423 warning (0, "APCS reentrant code not supported. Ignored");
3424
3425 /* Set up some tuning parameters. */
3426 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3427 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3428 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3429 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3430 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3431 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3432
3433 /* For arm2/3 there is no need to do any scheduling if we are doing
3434 software floating-point. */
3435 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3436 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3437
3438 /* Override the default structure alignment for AAPCS ABI. */
3439 if (!global_options_set.x_arm_structure_size_boundary)
3440 {
3441 if (TARGET_AAPCS_BASED)
3442 arm_structure_size_boundary = 8;
3443 }
3444 else
3445 {
3446 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3447
3448 if (arm_structure_size_boundary != 8
3449 && arm_structure_size_boundary != 32
3450 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3451 {
3452 if (ARM_DOUBLEWORD_ALIGN)
3453 warning (0,
3454 "structure size boundary can only be set to 8, 32 or 64");
3455 else
3456 warning (0, "structure size boundary can only be set to 8 or 32");
3457 arm_structure_size_boundary
3458 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3459 }
3460 }
3461
3462 if (TARGET_VXWORKS_RTP)
3463 {
3464 if (!global_options_set.x_arm_pic_data_is_text_relative)
3465 arm_pic_data_is_text_relative = 0;
3466 }
3467 else if (flag_pic
3468 && !arm_pic_data_is_text_relative
3469 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3470 /* When the text and data segments don't have a fixed displacement, the
3471 intended use is with a single, read-only PIC base register.
3472 Unless the user explicitly requested not to do that, set
3473 it. */
3474 target_flags |= MASK_SINGLE_PIC_BASE;
3475
3476 /* If stack checking is disabled, we can use r10 as the PIC register,
3477 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3478 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3479 {
3480 if (TARGET_VXWORKS_RTP)
3481 warning (0, "RTP PIC is incompatible with %<-msingle-pic-base%>");
3482 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3483 }
3484
3485 if (flag_pic && TARGET_VXWORKS_RTP)
3486 arm_pic_register = 9;
3487
3488 if (arm_pic_register_string != NULL)
3489 {
3490 int pic_register = decode_reg_name (arm_pic_register_string);
3491
3492 if (!flag_pic)
3493 warning (0, "%<-mpic-register=%> is useless without %<-fpic%>");
3494
3495 /* Prevent the user from choosing an obviously stupid PIC register. */
3496 else if (pic_register < 0 || call_used_regs[pic_register]
3497 || pic_register == HARD_FRAME_POINTER_REGNUM
3498 || pic_register == STACK_POINTER_REGNUM
3499 || pic_register >= PC_REGNUM
3500 || (TARGET_VXWORKS_RTP
3501 && (unsigned int) pic_register != arm_pic_register))
3502 error ("unable to use %qs for PIC register", arm_pic_register_string);
3503 else
3504 arm_pic_register = pic_register;
3505 }
3506
3507 if (flag_pic)
3508 target_word_relocations = 1;
3509
3510 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3511 if (fix_cm3_ldrd == 2)
3512 {
3513 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3514 fix_cm3_ldrd = 1;
3515 else
3516 fix_cm3_ldrd = 0;
3517 }
3518
3519 /* Hot/Cold partitioning is not currently supported, since we can't
3520 handle literal pool placement in that case. */
3521 if (flag_reorder_blocks_and_partition)
3522 {
3523 inform (input_location,
3524 "%<-freorder-blocks-and-partition%> not supported "
3525 "on this architecture");
3526 flag_reorder_blocks_and_partition = 0;
3527 flag_reorder_blocks = 1;
3528 }
3529
3530 if (flag_pic)
3531 /* Hoisting PIC address calculations more aggressively provides a small,
3532 but measurable, size reduction for PIC code. Therefore, we decrease
3533 the bar for unrestricted expression hoisting to the cost of PIC address
3534 calculation, which is 2 instructions. */
3535 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3536 global_options.x_param_values,
3537 global_options_set.x_param_values);
3538
3539 /* ARM EABI defaults to strict volatile bitfields. */
3540 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3541 && abi_version_at_least(2))
3542 flag_strict_volatile_bitfields = 1;
3543
3544 /* Enable software prefetching at -O3 for CPUs that have prefetch, and when
3545 we have deemed it beneficial (signified by setting
3546 prefetch.num_slots to 1 or more). */
3547 if (flag_prefetch_loop_arrays < 0
3548 && HAVE_prefetch
3549 && optimize >= 3
3550 && current_tune->prefetch.num_slots > 0)
3551 flag_prefetch_loop_arrays = 1;
3552
3553 /* Set up parameters to be used in the prefetching algorithm. Do not
3554 override the defaults unless we are tuning for a core we have
3555 researched values for. */
3556 if (current_tune->prefetch.num_slots > 0)
3557 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3558 current_tune->prefetch.num_slots,
3559 global_options.x_param_values,
3560 global_options_set.x_param_values);
3561 if (current_tune->prefetch.l1_cache_line_size >= 0)
3562 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3563 current_tune->prefetch.l1_cache_line_size,
3564 global_options.x_param_values,
3565 global_options_set.x_param_values);
3566 if (current_tune->prefetch.l1_cache_size >= 0)
3567 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3568 current_tune->prefetch.l1_cache_size,
3569 global_options.x_param_values,
3570 global_options_set.x_param_values);
3571
3572 /* Use Neon to perform 64-bit operations rather than core
3573 registers. */
3574 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3575 if (use_neon_for_64bits == 1)
3576 prefer_neon_for_64bits = true;
3577
3578 /* Use the alternative scheduling-pressure algorithm by default. */
3579 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3580 global_options.x_param_values,
3581 global_options_set.x_param_values);
3582
3583 /* Look through the ready list and all of the queue for instructions
3584 relevant to the L2 auto-prefetcher. */
3585 int param_sched_autopref_queue_depth;
3586
3587 switch (current_tune->sched_autopref)
3588 {
3589 case tune_params::SCHED_AUTOPREF_OFF:
3590 param_sched_autopref_queue_depth = -1;
3591 break;
3592
3593 case tune_params::SCHED_AUTOPREF_RANK:
3594 param_sched_autopref_queue_depth = 0;
3595 break;
3596
3597 case tune_params::SCHED_AUTOPREF_FULL:
3598 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3599 break;
3600
3601 default:
3602 gcc_unreachable ();
3603 }
3604
3605 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3606 param_sched_autopref_queue_depth,
3607 global_options.x_param_values,
3608 global_options_set.x_param_values);
3609
3610 /* Currently, for slow flash data, we just disable literal pools. We also
3611 disable them for pure-code. */
3612 if (target_slow_flash_data || target_pure_code)
3613 arm_disable_literal_pool = true;
3614
3615 /* Disable scheduling fusion by default if the target is not an armv7
3616 processor or does not prefer ldrd/strd. */
3617 if (flag_schedule_fusion == 2
3618 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3619 flag_schedule_fusion = 0;
3620
3621 /* Need to remember the initial options before they are overridden. */
3622 init_optimize = build_optimization_node (&global_options);
3623
3624 arm_options_perform_arch_sanity_checks ();
3625 arm_option_override_internal (&global_options, &global_options_set);
3626 arm_option_check_internal (&global_options);
3627 arm_option_params_internal ();
3628
3629 /* Create the default target_options structure. */
3630 target_option_default_node = target_option_current_node
3631 = build_target_option_node (&global_options);
3632
3633 /* Register global variables with the garbage collector. */
3634 arm_add_gc_roots ();
3635
3636 /* Record the initial mode for testing. */
3637 thumb_flipper = TARGET_THUMB;
3638 }
3639
3640
3641 /* Reconfigure global status flags from the active_target.isa. */
3642 void
3643 arm_option_reconfigure_globals (void)
3644 {
3645 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3646 arm_base_arch = arm_active_target.base_arch;
3647
3648 /* Initialize boolean versions of the architectural flags, for use
3649 in the arm.md file. */
3650 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3651 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3652 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3653 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3654 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3655 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3656 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3657 arm_arch6m = arm_arch6 && !arm_arch_notm;
3658 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3659 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3660 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3661 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3662 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3663 arm_arch8_3 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_3);
3664 arm_arch8_4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_4);
3665 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3666 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3667 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3668 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3669 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3670 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3671 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3672 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3673 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3674 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3675 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3676 if (arm_fp16_inst)
3677 {
3678 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3679 error ("selected fp16 options are incompatible");
3680 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3681 }
3682
3683 /* And finally, set up some quirks. */
3684 arm_arch_no_volatile_ce
3685 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3686 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3687 isa_bit_quirk_armv6kz);
3688
3689 /* Use the cp15 method if it is available. */
3690 if (target_thread_pointer == TP_AUTO)
3691 {
3692 if (arm_arch6k && !TARGET_THUMB1)
3693 target_thread_pointer = TP_CP15;
3694 else
3695 target_thread_pointer = TP_SOFT;
3696 }
3697 }
3698
3699 /* Perform some validation of the desired architecture against the rest of the
3700 options. */
3701 void
3702 arm_options_perform_arch_sanity_checks (void)
3703 {
3704 /* V5T code we generate is completely interworking capable, so we turn off
3705 TARGET_INTERWORK here to avoid many tests later on. */
3706
3707 /* XXX However, we must pass the right pre-processor defines to CPP
3708 or GLD can get confused. This is a hack. */
3709 if (TARGET_INTERWORK)
3710 arm_cpp_interwork = 1;
3711
3712 if (arm_arch5t)
3713 target_flags &= ~MASK_INTERWORK;
3714
3715 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3716 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3717
3718 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3719 error ("iwmmxt abi requires an iwmmxt capable cpu");
3720
3721 /* BPABI targets use linker tricks to allow interworking on cores
3722 without thumb support. */
3723 if (TARGET_INTERWORK
3724 && !TARGET_BPABI
3725 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3726 {
3727 warning (0, "target CPU does not support interworking" );
3728 target_flags &= ~MASK_INTERWORK;
3729 }
3730
3731 /* If soft-float is specified then don't use FPU. */
3732 if (TARGET_SOFT_FLOAT)
3733 arm_fpu_attr = FPU_NONE;
3734 else
3735 arm_fpu_attr = FPU_VFP;
3736
3737 if (TARGET_AAPCS_BASED)
3738 {
3739 if (TARGET_CALLER_INTERWORKING)
3740 error ("AAPCS does not support %<-mcaller-super-interworking%>");
3741 else
3742 if (TARGET_CALLEE_INTERWORKING)
3743 error ("AAPCS does not support %<-mcallee-super-interworking%>");
3744 }
3745
3746 /* __fp16 support currently assumes the core has ldrh. */
3747 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3748 sorry ("__fp16 and no ldrh");
3749
3750 if (use_cmse && !arm_arch_cmse)
3751 error ("target CPU does not support ARMv8-M Security Extensions");
3752
3753 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions,
3754 and ARMv8-M Baseline and Mainline do not allow such a configuration. */
3755 if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3756 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3757
3758
3759 if (TARGET_AAPCS_BASED)
3760 {
3761 if (arm_abi == ARM_ABI_IWMMXT)
3762 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3763 else if (TARGET_HARD_FLOAT_ABI)
3764 {
3765 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3766 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3767 error ("%<-mfloat-abi=hard%>: selected processor lacks an FPU");
3768 }
3769 else
3770 arm_pcs_default = ARM_PCS_AAPCS;
3771 }
3772 else
3773 {
3774 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3775 sorry ("%<-mfloat-abi=hard%> and VFP");
3776
3777 if (arm_abi == ARM_ABI_APCS)
3778 arm_pcs_default = ARM_PCS_APCS;
3779 else
3780 arm_pcs_default = ARM_PCS_ATPCS;
3781 }
3782 }
3783
3784 static void
3785 arm_add_gc_roots (void)
3786 {
3787 gcc_obstack_init(&minipool_obstack);
3788 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3789 }
3790 \f
3791 /* A table of known ARM exception types.
3792 For use with the interrupt function attribute. */
3793
3794 typedef struct
3795 {
3796 const char *const arg;
3797 const unsigned long return_value;
3798 }
3799 isr_attribute_arg;
3800
3801 static const isr_attribute_arg isr_attribute_args [] =
3802 {
3803 { "IRQ", ARM_FT_ISR },
3804 { "irq", ARM_FT_ISR },
3805 { "FIQ", ARM_FT_FIQ },
3806 { "fiq", ARM_FT_FIQ },
3807 { "ABORT", ARM_FT_ISR },
3808 { "abort", ARM_FT_ISR },
3809 { "ABORT", ARM_FT_ISR },
3810 { "abort", ARM_FT_ISR },
3811 { "UNDEF", ARM_FT_EXCEPTION },
3812 { "undef", ARM_FT_EXCEPTION },
3813 { "SWI", ARM_FT_EXCEPTION },
3814 { "swi", ARM_FT_EXCEPTION },
3815 { NULL, ARM_FT_NORMAL }
3816 };
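/* For illustration, a handler selecting one of the entries above might be
   declared as (example name only):

     void __attribute__ ((interrupt ("IRQ"))) uart_irq_handler (void);

   The string argument is compared case-sensitively by arm_isr_value below,
   which is why both upper- and lower-case spellings appear in the table.  */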
3817
3818 /* Returns the (interrupt) function type of the current
3819 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3820
3821 static unsigned long
3822 arm_isr_value (tree argument)
3823 {
3824 const isr_attribute_arg * ptr;
3825 const char * arg;
3826
3827 if (!arm_arch_notm)
3828 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3829
3830 /* No argument - default to IRQ. */
3831 if (argument == NULL_TREE)
3832 return ARM_FT_ISR;
3833
3834 /* Get the value of the argument. */
3835 if (TREE_VALUE (argument) == NULL_TREE
3836 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3837 return ARM_FT_UNKNOWN;
3838
3839 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3840
3841 /* Check it against the list of known arguments. */
3842 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3843 if (streq (arg, ptr->arg))
3844 return ptr->return_value;
3845
3846 /* An unrecognized interrupt type. */
3847 return ARM_FT_UNKNOWN;
3848 }
3849
3850 /* Computes the type of the current function. */
3851
3852 static unsigned long
3853 arm_compute_func_type (void)
3854 {
3855 unsigned long type = ARM_FT_UNKNOWN;
3856 tree a;
3857 tree attr;
3858
3859 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3860
3861 /* Decide if the current function is volatile. Such functions
3862 never return, and many memory cycles can be saved by not storing
3863 register values that will never be needed again. This optimization
3864 was added to speed up context switching in a kernel application. */
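/* For example, a declaration such as (illustrative only)

     void fatal_error (const char *msg) __attribute__ ((noreturn));

   is marked TREE_THIS_VOLATILE, so, subject to the exception and unwind
   checks below, its prologue can skip saving registers that would only be
   needed on return.  */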
3865 if (optimize > 0
3866 && (TREE_NOTHROW (current_function_decl)
3867 || !(flag_unwind_tables
3868 || (flag_exceptions
3869 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3870 && TREE_THIS_VOLATILE (current_function_decl))
3871 type |= ARM_FT_VOLATILE;
3872
3873 if (cfun->static_chain_decl != NULL)
3874 type |= ARM_FT_NESTED;
3875
3876 attr = DECL_ATTRIBUTES (current_function_decl);
3877
3878 a = lookup_attribute ("naked", attr);
3879 if (a != NULL_TREE)
3880 type |= ARM_FT_NAKED;
3881
3882 a = lookup_attribute ("isr", attr);
3883 if (a == NULL_TREE)
3884 a = lookup_attribute ("interrupt", attr);
3885
3886 if (a == NULL_TREE)
3887 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3888 else
3889 type |= arm_isr_value (TREE_VALUE (a));
3890
3891 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3892 type |= ARM_FT_CMSE_ENTRY;
3893
3894 return type;
3895 }
3896
3897 /* Returns the type of the current function. */
3898
3899 unsigned long
3900 arm_current_func_type (void)
3901 {
3902 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3903 cfun->machine->func_type = arm_compute_func_type ();
3904
3905 return cfun->machine->func_type;
3906 }
3907
3908 bool
3909 arm_allocate_stack_slots_for_args (void)
3910 {
3911 /* Naked functions should not allocate stack slots for arguments. */
3912 return !IS_NAKED (arm_current_func_type ());
3913 }
3914
3915 static bool
3916 arm_warn_func_return (tree decl)
3917 {
3918 /* Naked functions are implemented entirely in assembly, including the
3919 return sequence, so suppress warnings about this. */
3920 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3921 }
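/* For illustration (example body only), a function such as

     void __attribute__ ((naked)) reset_handler (void)
     {
       __asm__ ("b   main");
     }

   provides its own prologue, epilogue and return sequence in inline asm,
   which is why the two checks above exempt naked functions from argument
   stack slots and from missing-return warnings.  */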
3922
3923 \f
3924 /* Output assembler code for a block containing the constant parts
3925 of a trampoline, leaving space for the variable parts.
3926
3927 On the ARM (if r8 is the static chain regnum, and remembering that
3928 referencing pc adds an offset of 8), the trampoline looks like:
3929 ldr r8, [pc, #0]
3930 ldr pc, [pc]
3931 .word static chain value
3932 .word function's address
3933 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3934
3935 static void
3936 arm_asm_trampoline_template (FILE *f)
3937 {
3938 fprintf (f, "\t.syntax unified\n");
3939
3940 if (TARGET_ARM)
3941 {
3942 fprintf (f, "\t.arm\n");
3943 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3944 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3945 }
3946 else if (TARGET_THUMB2)
3947 {
3948 fprintf (f, "\t.thumb\n");
3949 /* The Thumb-2 trampoline is similar to the arm implementation.
3950 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3951 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3952 STATIC_CHAIN_REGNUM, PC_REGNUM);
3953 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3954 }
3955 else
3956 {
3957 ASM_OUTPUT_ALIGN (f, 2);
3958 fprintf (f, "\t.code\t16\n");
3959 fprintf (f, ".Ltrampoline_start:\n");
3960 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3961 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3962 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3963 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3964 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3965 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3966 }
3967 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3968 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3969 }
3970
3971 /* Emit RTL insns to initialize the variable parts of a trampoline. */
3972
3973 static void
3974 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3975 {
3976 rtx fnaddr, mem, a_tramp;
3977
3978 emit_block_move (m_tramp, assemble_trampoline_template (),
3979 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3980
3981 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3982 emit_move_insn (mem, chain_value);
3983
3984 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3985 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3986 emit_move_insn (mem, fnaddr);
3987
3988 a_tramp = XEXP (m_tramp, 0);
3989 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3990 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3991 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3992 }
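/* Trampolines are only needed when the address of a nested function that
   uses its static chain escapes, e.g. with the GNU C nested-function
   extension (illustrative only):

     int outer (int x)
     {
       int inner (int y) { return x + y; }
       int (*fp) (int) = inner;
       return fp (1);
     }

   Taking inner's address forces the template above to be copied to the
   stack and patched with the static chain value and inner's address; the
   __clear_cache call keeps the instruction cache coherent with the freshly
   written code.  */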
3993
3994 /* Thumb trampolines should be entered in thumb mode, so set
3995 the bottom bit of the address. */
3996
3997 static rtx
3998 arm_trampoline_adjust_address (rtx addr)
3999 {
4000 if (TARGET_THUMB)
4001 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
4002 NULL, 0, OPTAB_LIB_WIDEN);
4003 return addr;
4004 }
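/* E.g. a trampoline written at 0x20001000 is handed out as 0x20001001 on
   Thumb targets, so an indirect BX/BLX through the pointer enters the stub
   in Thumb state (the address value is illustrative).  */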
4005 \f
4006 /* Return 1 if it is possible to return using a single instruction.
4007 If SIBLING is non-null, this is a test for a return before a sibling
4008 call. SIBLING is the call insn, so we can examine its register usage. */
4009
4010 int
4011 use_return_insn (int iscond, rtx sibling)
4012 {
4013 int regno;
4014 unsigned int func_type;
4015 unsigned long saved_int_regs;
4016 unsigned HOST_WIDE_INT stack_adjust;
4017 arm_stack_offsets *offsets;
4018
4019 /* Never use a return instruction before reload has run. */
4020 if (!reload_completed)
4021 return 0;
4022
4023 func_type = arm_current_func_type ();
4024
4025 /* Naked, volatile and stack alignment functions need special
4026 consideration. */
4027 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4028 return 0;
4029
4030 /* So do interrupt functions that use the frame pointer and Thumb
4031 interrupt functions. */
4032 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4033 return 0;
4034
4035 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4036 && !optimize_function_for_size_p (cfun))
4037 return 0;
4038
4039 offsets = arm_get_frame_offsets ();
4040 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4041
4042 /* As do variadic functions. */
4043 if (crtl->args.pretend_args_size
4044 || cfun->machine->uses_anonymous_args
4045 /* Or if the function calls __builtin_eh_return () */
4046 || crtl->calls_eh_return
4047 /* Or if the function calls alloca */
4048 || cfun->calls_alloca
4049 /* Or if there is a stack adjustment. However, if the stack pointer
4050 is saved on the stack, we can use a pre-incrementing stack load. */
4051 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4052 && stack_adjust == 4))
4053 /* Or if the static chain register was saved above the frame, under the
4054 assumption that the stack pointer isn't saved on the stack. */
4055 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4056 && arm_compute_static_chain_stack_bytes() != 0))
4057 return 0;
4058
4059 saved_int_regs = offsets->saved_regs_mask;
4060
4061 /* Unfortunately, the insn
4062
4063 ldmib sp, {..., sp, ...}
4064
4065 triggers a bug on most SA-110 based devices, such that the stack
4066 pointer won't be correctly restored if the instruction takes a
4067 page fault. We work around this problem by popping r3 along with
4068 the other registers, since that is never slower than executing
4069 another instruction.
4070
4071 We test for !arm_arch5t here, because code for any architecture
4072 less than this could potentially be run on one of the buggy
4073 chips. */
4074 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4075 {
4076 /* Validate that r3 is a call-clobbered register (always true in
4077 the default ABI) ... */
4078 if (!call_used_regs[3])
4079 return 0;
4080
4081 /* ... that it isn't being used for a return value ... */
4082 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4083 return 0;
4084
4085 /* ... or for a tail-call argument ... */
4086 if (sibling)
4087 {
4088 gcc_assert (CALL_P (sibling));
4089
4090 if (find_regno_fusage (sibling, USE, 3))
4091 return 0;
4092 }
4093
4094 /* ... and that there are no call-saved registers in r0-r2
4095 (always true in the default ABI). */
4096 if (saved_int_regs & 0x7)
4097 return 0;
4098 }
4099
4100 /* Can't be done if interworking with Thumb, and any registers have been
4101 stacked. */
4102 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4103 return 0;
4104
4105 /* On StrongARM, conditional returns are expensive if they aren't
4106 taken and multiple registers have been stacked. */
4107 if (iscond && arm_tune_strongarm)
4108 {
4109 /* Conditional return when just the LR is stored is a simple
4110 conditional-load instruction; that's not expensive. */
4111 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4112 return 0;
4113
4114 if (flag_pic
4115 && arm_pic_register != INVALID_REGNUM
4116 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4117 return 0;
4118 }
4119
4120 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4121 several instructions if anything needs to be popped. */
4122 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4123 return 0;
4124
4125 /* If there are saved registers but the LR isn't saved, then we need
4126 two instructions for the return. */
4127 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4128 return 0;
4129
4130 /* Can't be done if any of the VFP regs are pushed,
4131 since this also requires an insn. */
4132 if (TARGET_HARD_FLOAT)
4133 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4134 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4135 return 0;
4136
4137 if (TARGET_REALLY_IWMMXT)
4138 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4139 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4140 return 0;
4141
4142 return 1;
4143 }
4144
4145 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4146 shrink-wrapping if possible. This is the case if we need to emit a
4147 prologue, which we can test by looking at the offsets. */
4148 bool
4149 use_simple_return_p (void)
4150 {
4151 arm_stack_offsets *offsets;
4152
4153 /* Note this function can be called before or after reload. */
4154 if (!reload_completed)
4155 arm_compute_frame_layout ();
4156
4157 offsets = arm_get_frame_offsets ();
4158 return offsets->outgoing_args != 0;
4159 }
4160
4161 /* Return TRUE if int I is a valid immediate ARM constant. */
4162
4163 int
4164 const_ok_for_arm (HOST_WIDE_INT i)
4165 {
4166 int lowbit;
4167
4168 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4169 be all zero, or all one. */
4170 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4171 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4172 != ((~(unsigned HOST_WIDE_INT) 0)
4173 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4174 return FALSE;
4175
4176 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4177
4178 /* Fast return for 0 and small values. We must do this for zero, since
4179 the code below can't handle that one case. */
4180 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4181 return TRUE;
4182
4183 /* Get the number of trailing zeros. */
4184 lowbit = ffs((int) i) - 1;
4185
4186 /* Only even shifts are allowed in ARM mode, so round down to the
4187 nearest even number. */
4188 if (TARGET_ARM)
4189 lowbit &= ~1;
4190
4191 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4192 return TRUE;
4193
4194 if (TARGET_ARM)
4195 {
4196 /* Allow rotated constants in ARM mode. */
4197 if (lowbit <= 4
4198 && ((i & ~0xc000003f) == 0
4199 || (i & ~0xf000000f) == 0
4200 || (i & ~0xfc000003) == 0))
4201 return TRUE;
4202 }
4203 else if (TARGET_THUMB2)
4204 {
4205 HOST_WIDE_INT v;
4206
4207 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4208 v = i & 0xff;
4209 v |= v << 16;
4210 if (i == v || i == (v | (v << 8)))
4211 return TRUE;
4212
4213 /* Allow repeated pattern 0xXY00XY00. */
4214 v = i & 0xff00;
4215 v |= v << 16;
4216 if (i == v)
4217 return TRUE;
4218 }
4219 else if (TARGET_HAVE_MOVT)
4220 {
4221 /* Thumb-1 targets with MOVT. */
4222 if (i > 0xffff)
4223 return FALSE;
4224 else
4225 return TRUE;
4226 }
4227
4228 return FALSE;
4229 }
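/* A rough standalone sketch of the ARM-mode rule above (for illustration,
   not used by GCC): a 32-bit value is a valid ARM immediate iff it is an
   8-bit value rotated right by an even amount, i.e. rotating X left by each
   even amount must eventually leave a value that fits in 8 bits:

     static int arm_immediate_p (unsigned int x)
     {
       for (int rot = 0; rot < 32; rot += 2)
         {
           unsigned int v = rot ? (x << rot) | (x >> (32 - rot)) : x;
           if (v <= 0xff)
             return 1;
         }
       return 0;
     }

   So 0x0000ab00 and 0xc0000034 are valid, while 0x00012345 is not; Thumb-2
   additionally accepts the replicated byte patterns checked above.  */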
4230
4231 /* Return true if I is a valid constant for the operation CODE. */
4232 int
4233 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4234 {
4235 if (const_ok_for_arm (i))
4236 return 1;
4237
4238 switch (code)
4239 {
4240 case SET:
4241 /* See if we can use movw. */
4242 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4243 return 1;
4244 else
4245 /* Otherwise, try mvn. */
4246 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4247
4248 case PLUS:
4249 /* See if we can use addw or subw. */
4250 if (TARGET_THUMB2
4251 && ((i & 0xfffff000) == 0
4252 || ((-i) & 0xfffff000) == 0))
4253 return 1;
4254 /* Fall through. */
4255 case COMPARE:
4256 case EQ:
4257 case NE:
4258 case GT:
4259 case LE:
4260 case LT:
4261 case GE:
4262 case GEU:
4263 case LTU:
4264 case GTU:
4265 case LEU:
4266 case UNORDERED:
4267 case ORDERED:
4268 case UNEQ:
4269 case UNGE:
4270 case UNLT:
4271 case UNGT:
4272 case UNLE:
4273 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4274
4275 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4276 case XOR:
4277 return 0;
4278
4279 case IOR:
4280 if (TARGET_THUMB2)
4281 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4282 return 0;
4283
4284 case AND:
4285 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4286
4287 default:
4288 gcc_unreachable ();
4289 }
4290 }
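/* For example, with CODE == PLUS the constant -1 is accepted even though it
   is not a valid ARM immediate itself: its negation 1 is, so the addition
   can be emitted as a subtraction (and on Thumb-2 the addw/subw check above
   also covers 12-bit constants).  */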
4291
4292 /* Return true if I is a valid di mode constant for the operation CODE. */
4293 int
4294 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4295 {
4296 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4297 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4298 rtx hi = GEN_INT (hi_val);
4299 rtx lo = GEN_INT (lo_val);
4300
4301 if (TARGET_THUMB1)
4302 return 0;
4303
4304 switch (code)
4305 {
4306 case AND:
4307 case IOR:
4308 case XOR:
4309 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4310 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4311 case PLUS:
4312 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4313
4314 default:
4315 return 0;
4316 }
4317 }
4318
4319 /* Emit a sequence of insns to handle a large constant.
4320 CODE is the code of the operation required, it can be any of SET, PLUS,
4321 IOR, AND, XOR, MINUS;
4322 MODE is the mode in which the operation is being performed;
4323 VAL is the integer to operate on;
4324 SOURCE is the other operand (a register, or a null-pointer for SET);
4325 SUBTARGETS means it is safe to create scratch registers if that will
4326 either produce a simpler sequence, or we will want to cse the values.
4327 Return value is the number of insns emitted. */
4328
4329 /* ??? Tweak this for thumb2. */
4330 int
4331 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4332 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4333 {
4334 rtx cond;
4335
4336 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4337 cond = COND_EXEC_TEST (PATTERN (insn));
4338 else
4339 cond = NULL_RTX;
4340
4341 if (subtargets || code == SET
4342 || (REG_P (target) && REG_P (source)
4343 && REGNO (target) != REGNO (source)))
4344 {
4345 /* After arm_reorg has been called, we can't fix up expensive
4346 constants by pushing them into memory so we must synthesize
4347 them in-line, regardless of the cost. This is only likely to
4348 be more costly on chips that have load delay slots and we are
4349 compiling without running the scheduler (so no splitting
4350 occurred before the final instruction emission).
4351
4352 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4353 */
4354 if (!cfun->machine->after_arm_reorg
4355 && !cond
4356 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4357 1, 0)
4358 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4359 + (code != SET))))
4360 {
4361 if (code == SET)
4362 {
4363 /* Currently SET is the only monadic value for CODE; all
4364 the rest are dyadic. */
4365 if (TARGET_USE_MOVT)
4366 arm_emit_movpair (target, GEN_INT (val));
4367 else
4368 emit_set_insn (target, GEN_INT (val));
4369
4370 return 1;
4371 }
4372 else
4373 {
4374 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4375
4376 if (TARGET_USE_MOVT)
4377 arm_emit_movpair (temp, GEN_INT (val));
4378 else
4379 emit_set_insn (temp, GEN_INT (val));
4380
4381 /* For MINUS, the constant is the minuend (SOURCE is subtracted
4382 from it), since we never have subtraction of a constant. */
4383 if (code == MINUS)
4384 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4385 else
4386 emit_set_insn (target,
4387 gen_rtx_fmt_ee (code, mode, source, temp));
4388 return 2;
4389 }
4390 }
4391 }
4392
4393 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4394 1);
4395 }
4396
4397 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4398 ARM/THUMB2 immediates and add up to VAL.
4399 The function return value gives the number of insns required. */
4400 static int
4401 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4402 struct four_ints *return_sequence)
4403 {
4404 int best_consecutive_zeros = 0;
4405 int i;
4406 int best_start = 0;
4407 int insns1, insns2;
4408 struct four_ints tmp_sequence;
4409
4410 /* If we aren't targeting ARM, the best place to start is always at
4411 the bottom; otherwise look more closely. */
4412 if (TARGET_ARM)
4413 {
4414 for (i = 0; i < 32; i += 2)
4415 {
4416 int consecutive_zeros = 0;
4417
4418 if (!(val & (3 << i)))
4419 {
4420 while ((i < 32) && !(val & (3 << i)))
4421 {
4422 consecutive_zeros += 2;
4423 i += 2;
4424 }
4425 if (consecutive_zeros > best_consecutive_zeros)
4426 {
4427 best_consecutive_zeros = consecutive_zeros;
4428 best_start = i - consecutive_zeros;
4429 }
4430 i -= 2;
4431 }
4432 }
4433 }
4434
4435 /* So long as it won't require any more insns to do so, it's
4436 desirable to emit a small constant (in bits 0...9) in the last
4437 insn. This way there is more chance that it can be combined with
4438 a later addressing insn to form a pre-indexed load or store
4439 operation. Consider:
4440
4441 *((volatile int *)0xe0000100) = 1;
4442 *((volatile int *)0xe0000110) = 2;
4443
4444 We want this to wind up as:
4445
4446 mov rA, #0xe0000000
4447 mov rB, #1
4448 str rB, [rA, #0x100]
4449 mov rB, #2
4450 str rB, [rA, #0x110]
4451
4452 rather than having to synthesize both large constants from scratch.
4453
4454 Therefore, we calculate how many insns would be required to emit
4455 the constant starting from `best_start', and also starting from
4456 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4457 yield a shorter sequence, we may as well use zero. */
4458 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4459 if (best_start != 0
4460 && ((HOST_WIDE_INT_1U << best_start) < val))
4461 {
4462 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4463 if (insns2 <= insns1)
4464 {
4465 *return_sequence = tmp_sequence;
4466 insns1 = insns2;
4467 }
4468 }
4469
4470 return insns1;
4471 }
4472
4473 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4474 static int
4475 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4476 struct four_ints *return_sequence, int i)
4477 {
4478 int remainder = val & 0xffffffff;
4479 int insns = 0;
4480
4481 /* Try to find a way of doing the job in either two or three
4482 instructions.
4483
4484 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4485 location. We start at position I. This may be the MSB, or
4486 optimal_immediate_sequence may have positioned it at the largest block
4487 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4488 wrapping around to the top of the word when we drop off the bottom.
4489 In the worst case this code should produce no more than four insns.
4490
4491 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4492 constants, shifted to any arbitrary location. We should always start
4493 at the MSB. */
4494 do
4495 {
4496 int end;
4497 unsigned int b1, b2, b3, b4;
4498 unsigned HOST_WIDE_INT result;
4499 int loc;
4500
4501 gcc_assert (insns < 4);
4502
4503 if (i <= 0)
4504 i += 32;
4505
4506 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4507 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4508 {
4509 loc = i;
4510 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4511 /* We can use addw/subw for the last 12 bits. */
4512 result = remainder;
4513 else
4514 {
4515 /* Use an 8-bit shifted/rotated immediate. */
4516 end = i - 8;
4517 if (end < 0)
4518 end += 32;
4519 result = remainder & ((0x0ff << end)
4520 | ((i < end) ? (0xff >> (32 - end))
4521 : 0));
4522 i -= 8;
4523 }
4524 }
4525 else
4526 {
4527 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4528 arbitrary shifts. */
4529 i -= TARGET_ARM ? 2 : 1;
4530 continue;
4531 }
4532
4533 /* Next, see if we can do a better job with a thumb2 replicated
4534 constant.
4535
4536 We do it this way around to catch the cases like 0x01F001E0 where
4537 two 8-bit immediates would work, but a replicated constant would
4538 make it worse.
4539
4540 TODO: 16-bit constants that don't clear all the bits, but still win.
4541 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4542 if (TARGET_THUMB2)
4543 {
4544 b1 = (remainder & 0xff000000) >> 24;
4545 b2 = (remainder & 0x00ff0000) >> 16;
4546 b3 = (remainder & 0x0000ff00) >> 8;
4547 b4 = remainder & 0xff;
4548
4549 if (loc > 24)
4550 {
4551 /* The 8-bit immediate already found clears b1 (and maybe b2),
4552 but must leave b3 and b4 alone. */
4553
4554 /* First try to find a 32-bit replicated constant that clears
4555 almost everything. We can assume that we can't do it in one,
4556 or else we wouldn't be here. */
4557 unsigned int tmp = b1 & b2 & b3 & b4;
4558 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4559 + (tmp << 24);
4560 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4561 + (tmp == b3) + (tmp == b4);
4562 if (tmp
4563 && (matching_bytes >= 3
4564 || (matching_bytes == 2
4565 && const_ok_for_op (remainder & ~tmp2, code))))
4566 {
4567 /* At least 3 of the bytes match, and the fourth has at
4568 least as many bits set, or two of the bytes match
4569 and it will only require one more insn to finish. */
4570 result = tmp2;
4571 i = tmp != b1 ? 32
4572 : tmp != b2 ? 24
4573 : tmp != b3 ? 16
4574 : 8;
4575 }
4576
4577 /* Second, try to find a 16-bit replicated constant that can
4578 leave three of the bytes clear. If b2 or b4 is already
4579 zero, then we can. If the 8-bit immediate from above would not
4580 clear b2 anyway, then we still win. */
4581 else if (b1 == b3 && (!b2 || !b4
4582 || (remainder & 0x00ff0000 & ~result)))
4583 {
4584 result = remainder & 0xff00ff00;
4585 i = 24;
4586 }
4587 }
4588 else if (loc > 16)
4589 {
4590 /* The 8-bit immediate already found clears b2 (and maybe b3)
4591 and we don't get here unless b1 is already clear, but it will
4592 leave b4 unchanged. */
4593
4594 /* If we can clear b2 and b4 at once, then we win, since the
4595 8-bit immediate couldn't possibly reach that far. */
4596 if (b2 == b4)
4597 {
4598 result = remainder & 0x00ff00ff;
4599 i = 16;
4600 }
4601 }
4602 }
4603
4604 return_sequence->i[insns++] = result;
4605 remainder &= ~result;
4606
4607 if (code == SET || code == MINUS)
4608 code = PLUS;
4609 }
4610 while (remainder);
4611
4612 return insns;
4613 }
4614
4615 /* Emit an instruction with the indicated PATTERN. If COND is
4616 non-NULL, conditionalize the execution of the instruction on COND
4617 being true. */
4618
4619 static void
4620 emit_constant_insn (rtx cond, rtx pattern)
4621 {
4622 if (cond)
4623 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4624 emit_insn (pattern);
4625 }
4626
4627 /* As above, but extra parameter GENERATE which, if clear, suppresses
4628 RTL generation. */
4629
4630 static int
4631 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4632 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4633 int subtargets, int generate)
4634 {
4635 int can_invert = 0;
4636 int can_negate = 0;
4637 int final_invert = 0;
4638 int i;
4639 int set_sign_bit_copies = 0;
4640 int clear_sign_bit_copies = 0;
4641 int clear_zero_bit_copies = 0;
4642 int set_zero_bit_copies = 0;
4643 int insns = 0, neg_insns, inv_insns;
4644 unsigned HOST_WIDE_INT temp1, temp2;
4645 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4646 struct four_ints *immediates;
4647 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4648
4649 /* Find out which operations are safe for a given CODE. Also do a quick
4650 check for degenerate cases; these can occur when DImode operations
4651 are split. */
4652 switch (code)
4653 {
4654 case SET:
4655 can_invert = 1;
4656 break;
4657
4658 case PLUS:
4659 can_negate = 1;
4660 break;
4661
4662 case IOR:
4663 if (remainder == 0xffffffff)
4664 {
4665 if (generate)
4666 emit_constant_insn (cond,
4667 gen_rtx_SET (target,
4668 GEN_INT (ARM_SIGN_EXTEND (val))));
4669 return 1;
4670 }
4671
4672 if (remainder == 0)
4673 {
4674 if (reload_completed && rtx_equal_p (target, source))
4675 return 0;
4676
4677 if (generate)
4678 emit_constant_insn (cond, gen_rtx_SET (target, source));
4679 return 1;
4680 }
4681 break;
4682
4683 case AND:
4684 if (remainder == 0)
4685 {
4686 if (generate)
4687 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4688 return 1;
4689 }
4690 if (remainder == 0xffffffff)
4691 {
4692 if (reload_completed && rtx_equal_p (target, source))
4693 return 0;
4694 if (generate)
4695 emit_constant_insn (cond, gen_rtx_SET (target, source));
4696 return 1;
4697 }
4698 can_invert = 1;
4699 break;
4700
4701 case XOR:
4702 if (remainder == 0)
4703 {
4704 if (reload_completed && rtx_equal_p (target, source))
4705 return 0;
4706 if (generate)
4707 emit_constant_insn (cond, gen_rtx_SET (target, source));
4708 return 1;
4709 }
4710
4711 if (remainder == 0xffffffff)
4712 {
4713 if (generate)
4714 emit_constant_insn (cond,
4715 gen_rtx_SET (target,
4716 gen_rtx_NOT (mode, source)));
4717 return 1;
4718 }
4719 final_invert = 1;
4720 break;
4721
4722 case MINUS:
4723 /* We treat MINUS as (val - source), since (source - val) is always
4724 passed as (source + (-val)). */
4725 if (remainder == 0)
4726 {
4727 if (generate)
4728 emit_constant_insn (cond,
4729 gen_rtx_SET (target,
4730 gen_rtx_NEG (mode, source)));
4731 return 1;
4732 }
4733 if (const_ok_for_arm (val))
4734 {
4735 if (generate)
4736 emit_constant_insn (cond,
4737 gen_rtx_SET (target,
4738 gen_rtx_MINUS (mode, GEN_INT (val),
4739 source)));
4740 return 1;
4741 }
4742
4743 break;
4744
4745 default:
4746 gcc_unreachable ();
4747 }
4748
4749 /* If we can do it in one insn get out quickly. */
4750 if (const_ok_for_op (val, code))
4751 {
4752 if (generate)
4753 emit_constant_insn (cond,
4754 gen_rtx_SET (target,
4755 (source
4756 ? gen_rtx_fmt_ee (code, mode, source,
4757 GEN_INT (val))
4758 : GEN_INT (val))));
4759 return 1;
4760 }
4761
4762 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4763 insn. */
4764 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4765 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4766 {
4767 if (generate)
4768 {
4769 if (mode == SImode && i == 16)
4770 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4771 smaller insn. */
4772 emit_constant_insn (cond,
4773 gen_zero_extendhisi2
4774 (target, gen_lowpart (HImode, source)));
4775 else
4776 /* Extz only supports SImode, but we can coerce the operands
4777 into that mode. */
4778 emit_constant_insn (cond,
4779 gen_extzv_t2 (gen_lowpart (SImode, target),
4780 gen_lowpart (SImode, source),
4781 GEN_INT (i), const0_rtx));
4782 }
4783
4784 return 1;
4785 }
4786
4787 /* Calculate a few attributes that may be useful for specific
4788 optimizations. */
4789 /* Count number of leading zeros. */
4790 for (i = 31; i >= 0; i--)
4791 {
4792 if ((remainder & (1 << i)) == 0)
4793 clear_sign_bit_copies++;
4794 else
4795 break;
4796 }
4797
4798 /* Count number of leading 1's. */
4799 for (i = 31; i >= 0; i--)
4800 {
4801 if ((remainder & (1 << i)) != 0)
4802 set_sign_bit_copies++;
4803 else
4804 break;
4805 }
4806
4807 /* Count number of trailing zeros. */
4808 for (i = 0; i <= 31; i++)
4809 {
4810 if ((remainder & (1 << i)) == 0)
4811 clear_zero_bit_copies++;
4812 else
4813 break;
4814 }
4815
4816 /* Count number of trailing 1's. */
4817 for (i = 0; i <= 31; i++)
4818 {
4819 if ((remainder & (1 << i)) != 0)
4820 set_zero_bit_copies++;
4821 else
4822 break;
4823 }
4824
4825 switch (code)
4826 {
4827 case SET:
4828 /* See if we can do this by sign-extending a constant that is known
4829 to be negative. This is a good way of doing it, since the shift
4830 may well merge into a subsequent insn. */
4831 if (set_sign_bit_copies > 1)
4832 {
4833 if (const_ok_for_arm
4834 (temp1 = ARM_SIGN_EXTEND (remainder
4835 << (set_sign_bit_copies - 1))))
4836 {
4837 if (generate)
4838 {
4839 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4840 emit_constant_insn (cond,
4841 gen_rtx_SET (new_src, GEN_INT (temp1)));
4842 emit_constant_insn (cond,
4843 gen_ashrsi3 (target, new_src,
4844 GEN_INT (set_sign_bit_copies - 1)));
4845 }
4846 return 2;
4847 }
4848 /* For an inverted constant, we will need to set the low bits;
4849 these will be shifted out of harm's way. */
4850 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4851 if (const_ok_for_arm (~temp1))
4852 {
4853 if (generate)
4854 {
4855 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4856 emit_constant_insn (cond,
4857 gen_rtx_SET (new_src, GEN_INT (temp1)));
4858 emit_constant_insn (cond,
4859 gen_ashrsi3 (target, new_src,
4860 GEN_INT (set_sign_bit_copies - 1)));
4861 }
4862 return 2;
4863 }
4864 }
4865
4866 /* See if we can calculate the value as the difference between two
4867 valid immediates. */
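/* A worked example (assuming MOVW is unavailable): 0x0000ffff is not a
   valid immediate, but 0x10000 and 1 both are, so below we get temp1 =
   0x10000 and temp2 = 1, and the value is built, in effect, as

     mov     rT, #0x10000
     sub     target, rT, #1

   (the add of -temp2 is emitted as a sub).  */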
4868 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4869 {
4870 int topshift = clear_sign_bit_copies & ~1;
4871
4872 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4873 & (0xff000000 >> topshift));
4874
4875 /* If temp1 is zero, then that means the 9 most significant
4876 bits of remainder were 1 and we've caused it to overflow.
4877 When topshift is 0 we don't need to do anything since we
4878 can borrow from 'bit 32'. */
4879 if (temp1 == 0 && topshift != 0)
4880 temp1 = 0x80000000 >> (topshift - 1);
4881
4882 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4883
4884 if (const_ok_for_arm (temp2))
4885 {
4886 if (generate)
4887 {
4888 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4889 emit_constant_insn (cond,
4890 gen_rtx_SET (new_src, GEN_INT (temp1)));
4891 emit_constant_insn (cond,
4892 gen_addsi3 (target, new_src,
4893 GEN_INT (-temp2)));
4894 }
4895
4896 return 2;
4897 }
4898 }
4899
4900 /* See if we can generate this by setting the bottom (or the top)
4901 16 bits, and then shifting these into the other half of the
4902 word. We only look for the simplest cases; to do more would cost
4903 too much. Be careful, however, not to generate this when the
4904 alternative would take fewer insns. */
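/* A worked example (ARM mode, assuming MOVW is unavailable): for
   0x01230123 the low half 0x123 is synthesized first (it needs two
   instructions on its own) and then OR-ed with itself shifted left by 16,
   giving three instructions in total, e.g.

     mov     rT, #0x100
     orr     rT, rT, #0x23
     orr     target, rT, rT, lsl #16

   (the exact two-instruction split of 0x123 may differ).  */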
4905 if (val & 0xffff0000)
4906 {
4907 temp1 = remainder & 0xffff0000;
4908 temp2 = remainder & 0x0000ffff;
4909
4910 /* Overlaps outside this range are best done using other methods. */
4911 for (i = 9; i < 24; i++)
4912 {
4913 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4914 && !const_ok_for_arm (temp2))
4915 {
4916 rtx new_src = (subtargets
4917 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4918 : target);
4919 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4920 source, subtargets, generate);
4921 source = new_src;
4922 if (generate)
4923 emit_constant_insn
4924 (cond,
4925 gen_rtx_SET
4926 (target,
4927 gen_rtx_IOR (mode,
4928 gen_rtx_ASHIFT (mode, source,
4929 GEN_INT (i)),
4930 source)));
4931 return insns + 1;
4932 }
4933 }
4934
4935 /* Don't duplicate cases already considered. */
4936 for (i = 17; i < 24; i++)
4937 {
4938 if (((temp1 | (temp1 >> i)) == remainder)
4939 && !const_ok_for_arm (temp1))
4940 {
4941 rtx new_src = (subtargets
4942 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4943 : target);
4944 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4945 source, subtargets, generate);
4946 source = new_src;
4947 if (generate)
4948 emit_constant_insn
4949 (cond,
4950 gen_rtx_SET (target,
4951 gen_rtx_IOR
4952 (mode,
4953 gen_rtx_LSHIFTRT (mode, source,
4954 GEN_INT (i)),
4955 source)));
4956 return insns + 1;
4957 }
4958 }
4959 }
4960 break;
4961
4962 case IOR:
4963 case XOR:
4964 /* If we have IOR or XOR, and the constant can be loaded in a
4965 single instruction, and we can find a temporary to put it in,
4966 then this can be done in two instructions instead of 3-4. */
4967 if (subtargets
4968 	      /* TARGET can't be NULL if SUBTARGETS is 0.  */
4969 || (reload_completed && !reg_mentioned_p (target, source)))
4970 {
4971 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4972 {
4973 if (generate)
4974 {
4975 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4976
4977 emit_constant_insn (cond,
4978 gen_rtx_SET (sub, GEN_INT (val)));
4979 emit_constant_insn (cond,
4980 gen_rtx_SET (target,
4981 gen_rtx_fmt_ee (code, mode,
4982 source, sub)));
4983 }
4984 return 2;
4985 }
4986 }
4987
4988 if (code == XOR)
4989 break;
4990
4991 	  /* Convert
4992 	     x = y | constant (composed of set_sign_bit_copies leading 1s
4993 	     followed by 0s, e.g. 0xfff00000) into
4994 	     x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies).
4995
4996 This can be done in 2 instructions by using shifts with mov or mvn.
4997 e.g. for
4998 x = x | 0xfff00000;
4999 	     we generate:
5000 mvn r0, r0, asl #12
5001 mvn r0, r0, lsr #12 */
5002 if (set_sign_bit_copies > 8
5003 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
5004 {
5005 if (generate)
5006 {
5007 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5008 rtx shift = GEN_INT (set_sign_bit_copies);
5009
5010 emit_constant_insn
5011 (cond,
5012 gen_rtx_SET (sub,
5013 gen_rtx_NOT (mode,
5014 gen_rtx_ASHIFT (mode,
5015 source,
5016 shift))));
5017 emit_constant_insn
5018 (cond,
5019 gen_rtx_SET (target,
5020 gen_rtx_NOT (mode,
5021 gen_rtx_LSHIFTRT (mode, sub,
5022 shift))));
5023 }
5024 return 2;
5025 }
5026
5027 	  /* Convert
5028 	     x = y | constant (which has set_zero_bit_copies trailing ones)
5029 	     to
5030 	     x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5031 
5032 	     E.g. for r0 = r0 | 0xfff we generate:
5033 	     mvn	r0, r0, lsr #12
5034 	     mvn	r0, r0, asl #12
5035 
5036 	  */
5037 if (set_zero_bit_copies > 8
5038 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5039 {
5040 if (generate)
5041 {
5042 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5043 rtx shift = GEN_INT (set_zero_bit_copies);
5044
5045 emit_constant_insn
5046 (cond,
5047 gen_rtx_SET (sub,
5048 gen_rtx_NOT (mode,
5049 gen_rtx_LSHIFTRT (mode,
5050 source,
5051 shift))));
5052 emit_constant_insn
5053 (cond,
5054 gen_rtx_SET (target,
5055 gen_rtx_NOT (mode,
5056 gen_rtx_ASHIFT (mode, sub,
5057 shift))));
5058 }
5059 return 2;
5060 }
5061
5062 /* This will never be reached for Thumb2 because orn is a valid
5063 instruction. This is for Thumb1 and the ARM 32 bit cases.
5064
5065 x = y | constant (such that ~constant is a valid constant)
5066 Transform this to
5067 x = ~(~y & ~constant).
5068 */
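	  /* For instance, when no scratch register is available,
	     x = y | 0xfffff0ff (whose inverse 0xf00 is a valid immediate)
	     becomes roughly:
		mvn	x, y
		and	x, x, #0xf00
		mvn	x, x  */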
5069 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5070 {
5071 if (generate)
5072 {
5073 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5074 emit_constant_insn (cond,
5075 gen_rtx_SET (sub,
5076 gen_rtx_NOT (mode, source)));
5077 source = sub;
5078 if (subtargets)
5079 sub = gen_reg_rtx (mode);
5080 emit_constant_insn (cond,
5081 gen_rtx_SET (sub,
5082 gen_rtx_AND (mode, source,
5083 GEN_INT (temp1))));
5084 emit_constant_insn (cond,
5085 gen_rtx_SET (target,
5086 gen_rtx_NOT (mode, sub)));
5087 }
5088 return 3;
5089 }
5090 break;
5091
5092 case AND:
5093 	      /* See if two shifts will do 2 or more insns' worth of work.  */
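	  /* E.g. for x = y & 0x0000ffff (clear_sign_bit_copies == 16) the code
	     below emits roughly
		mov	tmp, y, asl #16
		mov	x, tmp, lsr #16
	     and the clear_zero_bit_copies case further down uses lsr followed
	     by asl instead.  */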
5094 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5095 {
5096 HOST_WIDE_INT shift_mask = ((0xffffffff
5097 << (32 - clear_sign_bit_copies))
5098 & 0xffffffff);
5099
5100 if ((remainder | shift_mask) != 0xffffffff)
5101 {
5102 HOST_WIDE_INT new_val
5103 = ARM_SIGN_EXTEND (remainder | shift_mask);
5104
5105 if (generate)
5106 {
5107 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5108 insns = arm_gen_constant (AND, SImode, cond, new_val,
5109 new_src, source, subtargets, 1);
5110 source = new_src;
5111 }
5112 else
5113 {
5114 rtx targ = subtargets ? NULL_RTX : target;
5115 insns = arm_gen_constant (AND, mode, cond, new_val,
5116 targ, source, subtargets, 0);
5117 }
5118 }
5119
5120 if (generate)
5121 {
5122 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5123 rtx shift = GEN_INT (clear_sign_bit_copies);
5124
5125 emit_insn (gen_ashlsi3 (new_src, source, shift));
5126 emit_insn (gen_lshrsi3 (target, new_src, shift));
5127 }
5128
5129 return insns + 2;
5130 }
5131
5132 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5133 {
5134 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5135
5136 if ((remainder | shift_mask) != 0xffffffff)
5137 {
5138 HOST_WIDE_INT new_val
5139 = ARM_SIGN_EXTEND (remainder | shift_mask);
5140 if (generate)
5141 {
5142 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5143
5144 insns = arm_gen_constant (AND, mode, cond, new_val,
5145 new_src, source, subtargets, 1);
5146 source = new_src;
5147 }
5148 else
5149 {
5150 rtx targ = subtargets ? NULL_RTX : target;
5151
5152 insns = arm_gen_constant (AND, mode, cond, new_val,
5153 targ, source, subtargets, 0);
5154 }
5155 }
5156
5157 if (generate)
5158 {
5159 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5160 rtx shift = GEN_INT (clear_zero_bit_copies);
5161
5162 emit_insn (gen_lshrsi3 (new_src, source, shift));
5163 emit_insn (gen_ashlsi3 (target, new_src, shift));
5164 }
5165
5166 return insns + 2;
5167 }
5168
5169 break;
5170
5171 default:
5172 break;
5173 }
5174
5175 /* Calculate what the instruction sequences would be if we generated it
5176 normally, negated, or inverted. */
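  /* For instance, for code == PLUS and the constant 0xfffffffe (-2), the
     negated sequence below is preferred: a single sub of #2 rather than a
     longer sequence of adds.  */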
5177 if (code == AND)
5178 /* AND cannot be split into multiple insns, so invert and use BIC. */
5179 insns = 99;
5180 else
5181 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5182
5183 if (can_negate)
5184 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5185 &neg_immediates);
5186 else
5187 neg_insns = 99;
5188
5189 if (can_invert || final_invert)
5190 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5191 &inv_immediates);
5192 else
5193 inv_insns = 99;
5194
5195 immediates = &pos_immediates;
5196
5197 /* Is the negated immediate sequence more efficient? */
5198 if (neg_insns < insns && neg_insns <= inv_insns)
5199 {
5200 insns = neg_insns;
5201 immediates = &neg_immediates;
5202 }
5203 else
5204 can_negate = 0;
5205
5206 /* Is the inverted immediate sequence more efficient?
5207 We must allow for an extra NOT instruction for XOR operations, although
5208 there is some chance that the final 'mvn' will get optimized later. */
5209 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5210 {
5211 insns = inv_insns;
5212 immediates = &inv_immediates;
5213 }
5214 else
5215 {
5216 can_invert = 0;
5217 final_invert = 0;
5218 }
5219
5220 /* Now output the chosen sequence as instructions. */
5221 if (generate)
5222 {
5223 for (i = 0; i < insns; i++)
5224 {
5225 rtx new_src, temp1_rtx;
5226
5227 temp1 = immediates->i[i];
5228
5229 if (code == SET || code == MINUS)
5230 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5231 else if ((final_invert || i < (insns - 1)) && subtargets)
5232 new_src = gen_reg_rtx (mode);
5233 else
5234 new_src = target;
5235
5236 if (can_invert)
5237 temp1 = ~temp1;
5238 else if (can_negate)
5239 temp1 = -temp1;
5240
5241 temp1 = trunc_int_for_mode (temp1, mode);
5242 temp1_rtx = GEN_INT (temp1);
5243
5244 if (code == SET)
5245 ;
5246 else if (code == MINUS)
5247 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5248 else
5249 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5250
5251 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5252 source = new_src;
5253
5254 if (code == SET)
5255 {
5256 can_negate = can_invert;
5257 can_invert = 0;
5258 code = PLUS;
5259 }
5260 else if (code == MINUS)
5261 code = PLUS;
5262 }
5263 }
5264
5265 if (final_invert)
5266 {
5267 if (generate)
5268 emit_constant_insn (cond, gen_rtx_SET (target,
5269 gen_rtx_NOT (mode, source)));
5270 insns++;
5271 }
5272
5273 return insns;
5274 }
5275
5276 /* Canonicalize a comparison so that we are more likely to recognize it.
5277 This can be done for a few constant compares, where we can make the
5278 immediate value easier to load. */
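/* For example, an SImode comparison (GT x 4095) is rewritten here as
   (GE x 4096), since 4096 is a valid immediate operand while 4095 is not.  */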
5279
5280 static void
5281 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5282 bool op0_preserve_value)
5283 {
5284 machine_mode mode;
5285 unsigned HOST_WIDE_INT i, maxval;
5286
5287 mode = GET_MODE (*op0);
5288 if (mode == VOIDmode)
5289 mode = GET_MODE (*op1);
5290
5291 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5292
5293 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5294 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5295 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5296 for GTU/LEU in Thumb mode. */
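  /* For instance, in Thumb mode a DImode (GTU x 5) would be adjusted below
     to (GEU x 6), since both halves of the constant 6 are valid
     immediates.  */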
5297 if (mode == DImode)
5298 {
5300 if (*code == GT || *code == LE
5301 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5302 {
5303 /* Missing comparison. First try to use an available
5304 comparison. */
5305 if (CONST_INT_P (*op1))
5306 {
5307 i = INTVAL (*op1);
5308 switch (*code)
5309 {
5310 case GT:
5311 case LE:
5312 if (i != maxval
5313 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5314 {
5315 *op1 = GEN_INT (i + 1);
5316 *code = *code == GT ? GE : LT;
5317 return;
5318 }
5319 break;
5320 case GTU:
5321 case LEU:
5322 if (i != ~((unsigned HOST_WIDE_INT) 0)
5323 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5324 {
5325 *op1 = GEN_INT (i + 1);
5326 *code = *code == GTU ? GEU : LTU;
5327 return;
5328 }
5329 break;
5330 default:
5331 gcc_unreachable ();
5332 }
5333 }
5334
5335 /* If that did not work, reverse the condition. */
5336 if (!op0_preserve_value)
5337 {
5338 std::swap (*op0, *op1);
5339 *code = (int)swap_condition ((enum rtx_code)*code);
5340 }
5341 }
5342 return;
5343 }
5344
5345 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5346 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5347 to facilitate possible combining with a cmp into 'ands'. */
5348 if (mode == SImode
5349 && GET_CODE (*op0) == ZERO_EXTEND
5350 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5351 && GET_MODE (XEXP (*op0, 0)) == QImode
5352 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5353 && subreg_lowpart_p (XEXP (*op0, 0))
5354 && *op1 == const0_rtx)
5355 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5356 GEN_INT (255));
5357
5358 /* Comparisons smaller than DImode. Only adjust comparisons against
5359 an out-of-range constant. */
5360 if (!CONST_INT_P (*op1)
5361 || const_ok_for_arm (INTVAL (*op1))
5362 || const_ok_for_arm (- INTVAL (*op1)))
5363 return;
5364
5365 i = INTVAL (*op1);
5366
5367 switch (*code)
5368 {
5369 case EQ:
5370 case NE:
5371 return;
5372
5373 case GT:
5374 case LE:
5375 if (i != maxval
5376 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5377 {
5378 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5379 *code = *code == GT ? GE : LT;
5380 return;
5381 }
5382 break;
5383
5384 case GE:
5385 case LT:
5386 if (i != ~maxval
5387 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5388 {
5389 *op1 = GEN_INT (i - 1);
5390 *code = *code == GE ? GT : LE;
5391 return;
5392 }
5393 break;
5394
5395 case GTU:
5396 case LEU:
5397 if (i != ~((unsigned HOST_WIDE_INT) 0)
5398 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5399 {
5400 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5401 *code = *code == GTU ? GEU : LTU;
5402 return;
5403 }
5404 break;
5405
5406 case GEU:
5407 case LTU:
5408 if (i != 0
5409 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5410 {
5411 *op1 = GEN_INT (i - 1);
5412 *code = *code == GEU ? GTU : LEU;
5413 return;
5414 }
5415 break;
5416
5417 default:
5418 gcc_unreachable ();
5419 }
5420 }
5421
5422
5423 /* Define how to find the value returned by a function. */
5424
5425 static rtx
5426 arm_function_value(const_tree type, const_tree func,
5427 bool outgoing ATTRIBUTE_UNUSED)
5428 {
5429 machine_mode mode;
5430 int unsignedp ATTRIBUTE_UNUSED;
5431 rtx r ATTRIBUTE_UNUSED;
5432
5433 mode = TYPE_MODE (type);
5434
5435 if (TARGET_AAPCS_BASED)
5436 return aapcs_allocate_return_reg (mode, type, func);
5437
5438 /* Promote integer types. */
5439 if (INTEGRAL_TYPE_P (type))
5440 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5441
5442 	  /* Promote small structs returned in a register to full-word size
5443 for big-endian AAPCS. */
5444 if (arm_return_in_msb (type))
5445 {
5446 HOST_WIDE_INT size = int_size_in_bytes (type);
5447 if (size % UNITS_PER_WORD != 0)
5448 {
5449 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5450 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5451 }
5452 }
5453
5454 return arm_libcall_value_1 (mode);
5455 }
5456
5457 /* libcall hashtable helpers. */
5458
5459 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5460 {
5461 static inline hashval_t hash (const rtx_def *);
5462 static inline bool equal (const rtx_def *, const rtx_def *);
5463 static inline void remove (rtx_def *);
5464 };
5465
5466 inline bool
5467 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5468 {
5469 return rtx_equal_p (p1, p2);
5470 }
5471
5472 inline hashval_t
5473 libcall_hasher::hash (const rtx_def *p1)
5474 {
5475 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5476 }
5477
5478 typedef hash_table<libcall_hasher> libcall_table_type;
5479
5480 static void
5481 add_libcall (libcall_table_type *htab, rtx libcall)
5482 {
5483 *htab->find_slot (libcall, INSERT) = libcall;
5484 }
5485
5486 static bool
5487 arm_libcall_uses_aapcs_base (const_rtx libcall)
5488 {
5489 static bool init_done = false;
5490 static libcall_table_type *libcall_htab = NULL;
5491
5492 if (!init_done)
5493 {
5494 init_done = true;
5495
5496 libcall_htab = new libcall_table_type (31);
5497 add_libcall (libcall_htab,
5498 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5499 add_libcall (libcall_htab,
5500 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5501 add_libcall (libcall_htab,
5502 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5503 add_libcall (libcall_htab,
5504 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5505
5506 add_libcall (libcall_htab,
5507 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5508 add_libcall (libcall_htab,
5509 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5510 add_libcall (libcall_htab,
5511 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5512 add_libcall (libcall_htab,
5513 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5514
5515 add_libcall (libcall_htab,
5516 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5517 add_libcall (libcall_htab,
5518 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5519 add_libcall (libcall_htab,
5520 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5521 add_libcall (libcall_htab,
5522 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5523 add_libcall (libcall_htab,
5524 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5525 add_libcall (libcall_htab,
5526 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5527 add_libcall (libcall_htab,
5528 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5529 add_libcall (libcall_htab,
5530 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5531
5532 /* Values from double-precision helper functions are returned in core
5533 registers if the selected core only supports single-precision
5534 arithmetic, even if we are using the hard-float ABI. The same is
5535 true for single-precision helpers, but we will never be using the
5536 hard-float ABI on a CPU which doesn't support single-precision
5537 operations in hardware. */
5538 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5539 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5540 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5541 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5542 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5543 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5544 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5545 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5546 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5547 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5548 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5549 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5550 SFmode));
5551 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5552 DFmode));
5553 add_libcall (libcall_htab,
5554 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5555 }
5556
5557 return libcall && libcall_htab->find (libcall) != NULL;
5558 }
5559
5560 static rtx
5561 arm_libcall_value_1 (machine_mode mode)
5562 {
5563 if (TARGET_AAPCS_BASED)
5564 return aapcs_libcall_value (mode);
5565 else if (TARGET_IWMMXT_ABI
5566 && arm_vector_mode_supported_p (mode))
5567 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5568 else
5569 return gen_rtx_REG (mode, ARG_REGISTER (1));
5570 }
5571
5572 /* Define how to find the value returned by a library function
5573 assuming the value has mode MODE. */
5574
5575 static rtx
5576 arm_libcall_value (machine_mode mode, const_rtx libcall)
5577 {
5578 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5579 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5580 {
5581 /* The following libcalls return their result in integer registers,
5582 even though they return a floating point value. */
5583 if (arm_libcall_uses_aapcs_base (libcall))
5584 		return gen_rtx_REG (mode, ARG_REGISTER (1));
5586 	    }
5587
5588 return arm_libcall_value_1 (mode);
5589 }
5590
5591 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5592
5593 static bool
5594 arm_function_value_regno_p (const unsigned int regno)
5595 {
5596 if (regno == ARG_REGISTER (1)
5597 || (TARGET_32BIT
5598 && TARGET_AAPCS_BASED
5599 && TARGET_HARD_FLOAT
5600 && regno == FIRST_VFP_REGNUM)
5601 || (TARGET_IWMMXT_ABI
5602 && regno == FIRST_IWMMXT_REGNUM))
5603 return true;
5604
5605 return false;
5606 }
5607
5608 /* Determine the amount of memory needed to store the possible return
5609 registers of an untyped call. */
5610 int
5611 arm_apply_result_size (void)
5612 {
5613 int size = 16;
5614
5615 if (TARGET_32BIT)
5616 {
5617 if (TARGET_HARD_FLOAT_ABI)
5618 size += 32;
5619 if (TARGET_IWMMXT_ABI)
5620 size += 8;
5621 }
5622
5623 return size;
5624 }
5625
5626 /* Decide whether TYPE should be returned in memory (true)
5627 or in a register (false). FNTYPE is the type of the function making
5628 the call. */
5629 static bool
5630 arm_return_in_memory (const_tree type, const_tree fntype)
5631 {
5632 HOST_WIDE_INT size;
5633
5634 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5635
5636 if (TARGET_AAPCS_BASED)
5637 {
5638 	      /* Simple, non-aggregate types (i.e. not including vectors and
5639 complex) are always returned in a register (or registers).
5640 We don't care about which register here, so we can short-cut
5641 some of the detail. */
5642 if (!AGGREGATE_TYPE_P (type)
5643 && TREE_CODE (type) != VECTOR_TYPE
5644 && TREE_CODE (type) != COMPLEX_TYPE)
5645 return false;
5646
5647 /* Any return value that is no larger than one word can be
5648 returned in r0. */
5649 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5650 return false;
5651
5652 /* Check any available co-processors to see if they accept the
5653 type as a register candidate (VFP, for example, can return
5654 some aggregates in consecutive registers). These aren't
5655 available if the call is variadic. */
5656 if (aapcs_select_return_coproc (type, fntype) >= 0)
5657 return false;
5658
5659 /* Vector values should be returned using ARM registers, not
5660 memory (unless they're over 16 bytes, which will break since
5661 we only have four call-clobbered registers to play with). */
5662 if (TREE_CODE (type) == VECTOR_TYPE)
5663 return (size < 0 || size > (4 * UNITS_PER_WORD));
5664
5665 /* The rest go in memory. */
5666 return true;
5667 }
5668
5669 if (TREE_CODE (type) == VECTOR_TYPE)
5670 return (size < 0 || size > (4 * UNITS_PER_WORD));
5671
5672 	  if (!AGGREGATE_TYPE_P (type)
5673 	      && (TREE_CODE (type) != VECTOR_TYPE))
5674 /* All simple types are returned in registers. */
5675 return false;
5676
5677 if (arm_abi != ARM_ABI_APCS)
5678 {
5679 /* ATPCS and later return aggregate types in memory only if they are
5680 larger than a word (or are variable size). */
5681 return (size < 0 || size > UNITS_PER_WORD);
5682 }
5683
5684 /* For the arm-wince targets we choose to be compatible with Microsoft's
5685 ARM and Thumb compilers, which always return aggregates in memory. */
5686 #ifndef ARM_WINCE
5687 /* All structures/unions bigger than one word are returned in memory.
5688 Also catch the case where int_size_in_bytes returns -1. In this case
5689 the aggregate is either huge or of variable size, and in either case
5690 we will want to return it via memory and not in a register. */
5691 if (size < 0 || size > UNITS_PER_WORD)
5692 return true;
5693
5694 if (TREE_CODE (type) == RECORD_TYPE)
5695 {
5696 tree field;
5697
5698 /* For a struct the APCS says that we only return in a register
5699 if the type is 'integer like' and every addressable element
5700 has an offset of zero. For practical purposes this means
5701 that the structure can have at most one non bit-field element
5702 and that this element must be the first one in the structure. */
5703
5704 /* Find the first field, ignoring non FIELD_DECL things which will
5705 have been created by C++. */
5706 for (field = TYPE_FIELDS (type);
5707 field && TREE_CODE (field) != FIELD_DECL;
5708 field = DECL_CHAIN (field))
5709 continue;
5710
5711 if (field == NULL)
5712 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5713
5714 /* Check that the first field is valid for returning in a register. */
5715
5716 /* ... Floats are not allowed */
5717 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5718 return true;
5719
5720 /* ... Aggregates that are not themselves valid for returning in
5721 a register are not allowed. */
5722 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5723 return true;
5724
5725 /* Now check the remaining fields, if any. Only bitfields are allowed,
5726 since they are not addressable. */
5727 for (field = DECL_CHAIN (field);
5728 field;
5729 field = DECL_CHAIN (field))
5730 {
5731 if (TREE_CODE (field) != FIELD_DECL)
5732 continue;
5733
5734 if (!DECL_BIT_FIELD_TYPE (field))
5735 return true;
5736 }
5737
5738 return false;
5739 }
5740
5741 if (TREE_CODE (type) == UNION_TYPE)
5742 {
5743 tree field;
5744
5745 /* Unions can be returned in registers if every element is
5746 integral, or can be returned in an integer register. */
5747 for (field = TYPE_FIELDS (type);
5748 field;
5749 field = DECL_CHAIN (field))
5750 {
5751 if (TREE_CODE (field) != FIELD_DECL)
5752 continue;
5753
5754 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5755 return true;
5756
5757 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5758 return true;
5759 }
5760
5761 return false;
5762 }
5763 #endif /* not ARM_WINCE */
5764
5765 /* Return all other types in memory. */
5766 return true;
5767 }
5768
5769 const struct pcs_attribute_arg
5770 {
5771 const char *arg;
5772 enum arm_pcs value;
5773 } pcs_attribute_args[] =
5774 {
5775 {"aapcs", ARM_PCS_AAPCS},
5776 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5777 #if 0
5778 /* We could recognize these, but changes would be needed elsewhere
5779 	   to implement them.  */
5780 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5781 {"atpcs", ARM_PCS_ATPCS},
5782 {"apcs", ARM_PCS_APCS},
5783 #endif
5784 {NULL, ARM_PCS_UNKNOWN}
5785 };
5786
5787 static enum arm_pcs
5788 arm_pcs_from_attribute (tree attr)
5789 {
5790 const struct pcs_attribute_arg *ptr;
5791 const char *arg;
5792
5793 /* Get the value of the argument. */
5794 if (TREE_VALUE (attr) == NULL_TREE
5795 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5796 return ARM_PCS_UNKNOWN;
5797
5798 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5799
5800 /* Check it against the list of known arguments. */
5801 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5802 if (streq (arg, ptr->arg))
5803 return ptr->value;
5804
5805 	  /* An unrecognized PCS variant.  */
5806 return ARM_PCS_UNKNOWN;
5807 }
5808
5809 /* Get the PCS variant to use for this call. TYPE is the function's type
5810 	   specification, DECL is the specific declaration.  DECL may be null if
5811 the call could be indirect or if this is a library call. */
5812 static enum arm_pcs
5813 arm_get_pcs_model (const_tree type, const_tree decl)
5814 {
5815 bool user_convention = false;
5816 enum arm_pcs user_pcs = arm_pcs_default;
5817 tree attr;
5818
5819 gcc_assert (type);
5820
5821 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5822 if (attr)
5823 {
5824 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5825 user_convention = true;
5826 }
5827
5828 if (TARGET_AAPCS_BASED)
5829 {
5830 /* Detect varargs functions. These always use the base rules
5831 (no argument is ever a candidate for a co-processor
5832 register). */
5833 bool base_rules = stdarg_p (type);
5834
5835 if (user_convention)
5836 {
5837 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5838 sorry ("non-AAPCS derived PCS variant");
5839 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5840 error ("variadic functions must use the base AAPCS variant");
5841 }
5842
5843 if (base_rules)
5844 return ARM_PCS_AAPCS;
5845 else if (user_convention)
5846 return user_pcs;
5847 else if (decl && flag_unit_at_a_time)
5848 {
5849 /* Local functions never leak outside this compilation unit,
5850 so we are free to use whatever conventions are
5851 appropriate. */
5852 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5853 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5854 if (i && i->local)
5855 return ARM_PCS_AAPCS_LOCAL;
5856 }
5857 }
5858 else if (user_convention && user_pcs != arm_pcs_default)
5859 sorry ("PCS variant");
5860
5861 /* For everything else we use the target's default. */
5862 return arm_pcs_default;
5863 }
5864
5865
5866 static void
5867 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
5868 const_tree fntype ATTRIBUTE_UNUSED,
5869 rtx libcall ATTRIBUTE_UNUSED,
5870 const_tree fndecl ATTRIBUTE_UNUSED)
5871 {
5872 /* Record the unallocated VFP registers. */
5873 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5874 pcum->aapcs_vfp_reg_alloc = 0;
5875 }
5876
5877 /* Walk down the type tree of TYPE counting consecutive base elements.
5878 If *MODEP is VOIDmode, then set it to the first valid floating point
5879 type. If a non-floating point type is found, or if a floating point
5880 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5881 otherwise return the count in the sub-tree. */
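/* For example, a C struct containing two 'double' members yields a count
   of 2 with *MODEP set to DFmode, while a struct mixing 'float' and
   'double' members yields -1.  */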
5882 static int
5883 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5884 {
5885 machine_mode mode;
5886 HOST_WIDE_INT size;
5887
5888 switch (TREE_CODE (type))
5889 {
5890 case REAL_TYPE:
5891 mode = TYPE_MODE (type);
5892 if (mode != DFmode && mode != SFmode && mode != HFmode)
5893 return -1;
5894
5895 if (*modep == VOIDmode)
5896 *modep = mode;
5897
5898 if (*modep == mode)
5899 return 1;
5900
5901 break;
5902
5903 case COMPLEX_TYPE:
5904 mode = TYPE_MODE (TREE_TYPE (type));
5905 if (mode != DFmode && mode != SFmode)
5906 return -1;
5907
5908 if (*modep == VOIDmode)
5909 *modep = mode;
5910
5911 if (*modep == mode)
5912 return 2;
5913
5914 break;
5915
5916 case VECTOR_TYPE:
5917 /* Use V2SImode and V4SImode as representatives of all 64-bit
5918 and 128-bit vector types, whether or not those modes are
5919 supported with the present options. */
5920 size = int_size_in_bytes (type);
5921 switch (size)
5922 {
5923 case 8:
5924 mode = V2SImode;
5925 break;
5926 case 16:
5927 mode = V4SImode;
5928 break;
5929 default:
5930 return -1;
5931 }
5932
5933 if (*modep == VOIDmode)
5934 *modep = mode;
5935
5936 /* Vector modes are considered to be opaque: two vectors are
5937 equivalent for the purposes of being homogeneous aggregates
5938 if they are the same size. */
5939 if (*modep == mode)
5940 return 1;
5941
5942 break;
5943
5944 case ARRAY_TYPE:
5945 {
5946 int count;
5947 tree index = TYPE_DOMAIN (type);
5948
5949 /* Can't handle incomplete types nor sizes that are not
5950 fixed. */
5951 if (!COMPLETE_TYPE_P (type)
5952 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5953 return -1;
5954
5955 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5956 if (count == -1
5957 || !index
5958 || !TYPE_MAX_VALUE (index)
5959 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5960 || !TYPE_MIN_VALUE (index)
5961 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5962 || count < 0)
5963 return -1;
5964
5965 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5966 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5967
5968 /* There must be no padding. */
5969 if (wi::to_wide (TYPE_SIZE (type))
5970 != count * GET_MODE_BITSIZE (*modep))
5971 return -1;
5972
5973 return count;
5974 }
5975
5976 case RECORD_TYPE:
5977 {
5978 int count = 0;
5979 int sub_count;
5980 tree field;
5981
5982 /* Can't handle incomplete types nor sizes that are not
5983 fixed. */
5984 if (!COMPLETE_TYPE_P (type)
5985 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5986 return -1;
5987
5988 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5989 {
5990 if (TREE_CODE (field) != FIELD_DECL)
5991 continue;
5992
5993 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5994 if (sub_count < 0)
5995 return -1;
5996 count += sub_count;
5997 }
5998
5999 /* There must be no padding. */
6000 if (wi::to_wide (TYPE_SIZE (type))
6001 != count * GET_MODE_BITSIZE (*modep))
6002 return -1;
6003
6004 return count;
6005 }
6006
6007 case UNION_TYPE:
6008 case QUAL_UNION_TYPE:
6009 {
6010 /* These aren't very interesting except in a degenerate case. */
6011 int count = 0;
6012 int sub_count;
6013 tree field;
6014
6015 /* Can't handle incomplete types nor sizes that are not
6016 fixed. */
6017 if (!COMPLETE_TYPE_P (type)
6018 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6019 return -1;
6020
6021 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6022 {
6023 if (TREE_CODE (field) != FIELD_DECL)
6024 continue;
6025
6026 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6027 if (sub_count < 0)
6028 return -1;
6029 count = count > sub_count ? count : sub_count;
6030 }
6031
6032 /* There must be no padding. */
6033 if (wi::to_wide (TYPE_SIZE (type))
6034 != count * GET_MODE_BITSIZE (*modep))
6035 return -1;
6036
6037 return count;
6038 }
6039
6040 default:
6041 break;
6042 }
6043
6044 return -1;
6045 }
6046
6047 /* Return true if PCS_VARIANT should use VFP registers. */
6048 static bool
6049 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6050 {
6051 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6052 {
6053 static bool seen_thumb1_vfp = false;
6054
6055 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6056 {
6057 sorry ("Thumb-1 hard-float VFP ABI");
6058 /* sorry() is not immediately fatal, so only display this once. */
6059 seen_thumb1_vfp = true;
6060 }
6061
6062 return true;
6063 }
6064
6065 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6066 return false;
6067
6068 	  return (TARGET_32BIT && TARGET_HARD_FLOAT
6069 		  && (TARGET_VFP_DOUBLE || !is_double));
6070 }
6071
6072 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6073 suitable for passing or returning in VFP registers for the PCS
6074 variant selected. If it is, then *BASE_MODE is updated to contain
6075 a machine mode describing each element of the argument's type and
6076 *COUNT to hold the number of such elements. */
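/* For example, a homogeneous aggregate of four 'float' members is a
   candidate here (*BASE_MODE == SFmode, *COUNT == 4), whereas one with
   five such members exceeds the four-element limit and is rejected.  */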
6077 static bool
6078 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6079 machine_mode mode, const_tree type,
6080 machine_mode *base_mode, int *count)
6081 {
6082 machine_mode new_mode = VOIDmode;
6083
6084 /* If we have the type information, prefer that to working things
6085 out from the mode. */
6086 if (type)
6087 {
6088 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6089
6090 if (ag_count > 0 && ag_count <= 4)
6091 *count = ag_count;
6092 else
6093 return false;
6094 }
6095 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6096 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6097 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6098 {
6099 *count = 1;
6100 new_mode = mode;
6101 }
6102 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6103 {
6104 *count = 2;
6105 new_mode = (mode == DCmode ? DFmode : SFmode);
6106 }
6107 else
6108 return false;
6109
6110 
6112 return false;
6113
6114 *base_mode = new_mode;
6115
6116 if (TARGET_GENERAL_REGS_ONLY)
6117 	    error ("argument of type %qT not permitted with %<-mgeneral-regs-only%>",
6118 type);
6119
6120 return true;
6121 }
6122
6123 static bool
6124 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6125 machine_mode mode, const_tree type)
6126 {
6127 int count ATTRIBUTE_UNUSED;
6128 machine_mode ag_mode ATTRIBUTE_UNUSED;
6129
6130 if (!use_vfp_abi (pcs_variant, false))
6131 return false;
6132 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6133 &ag_mode, &count);
6134 }
6135
6136 static bool
6137 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6138 const_tree type)
6139 {
6140 if (!use_vfp_abi (pcum->pcs_variant, false))
6141 return false;
6142
6143 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6144 &pcum->aapcs_vfp_rmode,
6145 &pcum->aapcs_vfp_rcount);
6146 }
6147
6148 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6149 for the behaviour of this function. */
6150
6151 static bool
6152 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6153 const_tree type ATTRIBUTE_UNUSED)
6154 {
6155 int rmode_size
6156 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6157 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6158 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6159 int regno;
6160
6161 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6162 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6163 {
6164 pcum->aapcs_vfp_reg_alloc = mask << regno;
6165 if (mode == BLKmode
6166 || (mode == TImode && ! TARGET_NEON)
6167 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6168 {
6169 int i;
6170 int rcount = pcum->aapcs_vfp_rcount;
6171 int rshift = shift;
6172 machine_mode rmode = pcum->aapcs_vfp_rmode;
6173 rtx par;
6174 if (!TARGET_NEON)
6175 {
6176 /* Avoid using unsupported vector modes. */
6177 if (rmode == V2SImode)
6178 rmode = DImode;
6179 else if (rmode == V4SImode)
6180 {
6181 rmode = DImode;
6182 rcount *= 2;
6183 rshift /= 2;
6184 }
6185 }
6186 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6187 for (i = 0; i < rcount; i++)
6188 {
6189 rtx tmp = gen_rtx_REG (rmode,
6190 FIRST_VFP_REGNUM + regno + i * rshift);
6191 tmp = gen_rtx_EXPR_LIST
6192 (VOIDmode, tmp,
6193 GEN_INT (i * GET_MODE_SIZE (rmode)));
6194 XVECEXP (par, 0, i) = tmp;
6195 }
6196
6197 pcum->aapcs_reg = par;
6198 }
6199 else
6200 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6201 return true;
6202 }
6203 return false;
6204 }
6205
6206 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6207 comment there for the behaviour of this function. */
6208
6209 static rtx
6210 	aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
6211 machine_mode mode,
6212 const_tree type ATTRIBUTE_UNUSED)
6213 {
6214 if (!use_vfp_abi (pcs_variant, false))
6215 return NULL;
6216
6217 if (mode == BLKmode
6218 || (GET_MODE_CLASS (mode) == MODE_INT
6219 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6220 && !TARGET_NEON))
6221 {
6222 int count;
6223 machine_mode ag_mode;
6224 int i;
6225 rtx par;
6226 int shift;
6227
6228 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6229 &ag_mode, &count);
6230
6231 if (!TARGET_NEON)
6232 {
6233 if (ag_mode == V2SImode)
6234 ag_mode = DImode;
6235 else if (ag_mode == V4SImode)
6236 {
6237 ag_mode = DImode;
6238 count *= 2;
6239 }
6240 }
6241 	      shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6242 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6243 for (i = 0; i < count; i++)
6244 {
6245 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6246 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6247 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6248 XVECEXP (par, 0, i) = tmp;
6249 }
6250
6251 return par;
6252 }
6253
6254 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6255 }
6256
6257 static void
6258 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum ATTRIBUTE_UNUSED,
6259 machine_mode mode ATTRIBUTE_UNUSED,
6260 const_tree type ATTRIBUTE_UNUSED)
6261 {
6262 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6263 pcum->aapcs_vfp_reg_alloc = 0;
6264 return;
6265 }
6266
6267 #define AAPCS_CP(X) \
6268 { \
6269 aapcs_ ## X ## _cum_init, \
6270 aapcs_ ## X ## _is_call_candidate, \
6271 aapcs_ ## X ## _allocate, \
6272 aapcs_ ## X ## _is_return_candidate, \
6273 aapcs_ ## X ## _allocate_return_reg, \
6274 aapcs_ ## X ## _advance \
6275 }
6276
6277 /* Table of co-processors that can be used to pass arguments in
6278 	   registers.  Ideally no argument should be a candidate for more than
6279 one co-processor table entry, but the table is processed in order
6280 and stops after the first match. If that entry then fails to put
6281 the argument into a co-processor register, the argument will go on
6282 the stack. */
6283 static struct
6284 {
6285 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6286 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6287
6288 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6289 BLKmode) is a candidate for this co-processor's registers; this
6290 function should ignore any position-dependent state in
6291 CUMULATIVE_ARGS and only use call-type dependent information. */
6292 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6293
6294 /* Return true if the argument does get a co-processor register; it
6295 should set aapcs_reg to an RTX of the register allocated as is
6296 required for a return from FUNCTION_ARG. */
6297 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6298
6299 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6300 be returned in this co-processor's registers. */
6301 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6302
6303 /* Allocate and return an RTX element to hold the return type of a call. This
6304 routine must not fail and will only be called if is_return_candidate
6305 returned true with the same parameters. */
6306 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6307
6308 /* Finish processing this argument and prepare to start processing
6309 the next one. */
6310 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6311 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6312 {
6313 AAPCS_CP(vfp)
6314 };
6315
6316 #undef AAPCS_CP
6317
6318 static int
6319 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6320 const_tree type)
6321 {
6322 int i;
6323
6324 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6325 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6326 return i;
6327
6328 return -1;
6329 }
6330
6331 static int
6332 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6333 {
6334 /* We aren't passed a decl, so we can't check that a call is local.
6335 However, it isn't clear that that would be a win anyway, since it
6336 might limit some tail-calling opportunities. */
6337 enum arm_pcs pcs_variant;
6338
6339 if (fntype)
6340 {
6341 const_tree fndecl = NULL_TREE;
6342
6343 if (TREE_CODE (fntype) == FUNCTION_DECL)
6344 {
6345 fndecl = fntype;
6346 fntype = TREE_TYPE (fntype);
6347 }
6348
6349 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6350 }
6351 else
6352 pcs_variant = arm_pcs_default;
6353
6354 if (pcs_variant != ARM_PCS_AAPCS)
6355 {
6356 int i;
6357
6358 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6359 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6360 TYPE_MODE (type),
6361 type))
6362 return i;
6363 }
6364 return -1;
6365 }
6366
6367 static rtx
6368 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6369 const_tree fntype)
6370 {
6371 /* We aren't passed a decl, so we can't check that a call is local.
6372 However, it isn't clear that that would be a win anyway, since it
6373 might limit some tail-calling opportunities. */
6374 enum arm_pcs pcs_variant;
6375 int unsignedp ATTRIBUTE_UNUSED;
6376
6377 if (fntype)
6378 {
6379 const_tree fndecl = NULL_TREE;
6380
6381 if (TREE_CODE (fntype) == FUNCTION_DECL)
6382 {
6383 fndecl = fntype;
6384 fntype = TREE_TYPE (fntype);
6385 }
6386
6387 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6388 }
6389 else
6390 pcs_variant = arm_pcs_default;
6391
6392 /* Promote integer types. */
6393 if (type && INTEGRAL_TYPE_P (type))
6394 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6395
6396 if (pcs_variant != ARM_PCS_AAPCS)
6397 {
6398 int i;
6399
6400 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6401 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6402 type))
6403 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6404 mode, type);
6405 }
6406
6407 	  /* Promote small structs returned in a register to full-word size
6408 for big-endian AAPCS. */
6409 if (type && arm_return_in_msb (type))
6410 {
6411 HOST_WIDE_INT size = int_size_in_bytes (type);
6412 if (size % UNITS_PER_WORD != 0)
6413 {
6414 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6415 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6416 }
6417 }
6418
6419 return gen_rtx_REG (mode, R0_REGNUM);
6420 }
6421
6422 static rtx
6423 aapcs_libcall_value (machine_mode mode)
6424 {
6425 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6426 && GET_MODE_SIZE (mode) <= 4)
6427 mode = SImode;
6428
6429 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6430 }
6431
6432 /* Lay out a function argument using the AAPCS rules. The rule
6433 numbers referred to here are those in the AAPCS. */
6434 static void
6435 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6436 const_tree type, bool named)
6437 {
6438 int nregs, nregs2;
6439 int ncrn;
6440
6441 /* We only need to do this once per argument. */
6442 if (pcum->aapcs_arg_processed)
6443 return;
6444
6445 pcum->aapcs_arg_processed = true;
6446
6447 /* Special case: if named is false then we are handling an incoming
6448 anonymous argument which is on the stack. */
6449 if (!named)
6450 return;
6451
6452 /* Is this a potential co-processor register candidate? */
6453 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6454 {
6455 int slot = aapcs_select_call_coproc (pcum, mode, type);
6456 pcum->aapcs_cprc_slot = slot;
6457
6458 /* We don't have to apply any of the rules from part B of the
6459 		 preparation phase; these are handled elsewhere in the
6460 compiler. */
6461
6462 if (slot >= 0)
6463 {
6464 /* A Co-processor register candidate goes either in its own
6465 class of registers or on the stack. */
6466 if (!pcum->aapcs_cprc_failed[slot])
6467 {
6468 /* C1.cp - Try to allocate the argument to co-processor
6469 registers. */
6470 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6471 return;
6472
6473 /* C2.cp - Put the argument on the stack and note that we
6474 can't assign any more candidates in this slot. We also
6475 need to note that we have allocated stack space, so that
6476 we won't later try to split a non-cprc candidate between
6477 core registers and the stack. */
6478 pcum->aapcs_cprc_failed[slot] = true;
6479 pcum->can_split = false;
6480 }
6481
6482 /* We didn't get a register, so this argument goes on the
6483 stack. */
6484 gcc_assert (pcum->can_split == false);
6485 return;
6486 }
6487 }
6488
6489 /* C3 - For double-word aligned arguments, round the NCRN up to the
6490 next even number. */
6491 ncrn = pcum->aapcs_ncrn;
6492 if (ncrn & 1)
6493 {
6494 int res = arm_needs_doubleword_align (mode, type);
6495 /* Only warn during RTL expansion of call stmts, otherwise we would
6496 warn e.g. during gimplification even on functions that will be
6497 always inlined, and we'd warn multiple times. Don't warn when
6498 called in expand_function_start either, as we warn instead in
6499 arm_function_arg_boundary in that case. */
6500 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6501 inform (input_location, "parameter passing for argument of type "
6502 "%qT changed in GCC 7.1", type);
6503 else if (res > 0)
6504 ncrn++;
6505 }
6506
6507 	  nregs = ARM_NUM_REGS2 (mode, type);
6508
6509 /* Sigh, this test should really assert that nregs > 0, but a GCC
6510 extension allows empty structs and then gives them empty size; it
6511 then allows such a structure to be passed by value. For some of
6512 the code below we have to pretend that such an argument has
6513 non-zero size so that we 'locate' it correctly either in
6514 registers or on the stack. */
6515 gcc_assert (nregs >= 0);
6516
6517 nregs2 = nregs ? nregs : 1;
6518
6519 /* C4 - Argument fits entirely in core registers. */
6520 if (ncrn + nregs2 <= NUM_ARG_REGS)
6521 {
6522 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6523 pcum->aapcs_next_ncrn = ncrn + nregs;
6524 return;
6525 }
6526
6527 /* C5 - Some core registers left and there are no arguments already
6528 on the stack: split this argument between the remaining core
6529 registers and the stack. */
6530 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6531 {
6532 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6533 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6534 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6535 return;
6536 }
6537
6538 /* C6 - NCRN is set to 4. */
6539 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6540
6541 	  /* C7, C8 - argument goes on the stack.  We have nothing to do here.  */
6542 return;
6543 }
6544
6545 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6546 for a call to a function whose data type is FNTYPE.
6547 For a library call, FNTYPE is NULL. */
6548 void
6549 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6550 rtx libname,
6551 tree fndecl ATTRIBUTE_UNUSED)
6552 {
6553 /* Long call handling. */
6554 if (fntype)
6555 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6556 else
6557 pcum->pcs_variant = arm_pcs_default;
6558
6559 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6560 {
6561 if (arm_libcall_uses_aapcs_base (libname))
6562 pcum->pcs_variant = ARM_PCS_AAPCS;
6563
6564 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6565 pcum->aapcs_reg = NULL_RTX;
6566 pcum->aapcs_partial = 0;
6567 pcum->aapcs_arg_processed = false;
6568 pcum->aapcs_cprc_slot = -1;
6569 pcum->can_split = true;
6570
6571 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6572 {
6573 int i;
6574
6575 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6576 {
6577 pcum->aapcs_cprc_failed[i] = false;
6578 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6579 }
6580 }
6581 return;
6582 }
6583
6584 /* Legacy ABIs */
6585
6586 /* On the ARM, the offset starts at 0. */
6587 pcum->nregs = 0;
6588 pcum->iwmmxt_nregs = 0;
6589 pcum->can_split = true;
6590
6591 /* Varargs vectors are treated the same as long long.
6592 	     named_count avoids having to change the way arm handles 'named'.  */
6593 pcum->named_count = 0;
6594 pcum->nargs = 0;
6595
6596 if (TARGET_REALLY_IWMMXT && fntype)
6597 {
6598 tree fn_arg;
6599
6600 for (fn_arg = TYPE_ARG_TYPES (fntype);
6601 fn_arg;
6602 fn_arg = TREE_CHAIN (fn_arg))
6603 pcum->named_count += 1;
6604
6605 if (! pcum->named_count)
6606 pcum->named_count = INT_MAX;
6607 }
6608 }
6609
6610 /* Return 2 if double word alignment is required for argument passing,
6611 but wasn't required before the fix for PR88469.
6612 Return 1 if double word alignment is required for argument passing.
6613 Return -1 if double word alignment used to be required for argument
6614 passing before PR77728 ABI fix, but is not required anymore.
6615 	   Return 0 if double word alignment is not required and wasn't required
6616 before either. */
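/* For instance, a struct whose only member is a plain 'long long' field
   needs doubleword alignment (return 1), while one whose only over-aligned
   member is a bit-field declared with a 64-bit base type is the PR88469
   case and typically returns 2.  */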
6617 static int
6618 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6619 {
6620 if (!type)
6621 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6622
6623 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6624 if (!AGGREGATE_TYPE_P (type))
6625 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6626
6627 /* Array types: Use member alignment of element type. */
6628 if (TREE_CODE (type) == ARRAY_TYPE)
6629 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6630
6631 int ret = 0;
6632 int ret2 = 0;
6633 /* Record/aggregate types: Use greatest member alignment of any member. */
6634 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6635 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6636 {
6637 if (TREE_CODE (field) == FIELD_DECL)
6638 return 1;
6639 else
6640 /* Before PR77728 fix, we were incorrectly considering also
6641 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6642 Make sure we can warn about that with -Wpsabi. */
6643 ret = -1;
6644 }
6645 else if (TREE_CODE (field) == FIELD_DECL
6646 && DECL_BIT_FIELD_TYPE (field)
6647 && TYPE_ALIGN (DECL_BIT_FIELD_TYPE (field)) > PARM_BOUNDARY)
6648 ret2 = 1;
6649
6650 if (ret2)
6651 return 2;
6652
6653 return ret;
6654 }
6655
6656
6657 /* Determine where to put an argument to a function.
6658 Value is zero to push the argument on the stack,
6659 or a hard register in which to store the argument.
6660
6661 MODE is the argument's machine mode.
6662 TYPE is the data type of the argument (as a tree).
6663 This is null for libcalls where that information may
6664 not be available.
6665 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6666 the preceding args and about the function being called.
6667 NAMED is nonzero if this argument is a named parameter
6668 (otherwise it is an extra parameter matching an ellipsis).
6669
6670 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6671 other arguments are passed on the stack. If (NAMED == 0) (which happens
6672 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6673 	   defined), say it is passed on the stack (function_prologue will
6674 	   indeed arrange for it to be passed on the stack if necessary).  */
6675
6676 static rtx
6677 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6678 const_tree type, bool named)
6679 {
6680 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6681 int nregs;
6682
6683 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6684 a call insn (op3 of a call_value insn). */
6685 if (mode == VOIDmode)
6686 return const0_rtx;
6687
6688 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6689 {
6690 aapcs_layout_arg (pcum, mode, type, named);
6691 return pcum->aapcs_reg;
6692 }
6693
6694 /* Varargs vectors are treated the same as long long.
6695 	     named_count avoids having to change the way arm handles 'named'.  */
6696 if (TARGET_IWMMXT_ABI
6697 && arm_vector_mode_supported_p (mode)
6698 && pcum->named_count > pcum->nargs + 1)
6699 {
6700 if (pcum->iwmmxt_nregs <= 9)
6701 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6702 else
6703 {
6704 pcum->can_split = false;
6705 return NULL_RTX;
6706 }
6707 }
6708
6709 /* Put doubleword aligned quantities in even register pairs. */
6710 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6711 {
6712 int res = arm_needs_doubleword_align (mode, type);
6713 if (res < 0 && warn_psabi)
6714 inform (input_location, "parameter passing for argument of type "
6715 "%qT changed in GCC 7.1", type);
6716 else if (res > 0)
6717 {
6718 pcum->nregs++;
6719 if (res > 1 && warn_psabi)
6720 inform (input_location, "parameter passing for argument of type "
6721 "%qT changed in GCC 9.1", type);
6722 }
6723 }
6724
6725 /* Only allow splitting an arg between regs and memory if all preceding
6726 args were allocated to regs. For args passed by reference we only count
6727 the reference pointer. */
6728 if (pcum->can_split)
6729 nregs = 1;
6730 else
6731 nregs = ARM_NUM_REGS2 (mode, type);
6732
6733 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6734 return NULL_RTX;
6735
6736 return gen_rtx_REG (mode, pcum->nregs);
6737 }
6738
6739 static unsigned int
6740 arm_function_arg_boundary (machine_mode mode, const_tree type)
6741 {
6742 if (!ARM_DOUBLEWORD_ALIGN)
6743 return PARM_BOUNDARY;
6744
6745 int res = arm_needs_doubleword_align (mode, type);
6746 if (res < 0 && warn_psabi)
6747 inform (input_location, "parameter passing for argument of type %qT "
6748 "changed in GCC 7.1", type);
6749 if (res > 1 && warn_psabi)
6750 inform (input_location, "parameter passing for argument of type "
6751 "%qT changed in GCC 9.1", type);
6752
6753 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6754 }
6755
6756 static int
6757 arm_arg_partial_bytes (cumulative_args_t pcum_v, const function_arg_info &arg)
6758 {
6759 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6760 int nregs = pcum->nregs;
6761
6762 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6763 {
6764 aapcs_layout_arg (pcum, arg.mode, arg.type, arg.named);
6765 return pcum->aapcs_partial;
6766 }
6767
6768 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (arg.mode))
6769 return 0;
6770
6771 if (NUM_ARG_REGS > nregs
6772 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (arg.mode, arg.type))
6773 && pcum->can_split)
6774 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6775
6776 return 0;
6777 }
6778
6779 /* Update the data in PCUM to advance over an argument
6780 of mode MODE and data type TYPE.
6781 (TYPE is null for libcalls where that information may not be available.) */
6782
6783 static void
6784 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6785 const_tree type, bool named)
6786 {
6787 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6788
6789 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6790 {
6791 aapcs_layout_arg (pcum, mode, type, named);
6792
6793 if (pcum->aapcs_cprc_slot >= 0)
6794 {
6795 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6796 type);
6797 pcum->aapcs_cprc_slot = -1;
6798 }
6799
6800 /* Generic stuff. */
6801 pcum->aapcs_arg_processed = false;
6802 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6803 pcum->aapcs_reg = NULL_RTX;
6804 pcum->aapcs_partial = 0;
6805 }
6806 else
6807 {
6808 pcum->nargs += 1;
6809 if (arm_vector_mode_supported_p (mode)
6810 && pcum->named_count > pcum->nargs
6811 && TARGET_IWMMXT_ABI)
6812 pcum->iwmmxt_nregs += 1;
6813 else
6814 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6815 }
6816 }
6817
6818 /* Variable sized types are passed by reference. This is a GCC
6819 extension to the ARM ABI. */
6820
6821 static bool
6822 arm_pass_by_reference (cumulative_args_t, const function_arg_info &arg)
6823 {
6824 return arg.type && TREE_CODE (TYPE_SIZE (arg.type)) != INTEGER_CST;
6825 }
6826 \f
6827 /* Encode the current state of the #pragma [no_]long_calls. */
6828 typedef enum
6829 {
6830 OFF, /* No #pragma [no_]long_calls is in effect. */
6831 LONG, /* #pragma long_calls is in effect. */
6832 SHORT /* #pragma no_long_calls is in effect. */
6833 } arm_pragma_enum;
6834
6835 static arm_pragma_enum arm_pragma_long_calls = OFF;
6836
6837 void
6838 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6839 {
6840 arm_pragma_long_calls = LONG;
6841 }
6842
6843 void
6844 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6845 {
6846 arm_pragma_long_calls = SHORT;
6847 }
6848
6849 void
6850 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6851 {
6852 arm_pragma_long_calls = OFF;
6853 }
6854 \f
6855 /* Handle an attribute requiring a FUNCTION_DECL;
6856 arguments as in struct attribute_spec.handler. */
6857 static tree
6858 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6859 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6860 {
6861 if (TREE_CODE (*node) != FUNCTION_DECL)
6862 {
6863 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6864 name);
6865 *no_add_attrs = true;
6866 }
6867
6868 return NULL_TREE;
6869 }
6870
6871 /* Handle an "interrupt" or "isr" attribute;
6872 arguments as in struct attribute_spec.handler. */
6873 static tree
6874 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6875 bool *no_add_attrs)
6876 {
6877 if (DECL_P (*node))
6878 {
6879 if (TREE_CODE (*node) != FUNCTION_DECL)
6880 {
6881 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6882 name);
6883 *no_add_attrs = true;
6884 }
6885 /* FIXME: the argument, if any, is checked for type attributes;
6886 should it also be checked for decl attributes? */
6887 }
6888 else
6889 {
6890 if (TREE_CODE (*node) == FUNCTION_TYPE
6891 || TREE_CODE (*node) == METHOD_TYPE)
6892 {
6893 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6894 {
6895 warning (OPT_Wattributes, "%qE attribute ignored",
6896 name);
6897 *no_add_attrs = true;
6898 }
6899 }
6900 else if (TREE_CODE (*node) == POINTER_TYPE
6901 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6902 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6903 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6904 {
6905 *node = build_variant_type_copy (*node);
6906 TREE_TYPE (*node) = build_type_attribute_variant
6907 (TREE_TYPE (*node),
6908 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6909 *no_add_attrs = true;
6910 }
6911 else
6912 {
6913 /* Possibly pass this attribute on from the type to a decl. */
6914 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6915 | (int) ATTR_FLAG_FUNCTION_NEXT
6916 | (int) ATTR_FLAG_ARRAY_NEXT))
6917 {
6918 *no_add_attrs = true;
6919 return tree_cons (name, args, NULL_TREE);
6920 }
6921 else
6922 {
6923 warning (OPT_Wattributes, "%qE attribute ignored",
6924 name);
6925 }
6926 }
6927 }
6928
6929 return NULL_TREE;
6930 }
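
/* For illustration (hypothetical user code): the attribute may name the
   kind of exception handler; an unrecognized name is ignored with a
   warning, as handled above.  */
#if 0
void handler (void) __attribute__ ((interrupt ("IRQ")));  /* accepted */
void broken (void) __attribute__ ((isr ("BOGUS")));       /* warning: attribute ignored */
#endif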
6931
6932 /* Handle a "pcs" attribute; arguments as in struct
6933 attribute_spec.handler. */
6934 static tree
6935 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6936 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6937 {
6938 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6939 {
6940 warning (OPT_Wattributes, "%qE attribute ignored", name);
6941 *no_add_attrs = true;
6942 }
6943 return NULL_TREE;
6944 }
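
/* For illustration (hypothetical user code): only known PCS variants are
   accepted; anything else is ignored with a warning.  */
#if 0
double vfp_sum (double, double) __attribute__ ((pcs ("aapcs-vfp")));  /* accepted */
double strange (double, double) __attribute__ ((pcs ("custom")));     /* warning: attribute ignored */
#endif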
6945
6946 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6947 /* Handle the "notshared" attribute. This attribute is another way of
6948 requesting hidden visibility. ARM's compiler supports
6949 "__declspec(notshared)"; we support the same thing via an
6950 attribute. */
6951
6952 static tree
6953 arm_handle_notshared_attribute (tree *node,
6954 tree name ATTRIBUTE_UNUSED,
6955 tree args ATTRIBUTE_UNUSED,
6956 int flags ATTRIBUTE_UNUSED,
6957 bool *no_add_attrs)
6958 {
6959 tree decl = TYPE_NAME (*node);
6960
6961 if (decl)
6962 {
6963 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6964 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6965 *no_add_attrs = false;
6966 }
6967 return NULL_TREE;
6968 }
6969 #endif
6970
6971 /* Return TRUE if a function with declaration FNDECL and type FNTYPE uses
6972 the stack to pass arguments or to return its value, and FALSE otherwise.
6973 This is used for functions with the 'cmse_nonsecure_call' or
6974 'cmse_nonsecure_entry' attributes; this function issues diagnostic
6975 messages if the stack is used. NAME is the name of the attribute
6976 used. */
6977
6978 static bool
6979 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6980 {
6981 function_args_iterator args_iter;
6982 CUMULATIVE_ARGS args_so_far_v;
6983 cumulative_args_t args_so_far;
6984 bool first_param = true;
6985 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6986
6987 /* Error out if any argument is passed on the stack. */
6988 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6989 args_so_far = pack_cumulative_args (&args_so_far_v);
6990 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6991 {
6992 rtx arg_rtx;
6993 machine_mode arg_mode = TYPE_MODE (arg_type);
6994
6995 prev_arg_type = arg_type;
6996 if (VOID_TYPE_P (arg_type))
6997 continue;
6998
6999 function_arg_info arg (arg_type, /*named=*/true);
7000 if (!first_param)
7001 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
7002 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
7003 if (!arg_rtx || arm_arg_partial_bytes (args_so_far, arg))
7004 {
7005 error ("%qE attribute not available to functions with arguments "
7006 "passed on the stack", name);
7007 return true;
7008 }
7009 first_param = false;
7010 }
7011
7012 /* Error out for variadic functions since we cannot control how many
7013 arguments will be passed and thus the stack could be used. stdarg_p () is
7014 not used for this check, to avoid walking the argument list twice. */
7015 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
7016 {
7017 error ("%qE attribute not available to functions with variable number "
7018 "of arguments", name);
7019 return true;
7020 }
7021
7022 /* Error out if return value is passed on the stack. */
7023 ret_type = TREE_TYPE (fntype);
7024 if (arm_return_in_memory (ret_type, fntype))
7025 {
7026 error ("%qE attribute not available to functions that return value on "
7027 "the stack", name);
7028 return true;
7029 }
7030 return false;
7031 }
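
/* Illustrative sketch (hypothetical user code, compiled with -mcmse): an
   entry function whose arguments all fit in r0-r3 passes the check above,
   while one whose arguments spill onto the stack is rejected.  */
#if 0
int __attribute__ ((cmse_nonsecure_entry))
ok_entry (int a, int b, int c, int d)          /* all arguments in r0-r3 */
{
  return a + b + c + d;
}

int __attribute__ ((cmse_nonsecure_entry))
bad_entry (int a, int b, int c, int d, int e)  /* 'e' would go on the stack: error */
{
  return a + e;
}
#endif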
7032
7033 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7034 function will check whether the attribute is allowed here and will add the
7035 attribute to the function declaration tree or otherwise issue a warning. */
7036
7037 static tree
7038 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7039 tree /* args */,
7040 int /* flags */,
7041 bool *no_add_attrs)
7042 {
7043 tree fndecl;
7044
7045 if (!use_cmse)
7046 {
7047 *no_add_attrs = true;
7048 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7049 "option.", name);
7050 return NULL_TREE;
7051 }
7052
7053 /* Ignore attribute for function types. */
7054 if (TREE_CODE (*node) != FUNCTION_DECL)
7055 {
7056 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7057 name);
7058 *no_add_attrs = true;
7059 return NULL_TREE;
7060 }
7061
7062 fndecl = *node;
7063
7064 /* Warn for static linkage functions. */
7065 if (!TREE_PUBLIC (fndecl))
7066 {
7067 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7068 "with static linkage", name);
7069 *no_add_attrs = true;
7070 return NULL_TREE;
7071 }
7072
7073 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7074 TREE_TYPE (fndecl));
7075 return NULL_TREE;
7076 }
7077
7078
7079 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7080 function will check whether the attribute is allowed here and will add the
7081 attribute to the function type tree or otherwise issue a diagnostic. The
7082 reason we check this at declaration time is to only allow the use of the
7083 attribute with declarations of function pointers and not function
7084 declarations. This function checks NODE is of the expected type and issues
7085 diagnostics otherwise using NAME. If it is not of the expected type
7086 *NO_ADD_ATTRS will be set to true. */
7087
7088 static tree
7089 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7090 tree /* args */,
7091 int /* flags */,
7092 bool *no_add_attrs)
7093 {
7094 tree decl = NULL_TREE, fntype = NULL_TREE;
7095 tree type;
7096
7097 if (!use_cmse)
7098 {
7099 *no_add_attrs = true;
7100 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> "
7101 "option.", name);
7102 return NULL_TREE;
7103 }
7104
7105 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7106 {
7107 decl = *node;
7108 fntype = TREE_TYPE (decl);
7109 }
7110
7111 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7112 fntype = TREE_TYPE (fntype);
7113
7114 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7115 {
7116 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7117 "function pointer", name);
7118 *no_add_attrs = true;
7119 return NULL_TREE;
7120 }
7121
7122 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7123
7124 if (*no_add_attrs)
7125 return NULL_TREE;
7126
7127 /* Prevent trees from being shared among function types with and without
7128 the cmse_nonsecure_call attribute. */
7129 type = TREE_TYPE (decl);
7130
7131 type = build_distinct_type_copy (type);
7132 TREE_TYPE (decl) = type;
7133 fntype = type;
7134
7135 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7136 {
7137 type = fntype;
7138 fntype = TREE_TYPE (fntype);
7139 fntype = build_distinct_type_copy (fntype);
7140 TREE_TYPE (type) = fntype;
7141 }
7142
7143 /* Construct a type attribute and add it to the function type. */
7144 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7145 TYPE_ATTRIBUTES (fntype));
7146 TYPE_ATTRIBUTES (fntype) = attrs;
7147 return NULL_TREE;
7148 }
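
/* For illustration (hypothetical user code, compiled with -mcmse): the
   attribute is accepted on the base type of a function pointer, not on a
   function declaration, matching the checks above.  */
#if 0
typedef void __attribute__ ((cmse_nonsecure_call)) ns_callback_t (int);
ns_callback_t *callback;                             /* accepted */
void __attribute__ ((cmse_nonsecure_call)) f (int);  /* warning: only applies to the
                                                        base type of a function pointer */
#endif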
7149
7150 /* Return 0 if the attributes for two types are incompatible, 1 if they
7151 are compatible, and 2 if they are nearly compatible (which causes a
7152 warning to be generated). */
7153 static int
7154 arm_comp_type_attributes (const_tree type1, const_tree type2)
7155 {
7156 int l1, l2, s1, s2;
7157
7158 /* Check for mismatch of non-default calling convention. */
7159 if (TREE_CODE (type1) != FUNCTION_TYPE)
7160 return 1;
7161
7162 /* Check for mismatched call attributes. */
7163 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7164 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7165 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7166 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7167
7168 /* Only bother to check if an attribute is defined. */
7169 if (l1 | l2 | s1 | s2)
7170 {
7171 /* If one type has an attribute, the other must have the same attribute. */
7172 if ((l1 != l2) || (s1 != s2))
7173 return 0;
7174
7175 /* Disallow mixed attributes. */
7176 if ((l1 & s2) || (l2 & s1))
7177 return 0;
7178 }
7179
7180 /* Check for mismatched ISR attribute. */
7181 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7182 if (! l1)
7183 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7184 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7185 if (! l2)
7186 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7187 if (l1 != l2)
7188 return 0;
7189
7190 l1 = lookup_attribute ("cmse_nonsecure_call",
7191 TYPE_ATTRIBUTES (type1)) != NULL;
7192 l2 = lookup_attribute ("cmse_nonsecure_call",
7193 TYPE_ATTRIBUTES (type2)) != NULL;
7194
7195 if (l1 != l2)
7196 return 0;
7197
7198 return 1;
7199 }
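
/* For illustration (hypothetical user code): function types that differ in
   their long_call/short_call (or isr/cmse) attributes compare as
   incompatible here, so mixing them is diagnosed by the front end.  */
#if 0
void far_fn (void) __attribute__ ((long_call));
void (*plain_ptr) (void) = far_fn;   /* diagnosed: attribute mismatch */
#endif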
7200
7201 /* Assign default attributes to a newly defined type. This is used to
7202 set the short_call/long_call attributes for function types of
7203 functions defined inside the corresponding #pragma scopes. */
7204 static void
7205 arm_set_default_type_attributes (tree type)
7206 {
7207 /* Add __attribute__ ((long_call)) to all functions when inside
7208 #pragma long_calls, or __attribute__ ((short_call)) when inside
7209 #pragma no_long_calls. */
7210 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7211 {
7212 tree type_attr_list, attr_name;
7213 type_attr_list = TYPE_ATTRIBUTES (type);
7214
7215 if (arm_pragma_long_calls == LONG)
7216 attr_name = get_identifier ("long_call");
7217 else if (arm_pragma_long_calls == SHORT)
7218 attr_name = get_identifier ("short_call");
7219 else
7220 return;
7221
7222 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7223 TYPE_ATTRIBUTES (type) = type_attr_list;
7224 }
7225 }
7226 \f
7227 /* Return true if DECL is known to be linked into section SECTION. */
7228
7229 static bool
7230 arm_function_in_section_p (tree decl, section *section)
7231 {
7232 /* We can only be certain about the prevailing symbol definition. */
7233 if (!decl_binds_to_current_def_p (decl))
7234 return false;
7235
7236 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7237 if (!DECL_SECTION_NAME (decl))
7238 {
7239 /* Make sure that we will not create a unique section for DECL. */
7240 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7241 return false;
7242 }
7243
7244 return function_section (decl) == section;
7245 }
7246
7247 /* Return nonzero if a 32-bit "long_call" should be generated for
7248 a call from the current function to DECL. We generate a long_call
7249 if the function:
7250
7251 a. has an __attribute__ ((long_call))
7252 or b. is within the scope of a #pragma long_calls
7253 or c. the -mlong-calls command line switch has been specified
7254
7255 However we do not generate a long call if the function:
7256
7257 d. has an __attribute__ ((short_call))
7258 or e. is inside the scope of a #pragma no_long_calls
7259 or f. is defined in the same section as the current function. */
7260
7261 bool
7262 arm_is_long_call_p (tree decl)
7263 {
7264 tree attrs;
7265
7266 if (!decl)
7267 return TARGET_LONG_CALLS;
7268
7269 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7270 if (lookup_attribute ("short_call", attrs))
7271 return false;
7272
7273 /* For "f", be conservative, and only cater for cases in which the
7274 whole of the current function is placed in the same section. */
7275 if (!flag_reorder_blocks_and_partition
7276 && TREE_CODE (decl) == FUNCTION_DECL
7277 && arm_function_in_section_p (decl, current_function_section ()))
7278 return false;
7279
7280 if (lookup_attribute ("long_call", attrs))
7281 return true;
7282
7283 return TARGET_LONG_CALLS;
7284 }
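
/* For illustration (hypothetical user code): per-declaration attributes
   override the prevailing #pragma or -mlong-calls default, as checked
   above.  */
#if 0
void remote (void) __attribute__ ((long_call));   /* always called via a long call */
void local (void) __attribute__ ((short_call));   /* never called via a long call */
#endif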
7285
7286 /* Return nonzero if it is ok to make a tail-call to DECL. */
7287 static bool
7288 arm_function_ok_for_sibcall (tree decl, tree exp)
7289 {
7290 unsigned long func_type;
7291
7292 if (cfun->machine->sibcall_blocked)
7293 return false;
7294
7295 /* Never tailcall something if we are generating code for Thumb-1. */
7296 if (TARGET_THUMB1)
7297 return false;
7298
7299 /* The PIC register is live on entry to VxWorks PLT entries, so we
7300 must make the call before restoring the PIC register. */
7301 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7302 return false;
7303
7304 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7305 may be used both as target of the call and base register for restoring
7306 the VFP registers */
7307 if (TARGET_APCS_FRAME && TARGET_ARM
7308 && TARGET_HARD_FLOAT
7309 && decl && arm_is_long_call_p (decl))
7310 return false;
7311
7312 /* If we are interworking and the function is not declared static
7313 then we can't tail-call it unless we know that it exists in this
7314 compilation unit (since it might be a Thumb routine). */
7315 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7316 && !TREE_ASM_WRITTEN (decl))
7317 return false;
7318
7319 func_type = arm_current_func_type ();
7320 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7321 if (IS_INTERRUPT (func_type))
7322 return false;
7323
7324 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7325 generated for entry functions themselves. */
7326 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7327 return false;
7328
7329 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls,
7330 this would complicate matters for later code generation. */
7331 if (TREE_CODE (exp) == CALL_EXPR)
7332 {
7333 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7334 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7335 return false;
7336 }
7337
7338 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7339 {
7340 /* Check that the return value locations are the same. For
7341 example that we aren't returning a value from the sibling in
7342 a VFP register but then need to transfer it to a core
7343 register. */
7344 rtx a, b;
7345 tree decl_or_type = decl;
7346
7347 /* If it is an indirect function pointer, get the function type. */
7348 if (!decl)
7349 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7350
7351 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7352 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7353 cfun->decl, false);
7354 if (!rtx_equal_p (a, b))
7355 return false;
7356 }
7357
7358 /* Never tailcall if function may be called with a misaligned SP. */
7359 if (IS_STACKALIGN (func_type))
7360 return false;
7361
7362 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7363 references should become a NOP. Don't convert such calls into
7364 sibling calls. */
7365 if (TARGET_AAPCS_BASED
7366 && arm_abi == ARM_ABI_AAPCS
7367 && decl
7368 && DECL_WEAK (decl))
7369 return false;
7370
7371 /* We cannot do a tailcall for an indirect call by descriptor if all the
7372 argument registers are used because the only register left to load the
7373 address is IP and it will already contain the static chain. */
7374 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7375 {
7376 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7377 CUMULATIVE_ARGS cum;
7378 cumulative_args_t cum_v;
7379
7380 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7381 cum_v = pack_cumulative_args (&cum);
7382
7383 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7384 {
7385 tree type = TREE_VALUE (t);
7386 if (!VOID_TYPE_P (type))
7387 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7388 }
7389
7390 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7391 return false;
7392 }
7393
7394 /* Everything else is ok. */
7395 return true;
7396 }
7397
7398 \f
7399 /* Addressing mode support functions. */
7400
7401 /* Return nonzero if X is a legitimate immediate operand when compiling
7402 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7403 int
7404 legitimate_pic_operand_p (rtx x)
7405 {
7406 if (GET_CODE (x) == SYMBOL_REF
7407 || (GET_CODE (x) == CONST
7408 && GET_CODE (XEXP (x, 0)) == PLUS
7409 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7410 return 0;
7411
7412 return 1;
7413 }
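
/* A minimal sketch of the intent (illustration only): symbolic constants
   must be loaded via the GOT under PIC, whereas plain integers are usable
   as-is.  */
#if 0
  legitimate_pic_operand_p (GEN_INT (42));                     /* 1: usable as-is */
  legitimate_pic_operand_p (gen_rtx_SYMBOL_REF (Pmode, "x"));  /* 0: must go via the GOT */
#endif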
7414
7415 /* Record that the current function needs a PIC register. If PIC_REG is null,
7416 a new pseudo is allocated as the PIC register, otherwise PIC_REG is used. In
7417 both cases cfun->machine->pic_reg is initialized if we have not already done
7418 so. COMPUTE_NOW decides whether and where to set the PIC register. If true,
7419 the PIC register is reloaded in the current position of the instruction stream
7420 regardless of whether it was loaded before. Otherwise, it is only loaded if
7421 not already done so (crtl->uses_pic_offset_table is null). Note that a
7422 nonnull PIC_REG is only supported when COMPUTE_NOW is true and a null PIC_REG
7423 is only supported when COMPUTE_NOW is false. */
7424
7425 static void
7426 require_pic_register (rtx pic_reg, bool compute_now)
7427 {
7428 gcc_assert (compute_now == (pic_reg != NULL_RTX));
7429
7430 /* A lot of the logic here is made obscure by the fact that this
7431 routine gets called as part of the rtx cost estimation process.
7432 We don't want those calls to affect any assumptions about the real
7433 function; and further, we can't call entry_of_function() until we
7434 start the real expansion process. */
7435 if (!crtl->uses_pic_offset_table || compute_now)
7436 {
7437 gcc_assert (can_create_pseudo_p ()
7438 || (pic_reg != NULL_RTX
7439 && REG_P (pic_reg)
7440 && GET_MODE (pic_reg) == Pmode));
7441 if (arm_pic_register != INVALID_REGNUM
7442 && !compute_now
7443 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7444 {
7445 if (!cfun->machine->pic_reg)
7446 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7447
7448 /* Play games to avoid marking the function as needing pic
7449 if we are being called as part of the cost-estimation
7450 process. */
7451 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7452 crtl->uses_pic_offset_table = 1;
7453 }
7454 else
7455 {
7456 rtx_insn *seq, *insn;
7457
7458 if (pic_reg == NULL_RTX)
7459 pic_reg = gen_reg_rtx (Pmode);
7460 if (!cfun->machine->pic_reg)
7461 cfun->machine->pic_reg = pic_reg;
7462
7463 /* Play games to avoid marking the function as needing pic
7464 if we are being called as part of the cost-estimation
7465 process. */
7466 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7467 {
7468 crtl->uses_pic_offset_table = 1;
7469 start_sequence ();
7470
7471 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7472 && arm_pic_register > LAST_LO_REGNUM
7473 && !compute_now)
7474 emit_move_insn (cfun->machine->pic_reg,
7475 gen_rtx_REG (Pmode, arm_pic_register));
7476 else
7477 arm_load_pic_register (0UL, pic_reg);
7478
7479 seq = get_insns ();
7480 end_sequence ();
7481
7482 for (insn = seq; insn; insn = NEXT_INSN (insn))
7483 if (INSN_P (insn))
7484 INSN_LOCATION (insn) = prologue_location;
7485
7486 /* We can be called during expansion of PHI nodes, where
7487 we can't yet emit instructions directly in the final
7488 insn stream. Queue the insns on the entry edge, they will
7489 be committed after everything else is expanded. */
7490 if (currently_expanding_to_rtl)
7491 insert_insn_on_edge (seq,
7492 single_succ_edge
7493 (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7494 else
7495 emit_insn (seq);
7496 }
7497 }
7498 }
7499 }
7500
7501 /* Legitimize the PIC load of ORIG into REG. If REG is NULL, a new pseudo is
7502 created to hold the result of the load. If not NULL, PIC_REG indicates
7503 which register to use as the PIC register, otherwise that choice is left
7504 to the register allocator. COMPUTE_NOW forces the PIC register to be loaded
7505 at the current location in the instruction stream, regardless of whether it
7506 was loaded previously. Note that a nonnull PIC_REG is only supported when
7507 COMPUTE_NOW is true and a null PIC_REG is only supported when COMPUTE_NOW is false.
7508
7509 Returns the register REG into which the PIC load is performed. */
7510
7511 rtx
7512 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
7513 bool compute_now)
7514 {
7515 gcc_assert (compute_now == (pic_reg != NULL_RTX));
7516
7517 if (GET_CODE (orig) == SYMBOL_REF
7518 || GET_CODE (orig) == LABEL_REF)
7519 {
7520 if (reg == 0)
7521 {
7522 gcc_assert (can_create_pseudo_p ());
7523 reg = gen_reg_rtx (Pmode);
7524 }
7525
7526 /* VxWorks does not impose a fixed gap between segments; the run-time
7527 gap can be different from the object-file gap. We therefore can't
7528 use GOTOFF unless we are absolutely sure that the symbol is in the
7529 same segment as the GOT. Unfortunately, the flexibility of linker
7530 scripts means that we can't be sure of that in general, so assume
7531 that GOTOFF is never valid on VxWorks. */
7532 /* References to weak symbols cannot be resolved locally: they
7533 may be overridden by a non-weak definition at link time. */
7534 rtx_insn *insn;
7535 if ((GET_CODE (orig) == LABEL_REF
7536 || (GET_CODE (orig) == SYMBOL_REF
7537 && SYMBOL_REF_LOCAL_P (orig)
7538 && (SYMBOL_REF_DECL (orig)
7539 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7540 && NEED_GOT_RELOC
7541 && arm_pic_data_is_text_relative)
7542 insn = arm_pic_static_addr (orig, reg);
7543 else
7544 {
7545 rtx pat;
7546 rtx mem;
7547
7548 /* If this function doesn't have a pic register, create one now. */
7549 require_pic_register (pic_reg, compute_now);
7550
7551 if (pic_reg == NULL_RTX)
7552 pic_reg = cfun->machine->pic_reg;
7553
7554 pat = gen_calculate_pic_address (reg, pic_reg, orig);
7555
7556 /* Make the MEM as close to a constant as possible. */
7557 mem = SET_SRC (pat);
7558 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7559 MEM_READONLY_P (mem) = 1;
7560 MEM_NOTRAP_P (mem) = 1;
7561
7562 insn = emit_insn (pat);
7563 }
7564
7565 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7566 by loop. */
7567 set_unique_reg_note (insn, REG_EQUAL, orig);
7568
7569 return reg;
7570 }
7571 else if (GET_CODE (orig) == CONST)
7572 {
7573 rtx base, offset;
7574
7575 if (GET_CODE (XEXP (orig, 0)) == PLUS
7576 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7577 return orig;
7578
7579 /* Handle the case where we have: const (UNSPEC_TLS). */
7580 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7581 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7582 return orig;
7583
7584 /* Handle the case where we have:
7585 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7586 CONST_INT. */
7587 if (GET_CODE (XEXP (orig, 0)) == PLUS
7588 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7589 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7590 {
7591 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7592 return orig;
7593 }
7594
7595 if (reg == 0)
7596 {
7597 gcc_assert (can_create_pseudo_p ());
7598 reg = gen_reg_rtx (Pmode);
7599 }
7600
7601 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7602
7603 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
7604 pic_reg, compute_now);
7605 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7606 base == reg ? 0 : reg, pic_reg,
7607 compute_now);
7608
7609 if (CONST_INT_P (offset))
7610 {
7611 /* The base register doesn't really matter, we only want to
7612 test the index for the appropriate mode. */
7613 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7614 {
7615 gcc_assert (can_create_pseudo_p ());
7616 offset = force_reg (Pmode, offset);
7617 }
7618
7619 if (CONST_INT_P (offset))
7620 return plus_constant (Pmode, base, INTVAL (offset));
7621 }
7622
7623 if (GET_MODE_SIZE (mode) > 4
7624 && (GET_MODE_CLASS (mode) == MODE_INT
7625 || TARGET_SOFT_FLOAT))
7626 {
7627 emit_insn (gen_addsi3 (reg, base, offset));
7628 return reg;
7629 }
7630
7631 return gen_rtx_PLUS (Pmode, base, offset);
7632 }
7633
7634 return orig;
7635 }
7636
7637
7638 /* Whether a register is callee saved or not. This is necessary because, when
7639 optimizing for size on Thumb-1 targets, high registers are marked as caller
7640 saved despite being callee saved, in order to avoid using them. */
7641 #define callee_saved_reg_p(reg) \
7642 (!call_used_regs[reg] \
7643 || (TARGET_THUMB1 && optimize_size \
7644 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
7645
7646 /* Return a mask for the call-clobbered low registers that are unused
7647 at the end of the prologue. */
7648 static unsigned long
7649 thumb1_prologue_unused_call_clobbered_lo_regs (void)
7650 {
7651 unsigned long mask = 0;
7652 bitmap prologue_live_out = df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun));
7653
7654 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
7655 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (prologue_live_out, reg))
7656 mask |= 1 << (reg - FIRST_LO_REGNUM);
7657 return mask;
7658 }
7659
7660 /* Similarly for the start of the epilogue. */
7661 static unsigned long
7662 thumb1_epilogue_unused_call_clobbered_lo_regs (void)
7663 {
7664 unsigned long mask = 0;
7665 bitmap epilogue_live_in = df_get_live_in (EXIT_BLOCK_PTR_FOR_FN (cfun));
7666
7667 for (int reg = FIRST_LO_REGNUM; reg <= LAST_LO_REGNUM; reg++)
7668 if (!callee_saved_reg_p (reg) && !REGNO_REG_SET_P (epilogue_live_in, reg))
7669 mask |= 1 << (reg - FIRST_LO_REGNUM);
7670 return mask;
7671 }
7672
7673 /* Find a spare register to use during the prologue of a function. */
7674
7675 static int
7676 thumb_find_work_register (unsigned long pushed_regs_mask)
7677 {
7678 int reg;
7679
7680 unsigned long unused_regs
7681 = thumb1_prologue_unused_call_clobbered_lo_regs ();
7682
7683 /* Check the argument registers first as these are call-used. The
7684 register allocation order means that sometimes r3 might be used
7685 but earlier argument registers might not, so check them all. */
7686 for (reg = LAST_LO_REGNUM; reg >= FIRST_LO_REGNUM; reg--)
7687 if (unused_regs & (1 << (reg - FIRST_LO_REGNUM)))
7688 return reg;
7689
7690 /* Otherwise look for a call-saved register that is going to be pushed. */
7691 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7692 if (pushed_regs_mask & (1 << reg))
7693 return reg;
7694
7695 if (TARGET_THUMB2)
7696 {
7697 /* Thumb-2 can use high regs. */
7698 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7699 if (pushed_regs_mask & (1 << reg))
7700 return reg;
7701 }
7702 /* Something went wrong - thumb_compute_save_reg_mask()
7703 should have arranged for a suitable register to be pushed. */
7704 gcc_unreachable ();
7705 }
7706
7707 static GTY(()) int pic_labelno;
7708
7709 /* Generate code to load the PIC register. In Thumb-1 mode, SAVED_REGS is
7710 used to find a spare low register to use as a scratch. */
7711
7712 void
7713 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
7714 {
7715 rtx l1, labelno, pic_tmp, pic_rtx;
7716
7717 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7718 return;
7719
7720 gcc_assert (flag_pic);
7721
7722 if (pic_reg == NULL_RTX)
7723 pic_reg = cfun->machine->pic_reg;
7724 if (TARGET_VXWORKS_RTP)
7725 {
7726 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7727 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7728 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7729
7730 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7731
7732 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7733 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7734 }
7735 else
7736 {
7737 /* We use an UNSPEC rather than a LABEL_REF because this label
7738 never appears in the code stream. */
7739
7740 labelno = GEN_INT (pic_labelno++);
7741 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7742 l1 = gen_rtx_CONST (VOIDmode, l1);
7743
7744 /* On the ARM the PC register contains 'dot + 8' at the time of the
7745 addition, on the Thumb it is 'dot + 4'. */
7746 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7747 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7748 UNSPEC_GOTSYM_OFF);
7749 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7750
7751 if (TARGET_32BIT)
7752 {
7753 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7754 }
7755 else /* TARGET_THUMB1 */
7756 {
7757 if (arm_pic_register != INVALID_REGNUM
7758 && REGNO (pic_reg) > LAST_LO_REGNUM)
7759 {
7760 /* We will have pushed the pic register, so we should always be
7761 able to find a work register. */
7762 pic_tmp = gen_rtx_REG (SImode,
7763 thumb_find_work_register (saved_regs));
7764 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7765 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7766 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7767 }
7768 else if (arm_pic_register != INVALID_REGNUM
7769 && arm_pic_register > LAST_LO_REGNUM
7770 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7771 {
7772 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7773 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7774 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7775 }
7776 else
7777 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7778 }
7779 }
7780
7781 /* Need to emit this whether or not we obey regdecls,
7782 since setjmp/longjmp can cause life info to screw up. */
7783 emit_use (pic_reg);
7784 }
7785
7786 /* Generate code to load the address of a static var when flag_pic is set. */
7787 static rtx_insn *
7788 arm_pic_static_addr (rtx orig, rtx reg)
7789 {
7790 rtx l1, labelno, offset_rtx;
7791
7792 gcc_assert (flag_pic);
7793
7794 /* We use an UNSPEC rather than a LABEL_REF because this label
7795 never appears in the code stream. */
7796 labelno = GEN_INT (pic_labelno++);
7797 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7798 l1 = gen_rtx_CONST (VOIDmode, l1);
7799
7800 /* On the ARM the PC register contains 'dot + 8' at the time of the
7801 addition, on the Thumb it is 'dot + 4'. */
7802 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7803 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7804 UNSPEC_SYMBOL_OFFSET);
7805 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7806
7807 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7808 }
7809
7810 /* Return nonzero if X is valid as an ARM state addressing register. */
7811 static int
7812 arm_address_register_rtx_p (rtx x, int strict_p)
7813 {
7814 int regno;
7815
7816 if (!REG_P (x))
7817 return 0;
7818
7819 regno = REGNO (x);
7820
7821 if (strict_p)
7822 return ARM_REGNO_OK_FOR_BASE_P (regno);
7823
7824 return (regno <= LAST_ARM_REGNUM
7825 || regno >= FIRST_PSEUDO_REGISTER
7826 || regno == FRAME_POINTER_REGNUM
7827 || regno == ARG_POINTER_REGNUM);
7828 }
7829
7830 /* Return TRUE if this rtx is the difference of a symbol and a label,
7831 and will reduce to a PC-relative relocation in the object file.
7832 Expressions like this can be left alone when generating PIC, rather
7833 than forced through the GOT. */
7834 static int
7835 pcrel_constant_p (rtx x)
7836 {
7837 if (GET_CODE (x) == MINUS)
7838 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7839
7840 return FALSE;
7841 }
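
/* For illustration, an expression of the form
     (minus (symbol_ref ("sym")) (label_ref L))
   satisfies the check above and can be resolved with a PC-relative
   relocation instead of a GOT entry.  */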
7842
7843 /* Return true if X will surely end up in an index register after the next
7844 splitting pass. */
7845 static bool
7846 will_be_in_index_register (const_rtx x)
7847 {
7848 /* arm.md: calculate_pic_address will split this into a register. */
7849 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7850 }
7851
7852 /* Return nonzero if X is a valid ARM state address operand. */
7853 int
7854 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7855 int strict_p)
7856 {
7857 bool use_ldrd;
7858 enum rtx_code code = GET_CODE (x);
7859
7860 if (arm_address_register_rtx_p (x, strict_p))
7861 return 1;
7862
7863 use_ldrd = (TARGET_LDRD
7864 && (mode == DImode || mode == DFmode));
7865
7866 if (code == POST_INC || code == PRE_DEC
7867 || ((code == PRE_INC || code == POST_DEC)
7868 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7869 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7870
7871 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7872 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7873 && GET_CODE (XEXP (x, 1)) == PLUS
7874 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7875 {
7876 rtx addend = XEXP (XEXP (x, 1), 1);
7877
7878 /* Don't allow ldrd post increment by register because it's hard
7879 to fixup invalid register choices. */
7880 if (use_ldrd
7881 && GET_CODE (x) == POST_MODIFY
7882 && REG_P (addend))
7883 return 0;
7884
7885 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7886 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7887 }
7888
7889 /* After reload constants split into minipools will have addresses
7890 from a LABEL_REF. */
7891 else if (reload_completed
7892 && (code == LABEL_REF
7893 || (code == CONST
7894 && GET_CODE (XEXP (x, 0)) == PLUS
7895 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7896 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7897 return 1;
7898
7899 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7900 return 0;
7901
7902 else if (code == PLUS)
7903 {
7904 rtx xop0 = XEXP (x, 0);
7905 rtx xop1 = XEXP (x, 1);
7906
7907 return ((arm_address_register_rtx_p (xop0, strict_p)
7908 && ((CONST_INT_P (xop1)
7909 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7910 || (!strict_p && will_be_in_index_register (xop1))))
7911 || (arm_address_register_rtx_p (xop1, strict_p)
7912 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7913 }
7914
7915 #if 0
7916 /* Reload currently can't handle MINUS, so disable this for now */
7917 else if (GET_CODE (x) == MINUS)
7918 {
7919 rtx xop0 = XEXP (x, 0);
7920 rtx xop1 = XEXP (x, 1);
7921
7922 return (arm_address_register_rtx_p (xop0, strict_p)
7923 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7924 }
7925 #endif
7926
7927 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7928 && code == SYMBOL_REF
7929 && CONSTANT_POOL_ADDRESS_P (x)
7930 && ! (flag_pic
7931 && symbol_mentioned_p (get_pool_constant (x))
7932 && ! pcrel_constant_p (get_pool_constant (x))))
7933 return 1;
7934
7935 return 0;
7936 }
7937
7938 /* Return true if we can avoid creating a constant pool entry for x. */
7939 static bool
7940 can_avoid_literal_pool_for_label_p (rtx x)
7941 {
7942 /* Normally we can assign constant values to target registers without
7943 the help of the constant pool. But there are cases where we have to use
7944 the constant pool, such as:
7945 1) assigning a label to a register;
7946 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7947
7948 A constant pool access of the form:
7949 (set (reg r0) (mem (symbol_ref (".LC0"))))
7950 will cause the use of the literal pool (later, in arm_reorg).
7951 So here we mark such a form as invalid, and the compiler will then
7952 adjust it into:
7953 (set (reg r0) (symbol_ref (".LC0")))
7954 (set (reg r0) (mem (reg r0))).
7955 No extra register is required, and (mem (reg r0)) won't cause the use
7956 of literal pools. */
7957 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7958 && CONSTANT_POOL_ADDRESS_P (x))
7959 return 1;
7960 return 0;
7961 }
7962
7963
7964 /* Return nonzero if X is a valid Thumb-2 address operand. */
7965 static int
7966 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7967 {
7968 bool use_ldrd;
7969 enum rtx_code code = GET_CODE (x);
7970
7971 if (arm_address_register_rtx_p (x, strict_p))
7972 return 1;
7973
7974 use_ldrd = (TARGET_LDRD
7975 && (mode == DImode || mode == DFmode));
7976
7977 if (code == POST_INC || code == PRE_DEC
7978 || ((code == PRE_INC || code == POST_DEC)
7979 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7980 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7981
7982 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7983 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7984 && GET_CODE (XEXP (x, 1)) == PLUS
7985 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7986 {
7987 /* Thumb-2 only has autoincrement by constant. */
7988 rtx addend = XEXP (XEXP (x, 1), 1);
7989 HOST_WIDE_INT offset;
7990
7991 if (!CONST_INT_P (addend))
7992 return 0;
7993
7994 offset = INTVAL (addend);
7995 if (GET_MODE_SIZE (mode) <= 4)
7996 return (offset > -256 && offset < 256);
7997
7998 return (use_ldrd && offset > -1024 && offset < 1024
7999 && (offset & 3) == 0);
8000 }
8001
8002 /* After reload constants split into minipools will have addresses
8003 from a LABEL_REF. */
8004 else if (reload_completed
8005 && (code == LABEL_REF
8006 || (code == CONST
8007 && GET_CODE (XEXP (x, 0)) == PLUS
8008 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8009 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8010 return 1;
8011
8012 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
8013 return 0;
8014
8015 else if (code == PLUS)
8016 {
8017 rtx xop0 = XEXP (x, 0);
8018 rtx xop1 = XEXP (x, 1);
8019
8020 return ((arm_address_register_rtx_p (xop0, strict_p)
8021 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
8022 || (!strict_p && will_be_in_index_register (xop1))))
8023 || (arm_address_register_rtx_p (xop1, strict_p)
8024 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
8025 }
8026
8027 else if (can_avoid_literal_pool_for_label_p (x))
8028 return 0;
8029
8030 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8031 && code == SYMBOL_REF
8032 && CONSTANT_POOL_ADDRESS_P (x)
8033 && ! (flag_pic
8034 && symbol_mentioned_p (get_pool_constant (x))
8035 && ! pcrel_constant_p (get_pool_constant (x))))
8036 return 1;
8037
8038 return 0;
8039 }
8040
8041 /* Return nonzero if INDEX is valid for an address index operand in
8042 ARM state. */
8043 static int
8044 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8045 int strict_p)
8046 {
8047 HOST_WIDE_INT range;
8048 enum rtx_code code = GET_CODE (index);
8049
8050 /* Standard coprocessor addressing modes. */
8051 if (TARGET_HARD_FLOAT
8052 && (mode == SFmode || mode == DFmode))
8053 return (code == CONST_INT && INTVAL (index) < 1024
8054 && INTVAL (index) > -1024
8055 && (INTVAL (index) & 3) == 0);
8056
8057 /* For quad modes, we restrict the constant offset to be slightly less
8058 than what the instruction format permits. We do this because for
8059 quad mode moves, we will actually decompose them into two separate
8060 double-mode reads or writes. INDEX must therefore be a valid
8061 (double-mode) offset and so should INDEX+8. */
8062 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8063 return (code == CONST_INT
8064 && INTVAL (index) < 1016
8065 && INTVAL (index) > -1024
8066 && (INTVAL (index) & 3) == 0);
8067
8068 /* We have no such constraint on double mode offsets, so we permit the
8069 full range of the instruction format. */
8070 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8071 return (code == CONST_INT
8072 && INTVAL (index) < 1024
8073 && INTVAL (index) > -1024
8074 && (INTVAL (index) & 3) == 0);
8075
8076 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8077 return (code == CONST_INT
8078 && INTVAL (index) < 1024
8079 && INTVAL (index) > -1024
8080 && (INTVAL (index) & 3) == 0);
8081
8082 if (arm_address_register_rtx_p (index, strict_p)
8083 && (GET_MODE_SIZE (mode) <= 4))
8084 return 1;
8085
8086 if (mode == DImode || mode == DFmode)
8087 {
8088 if (code == CONST_INT)
8089 {
8090 HOST_WIDE_INT val = INTVAL (index);
8091
8092 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8093 If vldr is selected it uses arm_coproc_mem_operand. */
8094 if (TARGET_LDRD)
8095 return val > -256 && val < 256;
8096 else
8097 return val > -4096 && val < 4092;
8098 }
8099
8100 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8101 }
8102
8103 if (GET_MODE_SIZE (mode) <= 4
8104 && ! (arm_arch4
8105 && (mode == HImode
8106 || mode == HFmode
8107 || (mode == QImode && outer == SIGN_EXTEND))))
8108 {
8109 if (code == MULT)
8110 {
8111 rtx xiop0 = XEXP (index, 0);
8112 rtx xiop1 = XEXP (index, 1);
8113
8114 return ((arm_address_register_rtx_p (xiop0, strict_p)
8115 && power_of_two_operand (xiop1, SImode))
8116 || (arm_address_register_rtx_p (xiop1, strict_p)
8117 && power_of_two_operand (xiop0, SImode)));
8118 }
8119 else if (code == LSHIFTRT || code == ASHIFTRT
8120 || code == ASHIFT || code == ROTATERT)
8121 {
8122 rtx op = XEXP (index, 1);
8123
8124 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8125 && CONST_INT_P (op)
8126 && INTVAL (op) > 0
8127 && INTVAL (op) <= 31);
8128 }
8129 }
8130
8131 /* For ARM v4 we may be doing a sign-extend operation during the
8132 load. */
8133 if (arm_arch4)
8134 {
8135 if (mode == HImode
8136 || mode == HFmode
8137 || (outer == SIGN_EXTEND && mode == QImode))
8138 range = 256;
8139 else
8140 range = 4096;
8141 }
8142 else
8143 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8144
8145 return (code == CONST_INT
8146 && INTVAL (index) < range
8147 && INTVAL (index) > -range);
8148 }
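
/* Worked examples for the ARM-state checks above (illustration only,
   SImode unless noted):
     [r0, #4095]       immediate within +/-4095                   -> accepted
     [r0, #300]        HImode ARMv4 halfword access, beyond +/-255 -> rejected
     [r0, r1, lsl #2]  (plus r0 (mult r1 4)) scaled register       -> accepted
     [r0, #-1020]      DFmode VFP access, multiple of 4 in range   -> accepted  */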
8149
8150 /* Return true if OP is a valid index scaling factor for a Thumb-2 address
8151 index operand, i.e. 1, 2, 4 or 8. */
8152 static bool
8153 thumb2_index_mul_operand (rtx op)
8154 {
8155 HOST_WIDE_INT val;
8156
8157 if (!CONST_INT_P (op))
8158 return false;
8159
8160 val = INTVAL (op);
8161 return (val == 1 || val == 2 || val == 4 || val == 8);
8162 }
8163
8164 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8165 static int
8166 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8167 {
8168 enum rtx_code code = GET_CODE (index);
8169
8170 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8171 /* Standard coprocessor addressing modes. */
8172 if (TARGET_HARD_FLOAT
8173 && (mode == SFmode || mode == DFmode))
8174 return (code == CONST_INT && INTVAL (index) < 1024
8175 /* Thumb-2 only allows an index range greater than -256 for its core
8176 register loads/stores. Since we allow SF/DF in core registers, we have
8177 to use the intersection of -256..4096 (core) and -1024..1024
8178 (coprocessor). */
8179 && INTVAL (index) > -256
8180 && (INTVAL (index) & 3) == 0);
8181
8182 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8183 {
8184 /* For DImode assume values will usually live in core regs
8185 and only allow LDRD addressing modes. */
8186 if (!TARGET_LDRD || mode != DImode)
8187 return (code == CONST_INT
8188 && INTVAL (index) < 1024
8189 && INTVAL (index) > -1024
8190 && (INTVAL (index) & 3) == 0);
8191 }
8192
8193 /* For quad modes, we restrict the constant offset to be slightly less
8194 than what the instruction format permits. We do this because for
8195 quad mode moves, we will actually decompose them into two separate
8196 double-mode reads or writes. INDEX must therefore be a valid
8197 (double-mode) offset and so should INDEX+8. */
8198 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8199 return (code == CONST_INT
8200 && INTVAL (index) < 1016
8201 && INTVAL (index) > -1024
8202 && (INTVAL (index) & 3) == 0);
8203
8204 /* We have no such constraint on double mode offsets, so we permit the
8205 full range of the instruction format. */
8206 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8207 return (code == CONST_INT
8208 && INTVAL (index) < 1024
8209 && INTVAL (index) > -1024
8210 && (INTVAL (index) & 3) == 0);
8211
8212 if (arm_address_register_rtx_p (index, strict_p)
8213 && (GET_MODE_SIZE (mode) <= 4))
8214 return 1;
8215
8216 if (mode == DImode || mode == DFmode)
8217 {
8218 if (code == CONST_INT)
8219 {
8220 HOST_WIDE_INT val = INTVAL (index);
8221 /* Thumb-2 ldrd only has reg+const addressing modes.
8222 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8223 If vldr is selected it uses arm_coproc_mem_operand. */
8224 if (TARGET_LDRD)
8225 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8226 else
8227 return IN_RANGE (val, -255, 4095 - 4);
8228 }
8229 else
8230 return 0;
8231 }
8232
8233 if (code == MULT)
8234 {
8235 rtx xiop0 = XEXP (index, 0);
8236 rtx xiop1 = XEXP (index, 1);
8237
8238 return ((arm_address_register_rtx_p (xiop0, strict_p)
8239 && thumb2_index_mul_operand (xiop1))
8240 || (arm_address_register_rtx_p (xiop1, strict_p)
8241 && thumb2_index_mul_operand (xiop0)));
8242 }
8243 else if (code == ASHIFT)
8244 {
8245 rtx op = XEXP (index, 1);
8246
8247 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8248 && CONST_INT_P (op)
8249 && INTVAL (op) > 0
8250 && INTVAL (op) <= 3);
8251 }
8252
8253 return (code == CONST_INT
8254 && INTVAL (index) < 4096
8255 && INTVAL (index) > -256);
8256 }
8257
8258 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8259 static int
8260 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8261 {
8262 int regno;
8263
8264 if (!REG_P (x))
8265 return 0;
8266
8267 regno = REGNO (x);
8268
8269 if (strict_p)
8270 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8271
8272 return (regno <= LAST_LO_REGNUM
8273 || regno > LAST_VIRTUAL_REGISTER
8274 || regno == FRAME_POINTER_REGNUM
8275 || (GET_MODE_SIZE (mode) >= 4
8276 && (regno == STACK_POINTER_REGNUM
8277 || regno >= FIRST_PSEUDO_REGISTER
8278 || x == hard_frame_pointer_rtx
8279 || x == arg_pointer_rtx)));
8280 }
8281
8282 /* Return nonzero if x is a legitimate index register. This is the case
8283 for any base register that can access a QImode object. */
8284 inline static int
8285 thumb1_index_register_rtx_p (rtx x, int strict_p)
8286 {
8287 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8288 }
8289
8290 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8291
8292 The AP may be eliminated to either the SP or the FP, so we use the
8293 least common denominator, e.g. SImode, and offsets from 0 to 64.
8294
8295 ??? Verify whether the above is the right approach.
8296
8297 ??? Also, the FP may be eliminated to the SP, so perhaps that
8298 needs special handling also.
8299
8300 ??? Look at how the mips16 port solves this problem. It probably uses
8301 better ways to solve some of these problems.
8302
8303 Although it is not incorrect, we don't accept QImode and HImode
8304 addresses based on the frame pointer or arg pointer until the
8305 reload pass starts. This is so that eliminating such addresses
8306 into stack based ones won't produce impossible code. */
8307 int
8308 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8309 {
8310 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8311 return 0;
8312
8313 /* ??? Not clear if this is right. Experiment. */
8314 if (GET_MODE_SIZE (mode) < 4
8315 && !(reload_in_progress || reload_completed)
8316 && (reg_mentioned_p (frame_pointer_rtx, x)
8317 || reg_mentioned_p (arg_pointer_rtx, x)
8318 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8319 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8320 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8321 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8322 return 0;
8323
8324 /* Accept any base register. SP only in SImode or larger. */
8325 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8326 return 1;
8327
8328 /* This is PC relative data before arm_reorg runs. */
8329 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8330 && GET_CODE (x) == SYMBOL_REF
8331 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8332 return 1;
8333
8334 /* This is PC relative data after arm_reorg runs. */
8335 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8336 && reload_completed
8337 && (GET_CODE (x) == LABEL_REF
8338 || (GET_CODE (x) == CONST
8339 && GET_CODE (XEXP (x, 0)) == PLUS
8340 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8341 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8342 return 1;
8343
8344 /* Post-inc indexing only supported for SImode and larger. */
8345 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8346 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8347 return 1;
8348
8349 else if (GET_CODE (x) == PLUS)
8350 {
8351 /* REG+REG address can be any two index registers. */
8352 /* We disallow FRAME+REG addressing since we know that FRAME
8353 will be replaced with STACK, and SP relative addressing only
8354 permits SP+OFFSET. */
8355 if (GET_MODE_SIZE (mode) <= 4
8356 && XEXP (x, 0) != frame_pointer_rtx
8357 && XEXP (x, 1) != frame_pointer_rtx
8358 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8359 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8360 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8361 return 1;
8362
8363 /* REG+const has 5-7 bit offset for non-SP registers. */
8364 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8365 || XEXP (x, 0) == arg_pointer_rtx)
8366 && CONST_INT_P (XEXP (x, 1))
8367 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8368 return 1;
8369
8370 /* REG+const has 10-bit offset for SP, but only SImode and
8371 larger is supported. */
8372 /* ??? Should probably check for DI/DFmode overflow here
8373 just like GO_IF_LEGITIMATE_OFFSET does. */
8374 else if (REG_P (XEXP (x, 0))
8375 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8376 && GET_MODE_SIZE (mode) >= 4
8377 && CONST_INT_P (XEXP (x, 1))
8378 && INTVAL (XEXP (x, 1)) >= 0
8379 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8380 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8381 return 1;
8382
8383 else if (REG_P (XEXP (x, 0))
8384 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8385 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8386 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8387 && REGNO (XEXP (x, 0))
8388 <= LAST_VIRTUAL_POINTER_REGISTER))
8389 && GET_MODE_SIZE (mode) >= 4
8390 && CONST_INT_P (XEXP (x, 1))
8391 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8392 return 1;
8393 }
8394
8395 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8396 && GET_MODE_SIZE (mode) == 4
8397 && GET_CODE (x) == SYMBOL_REF
8398 && CONSTANT_POOL_ADDRESS_P (x)
8399 && ! (flag_pic
8400 && symbol_mentioned_p (get_pool_constant (x))
8401 && ! pcrel_constant_p (get_pool_constant (x))))
8402 return 1;
8403
8404 return 0;
8405 }
8406
8407 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8408 instruction of mode MODE. */
8409 int
8410 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8411 {
8412 switch (GET_MODE_SIZE (mode))
8413 {
8414 case 1:
8415 return val >= 0 && val < 32;
8416
8417 case 2:
8418 return val >= 0 && val < 64 && (val & 1) == 0;
8419
8420 default:
8421 return (val >= 0
8422 && (val + GET_MODE_SIZE (mode)) <= 128
8423 && (val & 3) == 0);
8424 }
8425 }
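
/* Worked examples for the ranges above (illustration only):
     QImode: offsets 0..31                -> ldrb  rX, [rY, #31]  is the limit
     HImode: even offsets 0..62           -> ldrh  rX, [rY, #62]  is the limit
     SImode: word-aligned offsets 0..124  -> ldr   rX, [rY, #124] is the limit  */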
8426
8427 bool
8428 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8429 {
8430 if (TARGET_ARM)
8431 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8432 else if (TARGET_THUMB2)
8433 return thumb2_legitimate_address_p (mode, x, strict_p);
8434 else /* if (TARGET_THUMB1) */
8435 return thumb1_legitimate_address_p (mode, x, strict_p);
8436 }
8437
8438 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8439
8440 Given an rtx X being reloaded into a reg required to be
8441 in class CLASS, return the class of reg to actually use.
8442 In general this is just CLASS, but for the Thumb core registers and
8443 immediate constants we prefer a LO_REGS class or a subset. */
8444
8445 static reg_class_t
8446 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8447 {
8448 if (TARGET_32BIT)
8449 return rclass;
8450 else
8451 {
8452 if (rclass == GENERAL_REGS)
8453 return LO_REGS;
8454 else
8455 return rclass;
8456 }
8457 }
8458
8459 /* Build the SYMBOL_REF for __tls_get_addr. */
8460
8461 static GTY(()) rtx tls_get_addr_libfunc;
8462
8463 static rtx
8464 get_tls_get_addr (void)
8465 {
8466 if (!tls_get_addr_libfunc)
8467 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8468 return tls_get_addr_libfunc;
8469 }
8470
8471 rtx
8472 arm_load_tp (rtx target)
8473 {
8474 if (!target)
8475 target = gen_reg_rtx (SImode);
8476
8477 if (TARGET_HARD_TP)
8478 {
8479 /* Can return in any reg. */
8480 emit_insn (gen_load_tp_hard (target));
8481 }
8482 else
8483 {
8484 /* Always returned in r0. Immediately copy the result into a pseudo,
8485 otherwise other uses of r0 (e.g. setting up function arguments) may
8486 clobber the value. */
8487
8488 rtx tmp;
8489
8490 emit_insn (gen_load_tp_soft ());
8491
8492 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8493 emit_move_insn (target, tmp);
8494 }
8495 return target;
8496 }
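
/* For reference (illustration only): with a hardware thread register the
   load is a single coprocessor read, otherwise the library helper is used:
       mrc  p15, 0, rX, c13, c0, 3    @ TARGET_HARD_TP
       bl   __aeabi_read_tp           @ software TP, result in r0  */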
8497
8498 static rtx
8499 load_tls_operand (rtx x, rtx reg)
8500 {
8501 rtx tmp;
8502
8503 if (reg == NULL_RTX)
8504 reg = gen_reg_rtx (SImode);
8505
8506 tmp = gen_rtx_CONST (SImode, x);
8507
8508 emit_move_insn (reg, tmp);
8509
8510 return reg;
8511 }
8512
8513 static rtx_insn *
8514 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8515 {
8516 rtx label, labelno, sum;
8517
8518 gcc_assert (reloc != TLS_DESCSEQ);
8519 start_sequence ();
8520
8521 labelno = GEN_INT (pic_labelno++);
8522 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8523 label = gen_rtx_CONST (VOIDmode, label);
8524
8525 sum = gen_rtx_UNSPEC (Pmode,
8526 gen_rtvec (4, x, GEN_INT (reloc), label,
8527 GEN_INT (TARGET_ARM ? 8 : 4)),
8528 UNSPEC_TLS);
8529 reg = load_tls_operand (sum, reg);
8530
8531 if (TARGET_ARM)
8532 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8533 else
8534 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8535
8536 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8537 LCT_PURE, /* LCT_CONST? */
8538 Pmode, reg, Pmode);
8539
8540 rtx_insn *insns = get_insns ();
8541 end_sequence ();
8542
8543 return insns;
8544 }
8545
8546 static rtx
8547 arm_tls_descseq_addr (rtx x, rtx reg)
8548 {
8549 rtx labelno = GEN_INT (pic_labelno++);
8550 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8551 rtx sum = gen_rtx_UNSPEC (Pmode,
8552 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8553 gen_rtx_CONST (VOIDmode, label),
8554 GEN_INT (!TARGET_ARM)),
8555 UNSPEC_TLS);
8556 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8557
8558 emit_insn (gen_tlscall (x, labelno));
8559 if (!reg)
8560 reg = gen_reg_rtx (SImode);
8561 else
8562 gcc_assert (REGNO (reg) != R0_REGNUM);
8563
8564 emit_move_insn (reg, reg0);
8565
8566 return reg;
8567 }
8568
8569 rtx
8570 legitimize_tls_address (rtx x, rtx reg)
8571 {
8572 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8573 rtx_insn *insns;
8574 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8575
8576 switch (model)
8577 {
8578 case TLS_MODEL_GLOBAL_DYNAMIC:
8579 if (TARGET_GNU2_TLS)
8580 {
8581 reg = arm_tls_descseq_addr (x, reg);
8582
8583 tp = arm_load_tp (NULL_RTX);
8584
8585 dest = gen_rtx_PLUS (Pmode, tp, reg);
8586 }
8587 else
8588 {
8589 /* Original scheme */
8590 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8591 dest = gen_reg_rtx (Pmode);
8592 emit_libcall_block (insns, dest, ret, x);
8593 }
8594 return dest;
8595
8596 case TLS_MODEL_LOCAL_DYNAMIC:
8597 if (TARGET_GNU2_TLS)
8598 {
8599 reg = arm_tls_descseq_addr (x, reg);
8600
8601 tp = arm_load_tp (NULL_RTX);
8602
8603 dest = gen_rtx_PLUS (Pmode, tp, reg);
8604 }
8605 else
8606 {
8607 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8608
8609 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8610 share the LDM result with other LD model accesses. */
8611 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8612 UNSPEC_TLS);
8613 dest = gen_reg_rtx (Pmode);
8614 emit_libcall_block (insns, dest, ret, eqv);
8615
8616 /* Load the addend. */
8617 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8618 GEN_INT (TLS_LDO32)),
8619 UNSPEC_TLS);
8620 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8621 dest = gen_rtx_PLUS (Pmode, dest, addend);
8622 }
8623 return dest;
8624
8625 case TLS_MODEL_INITIAL_EXEC:
8626 labelno = GEN_INT (pic_labelno++);
8627 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8628 label = gen_rtx_CONST (VOIDmode, label);
8629 sum = gen_rtx_UNSPEC (Pmode,
8630 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8631 GEN_INT (TARGET_ARM ? 8 : 4)),
8632 UNSPEC_TLS);
8633 reg = load_tls_operand (sum, reg);
8634
8635 if (TARGET_ARM)
8636 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8637 else if (TARGET_THUMB2)
8638 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8639 else
8640 {
8641 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8642 emit_move_insn (reg, gen_const_mem (SImode, reg));
8643 }
8644
8645 tp = arm_load_tp (NULL_RTX);
8646
8647 return gen_rtx_PLUS (Pmode, tp, reg);
8648
8649 case TLS_MODEL_LOCAL_EXEC:
8650 tp = arm_load_tp (NULL_RTX);
8651
8652 reg = gen_rtx_UNSPEC (Pmode,
8653 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8654 UNSPEC_TLS);
8655 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8656
8657 return gen_rtx_PLUS (Pmode, tp, reg);
8658
8659 default:
8660 abort ();
8661 }
8662 }
8663
8664 /* Try machine-dependent ways of modifying an illegitimate address
8665 to be legitimate. If we find one, return the new, valid address. */
8666 rtx
8667 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8668 {
8669 if (arm_tls_referenced_p (x))
8670 {
8671 rtx addend = NULL;
8672
8673 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8674 {
8675 addend = XEXP (XEXP (x, 0), 1);
8676 x = XEXP (XEXP (x, 0), 0);
8677 }
8678
8679 if (GET_CODE (x) != SYMBOL_REF)
8680 return x;
8681
8682 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8683
8684 x = legitimize_tls_address (x, NULL_RTX);
8685
8686 if (addend)
8687 {
8688 x = gen_rtx_PLUS (SImode, x, addend);
8689 orig_x = x;
8690 }
8691 else
8692 return x;
8693 }
8694
8695 if (!TARGET_ARM)
8696 {
8697 /* TODO: legitimize_address for Thumb2. */
8698 if (TARGET_THUMB2)
8699 return x;
8700 return thumb_legitimize_address (x, orig_x, mode);
8701 }
8702
8703 if (GET_CODE (x) == PLUS)
8704 {
8705 rtx xop0 = XEXP (x, 0);
8706 rtx xop1 = XEXP (x, 1);
8707
8708 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8709 xop0 = force_reg (SImode, xop0);
8710
8711 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8712 && !symbol_mentioned_p (xop1))
8713 xop1 = force_reg (SImode, xop1);
8714
8715 if (ARM_BASE_REGISTER_RTX_P (xop0)
8716 && CONST_INT_P (xop1))
8717 {
8718 HOST_WIDE_INT n, low_n;
8719 rtx base_reg, val;
8720 n = INTVAL (xop1);
8721
8722 /* VFP addressing modes actually allow greater offsets, but for
8723 now we just stick with the lowest common denominator. */
8724 if (mode == DImode || mode == DFmode)
8725 {
8726 low_n = n & 0x0f;
8727 n &= ~0x0f;
8728 if (low_n > 4)
8729 {
8730 n += 16;
8731 low_n -= 16;
8732 }
8733 }
8734 else
8735 {
8736 low_n = ((mode) == TImode ? 0
8737 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8738 n -= low_n;
8739 }
8740
8741 base_reg = gen_reg_rtx (SImode);
8742 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8743 emit_move_insn (base_reg, val);
8744 x = plus_constant (Pmode, base_reg, low_n);
8745 }
8746 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8747 x = gen_rtx_PLUS (SImode, xop0, xop1);
8748 }
8749
8750 /* XXX We don't allow MINUS any more -- see comment in
8751 arm_legitimate_address_outer_p (). */
8752 else if (GET_CODE (x) == MINUS)
8753 {
8754 rtx xop0 = XEXP (x, 0);
8755 rtx xop1 = XEXP (x, 1);
8756
8757 if (CONSTANT_P (xop0))
8758 xop0 = force_reg (SImode, xop0);
8759
8760 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8761 xop1 = force_reg (SImode, xop1);
8762
8763 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8764 x = gen_rtx_MINUS (SImode, xop0, xop1);
8765 }
8766
8767 /* Make sure to take full advantage of the pre-indexed addressing mode
8768 with absolute addresses, which often allows the base register to be
8769 factorized across multiple adjacent memory references and might even
8770 allow the minipool to be avoided entirely. */
8771 else if (CONST_INT_P (x) && optimize > 0)
8772 {
8773 unsigned int bits;
8774 HOST_WIDE_INT mask, base, index;
8775 rtx base_reg;
8776
8777 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8778 use an 8-bit index. So let's use a 12-bit index for SImode only and
8779 hope that arm_gen_constant will enable ldrb to use more bits. */
8780 bits = (mode == SImode) ? 12 : 8;
8781 mask = (1 << bits) - 1;
8782 base = INTVAL (x) & ~mask;
8783 index = INTVAL (x) & mask;
8784 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8785 {
8786 /* It'll most probably be more efficient to generate the base
8787 with more bits set and use a negative index instead. */
8788 base |= mask;
8789 index -= mask;
8790 }
8791 base_reg = force_reg (SImode, GEN_INT (base));
8792 x = plus_constant (Pmode, base_reg, index);
8793 }
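/* Worked example (illustrative values, to show the splitting above): for an
   SImode access to the absolute address 0x0ffff004 we get bits = 12,
   mask = 0xfff, base = 0x0ffff000 and index = 0x004.  bit_count (base) is 16,
   which exceeds (32 - bits) / 2 = 10, so the code switches to a negative
   index: base becomes 0x0fffffff and index becomes 0x004 - 0xfff = -4091,
   and 0x0fffffff + (-4091) still equals 0x0ffff004.  The payoff is that
   ~0x0fffffff is 0xf0000000, a valid rotated immediate, so the biased base
   can be built with a single mvn, whereas 0x0ffff000 needs two
   instructions.  */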
8794
8795 if (flag_pic)
8796 {
8797 /* We need to find and carefully transform any SYMBOL and LABEL
8798 references; so go back to the original address expression. */
8799 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
8800 false /*compute_now*/);
8801
8802 if (new_x != orig_x)
8803 x = new_x;
8804 }
8805
8806 return x;
8807 }
8808
8809
8810 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8811 to be legitimate. If we find one, return the new, valid address. */
8812 rtx
8813 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8814 {
8815 if (GET_CODE (x) == PLUS
8816 && CONST_INT_P (XEXP (x, 1))
8817 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8818 || INTVAL (XEXP (x, 1)) < 0))
8819 {
8820 rtx xop0 = XEXP (x, 0);
8821 rtx xop1 = XEXP (x, 1);
8822 HOST_WIDE_INT offset = INTVAL (xop1);
8823
8824 /* Try to fold the offset into a biasing of the base register and
8825 then offsetting that. Don't do this when optimizing for space
8826 since it can cause too many CSEs. */
8827 if (optimize_size && offset >= 0
8828 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8829 {
8830 HOST_WIDE_INT delta;
8831
8832 if (offset >= 256)
8833 delta = offset - (256 - GET_MODE_SIZE (mode));
8834 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8835 delta = 31 * GET_MODE_SIZE (mode);
8836 else
8837 delta = offset & (~31 * GET_MODE_SIZE (mode));
8838
8839 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8840 NULL_RTX);
8841 x = plus_constant (Pmode, xop0, delta);
8842 }
8843 else if (offset < 0 && offset > -256)
8844 /* Small negative offsets are best done with a subtract before the
8845 dereference; forcing these into a register normally takes two
8846 instructions. */
8847 x = force_operand (x, NULL_RTX);
8848 else
8849 {
8850 /* For the remaining cases, force the constant into a register. */
8851 xop1 = force_reg (SImode, xop1);
8852 x = gen_rtx_PLUS (SImode, xop0, xop1);
8853 }
8854 }
8855 else if (GET_CODE (x) == PLUS
8856 && s_register_operand (XEXP (x, 1), SImode)
8857 && !s_register_operand (XEXP (x, 0), SImode))
8858 {
8859 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8860
8861 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8862 }
8863
8864 if (flag_pic)
8865 {
8866 /* We need to find and carefully transform any SYMBOL and LABEL
8867 references; so go back to the original address expression. */
8868 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
8869 false /*compute_now*/);
8870
8871 if (new_x != orig_x)
8872 x = new_x;
8873 }
8874
8875 return x;
8876 }
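/* Worked example of the offset folding above (illustrative, assuming the
   folding branch is taken and an SImode access, whose Thumb-1 load can
   encode offsets 0..124 = 31 * GET_MODE_SIZE):
     offset = 300: 300 >= 256, so delta = 300 - (256 - 4) = 48; the base is
       biased by 300 - 48 = 252 (one add of an 8-bit immediate) and the
       load then uses offset 48.
     offset = 130: 130 < 32 * 4 + 8 = 136, so delta = 31 * 4 = 124; the
       base is biased by 6 and the load uses offset 124.
   In both cases the bias plus the residual offset equals the original
   offset and each piece fits its encoding.  */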
8877
8878 /* Return TRUE if X contains any TLS symbol references. */
8879
8880 bool
8881 arm_tls_referenced_p (rtx x)
8882 {
8883 if (! TARGET_HAVE_TLS)
8884 return false;
8885
8886 subrtx_iterator::array_type array;
8887 FOR_EACH_SUBRTX (iter, array, x, ALL)
8888 {
8889 const_rtx x = *iter;
8890 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8891 {
8892 /* ARM currently does not provide relocations to encode TLS variables
8893 into AArch32 instructions, only into data, so there is currently no
8894 way to implement these if the literal pool is disabled. */
8895 if (arm_disable_literal_pool)
8896 sorry ("accessing thread-local storage is not currently supported "
8897 "with %<-mpure-code%> or %<-mslow-flash-data%>");
8898
8899 return true;
8900 }
8901
8902 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8903 TLS offsets, not real symbol references. */
8904 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8905 iter.skip_subrtxes ();
8906 }
8907 return false;
8908 }
8909
8910 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8911
8912 On the ARM, allow any integer (invalid ones are removed later by insn
8913 patterns), nice doubles and symbol_refs which refer to the function's
8914 constant pool XXX.
8915
8916 When generating pic allow anything. */
8917
8918 static bool
8919 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8920 {
8921 return flag_pic || !label_mentioned_p (x);
8922 }
8923
8924 static bool
8925 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8926 {
8927 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8928 RTXs. These RTXs must therefore be allowed for Thumb-1 so that the
8929 result is valid when run for ARMv8-M Baseline or later. */
8930 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8931 x = XEXP (x, 0);
8932
8933 return (CONST_INT_P (x)
8934 || CONST_DOUBLE_P (x)
8935 || CONSTANT_ADDRESS_P (x)
8936 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8937 || flag_pic);
8938 }
8939
8940 static bool
8941 arm_legitimate_constant_p (machine_mode mode, rtx x)
8942 {
8943 return (!arm_cannot_force_const_mem (mode, x)
8944 && (TARGET_32BIT
8945 ? arm_legitimate_constant_p_1 (mode, x)
8946 : thumb_legitimate_constant_p (mode, x)));
8947 }
8948
8949 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8950
8951 static bool
8952 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8953 {
8954 rtx base, offset;
8955 split_const (x, &base, &offset);
8956
8957 if (SYMBOL_REF_P (base))
8958 {
8959 /* Function symbols cannot have an offset due to the Thumb bit. */
8960 if ((SYMBOL_REF_FLAGS (base) & SYMBOL_FLAG_FUNCTION)
8961 && INTVAL (offset) != 0)
8962 return true;
8963
8964 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P
8965 && !offset_within_block_p (base, INTVAL (offset)))
8966 return true;
8967 }
8968 return arm_tls_referenced_p (x);
8969 }
8970 \f
8971 #define REG_OR_SUBREG_REG(X) \
8972 (REG_P (X) \
8973 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8974
8975 #define REG_OR_SUBREG_RTX(X) \
8976 (REG_P (X) ? (X) : SUBREG_REG (X))
8977
8978 static inline int
8979 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8980 {
8981 machine_mode mode = GET_MODE (x);
8982 int total, words;
8983
8984 switch (code)
8985 {
8986 case ASHIFT:
8987 case ASHIFTRT:
8988 case LSHIFTRT:
8989 case ROTATERT:
8990 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8991
8992 case PLUS:
8993 case MINUS:
8994 case COMPARE:
8995 case NEG:
8996 case NOT:
8997 return COSTS_N_INSNS (1);
8998
8999 case MULT:
9000 if (arm_arch6m && arm_m_profile_small_mul)
9001 return COSTS_N_INSNS (32);
9002
9003 if (CONST_INT_P (XEXP (x, 1)))
9004 {
9005 int cycles = 0;
9006 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
9007
9008 while (i)
9009 {
9010 i >>= 2;
9011 cycles++;
9012 }
9013 return COSTS_N_INSNS (2) + cycles;
9014 }
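/* Illustrative note on the constant-multiplier loop above: it charges one
   cycle per two bits of the multiplier value.  For example, i = 0x55 takes
   four iterations (0x55 -> 0x15 -> 0x5 -> 0x1 -> 0), giving an estimate of
   COSTS_N_INSNS (2) + 4.  */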
9015 return COSTS_N_INSNS (1) + 16;
9016
9017 case SET:
9018 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9019 the mode. */
9020 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9021 return (COSTS_N_INSNS (words)
9022 + 4 * ((MEM_P (SET_SRC (x)))
9023 + MEM_P (SET_DEST (x))));
9024
9025 case CONST_INT:
9026 if (outer == SET)
9027 {
9028 if (UINTVAL (x) < 256
9029 /* 16-bit constant. */
9030 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
9031 return 0;
9032 if (thumb_shiftable_const (INTVAL (x)))
9033 return COSTS_N_INSNS (2);
9034 return COSTS_N_INSNS (3);
9035 }
9036 else if ((outer == PLUS || outer == COMPARE)
9037 && INTVAL (x) < 256 && INTVAL (x) > -256)
9038 return 0;
9039 else if ((outer == IOR || outer == XOR || outer == AND)
9040 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9041 return COSTS_N_INSNS (1);
9042 else if (outer == AND)
9043 {
9044 int i;
9045 /* This duplicates the tests in the andsi3 expander. */
9046 for (i = 9; i <= 31; i++)
9047 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9048 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9049 return COSTS_N_INSNS (2);
9050 }
9051 else if (outer == ASHIFT || outer == ASHIFTRT
9052 || outer == LSHIFTRT)
9053 return 0;
9054 return COSTS_N_INSNS (2);
9055
9056 case CONST:
9057 case CONST_DOUBLE:
9058 case LABEL_REF:
9059 case SYMBOL_REF:
9060 return COSTS_N_INSNS (3);
9061
9062 case UDIV:
9063 case UMOD:
9064 case DIV:
9065 case MOD:
9066 return 100;
9067
9068 case TRUNCATE:
9069 return 99;
9070
9071 case AND:
9072 case XOR:
9073 case IOR:
9074 /* XXX guess. */
9075 return 8;
9076
9077 case MEM:
9078 /* XXX another guess. */
9079 /* Memory costs quite a lot for the first word, but subsequent words
9080 load at the equivalent of a single insn each. */
9081 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9082 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9083 ? 4 : 0));
9084
9085 case IF_THEN_ELSE:
9086 /* XXX a guess. */
9087 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9088 return 14;
9089 return 2;
9090
9091 case SIGN_EXTEND:
9092 case ZERO_EXTEND:
9093 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9094 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9095
9096 if (mode == SImode)
9097 return total;
9098
9099 if (arm_arch6)
9100 return total + COSTS_N_INSNS (1);
9101
9102 /* Assume a two-shift sequence. Increase the cost slightly so
9103 we prefer actual shifts over an extend operation. */
9104 return total + 1 + COSTS_N_INSNS (2);
9105
9106 default:
9107 return 99;
9108 }
9109 }
9110
9111 /* Estimate the size cost of Thumb-1 instructions.
9112 For now most of the code is copied from thumb1_rtx_costs. We need
9113 finer-grained tuning when we have more related test cases. */
9114 static inline int
9115 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9116 {
9117 machine_mode mode = GET_MODE (x);
9118 int words, cost;
9119
9120 switch (code)
9121 {
9122 case ASHIFT:
9123 case ASHIFTRT:
9124 case LSHIFTRT:
9125 case ROTATERT:
9126 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9127
9128 case PLUS:
9129 case MINUS:
9130 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/
9131 shiftsub1 patterns defined by RTL expansion, especially for the
9132 expansion of multiplication. */
9133 if ((GET_CODE (XEXP (x, 0)) == MULT
9134 && power_of_two_operand (XEXP (XEXP (x, 0), 1), SImode))
9135 || (GET_CODE (XEXP (x, 1)) == MULT
9136 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9137 return COSTS_N_INSNS (2);
9138 /* Fall through. */
9139 case COMPARE:
9140 case NEG:
9141 case NOT:
9142 return COSTS_N_INSNS (1);
9143
9144 case MULT:
9145 if (CONST_INT_P (XEXP (x, 1)))
9146 {
9147 /* The Thumb-1 mul instruction can't operate on a constant. We must
9148 load it into a register first. */
9149 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9150 /* For the targets which have a very small and high-latency multiply
9151 unit, we prefer to synthesize the mult with up to 5 instructions,
9152 giving a good balance between size and performance. */
9153 if (arm_arch6m && arm_m_profile_small_mul)
9154 return COSTS_N_INSNS (5);
9155 else
9156 return COSTS_N_INSNS (1) + const_size;
9157 }
9158 return COSTS_N_INSNS (1);
9159
9160 case SET:
9161 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9162 the mode. */
9163 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9164 cost = COSTS_N_INSNS (words);
9165 if (satisfies_constraint_J (SET_SRC (x))
9166 || satisfies_constraint_K (SET_SRC (x))
9167 /* Too big an immediate for a 2-byte mov, using MOVT. */
9168 || (CONST_INT_P (SET_SRC (x))
9169 && UINTVAL (SET_SRC (x)) >= 256
9170 && TARGET_HAVE_MOVT
9171 && satisfies_constraint_j (SET_SRC (x)))
9172 /* thumb1_movdi_insn. */
9173 || ((words > 1) && MEM_P (SET_SRC (x))))
9174 cost += COSTS_N_INSNS (1);
9175 return cost;
9176
9177 case CONST_INT:
9178 if (outer == SET)
9179 {
9180 if (UINTVAL (x) < 256)
9181 return COSTS_N_INSNS (1);
9182 /* movw is 4 bytes long. */
9183 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9184 return COSTS_N_INSNS (2);
9185 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9186 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9187 return COSTS_N_INSNS (2);
9188 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9189 if (thumb_shiftable_const (INTVAL (x)))
9190 return COSTS_N_INSNS (2);
9191 return COSTS_N_INSNS (3);
9192 }
9193 else if ((outer == PLUS || outer == COMPARE)
9194 && INTVAL (x) < 256 && INTVAL (x) > -256)
9195 return 0;
9196 else if ((outer == IOR || outer == XOR || outer == AND)
9197 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9198 return COSTS_N_INSNS (1);
9199 else if (outer == AND)
9200 {
9201 int i;
9202 /* This duplicates the tests in the andsi3 expander. */
9203 for (i = 9; i <= 31; i++)
9204 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9205 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9206 return COSTS_N_INSNS (2);
9207 }
9208 else if (outer == ASHIFT || outer == ASHIFTRT
9209 || outer == LSHIFTRT)
9210 return 0;
9211 return COSTS_N_INSNS (2);
9212
9213 case CONST:
9214 case CONST_DOUBLE:
9215 case LABEL_REF:
9216 case SYMBOL_REF:
9217 return COSTS_N_INSNS (3);
9218
9219 case UDIV:
9220 case UMOD:
9221 case DIV:
9222 case MOD:
9223 return 100;
9224
9225 case TRUNCATE:
9226 return 99;
9227
9228 case AND:
9229 case XOR:
9230 case IOR:
9231 return COSTS_N_INSNS (1);
9232
9233 case MEM:
9234 return (COSTS_N_INSNS (1)
9235 + COSTS_N_INSNS (1)
9236 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9237 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9238 ? COSTS_N_INSNS (1) : 0));
9239
9240 case IF_THEN_ELSE:
9241 /* XXX a guess. */
9242 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9243 return 14;
9244 return 2;
9245
9246 case ZERO_EXTEND:
9247 /* XXX still guessing. */
9248 switch (GET_MODE (XEXP (x, 0)))
9249 {
9250 case E_QImode:
9251 return (1 + (mode == DImode ? 4 : 0)
9252 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9253
9254 case E_HImode:
9255 return (4 + (mode == DImode ? 4 : 0)
9256 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9257
9258 case E_SImode:
9259 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9260
9261 default:
9262 return 99;
9263 }
9264
9265 default:
9266 return 99;
9267 }
9268 }
9269
9270 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9271 operand, then return the operand that is being shifted. If the shift
9272 is not by a constant, then set *SHIFT_REG to the shift-amount operand.
9273 Return NULL if OP is not a shifter operand. */
9274 static rtx
9275 shifter_op_p (rtx op, rtx *shift_reg)
9276 {
9277 enum rtx_code code = GET_CODE (op);
9278
9279 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9280 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9281 return XEXP (op, 0);
9282 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9283 return XEXP (op, 0);
9284 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9285 || code == ASHIFTRT)
9286 {
9287 if (!CONST_INT_P (XEXP (op, 1)))
9288 *shift_reg = XEXP (op, 1);
9289 return XEXP (op, 0);
9290 }
9291
9292 return NULL;
9293 }
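/* Illustrative example of how shifter_op_p is used (r1 and r2 are
   placeholder registers): for (mult (reg:SI r1) (const_int 4)),
   exact_log2 (4) == 2 > 0, so the multiply is costed like a left shift by
   two and (reg:SI r1) is returned with *SHIFT_REG untouched.  For
   (ashift (reg:SI r1) (reg:SI r2)) the shift amount is not a constant, so
   *SHIFT_REG is set to (reg:SI r2) and (reg:SI r1) is returned.  */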
9294
9295 static bool
9296 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9297 {
9298 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9299 rtx_code code = GET_CODE (x);
9300 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9301
9302 switch (XINT (x, 1))
9303 {
9304 case UNSPEC_UNALIGNED_LOAD:
9305 /* We can only do unaligned loads into the integer unit, and we can't
9306 use LDM or LDRD. */
9307 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9308 if (speed_p)
9309 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9310 + extra_cost->ldst.load_unaligned);
9311
9312 #ifdef NOT_YET
9313 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9314 ADDR_SPACE_GENERIC, speed_p);
9315 #endif
9316 return true;
9317
9318 case UNSPEC_UNALIGNED_STORE:
9319 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9320 if (speed_p)
9321 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9322 + extra_cost->ldst.store_unaligned);
9323
9324 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9325 #ifdef NOT_YET
9326 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9327 ADDR_SPACE_GENERIC, speed_p);
9328 #endif
9329 return true;
9330
9331 case UNSPEC_VRINTZ:
9332 case UNSPEC_VRINTP:
9333 case UNSPEC_VRINTM:
9334 case UNSPEC_VRINTR:
9335 case UNSPEC_VRINTX:
9336 case UNSPEC_VRINTA:
9337 if (speed_p)
9338 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9339
9340 return true;
9341 default:
9342 *cost = COSTS_N_INSNS (2);
9343 break;
9344 }
9345 return true;
9346 }
9347
9348 /* Cost of a libcall. We assume one insn per argument, an amount for the
9349 call (one insn for -Os) and then one for processing the result. */
9350 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
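/* Worked example of the macro above (illustrative): for a two-argument
   libcall, LIBCALL_COST (2) evaluates to COSTS_N_INSNS (2 + 2)
   = COSTS_N_INSNS (4) when speed_p is false (size) and to
   COSTS_N_INSNS (2 + 18) = COSTS_N_INSNS (20) when speed_p is true.  */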
9351
9352 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9353 do \
9354 { \
9355 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9356 if (shift_op != NULL \
9357 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9358 { \
9359 if (shift_reg) \
9360 { \
9361 if (speed_p) \
9362 *cost += extra_cost->alu.arith_shift_reg; \
9363 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9364 ASHIFT, 1, speed_p); \
9365 } \
9366 else if (speed_p) \
9367 *cost += extra_cost->alu.arith_shift; \
9368 \
9369 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9370 ASHIFT, 0, speed_p) \
9371 + rtx_cost (XEXP (x, 1 - IDX), \
9372 GET_MODE (shift_op), \
9373 OP, 1, speed_p)); \
9374 return true; \
9375 } \
9376 } \
9377 while (0)
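/* Usage sketch for the macro above (illustrative):
   HANDLE_NARROW_SHIFT_ARITH (PLUS, 0) checks whether XEXP (x, 0) is a left
   shift (including a multiply by a power of two); if so it adds the
   arith+shift cost, the costs of the shifted operand and of the other
   operand XEXP (x, 1 - IDX), and returns true from the enclosing cost
   function.  The PLUS case below expands it with IDX 0; the non-commutative
   MINUS case expands it with both IDX 0 and IDX 1.  */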
9378
9379 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
9380 considering the costs of the addressing mode and memory access
9381 separately. */
9382 static bool
9383 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
9384 int *cost, bool speed_p)
9385 {
9386 machine_mode mode = GET_MODE (x);
9387
9388 *cost = COSTS_N_INSNS (1);
9389
9390 if (flag_pic
9391 && GET_CODE (XEXP (x, 0)) == PLUS
9392 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9393 /* This will be split into two instructions. Add the cost of the
9394 additional instruction here. The cost of the memory access is computed
9395 below. See arm.md:calculate_pic_address. */
9396 *cost += COSTS_N_INSNS (1);
9397
9398 /* Calculate cost of the addressing mode. */
9399 if (speed_p)
9400 {
9401 arm_addr_mode_op op_type;
9402 switch (GET_CODE (XEXP (x, 0)))
9403 {
9404 default:
9405 case REG:
9406 op_type = AMO_DEFAULT;
9407 break;
9408 case MINUS:
9409 /* MINUS does not appear in RTL, but the architecture supports it,
9410 so handle this case defensively. */
9411 /* fall through */
9412 case PLUS:
9413 op_type = AMO_NO_WB;
9414 break;
9415 case PRE_INC:
9416 case PRE_DEC:
9417 case POST_INC:
9418 case POST_DEC:
9419 case PRE_MODIFY:
9420 case POST_MODIFY:
9421 op_type = AMO_WB;
9422 break;
9423 }
9424
9425 if (VECTOR_MODE_P (mode))
9426 *cost += current_tune->addr_mode_costs->vector[op_type];
9427 else if (FLOAT_MODE_P (mode))
9428 *cost += current_tune->addr_mode_costs->fp[op_type];
9429 else
9430 *cost += current_tune->addr_mode_costs->integer[op_type];
9431 }
9432
9433 /* Calculate cost of memory access. */
9434 if (speed_p)
9435 {
9436 if (FLOAT_MODE_P (mode))
9437 {
9438 if (GET_MODE_SIZE (mode) == 8)
9439 *cost += extra_cost->ldst.loadd;
9440 else
9441 *cost += extra_cost->ldst.loadf;
9442 }
9443 else if (VECTOR_MODE_P (mode))
9444 *cost += extra_cost->ldst.loadv;
9445 else
9446 {
9447 /* Integer modes */
9448 if (GET_MODE_SIZE (mode) == 8)
9449 *cost += extra_cost->ldst.ldrd;
9450 else
9451 *cost += extra_cost->ldst.load;
9452 }
9453 }
9454
9455 return true;
9456 }
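/* Illustration of the address classification above (example addresses
   only): a plain register address such as (mem (reg r0)) is costed as
   AMO_DEFAULT, a base-plus-offset or base-plus-index form such as
   (mem (plus (reg r0) (const_int 8))) as AMO_NO_WB, and any auto-modify
   form such as (mem (post_inc (reg r0))) as AMO_WB; the table row is then
   picked by whether the mode is vector, floating-point or integer.  */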
9457
9458 /* RTX costs. Make an estimate of the cost of executing the operation
9459 X, which is contained within an operation with code OUTER_CODE.
9460 SPEED_P indicates whether the cost desired is the performance cost,
9461 or the size cost. The estimate is stored in COST and the return
9462 value is TRUE if the cost calculation is final, or FALSE if the
9463 caller should recurse through the operands of X to add additional
9464 costs.
9465
9466 We currently make no attempt to model the size savings of Thumb-2
9467 16-bit instructions. At the normal points in compilation where
9468 this code is called we have no measure of whether the condition
9469 flags are live or not, and thus no realistic way to determine what
9470 the size will eventually be. */
9471 static bool
9472 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9473 const struct cpu_cost_table *extra_cost,
9474 int *cost, bool speed_p)
9475 {
9476 machine_mode mode = GET_MODE (x);
9477
9478 *cost = COSTS_N_INSNS (1);
9479
9480 if (TARGET_THUMB1)
9481 {
9482 if (speed_p)
9483 *cost = thumb1_rtx_costs (x, code, outer_code);
9484 else
9485 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9486 return true;
9487 }
9488
9489 switch (code)
9490 {
9491 case SET:
9492 *cost = 0;
9493 /* SET RTXs don't have a mode so we get it from the destination. */
9494 mode = GET_MODE (SET_DEST (x));
9495
9496 if (REG_P (SET_SRC (x))
9497 && REG_P (SET_DEST (x)))
9498 {
9499 /* Assume that most copies can be done with a single insn,
9500 unless we don't have HW FP, in which case everything
9501 larger than word mode will require two insns. */
9502 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9503 && GET_MODE_SIZE (mode) > 4)
9504 || mode == DImode)
9505 ? 2 : 1);
9506 /* Conditional register moves can be encoded
9507 in 16 bits in Thumb mode. */
9508 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9509 *cost >>= 1;
9510
9511 return true;
9512 }
9513
9514 if (CONST_INT_P (SET_SRC (x)))
9515 {
9516 /* Handle CONST_INT here, since the value doesn't have a mode
9517 and we would otherwise be unable to work out the true cost. */
9518 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9519 0, speed_p);
9520 outer_code = SET;
9521 /* Slightly lower the cost of setting a core reg to a constant.
9522 This helps break up chains and allows for better scheduling. */
9523 if (REG_P (SET_DEST (x))
9524 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9525 *cost -= 1;
9526 x = SET_SRC (x);
9527 /* Immediate moves with an immediate in the range [0, 255] can be
9528 encoded in 16 bits in Thumb mode. */
9529 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9530 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9531 *cost >>= 1;
9532 goto const_int_cost;
9533 }
9534
9535 return false;
9536
9537 case MEM:
9538 return arm_mem_costs (x, extra_cost, cost, speed_p);
9539
9540 case PARALLEL:
9541 {
9542 /* Calculations of LDM costs are complex. We assume an initial cost
9543 (ldm_1st) which will load the number of registers mentioned in
9544 ldm_regs_per_insn_1st registers; then each additional
9545 ldm_regs_per_insn_subsequent registers cost one more insn. The
9546 formula for N regs is thus:
9547
9548 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9549 + ldm_regs_per_insn_subsequent - 1)
9550 / ldm_regs_per_insn_subsequent).
9551
9552 Additional costs may also be added for addressing. A similar
9553 formula is used for STM. */
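/* Worked example with hypothetical tuning values (illustration only): with
   ldm_regs_per_insn_1st = 3 and ldm_regs_per_insn_subsequent = 2, an LDM
   of N = 6 registers costs
   ldm_1st + COSTS_N_INSNS ((MAX (6 - 3, 0) + 2 - 1) / 2)
   = ldm_1st + COSTS_N_INSNS (2),
   i.e. one extra insn-equivalent for every two registers beyond the first
   group.  */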
9554
9555 bool is_ldm = load_multiple_operation (x, SImode);
9556 bool is_stm = store_multiple_operation (x, SImode);
9557
9558 if (is_ldm || is_stm)
9559 {
9560 if (speed_p)
9561 {
9562 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9563 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9564 ? extra_cost->ldst.ldm_regs_per_insn_1st
9565 : extra_cost->ldst.stm_regs_per_insn_1st;
9566 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9567 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9568 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9569
9570 *cost += regs_per_insn_1st
9571 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9572 + regs_per_insn_sub - 1)
9573 / regs_per_insn_sub);
9574 return true;
9575 }
9576
9577 }
9578 return false;
9579 }
9580 case DIV:
9581 case UDIV:
9582 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9583 && (mode == SFmode || !TARGET_VFP_SINGLE))
9584 *cost += COSTS_N_INSNS (speed_p
9585 ? extra_cost->fp[mode != SFmode].div : 0);
9586 else if (mode == SImode && TARGET_IDIV)
9587 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9588 else
9589 *cost = LIBCALL_COST (2);
9590
9591 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9592 are possible, udiv is preferred. */
9593 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9594 return false; /* All arguments must be in registers. */
9595
9596 case MOD:
9597 /* MOD by a power of 2 can be expanded as:
9598 rsbs r1, r0, #0
9599 and r0, r0, #(n - 1)
9600 and r1, r1, #(n - 1)
9601 rsbpl r0, r1, #0. */
9602 if (CONST_INT_P (XEXP (x, 1))
9603 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9604 && mode == SImode)
9605 {
9606 *cost += COSTS_N_INSNS (3);
9607
9608 if (speed_p)
9609 *cost += 2 * extra_cost->alu.logical
9610 + extra_cost->alu.arith;
9611 return true;
9612 }
9613
9614 /* Fall-through. */
9615 case UMOD:
9616 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9617 are possible, udiv is preferred. */
9618 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9619 return false; /* All arguments must be in registers. */
9620
9621 case ROTATE:
9622 if (mode == SImode && REG_P (XEXP (x, 1)))
9623 {
9624 *cost += (COSTS_N_INSNS (1)
9625 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9626 if (speed_p)
9627 *cost += extra_cost->alu.shift_reg;
9628 return true;
9629 }
9630 /* Fall through */
9631 case ROTATERT:
9632 case ASHIFT:
9633 case LSHIFTRT:
9634 case ASHIFTRT:
9635 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9636 {
9637 *cost += (COSTS_N_INSNS (2)
9638 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9639 if (speed_p)
9640 *cost += 2 * extra_cost->alu.shift;
9641 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
9642 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
9643 *cost += 1;
9644 return true;
9645 }
9646 else if (mode == SImode)
9647 {
9648 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9649 /* Slightly disparage register shifts at -Os, but not by much. */
9650 if (!CONST_INT_P (XEXP (x, 1)))
9651 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9652 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9653 return true;
9654 }
9655 else if (GET_MODE_CLASS (mode) == MODE_INT
9656 && GET_MODE_SIZE (mode) < 4)
9657 {
9658 if (code == ASHIFT)
9659 {
9660 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9661 /* Slightly disparage register shifts at -Os, but not by
9662 much. */
9663 if (!CONST_INT_P (XEXP (x, 1)))
9664 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9665 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9666 }
9667 else if (code == LSHIFTRT || code == ASHIFTRT)
9668 {
9669 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9670 {
9671 /* Can use SBFX/UBFX. */
9672 if (speed_p)
9673 *cost += extra_cost->alu.bfx;
9674 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9675 }
9676 else
9677 {
9678 *cost += COSTS_N_INSNS (1);
9679 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9680 if (speed_p)
9681 {
9682 if (CONST_INT_P (XEXP (x, 1)))
9683 *cost += 2 * extra_cost->alu.shift;
9684 else
9685 *cost += (extra_cost->alu.shift
9686 + extra_cost->alu.shift_reg);
9687 }
9688 else
9689 /* Slightly disparage register shifts. */
9690 *cost += !CONST_INT_P (XEXP (x, 1));
9691 }
9692 }
9693 else /* Rotates. */
9694 {
9695 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9696 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9697 if (speed_p)
9698 {
9699 if (CONST_INT_P (XEXP (x, 1)))
9700 *cost += (2 * extra_cost->alu.shift
9701 + extra_cost->alu.log_shift);
9702 else
9703 *cost += (extra_cost->alu.shift
9704 + extra_cost->alu.shift_reg
9705 + extra_cost->alu.log_shift_reg);
9706 }
9707 }
9708 return true;
9709 }
9710
9711 *cost = LIBCALL_COST (2);
9712 return false;
9713
9714 case BSWAP:
9715 if (arm_arch6)
9716 {
9717 if (mode == SImode)
9718 {
9719 if (speed_p)
9720 *cost += extra_cost->alu.rev;
9721
9722 return false;
9723 }
9724 }
9725 else
9726 {
9727 /* No rev instruction available. Look at arm_legacy_rev
9728 and thumb_legacy_rev for the form of RTL used then. */
9729 if (TARGET_THUMB)
9730 {
9731 *cost += COSTS_N_INSNS (9);
9732
9733 if (speed_p)
9734 {
9735 *cost += 6 * extra_cost->alu.shift;
9736 *cost += 3 * extra_cost->alu.logical;
9737 }
9738 }
9739 else
9740 {
9741 *cost += COSTS_N_INSNS (4);
9742
9743 if (speed_p)
9744 {
9745 *cost += 2 * extra_cost->alu.shift;
9746 *cost += extra_cost->alu.arith_shift;
9747 *cost += 2 * extra_cost->alu.logical;
9748 }
9749 }
9750 return true;
9751 }
9752 return false;
9753
9754 case MINUS:
9755 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9756 && (mode == SFmode || !TARGET_VFP_SINGLE))
9757 {
9758 if (GET_CODE (XEXP (x, 0)) == MULT
9759 || GET_CODE (XEXP (x, 1)) == MULT)
9760 {
9761 rtx mul_op0, mul_op1, sub_op;
9762
9763 if (speed_p)
9764 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9765
9766 if (GET_CODE (XEXP (x, 0)) == MULT)
9767 {
9768 mul_op0 = XEXP (XEXP (x, 0), 0);
9769 mul_op1 = XEXP (XEXP (x, 0), 1);
9770 sub_op = XEXP (x, 1);
9771 }
9772 else
9773 {
9774 mul_op0 = XEXP (XEXP (x, 1), 0);
9775 mul_op1 = XEXP (XEXP (x, 1), 1);
9776 sub_op = XEXP (x, 0);
9777 }
9778
9779 /* The first operand of the multiply may be optionally
9780 negated. */
9781 if (GET_CODE (mul_op0) == NEG)
9782 mul_op0 = XEXP (mul_op0, 0);
9783
9784 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9785 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9786 + rtx_cost (sub_op, mode, code, 0, speed_p));
9787
9788 return true;
9789 }
9790
9791 if (speed_p)
9792 *cost += extra_cost->fp[mode != SFmode].addsub;
9793 return false;
9794 }
9795
9796 if (mode == SImode)
9797 {
9798 rtx shift_by_reg = NULL;
9799 rtx shift_op;
9800 rtx non_shift_op;
9801
9802 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9803 if (shift_op == NULL)
9804 {
9805 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9806 non_shift_op = XEXP (x, 0);
9807 }
9808 else
9809 non_shift_op = XEXP (x, 1);
9810
9811 if (shift_op != NULL)
9812 {
9813 if (shift_by_reg != NULL)
9814 {
9815 if (speed_p)
9816 *cost += extra_cost->alu.arith_shift_reg;
9817 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9818 }
9819 else if (speed_p)
9820 *cost += extra_cost->alu.arith_shift;
9821
9822 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9823 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9824 return true;
9825 }
9826
9827 if (arm_arch_thumb2
9828 && GET_CODE (XEXP (x, 1)) == MULT)
9829 {
9830 /* MLS. */
9831 if (speed_p)
9832 *cost += extra_cost->mult[0].add;
9833 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9834 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9835 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9836 return true;
9837 }
9838
9839 if (CONST_INT_P (XEXP (x, 0)))
9840 {
9841 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9842 INTVAL (XEXP (x, 0)), NULL_RTX,
9843 NULL_RTX, 1, 0);
9844 *cost = COSTS_N_INSNS (insns);
9845 if (speed_p)
9846 *cost += insns * extra_cost->alu.arith;
9847 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9848 return true;
9849 }
9850 else if (speed_p)
9851 *cost += extra_cost->alu.arith;
9852
9853 return false;
9854 }
9855
9856 if (GET_MODE_CLASS (mode) == MODE_INT
9857 && GET_MODE_SIZE (mode) < 4)
9858 {
9859 rtx shift_op, shift_reg;
9860 shift_reg = NULL;
9861
9862 /* We check both sides of the MINUS for shifter operands since,
9863 unlike PLUS, it's not commutative. */
9864
9865 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
9866 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
9867
9868 /* Slightly disparage, as we might need to widen the result. */
9869 *cost += 1;
9870 if (speed_p)
9871 *cost += extra_cost->alu.arith;
9872
9873 if (CONST_INT_P (XEXP (x, 0)))
9874 {
9875 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9876 return true;
9877 }
9878
9879 return false;
9880 }
9881
9882 if (mode == DImode)
9883 {
9884 *cost += COSTS_N_INSNS (1);
9885
9886 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9887 {
9888 rtx op1 = XEXP (x, 1);
9889
9890 if (speed_p)
9891 *cost += 2 * extra_cost->alu.arith;
9892
9893 if (GET_CODE (op1) == ZERO_EXTEND)
9894 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9895 0, speed_p);
9896 else
9897 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9898 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9899 0, speed_p);
9900 return true;
9901 }
9902 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9903 {
9904 if (speed_p)
9905 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9906 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9907 0, speed_p)
9908 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9909 return true;
9910 }
9911 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9912 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9913 {
9914 if (speed_p)
9915 *cost += (extra_cost->alu.arith
9916 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9917 ? extra_cost->alu.arith
9918 : extra_cost->alu.arith_shift));
9919 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9920 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9921 GET_CODE (XEXP (x, 1)), 0, speed_p));
9922 return true;
9923 }
9924
9925 if (speed_p)
9926 *cost += 2 * extra_cost->alu.arith;
9927 return false;
9928 }
9929
9930 /* Vector mode? */
9931
9932 *cost = LIBCALL_COST (2);
9933 return false;
9934
9935 case PLUS:
9936 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9937 && (mode == SFmode || !TARGET_VFP_SINGLE))
9938 {
9939 if (GET_CODE (XEXP (x, 0)) == MULT)
9940 {
9941 rtx mul_op0, mul_op1, add_op;
9942
9943 if (speed_p)
9944 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9945
9946 mul_op0 = XEXP (XEXP (x, 0), 0);
9947 mul_op1 = XEXP (XEXP (x, 0), 1);
9948 add_op = XEXP (x, 1);
9949
9950 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9951 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9952 + rtx_cost (add_op, mode, code, 0, speed_p));
9953
9954 return true;
9955 }
9956
9957 if (speed_p)
9958 *cost += extra_cost->fp[mode != SFmode].addsub;
9959 return false;
9960 }
9961 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9962 {
9963 *cost = LIBCALL_COST (2);
9964 return false;
9965 }
9966
9967 /* Narrow modes can be synthesized in SImode, but the range
9968 of useful sub-operations is limited. Check for shift operations
9969 on one of the operands. Only left shifts can be used in the
9970 narrow modes. */
9971 if (GET_MODE_CLASS (mode) == MODE_INT
9972 && GET_MODE_SIZE (mode) < 4)
9973 {
9974 rtx shift_op, shift_reg;
9975 shift_reg = NULL;
9976
9977 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
9978
9979 if (CONST_INT_P (XEXP (x, 1)))
9980 {
9981 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9982 INTVAL (XEXP (x, 1)), NULL_RTX,
9983 NULL_RTX, 1, 0);
9984 *cost = COSTS_N_INSNS (insns);
9985 if (speed_p)
9986 *cost += insns * extra_cost->alu.arith;
9987 /* Slightly penalize a narrow operation as the result may
9988 need widening. */
9989 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9990 return true;
9991 }
9992
9993 /* Slightly penalize a narrow operation as the result may
9994 need widening. */
9995 *cost += 1;
9996 if (speed_p)
9997 *cost += extra_cost->alu.arith;
9998
9999 return false;
10000 }
10001
10002 if (mode == SImode)
10003 {
10004 rtx shift_op, shift_reg;
10005
10006 if (TARGET_INT_SIMD
10007 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10008 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
10009 {
10010 /* UXTA[BH] or SXTA[BH]. */
10011 if (speed_p)
10012 *cost += extra_cost->alu.extend_arith;
10013 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10014 0, speed_p)
10015 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
10016 return true;
10017 }
10018
10019 shift_reg = NULL;
10020 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10021 if (shift_op != NULL)
10022 {
10023 if (shift_reg)
10024 {
10025 if (speed_p)
10026 *cost += extra_cost->alu.arith_shift_reg;
10027 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10028 }
10029 else if (speed_p)
10030 *cost += extra_cost->alu.arith_shift;
10031
10032 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10033 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10034 return true;
10035 }
10036 if (GET_CODE (XEXP (x, 0)) == MULT)
10037 {
10038 rtx mul_op = XEXP (x, 0);
10039
10040 if (TARGET_DSP_MULTIPLY
10041 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
10042 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10043 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10044 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10045 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10046 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10047 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10048 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10049 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10050 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10051 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10052 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10053 == 16))))))
10054 {
10055 /* SMLA[BT][BT]. */
10056 if (speed_p)
10057 *cost += extra_cost->mult[0].extend_add;
10058 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10059 SIGN_EXTEND, 0, speed_p)
10060 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10061 SIGN_EXTEND, 0, speed_p)
10062 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10063 return true;
10064 }
10065
10066 if (speed_p)
10067 *cost += extra_cost->mult[0].add;
10068 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10069 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10070 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10071 return true;
10072 }
10073 if (CONST_INT_P (XEXP (x, 1)))
10074 {
10075 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10076 INTVAL (XEXP (x, 1)), NULL_RTX,
10077 NULL_RTX, 1, 0);
10078 *cost = COSTS_N_INSNS (insns);
10079 if (speed_p)
10080 *cost += insns * extra_cost->alu.arith;
10081 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10082 return true;
10083 }
10084 else if (speed_p)
10085 *cost += extra_cost->alu.arith;
10086
10087 return false;
10088 }
10089
10090 if (mode == DImode)
10091 {
10092 if (GET_CODE (XEXP (x, 0)) == MULT
10093 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10094 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10095 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10096 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10097 {
10098 if (speed_p)
10099 *cost += extra_cost->mult[1].extend_add;
10100 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10101 ZERO_EXTEND, 0, speed_p)
10102 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10103 ZERO_EXTEND, 0, speed_p)
10104 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10105 return true;
10106 }
10107
10108 *cost += COSTS_N_INSNS (1);
10109
10110 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10111 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10112 {
10113 if (speed_p)
10114 *cost += (extra_cost->alu.arith
10115 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10116 ? extra_cost->alu.arith
10117 : extra_cost->alu.arith_shift));
10118
10119 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10120 0, speed_p)
10121 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10122 return true;
10123 }
10124
10125 if (speed_p)
10126 *cost += 2 * extra_cost->alu.arith;
10127 return false;
10128 }
10129
10130 /* Vector mode? */
10131 *cost = LIBCALL_COST (2);
10132 return false;
10133 case IOR:
10134 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10135 {
10136 if (speed_p)
10137 *cost += extra_cost->alu.rev;
10138
10139 return true;
10140 }
10141 /* Fall through. */
10142 case AND: case XOR:
10143 if (mode == SImode)
10144 {
10145 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10146 rtx op0 = XEXP (x, 0);
10147 rtx shift_op, shift_reg;
10148
10149 if (subcode == NOT
10150 && (code == AND
10151 || (code == IOR && TARGET_THUMB2)))
10152 op0 = XEXP (op0, 0);
10153
10154 shift_reg = NULL;
10155 shift_op = shifter_op_p (op0, &shift_reg);
10156 if (shift_op != NULL)
10157 {
10158 if (shift_reg)
10159 {
10160 if (speed_p)
10161 *cost += extra_cost->alu.log_shift_reg;
10162 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10163 }
10164 else if (speed_p)
10165 *cost += extra_cost->alu.log_shift;
10166
10167 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10168 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10169 return true;
10170 }
10171
10172 if (CONST_INT_P (XEXP (x, 1)))
10173 {
10174 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10175 INTVAL (XEXP (x, 1)), NULL_RTX,
10176 NULL_RTX, 1, 0);
10177
10178 *cost = COSTS_N_INSNS (insns);
10179 if (speed_p)
10180 *cost += insns * extra_cost->alu.logical;
10181 *cost += rtx_cost (op0, mode, code, 0, speed_p);
10182 return true;
10183 }
10184
10185 if (speed_p)
10186 *cost += extra_cost->alu.logical;
10187 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10188 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10189 return true;
10190 }
10191
10192 if (mode == DImode)
10193 {
10194 rtx op0 = XEXP (x, 0);
10195 enum rtx_code subcode = GET_CODE (op0);
10196
10197 *cost += COSTS_N_INSNS (1);
10198
10199 if (subcode == NOT
10200 && (code == AND
10201 || (code == IOR && TARGET_THUMB2)))
10202 op0 = XEXP (op0, 0);
10203
10204 if (GET_CODE (op0) == ZERO_EXTEND)
10205 {
10206 if (speed_p)
10207 *cost += 2 * extra_cost->alu.logical;
10208
10209 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10210 0, speed_p)
10211 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10212 return true;
10213 }
10214 else if (GET_CODE (op0) == SIGN_EXTEND)
10215 {
10216 if (speed_p)
10217 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10218
10219 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10220 0, speed_p)
10221 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10222 return true;
10223 }
10224
10225 if (speed_p)
10226 *cost += 2 * extra_cost->alu.logical;
10227
10228 return true;
10229 }
10230 /* Vector mode? */
10231
10232 *cost = LIBCALL_COST (2);
10233 return false;
10234
10235 case MULT:
10236 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10237 && (mode == SFmode || !TARGET_VFP_SINGLE))
10238 {
10239 rtx op0 = XEXP (x, 0);
10240
10241 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10242 op0 = XEXP (op0, 0);
10243
10244 if (speed_p)
10245 *cost += extra_cost->fp[mode != SFmode].mult;
10246
10247 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10248 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10249 return true;
10250 }
10251 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10252 {
10253 *cost = LIBCALL_COST (2);
10254 return false;
10255 }
10256
10257 if (mode == SImode)
10258 {
10259 if (TARGET_DSP_MULTIPLY
10260 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10261 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10262 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10263 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10264 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10265 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10266 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10267 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10268 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10269 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10270 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10271 && (INTVAL (XEXP (XEXP (x, 1), 1))
10272 == 16))))))
10273 {
10274 /* SMUL[TB][TB]. */
10275 if (speed_p)
10276 *cost += extra_cost->mult[0].extend;
10277 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10278 SIGN_EXTEND, 0, speed_p);
10279 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10280 SIGN_EXTEND, 1, speed_p);
10281 return true;
10282 }
10283 if (speed_p)
10284 *cost += extra_cost->mult[0].simple;
10285 return false;
10286 }
10287
10288 if (mode == DImode)
10289 {
10290 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10291 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10292 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10293 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
10294 {
10295 if (speed_p)
10296 *cost += extra_cost->mult[1].extend;
10297 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10298 ZERO_EXTEND, 0, speed_p)
10299 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10300 ZERO_EXTEND, 0, speed_p));
10301 return true;
10302 }
10303
10304 *cost = LIBCALL_COST (2);
10305 return false;
10306 }
10307
10308 /* Vector mode? */
10309 *cost = LIBCALL_COST (2);
10310 return false;
10311
10312 case NEG:
10313 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10314 && (mode == SFmode || !TARGET_VFP_SINGLE))
10315 {
10316 if (GET_CODE (XEXP (x, 0)) == MULT)
10317 {
10318 /* VNMUL. */
10319 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10320 return true;
10321 }
10322
10323 if (speed_p)
10324 *cost += extra_cost->fp[mode != SFmode].neg;
10325
10326 return false;
10327 }
10328 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10329 {
10330 *cost = LIBCALL_COST (1);
10331 return false;
10332 }
10333
10334 if (mode == SImode)
10335 {
10336 if (GET_CODE (XEXP (x, 0)) == ABS)
10337 {
10338 *cost += COSTS_N_INSNS (1);
10339 /* Assume the non-flag-changing variant. */
10340 if (speed_p)
10341 *cost += (extra_cost->alu.log_shift
10342 + extra_cost->alu.arith_shift);
10343 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10344 return true;
10345 }
10346
10347 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10348 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10349 {
10350 *cost += COSTS_N_INSNS (1);
10351 /* No extra cost for MOV imm and MVN imm. */
10352 /* If the comparison op is using the flags, there's no further
10353 cost, otherwise we need to add the cost of the comparison. */
10354 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10355 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10356 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10357 {
10358 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10359 *cost += (COSTS_N_INSNS (1)
10360 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10361 0, speed_p)
10362 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10363 1, speed_p));
10364 if (speed_p)
10365 *cost += extra_cost->alu.arith;
10366 }
10367 return true;
10368 }
10369
10370 if (speed_p)
10371 *cost += extra_cost->alu.arith;
10372 return false;
10373 }
10374
10375 if (GET_MODE_CLASS (mode) == MODE_INT
10376 && GET_MODE_SIZE (mode) < 4)
10377 {
10378 /* Slightly disparage, as we might need an extend operation. */
10379 *cost += 1;
10380 if (speed_p)
10381 *cost += extra_cost->alu.arith;
10382 return false;
10383 }
10384
10385 if (mode == DImode)
10386 {
10387 *cost += COSTS_N_INSNS (1);
10388 if (speed_p)
10389 *cost += 2 * extra_cost->alu.arith;
10390 return false;
10391 }
10392
10393 /* Vector mode? */
10394 *cost = LIBCALL_COST (1);
10395 return false;
10396
10397 case NOT:
10398 if (mode == SImode)
10399 {
10400 rtx shift_op;
10401 rtx shift_reg = NULL;
10402
10403 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10404
10405 if (shift_op)
10406 {
10407 if (shift_reg != NULL)
10408 {
10409 if (speed_p)
10410 *cost += extra_cost->alu.log_shift_reg;
10411 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10412 }
10413 else if (speed_p)
10414 *cost += extra_cost->alu.log_shift;
10415 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10416 return true;
10417 }
10418
10419 if (speed_p)
10420 *cost += extra_cost->alu.logical;
10421 return false;
10422 }
10423 if (mode == DImode)
10424 {
10425 *cost += COSTS_N_INSNS (1);
10426 return false;
10427 }
10428
10429 /* Vector mode? */
10430
10431 *cost += LIBCALL_COST (1);
10432 return false;
10433
10434 case IF_THEN_ELSE:
10435 {
10436 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10437 {
10438 *cost += COSTS_N_INSNS (3);
10439 return true;
10440 }
10441 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10442 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10443
10444 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10445 /* Assume that if one arm of the if_then_else is a register,
10446 it will be tied with the result and the conditional insn
10447 will be eliminated. */
10448 if (REG_P (XEXP (x, 1)))
10449 *cost += op2cost;
10450 else if (REG_P (XEXP (x, 2)))
10451 *cost += op1cost;
10452 else
10453 {
10454 if (speed_p)
10455 {
10456 if (extra_cost->alu.non_exec_costs_exec)
10457 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10458 else
10459 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10460 }
10461 else
10462 *cost += op1cost + op2cost;
10463 }
10464 }
10465 return true;
10466
10467 case COMPARE:
10468 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10469 *cost = 0;
10470 else
10471 {
10472 machine_mode op0mode;
10473 /* We'll mostly assume that the cost of a compare is the cost of the
10474 LHS. However, there are some notable exceptions. */
10475
10476 /* Floating point compares are never done as side-effects. */
10477 op0mode = GET_MODE (XEXP (x, 0));
10478 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10479 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10480 {
10481 if (speed_p)
10482 *cost += extra_cost->fp[op0mode != SFmode].compare;
10483
10484 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10485 {
10486 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10487 return true;
10488 }
10489
10490 return false;
10491 }
10492 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10493 {
10494 *cost = LIBCALL_COST (2);
10495 return false;
10496 }
10497
10498 /* DImode compares normally take two insns. */
10499 if (op0mode == DImode)
10500 {
10501 *cost += COSTS_N_INSNS (1);
10502 if (speed_p)
10503 *cost += 2 * extra_cost->alu.arith;
10504 return false;
10505 }
10506
10507 if (op0mode == SImode)
10508 {
10509 rtx shift_op;
10510 rtx shift_reg;
10511
10512 if (XEXP (x, 1) == const0_rtx
10513 && !(REG_P (XEXP (x, 0))
10514 || (GET_CODE (XEXP (x, 0)) == SUBREG
10515 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10516 {
10517 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10518
10519 /* Multiply operations that set the flags are often
10520 significantly more expensive. */
10521 if (speed_p
10522 && GET_CODE (XEXP (x, 0)) == MULT
10523 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10524 *cost += extra_cost->mult[0].flag_setting;
10525
10526 if (speed_p
10527 && GET_CODE (XEXP (x, 0)) == PLUS
10528 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10529 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10530 0), 1), mode))
10531 *cost += extra_cost->mult[0].flag_setting;
10532 return true;
10533 }
10534
10535 shift_reg = NULL;
10536 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10537 if (shift_op != NULL)
10538 {
10539 if (shift_reg != NULL)
10540 {
10541 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10542 1, speed_p);
10543 if (speed_p)
10544 *cost += extra_cost->alu.arith_shift_reg;
10545 }
10546 else if (speed_p)
10547 *cost += extra_cost->alu.arith_shift;
10548 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10549 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10550 return true;
10551 }
10552
10553 if (speed_p)
10554 *cost += extra_cost->alu.arith;
10555 if (CONST_INT_P (XEXP (x, 1))
10556 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10557 {
10558 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10559 return true;
10560 }
10561 return false;
10562 }
10563
10564 /* Vector mode? */
10565
10566 *cost = LIBCALL_COST (2);
10567 return false;
10568 }
10569 return true;
10570
10571 case EQ:
10572 case NE:
10573 case LT:
10574 case LE:
10575 case GT:
10576 case GE:
10577 case LTU:
10578 case LEU:
10579 case GEU:
10580 case GTU:
10581 case ORDERED:
10582 case UNORDERED:
10583 case UNEQ:
10584 case UNLE:
10585 case UNLT:
10586 case UNGE:
10587 case UNGT:
10588 case LTGT:
10589 if (outer_code == SET)
10590 {
10591 /* Is it a store-flag operation? */
10592 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10593 && XEXP (x, 1) == const0_rtx)
10594 {
10595 /* Thumb also needs an IT insn. */
10596 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10597 return true;
10598 }
10599 if (XEXP (x, 1) == const0_rtx)
10600 {
10601 switch (code)
10602 {
10603 case LT:
10604 /* LSR Rd, Rn, #31. */
10605 if (speed_p)
10606 *cost += extra_cost->alu.shift;
10607 break;
10608
10609 case EQ:
10610 /* RSBS T1, Rn, #0
10611 ADC Rd, Rn, T1. */
10612
10613 case NE:
10614 /* SUBS T1, Rn, #1
10615 SBC Rd, Rn, T1. */
10616 *cost += COSTS_N_INSNS (1);
10617 break;
10618
10619 case LE:
10620 /* RSBS T1, Rn, Rn, LSR #31
10621 ADC Rd, Rn, T1. */
10622 *cost += COSTS_N_INSNS (1);
10623 if (speed_p)
10624 *cost += extra_cost->alu.arith_shift;
10625 break;
10626
10627 case GT:
10628 /* RSB Rd, Rn, Rn, ASR #1
10629 LSR Rd, Rd, #31. */
10630 *cost += COSTS_N_INSNS (1);
10631 if (speed_p)
10632 *cost += (extra_cost->alu.arith_shift
10633 + extra_cost->alu.shift);
10634 break;
10635
10636 case GE:
10637 /* ASR Rd, Rn, #31
10638 ADD Rd, Rn, #1. */
10639 *cost += COSTS_N_INSNS (1);
10640 if (speed_p)
10641 *cost += extra_cost->alu.shift;
10642 break;
10643
10644 default:
10645 /* Remaining cases are either meaningless or would take
10646 three insns anyway. */
10647 *cost = COSTS_N_INSNS (3);
10648 break;
10649 }
10650 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10651 return true;
10652 }
10653 else
10654 {
10655 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10656 if (CONST_INT_P (XEXP (x, 1))
10657 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10658 {
10659 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10660 return true;
10661 }
10662
10663 return false;
10664 }
10665 }
10666 /* Not directly inside a set. If it involves the condition code
10667 register it must be the condition for a branch, cond_exec or
10668 I_T_E operation. Since the comparison is performed elsewhere
10669 this is just the control part which has no additional
10670 cost. */
10671 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10672 && XEXP (x, 1) == const0_rtx)
10673 {
10674 *cost = 0;
10675 return true;
10676 }
10677 return false;
10678
10679 case ABS:
10680 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10681 && (mode == SFmode || !TARGET_VFP_SINGLE))
10682 {
10683 if (speed_p)
10684 *cost += extra_cost->fp[mode != SFmode].neg;
10685
10686 return false;
10687 }
10688 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10689 {
10690 *cost = LIBCALL_COST (1);
10691 return false;
10692 }
10693
10694 if (mode == SImode)
10695 {
10696 if (speed_p)
10697 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10698 return false;
10699 }
10700 /* Vector mode? */
10701 *cost = LIBCALL_COST (1);
10702 return false;
10703
10704 case SIGN_EXTEND:
10705 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10706 && MEM_P (XEXP (x, 0)))
10707 {
10708 if (mode == DImode)
10709 *cost += COSTS_N_INSNS (1);
10710
10711 if (!speed_p)
10712 return true;
10713
10714 if (GET_MODE (XEXP (x, 0)) == SImode)
10715 *cost += extra_cost->ldst.load;
10716 else
10717 *cost += extra_cost->ldst.load_sign_extend;
10718
10719 if (mode == DImode)
10720 *cost += extra_cost->alu.shift;
10721
10722 return true;
10723 }
10724
10725 /* Widening from less than 32-bits requires an extend operation. */
10726 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10727 {
10728 /* We have SXTB/SXTH. */
10729 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10730 if (speed_p)
10731 *cost += extra_cost->alu.extend;
10732 }
10733 else if (GET_MODE (XEXP (x, 0)) != SImode)
10734 {
10735 /* Needs two shifts. */
10736 *cost += COSTS_N_INSNS (1);
10737 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10738 if (speed_p)
10739 *cost += 2 * extra_cost->alu.shift;
10740 }
10741
10742 /* Widening beyond 32-bits requires one more insn. */
10743 if (mode == DImode)
10744 {
10745 *cost += COSTS_N_INSNS (1);
10746 if (speed_p)
10747 *cost += extra_cost->alu.shift;
10748 }
10749
10750 return true;
10751
10752 case ZERO_EXTEND:
10753 if ((arm_arch4
10754 || GET_MODE (XEXP (x, 0)) == SImode
10755 || GET_MODE (XEXP (x, 0)) == QImode)
10756 && MEM_P (XEXP (x, 0)))
10757 {
10758 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10759
10760 if (mode == DImode)
10761 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10762
10763 return true;
10764 }
10765
10766 /* Widening from less than 32-bits requires an extend operation. */
10767 if (GET_MODE (XEXP (x, 0)) == QImode)
10768 {
10769 /* UXTB can be a shorter instruction in Thumb2, but it might
10770 be slower than the AND Rd, Rn, #255 alternative. When
10771 optimizing for speed it should never be slower to use
10772 AND, and we don't really model 16-bit vs 32-bit insns
10773 here. */
10774 if (speed_p)
10775 *cost += extra_cost->alu.logical;
10776 }
10777 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10778 {
10779 /* We have UXTB/UXTH. */
10780 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10781 if (speed_p)
10782 *cost += extra_cost->alu.extend;
10783 }
10784 else if (GET_MODE (XEXP (x, 0)) != SImode)
10785 {
10786 /* Needs two shifts. It's marginally preferable to use
10787 shifts rather than two BIC instructions as the second
10788 shift may merge with a subsequent insn as a shifter
10789 op. */
10790 *cost = COSTS_N_INSNS (2);
10791 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10792 if (speed_p)
10793 *cost += 2 * extra_cost->alu.shift;
10794 }
10795
10796 /* Widening beyond 32-bits requires one more insn. */
10797 if (mode == DImode)
10798 {
10799 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10800 }
10801
10802 return true;
10803
10804 case CONST_INT:
10805 *cost = 0;
10806 /* CONST_INT has no mode, so we cannot tell for sure how many
10807 insns are really going to be needed. The best we can do is
10808 look at the value passed. If it fits in SImode, then assume
10809 that's the mode it will be used for. Otherwise assume it
10810 will be used in DImode. */
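/* For illustration: a value such as 0x1234 survives trunc_int_for_mode
   (..., SImode) unchanged and is costed as a single 32-bit constant below,
   whereas a value needing more than 32 bits, e.g. (HOST_WIDE_INT) 1 << 40,
   is costed as a pair of 32-bit constants via the DImode path. */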
10811 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10812 mode = SImode;
10813 else
10814 mode = DImode;
10815
10816 /* Avoid blowing up in arm_gen_constant (). */
10817 if (!(outer_code == PLUS
10818 || outer_code == AND
10819 || outer_code == IOR
10820 || outer_code == XOR
10821 || outer_code == MINUS))
10822 outer_code = SET;
10823
10824 const_int_cost:
10825 if (mode == SImode)
10826 {
10827 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10828 INTVAL (x), NULL, NULL,
10829 0, 0));
10830 /* Extra costs? */
10831 }
10832 else
10833 {
10834 *cost += COSTS_N_INSNS (arm_gen_constant
10835 (outer_code, SImode, NULL,
10836 trunc_int_for_mode (INTVAL (x), SImode),
10837 NULL, NULL, 0, 0)
10838 + arm_gen_constant (outer_code, SImode, NULL,
10839 INTVAL (x) >> 32, NULL,
10840 NULL, 0, 0));
10841 /* Extra costs? */
10842 }
10843
10844 return true;
10845
10846 case CONST:
10847 case LABEL_REF:
10848 case SYMBOL_REF:
10849 if (speed_p)
10850 {
10851 if (arm_arch_thumb2 && !flag_pic)
10852 *cost += COSTS_N_INSNS (1);
10853 else
10854 *cost += extra_cost->ldst.load;
10855 }
10856 else
10857 *cost += COSTS_N_INSNS (1);
10858
10859 if (flag_pic)
10860 {
10861 *cost += COSTS_N_INSNS (1);
10862 if (speed_p)
10863 *cost += extra_cost->alu.arith;
10864 }
10865
10866 return true;
10867
10868 case CONST_FIXED:
10869 *cost = COSTS_N_INSNS (4);
10870 /* Fixme. */
10871 return true;
10872
10873 case CONST_DOUBLE:
10874 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10875 && (mode == SFmode || !TARGET_VFP_SINGLE))
10876 {
10877 if (vfp3_const_double_rtx (x))
10878 {
10879 if (speed_p)
10880 *cost += extra_cost->fp[mode == DFmode].fpconst;
10881 return true;
10882 }
10883
10884 if (speed_p)
10885 {
10886 if (mode == DFmode)
10887 *cost += extra_cost->ldst.loadd;
10888 else
10889 *cost += extra_cost->ldst.loadf;
10890 }
10891 else
10892 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10893
10894 return true;
10895 }
10896 *cost = COSTS_N_INSNS (4);
10897 return true;
10898
10899 case CONST_VECTOR:
10900 /* Fixme. */
10901 if (TARGET_NEON
10902 && TARGET_HARD_FLOAT
10903 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10904 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10905 *cost = COSTS_N_INSNS (1);
10906 else
10907 *cost = COSTS_N_INSNS (4);
10908 return true;
10909
10910 case HIGH:
10911 case LO_SUM:
10912 /* When optimizing for size, we prefer constant pool entries to
10913 MOVW/MOVT pairs, so bump the cost of these slightly. */
10914 if (!speed_p)
10915 *cost += 1;
10916 return true;
10917
10918 case CLZ:
10919 if (speed_p)
10920 *cost += extra_cost->alu.clz;
10921 return false;
10922
10923 case SMIN:
10924 if (XEXP (x, 1) == const0_rtx)
10925 {
10926 if (speed_p)
10927 *cost += extra_cost->alu.log_shift;
10928 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10929 return true;
10930 }
10931 /* Fall through. */
10932 case SMAX:
10933 case UMIN:
10934 case UMAX:
10935 *cost += COSTS_N_INSNS (1);
10936 return false;
10937
10938 case TRUNCATE:
10939 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10940 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10941 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10942 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10943 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10944 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10945 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10946 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10947 == ZERO_EXTEND))))
10948 {
10949 if (speed_p)
10950 *cost += extra_cost->mult[1].extend;
10951 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10952 ZERO_EXTEND, 0, speed_p)
10953 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10954 ZERO_EXTEND, 0, speed_p));
10955 return true;
10956 }
10957 *cost = LIBCALL_COST (1);
10958 return false;
10959
10960 case UNSPEC_VOLATILE:
10961 case UNSPEC:
10962 return arm_unspec_cost (x, outer_code, speed_p, cost);
10963
10964 case PC:
10965 /* Reading the PC is like reading any other register. Writing it
10966 is more expensive, but we take that into account elsewhere. */
10967 *cost = 0;
10968 return true;
10969
10970 case ZERO_EXTRACT:
10971 /* TODO: Simple zero_extract of bottom bits using AND. */
10972 /* Fall through. */
10973 case SIGN_EXTRACT:
10974 if (arm_arch6
10975 && mode == SImode
10976 && CONST_INT_P (XEXP (x, 1))
10977 && CONST_INT_P (XEXP (x, 2)))
10978 {
10979 if (speed_p)
10980 *cost += extra_cost->alu.bfx;
10981 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10982 return true;
10983 }
10984 /* Without UBFX/SBFX, need to resort to shift operations. */
10985 *cost += COSTS_N_INSNS (1);
10986 if (speed_p)
10987 *cost += 2 * extra_cost->alu.shift;
10988 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10989 return true;
10990
10991 case FLOAT_EXTEND:
10992 if (TARGET_HARD_FLOAT)
10993 {
10994 if (speed_p)
10995 *cost += extra_cost->fp[mode == DFmode].widen;
10996 if (!TARGET_VFP5
10997 && GET_MODE (XEXP (x, 0)) == HFmode)
10998 {
10999 /* Pre v8, widening HF->DF is a two-step process, first
11000 widening to SFmode. */
11001 *cost += COSTS_N_INSNS (1);
11002 if (speed_p)
11003 *cost += extra_cost->fp[0].widen;
11004 }
11005 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11006 return true;
11007 }
11008
11009 *cost = LIBCALL_COST (1);
11010 return false;
11011
11012 case FLOAT_TRUNCATE:
11013 if (TARGET_HARD_FLOAT)
11014 {
11015 if (speed_p)
11016 *cost += extra_cost->fp[mode == DFmode].narrow;
11017 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
11018 return true;
11019 /* Vector modes? */
11020 }
11021 *cost = LIBCALL_COST (1);
11022 return false;
11023
11024 case FMA:
11025 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
11026 {
11027 rtx op0 = XEXP (x, 0);
11028 rtx op1 = XEXP (x, 1);
11029 rtx op2 = XEXP (x, 2);
11030
11031
11032 /* vfms or vfnma. */
11033 if (GET_CODE (op0) == NEG)
11034 op0 = XEXP (op0, 0);
11035
11036 /* vfnms or vfnma. */
11037 if (GET_CODE (op2) == NEG)
11038 op2 = XEXP (op2, 0);
11039
11040 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
11041 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
11042 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
11043
11044 if (speed_p)
11045 *cost += extra_cost->fp[mode == DFmode].fma;
11046
11047 return true;
11048 }
11049
11050 *cost = LIBCALL_COST (3);
11051 return false;
11052
11053 case FIX:
11054 case UNSIGNED_FIX:
11055 if (TARGET_HARD_FLOAT)
11056 {
11057 /* The *combine_vcvtf2i reduces a vmul+vcvt into
11058 a vcvt fixed-point conversion. */
11059 if (code == FIX && mode == SImode
11060 && GET_CODE (XEXP (x, 0)) == FIX
11061 && GET_MODE (XEXP (x, 0)) == SFmode
11062 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11063 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
11064 > 0)
11065 {
11066 if (speed_p)
11067 *cost += extra_cost->fp[0].toint;
11068
11069 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11070 code, 0, speed_p);
11071 return true;
11072 }
11073
11074 if (GET_MODE_CLASS (mode) == MODE_INT)
11075 {
11076 mode = GET_MODE (XEXP (x, 0));
11077 if (speed_p)
11078 *cost += extra_cost->fp[mode == DFmode].toint;
11079 /* Strip off the 'cost' of rounding towards zero. */
11080 if (GET_CODE (XEXP (x, 0)) == FIX)
11081 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11082 0, speed_p);
11083 else
11084 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11085 /* ??? Increase the cost to deal with transferring from
11086 FP -> CORE registers? */
11087 return true;
11088 }
11089 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11090 && TARGET_VFP5)
11091 {
11092 if (speed_p)
11093 *cost += extra_cost->fp[mode == DFmode].roundint;
11094 return false;
11095 }
11096 /* Vector costs? */
11097 }
11098 *cost = LIBCALL_COST (1);
11099 return false;
11100
11101 case FLOAT:
11102 case UNSIGNED_FLOAT:
11103 if (TARGET_HARD_FLOAT)
11104 {
11105 /* ??? Increase the cost to deal with transferring from CORE
11106 -> FP registers? */
11107 if (speed_p)
11108 *cost += extra_cost->fp[mode == DFmode].fromint;
11109 return false;
11110 }
11111 *cost = LIBCALL_COST (1);
11112 return false;
11113
11114 case CALL:
11115 return true;
11116
11117 case ASM_OPERANDS:
11118 {
11119 /* Just a guess. Guess number of instructions in the asm
11120 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11121 though (see PR60663). */
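/* For illustration: an asm template that expands to two instructions and
   has three input operands is costed as COSTS_N_INSNS (2 + 3) below. */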
11122 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11123 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11124
11125 *cost = COSTS_N_INSNS (asm_length + num_operands);
11126 return true;
11127 }
11128 default:
11129 if (mode != VOIDmode)
11130 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11131 else
11132 *cost = COSTS_N_INSNS (4); /* Who knows? */
11133 return false;
11134 }
11135 }
11136
11137 #undef HANDLE_NARROW_SHIFT_ARITH
11138
11139 /* RTX costs entry point. */
11140
11141 static bool
11142 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11143 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11144 {
11145 bool result;
11146 int code = GET_CODE (x);
11147 gcc_assert (current_tune->insn_extra_cost);
11148
11149 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
11150 (enum rtx_code) outer_code,
11151 current_tune->insn_extra_cost,
11152 total, speed);
11153
11154 if (dump_file && arm_verbose_cost)
11155 {
11156 print_rtl_single (dump_file, x);
11157 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11158 *total, result ? "final" : "partial");
11159 }
11160 return result;
11161 }
11162
11163 /* All address computations that can be done are free, but rtx cost returns
11164 the same for practically all of them. So we weight the different types
11165 of address here in the order (most pref first):
11166 PRE/POST_INC/DEC, INT sum, SHIFT or NON-INT sum, REG, MEM or LABEL. */
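/* For illustration, the weights returned below are:
     PRE/POST_INC/DEC                 -> 0
     reg + constant offset            -> 2
     reg + shifted (arithmetic) term  -> 3
     other reg + reg sums             -> 4
     plain register                   -> 6
     MEM / LABEL_REF / SYMBOL_REF     -> 10  */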
11167 static inline int
11168 arm_arm_address_cost (rtx x)
11169 {
11170 enum rtx_code c = GET_CODE (x);
11171
11172 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11173 return 0;
11174 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11175 return 10;
11176
11177 if (c == PLUS)
11178 {
11179 if (CONST_INT_P (XEXP (x, 1)))
11180 return 2;
11181
11182 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11183 return 3;
11184
11185 return 4;
11186 }
11187
11188 return 6;
11189 }
11190
11191 static inline int
11192 arm_thumb_address_cost (rtx x)
11193 {
11194 enum rtx_code c = GET_CODE (x);
11195
11196 if (c == REG)
11197 return 1;
11198 if (c == PLUS
11199 && REG_P (XEXP (x, 0))
11200 && CONST_INT_P (XEXP (x, 1)))
11201 return 1;
11202
11203 return 2;
11204 }
11205
11206 static int
11207 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11208 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11209 {
11210 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11211 }
11212
11213 /* Adjust cost hook for XScale. */
11214 static bool
11215 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11216 int * cost)
11217 {
11218 /* Some true dependencies can have a higher cost depending
11219 on precisely how certain input operands are used. */
11220 if (dep_type == 0
11221 && recog_memoized (insn) >= 0
11222 && recog_memoized (dep) >= 0)
11223 {
11224 int shift_opnum = get_attr_shift (insn);
11225 enum attr_type attr_type = get_attr_type (dep);
11226
11227 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11228 operand for INSN. If we have a shifted input operand and the
11229 instruction we depend on is another ALU instruction, then we may
11230 have to account for an additional stall. */
11231 if (shift_opnum != 0
11232 && (attr_type == TYPE_ALU_SHIFT_IMM
11233 || attr_type == TYPE_ALUS_SHIFT_IMM
11234 || attr_type == TYPE_LOGIC_SHIFT_IMM
11235 || attr_type == TYPE_LOGICS_SHIFT_IMM
11236 || attr_type == TYPE_ALU_SHIFT_REG
11237 || attr_type == TYPE_ALUS_SHIFT_REG
11238 || attr_type == TYPE_LOGIC_SHIFT_REG
11239 || attr_type == TYPE_LOGICS_SHIFT_REG
11240 || attr_type == TYPE_MOV_SHIFT
11241 || attr_type == TYPE_MVN_SHIFT
11242 || attr_type == TYPE_MOV_SHIFT_REG
11243 || attr_type == TYPE_MVN_SHIFT_REG))
11244 {
11245 rtx shifted_operand;
11246 int opno;
11247
11248 /* Get the shifted operand. */
11249 extract_insn (insn);
11250 shifted_operand = recog_data.operand[shift_opnum];
11251
11252 /* Iterate over all the operands in DEP. If we write an operand
11253 that overlaps with SHIFTED_OPERAND, then we have to increase the
11254 cost of this dependency. */
11255 extract_insn (dep);
11256 preprocess_constraints (dep);
11257 for (opno = 0; opno < recog_data.n_operands; opno++)
11258 {
11259 /* We can ignore strict inputs. */
11260 if (recog_data.operand_type[opno] == OP_IN)
11261 continue;
11262
11263 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11264 shifted_operand))
11265 {
11266 *cost = 2;
11267 return false;
11268 }
11269 }
11270 }
11271 }
11272 return true;
11273 }
11274
11275 /* Adjust cost hook for Cortex A9. */
11276 static bool
11277 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11278 int * cost)
11279 {
11280 switch (dep_type)
11281 {
11282 case REG_DEP_ANTI:
11283 *cost = 0;
11284 return false;
11285
11286 case REG_DEP_TRUE:
11287 case REG_DEP_OUTPUT:
11288 if (recog_memoized (insn) >= 0
11289 && recog_memoized (dep) >= 0)
11290 {
11291 if (GET_CODE (PATTERN (insn)) == SET)
11292 {
11293 if (GET_MODE_CLASS
11294 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11295 || GET_MODE_CLASS
11296 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11297 {
11298 enum attr_type attr_type_insn = get_attr_type (insn);
11299 enum attr_type attr_type_dep = get_attr_type (dep);
11300
11301 /* By default all dependencies of the form
11302 s0 = s0 <op> s1
11303 s0 = s0 <op> s2
11304 have an extra latency of 1 cycle because
11305 of the input and output dependency in this
11306 case. However, this gets modeled as a true
11307 dependency, hence all these checks. */
11308 if (REG_P (SET_DEST (PATTERN (insn)))
11309 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11310 {
11311 /* FMACS is a special case where the dependent
11312 instruction can be issued 3 cycles before
11313 the normal latency in case of an output
11314 dependency. */
11315 if ((attr_type_insn == TYPE_FMACS
11316 || attr_type_insn == TYPE_FMACD)
11317 && (attr_type_dep == TYPE_FMACS
11318 || attr_type_dep == TYPE_FMACD))
11319 {
11320 if (dep_type == REG_DEP_OUTPUT)
11321 *cost = insn_default_latency (dep) - 3;
11322 else
11323 *cost = insn_default_latency (dep);
11324 return false;
11325 }
11326 else
11327 {
11328 if (dep_type == REG_DEP_OUTPUT)
11329 *cost = insn_default_latency (dep) + 1;
11330 else
11331 *cost = insn_default_latency (dep);
11332 }
11333 return false;
11334 }
11335 }
11336 }
11337 }
11338 break;
11339
11340 default:
11341 gcc_unreachable ();
11342 }
11343
11344 return true;
11345 }
11346
11347 /* Adjust cost hook for FA726TE. */
11348 static bool
11349 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11350 int * cost)
11351 {
11352 /* For FA726TE, a true dependency on CPSR (i.e. a flag-setting instruction
11353 followed by a predicated one) has a penalty of 3. */
11354 if (dep_type == REG_DEP_TRUE
11355 && recog_memoized (insn) >= 0
11356 && recog_memoized (dep) >= 0
11357 && get_attr_conds (dep) == CONDS_SET)
11358 {
11359 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11360 if (get_attr_conds (insn) == CONDS_USE
11361 && get_attr_type (insn) != TYPE_BRANCH)
11362 {
11363 *cost = 3;
11364 return false;
11365 }
11366
11367 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11368 || get_attr_conds (insn) == CONDS_USE)
11369 {
11370 *cost = 0;
11371 return false;
11372 }
11373 }
11374
11375 return true;
11376 }
11377
11378 /* Implement TARGET_REGISTER_MOVE_COST.
11379
11380 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11381 it is typically more expensive than a single memory access. We set
11382 the cost to less than two memory accesses so that floating
11383 point to integer conversion does not go through memory. */
11384
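/* For illustration: with TARGET_32BIT the memory move cost below is 10, so
   the VFP<->core cost of 15 keeps such transfers cheaper than a store/load
   pair (cost 20) while still discouraging them relative to ordinary moves. */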
11385 int
11386 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11387 reg_class_t from, reg_class_t to)
11388 {
11389 if (TARGET_32BIT)
11390 {
11391 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11392 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11393 return 15;
11394 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11395 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11396 return 4;
11397 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11398 return 20;
11399 else
11400 return 2;
11401 }
11402 else
11403 {
11404 if (from == HI_REGS || to == HI_REGS)
11405 return 4;
11406 else
11407 return 2;
11408 }
11409 }
11410
11411 /* Implement TARGET_MEMORY_MOVE_COST. */
11412
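/* For illustration: on non-32BIT (Thumb-1) targets a QImode or HImode move
   costs 8, SImode costs 8 for LO_REGS and 16 otherwise, and DImode costs 16
   for LO_REGS and 32 otherwise. */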
11413 int
11414 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11415 bool in ATTRIBUTE_UNUSED)
11416 {
11417 if (TARGET_32BIT)
11418 return 10;
11419 else
11420 {
11421 if (GET_MODE_SIZE (mode) < 4)
11422 return 8;
11423 else
11424 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11425 }
11426 }
11427
11428 /* Vectorizer cost model implementation. */
11429
11430 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11431 static int
11432 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11433 tree vectype,
11434 int misalign ATTRIBUTE_UNUSED)
11435 {
11436 unsigned elements;
11437
11438 switch (type_of_cost)
11439 {
11440 case scalar_stmt:
11441 return current_tune->vec_costs->scalar_stmt_cost;
11442
11443 case scalar_load:
11444 return current_tune->vec_costs->scalar_load_cost;
11445
11446 case scalar_store:
11447 return current_tune->vec_costs->scalar_store_cost;
11448
11449 case vector_stmt:
11450 return current_tune->vec_costs->vec_stmt_cost;
11451
11452 case vector_load:
11453 return current_tune->vec_costs->vec_align_load_cost;
11454
11455 case vector_store:
11456 return current_tune->vec_costs->vec_store_cost;
11457
11458 case vec_to_scalar:
11459 return current_tune->vec_costs->vec_to_scalar_cost;
11460
11461 case scalar_to_vec:
11462 return current_tune->vec_costs->scalar_to_vec_cost;
11463
11464 case unaligned_load:
11465 case vector_gather_load:
11466 return current_tune->vec_costs->vec_unalign_load_cost;
11467
11468 case unaligned_store:
11469 case vector_scatter_store:
11470 return current_tune->vec_costs->vec_unalign_store_cost;
11471
11472 case cond_branch_taken:
11473 return current_tune->vec_costs->cond_taken_branch_cost;
11474
11475 case cond_branch_not_taken:
11476 return current_tune->vec_costs->cond_not_taken_branch_cost;
11477
11478 case vec_perm:
11479 case vec_promote_demote:
11480 return current_tune->vec_costs->vec_stmt_cost;
11481
11482 case vec_construct:
11483 elements = TYPE_VECTOR_SUBPARTS (vectype);
11484 return elements / 2 + 1;
11485
11486 default:
11487 gcc_unreachable ();
11488 }
11489 }
11490
11491 /* Implement targetm.vectorize.add_stmt_cost. */
11492
11493 static unsigned
11494 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11495 struct _stmt_vec_info *stmt_info, int misalign,
11496 enum vect_cost_model_location where)
11497 {
11498 unsigned *cost = (unsigned *) data;
11499 unsigned retval = 0;
11500
11501 if (flag_vect_cost_model)
11502 {
11503 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11504 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11505
11506 /* Statements in an inner loop relative to the loop being
11507 vectorized are weighted more heavily. The value here is
11508 arbitrary and could potentially be improved with analysis. */
11509 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11510 count *= 50; /* FIXME. */
11511
11512 retval = (unsigned) (count * stmt_cost);
11513 cost[where] += retval;
11514 }
11515
11516 return retval;
11517 }
11518
11519 /* Return true if and only if this insn can dual-issue only as older. */
11520 static bool
11521 cortexa7_older_only (rtx_insn *insn)
11522 {
11523 if (recog_memoized (insn) < 0)
11524 return false;
11525
11526 switch (get_attr_type (insn))
11527 {
11528 case TYPE_ALU_DSP_REG:
11529 case TYPE_ALU_SREG:
11530 case TYPE_ALUS_SREG:
11531 case TYPE_LOGIC_REG:
11532 case TYPE_LOGICS_REG:
11533 case TYPE_ADC_REG:
11534 case TYPE_ADCS_REG:
11535 case TYPE_ADR:
11536 case TYPE_BFM:
11537 case TYPE_REV:
11538 case TYPE_MVN_REG:
11539 case TYPE_SHIFT_IMM:
11540 case TYPE_SHIFT_REG:
11541 case TYPE_LOAD_BYTE:
11542 case TYPE_LOAD_4:
11543 case TYPE_STORE_4:
11544 case TYPE_FFARITHS:
11545 case TYPE_FADDS:
11546 case TYPE_FFARITHD:
11547 case TYPE_FADDD:
11548 case TYPE_FMOV:
11549 case TYPE_F_CVT:
11550 case TYPE_FCMPS:
11551 case TYPE_FCMPD:
11552 case TYPE_FCONSTS:
11553 case TYPE_FCONSTD:
11554 case TYPE_FMULS:
11555 case TYPE_FMACS:
11556 case TYPE_FMULD:
11557 case TYPE_FMACD:
11558 case TYPE_FDIVS:
11559 case TYPE_FDIVD:
11560 case TYPE_F_MRC:
11561 case TYPE_F_MRRC:
11562 case TYPE_F_FLAG:
11563 case TYPE_F_LOADS:
11564 case TYPE_F_STORES:
11565 return true;
11566 default:
11567 return false;
11568 }
11569 }
11570
11571 /* Return true if and only if this insn can dual-issue as younger. */
11572 static bool
11573 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11574 {
11575 if (recog_memoized (insn) < 0)
11576 {
11577 if (verbose > 5)
11578 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11579 return false;
11580 }
11581
11582 switch (get_attr_type (insn))
11583 {
11584 case TYPE_ALU_IMM:
11585 case TYPE_ALUS_IMM:
11586 case TYPE_LOGIC_IMM:
11587 case TYPE_LOGICS_IMM:
11588 case TYPE_EXTEND:
11589 case TYPE_MVN_IMM:
11590 case TYPE_MOV_IMM:
11591 case TYPE_MOV_REG:
11592 case TYPE_MOV_SHIFT:
11593 case TYPE_MOV_SHIFT_REG:
11594 case TYPE_BRANCH:
11595 case TYPE_CALL:
11596 return true;
11597 default:
11598 return false;
11599 }
11600 }
11601
11602
11603 /* Look for an instruction that can dual issue only as an older
11604 instruction, and move it in front of any instructions that can
11605 dual-issue as younger, while preserving the relative order of all
11606 other instructions in the ready list. This is a heuristic to help
11607 dual-issue in later cycles, by postponing issue of more flexible
11608 instructions. This heuristic may affect dual issue opportunities
11609 in the current cycle. */
11610 static void
11611 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11612 int *n_readyp, int clock)
11613 {
11614 int i;
11615 int first_older_only = -1, first_younger = -1;
11616
11617 if (verbose > 5)
11618 fprintf (file,
11619 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11620 clock,
11621 *n_readyp);
11622
11623 /* Traverse the ready list from the head (the instruction to issue
11624 first), looking for the first instruction that can issue as
11625 younger and the first instruction that can dual-issue only as
11626 older. */
11627 for (i = *n_readyp - 1; i >= 0; i--)
11628 {
11629 rtx_insn *insn = ready[i];
11630 if (cortexa7_older_only (insn))
11631 {
11632 first_older_only = i;
11633 if (verbose > 5)
11634 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11635 break;
11636 }
11637 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11638 first_younger = i;
11639 }
11640
11641 /* Nothing to reorder because either no younger insn found or insn
11642 that can dual-issue only as older appears before any insn that
11643 can dual-issue as younger. */
11644 if (first_younger == -1)
11645 {
11646 if (verbose > 5)
11647 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11648 return;
11649 }
11650
11651 /* Nothing to reorder because no older-only insn in the ready list. */
11652 if (first_older_only == -1)
11653 {
11654 if (verbose > 5)
11655 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11656 return;
11657 }
11658
11659 /* Move first_older_only insn before first_younger. */
11660 if (verbose > 5)
11661 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11662 INSN_UID(ready [first_older_only]),
11663 INSN_UID(ready [first_younger]));
11664 rtx_insn *first_older_only_insn = ready [first_older_only];
11665 for (i = first_older_only; i < first_younger; i++)
11666 {
11667 ready[i] = ready[i+1];
11668 }
11669
11670 ready[i] = first_older_only_insn;
11671 return;
11672 }
11673
11674 /* Implement TARGET_SCHED_REORDER. */
11675 static int
11676 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11677 int clock)
11678 {
11679 switch (arm_tune)
11680 {
11681 case TARGET_CPU_cortexa7:
11682 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11683 break;
11684 default:
11685 /* Do nothing for other cores. */
11686 break;
11687 }
11688
11689 return arm_issue_rate ();
11690 }
11691
11692 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11693 It corrects the value of COST based on the relationship between
11694 INSN and DEP through the dependence LINK. It returns the new
11695 value. There is a per-core adjust_cost hook to adjust scheduler costs
11696 and the per-core hook can choose to completely override the generic
11697 adjust_cost function. Only put bits of code into arm_adjust_cost that
11698 are common across all cores. */
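/* For illustration: with TARGET_THUMB1, a flag-setting ALU insn that feeds
   the cbranchsi4 insn consuming the flags gets a dependence cost of 0 below,
   so the scheduler keeps the pair adjacent and the comparison can be
   omitted. */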
11699 static int
11700 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11701 unsigned int)
11702 {
11703 rtx i_pat, d_pat;
11704
11705 /* When generating Thumb-1 code, we want to place flag-setting operations
11706 close to a conditional branch which depends on them, so that we can
11707 omit the comparison. */
11708 if (TARGET_THUMB1
11709 && dep_type == 0
11710 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11711 && recog_memoized (dep) >= 0
11712 && get_attr_conds (dep) == CONDS_SET)
11713 return 0;
11714
11715 if (current_tune->sched_adjust_cost != NULL)
11716 {
11717 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11718 return cost;
11719 }
11720
11721 /* XXX Is this strictly true? */
11722 if (dep_type == REG_DEP_ANTI
11723 || dep_type == REG_DEP_OUTPUT)
11724 return 0;
11725
11726 /* Call insns don't incur a stall, even if they follow a load. */
11727 if (dep_type == 0
11728 && CALL_P (insn))
11729 return 1;
11730
11731 if ((i_pat = single_set (insn)) != NULL
11732 && MEM_P (SET_SRC (i_pat))
11733 && (d_pat = single_set (dep)) != NULL
11734 && MEM_P (SET_DEST (d_pat)))
11735 {
11736 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11737 /* This is a load after a store; there is no conflict if the load reads
11738 from a cached area. Assume that loads from the stack and from the
11739 constant pool are cached, and that others will miss. This is a
11740 hack. */
11741
11742 if ((GET_CODE (src_mem) == SYMBOL_REF
11743 && CONSTANT_POOL_ADDRESS_P (src_mem))
11744 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11745 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11746 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11747 return 1;
11748 }
11749
11750 return cost;
11751 }
11752
11753 int
11754 arm_max_conditional_execute (void)
11755 {
11756 return max_insns_skipped;
11757 }
11758
11759 static int
11760 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11761 {
11762 if (TARGET_32BIT)
11763 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11764 else
11765 return (optimize > 0) ? 2 : 0;
11766 }
11767
11768 static int
11769 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11770 {
11771 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11772 }
11773
11774 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11775 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11776 sequences of non-executed instructions in IT blocks probably take the same
11777 amount of time as executed instructions (and the IT instruction itself takes
11778 space in icache). This function was experimentally determined to give good
11779 results on a popular embedded benchmark. */
11780
11781 static int
11782 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11783 {
11784 return (TARGET_32BIT && speed_p) ? 1
11785 : arm_default_branch_cost (speed_p, predictable_p);
11786 }
11787
11788 static int
11789 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11790 {
11791 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11792 }
11793
11794 static bool fp_consts_inited = false;
11795
11796 static REAL_VALUE_TYPE value_fp0;
11797
11798 static void
11799 init_fp_table (void)
11800 {
11801 REAL_VALUE_TYPE r;
11802
11803 r = REAL_VALUE_ATOF ("0", DFmode);
11804 value_fp0 = r;
11805 fp_consts_inited = true;
11806 }
11807
11808 /* Return TRUE if rtx X is a valid immediate FP constant. */
11809 int
11810 arm_const_double_rtx (rtx x)
11811 {
11812 const REAL_VALUE_TYPE *r;
11813
11814 if (!fp_consts_inited)
11815 init_fp_table ();
11816
11817 r = CONST_DOUBLE_REAL_VALUE (x);
11818 if (REAL_VALUE_MINUS_ZERO (*r))
11819 return 0;
11820
11821 if (real_equal (r, &value_fp0))
11822 return 1;
11823
11824 return 0;
11825 }
11826
11827 /* VFPv3 has a fairly wide range of representable immediates, formed from
11828 "quarter-precision" floating-point values. These can be evaluated using this
11829 formula (with ^ for exponentiation):
11830
11831 -1^s * n * 2^-r
11832
11833 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11834 16 <= n <= 31 and 0 <= r <= 7.
11835
11836 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11837
11838 - A (most-significant) is the sign bit.
11839 - BCD are the exponent (encoded as r XOR 3).
11840 - EFGH are the mantissa (encoded as n - 16).
11841 */
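/* For illustration: 1.0 is 16 * 2^-4 (s = 0, n = 16, r = 4), which maps to
   the 8-bit index (0 << 7) | ((4 ^ 3) << 4) | (16 - 16) == 0x70. */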
11842
11843 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11844 fconst[sd] instruction, or -1 if X isn't suitable. */
11845 static int
11846 vfp3_const_double_index (rtx x)
11847 {
11848 REAL_VALUE_TYPE r, m;
11849 int sign, exponent;
11850 unsigned HOST_WIDE_INT mantissa, mant_hi;
11851 unsigned HOST_WIDE_INT mask;
11852 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11853 bool fail;
11854
11855 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11856 return -1;
11857
11858 r = *CONST_DOUBLE_REAL_VALUE (x);
11859
11860 /* We can't represent these things, so detect them first. */
11861 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11862 return -1;
11863
11864 /* Extract sign, exponent and mantissa. */
11865 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11866 r = real_value_abs (&r);
11867 exponent = REAL_EXP (&r);
11868 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11869 highest (sign) bit, with a fixed binary point at bit point_pos.
11870 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11871 bits for the mantissa, this may fail (low bits would be lost). */
11872 real_ldexp (&m, &r, point_pos - exponent);
11873 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11874 mantissa = w.elt (0);
11875 mant_hi = w.elt (1);
11876
11877 /* If there are bits set in the low part of the mantissa, we can't
11878 represent this value. */
11879 if (mantissa != 0)
11880 return -1;
11881
11882 /* Now make it so that mantissa contains the most-significant bits, and move
11883 the point_pos to indicate that the least-significant bits have been
11884 discarded. */
11885 point_pos -= HOST_BITS_PER_WIDE_INT;
11886 mantissa = mant_hi;
11887
11888 /* We can permit four significant bits of mantissa only, plus a high bit
11889 which is always 1. */
11890 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11891 if ((mantissa & mask) != 0)
11892 return -1;
11893
11894 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11895 mantissa >>= point_pos - 5;
11896
11897 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11898 floating-point immediate zero with Neon using an integer-zero load, but
11899 that case is handled elsewhere.) */
11900 if (mantissa == 0)
11901 return -1;
11902
11903 gcc_assert (mantissa >= 16 && mantissa <= 31);
11904
11905 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11906 normalized significands are in the range [1, 2). (Our mantissa is shifted
11907 left 4 places at this point relative to normalized IEEE754 values). GCC
11908 internally uses [0.5, 1) (see real.c), so the exponent returned from
11909 REAL_EXP must be altered. */
11910 exponent = 5 - exponent;
11911
11912 if (exponent < 0 || exponent > 7)
11913 return -1;
11914
11915 /* Sign, mantissa and exponent are now in the correct form to plug into the
11916 formula described in the comment above. */
11917 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11918 }
11919
11920 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11921 int
11922 vfp3_const_double_rtx (rtx x)
11923 {
11924 if (!TARGET_VFP3)
11925 return 0;
11926
11927 return vfp3_const_double_index (x) != -1;
11928 }
11929
11930 /* Recognize immediates which can be used in various Neon instructions. Legal
11931 immediates are described by the following table (for VMVN variants, the
11932 bitwise inverse of the constant shown is recognized. In either case, VMOV
11933 is output and the correct instruction to use for a given constant is chosen
11934 by the assembler). The constant shown is replicated across all elements of
11935 the destination vector.
11936
11937 insn elems variant constant (binary)
11938 ---- ----- ------- -----------------
11939 vmov i32 0 00000000 00000000 00000000 abcdefgh
11940 vmov i32 1 00000000 00000000 abcdefgh 00000000
11941 vmov i32 2 00000000 abcdefgh 00000000 00000000
11942 vmov i32 3 abcdefgh 00000000 00000000 00000000
11943 vmov i16 4 00000000 abcdefgh
11944 vmov i16 5 abcdefgh 00000000
11945 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11946 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11947 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11948 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11949 vmvn i16 10 00000000 abcdefgh
11950 vmvn i16 11 abcdefgh 00000000
11951 vmov i32 12 00000000 00000000 abcdefgh 11111111
11952 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11953 vmov i32 14 00000000 abcdefgh 11111111 11111111
11954 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11955 vmov i8 16 abcdefgh
11956 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11957 eeeeeeee ffffffff gggggggg hhhhhhhh
11958 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11959 vmov f32 19 00000000 00000000 00000000 00000000
11960
11961 For case 18, B = !b. Representable values are exactly those accepted by
11962 vfp3_const_double_index, but are output as floating-point numbers rather
11963 than indices.
11964
11965 For case 19, we will change it to vmov.i32 when assembling.
11966
11967 Variants 0-5 (inclusive) may also be used as immediates for the second
11968 operand of VORR/VBIC instructions.
11969
11970 The INVERSE argument causes the bitwise inverse of the given operand to be
11971 recognized instead (used for recognizing legal immediates for the VAND/VORN
11972 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11973 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11974 output, rather than the real insns vbic/vorr).
11975
11976 INVERSE makes no difference to the recognition of float vectors.
11977
11978 The return value is the variant of immediate as shown in the above table, or
11979 -1 if the given value doesn't match any of the listed patterns.
11980 */
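/* For illustration: a V4SImode constant with every element equal to
   0x000000ab matches variant 0 (vmov.i32 with abcdefgh == 0xab), and one
   with every element equal to 0x0000ab00 matches variant 1. */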
11981 static int
11982 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11983 rtx *modconst, int *elementwidth)
11984 {
11985 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11986 matches = 1; \
11987 for (i = 0; i < idx; i += (STRIDE)) \
11988 if (!(TEST)) \
11989 matches = 0; \
11990 if (matches) \
11991 { \
11992 immtype = (CLASS); \
11993 elsize = (ELSIZE); \
11994 break; \
11995 }
11996
11997 unsigned int i, elsize = 0, idx = 0, n_elts;
11998 unsigned int innersize;
11999 unsigned char bytes[16];
12000 int immtype = -1, matches;
12001 unsigned int invmask = inverse ? 0xff : 0;
12002 bool vector = GET_CODE (op) == CONST_VECTOR;
12003
12004 if (vector)
12005 n_elts = CONST_VECTOR_NUNITS (op);
12006 else
12007 {
12008 n_elts = 1;
12009 gcc_assert (mode != VOIDmode);
12010 }
12011
12012 innersize = GET_MODE_UNIT_SIZE (mode);
12013
12014 /* Vectors of float constants. */
12015 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
12016 {
12017 rtx el0 = CONST_VECTOR_ELT (op, 0);
12018
12019 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
12020 return -1;
12021
12022 /* FP16 vectors cannot be represented. */
12023 if (GET_MODE_INNER (mode) == HFmode)
12024 return -1;
12025
12026 /* All elements in the vector must be the same. Note that 0.0 and -0.0
12027 are distinct in this context. */
12028 if (!const_vec_duplicate_p (op))
12029 return -1;
12030
12031 if (modconst)
12032 *modconst = CONST_VECTOR_ELT (op, 0);
12033
12034 if (elementwidth)
12035 *elementwidth = 0;
12036
12037 if (el0 == CONST0_RTX (GET_MODE (el0)))
12038 return 19;
12039 else
12040 return 18;
12041 }
12042
12043 /* The tricks done in the code below apply for little-endian vector layout.
12044 For big-endian vectors only allow vectors of the form { a, a, a..., a }.
12045 FIXME: Implement logic for big-endian vectors. */
12046 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
12047 return -1;
12048
12049 /* Splat vector constant out into a byte vector. */
12050 for (i = 0; i < n_elts; i++)
12051 {
12052 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12053 unsigned HOST_WIDE_INT elpart;
12054
12055 gcc_assert (CONST_INT_P (el));
12056 elpart = INTVAL (el);
12057
12058 for (unsigned int byte = 0; byte < innersize; byte++)
12059 {
12060 bytes[idx++] = (elpart & 0xff) ^ invmask;
12061 elpart >>= BITS_PER_UNIT;
12062 }
12063 }
12064
12065 /* Sanity check. */
12066 gcc_assert (idx == GET_MODE_SIZE (mode));
12067
12068 do
12069 {
12070 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12071 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12072
12073 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12074 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12075
12076 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12077 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12078
12079 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12080 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12081
12082 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12083
12084 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12085
12086 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12087 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12088
12089 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12090 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12091
12092 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12093 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12094
12095 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12096 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12097
12098 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12099
12100 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12101
12102 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12103 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12104
12105 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12106 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12107
12108 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12109 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12110
12111 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12112 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12113
12114 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12115
12116 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12117 && bytes[i] == bytes[(i + 8) % idx]);
12118 }
12119 while (0);
12120
12121 if (immtype == -1)
12122 return -1;
12123
12124 if (elementwidth)
12125 *elementwidth = elsize;
12126
12127 if (modconst)
12128 {
12129 unsigned HOST_WIDE_INT imm = 0;
12130
12131 /* Un-invert bytes of recognized vector, if necessary. */
12132 if (invmask != 0)
12133 for (i = 0; i < idx; i++)
12134 bytes[i] ^= invmask;
12135
12136 if (immtype == 17)
12137 {
12138 /* FIXME: Broken on 32-bit H_W_I hosts. */
12139 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12140
12141 for (i = 0; i < 8; i++)
12142 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12143 << (i * BITS_PER_UNIT);
12144
12145 *modconst = GEN_INT (imm);
12146 }
12147 else
12148 {
12149 unsigned HOST_WIDE_INT imm = 0;
12150
12151 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12152 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12153
12154 *modconst = GEN_INT (imm);
12155 }
12156 }
12157
12158 return immtype;
12159 #undef CHECK
12160 }
12161
12162 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12163 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12164 float elements), and a modified constant (whatever should be output for a
12165 VMOV) in *MODCONST. */
12166
12167 int
12168 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12169 rtx *modconst, int *elementwidth)
12170 {
12171 rtx tmpconst;
12172 int tmpwidth;
12173 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12174
12175 if (retval == -1)
12176 return 0;
12177
12178 if (modconst)
12179 *modconst = tmpconst;
12180
12181 if (elementwidth)
12182 *elementwidth = tmpwidth;
12183
12184 return 1;
12185 }
12186
12187 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12188 the immediate is valid, write a constant suitable for using as an operand
12189 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12190 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12191
12192 int
12193 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12194 rtx *modconst, int *elementwidth)
12195 {
12196 rtx tmpconst;
12197 int tmpwidth;
12198 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12199
12200 if (retval < 0 || retval > 5)
12201 return 0;
12202
12203 if (modconst)
12204 *modconst = tmpconst;
12205
12206 if (elementwidth)
12207 *elementwidth = tmpwidth;
12208
12209 return 1;
12210 }
12211
12212 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12213 the immediate is valid, write a constant suitable for using as an operand
12214 to VSHR/VSHL to *MODCONST and the corresponding element width to
12215 *ELEMENTWIDTH. ISLEFTSHIFT indicates whether this is a left or a right
12216 shift, since the two have different immediate ranges. */
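/* For illustration: for a V8QImode shift-count vector the element size is
   8 bits, so a valid VSHL immediate lies in 0..7, a valid VSHR immediate in
   1..8, and every element of the count vector must be identical. */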
12217
12218 int
12219 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12220 rtx *modconst, int *elementwidth,
12221 bool isleftshift)
12222 {
12223 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12224 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12225 unsigned HOST_WIDE_INT last_elt = 0;
12226 unsigned HOST_WIDE_INT maxshift;
12227
12228 /* All elements of the shift-count vector must be the same. */
12229 for (i = 0; i < n_elts; i++)
12230 {
12231 rtx el = CONST_VECTOR_ELT (op, i);
12232 unsigned HOST_WIDE_INT elpart;
12233
12234 if (CONST_INT_P (el))
12235 elpart = INTVAL (el);
12236 else if (CONST_DOUBLE_P (el))
12237 return 0;
12238 else
12239 gcc_unreachable ();
12240
12241 if (i != 0 && elpart != last_elt)
12242 return 0;
12243
12244 last_elt = elpart;
12245 }
12246
12247 /* Shift less than element size. */
12248 maxshift = innersize * 8;
12249
12250 if (isleftshift)
12251 {
12252 /* Left shift immediate value can be from 0 to <size>-1. */
12253 if (last_elt >= maxshift)
12254 return 0;
12255 }
12256 else
12257 {
12258 /* Right shift immediate value can be from 1 to <size>. */
12259 if (last_elt == 0 || last_elt > maxshift)
12260 return 0;
12261 }
12262
12263 if (elementwidth)
12264 *elementwidth = innersize * 8;
12265
12266 if (modconst)
12267 *modconst = CONST_VECTOR_ELT (op, 0);
12268
12269 return 1;
12270 }
12271
12272 /* Return a string suitable for output of Neon immediate logic operation
12273 MNEM. */
12274
12275 char *
12276 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12277 int inverse, int quad)
12278 {
12279 int width, is_valid;
12280 static char templ[40];
12281
12282 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12283
12284 gcc_assert (is_valid != 0);
12285
12286 if (quad)
12287 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12288 else
12289 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12290
12291 return templ;
12292 }
12293
12294 /* Return a string suitable for output of Neon immediate shift operation
12295 (VSHR or VSHL) MNEM. */
12296
12297 char *
12298 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12299 machine_mode mode, int quad,
12300 bool isleftshift)
12301 {
12302 int width, is_valid;
12303 static char templ[40];
12304
12305 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12306 gcc_assert (is_valid != 0);
12307
12308 if (quad)
12309 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12310 else
12311 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12312
12313 return templ;
12314 }
12315
12316 /* Output a sequence of pairwise operations to implement a reduction.
12317 NOTE: We do "too much work" here, because pairwise operations work on two
12318 registers-worth of operands in one go. Unfortunately, I don't think we can
12319 exploit those extra calculations to do the full operation in fewer steps.
12320 Although all vector elements of the result but the first are ignored, we
12321 actually calculate the same result in each of the elements. An alternative
12322 such as initially loading a vector with zero to use as each of the second
12323 operands would use up an additional register and take an extra instruction,
12324 for no particular gain. */
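/* For illustration: for a V4SImode operand the loop below emits two
   pairwise operations (i == 2, then i == 1), the final one writing its
   result directly to OP0. */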
12325
12326 void
12327 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12328 rtx (*reduc) (rtx, rtx, rtx))
12329 {
12330 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12331 rtx tmpsum = op1;
12332
12333 for (i = parts / 2; i >= 1; i /= 2)
12334 {
12335 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12336 emit_insn (reduc (dest, tmpsum, tmpsum));
12337 tmpsum = dest;
12338 }
12339 }
12340
12341 /* If VALS is a vector constant that can be loaded into a register
12342 using VDUP, generate instructions to do so and return an RTX to
12343 assign to the register. Otherwise return NULL_RTX. */
12344
12345 static rtx
12346 neon_vdup_constant (rtx vals)
12347 {
12348 machine_mode mode = GET_MODE (vals);
12349 machine_mode inner_mode = GET_MODE_INNER (mode);
12350 rtx x;
12351
12352 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12353 return NULL_RTX;
12354
12355 if (!const_vec_duplicate_p (vals, &x))
12356 /* The elements are not all the same. We could handle repeating
12357 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12358 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12359 vdup.i16). */
12360 return NULL_RTX;
12361
12362 /* We can load this constant by using VDUP and a constant in a
12363 single ARM register. This will be cheaper than a vector
12364 load. */
12365
12366 x = copy_to_mode_reg (inner_mode, x);
12367 return gen_vec_duplicate (mode, x);
12368 }
12369
12370 /* Generate code to load VALS, which is a PARALLEL containing only
12371 constants (for vec_init) or CONST_VECTOR, efficiently into a
12372 register. Returns an RTX to copy into the register, or NULL_RTX
12373 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12374
12375 rtx
12376 neon_make_constant (rtx vals)
12377 {
12378 machine_mode mode = GET_MODE (vals);
12379 rtx target;
12380 rtx const_vec = NULL_RTX;
12381 int n_elts = GET_MODE_NUNITS (mode);
12382 int n_const = 0;
12383 int i;
12384
12385 if (GET_CODE (vals) == CONST_VECTOR)
12386 const_vec = vals;
12387 else if (GET_CODE (vals) == PARALLEL)
12388 {
12389 /* A CONST_VECTOR must contain only CONST_INTs and
12390 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12391 Only store valid constants in a CONST_VECTOR. */
12392 for (i = 0; i < n_elts; ++i)
12393 {
12394 rtx x = XVECEXP (vals, 0, i);
12395 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12396 n_const++;
12397 }
12398 if (n_const == n_elts)
12399 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12400 }
12401 else
12402 gcc_unreachable ();
12403
12404 if (const_vec != NULL
12405 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12406 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12407 return const_vec;
12408 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12409 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12410 pipeline cycle; creating the constant takes one or two ARM
12411 pipeline cycles. */
12412 return target;
12413 else if (const_vec != NULL_RTX)
12414 /* Load from constant pool. On Cortex-A8 this takes two cycles
12415 (for either double or quad vectors). We cannot take advantage
12416 of single-cycle VLD1 because we need a PC-relative addressing
12417 mode. */
12418 return const_vec;
12419 else
12420 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12421 We cannot construct an initializer. */
12422 return NULL_RTX;
12423 }
12424
12425 /* Initialize vector TARGET to VALS. */
12426
12427 void
12428 neon_expand_vector_init (rtx target, rtx vals)
12429 {
12430 machine_mode mode = GET_MODE (target);
12431 machine_mode inner_mode = GET_MODE_INNER (mode);
12432 int n_elts = GET_MODE_NUNITS (mode);
12433 int n_var = 0, one_var = -1;
12434 bool all_same = true;
12435 rtx x, mem;
12436 int i;
12437
12438 for (i = 0; i < n_elts; ++i)
12439 {
12440 x = XVECEXP (vals, 0, i);
12441 if (!CONSTANT_P (x))
12442 ++n_var, one_var = i;
12443
12444 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12445 all_same = false;
12446 }
12447
12448 if (n_var == 0)
12449 {
12450 rtx constant = neon_make_constant (vals);
12451 if (constant != NULL_RTX)
12452 {
12453 emit_move_insn (target, constant);
12454 return;
12455 }
12456 }
12457
12458 /* Splat a single non-constant element if we can. */
12459 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12460 {
12461 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12462 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
12463 return;
12464 }
12465
12466 /* One field is non-constant. Load constant then overwrite varying
12467 field. This is more efficient than using the stack. */
12468 if (n_var == 1)
12469 {
12470 rtx copy = copy_rtx (vals);
12471 rtx merge_mask = GEN_INT (1 << one_var);
12472
12473 /* Load constant part of vector, substitute neighboring value for
12474 varying element. */
12475 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12476 neon_expand_vector_init (target, copy);
12477
12478 /* Insert variable. */
12479 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12480 emit_insn (gen_vec_set_internal (mode, target, x, merge_mask, target));
12481 return;
12482 }
12483
12484 /* Construct the vector in memory one field at a time
12485 and load the whole vector. */
12486 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12487 for (i = 0; i < n_elts; i++)
12488 emit_move_insn (adjust_address_nv (mem, inner_mode,
12489 i * GET_MODE_SIZE (inner_mode)),
12490 XVECEXP (vals, 0, i));
12491 emit_move_insn (target, mem);
12492 }
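
/* Rough sketch of the strategies above (register and lane choices are
   arbitrary examples):

     all elements equal and variable       ->  vdup.32  q0, r1
     one variable element, e.g. {1,2,3,x}  ->  load the constant {1,2,3,1}
                                               (the neighbouring value is
                                               substituted), then insert x
                                               with a lane write such as
                                               vmov.32  d1[1], r1
     several variable elements             ->  element-by-element stores to a
                                               stack slot, then one
                                               full-vector load.  */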
12493
12494 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive).  Raise
12495 an error, using DESC to describe the operand, if it doesn't.  EXP indicates
12496 the source location, which includes the inlining history for intrinsics.  */

12497
12498 static void
12499 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12500 const_tree exp, const char *desc)
12501 {
12502 HOST_WIDE_INT lane;
12503
12504 gcc_assert (CONST_INT_P (operand));
12505
12506 lane = INTVAL (operand);
12507
12508 if (lane < low || lane >= high)
12509 {
12510 if (exp)
12511 error ("%K%s %wd out of range %wd - %wd",
12512 exp, desc, lane, low, high - 1);
12513 else
12514 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12515 }
12516 }
12517
12518 /* Bounds-check lanes. */
12519
12520 void
12521 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12522 const_tree exp)
12523 {
12524 bounds_check (operand, low, high, exp, "lane");
12525 }
12526
12527 /* Bounds-check constants. */
12528
12529 void
12530 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12531 {
12532 bounds_check (operand, low, high, NULL_TREE, "constant");
12533 }
12534
/* Return the number of bits in one element of vector mode MODE.  */
12535 HOST_WIDE_INT
12536 neon_element_bits (machine_mode mode)
12537 {
12538 return GET_MODE_UNIT_BITSIZE (mode);
12539 }
12540
12541 \f
12542 /* Predicates for `match_operand' and `match_operator'. */
12543
12544 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12545 WB is true if full writeback address modes are allowed and is false
12546 if limited writeback address modes (POST_INC and PRE_DEC) are
12547 allowed. */
12548
12549 int
12550 arm_coproc_mem_operand (rtx op, bool wb)
12551 {
12552 rtx ind;
12553
12554 /* Reject eliminable registers. */
12555 if (! (reload_in_progress || reload_completed || lra_in_progress)
12556 && ( reg_mentioned_p (frame_pointer_rtx, op)
12557 || reg_mentioned_p (arg_pointer_rtx, op)
12558 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12559 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12560 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12561 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12562 return FALSE;
12563
12564 /* Constants are converted into offsets from labels. */
12565 if (!MEM_P (op))
12566 return FALSE;
12567
12568 ind = XEXP (op, 0);
12569
12570 if (reload_completed
12571 && (GET_CODE (ind) == LABEL_REF
12572 || (GET_CODE (ind) == CONST
12573 && GET_CODE (XEXP (ind, 0)) == PLUS
12574 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12575 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12576 return TRUE;
12577
12578 /* Match: (mem (reg)). */
12579 if (REG_P (ind))
12580 return arm_address_register_rtx_p (ind, 0);
12581
12582 /* Autoincrement addressing modes.  POST_INC and PRE_DEC are
12583 acceptable in any case (subject to verification by
12584 arm_address_register_rtx_p). We need WB to be true to accept
12585 PRE_INC and POST_DEC. */
12586 if (GET_CODE (ind) == POST_INC
12587 || GET_CODE (ind) == PRE_DEC
12588 || (wb
12589 && (GET_CODE (ind) == PRE_INC
12590 || GET_CODE (ind) == POST_DEC)))
12591 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12592
12593 if (wb
12594 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12595 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12596 && GET_CODE (XEXP (ind, 1)) == PLUS
12597 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12598 ind = XEXP (ind, 1);
12599
12600 /* Match:
12601 (plus (reg)
12602 (const)). */
12603 if (GET_CODE (ind) == PLUS
12604 && REG_P (XEXP (ind, 0))
12605 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12606 && CONST_INT_P (XEXP (ind, 1))
12607 && INTVAL (XEXP (ind, 1)) > -1024
12608 && INTVAL (XEXP (ind, 1)) < 1024
12609 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12610 return TRUE;
12611
12612 return FALSE;
12613 }
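
/* For illustration, the addresses accepted above correspond roughly to the
   coprocessor (VFP) load/store addressing modes, e.g. (register names are
   arbitrary):

     (mem (reg r4))                          vldr  d0, [r4]
     (mem (plus (reg r4) (const_int 8)))     vldr  d0, [r4, #8]
     (mem (post_inc (reg r4)))               write-back form

   with the PLUS offset restricted to a multiple of 4 strictly between -1024
   and 1024, matching the scaled 8-bit offset field of these instructions.  */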
12614
12615 /* Return TRUE if OP is a memory operand from/to which we can load or store
12616 a vector.  STRICT controls whether eliminable registers are rejected.
12617 TYPE is one of the following values:
12618  0 - Vector load/store (vldr)
12619  1 - Core registers (ldm)
12620  2 - Element/structure loads (vld1)  */
12621 int
12622 neon_vector_mem_operand (rtx op, int type, bool strict)
12623 {
12624 rtx ind;
12625
12626 /* Reject eliminable registers. */
12627 if (strict && ! (reload_in_progress || reload_completed)
12628 && (reg_mentioned_p (frame_pointer_rtx, op)
12629 || reg_mentioned_p (arg_pointer_rtx, op)
12630 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12631 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12632 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12633 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12634 return FALSE;
12635
12636 /* Constants are converted into offsets from labels. */
12637 if (!MEM_P (op))
12638 return FALSE;
12639
12640 ind = XEXP (op, 0);
12641
12642 if (reload_completed
12643 && (GET_CODE (ind) == LABEL_REF
12644 || (GET_CODE (ind) == CONST
12645 && GET_CODE (XEXP (ind, 0)) == PLUS
12646 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12647 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12648 return TRUE;
12649
12650 /* Match: (mem (reg)). */
12651 if (REG_P (ind))
12652 return arm_address_register_rtx_p (ind, 0);
12653
12654 /* Allow post-increment with Neon registers. */
12655 if ((type != 1 && GET_CODE (ind) == POST_INC)
12656 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12657 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12658
12659 /* Allow post-increment by register for VLDn.  */
12660 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12661 && GET_CODE (XEXP (ind, 1)) == PLUS
12662 && REG_P (XEXP (XEXP (ind, 1), 1)))
12663 return true;
12664
12665 /* Match:
12666 (plus (reg)
12667 (const)). */
12668 if (type == 0
12669 && GET_CODE (ind) == PLUS
12670 && REG_P (XEXP (ind, 0))
12671 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12672 && CONST_INT_P (XEXP (ind, 1))
12673 && INTVAL (XEXP (ind, 1)) > -1024
12674 /* For quad modes, we restrict the constant offset to be slightly less
12675 than what the instruction format permits. We have no such constraint
12676 on double mode offsets. (This must match arm_legitimate_index_p.) */
12677 && (INTVAL (XEXP (ind, 1))
12678 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12679 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12680 return TRUE;
12681
12682 return FALSE;
12683 }
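
/* Illustrative (not exhaustive) examples of the address forms accepted for
   each TYPE:

     TYPE 0 (vldr):  [rn] and [rn, #off] with a multiple-of-4 offset in the
                     range described above, plus post-increment/pre-decrement;
     TYPE 1 (ldm):   [rn] only (besides label-based addresses after reload);
     TYPE 2 (vld1):  [rn], the post-increment form [rn]!, and post-increment
                     by a register, e.g.  vld1.32 {d0-d1}, [rn], rm.  */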
12684
12685 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12686 type. */
12687 int
12688 neon_struct_mem_operand (rtx op)
12689 {
12690 rtx ind;
12691
12692 /* Reject eliminable registers. */
12693 if (! (reload_in_progress || reload_completed)
12694 && ( reg_mentioned_p (frame_pointer_rtx, op)
12695 || reg_mentioned_p (arg_pointer_rtx, op)
12696 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12697 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12698 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12699 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12700 return FALSE;
12701
12702 /* Constants are converted into offsets from labels. */
12703 if (!MEM_P (op))
12704 return FALSE;
12705
12706 ind = XEXP (op, 0);
12707
12708 if (reload_completed
12709 && (GET_CODE (ind) == LABEL_REF
12710 || (GET_CODE (ind) == CONST
12711 && GET_CODE (XEXP (ind, 0)) == PLUS
12712 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12713 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12714 return TRUE;
12715
12716 /* Match: (mem (reg)). */
12717 if (REG_P (ind))
12718 return arm_address_register_rtx_p (ind, 0);
12719
12720 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12721 if (GET_CODE (ind) == POST_INC
12722 || GET_CODE (ind) == PRE_DEC)
12723 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12724
12725 return FALSE;
12726 }
12727
12728 /* Prepare the operands for the VCMLA by lane instruction such that the right
12729 register number is selected.  This instruction is special in that it always
12730 requires a D register; however, there is a choice to be made between Dn[0],
12731 Dn[1], D(n+1)[0], and D(n+1)[1] depending on the mode of the registers.
12732
12733 The VCMLA by lane instruction always selects two values.  For instance, given
12734 D0 and a V2SF, the only valid index is 0, as the values in S0 and S1 will be
12735 used by the instruction.  Given V4SF, however, indices 0 and 1 are both
12736 valid, as either D0[0] or D1[0] may be selected.
12737
12738 This function centralizes that information based on OPERANDS: OPERANDS[3]
12739 will be changed from a REG into a CONST_INT RTX and OPERANDS[4] will be
12740 updated to contain the right index.  */
12741
12742 rtx *
12743 neon_vcmla_lane_prepare_operands (rtx *operands)
12744 {
12745 int lane = INTVAL (operands[4]);
12746 machine_mode constmode = SImode;
12747 machine_mode mode = GET_MODE (operands[3]);
12748 int regno = REGNO (operands[3]);
12749 regno = ((regno - FIRST_VFP_REGNUM) >> 1);
12750 if (lane > 0 && lane >= GET_MODE_NUNITS (mode) / 4)
12751 {
12752 operands[3] = gen_int_mode (regno + 1, constmode);
12753 operands[4]
12754 = gen_int_mode (lane - GET_MODE_NUNITS (mode) / 4, constmode);
12755 }
12756 else
12757 {
12758 operands[3] = gen_int_mode (regno, constmode);
12759 operands[4] = gen_int_mode (lane, constmode);
12760 }
12761 return operands;
12762 }
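
/* Worked example of the mapping above (a sketch; it assumes the usual VFP
   register overlay where Q0 = D0/D1 = S0..S3): for OPERANDS[3] in Q0 with
   V4SF mode and lane index 1, GET_MODE_NUNITS (mode) / 4 is 1, so the lane
   lives in the second D register; OPERANDS[3] becomes the constant selecting
   D1 and OPERANDS[4] becomes 0.  With lane index 0 the selection stays on D0
   and the index is unchanged.  */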
12763
12764
12765 /* Return true if X is a register that will be eliminated later on. */
12766 int
12767 arm_eliminable_register (rtx x)
12768 {
12769 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12770 || REGNO (x) == ARG_POINTER_REGNUM
12771 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12772 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12773 }
12774
12775 /* Return GENERAL_REGS if a scratch register is required to reload X to/from
12776 coprocessor registers.  Otherwise return NO_REGS.  */
12777
12778 enum reg_class
12779 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12780 {
12781 if (mode == HFmode)
12782 {
12783 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12784 return GENERAL_REGS;
12785 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12786 return NO_REGS;
12787 return GENERAL_REGS;
12788 }
12789
12790 /* The neon move patterns handle all legitimate vector and struct
12791 addresses. */
12792 if (TARGET_NEON
12793 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12794 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12795 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12796 || VALID_NEON_STRUCT_MODE (mode)))
12797 return NO_REGS;
12798
12799 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12800 return NO_REGS;
12801
12802 return GENERAL_REGS;
12803 }
12804
12805 /* Values which must be returned in the most-significant end of the return
12806 register. */
12807
12808 static bool
12809 arm_return_in_msb (const_tree valtype)
12810 {
12811 return (TARGET_AAPCS_BASED
12812 && BYTES_BIG_ENDIAN
12813 && (AGGREGATE_TYPE_P (valtype)
12814 || TREE_CODE (valtype) == COMPLEX_TYPE
12815 || FIXED_POINT_TYPE_P (valtype)));
12816 }
12817
12818 /* Return TRUE if X references a SYMBOL_REF. */
12819 int
12820 symbol_mentioned_p (rtx x)
12821 {
12822 const char * fmt;
12823 int i;
12824
12825 if (GET_CODE (x) == SYMBOL_REF)
12826 return 1;
12827
12828 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12829 are constant offsets, not symbols. */
12830 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12831 return 0;
12832
12833 fmt = GET_RTX_FORMAT (GET_CODE (x));
12834
12835 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12836 {
12837 if (fmt[i] == 'E')
12838 {
12839 int j;
12840
12841 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12842 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12843 return 1;
12844 }
12845 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12846 return 1;
12847 }
12848
12849 return 0;
12850 }
12851
12852 /* Return TRUE if X references a LABEL_REF. */
12853 int
12854 label_mentioned_p (rtx x)
12855 {
12856 const char * fmt;
12857 int i;
12858
12859 if (GET_CODE (x) == LABEL_REF)
12860 return 1;
12861
12862 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12863 instruction, but they are constant offsets, not symbols. */
12864 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12865 return 0;
12866
12867 fmt = GET_RTX_FORMAT (GET_CODE (x));
12868 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12869 {
12870 if (fmt[i] == 'E')
12871 {
12872 int j;
12873
12874 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12875 if (label_mentioned_p (XVECEXP (x, i, j)))
12876 return 1;
12877 }
12878 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12879 return 1;
12880 }
12881
12882 return 0;
12883 }
12884
/* Return 1 if X is an UNSPEC_TLS reference, either directly or wrapped in a
   CONST.  */
12885 int
12886 tls_mentioned_p (rtx x)
12887 {
12888 switch (GET_CODE (x))
12889 {
12890 case CONST:
12891 return tls_mentioned_p (XEXP (x, 0));
12892
12893 case UNSPEC:
12894 if (XINT (x, 1) == UNSPEC_TLS)
12895 return 1;
12896
12897 /* Fall through. */
12898 default:
12899 return 0;
12900 }
12901 }
12902
12903 /* Must not copy any rtx that uses a pc-relative address.
12904 Also, disallow copying of load-exclusive instructions that
12905 may appear after splitting of compare-and-swap-style operations
12906 so as to prevent those loops from being transformed away from their
12907 canonical forms (see PR 69904). */
12908
12909 static bool
12910 arm_cannot_copy_insn_p (rtx_insn *insn)
12911 {
12912 /* The tls call insn cannot be copied, as it is paired with a data
12913 word. */
12914 if (recog_memoized (insn) == CODE_FOR_tlscall)
12915 return true;
12916
12917 subrtx_iterator::array_type array;
12918 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12919 {
12920 const_rtx x = *iter;
12921 if (GET_CODE (x) == UNSPEC
12922 && (XINT (x, 1) == UNSPEC_PIC_BASE
12923 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12924 return true;
12925 }
12926
12927 rtx set = single_set (insn);
12928 if (set)
12929 {
12930 rtx src = SET_SRC (set);
12931 if (GET_CODE (src) == ZERO_EXTEND)
12932 src = XEXP (src, 0);
12933
12934 /* Catch the load-exclusive and load-acquire operations. */
12935 if (GET_CODE (src) == UNSPEC_VOLATILE
12936 && (XINT (src, 1) == VUNSPEC_LL
12937 || XINT (src, 1) == VUNSPEC_LAX))
12938 return true;
12939 }
12940 return false;
12941 }
12942
/* Map a min/max rtx code to the comparison code under which its first
   operand is selected (SMAX -> GE, SMIN -> LE, UMIN -> LEU, UMAX -> GEU).  */
12943 enum rtx_code
12944 minmax_code (rtx x)
12945 {
12946 enum rtx_code code = GET_CODE (x);
12947
12948 switch (code)
12949 {
12950 case SMAX:
12951 return GE;
12952 case SMIN:
12953 return LE;
12954 case UMIN:
12955 return LEU;
12956 case UMAX:
12957 return GEU;
12958 default:
12959 gcc_unreachable ();
12960 }
12961 }
12962
12963 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12964
12965 bool
12966 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12967 int *mask, bool *signed_sat)
12968 {
12969 /* The high bound must be a power of two minus one. */
12970 int log = exact_log2 (INTVAL (hi_bound) + 1);
12971 if (log == -1)
12972 return false;
12973
12974 /* The low bound is either zero (for usat) or one less than the
12975 negation of the high bound (for ssat). */
12976 if (INTVAL (lo_bound) == 0)
12977 {
12978 if (mask)
12979 *mask = log;
12980 if (signed_sat)
12981 *signed_sat = false;
12982
12983 return true;
12984 }
12985
12986 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12987 {
12988 if (mask)
12989 *mask = log + 1;
12990 if (signed_sat)
12991 *signed_sat = true;
12992
12993 return true;
12994 }
12995
12996 return false;
12997 }
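
/* Worked examples for the matcher above (illustrative only): clamping to
   [0, 255] has LO_BOUND == 0 and HI_BOUND == 255, so log == 8, *MASK == 8 and
   the saturation is unsigned (usat #8).  Clamping to [-256, 255] satisfies
   -256 == -255 - 1, so *MASK == 9 and the saturation is signed (ssat #9,
   i.e. the range -2^8 .. 2^8 - 1).  */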
12998
12999 /* Return 1 if memory locations are adjacent. */
13000 int
13001 adjacent_mem_locations (rtx a, rtx b)
13002 {
13003 /* We don't guarantee to preserve the order of these memory refs. */
13004 if (volatile_refs_p (a) || volatile_refs_p (b))
13005 return 0;
13006
13007 if ((REG_P (XEXP (a, 0))
13008 || (GET_CODE (XEXP (a, 0)) == PLUS
13009 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
13010 && (REG_P (XEXP (b, 0))
13011 || (GET_CODE (XEXP (b, 0)) == PLUS
13012 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
13013 {
13014 HOST_WIDE_INT val0 = 0, val1 = 0;
13015 rtx reg0, reg1;
13016 int val_diff;
13017
13018 if (GET_CODE (XEXP (a, 0)) == PLUS)
13019 {
13020 reg0 = XEXP (XEXP (a, 0), 0);
13021 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
13022 }
13023 else
13024 reg0 = XEXP (a, 0);
13025
13026 if (GET_CODE (XEXP (b, 0)) == PLUS)
13027 {
13028 reg1 = XEXP (XEXP (b, 0), 0);
13029 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
13030 }
13031 else
13032 reg1 = XEXP (b, 0);
13033
13034 /* Don't accept any offset that will require multiple
13035 instructions to handle, since this would cause the
13036 arith_adjacentmem pattern to output an overlong sequence. */
13037 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
13038 return 0;
13039
13040 /* Don't allow an eliminable register: register elimination can make
13041 the offset too large. */
13042 if (arm_eliminable_register (reg0))
13043 return 0;
13044
13045 val_diff = val1 - val0;
13046
13047 if (arm_ld_sched)
13048 {
13049 /* If the target has load delay slots, then there's no benefit
13050 to using an ldm instruction unless the offset is zero and
13051 we are optimizing for size. */
13052 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13053 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13054 && (val_diff == 4 || val_diff == -4));
13055 }
13056
13057 return ((REGNO (reg0) == REGNO (reg1))
13058 && (val_diff == 4 || val_diff == -4));
13059 }
13060
13061 return 0;
13062 }
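
/* For example (illustrative only), references equivalent to [r1, #4] and
   [r1, #8] count as adjacent here, whereas [r1, #4] and [r2, #8], or any
   volatile pair, do not; on arm_ld_sched targets the test is further
   restricted as the code above explains.  */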
13063
13064 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13065 for load operations, false for store operations. CONSECUTIVE is true
13066 if the register numbers in the operation must be consecutive in the register
13067 bank.  RETURN_PC is true if the value is to be loaded into the PC.
13068 The pattern we are trying to match for load is:
13069 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13070 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13071 :
13072 :
13073 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13074 ]
13075 where
13076 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13077 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13078 3. If consecutive is TRUE, then for kth register being loaded,
13079 REGNO (R_dk) = REGNO (R_d0) + k.
13080 The pattern for store is similar. */
13081 bool
13082 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13083 bool consecutive, bool return_pc)
13084 {
13085 HOST_WIDE_INT count = XVECLEN (op, 0);
13086 rtx reg, mem, addr;
13087 unsigned regno;
13088 unsigned first_regno;
13089 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13090 rtx elt;
13091 bool addr_reg_in_reglist = false;
13092 bool update = false;
13093 int reg_increment;
13094 int offset_adj;
13095 int regs_per_val;
13096
13097 /* If not in SImode, then registers must be consecutive
13098 (e.g., VLDM instructions for DFmode). */
13099 gcc_assert ((mode == SImode) || consecutive);
13100 /* Setting return_pc for stores is illegal. */
13101 gcc_assert (!return_pc || load);
13102
13103 /* Set up the increments and the regs per val based on the mode. */
13104 reg_increment = GET_MODE_SIZE (mode);
13105 regs_per_val = reg_increment / 4;
13106 offset_adj = return_pc ? 1 : 0;
13107
13108 if (count <= 1
13109 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13110 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13111 return false;
13112
13113 /* Check if this is a write-back. */
13114 elt = XVECEXP (op, 0, offset_adj);
13115 if (GET_CODE (SET_SRC (elt)) == PLUS)
13116 {
13117 i++;
13118 base = 1;
13119 update = true;
13120
13121 /* The offset adjustment must be the number of registers being
13122 popped times the size of a single register. */
13123 if (!REG_P (SET_DEST (elt))
13124 || !REG_P (XEXP (SET_SRC (elt), 0))
13125 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13126 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13127 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13128 ((count - 1 - offset_adj) * reg_increment))
13129 return false;
13130 }
13131
13132 i = i + offset_adj;
13133 base = base + offset_adj;
13134 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13135 success depends on the type: VLDM can do just one reg,
13136 LDM must do at least two. */
13137 if ((count <= i) && (mode == SImode))
13138 return false;
13139
13140 elt = XVECEXP (op, 0, i - 1);
13141 if (GET_CODE (elt) != SET)
13142 return false;
13143
13144 if (load)
13145 {
13146 reg = SET_DEST (elt);
13147 mem = SET_SRC (elt);
13148 }
13149 else
13150 {
13151 reg = SET_SRC (elt);
13152 mem = SET_DEST (elt);
13153 }
13154
13155 if (!REG_P (reg) || !MEM_P (mem))
13156 return false;
13157
13158 regno = REGNO (reg);
13159 first_regno = regno;
13160 addr = XEXP (mem, 0);
13161 if (GET_CODE (addr) == PLUS)
13162 {
13163 if (!CONST_INT_P (XEXP (addr, 1)))
13164 return false;
13165
13166 offset = INTVAL (XEXP (addr, 1));
13167 addr = XEXP (addr, 0);
13168 }
13169
13170 if (!REG_P (addr))
13171 return false;
13172
13173 /* Don't allow SP to be loaded unless it is also the base register. It
13174 guarantees that SP is reset correctly when an LDM instruction
13175 is interrupted. Otherwise, we might end up with a corrupt stack. */
13176 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13177 return false;
13178
13179 if (regno == REGNO (addr))
13180 addr_reg_in_reglist = true;
13181
13182 for (; i < count; i++)
13183 {
13184 elt = XVECEXP (op, 0, i);
13185 if (GET_CODE (elt) != SET)
13186 return false;
13187
13188 if (load)
13189 {
13190 reg = SET_DEST (elt);
13191 mem = SET_SRC (elt);
13192 }
13193 else
13194 {
13195 reg = SET_SRC (elt);
13196 mem = SET_DEST (elt);
13197 }
13198
13199 if (!REG_P (reg)
13200 || GET_MODE (reg) != mode
13201 || REGNO (reg) <= regno
13202 || (consecutive
13203 && (REGNO (reg) !=
13204 (unsigned int) (first_regno + regs_per_val * (i - base))))
13205 /* Don't allow SP to be loaded unless it is also the base register. It
13206 guarantees that SP is reset correctly when an LDM instruction
13207 is interrupted. Otherwise, we might end up with a corrupt stack. */
13208 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13209 || !MEM_P (mem)
13210 || GET_MODE (mem) != mode
13211 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13212 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13213 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13214 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13215 offset + (i - base) * reg_increment))
13216 && (!REG_P (XEXP (mem, 0))
13217 || offset + (i - base) * reg_increment != 0)))
13218 return false;
13219
13220 regno = REGNO (reg);
13221 if (regno == REGNO (addr))
13222 addr_reg_in_reglist = true;
13223 }
13224
13225 if (load)
13226 {
13227 if (update && addr_reg_in_reglist)
13228 return false;
13229
13230 /* For Thumb-1, the address register is always modified, either by write-back
13231 or by an explicit load.  If the pattern does not describe an update,
13232 then the address register must be in the list of loaded registers.  */
13233 if (TARGET_THUMB1)
13234 return update || addr_reg_in_reglist;
13235 }
13236
13237 return true;
13238 }
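
/* For illustration (schematic RTL, register numbers arbitrary), a
   "pop {r4, r5}" style load-multiple with write-back that this predicate
   accepts has the shape:

     (parallel
       [(set (reg sp) (plus (reg sp) (const_int 8)))
        (set (reg r4) (mem (reg sp)))
        (set (reg r5) (mem (plus (reg sp) (const_int 4))))])

   i.e. an optional write-back SET of the base register, whose adjustment is
   the number of registers transferred times the register size, followed by
   loads at offsets increasing by that size and with ascending register
   numbers.  */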
13239
13240 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13241 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13242 instruction. ADD_OFFSET is nonzero if the base address register needs
13243 to be modified with an add instruction before we can use it. */
13244
13245 static bool
13246 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13247 int nops, HOST_WIDE_INT add_offset)
13248 {
13249 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13250 if the offset isn't small enough. The reason 2 ldrs are faster
13251 is that these ARMs are able to do more than one cache access
13252 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13253 whilst the ARM8 has a double bandwidth cache. This means that
13254 these cores can do both an instruction fetch and a data fetch in
13255 a single cycle, so the trick of calculating the address into a
13256 scratch register (one of the result regs) and then doing a load
13257 multiple actually becomes slower (and no smaller in code size).
13258 That is the transformation
13259
13260 ldr rd1, [rbase + offset]
13261 ldr rd2, [rbase + offset + 4]
13262
13263 to
13264
13265 add rd1, rbase, offset
13266 ldmia rd1, {rd1, rd2}
13267
13268 produces worse code -- '3 cycles + any stalls on rd2' instead of
13269 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13270 access per cycle, the first sequence could never complete in less
13271 than 6 cycles, whereas the ldm sequence would only take 5 and
13272 would make better use of sequential accesses if not hitting the
13273 cache.
13274
13275 We cheat here and test 'arm_ld_sched' which we currently know to
13276 only be true for the ARM8, ARM9 and StrongARM. If this ever
13277 changes, then the test below needs to be reworked. */
13278 if (nops == 2 && arm_ld_sched && add_offset != 0)
13279 return false;
13280
13281 /* XScale has load-store double instructions, but they have stricter
13282 alignment requirements than load-store multiple, so we cannot
13283 use them.
13284
13285 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13286 the pipeline until completion.
13287
13288 NREGS CYCLES
13289 1 3
13290 2 4
13291 3 5
13292 4 6
13293
13294 An ldr instruction takes 1-3 cycles, but does not block the
13295 pipeline.
13296
13297 NREGS CYCLES
13298 1 1-3
13299 2 2-6
13300 3 3-9
13301 4 4-12
13302
13303 In the best case ldr will always win.  However, the more ldr instructions
13304 we issue, the less likely we are to be able to schedule them well.
13305 Using ldr instructions also increases code size.
13306
13307 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13308 for counts of 3 or 4 regs. */
13309 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13310 return false;
13311 return true;
13312 }
13313
13314 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13315 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13316 an array ORDER that describes the sequence in which to access the
13317 offsets so that they are visited in ascending order.  In this sequence,
13318 each offset must be larger by exactly 4 than the previous one.  ORDER[0]
13319 must have been filled in by the caller with the index of the lowest offset.
13320 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13321 we use to verify that ORDER produces an ascending order of registers.
13322 Return true if it was possible to construct such an order, false if
13323 not. */
13324
13325 static bool
13326 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13327 int *unsorted_regs)
13328 {
13329 int i;
13330 for (i = 1; i < nops; i++)
13331 {
13332 int j;
13333
13334 order[i] = order[i - 1];
13335 for (j = 0; j < nops; j++)
13336 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13337 {
13338 /* We must find exactly one offset that is higher than the
13339 previous one by 4. */
13340 if (order[i] != order[i - 1])
13341 return false;
13342 order[i] = j;
13343 }
13344 if (order[i] == order[i - 1])
13345 return false;
13346 /* The register numbers must be ascending. */
13347 if (unsorted_regs != NULL
13348 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13349 return false;
13350 }
13351 return true;
13352 }
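
/* Small worked example (illustrative only): for UNSORTED_OFFSETS {8, 4, 12, 0}
   the caller seeds ORDER[0] = 3 (the index of offset 0) and the loop above
   fills in ORDER = {3, 1, 0, 2}, visiting the offsets as 0, 4, 8, 12.
   Offsets {0, 4, 12} would fail, because no element is exactly 4 greater
   than 4.  */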
13353
13354 /* Used to determine in a peephole whether a sequence of load
13355 instructions can be changed into a load-multiple instruction.
13356 NOPS is the number of separate load instructions we are examining. The
13357 first NOPS entries in OPERANDS are the destination registers, the
13358 next NOPS entries are memory operands. If this function is
13359 successful, *BASE is set to the common base register of the memory
13360 accesses; *LOAD_OFFSET is set to the first memory location's offset
13361 from that base register.
13362 REGS is an array filled in with the destination register numbers.
13363 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13364 insn numbers to an ascending order of loads.  If CHECK_REGS is true,
13365 the sequence of registers in REGS matches the loads from ascending memory
13366 locations, and the function verifies that the register numbers are
13367 themselves ascending. If CHECK_REGS is false, the register numbers
13368 are stored in the order they are found in the operands. */
13369 static int
13370 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13371 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13372 {
13373 int unsorted_regs[MAX_LDM_STM_OPS];
13374 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13375 int order[MAX_LDM_STM_OPS];
13376 int base_reg = -1;
13377 int i, ldm_case;
13378
13379 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13380 easily extended if required. */
13381 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13382
13383 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13384
13385 /* Loop over the operands and check that the memory references are
13386 suitable (i.e. immediate offsets from the same base register). At
13387 the same time, extract the target register, and the memory
13388 offsets. */
13389 for (i = 0; i < nops; i++)
13390 {
13391 rtx reg;
13392 rtx offset;
13393
13394 /* Convert a subreg of a mem into the mem itself. */
13395 if (GET_CODE (operands[nops + i]) == SUBREG)
13396 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13397
13398 gcc_assert (MEM_P (operands[nops + i]));
13399
13400 /* Don't reorder volatile memory references; it doesn't seem worth
13401 looking for the case where the order is ok anyway. */
13402 if (MEM_VOLATILE_P (operands[nops + i]))
13403 return 0;
13404
13405 offset = const0_rtx;
13406
13407 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13408 || (GET_CODE (reg) == SUBREG
13409 && REG_P (reg = SUBREG_REG (reg))))
13410 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13411 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13412 || (GET_CODE (reg) == SUBREG
13413 && REG_P (reg = SUBREG_REG (reg))))
13414 && (CONST_INT_P (offset
13415 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13416 {
13417 if (i == 0)
13418 {
13419 base_reg = REGNO (reg);
13420 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13421 return 0;
13422 }
13423 else if (base_reg != (int) REGNO (reg))
13424 /* Not addressed from the same base register. */
13425 return 0;
13426
13427 unsorted_regs[i] = (REG_P (operands[i])
13428 ? REGNO (operands[i])
13429 : REGNO (SUBREG_REG (operands[i])));
13430
13431 /* If it isn't an integer register, or if it overwrites the
13432 base register but isn't the last insn in the list, then
13433 we can't do this. */
13434 if (unsorted_regs[i] < 0
13435 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13436 || unsorted_regs[i] > 14
13437 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13438 return 0;
13439
13440 /* Don't allow SP to be loaded unless it is also the base
13441 register. It guarantees that SP is reset correctly when
13442 an LDM instruction is interrupted. Otherwise, we might
13443 end up with a corrupt stack. */
13444 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13445 return 0;
13446
13447 unsorted_offsets[i] = INTVAL (offset);
13448 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13449 order[0] = i;
13450 }
13451 else
13452 /* Not a suitable memory address. */
13453 return 0;
13454 }
13455
13456 /* All the useful information has now been extracted from the
13457 operands into unsorted_regs and unsorted_offsets; additionally,
13458 order[0] has been set to the lowest offset in the list. Sort
13459 the offsets into order, verifying that they are adjacent, and
13460 check that the register numbers are ascending. */
13461 if (!compute_offset_order (nops, unsorted_offsets, order,
13462 check_regs ? unsorted_regs : NULL))
13463 return 0;
13464
13465 if (saved_order)
13466 memcpy (saved_order, order, sizeof order);
13467
13468 if (base)
13469 {
13470 *base = base_reg;
13471
13472 for (i = 0; i < nops; i++)
13473 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13474
13475 *load_offset = unsorted_offsets[order[0]];
13476 }
13477
13478 if (unsorted_offsets[order[0]] == 0)
13479 ldm_case = 1; /* ldmia */
13480 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13481 ldm_case = 2; /* ldmib */
13482 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13483 ldm_case = 3; /* ldmda */
13484 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13485 ldm_case = 4; /* ldmdb */
13486 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13487 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13488 ldm_case = 5;
13489 else
13490 return 0;
13491
13492 if (!multiple_operation_profitable_p (false, nops,
13493 ldm_case == 5
13494 ? unsorted_offsets[order[0]] : 0))
13495 return 0;
13496
13497 return ldm_case;
13498 }
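
/* Illustrative mapping from the sorted offsets to the value returned above
   (a sketch, assuming a common base register rn):

     lowest offset 0           -> case 1, ldmia rn, {...}
     lowest offset 4           -> case 2, ldmib rn, {...}   (ARM only)
     highest offset 0          -> case 3, ldmda rn, {...}   (ARM only)
     highest offset -4         -> case 4, ldmdb rn, {...}
     other reachable offsets   -> case 5, add into a scratch base first,
                                  then ldmia.  */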
13499
13500 /* Used to determine in a peephole whether a sequence of store instructions can
13501 be changed into a store-multiple instruction.
13502 NOPS is the number of separate store instructions we are examining.
13503 NOPS_TOTAL is the total number of instructions recognized by the peephole
13504 pattern.
13505 The first NOPS entries in OPERANDS are the source registers, the next
13506 NOPS entries are memory operands. If this function is successful, *BASE is
13507 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13508 to the first memory location's offset from that base register. REGS is an
13509 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13510 likewise filled with the corresponding rtx's.
13511 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13512 numbers to an ascending order of stores.
13513 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13514 from ascending memory locations, and the function verifies that the register
13515 numbers are themselves ascending. If CHECK_REGS is false, the register
13516 numbers are stored in the order they are found in the operands. */
13517 static int
13518 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13519 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13520 HOST_WIDE_INT *load_offset, bool check_regs)
13521 {
13522 int unsorted_regs[MAX_LDM_STM_OPS];
13523 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13524 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13525 int order[MAX_LDM_STM_OPS];
13526 int base_reg = -1;
13527 rtx base_reg_rtx = NULL;
13528 int i, stm_case;
13529
13530 /* Write-back of the base register is currently only supported for Thumb 1.  */
13531 int base_writeback = TARGET_THUMB1;
13532
13533 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13534 easily extended if required. */
13535 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13536
13537 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13538
13539 /* Loop over the operands and check that the memory references are
13540 suitable (i.e. immediate offsets from the same base register). At
13541 the same time, extract the target register, and the memory
13542 offsets. */
13543 for (i = 0; i < nops; i++)
13544 {
13545 rtx reg;
13546 rtx offset;
13547
13548 /* Convert a subreg of a mem into the mem itself. */
13549 if (GET_CODE (operands[nops + i]) == SUBREG)
13550 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13551
13552 gcc_assert (MEM_P (operands[nops + i]));
13553
13554 /* Don't reorder volatile memory references; it doesn't seem worth
13555 looking for the case where the order is ok anyway. */
13556 if (MEM_VOLATILE_P (operands[nops + i]))
13557 return 0;
13558
13559 offset = const0_rtx;
13560
13561 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13562 || (GET_CODE (reg) == SUBREG
13563 && REG_P (reg = SUBREG_REG (reg))))
13564 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13565 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13566 || (GET_CODE (reg) == SUBREG
13567 && REG_P (reg = SUBREG_REG (reg))))
13568 && (CONST_INT_P (offset
13569 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13570 {
13571 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13572 ? operands[i] : SUBREG_REG (operands[i]));
13573 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13574
13575 if (i == 0)
13576 {
13577 base_reg = REGNO (reg);
13578 base_reg_rtx = reg;
13579 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13580 return 0;
13581 }
13582 else if (base_reg != (int) REGNO (reg))
13583 /* Not addressed from the same base register. */
13584 return 0;
13585
13586 /* If it isn't an integer register, then we can't do this. */
13587 if (unsorted_regs[i] < 0
13588 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13589 /* The effects are unpredictable if the base register is
13590 both updated and stored. */
13591 || (base_writeback && unsorted_regs[i] == base_reg)
13592 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13593 || unsorted_regs[i] > 14)
13594 return 0;
13595
13596 unsorted_offsets[i] = INTVAL (offset);
13597 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13598 order[0] = i;
13599 }
13600 else
13601 /* Not a suitable memory address. */
13602 return 0;
13603 }
13604
13605 /* All the useful information has now been extracted from the
13606 operands into unsorted_regs and unsorted_offsets; additionally,
13607 order[0] has been set to the lowest offset in the list. Sort
13608 the offsets into order, verifying that they are adjacent, and
13609 check that the register numbers are ascending. */
13610 if (!compute_offset_order (nops, unsorted_offsets, order,
13611 check_regs ? unsorted_regs : NULL))
13612 return 0;
13613
13614 if (saved_order)
13615 memcpy (saved_order, order, sizeof order);
13616
13617 if (base)
13618 {
13619 *base = base_reg;
13620
13621 for (i = 0; i < nops; i++)
13622 {
13623 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13624 if (reg_rtxs)
13625 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13626 }
13627
13628 *load_offset = unsorted_offsets[order[0]];
13629 }
13630
13631 if (TARGET_THUMB1
13632 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13633 return 0;
13634
13635 if (unsorted_offsets[order[0]] == 0)
13636 stm_case = 1; /* stmia */
13637 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13638 stm_case = 2; /* stmib */
13639 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13640 stm_case = 3; /* stmda */
13641 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13642 stm_case = 4; /* stmdb */
13643 else
13644 return 0;
13645
13646 if (!multiple_operation_profitable_p (false, nops, 0))
13647 return 0;
13648
13649 return stm_case;
13650 }
13651 \f
13652 /* Routines for use in generating RTL. */
13653
13654 /* Generate a load-multiple instruction. COUNT is the number of loads in
13655 the instruction; REGS and MEMS are arrays containing the operands.
13656 BASEREG is the base register to be used in addressing the memory operands.
13657 WBACK_OFFSET is nonzero if the instruction should update the base
13658 register. */
13659
13660 static rtx
13661 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13662 HOST_WIDE_INT wback_offset)
13663 {
13664 int i = 0, j;
13665 rtx result;
13666
13667 if (!multiple_operation_profitable_p (false, count, 0))
13668 {
13669 rtx seq;
13670
13671 start_sequence ();
13672
13673 for (i = 0; i < count; i++)
13674 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13675
13676 if (wback_offset != 0)
13677 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13678
13679 seq = get_insns ();
13680 end_sequence ();
13681
13682 return seq;
13683 }
13684
13685 result = gen_rtx_PARALLEL (VOIDmode,
13686 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13687 if (wback_offset != 0)
13688 {
13689 XVECEXP (result, 0, 0)
13690 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13691 i = 1;
13692 count++;
13693 }
13694
13695 for (j = 0; i < count; i++, j++)
13696 XVECEXP (result, 0, i)
13697 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13698
13699 return result;
13700 }
13701
13702 /* Generate a store-multiple instruction. COUNT is the number of stores in
13703 the instruction; REGS and MEMS are arrays containing the operands.
13704 BASEREG is the base register to be used in addressing the memory operands.
13705 WBACK_OFFSET is nonzero if the instruction should update the base
13706 register. */
13707
13708 static rtx
13709 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13710 HOST_WIDE_INT wback_offset)
13711 {
13712 int i = 0, j;
13713 rtx result;
13714
13715 if (GET_CODE (basereg) == PLUS)
13716 basereg = XEXP (basereg, 0);
13717
13718 if (!multiple_operation_profitable_p (false, count, 0))
13719 {
13720 rtx seq;
13721
13722 start_sequence ();
13723
13724 for (i = 0; i < count; i++)
13725 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13726
13727 if (wback_offset != 0)
13728 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13729
13730 seq = get_insns ();
13731 end_sequence ();
13732
13733 return seq;
13734 }
13735
13736 result = gen_rtx_PARALLEL (VOIDmode,
13737 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13738 if (wback_offset != 0)
13739 {
13740 XVECEXP (result, 0, 0)
13741 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13742 i = 1;
13743 count++;
13744 }
13745
13746 for (j = 0; i < count; i++, j++)
13747 XVECEXP (result, 0, i)
13748 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13749
13750 return result;
13751 }
13752
13753 /* Generate either a load-multiple or a store-multiple instruction. This
13754 function can be used in situations where we can start with a single MEM
13755 rtx and adjust its address upwards.
13756 COUNT is the number of operations in the instruction, not counting a
13757 possible update of the base register. REGS is an array containing the
13758 register operands.
13759 BASEREG is the base register to be used in addressing the memory operands,
13760 which are constructed from BASEMEM.
13761 WRITE_BACK specifies whether the generated instruction should include an
13762 update of the base register.
13763 OFFSETP is used to pass an offset to and from this function; this offset
13764 is not used when constructing the address (instead BASEMEM should have an
13765 appropriate offset in its address), it is used only for setting
13766 MEM_OFFSET.  It is updated only if WRITE_BACK is true.  */
13767
13768 static rtx
13769 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13770 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13771 {
13772 rtx mems[MAX_LDM_STM_OPS];
13773 HOST_WIDE_INT offset = *offsetp;
13774 int i;
13775
13776 gcc_assert (count <= MAX_LDM_STM_OPS);
13777
13778 if (GET_CODE (basereg) == PLUS)
13779 basereg = XEXP (basereg, 0);
13780
13781 for (i = 0; i < count; i++)
13782 {
13783 rtx addr = plus_constant (Pmode, basereg, i * 4);
13784 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13785 offset += 4;
13786 }
13787
13788 if (write_back)
13789 *offsetp = offset;
13790
13791 if (is_load)
13792 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13793 write_back ? 4 * count : 0);
13794 else
13795 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13796 write_back ? 4 * count : 0);
13797 }
13798
13799 rtx
13800 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13801 rtx basemem, HOST_WIDE_INT *offsetp)
13802 {
13803 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13804 offsetp);
13805 }
13806
13807 rtx
13808 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13809 rtx basemem, HOST_WIDE_INT *offsetp)
13810 {
13811 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13812 offsetp);
13813 }
13814
13815 /* Called from a peephole2 expander to turn a sequence of loads into an
13816 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13817 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13818 is true if we can reorder the registers because they are used commutatively
13819 subsequently.
13820 Returns true iff we could generate a new instruction. */
13821
13822 bool
13823 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13824 {
13825 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13826 rtx mems[MAX_LDM_STM_OPS];
13827 int i, j, base_reg;
13828 rtx base_reg_rtx;
13829 HOST_WIDE_INT offset;
13830 int write_back = FALSE;
13831 int ldm_case;
13832 rtx addr;
13833
13834 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13835 &base_reg, &offset, !sort_regs);
13836
13837 if (ldm_case == 0)
13838 return false;
13839
13840 if (sort_regs)
13841 for (i = 0; i < nops - 1; i++)
13842 for (j = i + 1; j < nops; j++)
13843 if (regs[i] > regs[j])
13844 {
13845 int t = regs[i];
13846 regs[i] = regs[j];
13847 regs[j] = t;
13848 }
13849 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13850
13851 if (TARGET_THUMB1)
13852 {
13853 gcc_assert (ldm_case == 1 || ldm_case == 5);
13854
13855 /* Thumb-1 ldm uses writeback except if the base is loaded. */
13856 write_back = true;
13857 for (i = 0; i < nops; i++)
13858 if (base_reg == regs[i])
13859 write_back = false;
13860
13861 /* Ensure the base is dead if it is updated. */
13862 if (write_back && !peep2_reg_dead_p (nops, base_reg_rtx))
13863 return false;
13864 }
13865
13866 if (ldm_case == 5)
13867 {
13868 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13869 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13870 offset = 0;
13871 base_reg_rtx = newbase;
13872 }
13873
13874 for (i = 0; i < nops; i++)
13875 {
13876 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13877 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13878 SImode, addr, 0);
13879 }
13880 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13881 write_back ? offset + i * 4 : 0));
13882 return true;
13883 }
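
/* Rough example of the transformation this peephole helper performs
   (register and offset choices arbitrary):

     ldr r0, [r3]
     ldr r1, [r3, #4]

   becomes

     ldmia r3, {r0, r1}

   provided load_multiple_sequence accepts the operands and the register
   ordering constraints are met.  */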
13884
13885 /* Called from a peephole2 expander to turn a sequence of stores into an
13886 STM instruction. OPERANDS are the operands found by the peephole matcher;
13887 NOPS indicates how many separate stores we are trying to combine.
13888 Returns true iff we could generate a new instruction. */
13889
13890 bool
13891 gen_stm_seq (rtx *operands, int nops)
13892 {
13893 int i;
13894 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13895 rtx mems[MAX_LDM_STM_OPS];
13896 int base_reg;
13897 rtx base_reg_rtx;
13898 HOST_WIDE_INT offset;
13899 int write_back = FALSE;
13900 int stm_case;
13901 rtx addr;
13902 bool base_reg_dies;
13903
13904 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13905 mem_order, &base_reg, &offset, true);
13906
13907 if (stm_case == 0)
13908 return false;
13909
13910 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13911
13912 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13913 if (TARGET_THUMB1)
13914 {
13915 gcc_assert (base_reg_dies);
13916 write_back = TRUE;
13917 }
13918
13919 if (stm_case == 5)
13920 {
13921 gcc_assert (base_reg_dies);
13922 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13923 offset = 0;
13924 }
13925
13926 addr = plus_constant (Pmode, base_reg_rtx, offset);
13927
13928 for (i = 0; i < nops; i++)
13929 {
13930 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13931 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13932 SImode, addr, 0);
13933 }
13934 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13935 write_back ? offset + i * 4 : 0));
13936 return true;
13937 }
13938
13939 /* Called from a peephole2 expander to turn a sequence of stores that are
13940 preceded by constant loads into an STM instruction. OPERANDS are the
13941 operands found by the peephole matcher; NOPS indicates how many
13942 separate stores we are trying to combine; there are 2 * NOPS
13943 instructions in the peephole.
13944 Returns true iff we could generate a new instruction. */
13945
13946 bool
13947 gen_const_stm_seq (rtx *operands, int nops)
13948 {
13949 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13950 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13951 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13952 rtx mems[MAX_LDM_STM_OPS];
13953 int base_reg;
13954 rtx base_reg_rtx;
13955 HOST_WIDE_INT offset;
13956 int write_back = FALSE;
13957 int stm_case;
13958 rtx addr;
13959 bool base_reg_dies;
13960 int i, j;
13961 HARD_REG_SET allocated;
13962
13963 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13964 mem_order, &base_reg, &offset, false);
13965
13966 if (stm_case == 0)
13967 return false;
13968
13969 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13970
13971 /* If the same register is used more than once, try to find a free
13972 register. */
13973 CLEAR_HARD_REG_SET (allocated);
13974 for (i = 0; i < nops; i++)
13975 {
13976 for (j = i + 1; j < nops; j++)
13977 if (regs[i] == regs[j])
13978 {
13979 rtx t = peep2_find_free_register (0, nops * 2,
13980 TARGET_THUMB1 ? "l" : "r",
13981 SImode, &allocated);
13982 if (t == NULL_RTX)
13983 return false;
13984 reg_rtxs[i] = t;
13985 regs[i] = REGNO (t);
13986 }
13987 }
13988
13989 /* Compute an ordering that maps the register numbers to an ascending
13990 sequence. */
13991 reg_order[0] = 0;
13992 for (i = 0; i < nops; i++)
13993 if (regs[i] < regs[reg_order[0]])
13994 reg_order[0] = i;
13995
13996 for (i = 1; i < nops; i++)
13997 {
13998 int this_order = reg_order[i - 1];
13999 for (j = 0; j < nops; j++)
14000 if (regs[j] > regs[reg_order[i - 1]]
14001 && (this_order == reg_order[i - 1]
14002 || regs[j] < regs[this_order]))
14003 this_order = j;
14004 reg_order[i] = this_order;
14005 }
14006
14007 /* Ensure that registers that must be live after the instruction end
14008 up with the correct value. */
14009 for (i = 0; i < nops; i++)
14010 {
14011 int this_order = reg_order[i];
14012 if ((this_order != mem_order[i]
14013 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
14014 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
14015 return false;
14016 }
14017
14018 /* Load the constants. */
14019 for (i = 0; i < nops; i++)
14020 {
14021 rtx op = operands[2 * nops + mem_order[i]];
14022 sorted_regs[i] = regs[reg_order[i]];
14023 emit_move_insn (reg_rtxs[reg_order[i]], op);
14024 }
14025
14026 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
14027
14028 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
14029 if (TARGET_THUMB1)
14030 {
14031 gcc_assert (base_reg_dies);
14032 write_back = TRUE;
14033 }
14034
14035 if (stm_case == 5)
14036 {
14037 gcc_assert (base_reg_dies);
14038 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
14039 offset = 0;
14040 }
14041
14042 addr = plus_constant (Pmode, base_reg_rtx, offset);
14043
14044 for (i = 0; i < nops; i++)
14045 {
14046 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
14047 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
14048 SImode, addr, 0);
14049 }
14050 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
14051 write_back ? offset + i * 4 : 0));
14052 return true;
14053 }
14054
14055 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14056 unaligned copies on processors which support unaligned semantics for those
14057 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14058 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14059 An interleave factor of 1 (the minimum) will perform no interleaving.
14060 Load/store multiple are used for aligned addresses where possible. */
14061
14062 static void
14063 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14064 HOST_WIDE_INT length,
14065 unsigned int interleave_factor)
14066 {
14067 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14068 int *regnos = XALLOCAVEC (int, interleave_factor);
14069 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14070 HOST_WIDE_INT i, j;
14071 HOST_WIDE_INT remaining = length, words;
14072 rtx halfword_tmp = NULL, byte_tmp = NULL;
14073 rtx dst, src;
14074 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14075 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14076 HOST_WIDE_INT srcoffset, dstoffset;
14077 HOST_WIDE_INT src_autoinc, dst_autoinc;
14078 rtx mem, addr;
14079
14080 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
14081
14082 /* Use hard registers if we have aligned source or destination so we can use
14083 load/store multiple with contiguous registers. */
14084 if (dst_aligned || src_aligned)
14085 for (i = 0; i < interleave_factor; i++)
14086 regs[i] = gen_rtx_REG (SImode, i);
14087 else
14088 for (i = 0; i < interleave_factor; i++)
14089 regs[i] = gen_reg_rtx (SImode);
14090
14091 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14092 src = copy_addr_to_reg (XEXP (srcbase, 0));
14093
14094 srcoffset = dstoffset = 0;
14095
14096 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14097 For copying the last bytes we want to subtract this offset again. */
14098 src_autoinc = dst_autoinc = 0;
14099
14100 for (i = 0; i < interleave_factor; i++)
14101 regnos[i] = i;
14102
14103 /* Copy BLOCK_SIZE_BYTES chunks. */
14104
14105 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14106 {
14107 /* Load words. */
14108 if (src_aligned && interleave_factor > 1)
14109 {
14110 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14111 TRUE, srcbase, &srcoffset));
14112 src_autoinc += UNITS_PER_WORD * interleave_factor;
14113 }
14114 else
14115 {
14116 for (j = 0; j < interleave_factor; j++)
14117 {
14118 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14119 - src_autoinc));
14120 mem = adjust_automodify_address (srcbase, SImode, addr,
14121 srcoffset + j * UNITS_PER_WORD);
14122 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14123 }
14124 srcoffset += block_size_bytes;
14125 }
14126
14127 /* Store words. */
14128 if (dst_aligned && interleave_factor > 1)
14129 {
14130 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14131 TRUE, dstbase, &dstoffset));
14132 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14133 }
14134 else
14135 {
14136 for (j = 0; j < interleave_factor; j++)
14137 {
14138 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14139 - dst_autoinc));
14140 mem = adjust_automodify_address (dstbase, SImode, addr,
14141 dstoffset + j * UNITS_PER_WORD);
14142 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14143 }
14144 dstoffset += block_size_bytes;
14145 }
14146
14147 remaining -= block_size_bytes;
14148 }
14149
14150 /* Copy any whole words left (note these aren't interleaved with any
14151 subsequent halfword/byte load/stores in the interests of simplicity). */
14152
14153 words = remaining / UNITS_PER_WORD;
14154
14155 gcc_assert (words < interleave_factor);
14156
14157 if (src_aligned && words > 1)
14158 {
14159 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14160 &srcoffset));
14161 src_autoinc += UNITS_PER_WORD * words;
14162 }
14163 else
14164 {
14165 for (j = 0; j < words; j++)
14166 {
14167 addr = plus_constant (Pmode, src,
14168 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14169 mem = adjust_automodify_address (srcbase, SImode, addr,
14170 srcoffset + j * UNITS_PER_WORD);
14171 if (src_aligned)
14172 emit_move_insn (regs[j], mem);
14173 else
14174 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14175 }
14176 srcoffset += words * UNITS_PER_WORD;
14177 }
14178
14179 if (dst_aligned && words > 1)
14180 {
14181 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14182 &dstoffset));
14183 dst_autoinc += words * UNITS_PER_WORD;
14184 }
14185 else
14186 {
14187 for (j = 0; j < words; j++)
14188 {
14189 addr = plus_constant (Pmode, dst,
14190 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14191 mem = adjust_automodify_address (dstbase, SImode, addr,
14192 dstoffset + j * UNITS_PER_WORD);
14193 if (dst_aligned)
14194 emit_move_insn (mem, regs[j]);
14195 else
14196 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14197 }
14198 dstoffset += words * UNITS_PER_WORD;
14199 }
14200
14201 remaining -= words * UNITS_PER_WORD;
14202
14203 gcc_assert (remaining < 4);
14204
14205 /* Copy a halfword if necessary. */
14206
14207 if (remaining >= 2)
14208 {
14209 halfword_tmp = gen_reg_rtx (SImode);
14210
14211 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14212 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14213 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14214
14215 /* Either write out immediately, or delay until we've loaded the last
14216 byte, depending on interleave factor. */
14217 if (interleave_factor == 1)
14218 {
14219 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14220 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14221 emit_insn (gen_unaligned_storehi (mem,
14222 gen_lowpart (HImode, halfword_tmp)));
14223 halfword_tmp = NULL;
14224 dstoffset += 2;
14225 }
14226
14227 remaining -= 2;
14228 srcoffset += 2;
14229 }
14230
14231 gcc_assert (remaining < 2);
14232
14233 /* Copy last byte. */
14234
14235 if ((remaining & 1) != 0)
14236 {
14237 byte_tmp = gen_reg_rtx (SImode);
14238
14239 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14240 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14241 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14242
14243 if (interleave_factor == 1)
14244 {
14245 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14246 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14247 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14248 byte_tmp = NULL;
14249 dstoffset++;
14250 }
14251
14252 remaining--;
14253 srcoffset++;
14254 }
14255
14256 /* Store last halfword if we haven't done so already. */
14257
14258 if (halfword_tmp)
14259 {
14260 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14261 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14262 emit_insn (gen_unaligned_storehi (mem,
14263 gen_lowpart (HImode, halfword_tmp)));
14264 dstoffset += 2;
14265 }
14266
14267 /* Likewise for last byte. */
14268
14269 if (byte_tmp)
14270 {
14271 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14272 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14273 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14274 dstoffset++;
14275 }
14276
14277 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14278 }
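
/* Illustrative sketch only (not emitted literally by the expander above):
   for a 7-byte copy with INTERLEAVE_FACTOR == 1 and an unaligned source,
   the straight-line expansion has roughly this shape

	ldr	r0, [src]		@ unaligned word copy
	str	r0, [dst]
	ldrh	r1, [src, #4]		@ unaligned halfword copy
	strh	r1, [dst, #4]
	ldrb	r2, [src, #6]		@ final byte
	strb	r2, [dst, #6]

   with the actual register numbers chosen by the register allocator.  */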
14279
14280 /* From mips_adjust_block_mem:
14281
14282 Helper function for doing a loop-based block operation on memory
14283 reference MEM. Each iteration of the loop will operate on LENGTH
14284 bytes of MEM.
14285
14286 Create a new base register for use within the loop and point it to
14287 the start of MEM. Create a new memory reference that uses this
14288 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14289
14290 static void
14291 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14292 rtx *loop_mem)
14293 {
14294 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14295
14296 /* Although the new mem does not refer to a known location,
14297 it does keep up to LENGTH bytes of alignment. */
14298 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14299 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14300 }
14301
14302 /* From mips_block_move_loop:
14303
14304 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14305 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14306 the memory regions do not overlap. */
14307
14308 static void
14309 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14310 unsigned int interleave_factor,
14311 HOST_WIDE_INT bytes_per_iter)
14312 {
14313 rtx src_reg, dest_reg, final_src, test;
14314 HOST_WIDE_INT leftover;
14315
14316 leftover = length % bytes_per_iter;
14317 length -= leftover;
14318
14319 /* Create registers and memory references for use within the loop. */
14320 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14321 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14322
14323 /* Calculate the value that SRC_REG should have after the last iteration of
14324 the loop. */
14325 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14326 0, 0, OPTAB_WIDEN);
14327
14328 /* Emit the start of the loop. */
14329 rtx_code_label *label = gen_label_rtx ();
14330 emit_label (label);
14331
14332 /* Emit the loop body. */
14333 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14334 interleave_factor);
14335
14336 /* Move on to the next block. */
14337 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14338 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14339
14340 /* Emit the loop condition. */
14341 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14342 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14343
14344 /* Mop up any left-over bytes. */
14345 if (leftover)
14346 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14347 }
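
/* A rough sketch (for illustration only) of the code produced by the loop
   expander above for LENGTH == 40, INTERLEAVE_FACTOR == 4 and
   BYTES_PER_ITER == 16:

	add	final_src, src_reg, #32
   L:
	<straight copy of 16 bytes>
	add	src_reg, src_reg, #16
	add	dst_reg, dst_reg, #16
	cmp	src_reg, final_src
	bne	L
	<straight copy of the remaining 8 bytes>

   The exact instructions depend on alignment and on later optimization
   passes (the loop may be unrolled, for instance).  */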
14348
14349 /* Emit a block move when either the source or destination is unaligned (not
14350 aligned to a four-byte boundary). This may need further tuning depending on
14351 core type, optimize_size setting, etc. */
14352
14353 static int
14354 arm_cpymemqi_unaligned (rtx *operands)
14355 {
14356 HOST_WIDE_INT length = INTVAL (operands[2]);
14357
14358 if (optimize_size)
14359 {
14360 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14361 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14362 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14363 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14364 or dst_aligned though: allow more interleaving in those cases since the
14365 resulting code can be smaller. */
14366 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14367 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14368
14369 if (length > 12)
14370 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14371 interleave_factor, bytes_per_iter);
14372 else
14373 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14374 interleave_factor);
14375 }
14376 else
14377 {
14378 /* Note that the loop created by arm_block_move_unaligned_loop may be
14379 subject to loop unrolling, which makes tuning this condition a little
14380 redundant. */
14381 if (length > 32)
14382 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14383 else
14384 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14385 }
14386
14387 return 1;
14388 }
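
/* For example (illustration only): when optimizing for size, a 20-byte copy
   from a word-aligned source takes the loop form above with two registers
   per iteration (8 bytes of ldm/stm per trip) plus a 4-byte straight-line
   tail, whereas without -Os the same copy is short enough (<= 32 bytes) to
   be expanded entirely straight-line with an interleave factor of 4.  */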
14389
14390 int
14391 arm_gen_cpymemqi (rtx *operands)
14392 {
14393 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14394 HOST_WIDE_INT srcoffset, dstoffset;
14395 rtx src, dst, srcbase, dstbase;
14396 rtx part_bytes_reg = NULL;
14397 rtx mem;
14398
14399 if (!CONST_INT_P (operands[2])
14400 || !CONST_INT_P (operands[3])
14401 || INTVAL (operands[2]) > 64)
14402 return 0;
14403
14404 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14405 return arm_cpymemqi_unaligned (operands);
14406
14407 if (INTVAL (operands[3]) & 3)
14408 return 0;
14409
14410 dstbase = operands[0];
14411 srcbase = operands[1];
14412
14413 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14414 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14415
14416 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14417 out_words_to_go = INTVAL (operands[2]) / 4;
14418 last_bytes = INTVAL (operands[2]) & 3;
14419 dstoffset = srcoffset = 0;
14420
14421 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14422 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14423
14424 while (in_words_to_go >= 2)
14425 {
14426 if (in_words_to_go > 4)
14427 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14428 TRUE, srcbase, &srcoffset));
14429 else
14430 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14431 src, FALSE, srcbase,
14432 &srcoffset));
14433
14434 if (out_words_to_go)
14435 {
14436 if (out_words_to_go > 4)
14437 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14438 TRUE, dstbase, &dstoffset));
14439 else if (out_words_to_go != 1)
14440 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14441 out_words_to_go, dst,
14442 (last_bytes == 0
14443 ? FALSE : TRUE),
14444 dstbase, &dstoffset));
14445 else
14446 {
14447 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14448 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14449 if (last_bytes != 0)
14450 {
14451 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14452 dstoffset += 4;
14453 }
14454 }
14455 }
14456
14457 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14458 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14459 }
14460
14461 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14462 if (out_words_to_go)
14463 {
14464 rtx sreg;
14465
14466 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14467 sreg = copy_to_reg (mem);
14468
14469 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14470 emit_move_insn (mem, sreg);
14471 in_words_to_go--;
14472
14473 gcc_assert (!in_words_to_go); /* Sanity check */
14474 }
14475
14476 if (in_words_to_go)
14477 {
14478 gcc_assert (in_words_to_go > 0);
14479
14480 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14481 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14482 }
14483
14484 gcc_assert (!last_bytes || part_bytes_reg);
14485
14486 if (BYTES_BIG_ENDIAN && last_bytes)
14487 {
14488 rtx tmp = gen_reg_rtx (SImode);
14489
14490 /* The bytes we want are in the top end of the word. */
14491 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14492 GEN_INT (8 * (4 - last_bytes))));
14493 part_bytes_reg = tmp;
14494
14495 while (last_bytes)
14496 {
14497 mem = adjust_automodify_address (dstbase, QImode,
14498 plus_constant (Pmode, dst,
14499 last_bytes - 1),
14500 dstoffset + last_bytes - 1);
14501 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14502
14503 if (--last_bytes)
14504 {
14505 tmp = gen_reg_rtx (SImode);
14506 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14507 part_bytes_reg = tmp;
14508 }
14509 }
14510
14511 }
14512 else
14513 {
14514 if (last_bytes > 1)
14515 {
14516 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14517 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14518 last_bytes -= 2;
14519 if (last_bytes)
14520 {
14521 rtx tmp = gen_reg_rtx (SImode);
14522 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14523 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14524 part_bytes_reg = tmp;
14525 dstoffset += 2;
14526 }
14527 }
14528
14529 if (last_bytes)
14530 {
14531 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14532 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14533 }
14534 }
14535
14536 return 1;
14537 }
14538
14539 /* Helper for gen_cpymem_ldrd_strd. Increase the address of memory rtx
14540 by mode size. */
14541 inline static rtx
14542 next_consecutive_mem (rtx mem)
14543 {
14544 machine_mode mode = GET_MODE (mem);
14545 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14546 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14547
14548 return adjust_automodify_address (mem, mode, addr, offset);
14549 }
14550
14551 /* Copy using LDRD/STRD instructions whenever possible.
14552 Returns true upon success. */
14553 bool
14554 gen_cpymem_ldrd_strd (rtx *operands)
14555 {
14556 unsigned HOST_WIDE_INT len;
14557 HOST_WIDE_INT align;
14558 rtx src, dst, base;
14559 rtx reg0;
14560 bool src_aligned, dst_aligned;
14561 bool src_volatile, dst_volatile;
14562
14563 gcc_assert (CONST_INT_P (operands[2]));
14564 gcc_assert (CONST_INT_P (operands[3]));
14565
14566 len = UINTVAL (operands[2]);
14567 if (len > 64)
14568 return false;
14569
14570 /* Maximum alignment we can assume for both src and dst buffers. */
14571 align = INTVAL (operands[3]);
14572
14573 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14574 return false;
14575
14576 /* Place src and dst addresses in registers
14577 and update the corresponding mem rtx. */
14578 dst = operands[0];
14579 dst_volatile = MEM_VOLATILE_P (dst);
14580 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14581 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14582 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14583
14584 src = operands[1];
14585 src_volatile = MEM_VOLATILE_P (src);
14586 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14587 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14588 src = adjust_automodify_address (src, VOIDmode, base, 0);
14589
14590 if (!unaligned_access && !(src_aligned && dst_aligned))
14591 return false;
14592
14593 if (src_volatile || dst_volatile)
14594 return false;
14595
14596 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14597 if (!(dst_aligned || src_aligned))
14598 return arm_gen_cpymemqi (operands);
14599
14600   /* If either src or dst is unaligned we'll be accessing it as pairs
14601 of unaligned SImode accesses. Otherwise we can generate DImode
14602 ldrd/strd instructions. */
14603 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14604 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14605
14606 while (len >= 8)
14607 {
14608 len -= 8;
14609 reg0 = gen_reg_rtx (DImode);
14610 rtx low_reg = NULL_RTX;
14611 rtx hi_reg = NULL_RTX;
14612
14613 if (!src_aligned || !dst_aligned)
14614 {
14615 low_reg = gen_lowpart (SImode, reg0);
14616 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14617 }
14618 if (src_aligned)
14619 emit_move_insn (reg0, src);
14620 else
14621 {
14622 emit_insn (gen_unaligned_loadsi (low_reg, src));
14623 src = next_consecutive_mem (src);
14624 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14625 }
14626
14627 if (dst_aligned)
14628 emit_move_insn (dst, reg0);
14629 else
14630 {
14631 emit_insn (gen_unaligned_storesi (dst, low_reg));
14632 dst = next_consecutive_mem (dst);
14633 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14634 }
14635
14636 src = next_consecutive_mem (src);
14637 dst = next_consecutive_mem (dst);
14638 }
14639
14640 gcc_assert (len < 8);
14641 if (len >= 4)
14642 {
14643 /* More than a word but less than a double-word to copy. Copy a word. */
14644 reg0 = gen_reg_rtx (SImode);
14645 src = adjust_address (src, SImode, 0);
14646 dst = adjust_address (dst, SImode, 0);
14647 if (src_aligned)
14648 emit_move_insn (reg0, src);
14649 else
14650 emit_insn (gen_unaligned_loadsi (reg0, src));
14651
14652 if (dst_aligned)
14653 emit_move_insn (dst, reg0);
14654 else
14655 emit_insn (gen_unaligned_storesi (dst, reg0));
14656
14657 src = next_consecutive_mem (src);
14658 dst = next_consecutive_mem (dst);
14659 len -= 4;
14660 }
14661
14662 if (len == 0)
14663 return true;
14664
14665 /* Copy the remaining bytes. */
14666 if (len >= 2)
14667 {
14668 dst = adjust_address (dst, HImode, 0);
14669 src = adjust_address (src, HImode, 0);
14670 reg0 = gen_reg_rtx (SImode);
14671 if (src_aligned)
14672 emit_insn (gen_zero_extendhisi2 (reg0, src));
14673 else
14674 emit_insn (gen_unaligned_loadhiu (reg0, src));
14675
14676 if (dst_aligned)
14677 emit_insn (gen_movhi (dst, gen_lowpart(HImode, reg0)));
14678 else
14679 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14680
14681 src = next_consecutive_mem (src);
14682 dst = next_consecutive_mem (dst);
14683 if (len == 2)
14684 return true;
14685 }
14686
14687 dst = adjust_address (dst, QImode, 0);
14688 src = adjust_address (src, QImode, 0);
14689 reg0 = gen_reg_rtx (QImode);
14690 emit_move_insn (reg0, src);
14691 emit_move_insn (dst, reg0);
14692 return true;
14693 }
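
/* Illustrative sketch only: for a 14-byte copy with both buffers known to be
   word aligned, the expansion above corresponds roughly to

	ldrd	r0, r1, [src]
	strd	r0, r1, [dst]
	ldr	r2, [src, #8]
	str	r2, [dst, #8]
	ldrh	r3, [src, #12]
	strh	r3, [dst, #12]

   i.e. one DImode move, one SImode move and one HImode move.  Unaligned
   buffers use pairs of unaligned SImode accesses instead of LDRD/STRD.  */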
14694
14695 /* Select a dominance comparison mode if possible for a test of the general
14696 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14697 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14698 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14699 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14700 In all cases OP will be either EQ or NE, but we don't need to know which
14701 here. If we are unable to support a dominance comparison we return
14702 CC mode. This will then fail to match for the RTL expressions that
14703 generate this call. */
14704 machine_mode
14705 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14706 {
14707 enum rtx_code cond1, cond2;
14708 int swapped = 0;
14709
14710 /* Currently we will probably get the wrong result if the individual
14711 comparisons are not simple. This also ensures that it is safe to
14712 reverse a comparison if necessary. */
14713 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14714 != CCmode)
14715 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14716 != CCmode))
14717 return CCmode;
14718
14719 /* The if_then_else variant of this tests the second condition if the
14720 first passes, but is true if the first fails. Reverse the first
14721 condition to get a true "inclusive-or" expression. */
14722 if (cond_or == DOM_CC_NX_OR_Y)
14723 cond1 = reverse_condition (cond1);
14724
14725 /* If the comparisons are not equal, and one doesn't dominate the other,
14726 then we can't do this. */
14727 if (cond1 != cond2
14728 && !comparison_dominates_p (cond1, cond2)
14729 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14730 return CCmode;
14731
14732 if (swapped)
14733 std::swap (cond1, cond2);
14734
14735 switch (cond1)
14736 {
14737 case EQ:
14738 if (cond_or == DOM_CC_X_AND_Y)
14739 return CC_DEQmode;
14740
14741 switch (cond2)
14742 {
14743 case EQ: return CC_DEQmode;
14744 case LE: return CC_DLEmode;
14745 case LEU: return CC_DLEUmode;
14746 case GE: return CC_DGEmode;
14747 case GEU: return CC_DGEUmode;
14748 default: gcc_unreachable ();
14749 }
14750
14751 case LT:
14752 if (cond_or == DOM_CC_X_AND_Y)
14753 return CC_DLTmode;
14754
14755 switch (cond2)
14756 {
14757 case LT:
14758 return CC_DLTmode;
14759 case LE:
14760 return CC_DLEmode;
14761 case NE:
14762 return CC_DNEmode;
14763 default:
14764 gcc_unreachable ();
14765 }
14766
14767 case GT:
14768 if (cond_or == DOM_CC_X_AND_Y)
14769 return CC_DGTmode;
14770
14771 switch (cond2)
14772 {
14773 case GT:
14774 return CC_DGTmode;
14775 case GE:
14776 return CC_DGEmode;
14777 case NE:
14778 return CC_DNEmode;
14779 default:
14780 gcc_unreachable ();
14781 }
14782
14783 case LTU:
14784 if (cond_or == DOM_CC_X_AND_Y)
14785 return CC_DLTUmode;
14786
14787 switch (cond2)
14788 {
14789 case LTU:
14790 return CC_DLTUmode;
14791 case LEU:
14792 return CC_DLEUmode;
14793 case NE:
14794 return CC_DNEmode;
14795 default:
14796 gcc_unreachable ();
14797 }
14798
14799 case GTU:
14800 if (cond_or == DOM_CC_X_AND_Y)
14801 return CC_DGTUmode;
14802
14803 switch (cond2)
14804 {
14805 case GTU:
14806 return CC_DGTUmode;
14807 case GEU:
14808 return CC_DGEUmode;
14809 case NE:
14810 return CC_DNEmode;
14811 default:
14812 gcc_unreachable ();
14813 }
14814
14815 /* The remaining cases only occur when both comparisons are the
14816 same. */
14817 case NE:
14818 gcc_assert (cond1 == cond2);
14819 return CC_DNEmode;
14820
14821 case LE:
14822 gcc_assert (cond1 == cond2);
14823 return CC_DLEmode;
14824
14825 case GE:
14826 gcc_assert (cond1 == cond2);
14827 return CC_DGEmode;
14828
14829 case LEU:
14830 gcc_assert (cond1 == cond2);
14831 return CC_DLEUmode;
14832
14833 case GEU:
14834 gcc_assert (cond1 == cond2);
14835 return CC_DGEUmode;
14836
14837 default:
14838 gcc_unreachable ();
14839 }
14840 }
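
/* Worked example (for illustration): for a test such as (a == b || a <= b),
   COND_OR is DOM_CC_X_OR_Y, COND1 is EQ and COND2 is LE.  EQ dominates LE,
   so the function returns CC_DLEmode and a single comparison of a with b
   can feed both conditions.  */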
14841
14842 machine_mode
14843 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14844 {
14845 /* All floating point compares return CCFP if it is an equality
14846 comparison, and CCFPE otherwise. */
14847 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14848 {
14849 switch (op)
14850 {
14851 case EQ:
14852 case NE:
14853 case UNORDERED:
14854 case ORDERED:
14855 case UNLT:
14856 case UNLE:
14857 case UNGT:
14858 case UNGE:
14859 case UNEQ:
14860 case LTGT:
14861 return CCFPmode;
14862
14863 case LT:
14864 case LE:
14865 case GT:
14866 case GE:
14867 return CCFPEmode;
14868
14869 default:
14870 gcc_unreachable ();
14871 }
14872 }
14873
14874 /* A compare with a shifted operand. Because of canonicalization, the
14875 comparison will have to be swapped when we emit the assembler. */
14876 if (GET_MODE (y) == SImode
14877 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14878 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14879 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14880 || GET_CODE (x) == ROTATERT))
14881 return CC_SWPmode;
14882
14883 /* This operation is performed swapped, but since we only rely on the Z
14884 flag we don't need an additional mode. */
14885 if (GET_MODE (y) == SImode
14886 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14887 && GET_CODE (x) == NEG
14888 && (op == EQ || op == NE))
14889 return CC_Zmode;
14890
14891 /* This is a special case that is used by combine to allow a
14892 comparison of a shifted byte load to be split into a zero-extend
14893 followed by a comparison of the shifted integer (only valid for
14894 equalities and unsigned inequalities). */
14895 if (GET_MODE (x) == SImode
14896 && GET_CODE (x) == ASHIFT
14897 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14898 && GET_CODE (XEXP (x, 0)) == SUBREG
14899 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14900 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14901 && (op == EQ || op == NE
14902 || op == GEU || op == GTU || op == LTU || op == LEU)
14903 && CONST_INT_P (y))
14904 return CC_Zmode;
14905
14906 /* A construct for a conditional compare, if the false arm contains
14907 0, then both conditions must be true, otherwise either condition
14908 must be true. Not all conditions are possible, so CCmode is
14909 returned if it can't be done. */
14910 if (GET_CODE (x) == IF_THEN_ELSE
14911 && (XEXP (x, 2) == const0_rtx
14912 || XEXP (x, 2) == const1_rtx)
14913 && COMPARISON_P (XEXP (x, 0))
14914 && COMPARISON_P (XEXP (x, 1)))
14915 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14916 INTVAL (XEXP (x, 2)));
14917
14918 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14919 if (GET_CODE (x) == AND
14920 && (op == EQ || op == NE)
14921 && COMPARISON_P (XEXP (x, 0))
14922 && COMPARISON_P (XEXP (x, 1)))
14923 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14924 DOM_CC_X_AND_Y);
14925
14926 if (GET_CODE (x) == IOR
14927 && (op == EQ || op == NE)
14928 && COMPARISON_P (XEXP (x, 0))
14929 && COMPARISON_P (XEXP (x, 1)))
14930 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14931 DOM_CC_X_OR_Y);
14932
14933 /* An operation (on Thumb) where we want to test for a single bit.
14934 This is done by shifting that bit up into the top bit of a
14935 scratch register; we can then branch on the sign bit. */
14936 if (TARGET_THUMB1
14937 && GET_MODE (x) == SImode
14938 && (op == EQ || op == NE)
14939 && GET_CODE (x) == ZERO_EXTRACT
14940 && XEXP (x, 1) == const1_rtx)
14941 return CC_Nmode;
14942
14943 /* An operation that sets the condition codes as a side-effect, the
14944 V flag is not set correctly, so we can only use comparisons where
14945 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14946 instead.) */
14947 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14948 if (GET_MODE (x) == SImode
14949 && y == const0_rtx
14950 && (op == EQ || op == NE || op == LT || op == GE)
14951 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14952 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14953 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14954 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14955 || GET_CODE (x) == LSHIFTRT
14956 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14957 || GET_CODE (x) == ROTATERT
14958 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14959 return CC_NOOVmode;
14960
14961 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14962 return CC_Zmode;
14963
14964 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14965 && GET_CODE (x) == PLUS
14966 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14967 return CC_Cmode;
14968
14969 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14970 {
14971 switch (op)
14972 {
14973 case EQ:
14974 case NE:
14975 /* A DImode comparison against zero can be implemented by
14976 or'ing the two halves together. */
14977 if (y == const0_rtx)
14978 return CC_Zmode;
14979
14980 /* We can do an equality test in three Thumb instructions. */
14981 if (!TARGET_32BIT)
14982 return CC_Zmode;
14983
14984 /* FALLTHROUGH */
14985
14986 case LTU:
14987 case LEU:
14988 case GTU:
14989 case GEU:
14990 /* DImode unsigned comparisons can be implemented by cmp +
14991 cmpeq without a scratch register. Not worth doing in
14992 Thumb-2. */
14993 if (TARGET_32BIT)
14994 return CC_CZmode;
14995
14996 /* FALLTHROUGH */
14997
14998 case LT:
14999 case LE:
15000 case GT:
15001 case GE:
15002 /* DImode signed and unsigned comparisons can be implemented
15003 by cmp + sbcs with a scratch register, but that does not
15004 set the Z flag - we must reverse GT/LE/GTU/LEU. */
15005 gcc_assert (op != EQ && op != NE);
15006 return CC_NCVmode;
15007
15008 default:
15009 gcc_unreachable ();
15010 }
15011 }
15012
15013 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
15014 return GET_MODE (x);
15015
15016 return CCmode;
15017 }
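
/* Worked example (illustration only): for an unsigned overflow check such as
   (a + b < a), X is the PLUS and Y is one of its operands, so the
   (op == LTU || op == GEU) case above selects CC_Cmode and the result can be
   read straight from the carry flag set by the addition.  */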
15018
15019 /* X and Y are two things to compare using CODE.  Emit the compare insn and
15020    return the rtx for the CC register in the proper mode.  SCRATCH is an SImode
15021    register needed by some DImode comparisons once reload has completed.  */
15022 rtx
15023 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
15024 {
15025 machine_mode mode;
15026 rtx cc_reg;
15027 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
15028
15029 /* We might have X as a constant, Y as a register because of the predicates
15030 used for cmpdi. If so, force X to a register here. */
15031 if (dimode_comparison && !REG_P (x))
15032 x = force_reg (DImode, x);
15033
15034 mode = SELECT_CC_MODE (code, x, y);
15035 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
15036
15037 if (dimode_comparison
15038 && mode != CC_CZmode)
15039 {
15040 rtx clobber, set;
15041
15042 /* To compare two non-zero values for equality, XOR them and
15043 then compare against zero. Not used for ARM mode; there
15044 CC_CZmode is cheaper. */
15045 if (mode == CC_Zmode && y != const0_rtx)
15046 {
15047 gcc_assert (!reload_completed);
15048 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
15049 y = const0_rtx;
15050 }
15051
15052 /* A scratch register is required. */
15053 if (reload_completed)
15054 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15055 else
15056 scratch = gen_rtx_SCRATCH (SImode);
15057
15058 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15059 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
15060 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15061 }
15062 else
15063 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15064
15065 return cc_reg;
15066 }
15067
15068 /* Generate a sequence of insns that will generate the correct return
15069 address mask depending on the physical architecture that the program
15070 is running on. */
15071 rtx
15072 arm_gen_return_addr_mask (void)
15073 {
15074 rtx reg = gen_reg_rtx (Pmode);
15075
15076 emit_insn (gen_return_addr_mask (reg));
15077 return reg;
15078 }
15079
15080 void
15081 arm_reload_in_hi (rtx *operands)
15082 {
15083 rtx ref = operands[1];
15084 rtx base, scratch;
15085 HOST_WIDE_INT offset = 0;
15086
15087 if (GET_CODE (ref) == SUBREG)
15088 {
15089 offset = SUBREG_BYTE (ref);
15090 ref = SUBREG_REG (ref);
15091 }
15092
15093 if (REG_P (ref))
15094 {
15095 /* We have a pseudo which has been spilt onto the stack; there
15096 are two cases here: the first where there is a simple
15097 stack-slot replacement and a second where the stack-slot is
15098 out of range, or is used as a subreg. */
15099 if (reg_equiv_mem (REGNO (ref)))
15100 {
15101 ref = reg_equiv_mem (REGNO (ref));
15102 base = find_replacement (&XEXP (ref, 0));
15103 }
15104 else
15105 /* The slot is out of range, or was dressed up in a SUBREG. */
15106 base = reg_equiv_address (REGNO (ref));
15107
15108 /* PR 62554: If there is no equivalent memory location then just move
15109 the value as an SImode register move. This happens when the target
15110 architecture variant does not have an HImode register move. */
15111 if (base == NULL)
15112 {
15113 gcc_assert (REG_P (operands[0]));
15114 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
15115 gen_rtx_SUBREG (SImode, ref, 0)));
15116 return;
15117 }
15118 }
15119 else
15120 base = find_replacement (&XEXP (ref, 0));
15121
15122 /* Handle the case where the address is too complex to be offset by 1. */
15123 if (GET_CODE (base) == MINUS
15124 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15125 {
15126 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15127
15128 emit_set_insn (base_plus, base);
15129 base = base_plus;
15130 }
15131 else if (GET_CODE (base) == PLUS)
15132 {
15133 /* The addend must be CONST_INT, or we would have dealt with it above. */
15134 HOST_WIDE_INT hi, lo;
15135
15136 offset += INTVAL (XEXP (base, 1));
15137 base = XEXP (base, 0);
15138
15139 /* Rework the address into a legal sequence of insns. */
15140 /* Valid range for lo is -4095 -> 4095 */
15141 lo = (offset >= 0
15142 ? (offset & 0xfff)
15143 : -((-offset) & 0xfff));
15144
15145 /* Corner case, if lo is the max offset then we would be out of range
15146 once we have added the additional 1 below, so bump the msb into the
15147 pre-loading insn(s). */
15148 if (lo == 4095)
15149 lo &= 0x7ff;
15150
15151 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15152 ^ (HOST_WIDE_INT) 0x80000000)
15153 - (HOST_WIDE_INT) 0x80000000);
15154
15155 gcc_assert (hi + lo == offset);
15156
15157 if (hi != 0)
15158 {
15159 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15160
15161 /* Get the base address; addsi3 knows how to handle constants
15162 that require more than one insn. */
15163 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15164 base = base_plus;
15165 offset = lo;
15166 }
15167 }
15168
15169 /* Operands[2] may overlap operands[0] (though it won't overlap
15170 operands[1]), that's why we asked for a DImode reg -- so we can
15171      use the half that does not overlap.  */
15172 if (REGNO (operands[2]) == REGNO (operands[0]))
15173 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15174 else
15175 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15176
15177 emit_insn (gen_zero_extendqisi2 (scratch,
15178 gen_rtx_MEM (QImode,
15179 plus_constant (Pmode, base,
15180 offset))));
15181 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15182 gen_rtx_MEM (QImode,
15183 plus_constant (Pmode, base,
15184 offset + 1))));
15185 if (!BYTES_BIG_ENDIAN)
15186 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15187 gen_rtx_IOR (SImode,
15188 gen_rtx_ASHIFT
15189 (SImode,
15190 gen_rtx_SUBREG (SImode, operands[0], 0),
15191 GEN_INT (8)),
15192 scratch));
15193 else
15194 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15195 gen_rtx_IOR (SImode,
15196 gen_rtx_ASHIFT (SImode, scratch,
15197 GEN_INT (8)),
15198 gen_rtx_SUBREG (SImode, operands[0], 0)));
15199 }
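
/* Worked example of the offset splitting above (illustration only): for
   OFFSET == 4095 we get LO == 0x7ff (2047) and HI == 2048, so HI is added to
   the base with addsi3 and both the byte at LO and the byte at LO + 1 stay
   within the legal load offset range; for OFFSET == 0x1234 the split is
   simply LO == 0x234, HI == 0x1000.  */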
15200
15201 /* Handle storing a half-word to memory during reload by synthesizing it as two
15202 byte stores. Take care not to clobber the input values until after we
15203 have moved them somewhere safe. This code assumes that if the DImode
15204 scratch in operands[2] overlaps either the input value or output address
15205 in some way, then that value must die in this insn (we absolutely need
15206 two scratch registers for some corner cases). */
15207 void
15208 arm_reload_out_hi (rtx *operands)
15209 {
15210 rtx ref = operands[0];
15211 rtx outval = operands[1];
15212 rtx base, scratch;
15213 HOST_WIDE_INT offset = 0;
15214
15215 if (GET_CODE (ref) == SUBREG)
15216 {
15217 offset = SUBREG_BYTE (ref);
15218 ref = SUBREG_REG (ref);
15219 }
15220
15221 if (REG_P (ref))
15222 {
15223 /* We have a pseudo which has been spilt onto the stack; there
15224 are two cases here: the first where there is a simple
15225 stack-slot replacement and a second where the stack-slot is
15226 out of range, or is used as a subreg. */
15227 if (reg_equiv_mem (REGNO (ref)))
15228 {
15229 ref = reg_equiv_mem (REGNO (ref));
15230 base = find_replacement (&XEXP (ref, 0));
15231 }
15232 else
15233 /* The slot is out of range, or was dressed up in a SUBREG. */
15234 base = reg_equiv_address (REGNO (ref));
15235
15236 /* PR 62254: If there is no equivalent memory location then just move
15237 the value as an SImode register move. This happens when the target
15238 architecture variant does not have an HImode register move. */
15239 if (base == NULL)
15240 {
15241 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15242
15243 if (REG_P (outval))
15244 {
15245 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15246 gen_rtx_SUBREG (SImode, outval, 0)));
15247 }
15248 else /* SUBREG_P (outval) */
15249 {
15250 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15251 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15252 SUBREG_REG (outval)));
15253 else
15254 /* FIXME: Handle other cases ? */
15255 gcc_unreachable ();
15256 }
15257 return;
15258 }
15259 }
15260 else
15261 base = find_replacement (&XEXP (ref, 0));
15262
15263 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15264
15265 /* Handle the case where the address is too complex to be offset by 1. */
15266 if (GET_CODE (base) == MINUS
15267 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15268 {
15269 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15270
15271 /* Be careful not to destroy OUTVAL. */
15272 if (reg_overlap_mentioned_p (base_plus, outval))
15273 {
15274 /* Updating base_plus might destroy outval, see if we can
15275 swap the scratch and base_plus. */
15276 if (!reg_overlap_mentioned_p (scratch, outval))
15277 std::swap (scratch, base_plus);
15278 else
15279 {
15280 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15281
15282 /* Be conservative and copy OUTVAL into the scratch now,
15283 this should only be necessary if outval is a subreg
15284 of something larger than a word. */
15285 /* XXX Might this clobber base? I can't see how it can,
15286 since scratch is known to overlap with OUTVAL, and
15287 must be wider than a word. */
15288 emit_insn (gen_movhi (scratch_hi, outval));
15289 outval = scratch_hi;
15290 }
15291 }
15292
15293 emit_set_insn (base_plus, base);
15294 base = base_plus;
15295 }
15296 else if (GET_CODE (base) == PLUS)
15297 {
15298 /* The addend must be CONST_INT, or we would have dealt with it above. */
15299 HOST_WIDE_INT hi, lo;
15300
15301 offset += INTVAL (XEXP (base, 1));
15302 base = XEXP (base, 0);
15303
15304 /* Rework the address into a legal sequence of insns. */
15305 /* Valid range for lo is -4095 -> 4095 */
15306 lo = (offset >= 0
15307 ? (offset & 0xfff)
15308 : -((-offset) & 0xfff));
15309
15310 /* Corner case, if lo is the max offset then we would be out of range
15311 once we have added the additional 1 below, so bump the msb into the
15312 pre-loading insn(s). */
15313 if (lo == 4095)
15314 lo &= 0x7ff;
15315
15316 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15317 ^ (HOST_WIDE_INT) 0x80000000)
15318 - (HOST_WIDE_INT) 0x80000000);
15319
15320 gcc_assert (hi + lo == offset);
15321
15322 if (hi != 0)
15323 {
15324 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15325
15326 /* Be careful not to destroy OUTVAL. */
15327 if (reg_overlap_mentioned_p (base_plus, outval))
15328 {
15329 /* Updating base_plus might destroy outval, see if we
15330 can swap the scratch and base_plus. */
15331 if (!reg_overlap_mentioned_p (scratch, outval))
15332 std::swap (scratch, base_plus);
15333 else
15334 {
15335 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15336
15337 /* Be conservative and copy outval into scratch now,
15338 this should only be necessary if outval is a
15339 subreg of something larger than a word. */
15340 /* XXX Might this clobber base? I can't see how it
15341 can, since scratch is known to overlap with
15342 outval. */
15343 emit_insn (gen_movhi (scratch_hi, outval));
15344 outval = scratch_hi;
15345 }
15346 }
15347
15348 /* Get the base address; addsi3 knows how to handle constants
15349 that require more than one insn. */
15350 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15351 base = base_plus;
15352 offset = lo;
15353 }
15354 }
15355
15356 if (BYTES_BIG_ENDIAN)
15357 {
15358 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15359 plus_constant (Pmode, base,
15360 offset + 1)),
15361 gen_lowpart (QImode, outval)));
15362 emit_insn (gen_lshrsi3 (scratch,
15363 gen_rtx_SUBREG (SImode, outval, 0),
15364 GEN_INT (8)));
15365 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15366 offset)),
15367 gen_lowpart (QImode, scratch)));
15368 }
15369 else
15370 {
15371 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15372 offset)),
15373 gen_lowpart (QImode, outval)));
15374 emit_insn (gen_lshrsi3 (scratch,
15375 gen_rtx_SUBREG (SImode, outval, 0),
15376 GEN_INT (8)));
15377 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15378 plus_constant (Pmode, base,
15379 offset + 1)),
15380 gen_lowpart (QImode, scratch)));
15381 }
15382 }
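
/* Illustrative sketch only: on a little-endian target the function above
   turns a reloaded halfword store into something along the lines of

	strb	outval, [base, #offset]
	lsr	scratch, outval, #8
	strb	scratch, [base, #offset + 1]

   with the two byte addresses exchanged on big-endian targets.  */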
15383
15384 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15385 (padded to the size of a word) should be passed in a register. */
15386
15387 static bool
15388 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15389 {
15390 if (TARGET_AAPCS_BASED)
15391 return must_pass_in_stack_var_size (mode, type);
15392 else
15393 return must_pass_in_stack_var_size_or_pad (mode, type);
15394 }
15395
15396
15397 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15398 byte of a stack argument has useful data. For legacy APCS ABIs we use
15399 the default. For AAPCS based ABIs small aggregate types are placed
15400 in the lowest memory address. */
15401
15402 static pad_direction
15403 arm_function_arg_padding (machine_mode mode, const_tree type)
15404 {
15405 if (!TARGET_AAPCS_BASED)
15406 return default_function_arg_padding (mode, type);
15407
15408 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15409 return PAD_DOWNWARD;
15410
15411 return PAD_UPWARD;
15412 }
15413
15414
15415 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15416 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15417 register has useful data, and return the opposite if the most
15418 significant byte does. */
15419
15420 bool
15421 arm_pad_reg_upward (machine_mode mode,
15422 tree type, int first ATTRIBUTE_UNUSED)
15423 {
15424 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15425 {
15426 /* For AAPCS, small aggregates, small fixed-point types,
15427 and small complex types are always padded upwards. */
15428 if (type)
15429 {
15430 if ((AGGREGATE_TYPE_P (type)
15431 || TREE_CODE (type) == COMPLEX_TYPE
15432 || FIXED_POINT_TYPE_P (type))
15433 && int_size_in_bytes (type) <= 4)
15434 return true;
15435 }
15436 else
15437 {
15438 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15439 && GET_MODE_SIZE (mode) <= 4)
15440 return true;
15441 }
15442 }
15443
15444 /* Otherwise, use default padding. */
15445 return !BYTES_BIG_ENDIAN;
15446 }
15447
15448 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15449 assuming that the address in the base register is word aligned. */
15450 bool
15451 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15452 {
15453 HOST_WIDE_INT max_offset;
15454
15455   /* The offset must be a multiple of 4 in Thumb-2 mode.  */
15456 if (TARGET_THUMB2 && ((offset & 3) != 0))
15457 return false;
15458
15459 if (TARGET_THUMB2)
15460 max_offset = 1020;
15461 else if (TARGET_ARM)
15462 max_offset = 255;
15463 else
15464 return false;
15465
15466 return ((offset <= max_offset) && (offset >= -max_offset));
15467 }
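
/* For example (illustration only): an offset of -255 is accepted in ARM
   state but rejected in Thumb-2 (not a multiple of 4), while an offset of
   1020 is accepted in Thumb-2 but rejected in ARM state (outside the
   +/-255 range).  */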
15468
15469 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15470 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15471 Assumes that the address in the base register RN is word aligned. Pattern
15472    guarantees that both memory accesses use the same base register, that the
15473    offsets are constants within range, and that the gap between the offsets is 4.
15474    Once reload has completed, check also that the registers are legal.  WBACK
15475    indicates whether the address is updated; LOAD whether the access is a load or a store.  */
15476 bool
15477 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15478 bool wback, bool load)
15479 {
15480 unsigned int t, t2, n;
15481
15482 if (!reload_completed)
15483 return true;
15484
15485 if (!offset_ok_for_ldrd_strd (offset))
15486 return false;
15487
15488 t = REGNO (rt);
15489 t2 = REGNO (rt2);
15490 n = REGNO (rn);
15491
15492 if ((TARGET_THUMB2)
15493 && ((wback && (n == t || n == t2))
15494 || (t == SP_REGNUM)
15495 || (t == PC_REGNUM)
15496 || (t2 == SP_REGNUM)
15497 || (t2 == PC_REGNUM)
15498 || (!load && (n == PC_REGNUM))
15499 || (load && (t == t2))
15500 /* Triggers Cortex-M3 LDRD errata. */
15501 || (!wback && load && fix_cm3_ldrd && (n == t))))
15502 return false;
15503
15504 if ((TARGET_ARM)
15505 && ((wback && (n == t || n == t2))
15506 || (t2 == PC_REGNUM)
15507 || (t % 2 != 0) /* First destination register is not even. */
15508 || (t2 != t + 1)
15509 /* PC can be used as base register (for offset addressing only),
15510 	  but it is deprecated.  */
15511 || (n == PC_REGNUM)))
15512 return false;
15513
15514 return true;
15515 }
15516
15517 /* Return true if a 64-bit access with alignment ALIGN and with a
15518 constant offset OFFSET from the base pointer is permitted on this
15519 architecture. */
15520 static bool
15521 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
15522 {
15523 return (unaligned_access
15524 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
15525 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
15526 }
15527
15528 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15529 operand MEM's address contains an immediate offset from the base
15530 register and has no side effects, in which case it sets BASE,
15531 OFFSET and ALIGN accordingly. */
15532 static bool
15533 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
15534 {
15535 rtx addr;
15536
15537 gcc_assert (base != NULL && offset != NULL);
15538
15539 /* TODO: Handle more general memory operand patterns, such as
15540 PRE_DEC and PRE_INC. */
15541
15542 if (side_effects_p (mem))
15543 return false;
15544
15545 /* Can't deal with subregs. */
15546 if (GET_CODE (mem) == SUBREG)
15547 return false;
15548
15549 gcc_assert (MEM_P (mem));
15550
15551 *offset = const0_rtx;
15552 *align = MEM_ALIGN (mem);
15553
15554 addr = XEXP (mem, 0);
15555
15556 /* If addr isn't valid for DImode, then we can't handle it. */
15557 if (!arm_legitimate_address_p (DImode, addr,
15558 reload_in_progress || reload_completed))
15559 return false;
15560
15561 if (REG_P (addr))
15562 {
15563 *base = addr;
15564 return true;
15565 }
15566 else if (GET_CODE (addr) == PLUS)
15567 {
15568 *base = XEXP (addr, 0);
15569 *offset = XEXP (addr, 1);
15570 return (REG_P (*base) && CONST_INT_P (*offset));
15571 }
15572
15573 return false;
15574 }
15575
15576 /* Called from a peephole2 to replace two word-size accesses with a
15577 single LDRD/STRD instruction. Returns true iff we can generate a
15578 new instruction sequence. That is, both accesses use the same base
15579 register and the gap between constant offsets is 4. This function
15580 may reorder its operands to match ldrd/strd RTL templates.
15581 OPERANDS are the operands found by the peephole matcher;
15582 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15583    corresponding memory operands.  LOAD indicates whether the access
15584    is a load or a store.  CONST_STORE indicates a store of constant
15585    integer values held in OPERANDS[4,5] and assumes that the pattern
15586    is four insns long, for the purpose of checking dead registers.
15587 COMMUTE indicates that register operands may be reordered. */
15588 bool
15589 gen_operands_ldrd_strd (rtx *operands, bool load,
15590 bool const_store, bool commute)
15591 {
15592 int nops = 2;
15593 HOST_WIDE_INT offsets[2], offset, align[2];
15594 rtx base = NULL_RTX;
15595 rtx cur_base, cur_offset, tmp;
15596 int i, gap;
15597 HARD_REG_SET regset;
15598
15599 gcc_assert (!const_store || !load);
15600 /* Check that the memory references are immediate offsets from the
15601 same base register. Extract the base register, the destination
15602 registers, and the corresponding memory offsets. */
15603 for (i = 0; i < nops; i++)
15604 {
15605 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
15606 &align[i]))
15607 return false;
15608
15609 if (i == 0)
15610 base = cur_base;
15611 else if (REGNO (base) != REGNO (cur_base))
15612 return false;
15613
15614 offsets[i] = INTVAL (cur_offset);
15615 if (GET_CODE (operands[i]) == SUBREG)
15616 {
15617 tmp = SUBREG_REG (operands[i]);
15618 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15619 operands[i] = tmp;
15620 }
15621 }
15622
15623 /* Make sure there is no dependency between the individual loads. */
15624 if (load && REGNO (operands[0]) == REGNO (base))
15625 return false; /* RAW */
15626
15627 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15628 return false; /* WAW */
15629
15630 /* If the same input register is used in both stores
15631 when storing different constants, try to find a free register.
15632 For example, the code
15633 mov r0, 0
15634 str r0, [r2]
15635 mov r0, 1
15636 str r0, [r2, #4]
15637 can be transformed into
15638 mov r1, 0
15639 mov r0, 1
15640 strd r1, r0, [r2]
15641 in Thumb mode assuming that r1 is free.
15642 For ARM mode do the same but only if the starting register
15643 can be made to be even. */
15644 if (const_store
15645 && REGNO (operands[0]) == REGNO (operands[1])
15646 && INTVAL (operands[4]) != INTVAL (operands[5]))
15647 {
15648 if (TARGET_THUMB2)
15649 {
15650 CLEAR_HARD_REG_SET (regset);
15651 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15652 if (tmp == NULL_RTX)
15653 return false;
15654
15655 /* Use the new register in the first load to ensure that
15656 if the original input register is not dead after peephole,
15657 then it will have the correct constant value. */
15658 operands[0] = tmp;
15659 }
15660 else if (TARGET_ARM)
15661 {
15662 int regno = REGNO (operands[0]);
15663 if (!peep2_reg_dead_p (4, operands[0]))
15664 {
15665 /* When the input register is even and is not dead after the
15666 pattern, it has to hold the second constant but we cannot
15667 form a legal STRD in ARM mode with this register as the second
15668 register. */
15669 if (regno % 2 == 0)
15670 return false;
15671
15672 /* Is regno-1 free? */
15673 SET_HARD_REG_SET (regset);
15674 CLEAR_HARD_REG_BIT(regset, regno - 1);
15675 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15676 if (tmp == NULL_RTX)
15677 return false;
15678
15679 operands[0] = tmp;
15680 }
15681 else
15682 {
15683 /* Find a DImode register. */
15684 CLEAR_HARD_REG_SET (regset);
15685 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15686 if (tmp != NULL_RTX)
15687 {
15688 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15689 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15690 }
15691 else
15692 {
15693 /* Can we use the input register to form a DI register? */
15694 SET_HARD_REG_SET (regset);
15695 CLEAR_HARD_REG_BIT(regset,
15696 regno % 2 == 0 ? regno + 1 : regno - 1);
15697 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15698 if (tmp == NULL_RTX)
15699 return false;
15700 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15701 }
15702 }
15703
15704 gcc_assert (operands[0] != NULL_RTX);
15705 gcc_assert (operands[1] != NULL_RTX);
15706 gcc_assert (REGNO (operands[0]) % 2 == 0);
15707 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15708 }
15709 }
15710
15711 /* Make sure the instructions are ordered with lower memory access first. */
15712 if (offsets[0] > offsets[1])
15713 {
15714 gap = offsets[0] - offsets[1];
15715 offset = offsets[1];
15716
15717 /* Swap the instructions such that lower memory is accessed first. */
15718 std::swap (operands[0], operands[1]);
15719 std::swap (operands[2], operands[3]);
15720 std::swap (align[0], align[1]);
15721 if (const_store)
15722 std::swap (operands[4], operands[5]);
15723 }
15724 else
15725 {
15726 gap = offsets[1] - offsets[0];
15727 offset = offsets[0];
15728 }
15729
15730 /* Make sure accesses are to consecutive memory locations. */
15731 if (gap != GET_MODE_SIZE (SImode))
15732 return false;
15733
15734 if (!align_ok_ldrd_strd (align[0], offset))
15735 return false;
15736
15737 /* Make sure we generate legal instructions. */
15738 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15739 false, load))
15740 return true;
15741
15742   /* In Thumb state the registers are almost unconstrained, so if the check
15743      above failed there is little hope of fixing it with different registers.  */
15744 if (TARGET_THUMB2)
15745 return false;
15746
15747 if (load && commute)
15748 {
15749 /* Try reordering registers. */
15750 std::swap (operands[0], operands[1]);
15751 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15752 false, load))
15753 return true;
15754 }
15755
15756 if (const_store)
15757 {
15758 /* If input registers are dead after this pattern, they can be
15759 reordered or replaced by other registers that are free in the
15760 current pattern. */
15761 if (!peep2_reg_dead_p (4, operands[0])
15762 || !peep2_reg_dead_p (4, operands[1]))
15763 return false;
15764
15765 /* Try to reorder the input registers. */
15766 /* For example, the code
15767 mov r0, 0
15768 mov r1, 1
15769 str r1, [r2]
15770 str r0, [r2, #4]
15771 can be transformed into
15772 mov r1, 0
15773 mov r0, 1
15774 strd r0, [r2]
15775 */
15776 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15777 false, false))
15778 {
15779 std::swap (operands[0], operands[1]);
15780 return true;
15781 }
15782
15783 /* Try to find a free DI register. */
15784 CLEAR_HARD_REG_SET (regset);
15785 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15786 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15787 while (true)
15788 {
15789 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15790 if (tmp == NULL_RTX)
15791 return false;
15792
15793 /* DREG must be an even-numbered register in DImode.
15794 Split it into SI registers. */
15795 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15796 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15797 gcc_assert (operands[0] != NULL_RTX);
15798 gcc_assert (operands[1] != NULL_RTX);
15799 gcc_assert (REGNO (operands[0]) % 2 == 0);
15800 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15801
15802 return (operands_ok_ldrd_strd (operands[0], operands[1],
15803 base, offset,
15804 false, load));
15805 }
15806 }
15807
15808 return false;
15809 }
15810
15811
15812 /* Return true if parallel execution of the two word-size accesses provided
15813 could be satisfied with a single LDRD/STRD instruction. Two word-size
15814 accesses are represented by the OPERANDS array, where OPERANDS[0,1] are
15815 register operands and OPERANDS[2,3] are the corresponding memory operands.
15816 */
15817 bool
15818 valid_operands_ldrd_strd (rtx *operands, bool load)
15819 {
15820 int nops = 2;
15821 HOST_WIDE_INT offsets[2], offset, align[2];
15822 rtx base = NULL_RTX;
15823 rtx cur_base, cur_offset;
15824 int i, gap;
15825
15826 /* Check that the memory references are immediate offsets from the
15827 same base register. Extract the base register, the destination
15828 registers, and the corresponding memory offsets. */
15829 for (i = 0; i < nops; i++)
15830 {
15831 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
15832 &align[i]))
15833 return false;
15834
15835 if (i == 0)
15836 base = cur_base;
15837 else if (REGNO (base) != REGNO (cur_base))
15838 return false;
15839
15840 offsets[i] = INTVAL (cur_offset);
15841 if (GET_CODE (operands[i]) == SUBREG)
15842 return false;
15843 }
15844
15845 if (offsets[0] > offsets[1])
15846 return false;
15847
15848 gap = offsets[1] - offsets[0];
15849 offset = offsets[0];
15850
15851 /* Make sure accesses are to consecutive memory locations. */
15852 if (gap != GET_MODE_SIZE (SImode))
15853 return false;
15854
15855 if (!align_ok_ldrd_strd (align[0], offset))
15856 return false;
15857
15858 return operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15859 false, load);
15860 }
15861
15862 \f
15863 /* Print a symbolic form of X to the debug file, F. */
15864 static void
15865 arm_print_value (FILE *f, rtx x)
15866 {
15867 switch (GET_CODE (x))
15868 {
15869 case CONST_INT:
15870 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15871 return;
15872
15873 case CONST_DOUBLE:
15874 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15875 return;
15876
15877 case CONST_VECTOR:
15878 {
15879 int i;
15880
15881 fprintf (f, "<");
15882 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15883 {
15884 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15885 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15886 fputc (',', f);
15887 }
15888 fprintf (f, ">");
15889 }
15890 return;
15891
15892 case CONST_STRING:
15893 fprintf (f, "\"%s\"", XSTR (x, 0));
15894 return;
15895
15896 case SYMBOL_REF:
15897 fprintf (f, "`%s'", XSTR (x, 0));
15898 return;
15899
15900 case LABEL_REF:
15901 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15902 return;
15903
15904 case CONST:
15905 arm_print_value (f, XEXP (x, 0));
15906 return;
15907
15908 case PLUS:
15909 arm_print_value (f, XEXP (x, 0));
15910 fprintf (f, "+");
15911 arm_print_value (f, XEXP (x, 1));
15912 return;
15913
15914 case PC:
15915 fprintf (f, "pc");
15916 return;
15917
15918 default:
15919 fprintf (f, "????");
15920 return;
15921 }
15922 }
15923 \f
15924 /* Routines for manipulation of the constant pool. */
15925
15926 /* Arm instructions cannot load a large constant directly into a
15927 register; they have to come from a pc relative load. The constant
15928 must therefore be placed in the addressable range of the pc
15929 relative load. Depending on the precise pc relative load
15930 instruction the range is somewhere between 256 bytes and 4k. This
15931 means that we often have to dump a constant inside a function, and
15932 generate code to branch around it.
15933
15934 It is important to minimize this, since the branches will slow
15935 things down and make the code larger.
15936
15937 Normally we can hide the table after an existing unconditional
15938 branch so that there is no interruption of the flow, but in the
15939 worst case the code looks like this:
15940
15941 ldr rn, L1
15942 ...
15943 b L2
15944 align
15945 L1: .long value
15946 L2:
15947 ...
15948
15949 ldr rn, L3
15950 ...
15951 b L4
15952 align
15953 L3: .long value
15954 L4:
15955 ...
15956
15957 We fix this by performing a scan after scheduling, which notices
15958 which instructions need to have their operands fetched from the
15959 constant table and builds the table.
15960
15961 The algorithm starts by building a table of all the constants that
15962 need fixing up and all the natural barriers in the function (places
15963 where a constant table can be dropped without breaking the flow).
15964 For each fixup we note how far the pc-relative replacement will be
15965 able to reach and the offset of the instruction into the function.
15966
15967 Having built the table we then group the fixes together to form
15968 tables that are as large as possible (subject to addressing
15969 constraints) and emit each table of constants after the last
15970 barrier that is within range of all the instructions in the group.
15971 If a group does not contain a barrier, then we forcibly create one
15972 by inserting a jump instruction into the flow. Once the table has
15973 been inserted, the insns are then modified to reference the
15974 relevant entry in the pool.
15975
15976 Possible enhancements to the algorithm (not implemented) are:
15977
15978 1) For some processors and object formats, there may be benefit in
15979 aligning the pools to the start of cache lines; this alignment
15980 would need to be taken into account when calculating addressability
15981 of a pool. */
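
/* Illustrative sketch, not part of the implementation: suppose an SImode
   load at offset 0x1000 has a forward pool range of about 4 KB and a
   backward range of 250 bytes (the real values come from the pool_range
   and neg_pool_range insn attributes; these numbers are only examples).
   Its constant can then live anywhere in roughly
   [0x1000 - 250, 0x1000 + 4096 - minipool_pad], and the grouping step
   described above intersects such windows for all pending fixes and
   dumps the pool after the last barrier inside the intersection.  */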
15982
15983 /* These typedefs are located at the start of this file, so that
15984 they can be used in the prototypes there. This comment is to
15985 remind readers of that fact so that the following structures
15986 can be understood more easily.
15987
15988 typedef struct minipool_node Mnode;
15989 typedef struct minipool_fixup Mfix; */
15990
15991 struct minipool_node
15992 {
15993 /* Doubly linked chain of entries. */
15994 Mnode * next;
15995 Mnode * prev;
15996 /* The maximum offset into the code at which this entry can be placed. While
15997 pushing fixes for forward references, all entries are sorted in order
15998 of increasing max_address. */
15999 HOST_WIDE_INT max_address;
16000 /* Similarly for an entry inserted for a backwards ref. */
16001 HOST_WIDE_INT min_address;
16002 /* The number of fixes referencing this entry. This can become zero
16003 if we "unpush" an entry. In this case we ignore the entry when we
16004 come to emit the code. */
16005 int refcount;
16006 /* The offset from the start of the minipool. */
16007 HOST_WIDE_INT offset;
16008 /* The value in the table. */
16009 rtx value;
16010 /* The mode of value. */
16011 machine_mode mode;
16012 /* The size of the value. With iWMMXt enabled
16013 sizes > 4 also imply an alignment of 8 bytes. */
16014 int fix_size;
16015 };
16016
16017 struct minipool_fixup
16018 {
16019 Mfix * next;
16020 rtx_insn * insn;
16021 HOST_WIDE_INT address;
16022 rtx * loc;
16023 machine_mode mode;
16024 int fix_size;
16025 rtx value;
16026 Mnode * minipool;
16027 HOST_WIDE_INT forwards;
16028 HOST_WIDE_INT backwards;
16029 };
16030
16031 /* Fixes less than a word need padding out to a word boundary. */
16032 #define MINIPOOL_FIX_SIZE(mode) \
16033 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
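
/* For example (illustration only): MINIPOOL_FIX_SIZE (QImode) and
   MINIPOOL_FIX_SIZE (HImode) both evaluate to 4, since sub-word
   constants are padded to a full word, while MINIPOOL_FIX_SIZE (DImode)
   is 8 and MINIPOOL_FIX_SIZE (TImode) is 16.  */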
16034
16035 static Mnode * minipool_vector_head;
16036 static Mnode * minipool_vector_tail;
16037 static rtx_code_label *minipool_vector_label;
16038 static int minipool_pad;
16039
16040 /* The linked list of all minipool fixes required for this function. */
16041 Mfix * minipool_fix_head;
16042 Mfix * minipool_fix_tail;
16043 /* The fix entry for the current minipool, once it has been placed. */
16044 Mfix * minipool_barrier;
16045
16046 #ifndef JUMP_TABLES_IN_TEXT_SECTION
16047 #define JUMP_TABLES_IN_TEXT_SECTION 0
16048 #endif
16049
16050 static HOST_WIDE_INT
16051 get_jump_table_size (rtx_jump_table_data *insn)
16052 {
16053 /* ADDR_VECs only take room if read-only data goes into the text
16054 section. */
16055 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
16056 {
16057 rtx body = PATTERN (insn);
16058 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
16059 HOST_WIDE_INT size;
16060 HOST_WIDE_INT modesize;
16061
16062 modesize = GET_MODE_SIZE (GET_MODE (body));
16063 size = modesize * XVECLEN (body, elt);
16064 switch (modesize)
16065 {
16066 case 1:
16067 /* Round up size of TBB table to a halfword boundary. */
16068 size = (size + 1) & ~HOST_WIDE_INT_1;
16069 break;
16070 case 2:
16071 /* No padding necessary for TBH. */
16072 break;
16073 case 4:
16074 /* Add two bytes for alignment on Thumb. */
16075 if (TARGET_THUMB)
16076 size += 2;
16077 break;
16078 default:
16079 gcc_unreachable ();
16080 }
16081 return size;
16082 }
16083
16084 return 0;
16085 }
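
/* Worked example, illustration only, assuming the tables end up in the
   text section: a TBB-style ADDR_DIFF_VEC in QImode with 5 entries takes
   5 bytes and is rounded up to 6 to keep the following code halfword
   aligned; a TBH table (HImode) with 5 entries takes exactly 10 bytes;
   a 5-entry SImode table on Thumb takes 20 bytes plus 2 bytes of
   alignment padding.  */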
16086
16087 /* Return the maximum amount of padding that will be inserted before
16088 label LABEL. */
16089
16090 static HOST_WIDE_INT
16091 get_label_padding (rtx label)
16092 {
16093 HOST_WIDE_INT align, min_insn_size;
16094
16095 align = 1 << label_to_alignment (label).levels[0].log;
16096 min_insn_size = TARGET_THUMB ? 2 : 4;
16097 return align > min_insn_size ? align - min_insn_size : 0;
16098 }
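
/* For instance (illustration only): a label aligned to 8 bytes on Thumb,
   where the minimum instruction size is 2, can be preceded by up to
   8 - 2 = 6 bytes of padding; on ARM, with a minimum instruction size
   of 4, the worst case for the same alignment is 4 bytes.  */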
16099
16100 /* Move a minipool fix MP from its current location to before MAX_MP.
16101 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16102 constraints may need updating. */
16103 static Mnode *
16104 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16105 HOST_WIDE_INT max_address)
16106 {
16107 /* The code below assumes these are different. */
16108 gcc_assert (mp != max_mp);
16109
16110 if (max_mp == NULL)
16111 {
16112 if (max_address < mp->max_address)
16113 mp->max_address = max_address;
16114 }
16115 else
16116 {
16117 if (max_address > max_mp->max_address - mp->fix_size)
16118 mp->max_address = max_mp->max_address - mp->fix_size;
16119 else
16120 mp->max_address = max_address;
16121
16122 /* Unlink MP from its current position. Since max_mp is non-null,
16123 mp->prev must be non-null. */
16124 mp->prev->next = mp->next;
16125 if (mp->next != NULL)
16126 mp->next->prev = mp->prev;
16127 else
16128 minipool_vector_tail = mp->prev;
16129
16130 /* Re-insert it before MAX_MP. */
16131 mp->next = max_mp;
16132 mp->prev = max_mp->prev;
16133 max_mp->prev = mp;
16134
16135 if (mp->prev != NULL)
16136 mp->prev->next = mp;
16137 else
16138 minipool_vector_head = mp;
16139 }
16140
16141 /* Save the new entry. */
16142 max_mp = mp;
16143
16144 /* Scan over the preceding entries and adjust their addresses as
16145 required. */
16146 while (mp->prev != NULL
16147 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16148 {
16149 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16150 mp = mp->prev;
16151 }
16152
16153 return max_mp;
16154 }
16155
16156 /* Add a constant to the minipool for a forward reference. Returns the
16157 node added or NULL if the constant will not fit in this pool. */
16158 static Mnode *
16159 add_minipool_forward_ref (Mfix *fix)
16160 {
16161 /* If set, max_mp is the first pool_entry that has a lower
16162 constraint than the one we are trying to add. */
16163 Mnode * max_mp = NULL;
16164 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16165 Mnode * mp;
16166
16167 /* If the minipool starts before the end of FIX->INSN then this FIX
16168 cannot be placed into the current pool. Furthermore, adding the
16169 new constant pool entry may cause the pool to start FIX_SIZE bytes
16170 earlier. */
16171 if (minipool_vector_head &&
16172 (fix->address + get_attr_length (fix->insn)
16173 >= minipool_vector_head->max_address - fix->fix_size))
16174 return NULL;
16175
16176 /* Scan the pool to see if a constant with the same value has
16177 already been added. While we are doing this, also note the
16178 location where we must insert the constant if it doesn't already
16179 exist. */
16180 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16181 {
16182 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16183 && fix->mode == mp->mode
16184 && (!LABEL_P (fix->value)
16185 || (CODE_LABEL_NUMBER (fix->value)
16186 == CODE_LABEL_NUMBER (mp->value)))
16187 && rtx_equal_p (fix->value, mp->value))
16188 {
16189 /* More than one fix references this entry. */
16190 mp->refcount++;
16191 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16192 }
16193
16194 /* Note the insertion point if necessary. */
16195 if (max_mp == NULL
16196 && mp->max_address > max_address)
16197 max_mp = mp;
16198
16199 /* If we are inserting an 8-byte aligned quantity and
16200 we have not already found an insertion point, then
16201 make sure that all such 8-byte aligned quantities are
16202 placed at the start of the pool. */
16203 if (ARM_DOUBLEWORD_ALIGN
16204 && max_mp == NULL
16205 && fix->fix_size >= 8
16206 && mp->fix_size < 8)
16207 {
16208 max_mp = mp;
16209 max_address = mp->max_address;
16210 }
16211 }
16212
16213 /* The value is not currently in the minipool, so we need to create
16214 a new entry for it. If MAX_MP is NULL, the entry will be put on
16215 the end of the list since the placement is less constrained than
16216 any existing entry. Otherwise, we insert the new fix before
16217 MAX_MP and, if necessary, adjust the constraints on the other
16218 entries. */
16219 mp = XNEW (Mnode);
16220 mp->fix_size = fix->fix_size;
16221 mp->mode = fix->mode;
16222 mp->value = fix->value;
16223 mp->refcount = 1;
16224 /* Not yet required for a backwards ref. */
16225 mp->min_address = -65536;
16226
16227 if (max_mp == NULL)
16228 {
16229 mp->max_address = max_address;
16230 mp->next = NULL;
16231 mp->prev = minipool_vector_tail;
16232
16233 if (mp->prev == NULL)
16234 {
16235 minipool_vector_head = mp;
16236 minipool_vector_label = gen_label_rtx ();
16237 }
16238 else
16239 mp->prev->next = mp;
16240
16241 minipool_vector_tail = mp;
16242 }
16243 else
16244 {
16245 if (max_address > max_mp->max_address - mp->fix_size)
16246 mp->max_address = max_mp->max_address - mp->fix_size;
16247 else
16248 mp->max_address = max_address;
16249
16250 mp->next = max_mp;
16251 mp->prev = max_mp->prev;
16252 max_mp->prev = mp;
16253 if (mp->prev != NULL)
16254 mp->prev->next = mp;
16255 else
16256 minipool_vector_head = mp;
16257 }
16258
16259 /* Save the new entry. */
16260 max_mp = mp;
16261
16262 /* Scan over the preceding entries and adjust their addresses as
16263 required. */
16264 while (mp->prev != NULL
16265 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16266 {
16267 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16268 mp = mp->prev;
16269 }
16270
16271 return max_mp;
16272 }
16273
16274 static Mnode *
16275 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16276 HOST_WIDE_INT min_address)
16277 {
16278 HOST_WIDE_INT offset;
16279
16280 /* The code below assumes these are different. */
16281 gcc_assert (mp != min_mp);
16282
16283 if (min_mp == NULL)
16284 {
16285 if (min_address > mp->min_address)
16286 mp->min_address = min_address;
16287 }
16288 else
16289 {
16290 /* We will adjust this below if it is too loose. */
16291 mp->min_address = min_address;
16292
16293 /* Unlink MP from its current position. Since min_mp is non-null,
16294 mp->next must be non-null. */
16295 mp->next->prev = mp->prev;
16296 if (mp->prev != NULL)
16297 mp->prev->next = mp->next;
16298 else
16299 minipool_vector_head = mp->next;
16300
16301 /* Reinsert it after MIN_MP. */
16302 mp->prev = min_mp;
16303 mp->next = min_mp->next;
16304 min_mp->next = mp;
16305 if (mp->next != NULL)
16306 mp->next->prev = mp;
16307 else
16308 minipool_vector_tail = mp;
16309 }
16310
16311 min_mp = mp;
16312
16313 offset = 0;
16314 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16315 {
16316 mp->offset = offset;
16317 if (mp->refcount > 0)
16318 offset += mp->fix_size;
16319
16320 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16321 mp->next->min_address = mp->min_address + mp->fix_size;
16322 }
16323
16324 return min_mp;
16325 }
16326
16327 /* Add a constant to the minipool for a backward reference. Returns the
16328 node added or NULL if the constant will not fit in this pool.
16329
16330 Note that the code for insertion for a backwards reference can be
16331 somewhat confusing because the calculated offsets for each fix do
16332 not take into account the size of the pool (which is still under
16333 construction). */
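
/* Illustrative sketch, not part of the implementation: a fix at address
   0x200 whose insn has a backward pool range of 0xf8 gives
   min_address = 0x200 - 0xf8 = 0x108; the entry can only be accepted if
   0x108 is still below the address of the existing pool's barrier,
   i.e. the pool lies close enough behind the instruction.  */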
16334 static Mnode *
16335 add_minipool_backward_ref (Mfix *fix)
16336 {
16337 /* If set, min_mp is the last pool_entry that has a lower constraint
16338 than the one we are trying to add. */
16339 Mnode *min_mp = NULL;
16340 /* This can be negative, since it is only a constraint. */
16341 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16342 Mnode *mp;
16343
16344 /* If we can't reach the current pool from this insn, or if we can't
16345 insert this entry at the end of the pool without pushing other
16346 fixes out of range, then we don't try. This ensures that we
16347 can't fail later on. */
16348 if (min_address >= minipool_barrier->address
16349 || (minipool_vector_tail->min_address + fix->fix_size
16350 >= minipool_barrier->address))
16351 return NULL;
16352
16353 /* Scan the pool to see if a constant with the same value has
16354 already been added. While we are doing this, also note the
16355 location where we must insert the constant if it doesn't already
16356 exist. */
16357 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16358 {
16359 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16360 && fix->mode == mp->mode
16361 && (!LABEL_P (fix->value)
16362 || (CODE_LABEL_NUMBER (fix->value)
16363 == CODE_LABEL_NUMBER (mp->value)))
16364 && rtx_equal_p (fix->value, mp->value)
16365 /* Check that there is enough slack to move this entry to the
16366 end of the table (this is conservative). */
16367 && (mp->max_address
16368 > (minipool_barrier->address
16369 + minipool_vector_tail->offset
16370 + minipool_vector_tail->fix_size)))
16371 {
16372 mp->refcount++;
16373 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16374 }
16375
16376 if (min_mp != NULL)
16377 mp->min_address += fix->fix_size;
16378 else
16379 {
16380 /* Note the insertion point if necessary. */
16381 if (mp->min_address < min_address)
16382 {
16383 /* For now, we do not allow the insertion of nodes requiring
16384 8-byte alignment anywhere but at the start of the pool. */
16385 if (ARM_DOUBLEWORD_ALIGN
16386 && fix->fix_size >= 8 && mp->fix_size < 8)
16387 return NULL;
16388 else
16389 min_mp = mp;
16390 }
16391 else if (mp->max_address
16392 < minipool_barrier->address + mp->offset + fix->fix_size)
16393 {
16394 /* Inserting before this entry would push the fix beyond
16395 its maximum address (which can happen if we have
16396 re-located a forwards fix); force the new fix to come
16397 after it. */
16398 if (ARM_DOUBLEWORD_ALIGN
16399 && fix->fix_size >= 8 && mp->fix_size < 8)
16400 return NULL;
16401 else
16402 {
16403 min_mp = mp;
16404 min_address = mp->min_address + fix->fix_size;
16405 }
16406 }
16407 /* Do not insert a non-8-byte aligned quantity before 8-byte
16408 aligned quantities. */
16409 else if (ARM_DOUBLEWORD_ALIGN
16410 && fix->fix_size < 8
16411 && mp->fix_size >= 8)
16412 {
16413 min_mp = mp;
16414 min_address = mp->min_address + fix->fix_size;
16415 }
16416 }
16417 }
16418
16419 /* We need to create a new entry. */
16420 mp = XNEW (Mnode);
16421 mp->fix_size = fix->fix_size;
16422 mp->mode = fix->mode;
16423 mp->value = fix->value;
16424 mp->refcount = 1;
16425 mp->max_address = minipool_barrier->address + 65536;
16426
16427 mp->min_address = min_address;
16428
16429 if (min_mp == NULL)
16430 {
16431 mp->prev = NULL;
16432 mp->next = minipool_vector_head;
16433
16434 if (mp->next == NULL)
16435 {
16436 minipool_vector_tail = mp;
16437 minipool_vector_label = gen_label_rtx ();
16438 }
16439 else
16440 mp->next->prev = mp;
16441
16442 minipool_vector_head = mp;
16443 }
16444 else
16445 {
16446 mp->next = min_mp->next;
16447 mp->prev = min_mp;
16448 min_mp->next = mp;
16449
16450 if (mp->next != NULL)
16451 mp->next->prev = mp;
16452 else
16453 minipool_vector_tail = mp;
16454 }
16455
16456 /* Save the new entry. */
16457 min_mp = mp;
16458
16459 if (mp->prev)
16460 mp = mp->prev;
16461 else
16462 mp->offset = 0;
16463
16464 /* Scan over the following entries and adjust their offsets. */
16465 while (mp->next != NULL)
16466 {
16467 if (mp->next->min_address < mp->min_address + mp->fix_size)
16468 mp->next->min_address = mp->min_address + mp->fix_size;
16469
16470 if (mp->refcount)
16471 mp->next->offset = mp->offset + mp->fix_size;
16472 else
16473 mp->next->offset = mp->offset;
16474
16475 mp = mp->next;
16476 }
16477
16478 return min_mp;
16479 }
16480
16481 static void
16482 assign_minipool_offsets (Mfix *barrier)
16483 {
16484 HOST_WIDE_INT offset = 0;
16485 Mnode *mp;
16486
16487 minipool_barrier = barrier;
16488
16489 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16490 {
16491 mp->offset = offset;
16492
16493 if (mp->refcount > 0)
16494 offset += mp->fix_size;
16495 }
16496 }
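
/* Worked example (illustration only): with three referenced entries of
   sizes 4, 8 and 4 laid out in that order, their offsets from the start
   of the pool become 0, 4 and 12; an entry whose refcount has dropped to
   zero takes no space and simply inherits the running offset.  */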
16497
16498 /* Output the literal table. */
16499 static void
16500 dump_minipool (rtx_insn *scan)
16501 {
16502 Mnode * mp;
16503 Mnode * nmp;
16504 int align64 = 0;
16505
16506 if (ARM_DOUBLEWORD_ALIGN)
16507 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16508 if (mp->refcount > 0 && mp->fix_size >= 8)
16509 {
16510 align64 = 1;
16511 break;
16512 }
16513
16514 if (dump_file)
16515 fprintf (dump_file,
16516 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16517 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16518
16519 scan = emit_label_after (gen_label_rtx (), scan);
16520 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16521 scan = emit_label_after (minipool_vector_label, scan);
16522
16523 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16524 {
16525 if (mp->refcount > 0)
16526 {
16527 if (dump_file)
16528 {
16529 fprintf (dump_file,
16530 ";; Offset %u, min %ld, max %ld ",
16531 (unsigned) mp->offset, (unsigned long) mp->min_address,
16532 (unsigned long) mp->max_address);
16533 arm_print_value (dump_file, mp->value);
16534 fputc ('\n', dump_file);
16535 }
16536
16537 rtx val = copy_rtx (mp->value);
16538
16539 switch (GET_MODE_SIZE (mp->mode))
16540 {
16541 #ifdef HAVE_consttable_1
16542 case 1:
16543 scan = emit_insn_after (gen_consttable_1 (val), scan);
16544 break;
16545
16546 #endif
16547 #ifdef HAVE_consttable_2
16548 case 2:
16549 scan = emit_insn_after (gen_consttable_2 (val), scan);
16550 break;
16551
16552 #endif
16553 #ifdef HAVE_consttable_4
16554 case 4:
16555 scan = emit_insn_after (gen_consttable_4 (val), scan);
16556 break;
16557
16558 #endif
16559 #ifdef HAVE_consttable_8
16560 case 8:
16561 scan = emit_insn_after (gen_consttable_8 (val), scan);
16562 break;
16563
16564 #endif
16565 #ifdef HAVE_consttable_16
16566 case 16:
16567 scan = emit_insn_after (gen_consttable_16 (val), scan);
16568 break;
16569
16570 #endif
16571 default:
16572 gcc_unreachable ();
16573 }
16574 }
16575
16576 nmp = mp->next;
16577 free (mp);
16578 }
16579
16580 minipool_vector_head = minipool_vector_tail = NULL;
16581 scan = emit_insn_after (gen_consttable_end (), scan);
16582 scan = emit_barrier_after (scan);
16583 }
16584
16585 /* Return the cost of forcibly inserting a barrier after INSN. */
16586 static int
16587 arm_barrier_cost (rtx_insn *insn)
16588 {
16589 /* Basing the location of the pool on the loop depth is preferable,
16590 but at the moment, the basic block information seems to be
16591 corrupt by this stage of the compilation. */
16592 int base_cost = 50;
16593 rtx_insn *next = next_nonnote_insn (insn);
16594
16595 if (next != NULL && LABEL_P (next))
16596 base_cost -= 20;
16597
16598 switch (GET_CODE (insn))
16599 {
16600 case CODE_LABEL:
16601 /* It will always be better to place the table before the label, rather
16602 than after it. */
16603 return 50;
16604
16605 case INSN:
16606 case CALL_INSN:
16607 return base_cost;
16608
16609 case JUMP_INSN:
16610 return base_cost - 10;
16611
16612 default:
16613 return base_cost + 10;
16614 }
16615 }
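
/* Worked example (illustration only): forcing a barrier after an
   unconditional JUMP_INSN costs 50 - 10 = 40, or only 20 when the next
   real insn is a label (50 - 20 - 10); a plain INSN or CALL_INSN costs
   50, or 30 when followed by a label.  The caller below picks the
   position with the lowest cost.  */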
16616
16617 /* Find the best place in the insn stream in the range
16618 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16619 Create the barrier by inserting a jump and add a new fix entry for
16620 it. */
16621 static Mfix *
16622 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16623 {
16624 HOST_WIDE_INT count = 0;
16625 rtx_barrier *barrier;
16626 rtx_insn *from = fix->insn;
16627 /* The instruction after which we will insert the jump. */
16628 rtx_insn *selected = NULL;
16629 int selected_cost;
16630 /* The address at which the jump instruction will be placed. */
16631 HOST_WIDE_INT selected_address;
16632 Mfix * new_fix;
16633 HOST_WIDE_INT max_count = max_address - fix->address;
16634 rtx_code_label *label = gen_label_rtx ();
16635
16636 selected_cost = arm_barrier_cost (from);
16637 selected_address = fix->address;
16638
16639 while (from && count < max_count)
16640 {
16641 rtx_jump_table_data *tmp;
16642 int new_cost;
16643
16644 /* This code shouldn't have been called if there was a natural barrier
16645 within range. */
16646 gcc_assert (!BARRIER_P (from));
16647
16648 /* Count the length of this insn. This must stay in sync with the
16649 code that pushes minipool fixes. */
16650 if (LABEL_P (from))
16651 count += get_label_padding (from);
16652 else
16653 count += get_attr_length (from);
16654
16655 /* If there is a jump table, add its length. */
16656 if (tablejump_p (from, NULL, &tmp))
16657 {
16658 count += get_jump_table_size (tmp);
16659
16660 /* Jump tables aren't in a basic block, so base the cost on
16661 the dispatch insn. If we select this location, we will
16662 still put the pool after the table. */
16663 new_cost = arm_barrier_cost (from);
16664
16665 if (count < max_count
16666 && (!selected || new_cost <= selected_cost))
16667 {
16668 selected = tmp;
16669 selected_cost = new_cost;
16670 selected_address = fix->address + count;
16671 }
16672
16673 /* Continue after the dispatch table. */
16674 from = NEXT_INSN (tmp);
16675 continue;
16676 }
16677
16678 new_cost = arm_barrier_cost (from);
16679
16680 if (count < max_count
16681 && (!selected || new_cost <= selected_cost))
16682 {
16683 selected = from;
16684 selected_cost = new_cost;
16685 selected_address = fix->address + count;
16686 }
16687
16688 from = NEXT_INSN (from);
16689 }
16690
16691 /* Make sure that we found a place to insert the jump. */
16692 gcc_assert (selected);
16693
16694 /* Create a new JUMP_INSN that branches around a barrier. */
16695 from = emit_jump_insn_after (gen_jump (label), selected);
16696 JUMP_LABEL (from) = label;
16697 barrier = emit_barrier_after (from);
16698 emit_label_after (label, barrier);
16699
16700 /* Create a minipool barrier entry for the new barrier. */
16701 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16702 new_fix->insn = barrier;
16703 new_fix->address = selected_address;
16704 new_fix->next = fix->next;
16705 fix->next = new_fix;
16706
16707 return new_fix;
16708 }
16709
16710 /* Record that there is a natural barrier in the insn stream at
16711 ADDRESS. */
16712 static void
16713 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16714 {
16715 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16716
16717 fix->insn = insn;
16718 fix->address = address;
16719
16720 fix->next = NULL;
16721 if (minipool_fix_head != NULL)
16722 minipool_fix_tail->next = fix;
16723 else
16724 minipool_fix_head = fix;
16725
16726 minipool_fix_tail = fix;
16727 }
16728
16729 /* Record INSN, which will need fixing up to load a value from the
16730 minipool. ADDRESS is the offset of the insn since the start of the
16731 function; LOC is a pointer to the part of the insn which requires
16732 fixing; VALUE is the constant that must be loaded, which is of type
16733 MODE. */
16734 static void
16735 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16736 machine_mode mode, rtx value)
16737 {
16738 gcc_assert (!arm_disable_literal_pool);
16739 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16740
16741 fix->insn = insn;
16742 fix->address = address;
16743 fix->loc = loc;
16744 fix->mode = mode;
16745 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16746 fix->value = value;
16747 fix->forwards = get_attr_pool_range (insn);
16748 fix->backwards = get_attr_neg_pool_range (insn);
16749 fix->minipool = NULL;
16750
16751 /* If an insn doesn't have a range defined for it, then it isn't
16752 expecting to be reworked by this code. Better to stop now than
16753 to generate duff assembly code. */
16754 gcc_assert (fix->forwards || fix->backwards);
16755
16756 /* If an entry requires 8-byte alignment then assume all constant pools
16757 require 4 bytes of padding. Trying to do this later on a per-pool
16758 basis is awkward because existing pool entries have to be modified. */
16759 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16760 minipool_pad = 4;
16761
16762 if (dump_file)
16763 {
16764 fprintf (dump_file,
16765 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16766 GET_MODE_NAME (mode),
16767 INSN_UID (insn), (unsigned long) address,
16768 -1 * (long)fix->backwards, (long)fix->forwards);
16769 arm_print_value (dump_file, fix->value);
16770 fprintf (dump_file, "\n");
16771 }
16772
16773 /* Add it to the chain of fixes. */
16774 fix->next = NULL;
16775
16776 if (minipool_fix_head != NULL)
16777 minipool_fix_tail->next = fix;
16778 else
16779 minipool_fix_head = fix;
16780
16781 minipool_fix_tail = fix;
16782 }
16783
16784 /* Return maximum allowed cost of synthesizing a 64-bit constant VAL inline.
16785 Returns the number of insns needed, or 99 if we always want to synthesize
16786 the value. */
16787 int
16788 arm_max_const_double_inline_cost ()
16789 {
16790 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16791 }
16792
16793 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16794 Returns the number of insns needed, or 99 if we don't know how to
16795 do it. */
16796 int
16797 arm_const_double_inline_cost (rtx val)
16798 {
16799 rtx lowpart, highpart;
16800 machine_mode mode;
16801
16802 mode = GET_MODE (val);
16803
16804 if (mode == VOIDmode)
16805 mode = DImode;
16806
16807 gcc_assert (GET_MODE_SIZE (mode) == 8);
16808
16809 lowpart = gen_lowpart (SImode, val);
16810 highpart = gen_highpart_mode (SImode, mode, val);
16811
16812 gcc_assert (CONST_INT_P (lowpart));
16813 gcc_assert (CONST_INT_P (highpart));
16814
16815 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16816 NULL_RTX, NULL_RTX, 0, 0)
16817 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16818 NULL_RTX, NULL_RTX, 0, 0));
16819 }
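
/* Worked example (illustration only): for the DImode constant
   0x0000000100000001 both 32-bit halves are the immediate 1, which
   arm_gen_constant can produce with a single insn each, so the cost
   returned is 2; that is within the limit given by
   arm_max_const_double_inline_cost, so such a constant would normally
   be synthesized inline rather than loaded from a literal pool.  */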
16820
16821 /* Cost of loading a SImode constant. */
16822 static inline int
16823 arm_const_inline_cost (enum rtx_code code, rtx val)
16824 {
16825 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16826 NULL_RTX, NULL_RTX, 1, 0);
16827 }
16828
16829 /* Return true if it is worthwhile to split a 64-bit constant into two
16830 32-bit operations. This is the case if optimizing for size, or
16831 if we have load delay slots, or if one 32-bit part can be done with
16832 a single data operation. */
16833 bool
16834 arm_const_double_by_parts (rtx val)
16835 {
16836 machine_mode mode = GET_MODE (val);
16837 rtx part;
16838
16839 if (optimize_size || arm_ld_sched)
16840 return true;
16841
16842 if (mode == VOIDmode)
16843 mode = DImode;
16844
16845 part = gen_highpart_mode (SImode, mode, val);
16846
16847 gcc_assert (CONST_INT_P (part));
16848
16849 if (const_ok_for_arm (INTVAL (part))
16850 || const_ok_for_arm (~INTVAL (part)))
16851 return true;
16852
16853 part = gen_lowpart (SImode, val);
16854
16855 gcc_assert (CONST_INT_P (part));
16856
16857 if (const_ok_for_arm (INTVAL (part))
16858 || const_ok_for_arm (~INTVAL (part)))
16859 return true;
16860
16861 return false;
16862 }
16863
16864 /* Return true if it is possible to inline both the high and low parts
16865 of a 64-bit constant into 32-bit data processing instructions. */
16866 bool
16867 arm_const_double_by_immediates (rtx val)
16868 {
16869 machine_mode mode = GET_MODE (val);
16870 rtx part;
16871
16872 if (mode == VOIDmode)
16873 mode = DImode;
16874
16875 part = gen_highpart_mode (SImode, mode, val);
16876
16877 gcc_assert (CONST_INT_P (part));
16878
16879 if (!const_ok_for_arm (INTVAL (part)))
16880 return false;
16881
16882 part = gen_lowpart (SImode, val);
16883
16884 gcc_assert (CONST_INT_P (part));
16885
16886 if (!const_ok_for_arm (INTVAL (part)))
16887 return false;
16888
16889 return true;
16890 }
16891
16892 /* Scan INSN and note any of its operands that need fixing.
16893 If DO_PUSHES is false we do not actually push any of the fixups
16894 needed. */
16895 static void
16896 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16897 {
16898 int opno;
16899
16900 extract_constrain_insn (insn);
16901
16902 if (recog_data.n_alternatives == 0)
16903 return;
16904
16905 /* Fill in recog_op_alt with information about the constraints of
16906 this insn. */
16907 preprocess_constraints (insn);
16908
16909 const operand_alternative *op_alt = which_op_alt ();
16910 for (opno = 0; opno < recog_data.n_operands; opno++)
16911 {
16912 /* Things we need to fix can only occur in inputs. */
16913 if (recog_data.operand_type[opno] != OP_IN)
16914 continue;
16915
16916 /* If this alternative is a memory reference, then any mention
16917 of constants in this alternative is really to fool reload
16918 into allowing us to accept one there. We need to fix them up
16919 now so that we output the right code. */
16920 if (op_alt[opno].memory_ok)
16921 {
16922 rtx op = recog_data.operand[opno];
16923
16924 if (CONSTANT_P (op))
16925 {
16926 if (do_pushes)
16927 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16928 recog_data.operand_mode[opno], op);
16929 }
16930 else if (MEM_P (op)
16931 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16932 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16933 {
16934 if (do_pushes)
16935 {
16936 rtx cop = avoid_constant_pool_reference (op);
16937
16938 /* Casting the address of something to a mode narrower
16939 than a word can cause avoid_constant_pool_reference()
16940 to return the pool reference itself. That's no good to
16941 us here. Let's just hope that we can use the
16942 constant pool value directly. */
16943 if (op == cop)
16944 cop = get_pool_constant (XEXP (op, 0));
16945
16946 push_minipool_fix (insn, address,
16947 recog_data.operand_loc[opno],
16948 recog_data.operand_mode[opno], cop);
16949 }
16950
16951 }
16952 }
16953 }
16954
16955 return;
16956 }
16957
16958 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16959 and unions in the context of ARMv8-M Security Extensions. It is used as a
16960 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16961 functions. The PADDING_BITS_TO_CLEAR pointer can be the base to either one
16962 or four masks, depending on whether it is being computed for a
16963 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16964 respectively. The tree for the type of the argument or a field within an
16965 argument is passed in ARG_TYPE, the current register this argument or field
16966 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16967 argument or field starts at is passed in STARTING_BIT and the last used bit
16968 is kept in LAST_USED_BIT which is also updated accordingly. */
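
/* Illustrative sketch with a hypothetical type, not part of the
   implementation: for an argument of type struct { char c; short s; }
   passed in r0, the char occupies bits 0-7 and the short bits 16-31, so
   bits 8-15 are padding; padding_bits_to_clear[0] ends up as 0x0000ff00
   and r0 is recorded in the returned mask as a register that must not be
   fully cleared.  */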
16969
16970 static unsigned HOST_WIDE_INT
16971 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16972 uint32_t * padding_bits_to_clear,
16973 unsigned starting_bit, int * last_used_bit)
16974
16975 {
16976 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16977
16978 if (TREE_CODE (arg_type) == RECORD_TYPE)
16979 {
16980 unsigned current_bit = starting_bit;
16981 tree field;
16982 long int offset, size;
16983
16984
16985 field = TYPE_FIELDS (arg_type);
16986 while (field)
16987 {
16988 /* The offset within a structure is always an offset from
16989 the start of that structure. Make sure we take that into account in
16990 the calculation of the register-based offset that we use here. */
16991 offset = starting_bit;
16992 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16993 offset %= 32;
16994
16995 /* This is the actual size of the field, for bitfields this is the
16996 bitfield width and not the container size. */
16997 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16998
16999 if (*last_used_bit != offset)
17000 {
17001 if (offset < *last_used_bit)
17002 {
17003 /* This field's offset is before the 'last_used_bit', which
17004 means this field goes in the next register. So we need to
17005 pad the rest of the current register and increase the
17006 register number. */
17007 uint32_t mask;
17008 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
17009 mask++;
17010
17011 padding_bits_to_clear[*regno] |= mask;
17012 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
17013 (*regno)++;
17014 }
17015 else
17016 {
17017 /* Otherwise we pad the bits between the last field's end and
17018 the start of the new field. */
17019 uint32_t mask;
17020
17021 mask = ((uint32_t)-1) >> (32 - offset);
17022 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
17023 padding_bits_to_clear[*regno] |= mask;
17024 }
17025 current_bit = offset;
17026 }
17027
17028 /* Calculate further padding bits for inner structs/unions too. */
17029 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
17030 {
17031 *last_used_bit = current_bit;
17032 not_to_clear_reg_mask
17033 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
17034 padding_bits_to_clear, offset,
17035 last_used_bit);
17036 }
17037 else
17038 {
17039 /* Update 'current_bit' with this field's size. If the
17040 'current_bit' lies in a subsequent register, update 'regno' and
17041 reset 'current_bit' to point to the current bit in that new
17042 register. */
17043 current_bit += size;
17044 while (current_bit >= 32)
17045 {
17046 current_bit -= 32;
17047 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
17048 (*regno)++;
17049 }
17050 *last_used_bit = current_bit;
17051 }
17052
17053 field = TREE_CHAIN (field);
17054 }
17055 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
17056 }
17057 else if (TREE_CODE (arg_type) == UNION_TYPE)
17058 {
17059 tree field, field_t;
17060 int i, regno_t, field_size;
17061 int max_reg = -1;
17062 int max_bit = -1;
17063 uint32_t mask;
17064 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
17065 = {-1, -1, -1, -1};
17066
17067 /* To compute the padding bits in a union we only consider bits as
17068 padding bits if they are always either a padding bit or fall outside a
17069 field's size for all fields in the union. */
17070 field = TYPE_FIELDS (arg_type);
17071 while (field)
17072 {
17073 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
17074 = {0U, 0U, 0U, 0U};
17075 int last_used_bit_t = *last_used_bit;
17076 regno_t = *regno;
17077 field_t = TREE_TYPE (field);
17078
17079 /* If the field's type is either a record or a union make sure to
17080 compute their padding bits too. */
17081 if (RECORD_OR_UNION_TYPE_P (field_t))
17082 not_to_clear_reg_mask
17083 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
17084 &padding_bits_to_clear_t[0],
17085 starting_bit, &last_used_bit_t);
17086 else
17087 {
17088 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
17089 regno_t = (field_size / 32) + *regno;
17090 last_used_bit_t = (starting_bit + field_size) % 32;
17091 }
17092
17093 for (i = *regno; i < regno_t; i++)
17094 {
17095 /* For all but the last register used by this field only keep the
17096 padding bits that were padding bits in this field. */
17097 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
17098 }
17099
17100 /* For the last register, keep all padding bits that were padding
17101 bits in this field and any padding bits that are still valid
17102 as padding bits but fall outside of this field's size. */
17103 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
17104 padding_bits_to_clear_res[regno_t]
17105 &= padding_bits_to_clear_t[regno_t] | mask;
17106
17107 /* Update the maximum size of the fields in terms of registers used
17108 ('max_reg') and the 'last_used_bit' in said register. */
17109 if (max_reg < regno_t)
17110 {
17111 max_reg = regno_t;
17112 max_bit = last_used_bit_t;
17113 }
17114 else if (max_reg == regno_t && max_bit < last_used_bit_t)
17115 max_bit = last_used_bit_t;
17116
17117 field = TREE_CHAIN (field);
17118 }
17119
17120 /* Update the current padding_bits_to_clear using the intersection of the
17121 padding bits of all the fields. */
17122 for (i = *regno; i < max_reg; i++)
17123 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
17124
17125 /* Do not keep trailing padding bits; we do not know yet whether this
17126 is the end of the argument. */
17127 mask = ((uint32_t) 1 << max_bit) - 1;
17128 padding_bits_to_clear[max_reg]
17129 |= padding_bits_to_clear_res[max_reg] & mask;
17130
17131 *regno = max_reg;
17132 *last_used_bit = max_bit;
17133 }
17134 else
17135 /* This function should only be used for structs and unions. */
17136 gcc_unreachable ();
17137
17138 return not_to_clear_reg_mask;
17139 }
17140
17141 /* In the context of ARMv8-M Security Extensions, this function is used for both
17142 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
17143 registers are used when returning or passing arguments, which is then
17144 returned as a mask. It will also compute a mask to indicate padding/unused
17145 bits for each of these registers, and passes this through the
17146 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
17147 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
17148 the starting register used to pass this argument or return value is passed
17149 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
17150 for struct and union types. */
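
/* Illustrative sketch, not part of the implementation: a double argument
   passed in d0 under -mfloat-abi=hard has ARM_NUM_REGS equal to 2, so
   the returned mask covers s0 and s1 and neither register is scrubbed
   before the call; a plain int argument in r0 only marks r0.  */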
17151
17152 static unsigned HOST_WIDE_INT
17153 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
17154 uint32_t * padding_bits_to_clear)
17155
17156 {
17157 int last_used_bit = 0;
17158 unsigned HOST_WIDE_INT not_to_clear_mask;
17159
17160 if (RECORD_OR_UNION_TYPE_P (arg_type))
17161 {
17162 not_to_clear_mask
17163 = comp_not_to_clear_mask_str_un (arg_type, &regno,
17164 padding_bits_to_clear, 0,
17165 &last_used_bit);
17166
17167
17168 /* If the 'last_used_bit' is not zero, that means we are still using a
17169 part of the last 'regno'. In such cases we must clear the trailing
17170 bits. Otherwise we are not using regno and we should mark it
17171 for clearing. */
17172 if (last_used_bit != 0)
17173 padding_bits_to_clear[regno]
17174 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
17175 else
17176 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
17177 }
17178 else
17179 {
17180 not_to_clear_mask = 0;
17181 /* We are not dealing with structs or unions, so these arguments may be
17182 passed in floating point registers too. In some cases a BLKmode is
17183 used when returning or passing arguments in multiple VFP registers. */
17184 if (GET_MODE (arg_rtx) == BLKmode)
17185 {
17186 int i, arg_regs;
17187 rtx reg;
17188
17189 /* This should really only occur when dealing with the hard-float
17190 ABI. */
17191 gcc_assert (TARGET_HARD_FLOAT_ABI);
17192
17193 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
17194 {
17195 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
17196 gcc_assert (REG_P (reg));
17197
17198 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
17199
17200 /* If we are dealing with DF mode, make sure we don't
17201 clear either of the registers it addresses. */
17202 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
17203 if (arg_regs > 1)
17204 {
17205 unsigned HOST_WIDE_INT mask;
17206 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
17207 mask -= HOST_WIDE_INT_1U << REGNO (reg);
17208 not_to_clear_mask |= mask;
17209 }
17210 }
17211 }
17212 else
17213 {
17214 /* Otherwise we can rely on the MODE to determine how many registers
17215 are being used by this argument. */
17216 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
17217 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17218 if (arg_regs > 1)
17219 {
17220 unsigned HOST_WIDE_INT
17221 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
17222 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17223 not_to_clear_mask |= mask;
17224 }
17225 }
17226 }
17227
17228 return not_to_clear_mask;
17229 }
17230
17231 /* Clear secrets from registers before doing a cmse_nonsecure_call or returning from
17232 a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates which registers
17233 are to be fully cleared, using the value in register CLEARING_REG if more
17234 efficient. The PADDING_BITS_LEN entries array PADDING_BITS_TO_CLEAR gives
17235 the bits that need to be cleared in caller-saved core registers, with
17236 SCRATCH_REG used as a scratch register for that clearing.
17237
17238 NOTE: one of the three following conditions must hold:
17239 - SCRATCH_REG is a low register
17240 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
17241 in TO_CLEAR_BITMAP)
17242 - CLEARING_REG is a low register. */
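
/* Illustrative sketch of the padding-bit clearing below (the register is
   hypothetical): if padding_bits_to_clear[0] is 0x0000ff00, the inverse
   mask is built in the scratch register in two halves (0x00ff by a move,
   0xffff into the top 16 bits via a zero_extract) and an
   "ands r0, r0, scratch" is emitted, so only the padding bits of r0 are
   zeroed while the argument bits are preserved.  */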
17243
17244 static void
17245 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
17246 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
17247 {
17248 bool saved_clearing = false;
17249 rtx saved_clearing_reg = NULL_RTX;
17250 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
17251
17252 gcc_assert (arm_arch_cmse);
17253
17254 if (!bitmap_empty_p (to_clear_bitmap))
17255 {
17256 minregno = bitmap_first_set_bit (to_clear_bitmap);
17257 maxregno = bitmap_last_set_bit (to_clear_bitmap);
17258 }
17259 clearing_regno = REGNO (clearing_reg);
17260
17261 /* Clear padding bits. */
17262 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
17263 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
17264 {
17265 uint64_t mask;
17266 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
17267
17268 if (padding_bits_to_clear[i] == 0)
17269 continue;
17270
17271 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
17272 CLEARING_REG as scratch. */
17273 if (TARGET_THUMB1
17274 && REGNO (scratch_reg) > LAST_LO_REGNUM)
17275 {
17276 /* clearing_reg is not to be cleared, so copy its value into scratch_reg
17277 such that we can use clearing_reg to clear the unused bits in the
17278 arguments. */
17279 if ((clearing_regno > maxregno
17280 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
17281 && !saved_clearing)
17282 {
17283 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
17284 emit_move_insn (scratch_reg, clearing_reg);
17285 saved_clearing = true;
17286 saved_clearing_reg = scratch_reg;
17287 }
17288 scratch_reg = clearing_reg;
17289 }
17290
17291 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
17292 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
17293 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
17294
17295 /* Fill the top half of the negated padding_bits_to_clear[i]. */
17296 mask = (~padding_bits_to_clear[i]) >> 16;
17297 rtx16 = gen_int_mode (16, SImode);
17298 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
17299 if (mask)
17300 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
17301
17302 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
17303 }
17304 if (saved_clearing)
17305 emit_move_insn (clearing_reg, saved_clearing_reg);
17306
17307
17308 /* Clear full registers. */
17309
17310 /* If not marked for clearing, clearing_reg already does not contain
17311 any secret. */
17312 if (clearing_regno <= maxregno
17313 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
17314 {
17315 emit_move_insn (clearing_reg, const0_rtx);
17316 emit_use (clearing_reg);
17317 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
17318 }
17319
17320 for (regno = minregno; regno <= maxregno; regno++)
17321 {
17322 if (!bitmap_bit_p (to_clear_bitmap, regno))
17323 continue;
17324
17325 if (IS_VFP_REGNUM (regno))
17326 {
17327 /* If regno is an even vfp register and its successor is also to
17328 be cleared, use vmov. */
17329 if (TARGET_VFP_DOUBLE
17330 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17331 && bitmap_bit_p (to_clear_bitmap, regno + 1))
17332 {
17333 emit_move_insn (gen_rtx_REG (DFmode, regno),
17334 CONST1_RTX (DFmode));
17335 emit_use (gen_rtx_REG (DFmode, regno));
17336 regno++;
17337 }
17338 else
17339 {
17340 emit_move_insn (gen_rtx_REG (SFmode, regno),
17341 CONST1_RTX (SFmode));
17342 emit_use (gen_rtx_REG (SFmode, regno));
17343 }
17344 }
17345 else
17346 {
17347 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
17348 emit_use (gen_rtx_REG (SImode, regno));
17349 }
17350 }
17351 }
17352
17353 /* Clears caller-saved registers not used to pass arguments before a
17354 cmse_nonsecure_call. Saving, clearing and restoring of callee-saved
17355 registers is done in the __gnu_cmse_nonsecure_call libcall.
17356 See libgcc/config/arm/cmse_nonsecure_call.S. */
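
/* Illustrative sketch for a hypothetical call, not part of the
   implementation: for a cmse_nonsecure_call of type "int f (int)" with
   -mfloat-abi=hard, r0 carries the argument and is left alone, r1-r3 are
   overwritten with the (non-secret) value of the clearing register, the
   caller-saved VFP registers s0-s15 are scrubbed, and the register
   holding the function address only has its LSB cleared by the shift
   pair emitted below.  */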
17357
17358 static void
17359 cmse_nonsecure_call_clear_caller_saved (void)
17360 {
17361 basic_block bb;
17362
17363 FOR_EACH_BB_FN (bb, cfun)
17364 {
17365 rtx_insn *insn;
17366
17367 FOR_BB_INSNS (bb, insn)
17368 {
17369 unsigned address_regnum, regno, maxregno =
17370 TARGET_HARD_FLOAT_ABI ? D7_VFP_REGNUM : NUM_ARG_REGS - 1;
17371 auto_sbitmap to_clear_bitmap (maxregno + 1);
17372 rtx_insn *seq;
17373 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
17374 rtx address;
17375 CUMULATIVE_ARGS args_so_far_v;
17376 cumulative_args_t args_so_far;
17377 tree arg_type, fntype;
17378 bool first_param = true;
17379 function_args_iterator args_iter;
17380 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
17381
17382 if (!NONDEBUG_INSN_P (insn))
17383 continue;
17384
17385 if (!CALL_P (insn))
17386 continue;
17387
17388 pat = PATTERN (insn);
17389 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
17390 call = XVECEXP (pat, 0, 0);
17391
17392 /* Get the real call RTX if the insn sets a value, ie. returns. */
17393 if (GET_CODE (call) == SET)
17394 call = SET_SRC (call);
17395
17396 /* Check if it is a cmse_nonsecure_call. */
17397 unspec = XEXP (call, 0);
17398 if (GET_CODE (unspec) != UNSPEC
17399 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
17400 continue;
17401
17402 /* Determine the caller-saved registers we need to clear. */
17403 bitmap_clear (to_clear_bitmap);
17404 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
17405
17406 /* Only look at the caller-saved floating point registers in case of
17407 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
17408 lazy store and loads which clear both caller- and callee-saved
17409 registers. */
17410 if (TARGET_HARD_FLOAT_ABI)
17411 {
17412 auto_sbitmap float_bitmap (maxregno + 1);
17413
17414 bitmap_clear (float_bitmap);
17415 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
17416 D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1);
17417 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
17418 }
17419
17420 /* Make sure the register used to hold the function address is not
17421 cleared. */
17422 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17423 gcc_assert (MEM_P (address));
17424 gcc_assert (REG_P (XEXP (address, 0)));
17425 address_regnum = REGNO (XEXP (address, 0));
17426 if (address_regnum < R0_REGNUM + NUM_ARG_REGS)
17427 bitmap_clear_bit (to_clear_bitmap, address_regnum);
17428
17429 /* Set basic block of call insn so that df rescan is performed on
17430 insns inserted here. */
17431 set_block_for_insn (insn, bb);
17432 df_set_flags (DF_DEFER_INSN_RESCAN);
17433 start_sequence ();
17434
17435 /* Make sure the scheduler doesn't schedule other insns beyond
17436 here. */
17437 emit_insn (gen_blockage ());
17438
17439 /* Walk through all arguments and clear registers appropriately. */
17441 fntype = TREE_TYPE (MEM_EXPR (address));
17442 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17443 NULL_TREE);
17444 args_so_far = pack_cumulative_args (&args_so_far_v);
17445 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17446 {
17447 rtx arg_rtx;
17448 uint64_t to_clear_args_mask;
17449 machine_mode arg_mode = TYPE_MODE (arg_type);
17450
17451 if (VOID_TYPE_P (arg_type))
17452 continue;
17453
17454 if (!first_param)
17455 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17456 true);
17457
17458 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17459 true);
17460 gcc_assert (REG_P (arg_rtx));
17461 to_clear_args_mask
17462 = compute_not_to_clear_mask (arg_type, arg_rtx,
17463 REGNO (arg_rtx),
17464 &padding_bits_to_clear[0]);
17465 if (to_clear_args_mask)
17466 {
17467 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17468 {
17469 if (to_clear_args_mask & (1ULL << regno))
17470 bitmap_clear_bit (to_clear_bitmap, regno);
17471 }
17472 }
17473
17474 first_param = false;
17475 }
17476
17477 /* We use right shift and left shift to clear the LSB of the address
17478 we jump to instead of using bic, to avoid having to use an extra
17479 register on Thumb-1. */
17480 clearing_reg = XEXP (address, 0);
17481 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
17482 emit_insn (gen_rtx_SET (clearing_reg, shift));
17483 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
17484 emit_insn (gen_rtx_SET (clearing_reg, shift));
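
/* Illustration (hypothetical register): for an address held in r4 this
   emits "lsrs r4, r4, #1" followed by "lsls r4, r4, #1", which zeroes
   bit 0 without needing an extra register for a mask.  */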
17485
17486 /* Clear caller-saved registers that leak before doing a non-secure
17487 call. */
17488 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
17489 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
17490 NUM_ARG_REGS, ip_reg, clearing_reg);
17491
17492 seq = get_insns ();
17493 end_sequence ();
17494 emit_insn_before (seq, insn);
17495 }
17496 }
17497 }
17498
17499 /* Rewrite move insn into subtract of 0 if the condition codes will
17500 be useful in next conditional jump insn. */
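
/* Illustrative sketch with hypothetical registers: in a sequence such as
   "movs r1, r2; ...; cmp r1, #0; beq .L1" the move is rewritten as
   "subs r1, r2, #0", which sets the condition codes from the moved
   value, so the later conditional branch can test those flags and the
   separate compare can be omitted when the cbranch is output.  */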
17501
17502 static void
17503 thumb1_reorg (void)
17504 {
17505 basic_block bb;
17506
17507 FOR_EACH_BB_FN (bb, cfun)
17508 {
17509 rtx dest, src;
17510 rtx cmp, op0, op1, set = NULL;
17511 rtx_insn *prev, *insn = BB_END (bb);
17512 bool insn_clobbered = false;
17513
17514 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17515 insn = PREV_INSN (insn);
17516
17517 /* Find the last cbranchsi4_insn in basic block BB. */
17518 if (insn == BB_HEAD (bb)
17519 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17520 continue;
17521
17522 /* Get the register with which we are comparing. */
17523 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17524 op0 = XEXP (cmp, 0);
17525 op1 = XEXP (cmp, 1);
17526
17527 /* Check that comparison is against ZERO. */
17528 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17529 continue;
17530
17531 /* Find the first flag setting insn before INSN in basic block BB. */
17532 gcc_assert (insn != BB_HEAD (bb));
17533 for (prev = PREV_INSN (insn);
17534 (!insn_clobbered
17535 && prev != BB_HEAD (bb)
17536 && (NOTE_P (prev)
17537 || DEBUG_INSN_P (prev)
17538 || ((set = single_set (prev)) != NULL
17539 && get_attr_conds (prev) == CONDS_NOCOND)));
17540 prev = PREV_INSN (prev))
17541 {
17542 if (reg_set_p (op0, prev))
17543 insn_clobbered = true;
17544 }
17545
17546 /* Skip if op0 is clobbered by an insn other than prev. */
17547 if (insn_clobbered)
17548 continue;
17549
17550 if (!set)
17551 continue;
17552
17553 dest = SET_DEST (set);
17554 src = SET_SRC (set);
17555 if (!low_register_operand (dest, SImode)
17556 || !low_register_operand (src, SImode))
17557 continue;
17558
17559 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17560 in INSN. Both src and dest of the move insn are checked. */
17561 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17562 {
17563 dest = copy_rtx (dest);
17564 src = copy_rtx (src);
17565 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17566 PATTERN (prev) = gen_rtx_SET (dest, src);
17567 INSN_CODE (prev) = -1;
17568 /* Set test register in INSN to dest. */
17569 XEXP (cmp, 0) = copy_rtx (dest);
17570 INSN_CODE (insn) = -1;
17571 }
17572 }
17573 }
17574
17575 /* Convert instructions to their cc-clobbering variant if possible, since
17576 that allows us to use smaller encodings. */
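
/* Illustrative sketch with hypothetical registers: when the condition
   codes are dead, "add r0, r1, r2" (a 32-bit encoding) can become
   "adds r0, r1, r2", which has a 16-bit encoding; the loop below does
   this by wrapping the SET in a PARALLEL with a clobber of CC_REGNUM so
   that the flag-setting variant of the pattern is matched.  */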
17577
17578 static void
17579 thumb2_reorg (void)
17580 {
17581 basic_block bb;
17582 regset_head live;
17583
17584 INIT_REG_SET (&live);
17585
17586 /* We are freeing block_for_insn in the toplev to keep compatibility
17587 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17588 compute_bb_for_insn ();
17589 df_analyze ();
17590
17591 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17592
17593 FOR_EACH_BB_FN (bb, cfun)
17594 {
17595 if ((current_tune->disparage_flag_setting_t16_encodings
17596 == tune_params::DISPARAGE_FLAGS_ALL)
17597 && optimize_bb_for_speed_p (bb))
17598 continue;
17599
17600 rtx_insn *insn;
17601 Convert_Action action = SKIP;
17602 Convert_Action action_for_partial_flag_setting
17603 = ((current_tune->disparage_flag_setting_t16_encodings
17604 != tune_params::DISPARAGE_FLAGS_NEITHER)
17605 && optimize_bb_for_speed_p (bb))
17606 ? SKIP : CONV;
17607
17608 COPY_REG_SET (&live, DF_LR_OUT (bb));
17609 df_simulate_initialize_backwards (bb, &live);
17610 FOR_BB_INSNS_REVERSE (bb, insn)
17611 {
17612 if (NONJUMP_INSN_P (insn)
17613 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17614 && GET_CODE (PATTERN (insn)) == SET)
17615 {
17616 action = SKIP;
17617 rtx pat = PATTERN (insn);
17618 rtx dst = XEXP (pat, 0);
17619 rtx src = XEXP (pat, 1);
17620 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17621
17622 if (UNARY_P (src) || BINARY_P (src))
17623 op0 = XEXP (src, 0);
17624
17625 if (BINARY_P (src))
17626 op1 = XEXP (src, 1);
17627
17628 if (low_register_operand (dst, SImode))
17629 {
17630 switch (GET_CODE (src))
17631 {
17632 case PLUS:
17633 /* Adding two registers and storing the result
17634 in the first source is already a 16-bit
17635 operation. */
17636 if (rtx_equal_p (dst, op0)
17637 && register_operand (op1, SImode))
17638 break;
17639
17640 if (low_register_operand (op0, SImode))
17641 {
17642 /* ADDS <Rd>,<Rn>,<Rm> */
17643 if (low_register_operand (op1, SImode))
17644 action = CONV;
17645 /* ADDS <Rdn>,#<imm8> */
17646 /* SUBS <Rdn>,#<imm8> */
17647 else if (rtx_equal_p (dst, op0)
17648 && CONST_INT_P (op1)
17649 && IN_RANGE (INTVAL (op1), -255, 255))
17650 action = CONV;
17651 /* ADDS <Rd>,<Rn>,#<imm3> */
17652 /* SUBS <Rd>,<Rn>,#<imm3> */
17653 else if (CONST_INT_P (op1)
17654 && IN_RANGE (INTVAL (op1), -7, 7))
17655 action = CONV;
17656 }
17657 /* ADCS <Rd>, <Rn> */
17658 else if (GET_CODE (XEXP (src, 0)) == PLUS
17659 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17660 && low_register_operand (XEXP (XEXP (src, 0), 1),
17661 SImode)
17662 && COMPARISON_P (op1)
17663 && cc_register (XEXP (op1, 0), VOIDmode)
17664 && maybe_get_arm_condition_code (op1) == ARM_CS
17665 && XEXP (op1, 1) == const0_rtx)
17666 action = CONV;
17667 break;
17668
17669 case MINUS:
17670 /* RSBS <Rd>,<Rn>,#0
17671 Not handled here: see NEG below. */
17672 /* SUBS <Rd>,<Rn>,#<imm3>
17673 SUBS <Rdn>,#<imm8>
17674 Not handled here: see PLUS above. */
17675 /* SUBS <Rd>,<Rn>,<Rm> */
17676 if (low_register_operand (op0, SImode)
17677 && low_register_operand (op1, SImode))
17678 action = CONV;
17679 break;
17680
17681 case MULT:
17682 /* MULS <Rdm>,<Rn>,<Rdm>
17683 As an exception to the rule, this is only used
17684 when optimizing for size since MULS is slow on all
17685 known implementations. We do not even want to use
17686 MULS in cold code, if optimizing for speed, so we
17687 test the global flag here. */
17688 if (!optimize_size)
17689 break;
17690 /* Fall through. */
17691 case AND:
17692 case IOR:
17693 case XOR:
17694 /* ANDS <Rdn>,<Rm> */
17695 if (rtx_equal_p (dst, op0)
17696 && low_register_operand (op1, SImode))
17697 action = action_for_partial_flag_setting;
17698 else if (rtx_equal_p (dst, op1)
17699 && low_register_operand (op0, SImode))
17700 action = action_for_partial_flag_setting == SKIP
17701 ? SKIP : SWAP_CONV;
17702 break;
17703
17704 case ASHIFTRT:
17705 case ASHIFT:
17706 case LSHIFTRT:
17707 /* ASRS <Rdn>,<Rm> */
17708 /* LSRS <Rdn>,<Rm> */
17709 /* LSLS <Rdn>,<Rm> */
17710 if (rtx_equal_p (dst, op0)
17711 && low_register_operand (op1, SImode))
17712 action = action_for_partial_flag_setting;
17713 /* ASRS <Rd>,<Rm>,#<imm5> */
17714 /* LSRS <Rd>,<Rm>,#<imm5> */
17715 /* LSLS <Rd>,<Rm>,#<imm5> */
17716 else if (low_register_operand (op0, SImode)
17717 && CONST_INT_P (op1)
17718 && IN_RANGE (INTVAL (op1), 0, 31))
17719 action = action_for_partial_flag_setting;
17720 break;
17721
17722 case ROTATERT:
17723 /* RORS <Rdn>,<Rm> */
17724 if (rtx_equal_p (dst, op0)
17725 && low_register_operand (op1, SImode))
17726 action = action_for_partial_flag_setting;
17727 break;
17728
17729 case NOT:
17730 /* MVNS <Rd>,<Rm> */
17731 if (low_register_operand (op0, SImode))
17732 action = action_for_partial_flag_setting;
17733 break;
17734
17735 case NEG:
17736 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17737 if (low_register_operand (op0, SImode))
17738 action = CONV;
17739 break;
17740
17741 case CONST_INT:
17742 /* MOVS <Rd>,#<imm8> */
17743 if (CONST_INT_P (src)
17744 && IN_RANGE (INTVAL (src), 0, 255))
17745 action = action_for_partial_flag_setting;
17746 break;
17747
17748 case REG:
17749 /* MOVS and MOV<c> with registers have different
17750 encodings, so are not relevant here. */
17751 break;
17752
17753 default:
17754 break;
17755 }
17756 }
17757
17758 if (action != SKIP)
17759 {
17760 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17761 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17762 rtvec vec;
17763
17764 if (action == SWAP_CONV)
17765 {
17766 src = copy_rtx (src);
17767 XEXP (src, 0) = op1;
17768 XEXP (src, 1) = op0;
17769 pat = gen_rtx_SET (dst, src);
17770 vec = gen_rtvec (2, pat, clobber);
17771 }
17772 else /* action == CONV */
17773 vec = gen_rtvec (2, pat, clobber);
17774
17775 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17776 INSN_CODE (insn) = -1;
17777 }
17778 }
17779
17780 if (NONDEBUG_INSN_P (insn))
17781 df_simulate_one_insn_backwards (bb, insn, &live);
17782 }
17783 }
17784
17785 CLEAR_REG_SET (&live);
17786 }
17787
17788 /* Gcc puts the pool in the wrong place for ARM, since we can only
17789 load addresses a limited distance around the pc. We do some
17790 special munging to move the constant pool values to the correct
17791 point in the code. */
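/* A rough sketch of the pass, inferred from the code below: after the
   Thumb-specific reorg passes run, every insn is scanned and each
   constant that cannot be handled inline is recorded as a "fix"; the
   fixes are grouped into minipools dumped after a suitable barrier (an
   existing one, or one created by jumping around the pool), and each
   fixed-up insn is finally rewritten to load its constant from the pool
   through a label-relative address.  */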
17792 static void
17793 arm_reorg (void)
17794 {
17795 rtx_insn *insn;
17796 HOST_WIDE_INT address = 0;
17797 Mfix * fix;
17798
17799 if (use_cmse)
17800 cmse_nonsecure_call_clear_caller_saved ();
17801
17802 /* We cannot run the Thumb passes for thunks because there is no CFG. */
17803 if (cfun->is_thunk)
17804 ;
17805 else if (TARGET_THUMB1)
17806 thumb1_reorg ();
17807 else if (TARGET_THUMB2)
17808 thumb2_reorg ();
17809
17810 /* Ensure all insns that must be split have been split at this point.
17811 Otherwise, the pool placement code below may compute incorrect
17812 insn lengths. Note that when optimizing, all insns have already
17813 been split at this point. */
17814 if (!optimize)
17815 split_all_insns_noflow ();
17816
17817 /* Literal pools have been disabled, so none should be needed; return
17818 early to make sure we never attempt to create one. */
17819 if (arm_disable_literal_pool)
17820 return;
17821
17822 minipool_fix_head = minipool_fix_tail = NULL;
17823
17824 /* The first insn must always be a note, or the code below won't
17825 scan it properly. */
17826 insn = get_insns ();
17827 gcc_assert (NOTE_P (insn));
17828 minipool_pad = 0;
17829
17830 /* Scan all the insns and record the operands that will need fixing. */
17831 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17832 {
17833 if (BARRIER_P (insn))
17834 push_minipool_barrier (insn, address);
17835 else if (INSN_P (insn))
17836 {
17837 rtx_jump_table_data *table;
17838
17839 note_invalid_constants (insn, address, true);
17840 address += get_attr_length (insn);
17841
17842 /* If the insn is a vector jump, add the size of the table
17843 and skip the table. */
17844 if (tablejump_p (insn, NULL, &table))
17845 {
17846 address += get_jump_table_size (table);
17847 insn = table;
17848 }
17849 }
17850 else if (LABEL_P (insn))
17851 /* Add the worst-case padding due to alignment. We don't add
17852 the _current_ padding because the minipool insertions
17853 themselves might change it. */
17854 address += get_label_padding (insn);
17855 }
17856
17857 fix = minipool_fix_head;
17858
17859 /* Now scan the fixups and perform the required changes. */
17860 while (fix)
17861 {
17862 Mfix * ftmp;
17863 Mfix * fdel;
17864 Mfix * last_added_fix;
17865 Mfix * last_barrier = NULL;
17866 Mfix * this_fix;
17867
17868 /* Skip any further barriers before the next fix. */
17869 while (fix && BARRIER_P (fix->insn))
17870 fix = fix->next;
17871
17872 /* No more fixes. */
17873 if (fix == NULL)
17874 break;
17875
17876 last_added_fix = NULL;
17877
17878 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17879 {
17880 if (BARRIER_P (ftmp->insn))
17881 {
17882 if (ftmp->address >= minipool_vector_head->max_address)
17883 break;
17884
17885 last_barrier = ftmp;
17886 }
17887 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17888 break;
17889
17890 last_added_fix = ftmp; /* Keep track of the last fix added. */
17891 }
17892
17893 /* If we found a barrier, drop back to that; any fixes that we
17894 could have reached but come after the barrier will now go in
17895 the next mini-pool. */
17896 if (last_barrier != NULL)
17897 {
17898 /* Reduce the refcount for those fixes that won't go into this
17899 pool after all. */
17900 for (fdel = last_barrier->next;
17901 fdel && fdel != ftmp;
17902 fdel = fdel->next)
17903 {
17904 fdel->minipool->refcount--;
17905 fdel->minipool = NULL;
17906 }
17907
17908 ftmp = last_barrier;
17909 }
17910 else
17911 {
17912 /* ftmp is the first fix that we can't fit into this pool and
17913 there are no natural barriers that we could use. Insert a
17914 new barrier in the code somewhere between the previous
17915 fix and this one, and arrange to jump around it. */
17916 HOST_WIDE_INT max_address;
17917
17918 /* The last item on the list of fixes must be a barrier, so
17919 we can never run off the end of the list of fixes without
17920 last_barrier being set. */
17921 gcc_assert (ftmp);
17922
17923 max_address = minipool_vector_head->max_address;
17924 /* Check that there isn't another fix that is in range that
17925 we couldn't fit into this pool because the pool was
17926 already too large: we need to put the pool before such an
17927 instruction. The pool itself may come just after the
17928 fix because create_fix_barrier also allows space for a
17929 jump instruction. */
17930 if (ftmp->address < max_address)
17931 max_address = ftmp->address + 1;
17932
17933 last_barrier = create_fix_barrier (last_added_fix, max_address);
17934 }
17935
17936 assign_minipool_offsets (last_barrier);
17937
17938 while (ftmp)
17939 {
17940 if (!BARRIER_P (ftmp->insn)
17941 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17942 == NULL))
17943 break;
17944
17945 ftmp = ftmp->next;
17946 }
17947
17948 /* Scan over the fixes we have identified for this pool, fixing them
17949 up and adding the constants to the pool itself. */
17950 for (this_fix = fix; this_fix && ftmp != this_fix;
17951 this_fix = this_fix->next)
17952 if (!BARRIER_P (this_fix->insn))
17953 {
17954 rtx addr
17955 = plus_constant (Pmode,
17956 gen_rtx_LABEL_REF (VOIDmode,
17957 minipool_vector_label),
17958 this_fix->minipool->offset);
17959 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17960 }
17961
17962 dump_minipool (last_barrier->insn);
17963 fix = ftmp;
17964 }
17965
17966 /* From now on we must synthesize any constants that we can't handle
17967 directly. This can happen if the RTL gets split during final
17968 instruction generation. */
17969 cfun->machine->after_arm_reorg = 1;
17970
17971 /* Free the minipool memory. */
17972 obstack_free (&minipool_obstack, minipool_startobj);
17973 }
17974 \f
17975 /* Routines to output assembly language. */
17976
17977 /* Return string representation of passed in real value. */
17978 static const char *
17979 fp_const_from_val (REAL_VALUE_TYPE *r)
17980 {
17981 if (!fp_consts_inited)
17982 init_fp_table ();
17983
17984 gcc_assert (real_equal (r, &value_fp0));
17985 return "0";
17986 }
17987
17988 /* OPERANDS[0] is the entire list of insns that constitute the pop,
17989 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17990 is in the list, UPDATE is true iff the list contains explicit
17991 update of base register. */
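/* For illustration (cases simplified): when the base is SP, there is an
   explicit update and this is not an interrupt return, the output is a
   POP, roughly "pop\t{r4, r5, pc}"; otherwise an LDMFD (SP base) or
   LDMIA/LDM form is used, with "^" appended when returning from an
   interrupt.  */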
17992 void
17993 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17994 bool update)
17995 {
17996 int i;
17997 char pattern[100];
17998 int offset;
17999 const char *conditional;
18000 int num_saves = XVECLEN (operands[0], 0);
18001 unsigned int regno;
18002 unsigned int regno_base = REGNO (operands[1]);
18003 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
18004
18005 offset = 0;
18006 offset += update ? 1 : 0;
18007 offset += return_pc ? 1 : 0;
18008
18009 /* Is the base register in the list? */
18010 for (i = offset; i < num_saves; i++)
18011 {
18012 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
18013 /* If SP is in the list, then the base register must be SP. */
18014 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
18015 /* If base register is in the list, there must be no explicit update. */
18016 if (regno == regno_base)
18017 gcc_assert (!update);
18018 }
18019
18020 conditional = reverse ? "%?%D0" : "%?%d0";
18021 /* Can't use POP if returning from an interrupt. */
18022 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
18023 sprintf (pattern, "pop%s\t{", conditional);
18024 else
18025 {
18026 /* Output ldmfd when the base register is SP, otherwise output ldmia.
18027 It's just a convention; their semantics are identical. */
18028 if (regno_base == SP_REGNUM)
18029 sprintf (pattern, "ldmfd%s\t", conditional);
18030 else if (update)
18031 sprintf (pattern, "ldmia%s\t", conditional);
18032 else
18033 sprintf (pattern, "ldm%s\t", conditional);
18034
18035 strcat (pattern, reg_names[regno_base]);
18036 if (update)
18037 strcat (pattern, "!, {");
18038 else
18039 strcat (pattern, ", {");
18040 }
18041
18042 /* Output the first destination register. */
18043 strcat (pattern,
18044 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
18045
18046 /* Output the rest of the destination registers. */
18047 for (i = offset + 1; i < num_saves; i++)
18048 {
18049 strcat (pattern, ", ");
18050 strcat (pattern,
18051 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
18052 }
18053
18054 strcat (pattern, "}");
18055
18056 if (interrupt_p && return_pc)
18057 strcat (pattern, "^");
18058
18059 output_asm_insn (pattern, &cond);
18060 }
18061
18062
18063 /* Output the assembly for a VFP double-precision store multiple (vpush or vstmdb). */
18064
18065 const char *
18066 vfp_output_vstmd (rtx * operands)
18067 {
18068 char pattern[100];
18069 int p;
18070 int base;
18071 int i;
18072 rtx addr_reg = REG_P (XEXP (operands[0], 0))
18073 ? XEXP (operands[0], 0)
18074 : XEXP (XEXP (operands[0], 0), 0);
18075 bool push_p = REGNO (addr_reg) == SP_REGNUM;
18076
18077 if (push_p)
18078 strcpy (pattern, "vpush%?.64\t{%P1");
18079 else
18080 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
18081
18082 p = strlen (pattern);
18083
18084 gcc_assert (REG_P (operands[1]));
18085
18086 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
18087 for (i = 1; i < XVECLEN (operands[2], 0); i++)
18088 {
18089 p += sprintf (&pattern[p], ", d%d", base + i);
18090 }
18091 strcpy (&pattern[p], "}");
18092
18093 output_asm_insn (pattern, operands);
18094 return "";
18095 }
18096
18097
18098 /* Emit RTL to save a block of VFP register pairs to the stack. Returns the
18099 number of bytes pushed. */
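/* Illustrative example (register choice assumed): saving four D
   registers starting at d8 emits a single store-multiple that
   pre-decrements SP by 4 * 8 = 32 bytes, plus a REG_FRAME_RELATED_EXPR
   note describing the SP adjustment and the individual register saves
   for the unwinder; the function then returns 32.  */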
18100
18101 static int
18102 vfp_emit_fstmd (int base_reg, int count)
18103 {
18104 rtx par;
18105 rtx dwarf;
18106 rtx tmp, reg;
18107 int i;
18108
18109 /* Work around the ARM10 VFPr1 bug. Data corruption can occur when exactly two
18110 register pairs are stored by a store multiple insn. We avoid this
18111 by pushing an extra pair. */
18112 if (count == 2 && !arm_arch6)
18113 {
18114 if (base_reg == LAST_VFP_REGNUM - 3)
18115 base_reg -= 2;
18116 count++;
18117 }
18118
18119 /* FSTMD may not store more than 16 doubleword registers at once. Split
18120 larger stores into multiple parts (up to a maximum of two, in
18121 practice). */
18122 if (count > 16)
18123 {
18124 int saved;
18125 /* NOTE: base_reg is an internal register number, so each D register
18126 counts as 2. */
18127 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
18128 saved += vfp_emit_fstmd (base_reg, 16);
18129 return saved;
18130 }
18131
18132 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
18133 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
18134
18135 reg = gen_rtx_REG (DFmode, base_reg);
18136 base_reg += 2;
18137
18138 XVECEXP (par, 0, 0)
18139 = gen_rtx_SET (gen_frame_mem
18140 (BLKmode,
18141 gen_rtx_PRE_MODIFY (Pmode,
18142 stack_pointer_rtx,
18143 plus_constant
18144 (Pmode, stack_pointer_rtx,
18145 - (count * 8)))
18146 ),
18147 gen_rtx_UNSPEC (BLKmode,
18148 gen_rtvec (1, reg),
18149 UNSPEC_PUSH_MULT));
18150
18151 tmp = gen_rtx_SET (stack_pointer_rtx,
18152 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
18153 RTX_FRAME_RELATED_P (tmp) = 1;
18154 XVECEXP (dwarf, 0, 0) = tmp;
18155
18156 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
18157 RTX_FRAME_RELATED_P (tmp) = 1;
18158 XVECEXP (dwarf, 0, 1) = tmp;
18159
18160 for (i = 1; i < count; i++)
18161 {
18162 reg = gen_rtx_REG (DFmode, base_reg);
18163 base_reg += 2;
18164 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
18165
18166 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
18167 plus_constant (Pmode,
18168 stack_pointer_rtx,
18169 i * 8)),
18170 reg);
18171 RTX_FRAME_RELATED_P (tmp) = 1;
18172 XVECEXP (dwarf, 0, i + 1) = tmp;
18173 }
18174
18175 par = emit_insn (par);
18176 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
18177 RTX_FRAME_RELATED_P (par) = 1;
18178
18179 return count * 8;
18180 }
18181
18182 /* Returns true if -mcmse has been passed and the function pointed to by 'addr'
18183 has the cmse_nonsecure_call attribute; returns false otherwise. */
18184
18185 bool
18186 detect_cmse_nonsecure_call (tree addr)
18187 {
18188 if (!addr)
18189 return FALSE;
18190
18191 tree fntype = TREE_TYPE (addr);
18192 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
18193 TYPE_ATTRIBUTES (fntype)))
18194 return TRUE;
18195 return FALSE;
18196 }
18197
18198
18199 /* Emit a call instruction with pattern PAT. ADDR is the address of
18200 the call target. */
18201
18202 void
18203 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
18204 {
18205 rtx insn;
18206
18207 insn = emit_call_insn (pat);
18208
18209 /* The PIC register is live on entry to VxWorks PIC PLT entries.
18210 If the call might use such an entry, add a use of the PIC register
18211 to the instruction's CALL_INSN_FUNCTION_USAGE. */
18212 if (TARGET_VXWORKS_RTP
18213 && flag_pic
18214 && !sibcall
18215 && GET_CODE (addr) == SYMBOL_REF
18216 && (SYMBOL_REF_DECL (addr)
18217 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
18218 : !SYMBOL_REF_LOCAL_P (addr)))
18219 {
18220 require_pic_register (NULL_RTX, false /*compute_now*/);
18221 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
18222 }
18223
18224 if (TARGET_AAPCS_BASED)
18225 {
18226 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
18227 linker. We need to add an IP clobber to allow setting
18228 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
18229 is not needed since it's a fixed register. */
18230 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
18231 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
18232 }
18233 }
18234
18235 /* Output a 'call' insn. */
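/* Roughly: this emits "mov lr, pc" followed by either "bx <reg>"
   (interworking or ARMv4T) or "mov pc, <reg>", with a call through LR
   first copied into IP.  */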
18236 const char *
18237 output_call (rtx *operands)
18238 {
18239 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
18240
18241 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
18242 if (REGNO (operands[0]) == LR_REGNUM)
18243 {
18244 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
18245 output_asm_insn ("mov%?\t%0, %|lr", operands);
18246 }
18247
18248 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
18249
18250 if (TARGET_INTERWORK || arm_arch4t)
18251 output_asm_insn ("bx%?\t%0", operands);
18252 else
18253 output_asm_insn ("mov%?\t%|pc, %0", operands);
18254
18255 return "";
18256 }
18257
18258 /* Output a move from arm registers to arm registers of a long double
18259 OPERANDS[0] is the destination.
18260 OPERANDS[1] is the source. */
18261 const char *
18262 output_mov_long_double_arm_from_arm (rtx *operands)
18263 {
18264 /* We have to be careful here because the two might overlap. */
18265 int dest_start = REGNO (operands[0]);
18266 int src_start = REGNO (operands[1]);
18267 rtx ops[2];
18268 int i;
18269
18270 if (dest_start < src_start)
18271 {
18272 for (i = 0; i < 3; i++)
18273 {
18274 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18275 ops[1] = gen_rtx_REG (SImode, src_start + i);
18276 output_asm_insn ("mov%?\t%0, %1", ops);
18277 }
18278 }
18279 else
18280 {
18281 for (i = 2; i >= 0; i--)
18282 {
18283 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18284 ops[1] = gen_rtx_REG (SImode, src_start + i);
18285 output_asm_insn ("mov%?\t%0, %1", ops);
18286 }
18287 }
18288
18289 return "";
18290 }
18291
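/* Emit the two-instruction sequence for moving SRC into DEST, i.e. the
   RTL that the 16-bit movw/movt-style patterns are expected to match:
   for a constant, set the low halfword and, if needed, the high
   halfword via a ZERO_EXTRACT; for a symbolic operand, use HIGH and
   LO_SUM.  A REG_EQUAL note records the full value when two insns are
   emitted.  */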
18292 void
18293 arm_emit_movpair (rtx dest, rtx src)
18294 {
18295 /* If the src is an immediate, simplify it. */
18296 if (CONST_INT_P (src))
18297 {
18298 HOST_WIDE_INT val = INTVAL (src);
18299 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
18300 if ((val >> 16) & 0x0000ffff)
18301 {
18302 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
18303 GEN_INT (16)),
18304 GEN_INT ((val >> 16) & 0x0000ffff));
18305 rtx_insn *insn = get_last_insn ();
18306 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18307 }
18308 return;
18309 }
18310 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
18311 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
18312 rtx_insn *insn = get_last_insn ();
18313 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18314 }
18315
18316 /* Output a move between double words. It must be REG<-MEM
18317 or MEM<-REG. */
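/* A rough example of the common case, assuming TARGET_LDRD and a plain
   register address: a DImode load from [r2] into r0/r1 is printed as
   "ldrd r0, [r2]", and without LDRD it falls back to
   "ldmia r2, {r0, r1}"; the cases below also cover auto-increment
   addresses and offsets that LDRD/STRD cannot encode.  */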
18318 const char *
18319 output_move_double (rtx *operands, bool emit, int *count)
18320 {
18321 enum rtx_code code0 = GET_CODE (operands[0]);
18322 enum rtx_code code1 = GET_CODE (operands[1]);
18323 rtx otherops[3];
18324 if (count)
18325 *count = 1;
18326
18327 /* The only case when this might happen is when
18328 you are looking at the length of a DImode instruction
18329 that has an invalid constant in it. */
18330 if (code0 == REG && code1 != MEM)
18331 {
18332 gcc_assert (!emit);
18333 *count = 2;
18334 return "";
18335 }
18336
18337 if (code0 == REG)
18338 {
18339 unsigned int reg0 = REGNO (operands[0]);
18340
18341 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18342
18343 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18344
18345 switch (GET_CODE (XEXP (operands[1], 0)))
18346 {
18347 case REG:
18348
18349 if (emit)
18350 {
18351 if (TARGET_LDRD
18352 && !(fix_cm3_ldrd && reg0 == REGNO(XEXP (operands[1], 0))))
18353 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
18354 else
18355 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18356 }
18357 break;
18358
18359 case PRE_INC:
18360 gcc_assert (TARGET_LDRD);
18361 if (emit)
18362 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18363 break;
18364
18365 case PRE_DEC:
18366 if (emit)
18367 {
18368 if (TARGET_LDRD)
18369 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18370 else
18371 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18372 }
18373 break;
18374
18375 case POST_INC:
18376 if (emit)
18377 {
18378 if (TARGET_LDRD)
18379 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18380 else
18381 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18382 }
18383 break;
18384
18385 case POST_DEC:
18386 gcc_assert (TARGET_LDRD);
18387 if (emit)
18388 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18389 break;
18390
18391 case PRE_MODIFY:
18392 case POST_MODIFY:
18393 /* Autoincrement addressing modes should never have overlapping
18394 base and destination registers, and overlapping index registers
18395 are already prohibited, so this doesn't need to worry about
18396 fix_cm3_ldrd. */
18397 otherops[0] = operands[0];
18398 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18399 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18400
18401 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18402 {
18403 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18404 {
18405 /* Registers overlap so split out the increment. */
18406 if (emit)
18407 {
18408 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18409 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18410 }
18411 if (count)
18412 *count = 2;
18413 }
18414 else
18415 {
18416 /* Use a single insn if we can.
18417 FIXME: IWMMXT allows offsets larger than ldrd can
18418 handle, fix these up with a pair of ldr. */
18419 if (TARGET_THUMB2
18420 || !CONST_INT_P (otherops[2])
18421 || (INTVAL (otherops[2]) > -256
18422 && INTVAL (otherops[2]) < 256))
18423 {
18424 if (emit)
18425 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18426 }
18427 else
18428 {
18429 if (emit)
18430 {
18431 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18432 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18433 }
18434 if (count)
18435 *count = 2;
18436
18437 }
18438 }
18439 }
18440 else
18441 {
18442 /* Use a single insn if we can.
18443 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18444 fix these up with a pair of ldr. */
18445 if (TARGET_THUMB2
18446 || !CONST_INT_P (otherops[2])
18447 || (INTVAL (otherops[2]) > -256
18448 && INTVAL (otherops[2]) < 256))
18449 {
18450 if (emit)
18451 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18452 }
18453 else
18454 {
18455 if (emit)
18456 {
18457 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18458 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18459 }
18460 if (count)
18461 *count = 2;
18462 }
18463 }
18464 break;
18465
18466 case LABEL_REF:
18467 case CONST:
18468 /* We might be able to use ldrd %0, %1 here. However, the range is
18469 different from that of ldr/adr, and it is broken on some ARMv7-M
18470 implementations. */
18471 /* Use the second register of the pair to avoid problematic
18472 overlap. */
18473 otherops[1] = operands[1];
18474 if (emit)
18475 output_asm_insn ("adr%?\t%0, %1", otherops);
18476 operands[1] = otherops[0];
18477 if (emit)
18478 {
18479 if (TARGET_LDRD)
18480 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18481 else
18482 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18483 }
18484
18485 if (count)
18486 *count = 2;
18487 break;
18488
18489 /* ??? This needs checking for thumb2. */
18490 default:
18491 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18492 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18493 {
18494 otherops[0] = operands[0];
18495 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18496 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18497
18498 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18499 {
18500 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18501 {
18502 switch ((int) INTVAL (otherops[2]))
18503 {
18504 case -8:
18505 if (emit)
18506 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18507 return "";
18508 case -4:
18509 if (TARGET_THUMB2)
18510 break;
18511 if (emit)
18512 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18513 return "";
18514 case 4:
18515 if (TARGET_THUMB2)
18516 break;
18517 if (emit)
18518 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18519 return "";
18520 }
18521 }
18522 otherops[0] = gen_rtx_REG(SImode, REGNO(operands[0]) + 1);
18523 operands[1] = otherops[0];
18524 if (TARGET_LDRD
18525 && (REG_P (otherops[2])
18526 || TARGET_THUMB2
18527 || (CONST_INT_P (otherops[2])
18528 && INTVAL (otherops[2]) > -256
18529 && INTVAL (otherops[2]) < 256)))
18530 {
18531 if (reg_overlap_mentioned_p (operands[0],
18532 otherops[2]))
18533 {
18534 /* Swap base and index registers over to
18535 avoid a conflict. */
18536 std::swap (otherops[1], otherops[2]);
18537 }
18538 /* If both registers conflict, it will usually
18539 have been fixed by a splitter. */
18540 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18541 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18542 {
18543 if (emit)
18544 {
18545 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18546 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18547 }
18548 if (count)
18549 *count = 2;
18550 }
18551 else
18552 {
18553 otherops[0] = operands[0];
18554 if (emit)
18555 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18556 }
18557 return "";
18558 }
18559
18560 if (CONST_INT_P (otherops[2]))
18561 {
18562 if (emit)
18563 {
18564 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18565 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18566 else
18567 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18568 }
18569 }
18570 else
18571 {
18572 if (emit)
18573 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18574 }
18575 }
18576 else
18577 {
18578 if (emit)
18579 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18580 }
18581
18582 if (count)
18583 *count = 2;
18584
18585 if (TARGET_LDRD)
18586 return "ldrd%?\t%0, [%1]";
18587
18588 return "ldmia%?\t%1, %M0";
18589 }
18590 else
18591 {
18592 otherops[1] = adjust_address (operands[1], SImode, 4);
18593 /* Take care of overlapping base/data reg. */
18594 if (reg_mentioned_p (operands[0], operands[1]))
18595 {
18596 if (emit)
18597 {
18598 output_asm_insn ("ldr%?\t%0, %1", otherops);
18599 output_asm_insn ("ldr%?\t%0, %1", operands);
18600 }
18601 if (count)
18602 *count = 2;
18603
18604 }
18605 else
18606 {
18607 if (emit)
18608 {
18609 output_asm_insn ("ldr%?\t%0, %1", operands);
18610 output_asm_insn ("ldr%?\t%0, %1", otherops);
18611 }
18612 if (count)
18613 *count = 2;
18614 }
18615 }
18616 }
18617 }
18618 else
18619 {
18620 /* Constraints should ensure this. */
18621 gcc_assert (code0 == MEM && code1 == REG);
18622 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18623 || (TARGET_ARM && TARGET_LDRD));
18624
18625 /* For TARGET_ARM the first source register of an STRD
18626 must be even. This is usually the case for double-word
18627 values but user assembly constraints can force an odd
18628 starting register. */
18629 bool allow_strd = TARGET_LDRD
18630 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
18631 switch (GET_CODE (XEXP (operands[0], 0)))
18632 {
18633 case REG:
18634 if (emit)
18635 {
18636 if (allow_strd)
18637 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18638 else
18639 output_asm_insn ("stm%?\t%m0, %M1", operands);
18640 }
18641 break;
18642
18643 case PRE_INC:
18644 gcc_assert (allow_strd);
18645 if (emit)
18646 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18647 break;
18648
18649 case PRE_DEC:
18650 if (emit)
18651 {
18652 if (allow_strd)
18653 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18654 else
18655 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18656 }
18657 break;
18658
18659 case POST_INC:
18660 if (emit)
18661 {
18662 if (allow_strd)
18663 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18664 else
18665 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18666 }
18667 break;
18668
18669 case POST_DEC:
18670 gcc_assert (allow_strd);
18671 if (emit)
18672 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18673 break;
18674
18675 case PRE_MODIFY:
18676 case POST_MODIFY:
18677 otherops[0] = operands[1];
18678 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18679 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18680
18681 /* IWMMXT allows offsets larger than strd can handle,
18682 fix these up with a pair of str. */
18683 if (!TARGET_THUMB2
18684 && CONST_INT_P (otherops[2])
18685 && (INTVAL(otherops[2]) <= -256
18686 || INTVAL(otherops[2]) >= 256))
18687 {
18688 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18689 {
18690 if (emit)
18691 {
18692 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18693 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18694 }
18695 if (count)
18696 *count = 2;
18697 }
18698 else
18699 {
18700 if (emit)
18701 {
18702 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18703 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18704 }
18705 if (count)
18706 *count = 2;
18707 }
18708 }
18709 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18710 {
18711 if (emit)
18712 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18713 }
18714 else
18715 {
18716 if (emit)
18717 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18718 }
18719 break;
18720
18721 case PLUS:
18722 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18723 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18724 {
18725 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18726 {
18727 case -8:
18728 if (emit)
18729 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18730 return "";
18731
18732 case -4:
18733 if (TARGET_THUMB2)
18734 break;
18735 if (emit)
18736 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18737 return "";
18738
18739 case 4:
18740 if (TARGET_THUMB2)
18741 break;
18742 if (emit)
18743 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18744 return "";
18745 }
18746 }
18747 if (allow_strd
18748 && (REG_P (otherops[2])
18749 || TARGET_THUMB2
18750 || (CONST_INT_P (otherops[2])
18751 && INTVAL (otherops[2]) > -256
18752 && INTVAL (otherops[2]) < 256)))
18753 {
18754 otherops[0] = operands[1];
18755 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18756 if (emit)
18757 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18758 return "";
18759 }
18760 /* Fall through */
18761
18762 default:
18763 otherops[0] = adjust_address (operands[0], SImode, 4);
18764 otherops[1] = operands[1];
18765 if (emit)
18766 {
18767 output_asm_insn ("str%?\t%1, %0", operands);
18768 output_asm_insn ("str%?\t%H1, %0", otherops);
18769 }
18770 if (count)
18771 *count = 2;
18772 }
18773 }
18774
18775 return "";
18776 }
18777
18778 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18779 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
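/* For example (informal, based on the cases below): a load whose address
   is a plain register becomes a single "ldmia rN, {...}" of four core
   registers, and a register-to-register move is expanded into four
   "mov" instructions ordered so that no source is overwritten before it
   is read.  */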
18780
18781 const char *
18782 output_move_quad (rtx *operands)
18783 {
18784 if (REG_P (operands[0]))
18785 {
18786 /* Load, or reg->reg move. */
18787
18788 if (MEM_P (operands[1]))
18789 {
18790 switch (GET_CODE (XEXP (operands[1], 0)))
18791 {
18792 case REG:
18793 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18794 break;
18795
18796 case LABEL_REF:
18797 case CONST:
18798 output_asm_insn ("adr%?\t%0, %1", operands);
18799 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18800 break;
18801
18802 default:
18803 gcc_unreachable ();
18804 }
18805 }
18806 else
18807 {
18808 rtx ops[2];
18809 int dest, src, i;
18810
18811 gcc_assert (REG_P (operands[1]));
18812
18813 dest = REGNO (operands[0]);
18814 src = REGNO (operands[1]);
18815
18816 /* This seems pretty dumb, but hopefully GCC won't try to do it
18817 very often. */
18818 if (dest < src)
18819 for (i = 0; i < 4; i++)
18820 {
18821 ops[0] = gen_rtx_REG (SImode, dest + i);
18822 ops[1] = gen_rtx_REG (SImode, src + i);
18823 output_asm_insn ("mov%?\t%0, %1", ops);
18824 }
18825 else
18826 for (i = 3; i >= 0; i--)
18827 {
18828 ops[0] = gen_rtx_REG (SImode, dest + i);
18829 ops[1] = gen_rtx_REG (SImode, src + i);
18830 output_asm_insn ("mov%?\t%0, %1", ops);
18831 }
18832 }
18833 }
18834 else
18835 {
18836 gcc_assert (MEM_P (operands[0]));
18837 gcc_assert (REG_P (operands[1]));
18838 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18839
18840 switch (GET_CODE (XEXP (operands[0], 0)))
18841 {
18842 case REG:
18843 output_asm_insn ("stm%?\t%m0, %M1", operands);
18844 break;
18845
18846 default:
18847 gcc_unreachable ();
18848 }
18849 }
18850
18851 return "";
18852 }
18853
18854 /* Output a VFP load or store instruction. */
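/* An informal sketch of the templates below: a plain SFmode load prints
   as "vldr.32 sN, [...]", a DFmode store with POST_INC as
   "vstmia.64 rN!, {dM}", and integer modes get a trailing assembly
   comment marking them as such.  */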
18855
18856 const char *
18857 output_move_vfp (rtx *operands)
18858 {
18859 rtx reg, mem, addr, ops[2];
18860 int load = REG_P (operands[0]);
18861 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18862 int sp = (!TARGET_VFP_FP16INST
18863 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18864 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18865 const char *templ;
18866 char buff[50];
18867 machine_mode mode;
18868
18869 reg = operands[!load];
18870 mem = operands[load];
18871
18872 mode = GET_MODE (reg);
18873
18874 gcc_assert (REG_P (reg));
18875 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18876 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18877 || mode == SFmode
18878 || mode == DFmode
18879 || mode == HImode
18880 || mode == SImode
18881 || mode == DImode
18882 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18883 gcc_assert (MEM_P (mem));
18884
18885 addr = XEXP (mem, 0);
18886
18887 switch (GET_CODE (addr))
18888 {
18889 case PRE_DEC:
18890 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18891 ops[0] = XEXP (addr, 0);
18892 ops[1] = reg;
18893 break;
18894
18895 case POST_INC:
18896 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18897 ops[0] = XEXP (addr, 0);
18898 ops[1] = reg;
18899 break;
18900
18901 default:
18902 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18903 ops[0] = reg;
18904 ops[1] = mem;
18905 break;
18906 }
18907
18908 sprintf (buff, templ,
18909 load ? "ld" : "st",
18910 dp ? "64" : sp ? "32" : "16",
18911 dp ? "P" : "",
18912 integer_p ? "\t%@ int" : "");
18913 output_asm_insn (buff, ops);
18914
18915 return "";
18916 }
18917
18918 /* Output a Neon double-word or quad-word load or store, or a load
18919 or store for larger structure modes.
18920
18921 WARNING: The ordering of elements is weird in big-endian mode,
18922 because the EABI requires that vectors stored in memory appear
18923 as though they were stored by a VSTM.
18924 GCC RTL defines element ordering based on in-memory order.
18925 This can be different from the architectural ordering of elements
18926 within a NEON register. The intrinsics defined in arm_neon.h use the
18927 NEON register element ordering, not the GCC RTL element ordering.
18928
18929 For example, the in-memory ordering of a big-endian quadword
18930 vector with 16-bit elements when stored from register pair {d0,d1}
18931 will be (lowest address first, d0[N] is NEON register element N):
18932
18933 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18934
18935 When necessary, quadword registers (dN, dN+1) are moved to ARM
18936 registers from rN in the order:
18937
18938 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18939
18940 So that STM/LDM can be used on vectors in ARM registers, and the
18941 same memory layout will result as if VSTM/VLDM were used.
18942
18943 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18944 possible, which allows use of appropriate alignment tags.
18945 Note that the choice of "64" is independent of the actual vector
18946 element size; this size simply ensures that the behavior is
18947 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18948
18949 Due to limitations of those instructions, use of VST1.64/VLD1.64
18950 is not possible if:
18951 - the address contains PRE_DEC, or
18952 - the mode refers to more than 4 double-word registers
18953
18954 In those cases, it would be possible to replace VSTM/VLDM by a
18955 sequence of instructions; this is not currently implemented since
18956 this is not certain to actually improve performance. */
18957
18958 const char *
18959 output_move_neon (rtx *operands)
18960 {
18961 rtx reg, mem, addr, ops[2];
18962 int regno, nregs, load = REG_P (operands[0]);
18963 const char *templ;
18964 char buff[50];
18965 machine_mode mode;
18966
18967 reg = operands[!load];
18968 mem = operands[load];
18969
18970 mode = GET_MODE (reg);
18971
18972 gcc_assert (REG_P (reg));
18973 regno = REGNO (reg);
18974 nregs = REG_NREGS (reg) / 2;
18975 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18976 || NEON_REGNO_OK_FOR_QUAD (regno));
18977 gcc_assert (VALID_NEON_DREG_MODE (mode)
18978 || VALID_NEON_QREG_MODE (mode)
18979 || VALID_NEON_STRUCT_MODE (mode));
18980 gcc_assert (MEM_P (mem));
18981
18982 addr = XEXP (mem, 0);
18983
18984 /* Strip off const from addresses like (const (plus (...))). */
18985 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18986 addr = XEXP (addr, 0);
18987
18988 switch (GET_CODE (addr))
18989 {
18990 case POST_INC:
18991 /* We have to use vldm / vstm for too-large modes. */
18992 if (nregs > 4)
18993 {
18994 templ = "v%smia%%?\t%%0!, %%h1";
18995 ops[0] = XEXP (addr, 0);
18996 }
18997 else
18998 {
18999 templ = "v%s1.64\t%%h1, %%A0";
19000 ops[0] = mem;
19001 }
19002 ops[1] = reg;
19003 break;
19004
19005 case PRE_DEC:
19006 /* We have to use vldm / vstm in this case, since there is no
19007 pre-decrement form of the vld1 / vst1 instructions. */
19008 templ = "v%smdb%%?\t%%0!, %%h1";
19009 ops[0] = XEXP (addr, 0);
19010 ops[1] = reg;
19011 break;
19012
19013 case POST_MODIFY:
19014 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
19015 gcc_unreachable ();
19016
19017 case REG:
19018 /* We have to use vldm / vstm for too-large modes. */
19019 if (nregs > 1)
19020 {
19021 if (nregs > 4)
19022 templ = "v%smia%%?\t%%m0, %%h1";
19023 else
19024 templ = "v%s1.64\t%%h1, %%A0";
19025
19026 ops[0] = mem;
19027 ops[1] = reg;
19028 break;
19029 }
19030 /* Fall through. */
19031 case LABEL_REF:
19032 case PLUS:
19033 {
19034 int i;
19035 int overlap = -1;
19036 for (i = 0; i < nregs; i++)
19037 {
19038 /* We're only using DImode here because it's a convenient size. */
19039 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
19040 ops[1] = adjust_address (mem, DImode, 8 * i);
19041 if (reg_overlap_mentioned_p (ops[0], mem))
19042 {
19043 gcc_assert (overlap == -1);
19044 overlap = i;
19045 }
19046 else
19047 {
19048 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
19049 output_asm_insn (buff, ops);
19050 }
19051 }
19052 if (overlap != -1)
19053 {
19054 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
19055 ops[1] = adjust_address (mem, SImode, 8 * overlap);
19056 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
19057 output_asm_insn (buff, ops);
19058 }
19059
19060 return "";
19061 }
19062
19063 default:
19064 gcc_unreachable ();
19065 }
19066
19067 sprintf (buff, templ, load ? "ld" : "st");
19068 output_asm_insn (buff, ops);
19069
19070 return "";
19071 }
19072
19073 /* Compute and return the length of neon_mov<mode>, where <mode> is
19074 one of VSTRUCT modes: EI, OI, CI or XI. */
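/* In short: register-to-register moves have a fixed length per mode
   (8, 12 or 16 bytes), loads and stores through LABEL_REF or PLUS
   addresses take one 4-byte vldr/vstr per D register occupied, and
   everything else is a single 4-byte instruction.  */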
19075 int
19076 arm_attr_length_move_neon (rtx_insn *insn)
19077 {
19078 rtx reg, mem, addr;
19079 int load;
19080 machine_mode mode;
19081
19082 extract_insn_cached (insn);
19083
19084 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
19085 {
19086 mode = GET_MODE (recog_data.operand[0]);
19087 switch (mode)
19088 {
19089 case E_EImode:
19090 case E_OImode:
19091 return 8;
19092 case E_CImode:
19093 return 12;
19094 case E_XImode:
19095 return 16;
19096 default:
19097 gcc_unreachable ();
19098 }
19099 }
19100
19101 load = REG_P (recog_data.operand[0]);
19102 reg = recog_data.operand[!load];
19103 mem = recog_data.operand[load];
19104
19105 gcc_assert (MEM_P (mem));
19106
19107 addr = XEXP (mem, 0);
19108
19109 /* Strip off const from addresses like (const (plus (...))). */
19110 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
19111 addr = XEXP (addr, 0);
19112
19113 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
19114 {
19115 int insns = REG_NREGS (reg) / 2;
19116 return insns * 4;
19117 }
19118 else
19119 return 4;
19120 }
19121
19122 /* Return nonzero if the offset in the address is an immediate. Otherwise,
19123 return zero. */
19124
19125 int
19126 arm_address_offset_is_imm (rtx_insn *insn)
19127 {
19128 rtx mem, addr;
19129
19130 extract_insn_cached (insn);
19131
19132 if (REG_P (recog_data.operand[0]))
19133 return 0;
19134
19135 mem = recog_data.operand[0];
19136
19137 gcc_assert (MEM_P (mem));
19138
19139 addr = XEXP (mem, 0);
19140
19141 if (REG_P (addr)
19142 || (GET_CODE (addr) == PLUS
19143 && REG_P (XEXP (addr, 0))
19144 && CONST_INT_P (XEXP (addr, 1))))
19145 return 1;
19146 else
19147 return 0;
19148 }
19149
19150 /* Output an ADD r, s, #n where n may be too big for one instruction.
19151 If adding zero to one register, output nothing. */
19152 const char *
19153 output_add_immediate (rtx *operands)
19154 {
19155 HOST_WIDE_INT n = INTVAL (operands[2]);
19156
19157 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
19158 {
19159 if (n < 0)
19160 output_multi_immediate (operands,
19161 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
19162 -n);
19163 else
19164 output_multi_immediate (operands,
19165 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
19166 n);
19167 }
19168
19169 return "";
19170 }
19171
19172 /* Output a multiple immediate operation.
19173 OPERANDS is the vector of operands referred to in the output patterns.
19174 INSTR1 is the output pattern to use for the first constant.
19175 INSTR2 is the output pattern to use for subsequent constants.
19176 IMMED_OP is the index of the constant slot in OPERANDS.
19177 N is the constant value. */
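/* Worked example (operand registers assumed): for N = 0x10004 the loop
   below picks out 8-bit chunks anchored at even bit positions and emits
   two instructions, e.g. "add r0, r1, #4" followed by
   "add r0, r0, #65536".  */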
19178 static const char *
19179 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
19180 int immed_op, HOST_WIDE_INT n)
19181 {
19182 #if HOST_BITS_PER_WIDE_INT > 32
19183 n &= 0xffffffff;
19184 #endif
19185
19186 if (n == 0)
19187 {
19188 /* Quick and easy output. */
19189 operands[immed_op] = const0_rtx;
19190 output_asm_insn (instr1, operands);
19191 }
19192 else
19193 {
19194 int i;
19195 const char * instr = instr1;
19196
19197 /* Note that n is never zero here (which would give no output). */
19198 for (i = 0; i < 32; i += 2)
19199 {
19200 if (n & (3 << i))
19201 {
19202 operands[immed_op] = GEN_INT (n & (255 << i));
19203 output_asm_insn (instr, operands);
19204 instr = instr2;
19205 i += 6;
19206 }
19207 }
19208 }
19209
19210 return "";
19211 }
19212
19213 /* Return the name of a shifter operation. */
19214 static const char *
19215 arm_shift_nmem(enum rtx_code code)
19216 {
19217 switch (code)
19218 {
19219 case ASHIFT:
19220 return ARM_LSL_NAME;
19221
19222 case ASHIFTRT:
19223 return "asr";
19224
19225 case LSHIFTRT:
19226 return "lsr";
19227
19228 case ROTATERT:
19229 return "ror";
19230
19231 default:
19232 abort();
19233 }
19234 }
19235
19236 /* Return the appropriate ARM instruction for the operation code.
19237 The returned result should not be overwritten. OP is the rtx of the
19238 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
19239 was shifted. */
19240 const char *
19241 arithmetic_instr (rtx op, int shift_first_arg)
19242 {
19243 switch (GET_CODE (op))
19244 {
19245 case PLUS:
19246 return "add";
19247
19248 case MINUS:
19249 return shift_first_arg ? "rsb" : "sub";
19250
19251 case IOR:
19252 return "orr";
19253
19254 case XOR:
19255 return "eor";
19256
19257 case AND:
19258 return "and";
19259
19260 case ASHIFT:
19261 case ASHIFTRT:
19262 case LSHIFTRT:
19263 case ROTATERT:
19264 return arm_shift_nmem(GET_CODE(op));
19265
19266 default:
19267 gcc_unreachable ();
19268 }
19269 }
19270
19271 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19272 for the operation code. The returned result should not be overwritten.
19273 OP is the rtx code of the shift.
19274 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
19275 constant shift amount if the shift is by a constant. */
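/* For example, a (mult x 8) operand is printed as an "lsl" by 3 and a
   (rotate x 24) becomes a "ror" by 8, while a shift by a register
   leaves *AMOUNTP at -1 to signal that the amount is not constant.  */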
19276 static const char *
19277 shift_op (rtx op, HOST_WIDE_INT *amountp)
19278 {
19279 const char * mnem;
19280 enum rtx_code code = GET_CODE (op);
19281
19282 switch (code)
19283 {
19284 case ROTATE:
19285 if (!CONST_INT_P (XEXP (op, 1)))
19286 {
19287 output_operand_lossage ("invalid shift operand");
19288 return NULL;
19289 }
19290
19291 code = ROTATERT;
19292 *amountp = 32 - INTVAL (XEXP (op, 1));
19293 mnem = "ror";
19294 break;
19295
19296 case ASHIFT:
19297 case ASHIFTRT:
19298 case LSHIFTRT:
19299 case ROTATERT:
19300 mnem = arm_shift_nmem(code);
19301 if (CONST_INT_P (XEXP (op, 1)))
19302 {
19303 *amountp = INTVAL (XEXP (op, 1));
19304 }
19305 else if (REG_P (XEXP (op, 1)))
19306 {
19307 *amountp = -1;
19308 return mnem;
19309 }
19310 else
19311 {
19312 output_operand_lossage ("invalid shift operand");
19313 return NULL;
19314 }
19315 break;
19316
19317 case MULT:
19318 /* We never have to worry about the amount being other than a
19319 power of 2, since this case can never be reloaded from a reg. */
19320 if (!CONST_INT_P (XEXP (op, 1)))
19321 {
19322 output_operand_lossage ("invalid shift operand");
19323 return NULL;
19324 }
19325
19326 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
19327
19328 /* Amount must be a power of two. */
19329 if (*amountp & (*amountp - 1))
19330 {
19331 output_operand_lossage ("invalid shift operand");
19332 return NULL;
19333 }
19334
19335 *amountp = exact_log2 (*amountp);
19336 gcc_assert (IN_RANGE (*amountp, 0, 31));
19337 return ARM_LSL_NAME;
19338
19339 default:
19340 output_operand_lossage ("invalid shift operand");
19341 return NULL;
19342 }
19343
19344 /* This is not 100% correct, but follows from the desire to merge
19345 multiplication by a power of 2 with the recognizer for a
19346 shift. >=32 is not a valid shift for "lsl", so we must try and
19347 output a shift that produces the correct arithmetical result.
19348 Using lsr #32 is identical except for the fact that the carry bit
19349 is not set correctly if we set the flags; but we never use the
19350 carry bit from such an operation, so we can ignore that. */
19351 if (code == ROTATERT)
19352 /* Rotate is just modulo 32. */
19353 *amountp &= 31;
19354 else if (*amountp != (*amountp & 31))
19355 {
19356 if (code == ASHIFT)
19357 mnem = "lsr";
19358 *amountp = 32;
19359 }
19360
19361 /* Shifts of 0 are no-ops. */
19362 if (*amountp == 0)
19363 return NULL;
19364
19365 return mnem;
19366 }
19367
19368 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19369 because /bin/as is horribly restrictive. The judgement about
19370 whether or not each character is 'printable' (and can be output as
19371 is) or not (and must be printed with an octal escape) must be made
19372 with reference to the *host* character set -- the situation is
19373 similar to that discussed in the comments above pp_c_char in
19374 c-pretty-print.c. */
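/* For instance, the three bytes 'a', '"', 0x07 are emitted as
   .ascii "a\"\007", and the string is split across several .ascii
   directives once roughly MAX_ASCII_LEN characters have been printed on
   one line.  */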
19375
19376 #define MAX_ASCII_LEN 51
19377
19378 void
19379 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19380 {
19381 int i;
19382 int len_so_far = 0;
19383
19384 fputs ("\t.ascii\t\"", stream);
19385
19386 for (i = 0; i < len; i++)
19387 {
19388 int c = p[i];
19389
19390 if (len_so_far >= MAX_ASCII_LEN)
19391 {
19392 fputs ("\"\n\t.ascii\t\"", stream);
19393 len_so_far = 0;
19394 }
19395
19396 if (ISPRINT (c))
19397 {
19398 if (c == '\\' || c == '\"')
19399 {
19400 putc ('\\', stream);
19401 len_so_far++;
19402 }
19403 putc (c, stream);
19404 len_so_far++;
19405 }
19406 else
19407 {
19408 fprintf (stream, "\\%03o", c);
19409 len_so_far += 4;
19410 }
19411 }
19412
19413 fputs ("\"\n", stream);
19414 }
19415 \f
19416
19417 /* Compute the register save mask for registers 0 through 12
19418 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19419
19420 static unsigned long
19421 arm_compute_save_reg0_reg12_mask (void)
19422 {
19423 unsigned long func_type = arm_current_func_type ();
19424 unsigned long save_reg_mask = 0;
19425 unsigned int reg;
19426
19427 if (IS_INTERRUPT (func_type))
19428 {
19429 unsigned int max_reg;
19430 /* Interrupt functions must not corrupt any registers,
19431 even call clobbered ones. If this is a leaf function
19432 we can just examine the registers used by the RTL, but
19433 otherwise we have to assume that whatever function is
19434 called might clobber anything, and so we have to save
19435 all the call-clobbered registers as well. */
19436 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19437 /* FIQ handlers have registers r8 - r12 banked, so
19438 we only need to check r0 - r7. Normal ISRs only
19439 bank r14 and r15, so we must check up to r12.
19440 r13 is the stack pointer which is always preserved,
19441 so we do not need to consider it here. */
19442 max_reg = 7;
19443 else
19444 max_reg = 12;
19445
19446 for (reg = 0; reg <= max_reg; reg++)
19447 if (df_regs_ever_live_p (reg)
19448 || (! crtl->is_leaf && call_used_regs[reg]))
19449 save_reg_mask |= (1 << reg);
19450
19451 /* Also save the pic base register if necessary. */
19452 if (flag_pic
19453 && !TARGET_SINGLE_PIC_BASE
19454 && arm_pic_register != INVALID_REGNUM
19455 && crtl->uses_pic_offset_table)
19456 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19457 }
19458 else if (IS_VOLATILE(func_type))
19459 {
19460 /* For noreturn functions we historically omitted register saves
19461 altogether. However, this really messes up debugging. As a
19462 compromise save just the frame pointers. Combined with the link
19463 register saved elsewhere this should be sufficient to get
19464 a backtrace. */
19465 if (frame_pointer_needed)
19466 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19467 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19468 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19469 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19470 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19471 }
19472 else
19473 {
19474 /* In the normal case we only need to save those registers
19475 which are call saved and which are used by this function. */
19476 for (reg = 0; reg <= 11; reg++)
19477 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19478 save_reg_mask |= (1 << reg);
19479
19480 /* Handle the frame pointer as a special case. */
19481 if (frame_pointer_needed)
19482 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19483
19484 /* If we aren't loading the PIC register,
19485 don't stack it even though it may be live. */
19486 if (flag_pic
19487 && !TARGET_SINGLE_PIC_BASE
19488 && arm_pic_register != INVALID_REGNUM
19489 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19490 || crtl->uses_pic_offset_table))
19491 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19492
19493 /* The prologue will copy SP into R0, so save it. */
19494 if (IS_STACKALIGN (func_type))
19495 save_reg_mask |= 1;
19496 }
19497
19498 /* Save registers so the exception handler can modify them. */
19499 if (crtl->calls_eh_return)
19500 {
19501 unsigned int i;
19502
19503 for (i = 0; ; i++)
19504 {
19505 reg = EH_RETURN_DATA_REGNO (i);
19506 if (reg == INVALID_REGNUM)
19507 break;
19508 save_reg_mask |= 1 << reg;
19509 }
19510 }
19511
19512 return save_reg_mask;
19513 }
19514
19515 /* Return true if r3 is live at the start of the function. */
19516
19517 static bool
19518 arm_r3_live_at_start_p (void)
19519 {
19520 /* Just look at cfg info, which is still close enough to correct at this
19521 point. This gives false positives for broken functions that might use
19522 uninitialized data that happens to be allocated in r3, but who cares? */
19523 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19524 }
19525
19526 /* Compute the number of bytes used to store the static chain register on the
19527 stack, above the stack frame. We need to know this accurately to get the
19528 alignment of the rest of the stack frame correct. */
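/* In practice the result is either 0 or 4: 4 when the prologue of a
   nested function must stash the static chain above the frame and r3 is
   live on entry (the exact conditions are tested below), 0 otherwise.  */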
19529
19530 static int
19531 arm_compute_static_chain_stack_bytes (void)
19532 {
19533 /* Once the value is updated from the init value of -1, do not
19534 re-compute. */
19535 if (cfun->machine->static_chain_stack_bytes != -1)
19536 return cfun->machine->static_chain_stack_bytes;
19537
19538 /* See the defining assertion in arm_expand_prologue. */
19539 if (IS_NESTED (arm_current_func_type ())
19540 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19541 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19542 || flag_stack_clash_protection)
19543 && !df_regs_ever_live_p (LR_REGNUM)))
19544 && arm_r3_live_at_start_p ()
19545 && crtl->args.pretend_args_size == 0)
19546 return 4;
19547
19548 return 0;
19549 }
19550
19551 /* Compute a bit mask of which core registers need to be
19552 saved on the stack for the current function.
19553 This is used by arm_compute_frame_layout, which may add extra registers. */
19554
19555 static unsigned long
19556 arm_compute_save_core_reg_mask (void)
19557 {
19558 unsigned int save_reg_mask = 0;
19559 unsigned long func_type = arm_current_func_type ();
19560 unsigned int reg;
19561
19562 if (IS_NAKED (func_type))
19563 /* This should never really happen. */
19564 return 0;
19565
19566 /* If we are creating a stack frame, then we must save the frame pointer,
19567 IP (which will hold the old stack pointer), LR and the PC. */
19568 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19569 save_reg_mask |=
19570 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19571 | (1 << IP_REGNUM)
19572 | (1 << LR_REGNUM)
19573 | (1 << PC_REGNUM);
19574
19575 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19576
19577 /* Decide if we need to save the link register.
19578 Interrupt routines have their own banked link register,
19579 so they never need to save it.
19580 Otherwise if we do not use the link register we do not need to save
19581 it. If we are pushing other registers onto the stack however, we
19582 can save an instruction in the epilogue by pushing the link register
19583 now and then popping it back into the PC. This incurs extra memory
19584 accesses though, so we only do it when optimizing for size, and only
19585 if we know that we will not need a fancy return sequence. */
19586 if (df_regs_ever_live_p (LR_REGNUM)
19587 || (save_reg_mask
19588 && optimize_size
19589 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19590 && !crtl->tail_call_emit
19591 && !crtl->calls_eh_return))
19592 save_reg_mask |= 1 << LR_REGNUM;
19593
19594 if (cfun->machine->lr_save_eliminated)
19595 save_reg_mask &= ~ (1 << LR_REGNUM);
19596
19597 if (TARGET_REALLY_IWMMXT
19598 && ((bit_count (save_reg_mask)
19599 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19600 arm_compute_static_chain_stack_bytes())
19601 ) % 2) != 0)
19602 {
19603 /* The total number of registers that are going to be pushed
19604 onto the stack is odd. We need to ensure that the stack
19605 is 64-bit aligned before we start to save iWMMXt registers,
19606 and also before we start to create locals. (A local variable
19607 might be a double or long long which we will load/store using
19608 an iWMMXt instruction). Therefore we need to push another
19609 ARM register, so that the stack will be 64-bit aligned. We
19610 try to avoid using the arg registers (r0 - r3) as they might be
19611 used to pass values in a tail call. */
19612 for (reg = 4; reg <= 12; reg++)
19613 if ((save_reg_mask & (1 << reg)) == 0)
19614 break;
19615
19616 if (reg <= 12)
19617 save_reg_mask |= (1 << reg);
19618 else
19619 {
19620 cfun->machine->sibcall_blocked = 1;
19621 save_reg_mask |= (1 << 3);
19622 }
19623 }
19624
19625 /* We may need to push an additional register for use initializing the
19626 PIC base register. */
19627 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19628 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19629 {
19630 reg = thumb_find_work_register (1 << 4);
19631 if (!call_used_regs[reg])
19632 save_reg_mask |= (1 << reg);
19633 }
19634
19635 return save_reg_mask;
19636 }
19637
19638 /* Compute a bit mask of which core registers need to be
19639 saved on the stack for the current function. */
19640 static unsigned long
19641 thumb1_compute_save_core_reg_mask (void)
19642 {
19643 unsigned long mask;
19644 unsigned reg;
19645
19646 mask = 0;
19647 for (reg = 0; reg < 12; reg ++)
19648 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19649 mask |= 1 << reg;
19650
19651 /* Handle the frame pointer as a special case. */
19652 if (frame_pointer_needed)
19653 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19654
19655 if (flag_pic
19656 && !TARGET_SINGLE_PIC_BASE
19657 && arm_pic_register != INVALID_REGNUM
19658 && crtl->uses_pic_offset_table)
19659 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19660
19661 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19662 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19663 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19664
19665 /* LR will also be pushed if any lo regs are pushed. */
19666 if (mask & 0xff || thumb_force_lr_save ())
19667 mask |= (1 << LR_REGNUM);
19668
19669 bool call_clobbered_scratch
19670 = (thumb1_prologue_unused_call_clobbered_lo_regs ()
19671 && thumb1_epilogue_unused_call_clobbered_lo_regs ());
19672
19673 /* Make sure we have a low work register if we need one. We will
19674 need one if we are going to push a high register, but we are not
19675 currently intending to push a low register. However if both the
19676 prologue and epilogue have a spare call-clobbered low register,
19677 then we won't need to find an additional work register. It does
19678 not need to be the same register in the prologue and
19679 epilogue. */
19680 if ((mask & 0xff) == 0
19681 && !call_clobbered_scratch
19682 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19683 {
19684 /* Use thumb_find_work_register to choose which register
19685 we will use. If the register is live then we will
19686 have to push it. Use LAST_LO_REGNUM as our fallback
19687 choice for the register to select. */
19688 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19689 /* Make sure the register returned by thumb_find_work_register is
19690 not part of the return value. */
19691 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19692 reg = LAST_LO_REGNUM;
19693
19694 if (callee_saved_reg_p (reg))
19695 mask |= 1 << reg;
19696 }
19697
19698 /* The 504 below is 8 bytes less than 512 because there are two possible
19699 alignment words. We can't tell here if they will be present or not so we
19700 have to play it safe and assume that they are. */
19701 if ((CALLER_INTERWORKING_SLOT_SIZE +
19702 ROUND_UP_WORD (get_frame_size ()) +
19703 crtl->outgoing_args_size) >= 504)
19704 {
19705 /* This is the same as the code in thumb1_expand_prologue() which
19706 determines which register to use for stack decrement. */
19707 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19708 if (mask & (1 << reg))
19709 break;
19710
19711 if (reg > LAST_LO_REGNUM)
19712 {
19713 /* Make sure we have a register available for stack decrement. */
19714 mask |= 1 << LAST_LO_REGNUM;
19715 }
19716 }
19717
19718 return mask;
19719 }
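/* A rough worked example of the 504 check above (an illustration only, not
   taken from any particular function): with CALLER_INTERWORKING_SLOT_SIZE
   of 4, a frame that rounds up to 496 bytes and 8 bytes of outgoing
   arguments, the total is 4 + 496 + 8 = 508 >= 504, so if none of the low
   registers r4-r7 is already in the mask, LAST_LO_REGNUM is added so that
   a register is available for the stack decrement.  With a 488-byte frame
   the total is only 500 and no extra register is reserved.  */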
19720
19721
19722 /* Return the number of bytes required to save VFP registers. */
19723 static int
19724 arm_get_vfp_saved_size (void)
19725 {
19726 unsigned int regno;
19727 int count;
19728 int saved;
19729
19730 saved = 0;
19731 /* Space for saved VFP registers. */
19732 if (TARGET_HARD_FLOAT)
19733 {
19734 count = 0;
19735 for (regno = FIRST_VFP_REGNUM;
19736 regno < LAST_VFP_REGNUM;
19737 regno += 2)
19738 {
19739 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19740 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19741 {
19742 if (count > 0)
19743 {
19744 /* Workaround ARM10 VFPr1 bug. */
19745 if (count == 2 && !arm_arch6)
19746 count++;
19747 saved += count * 8;
19748 }
19749 count = 0;
19750 }
19751 else
19752 count++;
19753 }
19754 if (count > 0)
19755 {
19756 if (count == 2 && !arm_arch6)
19757 count++;
19758 saved += count * 8;
19759 }
19760 }
19761 return saved;
19762 }
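/* A rough illustration of the sizing above (not tied to any particular
   function): if only the call-saved pair d8/d9 (s16-s19) is ever live,
   the loop flushes a run with count == 2, giving 2 * 8 = 16 bytes on
   arm_arch6, or 24 bytes on older cores where the ARM10 VFPr1 workaround
   pads a 2-register block to 3.  */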
19763
19764
19765 /* Generate a function exit sequence. If REALLY_RETURN is false, then do
19766 everything bar the final return instruction. If simple_return is true,
19767 then do not output the epilogue, because it has already been emitted in RTL.
19768
19769 Note: do not forget to update length attribute of corresponding insn pattern
19770 when changing assembly output (e.g. length attribute of
19771 thumb2_cmse_entry_return when updating Armv8-M Mainline Security Extensions
19772 register clearing sequences). */
19773 const char *
19774 output_return_instruction (rtx operand, bool really_return, bool reverse,
19775 bool simple_return)
19776 {
19777 char conditional[10];
19778 char instr[100];
19779 unsigned reg;
19780 unsigned long live_regs_mask;
19781 unsigned long func_type;
19782 arm_stack_offsets *offsets;
19783
19784 func_type = arm_current_func_type ();
19785
19786 if (IS_NAKED (func_type))
19787 return "";
19788
19789 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19790 {
19791 /* If this function was declared non-returning, and we have
19792 found a tail call, then we have to trust that the called
19793 function won't return. */
19794 if (really_return)
19795 {
19796 rtx ops[2];
19797
19798 /* Otherwise, trap an attempted return by aborting. */
19799 ops[0] = operand;
19800 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19801 : "abort");
19802 assemble_external_libcall (ops[1]);
19803 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19804 }
19805
19806 return "";
19807 }
19808
19809 gcc_assert (!cfun->calls_alloca || really_return);
19810
19811 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19812
19813 cfun->machine->return_used_this_function = 1;
19814
19815 offsets = arm_get_frame_offsets ();
19816 live_regs_mask = offsets->saved_regs_mask;
19817
19818 if (!simple_return && live_regs_mask)
19819 {
19820 const char * return_reg;
19821
19822 /* If we do not have any special requirements for function exit
19823 (e.g. interworking) then we can load the return address
19824 directly into the PC. Otherwise we must load it into LR. */
19825 if (really_return
19826 && !IS_CMSE_ENTRY (func_type)
19827 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19828 return_reg = reg_names[PC_REGNUM];
19829 else
19830 return_reg = reg_names[LR_REGNUM];
19831
19832 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19833 {
19834 /* There are three possible reasons for the IP register
19835 being saved. 1) a stack frame was created, in which case
19836 IP contains the old stack pointer, or 2) an ISR routine
19837 corrupted it, or 3) it was saved to align the stack on
19838 iWMMXt. In case 1, restore IP into SP, otherwise just
19839 restore IP. */
19840 if (frame_pointer_needed)
19841 {
19842 live_regs_mask &= ~ (1 << IP_REGNUM);
19843 live_regs_mask |= (1 << SP_REGNUM);
19844 }
19845 else
19846 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19847 }
19848
19849 /* On some ARM architectures it is faster to use LDR rather than
19850 LDM to load a single register. On other architectures, the
19851 cost is the same. In 26 bit mode, or for exception handlers,
19852 we have to use LDM to load the PC so that the CPSR is also
19853 restored. */
19854 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19855 if (live_regs_mask == (1U << reg))
19856 break;
19857
19858 if (reg <= LAST_ARM_REGNUM
19859 && (reg != LR_REGNUM
19860 || ! really_return
19861 || ! IS_INTERRUPT (func_type)))
19862 {
19863 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19864 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19865 }
19866 else
19867 {
19868 char *p;
19869 int first = 1;
19870
19871 /* Generate the load multiple instruction to restore the
19872 registers. Note we can get here, even if
19873 frame_pointer_needed is true, but only if sp already
19874 points to the base of the saved core registers. */
19875 if (live_regs_mask & (1 << SP_REGNUM))
19876 {
19877 unsigned HOST_WIDE_INT stack_adjust;
19878
19879 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19880 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19881
19882 if (stack_adjust && arm_arch5t && TARGET_ARM)
19883 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19884 else
19885 {
19886 /* If we can't use ldmib (SA110 bug),
19887 then try to pop r3 instead. */
19888 if (stack_adjust)
19889 live_regs_mask |= 1 << 3;
19890
19891 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19892 }
19893 }
19894 /* For interrupt returns we have to use an LDM rather than
19895 a POP so that we can use the exception return variant. */
19896 else if (IS_INTERRUPT (func_type))
19897 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19898 else
19899 sprintf (instr, "pop%s\t{", conditional);
19900
19901 p = instr + strlen (instr);
19902
19903 for (reg = 0; reg <= SP_REGNUM; reg++)
19904 if (live_regs_mask & (1 << reg))
19905 {
19906 int l = strlen (reg_names[reg]);
19907
19908 if (first)
19909 first = 0;
19910 else
19911 {
19912 memcpy (p, ", ", 2);
19913 p += 2;
19914 }
19915
19916 memcpy (p, "%|", 2);
19917 memcpy (p + 2, reg_names[reg], l);
19918 p += l + 2;
19919 }
19920
19921 if (live_regs_mask & (1 << LR_REGNUM))
19922 {
19923 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19924 /* If returning from an interrupt, restore the CPSR. */
19925 if (IS_INTERRUPT (func_type))
19926 strcat (p, "^");
19927 }
19928 else
19929 strcpy (p, "}");
19930 }
19931
19932 output_asm_insn (instr, & operand);
19933
19934 /* See if we need to generate an extra instruction to
19935 perform the actual function return. */
19936 if (really_return
19937 && func_type != ARM_FT_INTERWORKED
19938 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19939 {
19940 /* The return has already been handled
19941 by loading the LR into the PC. */
19942 return "";
19943 }
19944 }
19945
19946 if (really_return)
19947 {
19948 switch ((int) ARM_FUNC_TYPE (func_type))
19949 {
19950 case ARM_FT_ISR:
19951 case ARM_FT_FIQ:
19952 /* ??? This is wrong for unified assembly syntax. */
19953 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19954 break;
19955
19956 case ARM_FT_INTERWORKED:
19957 gcc_assert (arm_arch5t || arm_arch4t);
19958 sprintf (instr, "bx%s\t%%|lr", conditional);
19959 break;
19960
19961 case ARM_FT_EXCEPTION:
19962 /* ??? This is wrong for unified assembly syntax. */
19963 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19964 break;
19965
19966 default:
19967 if (IS_CMSE_ENTRY (func_type))
19968 {
19969 /* Check if we have to clear the 'GE bits', which are only used if
19970 parallel add and subtraction instructions are available. */
19971 if (TARGET_INT_SIMD)
19972 snprintf (instr, sizeof (instr),
19973 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19974 else
19975 snprintf (instr, sizeof (instr),
19976 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19977
19978 output_asm_insn (instr, & operand);
19979 if (TARGET_HARD_FLOAT)
19980 {
19981 /* Clear the cumulative exception-status bits (0-4,7) and the
19982 condition code bits (28-31) of the FPSCR. We need to
19983 remember to clear the first scratch register used (IP) and
19984 save and restore the second (r4). */
19985 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19986 output_asm_insn (instr, & operand);
19987 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19988 output_asm_insn (instr, & operand);
19989 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19990 output_asm_insn (instr, & operand);
19991 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19992 output_asm_insn (instr, & operand);
19993 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19994 output_asm_insn (instr, & operand);
19995 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19996 output_asm_insn (instr, & operand);
19997 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19998 output_asm_insn (instr, & operand);
19999 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
20000 output_asm_insn (instr, & operand);
20001 }
20002 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
20003 }
20004 /* Use bx if it's available. */
20005 else if (arm_arch5t || arm_arch4t)
20006 sprintf (instr, "bx%s\t%%|lr", conditional);
20007 else
20008 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
20009 break;
20010 }
20011
20012 output_asm_insn (instr, & operand);
20013 }
20014
20015 return "";
20016 }
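/* A rough illustration of the output above (assuming a function with no
   interworking, interrupt or CMSE requirements whose saved_regs_mask is
   {r4, lr}): the multi-register branch typically prints "pop {r4, pc}"
   and no separate return instruction is needed, since the return address
   goes straight into the PC.  When the return address must go via LR
   instead (e.g. ARM_FT_INTERWORKED), the pop targets LR and is followed
   by "bx lr".  */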
20017
20018 /* Output in FILE asm statements needed to declare the NAME of the function
20019 defined by its DECL node. */
20020
20021 void
20022 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
20023 {
20024 size_t cmse_name_len;
20025 char *cmse_name = 0;
20026 char cmse_prefix[] = "__acle_se_";
20027
20028 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
20029 extra function label for each function with the 'cmse_nonsecure_entry'
20030 attribute. This extra function label should be prepended with
20031 '__acle_se_', telling the linker that it needs to create secure gateway
20032 veneers for this function. */
20033 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
20034 DECL_ATTRIBUTES (decl)))
20035 {
20036 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
20037 cmse_name = XALLOCAVEC (char, cmse_name_len);
20038 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
20039 targetm.asm_out.globalize_label (file, cmse_name);
20040
20041 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
20042 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
20043 }
20044
20045 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
20046 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
20047 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
20048 ASM_OUTPUT_LABEL (file, name);
20049
20050 if (cmse_name)
20051 ASM_OUTPUT_LABEL (file, cmse_name);
20052
20053 ARM_OUTPUT_FN_UNWIND (file, TRUE);
20054 }
20055
20056 /* Write the function name into the code section, directly preceding
20057 the function prologue.
20058
20059 Code will be output similar to this:
20060 t0
20061 .ascii "arm_poke_function_name", 0
20062 .align
20063 t1
20064 .word 0xff000000 + (t1 - t0)
20065 arm_poke_function_name
20066 mov ip, sp
20067 stmfd sp!, {fp, ip, lr, pc}
20068 sub fp, ip, #4
20069
20070 When performing a stack backtrace, code can inspect the value
20071 of 'pc' stored at 'fp' + 0. If the trace function then looks
20072 at location pc - 12 and the top 8 bits are set, then we know
20073 that there is a function name embedded immediately preceding this
20074 location, and that it has length ((pc[-3]) & ~0xff000000).
20075
20076 We assume that pc is declared as a pointer to an unsigned long.
20077
20078 It is of no benefit to output the function name if we are assembling
20079 a leaf function. These function types will not contain a stack
20080 backtrace structure, therefore it is not possible to determine the
20081 function name. */
20082 void
20083 arm_poke_function_name (FILE *stream, const char *name)
20084 {
20085 unsigned long alignlength;
20086 unsigned long length;
20087 rtx x;
20088
20089 length = strlen (name) + 1;
20090 alignlength = ROUND_UP_WORD (length);
20091
20092 ASM_OUTPUT_ASCII (stream, name, length);
20093 ASM_OUTPUT_ALIGN (stream, 2);
20094 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
20095 assemble_aligned_integer (UNITS_PER_WORD, x);
20096 }
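/* A hypothetical consumer of the convention documented above (a sketch,
   not code that exists in GCC): a backtracer could recover the name
   roughly like this, where pc is the saved program counter word taken
   from the frame and len_word, len and name are made-up local names:

       unsigned long len_word = ((unsigned long *) pc)[-3];
       if ((len_word & 0xff000000) == 0xff000000)
         {
           unsigned long len = len_word & ~0xff000000;
           const char *name = (const char *) pc - 12 - len;
           ...
         }

   Here len is the word-rounded length stored by arm_poke_function_name
   and name points at the NUL-terminated string emitted by
   ASM_OUTPUT_ASCII.  */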
20097
20098 /* Place some comments into the assembler stream
20099 describing the current function. */
20100 static void
20101 arm_output_function_prologue (FILE *f)
20102 {
20103 unsigned long func_type;
20104
20105 /* Sanity check. */
20106 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
20107
20108 func_type = arm_current_func_type ();
20109
20110 switch ((int) ARM_FUNC_TYPE (func_type))
20111 {
20112 default:
20113 case ARM_FT_NORMAL:
20114 break;
20115 case ARM_FT_INTERWORKED:
20116 asm_fprintf (f, "\t%@ Function supports interworking.\n");
20117 break;
20118 case ARM_FT_ISR:
20119 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
20120 break;
20121 case ARM_FT_FIQ:
20122 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
20123 break;
20124 case ARM_FT_EXCEPTION:
20125 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
20126 break;
20127 }
20128
20129 if (IS_NAKED (func_type))
20130 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
20131
20132 if (IS_VOLATILE (func_type))
20133 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
20134
20135 if (IS_NESTED (func_type))
20136 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
20137 if (IS_STACKALIGN (func_type))
20138 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
20139 if (IS_CMSE_ENTRY (func_type))
20140 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
20141
20142 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
20143 (HOST_WIDE_INT) crtl->args.size,
20144 crtl->args.pretend_args_size,
20145 (HOST_WIDE_INT) get_frame_size ());
20146
20147 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
20148 frame_pointer_needed,
20149 cfun->machine->uses_anonymous_args);
20150
20151 if (cfun->machine->lr_save_eliminated)
20152 asm_fprintf (f, "\t%@ link register save eliminated.\n");
20153
20154 if (crtl->calls_eh_return)
20155 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
20156
20157 }
20158
20159 static void
20160 arm_output_function_epilogue (FILE *)
20161 {
20162 arm_stack_offsets *offsets;
20163
20164 if (TARGET_THUMB1)
20165 {
20166 int regno;
20167
20168 /* Emit any call-via-reg trampolines that are needed for v4t support
20169 of call_reg and call_value_reg type insns. */
20170 for (regno = 0; regno < LR_REGNUM; regno++)
20171 {
20172 rtx label = cfun->machine->call_via[regno];
20173
20174 if (label != NULL)
20175 {
20176 switch_to_section (function_section (current_function_decl));
20177 targetm.asm_out.internal_label (asm_out_file, "L",
20178 CODE_LABEL_NUMBER (label));
20179 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20180 }
20181 }
20182
20183 /* ??? Probably not safe to set this here, since it assumes that a
20184 function will be emitted as assembly immediately after we generate
20185 RTL for it. This does not happen for inline functions. */
20186 cfun->machine->return_used_this_function = 0;
20187 }
20188 else /* TARGET_32BIT */
20189 {
20190 /* We need to take into account any stack-frame rounding. */
20191 offsets = arm_get_frame_offsets ();
20192
20193 gcc_assert (!use_return_insn (FALSE, NULL)
20194 || (cfun->machine->return_used_this_function != 0)
20195 || offsets->saved_regs == offsets->outgoing_args
20196 || frame_pointer_needed);
20197 }
20198 }
20199
20200 /* Generate and emit a sequence of insns equivalent to PUSH, but using
20201 STR and STRD. If an even number of registers is being pushed, one
20202 STRD pattern is created for each register pair. If an odd
20203 number of registers is pushed, emit an initial STR followed by
20204 as many STRD instructions as are needed. This works best when the
20205 stack is initially 64-bit aligned (the normal case), since it
20206 ensures that each STRD is also 64-bit aligned. */
20207 static void
20208 thumb2_emit_strd_push (unsigned long saved_regs_mask)
20209 {
20210 int num_regs = 0;
20211 int i;
20212 int regno;
20213 rtx par = NULL_RTX;
20214 rtx dwarf = NULL_RTX;
20215 rtx tmp;
20216 bool first = true;
20217
20218 num_regs = bit_count (saved_regs_mask);
20219
20220 /* Must be at least one register to save, and can't save SP or PC. */
20221 gcc_assert (num_regs > 0 && num_regs <= 14);
20222 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20223 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20224
20225 /* Create sequence for DWARF info. All the frame-related data for
20226 debugging is held in this wrapper. */
20227 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20228
20229 /* Describe the stack adjustment. */
20230 tmp = gen_rtx_SET (stack_pointer_rtx,
20231 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20232 RTX_FRAME_RELATED_P (tmp) = 1;
20233 XVECEXP (dwarf, 0, 0) = tmp;
20234
20235 /* Find the first register. */
20236 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
20237 ;
20238
20239 i = 0;
20240
20241 /* If there's an odd number of registers to push, start off by
20242 pushing a single register. This ensures that subsequent strd
20243 operations are dword aligned (assuming that SP was originally
20244 64-bit aligned). */
20245 if ((num_regs & 1) != 0)
20246 {
20247 rtx reg, mem, insn;
20248
20249 reg = gen_rtx_REG (SImode, regno);
20250 if (num_regs == 1)
20251 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
20252 stack_pointer_rtx));
20253 else
20254 mem = gen_frame_mem (Pmode,
20255 gen_rtx_PRE_MODIFY
20256 (Pmode, stack_pointer_rtx,
20257 plus_constant (Pmode, stack_pointer_rtx,
20258 -4 * num_regs)));
20259
20260 tmp = gen_rtx_SET (mem, reg);
20261 RTX_FRAME_RELATED_P (tmp) = 1;
20262 insn = emit_insn (tmp);
20263 RTX_FRAME_RELATED_P (insn) = 1;
20264 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20265 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
20266 RTX_FRAME_RELATED_P (tmp) = 1;
20267 i++;
20268 regno++;
20269 XVECEXP (dwarf, 0, i) = tmp;
20270 first = false;
20271 }
20272
20273 while (i < num_regs)
20274 if (saved_regs_mask & (1 << regno))
20275 {
20276 rtx reg1, reg2, mem1, mem2;
20277 rtx tmp0, tmp1, tmp2;
20278 int regno2;
20279
20280 /* Find the register to pair with this one. */
20281 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
20282 regno2++)
20283 ;
20284
20285 reg1 = gen_rtx_REG (SImode, regno);
20286 reg2 = gen_rtx_REG (SImode, regno2);
20287
20288 if (first)
20289 {
20290 rtx insn;
20291
20292 first = false;
20293 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20294 stack_pointer_rtx,
20295 -4 * num_regs));
20296 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20297 stack_pointer_rtx,
20298 -4 * (num_regs - 1)));
20299 tmp0 = gen_rtx_SET (stack_pointer_rtx,
20300 plus_constant (Pmode, stack_pointer_rtx,
20301 -4 * (num_regs)));
20302 tmp1 = gen_rtx_SET (mem1, reg1);
20303 tmp2 = gen_rtx_SET (mem2, reg2);
20304 RTX_FRAME_RELATED_P (tmp0) = 1;
20305 RTX_FRAME_RELATED_P (tmp1) = 1;
20306 RTX_FRAME_RELATED_P (tmp2) = 1;
20307 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
20308 XVECEXP (par, 0, 0) = tmp0;
20309 XVECEXP (par, 0, 1) = tmp1;
20310 XVECEXP (par, 0, 2) = tmp2;
20311 insn = emit_insn (par);
20312 RTX_FRAME_RELATED_P (insn) = 1;
20313 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20314 }
20315 else
20316 {
20317 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20318 stack_pointer_rtx,
20319 4 * i));
20320 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20321 stack_pointer_rtx,
20322 4 * (i + 1)));
20323 tmp1 = gen_rtx_SET (mem1, reg1);
20324 tmp2 = gen_rtx_SET (mem2, reg2);
20325 RTX_FRAME_RELATED_P (tmp1) = 1;
20326 RTX_FRAME_RELATED_P (tmp2) = 1;
20327 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20328 XVECEXP (par, 0, 0) = tmp1;
20329 XVECEXP (par, 0, 1) = tmp2;
20330 emit_insn (par);
20331 }
20332
20333 /* Create unwind information. This is an approximation. */
20334 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
20335 plus_constant (Pmode,
20336 stack_pointer_rtx,
20337 4 * i)),
20338 reg1);
20339 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
20340 plus_constant (Pmode,
20341 stack_pointer_rtx,
20342 4 * (i + 1))),
20343 reg2);
20344
20345 RTX_FRAME_RELATED_P (tmp1) = 1;
20346 RTX_FRAME_RELATED_P (tmp2) = 1;
20347 XVECEXP (dwarf, 0, i + 1) = tmp1;
20348 XVECEXP (dwarf, 0, i + 2) = tmp2;
20349 i += 2;
20350 regno = regno2 + 1;
20351 }
20352 else
20353 regno++;
20354
20355 return;
20356 }
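/* A rough illustration of the sequence above (assuming SP starts 64-bit
   aligned): for saved_regs_mask {r4, r5, r6} the emitted sets correspond
   approximately to

       str    r4, [sp, #-12]!
       strd   r5, r6, [sp, #4]

   while for an even mask such as {r4, r5, r6, r7} the first pair carries
   the writeback:

       strd   r4, r5, [sp, #-16]!
       strd   r6, r7, [sp, #8]  */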
20357
20358 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20359 whenever possible, otherwise it emits single-word stores. The first store
20360 also allocates stack space for all saved registers, using pre-indexed
20361 addressing with writeback. All other stores use offset addressing. If no STRD
20362 can be emitted, this function emits a sequence of single-word stores,
20363 and not an STM as before, because single-word stores provide more
20364 scheduling freedom and can be turned into an STM by peephole optimizations. */
20365 static void
20366 arm_emit_strd_push (unsigned long saved_regs_mask)
20367 {
20368 int num_regs = 0;
20369 int i, j, dwarf_index = 0;
20370 int offset = 0;
20371 rtx dwarf = NULL_RTX;
20372 rtx insn = NULL_RTX;
20373 rtx tmp, mem;
20374
20375 /* TODO: More efficient code can be emitted by changing the
20376 layout, e.g., first push all pairs that can use STRD to keep the
20377 stack aligned, and then push all other registers. */
20378 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20379 if (saved_regs_mask & (1 << i))
20380 num_regs++;
20381
20382 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20383 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20384 gcc_assert (num_regs > 0);
20385
20386 /* Create sequence for DWARF info. */
20387 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20388
20389 /* For dwarf info, we generate explicit stack update. */
20390 tmp = gen_rtx_SET (stack_pointer_rtx,
20391 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20392 RTX_FRAME_RELATED_P (tmp) = 1;
20393 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20394
20395 /* Save registers. */
20396 offset = - 4 * num_regs;
20397 j = 0;
20398 while (j <= LAST_ARM_REGNUM)
20399 if (saved_regs_mask & (1 << j))
20400 {
20401 if ((j % 2 == 0)
20402 && (saved_regs_mask & (1 << (j + 1))))
20403 {
20404 /* The current register and the next register form a register pair
20405 for which STRD can be generated. */
20406 if (offset < 0)
20407 {
20408 /* Allocate stack space for all saved registers. */
20409 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20410 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20411 mem = gen_frame_mem (DImode, tmp);
20412 offset = 0;
20413 }
20414 else if (offset > 0)
20415 mem = gen_frame_mem (DImode,
20416 plus_constant (Pmode,
20417 stack_pointer_rtx,
20418 offset));
20419 else
20420 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20421
20422 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20423 RTX_FRAME_RELATED_P (tmp) = 1;
20424 tmp = emit_insn (tmp);
20425
20426 /* Record the first store insn. */
20427 if (dwarf_index == 1)
20428 insn = tmp;
20429
20430 /* Generate dwarf info. */
20431 mem = gen_frame_mem (SImode,
20432 plus_constant (Pmode,
20433 stack_pointer_rtx,
20434 offset));
20435 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20436 RTX_FRAME_RELATED_P (tmp) = 1;
20437 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20438
20439 mem = gen_frame_mem (SImode,
20440 plus_constant (Pmode,
20441 stack_pointer_rtx,
20442 offset + 4));
20443 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20444 RTX_FRAME_RELATED_P (tmp) = 1;
20445 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20446
20447 offset += 8;
20448 j += 2;
20449 }
20450 else
20451 {
20452 /* Emit a single word store. */
20453 if (offset < 0)
20454 {
20455 /* Allocate stack space for all saved registers. */
20456 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20457 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20458 mem = gen_frame_mem (SImode, tmp);
20459 offset = 0;
20460 }
20461 else if (offset > 0)
20462 mem = gen_frame_mem (SImode,
20463 plus_constant (Pmode,
20464 stack_pointer_rtx,
20465 offset));
20466 else
20467 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20468
20469 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20470 RTX_FRAME_RELATED_P (tmp) = 1;
20471 tmp = emit_insn (tmp);
20472
20473 /* Record the first store insn. */
20474 if (dwarf_index == 1)
20475 insn = tmp;
20476
20477 /* Generate dwarf info. */
20478 mem = gen_frame_mem (SImode,
20479 plus_constant (Pmode,
20480 stack_pointer_rtx,
20481 offset));
20482 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20483 RTX_FRAME_RELATED_P (tmp) = 1;
20484 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20485
20486 offset += 4;
20487 j += 1;
20488 }
20489 }
20490 else
20491 j++;
20492
20493 /* Attach dwarf info to the first insn we generate. */
20494 gcc_assert (insn != NULL_RTX);
20495 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20496 RTX_FRAME_RELATED_P (insn) = 1;
20497 }
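/* A rough illustration of the stores above: for saved_regs_mask
   {r4, r5, r6, lr} the emitted sets correspond approximately to

       strd   r4, r5, [sp, #-16]!
       str    r6, [sp, #8]
       str    lr, [sp, #12]

   r6 gets a single-word store because r7 is not in the mask, and lr gets
   one because the following register (PC) is never pushed here.  */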
20498
20499 /* Generate and emit an insn that we will recognize as a push_multi.
20500 Unfortunately, since this insn does not reflect very well the actual
20501 semantics of the operation, we need to annotate the insn for the benefit
20502 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20503 MASK for registers that should be annotated for DWARF2 frame unwind
20504 information. */
20505 static rtx
20506 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20507 {
20508 int num_regs = 0;
20509 int num_dwarf_regs = 0;
20510 int i, j;
20511 rtx par;
20512 rtx dwarf;
20513 int dwarf_par_index;
20514 rtx tmp, reg;
20515
20516 /* We don't record the PC in the dwarf frame information. */
20517 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20518
20519 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20520 {
20521 if (mask & (1 << i))
20522 num_regs++;
20523 if (dwarf_regs_mask & (1 << i))
20524 num_dwarf_regs++;
20525 }
20526
20527 gcc_assert (num_regs && num_regs <= 16);
20528 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20529
20530 /* For the body of the insn we are going to generate an UNSPEC in
20531 parallel with several USEs. This allows the insn to be recognized
20532 by the push_multi pattern in the arm.md file.
20533
20534 The body of the insn looks something like this:
20535
20536 (parallel [
20537 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20538 (const_int:SI <num>)))
20539 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20540 (use (reg:SI XX))
20541 (use (reg:SI YY))
20542 ...
20543 ])
20544
20545 For the frame note however, we try to be more explicit and actually
20546 show each register being stored into the stack frame, plus a (single)
20547 decrement of the stack pointer. We do it this way in order to be
20548 friendly to the stack unwinding code, which only wants to see a single
20549 stack decrement per instruction. The RTL we generate for the note looks
20550 something like this:
20551
20552 (sequence [
20553 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20554 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20555 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20556 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20557 ...
20558 ])
20559
20560 FIXME: In an ideal world the PRE_MODIFY would not exist and
20561 instead we'd have a parallel expression detailing all
20562 the stores to the various memory addresses so that debug
20563 information is more up-to-date. Remember however while writing
20564 this to take care of the constraints with the push instruction.
20565
20566 Note also that this has to be taken care of for the VFP registers.
20567
20568 For more see PR43399. */
20569
20570 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20571 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20572 dwarf_par_index = 1;
20573
20574 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20575 {
20576 if (mask & (1 << i))
20577 {
20578 reg = gen_rtx_REG (SImode, i);
20579
20580 XVECEXP (par, 0, 0)
20581 = gen_rtx_SET (gen_frame_mem
20582 (BLKmode,
20583 gen_rtx_PRE_MODIFY (Pmode,
20584 stack_pointer_rtx,
20585 plus_constant
20586 (Pmode, stack_pointer_rtx,
20587 -4 * num_regs))
20588 ),
20589 gen_rtx_UNSPEC (BLKmode,
20590 gen_rtvec (1, reg),
20591 UNSPEC_PUSH_MULT));
20592
20593 if (dwarf_regs_mask & (1 << i))
20594 {
20595 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20596 reg);
20597 RTX_FRAME_RELATED_P (tmp) = 1;
20598 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20599 }
20600
20601 break;
20602 }
20603 }
20604
20605 for (j = 1, i++; j < num_regs; i++)
20606 {
20607 if (mask & (1 << i))
20608 {
20609 reg = gen_rtx_REG (SImode, i);
20610
20611 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20612
20613 if (dwarf_regs_mask & (1 << i))
20614 {
20615 tmp
20616 = gen_rtx_SET (gen_frame_mem
20617 (SImode,
20618 plus_constant (Pmode, stack_pointer_rtx,
20619 4 * j)),
20620 reg);
20621 RTX_FRAME_RELATED_P (tmp) = 1;
20622 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20623 }
20624
20625 j++;
20626 }
20627 }
20628
20629 par = emit_insn (par);
20630
20631 tmp = gen_rtx_SET (stack_pointer_rtx,
20632 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20633 RTX_FRAME_RELATED_P (tmp) = 1;
20634 XVECEXP (dwarf, 0, 0) = tmp;
20635
20636 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20637
20638 return par;
20639 }
20640
20641 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20642 SIZE is the offset to be adjusted.
20643 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20644 static void
20645 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20646 {
20647 rtx dwarf;
20648
20649 RTX_FRAME_RELATED_P (insn) = 1;
20650 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20651 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20652 }
20653
20654 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20655 SAVED_REGS_MASK shows which registers need to be restored.
20656
20657 Unfortunately, since this insn does not reflect very well the actual
20658 semantics of the operation, we need to annotate the insn for the benefit
20659 of DWARF2 frame unwind information. */
20660 static void
20661 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20662 {
20663 int num_regs = 0;
20664 int i, j;
20665 rtx par;
20666 rtx dwarf = NULL_RTX;
20667 rtx tmp, reg;
20668 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20669 int offset_adj;
20670 int emit_update;
20671
20672 offset_adj = return_in_pc ? 1 : 0;
20673 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20674 if (saved_regs_mask & (1 << i))
20675 num_regs++;
20676
20677 gcc_assert (num_regs && num_regs <= 16);
20678
20679 /* If SP is in reglist, then we don't emit SP update insn. */
20680 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20681
20682 /* The parallel needs to hold num_regs SETs
20683 and one SET for the stack update. */
20684 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20685
20686 if (return_in_pc)
20687 XVECEXP (par, 0, 0) = ret_rtx;
20688
20689 if (emit_update)
20690 {
20691 /* Increment the stack pointer, based on there being
20692 num_regs 4-byte registers to restore. */
20693 tmp = gen_rtx_SET (stack_pointer_rtx,
20694 plus_constant (Pmode,
20695 stack_pointer_rtx,
20696 4 * num_regs));
20697 RTX_FRAME_RELATED_P (tmp) = 1;
20698 XVECEXP (par, 0, offset_adj) = tmp;
20699 }
20700
20701 /* Now restore every reg, which may include PC. */
20702 for (j = 0, i = 0; j < num_regs; i++)
20703 if (saved_regs_mask & (1 << i))
20704 {
20705 reg = gen_rtx_REG (SImode, i);
20706 if ((num_regs == 1) && emit_update && !return_in_pc)
20707 {
20708 /* Emit single load with writeback. */
20709 tmp = gen_frame_mem (SImode,
20710 gen_rtx_POST_INC (Pmode,
20711 stack_pointer_rtx));
20712 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20713 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20714 return;
20715 }
20716
20717 tmp = gen_rtx_SET (reg,
20718 gen_frame_mem
20719 (SImode,
20720 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20721 RTX_FRAME_RELATED_P (tmp) = 1;
20722 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20723
20724 /* We need to maintain a sequence for DWARF info too. As dwarf info
20725 should not have PC, skip PC. */
20726 if (i != PC_REGNUM)
20727 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20728
20729 j++;
20730 }
20731
20732 if (return_in_pc)
20733 par = emit_jump_insn (par);
20734 else
20735 par = emit_insn (par);
20736
20737 REG_NOTES (par) = dwarf;
20738 if (!return_in_pc)
20739 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20740 stack_pointer_rtx, stack_pointer_rtx);
20741 }
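/* A rough illustration of the pattern built above: for saved_regs_mask
   {r4, r5, pc} the PARALLEL holds a return, the update sp = sp + 12, and
   loads of r4, r5 and pc from [sp], [sp, #4] and [sp, #8]; it is normally
   recognized as a pop_multiple_with_stack_update_and_return pattern
   (mentioned further below) and printed as a single pop of {r4, r5, pc}.  */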
20742
20743 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20744 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20745
20746 Unfortunately, since this insn does not reflect very well the actual
20747 semantics of the operation, we need to annotate the insn for the benefit
20748 of DWARF2 frame unwind information. */
20749 static void
20750 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20751 {
20752 int i, j;
20753 rtx par;
20754 rtx dwarf = NULL_RTX;
20755 rtx tmp, reg;
20756
20757 gcc_assert (num_regs && num_regs <= 32);
20758
20759 /* Workaround ARM10 VFPr1 bug. */
20760 if (num_regs == 2 && !arm_arch6)
20761 {
20762 if (first_reg == 15)
20763 first_reg--;
20764
20765 num_regs++;
20766 }
20767
20768 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20769 there could be up to 32 D-registers to restore.
20770 If there are more than 16 D-registers, make two recursive calls,
20771 each of which emits one pop_multi instruction. */
20772 if (num_regs > 16)
20773 {
20774 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20775 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20776 return;
20777 }
20778
20779 /* The parallel needs to hold num_regs SETs
20780 and one SET for the stack update. */
20781 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20782
20783 /* Increment the stack pointer, based on there being
20784 num_regs 8-byte registers to restore. */
20785 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20786 RTX_FRAME_RELATED_P (tmp) = 1;
20787 XVECEXP (par, 0, 0) = tmp;
20788
20789 /* Now show every reg that will be restored, using a SET for each. */
20790 for (j = 0, i=first_reg; j < num_regs; i += 2)
20791 {
20792 reg = gen_rtx_REG (DFmode, i);
20793
20794 tmp = gen_rtx_SET (reg,
20795 gen_frame_mem
20796 (DFmode,
20797 plus_constant (Pmode, base_reg, 8 * j)));
20798 RTX_FRAME_RELATED_P (tmp) = 1;
20799 XVECEXP (par, 0, j + 1) = tmp;
20800
20801 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20802
20803 j++;
20804 }
20805
20806 par = emit_insn (par);
20807 REG_NOTES (par) = dwarf;
20808
20809 /* Make sure the CFA doesn't leave with IP_REGNUM, to allow unwinding from FP. */
20810 if (REGNO (base_reg) == IP_REGNUM)
20811 {
20812 RTX_FRAME_RELATED_P (par) = 1;
20813 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20814 }
20815 else
20816 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20817 base_reg, base_reg);
20818 }
20819
20820 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If an
20821 even number of registers is being popped, multiple LDRD patterns are created,
20822 one for each register pair. If an odd number of registers is popped, the last
20823 register is loaded using an LDR pattern. */
20824 static void
20825 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20826 {
20827 int num_regs = 0;
20828 int i, j;
20829 rtx par = NULL_RTX;
20830 rtx dwarf = NULL_RTX;
20831 rtx tmp, reg, tmp1;
20832 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20833
20834 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20835 if (saved_regs_mask & (1 << i))
20836 num_regs++;
20837
20838 gcc_assert (num_regs && num_regs <= 16);
20839
20840 /* We cannot generate an LDRD for PC, so reduce the count if PC is
20841 to be popped. Thus, if num_regs was even it now becomes odd and we
20842 can generate a pop with PC; if num_regs was odd it becomes even and
20843 an LDR with return can be generated for PC. */
20844 if (return_in_pc)
20845 num_regs--;
20846
20847 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20848
20849 /* Var j iterates over all the registers to gather all the registers in
20850 saved_regs_mask. Var i gives the index of a saved register in the stack
20851 frame. A PARALLEL RTX of a register pair is created here, so that the
20852 pattern for LDRD can be matched. As PC is always the last register to be
20853 popped, and num_regs has already been decremented if PC is to be popped,
20854 we don't have to worry about PC in this loop. */
20855 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20856 if (saved_regs_mask & (1 << j))
20857 {
20858 /* Create RTX for memory load. */
20859 reg = gen_rtx_REG (SImode, j);
20860 tmp = gen_rtx_SET (reg,
20861 gen_frame_mem (SImode,
20862 plus_constant (Pmode,
20863 stack_pointer_rtx, 4 * i)));
20864 RTX_FRAME_RELATED_P (tmp) = 1;
20865
20866 if (i % 2 == 0)
20867 {
20868 /* When saved-register index (i) is even, the RTX to be emitted is
20869 yet to be created. Hence create it first. The LDRD pattern we
20870 are generating is:
20871 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20872 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20873 where target registers need not be consecutive. */
20874 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20875 dwarf = NULL_RTX;
20876 }
20877
20878 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20879 added as 0th element and if i is odd, reg_i is added as 1st element
20880 of LDRD pattern shown above. */
20881 XVECEXP (par, 0, (i % 2)) = tmp;
20882 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20883
20884 if ((i % 2) == 1)
20885 {
20886 /* When saved-register index (i) is odd, RTXs for both the registers
20887 to be loaded are generated in above given LDRD pattern, and the
20888 pattern can be emitted now. */
20889 par = emit_insn (par);
20890 REG_NOTES (par) = dwarf;
20891 RTX_FRAME_RELATED_P (par) = 1;
20892 }
20893
20894 i++;
20895 }
20896
20897 /* If the number of registers pushed is odd and return_in_pc is false, or
20898 the number of registers is even and return_in_pc is true, the last
20899 register is popped using LDR. It can be PC as well. Hence, adjust the
20900 stack first and then do the LDR with post-increment. */
20901
20902 /* Increment the stack pointer, based on there being
20903 num_regs 4-byte registers to restore. */
20904 tmp = gen_rtx_SET (stack_pointer_rtx,
20905 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20906 RTX_FRAME_RELATED_P (tmp) = 1;
20907 tmp = emit_insn (tmp);
20908 if (!return_in_pc)
20909 {
20910 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20911 stack_pointer_rtx, stack_pointer_rtx);
20912 }
20913
20914 dwarf = NULL_RTX;
20915
20916 if (((num_regs % 2) == 1 && !return_in_pc)
20917 || ((num_regs % 2) == 0 && return_in_pc))
20918 {
20919 /* Scan for the single register to be popped. Skip until the saved
20920 register is found. */
20921 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20922
20923 /* Gen LDR with post increment here. */
20924 tmp1 = gen_rtx_MEM (SImode,
20925 gen_rtx_POST_INC (SImode,
20926 stack_pointer_rtx));
20927 set_mem_alias_set (tmp1, get_frame_alias_set ());
20928
20929 reg = gen_rtx_REG (SImode, j);
20930 tmp = gen_rtx_SET (reg, tmp1);
20931 RTX_FRAME_RELATED_P (tmp) = 1;
20932 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20933
20934 if (return_in_pc)
20935 {
20936 /* If return_in_pc, j must be PC_REGNUM. */
20937 gcc_assert (j == PC_REGNUM);
20938 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20939 XVECEXP (par, 0, 0) = ret_rtx;
20940 XVECEXP (par, 0, 1) = tmp;
20941 par = emit_jump_insn (par);
20942 }
20943 else
20944 {
20945 par = emit_insn (tmp);
20946 REG_NOTES (par) = dwarf;
20947 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20948 stack_pointer_rtx, stack_pointer_rtx);
20949 }
20950
20951 }
20952 else if ((num_regs % 2) == 1 && return_in_pc)
20953 {
20954 /* There are 2 registers to be popped. So, generate the pattern
20955 pop_multiple_with_stack_update_and_return to pop in PC. */
20956 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20957 }
20958
20959 return;
20960 }
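/* A rough illustration of the sequence above: for saved_regs_mask
   {r4, r5, r6, pc}, num_regs drops from 4 to 3 because of PC, and the
   emitted RTL corresponds approximately to

       ldrd   r4, r5, [sp]
       add    sp, sp, #8
       pop    {r6, pc}

   where the final pop comes from the call to arm_emit_multi_reg_pop.  */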
20961
20962 /* LDRD in ARM mode needs consecutive registers as operands. This function
20963 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20964 offset addressing and then generates one separate stack update. This provides
20965 more scheduling freedom, compared to writeback on every load. However,
20966 if the function returns using load into PC directly
20967 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20968 before the last load. TODO: Add a peephole optimization to recognize
20969 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20970 peephole optimization to merge the load at stack-offset zero
20971 with the stack update instruction using load with writeback
20972 in post-index addressing mode. */
20973 static void
20974 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20975 {
20976 int j = 0;
20977 int offset = 0;
20978 rtx par = NULL_RTX;
20979 rtx dwarf = NULL_RTX;
20980 rtx tmp, mem;
20981
20982 /* Restore saved registers. */
20983 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20984 j = 0;
20985 while (j <= LAST_ARM_REGNUM)
20986 if (saved_regs_mask & (1 << j))
20987 {
20988 if ((j % 2) == 0
20989 && (saved_regs_mask & (1 << (j + 1)))
20990 && (j + 1) != PC_REGNUM)
20991 {
20992 /* Current register and next register form register pair for which
20993 LDRD can be generated. PC is always the last register popped, and
20994 we handle it separately. */
20995 if (offset > 0)
20996 mem = gen_frame_mem (DImode,
20997 plus_constant (Pmode,
20998 stack_pointer_rtx,
20999 offset));
21000 else
21001 mem = gen_frame_mem (DImode, stack_pointer_rtx);
21002
21003 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
21004 tmp = emit_insn (tmp);
21005 RTX_FRAME_RELATED_P (tmp) = 1;
21006
21007 /* Generate dwarf info. */
21008
21009 dwarf = alloc_reg_note (REG_CFA_RESTORE,
21010 gen_rtx_REG (SImode, j),
21011 NULL_RTX);
21012 dwarf = alloc_reg_note (REG_CFA_RESTORE,
21013 gen_rtx_REG (SImode, j + 1),
21014 dwarf);
21015
21016 REG_NOTES (tmp) = dwarf;
21017
21018 offset += 8;
21019 j += 2;
21020 }
21021 else if (j != PC_REGNUM)
21022 {
21023 /* Emit a single word load. */
21024 if (offset > 0)
21025 mem = gen_frame_mem (SImode,
21026 plus_constant (Pmode,
21027 stack_pointer_rtx,
21028 offset));
21029 else
21030 mem = gen_frame_mem (SImode, stack_pointer_rtx);
21031
21032 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
21033 tmp = emit_insn (tmp);
21034 RTX_FRAME_RELATED_P (tmp) = 1;
21035
21036 /* Generate dwarf info. */
21037 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
21038 gen_rtx_REG (SImode, j),
21039 NULL_RTX);
21040
21041 offset += 4;
21042 j += 1;
21043 }
21044 else /* j == PC_REGNUM */
21045 j++;
21046 }
21047 else
21048 j++;
21049
21050 /* Update the stack. */
21051 if (offset > 0)
21052 {
21053 tmp = gen_rtx_SET (stack_pointer_rtx,
21054 plus_constant (Pmode,
21055 stack_pointer_rtx,
21056 offset));
21057 tmp = emit_insn (tmp);
21058 arm_add_cfa_adjust_cfa_note (tmp, offset,
21059 stack_pointer_rtx, stack_pointer_rtx);
21060 offset = 0;
21061 }
21062
21063 if (saved_regs_mask & (1 << PC_REGNUM))
21064 {
21065 /* Only PC is to be popped. */
21066 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
21067 XVECEXP (par, 0, 0) = ret_rtx;
21068 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
21069 gen_frame_mem (SImode,
21070 gen_rtx_POST_INC (SImode,
21071 stack_pointer_rtx)));
21072 RTX_FRAME_RELATED_P (tmp) = 1;
21073 XVECEXP (par, 0, 1) = tmp;
21074 par = emit_jump_insn (par);
21075
21076 /* Generate dwarf info. */
21077 dwarf = alloc_reg_note (REG_CFA_RESTORE,
21078 gen_rtx_REG (SImode, PC_REGNUM),
21079 NULL_RTX);
21080 REG_NOTES (par) = dwarf;
21081 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
21082 stack_pointer_rtx, stack_pointer_rtx);
21083 }
21084 }
21085
21086 /* Calculate the size of the return value that is passed in registers. */
21087 static unsigned
21088 arm_size_return_regs (void)
21089 {
21090 machine_mode mode;
21091
21092 if (crtl->return_rtx != 0)
21093 mode = GET_MODE (crtl->return_rtx);
21094 else
21095 mode = DECL_MODE (DECL_RESULT (current_function_decl));
21096
21097 return GET_MODE_SIZE (mode);
21098 }
21099
21100 /* Return true if the current function needs to save/restore LR. */
21101 static bool
21102 thumb_force_lr_save (void)
21103 {
21104 return !cfun->machine->lr_save_eliminated
21105 && (!crtl->is_leaf
21106 || thumb_far_jump_used_p ()
21107 || df_regs_ever_live_p (LR_REGNUM));
21108 }
21109
21110 /* Return true if CALL is an indirect tail call. We do not know
21111 whether r3 will be available when an indirect tail call happens in
21112 this particular case. */
21113 static bool
21114 is_indirect_tailcall_p (rtx call)
21115 {
21116 rtx pat = PATTERN (call);
21117
21118 /* Indirect tail call. */
21119 pat = XVECEXP (pat, 0, 0);
21120 if (GET_CODE (pat) == SET)
21121 pat = SET_SRC (pat);
21122
21123 pat = XEXP (XEXP (pat, 0), 0);
21124 return REG_P (pat);
21125 }
21126
21127 /* Return true if r3 is used by any of the tail call insns in the
21128 current function. */
21129 static bool
21130 any_sibcall_could_use_r3 (void)
21131 {
21132 edge_iterator ei;
21133 edge e;
21134
21135 if (!crtl->tail_call_emit)
21136 return false;
21137 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
21138 if (e->flags & EDGE_SIBCALL)
21139 {
21140 rtx_insn *call = BB_END (e->src);
21141 if (!CALL_P (call))
21142 call = prev_nonnote_nondebug_insn (call);
21143 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
21144 if (find_regno_fusage (call, USE, 3)
21145 || is_indirect_tailcall_p (call))
21146 return true;
21147 }
21148 return false;
21149 }
21150
21151
21152 /* Compute the distance from register FROM to register TO.
21153 These can be the arg pointer (26), the soft frame pointer (25),
21154 the stack pointer (13) or the hard frame pointer (11).
21155 In thumb mode r7 is used as the hard frame pointer, if needed.
21156 Typical stack layout looks like this:
21157
21158     old stack pointer -> |    |
21159                            ----
21160                           |    | \
21161                           |    |   saved arguments for
21162                           |    |   vararg functions
21163                           |    | /
21164                             --
21165 hard FP & arg pointer -> |    | \
21166                           |    |   stack
21167                           |    |   frame
21168                           |    | /
21169                             --
21170                           |    | \
21171                           |    |   call saved
21172                           |    |   registers
21173    soft frame pointer -> |    | /
21174                             --
21175                           |    | \
21176                           |    |   local
21177                           |    |   variables
21178   locals base pointer -> |    | /
21179                             --
21180                           |    | \
21181                           |    |   outgoing
21182                           |    |   arguments
21183 current stack pointer -> |    | /
21184                             --
21185
21186 For a given function some or all of these stack components
21187 may not be needed, giving rise to the possibility of
21188 eliminating some of the registers.
21189
21190 The values returned by this function must reflect the behavior
21191 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
21192
21193 The sign of the number returned reflects the direction of stack
21194 growth, so the values are positive for all eliminations except
21195 from the soft frame pointer to the hard frame pointer.
21196
21197 SFP may point just inside the local variables block to ensure correct
21198 alignment. */
21199
21200
21201 /* Return cached stack offsets. */
21202
21203 static arm_stack_offsets *
21204 arm_get_frame_offsets (void)
21205 {
21206 struct arm_stack_offsets *offsets;
21207
21208 offsets = &cfun->machine->stack_offsets;
21209
21210 return offsets;
21211 }
21212
21213
21214 /* Calculate stack offsets. These are used to calculate register elimination
21215 offsets and in prologue/epilogue code. Also calculates which registers
21216 should be saved. */
21217
21218 static void
21219 arm_compute_frame_layout (void)
21220 {
21221 struct arm_stack_offsets *offsets;
21222 unsigned long func_type;
21223 int saved;
21224 int core_saved;
21225 HOST_WIDE_INT frame_size;
21226 int i;
21227
21228 offsets = &cfun->machine->stack_offsets;
21229
21230 /* Initially this is the size of the local variables. It will be translated
21231 into an offset once we have determined the size of preceding data. */
21232 frame_size = ROUND_UP_WORD (get_frame_size ());
21233
21234 /* Space for variadic functions. */
21235 offsets->saved_args = crtl->args.pretend_args_size;
21236
21237 /* In Thumb mode this is incorrect, but never used. */
21238 offsets->frame
21239 = (offsets->saved_args
21240 + arm_compute_static_chain_stack_bytes ()
21241 + (frame_pointer_needed ? 4 : 0));
21242
21243 if (TARGET_32BIT)
21244 {
21245 unsigned int regno;
21246
21247 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
21248 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21249 saved = core_saved;
21250
21251 /* We know that SP will be doubleword aligned on entry, and we must
21252 preserve that condition at any subroutine call. We also require the
21253 soft frame pointer to be doubleword aligned. */
21254
21255 if (TARGET_REALLY_IWMMXT)
21256 {
21257 /* Check for the call-saved iWMMXt registers. */
21258 for (regno = FIRST_IWMMXT_REGNUM;
21259 regno <= LAST_IWMMXT_REGNUM;
21260 regno++)
21261 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
21262 saved += 8;
21263 }
21264
21265 func_type = arm_current_func_type ();
21266 /* Space for saved VFP registers. */
21267 if (! IS_VOLATILE (func_type)
21268 && TARGET_HARD_FLOAT)
21269 saved += arm_get_vfp_saved_size ();
21270 }
21271 else /* TARGET_THUMB1 */
21272 {
21273 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
21274 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21275 saved = core_saved;
21276 if (TARGET_BACKTRACE)
21277 saved += 16;
21278 }
21279
21280 /* Saved registers include the stack frame. */
21281 offsets->saved_regs
21282 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
21283 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
21284
21285 /* A leaf function does not need any stack alignment if it has nothing
21286 on the stack. */
21287 if (crtl->is_leaf && frame_size == 0
21288 /* However if it calls alloca(), we have a dynamically allocated
21289 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
21290 && ! cfun->calls_alloca)
21291 {
21292 offsets->outgoing_args = offsets->soft_frame;
21293 offsets->locals_base = offsets->soft_frame;
21294 return;
21295 }
21296
21297 /* Ensure SFP has the correct alignment. */
21298 if (ARM_DOUBLEWORD_ALIGN
21299 && (offsets->soft_frame & 7))
21300 {
21301 offsets->soft_frame += 4;
21302 /* Try to align stack by pushing an extra reg. Don't bother doing this
21303 when there is a stack frame as the alignment will be rolled into
21304 the normal stack adjustment. */
21305 if (frame_size + crtl->outgoing_args_size == 0)
21306 {
21307 int reg = -1;
21308
21309 /* Register r3 is caller-saved. Normally it does not need to be
21310 saved on entry by the prologue. However if we choose to save
21311 it for padding then we may confuse the compiler into thinking
21312 a prologue sequence is required when in fact it is not. This
21313 will occur when shrink-wrapping if r3 is used as a scratch
21314 register and there are no other callee-saved writes.
21315
21316 This situation can be avoided when other callee-saved registers
21317 are available and r3 is not mandatory if we choose a callee-saved
21318 register for padding. */
21319 bool prefer_callee_reg_p = false;
21320
21321 /* If it is safe to use r3, then do so. This sometimes
21322 generates better code on Thumb-2 by avoiding the need to
21323 use 32-bit push/pop instructions. */
21324 if (! any_sibcall_could_use_r3 ()
21325 && arm_size_return_regs () <= 12
21326 && (offsets->saved_regs_mask & (1 << 3)) == 0
21327 && (TARGET_THUMB2
21328 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
21329 {
21330 reg = 3;
21331 if (!TARGET_THUMB2)
21332 prefer_callee_reg_p = true;
21333 }
21334 if (reg == -1
21335 || prefer_callee_reg_p)
21336 {
21337 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
21338 {
21339 /* Avoid fixed registers; they may be changed at
21340 arbitrary times so it's unsafe to restore them
21341 during the epilogue. */
21342 if (!fixed_regs[i]
21343 && (offsets->saved_regs_mask & (1 << i)) == 0)
21344 {
21345 reg = i;
21346 break;
21347 }
21348 }
21349 }
21350
21351 if (reg != -1)
21352 {
21353 offsets->saved_regs += 4;
21354 offsets->saved_regs_mask |= (1 << reg);
21355 }
21356 }
21357 }
21358
21359 offsets->locals_base = offsets->soft_frame + frame_size;
21360 offsets->outgoing_args = (offsets->locals_base
21361 + crtl->outgoing_args_size);
21362
21363 if (ARM_DOUBLEWORD_ALIGN)
21364 {
21365 /* Ensure SP remains doubleword aligned. */
21366 if (offsets->outgoing_args & 7)
21367 offsets->outgoing_args += 4;
21368 gcc_assert (!(offsets->outgoing_args & 7));
21369 }
21370 }
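/* A rough worked example of the layout above (an illustration only),
   assuming an ARM-mode function with ARM_DOUBLEWORD_ALIGN, no pretend
   args, no static chain, no frame pointer, no interworking slot, no
   iWMMXt or VFP saves, a core save mask of {r4, r5, r6, lr} (16 bytes),
   a 20-byte frame and 8 bytes of outgoing arguments:

       saved_args    = 0
       frame         = 0
       saved_regs    = 16
       soft_frame    = 16   (already doubleword aligned)
       locals_base   = 16 + 20 = 36
       outgoing_args = 36 + 8 = 44, padded to 48 to keep SP doubleword
                       aligned.  */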
21371
21372
21373 /* Calculate the relative offsets for the different stack pointers. Positive
21374 offsets are in the direction of stack growth. */
21375
21376 HOST_WIDE_INT
21377 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21378 {
21379 arm_stack_offsets *offsets;
21380
21381 offsets = arm_get_frame_offsets ();
21382
21383 /* OK, now we have enough information to compute the distances.
21384 There must be an entry in these switch tables for each pair
21385 of registers in ELIMINABLE_REGS, even if some of the entries
21386 seem to be redundant or useless. */
21387 switch (from)
21388 {
21389 case ARG_POINTER_REGNUM:
21390 switch (to)
21391 {
21392 case THUMB_HARD_FRAME_POINTER_REGNUM:
21393 return 0;
21394
21395 case FRAME_POINTER_REGNUM:
21396 /* This is the reverse of the soft frame pointer
21397 to hard frame pointer elimination below. */
21398 return offsets->soft_frame - offsets->saved_args;
21399
21400 case ARM_HARD_FRAME_POINTER_REGNUM:
21401 /* This is only non-zero in the case where the static chain register
21402 is stored above the frame. */
21403 return offsets->frame - offsets->saved_args - 4;
21404
21405 case STACK_POINTER_REGNUM:
21406 /* If nothing has been pushed on the stack at all
21407 then this will return -4. This *is* correct! */
21408 return offsets->outgoing_args - (offsets->saved_args + 4);
21409
21410 default:
21411 gcc_unreachable ();
21412 }
21413 gcc_unreachable ();
21414
21415 case FRAME_POINTER_REGNUM:
21416 switch (to)
21417 {
21418 case THUMB_HARD_FRAME_POINTER_REGNUM:
21419 return 0;
21420
21421 case ARM_HARD_FRAME_POINTER_REGNUM:
21422 /* The hard frame pointer points to the top entry in the
21423 stack frame. The soft frame pointer points to the bottom entry
21424 in the stack frame. If there is no stack frame at all,
21425 then they are identical. */
21426
21427 return offsets->frame - offsets->soft_frame;
21428
21429 case STACK_POINTER_REGNUM:
21430 return offsets->outgoing_args - offsets->soft_frame;
21431
21432 default:
21433 gcc_unreachable ();
21434 }
21435 gcc_unreachable ();
21436
21437 default:
21438 /* You cannot eliminate from the stack pointer.
21439 In theory you could eliminate from the hard frame
21440 pointer to the stack pointer, but this will never
21441 happen, since if a stack frame is not needed the
21442 hard frame pointer will never be used. */
21443 gcc_unreachable ();
21444 }
21445 }
21446
21447 /* Given FROM and TO register numbers, say whether this elimination is
21448 allowed. Frame pointer elimination is automatically handled.
21449
21450 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21451 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21452 pointer, we must eliminate FRAME_POINTER_REGNUM into
21453 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21454 ARG_POINTER_REGNUM. */
21455
21456 bool
21457 arm_can_eliminate (const int from, const int to)
21458 {
21459 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21460 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21461 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21462 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21463 true);
21464 }
21465
21466 /* Emit RTL to save coprocessor registers on function entry. Returns the
21467 number of bytes pushed. */
21468
21469 static int
21470 arm_save_coproc_regs (void)
21471 {
21472 int saved_size = 0;
21473 unsigned reg;
21474 unsigned start_reg;
21475 rtx insn;
21476
21477 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21478 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21479 {
21480 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21481 insn = gen_rtx_MEM (V2SImode, insn);
21482 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21483 RTX_FRAME_RELATED_P (insn) = 1;
21484 saved_size += 8;
21485 }
21486
21487 if (TARGET_HARD_FLOAT)
21488 {
21489 start_reg = FIRST_VFP_REGNUM;
21490
21491 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21492 {
21493 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21494 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21495 {
21496 if (start_reg != reg)
21497 saved_size += vfp_emit_fstmd (start_reg,
21498 (reg - start_reg) / 2);
21499 start_reg = reg + 2;
21500 }
21501 }
21502 if (start_reg != reg)
21503 saved_size += vfp_emit_fstmd (start_reg,
21504 (reg - start_reg) / 2);
21505 }
21506 return saved_size;
21507 }
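
/* Sketch of the grouping performed above (illustrative only): the VFP loop
   steps through the register file two single-precision registers at a time,
   i.e. one D register per iteration, and flushes each maximal run of live,
   call-saved registers with a single vfp_emit_fstmd call.  So if, say,
   d8-d11 are the only call-saved VFP registers that are live, one
   store-multiple covering those four double registers is emitted rather
   than four separate stores.  */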
21508
21509
21510 /* Set the Thumb frame pointer from the stack pointer. */
21511
21512 static void
21513 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21514 {
21515 HOST_WIDE_INT amount;
21516 rtx insn, dwarf;
21517
21518 amount = offsets->outgoing_args - offsets->locals_base;
21519 if (amount < 1024)
21520 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21521 stack_pointer_rtx, GEN_INT (amount)));
21522 else
21523 {
21524 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21525 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21526 expects the first two operands to be the same. */
21527 if (TARGET_THUMB2)
21528 {
21529 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21530 stack_pointer_rtx,
21531 hard_frame_pointer_rtx));
21532 }
21533 else
21534 {
21535 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21536 hard_frame_pointer_rtx,
21537 stack_pointer_rtx));
21538 }
21539 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21540 plus_constant (Pmode, stack_pointer_rtx, amount));
21541 RTX_FRAME_RELATED_P (dwarf) = 1;
21542 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21543 }
21544
21545 RTX_FRAME_RELATED_P (insn) = 1;
21546 }
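
/* A sketch of what the code above emits (register names illustrative; the
   Thumb hard frame pointer is normally r7):

	add	r7, sp, #amount

   for a small offset, while for amount >= 1024 the constant is loaded
   first, e.g. on Thumb-2

	mov	r7, #amount
	add	r7, sp, r7

   and on Thumb-1 the final add is written add r7, r7, sp, matching the
   operand-order note in the comment above.  */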
21547
21548 struct scratch_reg {
21549 rtx reg;
21550 bool saved;
21551 };
21552
21553 /* Return a short-lived scratch register for use as a 2nd scratch register on
21554 function entry after the registers are saved in the prologue. This register
21555 must be released by means of release_scratch_register_on_entry. IP is not
21556 considered since it is always used as the 1st scratch register if available.
21557
21558 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21559 mask of live registers. */
21560
21561 static void
21562 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21563 unsigned long live_regs)
21564 {
21565 int regno = -1;
21566
21567 sr->saved = false;
21568
21569 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21570 regno = LR_REGNUM;
21571 else
21572 {
21573 unsigned int i;
21574
21575 for (i = 4; i < 11; i++)
21576 if (regno1 != i && (live_regs & (1 << i)) != 0)
21577 {
21578 regno = i;
21579 break;
21580 }
21581
21582 if (regno < 0)
21583 {
21584 /* If IP is used as the 1st scratch register for a nested function,
21585 then either r3 wasn't available or is used to preserve IP. */
21586 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21587 regno1 = 3;
21588 regno = (regno1 == 3 ? 2 : 3);
21589 sr->saved
21590 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21591 regno);
21592 }
21593 }
21594
21595 sr->reg = gen_rtx_REG (SImode, regno);
21596 if (sr->saved)
21597 {
21598 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21599 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21600 rtx x = gen_rtx_SET (stack_pointer_rtx,
21601 plus_constant (Pmode, stack_pointer_rtx, -4));
21602 RTX_FRAME_RELATED_P (insn) = 1;
21603 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21604 }
21605 }
21606
21607 /* Release a scratch register obtained from the preceding function. */
21608
21609 static void
21610 release_scratch_register_on_entry (struct scratch_reg *sr)
21611 {
21612 if (sr->saved)
21613 {
21614 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21615 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21616 rtx x = gen_rtx_SET (stack_pointer_rtx,
21617 plus_constant (Pmode, stack_pointer_rtx, 4));
21618 RTX_FRAME_RELATED_P (insn) = 1;
21619 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21620 }
21621 }
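
/* The save/restore pair above amounts to a push and a pop of a single word.
   As a sketch, assuming r4 was chosen as the scratch register:

	str	r4, [sp, #-4]!
	...
	ldr	r4, [sp], #4

   with the REG_FRAME_RELATED_EXPR notes describing the matching 4-byte SP
   adjustments to the unwinder.  */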
21622
21623 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21624
21625 #if PROBE_INTERVAL > 4096
21626 #error Cannot use indexed addressing mode for stack probing
21627 #endif
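/* With the usual STACK_CHECK_PROBE_INTERVAL_EXP of 12 this probes once per
   4096-byte page; the #error above guards the assumption, relied on below,
   that offsets smaller than the interval can be encoded directly in an
   indexed str.  */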
21628
21629 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21630 inclusive. These are offsets from the current stack pointer. REGNO1
21631 is the index number of the 1st scratch register and LIVE_REGS is the
21632 mask of live registers. */
21633
21634 static void
21635 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21636 unsigned int regno1, unsigned long live_regs)
21637 {
21638 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21639
21640 /* See if we have a constant small number of probes to generate. If so,
21641 that's the easy case. */
21642 if (size <= PROBE_INTERVAL)
21643 {
21644 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21645 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21646 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21647 }
21648
21649 /* The run-time loop is made up of 10 insns in the generic case while the
21650 compile-time loop is made up of 4+2*(n-2) insns for n intervals. */
21651 else if (size <= 5 * PROBE_INTERVAL)
21652 {
21653 HOST_WIDE_INT i, rem;
21654
21655 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21656 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21657 emit_stack_probe (reg1);
21658
21659 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21660 it exceeds SIZE. If only two probes are needed, this will not
21661 generate any code. Then probe at FIRST + SIZE. */
21662 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21663 {
21664 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21665 emit_stack_probe (reg1);
21666 }
21667
21668 rem = size - (i - PROBE_INTERVAL);
21669 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21670 {
21671 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21672 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21673 }
21674 else
21675 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21676 }
21677
21678 /* Otherwise, do the same as above, but in a loop. Note that we must be
21679 extra careful with variables wrapping around because we might be at
21680 the very top (or the very bottom) of the address space and we have
21681 to be able to handle this case properly; in particular, we use an
21682 equality test for the loop condition. */
21683 else
21684 {
21685 HOST_WIDE_INT rounded_size;
21686 struct scratch_reg sr;
21687
21688 get_scratch_register_on_entry (&sr, regno1, live_regs);
21689
21690 emit_move_insn (reg1, GEN_INT (first));
21691
21692
21693 /* Step 1: round SIZE to the previous multiple of the interval. */
21694
21695 rounded_size = size & -PROBE_INTERVAL;
21696 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21697
21698
21699 /* Step 2: compute initial and final value of the loop counter. */
21700
21701 /* TEST_ADDR = SP + FIRST. */
21702 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21703
21704 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21705 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21706
21707
21708 /* Step 3: the loop
21709
21710 do
21711 {
21712 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21713 probe at TEST_ADDR
21714 }
21715 while (TEST_ADDR != LAST_ADDR)
21716
21717 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21718 until it is equal to ROUNDED_SIZE. */
21719
21720 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21721
21722
21723 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21724 that SIZE is equal to ROUNDED_SIZE. */
21725
21726 if (size != rounded_size)
21727 {
21728 HOST_WIDE_INT rem = size - rounded_size;
21729
21730 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21731 {
21732 emit_set_insn (sr.reg,
21733 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21734 emit_stack_probe (plus_constant (Pmode, sr.reg,
21735 PROBE_INTERVAL - rem));
21736 }
21737 else
21738 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21739 }
21740
21741 release_scratch_register_on_entry (&sr);
21742 }
21743
21744 /* Make sure nothing is scheduled before we are done. */
21745 emit_insn (gen_blockage ());
21746 }
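
/* Worked example of the unrolled path above, assuming the usual 4 KiB
   PROBE_INTERVAL, FIRST = 4096 and SIZE = 9000: reg1 is set to SP - 8192
   and probed, the loop adds one more probe at SP - 12288, and the residual
   808 bytes are covered by a final probe at SP - 13096, i.e. FIRST + SIZE
   below the incoming SP.  */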
21747
21748 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21749 absolute addresses. */
21750
21751 const char *
21752 output_probe_stack_range (rtx reg1, rtx reg2)
21753 {
21754 static int labelno = 0;
21755 char loop_lab[32];
21756 rtx xops[2];
21757
21758 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21759
21760 /* Loop. */
21761 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21762
21763 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21764 xops[0] = reg1;
21765 xops[1] = GEN_INT (PROBE_INTERVAL);
21766 output_asm_insn ("sub\t%0, %0, %1", xops);
21767
21768 /* Probe at TEST_ADDR. */
21769 output_asm_insn ("str\tr0, [%0, #0]", xops);
21770
21771 /* Test if TEST_ADDR == LAST_ADDR. */
21772 xops[1] = reg2;
21773 output_asm_insn ("cmp\t%0, %1", xops);
21774
21775 /* Branch. */
21776 fputs ("\tbne\t", asm_out_file);
21777 assemble_name_raw (asm_out_file, loop_lab);
21778 fputc ('\n', asm_out_file);
21779
21780 return "";
21781 }
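
/* The assembly produced by the function above looks like the following
   sketch (label number and register choice are illustrative; the interval
   shown is the usual 4 KiB):

	.LPSRL0:
		sub	ip, ip, #4096
		str	r0, [ip, #0]
		cmp	ip, r4
		bne	.LPSRL0
 */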
21782
21783 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21784 function. */
21785 void
21786 arm_expand_prologue (void)
21787 {
21788 rtx amount;
21789 rtx insn;
21790 rtx ip_rtx;
21791 unsigned long live_regs_mask;
21792 unsigned long func_type;
21793 int fp_offset = 0;
21794 int saved_pretend_args = 0;
21795 int saved_regs = 0;
21796 unsigned HOST_WIDE_INT args_to_push;
21797 HOST_WIDE_INT size;
21798 arm_stack_offsets *offsets;
21799 bool clobber_ip;
21800
21801 func_type = arm_current_func_type ();
21802
21803 /* Naked functions don't have prologues. */
21804 if (IS_NAKED (func_type))
21805 {
21806 if (flag_stack_usage_info)
21807 current_function_static_stack_size = 0;
21808 return;
21809 }
21810
21811 /* Make a copy of crtl->args.pretend_args_size as we may need to modify it locally. */
21812 args_to_push = crtl->args.pretend_args_size;
21813
21814 /* Compute which registers we will have to save onto the stack. */
21815 offsets = arm_get_frame_offsets ();
21816 live_regs_mask = offsets->saved_regs_mask;
21817
21818 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21819
21820 if (IS_STACKALIGN (func_type))
21821 {
21822 rtx r0, r1;
21823
21824 /* Handle a word-aligned stack pointer. We generate the following:
21825
21826 mov r0, sp
21827 bic r1, r0, #7
21828 mov sp, r1
21829 <save and restore r0 in normal prologue/epilogue>
21830 mov sp, r0
21831 bx lr
21832
21833 The unwinder doesn't need to know about the stack realignment.
21834 Just tell it we saved SP in r0. */
21835 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21836
21837 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21838 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21839
21840 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21841 RTX_FRAME_RELATED_P (insn) = 1;
21842 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21843
21844 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21845
21846 /* ??? The CFA changes here, which may cause GDB to conclude that it
21847 has entered a different function. That said, the unwind info is
21848 correct, individually, before and after this instruction because
21849 we've described the save of SP, which will override the default
21850 handling of SP as restoring from the CFA. */
21851 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21852 }
21853
21854 /* Let's compute the static_chain_stack_bytes required and store it. Right
21855 now the value must be -1 as stored by arm_init_machine_status (). */
21856 cfun->machine->static_chain_stack_bytes
21857 = arm_compute_static_chain_stack_bytes ();
21858
21859 /* The static chain register is the same as the IP register. If it is
21860 clobbered when creating the frame, we need to save and restore it. */
21861 clobber_ip = IS_NESTED (func_type)
21862 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21863 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21864 || flag_stack_clash_protection)
21865 && !df_regs_ever_live_p (LR_REGNUM)
21866 && arm_r3_live_at_start_p ()));
21867
21868 /* Find somewhere to store IP whilst the frame is being created.
21869 We try the following places in order:
21870
21871 1. The last argument register r3 if it is available.
21872 2. A slot on the stack above the frame if there are no
21873 arguments to push onto the stack.
21874 3. Register r3 again, after pushing the argument registers
21875 onto the stack, if this is a varargs function.
21876 4. The last slot on the stack created for the arguments to
21877 push, if this isn't a varargs function.
21878
21879 Note - we only need to tell the dwarf2 backend about the SP
21880 adjustment in the second variant; the static chain register
21881 doesn't need to be unwound, as it doesn't contain a value
21882 inherited from the caller. */
21883 if (clobber_ip)
21884 {
21885 if (!arm_r3_live_at_start_p ())
21886 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21887 else if (args_to_push == 0)
21888 {
21889 rtx addr, dwarf;
21890
21891 gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
21892 saved_regs += 4;
21893
21894 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21895 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21896 fp_offset = 4;
21897
21898 /* Just tell the dwarf backend that we adjusted SP. */
21899 dwarf = gen_rtx_SET (stack_pointer_rtx,
21900 plus_constant (Pmode, stack_pointer_rtx,
21901 -fp_offset));
21902 RTX_FRAME_RELATED_P (insn) = 1;
21903 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21904 }
21905 else
21906 {
21907 /* Store the args on the stack. */
21908 if (cfun->machine->uses_anonymous_args)
21909 {
21910 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21911 (0xf0 >> (args_to_push / 4)) & 0xf);
21912 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21913 saved_pretend_args = 1;
21914 }
21915 else
21916 {
21917 rtx addr, dwarf;
21918
21919 if (args_to_push == 4)
21920 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21921 else
21922 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21923 plus_constant (Pmode,
21924 stack_pointer_rtx,
21925 -args_to_push));
21926
21927 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21928
21929 /* Just tell the dwarf backend that we adjusted SP. */
21930 dwarf = gen_rtx_SET (stack_pointer_rtx,
21931 plus_constant (Pmode, stack_pointer_rtx,
21932 -args_to_push));
21933 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21934 }
21935
21936 RTX_FRAME_RELATED_P (insn) = 1;
21937 fp_offset = args_to_push;
21938 args_to_push = 0;
21939 }
21940 }
21941
21942 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21943 {
21944 if (IS_INTERRUPT (func_type))
21945 {
21946 /* Interrupt functions must not corrupt any registers.
21947 Creating a frame pointer however, corrupts the IP
21948 register, so we must push it first. */
21949 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21950
21951 /* Do not set RTX_FRAME_RELATED_P on this insn.
21952 The dwarf stack unwinding code only wants to see one
21953 stack decrement per function, and this is not it. If
21954 this instruction is labeled as being part of the frame
21955 creation sequence then dwarf2out_frame_debug_expr will
21956 die when it encounters the assignment of IP to FP
21957 later on, since the use of SP here establishes SP as
21958 the CFA register and not IP.
21959
21960 Anyway this instruction is not really part of the stack
21961 frame creation although it is part of the prologue. */
21962 }
21963
21964 insn = emit_set_insn (ip_rtx,
21965 plus_constant (Pmode, stack_pointer_rtx,
21966 fp_offset));
21967 RTX_FRAME_RELATED_P (insn) = 1;
21968 }
21969
21970 if (args_to_push)
21971 {
21972 /* Push the argument registers, or reserve space for them. */
21973 if (cfun->machine->uses_anonymous_args)
21974 insn = emit_multi_reg_push
21975 ((0xf0 >> (args_to_push / 4)) & 0xf,
21976 (0xf0 >> (args_to_push / 4)) & 0xf);
21977 else
21978 insn = emit_insn
21979 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21980 GEN_INT (- args_to_push)));
21981 RTX_FRAME_RELATED_P (insn) = 1;
21982 }
21983
21984 /* If this is an interrupt service routine, and the link register
21985 is going to be pushed, and we're not generating the extra
21986 push of IP (needed when a frame is needed and the frame layout is APCS),
21987 subtracting four from LR now means that the function return
21988 can be done with a single instruction. */
21989 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21990 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21991 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21992 && TARGET_ARM)
21993 {
21994 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21995
21996 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21997 }
21998
21999 if (live_regs_mask)
22000 {
22001 unsigned long dwarf_regs_mask = live_regs_mask;
22002
22003 saved_regs += bit_count (live_regs_mask) * 4;
22004 if (optimize_size && !frame_pointer_needed
22005 && saved_regs == offsets->saved_regs - offsets->saved_args)
22006 {
22007 /* If no coprocessor registers are being pushed and we don't have
22008 to worry about a frame pointer then push extra registers to
22009 create the stack frame. This is done in a way that does not
22010 alter the frame layout, so is independent of the epilogue. */
22011 int n;
22012 int frame;
22013 n = 0;
22014 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
22015 n++;
22016 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
22017 if (frame && n * 4 >= frame)
22018 {
22019 n = frame / 4;
22020 live_regs_mask |= (1 << n) - 1;
22021 saved_regs += frame;
22022 }
22023 }
22024
22025 if (TARGET_LDRD
22026 && current_tune->prefer_ldrd_strd
22027 && !optimize_function_for_size_p (cfun))
22028 {
22029 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
22030 if (TARGET_THUMB2)
22031 thumb2_emit_strd_push (live_regs_mask);
22032 else if (TARGET_ARM
22033 && !TARGET_APCS_FRAME
22034 && !IS_INTERRUPT (func_type))
22035 arm_emit_strd_push (live_regs_mask);
22036 else
22037 {
22038 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
22039 RTX_FRAME_RELATED_P (insn) = 1;
22040 }
22041 }
22042 else
22043 {
22044 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
22045 RTX_FRAME_RELATED_P (insn) = 1;
22046 }
22047 }
22048
22049 if (! IS_VOLATILE (func_type))
22050 saved_regs += arm_save_coproc_regs ();
22051
22052 if (frame_pointer_needed && TARGET_ARM)
22053 {
22054 /* Create the new frame pointer. */
22055 if (TARGET_APCS_FRAME)
22056 {
22057 insn = GEN_INT (-(4 + args_to_push + fp_offset));
22058 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
22059 RTX_FRAME_RELATED_P (insn) = 1;
22060 }
22061 else
22062 {
22063 insn = GEN_INT (saved_regs - (4 + fp_offset));
22064 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
22065 stack_pointer_rtx, insn));
22066 RTX_FRAME_RELATED_P (insn) = 1;
22067 }
22068 }
22069
22070 size = offsets->outgoing_args - offsets->saved_args;
22071 if (flag_stack_usage_info)
22072 current_function_static_stack_size = size;
22073
22074 /* If this isn't an interrupt service routine and we have a frame, then do
22075 stack checking. We use IP as the first scratch register, except for
22076 non-APCS nested functions where LR or r3 is available (see clobber_ip). */
22077 if (!IS_INTERRUPT (func_type)
22078 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
22079 || flag_stack_clash_protection))
22080 {
22081 unsigned int regno;
22082
22083 if (!IS_NESTED (func_type) || clobber_ip)
22084 regno = IP_REGNUM;
22085 else if (df_regs_ever_live_p (LR_REGNUM))
22086 regno = LR_REGNUM;
22087 else
22088 regno = 3;
22089
22090 if (crtl->is_leaf && !cfun->calls_alloca)
22091 {
22092 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
22093 arm_emit_probe_stack_range (get_stack_check_protect (),
22094 size - get_stack_check_protect (),
22095 regno, live_regs_mask);
22096 }
22097 else if (size > 0)
22098 arm_emit_probe_stack_range (get_stack_check_protect (), size,
22099 regno, live_regs_mask);
22100 }
22101
22102 /* Recover the static chain register. */
22103 if (clobber_ip)
22104 {
22105 if (!arm_r3_live_at_start_p () || saved_pretend_args)
22106 insn = gen_rtx_REG (SImode, 3);
22107 else
22108 {
22109 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
22110 insn = gen_frame_mem (SImode, insn);
22111 }
22112 emit_set_insn (ip_rtx, insn);
22113 emit_insn (gen_force_register_use (ip_rtx));
22114 }
22115
22116 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
22117 {
22118 /* This add can produce multiple insns for a large constant, so we
22119 need to get tricky. */
22120 rtx_insn *last = get_last_insn ();
22121
22122 amount = GEN_INT (offsets->saved_args + saved_regs
22123 - offsets->outgoing_args);
22124
22125 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22126 amount));
22127 do
22128 {
22129 last = last ? NEXT_INSN (last) : get_insns ();
22130 RTX_FRAME_RELATED_P (last) = 1;
22131 }
22132 while (last != insn);
22133
22134 /* If the frame pointer is needed, emit a special barrier that
22135 will prevent the scheduler from moving stores to the frame
22136 before the stack adjustment. */
22137 if (frame_pointer_needed)
22138 emit_insn (gen_stack_tie (stack_pointer_rtx,
22139 hard_frame_pointer_rtx));
22140 }
22141
22142
22143 if (frame_pointer_needed && TARGET_THUMB2)
22144 thumb_set_frame_pointer (offsets);
22145
22146 if (flag_pic && arm_pic_register != INVALID_REGNUM)
22147 {
22148 unsigned long mask;
22149
22150 mask = live_regs_mask;
22151 mask &= THUMB2_WORK_REGS;
22152 if (!IS_NESTED (func_type))
22153 mask |= (1 << IP_REGNUM);
22154 arm_load_pic_register (mask, NULL_RTX);
22155 }
22156
22157 /* If we are profiling, make sure no instructions are scheduled before
22158 the call to mcount. Similarly if the user has requested no
22159 scheduling in the prologue. Similarly if we want non-call exceptions
22160 using the EABI unwinder, to prevent faulting instructions from being
22161 swapped with a stack adjustment. */
22162 if (crtl->profile || !TARGET_SCHED_PROLOG
22163 || (arm_except_unwind_info (&global_options) == UI_TARGET
22164 && cfun->can_throw_non_call_exceptions))
22165 emit_insn (gen_blockage ());
22166
22167 /* If the link register is being kept alive, with the return address in it,
22168 then make sure that it does not get reused by the ce2 pass. */
22169 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
22170 cfun->machine->lr_save_eliminated = 1;
22171 }
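
/* For reference, a sketch of the classic APCS frame set up by the code above
   for a simple ARM-state function (the actual register list and offsets
   depend on what is live and on the frame layout):

	mov	ip, sp
	stmfd	sp!, {fp, ip, lr, pc}
	sub	fp, ip, #4
	sub	sp, sp, #<frame bytes>

   Non-APCS and Thumb-2 frames instead derive the frame pointer from SP
   after the pushes, as in the corresponding paths above.  */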
22172 \f
22173 /* Print condition code to STREAM. Helper function for arm_print_operand. */
22174 static void
22175 arm_print_condition (FILE *stream)
22176 {
22177 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
22178 {
22179 /* Branch conversion is not implemented for Thumb-2. */
22180 if (TARGET_THUMB)
22181 {
22182 output_operand_lossage ("predicated Thumb instruction");
22183 return;
22184 }
22185 if (current_insn_predicate != NULL)
22186 {
22187 output_operand_lossage
22188 ("predicated instruction in conditional sequence");
22189 return;
22190 }
22191
22192 fputs (arm_condition_codes[arm_current_cc], stream);
22193 }
22194 else if (current_insn_predicate)
22195 {
22196 enum arm_cond_code code;
22197
22198 if (TARGET_THUMB1)
22199 {
22200 output_operand_lossage ("predicated Thumb instruction");
22201 return;
22202 }
22203
22204 code = get_arm_condition_code (current_insn_predicate);
22205 fputs (arm_condition_codes[code], stream);
22206 }
22207 }
22208
22209
22210 /* Globally reserved letters: acln
22211 Punctuation letters currently used: @_|?().!#
22212 Lower case letters currently used: bcdefhimpqtvwxyz
22213 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
22214 Letters previously used, but now deprecated/obsolete: sVWXYZ.
22215
22216 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
22217
22218 If CODE is 'd', then the X is a condition operand and the instruction
22219 should only be executed if the condition is true.
22220 If CODE is 'D', then the X is a condition operand and the instruction
22221 should only be executed if the condition is false: however, if the mode
22222 of the comparison is CCFPEmode, then always execute the instruction -- we
22223 do this because in these circumstances !GE does not necessarily imply LT;
22224 in these cases the instruction pattern will take care to make sure that
22225 an instruction containing %d will follow, thereby undoing the effects of
22226 doing this instruction unconditionally.
22227 If CODE is 'N' then X is a floating point operand that must be negated
22228 before output.
22229 If CODE is 'B' then output a bitwise inverted value of X (a const int).
22230 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
22231 static void
22232 arm_print_operand (FILE *stream, rtx x, int code)
22233 {
22234 switch (code)
22235 {
22236 case '@':
22237 fputs (ASM_COMMENT_START, stream);
22238 return;
22239
22240 case '_':
22241 fputs (user_label_prefix, stream);
22242 return;
22243
22244 case '|':
22245 fputs (REGISTER_PREFIX, stream);
22246 return;
22247
22248 case '?':
22249 arm_print_condition (stream);
22250 return;
22251
22252 case '.':
22253 /* The current condition code for a condition code setting instruction.
22254 Preceded by 's' in unified syntax, otherwise followed by 's'. */
22255 fputc('s', stream);
22256 arm_print_condition (stream);
22257 return;
22258
22259 case '!':
22260 /* If the instruction is conditionally executed then print
22261 the current condition code, otherwise print 's'. */
22262 gcc_assert (TARGET_THUMB2);
22263 if (current_insn_predicate)
22264 arm_print_condition (stream);
22265 else
22266 fputc('s', stream);
22267 break;
22268
22269 /* %# is a "break" sequence. It doesn't output anything, but is used to
22270 separate e.g. operand numbers from following text, if that text consists
22271 of further digits which we don't want to be part of the operand
22272 number. */
22273 case '#':
22274 return;
22275
22276 case 'N':
22277 {
22278 REAL_VALUE_TYPE r;
22279 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
22280 fprintf (stream, "%s", fp_const_from_val (&r));
22281 }
22282 return;
22283
22284 /* An integer or symbol address without a preceding # sign. */
22285 case 'c':
22286 switch (GET_CODE (x))
22287 {
22288 case CONST_INT:
22289 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
22290 break;
22291
22292 case SYMBOL_REF:
22293 output_addr_const (stream, x);
22294 break;
22295
22296 case CONST:
22297 if (GET_CODE (XEXP (x, 0)) == PLUS
22298 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
22299 {
22300 output_addr_const (stream, x);
22301 break;
22302 }
22303 /* Fall through. */
22304
22305 default:
22306 output_operand_lossage ("Unsupported operand for code '%c'", code);
22307 }
22308 return;
22309
22310 /* An integer that we want to print in HEX. */
22311 case 'x':
22312 switch (GET_CODE (x))
22313 {
22314 case CONST_INT:
22315 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
22316 break;
22317
22318 default:
22319 output_operand_lossage ("Unsupported operand for code '%c'", code);
22320 }
22321 return;
22322
22323 case 'B':
22324 if (CONST_INT_P (x))
22325 {
22326 HOST_WIDE_INT val;
22327 val = ARM_SIGN_EXTEND (~INTVAL (x));
22328 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
22329 }
22330 else
22331 {
22332 putc ('~', stream);
22333 output_addr_const (stream, x);
22334 }
22335 return;
22336
22337 case 'b':
22338 /* Print the log2 of a CONST_INT. */
22339 {
22340 HOST_WIDE_INT val;
22341
22342 if (!CONST_INT_P (x)
22343 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
22344 output_operand_lossage ("Unsupported operand for code '%c'", code);
22345 else
22346 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22347 }
22348 return;
22349
22350 case 'L':
22351 /* The low 16 bits of an immediate constant. */
22352 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
22353 return;
22354
22355 case 'i':
22356 fprintf (stream, "%s", arithmetic_instr (x, 1));
22357 return;
22358
22359 case 'I':
22360 fprintf (stream, "%s", arithmetic_instr (x, 0));
22361 return;
22362
22363 case 'S':
22364 {
22365 HOST_WIDE_INT val;
22366 const char *shift;
22367
22368 shift = shift_op (x, &val);
22369
22370 if (shift)
22371 {
22372 fprintf (stream, ", %s ", shift);
22373 if (val == -1)
22374 arm_print_operand (stream, XEXP (x, 1), 0);
22375 else
22376 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22377 }
22378 }
22379 return;
22380
22381 /* An explanation of the 'Q', 'R' and 'H' register operands:
22382
22383 In a pair of registers containing a DI or DF value the 'Q'
22384 operand returns the register number of the register containing
22385 the least significant part of the value. The 'R' operand returns
22386 the register number of the register containing the most
22387 significant part of the value.
22388
22389 The 'H' operand returns the higher of the two register numbers.
22390 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22391 same as the 'Q' operand, since the most significant part of the
22392 value is held in the lower number register. The reverse is true
22393 on systems where WORDS_BIG_ENDIAN is false.
22394
22395 The purpose of these operands is to distinguish between cases
22396 where the endian-ness of the values is important (for example
22397 when they are added together), and cases where the endian-ness
22398 is irrelevant, but the order of register operations is important.
22399 For example when loading a value from memory into a register
22400 pair, the endian-ness does not matter. Provided that the value
22401 from the lower memory address is put into the lower numbered
22402 register, and the value from the higher address is put into the
22403 higher numbered register, the load will work regardless of whether
22404 the value being loaded is big-wordian or little-wordian. The
22405 order of the two register loads can matter however, if the address
22406 of the memory location is actually held in one of the registers
22407 being overwritten by the load.
22408
22409 The 'Q' and 'R' constraints are also available for 64-bit
22410 constants. */
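    /* Example: for a DImode value held in {r0, r1} on a little-endian
       target, %Q prints r0 (the least significant word), %R prints r1
       (the most significant word) and %H prints r1 (the higher-numbered
       register); on a WORDS_BIG_ENDIAN target %Q and %H both print r1
       while %R prints r0.  */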
22411 case 'Q':
22412 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22413 {
22414 rtx part = gen_lowpart (SImode, x);
22415 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22416 return;
22417 }
22418
22419 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22420 {
22421 output_operand_lossage ("invalid operand for code '%c'", code);
22422 return;
22423 }
22424
22425 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22426 return;
22427
22428 case 'R':
22429 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22430 {
22431 machine_mode mode = GET_MODE (x);
22432 rtx part;
22433
22434 if (mode == VOIDmode)
22435 mode = DImode;
22436 part = gen_highpart_mode (SImode, mode, x);
22437 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22438 return;
22439 }
22440
22441 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22442 {
22443 output_operand_lossage ("invalid operand for code '%c'", code);
22444 return;
22445 }
22446
22447 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22448 return;
22449
22450 case 'H':
22451 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22452 {
22453 output_operand_lossage ("invalid operand for code '%c'", code);
22454 return;
22455 }
22456
22457 asm_fprintf (stream, "%r", REGNO (x) + 1);
22458 return;
22459
22460 case 'J':
22461 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22462 {
22463 output_operand_lossage ("invalid operand for code '%c'", code);
22464 return;
22465 }
22466
22467 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22468 return;
22469
22470 case 'K':
22471 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22472 {
22473 output_operand_lossage ("invalid operand for code '%c'", code);
22474 return;
22475 }
22476
22477 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22478 return;
22479
22480 case 'm':
22481 asm_fprintf (stream, "%r",
22482 REG_P (XEXP (x, 0))
22483 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22484 return;
22485
22486 case 'M':
22487 asm_fprintf (stream, "{%r-%r}",
22488 REGNO (x),
22489 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22490 return;
22491
22492 /* Like 'M', but writing doubleword vector registers, for use by Neon
22493 insns. */
22494 case 'h':
22495 {
22496 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22497 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22498 if (numregs == 1)
22499 asm_fprintf (stream, "{d%d}", regno);
22500 else
22501 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22502 }
22503 return;
22504
22505 case 'd':
22506 /* CONST_TRUE_RTX means always -- that's the default. */
22507 if (x == const_true_rtx)
22508 return;
22509
22510 if (!COMPARISON_P (x))
22511 {
22512 output_operand_lossage ("invalid operand for code '%c'", code);
22513 return;
22514 }
22515
22516 fputs (arm_condition_codes[get_arm_condition_code (x)],
22517 stream);
22518 return;
22519
22520 case 'D':
22521 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22522 want to do that. */
22523 if (x == const_true_rtx)
22524 {
22525 output_operand_lossage ("instruction never executed");
22526 return;
22527 }
22528 if (!COMPARISON_P (x))
22529 {
22530 output_operand_lossage ("invalid operand for code '%c'", code);
22531 return;
22532 }
22533
22534 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22535 (get_arm_condition_code (x))],
22536 stream);
22537 return;
22538
22539 case 's':
22540 case 'V':
22541 case 'W':
22542 case 'X':
22543 case 'Y':
22544 case 'Z':
22545 /* Former Maverick support, removed after GCC-4.7. */
22546 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22547 return;
22548
22549 case 'U':
22550 if (!REG_P (x)
22551 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22552 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22553 /* Bad value for wCG register number. */
22554 {
22555 output_operand_lossage ("invalid operand for code '%c'", code);
22556 return;
22557 }
22558
22559 else
22560 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22561 return;
22562
22563 /* Print an iWMMXt control register name. */
22564 case 'w':
22565 if (!CONST_INT_P (x)
22566 || INTVAL (x) < 0
22567 || INTVAL (x) >= 16)
22568 /* Bad value for wC register number. */
22569 {
22570 output_operand_lossage ("invalid operand for code '%c'", code);
22571 return;
22572 }
22573
22574 else
22575 {
22576 static const char * wc_reg_names [16] =
22577 {
22578 "wCID", "wCon", "wCSSF", "wCASF",
22579 "wC4", "wC5", "wC6", "wC7",
22580 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22581 "wC12", "wC13", "wC14", "wC15"
22582 };
22583
22584 fputs (wc_reg_names [INTVAL (x)], stream);
22585 }
22586 return;
22587
22588 /* Print the high single-precision register of a VFP double-precision
22589 register. */
22590 case 'p':
22591 {
22592 machine_mode mode = GET_MODE (x);
22593 int regno;
22594
22595 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22596 {
22597 output_operand_lossage ("invalid operand for code '%c'", code);
22598 return;
22599 }
22600
22601 regno = REGNO (x);
22602 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22603 {
22604 output_operand_lossage ("invalid operand for code '%c'", code);
22605 return;
22606 }
22607
22608 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22609 }
22610 return;
22611
22612 /* Print a VFP/Neon double precision or quad precision register name. */
22613 case 'P':
22614 case 'q':
22615 {
22616 machine_mode mode = GET_MODE (x);
22617 int is_quad = (code == 'q');
22618 int regno;
22619
22620 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22621 {
22622 output_operand_lossage ("invalid operand for code '%c'", code);
22623 return;
22624 }
22625
22626 if (!REG_P (x)
22627 || !IS_VFP_REGNUM (REGNO (x)))
22628 {
22629 output_operand_lossage ("invalid operand for code '%c'", code);
22630 return;
22631 }
22632
22633 regno = REGNO (x);
22634 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22635 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22636 {
22637 output_operand_lossage ("invalid operand for code '%c'", code);
22638 return;
22639 }
22640
22641 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22642 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22643 }
22644 return;
22645
22646 /* These two codes print the low/high doubleword register of a Neon quad
22647 register, respectively. For pair-structure types, can also print
22648 low/high quadword registers. */
22649 case 'e':
22650 case 'f':
22651 {
22652 machine_mode mode = GET_MODE (x);
22653 int regno;
22654
22655 if ((GET_MODE_SIZE (mode) != 16
22656 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22657 {
22658 output_operand_lossage ("invalid operand for code '%c'", code);
22659 return;
22660 }
22661
22662 regno = REGNO (x);
22663 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22664 {
22665 output_operand_lossage ("invalid operand for code '%c'", code);
22666 return;
22667 }
22668
22669 if (GET_MODE_SIZE (mode) == 16)
22670 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22671 + (code == 'f' ? 1 : 0));
22672 else
22673 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22674 + (code == 'f' ? 1 : 0));
22675 }
22676 return;
22677
22678 /* Print a VFPv3 floating-point constant, represented as an integer
22679 index. */
22680 case 'G':
22681 {
22682 int index = vfp3_const_double_index (x);
22683 gcc_assert (index != -1);
22684 fprintf (stream, "%d", index);
22685 }
22686 return;
22687
22688 /* Print bits representing opcode features for Neon.
22689
22690 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22691 and polynomials as unsigned.
22692
22693 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22694
22695 Bit 2 is 1 for rounding functions, 0 otherwise. */
22696
22697 /* Identify the type as 's', 'u', 'p' or 'f'. */
22698 case 'T':
22699 {
22700 HOST_WIDE_INT bits = INTVAL (x);
22701 fputc ("uspf"[bits & 3], stream);
22702 }
22703 return;
22704
22705 /* Likewise, but signed and unsigned integers are both 'i'. */
22706 case 'F':
22707 {
22708 HOST_WIDE_INT bits = INTVAL (x);
22709 fputc ("iipf"[bits & 3], stream);
22710 }
22711 return;
22712
22713 /* As for 'T', but emit 'u' instead of 'p'. */
22714 case 't':
22715 {
22716 HOST_WIDE_INT bits = INTVAL (x);
22717 fputc ("usuf"[bits & 3], stream);
22718 }
22719 return;
22720
22721 /* Bit 2: rounding (vs none). */
22722 case 'O':
22723 {
22724 HOST_WIDE_INT bits = INTVAL (x);
22725 fputs ((bits & 4) != 0 ? "r" : "", stream);
22726 }
22727 return;
22728
22729 /* Memory operand for vld1/vst1 instruction. */
22730 case 'A':
22731 {
22732 rtx addr;
22733 bool postinc = false;
22734 rtx postinc_reg = NULL;
22735 unsigned align, memsize, align_bits;
22736
22737 gcc_assert (MEM_P (x));
22738 addr = XEXP (x, 0);
22739 if (GET_CODE (addr) == POST_INC)
22740 {
22741 postinc = true;
22742 addr = XEXP (addr, 0);
22743 }
22744 if (GET_CODE (addr) == POST_MODIFY)
22745 {
22746 postinc_reg = XEXP (XEXP (addr, 1), 1);
22747 addr = XEXP (addr, 0);
22748 }
22749 asm_fprintf (stream, "[%r", REGNO (addr));
22750
22751 /* We know the alignment of this access, so we can emit a hint in the
22752 instruction (for some alignments) as an aid to the memory subsystem
22753 of the target. */
22754 align = MEM_ALIGN (x) >> 3;
22755 memsize = MEM_SIZE (x);
22756
22757 /* Only certain alignment specifiers are supported by the hardware. */
22758 if (memsize == 32 && (align % 32) == 0)
22759 align_bits = 256;
22760 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22761 align_bits = 128;
22762 else if (memsize >= 8 && (align % 8) == 0)
22763 align_bits = 64;
22764 else
22765 align_bits = 0;
22766
22767 if (align_bits != 0)
22768 asm_fprintf (stream, ":%d", align_bits);
22769
22770 asm_fprintf (stream, "]");
22771
22772 if (postinc)
22773 fputs("!", stream);
22774 if (postinc_reg)
22775 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22776 }
22777 return;
22778
22779 case 'C':
22780 {
22781 rtx addr;
22782
22783 gcc_assert (MEM_P (x));
22784 addr = XEXP (x, 0);
22785 gcc_assert (REG_P (addr));
22786 asm_fprintf (stream, "[%r]", REGNO (addr));
22787 }
22788 return;
22789
22790 /* Translate an S register number into a D register number and element index. */
22791 case 'y':
22792 {
22793 machine_mode mode = GET_MODE (x);
22794 int regno;
22795
22796 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22797 {
22798 output_operand_lossage ("invalid operand for code '%c'", code);
22799 return;
22800 }
22801
22802 regno = REGNO (x);
22803 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22804 {
22805 output_operand_lossage ("invalid operand for code '%c'", code);
22806 return;
22807 }
22808
22809 regno = regno - FIRST_VFP_REGNUM;
22810 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22811 }
22812 return;
22813
22814 case 'v':
22815 gcc_assert (CONST_DOUBLE_P (x));
22816 int result;
22817 result = vfp3_const_double_for_fract_bits (x);
22818 if (result == 0)
22819 result = vfp3_const_double_for_bits (x);
22820 fprintf (stream, "#%d", result);
22821 return;
22822
22823 /* Register specifier for vld1.16/vst1.16. Translate the S register
22824 number into a D register number and element index. */
22825 case 'z':
22826 {
22827 machine_mode mode = GET_MODE (x);
22828 int regno;
22829
22830 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22831 {
22832 output_operand_lossage ("invalid operand for code '%c'", code);
22833 return;
22834 }
22835
22836 regno = REGNO (x);
22837 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22838 {
22839 output_operand_lossage ("invalid operand for code '%c'", code);
22840 return;
22841 }
22842
22843 regno = regno - FIRST_VFP_REGNUM;
22844 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22845 }
22846 return;
22847
22848 default:
22849 if (x == 0)
22850 {
22851 output_operand_lossage ("missing operand");
22852 return;
22853 }
22854
22855 switch (GET_CODE (x))
22856 {
22857 case REG:
22858 asm_fprintf (stream, "%r", REGNO (x));
22859 break;
22860
22861 case MEM:
22862 output_address (GET_MODE (x), XEXP (x, 0));
22863 break;
22864
22865 case CONST_DOUBLE:
22866 {
22867 char fpstr[20];
22868 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22869 sizeof (fpstr), 0, 1);
22870 fprintf (stream, "#%s", fpstr);
22871 }
22872 break;
22873
22874 default:
22875 gcc_assert (GET_CODE (x) != NEG);
22876 fputc ('#', stream);
22877 if (GET_CODE (x) == HIGH)
22878 {
22879 fputs (":lower16:", stream);
22880 x = XEXP (x, 0);
22881 }
22882
22883 output_addr_const (stream, x);
22884 break;
22885 }
22886 }
22887 }
22888 \f
22889 /* Target hook for printing a memory address. */
22890 static void
22891 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22892 {
22893 if (TARGET_32BIT)
22894 {
22895 int is_minus = GET_CODE (x) == MINUS;
22896
22897 if (REG_P (x))
22898 asm_fprintf (stream, "[%r]", REGNO (x));
22899 else if (GET_CODE (x) == PLUS || is_minus)
22900 {
22901 rtx base = XEXP (x, 0);
22902 rtx index = XEXP (x, 1);
22903 HOST_WIDE_INT offset = 0;
22904 if (!REG_P (base)
22905 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22906 {
22907 /* Ensure that BASE is a register. */
22908 /* (one of them must be). */
22909 /* Also ensure that SP is not used as an index register. */
22910 std::swap (base, index);
22911 }
22912 switch (GET_CODE (index))
22913 {
22914 case CONST_INT:
22915 offset = INTVAL (index);
22916 if (is_minus)
22917 offset = -offset;
22918 asm_fprintf (stream, "[%r, #%wd]",
22919 REGNO (base), offset);
22920 break;
22921
22922 case REG:
22923 asm_fprintf (stream, "[%r, %s%r]",
22924 REGNO (base), is_minus ? "-" : "",
22925 REGNO (index));
22926 break;
22927
22928 case MULT:
22929 case ASHIFTRT:
22930 case LSHIFTRT:
22931 case ASHIFT:
22932 case ROTATERT:
22933 {
22934 asm_fprintf (stream, "[%r, %s%r",
22935 REGNO (base), is_minus ? "-" : "",
22936 REGNO (XEXP (index, 0)));
22937 arm_print_operand (stream, index, 'S');
22938 fputs ("]", stream);
22939 break;
22940 }
22941
22942 default:
22943 gcc_unreachable ();
22944 }
22945 }
22946 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22947 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22948 {
22949 gcc_assert (REG_P (XEXP (x, 0)));
22950
22951 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22952 asm_fprintf (stream, "[%r, #%s%d]!",
22953 REGNO (XEXP (x, 0)),
22954 GET_CODE (x) == PRE_DEC ? "-" : "",
22955 GET_MODE_SIZE (mode));
22956 else
22957 asm_fprintf (stream, "[%r], #%s%d",
22958 REGNO (XEXP (x, 0)),
22959 GET_CODE (x) == POST_DEC ? "-" : "",
22960 GET_MODE_SIZE (mode));
22961 }
22962 else if (GET_CODE (x) == PRE_MODIFY)
22963 {
22964 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22965 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22966 asm_fprintf (stream, "#%wd]!",
22967 INTVAL (XEXP (XEXP (x, 1), 1)));
22968 else
22969 asm_fprintf (stream, "%r]!",
22970 REGNO (XEXP (XEXP (x, 1), 1)));
22971 }
22972 else if (GET_CODE (x) == POST_MODIFY)
22973 {
22974 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22975 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22976 asm_fprintf (stream, "#%wd",
22977 INTVAL (XEXP (XEXP (x, 1), 1)));
22978 else
22979 asm_fprintf (stream, "%r",
22980 REGNO (XEXP (XEXP (x, 1), 1)));
22981 }
22982 else output_addr_const (stream, x);
22983 }
22984 else
22985 {
22986 if (REG_P (x))
22987 asm_fprintf (stream, "[%r]", REGNO (x));
22988 else if (GET_CODE (x) == POST_INC)
22989 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22990 else if (GET_CODE (x) == PLUS)
22991 {
22992 gcc_assert (REG_P (XEXP (x, 0)));
22993 if (CONST_INT_P (XEXP (x, 1)))
22994 asm_fprintf (stream, "[%r, #%wd]",
22995 REGNO (XEXP (x, 0)),
22996 INTVAL (XEXP (x, 1)));
22997 else
22998 asm_fprintf (stream, "[%r, %r]",
22999 REGNO (XEXP (x, 0)),
23000 REGNO (XEXP (x, 1)));
23001 }
23002 else
23003 output_addr_const (stream, x);
23004 }
23005 }
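
/* Examples of the syntax produced above (sketch, SImode accesses based on
   r3): a PRE_DEC address prints as "[r3, #-4]!", a POST_INC address as
   "[r3], #4", and a register-plus-register address as "[r3, r2]".  */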
23006 \f
23007 /* Target hook for indicating whether a punctuation character for
23008 TARGET_PRINT_OPERAND is valid. */
23009 static bool
23010 arm_print_operand_punct_valid_p (unsigned char code)
23011 {
23012 return (code == '@' || code == '|' || code == '.'
23013 || code == '(' || code == ')' || code == '#'
23014 || (TARGET_32BIT && (code == '?'))
23015 || (TARGET_THUMB2 && (code == '!'))
23016 || (TARGET_THUMB && (code == '_')));
23017 }
23018 \f
23019 /* Target hook for assembling integer objects. The ARM version needs to
23020 handle word-sized values specially. */
23021 static bool
23022 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
23023 {
23024 machine_mode mode;
23025
23026 if (size == UNITS_PER_WORD && aligned_p)
23027 {
23028 fputs ("\t.word\t", asm_out_file);
23029 output_addr_const (asm_out_file, x);
23030
23031 /* Mark symbols as position independent. We only do this in the
23032 .text segment, not in the .data segment. */
23033 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
23034 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
23035 {
23036 /* See legitimize_pic_address for an explanation of the
23037 TARGET_VXWORKS_RTP check. */
23038 /* References to weak symbols cannot be resolved locally:
23039 they may be overridden by a non-weak definition at link
23040 time. */
23041 if (!arm_pic_data_is_text_relative
23042 || (GET_CODE (x) == SYMBOL_REF
23043 && (!SYMBOL_REF_LOCAL_P (x)
23044 || (SYMBOL_REF_DECL (x)
23045 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
23046 fputs ("(GOT)", asm_out_file);
23047 else
23048 fputs ("(GOTOFF)", asm_out_file);
23049 }
23050 fputc ('\n', asm_out_file);
23051 return true;
23052 }
23053
23054 mode = GET_MODE (x);
23055
23056 if (arm_vector_mode_supported_p (mode))
23057 {
23058 int i, units;
23059
23060 gcc_assert (GET_CODE (x) == CONST_VECTOR);
23061
23062 units = CONST_VECTOR_NUNITS (x);
23063 size = GET_MODE_UNIT_SIZE (mode);
23064
23065 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
23066 for (i = 0; i < units; i++)
23067 {
23068 rtx elt = CONST_VECTOR_ELT (x, i);
23069 assemble_integer
23070 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
23071 }
23072 else
23073 for (i = 0; i < units; i++)
23074 {
23075 rtx elt = CONST_VECTOR_ELT (x, i);
23076 assemble_real
23077 (*CONST_DOUBLE_REAL_VALUE (elt),
23078 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
23079 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
23080 }
23081
23082 return true;
23083 }
23084
23085 return default_assemble_integer (x, size, aligned_p);
23086 }
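
/* Sketch of the word-sized PIC output above: a symbol in the constant table
   that binds locally is emitted as

	.word	sym(GOTOFF)

   while a weak or otherwise preemptible symbol (or non-text-relative PIC
   data) gets

	.word	sym(GOT)

   as selected by the test in the code above.  */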
23087
23088 static void
23089 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
23090 {
23091 section *s;
23092
23093 if (!TARGET_AAPCS_BASED)
23094 {
23095 (is_ctor ?
23096 default_named_section_asm_out_constructor
23097 : default_named_section_asm_out_destructor) (symbol, priority);
23098 return;
23099 }
23100
23101 /* Put these in the .init_array section, using a special relocation. */
23102 if (priority != DEFAULT_INIT_PRIORITY)
23103 {
23104 char buf[18];
23105 sprintf (buf, "%s.%.5u",
23106 is_ctor ? ".init_array" : ".fini_array",
23107 priority);
23108 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
23109 }
23110 else if (is_ctor)
23111 s = ctors_section;
23112 else
23113 s = dtors_section;
23114
23115 switch_to_section (s);
23116 assemble_align (POINTER_SIZE);
23117 fputs ("\t.word\t", asm_out_file);
23118 output_addr_const (asm_out_file, symbol);
23119 fputs ("(target1)\n", asm_out_file);
23120 }
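
/* For AAPCS targets the function above emits, e.g. for a constructor with
   priority 65, something along the lines of (directives and symbol name
   illustrative):

	.section	.init_array.00065
	.align	2
	.word	ctor_fn(target1)

   The %.5u format zero-pads the priority so that the sections sort
   numerically by name.  */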
23121
23122 /* Add a function to the list of static constructors. */
23123
23124 static void
23125 arm_elf_asm_constructor (rtx symbol, int priority)
23126 {
23127 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
23128 }
23129
23130 /* Add a function to the list of static destructors. */
23131
23132 static void
23133 arm_elf_asm_destructor (rtx symbol, int priority)
23134 {
23135 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
23136 }
23137 \f
23138 /* A finite state machine takes care of noticing whether or not instructions
23139 can be conditionally executed, and thus decrease execution time and code
23140 size by deleting branch instructions. The fsm is controlled by
23141 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
23142
23143 /* The states of the fsm controlling condition codes are:
23144 0: normal, do nothing special
23145 1: make ASM_OUTPUT_OPCODE not output this instruction
23146 2: make ASM_OUTPUT_OPCODE not output this instruction
23147 3: make instructions conditional
23148 4: make instructions conditional
23149
23150 State transitions (state->state by whom under condition):
23151 0 -> 1 final_prescan_insn if the `target' is a label
23152 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
23153 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
23154 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
23155 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
23156 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
23157 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
23158 (the target insn is arm_target_insn).
23159
23160 If the jump clobbers the conditions then we use states 2 and 4.
23161
23162 A similar thing can be done with conditional return insns.
23163
23164 XXX In case the `target' is an unconditional branch, this conditionalising
23165 of the instructions always reduces code size, but not always execution
23166 time. But then, I want to reduce the code size to somewhere near what
23167 /bin/cc produces. */
23168
23169 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
23170 instructions. When a COND_EXEC instruction is seen the subsequent
23171 instructions are scanned so that multiple conditional instructions can be
23172 combined into a single IT block. arm_condexec_count and arm_condexec_mask
23173 specify the length and true/false mask for the IT block. These will be
23174 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
23175
23176 /* Returns the index of the ARM condition code string in
23177 `arm_condition_codes', or ARM_NV if the comparison is invalid.
23178 COMPARISON should be an rtx like `(eq (...) (...))'. */
23179
23180 enum arm_cond_code
23181 maybe_get_arm_condition_code (rtx comparison)
23182 {
23183 machine_mode mode = GET_MODE (XEXP (comparison, 0));
23184 enum arm_cond_code code;
23185 enum rtx_code comp_code = GET_CODE (comparison);
23186
23187 if (GET_MODE_CLASS (mode) != MODE_CC)
23188 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
23189 XEXP (comparison, 1));
23190
23191 switch (mode)
23192 {
23193 case E_CC_DNEmode: code = ARM_NE; goto dominance;
23194 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
23195 case E_CC_DGEmode: code = ARM_GE; goto dominance;
23196 case E_CC_DGTmode: code = ARM_GT; goto dominance;
23197 case E_CC_DLEmode: code = ARM_LE; goto dominance;
23198 case E_CC_DLTmode: code = ARM_LT; goto dominance;
23199 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
23200 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
23201 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
23202 case E_CC_DLTUmode: code = ARM_CC;
23203
23204 dominance:
23205 if (comp_code == EQ)
23206 return ARM_INVERSE_CONDITION_CODE (code);
23207 if (comp_code == NE)
23208 return code;
23209 return ARM_NV;
23210
23211 case E_CC_NOOVmode:
23212 switch (comp_code)
23213 {
23214 case NE: return ARM_NE;
23215 case EQ: return ARM_EQ;
23216 case GE: return ARM_PL;
23217 case LT: return ARM_MI;
23218 default: return ARM_NV;
23219 }
23220
23221 case E_CC_Zmode:
23222 switch (comp_code)
23223 {
23224 case NE: return ARM_NE;
23225 case EQ: return ARM_EQ;
23226 default: return ARM_NV;
23227 }
23228
23229 case E_CC_Nmode:
23230 switch (comp_code)
23231 {
23232 case NE: return ARM_MI;
23233 case EQ: return ARM_PL;
23234 default: return ARM_NV;
23235 }
23236
23237 case E_CCFPEmode:
23238 case E_CCFPmode:
23239 /* We can handle all cases except UNEQ and LTGT. */
23240 switch (comp_code)
23241 {
23242 case GE: return ARM_GE;
23243 case GT: return ARM_GT;
23244 case LE: return ARM_LS;
23245 case LT: return ARM_MI;
23246 case NE: return ARM_NE;
23247 case EQ: return ARM_EQ;
23248 case ORDERED: return ARM_VC;
23249 case UNORDERED: return ARM_VS;
23250 case UNLT: return ARM_LT;
23251 case UNLE: return ARM_LE;
23252 case UNGT: return ARM_HI;
23253 case UNGE: return ARM_PL;
23254 /* UNEQ and LTGT do not have a representation. */
23255 case UNEQ: /* Fall through. */
23256 case LTGT: /* Fall through. */
23257 default: return ARM_NV;
23258 }
23259
23260 case E_CC_SWPmode:
23261 switch (comp_code)
23262 {
23263 case NE: return ARM_NE;
23264 case EQ: return ARM_EQ;
23265 case GE: return ARM_LE;
23266 case GT: return ARM_LT;
23267 case LE: return ARM_GE;
23268 case LT: return ARM_GT;
23269 case GEU: return ARM_LS;
23270 case GTU: return ARM_CC;
23271 case LEU: return ARM_CS;
23272 case LTU: return ARM_HI;
23273 default: return ARM_NV;
23274 }
23275
23276 case E_CC_Cmode:
23277 switch (comp_code)
23278 {
23279 case LTU: return ARM_CS;
23280 case GEU: return ARM_CC;
23281 case NE: return ARM_CS;
23282 case EQ: return ARM_CC;
23283 default: return ARM_NV;
23284 }
23285
23286 case E_CC_CZmode:
23287 switch (comp_code)
23288 {
23289 case NE: return ARM_NE;
23290 case EQ: return ARM_EQ;
23291 case GEU: return ARM_CS;
23292 case GTU: return ARM_HI;
23293 case LEU: return ARM_LS;
23294 case LTU: return ARM_CC;
23295 default: return ARM_NV;
23296 }
23297
23298 case E_CC_NCVmode:
23299 switch (comp_code)
23300 {
23301 case GE: return ARM_GE;
23302 case LT: return ARM_LT;
23303 case GEU: return ARM_CS;
23304 case LTU: return ARM_CC;
23305 default: return ARM_NV;
23306 }
23307
23308 case E_CC_Vmode:
23309 switch (comp_code)
23310 {
23311 case NE: return ARM_VS;
23312 case EQ: return ARM_VC;
23313 default: return ARM_NV;
23314 }
23315
23316 case E_CCmode:
23317 switch (comp_code)
23318 {
23319 case NE: return ARM_NE;
23320 case EQ: return ARM_EQ;
23321 case GE: return ARM_GE;
23322 case GT: return ARM_GT;
23323 case LE: return ARM_LE;
23324 case LT: return ARM_LT;
23325 case GEU: return ARM_CS;
23326 case GTU: return ARM_HI;
23327 case LEU: return ARM_LS;
23328 case LTU: return ARM_CC;
23329 default: return ARM_NV;
23330 }
23331
23332 default: gcc_unreachable ();
23333 }
23334 }
23335
23336 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
23337 static enum arm_cond_code
23338 get_arm_condition_code (rtx comparison)
23339 {
23340 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
23341 gcc_assert (code != ARM_NV);
23342 return code;
23343 }
23344
23345 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
23346 code registers when not targeting Thumb1. The VFP condition register
23347 only exists when generating hard-float code. */
23348 static bool
23349 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
23350 {
23351 if (!TARGET_32BIT)
23352 return false;
23353
23354 *p1 = CC_REGNUM;
23355 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
23356 return true;
23357 }
23358
23359 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23360 instructions. */
23361 void
23362 thumb2_final_prescan_insn (rtx_insn *insn)
23363 {
23364 rtx_insn *first_insn = insn;
23365 rtx body = PATTERN (insn);
23366 rtx predicate;
23367 enum arm_cond_code code;
23368 int n;
23369 int mask;
23370 int max;
23371
23372 /* max_insns_skipped in the tune was already taken into account in the
23373 cost model of ifcvt pass when generating COND_EXEC insns. At this stage
23374 just emit the IT blocks as best we can. It does not make sense to split
23375 the IT blocks. */
23376 max = MAX_INSN_PER_IT_BLOCK;
23377
23378 /* Remove the previous insn from the count of insns to be output. */
23379 if (arm_condexec_count)
23380 arm_condexec_count--;
23381
23382 /* Nothing to do if we are already inside a conditional block. */
23383 if (arm_condexec_count)
23384 return;
23385
23386 if (GET_CODE (body) != COND_EXEC)
23387 return;
23388
23389 /* Conditional jumps are implemented directly. */
23390 if (JUMP_P (insn))
23391 return;
23392
23393 predicate = COND_EXEC_TEST (body);
23394 arm_current_cc = get_arm_condition_code (predicate);
23395
23396 n = get_attr_ce_count (insn);
23397 arm_condexec_count = 1;
23398 arm_condexec_mask = (1 << n) - 1;
23399 arm_condexec_masklen = n;
23400 /* See if subsequent instructions can be combined into the same block. */
23401 for (;;)
23402 {
23403 insn = next_nonnote_insn (insn);
23404
23405 /* Jumping into the middle of an IT block is illegal, so a label or
23406 barrier terminates the block. */
23407 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23408 break;
23409
23410 body = PATTERN (insn);
23411 /* USE and CLOBBER aren't really insns, so just skip them. */
23412 if (GET_CODE (body) == USE
23413 || GET_CODE (body) == CLOBBER)
23414 continue;
23415
23416 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23417 if (GET_CODE (body) != COND_EXEC)
23418 break;
23419 /* Maximum number of conditionally executed instructions in a block. */
23420 n = get_attr_ce_count (insn);
23421 if (arm_condexec_masklen + n > max)
23422 break;
23423
23424 predicate = COND_EXEC_TEST (body);
23425 code = get_arm_condition_code (predicate);
23426 mask = (1 << n) - 1;
23427 if (arm_current_cc == code)
23428 arm_condexec_mask |= (mask << arm_condexec_masklen);
23429 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23430 break;
23431
23432 arm_condexec_count++;
23433 arm_condexec_masklen += n;
23434
23435 /* A jump must be the last instruction in a conditional block. */
23436 if (JUMP_P (insn))
23437 break;
23438 }
23439 /* Restore recog_data (getting the attributes of other insns can
23440 destroy this array, but final.c assumes that it remains intact
23441 across this call). */
23442 extract_constrain_insn_cached (first_insn);
23443 }
23444
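/* Added note (no comment in the original here): roughly, this drives the
   conditional-execution state machine used at final-output time.  When a
   conditional branch (or return) skips only a few insns that are safe to
   conditionalise, those insns are marked for conditional execution via
   arm_current_cc and the branch itself can be dropped.  arm_ccfsm_state
   records progress: state 3 means the trick may be repeated across an
   adjacent unconditional branch, and state 4 means we are waiting to reach
   arm_target_insn before returning to state 0.  */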
23445 void
23446 arm_final_prescan_insn (rtx_insn *insn)
23447 {
23448 /* BODY will hold the body of INSN. */
23449 rtx body = PATTERN (insn);
23450
23451 /* This will be 1 if trying to repeat the trick, and things need to be
23452 reversed if it appears to fail. */
23453 int reverse = 0;
23454
23455 /* If we start with a return insn, we only succeed if we find another one. */
23456 int seeking_return = 0;
23457 enum rtx_code return_code = UNKNOWN;
23458
23459 /* START_INSN will hold the insn from where we start looking. This is the
23460 first insn after the following code_label if REVERSE is true. */
23461 rtx_insn *start_insn = insn;
23462
23463 /* If in state 4, check if the branch target has been reached, in order to
23464 change back to state 0. */
23465 if (arm_ccfsm_state == 4)
23466 {
23467 if (insn == arm_target_insn)
23468 {
23469 arm_target_insn = NULL;
23470 arm_ccfsm_state = 0;
23471 }
23472 return;
23473 }
23474
23475 /* If in state 3, it is possible to repeat the trick, if this insn is an
23476 unconditional branch to a label, and immediately following this branch
23477 is the previous target label which is only used once, and the label this
23478 branch jumps to is not too far off. */
23479 if (arm_ccfsm_state == 3)
23480 {
23481 if (simplejump_p (insn))
23482 {
23483 start_insn = next_nonnote_insn (start_insn);
23484 if (BARRIER_P (start_insn))
23485 {
23486 /* XXX Isn't this always a barrier? */
23487 start_insn = next_nonnote_insn (start_insn);
23488 }
23489 if (LABEL_P (start_insn)
23490 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23491 && LABEL_NUSES (start_insn) == 1)
23492 reverse = TRUE;
23493 else
23494 return;
23495 }
23496 else if (ANY_RETURN_P (body))
23497 {
23498 start_insn = next_nonnote_insn (start_insn);
23499 if (BARRIER_P (start_insn))
23500 start_insn = next_nonnote_insn (start_insn);
23501 if (LABEL_P (start_insn)
23502 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23503 && LABEL_NUSES (start_insn) == 1)
23504 {
23505 reverse = TRUE;
23506 seeking_return = 1;
23507 return_code = GET_CODE (body);
23508 }
23509 else
23510 return;
23511 }
23512 else
23513 return;
23514 }
23515
23516 gcc_assert (!arm_ccfsm_state || reverse);
23517 if (!JUMP_P (insn))
23518 return;
23519
23520 /* This jump might be paralleled with a clobber of the condition codes;
23521 the jump should always come first.  */
23522 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23523 body = XVECEXP (body, 0, 0);
23524
23525 if (reverse
23526 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23527 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23528 {
23529 int insns_skipped;
23530 int fail = FALSE, succeed = FALSE;
23531 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23532 int then_not_else = TRUE;
23533 rtx_insn *this_insn = start_insn;
23534 rtx label = 0;
23535
23536 /* Register the insn jumped to. */
23537 if (reverse)
23538 {
23539 if (!seeking_return)
23540 label = XEXP (SET_SRC (body), 0);
23541 }
23542 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23543 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23544 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23545 {
23546 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23547 then_not_else = FALSE;
23548 }
23549 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23550 {
23551 seeking_return = 1;
23552 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23553 }
23554 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23555 {
23556 seeking_return = 1;
23557 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23558 then_not_else = FALSE;
23559 }
23560 else
23561 gcc_unreachable ();
23562
23563 /* See how many insns this branch skips, and what kind of insns. If all
23564 insns are okay, and the label or unconditional branch to the same
23565 label is not too far away, succeed. */
23566 for (insns_skipped = 0;
23567 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23568 {
23569 rtx scanbody;
23570
23571 this_insn = next_nonnote_insn (this_insn);
23572 if (!this_insn)
23573 break;
23574
23575 switch (GET_CODE (this_insn))
23576 {
23577 case CODE_LABEL:
23578 /* Succeed if it is the target label, otherwise fail since
23579 control falls in from somewhere else. */
23580 if (this_insn == label)
23581 {
23582 arm_ccfsm_state = 1;
23583 succeed = TRUE;
23584 }
23585 else
23586 fail = TRUE;
23587 break;
23588
23589 case BARRIER:
23590 /* Succeed if the following insn is the target label.
23591 Otherwise fail.
23592 If return insns are used then the last insn in a function
23593 will be a barrier. */
23594 this_insn = next_nonnote_insn (this_insn);
23595 if (this_insn && this_insn == label)
23596 {
23597 arm_ccfsm_state = 1;
23598 succeed = TRUE;
23599 }
23600 else
23601 fail = TRUE;
23602 break;
23603
23604 case CALL_INSN:
23605 /* The AAPCS says that conditional calls should not be
23606 used since they make interworking inefficient (the
23607 linker can't transform BL<cond> into BLX). That's
23608 only a problem if the machine has BLX. */
23609 if (arm_arch5t)
23610 {
23611 fail = TRUE;
23612 break;
23613 }
23614
23615 /* Succeed if the following insn is the target label, or
23616 if the following two insns are a barrier and the
23617 target label. */
23618 this_insn = next_nonnote_insn (this_insn);
23619 if (this_insn && BARRIER_P (this_insn))
23620 this_insn = next_nonnote_insn (this_insn);
23621
23622 if (this_insn && this_insn == label
23623 && insns_skipped < max_insns_skipped)
23624 {
23625 arm_ccfsm_state = 1;
23626 succeed = TRUE;
23627 }
23628 else
23629 fail = TRUE;
23630 break;
23631
23632 case JUMP_INSN:
23633 /* If this is an unconditional branch to the same label, succeed.
23634 If it is to another label, do nothing. If it is conditional,
23635 fail. */
23636 /* XXX Probably, the tests for SET and the PC are
23637 unnecessary. */
23638
23639 scanbody = PATTERN (this_insn);
23640 if (GET_CODE (scanbody) == SET
23641 && GET_CODE (SET_DEST (scanbody)) == PC)
23642 {
23643 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23644 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23645 {
23646 arm_ccfsm_state = 2;
23647 succeed = TRUE;
23648 }
23649 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23650 fail = TRUE;
23651 }
23652 /* Fail if a conditional return is undesirable (e.g. on a
23653 StrongARM), but still allow this if optimizing for size. */
23654 else if (GET_CODE (scanbody) == return_code
23655 && !use_return_insn (TRUE, NULL)
23656 && !optimize_size)
23657 fail = TRUE;
23658 else if (GET_CODE (scanbody) == return_code)
23659 {
23660 arm_ccfsm_state = 2;
23661 succeed = TRUE;
23662 }
23663 else if (GET_CODE (scanbody) == PARALLEL)
23664 {
23665 switch (get_attr_conds (this_insn))
23666 {
23667 case CONDS_NOCOND:
23668 break;
23669 default:
23670 fail = TRUE;
23671 break;
23672 }
23673 }
23674 else
23675 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23676
23677 break;
23678
23679 case INSN:
23680 /* Instructions using or affecting the condition codes make it
23681 fail. */
23682 scanbody = PATTERN (this_insn);
23683 if (!(GET_CODE (scanbody) == SET
23684 || GET_CODE (scanbody) == PARALLEL)
23685 || get_attr_conds (this_insn) != CONDS_NOCOND)
23686 fail = TRUE;
23687 break;
23688
23689 default:
23690 break;
23691 }
23692 }
23693 if (succeed)
23694 {
23695 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23696 arm_target_label = CODE_LABEL_NUMBER (label);
23697 else
23698 {
23699 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23700
23701 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23702 {
23703 this_insn = next_nonnote_insn (this_insn);
23704 gcc_assert (!this_insn
23705 || (!BARRIER_P (this_insn)
23706 && !LABEL_P (this_insn)));
23707 }
23708 if (!this_insn)
23709 {
23710 /* Oh dear!  We ran off the end; give up.  */
23711 extract_constrain_insn_cached (insn);
23712 arm_ccfsm_state = 0;
23713 arm_target_insn = NULL;
23714 return;
23715 }
23716 arm_target_insn = this_insn;
23717 }
23718
23719 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23720 what it was. */
23721 if (!reverse)
23722 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23723
23724 if (reverse || then_not_else)
23725 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23726 }
23727
23728 /* Restore recog_data (getting the attributes of other insns can
23729 destroy this array, but final.c assumes that it remains intact
23730 across this call). */
23731 extract_constrain_insn_cached (insn);
23732 }
23733 }
23734
23735 /* Output IT instructions. */
23736 void
23737 thumb2_asm_output_opcode (FILE * stream)
23738 {
23739 char buff[5];
23740 int n;
23741
23742 if (arm_condexec_mask)
23743 {
23744 for (n = 0; n < arm_condexec_masklen; n++)
23745 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23746 buff[n] = 0;
23747 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23748 arm_condition_codes[arm_current_cc]);
23749 arm_condexec_mask = 0;
23750 }
23751 }
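/* Illustrative example (added, not in the original sources): for an IT
   block of two insns that both execute on ARM_EQ, arm_condexec_mask is
   0b11 and arm_condexec_masklen is 2, so the function above emits
   "itt	eq" immediately before the first conditional instruction.  */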
23752
23753 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
23754 UNITS_PER_WORD bytes wide. */
23755 static unsigned int
23756 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
23757 {
23758 if (TARGET_32BIT
23759 && regno > PC_REGNUM
23760 && regno != FRAME_POINTER_REGNUM
23761 && regno != ARG_POINTER_REGNUM
23762 && !IS_VFP_REGNUM (regno))
23763 return 1;
23764
23765 return ARM_NUM_REGS (mode);
23766 }
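/* Illustrative example (added): a DImode value held in core registers
   occupies ARM_NUM_REGS (DImode) == 2 consecutive registers (e.g. r4/r5),
   while the special registers handled by the early return above always
   report a single register.  */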
23767
23768 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23769 static bool
23770 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23771 {
23772 if (GET_MODE_CLASS (mode) == MODE_CC)
23773 return (regno == CC_REGNUM
23774 || (TARGET_HARD_FLOAT
23775 && regno == VFPCC_REGNUM));
23776
23777 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23778 return false;
23779
23780 if (TARGET_THUMB1)
23781 /* For the Thumb we only allow values bigger than SImode in
23782 registers 0 - 6, so that there is always a second low
23783 register available to hold the upper part of the value.
23784 We probably ought to ensure that the register is the
23785 start of an even numbered register pair. */
23786 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23787
23788 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23789 {
23790 if (mode == SFmode || mode == SImode)
23791 return VFP_REGNO_OK_FOR_SINGLE (regno);
23792
23793 if (mode == DFmode)
23794 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23795
23796 if (mode == HFmode)
23797 return VFP_REGNO_OK_FOR_SINGLE (regno);
23798
23799 /* VFP registers can hold HImode values. */
23800 if (mode == HImode)
23801 return VFP_REGNO_OK_FOR_SINGLE (regno);
23802
23803 if (TARGET_NEON)
23804 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23805 || (VALID_NEON_QREG_MODE (mode)
23806 && NEON_REGNO_OK_FOR_QUAD (regno))
23807 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23808 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23809 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23810 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23811 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23812
23813 return false;
23814 }
23815
23816 if (TARGET_REALLY_IWMMXT)
23817 {
23818 if (IS_IWMMXT_GR_REGNUM (regno))
23819 return mode == SImode;
23820
23821 if (IS_IWMMXT_REGNUM (regno))
23822 return VALID_IWMMXT_REG_MODE (mode);
23823 }
23824
23825 /* We allow almost any value to be stored in the general registers.
23826 Restrict doubleword quantities to even register pairs in ARM state
23827 so that we can use ldrd. Do not allow very large Neon structure
23828 opaque modes in general registers; they would use too many. */
23829 if (regno <= LAST_ARM_REGNUM)
23830 {
23831 if (ARM_NUM_REGS (mode) > 4)
23832 return false;
23833
23834 if (TARGET_THUMB2)
23835 return true;
23836
23837 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23838 }
23839
23840 if (regno == FRAME_POINTER_REGNUM
23841 || regno == ARG_POINTER_REGNUM)
23842 /* We only allow integers in the fake hard registers. */
23843 return GET_MODE_CLASS (mode) == MODE_INT;
23844
23845 return false;
23846 }
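/* Illustrative example (added): with TARGET_LDRD in ARM state, a DImode
   value is accepted in r4 (an even-numbered register, so ldrd/strd can be
   used) but rejected in r5, per the final check on core registers above.  */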
23847
23848 /* Implement TARGET_MODES_TIEABLE_P. */
23849
23850 static bool
23851 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23852 {
23853 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23854 return true;
23855
23856 /* We specifically want to allow elements of "structure" modes to
23857 be tieable to the structure. This more general condition allows
23858 other rarer situations too. */
23859 if (TARGET_NEON
23860 && (VALID_NEON_DREG_MODE (mode1)
23861 || VALID_NEON_QREG_MODE (mode1)
23862 || VALID_NEON_STRUCT_MODE (mode1))
23863 && (VALID_NEON_DREG_MODE (mode2)
23864 || VALID_NEON_QREG_MODE (mode2)
23865 || VALID_NEON_STRUCT_MODE (mode2)))
23866 return true;
23867
23868 return false;
23869 }
23870
23871 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23872 not used in arm mode. */
23873
23874 enum reg_class
23875 arm_regno_class (int regno)
23876 {
23877 if (regno == PC_REGNUM)
23878 return NO_REGS;
23879
23880 if (TARGET_THUMB1)
23881 {
23882 if (regno == STACK_POINTER_REGNUM)
23883 return STACK_REG;
23884 if (regno == CC_REGNUM)
23885 return CC_REG;
23886 if (regno < 8)
23887 return LO_REGS;
23888 return HI_REGS;
23889 }
23890
23891 if (TARGET_THUMB2 && regno < 8)
23892 return LO_REGS;
23893
23894 if ( regno <= LAST_ARM_REGNUM
23895 || regno == FRAME_POINTER_REGNUM
23896 || regno == ARG_POINTER_REGNUM)
23897 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23898
23899 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23900 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23901
23902 if (IS_VFP_REGNUM (regno))
23903 {
23904 if (regno <= D7_VFP_REGNUM)
23905 return VFP_D0_D7_REGS;
23906 else if (regno <= LAST_LO_VFP_REGNUM)
23907 return VFP_LO_REGS;
23908 else
23909 return VFP_HI_REGS;
23910 }
23911
23912 if (IS_IWMMXT_REGNUM (regno))
23913 return IWMMXT_REGS;
23914
23915 if (IS_IWMMXT_GR_REGNUM (regno))
23916 return IWMMXT_GR_REGS;
23917
23918 return NO_REGS;
23919 }
23920
23921 /* Handle a special case when computing the offset
23922 of an argument from the frame pointer. */
23923 int
23924 arm_debugger_arg_offset (int value, rtx addr)
23925 {
23926 rtx_insn *insn;
23927
23928 /* We are only interested if dbxout_parms() failed to compute the offset. */
23929 if (value != 0)
23930 return 0;
23931
23932 /* We can only cope with the case where the address is held in a register. */
23933 if (!REG_P (addr))
23934 return 0;
23935
23936 /* If we are using the frame pointer to point at the argument, then
23937 an offset of 0 is correct. */
23938 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23939 return 0;
23940
23941 /* If we are using the stack pointer to point at the
23942 argument, then an offset of 0 is correct. */
23943 /* ??? Check this is consistent with thumb2 frame layout. */
23944 if ((TARGET_THUMB || !frame_pointer_needed)
23945 && REGNO (addr) == SP_REGNUM)
23946 return 0;
23947
23948 /* Oh dear. The argument is pointed to by a register rather
23949 than being held in a register, or being stored at a known
23950 offset from the frame pointer. Since GDB only understands
23951 those two kinds of argument we must translate the address
23952 held in the register into an offset from the frame pointer.
23953 We do this by searching through the insns for the function
23954 looking to see where this register gets its value. If the
23955 register is initialized from the frame pointer plus an offset
23956 then we are in luck and we can continue, otherwise we give up.
23957
23958 This code is exercised by producing debugging information
23959 for a function with arguments like this:
23960
23961 double func (double a, double b, int c, double d) {return d;}
23962
23963 Without this code the stab for parameter 'd' will be set to
23964 an offset of 0 from the frame pointer, rather than 8. */
23965
23966 /* The if() statement says:
23967
23968 If the insn is a normal instruction
23969 and if the insn is setting the value in a register
23970 and if the register being set is the register holding the address of the argument
23971 and if the address is computed by an addition
23972 that involves adding to a register
23973 which is the frame pointer
23974 a constant integer
23975
23976 then... */
23977
23978 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23979 {
23980 if ( NONJUMP_INSN_P (insn)
23981 && GET_CODE (PATTERN (insn)) == SET
23982 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23983 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23984 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23985 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23986 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23987 )
23988 {
23989 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23990
23991 break;
23992 }
23993 }
23994
23995 if (value == 0)
23996 {
23997 debug_rtx (addr);
23998 warning (0, "unable to compute real location of stacked parameter");
23999 value = 8; /* XXX magic hack */
24000 }
24001
24002 return value;
24003 }
24004 \f
24005 /* Implement TARGET_PROMOTED_TYPE. */
24006
24007 static tree
24008 arm_promoted_type (const_tree t)
24009 {
24010 if (SCALAR_FLOAT_TYPE_P (t)
24011 && TYPE_PRECISION (t) == 16
24012 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
24013 return float_type_node;
24014 return NULL_TREE;
24015 }
24016
24017 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
24018 This simply adds HFmode as a supported mode; even though we don't
24019 implement arithmetic on this type directly, it's supported by
24020 optabs conversions, much the way the double-word arithmetic is
24021 special-cased in the default hook. */
24022
24023 static bool
24024 arm_scalar_mode_supported_p (scalar_mode mode)
24025 {
24026 if (mode == HFmode)
24027 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
24028 else if (ALL_FIXED_POINT_MODE_P (mode))
24029 return true;
24030 else
24031 return default_scalar_mode_supported_p (mode);
24032 }
24033
24034 /* Set the value of FLT_EVAL_METHOD.
24035 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
24036
24037 0: evaluate all operations and constants, whose semantic type has at
24038 most the range and precision of type float, to the range and
24039 precision of float; evaluate all other operations and constants to
24040 the range and precision of the semantic type;
24041
24042 N, where _FloatN is a supported interchange floating type:
24043 evaluate all operations and constants, whose semantic type has at
24044 most the range and precision of _FloatN type, to the range and
24045 precision of the _FloatN type; evaluate all other operations and
24046 constants to the range and precision of the semantic type;
24047
24048 If we have the ARMv8.2-A extensions then we support _Float16 in native
24049 precision, so we should set this to 16. Otherwise, we support the type,
24050 but want to evaluate expressions in float precision, so set this to
24051 0. */
24052
24053 static enum flt_eval_method
24054 arm_excess_precision (enum excess_precision_type type)
24055 {
24056 switch (type)
24057 {
24058 case EXCESS_PRECISION_TYPE_FAST:
24059 case EXCESS_PRECISION_TYPE_STANDARD:
24060 /* We can calculate either in 16-bit range and precision or
24061 32-bit range and precision. Make that decision based on whether
24062 we have native support for the ARMv8.2-A 16-bit floating-point
24063 instructions or not. */
24064 return (TARGET_VFP_FP16INST
24065 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
24066 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
24067 case EXCESS_PRECISION_TYPE_IMPLICIT:
24068 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
24069 default:
24070 gcc_unreachable ();
24071 }
24072 return FLT_EVAL_METHOD_UNPREDICTABLE;
24073 }
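/* Illustrative note (added): in practice this means that without the
   ARMv8.2-A FP16 instructions an expression such as a * b on two _Float16
   operands is evaluated in float and only the final result is narrowed
   back to _Float16, whereas with TARGET_VFP_FP16INST it is evaluated
   directly in 16-bit precision.  */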
24074
24075
24076 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
24077 _Float16 if we are using anything other than ieee format for 16-bit
24078 floating point. Otherwise, punt to the default implementation. */
24079 static opt_scalar_float_mode
24080 arm_floatn_mode (int n, bool extended)
24081 {
24082 if (!extended && n == 16)
24083 {
24084 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
24085 return HFmode;
24086 return opt_scalar_float_mode ();
24087 }
24088
24089 return default_floatn_mode (n, extended);
24090 }
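/* Illustrative note (added): consequently _Float16 is only available when
   the 16-bit format is IEEE half precision; with the alternative (ARM)
   half-precision format the type is rejected here, and the default
   handling is used only for the other _FloatN types.  */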
24091
24092
24093 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
24094 not to early-clobber SRC registers in the process.
24095
24096 We assume that the operands described by SRC and DEST represent a
24097 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
24098 number of components into which the copy has been decomposed. */
24099 void
24100 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
24101 {
24102 unsigned int i;
24103
24104 if (!reg_overlap_mentioned_p (operands[0], operands[1])
24105 || REGNO (operands[0]) < REGNO (operands[1]))
24106 {
24107 for (i = 0; i < count; i++)
24108 {
24109 operands[2 * i] = dest[i];
24110 operands[2 * i + 1] = src[i];
24111 }
24112 }
24113 else
24114 {
24115 for (i = 0; i < count; i++)
24116 {
24117 operands[2 * i] = dest[count - i - 1];
24118 operands[2 * i + 1] = src[count - i - 1];
24119 }
24120 }
24121 }
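/* Illustrative example (added): when copying a multi-register structure
   whose destination overlaps the source at a higher register number,
   emitting the component moves in reverse order (the else branch above)
   ensures no source register is overwritten before it has been read.  */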
24122
24123 /* Split operands into moves from op[1] + op[2] into op[0]. */
24124
24125 void
24126 neon_split_vcombine (rtx operands[3])
24127 {
24128 unsigned int dest = REGNO (operands[0]);
24129 unsigned int src1 = REGNO (operands[1]);
24130 unsigned int src2 = REGNO (operands[2]);
24131 machine_mode halfmode = GET_MODE (operands[1]);
24132 unsigned int halfregs = REG_NREGS (operands[1]);
24133 rtx destlo, desthi;
24134
24135 if (src1 == dest && src2 == dest + halfregs)
24136 {
24137 /* No-op move. Can't split to nothing; emit something. */
24138 emit_note (NOTE_INSN_DELETED);
24139 return;
24140 }
24141
24142 /* Preserve register attributes for variable tracking. */
24143 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
24144 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
24145 GET_MODE_SIZE (halfmode));
24146
24147 /* Special case of reversed high/low parts. Use VSWP. */
24148 if (src2 == dest && src1 == dest + halfregs)
24149 {
24150 rtx x = gen_rtx_SET (destlo, operands[1]);
24151 rtx y = gen_rtx_SET (desthi, operands[2]);
24152 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
24153 return;
24154 }
24155
24156 if (!reg_overlap_mentioned_p (operands[2], destlo))
24157 {
24158 /* Try to avoid unnecessary moves if part of the result
24159 is in the right place already. */
24160 if (src1 != dest)
24161 emit_move_insn (destlo, operands[1]);
24162 if (src2 != dest + halfregs)
24163 emit_move_insn (desthi, operands[2]);
24164 }
24165 else
24166 {
24167 if (src2 != dest + halfregs)
24168 emit_move_insn (desthi, operands[2]);
24169 if (src1 != dest)
24170 emit_move_insn (destlo, operands[1]);
24171 }
24172 }
24173 \f
24174 /* Return the number (counting from 0) of
24175 the least significant set bit in MASK. */
24176
24177 inline static int
24178 number_of_first_bit_set (unsigned mask)
24179 {
24180 return ctz_hwi (mask);
24181 }
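/* Example (added): number_of_first_bit_set (0x30) == 4, i.e. in a register
   mask selecting {r4, r5} the first register found is r4.  */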
24182
24183 /* Like emit_multi_reg_push, but allowing for a different set of
24184 registers to be described as saved. MASK is the set of registers
24185 to be saved; REAL_REGS is the set of registers to be described as
24186 saved. If REAL_REGS is 0, only describe the stack adjustment. */
24187
24188 static rtx_insn *
24189 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
24190 {
24191 unsigned long regno;
24192 rtx par[10], tmp, reg;
24193 rtx_insn *insn;
24194 int i, j;
24195
24196 /* Build the parallel of the registers actually being stored. */
24197 for (i = 0; mask; ++i, mask &= mask - 1)
24198 {
24199 regno = ctz_hwi (mask);
24200 reg = gen_rtx_REG (SImode, regno);
24201
24202 if (i == 0)
24203 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
24204 else
24205 tmp = gen_rtx_USE (VOIDmode, reg);
24206
24207 par[i] = tmp;
24208 }
24209
24210 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24211 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
24212 tmp = gen_frame_mem (BLKmode, tmp);
24213 tmp = gen_rtx_SET (tmp, par[0]);
24214 par[0] = tmp;
24215
24216 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
24217 insn = emit_insn (tmp);
24218
24219 /* Always build the stack adjustment note for unwind info. */
24220 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24221 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
24222 par[0] = tmp;
24223
24224 /* Build the parallel of the registers recorded as saved for unwind. */
24225 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
24226 {
24227 regno = ctz_hwi (real_regs);
24228 reg = gen_rtx_REG (SImode, regno);
24229
24230 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
24231 tmp = gen_frame_mem (SImode, tmp);
24232 tmp = gen_rtx_SET (tmp, reg);
24233 RTX_FRAME_RELATED_P (tmp) = 1;
24234 par[j + 1] = tmp;
24235 }
24236
24237 if (j == 0)
24238 tmp = par[0];
24239 else
24240 {
24241 RTX_FRAME_RELATED_P (par[0]) = 1;
24242 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
24243 }
24244
24245 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
24246
24247 return insn;
24248 }
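/* Illustrative example (added, hypothetical values): the Thumb-1 prologue
   may move high registers into free low registers and push those, e.g.
   thumb1_emit_multi_reg_push (0x000f, 0x0f00) would push r0-r3 while the
   unwind information records r8-r11 as the registers actually saved.  */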
24249
24250 /* Emit code to push or pop registers to or from the stack. F is the
24251 assembly file. MASK is the registers to pop. */
24252 static void
24253 thumb_pop (FILE *f, unsigned long mask)
24254 {
24255 int regno;
24256 int lo_mask = mask & 0xFF;
24257
24258 gcc_assert (mask);
24259
24260 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
24261 {
24262 /* Special case. Do not generate a POP PC statement here, do it in
24263 thumb_exit ().  */
24264 thumb_exit (f, -1);
24265 return;
24266 }
24267
24268 fprintf (f, "\tpop\t{");
24269
24270 /* Look at the low registers first. */
24271 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
24272 {
24273 if (lo_mask & 1)
24274 {
24275 asm_fprintf (f, "%r", regno);
24276
24277 if ((lo_mask & ~1) != 0)
24278 fprintf (f, ", ");
24279 }
24280 }
24281
24282 if (mask & (1 << PC_REGNUM))
24283 {
24284 /* Catch popping the PC. */
24285 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
24286 || IS_CMSE_ENTRY (arm_current_func_type ()))
24287 {
24288 /* The PC is never popped directly; instead
24289 it is popped into r3 and then BX is used. */
24290 fprintf (f, "}\n");
24291
24292 thumb_exit (f, -1);
24293
24294 return;
24295 }
24296 else
24297 {
24298 if (mask & 0xFF)
24299 fprintf (f, ", ");
24300
24301 asm_fprintf (f, "%r", PC_REGNUM);
24302 }
24303 }
24304
24305 fprintf (f, "}\n");
24306 }
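/* Illustrative example (added): thumb_pop (f, (1 << 4) | (1 << 5) | (1 << PC_REGNUM))
   prints "pop	{r4, r5, pc}" when no interworking return is required;
   otherwise the PC is diverted through thumb_exit as shown above.  */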
24307
24308 /* Generate code to return from a thumb function.
24309 If 'reg_containing_return_addr' is -1, then the return address is
24310 actually on the stack, at the stack pointer.
24311
24312 Note: do not forget to update length attribute of corresponding insn pattern
24313 when changing assembly output (eg. length attribute of epilogue_insns when
24314 updating Armv8-M Baseline Security Extensions register clearing
24315 sequences). */
24316 static void
24317 thumb_exit (FILE *f, int reg_containing_return_addr)
24318 {
24319 unsigned regs_available_for_popping;
24320 unsigned regs_to_pop;
24321 int pops_needed;
24322 unsigned available;
24323 unsigned required;
24324 machine_mode mode;
24325 int size;
24326 int restore_a4 = FALSE;
24327
24328 /* Compute the registers we need to pop. */
24329 regs_to_pop = 0;
24330 pops_needed = 0;
24331
24332 if (reg_containing_return_addr == -1)
24333 {
24334 regs_to_pop |= 1 << LR_REGNUM;
24335 ++pops_needed;
24336 }
24337
24338 if (TARGET_BACKTRACE)
24339 {
24340 /* Restore the (ARM) frame pointer and stack pointer. */
24341 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
24342 pops_needed += 2;
24343 }
24344
24345 /* If there is nothing to pop then just emit the BX instruction and
24346 return. */
24347 if (pops_needed == 0)
24348 {
24349 if (crtl->calls_eh_return)
24350 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24351
24352 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24353 {
24354 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
24355 reg_containing_return_addr);
24356 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24357 }
24358 else
24359 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24360 return;
24361 }
24362 /* Otherwise if we are not supporting interworking and we have not created
24363 a backtrace structure and the function was not entered in ARM mode then
24364 just pop the return address straight into the PC. */
24365 else if (!TARGET_INTERWORK
24366 && !TARGET_BACKTRACE
24367 && !is_called_in_ARM_mode (current_function_decl)
24368 && !crtl->calls_eh_return
24369 && !IS_CMSE_ENTRY (arm_current_func_type ()))
24370 {
24371 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
24372 return;
24373 }
24374
24375 /* Find out how many of the (return) argument registers we can corrupt. */
24376 regs_available_for_popping = 0;
24377
24378 /* If returning via __builtin_eh_return, the bottom three registers
24379 all contain information needed for the return. */
24380 if (crtl->calls_eh_return)
24381 size = 12;
24382 else
24383 {
24384 /* We try to deduce the registers used from the function's
24385 return value.  This is more reliable than examining
24386 df_regs_ever_live_p () because that will be set if the register is
24387 ever used in the function, not just if the register is used
24388 to hold a return value. */
24389
24390 if (crtl->return_rtx != 0)
24391 mode = GET_MODE (crtl->return_rtx);
24392 else
24393 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24394
24395 size = GET_MODE_SIZE (mode);
24396
24397 if (size == 0)
24398 {
24399 /* In a void function we can use any argument register.
24400 In a function that returns a structure on the stack
24401 we can use the second and third argument registers. */
24402 if (mode == VOIDmode)
24403 regs_available_for_popping =
24404 (1 << ARG_REGISTER (1))
24405 | (1 << ARG_REGISTER (2))
24406 | (1 << ARG_REGISTER (3));
24407 else
24408 regs_available_for_popping =
24409 (1 << ARG_REGISTER (2))
24410 | (1 << ARG_REGISTER (3));
24411 }
24412 else if (size <= 4)
24413 regs_available_for_popping =
24414 (1 << ARG_REGISTER (2))
24415 | (1 << ARG_REGISTER (3));
24416 else if (size <= 8)
24417 regs_available_for_popping =
24418 (1 << ARG_REGISTER (3));
24419 }
24420
24421 /* Match registers to be popped with registers into which we pop them. */
24422 for (available = regs_available_for_popping,
24423 required = regs_to_pop;
24424 required != 0 && available != 0;
24425 available &= ~(available & - available),
24426 required &= ~(required & - required))
24427 -- pops_needed;
24428
24429 /* If we have any popping registers left over, remove them. */
24430 if (available > 0)
24431 regs_available_for_popping &= ~available;
24432
24433 /* Otherwise if we need another popping register we can use
24434 the fourth argument register. */
24435 else if (pops_needed)
24436 {
24437 /* If we have not found any free argument registers and
24438 reg a4 contains the return address, we must move it. */
24439 if (regs_available_for_popping == 0
24440 && reg_containing_return_addr == LAST_ARG_REGNUM)
24441 {
24442 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24443 reg_containing_return_addr = LR_REGNUM;
24444 }
24445 else if (size > 12)
24446 {
24447 /* Register a4 is being used to hold part of the return value,
24448 but we have dire need of a free, low register. */
24449 restore_a4 = TRUE;
24450
24451 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
24452 }
24453
24454 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24455 {
24456 /* The fourth argument register is available. */
24457 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24458
24459 --pops_needed;
24460 }
24461 }
24462
24463 /* Pop as many registers as we can. */
24464 thumb_pop (f, regs_available_for_popping);
24465
24466 /* Process the registers we popped. */
24467 if (reg_containing_return_addr == -1)
24468 {
24469 /* The return address was popped into the lowest numbered register. */
24470 regs_to_pop &= ~(1 << LR_REGNUM);
24471
24472 reg_containing_return_addr =
24473 number_of_first_bit_set (regs_available_for_popping);
24474
24475 /* Remove this register from the mask of available registers, so that
24476 the return address will not be corrupted by further pops. */
24477 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24478 }
24479
24480 /* If we popped other registers then handle them here. */
24481 if (regs_available_for_popping)
24482 {
24483 int frame_pointer;
24484
24485 /* Work out which register currently contains the frame pointer. */
24486 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24487
24488 /* Move it into the correct place. */
24489 asm_fprintf (f, "\tmov\t%r, %r\n",
24490 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24491
24492 /* (Temporarily) remove it from the mask of popped registers. */
24493 regs_available_for_popping &= ~(1 << frame_pointer);
24494 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24495
24496 if (regs_available_for_popping)
24497 {
24498 int stack_pointer;
24499
24500 /* We popped the stack pointer as well,
24501 find the register that contains it. */
24502 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24503
24504 /* Move it into the stack register. */
24505 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24506
24507 /* At this point we have popped all necessary registers, so
24508 do not worry about restoring regs_available_for_popping
24509 to its correct value:
24510
24511 assert (pops_needed == 0)
24512 assert (regs_available_for_popping == (1 << frame_pointer))
24513 assert (regs_to_pop == (1 << STACK_POINTER)) */
24514 }
24515 else
24516 {
24517 /* Since we have just moved the popped value into the frame
24518 pointer, the popping register is available for reuse, and
24519 we know that we still have the stack pointer left to pop. */
24520 regs_available_for_popping |= (1 << frame_pointer);
24521 }
24522 }
24523
24524 /* If we still have registers left on the stack, but we no longer have
24525 any registers into which we can pop them, then we must move the return
24526 address into the link register and make available the register that
24527 contained it. */
24528 if (regs_available_for_popping == 0 && pops_needed > 0)
24529 {
24530 regs_available_for_popping |= 1 << reg_containing_return_addr;
24531
24532 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24533 reg_containing_return_addr);
24534
24535 reg_containing_return_addr = LR_REGNUM;
24536 }
24537
24538 /* If we have registers left on the stack then pop some more.
24539 We know that at most we will want to pop FP and SP. */
24540 if (pops_needed > 0)
24541 {
24542 int popped_into;
24543 int move_to;
24544
24545 thumb_pop (f, regs_available_for_popping);
24546
24547 /* We have popped either FP or SP.
24548 Move whichever one it is into the correct register. */
24549 popped_into = number_of_first_bit_set (regs_available_for_popping);
24550 move_to = number_of_first_bit_set (regs_to_pop);
24551
24552 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24553 --pops_needed;
24554 }
24555
24556 /* If we still have not popped everything then we must have only
24557 had one register available to us and we are now popping the SP. */
24558 if (pops_needed > 0)
24559 {
24560 int popped_into;
24561
24562 thumb_pop (f, regs_available_for_popping);
24563
24564 popped_into = number_of_first_bit_set (regs_available_for_popping);
24565
24566 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24567 /*
24568 assert (regs_to_pop == (1 << STACK_POINTER))
24569 assert (pops_needed == 1)
24570 */
24571 }
24572
24573 /* If necessary restore the a4 register. */
24574 if (restore_a4)
24575 {
24576 if (reg_containing_return_addr != LR_REGNUM)
24577 {
24578 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24579 reg_containing_return_addr = LR_REGNUM;
24580 }
24581
24582 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24583 }
24584
24585 if (crtl->calls_eh_return)
24586 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24587
24588 /* Return to caller. */
24589 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24590 {
24591 /* This is for the cases where LR is not being used to contain the return
24592 address. It may therefore contain information that we might not want
24593 to leak, hence it must be cleared. The value in R0 will never be a
24594 secret at this point, so it is safe to use it, see the clearing code
24595 in 'cmse_nonsecure_entry_clear_before_return'. */
24596 if (reg_containing_return_addr != LR_REGNUM)
24597 asm_fprintf (f, "\tmov\tlr, r0\n");
24598
24599 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24600 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24601 }
24602 else
24603 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24604 }
24605 \f
24606 /* Scan INSN just before assembler is output for it.
24607 For Thumb-1, we track the status of the condition codes; this
24608 information is used in the cbranchsi4_insn pattern. */
24609 void
24610 thumb1_final_prescan_insn (rtx_insn *insn)
24611 {
24612 if (flag_print_asm_name)
24613 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24614 INSN_ADDRESSES (INSN_UID (insn)));
24615 /* Don't overwrite the previous setter when we get to a cbranch. */
24616 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24617 {
24618 enum attr_conds conds;
24619
24620 if (cfun->machine->thumb1_cc_insn)
24621 {
24622 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24623 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24624 CC_STATUS_INIT;
24625 }
24626 conds = get_attr_conds (insn);
24627 if (conds == CONDS_SET)
24628 {
24629 rtx set = single_set (insn);
24630 cfun->machine->thumb1_cc_insn = insn;
24631 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24632 cfun->machine->thumb1_cc_op1 = const0_rtx;
24633 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24634 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24635 {
24636 rtx src1 = XEXP (SET_SRC (set), 1);
24637 if (src1 == const0_rtx)
24638 cfun->machine->thumb1_cc_mode = CCmode;
24639 }
24640 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24641 {
24642 /* Record the src register operand instead of dest because
24643 cprop_hardreg pass propagates src. */
24644 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24645 }
24646 }
24647 else if (conds != CONDS_NOCOND)
24648 cfun->machine->thumb1_cc_insn = NULL_RTX;
24649 }
24650
24651 /* Check if unexpected far jump is used. */
24652 if (cfun->machine->lr_save_eliminated
24653 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24654 internal_error("Unexpected thumb1 far jump");
24655 }
24656
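/* Added note (no comment in the original here): return 1 if VAL is an
   8-bit constant shifted left by some amount, i.e. a constant that can
   typically be synthesised with an 8-bit move followed by a left shift;
   for example 0x00ff0000 qualifies, 0x00ff00ff does not.  */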
24657 int
24658 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24659 {
24660 unsigned HOST_WIDE_INT mask = 0xff;
24661 int i;
24662
24663 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24664 if (val == 0) /* XXX */
24665 return 0;
24666
24667 for (i = 0; i < 25; i++)
24668 if ((val & (mask << i)) == val)
24669 return 1;
24670
24671 return 0;
24672 }
24673
24674 /* Returns nonzero if the current function contains,
24675 or might contain a far jump. */
24676 static int
24677 thumb_far_jump_used_p (void)
24678 {
24679 rtx_insn *insn;
24680 bool far_jump = false;
24681 unsigned int func_size = 0;
24682
24683 /* If we have already decided that far jumps may be used,
24684 do not bother checking again, and always return true even if
24685 it turns out that they are not being used. Once we have made
24686 the decision that far jumps are present (and that hence the link
24687 register will be pushed onto the stack) we cannot go back on it. */
24688 if (cfun->machine->far_jump_used)
24689 return 1;
24690
24691 /* If this function is not being called from the prologue/epilogue
24692 generation code then it must be being called from the
24693 INITIAL_ELIMINATION_OFFSET macro. */
24694 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24695 {
24696 /* In this case we know that we are being asked about the elimination
24697 of the arg pointer register. If that register is not being used,
24698 then there are no arguments on the stack, and we do not have to
24699 worry that a far jump might force the prologue to push the link
24700 register, changing the stack offsets. In this case we can just
24701 return false, since the presence of far jumps in the function will
24702 not affect stack offsets.
24703
24704 If the arg pointer is live (or if it was live, but has now been
24705 eliminated and so set to dead) then we do have to test to see if
24706 the function might contain a far jump. This test can lead to some
24707 false negatives, since before reload is completed, the length of
24708 branch instructions is not known, so gcc defaults to returning their
24709 longest length, which in turn sets the far jump attribute to true.
24710
24711 A false negative will not result in bad code being generated, but it
24712 will result in a needless push and pop of the link register. We
24713 hope that this does not occur too often.
24714
24715 If we need doubleword stack alignment this could affect the other
24716 elimination offsets so we can't risk getting it wrong. */
24717 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24718 cfun->machine->arg_pointer_live = 1;
24719 else if (!cfun->machine->arg_pointer_live)
24720 return 0;
24721 }
24722
24723 /* We should not change far_jump_used during or after reload, as there is
24724 no chance to change stack frame layout. */
24725 if (reload_in_progress || reload_completed)
24726 return 0;
24727
24728 /* Check to see if the function contains a branch
24729 insn with the far jump attribute set. */
24730 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24731 {
24732 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24733 {
24734 far_jump = true;
24735 }
24736 func_size += get_attr_length (insn);
24737 }
24738
24739 /* The far_jump attribute will always be true for Thumb-1 before the
24740 branch shortening pass, so checking it before that pass is not
24741 very useful.
24742 
24743 The following heuristic tries to estimate more accurately whether a
24744 far jump will ultimately be needed.  The heuristic is very conservative,
24745 as there is no chance to roll back a decision not to use far jumps.
24746 
24747 The Thumb-1 long branch offset range is -2048 to 2046.  The worst case
24748 is that each 2-byte insn is associated with a 4-byte constant pool
24749 entry.  Using function size 2048/3 as the threshold is conservative enough. */
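/* Illustrative note (added): with this threshold, if any branch still has
   the far_jump attribute set and the function's insns total 683 bytes or
   more (683 * 3 >= 2048), far jumps are assumed and LR will be saved,
   even if shorten_branches would later prove all branches short.  */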
24750 if (far_jump)
24751 {
24752 if ((func_size * 3) >= 2048)
24753 {
24754 /* Record the fact that we have decided that
24755 the function does use far jumps. */
24756 cfun->machine->far_jump_used = 1;
24757 return 1;
24758 }
24759 }
24760
24761 return 0;
24762 }
24763
24764 /* Return nonzero if FUNC must be entered in ARM mode. */
24765 static bool
24766 is_called_in_ARM_mode (tree func)
24767 {
24768 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24769
24770 /* Ignore the problem of functions whose address is taken.  */
24771 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24772 return true;
24773
24774 #ifdef ARM_PE
24775 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24776 #else
24777 return false;
24778 #endif
24779 }
24780
24781 /* Given the stack offsets and register mask in OFFSETS, decide how
24782 many additional registers to push instead of subtracting a constant
24783 from SP. For epilogues the principle is the same except we use pop.
24784 FOR_PROLOGUE indicates which we're generating. */
24785 static int
24786 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24787 {
24788 HOST_WIDE_INT amount;
24789 unsigned long live_regs_mask = offsets->saved_regs_mask;
24790 /* Extract a mask of the ones we can give to the Thumb's push/pop
24791 instruction. */
24792 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24793 /* Then count how many other high registers will need to be pushed. */
24794 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24795 int n_free, reg_base, size;
24796
24797 if (!for_prologue && frame_pointer_needed)
24798 amount = offsets->locals_base - offsets->saved_regs;
24799 else
24800 amount = offsets->outgoing_args - offsets->saved_regs;
24801
24802 /* If the stack frame size is 512 exactly, we can save one load
24803 instruction, which should make this a win even when optimizing
24804 for speed. */
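/* Added note: the largest immediate a single Thumb-1 "sub sp, #imm" can
   encode is 508 (a 7-bit word offset), so when the frame is exactly 512
   bytes pushing one extra register brings the adjustment down to 508 and
   saves loading the constant into a register.  */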
24805 if (!optimize_size && amount != 512)
24806 return 0;
24807
24808 /* Can't do this if there are high registers to push. */
24809 if (high_regs_pushed != 0)
24810 return 0;
24811
24812 /* Shouldn't do it in the prologue if no registers would normally
24813 be pushed at all. In the epilogue, also allow it if we'll have
24814 a pop insn for the PC. */
24815 if (l_mask == 0
24816 && (for_prologue
24817 || TARGET_BACKTRACE
24818 || (live_regs_mask & 1 << LR_REGNUM) == 0
24819 || TARGET_INTERWORK
24820 || crtl->args.pretend_args_size != 0))
24821 return 0;
24822
24823 /* Don't do this if thumb_expand_prologue wants to emit instructions
24824 between the push and the stack frame allocation. */
24825 if (for_prologue
24826 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24827 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24828 return 0;
24829
24830 reg_base = 0;
24831 n_free = 0;
24832 if (!for_prologue)
24833 {
24834 size = arm_size_return_regs ();
24835 reg_base = ARM_NUM_INTS (size);
24836 live_regs_mask >>= reg_base;
24837 }
24838
24839 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24840 && (for_prologue || call_used_regs[reg_base + n_free]))
24841 {
24842 live_regs_mask >>= 1;
24843 n_free++;
24844 }
24845
24846 if (n_free == 0)
24847 return 0;
24848 gcc_assert (amount / 4 * 4 == amount);
24849
24850 if (amount >= 512 && (amount - n_free * 4) < 512)
24851 return (amount - 508) / 4;
24852 if (amount <= n_free * 4)
24853 return amount / 4;
24854 return 0;
24855 }
24856
24857 /* The bits which aren't usefully expanded as rtl. */
24858 const char *
24859 thumb1_unexpanded_epilogue (void)
24860 {
24861 arm_stack_offsets *offsets;
24862 int regno;
24863 unsigned long live_regs_mask = 0;
24864 int high_regs_pushed = 0;
24865 int extra_pop;
24866 int had_to_push_lr;
24867 int size;
24868
24869 if (cfun->machine->return_used_this_function != 0)
24870 return "";
24871
24872 if (IS_NAKED (arm_current_func_type ()))
24873 return "";
24874
24875 offsets = arm_get_frame_offsets ();
24876 live_regs_mask = offsets->saved_regs_mask;
24877 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24878
24879 /* We try to deduce the registers used from the function's return value.
24880 This is more reliable than examining df_regs_ever_live_p () because that
24881 will be set if the register is ever used in the function, not just if
24882 the register is used to hold a return value. */
24883 size = arm_size_return_regs ();
24884
24885 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24886 if (extra_pop > 0)
24887 {
24888 unsigned long extra_mask = (1 << extra_pop) - 1;
24889 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24890 }
24891
24892 /* The prologue may have pushed some high registers to use as
24893 work registers, e.g. the testsuite file:
24894 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24895 compiles to produce:
24896 push {r4, r5, r6, r7, lr}
24897 mov r7, r9
24898 mov r6, r8
24899 push {r6, r7}
24900 as part of the prologue. We have to undo that pushing here. */
24901
24902 if (high_regs_pushed)
24903 {
24904 unsigned long mask = live_regs_mask & 0xff;
24905 int next_hi_reg;
24906
24907 mask |= thumb1_epilogue_unused_call_clobbered_lo_regs ();
24908
24909 if (mask == 0)
24910 /* Oh dear! We have no low registers into which we can pop
24911 high registers! */
24912 internal_error
24913 ("no low registers available for popping high registers");
24914
24915 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
24916 if (live_regs_mask & (1 << next_hi_reg))
24917 break;
24918
24919 while (high_regs_pushed)
24920 {
24921 /* Find lo register(s) into which the high register(s) can
24922 be popped. */
24923 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
24924 {
24925 if (mask & (1 << regno))
24926 high_regs_pushed--;
24927 if (high_regs_pushed == 0)
24928 break;
24929 }
24930
24931 if (high_regs_pushed == 0 && regno >= 0)
24932 mask &= ~((1 << regno) - 1);
24933
24934 /* Pop the values into the low register(s). */
24935 thumb_pop (asm_out_file, mask);
24936
24937 /* Move the value(s) into the high registers. */
24938 for (regno = LAST_LO_REGNUM; regno >= 0; regno--)
24939 {
24940 if (mask & (1 << regno))
24941 {
24942 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24943 regno);
24944
24945 for (next_hi_reg--; next_hi_reg > LAST_LO_REGNUM;
24946 next_hi_reg--)
24947 if (live_regs_mask & (1 << next_hi_reg))
24948 break;
24949 }
24950 }
24951 }
24952 live_regs_mask &= ~0x0f00;
24953 }
24954
24955 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24956 live_regs_mask &= 0xff;
24957
24958 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24959 {
24960 /* Pop the return address into the PC. */
24961 if (had_to_push_lr)
24962 live_regs_mask |= 1 << PC_REGNUM;
24963
24964 /* Either no argument registers were pushed or a backtrace
24965 structure was created which includes an adjusted stack
24966 pointer, so just pop everything. */
24967 if (live_regs_mask)
24968 thumb_pop (asm_out_file, live_regs_mask);
24969
24970 /* We have either just popped the return address into the
24971 PC or it was kept in LR for the entire function.
24972 Note that thumb_pop has already called thumb_exit if the
24973 PC was in the list. */
24974 if (!had_to_push_lr)
24975 thumb_exit (asm_out_file, LR_REGNUM);
24976 }
24977 else
24978 {
24979 /* Pop everything but the return address. */
24980 if (live_regs_mask)
24981 thumb_pop (asm_out_file, live_regs_mask);
24982
24983 if (had_to_push_lr)
24984 {
24985 if (size > 12)
24986 {
24987 /* We have no free low regs, so save one. */
24988 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24989 LAST_ARG_REGNUM);
24990 }
24991
24992 /* Get the return address into a temporary register. */
24993 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24994
24995 if (size > 12)
24996 {
24997 /* Move the return address to lr. */
24998 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24999 LAST_ARG_REGNUM);
25000 /* Restore the low register. */
25001 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
25002 IP_REGNUM);
25003 regno = LR_REGNUM;
25004 }
25005 else
25006 regno = LAST_ARG_REGNUM;
25007 }
25008 else
25009 regno = LR_REGNUM;
25010
25011 /* Remove the argument registers that were pushed onto the stack. */
25012 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
25013 SP_REGNUM, SP_REGNUM,
25014 crtl->args.pretend_args_size);
25015
25016 thumb_exit (asm_out_file, regno);
25017 }
25018
25019 return "";
25020 }
25021
25022 /* Functions to save and restore machine-specific function data. */
25023 static struct machine_function *
25024 arm_init_machine_status (void)
25025 {
25026 struct machine_function *machine;
25027 machine = ggc_cleared_alloc<machine_function> ();
25028
25029 #if ARM_FT_UNKNOWN != 0
25030 machine->func_type = ARM_FT_UNKNOWN;
25031 #endif
25032 machine->static_chain_stack_bytes = -1;
25033 return machine;
25034 }
25035
25036 /* Return an RTX indicating where the return address to the
25037 calling function can be found. */
25038 rtx
25039 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
25040 {
25041 if (count != 0)
25042 return NULL_RTX;
25043
25044 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
25045 }
25046
25047 /* Do anything needed before RTL is emitted for each function. */
25048 void
25049 arm_init_expanders (void)
25050 {
25051 /* Arrange to initialize and mark the machine per-function status. */
25052 init_machine_status = arm_init_machine_status;
25053
25054 /* This is to stop the combine pass optimizing away the alignment
25055 adjustment of va_arg. */
25056 /* ??? It is claimed that this should not be necessary. */
25057 if (cfun)
25058 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
25059 }
25060
25061 /* Return true if FUNC will be compiled in a different instruction set (ARM vs. Thumb) from the current mode. */
25062
25063 bool
25064 arm_change_mode_p (tree func)
25065 {
25066 if (TREE_CODE (func) != FUNCTION_DECL)
25067 return false;
25068
25069 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
25070
25071 if (!callee_tree)
25072 callee_tree = target_option_default_node;
25073
25074 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
25075 int flags = callee_opts->x_target_flags;
25076
25077 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
25078 }
25079
25080 /* Like arm_compute_initial_elimination_offset. Simpler because there
25081 isn't an ABI specified frame pointer for Thumb. Instead, we set it
25082 to point at the base of the local variables after static stack
25083 space for a function has been allocated. */
25084
25085 HOST_WIDE_INT
25086 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
25087 {
25088 arm_stack_offsets *offsets;
25089
25090 offsets = arm_get_frame_offsets ();
25091
25092 switch (from)
25093 {
25094 case ARG_POINTER_REGNUM:
25095 switch (to)
25096 {
25097 case STACK_POINTER_REGNUM:
25098 return offsets->outgoing_args - offsets->saved_args;
25099
25100 case FRAME_POINTER_REGNUM:
25101 return offsets->soft_frame - offsets->saved_args;
25102
25103 case ARM_HARD_FRAME_POINTER_REGNUM:
25104 return offsets->saved_regs - offsets->saved_args;
25105
25106 case THUMB_HARD_FRAME_POINTER_REGNUM:
25107 return offsets->locals_base - offsets->saved_args;
25108
25109 default:
25110 gcc_unreachable ();
25111 }
25112 break;
25113
25114 case FRAME_POINTER_REGNUM:
25115 switch (to)
25116 {
25117 case STACK_POINTER_REGNUM:
25118 return offsets->outgoing_args - offsets->soft_frame;
25119
25120 case ARM_HARD_FRAME_POINTER_REGNUM:
25121 return offsets->saved_regs - offsets->soft_frame;
25122
25123 case THUMB_HARD_FRAME_POINTER_REGNUM:
25124 return offsets->locals_base - offsets->soft_frame;
25125
25126 default:
25127 gcc_unreachable ();
25128 }
25129 break;
25130
25131 default:
25132 gcc_unreachable ();
25133 }
25134 }
25135
25136 /* Generate the function's prologue. */
25137
25138 void
25139 thumb1_expand_prologue (void)
25140 {
25141 rtx_insn *insn;
25142
25143 HOST_WIDE_INT amount;
25144 HOST_WIDE_INT size;
25145 arm_stack_offsets *offsets;
25146 unsigned long func_type;
25147 int regno;
25148 unsigned long live_regs_mask;
25149 unsigned long l_mask;
25150 unsigned high_regs_pushed = 0;
25151 bool lr_needs_saving;
25152
25153 func_type = arm_current_func_type ();
25154
25155 /* Naked functions don't have prologues. */
25156 if (IS_NAKED (func_type))
25157 {
25158 if (flag_stack_usage_info)
25159 current_function_static_stack_size = 0;
25160 return;
25161 }
25162
25163 if (IS_INTERRUPT (func_type))
25164 {
25165 error ("interrupt service routines cannot be coded in Thumb mode");
25166 return;
25167 }
25168
25169 if (is_called_in_ARM_mode (current_function_decl))
25170 emit_insn (gen_prologue_thumb1_interwork ());
25171
25172 offsets = arm_get_frame_offsets ();
25173 live_regs_mask = offsets->saved_regs_mask;
25174 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
25175
25176 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
25177 l_mask = live_regs_mask & 0x40ff;
25178 /* Then count how many other high registers will need to be pushed. */
25179 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25180
25181 if (crtl->args.pretend_args_size)
25182 {
25183 rtx x = GEN_INT (-crtl->args.pretend_args_size);
25184
25185 if (cfun->machine->uses_anonymous_args)
25186 {
25187 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
25188 unsigned long mask;
25189
25190 mask = 1ul << (LAST_ARG_REGNUM + 1);
25191 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
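	  /* Worked example: with pretend_args_size == 8 this gives
	     num_pushes == 2 and mask == 0xc, i.e. a push of {r2, r3}.  */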
25192
25193 insn = thumb1_emit_multi_reg_push (mask, 0);
25194 }
25195 else
25196 {
25197 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25198 stack_pointer_rtx, x));
25199 }
25200 RTX_FRAME_RELATED_P (insn) = 1;
25201 }
25202
25203 if (TARGET_BACKTRACE)
25204 {
25205 HOST_WIDE_INT offset = 0;
25206 unsigned work_register;
25207 rtx work_reg, x, arm_hfp_rtx;
25208
25209 /* We have been asked to create a stack backtrace structure.
25210 The code looks like this:
25211
25212 0 .align 2
25213 0 func:
25214 0 sub SP, #16 Reserve space for 4 registers.
25215 2 push {R7} Push low registers.
25216 4 add R7, SP, #20 Get the stack pointer before the push.
25217 6 str R7, [SP, #8] Store the stack pointer
25218 (before reserving the space).
25219 8 mov R7, PC Get hold of the start of this code + 12.
25220 10 str R7, [SP, #16] Store it.
25221 12 mov R7, FP Get hold of the current frame pointer.
25222 14 str R7, [SP, #4] Store it.
25223 16 mov R7, LR Get hold of the current return address.
25224 18 str R7, [SP, #12] Store it.
25225 20 add R7, SP, #16 Point at the start of the
25226 backtrace structure.
25227 22 mov FP, R7 Put this value into the frame pointer. */
25228
25229 work_register = thumb_find_work_register (live_regs_mask);
25230 work_reg = gen_rtx_REG (SImode, work_register);
25231 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
25232
25233 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25234 stack_pointer_rtx, GEN_INT (-16)));
25235 RTX_FRAME_RELATED_P (insn) = 1;
25236
25237 if (l_mask)
25238 {
25239 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
25240 RTX_FRAME_RELATED_P (insn) = 1;
25241 lr_needs_saving = false;
25242
25243 offset = bit_count (l_mask) * UNITS_PER_WORD;
25244 }
25245
25246 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
25247 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25248
25249 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
25250 x = gen_frame_mem (SImode, x);
25251 emit_move_insn (x, work_reg);
25252
25253 /* Make sure that the instruction fetching the PC is in the right place
25254 to calculate "start of backtrace creation code + 12". */
25255 /* ??? The stores using the common WORK_REG ought to be enough to
25256 prevent the scheduler from doing anything weird. Failing that
25257 we could always move all of the following into an UNSPEC_VOLATILE. */
25258 if (l_mask)
25259 {
25260 x = gen_rtx_REG (SImode, PC_REGNUM);
25261 emit_move_insn (work_reg, x);
25262
25263 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25264 x = gen_frame_mem (SImode, x);
25265 emit_move_insn (x, work_reg);
25266
25267 emit_move_insn (work_reg, arm_hfp_rtx);
25268
25269 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25270 x = gen_frame_mem (SImode, x);
25271 emit_move_insn (x, work_reg);
25272 }
25273 else
25274 {
25275 emit_move_insn (work_reg, arm_hfp_rtx);
25276
25277 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25278 x = gen_frame_mem (SImode, x);
25279 emit_move_insn (x, work_reg);
25280
25281 x = gen_rtx_REG (SImode, PC_REGNUM);
25282 emit_move_insn (work_reg, x);
25283
25284 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25285 x = gen_frame_mem (SImode, x);
25286 emit_move_insn (x, work_reg);
25287 }
25288
25289 x = gen_rtx_REG (SImode, LR_REGNUM);
25290 emit_move_insn (work_reg, x);
25291
25292 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
25293 x = gen_frame_mem (SImode, x);
25294 emit_move_insn (x, work_reg);
25295
25296 x = GEN_INT (offset + 12);
25297 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25298
25299 emit_move_insn (arm_hfp_rtx, work_reg);
25300 }
25301 /* Optimization: If we are not pushing any low registers but we are going
25302 to push some high registers then delay our first push. This will just
25303 be a push of LR and we can combine it with the push of the first high
25304 register. */
25305 else if ((l_mask & 0xff) != 0
25306 || (high_regs_pushed == 0 && lr_needs_saving))
25307 {
25308 unsigned long mask = l_mask;
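      /* thumb1_extra_regs_pushed presumably returns how many dead low
	 argument registers can be pushed as padding to absorb part of the
	 upcoming stack adjustment; (1 << N) - 1 selects r0..r(N-1).  */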
25309 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
25310 insn = thumb1_emit_multi_reg_push (mask, mask);
25311 RTX_FRAME_RELATED_P (insn) = 1;
25312 lr_needs_saving = false;
25313 }
25314
25315 if (high_regs_pushed)
25316 {
25317 unsigned pushable_regs;
25318 unsigned next_hi_reg;
25319 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
25320 : crtl->args.info.nregs;
25321 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
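      /* For instance, if two argument registers are live on entry,
	 arg_regs_mask == 0x3, i.e. r0 and r1.  */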
25322
25323 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
25324 if (live_regs_mask & (1 << next_hi_reg))
25325 break;
25326
25327 /* Here we need to mask out registers used for passing arguments,
25328 even if they could otherwise be pushed. This is to avoid using
25329 them to stash the high registers, which would clobber the
25330 argument values they still hold. */
25331 pushable_regs = l_mask & (~arg_regs_mask);
25332 pushable_regs |= thumb1_prologue_unused_call_clobbered_lo_regs ();
25333
25334 /* Normally, LR can be used as a scratch register once it has been
25335 saved; but if the function examines its own return address then
25336 the value is still live and we need to avoid using it. */
25337 bool return_addr_live
25338 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
25339 LR_REGNUM);
25340
25341 if (lr_needs_saving || return_addr_live)
25342 pushable_regs &= ~(1 << LR_REGNUM);
25343
25344 if (pushable_regs == 0)
25345 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
25346
25347 while (high_regs_pushed > 0)
25348 {
25349 unsigned long real_regs_mask = 0;
25350 unsigned long push_mask = 0;
25351
25352 for (regno = LR_REGNUM; regno >= 0; regno --)
25353 {
25354 if (pushable_regs & (1 << regno))
25355 {
25356 emit_move_insn (gen_rtx_REG (SImode, regno),
25357 gen_rtx_REG (SImode, next_hi_reg));
25358
25359 high_regs_pushed --;
25360 real_regs_mask |= (1 << next_hi_reg);
25361 push_mask |= (1 << regno);
25362
25363 if (high_regs_pushed)
25364 {
25365 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
25366 next_hi_reg --)
25367 if (live_regs_mask & (1 << next_hi_reg))
25368 break;
25369 }
25370 else
25371 break;
25372 }
25373 }
25374
25375 /* If we had to find a work register and we have not yet
25376 saved the LR then add it to the list of regs to push. */
25377 if (lr_needs_saving)
25378 {
25379 push_mask |= 1 << LR_REGNUM;
25380 real_regs_mask |= 1 << LR_REGNUM;
25381 lr_needs_saving = false;
25382 /* If the return address is not live at this point, we
25383 can add LR to the list of registers that we can use
25384 for pushes. */
25385 if (!return_addr_live)
25386 pushable_regs |= 1 << LR_REGNUM;
25387 }
25388
25389 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
25390 RTX_FRAME_RELATED_P (insn) = 1;
25391 }
25392 }
25393
25394 /* Load the pic register before setting the frame pointer,
25395 so we can use r7 as a temporary work register. */
25396 if (flag_pic && arm_pic_register != INVALID_REGNUM)
25397 arm_load_pic_register (live_regs_mask, NULL_RTX);
25398
25399 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
25400 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
25401 stack_pointer_rtx);
25402
25403 size = offsets->outgoing_args - offsets->saved_args;
25404 if (flag_stack_usage_info)
25405 current_function_static_stack_size = size;
25406
25407 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25408 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
25409 || flag_stack_clash_protection)
25410 && size)
25411 sorry ("%<-fstack-check=specific%> for Thumb-1");
25412
25413 amount = offsets->outgoing_args - offsets->saved_regs;
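  /* Registers folded into the earlier push by thumb1_extra_regs_pushed have
     already lowered SP by 4 bytes each, so reduce the explicit adjustment
     accordingly.  */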
25414 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25415 if (amount)
25416 {
25417 if (amount < 512)
25418 {
25419 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25420 GEN_INT (- amount)));
25421 RTX_FRAME_RELATED_P (insn) = 1;
25422 }
25423 else
25424 {
25425 rtx reg, dwarf;
25426
25427 /* The stack decrement is too big for an immediate value in a single
25428 insn. In theory we could issue multiple subtracts, but after
25429 three of them it becomes more space efficient to place the full
25430 value in the constant pool and load into a register. (Also the
25431 ARM debugger really likes to see only one stack decrement per
25432 function). So instead we look for a scratch register into which
25433 we can load the decrement, and then we subtract this from the
25434 stack pointer. Unfortunately, in Thumb mode the only available
25435 scratch registers are the argument registers, and we cannot use
25436 these as they may hold arguments to the function. Instead we
25437 attempt to locate a call preserved register which is used by this
25438 function. If we can find one, then we know that it will have
25439 been pushed at the start of the prologue and so we can corrupt
25440 it now. */
25441 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25442 if (live_regs_mask & (1 << regno))
25443 break;
25444
25445 gcc_assert (regno <= LAST_LO_REGNUM);
25446
25447 reg = gen_rtx_REG (SImode, regno);
25448
25449 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25450
25451 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25452 stack_pointer_rtx, reg));
25453
25454 dwarf = gen_rtx_SET (stack_pointer_rtx,
25455 plus_constant (Pmode, stack_pointer_rtx,
25456 -amount));
25457 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25458 RTX_FRAME_RELATED_P (insn) = 1;
25459 }
25460 }
25461
25462 if (frame_pointer_needed)
25463 thumb_set_frame_pointer (offsets);
25464
25465 /* If we are profiling, make sure no instructions are scheduled before
25466 the call to mcount. Similarly if the user has requested no
25467 scheduling in the prolog. Similarly if we want non-call exceptions
25468 using the EABI unwinder, to prevent faulting instructions from being
25469 swapped with a stack adjustment. */
25470 if (crtl->profile || !TARGET_SCHED_PROLOG
25471 || (arm_except_unwind_info (&global_options) == UI_TARGET
25472 && cfun->can_throw_non_call_exceptions))
25473 emit_insn (gen_blockage ());
25474
25475 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25476 if (live_regs_mask & 0xff)
25477 cfun->machine->lr_save_eliminated = 0;
25478 }
25479
25480 /* Clear caller saved registers not used to pass return values and leaked
25481 condition flags before exiting a cmse_nonsecure_entry function. */
25482
25483 void
25484 cmse_nonsecure_entry_clear_before_return (void)
25485 {
25486 int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
25487 uint32_t padding_bits_to_clear = 0;
25488 auto_sbitmap to_clear_bitmap (maxregno + 1);
25489 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
25490 tree result_type;
25491
25492 bitmap_clear (to_clear_bitmap);
25493 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
25494 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
25495
25496 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25497 registers. */
25498 if (TARGET_HARD_FLOAT)
25499 {
25500 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
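      /* float_bits covers s0-s15 (d0-d7), the VFP registers that may carry
	 floating-point arguments and return values under the AAPCS.  */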
25501
25502 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
25503
25504 /* Make sure we don't clear the two scratch registers used to clear the
25505 relevant FPSCR bits in output_return_instruction. */
25506 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25507 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
25508 emit_use (gen_rtx_REG (SImode, 4));
25509 bitmap_clear_bit (to_clear_bitmap, 4);
25510 }
25511
25512 /* If the user has defined registers to be caller saved, these are no longer
25513 restored by the function before returning and must thus be cleared for
25514 security purposes. */
25515 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
25516 {
25517 /* We do not touch registers that can be used to pass arguments as per
25518 the AAPCS, since these should never be made callee-saved by user
25519 options. */
25520 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25521 continue;
25522 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25523 continue;
25524 if (call_used_regs[regno])
25525 bitmap_set_bit (to_clear_bitmap, regno);
25526 }
25527
25528 /* Make sure we do not clear the registers used to return the result in. */
25529 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25530 if (!VOID_TYPE_P (result_type))
25531 {
25532 uint64_t to_clear_return_mask;
25533 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25534
25535 /* No need to check that we return in registers, because we don't
25536 support returning on stack yet. */
25537 gcc_assert (REG_P (result_rtl));
25538 to_clear_return_mask
25539 = compute_not_to_clear_mask (result_type, result_rtl, 0,
25540 &padding_bits_to_clear);
25541 if (to_clear_return_mask)
25542 {
25543 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
25544 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25545 {
25546 if (to_clear_return_mask & (1ULL << regno))
25547 bitmap_clear_bit (to_clear_bitmap, regno);
25548 }
25549 }
25550 }
25551
25552 if (padding_bits_to_clear != 0)
25553 {
25554 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
25555 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
25556
25557 /* Padding_bits_to_clear is not 0, so we know we are dealing with
25558 returning a composite type, which only uses r0. Let's make sure that
25559 r1-r3 are cleared too. */
25560 bitmap_clear (to_clear_arg_regs_bitmap);
25561 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
25562 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
25563 }
25564
25565 /* Clear full registers that leak before returning. */
25566 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
25567 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
25568 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
25569 clearing_reg);
25570 }
25571
25572 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
25573 single POP instruction can be generated. LR should be replaced by PC.
25574 All the checks required are already done by USE_RETURN_INSN (). Hence,
25575 all we really need to check here is whether a single register or
25576 multiple registers are to be popped. */
25577 void
25578 thumb2_expand_return (bool simple_return)
25579 {
25580 int i, num_regs;
25581 unsigned long saved_regs_mask;
25582 arm_stack_offsets *offsets;
25583
25584 offsets = arm_get_frame_offsets ();
25585 saved_regs_mask = offsets->saved_regs_mask;
25586
25587 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25588 if (saved_regs_mask & (1 << i))
25589 num_regs++;
25590
25591 if (!simple_return && saved_regs_mask)
25592 {
25593 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25594 functions, or adapt the code to handle them according to the ACLE.
25595 This path should not be reachable for cmse_nonsecure_entry functions,
25596 but we prefer to assert it for now to ensure that future code changes
25597 do not silently change this behavior. */
25598 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25599 if (num_regs == 1)
25600 {
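	  /* Build (parallel [(return) (set (reg:SI PC) (mem (post_inc SP)))]),
	     i.e. a single "pop {pc}" that restores and returns at once.  */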
25601 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25602 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25603 rtx addr = gen_rtx_MEM (SImode,
25604 gen_rtx_POST_INC (SImode,
25605 stack_pointer_rtx));
25606 set_mem_alias_set (addr, get_frame_alias_set ());
25607 XVECEXP (par, 0, 0) = ret_rtx;
25608 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25609 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25610 emit_jump_insn (par);
25611 }
25612 else
25613 {
25614 saved_regs_mask &= ~ (1 << LR_REGNUM);
25615 saved_regs_mask |= (1 << PC_REGNUM);
25616 arm_emit_multi_reg_pop (saved_regs_mask);
25617 }
25618 }
25619 else
25620 {
25621 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25622 cmse_nonsecure_entry_clear_before_return ();
25623 emit_jump_insn (simple_return_rtx);
25624 }
25625 }
25626
25627 void
25628 thumb1_expand_epilogue (void)
25629 {
25630 HOST_WIDE_INT amount;
25631 arm_stack_offsets *offsets;
25632 int regno;
25633
25634 /* Naked functions don't have epilogues. */
25635 if (IS_NAKED (arm_current_func_type ()))
25636 return;
25637
25638 offsets = arm_get_frame_offsets ();
25639 amount = offsets->outgoing_args - offsets->saved_regs;
25640
25641 if (frame_pointer_needed)
25642 {
25643 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25644 amount = offsets->locals_base - offsets->saved_regs;
25645 }
25646 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25647
25648 gcc_assert (amount >= 0);
25649 if (amount)
25650 {
25651 emit_insn (gen_blockage ());
25652
25653 if (amount < 512)
25654 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25655 GEN_INT (amount)));
25656 else
25657 {
25658 /* r3 is always free in the epilogue. */
25659 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25660
25661 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25662 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25663 }
25664 }
25665
25666 /* Emit a USE (stack_pointer_rtx), so that
25667 the stack adjustment will not be deleted. */
25668 emit_insn (gen_force_register_use (stack_pointer_rtx));
25669
25670 if (crtl->profile || !TARGET_SCHED_PROLOG)
25671 emit_insn (gen_blockage ());
25672
25673 /* Emit a clobber for each insn that will be restored in the epilogue,
25674 so that flow2 will get register lifetimes correct. */
25675 for (regno = 0; regno < 13; regno++)
25676 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25677 emit_clobber (gen_rtx_REG (SImode, regno));
25678
25679 if (! df_regs_ever_live_p (LR_REGNUM))
25680 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25681
25682 /* Clear all caller-saved regs that are not used to return. */
25683 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25684 cmse_nonsecure_entry_clear_before_return ();
25685 }
25686
25687 /* Epilogue code for APCS frame. */
25688 static void
25689 arm_expand_epilogue_apcs_frame (bool really_return)
25690 {
25691 unsigned long func_type;
25692 unsigned long saved_regs_mask;
25693 int num_regs = 0;
25694 int i;
25695 int floats_from_frame = 0;
25696 arm_stack_offsets *offsets;
25697
25698 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25699 func_type = arm_current_func_type ();
25700
25701 /* Get frame offsets for ARM. */
25702 offsets = arm_get_frame_offsets ();
25703 saved_regs_mask = offsets->saved_regs_mask;
25704
25705 /* Find the offset of the floating-point save area in the frame. */
25706 floats_from_frame
25707 = (offsets->saved_args
25708 + arm_compute_static_chain_stack_bytes ()
25709 - offsets->frame);
25710
25711 /* Compute how many core registers are saved and how far away the floats are. */
25712 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25713 if (saved_regs_mask & (1 << i))
25714 {
25715 num_regs++;
25716 floats_from_frame += 4;
25717 }
25718
25719 if (TARGET_HARD_FLOAT)
25720 {
25721 int start_reg;
25722 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25723
25724 /* The offset is from IP_REGNUM. */
25725 int saved_size = arm_get_vfp_saved_size ();
25726 if (saved_size > 0)
25727 {
25728 rtx_insn *insn;
25729 floats_from_frame += saved_size;
25730 insn = emit_insn (gen_addsi3 (ip_rtx,
25731 hard_frame_pointer_rtx,
25732 GEN_INT (-floats_from_frame)));
25733 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25734 ip_rtx, hard_frame_pointer_rtx);
25735 }
25736
25737 /* Generate VFP register multi-pop. */
25738 start_reg = FIRST_VFP_REGNUM;
25739
25740 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25741 /* Look for a case where a reg does not need restoring. */
25742 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25743 && (!df_regs_ever_live_p (i + 1)
25744 || call_used_regs[i + 1]))
25745 {
25746 if (start_reg != i)
25747 arm_emit_vfp_multi_reg_pop (start_reg,
25748 (i - start_reg) / 2,
25749 gen_rtx_REG (SImode,
25750 IP_REGNUM));
25751 start_reg = i + 2;
25752 }
25753
25754 /* Restore the remaining regs that we have discovered (or possibly
25755 even all of them, if the conditional in the for loop never
25756 fired). */
25757 if (start_reg != i)
25758 arm_emit_vfp_multi_reg_pop (start_reg,
25759 (i - start_reg) / 2,
25760 gen_rtx_REG (SImode, IP_REGNUM));
25761 }
25762
25763 if (TARGET_IWMMXT)
25764 {
25765 /* The frame pointer is guaranteed to be non-double-word aligned, as
25766 it is set to double-word-aligned old_stack_pointer - 4. */
25767 rtx_insn *insn;
25768 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25769
25770 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25771 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25772 {
25773 rtx addr = gen_frame_mem (V2SImode,
25774 plus_constant (Pmode, hard_frame_pointer_rtx,
25775 - lrm_count * 4));
25776 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25777 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25778 gen_rtx_REG (V2SImode, i),
25779 NULL_RTX);
25780 lrm_count += 2;
25781 }
25782 }
25783
25784 /* saved_regs_mask should contain IP, which holds the old stack pointer
25785 saved at the time the frame was created. Since SP and IP are adjacent
25786 registers, we can restore the value directly into SP. */
25787 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25788 saved_regs_mask &= ~(1 << IP_REGNUM);
25789 saved_regs_mask |= (1 << SP_REGNUM);
25790
25791 /* There are two registers left in saved_regs_mask - LR and PC. We
25792 only need to restore LR (the return address), but to
25793 save time we can load it directly into PC, unless we need a
25794 special function exit sequence, or we are not really returning. */
25795 if (really_return
25796 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25797 && !crtl->calls_eh_return)
25798 /* Delete LR from the register mask, so that LR on
25799 the stack is loaded into the PC in the register mask. */
25800 saved_regs_mask &= ~(1 << LR_REGNUM);
25801 else
25802 saved_regs_mask &= ~(1 << PC_REGNUM);
25803
25804 num_regs = bit_count (saved_regs_mask);
25805 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25806 {
25807 rtx_insn *insn;
25808 emit_insn (gen_blockage ());
25809 /* Unwind the stack to just below the saved registers. */
25810 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25811 hard_frame_pointer_rtx,
25812 GEN_INT (- 4 * num_regs)));
25813
25814 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25815 stack_pointer_rtx, hard_frame_pointer_rtx);
25816 }
25817
25818 arm_emit_multi_reg_pop (saved_regs_mask);
25819
25820 if (IS_INTERRUPT (func_type))
25821 {
25822 /* Interrupt handlers will have pushed the
25823 IP onto the stack, so restore it now. */
25824 rtx_insn *insn;
25825 rtx addr = gen_rtx_MEM (SImode,
25826 gen_rtx_POST_INC (SImode,
25827 stack_pointer_rtx));
25828 set_mem_alias_set (addr, get_frame_alias_set ());
25829 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25830 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25831 gen_rtx_REG (SImode, IP_REGNUM),
25832 NULL_RTX);
25833 }
25834
25835 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25836 return;
25837
25838 if (crtl->calls_eh_return)
25839 emit_insn (gen_addsi3 (stack_pointer_rtx,
25840 stack_pointer_rtx,
25841 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25842
25843 if (IS_STACKALIGN (func_type))
25844 /* Restore the original stack pointer. Before prologue, the stack was
25845 realigned and the original stack pointer saved in r0. For details,
25846 see comment in arm_expand_prologue. */
25847 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25848
25849 emit_jump_insn (simple_return_rtx);
25850 }
25851
25852 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25853 function is not a sibcall. */
25854 void
25855 arm_expand_epilogue (bool really_return)
25856 {
25857 unsigned long func_type;
25858 unsigned long saved_regs_mask;
25859 int num_regs = 0;
25860 int i;
25861 int amount;
25862 arm_stack_offsets *offsets;
25863
25864 func_type = arm_current_func_type ();
25865
25866 /* Naked functions don't have epilogues. Hence, generate a return pattern
25867 and let output_return_instruction take care of any instruction emission. */
25868 if (IS_NAKED (func_type)
25869 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25870 {
25871 if (really_return)
25872 emit_jump_insn (simple_return_rtx);
25873 return;
25874 }
25875
25876 /* If we are throwing an exception, then we really must be doing a
25877 return, so we can't tail-call. */
25878 gcc_assert (!crtl->calls_eh_return || really_return);
25879
25880 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25881 {
25882 arm_expand_epilogue_apcs_frame (really_return);
25883 return;
25884 }
25885
25886 /* Get frame offsets for ARM. */
25887 offsets = arm_get_frame_offsets ();
25888 saved_regs_mask = offsets->saved_regs_mask;
25889 num_regs = bit_count (saved_regs_mask);
25890
25891 if (frame_pointer_needed)
25892 {
25893 rtx_insn *insn;
25894 /* Restore stack pointer if necessary. */
25895 if (TARGET_ARM)
25896 {
25897 /* In ARM mode, the frame pointer points to the first saved register.
25898 Restore the stack pointer to the last saved register. */
25899 amount = offsets->frame - offsets->saved_regs;
25900
25901 /* Force out any pending memory operations that reference stacked data
25902 before stack de-allocation occurs. */
25903 emit_insn (gen_blockage ());
25904 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25905 hard_frame_pointer_rtx,
25906 GEN_INT (amount)));
25907 arm_add_cfa_adjust_cfa_note (insn, amount,
25908 stack_pointer_rtx,
25909 hard_frame_pointer_rtx);
25910
25911 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25912 deleted. */
25913 emit_insn (gen_force_register_use (stack_pointer_rtx));
25914 }
25915 else
25916 {
25917 /* In Thumb-2 mode, the frame pointer points to the last saved
25918 register. */
25919 amount = offsets->locals_base - offsets->saved_regs;
25920 if (amount)
25921 {
25922 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25923 hard_frame_pointer_rtx,
25924 GEN_INT (amount)));
25925 arm_add_cfa_adjust_cfa_note (insn, amount,
25926 hard_frame_pointer_rtx,
25927 hard_frame_pointer_rtx);
25928 }
25929
25930 /* Force out any pending memory operations that reference stacked data
25931 before stack de-allocation occurs. */
25932 emit_insn (gen_blockage ());
25933 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25934 hard_frame_pointer_rtx));
25935 arm_add_cfa_adjust_cfa_note (insn, 0,
25936 stack_pointer_rtx,
25937 hard_frame_pointer_rtx);
25938 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25939 deleted. */
25940 emit_insn (gen_force_register_use (stack_pointer_rtx));
25941 }
25942 }
25943 else
25944 {
25945 /* Pop off outgoing args and local frame to adjust stack pointer to
25946 last saved register. */
25947 amount = offsets->outgoing_args - offsets->saved_regs;
25948 if (amount)
25949 {
25950 rtx_insn *tmp;
25951 /* Force out any pending memory operations that reference stacked data
25952 before stack de-allocation occurs. */
25953 emit_insn (gen_blockage ());
25954 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25955 stack_pointer_rtx,
25956 GEN_INT (amount)));
25957 arm_add_cfa_adjust_cfa_note (tmp, amount,
25958 stack_pointer_rtx, stack_pointer_rtx);
25959 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25960 not deleted. */
25961 emit_insn (gen_force_register_use (stack_pointer_rtx));
25962 }
25963 }
25964
25965 if (TARGET_HARD_FLOAT)
25966 {
25967 /* Generate VFP register multi-pop. */
25968 int end_reg = LAST_VFP_REGNUM + 1;
25969
25970 /* Scan the registers in reverse order. We need to match
25971 any groupings made in the prologue and generate matching
25972 vldm operations. The need to match groups is because,
25973 unlike pop, vldm can only do consecutive regs. */
25974 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25975 /* Look for a case where a reg does not need restoring. */
25976 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25977 && (!df_regs_ever_live_p (i + 1)
25978 || call_used_regs[i + 1]))
25979 {
25980 /* Restore the regs discovered so far (from reg+2 to
25981 end_reg). */
25982 if (end_reg > i + 2)
25983 arm_emit_vfp_multi_reg_pop (i + 2,
25984 (end_reg - (i + 2)) / 2,
25985 stack_pointer_rtx);
25986 end_reg = i;
25987 }
25988
25989 /* Restore the remaining regs that we have discovered (or possibly
25990 even all of them, if the conditional in the for loop never
25991 fired). */
25992 if (end_reg > i + 2)
25993 arm_emit_vfp_multi_reg_pop (i + 2,
25994 (end_reg - (i + 2)) / 2,
25995 stack_pointer_rtx);
25996 }
25997
25998 if (TARGET_IWMMXT)
25999 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
26000 if (df_regs_ever_live_p (i) && !call_used_regs[i])
26001 {
26002 rtx_insn *insn;
26003 rtx addr = gen_rtx_MEM (V2SImode,
26004 gen_rtx_POST_INC (SImode,
26005 stack_pointer_rtx));
26006 set_mem_alias_set (addr, get_frame_alias_set ());
26007 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
26008 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
26009 gen_rtx_REG (V2SImode, i),
26010 NULL_RTX);
26011 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
26012 stack_pointer_rtx, stack_pointer_rtx);
26013 }
26014
26015 if (saved_regs_mask)
26016 {
26017 rtx insn;
26018 bool return_in_pc = false;
26019
26020 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
26021 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
26022 && !IS_CMSE_ENTRY (func_type)
26023 && !IS_STACKALIGN (func_type)
26024 && really_return
26025 && crtl->args.pretend_args_size == 0
26026 && saved_regs_mask & (1 << LR_REGNUM)
26027 && !crtl->calls_eh_return)
26028 {
26029 saved_regs_mask &= ~(1 << LR_REGNUM);
26030 saved_regs_mask |= (1 << PC_REGNUM);
26031 return_in_pc = true;
26032 }
26033
26034 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
26035 {
26036 for (i = 0; i <= LAST_ARM_REGNUM; i++)
26037 if (saved_regs_mask & (1 << i))
26038 {
26039 rtx addr = gen_rtx_MEM (SImode,
26040 gen_rtx_POST_INC (SImode,
26041 stack_pointer_rtx));
26042 set_mem_alias_set (addr, get_frame_alias_set ());
26043
26044 if (i == PC_REGNUM)
26045 {
26046 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
26047 XVECEXP (insn, 0, 0) = ret_rtx;
26048 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
26049 addr);
26050 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
26051 insn = emit_jump_insn (insn);
26052 }
26053 else
26054 {
26055 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
26056 addr));
26057 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
26058 gen_rtx_REG (SImode, i),
26059 NULL_RTX);
26060 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
26061 stack_pointer_rtx,
26062 stack_pointer_rtx);
26063 }
26064 }
26065 }
26066 else
26067 {
26068 if (TARGET_LDRD
26069 && current_tune->prefer_ldrd_strd
26070 && !optimize_function_for_size_p (cfun))
26071 {
26072 if (TARGET_THUMB2)
26073 thumb2_emit_ldrd_pop (saved_regs_mask);
26074 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
26075 arm_emit_ldrd_pop (saved_regs_mask);
26076 else
26077 arm_emit_multi_reg_pop (saved_regs_mask);
26078 }
26079 else
26080 arm_emit_multi_reg_pop (saved_regs_mask);
26081 }
26082
26083 if (return_in_pc)
26084 return;
26085 }
26086
26087 amount
26088 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
26089 if (amount)
26090 {
26091 int i, j;
26092 rtx dwarf = NULL_RTX;
26093 rtx_insn *tmp =
26094 emit_insn (gen_addsi3 (stack_pointer_rtx,
26095 stack_pointer_rtx,
26096 GEN_INT (amount)));
26097
26098 RTX_FRAME_RELATED_P (tmp) = 1;
26099
26100 if (cfun->machine->uses_anonymous_args)
26101 {
26102 /* Restore pretend args. Refer to arm_expand_prologue for how the
26103 pretend args were saved on the stack. */
26104 int num_regs = crtl->args.pretend_args_size / 4;
26105 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
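	  /* For example, pretend_args_size == 8 gives num_regs == 2 and a
	     mask of 0xc, i.e. r2 and r3, matching how the prologue pushed
	     them.  */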
26106 for (j = 0, i = 0; j < num_regs; i++)
26107 if (saved_regs_mask & (1 << i))
26108 {
26109 rtx reg = gen_rtx_REG (SImode, i);
26110 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
26111 j++;
26112 }
26113 REG_NOTES (tmp) = dwarf;
26114 }
26115 arm_add_cfa_adjust_cfa_note (tmp, amount,
26116 stack_pointer_rtx, stack_pointer_rtx);
26117 }
26118
26119 /* Clear all caller-saved regs that are not used to return. */
26120 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26121 {
26122 /* CMSE_ENTRY always returns. */
26123 gcc_assert (really_return);
26124 cmse_nonsecure_entry_clear_before_return ();
26125 }
26126
26127 if (!really_return)
26128 return;
26129
26130 if (crtl->calls_eh_return)
26131 emit_insn (gen_addsi3 (stack_pointer_rtx,
26132 stack_pointer_rtx,
26133 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
26134
26135 if (IS_STACKALIGN (func_type))
26136 /* Restore the original stack pointer. Before prologue, the stack was
26137 realigned and the original stack pointer saved in r0. For details,
26138 see comment in arm_expand_prologue. */
26139 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
26140
26141 emit_jump_insn (simple_return_rtx);
26142 }
26143
26144 /* Implementation of insn prologue_thumb1_interwork. This is the first
26145 "instruction" of a function called in ARM mode. Swap to thumb mode. */
26146
26147 const char *
26148 thumb1_output_interwork (void)
26149 {
26150 const char * name;
26151 FILE *f = asm_out_file;
26152
26153 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
26154 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
26155 == SYMBOL_REF);
26156 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
26157
26158 /* Generate code sequence to switch us into Thumb mode. */
26159 /* The .code 32 directive has already been emitted by
26160 ASM_DECLARE_FUNCTION_NAME. */
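  /* In ARM state the PC reads as the address of the current instruction
     plus 8, which here is where the Thumb stub emitted below begins; setting
     bit 0 of that address and branching with BX switches the core into
     Thumb state.  */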
26161 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
26162 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
26163
26164 /* Generate a label, so that the debugger will notice the
26165 change in instruction sets. This label is also used by
26166 the assembler to bypass the ARM code when this function
26167 is called from a Thumb encoded function elsewhere in the
26168 same file. Hence the definition of STUB_NAME here must
26169 agree with the definition in gas/config/tc-arm.c. */
26170
26171 #define STUB_NAME ".real_start_of"
26172
26173 fprintf (f, "\t.code\t16\n");
26174 #ifdef ARM_PE
26175 if (arm_dllexport_name_p (name))
26176 name = arm_strip_name_encoding (name);
26177 #endif
26178 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
26179 fprintf (f, "\t.thumb_func\n");
26180 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
26181
26182 return "";
26183 }
26184
26185 /* Handle the case of a double word load into a low register from
26186 a computed memory address. The computed address may involve a
26187 register which is overwritten by the load. */
26188 const char *
26189 thumb_load_double_from_address (rtx *operands)
26190 {
26191 rtx addr;
26192 rtx base;
26193 rtx offset;
26194 rtx arg1;
26195 rtx arg2;
26196
26197 gcc_assert (REG_P (operands[0]));
26198 gcc_assert (MEM_P (operands[1]));
26199
26200 /* Get the memory address. */
26201 addr = XEXP (operands[1], 0);
26202
26203 /* Work out how the memory address is computed. */
26204 switch (GET_CODE (addr))
26205 {
26206 case REG:
26207 operands[2] = adjust_address (operands[1], SImode, 4);
26208
26209 if (REGNO (operands[0]) == REGNO (addr))
26210 {
26211 output_asm_insn ("ldr\t%H0, %2", operands);
26212 output_asm_insn ("ldr\t%0, %1", operands);
26213 }
26214 else
26215 {
26216 output_asm_insn ("ldr\t%0, %1", operands);
26217 output_asm_insn ("ldr\t%H0, %2", operands);
26218 }
26219 break;
26220
26221 case CONST:
26222 /* Compute <address> + 4 for the high order load. */
26223 operands[2] = adjust_address (operands[1], SImode, 4);
26224
26225 output_asm_insn ("ldr\t%0, %1", operands);
26226 output_asm_insn ("ldr\t%H0, %2", operands);
26227 break;
26228
26229 case PLUS:
26230 arg1 = XEXP (addr, 0);
26231 arg2 = XEXP (addr, 1);
26232
26233 if (CONSTANT_P (arg1))
26234 base = arg2, offset = arg1;
26235 else
26236 base = arg1, offset = arg2;
26237
26238 gcc_assert (REG_P (base));
26239
26240 /* Catch the case of <address> = <reg> + <reg>. */
26241 if (REG_P (offset))
26242 {
26243 int reg_offset = REGNO (offset);
26244 int reg_base = REGNO (base);
26245 int reg_dest = REGNO (operands[0]);
26246
26247 /* Add the base and offset registers together into the
26248 higher destination register. */
26249 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
26250 reg_dest + 1, reg_base, reg_offset);
26251
26252 /* Load the lower destination register from the address in
26253 the higher destination register. */
26254 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
26255 reg_dest, reg_dest + 1);
26256
26257 /* Load the higher destination register from its own address
26258 plus 4. */
26259 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
26260 reg_dest + 1, reg_dest + 1);
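	    /* For instance, loading the double word at [r1 + r2] into r4/r5
	       emits: add r5, r1, r2; ldr r4, [r5, #0]; ldr r5, [r5, #4].  */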
26261 }
26262 else
26263 {
26264 /* Compute <address> + 4 for the high order load. */
26265 operands[2] = adjust_address (operands[1], SImode, 4);
26266
26267 /* If the computed address is held in the low order register
26268 then load the high order register first, otherwise always
26269 load the low order register first. */
26270 if (REGNO (operands[0]) == REGNO (base))
26271 {
26272 output_asm_insn ("ldr\t%H0, %2", operands);
26273 output_asm_insn ("ldr\t%0, %1", operands);
26274 }
26275 else
26276 {
26277 output_asm_insn ("ldr\t%0, %1", operands);
26278 output_asm_insn ("ldr\t%H0, %2", operands);
26279 }
26280 }
26281 break;
26282
26283 case LABEL_REF:
26284 /* With no registers to worry about we can just load the value
26285 directly. */
26286 operands[2] = adjust_address (operands[1], SImode, 4);
26287
26288 output_asm_insn ("ldr\t%H0, %2", operands);
26289 output_asm_insn ("ldr\t%0, %1", operands);
26290 break;
26291
26292 default:
26293 gcc_unreachable ();
26294 }
26295
26296 return "";
26297 }
26298
26299 const char *
26300 thumb_output_move_mem_multiple (int n, rtx *operands)
26301 {
26302 switch (n)
26303 {
26304 case 2:
26305 if (REGNO (operands[4]) > REGNO (operands[5]))
26306 std::swap (operands[4], operands[5]);
26307
26308 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
26309 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
26310 break;
26311
26312 case 3:
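      /* The three swaps below sort operands[4..6] into ascending register
	 order, as required by the ldmia/stmia register lists.  */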
26313 if (REGNO (operands[4]) > REGNO (operands[5]))
26314 std::swap (operands[4], operands[5]);
26315 if (REGNO (operands[5]) > REGNO (operands[6]))
26316 std::swap (operands[5], operands[6]);
26317 if (REGNO (operands[4]) > REGNO (operands[5]))
26318 std::swap (operands[4], operands[5]);
26319
26320 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
26321 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
26322 break;
26323
26324 default:
26325 gcc_unreachable ();
26326 }
26327
26328 return "";
26329 }
26330
26331 /* Output a call-via instruction for thumb state. */
26332 const char *
26333 thumb_call_via_reg (rtx reg)
26334 {
26335 int regno = REGNO (reg);
26336 rtx *labelp;
26337
26338 gcc_assert (regno < LR_REGNUM);
26339
26340 /* If we are in the normal text section we can use a single instance
26341 per compilation unit. If we are doing function sections, then we need
26342 an entry per section, since we can't rely on reachability. */
26343 if (in_section == text_section)
26344 {
26345 thumb_call_reg_needed = 1;
26346
26347 if (thumb_call_via_label[regno] == NULL)
26348 thumb_call_via_label[regno] = gen_label_rtx ();
26349 labelp = thumb_call_via_label + regno;
26350 }
26351 else
26352 {
26353 if (cfun->machine->call_via[regno] == NULL)
26354 cfun->machine->call_via[regno] = gen_label_rtx ();
26355 labelp = cfun->machine->call_via + regno;
26356 }
26357
26358 output_asm_insn ("bl\t%a0", labelp);
26359 return "";
26360 }
26361
26362 /* Routines for generating rtl. */
26363 void
26364 thumb_expand_cpymemqi (rtx *operands)
26365 {
26366 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26367 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26368 HOST_WIDE_INT len = INTVAL (operands[2]);
26369 HOST_WIDE_INT offset = 0;
26370
26371 while (len >= 12)
26372 {
26373 emit_insn (gen_cpymem12b (out, in, out, in));
26374 len -= 12;
26375 }
26376
26377 if (len >= 8)
26378 {
26379 emit_insn (gen_cpymem8b (out, in, out, in));
26380 len -= 8;
26381 }
26382
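  /* The cpymem12b/cpymem8b expanders take OUT and IN as both destinations
     and sources, so they are expected to advance the pointers themselves;
     OFFSET therefore only tracks the word/halfword/byte tail copies below.  */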
26383 if (len >= 4)
26384 {
26385 rtx reg = gen_reg_rtx (SImode);
26386 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26387 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26388 len -= 4;
26389 offset += 4;
26390 }
26391
26392 if (len >= 2)
26393 {
26394 rtx reg = gen_reg_rtx (HImode);
26395 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26396 plus_constant (Pmode, in,
26397 offset))));
26398 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26399 offset)),
26400 reg));
26401 len -= 2;
26402 offset += 2;
26403 }
26404
26405 if (len)
26406 {
26407 rtx reg = gen_reg_rtx (QImode);
26408 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26409 plus_constant (Pmode, in,
26410 offset))));
26411 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26412 offset)),
26413 reg));
26414 }
26415 }
26416
26417 void
26418 thumb_reload_out_hi (rtx *operands)
26419 {
26420 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26421 }
26422
26423 /* Return the length of a function name prefix
26424 that starts with the character C. */
26425 static int
26426 arm_get_strip_length (int c)
26427 {
26428 switch (c)
26429 {
26430 ARM_NAME_ENCODING_LENGTHS
26431 default: return 0;
26432 }
26433 }
26434
26435 /* Return a pointer to a function's name with any
26436 and all prefix encodings stripped from it. */
26437 const char *
26438 arm_strip_name_encoding (const char *name)
26439 {
26440 int skip;
26441
26442 while ((skip = arm_get_strip_length (* name)))
26443 name += skip;
26444
26445 return name;
26446 }
26447
26448 /* If there is a '*' anywhere in the name's prefix, then
26449 emit the stripped name verbatim, otherwise prepend an
26450 underscore if leading underscores are being used. */
26451 void
26452 arm_asm_output_labelref (FILE *stream, const char *name)
26453 {
26454 int skip;
26455 int verbatim = 0;
26456
26457 while ((skip = arm_get_strip_length (* name)))
26458 {
26459 verbatim |= (*name == '*');
26460 name += skip;
26461 }
26462
26463 if (verbatim)
26464 fputs (name, stream);
26465 else
26466 asm_fprintf (stream, "%U%s", name);
26467 }
26468
26469 /* This function is used to emit an EABI tag and its associated value.
26470 We emit the numerical value of the tag in case the assembler does not
26471 support textual tags (e.g. gas prior to 2.20). If requested we include
26472 the tag name in a comment so that anyone reading the assembler output
26473 will know which tag is being set.
26474
26475 This function is not static because arm-c.c needs it too. */
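/* For example, arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1) would
   emit something like
	.eabi_attribute 19, 1	@ Tag_ABI_FP_rounding
   under -fverbose-asm, assuming '@' is the assembler comment character.  */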
26476
26477 void
26478 arm_emit_eabi_attribute (const char *name, int num, int val)
26479 {
26480 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26481 if (flag_verbose_asm || flag_debug_asm)
26482 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26483 asm_fprintf (asm_out_file, "\n");
26484 }
26485
26486 /* This function is used to print CPU tuning information as comment
26487 in assembler file. Pointers are not printed for now. */
26488
26489 void
26490 arm_print_tune_info (void)
26491 {
26492 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26493 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26494 current_tune->constant_limit);
26495 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26496 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26497 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26498 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26499 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26500 "prefetch.l1_cache_size:\t%d\n",
26501 current_tune->prefetch.l1_cache_size);
26502 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26503 "prefetch.l1_cache_line_size:\t%d\n",
26504 current_tune->prefetch.l1_cache_line_size);
26505 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26506 "prefer_constant_pool:\t%d\n",
26507 (int) current_tune->prefer_constant_pool);
26508 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26509 "branch_cost:\t(s:speed, p:predictable)\n");
26510 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26511 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26512 current_tune->branch_cost (false, false));
26513 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26514 current_tune->branch_cost (false, true));
26515 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26516 current_tune->branch_cost (true, false));
26517 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26518 current_tune->branch_cost (true, true));
26519 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26520 "prefer_ldrd_strd:\t%d\n",
26521 (int) current_tune->prefer_ldrd_strd);
26522 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26523 "logical_op_non_short_circuit:\t[%d,%d]\n",
26524 (int) current_tune->logical_op_non_short_circuit_thumb,
26525 (int) current_tune->logical_op_non_short_circuit_arm);
26526 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26527 "prefer_neon_for_64bits:\t%d\n",
26528 (int) current_tune->prefer_neon_for_64bits);
26529 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26530 "disparage_flag_setting_t16_encodings:\t%d\n",
26531 (int) current_tune->disparage_flag_setting_t16_encodings);
26532 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26533 "string_ops_prefer_neon:\t%d\n",
26534 (int) current_tune->string_ops_prefer_neon);
26535 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26536 "max_insns_inline_memset:\t%d\n",
26537 current_tune->max_insns_inline_memset);
26538 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26539 current_tune->fusible_ops);
26540 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26541 (int) current_tune->sched_autopref);
26542 }
26543
26544 /* Print .arch and .arch_extension directives corresponding to the
26545 current architecture configuration. */
26546 static void
26547 arm_print_asm_arch_directives ()
26548 {
26549 const arch_option *arch
26550 = arm_parse_arch_option_name (all_architectures, "-march",
26551 arm_active_target.arch_name);
26552 auto_sbitmap opt_bits (isa_num_bits);
26553
26554 gcc_assert (arch);
26555
26556 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26557 arm_last_printed_arch_string = arm_active_target.arch_name;
26558 if (!arch->common.extensions)
26559 return;
26560
26561 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26562 opt->name != NULL;
26563 opt++)
26564 {
26565 if (!opt->remove)
26566 {
26567 arm_initialize_isa (opt_bits, opt->isa_bits);
26568
26569 /* If every feature bit of this option is set in the target
26570 ISA specification, print out the option name. However,
26571 don't print anything if all the bits are part of the
26572 FPU specification. */
26573 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26574 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26575 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26576 }
26577 }
26578 }
26579
26580 static void
26581 arm_file_start (void)
26582 {
26583 int val;
26584
26585 if (TARGET_BPABI)
26586 {
26587 /* We don't have a specified CPU. Use the architecture to
26588 generate the tags.
26589
26590 Note: it might be better to do this unconditionally, then the
26591 assembler would not need to know about all new CPU names as
26592 they are added. */
26593 if (!arm_active_target.core_name)
26594 {
26595 /* armv7ve doesn't support any extensions. */
26596 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26597 {
26598 /* Keep backward compatibility for assemblers
26599 which don't support armv7ve. */
26600 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26601 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26602 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26603 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26604 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26605 arm_last_printed_arch_string = "armv7ve";
26606 }
26607 else
26608 arm_print_asm_arch_directives ();
26609 }
26610 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26611 {
26612 asm_fprintf (asm_out_file, "\t.arch %s\n",
26613 arm_active_target.core_name + 8);
26614 arm_last_printed_arch_string = arm_active_target.core_name + 8;
26615 }
26616 else
26617 {
26618 const char* truncated_name
26619 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26620 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26621 }
26622
26623 if (print_tune_info)
26624 arm_print_tune_info ();
26625
26626 if (! TARGET_SOFT_FLOAT)
26627 {
26628 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26629 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26630
26631 if (TARGET_HARD_FLOAT_ABI)
26632 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26633 }
26634
26635 /* Some of these attributes only apply when the corresponding features
26636 are used. However we don't have any easy way of figuring this out.
26637 Conservatively record the setting that would have been used. */
26638
26639 if (flag_rounding_math)
26640 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26641
26642 if (!flag_unsafe_math_optimizations)
26643 {
26644 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26645 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26646 }
26647 if (flag_signaling_nans)
26648 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26649
26650 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26651 flag_finite_math_only ? 1 : 3);
26652
26653 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26654 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26655 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26656 flag_short_enums ? 1 : 2);
26657
26658 /* Tag_ABI_optimization_goals. */
26659 if (optimize_size)
26660 val = 4;
26661 else if (optimize >= 2)
26662 val = 2;
26663 else if (optimize)
26664 val = 1;
26665 else
26666 val = 6;
26667 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26668
26669 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26670 unaligned_access);
26671
26672 if (arm_fp16_format)
26673 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26674 (int) arm_fp16_format);
26675
26676 if (arm_lang_output_object_attributes_hook)
26677 arm_lang_output_object_attributes_hook();
26678 }
26679
26680 default_file_start ();
26681 }
26682
26683 static void
26684 arm_file_end (void)
26685 {
26686 int regno;
26687
26688 if (NEED_INDICATE_EXEC_STACK)
26689 /* Add .note.GNU-stack. */
26690 file_end_indicate_exec_stack ();
26691
26692 if (! thumb_call_reg_needed)
26693 return;
26694
26695 switch_to_section (text_section);
26696 asm_fprintf (asm_out_file, "\t.code 16\n");
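  /* Align to 2 bytes (2^1), the minimum alignment required for Thumb code.  */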
26697 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26698
26699 for (regno = 0; regno < LR_REGNUM; regno++)
26700 {
26701 rtx label = thumb_call_via_label[regno];
26702
26703 if (label != 0)
26704 {
26705 targetm.asm_out.internal_label (asm_out_file, "L",
26706 CODE_LABEL_NUMBER (label));
26707 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26708 }
26709 }
26710 }
26711
26712 #ifndef ARM_PE
26713 /* Symbols in the text segment can be accessed without indirecting via the
26714 constant pool; it may take an extra binary operation, but this is still
26715 faster than indirecting via memory. Don't do this when not optimizing,
26716 since we won't be calculating al of the offsets necessary to do this
26717 simplification. */
26718
26719 static void
26720 arm_encode_section_info (tree decl, rtx rtl, int first)
26721 {
26722 if (optimize > 0 && TREE_CONSTANT (decl))
26723 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26724
26725 default_encode_section_info (decl, rtl, first);
26726 }
26727 #endif /* !ARM_PE */
26728
26729 static void
26730 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26731 {
26732 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26733 && !strcmp (prefix, "L"))
26734 {
26735 arm_ccfsm_state = 0;
26736 arm_target_insn = NULL;
26737 }
26738 default_internal_label (stream, prefix, labelno);
26739 }
26740
26741 /* Output code to add DELTA to the first argument, and then jump
26742 to FUNCTION. Used for C++ multiple inheritance. */
26743
26744 static void
26745 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26746 HOST_WIDE_INT, tree function)
26747 {
26748 static int thunk_label = 0;
26749 char label[256];
26750 char labelpc[256];
26751 int mi_delta = delta;
26752 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26753 int shift = 0;
26754 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26755 ? 1 : 0);
26756 if (mi_delta < 0)
26757 mi_delta = - mi_delta;
26758
26759 final_start_function (emit_barrier (), file, 1);
26760
26761 if (TARGET_THUMB1)
26762 {
26763 int labelno = thunk_label++;
26764 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26765 /* Thunks are entered in ARM mode when available. */
26766 if (TARGET_THUMB1_ONLY)
26767 {
26768 /* push r3 so we can use it as a temporary. */
26769 /* TODO: Omit this save if r3 is not used. */
26770 fputs ("\tpush {r3}\n", file);
26771 fputs ("\tldr\tr3, ", file);
26772 }
26773 else
26774 {
26775 fputs ("\tldr\tr12, ", file);
26776 }
26777 assemble_name (file, label);
26778 fputc ('\n', file);
26779 if (flag_pic)
26780 {
26781 /* If we are generating PIC, the ldr instruction below loads
26782 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26783 the address of the add + 8, so we have:
26784
26785 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26786 = target + 1.
26787
26788 Note that we have "+ 1" because some versions of GNU ld
26789 don't set the low bit of the result for R_ARM_REL32
26790 relocations against thumb function symbols.
26791 On ARMv6M this is +4, not +8. */
26792 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26793 assemble_name (file, labelpc);
26794 fputs (":\n", file);
26795 if (TARGET_THUMB1_ONLY)
26796 {
26797 /* This is 2 insns after the start of the thunk, so we know it
26798 is 4-byte aligned. */
26799 fputs ("\tadd\tr3, pc, r3\n", file);
26800 fputs ("\tmov r12, r3\n", file);
26801 }
26802 else
26803 fputs ("\tadd\tr12, pc, r12\n", file);
26804 }
26805 else if (TARGET_THUMB1_ONLY)
26806 fputs ("\tmov r12, r3\n", file);
26807 }
26808 if (TARGET_THUMB1_ONLY)
26809 {
26810 if (mi_delta > 255)
26811 {
26812 fputs ("\tldr\tr3, ", file);
26813 assemble_name (file, label);
26814 fputs ("+4\n", file);
26815 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26816 mi_op, this_regno, this_regno);
26817 }
26818 else if (mi_delta != 0)
26819 {
26820 /* Thumb1 unified syntax requires the s suffix on the instruction name when
26821 one of the operands is an immediate. */
26822 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26823 mi_op, this_regno, this_regno,
26824 mi_delta);
26825 }
26826 }
26827 else
26828 {
26829 /* TODO: Use movw/movt for large constants when available. */
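/* The loop below splits the delta into 8-bit chunks that start on even
   bit positions and emits one add/sub per chunk.  Illustrative example:
   a delta of 0x1004 is emitted as "add rN, rN, #4" followed by
   "add rN, rN, #4096".  */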
26830 while (mi_delta != 0)
26831 {
26832 if ((mi_delta & (3 << shift)) == 0)
26833 shift += 2;
26834 else
26835 {
26836 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26837 mi_op, this_regno, this_regno,
26838 mi_delta & (0xff << shift));
26839 mi_delta &= ~(0xff << shift);
26840 shift += 8;
26841 }
26842 }
26843 }
26844 if (TARGET_THUMB1)
26845 {
26846 if (TARGET_THUMB1_ONLY)
26847 fputs ("\tpop\t{r3}\n", file);
26848
26849 fprintf (file, "\tbx\tr12\n");
26850 ASM_OUTPUT_ALIGN (file, 2);
26851 assemble_name (file, label);
26852 fputs (":\n", file);
26853 if (flag_pic)
26854 {
26855 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26856 rtx tem = XEXP (DECL_RTL (function), 0);
26857 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26858 pipeline offset is four rather than eight. Adjust the offset
26859 accordingly. */
26860 tem = plus_constant (GET_MODE (tem), tem,
26861 TARGET_THUMB1_ONLY ? -3 : -7);
26862 tem = gen_rtx_MINUS (GET_MODE (tem),
26863 tem,
26864 gen_rtx_SYMBOL_REF (Pmode,
26865 ggc_strdup (labelpc)));
26866 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26867 }
26868 else
26869 /* Output ".word .LTHUNKn". */
26870 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26871
26872 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26873 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26874 }
26875 else
26876 {
26877 fputs ("\tb\t", file);
26878 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26879 if (NEED_PLT_RELOC)
26880 fputs ("(PLT)", file);
26881 fputc ('\n', file);
26882 }
26883
26884 final_end_function ();
26885 }
26886
26887 /* MI thunk handling for TARGET_32BIT. */
26888
26889 static void
26890 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26891 HOST_WIDE_INT vcall_offset, tree function)
26892 {
26893 const bool long_call_p = arm_is_long_call_p (function);
26894
26895 /* On ARM, this_regno is R0 or R1 depending on
26896 whether the function returns an aggregate or not.
26897 */
26898 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26899 function)
26900 ? R1_REGNUM : R0_REGNUM);
26901
26902 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26903 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26904 reload_completed = 1;
26905 emit_note (NOTE_INSN_PROLOGUE_END);
26906
26907 /* Add DELTA to THIS_RTX. */
26908 if (delta != 0)
26909 arm_split_constant (PLUS, Pmode, NULL_RTX,
26910 delta, this_rtx, this_rtx, false);
26911
26912 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26913 if (vcall_offset != 0)
26914 {
26915 /* Load *THIS_RTX. */
26916 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26917 /* Compute *THIS_RTX + VCALL_OFFSET. */
26918 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26919 false);
26920 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26921 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26922 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26923 }
26924
26925 /* Generate a tail call to the target function. */
26926 if (!TREE_USED (function))
26927 {
26928 assemble_external (function);
26929 TREE_USED (function) = 1;
26930 }
26931 rtx funexp = XEXP (DECL_RTL (function), 0);
26932 if (long_call_p)
26933 {
26934 emit_move_insn (temp, funexp);
26935 funexp = temp;
26936 }
26937 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26938 rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26939 SIBLING_CALL_P (insn) = 1;
26940 emit_barrier ();
26941
26942 /* Indirect calls require a bit of fixup in PIC mode. */
26943 if (long_call_p)
26944 {
26945 split_all_insns_noflow ();
26946 arm_reorg ();
26947 }
26948
26949 insn = get_insns ();
26950 shorten_branches (insn);
26951 final_start_function (insn, file, 1);
26952 final (insn, file, 1);
26953 final_end_function ();
26954
26955 /* Stop pretending this is a post-reload pass. */
26956 reload_completed = 0;
26957 }
26958
26959 /* Output code to add DELTA to the first argument, and then jump
26960 to FUNCTION. Used for C++ multiple inheritance. */
26961
26962 static void
26963 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26964 HOST_WIDE_INT vcall_offset, tree function)
26965 {
26966 const char *fnname = IDENTIFIER_POINTER (DECL_ASSEMBLER_NAME (thunk));
26967
26968 assemble_start_function (thunk, fnname);
26969 if (TARGET_32BIT)
26970 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26971 else
26972 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26973 assemble_end_function (thunk, fnname);
26974 }
26975
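/* Write the elements of the CONST_VECTOR X to FILE as one hexadecimal
   literal, highest-numbered element first, using a field width that
   matches the element size.  Always returns 1.  */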
26976 int
26977 arm_emit_vector_const (FILE *file, rtx x)
26978 {
26979 int i;
26980 const char * pattern;
26981
26982 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26983
26984 switch (GET_MODE (x))
26985 {
26986 case E_V2SImode: pattern = "%08x"; break;
26987 case E_V4HImode: pattern = "%04x"; break;
26988 case E_V8QImode: pattern = "%02x"; break;
26989 default: gcc_unreachable ();
26990 }
26991
26992 fprintf (file, "0x");
26993 for (i = CONST_VECTOR_NUNITS (x); i--;)
26994 {
26995 rtx element;
26996
26997 element = CONST_VECTOR_ELT (x, i);
26998 fprintf (file, pattern, INTVAL (element));
26999 }
27000
27001 return 1;
27002 }
27003
27004 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
27005 HFmode constant pool entries are actually loaded with ldr. */
27006 void
27007 arm_emit_fp16_const (rtx c)
27008 {
27009 long bits;
27010
27011 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
27012 if (WORDS_BIG_ENDIAN)
27013 assemble_zeros (2);
27014 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
27015 if (!WORDS_BIG_ENDIAN)
27016 assemble_zeros (2);
27017 }
27018
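/* Output the assembly for loading an iWMMXt GR register (operand 0) from
   memory (operand 1).  A plain wldrw is used unless the address is a base
   register plus an out-of-range offset, in which case the base register
   is saved on the stack, used as a temporary for an ldr, the value is
   transferred with tmcr, and the base register is then restored.  */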
27019 const char *
27020 arm_output_load_gr (rtx *operands)
27021 {
27022 rtx reg;
27023 rtx offset;
27024 rtx wcgr;
27025 rtx sum;
27026
27027 if (!MEM_P (operands [1])
27028 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
27029 || !REG_P (reg = XEXP (sum, 0))
27030 || !CONST_INT_P (offset = XEXP (sum, 1))
27031 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
27032 return "wldrw%?\t%0, %1";
27033
27034 /* Fix up an out-of-range load of a GR register. */
27035 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
27036 wcgr = operands[0];
27037 operands[0] = reg;
27038 output_asm_insn ("ldr%?\t%0, %1", operands);
27039
27040 operands[0] = wcgr;
27041 operands[1] = reg;
27042 output_asm_insn ("tmcr%?\t%0, %1", operands);
27043 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
27044
27045 return "";
27046 }
27047
27048 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
27049
27050 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
27051 named arg and all anonymous args onto the stack.
27052 XXX I know the prologue shouldn't be pushing registers, but it is faster
27053 that way. */
27054
27055 static void
27056 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
27057 machine_mode mode,
27058 tree type,
27059 int *pretend_size,
27060 int second_time ATTRIBUTE_UNUSED)
27061 {
27062 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
27063 int nregs;
27064
27065 cfun->machine->uses_anonymous_args = 1;
27066 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
27067 {
27068 nregs = pcum->aapcs_ncrn;
27069 if (nregs & 1)
27070 {
27071 int res = arm_needs_doubleword_align (mode, type);
27072 if (res < 0 && warn_psabi)
27073 inform (input_location, "parameter passing for argument of "
27074 "type %qT changed in GCC 7.1", type);
27075 else if (res > 0)
27076 {
27077 nregs++;
27078 if (res > 1 && warn_psabi)
27079 inform (input_location,
27080 "parameter passing for argument of type "
27081 "%qT changed in GCC 9.1", type);
27082 }
27083 }
27084 }
27085 else
27086 nregs = pcum->nregs;
27087
27088 if (nregs < NUM_ARG_REGS)
27089 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
27090 }
27091
27092 /* We can't rely on the caller doing the proper promotion when
27093 using APCS or ATPCS. */
27094
27095 static bool
27096 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
27097 {
27098 return !TARGET_AAPCS_BASED;
27099 }
27100
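/* Worker for the promote_function_mode target hook: integer arguments and
   return values narrower than a word are promoted to SImode.  */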
27101 static machine_mode
27102 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
27103 machine_mode mode,
27104 int *punsignedp ATTRIBUTE_UNUSED,
27105 const_tree fntype ATTRIBUTE_UNUSED,
27106 int for_return ATTRIBUTE_UNUSED)
27107 {
27108 if (GET_MODE_CLASS (mode) == MODE_INT
27109 && GET_MODE_SIZE (mode) < 4)
27110 return SImode;
27111
27112 return mode;
27113 }
27114
27115
27116 static bool
27117 arm_default_short_enums (void)
27118 {
27119 return ARM_DEFAULT_SHORT_ENUMS;
27120 }
27121
27122
27123 /* AAPCS requires that anonymous bitfields affect structure alignment. */
27124
27125 static bool
27126 arm_align_anon_bitfield (void)
27127 {
27128 return TARGET_AAPCS_BASED;
27129 }
27130
27131
27132 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
27133
27134 static tree
27135 arm_cxx_guard_type (void)
27136 {
27137 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
27138 }
27139
27140
27141 /* The EABI says test the least significant bit of a guard variable. */
27142
27143 static bool
27144 arm_cxx_guard_mask_bit (void)
27145 {
27146 return TARGET_AAPCS_BASED;
27147 }
27148
27149
27150 /* The EABI specifies that all array cookies are 8 bytes long. */
27151
27152 static tree
27153 arm_get_cookie_size (tree type)
27154 {
27155 tree size;
27156
27157 if (!TARGET_AAPCS_BASED)
27158 return default_cxx_get_cookie_size (type);
27159
27160 size = build_int_cst (sizetype, 8);
27161 return size;
27162 }
27163
27164
27165 /* The EABI says that array cookies should also contain the element size. */
27166
27167 static bool
27168 arm_cookie_has_size (void)
27169 {
27170 return TARGET_AAPCS_BASED;
27171 }
27172
27173
27174 /* The EABI says constructors and destructors should return a pointer to
27175 the object constructed/destroyed. */
27176
27177 static bool
27178 arm_cxx_cdtor_returns_this (void)
27179 {
27180 return TARGET_AAPCS_BASED;
27181 }
27182
27183 /* The EABI says that an inline function may never be the key
27184 method. */
27185
27186 static bool
27187 arm_cxx_key_method_may_be_inline (void)
27188 {
27189 return !TARGET_AAPCS_BASED;
27190 }
27191
27192 static void
27193 arm_cxx_determine_class_data_visibility (tree decl)
27194 {
27195 if (!TARGET_AAPCS_BASED
27196 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
27197 return;
27198
27199 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
27200 is exported. However, on systems without dynamic vague linkage,
27201 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
27202 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
27203 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
27204 else
27205 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
27206 DECL_VISIBILITY_SPECIFIED (decl) = 1;
27207 }
27208
27209 static bool
27210 arm_cxx_class_data_always_comdat (void)
27211 {
27212 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
27213 vague linkage if the class has no key function. */
27214 return !TARGET_AAPCS_BASED;
27215 }
27216
27217
27218 /* The EABI says __aeabi_atexit should be used to register static
27219 destructors. */
27220
27221 static bool
27222 arm_cxx_use_aeabi_atexit (void)
27223 {
27224 return TARGET_AAPCS_BASED;
27225 }
27226
27227
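/* Store SOURCE as the return address of the current function.  If LR was
   not saved on the stack, simply move SOURCE into LR; otherwise write it
   to LR's save slot, using SCRATCH when the slot is too far from the
   stack pointer for a single immediate offset.  */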
27228 void
27229 arm_set_return_address (rtx source, rtx scratch)
27230 {
27231 arm_stack_offsets *offsets;
27232 HOST_WIDE_INT delta;
27233 rtx addr, mem;
27234 unsigned long saved_regs;
27235
27236 offsets = arm_get_frame_offsets ();
27237 saved_regs = offsets->saved_regs_mask;
27238
27239 if ((saved_regs & (1 << LR_REGNUM)) == 0)
27240 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27241 else
27242 {
27243 if (frame_pointer_needed)
27244 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
27245 else
27246 {
27247 /* LR will be the first saved register. */
27248 delta = offsets->outgoing_args - (offsets->frame + 4);
27249
27250
27251 if (delta >= 4096)
27252 {
27253 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
27254 GEN_INT (delta & ~4095)));
27255 addr = scratch;
27256 delta &= 4095;
27257 }
27258 else
27259 addr = stack_pointer_rtx;
27260
27261 addr = plus_constant (Pmode, addr, delta);
27262 }
27263
27264 /* The store needs to be marked to prevent DSE from deleting
27265 it as dead if it is based on fp. */
27266 mem = gen_frame_mem (Pmode, addr);
27267 MEM_VOLATILE_P (mem) = true;
27268 emit_move_insn (mem, source);
27269 }
27270 }
27271
27272
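/* Thumb counterpart of arm_set_return_address.  The immediate offset
   limits are tighter here, so SCRATCH is used whenever the save slot is
   more than LIMIT bytes away from the base register.  */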
27273 void
27274 thumb_set_return_address (rtx source, rtx scratch)
27275 {
27276 arm_stack_offsets *offsets;
27277 HOST_WIDE_INT delta;
27278 HOST_WIDE_INT limit;
27279 int reg;
27280 rtx addr, mem;
27281 unsigned long mask;
27282
27283 emit_use (source);
27284
27285 offsets = arm_get_frame_offsets ();
27286 mask = offsets->saved_regs_mask;
27287 if (mask & (1 << LR_REGNUM))
27288 {
27289 limit = 1024;
27290 /* Find the saved regs. */
27291 if (frame_pointer_needed)
27292 {
27293 delta = offsets->soft_frame - offsets->saved_args;
27294 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
27295 if (TARGET_THUMB1)
27296 limit = 128;
27297 }
27298 else
27299 {
27300 delta = offsets->outgoing_args - offsets->saved_args;
27301 reg = SP_REGNUM;
27302 }
27303 /* Allow for the stack frame. */
27304 if (TARGET_THUMB1 && TARGET_BACKTRACE)
27305 delta -= 16;
27306 /* The link register is always the first saved register. */
27307 delta -= 4;
27308
27309 /* Construct the address. */
27310 addr = gen_rtx_REG (SImode, reg);
27311 if (delta > limit)
27312 {
27313 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
27314 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
27315 addr = scratch;
27316 }
27317 else
27318 addr = plus_constant (Pmode, addr, delta);
27319
27320 /* The store needs to be marked to prevent DSE from deleting
27321 it as dead if it is based on fp. */
27322 mem = gen_frame_mem (Pmode, addr);
27323 MEM_VOLATILE_P (mem) = true;
27324 emit_move_insn (mem, source);
27325 }
27326 else
27327 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27328 }
27329
27330 /* Implements target hook vector_mode_supported_p. */
27331 bool
27332 arm_vector_mode_supported_p (machine_mode mode)
27333 {
27334 /* Neon also supports V2SImode, etc. listed in the clause below. */
27335 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
27336 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
27337 || mode == V2DImode || mode == V8HFmode))
27338 return true;
27339
27340 if ((TARGET_NEON || TARGET_IWMMXT)
27341 && ((mode == V2SImode)
27342 || (mode == V4HImode)
27343 || (mode == V8QImode)))
27344 return true;
27345
27346 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
27347 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
27348 || mode == V2HAmode))
27349 return true;
27350
27351 return false;
27352 }
27353
27354 /* Implements target hook array_mode_supported_p. */
27355
27356 static bool
27357 arm_array_mode_supported_p (machine_mode mode,
27358 unsigned HOST_WIDE_INT nelems)
27359 {
27360 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
27361 for now, as the lane-swapping logic needs to be extended in the expanders.
27362 See PR target/82518. */
27363 if (TARGET_NEON && !BYTES_BIG_ENDIAN
27364 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
27365 && (nelems >= 2 && nelems <= 4))
27366 return true;
27367
27368 return false;
27369 }
27370
27371 /* Use the option -mvectorize-with-neon-double to override the use of quadword
27372 registers when autovectorizing for Neon, at least until multiple vector
27373 widths are supported properly by the middle-end. */
27374
27375 static machine_mode
27376 arm_preferred_simd_mode (scalar_mode mode)
27377 {
27378 if (TARGET_NEON)
27379 switch (mode)
27380 {
27381 case E_SFmode:
27382 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27383 case E_SImode:
27384 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27385 case E_HImode:
27386 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27387 case E_QImode:
27388 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27389 case E_DImode:
27390 if (!TARGET_NEON_VECTORIZE_DOUBLE)
27391 return V2DImode;
27392 break;
27393
27394 default:;
27395 }
27396
27397 if (TARGET_REALLY_IWMMXT)
27398 switch (mode)
27399 {
27400 case E_SImode:
27401 return V2SImode;
27402 case E_HImode:
27403 return V4HImode;
27404 case E_QImode:
27405 return V8QImode;
27406
27407 default:;
27408 }
27409
27410 return word_mode;
27411 }
27412
27413 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27414
27415 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27416 using r0-r4 for function arguments, r7 for the stack frame and don't have
27417 enough left over to do doubleword arithmetic. For Thumb-2 all the
27418 potentially problematic instructions accept high registers so this is not
27419 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27420 that require many low registers. */
27421 static bool
27422 arm_class_likely_spilled_p (reg_class_t rclass)
27423 {
27424 if ((TARGET_THUMB1 && rclass == LO_REGS)
27425 || rclass == CC_REG)
27426 return true;
27427
27428 return false;
27429 }
27430
27431 /* Implements target hook small_register_classes_for_mode_p. */
27432 bool
27433 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27434 {
27435 return TARGET_THUMB1;
27436 }
27437
27438 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27439 ARM insns and therefore guarantee that the shift count is modulo 256.
27440 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27441 guarantee no particular behavior for out-of-range counts. */
27442
27443 static unsigned HOST_WIDE_INT
27444 arm_shift_truncation_mask (machine_mode mode)
27445 {
27446 return mode == SImode ? 255 : 0;
27447 }
27448
27449
27450 /* Map internal gcc register numbers to DWARF2 register numbers. */
27451
27452 unsigned int
27453 arm_dbx_register_number (unsigned int regno)
27454 {
27455 if (regno < 16)
27456 return regno;
27457
27458 if (IS_VFP_REGNUM (regno))
27459 {
27460 /* See comment in arm_dwarf_register_span. */
27461 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27462 return 64 + regno - FIRST_VFP_REGNUM;
27463 else
27464 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27465 }
27466
27467 if (IS_IWMMXT_GR_REGNUM (regno))
27468 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27469
27470 if (IS_IWMMXT_REGNUM (regno))
27471 return 112 + regno - FIRST_IWMMXT_REGNUM;
27472
27473 return DWARF_FRAME_REGISTERS;
27474 }
27475
27476 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27477 GCC models them as 64 32-bit registers, so we need to describe this to
27478 the DWARF generation code. Other registers can use the default. */
27479 static rtx
27480 arm_dwarf_register_span (rtx rtl)
27481 {
27482 machine_mode mode;
27483 unsigned regno;
27484 rtx parts[16];
27485 int nregs;
27486 int i;
27487
27488 regno = REGNO (rtl);
27489 if (!IS_VFP_REGNUM (regno))
27490 return NULL_RTX;
27491
27492 /* XXX FIXME: The EABI defines two VFP register ranges:
27493 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27494 256-287: D0-D31
27495 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27496 corresponding D register. Until GDB supports this, we shall use the
27497 legacy encodings. We also use these encodings for D0-D15 for
27498 compatibility with older debuggers. */
27499 mode = GET_MODE (rtl);
27500 if (GET_MODE_SIZE (mode) < 8)
27501 return NULL_RTX;
27502
27503 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27504 {
27505 nregs = GET_MODE_SIZE (mode) / 4;
27506 for (i = 0; i < nregs; i += 2)
27507 if (TARGET_BIG_END)
27508 {
27509 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27510 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27511 }
27512 else
27513 {
27514 parts[i] = gen_rtx_REG (SImode, regno + i);
27515 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27516 }
27517 }
27518 else
27519 {
27520 nregs = GET_MODE_SIZE (mode) / 8;
27521 for (i = 0; i < nregs; i++)
27522 parts[i] = gen_rtx_REG (DImode, regno + i);
27523 }
27524
27525 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27526 }
27527
27528 #if ARM_UNWIND_INFO
27529 /* Emit unwind directives for a store-multiple instruction or stack pointer
27530 push during alignment.
27531 These should only ever be generated by the function prologue code, so
27532 expect them to have a particular form.
27533 The store-multiple instruction sometimes pushes pc as the last register,
27534 although it should not be tracked into unwind information, or for -Os
27535 sometimes pushes some dummy registers before the first register that needs
27536 to be tracked in unwind information; such dummy registers are there just
27537 to avoid a separate stack adjustment, and will not be restored in the
27538 epilogue. */
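/* Illustrative example: a prologue "push {r4, r5, lr}" is annotated here
   as "\t.save {r4, r5, lr}".  */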
27539
27540 static void
27541 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27542 {
27543 int i;
27544 HOST_WIDE_INT offset;
27545 HOST_WIDE_INT nregs;
27546 int reg_size;
27547 unsigned reg;
27548 unsigned lastreg;
27549 unsigned padfirst = 0, padlast = 0;
27550 rtx e;
27551
27552 e = XVECEXP (p, 0, 0);
27553 gcc_assert (GET_CODE (e) == SET);
27554
27555 /* First insn will adjust the stack pointer. */
27556 gcc_assert (GET_CODE (e) == SET
27557 && REG_P (SET_DEST (e))
27558 && REGNO (SET_DEST (e)) == SP_REGNUM
27559 && GET_CODE (SET_SRC (e)) == PLUS);
27560
27561 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27562 nregs = XVECLEN (p, 0) - 1;
27563 gcc_assert (nregs);
27564
27565 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27566 if (reg < 16)
27567 {
27568 /* For -Os dummy registers can be pushed at the beginning to
27569 avoid separate stack pointer adjustment. */
27570 e = XVECEXP (p, 0, 1);
27571 e = XEXP (SET_DEST (e), 0);
27572 if (GET_CODE (e) == PLUS)
27573 padfirst = INTVAL (XEXP (e, 1));
27574 gcc_assert (padfirst == 0 || optimize_size);
27575 /* The function prologue may also push pc, but not annotate it as it is
27576 never restored. We turn this into a stack pointer adjustment. */
27577 e = XVECEXP (p, 0, nregs);
27578 e = XEXP (SET_DEST (e), 0);
27579 if (GET_CODE (e) == PLUS)
27580 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27581 else
27582 padlast = offset - 4;
27583 gcc_assert (padlast == 0 || padlast == 4);
27584 if (padlast == 4)
27585 fprintf (asm_out_file, "\t.pad #4\n");
27586 reg_size = 4;
27587 fprintf (asm_out_file, "\t.save {");
27588 }
27589 else if (IS_VFP_REGNUM (reg))
27590 {
27591 reg_size = 8;
27592 fprintf (asm_out_file, "\t.vsave {");
27593 }
27594 else
27595 /* Unknown register type. */
27596 gcc_unreachable ();
27597
27598 /* If the stack increment doesn't match the size of the saved registers,
27599 something has gone horribly wrong. */
27600 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27601
27602 offset = padfirst;
27603 lastreg = 0;
27604 /* The remaining insns will describe the stores. */
27605 for (i = 1; i <= nregs; i++)
27606 {
27607 /* Expect (set (mem <addr>) (reg)).
27608 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27609 e = XVECEXP (p, 0, i);
27610 gcc_assert (GET_CODE (e) == SET
27611 && MEM_P (SET_DEST (e))
27612 && REG_P (SET_SRC (e)));
27613
27614 reg = REGNO (SET_SRC (e));
27615 gcc_assert (reg >= lastreg);
27616
27617 if (i != 1)
27618 fprintf (asm_out_file, ", ");
27619 /* We can't use %r for vfp because we need to use the
27620 double precision register names. */
27621 if (IS_VFP_REGNUM (reg))
27622 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27623 else
27624 asm_fprintf (asm_out_file, "%r", reg);
27625
27626 if (flag_checking)
27627 {
27628 /* Check that the addresses are consecutive. */
27629 e = XEXP (SET_DEST (e), 0);
27630 if (GET_CODE (e) == PLUS)
27631 gcc_assert (REG_P (XEXP (e, 0))
27632 && REGNO (XEXP (e, 0)) == SP_REGNUM
27633 && CONST_INT_P (XEXP (e, 1))
27634 && offset == INTVAL (XEXP (e, 1)));
27635 else
27636 gcc_assert (i == 1
27637 && REG_P (e)
27638 && REGNO (e) == SP_REGNUM);
27639 offset += reg_size;
27640 }
27641 }
27642 fprintf (asm_out_file, "}\n");
27643 if (padfirst)
27644 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27645 }
27646
27647 /* Emit unwind directives for a SET. */
27648
27649 static void
27650 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27651 {
27652 rtx e0;
27653 rtx e1;
27654 unsigned reg;
27655
27656 e0 = XEXP (p, 0);
27657 e1 = XEXP (p, 1);
27658 switch (GET_CODE (e0))
27659 {
27660 case MEM:
27661 /* Pushing a single register. */
27662 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27663 || !REG_P (XEXP (XEXP (e0, 0), 0))
27664 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27665 abort ();
27666
27667 asm_fprintf (asm_out_file, "\t.save ");
27668 if (IS_VFP_REGNUM (REGNO (e1)))
27669 asm_fprintf(asm_out_file, "{d%d}\n",
27670 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27671 else
27672 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27673 break;
27674
27675 case REG:
27676 if (REGNO (e0) == SP_REGNUM)
27677 {
27678 /* A stack increment. */
27679 if (GET_CODE (e1) != PLUS
27680 || !REG_P (XEXP (e1, 0))
27681 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27682 || !CONST_INT_P (XEXP (e1, 1)))
27683 abort ();
27684
27685 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27686 -INTVAL (XEXP (e1, 1)));
27687 }
27688 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27689 {
27690 HOST_WIDE_INT offset;
27691
27692 if (GET_CODE (e1) == PLUS)
27693 {
27694 if (!REG_P (XEXP (e1, 0))
27695 || !CONST_INT_P (XEXP (e1, 1)))
27696 abort ();
27697 reg = REGNO (XEXP (e1, 0));
27698 offset = INTVAL (XEXP (e1, 1));
27699 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27700 HARD_FRAME_POINTER_REGNUM, reg,
27701 offset);
27702 }
27703 else if (REG_P (e1))
27704 {
27705 reg = REGNO (e1);
27706 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27707 HARD_FRAME_POINTER_REGNUM, reg);
27708 }
27709 else
27710 abort ();
27711 }
27712 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27713 {
27714 /* Move from sp to reg. */
27715 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27716 }
27717 else if (GET_CODE (e1) == PLUS
27718 && REG_P (XEXP (e1, 0))
27719 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27720 && CONST_INT_P (XEXP (e1, 1)))
27721 {
27722 /* Set reg to offset from sp. */
27723 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27724 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27725 }
27726 else
27727 abort ();
27728 break;
27729
27730 default:
27731 abort ();
27732 }
27733 }
27734
27735
27736 /* Emit unwind directives for the given insn. */
27737
27738 static void
27739 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27740 {
27741 rtx note, pat;
27742 bool handled_one = false;
27743
27744 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27745 return;
27746
27747 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27748 && (TREE_NOTHROW (current_function_decl)
27749 || crtl->all_throwers_are_sibcalls))
27750 return;
27751
27752 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27753 return;
27754
27755 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27756 {
27757 switch (REG_NOTE_KIND (note))
27758 {
27759 case REG_FRAME_RELATED_EXPR:
27760 pat = XEXP (note, 0);
27761 goto found;
27762
27763 case REG_CFA_REGISTER:
27764 pat = XEXP (note, 0);
27765 if (pat == NULL)
27766 {
27767 pat = PATTERN (insn);
27768 if (GET_CODE (pat) == PARALLEL)
27769 pat = XVECEXP (pat, 0, 0);
27770 }
27771
27772 /* Only emitted for IS_STACKALIGN re-alignment. */
27773 {
27774 rtx dest, src;
27775 unsigned reg;
27776
27777 src = SET_SRC (pat);
27778 dest = SET_DEST (pat);
27779
27780 gcc_assert (src == stack_pointer_rtx);
27781 reg = REGNO (dest);
27782 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27783 reg + 0x90, reg);
27784 }
27785 handled_one = true;
27786 break;
27787
27788 /* The INSN is generated in the epilogue.  It is set as RTX_FRAME_RELATED_P
27789 to get correct dwarf information for shrink-wrapping.  We should not
27790 emit unwind information for it because such notes are used either for
27791 pretend arguments or to adjust sp and restore registers from the
27792 stack. */
27793 case REG_CFA_DEF_CFA:
27794 case REG_CFA_ADJUST_CFA:
27795 case REG_CFA_RESTORE:
27796 return;
27797
27798 case REG_CFA_EXPRESSION:
27799 case REG_CFA_OFFSET:
27800 /* ??? Only handling here what we actually emit. */
27801 gcc_unreachable ();
27802
27803 default:
27804 break;
27805 }
27806 }
27807 if (handled_one)
27808 return;
27809 pat = PATTERN (insn);
27810 found:
27811
27812 switch (GET_CODE (pat))
27813 {
27814 case SET:
27815 arm_unwind_emit_set (asm_out_file, pat);
27816 break;
27817
27818 case SEQUENCE:
27819 /* Store multiple. */
27820 arm_unwind_emit_sequence (asm_out_file, pat);
27821 break;
27822
27823 default:
27824 abort();
27825 }
27826 }
27827
27828
27829 /* Output a reference from a function exception table to the type_info
27830 object X. The EABI specifies that the symbol should be relocated by
27831 an R_ARM_TARGET2 relocation. */
27832
27833 static bool
27834 arm_output_ttype (rtx x)
27835 {
27836 fputs ("\t.word\t", asm_out_file);
27837 output_addr_const (asm_out_file, x);
27838 /* Use special relocations for symbol references. */
27839 if (!CONST_INT_P (x))
27840 fputs ("(TARGET2)", asm_out_file);
27841 fputc ('\n', asm_out_file);
27842
27843 return TRUE;
27844 }
27845
27846 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27847
27848 static void
27849 arm_asm_emit_except_personality (rtx personality)
27850 {
27851 fputs ("\t.personality\t", asm_out_file);
27852 output_addr_const (asm_out_file, personality);
27853 fputc ('\n', asm_out_file);
27854 }
27855 #endif /* ARM_UNWIND_INFO */
27856
27857 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27858
27859 static void
27860 arm_asm_init_sections (void)
27861 {
27862 #if ARM_UNWIND_INFO
27863 exception_section = get_unnamed_section (0, output_section_asm_op,
27864 "\t.handlerdata");
27865 #endif /* ARM_UNWIND_INFO */
27866
27867 #ifdef OBJECT_FORMAT_ELF
27868 if (target_pure_code)
27869 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27870 #endif
27871 }
27872
27873 /* Output unwind directives for the start/end of a function. */
27874
27875 void
27876 arm_output_fn_unwind (FILE * f, bool prologue)
27877 {
27878 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27879 return;
27880
27881 if (prologue)
27882 fputs ("\t.fnstart\n", f);
27883 else
27884 {
27885 /* If this function will never be unwound, then mark it as such.
27886 The same condition is used in arm_unwind_emit to suppress
27887 the frame annotations. */
27888 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27889 && (TREE_NOTHROW (current_function_decl)
27890 || crtl->all_throwers_are_sibcalls))
27891 fputs("\t.cantunwind\n", f);
27892
27893 fputs ("\t.fnend\n", f);
27894 }
27895 }
27896
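/* Output the operand of an UNSPEC_TLS rtx: the symbol is printed followed
   by the relocation suffix selected by its TLS model (e.g. "(tlsgd)" or
   "(tpoff)"), plus a PC-relative adjustment for the sequence-relative
   relocations.  Always returns TRUE.  */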
27897 static bool
27898 arm_emit_tls_decoration (FILE *fp, rtx x)
27899 {
27900 enum tls_reloc reloc;
27901 rtx val;
27902
27903 val = XVECEXP (x, 0, 0);
27904 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27905
27906 output_addr_const (fp, val);
27907
27908 switch (reloc)
27909 {
27910 case TLS_GD32:
27911 fputs ("(tlsgd)", fp);
27912 break;
27913 case TLS_LDM32:
27914 fputs ("(tlsldm)", fp);
27915 break;
27916 case TLS_LDO32:
27917 fputs ("(tlsldo)", fp);
27918 break;
27919 case TLS_IE32:
27920 fputs ("(gottpoff)", fp);
27921 break;
27922 case TLS_LE32:
27923 fputs ("(tpoff)", fp);
27924 break;
27925 case TLS_DESCSEQ:
27926 fputs ("(tlsdesc)", fp);
27927 break;
27928 default:
27929 gcc_unreachable ();
27930 }
27931
27932 switch (reloc)
27933 {
27934 case TLS_GD32:
27935 case TLS_LDM32:
27936 case TLS_IE32:
27937 case TLS_DESCSEQ:
27938 fputs (" + (. - ", fp);
27939 output_addr_const (fp, XVECEXP (x, 0, 2));
27940 /* For DESCSEQ the 3rd operand encodes thumbness, and is added rather
than subtracted. */
27941 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27942 output_addr_const (fp, XVECEXP (x, 0, 3));
27943 fputc (')', fp);
27944 break;
27945 default:
27946 break;
27947 }
27948
27949 return TRUE;
27950 }
27951
27952 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27953
27954 static void
27955 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27956 {
27957 gcc_assert (size == 4);
27958 fputs ("\t.word\t", file);
27959 output_addr_const (file, x);
27960 fputs ("(tlsldo)", file);
27961 }
27962
27963 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27964
27965 static bool
27966 arm_output_addr_const_extra (FILE *fp, rtx x)
27967 {
27968 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27969 return arm_emit_tls_decoration (fp, x);
27970 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27971 {
27972 char label[256];
27973 int labelno = INTVAL (XVECEXP (x, 0, 0));
27974
27975 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27976 assemble_name_raw (fp, label);
27977
27978 return TRUE;
27979 }
27980 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27981 {
27982 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27983 if (GOT_PCREL)
27984 fputs ("+.", fp);
27985 fputs ("-(", fp);
27986 output_addr_const (fp, XVECEXP (x, 0, 0));
27987 fputc (')', fp);
27988 return TRUE;
27989 }
27990 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27991 {
27992 output_addr_const (fp, XVECEXP (x, 0, 0));
27993 if (GOT_PCREL)
27994 fputs ("+.", fp);
27995 fputs ("-(", fp);
27996 output_addr_const (fp, XVECEXP (x, 0, 1));
27997 fputc (')', fp);
27998 return TRUE;
27999 }
28000 else if (GET_CODE (x) == CONST_VECTOR)
28001 return arm_emit_vector_const (fp, x);
28002
28003 return FALSE;
28004 }
28005
28006 /* Output assembly for a shift instruction.
28007 SET_FLAGS determines how the instruction modifies the condition codes.
28008 0 - Do not set condition codes.
28009 1 - Set condition codes.
28010 2 - Use smallest instruction. */
28011 const char *
28012 arm_output_shift(rtx * operands, int set_flags)
28013 {
28014 char pattern[100];
28015 static const char flag_chars[3] = {'?', '.', '!'};
28016 const char *shift;
28017 HOST_WIDE_INT val;
28018 char c;
28019
28020 c = flag_chars[set_flags];
28021 shift = shift_op(operands[3], &val);
28022 if (shift)
28023 {
28024 if (val != -1)
28025 operands[2] = GEN_INT(val);
28026 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
28027 }
28028 else
28029 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
28030
28031 output_asm_insn (pattern, operands);
28032 return "";
28033 }
28034
28035 /* Output assembly for a WMMX immediate shift instruction. */
28036 const char *
28037 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
28038 {
28039 int shift = INTVAL (operands[2]);
28040 char templ[50];
28041 machine_mode opmode = GET_MODE (operands[0]);
28042
28043 gcc_assert (shift >= 0);
28044
28045 /* Handle shift values that would be out of range for the register
28046 versions: > 63 (for the D qualifier), > 31 (for W) or > 15 (for H). */
28047 if (((opmode == V4HImode) && (shift > 15))
28048 || ((opmode == V2SImode) && (shift > 31))
28049 || ((opmode == DImode) && (shift > 63)))
28050 {
28051 if (wror_or_wsra)
28052 {
28053 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
28054 output_asm_insn (templ, operands);
28055 if (opmode == DImode)
28056 {
28057 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
28058 output_asm_insn (templ, operands);
28059 }
28060 }
28061 else
28062 {
28063 /* The destination register will contain all zeros. */
28064 sprintf (templ, "wzero\t%%0");
28065 output_asm_insn (templ, operands);
28066 }
28067 return "";
28068 }
28069
28070 if ((opmode == DImode) && (shift > 32))
28071 {
28072 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
28073 output_asm_insn (templ, operands);
28074 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
28075 output_asm_insn (templ, operands);
28076 }
28077 else
28078 {
28079 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
28080 output_asm_insn (templ, operands);
28081 }
28082 return "";
28083 }
28084
28085 /* Output assembly for a WMMX tinsr instruction. */
28086 const char *
28087 arm_output_iwmmxt_tinsr (rtx *operands)
28088 {
28089 int mask = INTVAL (operands[3]);
28090 int i;
28091 char templ[50];
28092 int units = mode_nunits[GET_MODE (operands[0])];
28093 gcc_assert ((mask & (mask - 1)) == 0);
28094 for (i = 0; i < units; ++i)
28095 {
28096 if ((mask & 0x01) == 1)
28097 {
28098 break;
28099 }
28100 mask >>= 1;
28101 }
28102 gcc_assert (i < units);
28103 {
28104 switch (GET_MODE (operands[0]))
28105 {
28106 case E_V8QImode:
28107 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
28108 break;
28109 case E_V4HImode:
28110 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
28111 break;
28112 case E_V2SImode:
28113 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
28114 break;
28115 default:
28116 gcc_unreachable ();
28117 break;
28118 }
28119 output_asm_insn (templ, operands);
28120 }
28121 return "";
28122 }
28123
28124 /* Output a Thumb-1 casesi dispatch sequence. */
28125 const char *
28126 thumb1_output_casesi (rtx *operands)
28127 {
28128 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
28129
28130 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
28131
28132 switch (GET_MODE(diff_vec))
28133 {
28134 case E_QImode:
28135 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
28136 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
28137 case E_HImode:
28138 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
28139 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
28140 case E_SImode:
28141 return "bl\t%___gnu_thumb1_case_si";
28142 default:
28143 gcc_unreachable ();
28144 }
28145 }
28146
28147 /* Output a Thumb-2 casesi instruction. */
28148 const char *
28149 thumb2_output_casesi (rtx *operands)
28150 {
28151 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
28152
28153 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
28154
28155 output_asm_insn ("cmp\t%0, %1", operands);
28156 output_asm_insn ("bhi\t%l3", operands);
28157 switch (GET_MODE(diff_vec))
28158 {
28159 case E_QImode:
28160 return "tbb\t[%|pc, %0]";
28161 case E_HImode:
28162 return "tbh\t[%|pc, %0, lsl #1]";
28163 case E_SImode:
28164 if (flag_pic)
28165 {
28166 output_asm_insn ("adr\t%4, %l2", operands);
28167 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
28168 output_asm_insn ("add\t%4, %4, %5", operands);
28169 return "bx\t%4";
28170 }
28171 else
28172 {
28173 output_asm_insn ("adr\t%4, %l2", operands);
28174 return "ldr\t%|pc, [%4, %0, lsl #2]";
28175 }
28176 default:
28177 gcc_unreachable ();
28178 }
28179 }
28180
28181 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
28182 per-core tuning structs. */
28183 static int
28184 arm_issue_rate (void)
28185 {
28186 return current_tune->issue_rate;
28187 }
28188
28189 /* Return how many instructions the scheduler should look ahead to choose
28190 the best one. */
28191 static int
28192 arm_first_cycle_multipass_dfa_lookahead (void)
28193 {
28194 int issue_rate = arm_issue_rate ();
28195
28196 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
28197 }
28198
28199 /* Enable modeling of L2 auto-prefetcher. */
28200 static int
28201 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
28202 {
28203 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
28204 }
28205
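/* Implement TARGET_MANGLE_TYPE: handle the ARM-specific C++ manglings of
   __va_list, half-precision float and the Neon builtin types.  */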
28206 const char *
28207 arm_mangle_type (const_tree type)
28208 {
28209 /* The ARM ABI documents (10th October 2008) say that "__va_list"
28210 has to be mangled as if it is in the "std" namespace. */
28211 if (TARGET_AAPCS_BASED
28212 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
28213 return "St9__va_list";
28214
28215 /* Half-precision float. */
28216 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
28217 return "Dh";
28218
28219 /* Try mangling as a Neon type, TYPE_NAME is non-NULL if this is a
28220 builtin type. */
28221 if (TYPE_NAME (type) != NULL)
28222 return arm_mangle_builtin_type (type);
28223
28224 /* Use the default mangling. */
28225 return NULL;
28226 }
28227
28228 /* Order of allocation of core registers for Thumb: this allocation is
28229 written over the corresponding initial entries of the array
28230 initialized with REG_ALLOC_ORDER. We allocate all low registers
28231 first. Saving and restoring a low register is usually cheaper than
28232 using a call-clobbered high register. */
28233
28234 static const int thumb_core_reg_alloc_order[] =
28235 {
28236 3, 2, 1, 0, 4, 5, 6, 7,
28237 12, 14, 8, 9, 10, 11
28238 };
28239
28240 /* Adjust register allocation order when compiling for Thumb. */
28241
28242 void
28243 arm_order_regs_for_local_alloc (void)
28244 {
28245 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
28246 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
28247 if (TARGET_THUMB)
28248 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
28249 sizeof (thumb_core_reg_alloc_order));
28250 }
28251
28252 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
28253
28254 bool
28255 arm_frame_pointer_required (void)
28256 {
28257 if (SUBTARGET_FRAME_POINTER_REQUIRED)
28258 return true;
28259
28260 /* If the function receives nonlocal gotos, it needs to save the frame
28261 pointer in the nonlocal_goto_save_area object. */
28262 if (cfun->has_nonlocal_label)
28263 return true;
28264
28265 /* The frame pointer is required for non-leaf APCS frames. */
28266 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
28267 return true;
28268
28269 /* If we are probing the stack in the prologue, we will have a faulting
28270 instruction prior to the stack adjustment and this requires a frame
28271 pointer if we want to catch the exception using the EABI unwinder. */
28272 if (!IS_INTERRUPT (arm_current_func_type ())
28273 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
28274 || flag_stack_clash_protection)
28275 && arm_except_unwind_info (&global_options) == UI_TARGET
28276 && cfun->can_throw_non_call_exceptions)
28277 {
28278 HOST_WIDE_INT size = get_frame_size ();
28279
28280 /* That's irrelevant if there is no stack adjustment. */
28281 if (size <= 0)
28282 return false;
28283
28284 /* That's relevant only if there is a stack probe. */
28285 if (crtl->is_leaf && !cfun->calls_alloca)
28286 {
28287 /* We don't have the final size of the frame so adjust. */
28288 size += 32 * UNITS_PER_WORD;
28289 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
28290 return true;
28291 }
28292 else
28293 return true;
28294 }
28295
28296 return false;
28297 }
28298
28299 /* Thumb-1 is the only target that lacks conditional execution, so return
28300 true if the target is not Thumb-1. */
28301 static bool
28302 arm_have_conditional_execution (void)
28303 {
28304 return !TARGET_THUMB1;
28305 }
28306
28307 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
28308 static HOST_WIDE_INT
28309 arm_vector_alignment (const_tree type)
28310 {
28311 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
28312
28313 if (TARGET_AAPCS_BASED)
28314 align = MIN (align, 64);
28315
28316 return align;
28317 }
28318
28319 static void
28320 arm_autovectorize_vector_sizes (vector_sizes *sizes, bool)
28321 {
28322 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28323 {
28324 sizes->safe_push (16);
28325 sizes->safe_push (8);
28326 }
28327 }
28328
28329 static bool
28330 arm_vector_alignment_reachable (const_tree type, bool is_packed)
28331 {
28332 /* Vectors which aren't in packed structures will not be less aligned than
28333 the natural alignment of their element type, so this is safe. */
28334 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28335 return !is_packed;
28336
28337 return default_builtin_vector_alignment_reachable (type, is_packed);
28338 }
28339
28340 static bool
28341 arm_builtin_support_vector_misalignment (machine_mode mode,
28342 const_tree type, int misalignment,
28343 bool is_packed)
28344 {
28345 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28346 {
28347 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
28348
28349 if (is_packed)
28350 return align == 1;
28351
28352 /* If the misalignment is unknown, we should be able to handle the access
28353 so long as it is not to a member of a packed data structure. */
28354 if (misalignment == -1)
28355 return true;
28356
28357 /* Return true if the misalignment is a multiple of the natural alignment
28358 of the vector's element type. This is probably always going to be
28359 true in practice, since we've already established that this isn't a
28360 packed access. */
28361 return ((misalignment % align) == 0);
28362 }
28363
28364 return default_builtin_support_vector_misalignment (mode, type, misalignment,
28365 is_packed);
28366 }
28367
28368 static void
28369 arm_conditional_register_usage (void)
28370 {
28371 int regno;
28372
28373 if (TARGET_THUMB1 && optimize_size)
28374 {
28375 /* When optimizing for size on Thumb-1, it's better not
28376 to use the HI regs, because of the overhead of
28377 stacking them. */
28378 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
28379 fixed_regs[regno] = call_used_regs[regno] = 1;
28380 }
28381
28382 /* The link register can be clobbered by any branch insn,
28383 but we have no way to track that at present, so mark
28384 it as unavailable. */
28385 if (TARGET_THUMB1)
28386 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28387
28388 if (TARGET_32BIT && TARGET_HARD_FLOAT)
28389 {
28390 /* VFPv3 registers are disabled when earlier VFP
28391 versions are selected due to the definition of
28392 LAST_VFP_REGNUM. */
28393 for (regno = FIRST_VFP_REGNUM;
28394 regno <= LAST_VFP_REGNUM; ++ regno)
28395 {
28396 fixed_regs[regno] = 0;
28397 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28398 || regno >= FIRST_VFP_REGNUM + 32;
28399 }
28400 }
28401
28402 if (TARGET_REALLY_IWMMXT && !TARGET_GENERAL_REGS_ONLY)
28403 {
28404 regno = FIRST_IWMMXT_GR_REGNUM;
28405 /* The 2002/10/09 revision of the XScale ABI has wCG0
28406 and wCG1 as call-preserved registers. The 2002/11/21
28407 revision changed this so that all wCG registers are
28408 scratch registers. */
28409 for (regno = FIRST_IWMMXT_GR_REGNUM;
28410 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28411 fixed_regs[regno] = 0;
28412 /* The XScale ABI has wR0 - wR9 as scratch registers,
28413 the rest as call-preserved registers. */
28414 for (regno = FIRST_IWMMXT_REGNUM;
28415 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28416 {
28417 fixed_regs[regno] = 0;
28418 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28419 }
28420 }
28421
28422 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28423 {
28424 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28425 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28426 }
28427 else if (TARGET_APCS_STACK)
28428 {
28429 fixed_regs[10] = 1;
28430 call_used_regs[10] = 1;
28431 }
28432 /* -mcaller-super-interworking reserves r11 for calls to
28433 _interwork_r11_call_via_rN(). Making the register global
28434 is an easy way of ensuring that it remains valid for all
28435 calls. */
28436 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28437 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28438 {
28439 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28440 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28441 if (TARGET_CALLER_INTERWORKING)
28442 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28443 }
28444 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28445 }
28446
28447 static reg_class_t
28448 arm_preferred_rename_class (reg_class_t rclass)
28449 {
28450 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28451 using GENERAL_REGS.  During the register rename pass we prefer LO_REGS,
28452 which can reduce code size. */
28453 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28454 return LO_REGS;
28455 else
28456 return NO_REGS;
28457 }
28458
28459 /* Compute the attribute "length" of insn "*push_multi".
28460 So this function MUST be kept in sync with that insn pattern. */
28461 int
28462 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28463 {
28464 int i, regno, hi_reg;
28465 int num_saves = XVECLEN (parallel_op, 0);
28466
28467 /* ARM mode. */
28468 if (TARGET_ARM)
28469 return 4;
28470 /* Thumb1 mode. */
28471 if (TARGET_THUMB1)
28472 return 2;
28473
28474 /* Thumb2 mode. */
28475 regno = REGNO (first_op);
28476 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings only if the
28477 register list fits in 8 bits.  Normally this means all registers in the
28478 list must be LO_REGS, that is R0-R7.  If any HI_REGS are used, then we
28479 must use the 32-bit encodings, with one exception: PUSH may use the
28480 16-bit encoding when LR (a HI_REG) is in the list. */
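/* Illustrative examples: "push {r0, r4, lr}" can use the 16-bit encoding
   (LR being the one permitted high register), while "push {r0, r8}" needs
   the 32-bit encoding because r8 is a HI_REG.  */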
28481 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28482 for (i = 1; i < num_saves && !hi_reg; i++)
28483 {
28484 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28485 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28486 }
28487
28488 if (!hi_reg)
28489 return 2;
28490 return 4;
28491 }
28492
28493 /* Compute the attribute "length" of insn. Currently, this function is used
28494 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28495 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28496 rtx, RETURN_PC is true if OPERANDS contains a return insn, and WRITE_BACK_P is
28497 true if OPERANDS contains an insn that explicitly updates the base register. */
28498
28499 int
28500 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28501 {
28502 /* ARM mode. */
28503 if (TARGET_ARM)
28504 return 4;
28505 /* Thumb1 mode. */
28506 if (TARGET_THUMB1)
28507 return 2;
28508
28509 rtx parallel_op = operands[0];
28510 /* Start at the index of the last element of the PARALLEL. */
28511 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28512 /* Initialize to the base register's number. */
28513 unsigned regno = REGNO (operands[1]);
28514 /* Skip the return and write back patterns.
28515 We only need the register pop patterns for the analysis below. */
28516 unsigned first_indx = 0;
28517 first_indx += return_pc ? 1 : 0;
28518 first_indx += write_back_p ? 1 : 0;
28519
28520 /* A pop operation can be done through LDM or POP.  If the base register is SP
28521 and write back is used, then an LDM is an alias of POP. */
28522 bool pop_p = (regno == SP_REGNUM && write_back_p);
28523 bool ldm_p = !pop_p;
28524
28525 /* Check base register for LDM. */
28526 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28527 return 4;
28528
28529 /* Check each register in the list. */
28530 for (; indx >= first_indx; indx--)
28531 {
28532 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28533 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28534 comment in arm_attr_length_push_multi. */
28535 if (REGNO_REG_CLASS (regno) == HI_REGS
28536 && (regno != PC_REGNUM || ldm_p))
28537 return 4;
28538 }
28539
28540 return 2;
28541 }
28542
28543 /* Compute the number of instructions emitted by output_move_double. */
28544 int
28545 arm_count_output_move_double_insns (rtx *operands)
28546 {
28547 int count;
28548 rtx ops[2];
28549 /* output_move_double may modify the operands array, so call it
28550 here on a copy of the array. */
28551 ops[0] = operands[0];
28552 ops[1] = operands[1];
28553 output_move_double (ops, false, &count);
28554 return count;
28555 }
28556
28557 /* Same as above, but operands are a register/memory pair in SImode.
28558 Assumes operands has the base register in position 0 and memory in position
28559 2 (which is the order provided by the arm_{ldrd,strd} patterns). */
28560 int
28561 arm_count_ldrdstrd_insns (rtx *operands, bool load)
28562 {
28563 int count;
28564 rtx ops[2];
28565 int regnum, memnum;
28566 if (load)
28567 regnum = 0, memnum = 1;
28568 else
28569 regnum = 1, memnum = 0;
28570 ops[regnum] = gen_rtx_REG (DImode, REGNO (operands[0]));
28571 ops[memnum] = adjust_address (operands[2], DImode, 0);
28572 output_move_double (ops, false, &count);
28573 return count;
28574 }
28575
28576
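/* If OPERAND is a CONST_DOUBLE equal to 1 / 2**n for some n in [0, 31],
   return n; otherwise return 0.  Used, like vfp3_const_double_for_bits
   below, by the fixed-point conversion patterns.  */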
28577 int
28578 vfp3_const_double_for_fract_bits (rtx operand)
28579 {
28580 REAL_VALUE_TYPE r0;
28581
28582 if (!CONST_DOUBLE_P (operand))
28583 return 0;
28584
28585 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28586 if (exact_real_inverse (DFmode, &r0)
28587 && !REAL_VALUE_NEGATIVE (r0))
28588 {
28589 if (exact_real_truncate (DFmode, &r0))
28590 {
28591 HOST_WIDE_INT value = real_to_integer (&r0);
28592 value = value & 0xffffffff;
28593 if ((value != 0) && ( (value & (value - 1)) == 0))
28594 {
28595 int ret = exact_log2 (value);
28596 gcc_assert (IN_RANGE (ret, 0, 31));
28597 return ret;
28598 }
28599 }
28600 }
28601 return 0;
28602 }
28603
28604 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28605 log2 is in [1, 32], return that log2. Otherwise return -1.
28606 This is used in the patterns for vcvt.s32.f32 floating-point to
28607 fixed-point conversions. */
28608
28609 int
28610 vfp3_const_double_for_bits (rtx x)
28611 {
28612 const REAL_VALUE_TYPE *r;
28613
28614 if (!CONST_DOUBLE_P (x))
28615 return -1;
28616
28617 r = CONST_DOUBLE_REAL_VALUE (x);
28618
28619 if (REAL_VALUE_NEGATIVE (*r)
28620 || REAL_VALUE_ISNAN (*r)
28621 || REAL_VALUE_ISINF (*r)
28622 || !real_isinteger (r, SFmode))
28623 return -1;
28624
28625 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28626
28627 /* The exact_log2 above will have returned -1 if this is
28628 not an exact log2. */
28629 if (!IN_RANGE (hwint, 1, 32))
28630 return -1;
28631
28632 return hwint;
28633 }
28634
28635 \f
28636 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28637
28638 static void
28639 arm_pre_atomic_barrier (enum memmodel model)
28640 {
28641 if (need_atomic_barrier_p (model, true))
28642 emit_insn (gen_memory_barrier ());
28643 }
28644
28645 static void
28646 arm_post_atomic_barrier (enum memmodel model)
28647 {
28648 if (need_atomic_barrier_p (model, false))
28649 emit_insn (gen_memory_barrier ());
28650 }
28651
28652 /* Emit the load-exclusive and store-exclusive instructions.
28653 Use acquire and release versions if necessary. */
28654
28655 static void
28656 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28657 {
28658 rtx (*gen) (rtx, rtx);
28659
28660 if (acq)
28661 {
28662 switch (mode)
28663 {
28664 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28665 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28666 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28667 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28668 default:
28669 gcc_unreachable ();
28670 }
28671 }
28672 else
28673 {
28674 switch (mode)
28675 {
28676 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28677 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28678 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28679 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28680 default:
28681 gcc_unreachable ();
28682 }
28683 }
28684
28685 emit_insn (gen (rval, mem));
28686 }
28687
28688 static void
28689 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28690 rtx mem, bool rel)
28691 {
28692 rtx (*gen) (rtx, rtx, rtx);
28693
28694 if (rel)
28695 {
28696 switch (mode)
28697 {
28698 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28699 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28700 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28701 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28702 default:
28703 gcc_unreachable ();
28704 }
28705 }
28706 else
28707 {
28708 switch (mode)
28709 {
28710 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28711 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28712 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28713 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28714 default:
28715 gcc_unreachable ();
28716 }
28717 }
28718
28719 emit_insn (gen (bval, rval, mem));
28720 }
28721
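/* As an illustration, for SImode these two helpers typically expand to
   LDREX/STREX, or to LDAEX/STLEX when the acquire/release forms are
   requested and available (TARGET_HAVE_LDACQ); QImode, HImode and DImode
   use the corresponding byte, halfword and doubleword variants.  */
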
28722 /* Mark the previous jump instruction as unlikely. */
28723
28724 static void
28725 emit_unlikely_jump (rtx insn)
28726 {
28727 rtx_insn *jump = emit_jump_insn (insn);
28728 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28729 }
28730
28731 /* Expand a compare and swap pattern. */
28732
28733 void
28734 arm_expand_compare_and_swap (rtx operands[])
28735 {
28736 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28737 machine_mode mode, cmp_mode;
28738
28739 bval = operands[0];
28740 rval = operands[1];
28741 mem = operands[2];
28742 oldval = operands[3];
28743 newval = operands[4];
28744 is_weak = operands[5];
28745 mod_s = operands[6];
28746 mod_f = operands[7];
28747 mode = GET_MODE (mem);
28748
28749 /* Normally the succ memory model must be stronger than fail, but in the
28750 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28751 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28752
28753 if (TARGET_HAVE_LDACQ
28754 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28755 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28756 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28757
28758 switch (mode)
28759 {
28760 case E_QImode:
28761 case E_HImode:
28762 /* For narrow modes, we're going to perform the comparison in SImode,
28763 so do the zero-extension now. */
28764 rval = gen_reg_rtx (SImode);
28765 oldval = convert_modes (SImode, mode, oldval, true);
28766 /* FALLTHRU */
28767
28768 case E_SImode:
28769 /* Force the value into a register if needed. We waited until after
28770 the zero-extension above to do this properly. */
28771 if (!arm_add_operand (oldval, SImode))
28772 oldval = force_reg (SImode, oldval);
28773 break;
28774
28775 case E_DImode:
28776 if (!cmpdi_operand (oldval, mode))
28777 oldval = force_reg (mode, oldval);
28778 break;
28779
28780 default:
28781 gcc_unreachable ();
28782 }
28783
28784 if (TARGET_THUMB1)
28785 cmp_mode = E_SImode;
28786 else
28787 cmp_mode = CC_Zmode;
28788
28789 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28790 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
28791 oldval, newval, is_weak, mod_s, mod_f));
28792
28793 if (mode == QImode || mode == HImode)
28794 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28795
28796 /* In all cases, we arrange for success to be signaled by Z being set.
28797 This arrangement allows the boolean result to be used directly
28798 in a subsequent branch, post optimization. For Thumb-1 targets, the
28799 boolean negation of the result is also stored in bval because the Thumb-1
28800 backend lacks dependency tracking for the CC flag, flag-setting not
28801 being represented at the RTL level. */
28802 if (TARGET_THUMB1)
28803 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28804 else
28805 {
28806 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28807 emit_insn (gen_rtx_SET (bval, x));
28808 }
28809 }
28810
28811 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28812 another memory store between the load-exclusive and store-exclusive can
28813 reset the monitor from Exclusive to Open state. This means we must wait
28814 until after reload to split the pattern, lest we get a register spill in
28815 the middle of the atomic sequence. Success of the compare and swap is
28816 indicated by the Z flag being set for 32-bit targets and by neg_bval being
28817 zero for Thumb-1 targets (i.e. the negation of the boolean value returned
28818 by the atomic_compare_and_swap<mode> standard pattern in operand 0). */
28819
28820 void
28821 arm_split_compare_and_swap (rtx operands[])
28822 {
28823 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
28824 machine_mode mode;
28825 enum memmodel mod_s, mod_f;
28826 bool is_weak;
28827 rtx_code_label *label1, *label2;
28828 rtx x, cond;
28829
28830 rval = operands[1];
28831 mem = operands[2];
28832 oldval = operands[3];
28833 newval = operands[4];
28834 is_weak = (operands[5] != const0_rtx);
28835 mod_s_rtx = operands[6];
28836 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
28837 mod_f = memmodel_from_int (INTVAL (operands[7]));
28838 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28839 mode = GET_MODE (mem);
28840
28841 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28842
28843 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
28844 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
28845
28846 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28847 a full barrier is emitted after the store-release. */
28848 if (is_armv8_sync)
28849 use_acquire = false;
28850
28851 /* Checks whether a barrier is needed and emits one accordingly. */
28852 if (!(use_acquire || use_release))
28853 arm_pre_atomic_barrier (mod_s);
28854
28855 label1 = NULL;
28856 if (!is_weak)
28857 {
28858 label1 = gen_label_rtx ();
28859 emit_label (label1);
28860 }
28861 label2 = gen_label_rtx ();
28862
28863 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28864
28865 /* Z is set to 0 for 32-bit targets (resp. neg_bval set to 1 for Thumb-1) if
28866 oldval != rval, as required to communicate with arm_expand_compare_and_swap. */
28867 if (TARGET_32BIT)
28868 {
28869 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28870 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28871 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28872 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28873 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28874 }
28875 else
28876 {
28877 emit_move_insn (neg_bval, const1_rtx);
28878 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28879 if (thumb1_cmpneg_operand (oldval, SImode))
28880 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28881 label2, cond));
28882 else
28883 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28884 }
28885
28886 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28887
28888 /* Weak or strong, we want EQ to be true for success, so that we
28889 match the flags that we got from the compare above. */
28890 if (TARGET_32BIT)
28891 {
28892 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28893 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28894 emit_insn (gen_rtx_SET (cond, x));
28895 }
28896
28897 if (!is_weak)
28898 {
28899 /* Z is set to boolean value of !neg_bval, as required to communicate
28900 with arm_expand_compare_and_swap. */
28901 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28902 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28903 }
28904
28905 if (!is_mm_relaxed (mod_f))
28906 emit_label (label2);
28907
28908 /* Checks whether a barrier is needed and emits one accordingly. */
28909 if (is_armv8_sync
28910 || !(use_acquire || use_release))
28911 arm_post_atomic_barrier (mod_s);
28912
28913 if (is_mm_relaxed (mod_f))
28914 emit_label (label2);
28915 }
28916
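/* As a rough sketch (not emitted verbatim), a strong SImode compare-and-swap
   without extra barriers is split into a loop of the form:

       loop:  ldrex   rval, [mem]
              cmp     rval, oldval
              bne     done                   @ Z clear signals failure
              strex   neg_bval, newval, [mem]
              cmp     neg_bval, #0
              bne     loop                   @ retry if the store-exclusive failed
       done:

   A weak compare-and-swap omits the backward branch and simply reports the
   result of the single attempt.  */
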
28917 /* Split an atomic operation pattern. Operation is given by CODE and is one
28918 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28919 operation). Operation is performed on the content at MEM and on VALUE
28920 following the memory model MODEL_RTX. The content at MEM before and after
28921 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28922 success of the operation is returned in COND. Using a scratch register or
28923 an operand register for these determines what result is returned for that
28924 pattern. */
28925
28926 void
28927 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28928 rtx value, rtx model_rtx, rtx cond)
28929 {
28930 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28931 machine_mode mode = GET_MODE (mem);
28932 machine_mode wmode = (mode == DImode ? DImode : SImode);
28933 rtx_code_label *label;
28934 bool all_low_regs, bind_old_new;
28935 rtx x;
28936
28937 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28938
28939 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
28940 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
28941
28942 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28943 a full barrier is emitted after the store-release. */
28944 if (is_armv8_sync)
28945 use_acquire = false;
28946
28947 /* Checks whether a barrier is needed and emits one accordingly. */
28948 if (!(use_acquire || use_release))
28949 arm_pre_atomic_barrier (model);
28950
28951 label = gen_label_rtx ();
28952 emit_label (label);
28953
28954 if (new_out)
28955 new_out = gen_lowpart (wmode, new_out);
28956 if (old_out)
28957 old_out = gen_lowpart (wmode, old_out);
28958 else
28959 old_out = new_out;
28960 value = simplify_gen_subreg (wmode, value, mode, 0);
28961
28962 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28963
28964 /* Does the operation require destination and first operand to use the same
28965 register? This is decided by register constraints of relevant insn
28966 patterns in thumb1.md. */
28967 gcc_assert (!new_out || REG_P (new_out));
28968 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28969 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28970 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28971 bind_old_new =
28972 (TARGET_THUMB1
28973 && code != SET
28974 && code != MINUS
28975 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28976
28977 /* We want to return the old value while putting the result of the operation
28978 in the same register as the old value so copy the old value over to the
28979 destination register and use that register for the operation. */
28980 if (old_out && bind_old_new)
28981 {
28982 emit_move_insn (new_out, old_out);
28983 old_out = new_out;
28984 }
28985
28986 switch (code)
28987 {
28988 case SET:
28989 new_out = value;
28990 break;
28991
28992 case NOT:
28993 x = gen_rtx_AND (wmode, old_out, value);
28994 emit_insn (gen_rtx_SET (new_out, x));
28995 x = gen_rtx_NOT (wmode, new_out);
28996 emit_insn (gen_rtx_SET (new_out, x));
28997 break;
28998
28999 case MINUS:
29000 if (CONST_INT_P (value))
29001 {
29002 value = GEN_INT (-INTVAL (value));
29003 code = PLUS;
29004 }
29005 /* FALLTHRU */
29006
29007 case PLUS:
29008 if (mode == DImode)
29009 {
29010 /* DImode plus/minus need to clobber flags. */
29011 /* The adddi3 and subdi3 patterns are incorrectly written so that
29012 they require matching operands, even when we could easily support
29013 three operands. Thankfully, this can be fixed up post-splitting,
29014 as the individual add+adc patterns do accept three operands and
29015 post-reload cprop can make these moves go away. */
29016 emit_move_insn (new_out, old_out);
29017 if (code == PLUS)
29018 x = gen_adddi3 (new_out, new_out, value);
29019 else
29020 x = gen_subdi3 (new_out, new_out, value);
29021 emit_insn (x);
29022 break;
29023 }
29024 /* FALLTHRU */
29025
29026 default:
29027 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
29028 emit_insn (gen_rtx_SET (new_out, x));
29029 break;
29030 }
29031
29032 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
29033 use_release);
29034
29035 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
29036 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
29037
29038 /* Checks whether a barrier is needed and emits one accordingly. */
29039 if (is_armv8_sync
29040 || !(use_acquire || use_release))
29041 arm_post_atomic_barrier (model);
29042 }
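
/* As a rough sketch, a relaxed SImode fetch-and-add split by the function
   above looks like:

       loop:  ldrex   old_out, [mem]
              add     new_out, old_out, value
              strex   cond, new_out, [mem]
              cmp     cond, #0
              bne     loop

   with barriers and/or acquire/release forms added as dictated by the
   memory model.  */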
29043 \f
29044 #define MAX_VECT_LEN 16
29045
29046 struct expand_vec_perm_d
29047 {
29048 rtx target, op0, op1;
29049 vec_perm_indices perm;
29050 machine_mode vmode;
29051 bool one_vector_p;
29052 bool testing_p;
29053 };
29054
29055 /* Generate a variable permutation. */
29056
29057 static void
29058 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
29059 {
29060 machine_mode vmode = GET_MODE (target);
29061 bool one_vector_p = rtx_equal_p (op0, op1);
29062
29063 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
29064 gcc_checking_assert (GET_MODE (op0) == vmode);
29065 gcc_checking_assert (GET_MODE (op1) == vmode);
29066 gcc_checking_assert (GET_MODE (sel) == vmode);
29067 gcc_checking_assert (TARGET_NEON);
29068
29069 if (one_vector_p)
29070 {
29071 if (vmode == V8QImode)
29072 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
29073 else
29074 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
29075 }
29076 else
29077 {
29078 rtx pair;
29079
29080 if (vmode == V8QImode)
29081 {
29082 pair = gen_reg_rtx (V16QImode);
29083 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
29084 pair = gen_lowpart (TImode, pair);
29085 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
29086 }
29087 else
29088 {
29089 pair = gen_reg_rtx (OImode);
29090 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
29091 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
29092 }
29093 }
29094 }
29095
29096 void
29097 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
29098 {
29099 machine_mode vmode = GET_MODE (target);
29100 unsigned int nelt = GET_MODE_NUNITS (vmode);
29101 bool one_vector_p = rtx_equal_p (op0, op1);
29102 rtx mask;
29103
29104 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29105 numbering of elements for big-endian, we must reverse the order. */
29106 gcc_checking_assert (!BYTES_BIG_ENDIAN);
29107
29108 /* The VTBL instruction does not use a modulo index, so we must take care
29109 of that ourselves. */
29110 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
29111 mask = gen_const_vec_duplicate (vmode, mask);
29112 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
29113
29114 arm_expand_vec_perm_1 (target, op0, op1, sel);
29115 }
29116
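/* For example, with a single V8QImode input vector the selector is ANDed
   with 7, so an out-of-range index such as 9 selects lane 1; with two input
   vectors the mask is 15 and indexes 8-15 select lanes from the second
   vector.  The masking supplies the modulo behaviour VEC_PERM_EXPR requires,
   because VTBL zeroes out-of-range lanes instead of wrapping.  */
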
29117 /* Map lane ordering between the architectural lane order and the GCC lane order,
29118 taking the ABI into account. See the comment above output_move_neon for details. */
29119
29120 static int
29121 neon_endian_lane_map (machine_mode mode, int lane)
29122 {
29123 if (BYTES_BIG_ENDIAN)
29124 {
29125 int nelems = GET_MODE_NUNITS (mode);
29126 /* Reverse lane order. */
29127 lane = (nelems - 1 - lane);
29128 /* Reverse D register order, to match ABI. */
29129 if (GET_MODE_SIZE (mode) == 16)
29130 lane = lane ^ (nelems / 2);
29131 }
29132 return lane;
29133 }
29134
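/* For example, on a big-endian target lane 0 of a V4SImode value maps to
   lane 1: the lane order is reversed (0 -> 3) and, for a 16-byte quad
   register, the two D-register halves are swapped (3 ^ 2 -> 1).  On
   little-endian targets the mapping is the identity.  */
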
29135 /* Some permutations index into pairs of vectors; this is a helper function
29136 to map indexes into those pairs of vectors. */
29137
29138 static int
29139 neon_pair_endian_lane_map (machine_mode mode, int lane)
29140 {
29141 int nelem = GET_MODE_NUNITS (mode);
29142 if (BYTES_BIG_ENDIAN)
29143 lane =
29144 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
29145 return lane;
29146 }
29147
29148 /* Generate or test for an insn that supports a constant permutation. */
29149
29150 /* Recognize patterns for the VUZP insns. */
29151
29152 static bool
29153 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
29154 {
29155 unsigned int i, odd, mask, nelt = d->perm.length ();
29156 rtx out0, out1, in0, in1;
29157 int first_elem;
29158 int swap_nelt;
29159
29160 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29161 return false;
29162
29163 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
29164 big-endian pattern on 64-bit vectors, so we correct for that. */
29165 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
29166 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
29167
29168 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
29169
29170 if (first_elem == neon_endian_lane_map (d->vmode, 0))
29171 odd = 0;
29172 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
29173 odd = 1;
29174 else
29175 return false;
29176 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29177
29178 for (i = 0; i < nelt; i++)
29179 {
29180 unsigned elt =
29181 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
29182 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
29183 return false;
29184 }
29185
29186 /* Success! */
29187 if (d->testing_p)
29188 return true;
29189
29190 in0 = d->op0;
29191 in1 = d->op1;
29192 if (swap_nelt != 0)
29193 std::swap (in0, in1);
29194
29195 out0 = d->target;
29196 out1 = gen_reg_rtx (d->vmode);
29197 if (odd)
29198 std::swap (out0, out1);
29199
29200 emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
29201 return true;
29202 }
29203
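/* For example, on a little-endian target the two-operand V8QImode
   permutation { 0, 2, 4, 6, 8, 10, 12, 14 } selects the even lanes of both
   inputs and is matched here as a VUZP with odd == 0; the selector
   { 1, 3, 5, ..., 15 } is the corresponding odd-lane case.  */
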
29204 /* Recognize patterns for the VZIP insns. */
29205
29206 static bool
29207 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
29208 {
29209 unsigned int i, high, mask, nelt = d->perm.length ();
29210 rtx out0, out1, in0, in1;
29211 int first_elem;
29212 bool is_swapped;
29213
29214 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29215 return false;
29216
29217 is_swapped = BYTES_BIG_ENDIAN;
29218
29219 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
29220
29221 high = nelt / 2;
29222 if (first_elem == neon_endian_lane_map (d->vmode, high))
29223 ;
29224 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
29225 high = 0;
29226 else
29227 return false;
29228 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29229
29230 for (i = 0; i < nelt / 2; i++)
29231 {
29232 unsigned elt =
29233 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
29234 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
29235 != elt)
29236 return false;
29237 elt =
29238 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
29239 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
29240 != elt)
29241 return false;
29242 }
29243
29244 /* Success! */
29245 if (d->testing_p)
29246 return true;
29247
29248 in0 = d->op0;
29249 in1 = d->op1;
29250 if (is_swapped)
29251 std::swap (in0, in1);
29252
29253 out0 = d->target;
29254 out1 = gen_reg_rtx (d->vmode);
29255 if (high)
29256 std::swap (out0, out1);
29257
29258 emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
29259 return true;
29260 }
29261
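/* For example, on a little-endian target the two-operand V8QImode
   permutation { 0, 8, 1, 9, 2, 10, 3, 11 } interleaves the low halves of
   the inputs and is matched here as a VZIP with high == 0;
   { 4, 12, 5, 13, 6, 14, 7, 15 } is the high-half counterpart.  */
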
29262 /* Recognize patterns for the VREV insns. */
29263 static bool
29264 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
29265 {
29266 unsigned int i, j, diff, nelt = d->perm.length ();
29267 rtx (*gen) (machine_mode, rtx, rtx);
29268
29269 if (!d->one_vector_p)
29270 return false;
29271
29272 diff = d->perm[0];
29273 switch (diff)
29274 {
29275 case 7:
29276 switch (d->vmode)
29277 {
29278 case E_V16QImode:
29279 case E_V8QImode:
29280 gen = gen_neon_vrev64;
29281 break;
29282 default:
29283 return false;
29284 }
29285 break;
29286 case 3:
29287 switch (d->vmode)
29288 {
29289 case E_V16QImode:
29290 case E_V8QImode:
29291 gen = gen_neon_vrev32;
29292 break;
29293 case E_V8HImode:
29294 case E_V4HImode:
29295 case E_V8HFmode:
29296 case E_V4HFmode:
29297 gen = gen_neon_vrev64;
29298 break;
29299 default:
29300 return false;
29301 }
29302 break;
29303 case 1:
29304 switch (d->vmode)
29305 {
29306 case E_V16QImode:
29307 case E_V8QImode:
29308 gen = gen_neon_vrev16;
29309 break;
29310 case E_V8HImode:
29311 case E_V4HImode:
29312 gen = gen_neon_vrev32;
29313 break;
29314 case E_V4SImode:
29315 case E_V2SImode:
29316 case E_V4SFmode:
29317 case E_V2SFmode:
29318 gen = gen_neon_vrev64;
29319 break;
29320 default:
29321 return false;
29322 }
29323 break;
29324 default:
29325 return false;
29326 }
29327
29328 for (i = 0; i < nelt ; i += diff + 1)
29329 for (j = 0; j <= diff; j += 1)
29330 {
29331 /* This is guaranteed to hold because diff is 7, 3 or 1,
29332 so there are always enough elements left in the
29333 permutation to cover i + j. A permutation implying any
29334 other value of diff means that something has gone wrong
29335 by the time we get here. */
29336 gcc_assert (i + j < nelt);
29337 if (d->perm[i + j] != i + diff - j)
29338 return false;
29339 }
29340
29341 /* Success! */
29342 if (d->testing_p)
29343 return true;
29344
29345 emit_insn (gen (d->vmode, d->target, d->op0));
29346 return true;
29347 }
29348
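/* For example, the single-operand V8QImode permutation
   { 1, 0, 3, 2, 5, 4, 7, 6 } (diff == 1) is matched as a VREV16, while
   { 3, 2, 1, 0, 7, 6, 5, 4 } (diff == 3) is matched as a VREV32, both
   operating on byte elements.  */
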
29349 /* Recognize patterns for the VTRN insns. */
29350
29351 static bool
29352 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29353 {
29354 unsigned int i, odd, mask, nelt = d->perm.length ();
29355 rtx out0, out1, in0, in1;
29356
29357 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29358 return false;
29359
29360 /* Note that these are little-endian tests. Adjust for big-endian later. */
29361 if (d->perm[0] == 0)
29362 odd = 0;
29363 else if (d->perm[0] == 1)
29364 odd = 1;
29365 else
29366 return false;
29367 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29368
29369 for (i = 0; i < nelt; i += 2)
29370 {
29371 if (d->perm[i] != i + odd)
29372 return false;
29373 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29374 return false;
29375 }
29376
29377 /* Success! */
29378 if (d->testing_p)
29379 return true;
29380
29381 in0 = d->op0;
29382 in1 = d->op1;
29383 if (BYTES_BIG_ENDIAN)
29384 {
29385 std::swap (in0, in1);
29386 odd = !odd;
29387 }
29388
29389 out0 = d->target;
29390 out1 = gen_reg_rtx (d->vmode);
29391 if (odd)
29392 std::swap (out0, out1);
29393
29394 emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
29395 return true;
29396 }
29397
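/* For example, on a little-endian target the two-operand V8QImode
   permutation { 0, 8, 2, 10, 4, 12, 6, 14 } (odd == 0) pairs up the even
   lanes of the two inputs and is matched here as a VTRN;
   { 1, 9, 3, 11, 5, 13, 7, 15 } is the odd-lane form.  */
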
29398 /* Recognize patterns for the VEXT insns. */
29399
29400 static bool
29401 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29402 {
29403 unsigned int i, nelt = d->perm.length ();
29404 rtx offset;
29405
29406 unsigned int location;
29407
29408 unsigned int next = d->perm[0] + 1;
29409
29410 /* TODO: Handle GCC's numbering of elements for big-endian. */
29411 if (BYTES_BIG_ENDIAN)
29412 return false;
29413
29414 /* Check if the extracted indexes are increasing by one. */
29415 for (i = 1; i < nelt; next++, i++)
29416 {
29417 /* If we hit the most significant element of the 2nd vector in
29418 the previous iteration, no need to test further. */
29419 if (next == 2 * nelt)
29420 return false;
29421
29422 /* If we are operating on only one vector, it could be a
29423 rotation. If there are only two elements of size < 64 bits,
29424 let arm_evpc_neon_vrev catch it. */
29425 if (d->one_vector_p && (next == nelt))
29426 {
29427 if ((nelt == 2) && (d->vmode != V2DImode))
29428 return false;
29429 else
29430 next = 0;
29431 }
29432
29433 if (d->perm[i] != next)
29434 return false;
29435 }
29436
29437 location = d->perm[0];
29438
29439 /* Success! */
29440 if (d->testing_p)
29441 return true;
29442
29443 offset = GEN_INT (location);
29444
29445 if (d->vmode == E_DImode)
29446 return false;
29447
29448 emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
29449 return true;
29450 }
29451
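/* For example, the two-operand V8QImode permutation
   { 1, 2, 3, 4, 5, 6, 7, 8 } extracts a window starting one byte into the
   concatenation of the inputs and is matched here as a VEXT with an
   immediate offset of 1.  */
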
29452 /* The NEON VTBL instruction is a fully variable permutation that's even
29453 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29454 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29455 can do slightly better by expanding this as a constant where we don't
29456 have to apply a mask. */
29457
29458 static bool
29459 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29460 {
29461 rtx rperm[MAX_VECT_LEN], sel;
29462 machine_mode vmode = d->vmode;
29463 unsigned int i, nelt = d->perm.length ();
29464
29465 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29466 numbering of elements for big-endian, we must reverse the order. */
29467 if (BYTES_BIG_ENDIAN)
29468 return false;
29469
29470 if (d->testing_p)
29471 return true;
29472
29473 /* Generic code will try the constant permutation twice: once with the
29474 original mode and again with the elements lowered to QImode.
29475 So wait, and don't do the selector expansion ourselves. */
29476 if (vmode != V8QImode && vmode != V16QImode)
29477 return false;
29478
29479 for (i = 0; i < nelt; ++i)
29480 rperm[i] = GEN_INT (d->perm[i]);
29481 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29482 sel = force_reg (vmode, sel);
29483
29484 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29485 return true;
29486 }
29487
29488 static bool
29489 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29490 {
29491 /* Check if the input mask matches vext before reordering the
29492 operands. */
29493 if (TARGET_NEON)
29494 if (arm_evpc_neon_vext (d))
29495 return true;
29496
29497 /* The pattern matching functions above are written to look for a small
29498 number to begin the sequence (0, 1, N/2). If we begin with an index
29499 from the second operand, we can swap the operands. */
29500 unsigned int nelt = d->perm.length ();
29501 if (d->perm[0] >= nelt)
29502 {
29503 d->perm.rotate_inputs (1);
29504 std::swap (d->op0, d->op1);
29505 }
29506
29507 if (TARGET_NEON)
29508 {
29509 if (arm_evpc_neon_vuzp (d))
29510 return true;
29511 if (arm_evpc_neon_vzip (d))
29512 return true;
29513 if (arm_evpc_neon_vrev (d))
29514 return true;
29515 if (arm_evpc_neon_vtrn (d))
29516 return true;
29517 return arm_evpc_neon_vtbl (d);
29518 }
29519 return false;
29520 }
29521
29522 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
29523
29524 static bool
29525 arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
29526 const vec_perm_indices &sel)
29527 {
29528 struct expand_vec_perm_d d;
29529 int i, nelt, which;
29530
29531 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
29532 return false;
29533
29534 d.target = target;
29535 d.op0 = op0;
29536 d.op1 = op1;
29537
29538 d.vmode = vmode;
29539 gcc_assert (VECTOR_MODE_P (d.vmode));
29540 d.testing_p = !target;
29541
29542 nelt = GET_MODE_NUNITS (d.vmode);
29543 for (i = which = 0; i < nelt; ++i)
29544 {
29545 int ei = sel[i] & (2 * nelt - 1);
29546 which |= (ei < nelt ? 1 : 2);
29547 }
29548
29549 switch (which)
29550 {
29551 default:
29552 gcc_unreachable();
29553
29554 case 3:
29555 d.one_vector_p = false;
29556 if (d.testing_p || !rtx_equal_p (op0, op1))
29557 break;
29558
29559 /* The elements of PERM do not suggest that only the first operand
29560 is used, but both operands are identical. Allow easier matching
29561 of the permutation by folding the permutation into the single
29562 input vector. */
29563 /* FALLTHRU */
29564 case 2:
29565 d.op0 = op1;
29566 d.one_vector_p = true;
29567 break;
29568
29569 case 1:
29570 d.op1 = op0;
29571 d.one_vector_p = true;
29572 break;
29573 }
29574
29575 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
29576
29577 if (!d.testing_p)
29578 return arm_expand_vec_perm_const_1 (&d);
29579
29580 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29581 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29582 if (!d.one_vector_p)
29583 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29584
29585 start_sequence ();
29586 bool ret = arm_expand_vec_perm_const_1 (&d);
29587 end_sequence ();
29588
29589 return ret;
29590 }
29591
29592 bool
29593 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29594 {
29595 /* If we are soft float and either have LDRD or the mode fits in a
29596 single word, then all auto-increment forms are OK. */
29597 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29598 return true;
29599
29600 switch (code)
29601 {
29602 /* Post-increment and pre-decrement are supported for all
29603 instruction forms except vector forms. */
29604 case ARM_POST_INC:
29605 case ARM_PRE_DEC:
29606 if (VECTOR_MODE_P (mode))
29607 {
29608 if (code != ARM_PRE_DEC)
29609 return true;
29610 else
29611 return false;
29612 }
29613
29614 return true;
29615
29616 case ARM_POST_DEC:
29617 case ARM_PRE_INC:
29618 /* Without LDRD and mode size greater than
29619 word size, there is no point in auto-incrementing
29620 because ldm and stm will not have these forms. */
29621 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29622 return false;
29623
29624 /* Vector and floating point modes do not support
29625 these auto increment forms. */
29626 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29627 return false;
29628
29629 return true;
29630
29631 default:
29632 return false;
29633
29634 }
29635
29636 return false;
29637 }
29638
29639 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
29640 on ARM, since we know that shifts by negative amounts are no-ops.
29641 Additionally, the default expansion code is not available or suitable
29642 for post-reload insn splits (this can occur when the register allocator
29643 chooses not to do a shift in NEON).
29644
29645 This function is used in both initial expand and post-reload splits, and
29646 handles all kinds of 64-bit shifts.
29647
29648 Input requirements:
29649 - It is safe for the input and output to be the same register, but
29650 early-clobber rules apply for the shift amount and scratch registers.
29651 - Shift by register requires both scratch registers. In all other cases
29652 the scratch registers may be NULL.
29653 - Ashiftrt by a register also clobbers the CC register. */
29654 void
29655 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29656 rtx amount, rtx scratch1, rtx scratch2)
29657 {
29658 rtx out_high = gen_highpart (SImode, out);
29659 rtx out_low = gen_lowpart (SImode, out);
29660 rtx in_high = gen_highpart (SImode, in);
29661 rtx in_low = gen_lowpart (SImode, in);
29662
29663 /* Terminology:
29664 in = the register pair containing the input value.
29665 out = the destination register pair.
29666 up = the high- or low-part of each pair.
29667 down = the opposite part to "up".
29668 In a shift, we can consider bits to shift from "up"-stream to
29669 "down"-stream, so in a left-shift "up" is the low-part and "down"
29670 is the high-part of each register pair. */
29671
29672 rtx out_up = code == ASHIFT ? out_low : out_high;
29673 rtx out_down = code == ASHIFT ? out_high : out_low;
29674 rtx in_up = code == ASHIFT ? in_low : in_high;
29675 rtx in_down = code == ASHIFT ? in_high : in_low;
29676
29677 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29678 gcc_assert (out
29679 && (REG_P (out) || GET_CODE (out) == SUBREG)
29680 && GET_MODE (out) == DImode);
29681 gcc_assert (in
29682 && (REG_P (in) || GET_CODE (in) == SUBREG)
29683 && GET_MODE (in) == DImode);
29684 gcc_assert (amount
29685 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29686 && GET_MODE (amount) == SImode)
29687 || CONST_INT_P (amount)));
29688 gcc_assert (scratch1 == NULL
29689 || (GET_CODE (scratch1) == SCRATCH)
29690 || (GET_MODE (scratch1) == SImode
29691 && REG_P (scratch1)));
29692 gcc_assert (scratch2 == NULL
29693 || (GET_CODE (scratch2) == SCRATCH)
29694 || (GET_MODE (scratch2) == SImode
29695 && REG_P (scratch2)));
29696 gcc_assert (!REG_P (out) || !REG_P (amount)
29697 || !HARD_REGISTER_P (out)
29698 || (REGNO (out) != REGNO (amount)
29699 && REGNO (out) + 1 != REGNO (amount)));
29700
29701 /* Macros to make following code more readable. */
29702 #define SUB_32(DEST,SRC) \
29703 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29704 #define RSB_32(DEST,SRC) \
29705 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29706 #define SUB_S_32(DEST,SRC) \
29707 gen_addsi3_compare0 ((DEST), (SRC), \
29708 GEN_INT (-32))
29709 #define SET(DEST,SRC) \
29710 gen_rtx_SET ((DEST), (SRC))
29711 #define SHIFT(CODE,SRC,AMOUNT) \
29712 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29713 #define LSHIFT(CODE,SRC,AMOUNT) \
29714 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29715 SImode, (SRC), (AMOUNT))
29716 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29717 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29718 SImode, (SRC), (AMOUNT))
29719 #define ORR(A,B) \
29720 gen_rtx_IOR (SImode, (A), (B))
29721 #define BRANCH(COND,LABEL) \
29722 gen_arm_cond_branch ((LABEL), \
29723 gen_rtx_ ## COND (CCmode, cc_reg, \
29724 const0_rtx), \
29725 cc_reg)
29726
29727 /* Shifts by register and shifts by constant are handled separately. */
29728 if (CONST_INT_P (amount))
29729 {
29730 /* We have a shift-by-constant. */
29731
29732 /* First, handle out-of-range shift amounts.
29733 In both cases we try to match the result an ARM instruction in a
29734 shift-by-register would give. This helps reduce execution
29735 differences between optimization levels, but it won't stop other
29736 parts of the compiler doing different things. This is "undefined
29737 behavior", in any case. */
29738 if (INTVAL (amount) <= 0)
29739 emit_insn (gen_movdi (out, in));
29740 else if (INTVAL (amount) >= 64)
29741 {
29742 if (code == ASHIFTRT)
29743 {
29744 rtx const31_rtx = GEN_INT (31);
29745 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29746 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29747 }
29748 else
29749 emit_insn (gen_movdi (out, const0_rtx));
29750 }
29751
29752 /* Now handle valid shifts. */
29753 else if (INTVAL (amount) < 32)
29754 {
29755 /* Shifts by a constant less than 32. */
29756 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29757
29758 /* Clearing the out register in DImode first avoids lots
29759 of spilling and results in less stack usage.
29760 Later this redundant insn is completely removed.
29761 Do that only if "in" and "out" are different registers. */
29762 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29763 emit_insn (SET (out, const0_rtx));
29764 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29765 emit_insn (SET (out_down,
29766 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29767 out_down)));
29768 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29769 }
29770 else
29771 {
29772 /* Shifts by a constant greater than 31. */
29773 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29774
29775 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29776 emit_insn (SET (out, const0_rtx));
29777 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29778 if (code == ASHIFTRT)
29779 emit_insn (gen_ashrsi3 (out_up, in_up,
29780 GEN_INT (31)));
29781 else
29782 emit_insn (SET (out_up, const0_rtx));
29783 }
29784 }
29785 else
29786 {
29787 /* We have a shift-by-register. */
29788 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29789
29790 /* This alternative requires the scratch registers. */
29791 gcc_assert (scratch1 && REG_P (scratch1));
29792 gcc_assert (scratch2 && REG_P (scratch2));
29793
29794 /* We will need the values "amount-32" and "32-amount" later.
29795 Swapping them around now allows the later code to be more general. */
29796 switch (code)
29797 {
29798 case ASHIFT:
29799 emit_insn (SUB_32 (scratch1, amount));
29800 emit_insn (RSB_32 (scratch2, amount));
29801 break;
29802 case ASHIFTRT:
29803 emit_insn (RSB_32 (scratch1, amount));
29804 /* Also set CC = amount > 32. */
29805 emit_insn (SUB_S_32 (scratch2, amount));
29806 break;
29807 case LSHIFTRT:
29808 emit_insn (RSB_32 (scratch1, amount));
29809 emit_insn (SUB_32 (scratch2, amount));
29810 break;
29811 default:
29812 gcc_unreachable ();
29813 }
29814
29815 /* Emit code like this:
29816
29817 arithmetic-left:
29818 out_down = in_down << amount;
29819 out_down = (in_up << (amount - 32)) | out_down;
29820 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29821 out_up = in_up << amount;
29822
29823 arithmetic-right:
29824 out_down = in_down >> amount;
29825 out_down = (in_up << (32 - amount)) | out_down;
29826 if (amount < 32)
29827 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29828 out_up = in_up << amount;
29829
29830 logical-right:
29831 out_down = in_down >> amount;
29832 out_down = (in_up << (32 - amount)) | out_down;
29833 if (amount < 32)
29834 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29835 out_up = in_up << amount;
29836
29837 The ARM and Thumb2 variants are the same but implemented slightly
29838 differently. If this were only called during expand we could just
29839 use the Thumb2 case and let combine do the right thing, but this
29840 can also be called from post-reload splitters. */
29841
29842 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29843
29844 if (!TARGET_THUMB2)
29845 {
29846 /* Emit code for ARM mode. */
29847 emit_insn (SET (out_down,
29848 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29849 if (code == ASHIFTRT)
29850 {
29851 rtx_code_label *done_label = gen_label_rtx ();
29852 emit_jump_insn (BRANCH (LT, done_label));
29853 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29854 out_down)));
29855 emit_label (done_label);
29856 }
29857 else
29858 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29859 out_down)));
29860 }
29861 else
29862 {
29863 /* Emit code for Thumb2 mode.
29864 Thumb2 can't do shift and or in one insn. */
29865 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29866 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29867
29868 if (code == ASHIFTRT)
29869 {
29870 rtx_code_label *done_label = gen_label_rtx ();
29871 emit_jump_insn (BRANCH (LT, done_label));
29872 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29873 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29874 emit_label (done_label);
29875 }
29876 else
29877 {
29878 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29879 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29880 }
29881 }
29882
29883 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29884 }
29885
29886 #undef SUB_32
29887 #undef RSB_32
29888 #undef SUB_S_32
29889 #undef SET
29890 #undef SHIFT
29891 #undef LSHIFT
29892 #undef REV_LSHIFT
29893 #undef ORR
29894 #undef BRANCH
29895 }
29896
29897 /* Returns true if the pattern is a valid symbolic address, which is either a
29898 symbol_ref or (symbol_ref + addend).
29899
29900 According to the ARM ELF ABI, the initial addend of REL-type relocations
29901 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29902 literal field of the instruction as a 16-bit signed value in the range
29903 -32768 <= A < 32768. */
29904
29905 bool
29906 arm_valid_symbolic_address_p (rtx addr)
29907 {
29908 rtx xop0, xop1 = NULL_RTX;
29909 rtx tmp = addr;
29910
29911 if (target_word_relocations)
29912 return false;
29913
29914 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29915 return true;
29916
29917 /* (const (plus: symbol_ref const_int)) */
29918 if (GET_CODE (addr) == CONST)
29919 tmp = XEXP (addr, 0);
29920
29921 if (GET_CODE (tmp) == PLUS)
29922 {
29923 xop0 = XEXP (tmp, 0);
29924 xop1 = XEXP (tmp, 1);
29925
29926 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29927 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29928 }
29929
29930 return false;
29931 }
29932
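/* For example, (const (plus (symbol_ref "sym") (const_int 4))) is accepted,
   while an addend of 0x10000 is rejected because it cannot be encoded in
   the 16-bit signed literal field of a MOVW/MOVT REL relocation.  */
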
29933 /* Returns true if *COMPARISON is a valid comparison operation, and puts
29934 the operands into a form that is valid for it. */
29935 bool
29936 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29937 {
29938 enum rtx_code code = GET_CODE (*comparison);
29939 int code_int;
29940 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29941 ? GET_MODE (*op2) : GET_MODE (*op1);
29942
29943 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29944
29945 if (code == UNEQ || code == LTGT)
29946 return false;
29947
29948 code_int = (int)code;
29949 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29950 PUT_CODE (*comparison, (enum rtx_code)code_int);
29951
29952 switch (mode)
29953 {
29954 case E_SImode:
29955 if (!arm_add_operand (*op1, mode))
29956 *op1 = force_reg (mode, *op1);
29957 if (!arm_add_operand (*op2, mode))
29958 *op2 = force_reg (mode, *op2);
29959 return true;
29960
29961 case E_DImode:
29962 if (!cmpdi_operand (*op1, mode))
29963 *op1 = force_reg (mode, *op1);
29964 if (!cmpdi_operand (*op2, mode))
29965 *op2 = force_reg (mode, *op2);
29966 return true;
29967
29968 case E_HFmode:
29969 if (!TARGET_VFP_FP16INST)
29970 break;
29971 /* FP16 comparisons are done in SF mode. */
29972 mode = SFmode;
29973 *op1 = convert_to_mode (mode, *op1, 1);
29974 *op2 = convert_to_mode (mode, *op2, 1);
29975 /* Fall through. */
29976 case E_SFmode:
29977 case E_DFmode:
29978 if (!vfp_compare_operand (*op1, mode))
29979 *op1 = force_reg (mode, *op1);
29980 if (!vfp_compare_operand (*op2, mode))
29981 *op2 = force_reg (mode, *op2);
29982 return true;
29983 default:
29984 break;
29985 }
29986
29987 return false;
29988
29989 }
29990
29991 /* Maximum number of instructions to use when setting a block of memory. */
29992 static int
29993 arm_block_set_max_insns (void)
29994 {
29995 if (optimize_function_for_size_p (cfun))
29996 return 4;
29997 else
29998 return current_tune->max_insns_inline_memset;
29999 }
30000
30001 /* Return TRUE if it's profitable to set a block of memory in the
30002 non-vectorized case. VAL is the value to set the memory
30003 with. LENGTH is the number of bytes to set. ALIGN is the
30004 alignment of the destination memory in bytes. UNALIGNED_P
30005 is TRUE if we can only set the memory with instructions that
30006 meet the alignment requirements. USE_STRD_P is TRUE if we
30007 can use strd to set the memory. */
30008 static bool
30009 arm_block_set_non_vect_profit_p (rtx val,
30010 unsigned HOST_WIDE_INT length,
30011 unsigned HOST_WIDE_INT align,
30012 bool unaligned_p, bool use_strd_p)
30013 {
30014 int num = 0;
30015 /* For a leftover of 0-7 bytes, the minimum number of strb/strh/str
30016 instructions needed to store it. */
30017 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
30018
30019 if (unaligned_p)
30020 {
30021 num = arm_const_inline_cost (SET, val);
30022 num += length / align + length % align;
30023 }
30024 else if (use_strd_p)
30025 {
30026 num = arm_const_double_inline_cost (val);
30027 num += (length >> 3) + leftover[length & 7];
30028 }
30029 else
30030 {
30031 num = arm_const_inline_cost (SET, val);
30032 num += (length >> 2) + leftover[length & 3];
30033 }
30034
30035 /* We may be able to combine the last STRH/STRB pair into a single STR
30036 by shifting it one byte back. */
30037 if (unaligned_access && length > 3 && (length & 3) == 3)
30038 num--;
30039
30040 return (num <= arm_block_set_max_insns ());
30041 }
30042
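/* For example, storing 15 bytes to a word-aligned destination on the
   non-STRD path costs the constant-load instructions plus 15 >> 2 == 3 word
   stores and leftover[3] == 2 narrow stores; when unaligned access is
   available, the trailing STRH/STRB pair can be merged into one STR, so one
   instruction is subtracted before comparing against
   arm_block_set_max_insns ().  */
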
30043 /* Return TRUE if it's profitable to set block of memory for
30044 vectorized case. LENGTH is the number of bytes to set.
30045 ALIGN is the alignment of destination memory in bytes.
30046 MODE is the vector mode used to set the memory. */
30047 static bool
30048 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
30049 unsigned HOST_WIDE_INT align,
30050 machine_mode mode)
30051 {
30052 int num;
30053 bool unaligned_p = ((align & 3) != 0);
30054 unsigned int nelt = GET_MODE_NUNITS (mode);
30055
30056 /* Instruction loading constant value. */
30057 num = 1;
30058 /* Instructions storing the memory. */
30059 num += (length + nelt - 1) / nelt;
30060 /* Instructions adjusting the address expression. We only need to
30061 adjust the address expression if it's 4-byte aligned and the leftover
30062 bytes can only be stored by a misaligned store instruction. */
30063 if (!unaligned_p && (length & 3) != 0)
30064 num++;
30065
30066 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
30067 if (!unaligned_p && mode == V16QImode)
30068 num--;
30069
30070 return (num <= arm_block_set_max_insns ());
30071 }
30072
30073 /* Set a block of memory using vectorization instructions for the
30074 unaligned case. We fill the first LENGTH bytes of the memory
30075 area starting from DSTBASE with byte constant VALUE. ALIGN is
30076 the alignment requirement of memory. Return TRUE if succeeded. */
30077 static bool
30078 arm_block_set_unaligned_vect (rtx dstbase,
30079 unsigned HOST_WIDE_INT length,
30080 unsigned HOST_WIDE_INT value,
30081 unsigned HOST_WIDE_INT align)
30082 {
30083 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
30084 rtx dst, mem;
30085 rtx val_vec, reg;
30086 rtx (*gen_func) (rtx, rtx);
30087 machine_mode mode;
30088 unsigned HOST_WIDE_INT v = value;
30089 unsigned int offset = 0;
30090 gcc_assert ((align & 0x3) != 0);
30091 nelt_v8 = GET_MODE_NUNITS (V8QImode);
30092 nelt_v16 = GET_MODE_NUNITS (V16QImode);
30093 if (length >= nelt_v16)
30094 {
30095 mode = V16QImode;
30096 gen_func = gen_movmisalignv16qi;
30097 }
30098 else
30099 {
30100 mode = V8QImode;
30101 gen_func = gen_movmisalignv8qi;
30102 }
30103 nelt_mode = GET_MODE_NUNITS (mode);
30104 gcc_assert (length >= nelt_mode);
30105 /* Skip if it isn't profitable. */
30106 if (!arm_block_set_vect_profit_p (length, align, mode))
30107 return false;
30108
30109 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30110 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30111
30112 v = sext_hwi (v, BITS_PER_WORD);
30113
30114 reg = gen_reg_rtx (mode);
30115 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
30116 /* Emit instruction loading the constant value. */
30117 emit_move_insn (reg, val_vec);
30118
30119 /* Handle nelt_mode bytes in a vector. */
30120 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
30121 {
30122 emit_insn ((*gen_func) (mem, reg));
30123 if (i + 2 * nelt_mode <= length)
30124 {
30125 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
30126 offset += nelt_mode;
30127 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30128 }
30129 }
30130
30131 /* If at least nelt_v8 bytes are left over, we must be in
30132 V16QImode. */
30133 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
30134
30135 /* Handle (8, 16) bytes leftover. */
30136 if (i + nelt_v8 < length)
30137 {
30138 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
30139 offset += length - i;
30140 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30141
30142 /* We are shifting bytes back, set the alignment accordingly. */
30143 if ((length & 1) != 0 && align >= 2)
30144 set_mem_align (mem, BITS_PER_UNIT);
30145
30146 emit_insn (gen_movmisalignv16qi (mem, reg));
30147 }
30148 /* Handle (0, 8] bytes leftover. */
30149 else if (i < length && i + nelt_v8 >= length)
30150 {
30151 if (mode == V16QImode)
30152 reg = gen_lowpart (V8QImode, reg);
30153
30154 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
30155 + (nelt_mode - nelt_v8))));
30156 offset += (length - i) + (nelt_mode - nelt_v8);
30157 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
30158
30159 /* We are shifting bytes back, set the alignment accordingly. */
30160 if ((length & 1) != 0 && align >= 2)
30161 set_mem_align (mem, BITS_PER_UNIT);
30162
30163 emit_insn (gen_movmisalignv8qi (mem, reg));
30164 }
30165
30166 return true;
30167 }
30168
30169 /* Set a block of memory using vectorization instructions for the
30170 aligned case. We fill the first LENGTH bytes of the memory area
30171 starting from DSTBASE with byte constant VALUE. ALIGN is the
30172 alignment requirement of memory. Return TRUE if succeeded. */
30173 static bool
30174 arm_block_set_aligned_vect (rtx dstbase,
30175 unsigned HOST_WIDE_INT length,
30176 unsigned HOST_WIDE_INT value,
30177 unsigned HOST_WIDE_INT align)
30178 {
30179 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
30180 rtx dst, addr, mem;
30181 rtx val_vec, reg;
30182 machine_mode mode;
30183 unsigned int offset = 0;
30184
30185 gcc_assert ((align & 0x3) == 0);
30186 nelt_v8 = GET_MODE_NUNITS (V8QImode);
30187 nelt_v16 = GET_MODE_NUNITS (V16QImode);
30188 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
30189 mode = V16QImode;
30190 else
30191 mode = V8QImode;
30192
30193 nelt_mode = GET_MODE_NUNITS (mode);
30194 gcc_assert (length >= nelt_mode);
30195 /* Skip if it isn't profitable. */
30196 if (!arm_block_set_vect_profit_p (length, align, mode))
30197 return false;
30198
30199 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30200
30201 reg = gen_reg_rtx (mode);
30202 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
30203 /* Emit instruction loading the constant value. */
30204 emit_move_insn (reg, val_vec);
30205
30206 i = 0;
30207 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
30208 if (mode == V16QImode)
30209 {
30210 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30211 emit_insn (gen_movmisalignv16qi (mem, reg));
30212 i += nelt_mode;
30213 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
30214 if (i + nelt_v8 < length && i + nelt_v16 > length)
30215 {
30216 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30217 offset += length - nelt_mode;
30218 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30219 /* We are shifting bytes back, set the alignment accordingly. */
30220 if ((length & 0x3) == 0)
30221 set_mem_align (mem, BITS_PER_UNIT * 4);
30222 else if ((length & 0x1) == 0)
30223 set_mem_align (mem, BITS_PER_UNIT * 2);
30224 else
30225 set_mem_align (mem, BITS_PER_UNIT);
30226
30227 emit_insn (gen_movmisalignv16qi (mem, reg));
30228 return true;
30229 }
30230 /* Fall through for bytes leftover. */
30231 mode = V8QImode;
30232 nelt_mode = GET_MODE_NUNITS (mode);
30233 reg = gen_lowpart (V8QImode, reg);
30234 }
30235
30236 /* Handle 8 bytes in a vector. */
30237 for (; (i + nelt_mode <= length); i += nelt_mode)
30238 {
30239 addr = plus_constant (Pmode, dst, i);
30240 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
30241 emit_move_insn (mem, reg);
30242 }
30243
30244 /* Handle single word leftover by shifting 4 bytes back. We can
30245 use aligned access for this case. */
30246 if (i + UNITS_PER_WORD == length)
30247 {
30248 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
30249 offset += i - UNITS_PER_WORD;
30250 mem = adjust_automodify_address (dstbase, mode, addr, offset);
30251 /* We are shifting 4 bytes back, set the alignment accordingly. */
30252 if (align > UNITS_PER_WORD)
30253 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
30254
30255 emit_move_insn (mem, reg);
30256 }
30257 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30258 We have to use unaligned access for this case. */
30259 else if (i < length)
30260 {
30261 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30262 offset += length - nelt_mode;
30263 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30264 /* We are shifting bytes back, set the alignment accordingly. */
30265 if ((length & 1) == 0)
30266 set_mem_align (mem, BITS_PER_UNIT * 2);
30267 else
30268 set_mem_align (mem, BITS_PER_UNIT);
30269
30270 emit_insn (gen_movmisalignv8qi (mem, reg));
30271 }
30272
30273 return true;
30274 }
30275
30276 /* Set a block of memory using plain strh/strb instructions, using
30277 only instructions allowed by ALIGN on the processor. We fill the
30278 first LENGTH bytes of the memory area starting from DSTBASE
30279 with byte constant VALUE. ALIGN is the alignment requirement
30280 of the memory. */
30281 static bool
30282 arm_block_set_unaligned_non_vect (rtx dstbase,
30283 unsigned HOST_WIDE_INT length,
30284 unsigned HOST_WIDE_INT value,
30285 unsigned HOST_WIDE_INT align)
30286 {
30287 unsigned int i;
30288 rtx dst, addr, mem;
30289 rtx val_exp, val_reg, reg;
30290 machine_mode mode;
30291 HOST_WIDE_INT v = value;
30292
30293 gcc_assert (align == 1 || align == 2);
30294
30295 if (align == 2)
30296 v |= (value << BITS_PER_UNIT);
30297
30298 v = sext_hwi (v, BITS_PER_WORD);
30299 val_exp = GEN_INT (v);
30300 /* Skip if it isn't profitable. */
30301 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30302 align, true, false))
30303 return false;
30304
30305 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30306 mode = (align == 2 ? HImode : QImode);
30307 val_reg = force_reg (SImode, val_exp);
30308 reg = gen_lowpart (mode, val_reg);
30309
30310 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30311 {
30312 addr = plus_constant (Pmode, dst, i);
30313 mem = adjust_automodify_address (dstbase, mode, addr, i);
30314 emit_move_insn (mem, reg);
30315 }
30316
30317 /* Handle single byte leftover. */
30318 if (i + 1 == length)
30319 {
30320 reg = gen_lowpart (QImode, val_reg);
30321 addr = plus_constant (Pmode, dst, i);
30322 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30323 emit_move_insn (mem, reg);
30324 i++;
30325 }
30326
30327 gcc_assert (i == length);
30328 return true;
30329 }
30330
30331 /* Set a block of memory using plain strd/str/strh/strb instructions,
30332 to permit unaligned copies on processors which support unaligned
30333 semantics for those instructions. We fill the first LENGTH bytes
30334 of the memory area starting from DSTBASE with byte constant VALUE.
30335 ALIGN is the alignment requirement of memory. */
30336 static bool
30337 arm_block_set_aligned_non_vect (rtx dstbase,
30338 unsigned HOST_WIDE_INT length,
30339 unsigned HOST_WIDE_INT value,
30340 unsigned HOST_WIDE_INT align)
30341 {
30342 unsigned int i;
30343 rtx dst, addr, mem;
30344 rtx val_exp, val_reg, reg;
30345 unsigned HOST_WIDE_INT v;
30346 bool use_strd_p;
30347
30348 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30349 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30350
30351 v = (value | (value << 8) | (value << 16) | (value << 24));
30352 if (length < UNITS_PER_WORD)
30353 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30354
30355 if (use_strd_p)
30356 v |= (v << BITS_PER_WORD);
30357 else
30358 v = sext_hwi (v, BITS_PER_WORD);
30359
30360 val_exp = GEN_INT (v);
30361 /* Skip if it isn't profitable. */
30362 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30363 align, false, use_strd_p))
30364 {
30365 if (!use_strd_p)
30366 return false;
30367
30368 /* Try without strd. */
30369 v = (v >> BITS_PER_WORD);
30370 v = sext_hwi (v, BITS_PER_WORD);
30371 val_exp = GEN_INT (v);
30372 use_strd_p = false;
30373 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30374 align, false, use_strd_p))
30375 return false;
30376 }
30377
30378 i = 0;
30379 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30380 /* Handle double words using strd if possible. */
30381 if (use_strd_p)
30382 {
30383 val_reg = force_reg (DImode, val_exp);
30384 reg = val_reg;
30385 for (; (i + 8 <= length); i += 8)
30386 {
30387 addr = plus_constant (Pmode, dst, i);
30388 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30389 emit_move_insn (mem, reg);
30390 }
30391 }
30392 else
30393 val_reg = force_reg (SImode, val_exp);
30394
30395 /* Handle words. */
30396 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30397 for (; (i + 4 <= length); i += 4)
30398 {
30399 addr = plus_constant (Pmode, dst, i);
30400 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30401 if ((align & 3) == 0)
30402 emit_move_insn (mem, reg);
30403 else
30404 emit_insn (gen_unaligned_storesi (mem, reg));
30405 }
30406
30407 /* Merge last pair of STRH and STRB into a STR if possible. */
30408 if (unaligned_access && i > 0 && (i + 3) == length)
30409 {
30410 addr = plus_constant (Pmode, dst, i - 1);
30411 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30412 /* We are shifting one byte back, set the alignment accordingly. */
30413 if ((align & 1) == 0)
30414 set_mem_align (mem, BITS_PER_UNIT);
30415
30416 /* Most likely this is an unaligned access, and we can't tell at
30417 compilation time. */
30418 emit_insn (gen_unaligned_storesi (mem, reg));
30419 return true;
30420 }
30421
30422 /* Handle half word leftover. */
30423 if (i + 2 <= length)
30424 {
30425 reg = gen_lowpart (HImode, val_reg);
30426 addr = plus_constant (Pmode, dst, i);
30427 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30428 if ((align & 1) == 0)
30429 emit_move_insn (mem, reg);
30430 else
30431 emit_insn (gen_unaligned_storehi (mem, reg));
30432
30433 i += 2;
30434 }
30435
30436 /* Handle single byte leftover. */
30437 if (i + 1 == length)
30438 {
30439 reg = gen_lowpart (QImode, val_reg);
30440 addr = plus_constant (Pmode, dst, i);
30441 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30442 emit_move_insn (mem, reg);
30443 }
30444
30445 return true;
30446 }
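
/* A worked example of the splitting above (illustrative only, assuming
   the profitability check passes): for LENGTH == 7, VALUE == 0xAB and a
   word-aligned destination, the byte is replicated to 0xABABABAB and,
   without unaligned access, the block is written as

	str	rV, [rD]	@ bytes 0-3
	strh	rV, [rD, #4]	@ bytes 4-5
	strb	rV, [rD, #6]	@ byte 6

   whereas with unaligned access enabled the trailing STRH/STRB pair is
   merged into a single overlapping STR at [rD, #3].  rV and rD stand for
   whichever registers hold the replicated value and the destination
   address.  */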
30447
30448 /* Set a block of memory using vectorization instructions for both
30449 aligned and unaligned cases. We fill the first LENGTH bytes of
30450 the memory area starting from DSTBASE with byte constant VALUE.
30451 ALIGN is the alignment requirement of memory. */
30452 static bool
30453 arm_block_set_vect (rtx dstbase,
30454 unsigned HOST_WIDE_INT length,
30455 unsigned HOST_WIDE_INT value,
30456 unsigned HOST_WIDE_INT align)
30457 {
30458 /* Check whether we need to use unaligned store instruction. */
30459 if (((align & 3) != 0 || (length & 3) != 0)
30460 /* Check whether unaligned store instruction is available. */
30461 && (!unaligned_access || BYTES_BIG_ENDIAN))
30462 return false;
30463
30464 if ((align & 3) == 0)
30465 return arm_block_set_aligned_vect (dstbase, length, value, align);
30466 else
30467 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30468 }
30469
30470 /* Expand a memory set (setmem) operation. First we try to do that using
30471 vectorization instructions, then with ARM unaligned access and
30472 double-word stores if profitable. OPERANDS[0] is the destination,
30473 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value used to
30474 initialize the memory, OPERANDS[3] is the known alignment of the
30475 destination. */
30476 bool
30477 arm_gen_setmem (rtx *operands)
30478 {
30479 rtx dstbase = operands[0];
30480 unsigned HOST_WIDE_INT length;
30481 unsigned HOST_WIDE_INT value;
30482 unsigned HOST_WIDE_INT align;
30483
30484 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30485 return false;
30486
30487 length = UINTVAL (operands[1]);
30488 if (length > 64)
30489 return false;
30490
30491 value = (UINTVAL (operands[2]) & 0xFF);
30492 align = UINTVAL (operands[3]);
30493 if (TARGET_NEON && length >= 8
30494 && current_tune->string_ops_prefer_neon
30495 && arm_block_set_vect (dstbase, length, value, align))
30496 return true;
30497
30498 if (!unaligned_access && (align & 3) != 0)
30499 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30500
30501 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30502 }
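
/* For illustration, assuming the NEON path is not taken, the tuning does
   not prefer STRD and the profitability check passes: a 10-byte,
   word-aligned memset with value 0xAB goes through
   arm_block_set_aligned_non_vect and becomes two word stores of
   0xABABABAB followed by one halfword store of 0xABAB, covering bytes
   0-7 and 8-9 respectively.  */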
30503
30504
30505 static bool
30506 arm_macro_fusion_p (void)
30507 {
30508 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30509 }
30510
30511 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30512 for MOVW / MOVT macro fusion. */
30513
30514 static bool
30515 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30516 {
30517 /* We are trying to fuse
30518 movw imm / movt imm
30519 instructions as a group that gets scheduled together. */
30520
30521 rtx set_dest = SET_DEST (curr_set);
30522
30523 if (GET_MODE (set_dest) != SImode)
30524 return false;
30525
30526 /* We are trying to match:
30527 prev (movw) == (set (reg r0) (const_int imm16))
30528 curr (movt) == (set (zero_extract (reg r0)
30529 (const_int 16)
30530 (const_int 16))
30531 (const_int imm16_1))
30532 or
30533 prev (movw) == (set (reg r1)
30534 (high (symbol_ref ("SYM"))))
30535 curr (movt) == (set (reg r0)
30536 (lo_sum (reg r1)
30537 (symbol_ref ("SYM")))) */
30538
30539 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30540 {
30541 if (CONST_INT_P (SET_SRC (curr_set))
30542 && CONST_INT_P (SET_SRC (prev_set))
30543 && REG_P (XEXP (set_dest, 0))
30544 && REG_P (SET_DEST (prev_set))
30545 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30546 return true;
30547
30548 }
30549 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30550 && REG_P (SET_DEST (curr_set))
30551 && REG_P (SET_DEST (prev_set))
30552 && GET_CODE (SET_SRC (prev_set)) == HIGH
30553 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30554 return true;
30555
30556 return false;
30557 }
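
/* For reference, the symbol form of the pair matched above corresponds
   to the usual address-materialisation sequence

	movw	r0, #:lower16:SYM
	movt	r0, #:upper16:SYM

   which the scheduler will then try to keep back to back.  */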
30558
30559 static bool
30560 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30561 {
30562 rtx prev_set = single_set (prev);
30563 rtx curr_set = single_set (curr);
30564
30565 if (!prev_set
30566 || !curr_set)
30567 return false;
30568
30569 if (any_condjump_p (curr))
30570 return false;
30571
30572 if (!arm_macro_fusion_p ())
30573 return false;
30574
30575 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30576 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30577 return true;
30578
30579 return false;
30580 }
30581
30582 /* Return true iff the instruction fusion described by OP is enabled. */
30583 bool
30584 arm_fusion_enabled_p (tune_params::fuse_ops op)
30585 {
30586 return current_tune->fusible_ops & op;
30587 }
30588
30589 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30590 scheduled for speculative execution. Reject the long-running division
30591 and square-root instructions. */
30592
30593 static bool
30594 arm_sched_can_speculate_insn (rtx_insn *insn)
30595 {
30596 switch (get_attr_type (insn))
30597 {
30598 case TYPE_SDIV:
30599 case TYPE_UDIV:
30600 case TYPE_FDIVS:
30601 case TYPE_FDIVD:
30602 case TYPE_FSQRTS:
30603 case TYPE_FSQRTD:
30604 case TYPE_NEON_FP_SQRT_S:
30605 case TYPE_NEON_FP_SQRT_D:
30606 case TYPE_NEON_FP_SQRT_S_Q:
30607 case TYPE_NEON_FP_SQRT_D_Q:
30608 case TYPE_NEON_FP_DIV_S:
30609 case TYPE_NEON_FP_DIV_D:
30610 case TYPE_NEON_FP_DIV_S_Q:
30611 case TYPE_NEON_FP_DIV_D_Q:
30612 return false;
30613 default:
30614 return true;
30615 }
30616 }
30617
30618 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30619
30620 static unsigned HOST_WIDE_INT
30621 arm_asan_shadow_offset (void)
30622 {
30623 return HOST_WIDE_INT_1U << 29;
30624 }
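
/* Sketch of the generic AddressSanitizer mapping this offset plugs into
   (not code from this file): an application address is translated to its
   shadow byte as

	shadow = (addr >> 3) + (1 << 29);

   i.e. a granularity of 8 application bytes per shadow byte with the
   shadow region based at 0x20000000.  */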
30625
30626
30627 /* This is a temporary fix for PR60655. Ideally we need
30628 to handle most of these cases in the generic part but
30629 currently we reject minus (..) (sym_ref). We try to
30630 ameliorate the case with minus (sym_ref1) (sym_ref2)
30631 where they are in the same section. */
30632
30633 static bool
30634 arm_const_not_ok_for_debug_p (rtx p)
30635 {
30636 tree decl_op0 = NULL;
30637 tree decl_op1 = NULL;
30638
30639 if (GET_CODE (p) == UNSPEC)
30640 return true;
30641 if (GET_CODE (p) == MINUS)
30642 {
30643 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30644 {
30645 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30646 if (decl_op1
30647 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30648 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30649 {
30650 if ((VAR_P (decl_op1)
30651 || TREE_CODE (decl_op1) == CONST_DECL)
30652 && (VAR_P (decl_op0)
30653 || TREE_CODE (decl_op0) == CONST_DECL))
30654 return (get_variable_section (decl_op1, false)
30655 != get_variable_section (decl_op0, false));
30656
30657 if (TREE_CODE (decl_op1) == LABEL_DECL
30658 && TREE_CODE (decl_op0) == LABEL_DECL)
30659 return (DECL_CONTEXT (decl_op1)
30660 != DECL_CONTEXT (decl_op0));
30661 }
30662
30663 return true;
30664 }
30665 }
30666
30667 return false;
30668 }
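
/* A hypothetical example of the case handled above: with

	static int a = 1;	(both placed in .data)
	static int b = 2;

   the difference (minus (symbol_ref "b") (symbol_ref "a")) is acceptable
   in debug expressions because both symbols live in the same section,
   while a difference of symbols from different sections is rejected.  */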
30669
30670 /* Return TRUE if X is a reference to a value in the constant pool. */
30671 extern bool
30672 arm_is_constant_pool_ref (rtx x)
30673 {
30674 return (MEM_P (x)
30675 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30676 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30677 }
30678
30679 /* Remember the last target of arm_set_current_function. */
30680 static GTY(()) tree arm_previous_fndecl;
30681
30682 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30683
30684 void
30685 save_restore_target_globals (tree new_tree)
30686 {
30687 /* If we have a previous state, use it. */
30688 if (TREE_TARGET_GLOBALS (new_tree))
30689 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30690 else if (new_tree == target_option_default_node)
30691 restore_target_globals (&default_target_globals);
30692 else
30693 {
30694 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30695 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30696 }
30697
30698 arm_option_params_internal ();
30699 }
30700
30701 /* Invalidate arm_previous_fndecl. */
30702
30703 void
30704 arm_reset_previous_fndecl (void)
30705 {
30706 arm_previous_fndecl = NULL_TREE;
30707 }
30708
30709 /* Establish appropriate back-end context for processing the function
30710 FNDECL. The argument might be NULL to indicate processing at top
30711 level, outside of any function scope. */
30712
30713 static void
30714 arm_set_current_function (tree fndecl)
30715 {
30716 if (!fndecl || fndecl == arm_previous_fndecl)
30717 return;
30718
30719 tree old_tree = (arm_previous_fndecl
30720 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30721 : NULL_TREE);
30722
30723 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30724
30725 /* If current function has no attributes but previous one did,
30726 use the default node. */
30727 if (! new_tree && old_tree)
30728 new_tree = target_option_default_node;
30729
30730 /* If nothing to do return. #pragma GCC reset or #pragma GCC pop to
30731 the default have been handled by save_restore_target_globals from
30732 arm_pragma_target_parse. */
30733 if (old_tree == new_tree)
30734 return;
30735
30736 arm_previous_fndecl = fndecl;
30737
30738 /* First set the target options. */
30739 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30740
30741 save_restore_target_globals (new_tree);
30742 }
30743
30744 /* Implement TARGET_OPTION_PRINT. */
30745
30746 static void
30747 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30748 {
30749 int flags = ptr->x_target_flags;
30750 const char *fpu_name;
30751
30752 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30753 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30754
30755 fprintf (file, "%*sselected isa %s\n", indent, "",
30756 TARGET_THUMB2_P (flags) ? "thumb2" :
30757 TARGET_THUMB_P (flags) ? "thumb1" :
30758 "arm");
30759
30760 if (ptr->x_arm_arch_string)
30761 fprintf (file, "%*sselected architecture %s\n", indent, "",
30762 ptr->x_arm_arch_string);
30763
30764 if (ptr->x_arm_cpu_string)
30765 fprintf (file, "%*sselected CPU %s\n", indent, "",
30766 ptr->x_arm_cpu_string);
30767
30768 if (ptr->x_arm_tune_string)
30769 fprintf (file, "%*sselected tune %s\n", indent, "",
30770 ptr->x_arm_tune_string);
30771
30772 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30773 }
30774
30775 /* Hook to determine if one function can safely inline another. */
30776
30777 static bool
30778 arm_can_inline_p (tree caller, tree callee)
30779 {
30780 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30781 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30782 bool can_inline = true;
30783
30784 struct cl_target_option *caller_opts
30785 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30786 : target_option_default_node);
30787
30788 struct cl_target_option *callee_opts
30789 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30790 : target_option_default_node);
30791
30792 if (callee_opts == caller_opts)
30793 return true;
30794
30795 /* Callee's ISA features should be a subset of the caller's. */
30796 struct arm_build_target caller_target;
30797 struct arm_build_target callee_target;
30798 caller_target.isa = sbitmap_alloc (isa_num_bits);
30799 callee_target.isa = sbitmap_alloc (isa_num_bits);
30800
30801 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30802 false);
30803 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30804 false);
30805 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30806 can_inline = false;
30807
30808 sbitmap_free (caller_target.isa);
30809 sbitmap_free (callee_target.isa);
30810
30811 /* OK to inline between different modes.
30812 Function with mode specific instructions, e.g using asm,
30813 must be explicitly protected with noinline. */
30814 return can_inline;
30815 }
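
/* As an illustrative example, a callee declared with
   __attribute__ ((target ("fpu=neon-vfpv4"))) is not inlined into a
   caller built without the NEON ISA bits, because the callee's feature
   set would not be a subset of the caller's.  */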
30816
30817 /* Hook to fix function's alignment affected by target attribute. */
30818
30819 static void
30820 arm_relayout_function (tree fndecl)
30821 {
30822 if (DECL_USER_ALIGN (fndecl))
30823 return;
30824
30825 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30826
30827 if (!callee_tree)
30828 callee_tree = target_option_default_node;
30829
30830 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30831 SET_DECL_ALIGN
30832 (fndecl,
30833 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30834 }
30835
30836 /* Inner function to process the attribute ((target (...))): take an argument
30837 and set the current options from that argument. If we have a list,
30838 recursively process each element of the list. */
30839
30840 static bool
30841 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30842 {
30843 if (TREE_CODE (args) == TREE_LIST)
30844 {
30845 bool ret = true;
30846
30847 for (; args; args = TREE_CHAIN (args))
30848 if (TREE_VALUE (args)
30849 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30850 ret = false;
30851 return ret;
30852 }
30853
30854 else if (TREE_CODE (args) != STRING_CST)
30855 {
30856 error ("attribute %<target%> argument not a string");
30857 return false;
30858 }
30859
30860 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30861 char *q;
30862
30863 while ((q = strtok (argstr, ",")) != NULL)
30864 {
30865 argstr = NULL;
30866 if (!strcmp (q, "thumb"))
30867 opts->x_target_flags |= MASK_THUMB;
30868
30869 else if (!strcmp (q, "arm"))
30870 opts->x_target_flags &= ~MASK_THUMB;
30871
30872 else if (!strcmp (q, "general-regs-only"))
30873 opts->x_target_flags |= MASK_GENERAL_REGS_ONLY;
30874
30875 else if (!strncmp (q, "fpu=", 4))
30876 {
30877 int fpu_index;
30878 if (! opt_enum_arg_to_value (OPT_mfpu_, q + 4,
30879 &fpu_index, CL_TARGET))
30880 {
30881 error ("invalid fpu for target attribute or pragma %qs", q);
30882 return false;
30883 }
30884 if (fpu_index == TARGET_FPU_auto)
30885 {
30886 /* This doesn't really make sense until we support
30887 general dynamic selection of the architecture and all
30888 sub-features. */
30889 sorry ("auto fpu selection not currently permitted here");
30890 return false;
30891 }
30892 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30893 }
30894 else if (!strncmp (q, "arch=", 5))
30895 {
30896 char *arch = q + 5;
30897 const arch_option *arm_selected_arch
30898 = arm_parse_arch_option_name (all_architectures, "arch", arch);
30899
30900 if (!arm_selected_arch)
30901 {
30902 error ("invalid architecture for target attribute or pragma %qs",
30903 q);
30904 return false;
30905 }
30906
30907 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
30908 }
30909 else if (q[0] == '+')
30910 {
30911 opts->x_arm_arch_string
30912 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
30913 }
30914 else
30915 {
30916 error ("unknown target attribute or pragma %qs", q);
30917 return false;
30918 }
30919 }
30920
30921 return true;
30922 }
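
/* Examples of attribute strings accepted by the parser above
   (illustrative declarations only):

	int fast_path (int) __attribute__ ((target ("arch=armv7-a,thumb")));
	int vfp_helper (int) __attribute__ ((target ("fpu=vfpv3-d16")));  */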
30923
30924 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30925
30926 tree
30927 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30928 struct gcc_options *opts_set)
30929 {
30930 struct cl_target_option cl_opts;
30931
30932 if (!arm_valid_target_attribute_rec (args, opts))
30933 return NULL_TREE;
30934
30935 cl_target_option_save (&cl_opts, opts);
30936 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30937 arm_option_check_internal (opts);
30938 /* Do any overrides, such as global options arch=xxx.
30939 We do this since arm_active_target was overridden. */
30940 arm_option_reconfigure_globals ();
30941 arm_options_perform_arch_sanity_checks ();
30942 arm_option_override_internal (opts, opts_set);
30943
30944 return build_target_option_node (opts);
30945 }
30946
30947 static void
30948 add_attribute (const char * mode, tree *attributes)
30949 {
30950 size_t len = strlen (mode);
30951 tree value = build_string (len, mode);
30952
30953 TREE_TYPE (value) = build_array_type (char_type_node,
30954 build_index_type (size_int (len)));
30955
30956 *attributes = tree_cons (get_identifier ("target"),
30957 build_tree_list (NULL_TREE, value),
30958 *attributes);
30959 }
30960
30961 /* For testing. Insert thumb or arm modes alternately on successive functions. */
30962
30963 static void
30964 arm_insert_attributes (tree fndecl, tree * attributes)
30965 {
30966 const char *mode;
30967
30968 if (! TARGET_FLIP_THUMB)
30969 return;
30970
30971 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30972 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
30973 return;
30974
30975 /* Nested definitions must inherit mode. */
30976 if (current_function_decl)
30977 {
30978 mode = TARGET_THUMB ? "thumb" : "arm";
30979 add_attribute (mode, attributes);
30980 return;
30981 }
30982
30983 /* If there is already a setting don't change it. */
30984 if (lookup_attribute ("target", *attributes) != NULL)
30985 return;
30986
30987 mode = thumb_flipper ? "thumb" : "arm";
30988 add_attribute (mode, attributes);
30989
30990 thumb_flipper = !thumb_flipper;
30991 }
30992
30993 /* Hook to validate attribute((target("string"))). */
30994
30995 static bool
30996 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30997 tree args, int ARG_UNUSED (flags))
30998 {
30999 bool ret = true;
31000 struct gcc_options func_options;
31001 tree cur_tree, new_optimize;
31002 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
31003
31004 /* Get the optimization options of the current function. */
31005 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
31006
31007 /* If the function changed the optimization levels as well as setting target
31008 options, start with the optimizations specified. */
31009 if (!func_optimize)
31010 func_optimize = optimization_default_node;
31011
31012 /* Init func_options. */
31013 memset (&func_options, 0, sizeof (func_options));
31014 init_options_struct (&func_options, NULL);
31015 lang_hooks.init_options_struct (&func_options);
31016
31017 /* Initialize func_options to the defaults. */
31018 cl_optimization_restore (&func_options,
31019 TREE_OPTIMIZATION (func_optimize));
31020
31021 cl_target_option_restore (&func_options,
31022 TREE_TARGET_OPTION (target_option_default_node));
31023
31024 /* Set func_options flags with new target mode. */
31025 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
31026 &global_options_set);
31027
31028 if (cur_tree == NULL_TREE)
31029 ret = false;
31030
31031 new_optimize = build_optimization_node (&func_options);
31032
31033 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
31034
31035 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
31036
31037 finalize_options_struct (&func_options);
31038
31039 return ret;
31040 }
31041
31042 /* Match an ISA feature bitmap to a named FPU. We always use the
31043 first entry that exactly matches the feature set, so that we
31044 effectively canonicalize the FPU name for the assembler. */
31045 static const char*
31046 arm_identify_fpu_from_isa (sbitmap isa)
31047 {
31048 auto_sbitmap fpubits (isa_num_bits);
31049 auto_sbitmap cand_fpubits (isa_num_bits);
31050
31051 bitmap_and (fpubits, isa, isa_all_fpubits);
31052
31053 /* If there are no ISA feature bits relating to the FPU, we must be
31054 doing soft-float. */
31055 if (bitmap_empty_p (fpubits))
31056 return "softvfp";
31057
31058 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31059 {
31060 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
31061 if (bitmap_equal_p (fpubits, cand_fpubits))
31062 return all_fpus[i].name;
31063 }
31064 /* We must find an entry, or things have gone wrong. */
31065 gcc_unreachable ();
31066 }
31067
31068 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
31069 by the function FNDECL. */
31070 void
31071 arm_declare_function_name (FILE *stream, const char *name, tree decl)
31072 {
31073 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
31074
31075 struct cl_target_option *targ_options;
31076 if (target_parts)
31077 targ_options = TREE_TARGET_OPTION (target_parts);
31078 else
31079 targ_options = TREE_TARGET_OPTION (target_option_current_node);
31080 gcc_assert (targ_options);
31081
31082 /* Only update the assembler .arch string if it is distinct from the last
31083 such string we printed. arch_to_print is set conditionally in case
31084 targ_options->x_arm_arch_string is NULL which can be the case
31085 when cc1 is invoked directly without passing -march option. */
31086 std::string arch_to_print;
31087 if (targ_options->x_arm_arch_string)
31088 arch_to_print = targ_options->x_arm_arch_string;
31089
31090 if (arch_to_print != arm_last_printed_arch_string)
31091 {
31092 std::string arch_name
31093 = arch_to_print.substr (0, arch_to_print.find ("+"));
31094 asm_fprintf (asm_out_file, "\t.arch %s\n", arch_name.c_str ());
31095 const arch_option *arch
31096 = arm_parse_arch_option_name (all_architectures, "-march",
31097 targ_options->x_arm_arch_string);
31098 auto_sbitmap opt_bits (isa_num_bits);
31099
31100 gcc_assert (arch);
31101 if (arch->common.extensions)
31102 {
31103 for (const struct cpu_arch_extension *opt = arch->common.extensions;
31104 opt->name != NULL;
31105 opt++)
31106 {
31107 if (!opt->remove)
31108 {
31109 arm_initialize_isa (opt_bits, opt->isa_bits);
31110 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
31111 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
31112 asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
31113 opt->name);
31114 }
31115 }
31116 }
31117
31118 arm_last_printed_arch_string = arch_to_print;
31119 }
31120
31121 fprintf (stream, "\t.syntax unified\n");
31122
31123 if (TARGET_THUMB)
31124 {
31125 if (is_called_in_ARM_mode (decl)
31126 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
31127 && cfun->is_thunk))
31128 fprintf (stream, "\t.code 32\n");
31129 else if (TARGET_THUMB1)
31130 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
31131 else
31132 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
31133 }
31134 else
31135 fprintf (stream, "\t.arm\n");
31136
31137 std::string fpu_to_print
31138 = TARGET_SOFT_FLOAT
31139 ? "softvfp" : arm_identify_fpu_from_isa (arm_active_target.isa);
31140
31141 if (fpu_to_print != arm_last_printed_fpu_string)
31142 {
31143 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_to_print.c_str ());
31144 arm_last_printed_fpu_string = fpu_to_print;
31145 }
31146
31147 if (TARGET_POKE_FUNCTION_NAME)
31148 arm_poke_function_name (stream, (const char *) name);
31149 }
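
/* As an illustration, for a Thumb-2 function compiled for armv7-a with a
   vfpv3-d16 FPU the directives emitted above would typically be

	.arch armv7-a
	.syntax unified
	.thumb
	.thumb_func
	.fpu vfpv3-d16

   with .arch and .fpu omitted when they match what was last printed.  */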
31150
31151 /* If MEM is in the form of [base+offset], extract the two parts
31152 of the address and store them in BASE and OFFSET; otherwise return
31153 false after clearing BASE and OFFSET. */
31154
31155 static bool
31156 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
31157 {
31158 rtx addr;
31159
31160 gcc_assert (MEM_P (mem));
31161
31162 addr = XEXP (mem, 0);
31163
31164 /* Strip off const from addresses like (const (addr)). */
31165 if (GET_CODE (addr) == CONST)
31166 addr = XEXP (addr, 0);
31167
31168 if (GET_CODE (addr) == REG)
31169 {
31170 *base = addr;
31171 *offset = const0_rtx;
31172 return true;
31173 }
31174
31175 if (GET_CODE (addr) == PLUS
31176 && GET_CODE (XEXP (addr, 0)) == REG
31177 && CONST_INT_P (XEXP (addr, 1)))
31178 {
31179 *base = XEXP (addr, 0);
31180 *offset = XEXP (addr, 1);
31181 return true;
31182 }
31183
31184 *base = NULL_RTX;
31185 *offset = NULL_RTX;
31186
31187 return false;
31188 }
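
/* E.g. a MEM whose address is (plus (reg r3) (const_int 8)) yields
   *BASE == (reg r3) and *OFFSET == (const_int 8), while a plain
   (reg r3) address yields *OFFSET == const0_rtx.  */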
31189
31190 /* If INSN is a load or store whose address has the form [base+offset],
31191 extract the two parts and store them in BASE and OFFSET. Set IS_LOAD
31192 to TRUE if it is a load. Return TRUE if INSN is such an instruction,
31193 otherwise return FALSE. */
31194
31195 static bool
31196 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
31197 {
31198 rtx x, dest, src;
31199
31200 gcc_assert (INSN_P (insn));
31201 x = PATTERN (insn);
31202 if (GET_CODE (x) != SET)
31203 return false;
31204
31205 src = SET_SRC (x);
31206 dest = SET_DEST (x);
31207 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
31208 {
31209 *is_load = false;
31210 extract_base_offset_in_addr (dest, base, offset);
31211 }
31212 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
31213 {
31214 *is_load = true;
31215 extract_base_offset_in_addr (src, base, offset);
31216 }
31217 else
31218 return false;
31219
31220 return (*base != NULL_RTX && *offset != NULL_RTX);
31221 }
31222
31223 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
31224
31225 Currently we only support fusing ldr or str instructions, so FUSION_PRI
31226 and PRI are only calculated for these instructions. For other instructions,
31227 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds
31228 of instruction fusion can be supported by returning different priorities.
31229
31230 It's important that irrelevant instructions get the largest FUSION_PRI. */
31231
31232 static void
31233 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
31234 int *fusion_pri, int *pri)
31235 {
31236 int tmp, off_val;
31237 bool is_load;
31238 rtx base, offset;
31239
31240 gcc_assert (INSN_P (insn));
31241
31242 tmp = max_pri - 1;
31243 if (!fusion_load_store (insn, &base, &offset, &is_load))
31244 {
31245 *pri = tmp;
31246 *fusion_pri = tmp;
31247 return;
31248 }
31249
31250 /* Load goes first. */
31251 if (is_load)
31252 *fusion_pri = tmp - 1;
31253 else
31254 *fusion_pri = tmp - 2;
31255
31256 tmp /= 2;
31257
31258 /* INSN with smaller base register goes first. */
31259 tmp -= ((REGNO (base) & 0xff) << 20);
31260
31261 /* INSN with smaller offset goes first. */
31262 off_val = (int)(INTVAL (offset));
31263 if (off_val >= 0)
31264 tmp -= (off_val & 0xfffff);
31265 else
31266 tmp += ((- off_val) & 0xfffff);
31267
31268 *pri = tmp;
31269 return;
31270 }
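
/* Illustrative effect of the priorities above (assuming the scheduler
   honours them): for

	ldr	r0, [r3, #4]
	ldr	r1, [r3, #8]

   both insns get the load FUSION_PRI (MAX_PRI - 2) and the smaller
   offset gets the larger PRI, so the pair is kept adjacent and in offset
   order, which is what a later ldrd/strd peephole wants.  */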
31271
31272
31273 /* Construct and return a PARALLEL RTX vector with elements numbering the
31274 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
31275 the vector - from the perspective of the architecture. This does not
31276 line up with GCC's perspective on lane numbers, so we end up with
31277 different masks depending on our target endian-ness. The diagram
31278 below may help. We must draw the distinction when building masks
31279 which select one half of the vector. An instruction selecting
31280 architectural low-lanes for a big-endian target, must be described using
31281 a mask selecting GCC high-lanes.
31282
31283 Big-Endian Little-Endian
31284
31285 GCC 0 1 2 3 3 2 1 0
31286 | x | x | x | x | | x | x | x | x |
31287 Architecture 3 2 1 0 3 2 1 0
31288
31289 Low Mask: { 2, 3 } { 0, 1 }
31290 High Mask: { 0, 1 } { 2, 3 }
31291 */
31292
31293 rtx
31294 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
31295 {
31296 int nunits = GET_MODE_NUNITS (mode);
31297 rtvec v = rtvec_alloc (nunits / 2);
31298 int high_base = nunits / 2;
31299 int low_base = 0;
31300 int base;
31301 rtx t1;
31302 int i;
31303
31304 if (BYTES_BIG_ENDIAN)
31305 base = high ? low_base : high_base;
31306 else
31307 base = high ? high_base : low_base;
31308
31309 for (i = 0; i < nunits / 2; i++)
31310 RTVEC_ELT (v, i) = GEN_INT (base + i);
31311
31312 t1 = gen_rtx_PARALLEL (mode, v);
31313 return t1;
31314 }
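
/* E.g. for V4SImode this returns, with HIGH == TRUE,
   (parallel [(const_int 2) (const_int 3)]) on little-endian but
   (parallel [(const_int 0) (const_int 1)]) on big-endian, matching the
   mask table in the comment above.  */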
31315
31316 /* Check OP for validity as a PARALLEL RTX vector with elements
31317 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
31318 from the perspective of the architecture. See the diagram above
31319 arm_simd_vect_par_cnst_half for more details. */
31320
31321 bool
31322 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
31323 bool high)
31324 {
31325 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
31326 HOST_WIDE_INT count_op = XVECLEN (op, 0);
31327 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
31328 int i = 0;
31329
31330 if (!VECTOR_MODE_P (mode))
31331 return false;
31332
31333 if (count_op != count_ideal)
31334 return false;
31335
31336 for (i = 0; i < count_ideal; i++)
31337 {
31338 rtx elt_op = XVECEXP (op, 0, i);
31339 rtx elt_ideal = XVECEXP (ideal, 0, i);
31340
31341 if (!CONST_INT_P (elt_op)
31342 || INTVAL (elt_ideal) != INTVAL (elt_op))
31343 return false;
31344 }
31345 return true;
31346 }
31347
31348 /* Can output mi_thunk for all cases except for non-zero vcall_offset
31349 in Thumb1. */
31350 static bool
31351 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
31352 const_tree)
31353 {
31354 /* For now, we punt and do not handle this for TARGET_THUMB1. */
31355 if (vcall_offset && TARGET_THUMB1)
31356 return false;
31357
31358 /* Otherwise ok. */
31359 return true;
31360 }
31361
31362 /* Generate RTL for a conditional branch with rtx comparison CODE in
31363 mode CC_MODE. The destination of the unlikely conditional branch
31364 is LABEL_REF. */
31365
31366 void
31367 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
31368 rtx label_ref)
31369 {
31370 rtx x;
31371 x = gen_rtx_fmt_ee (code, VOIDmode,
31372 gen_rtx_REG (cc_mode, CC_REGNUM),
31373 const0_rtx);
31374
31375 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31376 gen_rtx_LABEL_REF (VOIDmode, label_ref),
31377 pc_rtx);
31378 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31379 }
31380
31381 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31382
31383 For pure-code sections there is no letter code for this attribute, so
31384 output all the section flags numerically when this is needed. */
31385
31386 static bool
31387 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31388 {
31389
31390 if (flags & SECTION_ARM_PURECODE)
31391 {
31392 *num = 0x20000000;
31393
31394 if (!(flags & SECTION_DEBUG))
31395 *num |= 0x2;
31396 if (flags & SECTION_EXCLUDE)
31397 *num |= 0x80000000;
31398 if (flags & SECTION_WRITE)
31399 *num |= 0x1;
31400 if (flags & SECTION_CODE)
31401 *num |= 0x4;
31402 if (flags & SECTION_MERGE)
31403 *num |= 0x10;
31404 if (flags & SECTION_STRINGS)
31405 *num |= 0x20;
31406 if (flags & SECTION_TLS)
31407 *num |= 0x400;
31408 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31409 *num |= 0x200;
31410
31411 return true;
31412 }
31413
31414 return false;
31415 }
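
/* For instance, an allocatable executable pure-code section
   (SECTION_CODE set, SECTION_DEBUG clear) is given
   0x20000000 | 0x2 | 0x4 == 0x20000006, i.e. SHF_ARM_PURECODE |
   SHF_ALLOC | SHF_EXECINSTR in ELF terms.  */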
31416
31417 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31418
31419 If pure-code is passed as an option, make sure all functions are in
31420 sections that have the SHF_ARM_PURECODE attribute. */
31421
31422 static section *
31423 arm_function_section (tree decl, enum node_frequency freq,
31424 bool startup, bool exit)
31425 {
31426 const char * section_name;
31427 section * sec;
31428
31429 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31430 return default_function_section (decl, freq, startup, exit);
31431
31432 if (!target_pure_code)
31433 return default_function_section (decl, freq, startup, exit);
31434
31435
31436 section_name = DECL_SECTION_NAME (decl);
31437
31438 /* If a function is not in a named section then it falls under the 'default'
31439 text section, also known as '.text'. We can preserve previous behavior as
31440 the default text section already has the SHF_ARM_PURECODE section
31441 attribute. */
31442 if (!section_name)
31443 {
31444 section *default_sec = default_function_section (decl, freq, startup,
31445 exit);
31446
31447 /* If default_sec is not null, then it must be a special section like for
31448 example .text.startup. We set the pure-code attribute and return the
31449 same section to preserve existing behavior. */
31450 if (default_sec)
31451 default_sec->common.flags |= SECTION_ARM_PURECODE;
31452 return default_sec;
31453 }
31454
31455 /* Otherwise look whether a section has already been created with
31456 'section_name'. */
31457 sec = get_named_section (decl, section_name, 0);
31458 if (!sec)
31459 /* If that is not the case passing NULL as the section's name to
31460 'get_named_section' will create a section with the declaration's
31461 section name. */
31462 sec = get_named_section (decl, NULL, 0);
31463
31464 /* Set the SHF_ARM_PURECODE attribute. */
31465 sec->common.flags |= SECTION_ARM_PURECODE;
31466
31467 return sec;
31468 }
31469
31470 /* Implements the TARGET_SECTION_TYPE_FLAGS hook.
31471
31472 If DECL is a function declaration and pure-code is passed as an option
31473 then add the SHF_ARM_PURECODE attribute to the section flags. NAME is the
31474 section's name and RELOC indicates whether the declaration's initializer may
31475 contain runtime relocations. */
31476
31477 static unsigned int
31478 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31479 {
31480 unsigned int flags = default_section_type_flags (decl, name, reloc);
31481
31482 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31483 flags |= SECTION_ARM_PURECODE;
31484
31485 return flags;
31486 }
31487
31488 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31489
31490 static void
31491 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31492 rtx op0, rtx op1,
31493 rtx *quot_p, rtx *rem_p)
31494 {
31495 if (mode == SImode)
31496 gcc_assert (!TARGET_IDIV);
31497
31498 scalar_int_mode libval_mode
31499 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31500
31501 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31502 libval_mode,
31503 op0, GET_MODE (op0),
31504 op1, GET_MODE (op1));
31505
31506 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31507 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31508 GET_MODE_SIZE (mode));
31509
31510 gcc_assert (quotient);
31511 gcc_assert (remainder);
31512
31513 *quot_p = quotient;
31514 *rem_p = remainder;
31515 }
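
/* Illustrative behaviour: for SImode operands the libcall is
   __aeabi_idivmod or __aeabi_uidivmod, which per the ARM run-time ABI
   returns the quotient in r0 and the remainder in r1; that register pair
   is the double-width LIBVAL above, so operands 7 and 3 produce
   quotient 2 and remainder 1.  */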
31516
31517 /* This function checks for the availability of the coprocessor builtin passed
31518 in BUILTIN for the current target. Returns true if it is available and
31519 false otherwise. If a BUILTIN is passed for which this function has not
31520 been implemented it will cause an exception. */
31521
31522 bool
31523 arm_coproc_builtin_available (enum unspecv builtin)
31524 {
31525 /* None of these builtins are available in Thumb mode if the target only
31526 supports Thumb-1. */
31527 if (TARGET_THUMB1)
31528 return false;
31529
31530 switch (builtin)
31531 {
31532 case VUNSPEC_CDP:
31533 case VUNSPEC_LDC:
31534 case VUNSPEC_LDCL:
31535 case VUNSPEC_STC:
31536 case VUNSPEC_STCL:
31537 case VUNSPEC_MCR:
31538 case VUNSPEC_MRC:
31539 if (arm_arch4)
31540 return true;
31541 break;
31542 case VUNSPEC_CDP2:
31543 case VUNSPEC_LDC2:
31544 case VUNSPEC_LDC2L:
31545 case VUNSPEC_STC2:
31546 case VUNSPEC_STC2L:
31547 case VUNSPEC_MCR2:
31548 case VUNSPEC_MRC2:
31549 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31550 ARMv8-{A,M}. */
31551 if (arm_arch5t)
31552 return true;
31553 break;
31554 case VUNSPEC_MCRR:
31555 case VUNSPEC_MRRC:
31556 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31557 ARMv8-{A,M}. */
31558 if (arm_arch6 || arm_arch5te)
31559 return true;
31560 break;
31561 case VUNSPEC_MCRR2:
31562 case VUNSPEC_MRRC2:
31563 if (arm_arch6)
31564 return true;
31565 break;
31566 default:
31567 gcc_unreachable ();
31568 }
31569 return false;
31570 }
31571
31572 /* This function returns true if OP is a valid memory operand for the ldc and
31573 stc coprocessor instructions and false otherwise. */
31574
31575 bool
31576 arm_coproc_ldc_stc_legitimate_address (rtx op)
31577 {
31578 HOST_WIDE_INT range;
31579 /* Has to be a memory operand. */
31580 if (!MEM_P (op))
31581 return false;
31582
31583 op = XEXP (op, 0);
31584
31585 /* We accept registers. */
31586 if (REG_P (op))
31587 return true;
31588
31589 switch (GET_CODE (op))
31590 {
31591 case PLUS:
31592 {
31593 /* Or registers with an offset. */
31594 if (!REG_P (XEXP (op, 0)))
31595 return false;
31596
31597 op = XEXP (op, 1);
31598
31599 /* The offset must be an immediate though. */
31600 if (!CONST_INT_P (op))
31601 return false;
31602
31603 range = INTVAL (op);
31604
31605 /* Within the range of [-1020,1020]. */
31606 if (!IN_RANGE (range, -1020, 1020))
31607 return false;
31608
31609 /* And a multiple of 4. */
31610 return (range % 4) == 0;
31611 }
31612 case PRE_INC:
31613 case POST_INC:
31614 case PRE_DEC:
31615 case POST_DEC:
31616 return REG_P (XEXP (op, 0));
31617 default:
31618 gcc_unreachable ();
31619 }
31620 return false;
31621 }
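
/* Accepted address forms, in assembly terms (illustrative):

	[rN]			plain base register
	[rN, #imm]		imm a multiple of 4 within [-1020, 1020]

   plus the pre/post increment and decrement forms handled by the switch
   above.  */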
31622
31623 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31624
31625 In VFPv1, VFP registers could only be accessed in the mode they were
31626 set, so subregs would be invalid there. However, we don't support
31627 VFPv1 at the moment, and the restriction was lifted in VFPv2.
31628
31629 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31630 VFP registers in little-endian order. We can't describe that accurately to
31631 GCC, so avoid taking subregs of such values.
31632
31633 The only exception is going from a 128-bit to a 64-bit type. In that
31634 case the data layout happens to be consistent for big-endian, so we
31635 explicitly allow that case. */
31636
31637 static bool
31638 arm_can_change_mode_class (machine_mode from, machine_mode to,
31639 reg_class_t rclass)
31640 {
31641 if (TARGET_BIG_END
31642 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
31643 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
31644 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
31645 && reg_classes_intersect_p (VFP_REGS, rclass))
31646 return false;
31647 return true;
31648 }
31649
31650 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
31651 strcpy from constants will be faster. */
31652
31653 static HOST_WIDE_INT
31654 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
31655 {
31656 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
31657 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
31658 return MAX (align, BITS_PER_WORD * factor);
31659 return align;
31660 }
31661
31662 /* Emit a speculation barrier on target architectures that do not have
31663 DSB/ISB directly. Such systems probably don't need a barrier
31664 themselves, but if the code is ever run on a later architecture, it
31665 might become a problem. */
31666 void
31667 arm_emit_speculation_barrier_function ()
31668 {
31669 emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
31670 }
31671
31672 #if CHECKING_P
31673 namespace selftest {
31674
31675 /* Scan the static data tables generated by parsecpu.awk looking for
31676 potential issues with the data. We primarily check for
31677 inconsistencies in the option extensions at present (extensions
31678 that duplicate others but aren't marked as aliases). Furthermore,
31679 for correct canonicalization later options must never be a subset
31680 of an earlier option. Any extension should also only specify other
31681 feature bits and never an architecture bit. The architecture is inferred
31682 from the declaration of the extension. */
31683 static void
31684 arm_test_cpu_arch_data (void)
31685 {
31686 const arch_option *arch;
31687 const cpu_option *cpu;
31688 auto_sbitmap target_isa (isa_num_bits);
31689 auto_sbitmap isa1 (isa_num_bits);
31690 auto_sbitmap isa2 (isa_num_bits);
31691
31692 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31693 {
31694 const cpu_arch_extension *ext1, *ext2;
31695
31696 if (arch->common.extensions == NULL)
31697 continue;
31698
31699 arm_initialize_isa (target_isa, arch->common.isa_bits);
31700
31701 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31702 {
31703 if (ext1->alias)
31704 continue;
31705
31706 arm_initialize_isa (isa1, ext1->isa_bits);
31707 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31708 {
31709 if (ext2->alias || ext1->remove != ext2->remove)
31710 continue;
31711
31712 arm_initialize_isa (isa2, ext2->isa_bits);
31713 /* If the option is a subset of the parent option, it doesn't
31714 add anything and so isn't useful. */
31715 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31716
31717 /* If the extension specifies any architectural bits then
31718 disallow it. Extensions should only specify feature bits. */
31719 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31720 }
31721 }
31722 }
31723
31724 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31725 {
31726 const cpu_arch_extension *ext1, *ext2;
31727
31728 if (cpu->common.extensions == NULL)
31729 continue;
31730
31731 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31732
31733 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31734 {
31735 if (ext1->alias)
31736 continue;
31737
31738 arm_initialize_isa (isa1, ext1->isa_bits);
31739 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31740 {
31741 if (ext2->alias || ext1->remove != ext2->remove)
31742 continue;
31743
31744 arm_initialize_isa (isa2, ext2->isa_bits);
31745 /* If the option is a subset of the parent option, it doesn't
31746 add anything and so isn't useful. */
31747 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31748
31749 /* If the extension specifies any architectural bits then
31750 disallow it. Extensions should only specify feature bits. */
31751 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31752 }
31753 }
31754 }
31755 }
31756
31757 /* Scan the static data tables generated by parsecpu.awk looking for
31758 potential issues with the data. Here we check for consistency between the
31759 fpu bits, in particular we check that ISA_ALL_FPU_INTERNAL does not contain
31760 a feature bit that is not defined by any FPU flag. */
31761 static void
31762 arm_test_fpu_data (void)
31763 {
31764 auto_sbitmap isa_all_fpubits (isa_num_bits);
31765 auto_sbitmap fpubits (isa_num_bits);
31766 auto_sbitmap tmpset (isa_num_bits);
31767
31768 static const enum isa_feature fpu_bitlist[]
31769 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
31770 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
31771
31772 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31773 {
31774 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
31775 bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
31776 bitmap_clear (isa_all_fpubits);
31777 bitmap_copy (isa_all_fpubits, tmpset);
31778 }
31779
31780 if (!bitmap_empty_p (isa_all_fpubits))
31781 {
31782 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
31783 " group that are not defined by any FPU.\n"
31784 " Check your arm-cpus.in.\n");
31785 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
31786 }
31787 }
31788
31789 static void
31790 arm_run_selftests (void)
31791 {
31792 arm_test_cpu_arch_data ();
31793 arm_test_fpu_data ();
31794 }
31795 } /* Namespace selftest. */
31796
31797 #undef TARGET_RUN_TARGET_SELFTESTS
31798 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31799 #endif /* CHECKING_P */
31800
31801 struct gcc_target targetm = TARGET_INITIALIZER;
31802
31803 #include "gt-arm.h"