gcc/config/arm/arm.c
1 /* Output routines for GCC for ARM.
2 Copyright (C) 1991-2019 Free Software Foundation, Inc.
3 Contributed by Pieter `Tiggr' Schoenmakers (rcpieter@win.tue.nl)
4 and Martin Simmons (martin@harleqn.co.uk).
5 More major hacks by Richard Earnshaw (rearnsha@arm.com).
6
7 This file is part of GCC.
8
9 GCC is free software; you can redistribute it and/or modify it
10 under the terms of the GNU General Public License as published
11 by the Free Software Foundation; either version 3, or (at your
12 option) any later version.
13
14 GCC is distributed in the hope that it will be useful, but WITHOUT
15 ANY WARRANTY; without even the implied warranty of MERCHANTABILITY
16 or FITNESS FOR A PARTICULAR PURPOSE. See the GNU General Public
17 License for more details.
18
19 You should have received a copy of the GNU General Public License
20 along with GCC; see the file COPYING3. If not see
21 <http://www.gnu.org/licenses/>. */
22
23 #define IN_TARGET_CODE 1
24
25 #include "config.h"
26 #define INCLUDE_STRING
27 #include "system.h"
28 #include "coretypes.h"
29 #include "backend.h"
30 #include "target.h"
31 #include "rtl.h"
32 #include "tree.h"
33 #include "memmodel.h"
34 #include "cfghooks.h"
35 #include "df.h"
36 #include "tm_p.h"
37 #include "stringpool.h"
38 #include "attribs.h"
39 #include "optabs.h"
40 #include "regs.h"
41 #include "emit-rtl.h"
42 #include "recog.h"
43 #include "cgraph.h"
44 #include "diagnostic-core.h"
45 #include "alias.h"
46 #include "fold-const.h"
47 #include "stor-layout.h"
48 #include "calls.h"
49 #include "varasm.h"
50 #include "output.h"
51 #include "insn-attr.h"
52 #include "flags.h"
53 #include "reload.h"
54 #include "explow.h"
55 #include "expr.h"
56 #include "cfgrtl.h"
57 #include "sched-int.h"
58 #include "common/common-target.h"
59 #include "langhooks.h"
60 #include "intl.h"
61 #include "libfuncs.h"
62 #include "params.h"
63 #include "opts.h"
64 #include "dumpfile.h"
65 #include "target-globals.h"
66 #include "builtins.h"
67 #include "tm-constrs.h"
68 #include "rtl-iter.h"
69 #include "optabs-libfuncs.h"
70 #include "gimplify.h"
71 #include "gimple.h"
72 #include "selftest.h"
73
74 /* This file should be included last. */
75 #include "target-def.h"
76
77 /* Forward definitions of types. */
78 typedef struct minipool_node Mnode;
79 typedef struct minipool_fixup Mfix;
80
81 /* The last .arch and .fpu assembly strings that we printed. */
82 static std::string arm_last_printed_arch_string;
83 static std::string arm_last_printed_fpu_string;
84
85 void (*arm_lang_output_object_attributes_hook)(void);
86
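/* A group of four ints; optimal_immediate_sequence (declared below) uses it
   to return the up-to-four immediate values of a constant-building sequence.  */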
87 struct four_ints
88 {
89 int i[4];
90 };
91
92 /* Forward function declarations. */
93 static bool arm_const_not_ok_for_debug_p (rtx);
94 static int arm_needs_doubleword_align (machine_mode, const_tree);
95 static int arm_compute_static_chain_stack_bytes (void);
96 static arm_stack_offsets *arm_get_frame_offsets (void);
97 static void arm_compute_frame_layout (void);
98 static void arm_add_gc_roots (void);
99 static int arm_gen_constant (enum rtx_code, machine_mode, rtx,
100 unsigned HOST_WIDE_INT, rtx, rtx, int, int);
101 static unsigned bit_count (unsigned long);
102 static unsigned bitmap_popcount (const sbitmap);
103 static int arm_address_register_rtx_p (rtx, int);
104 static int arm_legitimate_index_p (machine_mode, rtx, RTX_CODE, int);
105 static bool is_called_in_ARM_mode (tree);
106 static int thumb2_legitimate_index_p (machine_mode, rtx, int);
107 static int thumb1_base_register_rtx_p (rtx, machine_mode, int);
108 static rtx arm_legitimize_address (rtx, rtx, machine_mode);
109 static reg_class_t arm_preferred_reload_class (rtx, reg_class_t);
110 static rtx thumb_legitimize_address (rtx, rtx, machine_mode);
111 inline static int thumb1_index_register_rtx_p (rtx, int);
112 static int thumb_far_jump_used_p (void);
113 static bool thumb_force_lr_save (void);
114 static unsigned arm_size_return_regs (void);
115 static bool arm_assemble_integer (rtx, unsigned int, int);
116 static void arm_print_operand (FILE *, rtx, int);
117 static void arm_print_operand_address (FILE *, machine_mode, rtx);
118 static bool arm_print_operand_punct_valid_p (unsigned char code);
119 static const char *fp_const_from_val (REAL_VALUE_TYPE *);
120 static arm_cc get_arm_condition_code (rtx);
121 static bool arm_fixed_condition_code_regs (unsigned int *, unsigned int *);
122 static const char *output_multi_immediate (rtx *, const char *, const char *,
123 int, HOST_WIDE_INT);
124 static const char *shift_op (rtx, HOST_WIDE_INT *);
125 static struct machine_function *arm_init_machine_status (void);
126 static void thumb_exit (FILE *, int);
127 static HOST_WIDE_INT get_jump_table_size (rtx_jump_table_data *);
128 static Mnode *move_minipool_fix_forward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
129 static Mnode *add_minipool_forward_ref (Mfix *);
130 static Mnode *move_minipool_fix_backward_ref (Mnode *, Mnode *, HOST_WIDE_INT);
131 static Mnode *add_minipool_backward_ref (Mfix *);
132 static void assign_minipool_offsets (Mfix *);
133 static void arm_print_value (FILE *, rtx);
134 static void dump_minipool (rtx_insn *);
135 static int arm_barrier_cost (rtx_insn *);
136 static Mfix *create_fix_barrier (Mfix *, HOST_WIDE_INT);
137 static void push_minipool_barrier (rtx_insn *, HOST_WIDE_INT);
138 static void push_minipool_fix (rtx_insn *, HOST_WIDE_INT, rtx *,
139 machine_mode, rtx);
140 static void arm_reorg (void);
141 static void note_invalid_constants (rtx_insn *, HOST_WIDE_INT, int);
142 static unsigned long arm_compute_save_reg0_reg12_mask (void);
143 static unsigned long arm_compute_save_core_reg_mask (void);
144 static unsigned long arm_isr_value (tree);
145 static unsigned long arm_compute_func_type (void);
146 static tree arm_handle_fndecl_attribute (tree *, tree, tree, int, bool *);
147 static tree arm_handle_pcs_attribute (tree *, tree, tree, int, bool *);
148 static tree arm_handle_isr_attribute (tree *, tree, tree, int, bool *);
149 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
150 static tree arm_handle_notshared_attribute (tree *, tree, tree, int, bool *);
151 #endif
152 static tree arm_handle_cmse_nonsecure_entry (tree *, tree, tree, int, bool *);
153 static tree arm_handle_cmse_nonsecure_call (tree *, tree, tree, int, bool *);
154 static void arm_output_function_epilogue (FILE *);
155 static void arm_output_function_prologue (FILE *);
156 static int arm_comp_type_attributes (const_tree, const_tree);
157 static void arm_set_default_type_attributes (tree);
158 static int arm_adjust_cost (rtx_insn *, int, rtx_insn *, int, unsigned int);
159 static int arm_sched_reorder (FILE *, int, rtx_insn **, int *, int);
160 static int optimal_immediate_sequence (enum rtx_code code,
161 unsigned HOST_WIDE_INT val,
162 struct four_ints *return_sequence);
163 static int optimal_immediate_sequence_1 (enum rtx_code code,
164 unsigned HOST_WIDE_INT val,
165 struct four_ints *return_sequence,
166 int i);
167 static int arm_get_strip_length (int);
168 static bool arm_function_ok_for_sibcall (tree, tree);
169 static machine_mode arm_promote_function_mode (const_tree,
170 machine_mode, int *,
171 const_tree, int);
172 static bool arm_return_in_memory (const_tree, const_tree);
173 static rtx arm_function_value (const_tree, const_tree, bool);
174 static rtx arm_libcall_value_1 (machine_mode);
175 static rtx arm_libcall_value (machine_mode, const_rtx);
176 static bool arm_function_value_regno_p (const unsigned int);
177 static void arm_internal_label (FILE *, const char *, unsigned long);
178 static void arm_output_mi_thunk (FILE *, tree, HOST_WIDE_INT, HOST_WIDE_INT,
179 tree);
180 static bool arm_have_conditional_execution (void);
181 static bool arm_cannot_force_const_mem (machine_mode, rtx);
182 static bool arm_legitimate_constant_p (machine_mode, rtx);
183 static bool arm_rtx_costs (rtx, machine_mode, int, int, int *, bool);
184 static int arm_address_cost (rtx, machine_mode, addr_space_t, bool);
185 static int arm_register_move_cost (machine_mode, reg_class_t, reg_class_t);
186 static int arm_memory_move_cost (machine_mode, reg_class_t, bool);
187 static void emit_constant_insn (rtx cond, rtx pattern);
188 static rtx_insn *emit_set_insn (rtx, rtx);
189 static rtx emit_multi_reg_push (unsigned long, unsigned long);
190 static int arm_arg_partial_bytes (cumulative_args_t, machine_mode,
191 tree, bool);
192 static rtx arm_function_arg (cumulative_args_t, machine_mode,
193 const_tree, bool);
194 static void arm_function_arg_advance (cumulative_args_t, machine_mode,
195 const_tree, bool);
196 static pad_direction arm_function_arg_padding (machine_mode, const_tree);
197 static unsigned int arm_function_arg_boundary (machine_mode, const_tree);
198 static rtx aapcs_allocate_return_reg (machine_mode, const_tree,
199 const_tree);
200 static rtx aapcs_libcall_value (machine_mode);
201 static int aapcs_select_return_coproc (const_tree, const_tree);
202
203 #ifdef OBJECT_FORMAT_ELF
204 static void arm_elf_asm_constructor (rtx, int) ATTRIBUTE_UNUSED;
205 static void arm_elf_asm_destructor (rtx, int) ATTRIBUTE_UNUSED;
206 #endif
207 #ifndef ARM_PE
208 static void arm_encode_section_info (tree, rtx, int);
209 #endif
210
211 static void arm_file_end (void);
212 static void arm_file_start (void);
213 static void arm_insert_attributes (tree, tree *);
214
215 static void arm_setup_incoming_varargs (cumulative_args_t, machine_mode,
216 tree, int *, int);
217 static bool arm_pass_by_reference (cumulative_args_t,
218 machine_mode, const_tree, bool);
219 static bool arm_promote_prototypes (const_tree);
220 static bool arm_default_short_enums (void);
221 static bool arm_align_anon_bitfield (void);
222 static bool arm_return_in_msb (const_tree);
223 static bool arm_must_pass_in_stack (machine_mode, const_tree);
224 static bool arm_return_in_memory (const_tree, const_tree);
225 #if ARM_UNWIND_INFO
226 static void arm_unwind_emit (FILE *, rtx_insn *);
227 static bool arm_output_ttype (rtx);
228 static void arm_asm_emit_except_personality (rtx);
229 #endif
230 static void arm_asm_init_sections (void);
231 static rtx arm_dwarf_register_span (rtx);
232
233 static tree arm_cxx_guard_type (void);
234 static bool arm_cxx_guard_mask_bit (void);
235 static tree arm_get_cookie_size (tree);
236 static bool arm_cookie_has_size (void);
237 static bool arm_cxx_cdtor_returns_this (void);
238 static bool arm_cxx_key_method_may_be_inline (void);
239 static void arm_cxx_determine_class_data_visibility (tree);
240 static bool arm_cxx_class_data_always_comdat (void);
241 static bool arm_cxx_use_aeabi_atexit (void);
242 static void arm_init_libfuncs (void);
243 static tree arm_build_builtin_va_list (void);
244 static void arm_expand_builtin_va_start (tree, rtx);
245 static tree arm_gimplify_va_arg_expr (tree, tree, gimple_seq *, gimple_seq *);
246 static void arm_option_override (void);
247 static void arm_option_save (struct cl_target_option *, struct gcc_options *);
248 static void arm_option_restore (struct gcc_options *,
249 struct cl_target_option *);
250 static void arm_override_options_after_change (void);
251 static void arm_option_print (FILE *, int, struct cl_target_option *);
252 static void arm_set_current_function (tree);
253 static bool arm_can_inline_p (tree, tree);
254 static void arm_relayout_function (tree);
255 static bool arm_valid_target_attribute_p (tree, tree, tree, int);
256 static unsigned HOST_WIDE_INT arm_shift_truncation_mask (machine_mode);
257 static bool arm_sched_can_speculate_insn (rtx_insn *);
258 static bool arm_macro_fusion_p (void);
259 static bool arm_cannot_copy_insn_p (rtx_insn *);
260 static int arm_issue_rate (void);
261 static int arm_first_cycle_multipass_dfa_lookahead (void);
262 static int arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *, int);
263 static void arm_output_dwarf_dtprel (FILE *, int, rtx) ATTRIBUTE_UNUSED;
264 static bool arm_output_addr_const_extra (FILE *, rtx);
265 static bool arm_allocate_stack_slots_for_args (void);
266 static bool arm_warn_func_return (tree);
267 static tree arm_promoted_type (const_tree t);
268 static bool arm_scalar_mode_supported_p (scalar_mode);
269 static bool arm_frame_pointer_required (void);
270 static bool arm_can_eliminate (const int, const int);
271 static void arm_asm_trampoline_template (FILE *);
272 static void arm_trampoline_init (rtx, tree, rtx);
273 static rtx arm_trampoline_adjust_address (rtx);
274 static rtx_insn *arm_pic_static_addr (rtx orig, rtx reg);
275 static bool cortex_a9_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
276 static bool xscale_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
277 static bool fa726te_sched_adjust_cost (rtx_insn *, int, rtx_insn *, int *);
278 static bool arm_array_mode_supported_p (machine_mode,
279 unsigned HOST_WIDE_INT);
280 static machine_mode arm_preferred_simd_mode (scalar_mode);
281 static bool arm_class_likely_spilled_p (reg_class_t);
282 static HOST_WIDE_INT arm_vector_alignment (const_tree type);
283 static bool arm_vector_alignment_reachable (const_tree type, bool is_packed);
284 static bool arm_builtin_support_vector_misalignment (machine_mode mode,
285 const_tree type,
286 int misalignment,
287 bool is_packed);
288 static void arm_conditional_register_usage (void);
289 static enum flt_eval_method arm_excess_precision (enum excess_precision_type);
290 static reg_class_t arm_preferred_rename_class (reg_class_t rclass);
291 static void arm_autovectorize_vector_sizes (vector_sizes *);
292 static int arm_default_branch_cost (bool, bool);
293 static int arm_cortex_a5_branch_cost (bool, bool);
294 static int arm_cortex_m_branch_cost (bool, bool);
295 static int arm_cortex_m7_branch_cost (bool, bool);
296
297 static bool arm_vectorize_vec_perm_const (machine_mode, rtx, rtx, rtx,
298 const vec_perm_indices &);
299
300 static bool aarch_macro_fusion_pair_p (rtx_insn*, rtx_insn*);
301
302 static int arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
303 tree vectype,
304 int misalign ATTRIBUTE_UNUSED);
305 static unsigned arm_add_stmt_cost (void *data, int count,
306 enum vect_cost_for_stmt kind,
307 struct _stmt_vec_info *stmt_info,
308 int misalign,
309 enum vect_cost_model_location where);
310
311 static void arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
312 bool op0_preserve_value);
313 static unsigned HOST_WIDE_INT arm_asan_shadow_offset (void);
314
315 static void arm_sched_fusion_priority (rtx_insn *, int, int *, int*);
316 static bool arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT,
317 const_tree);
318 static section *arm_function_section (tree, enum node_frequency, bool, bool);
319 static bool arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num);
320 static unsigned int arm_elf_section_type_flags (tree decl, const char *name,
321 int reloc);
322 static void arm_expand_divmod_libfunc (rtx, machine_mode, rtx, rtx, rtx *, rtx *);
323 static opt_scalar_float_mode arm_floatn_mode (int, bool);
324 static unsigned int arm_hard_regno_nregs (unsigned int, machine_mode);
325 static bool arm_hard_regno_mode_ok (unsigned int, machine_mode);
326 static bool arm_modes_tieable_p (machine_mode, machine_mode);
327 static HOST_WIDE_INT arm_constant_alignment (const_tree, HOST_WIDE_INT);
328 \f
329 /* Table of machine attributes. */
330 static const struct attribute_spec arm_attribute_table[] =
331 {
332 /* { name, min_len, max_len, decl_req, type_req, fn_type_req,
333 affects_type_identity, handler, exclude } */
334 /* Function calls made to this symbol must be done indirectly, because
335 it may lie outside of the 26 bit addressing range of a normal function
336 call. */
337 { "long_call", 0, 0, false, true, true, false, NULL, NULL },
338 /* Whereas these functions are always known to reside within the 26 bit
339 addressing range. */
340 { "short_call", 0, 0, false, true, true, false, NULL, NULL },
341 /* Specify the procedure call conventions for a function. */
342 { "pcs", 1, 1, false, true, true, false, arm_handle_pcs_attribute,
343 NULL },
344 /* Interrupt Service Routines have special prologue and epilogue requirements. */
345 { "isr", 0, 1, false, false, false, false, arm_handle_isr_attribute,
346 NULL },
347 { "interrupt", 0, 1, false, false, false, false, arm_handle_isr_attribute,
348 NULL },
349 { "naked", 0, 0, true, false, false, false,
350 arm_handle_fndecl_attribute, NULL },
351 #ifdef ARM_PE
352 /* ARM/PE has three new attributes:
353 interfacearm - ?
354 dllexport - for exporting a function/variable that will live in a dll
355 dllimport - for importing a function/variable from a dll
356
357 Microsoft allows multiple declspecs in one __declspec, separating
358 them with spaces. We do NOT support this. Instead, use __declspec
359 multiple times.
360 */
361 { "dllimport", 0, 0, true, false, false, false, NULL, NULL },
362 { "dllexport", 0, 0, true, false, false, false, NULL, NULL },
363 { "interfacearm", 0, 0, true, false, false, false,
364 arm_handle_fndecl_attribute, NULL },
365 #elif TARGET_DLLIMPORT_DECL_ATTRIBUTES
366 { "dllimport", 0, 0, false, false, false, false, handle_dll_attribute,
367 NULL },
368 { "dllexport", 0, 0, false, false, false, false, handle_dll_attribute,
369 NULL },
370 { "notshared", 0, 0, false, true, false, false,
371 arm_handle_notshared_attribute, NULL },
372 #endif
373 /* ARMv8-M Security Extensions support. */
374 { "cmse_nonsecure_entry", 0, 0, true, false, false, false,
375 arm_handle_cmse_nonsecure_entry, NULL },
376 { "cmse_nonsecure_call", 0, 0, true, false, false, true,
377 arm_handle_cmse_nonsecure_call, NULL },
378 { NULL, 0, 0, false, false, false, false, NULL, NULL }
379 };
380 \f
381 /* Initialize the GCC target structure. */
382 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
383 #undef TARGET_MERGE_DECL_ATTRIBUTES
384 #define TARGET_MERGE_DECL_ATTRIBUTES merge_dllimport_decl_attributes
385 #endif
386
387 #undef TARGET_LEGITIMIZE_ADDRESS
388 #define TARGET_LEGITIMIZE_ADDRESS arm_legitimize_address
389
390 #undef TARGET_ATTRIBUTE_TABLE
391 #define TARGET_ATTRIBUTE_TABLE arm_attribute_table
392
393 #undef TARGET_INSERT_ATTRIBUTES
394 #define TARGET_INSERT_ATTRIBUTES arm_insert_attributes
395
396 #undef TARGET_ASM_FILE_START
397 #define TARGET_ASM_FILE_START arm_file_start
398 #undef TARGET_ASM_FILE_END
399 #define TARGET_ASM_FILE_END arm_file_end
400
401 #undef TARGET_ASM_ALIGNED_SI_OP
402 #define TARGET_ASM_ALIGNED_SI_OP NULL
403 #undef TARGET_ASM_INTEGER
404 #define TARGET_ASM_INTEGER arm_assemble_integer
405
406 #undef TARGET_PRINT_OPERAND
407 #define TARGET_PRINT_OPERAND arm_print_operand
408 #undef TARGET_PRINT_OPERAND_ADDRESS
409 #define TARGET_PRINT_OPERAND_ADDRESS arm_print_operand_address
410 #undef TARGET_PRINT_OPERAND_PUNCT_VALID_P
411 #define TARGET_PRINT_OPERAND_PUNCT_VALID_P arm_print_operand_punct_valid_p
412
413 #undef TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA
414 #define TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA arm_output_addr_const_extra
415
416 #undef TARGET_ASM_FUNCTION_PROLOGUE
417 #define TARGET_ASM_FUNCTION_PROLOGUE arm_output_function_prologue
418
419 #undef TARGET_ASM_FUNCTION_EPILOGUE
420 #define TARGET_ASM_FUNCTION_EPILOGUE arm_output_function_epilogue
421
422 #undef TARGET_CAN_INLINE_P
423 #define TARGET_CAN_INLINE_P arm_can_inline_p
424
425 #undef TARGET_RELAYOUT_FUNCTION
426 #define TARGET_RELAYOUT_FUNCTION arm_relayout_function
427
428 #undef TARGET_OPTION_OVERRIDE
429 #define TARGET_OPTION_OVERRIDE arm_option_override
430
431 #undef TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE
432 #define TARGET_OVERRIDE_OPTIONS_AFTER_CHANGE arm_override_options_after_change
433
434 #undef TARGET_OPTION_SAVE
435 #define TARGET_OPTION_SAVE arm_option_save
436
437 #undef TARGET_OPTION_RESTORE
438 #define TARGET_OPTION_RESTORE arm_option_restore
439
440 #undef TARGET_OPTION_PRINT
441 #define TARGET_OPTION_PRINT arm_option_print
442
443 #undef TARGET_COMP_TYPE_ATTRIBUTES
444 #define TARGET_COMP_TYPE_ATTRIBUTES arm_comp_type_attributes
445
446 #undef TARGET_SCHED_CAN_SPECULATE_INSN
447 #define TARGET_SCHED_CAN_SPECULATE_INSN arm_sched_can_speculate_insn
448
449 #undef TARGET_SCHED_MACRO_FUSION_P
450 #define TARGET_SCHED_MACRO_FUSION_P arm_macro_fusion_p
451
452 #undef TARGET_SCHED_MACRO_FUSION_PAIR_P
453 #define TARGET_SCHED_MACRO_FUSION_PAIR_P aarch_macro_fusion_pair_p
454
455 #undef TARGET_SET_DEFAULT_TYPE_ATTRIBUTES
456 #define TARGET_SET_DEFAULT_TYPE_ATTRIBUTES arm_set_default_type_attributes
457
458 #undef TARGET_SCHED_ADJUST_COST
459 #define TARGET_SCHED_ADJUST_COST arm_adjust_cost
460
461 #undef TARGET_SET_CURRENT_FUNCTION
462 #define TARGET_SET_CURRENT_FUNCTION arm_set_current_function
463
464 #undef TARGET_OPTION_VALID_ATTRIBUTE_P
465 #define TARGET_OPTION_VALID_ATTRIBUTE_P arm_valid_target_attribute_p
466
467 #undef TARGET_SCHED_REORDER
468 #define TARGET_SCHED_REORDER arm_sched_reorder
469
470 #undef TARGET_REGISTER_MOVE_COST
471 #define TARGET_REGISTER_MOVE_COST arm_register_move_cost
472
473 #undef TARGET_MEMORY_MOVE_COST
474 #define TARGET_MEMORY_MOVE_COST arm_memory_move_cost
475
476 #undef TARGET_ENCODE_SECTION_INFO
477 #ifdef ARM_PE
478 #define TARGET_ENCODE_SECTION_INFO arm_pe_encode_section_info
479 #else
480 #define TARGET_ENCODE_SECTION_INFO arm_encode_section_info
481 #endif
482
483 #undef TARGET_STRIP_NAME_ENCODING
484 #define TARGET_STRIP_NAME_ENCODING arm_strip_name_encoding
485
486 #undef TARGET_ASM_INTERNAL_LABEL
487 #define TARGET_ASM_INTERNAL_LABEL arm_internal_label
488
489 #undef TARGET_FLOATN_MODE
490 #define TARGET_FLOATN_MODE arm_floatn_mode
491
492 #undef TARGET_FUNCTION_OK_FOR_SIBCALL
493 #define TARGET_FUNCTION_OK_FOR_SIBCALL arm_function_ok_for_sibcall
494
495 #undef TARGET_FUNCTION_VALUE
496 #define TARGET_FUNCTION_VALUE arm_function_value
497
498 #undef TARGET_LIBCALL_VALUE
499 #define TARGET_LIBCALL_VALUE arm_libcall_value
500
501 #undef TARGET_FUNCTION_VALUE_REGNO_P
502 #define TARGET_FUNCTION_VALUE_REGNO_P arm_function_value_regno_p
503
504 #undef TARGET_ASM_OUTPUT_MI_THUNK
505 #define TARGET_ASM_OUTPUT_MI_THUNK arm_output_mi_thunk
506 #undef TARGET_ASM_CAN_OUTPUT_MI_THUNK
507 #define TARGET_ASM_CAN_OUTPUT_MI_THUNK arm_can_output_mi_thunk
508
509 #undef TARGET_RTX_COSTS
510 #define TARGET_RTX_COSTS arm_rtx_costs
511 #undef TARGET_ADDRESS_COST
512 #define TARGET_ADDRESS_COST arm_address_cost
513
514 #undef TARGET_SHIFT_TRUNCATION_MASK
515 #define TARGET_SHIFT_TRUNCATION_MASK arm_shift_truncation_mask
516 #undef TARGET_VECTOR_MODE_SUPPORTED_P
517 #define TARGET_VECTOR_MODE_SUPPORTED_P arm_vector_mode_supported_p
518 #undef TARGET_ARRAY_MODE_SUPPORTED_P
519 #define TARGET_ARRAY_MODE_SUPPORTED_P arm_array_mode_supported_p
520 #undef TARGET_VECTORIZE_PREFERRED_SIMD_MODE
521 #define TARGET_VECTORIZE_PREFERRED_SIMD_MODE arm_preferred_simd_mode
522 #undef TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES
523 #define TARGET_VECTORIZE_AUTOVECTORIZE_VECTOR_SIZES \
524 arm_autovectorize_vector_sizes
525
526 #undef TARGET_MACHINE_DEPENDENT_REORG
527 #define TARGET_MACHINE_DEPENDENT_REORG arm_reorg
528
529 #undef TARGET_INIT_BUILTINS
530 #define TARGET_INIT_BUILTINS arm_init_builtins
531 #undef TARGET_EXPAND_BUILTIN
532 #define TARGET_EXPAND_BUILTIN arm_expand_builtin
533 #undef TARGET_BUILTIN_DECL
534 #define TARGET_BUILTIN_DECL arm_builtin_decl
535
536 #undef TARGET_INIT_LIBFUNCS
537 #define TARGET_INIT_LIBFUNCS arm_init_libfuncs
538
539 #undef TARGET_PROMOTE_FUNCTION_MODE
540 #define TARGET_PROMOTE_FUNCTION_MODE arm_promote_function_mode
541 #undef TARGET_PROMOTE_PROTOTYPES
542 #define TARGET_PROMOTE_PROTOTYPES arm_promote_prototypes
543 #undef TARGET_PASS_BY_REFERENCE
544 #define TARGET_PASS_BY_REFERENCE arm_pass_by_reference
545 #undef TARGET_ARG_PARTIAL_BYTES
546 #define TARGET_ARG_PARTIAL_BYTES arm_arg_partial_bytes
547 #undef TARGET_FUNCTION_ARG
548 #define TARGET_FUNCTION_ARG arm_function_arg
549 #undef TARGET_FUNCTION_ARG_ADVANCE
550 #define TARGET_FUNCTION_ARG_ADVANCE arm_function_arg_advance
551 #undef TARGET_FUNCTION_ARG_PADDING
552 #define TARGET_FUNCTION_ARG_PADDING arm_function_arg_padding
553 #undef TARGET_FUNCTION_ARG_BOUNDARY
554 #define TARGET_FUNCTION_ARG_BOUNDARY arm_function_arg_boundary
555
556 #undef TARGET_SETUP_INCOMING_VARARGS
557 #define TARGET_SETUP_INCOMING_VARARGS arm_setup_incoming_varargs
558
559 #undef TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS
560 #define TARGET_ALLOCATE_STACK_SLOTS_FOR_ARGS arm_allocate_stack_slots_for_args
561
562 #undef TARGET_ASM_TRAMPOLINE_TEMPLATE
563 #define TARGET_ASM_TRAMPOLINE_TEMPLATE arm_asm_trampoline_template
564 #undef TARGET_TRAMPOLINE_INIT
565 #define TARGET_TRAMPOLINE_INIT arm_trampoline_init
566 #undef TARGET_TRAMPOLINE_ADJUST_ADDRESS
567 #define TARGET_TRAMPOLINE_ADJUST_ADDRESS arm_trampoline_adjust_address
568
569 #undef TARGET_WARN_FUNC_RETURN
570 #define TARGET_WARN_FUNC_RETURN arm_warn_func_return
571
572 #undef TARGET_DEFAULT_SHORT_ENUMS
573 #define TARGET_DEFAULT_SHORT_ENUMS arm_default_short_enums
574
575 #undef TARGET_ALIGN_ANON_BITFIELD
576 #define TARGET_ALIGN_ANON_BITFIELD arm_align_anon_bitfield
577
578 #undef TARGET_NARROW_VOLATILE_BITFIELD
579 #define TARGET_NARROW_VOLATILE_BITFIELD hook_bool_void_false
580
581 #undef TARGET_CXX_GUARD_TYPE
582 #define TARGET_CXX_GUARD_TYPE arm_cxx_guard_type
583
584 #undef TARGET_CXX_GUARD_MASK_BIT
585 #define TARGET_CXX_GUARD_MASK_BIT arm_cxx_guard_mask_bit
586
587 #undef TARGET_CXX_GET_COOKIE_SIZE
588 #define TARGET_CXX_GET_COOKIE_SIZE arm_get_cookie_size
589
590 #undef TARGET_CXX_COOKIE_HAS_SIZE
591 #define TARGET_CXX_COOKIE_HAS_SIZE arm_cookie_has_size
592
593 #undef TARGET_CXX_CDTOR_RETURNS_THIS
594 #define TARGET_CXX_CDTOR_RETURNS_THIS arm_cxx_cdtor_returns_this
595
596 #undef TARGET_CXX_KEY_METHOD_MAY_BE_INLINE
597 #define TARGET_CXX_KEY_METHOD_MAY_BE_INLINE arm_cxx_key_method_may_be_inline
598
599 #undef TARGET_CXX_USE_AEABI_ATEXIT
600 #define TARGET_CXX_USE_AEABI_ATEXIT arm_cxx_use_aeabi_atexit
601
602 #undef TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY
603 #define TARGET_CXX_DETERMINE_CLASS_DATA_VISIBILITY \
604 arm_cxx_determine_class_data_visibility
605
606 #undef TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT
607 #define TARGET_CXX_CLASS_DATA_ALWAYS_COMDAT arm_cxx_class_data_always_comdat
608
609 #undef TARGET_RETURN_IN_MSB
610 #define TARGET_RETURN_IN_MSB arm_return_in_msb
611
612 #undef TARGET_RETURN_IN_MEMORY
613 #define TARGET_RETURN_IN_MEMORY arm_return_in_memory
614
615 #undef TARGET_MUST_PASS_IN_STACK
616 #define TARGET_MUST_PASS_IN_STACK arm_must_pass_in_stack
617
618 #if ARM_UNWIND_INFO
619 #undef TARGET_ASM_UNWIND_EMIT
620 #define TARGET_ASM_UNWIND_EMIT arm_unwind_emit
621
622 /* EABI unwinding tables use a different format for the typeinfo tables. */
623 #undef TARGET_ASM_TTYPE
624 #define TARGET_ASM_TTYPE arm_output_ttype
625
626 #undef TARGET_ARM_EABI_UNWINDER
627 #define TARGET_ARM_EABI_UNWINDER true
628
629 #undef TARGET_ASM_EMIT_EXCEPT_PERSONALITY
630 #define TARGET_ASM_EMIT_EXCEPT_PERSONALITY arm_asm_emit_except_personality
631
632 #endif /* ARM_UNWIND_INFO */
633
634 #undef TARGET_ASM_INIT_SECTIONS
635 #define TARGET_ASM_INIT_SECTIONS arm_asm_init_sections
636
637 #undef TARGET_DWARF_REGISTER_SPAN
638 #define TARGET_DWARF_REGISTER_SPAN arm_dwarf_register_span
639
640 #undef TARGET_CANNOT_COPY_INSN_P
641 #define TARGET_CANNOT_COPY_INSN_P arm_cannot_copy_insn_p
642
643 #ifdef HAVE_AS_TLS
644 #undef TARGET_HAVE_TLS
645 #define TARGET_HAVE_TLS true
646 #endif
647
648 #undef TARGET_HAVE_CONDITIONAL_EXECUTION
649 #define TARGET_HAVE_CONDITIONAL_EXECUTION arm_have_conditional_execution
650
651 #undef TARGET_LEGITIMATE_CONSTANT_P
652 #define TARGET_LEGITIMATE_CONSTANT_P arm_legitimate_constant_p
653
654 #undef TARGET_CANNOT_FORCE_CONST_MEM
655 #define TARGET_CANNOT_FORCE_CONST_MEM arm_cannot_force_const_mem
656
657 #undef TARGET_MAX_ANCHOR_OFFSET
658 #define TARGET_MAX_ANCHOR_OFFSET 4095
659
660 /* The minimum is set such that the total size of the block
661 for a particular anchor is -4088 + 1 + 4095 bytes, which is
662 divisible by eight, ensuring natural spacing of anchors. */
663 #undef TARGET_MIN_ANCHOR_OFFSET
664 #define TARGET_MIN_ANCHOR_OFFSET -4088
665
666 #undef TARGET_SCHED_ISSUE_RATE
667 #define TARGET_SCHED_ISSUE_RATE arm_issue_rate
668
669 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD
670 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD \
671 arm_first_cycle_multipass_dfa_lookahead
672
673 #undef TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD
674 #define TARGET_SCHED_FIRST_CYCLE_MULTIPASS_DFA_LOOKAHEAD_GUARD \
675 arm_first_cycle_multipass_dfa_lookahead_guard
676
677 #undef TARGET_MANGLE_TYPE
678 #define TARGET_MANGLE_TYPE arm_mangle_type
679
680 #undef TARGET_ATOMIC_ASSIGN_EXPAND_FENV
681 #define TARGET_ATOMIC_ASSIGN_EXPAND_FENV arm_atomic_assign_expand_fenv
682
683 #undef TARGET_BUILD_BUILTIN_VA_LIST
684 #define TARGET_BUILD_BUILTIN_VA_LIST arm_build_builtin_va_list
685 #undef TARGET_EXPAND_BUILTIN_VA_START
686 #define TARGET_EXPAND_BUILTIN_VA_START arm_expand_builtin_va_start
687 #undef TARGET_GIMPLIFY_VA_ARG_EXPR
688 #define TARGET_GIMPLIFY_VA_ARG_EXPR arm_gimplify_va_arg_expr
689
690 #ifdef HAVE_AS_TLS
691 #undef TARGET_ASM_OUTPUT_DWARF_DTPREL
692 #define TARGET_ASM_OUTPUT_DWARF_DTPREL arm_output_dwarf_dtprel
693 #endif
694
695 #undef TARGET_LEGITIMATE_ADDRESS_P
696 #define TARGET_LEGITIMATE_ADDRESS_P arm_legitimate_address_p
697
698 #undef TARGET_PREFERRED_RELOAD_CLASS
699 #define TARGET_PREFERRED_RELOAD_CLASS arm_preferred_reload_class
700
701 #undef TARGET_PROMOTED_TYPE
702 #define TARGET_PROMOTED_TYPE arm_promoted_type
703
704 #undef TARGET_SCALAR_MODE_SUPPORTED_P
705 #define TARGET_SCALAR_MODE_SUPPORTED_P arm_scalar_mode_supported_p
706
707 #undef TARGET_COMPUTE_FRAME_LAYOUT
708 #define TARGET_COMPUTE_FRAME_LAYOUT arm_compute_frame_layout
709
710 #undef TARGET_FRAME_POINTER_REQUIRED
711 #define TARGET_FRAME_POINTER_REQUIRED arm_frame_pointer_required
712
713 #undef TARGET_CAN_ELIMINATE
714 #define TARGET_CAN_ELIMINATE arm_can_eliminate
715
716 #undef TARGET_CONDITIONAL_REGISTER_USAGE
717 #define TARGET_CONDITIONAL_REGISTER_USAGE arm_conditional_register_usage
718
719 #undef TARGET_CLASS_LIKELY_SPILLED_P
720 #define TARGET_CLASS_LIKELY_SPILLED_P arm_class_likely_spilled_p
721
722 #undef TARGET_VECTORIZE_BUILTINS
723 #define TARGET_VECTORIZE_BUILTINS
724
725 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION
726 #define TARGET_VECTORIZE_BUILTIN_VECTORIZED_FUNCTION \
727 arm_builtin_vectorized_function
728
729 #undef TARGET_VECTOR_ALIGNMENT
730 #define TARGET_VECTOR_ALIGNMENT arm_vector_alignment
731
732 #undef TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE
733 #define TARGET_VECTORIZE_VECTOR_ALIGNMENT_REACHABLE \
734 arm_vector_alignment_reachable
735
736 #undef TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT
737 #define TARGET_VECTORIZE_SUPPORT_VECTOR_MISALIGNMENT \
738 arm_builtin_support_vector_misalignment
739
740 #undef TARGET_PREFERRED_RENAME_CLASS
741 #define TARGET_PREFERRED_RENAME_CLASS \
742 arm_preferred_rename_class
743
744 #undef TARGET_VECTORIZE_VEC_PERM_CONST
745 #define TARGET_VECTORIZE_VEC_PERM_CONST arm_vectorize_vec_perm_const
746
747 #undef TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST
748 #define TARGET_VECTORIZE_BUILTIN_VECTORIZATION_COST \
749 arm_builtin_vectorization_cost
750 #undef TARGET_VECTORIZE_ADD_STMT_COST
751 #define TARGET_VECTORIZE_ADD_STMT_COST arm_add_stmt_cost
752
753 #undef TARGET_CANONICALIZE_COMPARISON
754 #define TARGET_CANONICALIZE_COMPARISON \
755 arm_canonicalize_comparison
756
757 #undef TARGET_ASAN_SHADOW_OFFSET
758 #define TARGET_ASAN_SHADOW_OFFSET arm_asan_shadow_offset
759
760 #undef MAX_INSN_PER_IT_BLOCK
761 #define MAX_INSN_PER_IT_BLOCK (arm_restrict_it ? 1 : 4)
762
763 #undef TARGET_CAN_USE_DOLOOP_P
764 #define TARGET_CAN_USE_DOLOOP_P can_use_doloop_if_innermost
765
766 #undef TARGET_CONST_NOT_OK_FOR_DEBUG_P
767 #define TARGET_CONST_NOT_OK_FOR_DEBUG_P arm_const_not_ok_for_debug_p
768
769 #undef TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS
770 #define TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS true
771
772 #undef TARGET_SCHED_FUSION_PRIORITY
773 #define TARGET_SCHED_FUSION_PRIORITY arm_sched_fusion_priority
774
775 #undef TARGET_ASM_FUNCTION_SECTION
776 #define TARGET_ASM_FUNCTION_SECTION arm_function_section
777
778 #undef TARGET_ASM_ELF_FLAGS_NUMERIC
779 #define TARGET_ASM_ELF_FLAGS_NUMERIC arm_asm_elf_flags_numeric
780
781 #undef TARGET_SECTION_TYPE_FLAGS
782 #define TARGET_SECTION_TYPE_FLAGS arm_elf_section_type_flags
783
784 #undef TARGET_EXPAND_DIVMOD_LIBFUNC
785 #define TARGET_EXPAND_DIVMOD_LIBFUNC arm_expand_divmod_libfunc
786
787 #undef TARGET_C_EXCESS_PRECISION
788 #define TARGET_C_EXCESS_PRECISION arm_excess_precision
789
790 /* Although the architecture reserves bits 0 and 1, only the former is
791 used for ARM/Thumb ISA selection in v7 and earlier versions. */
792 #undef TARGET_CUSTOM_FUNCTION_DESCRIPTORS
793 #define TARGET_CUSTOM_FUNCTION_DESCRIPTORS 2
794
795 #undef TARGET_FIXED_CONDITION_CODE_REGS
796 #define TARGET_FIXED_CONDITION_CODE_REGS arm_fixed_condition_code_regs
797
798 #undef TARGET_HARD_REGNO_NREGS
799 #define TARGET_HARD_REGNO_NREGS arm_hard_regno_nregs
800 #undef TARGET_HARD_REGNO_MODE_OK
801 #define TARGET_HARD_REGNO_MODE_OK arm_hard_regno_mode_ok
802
803 #undef TARGET_MODES_TIEABLE_P
804 #define TARGET_MODES_TIEABLE_P arm_modes_tieable_p
805
806 #undef TARGET_CAN_CHANGE_MODE_CLASS
807 #define TARGET_CAN_CHANGE_MODE_CLASS arm_can_change_mode_class
808
809 #undef TARGET_CONSTANT_ALIGNMENT
810 #define TARGET_CONSTANT_ALIGNMENT arm_constant_alignment
811 \f
812 /* Obstack for minipool constant handling. */
813 static struct obstack minipool_obstack;
814 static char * minipool_startobj;
815
816 /* The maximum number of insns skipped which
817 will be conditionalised if possible. */
818 static int max_insns_skipped = 5;
819
820 extern FILE * asm_out_file;
821
822 /* True if we are currently building a constant table. */
823 int making_const_table;
824
825 /* The processor for which instructions should be scheduled. */
826 enum processor_type arm_tune = TARGET_CPU_arm_none;
827
828 /* The current tuning set. */
829 const struct tune_params *current_tune;
830
831 /* Which floating point hardware to schedule for. */
832 int arm_fpu_attr;
833
834 /* Used for Thumb call_via trampolines. */
835 rtx thumb_call_via_label[14];
836 static int thumb_call_reg_needed;
837
838 /* The bits in this mask specify which instruction scheduling options should
839 be used. */
840 unsigned int tune_flags = 0;
841
842 /* The highest ARM architecture version supported by the
843 target. */
844 enum base_architecture arm_base_arch = BASE_ARCH_0;
845
846 /* Active target architecture and tuning. */
847
848 struct arm_build_target arm_active_target;
849
850 /* The following are used in the arm.md file as equivalents to bits
851 in the above two flag variables. */
852
853 /* Nonzero if this chip supports the ARM Architecture 4 extensions. */
854 int arm_arch4 = 0;
855
856 /* Nonzero if this chip supports the ARM Architecture 4t extensions. */
857 int arm_arch4t = 0;
858
859 /* Nonzero if this chip supports the ARM Architecture 5T extensions. */
860 int arm_arch5t = 0;
861
862 /* Nonzero if this chip supports the ARM Architecture 5TE extensions. */
863 int arm_arch5te = 0;
864
865 /* Nonzero if this chip supports the ARM Architecture 6 extensions. */
866 int arm_arch6 = 0;
867
868 /* Nonzero if this chip supports the ARM 6K extensions. */
869 int arm_arch6k = 0;
870
871 /* Nonzero if this chip supports the ARM 6KZ extensions. */
872 int arm_arch6kz = 0;
873
874 /* Nonzero if instructions present in ARMv6-M can be used. */
875 int arm_arch6m = 0;
876
877 /* Nonzero if this chip supports the ARM 7 extensions. */
878 int arm_arch7 = 0;
879
880 /* Nonzero if this chip supports the Large Physical Address Extension. */
881 int arm_arch_lpae = 0;
882
883 /* Nonzero if instructions not present in the 'M' profile can be used. */
884 int arm_arch_notm = 0;
885
886 /* Nonzero if instructions present in ARMv7E-M can be used. */
887 int arm_arch7em = 0;
888
889 /* Nonzero if instructions present in ARMv8 can be used. */
890 int arm_arch8 = 0;
891
892 /* Nonzero if this chip supports the ARMv8.1 extensions. */
893 int arm_arch8_1 = 0;
894
895 /* Nonzero if this chip supports the ARM Architecture 8.2 extensions. */
896 int arm_arch8_2 = 0;
897
898 /* Nonzero if this chip supports the FP16 instructions extension of ARM
899 Architecture 8.2. */
900 int arm_fp16_inst = 0;
901
902 /* Nonzero if this chip can benefit from load scheduling. */
903 int arm_ld_sched = 0;
904
905 /* Nonzero if this chip is a StrongARM. */
906 int arm_tune_strongarm = 0;
907
908 /* Nonzero if this chip supports Intel Wireless MMX technology. */
909 int arm_arch_iwmmxt = 0;
910
911 /* Nonzero if this chip supports Intel Wireless MMX2 technology. */
912 int arm_arch_iwmmxt2 = 0;
913
914 /* Nonzero if this chip is an XScale. */
915 int arm_arch_xscale = 0;
916
917 /* Nonzero if tuning for XScale */
918 int arm_tune_xscale = 0;
919
920 /* Nonzero if we want to tune for stores that access the write-buffer.
921 This typically means an ARM6 or ARM7 with MMU or MPU. */
922 int arm_tune_wbuf = 0;
923
924 /* Nonzero if tuning for Cortex-A9. */
925 int arm_tune_cortex_a9 = 0;
926
927 /* Nonzero if we should define __THUMB_INTERWORK__ in the
928 preprocessor.
929 XXX This is a bit of a hack, it's intended to help work around
930 problems in GLD which doesn't understand that armv5t code is
931 interworking clean. */
932 int arm_cpp_interwork = 0;
933
934 /* Nonzero if chip supports Thumb 1. */
935 int arm_arch_thumb1;
936
937 /* Nonzero if chip supports Thumb 2. */
938 int arm_arch_thumb2;
939
940 /* Nonzero if chip supports integer division instruction. */
941 int arm_arch_arm_hwdiv;
942 int arm_arch_thumb_hwdiv;
943
944 /* Nonzero if chip disallows volatile memory access in IT block. */
945 int arm_arch_no_volatile_ce;
946
947 /* Nonzero if we should use Neon to handle 64-bit operations rather
948 than core registers. */
949 int prefer_neon_for_64bits = 0;
950
951 /* Nonzero if we shouldn't use literal pools. */
952 bool arm_disable_literal_pool = false;
953
954 /* The register number to be used for the PIC offset register. */
955 unsigned arm_pic_register = INVALID_REGNUM;
956
957 enum arm_pcs arm_pcs_default;
958
959 /* For an explanation of these variables, see final_prescan_insn below. */
960 int arm_ccfsm_state;
961 /* arm_current_cc is also used for Thumb-2 cond_exec blocks. */
962 enum arm_cond_code arm_current_cc;
963
964 rtx arm_target_insn;
965 int arm_target_label;
966 /* The number of conditionally executed insns, including the current insn. */
967 int arm_condexec_count = 0;
968 /* A bitmask specifying the patterns for the IT block.
969 Zero means do not output an IT block before this insn. */
970 int arm_condexec_mask = 0;
971 /* The number of bits used in arm_condexec_mask. */
972 int arm_condexec_masklen = 0;
973
974 /* Nonzero if chip supports the ARMv8 CRC instructions. */
975 int arm_arch_crc = 0;
976
977 /* Nonzero if chip supports the AdvSIMD Dot Product instructions. */
978 int arm_arch_dotprod = 0;
979
980 /* Nonzero if chip supports the ARMv8-M security extensions. */
981 int arm_arch_cmse = 0;
982
983 /* Nonzero if the core has a very small, high-latency, multiply unit. */
984 int arm_m_profile_small_mul = 0;
985
986 /* The condition codes of the ARM, and the inverse function. */
987 static const char * const arm_condition_codes[] =
988 {
989 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
990 "hi", "ls", "ge", "lt", "gt", "le", "al", "nv"
991 };
992
993 /* The register numbers in sequence, for passing to arm_gen_load_multiple. */
994 int arm_regs_in_sequence[] =
995 {
996 0, 1, 2, 3, 4, 5, 6, 7, 8, 9, 10, 11, 12, 13, 14, 15
997 };
998
999 #define ARM_LSL_NAME "lsl"
1000 #define streq(string1, string2) (strcmp (string1, string2) == 0)
1001
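/* Mask of the low registers (r0-r7) usable as work registers in Thumb-2,
   excluding the hard frame pointer, the stack pointer, the program counter
   and the PIC offset table register.  */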
1002 #define THUMB2_WORK_REGS (0xff & ~( (1 << THUMB_HARD_FRAME_POINTER_REGNUM) \
1003 | (1 << SP_REGNUM) | (1 << PC_REGNUM) \
1004 | (1 << PIC_OFFSET_TABLE_REGNUM)))
1005 \f
1006 /* Initialization code. */
1007
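/* A CPU's scheduler model, tuning flags and tuning parameters.  */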
1008 struct cpu_tune
1009 {
1010 enum processor_type scheduler;
1011 unsigned int tune_flags;
1012 const struct tune_params *tune;
1013 };
1014
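/* Prefetch tuning parameters: number of prefetch slots, L1 cache size and
   L1 cache line size; -1 marks a value as unknown or not applicable.  */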
1015 #define ARM_PREFETCH_NOT_BENEFICIAL { 0, -1, -1 }
1016 #define ARM_PREFETCH_BENEFICIAL(num_slots,l1_size,l1_line_size) \
1017 { \
1018 num_slots, \
1019 l1_size, \
1020 l1_line_size \
1021 }
1022
1023 /* arm generic vectorizer costs. */
1024 static const
1025 struct cpu_vec_costs arm_default_vec_cost = {
1026 1, /* scalar_stmt_cost. */
1027 1, /* scalar load_cost. */
1028 1, /* scalar_store_cost. */
1029 1, /* vec_stmt_cost. */
1030 1, /* vec_to_scalar_cost. */
1031 1, /* scalar_to_vec_cost. */
1032 1, /* vec_align_load_cost. */
1033 1, /* vec_unalign_load_cost. */
1034 1, /* vec_unalign_store_cost. */
1035 1, /* vec_store_cost. */
1036 3, /* cond_taken_branch_cost. */
1037 1, /* cond_not_taken_branch_cost. */
1038 };
1039
1040 /* Cost tables for AArch32 + AArch64 cores should go in aarch-cost-tables.h */
1041 #include "aarch-cost-tables.h"
1042
1043
1044
1045 const struct cpu_cost_table cortexa9_extra_costs =
1046 {
1047 /* ALU */
1048 {
1049 0, /* arith. */
1050 0, /* logical. */
1051 0, /* shift. */
1052 COSTS_N_INSNS (1), /* shift_reg. */
1053 COSTS_N_INSNS (1), /* arith_shift. */
1054 COSTS_N_INSNS (2), /* arith_shift_reg. */
1055 0, /* log_shift. */
1056 COSTS_N_INSNS (1), /* log_shift_reg. */
1057 COSTS_N_INSNS (1), /* extend. */
1058 COSTS_N_INSNS (2), /* extend_arith. */
1059 COSTS_N_INSNS (1), /* bfi. */
1060 COSTS_N_INSNS (1), /* bfx. */
1061 0, /* clz. */
1062 0, /* rev. */
1063 0, /* non_exec. */
1064 true /* non_exec_costs_exec. */
1065 },
1066 {
1067 /* MULT SImode */
1068 {
1069 COSTS_N_INSNS (3), /* simple. */
1070 COSTS_N_INSNS (3), /* flag_setting. */
1071 COSTS_N_INSNS (2), /* extend. */
1072 COSTS_N_INSNS (3), /* add. */
1073 COSTS_N_INSNS (2), /* extend_add. */
1074 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A9. */
1075 },
1076 /* MULT DImode */
1077 {
1078 0, /* simple (N/A). */
1079 0, /* flag_setting (N/A). */
1080 COSTS_N_INSNS (4), /* extend. */
1081 0, /* add (N/A). */
1082 COSTS_N_INSNS (4), /* extend_add. */
1083 0 /* idiv (N/A). */
1084 }
1085 },
1086 /* LD/ST */
1087 {
1088 COSTS_N_INSNS (2), /* load. */
1089 COSTS_N_INSNS (2), /* load_sign_extend. */
1090 COSTS_N_INSNS (2), /* ldrd. */
1091 COSTS_N_INSNS (2), /* ldm_1st. */
1092 1, /* ldm_regs_per_insn_1st. */
1093 2, /* ldm_regs_per_insn_subsequent. */
1094 COSTS_N_INSNS (5), /* loadf. */
1095 COSTS_N_INSNS (5), /* loadd. */
1096 COSTS_N_INSNS (1), /* load_unaligned. */
1097 COSTS_N_INSNS (2), /* store. */
1098 COSTS_N_INSNS (2), /* strd. */
1099 COSTS_N_INSNS (2), /* stm_1st. */
1100 1, /* stm_regs_per_insn_1st. */
1101 2, /* stm_regs_per_insn_subsequent. */
1102 COSTS_N_INSNS (1), /* storef. */
1103 COSTS_N_INSNS (1), /* stored. */
1104 COSTS_N_INSNS (1), /* store_unaligned. */
1105 COSTS_N_INSNS (1), /* loadv. */
1106 COSTS_N_INSNS (1) /* storev. */
1107 },
1108 {
1109 /* FP SFmode */
1110 {
1111 COSTS_N_INSNS (14), /* div. */
1112 COSTS_N_INSNS (4), /* mult. */
1113 COSTS_N_INSNS (7), /* mult_addsub. */
1114 COSTS_N_INSNS (30), /* fma. */
1115 COSTS_N_INSNS (3), /* addsub. */
1116 COSTS_N_INSNS (1), /* fpconst. */
1117 COSTS_N_INSNS (1), /* neg. */
1118 COSTS_N_INSNS (3), /* compare. */
1119 COSTS_N_INSNS (3), /* widen. */
1120 COSTS_N_INSNS (3), /* narrow. */
1121 COSTS_N_INSNS (3), /* toint. */
1122 COSTS_N_INSNS (3), /* fromint. */
1123 COSTS_N_INSNS (3) /* roundint. */
1124 },
1125 /* FP DFmode */
1126 {
1127 COSTS_N_INSNS (24), /* div. */
1128 COSTS_N_INSNS (5), /* mult. */
1129 COSTS_N_INSNS (8), /* mult_addsub. */
1130 COSTS_N_INSNS (30), /* fma. */
1131 COSTS_N_INSNS (3), /* addsub. */
1132 COSTS_N_INSNS (1), /* fpconst. */
1133 COSTS_N_INSNS (1), /* neg. */
1134 COSTS_N_INSNS (3), /* compare. */
1135 COSTS_N_INSNS (3), /* widen. */
1136 COSTS_N_INSNS (3), /* narrow. */
1137 COSTS_N_INSNS (3), /* toint. */
1138 COSTS_N_INSNS (3), /* fromint. */
1139 COSTS_N_INSNS (3) /* roundint. */
1140 }
1141 },
1142 /* Vector */
1143 {
1144 COSTS_N_INSNS (1) /* alu. */
1145 }
1146 };
1147
1148 const struct cpu_cost_table cortexa8_extra_costs =
1149 {
1150 /* ALU */
1151 {
1152 0, /* arith. */
1153 0, /* logical. */
1154 COSTS_N_INSNS (1), /* shift. */
1155 0, /* shift_reg. */
1156 COSTS_N_INSNS (1), /* arith_shift. */
1157 0, /* arith_shift_reg. */
1158 COSTS_N_INSNS (1), /* log_shift. */
1159 0, /* log_shift_reg. */
1160 0, /* extend. */
1161 0, /* extend_arith. */
1162 0, /* bfi. */
1163 0, /* bfx. */
1164 0, /* clz. */
1165 0, /* rev. */
1166 0, /* non_exec. */
1167 true /* non_exec_costs_exec. */
1168 },
1169 {
1170 /* MULT SImode */
1171 {
1172 COSTS_N_INSNS (1), /* simple. */
1173 COSTS_N_INSNS (1), /* flag_setting. */
1174 COSTS_N_INSNS (1), /* extend. */
1175 COSTS_N_INSNS (1), /* add. */
1176 COSTS_N_INSNS (1), /* extend_add. */
1177 COSTS_N_INSNS (30) /* idiv. No HW div on Cortex A8. */
1178 },
1179 /* MULT DImode */
1180 {
1181 0, /* simple (N/A). */
1182 0, /* flag_setting (N/A). */
1183 COSTS_N_INSNS (2), /* extend. */
1184 0, /* add (N/A). */
1185 COSTS_N_INSNS (2), /* extend_add. */
1186 0 /* idiv (N/A). */
1187 }
1188 },
1189 /* LD/ST */
1190 {
1191 COSTS_N_INSNS (1), /* load. */
1192 COSTS_N_INSNS (1), /* load_sign_extend. */
1193 COSTS_N_INSNS (1), /* ldrd. */
1194 COSTS_N_INSNS (1), /* ldm_1st. */
1195 1, /* ldm_regs_per_insn_1st. */
1196 2, /* ldm_regs_per_insn_subsequent. */
1197 COSTS_N_INSNS (1), /* loadf. */
1198 COSTS_N_INSNS (1), /* loadd. */
1199 COSTS_N_INSNS (1), /* load_unaligned. */
1200 COSTS_N_INSNS (1), /* store. */
1201 COSTS_N_INSNS (1), /* strd. */
1202 COSTS_N_INSNS (1), /* stm_1st. */
1203 1, /* stm_regs_per_insn_1st. */
1204 2, /* stm_regs_per_insn_subsequent. */
1205 COSTS_N_INSNS (1), /* storef. */
1206 COSTS_N_INSNS (1), /* stored. */
1207 COSTS_N_INSNS (1), /* store_unaligned. */
1208 COSTS_N_INSNS (1), /* loadv. */
1209 COSTS_N_INSNS (1) /* storev. */
1210 },
1211 {
1212 /* FP SFmode */
1213 {
1214 COSTS_N_INSNS (36), /* div. */
1215 COSTS_N_INSNS (11), /* mult. */
1216 COSTS_N_INSNS (20), /* mult_addsub. */
1217 COSTS_N_INSNS (30), /* fma. */
1218 COSTS_N_INSNS (9), /* addsub. */
1219 COSTS_N_INSNS (3), /* fpconst. */
1220 COSTS_N_INSNS (3), /* neg. */
1221 COSTS_N_INSNS (6), /* compare. */
1222 COSTS_N_INSNS (4), /* widen. */
1223 COSTS_N_INSNS (4), /* narrow. */
1224 COSTS_N_INSNS (8), /* toint. */
1225 COSTS_N_INSNS (8), /* fromint. */
1226 COSTS_N_INSNS (8) /* roundint. */
1227 },
1228 /* FP DFmode */
1229 {
1230 COSTS_N_INSNS (64), /* div. */
1231 COSTS_N_INSNS (16), /* mult. */
1232 COSTS_N_INSNS (25), /* mult_addsub. */
1233 COSTS_N_INSNS (30), /* fma. */
1234 COSTS_N_INSNS (9), /* addsub. */
1235 COSTS_N_INSNS (3), /* fpconst. */
1236 COSTS_N_INSNS (3), /* neg. */
1237 COSTS_N_INSNS (6), /* compare. */
1238 COSTS_N_INSNS (6), /* widen. */
1239 COSTS_N_INSNS (6), /* narrow. */
1240 COSTS_N_INSNS (8), /* toint. */
1241 COSTS_N_INSNS (8), /* fromint. */
1242 COSTS_N_INSNS (8) /* roundint. */
1243 }
1244 },
1245 /* Vector */
1246 {
1247 COSTS_N_INSNS (1) /* alu. */
1248 }
1249 };
1250
1251 const struct cpu_cost_table cortexa5_extra_costs =
1252 {
1253 /* ALU */
1254 {
1255 0, /* arith. */
1256 0, /* logical. */
1257 COSTS_N_INSNS (1), /* shift. */
1258 COSTS_N_INSNS (1), /* shift_reg. */
1259 COSTS_N_INSNS (1), /* arith_shift. */
1260 COSTS_N_INSNS (1), /* arith_shift_reg. */
1261 COSTS_N_INSNS (1), /* log_shift. */
1262 COSTS_N_INSNS (1), /* log_shift_reg. */
1263 COSTS_N_INSNS (1), /* extend. */
1264 COSTS_N_INSNS (1), /* extend_arith. */
1265 COSTS_N_INSNS (1), /* bfi. */
1266 COSTS_N_INSNS (1), /* bfx. */
1267 COSTS_N_INSNS (1), /* clz. */
1268 COSTS_N_INSNS (1), /* rev. */
1269 0, /* non_exec. */
1270 true /* non_exec_costs_exec. */
1271 },
1272
1273 {
1274 /* MULT SImode */
1275 {
1276 0, /* simple. */
1277 COSTS_N_INSNS (1), /* flag_setting. */
1278 COSTS_N_INSNS (1), /* extend. */
1279 COSTS_N_INSNS (1), /* add. */
1280 COSTS_N_INSNS (1), /* extend_add. */
1281 COSTS_N_INSNS (7) /* idiv. */
1282 },
1283 /* MULT DImode */
1284 {
1285 0, /* simple (N/A). */
1286 0, /* flag_setting (N/A). */
1287 COSTS_N_INSNS (1), /* extend. */
1288 0, /* add. */
1289 COSTS_N_INSNS (2), /* extend_add. */
1290 0 /* idiv (N/A). */
1291 }
1292 },
1293 /* LD/ST */
1294 {
1295 COSTS_N_INSNS (1), /* load. */
1296 COSTS_N_INSNS (1), /* load_sign_extend. */
1297 COSTS_N_INSNS (6), /* ldrd. */
1298 COSTS_N_INSNS (1), /* ldm_1st. */
1299 1, /* ldm_regs_per_insn_1st. */
1300 2, /* ldm_regs_per_insn_subsequent. */
1301 COSTS_N_INSNS (2), /* loadf. */
1302 COSTS_N_INSNS (4), /* loadd. */
1303 COSTS_N_INSNS (1), /* load_unaligned. */
1304 COSTS_N_INSNS (1), /* store. */
1305 COSTS_N_INSNS (3), /* strd. */
1306 COSTS_N_INSNS (1), /* stm_1st. */
1307 1, /* stm_regs_per_insn_1st. */
1308 2, /* stm_regs_per_insn_subsequent. */
1309 COSTS_N_INSNS (2), /* storef. */
1310 COSTS_N_INSNS (2), /* stored. */
1311 COSTS_N_INSNS (1), /* store_unaligned. */
1312 COSTS_N_INSNS (1), /* loadv. */
1313 COSTS_N_INSNS (1) /* storev. */
1314 },
1315 {
1316 /* FP SFmode */
1317 {
1318 COSTS_N_INSNS (15), /* div. */
1319 COSTS_N_INSNS (3), /* mult. */
1320 COSTS_N_INSNS (7), /* mult_addsub. */
1321 COSTS_N_INSNS (7), /* fma. */
1322 COSTS_N_INSNS (3), /* addsub. */
1323 COSTS_N_INSNS (3), /* fpconst. */
1324 COSTS_N_INSNS (3), /* neg. */
1325 COSTS_N_INSNS (3), /* compare. */
1326 COSTS_N_INSNS (3), /* widen. */
1327 COSTS_N_INSNS (3), /* narrow. */
1328 COSTS_N_INSNS (3), /* toint. */
1329 COSTS_N_INSNS (3), /* fromint. */
1330 COSTS_N_INSNS (3) /* roundint. */
1331 },
1332 /* FP DFmode */
1333 {
1334 COSTS_N_INSNS (30), /* div. */
1335 COSTS_N_INSNS (6), /* mult. */
1336 COSTS_N_INSNS (10), /* mult_addsub. */
1337 COSTS_N_INSNS (7), /* fma. */
1338 COSTS_N_INSNS (3), /* addsub. */
1339 COSTS_N_INSNS (3), /* fpconst. */
1340 COSTS_N_INSNS (3), /* neg. */
1341 COSTS_N_INSNS (3), /* compare. */
1342 COSTS_N_INSNS (3), /* widen. */
1343 COSTS_N_INSNS (3), /* narrow. */
1344 COSTS_N_INSNS (3), /* toint. */
1345 COSTS_N_INSNS (3), /* fromint. */
1346 COSTS_N_INSNS (3) /* roundint. */
1347 }
1348 },
1349 /* Vector */
1350 {
1351 COSTS_N_INSNS (1) /* alu. */
1352 }
1353 };
1354
1355
1356 const struct cpu_cost_table cortexa7_extra_costs =
1357 {
1358 /* ALU */
1359 {
1360 0, /* arith. */
1361 0, /* logical. */
1362 COSTS_N_INSNS (1), /* shift. */
1363 COSTS_N_INSNS (1), /* shift_reg. */
1364 COSTS_N_INSNS (1), /* arith_shift. */
1365 COSTS_N_INSNS (1), /* arith_shift_reg. */
1366 COSTS_N_INSNS (1), /* log_shift. */
1367 COSTS_N_INSNS (1), /* log_shift_reg. */
1368 COSTS_N_INSNS (1), /* extend. */
1369 COSTS_N_INSNS (1), /* extend_arith. */
1370 COSTS_N_INSNS (1), /* bfi. */
1371 COSTS_N_INSNS (1), /* bfx. */
1372 COSTS_N_INSNS (1), /* clz. */
1373 COSTS_N_INSNS (1), /* rev. */
1374 0, /* non_exec. */
1375 true /* non_exec_costs_exec. */
1376 },
1377
1378 {
1379 /* MULT SImode */
1380 {
1381 0, /* simple. */
1382 COSTS_N_INSNS (1), /* flag_setting. */
1383 COSTS_N_INSNS (1), /* extend. */
1384 COSTS_N_INSNS (1), /* add. */
1385 COSTS_N_INSNS (1), /* extend_add. */
1386 COSTS_N_INSNS (7) /* idiv. */
1387 },
1388 /* MULT DImode */
1389 {
1390 0, /* simple (N/A). */
1391 0, /* flag_setting (N/A). */
1392 COSTS_N_INSNS (1), /* extend. */
1393 0, /* add. */
1394 COSTS_N_INSNS (2), /* extend_add. */
1395 0 /* idiv (N/A). */
1396 }
1397 },
1398 /* LD/ST */
1399 {
1400 COSTS_N_INSNS (1), /* load. */
1401 COSTS_N_INSNS (1), /* load_sign_extend. */
1402 COSTS_N_INSNS (3), /* ldrd. */
1403 COSTS_N_INSNS (1), /* ldm_1st. */
1404 1, /* ldm_regs_per_insn_1st. */
1405 2, /* ldm_regs_per_insn_subsequent. */
1406 COSTS_N_INSNS (2), /* loadf. */
1407 COSTS_N_INSNS (2), /* loadd. */
1408 COSTS_N_INSNS (1), /* load_unaligned. */
1409 COSTS_N_INSNS (1), /* store. */
1410 COSTS_N_INSNS (3), /* strd. */
1411 COSTS_N_INSNS (1), /* stm_1st. */
1412 1, /* stm_regs_per_insn_1st. */
1413 2, /* stm_regs_per_insn_subsequent. */
1414 COSTS_N_INSNS (2), /* storef. */
1415 COSTS_N_INSNS (2), /* stored. */
1416 COSTS_N_INSNS (1), /* store_unaligned. */
1417 COSTS_N_INSNS (1), /* loadv. */
1418 COSTS_N_INSNS (1) /* storev. */
1419 },
1420 {
1421 /* FP SFmode */
1422 {
1423 COSTS_N_INSNS (15), /* div. */
1424 COSTS_N_INSNS (3), /* mult. */
1425 COSTS_N_INSNS (7), /* mult_addsub. */
1426 COSTS_N_INSNS (7), /* fma. */
1427 COSTS_N_INSNS (3), /* addsub. */
1428 COSTS_N_INSNS (3), /* fpconst. */
1429 COSTS_N_INSNS (3), /* neg. */
1430 COSTS_N_INSNS (3), /* compare. */
1431 COSTS_N_INSNS (3), /* widen. */
1432 COSTS_N_INSNS (3), /* narrow. */
1433 COSTS_N_INSNS (3), /* toint. */
1434 COSTS_N_INSNS (3), /* fromint. */
1435 COSTS_N_INSNS (3) /* roundint. */
1436 },
1437 /* FP DFmode */
1438 {
1439 COSTS_N_INSNS (30), /* div. */
1440 COSTS_N_INSNS (6), /* mult. */
1441 COSTS_N_INSNS (10), /* mult_addsub. */
1442 COSTS_N_INSNS (7), /* fma. */
1443 COSTS_N_INSNS (3), /* addsub. */
1444 COSTS_N_INSNS (3), /* fpconst. */
1445 COSTS_N_INSNS (3), /* neg. */
1446 COSTS_N_INSNS (3), /* compare. */
1447 COSTS_N_INSNS (3), /* widen. */
1448 COSTS_N_INSNS (3), /* narrow. */
1449 COSTS_N_INSNS (3), /* toint. */
1450 COSTS_N_INSNS (3), /* fromint. */
1451 COSTS_N_INSNS (3) /* roundint. */
1452 }
1453 },
1454 /* Vector */
1455 {
1456 COSTS_N_INSNS (1) /* alu. */
1457 }
1458 };
1459
1460 const struct cpu_cost_table cortexa12_extra_costs =
1461 {
1462 /* ALU */
1463 {
1464 0, /* arith. */
1465 0, /* logical. */
1466 0, /* shift. */
1467 COSTS_N_INSNS (1), /* shift_reg. */
1468 COSTS_N_INSNS (1), /* arith_shift. */
1469 COSTS_N_INSNS (1), /* arith_shift_reg. */
1470 COSTS_N_INSNS (1), /* log_shift. */
1471 COSTS_N_INSNS (1), /* log_shift_reg. */
1472 0, /* extend. */
1473 COSTS_N_INSNS (1), /* extend_arith. */
1474 0, /* bfi. */
1475 COSTS_N_INSNS (1), /* bfx. */
1476 COSTS_N_INSNS (1), /* clz. */
1477 COSTS_N_INSNS (1), /* rev. */
1478 0, /* non_exec. */
1479 true /* non_exec_costs_exec. */
1480 },
1481 /* MULT SImode */
1482 {
1483 {
1484 COSTS_N_INSNS (2), /* simple. */
1485 COSTS_N_INSNS (3), /* flag_setting. */
1486 COSTS_N_INSNS (2), /* extend. */
1487 COSTS_N_INSNS (3), /* add. */
1488 COSTS_N_INSNS (2), /* extend_add. */
1489 COSTS_N_INSNS (18) /* idiv. */
1490 },
1491 /* MULT DImode */
1492 {
1493 0, /* simple (N/A). */
1494 0, /* flag_setting (N/A). */
1495 COSTS_N_INSNS (3), /* extend. */
1496 0, /* add (N/A). */
1497 COSTS_N_INSNS (3), /* extend_add. */
1498 0 /* idiv (N/A). */
1499 }
1500 },
1501 /* LD/ST */
1502 {
1503 COSTS_N_INSNS (3), /* load. */
1504 COSTS_N_INSNS (3), /* load_sign_extend. */
1505 COSTS_N_INSNS (3), /* ldrd. */
1506 COSTS_N_INSNS (3), /* ldm_1st. */
1507 1, /* ldm_regs_per_insn_1st. */
1508 2, /* ldm_regs_per_insn_subsequent. */
1509 COSTS_N_INSNS (3), /* loadf. */
1510 COSTS_N_INSNS (3), /* loadd. */
1511 0, /* load_unaligned. */
1512 0, /* store. */
1513 0, /* strd. */
1514 0, /* stm_1st. */
1515 1, /* stm_regs_per_insn_1st. */
1516 2, /* stm_regs_per_insn_subsequent. */
1517 COSTS_N_INSNS (2), /* storef. */
1518 COSTS_N_INSNS (2), /* stored. */
1519 0, /* store_unaligned. */
1520 COSTS_N_INSNS (1), /* loadv. */
1521 COSTS_N_INSNS (1) /* storev. */
1522 },
1523 {
1524 /* FP SFmode */
1525 {
1526 COSTS_N_INSNS (17), /* div. */
1527 COSTS_N_INSNS (4), /* mult. */
1528 COSTS_N_INSNS (8), /* mult_addsub. */
1529 COSTS_N_INSNS (8), /* fma. */
1530 COSTS_N_INSNS (4), /* addsub. */
1531 COSTS_N_INSNS (2), /* fpconst. */
1532 COSTS_N_INSNS (2), /* neg. */
1533 COSTS_N_INSNS (2), /* compare. */
1534 COSTS_N_INSNS (4), /* widen. */
1535 COSTS_N_INSNS (4), /* narrow. */
1536 COSTS_N_INSNS (4), /* toint. */
1537 COSTS_N_INSNS (4), /* fromint. */
1538 COSTS_N_INSNS (4) /* roundint. */
1539 },
1540 /* FP DFmode */
1541 {
1542 COSTS_N_INSNS (31), /* div. */
1543 COSTS_N_INSNS (4), /* mult. */
1544 COSTS_N_INSNS (8), /* mult_addsub. */
1545 COSTS_N_INSNS (8), /* fma. */
1546 COSTS_N_INSNS (4), /* addsub. */
1547 COSTS_N_INSNS (2), /* fpconst. */
1548 COSTS_N_INSNS (2), /* neg. */
1549 COSTS_N_INSNS (2), /* compare. */
1550 COSTS_N_INSNS (4), /* widen. */
1551 COSTS_N_INSNS (4), /* narrow. */
1552 COSTS_N_INSNS (4), /* toint. */
1553 COSTS_N_INSNS (4), /* fromint. */
1554 COSTS_N_INSNS (4) /* roundint. */
1555 }
1556 },
1557 /* Vector */
1558 {
1559 COSTS_N_INSNS (1) /* alu. */
1560 }
1561 };
1562
1563 const struct cpu_cost_table cortexa15_extra_costs =
1564 {
1565 /* ALU */
1566 {
1567 0, /* arith. */
1568 0, /* logical. */
1569 0, /* shift. */
1570 0, /* shift_reg. */
1571 COSTS_N_INSNS (1), /* arith_shift. */
1572 COSTS_N_INSNS (1), /* arith_shift_reg. */
1573 COSTS_N_INSNS (1), /* log_shift. */
1574 COSTS_N_INSNS (1), /* log_shift_reg. */
1575 0, /* extend. */
1576 COSTS_N_INSNS (1), /* extend_arith. */
1577 COSTS_N_INSNS (1), /* bfi. */
1578 0, /* bfx. */
1579 0, /* clz. */
1580 0, /* rev. */
1581 0, /* non_exec. */
1582 true /* non_exec_costs_exec. */
1583 },
1584 /* MULT SImode */
1585 {
1586 {
1587 COSTS_N_INSNS (2), /* simple. */
1588 COSTS_N_INSNS (3), /* flag_setting. */
1589 COSTS_N_INSNS (2), /* extend. */
1590 COSTS_N_INSNS (2), /* add. */
1591 COSTS_N_INSNS (2), /* extend_add. */
1592 COSTS_N_INSNS (18) /* idiv. */
1593 },
1594 /* MULT DImode */
1595 {
1596 0, /* simple (N/A). */
1597 0, /* flag_setting (N/A). */
1598 COSTS_N_INSNS (3), /* extend. */
1599 0, /* add (N/A). */
1600 COSTS_N_INSNS (3), /* extend_add. */
1601 0 /* idiv (N/A). */
1602 }
1603 },
1604 /* LD/ST */
1605 {
1606 COSTS_N_INSNS (3), /* load. */
1607 COSTS_N_INSNS (3), /* load_sign_extend. */
1608 COSTS_N_INSNS (3), /* ldrd. */
1609 COSTS_N_INSNS (4), /* ldm_1st. */
1610 1, /* ldm_regs_per_insn_1st. */
1611 2, /* ldm_regs_per_insn_subsequent. */
1612 COSTS_N_INSNS (4), /* loadf. */
1613 COSTS_N_INSNS (4), /* loadd. */
1614 0, /* load_unaligned. */
1615 0, /* store. */
1616 0, /* strd. */
1617 COSTS_N_INSNS (1), /* stm_1st. */
1618 1, /* stm_regs_per_insn_1st. */
1619 2, /* stm_regs_per_insn_subsequent. */
1620 0, /* storef. */
1621 0, /* stored. */
1622 0, /* store_unaligned. */
1623 COSTS_N_INSNS (1), /* loadv. */
1624 COSTS_N_INSNS (1) /* storev. */
1625 },
1626 {
1627 /* FP SFmode */
1628 {
1629 COSTS_N_INSNS (17), /* div. */
1630 COSTS_N_INSNS (4), /* mult. */
1631 COSTS_N_INSNS (8), /* mult_addsub. */
1632 COSTS_N_INSNS (8), /* fma. */
1633 COSTS_N_INSNS (4), /* addsub. */
1634 COSTS_N_INSNS (2), /* fpconst. */
1635 COSTS_N_INSNS (2), /* neg. */
1636 COSTS_N_INSNS (5), /* compare. */
1637 COSTS_N_INSNS (4), /* widen. */
1638 COSTS_N_INSNS (4), /* narrow. */
1639 COSTS_N_INSNS (4), /* toint. */
1640 COSTS_N_INSNS (4), /* fromint. */
1641 COSTS_N_INSNS (4) /* roundint. */
1642 },
1643 /* FP DFmode */
1644 {
1645 COSTS_N_INSNS (31), /* div. */
1646 COSTS_N_INSNS (4), /* mult. */
1647 COSTS_N_INSNS (8), /* mult_addsub. */
1648 COSTS_N_INSNS (8), /* fma. */
1649 COSTS_N_INSNS (4), /* addsub. */
1650 COSTS_N_INSNS (2), /* fpconst. */
1651 COSTS_N_INSNS (2), /* neg. */
1652 COSTS_N_INSNS (2), /* compare. */
1653 COSTS_N_INSNS (4), /* widen. */
1654 COSTS_N_INSNS (4), /* narrow. */
1655 COSTS_N_INSNS (4), /* toint. */
1656 COSTS_N_INSNS (4), /* fromint. */
1657 COSTS_N_INSNS (4) /* roundint. */
1658 }
1659 },
1660 /* Vector */
1661 {
1662 COSTS_N_INSNS (1) /* alu. */
1663 }
1664 };
1665
1666 const struct cpu_cost_table v7m_extra_costs =
1667 {
1668 /* ALU */
1669 {
1670 0, /* arith. */
1671 0, /* logical. */
1672 0, /* shift. */
1673 0, /* shift_reg. */
1674 0, /* arith_shift. */
1675 COSTS_N_INSNS (1), /* arith_shift_reg. */
1676 0, /* log_shift. */
1677 COSTS_N_INSNS (1), /* log_shift_reg. */
1678 0, /* extend. */
1679 COSTS_N_INSNS (1), /* extend_arith. */
1680 0, /* bfi. */
1681 0, /* bfx. */
1682 0, /* clz. */
1683 0, /* rev. */
1684 COSTS_N_INSNS (1), /* non_exec. */
1685 false /* non_exec_costs_exec. */
1686 },
1687 {
1688 /* MULT SImode */
1689 {
1690 COSTS_N_INSNS (1), /* simple. */
1691 COSTS_N_INSNS (1), /* flag_setting. */
1692 COSTS_N_INSNS (2), /* extend. */
1693 COSTS_N_INSNS (1), /* add. */
1694 COSTS_N_INSNS (3), /* extend_add. */
1695 COSTS_N_INSNS (8) /* idiv. */
1696 },
1697 /* MULT DImode */
1698 {
1699 0, /* simple (N/A). */
1700 0, /* flag_setting (N/A). */
1701 COSTS_N_INSNS (2), /* extend. */
1702 0, /* add (N/A). */
1703 COSTS_N_INSNS (3), /* extend_add. */
1704 0 /* idiv (N/A). */
1705 }
1706 },
1707 /* LD/ST */
1708 {
1709 COSTS_N_INSNS (2), /* load. */
1710 0, /* load_sign_extend. */
1711 COSTS_N_INSNS (3), /* ldrd. */
1712 COSTS_N_INSNS (2), /* ldm_1st. */
1713 1, /* ldm_regs_per_insn_1st. */
1714 1, /* ldm_regs_per_insn_subsequent. */
1715 COSTS_N_INSNS (2), /* loadf. */
1716 COSTS_N_INSNS (3), /* loadd. */
1717 COSTS_N_INSNS (1), /* load_unaligned. */
1718 COSTS_N_INSNS (2), /* store. */
1719 COSTS_N_INSNS (3), /* strd. */
1720 COSTS_N_INSNS (2), /* stm_1st. */
1721 1, /* stm_regs_per_insn_1st. */
1722 1, /* stm_regs_per_insn_subsequent. */
1723 COSTS_N_INSNS (2), /* storef. */
1724 COSTS_N_INSNS (3), /* stored. */
1725 COSTS_N_INSNS (1), /* store_unaligned. */
1726 COSTS_N_INSNS (1), /* loadv. */
1727 COSTS_N_INSNS (1) /* storev. */
1728 },
1729 {
1730 /* FP SFmode */
1731 {
1732 COSTS_N_INSNS (7), /* div. */
1733 COSTS_N_INSNS (2), /* mult. */
1734 COSTS_N_INSNS (5), /* mult_addsub. */
1735 COSTS_N_INSNS (3), /* fma. */
1736 COSTS_N_INSNS (1), /* addsub. */
1737 0, /* fpconst. */
1738 0, /* neg. */
1739 0, /* compare. */
1740 0, /* widen. */
1741 0, /* narrow. */
1742 0, /* toint. */
1743 0, /* fromint. */
1744 0 /* roundint. */
1745 },
1746 /* FP DFmode */
1747 {
1748 COSTS_N_INSNS (15), /* div. */
1749 COSTS_N_INSNS (5), /* mult. */
1750 COSTS_N_INSNS (7), /* mult_addsub. */
1751 COSTS_N_INSNS (7), /* fma. */
1752 COSTS_N_INSNS (3), /* addsub. */
1753 0, /* fpconst. */
1754 0, /* neg. */
1755 0, /* compare. */
1756 0, /* widen. */
1757 0, /* narrow. */
1758 0, /* toint. */
1759 0, /* fromint. */
1760 0 /* roundint. */
1761 }
1762 },
1763 /* Vector */
1764 {
1765 COSTS_N_INSNS (1) /* alu. */
1766 }
1767 };
1768
1769 const struct addr_mode_cost_table generic_addr_mode_costs =
1770 {
1771 /* int. */
1772 {
1773 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1774 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1775 COSTS_N_INSNS (0) /* AMO_WB. */
1776 },
1777 /* float. */
1778 {
1779 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1780 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1781 COSTS_N_INSNS (0) /* AMO_WB. */
1782 },
1783 /* vector. */
1784 {
1785 COSTS_N_INSNS (0), /* AMO_DEFAULT. */
1786 COSTS_N_INSNS (0), /* AMO_NO_WB. */
1787 COSTS_N_INSNS (0) /* AMO_WB. */
1788 }
1789 };
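/* Note on units (illustrative, based on the COSTS_N_INSNS definition in
   rtl.h): COSTS_N_INSNS (N) scales N by the cost of one fast instruction
   (4 cost units), and the tables above hold *extra* costs added on top of
   that baseline, which is why a cheap operation is written as 0 rather
   than COSTS_N_INSNS (1).  */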
1790
1791 const struct tune_params arm_slowmul_tune =
1792 {
1793 &generic_extra_costs, /* Insn extra costs. */
1794 &generic_addr_mode_costs, /* Addressing mode costs. */
1795 NULL, /* Sched adj cost. */
1796 arm_default_branch_cost,
1797 &arm_default_vec_cost,
1798 3, /* Constant limit. */
1799 5, /* Max cond insns. */
1800 8, /* Memset max inline. */
1801 1, /* Issue rate. */
1802 ARM_PREFETCH_NOT_BENEFICIAL,
1803 tune_params::PREF_CONST_POOL_TRUE,
1804 tune_params::PREF_LDRD_FALSE,
1805 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1806 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1807 tune_params::DISPARAGE_FLAGS_NEITHER,
1808 tune_params::PREF_NEON_64_FALSE,
1809 tune_params::PREF_NEON_STRINGOPS_FALSE,
1810 tune_params::FUSE_NOTHING,
1811 tune_params::SCHED_AUTOPREF_OFF
1812 };
1813
1814 const struct tune_params arm_fastmul_tune =
1815 {
1816 &generic_extra_costs, /* Insn extra costs. */
1817 &generic_addr_mode_costs, /* Addressing mode costs. */
1818 NULL, /* Sched adj cost. */
1819 arm_default_branch_cost,
1820 &arm_default_vec_cost,
1821 1, /* Constant limit. */
1822 5, /* Max cond insns. */
1823 8, /* Memset max inline. */
1824 1, /* Issue rate. */
1825 ARM_PREFETCH_NOT_BENEFICIAL,
1826 tune_params::PREF_CONST_POOL_TRUE,
1827 tune_params::PREF_LDRD_FALSE,
1828 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1829 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1830 tune_params::DISPARAGE_FLAGS_NEITHER,
1831 tune_params::PREF_NEON_64_FALSE,
1832 tune_params::PREF_NEON_STRINGOPS_FALSE,
1833 tune_params::FUSE_NOTHING,
1834 tune_params::SCHED_AUTOPREF_OFF
1835 };
1836
1837 /* StrongARM has early execution of branches, so a sequence that is worth
1838 skipping is shorter. Set max_insns_skipped to a lower value. */
1839
1840 const struct tune_params arm_strongarm_tune =
1841 {
1842 &generic_extra_costs, /* Insn extra costs. */
1843 &generic_addr_mode_costs, /* Addressing mode costs. */
1844 NULL, /* Sched adj cost. */
1845 arm_default_branch_cost,
1846 &arm_default_vec_cost,
1847 1, /* Constant limit. */
1848 3, /* Max cond insns. */
1849 8, /* Memset max inline. */
1850 1, /* Issue rate. */
1851 ARM_PREFETCH_NOT_BENEFICIAL,
1852 tune_params::PREF_CONST_POOL_TRUE,
1853 tune_params::PREF_LDRD_FALSE,
1854 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1855 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1856 tune_params::DISPARAGE_FLAGS_NEITHER,
1857 tune_params::PREF_NEON_64_FALSE,
1858 tune_params::PREF_NEON_STRINGOPS_FALSE,
1859 tune_params::FUSE_NOTHING,
1860 tune_params::SCHED_AUTOPREF_OFF
1861 };
1862
1863 const struct tune_params arm_xscale_tune =
1864 {
1865 &generic_extra_costs, /* Insn extra costs. */
1866 &generic_addr_mode_costs, /* Addressing mode costs. */
1867 xscale_sched_adjust_cost,
1868 arm_default_branch_cost,
1869 &arm_default_vec_cost,
1870 2, /* Constant limit. */
1871 3, /* Max cond insns. */
1872 8, /* Memset max inline. */
1873 1, /* Issue rate. */
1874 ARM_PREFETCH_NOT_BENEFICIAL,
1875 tune_params::PREF_CONST_POOL_TRUE,
1876 tune_params::PREF_LDRD_FALSE,
1877 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1878 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1879 tune_params::DISPARAGE_FLAGS_NEITHER,
1880 tune_params::PREF_NEON_64_FALSE,
1881 tune_params::PREF_NEON_STRINGOPS_FALSE,
1882 tune_params::FUSE_NOTHING,
1883 tune_params::SCHED_AUTOPREF_OFF
1884 };
1885
1886 const struct tune_params arm_9e_tune =
1887 {
1888 &generic_extra_costs, /* Insn extra costs. */
1889 &generic_addr_mode_costs, /* Addressing mode costs. */
1890 NULL, /* Sched adj cost. */
1891 arm_default_branch_cost,
1892 &arm_default_vec_cost,
1893 1, /* Constant limit. */
1894 5, /* Max cond insns. */
1895 8, /* Memset max inline. */
1896 1, /* Issue rate. */
1897 ARM_PREFETCH_NOT_BENEFICIAL,
1898 tune_params::PREF_CONST_POOL_TRUE,
1899 tune_params::PREF_LDRD_FALSE,
1900 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1901 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1902 tune_params::DISPARAGE_FLAGS_NEITHER,
1903 tune_params::PREF_NEON_64_FALSE,
1904 tune_params::PREF_NEON_STRINGOPS_FALSE,
1905 tune_params::FUSE_NOTHING,
1906 tune_params::SCHED_AUTOPREF_OFF
1907 };
1908
1909 const struct tune_params arm_marvell_pj4_tune =
1910 {
1911 &generic_extra_costs, /* Insn extra costs. */
1912 &generic_addr_mode_costs, /* Addressing mode costs. */
1913 NULL, /* Sched adj cost. */
1914 arm_default_branch_cost,
1915 &arm_default_vec_cost,
1916 1, /* Constant limit. */
1917 5, /* Max cond insns. */
1918 8, /* Memset max inline. */
1919 2, /* Issue rate. */
1920 ARM_PREFETCH_NOT_BENEFICIAL,
1921 tune_params::PREF_CONST_POOL_TRUE,
1922 tune_params::PREF_LDRD_FALSE,
1923 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1924 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1925 tune_params::DISPARAGE_FLAGS_NEITHER,
1926 tune_params::PREF_NEON_64_FALSE,
1927 tune_params::PREF_NEON_STRINGOPS_FALSE,
1928 tune_params::FUSE_NOTHING,
1929 tune_params::SCHED_AUTOPREF_OFF
1930 };
1931
1932 const struct tune_params arm_v6t2_tune =
1933 {
1934 &generic_extra_costs, /* Insn extra costs. */
1935 &generic_addr_mode_costs, /* Addressing mode costs. */
1936 NULL, /* Sched adj cost. */
1937 arm_default_branch_cost,
1938 &arm_default_vec_cost,
1939 1, /* Constant limit. */
1940 5, /* Max cond insns. */
1941 8, /* Memset max inline. */
1942 1, /* Issue rate. */
1943 ARM_PREFETCH_NOT_BENEFICIAL,
1944 tune_params::PREF_CONST_POOL_FALSE,
1945 tune_params::PREF_LDRD_FALSE,
1946 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1947 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1948 tune_params::DISPARAGE_FLAGS_NEITHER,
1949 tune_params::PREF_NEON_64_FALSE,
1950 tune_params::PREF_NEON_STRINGOPS_FALSE,
1951 tune_params::FUSE_NOTHING,
1952 tune_params::SCHED_AUTOPREF_OFF
1953 };
1954
1955
1956 /* Generic Cortex tuning. Use more specific tunings if appropriate. */
1957 const struct tune_params arm_cortex_tune =
1958 {
1959 &generic_extra_costs,
1960 &generic_addr_mode_costs, /* Addressing mode costs. */
1961 NULL, /* Sched adj cost. */
1962 arm_default_branch_cost,
1963 &arm_default_vec_cost,
1964 1, /* Constant limit. */
1965 5, /* Max cond insns. */
1966 8, /* Memset max inline. */
1967 2, /* Issue rate. */
1968 ARM_PREFETCH_NOT_BENEFICIAL,
1969 tune_params::PREF_CONST_POOL_FALSE,
1970 tune_params::PREF_LDRD_FALSE,
1971 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1972 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1973 tune_params::DISPARAGE_FLAGS_NEITHER,
1974 tune_params::PREF_NEON_64_FALSE,
1975 tune_params::PREF_NEON_STRINGOPS_FALSE,
1976 tune_params::FUSE_NOTHING,
1977 tune_params::SCHED_AUTOPREF_OFF
1978 };
1979
1980 const struct tune_params arm_cortex_a8_tune =
1981 {
1982 &cortexa8_extra_costs,
1983 &generic_addr_mode_costs, /* Addressing mode costs. */
1984 NULL, /* Sched adj cost. */
1985 arm_default_branch_cost,
1986 &arm_default_vec_cost,
1987 1, /* Constant limit. */
1988 5, /* Max cond insns. */
1989 8, /* Memset max inline. */
1990 2, /* Issue rate. */
1991 ARM_PREFETCH_NOT_BENEFICIAL,
1992 tune_params::PREF_CONST_POOL_FALSE,
1993 tune_params::PREF_LDRD_FALSE,
1994 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
1995 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
1996 tune_params::DISPARAGE_FLAGS_NEITHER,
1997 tune_params::PREF_NEON_64_FALSE,
1998 tune_params::PREF_NEON_STRINGOPS_TRUE,
1999 tune_params::FUSE_NOTHING,
2000 tune_params::SCHED_AUTOPREF_OFF
2001 };
2002
2003 const struct tune_params arm_cortex_a7_tune =
2004 {
2005 &cortexa7_extra_costs,
2006 &generic_addr_mode_costs, /* Addressing mode costs. */
2007 NULL, /* Sched adj cost. */
2008 arm_default_branch_cost,
2009 &arm_default_vec_cost,
2010 1, /* Constant limit. */
2011 5, /* Max cond insns. */
2012 8, /* Memset max inline. */
2013 2, /* Issue rate. */
2014 ARM_PREFETCH_NOT_BENEFICIAL,
2015 tune_params::PREF_CONST_POOL_FALSE,
2016 tune_params::PREF_LDRD_FALSE,
2017 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2018 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2019 tune_params::DISPARAGE_FLAGS_NEITHER,
2020 tune_params::PREF_NEON_64_FALSE,
2021 tune_params::PREF_NEON_STRINGOPS_TRUE,
2022 tune_params::FUSE_NOTHING,
2023 tune_params::SCHED_AUTOPREF_OFF
2024 };
2025
2026 const struct tune_params arm_cortex_a15_tune =
2027 {
2028 &cortexa15_extra_costs,
2029 &generic_addr_mode_costs, /* Addressing mode costs. */
2030 NULL, /* Sched adj cost. */
2031 arm_default_branch_cost,
2032 &arm_default_vec_cost,
2033 1, /* Constant limit. */
2034 2, /* Max cond insns. */
2035 8, /* Memset max inline. */
2036 3, /* Issue rate. */
2037 ARM_PREFETCH_NOT_BENEFICIAL,
2038 tune_params::PREF_CONST_POOL_FALSE,
2039 tune_params::PREF_LDRD_TRUE,
2040 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2041 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2042 tune_params::DISPARAGE_FLAGS_ALL,
2043 tune_params::PREF_NEON_64_FALSE,
2044 tune_params::PREF_NEON_STRINGOPS_TRUE,
2045 tune_params::FUSE_NOTHING,
2046 tune_params::SCHED_AUTOPREF_FULL
2047 };
2048
2049 const struct tune_params arm_cortex_a35_tune =
2050 {
2051 &cortexa53_extra_costs,
2052 &generic_addr_mode_costs, /* Addressing mode costs. */
2053 NULL, /* Sched adj cost. */
2054 arm_default_branch_cost,
2055 &arm_default_vec_cost,
2056 1, /* Constant limit. */
2057 5, /* Max cond insns. */
2058 8, /* Memset max inline. */
2059 1, /* Issue rate. */
2060 ARM_PREFETCH_NOT_BENEFICIAL,
2061 tune_params::PREF_CONST_POOL_FALSE,
2062 tune_params::PREF_LDRD_FALSE,
2063 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2064 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2065 tune_params::DISPARAGE_FLAGS_NEITHER,
2066 tune_params::PREF_NEON_64_FALSE,
2067 tune_params::PREF_NEON_STRINGOPS_TRUE,
2068 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2069 tune_params::SCHED_AUTOPREF_OFF
2070 };
2071
2072 const struct tune_params arm_cortex_a53_tune =
2073 {
2074 &cortexa53_extra_costs,
2075 &generic_addr_mode_costs, /* Addressing mode costs. */
2076 NULL, /* Sched adj cost. */
2077 arm_default_branch_cost,
2078 &arm_default_vec_cost,
2079 1, /* Constant limit. */
2080 5, /* Max cond insns. */
2081 8, /* Memset max inline. */
2082 2, /* Issue rate. */
2083 ARM_PREFETCH_NOT_BENEFICIAL,
2084 tune_params::PREF_CONST_POOL_FALSE,
2085 tune_params::PREF_LDRD_FALSE,
2086 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2087 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2088 tune_params::DISPARAGE_FLAGS_NEITHER,
2089 tune_params::PREF_NEON_64_FALSE,
2090 tune_params::PREF_NEON_STRINGOPS_TRUE,
2091 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2092 tune_params::SCHED_AUTOPREF_OFF
2093 };
2094
2095 const struct tune_params arm_cortex_a57_tune =
2096 {
2097 &cortexa57_extra_costs,
2098 &generic_addr_mode_costs, /* Addressing mode costs. */
2099 NULL, /* Sched adj cost. */
2100 arm_default_branch_cost,
2101 &arm_default_vec_cost,
2102 1, /* Constant limit. */
2103 2, /* Max cond insns. */
2104 8, /* Memset max inline. */
2105 3, /* Issue rate. */
2106 ARM_PREFETCH_NOT_BENEFICIAL,
2107 tune_params::PREF_CONST_POOL_FALSE,
2108 tune_params::PREF_LDRD_TRUE,
2109 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2110 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2111 tune_params::DISPARAGE_FLAGS_ALL,
2112 tune_params::PREF_NEON_64_FALSE,
2113 tune_params::PREF_NEON_STRINGOPS_TRUE,
2114 FUSE_OPS (tune_params::FUSE_MOVW_MOVT | tune_params::FUSE_AES_AESMC),
2115 tune_params::SCHED_AUTOPREF_FULL
2116 };
2117
2118 const struct tune_params arm_exynosm1_tune =
2119 {
2120 &exynosm1_extra_costs,
2121 &generic_addr_mode_costs, /* Addressing mode costs. */
2122 NULL, /* Sched adj cost. */
2123 arm_default_branch_cost,
2124 &arm_default_vec_cost,
2125 1, /* Constant limit. */
2126 2, /* Max cond insns. */
2127 8, /* Memset max inline. */
2128 3, /* Issue rate. */
2129 ARM_PREFETCH_NOT_BENEFICIAL,
2130 tune_params::PREF_CONST_POOL_FALSE,
2131 tune_params::PREF_LDRD_TRUE,
2132 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2133 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2134 tune_params::DISPARAGE_FLAGS_ALL,
2135 tune_params::PREF_NEON_64_FALSE,
2136 tune_params::PREF_NEON_STRINGOPS_TRUE,
2137 tune_params::FUSE_NOTHING,
2138 tune_params::SCHED_AUTOPREF_OFF
2139 };
2140
2141 const struct tune_params arm_xgene1_tune =
2142 {
2143 &xgene1_extra_costs,
2144 &generic_addr_mode_costs, /* Addressing mode costs. */
2145 NULL, /* Sched adj cost. */
2146 arm_default_branch_cost,
2147 &arm_default_vec_cost,
2148 1, /* Constant limit. */
2149 2, /* Max cond insns. */
2150 32, /* Memset max inline. */
2151 4, /* Issue rate. */
2152 ARM_PREFETCH_NOT_BENEFICIAL,
2153 tune_params::PREF_CONST_POOL_FALSE,
2154 tune_params::PREF_LDRD_TRUE,
2155 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2156 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2157 tune_params::DISPARAGE_FLAGS_ALL,
2158 tune_params::PREF_NEON_64_FALSE,
2159 tune_params::PREF_NEON_STRINGOPS_FALSE,
2160 tune_params::FUSE_NOTHING,
2161 tune_params::SCHED_AUTOPREF_OFF
2162 };
2163
2164 /* Branches can be dual-issued on Cortex-A5, so conditional execution is
2165 less appealing. Set max_insns_skipped to a low value. */
2166
2167 const struct tune_params arm_cortex_a5_tune =
2168 {
2169 &cortexa5_extra_costs,
2170 &generic_addr_mode_costs, /* Addressing mode costs. */
2171 NULL, /* Sched adj cost. */
2172 arm_cortex_a5_branch_cost,
2173 &arm_default_vec_cost,
2174 1, /* Constant limit. */
2175 1, /* Max cond insns. */
2176 8, /* Memset max inline. */
2177 2, /* Issue rate. */
2178 ARM_PREFETCH_NOT_BENEFICIAL,
2179 tune_params::PREF_CONST_POOL_FALSE,
2180 tune_params::PREF_LDRD_FALSE,
2181 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2182 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2183 tune_params::DISPARAGE_FLAGS_NEITHER,
2184 tune_params::PREF_NEON_64_FALSE,
2185 tune_params::PREF_NEON_STRINGOPS_TRUE,
2186 tune_params::FUSE_NOTHING,
2187 tune_params::SCHED_AUTOPREF_OFF
2188 };
2189
2190 const struct tune_params arm_cortex_a9_tune =
2191 {
2192 &cortexa9_extra_costs,
2193 &generic_addr_mode_costs, /* Addressing mode costs. */
2194 cortex_a9_sched_adjust_cost,
2195 arm_default_branch_cost,
2196 &arm_default_vec_cost,
2197 1, /* Constant limit. */
2198 5, /* Max cond insns. */
2199 8, /* Memset max inline. */
2200 2, /* Issue rate. */
2201 ARM_PREFETCH_BENEFICIAL(4,32,32),
2202 tune_params::PREF_CONST_POOL_FALSE,
2203 tune_params::PREF_LDRD_FALSE,
2204 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2205 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2206 tune_params::DISPARAGE_FLAGS_NEITHER,
2207 tune_params::PREF_NEON_64_FALSE,
2208 tune_params::PREF_NEON_STRINGOPS_FALSE,
2209 tune_params::FUSE_NOTHING,
2210 tune_params::SCHED_AUTOPREF_OFF
2211 };
2212
2213 const struct tune_params arm_cortex_a12_tune =
2214 {
2215 &cortexa12_extra_costs,
2216 &generic_addr_mode_costs, /* Addressing mode costs. */
2217 NULL, /* Sched adj cost. */
2218 arm_default_branch_cost,
2219 &arm_default_vec_cost, /* Vectorizer costs. */
2220 1, /* Constant limit. */
2221 2, /* Max cond insns. */
2222 8, /* Memset max inline. */
2223 2, /* Issue rate. */
2224 ARM_PREFETCH_NOT_BENEFICIAL,
2225 tune_params::PREF_CONST_POOL_FALSE,
2226 tune_params::PREF_LDRD_TRUE,
2227 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2228 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2229 tune_params::DISPARAGE_FLAGS_ALL,
2230 tune_params::PREF_NEON_64_FALSE,
2231 tune_params::PREF_NEON_STRINGOPS_TRUE,
2232 FUSE_OPS (tune_params::FUSE_MOVW_MOVT),
2233 tune_params::SCHED_AUTOPREF_OFF
2234 };
2235
2236 const struct tune_params arm_cortex_a73_tune =
2237 {
2238 &cortexa57_extra_costs,
2239 &generic_addr_mode_costs, /* Addressing mode costs. */
2240 NULL, /* Sched adj cost. */
2241 arm_default_branch_cost,
2242 &arm_default_vec_cost, /* Vectorizer costs. */
2243 1, /* Constant limit. */
2244 2, /* Max cond insns. */
2245 8, /* Memset max inline. */
2246 2, /* Issue rate. */
2247 ARM_PREFETCH_NOT_BENEFICIAL,
2248 tune_params::PREF_CONST_POOL_FALSE,
2249 tune_params::PREF_LDRD_TRUE,
2250 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2251 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2252 tune_params::DISPARAGE_FLAGS_ALL,
2253 tune_params::PREF_NEON_64_FALSE,
2254 tune_params::PREF_NEON_STRINGOPS_TRUE,
2255 FUSE_OPS (tune_params::FUSE_AES_AESMC | tune_params::FUSE_MOVW_MOVT),
2256 tune_params::SCHED_AUTOPREF_FULL
2257 };
2258
2259 /* ARMv7-M tuning. On Cortex-M4 cores, for example, MOVW/MOVT each take a
2260 single cycle to execute, so materialising a 32-bit constant costs two cycles. An
2261 LDR from the constant pool also takes two cycles to execute, but mildly increases pipelining opportunity (consecutive
2262 loads/stores can be pipelined together, saving one cycle), and may also
2263 improve icache utilisation. Hence we prefer the constant pool for such
2264 processors. */
2265
2266 const struct tune_params arm_v7m_tune =
2267 {
2268 &v7m_extra_costs,
2269 &generic_addr_mode_costs, /* Addressing mode costs. */
2270 NULL, /* Sched adj cost. */
2271 arm_cortex_m_branch_cost,
2272 &arm_default_vec_cost,
2273 1, /* Constant limit. */
2274 2, /* Max cond insns. */
2275 8, /* Memset max inline. */
2276 1, /* Issue rate. */
2277 ARM_PREFETCH_NOT_BENEFICIAL,
2278 tune_params::PREF_CONST_POOL_TRUE,
2279 tune_params::PREF_LDRD_FALSE,
2280 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2281 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2282 tune_params::DISPARAGE_FLAGS_NEITHER,
2283 tune_params::PREF_NEON_64_FALSE,
2284 tune_params::PREF_NEON_STRINGOPS_FALSE,
2285 tune_params::FUSE_NOTHING,
2286 tune_params::SCHED_AUTOPREF_OFF
2287 };
2288
2289 /* Cortex-M7 tuning. */
2290
2291 const struct tune_params arm_cortex_m7_tune =
2292 {
2293 &v7m_extra_costs,
2294 &generic_addr_mode_costs, /* Addressing mode costs. */
2295 NULL, /* Sched adj cost. */
2296 arm_cortex_m7_branch_cost,
2297 &arm_default_vec_cost,
2298 0, /* Constant limit. */
2299 1, /* Max cond insns. */
2300 8, /* Memset max inline. */
2301 2, /* Issue rate. */
2302 ARM_PREFETCH_NOT_BENEFICIAL,
2303 tune_params::PREF_CONST_POOL_TRUE,
2304 tune_params::PREF_LDRD_FALSE,
2305 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2306 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2307 tune_params::DISPARAGE_FLAGS_NEITHER,
2308 tune_params::PREF_NEON_64_FALSE,
2309 tune_params::PREF_NEON_STRINGOPS_FALSE,
2310 tune_params::FUSE_NOTHING,
2311 tune_params::SCHED_AUTOPREF_OFF
2312 };
2313
2314 /* The arm_v6m_tune is duplicated from arm_cortex_tune, rather than
2315 arm_v6t2_tune. It is used for cortex-m0, cortex-m1, cortex-m0plus and
2316 cortex-m23. */
2317 const struct tune_params arm_v6m_tune =
2318 {
2319 &generic_extra_costs, /* Insn extra costs. */
2320 &generic_addr_mode_costs, /* Addressing mode costs. */
2321 NULL, /* Sched adj cost. */
2322 arm_default_branch_cost,
2323 &arm_default_vec_cost, /* Vectorizer costs. */
2324 1, /* Constant limit. */
2325 5, /* Max cond insns. */
2326 8, /* Memset max inline. */
2327 1, /* Issue rate. */
2328 ARM_PREFETCH_NOT_BENEFICIAL,
2329 tune_params::PREF_CONST_POOL_FALSE,
2330 tune_params::PREF_LDRD_FALSE,
2331 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* Thumb. */
2332 tune_params::LOG_OP_NON_SHORT_CIRCUIT_FALSE, /* ARM. */
2333 tune_params::DISPARAGE_FLAGS_NEITHER,
2334 tune_params::PREF_NEON_64_FALSE,
2335 tune_params::PREF_NEON_STRINGOPS_FALSE,
2336 tune_params::FUSE_NOTHING,
2337 tune_params::SCHED_AUTOPREF_OFF
2338 };
2339
2340 const struct tune_params arm_fa726te_tune =
2341 {
2342 &generic_extra_costs, /* Insn extra costs. */
2343 &generic_addr_mode_costs, /* Addressing mode costs. */
2344 fa726te_sched_adjust_cost,
2345 arm_default_branch_cost,
2346 &arm_default_vec_cost,
2347 1, /* Constant limit. */
2348 5, /* Max cond insns. */
2349 8, /* Memset max inline. */
2350 2, /* Issue rate. */
2351 ARM_PREFETCH_NOT_BENEFICIAL,
2352 tune_params::PREF_CONST_POOL_TRUE,
2353 tune_params::PREF_LDRD_FALSE,
2354 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* Thumb. */
2355 tune_params::LOG_OP_NON_SHORT_CIRCUIT_TRUE, /* ARM. */
2356 tune_params::DISPARAGE_FLAGS_NEITHER,
2357 tune_params::PREF_NEON_64_FALSE,
2358 tune_params::PREF_NEON_STRINGOPS_FALSE,
2359 tune_params::FUSE_NOTHING,
2360 tune_params::SCHED_AUTOPREF_OFF
2361 };
2362
2363 /* Auto-generated CPU, FPU and architecture tables. */
2364 #include "arm-cpu-data.h"
2365
2366 /* The name of the preprocessor macro to define for this architecture. PROFILE
2367 is replaced by the architecture name (eg. 8A) in arm_option_override () and
2368 is thus chosen to be big enough to hold the longest architecture name. */
2369
2370 char arm_arch_name[] = "__ARM_ARCH_PROFILE__";
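/* For example, with -march=armv7-a the substituted macro name is expected
   to be "__ARM_ARCH_7A__"; the fixed "PROFILE" placeholder is simply long
   enough to hold any such name.  */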
2371
2372 /* Supported TLS relocations. */
2373
2374 enum tls_reloc {
2375 TLS_GD32,
2376 TLS_LDM32,
2377 TLS_LDO32,
2378 TLS_IE32,
2379 TLS_LE32,
2380 TLS_DESCSEQ /* GNU scheme */
2381 };
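/* These map onto the standard TLS access models: TLS_GD32 is general
   dynamic, TLS_LDM32/TLS_LDO32 are the module and offset parts of local
   dynamic, TLS_IE32 is initial exec, TLS_LE32 is local exec, and
   TLS_DESCSEQ is the GNU TLS-descriptor sequence.  */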
2382
2383 /* The maximum number of insns to be used when loading a constant. */
2384 inline static int
2385 arm_constant_limit (bool size_p)
2386 {
2387 return size_p ? 1 : current_tune->constant_limit;
2388 }
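/* Worked example: when optimizing for size the limit is always a single
   insn; otherwise it follows the selected tuning, e.g. 3 for
   arm_slowmul_tune or 2 for arm_xscale_tune above.  */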
2389
2390 /* Emit an insn that's a simple single-set. Both the operands must be known
2391 to be valid. */
2392 inline static rtx_insn *
2393 emit_set_insn (rtx x, rtx y)
2394 {
2395 return emit_insn (gen_rtx_SET (x, y));
2396 }
2397
2398 /* Return the number of bits set in VALUE. */
2399 static unsigned
2400 bit_count (unsigned long value)
2401 {
2402 unsigned long count = 0;
2403
2404 while (value)
2405 {
2406 count++;
2407 value &= value - 1; /* Clear the least-significant set bit. */
2408 }
2409
2410 return count;
2411 }
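/* Worked example (Kernighan's trick): for value = 0b101100 the loop
   produces 0b101000, then 0b100000, then 0, i.e. three iterations for the
   three set bits.  */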
2412
2413 /* Return the number of bits set in BMAP. */
2414 static unsigned
2415 bitmap_popcount (const sbitmap bmap)
2416 {
2417 unsigned int count = 0;
2418 unsigned int n = 0;
2419 sbitmap_iterator sbi;
2420
2421 EXECUTE_IF_SET_IN_BITMAP (bmap, 0, n, sbi)
2422 count++;
2423 return count;
2424 }
2425
2426 typedef struct
2427 {
2428 machine_mode mode;
2429 const char *name;
2430 } arm_fixed_mode_set;
2431
2432 /* A small helper for setting the fixed-point library functions (libfuncs). */
2433
2434 static void
2435 arm_set_fixed_optab_libfunc (optab optable, machine_mode mode,
2436 const char *funcname, const char *modename,
2437 int num_suffix)
2438 {
2439 char buffer[50];
2440
2441 if (num_suffix == 0)
2442 sprintf (buffer, "__gnu_%s%s", funcname, modename);
2443 else
2444 sprintf (buffer, "__gnu_%s%s%d", funcname, modename, num_suffix);
2445
2446 set_optab_libfunc (optable, mode, buffer);
2447 }
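/* Illustrative call: arm_set_fixed_optab_libfunc (add_optab, E_QQmode,
   "add", "qq", 3) registers the libcall name "__gnu_addqq3" for QQmode
   addition, matching the calls made in arm_init_libfuncs below.  */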
2448
2449 static void
2450 arm_set_fixed_conv_libfunc (convert_optab optable, machine_mode to,
2451 machine_mode from, const char *funcname,
2452 const char *toname, const char *fromname)
2453 {
2454 char buffer[50];
2455 const char *maybe_suffix_2 = "";
2456
2457 /* Follow the logic for selecting a "2" suffix in fixed-bit.h. */
2458 if (ALL_FIXED_POINT_MODE_P (from) && ALL_FIXED_POINT_MODE_P (to)
2459 && UNSIGNED_FIXED_POINT_MODE_P (from) == UNSIGNED_FIXED_POINT_MODE_P (to)
2460 && ALL_FRACT_MODE_P (from) == ALL_FRACT_MODE_P (to))
2461 maybe_suffix_2 = "2";
2462
2463 sprintf (buffer, "__gnu_%s%s%s%s", funcname, fromname, toname,
2464 maybe_suffix_2);
2465
2466 set_conv_libfunc (optable, to, from, buffer);
2467 }
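/* Illustrative calls: a QQmode-to-HQmode "fract" conversion (both signed
   fract modes) gets the name "__gnu_fractqqhq2", while QQmode-to-SImode
   (an integer destination) drops the "2" suffix and gets
   "__gnu_fractqqsi".  */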
2468
2469 static GTY(()) rtx speculation_barrier_libfunc;
2470
2471 /* Set up library functions unique to ARM. */
2472 static void
2473 arm_init_libfuncs (void)
2474 {
2475 /* For Linux, we have access to kernel support for atomic operations. */
2476 if (arm_abi == ARM_ABI_AAPCS_LINUX)
2477 init_sync_libfuncs (MAX_SYNC_LIBFUNC_SIZE);
2478
2479 /* There are no special library functions unless we are using the
2480 ARM BPABI. */
2481 if (!TARGET_BPABI)
2482 return;
2483
2484 /* The functions below are described in Section 4 of the "Run-Time
2485 ABI for the ARM architecture", Version 1.0. */
2486
2487 /* Double-precision floating-point arithmetic. Table 2. */
2488 set_optab_libfunc (add_optab, DFmode, "__aeabi_dadd");
2489 set_optab_libfunc (sdiv_optab, DFmode, "__aeabi_ddiv");
2490 set_optab_libfunc (smul_optab, DFmode, "__aeabi_dmul");
2491 set_optab_libfunc (neg_optab, DFmode, "__aeabi_dneg");
2492 set_optab_libfunc (sub_optab, DFmode, "__aeabi_dsub");
2493
2494 /* Double-precision comparisons. Table 3. */
2495 set_optab_libfunc (eq_optab, DFmode, "__aeabi_dcmpeq");
2496 set_optab_libfunc (ne_optab, DFmode, NULL);
2497 set_optab_libfunc (lt_optab, DFmode, "__aeabi_dcmplt");
2498 set_optab_libfunc (le_optab, DFmode, "__aeabi_dcmple");
2499 set_optab_libfunc (ge_optab, DFmode, "__aeabi_dcmpge");
2500 set_optab_libfunc (gt_optab, DFmode, "__aeabi_dcmpgt");
2501 set_optab_libfunc (unord_optab, DFmode, "__aeabi_dcmpun");
2502
2503 /* Single-precision floating-point arithmetic. Table 4. */
2504 set_optab_libfunc (add_optab, SFmode, "__aeabi_fadd");
2505 set_optab_libfunc (sdiv_optab, SFmode, "__aeabi_fdiv");
2506 set_optab_libfunc (smul_optab, SFmode, "__aeabi_fmul");
2507 set_optab_libfunc (neg_optab, SFmode, "__aeabi_fneg");
2508 set_optab_libfunc (sub_optab, SFmode, "__aeabi_fsub");
2509
2510 /* Single-precision comparisons. Table 5. */
2511 set_optab_libfunc (eq_optab, SFmode, "__aeabi_fcmpeq");
2512 set_optab_libfunc (ne_optab, SFmode, NULL);
2513 set_optab_libfunc (lt_optab, SFmode, "__aeabi_fcmplt");
2514 set_optab_libfunc (le_optab, SFmode, "__aeabi_fcmple");
2515 set_optab_libfunc (ge_optab, SFmode, "__aeabi_fcmpge");
2516 set_optab_libfunc (gt_optab, SFmode, "__aeabi_fcmpgt");
2517 set_optab_libfunc (unord_optab, SFmode, "__aeabi_fcmpun");
2518
2519 /* Floating-point to integer conversions. Table 6. */
2520 set_conv_libfunc (sfix_optab, SImode, DFmode, "__aeabi_d2iz");
2521 set_conv_libfunc (ufix_optab, SImode, DFmode, "__aeabi_d2uiz");
2522 set_conv_libfunc (sfix_optab, DImode, DFmode, "__aeabi_d2lz");
2523 set_conv_libfunc (ufix_optab, DImode, DFmode, "__aeabi_d2ulz");
2524 set_conv_libfunc (sfix_optab, SImode, SFmode, "__aeabi_f2iz");
2525 set_conv_libfunc (ufix_optab, SImode, SFmode, "__aeabi_f2uiz");
2526 set_conv_libfunc (sfix_optab, DImode, SFmode, "__aeabi_f2lz");
2527 set_conv_libfunc (ufix_optab, DImode, SFmode, "__aeabi_f2ulz");
2528
2529 /* Conversions between floating types. Table 7. */
2530 set_conv_libfunc (trunc_optab, SFmode, DFmode, "__aeabi_d2f");
2531 set_conv_libfunc (sext_optab, DFmode, SFmode, "__aeabi_f2d");
2532
2533 /* Integer to floating-point conversions. Table 8. */
2534 set_conv_libfunc (sfloat_optab, DFmode, SImode, "__aeabi_i2d");
2535 set_conv_libfunc (ufloat_optab, DFmode, SImode, "__aeabi_ui2d");
2536 set_conv_libfunc (sfloat_optab, DFmode, DImode, "__aeabi_l2d");
2537 set_conv_libfunc (ufloat_optab, DFmode, DImode, "__aeabi_ul2d");
2538 set_conv_libfunc (sfloat_optab, SFmode, SImode, "__aeabi_i2f");
2539 set_conv_libfunc (ufloat_optab, SFmode, SImode, "__aeabi_ui2f");
2540 set_conv_libfunc (sfloat_optab, SFmode, DImode, "__aeabi_l2f");
2541 set_conv_libfunc (ufloat_optab, SFmode, DImode, "__aeabi_ul2f");
2542
2543 /* Long long. Table 9. */
2544 set_optab_libfunc (smul_optab, DImode, "__aeabi_lmul");
2545 set_optab_libfunc (sdivmod_optab, DImode, "__aeabi_ldivmod");
2546 set_optab_libfunc (udivmod_optab, DImode, "__aeabi_uldivmod");
2547 set_optab_libfunc (ashl_optab, DImode, "__aeabi_llsl");
2548 set_optab_libfunc (lshr_optab, DImode, "__aeabi_llsr");
2549 set_optab_libfunc (ashr_optab, DImode, "__aeabi_lasr");
2550 set_optab_libfunc (cmp_optab, DImode, "__aeabi_lcmp");
2551 set_optab_libfunc (ucmp_optab, DImode, "__aeabi_ulcmp");
2552
2553 /* Integer (32/32->32) division. \S 4.3.1. */
2554 set_optab_libfunc (sdivmod_optab, SImode, "__aeabi_idivmod");
2555 set_optab_libfunc (udivmod_optab, SImode, "__aeabi_uidivmod");
2556
2557 /* The divmod functions are designed so that they can be used for
2558 plain division, even though they return both the quotient and the
2559 remainder. The quotient is returned in the usual location (i.e.,
2560 r0 for SImode, {r0, r1} for DImode), just as would be expected
2561 for an ordinary division routine. Because the AAPCS calling
2562 conventions specify that all of { r0, r1, r2, r3 } are
2563 call-clobbered registers, there is no need to tell the compiler
2564 explicitly that those registers are clobbered by these
2565 routines. */
2566 set_optab_libfunc (sdiv_optab, DImode, "__aeabi_ldivmod");
2567 set_optab_libfunc (udiv_optab, DImode, "__aeabi_uldivmod");
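/* For example, a plain DImode signed division ends up calling
   __aeabi_ldivmod and simply ignores the remainder, which the routine is
   expected to leave in { r2, r3 }; as those registers are call-clobbered
   anyway, nothing further needs to be said.  */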
2568
2569 /* For SImode division the ABI provides div-without-mod routines,
2570 which are faster. */
2571 set_optab_libfunc (sdiv_optab, SImode, "__aeabi_idiv");
2572 set_optab_libfunc (udiv_optab, SImode, "__aeabi_uidiv");
2573
2574 /* We don't have mod libcalls. Fortunately gcc knows how to use the
2575 divmod libcalls instead. */
2576 set_optab_libfunc (smod_optab, DImode, NULL);
2577 set_optab_libfunc (umod_optab, DImode, NULL);
2578 set_optab_libfunc (smod_optab, SImode, NULL);
2579 set_optab_libfunc (umod_optab, SImode, NULL);
2580
2581 /* Half-precision float operations. The compiler handles all operations
2582 with NULL libfuncs by converting to SFmode. */
2583 switch (arm_fp16_format)
2584 {
2585 case ARM_FP16_FORMAT_IEEE:
2586 case ARM_FP16_FORMAT_ALTERNATIVE:
2587
2588 /* Conversions. */
2589 set_conv_libfunc (trunc_optab, HFmode, SFmode,
2590 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2591 ? "__gnu_f2h_ieee"
2592 : "__gnu_f2h_alternative"));
2593 set_conv_libfunc (sext_optab, SFmode, HFmode,
2594 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2595 ? "__gnu_h2f_ieee"
2596 : "__gnu_h2f_alternative"));
2597
2598 set_conv_libfunc (trunc_optab, HFmode, DFmode,
2599 (arm_fp16_format == ARM_FP16_FORMAT_IEEE
2600 ? "__gnu_d2h_ieee"
2601 : "__gnu_d2h_alternative"));
2602
2603 /* Arithmetic. */
2604 set_optab_libfunc (add_optab, HFmode, NULL);
2605 set_optab_libfunc (sdiv_optab, HFmode, NULL);
2606 set_optab_libfunc (smul_optab, HFmode, NULL);
2607 set_optab_libfunc (neg_optab, HFmode, NULL);
2608 set_optab_libfunc (sub_optab, HFmode, NULL);
2609
2610 /* Comparisons. */
2611 set_optab_libfunc (eq_optab, HFmode, NULL);
2612 set_optab_libfunc (ne_optab, HFmode, NULL);
2613 set_optab_libfunc (lt_optab, HFmode, NULL);
2614 set_optab_libfunc (le_optab, HFmode, NULL);
2615 set_optab_libfunc (ge_optab, HFmode, NULL);
2616 set_optab_libfunc (gt_optab, HFmode, NULL);
2617 set_optab_libfunc (unord_optab, HFmode, NULL);
2618 break;
2619
2620 default:
2621 break;
2622 }
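/* In effect, an HFmode operation such as an addition should expand to a
   widen via __gnu_h2f_ieee (or __gnu_h2f_alternative), an SFmode add
   (__aeabi_fadd on BPABI targets, as registered above), and a narrow back
   via the matching __gnu_f2h_* routine.  */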
2623
2624 /* Use names prefixed with __gnu_ for fixed-point helper functions. */
2625 {
2626 const arm_fixed_mode_set fixed_arith_modes[] =
2627 {
2628 { E_QQmode, "qq" },
2629 { E_UQQmode, "uqq" },
2630 { E_HQmode, "hq" },
2631 { E_UHQmode, "uhq" },
2632 { E_SQmode, "sq" },
2633 { E_USQmode, "usq" },
2634 { E_DQmode, "dq" },
2635 { E_UDQmode, "udq" },
2636 { E_TQmode, "tq" },
2637 { E_UTQmode, "utq" },
2638 { E_HAmode, "ha" },
2639 { E_UHAmode, "uha" },
2640 { E_SAmode, "sa" },
2641 { E_USAmode, "usa" },
2642 { E_DAmode, "da" },
2643 { E_UDAmode, "uda" },
2644 { E_TAmode, "ta" },
2645 { E_UTAmode, "uta" }
2646 };
2647 const arm_fixed_mode_set fixed_conv_modes[] =
2648 {
2649 { E_QQmode, "qq" },
2650 { E_UQQmode, "uqq" },
2651 { E_HQmode, "hq" },
2652 { E_UHQmode, "uhq" },
2653 { E_SQmode, "sq" },
2654 { E_USQmode, "usq" },
2655 { E_DQmode, "dq" },
2656 { E_UDQmode, "udq" },
2657 { E_TQmode, "tq" },
2658 { E_UTQmode, "utq" },
2659 { E_HAmode, "ha" },
2660 { E_UHAmode, "uha" },
2661 { E_SAmode, "sa" },
2662 { E_USAmode, "usa" },
2663 { E_DAmode, "da" },
2664 { E_UDAmode, "uda" },
2665 { E_TAmode, "ta" },
2666 { E_UTAmode, "uta" },
2667 { E_QImode, "qi" },
2668 { E_HImode, "hi" },
2669 { E_SImode, "si" },
2670 { E_DImode, "di" },
2671 { E_TImode, "ti" },
2672 { E_SFmode, "sf" },
2673 { E_DFmode, "df" }
2674 };
2675 unsigned int i, j;
2676
2677 for (i = 0; i < ARRAY_SIZE (fixed_arith_modes); i++)
2678 {
2679 arm_set_fixed_optab_libfunc (add_optab, fixed_arith_modes[i].mode,
2680 "add", fixed_arith_modes[i].name, 3);
2681 arm_set_fixed_optab_libfunc (ssadd_optab, fixed_arith_modes[i].mode,
2682 "ssadd", fixed_arith_modes[i].name, 3);
2683 arm_set_fixed_optab_libfunc (usadd_optab, fixed_arith_modes[i].mode,
2684 "usadd", fixed_arith_modes[i].name, 3);
2685 arm_set_fixed_optab_libfunc (sub_optab, fixed_arith_modes[i].mode,
2686 "sub", fixed_arith_modes[i].name, 3);
2687 arm_set_fixed_optab_libfunc (sssub_optab, fixed_arith_modes[i].mode,
2688 "sssub", fixed_arith_modes[i].name, 3);
2689 arm_set_fixed_optab_libfunc (ussub_optab, fixed_arith_modes[i].mode,
2690 "ussub", fixed_arith_modes[i].name, 3);
2691 arm_set_fixed_optab_libfunc (smul_optab, fixed_arith_modes[i].mode,
2692 "mul", fixed_arith_modes[i].name, 3);
2693 arm_set_fixed_optab_libfunc (ssmul_optab, fixed_arith_modes[i].mode,
2694 "ssmul", fixed_arith_modes[i].name, 3);
2695 arm_set_fixed_optab_libfunc (usmul_optab, fixed_arith_modes[i].mode,
2696 "usmul", fixed_arith_modes[i].name, 3);
2697 arm_set_fixed_optab_libfunc (sdiv_optab, fixed_arith_modes[i].mode,
2698 "div", fixed_arith_modes[i].name, 3);
2699 arm_set_fixed_optab_libfunc (udiv_optab, fixed_arith_modes[i].mode,
2700 "udiv", fixed_arith_modes[i].name, 3);
2701 arm_set_fixed_optab_libfunc (ssdiv_optab, fixed_arith_modes[i].mode,
2702 "ssdiv", fixed_arith_modes[i].name, 3);
2703 arm_set_fixed_optab_libfunc (usdiv_optab, fixed_arith_modes[i].mode,
2704 "usdiv", fixed_arith_modes[i].name, 3);
2705 arm_set_fixed_optab_libfunc (neg_optab, fixed_arith_modes[i].mode,
2706 "neg", fixed_arith_modes[i].name, 2);
2707 arm_set_fixed_optab_libfunc (ssneg_optab, fixed_arith_modes[i].mode,
2708 "ssneg", fixed_arith_modes[i].name, 2);
2709 arm_set_fixed_optab_libfunc (usneg_optab, fixed_arith_modes[i].mode,
2710 "usneg", fixed_arith_modes[i].name, 2);
2711 arm_set_fixed_optab_libfunc (ashl_optab, fixed_arith_modes[i].mode,
2712 "ashl", fixed_arith_modes[i].name, 3);
2713 arm_set_fixed_optab_libfunc (ashr_optab, fixed_arith_modes[i].mode,
2714 "ashr", fixed_arith_modes[i].name, 3);
2715 arm_set_fixed_optab_libfunc (lshr_optab, fixed_arith_modes[i].mode,
2716 "lshr", fixed_arith_modes[i].name, 3);
2717 arm_set_fixed_optab_libfunc (ssashl_optab, fixed_arith_modes[i].mode,
2718 "ssashl", fixed_arith_modes[i].name, 3);
2719 arm_set_fixed_optab_libfunc (usashl_optab, fixed_arith_modes[i].mode,
2720 "usashl", fixed_arith_modes[i].name, 3);
2721 arm_set_fixed_optab_libfunc (cmp_optab, fixed_arith_modes[i].mode,
2722 "cmp", fixed_arith_modes[i].name, 2);
2723 }
2724
2725 for (i = 0; i < ARRAY_SIZE (fixed_conv_modes); i++)
2726 for (j = 0; j < ARRAY_SIZE (fixed_conv_modes); j++)
2727 {
2728 if (i == j
2729 || (!ALL_FIXED_POINT_MODE_P (fixed_conv_modes[i].mode)
2730 && !ALL_FIXED_POINT_MODE_P (fixed_conv_modes[j].mode)))
2731 continue;
2732
2733 arm_set_fixed_conv_libfunc (fract_optab, fixed_conv_modes[i].mode,
2734 fixed_conv_modes[j].mode, "fract",
2735 fixed_conv_modes[i].name,
2736 fixed_conv_modes[j].name);
2737 arm_set_fixed_conv_libfunc (satfract_optab,
2738 fixed_conv_modes[i].mode,
2739 fixed_conv_modes[j].mode, "satfract",
2740 fixed_conv_modes[i].name,
2741 fixed_conv_modes[j].name);
2742 arm_set_fixed_conv_libfunc (fractuns_optab,
2743 fixed_conv_modes[i].mode,
2744 fixed_conv_modes[j].mode, "fractuns",
2745 fixed_conv_modes[i].name,
2746 fixed_conv_modes[j].name);
2747 arm_set_fixed_conv_libfunc (satfractuns_optab,
2748 fixed_conv_modes[i].mode,
2749 fixed_conv_modes[j].mode, "satfractuns",
2750 fixed_conv_modes[i].name,
2751 fixed_conv_modes[j].name);
2752 }
2753 }
2754
2755 if (TARGET_AAPCS_BASED)
2756 synchronize_libfunc = init_one_libfunc ("__sync_synchronize");
2757
2758 speculation_barrier_libfunc = init_one_libfunc ("__speculation_barrier");
2759 }
2760
2761 /* On AAPCS systems, this is the "struct __va_list". */
2762 static GTY(()) tree va_list_type;
2763
2764 /* Return the type to use as __builtin_va_list. */
2765 static tree
2766 arm_build_builtin_va_list (void)
2767 {
2768 tree va_list_name;
2769 tree ap_field;
2770
2771 if (!TARGET_AAPCS_BASED)
2772 return std_build_builtin_va_list ();
2773
2774 /* AAPCS \S 7.1.4 requires that va_list be a typedef for a type
2775 defined as:
2776
2777 struct __va_list
2778 {
2779 void *__ap;
2780 };
2781
2782 The C Library ABI further reinforces this definition in \S
2783 4.1.
2784
2785 We must follow this definition exactly. The structure tag
2786 name is visible in C++ mangled names, and thus forms a part
2787 of the ABI. The field name may be used by people who
2788 #include <stdarg.h>. */
2789 /* Create the type. */
2790 va_list_type = lang_hooks.types.make_type (RECORD_TYPE);
2791 /* Give it the required name. */
2792 va_list_name = build_decl (BUILTINS_LOCATION,
2793 TYPE_DECL,
2794 get_identifier ("__va_list"),
2795 va_list_type);
2796 DECL_ARTIFICIAL (va_list_name) = 1;
2797 TYPE_NAME (va_list_type) = va_list_name;
2798 TYPE_STUB_DECL (va_list_type) = va_list_name;
2799 /* Create the __ap field. */
2800 ap_field = build_decl (BUILTINS_LOCATION,
2801 FIELD_DECL,
2802 get_identifier ("__ap"),
2803 ptr_type_node);
2804 DECL_ARTIFICIAL (ap_field) = 1;
2805 DECL_FIELD_CONTEXT (ap_field) = va_list_type;
2806 TYPE_FIELDS (va_list_type) = ap_field;
2807 /* Compute its layout. */
2808 layout_type (va_list_type);
2809
2810 return va_list_type;
2811 }
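/* Consequence: on AAPCS targets sizeof (va_list) == sizeof (void *), and
   because the "__va_list" tag participates in C++ name mangling (it should
   appear as "St9__va_list", i.e. as if declared in namespace std), neither
   the tag nor the __ap field name can be changed without breaking the
   ABI.  */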
2812
2813 /* Return an expression of type "void *" pointing to the next
2814 available argument in a variable-argument list. VALIST is the
2815 user-level va_list object, of type __builtin_va_list. */
2816 static tree
2817 arm_extract_valist_ptr (tree valist)
2818 {
2819 if (TREE_TYPE (valist) == error_mark_node)
2820 return error_mark_node;
2821
2822 /* On an AAPCS target, the pointer is stored within "struct
2823 va_list". */
2824 if (TARGET_AAPCS_BASED)
2825 {
2826 tree ap_field = TYPE_FIELDS (TREE_TYPE (valist));
2827 valist = build3 (COMPONENT_REF, TREE_TYPE (ap_field),
2828 valist, ap_field, NULL_TREE);
2829 }
2830
2831 return valist;
2832 }
2833
2834 /* Implement TARGET_EXPAND_BUILTIN_VA_START. */
2835 static void
2836 arm_expand_builtin_va_start (tree valist, rtx nextarg)
2837 {
2838 valist = arm_extract_valist_ptr (valist);
2839 std_expand_builtin_va_start (valist, nextarg);
2840 }
2841
2842 /* Implement TARGET_GIMPLIFY_VA_ARG_EXPR. */
2843 static tree
2844 arm_gimplify_va_arg_expr (tree valist, tree type, gimple_seq *pre_p,
2845 gimple_seq *post_p)
2846 {
2847 valist = arm_extract_valist_ptr (valist);
2848 return std_gimplify_va_arg_expr (valist, type, pre_p, post_p);
2849 }
2850
2851 /* Check any incompatible options that the user has specified. */
2852 static void
2853 arm_option_check_internal (struct gcc_options *opts)
2854 {
2855 int flags = opts->x_target_flags;
2856
2857 /* iWMMXt and NEON are incompatible. */
2858 if (TARGET_IWMMXT
2859 && bitmap_bit_p (arm_active_target.isa, isa_bit_neon))
2860 error ("iWMMXt and NEON are incompatible");
2861
2862 /* Make sure that the processor choice does not conflict with any of the
2863 other command line choices. */
2864 if (TARGET_ARM_P (flags)
2865 && !bitmap_bit_p (arm_active_target.isa, isa_bit_notm))
2866 error ("target CPU does not support ARM mode");
2867
2868 /* TARGET_BACKTRACE cannot be used here as crtl->is_leaf is not set yet. */
2869 if ((TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME) && TARGET_ARM_P (flags))
2870 warning (0, "enabling backtrace support is only meaningful when compiling for the Thumb");
2871
2872 if (TARGET_ARM_P (flags) && TARGET_CALLEE_INTERWORKING)
2873 warning (0, "enabling callee interworking support is only meaningful when compiling for the Thumb");
2874
2875 /* If this target is normally configured to use APCS frames, warn if they
2876 are turned off and debugging is turned on. */
2877 if (TARGET_ARM_P (flags)
2878 && write_symbols != NO_DEBUG
2879 && !TARGET_APCS_FRAME
2880 && (TARGET_DEFAULT & MASK_APCS_FRAME))
2881 warning (0, "-g with -mno-apcs-frame may not give sensible debugging");
2882
2883 /* iWMMXt unsupported under Thumb mode. */
2884 if (TARGET_THUMB_P (flags) && TARGET_IWMMXT)
2885 error ("iWMMXt unsupported under Thumb mode");
2886
2887 if (TARGET_HARD_TP && TARGET_THUMB1_P (flags))
2888 error ("cannot use -mtp=cp15 with 16-bit Thumb");
2889
2890 if (TARGET_THUMB_P (flags) && TARGET_VXWORKS_RTP && flag_pic)
2891 {
2892 error ("RTP PIC is incompatible with Thumb");
2893 flag_pic = 0;
2894 }
2895
2896 if (target_pure_code || target_slow_flash_data)
2897 {
2898 const char *flag = (target_pure_code ? "-mpure-code" :
2899 "-mslow-flash-data");
2900
2901 /* We only support -mpure-code and -mslow-flash-data on M-profile targets
2902 with MOVT. */
2903 if (!TARGET_HAVE_MOVT || arm_arch_notm || flag_pic || TARGET_NEON)
2904 error ("%s only supports non-pic code on M-profile targets with the "
2905 "MOVT instruction", flag);
2906
2907 /* Cannot load addresses: -mslow-flash-data forbids literal pool and
2908 -mword-relocations forbids relocation of MOVT/MOVW. */
2909 if (target_word_relocations)
2910 error ("%s incompatible with -mword-relocations", flag);
2911 }
2912 }
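/* For example, a command line combining -mpure-code with
   -mword-relocations is rejected above, since pure-code sections rely on
   MOVW/MOVT to materialise addresses and -mword-relocations forbids
   exactly those relocations.  */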
2913
2914 /* Recompute the global settings depending on target attribute options. */
2915
2916 static void
2917 arm_option_params_internal (void)
2918 {
2919 /* If we are not using the default (ARM mode) section anchor offset
2920 ranges, then set the correct ranges now. */
2921 if (TARGET_THUMB1)
2922 {
2923 /* Thumb-1 LDR instructions cannot have negative offsets.
2924 Permissible positive offset ranges are 5-bit (for byte loads),
2925 6-bit (for halfword loads), or 7-bit (for word loads).
2926 Empirical results suggest a 7-bit anchor range gives the best
2927 overall code size. */
2928 targetm.min_anchor_offset = 0;
2929 targetm.max_anchor_offset = 127;
2930 }
2931 else if (TARGET_THUMB2)
2932 {
2933 /* The minimum is set such that the total size of the block
2934 for a particular anchor is 248 + 1 + 4095 = 4344 bytes, which is
2935 divisible by eight, ensuring natural spacing of anchors. */
2936 targetm.min_anchor_offset = -248;
2937 targetm.max_anchor_offset = 4095;
2938 }
2939 else
2940 {
2941 targetm.min_anchor_offset = TARGET_MIN_ANCHOR_OFFSET;
2942 targetm.max_anchor_offset = TARGET_MAX_ANCHOR_OFFSET;
2943 }
2944
2945 /* With -Os, always allow up to four skipped conditional instructions, regardless of the tuning's default. */
2946 max_insns_skipped = optimize_size ? 4 : current_tune->max_insns_skipped;
2947
2948 /* For THUMB2, we limit the conditional sequence to one IT block. */
2949 if (TARGET_THUMB2)
2950 max_insns_skipped = MIN (max_insns_skipped, MAX_INSN_PER_IT_BLOCK);
2951 }
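/* Worked example: at -Os on a Thumb-2 target this gives
   MIN (4, MAX_INSN_PER_IT_BLOCK), i.e. 4, assuming the usual definition of
   MAX_INSN_PER_IT_BLOCK as 4 (an IT block can predicate at most four
   instructions).  */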
2952
2953 /* True if -mflip-thumb should next add an attribute for the default
2954 mode, false if it should next add an attribute for the opposite mode. */
2955 static GTY(()) bool thumb_flipper;
2956
2957 /* Options after initial target override. */
2958 static GTY(()) tree init_optimize;
2959
2960 static void
2961 arm_override_options_after_change_1 (struct gcc_options *opts)
2962 {
2963 /* -falign-functions without argument: supply one. */
2964 if (opts->x_flag_align_functions && !opts->x_str_align_functions)
2965 opts->x_str_align_functions = TARGET_THUMB_P (opts->x_target_flags)
2966 && opts->x_optimize_size ? "2" : "4";
2967 }
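/* Example: given -falign-functions with no explicit value, a Thumb build
   that also optimizes for size gets "2"; every other combination gets
   "4".  */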
2968
2969 /* Implement targetm.override_options_after_change. */
2970
2971 static void
2972 arm_override_options_after_change (void)
2973 {
2974 arm_configure_build_target (&arm_active_target,
2975 TREE_TARGET_OPTION (target_option_default_node),
2976 &global_options_set, false);
2977
2978 arm_override_options_after_change_1 (&global_options);
2979 }
2980
2981 /* Implement TARGET_OPTION_SAVE. */
2982 static void
2983 arm_option_save (struct cl_target_option *ptr, struct gcc_options *opts)
2984 {
2985 ptr->x_arm_arch_string = opts->x_arm_arch_string;
2986 ptr->x_arm_cpu_string = opts->x_arm_cpu_string;
2987 ptr->x_arm_tune_string = opts->x_arm_tune_string;
2988 }
2989
2990 /* Implement TARGET_OPTION_RESTORE. */
2991 static void
2992 arm_option_restore (struct gcc_options *opts, struct cl_target_option *ptr)
2993 {
2994 opts->x_arm_arch_string = ptr->x_arm_arch_string;
2995 opts->x_arm_cpu_string = ptr->x_arm_cpu_string;
2996 opts->x_arm_tune_string = ptr->x_arm_tune_string;
2997 arm_configure_build_target (&arm_active_target, ptr, &global_options_set,
2998 false);
2999 }
3000
3001 /* Reset options between modes that the user has specified. */
3002 static void
3003 arm_option_override_internal (struct gcc_options *opts,
3004 struct gcc_options *opts_set)
3005 {
3006 arm_override_options_after_change_1 (opts);
3007
3008 if (TARGET_INTERWORK && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3009 {
3010 /* The default is to enable interworking, so this warning message would
3011 be confusing to users who have just compiled with
3012 eg, -march=armv4. */
3013 /* warning (0, "ignoring -minterwork because target CPU does not support THUMB"); */
3014 opts->x_target_flags &= ~MASK_INTERWORK;
3015 }
3016
3017 if (TARGET_THUMB_P (opts->x_target_flags)
3018 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3019 {
3020 warning (0, "target CPU does not support THUMB instructions");
3021 opts->x_target_flags &= ~MASK_THUMB;
3022 }
3023
3024 if (TARGET_APCS_FRAME && TARGET_THUMB_P (opts->x_target_flags))
3025 {
3026 /* warning (0, "ignoring -mapcs-frame because -mthumb was used"); */
3027 opts->x_target_flags &= ~MASK_APCS_FRAME;
3028 }
3029
3030 /* Callee super interworking implies thumb interworking. Adding
3031 this to the flags here simplifies the logic elsewhere. */
3032 if (TARGET_THUMB_P (opts->x_target_flags) && TARGET_CALLEE_INTERWORKING)
3033 opts->x_target_flags |= MASK_INTERWORK;
3034
3035 /* Need to remember the initial values so that combinations of options like
3036 -mflip-thumb -mthumb -fno-schedule-insns work for any attribute. */
3037 cl_optimization *to = TREE_OPTIMIZATION (init_optimize);
3038
3039 if (! opts_set->x_arm_restrict_it)
3040 opts->x_arm_restrict_it = arm_arch8;
3041
3042 /* ARM execution state and M profile don't have [restrict] IT. */
3043 if (!TARGET_THUMB2_P (opts->x_target_flags) || !arm_arch_notm)
3044 opts->x_arm_restrict_it = 0;
3045
3046 /* Enable -munaligned-access by default for
3047 - all ARMv6 architecture-based processors when compiling for a 32-bit ISA
3048 i.e. Thumb2 and ARM state only.
3049 - ARMv7-A, ARMv7-R, and ARMv7-M architecture-based processors.
3050 - ARMv8 architecture-based processors.
3051
3052 Disable -munaligned-access by default for
3053 - all pre-ARMv6 architecture-based processors
3054 - ARMv6-M architecture-based processors
3055 - ARMv8-M Baseline processors. */
3056
3057 if (! opts_set->x_unaligned_access)
3058 {
3059 opts->x_unaligned_access = (TARGET_32BIT_P (opts->x_target_flags)
3060 && arm_arch6 && (arm_arch_notm || arm_arch7));
3061 }
3062 else if (opts->x_unaligned_access == 1
3063 && !(arm_arch6 && (arm_arch_notm || arm_arch7)))
3064 {
3065 warning (0, "target CPU does not support unaligned accesses");
3066 opts->x_unaligned_access = 0;
3067 }
3068
3069 /* Scheduling is on by default at -O2, so don't warn when we disable it for Thumb-1. */
3070 if (TARGET_THUMB1_P (opts->x_target_flags))
3071 opts->x_flag_schedule_insns = 0;
3072 else
3073 opts->x_flag_schedule_insns = to->x_flag_schedule_insns;
3074
3075 /* Disable shrink-wrap when optimizing function for size, since it tends to
3076 generate additional returns. */
3077 if (optimize_function_for_size_p (cfun)
3078 && TARGET_THUMB2_P (opts->x_target_flags))
3079 opts->x_flag_shrink_wrap = false;
3080 else
3081 opts->x_flag_shrink_wrap = to->x_flag_shrink_wrap;
3082
3083 /* In Thumb1 mode, we emit the epilogue in RTL, but the last insn
3084 - epilogue_insns - does not accurately model the corresponding insns
3085 emitted in the asm file. In particular, see the comment in thumb_exit
3086 'Find out how many of the (return) argument registers we can corrupt'.
3087 As a consequence, the epilogue may clobber registers without fipa-ra
3088 finding out about it. Therefore, disable fipa-ra in Thumb1 mode.
3089 TODO: Accurately model clobbers for epilogue_insns and reenable
3090 fipa-ra. */
3091 if (TARGET_THUMB1_P (opts->x_target_flags))
3092 opts->x_flag_ipa_ra = 0;
3093 else
3094 opts->x_flag_ipa_ra = to->x_flag_ipa_ra;
3095
3096 /* Thumb2 inline assembly code should always use unified syntax.
3097 This will apply to ARM and Thumb1 eventually. */
3098 opts->x_inline_asm_unified = TARGET_THUMB2_P (opts->x_target_flags);
3099
3100 #ifdef SUBTARGET_OVERRIDE_INTERNAL_OPTIONS
3101 SUBTARGET_OVERRIDE_INTERNAL_OPTIONS;
3102 #endif
3103 }
3104
3105 static sbitmap isa_all_fpubits;
3106 static sbitmap isa_quirkbits;
3107
3108 /* Configure a build target TARGET from the user-specified options OPTS and
3109 OPTS_SET. If WARN_COMPATIBLE, emit a diagnostic if both the CPU and
3110 architecture have been specified, but the two are not identical. */
3111 void
3112 arm_configure_build_target (struct arm_build_target *target,
3113 struct cl_target_option *opts,
3114 struct gcc_options *opts_set,
3115 bool warn_compatible)
3116 {
3117 const cpu_option *arm_selected_tune = NULL;
3118 const arch_option *arm_selected_arch = NULL;
3119 const cpu_option *arm_selected_cpu = NULL;
3120 const arm_fpu_desc *arm_selected_fpu = NULL;
3121 const char *tune_opts = NULL;
3122 const char *arch_opts = NULL;
3123 const char *cpu_opts = NULL;
3124
3125 bitmap_clear (target->isa);
3126 target->core_name = NULL;
3127 target->arch_name = NULL;
3128
3129 if (opts_set->x_arm_arch_string)
3130 {
3131 arm_selected_arch = arm_parse_arch_option_name (all_architectures,
3132 "-march",
3133 opts->x_arm_arch_string);
3134 arch_opts = strchr (opts->x_arm_arch_string, '+');
3135 }
3136
3137 if (opts_set->x_arm_cpu_string)
3138 {
3139 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "-mcpu",
3140 opts->x_arm_cpu_string);
3141 cpu_opts = strchr (opts->x_arm_cpu_string, '+');
3142 arm_selected_tune = arm_selected_cpu;
3143 /* If taking the tuning from -mcpu, we don't need to rescan the
3144 options for tuning. */
3145 }
3146
3147 if (opts_set->x_arm_tune_string)
3148 {
3149 arm_selected_tune = arm_parse_cpu_option_name (all_cores, "-mtune",
3150 opts->x_arm_tune_string);
3151 tune_opts = strchr (opts->x_arm_tune_string, '+');
3152 }
3153
3154 if (arm_selected_arch)
3155 {
3156 arm_initialize_isa (target->isa, arm_selected_arch->common.isa_bits);
3157 arm_parse_option_features (target->isa, &arm_selected_arch->common,
3158 arch_opts);
3159
3160 if (arm_selected_cpu)
3161 {
3162 auto_sbitmap cpu_isa (isa_num_bits);
3163 auto_sbitmap isa_delta (isa_num_bits);
3164
3165 arm_initialize_isa (cpu_isa, arm_selected_cpu->common.isa_bits);
3166 arm_parse_option_features (cpu_isa, &arm_selected_cpu->common,
3167 cpu_opts);
3168 bitmap_xor (isa_delta, cpu_isa, target->isa);
3169 /* Ignore any bits that are quirk bits. */
3170 bitmap_and_compl (isa_delta, isa_delta, isa_quirkbits);
3171 /* Ignore (for now) any bits that might be set by -mfpu. */
3172 bitmap_and_compl (isa_delta, isa_delta, isa_all_fpubits);
3173
3174 if (!bitmap_empty_p (isa_delta))
3175 {
3176 if (warn_compatible)
3177 warning (0, "switch -mcpu=%s conflicts with -march=%s switch",
3178 arm_selected_cpu->common.name,
3179 arm_selected_arch->common.name);
3180 /* -march wins for code generation.
3181 -mcpu wins for default tuning. */
3182 if (!arm_selected_tune)
3183 arm_selected_tune = arm_selected_cpu;
3184
3185 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3186 target->arch_name = arm_selected_arch->common.name;
3187 }
3188 else
3189 {
3190 /* Architecture and CPU are essentially the same.
3191 Prefer the CPU setting. */
3192 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3193 target->core_name = arm_selected_cpu->common.name;
3194 /* Copy the CPU's capabilities, so that we inherit the
3195 appropriate extensions and quirks. */
3196 bitmap_copy (target->isa, cpu_isa);
3197 }
3198 }
3199 else
3200 {
3201 /* Pick a CPU based on the architecture. */
3202 arm_selected_cpu = all_cores + arm_selected_arch->tune_id;
3203 target->arch_name = arm_selected_arch->common.name;
3204 /* Note: target->core_name is left unset in this path. */
3205 }
3206 }
3207 else if (arm_selected_cpu)
3208 {
3209 target->core_name = arm_selected_cpu->common.name;
3210 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3211 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3212 cpu_opts);
3213 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3214 }
3215 /* If the user did not specify a processor or architecture, choose
3216 one for them. */
3217 else
3218 {
3219 const cpu_option *sel;
3220 auto_sbitmap sought_isa (isa_num_bits);
3221 bitmap_clear (sought_isa);
3222 auto_sbitmap default_isa (isa_num_bits);
3223
3224 arm_selected_cpu = arm_parse_cpu_option_name (all_cores, "default CPU",
3225 TARGET_CPU_DEFAULT);
3226 cpu_opts = strchr (TARGET_CPU_DEFAULT, '+');
3227 gcc_assert (arm_selected_cpu->common.name);
3228
3229 /* RWE: All of the selection logic below (to the end of this
3230 'if' clause) looks somewhat suspect. It appears to be mostly
3231 there to support forcing thumb support when the default CPU
3232 does not have thumb (somewhat dubious in terms of what the
3233 user might be expecting). I think it should be removed once
3234 support for the pre-thumb era cores is removed. */
3235 sel = arm_selected_cpu;
3236 arm_initialize_isa (default_isa, sel->common.isa_bits);
3237 arm_parse_option_features (default_isa, &arm_selected_cpu->common,
3238 cpu_opts);
3239
3240 /* Now check to see if the user has specified any command line
3241 switches that require certain abilities from the cpu. */
3242
3243 if (TARGET_INTERWORK || TARGET_THUMB)
3244 bitmap_set_bit (sought_isa, isa_bit_thumb);
3245
3246 /* If there are such requirements and the default CPU does not
3247 satisfy them, we need to run over the complete list of
3248 cores looking for one that is satisfactory. */
3249 if (!bitmap_empty_p (sought_isa)
3250 && !bitmap_subset_p (sought_isa, default_isa))
3251 {
3252 auto_sbitmap candidate_isa (isa_num_bits);
3253 /* We're only interested in a CPU with at least the
3254 capabilities of the default CPU and the required
3255 additional features. */
3256 bitmap_ior (default_isa, default_isa, sought_isa);
3257
3258 /* Try to locate a CPU type that supports all of the abilities
3259 of the default CPU, plus the extra abilities requested by
3260 the user. */
3261 for (sel = all_cores; sel->common.name != NULL; sel++)
3262 {
3263 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3264 /* An exact match? */
3265 if (bitmap_equal_p (default_isa, candidate_isa))
3266 break;
3267 }
3268
3269 if (sel->common.name == NULL)
3270 {
3271 unsigned current_bit_count = isa_num_bits;
3272 const cpu_option *best_fit = NULL;
3273
3274 /* Ideally we would like to issue an error message here
3275 saying that it was not possible to find a CPU compatible
3276 with the default CPU, but which also supports the command
3277 line options specified by the programmer, and so they
3278 ought to use the -mcpu=<name> command line option to
3279 override the default CPU type.
3280
3281 If we cannot find a CPU that has exactly the
3282 characteristics of the default CPU and the given
3283 command line options we scan the array again looking
3284 for a best match. The best match must have at least
3285 the capabilities of the perfect match. */
3286 for (sel = all_cores; sel->common.name != NULL; sel++)
3287 {
3288 arm_initialize_isa (candidate_isa, sel->common.isa_bits);
3289
3290 if (bitmap_subset_p (default_isa, candidate_isa))
3291 {
3292 unsigned count;
3293
3294 bitmap_and_compl (candidate_isa, candidate_isa,
3295 default_isa);
3296 count = bitmap_popcount (candidate_isa);
3297
3298 if (count < current_bit_count)
3299 {
3300 best_fit = sel;
3301 current_bit_count = count;
3302 }
3303 }
3304
3305 gcc_assert (best_fit);
3306 sel = best_fit;
3307 }
3308 }
3309 arm_selected_cpu = sel;
3310 }
3311
3312 /* Now we know the CPU, we can finally initialize the target
3313 structure. */
3314 target->core_name = arm_selected_cpu->common.name;
3315 arm_initialize_isa (target->isa, arm_selected_cpu->common.isa_bits);
3316 arm_parse_option_features (target->isa, &arm_selected_cpu->common,
3317 cpu_opts);
3318 arm_selected_arch = all_architectures + arm_selected_cpu->arch;
3319 }
3320
3321 gcc_assert (arm_selected_cpu);
3322 gcc_assert (arm_selected_arch);
3323
3324 if (opts->x_arm_fpu_index != TARGET_FPU_auto)
3325 {
3326 arm_selected_fpu = &all_fpus[opts->x_arm_fpu_index];
3327 auto_sbitmap fpu_bits (isa_num_bits);
3328
3329 arm_initialize_isa (fpu_bits, arm_selected_fpu->isa_bits);
3330 bitmap_and_compl (target->isa, target->isa, isa_all_fpubits);
3331 bitmap_ior (target->isa, target->isa, fpu_bits);
3332 }
3333
3334 if (!arm_selected_tune)
3335 arm_selected_tune = arm_selected_cpu;
3336 else /* Validate the features passed to -mtune. */
3337 arm_parse_option_features (NULL, &arm_selected_tune->common, tune_opts);
3338
3339 const cpu_tune *tune_data = &all_tunes[arm_selected_tune - all_cores];
3340
3341 /* Finish initializing the target structure. */
3342 target->arch_pp_name = arm_selected_arch->arch;
3343 target->base_arch = arm_selected_arch->base_arch;
3344 target->profile = arm_selected_arch->profile;
3345
3346 target->tune_flags = tune_data->tune_flags;
3347 target->tune = tune_data->tune;
3348 target->tune_core = tune_data->scheduler;
3349 arm_option_reconfigure_globals ();
3350 }
3351
3352 /* Fix up any incompatible options that the user has specified. */
3353 static void
3354 arm_option_override (void)
3355 {
3356 static const enum isa_feature fpu_bitlist[]
3357 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
3358 static const enum isa_feature quirk_bitlist[] = { ISA_ALL_QUIRKS, isa_nobit};
3359 cl_target_option opts;
3360
3361 isa_quirkbits = sbitmap_alloc (isa_num_bits);
3362 arm_initialize_isa (isa_quirkbits, quirk_bitlist);
3363
3364 isa_all_fpubits = sbitmap_alloc (isa_num_bits);
3365 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
3366
3367 arm_active_target.isa = sbitmap_alloc (isa_num_bits);
3368
3369 if (!global_options_set.x_arm_fpu_index)
3370 {
3371 bool ok;
3372 int fpu_index;
3373
3374 ok = opt_enum_arg_to_value (OPT_mfpu_, FPUTYPE_AUTO, &fpu_index,
3375 CL_TARGET);
3376 gcc_assert (ok);
3377 arm_fpu_index = (enum fpu_type) fpu_index;
3378 }
3379
3380 cl_target_option_save (&opts, &global_options);
3381 arm_configure_build_target (&arm_active_target, &opts, &global_options_set,
3382 true);
3383
3384 #ifdef SUBTARGET_OVERRIDE_OPTIONS
3385 SUBTARGET_OVERRIDE_OPTIONS;
3386 #endif
3387
3388 /* Initialize boolean versions of the architectural flags, for use
3389 in the arm.md file and for enabling feature flags. */
3390 arm_option_reconfigure_globals ();
3391
3392 arm_tune = arm_active_target.tune_core;
3393 tune_flags = arm_active_target.tune_flags;
3394 current_tune = arm_active_target.tune;
3395
3396 /* TBD: Dwarf info for apcs frame is not handled yet. */
3397 if (TARGET_APCS_FRAME)
3398 flag_shrink_wrap = false;
3399
3400 if (TARGET_APCS_STACK && !TARGET_APCS_FRAME)
3401 {
3402 warning (0, "-mapcs-stack-check incompatible with -mno-apcs-frame");
3403 target_flags |= MASK_APCS_FRAME;
3404 }
3405
3406 if (TARGET_POKE_FUNCTION_NAME)
3407 target_flags |= MASK_APCS_FRAME;
3408
3409 if (TARGET_APCS_REENT && flag_pic)
3410 error ("-fpic and -mapcs-reent are incompatible");
3411
3412 if (TARGET_APCS_REENT)
3413 warning (0, "APCS reentrant code not supported. Ignored");
3414
3415 /* Set up some tuning parameters. */
3416 arm_ld_sched = (tune_flags & TF_LDSCHED) != 0;
3417 arm_tune_strongarm = (tune_flags & TF_STRONG) != 0;
3418 arm_tune_wbuf = (tune_flags & TF_WBUF) != 0;
3419 arm_tune_xscale = (tune_flags & TF_XSCALE) != 0;
3420 arm_tune_cortex_a9 = (arm_tune == TARGET_CPU_cortexa9) != 0;
3421 arm_m_profile_small_mul = (tune_flags & TF_SMALLMUL) != 0;
3422
3423 /* For arm2/3 there is no need to do any scheduling if we are doing
3424 software floating-point. */
3425 if (TARGET_SOFT_FLOAT && (tune_flags & TF_NO_MODE32))
3426 flag_schedule_insns = flag_schedule_insns_after_reload = 0;
3427
3428 /* Override the default structure alignment for AAPCS ABI. */
3429 if (!global_options_set.x_arm_structure_size_boundary)
3430 {
3431 if (TARGET_AAPCS_BASED)
3432 arm_structure_size_boundary = 8;
3433 }
3434 else
3435 {
3436 warning (0, "option %<-mstructure-size-boundary%> is deprecated");
3437
3438 if (arm_structure_size_boundary != 8
3439 && arm_structure_size_boundary != 32
3440 && !(ARM_DOUBLEWORD_ALIGN && arm_structure_size_boundary == 64))
3441 {
3442 if (ARM_DOUBLEWORD_ALIGN)
3443 warning (0,
3444 "structure size boundary can only be set to 8, 32 or 64");
3445 else
3446 warning (0, "structure size boundary can only be set to 8 or 32");
3447 arm_structure_size_boundary
3448 = (TARGET_AAPCS_BASED ? 8 : DEFAULT_STRUCTURE_SIZE_BOUNDARY);
3449 }
3450 }
3451
3452 if (TARGET_VXWORKS_RTP)
3453 {
3454 if (!global_options_set.x_arm_pic_data_is_text_relative)
3455 arm_pic_data_is_text_relative = 0;
3456 }
3457 else if (flag_pic
3458 && !arm_pic_data_is_text_relative
3459 && !(global_options_set.x_target_flags & MASK_SINGLE_PIC_BASE))
3460 /* When text & data segments don't have a fixed displacement, the
3461 intended use is with a single, read only, pic base register.
3462 Unless the user explicitly requested not to do that, set
3463 it. */
3464 target_flags |= MASK_SINGLE_PIC_BASE;
3465
3466 /* If stack checking is disabled, we can use r10 as the PIC register,
3467 which keeps r9 available. The EABI specifies r9 as the PIC register. */
3468 if (flag_pic && TARGET_SINGLE_PIC_BASE)
3469 {
3470 if (TARGET_VXWORKS_RTP)
3471 warning (0, "RTP PIC is incompatible with -msingle-pic-base");
3472 arm_pic_register = (TARGET_APCS_STACK || TARGET_AAPCS_BASED) ? 9 : 10;
3473 }
3474
3475 if (flag_pic && TARGET_VXWORKS_RTP)
3476 arm_pic_register = 9;
3477
3478 if (arm_pic_register_string != NULL)
3479 {
3480 int pic_register = decode_reg_name (arm_pic_register_string);
3481
3482 if (!flag_pic)
3483 warning (0, "-mpic-register= is useless without -fpic");
3484
3485 /* Prevent the user from choosing an obviously stupid PIC register. */
3486 else if (pic_register < 0 || call_used_regs[pic_register]
3487 || pic_register == HARD_FRAME_POINTER_REGNUM
3488 || pic_register == STACK_POINTER_REGNUM
3489 || pic_register >= PC_REGNUM
3490 || (TARGET_VXWORKS_RTP
3491 && (unsigned int) pic_register != arm_pic_register))
3492 error ("unable to use '%s' for PIC register", arm_pic_register_string);
3493 else
3494 arm_pic_register = pic_register;
3495 }
3496
3497 if (flag_pic)
3498 target_word_relocations = 1;
3499
3500 /* Enable -mfix-cortex-m3-ldrd by default for Cortex-M3 cores. */
3501 if (fix_cm3_ldrd == 2)
3502 {
3503 if (bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_cm3_ldrd))
3504 fix_cm3_ldrd = 1;
3505 else
3506 fix_cm3_ldrd = 0;
3507 }
3508
3509 /* Hot/Cold partitioning is not currently supported, since we can't
3510 handle literal pool placement in that case. */
3511 if (flag_reorder_blocks_and_partition)
3512 {
3513 inform (input_location,
3514 "-freorder-blocks-and-partition not supported on this architecture");
3515 flag_reorder_blocks_and_partition = 0;
3516 flag_reorder_blocks = 1;
3517 }
3518
3519 if (flag_pic)
3520 /* Hoisting PIC address calculations more aggressively provides a small,
3521 but measurable, size reduction for PIC code. Therefore, we decrease
3522 the bar for unrestricted expression hoisting to the cost of PIC address
3523 calculation, which is 2 instructions. */
3524 maybe_set_param_value (PARAM_GCSE_UNRESTRICTED_COST, 2,
3525 global_options.x_param_values,
3526 global_options_set.x_param_values);
3527
3528 /* ARM EABI defaults to strict volatile bitfields. */
3529 if (TARGET_AAPCS_BASED && flag_strict_volatile_bitfields < 0
3530 && abi_version_at_least(2))
3531 flag_strict_volatile_bitfields = 1;
3532
3533 /* Enable software prefetching at -O3 for CPUs that have prefetch and
3534 for which we have deemed it beneficial (signified by setting
3535 prefetch.num_slots to 1 or more). */
3536 if (flag_prefetch_loop_arrays < 0
3537 && HAVE_prefetch
3538 && optimize >= 3
3539 && current_tune->prefetch.num_slots > 0)
3540 flag_prefetch_loop_arrays = 1;
3541
3542 /* Set up parameters to be used in prefetching algorithm. Do not
3543 override the defaults unless we are tuning for a core we have
3544 researched values for. */
3545 if (current_tune->prefetch.num_slots > 0)
3546 maybe_set_param_value (PARAM_SIMULTANEOUS_PREFETCHES,
3547 current_tune->prefetch.num_slots,
3548 global_options.x_param_values,
3549 global_options_set.x_param_values);
3550 if (current_tune->prefetch.l1_cache_line_size >= 0)
3551 maybe_set_param_value (PARAM_L1_CACHE_LINE_SIZE,
3552 current_tune->prefetch.l1_cache_line_size,
3553 global_options.x_param_values,
3554 global_options_set.x_param_values);
3555 if (current_tune->prefetch.l1_cache_size >= 0)
3556 maybe_set_param_value (PARAM_L1_CACHE_SIZE,
3557 current_tune->prefetch.l1_cache_size,
3558 global_options.x_param_values,
3559 global_options_set.x_param_values);
3560
3561 /* Use Neon rather than core registers to perform 64-bit
3562 operations. */
3563 prefer_neon_for_64bits = current_tune->prefer_neon_for_64bits;
3564 if (use_neon_for_64bits == 1)
3565 prefer_neon_for_64bits = true;
3566
3567 /* Use the alternative scheduling-pressure algorithm by default. */
3568 maybe_set_param_value (PARAM_SCHED_PRESSURE_ALGORITHM, SCHED_PRESSURE_MODEL,
3569 global_options.x_param_values,
3570 global_options_set.x_param_values);
3571
3572 /* Look through ready list and all of queue for instructions
3573 relevant for L2 auto-prefetcher. */
3574 int param_sched_autopref_queue_depth;
3575
3576 switch (current_tune->sched_autopref)
3577 {
3578 case tune_params::SCHED_AUTOPREF_OFF:
3579 param_sched_autopref_queue_depth = -1;
3580 break;
3581
3582 case tune_params::SCHED_AUTOPREF_RANK:
3583 param_sched_autopref_queue_depth = 0;
3584 break;
3585
3586 case tune_params::SCHED_AUTOPREF_FULL:
3587 param_sched_autopref_queue_depth = max_insn_queue_index + 1;
3588 break;
3589
3590 default:
3591 gcc_unreachable ();
3592 }
3593
3594 maybe_set_param_value (PARAM_SCHED_AUTOPREF_QUEUE_DEPTH,
3595 param_sched_autopref_queue_depth,
3596 global_options.x_param_values,
3597 global_options_set.x_param_values);
3598
3599 /* Currently, for slow flash data, we just disable literal pools. We also
3600 disable them for pure-code. */
3601 if (target_slow_flash_data || target_pure_code)
3602 arm_disable_literal_pool = true;
3603
3604 /* Disable scheduling fusion by default if it's not armv7 processor
3605 or doesn't prefer ldrd/strd. */
3606 if (flag_schedule_fusion == 2
3607 && (!arm_arch7 || !current_tune->prefer_ldrd_strd))
3608 flag_schedule_fusion = 0;
3609
3610 /* Need to remember initial options before they are overridden. */
3611 init_optimize = build_optimization_node (&global_options);
3612
3613 arm_options_perform_arch_sanity_checks ();
3614 arm_option_override_internal (&global_options, &global_options_set);
3615 arm_option_check_internal (&global_options);
3616 arm_option_params_internal ();
3617
3618 /* Create the default target_options structure. */
3619 target_option_default_node = target_option_current_node
3620 = build_target_option_node (&global_options);
3621
3622 /* Register global variables with the garbage collector. */
3623 arm_add_gc_roots ();
3624
3625 /* Init initial mode for testing. */
3626 thumb_flipper = TARGET_THUMB;
3627 }
3628
3629
3630 /* Reconfigure global status flags from the active_target.isa. */
3631 void
3632 arm_option_reconfigure_globals (void)
3633 {
3634 sprintf (arm_arch_name, "__ARM_ARCH_%s__", arm_active_target.arch_pp_name);
3635 arm_base_arch = arm_active_target.base_arch;
3636
3637 /* Initialize boolean versions of the architectural flags, for use
3638 in the arm.md file. */
3639 arm_arch4 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv4);
3640 arm_arch4t = arm_arch4 && bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3641 arm_arch5t = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5t);
3642 arm_arch5te = bitmap_bit_p (arm_active_target.isa, isa_bit_armv5te);
3643 arm_arch6 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6);
3644 arm_arch6k = bitmap_bit_p (arm_active_target.isa, isa_bit_armv6k);
3645 arm_arch_notm = bitmap_bit_p (arm_active_target.isa, isa_bit_notm);
3646 arm_arch6m = arm_arch6 && !arm_arch_notm;
3647 arm_arch7 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7);
3648 arm_arch7em = bitmap_bit_p (arm_active_target.isa, isa_bit_armv7em);
3649 arm_arch8 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8);
3650 arm_arch8_1 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_1);
3651 arm_arch8_2 = bitmap_bit_p (arm_active_target.isa, isa_bit_armv8_2);
3652 arm_arch_thumb1 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb);
3653 arm_arch_thumb2 = bitmap_bit_p (arm_active_target.isa, isa_bit_thumb2);
3654 arm_arch_xscale = bitmap_bit_p (arm_active_target.isa, isa_bit_xscale);
3655 arm_arch_iwmmxt = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt);
3656 arm_arch_iwmmxt2 = bitmap_bit_p (arm_active_target.isa, isa_bit_iwmmxt2);
3657 arm_arch_thumb_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_tdiv);
3658 arm_arch_arm_hwdiv = bitmap_bit_p (arm_active_target.isa, isa_bit_adiv);
3659 arm_arch_crc = bitmap_bit_p (arm_active_target.isa, isa_bit_crc32);
3660 arm_arch_cmse = bitmap_bit_p (arm_active_target.isa, isa_bit_cmse);
3661 arm_fp16_inst = bitmap_bit_p (arm_active_target.isa, isa_bit_fp16);
3662 arm_arch_lpae = bitmap_bit_p (arm_active_target.isa, isa_bit_lpae);
3663 if (arm_fp16_inst)
3664 {
3665 if (arm_fp16_format == ARM_FP16_FORMAT_ALTERNATIVE)
3666 error ("selected fp16 options are incompatible");
3667 arm_fp16_format = ARM_FP16_FORMAT_IEEE;
3668 }
3669
3670 /* And finally, set up some quirks. */
3671 arm_arch_no_volatile_ce
3672 = bitmap_bit_p (arm_active_target.isa, isa_bit_quirk_no_volatile_ce);
3673 arm_arch6kz = arm_arch6k && bitmap_bit_p (arm_active_target.isa,
3674 isa_bit_quirk_armv6kz);
3675
3676 /* Use the cp15 method if it is available. */
3677 if (target_thread_pointer == TP_AUTO)
3678 {
3679 if (arm_arch6k && !TARGET_THUMB1)
3680 target_thread_pointer = TP_CP15;
3681 else
3682 target_thread_pointer = TP_SOFT;
3683 }
3684 }
3685
3686 /* Perform some validation between the desired architecture and the rest of the
3687 options. */
3688 void
3689 arm_options_perform_arch_sanity_checks (void)
3690 {
3691 /* V5T code we generate is completely interworking capable, so we turn off
3692 TARGET_INTERWORK here to avoid many tests later on. */
3693
3694 /* XXX However, we must pass the right pre-processor defines to CPP
3695 or GLD can get confused. This is a hack. */
3696 if (TARGET_INTERWORK)
3697 arm_cpp_interwork = 1;
3698
3699 if (arm_arch5t)
3700 target_flags &= ~MASK_INTERWORK;
3701
3702 if (TARGET_IWMMXT && !ARM_DOUBLEWORD_ALIGN)
3703 error ("iwmmxt requires an AAPCS compatible ABI for proper operation");
3704
3705 if (TARGET_IWMMXT_ABI && !TARGET_IWMMXT)
3706 error ("iwmmxt abi requires an iwmmxt capable cpu");
3707
3708 /* BPABI targets use linker tricks to allow interworking on cores
3709 without thumb support. */
3710 if (TARGET_INTERWORK
3711 && !TARGET_BPABI
3712 && !bitmap_bit_p (arm_active_target.isa, isa_bit_thumb))
3713 {
3714 warning (0, "target CPU does not support interworking" );
3715 target_flags &= ~MASK_INTERWORK;
3716 }
3717
3718 /* If soft-float is specified then don't use FPU. */
3719 if (TARGET_SOFT_FLOAT)
3720 arm_fpu_attr = FPU_NONE;
3721 else
3722 arm_fpu_attr = FPU_VFP;
3723
3724 if (TARGET_AAPCS_BASED)
3725 {
3726 if (TARGET_CALLER_INTERWORKING)
3727 error ("AAPCS does not support -mcaller-super-interworking");
3728 else
3729 if (TARGET_CALLEE_INTERWORKING)
3730 error ("AAPCS does not support -mcallee-super-interworking");
3731 }
3732
3733 /* __fp16 support currently assumes the core has ldrh. */
3734 if (!arm_arch4 && arm_fp16_format != ARM_FP16_FORMAT_NONE)
3735 sorry ("__fp16 and no ldrh");
3736
3737 if (use_cmse && !arm_arch_cmse)
3738 error ("target CPU does not support ARMv8-M Security Extensions");
3739
3740 /* We don't clear D16-D31 VFP registers for cmse_nonsecure_call functions
3741 and ARMv8-M Baseline and Mainline do not allow such a configuration. */
3742 if (use_cmse && LAST_VFP_REGNUM > LAST_LO_VFP_REGNUM)
3743 error ("ARMv8-M Security Extensions incompatible with selected FPU");
3744
3745
3746 if (TARGET_AAPCS_BASED)
3747 {
3748 if (arm_abi == ARM_ABI_IWMMXT)
3749 arm_pcs_default = ARM_PCS_AAPCS_IWMMXT;
3750 else if (TARGET_HARD_FLOAT_ABI)
3751 {
3752 arm_pcs_default = ARM_PCS_AAPCS_VFP;
3753 if (!bitmap_bit_p (arm_active_target.isa, isa_bit_vfpv2))
3754 error ("-mfloat-abi=hard: selected processor lacks an FPU");
3755 }
3756 else
3757 arm_pcs_default = ARM_PCS_AAPCS;
3758 }
3759 else
3760 {
3761 if (arm_float_abi == ARM_FLOAT_ABI_HARD)
3762 sorry ("-mfloat-abi=hard and VFP");
3763
3764 if (arm_abi == ARM_ABI_APCS)
3765 arm_pcs_default = ARM_PCS_APCS;
3766 else
3767 arm_pcs_default = ARM_PCS_ATPCS;
3768 }
3769 }
3770
3771 static void
3772 arm_add_gc_roots (void)
3773 {
3774 gcc_obstack_init(&minipool_obstack);
3775 minipool_startobj = (char *) obstack_alloc (&minipool_obstack, 0);
3776 }
3777 \f
3778 /* A table of known ARM exception types.
3779 For use with the interrupt function attribute. */
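/* For example, a handler declared as
void isr_handler (void) __attribute__ ((interrupt ("IRQ")));
is matched against this table by arm_isr_value below, mapping the
string "IRQ" to ARM_FT_ISR.  */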
3780
3781 typedef struct
3782 {
3783 const char *const arg;
3784 const unsigned long return_value;
3785 }
3786 isr_attribute_arg;
3787
3788 static const isr_attribute_arg isr_attribute_args [] =
3789 {
3790 { "IRQ", ARM_FT_ISR },
3791 { "irq", ARM_FT_ISR },
3792 { "FIQ", ARM_FT_FIQ },
3793 { "fiq", ARM_FT_FIQ },
3794 { "ABORT", ARM_FT_ISR },
3795 { "abort", ARM_FT_ISR },
3796 { "ABORT", ARM_FT_ISR },
3797 { "abort", ARM_FT_ISR },
3798 { "UNDEF", ARM_FT_EXCEPTION },
3799 { "undef", ARM_FT_EXCEPTION },
3800 { "SWI", ARM_FT_EXCEPTION },
3801 { "swi", ARM_FT_EXCEPTION },
3802 { NULL, ARM_FT_NORMAL }
3803 };
3804
3805 /* Returns the (interrupt) function type of the current
3806 function, or ARM_FT_UNKNOWN if the type cannot be determined. */
3807
3808 static unsigned long
3809 arm_isr_value (tree argument)
3810 {
3811 const isr_attribute_arg * ptr;
3812 const char * arg;
3813
3814 if (!arm_arch_notm)
3815 return ARM_FT_NORMAL | ARM_FT_STACKALIGN;
3816
3817 /* No argument - default to IRQ. */
3818 if (argument == NULL_TREE)
3819 return ARM_FT_ISR;
3820
3821 /* Get the value of the argument. */
3822 if (TREE_VALUE (argument) == NULL_TREE
3823 || TREE_CODE (TREE_VALUE (argument)) != STRING_CST)
3824 return ARM_FT_UNKNOWN;
3825
3826 arg = TREE_STRING_POINTER (TREE_VALUE (argument));
3827
3828 /* Check it against the list of known arguments. */
3829 for (ptr = isr_attribute_args; ptr->arg != NULL; ptr++)
3830 if (streq (arg, ptr->arg))
3831 return ptr->return_value;
3832
3833 /* An unrecognized interrupt type. */
3834 return ARM_FT_UNKNOWN;
3835 }
3836
3837 /* Computes the type of the current function. */
3838
3839 static unsigned long
3840 arm_compute_func_type (void)
3841 {
3842 unsigned long type = ARM_FT_UNKNOWN;
3843 tree a;
3844 tree attr;
3845
3846 gcc_assert (TREE_CODE (current_function_decl) == FUNCTION_DECL);
3847
3848 /* Decide if the current function is volatile. Such functions
3849 never return, and many memory cycles can be saved by not storing
3850 register values that will never be needed again. This optimization
3851 was added to speed up context switching in a kernel application. */
3852 if (optimize > 0
3853 && (TREE_NOTHROW (current_function_decl)
3854 || !(flag_unwind_tables
3855 || (flag_exceptions
3856 && arm_except_unwind_info (&global_options) != UI_SJLJ)))
3857 && TREE_THIS_VOLATILE (current_function_decl))
3858 type |= ARM_FT_VOLATILE;
3859
3860 if (cfun->static_chain_decl != NULL)
3861 type |= ARM_FT_NESTED;
3862
3863 attr = DECL_ATTRIBUTES (current_function_decl);
3864
3865 a = lookup_attribute ("naked", attr);
3866 if (a != NULL_TREE)
3867 type |= ARM_FT_NAKED;
3868
3869 a = lookup_attribute ("isr", attr);
3870 if (a == NULL_TREE)
3871 a = lookup_attribute ("interrupt", attr);
3872
3873 if (a == NULL_TREE)
3874 type |= TARGET_INTERWORK ? ARM_FT_INTERWORKED : ARM_FT_NORMAL;
3875 else
3876 type |= arm_isr_value (TREE_VALUE (a));
3877
3878 if (lookup_attribute ("cmse_nonsecure_entry", attr))
3879 type |= ARM_FT_CMSE_ENTRY;
3880
3881 return type;
3882 }
3883
3884 /* Returns the type of the current function. */
3885
3886 unsigned long
3887 arm_current_func_type (void)
3888 {
3889 if (ARM_FUNC_TYPE (cfun->machine->func_type) == ARM_FT_UNKNOWN)
3890 cfun->machine->func_type = arm_compute_func_type ();
3891
3892 return cfun->machine->func_type;
3893 }
3894
3895 bool
3896 arm_allocate_stack_slots_for_args (void)
3897 {
3898 /* Naked functions should not allocate stack slots for arguments. */
3899 return !IS_NAKED (arm_current_func_type ());
3900 }
3901
3902 static bool
3903 arm_warn_func_return (tree decl)
3904 {
3905 /* Naked functions are implemented entirely in assembly, including the
3906 return sequence, so suppress warnings about this. */
3907 return lookup_attribute ("naked", DECL_ATTRIBUTES (decl)) == NULL_TREE;
3908 }
3909
3910 \f
3911 /* Output assembler code for a block containing the constant parts
3912 of a trampoline, leaving space for the variable parts.
3913
3914 On the ARM, (if r8 is the static chain regnum, and remembering that
3915 referencing pc adds an offset of 8) the trampoline looks like:
3916 ldr r8, [pc, #0]
3917 ldr pc, [pc]
3918 .word static chain value
3919 .word function's address
3920 XXX FIXME: When the trampoline returns, r8 will be clobbered. */
3921
3922 static void
3923 arm_asm_trampoline_template (FILE *f)
3924 {
3925 fprintf (f, "\t.syntax unified\n");
3926
3927 if (TARGET_ARM)
3928 {
3929 fprintf (f, "\t.arm\n");
3930 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", STATIC_CHAIN_REGNUM, PC_REGNUM);
3931 asm_fprintf (f, "\tldr\t%r, [%r, #0]\n", PC_REGNUM, PC_REGNUM);
3932 }
3933 else if (TARGET_THUMB2)
3934 {
3935 fprintf (f, "\t.thumb\n");
3936 /* The Thumb-2 trampoline is similar to the arm implementation.
3937 Unlike 16-bit Thumb, we enter the stub in thumb mode. */
3938 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n",
3939 STATIC_CHAIN_REGNUM, PC_REGNUM);
3940 asm_fprintf (f, "\tldr.w\t%r, [%r, #4]\n", PC_REGNUM, PC_REGNUM);
3941 }
3942 else
3943 {
3944 ASM_OUTPUT_ALIGN (f, 2);
3945 fprintf (f, "\t.code\t16\n");
3946 fprintf (f, ".Ltrampoline_start:\n");
3947 asm_fprintf (f, "\tpush\t{r0, r1}\n");
3948 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3949 asm_fprintf (f, "\tmov\t%r, r0\n", STATIC_CHAIN_REGNUM);
3950 asm_fprintf (f, "\tldr\tr0, [%r, #8]\n", PC_REGNUM);
3951 asm_fprintf (f, "\tstr\tr0, [%r, #4]\n", SP_REGNUM);
3952 asm_fprintf (f, "\tpop\t{r0, %r}\n", PC_REGNUM);
3953 }
3954 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3955 assemble_aligned_integer (UNITS_PER_WORD, const0_rtx);
3956 }
3957
3958 /* Emit RTL insns to initialize the variable parts of a trampoline. */
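/* The offsets used below select the two .word slots laid down by
arm_asm_trampoline_template above: bytes 8 and 12 after the two-insn
ARM/Thumb-2 stubs, bytes 12 and 16 after the longer Thumb-1 stub.  */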
3959
3960 static void
3961 arm_trampoline_init (rtx m_tramp, tree fndecl, rtx chain_value)
3962 {
3963 rtx fnaddr, mem, a_tramp;
3964
3965 emit_block_move (m_tramp, assemble_trampoline_template (),
3966 GEN_INT (TRAMPOLINE_SIZE), BLOCK_OP_NORMAL);
3967
3968 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 8 : 12);
3969 emit_move_insn (mem, chain_value);
3970
3971 mem = adjust_address (m_tramp, SImode, TARGET_32BIT ? 12 : 16);
3972 fnaddr = XEXP (DECL_RTL (fndecl), 0);
3973 emit_move_insn (mem, fnaddr);
3974
3975 a_tramp = XEXP (m_tramp, 0);
3976 emit_library_call (gen_rtx_SYMBOL_REF (Pmode, "__clear_cache"),
3977 LCT_NORMAL, VOIDmode, a_tramp, Pmode,
3978 plus_constant (Pmode, a_tramp, TRAMPOLINE_SIZE), Pmode);
3979 }
3980
3981 /* Thumb trampolines should be entered in thumb mode, so set
3982 the bottom bit of the address. */
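/* E.g. a trampoline placed at 0x20000400 is handed out as 0x20000401,
so that an indirect call through it enters (or stays in) Thumb state.  */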
3983
3984 static rtx
3985 arm_trampoline_adjust_address (rtx addr)
3986 {
3987 if (TARGET_THUMB)
3988 addr = expand_simple_binop (Pmode, IOR, addr, const1_rtx,
3989 NULL, 0, OPTAB_LIB_WIDEN);
3990 return addr;
3991 }
3992 \f
3993 /* Return 1 if it is possible to return using a single instruction.
3994 If SIBLING is non-null, this is a test for a return before a sibling
3995 call. SIBLING is the call insn, so we can examine its register usage. */
3996
3997 int
3998 use_return_insn (int iscond, rtx sibling)
3999 {
4000 int regno;
4001 unsigned int func_type;
4002 unsigned long saved_int_regs;
4003 unsigned HOST_WIDE_INT stack_adjust;
4004 arm_stack_offsets *offsets;
4005
4006 /* Never use a return instruction before reload has run. */
4007 if (!reload_completed)
4008 return 0;
4009
4010 func_type = arm_current_func_type ();
4011
4012 /* Naked, volatile and stack alignment functions need special
4013 consideration. */
4014 if (func_type & (ARM_FT_VOLATILE | ARM_FT_NAKED | ARM_FT_STACKALIGN))
4015 return 0;
4016
4017 /* So do interrupt functions that use the frame pointer and Thumb
4018 interrupt functions. */
4019 if (IS_INTERRUPT (func_type) && (frame_pointer_needed || TARGET_THUMB))
4020 return 0;
4021
4022 if (TARGET_LDRD && current_tune->prefer_ldrd_strd
4023 && !optimize_function_for_size_p (cfun))
4024 return 0;
4025
4026 offsets = arm_get_frame_offsets ();
4027 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
4028
4029 /* As do variadic functions. */
4030 if (crtl->args.pretend_args_size
4031 || cfun->machine->uses_anonymous_args
4032 /* Or if the function calls __builtin_eh_return () */
4033 || crtl->calls_eh_return
4034 /* Or if the function calls alloca */
4035 || cfun->calls_alloca
4036 /* Or if there is a stack adjustment. However, if the stack pointer
4037 is saved on the stack, we can use a pre-incrementing stack load. */
4038 || !(stack_adjust == 0 || (TARGET_APCS_FRAME && frame_pointer_needed
4039 && stack_adjust == 4))
4040 /* Or if the static chain register was saved above the frame, under the
4041 assumption that the stack pointer isn't saved on the stack. */
4042 || (!(TARGET_APCS_FRAME && frame_pointer_needed)
4043 && arm_compute_static_chain_stack_bytes() != 0))
4044 return 0;
4045
4046 saved_int_regs = offsets->saved_regs_mask;
4047
4048 /* Unfortunately, the insn
4049
4050 ldmib sp, {..., sp, ...}
4051
4052 triggers a bug on most SA-110 based devices, such that the stack
4053 pointer won't be correctly restored if the instruction takes a
4054 page fault. We work around this problem by popping r3 along with
4055 the other registers, since that is never slower than executing
4056 another instruction.
4057
4058 We test for !arm_arch5t here, because code for any architecture
4059 less than this could potentially be run on one of the buggy
4060 chips. */
4061 if (stack_adjust == 4 && !arm_arch5t && TARGET_ARM)
4062 {
4063 /* Validate that r3 is a call-clobbered register (always true in
4064 the default abi) ... */
4065 if (!call_used_regs[3])
4066 return 0;
4067
4068 /* ... that it isn't being used for a return value ... */
4069 if (arm_size_return_regs () >= (4 * UNITS_PER_WORD))
4070 return 0;
4071
4072 /* ... or for a tail-call argument ... */
4073 if (sibling)
4074 {
4075 gcc_assert (CALL_P (sibling));
4076
4077 if (find_regno_fusage (sibling, USE, 3))
4078 return 0;
4079 }
4080
4081 /* ... and that there are no call-saved registers in r0-r2
4082 (always true in the default ABI). */
4083 if (saved_int_regs & 0x7)
4084 return 0;
4085 }
4086
4087 /* Can't be done if interworking with Thumb, and any registers have been
4088 stacked. */
4089 if (TARGET_INTERWORK && saved_int_regs != 0 && !IS_INTERRUPT(func_type))
4090 return 0;
4091
4092 /* On StrongARM, conditional returns are expensive if they aren't
4093 taken and multiple registers have been stacked. */
4094 if (iscond && arm_tune_strongarm)
4095 {
4096 /* Conditional return when just the LR is stored is a simple
4097 conditional-load instruction; that's not expensive. */
4098 if (saved_int_regs != 0 && saved_int_regs != (1 << LR_REGNUM))
4099 return 0;
4100
4101 if (flag_pic
4102 && arm_pic_register != INVALID_REGNUM
4103 && df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM))
4104 return 0;
4105 }
4106
4107 /* ARMv8-M nonsecure entry functions need to use bxns to return and thus need
4108 several instructions if anything needs to be popped. */
4109 if (saved_int_regs && IS_CMSE_ENTRY (func_type))
4110 return 0;
4111
4112 /* If there are saved registers but the LR isn't saved, then we need
4113 two instructions for the return. */
4114 if (saved_int_regs && !(saved_int_regs & (1 << LR_REGNUM)))
4115 return 0;
4116
4117 /* Can't be done if any of the VFP regs are pushed,
4118 since this also requires an insn. */
4119 if (TARGET_HARD_FLOAT)
4120 for (regno = FIRST_VFP_REGNUM; regno <= LAST_VFP_REGNUM; regno++)
4121 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
4122 return 0;
4123
4124 if (TARGET_REALLY_IWMMXT)
4125 for (regno = FIRST_IWMMXT_REGNUM; regno <= LAST_IWMMXT_REGNUM; regno++)
4126 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
4127 return 0;
4128
4129 return 1;
4130 }
4131
4132 /* Return TRUE if we should try to use a simple_return insn, i.e. perform
4133 shrink-wrapping if possible. This is the case if we need to emit a
4134 prologue, which we can test by looking at the offsets. */
4135 bool
4136 use_simple_return_p (void)
4137 {
4138 arm_stack_offsets *offsets;
4139
4140 /* Note this function can be called before or after reload. */
4141 if (!reload_completed)
4142 arm_compute_frame_layout ();
4143
4144 offsets = arm_get_frame_offsets ();
4145 return offsets->outgoing_args != 0;
4146 }
4147
4148 /* Return TRUE if int I is a valid immediate ARM constant. */
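/* In ARM state a valid immediate is an 8-bit value rotated right by an
even number of bits, so e.g. 0xff000000 and 0x000003fc are accepted
while 0x00ff00ff is not; the Thumb-2 checks below additionally accept
replicated byte patterns such as 0x00ff00ff and 0xff00ff00.  */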
4149
4150 int
4151 const_ok_for_arm (HOST_WIDE_INT i)
4152 {
4153 int lowbit;
4154
4155 /* For machines with >32 bit HOST_WIDE_INT, the bits above bit 31 must
4156 be all zero, or all one. */
4157 if ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff) != 0
4158 && ((i & ~(unsigned HOST_WIDE_INT) 0xffffffff)
4159 != ((~(unsigned HOST_WIDE_INT) 0)
4160 & ~(unsigned HOST_WIDE_INT) 0xffffffff)))
4161 return FALSE;
4162
4163 i &= (unsigned HOST_WIDE_INT) 0xffffffff;
4164
4165 /* Fast return for 0 and small values. We must do this for zero, since
4166 the code below can't handle that one case. */
4167 if ((i & ~(unsigned HOST_WIDE_INT) 0xff) == 0)
4168 return TRUE;
4169
4170 /* Get the number of trailing zeros. */
4171 lowbit = ffs((int) i) - 1;
4172
4173 /* Only even shifts are allowed in ARM mode so round down to the
4174 nearest even number. */
4175 if (TARGET_ARM)
4176 lowbit &= ~1;
4177
4178 if ((i & ~(((unsigned HOST_WIDE_INT) 0xff) << lowbit)) == 0)
4179 return TRUE;
4180
4181 if (TARGET_ARM)
4182 {
4183 /* Allow rotated constants in ARM mode. */
4184 if (lowbit <= 4
4185 && ((i & ~0xc000003f) == 0
4186 || (i & ~0xf000000f) == 0
4187 || (i & ~0xfc000003) == 0))
4188 return TRUE;
4189 }
4190 else if (TARGET_THUMB2)
4191 {
4192 HOST_WIDE_INT v;
4193
4194 /* Allow repeated patterns 0x00XY00XY or 0xXYXYXYXY. */
4195 v = i & 0xff;
4196 v |= v << 16;
4197 if (i == v || i == (v | (v << 8)))
4198 return TRUE;
4199
4200 /* Allow repeated pattern 0xXY00XY00. */
4201 v = i & 0xff00;
4202 v |= v << 16;
4203 if (i == v)
4204 return TRUE;
4205 }
4206 else if (TARGET_HAVE_MOVT)
4207 {
4208 /* Thumb-1 Targets with MOVT. */
4209 if (i > 0xffff)
4210 return FALSE;
4211 else
4212 return TRUE;
4213 }
4214
4215 return FALSE;
4216 }
4217
4218 /* Return true if I is a valid constant for the operation CODE. */
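/* For example, 0xffff00ff is not a valid ARM immediate for SET, but its
complement 0x0000ff00 is, so a single mvn suffices; similarly a PLUS of
4095 is accepted on Thumb-2 via addw even though 4095 is not an 8-bit
rotated immediate.  */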
4219 int
4220 const_ok_for_op (HOST_WIDE_INT i, enum rtx_code code)
4221 {
4222 if (const_ok_for_arm (i))
4223 return 1;
4224
4225 switch (code)
4226 {
4227 case SET:
4228 /* See if we can use movw. */
4229 if (TARGET_HAVE_MOVT && (i & 0xffff0000) == 0)
4230 return 1;
4231 else
4232 /* Otherwise, try mvn. */
4233 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4234
4235 case PLUS:
4236 /* See if we can use addw or subw. */
4237 if (TARGET_THUMB2
4238 && ((i & 0xfffff000) == 0
4239 || ((-i) & 0xfffff000) == 0))
4240 return 1;
4241 /* Fall through. */
4242 case COMPARE:
4243 case EQ:
4244 case NE:
4245 case GT:
4246 case LE:
4247 case LT:
4248 case GE:
4249 case GEU:
4250 case LTU:
4251 case GTU:
4252 case LEU:
4253 case UNORDERED:
4254 case ORDERED:
4255 case UNEQ:
4256 case UNGE:
4257 case UNLT:
4258 case UNGT:
4259 case UNLE:
4260 return const_ok_for_arm (ARM_SIGN_EXTEND (-i));
4261
4262 case MINUS: /* Should only occur with (MINUS I reg) => rsb */
4263 case XOR:
4264 return 0;
4265
4266 case IOR:
4267 if (TARGET_THUMB2)
4268 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4269 return 0;
4270
4271 case AND:
4272 return const_ok_for_arm (ARM_SIGN_EXTEND (~i));
4273
4274 default:
4275 gcc_unreachable ();
4276 }
4277 }
4278
4279 /* Return true if I is a valid di mode constant for the operation CODE. */
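/* E.g. for a DImode AND, a 32-bit half equal to 0xFFFFFFFF leaves that
half unchanged, so only the other half has to be a valid immediate for
the operation.  */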
4280 int
4281 const_ok_for_dimode_op (HOST_WIDE_INT i, enum rtx_code code)
4282 {
4283 HOST_WIDE_INT hi_val = (i >> 32) & 0xFFFFFFFF;
4284 HOST_WIDE_INT lo_val = i & 0xFFFFFFFF;
4285 rtx hi = GEN_INT (hi_val);
4286 rtx lo = GEN_INT (lo_val);
4287
4288 if (TARGET_THUMB1)
4289 return 0;
4290
4291 switch (code)
4292 {
4293 case AND:
4294 case IOR:
4295 case XOR:
4296 return (const_ok_for_op (hi_val, code) || hi_val == 0xFFFFFFFF)
4297 && (const_ok_for_op (lo_val, code) || lo_val == 0xFFFFFFFF);
4298 case PLUS:
4299 return arm_not_operand (hi, SImode) && arm_add_operand (lo, SImode);
4300
4301 default:
4302 return 0;
4303 }
4304 }
4305
4306 /* Emit a sequence of insns to handle a large constant.
4307 CODE is the code of the operation required, it can be any of SET, PLUS,
4308 IOR, AND, XOR, MINUS;
4309 MODE is the mode in which the operation is being performed;
4310 VAL is the integer to operate on;
4311 SOURCE is the other operand (a register, or a null-pointer for SET);
4312 SUBTARGETS means it is safe to create scratch registers if that will
4313 either produce a simpler sequence, or we will want to cse the values.
4314 Return value is the number of insns emitted. */
4315
4316 /* ??? Tweak this for thumb2. */
4317 int
4318 arm_split_constant (enum rtx_code code, machine_mode mode, rtx insn,
4319 HOST_WIDE_INT val, rtx target, rtx source, int subtargets)
4320 {
4321 rtx cond;
4322
4323 if (insn && GET_CODE (PATTERN (insn)) == COND_EXEC)
4324 cond = COND_EXEC_TEST (PATTERN (insn));
4325 else
4326 cond = NULL_RTX;
4327
4328 if (subtargets || code == SET
4329 || (REG_P (target) && REG_P (source)
4330 && REGNO (target) != REGNO (source)))
4331 {
4332 /* After arm_reorg has been called, we can't fix up expensive
4333 constants by pushing them into memory so we must synthesize
4334 them in-line, regardless of the cost. This is only likely to
4335 be more costly on chips that have load delay slots and we are
4336 compiling without running the scheduler (so no splitting
4337 occurred before the final instruction emission).
4338
4339 Ref: gcc -O1 -mcpu=strongarm gcc.c-torture/compile/980506-2.c
4340 */
4341 if (!cfun->machine->after_arm_reorg
4342 && !cond
4343 && (arm_gen_constant (code, mode, NULL_RTX, val, target, source,
4344 1, 0)
4345 > (arm_constant_limit (optimize_function_for_size_p (cfun))
4346 + (code != SET))))
4347 {
4348 if (code == SET)
4349 {
4350 /* Currently SET is the only monadic value for CODE, all
4351 the rest are dyadic. */
4352 if (TARGET_USE_MOVT)
4353 arm_emit_movpair (target, GEN_INT (val));
4354 else
4355 emit_set_insn (target, GEN_INT (val));
4356
4357 return 1;
4358 }
4359 else
4360 {
4361 rtx temp = subtargets ? gen_reg_rtx (mode) : target;
4362
4363 if (TARGET_USE_MOVT)
4364 arm_emit_movpair (temp, GEN_INT (val));
4365 else
4366 emit_set_insn (temp, GEN_INT (val));
4367
4368 /* For MINUS, the value is the minuend, i.e. we compute
4369 val - source, since we never have subtraction of a constant. */
4370 if (code == MINUS)
4371 emit_set_insn (target, gen_rtx_MINUS (mode, temp, source));
4372 else
4373 emit_set_insn (target,
4374 gen_rtx_fmt_ee (code, mode, source, temp));
4375 return 2;
4376 }
4377 }
4378 }
4379
4380 return arm_gen_constant (code, mode, cond, val, target, source, subtargets,
4381 1);
4382 }
4383
4384 /* Return a sequence of integers, in RETURN_SEQUENCE, that fit into
4385 ARM/THUMB2 immediates and add up to VAL.
4386 The function's return value gives the number of insns required. */
4387 static int
4388 optimal_immediate_sequence (enum rtx_code code, unsigned HOST_WIDE_INT val,
4389 struct four_ints *return_sequence)
4390 {
4391 int best_consecutive_zeros = 0;
4392 int i;
4393 int best_start = 0;
4394 int insns1, insns2;
4395 struct four_ints tmp_sequence;
4396
4397 /* If we aren't targeting ARM, the best place to start is always at
4398 the bottom, otherwise look more closely. */
4399 if (TARGET_ARM)
4400 {
4401 for (i = 0; i < 32; i += 2)
4402 {
4403 int consecutive_zeros = 0;
4404
4405 if (!(val & (3 << i)))
4406 {
4407 while ((i < 32) && !(val & (3 << i)))
4408 {
4409 consecutive_zeros += 2;
4410 i += 2;
4411 }
4412 if (consecutive_zeros > best_consecutive_zeros)
4413 {
4414 best_consecutive_zeros = consecutive_zeros;
4415 best_start = i - consecutive_zeros;
4416 }
4417 i -= 2;
4418 }
4419 }
4420 }
4421
4422 /* So long as it won't require any more insns to do so, it's
4423 desirable to emit a small constant (in bits 0...9) in the last
4424 insn. This way there is more chance that it can be combined with
4425 a later addressing insn to form a pre-indexed load or store
4426 operation. Consider:
4427
4428 *((volatile int *)0xe0000100) = 1;
4429 *((volatile int *)0xe0000110) = 2;
4430
4431 We want this to wind up as:
4432
4433 mov rA, #0xe0000000
4434 mov rB, #1
4435 str rB, [rA, #0x100]
4436 mov rB, #2
4437 str rB, [rA, #0x110]
4438
4439 rather than having to synthesize both large constants from scratch.
4440
4441 Therefore, we calculate how many insns would be required to emit
4442 the constant starting from `best_start', and also starting from
4443 zero (i.e. with bit 31 first to be output). If `best_start' doesn't
4444 yield a shorter sequence, we may as well use zero. */
4445 insns1 = optimal_immediate_sequence_1 (code, val, return_sequence, best_start);
4446 if (best_start != 0
4447 && ((HOST_WIDE_INT_1U << best_start) < val))
4448 {
4449 insns2 = optimal_immediate_sequence_1 (code, val, &tmp_sequence, 0);
4450 if (insns2 <= insns1)
4451 {
4452 *return_sequence = tmp_sequence;
4453 insns1 = insns2;
4454 }
4455 }
4456
4457 return insns1;
4458 }
4459
4460 /* As for optimal_immediate_sequence, but starting at bit-position I. */
4461 static int
4462 optimal_immediate_sequence_1 (enum rtx_code code, unsigned HOST_WIDE_INT val,
4463 struct four_ints *return_sequence, int i)
4464 {
4465 int remainder = val & 0xffffffff;
4466 int insns = 0;
4467
4468 /* Try and find a way of doing the job in either two or three
4469 instructions.
4470
4471 In ARM mode we can use 8-bit constants, rotated to any 2-bit aligned
4472 location. We start at position I. This may be the MSB, or
4473 optimal_immediate_sequence may have positioned it at the largest block
4474 of zeros that are aligned on a 2-bit boundary. We then fill up the temps,
4475 wrapping around to the top of the word when we drop off the bottom.
4476 In the worst case this code should produce no more than four insns.
4477
4478 In Thumb2 mode, we can use 32/16-bit replicated constants, and 8-bit
4479 constants, shifted to any arbitrary location. We should always start
4480 at the MSB. */
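/* For example, in ARM state 0x12340000 is split into the two rotated
8-bit chunks 0x12000000 and 0x00340000, giving a two-insn mov/orr
(or mov/add) sequence.  */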
4481 do
4482 {
4483 int end;
4484 unsigned int b1, b2, b3, b4;
4485 unsigned HOST_WIDE_INT result;
4486 int loc;
4487
4488 gcc_assert (insns < 4);
4489
4490 if (i <= 0)
4491 i += 32;
4492
4493 /* First, find the next normal 12/8-bit shifted/rotated immediate. */
4494 if (remainder & ((TARGET_ARM ? (3 << (i - 2)) : (1 << (i - 1)))))
4495 {
4496 loc = i;
4497 if (i <= 12 && TARGET_THUMB2 && code == PLUS)
4498 /* We can use addw/subw for the last 12 bits. */
4499 result = remainder;
4500 else
4501 {
4502 /* Use an 8-bit shifted/rotated immediate. */
4503 end = i - 8;
4504 if (end < 0)
4505 end += 32;
4506 result = remainder & ((0x0ff << end)
4507 | ((i < end) ? (0xff >> (32 - end))
4508 : 0));
4509 i -= 8;
4510 }
4511 }
4512 else
4513 {
4514 /* Arm allows rotates by a multiple of two. Thumb-2 allows
4515 arbitrary shifts. */
4516 i -= TARGET_ARM ? 2 : 1;
4517 continue;
4518 }
4519
4520 /* Next, see if we can do a better job with a thumb2 replicated
4521 constant.
4522
4523 We do it this way around to catch the cases like 0x01F001E0 where
4524 two 8-bit immediates would work, but a replicated constant would
4525 make it worse.
4526
4527 TODO: 16-bit constants that don't clear all the bits, but still win.
4528 TODO: Arithmetic splitting for set/add/sub, rather than bitwise. */
4529 if (TARGET_THUMB2)
4530 {
4531 b1 = (remainder & 0xff000000) >> 24;
4532 b2 = (remainder & 0x00ff0000) >> 16;
4533 b3 = (remainder & 0x0000ff00) >> 8;
4534 b4 = remainder & 0xff;
4535
4536 if (loc > 24)
4537 {
4538 /* The 8-bit immediate already found clears b1 (and maybe b2),
4539 but must leave b3 and b4 alone. */
4540
4541 /* First try to find a 32-bit replicated constant that clears
4542 almost everything. We can assume that we can't do it in one,
4543 or else we wouldn't be here. */
4544 unsigned int tmp = b1 & b2 & b3 & b4;
4545 unsigned int tmp2 = tmp + (tmp << 8) + (tmp << 16)
4546 + (tmp << 24);
4547 unsigned int matching_bytes = (tmp == b1) + (tmp == b2)
4548 + (tmp == b3) + (tmp == b4);
4549 if (tmp
4550 && (matching_bytes >= 3
4551 || (matching_bytes == 2
4552 && const_ok_for_op (remainder & ~tmp2, code))))
4553 {
4554 /* At least 3 of the bytes match, and the fourth has at
4555 least as many bits set, or two of the bytes match
4556 and it will only require one more insn to finish. */
4557 result = tmp2;
4558 i = tmp != b1 ? 32
4559 : tmp != b2 ? 24
4560 : tmp != b3 ? 16
4561 : 8;
4562 }
4563
4564 /* Second, try to find a 16-bit replicated constant that can
4565 leave three of the bytes clear. If b2 or b4 is already
4566 zero, then we can. If the 8-bit from above would not
4567 clear b2 anyway, then we still win. */
4568 else if (b1 == b3 && (!b2 || !b4
4569 || (remainder & 0x00ff0000 & ~result)))
4570 {
4571 result = remainder & 0xff00ff00;
4572 i = 24;
4573 }
4574 }
4575 else if (loc > 16)
4576 {
4577 /* The 8-bit immediate already found clears b2 (and maybe b3)
4578 and we don't get here unless b1 is already clear, but it will
4579 leave b4 unchanged. */
4580
4581 /* If we can clear b2 and b4 at once, then we win, since the
4582 8-bits couldn't possibly reach that far. */
4583 if (b2 == b4)
4584 {
4585 result = remainder & 0x00ff00ff;
4586 i = 16;
4587 }
4588 }
4589 }
4590
4591 return_sequence->i[insns++] = result;
4592 remainder &= ~result;
4593
4594 if (code == SET || code == MINUS)
4595 code = PLUS;
4596 }
4597 while (remainder);
4598
4599 return insns;
4600 }
4601
4602 /* Emit an instruction with the indicated PATTERN. If COND is
4603 non-NULL, conditionalize the execution of the instruction on COND
4604 being true. */
4605
4606 static void
4607 emit_constant_insn (rtx cond, rtx pattern)
4608 {
4609 if (cond)
4610 pattern = gen_rtx_COND_EXEC (VOIDmode, copy_rtx (cond), pattern);
4611 emit_insn (pattern);
4612 }
4613
4614 /* As above, but extra parameter GENERATE which, if clear, suppresses
4615 RTL generation. */
4616
4617 static int
4618 arm_gen_constant (enum rtx_code code, machine_mode mode, rtx cond,
4619 unsigned HOST_WIDE_INT val, rtx target, rtx source,
4620 int subtargets, int generate)
4621 {
4622 int can_invert = 0;
4623 int can_negate = 0;
4624 int final_invert = 0;
4625 int i;
4626 int set_sign_bit_copies = 0;
4627 int clear_sign_bit_copies = 0;
4628 int clear_zero_bit_copies = 0;
4629 int set_zero_bit_copies = 0;
4630 int insns = 0, neg_insns, inv_insns;
4631 unsigned HOST_WIDE_INT temp1, temp2;
4632 unsigned HOST_WIDE_INT remainder = val & 0xffffffff;
4633 struct four_ints *immediates;
4634 struct four_ints pos_immediates, neg_immediates, inv_immediates;
4635
4636 /* Find out which operations are safe for a given CODE. Also do a quick
4637 check for degenerate cases; these can occur when DImode operations
4638 are split. */
4639 switch (code)
4640 {
4641 case SET:
4642 can_invert = 1;
4643 break;
4644
4645 case PLUS:
4646 can_negate = 1;
4647 break;
4648
4649 case IOR:
4650 if (remainder == 0xffffffff)
4651 {
4652 if (generate)
4653 emit_constant_insn (cond,
4654 gen_rtx_SET (target,
4655 GEN_INT (ARM_SIGN_EXTEND (val))));
4656 return 1;
4657 }
4658
4659 if (remainder == 0)
4660 {
4661 if (reload_completed && rtx_equal_p (target, source))
4662 return 0;
4663
4664 if (generate)
4665 emit_constant_insn (cond, gen_rtx_SET (target, source));
4666 return 1;
4667 }
4668 break;
4669
4670 case AND:
4671 if (remainder == 0)
4672 {
4673 if (generate)
4674 emit_constant_insn (cond, gen_rtx_SET (target, const0_rtx));
4675 return 1;
4676 }
4677 if (remainder == 0xffffffff)
4678 {
4679 if (reload_completed && rtx_equal_p (target, source))
4680 return 0;
4681 if (generate)
4682 emit_constant_insn (cond, gen_rtx_SET (target, source));
4683 return 1;
4684 }
4685 can_invert = 1;
4686 break;
4687
4688 case XOR:
4689 if (remainder == 0)
4690 {
4691 if (reload_completed && rtx_equal_p (target, source))
4692 return 0;
4693 if (generate)
4694 emit_constant_insn (cond, gen_rtx_SET (target, source));
4695 return 1;
4696 }
4697
4698 if (remainder == 0xffffffff)
4699 {
4700 if (generate)
4701 emit_constant_insn (cond,
4702 gen_rtx_SET (target,
4703 gen_rtx_NOT (mode, source)));
4704 return 1;
4705 }
4706 final_invert = 1;
4707 break;
4708
4709 case MINUS:
4710 /* We treat MINUS as (val - source), since (source - val) is always
4711 passed as (source + (-val)). */
4712 if (remainder == 0)
4713 {
4714 if (generate)
4715 emit_constant_insn (cond,
4716 gen_rtx_SET (target,
4717 gen_rtx_NEG (mode, source)));
4718 return 1;
4719 }
4720 if (const_ok_for_arm (val))
4721 {
4722 if (generate)
4723 emit_constant_insn (cond,
4724 gen_rtx_SET (target,
4725 gen_rtx_MINUS (mode, GEN_INT (val),
4726 source)));
4727 return 1;
4728 }
4729
4730 break;
4731
4732 default:
4733 gcc_unreachable ();
4734 }
4735
4736 /* If we can do it in one insn get out quickly. */
4737 if (const_ok_for_op (val, code))
4738 {
4739 if (generate)
4740 emit_constant_insn (cond,
4741 gen_rtx_SET (target,
4742 (source
4743 ? gen_rtx_fmt_ee (code, mode, source,
4744 GEN_INT (val))
4745 : GEN_INT (val))));
4746 return 1;
4747 }
4748
4749 /* On targets with UXTH/UBFX, we can deal with AND (2^N)-1 in a single
4750 insn. */
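/* E.g. an AND with 0xffff becomes a uxth, and, when ubfx is available,
an AND with 0x1ff becomes ubfx target, source, #0, #9.  */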
4751 if (code == AND && (i = exact_log2 (remainder + 1)) > 0
4752 && (arm_arch_thumb2 || (i == 16 && arm_arch6 && mode == SImode)))
4753 {
4754 if (generate)
4755 {
4756 if (mode == SImode && i == 16)
4757 /* Use UXTH in preference to UBFX, since on Thumb2 it's a
4758 smaller insn. */
4759 emit_constant_insn (cond,
4760 gen_zero_extendhisi2
4761 (target, gen_lowpart (HImode, source)));
4762 else
4763 /* Extz only supports SImode, but we can coerce the operands
4764 into that mode. */
4765 emit_constant_insn (cond,
4766 gen_extzv_t2 (gen_lowpart (SImode, target),
4767 gen_lowpart (SImode, source),
4768 GEN_INT (i), const0_rtx));
4769 }
4770
4771 return 1;
4772 }
4773
4774 /* Calculate a few attributes that may be useful for specific
4775 optimizations. */
4776 /* Count number of leading zeros. */
4777 for (i = 31; i >= 0; i--)
4778 {
4779 if ((remainder & (1 << i)) == 0)
4780 clear_sign_bit_copies++;
4781 else
4782 break;
4783 }
4784
4785 /* Count number of leading 1's. */
4786 for (i = 31; i >= 0; i--)
4787 {
4788 if ((remainder & (1 << i)) != 0)
4789 set_sign_bit_copies++;
4790 else
4791 break;
4792 }
4793
4794 /* Count number of trailing zeros. */
4795 for (i = 0; i <= 31; i++)
4796 {
4797 if ((remainder & (1 << i)) == 0)
4798 clear_zero_bit_copies++;
4799 else
4800 break;
4801 }
4802
4803 /* Count number of trailing 1's. */
4804 for (i = 0; i <= 31; i++)
4805 {
4806 if ((remainder & (1 << i)) != 0)
4807 set_zero_bit_copies++;
4808 else
4809 break;
4810 }
4811
4812 switch (code)
4813 {
4814 case SET:
4815 /* See if we can do this by sign_extending a constant that is known
4816 to be negative. This is a good way of doing it, since the shift
4817 may well merge into a subsequent insn. */
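/* E.g. 0xfffff800 has 21 leading 1s, so it can be built as
mov rN, #0x80000000
asr target, rN, #20
since 0x80000000 is itself a valid rotated 8-bit immediate.  */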
4818 if (set_sign_bit_copies > 1)
4819 {
4820 if (const_ok_for_arm
4821 (temp1 = ARM_SIGN_EXTEND (remainder
4822 << (set_sign_bit_copies - 1))))
4823 {
4824 if (generate)
4825 {
4826 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4827 emit_constant_insn (cond,
4828 gen_rtx_SET (new_src, GEN_INT (temp1)));
4829 emit_constant_insn (cond,
4830 gen_ashrsi3 (target, new_src,
4831 GEN_INT (set_sign_bit_copies - 1)));
4832 }
4833 return 2;
4834 }
4835 /* For an inverted constant, we will need to set the low bits,
4836 these will be shifted out of harm's way. */
4837 temp1 |= (1 << (set_sign_bit_copies - 1)) - 1;
4838 if (const_ok_for_arm (~temp1))
4839 {
4840 if (generate)
4841 {
4842 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4843 emit_constant_insn (cond,
4844 gen_rtx_SET (new_src, GEN_INT (temp1)));
4845 emit_constant_insn (cond,
4846 gen_ashrsi3 (target, new_src,
4847 GEN_INT (set_sign_bit_copies - 1)));
4848 }
4849 return 2;
4850 }
4851 }
4852
4853 /* See if we can calculate the value as the difference between two
4854 valid immediates. */
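/* E.g. 0x00fffff0 is 0x01000000 - 0x10, so it can be built as
mov rN, #0x01000000
followed by an add of -0x10 (i.e. sub target, rN, #0x10).  */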
4855 if (clear_sign_bit_copies + clear_zero_bit_copies <= 16)
4856 {
4857 int topshift = clear_sign_bit_copies & ~1;
4858
4859 temp1 = ARM_SIGN_EXTEND ((remainder + (0x00800000 >> topshift))
4860 & (0xff000000 >> topshift));
4861
4862 /* If temp1 is zero, then that means the 9 most significant
4863 bits of remainder were 1 and we've caused it to overflow.
4864 When topshift is 0 we don't need to do anything since we
4865 can borrow from 'bit 32'. */
4866 if (temp1 == 0 && topshift != 0)
4867 temp1 = 0x80000000 >> (topshift - 1);
4868
4869 temp2 = ARM_SIGN_EXTEND (temp1 - remainder);
4870
4871 if (const_ok_for_arm (temp2))
4872 {
4873 if (generate)
4874 {
4875 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
4876 emit_constant_insn (cond,
4877 gen_rtx_SET (new_src, GEN_INT (temp1)));
4878 emit_constant_insn (cond,
4879 gen_addsi3 (target, new_src,
4880 GEN_INT (-temp2)));
4881 }
4882
4883 return 2;
4884 }
4885 }
4886
4887 /* See if we can generate this by setting the bottom (or the top)
4888 16 bits, and then shifting these into the other half of the
4889 word. We only look for the simplest cases, to do more would cost
4890 too much. Be careful, however, not to generate this when the
4891 alternative would take fewer insns. */
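/* E.g. 0x01fe01fe is handled here by synthesizing 0x000001fe first and
then ORing it with a copy of itself shifted left by 16.  */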
4892 if (val & 0xffff0000)
4893 {
4894 temp1 = remainder & 0xffff0000;
4895 temp2 = remainder & 0x0000ffff;
4896
4897 /* Overlaps outside this range are best done using other methods. */
4898 for (i = 9; i < 24; i++)
4899 {
4900 if ((((temp2 | (temp2 << i)) & 0xffffffff) == remainder)
4901 && !const_ok_for_arm (temp2))
4902 {
4903 rtx new_src = (subtargets
4904 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4905 : target);
4906 insns = arm_gen_constant (code, mode, cond, temp2, new_src,
4907 source, subtargets, generate);
4908 source = new_src;
4909 if (generate)
4910 emit_constant_insn
4911 (cond,
4912 gen_rtx_SET
4913 (target,
4914 gen_rtx_IOR (mode,
4915 gen_rtx_ASHIFT (mode, source,
4916 GEN_INT (i)),
4917 source)));
4918 return insns + 1;
4919 }
4920 }
4921
4922 /* Don't duplicate cases already considered. */
4923 for (i = 17; i < 24; i++)
4924 {
4925 if (((temp1 | (temp1 >> i)) == remainder)
4926 && !const_ok_for_arm (temp1))
4927 {
4928 rtx new_src = (subtargets
4929 ? (generate ? gen_reg_rtx (mode) : NULL_RTX)
4930 : target);
4931 insns = arm_gen_constant (code, mode, cond, temp1, new_src,
4932 source, subtargets, generate);
4933 source = new_src;
4934 if (generate)
4935 emit_constant_insn
4936 (cond,
4937 gen_rtx_SET (target,
4938 gen_rtx_IOR
4939 (mode,
4940 gen_rtx_LSHIFTRT (mode, source,
4941 GEN_INT (i)),
4942 source)));
4943 return insns + 1;
4944 }
4945 }
4946 }
4947 break;
4948
4949 case IOR:
4950 case XOR:
4951 /* If we have IOR or XOR, and the constant can be loaded in a
4952 single instruction, and we can find a temporary to put it in,
4953 then this can be done in two instructions instead of 3-4. */
4954 if (subtargets
4955 /* TARGET can't be NULL if SUBTARGETS is 0.  */
4956 || (reload_completed && !reg_mentioned_p (target, source)))
4957 {
4958 if (const_ok_for_arm (ARM_SIGN_EXTEND (~val)))
4959 {
4960 if (generate)
4961 {
4962 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4963
4964 emit_constant_insn (cond,
4965 gen_rtx_SET (sub, GEN_INT (val)));
4966 emit_constant_insn (cond,
4967 gen_rtx_SET (target,
4968 gen_rtx_fmt_ee (code, mode,
4969 source, sub)));
4970 }
4971 return 2;
4972 }
4973 }
4974
4975 if (code == XOR)
4976 break;
4977
4978 /* Convert.
4979 x = y | constant (which is composed of set_sign_bit_copies leading 1s
4980 followed by 0s, e.g. 0xfff00000)
4981 x = ~(~(y ashift set_sign_bit_copies) lshiftrt set_sign_bit_copies)
4982
4983 This can be done in 2 instructions by using shifts with mov or mvn.
4984 e.g. for
4985 x = x | 0xfff00000;
4986 we generate.
4987 mvn r0, r0, asl #12
4988 mvn r0, r0, lsr #12 */
4989 if (set_sign_bit_copies > 8
4990 && (val & (HOST_WIDE_INT_M1U << (32 - set_sign_bit_copies))) == val)
4991 {
4992 if (generate)
4993 {
4994 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
4995 rtx shift = GEN_INT (set_sign_bit_copies);
4996
4997 emit_constant_insn
4998 (cond,
4999 gen_rtx_SET (sub,
5000 gen_rtx_NOT (mode,
5001 gen_rtx_ASHIFT (mode,
5002 source,
5003 shift))));
5004 emit_constant_insn
5005 (cond,
5006 gen_rtx_SET (target,
5007 gen_rtx_NOT (mode,
5008 gen_rtx_LSHIFTRT (mode, sub,
5009 shift))));
5010 }
5011 return 2;
5012 }
5013
5014 /* Convert
5015 x = y | constant (which has set_zero_bit_copies number of trailing ones).
5016 to
5017 x = ~((~y lshiftrt set_zero_bit_copies) ashift set_zero_bit_copies).
5018
5019 E.g. for r0 = r0 | 0xfff
5020 mvn r0, r0, lsr #12
5021 mvn r0, r0, asl #12
5022
5023 */
5024 if (set_zero_bit_copies > 8
5025 && (remainder & ((1 << set_zero_bit_copies) - 1)) == remainder)
5026 {
5027 if (generate)
5028 {
5029 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5030 rtx shift = GEN_INT (set_zero_bit_copies);
5031
5032 emit_constant_insn
5033 (cond,
5034 gen_rtx_SET (sub,
5035 gen_rtx_NOT (mode,
5036 gen_rtx_LSHIFTRT (mode,
5037 source,
5038 shift))));
5039 emit_constant_insn
5040 (cond,
5041 gen_rtx_SET (target,
5042 gen_rtx_NOT (mode,
5043 gen_rtx_ASHIFT (mode, sub,
5044 shift))));
5045 }
5046 return 2;
5047 }
5048
5049 /* This will never be reached for Thumb2 because orn is a valid
5050 instruction. This is for Thumb1 and the ARM 32 bit cases.
5051
5052 x = y | constant (such that ~constant is a valid constant)
5053 Transform this to
5054 x = ~(~y & ~constant).
5055 */
5056 if (const_ok_for_arm (temp1 = ARM_SIGN_EXTEND (~val)))
5057 {
5058 if (generate)
5059 {
5060 rtx sub = subtargets ? gen_reg_rtx (mode) : target;
5061 emit_constant_insn (cond,
5062 gen_rtx_SET (sub,
5063 gen_rtx_NOT (mode, source)));
5064 source = sub;
5065 if (subtargets)
5066 sub = gen_reg_rtx (mode);
5067 emit_constant_insn (cond,
5068 gen_rtx_SET (sub,
5069 gen_rtx_AND (mode, source,
5070 GEN_INT (temp1))));
5071 emit_constant_insn (cond,
5072 gen_rtx_SET (target,
5073 gen_rtx_NOT (mode, sub)));
5074 }
5075 return 3;
5076 }
5077 break;
5078
5079 case AND:
5080 /* See if two shifts will do 2 or more insn's worth of work. */
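    /* Illustrative example (not part of the original comment): for
	 r0 = r1 & 0x0000ffff
       the top 16 bits of the mask are clear, so the whole AND can be
       done with a shift pair such as
	 mov	r0, r1, asl #16
	 mov	r0, r0, lsr #16  */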
5081 if (clear_sign_bit_copies >= 16 && clear_sign_bit_copies < 24)
5082 {
5083 HOST_WIDE_INT shift_mask = ((0xffffffff
5084 << (32 - clear_sign_bit_copies))
5085 & 0xffffffff);
5086
5087 if ((remainder | shift_mask) != 0xffffffff)
5088 {
5089 HOST_WIDE_INT new_val
5090 = ARM_SIGN_EXTEND (remainder | shift_mask);
5091
5092 if (generate)
5093 {
5094 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5095 insns = arm_gen_constant (AND, SImode, cond, new_val,
5096 new_src, source, subtargets, 1);
5097 source = new_src;
5098 }
5099 else
5100 {
5101 rtx targ = subtargets ? NULL_RTX : target;
5102 insns = arm_gen_constant (AND, mode, cond, new_val,
5103 targ, source, subtargets, 0);
5104 }
5105 }
5106
5107 if (generate)
5108 {
5109 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5110 rtx shift = GEN_INT (clear_sign_bit_copies);
5111
5112 emit_insn (gen_ashlsi3 (new_src, source, shift));
5113 emit_insn (gen_lshrsi3 (target, new_src, shift));
5114 }
5115
5116 return insns + 2;
5117 }
5118
5119 if (clear_zero_bit_copies >= 16 && clear_zero_bit_copies < 24)
5120 {
5121 HOST_WIDE_INT shift_mask = (1 << clear_zero_bit_copies) - 1;
5122
5123 if ((remainder | shift_mask) != 0xffffffff)
5124 {
5125 HOST_WIDE_INT new_val
5126 = ARM_SIGN_EXTEND (remainder | shift_mask);
5127 if (generate)
5128 {
5129 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5130
5131 insns = arm_gen_constant (AND, mode, cond, new_val,
5132 new_src, source, subtargets, 1);
5133 source = new_src;
5134 }
5135 else
5136 {
5137 rtx targ = subtargets ? NULL_RTX : target;
5138
5139 insns = arm_gen_constant (AND, mode, cond, new_val,
5140 targ, source, subtargets, 0);
5141 }
5142 }
5143
5144 if (generate)
5145 {
5146 rtx new_src = subtargets ? gen_reg_rtx (mode) : target;
5147 rtx shift = GEN_INT (clear_zero_bit_copies);
5148
5149 emit_insn (gen_lshrsi3 (new_src, source, shift));
5150 emit_insn (gen_ashlsi3 (target, new_src, shift));
5151 }
5152
5153 return insns + 2;
5154 }
5155
5156 break;
5157
5158 default:
5159 break;
5160 }
5161
5162 /* Calculate what the instruction sequences would be if we generated it
5163 normally, negated, or inverted. */
5164 if (code == AND)
5165 /* AND cannot be split into multiple insns, so invert and use BIC. */
5166 insns = 99;
5167 else
5168 insns = optimal_immediate_sequence (code, remainder, &pos_immediates);
5169
5170 if (can_negate)
5171 neg_insns = optimal_immediate_sequence (code, (-remainder) & 0xffffffff,
5172 &neg_immediates);
5173 else
5174 neg_insns = 99;
5175
5176 if (can_invert || final_invert)
5177 inv_insns = optimal_immediate_sequence (code, remainder ^ 0xffffffff,
5178 &inv_immediates);
5179 else
5180 inv_insns = 99;
5181
5182 immediates = &pos_immediates;
5183
5184 /* Is the negated immediate sequence more efficient? */
5185 if (neg_insns < insns && neg_insns <= inv_insns)
5186 {
5187 insns = neg_insns;
5188 immediates = &neg_immediates;
5189 }
5190 else
5191 can_negate = 0;
5192
5193 /* Is the inverted immediate sequence more efficient?
5194 We must allow for an extra NOT instruction for XOR operations, although
5195 there is some chance that the final 'mvn' will get optimized later. */
5196 if ((inv_insns + 1) < insns || (!final_invert && inv_insns < insns))
5197 {
5198 insns = inv_insns;
5199 immediates = &inv_immediates;
5200 }
5201 else
5202 {
5203 can_invert = 0;
5204 final_invert = 0;
5205 }
5206
5207 /* Now output the chosen sequence as instructions. */
5208 if (generate)
5209 {
5210 for (i = 0; i < insns; i++)
5211 {
5212 rtx new_src, temp1_rtx;
5213
5214 temp1 = immediates->i[i];
5215
5216 if (code == SET || code == MINUS)
5217 new_src = (subtargets ? gen_reg_rtx (mode) : target);
5218 else if ((final_invert || i < (insns - 1)) && subtargets)
5219 new_src = gen_reg_rtx (mode);
5220 else
5221 new_src = target;
5222
5223 if (can_invert)
5224 temp1 = ~temp1;
5225 else if (can_negate)
5226 temp1 = -temp1;
5227
5228 temp1 = trunc_int_for_mode (temp1, mode);
5229 temp1_rtx = GEN_INT (temp1);
5230
5231 if (code == SET)
5232 ;
5233 else if (code == MINUS)
5234 temp1_rtx = gen_rtx_MINUS (mode, temp1_rtx, source);
5235 else
5236 temp1_rtx = gen_rtx_fmt_ee (code, mode, source, temp1_rtx);
5237
5238 emit_constant_insn (cond, gen_rtx_SET (new_src, temp1_rtx));
5239 source = new_src;
5240
5241 if (code == SET)
5242 {
5243 can_negate = can_invert;
5244 can_invert = 0;
5245 code = PLUS;
5246 }
5247 else if (code == MINUS)
5248 code = PLUS;
5249 }
5250 }
5251
5252 if (final_invert)
5253 {
5254 if (generate)
5255 emit_constant_insn (cond, gen_rtx_SET (target,
5256 gen_rtx_NOT (mode, source)));
5257 insns++;
5258 }
5259
5260 return insns;
5261 }
5262
5263 /* Canonicalize a comparison so that we are more likely to recognize it.
5264 This can be done for a few constant compares, where we can make the
5265 immediate value easier to load. */
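/* Illustrative example (not part of the original comment): in a comparison
   such as (x > 0xfff), the constant 0xfff cannot be encoded as an ARM
   immediate but 0x1000 can, so the comparison is rewritten as
   (x >= 0x1000), i.e. GT becomes GE with OP1 incremented, which is what
   the sub-DImode cases below do.  */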
5266
5267 static void
5268 arm_canonicalize_comparison (int *code, rtx *op0, rtx *op1,
5269 bool op0_preserve_value)
5270 {
5271 machine_mode mode;
5272 unsigned HOST_WIDE_INT i, maxval;
5273
5274 mode = GET_MODE (*op0);
5275 if (mode == VOIDmode)
5276 mode = GET_MODE (*op1);
5277
5278 maxval = (HOST_WIDE_INT_1U << (GET_MODE_BITSIZE (mode) - 1)) - 1;
5279
5280 /* For DImode, we have GE/LT/GEU/LTU comparisons. In ARM mode
5281 we can also use cmp/cmpeq for GTU/LEU. GT/LE must be either
5282 reversed or (for constant OP1) adjusted to GE/LT. Similarly
5283 for GTU/LEU in Thumb mode. */
5284 if (mode == DImode)
5285 {
5286
5287 if (*code == GT || *code == LE
5288 || (!TARGET_ARM && (*code == GTU || *code == LEU)))
5289 {
5290 /* Missing comparison. First try to use an available
5291 comparison. */
5292 if (CONST_INT_P (*op1))
5293 {
5294 i = INTVAL (*op1);
5295 switch (*code)
5296 {
5297 case GT:
5298 case LE:
5299 if (i != maxval
5300 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5301 {
5302 *op1 = GEN_INT (i + 1);
5303 *code = *code == GT ? GE : LT;
5304 return;
5305 }
5306 break;
5307 case GTU:
5308 case LEU:
5309 if (i != ~((unsigned HOST_WIDE_INT) 0)
5310 && arm_const_double_by_immediates (GEN_INT (i + 1)))
5311 {
5312 *op1 = GEN_INT (i + 1);
5313 *code = *code == GTU ? GEU : LTU;
5314 return;
5315 }
5316 break;
5317 default:
5318 gcc_unreachable ();
5319 }
5320 }
5321
5322 /* If that did not work, reverse the condition. */
5323 if (!op0_preserve_value)
5324 {
5325 std::swap (*op0, *op1);
5326 *code = (int)swap_condition ((enum rtx_code)*code);
5327 }
5328 }
5329 return;
5330 }
5331
5332 /* If *op0 is (zero_extend:SI (subreg:QI (reg:SI) 0)) and comparing
5333 with const0_rtx, change it to (and:SI (reg:SI) (const_int 255)),
5334 to facilitate possible combining with a cmp into 'ands'. */
5335 if (mode == SImode
5336 && GET_CODE (*op0) == ZERO_EXTEND
5337 && GET_CODE (XEXP (*op0, 0)) == SUBREG
5338 && GET_MODE (XEXP (*op0, 0)) == QImode
5339 && GET_MODE (SUBREG_REG (XEXP (*op0, 0))) == SImode
5340 && subreg_lowpart_p (XEXP (*op0, 0))
5341 && *op1 == const0_rtx)
5342 *op0 = gen_rtx_AND (SImode, SUBREG_REG (XEXP (*op0, 0)),
5343 GEN_INT (255));
5344
5345 /* Comparisons smaller than DImode. Only adjust comparisons against
5346 an out-of-range constant. */
5347 if (!CONST_INT_P (*op1)
5348 || const_ok_for_arm (INTVAL (*op1))
5349 || const_ok_for_arm (- INTVAL (*op1)))
5350 return;
5351
5352 i = INTVAL (*op1);
5353
5354 switch (*code)
5355 {
5356 case EQ:
5357 case NE:
5358 return;
5359
5360 case GT:
5361 case LE:
5362 if (i != maxval
5363 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5364 {
5365 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5366 *code = *code == GT ? GE : LT;
5367 return;
5368 }
5369 break;
5370
5371 case GE:
5372 case LT:
5373 if (i != ~maxval
5374 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5375 {
5376 *op1 = GEN_INT (i - 1);
5377 *code = *code == GE ? GT : LE;
5378 return;
5379 }
5380 break;
5381
5382 case GTU:
5383 case LEU:
5384 if (i != ~((unsigned HOST_WIDE_INT) 0)
5385 && (const_ok_for_arm (i + 1) || const_ok_for_arm (-(i + 1))))
5386 {
5387 *op1 = GEN_INT (ARM_SIGN_EXTEND (i + 1));
5388 *code = *code == GTU ? GEU : LTU;
5389 return;
5390 }
5391 break;
5392
5393 case GEU:
5394 case LTU:
5395 if (i != 0
5396 && (const_ok_for_arm (i - 1) || const_ok_for_arm (-(i - 1))))
5397 {
5398 *op1 = GEN_INT (i - 1);
5399 *code = *code == GEU ? GTU : LEU;
5400 return;
5401 }
5402 break;
5403
5404 default:
5405 gcc_unreachable ();
5406 }
5407 }
5408
5409
5410 /* Define how to find the value returned by a function. */
5411
5412 static rtx
5413 arm_function_value (const_tree type, const_tree func,
5414 bool outgoing ATTRIBUTE_UNUSED)
5415 {
5416 machine_mode mode;
5417 int unsignedp ATTRIBUTE_UNUSED;
5418 rtx r ATTRIBUTE_UNUSED;
5419
5420 mode = TYPE_MODE (type);
5421
5422 if (TARGET_AAPCS_BASED)
5423 return aapcs_allocate_return_reg (mode, type, func);
5424
5425 /* Promote integer types. */
5426 if (INTEGRAL_TYPE_P (type))
5427 mode = arm_promote_function_mode (type, mode, &unsignedp, func, 1);
5428
5429 /* Promotes small structs returned in a register to full-word size
5430 for big-endian AAPCS. */
5431 if (arm_return_in_msb (type))
5432 {
5433 HOST_WIDE_INT size = int_size_in_bytes (type);
5434 if (size % UNITS_PER_WORD != 0)
5435 {
5436 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
5437 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
5438 }
5439 }
5440
5441 return arm_libcall_value_1 (mode);
5442 }
5443
5444 /* libcall hashtable helpers. */
5445
5446 struct libcall_hasher : nofree_ptr_hash <const rtx_def>
5447 {
5448 static inline hashval_t hash (const rtx_def *);
5449 static inline bool equal (const rtx_def *, const rtx_def *);
5450 static inline void remove (rtx_def *);
5451 };
5452
5453 inline bool
5454 libcall_hasher::equal (const rtx_def *p1, const rtx_def *p2)
5455 {
5456 return rtx_equal_p (p1, p2);
5457 }
5458
5459 inline hashval_t
5460 libcall_hasher::hash (const rtx_def *p1)
5461 {
5462 return hash_rtx (p1, VOIDmode, NULL, NULL, FALSE);
5463 }
5464
5465 typedef hash_table<libcall_hasher> libcall_table_type;
5466
5467 static void
5468 add_libcall (libcall_table_type *htab, rtx libcall)
5469 {
5470 *htab->find_slot (libcall, INSERT) = libcall;
5471 }
5472
5473 static bool
5474 arm_libcall_uses_aapcs_base (const_rtx libcall)
5475 {
5476 static bool init_done = false;
5477 static libcall_table_type *libcall_htab = NULL;
5478
5479 if (!init_done)
5480 {
5481 init_done = true;
5482
5483 libcall_htab = new libcall_table_type (31);
5484 add_libcall (libcall_htab,
5485 convert_optab_libfunc (sfloat_optab, SFmode, SImode));
5486 add_libcall (libcall_htab,
5487 convert_optab_libfunc (sfloat_optab, DFmode, SImode));
5488 add_libcall (libcall_htab,
5489 convert_optab_libfunc (sfloat_optab, SFmode, DImode));
5490 add_libcall (libcall_htab,
5491 convert_optab_libfunc (sfloat_optab, DFmode, DImode));
5492
5493 add_libcall (libcall_htab,
5494 convert_optab_libfunc (ufloat_optab, SFmode, SImode));
5495 add_libcall (libcall_htab,
5496 convert_optab_libfunc (ufloat_optab, DFmode, SImode));
5497 add_libcall (libcall_htab,
5498 convert_optab_libfunc (ufloat_optab, SFmode, DImode));
5499 add_libcall (libcall_htab,
5500 convert_optab_libfunc (ufloat_optab, DFmode, DImode));
5501
5502 add_libcall (libcall_htab,
5503 convert_optab_libfunc (sext_optab, SFmode, HFmode));
5504 add_libcall (libcall_htab,
5505 convert_optab_libfunc (trunc_optab, HFmode, SFmode));
5506 add_libcall (libcall_htab,
5507 convert_optab_libfunc (sfix_optab, SImode, DFmode));
5508 add_libcall (libcall_htab,
5509 convert_optab_libfunc (ufix_optab, SImode, DFmode));
5510 add_libcall (libcall_htab,
5511 convert_optab_libfunc (sfix_optab, DImode, DFmode));
5512 add_libcall (libcall_htab,
5513 convert_optab_libfunc (ufix_optab, DImode, DFmode));
5514 add_libcall (libcall_htab,
5515 convert_optab_libfunc (sfix_optab, DImode, SFmode));
5516 add_libcall (libcall_htab,
5517 convert_optab_libfunc (ufix_optab, DImode, SFmode));
5518
5519 /* Values from double-precision helper functions are returned in core
5520 registers if the selected core only supports single-precision
5521 arithmetic, even if we are using the hard-float ABI. The same is
5522 true for single-precision helpers, but we will never be using the
5523 hard-float ABI on a CPU which doesn't support single-precision
5524 operations in hardware. */
5525 add_libcall (libcall_htab, optab_libfunc (add_optab, DFmode));
5526 add_libcall (libcall_htab, optab_libfunc (sdiv_optab, DFmode));
5527 add_libcall (libcall_htab, optab_libfunc (smul_optab, DFmode));
5528 add_libcall (libcall_htab, optab_libfunc (neg_optab, DFmode));
5529 add_libcall (libcall_htab, optab_libfunc (sub_optab, DFmode));
5530 add_libcall (libcall_htab, optab_libfunc (eq_optab, DFmode));
5531 add_libcall (libcall_htab, optab_libfunc (lt_optab, DFmode));
5532 add_libcall (libcall_htab, optab_libfunc (le_optab, DFmode));
5533 add_libcall (libcall_htab, optab_libfunc (ge_optab, DFmode));
5534 add_libcall (libcall_htab, optab_libfunc (gt_optab, DFmode));
5535 add_libcall (libcall_htab, optab_libfunc (unord_optab, DFmode));
5536 add_libcall (libcall_htab, convert_optab_libfunc (sext_optab, DFmode,
5537 SFmode));
5538 add_libcall (libcall_htab, convert_optab_libfunc (trunc_optab, SFmode,
5539 DFmode));
5540 add_libcall (libcall_htab,
5541 convert_optab_libfunc (trunc_optab, HFmode, DFmode));
5542 }
5543
5544 return libcall && libcall_htab->find (libcall) != NULL;
5545 }
5546
5547 static rtx
5548 arm_libcall_value_1 (machine_mode mode)
5549 {
5550 if (TARGET_AAPCS_BASED)
5551 return aapcs_libcall_value (mode);
5552 else if (TARGET_IWMMXT_ABI
5553 && arm_vector_mode_supported_p (mode))
5554 return gen_rtx_REG (mode, FIRST_IWMMXT_REGNUM);
5555 else
5556 return gen_rtx_REG (mode, ARG_REGISTER (1));
5557 }
5558
5559 /* Define how to find the value returned by a library function
5560 assuming the value has mode MODE. */
5561
5562 static rtx
5563 arm_libcall_value (machine_mode mode, const_rtx libcall)
5564 {
5565 if (TARGET_AAPCS_BASED && arm_pcs_default != ARM_PCS_AAPCS
5566 && GET_MODE_CLASS (mode) == MODE_FLOAT)
5567 {
5568 /* The following libcalls return their result in integer registers,
5569 even though they return a floating point value. */
5570 if (arm_libcall_uses_aapcs_base (libcall))
5571 return gen_rtx_REG (mode, ARG_REGISTER(1));
5572
5573 }
5574
5575 return arm_libcall_value_1 (mode);
5576 }
5577
5578 /* Implement TARGET_FUNCTION_VALUE_REGNO_P. */
5579
5580 static bool
5581 arm_function_value_regno_p (const unsigned int regno)
5582 {
5583 if (regno == ARG_REGISTER (1)
5584 || (TARGET_32BIT
5585 && TARGET_AAPCS_BASED
5586 && TARGET_HARD_FLOAT
5587 && regno == FIRST_VFP_REGNUM)
5588 || (TARGET_IWMMXT_ABI
5589 && regno == FIRST_IWMMXT_REGNUM))
5590 return true;
5591
5592 return false;
5593 }
5594
5595 /* Determine the amount of memory needed to store the possible return
5596 registers of an untyped call. */
5597 int
5598 arm_apply_result_size (void)
5599 {
5600 int size = 16;
5601
5602 if (TARGET_32BIT)
5603 {
5604 if (TARGET_HARD_FLOAT_ABI)
5605 size += 32;
5606 if (TARGET_IWMMXT_ABI)
5607 size += 8;
5608 }
5609
5610 return size;
5611 }
5612
5613 /* Decide whether TYPE should be returned in memory (true)
5614 or in a register (false). FNTYPE is the type of the function making
5615 the call. */
5616 static bool
5617 arm_return_in_memory (const_tree type, const_tree fntype)
5618 {
5619 HOST_WIDE_INT size;
5620
5621 size = int_size_in_bytes (type); /* Negative if not fixed size. */
5622
5623 if (TARGET_AAPCS_BASED)
5624 {
5625 /* Simple, non-aggregate types (i.e. not including vectors and
5626 complex) are always returned in a register (or registers).
5627 We don't care about which register here, so we can short-cut
5628 some of the detail. */
5629 if (!AGGREGATE_TYPE_P (type)
5630 && TREE_CODE (type) != VECTOR_TYPE
5631 && TREE_CODE (type) != COMPLEX_TYPE)
5632 return false;
5633
5634 /* Any return value that is no larger than one word can be
5635 returned in r0. */
5636 if (((unsigned HOST_WIDE_INT) size) <= UNITS_PER_WORD)
5637 return false;
5638
5639 /* Check any available co-processors to see if they accept the
5640 type as a register candidate (VFP, for example, can return
5641 some aggregates in consecutive registers). These aren't
5642 available if the call is variadic. */
5643 if (aapcs_select_return_coproc (type, fntype) >= 0)
5644 return false;
5645
5646 /* Vector values should be returned using ARM registers, not
5647 memory (unless they're over 16 bytes, which will break since
5648 we only have four call-clobbered registers to play with). */
5649 if (TREE_CODE (type) == VECTOR_TYPE)
5650 return (size < 0 || size > (4 * UNITS_PER_WORD));
5651
5652 /* The rest go in memory. */
5653 return true;
5654 }
5655
5656 if (TREE_CODE (type) == VECTOR_TYPE)
5657 return (size < 0 || size > (4 * UNITS_PER_WORD));
5658
5659 if (!AGGREGATE_TYPE_P (type)
5660 && (TREE_CODE (type) != VECTOR_TYPE))
5661 /* All simple types are returned in registers. */
5662 return false;
5663
5664 if (arm_abi != ARM_ABI_APCS)
5665 {
5666 /* ATPCS and later return aggregate types in memory only if they are
5667 larger than a word (or are variable size). */
5668 return (size < 0 || size > UNITS_PER_WORD);
5669 }
5670
5671 /* For the arm-wince targets we choose to be compatible with Microsoft's
5672 ARM and Thumb compilers, which always return aggregates in memory. */
5673 #ifndef ARM_WINCE
5674 /* All structures/unions bigger than one word are returned in memory.
5675 Also catch the case where int_size_in_bytes returns -1. In this case
5676 the aggregate is either huge or of variable size, and in either case
5677 we will want to return it via memory and not in a register. */
5678 if (size < 0 || size > UNITS_PER_WORD)
5679 return true;
5680
5681 if (TREE_CODE (type) == RECORD_TYPE)
5682 {
5683 tree field;
5684
5685 /* For a struct the APCS says that we only return in a register
5686 if the type is 'integer like' and every addressable element
5687 has an offset of zero. For practical purposes this means
5688 that the structure can have at most one non bit-field element
5689 and that this element must be the first one in the structure. */
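	 /* Illustrative examples (not part of the original comment):
	    under these rules  struct { int a; }  is returned in r0,
	    whereas  struct { float f; }  and  struct { short a; short b; }
	    are returned in memory even though they fit in one word.  */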
5690
5691 /* Find the first field, ignoring non FIELD_DECL things which will
5692 have been created by C++. */
5693 for (field = TYPE_FIELDS (type);
5694 field && TREE_CODE (field) != FIELD_DECL;
5695 field = DECL_CHAIN (field))
5696 continue;
5697
5698 if (field == NULL)
5699 return false; /* An empty structure. Allowed by an extension to ANSI C. */
5700
5701 /* Check that the first field is valid for returning in a register. */
5702
5703 /* ... Floats are not allowed */
5704 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5705 return true;
5706
5707 /* ... Aggregates that are not themselves valid for returning in
5708 a register are not allowed. */
5709 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5710 return true;
5711
5712 /* Now check the remaining fields, if any. Only bitfields are allowed,
5713 since they are not addressable. */
5714 for (field = DECL_CHAIN (field);
5715 field;
5716 field = DECL_CHAIN (field))
5717 {
5718 if (TREE_CODE (field) != FIELD_DECL)
5719 continue;
5720
5721 if (!DECL_BIT_FIELD_TYPE (field))
5722 return true;
5723 }
5724
5725 return false;
5726 }
5727
5728 if (TREE_CODE (type) == UNION_TYPE)
5729 {
5730 tree field;
5731
5732 /* Unions can be returned in registers if every element is
5733 integral, or can be returned in an integer register. */
5734 for (field = TYPE_FIELDS (type);
5735 field;
5736 field = DECL_CHAIN (field))
5737 {
5738 if (TREE_CODE (field) != FIELD_DECL)
5739 continue;
5740
5741 if (FLOAT_TYPE_P (TREE_TYPE (field)))
5742 return true;
5743
5744 if (arm_return_in_memory (TREE_TYPE (field), NULL_TREE))
5745 return true;
5746 }
5747
5748 return false;
5749 }
5750 #endif /* not ARM_WINCE */
5751
5752 /* Return all other types in memory. */
5753 return true;
5754 }
5755
5756 const struct pcs_attribute_arg
5757 {
5758 const char *arg;
5759 enum arm_pcs value;
5760 } pcs_attribute_args[] =
5761 {
5762 {"aapcs", ARM_PCS_AAPCS},
5763 {"aapcs-vfp", ARM_PCS_AAPCS_VFP},
5764 #if 0
5765 /* We could recognize these, but changes would be needed elsewhere
5766 to implement them.  */
5767 {"aapcs-iwmmxt", ARM_PCS_AAPCS_IWMMXT},
5768 {"atpcs", ARM_PCS_ATPCS},
5769 {"apcs", ARM_PCS_APCS},
5770 #endif
5771 {NULL, ARM_PCS_UNKNOWN}
5772 };
5773
5774 static enum arm_pcs
5775 arm_pcs_from_attribute (tree attr)
5776 {
5777 const struct pcs_attribute_arg *ptr;
5778 const char *arg;
5779
5780 /* Get the value of the argument. */
5781 if (TREE_VALUE (attr) == NULL_TREE
5782 || TREE_CODE (TREE_VALUE (attr)) != STRING_CST)
5783 return ARM_PCS_UNKNOWN;
5784
5785 arg = TREE_STRING_POINTER (TREE_VALUE (attr));
5786
5787 /* Check it against the list of known arguments. */
5788 for (ptr = pcs_attribute_args; ptr->arg != NULL; ptr++)
5789 if (streq (arg, ptr->arg))
5790 return ptr->value;
5791
5792 /* An unrecognized PCS variant. */
5793 return ARM_PCS_UNKNOWN;
5794 }
5795
5796 /* Get the PCS variant to use for this call. TYPE is the function's type
5797 specification, DECL is the specific declaration.  DECL may be null if
5798 the call could be indirect or if this is a library call. */
5799 static enum arm_pcs
5800 arm_get_pcs_model (const_tree type, const_tree decl)
5801 {
5802 bool user_convention = false;
5803 enum arm_pcs user_pcs = arm_pcs_default;
5804 tree attr;
5805
5806 gcc_assert (type);
5807
5808 attr = lookup_attribute ("pcs", TYPE_ATTRIBUTES (type));
5809 if (attr)
5810 {
5811 user_pcs = arm_pcs_from_attribute (TREE_VALUE (attr));
5812 user_convention = true;
5813 }
5814
5815 if (TARGET_AAPCS_BASED)
5816 {
5817 /* Detect varargs functions. These always use the base rules
5818 (no argument is ever a candidate for a co-processor
5819 register). */
5820 bool base_rules = stdarg_p (type);
5821
5822 if (user_convention)
5823 {
5824 if (user_pcs > ARM_PCS_AAPCS_LOCAL)
5825 sorry ("non-AAPCS derived PCS variant");
5826 else if (base_rules && user_pcs != ARM_PCS_AAPCS)
5827 error ("variadic functions must use the base AAPCS variant");
5828 }
5829
5830 if (base_rules)
5831 return ARM_PCS_AAPCS;
5832 else if (user_convention)
5833 return user_pcs;
5834 else if (decl && flag_unit_at_a_time)
5835 {
5836 /* Local functions never leak outside this compilation unit,
5837 so we are free to use whatever conventions are
5838 appropriate. */
5839 /* FIXME: remove CONST_CAST_TREE when cgraph is constified. */
5840 cgraph_local_info *i = cgraph_node::local_info (CONST_CAST_TREE(decl));
5841 if (i && i->local)
5842 return ARM_PCS_AAPCS_LOCAL;
5843 }
5844 }
5845 else if (user_convention && user_pcs != arm_pcs_default)
5846 sorry ("PCS variant");
5847
5848 /* For everything else we use the target's default. */
5849 return arm_pcs_default;
5850 }
5851
5852
5853 static void
5854 aapcs_vfp_cum_init (CUMULATIVE_ARGS *pcum,
5855 const_tree fntype ATTRIBUTE_UNUSED,
5856 rtx libcall ATTRIBUTE_UNUSED,
5857 const_tree fndecl ATTRIBUTE_UNUSED)
5858 {
5859 /* Record the unallocated VFP registers. */
5860 pcum->aapcs_vfp_regs_free = (1 << NUM_VFP_ARG_REGS) - 1;
5861 pcum->aapcs_vfp_reg_alloc = 0;
5862 }
5863
5864 /* Walk down the type tree of TYPE counting consecutive base elements.
5865 If *MODEP is VOIDmode, then set it to the first valid floating point
5866 type. If a non-floating point type is found, or if a floating point
5867 type that doesn't match a non-VOIDmode *MODEP is found, then return -1,
5868 otherwise return the count in the sub-tree. */
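/* Illustrative examples (not part of the original comment): for
     struct { float x, y, z; }
   this returns 3 with *MODEP set to SFmode (a homogeneous aggregate
   candidate), while for
     struct { float f; double d; }
   it returns -1 because the element modes do not match.  */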
5869 static int
5870 aapcs_vfp_sub_candidate (const_tree type, machine_mode *modep)
5871 {
5872 machine_mode mode;
5873 HOST_WIDE_INT size;
5874
5875 switch (TREE_CODE (type))
5876 {
5877 case REAL_TYPE:
5878 mode = TYPE_MODE (type);
5879 if (mode != DFmode && mode != SFmode && mode != HFmode)
5880 return -1;
5881
5882 if (*modep == VOIDmode)
5883 *modep = mode;
5884
5885 if (*modep == mode)
5886 return 1;
5887
5888 break;
5889
5890 case COMPLEX_TYPE:
5891 mode = TYPE_MODE (TREE_TYPE (type));
5892 if (mode != DFmode && mode != SFmode)
5893 return -1;
5894
5895 if (*modep == VOIDmode)
5896 *modep = mode;
5897
5898 if (*modep == mode)
5899 return 2;
5900
5901 break;
5902
5903 case VECTOR_TYPE:
5904 /* Use V2SImode and V4SImode as representatives of all 64-bit
5905 and 128-bit vector types, whether or not those modes are
5906 supported with the present options. */
5907 size = int_size_in_bytes (type);
5908 switch (size)
5909 {
5910 case 8:
5911 mode = V2SImode;
5912 break;
5913 case 16:
5914 mode = V4SImode;
5915 break;
5916 default:
5917 return -1;
5918 }
5919
5920 if (*modep == VOIDmode)
5921 *modep = mode;
5922
5923 /* Vector modes are considered to be opaque: two vectors are
5924 equivalent for the purposes of being homogeneous aggregates
5925 if they are the same size. */
5926 if (*modep == mode)
5927 return 1;
5928
5929 break;
5930
5931 case ARRAY_TYPE:
5932 {
5933 int count;
5934 tree index = TYPE_DOMAIN (type);
5935
5936 /* Can't handle incomplete types nor sizes that are not
5937 fixed. */
5938 if (!COMPLETE_TYPE_P (type)
5939 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5940 return -1;
5941
5942 count = aapcs_vfp_sub_candidate (TREE_TYPE (type), modep);
5943 if (count == -1
5944 || !index
5945 || !TYPE_MAX_VALUE (index)
5946 || !tree_fits_uhwi_p (TYPE_MAX_VALUE (index))
5947 || !TYPE_MIN_VALUE (index)
5948 || !tree_fits_uhwi_p (TYPE_MIN_VALUE (index))
5949 || count < 0)
5950 return -1;
5951
5952 count *= (1 + tree_to_uhwi (TYPE_MAX_VALUE (index))
5953 - tree_to_uhwi (TYPE_MIN_VALUE (index)));
5954
5955 /* There must be no padding. */
5956 if (wi::to_wide (TYPE_SIZE (type))
5957 != count * GET_MODE_BITSIZE (*modep))
5958 return -1;
5959
5960 return count;
5961 }
5962
5963 case RECORD_TYPE:
5964 {
5965 int count = 0;
5966 int sub_count;
5967 tree field;
5968
5969 /* Can't handle incomplete types nor sizes that are not
5970 fixed. */
5971 if (!COMPLETE_TYPE_P (type)
5972 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
5973 return -1;
5974
5975 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
5976 {
5977 if (TREE_CODE (field) != FIELD_DECL)
5978 continue;
5979
5980 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
5981 if (sub_count < 0)
5982 return -1;
5983 count += sub_count;
5984 }
5985
5986 /* There must be no padding. */
5987 if (wi::to_wide (TYPE_SIZE (type))
5988 != count * GET_MODE_BITSIZE (*modep))
5989 return -1;
5990
5991 return count;
5992 }
5993
5994 case UNION_TYPE:
5995 case QUAL_UNION_TYPE:
5996 {
5997 /* These aren't very interesting except in a degenerate case. */
5998 int count = 0;
5999 int sub_count;
6000 tree field;
6001
6002 /* Can't handle incomplete types nor sizes that are not
6003 fixed. */
6004 if (!COMPLETE_TYPE_P (type)
6005 || TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST)
6006 return -1;
6007
6008 for (field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6009 {
6010 if (TREE_CODE (field) != FIELD_DECL)
6011 continue;
6012
6013 sub_count = aapcs_vfp_sub_candidate (TREE_TYPE (field), modep);
6014 if (sub_count < 0)
6015 return -1;
6016 count = count > sub_count ? count : sub_count;
6017 }
6018
6019 /* There must be no padding. */
6020 if (wi::to_wide (TYPE_SIZE (type))
6021 != count * GET_MODE_BITSIZE (*modep))
6022 return -1;
6023
6024 return count;
6025 }
6026
6027 default:
6028 break;
6029 }
6030
6031 return -1;
6032 }
6033
6034 /* Return true if PCS_VARIANT should use VFP registers. */
6035 static bool
6036 use_vfp_abi (enum arm_pcs pcs_variant, bool is_double)
6037 {
6038 if (pcs_variant == ARM_PCS_AAPCS_VFP)
6039 {
6040 static bool seen_thumb1_vfp = false;
6041
6042 if (TARGET_THUMB1 && !seen_thumb1_vfp)
6043 {
6044 sorry ("Thumb-1 hard-float VFP ABI");
6045 /* sorry() is not immediately fatal, so only display this once. */
6046 seen_thumb1_vfp = true;
6047 }
6048
6049 return true;
6050 }
6051
6052 if (pcs_variant != ARM_PCS_AAPCS_LOCAL)
6053 return false;
6054
6055 return (TARGET_32BIT && TARGET_HARD_FLOAT &&
6056 (TARGET_VFP_DOUBLE || !is_double));
6057 }
6058
6059 /* Return true if an argument whose type is TYPE, or mode is MODE, is
6060 suitable for passing or returning in VFP registers for the PCS
6061 variant selected. If it is, then *BASE_MODE is updated to contain
6062 a machine mode describing each element of the argument's type and
6063 *COUNT to hold the number of such elements. */
6064 static bool
6065 aapcs_vfp_is_call_or_return_candidate (enum arm_pcs pcs_variant,
6066 machine_mode mode, const_tree type,
6067 machine_mode *base_mode, int *count)
6068 {
6069 machine_mode new_mode = VOIDmode;
6070
6071 /* If we have the type information, prefer that to working things
6072 out from the mode. */
6073 if (type)
6074 {
6075 int ag_count = aapcs_vfp_sub_candidate (type, &new_mode);
6076
6077 if (ag_count > 0 && ag_count <= 4)
6078 *count = ag_count;
6079 else
6080 return false;
6081 }
6082 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
6083 || GET_MODE_CLASS (mode) == MODE_VECTOR_INT
6084 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
6085 {
6086 *count = 1;
6087 new_mode = mode;
6088 }
6089 else if (GET_MODE_CLASS (mode) == MODE_COMPLEX_FLOAT)
6090 {
6091 *count = 2;
6092 new_mode = (mode == DCmode ? DFmode : SFmode);
6093 }
6094 else
6095 return false;
6096
6097
6098 if (!use_vfp_abi (pcs_variant, ARM_NUM_REGS (new_mode) > 1))
6099 return false;
6100
6101 *base_mode = new_mode;
6102 return true;
6103 }
6104
6105 static bool
6106 aapcs_vfp_is_return_candidate (enum arm_pcs pcs_variant,
6107 machine_mode mode, const_tree type)
6108 {
6109 int count ATTRIBUTE_UNUSED;
6110 machine_mode ag_mode ATTRIBUTE_UNUSED;
6111
6112 if (!use_vfp_abi (pcs_variant, false))
6113 return false;
6114 return aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6115 &ag_mode, &count);
6116 }
6117
6118 static bool
6119 aapcs_vfp_is_call_candidate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6120 const_tree type)
6121 {
6122 if (!use_vfp_abi (pcum->pcs_variant, false))
6123 return false;
6124
6125 return aapcs_vfp_is_call_or_return_candidate (pcum->pcs_variant, mode, type,
6126 &pcum->aapcs_vfp_rmode,
6127 &pcum->aapcs_vfp_rcount);
6128 }
6129
6130 /* Implement the allocate field in aapcs_cp_arg_layout. See the comment there
6131 for the behaviour of this function. */
6132
6133 static bool
6134 aapcs_vfp_allocate (CUMULATIVE_ARGS *pcum, machine_mode mode,
6135 const_tree type ATTRIBUTE_UNUSED)
6136 {
6137 int rmode_size
6138 = MAX (GET_MODE_SIZE (pcum->aapcs_vfp_rmode), GET_MODE_SIZE (SFmode));
6139 int shift = rmode_size / GET_MODE_SIZE (SFmode);
6140 unsigned mask = (1 << (shift * pcum->aapcs_vfp_rcount)) - 1;
6141 int regno;
6142
6143 for (regno = 0; regno < NUM_VFP_ARG_REGS; regno += shift)
6144 if (((pcum->aapcs_vfp_regs_free >> regno) & mask) == mask)
6145 {
6146 pcum->aapcs_vfp_reg_alloc = mask << regno;
6147 if (mode == BLKmode
6148 || (mode == TImode && ! TARGET_NEON)
6149 || ! arm_hard_regno_mode_ok (FIRST_VFP_REGNUM + regno, mode))
6150 {
6151 int i;
6152 int rcount = pcum->aapcs_vfp_rcount;
6153 int rshift = shift;
6154 machine_mode rmode = pcum->aapcs_vfp_rmode;
6155 rtx par;
6156 if (!TARGET_NEON)
6157 {
6158 /* Avoid using unsupported vector modes. */
6159 if (rmode == V2SImode)
6160 rmode = DImode;
6161 else if (rmode == V4SImode)
6162 {
6163 rmode = DImode;
6164 rcount *= 2;
6165 rshift /= 2;
6166 }
6167 }
6168 par = gen_rtx_PARALLEL (mode, rtvec_alloc (rcount));
6169 for (i = 0; i < rcount; i++)
6170 {
6171 rtx tmp = gen_rtx_REG (rmode,
6172 FIRST_VFP_REGNUM + regno + i * rshift);
6173 tmp = gen_rtx_EXPR_LIST
6174 (VOIDmode, tmp,
6175 GEN_INT (i * GET_MODE_SIZE (rmode)));
6176 XVECEXP (par, 0, i) = tmp;
6177 }
6178
6179 pcum->aapcs_reg = par;
6180 }
6181 else
6182 pcum->aapcs_reg = gen_rtx_REG (mode, FIRST_VFP_REGNUM + regno);
6183 return true;
6184 }
6185 return false;
6186 }
6187
6188 /* Implement the allocate_return_reg field in aapcs_cp_arg_layout. See the
6189 comment there for the behaviour of this function. */
6190
6191 static rtx
6192 aapcs_vfp_allocate_return_reg (enum arm_pcs pcs_variant,
6193 machine_mode mode,
6194 const_tree type)
6195 {
6196 if (!use_vfp_abi (pcs_variant, false))
6197 return NULL;
6198
6199 if (mode == BLKmode
6200 || (GET_MODE_CLASS (mode) == MODE_INT
6201 && GET_MODE_SIZE (mode) >= GET_MODE_SIZE (TImode)
6202 && !TARGET_NEON))
6203 {
6204 int count;
6205 machine_mode ag_mode;
6206 int i;
6207 rtx par;
6208 int shift;
6209
6210 aapcs_vfp_is_call_or_return_candidate (pcs_variant, mode, type,
6211 &ag_mode, &count);
6212
6213 if (!TARGET_NEON)
6214 {
6215 if (ag_mode == V2SImode)
6216 ag_mode = DImode;
6217 else if (ag_mode == V4SImode)
6218 {
6219 ag_mode = DImode;
6220 count *= 2;
6221 }
6222 }
6223 shift = GET_MODE_SIZE (ag_mode) / GET_MODE_SIZE (SFmode);
6224 par = gen_rtx_PARALLEL (mode, rtvec_alloc (count));
6225 for (i = 0; i < count; i++)
6226 {
6227 rtx tmp = gen_rtx_REG (ag_mode, FIRST_VFP_REGNUM + i * shift);
6228 tmp = gen_rtx_EXPR_LIST (VOIDmode, tmp,
6229 GEN_INT (i * GET_MODE_SIZE (ag_mode)));
6230 XVECEXP (par, 0, i) = tmp;
6231 }
6232
6233 return par;
6234 }
6235
6236 return gen_rtx_REG (mode, FIRST_VFP_REGNUM);
6237 }
6238
6239 static void
6240 aapcs_vfp_advance (CUMULATIVE_ARGS *pcum,
6241 machine_mode mode ATTRIBUTE_UNUSED,
6242 const_tree type ATTRIBUTE_UNUSED)
6243 {
6244 pcum->aapcs_vfp_regs_free &= ~pcum->aapcs_vfp_reg_alloc;
6245 pcum->aapcs_vfp_reg_alloc = 0;
6246 return;
6247 }
6248
6249 #define AAPCS_CP(X) \
6250 { \
6251 aapcs_ ## X ## _cum_init, \
6252 aapcs_ ## X ## _is_call_candidate, \
6253 aapcs_ ## X ## _allocate, \
6254 aapcs_ ## X ## _is_return_candidate, \
6255 aapcs_ ## X ## _allocate_return_reg, \
6256 aapcs_ ## X ## _advance \
6257 }
6258
6259 /* Table of co-processors that can be used to pass arguments in
6260 registers.  Ideally no argument should be a candidate for more than
6261 one co-processor table entry, but the table is processed in order
6262 and stops after the first match. If that entry then fails to put
6263 the argument into a co-processor register, the argument will go on
6264 the stack. */
6265 static struct
6266 {
6267 /* Initialize co-processor related state in CUMULATIVE_ARGS structure. */
6268 void (*cum_init) (CUMULATIVE_ARGS *, const_tree, rtx, const_tree);
6269
6270 /* Return true if an argument of mode MODE (or type TYPE if MODE is
6271 BLKmode) is a candidate for this co-processor's registers; this
6272 function should ignore any position-dependent state in
6273 CUMULATIVE_ARGS and only use call-type dependent information. */
6274 bool (*is_call_candidate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6275
6276 /* Return true if the argument does get a co-processor register; it
6277 should set aapcs_reg to an RTX of the register allocated as is
6278 required for a return from FUNCTION_ARG. */
6279 bool (*allocate) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6280
6281 /* Return true if a result of mode MODE (or type TYPE if MODE is BLKmode) can
6282 be returned in this co-processor's registers. */
6283 bool (*is_return_candidate) (enum arm_pcs, machine_mode, const_tree);
6284
6285 /* Allocate and return an RTX element to hold the return type of a call. This
6286 routine must not fail and will only be called if is_return_candidate
6287 returned true with the same parameters. */
6288 rtx (*allocate_return_reg) (enum arm_pcs, machine_mode, const_tree);
6289
6290 /* Finish processing this argument and prepare to start processing
6291 the next one. */
6292 void (*advance) (CUMULATIVE_ARGS *, machine_mode, const_tree);
6293 } aapcs_cp_arg_layout[ARM_NUM_COPROC_SLOTS] =
6294 {
6295 AAPCS_CP(vfp)
6296 };
6297
6298 #undef AAPCS_CP
6299
6300 static int
6301 aapcs_select_call_coproc (CUMULATIVE_ARGS *pcum, machine_mode mode,
6302 const_tree type)
6303 {
6304 int i;
6305
6306 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6307 if (aapcs_cp_arg_layout[i].is_call_candidate (pcum, mode, type))
6308 return i;
6309
6310 return -1;
6311 }
6312
6313 static int
6314 aapcs_select_return_coproc (const_tree type, const_tree fntype)
6315 {
6316 /* We aren't passed a decl, so we can't check that a call is local.
6317 However, it isn't clear that that would be a win anyway, since it
6318 might limit some tail-calling opportunities. */
6319 enum arm_pcs pcs_variant;
6320
6321 if (fntype)
6322 {
6323 const_tree fndecl = NULL_TREE;
6324
6325 if (TREE_CODE (fntype) == FUNCTION_DECL)
6326 {
6327 fndecl = fntype;
6328 fntype = TREE_TYPE (fntype);
6329 }
6330
6331 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6332 }
6333 else
6334 pcs_variant = arm_pcs_default;
6335
6336 if (pcs_variant != ARM_PCS_AAPCS)
6337 {
6338 int i;
6339
6340 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6341 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant,
6342 TYPE_MODE (type),
6343 type))
6344 return i;
6345 }
6346 return -1;
6347 }
6348
6349 static rtx
6350 aapcs_allocate_return_reg (machine_mode mode, const_tree type,
6351 const_tree fntype)
6352 {
6353 /* We aren't passed a decl, so we can't check that a call is local.
6354 However, it isn't clear that that would be a win anyway, since it
6355 might limit some tail-calling opportunities. */
6356 enum arm_pcs pcs_variant;
6357 int unsignedp ATTRIBUTE_UNUSED;
6358
6359 if (fntype)
6360 {
6361 const_tree fndecl = NULL_TREE;
6362
6363 if (TREE_CODE (fntype) == FUNCTION_DECL)
6364 {
6365 fndecl = fntype;
6366 fntype = TREE_TYPE (fntype);
6367 }
6368
6369 pcs_variant = arm_get_pcs_model (fntype, fndecl);
6370 }
6371 else
6372 pcs_variant = arm_pcs_default;
6373
6374 /* Promote integer types. */
6375 if (type && INTEGRAL_TYPE_P (type))
6376 mode = arm_promote_function_mode (type, mode, &unsignedp, fntype, 1);
6377
6378 if (pcs_variant != ARM_PCS_AAPCS)
6379 {
6380 int i;
6381
6382 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6383 if (aapcs_cp_arg_layout[i].is_return_candidate (pcs_variant, mode,
6384 type))
6385 return aapcs_cp_arg_layout[i].allocate_return_reg (pcs_variant,
6386 mode, type);
6387 }
6388
6389 /* Promotes small structs returned in a register to full-word size
6390 for big-endian AAPCS. */
6391 if (type && arm_return_in_msb (type))
6392 {
6393 HOST_WIDE_INT size = int_size_in_bytes (type);
6394 if (size % UNITS_PER_WORD != 0)
6395 {
6396 size += UNITS_PER_WORD - size % UNITS_PER_WORD;
6397 mode = int_mode_for_size (size * BITS_PER_UNIT, 0).require ();
6398 }
6399 }
6400
6401 return gen_rtx_REG (mode, R0_REGNUM);
6402 }
6403
6404 static rtx
6405 aapcs_libcall_value (machine_mode mode)
6406 {
6407 if (BYTES_BIG_ENDIAN && ALL_FIXED_POINT_MODE_P (mode)
6408 && GET_MODE_SIZE (mode) <= 4)
6409 mode = SImode;
6410
6411 return aapcs_allocate_return_reg (mode, NULL_TREE, NULL_TREE);
6412 }
6413
6414 /* Lay out a function argument using the AAPCS rules. The rule
6415 numbers referred to here are those in the AAPCS. */
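/* Illustrative example (not part of the original comment), assuming the
   base AAPCS variant with no co-processor registers: for a call to
     void f (int a, double b, int c);
   A is allocated to r0 (C4), B is double-word aligned so the NCRN is
   rounded up to 2 (C3) and B occupies r2-r3 (C4), leaving no core
   registers for C, which therefore goes on the stack (C6/C7).  */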
6416 static void
6417 aapcs_layout_arg (CUMULATIVE_ARGS *pcum, machine_mode mode,
6418 const_tree type, bool named)
6419 {
6420 int nregs, nregs2;
6421 int ncrn;
6422
6423 /* We only need to do this once per argument. */
6424 if (pcum->aapcs_arg_processed)
6425 return;
6426
6427 pcum->aapcs_arg_processed = true;
6428
6429 /* Special case: if named is false then we are handling an incoming
6430 anonymous argument which is on the stack. */
6431 if (!named)
6432 return;
6433
6434 /* Is this a potential co-processor register candidate? */
6435 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6436 {
6437 int slot = aapcs_select_call_coproc (pcum, mode, type);
6438 pcum->aapcs_cprc_slot = slot;
6439
6440 /* We don't have to apply any of the rules from part B of the
6441 preparation phase, these are handled elsewhere in the
6442 compiler. */
6443
6444 if (slot >= 0)
6445 {
6446 /* A Co-processor register candidate goes either in its own
6447 class of registers or on the stack. */
6448 if (!pcum->aapcs_cprc_failed[slot])
6449 {
6450 /* C1.cp - Try to allocate the argument to co-processor
6451 registers. */
6452 if (aapcs_cp_arg_layout[slot].allocate (pcum, mode, type))
6453 return;
6454
6455 /* C2.cp - Put the argument on the stack and note that we
6456 can't assign any more candidates in this slot. We also
6457 need to note that we have allocated stack space, so that
6458 we won't later try to split a non-cprc candidate between
6459 core registers and the stack. */
6460 pcum->aapcs_cprc_failed[slot] = true;
6461 pcum->can_split = false;
6462 }
6463
6464 /* We didn't get a register, so this argument goes on the
6465 stack. */
6466 gcc_assert (pcum->can_split == false);
6467 return;
6468 }
6469 }
6470
6471 /* C3 - For double-word aligned arguments, round the NCRN up to the
6472 next even number. */
6473 ncrn = pcum->aapcs_ncrn;
6474 if (ncrn & 1)
6475 {
6476 int res = arm_needs_doubleword_align (mode, type);
6477 /* Only warn during RTL expansion of call stmts, otherwise we would
6478 warn e.g. during gimplification even on functions that will be
6479 always inlined, and we'd warn multiple times. Don't warn when
6480 called in expand_function_start either, as we warn instead in
6481 arm_function_arg_boundary in that case. */
6482 if (res < 0 && warn_psabi && currently_expanding_gimple_stmt)
6483 inform (input_location, "parameter passing for argument of type "
6484 "%qT changed in GCC 7.1", type);
6485 else if (res > 0)
6486 ncrn++;
6487 }
6488
6489 nregs = ARM_NUM_REGS2 (mode, type);
6490
6491 /* Sigh, this test should really assert that nregs > 0, but a GCC
6492 extension allows empty structs and then gives them empty size; it
6493 then allows such a structure to be passed by value. For some of
6494 the code below we have to pretend that such an argument has
6495 non-zero size so that we 'locate' it correctly either in
6496 registers or on the stack. */
6497 gcc_assert (nregs >= 0);
6498
6499 nregs2 = nregs ? nregs : 1;
6500
6501 /* C4 - Argument fits entirely in core registers. */
6502 if (ncrn + nregs2 <= NUM_ARG_REGS)
6503 {
6504 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6505 pcum->aapcs_next_ncrn = ncrn + nregs;
6506 return;
6507 }
6508
6509 /* C5 - Some core registers left and there are no arguments already
6510 on the stack: split this argument between the remaining core
6511 registers and the stack. */
6512 if (ncrn < NUM_ARG_REGS && pcum->can_split)
6513 {
6514 pcum->aapcs_reg = gen_rtx_REG (mode, ncrn);
6515 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6516 pcum->aapcs_partial = (NUM_ARG_REGS - ncrn) * UNITS_PER_WORD;
6517 return;
6518 }
6519
6520 /* C6 - NCRN is set to 4. */
6521 pcum->aapcs_next_ncrn = NUM_ARG_REGS;
6522
6523 /* C7,C8 - argument goes on the stack.  We have nothing to do here. */
6524 return;
6525 }
6526
6527 /* Initialize a variable CUM of type CUMULATIVE_ARGS
6528 for a call to a function whose data type is FNTYPE.
6529 For a library call, FNTYPE is NULL. */
6530 void
6531 arm_init_cumulative_args (CUMULATIVE_ARGS *pcum, tree fntype,
6532 rtx libname,
6533 tree fndecl ATTRIBUTE_UNUSED)
6534 {
6535 /* Long call handling. */
6536 if (fntype)
6537 pcum->pcs_variant = arm_get_pcs_model (fntype, fndecl);
6538 else
6539 pcum->pcs_variant = arm_pcs_default;
6540
6541 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6542 {
6543 if (arm_libcall_uses_aapcs_base (libname))
6544 pcum->pcs_variant = ARM_PCS_AAPCS;
6545
6546 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn = 0;
6547 pcum->aapcs_reg = NULL_RTX;
6548 pcum->aapcs_partial = 0;
6549 pcum->aapcs_arg_processed = false;
6550 pcum->aapcs_cprc_slot = -1;
6551 pcum->can_split = true;
6552
6553 if (pcum->pcs_variant != ARM_PCS_AAPCS)
6554 {
6555 int i;
6556
6557 for (i = 0; i < ARM_NUM_COPROC_SLOTS; i++)
6558 {
6559 pcum->aapcs_cprc_failed[i] = false;
6560 aapcs_cp_arg_layout[i].cum_init (pcum, fntype, libname, fndecl);
6561 }
6562 }
6563 return;
6564 }
6565
6566 /* Legacy ABIs */
6567
6568 /* On the ARM, the offset starts at 0. */
6569 pcum->nregs = 0;
6570 pcum->iwmmxt_nregs = 0;
6571 pcum->can_split = true;
6572
6573 /* Varargs vectors are treated the same as long long.
6574 named_count avoids having to change the way arm handles 'named'.  */
6575 pcum->named_count = 0;
6576 pcum->nargs = 0;
6577
6578 if (TARGET_REALLY_IWMMXT && fntype)
6579 {
6580 tree fn_arg;
6581
6582 for (fn_arg = TYPE_ARG_TYPES (fntype);
6583 fn_arg;
6584 fn_arg = TREE_CHAIN (fn_arg))
6585 pcum->named_count += 1;
6586
6587 if (! pcum->named_count)
6588 pcum->named_count = INT_MAX;
6589 }
6590 }
6591
6592 /* Return 1 if double word alignment is required for argument passing.
6593 Return -1 if double word alignment used to be required for argument
6594 passing before PR77728 ABI fix, but is not required anymore.
6595 Return 0 if double word alignment is not required and wasn't required
6596 before either. */
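/* Illustrative examples (not part of the original comment): an argument
   of type long long or  struct { double d; }  needs double-word alignment
   and yields 1, while a plain int yields 0.  An aggregate whose only
   over-aligned member is, for instance, a static data member of type
   double would have been treated as double-word aligned before the
   PR77728 fix and now yields -1 so that -Wpsabi can warn about it.  */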
6597 static int
6598 arm_needs_doubleword_align (machine_mode mode, const_tree type)
6599 {
6600 if (!type)
6601 return GET_MODE_ALIGNMENT (mode) > PARM_BOUNDARY;
6602
6603 /* Scalar and vector types: Use natural alignment, i.e. of base type. */
6604 if (!AGGREGATE_TYPE_P (type))
6605 return TYPE_ALIGN (TYPE_MAIN_VARIANT (type)) > PARM_BOUNDARY;
6606
6607 /* Array types: Use member alignment of element type. */
6608 if (TREE_CODE (type) == ARRAY_TYPE)
6609 return TYPE_ALIGN (TREE_TYPE (type)) > PARM_BOUNDARY;
6610
6611 int ret = 0;
6612 /* Record/aggregate types: Use greatest member alignment of any member. */
6613 for (tree field = TYPE_FIELDS (type); field; field = DECL_CHAIN (field))
6614 if (DECL_ALIGN (field) > PARM_BOUNDARY)
6615 {
6616 if (TREE_CODE (field) == FIELD_DECL)
6617 return 1;
6618 else
6619 /* Before PR77728 fix, we were incorrectly considering also
6620 other aggregate fields, like VAR_DECLs, TYPE_DECLs etc.
6621 Make sure we can warn about that with -Wpsabi. */
6622 ret = -1;
6623 }
6624
6625 return ret;
6626 }
6627
6628
6629 /* Determine where to put an argument to a function.
6630 Value is zero to push the argument on the stack,
6631 or a hard register in which to store the argument.
6632
6633 MODE is the argument's machine mode.
6634 TYPE is the data type of the argument (as a tree).
6635 This is null for libcalls where that information may
6636 not be available.
6637 CUM is a variable of type CUMULATIVE_ARGS which gives info about
6638 the preceding args and about the function being called.
6639 NAMED is nonzero if this argument is a named parameter
6640 (otherwise it is an extra parameter matching an ellipsis).
6641
6642 On the ARM, normally the first 16 bytes are passed in registers r0-r3; all
6643 other arguments are passed on the stack. If (NAMED == 0) (which happens
6644 only in assign_parms, since TARGET_SETUP_INCOMING_VARARGS is
6645 defined), say it is passed on the stack (function_prologue will
6646 indeed make it be passed on the stack if necessary).  */
6647
6648 static rtx
6649 arm_function_arg (cumulative_args_t pcum_v, machine_mode mode,
6650 const_tree type, bool named)
6651 {
6652 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6653 int nregs;
6654
6655 /* Handle the special case quickly. Pick an arbitrary value for op2 of
6656 a call insn (op3 of a call_value insn). */
6657 if (mode == VOIDmode)
6658 return const0_rtx;
6659
6660 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6661 {
6662 aapcs_layout_arg (pcum, mode, type, named);
6663 return pcum->aapcs_reg;
6664 }
6665
6666 /* Varargs vectors are treated the same as long long.
6667 named_count avoids having to change the way arm handles 'named'.  */
6668 if (TARGET_IWMMXT_ABI
6669 && arm_vector_mode_supported_p (mode)
6670 && pcum->named_count > pcum->nargs + 1)
6671 {
6672 if (pcum->iwmmxt_nregs <= 9)
6673 return gen_rtx_REG (mode, pcum->iwmmxt_nregs + FIRST_IWMMXT_REGNUM);
6674 else
6675 {
6676 pcum->can_split = false;
6677 return NULL_RTX;
6678 }
6679 }
6680
6681 /* Put doubleword aligned quantities in even register pairs. */
6682 if ((pcum->nregs & 1) && ARM_DOUBLEWORD_ALIGN)
6683 {
6684 int res = arm_needs_doubleword_align (mode, type);
6685 if (res < 0 && warn_psabi)
6686 inform (input_location, "parameter passing for argument of type "
6687 "%qT changed in GCC 7.1", type);
6688 else if (res > 0)
6689 pcum->nregs++;
6690 }
6691
6692 /* Only allow splitting an arg between regs and memory if all preceding
6693 args were allocated to regs. For args passed by reference we only count
6694 the reference pointer. */
6695 if (pcum->can_split)
6696 nregs = 1;
6697 else
6698 nregs = ARM_NUM_REGS2 (mode, type);
6699
6700 if (!named || pcum->nregs + nregs > NUM_ARG_REGS)
6701 return NULL_RTX;
6702
6703 return gen_rtx_REG (mode, pcum->nregs);
6704 }
6705
6706 static unsigned int
6707 arm_function_arg_boundary (machine_mode mode, const_tree type)
6708 {
6709 if (!ARM_DOUBLEWORD_ALIGN)
6710 return PARM_BOUNDARY;
6711
6712 int res = arm_needs_doubleword_align (mode, type);
6713 if (res < 0 && warn_psabi)
6714 inform (input_location, "parameter passing for argument of type %qT "
6715 "changed in GCC 7.1", type);
6716
6717 return res > 0 ? DOUBLEWORD_ALIGNMENT : PARM_BOUNDARY;
6718 }
6719
6720 static int
6721 arm_arg_partial_bytes (cumulative_args_t pcum_v, machine_mode mode,
6722 tree type, bool named)
6723 {
6724 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6725 int nregs = pcum->nregs;
6726
6727 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6728 {
6729 aapcs_layout_arg (pcum, mode, type, named);
6730 return pcum->aapcs_partial;
6731 }
6732
6733 if (TARGET_IWMMXT_ABI && arm_vector_mode_supported_p (mode))
6734 return 0;
6735
6736 if (NUM_ARG_REGS > nregs
6737 && (NUM_ARG_REGS < nregs + ARM_NUM_REGS2 (mode, type))
6738 && pcum->can_split)
6739 return (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
6740
6741 return 0;
6742 }
6743
6744 /* Update the data in PCUM to advance over an argument
6745 of mode MODE and data type TYPE.
6746 (TYPE is null for libcalls where that information may not be available.) */
6747
6748 static void
6749 arm_function_arg_advance (cumulative_args_t pcum_v, machine_mode mode,
6750 const_tree type, bool named)
6751 {
6752 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
6753
6754 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
6755 {
6756 aapcs_layout_arg (pcum, mode, type, named);
6757
6758 if (pcum->aapcs_cprc_slot >= 0)
6759 {
6760 aapcs_cp_arg_layout[pcum->aapcs_cprc_slot].advance (pcum, mode,
6761 type);
6762 pcum->aapcs_cprc_slot = -1;
6763 }
6764
6765 /* Generic stuff. */
6766 pcum->aapcs_arg_processed = false;
6767 pcum->aapcs_ncrn = pcum->aapcs_next_ncrn;
6768 pcum->aapcs_reg = NULL_RTX;
6769 pcum->aapcs_partial = 0;
6770 }
6771 else
6772 {
6773 pcum->nargs += 1;
6774 if (arm_vector_mode_supported_p (mode)
6775 && pcum->named_count > pcum->nargs
6776 && TARGET_IWMMXT_ABI)
6777 pcum->iwmmxt_nregs += 1;
6778 else
6779 pcum->nregs += ARM_NUM_REGS2 (mode, type);
6780 }
6781 }
6782
6783 /* Variable sized types are passed by reference. This is a GCC
6784 extension to the ARM ABI. */
6785
6786 static bool
6787 arm_pass_by_reference (cumulative_args_t cum ATTRIBUTE_UNUSED,
6788 machine_mode mode ATTRIBUTE_UNUSED,
6789 const_tree type, bool named ATTRIBUTE_UNUSED)
6790 {
6791 return type && TREE_CODE (TYPE_SIZE (type)) != INTEGER_CST;
6792 }
6793 \f
6794 /* Encode the current state of the #pragma [no_]long_calls. */
6795 typedef enum
6796 {
6797 OFF, /* No #pragma [no_]long_calls is in effect. */
6798 LONG, /* #pragma long_calls is in effect. */
6799 SHORT /* #pragma no_long_calls is in effect. */
6800 } arm_pragma_enum;
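/* For example (illustrative only), in user code:
     #pragma long_calls          -- subsequent declarations use long calls
     void far_func (void);
     #pragma no_long_calls       -- subsequent declarations use short calls
     #pragma long_calls_off      -- revert to the command-line default  */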
6801
6802 static arm_pragma_enum arm_pragma_long_calls = OFF;
6803
6804 void
6805 arm_pr_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6806 {
6807 arm_pragma_long_calls = LONG;
6808 }
6809
6810 void
6811 arm_pr_no_long_calls (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6812 {
6813 arm_pragma_long_calls = SHORT;
6814 }
6815
6816 void
6817 arm_pr_long_calls_off (struct cpp_reader * pfile ATTRIBUTE_UNUSED)
6818 {
6819 arm_pragma_long_calls = OFF;
6820 }
6821 \f
6822 /* Handle an attribute requiring a FUNCTION_DECL;
6823 arguments as in struct attribute_spec.handler. */
6824 static tree
6825 arm_handle_fndecl_attribute (tree *node, tree name, tree args ATTRIBUTE_UNUSED,
6826 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6827 {
6828 if (TREE_CODE (*node) != FUNCTION_DECL)
6829 {
6830 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6831 name);
6832 *no_add_attrs = true;
6833 }
6834
6835 return NULL_TREE;
6836 }
6837
6838 /* Handle an "interrupt" or "isr" attribute;
6839 arguments as in struct attribute_spec.handler. */
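/* For example (illustrative only), an interrupt handler may be declared
   in user code as
     void handler (void) __attribute__ ((interrupt ("IRQ")));
   or equivalently with the "isr" spelling; the string argument selects
   the interrupt type that arm_isr_value checks.  */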
6840 static tree
6841 arm_handle_isr_attribute (tree *node, tree name, tree args, int flags,
6842 bool *no_add_attrs)
6843 {
6844 if (DECL_P (*node))
6845 {
6846 if (TREE_CODE (*node) != FUNCTION_DECL)
6847 {
6848 warning (OPT_Wattributes, "%qE attribute only applies to functions",
6849 name);
6850 *no_add_attrs = true;
6851 }
6852 /* FIXME: the argument if any is checked for type attributes;
6853 should it be checked for decl ones? */
6854 }
6855 else
6856 {
6857 if (TREE_CODE (*node) == FUNCTION_TYPE
6858 || TREE_CODE (*node) == METHOD_TYPE)
6859 {
6860 if (arm_isr_value (args) == ARM_FT_UNKNOWN)
6861 {
6862 warning (OPT_Wattributes, "%qE attribute ignored",
6863 name);
6864 *no_add_attrs = true;
6865 }
6866 }
6867 else if (TREE_CODE (*node) == POINTER_TYPE
6868 && (TREE_CODE (TREE_TYPE (*node)) == FUNCTION_TYPE
6869 || TREE_CODE (TREE_TYPE (*node)) == METHOD_TYPE)
6870 && arm_isr_value (args) != ARM_FT_UNKNOWN)
6871 {
6872 *node = build_variant_type_copy (*node);
6873 TREE_TYPE (*node) = build_type_attribute_variant
6874 (TREE_TYPE (*node),
6875 tree_cons (name, args, TYPE_ATTRIBUTES (TREE_TYPE (*node))));
6876 *no_add_attrs = true;
6877 }
6878 else
6879 {
6880 /* Possibly pass this attribute on from the type to a decl. */
6881 if (flags & ((int) ATTR_FLAG_DECL_NEXT
6882 | (int) ATTR_FLAG_FUNCTION_NEXT
6883 | (int) ATTR_FLAG_ARRAY_NEXT))
6884 {
6885 *no_add_attrs = true;
6886 return tree_cons (name, args, NULL_TREE);
6887 }
6888 else
6889 {
6890 warning (OPT_Wattributes, "%qE attribute ignored",
6891 name);
6892 }
6893 }
6894 }
6895
6896 return NULL_TREE;
6897 }
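/* Illustrative usage (a sketch; the handler name below is hypothetical):
   the attribute handled above is typically written as

     void irq_handler (void) __attribute__ ((interrupt ("IRQ")));

   where the optional argument selects the interrupt kind checked by
   arm_isr_value above.  */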
6898
6899 /* Handle a "pcs" attribute; arguments as in struct
6900 attribute_spec.handler. */
6901 static tree
6902 arm_handle_pcs_attribute (tree *node ATTRIBUTE_UNUSED, tree name, tree args,
6903 int flags ATTRIBUTE_UNUSED, bool *no_add_attrs)
6904 {
6905 if (arm_pcs_from_attribute (args) == ARM_PCS_UNKNOWN)
6906 {
6907 warning (OPT_Wattributes, "%qE attribute ignored", name);
6908 *no_add_attrs = true;
6909 }
6910 return NULL_TREE;
6911 }
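/* Illustrative usage (a sketch; the function name is hypothetical): the
   "pcs" attribute handled above selects the procedure call standard for a
   function type, e.g.

     double dot (double, double) __attribute__ ((pcs ("aapcs")));

   An unrecognised argument makes arm_pcs_from_attribute return
   ARM_PCS_UNKNOWN and the attribute is ignored with a warning.  */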
6912
6913 #if TARGET_DLLIMPORT_DECL_ATTRIBUTES
6914 /* Handle the "notshared" attribute. This attribute is another way of
6915 requesting hidden visibility. ARM's compiler supports
6916 "__declspec(notshared)"; we support the same thing via an
6917 attribute. */
6918
6919 static tree
6920 arm_handle_notshared_attribute (tree *node,
6921 tree name ATTRIBUTE_UNUSED,
6922 tree args ATTRIBUTE_UNUSED,
6923 int flags ATTRIBUTE_UNUSED,
6924 bool *no_add_attrs)
6925 {
6926 tree decl = TYPE_NAME (*node);
6927
6928 if (decl)
6929 {
6930 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
6931 DECL_VISIBILITY_SPECIFIED (decl) = 1;
6932 *no_add_attrs = false;
6933 }
6934 return NULL_TREE;
6935 }
6936 #endif
6937
6938 /* This function returns true if a function with declaration FNDECL and type
6939 FNTYPE uses the stack to pass arguments or return values, and false
6940 otherwise. This is used for functions with the attributes
6941 'cmse_nonsecure_call' or 'cmse_nonsecure_entry'; it issues diagnostic
6942 messages if the stack is used. NAME is the name of the attribute
6943 used. */
6944
6945 static bool
6946 cmse_func_args_or_return_in_stack (tree fndecl, tree name, tree fntype)
6947 {
6948 function_args_iterator args_iter;
6949 CUMULATIVE_ARGS args_so_far_v;
6950 cumulative_args_t args_so_far;
6951 bool first_param = true;
6952 tree arg_type, prev_arg_type = NULL_TREE, ret_type;
6953
6954 /* Error out if any argument is passed on the stack. */
6955 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX, fndecl);
6956 args_so_far = pack_cumulative_args (&args_so_far_v);
6957 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
6958 {
6959 rtx arg_rtx;
6960 machine_mode arg_mode = TYPE_MODE (arg_type);
6961
6962 prev_arg_type = arg_type;
6963 if (VOID_TYPE_P (arg_type))
6964 continue;
6965
6966 if (!first_param)
6967 arm_function_arg_advance (args_so_far, arg_mode, arg_type, true);
6968 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type, true);
6969 if (!arg_rtx
6970 || arm_arg_partial_bytes (args_so_far, arg_mode, arg_type, true))
6971 {
6972 error ("%qE attribute not available to functions with arguments "
6973 "passed on the stack", name);
6974 return true;
6975 }
6976 first_param = false;
6977 }
6978
6979 /* Error out for variadic functions since we cannot control how many
6980 arguments will be passed and thus the stack could be used. stdarg_p () is
6981 not used for the check to avoid walking the argument list twice. */
6982 if (prev_arg_type != NULL_TREE && !VOID_TYPE_P (prev_arg_type))
6983 {
6984 error ("%qE attribute not available to functions with variable number "
6985 "of arguments", name);
6986 return true;
6987 }
6988
6989 /* Error out if return value is passed on the stack. */
6990 ret_type = TREE_TYPE (fntype);
6991 if (arm_return_in_memory (ret_type, fntype))
6992 {
6993 error ("%qE attribute not available to functions that return value on "
6994 "the stack", name);
6995 return true;
6996 }
6997 return false;
6998 }
6999
7000 /* Called upon detection of the use of the cmse_nonsecure_entry attribute, this
7001 function will check whether the attribute is allowed here and will add the
7002 attribute to the function declaration tree or otherwise issue a warning. */
7003
7004 static tree
7005 arm_handle_cmse_nonsecure_entry (tree *node, tree name,
7006 tree /* args */,
7007 int /* flags */,
7008 bool *no_add_attrs)
7009 {
7010 tree fndecl;
7011
7012 if (!use_cmse)
7013 {
7014 *no_add_attrs = true;
7015 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> option",
7016 name);
7017 return NULL_TREE;
7018 }
7019
7020 /* Ignore attribute for function types. */
7021 if (TREE_CODE (*node) != FUNCTION_DECL)
7022 {
7023 warning (OPT_Wattributes, "%qE attribute only applies to functions",
7024 name);
7025 *no_add_attrs = true;
7026 return NULL_TREE;
7027 }
7028
7029 fndecl = *node;
7030
7031 /* Warn for static linkage functions. */
7032 if (!TREE_PUBLIC (fndecl))
7033 {
7034 warning (OPT_Wattributes, "%qE attribute has no effect on functions "
7035 "with static linkage", name);
7036 *no_add_attrs = true;
7037 return NULL_TREE;
7038 }
7039
7040 *no_add_attrs |= cmse_func_args_or_return_in_stack (fndecl, name,
7041 TREE_TYPE (fndecl));
7042 return NULL_TREE;
7043 }
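/* Illustrative usage (a sketch; the function name is hypothetical): when
   compiling with -mcmse, an entry function into the secure state is
   declared as

     int __attribute__ ((cmse_nonsecure_entry)) secure_entry (int x);

   and the checks above reject it if any argument or the return value
   would have to live on the stack.  */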
7044
7045
7046 /* Called upon detection of the use of the cmse_nonsecure_call attribute, this
7047 function will check whether the attribute is allowed here and will add the
7048 attribute to the function type tree or otherwise issue a diagnostic. The
7049 reason we check this at declaration time is to only allow the use of the
7050 attribute with declarations of function pointers and not function
7051 declarations. This function checks NODE is of the expected type and issues
7052 diagnostics otherwise using NAME. If it is not of the expected type
7053 *NO_ADD_ATTRS will be set to true. */
7054
7055 static tree
7056 arm_handle_cmse_nonsecure_call (tree *node, tree name,
7057 tree /* args */,
7058 int /* flags */,
7059 bool *no_add_attrs)
7060 {
7061 tree decl = NULL_TREE, fntype = NULL_TREE;
7062 tree type;
7063
7064 if (!use_cmse)
7065 {
7066 *no_add_attrs = true;
7067 warning (OPT_Wattributes, "%qE attribute ignored without %<-mcmse%> option",
7068 name);
7069 return NULL_TREE;
7070 }
7071
7072 if (TREE_CODE (*node) == VAR_DECL || TREE_CODE (*node) == TYPE_DECL)
7073 {
7074 decl = *node;
7075 fntype = TREE_TYPE (decl);
7076 }
7077
7078 while (fntype != NULL_TREE && TREE_CODE (fntype) == POINTER_TYPE)
7079 fntype = TREE_TYPE (fntype);
7080
7081 if (!decl || TREE_CODE (fntype) != FUNCTION_TYPE)
7082 {
7083 warning (OPT_Wattributes, "%qE attribute only applies to base type of a "
7084 "function pointer", name);
7085 *no_add_attrs = true;
7086 return NULL_TREE;
7087 }
7088
7089 *no_add_attrs |= cmse_func_args_or_return_in_stack (NULL, name, fntype);
7090
7091 if (*no_add_attrs)
7092 return NULL_TREE;
7093
7094 /* Prevent trees being shared among function types with and without
7095 cmse_nonsecure_call attribute. */
7096 type = TREE_TYPE (decl);
7097
7098 type = build_distinct_type_copy (type);
7099 TREE_TYPE (decl) = type;
7100 fntype = type;
7101
7102 while (TREE_CODE (fntype) != FUNCTION_TYPE)
7103 {
7104 type = fntype;
7105 fntype = TREE_TYPE (fntype);
7106 fntype = build_distinct_type_copy (fntype);
7107 TREE_TYPE (type) = fntype;
7108 }
7109
7110 /* Construct a type attribute and add it to the function type. */
7111 tree attrs = tree_cons (get_identifier ("cmse_nonsecure_call"), NULL_TREE,
7112 TYPE_ATTRIBUTES (fntype));
7113 TYPE_ATTRIBUTES (fntype) = attrs;
7114 return NULL_TREE;
7115 }
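/* Illustrative usage (a sketch; the names are hypothetical): the attribute
   handled above applies to the base type of a function pointer used to
   call into the non-secure state, e.g.

     typedef void __attribute__ ((cmse_nonsecure_call)) ns_callback_t (int);
     ns_callback_t *callback;

   Applying it directly to a function declaration is rejected.  */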
7116
7117 /* Return 0 if the attributes for two types are incompatible, 1 if they
7118 are compatible, and 2 if they are nearly compatible (which causes a
7119 warning to be generated). */
7120 static int
7121 arm_comp_type_attributes (const_tree type1, const_tree type2)
7122 {
7123 int l1, l2, s1, s2;
7124
7125 /* Check for mismatch of non-default calling convention. */
7126 if (TREE_CODE (type1) != FUNCTION_TYPE)
7127 return 1;
7128
7129 /* Check for mismatched call attributes. */
7130 l1 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type1)) != NULL;
7131 l2 = lookup_attribute ("long_call", TYPE_ATTRIBUTES (type2)) != NULL;
7132 s1 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type1)) != NULL;
7133 s2 = lookup_attribute ("short_call", TYPE_ATTRIBUTES (type2)) != NULL;
7134
7135 /* Only bother to check if an attribute is defined. */
7136 if (l1 | l2 | s1 | s2)
7137 {
7138 /* If one type has an attribute, the other must have the same attribute. */
7139 if ((l1 != l2) || (s1 != s2))
7140 return 0;
7141
7142 /* Disallow mixed attributes. */
7143 if ((l1 & s2) || (l2 & s1))
7144 return 0;
7145 }
7146
7147 /* Check for mismatched ISR attribute. */
7148 l1 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type1)) != NULL;
7149 if (! l1)
7150 l1 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type1)) != NULL;
7151 l2 = lookup_attribute ("isr", TYPE_ATTRIBUTES (type2)) != NULL;
7152 if (! l2)
7153 l2 = lookup_attribute ("interrupt", TYPE_ATTRIBUTES (type2)) != NULL;
7154 if (l1 != l2)
7155 return 0;
7156
7157 l1 = lookup_attribute ("cmse_nonsecure_call",
7158 TYPE_ATTRIBUTES (type1)) != NULL;
7159 l2 = lookup_attribute ("cmse_nonsecure_call",
7160 TYPE_ATTRIBUTES (type2)) != NULL;
7161
7162 if (l1 != l2)
7163 return 0;
7164
7165 return 1;
7166 }
7167
7168 /* Assigns default attributes to newly defined type. This is used to
7169 set short_call/long_call attributes for function types of
7170 functions defined inside corresponding #pragma scopes. */
7171 static void
7172 arm_set_default_type_attributes (tree type)
7173 {
7174 /* Add __attribute__ ((long_call)) to all functions, when
7175 inside #pragma long_calls or __attribute__ ((short_call)),
7176 when inside #pragma no_long_calls. */
7177 if (TREE_CODE (type) == FUNCTION_TYPE || TREE_CODE (type) == METHOD_TYPE)
7178 {
7179 tree type_attr_list, attr_name;
7180 type_attr_list = TYPE_ATTRIBUTES (type);
7181
7182 if (arm_pragma_long_calls == LONG)
7183 attr_name = get_identifier ("long_call");
7184 else if (arm_pragma_long_calls == SHORT)
7185 attr_name = get_identifier ("short_call");
7186 else
7187 return;
7188
7189 type_attr_list = tree_cons (attr_name, NULL_TREE, type_attr_list);
7190 TYPE_ATTRIBUTES (type) = type_attr_list;
7191 }
7192 }
7193 \f
7194 /* Return true if DECL is known to be linked into section SECTION. */
7195
7196 static bool
7197 arm_function_in_section_p (tree decl, section *section)
7198 {
7199 /* We can only be certain about the prevailing symbol definition. */
7200 if (!decl_binds_to_current_def_p (decl))
7201 return false;
7202
7203 /* If DECL_SECTION_NAME is set, assume it is trustworthy. */
7204 if (!DECL_SECTION_NAME (decl))
7205 {
7206 /* Make sure that we will not create a unique section for DECL. */
7207 if (flag_function_sections || DECL_COMDAT_GROUP (decl))
7208 return false;
7209 }
7210
7211 return function_section (decl) == section;
7212 }
7213
7214 /* Return nonzero if a 32-bit "long_call" should be generated for
7215 a call from the current function to DECL. We generate a long_call
7216 if the function:
7217
7218 a. has an __attribute__ ((long_call))
7219 or b. is within the scope of a #pragma long_calls
7220 or c. the -mlong-calls command line switch has been specified
7221
7222 However we do not generate a long call if the function:
7223
7224 d. has an __attribute__ ((short_call))
7225 or e. is inside the scope of a #pragma no_long_calls
7226 or f. is defined in the same section as the current function. */
7227
7228 bool
7229 arm_is_long_call_p (tree decl)
7230 {
7231 tree attrs;
7232
7233 if (!decl)
7234 return TARGET_LONG_CALLS;
7235
7236 attrs = TYPE_ATTRIBUTES (TREE_TYPE (decl));
7237 if (lookup_attribute ("short_call", attrs))
7238 return false;
7239
7240 /* For "f", be conservative, and only cater for cases in which the
7241 whole of the current function is placed in the same section. */
7242 if (!flag_reorder_blocks_and_partition
7243 && TREE_CODE (decl) == FUNCTION_DECL
7244 && arm_function_in_section_p (decl, current_function_section ()))
7245 return false;
7246
7247 if (lookup_attribute ("long_call", attrs))
7248 return true;
7249
7250 return TARGET_LONG_CALLS;
7251 }
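/* Illustrative usage (a sketch; the names are hypothetical): cases "a"
   and "d" above correspond to declarations such as

     void far_func (void) __attribute__ ((long_call));
     void near_func (void) __attribute__ ((short_call));

   while case "c" corresponds to building with -mlong-calls.  */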
7252
7253 /* Return nonzero if it is ok to make a tail-call to DECL. */
7254 static bool
7255 arm_function_ok_for_sibcall (tree decl, tree exp)
7256 {
7257 unsigned long func_type;
7258
7259 if (cfun->machine->sibcall_blocked)
7260 return false;
7261
7262 /* Never tailcall something if we are generating code for Thumb-1. */
7263 if (TARGET_THUMB1)
7264 return false;
7265
7266 /* The PIC register is live on entry to VxWorks PLT entries, so we
7267 must make the call before restoring the PIC register. */
7268 if (TARGET_VXWORKS_RTP && flag_pic && decl && !targetm.binds_local_p (decl))
7269 return false;
7270
7271 /* ??? Cannot tail-call to long calls with APCS frame and VFP, because IP
7272 may be used both as target of the call and base register for restoring
7273 the VFP registers */
7274 if (TARGET_APCS_FRAME && TARGET_ARM
7275 && TARGET_HARD_FLOAT
7276 && decl && arm_is_long_call_p (decl))
7277 return false;
7278
7279 /* If we are interworking and the function is not declared static
7280 then we can't tail-call it unless we know that it exists in this
7281 compilation unit (since it might be a Thumb routine). */
7282 if (TARGET_INTERWORK && decl && TREE_PUBLIC (decl)
7283 && !TREE_ASM_WRITTEN (decl))
7284 return false;
7285
7286 func_type = arm_current_func_type ();
7287 /* Never tailcall from an ISR routine - it needs a special exit sequence. */
7288 if (IS_INTERRUPT (func_type))
7289 return false;
7290
7291 /* ARMv8-M non-secure entry functions need to return with bxns which is only
7292 generated for entry functions themselves. */
7293 if (IS_CMSE_ENTRY (arm_current_func_type ()))
7294 return false;
7295
7296 /* We do not allow ARMv8-M non-secure calls to be turned into sibling calls;
7297 this would complicate matters for later code generation. */
7298 if (TREE_CODE (exp) == CALL_EXPR)
7299 {
7300 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7301 if (lookup_attribute ("cmse_nonsecure_call", TYPE_ATTRIBUTES (fntype)))
7302 return false;
7303 }
7304
7305 if (!VOID_TYPE_P (TREE_TYPE (DECL_RESULT (cfun->decl))))
7306 {
7307 /* Check that the return value locations are the same. For
7308 example that we aren't returning a value from the sibling in
7309 a VFP register but then need to transfer it to a core
7310 register. */
7311 rtx a, b;
7312 tree decl_or_type = decl;
7313
7314 /* If it is an indirect function pointer, get the function type. */
7315 if (!decl)
7316 decl_or_type = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7317
7318 a = arm_function_value (TREE_TYPE (exp), decl_or_type, false);
7319 b = arm_function_value (TREE_TYPE (DECL_RESULT (cfun->decl)),
7320 cfun->decl, false);
7321 if (!rtx_equal_p (a, b))
7322 return false;
7323 }
7324
7325 /* Never tailcall if function may be called with a misaligned SP. */
7326 if (IS_STACKALIGN (func_type))
7327 return false;
7328
7329 /* The AAPCS says that, on bare-metal, calls to unresolved weak
7330 references should become a NOP. Don't convert such calls into
7331 sibling calls. */
7332 if (TARGET_AAPCS_BASED
7333 && arm_abi == ARM_ABI_AAPCS
7334 && decl
7335 && DECL_WEAK (decl))
7336 return false;
7337
7338 /* We cannot do a tailcall for an indirect call by descriptor if all the
7339 argument registers are used because the only register left to load the
7340 address is IP and it will already contain the static chain. */
7341 if (!decl && CALL_EXPR_BY_DESCRIPTOR (exp) && !flag_trampolines)
7342 {
7343 tree fntype = TREE_TYPE (TREE_TYPE (CALL_EXPR_FN (exp)));
7344 CUMULATIVE_ARGS cum;
7345 cumulative_args_t cum_v;
7346
7347 arm_init_cumulative_args (&cum, fntype, NULL_RTX, NULL_TREE);
7348 cum_v = pack_cumulative_args (&cum);
7349
7350 for (tree t = TYPE_ARG_TYPES (fntype); t; t = TREE_CHAIN (t))
7351 {
7352 tree type = TREE_VALUE (t);
7353 if (!VOID_TYPE_P (type))
7354 arm_function_arg_advance (cum_v, TYPE_MODE (type), type, true);
7355 }
7356
7357 if (!arm_function_arg (cum_v, SImode, integer_type_node, true))
7358 return false;
7359 }
7360
7361 /* Everything else is ok. */
7362 return true;
7363 }
7364
7365 \f
7366 /* Addressing mode support functions. */
7367
7368 /* Return nonzero if X is a legitimate immediate operand when compiling
7369 for PIC. We know that X satisfies CONSTANT_P and flag_pic is true. */
7370 int
7371 legitimate_pic_operand_p (rtx x)
7372 {
7373 if (GET_CODE (x) == SYMBOL_REF
7374 || (GET_CODE (x) == CONST
7375 && GET_CODE (XEXP (x, 0)) == PLUS
7376 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF))
7377 return 0;
7378
7379 return 1;
7380 }
7381
7382 /* Record that the current function needs a PIC register. If PIC_REG is null,
7383 a new pseudo is allocated as PIC register, otherwise PIC_REG is used. In
7384 both cases cfun->machine->pic_reg is initialized if we have not already done
7385 so. COMPUTE_NOW decides whether and where to set the PIC register. If true,
7386 the PIC register is reloaded at the current position in the instruction stream
7387 regardless of whether it was loaded before. Otherwise, it is only loaded
7388 if not already done so (crtl->uses_pic_offset_table is null). Note that
7389 nonnull PIC_REG is only supported iff COMPUTE_NOW is true and null PIC_REG
7390 is only supported iff COMPUTE_NOW is false. */
7391
7392 static void
7393 require_pic_register (rtx pic_reg, bool compute_now)
7394 {
7395 gcc_assert (compute_now == (pic_reg != NULL_RTX));
7396
7397 /* A lot of the logic here is made obscure by the fact that this
7398 routine gets called as part of the rtx cost estimation process.
7399 We don't want those calls to affect any assumptions about the real
7400 function; and further, we can't call entry_of_function() until we
7401 start the real expansion process. */
7402 if (!crtl->uses_pic_offset_table || compute_now)
7403 {
7404 gcc_assert (can_create_pseudo_p ()
7405 || (pic_reg != NULL_RTX
7406 && REG_P (pic_reg)
7407 && GET_MODE (pic_reg) == Pmode));
7408 if (arm_pic_register != INVALID_REGNUM
7409 && !compute_now
7410 && !(TARGET_THUMB1 && arm_pic_register > LAST_LO_REGNUM))
7411 {
7412 if (!cfun->machine->pic_reg)
7413 cfun->machine->pic_reg = gen_rtx_REG (Pmode, arm_pic_register);
7414
7415 /* Play games to avoid marking the function as needing pic
7416 if we are being called as part of the cost-estimation
7417 process. */
7418 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7419 crtl->uses_pic_offset_table = 1;
7420 }
7421 else
7422 {
7423 rtx_insn *seq, *insn;
7424
7425 if (pic_reg == NULL_RTX)
7426 pic_reg = gen_reg_rtx (Pmode);
7427 if (!cfun->machine->pic_reg)
7428 cfun->machine->pic_reg = pic_reg;
7429
7430 /* Play games to avoid marking the function as needing pic
7431 if we are being called as part of the cost-estimation
7432 process. */
7433 if (current_ir_type () != IR_GIMPLE || currently_expanding_to_rtl)
7434 {
7435 crtl->uses_pic_offset_table = 1;
7436 start_sequence ();
7437
7438 if (TARGET_THUMB1 && arm_pic_register != INVALID_REGNUM
7439 && arm_pic_register > LAST_LO_REGNUM
7440 && !compute_now)
7441 emit_move_insn (cfun->machine->pic_reg,
7442 gen_rtx_REG (Pmode, arm_pic_register));
7443 else
7444 arm_load_pic_register (0UL, pic_reg);
7445
7446 seq = get_insns ();
7447 end_sequence ();
7448
7449 for (insn = seq; insn; insn = NEXT_INSN (insn))
7450 if (INSN_P (insn))
7451 INSN_LOCATION (insn) = prologue_location;
7452
7453 /* We can be called during expansion of PHI nodes, where
7454 we can't yet emit instructions directly in the final
7455 insn stream. Queue the insns on the entry edge, they will
7456 be committed after everything else is expanded. */
7457 if (currently_expanding_to_rtl)
7458 insert_insn_on_edge (seq,
7459 single_succ_edge
7460 (ENTRY_BLOCK_PTR_FOR_FN (cfun)));
7461 else
7462 emit_insn (seq);
7463 }
7464 }
7465 }
7466 }
7467
7468 /* Legitimize PIC load to ORIG into REG. If REG is NULL, a new pseudo is
7469 created to hold the result of the load. If not NULL, PIC_REG indicates
7470 which register to use as PIC register, otherwise it is decided by register
7471 allocator. COMPUTE_NOW forces the PIC register to be loaded at the current
7472 location in the instruction stream, regardless of whether it was loaded
7473 previously. Note that nonnull PIC_REG is only supported iff COMPUTE_NOW is
7474 true and null PIC_REG is only supported iff COMPUTE_NOW is false.
7475
7476 Returns the register REG into which the PIC load is performed. */
7477
7478 rtx
7479 legitimize_pic_address (rtx orig, machine_mode mode, rtx reg, rtx pic_reg,
7480 bool compute_now)
7481 {
7482 gcc_assert (compute_now == (pic_reg != NULL_RTX));
7483
7484 if (GET_CODE (orig) == SYMBOL_REF
7485 || GET_CODE (orig) == LABEL_REF)
7486 {
7487 if (reg == 0)
7488 {
7489 gcc_assert (can_create_pseudo_p ());
7490 reg = gen_reg_rtx (Pmode);
7491 }
7492
7493 /* VxWorks does not impose a fixed gap between segments; the run-time
7494 gap can be different from the object-file gap. We therefore can't
7495 use GOTOFF unless we are absolutely sure that the symbol is in the
7496 same segment as the GOT. Unfortunately, the flexibility of linker
7497 scripts means that we can't be sure of that in general, so assume
7498 that GOTOFF is never valid on VxWorks. */
7499 /* References to weak symbols cannot be resolved locally: they
7500 may be overridden by a non-weak definition at link time. */
7501 rtx_insn *insn;
7502 if ((GET_CODE (orig) == LABEL_REF
7503 || (GET_CODE (orig) == SYMBOL_REF
7504 && SYMBOL_REF_LOCAL_P (orig)
7505 && (SYMBOL_REF_DECL (orig)
7506 ? !DECL_WEAK (SYMBOL_REF_DECL (orig)) : 1)))
7507 && NEED_GOT_RELOC
7508 && arm_pic_data_is_text_relative)
7509 insn = arm_pic_static_addr (orig, reg);
7510 else
7511 {
7512 rtx pat;
7513 rtx mem;
7514
7515 /* If this function doesn't have a pic register, create one now. */
7516 require_pic_register (pic_reg, compute_now);
7517
7518 if (pic_reg == NULL_RTX)
7519 pic_reg = cfun->machine->pic_reg;
7520
7521 pat = gen_calculate_pic_address (reg, pic_reg, orig);
7522
7523 /* Make the MEM as close to a constant as possible. */
7524 mem = SET_SRC (pat);
7525 gcc_assert (MEM_P (mem) && !MEM_VOLATILE_P (mem));
7526 MEM_READONLY_P (mem) = 1;
7527 MEM_NOTRAP_P (mem) = 1;
7528
7529 insn = emit_insn (pat);
7530 }
7531
7532 /* Put a REG_EQUAL note on this insn, so that it can be optimized
7533 by loop. */
7534 set_unique_reg_note (insn, REG_EQUAL, orig);
7535
7536 return reg;
7537 }
7538 else if (GET_CODE (orig) == CONST)
7539 {
7540 rtx base, offset;
7541
7542 if (GET_CODE (XEXP (orig, 0)) == PLUS
7543 && XEXP (XEXP (orig, 0), 0) == cfun->machine->pic_reg)
7544 return orig;
7545
7546 /* Handle the case where we have: const (UNSPEC_TLS). */
7547 if (GET_CODE (XEXP (orig, 0)) == UNSPEC
7548 && XINT (XEXP (orig, 0), 1) == UNSPEC_TLS)
7549 return orig;
7550
7551 /* Handle the case where we have:
7552 const (plus (UNSPEC_TLS) (ADDEND)). The ADDEND must be a
7553 CONST_INT. */
7554 if (GET_CODE (XEXP (orig, 0)) == PLUS
7555 && GET_CODE (XEXP (XEXP (orig, 0), 0)) == UNSPEC
7556 && XINT (XEXP (XEXP (orig, 0), 0), 1) == UNSPEC_TLS)
7557 {
7558 gcc_assert (CONST_INT_P (XEXP (XEXP (orig, 0), 1)));
7559 return orig;
7560 }
7561
7562 if (reg == 0)
7563 {
7564 gcc_assert (can_create_pseudo_p ());
7565 reg = gen_reg_rtx (Pmode);
7566 }
7567
7568 gcc_assert (GET_CODE (XEXP (orig, 0)) == PLUS);
7569
7570 base = legitimize_pic_address (XEXP (XEXP (orig, 0), 0), Pmode, reg,
7571 pic_reg, compute_now);
7572 offset = legitimize_pic_address (XEXP (XEXP (orig, 0), 1), Pmode,
7573 base == reg ? 0 : reg, pic_reg,
7574 compute_now);
7575
7576 if (CONST_INT_P (offset))
7577 {
7578 /* The base register doesn't really matter, we only want to
7579 test the index for the appropriate mode. */
7580 if (!arm_legitimate_index_p (mode, offset, SET, 0))
7581 {
7582 gcc_assert (can_create_pseudo_p ());
7583 offset = force_reg (Pmode, offset);
7584 }
7585
7586 if (CONST_INT_P (offset))
7587 return plus_constant (Pmode, base, INTVAL (offset));
7588 }
7589
7590 if (GET_MODE_SIZE (mode) > 4
7591 && (GET_MODE_CLASS (mode) == MODE_INT
7592 || TARGET_SOFT_FLOAT))
7593 {
7594 emit_insn (gen_addsi3 (reg, base, offset));
7595 return reg;
7596 }
7597
7598 return gen_rtx_PLUS (Pmode, base, offset);
7599 }
7600
7601 return orig;
7602 }
7603
7604
7605 /* Find a spare register to use during the prolog of a function. */
7606
7607 static int
7608 thumb_find_work_register (unsigned long pushed_regs_mask)
7609 {
7610 int reg;
7611
7612 /* Check the argument registers first as these are call-used. The
7613 register allocation order means that sometimes r3 might be used
7614 but earlier argument registers might not, so check them all. */
7615 for (reg = LAST_ARG_REGNUM; reg >= 0; reg --)
7616 if (!df_regs_ever_live_p (reg))
7617 return reg;
7618
7619 /* Before going on to check the call-saved registers we can try a couple
7620 more ways of deducing that r3 is available. The first is when we are
7621 pushing anonymous arguments onto the stack and we have fewer than 4
7622 registers' worth of fixed arguments (*). In this case r3 will be part of
7623 the variable argument list and so we can be sure that it will be
7624 pushed right at the start of the function. Hence it will be available
7625 for the rest of the prologue.
7626 (*): i.e. crtl->args.pretend_args_size is greater than 0. */
7627 if (cfun->machine->uses_anonymous_args
7628 && crtl->args.pretend_args_size > 0)
7629 return LAST_ARG_REGNUM;
7630
7631 /* The other case is when we have fixed arguments but fewer than 4 registers'
7632 worth. In this case r3 might be used in the body of the function, but
7633 it is not being used to convey an argument into the function. In theory
7634 we could just check crtl->args.size to see how many bytes are
7635 being passed in argument registers, but it seems that it is unreliable.
7636 Sometimes it will have the value 0 when in fact arguments are being
7637 passed. (See testcase execute/20021111-1.c for an example). So we also
7638 check the args_info.nregs field as well. The problem with this field is
7639 that it makes no allowances for arguments that are passed to the
7640 function but which are not used. Hence we could miss an opportunity
7641 when a function has an unused argument in r3. But it is better to be
7642 safe than to be sorry. */
7643 if (! cfun->machine->uses_anonymous_args
7644 && crtl->args.size >= 0
7645 && crtl->args.size <= (LAST_ARG_REGNUM * UNITS_PER_WORD)
7646 && (TARGET_AAPCS_BASED
7647 ? crtl->args.info.aapcs_ncrn < 4
7648 : crtl->args.info.nregs < 4))
7649 return LAST_ARG_REGNUM;
7650
7651 /* Otherwise look for a call-saved register that is going to be pushed. */
7652 for (reg = LAST_LO_REGNUM; reg > LAST_ARG_REGNUM; reg --)
7653 if (pushed_regs_mask & (1 << reg))
7654 return reg;
7655
7656 if (TARGET_THUMB2)
7657 {
7658 /* Thumb-2 can use high regs. */
7659 for (reg = FIRST_HI_REGNUM; reg < 15; reg ++)
7660 if (pushed_regs_mask & (1 << reg))
7661 return reg;
7662 }
7663 /* Something went wrong - thumb_compute_save_reg_mask()
7664 should have arranged for a suitable register to be pushed. */
7665 gcc_unreachable ();
7666 }
7667
7668 static GTY(()) int pic_labelno;
7669
7670 /* Generate code to load the PIC register. In thumb mode SCRATCH is a
7671 low register. */
7672
7673 void
7674 arm_load_pic_register (unsigned long saved_regs ATTRIBUTE_UNUSED, rtx pic_reg)
7675 {
7676 rtx l1, labelno, pic_tmp, pic_rtx;
7677
7678 if (crtl->uses_pic_offset_table == 0 || TARGET_SINGLE_PIC_BASE)
7679 return;
7680
7681 gcc_assert (flag_pic);
7682
7683 if (pic_reg == NULL_RTX)
7684 pic_reg = cfun->machine->pic_reg;
7685 if (TARGET_VXWORKS_RTP)
7686 {
7687 pic_rtx = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_BASE);
7688 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7689 emit_insn (gen_pic_load_addr_32bit (pic_reg, pic_rtx));
7690
7691 emit_insn (gen_rtx_SET (pic_reg, gen_rtx_MEM (Pmode, pic_reg)));
7692
7693 pic_tmp = gen_rtx_SYMBOL_REF (Pmode, VXWORKS_GOTT_INDEX);
7694 emit_insn (gen_pic_offset_arm (pic_reg, pic_reg, pic_tmp));
7695 }
7696 else
7697 {
7698 /* We use an UNSPEC rather than a LABEL_REF because this label
7699 never appears in the code stream. */
7700
7701 labelno = GEN_INT (pic_labelno++);
7702 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7703 l1 = gen_rtx_CONST (VOIDmode, l1);
7704
7705 /* On the ARM the PC register contains 'dot + 8' at the time of the
7706 addition, on the Thumb it is 'dot + 4'. */
7707 pic_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7708 pic_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, pic_rtx),
7709 UNSPEC_GOTSYM_OFF);
7710 pic_rtx = gen_rtx_CONST (Pmode, pic_rtx);
7711
7712 if (TARGET_32BIT)
7713 {
7714 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7715 }
7716 else /* TARGET_THUMB1 */
7717 {
7718 if (arm_pic_register != INVALID_REGNUM
7719 && REGNO (pic_reg) > LAST_LO_REGNUM)
7720 {
7721 /* We will have pushed the pic register, so we should always be
7722 able to find a work register. */
7723 pic_tmp = gen_rtx_REG (SImode,
7724 thumb_find_work_register (saved_regs));
7725 emit_insn (gen_pic_load_addr_thumb1 (pic_tmp, pic_rtx));
7726 emit_insn (gen_movsi (pic_offset_table_rtx, pic_tmp));
7727 emit_insn (gen_pic_add_dot_plus_four (pic_reg, pic_reg, labelno));
7728 }
7729 else if (arm_pic_register != INVALID_REGNUM
7730 && arm_pic_register > LAST_LO_REGNUM
7731 && REGNO (pic_reg) <= LAST_LO_REGNUM)
7732 {
7733 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7734 emit_move_insn (gen_rtx_REG (Pmode, arm_pic_register), pic_reg);
7735 emit_use (gen_rtx_REG (Pmode, arm_pic_register));
7736 }
7737 else
7738 emit_insn (gen_pic_load_addr_unified (pic_reg, pic_rtx, labelno));
7739 }
7740 }
7741
7742 /* Need to emit this whether or not we obey regdecls,
7743 since setjmp/longjmp can cause life info to screw up. */
7744 emit_use (pic_reg);
7745 }
7746
7747 /* Generate code to load the address of a static var when flag_pic is set. */
7748 static rtx_insn *
7749 arm_pic_static_addr (rtx orig, rtx reg)
7750 {
7751 rtx l1, labelno, offset_rtx;
7752
7753 gcc_assert (flag_pic);
7754
7755 /* We use an UNSPEC rather than a LABEL_REF because this label
7756 never appears in the code stream. */
7757 labelno = GEN_INT (pic_labelno++);
7758 l1 = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
7759 l1 = gen_rtx_CONST (VOIDmode, l1);
7760
7761 /* On the ARM the PC register contains 'dot + 8' at the time of the
7762 addition, on the Thumb it is 'dot + 4'. */
7763 offset_rtx = plus_constant (Pmode, l1, TARGET_ARM ? 8 : 4);
7764 offset_rtx = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, orig, offset_rtx),
7765 UNSPEC_SYMBOL_OFFSET);
7766 offset_rtx = gen_rtx_CONST (Pmode, offset_rtx);
7767
7768 return emit_insn (gen_pic_load_addr_unified (reg, offset_rtx, labelno));
7769 }
7770
7771 /* Return nonzero if X is valid as an ARM state addressing register. */
7772 static int
7773 arm_address_register_rtx_p (rtx x, int strict_p)
7774 {
7775 int regno;
7776
7777 if (!REG_P (x))
7778 return 0;
7779
7780 regno = REGNO (x);
7781
7782 if (strict_p)
7783 return ARM_REGNO_OK_FOR_BASE_P (regno);
7784
7785 return (regno <= LAST_ARM_REGNUM
7786 || regno >= FIRST_PSEUDO_REGISTER
7787 || regno == FRAME_POINTER_REGNUM
7788 || regno == ARG_POINTER_REGNUM);
7789 }
7790
7791 /* Return TRUE if this rtx is the difference of a symbol and a label,
7792 and will reduce to a PC-relative relocation in the object file.
7793 Expressions like this can be left alone when generating PIC, rather
7794 than forced through the GOT. */
7795 static int
7796 pcrel_constant_p (rtx x)
7797 {
7798 if (GET_CODE (x) == MINUS)
7799 return symbol_mentioned_p (XEXP (x, 0)) && label_mentioned_p (XEXP (x, 1));
7800
7801 return FALSE;
7802 }
7803
7804 /* Return true if X will surely end up in an index register after next
7805 splitting pass. */
7806 static bool
7807 will_be_in_index_register (const_rtx x)
7808 {
7809 /* arm.md: calculate_pic_address will split this into a register. */
7810 return GET_CODE (x) == UNSPEC && (XINT (x, 1) == UNSPEC_PIC_SYM);
7811 }
7812
7813 /* Return nonzero if X is a valid ARM state address operand. */
7814 int
7815 arm_legitimate_address_outer_p (machine_mode mode, rtx x, RTX_CODE outer,
7816 int strict_p)
7817 {
7818 bool use_ldrd;
7819 enum rtx_code code = GET_CODE (x);
7820
7821 if (arm_address_register_rtx_p (x, strict_p))
7822 return 1;
7823
7824 use_ldrd = (TARGET_LDRD
7825 && (mode == DImode || mode == DFmode));
7826
7827 if (code == POST_INC || code == PRE_DEC
7828 || ((code == PRE_INC || code == POST_DEC)
7829 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7830 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7831
7832 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7833 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7834 && GET_CODE (XEXP (x, 1)) == PLUS
7835 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7836 {
7837 rtx addend = XEXP (XEXP (x, 1), 1);
7838
7839 /* Don't allow ldrd post-increment by register because it's hard
7840 to fix up invalid register choices. */
7841 if (use_ldrd
7842 && GET_CODE (x) == POST_MODIFY
7843 && REG_P (addend))
7844 return 0;
7845
7846 return ((use_ldrd || GET_MODE_SIZE (mode) <= 4)
7847 && arm_legitimate_index_p (mode, addend, outer, strict_p));
7848 }
7849
7850 /* After reload constants split into minipools will have addresses
7851 from a LABEL_REF. */
7852 else if (reload_completed
7853 && (code == LABEL_REF
7854 || (code == CONST
7855 && GET_CODE (XEXP (x, 0)) == PLUS
7856 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7857 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7858 return 1;
7859
7860 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7861 return 0;
7862
7863 else if (code == PLUS)
7864 {
7865 rtx xop0 = XEXP (x, 0);
7866 rtx xop1 = XEXP (x, 1);
7867
7868 return ((arm_address_register_rtx_p (xop0, strict_p)
7869 && ((CONST_INT_P (xop1)
7870 && arm_legitimate_index_p (mode, xop1, outer, strict_p))
7871 || (!strict_p && will_be_in_index_register (xop1))))
7872 || (arm_address_register_rtx_p (xop1, strict_p)
7873 && arm_legitimate_index_p (mode, xop0, outer, strict_p)));
7874 }
7875
7876 #if 0
7877 /* Reload currently can't handle MINUS, so disable this for now */
7878 else if (GET_CODE (x) == MINUS)
7879 {
7880 rtx xop0 = XEXP (x, 0);
7881 rtx xop1 = XEXP (x, 1);
7882
7883 return (arm_address_register_rtx_p (xop0, strict_p)
7884 && arm_legitimate_index_p (mode, xop1, outer, strict_p));
7885 }
7886 #endif
7887
7888 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7889 && code == SYMBOL_REF
7890 && CONSTANT_POOL_ADDRESS_P (x)
7891 && ! (flag_pic
7892 && symbol_mentioned_p (get_pool_constant (x))
7893 && ! pcrel_constant_p (get_pool_constant (x))))
7894 return 1;
7895
7896 return 0;
7897 }
7898
7899 /* Return true if we can avoid creating a constant pool entry for x. */
7900 static bool
7901 can_avoid_literal_pool_for_label_p (rtx x)
7902 {
7903 /* Normally we can assign constant values to target registers without
7904 the help of a constant pool. But there are cases where we have to use a
7905 constant pool, for example:
7906 1) assigning a label to a register.
7907 2) sign-extending an 8-bit value to 32 bits and then assigning it to a register.
7908
7909 Constant pool access in format:
7910 (set (reg r0) (mem (symbol_ref (".LC0"))))
7911 will cause the use of the literal pool (later, in function arm_reorg).
7912 So here we mark such a format as invalid; the compiler will then
7913 adjust it into:
7914 (set (reg r0) (symbol_ref (".LC0")))
7915 (set (reg r0) (mem (reg r0))).
7916 No extra register is required, and (mem (reg r0)) won't cause the use
7917 of literal pools. */
7918 if (arm_disable_literal_pool && GET_CODE (x) == SYMBOL_REF
7919 && CONSTANT_POOL_ADDRESS_P (x))
7920 return 1;
7921 return 0;
7922 }
7923
7924
7925 /* Return nonzero if X is a valid Thumb-2 address operand. */
7926 static int
7927 thumb2_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
7928 {
7929 bool use_ldrd;
7930 enum rtx_code code = GET_CODE (x);
7931
7932 if (arm_address_register_rtx_p (x, strict_p))
7933 return 1;
7934
7935 use_ldrd = (TARGET_LDRD
7936 && (mode == DImode || mode == DFmode));
7937
7938 if (code == POST_INC || code == PRE_DEC
7939 || ((code == PRE_INC || code == POST_DEC)
7940 && (use_ldrd || GET_MODE_SIZE (mode) <= 4)))
7941 return arm_address_register_rtx_p (XEXP (x, 0), strict_p);
7942
7943 else if ((code == POST_MODIFY || code == PRE_MODIFY)
7944 && arm_address_register_rtx_p (XEXP (x, 0), strict_p)
7945 && GET_CODE (XEXP (x, 1)) == PLUS
7946 && rtx_equal_p (XEXP (XEXP (x, 1), 0), XEXP (x, 0)))
7947 {
7948 /* Thumb-2 only has autoincrement by constant. */
7949 rtx addend = XEXP (XEXP (x, 1), 1);
7950 HOST_WIDE_INT offset;
7951
7952 if (!CONST_INT_P (addend))
7953 return 0;
7954
7955 offset = INTVAL(addend);
7956 if (GET_MODE_SIZE (mode) <= 4)
7957 return (offset > -256 && offset < 256);
7958
7959 return (use_ldrd && offset > -1024 && offset < 1024
7960 && (offset & 3) == 0);
7961 }
7962
7963 /* After reload constants split into minipools will have addresses
7964 from a LABEL_REF. */
7965 else if (reload_completed
7966 && (code == LABEL_REF
7967 || (code == CONST
7968 && GET_CODE (XEXP (x, 0)) == PLUS
7969 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
7970 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
7971 return 1;
7972
7973 else if (mode == TImode || (TARGET_NEON && VALID_NEON_STRUCT_MODE (mode)))
7974 return 0;
7975
7976 else if (code == PLUS)
7977 {
7978 rtx xop0 = XEXP (x, 0);
7979 rtx xop1 = XEXP (x, 1);
7980
7981 return ((arm_address_register_rtx_p (xop0, strict_p)
7982 && (thumb2_legitimate_index_p (mode, xop1, strict_p)
7983 || (!strict_p && will_be_in_index_register (xop1))))
7984 || (arm_address_register_rtx_p (xop1, strict_p)
7985 && thumb2_legitimate_index_p (mode, xop0, strict_p)));
7986 }
7987
7988 else if (can_avoid_literal_pool_for_label_p (x))
7989 return 0;
7990
7991 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
7992 && code == SYMBOL_REF
7993 && CONSTANT_POOL_ADDRESS_P (x)
7994 && ! (flag_pic
7995 && symbol_mentioned_p (get_pool_constant (x))
7996 && ! pcrel_constant_p (get_pool_constant (x))))
7997 return 1;
7998
7999 return 0;
8000 }
8001
8002 /* Return nonzero if INDEX is valid for an address index operand in
8003 ARM state. */
8004 static int
8005 arm_legitimate_index_p (machine_mode mode, rtx index, RTX_CODE outer,
8006 int strict_p)
8007 {
8008 HOST_WIDE_INT range;
8009 enum rtx_code code = GET_CODE (index);
8010
8011 /* Standard coprocessor addressing modes. */
8012 if (TARGET_HARD_FLOAT
8013 && (mode == SFmode || mode == DFmode))
8014 return (code == CONST_INT && INTVAL (index) < 1024
8015 && INTVAL (index) > -1024
8016 && (INTVAL (index) & 3) == 0);
8017
8018 /* For quad modes, we restrict the constant offset to be slightly less
8019 than what the instruction format permits. We do this because for
8020 quad mode moves, we will actually decompose them into two separate
8021 double-mode reads or writes. INDEX must therefore be a valid
8022 (double-mode) offset and so should INDEX+8. */
8023 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8024 return (code == CONST_INT
8025 && INTVAL (index) < 1016
8026 && INTVAL (index) > -1024
8027 && (INTVAL (index) & 3) == 0);
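  /* Worked example (illustrative): with the bound above the largest accepted
     offset is 1012, so the second double-mode access lands at 1020, still
     inside the +/-1024 window; an offset of 1016 would push it to 1024 and
     is therefore rejected.  */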
8028
8029 /* We have no such constraint on double mode offsets, so we permit the
8030 full range of the instruction format. */
8031 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8032 return (code == CONST_INT
8033 && INTVAL (index) < 1024
8034 && INTVAL (index) > -1024
8035 && (INTVAL (index) & 3) == 0);
8036
8037 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8038 return (code == CONST_INT
8039 && INTVAL (index) < 1024
8040 && INTVAL (index) > -1024
8041 && (INTVAL (index) & 3) == 0);
8042
8043 if (arm_address_register_rtx_p (index, strict_p)
8044 && (GET_MODE_SIZE (mode) <= 4))
8045 return 1;
8046
8047 if (mode == DImode || mode == DFmode)
8048 {
8049 if (code == CONST_INT)
8050 {
8051 HOST_WIDE_INT val = INTVAL (index);
8052
8053 /* Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8054 If vldr is selected it uses arm_coproc_mem_operand. */
8055 if (TARGET_LDRD)
8056 return val > -256 && val < 256;
8057 else
8058 return val > -4096 && val < 4092;
8059 }
8060
8061 return TARGET_LDRD && arm_address_register_rtx_p (index, strict_p);
8062 }
8063
8064 if (GET_MODE_SIZE (mode) <= 4
8065 && ! (arm_arch4
8066 && (mode == HImode
8067 || mode == HFmode
8068 || (mode == QImode && outer == SIGN_EXTEND))))
8069 {
8070 if (code == MULT)
8071 {
8072 rtx xiop0 = XEXP (index, 0);
8073 rtx xiop1 = XEXP (index, 1);
8074
8075 return ((arm_address_register_rtx_p (xiop0, strict_p)
8076 && power_of_two_operand (xiop1, SImode))
8077 || (arm_address_register_rtx_p (xiop1, strict_p)
8078 && power_of_two_operand (xiop0, SImode)));
8079 }
8080 else if (code == LSHIFTRT || code == ASHIFTRT
8081 || code == ASHIFT || code == ROTATERT)
8082 {
8083 rtx op = XEXP (index, 1);
8084
8085 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8086 && CONST_INT_P (op)
8087 && INTVAL (op) > 0
8088 && INTVAL (op) <= 31);
8089 }
8090 }
8091
8092 /* For ARM v4 we may be doing a sign-extend operation during the
8093 load. */
8094 if (arm_arch4)
8095 {
8096 if (mode == HImode
8097 || mode == HFmode
8098 || (outer == SIGN_EXTEND && mode == QImode))
8099 range = 256;
8100 else
8101 range = 4096;
8102 }
8103 else
8104 range = (mode == HImode || mode == HFmode) ? 4095 : 4096;
8105
8106 return (code == CONST_INT
8107 && INTVAL (index) < range
8108 && INTVAL (index) > -range);
8109 }
8110
8111 /* Return true if OP is a valid index scaling factor for Thumb-2 address
8112 index operand. i.e. 1, 2, 4 or 8. */
8113 static bool
8114 thumb2_index_mul_operand (rtx op)
8115 {
8116 HOST_WIDE_INT val;
8117
8118 if (!CONST_INT_P (op))
8119 return false;
8120
8121 val = INTVAL(op);
8122 return (val == 1 || val == 2 || val == 4 || val == 8);
8123 }
8124
8125 /* Return nonzero if INDEX is a valid Thumb-2 address index operand. */
8126 static int
8127 thumb2_legitimate_index_p (machine_mode mode, rtx index, int strict_p)
8128 {
8129 enum rtx_code code = GET_CODE (index);
8130
8131 /* ??? Combine arm and thumb2 coprocessor addressing modes. */
8132 /* Standard coprocessor addressing modes. */
8133 if (TARGET_HARD_FLOAT
8134 && (mode == SFmode || mode == DFmode))
8135 return (code == CONST_INT && INTVAL (index) < 1024
8136 /* Thumb-2 allows only > -256 index range for its core register
8137 load/stores. Since we allow SF/DF in core registers, we have
8138 to use the intersection between -256~4096 (core) and -1024~1024
8139 (coprocessor). */
8140 && INTVAL (index) > -256
8141 && (INTVAL (index) & 3) == 0);
8142
8143 if (TARGET_REALLY_IWMMXT && VALID_IWMMXT_REG_MODE (mode))
8144 {
8145 /* For DImode assume values will usually live in core regs
8146 and only allow LDRD addressing modes. */
8147 if (!TARGET_LDRD || mode != DImode)
8148 return (code == CONST_INT
8149 && INTVAL (index) < 1024
8150 && INTVAL (index) > -1024
8151 && (INTVAL (index) & 3) == 0);
8152 }
8153
8154 /* For quad modes, we restrict the constant offset to be slightly less
8155 than what the instruction format permits. We do this because for
8156 quad mode moves, we will actually decompose them into two separate
8157 double-mode reads or writes. INDEX must therefore be a valid
8158 (double-mode) offset and so should INDEX+8. */
8159 if (TARGET_NEON && VALID_NEON_QREG_MODE (mode))
8160 return (code == CONST_INT
8161 && INTVAL (index) < 1016
8162 && INTVAL (index) > -1024
8163 && (INTVAL (index) & 3) == 0);
8164
8165 /* We have no such constraint on double mode offsets, so we permit the
8166 full range of the instruction format. */
8167 if (TARGET_NEON && VALID_NEON_DREG_MODE (mode))
8168 return (code == CONST_INT
8169 && INTVAL (index) < 1024
8170 && INTVAL (index) > -1024
8171 && (INTVAL (index) & 3) == 0);
8172
8173 if (arm_address_register_rtx_p (index, strict_p)
8174 && (GET_MODE_SIZE (mode) <= 4))
8175 return 1;
8176
8177 if (mode == DImode || mode == DFmode)
8178 {
8179 if (code == CONST_INT)
8180 {
8181 HOST_WIDE_INT val = INTVAL (index);
8182 /* Thumb-2 ldrd only has reg+const addressing modes.
8183 Assume we emit ldrd or 2x ldr if !TARGET_LDRD.
8184 If vldr is selected it uses arm_coproc_mem_operand. */
8185 if (TARGET_LDRD)
8186 return IN_RANGE (val, -1020, 1020) && (val & 3) == 0;
8187 else
8188 return IN_RANGE (val, -255, 4095 - 4);
8189 }
8190 else
8191 return 0;
8192 }
8193
8194 if (code == MULT)
8195 {
8196 rtx xiop0 = XEXP (index, 0);
8197 rtx xiop1 = XEXP (index, 1);
8198
8199 return ((arm_address_register_rtx_p (xiop0, strict_p)
8200 && thumb2_index_mul_operand (xiop1))
8201 || (arm_address_register_rtx_p (xiop1, strict_p)
8202 && thumb2_index_mul_operand (xiop0)));
8203 }
8204 else if (code == ASHIFT)
8205 {
8206 rtx op = XEXP (index, 1);
8207
8208 return (arm_address_register_rtx_p (XEXP (index, 0), strict_p)
8209 && CONST_INT_P (op)
8210 && INTVAL (op) > 0
8211 && INTVAL (op) <= 3);
8212 }
8213
8214 return (code == CONST_INT
8215 && INTVAL (index) < 4096
8216 && INTVAL (index) > -256);
8217 }
8218
8219 /* Return nonzero if X is valid as a 16-bit Thumb state base register. */
8220 static int
8221 thumb1_base_register_rtx_p (rtx x, machine_mode mode, int strict_p)
8222 {
8223 int regno;
8224
8225 if (!REG_P (x))
8226 return 0;
8227
8228 regno = REGNO (x);
8229
8230 if (strict_p)
8231 return THUMB1_REGNO_MODE_OK_FOR_BASE_P (regno, mode);
8232
8233 return (regno <= LAST_LO_REGNUM
8234 || regno > LAST_VIRTUAL_REGISTER
8235 || regno == FRAME_POINTER_REGNUM
8236 || (GET_MODE_SIZE (mode) >= 4
8237 && (regno == STACK_POINTER_REGNUM
8238 || regno >= FIRST_PSEUDO_REGISTER
8239 || x == hard_frame_pointer_rtx
8240 || x == arg_pointer_rtx)));
8241 }
8242
8243 /* Return nonzero if x is a legitimate index register. This is the case
8244 for any base register that can access a QImode object. */
8245 inline static int
8246 thumb1_index_register_rtx_p (rtx x, int strict_p)
8247 {
8248 return thumb1_base_register_rtx_p (x, QImode, strict_p);
8249 }
8250
8251 /* Return nonzero if x is a legitimate 16-bit Thumb-state address.
8252
8253 The AP may be eliminated to either the SP or the FP, so we use the
8254 least common denominator, e.g. SImode, and offsets from 0 to 64.
8255
8256 ??? Verify whether the above is the right approach.
8257
8258 ??? Also, the FP may be eliminated to the SP, so perhaps that
8259 needs special handling also.
8260
8261 ??? Look at how the mips16 port solves this problem. It probably uses
8262 better ways to solve some of these problems.
8263
8264 Although it is not incorrect, we don't accept QImode and HImode
8265 addresses based on the frame pointer or arg pointer until the
8266 reload pass starts. This is so that eliminating such addresses
8267 into stack based ones won't produce impossible code. */
8268 int
8269 thumb1_legitimate_address_p (machine_mode mode, rtx x, int strict_p)
8270 {
8271 if (TARGET_HAVE_MOVT && can_avoid_literal_pool_for_label_p (x))
8272 return 0;
8273
8274 /* ??? Not clear if this is right. Experiment. */
8275 if (GET_MODE_SIZE (mode) < 4
8276 && !(reload_in_progress || reload_completed)
8277 && (reg_mentioned_p (frame_pointer_rtx, x)
8278 || reg_mentioned_p (arg_pointer_rtx, x)
8279 || reg_mentioned_p (virtual_incoming_args_rtx, x)
8280 || reg_mentioned_p (virtual_outgoing_args_rtx, x)
8281 || reg_mentioned_p (virtual_stack_dynamic_rtx, x)
8282 || reg_mentioned_p (virtual_stack_vars_rtx, x)))
8283 return 0;
8284
8285 /* Accept any base register. SP only in SImode or larger. */
8286 else if (thumb1_base_register_rtx_p (x, mode, strict_p))
8287 return 1;
8288
8289 /* This is PC relative data before arm_reorg runs. */
8290 else if (GET_MODE_SIZE (mode) >= 4 && CONSTANT_P (x)
8291 && GET_CODE (x) == SYMBOL_REF
8292 && CONSTANT_POOL_ADDRESS_P (x) && !flag_pic)
8293 return 1;
8294
8295 /* This is PC relative data after arm_reorg runs. */
8296 else if ((GET_MODE_SIZE (mode) >= 4 || mode == HFmode)
8297 && reload_completed
8298 && (GET_CODE (x) == LABEL_REF
8299 || (GET_CODE (x) == CONST
8300 && GET_CODE (XEXP (x, 0)) == PLUS
8301 && GET_CODE (XEXP (XEXP (x, 0), 0)) == LABEL_REF
8302 && CONST_INT_P (XEXP (XEXP (x, 0), 1)))))
8303 return 1;
8304
8305 /* Post-inc indexing only supported for SImode and larger. */
8306 else if (GET_CODE (x) == POST_INC && GET_MODE_SIZE (mode) >= 4
8307 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p))
8308 return 1;
8309
8310 else if (GET_CODE (x) == PLUS)
8311 {
8312 /* REG+REG address can be any two index registers. */
8313 /* We disallow FRAME+REG addressing since we know that FRAME
8314 will be replaced with STACK, and SP relative addressing only
8315 permits SP+OFFSET. */
8316 if (GET_MODE_SIZE (mode) <= 4
8317 && XEXP (x, 0) != frame_pointer_rtx
8318 && XEXP (x, 1) != frame_pointer_rtx
8319 && thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8320 && (thumb1_index_register_rtx_p (XEXP (x, 1), strict_p)
8321 || (!strict_p && will_be_in_index_register (XEXP (x, 1)))))
8322 return 1;
8323
8324 /* REG+const has 5-7 bit offset for non-SP registers. */
8325 else if ((thumb1_index_register_rtx_p (XEXP (x, 0), strict_p)
8326 || XEXP (x, 0) == arg_pointer_rtx)
8327 && CONST_INT_P (XEXP (x, 1))
8328 && thumb_legitimate_offset_p (mode, INTVAL (XEXP (x, 1))))
8329 return 1;
8330
8331 /* REG+const has 10-bit offset for SP, but only SImode and
8332 larger is supported. */
8333 /* ??? Should probably check for DI/DFmode overflow here
8334 just like GO_IF_LEGITIMATE_OFFSET does. */
8335 else if (REG_P (XEXP (x, 0))
8336 && REGNO (XEXP (x, 0)) == STACK_POINTER_REGNUM
8337 && GET_MODE_SIZE (mode) >= 4
8338 && CONST_INT_P (XEXP (x, 1))
8339 && INTVAL (XEXP (x, 1)) >= 0
8340 && INTVAL (XEXP (x, 1)) + GET_MODE_SIZE (mode) <= 1024
8341 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8342 return 1;
8343
8344 else if (REG_P (XEXP (x, 0))
8345 && (REGNO (XEXP (x, 0)) == FRAME_POINTER_REGNUM
8346 || REGNO (XEXP (x, 0)) == ARG_POINTER_REGNUM
8347 || (REGNO (XEXP (x, 0)) >= FIRST_VIRTUAL_REGISTER
8348 && REGNO (XEXP (x, 0))
8349 <= LAST_VIRTUAL_POINTER_REGISTER))
8350 && GET_MODE_SIZE (mode) >= 4
8351 && CONST_INT_P (XEXP (x, 1))
8352 && (INTVAL (XEXP (x, 1)) & 3) == 0)
8353 return 1;
8354 }
8355
8356 else if (GET_MODE_CLASS (mode) != MODE_FLOAT
8357 && GET_MODE_SIZE (mode) == 4
8358 && GET_CODE (x) == SYMBOL_REF
8359 && CONSTANT_POOL_ADDRESS_P (x)
8360 && ! (flag_pic
8361 && symbol_mentioned_p (get_pool_constant (x))
8362 && ! pcrel_constant_p (get_pool_constant (x))))
8363 return 1;
8364
8365 return 0;
8366 }
8367
8368 /* Return nonzero if VAL can be used as an offset in a Thumb-state address
8369 instruction of mode MODE. */
8370 int
8371 thumb_legitimate_offset_p (machine_mode mode, HOST_WIDE_INT val)
8372 {
8373 switch (GET_MODE_SIZE (mode))
8374 {
8375 case 1:
8376 return val >= 0 && val < 32;
8377
8378 case 2:
8379 return val >= 0 && val < 64 && (val & 1) == 0;
8380
8381 default:
8382 return (val >= 0
8383 && (val + GET_MODE_SIZE (mode)) <= 128
8384 && (val & 3) == 0);
8385 }
8386 }
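/* Worked examples for the function above (illustrative): for SImode an
   offset of 124 is accepted (124 + 4 <= 128 and word-aligned) while 128 is
   not; for QImode any offset in 0..31 is accepted; for HImode only even
   offsets in 0..62 are accepted.  */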
8387
8388 bool
8389 arm_legitimate_address_p (machine_mode mode, rtx x, bool strict_p)
8390 {
8391 if (TARGET_ARM)
8392 return arm_legitimate_address_outer_p (mode, x, SET, strict_p);
8393 else if (TARGET_THUMB2)
8394 return thumb2_legitimate_address_p (mode, x, strict_p);
8395 else /* if (TARGET_THUMB1) */
8396 return thumb1_legitimate_address_p (mode, x, strict_p);
8397 }
8398
8399 /* Worker function for TARGET_PREFERRED_RELOAD_CLASS.
8400
8401 Given an rtx X being reloaded into a reg required to be
8402 in class CLASS, return the class of reg to actually use.
8403 In general this is just CLASS, but for the Thumb core registers and
8404 immediate constants we prefer a LO_REGS class or a subset. */
8405
8406 static reg_class_t
8407 arm_preferred_reload_class (rtx x ATTRIBUTE_UNUSED, reg_class_t rclass)
8408 {
8409 if (TARGET_32BIT)
8410 return rclass;
8411 else
8412 {
8413 if (rclass == GENERAL_REGS)
8414 return LO_REGS;
8415 else
8416 return rclass;
8417 }
8418 }
8419
8420 /* Build the SYMBOL_REF for __tls_get_addr. */
8421
8422 static GTY(()) rtx tls_get_addr_libfunc;
8423
8424 static rtx
8425 get_tls_get_addr (void)
8426 {
8427 if (!tls_get_addr_libfunc)
8428 tls_get_addr_libfunc = init_one_libfunc ("__tls_get_addr");
8429 return tls_get_addr_libfunc;
8430 }
8431
8432 rtx
8433 arm_load_tp (rtx target)
8434 {
8435 if (!target)
8436 target = gen_reg_rtx (SImode);
8437
8438 if (TARGET_HARD_TP)
8439 {
8440 /* Can return in any reg. */
8441 emit_insn (gen_load_tp_hard (target));
8442 }
8443 else
8444 {
8445 /* Always returned in r0. Immediately copy the result into a pseudo,
8446 otherwise other uses of r0 (e.g. setting up function arguments) may
8447 clobber the value. */
8448
8449 rtx tmp;
8450
8451 emit_insn (gen_load_tp_soft ());
8452
8453 tmp = gen_rtx_REG (SImode, R0_REGNUM);
8454 emit_move_insn (target, tmp);
8455 }
8456 return target;
8457 }
8458
8459 static rtx
8460 load_tls_operand (rtx x, rtx reg)
8461 {
8462 rtx tmp;
8463
8464 if (reg == NULL_RTX)
8465 reg = gen_reg_rtx (SImode);
8466
8467 tmp = gen_rtx_CONST (SImode, x);
8468
8469 emit_move_insn (reg, tmp);
8470
8471 return reg;
8472 }
8473
8474 static rtx_insn *
8475 arm_call_tls_get_addr (rtx x, rtx reg, rtx *valuep, int reloc)
8476 {
8477 rtx label, labelno, sum;
8478
8479 gcc_assert (reloc != TLS_DESCSEQ);
8480 start_sequence ();
8481
8482 labelno = GEN_INT (pic_labelno++);
8483 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8484 label = gen_rtx_CONST (VOIDmode, label);
8485
8486 sum = gen_rtx_UNSPEC (Pmode,
8487 gen_rtvec (4, x, GEN_INT (reloc), label,
8488 GEN_INT (TARGET_ARM ? 8 : 4)),
8489 UNSPEC_TLS);
8490 reg = load_tls_operand (sum, reg);
8491
8492 if (TARGET_ARM)
8493 emit_insn (gen_pic_add_dot_plus_eight (reg, reg, labelno));
8494 else
8495 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8496
8497 *valuep = emit_library_call_value (get_tls_get_addr (), NULL_RTX,
8498 LCT_PURE, /* LCT_CONST? */
8499 Pmode, reg, Pmode);
8500
8501 rtx_insn *insns = get_insns ();
8502 end_sequence ();
8503
8504 return insns;
8505 }
8506
8507 static rtx
8508 arm_tls_descseq_addr (rtx x, rtx reg)
8509 {
8510 rtx labelno = GEN_INT (pic_labelno++);
8511 rtx label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8512 rtx sum = gen_rtx_UNSPEC (Pmode,
8513 gen_rtvec (4, x, GEN_INT (TLS_DESCSEQ),
8514 gen_rtx_CONST (VOIDmode, label),
8515 GEN_INT (!TARGET_ARM)),
8516 UNSPEC_TLS);
8517 rtx reg0 = load_tls_operand (sum, gen_rtx_REG (SImode, R0_REGNUM));
8518
8519 emit_insn (gen_tlscall (x, labelno));
8520 if (!reg)
8521 reg = gen_reg_rtx (SImode);
8522 else
8523 gcc_assert (REGNO (reg) != R0_REGNUM);
8524
8525 emit_move_insn (reg, reg0);
8526
8527 return reg;
8528 }
8529
8530 rtx
8531 legitimize_tls_address (rtx x, rtx reg)
8532 {
8533 rtx dest, tp, label, labelno, sum, ret, eqv, addend;
8534 rtx_insn *insns;
8535 unsigned int model = SYMBOL_REF_TLS_MODEL (x);
8536
8537 switch (model)
8538 {
8539 case TLS_MODEL_GLOBAL_DYNAMIC:
8540 if (TARGET_GNU2_TLS)
8541 {
8542 reg = arm_tls_descseq_addr (x, reg);
8543
8544 tp = arm_load_tp (NULL_RTX);
8545
8546 dest = gen_rtx_PLUS (Pmode, tp, reg);
8547 }
8548 else
8549 {
8550 /* Original scheme */
8551 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_GD32);
8552 dest = gen_reg_rtx (Pmode);
8553 emit_libcall_block (insns, dest, ret, x);
8554 }
8555 return dest;
8556
8557 case TLS_MODEL_LOCAL_DYNAMIC:
8558 if (TARGET_GNU2_TLS)
8559 {
8560 reg = arm_tls_descseq_addr (x, reg);
8561
8562 tp = arm_load_tp (NULL_RTX);
8563
8564 dest = gen_rtx_PLUS (Pmode, tp, reg);
8565 }
8566 else
8567 {
8568 insns = arm_call_tls_get_addr (x, reg, &ret, TLS_LDM32);
8569
8570 /* Attach a unique REG_EQUIV, to allow the RTL optimizers to
8571 share the LDM result with other LD model accesses. */
8572 eqv = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, const1_rtx),
8573 UNSPEC_TLS);
8574 dest = gen_reg_rtx (Pmode);
8575 emit_libcall_block (insns, dest, ret, eqv);
8576
8577 /* Load the addend. */
8578 addend = gen_rtx_UNSPEC (Pmode, gen_rtvec (2, x,
8579 GEN_INT (TLS_LDO32)),
8580 UNSPEC_TLS);
8581 addend = force_reg (SImode, gen_rtx_CONST (SImode, addend));
8582 dest = gen_rtx_PLUS (Pmode, dest, addend);
8583 }
8584 return dest;
8585
8586 case TLS_MODEL_INITIAL_EXEC:
8587 labelno = GEN_INT (pic_labelno++);
8588 label = gen_rtx_UNSPEC (Pmode, gen_rtvec (1, labelno), UNSPEC_PIC_LABEL);
8589 label = gen_rtx_CONST (VOIDmode, label);
8590 sum = gen_rtx_UNSPEC (Pmode,
8591 gen_rtvec (4, x, GEN_INT (TLS_IE32), label,
8592 GEN_INT (TARGET_ARM ? 8 : 4)),
8593 UNSPEC_TLS);
8594 reg = load_tls_operand (sum, reg);
8595
8596 if (TARGET_ARM)
8597 emit_insn (gen_tls_load_dot_plus_eight (reg, reg, labelno));
8598 else if (TARGET_THUMB2)
8599 emit_insn (gen_tls_load_dot_plus_four (reg, NULL, reg, labelno));
8600 else
8601 {
8602 emit_insn (gen_pic_add_dot_plus_four (reg, reg, labelno));
8603 emit_move_insn (reg, gen_const_mem (SImode, reg));
8604 }
8605
8606 tp = arm_load_tp (NULL_RTX);
8607
8608 return gen_rtx_PLUS (Pmode, tp, reg);
8609
8610 case TLS_MODEL_LOCAL_EXEC:
8611 tp = arm_load_tp (NULL_RTX);
8612
8613 reg = gen_rtx_UNSPEC (Pmode,
8614 gen_rtvec (2, x, GEN_INT (TLS_LE32)),
8615 UNSPEC_TLS);
8616 reg = force_reg (SImode, gen_rtx_CONST (SImode, reg));
8617
8618 return gen_rtx_PLUS (Pmode, tp, reg);
8619
8620 default:
8621 abort ();
8622 }
8623 }
8624
8625 /* Try machine-dependent ways of modifying an illegitimate address
8626 to be legitimate. If we find one, return the new, valid address. */
8627 rtx
8628 arm_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8629 {
8630 if (arm_tls_referenced_p (x))
8631 {
8632 rtx addend = NULL;
8633
8634 if (GET_CODE (x) == CONST && GET_CODE (XEXP (x, 0)) == PLUS)
8635 {
8636 addend = XEXP (XEXP (x, 0), 1);
8637 x = XEXP (XEXP (x, 0), 0);
8638 }
8639
8640 if (GET_CODE (x) != SYMBOL_REF)
8641 return x;
8642
8643 gcc_assert (SYMBOL_REF_TLS_MODEL (x) != 0);
8644
8645 x = legitimize_tls_address (x, NULL_RTX);
8646
8647 if (addend)
8648 {
8649 x = gen_rtx_PLUS (SImode, x, addend);
8650 orig_x = x;
8651 }
8652 else
8653 return x;
8654 }
8655
8656 if (!TARGET_ARM)
8657 {
8658 /* TODO: legitimize_address for Thumb2. */
8659 if (TARGET_THUMB2)
8660 return x;
8661 return thumb_legitimize_address (x, orig_x, mode);
8662 }
8663
8664 if (GET_CODE (x) == PLUS)
8665 {
8666 rtx xop0 = XEXP (x, 0);
8667 rtx xop1 = XEXP (x, 1);
8668
8669 if (CONSTANT_P (xop0) && !symbol_mentioned_p (xop0))
8670 xop0 = force_reg (SImode, xop0);
8671
8672 if (CONSTANT_P (xop1) && !CONST_INT_P (xop1)
8673 && !symbol_mentioned_p (xop1))
8674 xop1 = force_reg (SImode, xop1);
8675
8676 if (ARM_BASE_REGISTER_RTX_P (xop0)
8677 && CONST_INT_P (xop1))
8678 {
8679 HOST_WIDE_INT n, low_n;
8680 rtx base_reg, val;
8681 n = INTVAL (xop1);
8682
8683 /* VFP addressing modes actually allow greater offsets, but for
8684 now we just stick with the lowest common denominator. */
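/* For example, an offset of 27 is rewritten below as (base + 32) - 5:
   low_n = 27 & 0x0f = 11, which is greater than 4, so 16 is moved from
   low_n into the base portion, leaving a base offset of 32 and a
   residual offset of -5.  */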
8685 if (mode == DImode || mode == DFmode)
8686 {
8687 low_n = n & 0x0f;
8688 n &= ~0x0f;
8689 if (low_n > 4)
8690 {
8691 n += 16;
8692 low_n -= 16;
8693 }
8694 }
8695 else
8696 {
8697 low_n = ((mode) == TImode ? 0
8698 : n >= 0 ? (n & 0xfff) : -((-n) & 0xfff));
8699 n -= low_n;
8700 }
8701
8702 base_reg = gen_reg_rtx (SImode);
8703 val = force_operand (plus_constant (Pmode, xop0, n), NULL_RTX);
8704 emit_move_insn (base_reg, val);
8705 x = plus_constant (Pmode, base_reg, low_n);
8706 }
8707 else if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8708 x = gen_rtx_PLUS (SImode, xop0, xop1);
8709 }
8710
8711 /* XXX We don't allow MINUS any more -- see comment in
8712 arm_legitimate_address_outer_p (). */
8713 else if (GET_CODE (x) == MINUS)
8714 {
8715 rtx xop0 = XEXP (x, 0);
8716 rtx xop1 = XEXP (x, 1);
8717
8718 if (CONSTANT_P (xop0))
8719 xop0 = force_reg (SImode, xop0);
8720
8721 if (CONSTANT_P (xop1) && ! symbol_mentioned_p (xop1))
8722 xop1 = force_reg (SImode, xop1);
8723
8724 if (xop0 != XEXP (x, 0) || xop1 != XEXP (x, 1))
8725 x = gen_rtx_MINUS (SImode, xop0, xop1);
8726 }
8727
8728 /* Make sure to take full advantage of the pre-indexed addressing mode
8729 with absolute addresses which often allows for the base register to
8730 be factorized for multiple adjacent memory references, and it might
8731 even allow for the mini pool to be avoided entirely. */
8732 else if (CONST_INT_P (x) && optimize > 0)
8733 {
8734 unsigned int bits;
8735 HOST_WIDE_INT mask, base, index;
8736 rtx base_reg;
8737
8738 /* ldr and ldrb can use a 12-bit index, ldrsb and the rest can only
8739 use an 8-bit index. So let's use a 12-bit index for SImode only and
8740 hope that arm_gen_constant will enable ldrb to use more bits. */
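/* For example, with mode == SImode and x == 0x30004, base becomes 0x30000
   and index 4, so the final address is (reg holding 0x30000) + 4.  When the
   base value would need many set bits, the code below flips to base | mask
   with a negative index instead, which arm_gen_constant can usually build
   more cheaply.  */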
8741 bits = (mode == SImode) ? 12 : 8;
8742 mask = (1 << bits) - 1;
8743 base = INTVAL (x) & ~mask;
8744 index = INTVAL (x) & mask;
8745 if (bit_count (base & 0xffffffff) > (32 - bits)/2)
8746 {
8747 /* It'll most probably be more efficient to generate the base
8748 with more bits set and use a negative index instead. */
8749 base |= mask;
8750 index -= mask;
8751 }
8752 base_reg = force_reg (SImode, GEN_INT (base));
8753 x = plus_constant (Pmode, base_reg, index);
8754 }
8755
8756 if (flag_pic)
8757 {
8758 /* We need to find and carefully transform any SYMBOL and LABEL
8759 references; so go back to the original address expression. */
8760 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
8761 false /*compute_now*/);
8762
8763 if (new_x != orig_x)
8764 x = new_x;
8765 }
8766
8767 return x;
8768 }
8769
8770
8771 /* Try machine-dependent ways of modifying an illegitimate Thumb address
8772 to be legitimate. If we find one, return the new, valid address. */
8773 rtx
8774 thumb_legitimize_address (rtx x, rtx orig_x, machine_mode mode)
8775 {
8776 if (GET_CODE (x) == PLUS
8777 && CONST_INT_P (XEXP (x, 1))
8778 && (INTVAL (XEXP (x, 1)) >= 32 * GET_MODE_SIZE (mode)
8779 || INTVAL (XEXP (x, 1)) < 0))
8780 {
8781 rtx xop0 = XEXP (x, 0);
8782 rtx xop1 = XEXP (x, 1);
8783 HOST_WIDE_INT offset = INTVAL (xop1);
8784
8785 /* Try and fold the offset into a biasing of the base register and
8786 then offsetting that. Don't do this when optimizing for space
8787 since it can cause too many CSEs. */
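/* For example, with SImode and offset 300, the code below rewrites
   (xop0 + 300) as ((xop0 + 252) + 48): delta = 300 - (256 - 4) = 48, the
   bias of 252 goes into a new base register and the remaining offset of
   48 fits the load/store immediate field.  */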
8788 if (optimize_size && offset >= 0
8789 && offset < 256 + 31 * GET_MODE_SIZE (mode))
8790 {
8791 HOST_WIDE_INT delta;
8792
8793 if (offset >= 256)
8794 delta = offset - (256 - GET_MODE_SIZE (mode));
8795 else if (offset < 32 * GET_MODE_SIZE (mode) + 8)
8796 delta = 31 * GET_MODE_SIZE (mode);
8797 else
8798 delta = offset & (~31 * GET_MODE_SIZE (mode));
8799
8800 xop0 = force_operand (plus_constant (Pmode, xop0, offset - delta),
8801 NULL_RTX);
8802 x = plus_constant (Pmode, xop0, delta);
8803 }
8804 else if (offset < 0 && offset > -256)
8805 /* Small negative offsets are best done with a subtract before the
8806 dereference; forcing these into a register normally takes two
8807 instructions. */
8808 x = force_operand (x, NULL_RTX);
8809 else
8810 {
8811 /* For the remaining cases, force the constant into a register. */
8812 xop1 = force_reg (SImode, xop1);
8813 x = gen_rtx_PLUS (SImode, xop0, xop1);
8814 }
8815 }
8816 else if (GET_CODE (x) == PLUS
8817 && s_register_operand (XEXP (x, 1), SImode)
8818 && !s_register_operand (XEXP (x, 0), SImode))
8819 {
8820 rtx xop0 = force_operand (XEXP (x, 0), NULL_RTX);
8821
8822 x = gen_rtx_PLUS (SImode, xop0, XEXP (x, 1));
8823 }
8824
8825 if (flag_pic)
8826 {
8827 /* We need to find and carefully transform any SYMBOL and LABEL
8828 references; so go back to the original address expression. */
8829 rtx new_x = legitimize_pic_address (orig_x, mode, NULL_RTX, NULL_RTX,
8830 false /*compute_now*/);
8831
8832 if (new_x != orig_x)
8833 x = new_x;
8834 }
8835
8836 return x;
8837 }
8838
8839 /* Return TRUE if X contains any TLS symbol references. */
8840
8841 bool
8842 arm_tls_referenced_p (rtx x)
8843 {
8844 if (! TARGET_HAVE_TLS)
8845 return false;
8846
8847 subrtx_iterator::array_type array;
8848 FOR_EACH_SUBRTX (iter, array, x, ALL)
8849 {
8850 const_rtx x = *iter;
8851 if (GET_CODE (x) == SYMBOL_REF && SYMBOL_REF_TLS_MODEL (x) != 0)
8852 {
8853 /* ARM currently does not provide relocations to encode TLS variables
8854 into AArch32 instructions, only data, so there is no way to
8855 currently implement these if a literal pool is disabled. */
8856 if (arm_disable_literal_pool)
8857 sorry ("accessing thread-local storage is not currently supported "
8858 "with -mpure-code or -mslow-flash-data");
8859
8860 return true;
8861 }
8862
8863 /* Don't recurse into UNSPEC_TLS looking for TLS symbols; these are
8864 TLS offsets, not real symbol references. */
8865 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
8866 iter.skip_subrtxes ();
8867 }
8868 return false;
8869 }
8870
8871 /* Implement TARGET_LEGITIMATE_CONSTANT_P.
8872
8873 On the ARM, allow any integer (invalid ones are removed later by insn
8874 patterns), nice doubles and symbol_refs which refer to the function's
8875 constant pool XXX.
8876
8877 When generating PIC, allow anything. */
8878
8879 static bool
8880 arm_legitimate_constant_p_1 (machine_mode, rtx x)
8881 {
8882 return flag_pic || !label_mentioned_p (x);
8883 }
8884
8885 static bool
8886 thumb_legitimate_constant_p (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8887 {
8888 /* Splitters for TARGET_USE_MOVT call arm_emit_movpair, which creates HIGH
8889 RTXs. These RTXs must therefore be allowed for Thumb-1 so that, when run
8890 for ARMv8-M Baseline or later, the result is valid. */
8891 if (TARGET_HAVE_MOVT && GET_CODE (x) == HIGH)
8892 x = XEXP (x, 0);
8893
8894 return (CONST_INT_P (x)
8895 || CONST_DOUBLE_P (x)
8896 || CONSTANT_ADDRESS_P (x)
8897 || (TARGET_HAVE_MOVT && GET_CODE (x) == SYMBOL_REF)
8898 || flag_pic);
8899 }
8900
8901 static bool
8902 arm_legitimate_constant_p (machine_mode mode, rtx x)
8903 {
8904 return (!arm_cannot_force_const_mem (mode, x)
8905 && (TARGET_32BIT
8906 ? arm_legitimate_constant_p_1 (mode, x)
8907 : thumb_legitimate_constant_p (mode, x)));
8908 }
8909
8910 /* Implement TARGET_CANNOT_FORCE_CONST_MEM. */
8911
8912 static bool
8913 arm_cannot_force_const_mem (machine_mode mode ATTRIBUTE_UNUSED, rtx x)
8914 {
8915 rtx base, offset;
8916
8917 if (ARM_OFFSETS_MUST_BE_WITHIN_SECTIONS_P)
8918 {
8919 split_const (x, &base, &offset);
8920 if (GET_CODE (base) == SYMBOL_REF
8921 && !offset_within_block_p (base, INTVAL (offset)))
8922 return true;
8923 }
8924 return arm_tls_referenced_p (x);
8925 }
8926 \f
8927 #define REG_OR_SUBREG_REG(X) \
8928 (REG_P (X) \
8929 || (GET_CODE (X) == SUBREG && REG_P (SUBREG_REG (X))))
8930
8931 #define REG_OR_SUBREG_RTX(X) \
8932 (REG_P (X) ? (X) : SUBREG_REG (X))
8933
8934 static inline int
8935 thumb1_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
8936 {
8937 machine_mode mode = GET_MODE (x);
8938 int total, words;
8939
8940 switch (code)
8941 {
8942 case ASHIFT:
8943 case ASHIFTRT:
8944 case LSHIFTRT:
8945 case ROTATERT:
8946 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
8947
8948 case PLUS:
8949 case MINUS:
8950 case COMPARE:
8951 case NEG:
8952 case NOT:
8953 return COSTS_N_INSNS (1);
8954
8955 case MULT:
8956 if (arm_arch6m && arm_m_profile_small_mul)
8957 return COSTS_N_INSNS (32);
8958
8959 if (CONST_INT_P (XEXP (x, 1)))
8960 {
8961 int cycles = 0;
8962 unsigned HOST_WIDE_INT i = INTVAL (XEXP (x, 1));
8963
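/* Roughly model a multiplier that handles two bits of the constant per
   cycle: e.g. a constant of 0xff takes four iterations, giving a cost
   of COSTS_N_INSNS (2) + 4.  */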
8964 while (i)
8965 {
8966 i >>= 2;
8967 cycles++;
8968 }
8969 return COSTS_N_INSNS (2) + cycles;
8970 }
8971 return COSTS_N_INSNS (1) + 16;
8972
8973 case SET:
8974 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
8975 the mode. */
8976 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
8977 return (COSTS_N_INSNS (words)
8978 + 4 * ((MEM_P (SET_SRC (x)))
8979 + MEM_P (SET_DEST (x))));
8980
8981 case CONST_INT:
8982 if (outer == SET)
8983 {
8984 if (UINTVAL (x) < 256
8985 /* 16-bit constant. */
8986 || (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000)))
8987 return 0;
8988 if (thumb_shiftable_const (INTVAL (x)))
8989 return COSTS_N_INSNS (2);
8990 return COSTS_N_INSNS (3);
8991 }
8992 else if ((outer == PLUS || outer == COMPARE)
8993 && INTVAL (x) < 256 && INTVAL (x) > -256)
8994 return 0;
8995 else if ((outer == IOR || outer == XOR || outer == AND)
8996 && INTVAL (x) < 256 && INTVAL (x) >= -256)
8997 return COSTS_N_INSNS (1);
8998 else if (outer == AND)
8999 {
9000 int i;
9001 /* This duplicates the tests in the andsi3 expander. */
9002 for (i = 9; i <= 31; i++)
9003 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9004 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9005 return COSTS_N_INSNS (2);
9006 }
9007 else if (outer == ASHIFT || outer == ASHIFTRT
9008 || outer == LSHIFTRT)
9009 return 0;
9010 return COSTS_N_INSNS (2);
9011
9012 case CONST:
9013 case CONST_DOUBLE:
9014 case LABEL_REF:
9015 case SYMBOL_REF:
9016 return COSTS_N_INSNS (3);
9017
9018 case UDIV:
9019 case UMOD:
9020 case DIV:
9021 case MOD:
9022 return 100;
9023
9024 case TRUNCATE:
9025 return 99;
9026
9027 case AND:
9028 case XOR:
9029 case IOR:
9030 /* XXX guess. */
9031 return 8;
9032
9033 case MEM:
9034 /* XXX another guess. */
9035 /* Memory costs quite a lot for the first word, but subsequent words
9036 load at the equivalent of a single insn each. */
9037 return (10 + 4 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9038 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9039 ? 4 : 0));
9040
9041 case IF_THEN_ELSE:
9042 /* XXX a guess. */
9043 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9044 return 14;
9045 return 2;
9046
9047 case SIGN_EXTEND:
9048 case ZERO_EXTEND:
9049 total = mode == DImode ? COSTS_N_INSNS (1) : 0;
9050 total += thumb1_rtx_costs (XEXP (x, 0), GET_CODE (XEXP (x, 0)), code);
9051
9052 if (mode == SImode)
9053 return total;
9054
9055 if (arm_arch6)
9056 return total + COSTS_N_INSNS (1);
9057
9058 /* Assume a two-shift sequence. Increase the cost slightly so
9059 we prefer actual shifts over an extend operation. */
9060 return total + 1 + COSTS_N_INSNS (2);
9061
9062 default:
9063 return 99;
9064 }
9065 }
9066
9067 /* Estimates the size cost of thumb1 instructions.
9068 For now most of the code is copied from thumb1_rtx_costs. We need more
9069 fine-grained tuning when we have more related test cases. */
9070 static inline int
9071 thumb1_size_rtx_costs (rtx x, enum rtx_code code, enum rtx_code outer)
9072 {
9073 machine_mode mode = GET_MODE (x);
9074 int words, cost;
9075
9076 switch (code)
9077 {
9078 case ASHIFT:
9079 case ASHIFTRT:
9080 case LSHIFTRT:
9081 case ROTATERT:
9082 return (mode == SImode) ? COSTS_N_INSNS (1) : COSTS_N_INSNS (2);
9083
9084 case PLUS:
9085 case MINUS:
9086 /* Thumb-1 needs two instructions to implement the shiftadd/shiftsub0/shiftsub1
9087 patterns defined by RTL expansion, especially for the expansion of
9088 multiplication. */
9089 if ((GET_CODE (XEXP (x, 0)) == MULT
9090 && power_of_two_operand (XEXP (XEXP (x,0),1), SImode))
9091 || (GET_CODE (XEXP (x, 1)) == MULT
9092 && power_of_two_operand (XEXP (XEXP (x, 1), 1), SImode)))
9093 return COSTS_N_INSNS (2);
9094 /* Fall through. */
9095 case COMPARE:
9096 case NEG:
9097 case NOT:
9098 return COSTS_N_INSNS (1);
9099
9100 case MULT:
9101 if (CONST_INT_P (XEXP (x, 1)))
9102 {
9103 /* The Thumb-1 mul instruction can't operate on a constant; we must
9104 load it into a register first. */
9105 int const_size = thumb1_size_rtx_costs (XEXP (x, 1), CONST_INT, SET);
9106 /* For targets which have a very small and high-latency multiply
9107 unit, we prefer to synthesize the mult with up to 5 instructions,
9108 giving a good balance between size and performance. */
9109 if (arm_arch6m && arm_m_profile_small_mul)
9110 return COSTS_N_INSNS (5);
9111 else
9112 return COSTS_N_INSNS (1) + const_size;
9113 }
9114 return COSTS_N_INSNS (1);
9115
9116 case SET:
9117 /* A SET doesn't have a mode, so let's look at the SET_DEST to get
9118 the mode. */
9119 words = ARM_NUM_INTS (GET_MODE_SIZE (GET_MODE (SET_DEST (x))));
9120 cost = COSTS_N_INSNS (words);
9121 if (satisfies_constraint_J (SET_SRC (x))
9122 || satisfies_constraint_K (SET_SRC (x))
9123 /* Too big an immediate for a 2-byte mov, using MOVT. */
9124 || (CONST_INT_P (SET_SRC (x))
9125 && UINTVAL (SET_SRC (x)) >= 256
9126 && TARGET_HAVE_MOVT
9127 && satisfies_constraint_j (SET_SRC (x)))
9128 /* thumb1_movdi_insn. */
9129 || ((words > 1) && MEM_P (SET_SRC (x))))
9130 cost += COSTS_N_INSNS (1);
9131 return cost;
9132
9133 case CONST_INT:
9134 if (outer == SET)
9135 {
9136 if (UINTVAL (x) < 256)
9137 return COSTS_N_INSNS (1);
9138 /* movw is 4 bytes long. */
9139 if (TARGET_HAVE_MOVT && !(INTVAL (x) & 0xffff0000))
9140 return COSTS_N_INSNS (2);
9141 /* See split "TARGET_THUMB1 && satisfies_constraint_J". */
9142 if (INTVAL (x) >= -255 && INTVAL (x) <= -1)
9143 return COSTS_N_INSNS (2);
9144 /* See split "TARGET_THUMB1 && satisfies_constraint_K". */
9145 if (thumb_shiftable_const (INTVAL (x)))
9146 return COSTS_N_INSNS (2);
9147 return COSTS_N_INSNS (3);
9148 }
9149 else if ((outer == PLUS || outer == COMPARE)
9150 && INTVAL (x) < 256 && INTVAL (x) > -256)
9151 return 0;
9152 else if ((outer == IOR || outer == XOR || outer == AND)
9153 && INTVAL (x) < 256 && INTVAL (x) >= -256)
9154 return COSTS_N_INSNS (1);
9155 else if (outer == AND)
9156 {
9157 int i;
9158 /* This duplicates the tests in the andsi3 expander. */
9159 for (i = 9; i <= 31; i++)
9160 if ((HOST_WIDE_INT_1 << i) - 1 == INTVAL (x)
9161 || (HOST_WIDE_INT_1 << i) - 1 == ~INTVAL (x))
9162 return COSTS_N_INSNS (2);
9163 }
9164 else if (outer == ASHIFT || outer == ASHIFTRT
9165 || outer == LSHIFTRT)
9166 return 0;
9167 return COSTS_N_INSNS (2);
9168
9169 case CONST:
9170 case CONST_DOUBLE:
9171 case LABEL_REF:
9172 case SYMBOL_REF:
9173 return COSTS_N_INSNS (3);
9174
9175 case UDIV:
9176 case UMOD:
9177 case DIV:
9178 case MOD:
9179 return 100;
9180
9181 case TRUNCATE:
9182 return 99;
9183
9184 case AND:
9185 case XOR:
9186 case IOR:
9187 return COSTS_N_INSNS (1);
9188
9189 case MEM:
9190 return (COSTS_N_INSNS (1)
9191 + COSTS_N_INSNS (1)
9192 * ((GET_MODE_SIZE (mode) - 1) / UNITS_PER_WORD)
9193 + ((GET_CODE (x) == SYMBOL_REF && CONSTANT_POOL_ADDRESS_P (x))
9194 ? COSTS_N_INSNS (1) : 0));
9195
9196 case IF_THEN_ELSE:
9197 /* XXX a guess. */
9198 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
9199 return 14;
9200 return 2;
9201
9202 case ZERO_EXTEND:
9203 /* XXX still guessing. */
9204 switch (GET_MODE (XEXP (x, 0)))
9205 {
9206 case E_QImode:
9207 return (1 + (mode == DImode ? 4 : 0)
9208 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9209
9210 case E_HImode:
9211 return (4 + (mode == DImode ? 4 : 0)
9212 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9213
9214 case E_SImode:
9215 return (1 + (MEM_P (XEXP (x, 0)) ? 10 : 0));
9216
9217 default:
9218 return 99;
9219 }
9220
9221 default:
9222 return 99;
9223 }
9224 }
9225
9226 /* Helper function for arm_rtx_costs. If the operand is a valid shift
9227 operand, then return the operand that is being shifted. If the shift
9228 is not by a constant, then set SHIFT_REG to point to the operand.
9229 Return NULL if OP is not a shifter operand. */
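/* For example, (mult X (const_int 4)) counts as X shifted left by two,
   and (ashift X Y) with Y a register returns X and sets *SHIFT_REG to Y.  */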
9230 static rtx
9231 shifter_op_p (rtx op, rtx *shift_reg)
9232 {
9233 enum rtx_code code = GET_CODE (op);
9234
9235 if (code == MULT && CONST_INT_P (XEXP (op, 1))
9236 && exact_log2 (INTVAL (XEXP (op, 1))) > 0)
9237 return XEXP (op, 0);
9238 else if (code == ROTATE && CONST_INT_P (XEXP (op, 1)))
9239 return XEXP (op, 0);
9240 else if (code == ROTATERT || code == ASHIFT || code == LSHIFTRT
9241 || code == ASHIFTRT)
9242 {
9243 if (!CONST_INT_P (XEXP (op, 1)))
9244 *shift_reg = XEXP (op, 1);
9245 return XEXP (op, 0);
9246 }
9247
9248 return NULL;
9249 }
9250
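/* Compute the cost of the UNSPEC or UNSPEC_VOLATILE rtx X and store it in
   *COST.  Always returns true, meaning the cost is final and the caller
   need not recurse into the operands.  */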
9251 static bool
9252 arm_unspec_cost (rtx x, enum rtx_code /* outer_code */, bool speed_p, int *cost)
9253 {
9254 const struct cpu_cost_table *extra_cost = current_tune->insn_extra_cost;
9255 rtx_code code = GET_CODE (x);
9256 gcc_assert (code == UNSPEC || code == UNSPEC_VOLATILE);
9257
9258 switch (XINT (x, 1))
9259 {
9260 case UNSPEC_UNALIGNED_LOAD:
9261 /* We can only do unaligned loads into the integer unit, and we can't
9262 use LDM or LDRD. */
9263 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9264 if (speed_p)
9265 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.load
9266 + extra_cost->ldst.load_unaligned);
9267
9268 #ifdef NOT_YET
9269 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9270 ADDR_SPACE_GENERIC, speed_p);
9271 #endif
9272 return true;
9273
9274 case UNSPEC_UNALIGNED_STORE:
9275 *cost = COSTS_N_INSNS (ARM_NUM_REGS (GET_MODE (x)));
9276 if (speed_p)
9277 *cost += (ARM_NUM_REGS (GET_MODE (x)) * extra_cost->ldst.store
9278 + extra_cost->ldst.store_unaligned);
9279
9280 *cost += rtx_cost (XVECEXP (x, 0, 0), VOIDmode, UNSPEC, 0, speed_p);
9281 #ifdef NOT_YET
9282 *cost += arm_address_cost (XEXP (XVECEXP (x, 0, 0), 0), GET_MODE (x),
9283 ADDR_SPACE_GENERIC, speed_p);
9284 #endif
9285 return true;
9286
9287 case UNSPEC_VRINTZ:
9288 case UNSPEC_VRINTP:
9289 case UNSPEC_VRINTM:
9290 case UNSPEC_VRINTR:
9291 case UNSPEC_VRINTX:
9292 case UNSPEC_VRINTA:
9293 if (speed_p)
9294 *cost += extra_cost->fp[GET_MODE (x) == DFmode].roundint;
9295
9296 return true;
9297 default:
9298 *cost = COSTS_N_INSNS (2);
9299 break;
9300 }
9301 return true;
9302 }
9303
9304 /* Cost of a libcall. We assume one insn per argument, an amount for the
9305 call (one insn for -Os) and then one for processing the result. */
9306 #define LIBCALL_COST(N) COSTS_N_INSNS (N + (speed_p ? 18 : 2))
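/* E.g. LIBCALL_COST (2) evaluates to COSTS_N_INSNS (20) when optimizing for
   speed and COSTS_N_INSNS (4) when optimizing for size.  */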
9307
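/* If operand IDX of X is a left-shift-style shifter operand (see
   shifter_op_p and arm_rtx_shift_left_p), add the cost of a combined
   arithmetic-plus-shift operation of kind OP and return true from the
   enclosing function; otherwise fall through to the code after the
   macro invocation.  */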
9308 #define HANDLE_NARROW_SHIFT_ARITH(OP, IDX) \
9309 do \
9310 { \
9311 shift_op = shifter_op_p (XEXP (x, IDX), &shift_reg); \
9312 if (shift_op != NULL \
9313 && arm_rtx_shift_left_p (XEXP (x, IDX))) \
9314 { \
9315 if (shift_reg) \
9316 { \
9317 if (speed_p) \
9318 *cost += extra_cost->alu.arith_shift_reg; \
9319 *cost += rtx_cost (shift_reg, GET_MODE (shift_reg), \
9320 ASHIFT, 1, speed_p); \
9321 } \
9322 else if (speed_p) \
9323 *cost += extra_cost->alu.arith_shift; \
9324 \
9325 *cost += (rtx_cost (shift_op, GET_MODE (shift_op), \
9326 ASHIFT, 0, speed_p) \
9327 + rtx_cost (XEXP (x, 1 - IDX), \
9328 GET_MODE (shift_op), \
9329 OP, 1, speed_p)); \
9330 return true; \
9331 } \
9332 } \
9333 while (0)
9334
9335 /* Helper function for arm_rtx_costs_internal. Calculates the cost of a MEM,
9336 considering the costs of the addressing mode and memory access
9337 separately. */
9338 static bool
9339 arm_mem_costs (rtx x, const struct cpu_cost_table *extra_cost,
9340 int *cost, bool speed_p)
9341 {
9342 machine_mode mode = GET_MODE (x);
9343
9344 *cost = COSTS_N_INSNS (1);
9345
9346 if (flag_pic
9347 && GET_CODE (XEXP (x, 0)) == PLUS
9348 && will_be_in_index_register (XEXP (XEXP (x, 0), 1)))
9349 /* This will be split into two instructions. Add the cost of the
9350 additional instruction here. The cost of the memory access is computed
9351 below. See arm.md:calculate_pic_address. */
9352 *cost += COSTS_N_INSNS (1);
9353
9354 /* Calculate cost of the addressing mode. */
9355 if (speed_p)
9356 {
9357 arm_addr_mode_op op_type;
9358 switch (GET_CODE (XEXP (x, 0)))
9359 {
9360 default:
9361 case REG:
9362 op_type = AMO_DEFAULT;
9363 break;
9364 case MINUS:
9365 /* MINUS does not appear in RTL, but the architecture supports it,
9366 so handle this case defensively. */
9367 /* fall through */
9368 case PLUS:
9369 op_type = AMO_NO_WB;
9370 break;
9371 case PRE_INC:
9372 case PRE_DEC:
9373 case POST_INC:
9374 case POST_DEC:
9375 case PRE_MODIFY:
9376 case POST_MODIFY:
9377 op_type = AMO_WB;
9378 break;
9379 }
9380
9381 if (VECTOR_MODE_P (mode))
9382 *cost += current_tune->addr_mode_costs->vector[op_type];
9383 else if (FLOAT_MODE_P (mode))
9384 *cost += current_tune->addr_mode_costs->fp[op_type];
9385 else
9386 *cost += current_tune->addr_mode_costs->integer[op_type];
9387 }
9388
9389 /* Calculate cost of memory access. */
9390 if (speed_p)
9391 {
9392 if (FLOAT_MODE_P (mode))
9393 {
9394 if (GET_MODE_SIZE (mode) == 8)
9395 *cost += extra_cost->ldst.loadd;
9396 else
9397 *cost += extra_cost->ldst.loadf;
9398 }
9399 else if (VECTOR_MODE_P (mode))
9400 *cost += extra_cost->ldst.loadv;
9401 else
9402 {
9403 /* Integer modes */
9404 if (GET_MODE_SIZE (mode) == 8)
9405 *cost += extra_cost->ldst.ldrd;
9406 else
9407 *cost += extra_cost->ldst.load;
9408 }
9409 }
9410
9411 return true;
9412 }
9413
9414 /* RTX costs. Make an estimate of the cost of executing the operation
9415 X, which is contained within an operation with code OUTER_CODE.
9416 SPEED_P indicates whether the cost desired is the performance cost,
9417 or the size cost. The estimate is stored in COST and the return
9418 value is TRUE if the cost calculation is final, or FALSE if the
9419 caller should recurse through the operands of X to add additional
9420 costs.
9421
9422 We currently make no attempt to model the size savings of Thumb-2
9423 16-bit instructions. At the normal points in compilation where
9424 this code is called we have no measure of whether the condition
9425 flags are live or not, and thus no realistic way to determine what
9426 the size will eventually be. */
9427 static bool
9428 arm_rtx_costs_internal (rtx x, enum rtx_code code, enum rtx_code outer_code,
9429 const struct cpu_cost_table *extra_cost,
9430 int *cost, bool speed_p)
9431 {
9432 machine_mode mode = GET_MODE (x);
9433
9434 *cost = COSTS_N_INSNS (1);
9435
9436 if (TARGET_THUMB1)
9437 {
9438 if (speed_p)
9439 *cost = thumb1_rtx_costs (x, code, outer_code);
9440 else
9441 *cost = thumb1_size_rtx_costs (x, code, outer_code);
9442 return true;
9443 }
9444
9445 switch (code)
9446 {
9447 case SET:
9448 *cost = 0;
9449 /* SET RTXs don't have a mode so we get it from the destination. */
9450 mode = GET_MODE (SET_DEST (x));
9451
9452 if (REG_P (SET_SRC (x))
9453 && REG_P (SET_DEST (x)))
9454 {
9455 /* Assume that most copies can be done with a single insn,
9456 unless we don't have HW FP, in which case everything
9457 larger than word mode will require two insns. */
9458 *cost = COSTS_N_INSNS (((!TARGET_HARD_FLOAT
9459 && GET_MODE_SIZE (mode) > 4)
9460 || mode == DImode)
9461 ? 2 : 1);
9462 /* Conditional register moves can be encoded
9463 in 16 bits in Thumb mode. */
9464 if (!speed_p && TARGET_THUMB && outer_code == COND_EXEC)
9465 *cost >>= 1;
9466
9467 return true;
9468 }
9469
9470 if (CONST_INT_P (SET_SRC (x)))
9471 {
9472 /* Handle CONST_INT here, since the value doesn't have a mode
9473 and we would otherwise be unable to work out the true cost. */
9474 *cost = rtx_cost (SET_DEST (x), GET_MODE (SET_DEST (x)), SET,
9475 0, speed_p);
9476 outer_code = SET;
9477 /* Slightly lower the cost of setting a core reg to a constant.
9478 This helps break up chains and allows for better scheduling. */
9479 if (REG_P (SET_DEST (x))
9480 && REGNO (SET_DEST (x)) <= LR_REGNUM)
9481 *cost -= 1;
9482 x = SET_SRC (x);
9483 /* Immediate moves with an immediate in the range [0, 255] can be
9484 encoded in 16 bits in Thumb mode. */
9485 if (!speed_p && TARGET_THUMB && GET_MODE (x) == SImode
9486 && INTVAL (x) >= 0 && INTVAL (x) <= 255)
9487 *cost >>= 1;
9488 goto const_int_cost;
9489 }
9490
9491 return false;
9492
9493 case MEM:
9494 return arm_mem_costs (x, extra_cost, cost, speed_p);
9495
9496 case PARALLEL:
9497 {
9498 /* Calculations of LDM costs are complex. We assume an initial cost
9499 (ldm_1st) which will load the number of registers mentioned in
9500 ldm_regs_per_insn_1st registers; then each additional
9501 ldm_regs_per_insn_subsequent registers cost one more insn. The
9502 formula for N regs is thus:
9503
9504 ldm_1st + COSTS_N_INSNS ((max (N - ldm_regs_per_insn_1st, 0)
9505 + ldm_regs_per_insn_subsequent - 1)
9506 / ldm_regs_per_insn_subsequent).
9507
9508 Additional costs may also be added for addressing. A similar
9509 formula is used for STM. */
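/* For example, with ldm_regs_per_insn_1st == 2 and
   ldm_regs_per_insn_subsequent == 2, a 6-register LDM costs
   ldm_1st + COSTS_N_INSNS ((max (6 - 2, 0) + 2 - 1) / 2)
   = ldm_1st + COSTS_N_INSNS (2).  */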
9510
9511 bool is_ldm = load_multiple_operation (x, SImode);
9512 bool is_stm = store_multiple_operation (x, SImode);
9513
9514 if (is_ldm || is_stm)
9515 {
9516 if (speed_p)
9517 {
9518 HOST_WIDE_INT nregs = XVECLEN (x, 0);
9519 HOST_WIDE_INT regs_per_insn_1st = is_ldm
9520 ? extra_cost->ldst.ldm_regs_per_insn_1st
9521 : extra_cost->ldst.stm_regs_per_insn_1st;
9522 HOST_WIDE_INT regs_per_insn_sub = is_ldm
9523 ? extra_cost->ldst.ldm_regs_per_insn_subsequent
9524 : extra_cost->ldst.stm_regs_per_insn_subsequent;
9525
9526 *cost += regs_per_insn_1st
9527 + COSTS_N_INSNS (((MAX (nregs - regs_per_insn_1st, 0))
9528 + regs_per_insn_sub - 1)
9529 / regs_per_insn_sub);
9530 return true;
9531 }
9532
9533 }
9534 return false;
9535 }
9536 case DIV:
9537 case UDIV:
9538 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9539 && (mode == SFmode || !TARGET_VFP_SINGLE))
9540 *cost += COSTS_N_INSNS (speed_p
9541 ? extra_cost->fp[mode != SFmode].div : 0);
9542 else if (mode == SImode && TARGET_IDIV)
9543 *cost += COSTS_N_INSNS (speed_p ? extra_cost->mult[0].idiv : 0);
9544 else
9545 *cost = LIBCALL_COST (2);
9546
9547 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9548 are possible, udiv is preferred. */
9549 *cost += (code == DIV ? COSTS_N_INSNS (1) : 0);
9550 return false; /* All arguments must be in registers. */
9551
9552 case MOD:
9553 /* MOD by a power of 2 can be expanded as:
9554 rsbs r1, r0, #0
9555 and r0, r0, #(n - 1)
9556 and r1, r1, #(n - 1)
9557 rsbpl r0, r1, #0. */
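/* The COSTS_N_INSNS (3) added below, on top of the base cost of one insn
   set on entry, accounts for the four-instruction sequence above.  */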
9558 if (CONST_INT_P (XEXP (x, 1))
9559 && exact_log2 (INTVAL (XEXP (x, 1))) > 0
9560 && mode == SImode)
9561 {
9562 *cost += COSTS_N_INSNS (3);
9563
9564 if (speed_p)
9565 *cost += 2 * extra_cost->alu.logical
9566 + extra_cost->alu.arith;
9567 return true;
9568 }
9569
9570 /* Fall-through. */
9571 case UMOD:
9572 /* Make the cost of sdiv more expensive so that when both sdiv and udiv
9573 are possible, udiv is preferred. */
9574 *cost = LIBCALL_COST (2) + (code == MOD ? COSTS_N_INSNS (1) : 0);
9575 return false; /* All arguments must be in registers. */
9576
9577 case ROTATE:
9578 if (mode == SImode && REG_P (XEXP (x, 1)))
9579 {
9580 *cost += (COSTS_N_INSNS (1)
9581 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9582 if (speed_p)
9583 *cost += extra_cost->alu.shift_reg;
9584 return true;
9585 }
9586 /* Fall through */
9587 case ROTATERT:
9588 case ASHIFT:
9589 case LSHIFTRT:
9590 case ASHIFTRT:
9591 if (mode == DImode && CONST_INT_P (XEXP (x, 1)))
9592 {
9593 *cost += (COSTS_N_INSNS (2)
9594 + rtx_cost (XEXP (x, 0), mode, code, 0, speed_p));
9595 if (speed_p)
9596 *cost += 2 * extra_cost->alu.shift;
9597 /* Slightly disparage left shift by 1 so that we prefer adddi3. */
9598 if (code == ASHIFT && XEXP (x, 1) == CONST1_RTX (SImode))
9599 *cost += 1;
9600 return true;
9601 }
9602 else if (mode == SImode)
9603 {
9604 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9605 /* Slightly disparage register shifts at -Os, but not by much. */
9606 if (!CONST_INT_P (XEXP (x, 1)))
9607 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9608 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9609 return true;
9610 }
9611 else if (GET_MODE_CLASS (mode) == MODE_INT
9612 && GET_MODE_SIZE (mode) < 4)
9613 {
9614 if (code == ASHIFT)
9615 {
9616 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9617 /* Slightly disparage register shifts at -Os, but not by
9618 much. */
9619 if (!CONST_INT_P (XEXP (x, 1)))
9620 *cost += (speed_p ? extra_cost->alu.shift_reg : 1
9621 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
9622 }
9623 else if (code == LSHIFTRT || code == ASHIFTRT)
9624 {
9625 if (arm_arch_thumb2 && CONST_INT_P (XEXP (x, 1)))
9626 {
9627 /* Can use SBFX/UBFX. */
9628 if (speed_p)
9629 *cost += extra_cost->alu.bfx;
9630 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9631 }
9632 else
9633 {
9634 *cost += COSTS_N_INSNS (1);
9635 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9636 if (speed_p)
9637 {
9638 if (CONST_INT_P (XEXP (x, 1)))
9639 *cost += 2 * extra_cost->alu.shift;
9640 else
9641 *cost += (extra_cost->alu.shift
9642 + extra_cost->alu.shift_reg);
9643 }
9644 else
9645 /* Slightly disparage register shifts. */
9646 *cost += !CONST_INT_P (XEXP (x, 1));
9647 }
9648 }
9649 else /* Rotates. */
9650 {
9651 *cost = COSTS_N_INSNS (2 + !CONST_INT_P (XEXP (x, 1)));
9652 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
9653 if (speed_p)
9654 {
9655 if (CONST_INT_P (XEXP (x, 1)))
9656 *cost += (2 * extra_cost->alu.shift
9657 + extra_cost->alu.log_shift);
9658 else
9659 *cost += (extra_cost->alu.shift
9660 + extra_cost->alu.shift_reg
9661 + extra_cost->alu.log_shift_reg);
9662 }
9663 }
9664 return true;
9665 }
9666
9667 *cost = LIBCALL_COST (2);
9668 return false;
9669
9670 case BSWAP:
9671 if (arm_arch6)
9672 {
9673 if (mode == SImode)
9674 {
9675 if (speed_p)
9676 *cost += extra_cost->alu.rev;
9677
9678 return false;
9679 }
9680 }
9681 else
9682 {
9683 /* No rev instruction available. Look at arm_legacy_rev
9684 and thumb_legacy_rev for the form of RTL used then. */
9685 if (TARGET_THUMB)
9686 {
9687 *cost += COSTS_N_INSNS (9);
9688
9689 if (speed_p)
9690 {
9691 *cost += 6 * extra_cost->alu.shift;
9692 *cost += 3 * extra_cost->alu.logical;
9693 }
9694 }
9695 else
9696 {
9697 *cost += COSTS_N_INSNS (4);
9698
9699 if (speed_p)
9700 {
9701 *cost += 2 * extra_cost->alu.shift;
9702 *cost += extra_cost->alu.arith_shift;
9703 *cost += 2 * extra_cost->alu.logical;
9704 }
9705 }
9706 return true;
9707 }
9708 return false;
9709
9710 case MINUS:
9711 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9712 && (mode == SFmode || !TARGET_VFP_SINGLE))
9713 {
9714 if (GET_CODE (XEXP (x, 0)) == MULT
9715 || GET_CODE (XEXP (x, 1)) == MULT)
9716 {
9717 rtx mul_op0, mul_op1, sub_op;
9718
9719 if (speed_p)
9720 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9721
9722 if (GET_CODE (XEXP (x, 0)) == MULT)
9723 {
9724 mul_op0 = XEXP (XEXP (x, 0), 0);
9725 mul_op1 = XEXP (XEXP (x, 0), 1);
9726 sub_op = XEXP (x, 1);
9727 }
9728 else
9729 {
9730 mul_op0 = XEXP (XEXP (x, 1), 0);
9731 mul_op1 = XEXP (XEXP (x, 1), 1);
9732 sub_op = XEXP (x, 0);
9733 }
9734
9735 /* The first operand of the multiply may be optionally
9736 negated. */
9737 if (GET_CODE (mul_op0) == NEG)
9738 mul_op0 = XEXP (mul_op0, 0);
9739
9740 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9741 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9742 + rtx_cost (sub_op, mode, code, 0, speed_p));
9743
9744 return true;
9745 }
9746
9747 if (speed_p)
9748 *cost += extra_cost->fp[mode != SFmode].addsub;
9749 return false;
9750 }
9751
9752 if (mode == SImode)
9753 {
9754 rtx shift_by_reg = NULL;
9755 rtx shift_op;
9756 rtx non_shift_op;
9757
9758 shift_op = shifter_op_p (XEXP (x, 0), &shift_by_reg);
9759 if (shift_op == NULL)
9760 {
9761 shift_op = shifter_op_p (XEXP (x, 1), &shift_by_reg);
9762 non_shift_op = XEXP (x, 0);
9763 }
9764 else
9765 non_shift_op = XEXP (x, 1);
9766
9767 if (shift_op != NULL)
9768 {
9769 if (shift_by_reg != NULL)
9770 {
9771 if (speed_p)
9772 *cost += extra_cost->alu.arith_shift_reg;
9773 *cost += rtx_cost (shift_by_reg, mode, code, 0, speed_p);
9774 }
9775 else if (speed_p)
9776 *cost += extra_cost->alu.arith_shift;
9777
9778 *cost += rtx_cost (shift_op, mode, code, 0, speed_p);
9779 *cost += rtx_cost (non_shift_op, mode, code, 0, speed_p);
9780 return true;
9781 }
9782
9783 if (arm_arch_thumb2
9784 && GET_CODE (XEXP (x, 1)) == MULT)
9785 {
9786 /* MLS. */
9787 if (speed_p)
9788 *cost += extra_cost->mult[0].add;
9789 *cost += rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p);
9790 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode, MULT, 0, speed_p);
9791 *cost += rtx_cost (XEXP (XEXP (x, 1), 1), mode, MULT, 1, speed_p);
9792 return true;
9793 }
9794
9795 if (CONST_INT_P (XEXP (x, 0)))
9796 {
9797 int insns = arm_gen_constant (MINUS, SImode, NULL_RTX,
9798 INTVAL (XEXP (x, 0)), NULL_RTX,
9799 NULL_RTX, 1, 0);
9800 *cost = COSTS_N_INSNS (insns);
9801 if (speed_p)
9802 *cost += insns * extra_cost->alu.arith;
9803 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9804 return true;
9805 }
9806 else if (speed_p)
9807 *cost += extra_cost->alu.arith;
9808
9809 return false;
9810 }
9811
9812 if (GET_MODE_CLASS (mode) == MODE_INT
9813 && GET_MODE_SIZE (mode) < 4)
9814 {
9815 rtx shift_op, shift_reg;
9816 shift_reg = NULL;
9817
9818 /* We check both sides of the MINUS for shifter operands since,
9819 unlike PLUS, it's not commutative. */
9820
9821 HANDLE_NARROW_SHIFT_ARITH (MINUS, 0);
9822 HANDLE_NARROW_SHIFT_ARITH (MINUS, 1);
9823
9824 /* Slightly disparage, as we might need to widen the result. */
9825 *cost += 1;
9826 if (speed_p)
9827 *cost += extra_cost->alu.arith;
9828
9829 if (CONST_INT_P (XEXP (x, 0)))
9830 {
9831 *cost += rtx_cost (XEXP (x, 1), mode, code, 1, speed_p);
9832 return true;
9833 }
9834
9835 return false;
9836 }
9837
9838 if (mode == DImode)
9839 {
9840 *cost += COSTS_N_INSNS (1);
9841
9842 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND)
9843 {
9844 rtx op1 = XEXP (x, 1);
9845
9846 if (speed_p)
9847 *cost += 2 * extra_cost->alu.arith;
9848
9849 if (GET_CODE (op1) == ZERO_EXTEND)
9850 *cost += rtx_cost (XEXP (op1, 0), VOIDmode, ZERO_EXTEND,
9851 0, speed_p);
9852 else
9853 *cost += rtx_cost (op1, mode, MINUS, 1, speed_p);
9854 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9855 0, speed_p);
9856 return true;
9857 }
9858 else if (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
9859 {
9860 if (speed_p)
9861 *cost += extra_cost->alu.arith + extra_cost->alu.arith_shift;
9862 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, SIGN_EXTEND,
9863 0, speed_p)
9864 + rtx_cost (XEXP (x, 1), mode, MINUS, 1, speed_p));
9865 return true;
9866 }
9867 else if (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9868 || GET_CODE (XEXP (x, 1)) == SIGN_EXTEND)
9869 {
9870 if (speed_p)
9871 *cost += (extra_cost->alu.arith
9872 + (GET_CODE (XEXP (x, 1)) == ZERO_EXTEND
9873 ? extra_cost->alu.arith
9874 : extra_cost->alu.arith_shift));
9875 *cost += (rtx_cost (XEXP (x, 0), mode, MINUS, 0, speed_p)
9876 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
9877 GET_CODE (XEXP (x, 1)), 0, speed_p));
9878 return true;
9879 }
9880
9881 if (speed_p)
9882 *cost += 2 * extra_cost->alu.arith;
9883 return false;
9884 }
9885
9886 /* Vector mode? */
9887
9888 *cost = LIBCALL_COST (2);
9889 return false;
9890
9891 case PLUS:
9892 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
9893 && (mode == SFmode || !TARGET_VFP_SINGLE))
9894 {
9895 if (GET_CODE (XEXP (x, 0)) == MULT)
9896 {
9897 rtx mul_op0, mul_op1, add_op;
9898
9899 if (speed_p)
9900 *cost += extra_cost->fp[mode != SFmode].mult_addsub;
9901
9902 mul_op0 = XEXP (XEXP (x, 0), 0);
9903 mul_op1 = XEXP (XEXP (x, 0), 1);
9904 add_op = XEXP (x, 1);
9905
9906 *cost += (rtx_cost (mul_op0, mode, code, 0, speed_p)
9907 + rtx_cost (mul_op1, mode, code, 0, speed_p)
9908 + rtx_cost (add_op, mode, code, 0, speed_p));
9909
9910 return true;
9911 }
9912
9913 if (speed_p)
9914 *cost += extra_cost->fp[mode != SFmode].addsub;
9915 return false;
9916 }
9917 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
9918 {
9919 *cost = LIBCALL_COST (2);
9920 return false;
9921 }
9922
9923 /* Narrow modes can be synthesized in SImode, but the range
9924 of useful sub-operations is limited. Check for shift operations
9925 on one of the operands. Only left shifts can be used in the
9926 narrow modes. */
9927 if (GET_MODE_CLASS (mode) == MODE_INT
9928 && GET_MODE_SIZE (mode) < 4)
9929 {
9930 rtx shift_op, shift_reg;
9931 shift_reg = NULL;
9932
9933 HANDLE_NARROW_SHIFT_ARITH (PLUS, 0);
9934
9935 if (CONST_INT_P (XEXP (x, 1)))
9936 {
9937 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
9938 INTVAL (XEXP (x, 1)), NULL_RTX,
9939 NULL_RTX, 1, 0);
9940 *cost = COSTS_N_INSNS (insns);
9941 if (speed_p)
9942 *cost += insns * extra_cost->alu.arith;
9943 /* Slightly penalize a narrow operation as the result may
9944 need widening. */
9945 *cost += 1 + rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
9946 return true;
9947 }
9948
9949 /* Slightly penalize a narrow operation as the result may
9950 need widening. */
9951 *cost += 1;
9952 if (speed_p)
9953 *cost += extra_cost->alu.arith;
9954
9955 return false;
9956 }
9957
9958 if (mode == SImode)
9959 {
9960 rtx shift_op, shift_reg;
9961
9962 if (TARGET_INT_SIMD
9963 && (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
9964 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND))
9965 {
9966 /* UXTA[BH] or SXTA[BH]. */
9967 if (speed_p)
9968 *cost += extra_cost->alu.extend_arith;
9969 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
9970 0, speed_p)
9971 + rtx_cost (XEXP (x, 1), mode, PLUS, 0, speed_p));
9972 return true;
9973 }
9974
9975 shift_reg = NULL;
9976 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
9977 if (shift_op != NULL)
9978 {
9979 if (shift_reg)
9980 {
9981 if (speed_p)
9982 *cost += extra_cost->alu.arith_shift_reg;
9983 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
9984 }
9985 else if (speed_p)
9986 *cost += extra_cost->alu.arith_shift;
9987
9988 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
9989 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
9990 return true;
9991 }
9992 if (GET_CODE (XEXP (x, 0)) == MULT)
9993 {
9994 rtx mul_op = XEXP (x, 0);
9995
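/* Detect a 16-bit x 16-bit multiply feeding the addition, where each
   multiply operand is either a sign-extension or the top halfword
   selected by an arithmetic shift right of 16; such a combination can
   typically be implemented with one of the SMLAxy instructions.  */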
9996 if (TARGET_DSP_MULTIPLY
9997 && ((GET_CODE (XEXP (mul_op, 0)) == SIGN_EXTEND
9998 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
9999 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10000 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10001 && INTVAL (XEXP (XEXP (mul_op, 1), 1)) == 16)))
10002 || (GET_CODE (XEXP (mul_op, 0)) == ASHIFTRT
10003 && CONST_INT_P (XEXP (XEXP (mul_op, 0), 1))
10004 && INTVAL (XEXP (XEXP (mul_op, 0), 1)) == 16
10005 && (GET_CODE (XEXP (mul_op, 1)) == SIGN_EXTEND
10006 || (GET_CODE (XEXP (mul_op, 1)) == ASHIFTRT
10007 && CONST_INT_P (XEXP (XEXP (mul_op, 1), 1))
10008 && (INTVAL (XEXP (XEXP (mul_op, 1), 1))
10009 == 16))))))
10010 {
10011 /* SMLA[BT][BT]. */
10012 if (speed_p)
10013 *cost += extra_cost->mult[0].extend_add;
10014 *cost += (rtx_cost (XEXP (XEXP (mul_op, 0), 0), mode,
10015 SIGN_EXTEND, 0, speed_p)
10016 + rtx_cost (XEXP (XEXP (mul_op, 1), 0), mode,
10017 SIGN_EXTEND, 0, speed_p)
10018 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10019 return true;
10020 }
10021
10022 if (speed_p)
10023 *cost += extra_cost->mult[0].add;
10024 *cost += (rtx_cost (XEXP (mul_op, 0), mode, MULT, 0, speed_p)
10025 + rtx_cost (XEXP (mul_op, 1), mode, MULT, 1, speed_p)
10026 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10027 return true;
10028 }
10029 if (CONST_INT_P (XEXP (x, 1)))
10030 {
10031 int insns = arm_gen_constant (PLUS, SImode, NULL_RTX,
10032 INTVAL (XEXP (x, 1)), NULL_RTX,
10033 NULL_RTX, 1, 0);
10034 *cost = COSTS_N_INSNS (insns);
10035 if (speed_p)
10036 *cost += insns * extra_cost->alu.arith;
10037 *cost += rtx_cost (XEXP (x, 0), mode, PLUS, 0, speed_p);
10038 return true;
10039 }
10040 else if (speed_p)
10041 *cost += extra_cost->alu.arith;
10042
10043 return false;
10044 }
10045
10046 if (mode == DImode)
10047 {
10048 if (GET_CODE (XEXP (x, 0)) == MULT
10049 && ((GET_CODE (XEXP (XEXP (x, 0), 0)) == ZERO_EXTEND
10050 && GET_CODE (XEXP (XEXP (x, 0), 1)) == ZERO_EXTEND)
10051 || (GET_CODE (XEXP (XEXP (x, 0), 0)) == SIGN_EXTEND
10052 && GET_CODE (XEXP (XEXP (x, 0), 1)) == SIGN_EXTEND)))
10053 {
10054 if (speed_p)
10055 *cost += extra_cost->mult[1].extend_add;
10056 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
10057 ZERO_EXTEND, 0, speed_p)
10058 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 1), 0), mode,
10059 ZERO_EXTEND, 0, speed_p)
10060 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10061 return true;
10062 }
10063
10064 *cost += COSTS_N_INSNS (1);
10065
10066 if (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10067 || GET_CODE (XEXP (x, 0)) == SIGN_EXTEND)
10068 {
10069 if (speed_p)
10070 *cost += (extra_cost->alu.arith
10071 + (GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10072 ? extra_cost->alu.arith
10073 : extra_cost->alu.arith_shift));
10074
10075 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode, ZERO_EXTEND,
10076 0, speed_p)
10077 + rtx_cost (XEXP (x, 1), mode, PLUS, 1, speed_p));
10078 return true;
10079 }
10080
10081 if (speed_p)
10082 *cost += 2 * extra_cost->alu.arith;
10083 return false;
10084 }
10085
10086 /* Vector mode? */
10087 *cost = LIBCALL_COST (2);
10088 return false;
10089 case IOR:
10090 if (mode == SImode && arm_arch6 && aarch_rev16_p (x))
10091 {
10092 if (speed_p)
10093 *cost += extra_cost->alu.rev;
10094
10095 return true;
10096 }
10097 /* Fall through. */
10098 case AND: case XOR:
10099 if (mode == SImode)
10100 {
10101 enum rtx_code subcode = GET_CODE (XEXP (x, 0));
10102 rtx op0 = XEXP (x, 0);
10103 rtx shift_op, shift_reg;
10104
10105 if (subcode == NOT
10106 && (code == AND
10107 || (code == IOR && TARGET_THUMB2)))
10108 op0 = XEXP (op0, 0);
10109
10110 shift_reg = NULL;
10111 shift_op = shifter_op_p (op0, &shift_reg);
10112 if (shift_op != NULL)
10113 {
10114 if (shift_reg)
10115 {
10116 if (speed_p)
10117 *cost += extra_cost->alu.log_shift_reg;
10118 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10119 }
10120 else if (speed_p)
10121 *cost += extra_cost->alu.log_shift;
10122
10123 *cost += (rtx_cost (shift_op, mode, ASHIFT, 0, speed_p)
10124 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10125 return true;
10126 }
10127
10128 if (CONST_INT_P (XEXP (x, 1)))
10129 {
10130 int insns = arm_gen_constant (code, SImode, NULL_RTX,
10131 INTVAL (XEXP (x, 1)), NULL_RTX,
10132 NULL_RTX, 1, 0);
10133
10134 *cost = COSTS_N_INSNS (insns);
10135 if (speed_p)
10136 *cost += insns * extra_cost->alu.logical;
10137 *cost += rtx_cost (op0, mode, code, 0, speed_p);
10138 return true;
10139 }
10140
10141 if (speed_p)
10142 *cost += extra_cost->alu.logical;
10143 *cost += (rtx_cost (op0, mode, code, 0, speed_p)
10144 + rtx_cost (XEXP (x, 1), mode, code, 1, speed_p));
10145 return true;
10146 }
10147
10148 if (mode == DImode)
10149 {
10150 rtx op0 = XEXP (x, 0);
10151 enum rtx_code subcode = GET_CODE (op0);
10152
10153 *cost += COSTS_N_INSNS (1);
10154
10155 if (subcode == NOT
10156 && (code == AND
10157 || (code == IOR && TARGET_THUMB2)))
10158 op0 = XEXP (op0, 0);
10159
10160 if (GET_CODE (op0) == ZERO_EXTEND)
10161 {
10162 if (speed_p)
10163 *cost += 2 * extra_cost->alu.logical;
10164
10165 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, ZERO_EXTEND,
10166 0, speed_p)
10167 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10168 return true;
10169 }
10170 else if (GET_CODE (op0) == SIGN_EXTEND)
10171 {
10172 if (speed_p)
10173 *cost += extra_cost->alu.logical + extra_cost->alu.log_shift;
10174
10175 *cost += (rtx_cost (XEXP (op0, 0), VOIDmode, SIGN_EXTEND,
10176 0, speed_p)
10177 + rtx_cost (XEXP (x, 1), mode, code, 0, speed_p));
10178 return true;
10179 }
10180
10181 if (speed_p)
10182 *cost += 2 * extra_cost->alu.logical;
10183
10184 return true;
10185 }
10186 /* Vector mode? */
10187
10188 *cost = LIBCALL_COST (2);
10189 return false;
10190
10191 case MULT:
10192 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10193 && (mode == SFmode || !TARGET_VFP_SINGLE))
10194 {
10195 rtx op0 = XEXP (x, 0);
10196
10197 if (GET_CODE (op0) == NEG && !flag_rounding_math)
10198 op0 = XEXP (op0, 0);
10199
10200 if (speed_p)
10201 *cost += extra_cost->fp[mode != SFmode].mult;
10202
10203 *cost += (rtx_cost (op0, mode, MULT, 0, speed_p)
10204 + rtx_cost (XEXP (x, 1), mode, MULT, 1, speed_p));
10205 return true;
10206 }
10207 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10208 {
10209 *cost = LIBCALL_COST (2);
10210 return false;
10211 }
10212
10213 if (mode == SImode)
10214 {
10215 if (TARGET_DSP_MULTIPLY
10216 && ((GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10217 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10218 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10219 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10220 && INTVAL (XEXP (XEXP (x, 1), 1)) == 16)))
10221 || (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10222 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10223 && INTVAL (XEXP (XEXP (x, 0), 1)) == 16
10224 && (GET_CODE (XEXP (x, 1)) == SIGN_EXTEND
10225 || (GET_CODE (XEXP (x, 1)) == ASHIFTRT
10226 && CONST_INT_P (XEXP (XEXP (x, 1), 1))
10227 && (INTVAL (XEXP (XEXP (x, 1), 1))
10228 == 16))))))
10229 {
10230 /* SMUL[TB][TB]. */
10231 if (speed_p)
10232 *cost += extra_cost->mult[0].extend;
10233 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode,
10234 SIGN_EXTEND, 0, speed_p);
10235 *cost += rtx_cost (XEXP (XEXP (x, 1), 0), mode,
10236 SIGN_EXTEND, 1, speed_p);
10237 return true;
10238 }
10239 if (speed_p)
10240 *cost += extra_cost->mult[0].simple;
10241 return false;
10242 }
10243
10244 if (mode == DImode)
10245 {
10246 if ((GET_CODE (XEXP (x, 0)) == ZERO_EXTEND
10247 && GET_CODE (XEXP (x, 1)) == ZERO_EXTEND)
10248 || (GET_CODE (XEXP (x, 0)) == SIGN_EXTEND
10249 && GET_CODE (XEXP (x, 1)) == SIGN_EXTEND))
10250 {
10251 if (speed_p)
10252 *cost += extra_cost->mult[1].extend;
10253 *cost += (rtx_cost (XEXP (XEXP (x, 0), 0), VOIDmode,
10254 ZERO_EXTEND, 0, speed_p)
10255 + rtx_cost (XEXP (XEXP (x, 1), 0), VOIDmode,
10256 ZERO_EXTEND, 0, speed_p));
10257 return true;
10258 }
10259
10260 *cost = LIBCALL_COST (2);
10261 return false;
10262 }
10263
10264 /* Vector mode? */
10265 *cost = LIBCALL_COST (2);
10266 return false;
10267
10268 case NEG:
10269 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10270 && (mode == SFmode || !TARGET_VFP_SINGLE))
10271 {
10272 if (GET_CODE (XEXP (x, 0)) == MULT)
10273 {
10274 /* VNMUL. */
10275 *cost = rtx_cost (XEXP (x, 0), mode, NEG, 0, speed_p);
10276 return true;
10277 }
10278
10279 if (speed_p)
10280 *cost += extra_cost->fp[mode != SFmode].neg;
10281
10282 return false;
10283 }
10284 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10285 {
10286 *cost = LIBCALL_COST (1);
10287 return false;
10288 }
10289
10290 if (mode == SImode)
10291 {
10292 if (GET_CODE (XEXP (x, 0)) == ABS)
10293 {
10294 *cost += COSTS_N_INSNS (1);
10295 /* Assume the non-flag-changing variant. */
10296 if (speed_p)
10297 *cost += (extra_cost->alu.log_shift
10298 + extra_cost->alu.arith_shift);
10299 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, ABS, 0, speed_p);
10300 return true;
10301 }
10302
10303 if (GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMPARE
10304 || GET_RTX_CLASS (GET_CODE (XEXP (x, 0))) == RTX_COMM_COMPARE)
10305 {
10306 *cost += COSTS_N_INSNS (1);
10307 /* No extra cost for MOV imm and MVN imm. */
10308 /* If the comparison op is using the flags, there's no further
10309 cost; otherwise we need to add the cost of the comparison. */
10310 if (!(REG_P (XEXP (XEXP (x, 0), 0))
10311 && REGNO (XEXP (XEXP (x, 0), 0)) == CC_REGNUM
10312 && XEXP (XEXP (x, 0), 1) == const0_rtx))
10313 {
10314 mode = GET_MODE (XEXP (XEXP (x, 0), 0));
10315 *cost += (COSTS_N_INSNS (1)
10316 + rtx_cost (XEXP (XEXP (x, 0), 0), mode, COMPARE,
10317 0, speed_p)
10318 + rtx_cost (XEXP (XEXP (x, 0), 1), mode, COMPARE,
10319 1, speed_p));
10320 if (speed_p)
10321 *cost += extra_cost->alu.arith;
10322 }
10323 return true;
10324 }
10325
10326 if (speed_p)
10327 *cost += extra_cost->alu.arith;
10328 return false;
10329 }
10330
10331 if (GET_MODE_CLASS (mode) == MODE_INT
10332 && GET_MODE_SIZE (mode) < 4)
10333 {
10334 /* Slightly disparage, as we might need an extend operation. */
10335 *cost += 1;
10336 if (speed_p)
10337 *cost += extra_cost->alu.arith;
10338 return false;
10339 }
10340
10341 if (mode == DImode)
10342 {
10343 *cost += COSTS_N_INSNS (1);
10344 if (speed_p)
10345 *cost += 2 * extra_cost->alu.arith;
10346 return false;
10347 }
10348
10349 /* Vector mode? */
10350 *cost = LIBCALL_COST (1);
10351 return false;
10352
10353 case NOT:
10354 if (mode == SImode)
10355 {
10356 rtx shift_op;
10357 rtx shift_reg = NULL;
10358
10359 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10360
10361 if (shift_op)
10362 {
10363 if (shift_reg != NULL)
10364 {
10365 if (speed_p)
10366 *cost += extra_cost->alu.log_shift_reg;
10367 *cost += rtx_cost (shift_reg, mode, ASHIFT, 1, speed_p);
10368 }
10369 else if (speed_p)
10370 *cost += extra_cost->alu.log_shift;
10371 *cost += rtx_cost (shift_op, mode, ASHIFT, 0, speed_p);
10372 return true;
10373 }
10374
10375 if (speed_p)
10376 *cost += extra_cost->alu.logical;
10377 return false;
10378 }
10379 if (mode == DImode)
10380 {
10381 *cost += COSTS_N_INSNS (1);
10382 return false;
10383 }
10384
10385 /* Vector mode? */
10386
10387 *cost += LIBCALL_COST (1);
10388 return false;
10389
10390 case IF_THEN_ELSE:
10391 {
10392 if (GET_CODE (XEXP (x, 1)) == PC || GET_CODE (XEXP (x, 2)) == PC)
10393 {
10394 *cost += COSTS_N_INSNS (3);
10395 return true;
10396 }
10397 int op1cost = rtx_cost (XEXP (x, 1), mode, SET, 1, speed_p);
10398 int op2cost = rtx_cost (XEXP (x, 2), mode, SET, 1, speed_p);
10399
10400 *cost = rtx_cost (XEXP (x, 0), mode, IF_THEN_ELSE, 0, speed_p);
10401 /* Assume that if one arm of the if_then_else is a register,
10402 it will be tied with the result and the conditional insn
10403 eliminated. */
10404 if (REG_P (XEXP (x, 1)))
10405 *cost += op2cost;
10406 else if (REG_P (XEXP (x, 2)))
10407 *cost += op1cost;
10408 else
10409 {
10410 if (speed_p)
10411 {
10412 if (extra_cost->alu.non_exec_costs_exec)
10413 *cost += op1cost + op2cost + extra_cost->alu.non_exec;
10414 else
10415 *cost += MAX (op1cost, op2cost) + extra_cost->alu.non_exec;
10416 }
10417 else
10418 *cost += op1cost + op2cost;
10419 }
10420 }
10421 return true;
10422
10423 case COMPARE:
10424 if (cc_register (XEXP (x, 0), VOIDmode) && XEXP (x, 1) == const0_rtx)
10425 *cost = 0;
10426 else
10427 {
10428 machine_mode op0mode;
10429 /* We'll mostly assume that the cost of a compare is the cost of the
10430 LHS. However, there are some notable exceptions. */
10431
10432 /* Floating point compares are never done as side-effects. */
10433 op0mode = GET_MODE (XEXP (x, 0));
10434 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (op0mode) == MODE_FLOAT
10435 && (op0mode == SFmode || !TARGET_VFP_SINGLE))
10436 {
10437 if (speed_p)
10438 *cost += extra_cost->fp[op0mode != SFmode].compare;
10439
10440 if (XEXP (x, 1) == CONST0_RTX (op0mode))
10441 {
10442 *cost += rtx_cost (XEXP (x, 0), op0mode, code, 0, speed_p);
10443 return true;
10444 }
10445
10446 return false;
10447 }
10448 else if (GET_MODE_CLASS (op0mode) == MODE_FLOAT)
10449 {
10450 *cost = LIBCALL_COST (2);
10451 return false;
10452 }
10453
10454 /* DImode compares normally take two insns. */
10455 if (op0mode == DImode)
10456 {
10457 *cost += COSTS_N_INSNS (1);
10458 if (speed_p)
10459 *cost += 2 * extra_cost->alu.arith;
10460 return false;
10461 }
10462
10463 if (op0mode == SImode)
10464 {
10465 rtx shift_op;
10466 rtx shift_reg;
10467
10468 if (XEXP (x, 1) == const0_rtx
10469 && !(REG_P (XEXP (x, 0))
10470 || (GET_CODE (XEXP (x, 0)) == SUBREG
10471 && REG_P (SUBREG_REG (XEXP (x, 0))))))
10472 {
10473 *cost = rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10474
10475 /* Multiply operations that set the flags are often
10476 significantly more expensive. */
10477 if (speed_p
10478 && GET_CODE (XEXP (x, 0)) == MULT
10479 && !power_of_two_operand (XEXP (XEXP (x, 0), 1), mode))
10480 *cost += extra_cost->mult[0].flag_setting;
10481
10482 if (speed_p
10483 && GET_CODE (XEXP (x, 0)) == PLUS
10484 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10485 && !power_of_two_operand (XEXP (XEXP (XEXP (x, 0),
10486 0), 1), mode))
10487 *cost += extra_cost->mult[0].flag_setting;
10488 return true;
10489 }
10490
10491 shift_reg = NULL;
10492 shift_op = shifter_op_p (XEXP (x, 0), &shift_reg);
10493 if (shift_op != NULL)
10494 {
10495 if (shift_reg != NULL)
10496 {
10497 *cost += rtx_cost (shift_reg, op0mode, ASHIFT,
10498 1, speed_p);
10499 if (speed_p)
10500 *cost += extra_cost->alu.arith_shift_reg;
10501 }
10502 else if (speed_p)
10503 *cost += extra_cost->alu.arith_shift;
10504 *cost += rtx_cost (shift_op, op0mode, ASHIFT, 0, speed_p);
10505 *cost += rtx_cost (XEXP (x, 1), op0mode, COMPARE, 1, speed_p);
10506 return true;
10507 }
10508
10509 if (speed_p)
10510 *cost += extra_cost->alu.arith;
10511 if (CONST_INT_P (XEXP (x, 1))
10512 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10513 {
10514 *cost += rtx_cost (XEXP (x, 0), op0mode, COMPARE, 0, speed_p);
10515 return true;
10516 }
10517 return false;
10518 }
10519
10520 /* Vector mode? */
10521
10522 *cost = LIBCALL_COST (2);
10523 return false;
10524 }
10525 return true;
10526
10527 case EQ:
10528 case NE:
10529 case LT:
10530 case LE:
10531 case GT:
10532 case GE:
10533 case LTU:
10534 case LEU:
10535 case GEU:
10536 case GTU:
10537 case ORDERED:
10538 case UNORDERED:
10539 case UNEQ:
10540 case UNLE:
10541 case UNLT:
10542 case UNGE:
10543 case UNGT:
10544 case LTGT:
10545 if (outer_code == SET)
10546 {
10547 /* Is it a store-flag operation? */
10548 if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10549 && XEXP (x, 1) == const0_rtx)
10550 {
10551 /* Thumb also needs an IT insn. */
10552 *cost += COSTS_N_INSNS (TARGET_THUMB ? 2 : 1);
10553 return true;
10554 }
10555 if (XEXP (x, 1) == const0_rtx)
10556 {
10557 switch (code)
10558 {
10559 case LT:
10560 /* LSR Rd, Rn, #31. */
10561 if (speed_p)
10562 *cost += extra_cost->alu.shift;
10563 break;
10564
10565 case EQ:
10566 /* RSBS T1, Rn, #0
10567 ADC Rd, Rn, T1. */
10568
10569 case NE:
10570 /* SUBS T1, Rn, #1
10571 SBC Rd, Rn, T1. */
10572 *cost += COSTS_N_INSNS (1);
10573 break;
10574
10575 case LE:
10576 /* RSBS T1, Rn, Rn, LSR #31
10577 ADC Rd, Rn, T1. */
10578 *cost += COSTS_N_INSNS (1);
10579 if (speed_p)
10580 *cost += extra_cost->alu.arith_shift;
10581 break;
10582
10583 case GT:
10584 /* RSB Rd, Rn, Rn, ASR #1
10585 LSR Rd, Rd, #31. */
10586 *cost += COSTS_N_INSNS (1);
10587 if (speed_p)
10588 *cost += (extra_cost->alu.arith_shift
10589 + extra_cost->alu.shift);
10590 break;
10591
10592 case GE:
10593 /* ASR Rd, Rn, #31
10594 ADD Rd, Rn, #1. */
10595 *cost += COSTS_N_INSNS (1);
10596 if (speed_p)
10597 *cost += extra_cost->alu.shift;
10598 break;
10599
10600 default:
10601 /* Remaining cases are either meaningless or would take
10602 three insns anyway. */
10603 *cost = COSTS_N_INSNS (3);
10604 break;
10605 }
10606 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10607 return true;
10608 }
10609 else
10610 {
10611 *cost += COSTS_N_INSNS (TARGET_THUMB ? 3 : 2);
10612 if (CONST_INT_P (XEXP (x, 1))
10613 && const_ok_for_op (INTVAL (XEXP (x, 1)), COMPARE))
10614 {
10615 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10616 return true;
10617 }
10618
10619 return false;
10620 }
10621 }
10622 /* Not directly inside a set. If it involves the condition code
10623 register it must be the condition for a branch, cond_exec or
10624 I_T_E operation. Since the comparison is performed elsewhere
10625 this is just the control part which has no additional
10626 cost. */
10627 else if (REG_P (XEXP (x, 0)) && REGNO (XEXP (x, 0)) == CC_REGNUM
10628 && XEXP (x, 1) == const0_rtx)
10629 {
10630 *cost = 0;
10631 return true;
10632 }
10633 return false;
10634
10635 case ABS:
10636 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10637 && (mode == SFmode || !TARGET_VFP_SINGLE))
10638 {
10639 if (speed_p)
10640 *cost += extra_cost->fp[mode != SFmode].neg;
10641
10642 return false;
10643 }
10644 else if (GET_MODE_CLASS (mode) == MODE_FLOAT)
10645 {
10646 *cost = LIBCALL_COST (1);
10647 return false;
10648 }
10649
10650 if (mode == SImode)
10651 {
10652 if (speed_p)
10653 *cost += extra_cost->alu.log_shift + extra_cost->alu.arith_shift;
10654 return false;
10655 }
10656 /* Vector mode? */
10657 *cost = LIBCALL_COST (1);
10658 return false;
10659
10660 case SIGN_EXTEND:
10661 if ((arm_arch4 || GET_MODE (XEXP (x, 0)) == SImode)
10662 && MEM_P (XEXP (x, 0)))
10663 {
10664 if (mode == DImode)
10665 *cost += COSTS_N_INSNS (1);
10666
10667 if (!speed_p)
10668 return true;
10669
10670 if (GET_MODE (XEXP (x, 0)) == SImode)
10671 *cost += extra_cost->ldst.load;
10672 else
10673 *cost += extra_cost->ldst.load_sign_extend;
10674
10675 if (mode == DImode)
10676 *cost += extra_cost->alu.shift;
10677
10678 return true;
10679 }
10680
10681 /* Widening from less than 32-bits requires an extend operation. */
10682 if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10683 {
10684 /* We have SXTB/SXTH. */
10685 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10686 if (speed_p)
10687 *cost += extra_cost->alu.extend;
10688 }
10689 else if (GET_MODE (XEXP (x, 0)) != SImode)
10690 {
10691 /* Needs two shifts. */
10692 *cost += COSTS_N_INSNS (1);
10693 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10694 if (speed_p)
10695 *cost += 2 * extra_cost->alu.shift;
10696 }
10697
10698 /* Widening beyond 32-bits requires one more insn. */
10699 if (mode == DImode)
10700 {
10701 *cost += COSTS_N_INSNS (1);
10702 if (speed_p)
10703 *cost += extra_cost->alu.shift;
10704 }
10705
10706 return true;
10707
10708 case ZERO_EXTEND:
10709 if ((arm_arch4
10710 || GET_MODE (XEXP (x, 0)) == SImode
10711 || GET_MODE (XEXP (x, 0)) == QImode)
10712 && MEM_P (XEXP (x, 0)))
10713 {
10714 *cost = rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10715
10716 if (mode == DImode)
10717 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10718
10719 return true;
10720 }
10721
10722 /* Widening from less than 32-bits requires an extend operation. */
10723 if (GET_MODE (XEXP (x, 0)) == QImode)
10724 {
10725 /* UXTB can be a shorter instruction in Thumb2, but it might
10726 be slower than the AND Rd, Rn, #255 alternative. When
10727 optimizing for speed it should never be slower to use
10728 AND, and we don't really model 16-bit vs 32-bit insns
10729 here. */
10730 if (speed_p)
10731 *cost += extra_cost->alu.logical;
10732 }
10733 else if (GET_MODE (XEXP (x, 0)) != SImode && arm_arch6)
10734 {
10735 /* We have UXTB/UXTH. */
10736 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10737 if (speed_p)
10738 *cost += extra_cost->alu.extend;
10739 }
10740 else if (GET_MODE (XEXP (x, 0)) != SImode)
10741 {
10742 /* Needs two shifts. It's marginally preferable to use
10743 shifts rather than two BIC instructions as the second
10744 shift may merge with a subsequent insn as a shifter
10745 op. */
10746 *cost = COSTS_N_INSNS (2);
10747 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10748 if (speed_p)
10749 *cost += 2 * extra_cost->alu.shift;
10750 }
10751
10752 /* Widening beyond 32-bits requires one more insn. */
10753 if (mode == DImode)
10754 {
10755 *cost += COSTS_N_INSNS (1); /* No speed penalty. */
10756 }
10757
10758 return true;
10759
10760 case CONST_INT:
10761 *cost = 0;
10762 /* CONST_INT has no mode, so we cannot tell for sure how many
10763 insns are really going to be needed. The best we can do is
10764 look at the value passed. If it fits in SImode, then assume
10765 that's the mode it will be used for. Otherwise assume it
10766 will be used in DImode. */
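      /* For example, the 64-bit constant 0x0000000100000001 is not preserved
	 by truncation to SImode, so it is treated as DImode and costed as two
	 SImode constants of 1; each of those should be synthesizable by
	 arm_gen_constant in a single instruction, giving COSTS_N_INSNS (2)
	 before any extra costs.  */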
10767 if (INTVAL (x) == trunc_int_for_mode (INTVAL (x), SImode))
10768 mode = SImode;
10769 else
10770 mode = DImode;
10771
10772 /* Avoid blowing up in arm_gen_constant (). */
10773 if (!(outer_code == PLUS
10774 || outer_code == AND
10775 || outer_code == IOR
10776 || outer_code == XOR
10777 || outer_code == MINUS))
10778 outer_code = SET;
10779
10780 const_int_cost:
10781 if (mode == SImode)
10782 {
10783 *cost += COSTS_N_INSNS (arm_gen_constant (outer_code, SImode, NULL,
10784 INTVAL (x), NULL, NULL,
10785 0, 0));
10786 /* Extra costs? */
10787 }
10788 else
10789 {
10790 *cost += COSTS_N_INSNS (arm_gen_constant
10791 (outer_code, SImode, NULL,
10792 trunc_int_for_mode (INTVAL (x), SImode),
10793 NULL, NULL, 0, 0)
10794 + arm_gen_constant (outer_code, SImode, NULL,
10795 INTVAL (x) >> 32, NULL,
10796 NULL, 0, 0));
10797 /* Extra costs? */
10798 }
10799
10800 return true;
10801
10802 case CONST:
10803 case LABEL_REF:
10804 case SYMBOL_REF:
10805 if (speed_p)
10806 {
10807 if (arm_arch_thumb2 && !flag_pic)
10808 *cost += COSTS_N_INSNS (1);
10809 else
10810 *cost += extra_cost->ldst.load;
10811 }
10812 else
10813 *cost += COSTS_N_INSNS (1);
10814
10815 if (flag_pic)
10816 {
10817 *cost += COSTS_N_INSNS (1);
10818 if (speed_p)
10819 *cost += extra_cost->alu.arith;
10820 }
10821
10822 return true;
10823
10824 case CONST_FIXED:
10825 *cost = COSTS_N_INSNS (4);
10826 /* Fixme. */
10827 return true;
10828
10829 case CONST_DOUBLE:
10830 if (TARGET_HARD_FLOAT && GET_MODE_CLASS (mode) == MODE_FLOAT
10831 && (mode == SFmode || !TARGET_VFP_SINGLE))
10832 {
10833 if (vfp3_const_double_rtx (x))
10834 {
10835 if (speed_p)
10836 *cost += extra_cost->fp[mode == DFmode].fpconst;
10837 return true;
10838 }
10839
10840 if (speed_p)
10841 {
10842 if (mode == DFmode)
10843 *cost += extra_cost->ldst.loadd;
10844 else
10845 *cost += extra_cost->ldst.loadf;
10846 }
10847 else
10848 *cost += COSTS_N_INSNS (1 + (mode == DFmode));
10849
10850 return true;
10851 }
10852 *cost = COSTS_N_INSNS (4);
10853 return true;
10854
10855 case CONST_VECTOR:
10856 /* Fixme. */
10857 if (TARGET_NEON
10858 && TARGET_HARD_FLOAT
10859 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
10860 && neon_immediate_valid_for_move (x, mode, NULL, NULL))
10861 *cost = COSTS_N_INSNS (1);
10862 else
10863 *cost = COSTS_N_INSNS (4);
10864 return true;
10865
10866 case HIGH:
10867 case LO_SUM:
10868 /* When optimizing for size, we prefer constant pool entries to
10869 MOVW/MOVT pairs, so bump the cost of these slightly. */
10870 if (!speed_p)
10871 *cost += 1;
10872 return true;
10873
10874 case CLZ:
10875 if (speed_p)
10876 *cost += extra_cost->alu.clz;
10877 return false;
10878
10879 case SMIN:
10880 if (XEXP (x, 1) == const0_rtx)
10881 {
10882 if (speed_p)
10883 *cost += extra_cost->alu.log_shift;
10884 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10885 return true;
10886 }
10887 /* Fall through. */
10888 case SMAX:
10889 case UMIN:
10890 case UMAX:
10891 *cost += COSTS_N_INSNS (1);
10892 return false;
10893
10894 case TRUNCATE:
10895 if (GET_CODE (XEXP (x, 0)) == ASHIFTRT
10896 && CONST_INT_P (XEXP (XEXP (x, 0), 1))
10897 && INTVAL (XEXP (XEXP (x, 0), 1)) == 32
10898 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
10899 && ((GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == SIGN_EXTEND
10900 && GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1)) == SIGN_EXTEND)
10901 || (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 0)) == ZERO_EXTEND
10902 && (GET_CODE (XEXP (XEXP (XEXP (x, 0), 0), 1))
10903 == ZERO_EXTEND))))
10904 {
10905 if (speed_p)
10906 *cost += extra_cost->mult[1].extend;
10907 *cost += (rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), VOIDmode,
10908 ZERO_EXTEND, 0, speed_p)
10909 + rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 1), VOIDmode,
10910 ZERO_EXTEND, 0, speed_p));
10911 return true;
10912 }
10913 *cost = LIBCALL_COST (1);
10914 return false;
10915
10916 case UNSPEC_VOLATILE:
10917 case UNSPEC:
10918 return arm_unspec_cost (x, outer_code, speed_p, cost);
10919
10920 case PC:
10921 /* Reading the PC is like reading any other register. Writing it
10922 is more expensive, but we take that into account elsewhere. */
10923 *cost = 0;
10924 return true;
10925
10926 case ZERO_EXTRACT:
10927 /* TODO: Simple zero_extract of bottom bits using AND. */
10928 /* Fall through. */
10929 case SIGN_EXTRACT:
10930 if (arm_arch6
10931 && mode == SImode
10932 && CONST_INT_P (XEXP (x, 1))
10933 && CONST_INT_P (XEXP (x, 2)))
10934 {
10935 if (speed_p)
10936 *cost += extra_cost->alu.bfx;
10937 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
10938 return true;
10939 }
10940 /* Without UBFX/SBFX, need to resort to shift operations. */
10941 *cost += COSTS_N_INSNS (1);
10942 if (speed_p)
10943 *cost += 2 * extra_cost->alu.shift;
10944 *cost += rtx_cost (XEXP (x, 0), mode, ASHIFT, 0, speed_p);
10945 return true;
10946
10947 case FLOAT_EXTEND:
10948 if (TARGET_HARD_FLOAT)
10949 {
10950 if (speed_p)
10951 *cost += extra_cost->fp[mode == DFmode].widen;
10952 if (!TARGET_VFP5
10953 && GET_MODE (XEXP (x, 0)) == HFmode)
10954 {
10955 /* Pre v8, widening HF->DF is a two-step process, first
10956 widening to SFmode. */
10957 *cost += COSTS_N_INSNS (1);
10958 if (speed_p)
10959 *cost += extra_cost->fp[0].widen;
10960 }
10961 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10962 return true;
10963 }
10964
10965 *cost = LIBCALL_COST (1);
10966 return false;
10967
10968 case FLOAT_TRUNCATE:
10969 if (TARGET_HARD_FLOAT)
10970 {
10971 if (speed_p)
10972 *cost += extra_cost->fp[mode == DFmode].narrow;
10973 *cost += rtx_cost (XEXP (x, 0), VOIDmode, code, 0, speed_p);
10974 return true;
10975 /* Vector modes? */
10976 }
10977 *cost = LIBCALL_COST (1);
10978 return false;
10979
10980 case FMA:
10981 if (TARGET_32BIT && TARGET_HARD_FLOAT && TARGET_FMA)
10982 {
10983 rtx op0 = XEXP (x, 0);
10984 rtx op1 = XEXP (x, 1);
10985 rtx op2 = XEXP (x, 2);
10986
10987
10988 /* vfms or vfnma. */
10989 if (GET_CODE (op0) == NEG)
10990 op0 = XEXP (op0, 0);
10991
10992 /* vfnms or vfnma. */
10993 if (GET_CODE (op2) == NEG)
10994 op2 = XEXP (op2, 0);
10995
10996 *cost += rtx_cost (op0, mode, FMA, 0, speed_p);
10997 *cost += rtx_cost (op1, mode, FMA, 1, speed_p);
10998 *cost += rtx_cost (op2, mode, FMA, 2, speed_p);
10999
11000 if (speed_p)
11001 *cost += extra_cost->fp[mode == DFmode].fma;
11002
11003 return true;
11004 }
11005
11006 *cost = LIBCALL_COST (3);
11007 return false;
11008
11009 case FIX:
11010 case UNSIGNED_FIX:
11011 if (TARGET_HARD_FLOAT)
11012 {
11013 /* The *combine_vcvtf2i reduces a vmul+vcvt into
11014 a vcvt fixed-point conversion. */
11015 if (code == FIX && mode == SImode
11016 && GET_CODE (XEXP (x, 0)) == FIX
11017 && GET_MODE (XEXP (x, 0)) == SFmode
11018 && GET_CODE (XEXP (XEXP (x, 0), 0)) == MULT
11019 && vfp3_const_double_for_bits (XEXP (XEXP (XEXP (x, 0), 0), 1))
11020 > 0)
11021 {
11022 if (speed_p)
11023 *cost += extra_cost->fp[0].toint;
11024
11025 *cost += rtx_cost (XEXP (XEXP (XEXP (x, 0), 0), 0), mode,
11026 code, 0, speed_p);
11027 return true;
11028 }
11029
11030 if (GET_MODE_CLASS (mode) == MODE_INT)
11031 {
11032 mode = GET_MODE (XEXP (x, 0));
11033 if (speed_p)
11034 *cost += extra_cost->fp[mode == DFmode].toint;
11035 /* Strip off the 'cost' of rounding towards zero. */
11036 if (GET_CODE (XEXP (x, 0)) == FIX)
11037 *cost += rtx_cost (XEXP (XEXP (x, 0), 0), mode, code,
11038 0, speed_p);
11039 else
11040 *cost += rtx_cost (XEXP (x, 0), mode, code, 0, speed_p);
11041 /* ??? Increase the cost to deal with transferring from
11042 FP -> CORE registers? */
11043 return true;
11044 }
11045 else if (GET_MODE_CLASS (mode) == MODE_FLOAT
11046 && TARGET_VFP5)
11047 {
11048 if (speed_p)
11049 *cost += extra_cost->fp[mode == DFmode].roundint;
11050 return false;
11051 }
11052 /* Vector costs? */
11053 }
11054 *cost = LIBCALL_COST (1);
11055 return false;
11056
11057 case FLOAT:
11058 case UNSIGNED_FLOAT:
11059 if (TARGET_HARD_FLOAT)
11060 {
11061 /* ??? Increase the cost to deal with transferring from CORE
11062 -> FP registers? */
11063 if (speed_p)
11064 *cost += extra_cost->fp[mode == DFmode].fromint;
11065 return false;
11066 }
11067 *cost = LIBCALL_COST (1);
11068 return false;
11069
11070 case CALL:
11071 return true;
11072
11073 case ASM_OPERANDS:
11074 {
11075 /* Just a guess. Guess number of instructions in the asm
11076 plus one insn per input. Always a minimum of COSTS_N_INSNS (1)
11077 though (see PR60663). */
11078 int asm_length = MAX (1, asm_str_count (ASM_OPERANDS_TEMPLATE (x)));
11079 int num_operands = ASM_OPERANDS_INPUT_LENGTH (x);
11080
11081 *cost = COSTS_N_INSNS (asm_length + num_operands);
11082 return true;
11083 }
11084 default:
11085 if (mode != VOIDmode)
11086 *cost = COSTS_N_INSNS (ARM_NUM_REGS (mode));
11087 else
11088 *cost = COSTS_N_INSNS (4); /* Who knows? */
11089 return false;
11090 }
11091 }
11092
11093 #undef HANDLE_NARROW_SHIFT_ARITH
11094
11095 /* RTX costs entry point. */
11096
11097 static bool
11098 arm_rtx_costs (rtx x, machine_mode mode ATTRIBUTE_UNUSED, int outer_code,
11099 int opno ATTRIBUTE_UNUSED, int *total, bool speed)
11100 {
11101 bool result;
11102 int code = GET_CODE (x);
11103 gcc_assert (current_tune->insn_extra_cost);
11104
11105 result = arm_rtx_costs_internal (x, (enum rtx_code) code,
11106 (enum rtx_code) outer_code,
11107 current_tune->insn_extra_cost,
11108 total, speed);
11109
11110 if (dump_file && arm_verbose_cost)
11111 {
11112 print_rtl_single (dump_file, x);
11113 fprintf (dump_file, "\n%s cost: %d (%s)\n", speed ? "Hot" : "Cold",
11114 *total, result ? "final" : "partial");
11115 }
11116 return result;
11117 }
11118
11119 /* All address computations that can be done are free, but rtx cost returns
11120 the same for practically all of them. So we weight the different types
11121 of address here in the order (most pref first):
11122 PRE/POST_INC/DEC, SHIFT or NON-INT sum, INT sum, REG, MEM or LABEL. */
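/* For example (matching the weights returned below): a post-increment
   address costs 0, [rn, #imm] costs 2, [rn, rm, lsl #2] costs 3, [rn, rm]
   costs 4, a bare register costs 6, and a literal-pool or symbolic address
   costs 10.  */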
11123 static inline int
11124 arm_arm_address_cost (rtx x)
11125 {
11126 enum rtx_code c = GET_CODE (x);
11127
11128 if (c == PRE_INC || c == PRE_DEC || c == POST_INC || c == POST_DEC)
11129 return 0;
11130 if (c == MEM || c == LABEL_REF || c == SYMBOL_REF)
11131 return 10;
11132
11133 if (c == PLUS)
11134 {
11135 if (CONST_INT_P (XEXP (x, 1)))
11136 return 2;
11137
11138 if (ARITHMETIC_P (XEXP (x, 0)) || ARITHMETIC_P (XEXP (x, 1)))
11139 return 3;
11140
11141 return 4;
11142 }
11143
11144 return 6;
11145 }
11146
11147 static inline int
11148 arm_thumb_address_cost (rtx x)
11149 {
11150 enum rtx_code c = GET_CODE (x);
11151
11152 if (c == REG)
11153 return 1;
11154 if (c == PLUS
11155 && REG_P (XEXP (x, 0))
11156 && CONST_INT_P (XEXP (x, 1)))
11157 return 1;
11158
11159 return 2;
11160 }
11161
11162 static int
11163 arm_address_cost (rtx x, machine_mode mode ATTRIBUTE_UNUSED,
11164 addr_space_t as ATTRIBUTE_UNUSED, bool speed ATTRIBUTE_UNUSED)
11165 {
11166 return TARGET_32BIT ? arm_arm_address_cost (x) : arm_thumb_address_cost (x);
11167 }
11168
11169 /* Adjust cost hook for XScale. */
11170 static bool
11171 xscale_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11172 int * cost)
11173 {
11174 /* Some true dependencies can have a higher cost depending
11175 on precisely how certain input operands are used. */
11176 if (dep_type == 0
11177 && recog_memoized (insn) >= 0
11178 && recog_memoized (dep) >= 0)
11179 {
11180 int shift_opnum = get_attr_shift (insn);
11181 enum attr_type attr_type = get_attr_type (dep);
11182
11183 /* If nonzero, SHIFT_OPNUM contains the operand number of a shifted
11184 operand for INSN. If we have a shifted input operand and the
11185 instruction we depend on is another ALU instruction, then we may
11186 have to account for an additional stall. */
11187 if (shift_opnum != 0
11188 && (attr_type == TYPE_ALU_SHIFT_IMM
11189 || attr_type == TYPE_ALUS_SHIFT_IMM
11190 || attr_type == TYPE_LOGIC_SHIFT_IMM
11191 || attr_type == TYPE_LOGICS_SHIFT_IMM
11192 || attr_type == TYPE_ALU_SHIFT_REG
11193 || attr_type == TYPE_ALUS_SHIFT_REG
11194 || attr_type == TYPE_LOGIC_SHIFT_REG
11195 || attr_type == TYPE_LOGICS_SHIFT_REG
11196 || attr_type == TYPE_MOV_SHIFT
11197 || attr_type == TYPE_MVN_SHIFT
11198 || attr_type == TYPE_MOV_SHIFT_REG
11199 || attr_type == TYPE_MVN_SHIFT_REG))
11200 {
11201 rtx shifted_operand;
11202 int opno;
11203
11204 /* Get the shifted operand. */
11205 extract_insn (insn);
11206 shifted_operand = recog_data.operand[shift_opnum];
11207
11208 /* Iterate over all the operands in DEP. If we write an operand
11209 that overlaps with SHIFTED_OPERAND, then we have to increase the
11210 cost of this dependency. */
11211 extract_insn (dep);
11212 preprocess_constraints (dep);
11213 for (opno = 0; opno < recog_data.n_operands; opno++)
11214 {
11215 /* We can ignore strict inputs. */
11216 if (recog_data.operand_type[opno] == OP_IN)
11217 continue;
11218
11219 if (reg_overlap_mentioned_p (recog_data.operand[opno],
11220 shifted_operand))
11221 {
11222 *cost = 2;
11223 return false;
11224 }
11225 }
11226 }
11227 }
11228 return true;
11229 }
11230
11231 /* Adjust cost hook for Cortex A9. */
11232 static bool
11233 cortex_a9_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11234 int * cost)
11235 {
11236 switch (dep_type)
11237 {
11238 case REG_DEP_ANTI:
11239 *cost = 0;
11240 return false;
11241
11242 case REG_DEP_TRUE:
11243 case REG_DEP_OUTPUT:
11244 if (recog_memoized (insn) >= 0
11245 && recog_memoized (dep) >= 0)
11246 {
11247 if (GET_CODE (PATTERN (insn)) == SET)
11248 {
11249 if (GET_MODE_CLASS
11250 (GET_MODE (SET_DEST (PATTERN (insn)))) == MODE_FLOAT
11251 || GET_MODE_CLASS
11252 (GET_MODE (SET_SRC (PATTERN (insn)))) == MODE_FLOAT)
11253 {
11254 enum attr_type attr_type_insn = get_attr_type (insn);
11255 enum attr_type attr_type_dep = get_attr_type (dep);
11256
11257 /* By default all dependencies of the form
11258 s0 = s0 <op> s1
11259 s0 = s0 <op> s2
11260 have an extra latency of 1 cycle because
11261 of the input and output dependency in this
11262 case. However, this gets modeled as a true
11263 dependency and hence all these checks. */
11264 if (REG_P (SET_DEST (PATTERN (insn)))
11265 && reg_set_p (SET_DEST (PATTERN (insn)), dep))
11266 {
11267 /* FMACS is a special case where the dependent
11268 instruction can be issued 3 cycles before
11269 the normal latency in case of an output
11270 dependency. */
11271 if ((attr_type_insn == TYPE_FMACS
11272 || attr_type_insn == TYPE_FMACD)
11273 && (attr_type_dep == TYPE_FMACS
11274 || attr_type_dep == TYPE_FMACD))
11275 {
11276 if (dep_type == REG_DEP_OUTPUT)
11277 *cost = insn_default_latency (dep) - 3;
11278 else
11279 *cost = insn_default_latency (dep);
11280 return false;
11281 }
11282 else
11283 {
11284 if (dep_type == REG_DEP_OUTPUT)
11285 *cost = insn_default_latency (dep) + 1;
11286 else
11287 *cost = insn_default_latency (dep);
11288 }
11289 return false;
11290 }
11291 }
11292 }
11293 }
11294 break;
11295
11296 default:
11297 gcc_unreachable ();
11298 }
11299
11300 return true;
11301 }
11302
11303 /* Adjust cost hook for FA726TE. */
11304 static bool
11305 fa726te_sched_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep,
11306 int * cost)
11307 {
11308 /* For FA726TE, a true dependency on CPSR (i.e. a condition-setting instruction
11309 followed by a predicated one) has a penalty of 3. */
11310 if (dep_type == REG_DEP_TRUE
11311 && recog_memoized (insn) >= 0
11312 && recog_memoized (dep) >= 0
11313 && get_attr_conds (dep) == CONDS_SET)
11314 {
11315 /* Use of carry (e.g. 64-bit arithmetic) in ALU: 3-cycle latency. */
11316 if (get_attr_conds (insn) == CONDS_USE
11317 && get_attr_type (insn) != TYPE_BRANCH)
11318 {
11319 *cost = 3;
11320 return false;
11321 }
11322
11323 if (GET_CODE (PATTERN (insn)) == COND_EXEC
11324 || get_attr_conds (insn) == CONDS_USE)
11325 {
11326 *cost = 0;
11327 return false;
11328 }
11329 }
11330
11331 return true;
11332 }
11333
11334 /* Implement TARGET_REGISTER_MOVE_COST.
11335
11336 Moves between VFP_REGS and GENERAL_REGS are a single insn, but
11337 it is typically more expensive than a single memory access. We set
11338 the cost to less than two memory accesses so that floating
11339 point to integer conversion does not go through memory. */
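/* Concretely, a VFP<->core move below is given a cost of 15, while
   arm_memory_move_cost returns 10 for 32-bit targets, so the single-insn
   move (15) stays cheaper than a store plus a load (20).  */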
11340
11341 int
11342 arm_register_move_cost (machine_mode mode ATTRIBUTE_UNUSED,
11343 reg_class_t from, reg_class_t to)
11344 {
11345 if (TARGET_32BIT)
11346 {
11347 if ((IS_VFP_CLASS (from) && !IS_VFP_CLASS (to))
11348 || (!IS_VFP_CLASS (from) && IS_VFP_CLASS (to)))
11349 return 15;
11350 else if ((from == IWMMXT_REGS && to != IWMMXT_REGS)
11351 || (from != IWMMXT_REGS && to == IWMMXT_REGS))
11352 return 4;
11353 else if (from == IWMMXT_GR_REGS || to == IWMMXT_GR_REGS)
11354 return 20;
11355 else
11356 return 2;
11357 }
11358 else
11359 {
11360 if (from == HI_REGS || to == HI_REGS)
11361 return 4;
11362 else
11363 return 2;
11364 }
11365 }
11366
11367 /* Implement TARGET_MEMORY_MOVE_COST. */
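/* For example, on Thumb-1 an 8-byte DImode value costs 2 * 8 = 16 when the
   register class is LO_REGS and 32 otherwise, while anything smaller than a
   word costs 8; 32-bit targets always return 10.  */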
11368
11369 int
11370 arm_memory_move_cost (machine_mode mode, reg_class_t rclass,
11371 bool in ATTRIBUTE_UNUSED)
11372 {
11373 if (TARGET_32BIT)
11374 return 10;
11375 else
11376 {
11377 if (GET_MODE_SIZE (mode) < 4)
11378 return 8;
11379 else
11380 return ((2 * GET_MODE_SIZE (mode)) * (rclass == LO_REGS ? 1 : 2));
11381 }
11382 }
11383
11384 /* Vectorizer cost model implementation. */
11385
11386 /* Implement targetm.vectorize.builtin_vectorization_cost. */
11387 static int
11388 arm_builtin_vectorization_cost (enum vect_cost_for_stmt type_of_cost,
11389 tree vectype,
11390 int misalign ATTRIBUTE_UNUSED)
11391 {
11392 unsigned elements;
11393
11394 switch (type_of_cost)
11395 {
11396 case scalar_stmt:
11397 return current_tune->vec_costs->scalar_stmt_cost;
11398
11399 case scalar_load:
11400 return current_tune->vec_costs->scalar_load_cost;
11401
11402 case scalar_store:
11403 return current_tune->vec_costs->scalar_store_cost;
11404
11405 case vector_stmt:
11406 return current_tune->vec_costs->vec_stmt_cost;
11407
11408 case vector_load:
11409 return current_tune->vec_costs->vec_align_load_cost;
11410
11411 case vector_store:
11412 return current_tune->vec_costs->vec_store_cost;
11413
11414 case vec_to_scalar:
11415 return current_tune->vec_costs->vec_to_scalar_cost;
11416
11417 case scalar_to_vec:
11418 return current_tune->vec_costs->scalar_to_vec_cost;
11419
11420 case unaligned_load:
11421 case vector_gather_load:
11422 return current_tune->vec_costs->vec_unalign_load_cost;
11423
11424 case unaligned_store:
11425 case vector_scatter_store:
11426 return current_tune->vec_costs->vec_unalign_store_cost;
11427
11428 case cond_branch_taken:
11429 return current_tune->vec_costs->cond_taken_branch_cost;
11430
11431 case cond_branch_not_taken:
11432 return current_tune->vec_costs->cond_not_taken_branch_cost;
11433
11434 case vec_perm:
11435 case vec_promote_demote:
11436 return current_tune->vec_costs->vec_stmt_cost;
11437
11438 case vec_construct:
11439 elements = TYPE_VECTOR_SUBPARTS (vectype);
11440 return elements / 2 + 1;
11441
11442 default:
11443 gcc_unreachable ();
11444 }
11445 }
11446
11447 /* Implement targetm.vectorize.add_stmt_cost. */
11448
11449 static unsigned
11450 arm_add_stmt_cost (void *data, int count, enum vect_cost_for_stmt kind,
11451 struct _stmt_vec_info *stmt_info, int misalign,
11452 enum vect_cost_model_location where)
11453 {
11454 unsigned *cost = (unsigned *) data;
11455 unsigned retval = 0;
11456
11457 if (flag_vect_cost_model)
11458 {
11459 tree vectype = stmt_info ? stmt_vectype (stmt_info) : NULL_TREE;
11460 int stmt_cost = arm_builtin_vectorization_cost (kind, vectype, misalign);
11461
11462 /* Statements in an inner loop relative to the loop being
11463 vectorized are weighted more heavily. The value here is
11464 arbitrary and could potentially be improved with analysis. */
11465 if (where == vect_body && stmt_info && stmt_in_inner_loop_p (stmt_info))
11466 count *= 50; /* FIXME. */
11467
11468 retval = (unsigned) (count * stmt_cost);
11469 cost[where] += retval;
11470 }
11471
11472 return retval;
11473 }
11474
11475 /* Return true if and only if this insn can dual-issue only as older. */
11476 static bool
11477 cortexa7_older_only (rtx_insn *insn)
11478 {
11479 if (recog_memoized (insn) < 0)
11480 return false;
11481
11482 switch (get_attr_type (insn))
11483 {
11484 case TYPE_ALU_DSP_REG:
11485 case TYPE_ALU_SREG:
11486 case TYPE_ALUS_SREG:
11487 case TYPE_LOGIC_REG:
11488 case TYPE_LOGICS_REG:
11489 case TYPE_ADC_REG:
11490 case TYPE_ADCS_REG:
11491 case TYPE_ADR:
11492 case TYPE_BFM:
11493 case TYPE_REV:
11494 case TYPE_MVN_REG:
11495 case TYPE_SHIFT_IMM:
11496 case TYPE_SHIFT_REG:
11497 case TYPE_LOAD_BYTE:
11498 case TYPE_LOAD_4:
11499 case TYPE_STORE_4:
11500 case TYPE_FFARITHS:
11501 case TYPE_FADDS:
11502 case TYPE_FFARITHD:
11503 case TYPE_FADDD:
11504 case TYPE_FMOV:
11505 case TYPE_F_CVT:
11506 case TYPE_FCMPS:
11507 case TYPE_FCMPD:
11508 case TYPE_FCONSTS:
11509 case TYPE_FCONSTD:
11510 case TYPE_FMULS:
11511 case TYPE_FMACS:
11512 case TYPE_FMULD:
11513 case TYPE_FMACD:
11514 case TYPE_FDIVS:
11515 case TYPE_FDIVD:
11516 case TYPE_F_MRC:
11517 case TYPE_F_MRRC:
11518 case TYPE_F_FLAG:
11519 case TYPE_F_LOADS:
11520 case TYPE_F_STORES:
11521 return true;
11522 default:
11523 return false;
11524 }
11525 }
11526
11527 /* Return true if and only if this insn can dual-issue as younger. */
11528 static bool
11529 cortexa7_younger (FILE *file, int verbose, rtx_insn *insn)
11530 {
11531 if (recog_memoized (insn) < 0)
11532 {
11533 if (verbose > 5)
11534 fprintf (file, ";; not cortexa7_younger %d\n", INSN_UID (insn));
11535 return false;
11536 }
11537
11538 switch (get_attr_type (insn))
11539 {
11540 case TYPE_ALU_IMM:
11541 case TYPE_ALUS_IMM:
11542 case TYPE_LOGIC_IMM:
11543 case TYPE_LOGICS_IMM:
11544 case TYPE_EXTEND:
11545 case TYPE_MVN_IMM:
11546 case TYPE_MOV_IMM:
11547 case TYPE_MOV_REG:
11548 case TYPE_MOV_SHIFT:
11549 case TYPE_MOV_SHIFT_REG:
11550 case TYPE_BRANCH:
11551 case TYPE_CALL:
11552 return true;
11553 default:
11554 return false;
11555 }
11556 }
11557
11558
11559 /* Look for an instruction that can dual issue only as an older
11560 instruction, and move it in front of any instructions that can
11561 dual-issue as younger, while preserving the relative order of all
11562 other instructions in the ready list. This is a heuristic to help
11563 dual-issue in later cycles, by postponing issue of more flexible
11564 instructions. This heuristic may affect dual issue opportunities
11565 in the current cycle. */
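/* As an illustration: if the insn at the head of the ready list is, say, a
   mov_imm (which can issue as younger) and the next one down is an alu_sreg
   add (older-only), the add is moved in front of the mov, so the mov is
   still available to pair in the younger slot of a later cycle.  */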
11566 static void
11567 cortexa7_sched_reorder (FILE *file, int verbose, rtx_insn **ready,
11568 int *n_readyp, int clock)
11569 {
11570 int i;
11571 int first_older_only = -1, first_younger = -1;
11572
11573 if (verbose > 5)
11574 fprintf (file,
11575 ";; sched_reorder for cycle %d with %d insns in ready list\n",
11576 clock,
11577 *n_readyp);
11578
11579 /* Traverse the ready list from the head (the instruction to issue
11580 first), looking for the first instruction that can issue as
11581 younger and the first instruction that can dual-issue only as
11582 older. */
11583 for (i = *n_readyp - 1; i >= 0; i--)
11584 {
11585 rtx_insn *insn = ready[i];
11586 if (cortexa7_older_only (insn))
11587 {
11588 first_older_only = i;
11589 if (verbose > 5)
11590 fprintf (file, ";; reorder older found %d\n", INSN_UID (insn));
11591 break;
11592 }
11593 else if (cortexa7_younger (file, verbose, insn) && first_younger == -1)
11594 first_younger = i;
11595 }
11596
11597 /* Nothing to reorder because either no younger insn was found, or an insn
11598 that can dual-issue only as older already appears before any insn that
11599 can dual-issue as younger. */
11600 if (first_younger == -1)
11601 {
11602 if (verbose > 5)
11603 fprintf (file, ";; sched_reorder nothing to reorder as no younger\n");
11604 return;
11605 }
11606
11607 /* Nothing to reorder because no older-only insn in the ready list. */
11608 if (first_older_only == -1)
11609 {
11610 if (verbose > 5)
11611 fprintf (file, ";; sched_reorder nothing to reorder as no older_only\n");
11612 return;
11613 }
11614
11615 /* Move first_older_only insn before first_younger. */
11616 if (verbose > 5)
11617 fprintf (file, ";; cortexa7_sched_reorder insn %d before %d\n",
11618 INSN_UID(ready [first_older_only]),
11619 INSN_UID(ready [first_younger]));
11620 rtx_insn *first_older_only_insn = ready [first_older_only];
11621 for (i = first_older_only; i < first_younger; i++)
11622 {
11623 ready[i] = ready[i+1];
11624 }
11625
11626 ready[i] = first_older_only_insn;
11627 return;
11628 }
11629
11630 /* Implement TARGET_SCHED_REORDER. */
11631 static int
11632 arm_sched_reorder (FILE *file, int verbose, rtx_insn **ready, int *n_readyp,
11633 int clock)
11634 {
11635 switch (arm_tune)
11636 {
11637 case TARGET_CPU_cortexa7:
11638 cortexa7_sched_reorder (file, verbose, ready, n_readyp, clock);
11639 break;
11640 default:
11641 /* Do nothing for other cores. */
11642 break;
11643 }
11644
11645 return arm_issue_rate ();
11646 }
11647
11648 /* This function implements the target macro TARGET_SCHED_ADJUST_COST.
11649 It corrects the value of COST based on the relationship between
11650 INSN and DEP through the dependence of type DEP_TYPE. It returns the new
11651 value. There is a per-core adjust_cost hook to adjust scheduler costs
11652 and the per-core hook can choose to completely override the generic
11653 adjust_cost function. Only put bits of code into arm_adjust_cost that
11654 are common across all cores. */
11655 static int
11656 arm_adjust_cost (rtx_insn *insn, int dep_type, rtx_insn *dep, int cost,
11657 unsigned int)
11658 {
11659 rtx i_pat, d_pat;
11660
11661 /* When generating Thumb-1 code, we want to place flag-setting operations
11662 close to a conditional branch which depends on them, so that we can
11663 omit the comparison. */
11664 if (TARGET_THUMB1
11665 && dep_type == 0
11666 && recog_memoized (insn) == CODE_FOR_cbranchsi4_insn
11667 && recog_memoized (dep) >= 0
11668 && get_attr_conds (dep) == CONDS_SET)
11669 return 0;
11670
11671 if (current_tune->sched_adjust_cost != NULL)
11672 {
11673 if (!current_tune->sched_adjust_cost (insn, dep_type, dep, &cost))
11674 return cost;
11675 }
11676
11677 /* XXX Is this strictly true? */
11678 if (dep_type == REG_DEP_ANTI
11679 || dep_type == REG_DEP_OUTPUT)
11680 return 0;
11681
11682 /* Call insns don't incur a stall, even if they follow a load. */
11683 if (dep_type == 0
11684 && CALL_P (insn))
11685 return 1;
11686
11687 if ((i_pat = single_set (insn)) != NULL
11688 && MEM_P (SET_SRC (i_pat))
11689 && (d_pat = single_set (dep)) != NULL
11690 && MEM_P (SET_DEST (d_pat)))
11691 {
11692 rtx src_mem = XEXP (SET_SRC (i_pat), 0);
11693 /* This is a load after a store; there is no conflict if the load reads
11694 from a cached area. Assume that loads from the stack and from the
11695 constant pool are cached, and that others will miss. This is a
11696 hack. */
11697
11698 if ((GET_CODE (src_mem) == SYMBOL_REF
11699 && CONSTANT_POOL_ADDRESS_P (src_mem))
11700 || reg_mentioned_p (stack_pointer_rtx, src_mem)
11701 || reg_mentioned_p (frame_pointer_rtx, src_mem)
11702 || reg_mentioned_p (hard_frame_pointer_rtx, src_mem))
11703 return 1;
11704 }
11705
11706 return cost;
11707 }
11708
11709 int
11710 arm_max_conditional_execute (void)
11711 {
11712 return max_insns_skipped;
11713 }
11714
11715 static int
11716 arm_default_branch_cost (bool speed_p, bool predictable_p ATTRIBUTE_UNUSED)
11717 {
11718 if (TARGET_32BIT)
11719 return (TARGET_THUMB2 && !speed_p) ? 1 : 4;
11720 else
11721 return (optimize > 0) ? 2 : 0;
11722 }
11723
11724 static int
11725 arm_cortex_a5_branch_cost (bool speed_p, bool predictable_p)
11726 {
11727 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11728 }
11729
11730 /* Thumb-2 branches are relatively cheap on Cortex-M processors ("1 + P cycles"
11731 on Cortex-M4, where P varies from 1 to 3 according to some criteria), since
11732 sequences of non-executed instructions in IT blocks probably take the same
11733 amount of time as executed instructions (and the IT instruction itself takes
11734 space in icache). This function was experimentally determined to give good
11735 results on a popular embedded benchmark. */
11736
11737 static int
11738 arm_cortex_m_branch_cost (bool speed_p, bool predictable_p)
11739 {
11740 return (TARGET_32BIT && speed_p) ? 1
11741 : arm_default_branch_cost (speed_p, predictable_p);
11742 }
11743
11744 static int
11745 arm_cortex_m7_branch_cost (bool speed_p, bool predictable_p)
11746 {
11747 return speed_p ? 0 : arm_default_branch_cost (speed_p, predictable_p);
11748 }
11749
11750 static bool fp_consts_inited = false;
11751
11752 static REAL_VALUE_TYPE value_fp0;
11753
11754 static void
11755 init_fp_table (void)
11756 {
11757 REAL_VALUE_TYPE r;
11758
11759 r = REAL_VALUE_ATOF ("0", DFmode);
11760 value_fp0 = r;
11761 fp_consts_inited = true;
11762 }
11763
11764 /* Return TRUE if rtx X is a valid immediate FP constant. */
11765 int
11766 arm_const_double_rtx (rtx x)
11767 {
11768 const REAL_VALUE_TYPE *r;
11769
11770 if (!fp_consts_inited)
11771 init_fp_table ();
11772
11773 r = CONST_DOUBLE_REAL_VALUE (x);
11774 if (REAL_VALUE_MINUS_ZERO (*r))
11775 return 0;
11776
11777 if (real_equal (r, &value_fp0))
11778 return 1;
11779
11780 return 0;
11781 }
11782
11783 /* VFPv3 has a fairly wide range of representable immediates, formed from
11784 "quarter-precision" floating-point values. These can be evaluated using this
11785 formula (with ^ for exponentiation):
11786
11787 -1^s * n * 2^-r
11788
11789 Where 's' is a sign bit (0/1), 'n' and 'r' are integers such that
11790 16 <= n <= 31 and 0 <= r <= 7.
11791
11792 These values are mapped onto an 8-bit integer ABCDEFGH s.t.
11793
11794 - A (most-significant) is the sign bit.
11795 - BCD are the exponent (encoded as r XOR 3).
11796 - EFGH are the mantissa (encoded as n - 16).
11797 */
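/* A worked example of the mapping above, written as a small standalone C
   helper purely for illustration (the name is hypothetical and this is not
   part of the compiler): A is bit 7, BCD are bits 6-4 and EFGH bits 3-0.

     static double
     decode_vfp3_quarter_precision (unsigned char abcdefgh)
     {
       int s = (abcdefgh >> 7) & 1;
       int r = ((abcdefgh >> 4) & 7) ^ 3;
       int n = (abcdefgh & 0xf) + 16;
       return (s ? -1.0 : 1.0) * n / (1 << r);
     }

   For instance the encoding 0x70 has s = 0, r = 7 ^ 3 = 4 and n = 16,
   which decodes to 16 * 2^-4 = 1.0.  */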
11798
11799 /* Return an integer index for a VFPv3 immediate operand X suitable for the
11800 fconst[sd] instruction, or -1 if X isn't suitable. */
11801 static int
11802 vfp3_const_double_index (rtx x)
11803 {
11804 REAL_VALUE_TYPE r, m;
11805 int sign, exponent;
11806 unsigned HOST_WIDE_INT mantissa, mant_hi;
11807 unsigned HOST_WIDE_INT mask;
11808 int point_pos = 2 * HOST_BITS_PER_WIDE_INT - 1;
11809 bool fail;
11810
11811 if (!TARGET_VFP3 || !CONST_DOUBLE_P (x))
11812 return -1;
11813
11814 r = *CONST_DOUBLE_REAL_VALUE (x);
11815
11816 /* We can't represent these things, so detect them first. */
11817 if (REAL_VALUE_ISINF (r) || REAL_VALUE_ISNAN (r) || REAL_VALUE_MINUS_ZERO (r))
11818 return -1;
11819
11820 /* Extract sign, exponent and mantissa. */
11821 sign = REAL_VALUE_NEGATIVE (r) ? 1 : 0;
11822 r = real_value_abs (&r);
11823 exponent = REAL_EXP (&r);
11824 /* For the mantissa, we expand into two HOST_WIDE_INTS, apart from the
11825 highest (sign) bit, with a fixed binary point at bit point_pos.
11826 WARNING: If there's ever a VFP version which uses more than 2 * H_W_I - 1
11827 bits for the mantissa, this may fail (low bits would be lost). */
11828 real_ldexp (&m, &r, point_pos - exponent);
11829 wide_int w = real_to_integer (&m, &fail, HOST_BITS_PER_WIDE_INT * 2);
11830 mantissa = w.elt (0);
11831 mant_hi = w.elt (1);
11832
11833 /* If there are bits set in the low part of the mantissa, we can't
11834 represent this value. */
11835 if (mantissa != 0)
11836 return -1;
11837
11838 /* Now make it so that mantissa contains the most-significant bits, and move
11839 the point_pos to indicate that the least-significant bits have been
11840 discarded. */
11841 point_pos -= HOST_BITS_PER_WIDE_INT;
11842 mantissa = mant_hi;
11843
11844 /* We can permit four significant bits of mantissa only, plus a high bit
11845 which is always 1. */
11846 mask = (HOST_WIDE_INT_1U << (point_pos - 5)) - 1;
11847 if ((mantissa & mask) != 0)
11848 return -1;
11849
11850 /* Now we know the mantissa is in range, chop off the unneeded bits. */
11851 mantissa >>= point_pos - 5;
11852
11853 /* The mantissa may be zero. Disallow that case. (It's possible to load the
11854 floating-point immediate zero with Neon using an integer-zero load, but
11855 that case is handled elsewhere.) */
11856 if (mantissa == 0)
11857 return -1;
11858
11859 gcc_assert (mantissa >= 16 && mantissa <= 31);
11860
11861 /* The value of 5 here would be 4 if GCC used IEEE754-like encoding (where
11862 normalized significands are in the range [1, 2). (Our mantissa is shifted
11863 left 4 places at this point relative to normalized IEEE754 values). GCC
11864 internally uses [0.5, 1) (see real.c), so the exponent returned from
11865 REAL_EXP must be altered. */
11866 exponent = 5 - exponent;
11867
11868 if (exponent < 0 || exponent > 7)
11869 return -1;
11870
11871 /* Sign, mantissa and exponent are now in the correct form to plug into the
11872 formula described in the comment above. */
11873 return (sign << 7) | ((exponent ^ 3) << 4) | (mantissa - 16);
11874 }
11875
11876 /* Return TRUE if rtx X is a valid immediate VFPv3 constant. */
11877 int
11878 vfp3_const_double_rtx (rtx x)
11879 {
11880 if (!TARGET_VFP3)
11881 return 0;
11882
11883 return vfp3_const_double_index (x) != -1;
11884 }
11885
11886 /* Recognize immediates which can be used in various Neon instructions. Legal
11887 immediates are described by the following table (for VMVN variants, the
11888 bitwise inverse of the constant shown is recognized. In either case, VMOV
11889 is output and the correct instruction to use for a given constant is chosen
11890 by the assembler). The constant shown is replicated across all elements of
11891 the destination vector.
11892
11893 insn elems variant constant (binary)
11894 ---- ----- ------- -----------------
11895 vmov i32 0 00000000 00000000 00000000 abcdefgh
11896 vmov i32 1 00000000 00000000 abcdefgh 00000000
11897 vmov i32 2 00000000 abcdefgh 00000000 00000000
11898 vmov i32 3 abcdefgh 00000000 00000000 00000000
11899 vmov i16 4 00000000 abcdefgh
11900 vmov i16 5 abcdefgh 00000000
11901 vmvn i32 6 00000000 00000000 00000000 abcdefgh
11902 vmvn i32 7 00000000 00000000 abcdefgh 00000000
11903 vmvn i32 8 00000000 abcdefgh 00000000 00000000
11904 vmvn i32 9 abcdefgh 00000000 00000000 00000000
11905 vmvn i16 10 00000000 abcdefgh
11906 vmvn i16 11 abcdefgh 00000000
11907 vmov i32 12 00000000 00000000 abcdefgh 11111111
11908 vmvn i32 13 00000000 00000000 abcdefgh 11111111
11909 vmov i32 14 00000000 abcdefgh 11111111 11111111
11910 vmvn i32 15 00000000 abcdefgh 11111111 11111111
11911 vmov i8 16 abcdefgh
11912 vmov i64 17 aaaaaaaa bbbbbbbb cccccccc dddddddd
11913 eeeeeeee ffffffff gggggggg hhhhhhhh
11914 vmov f32 18 aBbbbbbc defgh000 00000000 00000000
11915 vmov f32 19 00000000 00000000 00000000 00000000
11916
11917 For case 18, B = !b. Representable values are exactly those accepted by
11918 vfp3_const_double_index, but are output as floating-point numbers rather
11919 than indices.
11920
11921 For case 19, we will change it to vmov.i32 when assembling.
11922
11923 Variants 0-5 (inclusive) may also be used as immediates for the second
11924 operand of VORR/VBIC instructions.
11925
11926 The INVERSE argument causes the bitwise inverse of the given operand to be
11927 recognized instead (used for recognizing legal immediates for the VAND/VORN
11928 pseudo-instructions). If INVERSE is true, the value placed in *MODCONST is
11929 *not* inverted (i.e. the pseudo-instruction forms vand/vorn should still be
11930 output, rather than the real insns vbic/vorr).
11931
11932 INVERSE makes no difference to the recognition of float vectors.
11933
11934 The return value is the variant of immediate as shown in the above table, or
11935 -1 if the given value doesn't match any of the listed patterns.
11936 */
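/* A worked example: a V4SImode vector with every element equal to
   0x0000ab00 splats into the byte pattern 00 ab 00 00 (repeated), which
   matches variant 1 above; the element width reported is 32 and *MODCONST
   is set to 0x0000ab00 (here 0xab stands for an arbitrary abcdefgh byte).  */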
11937 static int
11938 neon_valid_immediate (rtx op, machine_mode mode, int inverse,
11939 rtx *modconst, int *elementwidth)
11940 {
11941 #define CHECK(STRIDE, ELSIZE, CLASS, TEST) \
11942 matches = 1; \
11943 for (i = 0; i < idx; i += (STRIDE)) \
11944 if (!(TEST)) \
11945 matches = 0; \
11946 if (matches) \
11947 { \
11948 immtype = (CLASS); \
11949 elsize = (ELSIZE); \
11950 break; \
11951 }
11952
11953 unsigned int i, elsize = 0, idx = 0, n_elts;
11954 unsigned int innersize;
11955 unsigned char bytes[16];
11956 int immtype = -1, matches;
11957 unsigned int invmask = inverse ? 0xff : 0;
11958 bool vector = GET_CODE (op) == CONST_VECTOR;
11959
11960 if (vector)
11961 n_elts = CONST_VECTOR_NUNITS (op);
11962 else
11963 {
11964 n_elts = 1;
11965 if (mode == VOIDmode)
11966 mode = DImode;
11967 }
11968
11969 innersize = GET_MODE_UNIT_SIZE (mode);
11970
11971 /* Vectors of float constants. */
11972 if (GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT)
11973 {
11974 rtx el0 = CONST_VECTOR_ELT (op, 0);
11975
11976 if (!vfp3_const_double_rtx (el0) && el0 != CONST0_RTX (GET_MODE (el0)))
11977 return -1;
11978
11979 /* FP16 vectors cannot be represented. */
11980 if (GET_MODE_INNER (mode) == HFmode)
11981 return -1;
11982
11983 /* All elements in the vector must be the same. Note that 0.0 and -0.0
11984 are distinct in this context. */
11985 if (!const_vec_duplicate_p (op))
11986 return -1;
11987
11988 if (modconst)
11989 *modconst = CONST_VECTOR_ELT (op, 0);
11990
11991 if (elementwidth)
11992 *elementwidth = 0;
11993
11994 if (el0 == CONST0_RTX (GET_MODE (el0)))
11995 return 19;
11996 else
11997 return 18;
11998 }
11999
12000 /* The tricks done in the code below apply for little-endian vector layout.
12001 For big-endian vectors we only allow vectors of the form { a, a, ..., a }.
12002 FIXME: Implement logic for big-endian vectors. */
12003 if (BYTES_BIG_ENDIAN && vector && !const_vec_duplicate_p (op))
12004 return -1;
12005
12006 /* Splat vector constant out into a byte vector. */
12007 for (i = 0; i < n_elts; i++)
12008 {
12009 rtx el = vector ? CONST_VECTOR_ELT (op, i) : op;
12010 unsigned HOST_WIDE_INT elpart;
12011
12012 gcc_assert (CONST_INT_P (el));
12013 elpart = INTVAL (el);
12014
12015 for (unsigned int byte = 0; byte < innersize; byte++)
12016 {
12017 bytes[idx++] = (elpart & 0xff) ^ invmask;
12018 elpart >>= BITS_PER_UNIT;
12019 }
12020 }
12021
12022 /* Sanity check. */
12023 gcc_assert (idx == GET_MODE_SIZE (mode));
12024
12025 do
12026 {
12027 CHECK (4, 32, 0, bytes[i] == bytes[0] && bytes[i + 1] == 0
12028 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12029
12030 CHECK (4, 32, 1, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12031 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12032
12033 CHECK (4, 32, 2, bytes[i] == 0 && bytes[i + 1] == 0
12034 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12035
12036 CHECK (4, 32, 3, bytes[i] == 0 && bytes[i + 1] == 0
12037 && bytes[i + 2] == 0 && bytes[i + 3] == bytes[3]);
12038
12039 CHECK (2, 16, 4, bytes[i] == bytes[0] && bytes[i + 1] == 0);
12040
12041 CHECK (2, 16, 5, bytes[i] == 0 && bytes[i + 1] == bytes[1]);
12042
12043 CHECK (4, 32, 6, bytes[i] == bytes[0] && bytes[i + 1] == 0xff
12044 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12045
12046 CHECK (4, 32, 7, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12047 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12048
12049 CHECK (4, 32, 8, bytes[i] == 0xff && bytes[i + 1] == 0xff
12050 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12051
12052 CHECK (4, 32, 9, bytes[i] == 0xff && bytes[i + 1] == 0xff
12053 && bytes[i + 2] == 0xff && bytes[i + 3] == bytes[3]);
12054
12055 CHECK (2, 16, 10, bytes[i] == bytes[0] && bytes[i + 1] == 0xff);
12056
12057 CHECK (2, 16, 11, bytes[i] == 0xff && bytes[i + 1] == bytes[1]);
12058
12059 CHECK (4, 32, 12, bytes[i] == 0xff && bytes[i + 1] == bytes[1]
12060 && bytes[i + 2] == 0 && bytes[i + 3] == 0);
12061
12062 CHECK (4, 32, 13, bytes[i] == 0 && bytes[i + 1] == bytes[1]
12063 && bytes[i + 2] == 0xff && bytes[i + 3] == 0xff);
12064
12065 CHECK (4, 32, 14, bytes[i] == 0xff && bytes[i + 1] == 0xff
12066 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0);
12067
12068 CHECK (4, 32, 15, bytes[i] == 0 && bytes[i + 1] == 0
12069 && bytes[i + 2] == bytes[2] && bytes[i + 3] == 0xff);
12070
12071 CHECK (1, 8, 16, bytes[i] == bytes[0]);
12072
12073 CHECK (1, 64, 17, (bytes[i] == 0 || bytes[i] == 0xff)
12074 && bytes[i] == bytes[(i + 8) % idx]);
12075 }
12076 while (0);
12077
12078 if (immtype == -1)
12079 return -1;
12080
12081 if (elementwidth)
12082 *elementwidth = elsize;
12083
12084 if (modconst)
12085 {
12086 unsigned HOST_WIDE_INT imm = 0;
12087
12088 /* Un-invert bytes of recognized vector, if necessary. */
12089 if (invmask != 0)
12090 for (i = 0; i < idx; i++)
12091 bytes[i] ^= invmask;
12092
12093 if (immtype == 17)
12094 {
12095 /* FIXME: Broken on 32-bit H_W_I hosts. */
12096 gcc_assert (sizeof (HOST_WIDE_INT) == 8);
12097
12098 for (i = 0; i < 8; i++)
12099 imm |= (unsigned HOST_WIDE_INT) (bytes[i] ? 0xff : 0)
12100 << (i * BITS_PER_UNIT);
12101
12102 *modconst = GEN_INT (imm);
12103 }
12104 else
12105 {
12106 unsigned HOST_WIDE_INT imm = 0;
12107
12108 for (i = 0; i < elsize / BITS_PER_UNIT; i++)
12109 imm |= (unsigned HOST_WIDE_INT) bytes[i] << (i * BITS_PER_UNIT);
12110
12111 *modconst = GEN_INT (imm);
12112 }
12113 }
12114
12115 return immtype;
12116 #undef CHECK
12117 }
12118
12119 /* Return TRUE if rtx X is legal for use as either a Neon VMOV (or, implicitly,
12120 VMVN) immediate. Write back width per element to *ELEMENTWIDTH (or zero for
12121 float elements), and a modified constant (whatever should be output for a
12122 VMOV) in *MODCONST. */
12123
12124 int
12125 neon_immediate_valid_for_move (rtx op, machine_mode mode,
12126 rtx *modconst, int *elementwidth)
12127 {
12128 rtx tmpconst;
12129 int tmpwidth;
12130 int retval = neon_valid_immediate (op, mode, 0, &tmpconst, &tmpwidth);
12131
12132 if (retval == -1)
12133 return 0;
12134
12135 if (modconst)
12136 *modconst = tmpconst;
12137
12138 if (elementwidth)
12139 *elementwidth = tmpwidth;
12140
12141 return 1;
12142 }
12143
12144 /* Return TRUE if rtx X is legal for use in a VORR or VBIC instruction. If
12145 the immediate is valid, write a constant suitable for using as an operand
12146 to VORR/VBIC/VAND/VORN to *MODCONST and the corresponding element width to
12147 *ELEMENTWIDTH. See neon_valid_immediate for description of INVERSE. */
12148
12149 int
12150 neon_immediate_valid_for_logic (rtx op, machine_mode mode, int inverse,
12151 rtx *modconst, int *elementwidth)
12152 {
12153 rtx tmpconst;
12154 int tmpwidth;
12155 int retval = neon_valid_immediate (op, mode, inverse, &tmpconst, &tmpwidth);
12156
12157 if (retval < 0 || retval > 5)
12158 return 0;
12159
12160 if (modconst)
12161 *modconst = tmpconst;
12162
12163 if (elementwidth)
12164 *elementwidth = tmpwidth;
12165
12166 return 1;
12167 }
12168
12169 /* Return TRUE if rtx OP is legal for use in a VSHR or VSHL instruction. If
12170 the immediate is valid, write a constant suitable for using as an operand
12171 to VSHR/VSHL to *MODCONST and the corresponding element width to
12172 *ELEMENTWIDTH. ISLEFTSHIFT is for determine left or right shift,
12173 because they have different limitations. */
12174
12175 int
12176 neon_immediate_valid_for_shift (rtx op, machine_mode mode,
12177 rtx *modconst, int *elementwidth,
12178 bool isleftshift)
12179 {
12180 unsigned int innersize = GET_MODE_UNIT_SIZE (mode);
12181 unsigned int n_elts = CONST_VECTOR_NUNITS (op), i;
12182 unsigned HOST_WIDE_INT last_elt = 0;
12183 unsigned HOST_WIDE_INT maxshift;
12184
12185 /* Check that all elements of the vector constant are identical. */
12186 for (i = 0; i < n_elts; i++)
12187 {
12188 rtx el = CONST_VECTOR_ELT (op, i);
12189 unsigned HOST_WIDE_INT elpart;
12190
12191 if (CONST_INT_P (el))
12192 elpart = INTVAL (el);
12193 else if (CONST_DOUBLE_P (el))
12194 return 0;
12195 else
12196 gcc_unreachable ();
12197
12198 if (i != 0 && elpart != last_elt)
12199 return 0;
12200
12201 last_elt = elpart;
12202 }
12203
12204 /* Shift less than element size. */
12205 maxshift = innersize * 8;
12206
12207 if (isleftshift)
12208 {
12209 /* Left shift immediate value can be from 0 to <size>-1. */
12210 if (last_elt >= maxshift)
12211 return 0;
12212 }
12213 else
12214 {
12215 /* Right shift immediate value can be from 1 to <size>. */
12216 if (last_elt == 0 || last_elt > maxshift)
12217 return 0;
12218 }
12219
12220 if (elementwidth)
12221 *elementwidth = innersize * 8;
12222
12223 if (modconst)
12224 *modconst = CONST_VECTOR_ELT (op, 0);
12225
12226 return 1;
12227 }
12228
12229 /* Return a string suitable for output of Neon immediate logic operation
12230 MNEM. */
12231
12232 char *
12233 neon_output_logic_immediate (const char *mnem, rtx *op2, machine_mode mode,
12234 int inverse, int quad)
12235 {
12236 int width, is_valid;
12237 static char templ[40];
12238
12239 is_valid = neon_immediate_valid_for_logic (*op2, mode, inverse, op2, &width);
12240
12241 gcc_assert (is_valid != 0);
12242
12243 if (quad)
12244 sprintf (templ, "%s.i%d\t%%q0, %%2", mnem, width);
12245 else
12246 sprintf (templ, "%s.i%d\t%%P0, %%2", mnem, width);
12247
12248 return templ;
12249 }
12250
12251 /* Return a string suitable for output of Neon immediate shift operation
12252 (VSHR or VSHL) MNEM. */
12253
12254 char *
12255 neon_output_shift_immediate (const char *mnem, char sign, rtx *op2,
12256 machine_mode mode, int quad,
12257 bool isleftshift)
12258 {
12259 int width, is_valid;
12260 static char templ[40];
12261
12262 is_valid = neon_immediate_valid_for_shift (*op2, mode, op2, &width, isleftshift);
12263 gcc_assert (is_valid != 0);
12264
12265 if (quad)
12266 sprintf (templ, "%s.%c%d\t%%q0, %%q1, %%2", mnem, sign, width);
12267 else
12268 sprintf (templ, "%s.%c%d\t%%P0, %%P1, %%2", mnem, sign, width);
12269
12270 return templ;
12271 }
12272
12273 /* Output a sequence of pairwise operations to implement a reduction.
12274 NOTE: We do "too much work" here, because pairwise operations work on two
12275 registers-worth of operands in one go. Unfortunately we can't exploit those
12276 extra calculations to do the full operation in fewer steps, I don't think.
12277 Although all vector elements of the result but the first are ignored, we
12278 actually calculate the same result in each of the elements. An alternative
12279 such as initially loading a vector with zero to use as each of the second
12280 operands would use up an additional register and take an extra instruction,
12281 for no particular gain. */
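/* For instance, a V4SFmode reduction has PARTS = 4, so the loop below emits
   two pairwise operations (i = 2, then i = 1); after the second one every
   element, and in particular element 0 of OP0, holds the full result.  */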
12282
12283 void
12284 neon_pairwise_reduce (rtx op0, rtx op1, machine_mode mode,
12285 rtx (*reduc) (rtx, rtx, rtx))
12286 {
12287 unsigned int i, parts = GET_MODE_SIZE (mode) / GET_MODE_UNIT_SIZE (mode);
12288 rtx tmpsum = op1;
12289
12290 for (i = parts / 2; i >= 1; i /= 2)
12291 {
12292 rtx dest = (i == 1) ? op0 : gen_reg_rtx (mode);
12293 emit_insn (reduc (dest, tmpsum, tmpsum));
12294 tmpsum = dest;
12295 }
12296 }
12297
12298 /* If VALS is a vector constant that can be loaded into a register
12299 using VDUP, generate instructions to do so and return an RTX to
12300 assign to the register. Otherwise return NULL_RTX. */
12301
12302 static rtx
12303 neon_vdup_constant (rtx vals)
12304 {
12305 machine_mode mode = GET_MODE (vals);
12306 machine_mode inner_mode = GET_MODE_INNER (mode);
12307 rtx x;
12308
12309 if (GET_CODE (vals) != CONST_VECTOR || GET_MODE_SIZE (inner_mode) > 4)
12310 return NULL_RTX;
12311
12312 if (!const_vec_duplicate_p (vals, &x))
12313 /* The elements are not all the same. We could handle repeating
12314 patterns of a mode larger than INNER_MODE here (e.g. int8x8_t
12315 {0, C, 0, C, 0, C, 0, C} which can be loaded using
12316 vdup.i16). */
12317 return NULL_RTX;
12318
12319 /* We can load this constant by using VDUP and a constant in a
12320 single ARM register. This will be cheaper than a vector
12321 load. */
12322
12323 x = copy_to_mode_reg (inner_mode, x);
12324 return gen_vec_duplicate (mode, x);
12325 }
12326
12327 /* Generate code to load VALS, which is a PARALLEL containing only
12328 constants (for vec_init) or CONST_VECTOR, efficiently into a
12329 register. Returns an RTX to copy into the register, or NULL_RTX
12330 for a PARALLEL that cannot be converted into a CONST_VECTOR. */
12331
12332 rtx
12333 neon_make_constant (rtx vals)
12334 {
12335 machine_mode mode = GET_MODE (vals);
12336 rtx target;
12337 rtx const_vec = NULL_RTX;
12338 int n_elts = GET_MODE_NUNITS (mode);
12339 int n_const = 0;
12340 int i;
12341
12342 if (GET_CODE (vals) == CONST_VECTOR)
12343 const_vec = vals;
12344 else if (GET_CODE (vals) == PARALLEL)
12345 {
12346 /* A CONST_VECTOR must contain only CONST_INTs and
12347 CONST_DOUBLEs, but CONSTANT_P allows more (e.g. SYMBOL_REF).
12348 Only store valid constants in a CONST_VECTOR. */
12349 for (i = 0; i < n_elts; ++i)
12350 {
12351 rtx x = XVECEXP (vals, 0, i);
12352 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
12353 n_const++;
12354 }
12355 if (n_const == n_elts)
12356 const_vec = gen_rtx_CONST_VECTOR (mode, XVEC (vals, 0));
12357 }
12358 else
12359 gcc_unreachable ();
12360
12361 if (const_vec != NULL
12362 && neon_immediate_valid_for_move (const_vec, mode, NULL, NULL))
12363 /* Load using VMOV. On Cortex-A8 this takes one cycle. */
12364 return const_vec;
12365 else if ((target = neon_vdup_constant (vals)) != NULL_RTX)
12366 /* Loaded using VDUP. On Cortex-A8 the VDUP takes one NEON
12367 pipeline cycle; creating the constant takes one or two ARM
12368 pipeline cycles. */
12369 return target;
12370 else if (const_vec != NULL_RTX)
12371 /* Load from constant pool. On Cortex-A8 this takes two cycles
12372 (for either double or quad vectors). We cannot take advantage
12373 of single-cycle VLD1 because we need a PC-relative addressing
12374 mode. */
12375 return const_vec;
12376 else
12377 /* A PARALLEL containing something not valid inside CONST_VECTOR.
12378 We cannot construct an initializer. */
12379 return NULL_RTX;
12380 }
12381
12382 /* Initialize vector TARGET to VALS. */
12383
12384 void
12385 neon_expand_vector_init (rtx target, rtx vals)
12386 {
12387 machine_mode mode = GET_MODE (target);
12388 machine_mode inner_mode = GET_MODE_INNER (mode);
12389 int n_elts = GET_MODE_NUNITS (mode);
12390 int n_var = 0, one_var = -1;
12391 bool all_same = true;
12392 rtx x, mem;
12393 int i;
12394
12395 for (i = 0; i < n_elts; ++i)
12396 {
12397 x = XVECEXP (vals, 0, i);
12398 if (!CONSTANT_P (x))
12399 ++n_var, one_var = i;
12400
12401 if (i > 0 && !rtx_equal_p (x, XVECEXP (vals, 0, 0)))
12402 all_same = false;
12403 }
12404
12405 if (n_var == 0)
12406 {
12407 rtx constant = neon_make_constant (vals);
12408 if (constant != NULL_RTX)
12409 {
12410 emit_move_insn (target, constant);
12411 return;
12412 }
12413 }
12414
12415 /* Splat a single non-constant element if we can. */
12416 if (all_same && GET_MODE_SIZE (inner_mode) <= 4)
12417 {
12418 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, 0));
12419 emit_insn (gen_rtx_SET (target, gen_vec_duplicate (mode, x)));
12420 return;
12421 }
12422
12423 /* One field is non-constant. Load constant then overwrite varying
12424 field. This is more efficient than using the stack. */
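      /* For instance, for { x, 1, 2, 3 } we first initialize the target from
	 the constant vector { 1, 1, 2, 3 } (the neighbouring constant stands
	 in for the varying lane) and then insert x into lane 0 with the
	 appropriate vset_lane pattern.  */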
12425 if (n_var == 1)
12426 {
12427 rtx copy = copy_rtx (vals);
12428 rtx index = GEN_INT (one_var);
12429
12430 /* Load constant part of vector, substitute neighboring value for
12431 varying element. */
12432 XVECEXP (copy, 0, one_var) = XVECEXP (vals, 0, (one_var + 1) % n_elts);
12433 neon_expand_vector_init (target, copy);
12434
12435 /* Insert variable. */
12436 x = copy_to_mode_reg (inner_mode, XVECEXP (vals, 0, one_var));
12437 switch (mode)
12438 {
12439 case E_V8QImode:
12440 emit_insn (gen_neon_vset_lanev8qi (target, x, target, index));
12441 break;
12442 case E_V16QImode:
12443 emit_insn (gen_neon_vset_lanev16qi (target, x, target, index));
12444 break;
12445 case E_V4HImode:
12446 emit_insn (gen_neon_vset_lanev4hi (target, x, target, index));
12447 break;
12448 case E_V8HImode:
12449 emit_insn (gen_neon_vset_lanev8hi (target, x, target, index));
12450 break;
12451 case E_V2SImode:
12452 emit_insn (gen_neon_vset_lanev2si (target, x, target, index));
12453 break;
12454 case E_V4SImode:
12455 emit_insn (gen_neon_vset_lanev4si (target, x, target, index));
12456 break;
12457 case E_V2SFmode:
12458 emit_insn (gen_neon_vset_lanev2sf (target, x, target, index));
12459 break;
12460 case E_V4SFmode:
12461 emit_insn (gen_neon_vset_lanev4sf (target, x, target, index));
12462 break;
12463 case E_V2DImode:
12464 emit_insn (gen_neon_vset_lanev2di (target, x, target, index));
12465 break;
12466 default:
12467 gcc_unreachable ();
12468 }
12469 return;
12470 }
12471
12472 /* Construct the vector in memory one field at a time
12473 and load the whole vector. */
12474 mem = assign_stack_temp (mode, GET_MODE_SIZE (mode));
12475 for (i = 0; i < n_elts; i++)
12476 emit_move_insn (adjust_address_nv (mem, inner_mode,
12477 i * GET_MODE_SIZE (inner_mode)),
12478 XVECEXP (vals, 0, i));
12479 emit_move_insn (target, mem);
12480 }
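
/* Illustrative note (a sketch, not part of the algorithm above): for a
   V4SI initializer such as { 1, 2, 3, x } with non-constant X, the code
   above first loads the constant vector { 1, 2, 3, 1 } (the varying
   element replaced by its neighbour) and then overwrites lane 3 with X
   via a vset_lane pattern; an all-equal non-constant initializer
   { x, x, x, x } instead takes the single vec_duplicate (VDUP) path. */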
12481
12482 /* Ensure OPERAND lies between LOW (inclusive) and HIGH (exclusive). Raise
12483 an error mentioning DESC if it doesn't. EXP indicates the source location,
12484 which includes the inlining history for intrinsics. */
12485
12486 static void
12487 bounds_check (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12488 const_tree exp, const char *desc)
12489 {
12490 HOST_WIDE_INT lane;
12491
12492 gcc_assert (CONST_INT_P (operand));
12493
12494 lane = INTVAL (operand);
12495
12496 if (lane < low || lane >= high)
12497 {
12498 if (exp)
12499 error ("%K%s %wd out of range %wd - %wd",
12500 exp, desc, lane, low, high - 1);
12501 else
12502 error ("%s %wd out of range %wd - %wd", desc, lane, low, high - 1);
12503 }
12504 }
12505
12506 /* Bounds-check lanes. */
12507
12508 void
12509 neon_lane_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high,
12510 const_tree exp)
12511 {
12512 bounds_check (operand, low, high, exp, "lane");
12513 }
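
/* For example, if a lane-indexed intrinsic passes lane 4 where HIGH is 4
   (valid lanes 0-3), the check above reports "lane 4 out of range 0 - 3"
   against the intrinsic's original source location. */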
12514
12515 /* Bounds-check constants. */
12516
12517 void
12518 arm_const_bounds (rtx operand, HOST_WIDE_INT low, HOST_WIDE_INT high)
12519 {
12520 bounds_check (operand, low, high, NULL_TREE, "constant");
12521 }
12522
12523 HOST_WIDE_INT
12524 neon_element_bits (machine_mode mode)
12525 {
12526 return GET_MODE_UNIT_BITSIZE (mode);
12527 }
12528
12529 \f
12530 /* Predicates for `match_operand' and `match_operator'. */
12531
12532 /* Return TRUE if OP is a valid coprocessor memory address pattern.
12533 WB is true if full writeback address modes are allowed, and false if
12534 only the limited writeback address modes (POST_INC and PRE_DEC) are
12535 allowed. */
12536
12537 int
12538 arm_coproc_mem_operand (rtx op, bool wb)
12539 {
12540 rtx ind;
12541
12542 /* Reject eliminable registers. */
12543 if (! (reload_in_progress || reload_completed || lra_in_progress)
12544 && ( reg_mentioned_p (frame_pointer_rtx, op)
12545 || reg_mentioned_p (arg_pointer_rtx, op)
12546 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12547 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12548 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12549 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12550 return FALSE;
12551
12552 /* Constants are converted into offsets from labels. */
12553 if (!MEM_P (op))
12554 return FALSE;
12555
12556 ind = XEXP (op, 0);
12557
12558 if (reload_completed
12559 && (GET_CODE (ind) == LABEL_REF
12560 || (GET_CODE (ind) == CONST
12561 && GET_CODE (XEXP (ind, 0)) == PLUS
12562 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12563 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12564 return TRUE;
12565
12566 /* Match: (mem (reg)). */
12567 if (REG_P (ind))
12568 return arm_address_register_rtx_p (ind, 0);
12569
12570 /* Autoincrement addressing modes. POST_INC and PRE_DEC are
12571 acceptable in any case (subject to verification by
12572 arm_address_register_rtx_p). We need WB to be true to accept
12573 PRE_INC and POST_DEC. */
12574 if (GET_CODE (ind) == POST_INC
12575 || GET_CODE (ind) == PRE_DEC
12576 || (wb
12577 && (GET_CODE (ind) == PRE_INC
12578 || GET_CODE (ind) == POST_DEC)))
12579 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12580
12581 if (wb
12582 && (GET_CODE (ind) == POST_MODIFY || GET_CODE (ind) == PRE_MODIFY)
12583 && arm_address_register_rtx_p (XEXP (ind, 0), 0)
12584 && GET_CODE (XEXP (ind, 1)) == PLUS
12585 && rtx_equal_p (XEXP (XEXP (ind, 1), 0), XEXP (ind, 0)))
12586 ind = XEXP (ind, 1);
12587
12588 /* Match:
12589 (plus (reg)
12590 (const)). */
12591 if (GET_CODE (ind) == PLUS
12592 && REG_P (XEXP (ind, 0))
12593 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12594 && CONST_INT_P (XEXP (ind, 1))
12595 && INTVAL (XEXP (ind, 1)) > -1024
12596 && INTVAL (XEXP (ind, 1)) < 1024
12597 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12598 return TRUE;
12599
12600 return FALSE;
12601 }
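
/* As a rough illustration of the checks above (register names chosen
   arbitrarily): (mem (reg rN)), (mem (post_inc rN)) and
   (mem (plus rN (const_int 16))) are accepted, while
   (mem (plus rN (const_int 2))) is rejected because the offset is not a
   multiple of 4, and (mem (plus rN (const_int 1024))) is rejected
   because it lies outside the (-1024, 1024) range. */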
12602
12603 /* Return TRUE if OP is a memory operand from which we can load a vector
12604 or to which we can store one. TYPE is one of the following values:
12605 0 - Vector load/store (vldr)
12606 1 - Core registers (ldm)
12607 2 - Element/structure loads (vld1)
12608 */
12609 int
12610 neon_vector_mem_operand (rtx op, int type, bool strict)
12611 {
12612 rtx ind;
12613
12614 /* Reject eliminable registers. */
12615 if (strict && ! (reload_in_progress || reload_completed)
12616 && (reg_mentioned_p (frame_pointer_rtx, op)
12617 || reg_mentioned_p (arg_pointer_rtx, op)
12618 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12619 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12620 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12621 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12622 return FALSE;
12623
12624 /* Constants are converted into offsets from labels. */
12625 if (!MEM_P (op))
12626 return FALSE;
12627
12628 ind = XEXP (op, 0);
12629
12630 if (reload_completed
12631 && (GET_CODE (ind) == LABEL_REF
12632 || (GET_CODE (ind) == CONST
12633 && GET_CODE (XEXP (ind, 0)) == PLUS
12634 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12635 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12636 return TRUE;
12637
12638 /* Match: (mem (reg)). */
12639 if (REG_P (ind))
12640 return arm_address_register_rtx_p (ind, 0);
12641
12642 /* Allow post-increment with Neon registers. */
12643 if ((type != 1 && GET_CODE (ind) == POST_INC)
12644 || (type == 0 && GET_CODE (ind) == PRE_DEC))
12645 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12646
12647 /* Allow post-increment by register for VLDn. */
12648 if (type == 2 && GET_CODE (ind) == POST_MODIFY
12649 && GET_CODE (XEXP (ind, 1)) == PLUS
12650 && REG_P (XEXP (XEXP (ind, 1), 1)))
12651 return true;
12652
12653 /* Match:
12654 (plus (reg)
12655 (const)). */
12656 if (type == 0
12657 && GET_CODE (ind) == PLUS
12658 && REG_P (XEXP (ind, 0))
12659 && REG_MODE_OK_FOR_BASE_P (XEXP (ind, 0), VOIDmode)
12660 && CONST_INT_P (XEXP (ind, 1))
12661 && INTVAL (XEXP (ind, 1)) > -1024
12662 /* For quad modes, we restrict the constant offset to be slightly less
12663 than what the instruction format permits. We have no such constraint
12664 on double mode offsets. (This must match arm_legitimate_index_p.) */
12665 && (INTVAL (XEXP (ind, 1))
12666 < (VALID_NEON_QREG_MODE (GET_MODE (op))? 1016 : 1024))
12667 && (INTVAL (XEXP (ind, 1)) & 3) == 0)
12668 return TRUE;
12669
12670 return FALSE;
12671 }
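
/* For illustration: with TYPE 0, (mem (reg rN)) and
   (mem (plus rN (const_int 16))) are accepted subject to the offset
   limits checked above, whereas the POST_MODIFY-by-register form is
   accepted only for TYPE 2 (the VLDn element/structure loads). */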
12672
12673 /* Return TRUE if OP is a mem suitable for loading/storing a Neon struct
12674 type. */
12675 int
12676 neon_struct_mem_operand (rtx op)
12677 {
12678 rtx ind;
12679
12680 /* Reject eliminable registers. */
12681 if (! (reload_in_progress || reload_completed)
12682 && ( reg_mentioned_p (frame_pointer_rtx, op)
12683 || reg_mentioned_p (arg_pointer_rtx, op)
12684 || reg_mentioned_p (virtual_incoming_args_rtx, op)
12685 || reg_mentioned_p (virtual_outgoing_args_rtx, op)
12686 || reg_mentioned_p (virtual_stack_dynamic_rtx, op)
12687 || reg_mentioned_p (virtual_stack_vars_rtx, op)))
12688 return FALSE;
12689
12690 /* Constants are converted into offsets from labels. */
12691 if (!MEM_P (op))
12692 return FALSE;
12693
12694 ind = XEXP (op, 0);
12695
12696 if (reload_completed
12697 && (GET_CODE (ind) == LABEL_REF
12698 || (GET_CODE (ind) == CONST
12699 && GET_CODE (XEXP (ind, 0)) == PLUS
12700 && GET_CODE (XEXP (XEXP (ind, 0), 0)) == LABEL_REF
12701 && CONST_INT_P (XEXP (XEXP (ind, 0), 1)))))
12702 return TRUE;
12703
12704 /* Match: (mem (reg)). */
12705 if (REG_P (ind))
12706 return arm_address_register_rtx_p (ind, 0);
12707
12708 /* vldm/vstm allows POST_INC (ia) and PRE_DEC (db). */
12709 if (GET_CODE (ind) == POST_INC
12710 || GET_CODE (ind) == PRE_DEC)
12711 return arm_address_register_rtx_p (XEXP (ind, 0), 0);
12712
12713 return FALSE;
12714 }
12715
12716 /* Return true if X is a register that will be eliminated later on. */
12717 int
12718 arm_eliminable_register (rtx x)
12719 {
12720 return REG_P (x) && (REGNO (x) == FRAME_POINTER_REGNUM
12721 || REGNO (x) == ARG_POINTER_REGNUM
12722 || (REGNO (x) >= FIRST_VIRTUAL_REGISTER
12723 && REGNO (x) <= LAST_VIRTUAL_REGISTER));
12724 }
12725
12726 /* Return GENERAL_REGS if a scratch register is required to reload X
12727 to/from coprocessor registers. Otherwise return NO_REGS. */
12728
12729 enum reg_class
12730 coproc_secondary_reload_class (machine_mode mode, rtx x, bool wb)
12731 {
12732 if (mode == HFmode)
12733 {
12734 if (!TARGET_NEON_FP16 && !TARGET_VFP_FP16INST)
12735 return GENERAL_REGS;
12736 if (s_register_operand (x, mode) || neon_vector_mem_operand (x, 2, true))
12737 return NO_REGS;
12738 return GENERAL_REGS;
12739 }
12740
12741 /* The neon move patterns handle all legitimate vector and struct
12742 addresses. */
12743 if (TARGET_NEON
12744 && (MEM_P (x) || GET_CODE (x) == CONST_VECTOR)
12745 && (GET_MODE_CLASS (mode) == MODE_VECTOR_INT
12746 || GET_MODE_CLASS (mode) == MODE_VECTOR_FLOAT
12747 || VALID_NEON_STRUCT_MODE (mode)))
12748 return NO_REGS;
12749
12750 if (arm_coproc_mem_operand (x, wb) || s_register_operand (x, mode))
12751 return NO_REGS;
12752
12753 return GENERAL_REGS;
12754 }
12755
12756 /* Values which must be returned in the most-significant end of the return
12757 register. */
12758
12759 static bool
12760 arm_return_in_msb (const_tree valtype)
12761 {
12762 return (TARGET_AAPCS_BASED
12763 && BYTES_BIG_ENDIAN
12764 && (AGGREGATE_TYPE_P (valtype)
12765 || TREE_CODE (valtype) == COMPLEX_TYPE
12766 || FIXED_POINT_TYPE_P (valtype)));
12767 }
12768
12769 /* Return TRUE if X references a SYMBOL_REF. */
12770 int
12771 symbol_mentioned_p (rtx x)
12772 {
12773 const char * fmt;
12774 int i;
12775
12776 if (GET_CODE (x) == SYMBOL_REF)
12777 return 1;
12778
12779 /* UNSPEC_TLS entries for a symbol include the SYMBOL_REF, but they
12780 are constant offsets, not symbols. */
12781 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12782 return 0;
12783
12784 fmt = GET_RTX_FORMAT (GET_CODE (x));
12785
12786 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12787 {
12788 if (fmt[i] == 'E')
12789 {
12790 int j;
12791
12792 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12793 if (symbol_mentioned_p (XVECEXP (x, i, j)))
12794 return 1;
12795 }
12796 else if (fmt[i] == 'e' && symbol_mentioned_p (XEXP (x, i)))
12797 return 1;
12798 }
12799
12800 return 0;
12801 }
12802
12803 /* Return TRUE if X references a LABEL_REF. */
12804 int
12805 label_mentioned_p (rtx x)
12806 {
12807 const char * fmt;
12808 int i;
12809
12810 if (GET_CODE (x) == LABEL_REF)
12811 return 1;
12812
12813 /* UNSPEC_TLS entries for a symbol include a LABEL_REF for the referencing
12814 instruction, but they are constant offsets, not symbols. */
12815 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
12816 return 0;
12817
12818 fmt = GET_RTX_FORMAT (GET_CODE (x));
12819 for (i = GET_RTX_LENGTH (GET_CODE (x)) - 1; i >= 0; i--)
12820 {
12821 if (fmt[i] == 'E')
12822 {
12823 int j;
12824
12825 for (j = XVECLEN (x, i) - 1; j >= 0; j--)
12826 if (label_mentioned_p (XVECEXP (x, i, j)))
12827 return 1;
12828 }
12829 else if (fmt[i] == 'e' && label_mentioned_p (XEXP (x, i)))
12830 return 1;
12831 }
12832
12833 return 0;
12834 }
12835
12836 int
12837 tls_mentioned_p (rtx x)
12838 {
12839 switch (GET_CODE (x))
12840 {
12841 case CONST:
12842 return tls_mentioned_p (XEXP (x, 0));
12843
12844 case UNSPEC:
12845 if (XINT (x, 1) == UNSPEC_TLS)
12846 return 1;
12847
12848 /* Fall through. */
12849 default:
12850 return 0;
12851 }
12852 }
12853
12854 /* Must not copy any rtx that uses a pc-relative address.
12855 Also, disallow copying of load-exclusive instructions that
12856 may appear after splitting of compare-and-swap-style operations
12857 so as to prevent those loops from being transformed away from their
12858 canonical forms (see PR 69904). */
12859
12860 static bool
12861 arm_cannot_copy_insn_p (rtx_insn *insn)
12862 {
12863 /* The tls call insn cannot be copied, as it is paired with a data
12864 word. */
12865 if (recog_memoized (insn) == CODE_FOR_tlscall)
12866 return true;
12867
12868 subrtx_iterator::array_type array;
12869 FOR_EACH_SUBRTX (iter, array, PATTERN (insn), ALL)
12870 {
12871 const_rtx x = *iter;
12872 if (GET_CODE (x) == UNSPEC
12873 && (XINT (x, 1) == UNSPEC_PIC_BASE
12874 || XINT (x, 1) == UNSPEC_PIC_UNIFIED))
12875 return true;
12876 }
12877
12878 rtx set = single_set (insn);
12879 if (set)
12880 {
12881 rtx src = SET_SRC (set);
12882 if (GET_CODE (src) == ZERO_EXTEND)
12883 src = XEXP (src, 0);
12884
12885 /* Catch the load-exclusive and load-acquire operations. */
12886 if (GET_CODE (src) == UNSPEC_VOLATILE
12887 && (XINT (src, 1) == VUNSPEC_LL
12888 || XINT (src, 1) == VUNSPEC_LAX))
12889 return true;
12890 }
12891 return false;
12892 }
12893
12894 enum rtx_code
12895 minmax_code (rtx x)
12896 {
12897 enum rtx_code code = GET_CODE (x);
12898
12899 switch (code)
12900 {
12901 case SMAX:
12902 return GE;
12903 case SMIN:
12904 return LE;
12905 case UMIN:
12906 return LEU;
12907 case UMAX:
12908 return GEU;
12909 default:
12910 gcc_unreachable ();
12911 }
12912 }
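
/* For example, (smax x y) selects X when the signed comparison X >= Y
   holds, so SMAX corresponds to GE; the unsigned variants map to the
   unsigned condition codes in the same way. */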
12913
12914 /* Match pair of min/max operators that can be implemented via usat/ssat. */
12915
12916 bool
12917 arm_sat_operator_match (rtx lo_bound, rtx hi_bound,
12918 int *mask, bool *signed_sat)
12919 {
12920 /* The high bound must be a power of two minus one. */
12921 int log = exact_log2 (INTVAL (hi_bound) + 1);
12922 if (log == -1)
12923 return false;
12924
12925 /* The low bound is either zero (for usat) or one less than the
12926 negation of the high bound (for ssat). */
12927 if (INTVAL (lo_bound) == 0)
12928 {
12929 if (mask)
12930 *mask = log;
12931 if (signed_sat)
12932 *signed_sat = false;
12933
12934 return true;
12935 }
12936
12937 if (INTVAL (lo_bound) == -INTVAL (hi_bound) - 1)
12938 {
12939 if (mask)
12940 *mask = log + 1;
12941 if (signed_sat)
12942 *signed_sat = true;
12943
12944 return true;
12945 }
12946
12947 return false;
12948 }
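
/* Worked example (a sketch, not exercised directly here): bounds 0 and
   255 give log = 8, so the pair matches USAT with *MASK = 8 (range
   [0, 255]); bounds -256 and 255 satisfy -256 == -255 - 1, so they
   match SSAT with *MASK = 9 (the signed 9-bit range [-256, 255]). */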
12949
12950 /* Return 1 if memory locations are adjacent. */
12951 int
12952 adjacent_mem_locations (rtx a, rtx b)
12953 {
12954 /* We don't guarantee to preserve the order of these memory refs. */
12955 if (volatile_refs_p (a) || volatile_refs_p (b))
12956 return 0;
12957
12958 if ((REG_P (XEXP (a, 0))
12959 || (GET_CODE (XEXP (a, 0)) == PLUS
12960 && CONST_INT_P (XEXP (XEXP (a, 0), 1))))
12961 && (REG_P (XEXP (b, 0))
12962 || (GET_CODE (XEXP (b, 0)) == PLUS
12963 && CONST_INT_P (XEXP (XEXP (b, 0), 1)))))
12964 {
12965 HOST_WIDE_INT val0 = 0, val1 = 0;
12966 rtx reg0, reg1;
12967 int val_diff;
12968
12969 if (GET_CODE (XEXP (a, 0)) == PLUS)
12970 {
12971 reg0 = XEXP (XEXP (a, 0), 0);
12972 val0 = INTVAL (XEXP (XEXP (a, 0), 1));
12973 }
12974 else
12975 reg0 = XEXP (a, 0);
12976
12977 if (GET_CODE (XEXP (b, 0)) == PLUS)
12978 {
12979 reg1 = XEXP (XEXP (b, 0), 0);
12980 val1 = INTVAL (XEXP (XEXP (b, 0), 1));
12981 }
12982 else
12983 reg1 = XEXP (b, 0);
12984
12985 /* Don't accept any offset that will require multiple
12986 instructions to handle, since this would cause the
12987 arith_adjacentmem pattern to output an overlong sequence. */
12988 if (!const_ok_for_op (val0, PLUS) || !const_ok_for_op (val1, PLUS))
12989 return 0;
12990
12991 /* Don't allow an eliminable register: register elimination can make
12992 the offset too large. */
12993 if (arm_eliminable_register (reg0))
12994 return 0;
12995
12996 val_diff = val1 - val0;
12997
12998 if (arm_ld_sched)
12999 {
13000 /* If the target has load delay slots, then there's no benefit
13001 to using an ldm instruction unless the offset is zero and
13002 we are optimizing for size. */
13003 return (optimize_size && (REGNO (reg0) == REGNO (reg1))
13004 && (val0 == 0 || val1 == 0 || val0 == 4 || val1 == 4)
13005 && (val_diff == 4 || val_diff == -4));
13006 }
13007
13008 return ((REGNO (reg0) == REGNO (reg1))
13009 && (val_diff == 4 || val_diff == -4));
13010 }
13011
13012 return 0;
13013 }
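
/* For example, [r3, #8] and [r3, #12] use the same base register and
   offsets differing by 4, so they can be treated as adjacent (subject
   to the arm_ld_sched restriction above), while [r3, #8] and [r3, #16],
   or [r3, #8] and [r4, #12], never are. */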
13014
13015 /* Return true if OP is a valid load or store multiple operation. LOAD is true
13016 for load operations, false for store operations. CONSECUTIVE is true
13017 if the register numbers in the operation must be consecutive in the register
13018 bank. RETURN_PC is true if the value is to be loaded into the PC.
13019 The pattern we are trying to match for load is:
13020 [(SET (R_d0) (MEM (PLUS (addr) (offset))))
13021 (SET (R_d1) (MEM (PLUS (addr) (offset + <reg_increment>))))
13022 :
13023 :
13024 (SET (R_dn) (MEM (PLUS (addr) (offset + n * <reg_increment>))))
13025 ]
13026 where
13027 1. If offset is 0, first insn should be (SET (R_d0) (MEM (src_addr))).
13028 2. REGNO (R_d0) < REGNO (R_d1) < ... < REGNO (R_dn).
13029 3. If consecutive is TRUE, then for kth register being loaded,
13030 REGNO (R_dk) = REGNO (R_d0) + k.
13031 The pattern for store is similar. */
13032 bool
13033 ldm_stm_operation_p (rtx op, bool load, machine_mode mode,
13034 bool consecutive, bool return_pc)
13035 {
13036 HOST_WIDE_INT count = XVECLEN (op, 0);
13037 rtx reg, mem, addr;
13038 unsigned regno;
13039 unsigned first_regno;
13040 HOST_WIDE_INT i = 1, base = 0, offset = 0;
13041 rtx elt;
13042 bool addr_reg_in_reglist = false;
13043 bool update = false;
13044 int reg_increment;
13045 int offset_adj;
13046 int regs_per_val;
13047
13048 /* If not in SImode, then registers must be consecutive
13049 (e.g., VLDM instructions for DFmode). */
13050 gcc_assert ((mode == SImode) || consecutive);
13051 /* Setting return_pc for stores is illegal. */
13052 gcc_assert (!return_pc || load);
13053
13054 /* Set up the increments and the regs per val based on the mode. */
13055 reg_increment = GET_MODE_SIZE (mode);
13056 regs_per_val = reg_increment / 4;
13057 offset_adj = return_pc ? 1 : 0;
13058
13059 if (count <= 1
13060 || GET_CODE (XVECEXP (op, 0, offset_adj)) != SET
13061 || (load && !REG_P (SET_DEST (XVECEXP (op, 0, offset_adj)))))
13062 return false;
13063
13064 /* Check if this is a write-back. */
13065 elt = XVECEXP (op, 0, offset_adj);
13066 if (GET_CODE (SET_SRC (elt)) == PLUS)
13067 {
13068 i++;
13069 base = 1;
13070 update = true;
13071
13072 /* The offset adjustment must be the number of registers being
13073 popped times the size of a single register. */
13074 if (!REG_P (SET_DEST (elt))
13075 || !REG_P (XEXP (SET_SRC (elt), 0))
13076 || (REGNO (SET_DEST (elt)) != REGNO (XEXP (SET_SRC (elt), 0)))
13077 || !CONST_INT_P (XEXP (SET_SRC (elt), 1))
13078 || INTVAL (XEXP (SET_SRC (elt), 1)) !=
13079 ((count - 1 - offset_adj) * reg_increment))
13080 return false;
13081 }
13082
13083 i = i + offset_adj;
13084 base = base + offset_adj;
13085 /* Perform a quick check so we don't blow up below. If only one reg is loaded,
13086 success depends on the type: VLDM can do just one reg,
13087 LDM must do at least two. */
13088 if ((count <= i) && (mode == SImode))
13089 return false;
13090
13091 elt = XVECEXP (op, 0, i - 1);
13092 if (GET_CODE (elt) != SET)
13093 return false;
13094
13095 if (load)
13096 {
13097 reg = SET_DEST (elt);
13098 mem = SET_SRC (elt);
13099 }
13100 else
13101 {
13102 reg = SET_SRC (elt);
13103 mem = SET_DEST (elt);
13104 }
13105
13106 if (!REG_P (reg) || !MEM_P (mem))
13107 return false;
13108
13109 regno = REGNO (reg);
13110 first_regno = regno;
13111 addr = XEXP (mem, 0);
13112 if (GET_CODE (addr) == PLUS)
13113 {
13114 if (!CONST_INT_P (XEXP (addr, 1)))
13115 return false;
13116
13117 offset = INTVAL (XEXP (addr, 1));
13118 addr = XEXP (addr, 0);
13119 }
13120
13121 if (!REG_P (addr))
13122 return false;
13123
13124 /* Don't allow SP to be loaded unless it is also the base register. It
13125 guarantees that SP is reset correctly when an LDM instruction
13126 is interrupted. Otherwise, we might end up with a corrupt stack. */
13127 if (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13128 return false;
13129
13130 for (; i < count; i++)
13131 {
13132 elt = XVECEXP (op, 0, i);
13133 if (GET_CODE (elt) != SET)
13134 return false;
13135
13136 if (load)
13137 {
13138 reg = SET_DEST (elt);
13139 mem = SET_SRC (elt);
13140 }
13141 else
13142 {
13143 reg = SET_SRC (elt);
13144 mem = SET_DEST (elt);
13145 }
13146
13147 if (!REG_P (reg)
13148 || GET_MODE (reg) != mode
13149 || REGNO (reg) <= regno
13150 || (consecutive
13151 && (REGNO (reg) !=
13152 (unsigned int) (first_regno + regs_per_val * (i - base))))
13153 /* Don't allow SP to be loaded unless it is also the base register. It
13154 guarantees that SP is reset correctly when an LDM instruction
13155 is interrupted. Otherwise, we might end up with a corrupt stack. */
13156 || (load && (REGNO (reg) == SP_REGNUM) && (REGNO (addr) != SP_REGNUM))
13157 || !MEM_P (mem)
13158 || GET_MODE (mem) != mode
13159 || ((GET_CODE (XEXP (mem, 0)) != PLUS
13160 || !rtx_equal_p (XEXP (XEXP (mem, 0), 0), addr)
13161 || !CONST_INT_P (XEXP (XEXP (mem, 0), 1))
13162 || (INTVAL (XEXP (XEXP (mem, 0), 1)) !=
13163 offset + (i - base) * reg_increment))
13164 && (!REG_P (XEXP (mem, 0))
13165 || offset + (i - base) * reg_increment != 0)))
13166 return false;
13167
13168 regno = REGNO (reg);
13169 if (regno == REGNO (addr))
13170 addr_reg_in_reglist = true;
13171 }
13172
13173 if (load)
13174 {
13175 if (update && addr_reg_in_reglist)
13176 return false;
13177
13178 /* For Thumb-1, the address register is always modified - either by
13179 write-back or by an explicit load. If the pattern does not describe an
13180 update, then the address register must be in the list of loaded registers. */
13181 if (TARGET_THUMB1)
13182 return update || addr_reg_in_reglist;
13183 }
13184
13185 return true;
13186 }
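
/* A minimal example of a PARALLEL accepted by the checks above (SImode
   load, no write-back, no return; register numbers for illustration
   only):

     (parallel [(set (reg r4) (mem (reg r1)))
                (set (reg r5) (mem (plus (reg r1) (const_int 4))))
                (set (reg r6) (mem (plus (reg r1) (const_int 8))))])

   i.e. an LDMIA of r4-r6 from the address in r1. */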
13187
13188 /* Return true iff it would be profitable to turn a sequence of NOPS loads
13189 or stores (depending on IS_STORE) into a load-multiple or store-multiple
13190 instruction. ADD_OFFSET is nonzero if the base address register needs
13191 to be modified with an add instruction before we can use it. */
13192
13193 static bool
13194 multiple_operation_profitable_p (bool is_store ATTRIBUTE_UNUSED,
13195 int nops, HOST_WIDE_INT add_offset)
13196 {
13197 /* For ARM8,9 & StrongARM, 2 ldr instructions are faster than an ldm
13198 if the offset isn't small enough. The reason 2 ldrs are faster
13199 is because these ARMs are able to do more than one cache access
13200 in a single cycle. The ARM9 and StrongARM have Harvard caches,
13201 whilst the ARM8 has a double bandwidth cache. This means that
13202 these cores can do both an instruction fetch and a data fetch in
13203 a single cycle, so the trick of calculating the address into a
13204 scratch register (one of the result regs) and then doing a load
13205 multiple actually becomes slower (and no smaller in code size).
13206 That is, the transformation
13207
13208 ldr rd1, [rbase + offset]
13209 ldr rd2, [rbase + offset + 4]
13210
13211 to
13212
13213 add rd1, rbase, offset
13214 ldmia rd1, {rd1, rd2}
13215
13216 produces worse code -- '3 cycles + any stalls on rd2' instead of
13217 '2 cycles + any stalls on rd2'. On ARMs with only one cache
13218 access per cycle, the first sequence could never complete in less
13219 than 6 cycles, whereas the ldm sequence would only take 5 and
13220 would make better use of sequential accesses if not hitting the
13221 cache.
13222
13223 We cheat here and test 'arm_ld_sched' which we currently know to
13224 only be true for the ARM8, ARM9 and StrongARM. If this ever
13225 changes, then the test below needs to be reworked. */
13226 if (nops == 2 && arm_ld_sched && add_offset != 0)
13227 return false;
13228
13229 /* XScale has load-store double instructions, but they have stricter
13230 alignment requirements than load-store multiple, so we cannot
13231 use them.
13232
13233 For XScale ldm requires 2 + NREGS cycles to complete and blocks
13234 the pipeline until completion.
13235
13236 NREGS CYCLES
13237 1 3
13238 2 4
13239 3 5
13240 4 6
13241
13242 An ldr instruction takes 1-3 cycles, but does not block the
13243 pipeline.
13244
13245 NREGS CYCLES
13246 1 1-3
13247 2 2-6
13248 3 3-9
13249 4 4-12
13250
13251 Best case ldr will always win. However, the more ldr instructions
13252 we issue, the less likely we are to be able to schedule them well.
13253 Using ldr instructions also increases code size.
13254
13255 As a compromise, we use ldr for counts of 1 or 2 regs, and ldm
13256 for counts of 3 or 4 regs. */
13257 if (nops <= 2 && arm_tune_xscale && !optimize_size)
13258 return false;
13259 return true;
13260 }
13261
13262 /* Subroutine of load_multiple_sequence and store_multiple_sequence.
13263 Given an array of UNSORTED_OFFSETS, of which there are NOPS, compute
13264 an array ORDER which describes the sequence to use when accessing the
13265 offsets that produces an ascending order. In this sequence, each
13266 offset must be larger by exactly 4 than the previous one. ORDER[0]
13267 must have been filled in with the lowest offset by the caller.
13268 If UNSORTED_REGS is nonnull, it is an array of register numbers that
13269 we use to verify that ORDER produces an ascending order of registers.
13270 Return true if it was possible to construct such an order, false if
13271 not. */
13272
13273 static bool
13274 compute_offset_order (int nops, HOST_WIDE_INT *unsorted_offsets, int *order,
13275 int *unsorted_regs)
13276 {
13277 int i;
13278 for (i = 1; i < nops; i++)
13279 {
13280 int j;
13281
13282 order[i] = order[i - 1];
13283 for (j = 0; j < nops; j++)
13284 if (unsorted_offsets[j] == unsorted_offsets[order[i - 1]] + 4)
13285 {
13286 /* We must find exactly one offset that is higher than the
13287 previous one by 4. */
13288 if (order[i] != order[i - 1])
13289 return false;
13290 order[i] = j;
13291 }
13292 if (order[i] == order[i - 1])
13293 return false;
13294 /* The register numbers must be ascending. */
13295 if (unsorted_regs != NULL
13296 && unsorted_regs[order[i]] <= unsorted_regs[order[i - 1]])
13297 return false;
13298 }
13299 return true;
13300 }
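
/* Worked example: with UNSORTED_OFFSETS = { 8, 0, 12, 4 } and ORDER[0]
   preset to 1 (the index of offset 0), the loop above fills ORDER with
   { 1, 3, 0, 2 }, visiting the offsets as 0, 4, 8, 12; if any step of 4
   were missing or duplicated, it would return false instead. */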
13301
13302 /* Used to determine in a peephole whether a sequence of load
13303 instructions can be changed into a load-multiple instruction.
13304 NOPS is the number of separate load instructions we are examining. The
13305 first NOPS entries in OPERANDS are the destination registers, the
13306 next NOPS entries are memory operands. If this function is
13307 successful, *BASE is set to the common base register of the memory
13308 accesses; *LOAD_OFFSET is set to the first memory location's offset
13309 from that base register.
13310 REGS is an array filled in with the destination register numbers.
13311 SAVED_ORDER (if nonnull) is an array filled in with an order that maps
13312 insn numbers to an ascending order of loads. If CHECK_REGS is true,
13313 the sequence of registers in REGS matches the loads from ascending memory
13314 locations, and the function verifies that the register numbers are
13315 themselves ascending. If CHECK_REGS is false, the register numbers
13316 are stored in the order they are found in the operands. */
13317 static int
13318 load_multiple_sequence (rtx *operands, int nops, int *regs, int *saved_order,
13319 int *base, HOST_WIDE_INT *load_offset, bool check_regs)
13320 {
13321 int unsorted_regs[MAX_LDM_STM_OPS];
13322 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13323 int order[MAX_LDM_STM_OPS];
13324 rtx base_reg_rtx = NULL;
13325 int base_reg = -1;
13326 int i, ldm_case;
13327
13328 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13329 easily extended if required. */
13330 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13331
13332 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13333
13334 /* Loop over the operands and check that the memory references are
13335 suitable (i.e. immediate offsets from the same base register). At
13336 the same time, extract the target register, and the memory
13337 offsets. */
13338 for (i = 0; i < nops; i++)
13339 {
13340 rtx reg;
13341 rtx offset;
13342
13343 /* Convert a subreg of a mem into the mem itself. */
13344 if (GET_CODE (operands[nops + i]) == SUBREG)
13345 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13346
13347 gcc_assert (MEM_P (operands[nops + i]));
13348
13349 /* Don't reorder volatile memory references; it doesn't seem worth
13350 looking for the case where the order is ok anyway. */
13351 if (MEM_VOLATILE_P (operands[nops + i]))
13352 return 0;
13353
13354 offset = const0_rtx;
13355
13356 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13357 || (GET_CODE (reg) == SUBREG
13358 && REG_P (reg = SUBREG_REG (reg))))
13359 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13360 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13361 || (GET_CODE (reg) == SUBREG
13362 && REG_P (reg = SUBREG_REG (reg))))
13363 && (CONST_INT_P (offset
13364 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13365 {
13366 if (i == 0)
13367 {
13368 base_reg = REGNO (reg);
13369 base_reg_rtx = reg;
13370 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13371 return 0;
13372 }
13373 else if (base_reg != (int) REGNO (reg))
13374 /* Not addressed from the same base register. */
13375 return 0;
13376
13377 unsorted_regs[i] = (REG_P (operands[i])
13378 ? REGNO (operands[i])
13379 : REGNO (SUBREG_REG (operands[i])));
13380
13381 /* If it isn't an integer register, or if it overwrites the
13382 base register but isn't the last insn in the list, then
13383 we can't do this. */
13384 if (unsorted_regs[i] < 0
13385 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13386 || unsorted_regs[i] > 14
13387 || (i != nops - 1 && unsorted_regs[i] == base_reg))
13388 return 0;
13389
13390 /* Don't allow SP to be loaded unless it is also the base
13391 register. It guarantees that SP is reset correctly when
13392 an LDM instruction is interrupted. Otherwise, we might
13393 end up with a corrupt stack. */
13394 if (unsorted_regs[i] == SP_REGNUM && base_reg != SP_REGNUM)
13395 return 0;
13396
13397 unsorted_offsets[i] = INTVAL (offset);
13398 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13399 order[0] = i;
13400 }
13401 else
13402 /* Not a suitable memory address. */
13403 return 0;
13404 }
13405
13406 /* All the useful information has now been extracted from the
13407 operands into unsorted_regs and unsorted_offsets; additionally,
13408 order[0] has been set to the lowest offset in the list. Sort
13409 the offsets into order, verifying that they are adjacent, and
13410 check that the register numbers are ascending. */
13411 if (!compute_offset_order (nops, unsorted_offsets, order,
13412 check_regs ? unsorted_regs : NULL))
13413 return 0;
13414
13415 if (saved_order)
13416 memcpy (saved_order, order, sizeof order);
13417
13418 if (base)
13419 {
13420 *base = base_reg;
13421
13422 for (i = 0; i < nops; i++)
13423 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13424
13425 *load_offset = unsorted_offsets[order[0]];
13426 }
13427
13428 if (TARGET_THUMB1
13429 && !peep2_reg_dead_p (nops, base_reg_rtx))
13430 return 0;
13431
13432 if (unsorted_offsets[order[0]] == 0)
13433 ldm_case = 1; /* ldmia */
13434 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13435 ldm_case = 2; /* ldmib */
13436 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13437 ldm_case = 3; /* ldmda */
13438 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13439 ldm_case = 4; /* ldmdb */
13440 else if (const_ok_for_arm (unsorted_offsets[order[0]])
13441 || const_ok_for_arm (-unsorted_offsets[order[0]]))
13442 ldm_case = 5;
13443 else
13444 return 0;
13445
13446 if (!multiple_operation_profitable_p (false, nops,
13447 ldm_case == 5
13448 ? unsorted_offsets[order[0]] : 0))
13449 return 0;
13450
13451 return ldm_case;
13452 }
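
/* For instance, four loads of r0-r3 from [r4], [r4, #4], [r4, #8] and
   [r4, #12] give case 1 (ldmia), while the same loads starting at
   [r4, #4] give case 2 (ldmib) on ARM, assuming the profitability check
   above accepts the sequence. */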
13453
13454 /* Used to determine in a peephole whether a sequence of store instructions can
13455 be changed into a store-multiple instruction.
13456 NOPS is the number of separate store instructions we are examining.
13457 NOPS_TOTAL is the total number of instructions recognized by the peephole
13458 pattern.
13459 The first NOPS entries in OPERANDS are the source registers, the next
13460 NOPS entries are memory operands. If this function is successful, *BASE is
13461 set to the common base register of the memory accesses; *LOAD_OFFSET is set
13462 to the first memory location's offset from that base register. REGS is an
13463 array filled in with the source register numbers, REG_RTXS (if nonnull) is
13464 likewise filled with the corresponding rtx's.
13465 SAVED_ORDER (if nonnull) is an array filled in with an order that maps insn
13466 numbers to an ascending order of stores.
13467 If CHECK_REGS is true, the sequence of registers in *REGS matches the stores
13468 from ascending memory locations, and the function verifies that the register
13469 numbers are themselves ascending. If CHECK_REGS is false, the register
13470 numbers are stored in the order they are found in the operands. */
13471 static int
13472 store_multiple_sequence (rtx *operands, int nops, int nops_total,
13473 int *regs, rtx *reg_rtxs, int *saved_order, int *base,
13474 HOST_WIDE_INT *load_offset, bool check_regs)
13475 {
13476 int unsorted_regs[MAX_LDM_STM_OPS];
13477 rtx unsorted_reg_rtxs[MAX_LDM_STM_OPS];
13478 HOST_WIDE_INT unsorted_offsets[MAX_LDM_STM_OPS];
13479 int order[MAX_LDM_STM_OPS];
13480 int base_reg = -1;
13481 rtx base_reg_rtx = NULL;
13482 int i, stm_case;
13483
13484 /* Write back of base register is currently only supported for Thumb 1. */
13485 int base_writeback = TARGET_THUMB1;
13486
13487 /* Can only handle up to MAX_LDM_STM_OPS insns at present, though could be
13488 easily extended if required. */
13489 gcc_assert (nops >= 2 && nops <= MAX_LDM_STM_OPS);
13490
13491 memset (order, 0, MAX_LDM_STM_OPS * sizeof (int));
13492
13493 /* Loop over the operands and check that the memory references are
13494 suitable (i.e. immediate offsets from the same base register). At
13495 the same time, extract the target register, and the memory
13496 offsets. */
13497 for (i = 0; i < nops; i++)
13498 {
13499 rtx reg;
13500 rtx offset;
13501
13502 /* Convert a subreg of a mem into the mem itself. */
13503 if (GET_CODE (operands[nops + i]) == SUBREG)
13504 operands[nops + i] = alter_subreg (operands + (nops + i), true);
13505
13506 gcc_assert (MEM_P (operands[nops + i]));
13507
13508 /* Don't reorder volatile memory references; it doesn't seem worth
13509 looking for the case where the order is ok anyway. */
13510 if (MEM_VOLATILE_P (operands[nops + i]))
13511 return 0;
13512
13513 offset = const0_rtx;
13514
13515 if ((REG_P (reg = XEXP (operands[nops + i], 0))
13516 || (GET_CODE (reg) == SUBREG
13517 && REG_P (reg = SUBREG_REG (reg))))
13518 || (GET_CODE (XEXP (operands[nops + i], 0)) == PLUS
13519 && ((REG_P (reg = XEXP (XEXP (operands[nops + i], 0), 0)))
13520 || (GET_CODE (reg) == SUBREG
13521 && REG_P (reg = SUBREG_REG (reg))))
13522 && (CONST_INT_P (offset
13523 = XEXP (XEXP (operands[nops + i], 0), 1)))))
13524 {
13525 unsorted_reg_rtxs[i] = (REG_P (operands[i])
13526 ? operands[i] : SUBREG_REG (operands[i]));
13527 unsorted_regs[i] = REGNO (unsorted_reg_rtxs[i]);
13528
13529 if (i == 0)
13530 {
13531 base_reg = REGNO (reg);
13532 base_reg_rtx = reg;
13533 if (TARGET_THUMB1 && base_reg > LAST_LO_REGNUM)
13534 return 0;
13535 }
13536 else if (base_reg != (int) REGNO (reg))
13537 /* Not addressed from the same base register. */
13538 return 0;
13539
13540 /* If it isn't an integer register, then we can't do this. */
13541 if (unsorted_regs[i] < 0
13542 || (TARGET_THUMB1 && unsorted_regs[i] > LAST_LO_REGNUM)
13543 /* The effects are unpredictable if the base register is
13544 both updated and stored. */
13545 || (base_writeback && unsorted_regs[i] == base_reg)
13546 || (TARGET_THUMB2 && unsorted_regs[i] == SP_REGNUM)
13547 || unsorted_regs[i] > 14)
13548 return 0;
13549
13550 unsorted_offsets[i] = INTVAL (offset);
13551 if (i == 0 || unsorted_offsets[i] < unsorted_offsets[order[0]])
13552 order[0] = i;
13553 }
13554 else
13555 /* Not a suitable memory address. */
13556 return 0;
13557 }
13558
13559 /* All the useful information has now been extracted from the
13560 operands into unsorted_regs and unsorted_offsets; additionally,
13561 order[0] has been set to the lowest offset in the list. Sort
13562 the offsets into order, verifying that they are adjacent, and
13563 check that the register numbers are ascending. */
13564 if (!compute_offset_order (nops, unsorted_offsets, order,
13565 check_regs ? unsorted_regs : NULL))
13566 return 0;
13567
13568 if (saved_order)
13569 memcpy (saved_order, order, sizeof order);
13570
13571 if (base)
13572 {
13573 *base = base_reg;
13574
13575 for (i = 0; i < nops; i++)
13576 {
13577 regs[i] = unsorted_regs[check_regs ? order[i] : i];
13578 if (reg_rtxs)
13579 reg_rtxs[i] = unsorted_reg_rtxs[check_regs ? order[i] : i];
13580 }
13581
13582 *load_offset = unsorted_offsets[order[0]];
13583 }
13584
13585 if (TARGET_THUMB1
13586 && !peep2_reg_dead_p (nops_total, base_reg_rtx))
13587 return 0;
13588
13589 if (unsorted_offsets[order[0]] == 0)
13590 stm_case = 1; /* stmia */
13591 else if (TARGET_ARM && unsorted_offsets[order[0]] == 4)
13592 stm_case = 2; /* stmib */
13593 else if (TARGET_ARM && unsorted_offsets[order[nops - 1]] == 0)
13594 stm_case = 3; /* stmda */
13595 else if (TARGET_32BIT && unsorted_offsets[order[nops - 1]] == -4)
13596 stm_case = 4; /* stmdb */
13597 else
13598 return 0;
13599
13600 if (!multiple_operation_profitable_p (false, nops, 0))
13601 return 0;
13602
13603 return stm_case;
13604 }
13605 \f
13606 /* Routines for use in generating RTL. */
13607
13608 /* Generate a load-multiple instruction. COUNT is the number of loads in
13609 the instruction; REGS and MEMS are arrays containing the operands.
13610 BASEREG is the base register to be used in addressing the memory operands.
13611 WBACK_OFFSET is nonzero if the instruction should update the base
13612 register. */
13613
13614 static rtx
13615 arm_gen_load_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13616 HOST_WIDE_INT wback_offset)
13617 {
13618 int i = 0, j;
13619 rtx result;
13620
13621 if (!multiple_operation_profitable_p (false, count, 0))
13622 {
13623 rtx seq;
13624
13625 start_sequence ();
13626
13627 for (i = 0; i < count; i++)
13628 emit_move_insn (gen_rtx_REG (SImode, regs[i]), mems[i]);
13629
13630 if (wback_offset != 0)
13631 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13632
13633 seq = get_insns ();
13634 end_sequence ();
13635
13636 return seq;
13637 }
13638
13639 result = gen_rtx_PARALLEL (VOIDmode,
13640 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13641 if (wback_offset != 0)
13642 {
13643 XVECEXP (result, 0, 0)
13644 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13645 i = 1;
13646 count++;
13647 }
13648
13649 for (j = 0; i < count; i++, j++)
13650 XVECEXP (result, 0, i)
13651 = gen_rtx_SET (gen_rtx_REG (SImode, regs[j]), mems[j]);
13652
13653 return result;
13654 }
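
/* Sketch of the PARALLEL built above for COUNT == 2 with WBACK_OFFSET
   == 8 (rA and rB standing in for REGS[0] and REGS[1]):

     (parallel [(set (reg basereg) (plus (reg basereg) (const_int 8)))
                (set (reg rA) (mem ...))
                (set (reg rB) (mem ...))])

   Without write-back the PARALLEL contains just the two register
   loads. */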
13655
13656 /* Generate a store-multiple instruction. COUNT is the number of stores in
13657 the instruction; REGS and MEMS are arrays containing the operands.
13658 BASEREG is the base register to be used in addressing the memory operands.
13659 WBACK_OFFSET is nonzero if the instruction should update the base
13660 register. */
13661
13662 static rtx
13663 arm_gen_store_multiple_1 (int count, int *regs, rtx *mems, rtx basereg,
13664 HOST_WIDE_INT wback_offset)
13665 {
13666 int i = 0, j;
13667 rtx result;
13668
13669 if (GET_CODE (basereg) == PLUS)
13670 basereg = XEXP (basereg, 0);
13671
13672 if (!multiple_operation_profitable_p (false, count, 0))
13673 {
13674 rtx seq;
13675
13676 start_sequence ();
13677
13678 for (i = 0; i < count; i++)
13679 emit_move_insn (mems[i], gen_rtx_REG (SImode, regs[i]));
13680
13681 if (wback_offset != 0)
13682 emit_move_insn (basereg, plus_constant (Pmode, basereg, wback_offset));
13683
13684 seq = get_insns ();
13685 end_sequence ();
13686
13687 return seq;
13688 }
13689
13690 result = gen_rtx_PARALLEL (VOIDmode,
13691 rtvec_alloc (count + (wback_offset != 0 ? 1 : 0)));
13692 if (wback_offset != 0)
13693 {
13694 XVECEXP (result, 0, 0)
13695 = gen_rtx_SET (basereg, plus_constant (Pmode, basereg, wback_offset));
13696 i = 1;
13697 count++;
13698 }
13699
13700 for (j = 0; i < count; i++, j++)
13701 XVECEXP (result, 0, i)
13702 = gen_rtx_SET (mems[j], gen_rtx_REG (SImode, regs[j]));
13703
13704 return result;
13705 }
13706
13707 /* Generate either a load-multiple or a store-multiple instruction. This
13708 function can be used in situations where we can start with a single MEM
13709 rtx and adjust its address upwards.
13710 COUNT is the number of operations in the instruction, not counting a
13711 possible update of the base register. REGS is an array containing the
13712 register operands.
13713 BASEREG is the base register to be used in addressing the memory operands,
13714 which are constructed from BASEMEM.
13715 WRITE_BACK specifies whether the generated instruction should include an
13716 update of the base register.
13717 OFFSETP is used to pass an offset to and from this function; this offset
13718 is not used when constructing the address (instead BASEMEM should have an
13719 appropriate offset in its address), it is used only for setting
13720 MEM_OFFSET. It is updated only if WRITE_BACK is true. */
13721
13722 static rtx
13723 arm_gen_multiple_op (bool is_load, int *regs, int count, rtx basereg,
13724 bool write_back, rtx basemem, HOST_WIDE_INT *offsetp)
13725 {
13726 rtx mems[MAX_LDM_STM_OPS];
13727 HOST_WIDE_INT offset = *offsetp;
13728 int i;
13729
13730 gcc_assert (count <= MAX_LDM_STM_OPS);
13731
13732 if (GET_CODE (basereg) == PLUS)
13733 basereg = XEXP (basereg, 0);
13734
13735 for (i = 0; i < count; i++)
13736 {
13737 rtx addr = plus_constant (Pmode, basereg, i * 4);
13738 mems[i] = adjust_automodify_address_nv (basemem, SImode, addr, offset);
13739 offset += 4;
13740 }
13741
13742 if (write_back)
13743 *offsetp = offset;
13744
13745 if (is_load)
13746 return arm_gen_load_multiple_1 (count, regs, mems, basereg,
13747 write_back ? 4 * count : 0);
13748 else
13749 return arm_gen_store_multiple_1 (count, regs, mems, basereg,
13750 write_back ? 4 * count : 0);
13751 }
13752
13753 rtx
13754 arm_gen_load_multiple (int *regs, int count, rtx basereg, int write_back,
13755 rtx basemem, HOST_WIDE_INT *offsetp)
13756 {
13757 return arm_gen_multiple_op (TRUE, regs, count, basereg, write_back, basemem,
13758 offsetp);
13759 }
13760
13761 rtx
13762 arm_gen_store_multiple (int *regs, int count, rtx basereg, int write_back,
13763 rtx basemem, HOST_WIDE_INT *offsetp)
13764 {
13765 return arm_gen_multiple_op (FALSE, regs, count, basereg, write_back, basemem,
13766 offsetp);
13767 }
13768
13769 /* Called from a peephole2 expander to turn a sequence of loads into an
13770 LDM instruction. OPERANDS are the operands found by the peephole matcher;
13771 NOPS indicates how many separate loads we are trying to combine. SORT_REGS
13772 is true if we can reorder the registers because they are used commutatively
13773 subsequently.
13774 Returns true iff we could generate a new instruction. */
13775
13776 bool
13777 gen_ldm_seq (rtx *operands, int nops, bool sort_regs)
13778 {
13779 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13780 rtx mems[MAX_LDM_STM_OPS];
13781 int i, j, base_reg;
13782 rtx base_reg_rtx;
13783 HOST_WIDE_INT offset;
13784 int write_back = FALSE;
13785 int ldm_case;
13786 rtx addr;
13787
13788 ldm_case = load_multiple_sequence (operands, nops, regs, mem_order,
13789 &base_reg, &offset, !sort_regs);
13790
13791 if (ldm_case == 0)
13792 return false;
13793
13794 if (sort_regs)
13795 for (i = 0; i < nops - 1; i++)
13796 for (j = i + 1; j < nops; j++)
13797 if (regs[i] > regs[j])
13798 {
13799 int t = regs[i];
13800 regs[i] = regs[j];
13801 regs[j] = t;
13802 }
13803 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13804
13805 if (TARGET_THUMB1)
13806 {
13807 gcc_assert (peep2_reg_dead_p (nops, base_reg_rtx));
13808 gcc_assert (ldm_case == 1 || ldm_case == 5);
13809 write_back = TRUE;
13810 }
13811
13812 if (ldm_case == 5)
13813 {
13814 rtx newbase = TARGET_THUMB1 ? base_reg_rtx : gen_rtx_REG (SImode, regs[0]);
13815 emit_insn (gen_addsi3 (newbase, base_reg_rtx, GEN_INT (offset)));
13816 offset = 0;
13817 if (!TARGET_THUMB1)
13818 base_reg_rtx = newbase;
13819 }
13820
13821 for (i = 0; i < nops; i++)
13822 {
13823 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13824 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13825 SImode, addr, 0);
13826 }
13827 emit_insn (arm_gen_load_multiple_1 (nops, regs, mems, base_reg_rtx,
13828 write_back ? offset + i * 4 : 0));
13829 return true;
13830 }
13831
13832 /* Called from a peephole2 expander to turn a sequence of stores into an
13833 STM instruction. OPERANDS are the operands found by the peephole matcher;
13834 NOPS indicates how many separate stores we are trying to combine.
13835 Returns true iff we could generate a new instruction. */
13836
13837 bool
13838 gen_stm_seq (rtx *operands, int nops)
13839 {
13840 int i;
13841 int regs[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13842 rtx mems[MAX_LDM_STM_OPS];
13843 int base_reg;
13844 rtx base_reg_rtx;
13845 HOST_WIDE_INT offset;
13846 int write_back = FALSE;
13847 int stm_case;
13848 rtx addr;
13849 bool base_reg_dies;
13850
13851 stm_case = store_multiple_sequence (operands, nops, nops, regs, NULL,
13852 mem_order, &base_reg, &offset, true);
13853
13854 if (stm_case == 0)
13855 return false;
13856
13857 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13858
13859 base_reg_dies = peep2_reg_dead_p (nops, base_reg_rtx);
13860 if (TARGET_THUMB1)
13861 {
13862 gcc_assert (base_reg_dies);
13863 write_back = TRUE;
13864 }
13865
13866 if (stm_case == 5)
13867 {
13868 gcc_assert (base_reg_dies);
13869 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13870 offset = 0;
13871 }
13872
13873 addr = plus_constant (Pmode, base_reg_rtx, offset);
13874
13875 for (i = 0; i < nops; i++)
13876 {
13877 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13878 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13879 SImode, addr, 0);
13880 }
13881 emit_insn (arm_gen_store_multiple_1 (nops, regs, mems, base_reg_rtx,
13882 write_back ? offset + i * 4 : 0));
13883 return true;
13884 }
13885
13886 /* Called from a peephole2 expander to turn a sequence of stores that are
13887 preceded by constant loads into an STM instruction. OPERANDS are the
13888 operands found by the peephole matcher; NOPS indicates how many
13889 separate stores we are trying to combine; there are 2 * NOPS
13890 instructions in the peephole.
13891 Returns true iff we could generate a new instruction. */
13892
13893 bool
13894 gen_const_stm_seq (rtx *operands, int nops)
13895 {
13896 int regs[MAX_LDM_STM_OPS], sorted_regs[MAX_LDM_STM_OPS];
13897 int reg_order[MAX_LDM_STM_OPS], mem_order[MAX_LDM_STM_OPS];
13898 rtx reg_rtxs[MAX_LDM_STM_OPS], orig_reg_rtxs[MAX_LDM_STM_OPS];
13899 rtx mems[MAX_LDM_STM_OPS];
13900 int base_reg;
13901 rtx base_reg_rtx;
13902 HOST_WIDE_INT offset;
13903 int write_back = FALSE;
13904 int stm_case;
13905 rtx addr;
13906 bool base_reg_dies;
13907 int i, j;
13908 HARD_REG_SET allocated;
13909
13910 stm_case = store_multiple_sequence (operands, nops, 2 * nops, regs, reg_rtxs,
13911 mem_order, &base_reg, &offset, false);
13912
13913 if (stm_case == 0)
13914 return false;
13915
13916 memcpy (orig_reg_rtxs, reg_rtxs, sizeof orig_reg_rtxs);
13917
13918 /* If the same register is used more than once, try to find a free
13919 register. */
13920 CLEAR_HARD_REG_SET (allocated);
13921 for (i = 0; i < nops; i++)
13922 {
13923 for (j = i + 1; j < nops; j++)
13924 if (regs[i] == regs[j])
13925 {
13926 rtx t = peep2_find_free_register (0, nops * 2,
13927 TARGET_THUMB1 ? "l" : "r",
13928 SImode, &allocated);
13929 if (t == NULL_RTX)
13930 return false;
13931 reg_rtxs[i] = t;
13932 regs[i] = REGNO (t);
13933 }
13934 }
13935
13936 /* Compute an ordering that maps the register numbers to an ascending
13937 sequence. */
13938 reg_order[0] = 0;
13939 for (i = 0; i < nops; i++)
13940 if (regs[i] < regs[reg_order[0]])
13941 reg_order[0] = i;
13942
13943 for (i = 1; i < nops; i++)
13944 {
13945 int this_order = reg_order[i - 1];
13946 for (j = 0; j < nops; j++)
13947 if (regs[j] > regs[reg_order[i - 1]]
13948 && (this_order == reg_order[i - 1]
13949 || regs[j] < regs[this_order]))
13950 this_order = j;
13951 reg_order[i] = this_order;
13952 }
13953
13954 /* Ensure that registers that must be live after the instruction end
13955 up with the correct value. */
13956 for (i = 0; i < nops; i++)
13957 {
13958 int this_order = reg_order[i];
13959 if ((this_order != mem_order[i]
13960 || orig_reg_rtxs[this_order] != reg_rtxs[this_order])
13961 && !peep2_reg_dead_p (nops * 2, orig_reg_rtxs[this_order]))
13962 return false;
13963 }
13964
13965 /* Load the constants. */
13966 for (i = 0; i < nops; i++)
13967 {
13968 rtx op = operands[2 * nops + mem_order[i]];
13969 sorted_regs[i] = regs[reg_order[i]];
13970 emit_move_insn (reg_rtxs[reg_order[i]], op);
13971 }
13972
13973 base_reg_rtx = gen_rtx_REG (Pmode, base_reg);
13974
13975 base_reg_dies = peep2_reg_dead_p (nops * 2, base_reg_rtx);
13976 if (TARGET_THUMB1)
13977 {
13978 gcc_assert (base_reg_dies);
13979 write_back = TRUE;
13980 }
13981
13982 if (stm_case == 5)
13983 {
13984 gcc_assert (base_reg_dies);
13985 emit_insn (gen_addsi3 (base_reg_rtx, base_reg_rtx, GEN_INT (offset)));
13986 offset = 0;
13987 }
13988
13989 addr = plus_constant (Pmode, base_reg_rtx, offset);
13990
13991 for (i = 0; i < nops; i++)
13992 {
13993 addr = plus_constant (Pmode, base_reg_rtx, offset + i * 4);
13994 mems[i] = adjust_automodify_address_nv (operands[nops + mem_order[i]],
13995 SImode, addr, 0);
13996 }
13997 emit_insn (arm_gen_store_multiple_1 (nops, sorted_regs, mems, base_reg_rtx,
13998 write_back ? offset + i * 4 : 0));
13999 return true;
14000 }
14001
14002 /* Copy a block of memory using plain ldr/str/ldrh/strh instructions, to permit
14003 unaligned copies on processors which support unaligned semantics for those
14004 instructions. INTERLEAVE_FACTOR can be used to attempt to hide load latency
14005 (using more registers) by doing e.g. load/load/store/store for a factor of 2.
14006 An interleave factor of 1 (the minimum) will perform no interleaving.
14007 Load/store multiple are used for aligned addresses where possible. */
14008
14009 static void
14010 arm_block_move_unaligned_straight (rtx dstbase, rtx srcbase,
14011 HOST_WIDE_INT length,
14012 unsigned int interleave_factor)
14013 {
14014 rtx *regs = XALLOCAVEC (rtx, interleave_factor);
14015 int *regnos = XALLOCAVEC (int, interleave_factor);
14016 HOST_WIDE_INT block_size_bytes = interleave_factor * UNITS_PER_WORD;
14017 HOST_WIDE_INT i, j;
14018 HOST_WIDE_INT remaining = length, words;
14019 rtx halfword_tmp = NULL, byte_tmp = NULL;
14020 rtx dst, src;
14021 bool src_aligned = MEM_ALIGN (srcbase) >= BITS_PER_WORD;
14022 bool dst_aligned = MEM_ALIGN (dstbase) >= BITS_PER_WORD;
14023 HOST_WIDE_INT srcoffset, dstoffset;
14024 HOST_WIDE_INT src_autoinc, dst_autoinc;
14025 rtx mem, addr;
14026
14027 gcc_assert (interleave_factor >= 1 && interleave_factor <= 4);
14028
14029 /* Use hard registers if we have aligned source or destination so we can use
14030 load/store multiple with contiguous registers. */
14031 if (dst_aligned || src_aligned)
14032 for (i = 0; i < interleave_factor; i++)
14033 regs[i] = gen_rtx_REG (SImode, i);
14034 else
14035 for (i = 0; i < interleave_factor; i++)
14036 regs[i] = gen_reg_rtx (SImode);
14037
14038 dst = copy_addr_to_reg (XEXP (dstbase, 0));
14039 src = copy_addr_to_reg (XEXP (srcbase, 0));
14040
14041 srcoffset = dstoffset = 0;
14042
14043 /* Calls to arm_gen_load_multiple and arm_gen_store_multiple update SRC/DST.
14044 For copying the last bytes we want to subtract this offset again. */
14045 src_autoinc = dst_autoinc = 0;
14046
14047 for (i = 0; i < interleave_factor; i++)
14048 regnos[i] = i;
14049
14050 /* Copy BLOCK_SIZE_BYTES chunks. */
14051
14052 for (i = 0; i + block_size_bytes <= length; i += block_size_bytes)
14053 {
14054 /* Load words. */
14055 if (src_aligned && interleave_factor > 1)
14056 {
14057 emit_insn (arm_gen_load_multiple (regnos, interleave_factor, src,
14058 TRUE, srcbase, &srcoffset));
14059 src_autoinc += UNITS_PER_WORD * interleave_factor;
14060 }
14061 else
14062 {
14063 for (j = 0; j < interleave_factor; j++)
14064 {
14065 addr = plus_constant (Pmode, src, (srcoffset + j * UNITS_PER_WORD
14066 - src_autoinc));
14067 mem = adjust_automodify_address (srcbase, SImode, addr,
14068 srcoffset + j * UNITS_PER_WORD);
14069 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14070 }
14071 srcoffset += block_size_bytes;
14072 }
14073
14074 /* Store words. */
14075 if (dst_aligned && interleave_factor > 1)
14076 {
14077 emit_insn (arm_gen_store_multiple (regnos, interleave_factor, dst,
14078 TRUE, dstbase, &dstoffset));
14079 dst_autoinc += UNITS_PER_WORD * interleave_factor;
14080 }
14081 else
14082 {
14083 for (j = 0; j < interleave_factor; j++)
14084 {
14085 addr = plus_constant (Pmode, dst, (dstoffset + j * UNITS_PER_WORD
14086 - dst_autoinc));
14087 mem = adjust_automodify_address (dstbase, SImode, addr,
14088 dstoffset + j * UNITS_PER_WORD);
14089 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14090 }
14091 dstoffset += block_size_bytes;
14092 }
14093
14094 remaining -= block_size_bytes;
14095 }
14096
14097 /* Copy any whole words left (note these aren't interleaved with any
14098 subsequent halfword/byte load/stores in the interests of simplicity). */
14099
14100 words = remaining / UNITS_PER_WORD;
14101
14102 gcc_assert (words < interleave_factor);
14103
14104 if (src_aligned && words > 1)
14105 {
14106 emit_insn (arm_gen_load_multiple (regnos, words, src, TRUE, srcbase,
14107 &srcoffset));
14108 src_autoinc += UNITS_PER_WORD * words;
14109 }
14110 else
14111 {
14112 for (j = 0; j < words; j++)
14113 {
14114 addr = plus_constant (Pmode, src,
14115 srcoffset + j * UNITS_PER_WORD - src_autoinc);
14116 mem = adjust_automodify_address (srcbase, SImode, addr,
14117 srcoffset + j * UNITS_PER_WORD);
14118 if (src_aligned)
14119 emit_move_insn (regs[j], mem);
14120 else
14121 emit_insn (gen_unaligned_loadsi (regs[j], mem));
14122 }
14123 srcoffset += words * UNITS_PER_WORD;
14124 }
14125
14126 if (dst_aligned && words > 1)
14127 {
14128 emit_insn (arm_gen_store_multiple (regnos, words, dst, TRUE, dstbase,
14129 &dstoffset));
14130 dst_autoinc += words * UNITS_PER_WORD;
14131 }
14132 else
14133 {
14134 for (j = 0; j < words; j++)
14135 {
14136 addr = plus_constant (Pmode, dst,
14137 dstoffset + j * UNITS_PER_WORD - dst_autoinc);
14138 mem = adjust_automodify_address (dstbase, SImode, addr,
14139 dstoffset + j * UNITS_PER_WORD);
14140 if (dst_aligned)
14141 emit_move_insn (mem, regs[j]);
14142 else
14143 emit_insn (gen_unaligned_storesi (mem, regs[j]));
14144 }
14145 dstoffset += words * UNITS_PER_WORD;
14146 }
14147
14148 remaining -= words * UNITS_PER_WORD;
14149
14150 gcc_assert (remaining < 4);
14151
14152 /* Copy a halfword if necessary. */
14153
14154 if (remaining >= 2)
14155 {
14156 halfword_tmp = gen_reg_rtx (SImode);
14157
14158 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14159 mem = adjust_automodify_address (srcbase, HImode, addr, srcoffset);
14160 emit_insn (gen_unaligned_loadhiu (halfword_tmp, mem));
14161
14162 /* Either write out immediately, or delay until we've loaded the last
14163 byte, depending on interleave factor. */
14164 if (interleave_factor == 1)
14165 {
14166 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14167 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14168 emit_insn (gen_unaligned_storehi (mem,
14169 gen_lowpart (HImode, halfword_tmp)));
14170 halfword_tmp = NULL;
14171 dstoffset += 2;
14172 }
14173
14174 remaining -= 2;
14175 srcoffset += 2;
14176 }
14177
14178 gcc_assert (remaining < 2);
14179
14180 /* Copy last byte. */
14181
14182 if ((remaining & 1) != 0)
14183 {
14184 byte_tmp = gen_reg_rtx (SImode);
14185
14186 addr = plus_constant (Pmode, src, srcoffset - src_autoinc);
14187 mem = adjust_automodify_address (srcbase, QImode, addr, srcoffset);
14188 emit_move_insn (gen_lowpart (QImode, byte_tmp), mem);
14189
14190 if (interleave_factor == 1)
14191 {
14192 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14193 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14194 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14195 byte_tmp = NULL;
14196 dstoffset++;
14197 }
14198
14199 remaining--;
14200 srcoffset++;
14201 }
14202
14203 /* Store last halfword if we haven't done so already. */
14204
14205 if (halfword_tmp)
14206 {
14207 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14208 mem = adjust_automodify_address (dstbase, HImode, addr, dstoffset);
14209 emit_insn (gen_unaligned_storehi (mem,
14210 gen_lowpart (HImode, halfword_tmp)));
14211 dstoffset += 2;
14212 }
14213
14214 /* Likewise for last byte. */
14215
14216 if (byte_tmp)
14217 {
14218 addr = plus_constant (Pmode, dst, dstoffset - dst_autoinc);
14219 mem = adjust_automodify_address (dstbase, QImode, addr, dstoffset);
14220 emit_move_insn (mem, gen_lowpart (QImode, byte_tmp));
14221 dstoffset++;
14222 }
14223
14224 gcc_assert (remaining == 0 && srcoffset == dstoffset);
14225 }
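/* As a rough illustration of the tail handling above (assuming the usual
   4-byte UNITS_PER_WORD): with INTERLEAVE_FACTOR == 4 and 7 bytes left after
   the main blocks, one whole word is copied with a single load/store, the
   trailing halfword is loaded next but its store is deferred, the final byte
   is then loaded, and only afterwards are the pending halfword and byte
   written out.  With INTERLEAVE_FACTOR == 1 the halfword and byte are stored
   immediately after being loaded.  */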
14226
14227 /* From mips_adjust_block_mem:
14228
14229 Helper function for doing a loop-based block operation on memory
14230 reference MEM. Each iteration of the loop will operate on LENGTH
14231 bytes of MEM.
14232
14233 Create a new base register for use within the loop and point it to
14234 the start of MEM. Create a new memory reference that uses this
14235 register. Store them in *LOOP_REG and *LOOP_MEM respectively. */
14236
14237 static void
14238 arm_adjust_block_mem (rtx mem, HOST_WIDE_INT length, rtx *loop_reg,
14239 rtx *loop_mem)
14240 {
14241 *loop_reg = copy_addr_to_reg (XEXP (mem, 0));
14242
14243 /* Although the new mem does not refer to a known location,
14244 it does keep up to LENGTH bytes of alignment. */
14245 *loop_mem = change_address (mem, BLKmode, *loop_reg);
14246 set_mem_align (*loop_mem, MIN (MEM_ALIGN (mem), length * BITS_PER_UNIT));
14247 }
14248
14249 /* From mips_block_move_loop:
14250
14251 Move LENGTH bytes from SRC to DEST using a loop that moves BYTES_PER_ITER
14252 bytes at a time. LENGTH must be at least BYTES_PER_ITER. Assume that
14253 the memory regions do not overlap. */
14254
14255 static void
14256 arm_block_move_unaligned_loop (rtx dest, rtx src, HOST_WIDE_INT length,
14257 unsigned int interleave_factor,
14258 HOST_WIDE_INT bytes_per_iter)
14259 {
14260 rtx src_reg, dest_reg, final_src, test;
14261 HOST_WIDE_INT leftover;
14262
14263 leftover = length % bytes_per_iter;
14264 length -= leftover;
14265
14266 /* Create registers and memory references for use within the loop. */
14267 arm_adjust_block_mem (src, bytes_per_iter, &src_reg, &src);
14268 arm_adjust_block_mem (dest, bytes_per_iter, &dest_reg, &dest);
14269
14270 /* Calculate the value that SRC_REG should have after the last iteration of
14271 the loop. */
14272 final_src = expand_simple_binop (Pmode, PLUS, src_reg, GEN_INT (length),
14273 0, 0, OPTAB_WIDEN);
14274
14275 /* Emit the start of the loop. */
14276 rtx_code_label *label = gen_label_rtx ();
14277 emit_label (label);
14278
14279 /* Emit the loop body. */
14280 arm_block_move_unaligned_straight (dest, src, bytes_per_iter,
14281 interleave_factor);
14282
14283 /* Move on to the next block. */
14284 emit_move_insn (src_reg, plus_constant (Pmode, src_reg, bytes_per_iter));
14285 emit_move_insn (dest_reg, plus_constant (Pmode, dest_reg, bytes_per_iter));
14286
14287 /* Emit the loop condition. */
14288 test = gen_rtx_NE (VOIDmode, src_reg, final_src);
14289 emit_jump_insn (gen_cbranchsi4 (test, src_reg, final_src, label));
14290
14291 /* Mop up any left-over bytes. */
14292 if (leftover)
14293 arm_block_move_unaligned_straight (dest, src, leftover, interleave_factor);
14294 }
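/* For example (a sketch only; the exact instructions depend on the alignment
   and interleave factor), a 40-byte copy with BYTES_PER_ITER == 16 leaves
   8 bytes over and emits roughly:

	1:	<16-byte straight copy>
		add	src_reg, src_reg, #16
		add	dst_reg, dst_reg, #16
		cmp	src_reg, final_src
		bne	1b
		<8-byte straight copy of the leftover>

   where FINAL_SRC is SRC_REG plus 32, i.e. the loop body runs twice.  */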
14295
14296 /* Emit a block move when either the source or destination is unaligned (not
14297 aligned to a four-byte boundary). This may need further tuning depending on
14298 core type, optimize_size setting, etc. */
14299
14300 static int
14301 arm_movmemqi_unaligned (rtx *operands)
14302 {
14303 HOST_WIDE_INT length = INTVAL (operands[2]);
14304
14305 if (optimize_size)
14306 {
14307 bool src_aligned = MEM_ALIGN (operands[1]) >= BITS_PER_WORD;
14308 bool dst_aligned = MEM_ALIGN (operands[0]) >= BITS_PER_WORD;
14309 /* Inlined memcpy using ldr/str/ldrh/strh can be quite big: try to limit
14310 size of code if optimizing for size. We'll use ldm/stm if src_aligned
14311 or dst_aligned though: allow more interleaving in those cases since the
14312 resulting code can be smaller. */
14313 unsigned int interleave_factor = (src_aligned || dst_aligned) ? 2 : 1;
14314 HOST_WIDE_INT bytes_per_iter = (src_aligned || dst_aligned) ? 8 : 4;
14315
14316 if (length > 12)
14317 arm_block_move_unaligned_loop (operands[0], operands[1], length,
14318 interleave_factor, bytes_per_iter);
14319 else
14320 arm_block_move_unaligned_straight (operands[0], operands[1], length,
14321 interleave_factor);
14322 }
14323 else
14324 {
14325 /* Note that the loop created by arm_block_move_unaligned_loop may be
14326 subject to loop unrolling, which makes tuning this condition a little
14327 redundant. */
14328 if (length > 32)
14329 arm_block_move_unaligned_loop (operands[0], operands[1], length, 4, 16);
14330 else
14331 arm_block_move_unaligned_straight (operands[0], operands[1], length, 4);
14332 }
14333
14334 return 1;
14335 }
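/* For instance (illustrative figures taken from the conditions above): when
   optimizing for size with neither buffer known to be word aligned, a 10-byte
   copy uses the straight expansion with an interleave factor of 1, while a
   40-byte copy whose destination is word aligned uses the loop copying
   8 bytes per iteration; when optimizing for speed the cut-over to the loop
   is at more than 32 bytes, with 16 bytes per iteration and an interleave
   factor of 4.  */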
14336
14337 int
14338 arm_gen_movmemqi (rtx *operands)
14339 {
14340 HOST_WIDE_INT in_words_to_go, out_words_to_go, last_bytes;
14341 HOST_WIDE_INT srcoffset, dstoffset;
14342 rtx src, dst, srcbase, dstbase;
14343 rtx part_bytes_reg = NULL;
14344 rtx mem;
14345
14346 if (!CONST_INT_P (operands[2])
14347 || !CONST_INT_P (operands[3])
14348 || INTVAL (operands[2]) > 64)
14349 return 0;
14350
14351 if (unaligned_access && (INTVAL (operands[3]) & 3) != 0)
14352 return arm_movmemqi_unaligned (operands);
14353
14354 if (INTVAL (operands[3]) & 3)
14355 return 0;
14356
14357 dstbase = operands[0];
14358 srcbase = operands[1];
14359
14360 dst = copy_to_mode_reg (SImode, XEXP (dstbase, 0));
14361 src = copy_to_mode_reg (SImode, XEXP (srcbase, 0));
14362
14363 in_words_to_go = ARM_NUM_INTS (INTVAL (operands[2]));
14364 out_words_to_go = INTVAL (operands[2]) / 4;
14365 last_bytes = INTVAL (operands[2]) & 3;
14366 dstoffset = srcoffset = 0;
14367
14368 if (out_words_to_go != in_words_to_go && ((in_words_to_go - 1) & 3) != 0)
14369 part_bytes_reg = gen_rtx_REG (SImode, (in_words_to_go - 1) & 3);
14370
14371 while (in_words_to_go >= 2)
14372 {
14373 if (in_words_to_go > 4)
14374 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, 4, src,
14375 TRUE, srcbase, &srcoffset));
14376 else
14377 emit_insn (arm_gen_load_multiple (arm_regs_in_sequence, in_words_to_go,
14378 src, FALSE, srcbase,
14379 &srcoffset));
14380
14381 if (out_words_to_go)
14382 {
14383 if (out_words_to_go > 4)
14384 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence, 4, dst,
14385 TRUE, dstbase, &dstoffset));
14386 else if (out_words_to_go != 1)
14387 emit_insn (arm_gen_store_multiple (arm_regs_in_sequence,
14388 out_words_to_go, dst,
14389 (last_bytes == 0
14390 ? FALSE : TRUE),
14391 dstbase, &dstoffset));
14392 else
14393 {
14394 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14395 emit_move_insn (mem, gen_rtx_REG (SImode, R0_REGNUM));
14396 if (last_bytes != 0)
14397 {
14398 emit_insn (gen_addsi3 (dst, dst, GEN_INT (4)));
14399 dstoffset += 4;
14400 }
14401 }
14402 }
14403
14404 in_words_to_go -= in_words_to_go < 4 ? in_words_to_go : 4;
14405 out_words_to_go -= out_words_to_go < 4 ? out_words_to_go : 4;
14406 }
14407
14408 /* OUT_WORDS_TO_GO will be zero here if there are byte stores to do. */
14409 if (out_words_to_go)
14410 {
14411 rtx sreg;
14412
14413 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14414 sreg = copy_to_reg (mem);
14415
14416 mem = adjust_automodify_address (dstbase, SImode, dst, dstoffset);
14417 emit_move_insn (mem, sreg);
14418 in_words_to_go--;
14419
14420 gcc_assert (!in_words_to_go); /* Sanity check */
14421 }
14422
14423 if (in_words_to_go)
14424 {
14425 gcc_assert (in_words_to_go > 0);
14426
14427 mem = adjust_automodify_address (srcbase, SImode, src, srcoffset);
14428 part_bytes_reg = copy_to_mode_reg (SImode, mem);
14429 }
14430
14431 gcc_assert (!last_bytes || part_bytes_reg);
14432
14433 if (BYTES_BIG_ENDIAN && last_bytes)
14434 {
14435 rtx tmp = gen_reg_rtx (SImode);
14436
14437 /* The bytes we want are in the top end of the word. */
14438 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg,
14439 GEN_INT (8 * (4 - last_bytes))));
14440 part_bytes_reg = tmp;
14441
14442 while (last_bytes)
14443 {
14444 mem = adjust_automodify_address (dstbase, QImode,
14445 plus_constant (Pmode, dst,
14446 last_bytes - 1),
14447 dstoffset + last_bytes - 1);
14448 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14449
14450 if (--last_bytes)
14451 {
14452 tmp = gen_reg_rtx (SImode);
14453 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (8)));
14454 part_bytes_reg = tmp;
14455 }
14456 }
14457
14458 }
14459 else
14460 {
14461 if (last_bytes > 1)
14462 {
14463 mem = adjust_automodify_address (dstbase, HImode, dst, dstoffset);
14464 emit_move_insn (mem, gen_lowpart (HImode, part_bytes_reg));
14465 last_bytes -= 2;
14466 if (last_bytes)
14467 {
14468 rtx tmp = gen_reg_rtx (SImode);
14469 emit_insn (gen_addsi3 (dst, dst, const2_rtx));
14470 emit_insn (gen_lshrsi3 (tmp, part_bytes_reg, GEN_INT (16)));
14471 part_bytes_reg = tmp;
14472 dstoffset += 2;
14473 }
14474 }
14475
14476 if (last_bytes)
14477 {
14478 mem = adjust_automodify_address (dstbase, QImode, dst, dstoffset);
14479 emit_move_insn (mem, gen_lowpart (QImode, part_bytes_reg));
14480 }
14481 }
14482
14483 return 1;
14484 }
14485
14486 /* Helper for gen_movmem_ldrd_strd. Increase the address of memory rtx
14487 by mode size. */
14488 inline static rtx
14489 next_consecutive_mem (rtx mem)
14490 {
14491 machine_mode mode = GET_MODE (mem);
14492 HOST_WIDE_INT offset = GET_MODE_SIZE (mode);
14493 rtx addr = plus_constant (Pmode, XEXP (mem, 0), offset);
14494
14495 return adjust_automodify_address (mem, mode, addr, offset);
14496 }
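/* For example, applied to an SImode memory reference this returns an SImode
   reference 4 bytes further on, and applied to an HImode reference it
   advances by 2 bytes; the mode itself is unchanged.  */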
14497
14498 /* Copy using LDRD/STRD instructions whenever possible.
14499 Returns true upon success. */
14500 bool
14501 gen_movmem_ldrd_strd (rtx *operands)
14502 {
14503 unsigned HOST_WIDE_INT len;
14504 HOST_WIDE_INT align;
14505 rtx src, dst, base;
14506 rtx reg0;
14507 bool src_aligned, dst_aligned;
14508 bool src_volatile, dst_volatile;
14509
14510 gcc_assert (CONST_INT_P (operands[2]));
14511 gcc_assert (CONST_INT_P (operands[3]));
14512
14513 len = UINTVAL (operands[2]);
14514 if (len > 64)
14515 return false;
14516
14517 /* Maximum alignment we can assume for both src and dst buffers. */
14518 align = INTVAL (operands[3]);
14519
14520 if ((!unaligned_access) && (len >= 4) && ((align & 3) != 0))
14521 return false;
14522
14523 /* Place src and dst addresses in registers
14524 and update the corresponding mem rtx. */
14525 dst = operands[0];
14526 dst_volatile = MEM_VOLATILE_P (dst);
14527 dst_aligned = MEM_ALIGN (dst) >= BITS_PER_WORD;
14528 base = copy_to_mode_reg (SImode, XEXP (dst, 0));
14529 dst = adjust_automodify_address (dst, VOIDmode, base, 0);
14530
14531 src = operands[1];
14532 src_volatile = MEM_VOLATILE_P (src);
14533 src_aligned = MEM_ALIGN (src) >= BITS_PER_WORD;
14534 base = copy_to_mode_reg (SImode, XEXP (src, 0));
14535 src = adjust_automodify_address (src, VOIDmode, base, 0);
14536
14537 if (!unaligned_access && !(src_aligned && dst_aligned))
14538 return false;
14539
14540 if (src_volatile || dst_volatile)
14541 return false;
14542
14543 /* If we cannot generate any LDRD/STRD, try to generate LDM/STM. */
14544 if (!(dst_aligned || src_aligned))
14545 return arm_gen_movmemqi (operands);
14546
14547 /* If either src or dst is unaligned, we'll be accessing it as pairs
14548 of unaligned SImode accesses. Otherwise we can generate DImode
14549 ldrd/strd instructions. */
14550 src = adjust_address (src, src_aligned ? DImode : SImode, 0);
14551 dst = adjust_address (dst, dst_aligned ? DImode : SImode, 0);
14552
14553 while (len >= 8)
14554 {
14555 len -= 8;
14556 reg0 = gen_reg_rtx (DImode);
14557 rtx low_reg = NULL_RTX;
14558 rtx hi_reg = NULL_RTX;
14559
14560 if (!src_aligned || !dst_aligned)
14561 {
14562 low_reg = gen_lowpart (SImode, reg0);
14563 hi_reg = gen_highpart_mode (SImode, DImode, reg0);
14564 }
14565 if (src_aligned)
14566 emit_move_insn (reg0, src);
14567 else
14568 {
14569 emit_insn (gen_unaligned_loadsi (low_reg, src));
14570 src = next_consecutive_mem (src);
14571 emit_insn (gen_unaligned_loadsi (hi_reg, src));
14572 }
14573
14574 if (dst_aligned)
14575 emit_move_insn (dst, reg0);
14576 else
14577 {
14578 emit_insn (gen_unaligned_storesi (dst, low_reg));
14579 dst = next_consecutive_mem (dst);
14580 emit_insn (gen_unaligned_storesi (dst, hi_reg));
14581 }
14582
14583 src = next_consecutive_mem (src);
14584 dst = next_consecutive_mem (dst);
14585 }
14586
14587 gcc_assert (len < 8);
14588 if (len >= 4)
14589 {
14590 /* More than a word but less than a double-word to copy. Copy a word. */
14591 reg0 = gen_reg_rtx (SImode);
14592 src = adjust_address (src, SImode, 0);
14593 dst = adjust_address (dst, SImode, 0);
14594 if (src_aligned)
14595 emit_move_insn (reg0, src);
14596 else
14597 emit_insn (gen_unaligned_loadsi (reg0, src));
14598
14599 if (dst_aligned)
14600 emit_move_insn (dst, reg0);
14601 else
14602 emit_insn (gen_unaligned_storesi (dst, reg0));
14603
14604 src = next_consecutive_mem (src);
14605 dst = next_consecutive_mem (dst);
14606 len -= 4;
14607 }
14608
14609 if (len == 0)
14610 return true;
14611
14612 /* Copy the remaining bytes. */
14613 if (len >= 2)
14614 {
14615 dst = adjust_address (dst, HImode, 0);
14616 src = adjust_address (src, HImode, 0);
14617 reg0 = gen_reg_rtx (SImode);
14618 if (src_aligned)
14619 emit_insn (gen_zero_extendhisi2 (reg0, src));
14620 else
14621 emit_insn (gen_unaligned_loadhiu (reg0, src));
14622
14623 if (dst_aligned)
14624 emit_insn (gen_movhi (dst, gen_lowpart (HImode, reg0)));
14625 else
14626 emit_insn (gen_unaligned_storehi (dst, gen_lowpart (HImode, reg0)));
14627
14628 src = next_consecutive_mem (src);
14629 dst = next_consecutive_mem (dst);
14630 if (len == 2)
14631 return true;
14632 }
14633
14634 dst = adjust_address (dst, QImode, 0);
14635 src = adjust_address (src, QImode, 0);
14636 reg0 = gen_reg_rtx (QImode);
14637 emit_move_insn (reg0, src);
14638 emit_move_insn (dst, reg0);
14639 return true;
14640 }
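/* As a sketch of the expansion above (assuming both buffers are word aligned,
   so the DImode path is taken): a 14-byte copy becomes one DImode move
   (typically an LDRD/STRD pair), one SImode move (LDR/STR) for the next four
   bytes, and one halfword move (LDRH/STRH) for the final two bytes.  The
   exact instructions chosen depend on the target options.  */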
14641
14642 /* Select a dominance comparison mode if possible for a test of the general
14643 form (OP (COND_OR (X) (Y)) (const_int 0)). We support three forms.
14644 COND_OR == DOM_CC_X_AND_Y => (X && Y)
14645 COND_OR == DOM_CC_NX_OR_Y => ((! X) || Y)
14646 COND_OR == DOM_CC_X_OR_Y => (X || Y)
14647 In all cases OP will be either EQ or NE, but we don't need to know which
14648 here. If we are unable to support a dominance comparison we return
14649 CC mode. This will then fail to match for the RTL expressions that
14650 generate this call. */
14651 machine_mode
14652 arm_select_dominance_cc_mode (rtx x, rtx y, HOST_WIDE_INT cond_or)
14653 {
14654 enum rtx_code cond1, cond2;
14655 int swapped = 0;
14656
14657 /* Currently we will probably get the wrong result if the individual
14658 comparisons are not simple. This also ensures that it is safe to
14659 reverse a comparison if necessary. */
14660 if ((arm_select_cc_mode (cond1 = GET_CODE (x), XEXP (x, 0), XEXP (x, 1))
14661 != CCmode)
14662 || (arm_select_cc_mode (cond2 = GET_CODE (y), XEXP (y, 0), XEXP (y, 1))
14663 != CCmode))
14664 return CCmode;
14665
14666 /* The if_then_else variant of this tests the second condition if the
14667 first passes, but is true if the first fails. Reverse the first
14668 condition to get a true "inclusive-or" expression. */
14669 if (cond_or == DOM_CC_NX_OR_Y)
14670 cond1 = reverse_condition (cond1);
14671
14672 /* If the comparisons are not equal, and one doesn't dominate the other,
14673 then we can't do this. */
14674 if (cond1 != cond2
14675 && !comparison_dominates_p (cond1, cond2)
14676 && (swapped = 1, !comparison_dominates_p (cond2, cond1)))
14677 return CCmode;
14678
14679 if (swapped)
14680 std::swap (cond1, cond2);
14681
14682 switch (cond1)
14683 {
14684 case EQ:
14685 if (cond_or == DOM_CC_X_AND_Y)
14686 return CC_DEQmode;
14687
14688 switch (cond2)
14689 {
14690 case EQ: return CC_DEQmode;
14691 case LE: return CC_DLEmode;
14692 case LEU: return CC_DLEUmode;
14693 case GE: return CC_DGEmode;
14694 case GEU: return CC_DGEUmode;
14695 default: gcc_unreachable ();
14696 }
14697
14698 case LT:
14699 if (cond_or == DOM_CC_X_AND_Y)
14700 return CC_DLTmode;
14701
14702 switch (cond2)
14703 {
14704 case LT:
14705 return CC_DLTmode;
14706 case LE:
14707 return CC_DLEmode;
14708 case NE:
14709 return CC_DNEmode;
14710 default:
14711 gcc_unreachable ();
14712 }
14713
14714 case GT:
14715 if (cond_or == DOM_CC_X_AND_Y)
14716 return CC_DGTmode;
14717
14718 switch (cond2)
14719 {
14720 case GT:
14721 return CC_DGTmode;
14722 case GE:
14723 return CC_DGEmode;
14724 case NE:
14725 return CC_DNEmode;
14726 default:
14727 gcc_unreachable ();
14728 }
14729
14730 case LTU:
14731 if (cond_or == DOM_CC_X_AND_Y)
14732 return CC_DLTUmode;
14733
14734 switch (cond2)
14735 {
14736 case LTU:
14737 return CC_DLTUmode;
14738 case LEU:
14739 return CC_DLEUmode;
14740 case NE:
14741 return CC_DNEmode;
14742 default:
14743 gcc_unreachable ();
14744 }
14745
14746 case GTU:
14747 if (cond_or == DOM_CC_X_AND_Y)
14748 return CC_DGTUmode;
14749
14750 switch (cond2)
14751 {
14752 case GTU:
14753 return CC_DGTUmode;
14754 case GEU:
14755 return CC_DGEUmode;
14756 case NE:
14757 return CC_DNEmode;
14758 default:
14759 gcc_unreachable ();
14760 }
14761
14762 /* The remaining cases only occur when both comparisons are the
14763 same. */
14764 case NE:
14765 gcc_assert (cond1 == cond2);
14766 return CC_DNEmode;
14767
14768 case LE:
14769 gcc_assert (cond1 == cond2);
14770 return CC_DLEmode;
14771
14772 case GE:
14773 gcc_assert (cond1 == cond2);
14774 return CC_DGEmode;
14775
14776 case LEU:
14777 gcc_assert (cond1 == cond2);
14778 return CC_DLEUmode;
14779
14780 case GEU:
14781 gcc_assert (cond1 == cond2);
14782 return CC_DGEUmode;
14783
14784 default:
14785 gcc_unreachable ();
14786 }
14787 }
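/* For example, with COND_OR == DOM_CC_X_OR_Y and both sub-comparisons LTU
   this returns CC_DLTUmode, whereas a pair such as LT and GTU, where neither
   code dominates the other, returns CCmode so the corresponding RTL pattern
   fails to match.  */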
14788
14789 machine_mode
14790 arm_select_cc_mode (enum rtx_code op, rtx x, rtx y)
14791 {
14792 /* All floating point compares return CCFP if it is an equality
14793 comparison, and CCFPE otherwise. */
14794 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_FLOAT)
14795 {
14796 switch (op)
14797 {
14798 case EQ:
14799 case NE:
14800 case UNORDERED:
14801 case ORDERED:
14802 case UNLT:
14803 case UNLE:
14804 case UNGT:
14805 case UNGE:
14806 case UNEQ:
14807 case LTGT:
14808 return CCFPmode;
14809
14810 case LT:
14811 case LE:
14812 case GT:
14813 case GE:
14814 return CCFPEmode;
14815
14816 default:
14817 gcc_unreachable ();
14818 }
14819 }
14820
14821 /* A compare with a shifted operand. Because of canonicalization, the
14822 comparison will have to be swapped when we emit the assembler. */
14823 if (GET_MODE (y) == SImode
14824 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14825 && (GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14826 || GET_CODE (x) == LSHIFTRT || GET_CODE (x) == ROTATE
14827 || GET_CODE (x) == ROTATERT))
14828 return CC_SWPmode;
14829
14830 /* This operation is performed swapped, but since we only rely on the Z
14831 flag we don't need an additional mode. */
14832 if (GET_MODE (y) == SImode
14833 && (REG_P (y) || (GET_CODE (y) == SUBREG))
14834 && GET_CODE (x) == NEG
14835 && (op == EQ || op == NE))
14836 return CC_Zmode;
14837
14838 /* This is a special case that is used by combine to allow a
14839 comparison of a shifted byte load to be split into a zero-extend
14840 followed by a comparison of the shifted integer (only valid for
14841 equalities and unsigned inequalities). */
14842 if (GET_MODE (x) == SImode
14843 && GET_CODE (x) == ASHIFT
14844 && CONST_INT_P (XEXP (x, 1)) && INTVAL (XEXP (x, 1)) == 24
14845 && GET_CODE (XEXP (x, 0)) == SUBREG
14846 && MEM_P (SUBREG_REG (XEXP (x, 0)))
14847 && GET_MODE (SUBREG_REG (XEXP (x, 0))) == QImode
14848 && (op == EQ || op == NE
14849 || op == GEU || op == GTU || op == LTU || op == LEU)
14850 && CONST_INT_P (y))
14851 return CC_Zmode;
14852
14853 /* A construct for a conditional compare, if the false arm contains
14854 0, then both conditions must be true, otherwise either condition
14855 must be true. Not all conditions are possible, so CCmode is
14856 returned if it can't be done. */
14857 if (GET_CODE (x) == IF_THEN_ELSE
14858 && (XEXP (x, 2) == const0_rtx
14859 || XEXP (x, 2) == const1_rtx)
14860 && COMPARISON_P (XEXP (x, 0))
14861 && COMPARISON_P (XEXP (x, 1)))
14862 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14863 INTVAL (XEXP (x, 2)));
14864
14865 /* Alternate canonicalizations of the above. These are somewhat cleaner. */
14866 if (GET_CODE (x) == AND
14867 && (op == EQ || op == NE)
14868 && COMPARISON_P (XEXP (x, 0))
14869 && COMPARISON_P (XEXP (x, 1)))
14870 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14871 DOM_CC_X_AND_Y);
14872
14873 if (GET_CODE (x) == IOR
14874 && (op == EQ || op == NE)
14875 && COMPARISON_P (XEXP (x, 0))
14876 && COMPARISON_P (XEXP (x, 1)))
14877 return arm_select_dominance_cc_mode (XEXP (x, 0), XEXP (x, 1),
14878 DOM_CC_X_OR_Y);
14879
14880 /* An operation (on Thumb) where we want to test for a single bit.
14881 This is done by shifting that bit up into the top bit of a
14882 scratch register; we can then branch on the sign bit. */
14883 if (TARGET_THUMB1
14884 && GET_MODE (x) == SImode
14885 && (op == EQ || op == NE)
14886 && GET_CODE (x) == ZERO_EXTRACT
14887 && XEXP (x, 1) == const1_rtx)
14888 return CC_Nmode;
14889
14890 /* An operation that sets the condition codes as a side-effect, the
14891 V flag is not set correctly, so we can only use comparisons where
14892 this doesn't matter. (For LT and GE we can use "mi" and "pl"
14893 instead.) */
14894 /* ??? Does the ZERO_EXTRACT case really apply to thumb2? */
14895 if (GET_MODE (x) == SImode
14896 && y == const0_rtx
14897 && (op == EQ || op == NE || op == LT || op == GE)
14898 && (GET_CODE (x) == PLUS || GET_CODE (x) == MINUS
14899 || GET_CODE (x) == AND || GET_CODE (x) == IOR
14900 || GET_CODE (x) == XOR || GET_CODE (x) == MULT
14901 || GET_CODE (x) == NOT || GET_CODE (x) == NEG
14902 || GET_CODE (x) == LSHIFTRT
14903 || GET_CODE (x) == ASHIFT || GET_CODE (x) == ASHIFTRT
14904 || GET_CODE (x) == ROTATERT
14905 || (TARGET_32BIT && GET_CODE (x) == ZERO_EXTRACT)))
14906 return CC_NOOVmode;
14907
14908 if (GET_MODE (x) == QImode && (op == EQ || op == NE))
14909 return CC_Zmode;
14910
14911 if (GET_MODE (x) == SImode && (op == LTU || op == GEU)
14912 && GET_CODE (x) == PLUS
14913 && (rtx_equal_p (XEXP (x, 0), y) || rtx_equal_p (XEXP (x, 1), y)))
14914 return CC_Cmode;
14915
14916 if (GET_MODE (x) == DImode || GET_MODE (y) == DImode)
14917 {
14918 switch (op)
14919 {
14920 case EQ:
14921 case NE:
14922 /* A DImode comparison against zero can be implemented by
14923 or'ing the two halves together. */
14924 if (y == const0_rtx)
14925 return CC_Zmode;
14926
14927 /* We can do an equality test in three Thumb instructions. */
14928 if (!TARGET_32BIT)
14929 return CC_Zmode;
14930
14931 /* FALLTHROUGH */
14932
14933 case LTU:
14934 case LEU:
14935 case GTU:
14936 case GEU:
14937 /* DImode unsigned comparisons can be implemented by cmp +
14938 cmpeq without a scratch register. Not worth doing in
14939 Thumb-2. */
14940 if (TARGET_32BIT)
14941 return CC_CZmode;
14942
14943 /* FALLTHROUGH */
14944
14945 case LT:
14946 case LE:
14947 case GT:
14948 case GE:
14949 /* DImode signed and unsigned comparisons can be implemented
14950 by cmp + sbcs with a scratch register, but that does not
14951 set the Z flag - we must reverse GT/LE/GTU/LEU. */
14952 gcc_assert (op != EQ && op != NE);
14953 return CC_NCVmode;
14954
14955 default:
14956 gcc_unreachable ();
14957 }
14958 }
14959
14960 if (GET_MODE_CLASS (GET_MODE (x)) == MODE_CC)
14961 return GET_MODE (x);
14962
14963 return CCmode;
14964 }
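/* For example, an SImode EQ comparison of (plus a b) against zero selects
   CC_NOOVmode, allowing the flag-setting form of the addition to feed the
   branch directly, while a DImode EQ against a non-zero value on a 32-bit
   target selects CC_CZmode.  (Illustrative cases only; see the conditions
   above for the full set.)  */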
14965
14966 /* X and Y are two things to compare using CODE.  Emit the compare insn and
14967 return the rtx for register 0 in the proper mode.  SCRATCH may provide a
14968 scratch register, which is required for DImode comparisons after reload. */
14969 rtx
14970 arm_gen_compare_reg (enum rtx_code code, rtx x, rtx y, rtx scratch)
14971 {
14972 machine_mode mode;
14973 rtx cc_reg;
14974 int dimode_comparison = GET_MODE (x) == DImode || GET_MODE (y) == DImode;
14975
14976 /* We might have X as a constant, Y as a register because of the predicates
14977 used for cmpdi. If so, force X to a register here. */
14978 if (dimode_comparison && !REG_P (x))
14979 x = force_reg (DImode, x);
14980
14981 mode = SELECT_CC_MODE (code, x, y);
14982 cc_reg = gen_rtx_REG (mode, CC_REGNUM);
14983
14984 if (dimode_comparison
14985 && mode != CC_CZmode)
14986 {
14987 rtx clobber, set;
14988
14989 /* To compare two non-zero values for equality, XOR them and
14990 then compare against zero. Not used for ARM mode; there
14991 CC_CZmode is cheaper. */
14992 if (mode == CC_Zmode && y != const0_rtx)
14993 {
14994 gcc_assert (!reload_completed);
14995 x = expand_binop (DImode, xor_optab, x, y, NULL_RTX, 0, OPTAB_WIDEN);
14996 y = const0_rtx;
14997 }
14998
14999 /* A scratch register is required. */
15000 if (reload_completed)
15001 gcc_assert (scratch != NULL && GET_MODE (scratch) == SImode);
15002 else
15003 scratch = gen_rtx_SCRATCH (SImode);
15004
15005 clobber = gen_rtx_CLOBBER (VOIDmode, scratch);
15006 set = gen_rtx_SET (cc_reg, gen_rtx_COMPARE (mode, x, y));
15007 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, set, clobber)));
15008 }
15009 else
15010 emit_set_insn (cc_reg, gen_rtx_COMPARE (mode, x, y));
15011
15012 return cc_reg;
15013 }
15014
15015 /* Generate a sequence of insns that will generate the correct return
15016 address mask depending on the physical architecture that the program
15017 is running on. */
15018 rtx
15019 arm_gen_return_addr_mask (void)
15020 {
15021 rtx reg = gen_reg_rtx (Pmode);
15022
15023 emit_insn (gen_return_addr_mask (reg));
15024 return reg;
15025 }
15026
15027 void
15028 arm_reload_in_hi (rtx *operands)
15029 {
15030 rtx ref = operands[1];
15031 rtx base, scratch;
15032 HOST_WIDE_INT offset = 0;
15033
15034 if (GET_CODE (ref) == SUBREG)
15035 {
15036 offset = SUBREG_BYTE (ref);
15037 ref = SUBREG_REG (ref);
15038 }
15039
15040 if (REG_P (ref))
15041 {
15042 /* We have a pseudo which has been spilt onto the stack; there
15043 are two cases here: the first where there is a simple
15044 stack-slot replacement and a second where the stack-slot is
15045 out of range, or is used as a subreg. */
15046 if (reg_equiv_mem (REGNO (ref)))
15047 {
15048 ref = reg_equiv_mem (REGNO (ref));
15049 base = find_replacement (&XEXP (ref, 0));
15050 }
15051 else
15052 /* The slot is out of range, or was dressed up in a SUBREG. */
15053 base = reg_equiv_address (REGNO (ref));
15054
15055 /* PR 62554: If there is no equivalent memory location then just move
15056 the value as an SImode register move. This happens when the target
15057 architecture variant does not have an HImode register move. */
15058 if (base == NULL)
15059 {
15060 gcc_assert (REG_P (operands[0]));
15061 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, operands[0], 0),
15062 gen_rtx_SUBREG (SImode, ref, 0)));
15063 return;
15064 }
15065 }
15066 else
15067 base = find_replacement (&XEXP (ref, 0));
15068
15069 /* Handle the case where the address is too complex to be offset by 1. */
15070 if (GET_CODE (base) == MINUS
15071 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15072 {
15073 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15074
15075 emit_set_insn (base_plus, base);
15076 base = base_plus;
15077 }
15078 else if (GET_CODE (base) == PLUS)
15079 {
15080 /* The addend must be CONST_INT, or we would have dealt with it above. */
15081 HOST_WIDE_INT hi, lo;
15082
15083 offset += INTVAL (XEXP (base, 1));
15084 base = XEXP (base, 0);
15085
15086 /* Rework the address into a legal sequence of insns. */
15087 /* Valid range for lo is -4095 -> 4095 */
15088 lo = (offset >= 0
15089 ? (offset & 0xfff)
15090 : -((-offset) & 0xfff));
15091
15092 /* Corner case: if lo is the max offset then we would be out of range
15093 once we have added the additional 1 below, so bump the msb into the
15094 pre-loading insn(s). */
15095 if (lo == 4095)
15096 lo &= 0x7ff;
15097
15098 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15099 ^ (HOST_WIDE_INT) 0x80000000)
15100 - (HOST_WIDE_INT) 0x80000000);
15101
15102 gcc_assert (hi + lo == offset);
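/* As a worked example of the split above: an offset of 5000 (0x1388) gives
   lo = 0x388 (904) and hi = 4096, while an offset of 4095 hits the corner
   case, giving lo = 2047 and hi = 2048 so that lo + 1 still fits in the
   12-bit range.  */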
15103
15104 if (hi != 0)
15105 {
15106 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15107
15108 /* Get the base address; addsi3 knows how to handle constants
15109 that require more than one insn. */
15110 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15111 base = base_plus;
15112 offset = lo;
15113 }
15114 }
15115
15116 /* Operands[2] may overlap operands[0] (though it won't overlap
15117 operands[1]); that's why we asked for a DImode reg -- so we can
15118 use the half that does not overlap. */
15119 if (REGNO (operands[2]) == REGNO (operands[0]))
15120 scratch = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15121 else
15122 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15123
15124 emit_insn (gen_zero_extendqisi2 (scratch,
15125 gen_rtx_MEM (QImode,
15126 plus_constant (Pmode, base,
15127 offset))));
15128 emit_insn (gen_zero_extendqisi2 (gen_rtx_SUBREG (SImode, operands[0], 0),
15129 gen_rtx_MEM (QImode,
15130 plus_constant (Pmode, base,
15131 offset + 1))));
15132 if (!BYTES_BIG_ENDIAN)
15133 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15134 gen_rtx_IOR (SImode,
15135 gen_rtx_ASHIFT
15136 (SImode,
15137 gen_rtx_SUBREG (SImode, operands[0], 0),
15138 GEN_INT (8)),
15139 scratch));
15140 else
15141 emit_set_insn (gen_rtx_SUBREG (SImode, operands[0], 0),
15142 gen_rtx_IOR (SImode,
15143 gen_rtx_ASHIFT (SImode, scratch,
15144 GEN_INT (8)),
15145 gen_rtx_SUBREG (SImode, operands[0], 0)));
15146 }
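/* Roughly, for the common little-endian case with a simple base register and
   small offset, the sequence emitted above is:

	ldrb	scratch, [base, #offset]
	ldrb	out, [base, #offset + 1]
	orr	out, scratch, out, lsl #8

   where OUT stands for operands[0]; the big-endian case swaps the roles of
   the two bytes.  (A sketch only; the exact registers and addressing depend
   on the reload operands.)  */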
15147
15148 /* Handle storing a half-word to memory during reload by synthesizing as two
15149 byte stores. Take care not to clobber the input values until after we
15150 have moved them somewhere safe. This code assumes that if the DImode
15151 scratch in operands[2] overlaps either the input value or output address
15152 in some way, then that value must die in this insn (we absolutely need
15153 two scratch registers for some corner cases). */
15154 void
15155 arm_reload_out_hi (rtx *operands)
15156 {
15157 rtx ref = operands[0];
15158 rtx outval = operands[1];
15159 rtx base, scratch;
15160 HOST_WIDE_INT offset = 0;
15161
15162 if (GET_CODE (ref) == SUBREG)
15163 {
15164 offset = SUBREG_BYTE (ref);
15165 ref = SUBREG_REG (ref);
15166 }
15167
15168 if (REG_P (ref))
15169 {
15170 /* We have a pseudo which has been spilt onto the stack; there
15171 are two cases here: the first where there is a simple
15172 stack-slot replacement and a second where the stack-slot is
15173 out of range, or is used as a subreg. */
15174 if (reg_equiv_mem (REGNO (ref)))
15175 {
15176 ref = reg_equiv_mem (REGNO (ref));
15177 base = find_replacement (&XEXP (ref, 0));
15178 }
15179 else
15180 /* The slot is out of range, or was dressed up in a SUBREG. */
15181 base = reg_equiv_address (REGNO (ref));
15182
15183 /* PR 62254: If there is no equivalent memory location then just move
15184 the value as an SImode register move. This happens when the target
15185 architecture variant does not have an HImode register move. */
15186 if (base == NULL)
15187 {
15188 gcc_assert (REG_P (outval) || SUBREG_P (outval));
15189
15190 if (REG_P (outval))
15191 {
15192 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15193 gen_rtx_SUBREG (SImode, outval, 0)));
15194 }
15195 else /* SUBREG_P (outval) */
15196 {
15197 if (GET_MODE (SUBREG_REG (outval)) == SImode)
15198 emit_insn (gen_movsi (gen_rtx_SUBREG (SImode, ref, 0),
15199 SUBREG_REG (outval)));
15200 else
15201 /* FIXME: Handle other cases ? */
15202 gcc_unreachable ();
15203 }
15204 return;
15205 }
15206 }
15207 else
15208 base = find_replacement (&XEXP (ref, 0));
15209
15210 scratch = gen_rtx_REG (SImode, REGNO (operands[2]));
15211
15212 /* Handle the case where the address is too complex to be offset by 1. */
15213 if (GET_CODE (base) == MINUS
15214 || (GET_CODE (base) == PLUS && !CONST_INT_P (XEXP (base, 1))))
15215 {
15216 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15217
15218 /* Be careful not to destroy OUTVAL. */
15219 if (reg_overlap_mentioned_p (base_plus, outval))
15220 {
15221 /* Updating base_plus might destroy outval, see if we can
15222 swap the scratch and base_plus. */
15223 if (!reg_overlap_mentioned_p (scratch, outval))
15224 std::swap (scratch, base_plus);
15225 else
15226 {
15227 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15228
15229 /* Be conservative and copy OUTVAL into the scratch now,
15230 this should only be necessary if outval is a subreg
15231 of something larger than a word. */
15232 /* XXX Might this clobber base? I can't see how it can,
15233 since scratch is known to overlap with OUTVAL, and
15234 must be wider than a word. */
15235 emit_insn (gen_movhi (scratch_hi, outval));
15236 outval = scratch_hi;
15237 }
15238 }
15239
15240 emit_set_insn (base_plus, base);
15241 base = base_plus;
15242 }
15243 else if (GET_CODE (base) == PLUS)
15244 {
15245 /* The addend must be CONST_INT, or we would have dealt with it above. */
15246 HOST_WIDE_INT hi, lo;
15247
15248 offset += INTVAL (XEXP (base, 1));
15249 base = XEXP (base, 0);
15250
15251 /* Rework the address into a legal sequence of insns. */
15252 /* Valid range for lo is -4095 -> 4095 */
15253 lo = (offset >= 0
15254 ? (offset & 0xfff)
15255 : -((-offset) & 0xfff));
15256
15257 /* Corner case: if lo is the max offset then we would be out of range
15258 once we have added the additional 1 below, so bump the msb into the
15259 pre-loading insn(s). */
15260 if (lo == 4095)
15261 lo &= 0x7ff;
15262
15263 hi = ((((offset - lo) & (HOST_WIDE_INT) 0xffffffff)
15264 ^ (HOST_WIDE_INT) 0x80000000)
15265 - (HOST_WIDE_INT) 0x80000000);
15266
15267 gcc_assert (hi + lo == offset);
15268
15269 if (hi != 0)
15270 {
15271 rtx base_plus = gen_rtx_REG (SImode, REGNO (operands[2]) + 1);
15272
15273 /* Be careful not to destroy OUTVAL. */
15274 if (reg_overlap_mentioned_p (base_plus, outval))
15275 {
15276 /* Updating base_plus might destroy outval, see if we
15277 can swap the scratch and base_plus. */
15278 if (!reg_overlap_mentioned_p (scratch, outval))
15279 std::swap (scratch, base_plus);
15280 else
15281 {
15282 rtx scratch_hi = gen_rtx_REG (HImode, REGNO (operands[2]));
15283
15284 /* Be conservative and copy outval into scratch now,
15285 this should only be necessary if outval is a
15286 subreg of something larger than a word. */
15287 /* XXX Might this clobber base? I can't see how it
15288 can, since scratch is known to overlap with
15289 outval. */
15290 emit_insn (gen_movhi (scratch_hi, outval));
15291 outval = scratch_hi;
15292 }
15293 }
15294
15295 /* Get the base address; addsi3 knows how to handle constants
15296 that require more than one insn. */
15297 emit_insn (gen_addsi3 (base_plus, base, GEN_INT (hi)));
15298 base = base_plus;
15299 offset = lo;
15300 }
15301 }
15302
15303 if (BYTES_BIG_ENDIAN)
15304 {
15305 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15306 plus_constant (Pmode, base,
15307 offset + 1)),
15308 gen_lowpart (QImode, outval)));
15309 emit_insn (gen_lshrsi3 (scratch,
15310 gen_rtx_SUBREG (SImode, outval, 0),
15311 GEN_INT (8)));
15312 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15313 offset)),
15314 gen_lowpart (QImode, scratch)));
15315 }
15316 else
15317 {
15318 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, base,
15319 offset)),
15320 gen_lowpart (QImode, outval)));
15321 emit_insn (gen_lshrsi3 (scratch,
15322 gen_rtx_SUBREG (SImode, outval, 0),
15323 GEN_INT (8)));
15324 emit_insn (gen_movqi (gen_rtx_MEM (QImode,
15325 plus_constant (Pmode, base,
15326 offset + 1)),
15327 gen_lowpart (QImode, scratch)));
15328 }
15329 }
15330
15331 /* Return true if a type must be passed in memory. For AAPCS, small aggregates
15332 (padded to the size of a word) should be passed in a register. */
15333
15334 static bool
15335 arm_must_pass_in_stack (machine_mode mode, const_tree type)
15336 {
15337 if (TARGET_AAPCS_BASED)
15338 return must_pass_in_stack_var_size (mode, type);
15339 else
15340 return must_pass_in_stack_var_size_or_pad (mode, type);
15341 }
15342
15343
15344 /* Implement TARGET_FUNCTION_ARG_PADDING; return PAD_UPWARD if the lowest
15345 byte of a stack argument has useful data. For legacy APCS ABIs we use
15346 the default. For AAPCS based ABIs small aggregate types are placed
15347 in the lowest memory address. */
15348
15349 static pad_direction
15350 arm_function_arg_padding (machine_mode mode, const_tree type)
15351 {
15352 if (!TARGET_AAPCS_BASED)
15353 return default_function_arg_padding (mode, type);
15354
15355 if (type && BYTES_BIG_ENDIAN && INTEGRAL_TYPE_P (type))
15356 return PAD_DOWNWARD;
15357
15358 return PAD_UPWARD;
15359 }
15360
15361
15362 /* Similarly, for use by BLOCK_REG_PADDING (MODE, TYPE, FIRST).
15363 Return !BYTES_BIG_ENDIAN if the least significant byte of the
15364 register has useful data, and return the opposite if the most
15365 significant byte does. */
15366
15367 bool
15368 arm_pad_reg_upward (machine_mode mode,
15369 tree type, int first ATTRIBUTE_UNUSED)
15370 {
15371 if (TARGET_AAPCS_BASED && BYTES_BIG_ENDIAN)
15372 {
15373 /* For AAPCS, small aggregates, small fixed-point types,
15374 and small complex types are always padded upwards. */
15375 if (type)
15376 {
15377 if ((AGGREGATE_TYPE_P (type)
15378 || TREE_CODE (type) == COMPLEX_TYPE
15379 || FIXED_POINT_TYPE_P (type))
15380 && int_size_in_bytes (type) <= 4)
15381 return true;
15382 }
15383 else
15384 {
15385 if ((COMPLEX_MODE_P (mode) || ALL_FIXED_POINT_MODE_P (mode))
15386 && GET_MODE_SIZE (mode) <= 4)
15387 return true;
15388 }
15389 }
15390
15391 /* Otherwise, use default padding. */
15392 return !BYTES_BIG_ENDIAN;
15393 }
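/* For example, on a big-endian AAPCS target a 3-byte structure is padded
   upward (this returns true), whereas a plain scalar falls through to the
   default of !BYTES_BIG_ENDIAN.  */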
15394
15395 /* Returns true iff OFFSET is valid for use in an LDRD/STRD instruction,
15396 assuming that the address in the base register is word aligned. */
15397 bool
15398 offset_ok_for_ldrd_strd (HOST_WIDE_INT offset)
15399 {
15400 HOST_WIDE_INT max_offset;
15401
15402 /* Offset must be a multiple of 4 in Thumb mode. */
15403 if (TARGET_THUMB2 && ((offset & 3) != 0))
15404 return false;
15405
15406 if (TARGET_THUMB2)
15407 max_offset = 1020;
15408 else if (TARGET_ARM)
15409 max_offset = 255;
15410 else
15411 return false;
15412
15413 return ((offset <= max_offset) && (offset >= -max_offset));
15414 }
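/* For example, in Thumb-2 offsets in the range -1020..1020 that are multiples
   of 4 are accepted (so 1020 is valid but 1018 and 1024 are not); in ARM
   state the range is -255..255 with no multiple-of-4 restriction; in Thumb-1
   the answer is always false.  */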
15415
15416 /* Checks whether the operands are valid for use in an LDRD/STRD instruction.
15417 Assumes that RT, RT2, and RN are REG. This is guaranteed by the patterns.
15418 Assumes that the address in the base register RN is word aligned. The pattern
15419 guarantees that both memory accesses use the same base register, that the
15420 offsets are constants within range, and that the gap between the offsets is 4.
15421 If reload is complete, also check that the registers are legal. WBACK indicates
15422 whether the address is updated; LOAD whether the access is a load or a store. */
15423 bool
15424 operands_ok_ldrd_strd (rtx rt, rtx rt2, rtx rn, HOST_WIDE_INT offset,
15425 bool wback, bool load)
15426 {
15427 unsigned int t, t2, n;
15428
15429 if (!reload_completed)
15430 return true;
15431
15432 if (!offset_ok_for_ldrd_strd (offset))
15433 return false;
15434
15435 t = REGNO (rt);
15436 t2 = REGNO (rt2);
15437 n = REGNO (rn);
15438
15439 if ((TARGET_THUMB2)
15440 && ((wback && (n == t || n == t2))
15441 || (t == SP_REGNUM)
15442 || (t == PC_REGNUM)
15443 || (t2 == SP_REGNUM)
15444 || (t2 == PC_REGNUM)
15445 || (!load && (n == PC_REGNUM))
15446 || (load && (t == t2))
15447 /* Triggers Cortex-M3 LDRD errata. */
15448 || (!wback && load && fix_cm3_ldrd && (n == t))))
15449 return false;
15450
15451 if ((TARGET_ARM)
15452 && ((wback && (n == t || n == t2))
15453 || (t2 == PC_REGNUM)
15454 || (t % 2 != 0) /* First destination register is not even. */
15455 || (t2 != t + 1)
15456 /* PC can be used as a base register (for offset addressing only),
15457 but it is deprecated. */
15458 || (n == PC_REGNUM)))
15459 return false;
15460
15461 return true;
15462 }
15463
15464 /* Return true if a 64-bit access with alignment ALIGN and with a
15465 constant offset OFFSET from the base pointer is permitted on this
15466 architecture. */
15467 static bool
15468 align_ok_ldrd_strd (HOST_WIDE_INT align, HOST_WIDE_INT offset)
15469 {
15470 return (unaligned_access
15471 ? (align >= BITS_PER_WORD && (offset & 3) == 0)
15472 : (align >= 2 * BITS_PER_WORD && (offset & 7) == 0));
15473 }
15474
15475 /* Helper for gen_operands_ldrd_strd. Returns true iff the memory
15476 operand MEM's address contains an immediate offset from the base
15477 register and has no side effects, in which case it sets BASE,
15478 OFFSET and ALIGN accordingly. */
15479 static bool
15480 mem_ok_for_ldrd_strd (rtx mem, rtx *base, rtx *offset, HOST_WIDE_INT *align)
15481 {
15482 rtx addr;
15483
15484 gcc_assert (base != NULL && offset != NULL);
15485
15486 /* TODO: Handle more general memory operand patterns, such as
15487 PRE_DEC and PRE_INC. */
15488
15489 if (side_effects_p (mem))
15490 return false;
15491
15492 /* Can't deal with subregs. */
15493 if (GET_CODE (mem) == SUBREG)
15494 return false;
15495
15496 gcc_assert (MEM_P (mem));
15497
15498 *offset = const0_rtx;
15499 *align = MEM_ALIGN (mem);
15500
15501 addr = XEXP (mem, 0);
15502
15503 /* If addr isn't valid for DImode, then we can't handle it. */
15504 if (!arm_legitimate_address_p (DImode, addr,
15505 reload_in_progress || reload_completed))
15506 return false;
15507
15508 if (REG_P (addr))
15509 {
15510 *base = addr;
15511 return true;
15512 }
15513 else if (GET_CODE (addr) == PLUS || GET_CODE (addr) == MINUS)
15514 {
15515 *base = XEXP (addr, 0);
15516 *offset = XEXP (addr, 1);
15517 return (REG_P (*base) && CONST_INT_P (*offset));
15518 }
15519
15520 return false;
15521 }
15522
15523 /* Called from a peephole2 to replace two word-size accesses with a
15524 single LDRD/STRD instruction. Returns true iff we can generate a
15525 new instruction sequence. That is, both accesses use the same base
15526 register and the gap between constant offsets is 4. This function
15527 may reorder its operands to match ldrd/strd RTL templates.
15528 OPERANDS are the operands found by the peephole matcher;
15529 OPERANDS[0,1] are register operands, and OPERANDS[2,3] are the
15530 corresponding memory operands. LOAD indicates whether the access
15531 is a load or a store. CONST_STORE indicates a store of constant
15532 integer values held in OPERANDS[4,5] and assumes that the pattern
15533 is 4 insns long, for the purpose of checking dead registers.
15534 COMMUTE indicates that register operands may be reordered. */
15535 bool
15536 gen_operands_ldrd_strd (rtx *operands, bool load,
15537 bool const_store, bool commute)
15538 {
15539 int nops = 2;
15540 HOST_WIDE_INT offsets[2], offset, align[2];
15541 rtx base = NULL_RTX;
15542 rtx cur_base, cur_offset, tmp;
15543 int i, gap;
15544 HARD_REG_SET regset;
15545
15546 gcc_assert (!const_store || !load);
15547 /* Check that the memory references are immediate offsets from the
15548 same base register. Extract the base register, the destination
15549 registers, and the corresponding memory offsets. */
15550 for (i = 0; i < nops; i++)
15551 {
15552 if (!mem_ok_for_ldrd_strd (operands[nops+i], &cur_base, &cur_offset,
15553 &align[i]))
15554 return false;
15555
15556 if (i == 0)
15557 base = cur_base;
15558 else if (REGNO (base) != REGNO (cur_base))
15559 return false;
15560
15561 offsets[i] = INTVAL (cur_offset);
15562 if (GET_CODE (operands[i]) == SUBREG)
15563 {
15564 tmp = SUBREG_REG (operands[i]);
15565 gcc_assert (GET_MODE (operands[i]) == GET_MODE (tmp));
15566 operands[i] = tmp;
15567 }
15568 }
15569
15570 /* Make sure there is no dependency between the individual loads. */
15571 if (load && REGNO (operands[0]) == REGNO (base))
15572 return false; /* RAW */
15573
15574 if (load && REGNO (operands[0]) == REGNO (operands[1]))
15575 return false; /* WAW */
15576
15577 /* If the same input register is used in both stores
15578 when storing different constants, try to find a free register.
15579 For example, the code
15580 mov r0, 0
15581 str r0, [r2]
15582 mov r0, 1
15583 str r0, [r2, #4]
15584 can be transformed into
15585 mov r1, 0
15586 mov r0, 1
15587 strd r1, r0, [r2]
15588 in Thumb mode assuming that r1 is free.
15589 For ARM mode do the same but only if the starting register
15590 can be made to be even. */
15591 if (const_store
15592 && REGNO (operands[0]) == REGNO (operands[1])
15593 && INTVAL (operands[4]) != INTVAL (operands[5]))
15594 {
15595 if (TARGET_THUMB2)
15596 {
15597 CLEAR_HARD_REG_SET (regset);
15598 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15599 if (tmp == NULL_RTX)
15600 return false;
15601
15602 /* Use the new register in the first load to ensure that
15603 if the original input register is not dead after peephole,
15604 then it will have the correct constant value. */
15605 operands[0] = tmp;
15606 }
15607 else if (TARGET_ARM)
15608 {
15609 int regno = REGNO (operands[0]);
15610 if (!peep2_reg_dead_p (4, operands[0]))
15611 {
15612 /* When the input register is even and is not dead after the
15613 pattern, it has to hold the second constant but we cannot
15614 form a legal STRD in ARM mode with this register as the second
15615 register. */
15616 if (regno % 2 == 0)
15617 return false;
15618
15619 /* Is regno-1 free? */
15620 SET_HARD_REG_SET (regset);
15621 CLEAR_HARD_REG_BIT (regset, regno - 1);
15622 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15623 if (tmp == NULL_RTX)
15624 return false;
15625
15626 operands[0] = tmp;
15627 }
15628 else
15629 {
15630 /* Find a DImode register. */
15631 CLEAR_HARD_REG_SET (regset);
15632 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15633 if (tmp != NULL_RTX)
15634 {
15635 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15636 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15637 }
15638 else
15639 {
15640 /* Can we use the input register to form a DI register? */
15641 SET_HARD_REG_SET (regset);
15642 CLEAR_HARD_REG_BIT (regset,
15643 regno % 2 == 0 ? regno + 1 : regno - 1);
15644 tmp = peep2_find_free_register (0, 4, "r", SImode, &regset);
15645 if (tmp == NULL_RTX)
15646 return false;
15647 operands[regno % 2 == 1 ? 0 : 1] = tmp;
15648 }
15649 }
15650
15651 gcc_assert (operands[0] != NULL_RTX);
15652 gcc_assert (operands[1] != NULL_RTX);
15653 gcc_assert (REGNO (operands[0]) % 2 == 0);
15654 gcc_assert (REGNO (operands[1]) == REGNO (operands[0]) + 1);
15655 }
15656 }
15657
15658 /* Make sure the instructions are ordered with lower memory access first. */
15659 if (offsets[0] > offsets[1])
15660 {
15661 gap = offsets[0] - offsets[1];
15662 offset = offsets[1];
15663
15664 /* Swap the instructions such that lower memory is accessed first. */
15665 std::swap (operands[0], operands[1]);
15666 std::swap (operands[2], operands[3]);
15667 std::swap (align[0], align[1]);
15668 if (const_store)
15669 std::swap (operands[4], operands[5]);
15670 }
15671 else
15672 {
15673 gap = offsets[1] - offsets[0];
15674 offset = offsets[0];
15675 }
15676
15677 /* Make sure accesses are to consecutive memory locations. */
15678 if (gap != 4)
15679 return false;
15680
15681 if (!align_ok_ldrd_strd (align[0], offset))
15682 return false;
15683
15684 /* Make sure we generate legal instructions. */
15685 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15686 false, load))
15687 return true;
15688
15689 /* In Thumb state, where registers are almost unconstrained, there
15690 is little hope of fixing it. */
15691 if (TARGET_THUMB2)
15692 return false;
15693
15694 if (load && commute)
15695 {
15696 /* Try reordering registers. */
15697 std::swap (operands[0], operands[1]);
15698 if (operands_ok_ldrd_strd (operands[0], operands[1], base, offset,
15699 false, load))
15700 return true;
15701 }
15702
15703 if (const_store)
15704 {
15705 /* If input registers are dead after this pattern, they can be
15706 reordered or replaced by other registers that are free in the
15707 current pattern. */
15708 if (!peep2_reg_dead_p (4, operands[0])
15709 || !peep2_reg_dead_p (4, operands[1]))
15710 return false;
15711
15712 /* Try to reorder the input registers. */
15713 /* For example, the code
15714 mov r0, 0
15715 mov r1, 1
15716 str r1, [r2]
15717 str r0, [r2, #4]
15718 can be transformed into
15719 mov r1, 0
15720 mov r0, 1
15721 strd r0, [r2]
15722 */
15723 if (operands_ok_ldrd_strd (operands[1], operands[0], base, offset,
15724 false, false))
15725 {
15726 std::swap (operands[0], operands[1]);
15727 return true;
15728 }
15729
15730 /* Try to find a free DI register. */
15731 CLEAR_HARD_REG_SET (regset);
15732 add_to_hard_reg_set (&regset, SImode, REGNO (operands[0]));
15733 add_to_hard_reg_set (&regset, SImode, REGNO (operands[1]));
15734 while (true)
15735 {
15736 tmp = peep2_find_free_register (0, 4, "r", DImode, &regset);
15737 if (tmp == NULL_RTX)
15738 return false;
15739
15740 /* DREG must be an even-numbered register in DImode.
15741 Split it into SI registers. */
15742 operands[0] = simplify_gen_subreg (SImode, tmp, DImode, 0);
15743 operands[1] = simplify_gen_subreg (SImode, tmp, DImode, 4);
15744 gcc_assert (operands[0] != NULL_RTX);
15745 gcc_assert (operands[1] != NULL_RTX);
15746 gcc_assert (REGNO (operands[0]) % 2 == 0);
15747 gcc_assert (REGNO (operands[0]) + 1 == REGNO (operands[1]));
15748
15749 return (operands_ok_ldrd_strd (operands[0], operands[1],
15750 base, offset,
15751 false, load));
15752 }
15753 }
15754
15755 return false;
15756 }
15757
15758
15759
15760 \f
15761 /* Print a symbolic form of X to the debug file, F. */
15762 static void
15763 arm_print_value (FILE *f, rtx x)
15764 {
15765 switch (GET_CODE (x))
15766 {
15767 case CONST_INT:
15768 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
15769 return;
15770
15771 case CONST_DOUBLE:
15772 fprintf (f, "<0x%lx,0x%lx>", (long)XWINT (x, 2), (long)XWINT (x, 3));
15773 return;
15774
15775 case CONST_VECTOR:
15776 {
15777 int i;
15778
15779 fprintf (f, "<");
15780 for (i = 0; i < CONST_VECTOR_NUNITS (x); i++)
15781 {
15782 fprintf (f, HOST_WIDE_INT_PRINT_HEX, INTVAL (CONST_VECTOR_ELT (x, i)));
15783 if (i < (CONST_VECTOR_NUNITS (x) - 1))
15784 fputc (',', f);
15785 }
15786 fprintf (f, ">");
15787 }
15788 return;
15789
15790 case CONST_STRING:
15791 fprintf (f, "\"%s\"", XSTR (x, 0));
15792 return;
15793
15794 case SYMBOL_REF:
15795 fprintf (f, "`%s'", XSTR (x, 0));
15796 return;
15797
15798 case LABEL_REF:
15799 fprintf (f, "L%d", INSN_UID (XEXP (x, 0)));
15800 return;
15801
15802 case CONST:
15803 arm_print_value (f, XEXP (x, 0));
15804 return;
15805
15806 case PLUS:
15807 arm_print_value (f, XEXP (x, 0));
15808 fprintf (f, "+");
15809 arm_print_value (f, XEXP (x, 1));
15810 return;
15811
15812 case PC:
15813 fprintf (f, "pc");
15814 return;
15815
15816 default:
15817 fprintf (f, "????");
15818 return;
15819 }
15820 }
15821 \f
15822 /* Routines for manipulation of the constant pool. */
15823
15824 /* Arm instructions cannot load a large constant directly into a
15825 register; they have to come from a pc relative load. The constant
15826 must therefore be placed in the addressable range of the pc
15827 relative load. Depending on the precise pc relative load
15828 instruction the range is somewhere between 256 bytes and 4k. This
15829 means that we often have to dump a constant inside a function, and
15830 generate code to branch around it.
15831
15832 It is important to minimize this, since the branches will slow
15833 things down and make the code larger.
15834
15835 Normally we can hide the table after an existing unconditional
15836 branch so that there is no interruption of the flow, but in the
15837 worst case the code looks like this:
15838
15839 ldr rn, L1
15840 ...
15841 b L2
15842 align
15843 L1: .long value
15844 L2:
15845 ...
15846
15847 ldr rn, L3
15848 ...
15849 b L4
15850 align
15851 L3: .long value
15852 L4:
15853 ...
15854
15855 We fix this by performing a scan after scheduling, which notices
15856 which instructions need to have their operands fetched from the
15857 constant table and builds the table.
15858
15859 The algorithm starts by building a table of all the constants that
15860 need fixing up and all the natural barriers in the function (places
15861 where a constant table can be dropped without breaking the flow).
15862 For each fixup we note how far the pc-relative replacement will be
15863 able to reach and the offset of the instruction into the function.
15864
15865 Having built the table we then group the fixes together to form
15866 tables that are as large as possible (subject to addressing
15867 constraints) and emit each table of constants after the last
15868 barrier that is within range of all the instructions in the group.
15869 If a group does not contain a barrier, then we forcibly create one
15870 by inserting a jump instruction into the flow. Once the table has
15871 been inserted, the insns are then modified to reference the
15872 relevant entry in the pool.
15873
15874 Possible enhancements to the algorithm (not implemented) are:
15875
15876 1) For some processors and object formats, there may be benefit in
15877 aligning the pools to the start of cache lines; this alignment
15878 would need to be taken into account when calculating addressability
15879 of a pool. */
15880
15881 /* These typedefs are located at the start of this file, so that
15882 they can be used in the prototypes there. This comment is to
15883 remind readers of that fact so that the following structures
15884 can be understood more easily.
15885
15886 typedef struct minipool_node Mnode;
15887 typedef struct minipool_fixup Mfix; */
15888
15889 struct minipool_node
15890 {
15891 /* Doubly linked chain of entries. */
15892 Mnode * next;
15893 Mnode * prev;
15894 /* The maximum offset into the code that this entry can be placed. While
15895 pushing fixes for forward references, all entries are sorted in order
15896 of increasing max_address. */
15897 HOST_WIDE_INT max_address;
15898 /* Similarly for an entry inserted for a backwards ref. */
15899 HOST_WIDE_INT min_address;
15900 /* The number of fixes referencing this entry. This can become zero
15901 if we "unpush" an entry. In this case we ignore the entry when we
15902 come to emit the code. */
15903 int refcount;
15904 /* The offset from the start of the minipool. */
15905 HOST_WIDE_INT offset;
15906 /* The value in table. */
15907 rtx value;
15908 /* The mode of value. */
15909 machine_mode mode;
15910 /* The size of the value. With iWMMXt enabled
15911 sizes > 4 also imply an alignment of 8 bytes. */
15912 int fix_size;
15913 };
15914
15915 struct minipool_fixup
15916 {
15917 Mfix * next;
15918 rtx_insn * insn;
15919 HOST_WIDE_INT address;
15920 rtx * loc;
15921 machine_mode mode;
15922 int fix_size;
15923 rtx value;
15924 Mnode * minipool;
15925 HOST_WIDE_INT forwards;
15926 HOST_WIDE_INT backwards;
15927 };
15928
15929 /* Fixes less than a word need padding out to a word boundary. */
15930 #define MINIPOOL_FIX_SIZE(mode) \
15931 (GET_MODE_SIZE ((mode)) >= 4 ? GET_MODE_SIZE ((mode)) : 4)
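/* For example, an HImode fix (2 bytes) still occupies 4 bytes in the pool,
   while a DImode fix occupies 8.  */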
15932
15933 static Mnode * minipool_vector_head;
15934 static Mnode * minipool_vector_tail;
15935 static rtx_code_label *minipool_vector_label;
15936 static int minipool_pad;
15937
15938 /* The linked list of all minipool fixes required for this function. */
15939 Mfix * minipool_fix_head;
15940 Mfix * minipool_fix_tail;
15941 /* The fix entry for the current minipool, once it has been placed. */
15942 Mfix * minipool_barrier;
15943
15944 #ifndef JUMP_TABLES_IN_TEXT_SECTION
15945 #define JUMP_TABLES_IN_TEXT_SECTION 0
15946 #endif
15947
15948 static HOST_WIDE_INT
15949 get_jump_table_size (rtx_jump_table_data *insn)
15950 {
15951 /* ADDR_VECs only take room if read-only data goes into the text
15952 section. */
15953 if (JUMP_TABLES_IN_TEXT_SECTION || readonly_data_section == text_section)
15954 {
15955 rtx body = PATTERN (insn);
15956 int elt = GET_CODE (body) == ADDR_DIFF_VEC ? 1 : 0;
15957 HOST_WIDE_INT size;
15958 HOST_WIDE_INT modesize;
15959
15960 modesize = GET_MODE_SIZE (GET_MODE (body));
15961 size = modesize * XVECLEN (body, elt);
15962 switch (modesize)
15963 {
15964 case 1:
15965 /* Round up size of TBB table to a halfword boundary. */
15966 size = (size + 1) & ~HOST_WIDE_INT_1;
15967 break;
15968 case 2:
15969 /* No padding necessary for TBH. */
15970 break;
15971 case 4:
15972 /* Add two bytes for alignment on Thumb. */
15973 if (TARGET_THUMB)
15974 size += 2;
15975 break;
15976 default:
15977 gcc_unreachable ();
15978 }
15979 return size;
15980 }
15981
15982 return 0;
15983 }
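
/* Illustration only (not part of GCC): the halfword rounding that
   get_jump_table_size applies to a byte-sized (TBB-style) dispatch table.
   The example_ helper is hypothetical; it only mirrors the
   "(size + 1) & ~1" step above.  */
#if 0
#include <assert.h>

static long
example_tbb_table_size (long num_entries)
{
  long size = 1 * num_entries;	/* One byte per entry.  */
  return (size + 1) & ~1L;	/* Round up to a halfword boundary.  */
}

int
main (void)
{
  assert (example_tbb_table_size (5) == 6);	/* Odd count gets a pad byte.  */
  assert (example_tbb_table_size (8) == 8);
  return 0;
}
#endif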
15984
15985 /* Return the maximum amount of padding that will be inserted before
15986 label LABEL. */
15987
15988 static HOST_WIDE_INT
15989 get_label_padding (rtx label)
15990 {
15991 HOST_WIDE_INT align, min_insn_size;
15992
15993 align = 1 << label_to_alignment (label).levels[0].log;
15994 min_insn_size = TARGET_THUMB ? 2 : 4;
15995 return align > min_insn_size ? align - min_insn_size : 0;
15996 }
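
/* Illustration only (not part of GCC): the worst-case padding computed by
   get_label_padding.  With 8-byte label alignment and 2-byte Thumb insns,
   up to 6 bytes of padding may precede the label; the example_ helper is
   hypothetical and mirrors the expression above.  */
#if 0
#include <assert.h>

static long
example_label_padding (long align, long min_insn_size)
{
  return align > min_insn_size ? align - min_insn_size : 0;
}

int
main (void)
{
  assert (example_label_padding (8, 2) == 6);	/* Thumb insn, 8-byte aligned label.  */
  assert (example_label_padding (4, 4) == 0);	/* ARM insn, word-aligned label.  */
  return 0;
}
#endif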
15997
15998 /* Move a minipool fix MP from its current location to before MAX_MP.
15999 If MAX_MP is NULL, then MP doesn't need moving, but the addressing
16000 constraints may need updating. */
16001 static Mnode *
16002 move_minipool_fix_forward_ref (Mnode *mp, Mnode *max_mp,
16003 HOST_WIDE_INT max_address)
16004 {
16005 /* The code below assumes these are different. */
16006 gcc_assert (mp != max_mp);
16007
16008 if (max_mp == NULL)
16009 {
16010 if (max_address < mp->max_address)
16011 mp->max_address = max_address;
16012 }
16013 else
16014 {
16015 if (max_address > max_mp->max_address - mp->fix_size)
16016 mp->max_address = max_mp->max_address - mp->fix_size;
16017 else
16018 mp->max_address = max_address;
16019
16020 /* Unlink MP from its current position. Since max_mp is non-null,
16021 mp->prev must be non-null. */
16022 mp->prev->next = mp->next;
16023 if (mp->next != NULL)
16024 mp->next->prev = mp->prev;
16025 else
16026 minipool_vector_tail = mp->prev;
16027
16028 /* Re-insert it before MAX_MP. */
16029 mp->next = max_mp;
16030 mp->prev = max_mp->prev;
16031 max_mp->prev = mp;
16032
16033 if (mp->prev != NULL)
16034 mp->prev->next = mp;
16035 else
16036 minipool_vector_head = mp;
16037 }
16038
16039 /* Save the new entry. */
16040 max_mp = mp;
16041
16042 /* Scan over the preceding entries and adjust their addresses as
16043 required. */
16044 while (mp->prev != NULL
16045 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16046 {
16047 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16048 mp = mp->prev;
16049 }
16050
16051 return max_mp;
16052 }
16053
16054 /* Add a constant to the minipool for a forward reference. Returns the
16055 node added or NULL if the constant will not fit in this pool. */
16056 static Mnode *
16057 add_minipool_forward_ref (Mfix *fix)
16058 {
16059 /* If set, max_mp is the first pool_entry that has a lower
16060 constraint than the one we are trying to add. */
16061 Mnode * max_mp = NULL;
16062 HOST_WIDE_INT max_address = fix->address + fix->forwards - minipool_pad;
16063 Mnode * mp;
16064
16065 /* If the minipool starts before the end of FIX->INSN then this FIX
16066 cannot be placed into the current pool. Furthermore, adding the
16067 new constant pool entry may cause the pool to start FIX_SIZE bytes
16068 earlier. */
16069 if (minipool_vector_head &&
16070 (fix->address + get_attr_length (fix->insn)
16071 >= minipool_vector_head->max_address - fix->fix_size))
16072 return NULL;
16073
16074 /* Scan the pool to see if a constant with the same value has
16075 already been added. While we are doing this, also note the
16076 location where we must insert the constant if it doesn't already
16077 exist. */
16078 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16079 {
16080 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16081 && fix->mode == mp->mode
16082 && (!LABEL_P (fix->value)
16083 || (CODE_LABEL_NUMBER (fix->value)
16084 == CODE_LABEL_NUMBER (mp->value)))
16085 && rtx_equal_p (fix->value, mp->value))
16086 {
16087 /* More than one fix references this entry. */
16088 mp->refcount++;
16089 return move_minipool_fix_forward_ref (mp, max_mp, max_address);
16090 }
16091
16092 /* Note the insertion point if necessary. */
16093 if (max_mp == NULL
16094 && mp->max_address > max_address)
16095 max_mp = mp;
16096
16097 /* If we are inserting an 8-byte aligned quantity and
16098 we have not already found an insertion point, then
16099 make sure that all such 8-byte aligned quantities are
16100 placed at the start of the pool. */
16101 if (ARM_DOUBLEWORD_ALIGN
16102 && max_mp == NULL
16103 && fix->fix_size >= 8
16104 && mp->fix_size < 8)
16105 {
16106 max_mp = mp;
16107 max_address = mp->max_address;
16108 }
16109 }
16110
16111 /* The value is not currently in the minipool, so we need to create
16112 a new entry for it. If MAX_MP is NULL, the entry will be put on
16113 the end of the list since the placement is less constrained than
16114 any existing entry. Otherwise, we insert the new fix before
16115 MAX_MP and, if necessary, adjust the constraints on the other
16116 entries. */
16117 mp = XNEW (Mnode);
16118 mp->fix_size = fix->fix_size;
16119 mp->mode = fix->mode;
16120 mp->value = fix->value;
16121 mp->refcount = 1;
16122 /* Not yet required for a backwards ref. */
16123 mp->min_address = -65536;
16124
16125 if (max_mp == NULL)
16126 {
16127 mp->max_address = max_address;
16128 mp->next = NULL;
16129 mp->prev = minipool_vector_tail;
16130
16131 if (mp->prev == NULL)
16132 {
16133 minipool_vector_head = mp;
16134 minipool_vector_label = gen_label_rtx ();
16135 }
16136 else
16137 mp->prev->next = mp;
16138
16139 minipool_vector_tail = mp;
16140 }
16141 else
16142 {
16143 if (max_address > max_mp->max_address - mp->fix_size)
16144 mp->max_address = max_mp->max_address - mp->fix_size;
16145 else
16146 mp->max_address = max_address;
16147
16148 mp->next = max_mp;
16149 mp->prev = max_mp->prev;
16150 max_mp->prev = mp;
16151 if (mp->prev != NULL)
16152 mp->prev->next = mp;
16153 else
16154 minipool_vector_head = mp;
16155 }
16156
16157 /* Save the new entry. */
16158 max_mp = mp;
16159
16160 /* Scan over the preceding entries and adjust their addresses as
16161 required. */
16162 while (mp->prev != NULL
16163 && mp->prev->max_address > mp->max_address - mp->prev->fix_size)
16164 {
16165 mp->prev->max_address = mp->max_address - mp->prev->fix_size;
16166 mp = mp->prev;
16167 }
16168
16169 return max_mp;
16170 }
16171
16172 static Mnode *
16173 move_minipool_fix_backward_ref (Mnode *mp, Mnode *min_mp,
16174 HOST_WIDE_INT min_address)
16175 {
16176 HOST_WIDE_INT offset;
16177
16178 /* The code below assumes these are different. */
16179 gcc_assert (mp != min_mp);
16180
16181 if (min_mp == NULL)
16182 {
16183 if (min_address > mp->min_address)
16184 mp->min_address = min_address;
16185 }
16186 else
16187 {
16188 /* We will adjust this below if it is too loose. */
16189 mp->min_address = min_address;
16190
16191 /* Unlink MP from its current position. Since min_mp is non-null,
16192 mp->next must be non-null. */
16193 mp->next->prev = mp->prev;
16194 if (mp->prev != NULL)
16195 mp->prev->next = mp->next;
16196 else
16197 minipool_vector_head = mp->next;
16198
16199 /* Reinsert it after MIN_MP. */
16200 mp->prev = min_mp;
16201 mp->next = min_mp->next;
16202 min_mp->next = mp;
16203 if (mp->next != NULL)
16204 mp->next->prev = mp;
16205 else
16206 minipool_vector_tail = mp;
16207 }
16208
16209 min_mp = mp;
16210
16211 offset = 0;
16212 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16213 {
16214 mp->offset = offset;
16215 if (mp->refcount > 0)
16216 offset += mp->fix_size;
16217
16218 if (mp->next && mp->next->min_address < mp->min_address + mp->fix_size)
16219 mp->next->min_address = mp->min_address + mp->fix_size;
16220 }
16221
16222 return min_mp;
16223 }
16224
16225 /* Add a constant to the minipool for a backward reference. Returns the
16226 node added or NULL if the constant will not fit in this pool.
16227
16228 Note that the code for insertion for a backwards reference can be
16229 somewhat confusing because the calculated offsets for each fix do
16230 not take into account the size of the pool (which is still under
16231 construction). */
16232 static Mnode *
16233 add_minipool_backward_ref (Mfix *fix)
16234 {
16235 /* If set, min_mp is the last pool_entry that has a lower constraint
16236 than the one we are trying to add. */
16237 Mnode *min_mp = NULL;
16238 /* This can be negative, since it is only a constraint. */
16239 HOST_WIDE_INT min_address = fix->address - fix->backwards;
16240 Mnode *mp;
16241
16242 /* If we can't reach the current pool from this insn, or if we can't
16243 insert this entry at the end of the pool without pushing other
16244 fixes out of range, then we don't try. This ensures that we
16245 can't fail later on. */
16246 if (min_address >= minipool_barrier->address
16247 || (minipool_vector_tail->min_address + fix->fix_size
16248 >= minipool_barrier->address))
16249 return NULL;
16250
16251 /* Scan the pool to see if a constant with the same value has
16252 already been added. While we are doing this, also note the
16253 location where we must insert the constant if it doesn't already
16254 exist. */
16255 for (mp = minipool_vector_tail; mp != NULL; mp = mp->prev)
16256 {
16257 if (GET_CODE (fix->value) == GET_CODE (mp->value)
16258 && fix->mode == mp->mode
16259 && (!LABEL_P (fix->value)
16260 || (CODE_LABEL_NUMBER (fix->value)
16261 == CODE_LABEL_NUMBER (mp->value)))
16262 && rtx_equal_p (fix->value, mp->value)
16263 /* Check that there is enough slack to move this entry to the
16264 end of the table (this is conservative). */
16265 && (mp->max_address
16266 > (minipool_barrier->address
16267 + minipool_vector_tail->offset
16268 + minipool_vector_tail->fix_size)))
16269 {
16270 mp->refcount++;
16271 return move_minipool_fix_backward_ref (mp, min_mp, min_address);
16272 }
16273
16274 if (min_mp != NULL)
16275 mp->min_address += fix->fix_size;
16276 else
16277 {
16278 /* Note the insertion point if necessary. */
16279 if (mp->min_address < min_address)
16280 {
16281 /* For now, we do not allow the insertion of nodes requiring
16282 8-byte alignment anywhere but at the start of the pool. */
16283 if (ARM_DOUBLEWORD_ALIGN
16284 && fix->fix_size >= 8 && mp->fix_size < 8)
16285 return NULL;
16286 else
16287 min_mp = mp;
16288 }
16289 else if (mp->max_address
16290 < minipool_barrier->address + mp->offset + fix->fix_size)
16291 {
16292 /* Inserting before this entry would push the fix beyond
16293 its maximum address (which can happen if we have
16294 re-located a forwards fix); force the new fix to come
16295 after it. */
16296 if (ARM_DOUBLEWORD_ALIGN
16297 && fix->fix_size >= 8 && mp->fix_size < 8)
16298 return NULL;
16299 else
16300 {
16301 min_mp = mp;
16302 min_address = mp->min_address + fix->fix_size;
16303 }
16304 }
16305 /* Do not insert a non-8-byte aligned quantity before 8-byte
16306 aligned quantities. */
16307 else if (ARM_DOUBLEWORD_ALIGN
16308 && fix->fix_size < 8
16309 && mp->fix_size >= 8)
16310 {
16311 min_mp = mp;
16312 min_address = mp->min_address + fix->fix_size;
16313 }
16314 }
16315 }
16316
16317 /* We need to create a new entry. */
16318 mp = XNEW (Mnode);
16319 mp->fix_size = fix->fix_size;
16320 mp->mode = fix->mode;
16321 mp->value = fix->value;
16322 mp->refcount = 1;
16323 mp->max_address = minipool_barrier->address + 65536;
16324
16325 mp->min_address = min_address;
16326
16327 if (min_mp == NULL)
16328 {
16329 mp->prev = NULL;
16330 mp->next = minipool_vector_head;
16331
16332 if (mp->next == NULL)
16333 {
16334 minipool_vector_tail = mp;
16335 minipool_vector_label = gen_label_rtx ();
16336 }
16337 else
16338 mp->next->prev = mp;
16339
16340 minipool_vector_head = mp;
16341 }
16342 else
16343 {
16344 mp->next = min_mp->next;
16345 mp->prev = min_mp;
16346 min_mp->next = mp;
16347
16348 if (mp->next != NULL)
16349 mp->next->prev = mp;
16350 else
16351 minipool_vector_tail = mp;
16352 }
16353
16354 /* Save the new entry. */
16355 min_mp = mp;
16356
16357 if (mp->prev)
16358 mp = mp->prev;
16359 else
16360 mp->offset = 0;
16361
16362 /* Scan over the following entries and adjust their offsets. */
16363 while (mp->next != NULL)
16364 {
16365 if (mp->next->min_address < mp->min_address + mp->fix_size)
16366 mp->next->min_address = mp->min_address + mp->fix_size;
16367
16368 if (mp->refcount)
16369 mp->next->offset = mp->offset + mp->fix_size;
16370 else
16371 mp->next->offset = mp->offset;
16372
16373 mp = mp->next;
16374 }
16375
16376 return min_mp;
16377 }
16378
16379 static void
16380 assign_minipool_offsets (Mfix *barrier)
16381 {
16382 HOST_WIDE_INT offset = 0;
16383 Mnode *mp;
16384
16385 minipool_barrier = barrier;
16386
16387 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16388 {
16389 mp->offset = offset;
16390
16391 if (mp->refcount > 0)
16392 offset += mp->fix_size;
16393 }
16394 }
16395
16396 /* Output the literal table. */
16397 static void
16398 dump_minipool (rtx_insn *scan)
16399 {
16400 Mnode * mp;
16401 Mnode * nmp;
16402 int align64 = 0;
16403
16404 if (ARM_DOUBLEWORD_ALIGN)
16405 for (mp = minipool_vector_head; mp != NULL; mp = mp->next)
16406 if (mp->refcount > 0 && mp->fix_size >= 8)
16407 {
16408 align64 = 1;
16409 break;
16410 }
16411
16412 if (dump_file)
16413 fprintf (dump_file,
16414 ";; Emitting minipool after insn %u; address %ld; align %d (bytes)\n",
16415 INSN_UID (scan), (unsigned long) minipool_barrier->address, align64 ? 8 : 4);
16416
16417 scan = emit_label_after (gen_label_rtx (), scan);
16418 scan = emit_insn_after (align64 ? gen_align_8 () : gen_align_4 (), scan);
16419 scan = emit_label_after (minipool_vector_label, scan);
16420
16421 for (mp = minipool_vector_head; mp != NULL; mp = nmp)
16422 {
16423 if (mp->refcount > 0)
16424 {
16425 if (dump_file)
16426 {
16427 fprintf (dump_file,
16428 ";; Offset %u, min %ld, max %ld ",
16429 (unsigned) mp->offset, (unsigned long) mp->min_address,
16430 (unsigned long) mp->max_address);
16431 arm_print_value (dump_file, mp->value);
16432 fputc ('\n', dump_file);
16433 }
16434
16435 rtx val = copy_rtx (mp->value);
16436
16437 switch (GET_MODE_SIZE (mp->mode))
16438 {
16439 #ifdef HAVE_consttable_1
16440 case 1:
16441 scan = emit_insn_after (gen_consttable_1 (val), scan);
16442 break;
16443
16444 #endif
16445 #ifdef HAVE_consttable_2
16446 case 2:
16447 scan = emit_insn_after (gen_consttable_2 (val), scan);
16448 break;
16449
16450 #endif
16451 #ifdef HAVE_consttable_4
16452 case 4:
16453 scan = emit_insn_after (gen_consttable_4 (val), scan);
16454 break;
16455
16456 #endif
16457 #ifdef HAVE_consttable_8
16458 case 8:
16459 scan = emit_insn_after (gen_consttable_8 (val), scan);
16460 break;
16461
16462 #endif
16463 #ifdef HAVE_consttable_16
16464 case 16:
16465 scan = emit_insn_after (gen_consttable_16 (val), scan);
16466 break;
16467
16468 #endif
16469 default:
16470 gcc_unreachable ();
16471 }
16472 }
16473
16474 nmp = mp->next;
16475 free (mp);
16476 }
16477
16478 minipool_vector_head = minipool_vector_tail = NULL;
16479 scan = emit_insn_after (gen_consttable_end (), scan);
16480 scan = emit_barrier_after (scan);
16481 }
16482
16483 /* Return the cost of forcibly inserting a barrier after INSN. */
16484 static int
16485 arm_barrier_cost (rtx_insn *insn)
16486 {
16487 /* Basing the location of the pool on the loop depth is preferable,
16488 but at the moment, the basic block information seems to be
16489 corrupt by this stage of the compilation. */
16490 int base_cost = 50;
16491 rtx_insn *next = next_nonnote_insn (insn);
16492
16493 if (next != NULL && LABEL_P (next))
16494 base_cost -= 20;
16495
16496 switch (GET_CODE (insn))
16497 {
16498 case CODE_LABEL:
16499 /* It will always be better to place the table before the label, rather
16500 than after it. */
16501 return 50;
16502
16503 case INSN:
16504 case CALL_INSN:
16505 return base_cost;
16506
16507 case JUMP_INSN:
16508 return base_cost - 10;
16509
16510 default:
16511 return base_cost + 10;
16512 }
16513 }
16514
16515 /* Find the best place in the insn stream in the range
16516 (FIX->address,MAX_ADDRESS) to forcibly insert a minipool barrier.
16517 Create the barrier by inserting a jump and add a new fix entry for
16518 it. */
16519 static Mfix *
16520 create_fix_barrier (Mfix *fix, HOST_WIDE_INT max_address)
16521 {
16522 HOST_WIDE_INT count = 0;
16523 rtx_barrier *barrier;
16524 rtx_insn *from = fix->insn;
16525 /* The instruction after which we will insert the jump. */
16526 rtx_insn *selected = NULL;
16527 int selected_cost;
16528 /* The address at which the jump instruction will be placed. */
16529 HOST_WIDE_INT selected_address;
16530 Mfix * new_fix;
16531 HOST_WIDE_INT max_count = max_address - fix->address;
16532 rtx_code_label *label = gen_label_rtx ();
16533
16534 selected_cost = arm_barrier_cost (from);
16535 selected_address = fix->address;
16536
16537 while (from && count < max_count)
16538 {
16539 rtx_jump_table_data *tmp;
16540 int new_cost;
16541
16542 /* This code shouldn't have been called if there was a natural barrier
16543 within range. */
16544 gcc_assert (!BARRIER_P (from));
16545
16546 /* Count the length of this insn. This must stay in sync with the
16547 code that pushes minipool fixes. */
16548 if (LABEL_P (from))
16549 count += get_label_padding (from);
16550 else
16551 count += get_attr_length (from);
16552
16553 /* If there is a jump table, add its length. */
16554 if (tablejump_p (from, NULL, &tmp))
16555 {
16556 count += get_jump_table_size (tmp);
16557
16558 /* Jump tables aren't in a basic block, so base the cost on
16559 the dispatch insn. If we select this location, we will
16560 still put the pool after the table. */
16561 new_cost = arm_barrier_cost (from);
16562
16563 if (count < max_count
16564 && (!selected || new_cost <= selected_cost))
16565 {
16566 selected = tmp;
16567 selected_cost = new_cost;
16568 selected_address = fix->address + count;
16569 }
16570
16571 /* Continue after the dispatch table. */
16572 from = NEXT_INSN (tmp);
16573 continue;
16574 }
16575
16576 new_cost = arm_barrier_cost (from);
16577
16578 if (count < max_count
16579 && (!selected || new_cost <= selected_cost))
16580 {
16581 selected = from;
16582 selected_cost = new_cost;
16583 selected_address = fix->address + count;
16584 }
16585
16586 from = NEXT_INSN (from);
16587 }
16588
16589 /* Make sure that we found a place to insert the jump. */
16590 gcc_assert (selected);
16591
16592 /* Create a new JUMP_INSN that branches around a barrier. */
16593 from = emit_jump_insn_after (gen_jump (label), selected);
16594 JUMP_LABEL (from) = label;
16595 barrier = emit_barrier_after (from);
16596 emit_label_after (label, barrier);
16597
16598 /* Create a minipool barrier entry for the new barrier. */
16599 new_fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* new_fix));
16600 new_fix->insn = barrier;
16601 new_fix->address = selected_address;
16602 new_fix->next = fix->next;
16603 fix->next = new_fix;
16604
16605 return new_fix;
16606 }
16607
16608 /* Record that there is a natural barrier in the insn stream at
16609 ADDRESS. */
16610 static void
16611 push_minipool_barrier (rtx_insn *insn, HOST_WIDE_INT address)
16612 {
16613 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16614
16615 fix->insn = insn;
16616 fix->address = address;
16617
16618 fix->next = NULL;
16619 if (minipool_fix_head != NULL)
16620 minipool_fix_tail->next = fix;
16621 else
16622 minipool_fix_head = fix;
16623
16624 minipool_fix_tail = fix;
16625 }
16626
16627 /* Record INSN, which will need fixing up to load a value from the
16628 minipool. ADDRESS is the offset of the insn since the start of the
16629 function; LOC is a pointer to the part of the insn which requires
16630 fixing; VALUE is the constant that must be loaded, which is of type
16631 MODE. */
16632 static void
16633 push_minipool_fix (rtx_insn *insn, HOST_WIDE_INT address, rtx *loc,
16634 machine_mode mode, rtx value)
16635 {
16636 gcc_assert (!arm_disable_literal_pool);
16637 Mfix * fix = (Mfix *) obstack_alloc (&minipool_obstack, sizeof (* fix));
16638
16639 fix->insn = insn;
16640 fix->address = address;
16641 fix->loc = loc;
16642 fix->mode = mode;
16643 fix->fix_size = MINIPOOL_FIX_SIZE (mode);
16644 fix->value = value;
16645 fix->forwards = get_attr_pool_range (insn);
16646 fix->backwards = get_attr_neg_pool_range (insn);
16647 fix->minipool = NULL;
16648
16649 /* If an insn doesn't have a range defined for it, then it isn't
16650 expecting to be reworked by this code. Better to stop now than
16651 to generate duff assembly code. */
16652 gcc_assert (fix->forwards || fix->backwards);
16653
16654 /* If an entry requires 8-byte alignment then assume all constant pools
16655 require 4 bytes of padding. Trying to do this later on a per-pool
16656 basis is awkward because existing pool entries have to be modified. */
16657 if (ARM_DOUBLEWORD_ALIGN && fix->fix_size >= 8)
16658 minipool_pad = 4;
16659
16660 if (dump_file)
16661 {
16662 fprintf (dump_file,
16663 ";; %smode fixup for i%d; addr %lu, range (%ld,%ld): ",
16664 GET_MODE_NAME (mode),
16665 INSN_UID (insn), (unsigned long) address,
16666 -1 * (long)fix->backwards, (long)fix->forwards);
16667 arm_print_value (dump_file, fix->value);
16668 fprintf (dump_file, "\n");
16669 }
16670
16671 /* Add it to the chain of fixes. */
16672 fix->next = NULL;
16673
16674 if (minipool_fix_head != NULL)
16675 minipool_fix_tail->next = fix;
16676 else
16677 minipool_fix_head = fix;
16678
16679 minipool_fix_tail = fix;
16680 }
16681
16682 /* Return the maximum allowed cost of synthesizing a 64-bit constant inline,
16683 i.e. the largest number of insns for which we still prefer inline
16684 synthesis over loading the constant from the literal pool. */
16685 int
16686 arm_max_const_double_inline_cost ()
16687 {
16688 return ((optimize_size || arm_ld_sched) ? 3 : 4);
16689 }
16690
16691 /* Return the cost of synthesizing a 64-bit constant VAL inline.
16692 Returns the number of insns needed, or 99 if we don't know how to
16693 do it. */
16694 int
16695 arm_const_double_inline_cost (rtx val)
16696 {
16697 rtx lowpart, highpart;
16698 machine_mode mode;
16699
16700 mode = GET_MODE (val);
16701
16702 if (mode == VOIDmode)
16703 mode = DImode;
16704
16705 gcc_assert (GET_MODE_SIZE (mode) == 8);
16706
16707 lowpart = gen_lowpart (SImode, val);
16708 highpart = gen_highpart_mode (SImode, mode, val);
16709
16710 gcc_assert (CONST_INT_P (lowpart));
16711 gcc_assert (CONST_INT_P (highpart));
16712
16713 return (arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (lowpart),
16714 NULL_RTX, NULL_RTX, 0, 0)
16715 + arm_gen_constant (SET, SImode, NULL_RTX, INTVAL (highpart),
16716 NULL_RTX, NULL_RTX, 0, 0));
16717 }
16718
16719 /* Cost of loading a SImode constant. */
16720 static inline int
16721 arm_const_inline_cost (enum rtx_code code, rtx val)
16722 {
16723 return arm_gen_constant (code, SImode, NULL_RTX, INTVAL (val),
16724 NULL_RTX, NULL_RTX, 1, 0);
16725 }
16726
16727 /* Return true if it is worthwhile to split a 64-bit constant into two
16728 32-bit operations. This is the case if optimizing for size, or
16729 if we have load delay slots, or if one 32-bit part can be done with
16730 a single data operation. */
16731 bool
16732 arm_const_double_by_parts (rtx val)
16733 {
16734 machine_mode mode = GET_MODE (val);
16735 rtx part;
16736
16737 if (optimize_size || arm_ld_sched)
16738 return true;
16739
16740 if (mode == VOIDmode)
16741 mode = DImode;
16742
16743 part = gen_highpart_mode (SImode, mode, val);
16744
16745 gcc_assert (CONST_INT_P (part));
16746
16747 if (const_ok_for_arm (INTVAL (part))
16748 || const_ok_for_arm (~INTVAL (part)))
16749 return true;
16750
16751 part = gen_lowpart (SImode, val);
16752
16753 gcc_assert (CONST_INT_P (part));
16754
16755 if (const_ok_for_arm (INTVAL (part))
16756 || const_ok_for_arm (~INTVAL (part)))
16757 return true;
16758
16759 return false;
16760 }
16761
16762 /* Return true if it is possible to inline both the high and low parts
16763 of a 64-bit constant into 32-bit data processing instructions. */
16764 bool
16765 arm_const_double_by_immediates (rtx val)
16766 {
16767 machine_mode mode = GET_MODE (val);
16768 rtx part;
16769
16770 if (mode == VOIDmode)
16771 mode = DImode;
16772
16773 part = gen_highpart_mode (SImode, mode, val);
16774
16775 gcc_assert (CONST_INT_P (part));
16776
16777 if (!const_ok_for_arm (INTVAL (part)))
16778 return false;
16779
16780 part = gen_lowpart (SImode, val);
16781
16782 gcc_assert (CONST_INT_P (part));
16783
16784 if (!const_ok_for_arm (INTVAL (part)))
16785 return false;
16786
16787 return true;
16788 }
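
/* Illustration only (not part of GCC): a simplified, stand-alone model of
   the immediate test that const_ok_for_arm performs for ARM-mode data
   processing instructions, namely an 8-bit value rotated right by an even
   amount.  The real predicate also handles other cases (e.g. Thumb), so
   treat this purely as a sketch; the example_ identifier is hypothetical.  */
#if 0
#include <assert.h>
#include <stdbool.h>
#include <stdint.h>

static bool
example_arm_immediate_p (uint32_t val)
{
  for (int rot = 0; rot < 32; rot += 2)
    {
      /* Rotating VAL left by ROT undoes a rotate-right-by-ROT encoding;
	 if the result fits in 8 bits, VAL is encodable.  */
      uint32_t unrotated = rot ? (val << rot) | (val >> (32 - rot)) : val;
      if (unrotated <= 0xff)
	return true;
    }
  return false;
}

int
main (void)
{
  assert (example_arm_immediate_p (0xff000000u));	/* 0xff ror 8 */
  assert (!example_arm_immediate_p (0x000001ffu));	/* needs 9 contiguous bits */
  /* arm_const_double_by_immediates above asks the analogous question of
     both 32-bit halves of a 64-bit constant.  */
  return 0;
}
#endif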
16789
16790 /* Scan INSN and note any of its operands that need fixing.
16791 If DO_PUSHES is false we do not actually push any of the fixups
16792 needed. */
16793 static void
16794 note_invalid_constants (rtx_insn *insn, HOST_WIDE_INT address, int do_pushes)
16795 {
16796 int opno;
16797
16798 extract_constrain_insn (insn);
16799
16800 if (recog_data.n_alternatives == 0)
16801 return;
16802
16803 /* Fill in recog_op_alt with information about the constraints of
16804 this insn. */
16805 preprocess_constraints (insn);
16806
16807 const operand_alternative *op_alt = which_op_alt ();
16808 for (opno = 0; opno < recog_data.n_operands; opno++)
16809 {
16810 /* Things we need to fix can only occur in inputs. */
16811 if (recog_data.operand_type[opno] != OP_IN)
16812 continue;
16813
16814 /* If this alternative is a memory reference, then any mention
16815 of constants in this alternative is really to fool reload
16816 into allowing us to accept one there. We need to fix them up
16817 now so that we output the right code. */
16818 if (op_alt[opno].memory_ok)
16819 {
16820 rtx op = recog_data.operand[opno];
16821
16822 if (CONSTANT_P (op))
16823 {
16824 if (do_pushes)
16825 push_minipool_fix (insn, address, recog_data.operand_loc[opno],
16826 recog_data.operand_mode[opno], op);
16827 }
16828 else if (MEM_P (op)
16829 && GET_CODE (XEXP (op, 0)) == SYMBOL_REF
16830 && CONSTANT_POOL_ADDRESS_P (XEXP (op, 0)))
16831 {
16832 if (do_pushes)
16833 {
16834 rtx cop = avoid_constant_pool_reference (op);
16835
16836 /* Casting the address of something to a mode narrower
16837 than a word can cause avoid_constant_pool_reference()
16838 to return the pool reference itself. That's no good to
16839 us here. Let's just hope that we can use the
16840 constant pool value directly. */
16841 if (op == cop)
16842 cop = get_pool_constant (XEXP (op, 0));
16843
16844 push_minipool_fix (insn, address,
16845 recog_data.operand_loc[opno],
16846 recog_data.operand_mode[opno], cop);
16847 }
16848
16849 }
16850 }
16851 }
16852
16853 return;
16854 }
16855
16856 /* This function computes the clear mask and PADDING_BITS_TO_CLEAR for structs
16857 and unions in the context of ARMv8-M Security Extensions. It is used as a
16858 helper function for both 'cmse_nonsecure_call' and 'cmse_nonsecure_entry'
16859 functions. The PADDING_BITS_TO_CLEAR pointer can be the base of either one
16860 or four masks, depending on whether it is being computed for a
16861 'cmse_nonsecure_entry' return value or a 'cmse_nonsecure_call' argument
16862 respectively. The tree for the type of the argument or a field within an
16863 argument is passed in ARG_TYPE, the current register this argument or field
16864 starts in is kept in the pointer REGNO and updated accordingly, the bit this
16865 argument or field starts at is passed in STARTING_BIT and the last used bit
16866 is kept in LAST_USED_BIT which is also updated accordingly. */
16867
16868 static unsigned HOST_WIDE_INT
16869 comp_not_to_clear_mask_str_un (tree arg_type, int * regno,
16870 uint32_t * padding_bits_to_clear,
16871 unsigned starting_bit, int * last_used_bit)
16872
16873 {
16874 unsigned HOST_WIDE_INT not_to_clear_reg_mask = 0;
16875
16876 if (TREE_CODE (arg_type) == RECORD_TYPE)
16877 {
16878 unsigned current_bit = starting_bit;
16879 tree field;
16880 long int offset, size;
16881
16882
16883 field = TYPE_FIELDS (arg_type);
16884 while (field)
16885 {
16886 /* The offset within a structure is always an offset from
16887 the start of that structure. Make sure we take that into account in the
16888 calculation of the register-based offset that we use here. */
16889 offset = starting_bit;
16890 offset += TREE_INT_CST_ELT (DECL_FIELD_BIT_OFFSET (field), 0);
16891 offset %= 32;
16892
16893 /* This is the actual size of the field; for bitfields this is the
16894 bitfield width and not the container size. */
16895 size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16896
16897 if (*last_used_bit != offset)
16898 {
16899 if (offset < *last_used_bit)
16900 {
16901 /* This field's offset is before the 'last_used_bit', which
16902 means this field goes in the next register. So we need to
16903 pad the rest of the current register and increase the
16904 register number. */
16905 uint32_t mask;
16906 mask = ((uint32_t)-1) - ((uint32_t) 1 << *last_used_bit);
16907 mask++;
16908
16909 padding_bits_to_clear[*regno] |= mask;
16910 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16911 (*regno)++;
16912 }
16913 else
16914 {
16915 /* Otherwise we pad the bits between the last field's end and
16916 the start of the new field. */
16917 uint32_t mask;
16918
16919 mask = ((uint32_t)-1) >> (32 - offset);
16920 mask -= ((uint32_t) 1 << *last_used_bit) - 1;
16921 padding_bits_to_clear[*regno] |= mask;
16922 }
16923 current_bit = offset;
16924 }
16925
16926 /* Calculate further padding bits for inner structs/unions too. */
16927 if (RECORD_OR_UNION_TYPE_P (TREE_TYPE (field)))
16928 {
16929 *last_used_bit = current_bit;
16930 not_to_clear_reg_mask
16931 |= comp_not_to_clear_mask_str_un (TREE_TYPE (field), regno,
16932 padding_bits_to_clear, offset,
16933 last_used_bit);
16934 }
16935 else
16936 {
16937 /* Update 'current_bit' with this field's size. If the
16938 'current_bit' lies in a subsequent register, update 'regno' and
16939 reset 'current_bit' to point to the current bit in that new
16940 register. */
16941 current_bit += size;
16942 while (current_bit >= 32)
16943 {
16944 current_bit -= 32;
16945 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16946 (*regno)++;
16947 }
16948 *last_used_bit = current_bit;
16949 }
16950
16951 field = TREE_CHAIN (field);
16952 }
16953 not_to_clear_reg_mask |= HOST_WIDE_INT_1U << *regno;
16954 }
16955 else if (TREE_CODE (arg_type) == UNION_TYPE)
16956 {
16957 tree field, field_t;
16958 int i, regno_t, field_size;
16959 int max_reg = -1;
16960 int max_bit = -1;
16961 uint32_t mask;
16962 uint32_t padding_bits_to_clear_res[NUM_ARG_REGS]
16963 = {-1, -1, -1, -1};
16964
16965 /* To compute the padding bits in a union we only consider bits as
16966 padding bits if they are always either padding bits or fall outside a
16967 field's size for all fields in the union. */
16968 field = TYPE_FIELDS (arg_type);
16969 while (field)
16970 {
16971 uint32_t padding_bits_to_clear_t[NUM_ARG_REGS]
16972 = {0U, 0U, 0U, 0U};
16973 int last_used_bit_t = *last_used_bit;
16974 regno_t = *regno;
16975 field_t = TREE_TYPE (field);
16976
16977 /* If the field's type is either a record or a union make sure to
16978 compute their padding bits too. */
16979 if (RECORD_OR_UNION_TYPE_P (field_t))
16980 not_to_clear_reg_mask
16981 |= comp_not_to_clear_mask_str_un (field_t, &regno_t,
16982 &padding_bits_to_clear_t[0],
16983 starting_bit, &last_used_bit_t);
16984 else
16985 {
16986 field_size = TREE_INT_CST_ELT (DECL_SIZE (field), 0);
16987 regno_t = (field_size / 32) + *regno;
16988 last_used_bit_t = (starting_bit + field_size) % 32;
16989 }
16990
16991 for (i = *regno; i < regno_t; i++)
16992 {
16993 /* For all but the last register used by this field only keep the
16994 padding bits that were padding bits in this field. */
16995 padding_bits_to_clear_res[i] &= padding_bits_to_clear_t[i];
16996 }
16997
16998 /* For the last register, keep all padding bits that were padding
16999 bits in this field and any padding bits that are still valid
17000 as padding bits but fall outside of this field's size. */
17001 mask = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit_t)) + 1;
17002 padding_bits_to_clear_res[regno_t]
17003 &= padding_bits_to_clear_t[regno_t] | mask;
17004
17005 /* Update the maximum size of the fields in terms of registers used
17006 ('max_reg') and the 'last_used_bit' in said register. */
17007 if (max_reg < regno_t)
17008 {
17009 max_reg = regno_t;
17010 max_bit = last_used_bit_t;
17011 }
17012 else if (max_reg == regno_t && max_bit < last_used_bit_t)
17013 max_bit = last_used_bit_t;
17014
17015 field = TREE_CHAIN (field);
17016 }
17017
17018 /* Update the current padding_bits_to_clear using the intersection of the
17019 padding bits of all the fields. */
17020 for (i = *regno; i < max_reg; i++)
17021 padding_bits_to_clear[i] |= padding_bits_to_clear_res[i];
17022
17023 /* Do not keep trailing padding bits; we do not know yet whether this
17024 is the end of the argument. */
17025 mask = ((uint32_t) 1 << max_bit) - 1;
17026 padding_bits_to_clear[max_reg]
17027 |= padding_bits_to_clear_res[max_reg] & mask;
17028
17029 *regno = max_reg;
17030 *last_used_bit = max_bit;
17031 }
17032 else
17033 /* This function should only be used for structs and unions. */
17034 gcc_unreachable ();
17035
17036 return not_to_clear_reg_mask;
17037 }
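
/* Illustration only (not part of GCC): a worked example of the padding-mask
   arithmetic used above, assuming "struct { char a; short b; }" is passed in
   r0 with AAPCS layout: 'a' occupies bits 0-7, 'b' is 2-byte aligned and so
   starts at bit 16, leaving bits 8-15 as padding.  The variable names are
   hypothetical.  */
#if 0
#include <assert.h>
#include <stdint.h>

int
main (void)
{
  unsigned offset = 16;		/* Bit at which the new field starts.  */
  int last_used_bit = 8;	/* One past the last bit of the previous field.  */

  /* Padding between the previous field's end and the new field's start
     (the in-register case above): bits 8..15.  */
  uint32_t mask = (((uint32_t) -1) >> (32 - offset))
		  - (((uint32_t) 1 << last_used_bit) - 1);
  assert (mask == 0x0000ff00u);

  /* Padding from the previous field's end up to the top of the register
     (the case where the next field spills into a new register): bits 8..31.  */
  uint32_t tail = (((uint32_t) -1) - ((uint32_t) 1 << last_used_bit)) + 1;
  assert (tail == 0xffffff00u);
  return 0;
}
#endif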
17038
17039 /* In the context of ARMv8-M Security Extensions, this function is used for both
17040 'cmse_nonsecure_call' and 'cmse_nonsecure_entry' functions to compute what
17041 registers are used when returning or passing arguments, which is then
17042 returned as a mask. It will also compute a mask to indicate padding/unused
17043 bits for each of these registers, and passes this through the
17044 PADDING_BITS_TO_CLEAR pointer. The tree of the argument type is passed in
17045 ARG_TYPE, the rtl representation of the argument is passed in ARG_RTX and
17046 the starting register used to pass this argument or return value is passed
17047 in REGNO. It makes use of 'comp_not_to_clear_mask_str_un' to compute these
17048 for struct and union types. */
17049
17050 static unsigned HOST_WIDE_INT
17051 compute_not_to_clear_mask (tree arg_type, rtx arg_rtx, int regno,
17052 uint32_t * padding_bits_to_clear)
17053
17054 {
17055 int last_used_bit = 0;
17056 unsigned HOST_WIDE_INT not_to_clear_mask;
17057
17058 if (RECORD_OR_UNION_TYPE_P (arg_type))
17059 {
17060 not_to_clear_mask
17061 = comp_not_to_clear_mask_str_un (arg_type, &regno,
17062 padding_bits_to_clear, 0,
17063 &last_used_bit);
17064
17065
17066 /* If the 'last_used_bit' is not zero, that means we are still using a
17067 part of the last 'regno'. In such cases we must clear the trailing
17068 bits. Otherwise we are not using regno and we should mark it to be
17069 cleared. */
17070 if (last_used_bit != 0)
17071 padding_bits_to_clear[regno]
17072 |= ((uint32_t)-1) - ((uint32_t) 1 << last_used_bit) + 1;
17073 else
17074 not_to_clear_mask &= ~(HOST_WIDE_INT_1U << regno);
17075 }
17076 else
17077 {
17078 not_to_clear_mask = 0;
17079 /* We are not dealing with structs nor unions. So these arguments may be
17080 passed in floating point registers too. In some cases a BLKmode is
17081 used when returning or passing arguments in multiple VFP registers. */
17082 if (GET_MODE (arg_rtx) == BLKmode)
17083 {
17084 int i, arg_regs;
17085 rtx reg;
17086
17087 /* This should really only occur when dealing with the hard-float
17088 ABI. */
17089 gcc_assert (TARGET_HARD_FLOAT_ABI);
17090
17091 for (i = 0; i < XVECLEN (arg_rtx, 0); i++)
17092 {
17093 reg = XEXP (XVECEXP (arg_rtx, 0, i), 0);
17094 gcc_assert (REG_P (reg));
17095
17096 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (reg);
17097
17098 /* If we are dealing with DF mode, make sure we don't
17099 clear either of the registers it addresses. */
17100 arg_regs = ARM_NUM_REGS (GET_MODE (reg));
17101 if (arg_regs > 1)
17102 {
17103 unsigned HOST_WIDE_INT mask;
17104 mask = HOST_WIDE_INT_1U << (REGNO (reg) + arg_regs);
17105 mask -= HOST_WIDE_INT_1U << REGNO (reg);
17106 not_to_clear_mask |= mask;
17107 }
17108 }
17109 }
17110 else
17111 {
17112 /* Otherwise we can rely on the MODE to determine how many registers
17113 are being used by this argument. */
17114 int arg_regs = ARM_NUM_REGS (GET_MODE (arg_rtx));
17115 not_to_clear_mask |= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17116 if (arg_regs > 1)
17117 {
17118 unsigned HOST_WIDE_INT
17119 mask = HOST_WIDE_INT_1U << (REGNO (arg_rtx) + arg_regs);
17120 mask -= HOST_WIDE_INT_1U << REGNO (arg_rtx);
17121 not_to_clear_mask |= mask;
17122 }
17123 }
17124 }
17125
17126 return not_to_clear_mask;
17127 }
17128
17129 /* Clear any secret values from registers before doing a cmse_nonsecure_call
17130 or returning from a cmse_nonsecure_entry function. TO_CLEAR_BITMAP indicates
17131 which registers are to be fully cleared, using the value in register
17132 CLEARING_REG if more efficient. The PADDING_BITS_TO_CLEAR array, which has
17133 PADDING_BITS_LEN entries, gives the bits that need to be cleared in
17134 caller-saved core registers, with SCRATCH_REG used as a scratch register for that clearing.
17135
17136 NOTE: one of three following assertions must hold:
17137 - SCRATCH_REG is a low register
17138 - CLEARING_REG is in the set of registers fully cleared (ie. its bit is set
17139 in TO_CLEAR_BITMAP)
17140 - CLEARING_REG is a low register. */
17141
17142 static void
17143 cmse_clear_registers (sbitmap to_clear_bitmap, uint32_t *padding_bits_to_clear,
17144 int padding_bits_len, rtx scratch_reg, rtx clearing_reg)
17145 {
17146 bool saved_clearing = false;
17147 rtx saved_clearing_reg = NULL_RTX;
17148 int i, regno, clearing_regno, minregno = R0_REGNUM, maxregno = minregno - 1;
17149
17150 gcc_assert (arm_arch_cmse);
17151
17152 if (!bitmap_empty_p (to_clear_bitmap))
17153 {
17154 minregno = bitmap_first_set_bit (to_clear_bitmap);
17155 maxregno = bitmap_last_set_bit (to_clear_bitmap);
17156 }
17157 clearing_regno = REGNO (clearing_reg);
17158
17159 /* Clear padding bits. */
17160 gcc_assert (padding_bits_len <= NUM_ARG_REGS);
17161 for (i = 0, regno = R0_REGNUM; i < padding_bits_len; i++, regno++)
17162 {
17163 uint64_t mask;
17164 rtx rtx16, dest, cleared_reg = gen_rtx_REG (SImode, regno);
17165
17166 if (padding_bits_to_clear[i] == 0)
17167 continue;
17168
17169 /* If this is a Thumb-1 target and SCRATCH_REG is not a low register, use
17170 CLEARING_REG as scratch. */
17171 if (TARGET_THUMB1
17172 && REGNO (scratch_reg) > LAST_LO_REGNUM)
17173 {
17174 /* clearing_reg is not to be cleared, copy its value into scratch_reg
17175 such that we can use clearing_reg to clear the unused bits in the
17176 arguments. */
17177 if ((clearing_regno > maxregno
17178 || !bitmap_bit_p (to_clear_bitmap, clearing_regno))
17179 && !saved_clearing)
17180 {
17181 gcc_assert (clearing_regno <= LAST_LO_REGNUM);
17182 emit_move_insn (scratch_reg, clearing_reg);
17183 saved_clearing = true;
17184 saved_clearing_reg = scratch_reg;
17185 }
17186 scratch_reg = clearing_reg;
17187 }
17188
17189 /* Fill the lower half of the negated padding_bits_to_clear[i]. */
17190 mask = (~padding_bits_to_clear[i]) & 0xFFFF;
17191 emit_move_insn (scratch_reg, gen_int_mode (mask, SImode));
17192
17193 /* Fill the top half of the negated padding_bits_to_clear[i]. */
17194 mask = (~padding_bits_to_clear[i]) >> 16;
17195 rtx16 = gen_int_mode (16, SImode);
17196 dest = gen_rtx_ZERO_EXTRACT (SImode, scratch_reg, rtx16, rtx16);
17197 if (mask)
17198 emit_insn (gen_rtx_SET (dest, gen_int_mode (mask, SImode)));
17199
17200 emit_insn (gen_andsi3 (cleared_reg, cleared_reg, scratch_reg));
17201 }
17202 if (saved_clearing)
17203 emit_move_insn (clearing_reg, saved_clearing_reg);
17204
17205
17206 /* Clear full registers. */
17207
17208 /* If not marked for clearing, clearing_reg already does not contain
17209 any secret. */
17210 if (clearing_regno <= maxregno
17211 && bitmap_bit_p (to_clear_bitmap, clearing_regno))
17212 {
17213 emit_move_insn (clearing_reg, const0_rtx);
17214 emit_use (clearing_reg);
17215 bitmap_clear_bit (to_clear_bitmap, clearing_regno);
17216 }
17217
17218 for (regno = minregno; regno <= maxregno; regno++)
17219 {
17220 if (!bitmap_bit_p (to_clear_bitmap, regno))
17221 continue;
17222
17223 if (IS_VFP_REGNUM (regno))
17224 {
17225 /* If regno is an even vfp register and its successor is also to
17226 be cleared, use vmov. */
17227 if (TARGET_VFP_DOUBLE
17228 && VFP_REGNO_OK_FOR_DOUBLE (regno)
17229 && bitmap_bit_p (to_clear_bitmap, regno + 1))
17230 {
17231 emit_move_insn (gen_rtx_REG (DFmode, regno),
17232 CONST1_RTX (DFmode));
17233 emit_use (gen_rtx_REG (DFmode, regno));
17234 regno++;
17235 }
17236 else
17237 {
17238 emit_move_insn (gen_rtx_REG (SFmode, regno),
17239 CONST1_RTX (SFmode));
17240 emit_use (gen_rtx_REG (SFmode, regno));
17241 }
17242 }
17243 else
17244 {
17245 emit_move_insn (gen_rtx_REG (SImode, regno), clearing_reg);
17246 emit_use (gen_rtx_REG (SImode, regno));
17247 }
17248 }
17249 }
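
/* Illustration only (not part of GCC): how the two 16-bit moves above (a
   plain move for the low half plus a zero_extract store for the high half)
   rebuild the inverted padding mask, and why ANDing it into the argument
   register clears exactly the padding bits.  The values are hypothetical.  */
#if 0
#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint32_t padding_bits_to_clear = 0x0000ff00u;	/* e.g. bits 8-15 are padding */
  uint32_t inverted = ~padding_bits_to_clear;

  uint32_t lo = inverted & 0xffff;	/* materialised by the first move */
  uint32_t hi = inverted >> 16;		/* written into the top half */
  uint32_t scratch = (hi << 16) | lo;
  assert (scratch == inverted);

  uint32_t arg_reg = 0xdeadbeefu;
  arg_reg &= scratch;			/* the final AND above */
  assert ((arg_reg & padding_bits_to_clear) == 0);
  return 0;
}
#endif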
17250
17251 /* Clear caller-saved registers not used to pass arguments before a
17252 cmse_nonsecure_call. Saving, clearing and restoring of callee-saved
17253 registers is done in the __gnu_cmse_nonsecure_call libcall.
17254 See libgcc/config/arm/cmse_nonsecure_call.S. */
17255
17256 static void
17257 cmse_nonsecure_call_clear_caller_saved (void)
17258 {
17259 basic_block bb;
17260
17261 FOR_EACH_BB_FN (bb, cfun)
17262 {
17263 rtx_insn *insn;
17264
17265 FOR_BB_INSNS (bb, insn)
17266 {
17267 unsigned address_regnum, regno, maxregno =
17268 TARGET_HARD_FLOAT_ABI ? D7_VFP_REGNUM : NUM_ARG_REGS - 1;
17269 auto_sbitmap to_clear_bitmap (maxregno + 1);
17270 rtx_insn *seq;
17271 rtx pat, call, unspec, clearing_reg, ip_reg, shift;
17272 rtx address;
17273 CUMULATIVE_ARGS args_so_far_v;
17274 cumulative_args_t args_so_far;
17275 tree arg_type, fntype;
17276 bool first_param = true;
17277 function_args_iterator args_iter;
17278 uint32_t padding_bits_to_clear[4] = {0U, 0U, 0U, 0U};
17279
17280 if (!NONDEBUG_INSN_P (insn))
17281 continue;
17282
17283 if (!CALL_P (insn))
17284 continue;
17285
17286 pat = PATTERN (insn);
17287 gcc_assert (GET_CODE (pat) == PARALLEL && XVECLEN (pat, 0) > 0);
17288 call = XVECEXP (pat, 0, 0);
17289
17290 /* Get the real call RTX if the insn sets a value, ie. returns. */
17291 if (GET_CODE (call) == SET)
17292 call = SET_SRC (call);
17293
17294 /* Check if it is a cmse_nonsecure_call. */
17295 unspec = XEXP (call, 0);
17296 if (GET_CODE (unspec) != UNSPEC
17297 || XINT (unspec, 1) != UNSPEC_NONSECURE_MEM)
17298 continue;
17299
17300 /* Determine the caller-saved registers we need to clear. */
17301 bitmap_clear (to_clear_bitmap);
17302 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
17303
17304 /* Only look at the caller-saved floating point registers in case of
17305 -mfloat-abi=hard. For -mfloat-abi=softfp we will be using the
17306 lazy store and loads which clear both caller- and callee-saved
17307 registers. */
17308 if (TARGET_HARD_FLOAT_ABI)
17309 {
17310 auto_sbitmap float_bitmap (maxregno + 1);
17311
17312 bitmap_clear (float_bitmap);
17313 bitmap_set_range (float_bitmap, FIRST_VFP_REGNUM,
17314 D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1);
17315 bitmap_ior (to_clear_bitmap, to_clear_bitmap, float_bitmap);
17316 }
17317
17318 /* Make sure the register used to hold the function address is not
17319 cleared. */
17320 address = RTVEC_ELT (XVEC (unspec, 0), 0);
17321 gcc_assert (MEM_P (address));
17322 gcc_assert (REG_P (XEXP (address, 0)));
17323 address_regnum = REGNO (XEXP (address, 0));
17324 if (address_regnum < R0_REGNUM + NUM_ARG_REGS)
17325 bitmap_clear_bit (to_clear_bitmap, address_regnum);
17326
17327 /* Set basic block of call insn so that df rescan is performed on
17328 insns inserted here. */
17329 set_block_for_insn (insn, bb);
17330 df_set_flags (DF_DEFER_INSN_RESCAN);
17331 start_sequence ();
17332
17333 /* Make sure the scheduler doesn't schedule other insns beyond
17334 here. */
17335 emit_insn (gen_blockage ());
17336
17337 /* Walk through all arguments and clear registers appropriately. */
17339 fntype = TREE_TYPE (MEM_EXPR (address));
17340 arm_init_cumulative_args (&args_so_far_v, fntype, NULL_RTX,
17341 NULL_TREE);
17342 args_so_far = pack_cumulative_args (&args_so_far_v);
17343 FOREACH_FUNCTION_ARGS (fntype, arg_type, args_iter)
17344 {
17345 rtx arg_rtx;
17346 uint64_t to_clear_args_mask;
17347 machine_mode arg_mode = TYPE_MODE (arg_type);
17348
17349 if (VOID_TYPE_P (arg_type))
17350 continue;
17351
17352 if (!first_param)
17353 arm_function_arg_advance (args_so_far, arg_mode, arg_type,
17354 true);
17355
17356 arg_rtx = arm_function_arg (args_so_far, arg_mode, arg_type,
17357 true);
17358 gcc_assert (REG_P (arg_rtx));
17359 to_clear_args_mask
17360 = compute_not_to_clear_mask (arg_type, arg_rtx,
17361 REGNO (arg_rtx),
17362 &padding_bits_to_clear[0]);
17363 if (to_clear_args_mask)
17364 {
17365 for (regno = R0_REGNUM; regno <= maxregno; regno++)
17366 {
17367 if (to_clear_args_mask & (1ULL << regno))
17368 bitmap_clear_bit (to_clear_bitmap, regno);
17369 }
17370 }
17371
17372 first_param = false;
17373 }
17374
17375 /* We use right shift and left shift to clear the LSB of the address
17376 we jump to instead of using bic, to avoid having to use an extra
17377 register on Thumb-1. */
17378 clearing_reg = XEXP (address, 0);
17379 shift = gen_rtx_LSHIFTRT (SImode, clearing_reg, const1_rtx);
17380 emit_insn (gen_rtx_SET (clearing_reg, shift));
17381 shift = gen_rtx_ASHIFT (SImode, clearing_reg, const1_rtx);
17382 emit_insn (gen_rtx_SET (clearing_reg, shift));
17383
17384 /* Clear caller-saved registers that leak before doing a non-secure
17385 call. */
17386 ip_reg = gen_rtx_REG (SImode, IP_REGNUM);
17387 cmse_clear_registers (to_clear_bitmap, padding_bits_to_clear,
17388 NUM_ARG_REGS, ip_reg, clearing_reg);
17389
17390 seq = get_insns ();
17391 end_sequence ();
17392 emit_insn_before (seq, insn);
17393 }
17394 }
17395 }
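
/* Illustration only (not part of GCC): the shift pair emitted above is just
   a way of computing "address & ~1" (clearing the LSB) without needing a
   register to hold the mask.  The constant below is hypothetical.  */
#if 0
#include <assert.h>
#include <stdint.h>

int
main (void)
{
  uint32_t addr = 0x08000125u;
  uint32_t cleared = (addr >> 1) << 1;	/* LSR #1 then LSL #1 */
  assert (cleared == (addr & ~1u));
  assert (cleared == 0x08000124u);
  return 0;
}
#endif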
17396
17397 /* Rewrite move insn into subtract of 0 if the condition codes will
17398 be useful in the next conditional jump insn. */
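
/* For example (a sketch, not verbatim compiler output), a Thumb-1 sequence
   along the lines of

	mov	r3, r2
	cmp	r3, #0
	beq	.L2

   can become

	subs	r3, r2, #0
	beq	.L2

   since the SUBS leaves the condition codes describing a comparison of the
   moved value against zero, allowing the separate compare to be dropped when
   the conditional branch is output.  */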
17399
17400 static void
17401 thumb1_reorg (void)
17402 {
17403 basic_block bb;
17404
17405 FOR_EACH_BB_FN (bb, cfun)
17406 {
17407 rtx dest, src;
17408 rtx cmp, op0, op1, set = NULL;
17409 rtx_insn *prev, *insn = BB_END (bb);
17410 bool insn_clobbered = false;
17411
17412 while (insn != BB_HEAD (bb) && !NONDEBUG_INSN_P (insn))
17413 insn = PREV_INSN (insn);
17414
17415 /* Find the last cbranchsi4_insn in basic block BB. */
17416 if (insn == BB_HEAD (bb)
17417 || INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
17418 continue;
17419
17420 /* Get the register with which we are comparing. */
17421 cmp = XEXP (SET_SRC (PATTERN (insn)), 0);
17422 op0 = XEXP (cmp, 0);
17423 op1 = XEXP (cmp, 1);
17424
17425 /* Check that comparison is against ZERO. */
17426 if (!CONST_INT_P (op1) || INTVAL (op1) != 0)
17427 continue;
17428
17429 /* Find the first flag setting insn before INSN in basic block BB. */
17430 gcc_assert (insn != BB_HEAD (bb));
17431 for (prev = PREV_INSN (insn);
17432 (!insn_clobbered
17433 && prev != BB_HEAD (bb)
17434 && (NOTE_P (prev)
17435 || DEBUG_INSN_P (prev)
17436 || ((set = single_set (prev)) != NULL
17437 && get_attr_conds (prev) == CONDS_NOCOND)));
17438 prev = PREV_INSN (prev))
17439 {
17440 if (reg_set_p (op0, prev))
17441 insn_clobbered = true;
17442 }
17443
17444 /* Skip if op0 is clobbered by an insn other than PREV. */
17445 if (insn_clobbered)
17446 continue;
17447
17448 if (!set)
17449 continue;
17450
17451 dest = SET_DEST (set);
17452 src = SET_SRC (set);
17453 if (!low_register_operand (dest, SImode)
17454 || !low_register_operand (src, SImode))
17455 continue;
17456
17457 /* Rewrite move into subtract of 0 if its operand is compared with ZERO
17458 in INSN. Both src and dest of the move insn are checked. */
17459 if (REGNO (op0) == REGNO (src) || REGNO (op0) == REGNO (dest))
17460 {
17461 dest = copy_rtx (dest);
17462 src = copy_rtx (src);
17463 src = gen_rtx_MINUS (SImode, src, const0_rtx);
17464 PATTERN (prev) = gen_rtx_SET (dest, src);
17465 INSN_CODE (prev) = -1;
17466 /* Set test register in INSN to dest. */
17467 XEXP (cmp, 0) = copy_rtx (dest);
17468 INSN_CODE (insn) = -1;
17469 }
17470 }
17471 }
17472
17473 /* Convert instructions to their cc-clobbering variant if possible, since
17474 that allows us to use smaller encodings. */
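
/* For instance (a sketch, not verbatim compiler output): when the condition
   codes are dead after the insn, a plain

	and	r0, r0, r1	@ needs a 32-bit Thumb-2 encoding

   can be rewritten as the flag-setting form

	ands	r0, r1		@ has a 16-bit encoding

   which is why the SETs processed below gain an explicit clobber of
   CC_REGNUM.  */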
17475
17476 static void
17477 thumb2_reorg (void)
17478 {
17479 basic_block bb;
17480 regset_head live;
17481
17482 INIT_REG_SET (&live);
17483
17484 /* We are freeing block_for_insn in the toplev to keep compatibility
17485 with old MDEP_REORGS that are not CFG based. Recompute it now. */
17486 compute_bb_for_insn ();
17487 df_analyze ();
17488
17489 enum Convert_Action {SKIP, CONV, SWAP_CONV};
17490
17491 FOR_EACH_BB_FN (bb, cfun)
17492 {
17493 if ((current_tune->disparage_flag_setting_t16_encodings
17494 == tune_params::DISPARAGE_FLAGS_ALL)
17495 && optimize_bb_for_speed_p (bb))
17496 continue;
17497
17498 rtx_insn *insn;
17499 Convert_Action action = SKIP;
17500 Convert_Action action_for_partial_flag_setting
17501 = ((current_tune->disparage_flag_setting_t16_encodings
17502 != tune_params::DISPARAGE_FLAGS_NEITHER)
17503 && optimize_bb_for_speed_p (bb))
17504 ? SKIP : CONV;
17505
17506 COPY_REG_SET (&live, DF_LR_OUT (bb));
17507 df_simulate_initialize_backwards (bb, &live);
17508 FOR_BB_INSNS_REVERSE (bb, insn)
17509 {
17510 if (NONJUMP_INSN_P (insn)
17511 && !REGNO_REG_SET_P (&live, CC_REGNUM)
17512 && GET_CODE (PATTERN (insn)) == SET)
17513 {
17514 action = SKIP;
17515 rtx pat = PATTERN (insn);
17516 rtx dst = XEXP (pat, 0);
17517 rtx src = XEXP (pat, 1);
17518 rtx op0 = NULL_RTX, op1 = NULL_RTX;
17519
17520 if (UNARY_P (src) || BINARY_P (src))
17521 op0 = XEXP (src, 0);
17522
17523 if (BINARY_P (src))
17524 op1 = XEXP (src, 1);
17525
17526 if (low_register_operand (dst, SImode))
17527 {
17528 switch (GET_CODE (src))
17529 {
17530 case PLUS:
17531 /* Adding two registers and storing the result
17532 in the first source is already a 16-bit
17533 operation. */
17534 if (rtx_equal_p (dst, op0)
17535 && register_operand (op1, SImode))
17536 break;
17537
17538 if (low_register_operand (op0, SImode))
17539 {
17540 /* ADDS <Rd>,<Rn>,<Rm> */
17541 if (low_register_operand (op1, SImode))
17542 action = CONV;
17543 /* ADDS <Rdn>,#<imm8> */
17544 /* SUBS <Rdn>,#<imm8> */
17545 else if (rtx_equal_p (dst, op0)
17546 && CONST_INT_P (op1)
17547 && IN_RANGE (INTVAL (op1), -255, 255))
17548 action = CONV;
17549 /* ADDS <Rd>,<Rn>,#<imm3> */
17550 /* SUBS <Rd>,<Rn>,#<imm3> */
17551 else if (CONST_INT_P (op1)
17552 && IN_RANGE (INTVAL (op1), -7, 7))
17553 action = CONV;
17554 }
17555 /* ADCS <Rd>, <Rn> */
17556 else if (GET_CODE (XEXP (src, 0)) == PLUS
17557 && rtx_equal_p (XEXP (XEXP (src, 0), 0), dst)
17558 && low_register_operand (XEXP (XEXP (src, 0), 1),
17559 SImode)
17560 && COMPARISON_P (op1)
17561 && cc_register (XEXP (op1, 0), VOIDmode)
17562 && maybe_get_arm_condition_code (op1) == ARM_CS
17563 && XEXP (op1, 1) == const0_rtx)
17564 action = CONV;
17565 break;
17566
17567 case MINUS:
17568 /* RSBS <Rd>,<Rn>,#0
17569 Not handled here: see NEG below. */
17570 /* SUBS <Rd>,<Rn>,#<imm3>
17571 SUBS <Rdn>,#<imm8>
17572 Not handled here: see PLUS above. */
17573 /* SUBS <Rd>,<Rn>,<Rm> */
17574 if (low_register_operand (op0, SImode)
17575 && low_register_operand (op1, SImode))
17576 action = CONV;
17577 break;
17578
17579 case MULT:
17580 /* MULS <Rdm>,<Rn>,<Rdm>
17581 As an exception to the rule, this is only used
17582 when optimizing for size since MULS is slow on all
17583 known implementations. We do not even want to use
17584 MULS in cold code, if optimizing for speed, so we
17585 test the global flag here. */
17586 if (!optimize_size)
17587 break;
17588 /* Fall through. */
17589 case AND:
17590 case IOR:
17591 case XOR:
17592 /* ANDS <Rdn>,<Rm> */
17593 if (rtx_equal_p (dst, op0)
17594 && low_register_operand (op1, SImode))
17595 action = action_for_partial_flag_setting;
17596 else if (rtx_equal_p (dst, op1)
17597 && low_register_operand (op0, SImode))
17598 action = action_for_partial_flag_setting == SKIP
17599 ? SKIP : SWAP_CONV;
17600 break;
17601
17602 case ASHIFTRT:
17603 case ASHIFT:
17604 case LSHIFTRT:
17605 /* ASRS <Rdn>,<Rm> */
17606 /* LSRS <Rdn>,<Rm> */
17607 /* LSLS <Rdn>,<Rm> */
17608 if (rtx_equal_p (dst, op0)
17609 && low_register_operand (op1, SImode))
17610 action = action_for_partial_flag_setting;
17611 /* ASRS <Rd>,<Rm>,#<imm5> */
17612 /* LSRS <Rd>,<Rm>,#<imm5> */
17613 /* LSLS <Rd>,<Rm>,#<imm5> */
17614 else if (low_register_operand (op0, SImode)
17615 && CONST_INT_P (op1)
17616 && IN_RANGE (INTVAL (op1), 0, 31))
17617 action = action_for_partial_flag_setting;
17618 break;
17619
17620 case ROTATERT:
17621 /* RORS <Rdn>,<Rm> */
17622 if (rtx_equal_p (dst, op0)
17623 && low_register_operand (op1, SImode))
17624 action = action_for_partial_flag_setting;
17625 break;
17626
17627 case NOT:
17628 /* MVNS <Rd>,<Rm> */
17629 if (low_register_operand (op0, SImode))
17630 action = action_for_partial_flag_setting;
17631 break;
17632
17633 case NEG:
17634 /* NEGS <Rd>,<Rm> (a.k.a RSBS) */
17635 if (low_register_operand (op0, SImode))
17636 action = CONV;
17637 break;
17638
17639 case CONST_INT:
17640 /* MOVS <Rd>,#<imm8> */
17641 if (CONST_INT_P (src)
17642 && IN_RANGE (INTVAL (src), 0, 255))
17643 action = action_for_partial_flag_setting;
17644 break;
17645
17646 case REG:
17647 /* MOVS and MOV<c> with registers have different
17648 encodings, so are not relevant here. */
17649 break;
17650
17651 default:
17652 break;
17653 }
17654 }
17655
17656 if (action != SKIP)
17657 {
17658 rtx ccreg = gen_rtx_REG (CCmode, CC_REGNUM);
17659 rtx clobber = gen_rtx_CLOBBER (VOIDmode, ccreg);
17660 rtvec vec;
17661
17662 if (action == SWAP_CONV)
17663 {
17664 src = copy_rtx (src);
17665 XEXP (src, 0) = op1;
17666 XEXP (src, 1) = op0;
17667 pat = gen_rtx_SET (dst, src);
17668 vec = gen_rtvec (2, pat, clobber);
17669 }
17670 else /* action == CONV */
17671 vec = gen_rtvec (2, pat, clobber);
17672
17673 PATTERN (insn) = gen_rtx_PARALLEL (VOIDmode, vec);
17674 INSN_CODE (insn) = -1;
17675 }
17676 }
17677
17678 if (NONDEBUG_INSN_P (insn))
17679 df_simulate_one_insn_backwards (bb, insn, &live);
17680 }
17681 }
17682
17683 CLEAR_REG_SET (&live);
17684 }
17685
17686 /* GCC puts the pool in the wrong place for ARM, since we can only
17687 load addresses a limited distance around the PC. We do some
17688 special munging to move the constant pool values to the correct
17689 point in the code. */
17690 static void
17691 arm_reorg (void)
17692 {
17693 rtx_insn *insn;
17694 HOST_WIDE_INT address = 0;
17695 Mfix * fix;
17696
17697 if (use_cmse)
17698 cmse_nonsecure_call_clear_caller_saved ();
17699
17700 /* We cannot run the Thumb passes for thunks because there is no CFG. */
17701 if (cfun->is_thunk)
17702 ;
17703 else if (TARGET_THUMB1)
17704 thumb1_reorg ();
17705 else if (TARGET_THUMB2)
17706 thumb2_reorg ();
17707
17708 /* Ensure all insns that must be split have been split at this point.
17709 Otherwise, the pool placement code below may compute incorrect
17710 insn lengths. Note that when optimizing, all insns have already
17711 been split at this point. */
17712 if (!optimize)
17713 split_all_insns_noflow ();
17714
17715 /* Make sure we do not attempt to create a literal pool even though it should
17716 no longer be necessary to create any. */
17717 if (arm_disable_literal_pool)
17718 return;
17719
17720 minipool_fix_head = minipool_fix_tail = NULL;
17721
17722 /* The first insn must always be a note, or the code below won't
17723 scan it properly. */
17724 insn = get_insns ();
17725 gcc_assert (NOTE_P (insn));
17726 minipool_pad = 0;
17727
17728 /* Scan all the insns and record the operands that will need fixing. */
17729 for (insn = next_nonnote_insn (insn); insn; insn = next_nonnote_insn (insn))
17730 {
17731 if (BARRIER_P (insn))
17732 push_minipool_barrier (insn, address);
17733 else if (INSN_P (insn))
17734 {
17735 rtx_jump_table_data *table;
17736
17737 note_invalid_constants (insn, address, true);
17738 address += get_attr_length (insn);
17739
17740 /* If the insn is a vector jump, add the size of the table
17741 and skip the table. */
17742 if (tablejump_p (insn, NULL, &table))
17743 {
17744 address += get_jump_table_size (table);
17745 insn = table;
17746 }
17747 }
17748 else if (LABEL_P (insn))
17749 /* Add the worst-case padding due to alignment. We don't add
17750 the _current_ padding because the minipool insertions
17751 themselves might change it. */
17752 address += get_label_padding (insn);
17753 }
17754
17755 fix = minipool_fix_head;
17756
17757 /* Now scan the fixups and perform the required changes. */
17758 while (fix)
17759 {
17760 Mfix * ftmp;
17761 Mfix * fdel;
17762 Mfix * last_added_fix;
17763 Mfix * last_barrier = NULL;
17764 Mfix * this_fix;
17765
17766 /* Skip any further barriers before the next fix. */
17767 while (fix && BARRIER_P (fix->insn))
17768 fix = fix->next;
17769
17770 /* No more fixes. */
17771 if (fix == NULL)
17772 break;
17773
17774 last_added_fix = NULL;
17775
17776 for (ftmp = fix; ftmp; ftmp = ftmp->next)
17777 {
17778 if (BARRIER_P (ftmp->insn))
17779 {
17780 if (ftmp->address >= minipool_vector_head->max_address)
17781 break;
17782
17783 last_barrier = ftmp;
17784 }
17785 else if ((ftmp->minipool = add_minipool_forward_ref (ftmp)) == NULL)
17786 break;
17787
17788 last_added_fix = ftmp; /* Keep track of the last fix added. */
17789 }
17790
17791 /* If we found a barrier, drop back to that; any fixes that we
17792 could have reached but come after the barrier will now go in
17793 the next mini-pool. */
17794 if (last_barrier != NULL)
17795 {
17796 /* Reduce the refcount for those fixes that won't go into this
17797 pool after all. */
17798 for (fdel = last_barrier->next;
17799 fdel && fdel != ftmp;
17800 fdel = fdel->next)
17801 {
17802 fdel->minipool->refcount--;
17803 fdel->minipool = NULL;
17804 }
17805
17806 ftmp = last_barrier;
17807 }
17808 else
17809 {
17810 /* ftmp is the first fix that we can't fit into this pool and
17811 there are no natural barriers that we could use.  Insert a
17812 new barrier in the code somewhere between the previous
17813 fix and this one, and arrange to jump around it. */
17814 HOST_WIDE_INT max_address;
17815
17816 /* The last item on the list of fixes must be a barrier, so
17817 we can never run off the end of the list of fixes without
17818 last_barrier being set. */
17819 gcc_assert (ftmp);
17820
17821 max_address = minipool_vector_head->max_address;
17822 /* Check that there isn't another fix that is in range that
17823 we couldn't fit into this pool because the pool was
17824 already too large: we need to put the pool before such an
17825 instruction. The pool itself may come just after the
17826 fix because create_fix_barrier also allows space for a
17827 jump instruction. */
17828 if (ftmp->address < max_address)
17829 max_address = ftmp->address + 1;
17830
17831 last_barrier = create_fix_barrier (last_added_fix, max_address);
17832 }
17833
17834 assign_minipool_offsets (last_barrier);
17835
17836 while (ftmp)
17837 {
17838 if (!BARRIER_P (ftmp->insn)
17839 && ((ftmp->minipool = add_minipool_backward_ref (ftmp))
17840 == NULL))
17841 break;
17842
17843 ftmp = ftmp->next;
17844 }
17845
17846 /* Scan over the fixes we have identified for this pool, fixing them
17847 up and adding the constants to the pool itself. */
17848 for (this_fix = fix; this_fix && ftmp != this_fix;
17849 this_fix = this_fix->next)
17850 if (!BARRIER_P (this_fix->insn))
17851 {
17852 rtx addr
17853 = plus_constant (Pmode,
17854 gen_rtx_LABEL_REF (VOIDmode,
17855 minipool_vector_label),
17856 this_fix->minipool->offset);
17857 *this_fix->loc = gen_rtx_MEM (this_fix->mode, addr);
17858 }
17859
17860 dump_minipool (last_barrier->insn);
17861 fix = ftmp;
17862 }
17863
17864 /* From now on we must synthesize any constants that we can't handle
17865 directly. This can happen if the RTL gets split during final
17866 instruction generation. */
17867 cfun->machine->after_arm_reorg = 1;
17868
17869 /* Free the minipool memory. */
17870 obstack_free (&minipool_obstack, minipool_startobj);
17871 }
17872 \f
17873 /* Routines to output assembly language. */
17874
17875 /* Return string representation of passed in real value. */
17876 static const char *
17877 fp_const_from_val (REAL_VALUE_TYPE *r)
17878 {
17879 if (!fp_consts_inited)
17880 init_fp_table ();
17881
17882 gcc_assert (real_equal (r, &value_fp0));
17883 return "0";
17884 }
17885
17886 /* OPERANDS[0] is the entire list of insns that constitute pop,
17887 OPERANDS[1] is the base register, RETURN_PC is true iff return insn
17888 is in the list, UPDATE is true iff the list contains explicit
17889 update of base register. */
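/* A minimal illustration of the output (the register list and condition
   are assumed): popping { r4, r5, pc } with the SP base and UPDATE true
   gives "pop {r4, r5, pc}", while the same list on return from an
   interrupt handler is instead emitted as "ldmfd sp!, {r4, r5, pc}^" so
   that the SPSR is restored as well.  */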
17890 void
17891 arm_output_multireg_pop (rtx *operands, bool return_pc, rtx cond, bool reverse,
17892 bool update)
17893 {
17894 int i;
17895 char pattern[100];
17896 int offset;
17897 const char *conditional;
17898 int num_saves = XVECLEN (operands[0], 0);
17899 unsigned int regno;
17900 unsigned int regno_base = REGNO (operands[1]);
17901 bool interrupt_p = IS_INTERRUPT (arm_current_func_type ());
17902
17903 offset = 0;
17904 offset += update ? 1 : 0;
17905 offset += return_pc ? 1 : 0;
17906
17907 /* Is the base register in the list? */
17908 for (i = offset; i < num_saves; i++)
17909 {
17910 regno = REGNO (XEXP (XVECEXP (operands[0], 0, i), 0));
17911 /* If SP is in the list, then the base register must be SP. */
17912 gcc_assert ((regno != SP_REGNUM) || (regno_base == SP_REGNUM));
17913 /* If base register is in the list, there must be no explicit update. */
17914 if (regno == regno_base)
17915 gcc_assert (!update);
17916 }
17917
17918 conditional = reverse ? "%?%D0" : "%?%d0";
17919 /* Can't use POP if returning from an interrupt. */
17920 if ((regno_base == SP_REGNUM) && update && !(interrupt_p && return_pc))
17921 sprintf (pattern, "pop%s\t{", conditional);
17922 else
17923 {
17924 /* Output ldmfd when the base register is SP, otherwise output ldmia.
17925 It's just a convention; their semantics are identical. */
17926 if (regno_base == SP_REGNUM)
17927 sprintf (pattern, "ldmfd%s\t", conditional);
17928 else if (update)
17929 sprintf (pattern, "ldmia%s\t", conditional);
17930 else
17931 sprintf (pattern, "ldm%s\t", conditional);
17932
17933 strcat (pattern, reg_names[regno_base]);
17934 if (update)
17935 strcat (pattern, "!, {");
17936 else
17937 strcat (pattern, ", {");
17938 }
17939
17940 /* Output the first destination register. */
17941 strcat (pattern,
17942 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, offset), 0))]);
17943
17944 /* Output the rest of the destination registers. */
17945 for (i = offset + 1; i < num_saves; i++)
17946 {
17947 strcat (pattern, ", ");
17948 strcat (pattern,
17949 reg_names[REGNO (XEXP (XVECEXP (operands[0], 0, i), 0))]);
17950 }
17951
17952 strcat (pattern, "}");
17953
17954 if (interrupt_p && return_pc)
17955 strcat (pattern, "^");
17956
17957 output_asm_insn (pattern, &cond);
17958 }
17959
17960
17961 /* Output the assembly for a store multiple. */
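/* For illustration (assumed register choices): with the stack pointer as
   the base this emits e.g. "vpush.64 {d8, d9, d10}", otherwise it emits
   the writeback form "vstmdb.64 r4!, {d8, d9, d10}".  */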
17962
17963 const char *
17964 vfp_output_vstmd (rtx * operands)
17965 {
17966 char pattern[100];
17967 int p;
17968 int base;
17969 int i;
17970 rtx addr_reg = REG_P (XEXP (operands[0], 0))
17971 ? XEXP (operands[0], 0)
17972 : XEXP (XEXP (operands[0], 0), 0);
17973 bool push_p = REGNO (addr_reg) == SP_REGNUM;
17974
17975 if (push_p)
17976 strcpy (pattern, "vpush%?.64\t{%P1");
17977 else
17978 strcpy (pattern, "vstmdb%?.64\t%m0!, {%P1");
17979
17980 p = strlen (pattern);
17981
17982 gcc_assert (REG_P (operands[1]));
17983
17984 base = (REGNO (operands[1]) - FIRST_VFP_REGNUM) / 2;
17985 for (i = 1; i < XVECLEN (operands[2], 0); i++)
17986 {
17987 p += sprintf (&pattern[p], ", d%d", base + i);
17988 }
17989 strcpy (&pattern[p], "}");
17990
17991 output_asm_insn (pattern, operands);
17992 return "";
17993 }
17994
17995
17996 /* Emit RTL to save a block of VFP register pairs to the stack.  Returns the
17997 number of bytes pushed. */
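/* A worked example of the return value (counts assumed): saving three
   D registers pushes 3 * 8 == 24 bytes.  Note the ARM10 VFPr1 workaround
   below: on a pre-ARMv6 core a request for exactly two registers is
   widened to three, so the call then returns 24 rather than 16.  */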
17998
17999 static int
18000 vfp_emit_fstmd (int base_reg, int count)
18001 {
18002 rtx par;
18003 rtx dwarf;
18004 rtx tmp, reg;
18005 int i;
18006
18007 /* Work around the ARM10 VFPr1 bug.  Data corruption can occur when exactly
18008 two register pairs are stored by a store multiple insn.  We avoid this
18009 by pushing an extra pair. */
18010 if (count == 2 && !arm_arch6)
18011 {
18012 if (base_reg == LAST_VFP_REGNUM - 3)
18013 base_reg -= 2;
18014 count++;
18015 }
18016
18017 /* FSTMD may not store more than 16 doubleword registers at once. Split
18018 larger stores into multiple parts (up to a maximum of two, in
18019 practice). */
18020 if (count > 16)
18021 {
18022 int saved;
18023 /* NOTE: base_reg is an internal register number, so each D register
18024 counts as 2. */
18025 saved = vfp_emit_fstmd (base_reg + 32, count - 16);
18026 saved += vfp_emit_fstmd (base_reg, 16);
18027 return saved;
18028 }
18029
18030 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (count));
18031 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (count + 1));
18032
18033 reg = gen_rtx_REG (DFmode, base_reg);
18034 base_reg += 2;
18035
18036 XVECEXP (par, 0, 0)
18037 = gen_rtx_SET (gen_frame_mem
18038 (BLKmode,
18039 gen_rtx_PRE_MODIFY (Pmode,
18040 stack_pointer_rtx,
18041 plus_constant
18042 (Pmode, stack_pointer_rtx,
18043 - (count * 8)))
18044 ),
18045 gen_rtx_UNSPEC (BLKmode,
18046 gen_rtvec (1, reg),
18047 UNSPEC_PUSH_MULT));
18048
18049 tmp = gen_rtx_SET (stack_pointer_rtx,
18050 plus_constant (Pmode, stack_pointer_rtx, -(count * 8)));
18051 RTX_FRAME_RELATED_P (tmp) = 1;
18052 XVECEXP (dwarf, 0, 0) = tmp;
18053
18054 tmp = gen_rtx_SET (gen_frame_mem (DFmode, stack_pointer_rtx), reg);
18055 RTX_FRAME_RELATED_P (tmp) = 1;
18056 XVECEXP (dwarf, 0, 1) = tmp;
18057
18058 for (i = 1; i < count; i++)
18059 {
18060 reg = gen_rtx_REG (DFmode, base_reg);
18061 base_reg += 2;
18062 XVECEXP (par, 0, i) = gen_rtx_USE (VOIDmode, reg);
18063
18064 tmp = gen_rtx_SET (gen_frame_mem (DFmode,
18065 plus_constant (Pmode,
18066 stack_pointer_rtx,
18067 i * 8)),
18068 reg);
18069 RTX_FRAME_RELATED_P (tmp) = 1;
18070 XVECEXP (dwarf, 0, i + 1) = tmp;
18071 }
18072
18073 par = emit_insn (par);
18074 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
18075 RTX_FRAME_RELATED_P (par) = 1;
18076
18077 return count * 8;
18078 }
18079
18080 /* Return true if -mcmse has been passed and the function pointed to by ADDR
18081 has the cmse_nonsecure_call attribute; return false otherwise. */
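/* A user-level sketch of the kind of declaration this detects (the
   typedef and variable names are assumed):

     typedef void __attribute__ ((cmse_nonsecure_call)) ns_fn_t (int);
     ns_fn_t *callback;   /* Calls through *callback are nonsecure.  */

   Such calls are recognised here when -mcmse is in effect.  */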
18082
18083 bool
18084 detect_cmse_nonsecure_call (tree addr)
18085 {
18086 if (!addr)
18087 return false;
18088
18089 tree fntype = TREE_TYPE (addr);
18090 if (use_cmse && lookup_attribute ("cmse_nonsecure_call",
18091 TYPE_ATTRIBUTES (fntype)))
18092 return true;
18093 return false;
18094 }
18095
18096
18097 /* Emit a call instruction with pattern PAT. ADDR is the address of
18098 the call target. */
18099
18100 void
18101 arm_emit_call_insn (rtx pat, rtx addr, bool sibcall)
18102 {
18103 rtx insn;
18104
18105 insn = emit_call_insn (pat);
18106
18107 /* The PIC register is live on entry to VxWorks PIC PLT entries.
18108 If the call might use such an entry, add a use of the PIC register
18109 to the instruction's CALL_INSN_FUNCTION_USAGE. */
18110 if (TARGET_VXWORKS_RTP
18111 && flag_pic
18112 && !sibcall
18113 && GET_CODE (addr) == SYMBOL_REF
18114 && (SYMBOL_REF_DECL (addr)
18115 ? !targetm.binds_local_p (SYMBOL_REF_DECL (addr))
18116 : !SYMBOL_REF_LOCAL_P (addr)))
18117 {
18118 require_pic_register (NULL_RTX, false /*compute_now*/);
18119 use_reg (&CALL_INSN_FUNCTION_USAGE (insn), cfun->machine->pic_reg);
18120 }
18121
18122 if (TARGET_AAPCS_BASED)
18123 {
18124 /* For AAPCS, IP and CC can be clobbered by veneers inserted by the
18125 linker. We need to add an IP clobber to allow setting
18126 TARGET_CALL_FUSAGE_CONTAINS_NON_CALLEE_CLOBBERS to true. A CC clobber
18127 is not needed since it's a fixed register. */
18128 rtx *fusage = &CALL_INSN_FUNCTION_USAGE (insn);
18129 clobber_reg (fusage, gen_rtx_REG (word_mode, IP_REGNUM));
18130 }
18131 }
18132
18133 /* Output a 'call' insn. */
18134 const char *
18135 output_call (rtx *operands)
18136 {
18137 gcc_assert (!arm_arch5t); /* Patterns should call blx <reg> directly. */
18138
18139 /* Handle calls to lr using ip (which may be clobbered in subr anyway). */
18140 if (REGNO (operands[0]) == LR_REGNUM)
18141 {
18142 operands[0] = gen_rtx_REG (SImode, IP_REGNUM);
18143 output_asm_insn ("mov%?\t%0, %|lr", operands);
18144 }
18145
18146 output_asm_insn ("mov%?\t%|lr, %|pc", operands);
18147
18148 if (TARGET_INTERWORK || arm_arch4t)
18149 output_asm_insn ("bx%?\t%0", operands);
18150 else
18151 output_asm_insn ("mov%?\t%|pc, %0", operands);
18152
18153 return "";
18154 }
18155
18156 /* Output a move of a long double from ARM registers to ARM registers.
18157 OPERANDS[0] is the destination.
18158 OPERANDS[1] is the source. */
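/* Illustrative only (assumed register numbers): moving {r1, r2, r3} into
   {r2, r3, r4} copies from the highest word downwards (mov r4, r3;
   mov r3, r2; mov r2, r1) so that no source register is overwritten
   before it has been read; the opposite direction copies upwards.  */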
18159 const char *
18160 output_mov_long_double_arm_from_arm (rtx *operands)
18161 {
18162 /* We have to be careful here because the two might overlap. */
18163 int dest_start = REGNO (operands[0]);
18164 int src_start = REGNO (operands[1]);
18165 rtx ops[2];
18166 int i;
18167
18168 if (dest_start < src_start)
18169 {
18170 for (i = 0; i < 3; i++)
18171 {
18172 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18173 ops[1] = gen_rtx_REG (SImode, src_start + i);
18174 output_asm_insn ("mov%?\t%0, %1", ops);
18175 }
18176 }
18177 else
18178 {
18179 for (i = 2; i >= 0; i--)
18180 {
18181 ops[0] = gen_rtx_REG (SImode, dest_start + i);
18182 ops[1] = gen_rtx_REG (SImode, src_start + i);
18183 output_asm_insn ("mov%?\t%0, %1", ops);
18184 }
18185 }
18186
18187 return "";
18188 }
18189
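/* Emit the two-insn sequence used to load a 32-bit value into DEST from
   SRC.  For a constant such as 0x12345678 (an assumed example value) the
   low half is set first and the high half is then written via a
   zero_extract, which typically assembles to "movw dest, #0x5678"
   followed by "movt dest, #0x1234".  For a symbolic SRC a HIGH/LO_SUM
   pair is emitted instead.  */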
18190 void
18191 arm_emit_movpair (rtx dest, rtx src)
18192 {
18193 /* If the src is an immediate, simplify it. */
18194 if (CONST_INT_P (src))
18195 {
18196 HOST_WIDE_INT val = INTVAL (src);
18197 emit_set_insn (dest, GEN_INT (val & 0x0000ffff));
18198 if ((val >> 16) & 0x0000ffff)
18199 {
18200 emit_set_insn (gen_rtx_ZERO_EXTRACT (SImode, dest, GEN_INT (16),
18201 GEN_INT (16)),
18202 GEN_INT ((val >> 16) & 0x0000ffff));
18203 rtx_insn *insn = get_last_insn ();
18204 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18205 }
18206 return;
18207 }
18208 emit_set_insn (dest, gen_rtx_HIGH (SImode, src));
18209 emit_set_insn (dest, gen_rtx_LO_SUM (SImode, dest, src));
18210 rtx_insn *insn = get_last_insn ();
18211 set_unique_reg_note (insn, REG_EQUAL, copy_rtx (src));
18212 }
18213
18214 /* Output a move between double words. It must be REG<-MEM
18215 or MEM<-REG. */
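/* Illustrative only (assumed registers): a DImode load whose address is a
   plain base register comes out as "ldrd r0, [r2]" when LDRD is usable,
   or as an equivalent "ldmia" of the register pair otherwise; the cases
   below handle the various autoincrement and offset address forms.  */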
18216 const char *
18217 output_move_double (rtx *operands, bool emit, int *count)
18218 {
18219 enum rtx_code code0 = GET_CODE (operands[0]);
18220 enum rtx_code code1 = GET_CODE (operands[1]);
18221 rtx otherops[3];
18222 if (count)
18223 *count = 1;
18224
18225 /* The only case when this might happen is when
18226 you are looking at the length of a DImode instruction
18227 that has an invalid constant in it. */
18228 if (code0 == REG && code1 != MEM)
18229 {
18230 gcc_assert (!emit);
18231 *count = 2;
18232 return "";
18233 }
18234
18235 if (code0 == REG)
18236 {
18237 unsigned int reg0 = REGNO (operands[0]);
18238
18239 otherops[0] = gen_rtx_REG (SImode, 1 + reg0);
18240
18241 gcc_assert (code1 == MEM); /* Constraints should ensure this. */
18242
18243 switch (GET_CODE (XEXP (operands[1], 0)))
18244 {
18245 case REG:
18246
18247 if (emit)
18248 {
18249 if (TARGET_LDRD
18250 && !(fix_cm3_ldrd && reg0 == REGNO (XEXP (operands[1], 0))))
18251 output_asm_insn ("ldrd%?\t%0, [%m1]", operands);
18252 else
18253 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18254 }
18255 break;
18256
18257 case PRE_INC:
18258 gcc_assert (TARGET_LDRD);
18259 if (emit)
18260 output_asm_insn ("ldrd%?\t%0, [%m1, #8]!", operands);
18261 break;
18262
18263 case PRE_DEC:
18264 if (emit)
18265 {
18266 if (TARGET_LDRD)
18267 output_asm_insn ("ldrd%?\t%0, [%m1, #-8]!", operands);
18268 else
18269 output_asm_insn ("ldmdb%?\t%m1!, %M0", operands);
18270 }
18271 break;
18272
18273 case POST_INC:
18274 if (emit)
18275 {
18276 if (TARGET_LDRD)
18277 output_asm_insn ("ldrd%?\t%0, [%m1], #8", operands);
18278 else
18279 output_asm_insn ("ldmia%?\t%m1!, %M0", operands);
18280 }
18281 break;
18282
18283 case POST_DEC:
18284 gcc_assert (TARGET_LDRD);
18285 if (emit)
18286 output_asm_insn ("ldrd%?\t%0, [%m1], #-8", operands);
18287 break;
18288
18289 case PRE_MODIFY:
18290 case POST_MODIFY:
18291 /* Autoincrement addressing modes should never have overlapping
18292 base and destination registers, and overlapping index registers
18293 are already prohibited, so this doesn't need to worry about
18294 fix_cm3_ldrd. */
18295 otherops[0] = operands[0];
18296 otherops[1] = XEXP (XEXP (XEXP (operands[1], 0), 1), 0);
18297 otherops[2] = XEXP (XEXP (XEXP (operands[1], 0), 1), 1);
18298
18299 if (GET_CODE (XEXP (operands[1], 0)) == PRE_MODIFY)
18300 {
18301 if (reg_overlap_mentioned_p (otherops[0], otherops[2]))
18302 {
18303 /* Registers overlap so split out the increment. */
18304 if (emit)
18305 {
18306 output_asm_insn ("add%?\t%1, %1, %2", otherops);
18307 output_asm_insn ("ldrd%?\t%0, [%1] @split", otherops);
18308 }
18309 if (count)
18310 *count = 2;
18311 }
18312 else
18313 {
18314 /* Use a single insn if we can.
18315 FIXME: IWMMXT allows offsets larger than ldrd can
18316 handle, fix these up with a pair of ldr. */
18317 if (TARGET_THUMB2
18318 || !CONST_INT_P (otherops[2])
18319 || (INTVAL (otherops[2]) > -256
18320 && INTVAL (otherops[2]) < 256))
18321 {
18322 if (emit)
18323 output_asm_insn ("ldrd%?\t%0, [%1, %2]!", otherops);
18324 }
18325 else
18326 {
18327 if (emit)
18328 {
18329 output_asm_insn ("ldr%?\t%0, [%1, %2]!", otherops);
18330 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18331 }
18332 if (count)
18333 *count = 2;
18334
18335 }
18336 }
18337 }
18338 else
18339 {
18340 /* Use a single insn if we can.
18341 FIXME: IWMMXT allows offsets larger than ldrd can handle,
18342 fix these up with a pair of ldr. */
18343 if (TARGET_THUMB2
18344 || !CONST_INT_P (otherops[2])
18345 || (INTVAL (otherops[2]) > -256
18346 && INTVAL (otherops[2]) < 256))
18347 {
18348 if (emit)
18349 output_asm_insn ("ldrd%?\t%0, [%1], %2", otherops);
18350 }
18351 else
18352 {
18353 if (emit)
18354 {
18355 output_asm_insn ("ldr%?\t%H0, [%1, #4]", otherops);
18356 output_asm_insn ("ldr%?\t%0, [%1], %2", otherops);
18357 }
18358 if (count)
18359 *count = 2;
18360 }
18361 }
18362 break;
18363
18364 case LABEL_REF:
18365 case CONST:
18366 /* We might be able to use ldrd %0, %1 here. However the range is
18367 different to ldr/adr, and it is broken on some ARMv7-M
18368 implementations. */
18369 /* Use the second register of the pair to avoid problematic
18370 overlap. */
18371 otherops[1] = operands[1];
18372 if (emit)
18373 output_asm_insn ("adr%?\t%0, %1", otherops);
18374 operands[1] = otherops[0];
18375 if (emit)
18376 {
18377 if (TARGET_LDRD)
18378 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18379 else
18380 output_asm_insn ("ldmia%?\t%1, %M0", operands);
18381 }
18382
18383 if (count)
18384 *count = 2;
18385 break;
18386
18387 /* ??? This needs checking for thumb2. */
18388 default:
18389 if (arm_add_operand (XEXP (XEXP (operands[1], 0), 1),
18390 GET_MODE (XEXP (XEXP (operands[1], 0), 1))))
18391 {
18392 otherops[0] = operands[0];
18393 otherops[1] = XEXP (XEXP (operands[1], 0), 0);
18394 otherops[2] = XEXP (XEXP (operands[1], 0), 1);
18395
18396 if (GET_CODE (XEXP (operands[1], 0)) == PLUS)
18397 {
18398 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18399 {
18400 switch ((int) INTVAL (otherops[2]))
18401 {
18402 case -8:
18403 if (emit)
18404 output_asm_insn ("ldmdb%?\t%1, %M0", otherops);
18405 return "";
18406 case -4:
18407 if (TARGET_THUMB2)
18408 break;
18409 if (emit)
18410 output_asm_insn ("ldmda%?\t%1, %M0", otherops);
18411 return "";
18412 case 4:
18413 if (TARGET_THUMB2)
18414 break;
18415 if (emit)
18416 output_asm_insn ("ldmib%?\t%1, %M0", otherops);
18417 return "";
18418 }
18419 }
18420 otherops[0] = gen_rtx_REG (SImode, REGNO (operands[0]) + 1);
18421 operands[1] = otherops[0];
18422 if (TARGET_LDRD
18423 && (REG_P (otherops[2])
18424 || TARGET_THUMB2
18425 || (CONST_INT_P (otherops[2])
18426 && INTVAL (otherops[2]) > -256
18427 && INTVAL (otherops[2]) < 256)))
18428 {
18429 if (reg_overlap_mentioned_p (operands[0],
18430 otherops[2]))
18431 {
18432 /* Swap base and index registers over to
18433 avoid a conflict. */
18434 std::swap (otherops[1], otherops[2]);
18435 }
18436 /* If both registers conflict, it will usually
18437 have been fixed by a splitter. */
18438 if (reg_overlap_mentioned_p (operands[0], otherops[2])
18439 || (fix_cm3_ldrd && reg0 == REGNO (otherops[1])))
18440 {
18441 if (emit)
18442 {
18443 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18444 output_asm_insn ("ldrd%?\t%0, [%1]", operands);
18445 }
18446 if (count)
18447 *count = 2;
18448 }
18449 else
18450 {
18451 otherops[0] = operands[0];
18452 if (emit)
18453 output_asm_insn ("ldrd%?\t%0, [%1, %2]", otherops);
18454 }
18455 return "";
18456 }
18457
18458 if (CONST_INT_P (otherops[2]))
18459 {
18460 if (emit)
18461 {
18462 if (!(const_ok_for_arm (INTVAL (otherops[2]))))
18463 output_asm_insn ("sub%?\t%0, %1, #%n2", otherops);
18464 else
18465 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18466 }
18467 }
18468 else
18469 {
18470 if (emit)
18471 output_asm_insn ("add%?\t%0, %1, %2", otherops);
18472 }
18473 }
18474 else
18475 {
18476 if (emit)
18477 output_asm_insn ("sub%?\t%0, %1, %2", otherops);
18478 }
18479
18480 if (count)
18481 *count = 2;
18482
18483 if (TARGET_LDRD)
18484 return "ldrd%?\t%0, [%1]";
18485
18486 return "ldmia%?\t%1, %M0";
18487 }
18488 else
18489 {
18490 otherops[1] = adjust_address (operands[1], SImode, 4);
18491 /* Take care of overlapping base/data reg. */
18492 if (reg_mentioned_p (operands[0], operands[1]))
18493 {
18494 if (emit)
18495 {
18496 output_asm_insn ("ldr%?\t%0, %1", otherops);
18497 output_asm_insn ("ldr%?\t%0, %1", operands);
18498 }
18499 if (count)
18500 *count = 2;
18501
18502 }
18503 else
18504 {
18505 if (emit)
18506 {
18507 output_asm_insn ("ldr%?\t%0, %1", operands);
18508 output_asm_insn ("ldr%?\t%0, %1", otherops);
18509 }
18510 if (count)
18511 *count = 2;
18512 }
18513 }
18514 }
18515 }
18516 else
18517 {
18518 /* Constraints should ensure this. */
18519 gcc_assert (code0 == MEM && code1 == REG);
18520 gcc_assert ((REGNO (operands[1]) != IP_REGNUM)
18521 || (TARGET_ARM && TARGET_LDRD));
18522
18523 /* For TARGET_ARM the first source register of an STRD
18524 must be even. This is usually the case for double-word
18525 values but user assembly constraints can force an odd
18526 starting register. */
18527 bool allow_strd = TARGET_LDRD
18528 && !(TARGET_ARM && (REGNO (operands[1]) & 1) == 1);
18529 switch (GET_CODE (XEXP (operands[0], 0)))
18530 {
18531 case REG:
18532 if (emit)
18533 {
18534 if (allow_strd)
18535 output_asm_insn ("strd%?\t%1, [%m0]", operands);
18536 else
18537 output_asm_insn ("stm%?\t%m0, %M1", operands);
18538 }
18539 break;
18540
18541 case PRE_INC:
18542 gcc_assert (allow_strd);
18543 if (emit)
18544 output_asm_insn ("strd%?\t%1, [%m0, #8]!", operands);
18545 break;
18546
18547 case PRE_DEC:
18548 if (emit)
18549 {
18550 if (allow_strd)
18551 output_asm_insn ("strd%?\t%1, [%m0, #-8]!", operands);
18552 else
18553 output_asm_insn ("stmdb%?\t%m0!, %M1", operands);
18554 }
18555 break;
18556
18557 case POST_INC:
18558 if (emit)
18559 {
18560 if (allow_strd)
18561 output_asm_insn ("strd%?\t%1, [%m0], #8", operands);
18562 else
18563 output_asm_insn ("stm%?\t%m0!, %M1", operands);
18564 }
18565 break;
18566
18567 case POST_DEC:
18568 gcc_assert (allow_strd);
18569 if (emit)
18570 output_asm_insn ("strd%?\t%1, [%m0], #-8", operands);
18571 break;
18572
18573 case PRE_MODIFY:
18574 case POST_MODIFY:
18575 otherops[0] = operands[1];
18576 otherops[1] = XEXP (XEXP (XEXP (operands[0], 0), 1), 0);
18577 otherops[2] = XEXP (XEXP (XEXP (operands[0], 0), 1), 1);
18578
18579 /* IWMMXT allows offsets larger than strd can handle,
18580 fix these up with a pair of str. */
18581 if (!TARGET_THUMB2
18582 && CONST_INT_P (otherops[2])
18583 && (INTVAL (otherops[2]) <= -256
18584 || INTVAL (otherops[2]) >= 256))
18585 {
18586 if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18587 {
18588 if (emit)
18589 {
18590 output_asm_insn ("str%?\t%0, [%1, %2]!", otherops);
18591 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18592 }
18593 if (count)
18594 *count = 2;
18595 }
18596 else
18597 {
18598 if (emit)
18599 {
18600 output_asm_insn ("str%?\t%H0, [%1, #4]", otherops);
18601 output_asm_insn ("str%?\t%0, [%1], %2", otherops);
18602 }
18603 if (count)
18604 *count = 2;
18605 }
18606 }
18607 else if (GET_CODE (XEXP (operands[0], 0)) == PRE_MODIFY)
18608 {
18609 if (emit)
18610 output_asm_insn ("strd%?\t%0, [%1, %2]!", otherops);
18611 }
18612 else
18613 {
18614 if (emit)
18615 output_asm_insn ("strd%?\t%0, [%1], %2", otherops);
18616 }
18617 break;
18618
18619 case PLUS:
18620 otherops[2] = XEXP (XEXP (operands[0], 0), 1);
18621 if (CONST_INT_P (otherops[2]) && !TARGET_LDRD)
18622 {
18623 switch ((int) INTVAL (XEXP (XEXP (operands[0], 0), 1)))
18624 {
18625 case -8:
18626 if (emit)
18627 output_asm_insn ("stmdb%?\t%m0, %M1", operands);
18628 return "";
18629
18630 case -4:
18631 if (TARGET_THUMB2)
18632 break;
18633 if (emit)
18634 output_asm_insn ("stmda%?\t%m0, %M1", operands);
18635 return "";
18636
18637 case 4:
18638 if (TARGET_THUMB2)
18639 break;
18640 if (emit)
18641 output_asm_insn ("stmib%?\t%m0, %M1", operands);
18642 return "";
18643 }
18644 }
18645 if (allow_strd
18646 && (REG_P (otherops[2])
18647 || TARGET_THUMB2
18648 || (CONST_INT_P (otherops[2])
18649 && INTVAL (otherops[2]) > -256
18650 && INTVAL (otherops[2]) < 256)))
18651 {
18652 otherops[0] = operands[1];
18653 otherops[1] = XEXP (XEXP (operands[0], 0), 0);
18654 if (emit)
18655 output_asm_insn ("strd%?\t%0, [%1, %2]", otherops);
18656 return "";
18657 }
18658 /* Fall through */
18659
18660 default:
18661 otherops[0] = adjust_address (operands[0], SImode, 4);
18662 otherops[1] = operands[1];
18663 if (emit)
18664 {
18665 output_asm_insn ("str%?\t%1, %0", operands);
18666 output_asm_insn ("str%?\t%H1, %0", otherops);
18667 }
18668 if (count)
18669 *count = 2;
18670 }
18671 }
18672
18673 return "";
18674 }
18675
18676 /* Output a move, load or store for quad-word vectors in ARM registers. Only
18677 handles MEMs accepted by neon_vector_mem_operand with TYPE=1. */
18678
18679 const char *
18680 output_move_quad (rtx *operands)
18681 {
18682 if (REG_P (operands[0]))
18683 {
18684 /* Load, or reg->reg move. */
18685
18686 if (MEM_P (operands[1]))
18687 {
18688 switch (GET_CODE (XEXP (operands[1], 0)))
18689 {
18690 case REG:
18691 output_asm_insn ("ldmia%?\t%m1, %M0", operands);
18692 break;
18693
18694 case LABEL_REF:
18695 case CONST:
18696 output_asm_insn ("adr%?\t%0, %1", operands);
18697 output_asm_insn ("ldmia%?\t%0, %M0", operands);
18698 break;
18699
18700 default:
18701 gcc_unreachable ();
18702 }
18703 }
18704 else
18705 {
18706 rtx ops[2];
18707 int dest, src, i;
18708
18709 gcc_assert (REG_P (operands[1]));
18710
18711 dest = REGNO (operands[0]);
18712 src = REGNO (operands[1]);
18713
18714 /* This seems pretty dumb, but hopefully GCC won't try to do it
18715 very often. */
18716 if (dest < src)
18717 for (i = 0; i < 4; i++)
18718 {
18719 ops[0] = gen_rtx_REG (SImode, dest + i);
18720 ops[1] = gen_rtx_REG (SImode, src + i);
18721 output_asm_insn ("mov%?\t%0, %1", ops);
18722 }
18723 else
18724 for (i = 3; i >= 0; i--)
18725 {
18726 ops[0] = gen_rtx_REG (SImode, dest + i);
18727 ops[1] = gen_rtx_REG (SImode, src + i);
18728 output_asm_insn ("mov%?\t%0, %1", ops);
18729 }
18730 }
18731 }
18732 else
18733 {
18734 gcc_assert (MEM_P (operands[0]));
18735 gcc_assert (REG_P (operands[1]));
18736 gcc_assert (!reg_overlap_mentioned_p (operands[1], operands[0]));
18737
18738 switch (GET_CODE (XEXP (operands[0], 0)))
18739 {
18740 case REG:
18741 output_asm_insn ("stm%?\t%m0, %M1", operands);
18742 break;
18743
18744 default:
18745 gcc_unreachable ();
18746 }
18747 }
18748
18749 return "";
18750 }
18751
18752 /* Output a VFP load or store instruction. */
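/* Illustrative only (assumed operands): a DFmode load prints as
   "vldr.64 d0, [r1, #8]", an SFmode store as "vstr.32 s0, [r1]", and the
   pre-decrement and post-increment addresses use the vldm/vstm forms
   built from the templates below.  */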
18753
18754 const char *
18755 output_move_vfp (rtx *operands)
18756 {
18757 rtx reg, mem, addr, ops[2];
18758 int load = REG_P (operands[0]);
18759 int dp = GET_MODE_SIZE (GET_MODE (operands[0])) == 8;
18760 int sp = (!TARGET_VFP_FP16INST
18761 || GET_MODE_SIZE (GET_MODE (operands[0])) == 4);
18762 int integer_p = GET_MODE_CLASS (GET_MODE (operands[0])) == MODE_INT;
18763 const char *templ;
18764 char buff[50];
18765 machine_mode mode;
18766
18767 reg = operands[!load];
18768 mem = operands[load];
18769
18770 mode = GET_MODE (reg);
18771
18772 gcc_assert (REG_P (reg));
18773 gcc_assert (IS_VFP_REGNUM (REGNO (reg)));
18774 gcc_assert ((mode == HFmode && TARGET_HARD_FLOAT)
18775 || mode == SFmode
18776 || mode == DFmode
18777 || mode == HImode
18778 || mode == SImode
18779 || mode == DImode
18780 || (TARGET_NEON && VALID_NEON_DREG_MODE (mode)));
18781 gcc_assert (MEM_P (mem));
18782
18783 addr = XEXP (mem, 0);
18784
18785 switch (GET_CODE (addr))
18786 {
18787 case PRE_DEC:
18788 templ = "v%smdb%%?.%s\t%%0!, {%%%s1}%s";
18789 ops[0] = XEXP (addr, 0);
18790 ops[1] = reg;
18791 break;
18792
18793 case POST_INC:
18794 templ = "v%smia%%?.%s\t%%0!, {%%%s1}%s";
18795 ops[0] = XEXP (addr, 0);
18796 ops[1] = reg;
18797 break;
18798
18799 default:
18800 templ = "v%sr%%?.%s\t%%%s0, %%1%s";
18801 ops[0] = reg;
18802 ops[1] = mem;
18803 break;
18804 }
18805
18806 sprintf (buff, templ,
18807 load ? "ld" : "st",
18808 dp ? "64" : sp ? "32" : "16",
18809 dp ? "P" : "",
18810 integer_p ? "\t%@ int" : "");
18811 output_asm_insn (buff, ops);
18812
18813 return "";
18814 }
18815
18816 /* Output a Neon double-word or quad-word load or store, or a load
18817 or store for larger structure modes.
18818
18819 WARNING: The ordering of elements is weird in big-endian mode,
18820 because the EABI requires that vectors stored in memory appear
18821 as though they were stored by a VSTM instruction.
18822 GCC RTL defines element ordering based on in-memory order.
18823 This can be different from the architectural ordering of elements
18824 within a NEON register. The intrinsics defined in arm_neon.h use the
18825 NEON register element ordering, not the GCC RTL element ordering.
18826
18827 For example, the in-memory ordering of a big-endian quadword
18828 vector with 16-bit elements when stored from register pair {d0,d1}
18829 will be (lowest address first, d0[N] is NEON register element N):
18830
18831 [d0[3], d0[2], d0[1], d0[0], d1[7], d1[6], d1[5], d1[4]]
18832
18833 When necessary, quadword registers (dN, dN+1) are moved to ARM
18834 registers from rN in the order:
18835
18836 dN -> (rN+1, rN), dN+1 -> (rN+3, rN+2)
18837
18838 So that STM/LDM can be used on vectors in ARM registers, and the
18839 same memory layout will result as if VSTM/VLDM were used.
18840
18841 Instead of VSTM/VLDM we prefer to use VST1.64/VLD1.64 where
18842 possible, which allows use of appropriate alignment tags.
18843 Note that the choice of "64" is independent of the actual vector
18844 element size; this size simply ensures that the behavior is
18845 equivalent to VSTM/VLDM in both little-endian and big-endian mode.
18846
18847 Due to limitations of those instructions, use of VST1.64/VLD1.64
18848 is not possible if:
18849 - the address contains PRE_DEC, or
18850 - the mode refers to more than 4 double-word registers
18851
18852 In those cases, it would be possible to replace VSTM/VLDM by a
18853 sequence of instructions; this is not currently implemented since
18854 this is not certain to actually improve performance. */
18855
18856 const char *
18857 output_move_neon (rtx *operands)
18858 {
18859 rtx reg, mem, addr, ops[2];
18860 int regno, nregs, load = REG_P (operands[0]);
18861 const char *templ;
18862 char buff[50];
18863 machine_mode mode;
18864
18865 reg = operands[!load];
18866 mem = operands[load];
18867
18868 mode = GET_MODE (reg);
18869
18870 gcc_assert (REG_P (reg));
18871 regno = REGNO (reg);
18872 nregs = REG_NREGS (reg) / 2;
18873 gcc_assert (VFP_REGNO_OK_FOR_DOUBLE (regno)
18874 || NEON_REGNO_OK_FOR_QUAD (regno));
18875 gcc_assert (VALID_NEON_DREG_MODE (mode)
18876 || VALID_NEON_QREG_MODE (mode)
18877 || VALID_NEON_STRUCT_MODE (mode));
18878 gcc_assert (MEM_P (mem));
18879
18880 addr = XEXP (mem, 0);
18881
18882 /* Strip off const from addresses like (const (plus (...))). */
18883 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
18884 addr = XEXP (addr, 0);
18885
18886 switch (GET_CODE (addr))
18887 {
18888 case POST_INC:
18889 /* We have to use vldm / vstm for too-large modes. */
18890 if (nregs > 4)
18891 {
18892 templ = "v%smia%%?\t%%0!, %%h1";
18893 ops[0] = XEXP (addr, 0);
18894 }
18895 else
18896 {
18897 templ = "v%s1.64\t%%h1, %%A0";
18898 ops[0] = mem;
18899 }
18900 ops[1] = reg;
18901 break;
18902
18903 case PRE_DEC:
18904 /* We have to use vldm / vstm in this case, since there is no
18905 pre-decrement form of the vld1 / vst1 instructions. */
18906 templ = "v%smdb%%?\t%%0!, %%h1";
18907 ops[0] = XEXP (addr, 0);
18908 ops[1] = reg;
18909 break;
18910
18911 case POST_MODIFY:
18912 /* FIXME: Not currently enabled in neon_vector_mem_operand. */
18913 gcc_unreachable ();
18914
18915 case REG:
18916 /* We have to use vldm / vstm for too-large modes. */
18917 if (nregs > 1)
18918 {
18919 if (nregs > 4)
18920 templ = "v%smia%%?\t%%m0, %%h1";
18921 else
18922 templ = "v%s1.64\t%%h1, %%A0";
18923
18924 ops[0] = mem;
18925 ops[1] = reg;
18926 break;
18927 }
18928 /* Fall through. */
18929 case LABEL_REF:
18930 case PLUS:
18931 {
18932 int i;
18933 int overlap = -1;
18934 for (i = 0; i < nregs; i++)
18935 {
18936 /* We're only using DImode here because it's a convenient size. */
18937 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * i);
18938 ops[1] = adjust_address (mem, DImode, 8 * i);
18939 if (reg_overlap_mentioned_p (ops[0], mem))
18940 {
18941 gcc_assert (overlap == -1);
18942 overlap = i;
18943 }
18944 else
18945 {
18946 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18947 output_asm_insn (buff, ops);
18948 }
18949 }
18950 if (overlap != -1)
18951 {
18952 ops[0] = gen_rtx_REG (DImode, REGNO (reg) + 2 * overlap);
18953 ops[1] = adjust_address (mem, SImode, 8 * overlap);
18954 sprintf (buff, "v%sr%%?\t%%P0, %%1", load ? "ld" : "st");
18955 output_asm_insn (buff, ops);
18956 }
18957
18958 return "";
18959 }
18960
18961 default:
18962 gcc_unreachable ();
18963 }
18964
18965 sprintf (buff, templ, load ? "ld" : "st");
18966 output_asm_insn (buff, ops);
18967
18968 return "";
18969 }
18970
18971 /* Compute and return the length of neon_mov<mode>, where <mode> is
18972 one of VSTRUCT modes: EI, OI, CI or XI. */
18973 int
18974 arm_attr_length_move_neon (rtx_insn *insn)
18975 {
18976 rtx reg, mem, addr;
18977 int load;
18978 machine_mode mode;
18979
18980 extract_insn_cached (insn);
18981
18982 if (REG_P (recog_data.operand[0]) && REG_P (recog_data.operand[1]))
18983 {
18984 mode = GET_MODE (recog_data.operand[0]);
18985 switch (mode)
18986 {
18987 case E_EImode:
18988 case E_OImode:
18989 return 8;
18990 case E_CImode:
18991 return 12;
18992 case E_XImode:
18993 return 16;
18994 default:
18995 gcc_unreachable ();
18996 }
18997 }
18998
18999 load = REG_P (recog_data.operand[0]);
19000 reg = recog_data.operand[!load];
19001 mem = recog_data.operand[load];
19002
19003 gcc_assert (MEM_P (mem));
19004
19005 addr = XEXP (mem, 0);
19006
19007 /* Strip off const from addresses like (const (plus (...))). */
19008 if (GET_CODE (addr) == CONST && GET_CODE (XEXP (addr, 0)) == PLUS)
19009 addr = XEXP (addr, 0);
19010
19011 if (GET_CODE (addr) == LABEL_REF || GET_CODE (addr) == PLUS)
19012 {
19013 int insns = REG_NREGS (reg) / 2;
19014 return insns * 4;
19015 }
19016 else
19017 return 4;
19018 }
19019
19020 /* Return nonzero if the offset in the address is an immediate. Otherwise,
19021 return zero. */
19022
19023 int
19024 arm_address_offset_is_imm (rtx_insn *insn)
19025 {
19026 rtx mem, addr;
19027
19028 extract_insn_cached (insn);
19029
19030 if (REG_P (recog_data.operand[0]))
19031 return 0;
19032
19033 mem = recog_data.operand[0];
19034
19035 gcc_assert (MEM_P (mem));
19036
19037 addr = XEXP (mem, 0);
19038
19039 if (REG_P (addr)
19040 || (GET_CODE (addr) == PLUS
19041 && REG_P (XEXP (addr, 0))
19042 && CONST_INT_P (XEXP (addr, 1))))
19043 return 1;
19044 else
19045 return 0;
19046 }
19047
19048 /* Output an ADD r, s, #n where n may be too big for one instruction.
19049 If N is zero and the destination register equals the source, output nothing. */
19050 const char *
19051 output_add_immediate (rtx *operands)
19052 {
19053 HOST_WIDE_INT n = INTVAL (operands[2]);
19054
19055 if (n != 0 || REGNO (operands[0]) != REGNO (operands[1]))
19056 {
19057 if (n < 0)
19058 output_multi_immediate (operands,
19059 "sub%?\t%0, %1, %2", "sub%?\t%0, %0, %2", 2,
19060 -n);
19061 else
19062 output_multi_immediate (operands,
19063 "add%?\t%0, %1, %2", "add%?\t%0, %0, %2", 2,
19064 n);
19065 }
19066
19067 return "";
19068 }
19069
19070 /* Output a multiple immediate operation.
19071 OPERANDS is the vector of operands referred to in the output patterns.
19072 INSTR1 is the output pattern to use for the first constant.
19073 INSTR2 is the output pattern to use for subsequent constants.
19074 IMMED_OP is the index of the constant slot in OPERANDS.
19075 N is the constant value. */
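/* A worked example (value assumed): N = 0x10001 is split into the chunks
   #1 and #0x10000, so two instructions are output; each chunk is an
   8-bit value at an even bit position and is therefore encodable as an
   ARM immediate.  */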
19076 static const char *
19077 output_multi_immediate (rtx *operands, const char *instr1, const char *instr2,
19078 int immed_op, HOST_WIDE_INT n)
19079 {
19080 #if HOST_BITS_PER_WIDE_INT > 32
19081 n &= 0xffffffff;
19082 #endif
19083
19084 if (n == 0)
19085 {
19086 /* Quick and easy output. */
19087 operands[immed_op] = const0_rtx;
19088 output_asm_insn (instr1, operands);
19089 }
19090 else
19091 {
19092 int i;
19093 const char * instr = instr1;
19094
19095 /* Note that n is never zero here (which would give no output). */
19096 for (i = 0; i < 32; i += 2)
19097 {
19098 if (n & (3 << i))
19099 {
19100 operands[immed_op] = GEN_INT (n & (255 << i));
19101 output_asm_insn (instr, operands);
19102 instr = instr2;
19103 i += 6;
19104 }
19105 }
19106 }
19107
19108 return "";
19109 }
19110
19111 /* Return the name of a shifter operation. */
19112 static const char *
19113 arm_shift_nmem (enum rtx_code code)
19114 {
19115 switch (code)
19116 {
19117 case ASHIFT:
19118 return ARM_LSL_NAME;
19119
19120 case ASHIFTRT:
19121 return "asr";
19122
19123 case LSHIFTRT:
19124 return "lsr";
19125
19126 case ROTATERT:
19127 return "ror";
19128
19129 default:
19130 abort ();
19131 }
19132 }
19133
19134 /* Return the appropriate ARM instruction for the operation code.
19135 The returned result should not be overwritten. OP is the rtx of the
19136 operation. SHIFT_FIRST_ARG is TRUE if the first argument of the operator
19137 was shifted. */
19138 const char *
19139 arithmetic_instr (rtx op, int shift_first_arg)
19140 {
19141 switch (GET_CODE (op))
19142 {
19143 case PLUS:
19144 return "add";
19145
19146 case MINUS:
19147 return shift_first_arg ? "rsb" : "sub";
19148
19149 case IOR:
19150 return "orr";
19151
19152 case XOR:
19153 return "eor";
19154
19155 case AND:
19156 return "and";
19157
19158 case ASHIFT:
19159 case ASHIFTRT:
19160 case LSHIFTRT:
19161 case ROTATERT:
19162 return arm_shift_nmem (GET_CODE (op));
19163
19164 default:
19165 gcc_unreachable ();
19166 }
19167 }
19168
19169 /* Ensure valid constant shifts and return the appropriate shift mnemonic
19170 for the operation code. The returned result should not be overwritten.
19171 OP is the rtx of the shift.
19172 On exit, *AMOUNTP will be -1 if the shift is by a register, or the
19173 shift amount if the shift is by a constant. */
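/* Illustrative only: a rotate-left by 8 is converted to "ror" with an
   amount of 24, and a multiply by 8 (always a power of two here) is
   output as the shift "lsl #3".  */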
19174 static const char *
19175 shift_op (rtx op, HOST_WIDE_INT *amountp)
19176 {
19177 const char * mnem;
19178 enum rtx_code code = GET_CODE (op);
19179
19180 switch (code)
19181 {
19182 case ROTATE:
19183 if (!CONST_INT_P (XEXP (op, 1)))
19184 {
19185 output_operand_lossage ("invalid shift operand");
19186 return NULL;
19187 }
19188
19189 code = ROTATERT;
19190 *amountp = 32 - INTVAL (XEXP (op, 1));
19191 mnem = "ror";
19192 break;
19193
19194 case ASHIFT:
19195 case ASHIFTRT:
19196 case LSHIFTRT:
19197 case ROTATERT:
19198 mnem = arm_shift_nmem (code);
19199 if (CONST_INT_P (XEXP (op, 1)))
19200 {
19201 *amountp = INTVAL (XEXP (op, 1));
19202 }
19203 else if (REG_P (XEXP (op, 1)))
19204 {
19205 *amountp = -1;
19206 return mnem;
19207 }
19208 else
19209 {
19210 output_operand_lossage ("invalid shift operand");
19211 return NULL;
19212 }
19213 break;
19214
19215 case MULT:
19216 /* We never have to worry about the amount being other than a
19217 power of 2, since this case can never be reloaded from a reg. */
19218 if (!CONST_INT_P (XEXP (op, 1)))
19219 {
19220 output_operand_lossage ("invalid shift operand");
19221 return NULL;
19222 }
19223
19224 *amountp = INTVAL (XEXP (op, 1)) & 0xFFFFFFFF;
19225
19226 /* Amount must be a power of two. */
19227 if (*amountp & (*amountp - 1))
19228 {
19229 output_operand_lossage ("invalid shift operand");
19230 return NULL;
19231 }
19232
19233 *amountp = exact_log2 (*amountp);
19234 gcc_assert (IN_RANGE (*amountp, 0, 31));
19235 return ARM_LSL_NAME;
19236
19237 default:
19238 output_operand_lossage ("invalid shift operand");
19239 return NULL;
19240 }
19241
19242 /* This is not 100% correct, but follows from the desire to merge
19243 multiplication by a power of 2 with the recognizer for a
19244 shift.  >=32 is not a valid shift for "lsl", so we must try to
19245 output a shift that produces the correct arithmetical result.
19246 Using lsr #32 is identical except for the fact that the carry bit
19247 is not set correctly if we set the flags; but we never use the
19248 carry bit from such an operation, so we can ignore that. */
19249 if (code == ROTATERT)
19250 /* Rotate is just modulo 32. */
19251 *amountp &= 31;
19252 else if (*amountp != (*amountp & 31))
19253 {
19254 if (code == ASHIFT)
19255 mnem = "lsr";
19256 *amountp = 32;
19257 }
19258
19259 /* Shifts of 0 are no-ops. */
19260 if (*amountp == 0)
19261 return NULL;
19262
19263 return mnem;
19264 }
19265
19266 /* Output a .ascii pseudo-op, keeping track of lengths. This is
19267 because /bin/as is horribly restrictive. The judgement about
19268 whether or not each character is 'printable' (and can be output as
19269 is) or not (and must be printed with an octal escape) must be made
19270 with reference to the *host* character set -- the situation is
19271 similar to that discussed in the comments above pp_c_char in
19272 c-pretty-print.c. */
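/* For example (byte values assumed): the bytes 'H', 'i', '"' and 0x0a are
   emitted as

       .ascii "Hi\"\012"

   with the string broken into further .ascii directives once roughly
   MAX_ASCII_LEN characters have been printed on one line.  */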
19273
19274 #define MAX_ASCII_LEN 51
19275
19276 void
19277 output_ascii_pseudo_op (FILE *stream, const unsigned char *p, int len)
19278 {
19279 int i;
19280 int len_so_far = 0;
19281
19282 fputs ("\t.ascii\t\"", stream);
19283
19284 for (i = 0; i < len; i++)
19285 {
19286 int c = p[i];
19287
19288 if (len_so_far >= MAX_ASCII_LEN)
19289 {
19290 fputs ("\"\n\t.ascii\t\"", stream);
19291 len_so_far = 0;
19292 }
19293
19294 if (ISPRINT (c))
19295 {
19296 if (c == '\\' || c == '\"')
19297 {
19298 putc ('\\', stream);
19299 len_so_far++;
19300 }
19301 putc (c, stream);
19302 len_so_far++;
19303 }
19304 else
19305 {
19306 fprintf (stream, "\\%03o", c);
19307 len_so_far += 4;
19308 }
19309 }
19310
19311 fputs ("\"\n", stream);
19312 }
19313 \f
19314 /* Whether a register is callee saved or not.  This is necessary because, when
19315 optimizing for size on Thumb-1 targets, high registers are marked as caller
19316 saved in order to avoid using them, despite actually being callee saved. */
19317 #define callee_saved_reg_p(reg) \
19318 (!call_used_regs[reg] \
19319 || (TARGET_THUMB1 && optimize_size \
19320 && reg >= FIRST_HI_REGNUM && reg <= LAST_HI_REGNUM))
19321
19322 /* Compute the register save mask for registers 0 through 12
19323 inclusive. This code is used by arm_compute_save_core_reg_mask (). */
19324
19325 static unsigned long
19326 arm_compute_save_reg0_reg12_mask (void)
19327 {
19328 unsigned long func_type = arm_current_func_type ();
19329 unsigned long save_reg_mask = 0;
19330 unsigned int reg;
19331
19332 if (IS_INTERRUPT (func_type))
19333 {
19334 unsigned int max_reg;
19335 /* Interrupt functions must not corrupt any registers,
19336 even call clobbered ones. If this is a leaf function
19337 we can just examine the registers used by the RTL, but
19338 otherwise we have to assume that whatever function is
19339 called might clobber anything, and so we have to save
19340 all the call-clobbered registers as well. */
19341 if (ARM_FUNC_TYPE (func_type) == ARM_FT_FIQ)
19342 /* FIQ handlers have registers r8 - r12 banked, so
19343 we only need to check r0 - r7.  Normal ISRs only
19344 bank r14 and r15, so we must check up to r12.
19345 r13 is the stack pointer, which is always preserved,
19346 so we do not need to consider it here. */
19347 max_reg = 7;
19348 else
19349 max_reg = 12;
19350
19351 for (reg = 0; reg <= max_reg; reg++)
19352 if (df_regs_ever_live_p (reg)
19353 || (! crtl->is_leaf && call_used_regs[reg]))
19354 save_reg_mask |= (1 << reg);
19355
19356 /* Also save the pic base register if necessary. */
19357 if (flag_pic
19358 && !TARGET_SINGLE_PIC_BASE
19359 && arm_pic_register != INVALID_REGNUM
19360 && crtl->uses_pic_offset_table)
19361 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19362 }
19363 else if (IS_VOLATILE (func_type))
19364 {
19365 /* For noreturn functions we historically omitted register saves
19366 altogether. However this really messes up debugging. As a
19367 compromise save just the frame pointers. Combined with the link
19368 register saved elsewhere this should be sufficient to get
19369 a backtrace. */
19370 if (frame_pointer_needed)
19371 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19372 if (df_regs_ever_live_p (ARM_HARD_FRAME_POINTER_REGNUM))
19373 save_reg_mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19374 if (df_regs_ever_live_p (THUMB_HARD_FRAME_POINTER_REGNUM))
19375 save_reg_mask |= 1 << THUMB_HARD_FRAME_POINTER_REGNUM;
19376 }
19377 else
19378 {
19379 /* In the normal case we only need to save those registers
19380 which are call saved and which are used by this function. */
19381 for (reg = 0; reg <= 11; reg++)
19382 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19383 save_reg_mask |= (1 << reg);
19384
19385 /* Handle the frame pointer as a special case. */
19386 if (frame_pointer_needed)
19387 save_reg_mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19388
19389 /* If we aren't loading the PIC register,
19390 don't stack it even though it may be live. */
19391 if (flag_pic
19392 && !TARGET_SINGLE_PIC_BASE
19393 && arm_pic_register != INVALID_REGNUM
19394 && (df_regs_ever_live_p (PIC_OFFSET_TABLE_REGNUM)
19395 || crtl->uses_pic_offset_table))
19396 save_reg_mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19397
19398 /* The prologue will copy SP into R0, so save it. */
19399 if (IS_STACKALIGN (func_type))
19400 save_reg_mask |= 1;
19401 }
19402
19403 /* Save registers so the exception handler can modify them. */
19404 if (crtl->calls_eh_return)
19405 {
19406 unsigned int i;
19407
19408 for (i = 0; ; i++)
19409 {
19410 reg = EH_RETURN_DATA_REGNO (i);
19411 if (reg == INVALID_REGNUM)
19412 break;
19413 save_reg_mask |= 1 << reg;
19414 }
19415 }
19416
19417 return save_reg_mask;
19418 }
19419
19420 /* Return true if r3 is live at the start of the function. */
19421
19422 static bool
19423 arm_r3_live_at_start_p (void)
19424 {
19425 /* Just look at cfg info, which is still close enough to correct at this
19426 point. This gives false positives for broken functions that might use
19427 uninitialized data that happens to be allocated in r3, but who cares? */
19428 return REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)), 3);
19429 }
19430
19431 /* Compute the number of bytes used to store the static chain register on the
19432 stack, above the stack frame. We need to know this accurately to get the
19433 alignment of the rest of the stack frame correct. */
19434
19435 static int
19436 arm_compute_static_chain_stack_bytes (void)
19437 {
19438 /* Once the value is updated from the init value of -1, do not
19439 re-compute. */
19440 if (cfun->machine->static_chain_stack_bytes != -1)
19441 return cfun->machine->static_chain_stack_bytes;
19442
19443 /* See the defining assertion in arm_expand_prologue. */
19444 if (IS_NESTED (arm_current_func_type ())
19445 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19446 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
19447 || flag_stack_clash_protection)
19448 && !df_regs_ever_live_p (LR_REGNUM)))
19449 && arm_r3_live_at_start_p ()
19450 && crtl->args.pretend_args_size == 0)
19451 return 4;
19452
19453 return 0;
19454 }
19455
19456 /* Compute a bit mask of which core registers need to be
19457 saved on the stack for the current function.
19458 This is used by arm_compute_frame_layout, which may add extra registers. */
19459
19460 static unsigned long
19461 arm_compute_save_core_reg_mask (void)
19462 {
19463 unsigned int save_reg_mask = 0;
19464 unsigned long func_type = arm_current_func_type ();
19465 unsigned int reg;
19466
19467 if (IS_NAKED (func_type))
19468 /* This should never really happen. */
19469 return 0;
19470
19471 /* If we are creating a stack frame, then we must save the frame pointer,
19472 IP (which will hold the old stack pointer), LR and the PC. */
19473 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
19474 save_reg_mask |=
19475 (1 << ARM_HARD_FRAME_POINTER_REGNUM)
19476 | (1 << IP_REGNUM)
19477 | (1 << LR_REGNUM)
19478 | (1 << PC_REGNUM);
19479
19480 save_reg_mask |= arm_compute_save_reg0_reg12_mask ();
19481
19482 /* Decide if we need to save the link register.
19483 Interrupt routines have their own banked link register,
19484 so they never need to save it.
19485 Otherwise if we do not use the link register we do not need to save
19486 it. If we are pushing other registers onto the stack however, we
19487 can save an instruction in the epilogue by pushing the link register
19488 now and then popping it back into the PC. This incurs extra memory
19489 accesses though, so we only do it when optimizing for size, and only
19490 if we know that we will not need a fancy return sequence. */
19491 if (df_regs_ever_live_p (LR_REGNUM)
19492 || (save_reg_mask
19493 && optimize_size
19494 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
19495 && !crtl->tail_call_emit
19496 && !crtl->calls_eh_return))
19497 save_reg_mask |= 1 << LR_REGNUM;
19498
19499 if (cfun->machine->lr_save_eliminated)
19500 save_reg_mask &= ~ (1 << LR_REGNUM);
19501
19502 if (TARGET_REALLY_IWMMXT
19503 && ((bit_count (save_reg_mask)
19504 + ARM_NUM_INTS (crtl->args.pretend_args_size +
19505 arm_compute_static_chain_stack_bytes ())
19506 ) % 2) != 0)
19507 {
19508 /* The total number of registers that are going to be pushed
19509 onto the stack is odd. We need to ensure that the stack
19510 is 64-bit aligned before we start to save iWMMXt registers,
19511 and also before we start to create locals. (A local variable
19512 might be a double or long long which we will load/store using
19513 an iWMMXt instruction). Therefore we need to push another
19514 ARM register, so that the stack will be 64-bit aligned. We
19515 try to avoid using the arg registers (r0 - r3) as they might be
19516 used to pass values in a tail call. */
19517 for (reg = 4; reg <= 12; reg++)
19518 if ((save_reg_mask & (1 << reg)) == 0)
19519 break;
19520
19521 if (reg <= 12)
19522 save_reg_mask |= (1 << reg);
19523 else
19524 {
19525 cfun->machine->sibcall_blocked = 1;
19526 save_reg_mask |= (1 << 3);
19527 }
19528 }
19529
19530 /* We may need to push an additional register for use initializing the
19531 PIC base register. */
19532 if (TARGET_THUMB2 && IS_NESTED (func_type) && flag_pic
19533 && (save_reg_mask & THUMB2_WORK_REGS) == 0)
19534 {
19535 reg = thumb_find_work_register (1 << 4);
19536 if (!call_used_regs[reg])
19537 save_reg_mask |= (1 << reg);
19538 }
19539
19540 return save_reg_mask;
19541 }
19542
19543 /* Compute a bit mask of which core registers need to be
19544 saved on the stack for the current function. */
19545 static unsigned long
19546 thumb1_compute_save_core_reg_mask (void)
19547 {
19548 unsigned long mask;
19549 unsigned reg;
19550
19551 mask = 0;
19552 for (reg = 0; reg < 12; reg ++)
19553 if (df_regs_ever_live_p (reg) && callee_saved_reg_p (reg))
19554 mask |= 1 << reg;
19555
19556 /* Handle the frame pointer as a special case. */
19557 if (frame_pointer_needed)
19558 mask |= 1 << HARD_FRAME_POINTER_REGNUM;
19559
19560 if (flag_pic
19561 && !TARGET_SINGLE_PIC_BASE
19562 && arm_pic_register != INVALID_REGNUM
19563 && crtl->uses_pic_offset_table)
19564 mask |= 1 << PIC_OFFSET_TABLE_REGNUM;
19565
19566 /* See if we might need r11 for calls to _interwork_r11_call_via_rN(). */
19567 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
19568 mask |= 1 << ARM_HARD_FRAME_POINTER_REGNUM;
19569
19570 /* LR will also be pushed if any lo regs are pushed. */
19571 if (mask & 0xff || thumb_force_lr_save ())
19572 mask |= (1 << LR_REGNUM);
19573
19574 /* Make sure we have a low work register if we need one.
19575 We will need one if we are going to push a high register,
19576 but we are not currently intending to push a low register. */
19577 if ((mask & 0xff) == 0
19578 && ((mask & 0x0f00) || TARGET_BACKTRACE))
19579 {
19580 /* Use thumb_find_work_register to choose which register
19581 we will use. If the register is live then we will
19582 have to push it. Use LAST_LO_REGNUM as our fallback
19583 choice for the register to select. */
19584 reg = thumb_find_work_register (1 << LAST_LO_REGNUM);
19585 /* Make sure the register returned by thumb_find_work_register is
19586 not part of the return value. */
19587 if (reg * UNITS_PER_WORD <= (unsigned) arm_size_return_regs ())
19588 reg = LAST_LO_REGNUM;
19589
19590 if (callee_saved_reg_p (reg))
19591 mask |= 1 << reg;
19592 }
19593
19594 /* The 504 below is 8 bytes less than 512 because there are two possible
19595 alignment words. We can't tell here if they will be present or not so we
19596 have to play it safe and assume that they are. */
19597 if ((CALLER_INTERWORKING_SLOT_SIZE +
19598 ROUND_UP_WORD (get_frame_size ()) +
19599 crtl->outgoing_args_size) >= 504)
19600 {
19601 /* This is the same as the code in thumb1_expand_prologue() which
19602 determines which register to use for stack decrement. */
19603 for (reg = LAST_ARG_REGNUM + 1; reg <= LAST_LO_REGNUM; reg++)
19604 if (mask & (1 << reg))
19605 break;
19606
19607 if (reg > LAST_LO_REGNUM)
19608 {
19609 /* Make sure we have a register available for stack decrement. */
19610 mask |= 1 << LAST_LO_REGNUM;
19611 }
19612 }
19613
19614 return mask;
19615 }
19616
19617
19618 /* Return the number of bytes required to save VFP registers. */
19619 static int
19620 arm_get_vfp_saved_size (void)
19621 {
19622 unsigned int regno;
19623 int count;
19624 int saved;
19625
19626 saved = 0;
19627 /* Space for saved VFP registers. */
19628 if (TARGET_HARD_FLOAT)
19629 {
19630 count = 0;
19631 for (regno = FIRST_VFP_REGNUM;
19632 regno < LAST_VFP_REGNUM;
19633 regno += 2)
19634 {
19635 if ((!df_regs_ever_live_p (regno) || call_used_regs[regno])
19636 && (!df_regs_ever_live_p (regno + 1) || call_used_regs[regno + 1]))
19637 {
19638 if (count > 0)
19639 {
19640 /* Workaround ARM10 VFPr1 bug. */
19641 if (count == 2 && !arm_arch6)
19642 count++;
19643 saved += count * 8;
19644 }
19645 count = 0;
19646 }
19647 else
19648 count++;
19649 }
19650 if (count > 0)
19651 {
19652 if (count == 2 && !arm_arch6)
19653 count++;
19654 saved += count * 8;
19655 }
19656 }
19657 return saved;
19658 }
19659
19660
19661 /* Generate a function exit sequence.  If REALLY_RETURN is false, then do
19662 everything bar the final return instruction.  If SIMPLE_RETURN is true,
19663 then do not output the epilogue, because it has already been emitted in RTL.
19664
19665 Note: do not forget to update the length attribute of the corresponding
19666 insn pattern when changing the assembly output (e.g. the length attribute
19667 of thumb2_cmse_entry_return when updating the Armv8-M Mainline Security
19668 Extensions register clearing sequences). */
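/* Illustrative only (assumed saved-register set, interworking not
   required): a function that saved {r4, lr} returns with "pop {r4, pc}",
   while an interrupt handler restoring the same set instead uses
   "ldmfd sp!, {r4, pc}^" so that the SPSR is restored.  */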
19669 const char *
19670 output_return_instruction (rtx operand, bool really_return, bool reverse,
19671 bool simple_return)
19672 {
19673 char conditional[10];
19674 char instr[100];
19675 unsigned reg;
19676 unsigned long live_regs_mask;
19677 unsigned long func_type;
19678 arm_stack_offsets *offsets;
19679
19680 func_type = arm_current_func_type ();
19681
19682 if (IS_NAKED (func_type))
19683 return "";
19684
19685 if (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN)
19686 {
19687 /* If this function was declared non-returning, and we have
19688 found a tail call, then we have to trust that the called
19689 function won't return. */
19690 if (really_return)
19691 {
19692 rtx ops[2];
19693
19694 /* Otherwise, trap an attempted return by aborting. */
19695 ops[0] = operand;
19696 ops[1] = gen_rtx_SYMBOL_REF (Pmode, NEED_PLT_RELOC ? "abort(PLT)"
19697 : "abort");
19698 assemble_external_libcall (ops[1]);
19699 output_asm_insn (reverse ? "bl%D0\t%a1" : "bl%d0\t%a1", ops);
19700 }
19701
19702 return "";
19703 }
19704
19705 gcc_assert (!cfun->calls_alloca || really_return);
19706
19707 sprintf (conditional, "%%?%%%c0", reverse ? 'D' : 'd');
19708
19709 cfun->machine->return_used_this_function = 1;
19710
19711 offsets = arm_get_frame_offsets ();
19712 live_regs_mask = offsets->saved_regs_mask;
19713
19714 if (!simple_return && live_regs_mask)
19715 {
19716 const char * return_reg;
19717
19718 /* If we do not have any special requirements for function exit
19719 (e.g. interworking) then we can load the return address
19720 directly into the PC. Otherwise we must load it into LR. */
19721 if (really_return
19722 && !IS_CMSE_ENTRY (func_type)
19723 && (IS_INTERRUPT (func_type) || !TARGET_INTERWORK))
19724 return_reg = reg_names[PC_REGNUM];
19725 else
19726 return_reg = reg_names[LR_REGNUM];
19727
19728 if ((live_regs_mask & (1 << IP_REGNUM)) == (1 << IP_REGNUM))
19729 {
19730 /* There are three possible reasons for the IP register
19731 being saved: 1) a stack frame was created, in which case
19732 IP contains the old stack pointer; 2) an ISR routine
19733 corrupted it; or 3) it was saved to align the stack on
19734 iWMMXt. In case 1, restore IP into SP; otherwise just
19735 restore IP. */
19736 if (frame_pointer_needed)
19737 {
19738 live_regs_mask &= ~ (1 << IP_REGNUM);
19739 live_regs_mask |= (1 << SP_REGNUM);
19740 }
19741 else
19742 gcc_assert (IS_INTERRUPT (func_type) || TARGET_REALLY_IWMMXT);
19743 }
19744
19745 /* On some ARM architectures it is faster to use LDR rather than
19746 LDM to load a single register. On other architectures, the
19747 cost is the same. In 26 bit mode, or for exception handlers,
19748 we have to use LDM to load the PC so that the CPSR is also
19749 restored. */
19750 for (reg = 0; reg <= LAST_ARM_REGNUM; reg++)
19751 if (live_regs_mask == (1U << reg))
19752 break;
19753
19754 if (reg <= LAST_ARM_REGNUM
19755 && (reg != LR_REGNUM
19756 || ! really_return
19757 || ! IS_INTERRUPT (func_type)))
19758 {
19759 sprintf (instr, "ldr%s\t%%|%s, [%%|sp], #4", conditional,
19760 (reg == LR_REGNUM) ? return_reg : reg_names[reg]);
19761 }
19762 else
19763 {
19764 char *p;
19765 int first = 1;
19766
19767 /* Generate the load multiple instruction to restore the
19768 registers. Note we can get here, even if
19769 frame_pointer_needed is true, but only if sp already
19770 points to the base of the saved core registers. */
19771 if (live_regs_mask & (1 << SP_REGNUM))
19772 {
19773 unsigned HOST_WIDE_INT stack_adjust;
19774
19775 stack_adjust = offsets->outgoing_args - offsets->saved_regs;
19776 gcc_assert (stack_adjust == 0 || stack_adjust == 4);
19777
19778 if (stack_adjust && arm_arch5t && TARGET_ARM)
19779 sprintf (instr, "ldmib%s\t%%|sp, {", conditional);
19780 else
19781 {
19782 /* If we can't use ldmib (SA110 bug),
19783 then try to pop r3 instead. */
19784 if (stack_adjust)
19785 live_regs_mask |= 1 << 3;
19786
19787 sprintf (instr, "ldmfd%s\t%%|sp, {", conditional);
19788 }
19789 }
19790 /* For interrupt returns we have to use an LDM rather than
19791 a POP so that we can use the exception return variant. */
19792 else if (IS_INTERRUPT (func_type))
19793 sprintf (instr, "ldmfd%s\t%%|sp!, {", conditional);
19794 else
19795 sprintf (instr, "pop%s\t{", conditional);
19796
19797 p = instr + strlen (instr);
19798
19799 for (reg = 0; reg <= SP_REGNUM; reg++)
19800 if (live_regs_mask & (1 << reg))
19801 {
19802 int l = strlen (reg_names[reg]);
19803
19804 if (first)
19805 first = 0;
19806 else
19807 {
19808 memcpy (p, ", ", 2);
19809 p += 2;
19810 }
19811
19812 memcpy (p, "%|", 2);
19813 memcpy (p + 2, reg_names[reg], l);
19814 p += l + 2;
19815 }
19816
19817 if (live_regs_mask & (1 << LR_REGNUM))
19818 {
19819 sprintf (p, "%s%%|%s}", first ? "" : ", ", return_reg);
19820 /* If returning from an interrupt, restore the CPSR. */
19821 if (IS_INTERRUPT (func_type))
19822 strcat (p, "^");
19823 }
19824 else
19825 strcpy (p, "}");
19826 }
19827
19828 output_asm_insn (instr, & operand);
19829
19830 /* See if we need to generate an extra instruction to
19831 perform the actual function return. */
19832 if (really_return
19833 && func_type != ARM_FT_INTERWORKED
19834 && (live_regs_mask & (1 << LR_REGNUM)) != 0)
19835 {
19836 /* The return has already been handled
19837 by loading the LR into the PC. */
19838 return "";
19839 }
19840 }
19841
19842 if (really_return)
19843 {
19844 switch ((int) ARM_FUNC_TYPE (func_type))
19845 {
19846 case ARM_FT_ISR:
19847 case ARM_FT_FIQ:
19848 /* ??? This is wrong for unified assembly syntax. */
19849 sprintf (instr, "sub%ss\t%%|pc, %%|lr, #4", conditional);
19850 break;
19851
19852 case ARM_FT_INTERWORKED:
19853 gcc_assert (arm_arch5t || arm_arch4t);
19854 sprintf (instr, "bx%s\t%%|lr", conditional);
19855 break;
19856
19857 case ARM_FT_EXCEPTION:
19858 /* ??? This is wrong for unified assembly syntax. */
19859 sprintf (instr, "mov%ss\t%%|pc, %%|lr", conditional);
19860 break;
19861
19862 default:
19863 if (IS_CMSE_ENTRY (func_type))
19864 {
19865 /* Check if we have to clear the 'GE bits', which are only used if
19866 parallel add and subtract instructions are available. */
19867 if (TARGET_INT_SIMD)
19868 snprintf (instr, sizeof (instr),
19869 "msr%s\tAPSR_nzcvqg, %%|lr", conditional);
19870 else
19871 snprintf (instr, sizeof (instr),
19872 "msr%s\tAPSR_nzcvq, %%|lr", conditional);
19873
19874 output_asm_insn (instr, & operand);
19875 if (TARGET_HARD_FLOAT)
19876 {
19877 /* Clear the cumulative exception-status bits (0-4,7) and the
19878 condition code bits (28-31) of the FPSCR. We need to
19879 remember to clear the first scratch register used (IP) and
19880 save and restore the second (r4). */
19881 snprintf (instr, sizeof (instr), "push\t{%%|r4}");
19882 output_asm_insn (instr, & operand);
19883 snprintf (instr, sizeof (instr), "vmrs\t%%|ip, fpscr");
19884 output_asm_insn (instr, & operand);
19885 snprintf (instr, sizeof (instr), "movw\t%%|r4, #65376");
19886 output_asm_insn (instr, & operand);
19887 snprintf (instr, sizeof (instr), "movt\t%%|r4, #4095");
19888 output_asm_insn (instr, & operand);
19889 snprintf (instr, sizeof (instr), "and\t%%|ip, %%|r4");
19890 output_asm_insn (instr, & operand);
19891 snprintf (instr, sizeof (instr), "vmsr\tfpscr, %%|ip");
19892 output_asm_insn (instr, & operand);
19893 snprintf (instr, sizeof (instr), "pop\t{%%|r4}");
19894 output_asm_insn (instr, & operand);
19895 snprintf (instr, sizeof (instr), "mov\t%%|ip, %%|lr");
19896 output_asm_insn (instr, & operand);
19897 }
19898 snprintf (instr, sizeof (instr), "bxns\t%%|lr");
19899 }
19900 /* Use bx if it's available. */
19901 else if (arm_arch5t || arm_arch4t)
19902 sprintf (instr, "bx%s\t%%|lr", conditional);
19903 else
19904 sprintf (instr, "mov%s\t%%|pc, %%|lr", conditional);
19905 break;
19906 }
19907
19908 output_asm_insn (instr, & operand);
19909 }
19910
19911 return "";
19912 }
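/* A rough illustration of the code above (a sketch, not an exhaustive
   description): for an ordinary ARM-mode function that saved {r4, r5, lr},
   with interworking disabled and REALLY_RETURN true, live_regs_mask selects
   several registers, so the multi-register branch is taken and, after operand
   substitution, something like "pop {r4, r5, pc}" is emitted; because LR is
   loaded directly into the PC, no separate return instruction follows.  */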
19913
19914 /* Output in FILE asm statements needed to declare the NAME of the function
19915 defined by its DECL node. */
19916
19917 void
19918 arm_asm_declare_function_name (FILE *file, const char *name, tree decl)
19919 {
19920 size_t cmse_name_len;
19921 char *cmse_name = 0;
19922 char cmse_prefix[] = "__acle_se_";
19923
19924 /* When compiling with ARMv8-M Security Extensions enabled, we should print an
19925 extra function label for each function with the 'cmse_nonsecure_entry'
19926 attribute. This extra function label should be prepended with
19927 '__acle_se_', telling the linker that it needs to create secure gateway
19928 veneers for this function. */
19929 if (use_cmse && lookup_attribute ("cmse_nonsecure_entry",
19930 DECL_ATTRIBUTES (decl)))
19931 {
19932 cmse_name_len = sizeof (cmse_prefix) + strlen (name);
19933 cmse_name = XALLOCAVEC (char, cmse_name_len);
19934 snprintf (cmse_name, cmse_name_len, "%s%s", cmse_prefix, name);
19935 targetm.asm_out.globalize_label (file, cmse_name);
19936
19937 ARM_DECLARE_FUNCTION_NAME (file, cmse_name, decl);
19938 ASM_OUTPUT_TYPE_DIRECTIVE (file, cmse_name, "function");
19939 }
19940
19941 ARM_DECLARE_FUNCTION_NAME (file, name, decl);
19942 ASM_OUTPUT_TYPE_DIRECTIVE (file, name, "function");
19943 ASM_DECLARE_RESULT (file, DECL_RESULT (decl));
19944 ASM_OUTPUT_LABEL (file, name);
19945
19946 if (cmse_name)
19947 ASM_OUTPUT_LABEL (file, cmse_name);
19948
19949 ARM_OUTPUT_FN_UNWIND (file, TRUE);
19950 }
19951
19952 /* Write the function name into the code section, directly preceding
19953 the function prologue.
19954
19955 Code will be output similar to this:
19956 t0
19957 .ascii "arm_poke_function_name", 0
19958 .align
19959 t1
19960 .word 0xff000000 + (t1 - t0)
19961 arm_poke_function_name
19962 mov ip, sp
19963 stmfd sp!, {fp, ip, lr, pc}
19964 sub fp, ip, #4
19965
19966 When performing a stack backtrace, code can inspect the value
19967 of 'pc' stored at 'fp' + 0. If the trace function then looks
19968 at location pc - 12 and the top 8 bits are set, then we know
19969 that there is a function name embedded immediately preceding this
19970 location, and that its (word-aligned) length is ((pc[-3]) & ~0xff000000).
19971
19972 We assume that pc is declared as a pointer to an unsigned long.
19973
19974 It is of no benefit to output the function name if we are assembling
19975 a leaf function. These function types will not contain a stack
19976 backtrace structure, therefore it is not possible to determine the
19977 function name. */
19978 void
19979 arm_poke_function_name (FILE *stream, const char *name)
19980 {
19981 unsigned long alignlength;
19982 unsigned long length;
19983 rtx x;
19984
19985 length = strlen (name) + 1;
19986 alignlength = ROUND_UP_WORD (length);
19987
19988 ASM_OUTPUT_ASCII (stream, name, length);
19989 ASM_OUTPUT_ALIGN (stream, 2);
19990 x = GEN_INT ((unsigned HOST_WIDE_INT) 0xff000000 + alignlength);
19991 assemble_aligned_integer (UNITS_PER_WORD, x);
19992 }
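/* A minimal sketch of how a backtrace routine might consume the data poked
   above (hypothetical consumer code, not part of the compiler; it assumes,
   as the comment does, that 'pc' is an unsigned long * holding the value
   saved at fp + 0):

     unsigned long marker = pc[-3];
     if ((marker & 0xff000000) == 0xff000000)
       {
         unsigned long len = marker & ~0xff000000;   // padded name length
         const char *name = (const char *) pc - 12 - len;
         // 'name' now points at the NUL-terminated function name.
       }
   */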
19993
19994 /* Place some comments into the assembler stream
19995 describing the current function. */
19996 static void
19997 arm_output_function_prologue (FILE *f)
19998 {
19999 unsigned long func_type;
20000
20001 /* Sanity check. */
20002 gcc_assert (!arm_ccfsm_state && !arm_target_insn);
20003
20004 func_type = arm_current_func_type ();
20005
20006 switch ((int) ARM_FUNC_TYPE (func_type))
20007 {
20008 default:
20009 case ARM_FT_NORMAL:
20010 break;
20011 case ARM_FT_INTERWORKED:
20012 asm_fprintf (f, "\t%@ Function supports interworking.\n");
20013 break;
20014 case ARM_FT_ISR:
20015 asm_fprintf (f, "\t%@ Interrupt Service Routine.\n");
20016 break;
20017 case ARM_FT_FIQ:
20018 asm_fprintf (f, "\t%@ Fast Interrupt Service Routine.\n");
20019 break;
20020 case ARM_FT_EXCEPTION:
20021 asm_fprintf (f, "\t%@ ARM Exception Handler.\n");
20022 break;
20023 }
20024
20025 if (IS_NAKED (func_type))
20026 asm_fprintf (f, "\t%@ Naked Function: prologue and epilogue provided by programmer.\n");
20027
20028 if (IS_VOLATILE (func_type))
20029 asm_fprintf (f, "\t%@ Volatile: function does not return.\n");
20030
20031 if (IS_NESTED (func_type))
20032 asm_fprintf (f, "\t%@ Nested: function declared inside another function.\n");
20033 if (IS_STACKALIGN (func_type))
20034 asm_fprintf (f, "\t%@ Stack Align: May be called with mis-aligned SP.\n");
20035 if (IS_CMSE_ENTRY (func_type))
20036 asm_fprintf (f, "\t%@ Non-secure entry function: called from non-secure code.\n");
20037
20038 asm_fprintf (f, "\t%@ args = %wd, pretend = %d, frame = %wd\n",
20039 (HOST_WIDE_INT) crtl->args.size,
20040 crtl->args.pretend_args_size,
20041 (HOST_WIDE_INT) get_frame_size ());
20042
20043 asm_fprintf (f, "\t%@ frame_needed = %d, uses_anonymous_args = %d\n",
20044 frame_pointer_needed,
20045 cfun->machine->uses_anonymous_args);
20046
20047 if (cfun->machine->lr_save_eliminated)
20048 asm_fprintf (f, "\t%@ link register save eliminated.\n");
20049
20050 if (crtl->calls_eh_return)
20051 asm_fprintf (f, "\t@ Calls __builtin_eh_return.\n");
20052
20053 }
20054
20055 static void
20056 arm_output_function_epilogue (FILE *)
20057 {
20058 arm_stack_offsets *offsets;
20059
20060 if (TARGET_THUMB1)
20061 {
20062 int regno;
20063
20064 /* Emit any call-via-reg trampolines that are needed for v4t support
20065 of call_reg and call_value_reg type insns. */
20066 for (regno = 0; regno < LR_REGNUM; regno++)
20067 {
20068 rtx label = cfun->machine->call_via[regno];
20069
20070 if (label != NULL)
20071 {
20072 switch_to_section (function_section (current_function_decl));
20073 targetm.asm_out.internal_label (asm_out_file, "L",
20074 CODE_LABEL_NUMBER (label));
20075 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
20076 }
20077 }
20078
20079 /* ??? Probably not safe to set this here, since it assumes that a
20080 function will be emitted as assembly immediately after we generate
20081 RTL for it. This does not happen for inline functions. */
20082 cfun->machine->return_used_this_function = 0;
20083 }
20084 else /* TARGET_32BIT */
20085 {
20086 /* We need to take into account any stack-frame rounding. */
20087 offsets = arm_get_frame_offsets ();
20088
20089 gcc_assert (!use_return_insn (FALSE, NULL)
20090 || (cfun->machine->return_used_this_function != 0)
20091 || offsets->saved_regs == offsets->outgoing_args
20092 || frame_pointer_needed);
20093 }
20094 }
20095
20096 /* Generate and emit a sequence of insns equivalent to PUSH, but using
20097 STR and STRD. If an even number of registers is being pushed, an STRD
20098 pattern is created for each register pair. If an
20099 odd number of registers is pushed, emit an initial STR followed by
20100 as many STRD instructions as are needed. This works best when the
20101 stack is initially 64-bit aligned (the normal case), since it
20102 ensures that each STRD is also 64-bit aligned. */
20103 static void
20104 thumb2_emit_strd_push (unsigned long saved_regs_mask)
20105 {
20106 int num_regs = 0;
20107 int i;
20108 int regno;
20109 rtx par = NULL_RTX;
20110 rtx dwarf = NULL_RTX;
20111 rtx tmp;
20112 bool first = true;
20113
20114 num_regs = bit_count (saved_regs_mask);
20115
20116 /* Must be at least one register to save, and can't save SP or PC. */
20117 gcc_assert (num_regs > 0 && num_regs <= 14);
20118 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20119 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20120
20121 /* Create sequence for DWARF info. All the frame-related data for
20122 debugging is held in this wrapper. */
20123 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20124
20125 /* Describe the stack adjustment. */
20126 tmp = gen_rtx_SET (stack_pointer_rtx,
20127 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20128 RTX_FRAME_RELATED_P (tmp) = 1;
20129 XVECEXP (dwarf, 0, 0) = tmp;
20130
20131 /* Find the first register. */
20132 for (regno = 0; (saved_regs_mask & (1 << regno)) == 0; regno++)
20133 ;
20134
20135 i = 0;
20136
20137 /* If there's an odd number of registers to push, start off by
20138 pushing a single register. This ensures that subsequent strd
20139 operations are dword aligned (assuming that SP was originally
20140 64-bit aligned). */
20141 if ((num_regs & 1) != 0)
20142 {
20143 rtx reg, mem, insn;
20144
20145 reg = gen_rtx_REG (SImode, regno);
20146 if (num_regs == 1)
20147 mem = gen_frame_mem (Pmode, gen_rtx_PRE_DEC (Pmode,
20148 stack_pointer_rtx));
20149 else
20150 mem = gen_frame_mem (Pmode,
20151 gen_rtx_PRE_MODIFY
20152 (Pmode, stack_pointer_rtx,
20153 plus_constant (Pmode, stack_pointer_rtx,
20154 -4 * num_regs)));
20155
20156 tmp = gen_rtx_SET (mem, reg);
20157 RTX_FRAME_RELATED_P (tmp) = 1;
20158 insn = emit_insn (tmp);
20159 RTX_FRAME_RELATED_P (insn) = 1;
20160 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20161 tmp = gen_rtx_SET (gen_frame_mem (Pmode, stack_pointer_rtx), reg);
20162 RTX_FRAME_RELATED_P (tmp) = 1;
20163 i++;
20164 regno++;
20165 XVECEXP (dwarf, 0, i) = tmp;
20166 first = false;
20167 }
20168
20169 while (i < num_regs)
20170 if (saved_regs_mask & (1 << regno))
20171 {
20172 rtx reg1, reg2, mem1, mem2;
20173 rtx tmp0, tmp1, tmp2;
20174 int regno2;
20175
20176 /* Find the register to pair with this one. */
20177 for (regno2 = regno + 1; (saved_regs_mask & (1 << regno2)) == 0;
20178 regno2++)
20179 ;
20180
20181 reg1 = gen_rtx_REG (SImode, regno);
20182 reg2 = gen_rtx_REG (SImode, regno2);
20183
20184 if (first)
20185 {
20186 rtx insn;
20187
20188 first = false;
20189 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20190 stack_pointer_rtx,
20191 -4 * num_regs));
20192 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20193 stack_pointer_rtx,
20194 -4 * (num_regs - 1)));
20195 tmp0 = gen_rtx_SET (stack_pointer_rtx,
20196 plus_constant (Pmode, stack_pointer_rtx,
20197 -4 * (num_regs)));
20198 tmp1 = gen_rtx_SET (mem1, reg1);
20199 tmp2 = gen_rtx_SET (mem2, reg2);
20200 RTX_FRAME_RELATED_P (tmp0) = 1;
20201 RTX_FRAME_RELATED_P (tmp1) = 1;
20202 RTX_FRAME_RELATED_P (tmp2) = 1;
20203 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (3));
20204 XVECEXP (par, 0, 0) = tmp0;
20205 XVECEXP (par, 0, 1) = tmp1;
20206 XVECEXP (par, 0, 2) = tmp2;
20207 insn = emit_insn (par);
20208 RTX_FRAME_RELATED_P (insn) = 1;
20209 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20210 }
20211 else
20212 {
20213 mem1 = gen_frame_mem (Pmode, plus_constant (Pmode,
20214 stack_pointer_rtx,
20215 4 * i));
20216 mem2 = gen_frame_mem (Pmode, plus_constant (Pmode,
20217 stack_pointer_rtx,
20218 4 * (i + 1)));
20219 tmp1 = gen_rtx_SET (mem1, reg1);
20220 tmp2 = gen_rtx_SET (mem2, reg2);
20221 RTX_FRAME_RELATED_P (tmp1) = 1;
20222 RTX_FRAME_RELATED_P (tmp2) = 1;
20223 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20224 XVECEXP (par, 0, 0) = tmp1;
20225 XVECEXP (par, 0, 1) = tmp2;
20226 emit_insn (par);
20227 }
20228
20229 /* Create unwind information. This is an approximation. */
20230 tmp1 = gen_rtx_SET (gen_frame_mem (Pmode,
20231 plus_constant (Pmode,
20232 stack_pointer_rtx,
20233 4 * i)),
20234 reg1);
20235 tmp2 = gen_rtx_SET (gen_frame_mem (Pmode,
20236 plus_constant (Pmode,
20237 stack_pointer_rtx,
20238 4 * (i + 1))),
20239 reg2);
20240
20241 RTX_FRAME_RELATED_P (tmp1) = 1;
20242 RTX_FRAME_RELATED_P (tmp2) = 1;
20243 XVECEXP (dwarf, 0, i + 1) = tmp1;
20244 XVECEXP (dwarf, 0, i + 2) = tmp2;
20245 i += 2;
20246 regno = regno2 + 1;
20247 }
20248 else
20249 regno++;
20250
20251 return;
20252 }
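/* Rough example of the sequence built above (illustrative): for
   saved_regs_mask selecting {r4, r5, r6}, i.e. an odd count of three, the
   first store is "str r4, [sp, #-12]!", allocating all 12 bytes at once,
   and the remaining pair is then stored as "strd r5, r6, [sp, #4]"
   (offsets 4 * i and 4 * (i + 1) with i == 1).  */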
20253
20254 /* STRD in ARM mode requires consecutive registers. This function emits STRD
20255 whenever possible, otherwise it emits single-word stores. The first store
20256 also allocates stack space for all saved registers, using writeback with
20257 pre-indexed addressing. All other stores use offset addressing. If no STRD
20258 can be emitted, this function emits a sequence of single-word stores instead
20259 of an STM, as it used to, because single-word stores give more scheduling
20260 freedom and can be turned into an STM by peephole optimizations. */
20261 static void
20262 arm_emit_strd_push (unsigned long saved_regs_mask)
20263 {
20264 int num_regs = 0;
20265 int i, j, dwarf_index = 0;
20266 int offset = 0;
20267 rtx dwarf = NULL_RTX;
20268 rtx insn = NULL_RTX;
20269 rtx tmp, mem;
20270
20271 /* TODO: More efficient code can be emitted by changing the
20272 layout, e.g., first push all pairs that can use STRD to keep the
20273 stack aligned, and then push all other registers. */
20274 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20275 if (saved_regs_mask & (1 << i))
20276 num_regs++;
20277
20278 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20279 gcc_assert (!(saved_regs_mask & (1 << PC_REGNUM)));
20280 gcc_assert (num_regs > 0);
20281
20282 /* Create sequence for DWARF info. */
20283 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_regs + 1));
20284
20285 /* For dwarf info, we generate explicit stack update. */
20286 tmp = gen_rtx_SET (stack_pointer_rtx,
20287 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20288 RTX_FRAME_RELATED_P (tmp) = 1;
20289 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20290
20291 /* Save registers. */
20292 offset = - 4 * num_regs;
20293 j = 0;
20294 while (j <= LAST_ARM_REGNUM)
20295 if (saved_regs_mask & (1 << j))
20296 {
20297 if ((j % 2 == 0)
20298 && (saved_regs_mask & (1 << (j + 1))))
20299 {
20300 /* The current register and the next register form a register pair
20301 for which STRD can be generated. */
20302 if (offset < 0)
20303 {
20304 /* Allocate stack space for all saved registers. */
20305 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20306 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20307 mem = gen_frame_mem (DImode, tmp);
20308 offset = 0;
20309 }
20310 else if (offset > 0)
20311 mem = gen_frame_mem (DImode,
20312 plus_constant (Pmode,
20313 stack_pointer_rtx,
20314 offset));
20315 else
20316 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20317
20318 tmp = gen_rtx_SET (mem, gen_rtx_REG (DImode, j));
20319 RTX_FRAME_RELATED_P (tmp) = 1;
20320 tmp = emit_insn (tmp);
20321
20322 /* Record the first store insn. */
20323 if (dwarf_index == 1)
20324 insn = tmp;
20325
20326 /* Generate dwarf info. */
20327 mem = gen_frame_mem (SImode,
20328 plus_constant (Pmode,
20329 stack_pointer_rtx,
20330 offset));
20331 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20332 RTX_FRAME_RELATED_P (tmp) = 1;
20333 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20334
20335 mem = gen_frame_mem (SImode,
20336 plus_constant (Pmode,
20337 stack_pointer_rtx,
20338 offset + 4));
20339 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j + 1));
20340 RTX_FRAME_RELATED_P (tmp) = 1;
20341 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20342
20343 offset += 8;
20344 j += 2;
20345 }
20346 else
20347 {
20348 /* Emit a single word store. */
20349 if (offset < 0)
20350 {
20351 /* Allocate stack space for all saved registers. */
20352 tmp = plus_constant (Pmode, stack_pointer_rtx, offset);
20353 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
20354 mem = gen_frame_mem (SImode, tmp);
20355 offset = 0;
20356 }
20357 else if (offset > 0)
20358 mem = gen_frame_mem (SImode,
20359 plus_constant (Pmode,
20360 stack_pointer_rtx,
20361 offset));
20362 else
20363 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20364
20365 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20366 RTX_FRAME_RELATED_P (tmp) = 1;
20367 tmp = emit_insn (tmp);
20368
20369 /* Record the first store insn. */
20370 if (dwarf_index == 1)
20371 insn = tmp;
20372
20373 /* Generate dwarf info. */
20374 mem = gen_frame_mem (SImode,
20375 plus_constant(Pmode,
20376 stack_pointer_rtx,
20377 offset));
20378 tmp = gen_rtx_SET (mem, gen_rtx_REG (SImode, j));
20379 RTX_FRAME_RELATED_P (tmp) = 1;
20380 XVECEXP (dwarf, 0, dwarf_index++) = tmp;
20381
20382 offset += 4;
20383 j += 1;
20384 }
20385 }
20386 else
20387 j++;
20388
20389 /* Attach dwarf info to the first insn we generate. */
20390 gcc_assert (insn != NULL_RTX);
20391 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
20392 RTX_FRAME_RELATED_P (insn) = 1;
20393 }
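/* Rough example (illustrative): with saved_regs_mask selecting {r4, r5, r7},
   the first insn is an STRD of r4/r5 whose address pre-modifies SP by -12,
   so it allocates space for all three registers, and r7 is then stored with
   "str r7, [sp, #8]".  The DWARF note attached to that first insn still
   describes the saves as three separate word-sized stores plus a single
   12-byte SP decrement.  */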
20394
20395 /* Generate and emit an insn that we will recognize as a push_multi.
20396 Unfortunately, since this insn does not reflect very well the actual
20397 semantics of the operation, we need to annotate the insn for the benefit
20398 of DWARF2 frame unwind information. DWARF_REGS_MASK is a subset of
20399 MASK for registers that should be annotated for DWARF2 frame unwind
20400 information. */
20401 static rtx
20402 emit_multi_reg_push (unsigned long mask, unsigned long dwarf_regs_mask)
20403 {
20404 int num_regs = 0;
20405 int num_dwarf_regs = 0;
20406 int i, j;
20407 rtx par;
20408 rtx dwarf;
20409 int dwarf_par_index;
20410 rtx tmp, reg;
20411
20412 /* We don't record the PC in the dwarf frame information. */
20413 dwarf_regs_mask &= ~(1 << PC_REGNUM);
20414
20415 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20416 {
20417 if (mask & (1 << i))
20418 num_regs++;
20419 if (dwarf_regs_mask & (1 << i))
20420 num_dwarf_regs++;
20421 }
20422
20423 gcc_assert (num_regs && num_regs <= 16);
20424 gcc_assert ((dwarf_regs_mask & ~mask) == 0);
20425
20426 /* For the body of the insn we are going to generate an UNSPEC in
20427 parallel with several USEs. This allows the insn to be recognized
20428 by the push_multi pattern in the arm.md file.
20429
20430 The body of the insn looks something like this:
20431
20432 (parallel [
20433 (set (mem:BLK (pre_modify:SI (reg:SI sp)
20434 (const_int:SI <num>)))
20435 (unspec:BLK [(reg:SI r4)] UNSPEC_PUSH_MULT))
20436 (use (reg:SI XX))
20437 (use (reg:SI YY))
20438 ...
20439 ])
20440
20441 For the frame note however, we try to be more explicit and actually
20442 show each register being stored into the stack frame, plus a (single)
20443 decrement of the stack pointer. We do it this way in order to be
20444 friendly to the stack unwinding code, which only wants to see a single
20445 stack decrement per instruction. The RTL we generate for the note looks
20446 something like this:
20447
20448 (sequence [
20449 (set (reg:SI sp) (plus:SI (reg:SI sp) (const_int -20)))
20450 (set (mem:SI (reg:SI sp)) (reg:SI r4))
20451 (set (mem:SI (plus:SI (reg:SI sp) (const_int 4))) (reg:SI XX))
20452 (set (mem:SI (plus:SI (reg:SI sp) (const_int 8))) (reg:SI YY))
20453 ...
20454 ])
20455
20456 FIXME: In an ideal world the PRE_MODIFY would not exist and
20457 instead we'd have a parallel expression detailing all
20458 the stores to the various memory addresses so that debug
20459 information is more up-to-date. Remember however while writing
20460 this to take care of the constraints with the push instruction.
20461
20462 Note also that this has to be taken care of for the VFP registers.
20463
20464 For more see PR43399. */
20465
20466 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs));
20467 dwarf = gen_rtx_SEQUENCE (VOIDmode, rtvec_alloc (num_dwarf_regs + 1));
20468 dwarf_par_index = 1;
20469
20470 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20471 {
20472 if (mask & (1 << i))
20473 {
20474 reg = gen_rtx_REG (SImode, i);
20475
20476 XVECEXP (par, 0, 0)
20477 = gen_rtx_SET (gen_frame_mem
20478 (BLKmode,
20479 gen_rtx_PRE_MODIFY (Pmode,
20480 stack_pointer_rtx,
20481 plus_constant
20482 (Pmode, stack_pointer_rtx,
20483 -4 * num_regs))
20484 ),
20485 gen_rtx_UNSPEC (BLKmode,
20486 gen_rtvec (1, reg),
20487 UNSPEC_PUSH_MULT));
20488
20489 if (dwarf_regs_mask & (1 << i))
20490 {
20491 tmp = gen_rtx_SET (gen_frame_mem (SImode, stack_pointer_rtx),
20492 reg);
20493 RTX_FRAME_RELATED_P (tmp) = 1;
20494 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20495 }
20496
20497 break;
20498 }
20499 }
20500
20501 for (j = 1, i++; j < num_regs; i++)
20502 {
20503 if (mask & (1 << i))
20504 {
20505 reg = gen_rtx_REG (SImode, i);
20506
20507 XVECEXP (par, 0, j) = gen_rtx_USE (VOIDmode, reg);
20508
20509 if (dwarf_regs_mask & (1 << i))
20510 {
20511 tmp
20512 = gen_rtx_SET (gen_frame_mem
20513 (SImode,
20514 plus_constant (Pmode, stack_pointer_rtx,
20515 4 * j)),
20516 reg);
20517 RTX_FRAME_RELATED_P (tmp) = 1;
20518 XVECEXP (dwarf, 0, dwarf_par_index++) = tmp;
20519 }
20520
20521 j++;
20522 }
20523 }
20524
20525 par = emit_insn (par);
20526
20527 tmp = gen_rtx_SET (stack_pointer_rtx,
20528 plus_constant (Pmode, stack_pointer_rtx, -4 * num_regs));
20529 RTX_FRAME_RELATED_P (tmp) = 1;
20530 XVECEXP (dwarf, 0, 0) = tmp;
20531
20532 add_reg_note (par, REG_FRAME_RELATED_EXPR, dwarf);
20533
20534 return par;
20535 }
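/* A note on the two masks above (restating the interface, not adding to it):
   a register whose bit is set in MASK but clear in DWARF_REGS_MASK is still
   pushed by the generated insn, but no entry for it is added to the attached
   frame note, so the unwinder is never told about that save.  Callers that
   want every pushed register described simply pass the same mask twice.  */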
20536
20537 /* Add a REG_CFA_ADJUST_CFA REG note to INSN.
20538 SIZE is the offset to be adjusted.
20539 DEST and SRC might be stack_pointer_rtx or hard_frame_pointer_rtx. */
20540 static void
20541 arm_add_cfa_adjust_cfa_note (rtx insn, int size, rtx dest, rtx src)
20542 {
20543 rtx dwarf;
20544
20545 RTX_FRAME_RELATED_P (insn) = 1;
20546 dwarf = gen_rtx_SET (dest, plus_constant (Pmode, src, size));
20547 add_reg_note (insn, REG_CFA_ADJUST_CFA, dwarf);
20548 }
20549
20550 /* Generate and emit an insn pattern that we will recognize as a pop_multi.
20551 SAVED_REGS_MASK shows which registers need to be restored.
20552
20553 Unfortunately, since this insn does not reflect very well the actual
20554 semantics of the operation, we need to annotate the insn for the benefit
20555 of DWARF2 frame unwind information. */
20556 static void
20557 arm_emit_multi_reg_pop (unsigned long saved_regs_mask)
20558 {
20559 int num_regs = 0;
20560 int i, j;
20561 rtx par;
20562 rtx dwarf = NULL_RTX;
20563 rtx tmp, reg;
20564 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20565 int offset_adj;
20566 int emit_update;
20567
20568 offset_adj = return_in_pc ? 1 : 0;
20569 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20570 if (saved_regs_mask & (1 << i))
20571 num_regs++;
20572
20573 gcc_assert (num_regs && num_regs <= 16);
20574
20575 /* If SP is in reglist, then we don't emit SP update insn. */
20576 emit_update = (saved_regs_mask & (1 << SP_REGNUM)) ? 0 : 1;
20577
20578 /* The parallel needs to hold num_regs SETs
20579 and one SET for the stack update. */
20580 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + emit_update + offset_adj));
20581
20582 if (return_in_pc)
20583 XVECEXP (par, 0, 0) = ret_rtx;
20584
20585 if (emit_update)
20586 {
20587 /* Increment the stack pointer, based on there being
20588 num_regs 4-byte registers to restore. */
20589 tmp = gen_rtx_SET (stack_pointer_rtx,
20590 plus_constant (Pmode,
20591 stack_pointer_rtx,
20592 4 * num_regs));
20593 RTX_FRAME_RELATED_P (tmp) = 1;
20594 XVECEXP (par, 0, offset_adj) = tmp;
20595 }
20596
20597 /* Now restore every reg, which may include PC. */
20598 for (j = 0, i = 0; j < num_regs; i++)
20599 if (saved_regs_mask & (1 << i))
20600 {
20601 reg = gen_rtx_REG (SImode, i);
20602 if ((num_regs == 1) && emit_update && !return_in_pc)
20603 {
20604 /* Emit single load with writeback. */
20605 tmp = gen_frame_mem (SImode,
20606 gen_rtx_POST_INC (Pmode,
20607 stack_pointer_rtx));
20608 tmp = emit_insn (gen_rtx_SET (reg, tmp));
20609 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20610 return;
20611 }
20612
20613 tmp = gen_rtx_SET (reg,
20614 gen_frame_mem
20615 (SImode,
20616 plus_constant (Pmode, stack_pointer_rtx, 4 * j)));
20617 RTX_FRAME_RELATED_P (tmp) = 1;
20618 XVECEXP (par, 0, j + emit_update + offset_adj) = tmp;
20619
20620 /* We need to maintain a sequence for DWARF info too. As dwarf info
20621 should not have PC, skip PC. */
20622 if (i != PC_REGNUM)
20623 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20624
20625 j++;
20626 }
20627
20628 if (return_in_pc)
20629 par = emit_jump_insn (par);
20630 else
20631 par = emit_insn (par);
20632
20633 REG_NOTES (par) = dwarf;
20634 if (!return_in_pc)
20635 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD * num_regs,
20636 stack_pointer_rtx, stack_pointer_rtx);
20637 }
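/* Rough example of the parallel built above (illustrative): popping
   {r4, r5, pc} gives return_in_pc = true and emit_update = 1, so the
   PARALLEL holds five elements: the return, the SP increment by 12, and one
   SET each for r4, r5 and PC.  The REG_CFA_RESTORE notes cover r4 and r5
   only, since the PC is deliberately kept out of the DWARF information.  */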
20638
20639 /* Generate and emit an insn pattern that we will recognize as a pop_multi
20640 of NUM_REGS consecutive VFP regs, starting at FIRST_REG.
20641
20642 Unfortunately, since this insn does not reflect very well the actual
20643 semantics of the operation, we need to annotate the insn for the benefit
20644 of DWARF2 frame unwind information. */
20645 static void
20646 arm_emit_vfp_multi_reg_pop (int first_reg, int num_regs, rtx base_reg)
20647 {
20648 int i, j;
20649 rtx par;
20650 rtx dwarf = NULL_RTX;
20651 rtx tmp, reg;
20652
20653 gcc_assert (num_regs && num_regs <= 32);
20654
20655 /* Workaround ARM10 VFPr1 bug. */
20656 if (num_regs == 2 && !arm_arch6)
20657 {
20658 if (first_reg == 15)
20659 first_reg--;
20660
20661 num_regs++;
20662 }
20663
20664 /* We can emit at most 16 D-registers in a single pop_multi instruction, and
20665 there could be up to 32 D-registers to restore.
20666 If there are more than 16 D-registers, make two recursive calls,
20667 each of which emits one pop_multi instruction. */
20668 if (num_regs > 16)
20669 {
20670 arm_emit_vfp_multi_reg_pop (first_reg, 16, base_reg);
20671 arm_emit_vfp_multi_reg_pop (first_reg + 16, num_regs - 16, base_reg);
20672 return;
20673 }
20674
20675 /* The parallel needs to hold num_regs SETs
20676 and one SET for the stack update. */
20677 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (num_regs + 1));
20678
20679 /* Increment the stack pointer, based on there being
20680 num_regs 8-byte registers to restore. */
20681 tmp = gen_rtx_SET (base_reg, plus_constant (Pmode, base_reg, 8 * num_regs));
20682 RTX_FRAME_RELATED_P (tmp) = 1;
20683 XVECEXP (par, 0, 0) = tmp;
20684
20685 /* Now show every reg that will be restored, using a SET for each. */
20686 for (j = 0, i=first_reg; j < num_regs; i += 2)
20687 {
20688 reg = gen_rtx_REG (DFmode, i);
20689
20690 tmp = gen_rtx_SET (reg,
20691 gen_frame_mem
20692 (DFmode,
20693 plus_constant (Pmode, base_reg, 8 * j)));
20694 RTX_FRAME_RELATED_P (tmp) = 1;
20695 XVECEXP (par, 0, j + 1) = tmp;
20696
20697 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20698
20699 j++;
20700 }
20701
20702 par = emit_insn (par);
20703 REG_NOTES (par) = dwarf;
20704
20705 /* Make sure the CFA doesn't remain based on IP_REGNUM, to allow unwinding from FP. */
20706 if (REGNO (base_reg) == IP_REGNUM)
20707 {
20708 RTX_FRAME_RELATED_P (par) = 1;
20709 add_reg_note (par, REG_CFA_DEF_CFA, hard_frame_pointer_rtx);
20710 }
20711 else
20712 arm_add_cfa_adjust_cfa_note (par, 2 * UNITS_PER_WORD * num_regs,
20713 base_reg, base_reg);
20714 }
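/* Rough examples of the above (illustrative): restoring d8-d15 relative to SP
   emits a single pop_multi of eight D registers and advances the base by
   8 * 8 = 64 bytes; restoring more than sixteen D registers is split into two
   recursive calls, each emitting one pop_multi.  On a pre-Armv6 core the
   ARM10 VFPr1 workaround widens a two-register pop to three, mirroring
   arm_get_vfp_saved_size above.  */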
20715
20716 /* Generate and emit a pattern that will be recognized as an LDRD pattern. If
20717 an even number of registers is being popped, LDRD patterns are created for
20718 all register pairs. If an odd number of registers is popped, the last
20719 register is loaded using an LDR pattern. */
20720 static void
20721 thumb2_emit_ldrd_pop (unsigned long saved_regs_mask)
20722 {
20723 int num_regs = 0;
20724 int i, j;
20725 rtx par = NULL_RTX;
20726 rtx dwarf = NULL_RTX;
20727 rtx tmp, reg, tmp1;
20728 bool return_in_pc = saved_regs_mask & (1 << PC_REGNUM);
20729
20730 for (i = 0; i <= LAST_ARM_REGNUM; i++)
20731 if (saved_regs_mask & (1 << i))
20732 num_regs++;
20733
20734 gcc_assert (num_regs && num_regs <= 16);
20735
20736 /* We cannot generate an LDRD for PC. Hence, reduce the count if PC is
20737 to be popped. So, if num_regs was even, it will now become odd,
20738 and we can generate a pop with PC. If num_regs was odd, it will be
20739 even now, and an LDR with return can be generated for PC. */
20740 if (return_in_pc)
20741 num_regs--;
20742
20743 gcc_assert (!(saved_regs_mask & (1 << SP_REGNUM)));
20744
20745 /* Var j iterates over all the registers to find those set in
20746 saved_regs_mask. Var i gives the index of a saved register in the stack
20747 frame. A PARALLEL RTX of a register pair is created here, so that the
20748 pattern for LDRD can be matched. As PC is always the last register to be
20749 popped, and we have already decremented num_regs if PC is to be popped,
20750 we don't have to worry about PC in this loop. */
20751 for (i = 0, j = 0; i < (num_regs - (num_regs % 2)); j++)
20752 if (saved_regs_mask & (1 << j))
20753 {
20754 /* Create RTX for memory load. */
20755 reg = gen_rtx_REG (SImode, j);
20756 tmp = gen_rtx_SET (reg,
20757 gen_frame_mem (SImode,
20758 plus_constant (Pmode,
20759 stack_pointer_rtx, 4 * i)));
20760 RTX_FRAME_RELATED_P (tmp) = 1;
20761
20762 if (i % 2 == 0)
20763 {
20764 /* When saved-register index (i) is even, the RTX to be emitted is
20765 yet to be created. Hence create it first. The LDRD pattern we
20766 are generating is :
20767 [ (SET (reg_t0) (MEM (PLUS (SP) (NUM))))
20768 (SET (reg_t1) (MEM (PLUS (SP) (NUM + 4)))) ]
20769 where target registers need not be consecutive. */
20770 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20771 dwarf = NULL_RTX;
20772 }
20773
20774 /* ith register is added in PARALLEL RTX. If i is even, the reg_i is
20775 added as 0th element and if i is odd, reg_i is added as 1st element
20776 of LDRD pattern shown above. */
20777 XVECEXP (par, 0, (i % 2)) = tmp;
20778 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20779
20780 if ((i % 2) == 1)
20781 {
20782 /* When saved-register index (i) is odd, RTXs for both the registers
20783 to be loaded are generated in above given LDRD pattern, and the
20784 pattern can be emitted now. */
20785 par = emit_insn (par);
20786 REG_NOTES (par) = dwarf;
20787 RTX_FRAME_RELATED_P (par) = 1;
20788 }
20789
20790 i++;
20791 }
20792
20793 /* If the number of registers popped is odd and return_in_pc is false, or the
20794 number of registers is even and return_in_pc is true, the last register is
20795 popped using LDR. It can be PC as well. Hence, adjust the stack first and
20796 then use LDR with post-increment. */
20797
20798 /* Increment the stack pointer, based on there being
20799 num_regs 4-byte registers to restore. */
20800 tmp = gen_rtx_SET (stack_pointer_rtx,
20801 plus_constant (Pmode, stack_pointer_rtx, 4 * i));
20802 RTX_FRAME_RELATED_P (tmp) = 1;
20803 tmp = emit_insn (tmp);
20804 if (!return_in_pc)
20805 {
20806 arm_add_cfa_adjust_cfa_note (tmp, UNITS_PER_WORD * i,
20807 stack_pointer_rtx, stack_pointer_rtx);
20808 }
20809
20810 dwarf = NULL_RTX;
20811
20812 if (((num_regs % 2) == 1 && !return_in_pc)
20813 || ((num_regs % 2) == 0 && return_in_pc))
20814 {
20815 /* Scan for the single register to be popped. Skip until the saved
20816 register is found. */
20817 for (; (saved_regs_mask & (1 << j)) == 0; j++);
20818
20819 /* Gen LDR with post increment here. */
20820 tmp1 = gen_rtx_MEM (SImode,
20821 gen_rtx_POST_INC (SImode,
20822 stack_pointer_rtx));
20823 set_mem_alias_set (tmp1, get_frame_alias_set ());
20824
20825 reg = gen_rtx_REG (SImode, j);
20826 tmp = gen_rtx_SET (reg, tmp1);
20827 RTX_FRAME_RELATED_P (tmp) = 1;
20828 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
20829
20830 if (return_in_pc)
20831 {
20832 /* If return_in_pc, j must be PC_REGNUM. */
20833 gcc_assert (j == PC_REGNUM);
20834 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20835 XVECEXP (par, 0, 0) = ret_rtx;
20836 XVECEXP (par, 0, 1) = tmp;
20837 par = emit_jump_insn (par);
20838 }
20839 else
20840 {
20841 par = emit_insn (tmp);
20842 REG_NOTES (par) = dwarf;
20843 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20844 stack_pointer_rtx, stack_pointer_rtx);
20845 }
20846
20847 }
20848 else if ((num_regs % 2) == 1 && return_in_pc)
20849 {
20850 /* There are 2 registers to be popped. So, generate the pattern
20851 pop_multiple_with_stack_update_and_return to pop in PC. */
20852 arm_emit_multi_reg_pop (saved_regs_mask & (~((1 << j) - 1)));
20853 }
20854
20855 return;
20856 }
20857
20858 /* LDRD in ARM mode needs consecutive registers as operands. This function
20859 emits LDRD whenever possible, otherwise it emits single-word loads. It uses
20860 offset addressing and then generates one separate stack update. This provides
20861 more scheduling freedom, compared to writeback on every load. However,
20862 if the function returns using load into PC directly
20863 (i.e., if PC is in SAVED_REGS_MASK), the stack needs to be updated
20864 before the last load. TODO: Add a peephole optimization to recognize
20865 the new epilogue sequence as an LDM instruction whenever possible. TODO: Add
20866 peephole optimization to merge the load at stack-offset zero
20867 with the stack update instruction using load with writeback
20868 in post-index addressing mode. */
20869 static void
20870 arm_emit_ldrd_pop (unsigned long saved_regs_mask)
20871 {
20872 int j = 0;
20873 int offset = 0;
20874 rtx par = NULL_RTX;
20875 rtx dwarf = NULL_RTX;
20876 rtx tmp, mem;
20877
20878 /* Restore saved registers. */
20879 gcc_assert (!((saved_regs_mask & (1 << SP_REGNUM))));
20880 j = 0;
20881 while (j <= LAST_ARM_REGNUM)
20882 if (saved_regs_mask & (1 << j))
20883 {
20884 if ((j % 2) == 0
20885 && (saved_regs_mask & (1 << (j + 1)))
20886 && (j + 1) != PC_REGNUM)
20887 {
20888 /* Current register and next register form register pair for which
20889 LDRD can be generated. PC is always the last register popped, and
20890 we handle it separately. */
20891 if (offset > 0)
20892 mem = gen_frame_mem (DImode,
20893 plus_constant (Pmode,
20894 stack_pointer_rtx,
20895 offset));
20896 else
20897 mem = gen_frame_mem (DImode, stack_pointer_rtx);
20898
20899 tmp = gen_rtx_SET (gen_rtx_REG (DImode, j), mem);
20900 tmp = emit_insn (tmp);
20901 RTX_FRAME_RELATED_P (tmp) = 1;
20902
20903 /* Generate dwarf info. */
20904
20905 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20906 gen_rtx_REG (SImode, j),
20907 NULL_RTX);
20908 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20909 gen_rtx_REG (SImode, j + 1),
20910 dwarf);
20911
20912 REG_NOTES (tmp) = dwarf;
20913
20914 offset += 8;
20915 j += 2;
20916 }
20917 else if (j != PC_REGNUM)
20918 {
20919 /* Emit a single word load. */
20920 if (offset > 0)
20921 mem = gen_frame_mem (SImode,
20922 plus_constant (Pmode,
20923 stack_pointer_rtx,
20924 offset));
20925 else
20926 mem = gen_frame_mem (SImode, stack_pointer_rtx);
20927
20928 tmp = gen_rtx_SET (gen_rtx_REG (SImode, j), mem);
20929 tmp = emit_insn (tmp);
20930 RTX_FRAME_RELATED_P (tmp) = 1;
20931
20932 /* Generate dwarf info. */
20933 REG_NOTES (tmp) = alloc_reg_note (REG_CFA_RESTORE,
20934 gen_rtx_REG (SImode, j),
20935 NULL_RTX);
20936
20937 offset += 4;
20938 j += 1;
20939 }
20940 else /* j == PC_REGNUM */
20941 j++;
20942 }
20943 else
20944 j++;
20945
20946 /* Update the stack. */
20947 if (offset > 0)
20948 {
20949 tmp = gen_rtx_SET (stack_pointer_rtx,
20950 plus_constant (Pmode,
20951 stack_pointer_rtx,
20952 offset));
20953 tmp = emit_insn (tmp);
20954 arm_add_cfa_adjust_cfa_note (tmp, offset,
20955 stack_pointer_rtx, stack_pointer_rtx);
20956 offset = 0;
20957 }
20958
20959 if (saved_regs_mask & (1 << PC_REGNUM))
20960 {
20961 /* Only PC is to be popped. */
20962 par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
20963 XVECEXP (par, 0, 0) = ret_rtx;
20964 tmp = gen_rtx_SET (gen_rtx_REG (SImode, PC_REGNUM),
20965 gen_frame_mem (SImode,
20966 gen_rtx_POST_INC (SImode,
20967 stack_pointer_rtx)));
20968 RTX_FRAME_RELATED_P (tmp) = 1;
20969 XVECEXP (par, 0, 1) = tmp;
20970 par = emit_jump_insn (par);
20971
20972 /* Generate dwarf info. */
20973 dwarf = alloc_reg_note (REG_CFA_RESTORE,
20974 gen_rtx_REG (SImode, PC_REGNUM),
20975 NULL_RTX);
20976 REG_NOTES (par) = dwarf;
20977 arm_add_cfa_adjust_cfa_note (par, UNITS_PER_WORD,
20978 stack_pointer_rtx, stack_pointer_rtx);
20979 }
20980 }
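/* Rough example (illustrative): with saved_regs_mask selecting {r4, r5, pc},
   the r4/r5 pair is restored with "ldrd r4, r5, [sp]", SP is then bumped by
   8 (with a matching REG_CFA_ADJUST_CFA note), and finally the PC is loaded
   by a post-incremented "ldr pc, [sp], #4" wrapped in a PARALLEL with the
   return, so the unwinder sees the stack fully released before control
   leaves the function.  */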
20981
20982 /* Calculate the size of the return value that is passed in registers. */
20983 static unsigned
20984 arm_size_return_regs (void)
20985 {
20986 machine_mode mode;
20987
20988 if (crtl->return_rtx != 0)
20989 mode = GET_MODE (crtl->return_rtx);
20990 else
20991 mode = DECL_MODE (DECL_RESULT (current_function_decl));
20992
20993 return GET_MODE_SIZE (mode);
20994 }
20995
20996 /* Return true if the current function needs to save/restore LR. */
20997 static bool
20998 thumb_force_lr_save (void)
20999 {
21000 return !cfun->machine->lr_save_eliminated
21001 && (!crtl->is_leaf
21002 || thumb_far_jump_used_p ()
21003 || df_regs_ever_live_p (LR_REGNUM));
21004 }
21005
21006 /* We do not know whether r3 will be available, because
21007 an indirect tail call is happening in this
21008 particular case. */
21009 static bool
21010 is_indirect_tailcall_p (rtx call)
21011 {
21012 rtx pat = PATTERN (call);
21013
21014 /* Indirect tail call. */
21015 pat = XVECEXP (pat, 0, 0);
21016 if (GET_CODE (pat) == SET)
21017 pat = SET_SRC (pat);
21018
21019 pat = XEXP (XEXP (pat, 0), 0);
21020 return REG_P (pat);
21021 }
21022
21023 /* Return true if r3 is used by any of the tail call insns in the
21024 current function. */
21025 static bool
21026 any_sibcall_could_use_r3 (void)
21027 {
21028 edge_iterator ei;
21029 edge e;
21030
21031 if (!crtl->tail_call_emit)
21032 return false;
21033 FOR_EACH_EDGE (e, ei, EXIT_BLOCK_PTR_FOR_FN (cfun)->preds)
21034 if (e->flags & EDGE_SIBCALL)
21035 {
21036 rtx_insn *call = BB_END (e->src);
21037 if (!CALL_P (call))
21038 call = prev_nonnote_nondebug_insn (call);
21039 gcc_assert (CALL_P (call) && SIBLING_CALL_P (call));
21040 if (find_regno_fusage (call, USE, 3)
21041 || is_indirect_tailcall_p (call))
21042 return true;
21043 }
21044 return false;
21045 }
21046
21047
21048 /* Compute the distance from register FROM to register TO.
21049 These can be the arg pointer (26), the soft frame pointer (25),
21050 the stack pointer (13) or the hard frame pointer (11).
21051 In thumb mode r7 is used as the soft frame pointer, if needed.
21052 Typical stack layout looks like this:
21053
21054     old stack pointer -> |    |
21055                           ----
21056                          |    | \
21057                          |    |   saved arguments for
21058                          |    |   vararg functions
21059                          |    | /
21060                            --
21061 hard FP & arg pointer -> |    | \
21062                          |    |   stack
21063                          |    |   frame
21064                          |    | /
21065                            --
21066                          |    | \
21067                          |    |   call saved
21068                          |    |   registers
21069    soft frame pointer -> |    | /
21070                            --
21071                          |    | \
21072                          |    |   local
21073                          |    |   variables
21074   locals base pointer -> |    | /
21075                            --
21076                          |    | \
21077                          |    |   outgoing
21078                          |    |   arguments
21079 current stack pointer -> |    | /
21080                            --
21081
21082 For a given function some or all of these stack components
21083 may not be needed, giving rise to the possibility of
21084 eliminating some of the registers.
21085
21086 The values returned by this function must reflect the behavior
21087 of arm_expand_prologue () and arm_compute_save_core_reg_mask ().
21088
21089 The sign of the number returned reflects the direction of stack
21090 growth, so the values are positive for all eliminations except
21091 from the soft frame pointer to the hard frame pointer.
21092
21093 SFP may point just inside the local variables block to ensure correct
21094 alignment. */
21095
21096
21097 /* Return cached stack offsets. */
21098
21099 static arm_stack_offsets *
21100 arm_get_frame_offsets (void)
21101 {
21102 struct arm_stack_offsets *offsets;
21103
21104 offsets = &cfun->machine->stack_offsets;
21105
21106 return offsets;
21107 }
21108
21109
21110 /* Calculate stack offsets. These are used to calculate register elimination
21111 offsets and in prologue/epilogue code. Also calculates which registers
21112 should be saved. */
21113
21114 static void
21115 arm_compute_frame_layout (void)
21116 {
21117 struct arm_stack_offsets *offsets;
21118 unsigned long func_type;
21119 int saved;
21120 int core_saved;
21121 HOST_WIDE_INT frame_size;
21122 int i;
21123
21124 offsets = &cfun->machine->stack_offsets;
21125
21126 /* Initially this is the size of the local variables. It will be translated
21127 into an offset once we have determined the size of the preceding data. */
21128 frame_size = ROUND_UP_WORD (get_frame_size ());
21129
21130 /* Space for variadic functions. */
21131 offsets->saved_args = crtl->args.pretend_args_size;
21132
21133 /* In Thumb mode this is incorrect, but never used. */
21134 offsets->frame
21135 = (offsets->saved_args
21136 + arm_compute_static_chain_stack_bytes ()
21137 + (frame_pointer_needed ? 4 : 0));
21138
21139 if (TARGET_32BIT)
21140 {
21141 unsigned int regno;
21142
21143 offsets->saved_regs_mask = arm_compute_save_core_reg_mask ();
21144 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21145 saved = core_saved;
21146
21147 /* We know that SP will be doubleword aligned on entry, and we must
21148 preserve that condition at any subroutine call. We also require the
21149 soft frame pointer to be doubleword aligned. */
21150
21151 if (TARGET_REALLY_IWMMXT)
21152 {
21153 /* Check for the call-saved iWMMXt registers. */
21154 for (regno = FIRST_IWMMXT_REGNUM;
21155 regno <= LAST_IWMMXT_REGNUM;
21156 regno++)
21157 if (df_regs_ever_live_p (regno) && ! call_used_regs[regno])
21158 saved += 8;
21159 }
21160
21161 func_type = arm_current_func_type ();
21162 /* Space for saved VFP registers. */
21163 if (! IS_VOLATILE (func_type)
21164 && TARGET_HARD_FLOAT)
21165 saved += arm_get_vfp_saved_size ();
21166 }
21167 else /* TARGET_THUMB1 */
21168 {
21169 offsets->saved_regs_mask = thumb1_compute_save_core_reg_mask ();
21170 core_saved = bit_count (offsets->saved_regs_mask) * 4;
21171 saved = core_saved;
21172 if (TARGET_BACKTRACE)
21173 saved += 16;
21174 }
21175
21176 /* Saved registers include the stack frame. */
21177 offsets->saved_regs
21178 = offsets->saved_args + arm_compute_static_chain_stack_bytes () + saved;
21179 offsets->soft_frame = offsets->saved_regs + CALLER_INTERWORKING_SLOT_SIZE;
21180
21181 /* A leaf function does not need any stack alignment if it has nothing
21182 on the stack. */
21183 if (crtl->is_leaf && frame_size == 0
21184 /* However if it calls alloca(), we have a dynamically allocated
21185 block of BIGGEST_ALIGNMENT on stack, so still do stack alignment. */
21186 && ! cfun->calls_alloca)
21187 {
21188 offsets->outgoing_args = offsets->soft_frame;
21189 offsets->locals_base = offsets->soft_frame;
21190 return;
21191 }
21192
21193 /* Ensure SFP has the correct alignment. */
21194 if (ARM_DOUBLEWORD_ALIGN
21195 && (offsets->soft_frame & 7))
21196 {
21197 offsets->soft_frame += 4;
21198 /* Try to align stack by pushing an extra reg. Don't bother doing this
21199 when there is a stack frame as the alignment will be rolled into
21200 the normal stack adjustment. */
21201 if (frame_size + crtl->outgoing_args_size == 0)
21202 {
21203 int reg = -1;
21204
21205 /* Register r3 is caller-saved. Normally it does not need to be
21206 saved on entry by the prologue. However if we choose to save
21207 it for padding then we may confuse the compiler into thinking
21208 a prologue sequence is required when in fact it is not. This
21209 will occur when shrink-wrapping if r3 is used as a scratch
21210 register and there are no other callee-saved writes.
21211
21212 This situation can be avoided when other callee-saved registers
21213 are available, since r3 is not mandatory when a callee-saved
21214 register is chosen for padding. */
21215 bool prefer_callee_reg_p = false;
21216
21217 /* If it is safe to use r3, then do so. This sometimes
21218 generates better code on Thumb-2 by avoiding the need to
21219 use 32-bit push/pop instructions. */
21220 if (! any_sibcall_could_use_r3 ()
21221 && arm_size_return_regs () <= 12
21222 && (offsets->saved_regs_mask & (1 << 3)) == 0
21223 && (TARGET_THUMB2
21224 || !(TARGET_LDRD && current_tune->prefer_ldrd_strd)))
21225 {
21226 reg = 3;
21227 if (!TARGET_THUMB2)
21228 prefer_callee_reg_p = true;
21229 }
21230 if (reg == -1
21231 || prefer_callee_reg_p)
21232 {
21233 for (i = 4; i <= (TARGET_THUMB1 ? LAST_LO_REGNUM : 11); i++)
21234 {
21235 /* Avoid fixed registers; they may be changed at
21236 arbitrary times so it's unsafe to restore them
21237 during the epilogue. */
21238 if (!fixed_regs[i]
21239 && (offsets->saved_regs_mask & (1 << i)) == 0)
21240 {
21241 reg = i;
21242 break;
21243 }
21244 }
21245 }
21246
21247 if (reg != -1)
21248 {
21249 offsets->saved_regs += 4;
21250 offsets->saved_regs_mask |= (1 << reg);
21251 }
21252 }
21253 }
21254
21255 offsets->locals_base = offsets->soft_frame + frame_size;
21256 offsets->outgoing_args = (offsets->locals_base
21257 + crtl->outgoing_args_size);
21258
21259 if (ARM_DOUBLEWORD_ALIGN)
21260 {
21261 /* Ensure SP remains doubleword aligned. */
21262 if (offsets->outgoing_args & 7)
21263 offsets->outgoing_args += 4;
21264 gcc_assert (!(offsets->outgoing_args & 7));
21265 }
21266 }
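/* Rough worked example of the layout computed above (illustrative, assuming
   TARGET_ARM, ARM_DOUBLEWORD_ALIGN, no frame pointer, no static chain, no
   interworking slot and no VFP/iWMMXt saves): a function that saves
   {r4, r5, lr}, has 16 bytes of locals and 8 bytes of outgoing arguments
   gets saved_args = 0, saved_regs = 12, soft_frame = 12 rounded up to 16 for
   doubleword alignment, locals_base = 32 and outgoing_args = 40, which is
   already 8-byte aligned.  */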
21267
21268
21269 /* Calculate the relative offsets for the different stack pointers. Positive
21270 offsets are in the direction of stack growth. */
21271
21272 HOST_WIDE_INT
21273 arm_compute_initial_elimination_offset (unsigned int from, unsigned int to)
21274 {
21275 arm_stack_offsets *offsets;
21276
21277 offsets = arm_get_frame_offsets ();
21278
21279 /* OK, now we have enough information to compute the distances.
21280 There must be an entry in these switch tables for each pair
21281 of registers in ELIMINABLE_REGS, even if some of the entries
21282 seem to be redundant or useless. */
21283 switch (from)
21284 {
21285 case ARG_POINTER_REGNUM:
21286 switch (to)
21287 {
21288 case THUMB_HARD_FRAME_POINTER_REGNUM:
21289 return 0;
21290
21291 case FRAME_POINTER_REGNUM:
21292 /* This is the reverse of the soft frame pointer
21293 to hard frame pointer elimination below. */
21294 return offsets->soft_frame - offsets->saved_args;
21295
21296 case ARM_HARD_FRAME_POINTER_REGNUM:
21297 /* This is only non-zero in the case where the static chain register
21298 is stored above the frame. */
21299 return offsets->frame - offsets->saved_args - 4;
21300
21301 case STACK_POINTER_REGNUM:
21302 /* If nothing has been pushed on the stack at all
21303 then this will return -4. This *is* correct! */
21304 return offsets->outgoing_args - (offsets->saved_args + 4);
21305
21306 default:
21307 gcc_unreachable ();
21308 }
21309 gcc_unreachable ();
21310
21311 case FRAME_POINTER_REGNUM:
21312 switch (to)
21313 {
21314 case THUMB_HARD_FRAME_POINTER_REGNUM:
21315 return 0;
21316
21317 case ARM_HARD_FRAME_POINTER_REGNUM:
21318 /* The hard frame pointer points to the top entry in the
21319 stack frame. The soft frame pointer points to the bottom entry
21320 in the stack frame. If there is no stack frame at all,
21321 then they are identical. */
21322
21323 return offsets->frame - offsets->soft_frame;
21324
21325 case STACK_POINTER_REGNUM:
21326 return offsets->outgoing_args - offsets->soft_frame;
21327
21328 default:
21329 gcc_unreachable ();
21330 }
21331 gcc_unreachable ();
21332
21333 default:
21334 /* You cannot eliminate from the stack pointer.
21335 In theory you could eliminate from the hard frame
21336 pointer to the stack pointer, but this will never
21337 happen, since if a stack frame is not needed the
21338 hard frame pointer will never be used. */
21339 gcc_unreachable ();
21340 }
21341 }
21342
21343 /* Given FROM and TO register numbers, say whether this elimination is
21344 allowed. Frame pointer elimination is automatically handled.
21345
21346 All eliminations are permissible. Note that ARG_POINTER_REGNUM and
21347 HARD_FRAME_POINTER_REGNUM are in fact the same thing. If we need a frame
21348 pointer, we must eliminate FRAME_POINTER_REGNUM into
21349 HARD_FRAME_POINTER_REGNUM and not into STACK_POINTER_REGNUM or
21350 ARG_POINTER_REGNUM. */
21351
21352 bool
21353 arm_can_eliminate (const int from, const int to)
21354 {
21355 return ((to == FRAME_POINTER_REGNUM && from == ARG_POINTER_REGNUM) ? false :
21356 (to == STACK_POINTER_REGNUM && frame_pointer_needed) ? false :
21357 (to == ARM_HARD_FRAME_POINTER_REGNUM && TARGET_THUMB) ? false :
21358 (to == THUMB_HARD_FRAME_POINTER_REGNUM && TARGET_ARM) ? false :
21359 true);
21360 }
21361
21362 /* Emit RTL to save coprocessor registers on function entry. Returns the
21363 number of bytes pushed. */
21364
21365 static int
21366 arm_save_coproc_regs (void)
21367 {
21368 int saved_size = 0;
21369 unsigned reg;
21370 unsigned start_reg;
21371 rtx insn;
21372
21373 for (reg = LAST_IWMMXT_REGNUM; reg >= FIRST_IWMMXT_REGNUM; reg--)
21374 if (df_regs_ever_live_p (reg) && ! call_used_regs[reg])
21375 {
21376 insn = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21377 insn = gen_rtx_MEM (V2SImode, insn);
21378 insn = emit_set_insn (insn, gen_rtx_REG (V2SImode, reg));
21379 RTX_FRAME_RELATED_P (insn) = 1;
21380 saved_size += 8;
21381 }
21382
21383 if (TARGET_HARD_FLOAT)
21384 {
21385 start_reg = FIRST_VFP_REGNUM;
21386
21387 for (reg = FIRST_VFP_REGNUM; reg < LAST_VFP_REGNUM; reg += 2)
21388 {
21389 if ((!df_regs_ever_live_p (reg) || call_used_regs[reg])
21390 && (!df_regs_ever_live_p (reg + 1) || call_used_regs[reg + 1]))
21391 {
21392 if (start_reg != reg)
21393 saved_size += vfp_emit_fstmd (start_reg,
21394 (reg - start_reg) / 2);
21395 start_reg = reg + 2;
21396 }
21397 }
21398 if (start_reg != reg)
21399 saved_size += vfp_emit_fstmd (start_reg,
21400 (reg - start_reg) / 2);
21401 }
21402 return saved_size;
21403 }
21404
21405
21406 /* Set the Thumb frame pointer from the stack pointer. */
21407
21408 static void
21409 thumb_set_frame_pointer (arm_stack_offsets *offsets)
21410 {
21411 HOST_WIDE_INT amount;
21412 rtx insn, dwarf;
21413
21414 amount = offsets->outgoing_args - offsets->locals_base;
21415 if (amount < 1024)
21416 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21417 stack_pointer_rtx, GEN_INT (amount)));
21418 else
21419 {
21420 emit_insn (gen_movsi (hard_frame_pointer_rtx, GEN_INT (amount)));
21421 /* Thumb-2 RTL patterns expect sp as the first input. Thumb-1
21422 expects the first two operands to be the same. */
21423 if (TARGET_THUMB2)
21424 {
21425 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21426 stack_pointer_rtx,
21427 hard_frame_pointer_rtx));
21428 }
21429 else
21430 {
21431 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21432 hard_frame_pointer_rtx,
21433 stack_pointer_rtx));
21434 }
21435 dwarf = gen_rtx_SET (hard_frame_pointer_rtx,
21436 plus_constant (Pmode, stack_pointer_rtx, amount));
21437 RTX_FRAME_RELATED_P (dwarf) = 1;
21438 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21439 }
21440
21441 RTX_FRAME_RELATED_P (insn) = 1;
21442 }
21443
21444 struct scratch_reg {
21445 rtx reg;
21446 bool saved;
21447 };
21448
21449 /* Return a short-lived scratch register for use as a 2nd scratch register on
21450 function entry after the registers are saved in the prologue. This register
21451 must be released by means of release_scratch_register_on_entry. IP is not
21452 considered since it is always used as the 1st scratch register if available.
21453
21454 REGNO1 is the index number of the 1st scratch register and LIVE_REGS is the
21455 mask of live registers. */
21456
21457 static void
21458 get_scratch_register_on_entry (struct scratch_reg *sr, unsigned int regno1,
21459 unsigned long live_regs)
21460 {
21461 int regno = -1;
21462
21463 sr->saved = false;
21464
21465 if (regno1 != LR_REGNUM && (live_regs & (1 << LR_REGNUM)) != 0)
21466 regno = LR_REGNUM;
21467 else
21468 {
21469 unsigned int i;
21470
21471 for (i = 4; i < 11; i++)
21472 if (regno1 != i && (live_regs & (1 << i)) != 0)
21473 {
21474 regno = i;
21475 break;
21476 }
21477
21478 if (regno < 0)
21479 {
21480 /* If IP is used as the 1st scratch register for a nested function,
21481 then either r3 wasn't available or is used to preserve IP. */
21482 if (regno1 == IP_REGNUM && IS_NESTED (arm_current_func_type ()))
21483 regno1 = 3;
21484 regno = (regno1 == 3 ? 2 : 3);
21485 sr->saved
21486 = REGNO_REG_SET_P (df_get_live_out (ENTRY_BLOCK_PTR_FOR_FN (cfun)),
21487 regno);
21488 }
21489 }
21490
21491 sr->reg = gen_rtx_REG (SImode, regno);
21492 if (sr->saved)
21493 {
21494 rtx addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21495 rtx insn = emit_set_insn (gen_frame_mem (SImode, addr), sr->reg);
21496 rtx x = gen_rtx_SET (stack_pointer_rtx,
21497 plus_constant (Pmode, stack_pointer_rtx, -4));
21498 RTX_FRAME_RELATED_P (insn) = 1;
21499 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21500 }
21501 }
21502
21503 /* Release a scratch register obtained from the preceding function. */
21504
21505 static void
21506 release_scratch_register_on_entry (struct scratch_reg *sr)
21507 {
21508 if (sr->saved)
21509 {
21510 rtx addr = gen_rtx_POST_INC (Pmode, stack_pointer_rtx);
21511 rtx insn = emit_set_insn (sr->reg, gen_frame_mem (SImode, addr));
21512 rtx x = gen_rtx_SET (stack_pointer_rtx,
21513 plus_constant (Pmode, stack_pointer_rtx, 4));
21514 RTX_FRAME_RELATED_P (insn) = 1;
21515 add_reg_note (insn, REG_FRAME_RELATED_EXPR, x);
21516 }
21517 }
21518
21519 #define PROBE_INTERVAL (1 << STACK_CHECK_PROBE_INTERVAL_EXP)
21520
21521 #if PROBE_INTERVAL > 4096
21522 #error Cannot use indexed addressing mode for stack probing
21523 #endif
21524
21525 /* Emit code to probe a range of stack addresses from FIRST to FIRST+SIZE,
21526 inclusive. These are offsets from the current stack pointer. REGNO1
21527 is the index number of the 1st scratch register and LIVE_REGS is the
21528 mask of live registers. */
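/* Worked example (assuming the default PROBE_INTERVAL of 4096 bytes): with
   FIRST == 4096 and SIZE == 10000 the second arm below emits probes at
   SP - 8192, SP - 12288 and finally SP - 14096, i.e. at FIRST + 4096,
   FIRST + 8192 and FIRST + SIZE bytes below the incoming stack pointer.  */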
21529
21530 static void
21531 arm_emit_probe_stack_range (HOST_WIDE_INT first, HOST_WIDE_INT size,
21532 unsigned int regno1, unsigned long live_regs)
21533 {
21534 rtx reg1 = gen_rtx_REG (Pmode, regno1);
21535
21536 /* See if we have a constant small number of probes to generate. If so,
21537 that's the easy case. */
21538 if (size <= PROBE_INTERVAL)
21539 {
21540 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21541 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21542 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - size));
21543 }
21544
21545 /* The run-time loop is made up of 10 insns in the generic case while the
21546 compile-time loop is made up of 4+2*(n-2) insns for n # of intervals. */
21547 else if (size <= 5 * PROBE_INTERVAL)
21548 {
21549 HOST_WIDE_INT i, rem;
21550
21551 emit_move_insn (reg1, GEN_INT (first + PROBE_INTERVAL));
21552 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21553 emit_stack_probe (reg1);
21554
21555 /* Probe at FIRST + N * PROBE_INTERVAL for values of N from 2 until
21556 it exceeds SIZE. If only two probes are needed, this will not
21557 generate any code. Then probe at FIRST + SIZE. */
21558 for (i = 2 * PROBE_INTERVAL; i < size; i += PROBE_INTERVAL)
21559 {
21560 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21561 emit_stack_probe (reg1);
21562 }
21563
21564 rem = size - (i - PROBE_INTERVAL);
21565 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21566 {
21567 emit_set_insn (reg1, plus_constant (Pmode, reg1, -PROBE_INTERVAL));
21568 emit_stack_probe (plus_constant (Pmode, reg1, PROBE_INTERVAL - rem));
21569 }
21570 else
21571 emit_stack_probe (plus_constant (Pmode, reg1, -rem));
21572 }
21573
21574 /* Otherwise, do the same as above, but in a loop. Note that we must be
21575 extra careful with variables wrapping around because we might be at
21576 the very top (or the very bottom) of the address space and we have
21577 to be able to handle this case properly; in particular, we use an
21578 equality test for the loop condition. */
21579 else
21580 {
21581 HOST_WIDE_INT rounded_size;
21582 struct scratch_reg sr;
21583
21584 get_scratch_register_on_entry (&sr, regno1, live_regs);
21585
21586 emit_move_insn (reg1, GEN_INT (first));
21587
21588
21589 /* Step 1: round SIZE to the previous multiple of the interval. */
21590
21591 rounded_size = size & -PROBE_INTERVAL;
21592 emit_move_insn (sr.reg, GEN_INT (rounded_size));
21593
21594
21595 /* Step 2: compute initial and final value of the loop counter. */
21596
21597 /* TEST_ADDR = SP + FIRST. */
21598 emit_set_insn (reg1, gen_rtx_MINUS (Pmode, stack_pointer_rtx, reg1));
21599
21600 /* LAST_ADDR = SP + FIRST + ROUNDED_SIZE. */
21601 emit_set_insn (sr.reg, gen_rtx_MINUS (Pmode, reg1, sr.reg));
21602
21603
21604 /* Step 3: the loop
21605
21606 do
21607 {
21608 TEST_ADDR = TEST_ADDR + PROBE_INTERVAL
21609 probe at TEST_ADDR
21610 }
21611 while (TEST_ADDR != LAST_ADDR)
21612
21613 probes at FIRST + N * PROBE_INTERVAL for values of N from 1
21614 until it is equal to ROUNDED_SIZE. */
21615
21616 emit_insn (gen_probe_stack_range (reg1, reg1, sr.reg));
21617
21618
21619 /* Step 4: probe at FIRST + SIZE if we cannot assert at compile-time
21620 that SIZE is equal to ROUNDED_SIZE. */
21621
21622 if (size != rounded_size)
21623 {
21624 HOST_WIDE_INT rem = size - rounded_size;
21625
21626 if (rem > 4095 || (TARGET_THUMB2 && rem > 255))
21627 {
21628 emit_set_insn (sr.reg,
21629 plus_constant (Pmode, sr.reg, -PROBE_INTERVAL));
21630 emit_stack_probe (plus_constant (Pmode, sr.reg,
21631 PROBE_INTERVAL - rem));
21632 }
21633 else
21634 emit_stack_probe (plus_constant (Pmode, sr.reg, -rem));
21635 }
21636
21637 release_scratch_register_on_entry (&sr);
21638 }
21639
21640 /* Make sure nothing is scheduled before we are done. */
21641 emit_insn (gen_blockage ());
21642 }
21643
21644 /* Probe a range of stack addresses from REG1 to REG2 inclusive. These are
21645 absolute addresses. */
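/* With the default 4096-byte probe interval this prints a loop of the form
   (illustrative; r4/r5 stand for whatever REG1/REG2 happen to be):

	.LPSRL0:
	sub	r4, r4, #4096
	str	r0, [r4, #0]
	cmp	r4, r5
	bne	.LPSRL0
   */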
21646
21647 const char *
21648 output_probe_stack_range (rtx reg1, rtx reg2)
21649 {
21650 static int labelno = 0;
21651 char loop_lab[32];
21652 rtx xops[2];
21653
21654 ASM_GENERATE_INTERNAL_LABEL (loop_lab, "LPSRL", labelno++);
21655
21656 /* Loop. */
21657 ASM_OUTPUT_INTERNAL_LABEL (asm_out_file, loop_lab);
21658
21659 /* TEST_ADDR = TEST_ADDR + PROBE_INTERVAL. */
21660 xops[0] = reg1;
21661 xops[1] = GEN_INT (PROBE_INTERVAL);
21662 output_asm_insn ("sub\t%0, %0, %1", xops);
21663
21664 /* Probe at TEST_ADDR. */
21665 output_asm_insn ("str\tr0, [%0, #0]", xops);
21666
21667 /* Test if TEST_ADDR == LAST_ADDR. */
21668 xops[1] = reg2;
21669 output_asm_insn ("cmp\t%0, %1", xops);
21670
21671 /* Branch. */
21672 fputs ("\tbne\t", asm_out_file);
21673 assemble_name_raw (asm_out_file, loop_lab);
21674 fputc ('\n', asm_out_file);
21675
21676 return "";
21677 }
21678
21679 /* Generate the prologue instructions for entry into an ARM or Thumb-2
21680 function. */
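/* Editor's sketch of the usual shape of the emitted code, hedged because the
   exact sequence depends on the target options and frame layout:

	push	{r4-r7, fp, lr}		@ live core registers
	<coprocessor saves>		@ arm_save_coproc_regs, if any
	add	fp, sp, #N		@ only if a frame pointer is needed
	sub	sp, sp, #frame		@ local/outgoing-args space

   with extra steps for stack-aligned, nested and interrupt functions as
   described in the code below.  */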
21681 void
21682 arm_expand_prologue (void)
21683 {
21684 rtx amount;
21685 rtx insn;
21686 rtx ip_rtx;
21687 unsigned long live_regs_mask;
21688 unsigned long func_type;
21689 int fp_offset = 0;
21690 int saved_pretend_args = 0;
21691 int saved_regs = 0;
21692 unsigned HOST_WIDE_INT args_to_push;
21693 HOST_WIDE_INT size;
21694 arm_stack_offsets *offsets;
21695 bool clobber_ip;
21696
21697 func_type = arm_current_func_type ();
21698
21699 /* Naked functions don't have prologues. */
21700 if (IS_NAKED (func_type))
21701 {
21702 if (flag_stack_usage_info)
21703 current_function_static_stack_size = 0;
21704 return;
21705 }
21706
21710 /* Make a local copy of crtl->args.pretend_args_size as we may need to
modify it locally. */
21708 args_to_push = crtl->args.pretend_args_size;
21709
21713 /* Compute which registers we will have to save onto the stack. */
21711 offsets = arm_get_frame_offsets ();
21712 live_regs_mask = offsets->saved_regs_mask;
21713
21714 ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
21715
21716 if (IS_STACKALIGN (func_type))
21717 {
21718 rtx r0, r1;
21719
21720 /* Handle a word-aligned stack pointer. We generate the following:
21721
21722 mov r0, sp
21723 bic r1, r0, #7
21724 mov sp, r1
21725 <save and restore r0 in normal prologue/epilogue>
21726 mov sp, r0
21727 bx lr
21728
21729 The unwinder doesn't need to know about the stack realignment.
21730 Just tell it we saved SP in r0. */
21731 gcc_assert (TARGET_THUMB2 && !arm_arch_notm && args_to_push == 0);
21732
21733 r0 = gen_rtx_REG (SImode, R0_REGNUM);
21734 r1 = gen_rtx_REG (SImode, R1_REGNUM);
21735
21736 insn = emit_insn (gen_movsi (r0, stack_pointer_rtx));
21737 RTX_FRAME_RELATED_P (insn) = 1;
21738 add_reg_note (insn, REG_CFA_REGISTER, NULL);
21739
21740 emit_insn (gen_andsi3 (r1, r0, GEN_INT (~(HOST_WIDE_INT)7)));
21741
21742 /* ??? The CFA changes here, which may cause GDB to conclude that it
21743 has entered a different function. That said, the unwind info is
21744 correct, individually, before and after this instruction because
21745 we've described the save of SP, which will override the default
21746 handling of SP as restoring from the CFA. */
21747 emit_insn (gen_movsi (stack_pointer_rtx, r1));
21748 }
21749
21750 /* Let's compute the static_chain_stack_bytes required and store it. Right
21751 now the value must be -1 as stored by arm_init_machine_status (). */
21752 cfun->machine->static_chain_stack_bytes
21753 = arm_compute_static_chain_stack_bytes ();
21754
21755 /* The static chain register is the same as the IP register. If it is
21756 clobbered when creating the frame, we need to save and restore it. */
21757 clobber_ip = IS_NESTED (func_type)
21758 && ((TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21759 || ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21760 || flag_stack_clash_protection)
21761 && !df_regs_ever_live_p (LR_REGNUM)
21762 && arm_r3_live_at_start_p ()));
21763
21764 /* Find somewhere to store IP whilst the frame is being created.
21765 We try the following places in order:
21766
21767 1. The last argument register r3 if it is available.
21768 2. A slot on the stack above the frame if there are no
21769 arguments to push onto the stack.
21770 3. Register r3 again, after pushing the argument registers
21771 onto the stack, if this is a varargs function.
21772 4. The last slot on the stack created for the arguments to
21773 push, if this isn't a varargs function.
21774
21775 Note - we only need to tell the dwarf2 backend about the SP
21776 adjustment in the second variant; the static chain register
21777 doesn't need to be unwound, as it doesn't contain a value
21778 inherited from the caller. */
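  /* For instance, variant 2 above materializes as a single
     "str ip, [sp, #-4]!" with a REG_FRAME_RELATED_EXPR note that records
     only the 4-byte SP adjustment.  */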
21779 if (clobber_ip)
21780 {
21781 if (!arm_r3_live_at_start_p ())
21782 insn = emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21783 else if (args_to_push == 0)
21784 {
21785 rtx addr, dwarf;
21786
21790 gcc_assert (arm_compute_static_chain_stack_bytes () == 4);
21788 saved_regs += 4;
21789
21790 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21791 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21792 fp_offset = 4;
21793
21794 /* Just tell the dwarf backend that we adjusted SP. */
21795 dwarf = gen_rtx_SET (stack_pointer_rtx,
21796 plus_constant (Pmode, stack_pointer_rtx,
21797 -fp_offset));
21798 RTX_FRAME_RELATED_P (insn) = 1;
21799 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21800 }
21801 else
21802 {
21803 /* Store the args on the stack. */
21804 if (cfun->machine->uses_anonymous_args)
21805 {
21806 insn = emit_multi_reg_push ((0xf0 >> (args_to_push / 4)) & 0xf,
21807 (0xf0 >> (args_to_push / 4)) & 0xf);
21808 emit_set_insn (gen_rtx_REG (SImode, 3), ip_rtx);
21809 saved_pretend_args = 1;
21810 }
21811 else
21812 {
21813 rtx addr, dwarf;
21814
21815 if (args_to_push == 4)
21816 addr = gen_rtx_PRE_DEC (Pmode, stack_pointer_rtx);
21817 else
21818 addr = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx,
21819 plus_constant (Pmode,
21820 stack_pointer_rtx,
21821 -args_to_push));
21822
21823 insn = emit_set_insn (gen_frame_mem (SImode, addr), ip_rtx);
21824
21825 /* Just tell the dwarf backend that we adjusted SP. */
21826 dwarf = gen_rtx_SET (stack_pointer_rtx,
21827 plus_constant (Pmode, stack_pointer_rtx,
21828 -args_to_push));
21829 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
21830 }
21831
21832 RTX_FRAME_RELATED_P (insn) = 1;
21833 fp_offset = args_to_push;
21834 args_to_push = 0;
21835 }
21836 }
21837
21838 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
21839 {
21840 if (IS_INTERRUPT (func_type))
21841 {
21842 /* Interrupt functions must not corrupt any registers.
21843 Creating a frame pointer however, corrupts the IP
21844 register, so we must push it first. */
21845 emit_multi_reg_push (1 << IP_REGNUM, 1 << IP_REGNUM);
21846
21847 /* Do not set RTX_FRAME_RELATED_P on this insn.
21848 The dwarf stack unwinding code only wants to see one
21849 stack decrement per function, and this is not it. If
21850 this instruction is labeled as being part of the frame
21851 creation sequence then dwarf2out_frame_debug_expr will
21852 die when it encounters the assignment of IP to FP
21853 later on, since the use of SP here establishes SP as
21854 the CFA register and not IP.
21855
21856 Anyway this instruction is not really part of the stack
21857 frame creation although it is part of the prologue. */
21858 }
21859
21860 insn = emit_set_insn (ip_rtx,
21861 plus_constant (Pmode, stack_pointer_rtx,
21862 fp_offset));
21863 RTX_FRAME_RELATED_P (insn) = 1;
21864 }
21865
21866 if (args_to_push)
21867 {
21868 /* Push the argument registers, or reserve space for them. */
21869 if (cfun->machine->uses_anonymous_args)
21870 insn = emit_multi_reg_push
21871 ((0xf0 >> (args_to_push / 4)) & 0xf,
21872 (0xf0 >> (args_to_push / 4)) & 0xf);
21873 else
21874 insn = emit_insn
21875 (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
21876 GEN_INT (- args_to_push)));
21877 RTX_FRAME_RELATED_P (insn) = 1;
21878 }
21879
21880 /* If this is an interrupt service routine, and the link register
21884 is going to be pushed, and we're not generating the extra
21885 push of IP (needed when the frame pointer is needed and the frame layout is APCS),
21883 subtracting four from LR now will mean that the function return
21884 can be done with a single instruction. */
21885 if ((func_type == ARM_FT_ISR || func_type == ARM_FT_FIQ)
21886 && (live_regs_mask & (1 << LR_REGNUM)) != 0
21887 && !(frame_pointer_needed && TARGET_APCS_FRAME)
21888 && TARGET_ARM)
21889 {
21890 rtx lr = gen_rtx_REG (SImode, LR_REGNUM);
21891
21892 emit_set_insn (lr, plus_constant (SImode, lr, -4));
21893 }
21894
21895 if (live_regs_mask)
21896 {
21897 unsigned long dwarf_regs_mask = live_regs_mask;
21898
21899 saved_regs += bit_count (live_regs_mask) * 4;
21900 if (optimize_size && !frame_pointer_needed
21901 && saved_regs == offsets->saved_regs - offsets->saved_args)
21902 {
21903 /* If no coprocessor registers are being pushed and we don't have
21904 to worry about a frame pointer then push extra registers to
21905 create the stack frame. This is done in a way that does not
21906 alter the frame layout, so is independent of the epilogue. */
21907 int n;
21908 int frame;
21909 n = 0;
21910 while (n < 8 && (live_regs_mask & (1 << n)) == 0)
21911 n++;
21912 frame = offsets->outgoing_args - (offsets->saved_args + saved_regs);
21913 if (frame && n * 4 >= frame)
21914 {
21915 n = frame / 4;
21916 live_regs_mask |= (1 << n) - 1;
21917 saved_regs += frame;
21918 }
21919 }
21920
21921 if (TARGET_LDRD
21922 && current_tune->prefer_ldrd_strd
21923 && !optimize_function_for_size_p (cfun))
21924 {
21925 gcc_checking_assert (live_regs_mask == dwarf_regs_mask);
21926 if (TARGET_THUMB2)
21927 thumb2_emit_strd_push (live_regs_mask);
21928 else if (TARGET_ARM
21929 && !TARGET_APCS_FRAME
21930 && !IS_INTERRUPT (func_type))
21931 arm_emit_strd_push (live_regs_mask);
21932 else
21933 {
21934 insn = emit_multi_reg_push (live_regs_mask, live_regs_mask);
21935 RTX_FRAME_RELATED_P (insn) = 1;
21936 }
21937 }
21938 else
21939 {
21940 insn = emit_multi_reg_push (live_regs_mask, dwarf_regs_mask);
21941 RTX_FRAME_RELATED_P (insn) = 1;
21942 }
21943 }
21944
21945 if (! IS_VOLATILE (func_type))
21946 saved_regs += arm_save_coproc_regs ();
21947
21948 if (frame_pointer_needed && TARGET_ARM)
21949 {
21950 /* Create the new frame pointer. */
21951 if (TARGET_APCS_FRAME)
21952 {
21953 insn = GEN_INT (-(4 + args_to_push + fp_offset));
21954 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx, ip_rtx, insn));
21955 RTX_FRAME_RELATED_P (insn) = 1;
21956 }
21957 else
21958 {
21959 insn = GEN_INT (saved_regs - (4 + fp_offset));
21960 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
21961 stack_pointer_rtx, insn));
21962 RTX_FRAME_RELATED_P (insn) = 1;
21963 }
21964 }
21965
21966 size = offsets->outgoing_args - offsets->saved_args;
21967 if (flag_stack_usage_info)
21968 current_function_static_stack_size = size;
21969
21970 /* If this isn't an interrupt service routine and we have a frame, then do
21971 stack checking. We use IP as the first scratch register, except for the
21972 non-APCS nested functions if LR or r3 are available (see clobber_ip). */
21973 if (!IS_INTERRUPT (func_type)
21974 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
21975 || flag_stack_clash_protection))
21976 {
21977 unsigned int regno;
21978
21979 if (!IS_NESTED (func_type) || clobber_ip)
21980 regno = IP_REGNUM;
21981 else if (df_regs_ever_live_p (LR_REGNUM))
21982 regno = LR_REGNUM;
21983 else
21984 regno = 3;
21985
21986 if (crtl->is_leaf && !cfun->calls_alloca)
21987 {
21988 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
21989 arm_emit_probe_stack_range (get_stack_check_protect (),
21990 size - get_stack_check_protect (),
21991 regno, live_regs_mask);
21992 }
21993 else if (size > 0)
21994 arm_emit_probe_stack_range (get_stack_check_protect (), size,
21995 regno, live_regs_mask);
21996 }
21997
21998 /* Recover the static chain register. */
21999 if (clobber_ip)
22000 {
22001 if (!arm_r3_live_at_start_p () || saved_pretend_args)
22002 insn = gen_rtx_REG (SImode, 3);
22003 else
22004 {
22005 insn = plus_constant (Pmode, hard_frame_pointer_rtx, 4);
22006 insn = gen_frame_mem (SImode, insn);
22007 }
22008 emit_set_insn (ip_rtx, insn);
22009 emit_insn (gen_force_register_use (ip_rtx));
22010 }
22011
22012 if (offsets->outgoing_args != offsets->saved_args + saved_regs)
22013 {
22014 /* This add can produce multiple insns for a large constant, so we
22015 need to get tricky. */
22016 rtx_insn *last = get_last_insn ();
22017
22018 amount = GEN_INT (offsets->saved_args + saved_regs
22019 - offsets->outgoing_args);
22020
22021 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
22022 amount));
22023 do
22024 {
22025 last = last ? NEXT_INSN (last) : get_insns ();
22026 RTX_FRAME_RELATED_P (last) = 1;
22027 }
22028 while (last != insn);
22029
22030 /* If the frame pointer is needed, emit a special barrier that
22031 will prevent the scheduler from moving stores to the frame
22032 before the stack adjustment. */
22033 if (frame_pointer_needed)
22034 emit_insn (gen_stack_tie (stack_pointer_rtx,
22035 hard_frame_pointer_rtx));
22036 }
22037
22038
22039 if (frame_pointer_needed && TARGET_THUMB2)
22040 thumb_set_frame_pointer (offsets);
22041
22042 if (flag_pic && arm_pic_register != INVALID_REGNUM)
22043 {
22044 unsigned long mask;
22045
22046 mask = live_regs_mask;
22047 mask &= THUMB2_WORK_REGS;
22048 if (!IS_NESTED (func_type))
22049 mask |= (1 << IP_REGNUM);
22050 arm_load_pic_register (mask, NULL_RTX);
22051 }
22052
22053 /* If we are profiling, make sure no instructions are scheduled before
22054 the call to mcount. Similarly if the user has requested no
22058 scheduling in the prologue. Similarly if we want non-call exceptions
22056 using the EABI unwinder, to prevent faulting instructions from being
22057 swapped with a stack adjustment. */
22058 if (crtl->profile || !TARGET_SCHED_PROLOG
22059 || (arm_except_unwind_info (&global_options) == UI_TARGET
22060 && cfun->can_throw_non_call_exceptions))
22061 emit_insn (gen_blockage ());
22062
22063 /* If the link register is being kept alive, with the return address in it,
22064 then make sure that it does not get reused by the ce2 pass. */
22065 if ((live_regs_mask & (1 << LR_REGNUM)) == 0)
22066 cfun->machine->lr_save_eliminated = 1;
22067 }
22068 \f
22069 /* Print condition code to STREAM. Helper function for arm_print_operand. */
22070 static void
22071 arm_print_condition (FILE *stream)
22072 {
22073 if (arm_ccfsm_state == 3 || arm_ccfsm_state == 4)
22074 {
22075 /* Branch conversion is not implemented for Thumb-2. */
22076 if (TARGET_THUMB)
22077 {
22078 output_operand_lossage ("predicated Thumb instruction");
22079 return;
22080 }
22081 if (current_insn_predicate != NULL)
22082 {
22083 output_operand_lossage
22084 ("predicated instruction in conditional sequence");
22085 return;
22086 }
22087
22088 fputs (arm_condition_codes[arm_current_cc], stream);
22089 }
22090 else if (current_insn_predicate)
22091 {
22092 enum arm_cond_code code;
22093
22094 if (TARGET_THUMB1)
22095 {
22096 output_operand_lossage ("predicated Thumb instruction");
22097 return;
22098 }
22099
22100 code = get_arm_condition_code (current_insn_predicate);
22101 fputs (arm_condition_codes[code], stream);
22102 }
22103 }
22104
22105
22106 /* Globally reserved letters: acln
22110 Punctuation letters currently used: @_|?().!#
22108 Lower case letters currently used: bcdefhimpqtvwxyz
22109 Upper case letters currently used: ABCDFGHJKLMNOPQRSTU
22110 Letters previously used, but now deprecated/obsolete: sVWXYZ.
22111
22112 Note that the global reservation for 'c' is only for CONSTANT_ADDRESS_P.
22113
22117 If CODE is 'd', then X is a condition operand and the instruction
22118 should only be executed if the condition is true.
22119 If CODE is 'D', then X is a condition operand and the instruction
22120 should only be executed if the condition is false: however, if the mode
22118 of the comparison is CCFPEmode, then always execute the instruction -- we
22119 do this because in these circumstances !GE does not necessarily imply LT;
22120 in these cases the instruction pattern will take care to make sure that
22121 an instruction containing %d will follow, thereby undoing the effects of
22122 doing this instruction unconditionally.
22123 If CODE is 'N' then X is a floating point operand that must be negated
22124 before output.
22125 If CODE is 'B' then output a bitwise inverted value of X (a const int).
22126 If X is a REG and CODE is `M', output a ldm/stm style multi-reg. */
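   A couple of concrete examples of the codes documented above: %M applied to
   (reg:DI r4) prints "{r4-r5}", and %B applied to (const_int 5) prints -6
   (the bitwise inverse, sign-extended).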
22127 static void
22128 arm_print_operand (FILE *stream, rtx x, int code)
22129 {
22130 switch (code)
22131 {
22132 case '@':
22133 fputs (ASM_COMMENT_START, stream);
22134 return;
22135
22136 case '_':
22137 fputs (user_label_prefix, stream);
22138 return;
22139
22140 case '|':
22141 fputs (REGISTER_PREFIX, stream);
22142 return;
22143
22144 case '?':
22145 arm_print_condition (stream);
22146 return;
22147
22148 case '.':
22149 /* The current condition code for a condition code setting instruction.
22150 Preceded by 's' in unified syntax, otherwise followed by 's'. */
22151 fputc('s', stream);
22152 arm_print_condition (stream);
22153 return;
22154
22155 case '!':
22156 /* If the instruction is conditionally executed then print
22157 the current condition code, otherwise print 's'. */
22158 gcc_assert (TARGET_THUMB2);
22159 if (current_insn_predicate)
22160 arm_print_condition (stream);
22161 else
22162 fputc('s', stream);
22163 break;
22164
22165 /* %# is a "break" sequence. It doesn't output anything, but is used to
22166 separate e.g. operand numbers from following text, if that text consists
22167 of further digits which we don't want to be part of the operand
22168 number. */
22169 case '#':
22170 return;
22171
22172 case 'N':
22173 {
22174 REAL_VALUE_TYPE r;
22175 r = real_value_negate (CONST_DOUBLE_REAL_VALUE (x));
22176 fprintf (stream, "%s", fp_const_from_val (&r));
22177 }
22178 return;
22179
22180 /* An integer or symbol address without a preceding # sign. */
22181 case 'c':
22182 switch (GET_CODE (x))
22183 {
22184 case CONST_INT:
22185 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL (x));
22186 break;
22187
22188 case SYMBOL_REF:
22189 output_addr_const (stream, x);
22190 break;
22191
22192 case CONST:
22193 if (GET_CODE (XEXP (x, 0)) == PLUS
22194 && GET_CODE (XEXP (XEXP (x, 0), 0)) == SYMBOL_REF)
22195 {
22196 output_addr_const (stream, x);
22197 break;
22198 }
22199 /* Fall through. */
22200
22201 default:
22202 output_operand_lossage ("Unsupported operand for code '%c'", code);
22203 }
22204 return;
22205
22206 /* An integer that we want to print in HEX. */
22207 case 'x':
22208 switch (GET_CODE (x))
22209 {
22210 case CONST_INT:
22211 fprintf (stream, "#" HOST_WIDE_INT_PRINT_HEX, INTVAL (x));
22212 break;
22213
22214 default:
22215 output_operand_lossage ("Unsupported operand for code '%c'", code);
22216 }
22217 return;
22218
22219 case 'B':
22220 if (CONST_INT_P (x))
22221 {
22222 HOST_WIDE_INT val;
22223 val = ARM_SIGN_EXTEND (~INTVAL (x));
22224 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, val);
22225 }
22226 else
22227 {
22228 putc ('~', stream);
22229 output_addr_const (stream, x);
22230 }
22231 return;
22232
22233 case 'b':
22234 /* Print the log2 of a CONST_INT. */
22235 {
22236 HOST_WIDE_INT val;
22237
22238 if (!CONST_INT_P (x)
22239 || (val = exact_log2 (INTVAL (x) & 0xffffffff)) < 0)
22240 output_operand_lossage ("Unsupported operand for code '%c'", code);
22241 else
22242 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22243 }
22244 return;
22245
22246 case 'L':
22247 /* The low 16 bits of an immediate constant. */
22248 fprintf (stream, HOST_WIDE_INT_PRINT_DEC, INTVAL(x) & 0xffff);
22249 return;
22250
22251 case 'i':
22252 fprintf (stream, "%s", arithmetic_instr (x, 1));
22253 return;
22254
22255 case 'I':
22256 fprintf (stream, "%s", arithmetic_instr (x, 0));
22257 return;
22258
22259 case 'S':
22260 {
22261 HOST_WIDE_INT val;
22262 const char *shift;
22263
22264 shift = shift_op (x, &val);
22265
22266 if (shift)
22267 {
22268 fprintf (stream, ", %s ", shift);
22269 if (val == -1)
22270 arm_print_operand (stream, XEXP (x, 1), 0);
22271 else
22272 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, val);
22273 }
22274 }
22275 return;
22276
22277 /* An explanation of the 'Q', 'R' and 'H' register operands:
22278
22279 In a pair of registers containing a DI or DF value the 'Q'
22280 operand returns the register number of the register containing
22281 the least significant part of the value. The 'R' operand returns
22282 the register number of the register containing the most
22283 significant part of the value.
22284
22285 The 'H' operand returns the higher of the two register numbers.
22286 On a run where WORDS_BIG_ENDIAN is true the 'H' operand is the
22287 same as the 'Q' operand, since the most significant part of the
22288 value is held in the lower number register. The reverse is true
22289 on systems where WORDS_BIG_ENDIAN is false.
22290
22291 The purpose of these operands is to distinguish between cases
22292 where the endian-ness of the values is important (for example
22293 when they are added together), and cases where the endian-ness
22294 is irrelevant, but the order of register operations is important.
22295 For example when loading a value from memory into a register
22296 pair, the endian-ness does not matter. Provided that the value
22297 from the lower memory address is put into the lower numbered
22298 register, and the value from the higher address is put into the
22299 higher numbered register, the load will work regardless of whether
22300 the value being loaded is big-wordian or little-wordian. The
22301 order of the two register loads can matter however, if the address
22302 of the memory location is actually held in one of the registers
22303 being overwritten by the load.
22304
22305 The 'Q' and 'R' constraints are also available for 64-bit
22306 constants. */
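    /* Example: for (reg:DI r0) with little-endian word order, %Q prints r0,
       %R prints r1 and %H prints r1; with WORDS_BIG_ENDIAN, %Q prints r1 and
       %R prints r0.  */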
22307 case 'Q':
22308 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22309 {
22310 rtx part = gen_lowpart (SImode, x);
22311 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22312 return;
22313 }
22314
22315 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22316 {
22317 output_operand_lossage ("invalid operand for code '%c'", code);
22318 return;
22319 }
22320
22321 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 1 : 0));
22322 return;
22323
22324 case 'R':
22325 if (CONST_INT_P (x) || CONST_DOUBLE_P (x))
22326 {
22327 machine_mode mode = GET_MODE (x);
22328 rtx part;
22329
22330 if (mode == VOIDmode)
22331 mode = DImode;
22332 part = gen_highpart_mode (SImode, mode, x);
22333 fprintf (stream, "#" HOST_WIDE_INT_PRINT_DEC, INTVAL (part));
22334 return;
22335 }
22336
22337 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22338 {
22339 output_operand_lossage ("invalid operand for code '%c'", code);
22340 return;
22341 }
22342
22343 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 0 : 1));
22344 return;
22345
22346 case 'H':
22347 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22348 {
22349 output_operand_lossage ("invalid operand for code '%c'", code);
22350 return;
22351 }
22352
22353 asm_fprintf (stream, "%r", REGNO (x) + 1);
22354 return;
22355
22356 case 'J':
22357 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22358 {
22359 output_operand_lossage ("invalid operand for code '%c'", code);
22360 return;
22361 }
22362
22363 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 3 : 2));
22364 return;
22365
22366 case 'K':
22367 if (!REG_P (x) || REGNO (x) > LAST_ARM_REGNUM)
22368 {
22369 output_operand_lossage ("invalid operand for code '%c'", code);
22370 return;
22371 }
22372
22373 asm_fprintf (stream, "%r", REGNO (x) + (WORDS_BIG_ENDIAN ? 2 : 3));
22374 return;
22375
22376 case 'm':
22377 asm_fprintf (stream, "%r",
22378 REG_P (XEXP (x, 0))
22379 ? REGNO (XEXP (x, 0)) : REGNO (XEXP (XEXP (x, 0), 0)));
22380 return;
22381
22382 case 'M':
22383 asm_fprintf (stream, "{%r-%r}",
22384 REGNO (x),
22385 REGNO (x) + ARM_NUM_REGS (GET_MODE (x)) - 1);
22386 return;
22387
22388 /* Like 'M', but writing doubleword vector registers, for use by Neon
22389 insns. */
22390 case 'h':
22391 {
22392 int regno = (REGNO (x) - FIRST_VFP_REGNUM) / 2;
22393 int numregs = ARM_NUM_REGS (GET_MODE (x)) / 2;
22394 if (numregs == 1)
22395 asm_fprintf (stream, "{d%d}", regno);
22396 else
22397 asm_fprintf (stream, "{d%d-d%d}", regno, regno + numregs - 1);
22398 }
22399 return;
22400
22401 case 'd':
22402 /* CONST_TRUE_RTX means always -- that's the default. */
22403 if (x == const_true_rtx)
22404 return;
22405
22406 if (!COMPARISON_P (x))
22407 {
22408 output_operand_lossage ("invalid operand for code '%c'", code);
22409 return;
22410 }
22411
22412 fputs (arm_condition_codes[get_arm_condition_code (x)],
22413 stream);
22414 return;
22415
22416 case 'D':
22417 /* CONST_TRUE_RTX means not always -- i.e. never. We shouldn't ever
22418 want to do that. */
22419 if (x == const_true_rtx)
22420 {
22421 output_operand_lossage ("instruction never executed");
22422 return;
22423 }
22424 if (!COMPARISON_P (x))
22425 {
22426 output_operand_lossage ("invalid operand for code '%c'", code);
22427 return;
22428 }
22429
22430 fputs (arm_condition_codes[ARM_INVERSE_CONDITION_CODE
22431 (get_arm_condition_code (x))],
22432 stream);
22433 return;
22434
22435 case 's':
22436 case 'V':
22437 case 'W':
22438 case 'X':
22439 case 'Y':
22440 case 'Z':
22441 /* Former Maverick support, removed after GCC-4.7. */
22442 output_operand_lossage ("obsolete Maverick format code '%c'", code);
22443 return;
22444
22445 case 'U':
22446 if (!REG_P (x)
22447 || REGNO (x) < FIRST_IWMMXT_GR_REGNUM
22448 || REGNO (x) > LAST_IWMMXT_GR_REGNUM)
22449 /* Bad value for wCG register number. */
22450 {
22451 output_operand_lossage ("invalid operand for code '%c'", code);
22452 return;
22453 }
22454
22455 else
22456 fprintf (stream, "%d", REGNO (x) - FIRST_IWMMXT_GR_REGNUM);
22457 return;
22458
22459 /* Print an iWMMXt control register name. */
22460 case 'w':
22461 if (!CONST_INT_P (x)
22462 || INTVAL (x) < 0
22463 || INTVAL (x) >= 16)
22464 /* Bad value for wC register number. */
22465 {
22466 output_operand_lossage ("invalid operand for code '%c'", code);
22467 return;
22468 }
22469
22470 else
22471 {
22472 static const char * wc_reg_names [16] =
22473 {
22474 "wCID", "wCon", "wCSSF", "wCASF",
22475 "wC4", "wC5", "wC6", "wC7",
22476 "wCGR0", "wCGR1", "wCGR2", "wCGR3",
22477 "wC12", "wC13", "wC14", "wC15"
22478 };
22479
22480 fputs (wc_reg_names [INTVAL (x)], stream);
22481 }
22482 return;
22483
22484 /* Print the high single-precision register of a VFP double-precision
22485 register. */
22486 case 'p':
22487 {
22488 machine_mode mode = GET_MODE (x);
22489 int regno;
22490
22491 if (GET_MODE_SIZE (mode) != 8 || !REG_P (x))
22492 {
22493 output_operand_lossage ("invalid operand for code '%c'", code);
22494 return;
22495 }
22496
22497 regno = REGNO (x);
22498 if (!VFP_REGNO_OK_FOR_DOUBLE (regno))
22499 {
22500 output_operand_lossage ("invalid operand for code '%c'", code);
22501 return;
22502 }
22503
22504 fprintf (stream, "s%d", regno - FIRST_VFP_REGNUM + 1);
22505 }
22506 return;
22507
22508 /* Print a VFP/Neon double precision or quad precision register name. */
22509 case 'P':
22510 case 'q':
22511 {
22512 machine_mode mode = GET_MODE (x);
22513 int is_quad = (code == 'q');
22514 int regno;
22515
22516 if (GET_MODE_SIZE (mode) != (is_quad ? 16 : 8))
22517 {
22518 output_operand_lossage ("invalid operand for code '%c'", code);
22519 return;
22520 }
22521
22522 if (!REG_P (x)
22523 || !IS_VFP_REGNUM (REGNO (x)))
22524 {
22525 output_operand_lossage ("invalid operand for code '%c'", code);
22526 return;
22527 }
22528
22529 regno = REGNO (x);
22530 if ((is_quad && !NEON_REGNO_OK_FOR_QUAD (regno))
22531 || (!is_quad && !VFP_REGNO_OK_FOR_DOUBLE (regno)))
22532 {
22533 output_operand_lossage ("invalid operand for code '%c'", code);
22534 return;
22535 }
22536
22537 fprintf (stream, "%c%d", is_quad ? 'q' : 'd',
22538 (regno - FIRST_VFP_REGNUM) >> (is_quad ? 2 : 1));
22539 }
22540 return;
22541
22542 /* These two codes print the low/high doubleword register of a Neon quad
22546 register, respectively. For pair-structure types, they can also print
22547 the low/high quadword registers.
22545 case 'e':
22546 case 'f':
22547 {
22548 machine_mode mode = GET_MODE (x);
22549 int regno;
22550
22551 if ((GET_MODE_SIZE (mode) != 16
22552 && GET_MODE_SIZE (mode) != 32) || !REG_P (x))
22553 {
22554 output_operand_lossage ("invalid operand for code '%c'", code);
22555 return;
22556 }
22557
22558 regno = REGNO (x);
22559 if (!NEON_REGNO_OK_FOR_QUAD (regno))
22560 {
22561 output_operand_lossage ("invalid operand for code '%c'", code);
22562 return;
22563 }
22564
22565 if (GET_MODE_SIZE (mode) == 16)
22566 fprintf (stream, "d%d", ((regno - FIRST_VFP_REGNUM) >> 1)
22567 + (code == 'f' ? 1 : 0));
22568 else
22569 fprintf (stream, "q%d", ((regno - FIRST_VFP_REGNUM) >> 2)
22570 + (code == 'f' ? 1 : 0));
22571 }
22572 return;
22573
22574 /* Print a VFPv3 floating-point constant, represented as an integer
22575 index. */
22576 case 'G':
22577 {
22578 int index = vfp3_const_double_index (x);
22579 gcc_assert (index != -1);
22580 fprintf (stream, "%d", index);
22581 }
22582 return;
22583
22584 /* Print bits representing opcode features for Neon.
22585
22586 Bit 0 is 1 for signed, 0 for unsigned. Floats count as signed
22587 and polynomials as unsigned.
22588
22589 Bit 1 is 1 for floats and polynomials, 0 for ordinary integers.
22590
22591 Bit 2 is 1 for rounding functions, 0 otherwise. */
22592
22593 /* Identify the type as 's', 'u', 'p' or 'f'. */
22594 case 'T':
22595 {
22596 HOST_WIDE_INT bits = INTVAL (x);
22597 fputc ("uspf"[bits & 3], stream);
22598 }
22599 return;
22600
22601 /* Likewise, but signed and unsigned integers are both 'i'. */
22602 case 'F':
22603 {
22604 HOST_WIDE_INT bits = INTVAL (x);
22605 fputc ("iipf"[bits & 3], stream);
22606 }
22607 return;
22608
22609 /* As for 'T', but emit 'u' instead of 'p'. */
22610 case 't':
22611 {
22612 HOST_WIDE_INT bits = INTVAL (x);
22613 fputc ("usuf"[bits & 3], stream);
22614 }
22615 return;
22616
22617 /* Bit 2: rounding (vs none). */
22618 case 'O':
22619 {
22620 HOST_WIDE_INT bits = INTVAL (x);
22621 fputs ((bits & 4) != 0 ? "r" : "", stream);
22622 }
22623 return;
22624
22625 /* Memory operand for vld1/vst1 instruction. */
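    /* The printed forms are "[rN]", or "[rN:64]"/"[rN:128]"/"[rN:256]" when
       an alignment hint can be given, with "!" appended for post-increment by
       the access size and ", rM" appended for post-increment by a register. */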
22626 case 'A':
22627 {
22628 rtx addr;
22629 bool postinc = FALSE;
22630 rtx postinc_reg = NULL;
22631 unsigned align, memsize, align_bits;
22632
22633 gcc_assert (MEM_P (x));
22634 addr = XEXP (x, 0);
22635 if (GET_CODE (addr) == POST_INC)
22636 {
22637 postinc = 1;
22638 addr = XEXP (addr, 0);
22639 }
22640 if (GET_CODE (addr) == POST_MODIFY)
22641 {
22642 postinc_reg = XEXP( XEXP (addr, 1), 1);
22643 addr = XEXP (addr, 0);
22644 }
22645 asm_fprintf (stream, "[%r", REGNO (addr));
22646
22647 /* We know the alignment of this access, so we can emit a hint in the
22648 instruction (for some alignments) as an aid to the memory subsystem
22649 of the target. */
22650 align = MEM_ALIGN (x) >> 3;
22651 memsize = MEM_SIZE (x);
22652
22653 /* Only certain alignment specifiers are supported by the hardware. */
22654 if (memsize == 32 && (align % 32) == 0)
22655 align_bits = 256;
22656 else if ((memsize == 16 || memsize == 32) && (align % 16) == 0)
22657 align_bits = 128;
22658 else if (memsize >= 8 && (align % 8) == 0)
22659 align_bits = 64;
22660 else
22661 align_bits = 0;
22662
22663 if (align_bits != 0)
22664 asm_fprintf (stream, ":%d", align_bits);
22665
22666 asm_fprintf (stream, "]");
22667
22668 if (postinc)
22669 fputs("!", stream);
22670 if (postinc_reg)
22671 asm_fprintf (stream, ", %r", REGNO (postinc_reg));
22672 }
22673 return;
22674
22675 case 'C':
22676 {
22677 rtx addr;
22678
22679 gcc_assert (MEM_P (x));
22680 addr = XEXP (x, 0);
22681 gcc_assert (REG_P (addr));
22682 asm_fprintf (stream, "[%r]", REGNO (addr));
22683 }
22684 return;
22685
22686 /* Translate an S register number into a D register number and element index. */
22687 case 'y':
22688 {
22689 machine_mode mode = GET_MODE (x);
22690 int regno;
22691
22692 if (GET_MODE_SIZE (mode) != 4 || !REG_P (x))
22693 {
22694 output_operand_lossage ("invalid operand for code '%c'", code);
22695 return;
22696 }
22697
22698 regno = REGNO (x);
22699 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22700 {
22701 output_operand_lossage ("invalid operand for code '%c'", code);
22702 return;
22703 }
22704
22705 regno = regno - FIRST_VFP_REGNUM;
22706 fprintf (stream, "d%d[%d]", regno / 2, regno % 2);
22707 }
22708 return;
22709
22710 case 'v':
22711 gcc_assert (CONST_DOUBLE_P (x));
22712 int result;
22713 result = vfp3_const_double_for_fract_bits (x);
22714 if (result == 0)
22715 result = vfp3_const_double_for_bits (x);
22716 fprintf (stream, "#%d", result);
22717 return;
22718
22719 /* Register specifier for vld1.16/vst1.16. Translate the S register
22720 number into a D register number and element index. */
22721 case 'z':
22722 {
22723 machine_mode mode = GET_MODE (x);
22724 int regno;
22725
22726 if (GET_MODE_SIZE (mode) != 2 || !REG_P (x))
22727 {
22728 output_operand_lossage ("invalid operand for code '%c'", code);
22729 return;
22730 }
22731
22732 regno = REGNO (x);
22733 if (!VFP_REGNO_OK_FOR_SINGLE (regno))
22734 {
22735 output_operand_lossage ("invalid operand for code '%c'", code);
22736 return;
22737 }
22738
22739 regno = regno - FIRST_VFP_REGNUM;
22740 fprintf (stream, "d%d[%d]", regno/2, ((regno % 2) ? 2 : 0));
22741 }
22742 return;
22743
22744 default:
22745 if (x == 0)
22746 {
22747 output_operand_lossage ("missing operand");
22748 return;
22749 }
22750
22751 switch (GET_CODE (x))
22752 {
22753 case REG:
22754 asm_fprintf (stream, "%r", REGNO (x));
22755 break;
22756
22757 case MEM:
22758 output_address (GET_MODE (x), XEXP (x, 0));
22759 break;
22760
22761 case CONST_DOUBLE:
22762 {
22763 char fpstr[20];
22764 real_to_decimal (fpstr, CONST_DOUBLE_REAL_VALUE (x),
22765 sizeof (fpstr), 0, 1);
22766 fprintf (stream, "#%s", fpstr);
22767 }
22768 break;
22769
22770 default:
22771 gcc_assert (GET_CODE (x) != NEG);
22772 fputc ('#', stream);
22773 if (GET_CODE (x) == HIGH)
22774 {
22775 fputs (":lower16:", stream);
22776 x = XEXP (x, 0);
22777 }
22778
22779 output_addr_const (stream, x);
22780 break;
22781 }
22782 }
22783 }
22784 \f
22785 /* Target hook for printing a memory address. */
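/* For instance, on 32-bit targets this prints addresses such as "[r0]",
   "[r1, #4]", "[r2, r3]", "[r2, -r3]", "[r2, r3, lsl #2]", "[r4, #-8]!" and
   "[r5], #4", depending on the address form.  */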
22786 static void
22787 arm_print_operand_address (FILE *stream, machine_mode mode, rtx x)
22788 {
22789 if (TARGET_32BIT)
22790 {
22791 int is_minus = GET_CODE (x) == MINUS;
22792
22793 if (REG_P (x))
22794 asm_fprintf (stream, "[%r]", REGNO (x));
22795 else if (GET_CODE (x) == PLUS || is_minus)
22796 {
22797 rtx base = XEXP (x, 0);
22798 rtx index = XEXP (x, 1);
22799 HOST_WIDE_INT offset = 0;
22800 if (!REG_P (base)
22801 || (REG_P (index) && REGNO (index) == SP_REGNUM))
22802 {
22803 /* Ensure that BASE is a register. */
22804 /* (one of them must be). */
22808 /* Also ensure that SP is not used as an index register. */
22806 std::swap (base, index);
22807 }
22808 switch (GET_CODE (index))
22809 {
22810 case CONST_INT:
22811 offset = INTVAL (index);
22812 if (is_minus)
22813 offset = -offset;
22814 asm_fprintf (stream, "[%r, #%wd]",
22815 REGNO (base), offset);
22816 break;
22817
22818 case REG:
22819 asm_fprintf (stream, "[%r, %s%r]",
22820 REGNO (base), is_minus ? "-" : "",
22821 REGNO (index));
22822 break;
22823
22824 case MULT:
22825 case ASHIFTRT:
22826 case LSHIFTRT:
22827 case ASHIFT:
22828 case ROTATERT:
22829 {
22830 asm_fprintf (stream, "[%r, %s%r",
22831 REGNO (base), is_minus ? "-" : "",
22832 REGNO (XEXP (index, 0)));
22833 arm_print_operand (stream, index, 'S');
22834 fputs ("]", stream);
22835 break;
22836 }
22837
22838 default:
22839 gcc_unreachable ();
22840 }
22841 }
22842 else if (GET_CODE (x) == PRE_INC || GET_CODE (x) == POST_INC
22843 || GET_CODE (x) == PRE_DEC || GET_CODE (x) == POST_DEC)
22844 {
22845 gcc_assert (REG_P (XEXP (x, 0)));
22846
22847 if (GET_CODE (x) == PRE_DEC || GET_CODE (x) == PRE_INC)
22848 asm_fprintf (stream, "[%r, #%s%d]!",
22849 REGNO (XEXP (x, 0)),
22850 GET_CODE (x) == PRE_DEC ? "-" : "",
22851 GET_MODE_SIZE (mode));
22852 else
22853 asm_fprintf (stream, "[%r], #%s%d",
22854 REGNO (XEXP (x, 0)),
22855 GET_CODE (x) == POST_DEC ? "-" : "",
22856 GET_MODE_SIZE (mode));
22857 }
22858 else if (GET_CODE (x) == PRE_MODIFY)
22859 {
22860 asm_fprintf (stream, "[%r, ", REGNO (XEXP (x, 0)));
22861 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22862 asm_fprintf (stream, "#%wd]!",
22863 INTVAL (XEXP (XEXP (x, 1), 1)));
22864 else
22865 asm_fprintf (stream, "%r]!",
22866 REGNO (XEXP (XEXP (x, 1), 1)));
22867 }
22868 else if (GET_CODE (x) == POST_MODIFY)
22869 {
22870 asm_fprintf (stream, "[%r], ", REGNO (XEXP (x, 0)));
22871 if (CONST_INT_P (XEXP (XEXP (x, 1), 1)))
22872 asm_fprintf (stream, "#%wd",
22873 INTVAL (XEXP (XEXP (x, 1), 1)));
22874 else
22875 asm_fprintf (stream, "%r",
22876 REGNO (XEXP (XEXP (x, 1), 1)));
22877 }
22878 else output_addr_const (stream, x);
22879 }
22880 else
22881 {
22882 if (REG_P (x))
22883 asm_fprintf (stream, "[%r]", REGNO (x));
22884 else if (GET_CODE (x) == POST_INC)
22885 asm_fprintf (stream, "%r!", REGNO (XEXP (x, 0)));
22886 else if (GET_CODE (x) == PLUS)
22887 {
22888 gcc_assert (REG_P (XEXP (x, 0)));
22889 if (CONST_INT_P (XEXP (x, 1)))
22890 asm_fprintf (stream, "[%r, #%wd]",
22891 REGNO (XEXP (x, 0)),
22892 INTVAL (XEXP (x, 1)));
22893 else
22894 asm_fprintf (stream, "[%r, %r]",
22895 REGNO (XEXP (x, 0)),
22896 REGNO (XEXP (x, 1)));
22897 }
22898 else
22899 output_addr_const (stream, x);
22900 }
22901 }
22902 \f
22903 /* Target hook for indicating whether a punctuation character for
22904 TARGET_PRINT_OPERAND is valid. */
22905 static bool
22906 arm_print_operand_punct_valid_p (unsigned char code)
22907 {
22908 return (code == '@' || code == '|' || code == '.'
22909 || code == '(' || code == ')' || code == '#'
22910 || (TARGET_32BIT && (code == '?'))
22911 || (TARGET_THUMB2 && (code == '!'))
22912 || (TARGET_THUMB && (code == '_')));
22913 }
22914 \f
22915 /* Target hook for assembling integer objects. The ARM version needs to
22916 handle word-sized values specially. */
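/* For example, a word-sized SYMBOL_REF emitted into the constant pool of a
   -fPIC function comes out as "\t.word\tfoo(GOTOFF)" (or "(GOT)" for symbols
   that cannot be resolved locally, such as weak definitions); "foo" here is
   just an illustrative name.  */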
22917 static bool
22918 arm_assemble_integer (rtx x, unsigned int size, int aligned_p)
22919 {
22920 machine_mode mode;
22921
22922 if (size == UNITS_PER_WORD && aligned_p)
22923 {
22924 fputs ("\t.word\t", asm_out_file);
22925 output_addr_const (asm_out_file, x);
22926
22927 /* Mark symbols as position independent. We only do this in the
22928 .text segment, not in the .data segment. */
22929 if (NEED_GOT_RELOC && flag_pic && making_const_table &&
22930 (GET_CODE (x) == SYMBOL_REF || GET_CODE (x) == LABEL_REF))
22931 {
22932 /* See legitimize_pic_address for an explanation of the
22933 TARGET_VXWORKS_RTP check. */
22934 /* References to weak symbols cannot be resolved locally:
22935 they may be overridden by a non-weak definition at link
22936 time. */
22937 if (!arm_pic_data_is_text_relative
22938 || (GET_CODE (x) == SYMBOL_REF
22939 && (!SYMBOL_REF_LOCAL_P (x)
22940 || (SYMBOL_REF_DECL (x)
22941 ? DECL_WEAK (SYMBOL_REF_DECL (x)) : 0))))
22942 fputs ("(GOT)", asm_out_file);
22943 else
22944 fputs ("(GOTOFF)", asm_out_file);
22945 }
22946 fputc ('\n', asm_out_file);
22947 return true;
22948 }
22949
22950 mode = GET_MODE (x);
22951
22952 if (arm_vector_mode_supported_p (mode))
22953 {
22954 int i, units;
22955
22956 gcc_assert (GET_CODE (x) == CONST_VECTOR);
22957
22958 units = CONST_VECTOR_NUNITS (x);
22959 size = GET_MODE_UNIT_SIZE (mode);
22960
22961 if (GET_MODE_CLASS (mode) == MODE_VECTOR_INT)
22962 for (i = 0; i < units; i++)
22963 {
22964 rtx elt = CONST_VECTOR_ELT (x, i);
22965 assemble_integer
22966 (elt, size, i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT, 1);
22967 }
22968 else
22969 for (i = 0; i < units; i++)
22970 {
22971 rtx elt = CONST_VECTOR_ELT (x, i);
22972 assemble_real
22973 (*CONST_DOUBLE_REAL_VALUE (elt),
22974 as_a <scalar_float_mode> (GET_MODE_INNER (mode)),
22975 i == 0 ? BIGGEST_ALIGNMENT : size * BITS_PER_UNIT);
22976 }
22977
22978 return true;
22979 }
22980
22981 return default_assemble_integer (x, size, aligned_p);
22982 }
22983
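/* Emit a constructor or destructor entry.  On AAPCS targets with a
   non-default priority this switches to a section named e.g.
   ".init_array.00123" (for priority 123) and emits
   "\t.word\t<symbol>(target1)", leaving the TARGET1 relocation for the
   linker to resolve.  */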
22984 static void
22985 arm_elf_asm_cdtor (rtx symbol, int priority, bool is_ctor)
22986 {
22987 section *s;
22988
22989 if (!TARGET_AAPCS_BASED)
22990 {
22991 (is_ctor ?
22992 default_named_section_asm_out_constructor
22993 : default_named_section_asm_out_destructor) (symbol, priority);
22994 return;
22995 }
22996
22997 /* Put these in the .init_array section, using a special relocation. */
22998 if (priority != DEFAULT_INIT_PRIORITY)
22999 {
23000 char buf[18];
23001 sprintf (buf, "%s.%.5u",
23002 is_ctor ? ".init_array" : ".fini_array",
23003 priority);
23004 s = get_section (buf, SECTION_WRITE | SECTION_NOTYPE, NULL_TREE);
23005 }
23006 else if (is_ctor)
23007 s = ctors_section;
23008 else
23009 s = dtors_section;
23010
23011 switch_to_section (s);
23012 assemble_align (POINTER_SIZE);
23013 fputs ("\t.word\t", asm_out_file);
23014 output_addr_const (asm_out_file, symbol);
23015 fputs ("(target1)\n", asm_out_file);
23016 }
23017
23018 /* Add a function to the list of static constructors. */
23019
23020 static void
23021 arm_elf_asm_constructor (rtx symbol, int priority)
23022 {
23023 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/true);
23024 }
23025
23026 /* Add a function to the list of static destructors. */
23027
23028 static void
23029 arm_elf_asm_destructor (rtx symbol, int priority)
23030 {
23031 arm_elf_asm_cdtor (symbol, priority, /*is_ctor=*/false);
23032 }
23033 \f
23034 /* A finite state machine takes care of noticing whether or not instructions
23038 can be conditionally executed, and thus decreases execution time and code
23036 size by deleting branch instructions. The fsm is controlled by
23037 final_prescan_insn, and controls the actions of ASM_OUTPUT_OPCODE. */
23038
23042 /* The states of the fsm controlling condition codes are:
23040 0: normal, do nothing special
23041 1: make ASM_OUTPUT_OPCODE not output this instruction
23042 2: make ASM_OUTPUT_OPCODE not output this instruction
23043 3: make instructions conditional
23044 4: make instructions conditional
23045
23046 State transitions (state->state by whom under condition):
23047 0 -> 1 final_prescan_insn if the `target' is a label
23048 0 -> 2 final_prescan_insn if the `target' is an unconditional branch
23049 1 -> 3 ASM_OUTPUT_OPCODE after not having output the conditional branch
23050 2 -> 4 ASM_OUTPUT_OPCODE after not having output the conditional branch
23051 3 -> 0 (*targetm.asm_out.internal_label) if the `target' label is reached
23052 (the target label has CODE_LABEL_NUMBER equal to arm_target_label).
23053 4 -> 0 final_prescan_insn if the `target' unconditional branch is reached
23054 (the target insn is arm_target_insn).
23055
23056 If the jump clobbers the conditions then we use states 2 and 4.
23057
23058 A similar thing can be done with conditional return insns.
23059
23060 XXX In case the `target' is an unconditional branch, this conditionalising
23061 of the instructions always reduces code size, but not always execution
23062 time. But then, I want to reduce the code size to somewhere near what
23063 /bin/cc produces. */
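/* For instance, a conditional branch around a single instruction such as

	cmp	r0, #0
	bne	.L1
	mov	r1, #5
   .L1:

   can be rewritten by this machinery as "cmp r0, #0; moveq r1, #5",
   deleting the branch (states 1 and 3 above).  */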
23064
23065 /* In addition to this, state is maintained for Thumb-2 COND_EXEC
23066 instructions. When a COND_EXEC instruction is seen the subsequent
23067 instructions are scanned so that multiple conditional instructions can be
23068 combined into a single IT block. arm_condexec_count and arm_condexec_mask
23069 specify the length and true/false mask for the IT block. These will be
23070 decremented/zeroed by arm_asm_output_opcode as the insns are output. */
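/* On Thumb-2, for example, two insns predicated on EQ and NE respectively can
   be emitted under a single "ite eq" block.  */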
23071
23072 /* Returns the index of the ARM condition code string in
23073 `arm_condition_codes', or ARM_NV if the comparison is invalid.
23074 COMPARISON should be an rtx like `(eq (...) (...))'. */
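/* For example, a GEU comparison yields ARM_CS in plain E_CCmode but ARM_LS in
   E_CC_SWPmode, where the comparison operands were swapped when the flags
   were set.  */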
23075
23076 enum arm_cond_code
23077 maybe_get_arm_condition_code (rtx comparison)
23078 {
23079 machine_mode mode = GET_MODE (XEXP (comparison, 0));
23080 enum arm_cond_code code;
23081 enum rtx_code comp_code = GET_CODE (comparison);
23082
23083 if (GET_MODE_CLASS (mode) != MODE_CC)
23084 mode = SELECT_CC_MODE (comp_code, XEXP (comparison, 0),
23085 XEXP (comparison, 1));
23086
23087 switch (mode)
23088 {
23089 case E_CC_DNEmode: code = ARM_NE; goto dominance;
23090 case E_CC_DEQmode: code = ARM_EQ; goto dominance;
23091 case E_CC_DGEmode: code = ARM_GE; goto dominance;
23092 case E_CC_DGTmode: code = ARM_GT; goto dominance;
23093 case E_CC_DLEmode: code = ARM_LE; goto dominance;
23094 case E_CC_DLTmode: code = ARM_LT; goto dominance;
23095 case E_CC_DGEUmode: code = ARM_CS; goto dominance;
23096 case E_CC_DGTUmode: code = ARM_HI; goto dominance;
23097 case E_CC_DLEUmode: code = ARM_LS; goto dominance;
23098 case E_CC_DLTUmode: code = ARM_CC;
23099
23100 dominance:
23101 if (comp_code == EQ)
23102 return ARM_INVERSE_CONDITION_CODE (code);
23103 if (comp_code == NE)
23104 return code;
23105 return ARM_NV;
23106
23107 case E_CC_NOOVmode:
23108 switch (comp_code)
23109 {
23110 case NE: return ARM_NE;
23111 case EQ: return ARM_EQ;
23112 case GE: return ARM_PL;
23113 case LT: return ARM_MI;
23114 default: return ARM_NV;
23115 }
23116
23117 case E_CC_Zmode:
23118 switch (comp_code)
23119 {
23120 case NE: return ARM_NE;
23121 case EQ: return ARM_EQ;
23122 default: return ARM_NV;
23123 }
23124
23125 case E_CC_Nmode:
23126 switch (comp_code)
23127 {
23128 case NE: return ARM_MI;
23129 case EQ: return ARM_PL;
23130 default: return ARM_NV;
23131 }
23132
23133 case E_CCFPEmode:
23134 case E_CCFPmode:
23135 /* We can handle all cases except UNEQ and LTGT. */
23136 switch (comp_code)
23137 {
23138 case GE: return ARM_GE;
23139 case GT: return ARM_GT;
23140 case LE: return ARM_LS;
23141 case LT: return ARM_MI;
23142 case NE: return ARM_NE;
23143 case EQ: return ARM_EQ;
23144 case ORDERED: return ARM_VC;
23145 case UNORDERED: return ARM_VS;
23146 case UNLT: return ARM_LT;
23147 case UNLE: return ARM_LE;
23148 case UNGT: return ARM_HI;
23149 case UNGE: return ARM_PL;
23150 /* UNEQ and LTGT do not have a representation. */
23151 case UNEQ: /* Fall through. */
23152 case LTGT: /* Fall through. */
23153 default: return ARM_NV;
23154 }
23155
23156 case E_CC_SWPmode:
23157 switch (comp_code)
23158 {
23159 case NE: return ARM_NE;
23160 case EQ: return ARM_EQ;
23161 case GE: return ARM_LE;
23162 case GT: return ARM_LT;
23163 case LE: return ARM_GE;
23164 case LT: return ARM_GT;
23165 case GEU: return ARM_LS;
23166 case GTU: return ARM_CC;
23167 case LEU: return ARM_CS;
23168 case LTU: return ARM_HI;
23169 default: return ARM_NV;
23170 }
23171
23172 case E_CC_Cmode:
23173 switch (comp_code)
23174 {
23175 case LTU: return ARM_CS;
23176 case GEU: return ARM_CC;
23177 case NE: return ARM_CS;
23178 case EQ: return ARM_CC;
23179 default: return ARM_NV;
23180 }
23181
23182 case E_CC_CZmode:
23183 switch (comp_code)
23184 {
23185 case NE: return ARM_NE;
23186 case EQ: return ARM_EQ;
23187 case GEU: return ARM_CS;
23188 case GTU: return ARM_HI;
23189 case LEU: return ARM_LS;
23190 case LTU: return ARM_CC;
23191 default: return ARM_NV;
23192 }
23193
23194 case E_CC_NCVmode:
23195 switch (comp_code)
23196 {
23197 case GE: return ARM_GE;
23198 case LT: return ARM_LT;
23199 case GEU: return ARM_CS;
23200 case LTU: return ARM_CC;
23201 default: return ARM_NV;
23202 }
23203
23204 case E_CC_Vmode:
23205 switch (comp_code)
23206 {
23207 case NE: return ARM_VS;
23208 case EQ: return ARM_VC;
23209 default: return ARM_NV;
23210 }
23211
23212 case E_CCmode:
23213 switch (comp_code)
23214 {
23215 case NE: return ARM_NE;
23216 case EQ: return ARM_EQ;
23217 case GE: return ARM_GE;
23218 case GT: return ARM_GT;
23219 case LE: return ARM_LE;
23220 case LT: return ARM_LT;
23221 case GEU: return ARM_CS;
23222 case GTU: return ARM_HI;
23223 case LEU: return ARM_LS;
23224 case LTU: return ARM_CC;
23225 default: return ARM_NV;
23226 }
23227
23228 default: gcc_unreachable ();
23229 }
23230 }
23231
23232 /* Like maybe_get_arm_condition_code, but never return ARM_NV. */
23233 static enum arm_cond_code
23234 get_arm_condition_code (rtx comparison)
23235 {
23236 enum arm_cond_code code = maybe_get_arm_condition_code (comparison);
23237 gcc_assert (code != ARM_NV);
23238 return code;
23239 }
23240
23241 /* Implement TARGET_FIXED_CONDITION_CODE_REGS. We only have condition
23245 code registers when not targeting Thumb-1. The VFP condition register
23243 only exists when generating hard-float code. */
23244 static bool
23245 arm_fixed_condition_code_regs (unsigned int *p1, unsigned int *p2)
23246 {
23247 if (!TARGET_32BIT)
23248 return false;
23249
23250 *p1 = CC_REGNUM;
23251 *p2 = TARGET_HARD_FLOAT ? VFPCC_REGNUM : INVALID_REGNUM;
23252 return true;
23253 }
23254
23255 /* Tell arm_asm_output_opcode to output IT blocks for conditionally executed
23256 instructions. */
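/* In outline: starting from the first COND_EXEC insn, scan forward and pack
   up to MAX_INSN_PER_IT_BLOCK conditional insns whose predicate is either the
   current condition or its inverse into arm_condexec_mask/arm_condexec_count;
   for example, insns predicated EQ, NE, EQ fold into one block that is later
   printed as "itet eq".  */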
23257 void
23258 thumb2_final_prescan_insn (rtx_insn *insn)
23259 {
23260 rtx_insn *first_insn = insn;
23261 rtx body = PATTERN (insn);
23262 rtx predicate;
23263 enum arm_cond_code code;
23264 int n;
23265 int mask;
23266 int max;
23267
23268 /* max_insns_skipped in the tune was already taken into account in the
23272 cost model of the ifcvt pass when generating COND_EXEC insns. At this stage
23273 just emit the IT blocks as best we can. It does not make sense to split
23271 the IT blocks. */
23272 max = MAX_INSN_PER_IT_BLOCK;
23273
23274 /* Remove the previous insn from the count of insns to be output. */
23275 if (arm_condexec_count)
23276 arm_condexec_count--;
23277
23278 /* Nothing to do if we are already inside a conditional block. */
23279 if (arm_condexec_count)
23280 return;
23281
23282 if (GET_CODE (body) != COND_EXEC)
23283 return;
23284
23285 /* Conditional jumps are implemented directly. */
23286 if (JUMP_P (insn))
23287 return;
23288
23289 predicate = COND_EXEC_TEST (body);
23290 arm_current_cc = get_arm_condition_code (predicate);
23291
23292 n = get_attr_ce_count (insn);
23293 arm_condexec_count = 1;
23294 arm_condexec_mask = (1 << n) - 1;
23295 arm_condexec_masklen = n;
23296 /* See if subsequent instructions can be combined into the same block. */
23297 for (;;)
23298 {
23299 insn = next_nonnote_insn (insn);
23300
23301 /* Jumping into the middle of an IT block is illegal, so a label or
23302 barrier terminates the block. */
23303 if (!NONJUMP_INSN_P (insn) && !JUMP_P (insn))
23304 break;
23305
23306 body = PATTERN (insn);
23307 /* USE and CLOBBER aren't really insns, so just skip them. */
23308 if (GET_CODE (body) == USE
23309 || GET_CODE (body) == CLOBBER)
23310 continue;
23311
23312 /* ??? Recognize conditional jumps, and combine them with IT blocks. */
23313 if (GET_CODE (body) != COND_EXEC)
23314 break;
23315 /* Maximum number of conditionally executed instructions in a block. */
23316 n = get_attr_ce_count (insn);
23317 if (arm_condexec_masklen + n > max)
23318 break;
23319
23320 predicate = COND_EXEC_TEST (body);
23321 code = get_arm_condition_code (predicate);
23322 mask = (1 << n) - 1;
23323 if (arm_current_cc == code)
23324 arm_condexec_mask |= (mask << arm_condexec_masklen);
23325 else if (arm_current_cc != ARM_INVERSE_CONDITION_CODE(code))
23326 break;
23327
23328 arm_condexec_count++;
23329 arm_condexec_masklen += n;
23330
23331 /* A jump must be the last instruction in a conditional block. */
23332 if (JUMP_P (insn))
23333 break;
23334 }
23335 /* Restore recog_data (getting the attributes of other insns can
23336 destroy this array, but final.c assumes that it remains intact
23337 across this call). */
23338 extract_constrain_insn_cached (first_insn);
23339 }
23340
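/* Note: this is the ARM-state counterpart of thumb2_final_prescan_insn.
   It drives the arm_ccfsm_state machine, which lets final replace a short
   forward conditional branch by conditionally executed instructions.
   (High-level summary only; the details are encoded in the state checks
   below.)  */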
23341 void
23342 arm_final_prescan_insn (rtx_insn *insn)
23343 {
23344 /* BODY will hold the body of INSN. */
23345 rtx body = PATTERN (insn);
23346
23347 /* This will be 1 if trying to repeat the trick, and things need to be
23348 reversed if it appears to fail. */
23349 int reverse = 0;
23350
23351 /* If we start with a return insn, we only succeed if we find another one. */
23352 int seeking_return = 0;
23353 enum rtx_code return_code = UNKNOWN;
23354
23355 /* START_INSN will hold the insn from where we start looking. This is the
23356 first insn after the following code_label if REVERSE is true. */
23357 rtx_insn *start_insn = insn;
23358
23359 /* If in state 4, check if the target branch is reached, in order to
23360 change back to state 0. */
23361 if (arm_ccfsm_state == 4)
23362 {
23363 if (insn == arm_target_insn)
23364 {
23365 arm_target_insn = NULL;
23366 arm_ccfsm_state = 0;
23367 }
23368 return;
23369 }
23370
23371 /* If in state 3, it is possible to repeat the trick, if this insn is an
23372 unconditional branch to a label, and immediately following this branch
23373 is the previous target label which is only used once, and the label this
23374 branch jumps to is not too far off. */
23375 if (arm_ccfsm_state == 3)
23376 {
23377 if (simplejump_p (insn))
23378 {
23379 start_insn = next_nonnote_insn (start_insn);
23380 if (BARRIER_P (start_insn))
23381 {
23382 /* XXX Isn't this always a barrier? */
23383 start_insn = next_nonnote_insn (start_insn);
23384 }
23385 if (LABEL_P (start_insn)
23386 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23387 && LABEL_NUSES (start_insn) == 1)
23388 reverse = TRUE;
23389 else
23390 return;
23391 }
23392 else if (ANY_RETURN_P (body))
23393 {
23394 start_insn = next_nonnote_insn (start_insn);
23395 if (BARRIER_P (start_insn))
23396 start_insn = next_nonnote_insn (start_insn);
23397 if (LABEL_P (start_insn)
23398 && CODE_LABEL_NUMBER (start_insn) == arm_target_label
23399 && LABEL_NUSES (start_insn) == 1)
23400 {
23401 reverse = TRUE;
23402 seeking_return = 1;
23403 return_code = GET_CODE (body);
23404 }
23405 else
23406 return;
23407 }
23408 else
23409 return;
23410 }
23411
23412 gcc_assert (!arm_ccfsm_state || reverse);
23413 if (!JUMP_P (insn))
23414 return;
23415
23416 /* This jump might be paralleled with a clobber of the condition codes;
23417 the jump should always come first. */
23418 if (GET_CODE (body) == PARALLEL && XVECLEN (body, 0) > 0)
23419 body = XVECEXP (body, 0, 0);
23420
23421 if (reverse
23422 || (GET_CODE (body) == SET && GET_CODE (SET_DEST (body)) == PC
23423 && GET_CODE (SET_SRC (body)) == IF_THEN_ELSE))
23424 {
23425 int insns_skipped;
23426 int fail = FALSE, succeed = FALSE;
23427 /* Flag which part of the IF_THEN_ELSE is the LABEL_REF. */
23428 int then_not_else = TRUE;
23429 rtx_insn *this_insn = start_insn;
23430 rtx label = 0;
23431
23432 /* Register the insn jumped to. */
23433 if (reverse)
23434 {
23435 if (!seeking_return)
23436 label = XEXP (SET_SRC (body), 0);
23437 }
23438 else if (GET_CODE (XEXP (SET_SRC (body), 1)) == LABEL_REF)
23439 label = XEXP (XEXP (SET_SRC (body), 1), 0);
23440 else if (GET_CODE (XEXP (SET_SRC (body), 2)) == LABEL_REF)
23441 {
23442 label = XEXP (XEXP (SET_SRC (body), 2), 0);
23443 then_not_else = FALSE;
23444 }
23445 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 1)))
23446 {
23447 seeking_return = 1;
23448 return_code = GET_CODE (XEXP (SET_SRC (body), 1));
23449 }
23450 else if (ANY_RETURN_P (XEXP (SET_SRC (body), 2)))
23451 {
23452 seeking_return = 1;
23453 return_code = GET_CODE (XEXP (SET_SRC (body), 2));
23454 then_not_else = FALSE;
23455 }
23456 else
23457 gcc_unreachable ();
23458
23459 /* See how many insns this branch skips, and what kind of insns. If all
23460 insns are okay, and the label or unconditional branch to the same
23461 label is not too far away, succeed. */
23462 for (insns_skipped = 0;
23463 !fail && !succeed && insns_skipped++ < max_insns_skipped;)
23464 {
23465 rtx scanbody;
23466
23467 this_insn = next_nonnote_insn (this_insn);
23468 if (!this_insn)
23469 break;
23470
23471 switch (GET_CODE (this_insn))
23472 {
23473 case CODE_LABEL:
23474 /* Succeed if it is the target label, otherwise fail since
23475 control falls in from somewhere else. */
23476 if (this_insn == label)
23477 {
23478 arm_ccfsm_state = 1;
23479 succeed = TRUE;
23480 }
23481 else
23482 fail = TRUE;
23483 break;
23484
23485 case BARRIER:
23486 /* Succeed if the following insn is the target label.
23487 Otherwise fail.
23488 If return insns are used then the last insn in a function
23489 will be a barrier. */
23490 this_insn = next_nonnote_insn (this_insn);
23491 if (this_insn && this_insn == label)
23492 {
23493 arm_ccfsm_state = 1;
23494 succeed = TRUE;
23495 }
23496 else
23497 fail = TRUE;
23498 break;
23499
23500 case CALL_INSN:
23501 /* The AAPCS says that conditional calls should not be
23502 used since they make interworking inefficient (the
23503 linker can't transform BL<cond> into BLX). That's
23504 only a problem if the machine has BLX. */
23505 if (arm_arch5t)
23506 {
23507 fail = TRUE;
23508 break;
23509 }
23510
23511 /* Succeed if the following insn is the target label, or
23512 if the following two insns are a barrier and the
23513 target label. */
23514 this_insn = next_nonnote_insn (this_insn);
23515 if (this_insn && BARRIER_P (this_insn))
23516 this_insn = next_nonnote_insn (this_insn);
23517
23518 if (this_insn && this_insn == label
23519 && insns_skipped < max_insns_skipped)
23520 {
23521 arm_ccfsm_state = 1;
23522 succeed = TRUE;
23523 }
23524 else
23525 fail = TRUE;
23526 break;
23527
23528 case JUMP_INSN:
23529 /* If this is an unconditional branch to the same label, succeed.
23530 If it is to another label, do nothing. If it is conditional,
23531 fail. */
23532 /* XXX Probably, the tests for SET and the PC are
23533 unnecessary. */
23534
23535 scanbody = PATTERN (this_insn);
23536 if (GET_CODE (scanbody) == SET
23537 && GET_CODE (SET_DEST (scanbody)) == PC)
23538 {
23539 if (GET_CODE (SET_SRC (scanbody)) == LABEL_REF
23540 && XEXP (SET_SRC (scanbody), 0) == label && !reverse)
23541 {
23542 arm_ccfsm_state = 2;
23543 succeed = TRUE;
23544 }
23545 else if (GET_CODE (SET_SRC (scanbody)) == IF_THEN_ELSE)
23546 fail = TRUE;
23547 }
23548 /* Fail if a conditional return is undesirable (e.g. on a
23549 StrongARM), but still allow this if optimizing for size. */
23550 else if (GET_CODE (scanbody) == return_code
23551 && !use_return_insn (TRUE, NULL)
23552 && !optimize_size)
23553 fail = TRUE;
23554 else if (GET_CODE (scanbody) == return_code)
23555 {
23556 arm_ccfsm_state = 2;
23557 succeed = TRUE;
23558 }
23559 else if (GET_CODE (scanbody) == PARALLEL)
23560 {
23561 switch (get_attr_conds (this_insn))
23562 {
23563 case CONDS_NOCOND:
23564 break;
23565 default:
23566 fail = TRUE;
23567 break;
23568 }
23569 }
23570 else
23571 fail = TRUE; /* Unrecognized jump (e.g. epilogue). */
23572
23573 break;
23574
23575 case INSN:
23576 /* Instructions using or affecting the condition codes make it
23577 fail. */
23578 scanbody = PATTERN (this_insn);
23579 if (!(GET_CODE (scanbody) == SET
23580 || GET_CODE (scanbody) == PARALLEL)
23581 || get_attr_conds (this_insn) != CONDS_NOCOND)
23582 fail = TRUE;
23583 break;
23584
23585 default:
23586 break;
23587 }
23588 }
23589 if (succeed)
23590 {
23591 if ((!seeking_return) && (arm_ccfsm_state == 1 || reverse))
23592 arm_target_label = CODE_LABEL_NUMBER (label);
23593 else
23594 {
23595 gcc_assert (seeking_return || arm_ccfsm_state == 2);
23596
23597 while (this_insn && GET_CODE (PATTERN (this_insn)) == USE)
23598 {
23599 this_insn = next_nonnote_insn (this_insn);
23600 gcc_assert (!this_insn
23601 || (!BARRIER_P (this_insn)
23602 && !LABEL_P (this_insn)));
23603 }
23604 if (!this_insn)
23605 {
23606 /* Oh, dear! We ran off the end; give up. */
23607 extract_constrain_insn_cached (insn);
23608 arm_ccfsm_state = 0;
23609 arm_target_insn = NULL;
23610 return;
23611 }
23612 arm_target_insn = this_insn;
23613 }
23614
23615 /* If REVERSE is true, ARM_CURRENT_CC needs to be inverted from
23616 what it was. */
23617 if (!reverse)
23618 arm_current_cc = get_arm_condition_code (XEXP (SET_SRC (body), 0));
23619
23620 if (reverse || then_not_else)
23621 arm_current_cc = ARM_INVERSE_CONDITION_CODE (arm_current_cc);
23622 }
23623
23624 /* Restore recog_data (getting the attributes of other insns can
23625 destroy this array, but final.c assumes that it remains intact
23626 across this call). */
23627 extract_constrain_insn_cached (insn);
23628 }
23629 }
23630
23631 /* Output IT instructions. */
23632 void
23633 thumb2_asm_output_opcode (FILE * stream)
23634 {
23635 char buff[5];
23636 int n;
23637
23638 if (arm_condexec_mask)
23639 {
23640 for (n = 0; n < arm_condexec_masklen; n++)
23641 buff[n] = (arm_condexec_mask & (1 << n)) ? 't' : 'e';
23642 buff[n] = 0;
23643 asm_fprintf(stream, "i%s\t%s\n\t", buff,
23644 arm_condition_codes[arm_current_cc]);
23645 arm_condexec_mask = 0;
23646 }
23647 }
23648
23649 /* Implement TARGET_HARD_REGNO_NREGS. On the ARM core regs are
23650 UNITS_PER_WORD bytes wide. */
23651 static unsigned int
23652 arm_hard_regno_nregs (unsigned int regno, machine_mode mode)
23653 {
23654 if (TARGET_32BIT
23655 && regno > PC_REGNUM
23656 && regno != FRAME_POINTER_REGNUM
23657 && regno != ARG_POINTER_REGNUM
23658 && !IS_VFP_REGNUM (regno))
23659 return 1;
23660
23661 return ARM_NUM_REGS (mode);
23662 }
23663
23664 /* Implement TARGET_HARD_REGNO_MODE_OK. */
23665 static bool
23666 arm_hard_regno_mode_ok (unsigned int regno, machine_mode mode)
23667 {
23668 if (GET_MODE_CLASS (mode) == MODE_CC)
23669 return (regno == CC_REGNUM
23670 || (TARGET_HARD_FLOAT
23671 && regno == VFPCC_REGNUM));
23672
23673 if (regno == CC_REGNUM && GET_MODE_CLASS (mode) != MODE_CC)
23674 return false;
23675
23676 if (TARGET_THUMB1)
23677 /* For the Thumb we only allow values bigger than SImode in
23678 registers 0 - 6, so that there is always a second low
23679 register available to hold the upper part of the value.
23680 We probably ought to ensure that the register is the
23681 start of an even-numbered register pair. */
23682 return (ARM_NUM_REGS (mode) < 2) || (regno < LAST_LO_REGNUM);
23683
23684 if (TARGET_HARD_FLOAT && IS_VFP_REGNUM (regno))
23685 {
23686 if (mode == SFmode || mode == SImode)
23687 return VFP_REGNO_OK_FOR_SINGLE (regno);
23688
23689 if (mode == DFmode)
23690 return VFP_REGNO_OK_FOR_DOUBLE (regno);
23691
23692 if (mode == HFmode)
23693 return VFP_REGNO_OK_FOR_SINGLE (regno);
23694
23695 /* VFP registers can hold HImode values. */
23696 if (mode == HImode)
23697 return VFP_REGNO_OK_FOR_SINGLE (regno);
23698
23699 if (TARGET_NEON)
23700 return (VALID_NEON_DREG_MODE (mode) && VFP_REGNO_OK_FOR_DOUBLE (regno))
23701 || (VALID_NEON_QREG_MODE (mode)
23702 && NEON_REGNO_OK_FOR_QUAD (regno))
23703 || (mode == TImode && NEON_REGNO_OK_FOR_NREGS (regno, 2))
23704 || (mode == EImode && NEON_REGNO_OK_FOR_NREGS (regno, 3))
23705 || (mode == OImode && NEON_REGNO_OK_FOR_NREGS (regno, 4))
23706 || (mode == CImode && NEON_REGNO_OK_FOR_NREGS (regno, 6))
23707 || (mode == XImode && NEON_REGNO_OK_FOR_NREGS (regno, 8));
23708
23709 return false;
23710 }
23711
23712 if (TARGET_REALLY_IWMMXT)
23713 {
23714 if (IS_IWMMXT_GR_REGNUM (regno))
23715 return mode == SImode;
23716
23717 if (IS_IWMMXT_REGNUM (regno))
23718 return VALID_IWMMXT_REG_MODE (mode);
23719 }
23720
23721 /* We allow almost any value to be stored in the general registers.
23722 Restrict doubleword quantities to even register pairs in ARM state
23723 so that we can use ldrd. Do not allow very large Neon structure
23724 opaque modes in general registers; they would use too many. */
23725 if (regno <= LAST_ARM_REGNUM)
23726 {
23727 if (ARM_NUM_REGS (mode) > 4)
23728 return false;
23729
23730 if (TARGET_THUMB2)
23731 return true;
23732
23733 return !(TARGET_LDRD && GET_MODE_SIZE (mode) > 4 && (regno & 1) != 0);
23734 }
23735
23736 if (regno == FRAME_POINTER_REGNUM
23737 || regno == ARG_POINTER_REGNUM)
23738 /* We only allow integers in the fake hard registers. */
23739 return GET_MODE_CLASS (mode) == MODE_INT;
23740
23741 return false;
23742 }
23743
23744 /* Implement TARGET_MODES_TIEABLE_P. */
23745
23746 static bool
23747 arm_modes_tieable_p (machine_mode mode1, machine_mode mode2)
23748 {
23749 if (GET_MODE_CLASS (mode1) == GET_MODE_CLASS (mode2))
23750 return true;
23751
23752 /* We specifically want to allow elements of "structure" modes to
23753 be tieable to the structure. This more general condition allows
23754 other rarer situations too. */
23755 if (TARGET_NEON
23756 && (VALID_NEON_DREG_MODE (mode1)
23757 || VALID_NEON_QREG_MODE (mode1)
23758 || VALID_NEON_STRUCT_MODE (mode1))
23759 && (VALID_NEON_DREG_MODE (mode2)
23760 || VALID_NEON_QREG_MODE (mode2)
23761 || VALID_NEON_STRUCT_MODE (mode2)))
23762 return true;
23763
23764 return false;
23765 }
23766
23767 /* For efficiency and historical reasons LO_REGS, HI_REGS and CC_REGS are
23768 not used in ARM mode. */
23769
23770 enum reg_class
23771 arm_regno_class (int regno)
23772 {
23773 if (regno == PC_REGNUM)
23774 return NO_REGS;
23775
23776 if (TARGET_THUMB1)
23777 {
23778 if (regno == STACK_POINTER_REGNUM)
23779 return STACK_REG;
23780 if (regno == CC_REGNUM)
23781 return CC_REG;
23782 if (regno < 8)
23783 return LO_REGS;
23784 return HI_REGS;
23785 }
23786
23787 if (TARGET_THUMB2 && regno < 8)
23788 return LO_REGS;
23789
23790 if ( regno <= LAST_ARM_REGNUM
23791 || regno == FRAME_POINTER_REGNUM
23792 || regno == ARG_POINTER_REGNUM)
23793 return TARGET_THUMB2 ? HI_REGS : GENERAL_REGS;
23794
23795 if (regno == CC_REGNUM || regno == VFPCC_REGNUM)
23796 return TARGET_THUMB2 ? CC_REG : NO_REGS;
23797
23798 if (IS_VFP_REGNUM (regno))
23799 {
23800 if (regno <= D7_VFP_REGNUM)
23801 return VFP_D0_D7_REGS;
23802 else if (regno <= LAST_LO_VFP_REGNUM)
23803 return VFP_LO_REGS;
23804 else
23805 return VFP_HI_REGS;
23806 }
23807
23808 if (IS_IWMMXT_REGNUM (regno))
23809 return IWMMXT_REGS;
23810
23811 if (IS_IWMMXT_GR_REGNUM (regno))
23812 return IWMMXT_GR_REGS;
23813
23814 return NO_REGS;
23815 }
23816
23817 /* Handle a special case when computing the offset
23818 of an argument from the frame pointer. */
23819 int
23820 arm_debugger_arg_offset (int value, rtx addr)
23821 {
23822 rtx_insn *insn;
23823
23824 /* We are only interested if dbxout_parms() failed to compute the offset. */
23825 if (value != 0)
23826 return 0;
23827
23828 /* We can only cope with the case where the address is held in a register. */
23829 if (!REG_P (addr))
23830 return 0;
23831
23832 /* If we are using the frame pointer to point at the argument, then
23833 an offset of 0 is correct. */
23834 if (REGNO (addr) == (unsigned) HARD_FRAME_POINTER_REGNUM)
23835 return 0;
23836
23837 /* If we are using the stack pointer to point at the
23838 argument, then an offset of 0 is correct. */
23839 /* ??? Check this is consistent with thumb2 frame layout. */
23840 if ((TARGET_THUMB || !frame_pointer_needed)
23841 && REGNO (addr) == SP_REGNUM)
23842 return 0;
23843
23844 /* Oh dear. The argument is pointed to by a register rather
23845 than being held in a register, or being stored at a known
23846 offset from the frame pointer. Since GDB only understands
23847 those two kinds of argument we must translate the address
23848 held in the register into an offset from the frame pointer.
23849 We do this by searching through the insns for the function
23850 looking to see where this register gets its value. If the
23851 register is initialized from the frame pointer plus an offset
23852 then we are in luck and we can continue, otherwise we give up.
23853
23854 This code is exercised by producing debugging information
23855 for a function with arguments like this:
23856
23857 double func (double a, double b, int c, double d) {return d;}
23858
23859 Without this code the stab for parameter 'd' will be set to
23860 an offset of 0 from the frame pointer, rather than 8. */
23861
23862 /* The if() statement says:
23863
23864 If the insn is a normal instruction
23865 and if the insn is setting the value in a register
23866 and if the register being set is the register holding the address of the argument
23867 and if the address is computed by an addition
23868 that involves adding to a register
23869 which is the frame pointer
23870 a constant integer
23871
23872 then... */
23873
23874 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
23875 {
23876 if ( NONJUMP_INSN_P (insn)
23877 && GET_CODE (PATTERN (insn)) == SET
23878 && REGNO (XEXP (PATTERN (insn), 0)) == REGNO (addr)
23879 && GET_CODE (XEXP (PATTERN (insn), 1)) == PLUS
23880 && REG_P (XEXP (XEXP (PATTERN (insn), 1), 0))
23881 && REGNO (XEXP (XEXP (PATTERN (insn), 1), 0)) == (unsigned) HARD_FRAME_POINTER_REGNUM
23882 && CONST_INT_P (XEXP (XEXP (PATTERN (insn), 1), 1))
23883 )
23884 {
23885 value = INTVAL (XEXP (XEXP (PATTERN (insn), 1), 1));
23886
23887 break;
23888 }
23889 }
23890
23891 if (value == 0)
23892 {
23893 debug_rtx (addr);
23894 warning (0, "unable to compute real location of stacked parameter");
23895 value = 8; /* XXX magic hack */
23896 }
23897
23898 return value;
23899 }
23900 \f
23901 /* Implement TARGET_PROMOTED_TYPE. */
23902
23903 static tree
23904 arm_promoted_type (const_tree t)
23905 {
23906 if (SCALAR_FLOAT_TYPE_P (t)
23907 && TYPE_PRECISION (t) == 16
23908 && TYPE_MAIN_VARIANT (t) == arm_fp16_type_node)
23909 return float_type_node;
23910 return NULL_TREE;
23911 }
23912
23913 /* Implement TARGET_SCALAR_MODE_SUPPORTED_P.
23914 This simply adds HFmode as a supported mode; even though we don't
23915 implement arithmetic on this type directly, it's supported by
23916 optabs conversions, much the way the double-word arithmetic is
23917 special-cased in the default hook. */
23918
23919 static bool
23920 arm_scalar_mode_supported_p (scalar_mode mode)
23921 {
23922 if (mode == HFmode)
23923 return (arm_fp16_format != ARM_FP16_FORMAT_NONE);
23924 else if (ALL_FIXED_POINT_MODE_P (mode))
23925 return true;
23926 else
23927 return default_scalar_mode_supported_p (mode);
23928 }
23929
23930 /* Set the value of FLT_EVAL_METHOD.
23931 ISO/IEC TS 18661-3 defines two values that we'd like to make use of:
23932
23933 0: evaluate all operations and constants, whose semantic type has at
23934 most the range and precision of type float, to the range and
23935 precision of float; evaluate all other operations and constants to
23936 the range and precision of the semantic type;
23937
23938 N, where _FloatN is a supported interchange floating type
23939 evaluate all operations and constants, whose semantic type has at
23940 most the range and precision of _FloatN type, to the range and
23941 precision of the _FloatN type; evaluate all other operations and
23942 constants to the range and precision of the semantic type;
23943
23944 If we have the ARMv8.2-A extensions then we support _Float16 in native
23945 precision, so we should set this to 16. Otherwise, we support the type,
23946 but want to evaluate expressions in float precision, so set this to
23947 0. */
23948
23949 static enum flt_eval_method
23950 arm_excess_precision (enum excess_precision_type type)
23951 {
23952 switch (type)
23953 {
23954 case EXCESS_PRECISION_TYPE_FAST:
23955 case EXCESS_PRECISION_TYPE_STANDARD:
23956 /* We can calculate either in 16-bit range and precision or
23957 32-bit range and precision. Make that decision based on whether
23958 we have native support for the ARMv8.2-A 16-bit floating-point
23959 instructions or not. */
23960 return (TARGET_VFP_FP16INST
23961 ? FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16
23962 : FLT_EVAL_METHOD_PROMOTE_TO_FLOAT);
23963 case EXCESS_PRECISION_TYPE_IMPLICIT:
23964 return FLT_EVAL_METHOD_PROMOTE_TO_FLOAT16;
23965 default:
23966 gcc_unreachable ();
23967 }
23968 return FLT_EVAL_METHOD_UNPREDICTABLE;
23969 }
23970
23971
23972 /* Implement TARGET_FLOATN_MODE. Make very sure that we don't provide
23973 _Float16 if we are using anything other than IEEE format for 16-bit
23974 floating point. Otherwise, punt to the default implementation. */
23975 static opt_scalar_float_mode
23976 arm_floatn_mode (int n, bool extended)
23977 {
23978 if (!extended && n == 16)
23979 {
23980 if (arm_fp16_format == ARM_FP16_FORMAT_IEEE)
23981 return HFmode;
23982 return opt_scalar_float_mode ();
23983 }
23984
23985 return default_floatn_mode (n, extended);
23986 }
23987
23988
23989 /* Set up OPERANDS for a register copy from SRC to DEST, taking care
23990 not to early-clobber SRC registers in the process.
23991
23992 We assume that the operands described by SRC and DEST represent a
23993 decomposed copy of OPERANDS[1] into OPERANDS[0]. COUNT is the
23994 number of components into which the copy has been decomposed. */
23995 void
23996 neon_disambiguate_copy (rtx *operands, rtx *dest, rtx *src, unsigned int count)
23997 {
23998 unsigned int i;
23999
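/* If the destination block starts below the source block (or the two do
   not overlap at all), copying the components in increasing order cannot
   overwrite a source register before it has been read; otherwise copy
   them in decreasing order.  */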
24000 if (!reg_overlap_mentioned_p (operands[0], operands[1])
24001 || REGNO (operands[0]) < REGNO (operands[1]))
24002 {
24003 for (i = 0; i < count; i++)
24004 {
24005 operands[2 * i] = dest[i];
24006 operands[2 * i + 1] = src[i];
24007 }
24008 }
24009 else
24010 {
24011 for (i = 0; i < count; i++)
24012 {
24013 operands[2 * i] = dest[count - i - 1];
24014 operands[2 * i + 1] = src[count - i - 1];
24015 }
24016 }
24017 }
24018
24019 /* Split operands into moves from op[1] + op[2] into op[0]. */
24020
24021 void
24022 neon_split_vcombine (rtx operands[3])
24023 {
24024 unsigned int dest = REGNO (operands[0]);
24025 unsigned int src1 = REGNO (operands[1]);
24026 unsigned int src2 = REGNO (operands[2]);
24027 machine_mode halfmode = GET_MODE (operands[1]);
24028 unsigned int halfregs = REG_NREGS (operands[1]);
24029 rtx destlo, desthi;
24030
24031 if (src1 == dest && src2 == dest + halfregs)
24032 {
24033 /* No-op move. Can't split to nothing; emit something. */
24034 emit_note (NOTE_INSN_DELETED);
24035 return;
24036 }
24037
24038 /* Preserve register attributes for variable tracking. */
24039 destlo = gen_rtx_REG_offset (operands[0], halfmode, dest, 0);
24040 desthi = gen_rtx_REG_offset (operands[0], halfmode, dest + halfregs,
24041 GET_MODE_SIZE (halfmode));
24042
24043 /* Special case of reversed high/low parts. Use VSWP. */
24044 if (src2 == dest && src1 == dest + halfregs)
24045 {
24046 rtx x = gen_rtx_SET (destlo, operands[1]);
24047 rtx y = gen_rtx_SET (desthi, operands[2]);
24048 emit_insn (gen_rtx_PARALLEL (VOIDmode, gen_rtvec (2, x, y)));
24049 return;
24050 }
24051
24052 if (!reg_overlap_mentioned_p (operands[2], destlo))
24053 {
24054 /* Try to avoid unnecessary moves if part of the result
24055 is in the right place already. */
24056 if (src1 != dest)
24057 emit_move_insn (destlo, operands[1]);
24058 if (src2 != dest + halfregs)
24059 emit_move_insn (desthi, operands[2]);
24060 }
24061 else
24062 {
24063 if (src2 != dest + halfregs)
24064 emit_move_insn (desthi, operands[2]);
24065 if (src1 != dest)
24066 emit_move_insn (destlo, operands[1]);
24067 }
24068 }
24069 \f
24070 /* Return the number (counting from 0) of
24071 the least significant set bit in MASK. */
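/* For example, number_of_first_bit_set (0x18) is 3: bit 3 (r3 in a register
   mask) is the lowest bit set.  */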
24072
24073 inline static int
24074 number_of_first_bit_set (unsigned mask)
24075 {
24076 return ctz_hwi (mask);
24077 }
24078
24079 /* Like emit_multi_reg_push, but allowing for a different set of
24080 registers to be described as saved. MASK is the set of registers
24081 to be saved; REAL_REGS is the set of registers to be described as
24082 saved. If REAL_REGS is 0, only describe the stack adjustment. */
24083
24084 static rtx_insn *
24085 thumb1_emit_multi_reg_push (unsigned long mask, unsigned long real_regs)
24086 {
24087 unsigned long regno;
24088 rtx par[10], tmp, reg;
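/* PAR must be able to hold one SET plus an element for each of up to nine
   pushed registers (r0-r7 and LR), hence ten entries.  */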
24089 rtx_insn *insn;
24090 int i, j;
24091
24092 /* Build the parallel of the registers actually being stored. */
24093 for (i = 0; mask; ++i, mask &= mask - 1)
24094 {
24095 regno = ctz_hwi (mask);
24096 reg = gen_rtx_REG (SImode, regno);
24097
24098 if (i == 0)
24099 tmp = gen_rtx_UNSPEC (BLKmode, gen_rtvec (1, reg), UNSPEC_PUSH_MULT);
24100 else
24101 tmp = gen_rtx_USE (VOIDmode, reg);
24102
24103 par[i] = tmp;
24104 }
24105
24106 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24107 tmp = gen_rtx_PRE_MODIFY (Pmode, stack_pointer_rtx, tmp);
24108 tmp = gen_frame_mem (BLKmode, tmp);
24109 tmp = gen_rtx_SET (tmp, par[0]);
24110 par[0] = tmp;
24111
24112 tmp = gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (i, par));
24113 insn = emit_insn (tmp);
24114
24115 /* Always build the stack adjustment note for unwind info. */
24116 tmp = plus_constant (Pmode, stack_pointer_rtx, -4 * i);
24117 tmp = gen_rtx_SET (stack_pointer_rtx, tmp);
24118 par[0] = tmp;
24119
24120 /* Build the parallel of the registers recorded as saved for unwind. */
24121 for (j = 0; real_regs; ++j, real_regs &= real_regs - 1)
24122 {
24123 regno = ctz_hwi (real_regs);
24124 reg = gen_rtx_REG (SImode, regno);
24125
24126 tmp = plus_constant (Pmode, stack_pointer_rtx, j * 4);
24127 tmp = gen_frame_mem (SImode, tmp);
24128 tmp = gen_rtx_SET (tmp, reg);
24129 RTX_FRAME_RELATED_P (tmp) = 1;
24130 par[j + 1] = tmp;
24131 }
24132
24133 if (j == 0)
24134 tmp = par[0];
24135 else
24136 {
24137 RTX_FRAME_RELATED_P (par[0]) = 1;
24138 tmp = gen_rtx_SEQUENCE (VOIDmode, gen_rtvec_v (j + 1, par));
24139 }
24140
24141 add_reg_note (insn, REG_FRAME_RELATED_EXPR, tmp);
24142
24143 return insn;
24144 }
24145
24146 /* Emit code to push or pop registers to or from the stack. F is the
24147 assembly file. MASK is the registers to pop. */
24148 static void
24149 thumb_pop (FILE *f, unsigned long mask)
24150 {
24151 int regno;
24152 int lo_mask = mask & 0xFF;
24153
24154 gcc_assert (mask);
24155
24156 if (lo_mask == 0 && (mask & (1 << PC_REGNUM)))
24157 {
24158 /* Special case. Do not generate a POP PC statement here, do it in
24159 thumb_exit(). */
24160 thumb_exit (f, -1);
24161 return;
24162 }
24163
24164 fprintf (f, "\tpop\t{");
24165
24166 /* Look at the low registers first. */
24167 for (regno = 0; regno <= LAST_LO_REGNUM; regno++, lo_mask >>= 1)
24168 {
24169 if (lo_mask & 1)
24170 {
24171 asm_fprintf (f, "%r", regno);
24172
24173 if ((lo_mask & ~1) != 0)
24174 fprintf (f, ", ");
24175 }
24176 }
24177
24178 if (mask & (1 << PC_REGNUM))
24179 {
24180 /* Catch popping the PC. */
24181 if (TARGET_INTERWORK || TARGET_BACKTRACE || crtl->calls_eh_return
24182 || IS_CMSE_ENTRY (arm_current_func_type ()))
24183 {
24184 /* The PC is never popped directly; instead
24185 it is popped into r3 and then BX is used. */
24186 fprintf (f, "}\n");
24187
24188 thumb_exit (f, -1);
24189
24190 return;
24191 }
24192 else
24193 {
24194 if (mask & 0xFF)
24195 fprintf (f, ", ");
24196
24197 asm_fprintf (f, "%r", PC_REGNUM);
24198 }
24199 }
24200
24201 fprintf (f, "}\n");
24202 }
24203
24204 /* Generate code to return from a thumb function.
24205 If 'reg_containing_return_addr' is -1, then the return address is
24206 actually on the stack, at the stack pointer.
24207
24208 Note: do not forget to update the length attribute of the corresponding
24209 insn pattern when changing assembly output (e.g. the length attribute of
24210 epilogue_insns when updating Armv8-M Baseline Security Extensions register
24211 clearing sequences). */
24212 static void
24213 thumb_exit (FILE *f, int reg_containing_return_addr)
24214 {
24215 unsigned regs_available_for_popping;
24216 unsigned regs_to_pop;
24217 int pops_needed;
24218 unsigned available;
24219 unsigned required;
24220 machine_mode mode;
24221 int size;
24222 int restore_a4 = FALSE;
24223
24224 /* Compute the registers we need to pop. */
24225 regs_to_pop = 0;
24226 pops_needed = 0;
24227
24228 if (reg_containing_return_addr == -1)
24229 {
24230 regs_to_pop |= 1 << LR_REGNUM;
24231 ++pops_needed;
24232 }
24233
24234 if (TARGET_BACKTRACE)
24235 {
24236 /* Restore the (ARM) frame pointer and stack pointer. */
24237 regs_to_pop |= (1 << ARM_HARD_FRAME_POINTER_REGNUM) | (1 << SP_REGNUM);
24238 pops_needed += 2;
24239 }
24240
24241 /* If there is nothing to pop then just emit the BX instruction and
24242 return. */
24243 if (pops_needed == 0)
24244 {
24245 if (crtl->calls_eh_return)
24246 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24247
24248 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24249 {
24250 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n",
24251 reg_containing_return_addr);
24252 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24253 }
24254 else
24255 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24256 return;
24257 }
24258 /* Otherwise if we are not supporting interworking and we have not created
24259 a backtrace structure and the function was not entered in ARM mode then
24260 just pop the return address straight into the PC. */
24261 else if (!TARGET_INTERWORK
24262 && !TARGET_BACKTRACE
24263 && !is_called_in_ARM_mode (current_function_decl)
24264 && !crtl->calls_eh_return
24265 && !IS_CMSE_ENTRY (arm_current_func_type ()))
24266 {
24267 asm_fprintf (f, "\tpop\t{%r}\n", PC_REGNUM);
24268 return;
24269 }
24270
24271 /* Find out how many of the (return) argument registers we can corrupt. */
24272 regs_available_for_popping = 0;
24273
24274 /* If returning via __builtin_eh_return, the bottom three registers
24275 all contain information needed for the return. */
24276 if (crtl->calls_eh_return)
24277 size = 12;
24278 else
24279 {
24280 /* We can deduce the registers used from the function's
24281 return value. This is more reliable than examining
24282 df_regs_ever_live_p () because that will be set if the register is
24283 ever used in the function, not just if the register is used
24284 to hold a return value. */
24285
24286 if (crtl->return_rtx != 0)
24287 mode = GET_MODE (crtl->return_rtx);
24288 else
24289 mode = DECL_MODE (DECL_RESULT (current_function_decl));
24290
24291 size = GET_MODE_SIZE (mode);
24292
24293 if (size == 0)
24294 {
24295 /* In a void function we can use any argument register.
24296 In a function that returns a structure on the stack
24297 we can use the second and third argument registers. */
24298 if (mode == VOIDmode)
24299 regs_available_for_popping =
24300 (1 << ARG_REGISTER (1))
24301 | (1 << ARG_REGISTER (2))
24302 | (1 << ARG_REGISTER (3));
24303 else
24304 regs_available_for_popping =
24305 (1 << ARG_REGISTER (2))
24306 | (1 << ARG_REGISTER (3));
24307 }
24308 else if (size <= 4)
24309 regs_available_for_popping =
24310 (1 << ARG_REGISTER (2))
24311 | (1 << ARG_REGISTER (3));
24312 else if (size <= 8)
24313 regs_available_for_popping =
24314 (1 << ARG_REGISTER (3));
24315 }
24316
24317 /* Match registers to be popped with registers into which we pop them. */
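/* (X & -X) isolates the lowest set bit, so each iteration retires one
   register that can receive a popped value and one register that still
   needs to be popped.  */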
24318 for (available = regs_available_for_popping,
24319 required = regs_to_pop;
24320 required != 0 && available != 0;
24321 available &= ~(available & - available),
24322 required &= ~(required & - required))
24323 -- pops_needed;
24324
24325 /* If we have any popping registers left over, remove them. */
24326 if (available > 0)
24327 regs_available_for_popping &= ~available;
24328
24329 /* Otherwise if we need another popping register we can use
24330 the fourth argument register. */
24331 else if (pops_needed)
24332 {
24333 /* If we have not found any free argument registers and
24334 reg a4 contains the return address, we must move it. */
24335 if (regs_available_for_popping == 0
24336 && reg_containing_return_addr == LAST_ARG_REGNUM)
24337 {
24338 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24339 reg_containing_return_addr = LR_REGNUM;
24340 }
24341 else if (size > 12)
24342 {
24343 /* Register a4 is being used to hold part of the return value,
24344 but we have dire need of a free, low register. */
24345 restore_a4 = TRUE;
24346
24347 asm_fprintf (f, "\tmov\t%r, %r\n",IP_REGNUM, LAST_ARG_REGNUM);
24348 }
24349
24350 if (reg_containing_return_addr != LAST_ARG_REGNUM)
24351 {
24352 /* The fourth argument register is available. */
24353 regs_available_for_popping |= 1 << LAST_ARG_REGNUM;
24354
24355 --pops_needed;
24356 }
24357 }
24358
24359 /* Pop as many registers as we can. */
24360 thumb_pop (f, regs_available_for_popping);
24361
24362 /* Process the registers we popped. */
24363 if (reg_containing_return_addr == -1)
24364 {
24365 /* The return address was popped into the lowest numbered register. */
24366 regs_to_pop &= ~(1 << LR_REGNUM);
24367
24368 reg_containing_return_addr =
24369 number_of_first_bit_set (regs_available_for_popping);
24370
24371 /* Remove this register from the mask of available registers, so that
24372 the return address will not be corrupted by further pops. */
24373 regs_available_for_popping &= ~(1 << reg_containing_return_addr);
24374 }
24375
24376 /* If we popped other registers then handle them here. */
24377 if (regs_available_for_popping)
24378 {
24379 int frame_pointer;
24380
24381 /* Work out which register currently contains the frame pointer. */
24382 frame_pointer = number_of_first_bit_set (regs_available_for_popping);
24383
24384 /* Move it into the correct place. */
24385 asm_fprintf (f, "\tmov\t%r, %r\n",
24386 ARM_HARD_FRAME_POINTER_REGNUM, frame_pointer);
24387
24388 /* (Temporarily) remove it from the mask of popped registers. */
24389 regs_available_for_popping &= ~(1 << frame_pointer);
24390 regs_to_pop &= ~(1 << ARM_HARD_FRAME_POINTER_REGNUM);
24391
24392 if (regs_available_for_popping)
24393 {
24394 int stack_pointer;
24395
24396 /* We popped the stack pointer as well,
24397 find the register that contains it. */
24398 stack_pointer = number_of_first_bit_set (regs_available_for_popping);
24399
24400 /* Move it into the stack register. */
24401 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, stack_pointer);
24402
24403 /* At this point we have popped all necessary registers, so
24404 do not worry about restoring regs_available_for_popping
24405 to its correct value:
24406
24407 assert (pops_needed == 0)
24408 assert (regs_available_for_popping == (1 << frame_pointer))
24409 assert (regs_to_pop == (1 << STACK_POINTER)) */
24410 }
24411 else
24412 {
24413 /* Since we have just moved the popped value into the frame
24414 pointer, the popping register is available for reuse, and
24415 we know that we still have the stack pointer left to pop. */
24416 regs_available_for_popping |= (1 << frame_pointer);
24417 }
24418 }
24419
24420 /* If we still have registers left on the stack, but we no longer have
24421 any registers into which we can pop them, then we must move the return
24422 address into the link register and make available the register that
24423 contained it. */
24424 if (regs_available_for_popping == 0 && pops_needed > 0)
24425 {
24426 regs_available_for_popping |= 1 << reg_containing_return_addr;
24427
24428 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM,
24429 reg_containing_return_addr);
24430
24431 reg_containing_return_addr = LR_REGNUM;
24432 }
24433
24434 /* If we have registers left on the stack then pop some more.
24435 We know that at most we will want to pop FP and SP. */
24436 if (pops_needed > 0)
24437 {
24438 int popped_into;
24439 int move_to;
24440
24441 thumb_pop (f, regs_available_for_popping);
24442
24443 /* We have popped either FP or SP.
24444 Move whichever one it is into the correct register. */
24445 popped_into = number_of_first_bit_set (regs_available_for_popping);
24446 move_to = number_of_first_bit_set (regs_to_pop);
24447
24448 asm_fprintf (f, "\tmov\t%r, %r\n", move_to, popped_into);
24449 --pops_needed;
24450 }
24451
24452 /* If we still have not popped everything then we must have only
24453 had one register available to us and we are now popping the SP. */
24454 if (pops_needed > 0)
24455 {
24456 int popped_into;
24457
24458 thumb_pop (f, regs_available_for_popping);
24459
24460 popped_into = number_of_first_bit_set (regs_available_for_popping);
24461
24462 asm_fprintf (f, "\tmov\t%r, %r\n", SP_REGNUM, popped_into);
24463 /*
24464 assert (regs_to_pop == (1 << STACK_POINTER))
24465 assert (pops_needed == 1)
24466 */
24467 }
24468
24469 /* If necessary restore the a4 register. */
24470 if (restore_a4)
24471 {
24472 if (reg_containing_return_addr != LR_REGNUM)
24473 {
24474 asm_fprintf (f, "\tmov\t%r, %r\n", LR_REGNUM, LAST_ARG_REGNUM);
24475 reg_containing_return_addr = LR_REGNUM;
24476 }
24477
24478 asm_fprintf (f, "\tmov\t%r, %r\n", LAST_ARG_REGNUM, IP_REGNUM);
24479 }
24480
24481 if (crtl->calls_eh_return)
24482 asm_fprintf (f, "\tadd\t%r, %r\n", SP_REGNUM, ARM_EH_STACKADJ_REGNUM);
24483
24484 /* Return to caller. */
24485 if (IS_CMSE_ENTRY (arm_current_func_type ()))
24486 {
24487 /* This is for the cases where LR is not being used to contain the return
24488 address. It may therefore contain information that we might not want
24489 to leak, hence it must be cleared. The value in R0 will never be a
24490 secret at this point, so it is safe to use it; see the clearing code
24491 in 'cmse_nonsecure_entry_clear_before_return'. */
24492 if (reg_containing_return_addr != LR_REGNUM)
24493 asm_fprintf (f, "\tmov\tlr, r0\n");
24494
24495 asm_fprintf (f, "\tmsr\tAPSR_nzcvq, %r\n", reg_containing_return_addr);
24496 asm_fprintf (f, "\tbxns\t%r\n", reg_containing_return_addr);
24497 }
24498 else
24499 asm_fprintf (f, "\tbx\t%r\n", reg_containing_return_addr);
24500 }
24501 \f
24502 /* Scan INSN just before assembler is output for it.
24503 For Thumb-1, we track the status of the condition codes; this
24504 information is used in the cbranchsi4_insn pattern. */
24505 void
24506 thumb1_final_prescan_insn (rtx_insn *insn)
24507 {
24508 if (flag_print_asm_name)
24509 asm_fprintf (asm_out_file, "%@ 0x%04x\n",
24510 INSN_ADDRESSES (INSN_UID (insn)));
24511 /* Don't overwrite the previous setter when we get to a cbranch. */
24512 if (INSN_CODE (insn) != CODE_FOR_cbranchsi4_insn)
24513 {
24514 enum attr_conds conds;
24515
24516 if (cfun->machine->thumb1_cc_insn)
24517 {
24518 if (modified_in_p (cfun->machine->thumb1_cc_op0, insn)
24519 || modified_in_p (cfun->machine->thumb1_cc_op1, insn))
24520 CC_STATUS_INIT;
24521 }
24522 conds = get_attr_conds (insn);
24523 if (conds == CONDS_SET)
24524 {
24525 rtx set = single_set (insn);
24526 cfun->machine->thumb1_cc_insn = insn;
24527 cfun->machine->thumb1_cc_op0 = SET_DEST (set);
24528 cfun->machine->thumb1_cc_op1 = const0_rtx;
24529 cfun->machine->thumb1_cc_mode = CC_NOOVmode;
24530 if (INSN_CODE (insn) == CODE_FOR_thumb1_subsi3_insn)
24531 {
24532 rtx src1 = XEXP (SET_SRC (set), 1);
24533 if (src1 == const0_rtx)
24534 cfun->machine->thumb1_cc_mode = CCmode;
24535 }
24536 else if (REG_P (SET_DEST (set)) && REG_P (SET_SRC (set)))
24537 {
24538 /* Record the src register operand instead of dest because
24539 cprop_hardreg pass propagates src. */
24540 cfun->machine->thumb1_cc_op0 = SET_SRC (set);
24541 }
24542 }
24543 else if (conds != CONDS_NOCOND)
24544 cfun->machine->thumb1_cc_insn = NULL_RTX;
24545 }
24546
24547 /* Check if unexpected far jump is used. */
24548 if (cfun->machine->lr_save_eliminated
24549 && get_attr_far_jump (insn) == FAR_JUMP_YES)
24550 internal_error("Unexpected thumb1 far jump");
24551 }
24552
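/* Return nonzero if VAL (truncated to 32 bits) is a nonzero 8-bit value
   shifted left by between 0 and 24 bits, e.g. 0x00ff0000.  */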
24553 int
24554 thumb_shiftable_const (unsigned HOST_WIDE_INT val)
24555 {
24556 unsigned HOST_WIDE_INT mask = 0xff;
24557 int i;
24558
24559 val = val & (unsigned HOST_WIDE_INT)0xffffffffu;
24560 if (val == 0) /* XXX */
24561 return 0;
24562
24563 for (i = 0; i < 25; i++)
24564 if ((val & (mask << i)) == val)
24565 return 1;
24566
24567 return 0;
24568 }
24569
24570 /* Returns nonzero if the current function contains,
24571 or might contain, a far jump. */
24572 static int
24573 thumb_far_jump_used_p (void)
24574 {
24575 rtx_insn *insn;
24576 bool far_jump = false;
24577 unsigned int func_size = 0;
24578
24579 /* If we have already decided that far jumps may be used,
24580 do not bother checking again, and always return true even if
24581 it turns out that they are not being used. Once we have made
24582 the decision that far jumps are present (and that hence the link
24583 register will be pushed onto the stack) we cannot go back on it. */
24584 if (cfun->machine->far_jump_used)
24585 return 1;
24586
24587 /* If this function is not being called from the prologue/epilogue
24588 generation code then it must be being called from the
24589 INITIAL_ELIMINATION_OFFSET macro. */
24590 if (!(ARM_DOUBLEWORD_ALIGN || reload_completed))
24591 {
24592 /* In this case we know that we are being asked about the elimination
24593 of the arg pointer register. If that register is not being used,
24594 then there are no arguments on the stack, and we do not have to
24595 worry that a far jump might force the prologue to push the link
24596 register, changing the stack offsets. In this case we can just
24597 return false, since the presence of far jumps in the function will
24598 not affect stack offsets.
24599
24600 If the arg pointer is live (or if it was live, but has now been
24601 eliminated and so set to dead) then we do have to test to see if
24602 the function might contain a far jump. This test can lead to some
24603 false negatives, since before reload is completed the length of
24604 branch instructions is not known, so gcc defaults to returning their
24605 longest length, which in turn sets the far jump attribute to true.
24606
24607 A false negative will not result in bad code being generated, but it
24608 will result in a needless push and pop of the link register. We
24609 hope that this does not occur too often.
24610
24611 If we need doubleword stack alignment this could affect the other
24612 elimination offsets so we can't risk getting it wrong. */
24613 if (df_regs_ever_live_p (ARG_POINTER_REGNUM))
24614 cfun->machine->arg_pointer_live = 1;
24615 else if (!cfun->machine->arg_pointer_live)
24616 return 0;
24617 }
24618
24619 /* We should not change far_jump_used during or after reload, as there is
24620 no chance to change stack frame layout. */
24621 if (reload_in_progress || reload_completed)
24622 return 0;
24623
24624 /* Check to see if the function contains a branch
24625 insn with the far jump attribute set. */
24626 for (insn = get_insns (); insn; insn = NEXT_INSN (insn))
24627 {
24628 if (JUMP_P (insn) && get_attr_far_jump (insn) == FAR_JUMP_YES)
24629 {
24630 far_jump = true;
24631 }
24632 func_size += get_attr_length (insn);
24633 }
24634
24635 /* Attribute far_jump will always be true for thumb1 before
24636 shorten_branch pass, so checking the far_jump attribute before
24637 shorten_branch isn't very useful.
24638
24639 The following heuristic tries to estimate more accurately whether a far
24640 jump may finally be used. It is very conservative, as there is no
24641 chance to roll back a decision not to use far jumps.
24642
24643 Thumb1 long branch offset is -2048 to 2046. The worst case is each
24644 2-byte insn is associated with a 4 byte constant pool. Using
24645 function size 2048/3 as the threshold is conservative enough. */
24646 if (far_jump)
24647 {
24648 if ((func_size * 3) >= 2048)
24649 {
24650 /* Record the fact that we have decided that
24651 the function does use far jumps. */
24652 cfun->machine->far_jump_used = 1;
24653 return 1;
24654 }
24655 }
24656
24657 return 0;
24658 }
24659
24660 /* Return nonzero if FUNC must be entered in ARM mode. */
24661 static bool
24662 is_called_in_ARM_mode (tree func)
24663 {
24664 gcc_assert (TREE_CODE (func) == FUNCTION_DECL);
24665
24666 /* Ignore the problem about functions whose address is taken. */
24667 if (TARGET_CALLEE_INTERWORKING && TREE_PUBLIC (func))
24668 return true;
24669
24670 #ifdef ARM_PE
24671 return lookup_attribute ("interfacearm", DECL_ATTRIBUTES (func)) != NULL_TREE;
24672 #else
24673 return false;
24674 #endif
24675 }
24676
24677 /* Given the stack offsets and register mask in OFFSETS, decide how
24678 many additional registers to push instead of subtracting a constant
24679 from SP. For epilogues the principle is the same except we use pop.
24680 FOR_PROLOGUE indicates which we're generating. */
24681 static int
24682 thumb1_extra_regs_pushed (arm_stack_offsets *offsets, bool for_prologue)
24683 {
24684 HOST_WIDE_INT amount;
24685 unsigned long live_regs_mask = offsets->saved_regs_mask;
24686 /* Extract a mask of the ones we can give to the Thumb's push/pop
24687 instruction. */
24688 unsigned long l_mask = live_regs_mask & (for_prologue ? 0x40ff : 0xff);
24689 /* Then count how many other high registers will need to be pushed. */
24690 unsigned long high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24691 int n_free, reg_base, size;
24692
24693 if (!for_prologue && frame_pointer_needed)
24694 amount = offsets->locals_base - offsets->saved_regs;
24695 else
24696 amount = offsets->outgoing_args - offsets->saved_regs;
24697
24698 /* If the stack frame size is 512 exactly, we can save one load
24699 instruction, which should make this a win even when optimizing
24700 for speed. */
24701 if (!optimize_size && amount != 512)
24702 return 0;
24703
24704 /* Can't do this if there are high registers to push. */
24705 if (high_regs_pushed != 0)
24706 return 0;
24707
24708 /* Shouldn't do it in the prologue if no registers would normally
24709 be pushed at all. In the epilogue, also allow it if we'll have
24710 a pop insn for the PC. */
24711 if (l_mask == 0
24712 && (for_prologue
24713 || TARGET_BACKTRACE
24714 || (live_regs_mask & 1 << LR_REGNUM) == 0
24715 || TARGET_INTERWORK
24716 || crtl->args.pretend_args_size != 0))
24717 return 0;
24718
24719 /* Don't do this if thumb_expand_prologue wants to emit instructions
24720 between the push and the stack frame allocation. */
24721 if (for_prologue
24722 && ((flag_pic && arm_pic_register != INVALID_REGNUM)
24723 || (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)))
24724 return 0;
24725
24726 reg_base = 0;
24727 n_free = 0;
24728 if (!for_prologue)
24729 {
24730 size = arm_size_return_regs ();
24731 reg_base = ARM_NUM_INTS (size);
24732 live_regs_mask >>= reg_base;
24733 }
24734
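/* Count how many consecutive low registers are usable as extra push/pop
   slots: below r8, not among the saved registers, and (for the epilogue)
   call-clobbered and above any registers holding the return value.  */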
24735 while (reg_base + n_free < 8 && !(live_regs_mask & 1)
24736 && (for_prologue || call_used_regs[reg_base + n_free]))
24737 {
24738 live_regs_mask >>= 1;
24739 n_free++;
24740 }
24741
24742 if (n_free == 0)
24743 return 0;
24744 gcc_assert (amount / 4 * 4 == amount);
24745
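/* Push just enough extra registers either to bring the remaining SP
   adjustment below 512 (508 is the largest immediate a single Thumb-1
   SP add/sub can take) or to absorb the adjustment completely.  */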
24746 if (amount >= 512 && (amount - n_free * 4) < 512)
24747 return (amount - 508) / 4;
24748 if (amount <= n_free * 4)
24749 return amount / 4;
24750 return 0;
24751 }
24752
24753 /* The bits which aren't usefully expanded as rtl. */
24754 const char *
24755 thumb1_unexpanded_epilogue (void)
24756 {
24757 arm_stack_offsets *offsets;
24758 int regno;
24759 unsigned long live_regs_mask = 0;
24760 int high_regs_pushed = 0;
24761 int extra_pop;
24762 int had_to_push_lr;
24763 int size;
24764
24765 if (cfun->machine->return_used_this_function != 0)
24766 return "";
24767
24768 if (IS_NAKED (arm_current_func_type ()))
24769 return "";
24770
24771 offsets = arm_get_frame_offsets ();
24772 live_regs_mask = offsets->saved_regs_mask;
24773 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
24774
24775 /* We can deduce the registers used from the function's return value.
24776 This is more reliable than examining df_regs_ever_live_p () because that
24777 will be set if the register is ever used in the function, not just if
24778 the register is used to hold a return value. */
24779 size = arm_size_return_regs ();
24780
24781 extra_pop = thumb1_extra_regs_pushed (offsets, false);
24782 if (extra_pop > 0)
24783 {
24784 unsigned long extra_mask = (1 << extra_pop) - 1;
24785 live_regs_mask |= extra_mask << ARM_NUM_INTS (size);
24786 }
24787
24788 /* The prologue may have pushed some high registers to use as
24789 work registers, e.g. the testsuite file:
24790 gcc/testsuite/gcc/gcc.c-torture/execute/complex-2.c
24791 compiles to produce:
24792 push {r4, r5, r6, r7, lr}
24793 mov r7, r9
24794 mov r6, r8
24795 push {r6, r7}
24796 as part of the prologue. We have to undo that pushing here. */
24797
24798 if (high_regs_pushed)
24799 {
24800 unsigned long mask = live_regs_mask & 0xff;
24801 int next_hi_reg;
24802
24803 /* The available low registers depend on the size of the value we are
24804 returning. */
24805 if (size <= 12)
24806 mask |= 1 << 3;
24807 if (size <= 8)
24808 mask |= 1 << 2;
24809
24810 if (mask == 0)
24811 /* Oh dear! We have no low registers into which we can pop
24812 high registers! */
24813 internal_error
24814 ("no low registers available for popping high registers");
24815
24816 for (next_hi_reg = 8; next_hi_reg < 13; next_hi_reg++)
24817 if (live_regs_mask & (1 << next_hi_reg))
24818 break;
24819
24820 while (high_regs_pushed)
24821 {
24822 /* Find lo register(s) into which the high register(s) can
24823 be popped. */
24824 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24825 {
24826 if (mask & (1 << regno))
24827 high_regs_pushed--;
24828 if (high_regs_pushed == 0)
24829 break;
24830 }
24831
24832 mask &= (2 << regno) - 1; /* A noop if regno == 8 */
24833
24834 /* Pop the values into the low register(s). */
24835 thumb_pop (asm_out_file, mask);
24836
24837 /* Move the value(s) into the high registers. */
24838 for (regno = 0; regno <= LAST_LO_REGNUM; regno++)
24839 {
24840 if (mask & (1 << regno))
24841 {
24842 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", next_hi_reg,
24843 regno);
24844
24845 for (next_hi_reg++; next_hi_reg < 13; next_hi_reg++)
24846 if (live_regs_mask & (1 << next_hi_reg))
24847 break;
24848 }
24849 }
24850 }
24851 live_regs_mask &= ~0x0f00;
24852 }
24853
24854 had_to_push_lr = (live_regs_mask & (1 << LR_REGNUM)) != 0;
24855 live_regs_mask &= 0xff;
24856
24857 if (crtl->args.pretend_args_size == 0 || TARGET_BACKTRACE)
24858 {
24859 /* Pop the return address into the PC. */
24860 if (had_to_push_lr)
24861 live_regs_mask |= 1 << PC_REGNUM;
24862
24863 /* Either no argument registers were pushed or a backtrace
24864 structure was created which includes an adjusted stack
24865 pointer, so just pop everything. */
24866 if (live_regs_mask)
24867 thumb_pop (asm_out_file, live_regs_mask);
24868
24869 /* We have either just popped the return address into the
24870 PC or it was kept in LR for the entire function.
24871 Note that thumb_pop has already called thumb_exit if the
24872 PC was in the list. */
24873 if (!had_to_push_lr)
24874 thumb_exit (asm_out_file, LR_REGNUM);
24875 }
24876 else
24877 {
24878 /* Pop everything but the return address. */
24879 if (live_regs_mask)
24880 thumb_pop (asm_out_file, live_regs_mask);
24881
24882 if (had_to_push_lr)
24883 {
24884 if (size > 12)
24885 {
24886 /* We have no free low regs, so save one. */
24887 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", IP_REGNUM,
24888 LAST_ARG_REGNUM);
24889 }
24890
24891 /* Get the return address into a temporary register. */
24892 thumb_pop (asm_out_file, 1 << LAST_ARG_REGNUM);
24893
24894 if (size > 12)
24895 {
24896 /* Move the return address to lr. */
24897 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LR_REGNUM,
24898 LAST_ARG_REGNUM);
24899 /* Restore the low register. */
24900 asm_fprintf (asm_out_file, "\tmov\t%r, %r\n", LAST_ARG_REGNUM,
24901 IP_REGNUM);
24902 regno = LR_REGNUM;
24903 }
24904 else
24905 regno = LAST_ARG_REGNUM;
24906 }
24907 else
24908 regno = LR_REGNUM;
24909
24910 /* Remove the argument registers that were pushed onto the stack. */
24911 asm_fprintf (asm_out_file, "\tadd\t%r, %r, #%d\n",
24912 SP_REGNUM, SP_REGNUM,
24913 crtl->args.pretend_args_size);
24914
24915 thumb_exit (asm_out_file, regno);
24916 }
24917
24918 return "";
24919 }
24920
24921 /* Functions to save and restore machine-specific function data. */
24922 static struct machine_function *
24923 arm_init_machine_status (void)
24924 {
24925 struct machine_function *machine;
24926 machine = ggc_cleared_alloc<machine_function> ();
24927
24928 #if ARM_FT_UNKNOWN != 0
24929 machine->func_type = ARM_FT_UNKNOWN;
24930 #endif
24931 machine->static_chain_stack_bytes = -1;
24932 return machine;
24933 }
24934
24935 /* Return an RTX indicating where the return address to the
24936 calling function can be found. */
24937 rtx
24938 arm_return_addr (int count, rtx frame ATTRIBUTE_UNUSED)
24939 {
24940 if (count != 0)
24941 return NULL_RTX;
24942
24943 return get_hard_reg_initial_val (Pmode, LR_REGNUM);
24944 }
24945
24946 /* Do anything needed before RTL is emitted for each function. */
24947 void
24948 arm_init_expanders (void)
24949 {
24950 /* Arrange to initialize and mark the machine per-function status. */
24951 init_machine_status = arm_init_machine_status;
24952
24953 /* This is to stop the combine pass optimizing away the alignment
24954 adjustment of va_arg. */
24955 /* ??? It is claimed that this should not be necessary. */
24956 if (cfun)
24957 mark_reg_pointer (arg_pointer_rtx, PARM_BOUNDARY);
24958 }
24959
24960 /* Return true if FUNC is compiled for a different mode (ARM vs. Thumb) than the current one. */
24961
24962 bool
24963 arm_change_mode_p (tree func)
24964 {
24965 if (TREE_CODE (func) != FUNCTION_DECL)
24966 return false;
24967
24968 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (func);
24969
24970 if (!callee_tree)
24971 callee_tree = target_option_default_node;
24972
24973 struct cl_target_option *callee_opts = TREE_TARGET_OPTION (callee_tree);
24974 int flags = callee_opts->x_target_flags;
24975
24976 return (TARGET_THUMB_P (flags) != TARGET_THUMB);
24977 }
24978
24979 /* Like arm_compute_initial_elimination_offset. Simpler because there
24980 isn't an ABI-specified frame pointer for Thumb. Instead, we set it
24981 to point at the base of the local variables after static stack
24982 space for a function has been allocated. */
24983
24984 HOST_WIDE_INT
24985 thumb_compute_initial_elimination_offset (unsigned int from, unsigned int to)
24986 {
24987 arm_stack_offsets *offsets;
24988
24989 offsets = arm_get_frame_offsets ();
24990
24991 switch (from)
24992 {
24993 case ARG_POINTER_REGNUM:
24994 switch (to)
24995 {
24996 case STACK_POINTER_REGNUM:
24997 return offsets->outgoing_args - offsets->saved_args;
24998
24999 case FRAME_POINTER_REGNUM:
25000 return offsets->soft_frame - offsets->saved_args;
25001
25002 case ARM_HARD_FRAME_POINTER_REGNUM:
25003 return offsets->saved_regs - offsets->saved_args;
25004
25005 case THUMB_HARD_FRAME_POINTER_REGNUM:
25006 return offsets->locals_base - offsets->saved_args;
25007
25008 default:
25009 gcc_unreachable ();
25010 }
25011 break;
25012
25013 case FRAME_POINTER_REGNUM:
25014 switch (to)
25015 {
25016 case STACK_POINTER_REGNUM:
25017 return offsets->outgoing_args - offsets->soft_frame;
25018
25019 case ARM_HARD_FRAME_POINTER_REGNUM:
25020 return offsets->saved_regs - offsets->soft_frame;
25021
25022 case THUMB_HARD_FRAME_POINTER_REGNUM:
25023 return offsets->locals_base - offsets->soft_frame;
25024
25025 default:
25026 gcc_unreachable ();
25027 }
25028 break;
25029
25030 default:
25031 gcc_unreachable ();
25032 }
25033 }
25034
25035 /* Generate the function's prologue. */
25036
25037 void
25038 thumb1_expand_prologue (void)
25039 {
25040 rtx_insn *insn;
25041
25042 HOST_WIDE_INT amount;
25043 HOST_WIDE_INT size;
25044 arm_stack_offsets *offsets;
25045 unsigned long func_type;
25046 int regno;
25047 unsigned long live_regs_mask;
25048 unsigned long l_mask;
25049 unsigned high_regs_pushed = 0;
25050 bool lr_needs_saving;
25051
25052 func_type = arm_current_func_type ();
25053
25054 /* Naked functions don't have prologues. */
25055 if (IS_NAKED (func_type))
25056 {
25057 if (flag_stack_usage_info)
25058 current_function_static_stack_size = 0;
25059 return;
25060 }
25061
25062 if (IS_INTERRUPT (func_type))
25063 {
25064 error ("interrupt Service Routines cannot be coded in Thumb mode");
25065 return;
25066 }
25067
25068 if (is_called_in_ARM_mode (current_function_decl))
25069 emit_insn (gen_prologue_thumb1_interwork ());
25070
25071 offsets = arm_get_frame_offsets ();
25072 live_regs_mask = offsets->saved_regs_mask;
25073 lr_needs_saving = live_regs_mask & (1 << LR_REGNUM);
25074
25075 /* Extract a mask of the ones we can give to the Thumb's push instruction. */
25076 l_mask = live_regs_mask & 0x40ff;
25077 /* Then count how many other high registers will need to be pushed. */
25078 high_regs_pushed = bit_count (live_regs_mask & 0x0f00);
25079
25080 if (crtl->args.pretend_args_size)
25081 {
25082 rtx x = GEN_INT (-crtl->args.pretend_args_size);
25083
25084 if (cfun->machine->uses_anonymous_args)
25085 {
25086 int num_pushes = ARM_NUM_INTS (crtl->args.pretend_args_size);
25087 unsigned long mask;
25088
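/* Build a mask of the top NUM_PUSHES argument registers, ending at
LAST_ARG_REGNUM, so that the anonymous arguments are pushed next to
any arguments already passed on the stack. */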
25089 mask = 1ul << (LAST_ARG_REGNUM + 1);
25090 mask -= 1ul << (LAST_ARG_REGNUM + 1 - num_pushes);
25091
25092 insn = thumb1_emit_multi_reg_push (mask, 0);
25093 }
25094 else
25095 {
25096 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25097 stack_pointer_rtx, x));
25098 }
25099 RTX_FRAME_RELATED_P (insn) = 1;
25100 }
25101
25102 if (TARGET_BACKTRACE)
25103 {
25104 HOST_WIDE_INT offset = 0;
25105 unsigned work_register;
25106 rtx work_reg, x, arm_hfp_rtx;
25107
25108 /* We have been asked to create a stack backtrace structure.
25109 The code looks like this:
25110
25111 0 .align 2
25112 0 func:
25113 0 sub SP, #16 Reserve space for 4 registers.
25114 2 push {R7} Push low registers.
25115 4 add R7, SP, #20 Get the stack pointer before the push.
25116 6 str R7, [SP, #8] Store the stack pointer
25117 (before reserving the space).
25118 8 mov R7, PC Get hold of the start of this code + 12.
25119 10 str R7, [SP, #16] Store it.
25120 12 mov R7, FP Get hold of the current frame pointer.
25121 14 str R7, [SP, #4] Store it.
25122 16 mov R7, LR Get hold of the current return address.
25123 18 str R7, [SP, #12] Store it.
25124 20 add R7, SP, #16 Point at the start of the
25125 backtrace structure.
25126 22 mov FP, R7 Put this value into the frame pointer. */
25127
25128 work_register = thumb_find_work_register (live_regs_mask);
25129 work_reg = gen_rtx_REG (SImode, work_register);
25130 arm_hfp_rtx = gen_rtx_REG (SImode, ARM_HARD_FRAME_POINTER_REGNUM);
25131
25132 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25133 stack_pointer_rtx, GEN_INT (-16)));
25134 RTX_FRAME_RELATED_P (insn) = 1;
25135
25136 if (l_mask)
25137 {
25138 insn = thumb1_emit_multi_reg_push (l_mask, l_mask);
25139 RTX_FRAME_RELATED_P (insn) = 1;
25140 lr_needs_saving = false;
25141
25142 offset = bit_count (l_mask) * UNITS_PER_WORD;
25143 }
25144
25145 x = GEN_INT (offset + 16 + crtl->args.pretend_args_size);
25146 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25147
25148 x = plus_constant (Pmode, stack_pointer_rtx, offset + 4);
25149 x = gen_frame_mem (SImode, x);
25150 emit_move_insn (x, work_reg);
25151
25152 /* Make sure that the instruction fetching the PC is in the right place
25153 to calculate "start of backtrace creation code + 12". */
25154 /* ??? The stores using the common WORK_REG ought to be enough to
25155 prevent the scheduler from doing anything weird. Failing that
25156 we could always move all of the following into an UNSPEC_VOLATILE. */
25157 if (l_mask)
25158 {
25159 x = gen_rtx_REG (SImode, PC_REGNUM);
25160 emit_move_insn (work_reg, x);
25161
25162 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25163 x = gen_frame_mem (SImode, x);
25164 emit_move_insn (x, work_reg);
25165
25166 emit_move_insn (work_reg, arm_hfp_rtx);
25167
25168 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25169 x = gen_frame_mem (SImode, x);
25170 emit_move_insn (x, work_reg);
25171 }
25172 else
25173 {
25174 emit_move_insn (work_reg, arm_hfp_rtx);
25175
25176 x = plus_constant (Pmode, stack_pointer_rtx, offset);
25177 x = gen_frame_mem (SImode, x);
25178 emit_move_insn (x, work_reg);
25179
25180 x = gen_rtx_REG (SImode, PC_REGNUM);
25181 emit_move_insn (work_reg, x);
25182
25183 x = plus_constant (Pmode, stack_pointer_rtx, offset + 12);
25184 x = gen_frame_mem (SImode, x);
25185 emit_move_insn (x, work_reg);
25186 }
25187
25188 x = gen_rtx_REG (SImode, LR_REGNUM);
25189 emit_move_insn (work_reg, x);
25190
25191 x = plus_constant (Pmode, stack_pointer_rtx, offset + 8);
25192 x = gen_frame_mem (SImode, x);
25193 emit_move_insn (x, work_reg);
25194
25195 x = GEN_INT (offset + 12);
25196 emit_insn (gen_addsi3 (work_reg, stack_pointer_rtx, x));
25197
25198 emit_move_insn (arm_hfp_rtx, work_reg);
25199 }
25200 /* Optimization: If we are not pushing any low registers but we are going
25201 to push some high registers then delay our first push. This will just
25202 be a push of LR and we can combine it with the push of the first high
25203 register. */
25204 else if ((l_mask & 0xff) != 0
25205 || (high_regs_pushed == 0 && lr_needs_saving))
25206 {
25207 unsigned long mask = l_mask;
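/* thumb1_extra_regs_pushed returns how many additional low registers
can be pushed purely to fold part of the upcoming stack decrement
into this push; (1 << n) - 1 selects r0 .. r(n-1). */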
25208 mask |= (1 << thumb1_extra_regs_pushed (offsets, true)) - 1;
25209 insn = thumb1_emit_multi_reg_push (mask, mask);
25210 RTX_FRAME_RELATED_P (insn) = 1;
25211 lr_needs_saving = false;
25212 }
25213
25214 if (high_regs_pushed)
25215 {
25216 unsigned pushable_regs;
25217 unsigned next_hi_reg;
25218 unsigned arg_regs_num = TARGET_AAPCS_BASED ? crtl->args.info.aapcs_ncrn
25219 : crtl->args.info.nregs;
25220 unsigned arg_regs_mask = (1 << arg_regs_num) - 1;
25221
25222 for (next_hi_reg = 12; next_hi_reg > LAST_LO_REGNUM; next_hi_reg--)
25223 if (live_regs_mask & (1 << next_hi_reg))
25224 break;
25225
25226 /* Mask out registers used for passing arguments, even if they
25227 could otherwise be pushed: using them to stash the high registers
25228 could clobber arguments that the function still needs. */
25229 pushable_regs = l_mask & (~arg_regs_mask);
25230 if (lr_needs_saving)
25231 pushable_regs &= ~(1 << LR_REGNUM);
25232
25233 if (pushable_regs == 0)
25234 pushable_regs = 1 << thumb_find_work_register (live_regs_mask);
25235
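/* Thumb-1 PUSH cannot encode r8-r11, so copy each live high register
into one of the pushable low registers and push from there, recording
the register really being saved in REAL_REGS_MASK so that the
frame-related notes describe the right register. */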
25236 while (high_regs_pushed > 0)
25237 {
25238 unsigned long real_regs_mask = 0;
25239 unsigned long push_mask = 0;
25240
25241 for (regno = LR_REGNUM; regno >= 0; regno --)
25242 {
25243 if (pushable_regs & (1 << regno))
25244 {
25245 emit_move_insn (gen_rtx_REG (SImode, regno),
25246 gen_rtx_REG (SImode, next_hi_reg));
25247
25248 high_regs_pushed --;
25249 real_regs_mask |= (1 << next_hi_reg);
25250 push_mask |= (1 << regno);
25251
25252 if (high_regs_pushed)
25253 {
25254 for (next_hi_reg --; next_hi_reg > LAST_LO_REGNUM;
25255 next_hi_reg --)
25256 if (live_regs_mask & (1 << next_hi_reg))
25257 break;
25258 }
25259 else
25260 break;
25261 }
25262 }
25263
25264 /* If we had to find a work register and we have not yet
25265 saved the LR then add it to the list of regs to push. */
25266 if (lr_needs_saving)
25267 {
25268 push_mask |= 1 << LR_REGNUM;
25269 real_regs_mask |= 1 << LR_REGNUM;
25270 lr_needs_saving = false;
25271 }
25272
25273 insn = thumb1_emit_multi_reg_push (push_mask, real_regs_mask);
25274 RTX_FRAME_RELATED_P (insn) = 1;
25275 }
25276 }
25277
25278 /* Load the pic register before setting the frame pointer,
25279 so we can use r7 as a temporary work register. */
25280 if (flag_pic && arm_pic_register != INVALID_REGNUM)
25281 arm_load_pic_register (live_regs_mask, NULL_RTX);
25282
25283 if (!frame_pointer_needed && CALLER_INTERWORKING_SLOT_SIZE > 0)
25284 emit_move_insn (gen_rtx_REG (Pmode, ARM_HARD_FRAME_POINTER_REGNUM),
25285 stack_pointer_rtx);
25286
25287 size = offsets->outgoing_args - offsets->saved_args;
25288 if (flag_stack_usage_info)
25289 current_function_static_stack_size = size;
25290
25291 /* If we have a frame, then do stack checking. FIXME: not implemented. */
25292 if ((flag_stack_check == STATIC_BUILTIN_STACK_CHECK
25293 || flag_stack_clash_protection)
25294 && size)
25295 sorry ("-fstack-check=specific for Thumb-1");
25296
25297 amount = offsets->outgoing_args - offsets->saved_regs;
25298 amount -= 4 * thumb1_extra_regs_pushed (offsets, true);
25299 if (amount)
25300 {
25301 if (amount < 512)
25302 {
25303 insn = emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25304 GEN_INT (- amount)));
25305 RTX_FRAME_RELATED_P (insn) = 1;
25306 }
25307 else
25308 {
25309 rtx reg, dwarf;
25310
25311 /* The stack decrement is too big for an immediate value in a single
25312 insn. In theory we could issue multiple subtracts, but after
25313 three of them it becomes more space efficient to place the full
25314 value in the constant pool and load into a register. (Also the
25315 ARM debugger really likes to see only one stack decrement per
25316 function). So instead we look for a scratch register into which
25317 we can load the decrement, and then we subtract this from the
25318 stack pointer. Unfortunately on the thumb the only available
25319 scratch registers are the argument registers, and we cannot use
25320 these as they may hold arguments to the function. Instead we
25321 attempt to locate a call preserved register which is used by this
25322 function. If we can find one, then we know that it will have
25323 been pushed at the start of the prologue and so we can corrupt
25324 it now. */
25325 for (regno = LAST_ARG_REGNUM + 1; regno <= LAST_LO_REGNUM; regno++)
25326 if (live_regs_mask & (1 << regno))
25327 break;
25328
25329 gcc_assert(regno <= LAST_LO_REGNUM);
25330
25331 reg = gen_rtx_REG (SImode, regno);
25332
25333 emit_insn (gen_movsi (reg, GEN_INT (- amount)));
25334
25335 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25336 stack_pointer_rtx, reg));
25337
25338 dwarf = gen_rtx_SET (stack_pointer_rtx,
25339 plus_constant (Pmode, stack_pointer_rtx,
25340 -amount));
25341 add_reg_note (insn, REG_FRAME_RELATED_EXPR, dwarf);
25342 RTX_FRAME_RELATED_P (insn) = 1;
25343 }
25344 }
25345
25346 if (frame_pointer_needed)
25347 thumb_set_frame_pointer (offsets);
25348
25349 /* If we are profiling, make sure no instructions are scheduled before
25350 the call to mcount. Similarly if the user has requested no
25351 scheduling in the prolog. Similarly if we want non-call exceptions
25352 using the EABI unwinder, to prevent faulting instructions from being
25353 swapped with a stack adjustment. */
25354 if (crtl->profile || !TARGET_SCHED_PROLOG
25355 || (arm_except_unwind_info (&global_options) == UI_TARGET
25356 && cfun->can_throw_non_call_exceptions))
25357 emit_insn (gen_blockage ());
25358
25359 cfun->machine->lr_save_eliminated = !thumb_force_lr_save ();
25360 if (live_regs_mask & 0xff)
25361 cfun->machine->lr_save_eliminated = 0;
25362 }
25363
25364 /* Clear caller saved registers not used to pass return values and leaked
25365 condition flags before exiting a cmse_nonsecure_entry function. */
25366
25367 void
25368 cmse_nonsecure_entry_clear_before_return (void)
25369 {
25370 int regno, maxregno = TARGET_HARD_FLOAT ? LAST_VFP_REGNUM : IP_REGNUM;
25371 uint32_t padding_bits_to_clear = 0;
25372 auto_sbitmap to_clear_bitmap (maxregno + 1);
25373 rtx r1_reg, result_rtl, clearing_reg = NULL_RTX;
25374 tree result_type;
25375
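/* Start with the argument registers r0-r3 plus IP; registers that hold
the return value are removed from the set further down. */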
25376 bitmap_clear (to_clear_bitmap);
25377 bitmap_set_range (to_clear_bitmap, R0_REGNUM, NUM_ARG_REGS);
25378 bitmap_set_bit (to_clear_bitmap, IP_REGNUM);
25379
25380 /* If we are not dealing with -mfloat-abi=soft we will need to clear VFP
25381 registers. */
25382 if (TARGET_HARD_FLOAT)
25383 {
25384 int float_bits = D7_VFP_REGNUM - FIRST_VFP_REGNUM + 1;
25385
25386 bitmap_set_range (to_clear_bitmap, FIRST_VFP_REGNUM, float_bits);
25387
25388 /* Make sure we don't clear the two scratch registers used to clear the
25389 relevant FPSCR bits in output_return_instruction. */
25390 emit_use (gen_rtx_REG (SImode, IP_REGNUM));
25391 bitmap_clear_bit (to_clear_bitmap, IP_REGNUM);
25392 emit_use (gen_rtx_REG (SImode, 4));
25393 bitmap_clear_bit (to_clear_bitmap, 4);
25394 }
25395
25396 /* If the user has defined registers to be caller saved, these are no longer
25397 restored by the function before returning and must thus be cleared for
25398 security purposes. */
25399 for (regno = NUM_ARG_REGS; regno <= maxregno; regno++)
25400 {
25401 /* We do not touch registers that can be used to pass arguments as per
25402 the AAPCS, since these should never be made callee-saved by user
25403 options. */
25404 if (IN_RANGE (regno, FIRST_VFP_REGNUM, D7_VFP_REGNUM))
25405 continue;
25406 if (IN_RANGE (regno, IP_REGNUM, PC_REGNUM))
25407 continue;
25408 if (call_used_regs[regno])
25409 bitmap_set_bit (to_clear_bitmap, regno);
25410 }
25411
25412 /* Make sure we do not clear the registers the result is returned in. */
25413 result_type = TREE_TYPE (DECL_RESULT (current_function_decl));
25414 if (!VOID_TYPE_P (result_type))
25415 {
25416 uint64_t to_clear_return_mask;
25417 result_rtl = arm_function_value (result_type, current_function_decl, 0);
25418
25419 /* No need to check that we return in registers, because we don't
25420 support returning on stack yet. */
25421 gcc_assert (REG_P (result_rtl));
25422 to_clear_return_mask
25423 = compute_not_to_clear_mask (result_type, result_rtl, 0,
25424 &padding_bits_to_clear);
25425 if (to_clear_return_mask)
25426 {
25427 gcc_assert ((unsigned) maxregno < sizeof (long long) * __CHAR_BIT__);
25428 for (regno = R0_REGNUM; regno <= maxregno; regno++)
25429 {
25430 if (to_clear_return_mask & (1ULL << regno))
25431 bitmap_clear_bit (to_clear_bitmap, regno);
25432 }
25433 }
25434 }
25435
25436 if (padding_bits_to_clear != 0)
25437 {
25438 int to_clear_bitmap_size = SBITMAP_SIZE ((sbitmap) to_clear_bitmap);
25439 auto_sbitmap to_clear_arg_regs_bitmap (to_clear_bitmap_size);
25440
25441 /* Padding_bits_to_clear is not 0, so we know we are returning a
25442 composite type, which only uses r0. Make sure that r1-r3 are
25443 cleared too. */
25444 bitmap_clear (to_clear_arg_regs_bitmap);
25445 bitmap_set_range (to_clear_arg_regs_bitmap, R1_REGNUM, NUM_ARG_REGS - 1);
25446 gcc_assert (bitmap_subset_p (to_clear_arg_regs_bitmap, to_clear_bitmap));
25447 }
25448
25449 /* Clear full registers that leak before returning. */
25450 clearing_reg = gen_rtx_REG (SImode, TARGET_THUMB1 ? R0_REGNUM : LR_REGNUM);
25451 r1_reg = gen_rtx_REG (SImode, R0_REGNUM + 1);
25452 cmse_clear_registers (to_clear_bitmap, &padding_bits_to_clear, 1, r1_reg,
25453 clearing_reg);
25454 }
25455
25456 /* Generate the pattern *pop_multiple_with_stack_update_and_return if a
25457 single POP instruction can be generated. LR should be replaced by PC.
25458 All the checks required are already done by USE_RETURN_INSN (). Hence,
25459 all we really need to check here is whether a single register or
25460 multiple registers are to be popped. */
25461 void
25462 thumb2_expand_return (bool simple_return)
25463 {
25464 int i, num_regs;
25465 unsigned long saved_regs_mask;
25466 arm_stack_offsets *offsets;
25467
25468 offsets = arm_get_frame_offsets ();
25469 saved_regs_mask = offsets->saved_regs_mask;
25470
25471 for (i = 0, num_regs = 0; i <= LAST_ARM_REGNUM; i++)
25472 if (saved_regs_mask & (1 << i))
25473 num_regs++;
25474
25475 if (!simple_return && saved_regs_mask)
25476 {
25477 /* TODO: Verify that this path is never taken for cmse_nonsecure_entry
25478 functions or adapt code to handle according to ACLE. This path should
25479 not be reachable for cmse_nonsecure_entry functions though we prefer
25480 to assert it for now to ensure that future code changes do not silently
25481 change this behavior. */
25482 gcc_assert (!IS_CMSE_ENTRY (arm_current_func_type ()));
25483 if (num_regs == 1)
25484 {
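/* Build a PARALLEL of a return and a load of PC from a post-incremented
SP, so that it matches the *pop_multiple_with_stack_update_and_return
pattern. */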
25485 rtx par = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25486 rtx reg = gen_rtx_REG (SImode, PC_REGNUM);
25487 rtx addr = gen_rtx_MEM (SImode,
25488 gen_rtx_POST_INC (SImode,
25489 stack_pointer_rtx));
25490 set_mem_alias_set (addr, get_frame_alias_set ());
25491 XVECEXP (par, 0, 0) = ret_rtx;
25492 XVECEXP (par, 0, 1) = gen_rtx_SET (reg, addr);
25493 RTX_FRAME_RELATED_P (XVECEXP (par, 0, 1)) = 1;
25494 emit_jump_insn (par);
25495 }
25496 else
25497 {
25498 saved_regs_mask &= ~ (1 << LR_REGNUM);
25499 saved_regs_mask |= (1 << PC_REGNUM);
25500 arm_emit_multi_reg_pop (saved_regs_mask);
25501 }
25502 }
25503 else
25504 {
25505 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25506 cmse_nonsecure_entry_clear_before_return ();
25507 emit_jump_insn (simple_return_rtx);
25508 }
25509 }
25510
25511 void
25512 thumb1_expand_epilogue (void)
25513 {
25514 HOST_WIDE_INT amount;
25515 arm_stack_offsets *offsets;
25516 int regno;
25517
25518 /* Naked functions don't have epilogues. */
25519 if (IS_NAKED (arm_current_func_type ()))
25520 return;
25521
25522 offsets = arm_get_frame_offsets ();
25523 amount = offsets->outgoing_args - offsets->saved_regs;
25524
25525 if (frame_pointer_needed)
25526 {
25527 emit_insn (gen_movsi (stack_pointer_rtx, hard_frame_pointer_rtx));
25528 amount = offsets->locals_base - offsets->saved_regs;
25529 }
25530 amount -= 4 * thumb1_extra_regs_pushed (offsets, false);
25531
25532 gcc_assert (amount >= 0);
25533 if (amount)
25534 {
25535 emit_insn (gen_blockage ());
25536
25537 if (amount < 512)
25538 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx,
25539 GEN_INT (amount)));
25540 else
25541 {
25542 /* r3 is always free in the epilogue. */
25543 rtx reg = gen_rtx_REG (SImode, LAST_ARG_REGNUM);
25544
25545 emit_insn (gen_movsi (reg, GEN_INT (amount)));
25546 emit_insn (gen_addsi3 (stack_pointer_rtx, stack_pointer_rtx, reg));
25547 }
25548 }
25549
25550 /* Emit a USE (stack_pointer_rtx), so that
25551 the stack adjustment will not be deleted. */
25552 emit_insn (gen_force_register_use (stack_pointer_rtx));
25553
25554 if (crtl->profile || !TARGET_SCHED_PROLOG)
25555 emit_insn (gen_blockage ());
25556
25557 /* Emit a clobber for each insn that will be restored in the epilogue,
25558 so that flow2 will get register lifetimes correct. */
25559 for (regno = 0; regno < 13; regno++)
25560 if (df_regs_ever_live_p (regno) && !call_used_regs[regno])
25561 emit_clobber (gen_rtx_REG (SImode, regno));
25562
25563 if (! df_regs_ever_live_p (LR_REGNUM))
25564 emit_use (gen_rtx_REG (SImode, LR_REGNUM));
25565
25566 /* Clear all caller-saved regs that are not used to return. */
25567 if (IS_CMSE_ENTRY (arm_current_func_type ()))
25568 cmse_nonsecure_entry_clear_before_return ();
25569 }
25570
25571 /* Epilogue code for APCS frame. */
25572 static void
25573 arm_expand_epilogue_apcs_frame (bool really_return)
25574 {
25575 unsigned long func_type;
25576 unsigned long saved_regs_mask;
25577 int num_regs = 0;
25578 int i;
25579 int floats_from_frame = 0;
25580 arm_stack_offsets *offsets;
25581
25582 gcc_assert (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM);
25583 func_type = arm_current_func_type ();
25584
25585 /* Get frame offsets for ARM. */
25586 offsets = arm_get_frame_offsets ();
25587 saved_regs_mask = offsets->saved_regs_mask;
25588
25589 /* Find the offset of the floating-point save area in the frame. */
25590 floats_from_frame
25591 = (offsets->saved_args
25592 + arm_compute_static_chain_stack_bytes ()
25593 - offsets->frame);
25594
25595 /* Compute how many core registers are saved and how far away the floats are. */
25596 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25597 if (saved_regs_mask & (1 << i))
25598 {
25599 num_regs++;
25600 floats_from_frame += 4;
25601 }
25602
25603 if (TARGET_HARD_FLOAT)
25604 {
25605 int start_reg;
25606 rtx ip_rtx = gen_rtx_REG (SImode, IP_REGNUM);
25607
25608 /* The offset is from IP_REGNUM. */
25609 int saved_size = arm_get_vfp_saved_size ();
25610 if (saved_size > 0)
25611 {
25612 rtx_insn *insn;
25613 floats_from_frame += saved_size;
25614 insn = emit_insn (gen_addsi3 (ip_rtx,
25615 hard_frame_pointer_rtx,
25616 GEN_INT (-floats_from_frame)));
25617 arm_add_cfa_adjust_cfa_note (insn, -floats_from_frame,
25618 ip_rtx, hard_frame_pointer_rtx);
25619 }
25620
25621 /* Generate VFP register multi-pop. */
25622 start_reg = FIRST_VFP_REGNUM;
25623
25624 for (i = FIRST_VFP_REGNUM; i < LAST_VFP_REGNUM; i += 2)
25625 /* Look for a case where a reg does not need restoring. */
25626 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25627 && (!df_regs_ever_live_p (i + 1)
25628 || call_used_regs[i + 1]))
25629 {
25630 if (start_reg != i)
25631 arm_emit_vfp_multi_reg_pop (start_reg,
25632 (i - start_reg) / 2,
25633 gen_rtx_REG (SImode,
25634 IP_REGNUM));
25635 start_reg = i + 2;
25636 }
25637
25638 /* Restore the remaining regs that we have discovered (or possibly
25639 even all of them, if the conditional in the for loop never
25640 fired). */
25641 if (start_reg != i)
25642 arm_emit_vfp_multi_reg_pop (start_reg,
25643 (i - start_reg) / 2,
25644 gen_rtx_REG (SImode, IP_REGNUM));
25645 }
25646
25647 if (TARGET_IWMMXT)
25648 {
25649 /* The frame pointer is guaranteed to be non-double-word aligned, as
25650 it is set to double-word-aligned old_stack_pointer - 4. */
25651 rtx_insn *insn;
25652 int lrm_count = (num_regs % 2) ? (num_regs + 2) : (num_regs + 1);
25653
25654 for (i = LAST_IWMMXT_REGNUM; i >= FIRST_IWMMXT_REGNUM; i--)
25655 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25656 {
25657 rtx addr = gen_frame_mem (V2SImode,
25658 plus_constant (Pmode, hard_frame_pointer_rtx,
25659 - lrm_count * 4));
25660 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25661 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25662 gen_rtx_REG (V2SImode, i),
25663 NULL_RTX);
25664 lrm_count += 2;
25665 }
25666 }
25667
25668 /* saved_regs_mask should contain IP, which holds the old stack pointer
25669 saved at the time the activation record was created. Since SP and IP are
25670 adjacent registers, we can restore the value directly into SP. */
25671 gcc_assert (saved_regs_mask & (1 << IP_REGNUM));
25672 saved_regs_mask &= ~(1 << IP_REGNUM);
25673 saved_regs_mask |= (1 << SP_REGNUM);
25674
25675 /* There are two registers left in saved_regs_mask - LR and PC. We
25676 only need to restore LR (the return address), but to
25677 save time we can load it directly into PC, unless we need a
25678 special function exit sequence, or we are not really returning. */
25679 if (really_return
25680 && ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL
25681 && !crtl->calls_eh_return)
25682 /* Delete LR from the register mask, so that LR on
25683 the stack is loaded into the PC in the register mask. */
25684 saved_regs_mask &= ~(1 << LR_REGNUM);
25685 else
25686 saved_regs_mask &= ~(1 << PC_REGNUM);
25687
25688 num_regs = bit_count (saved_regs_mask);
25689 if ((offsets->outgoing_args != (1 + num_regs)) || cfun->calls_alloca)
25690 {
25691 rtx_insn *insn;
25692 emit_insn (gen_blockage ());
25693 /* Unwind the stack to just below the saved registers. */
25694 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25695 hard_frame_pointer_rtx,
25696 GEN_INT (- 4 * num_regs)));
25697
25698 arm_add_cfa_adjust_cfa_note (insn, - 4 * num_regs,
25699 stack_pointer_rtx, hard_frame_pointer_rtx);
25700 }
25701
25702 arm_emit_multi_reg_pop (saved_regs_mask);
25703
25704 if (IS_INTERRUPT (func_type))
25705 {
25706 /* Interrupt handlers will have pushed the
25707 IP onto the stack, so restore it now. */
25708 rtx_insn *insn;
25709 rtx addr = gen_rtx_MEM (SImode,
25710 gen_rtx_POST_INC (SImode,
25711 stack_pointer_rtx));
25712 set_mem_alias_set (addr, get_frame_alias_set ());
25713 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, IP_REGNUM), addr));
25714 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25715 gen_rtx_REG (SImode, IP_REGNUM),
25716 NULL_RTX);
25717 }
25718
25719 if (!really_return || (saved_regs_mask & (1 << PC_REGNUM)))
25720 return;
25721
25722 if (crtl->calls_eh_return)
25723 emit_insn (gen_addsi3 (stack_pointer_rtx,
25724 stack_pointer_rtx,
25725 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
25726
25727 if (IS_STACKALIGN (func_type))
25728 /* Restore the original stack pointer. Before prologue, the stack was
25729 realigned and the original stack pointer saved in r0. For details,
25730 see comment in arm_expand_prologue. */
25731 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
25732
25733 emit_jump_insn (simple_return_rtx);
25734 }
25735
25736 /* Generate RTL to represent ARM epilogue. Really_return is true if the
25737 function is not a sibcall. */
25738 void
25739 arm_expand_epilogue (bool really_return)
25740 {
25741 unsigned long func_type;
25742 unsigned long saved_regs_mask;
25743 int num_regs = 0;
25744 int i;
25745 int amount;
25746 arm_stack_offsets *offsets;
25747
25748 func_type = arm_current_func_type ();
25749
25750 /* Naked functions don't have epilogues. Hence, generate the return pattern
25751 and let output_return_instruction take care of any instruction emission. */
25752 if (IS_NAKED (func_type)
25753 || (IS_VOLATILE (func_type) && TARGET_ABORT_NORETURN))
25754 {
25755 if (really_return)
25756 emit_jump_insn (simple_return_rtx);
25757 return;
25758 }
25759
25760 /* If we are throwing an exception, then we really must be doing a
25761 return, so we can't tail-call. */
25762 gcc_assert (!crtl->calls_eh_return || really_return);
25763
25764 if (TARGET_APCS_FRAME && frame_pointer_needed && TARGET_ARM)
25765 {
25766 arm_expand_epilogue_apcs_frame (really_return);
25767 return;
25768 }
25769
25770 /* Get frame offsets for ARM. */
25771 offsets = arm_get_frame_offsets ();
25772 saved_regs_mask = offsets->saved_regs_mask;
25773 num_regs = bit_count (saved_regs_mask);
25774
25775 if (frame_pointer_needed)
25776 {
25777 rtx_insn *insn;
25778 /* Restore stack pointer if necessary. */
25779 if (TARGET_ARM)
25780 {
25781 /* In ARM mode, frame pointer points to first saved register.
25782 Restore stack pointer to last saved register. */
25783 amount = offsets->frame - offsets->saved_regs;
25784
25785 /* Force out any pending memory operations that reference stacked data
25786 before stack de-allocation occurs. */
25787 emit_insn (gen_blockage ());
25788 insn = emit_insn (gen_addsi3 (stack_pointer_rtx,
25789 hard_frame_pointer_rtx,
25790 GEN_INT (amount)));
25791 arm_add_cfa_adjust_cfa_note (insn, amount,
25792 stack_pointer_rtx,
25793 hard_frame_pointer_rtx);
25794
25795 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25796 deleted. */
25797 emit_insn (gen_force_register_use (stack_pointer_rtx));
25798 }
25799 else
25800 {
25801 /* In Thumb-2 mode, the frame pointer points to the last saved
25802 register. */
25803 amount = offsets->locals_base - offsets->saved_regs;
25804 if (amount)
25805 {
25806 insn = emit_insn (gen_addsi3 (hard_frame_pointer_rtx,
25807 hard_frame_pointer_rtx,
25808 GEN_INT (amount)));
25809 arm_add_cfa_adjust_cfa_note (insn, amount,
25810 hard_frame_pointer_rtx,
25811 hard_frame_pointer_rtx);
25812 }
25813
25814 /* Force out any pending memory operations that reference stacked data
25815 before stack de-allocation occurs. */
25816 emit_insn (gen_blockage ());
25817 insn = emit_insn (gen_movsi (stack_pointer_rtx,
25818 hard_frame_pointer_rtx));
25819 arm_add_cfa_adjust_cfa_note (insn, 0,
25820 stack_pointer_rtx,
25821 hard_frame_pointer_rtx);
25822 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is not
25823 deleted. */
25824 emit_insn (gen_force_register_use (stack_pointer_rtx));
25825 }
25826 }
25827 else
25828 {
25829 /* Pop off outgoing args and local frame to adjust stack pointer to
25830 last saved register. */
25831 amount = offsets->outgoing_args - offsets->saved_regs;
25832 if (amount)
25833 {
25834 rtx_insn *tmp;
25835 /* Force out any pending memory operations that reference stacked data
25836 before stack de-allocation occurs. */
25837 emit_insn (gen_blockage ());
25838 tmp = emit_insn (gen_addsi3 (stack_pointer_rtx,
25839 stack_pointer_rtx,
25840 GEN_INT (amount)));
25841 arm_add_cfa_adjust_cfa_note (tmp, amount,
25842 stack_pointer_rtx, stack_pointer_rtx);
25843 /* Emit USE(stack_pointer_rtx) to ensure that stack adjustment is
25844 not deleted. */
25845 emit_insn (gen_force_register_use (stack_pointer_rtx));
25846 }
25847 }
25848
25849 if (TARGET_HARD_FLOAT)
25850 {
25851 /* Generate VFP register multi-pop. */
25852 int end_reg = LAST_VFP_REGNUM + 1;
25853
25854 /* Scan the registers in reverse order. We need to match
25855 any groupings made in the prologue and generate matching
25856 vldm operations. The need to match groups is because,
25857 unlike pop, vldm can only do consecutive regs. */
25858 for (i = LAST_VFP_REGNUM - 1; i >= FIRST_VFP_REGNUM; i -= 2)
25859 /* Look for a case where a reg does not need restoring. */
25860 if ((!df_regs_ever_live_p (i) || call_used_regs[i])
25861 && (!df_regs_ever_live_p (i + 1)
25862 || call_used_regs[i + 1]))
25863 {
25864 /* Restore the regs discovered so far (from reg+2 to
25865 end_reg). */
25866 if (end_reg > i + 2)
25867 arm_emit_vfp_multi_reg_pop (i + 2,
25868 (end_reg - (i + 2)) / 2,
25869 stack_pointer_rtx);
25870 end_reg = i;
25871 }
25872
25873 /* Restore the remaining regs that we have discovered (or possibly
25874 even all of them, if the conditional in the for loop never
25875 fired). */
25876 if (end_reg > i + 2)
25877 arm_emit_vfp_multi_reg_pop (i + 2,
25878 (end_reg - (i + 2)) / 2,
25879 stack_pointer_rtx);
25880 }
25881
25882 if (TARGET_IWMMXT)
25883 for (i = FIRST_IWMMXT_REGNUM; i <= LAST_IWMMXT_REGNUM; i++)
25884 if (df_regs_ever_live_p (i) && !call_used_regs[i])
25885 {
25886 rtx_insn *insn;
25887 rtx addr = gen_rtx_MEM (V2SImode,
25888 gen_rtx_POST_INC (SImode,
25889 stack_pointer_rtx));
25890 set_mem_alias_set (addr, get_frame_alias_set ());
25891 insn = emit_insn (gen_movsi (gen_rtx_REG (V2SImode, i), addr));
25892 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25893 gen_rtx_REG (V2SImode, i),
25894 NULL_RTX);
25895 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25896 stack_pointer_rtx, stack_pointer_rtx);
25897 }
25898
25899 if (saved_regs_mask)
25900 {
25901 rtx insn;
25902 bool return_in_pc = false;
25903
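/* When the function saved LR and no special return sequence is needed,
pop the saved return address straight into PC so that the register pop
also performs the return. */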
25904 if (ARM_FUNC_TYPE (func_type) != ARM_FT_INTERWORKED
25905 && (TARGET_ARM || ARM_FUNC_TYPE (func_type) == ARM_FT_NORMAL)
25906 && !IS_CMSE_ENTRY (func_type)
25907 && !IS_STACKALIGN (func_type)
25908 && really_return
25909 && crtl->args.pretend_args_size == 0
25910 && saved_regs_mask & (1 << LR_REGNUM)
25911 && !crtl->calls_eh_return)
25912 {
25913 saved_regs_mask &= ~(1 << LR_REGNUM);
25914 saved_regs_mask |= (1 << PC_REGNUM);
25915 return_in_pc = true;
25916 }
25917
25918 if (num_regs == 1 && (!IS_INTERRUPT (func_type) || !return_in_pc))
25919 {
25920 for (i = 0; i <= LAST_ARM_REGNUM; i++)
25921 if (saved_regs_mask & (1 << i))
25922 {
25923 rtx addr = gen_rtx_MEM (SImode,
25924 gen_rtx_POST_INC (SImode,
25925 stack_pointer_rtx));
25926 set_mem_alias_set (addr, get_frame_alias_set ());
25927
25928 if (i == PC_REGNUM)
25929 {
25930 insn = gen_rtx_PARALLEL (VOIDmode, rtvec_alloc (2));
25931 XVECEXP (insn, 0, 0) = ret_rtx;
25932 XVECEXP (insn, 0, 1) = gen_rtx_SET (gen_rtx_REG (SImode, i),
25933 addr);
25934 RTX_FRAME_RELATED_P (XVECEXP (insn, 0, 1)) = 1;
25935 insn = emit_jump_insn (insn);
25936 }
25937 else
25938 {
25939 insn = emit_insn (gen_movsi (gen_rtx_REG (SImode, i),
25940 addr));
25941 REG_NOTES (insn) = alloc_reg_note (REG_CFA_RESTORE,
25942 gen_rtx_REG (SImode, i),
25943 NULL_RTX);
25944 arm_add_cfa_adjust_cfa_note (insn, UNITS_PER_WORD,
25945 stack_pointer_rtx,
25946 stack_pointer_rtx);
25947 }
25948 }
25949 }
25950 else
25951 {
25952 if (TARGET_LDRD
25953 && current_tune->prefer_ldrd_strd
25954 && !optimize_function_for_size_p (cfun))
25955 {
25956 if (TARGET_THUMB2)
25957 thumb2_emit_ldrd_pop (saved_regs_mask);
25958 else if (TARGET_ARM && !IS_INTERRUPT (func_type))
25959 arm_emit_ldrd_pop (saved_regs_mask);
25960 else
25961 arm_emit_multi_reg_pop (saved_regs_mask);
25962 }
25963 else
25964 arm_emit_multi_reg_pop (saved_regs_mask);
25965 }
25966
25967 if (return_in_pc)
25968 return;
25969 }
25970
25971 amount
25972 = crtl->args.pretend_args_size + arm_compute_static_chain_stack_bytes();
25973 if (amount)
25974 {
25975 int i, j;
25976 rtx dwarf = NULL_RTX;
25977 rtx_insn *tmp =
25978 emit_insn (gen_addsi3 (stack_pointer_rtx,
25979 stack_pointer_rtx,
25980 GEN_INT (amount)));
25981
25982 RTX_FRAME_RELATED_P (tmp) = 1;
25983
25984 if (cfun->machine->uses_anonymous_args)
25985 {
25986 /* Restore pretend args. See arm_expand_prologue for how the
25987 pretend args are saved on the stack. */
25988 int num_regs = crtl->args.pretend_args_size / 4;
25989 saved_regs_mask = (0xf0 >> num_regs) & 0xf;
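/* (0xf0 >> num_regs) & 0xf selects the top NUM_REGS argument registers,
ending at r3 (the same registers the prologue pushed for the pretend
args), so that the matching REG_CFA_RESTORE notes are generated. */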
25990 for (j = 0, i = 0; j < num_regs; i++)
25991 if (saved_regs_mask & (1 << i))
25992 {
25993 rtx reg = gen_rtx_REG (SImode, i);
25994 dwarf = alloc_reg_note (REG_CFA_RESTORE, reg, dwarf);
25995 j++;
25996 }
25997 REG_NOTES (tmp) = dwarf;
25998 }
25999 arm_add_cfa_adjust_cfa_note (tmp, amount,
26000 stack_pointer_rtx, stack_pointer_rtx);
26001 }
26002
26003 /* Clear all caller-saved regs that are not used to return. */
26004 if (IS_CMSE_ENTRY (arm_current_func_type ()))
26005 {
26006 /* CMSE_ENTRY always returns. */
26007 gcc_assert (really_return);
26008 cmse_nonsecure_entry_clear_before_return ();
26009 }
26010
26011 if (!really_return)
26012 return;
26013
26014 if (crtl->calls_eh_return)
26015 emit_insn (gen_addsi3 (stack_pointer_rtx,
26016 stack_pointer_rtx,
26017 gen_rtx_REG (SImode, ARM_EH_STACKADJ_REGNUM)));
26018
26019 if (IS_STACKALIGN (func_type))
26020 /* Restore the original stack pointer. Before prologue, the stack was
26021 realigned and the original stack pointer saved in r0. For details,
26022 see comment in arm_expand_prologue. */
26023 emit_insn (gen_movsi (stack_pointer_rtx, gen_rtx_REG (SImode, R0_REGNUM)));
26024
26025 emit_jump_insn (simple_return_rtx);
26026 }
26027
26028 /* Implementation of insn prologue_thumb1_interwork. This is the first
26029 "instruction" of a function called in ARM mode. Swap to thumb mode. */
26030
26031 const char *
26032 thumb1_output_interwork (void)
26033 {
26034 const char * name;
26035 FILE *f = asm_out_file;
26036
26037 gcc_assert (MEM_P (DECL_RTL (current_function_decl)));
26038 gcc_assert (GET_CODE (XEXP (DECL_RTL (current_function_decl), 0))
26039 == SYMBOL_REF);
26040 name = XSTR (XEXP (DECL_RTL (current_function_decl), 0), 0);
26041
26042 /* Generate code sequence to switch us into Thumb mode. */
26043 /* The .code 32 directive has already been emitted by
26044 ASM_DECLARE_FUNCTION_NAME. */
26045 asm_fprintf (f, "\torr\t%r, %r, #1\n", IP_REGNUM, PC_REGNUM);
26046 asm_fprintf (f, "\tbx\t%r\n", IP_REGNUM);
26047
26048 /* Generate a label, so that the debugger will notice the
26049 change in instruction sets. This label is also used by
26050 the assembler to bypass the ARM code when this function
26051 is called from a Thumb encoded function elsewhere in the
26052 same file. Hence the definition of STUB_NAME here must
26053 agree with the definition in gas/config/tc-arm.c. */
26054
26055 #define STUB_NAME ".real_start_of"
26056
26057 fprintf (f, "\t.code\t16\n");
26058 #ifdef ARM_PE
26059 if (arm_dllexport_name_p (name))
26060 name = arm_strip_name_encoding (name);
26061 #endif
26062 asm_fprintf (f, "\t.globl %s%U%s\n", STUB_NAME, name);
26063 fprintf (f, "\t.thumb_func\n");
26064 asm_fprintf (f, "%s%U%s:\n", STUB_NAME, name);
26065
26066 return "";
26067 }
26068
26069 /* Handle the case of a double word load into a low register from
26070 a computed memory address. The computed address may involve a
26071 register which is overwritten by the load. */
26072 const char *
26073 thumb_load_double_from_address (rtx *operands)
26074 {
26075 rtx addr;
26076 rtx base;
26077 rtx offset;
26078 rtx arg1;
26079 rtx arg2;
26080
26081 gcc_assert (REG_P (operands[0]));
26082 gcc_assert (MEM_P (operands[1]));
26083
26084 /* Get the memory address. */
26085 addr = XEXP (operands[1], 0);
26086
26087 /* Work out how the memory address is computed. */
26088 switch (GET_CODE (addr))
26089 {
26090 case REG:
26091 operands[2] = adjust_address (operands[1], SImode, 4);
26092
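/* If the destination's low word is the same register as the base
address, load the high word first so the address is not clobbered by
the first load. */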
26093 if (REGNO (operands[0]) == REGNO (addr))
26094 {
26095 output_asm_insn ("ldr\t%H0, %2", operands);
26096 output_asm_insn ("ldr\t%0, %1", operands);
26097 }
26098 else
26099 {
26100 output_asm_insn ("ldr\t%0, %1", operands);
26101 output_asm_insn ("ldr\t%H0, %2", operands);
26102 }
26103 break;
26104
26105 case CONST:
26106 /* Compute <address> + 4 for the high order load. */
26107 operands[2] = adjust_address (operands[1], SImode, 4);
26108
26109 output_asm_insn ("ldr\t%0, %1", operands);
26110 output_asm_insn ("ldr\t%H0, %2", operands);
26111 break;
26112
26113 case PLUS:
26114 arg1 = XEXP (addr, 0);
26115 arg2 = XEXP (addr, 1);
26116
26117 if (CONSTANT_P (arg1))
26118 base = arg2, offset = arg1;
26119 else
26120 base = arg1, offset = arg2;
26121
26122 gcc_assert (REG_P (base));
26123
26124 /* Catch the case of <address> = <reg> + <reg> */
26125 if (REG_P (offset))
26126 {
26127 int reg_offset = REGNO (offset);
26128 int reg_base = REGNO (base);
26129 int reg_dest = REGNO (operands[0]);
26130
26131 /* Add the base and offset registers together into the
26132 higher destination register. */
26133 asm_fprintf (asm_out_file, "\tadd\t%r, %r, %r",
26134 reg_dest + 1, reg_base, reg_offset);
26135
26136 /* Load the lower destination register from the address in
26137 the higher destination register. */
26138 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #0]",
26139 reg_dest, reg_dest + 1);
26140
26141 /* Load the higher destination register from its own address
26142 plus 4. */
26143 asm_fprintf (asm_out_file, "\tldr\t%r, [%r, #4]",
26144 reg_dest + 1, reg_dest + 1);
26145 }
26146 else
26147 {
26148 /* Compute <address> + 4 for the high order load. */
26149 operands[2] = adjust_address (operands[1], SImode, 4);
26150
26151 /* If the computed address is held in the low order register
26152 then load the high order register first, otherwise always
26153 load the low order register first. */
26154 if (REGNO (operands[0]) == REGNO (base))
26155 {
26156 output_asm_insn ("ldr\t%H0, %2", operands);
26157 output_asm_insn ("ldr\t%0, %1", operands);
26158 }
26159 else
26160 {
26161 output_asm_insn ("ldr\t%0, %1", operands);
26162 output_asm_insn ("ldr\t%H0, %2", operands);
26163 }
26164 }
26165 break;
26166
26167 case LABEL_REF:
26168 /* With no registers to worry about we can just load the value
26169 directly. */
26170 operands[2] = adjust_address (operands[1], SImode, 4);
26171
26172 output_asm_insn ("ldr\t%H0, %2", operands);
26173 output_asm_insn ("ldr\t%0, %1", operands);
26174 break;
26175
26176 default:
26177 gcc_unreachable ();
26178 }
26179
26180 return "";
26181 }
26182
26183 const char *
26184 thumb_output_move_mem_multiple (int n, rtx *operands)
26185 {
26186 switch (n)
26187 {
26188 case 2:
26189 if (REGNO (operands[4]) > REGNO (operands[5]))
26190 std::swap (operands[4], operands[5]);
26191
26192 output_asm_insn ("ldmia\t%1!, {%4, %5}", operands);
26193 output_asm_insn ("stmia\t%0!, {%4, %5}", operands);
26194 break;
26195
26196 case 3:
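/* Sort the three scratch registers into ascending order, since the
ldmia/stmia register lists must name registers in increasing order. */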
26197 if (REGNO (operands[4]) > REGNO (operands[5]))
26198 std::swap (operands[4], operands[5]);
26199 if (REGNO (operands[5]) > REGNO (operands[6]))
26200 std::swap (operands[5], operands[6]);
26201 if (REGNO (operands[4]) > REGNO (operands[5]))
26202 std::swap (operands[4], operands[5]);
26203
26204 output_asm_insn ("ldmia\t%1!, {%4, %5, %6}", operands);
26205 output_asm_insn ("stmia\t%0!, {%4, %5, %6}", operands);
26206 break;
26207
26208 default:
26209 gcc_unreachable ();
26210 }
26211
26212 return "";
26213 }
26214
26215 /* Output a call-via instruction for thumb state. */
26216 const char *
26217 thumb_call_via_reg (rtx reg)
26218 {
26219 int regno = REGNO (reg);
26220 rtx *labelp;
26221
26222 gcc_assert (regno < LR_REGNUM);
26223
26224 /* If we are in the normal text section we can use a single instance
26225 per compilation unit. If we are doing function sections, then we need
26226 an entry per section, since we can't rely on reachability. */
26227 if (in_section == text_section)
26228 {
26229 thumb_call_reg_needed = 1;
26230
26231 if (thumb_call_via_label[regno] == NULL)
26232 thumb_call_via_label[regno] = gen_label_rtx ();
26233 labelp = thumb_call_via_label + regno;
26234 }
26235 else
26236 {
26237 if (cfun->machine->call_via[regno] == NULL)
26238 cfun->machine->call_via[regno] = gen_label_rtx ();
26239 labelp = cfun->machine->call_via + regno;
26240 }
26241
26242 output_asm_insn ("bl\t%a0", labelp);
26243 return "";
26244 }
26245
26246 /* Routines for generating rtl. */
26247 void
26248 thumb_expand_movmemqi (rtx *operands)
26249 {
26250 rtx out = copy_to_mode_reg (SImode, XEXP (operands[0], 0));
26251 rtx in = copy_to_mode_reg (SImode, XEXP (operands[1], 0));
26252 HOST_WIDE_INT len = INTVAL (operands[2]);
26253 HOST_WIDE_INT offset = 0;
26254
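/* Copy 12-byte and 8-byte blocks using the movmem12b/movmem8b patterns,
then handle any remaining 4-, 2- and 1-byte tail with individual
moves. */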
26255 while (len >= 12)
26256 {
26257 emit_insn (gen_movmem12b (out, in, out, in));
26258 len -= 12;
26259 }
26260
26261 if (len >= 8)
26262 {
26263 emit_insn (gen_movmem8b (out, in, out, in));
26264 len -= 8;
26265 }
26266
26267 if (len >= 4)
26268 {
26269 rtx reg = gen_reg_rtx (SImode);
26270 emit_insn (gen_movsi (reg, gen_rtx_MEM (SImode, in)));
26271 emit_insn (gen_movsi (gen_rtx_MEM (SImode, out), reg));
26272 len -= 4;
26273 offset += 4;
26274 }
26275
26276 if (len >= 2)
26277 {
26278 rtx reg = gen_reg_rtx (HImode);
26279 emit_insn (gen_movhi (reg, gen_rtx_MEM (HImode,
26280 plus_constant (Pmode, in,
26281 offset))));
26282 emit_insn (gen_movhi (gen_rtx_MEM (HImode, plus_constant (Pmode, out,
26283 offset)),
26284 reg));
26285 len -= 2;
26286 offset += 2;
26287 }
26288
26289 if (len)
26290 {
26291 rtx reg = gen_reg_rtx (QImode);
26292 emit_insn (gen_movqi (reg, gen_rtx_MEM (QImode,
26293 plus_constant (Pmode, in,
26294 offset))));
26295 emit_insn (gen_movqi (gen_rtx_MEM (QImode, plus_constant (Pmode, out,
26296 offset)),
26297 reg));
26298 }
26299 }
26300
26301 void
26302 thumb_reload_out_hi (rtx *operands)
26303 {
26304 emit_insn (gen_thumb_movhi_clobber (operands[0], operands[1], operands[2]));
26305 }
26306
26307 /* Return the length of a function name prefix
26308 that starts with the character 'c'. */
26309 static int
26310 arm_get_strip_length (int c)
26311 {
26312 switch (c)
26313 {
26314 ARM_NAME_ENCODING_LENGTHS
26315 default: return 0;
26316 }
26317 }
26318
26319 /* Return a pointer to a function's name with any
26320 and all prefix encodings stripped from it. */
26321 const char *
26322 arm_strip_name_encoding (const char *name)
26323 {
26324 int skip;
26325
26326 while ((skip = arm_get_strip_length (* name)))
26327 name += skip;
26328
26329 return name;
26330 }
26331
26332 /* If there is a '*' anywhere in the name's prefix, then
26333 emit the stripped name verbatim; otherwise prepend an
26334 underscore if leading underscores are being used. */
26335 void
26336 arm_asm_output_labelref (FILE *stream, const char *name)
26337 {
26338 int skip;
26339 int verbatim = 0;
26340
26341 while ((skip = arm_get_strip_length (* name)))
26342 {
26343 verbatim |= (*name == '*');
26344 name += skip;
26345 }
26346
26347 if (verbatim)
26348 fputs (name, stream);
26349 else
26350 asm_fprintf (stream, "%U%s", name);
26351 }
26352
26353 /* This function is used to emit an EABI tag and its associated value.
26354 We emit the numerical value of the tag in case the assembler does not
26355 support textual tags (e.g. gas prior to 2.20). If requested we include
26356 the tag name in a comment so that anyone reading the assembler output
26357 will know which tag is being set.
26358
26359 This function is not static because arm-c.c needs it too. */
26360
26361 void
26362 arm_emit_eabi_attribute (const char *name, int num, int val)
26363 {
26364 asm_fprintf (asm_out_file, "\t.eabi_attribute %d, %d", num, val);
26365 if (flag_verbose_asm || flag_debug_asm)
26366 asm_fprintf (asm_out_file, "\t%s %s", ASM_COMMENT_START, name);
26367 asm_fprintf (asm_out_file, "\n");
26368 }
26369
26370 /* This function is used to print CPU tuning information as comment
26371 in assembler file. Pointers are not printed for now. */
26372
26373 void
26374 arm_print_tune_info (void)
26375 {
26376 asm_fprintf (asm_out_file, "\t" ASM_COMMENT_START ".tune parameters\n");
26377 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "constant_limit:\t%d\n",
26378 current_tune->constant_limit);
26379 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26380 "max_insns_skipped:\t%d\n", current_tune->max_insns_skipped);
26381 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26382 "prefetch.num_slots:\t%d\n", current_tune->prefetch.num_slots);
26383 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26384 "prefetch.l1_cache_size:\t%d\n",
26385 current_tune->prefetch.l1_cache_size);
26386 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26387 "prefetch.l1_cache_line_size:\t%d\n",
26388 current_tune->prefetch.l1_cache_line_size);
26389 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26390 "prefer_constant_pool:\t%d\n",
26391 (int) current_tune->prefer_constant_pool);
26392 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26393 "branch_cost:\t(s:speed, p:predictable)\n");
26394 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\ts&p\tcost\n");
26395 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t00\t%d\n",
26396 current_tune->branch_cost (false, false));
26397 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t01\t%d\n",
26398 current_tune->branch_cost (false, true));
26399 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t10\t%d\n",
26400 current_tune->branch_cost (true, false));
26401 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "\t\t11\t%d\n",
26402 current_tune->branch_cost (true, true));
26403 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26404 "prefer_ldrd_strd:\t%d\n",
26405 (int) current_tune->prefer_ldrd_strd);
26406 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26407 "logical_op_non_short_circuit:\t[%d,%d]\n",
26408 (int) current_tune->logical_op_non_short_circuit_thumb,
26409 (int) current_tune->logical_op_non_short_circuit_arm);
26410 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26411 "prefer_neon_for_64bits:\t%d\n",
26412 (int) current_tune->prefer_neon_for_64bits);
26413 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26414 "disparage_flag_setting_t16_encodings:\t%d\n",
26415 (int) current_tune->disparage_flag_setting_t16_encodings);
26416 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26417 "string_ops_prefer_neon:\t%d\n",
26418 (int) current_tune->string_ops_prefer_neon);
26419 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START
26420 "max_insns_inline_memset:\t%d\n",
26421 current_tune->max_insns_inline_memset);
26422 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "fusible_ops:\t%u\n",
26423 current_tune->fusible_ops);
26424 asm_fprintf (asm_out_file, "\t\t" ASM_COMMENT_START "sched_autopref:\t%d\n",
26425 (int) current_tune->sched_autopref);
26426 }
26427
26428 /* Print .arch and .arch_extension directives corresponding to the
26429 current architecture configuration. */
26430 static void
26431 arm_print_asm_arch_directives ()
26432 {
26433 const arch_option *arch
26434 = arm_parse_arch_option_name (all_architectures, "-march",
26435 arm_active_target.arch_name);
26436 auto_sbitmap opt_bits (isa_num_bits);
26437
26438 gcc_assert (arch);
26439
26440 asm_fprintf (asm_out_file, "\t.arch %s\n", arm_active_target.arch_name);
26441 arm_last_printed_arch_string = arm_active_target.arch_name;
26442 if (!arch->common.extensions)
26443 return;
26444
26445 for (const struct cpu_arch_extension *opt = arch->common.extensions;
26446 opt->name != NULL;
26447 opt++)
26448 {
26449 if (!opt->remove)
26450 {
26451 arm_initialize_isa (opt_bits, opt->isa_bits);
26452
26453 /* If every feature bit of this option is set in the target
26454 ISA specification, print out the option name. However,
26455 don't print anything if all the bits are part of the
26456 FPU specification. */
26457 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
26458 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
26459 asm_fprintf (asm_out_file, "\t.arch_extension %s\n", opt->name);
26460 }
26461 }
26462 }
26463
26464 static void
26465 arm_file_start (void)
26466 {
26467 int val;
26468
26469 if (TARGET_BPABI)
26470 {
26471 /* We don't have a specified CPU. Use the architecture to
26472 generate the tags.
26473
26474 Note: it might be better to do this unconditionally; then the
26475 assembler would not need to know about all new CPU names as
26476 they are added. */
26477 if (!arm_active_target.core_name)
26478 {
26479 /* armv7ve doesn't support any extensions. */
26480 if (strcmp (arm_active_target.arch_name, "armv7ve") == 0)
26481 {
26482 /* Keep backward compatibility for assemblers
26483 which don't support armv7ve. */
26484 asm_fprintf (asm_out_file, "\t.arch armv7-a\n");
26485 asm_fprintf (asm_out_file, "\t.arch_extension virt\n");
26486 asm_fprintf (asm_out_file, "\t.arch_extension idiv\n");
26487 asm_fprintf (asm_out_file, "\t.arch_extension sec\n");
26488 asm_fprintf (asm_out_file, "\t.arch_extension mp\n");
26489 arm_last_printed_arch_string = "armv7ve";
26490 }
26491 else
26492 arm_print_asm_arch_directives ();
26493 }
26494 else if (strncmp (arm_active_target.core_name, "generic", 7) == 0)
26495 {
26496 asm_fprintf (asm_out_file, "\t.arch %s\n",
26497 arm_active_target.core_name + 8);
26498 arm_last_printed_arch_string = arm_active_target.core_name + 8;
26499 }
26500 else
26501 {
26502 const char* truncated_name
26503 = arm_rewrite_selected_cpu (arm_active_target.core_name);
26504 asm_fprintf (asm_out_file, "\t.cpu %s\n", truncated_name);
26505 }
26506
26507 if (print_tune_info)
26508 arm_print_tune_info ();
26509
26510 if (! TARGET_SOFT_FLOAT)
26511 {
26512 if (TARGET_HARD_FLOAT && TARGET_VFP_SINGLE)
26513 arm_emit_eabi_attribute ("Tag_ABI_HardFP_use", 27, 1);
26514
26515 if (TARGET_HARD_FLOAT_ABI)
26516 arm_emit_eabi_attribute ("Tag_ABI_VFP_args", 28, 1);
26517 }
26518
26519 /* Some of these attributes only apply when the corresponding features
26520 are used. However we don't have any easy way of figuring this out.
26521 Conservatively record the setting that would have been used. */
26522
26523 if (flag_rounding_math)
26524 arm_emit_eabi_attribute ("Tag_ABI_FP_rounding", 19, 1);
26525
26526 if (!flag_unsafe_math_optimizations)
26527 {
26528 arm_emit_eabi_attribute ("Tag_ABI_FP_denormal", 20, 1);
26529 arm_emit_eabi_attribute ("Tag_ABI_FP_exceptions", 21, 1);
26530 }
26531 if (flag_signaling_nans)
26532 arm_emit_eabi_attribute ("Tag_ABI_FP_user_exceptions", 22, 1);
26533
26534 arm_emit_eabi_attribute ("Tag_ABI_FP_number_model", 23,
26535 flag_finite_math_only ? 1 : 3);
26536
26537 arm_emit_eabi_attribute ("Tag_ABI_align8_needed", 24, 1);
26538 arm_emit_eabi_attribute ("Tag_ABI_align8_preserved", 25, 1);
26539 arm_emit_eabi_attribute ("Tag_ABI_enum_size", 26,
26540 flag_short_enums ? 1 : 2);
26541
26542 /* Tag_ABI_optimization_goals. */
26543 if (optimize_size)
26544 val = 4;
26545 else if (optimize >= 2)
26546 val = 2;
26547 else if (optimize)
26548 val = 1;
26549 else
26550 val = 6;
26551 arm_emit_eabi_attribute ("Tag_ABI_optimization_goals", 30, val);
26552
26553 arm_emit_eabi_attribute ("Tag_CPU_unaligned_access", 34,
26554 unaligned_access);
26555
26556 if (arm_fp16_format)
26557 arm_emit_eabi_attribute ("Tag_ABI_FP_16bit_format", 38,
26558 (int) arm_fp16_format);
26559
26560 if (arm_lang_output_object_attributes_hook)
26561 arm_lang_output_object_attributes_hook();
26562 }
26563
26564 default_file_start ();
26565 }
26566
26567 static void
26568 arm_file_end (void)
26569 {
26570 int regno;
26571
26572 if (NEED_INDICATE_EXEC_STACK)
26573 /* Add .note.GNU-stack. */
26574 file_end_indicate_exec_stack ();
26575
26576 if (! thumb_call_reg_needed)
26577 return;
26578
26579 switch_to_section (text_section);
26580 asm_fprintf (asm_out_file, "\t.code 16\n");
26581 ASM_OUTPUT_ALIGN (asm_out_file, 1);
26582
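/* Emit the deferred call-via stubs: for each register that needed one,
define its label followed by a single "bx" through that register. */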
26583 for (regno = 0; regno < LR_REGNUM; regno++)
26584 {
26585 rtx label = thumb_call_via_label[regno];
26586
26587 if (label != 0)
26588 {
26589 targetm.asm_out.internal_label (asm_out_file, "L",
26590 CODE_LABEL_NUMBER (label));
26591 asm_fprintf (asm_out_file, "\tbx\t%r\n", regno);
26592 }
26593 }
26594 }
26595
26596 #ifndef ARM_PE
26597 /* Symbols in the text segment can be accessed without indirecting via the
26598 constant pool; it may take an extra binary operation, but this is still
26599 faster than indirecting via memory. Don't do this when not optimizing,
26600 since we won't be calculating al of the offsets necessary to do this
26601 simplification. */
26602
26603 static void
26604 arm_encode_section_info (tree decl, rtx rtl, int first)
26605 {
26606 if (optimize > 0 && TREE_CONSTANT (decl))
26607 SYMBOL_REF_FLAG (XEXP (rtl, 0)) = 1;
26608
26609 default_encode_section_info (decl, rtl, first);
26610 }
26611 #endif /* !ARM_PE */
26612
26613 static void
26614 arm_internal_label (FILE *stream, const char *prefix, unsigned long labelno)
26615 {
26616 if (arm_ccfsm_state == 3 && (unsigned) arm_target_label == labelno
26617 && !strcmp (prefix, "L"))
26618 {
26619 arm_ccfsm_state = 0;
26620 arm_target_insn = NULL;
26621 }
26622 default_internal_label (stream, prefix, labelno);
26623 }
26624
26625 /* Output code to add DELTA to the first argument, and then jump
26626 to FUNCTION. Used for C++ multiple inheritance. */
26627
26628 static void
26629 arm_thumb1_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26630 HOST_WIDE_INT, tree function)
26631 {
26632 static int thunk_label = 0;
26633 char label[256];
26634 char labelpc[256];
26635 int mi_delta = delta;
26636 const char *const mi_op = mi_delta < 0 ? "sub" : "add";
26637 int shift = 0;
26638 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)), function)
26639 ? 1 : 0);
26640 if (mi_delta < 0)
26641 mi_delta = - mi_delta;
26642
26643 final_start_function (emit_barrier (), file, 1);
26644
26645 if (TARGET_THUMB1)
26646 {
26647 int labelno = thunk_label++;
26648 ASM_GENERATE_INTERNAL_LABEL (label, "LTHUMBFUNC", labelno);
26649 /* Thunks are entered in ARM mode when available. */
26650 if (TARGET_THUMB1_ONLY)
26651 {
26652 /* push r3 so we can use it as a temporary. */
26653 /* TODO: Omit this save if r3 is not used. */
26654 fputs ("\tpush {r3}\n", file);
26655 fputs ("\tldr\tr3, ", file);
26656 }
26657 else
26658 {
26659 fputs ("\tldr\tr12, ", file);
26660 }
26661 assemble_name (file, label);
26662 fputc ('\n', file);
26663 if (flag_pic)
26664 {
26665 /* If we are generating PIC, the ldr instruction below loads
26666 "(target - 7) - .LTHUNKPCn" into r12. The pc reads as
26667 the address of the add + 8, so we have:
26668
26669 r12 = (target - 7) - .LTHUNKPCn + (.LTHUNKPCn + 8)
26670 = target + 1.
26671
26672 Note that we have "+ 1" because some versions of GNU ld
26673 don't set the low bit of the result for R_ARM_REL32
26674 relocations against thumb function symbols.
26675 On ARMv6M this is +4, not +8. */
26676 ASM_GENERATE_INTERNAL_LABEL (labelpc, "LTHUNKPC", labelno);
26677 assemble_name (file, labelpc);
26678 fputs (":\n", file);
26679 if (TARGET_THUMB1_ONLY)
26680 {
26681 /* This is 2 insns after the start of the thunk, so we know it
26682 is 4-byte aligned. */
26683 fputs ("\tadd\tr3, pc, r3\n", file);
26684 fputs ("\tmov r12, r3\n", file);
26685 }
26686 else
26687 fputs ("\tadd\tr12, pc, r12\n", file);
26688 }
26689 else if (TARGET_THUMB1_ONLY)
26690 fputs ("\tmov r12, r3\n", file);
26691 }
26692 if (TARGET_THUMB1_ONLY)
26693 {
26694 if (mi_delta > 255)
26695 {
26696 fputs ("\tldr\tr3, ", file);
26697 assemble_name (file, label);
26698 fputs ("+4\n", file);
26699 asm_fprintf (file, "\t%ss\t%r, %r, r3\n",
26700 mi_op, this_regno, this_regno);
26701 }
26702 else if (mi_delta != 0)
26703 {
26704 /* Thumb1 unified syntax requires s suffix in instruction name when
26705 one of the operands is immediate. */
26706 asm_fprintf (file, "\t%ss\t%r, %r, #%d\n",
26707 mi_op, this_regno, this_regno,
26708 mi_delta);
26709 }
26710 }
26711 else
26712 {
26713 /* TODO: Use movw/movt for large constants when available. */
26714 while (mi_delta != 0)
26715 {
26716 if ((mi_delta & (3 << shift)) == 0)
26717 shift += 2;
26718 else
26719 {
26720 asm_fprintf (file, "\t%s\t%r, %r, #%d\n",
26721 mi_op, this_regno, this_regno,
26722 mi_delta & (0xff << shift));
26723 mi_delta &= ~(0xff << shift);
26724 shift += 8;
26725 }
26726 }
26727 }
26728 if (TARGET_THUMB1)
26729 {
26730 if (TARGET_THUMB1_ONLY)
26731 fputs ("\tpop\t{r3}\n", file);
26732
26733 fprintf (file, "\tbx\tr12\n");
26734 ASM_OUTPUT_ALIGN (file, 2);
26735 assemble_name (file, label);
26736 fputs (":\n", file);
26737 if (flag_pic)
26738 {
26739 /* Output ".word .LTHUNKn-[3,7]-.LTHUNKPCn". */
26740 rtx tem = XEXP (DECL_RTL (function), 0);
26741 /* For TARGET_THUMB1_ONLY the thunk is in Thumb mode, so the PC
26742 pipeline offset is four rather than eight. Adjust the offset
26743 accordingly. */
26744 tem = plus_constant (GET_MODE (tem), tem,
26745 TARGET_THUMB1_ONLY ? -3 : -7);
26746 tem = gen_rtx_MINUS (GET_MODE (tem),
26747 tem,
26748 gen_rtx_SYMBOL_REF (Pmode,
26749 ggc_strdup (labelpc)));
26750 assemble_integer (tem, 4, BITS_PER_WORD, 1);
26751 }
26752 else
26753 /* Output ".word .LTHUNKn". */
26754 assemble_integer (XEXP (DECL_RTL (function), 0), 4, BITS_PER_WORD, 1);
26755
26756 if (TARGET_THUMB1_ONLY && mi_delta > 255)
26757 assemble_integer (GEN_INT(mi_delta), 4, BITS_PER_WORD, 1);
26758 }
26759 else
26760 {
26761 fputs ("\tb\t", file);
26762 assemble_name (file, XSTR (XEXP (DECL_RTL (function), 0), 0));
26763 if (NEED_PLT_RELOC)
26764 fputs ("(PLT)", file);
26765 fputc ('\n', file);
26766 }
26767
26768 final_end_function ();
26769 }
26770
26771 /* MI thunk handling for TARGET_32BIT. */
26772
26773 static void
26774 arm32_output_mi_thunk (FILE *file, tree, HOST_WIDE_INT delta,
26775 HOST_WIDE_INT vcall_offset, tree function)
26776 {
26777 const bool long_call_p = arm_is_long_call_p (function);
26778
26779 /* On ARM, this_regno is R0 or R1 depending on whether the function
26780 returns an aggregate or not. */
26781
26782 int this_regno = (aggregate_value_p (TREE_TYPE (TREE_TYPE (function)),
26783 function)
26784 ? R1_REGNUM : R0_REGNUM);
26785
26786 rtx temp = gen_rtx_REG (Pmode, IP_REGNUM);
26787 rtx this_rtx = gen_rtx_REG (Pmode, this_regno);
26788 reload_completed = 1;
26789 emit_note (NOTE_INSN_PROLOGUE_END);
26790
26791 /* Add DELTA to THIS_RTX. */
26792 if (delta != 0)
26793 arm_split_constant (PLUS, Pmode, NULL_RTX,
26794 delta, this_rtx, this_rtx, false);
26795
26796 /* Add *(*THIS_RTX + VCALL_OFFSET) to THIS_RTX. */
26797 if (vcall_offset != 0)
26798 {
26799 /* Load *THIS_RTX. */
26800 emit_move_insn (temp, gen_rtx_MEM (Pmode, this_rtx));
26801 /* Compute *THIS_RTX + VCALL_OFFSET. */
26802 arm_split_constant (PLUS, Pmode, NULL_RTX, vcall_offset, temp, temp,
26803 false);
26804 /* Compute *(*THIS_RTX + VCALL_OFFSET). */
26805 emit_move_insn (temp, gen_rtx_MEM (Pmode, temp));
26806 emit_insn (gen_add3_insn (this_rtx, this_rtx, temp));
26807 }
26808
26809 /* Generate a tail call to the target function. */
26810 if (!TREE_USED (function))
26811 {
26812 assemble_external (function);
26813 TREE_USED (function) = 1;
26814 }
26815 rtx funexp = XEXP (DECL_RTL (function), 0);
26816 if (long_call_p)
26817 {
26818 emit_move_insn (temp, funexp);
26819 funexp = temp;
26820 }
26821 funexp = gen_rtx_MEM (FUNCTION_MODE, funexp);
26822 rtx_insn *insn = emit_call_insn (gen_sibcall (funexp, const0_rtx, NULL_RTX));
26823 SIBLING_CALL_P (insn) = 1;
26824 emit_barrier ();
26825
26826 /* Indirect calls require a bit of fixup in PIC mode. */
26827 if (long_call_p)
26828 {
26829 split_all_insns_noflow ();
26830 arm_reorg ();
26831 }
26832
26833 insn = get_insns ();
26834 shorten_branches (insn);
26835 final_start_function (insn, file, 1);
26836 final (insn, file, 1);
26837 final_end_function ();
26838
26839 /* Stop pretending this is a post-reload pass. */
26840 reload_completed = 0;
26841 }
26842
26843 /* Output code to add DELTA to the first argument, and then jump
26844 to FUNCTION. Used for C++ multiple inheritance. */
26845
26846 static void
26847 arm_output_mi_thunk (FILE *file, tree thunk, HOST_WIDE_INT delta,
26848 HOST_WIDE_INT vcall_offset, tree function)
26849 {
26850 if (TARGET_32BIT)
26851 arm32_output_mi_thunk (file, thunk, delta, vcall_offset, function);
26852 else
26853 arm_thumb1_mi_thunk (file, thunk, delta, vcall_offset, function);
26854 }
26855
26856 int
26857 arm_emit_vector_const (FILE *file, rtx x)
26858 {
26859 int i;
26860 const char * pattern;
26861
26862 gcc_assert (GET_CODE (x) == CONST_VECTOR);
26863
26864 switch (GET_MODE (x))
26865 {
26866 case E_V2SImode: pattern = "%08x"; break;
26867 case E_V4HImode: pattern = "%04x"; break;
26868 case E_V8QImode: pattern = "%02x"; break;
26869 default: gcc_unreachable ();
26870 }
26871
26872 fprintf (file, "0x");
26873 for (i = CONST_VECTOR_NUNITS (x); i--;)
26874 {
26875 rtx element;
26876
26877 element = CONST_VECTOR_ELT (x, i);
26878 fprintf (file, pattern, INTVAL (element));
26879 }
26880
26881 return 1;
26882 }
26883
26884 /* Emit a fp16 constant appropriately padded to occupy a 4-byte word.
26885 HFmode constant pool entries are actually loaded with ldr. */
26886 void
26887 arm_emit_fp16_const (rtx c)
26888 {
26889 long bits;
26890
26891 bits = real_to_target (NULL, CONST_DOUBLE_REAL_VALUE (c), HFmode);
26892 if (WORDS_BIG_ENDIAN)
26893 assemble_zeros (2);
26894 assemble_integer (GEN_INT (bits), 2, BITS_PER_WORD, 1);
26895 if (!WORDS_BIG_ENDIAN)
26896 assemble_zeros (2);
26897 }
26898
26899 const char *
26900 arm_output_load_gr (rtx *operands)
26901 {
26902 rtx reg;
26903 rtx offset;
26904 rtx wcgr;
26905 rtx sum;
26906
26907 if (!MEM_P (operands [1])
26908 || GET_CODE (sum = XEXP (operands [1], 0)) != PLUS
26909 || !REG_P (reg = XEXP (sum, 0))
26910 || !CONST_INT_P (offset = XEXP (sum, 1))
26911 || ((INTVAL (offset) < 1024) && (INTVAL (offset) > -1024)))
26912 return "wldrw%?\t%0, %1";
26913
26914 /* Fix up an out-of-range load of a GR register. */
26915 output_asm_insn ("str%?\t%0, [sp, #-4]!\t@ Start of GR load expansion", & reg);
26916 wcgr = operands[0];
26917 operands[0] = reg;
26918 output_asm_insn ("ldr%?\t%0, %1", operands);
26919
26920 operands[0] = wcgr;
26921 operands[1] = reg;
26922 output_asm_insn ("tmcr%?\t%0, %1", operands);
26923 output_asm_insn ("ldr%?\t%0, [sp], #4\t@ End of GR load expansion", & reg);
26924
26925 return "";
26926 }
26927
26928 /* Worker function for TARGET_SETUP_INCOMING_VARARGS.
26929
26930 On the ARM, PRETEND_SIZE is set in order to have the prologue push the last
26931 named arg and all anonymous args onto the stack.
26932 XXX I know the prologue shouldn't be pushing registers, but it is faster
26933 that way. */
26934
26935 static void
26936 arm_setup_incoming_varargs (cumulative_args_t pcum_v,
26937 machine_mode mode,
26938 tree type,
26939 int *pretend_size,
26940 int second_time ATTRIBUTE_UNUSED)
26941 {
26942 CUMULATIVE_ARGS *pcum = get_cumulative_args (pcum_v);
26943 int nregs;
26944
26945 cfun->machine->uses_anonymous_args = 1;
26946 if (pcum->pcs_variant <= ARM_PCS_AAPCS_LOCAL)
26947 {
26948 nregs = pcum->aapcs_ncrn;
26949 if (nregs & 1)
26950 {
26951 int res = arm_needs_doubleword_align (mode, type);
26952 if (res < 0 && warn_psabi)
26953 inform (input_location, "parameter passing for argument of "
26954 "type %qT changed in GCC 7.1", type);
26955 else if (res > 0)
26956 nregs++;
26957 }
26958 }
26959 else
26960 nregs = pcum->nregs;
26961
26962 if (nregs < NUM_ARG_REGS)
26963 *pretend_size = (NUM_ARG_REGS - nregs) * UNITS_PER_WORD;
26964 }
26965
26966 /* We can't rely on the caller doing the proper promotion when
26967 using APCS or ATPCS. */
26968
26969 static bool
26970 arm_promote_prototypes (const_tree t ATTRIBUTE_UNUSED)
26971 {
26972 return !TARGET_AAPCS_BASED;
26973 }
26974
26975 static machine_mode
26976 arm_promote_function_mode (const_tree type ATTRIBUTE_UNUSED,
26977 machine_mode mode,
26978 int *punsignedp ATTRIBUTE_UNUSED,
26979 const_tree fntype ATTRIBUTE_UNUSED,
26980 int for_return ATTRIBUTE_UNUSED)
26981 {
26982 if (GET_MODE_CLASS (mode) == MODE_INT
26983 && GET_MODE_SIZE (mode) < 4)
26984 return SImode;
26985
26986 return mode;
26987 }
26988
26989
26990 static bool
26991 arm_default_short_enums (void)
26992 {
26993 return ARM_DEFAULT_SHORT_ENUMS;
26994 }
26995
26996
26997 /* AAPCS requires that anonymous bitfields affect structure alignment. */
26998
26999 static bool
27000 arm_align_anon_bitfield (void)
27001 {
27002 return TARGET_AAPCS_BASED;
27003 }
27004
27005
27006 /* The generic C++ ABI says 64-bit (long long). The EABI says 32-bit. */
27007
27008 static tree
27009 arm_cxx_guard_type (void)
27010 {
27011 return TARGET_AAPCS_BASED ? integer_type_node : long_long_integer_type_node;
27012 }
27013
27014
27015 /* The EABI says test the least significant bit of a guard variable. */
27016
27017 static bool
27018 arm_cxx_guard_mask_bit (void)
27019 {
27020 return TARGET_AAPCS_BASED;
27021 }
27022
27023
27024 /* The EABI specifies that all array cookies are 8 bytes long. */
27025
27026 static tree
27027 arm_get_cookie_size (tree type)
27028 {
27029 tree size;
27030
27031 if (!TARGET_AAPCS_BASED)
27032 return default_cxx_get_cookie_size (type);
27033
27034 size = build_int_cst (sizetype, 8);
27035 return size;
27036 }
27037
27038
27039 /* The EABI says that array cookies should also contain the element size. */
27040
27041 static bool
27042 arm_cookie_has_size (void)
27043 {
27044 return TARGET_AAPCS_BASED;
27045 }
27046
27047
27048 /* The EABI says constructors and destructors should return a pointer to
27049 the object constructed/destroyed. */
27050
27051 static bool
27052 arm_cxx_cdtor_returns_this (void)
27053 {
27054 return TARGET_AAPCS_BASED;
27055 }
27056
27057 /* The EABI says that an inline function may never be the key
27058 method. */
27059
27060 static bool
27061 arm_cxx_key_method_may_be_inline (void)
27062 {
27063 return !TARGET_AAPCS_BASED;
27064 }
27065
27066 static void
27067 arm_cxx_determine_class_data_visibility (tree decl)
27068 {
27069 if (!TARGET_AAPCS_BASED
27070 || !TARGET_DLLIMPORT_DECL_ATTRIBUTES)
27071 return;
27072
27073 /* In general, \S 3.2.5.5 of the ARM EABI requires that class data
27074 is exported. However, on systems without dynamic vague linkage,
27075 \S 3.2.5.6 says that COMDAT class data has hidden linkage. */
27076 if (!TARGET_ARM_DYNAMIC_VAGUE_LINKAGE_P && DECL_COMDAT (decl))
27077 DECL_VISIBILITY (decl) = VISIBILITY_HIDDEN;
27078 else
27079 DECL_VISIBILITY (decl) = VISIBILITY_DEFAULT;
27080 DECL_VISIBILITY_SPECIFIED (decl) = 1;
27081 }
27082
27083 static bool
27084 arm_cxx_class_data_always_comdat (void)
27085 {
27086 /* \S 3.2.5.4 of the ARM C++ ABI says that class data only have
27087 vague linkage if the class has no key function. */
27088 return !TARGET_AAPCS_BASED;
27089 }
27090
27091
27092 /* The EABI says __aeabi_atexit should be used to register static
27093 destructors. */
27094
27095 static bool
27096 arm_cxx_use_aeabi_atexit (void)
27097 {
27098 return TARGET_AAPCS_BASED;
27099 }
27100
27101
27102 void
27103 arm_set_return_address (rtx source, rtx scratch)
27104 {
27105 arm_stack_offsets *offsets;
27106 HOST_WIDE_INT delta;
27107 rtx addr, mem;
27108 unsigned long saved_regs;
27109
27110 offsets = arm_get_frame_offsets ();
27111 saved_regs = offsets->saved_regs_mask;
27112
27113 if ((saved_regs & (1 << LR_REGNUM)) == 0)
27114 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27115 else
27116 {
27117 if (frame_pointer_needed)
27118 addr = plus_constant (Pmode, hard_frame_pointer_rtx, -4);
27119 else
27120 {
27121 /* LR will be the first saved register. */
27122 delta = offsets->outgoing_args - (offsets->frame + 4);
27123
27124
27125 if (delta >= 4096)
27126 {
27127 emit_insn (gen_addsi3 (scratch, stack_pointer_rtx,
27128 GEN_INT (delta & ~4095)));
27129 addr = scratch;
27130 delta &= 4095;
27131 }
27132 else
27133 addr = stack_pointer_rtx;
27134
27135 addr = plus_constant (Pmode, addr, delta);
27136 }
27137
27138 /* The store needs to be marked to prevent DSE from deleting
27139 it as dead if it is based on fp. */
27140 mem = gen_frame_mem (Pmode, addr);
27141 MEM_VOLATILE_P (mem) = true;
27142 emit_move_insn (mem, source);
27143 }
27144 }
27145
27146
27147 void
27148 thumb_set_return_address (rtx source, rtx scratch)
27149 {
27150 arm_stack_offsets *offsets;
27151 HOST_WIDE_INT delta;
27152 HOST_WIDE_INT limit;
27153 int reg;
27154 rtx addr, mem;
27155 unsigned long mask;
27156
27157 emit_use (source);
27158
27159 offsets = arm_get_frame_offsets ();
27160 mask = offsets->saved_regs_mask;
27161 if (mask & (1 << LR_REGNUM))
27162 {
27163 limit = 1024;
27164 /* Find the saved regs. */
27165 if (frame_pointer_needed)
27166 {
27167 delta = offsets->soft_frame - offsets->saved_args;
27168 reg = THUMB_HARD_FRAME_POINTER_REGNUM;
27169 if (TARGET_THUMB1)
27170 limit = 128;
27171 }
27172 else
27173 {
27174 delta = offsets->outgoing_args - offsets->saved_args;
27175 reg = SP_REGNUM;
27176 }
27177 /* Allow for the stack frame. */
27178 if (TARGET_THUMB1 && TARGET_BACKTRACE)
27179 delta -= 16;
27180 /* The link register is always the first saved register. */
27181 delta -= 4;
27182
27183 /* Construct the address. */
27184 addr = gen_rtx_REG (SImode, reg);
27185 if (delta > limit)
27186 {
27187 emit_insn (gen_movsi (scratch, GEN_INT (delta)));
27188 emit_insn (gen_addsi3 (scratch, scratch, stack_pointer_rtx));
27189 addr = scratch;
27190 }
27191 else
27192 addr = plus_constant (Pmode, addr, delta);
27193
27194 /* The store needs to be marked to prevent DSE from deleting
27195 it as dead if it is based on fp. */
27196 mem = gen_frame_mem (Pmode, addr);
27197 MEM_VOLATILE_P (mem) = true;
27198 emit_move_insn (mem, source);
27199 }
27200 else
27201 emit_move_insn (gen_rtx_REG (Pmode, LR_REGNUM), source);
27202 }
27203
27204 /* Implements target hook vector_mode_supported_p. */
27205 bool
27206 arm_vector_mode_supported_p (machine_mode mode)
27207 {
27208 /* Neon also supports V2SImode, etc. listed in the clause below. */
27209 if (TARGET_NEON && (mode == V2SFmode || mode == V4SImode || mode == V8HImode
27210 || mode == V4HFmode || mode == V16QImode || mode == V4SFmode
27211 || mode == V2DImode || mode == V8HFmode))
27212 return true;
27213
27214 if ((TARGET_NEON || TARGET_IWMMXT)
27215 && ((mode == V2SImode)
27216 || (mode == V4HImode)
27217 || (mode == V8QImode)))
27218 return true;
27219
27220 if (TARGET_INT_SIMD && (mode == V4UQQmode || mode == V4QQmode
27221 || mode == V2UHQmode || mode == V2HQmode || mode == V2UHAmode
27222 || mode == V2HAmode))
27223 return true;
27224
27225 return false;
27226 }
27227
27228 /* Implements target hook array_mode_supported_p. */
27229
27230 static bool
27231 arm_array_mode_supported_p (machine_mode mode,
27232 unsigned HOST_WIDE_INT nelems)
27233 {
27234 /* We don't want to enable interleaved loads and stores for BYTES_BIG_ENDIAN
27235 for now, as the lane-swapping logic needs to be extended in the expanders.
27236 See PR target/82518. */
27237 if (TARGET_NEON && !BYTES_BIG_ENDIAN
27238 && (VALID_NEON_DREG_MODE (mode) || VALID_NEON_QREG_MODE (mode))
27239 && (nelems >= 2 && nelems <= 4))
27240 return true;
27241
27242 return false;
27243 }
27244
27245 /* Use the option -mvectorize-with-neon-double to override the use of quadword
27246 registers when autovectorizing for Neon, at least until multiple vector
27247 widths are supported properly by the middle-end. */
27248
27249 static machine_mode
27250 arm_preferred_simd_mode (scalar_mode mode)
27251 {
27252 if (TARGET_NEON)
27253 switch (mode)
27254 {
27255 case E_SFmode:
27256 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SFmode : V4SFmode;
27257 case E_SImode:
27258 return TARGET_NEON_VECTORIZE_DOUBLE ? V2SImode : V4SImode;
27259 case E_HImode:
27260 return TARGET_NEON_VECTORIZE_DOUBLE ? V4HImode : V8HImode;
27261 case E_QImode:
27262 return TARGET_NEON_VECTORIZE_DOUBLE ? V8QImode : V16QImode;
27263 case E_DImode:
27264 if (!TARGET_NEON_VECTORIZE_DOUBLE)
27265 return V2DImode;
27266 break;
27267
27268 default:;
27269 }
27270
27271 if (TARGET_REALLY_IWMMXT)
27272 switch (mode)
27273 {
27274 case E_SImode:
27275 return V2SImode;
27276 case E_HImode:
27277 return V4HImode;
27278 case E_QImode:
27279 return V8QImode;
27280
27281 default:;
27282 }
27283
27284 return word_mode;
27285 }
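
/* For instance, with Neon enabled the hook above maps SImode to V4SImode
   (a quadword vector) by default, or to V2SImode (a doubleword vector)
   when -mvectorize-with-neon-double is given; on an iWMMXt-only target
   SImode maps to V2SImode instead.  */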
27286
27287 /* Implement TARGET_CLASS_LIKELY_SPILLED_P.
27288
27289 We need to define this for LO_REGS on Thumb-1. Otherwise we can end up
27290 using r0-r4 for function arguments and r7 for the stack frame, and not having
27291 enough registers left over to do doubleword arithmetic. For Thumb-2 all the
27292 potentially problematic instructions accept high registers so this is not
27293 necessary. Care needs to be taken to avoid adding new Thumb-2 patterns
27294 that require many low registers. */
27295 static bool
27296 arm_class_likely_spilled_p (reg_class_t rclass)
27297 {
27298 if ((TARGET_THUMB1 && rclass == LO_REGS)
27299 || rclass == CC_REG)
27300 return true;
27301
27302 return false;
27303 }
27304
27305 /* Implements target hook small_register_classes_for_mode_p. */
27306 bool
27307 arm_small_register_classes_for_mode_p (machine_mode mode ATTRIBUTE_UNUSED)
27308 {
27309 return TARGET_THUMB1;
27310 }
27311
27312 /* Implement TARGET_SHIFT_TRUNCATION_MASK. SImode shifts use normal
27313 ARM insns and therefore guarantee that the shift count is modulo 256.
27314 DImode shifts (those implemented by lib1funcs.S or by optabs.c)
27315 guarantee no particular behavior for out-of-range counts. */
27316
27317 static unsigned HOST_WIDE_INT
27318 arm_shift_truncation_mask (machine_mode mode)
27319 {
27320 return mode == SImode ? 255 : 0;
27321 }
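
/* Concretely: the mask of 255 for SImode tells the middle-end that a
   variable shift by, say, 257 behaves like a shift by 257 & 255 == 1,
   because ARM register-specified shifts only use the least significant
   byte of the shift amount; no such guarantee is made for DImode, hence
   the mask of 0.  */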
27322
27323
27324 /* Map internal gcc register numbers to DWARF2 register numbers. */
27325
27326 unsigned int
27327 arm_dbx_register_number (unsigned int regno)
27328 {
27329 if (regno < 16)
27330 return regno;
27331
27332 if (IS_VFP_REGNUM (regno))
27333 {
27334 /* See comment in arm_dwarf_register_span. */
27335 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27336 return 64 + regno - FIRST_VFP_REGNUM;
27337 else
27338 return 256 + (regno - FIRST_VFP_REGNUM) / 2;
27339 }
27340
27341 if (IS_IWMMXT_GR_REGNUM (regno))
27342 return 104 + regno - FIRST_IWMMXT_GR_REGNUM;
27343
27344 if (IS_IWMMXT_REGNUM (regno))
27345 return 112 + regno - FIRST_IWMMXT_REGNUM;
27346
27347 return DWARF_FRAME_REGISTERS;
27348 }
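
/* A few illustrative mappings (assuming the usual internal register
   numbering): core register r7 maps to DWARF register 7; a register in
   the single-precision VFP range such as s5 maps to 64 + 5 == 69 using
   the legacy VFPv2 numbering; a high double register such as d16 maps
   to 256 + 16 == 272.  */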
27349
27350 /* Dwarf models VFPv3 registers as 32 64-bit registers.
27351 GCC models them as 64 32-bit registers, so we need to describe this to
27352 the DWARF generation code. Other registers can use the default. */
27353 static rtx
27354 arm_dwarf_register_span (rtx rtl)
27355 {
27356 machine_mode mode;
27357 unsigned regno;
27358 rtx parts[16];
27359 int nregs;
27360 int i;
27361
27362 regno = REGNO (rtl);
27363 if (!IS_VFP_REGNUM (regno))
27364 return NULL_RTX;
27365
27366 /* XXX FIXME: The EABI defines two VFP register ranges:
27367 64-95: Legacy VFPv2 numbering for S0-S31 (obsolescent)
27368 256-287: D0-D31
27369 The recommended encoding for S0-S31 is a DW_OP_bit_piece of the
27370 corresponding D register. Until GDB supports this, we shall use the
27371 legacy encodings. We also use these encodings for D0-D15 for
27372 compatibility with older debuggers. */
27373 mode = GET_MODE (rtl);
27374 if (GET_MODE_SIZE (mode) < 8)
27375 return NULL_RTX;
27376
27377 if (VFP_REGNO_OK_FOR_SINGLE (regno))
27378 {
27379 nregs = GET_MODE_SIZE (mode) / 4;
27380 for (i = 0; i < nregs; i += 2)
27381 if (TARGET_BIG_END)
27382 {
27383 parts[i] = gen_rtx_REG (SImode, regno + i + 1);
27384 parts[i + 1] = gen_rtx_REG (SImode, regno + i);
27385 }
27386 else
27387 {
27388 parts[i] = gen_rtx_REG (SImode, regno + i);
27389 parts[i + 1] = gen_rtx_REG (SImode, regno + i + 1);
27390 }
27391 }
27392 else
27393 {
27394 nregs = GET_MODE_SIZE (mode) / 8;
27395 for (i = 0; i < nregs; i++)
27396 parts[i] = gen_rtx_REG (DImode, regno + i);
27397 }
27398
27399 return gen_rtx_PARALLEL (VOIDmode, gen_rtvec_v (nregs , parts));
27400 }
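
/* As an example of the spans built above: a DFmode value held in d5
   (which overlaps s10/s11) is described to DWARF as the pair (s10, s11)
   on a little-endian target, or (s11, s10) on a big-endian one, while a
   value in d16 or above is described one DImode register at a time.  */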
27401
27402 #if ARM_UNWIND_INFO
27403 /* Emit unwind directives for a store-multiple instruction or stack pointer
27404 push during alignment.
27405 These should only ever be generated by the function prologue code, so
27406 expect them to have a particular form.
27407 The store-multiple instruction sometimes pushes pc as the last register,
27408 although it should not be tracked in the unwind information; for -Os it
27409 sometimes pushes some dummy registers before the first register that needs
27410 to be tracked in the unwind information. Such dummy registers are there just
27411 to avoid a separate stack adjustment, and will not be restored in the
27412 epilogue. */
27413
27414 static void
27415 arm_unwind_emit_sequence (FILE * asm_out_file, rtx p)
27416 {
27417 int i;
27418 HOST_WIDE_INT offset;
27419 HOST_WIDE_INT nregs;
27420 int reg_size;
27421 unsigned reg;
27422 unsigned lastreg;
27423 unsigned padfirst = 0, padlast = 0;
27424 rtx e;
27425
27426 e = XVECEXP (p, 0, 0);
27427 gcc_assert (GET_CODE (e) == SET);
27428
27429 /* First insn will adjust the stack pointer. */
27430 gcc_assert (GET_CODE (e) == SET
27431 && REG_P (SET_DEST (e))
27432 && REGNO (SET_DEST (e)) == SP_REGNUM
27433 && GET_CODE (SET_SRC (e)) == PLUS);
27434
27435 offset = -INTVAL (XEXP (SET_SRC (e), 1));
27436 nregs = XVECLEN (p, 0) - 1;
27437 gcc_assert (nregs);
27438
27439 reg = REGNO (SET_SRC (XVECEXP (p, 0, 1)));
27440 if (reg < 16)
27441 {
27442 /* For -Os dummy registers can be pushed at the beginning to
27443 avoid separate stack pointer adjustment. */
27444 e = XVECEXP (p, 0, 1);
27445 e = XEXP (SET_DEST (e), 0);
27446 if (GET_CODE (e) == PLUS)
27447 padfirst = INTVAL (XEXP (e, 1));
27448 gcc_assert (padfirst == 0 || optimize_size);
27449 /* The function prologue may also push pc, but it does not annotate this, as
27450 pc is never restored. We turn the push into a stack pointer adjustment. */
27451 e = XVECEXP (p, 0, nregs);
27452 e = XEXP (SET_DEST (e), 0);
27453 if (GET_CODE (e) == PLUS)
27454 padlast = offset - INTVAL (XEXP (e, 1)) - 4;
27455 else
27456 padlast = offset - 4;
27457 gcc_assert (padlast == 0 || padlast == 4);
27458 if (padlast == 4)
27459 fprintf (asm_out_file, "\t.pad #4\n");
27460 reg_size = 4;
27461 fprintf (asm_out_file, "\t.save {");
27462 }
27463 else if (IS_VFP_REGNUM (reg))
27464 {
27465 reg_size = 8;
27466 fprintf (asm_out_file, "\t.vsave {");
27467 }
27468 else
27469 /* Unknown register type. */
27470 gcc_unreachable ();
27471
27472 /* If the stack increment doesn't match the size of the saved registers,
27473 something has gone horribly wrong. */
27474 gcc_assert (offset == padfirst + nregs * reg_size + padlast);
27475
27476 offset = padfirst;
27477 lastreg = 0;
27478 /* The remaining insns will describe the stores. */
27479 for (i = 1; i <= nregs; i++)
27480 {
27481 /* Expect (set (mem <addr>) (reg)).
27482 Where <addr> is (reg:SP) or (plus (reg:SP) (const_int)). */
27483 e = XVECEXP (p, 0, i);
27484 gcc_assert (GET_CODE (e) == SET
27485 && MEM_P (SET_DEST (e))
27486 && REG_P (SET_SRC (e)));
27487
27488 reg = REGNO (SET_SRC (e));
27489 gcc_assert (reg >= lastreg);
27490
27491 if (i != 1)
27492 fprintf (asm_out_file, ", ");
27493 /* We can't use %r for vfp because we need to use the
27494 double precision register names. */
27495 if (IS_VFP_REGNUM (reg))
27496 asm_fprintf (asm_out_file, "d%d", (reg - FIRST_VFP_REGNUM) / 2);
27497 else
27498 asm_fprintf (asm_out_file, "%r", reg);
27499
27500 if (flag_checking)
27501 {
27502 /* Check that the addresses are consecutive. */
27503 e = XEXP (SET_DEST (e), 0);
27504 if (GET_CODE (e) == PLUS)
27505 gcc_assert (REG_P (XEXP (e, 0))
27506 && REGNO (XEXP (e, 0)) == SP_REGNUM
27507 && CONST_INT_P (XEXP (e, 1))
27508 && offset == INTVAL (XEXP (e, 1)));
27509 else
27510 gcc_assert (i == 1
27511 && REG_P (e)
27512 && REGNO (e) == SP_REGNUM);
27513 offset += reg_size;
27514 }
27515 }
27516 fprintf (asm_out_file, "}\n");
27517 if (padfirst)
27518 fprintf (asm_out_file, "\t.pad #%d\n", padfirst);
27519 }
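
/* For example (illustrative only): a prologue store-multiple such as
   "push {r4, r5, r6, lr}" yields the directive ".save {r4, r5, r6, lr}",
   and a VFP save such as "vpush {d8, d9}" yields ".vsave {d8, d9}".  If
   the prologue also pushed pc purely as padding, an extra ".pad #4" is
   emitted so the unwinder still sees the correct stack adjustment.  */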
27520
27521 /* Emit unwind directives for a SET. */
27522
27523 static void
27524 arm_unwind_emit_set (FILE * asm_out_file, rtx p)
27525 {
27526 rtx e0;
27527 rtx e1;
27528 unsigned reg;
27529
27530 e0 = XEXP (p, 0);
27531 e1 = XEXP (p, 1);
27532 switch (GET_CODE (e0))
27533 {
27534 case MEM:
27535 /* Pushing a single register. */
27536 if (GET_CODE (XEXP (e0, 0)) != PRE_DEC
27537 || !REG_P (XEXP (XEXP (e0, 0), 0))
27538 || REGNO (XEXP (XEXP (e0, 0), 0)) != SP_REGNUM)
27539 abort ();
27540
27541 asm_fprintf (asm_out_file, "\t.save ");
27542 if (IS_VFP_REGNUM (REGNO (e1)))
27543 asm_fprintf(asm_out_file, "{d%d}\n",
27544 (REGNO (e1) - FIRST_VFP_REGNUM) / 2);
27545 else
27546 asm_fprintf(asm_out_file, "{%r}\n", REGNO (e1));
27547 break;
27548
27549 case REG:
27550 if (REGNO (e0) == SP_REGNUM)
27551 {
27552 /* A stack increment. */
27553 if (GET_CODE (e1) != PLUS
27554 || !REG_P (XEXP (e1, 0))
27555 || REGNO (XEXP (e1, 0)) != SP_REGNUM
27556 || !CONST_INT_P (XEXP (e1, 1)))
27557 abort ();
27558
27559 asm_fprintf (asm_out_file, "\t.pad #%wd\n",
27560 -INTVAL (XEXP (e1, 1)));
27561 }
27562 else if (REGNO (e0) == HARD_FRAME_POINTER_REGNUM)
27563 {
27564 HOST_WIDE_INT offset;
27565
27566 if (GET_CODE (e1) == PLUS)
27567 {
27568 if (!REG_P (XEXP (e1, 0))
27569 || !CONST_INT_P (XEXP (e1, 1)))
27570 abort ();
27571 reg = REGNO (XEXP (e1, 0));
27572 offset = INTVAL (XEXP (e1, 1));
27573 asm_fprintf (asm_out_file, "\t.setfp %r, %r, #%wd\n",
27574 HARD_FRAME_POINTER_REGNUM, reg,
27575 offset);
27576 }
27577 else if (REG_P (e1))
27578 {
27579 reg = REGNO (e1);
27580 asm_fprintf (asm_out_file, "\t.setfp %r, %r\n",
27581 HARD_FRAME_POINTER_REGNUM, reg);
27582 }
27583 else
27584 abort ();
27585 }
27586 else if (REG_P (e1) && REGNO (e1) == SP_REGNUM)
27587 {
27588 /* Move from sp to reg. */
27589 asm_fprintf (asm_out_file, "\t.movsp %r\n", REGNO (e0));
27590 }
27591 else if (GET_CODE (e1) == PLUS
27592 && REG_P (XEXP (e1, 0))
27593 && REGNO (XEXP (e1, 0)) == SP_REGNUM
27594 && CONST_INT_P (XEXP (e1, 1)))
27595 {
27596 /* Set reg to offset from sp. */
27597 asm_fprintf (asm_out_file, "\t.movsp %r, #%d\n",
27598 REGNO (e0), (int)INTVAL(XEXP (e1, 1)));
27599 }
27600 else
27601 abort ();
27602 break;
27603
27604 default:
27605 abort ();
27606 }
27607 }
27608
27609
27610 /* Emit unwind directives for the given insn. */
27611
27612 static void
27613 arm_unwind_emit (FILE * asm_out_file, rtx_insn *insn)
27614 {
27615 rtx note, pat;
27616 bool handled_one = false;
27617
27618 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27619 return;
27620
27621 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27622 && (TREE_NOTHROW (current_function_decl)
27623 || crtl->all_throwers_are_sibcalls))
27624 return;
27625
27626 if (NOTE_P (insn) || !RTX_FRAME_RELATED_P (insn))
27627 return;
27628
27629 for (note = REG_NOTES (insn); note ; note = XEXP (note, 1))
27630 {
27631 switch (REG_NOTE_KIND (note))
27632 {
27633 case REG_FRAME_RELATED_EXPR:
27634 pat = XEXP (note, 0);
27635 goto found;
27636
27637 case REG_CFA_REGISTER:
27638 pat = XEXP (note, 0);
27639 if (pat == NULL)
27640 {
27641 pat = PATTERN (insn);
27642 if (GET_CODE (pat) == PARALLEL)
27643 pat = XVECEXP (pat, 0, 0);
27644 }
27645
27646 /* Only emitted for IS_STACKALIGN re-alignment. */
27647 {
27648 rtx dest, src;
27649 unsigned reg;
27650
27651 src = SET_SRC (pat);
27652 dest = SET_DEST (pat);
27653
27654 gcc_assert (src == stack_pointer_rtx);
27655 reg = REGNO (dest);
27656 asm_fprintf (asm_out_file, "\t.unwind_raw 0, 0x%x @ vsp = r%d\n",
27657 reg + 0x90, reg);
27658 }
27659 handled_one = true;
27660 break;
27661
27662 /* The INSN is generated in the epilogue. It is set as RTX_FRAME_RELATED_P
27663 to get correct dwarf information for shrink-wrapping. We should not
27664 emit unwind information for it, because these notes are used either for
27665 pretend arguments or to adjust sp and restore registers from the
27666 stack. */
27667 case REG_CFA_DEF_CFA:
27668 case REG_CFA_ADJUST_CFA:
27669 case REG_CFA_RESTORE:
27670 return;
27671
27672 case REG_CFA_EXPRESSION:
27673 case REG_CFA_OFFSET:
27674 /* ??? Only handling here what we actually emit. */
27675 gcc_unreachable ();
27676
27677 default:
27678 break;
27679 }
27680 }
27681 if (handled_one)
27682 return;
27683 pat = PATTERN (insn);
27684 found:
27685
27686 switch (GET_CODE (pat))
27687 {
27688 case SET:
27689 arm_unwind_emit_set (asm_out_file, pat);
27690 break;
27691
27692 case SEQUENCE:
27693 /* Store multiple. */
27694 arm_unwind_emit_sequence (asm_out_file, pat);
27695 break;
27696
27697 default:
27698 abort();
27699 }
27700 }
27701
27702
27703 /* Output a reference from a function exception table to the type_info
27704 object X. The EABI specifies that the symbol should be relocated by
27705 an R_ARM_TARGET2 relocation. */
27706
27707 static bool
27708 arm_output_ttype (rtx x)
27709 {
27710 fputs ("\t.word\t", asm_out_file);
27711 output_addr_const (asm_out_file, x);
27712 /* Use special relocations for symbol references. */
27713 if (!CONST_INT_P (x))
27714 fputs ("(TARGET2)", asm_out_file);
27715 fputc ('\n', asm_out_file);
27716
27717 return TRUE;
27718 }
27719
27720 /* Implement TARGET_ASM_EMIT_EXCEPT_PERSONALITY. */
27721
27722 static void
27723 arm_asm_emit_except_personality (rtx personality)
27724 {
27725 fputs ("\t.personality\t", asm_out_file);
27726 output_addr_const (asm_out_file, personality);
27727 fputc ('\n', asm_out_file);
27728 }
27729 #endif /* ARM_UNWIND_INFO */
27730
27731 /* Implement TARGET_ASM_INITIALIZE_SECTIONS. */
27732
27733 static void
27734 arm_asm_init_sections (void)
27735 {
27736 #if ARM_UNWIND_INFO
27737 exception_section = get_unnamed_section (0, output_section_asm_op,
27738 "\t.handlerdata");
27739 #endif /* ARM_UNWIND_INFO */
27740
27741 #ifdef OBJECT_FORMAT_ELF
27742 if (target_pure_code)
27743 text_section->unnamed.data = "\t.section .text,\"0x20000006\",%progbits";
27744 #endif
27745 }
27746
27747 /* Output unwind directives for the start/end of a function. */
27748
27749 void
27750 arm_output_fn_unwind (FILE * f, bool prologue)
27751 {
27752 if (arm_except_unwind_info (&global_options) != UI_TARGET)
27753 return;
27754
27755 if (prologue)
27756 fputs ("\t.fnstart\n", f);
27757 else
27758 {
27759 /* If this function will never be unwound, then mark it as such.
27760 The same condition is used in arm_unwind_emit to suppress
27761 the frame annotations. */
27762 if (!(flag_unwind_tables || crtl->uses_eh_lsda)
27763 && (TREE_NOTHROW (current_function_decl)
27764 || crtl->all_throwers_are_sibcalls))
27765 fputs("\t.cantunwind\n", f);
27766
27767 fputs ("\t.fnend\n", f);
27768 }
27769 }
27770
27771 static bool
27772 arm_emit_tls_decoration (FILE *fp, rtx x)
27773 {
27774 enum tls_reloc reloc;
27775 rtx val;
27776
27777 val = XVECEXP (x, 0, 0);
27778 reloc = (enum tls_reloc) INTVAL (XVECEXP (x, 0, 1));
27779
27780 output_addr_const (fp, val);
27781
27782 switch (reloc)
27783 {
27784 case TLS_GD32:
27785 fputs ("(tlsgd)", fp);
27786 break;
27787 case TLS_LDM32:
27788 fputs ("(tlsldm)", fp);
27789 break;
27790 case TLS_LDO32:
27791 fputs ("(tlsldo)", fp);
27792 break;
27793 case TLS_IE32:
27794 fputs ("(gottpoff)", fp);
27795 break;
27796 case TLS_LE32:
27797 fputs ("(tpoff)", fp);
27798 break;
27799 case TLS_DESCSEQ:
27800 fputs ("(tlsdesc)", fp);
27801 break;
27802 default:
27803 gcc_unreachable ();
27804 }
27805
27806 switch (reloc)
27807 {
27808 case TLS_GD32:
27809 case TLS_LDM32:
27810 case TLS_IE32:
27811 case TLS_DESCSEQ:
27812 fputs (" + (. - ", fp);
27813 output_addr_const (fp, XVECEXP (x, 0, 2));
27814 /* For DESCSEQ the 3rd operand encodes thumbness, and is added rather than subtracted. */
27815 fputs (reloc == TLS_DESCSEQ ? " + " : " - ", fp);
27816 output_addr_const (fp, XVECEXP (x, 0, 3));
27817 fputc (')', fp);
27818 break;
27819 default:
27820 break;
27821 }
27822
27823 return TRUE;
27824 }
27825
27826 /* ARM implementation of TARGET_ASM_OUTPUT_DWARF_DTPREL. */
27827
27828 static void
27829 arm_output_dwarf_dtprel (FILE *file, int size, rtx x)
27830 {
27831 gcc_assert (size == 4);
27832 fputs ("\t.word\t", file);
27833 output_addr_const (file, x);
27834 fputs ("(tlsldo)", file);
27835 }
27836
27837 /* Implement TARGET_ASM_OUTPUT_ADDR_CONST_EXTRA. */
27838
27839 static bool
27840 arm_output_addr_const_extra (FILE *fp, rtx x)
27841 {
27842 if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_TLS)
27843 return arm_emit_tls_decoration (fp, x);
27844 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_PIC_LABEL)
27845 {
27846 char label[256];
27847 int labelno = INTVAL (XVECEXP (x, 0, 0));
27848
27849 ASM_GENERATE_INTERNAL_LABEL (label, "LPIC", labelno);
27850 assemble_name_raw (fp, label);
27851
27852 return TRUE;
27853 }
27854 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_GOTSYM_OFF)
27855 {
27856 assemble_name (fp, "_GLOBAL_OFFSET_TABLE_");
27857 if (GOT_PCREL)
27858 fputs ("+.", fp);
27859 fputs ("-(", fp);
27860 output_addr_const (fp, XVECEXP (x, 0, 0));
27861 fputc (')', fp);
27862 return TRUE;
27863 }
27864 else if (GET_CODE (x) == UNSPEC && XINT (x, 1) == UNSPEC_SYMBOL_OFFSET)
27865 {
27866 output_addr_const (fp, XVECEXP (x, 0, 0));
27867 if (GOT_PCREL)
27868 fputs ("+.", fp);
27869 fputs ("-(", fp);
27870 output_addr_const (fp, XVECEXP (x, 0, 1));
27871 fputc (')', fp);
27872 return TRUE;
27873 }
27874 else if (GET_CODE (x) == CONST_VECTOR)
27875 return arm_emit_vector_const (fp, x);
27876
27877 return FALSE;
27878 }
27879
27880 /* Output assembly for a shift instruction.
27881 SET_FLAGS determines how the instruction modifies the condition codes.
27882 0 - Do not set condition codes.
27883 1 - Set condition codes.
27884 2 - Use smallest instruction. */
27885 const char *
27886 arm_output_shift(rtx * operands, int set_flags)
27887 {
27888 char pattern[100];
27889 static const char flag_chars[3] = {'?', '.', '!'};
27890 const char *shift;
27891 HOST_WIDE_INT val;
27892 char c;
27893
27894 c = flag_chars[set_flags];
27895 shift = shift_op(operands[3], &val);
27896 if (shift)
27897 {
27898 if (val != -1)
27899 operands[2] = GEN_INT(val);
27900 sprintf (pattern, "%s%%%c\t%%0, %%1, %%2", shift, c);
27901 }
27902 else
27903 sprintf (pattern, "mov%%%c\t%%0, %%1", c);
27904
27905 output_asm_insn (pattern, operands);
27906 return "";
27907 }
27908
27909 /* Output assembly for a WMMX immediate shift instruction. */
27910 const char *
27911 arm_output_iwmmxt_shift_immediate (const char *insn_name, rtx *operands, bool wror_or_wsra)
27912 {
27913 int shift = INTVAL (operands[2]);
27914 char templ[50];
27915 machine_mode opmode = GET_MODE (operands[0]);
27916
27917 gcc_assert (shift >= 0);
27918
27919 /* Handle the case where the shift value exceeds the maximum for the mode:
27920 greater than 63 (for the D qualifier), 31 (for the W qualifier) or 15 (for the H qualifier). */
27921 if (((opmode == V4HImode) && (shift > 15))
27922 || ((opmode == V2SImode) && (shift > 31))
27923 || ((opmode == DImode) && (shift > 63)))
27924 {
27925 if (wror_or_wsra)
27926 {
27927 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27928 output_asm_insn (templ, operands);
27929 if (opmode == DImode)
27930 {
27931 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, 32);
27932 output_asm_insn (templ, operands);
27933 }
27934 }
27935 else
27936 {
27937 /* The destination register will contain all zeros. */
27938 sprintf (templ, "wzero\t%%0");
27939 output_asm_insn (templ, operands);
27940 }
27941 return "";
27942 }
27943
27944 if ((opmode == DImode) && (shift > 32))
27945 {
27946 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, 32);
27947 output_asm_insn (templ, operands);
27948 sprintf (templ, "%s\t%%0, %%0, #%d", insn_name, shift - 32);
27949 output_asm_insn (templ, operands);
27950 }
27951 else
27952 {
27953 sprintf (templ, "%s\t%%0, %%1, #%d", insn_name, shift);
27954 output_asm_insn (templ, operands);
27955 }
27956 return "";
27957 }
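
/* A couple of worked cases for the function above (using whatever
   mnemonic the caller passes in INSN_NAME): a DImode shift by 40 is
   split into a shift by #32 followed by a shift by #8; a V4HImode shift
   by 20 is out of range, so a single #32 shift is emitted for wror/wsra,
   or a "wzero" of the destination otherwise.  */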
27958
27959 /* Output assembly for a WMMX tinsr instruction. */
27960 const char *
27961 arm_output_iwmmxt_tinsr (rtx *operands)
27962 {
27963 int mask = INTVAL (operands[3]);
27964 int i;
27965 char templ[50];
27966 int units = mode_nunits[GET_MODE (operands[0])];
27967 gcc_assert ((mask & (mask - 1)) == 0);
27968 for (i = 0; i < units; ++i)
27969 {
27970 if ((mask & 0x01) == 1)
27971 {
27972 break;
27973 }
27974 mask >>= 1;
27975 }
27976 gcc_assert (i < units);
27977 {
27978 switch (GET_MODE (operands[0]))
27979 {
27980 case E_V8QImode:
27981 sprintf (templ, "tinsrb%%?\t%%0, %%2, #%d", i);
27982 break;
27983 case E_V4HImode:
27984 sprintf (templ, "tinsrh%%?\t%%0, %%2, #%d", i);
27985 break;
27986 case E_V2SImode:
27987 sprintf (templ, "tinsrw%%?\t%%0, %%2, #%d", i);
27988 break;
27989 default:
27990 gcc_unreachable ();
27991 break;
27992 }
27993 output_asm_insn (templ, operands);
27994 }
27995 return "";
27996 }
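
/* Example: for a V8QImode destination and a lane mask of 0x04 (only lane
   2 selected), the loop above finds i == 2 and the emitted template is
   "tinsrb%?\t%0, %2, #2".  */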
27997
27998 /* Output a Thumb-1 casesi dispatch sequence. */
27999 const char *
28000 thumb1_output_casesi (rtx *operands)
28001 {
28002 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[0])));
28003
28004 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
28005
28006 switch (GET_MODE(diff_vec))
28007 {
28008 case E_QImode:
28009 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
28010 "bl\t%___gnu_thumb1_case_uqi" : "bl\t%___gnu_thumb1_case_sqi");
28011 case E_HImode:
28012 return (ADDR_DIFF_VEC_FLAGS (diff_vec).offset_unsigned ?
28013 "bl\t%___gnu_thumb1_case_uhi" : "bl\t%___gnu_thumb1_case_shi");
28014 case E_SImode:
28015 return "bl\t%___gnu_thumb1_case_si";
28016 default:
28017 gcc_unreachable ();
28018 }
28019 }
28020
28021 /* Output a Thumb-2 casesi instruction. */
28022 const char *
28023 thumb2_output_casesi (rtx *operands)
28024 {
28025 rtx diff_vec = PATTERN (NEXT_INSN (as_a <rtx_insn *> (operands[2])));
28026
28027 gcc_assert (GET_CODE (diff_vec) == ADDR_DIFF_VEC);
28028
28029 output_asm_insn ("cmp\t%0, %1", operands);
28030 output_asm_insn ("bhi\t%l3", operands);
28031 switch (GET_MODE(diff_vec))
28032 {
28033 case E_QImode:
28034 return "tbb\t[%|pc, %0]";
28035 case E_HImode:
28036 return "tbh\t[%|pc, %0, lsl #1]";
28037 case E_SImode:
28038 if (flag_pic)
28039 {
28040 output_asm_insn ("adr\t%4, %l2", operands);
28041 output_asm_insn ("ldr\t%5, [%4, %0, lsl #2]", operands);
28042 output_asm_insn ("add\t%4, %4, %5", operands);
28043 return "bx\t%4";
28044 }
28045 else
28046 {
28047 output_asm_insn ("adr\t%4, %l2", operands);
28048 return "ldr\t%|pc, [%4, %0, lsl #2]";
28049 }
28050 default:
28051 gcc_unreachable ();
28052 }
28053 }
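
/* Putting the pieces together, a Thumb-2 casesi with a QImode dispatch
   table expands to a sequence along the lines of

	cmp	r0, #<max index>
	bhi	.Ldefault
	tbb	[pc, r0]

   where the bound is shown as an immediate for illustration and
   .Ldefault stands for the %l3 default label.  */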
28054
28055 /* Implement TARGET_SCHED_ISSUE_RATE. Lookup the issue rate in the
28056 per-core tuning structs. */
28057 static int
28058 arm_issue_rate (void)
28059 {
28060 return current_tune->issue_rate;
28061 }
28062
28063 /* Return how many instructions the scheduler should look ahead to choose
28064 the best one. */
28065 static int
28066 arm_first_cycle_multipass_dfa_lookahead (void)
28067 {
28068 int issue_rate = arm_issue_rate ();
28069
28070 return issue_rate > 1 && !sched_fusion ? issue_rate : 0;
28071 }
28072
28073 /* Enable modeling of L2 auto-prefetcher. */
28074 static int
28075 arm_first_cycle_multipass_dfa_lookahead_guard (rtx_insn *insn, int ready_index)
28076 {
28077 return autopref_multipass_dfa_lookahead_guard (insn, ready_index);
28078 }
28079
28080 const char *
28081 arm_mangle_type (const_tree type)
28082 {
28083 /* The ARM ABI documents (10th October 2008) say that "__va_list"
28084 has to be mangled as if it is in the "std" namespace. */
28085 if (TARGET_AAPCS_BASED
28086 && lang_hooks.types_compatible_p (CONST_CAST_TREE (type), va_list_type))
28087 return "St9__va_list";
28088
28089 /* Half-precision float. */
28090 if (TREE_CODE (type) == REAL_TYPE && TYPE_PRECISION (type) == 16)
28091 return "Dh";
28092
28093 /* Try mangling as a Neon type; TYPE_NAME is non-NULL if this is a
28094 builtin type. */
28095 if (TYPE_NAME (type) != NULL)
28096 return arm_mangle_builtin_type (type);
28097
28098 /* Use the default mangling. */
28099 return NULL;
28100 }
28101
28102 /* Order of allocation of core registers for Thumb: this allocation is
28103 written over the corresponding initial entries of the array
28104 initialized with REG_ALLOC_ORDER. We allocate all low registers
28105 first. Saving and restoring a low register is usually cheaper than
28106 using a call-clobbered high register. */
28107
28108 static const int thumb_core_reg_alloc_order[] =
28109 {
28110 3, 2, 1, 0, 4, 5, 6, 7,
28111 12, 14, 8, 9, 10, 11
28112 };
28113
28114 /* Adjust register allocation order when compiling for Thumb. */
28115
28116 void
28117 arm_order_regs_for_local_alloc (void)
28118 {
28119 const int arm_reg_alloc_order[] = REG_ALLOC_ORDER;
28120 memcpy(reg_alloc_order, arm_reg_alloc_order, sizeof (reg_alloc_order));
28121 if (TARGET_THUMB)
28122 memcpy (reg_alloc_order, thumb_core_reg_alloc_order,
28123 sizeof (thumb_core_reg_alloc_order));
28124 }
28125
28126 /* Implement TARGET_FRAME_POINTER_REQUIRED. */
28127
28128 bool
28129 arm_frame_pointer_required (void)
28130 {
28131 if (SUBTARGET_FRAME_POINTER_REQUIRED)
28132 return true;
28133
28134 /* If the function receives nonlocal gotos, it needs to save the frame
28135 pointer in the nonlocal_goto_save_area object. */
28136 if (cfun->has_nonlocal_label)
28137 return true;
28138
28139 /* The frame pointer is required for non-leaf APCS frames. */
28140 if (TARGET_ARM && TARGET_APCS_FRAME && !crtl->is_leaf)
28141 return true;
28142
28143 /* If we are probing the stack in the prologue, we will have a faulting
28144 instruction prior to the stack adjustment and this requires a frame
28145 pointer if we want to catch the exception using the EABI unwinder. */
28146 if (!IS_INTERRUPT (arm_current_func_type ())
28147 && (flag_stack_check == STATIC_BUILTIN_STACK_CHECK
28148 || flag_stack_clash_protection)
28149 && arm_except_unwind_info (&global_options) == UI_TARGET
28150 && cfun->can_throw_non_call_exceptions)
28151 {
28152 HOST_WIDE_INT size = get_frame_size ();
28153
28154 /* That's irrelevant if there is no stack adjustment. */
28155 if (size <= 0)
28156 return false;
28157
28158 /* That's relevant only if there is a stack probe. */
28159 if (crtl->is_leaf && !cfun->calls_alloca)
28160 {
28161 /* We don't have the final size of the frame so adjust. */
28162 size += 32 * UNITS_PER_WORD;
28163 if (size > PROBE_INTERVAL && size > get_stack_check_protect ())
28164 return true;
28165 }
28166 else
28167 return true;
28168 }
28169
28170 return false;
28171 }
28172
28173 /* Thumb1 is the only target that does not support conditional execution,
28174 so return true if the target is not thumb1. */
28175 static bool
28176 arm_have_conditional_execution (void)
28177 {
28178 return !TARGET_THUMB1;
28179 }
28180
28181 /* The AAPCS sets the maximum alignment of a vector to 64 bits. */
28182 static HOST_WIDE_INT
28183 arm_vector_alignment (const_tree type)
28184 {
28185 HOST_WIDE_INT align = tree_to_shwi (TYPE_SIZE (type));
28186
28187 if (TARGET_AAPCS_BASED)
28188 align = MIN (align, 64);
28189
28190 return align;
28191 }
28192
28193 static void
28194 arm_autovectorize_vector_sizes (vector_sizes *sizes)
28195 {
28196 if (!TARGET_NEON_VECTORIZE_DOUBLE)
28197 {
28198 sizes->safe_push (16);
28199 sizes->safe_push (8);
28200 }
28201 }
28202
28203 static bool
28204 arm_vector_alignment_reachable (const_tree type, bool is_packed)
28205 {
28206 /* Vectors which aren't in packed structures will not be less aligned than
28207 the natural alignment of their element type, so this is safe. */
28208 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28209 return !is_packed;
28210
28211 return default_builtin_vector_alignment_reachable (type, is_packed);
28212 }
28213
28214 static bool
28215 arm_builtin_support_vector_misalignment (machine_mode mode,
28216 const_tree type, int misalignment,
28217 bool is_packed)
28218 {
28219 if (TARGET_NEON && !BYTES_BIG_ENDIAN && unaligned_access)
28220 {
28221 HOST_WIDE_INT align = TYPE_ALIGN_UNIT (type);
28222
28223 if (is_packed)
28224 return align == 1;
28225
28226 /* If the misalignment is unknown, we should be able to handle the access
28227 so long as it is not to a member of a packed data structure. */
28228 if (misalignment == -1)
28229 return true;
28230
28231 /* Return true if the misalignment is a multiple of the natural alignment
28232 of the vector's element type. This is probably always going to be
28233 true in practice, since we've already established that this isn't a
28234 packed access. */
28235 return ((misalignment % align) == 0);
28236 }
28237
28238 return default_builtin_support_vector_misalignment (mode, type, misalignment,
28239 is_packed);
28240 }
28241
28242 static void
28243 arm_conditional_register_usage (void)
28244 {
28245 int regno;
28246
28247 if (TARGET_THUMB1 && optimize_size)
28248 {
28249 /* When optimizing for size on Thumb-1, it's better not
28250 to use the HI regs, because of the overhead of
28251 stacking them. */
28252 for (regno = FIRST_HI_REGNUM; regno <= LAST_HI_REGNUM; ++regno)
28253 fixed_regs[regno] = call_used_regs[regno] = 1;
28254 }
28255
28256 /* The link register can be clobbered by any branch insn,
28257 but we have no way to track that at present, so mark
28258 it as unavailable. */
28259 if (TARGET_THUMB1)
28260 fixed_regs[LR_REGNUM] = call_used_regs[LR_REGNUM] = 1;
28261
28262 if (TARGET_32BIT && TARGET_HARD_FLOAT)
28263 {
28264 /* VFPv3 registers are disabled when earlier VFP
28265 versions are selected due to the definition of
28266 LAST_VFP_REGNUM. */
28267 for (regno = FIRST_VFP_REGNUM;
28268 regno <= LAST_VFP_REGNUM; ++ regno)
28269 {
28270 fixed_regs[regno] = 0;
28271 call_used_regs[regno] = regno < FIRST_VFP_REGNUM + 16
28272 || regno >= FIRST_VFP_REGNUM + 32;
28273 }
28274 }
28275
28276 if (TARGET_REALLY_IWMMXT)
28277 {
28278 regno = FIRST_IWMMXT_GR_REGNUM;
28279 /* The 2002/10/09 revision of the XScale ABI has wCG0
28280 and wCG1 as call-preserved registers. The 2002/11/21
28281 revision changed this so that all wCG registers are
28282 scratch registers. */
28283 for (regno = FIRST_IWMMXT_GR_REGNUM;
28284 regno <= LAST_IWMMXT_GR_REGNUM; ++ regno)
28285 fixed_regs[regno] = 0;
28286 /* The XScale ABI has wR0 - wR9 as scratch registers,
28287 the rest as call-preserved registers. */
28288 for (regno = FIRST_IWMMXT_REGNUM;
28289 regno <= LAST_IWMMXT_REGNUM; ++ regno)
28290 {
28291 fixed_regs[regno] = 0;
28292 call_used_regs[regno] = regno < FIRST_IWMMXT_REGNUM + 10;
28293 }
28294 }
28295
28296 if ((unsigned) PIC_OFFSET_TABLE_REGNUM != INVALID_REGNUM)
28297 {
28298 fixed_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28299 call_used_regs[PIC_OFFSET_TABLE_REGNUM] = 1;
28300 }
28301 else if (TARGET_APCS_STACK)
28302 {
28303 fixed_regs[10] = 1;
28304 call_used_regs[10] = 1;
28305 }
28306 /* -mcaller-super-interworking reserves r11 for calls to
28307 _interwork_r11_call_via_rN(). Making the register global
28308 is an easy way of ensuring that it remains valid for all
28309 calls. */
28310 if (TARGET_APCS_FRAME || TARGET_CALLER_INTERWORKING
28311 || TARGET_TPCS_FRAME || TARGET_TPCS_LEAF_FRAME)
28312 {
28313 fixed_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28314 call_used_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28315 if (TARGET_CALLER_INTERWORKING)
28316 global_regs[ARM_HARD_FRAME_POINTER_REGNUM] = 1;
28317 }
28318 SUBTARGET_CONDITIONAL_REGISTER_USAGE
28319 }
28320
28321 static reg_class_t
28322 arm_preferred_rename_class (reg_class_t rclass)
28323 {
28324 /* Thumb-2 instructions using LO_REGS may be smaller than instructions
28325 using GENERAL_REGS. During the register rename pass we therefore prefer
28326 LO_REGS, which can reduce code size. */
28327 if (TARGET_THUMB2 && rclass == GENERAL_REGS)
28328 return LO_REGS;
28329 else
28330 return NO_REGS;
28331 }
28332
28333 /* Compute the attribute "length" of insn "*push_multi".
28334 So this function MUST be kept in sync with that insn pattern. */
28335 int
28336 arm_attr_length_push_multi(rtx parallel_op, rtx first_op)
28337 {
28338 int i, regno, hi_reg;
28339 int num_saves = XVECLEN (parallel_op, 0);
28340
28341 /* ARM mode. */
28342 if (TARGET_ARM)
28343 return 4;
28344 /* Thumb1 mode. */
28345 if (TARGET_THUMB1)
28346 return 2;
28347
28348 /* Thumb2 mode. */
28349 regno = REGNO (first_op);
28350 /* For PUSH/STM under Thumb2 mode, we can use 16-bit encodings if the register
28351 list fits in 8 bits. Normally this means all registers in the list must be
28352 LO_REGS, that is (R0-R7). If any HI_REGS are used, then we must use 32-bit
28353 encodings. The one exception is PUSH, where LR (a HI_REG) can still be used
28354 with the 16-bit encoding. */
28355 hi_reg = (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28356 for (i = 1; i < num_saves && !hi_reg; i++)
28357 {
28358 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, i), 0));
28359 hi_reg |= (REGNO_REG_CLASS (regno) == HI_REGS) && (regno != LR_REGNUM);
28360 }
28361
28362 if (!hi_reg)
28363 return 2;
28364 return 4;
28365 }
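
/* So, under Thumb-2, "push {r0-r7, lr}" gets length 2 because a 16-bit
   encoding is possible (LR is the only high register in the list), while
   "push {r4, r8}" gets length 4, since r8 forces the 32-bit encoding.  */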
28366
28367 /* Compute the attribute "length" of insn. Currently, this function is used
28368 for "*load_multiple_with_writeback", "*pop_multiple_with_return" and
28369 "*pop_multiple_with_writeback_and_return". OPERANDS is the toplevel PARALLEL
28370 rtx. RETURN_PC is true if OPERANDS contains a return insn. WRITE_BACK_P is
28371 true if OPERANDS contains an insn that explicitly updates the base register. */
28372
28373 int
28374 arm_attr_length_pop_multi (rtx *operands, bool return_pc, bool write_back_p)
28375 {
28376 /* ARM mode. */
28377 if (TARGET_ARM)
28378 return 4;
28379 /* Thumb1 mode. */
28380 if (TARGET_THUMB1)
28381 return 2;
28382
28383 rtx parallel_op = operands[0];
28384 /* Initialize to the index of the last element of the PARALLEL. */
28385 unsigned indx = XVECLEN (parallel_op, 0) - 1;
28386 /* Initialize the value to the base register number. */
28387 unsigned regno = REGNO (operands[1]);
28388 /* Skip the return and write-back patterns.
28389 We only need the register pop patterns for the later analysis. */
28390 unsigned first_indx = 0;
28391 first_indx += return_pc ? 1 : 0;
28392 first_indx += write_back_p ? 1 : 0;
28393
28394 /* A pop operation can be done through LDM or POP. If the base register is SP
28395 and write-back is used, then an LDM will be an alias of POP.
28396 bool pop_p = (regno == SP_REGNUM && write_back_p);
28397 bool ldm_p = !pop_p;
28398
28399 /* Check base register for LDM. */
28400 if (ldm_p && REGNO_REG_CLASS (regno) == HI_REGS)
28401 return 4;
28402
28403 /* Check each register in the list. */
28404 for (; indx >= first_indx; indx--)
28405 {
28406 regno = REGNO (XEXP (XVECEXP (parallel_op, 0, indx), 0));
28407 /* For POP, PC in HI_REGS can be used with 16-bit encoding. See similar
28408 comment in arm_attr_length_push_multi. */
28409 if (REGNO_REG_CLASS (regno) == HI_REGS
28410 && (regno != PC_REGNUM || ldm_p))
28411 return 4;
28412 }
28413
28414 return 2;
28415 }
28416
28417 /* Compute the number of instructions emitted by output_move_double. */
28418 int
28419 arm_count_output_move_double_insns (rtx *operands)
28420 {
28421 int count;
28422 rtx ops[2];
28423 /* output_move_double may modify the operands array, so call it
28424 here on a copy of the array. */
28425 ops[0] = operands[0];
28426 ops[1] = operands[1];
28427 output_move_double (ops, false, &count);
28428 return count;
28429 }
28430
28431 int
28432 vfp3_const_double_for_fract_bits (rtx operand)
28433 {
28434 REAL_VALUE_TYPE r0;
28435
28436 if (!CONST_DOUBLE_P (operand))
28437 return 0;
28438
28439 r0 = *CONST_DOUBLE_REAL_VALUE (operand);
28440 if (exact_real_inverse (DFmode, &r0)
28441 && !REAL_VALUE_NEGATIVE (r0))
28442 {
28443 if (exact_real_truncate (DFmode, &r0))
28444 {
28445 HOST_WIDE_INT value = real_to_integer (&r0);
28446 value = value & 0xffffffff;
28447 if ((value != 0) && ( (value & (value - 1)) == 0))
28448 {
28449 int ret = exact_log2 (value);
28450 gcc_assert (IN_RANGE (ret, 0, 31));
28451 return ret;
28452 }
28453 }
28454 }
28455 return 0;
28456 }
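
/* For instance, the function above maps the constant 0.03125 (1/32) to 5,
   since its exact inverse is 32.0 == 2^5, while a constant such as 0.3,
   whose inverse is not an exact power of two, yields 0.  */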
28457
28458 /* If X is a CONST_DOUBLE with a value that is a power of 2 whose
28459 log2 is in [1, 32], return that log2. Otherwise return -1.
28460 This is used in the patterns for vcvt.s32.f32 floating-point to
28461 fixed-point conversions. */
28462
28463 int
28464 vfp3_const_double_for_bits (rtx x)
28465 {
28466 const REAL_VALUE_TYPE *r;
28467
28468 if (!CONST_DOUBLE_P (x))
28469 return -1;
28470
28471 r = CONST_DOUBLE_REAL_VALUE (x);
28472
28473 if (REAL_VALUE_NEGATIVE (*r)
28474 || REAL_VALUE_ISNAN (*r)
28475 || REAL_VALUE_ISINF (*r)
28476 || !real_isinteger (r, SFmode))
28477 return -1;
28478
28479 HOST_WIDE_INT hwint = exact_log2 (real_to_integer (r));
28480
28481 /* The exact_log2 above will have returned -1 if this is
28482 not an exact log2. */
28483 if (!IN_RANGE (hwint, 1, 32))
28484 return -1;
28485
28486 return hwint;
28487 }
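
/* Worked examples for the function above: 65536.0 yields 16 (an exact
   power of two with log2 in [1, 32]); 1.0 yields -1 because its log2 is
   0; 3.0 and -4.0 also yield -1 (not a power of two, respectively
   negative).  */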
28488
28489 \f
28490 /* Emit a memory barrier around an atomic sequence according to MODEL. */
28491
28492 static void
28493 arm_pre_atomic_barrier (enum memmodel model)
28494 {
28495 if (need_atomic_barrier_p (model, true))
28496 emit_insn (gen_memory_barrier ());
28497 }
28498
28499 static void
28500 arm_post_atomic_barrier (enum memmodel model)
28501 {
28502 if (need_atomic_barrier_p (model, false))
28503 emit_insn (gen_memory_barrier ());
28504 }
28505
28506 /* Emit the load-exclusive and store-exclusive instructions.
28507 Use acquire and release versions if necessary. */
28508
28509 static void
28510 arm_emit_load_exclusive (machine_mode mode, rtx rval, rtx mem, bool acq)
28511 {
28512 rtx (*gen) (rtx, rtx);
28513
28514 if (acq)
28515 {
28516 switch (mode)
28517 {
28518 case E_QImode: gen = gen_arm_load_acquire_exclusiveqi; break;
28519 case E_HImode: gen = gen_arm_load_acquire_exclusivehi; break;
28520 case E_SImode: gen = gen_arm_load_acquire_exclusivesi; break;
28521 case E_DImode: gen = gen_arm_load_acquire_exclusivedi; break;
28522 default:
28523 gcc_unreachable ();
28524 }
28525 }
28526 else
28527 {
28528 switch (mode)
28529 {
28530 case E_QImode: gen = gen_arm_load_exclusiveqi; break;
28531 case E_HImode: gen = gen_arm_load_exclusivehi; break;
28532 case E_SImode: gen = gen_arm_load_exclusivesi; break;
28533 case E_DImode: gen = gen_arm_load_exclusivedi; break;
28534 default:
28535 gcc_unreachable ();
28536 }
28537 }
28538
28539 emit_insn (gen (rval, mem));
28540 }
28541
28542 static void
28543 arm_emit_store_exclusive (machine_mode mode, rtx bval, rtx rval,
28544 rtx mem, bool rel)
28545 {
28546 rtx (*gen) (rtx, rtx, rtx);
28547
28548 if (rel)
28549 {
28550 switch (mode)
28551 {
28552 case E_QImode: gen = gen_arm_store_release_exclusiveqi; break;
28553 case E_HImode: gen = gen_arm_store_release_exclusivehi; break;
28554 case E_SImode: gen = gen_arm_store_release_exclusivesi; break;
28555 case E_DImode: gen = gen_arm_store_release_exclusivedi; break;
28556 default:
28557 gcc_unreachable ();
28558 }
28559 }
28560 else
28561 {
28562 switch (mode)
28563 {
28564 case E_QImode: gen = gen_arm_store_exclusiveqi; break;
28565 case E_HImode: gen = gen_arm_store_exclusivehi; break;
28566 case E_SImode: gen = gen_arm_store_exclusivesi; break;
28567 case E_DImode: gen = gen_arm_store_exclusivedi; break;
28568 default:
28569 gcc_unreachable ();
28570 }
28571 }
28572
28573 emit_insn (gen (bval, rval, mem));
28574 }
28575
28576 /* Mark the previous jump instruction as unlikely. */
28577
28578 static void
28579 emit_unlikely_jump (rtx insn)
28580 {
28581 rtx_insn *jump = emit_jump_insn (insn);
28582 add_reg_br_prob_note (jump, profile_probability::very_unlikely ());
28583 }
28584
28585 /* Expand a compare and swap pattern. */
28586
28587 void
28588 arm_expand_compare_and_swap (rtx operands[])
28589 {
28590 rtx bval, bdst, rval, mem, oldval, newval, is_weak, mod_s, mod_f, x;
28591 machine_mode mode, cmp_mode;
28592
28593 bval = operands[0];
28594 rval = operands[1];
28595 mem = operands[2];
28596 oldval = operands[3];
28597 newval = operands[4];
28598 is_weak = operands[5];
28599 mod_s = operands[6];
28600 mod_f = operands[7];
28601 mode = GET_MODE (mem);
28602
28603 /* Normally the succ memory model must be stronger than fail, but in the
28604 unlikely event of fail being ACQUIRE and succ being RELEASE we need to
28605 promote succ to ACQ_REL so that we don't lose the acquire semantics. */
28606
28607 if (TARGET_HAVE_LDACQ
28608 && is_mm_acquire (memmodel_from_int (INTVAL (mod_f)))
28609 && is_mm_release (memmodel_from_int (INTVAL (mod_s))))
28610 mod_s = GEN_INT (MEMMODEL_ACQ_REL);
28611
28612 switch (mode)
28613 {
28614 case E_QImode:
28615 case E_HImode:
28616 /* For narrow modes, we're going to perform the comparison in SImode,
28617 so do the zero-extension now. */
28618 rval = gen_reg_rtx (SImode);
28619 oldval = convert_modes (SImode, mode, oldval, true);
28620 /* FALLTHRU */
28621
28622 case E_SImode:
28623 /* Force the value into a register if needed. We waited until after
28624 the zero-extension above to do this properly. */
28625 if (!arm_add_operand (oldval, SImode))
28626 oldval = force_reg (SImode, oldval);
28627 break;
28628
28629 case E_DImode:
28630 if (!cmpdi_operand (oldval, mode))
28631 oldval = force_reg (mode, oldval);
28632 break;
28633
28634 default:
28635 gcc_unreachable ();
28636 }
28637
28638 if (TARGET_THUMB1)
28639 cmp_mode = E_SImode;
28640 else
28641 cmp_mode = CC_Zmode;
28642
28643 bdst = TARGET_THUMB1 ? bval : gen_rtx_REG (CC_Zmode, CC_REGNUM);
28644 emit_insn (gen_atomic_compare_and_swap_1 (cmp_mode, mode, bdst, rval, mem,
28645 oldval, newval, is_weak, mod_s, mod_f));
28646
28647 if (mode == QImode || mode == HImode)
28648 emit_move_insn (operands[1], gen_lowpart (mode, rval));
28649
28650 /* In all cases, we arrange for success to be signaled by Z set.
28651 This arrangement allows for the boolean result to be used directly
28652 in a subsequent branch, post optimization. For Thumb-1 targets, the
28653 boolean negation of the result is also stored in bval, because the Thumb-1
28654 backend lacks dependency tracking for the CC flag since flag setting is not
28655 represented at the RTL level. */
28656 if (TARGET_THUMB1)
28657 emit_insn (gen_cstoresi_eq0_thumb1 (bval, bdst));
28658 else
28659 {
28660 x = gen_rtx_EQ (SImode, bdst, const0_rtx);
28661 emit_insn (gen_rtx_SET (bval, x));
28662 }
28663 }
28664
28665 /* Split a compare and swap pattern. It is IMPLEMENTATION DEFINED whether
28666 another memory store between the load-exclusive and store-exclusive can
28667 reset the monitor from Exclusive to Open state. This means we must wait
28668 until after reload to split the pattern, lest we get a register spill in
28669 the middle of the atomic sequence. Success of the compare and swap is
28670 indicated by the Z flag being set for 32-bit targets and by neg_bval being
28671 zero for Thumb-1 targets (i.e. the negation of the boolean value returned by
28672 atomic_compare_and_swapmode standard pattern in operand 0). */
28673
28674 void
28675 arm_split_compare_and_swap (rtx operands[])
28676 {
28677 rtx rval, mem, oldval, newval, neg_bval, mod_s_rtx;
28678 machine_mode mode;
28679 enum memmodel mod_s, mod_f;
28680 bool is_weak;
28681 rtx_code_label *label1, *label2;
28682 rtx x, cond;
28683
28684 rval = operands[1];
28685 mem = operands[2];
28686 oldval = operands[3];
28687 newval = operands[4];
28688 is_weak = (operands[5] != const0_rtx);
28689 mod_s_rtx = operands[6];
28690 mod_s = memmodel_from_int (INTVAL (mod_s_rtx));
28691 mod_f = memmodel_from_int (INTVAL (operands[7]));
28692 neg_bval = TARGET_THUMB1 ? operands[0] : operands[8];
28693 mode = GET_MODE (mem);
28694
28695 bool is_armv8_sync = arm_arch8 && is_mm_sync (mod_s);
28696
28697 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (mod_s_rtx);
28698 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (mod_s_rtx);
28699
28700 /* For ARMv8, the load-acquire is too weak for __sync memory orders. Instead,
28701 a full barrier is emitted after the store-release. */
28702 if (is_armv8_sync)
28703 use_acquire = false;
28704
28705 /* Checks whether a barrier is needed and emits one accordingly. */
28706 if (!(use_acquire || use_release))
28707 arm_pre_atomic_barrier (mod_s);
28708
28709 label1 = NULL;
28710 if (!is_weak)
28711 {
28712 label1 = gen_label_rtx ();
28713 emit_label (label1);
28714 }
28715 label2 = gen_label_rtx ();
28716
28717 arm_emit_load_exclusive (mode, rval, mem, use_acquire);
28718
28719 /* Z is set to 0 for 32-bit targets (resp. neg_bval set to 1 for Thumb-1)
28720 if oldval != rval, as required to communicate with arm_expand_compare_and_swap. */
28721 if (TARGET_32BIT)
28722 {
28723 cond = arm_gen_compare_reg (NE, rval, oldval, neg_bval);
28724 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28725 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
28726 gen_rtx_LABEL_REF (Pmode, label2), pc_rtx);
28727 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
28728 }
28729 else
28730 {
28731 emit_move_insn (neg_bval, const1_rtx);
28732 cond = gen_rtx_NE (VOIDmode, rval, oldval);
28733 if (thumb1_cmpneg_operand (oldval, SImode))
28734 emit_unlikely_jump (gen_cbranchsi4_scratch (neg_bval, rval, oldval,
28735 label2, cond));
28736 else
28737 emit_unlikely_jump (gen_cbranchsi4_insn (cond, rval, oldval, label2));
28738 }
28739
28740 arm_emit_store_exclusive (mode, neg_bval, mem, newval, use_release);
28741
28742 /* Weak or strong, we want EQ to be true for success, so that we
28743 match the flags that we got from the compare above. */
28744 if (TARGET_32BIT)
28745 {
28746 cond = gen_rtx_REG (CCmode, CC_REGNUM);
28747 x = gen_rtx_COMPARE (CCmode, neg_bval, const0_rtx);
28748 emit_insn (gen_rtx_SET (cond, x));
28749 }
28750
28751 if (!is_weak)
28752 {
28753 /* Z is set to boolean value of !neg_bval, as required to communicate
28754 with arm_expand_compare_and_swap. */
28755 x = gen_rtx_NE (VOIDmode, neg_bval, const0_rtx);
28756 emit_unlikely_jump (gen_cbranchsi4 (x, neg_bval, const0_rtx, label1));
28757 }
28758
28759 if (!is_mm_relaxed (mod_f))
28760 emit_label (label2);
28761
28762 /* Checks whether a barrier is needed and emits one accordingly. */
28763 if (is_armv8_sync
28764 || !(use_acquire || use_release))
28765 arm_post_atomic_barrier (mod_s);
28766
28767 if (is_mm_relaxed (mod_f))
28768 emit_label (label2);
28769 }
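/* Illustrative sketch: after reload the strong variant of the sequence
   above becomes a retry loop of roughly the following shape on an ARMv7
   32-bit target (register assignments are hypothetical):

     1:  ldrex   r3, [r0]        load-exclusive the current value
         cmp     r3, r1          compare with OLDVAL; Z signals success
         bne     2f
         strex   ip, r2, [r0]    try to store NEWVAL
         cmp     ip, #0
         bne     1b              monitor was lost, retry
     2:

   For __sync memory models on ARMv8, a full barrier is additionally
   emitted after the loop, as arranged by the is_armv8_sync handling.  */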
28770
28771 /* Split an atomic operation pattern. Operation is given by CODE and is one
28772 of PLUS, MINUS, IOR, XOR, SET (for an exchange operation) or NOT (for a nand
28773 operation). Operation is performed on the content at MEM and on VALUE
28774 following the memory model MODEL_RTX. The content at MEM before and after
28775 the operation is returned in OLD_OUT and NEW_OUT respectively while the
28776 success of the operation is returned in COND. Using a scratch register or
28777 an operand register for these determines what result is returned for that
28778 pattern. */
28779
28780 void
28781 arm_split_atomic_op (enum rtx_code code, rtx old_out, rtx new_out, rtx mem,
28782 rtx value, rtx model_rtx, rtx cond)
28783 {
28784 enum memmodel model = memmodel_from_int (INTVAL (model_rtx));
28785 machine_mode mode = GET_MODE (mem);
28786 machine_mode wmode = (mode == DImode ? DImode : SImode);
28787 rtx_code_label *label;
28788 bool all_low_regs, bind_old_new;
28789 rtx x;
28790
28791 bool is_armv8_sync = arm_arch8 && is_mm_sync (model);
28792
28793 bool use_acquire = TARGET_HAVE_LDACQ && aarch_mm_needs_acquire (model_rtx);
28794 bool use_release = TARGET_HAVE_LDACQ && aarch_mm_needs_release (model_rtx);
28795
28796 /* For ARMv8, a load-acquire is too weak for __sync memory orders. Instead,
28797 a full barrier is emitted after the store-release. */
28798 if (is_armv8_sync)
28799 use_acquire = false;
28800
28801 /* Checks whether a barrier is needed and emits one accordingly. */
28802 if (!(use_acquire || use_release))
28803 arm_pre_atomic_barrier (model);
28804
28805 label = gen_label_rtx ();
28806 emit_label (label);
28807
28808 if (new_out)
28809 new_out = gen_lowpart (wmode, new_out);
28810 if (old_out)
28811 old_out = gen_lowpart (wmode, old_out);
28812 else
28813 old_out = new_out;
28814 value = simplify_gen_subreg (wmode, value, mode, 0);
28815
28816 arm_emit_load_exclusive (mode, old_out, mem, use_acquire);
28817
28818 /* Does the operation require destination and first operand to use the same
28819 register? This is decided by register constraints of relevant insn
28820 patterns in thumb1.md. */
28821 gcc_assert (!new_out || REG_P (new_out));
28822 all_low_regs = REG_P (value) && REGNO_REG_CLASS (REGNO (value)) == LO_REGS
28823 && new_out && REGNO_REG_CLASS (REGNO (new_out)) == LO_REGS
28824 && REGNO_REG_CLASS (REGNO (old_out)) == LO_REGS;
28825 bind_old_new =
28826 (TARGET_THUMB1
28827 && code != SET
28828 && code != MINUS
28829 && (code != PLUS || (!all_low_regs && !satisfies_constraint_L (value))));
28830
28831 /* We want to return the old value while putting the result of the operation
28832 in the same register as the old value so copy the old value over to the
28833 destination register and use that register for the operation. */
28834 if (old_out && bind_old_new)
28835 {
28836 emit_move_insn (new_out, old_out);
28837 old_out = new_out;
28838 }
28839
28840 switch (code)
28841 {
28842 case SET:
28843 new_out = value;
28844 break;
28845
28846 case NOT:
28847 x = gen_rtx_AND (wmode, old_out, value);
28848 emit_insn (gen_rtx_SET (new_out, x));
28849 x = gen_rtx_NOT (wmode, new_out);
28850 emit_insn (gen_rtx_SET (new_out, x));
28851 break;
28852
28853 case MINUS:
28854 if (CONST_INT_P (value))
28855 {
28856 value = GEN_INT (-INTVAL (value));
28857 code = PLUS;
28858 }
28859 /* FALLTHRU */
28860
28861 case PLUS:
28862 if (mode == DImode)
28863 {
28864 /* DImode plus/minus need to clobber flags. */
28865 /* The adddi3 and subdi3 patterns are incorrectly written so that
28866 they require matching operands, even when we could easily support
28867 three operands. Thankfully, this can be fixed up post-splitting,
28868 as the individual add+adc patterns do accept three operands and
28869 post-reload cprop can make these moves go away. */
28870 emit_move_insn (new_out, old_out);
28871 if (code == PLUS)
28872 x = gen_adddi3 (new_out, new_out, value);
28873 else
28874 x = gen_subdi3 (new_out, new_out, value);
28875 emit_insn (x);
28876 break;
28877 }
28878 /* FALLTHRU */
28879
28880 default:
28881 x = gen_rtx_fmt_ee (code, wmode, old_out, value);
28882 emit_insn (gen_rtx_SET (new_out, x));
28883 break;
28884 }
28885
28886 arm_emit_store_exclusive (mode, cond, mem, gen_lowpart (mode, new_out),
28887 use_release);
28888
28889 x = gen_rtx_NE (VOIDmode, cond, const0_rtx);
28890 emit_unlikely_jump (gen_cbranchsi4 (x, cond, const0_rtx, label));
28891
28892 /* Checks whether a barrier is needed and emits one accordingly. */
28893 if (is_armv8_sync
28894 || !(use_acquire || use_release))
28895 arm_post_atomic_barrier (model);
28896 }
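/* Illustrative sketch: the kind of source-level operation this splitter
   serves can be written with the documented __atomic builtins, e.g. the
   hypothetical helper

     #include <stdint.h>

     uint32_t fetch_add (uint32_t *p, uint32_t v)
     {
       return __atomic_fetch_add (p, v, __ATOMIC_ACQ_REL);
     }

   On a target with the ARMv8 acquire/release instructions, and with both
   OLD_OUT and NEW_OUT supplied, the split produces a retry loop of roughly
   this shape (register names are hypothetical):

     1:  ldaex   r3, [r0]        old value -> OLD_OUT
         add     r2, r3, r1      NEW_OUT = OLD_OUT + VALUE
         stlex   ip, r2, [r0]
         cmp     ip, #0
         bne     1b                                                  */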
28897 \f
28898 #define MAX_VECT_LEN 16
28899
28900 struct expand_vec_perm_d
28901 {
28902 rtx target, op0, op1;
28903 vec_perm_indices perm;
28904 machine_mode vmode;
28905 bool one_vector_p;
28906 bool testing_p;
28907 };
28908
28909 /* Generate a variable permutation. */
28910
28911 static void
28912 arm_expand_vec_perm_1 (rtx target, rtx op0, rtx op1, rtx sel)
28913 {
28914 machine_mode vmode = GET_MODE (target);
28915 bool one_vector_p = rtx_equal_p (op0, op1);
28916
28917 gcc_checking_assert (vmode == V8QImode || vmode == V16QImode);
28918 gcc_checking_assert (GET_MODE (op0) == vmode);
28919 gcc_checking_assert (GET_MODE (op1) == vmode);
28920 gcc_checking_assert (GET_MODE (sel) == vmode);
28921 gcc_checking_assert (TARGET_NEON);
28922
28923 if (one_vector_p)
28924 {
28925 if (vmode == V8QImode)
28926 emit_insn (gen_neon_vtbl1v8qi (target, op0, sel));
28927 else
28928 emit_insn (gen_neon_vtbl1v16qi (target, op0, sel));
28929 }
28930 else
28931 {
28932 rtx pair;
28933
28934 if (vmode == V8QImode)
28935 {
28936 pair = gen_reg_rtx (V16QImode);
28937 emit_insn (gen_neon_vcombinev8qi (pair, op0, op1));
28938 pair = gen_lowpart (TImode, pair);
28939 emit_insn (gen_neon_vtbl2v8qi (target, pair, sel));
28940 }
28941 else
28942 {
28943 pair = gen_reg_rtx (OImode);
28944 emit_insn (gen_neon_vcombinev16qi (pair, op0, op1));
28945 emit_insn (gen_neon_vtbl2v16qi (target, pair, sel));
28946 }
28947 }
28948 }
28949
28950 void
28951 arm_expand_vec_perm (rtx target, rtx op0, rtx op1, rtx sel)
28952 {
28953 machine_mode vmode = GET_MODE (target);
28954 unsigned int nelt = GET_MODE_NUNITS (vmode);
28955 bool one_vector_p = rtx_equal_p (op0, op1);
28956 rtx mask;
28957
28958 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
28959 numbering of elements for big-endian, we must reverse the order. */
28960 gcc_checking_assert (!BYTES_BIG_ENDIAN);
28961
28962 /* The VTBL instruction does not use a modulo index, so we must take care
28963 of that ourselves. */
28964 mask = GEN_INT (one_vector_p ? nelt - 1 : 2 * nelt - 1);
28965 mask = gen_const_vec_duplicate (vmode, mask);
28966 sel = expand_simple_binop (vmode, AND, sel, mask, NULL, 0, OPTAB_LIB_WIDEN);
28967
28968 arm_expand_vec_perm_1 (target, op0, op1, sel);
28969 }
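/* Illustrative sketch: a source-level variable permutation that reaches
   this expander can be written with GCC's __builtin_shuffle, whose
   selector elements are taken modulo the (doubled) element count; the AND
   with the mask above implements that wrap-around before VTBL is used.
   A hypothetical helper:

     typedef unsigned char u8x8 __attribute__ ((vector_size (8)));

     u8x8 permute (u8x8 a, u8x8 b, u8x8 sel)
     {
       return __builtin_shuffle (a, b, sel);
     }
   */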
28970
28971 /* Map lane ordering between architectural lane order, and GCC lane order,
28972 taking into account ABI. See comment above output_move_neon for details. */
28973
28974 static int
28975 neon_endian_lane_map (machine_mode mode, int lane)
28976 {
28977 if (BYTES_BIG_ENDIAN)
28978 {
28979 int nelems = GET_MODE_NUNITS (mode);
28980 /* Reverse lane order. */
28981 lane = (nelems - 1 - lane);
28982 /* Reverse D register order, to match ABI. */
28983 if (GET_MODE_SIZE (mode) == 16)
28984 lane = lane ^ (nelems / 2);
28985 }
28986 return lane;
28987 }
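/* Worked example: for V4SImode on a big-endian target, nelems is 4 and
   GET_MODE_SIZE is 16, so GCC lanes 0, 1, 2, 3 map to architectural lanes
   1, 0, 3, 2 respectively ((nelems - 1 - lane), then the XOR with
   nelems / 2); on little-endian targets the mapping is the identity.  */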
28988
28989 /* Some permutations index into pairs of vectors, this is a helper function
28990 to map indexes into those pairs of vectors. */
28991
28992 static int
28993 neon_pair_endian_lane_map (machine_mode mode, int lane)
28994 {
28995 int nelem = GET_MODE_NUNITS (mode);
28996 if (BYTES_BIG_ENDIAN)
28997 lane =
28998 neon_endian_lane_map (mode, lane & (nelem - 1)) + (lane & nelem);
28999 return lane;
29000 }
29001
29002 /* Generate or test for an insn that supports a constant permutation. */
29003
29004 /* Recognize patterns for the VUZP insns. */
29005
29006 static bool
29007 arm_evpc_neon_vuzp (struct expand_vec_perm_d *d)
29008 {
29009 unsigned int i, odd, mask, nelt = d->perm.length ();
29010 rtx out0, out1, in0, in1;
29011 int first_elem;
29012 int swap_nelt;
29013
29014 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29015 return false;
29016
29017 /* arm_expand_vec_perm_const_1 () helpfully swaps the operands for the
29018 big-endian pattern on 64-bit vectors, so we correct for that. */
29019 swap_nelt = BYTES_BIG_ENDIAN && !d->one_vector_p
29020 && GET_MODE_SIZE (d->vmode) == 8 ? nelt : 0;
29021
29022 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0)] ^ swap_nelt;
29023
29024 if (first_elem == neon_endian_lane_map (d->vmode, 0))
29025 odd = 0;
29026 else if (first_elem == neon_endian_lane_map (d->vmode, 1))
29027 odd = 1;
29028 else
29029 return false;
29030 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29031
29032 for (i = 0; i < nelt; i++)
29033 {
29034 unsigned elt =
29035 (neon_pair_endian_lane_map (d->vmode, i) * 2 + odd) & mask;
29036 if ((d->perm[i] ^ swap_nelt) != neon_pair_endian_lane_map (d->vmode, elt))
29037 return false;
29038 }
29039
29040 /* Success! */
29041 if (d->testing_p)
29042 return true;
29043
29044 in0 = d->op0;
29045 in1 = d->op1;
29046 if (swap_nelt != 0)
29047 std::swap (in0, in1);
29048
29049 out0 = d->target;
29050 out1 = gen_reg_rtx (d->vmode);
29051 if (odd)
29052 std::swap (out0, out1);
29053
29054 emit_insn (gen_neon_vuzp_internal (d->vmode, out0, in0, in1, out1));
29055 return true;
29056 }
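/* Illustrative sketch: on a little-endian target the recognizer above
   accepts, with odd == 0, a constant selector picking all even-numbered
   lanes of the concatenated inputs, e.g. the hypothetical helper

     typedef unsigned char u8x8 __attribute__ ((vector_size (8)));

     u8x8 uzp_even (u8x8 a, u8x8 b)
     {
       return __builtin_shuffle (a, b, (u8x8) { 0, 2, 4, 6, 8, 10, 12, 14 });
     }

   which corresponds to one output of VUZP.8.  */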
29057
29058 /* Recognize patterns for the VZIP insns. */
29059
29060 static bool
29061 arm_evpc_neon_vzip (struct expand_vec_perm_d *d)
29062 {
29063 unsigned int i, high, mask, nelt = d->perm.length ();
29064 rtx out0, out1, in0, in1;
29065 int first_elem;
29066 bool is_swapped;
29067
29068 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29069 return false;
29070
29071 is_swapped = BYTES_BIG_ENDIAN;
29072
29073 first_elem = d->perm[neon_endian_lane_map (d->vmode, 0) ^ is_swapped];
29074
29075 high = nelt / 2;
29076 if (first_elem == neon_endian_lane_map (d->vmode, high))
29077 ;
29078 else if (first_elem == neon_endian_lane_map (d->vmode, 0))
29079 high = 0;
29080 else
29081 return false;
29082 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29083
29084 for (i = 0; i < nelt / 2; i++)
29085 {
29086 unsigned elt =
29087 neon_pair_endian_lane_map (d->vmode, i + high) & mask;
29088 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + is_swapped)]
29089 != elt)
29090 return false;
29091 elt =
29092 neon_pair_endian_lane_map (d->vmode, i + nelt + high) & mask;
29093 if (d->perm[neon_pair_endian_lane_map (d->vmode, 2 * i + !is_swapped)]
29094 != elt)
29095 return false;
29096 }
29097
29098 /* Success! */
29099 if (d->testing_p)
29100 return true;
29101
29102 in0 = d->op0;
29103 in1 = d->op1;
29104 if (is_swapped)
29105 std::swap (in0, in1);
29106
29107 out0 = d->target;
29108 out1 = gen_reg_rtx (d->vmode);
29109 if (high)
29110 std::swap (out0, out1);
29111
29112 emit_insn (gen_neon_vzip_internal (d->vmode, out0, in0, in1, out1));
29113 return true;
29114 }
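/* Illustrative sketch: on a little-endian target the recognizer above
   accepts, with high == 0, a selector interleaving the low halves of the
   two inputs, e.g. the hypothetical helper

     typedef unsigned char u8x8 __attribute__ ((vector_size (8)));

     u8x8 zip_lo (u8x8 a, u8x8 b)
     {
       return __builtin_shuffle (a, b, (u8x8) { 0, 8, 1, 9, 2, 10, 3, 11 });
     }

   which corresponds to one output of VZIP.8.  */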
29115
29116 /* Recognize patterns for the VREV insns. */
29117 static bool
29118 arm_evpc_neon_vrev (struct expand_vec_perm_d *d)
29119 {
29120 unsigned int i, j, diff, nelt = d->perm.length ();
29121 rtx (*gen) (machine_mode, rtx, rtx);
29122
29123 if (!d->one_vector_p)
29124 return false;
29125
29126 diff = d->perm[0];
29127 switch (diff)
29128 {
29129 case 7:
29130 switch (d->vmode)
29131 {
29132 case E_V16QImode:
29133 case E_V8QImode:
29134 gen = gen_neon_vrev64;
29135 break;
29136 default:
29137 return false;
29138 }
29139 break;
29140 case 3:
29141 switch (d->vmode)
29142 {
29143 case E_V16QImode:
29144 case E_V8QImode:
29145 gen = gen_neon_vrev32;
29146 break;
29147 case E_V8HImode:
29148 case E_V4HImode:
29149 case E_V8HFmode:
29150 case E_V4HFmode:
29151 gen = gen_neon_vrev64;
29152 break;
29153 default:
29154 return false;
29155 }
29156 break;
29157 case 1:
29158 switch (d->vmode)
29159 {
29160 case E_V16QImode:
29161 case E_V8QImode:
29162 gen = gen_neon_vrev16;
29163 break;
29164 case E_V8HImode:
29165 case E_V4HImode:
29166 gen = gen_neon_vrev32;
29167 break;
29168 case E_V4SImode:
29169 case E_V2SImode:
29170 case E_V4SFmode:
29171 case E_V2SFmode:
29172 gen = gen_neon_vrev64;
29173 break;
29174 default:
29175 return false;
29176 }
29177 break;
29178 default:
29179 return false;
29180 }
29181
29182 for (i = 0; i < nelt ; i += diff + 1)
29183 for (j = 0; j <= diff; j += 1)
29184 {
29185 /* This is guaranteed to be true, since diff is 7, 3 or 1
29186 and we should have enough elements in the queue to
29187 generate this. Getting a vector mask with any other
29188 value of diff implies that something has gone wrong by
29189 the time we get here. */
29190 gcc_assert (i + j < nelt);
29191 if (d->perm[i + j] != i + diff - j)
29192 return false;
29193 }
29194
29195 /* Success! */
29196 if (d->testing_p)
29197 return true;
29198
29199 emit_insn (gen (d->vmode, d->target, d->op0));
29200 return true;
29201 }
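/* Illustrative sketch: a single-vector selector that reverses elements
   within each 32-bit chunk, e.g. the hypothetical helper

     typedef unsigned char u8x8 __attribute__ ((vector_size (8)));

     u8x8 rev32 (u8x8 a)
     {
       return __builtin_shuffle (a, (u8x8) { 3, 2, 1, 0, 7, 6, 5, 4 });
     }

   has diff == 3 for V8QImode and is matched by the VREV32 case above.  */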
29202
29203 /* Recognize patterns for the VTRN insns. */
29204
29205 static bool
29206 arm_evpc_neon_vtrn (struct expand_vec_perm_d *d)
29207 {
29208 unsigned int i, odd, mask, nelt = d->perm.length ();
29209 rtx out0, out1, in0, in1;
29210
29211 if (GET_MODE_UNIT_SIZE (d->vmode) >= 8)
29212 return false;
29213
29214 /* Note that these are little-endian tests. Adjust for big-endian later. */
29215 if (d->perm[0] == 0)
29216 odd = 0;
29217 else if (d->perm[0] == 1)
29218 odd = 1;
29219 else
29220 return false;
29221 mask = (d->one_vector_p ? nelt - 1 : 2 * nelt - 1);
29222
29223 for (i = 0; i < nelt; i += 2)
29224 {
29225 if (d->perm[i] != i + odd)
29226 return false;
29227 if (d->perm[i + 1] != ((i + nelt + odd) & mask))
29228 return false;
29229 }
29230
29231 /* Success! */
29232 if (d->testing_p)
29233 return true;
29234
29235 in0 = d->op0;
29236 in1 = d->op1;
29237 if (BYTES_BIG_ENDIAN)
29238 {
29239 std::swap (in0, in1);
29240 odd = !odd;
29241 }
29242
29243 out0 = d->target;
29244 out1 = gen_reg_rtx (d->vmode);
29245 if (odd)
29246 std::swap (out0, out1);
29247
29248 emit_insn (gen_neon_vtrn_internal (d->vmode, out0, in0, in1, out1));
29249 return true;
29250 }
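/* Illustrative sketch: on a little-endian target the recognizer above
   accepts, with odd == 0, a selector taking even lanes of the first input
   interleaved with even lanes of the second, e.g. the hypothetical helper

     typedef unsigned char u8x8 __attribute__ ((vector_size (8)));

     u8x8 trn_even (u8x8 a, u8x8 b)
     {
       return __builtin_shuffle (a, b, (u8x8) { 0, 8, 2, 10, 4, 12, 6, 14 });
     }

   which corresponds to one output of VTRN.8.  */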
29251
29252 /* Recognize patterns for the VEXT insns. */
29253
29254 static bool
29255 arm_evpc_neon_vext (struct expand_vec_perm_d *d)
29256 {
29257 unsigned int i, nelt = d->perm.length ();
29258 rtx offset;
29259
29260 unsigned int location;
29261
29262 unsigned int next = d->perm[0] + 1;
29263
29264 /* TODO: Handle GCC's numbering of elements for big-endian. */
29265 if (BYTES_BIG_ENDIAN)
29266 return false;
29267
29268 /* Check if the extracted indexes are increasing by one. */
29269 for (i = 1; i < nelt; next++, i++)
29270 {
29271 /* If we hit the most significant element of the 2nd vector in
29272 the previous iteration, no need to test further. */
29273 if (next == 2 * nelt)
29274 return false;
29275
29276 /* If we are operating on only one vector: it could be a
29277 rotation. If there are only two elements of size < 64, let
29278 arm_evpc_neon_vrev catch it. */
29279 if (d->one_vector_p && (next == nelt))
29280 {
29281 if ((nelt == 2) && (d->vmode != V2DImode))
29282 return false;
29283 else
29284 next = 0;
29285 }
29286
29287 if (d->perm[i] != next)
29288 return false;
29289 }
29290
29291 location = d->perm[0];
29292
29293 /* Success! */
29294 if (d->testing_p)
29295 return true;
29296
29297 offset = GEN_INT (location);
29298
29299 if (d->vmode == E_DImode)
29300 return false;
29301
29302 emit_insn (gen_neon_vext (d->vmode, d->target, d->op0, d->op1, offset));
29303 return true;
29304 }
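/* Illustrative sketch: a selector whose indices increase by one selects a
   contiguous window of the concatenated inputs, e.g. the hypothetical
   helper

     typedef unsigned char u8x8 __attribute__ ((vector_size (8)));

     u8x8 ext3 (u8x8 a, u8x8 b)
     {
       return __builtin_shuffle (a, b, (u8x8) { 3, 4, 5, 6, 7, 8, 9, 10 });
     }

   is accepted above with location == 3 and emitted as VEXT.8 with an
   immediate of 3 (little-endian only).  */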
29305
29306 /* The NEON VTBL instruction is a fully variable permutation that's even
29307 stronger than what we expose via VEC_PERM_EXPR. What it doesn't do
29308 is mask the index operand as VEC_PERM_EXPR requires. Therefore we
29309 can do slightly better by expanding this as a constant where we don't
29310 have to apply a mask. */
29311
29312 static bool
29313 arm_evpc_neon_vtbl (struct expand_vec_perm_d *d)
29314 {
29315 rtx rperm[MAX_VECT_LEN], sel;
29316 machine_mode vmode = d->vmode;
29317 unsigned int i, nelt = d->perm.length ();
29318
29319 /* TODO: ARM's VTBL indexing is little-endian. In order to handle GCC's
29320 numbering of elements for big-endian, we must reverse the order. */
29321 if (BYTES_BIG_ENDIAN)
29322 return false;
29323
29324 if (d->testing_p)
29325 return true;
29326
29327 /* Generic code will try constant permutation twice: once with the
29328 original mode and again with the elements lowered to QImode.
29329 So wait and don't do the selector expansion ourselves. */
29330 if (vmode != V8QImode && vmode != V16QImode)
29331 return false;
29332
29333 for (i = 0; i < nelt; ++i)
29334 rperm[i] = GEN_INT (d->perm[i]);
29335 sel = gen_rtx_CONST_VECTOR (vmode, gen_rtvec_v (nelt, rperm));
29336 sel = force_reg (vmode, sel);
29337
29338 arm_expand_vec_perm_1 (d->target, d->op0, d->op1, sel);
29339 return true;
29340 }
29341
29342 static bool
29343 arm_expand_vec_perm_const_1 (struct expand_vec_perm_d *d)
29344 {
29345 /* Check if the input mask matches vext before reordering the
29346 operands. */
29347 if (TARGET_NEON)
29348 if (arm_evpc_neon_vext (d))
29349 return true;
29350
29351 /* The pattern matching functions above are written to look for a small
29352 number to begin the sequence (0, 1, N/2). If we begin with an index
29353 from the second operand, we can swap the operands. */
29354 unsigned int nelt = d->perm.length ();
29355 if (d->perm[0] >= nelt)
29356 {
29357 d->perm.rotate_inputs (1);
29358 std::swap (d->op0, d->op1);
29359 }
29360
29361 if (TARGET_NEON)
29362 {
29363 if (arm_evpc_neon_vuzp (d))
29364 return true;
29365 if (arm_evpc_neon_vzip (d))
29366 return true;
29367 if (arm_evpc_neon_vrev (d))
29368 return true;
29369 if (arm_evpc_neon_vtrn (d))
29370 return true;
29371 return arm_evpc_neon_vtbl (d);
29372 }
29373 return false;
29374 }
29375
29376 /* Implement TARGET_VECTORIZE_VEC_PERM_CONST. */
29377
29378 static bool
29379 arm_vectorize_vec_perm_const (machine_mode vmode, rtx target, rtx op0, rtx op1,
29380 const vec_perm_indices &sel)
29381 {
29382 struct expand_vec_perm_d d;
29383 int i, nelt, which;
29384
29385 if (!VALID_NEON_DREG_MODE (vmode) && !VALID_NEON_QREG_MODE (vmode))
29386 return false;
29387
29388 d.target = target;
29389 d.op0 = op0;
29390 d.op1 = op1;
29391
29392 d.vmode = vmode;
29393 gcc_assert (VECTOR_MODE_P (d.vmode));
29394 d.testing_p = !target;
29395
29396 nelt = GET_MODE_NUNITS (d.vmode);
29397 for (i = which = 0; i < nelt; ++i)
29398 {
29399 int ei = sel[i] & (2 * nelt - 1);
29400 which |= (ei < nelt ? 1 : 2);
29401 }
29402
29403 switch (which)
29404 {
29405 default:
29406 gcc_unreachable();
29407
29408 case 3:
29409 d.one_vector_p = false;
29410 if (d.testing_p || !rtx_equal_p (op0, op1))
29411 break;
29412
29413 /* The elements of PERM do not suggest that only the first operand
29414 is used, but both operands are identical. Allow easier matching
29415 of the permutation by folding the permutation into the single
29416 input vector. */
29417 /* FALLTHRU */
29418 case 2:
29419 d.op0 = op1;
29420 d.one_vector_p = true;
29421 break;
29422
29423 case 1:
29424 d.op1 = op0;
29425 d.one_vector_p = true;
29426 break;
29427 }
29428
29429 d.perm.new_vector (sel.encoding (), d.one_vector_p ? 1 : 2, nelt);
29430
29431 if (!d.testing_p)
29432 return arm_expand_vec_perm_const_1 (&d);
29433
29434 d.target = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 1);
29435 d.op1 = d.op0 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 2);
29436 if (!d.one_vector_p)
29437 d.op1 = gen_raw_REG (d.vmode, LAST_VIRTUAL_REGISTER + 3);
29438
29439 start_sequence ();
29440 bool ret = arm_expand_vec_perm_const_1 (&d);
29441 end_sequence ();
29442
29443 return ret;
29444 }
29445
29446 bool
29447 arm_autoinc_modes_ok_p (machine_mode mode, enum arm_auto_incmodes code)
29448 {
29449 /* If we are soft float and we either have ldrd or the mode is no
29450 wider than a word, then all auto-increment forms are ok. */
29451 if (TARGET_SOFT_FLOAT && (TARGET_LDRD || GET_MODE_SIZE (mode) <= 4))
29452 return true;
29453
29454 switch (code)
29455 {
29456 /* Post-increment and pre-decrement are supported for all
29457 instruction forms except for vector forms. */
29458 case ARM_POST_INC:
29459 case ARM_PRE_DEC:
29460 if (VECTOR_MODE_P (mode))
29461 {
29462 if (code != ARM_PRE_DEC)
29463 return true;
29464 else
29465 return false;
29466 }
29467
29468 return true;
29469
29470 case ARM_POST_DEC:
29471 case ARM_PRE_INC:
29472 /* Without LDRD, when the mode size is greater than the
29473 word size there is no point in auto-incrementing,
29474 because ldm and stm will not have these forms. */
29475 if (!TARGET_LDRD && GET_MODE_SIZE (mode) > 4)
29476 return false;
29477
29478 /* Vector and floating point modes do not support
29479 these auto increment forms. */
29480 if (FLOAT_MODE_P (mode) || VECTOR_MODE_P (mode))
29481 return false;
29482
29483 return true;
29484
29485 default:
29486 return false;
29487
29488 }
29489
29490 return false;
29491 }
29492
29493 /* The default expansion of general 64-bit shifts in core-regs is suboptimal
29494 on ARM, since we know that shifts by negative amounts are no-ops.
29495 Additionally, the default expansion code is not available or suitable
29496 for post-reload insn splits (this can occur when the register allocator
29497 chooses not to do a shift in NEON).
29498
29499 This function is used in both initial expand and post-reload splits, and
29500 handles all kinds of 64-bit shifts.
29501
29502 Input requirements:
29503 - It is safe for the input and output to be the same register, but
29504 early-clobber rules apply for the shift amount and scratch registers.
29505 - Shift by register requires both scratch registers. In all other cases
29506 the scratch registers may be NULL.
29507 - Ashiftrt by a register also clobbers the CC register. */
29508 void
29509 arm_emit_coreregs_64bit_shift (enum rtx_code code, rtx out, rtx in,
29510 rtx amount, rtx scratch1, rtx scratch2)
29511 {
29512 rtx out_high = gen_highpart (SImode, out);
29513 rtx out_low = gen_lowpart (SImode, out);
29514 rtx in_high = gen_highpart (SImode, in);
29515 rtx in_low = gen_lowpart (SImode, in);
29516
29517 /* Terminology:
29518 in = the register pair containing the input value.
29519 out = the destination register pair.
29520 up = the high- or low-part of each pair.
29521 down = the opposite part to "up".
29522 In a shift, we can consider bits to shift from "up"-stream to
29523 "down"-stream, so in a left-shift "up" is the low-part and "down"
29524 is the high-part of each register pair. */
29525
29526 rtx out_up = code == ASHIFT ? out_low : out_high;
29527 rtx out_down = code == ASHIFT ? out_high : out_low;
29528 rtx in_up = code == ASHIFT ? in_low : in_high;
29529 rtx in_down = code == ASHIFT ? in_high : in_low;
29530
29531 gcc_assert (code == ASHIFT || code == ASHIFTRT || code == LSHIFTRT);
29532 gcc_assert (out
29533 && (REG_P (out) || GET_CODE (out) == SUBREG)
29534 && GET_MODE (out) == DImode);
29535 gcc_assert (in
29536 && (REG_P (in) || GET_CODE (in) == SUBREG)
29537 && GET_MODE (in) == DImode);
29538 gcc_assert (amount
29539 && (((REG_P (amount) || GET_CODE (amount) == SUBREG)
29540 && GET_MODE (amount) == SImode)
29541 || CONST_INT_P (amount)));
29542 gcc_assert (scratch1 == NULL
29543 || (GET_CODE (scratch1) == SCRATCH)
29544 || (GET_MODE (scratch1) == SImode
29545 && REG_P (scratch1)));
29546 gcc_assert (scratch2 == NULL
29547 || (GET_CODE (scratch2) == SCRATCH)
29548 || (GET_MODE (scratch2) == SImode
29549 && REG_P (scratch2)));
29550 gcc_assert (!REG_P (out) || !REG_P (amount)
29551 || !HARD_REGISTER_P (out)
29552 || (REGNO (out) != REGNO (amount)
29553 && REGNO (out) + 1 != REGNO (amount)));
29554
29555 /* Macros to make following code more readable. */
29556 #define SUB_32(DEST,SRC) \
29557 gen_addsi3 ((DEST), (SRC), GEN_INT (-32))
29558 #define RSB_32(DEST,SRC) \
29559 gen_subsi3 ((DEST), GEN_INT (32), (SRC))
29560 #define SUB_S_32(DEST,SRC) \
29561 gen_addsi3_compare0 ((DEST), (SRC), \
29562 GEN_INT (-32))
29563 #define SET(DEST,SRC) \
29564 gen_rtx_SET ((DEST), (SRC))
29565 #define SHIFT(CODE,SRC,AMOUNT) \
29566 gen_rtx_fmt_ee ((CODE), SImode, (SRC), (AMOUNT))
29567 #define LSHIFT(CODE,SRC,AMOUNT) \
29568 gen_rtx_fmt_ee ((CODE) == ASHIFT ? ASHIFT : LSHIFTRT, \
29569 SImode, (SRC), (AMOUNT))
29570 #define REV_LSHIFT(CODE,SRC,AMOUNT) \
29571 gen_rtx_fmt_ee ((CODE) == ASHIFT ? LSHIFTRT : ASHIFT, \
29572 SImode, (SRC), (AMOUNT))
29573 #define ORR(A,B) \
29574 gen_rtx_IOR (SImode, (A), (B))
29575 #define BRANCH(COND,LABEL) \
29576 gen_arm_cond_branch ((LABEL), \
29577 gen_rtx_ ## COND (CCmode, cc_reg, \
29578 const0_rtx), \
29579 cc_reg)
29580
29581 /* Shifts by register and shifts by constant are handled separately. */
29582 if (CONST_INT_P (amount))
29583 {
29584 /* We have a shift-by-constant. */
29585
29586 /* First, handle out-of-range shift amounts.
29587 In both cases we try to match the result an ARM instruction in a
29588 shift-by-register would give. This helps reduce execution
29589 differences between optimization levels, but it won't stop other
29590 parts of the compiler doing different things. This is "undefined
29591 behavior", in any case. */
29592 if (INTVAL (amount) <= 0)
29593 emit_insn (gen_movdi (out, in));
29594 else if (INTVAL (amount) >= 64)
29595 {
29596 if (code == ASHIFTRT)
29597 {
29598 rtx const31_rtx = GEN_INT (31);
29599 emit_insn (SET (out_down, SHIFT (code, in_up, const31_rtx)));
29600 emit_insn (SET (out_up, SHIFT (code, in_up, const31_rtx)));
29601 }
29602 else
29603 emit_insn (gen_movdi (out, const0_rtx));
29604 }
29605
29606 /* Now handle valid shifts. */
29607 else if (INTVAL (amount) < 32)
29608 {
29609 /* Shifts by a constant less than 32. */
29610 rtx reverse_amount = GEN_INT (32 - INTVAL (amount));
29611
29612 /* Clearing the out register in DImode first avoids lots
29613 of spilling and results in less stack usage.
29614 Later this redundant insn is completely removed.
29615 Do that only if "in" and "out" are different registers. */
29616 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29617 emit_insn (SET (out, const0_rtx));
29618 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29619 emit_insn (SET (out_down,
29620 ORR (REV_LSHIFT (code, in_up, reverse_amount),
29621 out_down)));
29622 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29623 }
29624 else
29625 {
29626 /* Shifts by a constant greater than 31. */
29627 rtx adj_amount = GEN_INT (INTVAL (amount) - 32);
29628
29629 if (REG_P (out) && REG_P (in) && REGNO (out) != REGNO (in))
29630 emit_insn (SET (out, const0_rtx));
29631 emit_insn (SET (out_down, SHIFT (code, in_up, adj_amount)));
29632 if (code == ASHIFTRT)
29633 emit_insn (gen_ashrsi3 (out_up, in_up,
29634 GEN_INT (31)));
29635 else
29636 emit_insn (SET (out_up, const0_rtx));
29637 }
29638 }
29639 else
29640 {
29641 /* We have a shift-by-register. */
29642 rtx cc_reg = gen_rtx_REG (CC_NOOVmode, CC_REGNUM);
29643
29644 /* This alternative requires the scratch registers. */
29645 gcc_assert (scratch1 && REG_P (scratch1));
29646 gcc_assert (scratch2 && REG_P (scratch2));
29647
29648 /* We will need the values "amount-32" and "32-amount" later.
29649 Swapping them around now allows the later code to be more general. */
29650 switch (code)
29651 {
29652 case ASHIFT:
29653 emit_insn (SUB_32 (scratch1, amount));
29654 emit_insn (RSB_32 (scratch2, amount));
29655 break;
29656 case ASHIFTRT:
29657 emit_insn (RSB_32 (scratch1, amount));
29658 /* Also set CC = amount > 32. */
29659 emit_insn (SUB_S_32 (scratch2, amount));
29660 break;
29661 case LSHIFTRT:
29662 emit_insn (RSB_32 (scratch1, amount));
29663 emit_insn (SUB_32 (scratch2, amount));
29664 break;
29665 default:
29666 gcc_unreachable ();
29667 }
29668
29669 /* Emit code like this:
29670
29671 arithmetic-left:
29672 out_down = in_down << amount;
29673 out_down = (in_up << (amount - 32)) | out_down;
29674 out_down = ((unsigned)in_up >> (32 - amount)) | out_down;
29675 out_up = in_up << amount;
29676
29677 arithmetic-right:
29678 out_down = in_down >> amount;
29679 out_down = (in_up << (32 - amount)) | out_down;
29680 if (amount < 32)
29681 out_down = ((signed)in_up >> (amount - 32)) | out_down;
29682 out_up = in_up << amount;
29683
29684 logical-right:
29685 out_down = in_down >> amount;
29686 out_down = (in_up << (32 - amount)) | out_down;
29687 if (amount < 32)
29688 out_down = ((unsigned)in_up >> (amount - 32)) | out_down;
29689 out_up = in_up << amount;
29690
29691 The ARM and Thumb2 variants are the same but implemented slightly
29692 differently. If this were only called during expand we could just
29693 use the Thumb2 case and let combine do the right thing, but this
29694 can also be called from post-reload splitters. */
29695
29696 emit_insn (SET (out_down, LSHIFT (code, in_down, amount)));
29697
29698 if (!TARGET_THUMB2)
29699 {
29700 /* Emit code for ARM mode. */
29701 emit_insn (SET (out_down,
29702 ORR (SHIFT (ASHIFT, in_up, scratch1), out_down)));
29703 if (code == ASHIFTRT)
29704 {
29705 rtx_code_label *done_label = gen_label_rtx ();
29706 emit_jump_insn (BRANCH (LT, done_label));
29707 emit_insn (SET (out_down, ORR (SHIFT (ASHIFTRT, in_up, scratch2),
29708 out_down)));
29709 emit_label (done_label);
29710 }
29711 else
29712 emit_insn (SET (out_down, ORR (SHIFT (LSHIFTRT, in_up, scratch2),
29713 out_down)));
29714 }
29715 else
29716 {
29717 /* Emit code for Thumb2 mode.
29718 Thumb2 can't do shift and or in one insn. */
29719 emit_insn (SET (scratch1, SHIFT (ASHIFT, in_up, scratch1)));
29720 emit_insn (gen_iorsi3 (out_down, out_down, scratch1));
29721
29722 if (code == ASHIFTRT)
29723 {
29724 rtx_code_label *done_label = gen_label_rtx ();
29725 emit_jump_insn (BRANCH (LT, done_label));
29726 emit_insn (SET (scratch2, SHIFT (ASHIFTRT, in_up, scratch2)));
29727 emit_insn (SET (out_down, ORR (out_down, scratch2)));
29728 emit_label (done_label);
29729 }
29730 else
29731 {
29732 emit_insn (SET (scratch2, SHIFT (LSHIFTRT, in_up, scratch2)));
29733 emit_insn (gen_iorsi3 (out_down, out_down, scratch2));
29734 }
29735 }
29736
29737 emit_insn (SET (out_up, SHIFT (code, in_up, amount)));
29738 }
29739
29740 #undef SUB_32
29741 #undef RSB_32
29742 #undef SUB_S_32
29743 #undef SET
29744 #undef SHIFT
29745 #undef LSHIFT
29746 #undef REV_LSHIFT
29747 #undef ORR
29748 #undef BRANCH
29749 }
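/* Worked sketch of the shift-by-constant case for 0 < N < 32: in C terms
   the three instructions emitted above compute, for a left shift of the
   register pair {lo, hi}, the equivalent of this hypothetical helper:

     #include <stdint.h>

     uint64_t shl64 (uint64_t x, unsigned n)   // assumes 0 < n < 32
     {
       uint32_t lo = (uint32_t) x, hi = (uint32_t) (x >> 32);
       uint32_t out_hi = (hi << n) | (lo >> (32 - n));
       uint32_t out_lo = lo << n;
       return ((uint64_t) out_hi << 32) | out_lo;
     }
   */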
29750
29751 /* Returns true if the pattern is a valid symbolic address, which is either a
29752 symbol_ref or (symbol_ref + addend).
29753
29754 According to the ARM ELF ABI, the initial addend of REL-type relocations
29755 processing MOVW and MOVT instructions is formed by interpreting the 16-bit
29756 literal field of the instruction as a 16-bit signed value in the range
29757 -32768 <= A < 32768. */
29758
29759 bool
29760 arm_valid_symbolic_address_p (rtx addr)
29761 {
29762 rtx xop0, xop1 = NULL_RTX;
29763 rtx tmp = addr;
29764
29765 if (target_word_relocations)
29766 return false;
29767
29768 if (GET_CODE (tmp) == SYMBOL_REF || GET_CODE (tmp) == LABEL_REF)
29769 return true;
29770
29771 /* (const (plus: symbol_ref const_int)) */
29772 if (GET_CODE (addr) == CONST)
29773 tmp = XEXP (addr, 0);
29774
29775 if (GET_CODE (tmp) == PLUS)
29776 {
29777 xop0 = XEXP (tmp, 0);
29778 xop1 = XEXP (tmp, 1);
29779
29780 if (GET_CODE (xop0) == SYMBOL_REF && CONST_INT_P (xop1))
29781 return IN_RANGE (INTVAL (xop1), -0x8000, 0x7fff);
29782 }
29783
29784 return false;
29785 }
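/* Illustrative note: with a REL-type relocation the addend is carried in
   the 16-bit immediate field of the MOVW/MOVT pair itself, so e.g.

     extern int table[64 * 1024];
     int *p = &table[100];        // addend 400: representable

   is acceptable, while an addend outside [-32768, 32767] must be rejected
   here and materialised some other way.  */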
29786
29787 /* Returns true if *COMPARISON is a valid comparison operation, and
29788 puts the operands into a form that is valid. */
29789 bool
29790 arm_validize_comparison (rtx *comparison, rtx * op1, rtx * op2)
29791 {
29792 enum rtx_code code = GET_CODE (*comparison);
29793 int code_int;
29794 machine_mode mode = (GET_MODE (*op1) == VOIDmode)
29795 ? GET_MODE (*op2) : GET_MODE (*op1);
29796
29797 gcc_assert (GET_MODE (*op1) != VOIDmode || GET_MODE (*op2) != VOIDmode);
29798
29799 if (code == UNEQ || code == LTGT)
29800 return false;
29801
29802 code_int = (int)code;
29803 arm_canonicalize_comparison (&code_int, op1, op2, 0);
29804 PUT_CODE (*comparison, (enum rtx_code)code_int);
29805
29806 switch (mode)
29807 {
29808 case E_SImode:
29809 if (!arm_add_operand (*op1, mode))
29810 *op1 = force_reg (mode, *op1);
29811 if (!arm_add_operand (*op2, mode))
29812 *op2 = force_reg (mode, *op2);
29813 return true;
29814
29815 case E_DImode:
29816 if (!cmpdi_operand (*op1, mode))
29817 *op1 = force_reg (mode, *op1);
29818 if (!cmpdi_operand (*op2, mode))
29819 *op2 = force_reg (mode, *op2);
29820 return true;
29821
29822 case E_HFmode:
29823 if (!TARGET_VFP_FP16INST)
29824 break;
29825 /* FP16 comparisons are done in SF mode. */
29826 mode = SFmode;
29827 *op1 = convert_to_mode (mode, *op1, 1);
29828 *op2 = convert_to_mode (mode, *op2, 1);
29829 /* Fall through. */
29830 case E_SFmode:
29831 case E_DFmode:
29832 if (!vfp_compare_operand (*op1, mode))
29833 *op1 = force_reg (mode, *op1);
29834 if (!vfp_compare_operand (*op2, mode))
29835 *op2 = force_reg (mode, *op2);
29836 return true;
29837 default:
29838 break;
29839 }
29840
29841 return false;
29842
29843 }
29844
29845 /* Maximum number of instructions to set block of memory. */
29846 static int
29847 arm_block_set_max_insns (void)
29848 {
29849 if (optimize_function_for_size_p (cfun))
29850 return 4;
29851 else
29852 return current_tune->max_insns_inline_memset;
29853 }
29854
29855 /* Return TRUE if it's profitable to set block of memory for
29856 non-vectorized case. VAL is the value to set the memory
29857 with. LENGTH is the number of bytes to set. ALIGN is the
29858 alignment of the destination memory in bytes. UNALIGNED_P
29859 is TRUE if we can only set the memory with instructions
29860 meeting alignment requirements. USE_STRD_P is TRUE if we
29861 can use strd to set the memory. */
29862 static bool
29863 arm_block_set_non_vect_profit_p (rtx val,
29864 unsigned HOST_WIDE_INT length,
29865 unsigned HOST_WIDE_INT align,
29866 bool unaligned_p, bool use_strd_p)
29867 {
29868 int num = 0;
29869 /* For a leftover of 0-7 bytes, we can set the memory block using
29870 strb/strh/str with the minimum number of instructions. */
29871 const int leftover[8] = {0, 1, 1, 2, 1, 2, 2, 3};
29872
29873 if (unaligned_p)
29874 {
29875 num = arm_const_inline_cost (SET, val);
29876 num += length / align + length % align;
29877 }
29878 else if (use_strd_p)
29879 {
29880 num = arm_const_double_inline_cost (val);
29881 num += (length >> 3) + leftover[length & 7];
29882 }
29883 else
29884 {
29885 num = arm_const_inline_cost (SET, val);
29886 num += (length >> 2) + leftover[length & 3];
29887 }
29888
29889 /* We may be able to combine the last STRH/STRB pair into a single STR
29890 by shifting one byte back. */
29891 if (unaligned_access && length > 3 && (length & 3) == 3)
29892 num--;
29893
29894 return (num <= arm_block_set_max_insns ());
29895 }
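/* Worked example: for a 4-byte-aligned memset of 15 bytes without strd,
   the word path above charges (15 >> 2) == 3 word stores plus
   leftover[15 & 3] == 2 trailing stores, minus one when unaligned access
   lets the final STRH/STRB pair become a single STR, on top of the cost
   of materialising the constant; the total is then compared against
   arm_block_set_max_insns ().  */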
29896
29897 /* Return TRUE if it's profitable to set block of memory for
29898 vectorized case. LENGTH is the number of bytes to set.
29899 ALIGN is the alignment of destination memory in bytes.
29900 MODE is the vector mode used to set the memory. */
29901 static bool
29902 arm_block_set_vect_profit_p (unsigned HOST_WIDE_INT length,
29903 unsigned HOST_WIDE_INT align,
29904 machine_mode mode)
29905 {
29906 int num;
29907 bool unaligned_p = ((align & 3) != 0);
29908 unsigned int nelt = GET_MODE_NUNITS (mode);
29909
29910 /* Instruction loading constant value. */
29911 num = 1;
29912 /* Instructions storing the memory. */
29913 num += (length + nelt - 1) / nelt;
29914 /* Instructions adjusting the address expression. We only need to
29915 adjust the address expression if it is 4-byte aligned and the
29916 leftover bytes can only be stored by a misaligned store instruction. */
29917 if (!unaligned_p && (length & 3) != 0)
29918 num++;
29919
29920 /* Store the first 16 bytes using vst1:v16qi for the aligned case. */
29921 if (!unaligned_p && mode == V16QImode)
29922 num--;
29923
29924 return (num <= arm_block_set_max_insns ());
29925 }
29926
29927 /* Set a block of memory using vectorization instructions for the
29928 unaligned case. We fill the first LENGTH bytes of the memory
29929 area starting from DSTBASE with byte constant VALUE. ALIGN is
29930 the alignment requirement of memory. Return TRUE if succeeded. */
29931 static bool
29932 arm_block_set_unaligned_vect (rtx dstbase,
29933 unsigned HOST_WIDE_INT length,
29934 unsigned HOST_WIDE_INT value,
29935 unsigned HOST_WIDE_INT align)
29936 {
29937 unsigned int i, nelt_v16, nelt_v8, nelt_mode;
29938 rtx dst, mem;
29939 rtx val_vec, reg;
29940 rtx (*gen_func) (rtx, rtx);
29941 machine_mode mode;
29942 unsigned HOST_WIDE_INT v = value;
29943 unsigned int offset = 0;
29944 gcc_assert ((align & 0x3) != 0);
29945 nelt_v8 = GET_MODE_NUNITS (V8QImode);
29946 nelt_v16 = GET_MODE_NUNITS (V16QImode);
29947 if (length >= nelt_v16)
29948 {
29949 mode = V16QImode;
29950 gen_func = gen_movmisalignv16qi;
29951 }
29952 else
29953 {
29954 mode = V8QImode;
29955 gen_func = gen_movmisalignv8qi;
29956 }
29957 nelt_mode = GET_MODE_NUNITS (mode);
29958 gcc_assert (length >= nelt_mode);
29959 /* Skip if it isn't profitable. */
29960 if (!arm_block_set_vect_profit_p (length, align, mode))
29961 return false;
29962
29963 dst = copy_addr_to_reg (XEXP (dstbase, 0));
29964 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29965
29966 v = sext_hwi (v, BITS_PER_WORD);
29967
29968 reg = gen_reg_rtx (mode);
29969 val_vec = gen_const_vec_duplicate (mode, GEN_INT (v));
29970 /* Emit instruction loading the constant value. */
29971 emit_move_insn (reg, val_vec);
29972
29973 /* Handle nelt_mode bytes in a vector. */
29974 for (i = 0; (i + nelt_mode <= length); i += nelt_mode)
29975 {
29976 emit_insn ((*gen_func) (mem, reg));
29977 if (i + 2 * nelt_mode <= length)
29978 {
29979 emit_insn (gen_add2_insn (dst, GEN_INT (nelt_mode)));
29980 offset += nelt_mode;
29981 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29982 }
29983 }
29984
29985 /* If at least nelt_v8 bytes are left over, we must be in
29986 V16QImode. */
29987 gcc_assert ((i + nelt_v8) > length || mode == V16QImode);
29988
29989 /* Handle (8, 16) bytes leftover. */
29990 if (i + nelt_v8 < length)
29991 {
29992 emit_insn (gen_add2_insn (dst, GEN_INT (length - i)));
29993 offset += length - i;
29994 mem = adjust_automodify_address (dstbase, mode, dst, offset);
29995
29996 /* We are shifting bytes back, set the alignment accordingly. */
29997 if ((length & 1) != 0 && align >= 2)
29998 set_mem_align (mem, BITS_PER_UNIT);
29999
30000 emit_insn (gen_movmisalignv16qi (mem, reg));
30001 }
30002 /* Handle (0, 8] bytes leftover. */
30003 else if (i < length && i + nelt_v8 >= length)
30004 {
30005 if (mode == V16QImode)
30006 reg = gen_lowpart (V8QImode, reg);
30007
30008 emit_insn (gen_add2_insn (dst, GEN_INT ((length - i)
30009 + (nelt_mode - nelt_v8))));
30010 offset += (length - i) + (nelt_mode - nelt_v8);
30011 mem = adjust_automodify_address (dstbase, V8QImode, dst, offset);
30012
30013 /* We are shifting bytes back, set the alignment accordingly. */
30014 if ((length & 1) != 0 && align >= 2)
30015 set_mem_align (mem, BITS_PER_UNIT);
30016
30017 emit_insn (gen_movmisalignv8qi (mem, reg));
30018 }
30019
30020 return true;
30021 }
30022
30023 /* Set a block of memory using vectorization instructions for the
30024 aligned case. We fill the first LENGTH bytes of the memory area
30025 starting from DSTBASE with byte constant VALUE. ALIGN is the
30026 alignment requirement of memory. Return TRUE if succeeded. */
30027 static bool
30028 arm_block_set_aligned_vect (rtx dstbase,
30029 unsigned HOST_WIDE_INT length,
30030 unsigned HOST_WIDE_INT value,
30031 unsigned HOST_WIDE_INT align)
30032 {
30033 unsigned int i, nelt_v8, nelt_v16, nelt_mode;
30034 rtx dst, addr, mem;
30035 rtx val_vec, reg;
30036 machine_mode mode;
30037 unsigned int offset = 0;
30038
30039 gcc_assert ((align & 0x3) == 0);
30040 nelt_v8 = GET_MODE_NUNITS (V8QImode);
30041 nelt_v16 = GET_MODE_NUNITS (V16QImode);
30042 if (length >= nelt_v16 && unaligned_access && !BYTES_BIG_ENDIAN)
30043 mode = V16QImode;
30044 else
30045 mode = V8QImode;
30046
30047 nelt_mode = GET_MODE_NUNITS (mode);
30048 gcc_assert (length >= nelt_mode);
30049 /* Skip if it isn't profitable. */
30050 if (!arm_block_set_vect_profit_p (length, align, mode))
30051 return false;
30052
30053 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30054
30055 reg = gen_reg_rtx (mode);
30056 val_vec = gen_const_vec_duplicate (mode, gen_int_mode (value, QImode));
30057 /* Emit instruction loading the constant value. */
30058 emit_move_insn (reg, val_vec);
30059
30060 i = 0;
30061 /* Handle first 16 bytes specially using vst1:v16qi instruction. */
30062 if (mode == V16QImode)
30063 {
30064 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30065 emit_insn (gen_movmisalignv16qi (mem, reg));
30066 i += nelt_mode;
30067 /* Handle (8, 16) bytes leftover using vst1:v16qi again. */
30068 if (i + nelt_v8 < length && i + nelt_v16 > length)
30069 {
30070 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30071 offset += length - nelt_mode;
30072 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30073 /* We are shifting bytes back, set the alignment accordingly. */
30074 if ((length & 0x3) == 0)
30075 set_mem_align (mem, BITS_PER_UNIT * 4);
30076 else if ((length & 0x1) == 0)
30077 set_mem_align (mem, BITS_PER_UNIT * 2);
30078 else
30079 set_mem_align (mem, BITS_PER_UNIT);
30080
30081 emit_insn (gen_movmisalignv16qi (mem, reg));
30082 return true;
30083 }
30084 /* Fall through for bytes leftover. */
30085 mode = V8QImode;
30086 nelt_mode = GET_MODE_NUNITS (mode);
30087 reg = gen_lowpart (V8QImode, reg);
30088 }
30089
30090 /* Handle 8 bytes in a vector. */
30091 for (; (i + nelt_mode <= length); i += nelt_mode)
30092 {
30093 addr = plus_constant (Pmode, dst, i);
30094 mem = adjust_automodify_address (dstbase, mode, addr, offset + i);
30095 emit_move_insn (mem, reg);
30096 }
30097
30098 /* Handle single word leftover by shifting 4 bytes back. We can
30099 use aligned access for this case. */
30100 if (i + UNITS_PER_WORD == length)
30101 {
30102 addr = plus_constant (Pmode, dst, i - UNITS_PER_WORD);
30103 offset += i - UNITS_PER_WORD;
30104 mem = adjust_automodify_address (dstbase, mode, addr, offset);
30105 /* We are shifting 4 bytes back, set the alignment accordingly. */
30106 if (align > UNITS_PER_WORD)
30107 set_mem_align (mem, BITS_PER_UNIT * UNITS_PER_WORD);
30108
30109 emit_move_insn (mem, reg);
30110 }
30111 /* Handle (0, 4), (4, 8) bytes leftover by shifting bytes back.
30112 We have to use unaligned access for this case. */
30113 else if (i < length)
30114 {
30115 emit_insn (gen_add2_insn (dst, GEN_INT (length - nelt_mode)));
30116 offset += length - nelt_mode;
30117 mem = adjust_automodify_address (dstbase, mode, dst, offset);
30118 /* We are shifting bytes back, set the alignment accordingly. */
30119 if ((length & 1) == 0)
30120 set_mem_align (mem, BITS_PER_UNIT * 2);
30121 else
30122 set_mem_align (mem, BITS_PER_UNIT);
30123
30124 emit_insn (gen_movmisalignv8qi (mem, reg));
30125 }
30126
30127 return true;
30128 }
30129
30130 /* Set a block of memory using plain strh/strb instructions, only
30131 using instructions allowed by ALIGN on processor. We fill the
30132 first LENGTH bytes of the memory area starting from DSTBASE
30133 with byte constant VALUE. ALIGN is the alignment requirement
30134 of memory. */
30135 static bool
30136 arm_block_set_unaligned_non_vect (rtx dstbase,
30137 unsigned HOST_WIDE_INT length,
30138 unsigned HOST_WIDE_INT value,
30139 unsigned HOST_WIDE_INT align)
30140 {
30141 unsigned int i;
30142 rtx dst, addr, mem;
30143 rtx val_exp, val_reg, reg;
30144 machine_mode mode;
30145 HOST_WIDE_INT v = value;
30146
30147 gcc_assert (align == 1 || align == 2);
30148
30149 if (align == 2)
30150 v |= (value << BITS_PER_UNIT);
30151
30152 v = sext_hwi (v, BITS_PER_WORD);
30153 val_exp = GEN_INT (v);
30154 /* Skip if it isn't profitable. */
30155 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30156 align, true, false))
30157 return false;
30158
30159 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30160 mode = (align == 2 ? HImode : QImode);
30161 val_reg = force_reg (SImode, val_exp);
30162 reg = gen_lowpart (mode, val_reg);
30163
30164 for (i = 0; (i + GET_MODE_SIZE (mode) <= length); i += GET_MODE_SIZE (mode))
30165 {
30166 addr = plus_constant (Pmode, dst, i);
30167 mem = adjust_automodify_address (dstbase, mode, addr, i);
30168 emit_move_insn (mem, reg);
30169 }
30170
30171 /* Handle single byte leftover. */
30172 if (i + 1 == length)
30173 {
30174 reg = gen_lowpart (QImode, val_reg);
30175 addr = plus_constant (Pmode, dst, i);
30176 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30177 emit_move_insn (mem, reg);
30178 i++;
30179 }
30180
30181 gcc_assert (i == length);
30182 return true;
30183 }
30184
30185 /* Set a block of memory using plain strd/str/strh/strb instructions,
30186 to permit unaligned copies on processors which support unaligned
30187 semantics for those instructions. We fill the first LENGTH bytes
30188 of the memory area starting from DSTBASE with byte constant VALUE.
30189 ALIGN is the alignment requirement of memory. */
30190 static bool
30191 arm_block_set_aligned_non_vect (rtx dstbase,
30192 unsigned HOST_WIDE_INT length,
30193 unsigned HOST_WIDE_INT value,
30194 unsigned HOST_WIDE_INT align)
30195 {
30196 unsigned int i;
30197 rtx dst, addr, mem;
30198 rtx val_exp, val_reg, reg;
30199 unsigned HOST_WIDE_INT v;
30200 bool use_strd_p;
30201
30202 use_strd_p = (length >= 2 * UNITS_PER_WORD && (align & 3) == 0
30203 && TARGET_LDRD && current_tune->prefer_ldrd_strd);
30204
30205 v = (value | (value << 8) | (value << 16) | (value << 24));
30206 if (length < UNITS_PER_WORD)
30207 v &= (0xFFFFFFFF >> (UNITS_PER_WORD - length) * BITS_PER_UNIT);
30208
30209 if (use_strd_p)
30210 v |= (v << BITS_PER_WORD);
30211 else
30212 v = sext_hwi (v, BITS_PER_WORD);
30213
30214 val_exp = GEN_INT (v);
30215 /* Skip if it isn't profitable. */
30216 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30217 align, false, use_strd_p))
30218 {
30219 if (!use_strd_p)
30220 return false;
30221
30222 /* Try without strd. */
30223 v = (v >> BITS_PER_WORD);
30224 v = sext_hwi (v, BITS_PER_WORD);
30225 val_exp = GEN_INT (v);
30226 use_strd_p = false;
30227 if (!arm_block_set_non_vect_profit_p (val_exp, length,
30228 align, false, use_strd_p))
30229 return false;
30230 }
30231
30232 i = 0;
30233 dst = copy_addr_to_reg (XEXP (dstbase, 0));
30234 /* Handle double words using strd if possible. */
30235 if (use_strd_p)
30236 {
30237 val_reg = force_reg (DImode, val_exp);
30238 reg = val_reg;
30239 for (; (i + 8 <= length); i += 8)
30240 {
30241 addr = plus_constant (Pmode, dst, i);
30242 mem = adjust_automodify_address (dstbase, DImode, addr, i);
30243 emit_move_insn (mem, reg);
30244 }
30245 }
30246 else
30247 val_reg = force_reg (SImode, val_exp);
30248
30249 /* Handle words. */
30250 reg = (use_strd_p ? gen_lowpart (SImode, val_reg) : val_reg);
30251 for (; (i + 4 <= length); i += 4)
30252 {
30253 addr = plus_constant (Pmode, dst, i);
30254 mem = adjust_automodify_address (dstbase, SImode, addr, i);
30255 if ((align & 3) == 0)
30256 emit_move_insn (mem, reg);
30257 else
30258 emit_insn (gen_unaligned_storesi (mem, reg));
30259 }
30260
30261 /* Merge last pair of STRH and STRB into a STR if possible. */
30262 if (unaligned_access && i > 0 && (i + 3) == length)
30263 {
30264 addr = plus_constant (Pmode, dst, i - 1);
30265 mem = adjust_automodify_address (dstbase, SImode, addr, i - 1);
30266 /* We are shifting one byte back, set the alignment accordingly. */
30267 if ((align & 1) == 0)
30268 set_mem_align (mem, BITS_PER_UNIT);
30269
30270 /* Most likely this is an unaligned access, and we can't tell at
30271 compilation time. */
30272 emit_insn (gen_unaligned_storesi (mem, reg));
30273 return true;
30274 }
30275
30276 /* Handle half word leftover. */
30277 if (i + 2 <= length)
30278 {
30279 reg = gen_lowpart (HImode, val_reg);
30280 addr = plus_constant (Pmode, dst, i);
30281 mem = adjust_automodify_address (dstbase, HImode, addr, i);
30282 if ((align & 1) == 0)
30283 emit_move_insn (mem, reg);
30284 else
30285 emit_insn (gen_unaligned_storehi (mem, reg));
30286
30287 i += 2;
30288 }
30289
30290 /* Handle single byte leftover. */
30291 if (i + 1 == length)
30292 {
30293 reg = gen_lowpart (QImode, val_reg);
30294 addr = plus_constant (Pmode, dst, i);
30295 mem = adjust_automodify_address (dstbase, QImode, addr, i);
30296 emit_move_insn (mem, reg);
30297 }
30298
30299 return true;
30300 }
30301
30302 /* Set a block of memory using vectorization instructions for both
30303 aligned and unaligned cases. We fill the first LENGTH bytes of
30304 the memory area starting from DSTBASE with byte constant VALUE.
30305 ALIGN is the alignment requirement of memory. */
30306 static bool
30307 arm_block_set_vect (rtx dstbase,
30308 unsigned HOST_WIDE_INT length,
30309 unsigned HOST_WIDE_INT value,
30310 unsigned HOST_WIDE_INT align)
30311 {
30312 /* Check whether we need to use unaligned store instruction. */
30313 if (((align & 3) != 0 || (length & 3) != 0)
30314 /* Check whether unaligned store instruction is available. */
30315 && (!unaligned_access || BYTES_BIG_ENDIAN))
30316 return false;
30317
30318 if ((align & 3) == 0)
30319 return arm_block_set_aligned_vect (dstbase, length, value, align);
30320 else
30321 return arm_block_set_unaligned_vect (dstbase, length, value, align);
30322 }
30323
30324 /* Expand a string store operation. First we try to do it using
30325 vectorization instructions, then with ARM unaligned access and a
30326 double-word store if profitable. OPERANDS[0] is the destination,
30327 OPERANDS[1] is the number of bytes, OPERANDS[2] is the value to
30328 initialize the memory with, OPERANDS[3] is the known alignment of the
30329 destination. */
30330 bool
30331 arm_gen_setmem (rtx *operands)
30332 {
30333 rtx dstbase = operands[0];
30334 unsigned HOST_WIDE_INT length;
30335 unsigned HOST_WIDE_INT value;
30336 unsigned HOST_WIDE_INT align;
30337
30338 if (!CONST_INT_P (operands[2]) || !CONST_INT_P (operands[1]))
30339 return false;
30340
30341 length = UINTVAL (operands[1]);
30342 if (length > 64)
30343 return false;
30344
30345 value = (UINTVAL (operands[2]) & 0xFF);
30346 align = UINTVAL (operands[3]);
30347 if (TARGET_NEON && length >= 8
30348 && current_tune->string_ops_prefer_neon
30349 && arm_block_set_vect (dstbase, length, value, align))
30350 return true;
30351
30352 if (!unaligned_access && (align & 3) != 0)
30353 return arm_block_set_unaligned_non_vect (dstbase, length, value, align);
30354
30355 return arm_block_set_aligned_non_vect (dstbase, length, value, align);
30356 }
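/* Illustrative sketch: this expander serves small fixed-size memset calls
   such as the hypothetical helper

     void clear_tag (char *p)
     {
       __builtin_memset (p, 0xAB, 15);
     }

   for which operands[1] is 15, operands[2] is 0xAB and operands[3] is the
   alignment the middle end could prove for P; lengths above 64 bytes are
   left to the generic expansion.  */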
30357
30358
30359 static bool
30360 arm_macro_fusion_p (void)
30361 {
30362 return current_tune->fusible_ops != tune_params::FUSE_NOTHING;
30363 }
30364
30365 /* Return true if the two back-to-back sets PREV_SET, CURR_SET are suitable
30366 for MOVW / MOVT macro fusion. */
30367
30368 static bool
30369 arm_sets_movw_movt_fusible_p (rtx prev_set, rtx curr_set)
30370 {
30371 /* We are trying to fuse
30372 movw imm / movt imm
30373 instructions as a group that gets scheduled together. */
30374
30375 rtx set_dest = SET_DEST (curr_set);
30376
30377 if (GET_MODE (set_dest) != SImode)
30378 return false;
30379
30380 /* We are trying to match:
30381 prev (movw) == (set (reg r0) (const_int imm16))
30382 curr (movt) == (set (zero_extract (reg r0)
30383 (const_int 16)
30384 (const_int 16))
30385 (const_int imm16_1))
30386 or
30387 prev (movw) == (set (reg r1)
30388 (high (symbol_ref ("SYM"))))
30389 curr (movt) == (set (reg r0)
30390 (lo_sum (reg r1)
30391 (symbol_ref ("SYM")))) */
30392
30393 if (GET_CODE (set_dest) == ZERO_EXTRACT)
30394 {
30395 if (CONST_INT_P (SET_SRC (curr_set))
30396 && CONST_INT_P (SET_SRC (prev_set))
30397 && REG_P (XEXP (set_dest, 0))
30398 && REG_P (SET_DEST (prev_set))
30399 && REGNO (XEXP (set_dest, 0)) == REGNO (SET_DEST (prev_set)))
30400 return true;
30401
30402 }
30403 else if (GET_CODE (SET_SRC (curr_set)) == LO_SUM
30404 && REG_P (SET_DEST (curr_set))
30405 && REG_P (SET_DEST (prev_set))
30406 && GET_CODE (SET_SRC (prev_set)) == HIGH
30407 && REGNO (SET_DEST (curr_set)) == REGNO (SET_DEST (prev_set)))
30408 return true;
30409
30410 return false;
30411 }
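/* Illustrative sketch: a 32-bit constant load such as

     unsigned int get_magic (void) { return 0x12345678; }

   is emitted as a MOVW/MOVT pair, roughly

     movw    r0, #0x5678
     movt    r0, #0x1234

   and the first RTL form matched above (a SET of a ZERO_EXTRACT of the
   register written by the previous SET) is the shape of that MOVT half,
   so the scheduler keeps the pair back to back when the tuning enables
   FUSE_MOVW_MOVT.  */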
30412
30413 static bool
30414 aarch_macro_fusion_pair_p (rtx_insn* prev, rtx_insn* curr)
30415 {
30416 rtx prev_set = single_set (prev);
30417 rtx curr_set = single_set (curr);
30418
30419 if (!prev_set
30420 || !curr_set)
30421 return false;
30422
30423 if (any_condjump_p (curr))
30424 return false;
30425
30426 if (!arm_macro_fusion_p ())
30427 return false;
30428
30429 if (current_tune->fusible_ops & tune_params::FUSE_AES_AESMC
30430 && aarch_crypto_can_dual_issue (prev, curr))
30431 return true;
30432
30433 if (current_tune->fusible_ops & tune_params::FUSE_MOVW_MOVT
30434 && arm_sets_movw_movt_fusible_p (prev_set, curr_set))
30435 return true;
30436
30437 return false;
30438 }
30439
30440 /* Return true iff the instruction fusion described by OP is enabled. */
30441 bool
30442 arm_fusion_enabled_p (tune_params::fuse_ops op)
30443 {
30444 return current_tune->fusible_ops & op;
30445 }
30446
30447 /* Implement TARGET_SCHED_CAN_SPECULATE_INSN. Return true if INSN can be
30448 scheduled for speculative execution. Reject the long-running division
30449 and square-root instructions. */
30450
30451 static bool
30452 arm_sched_can_speculate_insn (rtx_insn *insn)
30453 {
30454 switch (get_attr_type (insn))
30455 {
30456 case TYPE_SDIV:
30457 case TYPE_UDIV:
30458 case TYPE_FDIVS:
30459 case TYPE_FDIVD:
30460 case TYPE_FSQRTS:
30461 case TYPE_FSQRTD:
30462 case TYPE_NEON_FP_SQRT_S:
30463 case TYPE_NEON_FP_SQRT_D:
30464 case TYPE_NEON_FP_SQRT_S_Q:
30465 case TYPE_NEON_FP_SQRT_D_Q:
30466 case TYPE_NEON_FP_DIV_S:
30467 case TYPE_NEON_FP_DIV_D:
30468 case TYPE_NEON_FP_DIV_S_Q:
30469 case TYPE_NEON_FP_DIV_D_Q:
30470 return false;
30471 default:
30472 return true;
30473 }
30474 }
30475
30476 /* Implement the TARGET_ASAN_SHADOW_OFFSET hook. */
30477
30478 static unsigned HOST_WIDE_INT
30479 arm_asan_shadow_offset (void)
30480 {
30481 return HOST_WIDE_INT_1U << 29;
30482 }
30483
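/* Editorial note (not part of the original source): with the usual ASan
   shadow granularity of 8 bytes (a shift of 3, which is an assumption
   here; only the offset comes from this hook), the mapping is roughly

       shadow_addr = (addr >> 3) + 0x20000000

   so application address 0x40000000 is checked via shadow byte
   0x28000000.  */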
30484
30485 /* This is a temporary fix for PR60655.  Ideally we would
30486 handle most of these cases in the generic part, but
30487 currently we reject minus (..) (sym_ref).  We try to
30488 ameliorate the case of minus (sym_ref1) (sym_ref2)
30489 where both symbols are in the same section. */
30490
30491 static bool
30492 arm_const_not_ok_for_debug_p (rtx p)
30493 {
30494 tree decl_op0 = NULL;
30495 tree decl_op1 = NULL;
30496
30497 if (GET_CODE (p) == UNSPEC)
30498 return true;
30499 if (GET_CODE (p) == MINUS)
30500 {
30501 if (GET_CODE (XEXP (p, 1)) == SYMBOL_REF)
30502 {
30503 decl_op1 = SYMBOL_REF_DECL (XEXP (p, 1));
30504 if (decl_op1
30505 && GET_CODE (XEXP (p, 0)) == SYMBOL_REF
30506 && (decl_op0 = SYMBOL_REF_DECL (XEXP (p, 0))))
30507 {
30508 if ((VAR_P (decl_op1)
30509 || TREE_CODE (decl_op1) == CONST_DECL)
30510 && (VAR_P (decl_op0)
30511 || TREE_CODE (decl_op0) == CONST_DECL))
30512 return (get_variable_section (decl_op1, false)
30513 != get_variable_section (decl_op0, false));
30514
30515 if (TREE_CODE (decl_op1) == LABEL_DECL
30516 && TREE_CODE (decl_op0) == LABEL_DECL)
30517 return (DECL_CONTEXT (decl_op1)
30518 != DECL_CONTEXT (decl_op0));
30519 }
30520
30521 return true;
30522 }
30523 }
30524
30525 return false;
30526 }
30527
30528 /* Return TRUE if X is a reference to a value in a constant pool.  */
30529 extern bool
30530 arm_is_constant_pool_ref (rtx x)
30531 {
30532 return (MEM_P (x)
30533 && GET_CODE (XEXP (x, 0)) == SYMBOL_REF
30534 && CONSTANT_POOL_ADDRESS_P (XEXP (x, 0)));
30535 }
30536
30537 /* Remember the last target of arm_set_current_function. */
30538 static GTY(()) tree arm_previous_fndecl;
30539
30540 /* Restore or save the TREE_TARGET_GLOBALS from or to NEW_TREE. */
30541
30542 void
30543 save_restore_target_globals (tree new_tree)
30544 {
30545 /* If we have a previous state, use it. */
30546 if (TREE_TARGET_GLOBALS (new_tree))
30547 restore_target_globals (TREE_TARGET_GLOBALS (new_tree));
30548 else if (new_tree == target_option_default_node)
30549 restore_target_globals (&default_target_globals);
30550 else
30551 {
30552 /* Call target_reinit and save the state for TARGET_GLOBALS. */
30553 TREE_TARGET_GLOBALS (new_tree) = save_target_globals_default_opts ();
30554 }
30555
30556 arm_option_params_internal ();
30557 }
30558
30559 /* Invalidate arm_previous_fndecl. */
30560
30561 void
30562 arm_reset_previous_fndecl (void)
30563 {
30564 arm_previous_fndecl = NULL_TREE;
30565 }
30566
30567 /* Establish appropriate back-end context for processing the function
30568 FNDECL. The argument might be NULL to indicate processing at top
30569 level, outside of any function scope. */
30570
30571 static void
30572 arm_set_current_function (tree fndecl)
30573 {
30574 if (!fndecl || fndecl == arm_previous_fndecl)
30575 return;
30576
30577 tree old_tree = (arm_previous_fndecl
30578 ? DECL_FUNCTION_SPECIFIC_TARGET (arm_previous_fndecl)
30579 : NULL_TREE);
30580
30581 tree new_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30582
30583 /* If the current function has no attributes but the previous one did,
30584 use the default node. */
30585 if (! new_tree && old_tree)
30586 new_tree = target_option_default_node;
30587
30588 /* If there is nothing to do, return.  #pragma GCC reset or #pragma GCC pop to
30589 the default have been handled by save_restore_target_globals from
30590 arm_pragma_target_parse. */
30591 if (old_tree == new_tree)
30592 return;
30593
30594 arm_previous_fndecl = fndecl;
30595
30596 /* First set the target options. */
30597 cl_target_option_restore (&global_options, TREE_TARGET_OPTION (new_tree));
30598
30599 save_restore_target_globals (new_tree);
30600 }
30601
30602 /* Implement TARGET_OPTION_PRINT. */
30603
30604 static void
30605 arm_option_print (FILE *file, int indent, struct cl_target_option *ptr)
30606 {
30607 int flags = ptr->x_target_flags;
30608 const char *fpu_name;
30609
30610 fpu_name = (ptr->x_arm_fpu_index == TARGET_FPU_auto
30611 ? "auto" : all_fpus[ptr->x_arm_fpu_index].name);
30612
30613 fprintf (file, "%*sselected isa %s\n", indent, "",
30614 TARGET_THUMB2_P (flags) ? "thumb2" :
30615 TARGET_THUMB_P (flags) ? "thumb1" :
30616 "arm");
30617
30618 if (ptr->x_arm_arch_string)
30619 fprintf (file, "%*sselected architecture %s\n", indent, "",
30620 ptr->x_arm_arch_string);
30621
30622 if (ptr->x_arm_cpu_string)
30623 fprintf (file, "%*sselected CPU %s\n", indent, "",
30624 ptr->x_arm_cpu_string);
30625
30626 if (ptr->x_arm_tune_string)
30627 fprintf (file, "%*sselected tune %s\n", indent, "",
30628 ptr->x_arm_tune_string);
30629
30630 fprintf (file, "%*sselected fpu %s\n", indent, "", fpu_name);
30631 }
30632
30633 /* Hook to determine if one function can safely inline another. */
30634
30635 static bool
30636 arm_can_inline_p (tree caller, tree callee)
30637 {
30638 tree caller_tree = DECL_FUNCTION_SPECIFIC_TARGET (caller);
30639 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (callee);
30640 bool can_inline = true;
30641
30642 struct cl_target_option *caller_opts
30643 = TREE_TARGET_OPTION (caller_tree ? caller_tree
30644 : target_option_default_node);
30645
30646 struct cl_target_option *callee_opts
30647 = TREE_TARGET_OPTION (callee_tree ? callee_tree
30648 : target_option_default_node);
30649
30650 if (callee_opts == caller_opts)
30651 return true;
30652
30653 /* Callee's ISA features should be a subset of the caller's. */
30654 struct arm_build_target caller_target;
30655 struct arm_build_target callee_target;
30656 caller_target.isa = sbitmap_alloc (isa_num_bits);
30657 callee_target.isa = sbitmap_alloc (isa_num_bits);
30658
30659 arm_configure_build_target (&caller_target, caller_opts, &global_options_set,
30660 false);
30661 arm_configure_build_target (&callee_target, callee_opts, &global_options_set,
30662 false);
30663 if (!bitmap_subset_p (callee_target.isa, caller_target.isa))
30664 can_inline = false;
30665
30666 sbitmap_free (caller_target.isa);
30667 sbitmap_free (callee_target.isa);
30668
30669 /* OK to inline between different modes.
30670 Functions with mode-specific instructions, e.g. using asm,
30671 must be explicitly protected with noinline. */
30672 return can_inline;
30673 }
30674
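/* Editorial example (not part of the original source; option spellings
   are illustrative): a caller compiled for an architecture that includes
   NEON may inline a callee compiled for the same architecture without
   NEON, because the callee's ISA bits are a subset of the caller's.  The
   reverse is rejected above, since the callee would rely on features the
   caller does not guarantee.  */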
30675 /* Hook to fix function's alignment affected by target attribute. */
30676
30677 static void
30678 arm_relayout_function (tree fndecl)
30679 {
30680 if (DECL_USER_ALIGN (fndecl))
30681 return;
30682
30683 tree callee_tree = DECL_FUNCTION_SPECIFIC_TARGET (fndecl);
30684
30685 if (!callee_tree)
30686 callee_tree = target_option_default_node;
30687
30688 struct cl_target_option *opts = TREE_TARGET_OPTION (callee_tree);
30689 SET_DECL_ALIGN
30690 (fndecl,
30691 FUNCTION_ALIGNMENT (FUNCTION_BOUNDARY_P (opts->x_target_flags)));
30692 }
30693
30694 /* Inner function to process the attribute((target(...))): take an argument and
30695 set the current options from that argument.  If we have a list, recursively
30696 go over the list. */
30697
30698 static bool
30699 arm_valid_target_attribute_rec (tree args, struct gcc_options *opts)
30700 {
30701 if (TREE_CODE (args) == TREE_LIST)
30702 {
30703 bool ret = true;
30704
30705 for (; args; args = TREE_CHAIN (args))
30706 if (TREE_VALUE (args)
30707 && !arm_valid_target_attribute_rec (TREE_VALUE (args), opts))
30708 ret = false;
30709 return ret;
30710 }
30711
30712 else if (TREE_CODE (args) != STRING_CST)
30713 {
30714 error ("attribute %<target%> argument not a string");
30715 return false;
30716 }
30717
30718 char *argstr = ASTRDUP (TREE_STRING_POINTER (args));
30719 char *q;
30720
30721 while ((q = strtok (argstr, ",")) != NULL)
30722 {
30723 while (ISSPACE (*q)) ++q;
30724
30725 argstr = NULL;
30726 if (!strncmp (q, "thumb", 5))
30727 opts->x_target_flags |= MASK_THUMB;
30728
30729 else if (!strncmp (q, "arm", 3))
30730 opts->x_target_flags &= ~MASK_THUMB;
30731
30732 else if (!strncmp (q, "fpu=", 4))
30733 {
30734 int fpu_index;
30735 if (! opt_enum_arg_to_value (OPT_mfpu_, q+4,
30736 &fpu_index, CL_TARGET))
30737 {
30738 error ("invalid fpu for target attribute or pragma %qs", q);
30739 return false;
30740 }
30741 if (fpu_index == TARGET_FPU_auto)
30742 {
30743 /* This doesn't really make sense until we support
30744 general dynamic selection of the architecture and all
30745 sub-features. */
30746 sorry ("auto fpu selection not currently permitted here");
30747 return false;
30748 }
30749 opts->x_arm_fpu_index = (enum fpu_type) fpu_index;
30750 }
30751 else if (!strncmp (q, "arch=", 5))
30752 {
30753 char* arch = q+5;
30754 const arch_option *arm_selected_arch
30755 = arm_parse_arch_option_name (all_architectures, "arch", arch);
30756
30757 if (!arm_selected_arch)
30758 {
30759 error ("invalid architecture for target attribute or pragma %qs",
30760 q);
30761 return false;
30762 }
30763
30764 opts->x_arm_arch_string = xstrndup (arch, strlen (arch));
30765 }
30766 else if (q[0] == '+')
30767 {
30768 opts->x_arm_arch_string
30769 = xasprintf ("%s%s", opts->x_arm_arch_string, q);
30770 }
30771 else
30772 {
30773 error ("unknown target attribute or pragma %qs", q);
30774 return false;
30775 }
30776 }
30777
30778 return true;
30779 }
30780
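/* Editorial usage sketch (not part of the original source): the strings
   parsed above are the ones written in user code, e.g.

       __attribute__ ((target ("thumb")))            int f (void);
       __attribute__ ((target ("arm,fpu=vfpv3")))    int g (void);
       __attribute__ ((target ("arch=armv7-a")))     int h (void);

   An item starting with '+' is appended to the previously selected
   architecture string, as handled in the final branch above.  */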
30781 /* Return a TARGET_OPTION_NODE tree of the target options listed or NULL. */
30782
30783 tree
30784 arm_valid_target_attribute_tree (tree args, struct gcc_options *opts,
30785 struct gcc_options *opts_set)
30786 {
30787 struct cl_target_option cl_opts;
30788
30789 if (!arm_valid_target_attribute_rec (args, opts))
30790 return NULL_TREE;
30791
30792 cl_target_option_save (&cl_opts, opts);
30793 arm_configure_build_target (&arm_active_target, &cl_opts, opts_set, false);
30794 arm_option_check_internal (opts);
30795 /* Do any overrides, such as global options arch=xxx.
30796 We do this since arm_active_target was overridden. */
30797 arm_option_reconfigure_globals ();
30798 arm_options_perform_arch_sanity_checks ();
30799 arm_option_override_internal (opts, opts_set);
30800
30801 return build_target_option_node (opts);
30802 }
30803
30804 static void
30805 add_attribute (const char * mode, tree *attributes)
30806 {
30807 size_t len = strlen (mode);
30808 tree value = build_string (len, mode);
30809
30810 TREE_TYPE (value) = build_array_type (char_type_node,
30811 build_index_type (size_int (len)));
30812
30813 *attributes = tree_cons (get_identifier ("target"),
30814 build_tree_list (NULL_TREE, value),
30815 *attributes);
30816 }
30817
30818 /* For testing.  Insert thumb or arm modes alternately on functions. */
30819
30820 static void
30821 arm_insert_attributes (tree fndecl, tree * attributes)
30822 {
30823 const char *mode;
30824
30825 if (! TARGET_FLIP_THUMB)
30826 return;
30827
30828 if (TREE_CODE (fndecl) != FUNCTION_DECL || DECL_EXTERNAL(fndecl)
30829 || fndecl_built_in_p (fndecl) || DECL_ARTIFICIAL (fndecl))
30830 return;
30831
30832 /* Nested definitions must inherit mode. */
30833 if (current_function_decl)
30834 {
30835 mode = TARGET_THUMB ? "thumb" : "arm";
30836 add_attribute (mode, attributes);
30837 return;
30838 }
30839
30840 /* If there is already a setting, don't change it. */
30841 if (lookup_attribute ("target", *attributes) != NULL)
30842 return;
30843
30844 mode = thumb_flipper ? "thumb" : "arm";
30845 add_attribute (mode, attributes);
30846
30847 thumb_flipper = !thumb_flipper;
30848 }
30849
30850 /* Hook to validate attribute((target("string"))). */
30851
30852 static bool
30853 arm_valid_target_attribute_p (tree fndecl, tree ARG_UNUSED (name),
30854 tree args, int ARG_UNUSED (flags))
30855 {
30856 bool ret = true;
30857 struct gcc_options func_options;
30858 tree cur_tree, new_optimize;
30859 gcc_assert ((fndecl != NULL_TREE) && (args != NULL_TREE));
30860
30861 /* Get the optimization options of the current function. */
30862 tree func_optimize = DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl);
30863
30864 /* If the function changed the optimization levels as well as setting target
30865 options, start with the optimizations specified. */
30866 if (!func_optimize)
30867 func_optimize = optimization_default_node;
30868
30869 /* Init func_options. */
30870 memset (&func_options, 0, sizeof (func_options));
30871 init_options_struct (&func_options, NULL);
30872 lang_hooks.init_options_struct (&func_options);
30873
30874 /* Initialize func_options to the defaults. */
30875 cl_optimization_restore (&func_options,
30876 TREE_OPTIMIZATION (func_optimize));
30877
30878 cl_target_option_restore (&func_options,
30879 TREE_TARGET_OPTION (target_option_default_node));
30880
30881 /* Set func_options flags with new target mode. */
30882 cur_tree = arm_valid_target_attribute_tree (args, &func_options,
30883 &global_options_set);
30884
30885 if (cur_tree == NULL_TREE)
30886 ret = false;
30887
30888 new_optimize = build_optimization_node (&func_options);
30889
30890 DECL_FUNCTION_SPECIFIC_TARGET (fndecl) = cur_tree;
30891
30892 DECL_FUNCTION_SPECIFIC_OPTIMIZATION (fndecl) = new_optimize;
30893
30894 finalize_options_struct (&func_options);
30895
30896 return ret;
30897 }
30898
30899 /* Match an ISA feature bitmap to a named FPU. We always use the
30900 first entry that exactly matches the feature set, so that we
30901 effectively canonicalize the FPU name for the assembler. */
30902 static const char*
30903 arm_identify_fpu_from_isa (sbitmap isa)
30904 {
30905 auto_sbitmap fpubits (isa_num_bits);
30906 auto_sbitmap cand_fpubits (isa_num_bits);
30907
30908 bitmap_and (fpubits, isa, isa_all_fpubits);
30909
30910 /* If there are no ISA feature bits relating to the FPU, we must be
30911 doing soft-float. */
30912 if (bitmap_empty_p (fpubits))
30913 return "softvfp";
30914
30915 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
30916 {
30917 arm_initialize_isa (cand_fpubits, all_fpus[i].isa_bits);
30918 if (bitmap_equal_p (fpubits, cand_fpubits))
30919 return all_fpus[i].name;
30920 }
30921 /* We must find an entry, or things have gone wrong. */
30922 gcc_unreachable ();
30923 }
30924
30925 /* Implement ASM_DECLARE_FUNCTION_NAME. Output the ISA features used
30926 by the function DECL. */
30927 void
30928 arm_declare_function_name (FILE *stream, const char *name, tree decl)
30929 {
30930 tree target_parts = DECL_FUNCTION_SPECIFIC_TARGET (decl);
30931
30932 struct cl_target_option *targ_options;
30933 if (target_parts)
30934 targ_options = TREE_TARGET_OPTION (target_parts);
30935 else
30936 targ_options = TREE_TARGET_OPTION (target_option_current_node);
30937 gcc_assert (targ_options);
30938
30939 /* Only update the assembler .arch string if it is distinct from the last
30940 such string we printed. arch_to_print is set conditionally in case
30941 targ_options->x_arm_arch_string is NULL, which can be the case
30942 when cc1 is invoked directly without passing the -march option. */
30943 std::string arch_to_print;
30944 if (targ_options->x_arm_arch_string)
30945 arch_to_print = targ_options->x_arm_arch_string;
30946
30947 if (arch_to_print != arm_last_printed_arch_string)
30948 {
30949 std::string arch_name
30950 = arch_to_print.substr (0, arch_to_print.find ("+"));
30951 asm_fprintf (asm_out_file, "\t.arch %s\n", arch_name.c_str ());
30952 const arch_option *arch
30953 = arm_parse_arch_option_name (all_architectures, "-march",
30954 targ_options->x_arm_arch_string);
30955 auto_sbitmap opt_bits (isa_num_bits);
30956
30957 gcc_assert (arch);
30958 if (arch->common.extensions)
30959 {
30960 for (const struct cpu_arch_extension *opt = arch->common.extensions;
30961 opt->name != NULL;
30962 opt++)
30963 {
30964 if (!opt->remove)
30965 {
30966 arm_initialize_isa (opt_bits, opt->isa_bits);
30967 if (bitmap_subset_p (opt_bits, arm_active_target.isa)
30968 && !bitmap_subset_p (opt_bits, isa_all_fpubits))
30969 asm_fprintf (asm_out_file, "\t.arch_extension %s\n",
30970 opt->name);
30971 }
30972 }
30973 }
30974
30975 arm_last_printed_arch_string = arch_to_print;
30976 }
30977
30978 fprintf (stream, "\t.syntax unified\n");
30979
30980 if (TARGET_THUMB)
30981 {
30982 if (is_called_in_ARM_mode (decl)
30983 || (TARGET_THUMB1 && !TARGET_THUMB1_ONLY
30984 && cfun->is_thunk))
30985 fprintf (stream, "\t.code 32\n");
30986 else if (TARGET_THUMB1)
30987 fprintf (stream, "\t.code\t16\n\t.thumb_func\n");
30988 else
30989 fprintf (stream, "\t.thumb\n\t.thumb_func\n");
30990 }
30991 else
30992 fprintf (stream, "\t.arm\n");
30993
30994 std::string fpu_to_print
30995 = TARGET_SOFT_FLOAT
30996 ? "softvfp" : arm_identify_fpu_from_isa (arm_active_target.isa);
30997
30998 if (fpu_to_print != arm_last_printed_fpu_string)
30999 {
31000 asm_fprintf (asm_out_file, "\t.fpu %s\n", fpu_to_print.c_str ());
31001 arm_last_printed_fpu_string = fpu_to_print;
31002 }
31003
31004 if (TARGET_POKE_FUNCTION_NAME)
31005 arm_poke_function_name (stream, (const char *) name);
31006 }
31007
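/* Editorial example (not part of the original source; directive values
   are illustrative): for a Thumb-2 function the code above typically
   emits something like

       .arch armv7-a
       .syntax unified
       .thumb
       .thumb_func
       .fpu vfpv3-d16

   with the .arch/.arch_extension and .fpu lines only appearing when they
   differ from the last ones printed.  */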
31008 /* If MEM has an address of the form [base+offset], extract the two
31009 parts of the address and store them in BASE and OFFSET; otherwise
31010 return false after clearing BASE and OFFSET. */
31011
31012 static bool
31013 extract_base_offset_in_addr (rtx mem, rtx *base, rtx *offset)
31014 {
31015 rtx addr;
31016
31017 gcc_assert (MEM_P (mem));
31018
31019 addr = XEXP (mem, 0);
31020
31021 /* Strip off const from addresses like (const (addr)). */
31022 if (GET_CODE (addr) == CONST)
31023 addr = XEXP (addr, 0);
31024
31025 if (GET_CODE (addr) == REG)
31026 {
31027 *base = addr;
31028 *offset = const0_rtx;
31029 return true;
31030 }
31031
31032 if (GET_CODE (addr) == PLUS
31033 && GET_CODE (XEXP (addr, 0)) == REG
31034 && CONST_INT_P (XEXP (addr, 1)))
31035 {
31036 *base = XEXP (addr, 0);
31037 *offset = XEXP (addr, 1);
31038 return true;
31039 }
31040
31041 *base = NULL_RTX;
31042 *offset = NULL_RTX;
31043
31044 return false;
31045 }
31046
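/* Editorial example (not part of the original source): for a MEM whose
   address is (plus (reg r4) (const_int 8)) this sets *BASE to (reg r4)
   and *OFFSET to (const_int 8); for a bare (reg r4) address the offset
   returned is const0_rtx.  */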
31047 /* If INSN is a load or store whose address has the form [base+offset],
31048 extract the two parts and store them in BASE and OFFSET.  IS_LOAD is set
31049 to TRUE if it's a load.  Return TRUE if INSN is such an instruction,
31050 otherwise return FALSE. */
31051
31052 static bool
31053 fusion_load_store (rtx_insn *insn, rtx *base, rtx *offset, bool *is_load)
31054 {
31055 rtx x, dest, src;
31056
31057 gcc_assert (INSN_P (insn));
31058 x = PATTERN (insn);
31059 if (GET_CODE (x) != SET)
31060 return false;
31061
31062 src = SET_SRC (x);
31063 dest = SET_DEST (x);
31064 if (GET_CODE (src) == REG && GET_CODE (dest) == MEM)
31065 {
31066 *is_load = false;
31067 extract_base_offset_in_addr (dest, base, offset);
31068 }
31069 else if (GET_CODE (src) == MEM && GET_CODE (dest) == REG)
31070 {
31071 *is_load = true;
31072 extract_base_offset_in_addr (src, base, offset);
31073 }
31074 else
31075 return false;
31076
31077 return (*base != NULL_RTX && *offset != NULL_RTX);
31078 }
31079
31080 /* Implement the TARGET_SCHED_FUSION_PRIORITY hook.
31081
31082 Currently we only support fusing ldr or str instructions, so FUSION_PRI
31083 and PRI are only calculated for these instructions. For other instructions,
31084 FUSION_PRI and PRI are simply set to MAX_PRI. In the future, other kinds of
31085 instruction fusion can be supported by returning different priorities.
31086
31087 It's important that irrelevant instructions get the largest FUSION_PRI. */
31088
31089 static void
31090 arm_sched_fusion_priority (rtx_insn *insn, int max_pri,
31091 int *fusion_pri, int *pri)
31092 {
31093 int tmp, off_val;
31094 bool is_load;
31095 rtx base, offset;
31096
31097 gcc_assert (INSN_P (insn));
31098
31099 tmp = max_pri - 1;
31100 if (!fusion_load_store (insn, &base, &offset, &is_load))
31101 {
31102 *pri = tmp;
31103 *fusion_pri = tmp;
31104 return;
31105 }
31106
31107 /* Load goes first. */
31108 if (is_load)
31109 *fusion_pri = tmp - 1;
31110 else
31111 *fusion_pri = tmp - 2;
31112
31113 tmp /= 2;
31114
31115 /* INSN with smaller base register goes first. */
31116 tmp -= ((REGNO (base) & 0xff) << 20);
31117
31118 /* INSN with smaller offset goes first. */
31119 off_val = (int)(INTVAL (offset));
31120 if (off_val >= 0)
31121 tmp -= (off_val & 0xfffff);
31122 else
31123 tmp += ((- off_val) & 0xfffff);
31124
31125 *pri = tmp;
31126 return;
31127 }
31128
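/* Editorial note (not part of the original source): the net effect is
   that two loads such as ldr r0, [r2] and ldr r1, [r2, #4] get the same
   FUSION_PRI while the one with the smaller offset gets the larger PRI,
   so the scheduler keeps them adjacent and in offset order, which is
   what a later pairing pass (for example LDRD/STRD formation, an
   assumption here) wants to see.  */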
31129
31130 /* Construct and return a PARALLEL RTX vector with elements numbering the
31131 lanes of either the high (HIGH == TRUE) or low (HIGH == FALSE) half of
31132 the vector - from the perspective of the architecture. This does not
31133 line up with GCC's perspective on lane numbers, so we end up with
31134 different masks depending on our target endian-ness. The diagram
31135 below may help. We must draw the distinction when building masks
31136 which select one half of the vector. An instruction selecting
31137 architectural low-lanes for a big-endian target, must be described using
31138 a mask selecting GCC high-lanes.
31139
31140 Big-Endian Little-Endian
31141
31142 GCC 0 1 2 3 3 2 1 0
31143 | x | x | x | x | | x | x | x | x |
31144 Architecture 3 2 1 0 3 2 1 0
31145
31146 Low Mask: { 2, 3 } { 0, 1 }
31147 High Mask: { 0, 1 } { 2, 3 }
31148 */
31149
31150 rtx
31151 arm_simd_vect_par_cnst_half (machine_mode mode, bool high)
31152 {
31153 int nunits = GET_MODE_NUNITS (mode);
31154 rtvec v = rtvec_alloc (nunits / 2);
31155 int high_base = nunits / 2;
31156 int low_base = 0;
31157 int base;
31158 rtx t1;
31159 int i;
31160
31161 if (BYTES_BIG_ENDIAN)
31162 base = high ? low_base : high_base;
31163 else
31164 base = high ? high_base : low_base;
31165
31166 for (i = 0; i < nunits / 2; i++)
31167 RTVEC_ELT (v, i) = GEN_INT (base + i);
31168
31169 t1 = gen_rtx_PARALLEL (mode, v);
31170 return t1;
31171 }
31172
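/* Editorial example (not part of the original source): for V4SImode with
   HIGH == true the function above returns
   (parallel [(const_int 2) (const_int 3)]) on a little-endian target but
   (parallel [(const_int 0) (const_int 1)]) on a big-endian one, matching
   the mask table in the comment before it.  */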
31173 /* Check OP for validity as a PARALLEL RTX vector with elements
31174 numbering the lanes of either the high (HIGH == TRUE) or low lanes,
31175 from the perspective of the architecture. See the diagram above
31176 arm_simd_vect_par_cnst_half for more details.  */
31177
31178 bool
31179 arm_simd_check_vect_par_cnst_half_p (rtx op, machine_mode mode,
31180 bool high)
31181 {
31182 rtx ideal = arm_simd_vect_par_cnst_half (mode, high);
31183 HOST_WIDE_INT count_op = XVECLEN (op, 0);
31184 HOST_WIDE_INT count_ideal = XVECLEN (ideal, 0);
31185 int i = 0;
31186
31187 if (!VECTOR_MODE_P (mode))
31188 return false;
31189
31190 if (count_op != count_ideal)
31191 return false;
31192
31193 for (i = 0; i < count_ideal; i++)
31194 {
31195 rtx elt_op = XVECEXP (op, 0, i);
31196 rtx elt_ideal = XVECEXP (ideal, 0, i);
31197
31198 if (!CONST_INT_P (elt_op)
31199 || INTVAL (elt_ideal) != INTVAL (elt_op))
31200 return false;
31201 }
31202 return true;
31203 }
31204
31205 /* Can output mi_thunk for all cases except for non-zero vcall_offset
31206 in Thumb1. */
31207 static bool
31208 arm_can_output_mi_thunk (const_tree, HOST_WIDE_INT, HOST_WIDE_INT vcall_offset,
31209 const_tree)
31210 {
31211 /* For now, we punt and do not handle this for TARGET_THUMB1.  */
31212 if (vcall_offset && TARGET_THUMB1)
31213 return false;
31214
31215 /* Otherwise ok. */
31216 return true;
31217 }
31218
31219 /* Generate RTL for a conditional branch with rtx comparison CODE in
31220 mode CC_MODE. The destination of the unlikely conditional branch
31221 is LABEL_REF. */
31222
31223 void
31224 arm_gen_unlikely_cbranch (enum rtx_code code, machine_mode cc_mode,
31225 rtx label_ref)
31226 {
31227 rtx x;
31228 x = gen_rtx_fmt_ee (code, VOIDmode,
31229 gen_rtx_REG (cc_mode, CC_REGNUM),
31230 const0_rtx);
31231
31232 x = gen_rtx_IF_THEN_ELSE (VOIDmode, x,
31233 gen_rtx_LABEL_REF (VOIDmode, label_ref),
31234 pc_rtx);
31235 emit_unlikely_jump (gen_rtx_SET (pc_rtx, x));
31236 }
31237
31238 /* Implement the TARGET_ASM_ELF_FLAGS_NUMERIC hook.
31239
31240 For pure-code sections there is no letter code for this attribute, so
31241 output all the section flags numerically when this is needed. */
31242
31243 static bool
31244 arm_asm_elf_flags_numeric (unsigned int flags, unsigned int *num)
31245 {
31246
31247 if (flags & SECTION_ARM_PURECODE)
31248 {
31249 *num = 0x20000000;
31250
31251 if (!(flags & SECTION_DEBUG))
31252 *num |= 0x2;
31253 if (flags & SECTION_EXCLUDE)
31254 *num |= 0x80000000;
31255 if (flags & SECTION_WRITE)
31256 *num |= 0x1;
31257 if (flags & SECTION_CODE)
31258 *num |= 0x4;
31259 if (flags & SECTION_MERGE)
31260 *num |= 0x10;
31261 if (flags & SECTION_STRINGS)
31262 *num |= 0x20;
31263 if (flags & SECTION_TLS)
31264 *num |= 0x400;
31265 if (HAVE_COMDAT_GROUP && (flags & SECTION_LINKONCE))
31266 *num |= 0x200;
31267
31268 return true;
31269 }
31270
31271 return false;
31272 }
31273
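/* Editorial worked example (not part of the original source): a normal
   executable pure-code section has SECTION_CODE set and SECTION_DEBUG
   clear, so the code above produces
   *NUM = 0x20000000 | 0x2 | 0x4 = 0x20000006, i.e. SHF_ARM_PURECODE plus
   SHF_ALLOC and SHF_EXECINSTR.  */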
31274 /* Implement the TARGET_ASM_FUNCTION_SECTION hook.
31275
31276 If pure-code is passed as an option, make sure all functions are in
31277 sections that have the SHF_ARM_PURECODE attribute. */
31278
31279 static section *
31280 arm_function_section (tree decl, enum node_frequency freq,
31281 bool startup, bool exit)
31282 {
31283 const char * section_name;
31284 section * sec;
31285
31286 if (!decl || TREE_CODE (decl) != FUNCTION_DECL)
31287 return default_function_section (decl, freq, startup, exit);
31288
31289 if (!target_pure_code)
31290 return default_function_section (decl, freq, startup, exit);
31291
31292
31293 section_name = DECL_SECTION_NAME (decl);
31294
31295 /* If a function is not in a named section then it falls under the 'default'
31296 text section, also known as '.text'. We can preserve previous behavior as
31297 the default text section already has the SHF_ARM_PURECODE section
31298 attribute. */
31299 if (!section_name)
31300 {
31301 section *default_sec = default_function_section (decl, freq, startup,
31302 exit);
31303
31304 /* If default_sec is not null, then it must be a special section like for
31305 example .text.startup. We set the pure-code attribute and return the
31306 same section to preserve existing behavior. */
31307 if (default_sec)
31308 default_sec->common.flags |= SECTION_ARM_PURECODE;
31309 return default_sec;
31310 }
31311
31312 /* Otherwise look whether a section has already been created with
31313 'section_name'. */
31314 sec = get_named_section (decl, section_name, 0);
31315 if (!sec)
31316 /* If that is not the case passing NULL as the section's name to
31317 'get_named_section' will create a section with the declaration's
31318 section name. */
31319 sec = get_named_section (decl, NULL, 0);
31320
31321 /* Set the SHF_ARM_PURECODE attribute. */
31322 sec->common.flags |= SECTION_ARM_PURECODE;
31323
31324 return sec;
31325 }
31326
31327 /* Implement the TARGET_SECTION_TYPE_FLAGS hook.
31328
31329 If DECL is a function declaration and pure-code is passed as an option
31330 then add the SHF_ARM_PURECODE attribute to the section flags.  NAME is the
31331 section's name and RELOC indicates whether the declaration's initializer may
31332 contain runtime relocations. */
31333
31334 static unsigned int
31335 arm_elf_section_type_flags (tree decl, const char *name, int reloc)
31336 {
31337 unsigned int flags = default_section_type_flags (decl, name, reloc);
31338
31339 if (decl && TREE_CODE (decl) == FUNCTION_DECL && target_pure_code)
31340 flags |= SECTION_ARM_PURECODE;
31341
31342 return flags;
31343 }
31344
31345 /* Generate call to __aeabi_[mode]divmod (op0, op1). */
31346
31347 static void
31348 arm_expand_divmod_libfunc (rtx libfunc, machine_mode mode,
31349 rtx op0, rtx op1,
31350 rtx *quot_p, rtx *rem_p)
31351 {
31352 if (mode == SImode)
31353 gcc_assert (!TARGET_IDIV);
31354
31355 scalar_int_mode libval_mode
31356 = smallest_int_mode_for_size (2 * GET_MODE_BITSIZE (mode));
31357
31358 rtx libval = emit_library_call_value (libfunc, NULL_RTX, LCT_CONST,
31359 libval_mode,
31360 op0, GET_MODE (op0),
31361 op1, GET_MODE (op1));
31362
31363 rtx quotient = simplify_gen_subreg (mode, libval, libval_mode, 0);
31364 rtx remainder = simplify_gen_subreg (mode, libval, libval_mode,
31365 GET_MODE_SIZE (mode));
31366
31367 gcc_assert (quotient);
31368 gcc_assert (remainder);
31369
31370 *quot_p = quotient;
31371 *rem_p = remainder;
31372 }
31373
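/* Editorial example (not part of the original source): for SImode
   operands the library value mode is DImode; the quotient is extracted
   with a subreg at byte offset 0 and the remainder with a subreg at byte
   offset GET_MODE_SIZE (SImode) == 4, matching the packed
   quotient/remainder pair returned by __aeabi_idivmod.  */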
31374 /* This function checks for the availability of the coprocessor builtin passed
31375 in BUILTIN for the current target. Returns true if it is available and
31376 false otherwise. If a BUILTIN is passed for which this function has not
31377 been implemented it will cause an exception. */
31378
31379 bool
31380 arm_coproc_builtin_available (enum unspecv builtin)
31381 {
31382 /* None of these builtins are available in Thumb mode if the target only
31383 supports Thumb-1. */
31384 if (TARGET_THUMB1)
31385 return false;
31386
31387 switch (builtin)
31388 {
31389 case VUNSPEC_CDP:
31390 case VUNSPEC_LDC:
31391 case VUNSPEC_LDCL:
31392 case VUNSPEC_STC:
31393 case VUNSPEC_STCL:
31394 case VUNSPEC_MCR:
31395 case VUNSPEC_MRC:
31396 if (arm_arch4)
31397 return true;
31398 break;
31399 case VUNSPEC_CDP2:
31400 case VUNSPEC_LDC2:
31401 case VUNSPEC_LDC2L:
31402 case VUNSPEC_STC2:
31403 case VUNSPEC_STC2L:
31404 case VUNSPEC_MCR2:
31405 case VUNSPEC_MRC2:
31406 /* Only present in ARMv5*, ARMv6 (but not ARMv6-M), ARMv7* and
31407 ARMv8-{A,M}. */
31408 if (arm_arch5t)
31409 return true;
31410 break;
31411 case VUNSPEC_MCRR:
31412 case VUNSPEC_MRRC:
31413 /* Only present in ARMv5TE, ARMv6 (but not ARMv6-M), ARMv7* and
31414 ARMv8-{A,M}. */
31415 if (arm_arch6 || arm_arch5te)
31416 return true;
31417 break;
31418 case VUNSPEC_MCRR2:
31419 case VUNSPEC_MRRC2:
31420 if (arm_arch6)
31421 return true;
31422 break;
31423 default:
31424 gcc_unreachable ();
31425 }
31426 return false;
31427 }
31428
31429 /* This function returns true if OP is a valid memory operand for the ldc and
31430 stc coprocessor instructions and false otherwise. */
31431
31432 bool
31433 arm_coproc_ldc_stc_legitimate_address (rtx op)
31434 {
31435 HOST_WIDE_INT range;
31436 /* Has to be a memory operand. */
31437 if (!MEM_P (op))
31438 return false;
31439
31440 op = XEXP (op, 0);
31441
31442 /* We accept registers. */
31443 if (REG_P (op))
31444 return true;
31445
31446 switch (GET_CODE (op))
31447 {
31448 case PLUS:
31449 {
31450 /* Or registers with an offset. */
31451 if (!REG_P (XEXP (op, 0)))
31452 return false;
31453
31454 op = XEXP (op, 1);
31455
31456 /* The offset must be an immediate though. */
31457 if (!CONST_INT_P (op))
31458 return false;
31459
31460 range = INTVAL (op);
31461
31462 /* Within the range of [-1020,1020]. */
31463 if (!IN_RANGE (range, -1020, 1020))
31464 return false;
31465
31466 /* And a multiple of 4. */
31467 return (range % 4) == 0;
31468 }
31469 case PRE_INC:
31470 case POST_INC:
31471 case PRE_DEC:
31472 case POST_DEC:
31473 return REG_P (XEXP (op, 0));
31474 default:
31475 gcc_unreachable ();
31476 }
31477 return false;
31478 }
31479
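/* Editorial examples (not part of the original source): the checks above
   accept addresses such as (reg r1), (plus (reg r1) (const_int 8)) and
   (plus (reg r1) (const_int -1020)), but reject
   (plus (reg r1) (const_int 6)) (not a multiple of 4) and
   (plus (reg r1) (const_int 1024)) (out of range).  */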
31480 /* Implement TARGET_CAN_CHANGE_MODE_CLASS.
31481
31482 In VFPv1, VFP registers could only be accessed in the mode they were
31483 set, so subregs would be invalid there. However, we don't support
31484 VFPv1 at the moment, and the restriction was lifted in VFPv2.
31485
31486 In big-endian mode, modes greater than word size (i.e. DFmode) are stored in
31487 VFP registers in little-endian order. We can't describe that accurately to
31488 GCC, so avoid taking subregs of such values.
31489
31490 The only exception is going from a 128-bit to a 64-bit type. In that
31491 case the data layout happens to be consistent for big-endian, so we
31492 explicitly allow that case. */
31493
31494 static bool
31495 arm_can_change_mode_class (machine_mode from, machine_mode to,
31496 reg_class_t rclass)
31497 {
31498 if (TARGET_BIG_END
31499 && !(GET_MODE_SIZE (from) == 16 && GET_MODE_SIZE (to) == 8)
31500 && (GET_MODE_SIZE (from) > UNITS_PER_WORD
31501 || GET_MODE_SIZE (to) > UNITS_PER_WORD)
31502 && reg_classes_intersect_p (VFP_REGS, rclass))
31503 return false;
31504 return true;
31505 }
31506
31507 /* Implement TARGET_CONSTANT_ALIGNMENT. Make strings word-aligned so
31508 strcpy from constants will be faster. */
31509
31510 static HOST_WIDE_INT
31511 arm_constant_alignment (const_tree exp, HOST_WIDE_INT align)
31512 {
31513 unsigned int factor = (TARGET_THUMB || ! arm_tune_xscale ? 1 : 2);
31514 if (TREE_CODE (exp) == STRING_CST && !optimize_size)
31515 return MAX (align, BITS_PER_WORD * factor);
31516 return align;
31517 }
31518
31519 /* Emit a speculation barrier on target architectures that do not have
31520 DSB/ISB directly. Such systems probably don't need a barrier
31521 themselves, but if the code is ever run on a later architecture, it
31522 might become a problem. */
31523 void
31524 arm_emit_speculation_barrier_function ()
31525 {
31526 emit_library_call (speculation_barrier_libfunc, LCT_NORMAL, VOIDmode);
31527 }
31528
31529 #if CHECKING_P
31530 namespace selftest {
31531
31532 /* Scan the static data tables generated by parsecpu.awk looking for
31533 potential issues with the data. We primarily check for
31534 inconsistencies in the option extensions at present (extensions
31535 that duplicate others but aren't marked as aliases). Furthermore,
31536 for correct canonicalization later options must never be a subset
31537 of an earlier option. Any extension should also only specify other
31538 feature bits and never an architecture bit. The architecture is inferred
31539 from the declaration of the extension. */
31540 static void
31541 arm_test_cpu_arch_data (void)
31542 {
31543 const arch_option *arch;
31544 const cpu_option *cpu;
31545 auto_sbitmap target_isa (isa_num_bits);
31546 auto_sbitmap isa1 (isa_num_bits);
31547 auto_sbitmap isa2 (isa_num_bits);
31548
31549 for (arch = all_architectures; arch->common.name != NULL; ++arch)
31550 {
31551 const cpu_arch_extension *ext1, *ext2;
31552
31553 if (arch->common.extensions == NULL)
31554 continue;
31555
31556 arm_initialize_isa (target_isa, arch->common.isa_bits);
31557
31558 for (ext1 = arch->common.extensions; ext1->name != NULL; ++ext1)
31559 {
31560 if (ext1->alias)
31561 continue;
31562
31563 arm_initialize_isa (isa1, ext1->isa_bits);
31564 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31565 {
31566 if (ext2->alias || ext1->remove != ext2->remove)
31567 continue;
31568
31569 arm_initialize_isa (isa2, ext2->isa_bits);
31570 /* If the option is a subset of the parent option, it doesn't
31571 add anything and so isn't useful. */
31572 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31573
31574 /* If the extension specifies any architectural bits then
31575 disallow it. Extensions should only specify feature bits. */
31576 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31577 }
31578 }
31579 }
31580
31581 for (cpu = all_cores; cpu->common.name != NULL; ++cpu)
31582 {
31583 const cpu_arch_extension *ext1, *ext2;
31584
31585 if (cpu->common.extensions == NULL)
31586 continue;
31587
31588 arm_initialize_isa (target_isa, cpu->common.isa_bits);
31589
31590 for (ext1 = cpu->common.extensions; ext1->name != NULL; ++ext1)
31591 {
31592 if (ext1->alias)
31593 continue;
31594
31595 arm_initialize_isa (isa1, ext1->isa_bits);
31596 for (ext2 = ext1 + 1; ext2->name != NULL; ++ext2)
31597 {
31598 if (ext2->alias || ext1->remove != ext2->remove)
31599 continue;
31600
31601 arm_initialize_isa (isa2, ext2->isa_bits);
31602 /* If the option is a subset of the parent option, it doesn't
31603 add anything and so isn't useful. */
31604 ASSERT_TRUE (!bitmap_subset_p (isa2, isa1));
31605
31606 /* If the extension specifies any architectural bits then
31607 disallow it. Extensions should only specify feature bits. */
31608 ASSERT_TRUE (!bitmap_intersect_p (isa2, target_isa));
31609 }
31610 }
31611 }
31612 }
31613
31614 /* Scan the static data tables generated by parsecpu.awk looking for
31615 potential issues with the data.  Here we check for consistency of the
31616 FPU bits; in particular we check that ISA_ALL_FPU_INTERNAL does not contain
31617 a feature bit that is not defined by any FPU flag. */
31618 static void
31619 arm_test_fpu_data (void)
31620 {
31621 auto_sbitmap isa_all_fpubits (isa_num_bits);
31622 auto_sbitmap fpubits (isa_num_bits);
31623 auto_sbitmap tmpset (isa_num_bits);
31624
31625 static const enum isa_feature fpu_bitlist[]
31626 = { ISA_ALL_FPU_INTERNAL, isa_nobit };
31627 arm_initialize_isa (isa_all_fpubits, fpu_bitlist);
31628
31629 for (unsigned int i = 0; i < TARGET_FPU_auto; i++)
31630 {
31631 arm_initialize_isa (fpubits, all_fpus[i].isa_bits);
31632 bitmap_and_compl (tmpset, isa_all_fpubits, fpubits);
31633 bitmap_clear (isa_all_fpubits);
31634 bitmap_copy (isa_all_fpubits, tmpset);
31635 }
31636
31637 if (!bitmap_empty_p (isa_all_fpubits))
31638 {
31639 fprintf (stderr, "Error: found feature bits in the ALL_FPU_INTERNAL"
31640 " group that are not defined by any FPU.\n"
31641 " Check your arm-cpus.in.\n");
31642 ASSERT_TRUE (bitmap_empty_p (isa_all_fpubits));
31643 }
31644 }
31645
31646 static void
31647 arm_run_selftests (void)
31648 {
31649 arm_test_cpu_arch_data ();
31650 arm_test_fpu_data ();
31651 }
31652 } /* Namespace selftest. */
31653
31654 #undef TARGET_RUN_TARGET_SELFTESTS
31655 #define TARGET_RUN_TARGET_SELFTESTS selftest::arm_run_selftests
31656 #endif /* CHECKING_P */
31657
31658 struct gcc_target targetm = TARGET_INITIALIZER;
31659
31660 #include "gt-arm.h"